commit e858d074bddc1e8cb3f2b4bac78a30409b40c31c
parent 2143dbf0a452894c24c9ef5b313cd542018344db
Author: Jaromil <jaromil@dyne.org>
Date: Fri, 26 Dec 2014 13:43:19 +0100
new extract command to list addresses found in maildirs
Diffstat:
3 files changed, 196 insertions(+), 125 deletions(-)
diff --git a/src/jaro b/src/jaro
@@ -573,6 +573,8 @@ main()
subcommands_opts[imap]=""
+ subcommands_opts[extract]=""
+
### Detect subcommand
local -aU every_opts #every_opts behave like a set; that is, an array with unique elements
for optspec in $subcommands_opts$main_opts; do
@@ -768,7 +770,12 @@ main()
;;
esac
;;
-
+
+ extract)
+ md_extract ${=@}
+ exitcode=$?
+ ;;
+
*) # unknown command, pass it to autostart
func "unknown command, remote check"
autostart ${PARAM}
diff --git a/src/zlibs/addressbook b/src/zlibs/addressbook
@@ -60,9 +60,9 @@ insert_address() {
INSERT INTO $list (email, name)
VALUES ("${_email}", "${_name}");
EOF
- { test $? != 0 } && {
- func "address already present in $list"
- return 1
+ [[ $? = 0 ]] || {
+ func "address already present in $list"
+ return 1
}
return 0
}
@@ -154,17 +154,20 @@ complete() {
}
sender_isknown() {
- head="`${WORKDIR}/bin/fetchaddr -x From -a`"
+ # extract only headers from stdin
+ head="`awk '
+{ print $0 }
+/^$/ { exit }' | ${WORKDIR}/bin/fetchaddr -x From -a`"
email="${head[(ws:,:)1]}"
exitcode=1
- { test "$email" = "" } && { return 1 }
+ [[ "$email" = "" ]] && { return 1 }
lookup="`lookup_email ${email}`"
- { test "$lookup" = "" } || {
- func "sender_isknown() found <$email> in $list (id $lookup)"
- return 0 }
+ [[ "$lookup" = "" ]] || {
+ func "sender_isknown() found <$email> in $list (id $lookup)"
+ return 0 }
return 1
}
@@ -175,9 +178,11 @@ learn() {
func "dryrun parsing ${PARAM[1]} in stdin piped mails" }
what=sender
- [[ "${PARAM[1]}" != "" ]] && { what=${PARAM[1]} }
+ [[ "$1" = "" ]] || { what="$1" }
func "learning from $what"
- buffer=`cat`
+
+ # read in only headers from stdin (till empty line)
+ buffer=`awk '{ print $0 } /^$/ { exit }'`
case ${what} in
@@ -186,7 +191,7 @@ learn() {
# (Q) eliminates quotes, then word split
email="${(Q)head[(ws:,:)1]}"
name="${(Q)head[(ws:,:)2]}"
- print "$name" "<$email>"
+ print "$head"
[[ $DRYRUN == 1 ]] || {
insert_address "$email" "$name"
{ test $? = 0 } && { act "new: $_name <${_email}>" }
@@ -201,7 +206,7 @@ learn() {
email="${(Q)h[(ws:,:)1]}"
name="${(Q)h[(ws:,:)2]}"
- print "$name" "<$email>"
+ print "$h"
[[ $DRYRUN == 1 ]] || {
insert_address "$email" "$name"
@@ -217,7 +222,7 @@ learn() {
# (Q) eliminates quotes, then word split
email="${(Q)h[(ws:,:)1]}"
name="${(Q)h[(ws:,:)2]}"
- print "$name" "<$email>"
+ print "$h"
[[ $DRYRUN == 1 ]] || {
insert_address "$email" "$name"
@@ -230,7 +235,7 @@ learn() {
# (Q) eliminates quotes, then word split
email="${(Q)h[(ws:,:)1]}"
name="${(Q)h[(ws:,:)2]}"
- print "$name" "<$email>"
+ print "$h"
[[ $DRYRUN == 1 ]] || {
insert_address "$email" "$name"
diff --git a/src/zlibs/maildirs b/src/zlibs/maildirs
@@ -29,13 +29,13 @@ typeset -h formail_cache
# no in all other cases
maildircheck() {
{ test -r "$1" } || {
- error "Maildir not existing: $1"
- return 1 }
+ error "Maildir not existing: $1"
+ return 1 }
{ test -w "$1" } || {
- error "Directory not writable: $1"
- return 1 }
+ error "Directory not writable: $1"
+ return 1 }
{ test -r "$1/cur" } \
- && { return 0 } # Yes is a maildir
+ && { return 0 } # Yes is a maildir
# shortened test to speedup
# && { test -r $1/new } \
# && { test -r $1/tmp } \
@@ -46,20 +46,20 @@ maildircheck() {
maildirmake() {
{ test -z "$1" } && {
- error "internal error: missing argument for maildirmake"
- return 255 }
+ error "internal error: missing argument for maildirmake"
+ return 255 }
{ test -f "$1" } && {
- func "not a maildir, but a file: $1"
- return 254 }
+ func "not a maildir, but a file: $1"
+ return 254 }
{ test -d "$1/new" } && {
- func "maildir already exists: $1"
- return 0 }
+ func "maildir already exists: $1"
+ return 0 }
{ test -r "$1" } && {
- func "directory exists, but is not a maildir: $1"
- return 254 }
+ func "directory exists, but is not a maildir: $1"
+ return 254 }
func "creating maildir: $1"
@@ -75,14 +75,14 @@ maildirmake() {
list_maildirs() {
maildirs=`${=find} "$MAILDIRS" -maxdepth 1 -type d`
for m in ${(f)maildirs}; do
- func "$m"
- { maildircheck "$m" } && {
- # is a maildir
- { test "`${=find} "$m" -type f`" != "" } && {
- # and is not empty
- maildirs+=(`basename "$m"`)
- }
- }
+ func "$m"
+ { maildircheck "$m" } && {
+ # is a maildir
+ { test "`${=find} "$m" -type f`" != "" } && {
+ # and is not empty
+ maildirs+=(`basename "$m"`)
+ }
+ }
done
return ${#maildirs}
}
@@ -93,18 +93,18 @@ maildirs_lastlog() {
typeset -alU dests prio lasts
_folders=`cat "${MAILDIRS}/logs/procmail.log"|awk '/Folder:/ {print $2}' | cut -d/ -f1`
for d in ${(f)_folders}; do
- func "maildir touched by last operation: $d"
- # skip procmail glitch
- { test "$d" = "procmail" } && { continue }
- # put filtered to last
- [[ ${PARAM} == *${d}* ]] && { lasts=($lasts $d); continue }
- # always give priority to known, then to priv, then the rest
- { test "$d" = "known" } && { prio=(known $prio); continue }
- { test "$d" = "priv" } && { prio=($prio priv); continue }
- # skip zz. trash
- [[ $d == zz.* ]] && { continue }
- # put them to filter
- dests+=($d)
+ func "maildir touched by last operation: $d"
+ # skip procmail glitch
+ { test "$d" = "procmail" } && { continue }
+ # put filtered to last
+ [[ ${PARAM} == *${d}* ]] && { lasts=($lasts $d); continue }
+ # always give priority to known, then to priv, then the rest
+ { test "$d" = "known" } && { prio=(known $prio); continue }
+ { test "$d" = "priv" } && { prio=($prio priv); continue }
+ # skip zz. trash
+ [[ $d == zz.* ]] && { continue }
+ # put them to filter
+ dests+=($d)
done
print "${=prio} ${=dests} ${=lasts}"
unset dests
@@ -116,15 +116,15 @@ rmdupes() {
## special argument lastlog
{ test "$1" = "lastlog" } && {
- lastdirs=(`maildirs_lastlog`)
- act "Pruning duplicates across ${#lastdirs} destination maildirs:"
- act "${lastdirs}"
- # recursion here
- rmdupes ${=lastdirs}
- notice "Done pruning"
- # all the prioritization above is so that duplicates are spotted
- # across different maildirs and deleted from the filtered source
- return 0
+ lastdirs=(`maildirs_lastlog`)
+ act "Pruning duplicates across ${#lastdirs} destination maildirs:"
+ act "${lastdirs}"
+ # recursion here
+ rmdupes ${=lastdirs}
+ notice "Done pruning"
+ # all the prioritization above is so that duplicates are spotted
+ # across different maildirs and deleted from the filtered source
+ return 0
}
###############
@@ -135,38 +135,38 @@ rmdupes() {
newlock "$formail_cache"
for folder in ${=@}; do
- { test -r "$folder" } || { folder="$MAILDIRS/$folder" }
- { test -r "$folder" } || {
- error "Directory not found: $folder"
- continue }
-
- { maildircheck "${folder}" } || {
- error "Not a maildir folder: $folder"
- continue }
-
- c=0
- notice "Checking for duplicates in $folder"
- msgs=`${=find} "${folder}" -maxdepth 2 -type f`
- act "Please wait, this can take a while..."
-
-
-
- for m in ${(f)msgs}; do
- func "formail < $m"
- # 128MB should be enough ehre?
- formail -D 128000000 "$formail_cache" <"$m" \
- && rm "$m" && c=$(( $c + 1 ))
- done
- act "$c duplicates found and deleted"
- tot=$(( $tot + $c ))
+ { test -r "$folder" } || { folder="$MAILDIRS/$folder" }
+ { test -r "$folder" } || {
+ error "Directory not found: $folder"
+ continue }
+
+ { maildircheck "${folder}" } || {
+ error "Not a maildir folder: $folder"
+ continue }
+
+ c=0
+ notice "Checking for duplicates in $folder"
+ msgs=`${=find} "${folder}" -maxdepth 2 -type f`
+ act "Please wait, this can take a while..."
+
+
+
+ for m in ${(f)msgs}; do
+ func "formail < $m"
+ # 128MB should be enough here?
+ formail -D 128000000 "$formail_cache" <"$m" \
+ && rm "$m" && c=$(( $c + 1 ))
+ done
+ act "$c duplicates found and deleted"
+ tot=$(( $tot + $c ))
done
unlink "$formail_cache"
if [ "$tot" = "0" ]; then
- act "No duplicates found at all"
+ act "No duplicates found at all"
else
- act "$tot total duplicates found and deleted"
+ act "$tot total duplicates found and deleted"
fi
}
@@ -175,16 +175,16 @@ merge() {
dst=${2}
{ test "$src" = "$dst" } && {
- error "Cannot merge same directory in itself: $src = $dst"
- return 1 }
+ error "Cannot merge same directory in itself: $src = $dst"
+ return 1 }
{ maildircheck "$src" } || {
- error "Source is not a maildir: $src"
- return 1 }
+ error "Source is not a maildir: $src"
+ return 1 }
{ maildircheck "$dst" } || {
- error "Destination is not a maildir: $dst"
- return 1 }
+ error "Destination is not a maildir: $dst"
+ return 1 }
# merge does not use deliver() because
# the new-flag and read-flags must be kept intact.
@@ -197,27 +197,27 @@ merge() {
# cp -p -r -l source/date target/
# rm -rf source/data
for i in ${(f)fr}; do
- mv "$i" "${dst}/cur/"; c=$(($c + 1))
- { test $? = 0 } || {
- error "error moving file: $i"
- error "merge aborted."
- return 1 }
+ mv "$i" "${dst}/cur/"; c=$(($c + 1))
+ { test $? = 0 } || {
+ error "error moving file: $i"
+ error "merge aborted."
+ return 1 }
done
fr=`${=find} ${src}/new -type f`
for i in ${(f)fr}; do
- mv "$i" "${dst}/new/"; c=$(($c + 1))
- { test $? = 0 } || {
- error "error moving file: $i"
- error "merge aborted."
- return 1 }
+ mv "$i" "${dst}/new/"; c=$(($c + 1))
+ { test $? = 0 } || {
+ error "error moving file: $i"
+ error "merge aborted."
+ return 1 }
done
fr=`${=find} ${src}/tmp -type f`
for i in ${(f)fr}; do
- mv "$i" "${dst}/tmp/"; c=$(($c + 1))
- { test $? = 0 } || {
- error "error moving file: $i"
- error "merge aborted."
- return 1 }
+ mv "$i" "${dst}/tmp/"; c=$(($c + 1))
+ { test $? = 0 } || {
+ error "error moving file: $i"
+ error "merge aborted."
+ return 1 }
done
notice "$c mails succesfully moved"
@@ -236,38 +236,38 @@ merge() {
# so that fetchmail does not delete mail from server
deliver() {
if [ "$1" = "" ]; then
- dest="$MAILDIRS/incoming"
+ dest="$MAILDIRS/incoming"
else
- dest="$MAILDIRS/$1"
- { test -d "$dest" } || { dest="$1"
- { test -d "$dest" } || {
- error "delivery destination path invalid: $1"
- return 1; } }
+ dest="$MAILDIRS/$1"
+ { test -d "$dest" } || { dest="$1"
+ { test -d "$dest" } || {
+ error "delivery destination path invalid: $1"
+ return 1; } }
fi
# create destination maildir if not existing
- { test -r "$dest" } || {
- act "creating destination maildir: $dest"
- maildirmake "$dest" }
+ [[ -r "$dest" ]] || {
+ act "creating destination maildir: $dest"
+ maildirmake "$dest" }
maildircheck "$dest"
- { test $? = 0 } || {
- error "Invalid maildir destination for delivery, operation aborted."
- func "Returning error to caller."
- return 1; }
+ [[ $? = 0 ]] || {
+ error "Invalid maildir destination for delivery, operation aborted."
+ func "Returning error to caller."
+ return 1; }
- base="`hostname`-jaro-`date +%Y-%m-%d-%H.%M.%S`-$RANDOM"
+ base="`hostname`_jaro_`date +%Y-%m-%d_%H-%M-%S`_$RANDOM"
cat > "$dest/new/$base"
- { test $? = 0 } || {
- error "Could not write email file into maildir, operation aborted."
- func "Returning error to caller."
- return 1; }
+ [[ $? = 0 ]] || {
+ error "Could not write email file into maildir, operation aborted."
+ func "Returning error to caller."
+ return 1; }
- { test "$DEBUG" != "0" } && {
- func "Delivery successful, log: $MAILDIRS/logs/jaro-deliver.log"
- awk '
+ [[ $DEBUG = 0 ]] || {
+ func "Delivery successful, log: $MAILDIRS/logs/jaro-deliver.log"
+ awk '
BEGIN { print "Delivery to maildir: '"$1"'" }
{ print $0 }
/^$/ { exit }
@@ -276,3 +276,62 @@ BEGIN { print "Delivery to maildir: '"$1"'" }
return 0
}
+
+# Extract and list the unique addresses found in a maildir.
+# $1: what to extract (all | recipient | sender), handed over to learn()
+# $2: path of the maildir to scan
+# Prints one "name <email>" line per unique address, then the totals.
+# Returns 1 on an unknown action or when $2 does not pass maildircheck().
+md_extract() {
+    func "md_extract()"
+    _action="$1"
+    case $_action in
+        all|recipient|sender) ;;
+        *) error "unknown extract action: $_action"; return 1 ;;
+    esac
+
+    maildircheck "$2" || {
+        error "Cannot extract $_action from maildir: $2"
+        return 1 }
+
+    # one path per array element, so ${#_mails} counts files, not characters
+    _mails=(${(f)"$(find "$2" -type f)"})
+    # TODO ismail() to check if file is a mail
+
+    # we switch dryrun temporarily on to use learn()
+    # without modifying the addressbook
+    _dryrun=$DRYRUN
+    DRYRUN=1
+
+    notice "Extracting and listing $_action in maildir: $2"
+    act "please wait while scanning ${#_mails} mail files..."
+    typeset -a learned
+
+    for _m in $_mails; do
+        _l=`hdr "$_m" | learn $_action`
+        # handles results on multiple lines (recipients, all)
+        for _a in ${(f)_l}; do learned+=("$_a"); done
+    done
+
+    DRYRUN=$_dryrun
+
+    # eliminates duplicates; ${+...} tests key presence, so empty names dedup too
+    typeset -A result
+    for _a in $learned; do
+        _e=${_a[(ws:,:)1]}
+        [[ "$_e" = "" ]] && continue
+        (( ${+result[$_e]} )) && continue
+        _n=${_a[(ws:,:)2]}
+        result+=("$_e" "$_n")
+        print "$_n <$_e>"
+    done
+    notice "Unique $_action found: ${#result}"
+
+    # counts which addresses are known to us
+    _known=0
+    for _e in ${(k)result}; do
+        lookup="`lookup_email ${_e}`"
+        [[ "$lookup" = "" ]] || _known=$(( _known + 1 ))
+    done
+    act "addresses known: $_known"
+}