commit d47c710063147ca90dfa07cae7cc284398cc5115
parent c84d7dda314bba9e8235499ef01fc5ff31881048
Author: Jaromil <jaromil@dyne.org>
Date: Wed, 29 Aug 2012 02:30:08 +0200
fixes to filter and backup commands
Diffstat:
3 files changed, 101 insertions(+), 41 deletions(-)
diff --git a/src/jaro b/src/jaro
@@ -66,6 +66,9 @@ typeset -h hostname addressbook addressbook_tmp
# global array for maildirs (filled by list_maildirs)
typeset -al maildirs
+# global variable formail cache (used by rmdupes)
+typeset -h formail_cache
+
# global variable for exit code
typeset exitcode
exitcode=0
diff --git a/src/zlibs/maildirs b/src/zlibs/maildirs
@@ -78,9 +78,40 @@ list_maildirs() {
return ${#maildirs}
}
-rmdupes() {
- tmp=$TMPDIR/$datestamp.rmdupes.$RANDOM
- newlock $tmp
+rmdupes() {
+
+ ## special argument lastlog
+ { test "$1" = "lastlog" } && {
+ # gather a list of destinations for last filtering operation
+ typeset -alU dests prio lasts
+ for d in `cat ${WORKDIR}/log/procmail.log|awk '/Folder:/ {print $2}' | cut -d/ -f1`; do
+ # skip procmail glitch
+ { test "$d" = "procmail" } && { continue }
+ # put filtered to last
+ [[ ${PARAM} == *${d}* ]] && { lasts=($lasts $d); continue }
+ # always give priority to known, then to priv, then the rest
+ { test "$d" = "known" } && { prio=(known $prio); continue }
+ { test "$d" = "priv" } && { prio=($prio priv); continue }
+ # skip zz. trash
+ [[ $d == zz.* ]] && { continue }
+ # put them to filter
+ dests+=($d)
+ done
+ act "Pruning duplicates across ${#dests} destination maildirs:"
+ act "${prio} ${dests} ${lasts}"
+ # recursion here
+ rmdupes ${=prio} ${=dests} ${=lasts}
+ notice "Done pruning"
+ # all the prioritization above is so that duplicates are spotted
+ # across different maildirs and deleted from the filtered source
+ unset dests
+ unset prio
+ return 0
+ }
+ ###############
+
+ tot=0
+ typeset -al msgs
for folder in ${=@}; do
{ test -r "$folder" } || { folder=$MAILDIRS/$folder }
{ test -r "$folder" } || {
@@ -91,20 +122,26 @@ rmdupes() {
error "Not a maildir folder: $folder"
continue }
- notice "Checking for duplicates in $folder"
c=0
- for i in `find ${folder} -type f`; do
+ notice "Checking for duplicates in folder: `basename $folder`"
+ msgs=()
+ for m in `find ${folder} -type f`; do
+ msgs+=($m)
+ done
+ act "${#msgs} messages to check"
+ for m in ${=msgs}; do
		# 128MB should be enough here?
- formail -D 128000000 $tmp <$i \
- && rm $i && c=`expr $c + 1`
+ formail -D 128000000 $formail_cache <$m \
+ && rm $m && c=$(( $c + 1 ))
done
+ act "$c duplicates found and deleted"
+ tot=$(( $tot + $c ))
done
- unlink $tmp
- if [ "$c" = "0" ]; then
- act "No duplicates found"
+ if [ "$tot" = "0" ]; then
+ act "No duplicates found at all"
else
- act "$c duplicates found and deleted"
+ act "$tot total duplicates found and deleted"
fi
}
@@ -151,7 +188,10 @@ merge() {
act "Removing source directory ${src}"
${=rm} -r ${src}
act "Purging duplicates in destination"
+ formail_cache=$TMPDIR/filter.rmdupes.$datestamp.$RANDOM
+ newlock $formail_cache
rmdupes ${dst}
+ unlock $formail_cache
act "Done. All mails merged into ${dst}"
}
@@ -162,20 +202,15 @@ filter() {
update # update all filters
# archive old procmail log
- if [ -r $WORKDIR/log/procmail.log ]; then
+ { test -r "$WORKDIR/log/procmail.log" } && {
newlock $WORKDIR/log/procmail-${datestamp}.log
cat $WORKDIR/log/procmail.log \
>> $WORKDIR/log/procmail-${datestamp}.log
- rm -f $WORKDIR/log/procmail.log
+ ${=rm} $WORKDIR/log/procmail.log
unlock $WORKDIR/log/procmail-${datestamp}.log
- fi
+ }
prc=$PROCMAILDIR/rc
- { test $TMPRAM = 1 } && { # accelerate using ramdisk if present
- pdir=$TMPDIR/filter.procmail.$RAND
- cp -r $PROCMAILDIR $TMPDIR/$pdir
- prc=$TMPDIR/$pdir/rc
- }
typeset -al fall
@@ -194,15 +229,30 @@ filter() {
for m in `find $folder -type f`; do fall+=($m); done
done
+ { test ${#fall} = 0 } && {
+ error "No messages to filter, operation aborted."
+ return 1
+ }
+
    # then process it, this way ignoring new mails sent to the same folder
- act "Please wait while filtering ${#fall} maildirs..."
+ act "Filtering ${#fall} messages..."
for n in ${=fall}; do
cat $n | procmail -m $prc
done
notice "Done filtering"
unset fall
+
total=`mailstat -k $WORKDIR/log/procmail.log | tail -n1 | awk '{print $2}'`
briefing=`mailstat -kt $WORKDIR/log/procmail.log |awk '!/procmail/ { print " " $2 "\t" $3 }'|sort -nr`
echo "${briefing}"
+
+
+ formail_cache=$TMPDIR/filter.rmdupes.$datestamp.$RANDOM
+ newlock $formail_cache
+
+    # prune out all duplicates from the mails filtered in this run
+ rmdupes lastlog
+
+ unlink $formail_cache
}
diff --git a/src/zlibs/search b/src/zlibs/search
@@ -170,8 +170,8 @@ EOF
backup() {
- id=$RANDOM
- rc=$TMPDIR/backup.conf.$id
+ mairixrc=$TMPDIR/backup.rc.$datestamp.$RANDOM
+ mairixdb=$TMPDIR/backup.db.$datestamp.$RANDOM
typeset -al expr
typeset -al fold
@@ -187,7 +187,7 @@ backup() {
func "destination is ${p}"
fold+=(${p})
- elif [ -r ${p} ]; then
+ elif [ -r "${p}" ]; then
{ maildircheck ${p} } && {
func "param ${p} is a maildir"
@@ -199,7 +199,7 @@ backup() {
popd }
}
- elif [ -r ${MAILDIRS}/${p} ]; then
+ elif [ -r "${MAILDIRS}/${p}" ]; then
{ maildircheck ${MAILDIRS}/${p} } && {
func "param ${p} is a jaro maildir"
@@ -219,22 +219,18 @@ backup() {
}
dst=${fold[${#fold}]}
- if [ -r "$dst" ]; then
- act "Backup destination already exists: $dst"
- else
- maildirmake "${dst}"
- fi
+ { test -r "$dst" } || { maildirmake "${dst}" }
{ test ${#expr} = 0 } && {
error "No expression set for backup, please indicate what you want to backup"
- act "For example: d:10y-2y (all mails older than 1 year up to 10 years ago"
+ act "For example: d:10y-2y (all mails older than 1 year up to 10 years ago)"
act "Or a simple search string, all expressions can be verified using search."
return 1
}
# forge the folder string for mairix conf
folders=""
- for f in ${fold}; do
+ for f in ${=fold}; do
{ test $f = $dst } || {
folders="$folders`basename $f`:" }
done
@@ -242,33 +238,44 @@ backup() {
notice "Backup of all mails in '$folders' matching expression '$expr'"
act "Indexing folders"
- cat <<EOF > $rc
+ cat <<EOF > ${mairixrc}
base=$basedir
-database=$TMPDIR/backup.db.$id
+database=$mairixdb
maildir=${folders}
mfolder=$dst
mformat=maildir
EOF
- mairix -F -f $rc 2> /dev/null
+ mairix -F -f ${mairixrc} 2> /dev/null
act "Moving matches to $dst"
pushd `dirname $dst`; basedir=`pwd`; popd
- rm -f $rc; cat <<EOF > $rc
+ rm -f ${mairixrc}; cat <<EOF > ${mairixrc}
base=$basedir
-database=$TMPDIR/backup.db.$id
+database=$mairixdb
maildir=${folders}
mfolder=$dst
mformat=maildir
EOF
- found=`mairix -F -f $rc -H ${expr} 2> /dev/null | awk '{print $2}'`
+ found=`mairix -F -f ${mairixrc} -H ${expr} 2>/dev/null | awk '{print $2}'`
notice "$found matches found, destination folder size is `du -hs $basedir/$dst | awk '{print $1}'`"
+
+ ${=rm} ${mairixrc}
+ ${=rm} ${mairixdb}
+
# invert the order of folders to start with destination in rmdupes
- typeset -al PARAM
+ typeset -al revfold
c=$(( ${#fold} ))
while [ $c -gt 0 ]; do
func "${fold[$c]}"
- PARAM+=(${fold[$c]})
+ revfold+=(${fold[$c]})
c=$(( $c - 1 ))
done
- QUIET=1
- rmdupes
+
+ formail_cache=$TMPDIR/filter.rmdupes.$datestamp.$RANDOM
+ newlock $formail_cache
+
+ rmdupes ${=revfold}
+
+ unlink $formail_cache
+
+ notice "Backup completed to destination: $dst"
}