commit 24bb6d9f782979f2c024a02581288eefc633593e
parent 63ed8f2480c7e42f04a1a9edd4d6a2287074c9ad
Author: Jaromil <jaromil@dyne.org>
Date: Wed, 13 May 2015 12:01:10 +0200
refactoring of the email address extraction, better exploitation of shell pipes
Diffstat:
5 files changed, 401 insertions(+), 309 deletions(-)
diff --git a/src/jaro b/src/jaro
@@ -468,14 +468,19 @@ main() {
subcommands_opts[index]=""
subcommands_opts[search]=""
+ subcommands_opts[nm_search]=""
subcommands_opts[notmuch]=""
subcommands_opts[learn]=""
subcommands_opts[forget]=""
- subcommands_opts[list]=""
+
subcommands_opts[complete]=""
subcommands_opts[isknown]=""
+ subcommands_opts[list]=""
+ subcommands_opts[extract]=""
+ subcommands_opts[parse]=""
+
subcommands_opts[import]=""
subcommands_opts[export]=""
subcommands_opts[vcard]=""
@@ -507,8 +512,6 @@ main() {
subcommands_opts[imap]=""
- subcommands_opts[extract]=""
-
subcommands_opts[smtp]=""
subcommands_opts[crypt]=""
@@ -658,6 +661,9 @@ main() {
exitcode=$?
;;
+ # notmuch search with file output
+ nm_search) nm_search ${PARAM} ;;
+
search) search ${PARAM} ;;
notmuch)
@@ -784,8 +790,18 @@ main() {
esac
;;
- list|extract)
- extract ${PARAM}
+ parse)
+ extract_stdin ${PARAM}
+ exitcode=$?
+ ;;
+
+ list)
+ list_abook ${PARAM}
+ exitcode=$?
+ ;;
+
+ extract)
+ extract_auto ${PARAM}
exitcode=$?
;;
diff --git a/src/mutt/general b/src/mutt/general
@@ -47,6 +47,9 @@ set delete = yes
#set reply_regexp = "^(re|aw):[ \t]*"
#set status_chars = "-*%A"
#set to_chars = " +TCF"
+set pipe_decode = "yes"
+set pipe_split = "no"
+set pipe_sep = "#+++JAROMAIL_PIPE_SEPARATOR\n"
##########
## Sorting
diff --git a/src/zlibs/addressbook b/src/zlibs/addressbook
@@ -199,76 +199,6 @@ learn() {
}
-# extract all addresses found in a list of email files from stdin
-extract_mails() {
- _mails=`cat`
-
- _tot=`print $_mails | wc -l`
- act "$_tot emails to parse"
-
- [[ $_tot -gt 100 ]] && {
- act "operation will take a while, showing progress"
- _prog=0
- c=0
- }
-
- # learn from senders, recipients or all
- _action="$1"
-
-
- # -U eliminates duplicates
- typeset -aU _res
-
- for m in ${(f)_mails}; do
-
- # e_parse fills in e_addr(map) and e_parsed(newline term str)
- hdr $m | e_parse $_action
- for _e in ${(k)e_addr}; do
- _res+=("${(v)e_addr[$_e]} <$_e>")
- done
-
- [[ $_tot -gt 100 ]] && {
- c=$(( $c + 1 ))
- [[ $c -gt 99 ]] && {
- _prog=$(( $_prog + $c ))
- act "$_prog / $_tot processed so far"
- c=1
- }
- }
- done
- # print out results
- for r in $_res; do
- print - $r
- done
- notice "${#_res} unique addresses extracted"
-}
-
-# extract all addresses found into a maildir
-extract_maildir() {
- ## first arg is a directory
- md="$1"
- func "extract maildir: $md"
- ## extract from a maildir
- maildircheck "$md" || return 1
-
- _action="$2"
- case $_action in
- all) ;;
- recipient) ;;
- sender) ;;
- *) _action="all" ;;
- esac
-
- # search files
- _mails=`find $md -type f`
- # search symlinks
- _mails+=`find $md -type l`
-
- print - ${_mails} | extract_mails "$_action"
- return 0
-}
-
-
# import emails from VCard into abook
# checks if the emails are already known
import_vcard() {
@@ -352,209 +282,6 @@ BEGIN { newcard=0; c=0; name=""; email=""; }
notice "Done importing addresses"
}
-# extract all entries in addressbook or all addresses in a pgp keyring
-# or all signatures on a pgp key (even without importing it)
-extract() {
- func "extract() $PARAM"
-
- # without arguments just list all entries in the active list
- # default is whitelist
-
- arg=${PARAM[1]}
-
- func "extract() arg: $arg (param: $PARAM)"
-
- # no arg means print all entries from adressbook
- [[ "$arg" = "" ]] && {
- notice "Extracting all addresses in $list"
- awk -F'=' '
-/^name/ { printf("%s ",$2) }
-/^email/ { printf("<%s>\n",$2) }
-' $ADDRESSBOOK
- return 0
- }
-
-
- [[ -r "$arg" ]] && {
- # if first arg is a file, could be a maildir, a gpg keyring,
- # a gpg pubkey or a vcard
-
- # if first arg is a directory then extract from maildir
- [[ -d "$arg" ]] && {
- notice "Extracting $2 addresses from maildir $1"
- extract_maildir "$1" "$2"
- return $?
- }
-
- func "testing argument with file magic"
- _magic=`file "$arg"`
-
- # a map to eliminate duplicates
- typeset -AU result
-
- ######### GPG
- # first arg is a GnuPG key ring
- [[ "$_magic" =~ "GPG key public ring" ]] && {
- notice "Extracting addresses found in GPG keyring: $arg"
- _addrs=`gpg --list-keys --with-colons | awk -F: '{print $10}'`
- for i in ${(f)_addrs}; do
- [[ $global_quit = 1 ]] && break
- _parsed=`print "From: $i" | ${WORKDIR}/bin/fetchaddr -a -x from`
- _e="${_parsed[(ws:,:)1]:l}"
- isemail "$_e"
- [[ $? = 0 ]] || continue
- # check if the email is not already parsed
- [[ "${result[$_e]}" = "" ]] && {
- _n="${_parsed[(ws:,:)2]}"
- result+=("$_e" "$_n")
- print - "$_n <$_e>"
- }
- done
-
- notice "Unique addresses found: ${#result}"
- act "calculating known and new addresses..."
- # counts which addresses are known to us
- _known=0
- for i in ${(k)result}; do
- [[ $global_quit = 1 ]] && break
-
- lookup_email ${i}
- [[ $? = 0 ]] || {
- _known=$(( $_known + 1 )) }
- done
- act "new addresses: $_known"
- return 0
- }
-
- # first arg is a GnuPG public key
- [[ "$_magic" =~ "PGP public key" ]] && {
- notice "Extracting addresses from sigs on GPG key $arg"
- _gpg="gpg --no-default-keyring --keyring $MAILDIRS/cache/pubkey.gpg --batch --with-colons"
- ${=rm} $MAILDIRS/cache/pubkey.gpg
- ${=_gpg} --import "$arg"
- # first make sure all unknown keys are imported
- _addrs=`${=_gpg} --list-sigs | awk -F: '{print $5 " " $10}'`
- for i in ${(f)_addrs}; do
- [[ $global_quit = 1 ]] && break
-
- [[ "$i" =~ "[User ID not found]" ]] && {
- act "looking up: $i"
- ${=_gpg} --recv-key ${i[(w)1]}
- }
- done
-
- _addrs=`${=_gpg} --list-sigs | awk -F: '{print $10}'`
- for i in ${(f)_addrs}; do
- [[ $global_quit = 1 ]] && break
-
- _parsed=`print "From: $i" | ${WORKDIR}/bin/fetchaddr -a -x from`
- _e="${_parsed[(ws:,:)1]:l}"
- isemail "$_e"
- [[ $? = 0 ]] || continue
- # check if the email is not already parsed
- [[ "${result[$_e]}" = "" ]] && {
- _n="${_parsed[(ws:,:)2]}"
- result+=("$_e" "$_n")
- print - "$_n <$_e>"
- }
- done
-
- notice "Unique addresses found: ${#result}"
- act "calculating known and new addresses..."
- # counts which addresses are known to us
- _known=0
- for i in ${(k)result}; do
- [[ $global_quit = 1 ]] && break
-
- lookup_email ${i}
- [[ $? = 0 ]] || {
- _known=$(( $_known + 1 )) }
- done
- act "new addresses: $_known"
- return 0
- }
-
- [[ "$_magic" =~ "vCard" ]] && {
- # parse the vcard and print a simple name and email list
- # each value on a single line, entry tuples followed by a #
- # we skip entries that don't have an email
- addresses=`awk '
-BEGIN { newcard=0; c=0; name=""; email=""; }
-/^BEGIN:VCARD/ { newcard=1 }
-/^FN:/ { if(newcard = 1) name=$0 }
-/^EMAIL/ { if(newcard = 1) email=$0 }
-/^END:VCARD/ {
- if(newcard = 1) {
- newcard=0
- if(email != "") {
- c+=1
- print name
- print email
- print "# " c
- }
- email=""
- next
- }
-}
-' $arg | cut -d: -f2`
- newa=1; _name=""; _email=""
- for a in ${(f)addresses}; do
- [[ $global_quit = 1 ]] && break
-
- [[ "${a[1]}" = "#" ]] && {
- newa=1; # its the end of the entry
-
- # handle lines with multiple emails in vcard
- for ee in ${=_email}; do
- # check if we have this email already
- _e=`print ${ee//\^M/} | extract_emails`
- isemail "$_e"
- [[ $? = 0 ]] || continue
- # check if the email is not already parsed
- [[ "${result[$_e]}" = "" ]] && {
- _n="${_name//\^M/}"
- result+=("$_e" "$_n")
- print - "$_n <$_e>"
- }
- done
-
- continue
- }
- if [[ $newa = 1 ]]; then
- # (V) makes special chars visible, we need to remove them..
- _name="${(V)a[(ws:^:)1]}"; newa=0; continue
- elif [[ $newa = 0 ]]; then
- _email="${(V)a[(ws:^:)1]}"
- fi
-
- done
-
- notice "Unique addresses found: ${#result}"
- act "calculating known and new addresses..."
- # counts which addresses are known to us
- _known=0
- for i in ${(k)result}; do
- [[ $global_quit = 1 ]] && break
-
- lookup_email ${i}
- [[ $? = 0 ]] || {
- _known=$(( $_known + 1 )) }
- done
- act "new addresses: $_known"
- return 0
-
- }
-
- } # closes condition in which arg is a file
-
- # if no file is recognized, use string as search query
- notice "Extracting addresses from search query: $PARAM"
-
- # run a search and list email files
- nm_search ${=PARAM} | extract_mails
-}
-
-
# import address lists from stdin
import() {
@@ -751,4 +478,15 @@ set sort_field=name
EOF
) --datafile "$ADDRESSBOOK"
}
+
+# print out all addresses into the selected addressbook
+list_abook() {
+ notice "Extracting all addresses in $list"
+ awk -F'=' '
+/^name/ { printf("%s ",$2) }
+/^email/ { printf("<%s>\n",$2) }
+' $ADDRESSBOOK
+ return 0
+}
+
###################
diff --git a/src/zlibs/helpers b/src/zlibs/helpers
@@ -168,7 +168,7 @@ e_parse() {
# check if an email address was found
isemail "$_e" || continue
# avoid duplicates
- [[ "${(v)e_addr[$_e]}" = "" ]] || continue
+# [[ "${(v)e_addr[$_e]}" = "" ]] || continue
# extract also the name using comma separator
_n="${(Q)_p[(ws:,:)2]}"
@@ -331,17 +331,30 @@ is_online() {
human_size() {
- { test $1 -gt 0 } || {
- error "human_size() called with zero argument"
- return 1 }
+ [[ $1 -gt 0 ]] || {
+ error "human_size() called with invalid argument"
+ return 1
+ }
+
+ # we use the binary operation for speed
+ # shift right 10 is divide by 1024
+
+ # gigabytes
+ [[ $1 -gt 1073741824 ]] && {
+ print -n "$(( $1 >> 30 )) GB"
+ return 0
+ }
+
# megabytes
- { test $1 -gt 1048576 } && {
- print -n "$(( $1 / 1024 / 1024 )) MB"
- return 0}
+ [[ $1 -gt 1048576 ]] && {
+ print -n "$(( $1 >> 20 )) MB"
+ return 0
+ }
# kilobytes
- { test $1 -gt 1024 } && {
- print -n "$(( $1 / 1024 )) KB"
- return 0}
+ [[ $1 -gt 1024 ]] && {
+ print -n "$(( $1 >> 10 )) KB"
+ return 0
+ }
# bytes
print -n "$1 Bytes"
return 0
@@ -352,27 +365,6 @@ human_size() {
# downloads and/or installs certificates
-# TODO: save fingeprints of servers and check connections
-get-smtp-fingerprint() {
-# dyne.org one:
-# tls_fingerprint 6A:D4:DF:E4:20:32:F9:66:94:35:0C:33:9D:74:96:5C
-
- read_account ${account}
- { test $? != 0 } && {
- error "Invalid account: $account"
- return 1
- }
- # defaults
- [[ -z $smtp_port ]] && { smtp_port=25 }
-
- print QUIT \
- | openssl s_client -starttls smtp \
- -connect $smtp:$smtp_port \
- -showcerts 2>/dev/null \
- | openssl x509 -fingerprint -md5 -noout \
- | awk -F '=' '/Fingerprint/ {print $2}' | sysread fingerprint
-
-}
cert() {
diff --git a/src/zlibs/parse b/src/zlibs/parse
@@ -0,0 +1,343 @@
+#!/usr/bin/env zsh
+#
+# Jaro Mail, your humble and faithful electronic postman
+#
+# a tool to easily and privately handle your e-mail communication
+#
+# Copyleft (C) 2010-2015 Denis Roio <jaromil@dyne.org>
+#
+# This source code is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This source code is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# Please refer to the GNU Public License for more details.
+#
+# You should have received a copy of the GNU Public License along with
+# this source code; if not, write to:
+# Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+
+# extract all addresses found in a list of email files from stdin
+extract_mails() {
+ _mails=`cat`
+
+ _tot=`print $_mails | wc -l`
+ act "$_tot emails to parse"
+
+ [[ $_tot -gt 100 ]] && {
+ act "operation will take a while, showing progress"
+ _prog=0
+ c=0
+ }
+
+ # learn from senders, recipients or all
+ _action="$1"
+
+
+ # # -U eliminates duplicates
+ # typeset -aU _res
+
+ _found=0
+ for m in ${(f)_mails}; do
+
+ # e_parse fills in e_addr(map) and e_parsed(newline term str)
+ hdr $m | e_parse $_action
+ for _e in ${(k)e_addr}; do
+ # _res+=("${(v)e_addr[$_e]} <$_e>")
+ print - "${(v)e_addr[$_e]} <$_e>"
+ _found=$(( $_found + 1 ))
+ done
+
+ [[ $_tot -gt 100 ]] && {
+ c=$(( $c + 1 ))
+ [[ $c -gt 99 ]] && {
+ _prog=$(( $_prog + $c ))
+ act "$_prog / $_tot processed so far"
+ c=1
+ }
+ }
+ done
+ # print out results
+ # for r in $_res; do
+ # print - $r
+ # done
+ notice "${_found} unique addresses extracted"
+}
+
+# extract all addresses found into a maildir
+extract_maildir() {
+ ## first arg is a directory
+ md="$1"
+ func "extract maildir: $md"
+ ## extract from a maildir
+ maildircheck "$md" || return 1
+
+ _action="$2"
+ case $_action in
+ all) ;;
+ recipient) ;;
+ sender) ;;
+ *) _action="all" ;;
+ esac
+
+ # search files
+ _mails=`find $md -type f`
+ # search symlinks
+ _mails+=`find $md -type l`
+
+ print - ${_mails} | extract_mails "$_action"
+ return 0
+}
+
+# Extract all entries found in stdin. Supports two formats (autodetected)
+# 1) list of complete paths to filenames as returned by search
+# 2) mbox format big file with special jaromail separator as produced by mutt tagging
+extract_stdin() {
+ func "extract_stdin()"
+
+ _in=`cat`
+ # take first line
+ for i in ${(f)_in}; do _first="$i"; break; done
+
+ if [[ "${_first[(w)1]}" = "Date:" ]]; then
+ # is an email or stream of emails
+
+ _headers=`print - $_in | awk '
+BEGIN { header=0 }
+/JAROMAIL_PIPE_SEPARATOR/ { header=1; next }
+/^$/ { header=0; print "\n" }
+{ if(header==1) { print $0 } }
+'`
+
+ e_addr=()
+ _nextline=0
+ _gotit=""
+
+ for h in ${(f)_headers}; do
+
+ [[ "${h[(w)1]}" = "Date:" ]] && _nextline=1
+ [[ $_nextline = 1 ]] && _gotit+="$h\n"
+ [[ "${h[(w)1]}" = "Subject:" ]] && {
+ _nextline=0
+ print - ${_gotit} | e_parse
+
+ _gotit=""
+ }
+ done
+
+ for i in ${(k)e_addr}; do
+ print - "${e_addr[$i]} <$i>"
+ done
+
+
+ elif [[ -r "$_first" ]]; then
+ notice "Parsing ${PARAM} emails addresses from stdin list of files"
+ # is a list of files
+ extract_mails ${PARAM}
+ _res=$?
+ else
+ error "Cannot process stream from stdin, unknown format"
+ return 1
+ fi
+ return $_res
+}
+
+# Private helper of extract_auto: parse a newline separated list of
+# user id strings ($1) with fetchaddr and print "Name <email>" for each
+# valid address not yet present in the caller's `result` map, recording
+# it there. Stops early when global_quit is set to 1.
+_extract_uids() {
+    local _uid _parsed _e _n
+    for _uid in ${(f)1}; do
+        [[ $global_quit = 1 ]] && break
+        _parsed=`print "From: $_uid" | ${WORKDIR}/bin/fetchaddr -a -x from`
+        _e="${_parsed[(ws:,:)1]:l}"
+        isemail "$_e" || continue
+        # skip addresses already parsed; test for the key itself so
+        # that entries stored with an empty name are recognized too
+        (( ${+result[$_e]} )) && continue
+        _n="${_parsed[(ws:,:)2]}"
+        result+=("$_e" "$_n")
+        print - "$_n <$_e>"
+    done
+}
+
+# Private helper of extract_auto: report how many addresses are in the
+# caller's `result` map and how many of them are new, i.e. for how many
+# lookup_email returns non-zero.
+_extract_report_new() {
+    local _addr
+    integer _new=0
+    notice "Unique addresses found: ${#result}"
+    act "calculating known and new addresses..."
+    for _addr in ${(k)result}; do
+        [[ $global_quit = 1 ]] && break
+        lookup_email ${_addr} || (( _new += 1 ))
+    done
+    act "new addresses: $_new"
+}
+
+# Extract addresses from a source autodetected from the first element
+# of the global PARAM:
+# - nothing: parse stdin (see extract_stdin)
+# - a directory: treat it as a maildir, $2 selects which addresses
+# - a file recognized by file(1) as a GPG keyring, a PGP public key
+#   (addresses of the signatures on it) or a vCard
+# - anything else: use PARAM as a search query for nm_search
+# Prints one "Name <email>" line per address found on stdout.
+extract_auto() {
+    func "extract() $PARAM"
+
+    local arg _magic _gpg _addrs addresses
+    local a ee i newa _name _email _e _n
+
+    arg=${PARAM[1]}
+
+    func "extract() arg: $arg (param: $PARAM)"
+
+    # no arg means parse from stdin
+    [[ "$arg" = "" ]] && { extract_stdin; return $? }
+
+    [[ -r "$arg" ]] && {
+        # if first arg is a file, could be a maildir, a gpg keyring,
+        # a gpg pubkey or a vcard
+
+        # if first arg is a directory then extract from maildir
+        [[ -d "$arg" ]] && {
+            notice "Extracting $2 addresses from maildir $1"
+            extract_maildir "$1" "$2"
+            return $?
+        }
+
+        func "testing argument with file magic"
+        _magic=`file "$arg"`
+
+        # a map to eliminate duplicates
+        typeset -AU result
+
+        ######### GPG
+        # first arg is a GnuPG key ring
+        [[ "$_magic" =~ "GPG key public ring" ]] && {
+            notice "Extracting addresses found in GPG keyring: $arg"
+            # NOTE(review): this lists the default keyring, $arg is not
+            # passed to gpg - confirm whether that is intended
+            _addrs=$(gpg --list-keys --with-colons | awk -F: '{print $10}')
+            _extract_uids "$_addrs"
+            _extract_report_new
+            return 0
+        }
+
+        # first arg is a GnuPG public key
+        [[ "$_magic" =~ "PGP public key" ]] && {
+            notice "Extracting addresses from sigs on GPG key $arg"
+            _gpg="gpg --no-default-keyring --keyring $MAILDIRS/cache/pubkey.gpg --batch --with-colons"
+            ${=rm} $MAILDIRS/cache/pubkey.gpg
+            ${=_gpg} --import "$arg"
+            # first make sure all unknown keys are imported
+            _addrs=$(${=_gpg} --list-sigs | awk -F: '{print $5 " " $10}')
+            for i in ${(f)_addrs}; do
+                [[ $global_quit = 1 ]] && break
+
+                # literal substring match: with =~ the brackets would
+                # form a regex character class matching nearly any line
+                [[ "$i" == *"[User ID not found]"* ]] && {
+                    act "looking up: $i"
+                    ${=_gpg} --recv-key ${i[(w)1]}
+                }
+            done
+
+            _addrs=$(${=_gpg} --list-sigs | awk -F: '{print $10}')
+            _extract_uids "$_addrs"
+            _extract_report_new
+            return 0
+        }
+
+        [[ "$_magic" =~ "vCard" ]] && {
+            # parse the vcard and print a simple name and email list
+            # each value on a single line, entry tuples followed by a #
+            # we skip entries that don't have an email
+            addresses=`awk '
+BEGIN { newcard=0; c=0; name=""; email=""; }
+/^BEGIN:VCARD/ { newcard=1 }
+/^FN:/ { if(newcard == 1) name=$0 }
+/^EMAIL/ { if(newcard == 1) email=$0 }
+/^END:VCARD/ {
+ if(newcard == 1) {
+ newcard=0
+ if(email != "") {
+ c+=1
+ print name
+ print email
+ print "# " c
+ }
+ email=""
+ next
+ }
+}
+' $arg | cut -d: -f2`
+            newa=1; _name=""; _email=""
+            for a in ${(f)addresses}; do
+                [[ $global_quit = 1 ]] && break
+
+                [[ "${a[1]}" = "#" ]] && {
+                    newa=1; # its the end of the entry
+
+                    # handle lines with multiple emails in vcard
+                    for ee in ${=_email}; do
+                        # check if we have this email already
+                        _e=`print ${ee//\^M/} | extract_emails`
+                        isemail "$_e" || continue
+                        # skip addresses already parsed (key test, so
+                        # entries with an empty name count as seen)
+                        (( ${+result[$_e]} )) && continue
+                        _n="${_name//\^M/}"
+                        result+=("$_e" "$_n")
+                        print - "$_n <$_e>"
+                    done
+
+                    continue
+                }
+                if [[ $newa = 1 ]]; then
+                    # (V) makes special chars visible, we need to remove them..
+                    _name="${(V)a[(ws:^:)1]}"; newa=0; continue
+                elif [[ $newa = 0 ]]; then
+                    _email="${(V)a[(ws:^:)1]}"
+                fi
+
+            done
+
+            _extract_report_new
+            return 0
+
+        }
+
+    } # closes condition in which arg is a file
+
+    # if no file is recognized, use string as search query
+    notice "Extracting addresses from search query: $PARAM"
+
+    # run a search and list email files
+    nm_search ${=PARAM} | extract_mails
+}
+