jaromail

a commandline tool to easily and privately handle your e-mail
git clone git://parazyd.org/jaromail.git
Log | Files | Refs | Submodules | README

commit 24bb6d9f782979f2c024a02581288eefc633593e
parent 63ed8f2480c7e42f04a1a9edd4d6a2287074c9ad
Author: Jaromil <jaromil@dyne.org>
Date:   Wed, 13 May 2015 12:01:10 +0200

refactoring of the email address extraction, better exploitation of shell pipes

Diffstat:
Msrc/jaro | 26+++++++++++++++++++++-----
Msrc/mutt/general | 3+++
Msrc/zlibs/addressbook | 284++++---------------------------------------------------------------------------
Msrc/zlibs/helpers | 54+++++++++++++++++++++++-------------------------------
Asrc/zlibs/parse | 343+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 401 insertions(+), 309 deletions(-)

diff --git a/src/jaro b/src/jaro @@ -468,14 +468,19 @@ main() { subcommands_opts[index]="" subcommands_opts[search]="" + subcommands_opts[nm_search]="" subcommands_opts[notmuch]="" subcommands_opts[learn]="" subcommands_opts[forget]="" - subcommands_opts[list]="" + subcommands_opts[complete]="" subcommands_opts[isknown]="" + subcommands_opts[list]="" + subcommands_opts[extract]="" + subcommands_opts[parse]="" + subcommands_opts[import]="" subcommands_opts[export]="" subcommands_opts[vcard]="" @@ -507,8 +512,6 @@ main() { subcommands_opts[imap]="" - subcommands_opts[extract]="" - subcommands_opts[smtp]="" subcommands_opts[crypt]="" @@ -658,6 +661,9 @@ main() { exitcode=$? ;; + # notmuch search with file output + nm_search) nm_search ${PARAM} ;; + search) search ${PARAM} ;; notmuch) @@ -784,8 +790,18 @@ main() { esac ;; - list|extract) - extract ${PARAM} + parse) + extract_stdin ${PARAM} + exitcode=$? + ;; + + list) + list_abook ${PARAM} + exitcode=$? + ;; + + extract) + extract_auto ${PARAM} exitcode=$? 
;; diff --git a/src/mutt/general b/src/mutt/general @@ -47,6 +47,9 @@ set delete = yes #set reply_regexp = "^(re|aw):[ \t]*" #set status_chars = "-*%A" #set to_chars = " +TCF" +set pipe_decode = "yes" +set pipe_split = "no" +set pipe_sep = "#+++JAROMAIL_PIPE_SEPARATOR\n" ########## ## Sorting diff --git a/src/zlibs/addressbook b/src/zlibs/addressbook @@ -199,76 +199,6 @@ learn() { } -# extract all addresses found in a list of email files from stdin -extract_mails() { - _mails=`cat` - - _tot=`print $_mails | wc -l` - act "$_tot emails to parse" - - [[ $_tot -gt 100 ]] && { - act "operation will take a while, showing progress" - _prog=0 - c=0 - } - - # learn from senders, recipients or all - _action="$1" - - - # -U eliminates duplicates - typeset -aU _res - - for m in ${(f)_mails}; do - - # e_parse fills in e_addr(map) and e_parsed(newline term str) - hdr $m | e_parse $_action - for _e in ${(k)e_addr}; do - _res+=("${(v)e_addr[$_e]} <$_e>") - done - - [[ $_tot -gt 100 ]] && { - c=$(( $c + 1 )) - [[ $c -gt 99 ]] && { - _prog=$(( $_prog + $c )) - act "$_prog / $_tot processed so far" - c=1 - } - } - done - # print out results - for r in $_res; do - print - $r - done - notice "${#_res} unique addresses extracted" -} - -# extract all addresses found into a maildir -extract_maildir() { - ## first arg is a directory - md="$1" - func "extract maildir: $md" - ## extract from a maildir - maildircheck "$md" || return 1 - - _action="$2" - case $_action in - all) ;; - recipient) ;; - sender) ;; - *) _action="all" ;; - esac - - # search files - _mails=`find $md -type f` - # search symlinks - _mails+=`find $md -type l` - - print - ${_mails} | extract_mails "$_action" - return 0 -} - - # import emails from VCard into abook # checks if the emails are already known import_vcard() { @@ -352,209 +282,6 @@ BEGIN { newcard=0; c=0; name=""; email=""; } notice "Done importing addresses" } -# extract all entries in addressbook or all addresses in a pgp keyring -# or all signatures on a pgp 
key (even without importing it) -extract() { - func "extract() $PARAM" - - # without arguments just list all entries in the active list - # default is whitelist - - arg=${PARAM[1]} - - func "extract() arg: $arg (param: $PARAM)" - - # no arg means print all entries from adressbook - [[ "$arg" = "" ]] && { - notice "Extracting all addresses in $list" - awk -F'=' ' -/^name/ { printf("%s ",$2) } -/^email/ { printf("<%s>\n",$2) } -' $ADDRESSBOOK - return 0 - } - - - [[ -r "$arg" ]] && { - # if first arg is a file, could be a maildir, a gpg keyring, - # a gpg pubkey or a vcard - - # if first arg is a directory then extract from maildir - [[ -d "$arg" ]] && { - notice "Extracting $2 addresses from maildir $1" - extract_maildir "$1" "$2" - return $? - } - - func "testing argument with file magic" - _magic=`file "$arg"` - - # a map to eliminate duplicates - typeset -AU result - - ######### GPG - # first arg is a GnuPG key ring - [[ "$_magic" =~ "GPG key public ring" ]] && { - notice "Extracting addresses found in GPG keyring: $arg" - _addrs=`gpg --list-keys --with-colons | awk -F: '{print $10}'` - for i in ${(f)_addrs}; do - [[ $global_quit = 1 ]] && break - _parsed=`print "From: $i" | ${WORKDIR}/bin/fetchaddr -a -x from` - _e="${_parsed[(ws:,:)1]:l}" - isemail "$_e" - [[ $? = 0 ]] || continue - # check if the email is not already parsed - [[ "${result[$_e]}" = "" ]] && { - _n="${_parsed[(ws:,:)2]}" - result+=("$_e" "$_n") - print - "$_n <$_e>" - } - done - - notice "Unique addresses found: ${#result}" - act "calculating known and new addresses..." - # counts which addresses are known to us - _known=0 - for i in ${(k)result}; do - [[ $global_quit = 1 ]] && break - - lookup_email ${i} - [[ $? 
= 0 ]] || { - _known=$(( $_known + 1 )) } - done - act "new addresses: $_known" - return 0 - } - - # first arg is a GnuPG public key - [[ "$_magic" =~ "PGP public key" ]] && { - notice "Extracting addresses from sigs on GPG key $arg" - _gpg="gpg --no-default-keyring --keyring $MAILDIRS/cache/pubkey.gpg --batch --with-colons" - ${=rm} $MAILDIRS/cache/pubkey.gpg - ${=_gpg} --import "$arg" - # first make sure all unknown keys are imported - _addrs=`${=_gpg} --list-sigs | awk -F: '{print $5 " " $10}'` - for i in ${(f)_addrs}; do - [[ $global_quit = 1 ]] && break - - [[ "$i" =~ "[User ID not found]" ]] && { - act "looking up: $i" - ${=_gpg} --recv-key ${i[(w)1]} - } - done - - _addrs=`${=_gpg} --list-sigs | awk -F: '{print $10}'` - for i in ${(f)_addrs}; do - [[ $global_quit = 1 ]] && break - - _parsed=`print "From: $i" | ${WORKDIR}/bin/fetchaddr -a -x from` - _e="${_parsed[(ws:,:)1]:l}" - isemail "$_e" - [[ $? = 0 ]] || continue - # check if the email is not already parsed - [[ "${result[$_e]}" = "" ]] && { - _n="${_parsed[(ws:,:)2]}" - result+=("$_e" "$_n") - print - "$_n <$_e>" - } - done - - notice "Unique addresses found: ${#result}" - act "calculating known and new addresses..." - # counts which addresses are known to us - _known=0 - for i in ${(k)result}; do - [[ $global_quit = 1 ]] && break - - lookup_email ${i} - [[ $? 
= 0 ]] || { - _known=$(( $_known + 1 )) } - done - act "new addresses: $_known" - return 0 - } - - [[ "$_magic" =~ "vCard" ]] && { - # parse the vcard and print a simple name and email list - # each value on a single line, entry tuples followed by a # - # we skip entries that don't have an email - addresses=`awk ' -BEGIN { newcard=0; c=0; name=""; email=""; } -/^BEGIN:VCARD/ { newcard=1 } -/^FN:/ { if(newcard = 1) name=$0 } -/^EMAIL/ { if(newcard = 1) email=$0 } -/^END:VCARD/ { - if(newcard = 1) { - newcard=0 - if(email != "") { - c+=1 - print name - print email - print "# " c - } - email="" - next - } -} -' $arg | cut -d: -f2` - newa=1; _name=""; _email="" - for a in ${(f)addresses}; do - [[ $global_quit = 1 ]] && break - - [[ "${a[1]}" = "#" ]] && { - newa=1; # its the end of the entry - - # handle lines with multiple emails in vcard - for ee in ${=_email}; do - # check if we have this email already - _e=`print ${ee//\^M/} | extract_emails` - isemail "$_e" - [[ $? = 0 ]] || continue - # check if the email is not already parsed - [[ "${result[$_e]}" = "" ]] && { - _n="${_name//\^M/}" - result+=("$_e" "$_n") - print - "$_n <$_e>" - } - done - - continue - } - if [[ $newa = 1 ]]; then - # (V) makes special chars visible, we need to remove them.. - _name="${(V)a[(ws:^:)1]}"; newa=0; continue - elif [[ $newa = 0 ]]; then - _email="${(V)a[(ws:^:)1]}" - fi - - done - - notice "Unique addresses found: ${#result}" - act "calculating known and new addresses..." - # counts which addresses are known to us - _known=0 - for i in ${(k)result}; do - [[ $global_quit = 1 ]] && break - - lookup_email ${i} - [[ $? 
= 0 ]] || { - _known=$(( $_known + 1 )) } - done - act "new addresses: $_known" - return 0 - - } - - } # closes condition in which arg is a file - - # if no file is recognized, use string as search query - notice "Extracting addresses from search query: $PARAM" - - # run a search and list email files - nm_search ${=PARAM} | extract_mails -} - - # import address lists from stdin import() { @@ -751,4 +478,15 @@ set sort_field=name EOF ) --datafile "$ADDRESSBOOK" } + +# print out all addresses into the selected addressbook +list_abook() { + notice "Extracting all addresses in $list" + awk -F'=' ' +/^name/ { printf("%s ",$2) } +/^email/ { printf("<%s>\n",$2) } +' $ADDRESSBOOK + return 0 +} + ################### diff --git a/src/zlibs/helpers b/src/zlibs/helpers @@ -168,7 +168,7 @@ e_parse() { # check if an email address was found isemail "$_e" || continue # avoid duplicates - [[ "${(v)e_addr[$_e]}" = "" ]] || continue +# [[ "${(v)e_addr[$_e]}" = "" ]] || continue # extract also the name using comma separator _n="${(Q)_p[(ws:,:)2]}" @@ -331,17 +331,30 @@ is_online() { human_size() { - { test $1 -gt 0 } || { - error "human_size() called with zero argument" - return 1 } + [[ $1 -gt 0 ]] || { + error "human_size() called with invalid argument" + return 1 + } + + # we use the binary operation for speed + # shift right 10 is divide by 1024 + + # gigabytes + [[ $1 -gt 1073741824 ]] && { + print -n "$(( $1 >> 30 )) GB" + return 0 + } + # megabytes - { test $1 -gt 1048576 } && { - print -n "$(( $1 / 1024 / 1024 )) MB" - return 0} + [[ $1 -gt 1048576 ]] && { + print -n "$(( $1 >> 20 )) MB" + return 0 + } # kilobytes - { test $1 -gt 1024 } && { - print -n "$(( $1 / 1024 )) KB" - return 0} + [[ $1 -gt 1024 ]] && { + print -n "$(( $1 >> 10 )) KB" + return 0 + } # bytes print -n "$1 Bytes" return 0 @@ -352,27 +365,6 @@ human_size() { # downloads and/or installs certificates -# TODO: save fingeprints of servers and check connections -get-smtp-fingerprint() { -# dyne.org one: -# 
tls_fingerprint 6A:D4:DF:E4:20:32:F9:66:94:35:0C:33:9D:74:96:5C - - read_account ${account} - { test $? != 0 } && { - error "Invalid account: $account" - return 1 - } - # defaults - [[ -z $smtp_port ]] && { smtp_port=25 } - - print QUIT \ - | openssl s_client -starttls smtp \ - -connect $smtp:$smtp_port \ - -showcerts 2>/dev/null \ - | openssl x509 -fingerprint -md5 -noout \ - | awk -F '=' '/Fingerprint/ {print $2}' | sysread fingerprint - -} cert() { diff --git a/src/zlibs/parse b/src/zlibs/parse @@ -0,0 +1,343 @@ +#!/usr/bin/env zsh +# +# Jaro Mail, your humble and faithful electronic postman +# +# a tool to easily and privately handle your e-mail communication +# +# Copyleft (C) 2010-2015 Denis Roio <jaromil@dyne.org> +# +# This source code is free software; you can redistribute it and/or +# modify it under the terms of the GNU Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This source code is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# Please refer to the GNU Public License for more details. +# +# You should have received a copy of the GNU Public License along with +# this source code; if not, write to: +# Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ + + +# extract all addresses found in a list of email files from stdin +extract_mails() { + _mails=`cat` + + _tot=`print $_mails | wc -l` + act "$_tot emails to parse" + + [[ $_tot -gt 100 ]] && { + act "operation will take a while, showing progress" + _prog=0 + c=0 + } + + # learn from senders, recipients or all + _action="$1" + + + # # -U eliminates duplicates + # typeset -aU _res + + _found=0 + for m in ${(f)_mails}; do + + # e_parse fills in e_addr(map) and e_parsed(newline term str) + hdr $m | e_parse $_action + for _e in ${(k)e_addr}; do + # _res+=("${(v)e_addr[$_e]} <$_e>") + print - "${(v)e_addr[$_e]} <$_e>" + _found=$(( $_found + 1 )) + done + + [[ $_tot -gt 100 ]] && { + c=$(( $c + 1 )) + [[ $c -gt 99 ]] && { + _prog=$(( $_prog + $c )) + act "$_prog / $_tot processed so far" + c=1 + } + } + done + # print out results + # for r in $_res; do + # print - $r + # done + notice "${_found} unique addresses extracted" +} + +# extract all addresses found into a maildir +extract_maildir() { + ## first arg is a directory + md="$1" + func "extract maildir: $md" + ## extract from a maildir + maildircheck "$md" || return 1 + + _action="$2" + case $_action in + all) ;; + recipient) ;; + sender) ;; + *) _action="all" ;; + esac + + # search files + _mails=`find $md -type f` + # search symlinks + _mails+=`find $md -type l` + + print - ${_mails} | extract_mails "$_action" + return 0 +} + +# Extract all entries found in stdin. 
Supports two formats (autodetected) +# 1) list of complete paths to filenames as returned by search +# 2) mbox format big file with special jaromail separator as produced by mutt tagging +extract_stdin() { + func "extract_stdin()" + + _in=`cat` + # take first line + for i in ${(f)_in}; do _first="$i"; break; done + + if [[ "${_first[(w)1]}" = "Date:" ]]; then + # is an email or stream of emails + + _headers=`print - $_in | awk ' +BEGIN { header=0 } +/JAROMAIL_PIPE_SEPARATOR/ { header=1; next } +/^$/ { header=0; print "\n" } +{ if(header==1) { print $0 } } +'` + + e_addr=() + _nextline=0 + _gotit="" + + for h in ${(f)_headers}; do + + [[ "${h[(w)1]}" = "Date:" ]] && _nextline=1 + [[ $_nextline = 1 ]] && _gotit+="$h\n" + [[ "${h[(w)1]}" = "Subject:" ]] && { + _nextline=0 + print - ${_gotit} | e_parse + + _gotit="" + } + done + + for i in ${(k)e_addr}; do + print - "${e_addr[$i]} <$i>" + done + + + elif [[ -r "$_first" ]]; then + notice "Parsing ${PARAM} emails addresses from stdin list of files" + # is a list of files + extract_mails ${PARAM} + _res=$? + else + error "Cannot process stream from stdin, unknown format" + return 1 + fi + return $_res +} + +# extract all entries in addressbook or all addresses in a pgp keyring +# or all signatures on a pgp key (even without importing it) +extract_auto() { + func "extract() $PARAM" + + # without arguments just list all entries in the active list + # default is whitelist + + arg=${PARAM[1]} + + func "extract() arg: $arg (param: $PARAM)" + + # no arg means parse from stdin + [[ "$arg" = "" ]] && { extract_stdin; return $? } + + [[ -r "$arg" ]] && { + # if first arg is a file, could be a maildir, a gpg keyring, + # a gpg pubkey or a vcard + + # if first arg is a directory then extract from maildir + [[ -d "$arg" ]] && { + notice "Extracting $2 addresses from maildir $1" + extract_maildir "$1" "$2" + return $? 
+ } + + func "testing argument with file magic" + _magic=`file "$arg"` + + # a map to eliminate duplicates + typeset -AU result + + ######### GPG + # first arg is a GnuPG key ring + [[ "$_magic" =~ "GPG key public ring" ]] && { + notice "Extracting addresses found in GPG keyring: $arg" + _addrs=`gpg --list-keys --with-colons | awk -F: '{print $10}'` + for i in ${(f)_addrs}; do + [[ $global_quit = 1 ]] && break + _parsed=`print "From: $i" | ${WORKDIR}/bin/fetchaddr -a -x from` + _e="${_parsed[(ws:,:)1]:l}" + isemail "$_e" + [[ $? = 0 ]] || continue + # check if the email is not already parsed + [[ "${result[$_e]}" = "" ]] && { + _n="${_parsed[(ws:,:)2]}" + result+=("$_e" "$_n") + print - "$_n <$_e>" + } + done + + notice "Unique addresses found: ${#result}" + act "calculating known and new addresses..." + # counts which addresses are known to us + _known=0 + for i in ${(k)result}; do + [[ $global_quit = 1 ]] && break + + lookup_email ${i} + [[ $? = 0 ]] || { + _known=$(( $_known + 1 )) } + done + act "new addresses: $_known" + return 0 + } + + # first arg is a GnuPG public key + [[ "$_magic" =~ "PGP public key" ]] && { + notice "Extracting addresses from sigs on GPG key $arg" + _gpg="gpg --no-default-keyring --keyring $MAILDIRS/cache/pubkey.gpg --batch --with-colons" + ${=rm} $MAILDIRS/cache/pubkey.gpg + ${=_gpg} --import "$arg" + # first make sure all unknown keys are imported + _addrs=`${=_gpg} --list-sigs | awk -F: '{print $5 " " $10}'` + for i in ${(f)_addrs}; do + [[ $global_quit = 1 ]] && break + + [[ "$i" =~ "[User ID not found]" ]] && { + act "looking up: $i" + ${=_gpg} --recv-key ${i[(w)1]} + } + done + + _addrs=`${=_gpg} --list-sigs | awk -F: '{print $10}'` + for i in ${(f)_addrs}; do + [[ $global_quit = 1 ]] && break + + _parsed=`print "From: $i" | ${WORKDIR}/bin/fetchaddr -a -x from` + _e="${_parsed[(ws:,:)1]:l}" + isemail "$_e" + [[ $? 
= 0 ]] || continue + # check if the email is not already parsed + [[ "${result[$_e]}" = "" ]] && { + _n="${_parsed[(ws:,:)2]}" + result+=("$_e" "$_n") + print - "$_n <$_e>" + } + done + + notice "Unique addresses found: ${#result}" + act "calculating known and new addresses..." + # counts which addresses are known to us + _known=0 + for i in ${(k)result}; do + [[ $global_quit = 1 ]] && break + + lookup_email ${i} + [[ $? = 0 ]] || { + _known=$(( $_known + 1 )) } + done + act "new addresses: $_known" + return 0 + } + + [[ "$_magic" =~ "vCard" ]] && { + # parse the vcard and print a simple name and email list + # each value on a single line, entry tuples followed by a # + # we skip entries that don't have an email + addresses=`awk ' +BEGIN { newcard=0; c=0; name=""; email=""; } +/^BEGIN:VCARD/ { newcard=1 } +/^FN:/ { if(newcard = 1) name=$0 } +/^EMAIL/ { if(newcard = 1) email=$0 } +/^END:VCARD/ { + if(newcard = 1) { + newcard=0 + if(email != "") { + c+=1 + print name + print email + print "# " c + } + email="" + next + } +} +' $arg | cut -d: -f2` + newa=1; _name=""; _email="" + for a in ${(f)addresses}; do + [[ $global_quit = 1 ]] && break + + [[ "${a[1]}" = "#" ]] && { + newa=1; # its the end of the entry + + # handle lines with multiple emails in vcard + for ee in ${=_email}; do + # check if we have this email already + _e=`print ${ee//\^M/} | extract_emails` + isemail "$_e" + [[ $? = 0 ]] || continue + # check if the email is not already parsed + [[ "${result[$_e]}" = "" ]] && { + _n="${_name//\^M/}" + result+=("$_e" "$_n") + print - "$_n <$_e>" + } + done + + continue + } + if [[ $newa = 1 ]]; then + # (V) makes special chars visible, we need to remove them.. + _name="${(V)a[(ws:^:)1]}"; newa=0; continue + elif [[ $newa = 0 ]]; then + _email="${(V)a[(ws:^:)1]}" + fi + + done + + notice "Unique addresses found: ${#result}" + act "calculating known and new addresses..." 
+ # counts which addresses are known to us + _known=0 + for i in ${(k)result}; do + [[ $global_quit = 1 ]] && break + + lookup_email ${i} + [[ $? = 0 ]] || { + _known=$(( $_known + 1 )) } + done + act "new addresses: $_known" + return 0 + + } + + } # closes condition in which arg is a file + + # if no file is recognized, use string as search query + notice "Extracting addresses from search query: $PARAM" + + # run a search and list email files + nm_search ${=PARAM} | extract_mails +} +