commit f959567ceb7d1c1d26c8e1d5b744316c3f70c986
parent bde69469efb18bdd8c90ded91f1eb7c49464f499
Author: Jaromil <jaromil@dyne.org>
Date: Fri, 15 Jun 2012 22:46:54 +0200
New search function using mairix
Diffstat:
78 files changed, 33685 insertions(+), 4 deletions(-)
diff --git a/build/build-osx.sh b/build/build-osx.sh
@@ -102,7 +102,7 @@ lipo mairix.32 mairix.64 -create -output mairix 2>&1 > /dev/null
cd -
cp src/fetchaddr build/osx/
-
+cp src/mairix/mairix build/osx/
copydeps bin/mutt
copydeps bin/mutt_dotlock
copydeps bin/msmtp
diff --git a/src/jaro b/src/jaro
@@ -583,7 +583,7 @@ autostart() {
mutt -F $MUTTDIR/rc "${1}"
return 0
}
- # or a path to folder
+ # or a path to folder
{ test -r ${1} } && {
mutt -F $MUTTDIR/rc -f ${1}
return 0
@@ -997,12 +997,12 @@ source $MUTTDIR/mboxes
# specific configuration files
source $MUTTDIR/crypto
-source $MUTTDIR/colors
source $MUTTDIR/general
source $MUTTDIR/formats
source $MUTTDIR/keybindings
source $MUTTDIR/identity
source $MUTTDIR/password
+source $MUTTDIR/colors
source $WORKDIR/Mutt.txt
## end of Jaro Mail generated muttrc
####################################
@@ -1319,7 +1319,48 @@ editor() {
return $?
}
-
+#######################
+## Search into maildirs
+# using mairix
+search() {
+ { which mairix > /dev/null } || { return 1 }
+ id=$RANDOM
+ rc=$WORKDIR/tmp/search.conf.$id
+
+ # make index if no params given
+ ml=""; c=0
+ for i in `ls $MAILDIRS`; do
+ # is it a maildir?
+ { test -r $MAILDIRS/${i}/cur } \
+ && { test -r $MAILDIRS/${i}/new } \
+ && { test -r $MAILDIRS/${i}/tmp } \
+ && { c=`expr $c + 1`; ml="$ml:$i" }
+ done
+ func "searching maildirs: $ml"
+ cat <<EOF > $rc
+base=$MAILDIRS
+database=$WORKDIR/search.db
+maildir=${ml}
+mfolder=$WORKDIR/tmp/search.result.$id
+mformat=maildir
+EOF
+ # just index
+ { test ${#PARAM} = 0 } && {
+ act "Indexing $c maildirs for search"
+ act "please be patient..."
+ mairix -F -f $rc
+ rm -f $rc
+ exitcode=$?
+ { test $exitcode = 0 } && { notice "Done." } || { error "Error, indexing aborted." }
+ return $exitcode
+ }
+ act "Searching maildirs for: $PARAM"
+ act -n ""
+ mairix -F -f $rc ${=PARAM} 2> /dev/null
+ { test $? = 0 } && { jaro -q $WORKDIR/tmp/search.result.$id }
+ rm -rf $WORKDIR/tmp/search.result.$id
+ rm -f $rc
+}
##############
## Open a File
open_file() {
@@ -1563,6 +1604,7 @@ main()
subcommands_opts[update]=""
subcommands_opts[stats]=""
+ subcommands_opts[search]=""
subcommands_opts[addr]=""
subcommands_opts[query]=""
subcommands_opts[learn]=""
@@ -1672,6 +1714,7 @@ main()
update) update ;;
+ search) CLEANEXIT=0; search ${PARAM} ;;
stats) CLEANEXIT=0; stats ;;
addr) CLEANEXIT=0; address ${PARAM} ;;
query) CLEANEXIT=0; query ${PARAM} ;;
diff --git a/src/mairix/ACKNOWLEDGEMENTS b/src/mairix/ACKNOWLEDGEMENTS
@@ -0,0 +1,60 @@
+These people have contributed useful patches, ideas and suggestions:
+
+Anand Kumria
+André Costa
+Andreas Amann
+Andre Costa
+Aredridel
+Balázs Szabó
+Bardur Arantsson
+Benj. Mako Hill
+Chris Mason
+Christoph Dworzak
+Christopher Rosado
+Chung-chieh Shan
+Claus Alboege
+Corrin Lakeland
+Dan Egnor
+Daniel Jacobowitz
+Dirk Huebner
+Ed Blackman
+Emil Sit
+Felipe Gustavo de Almeida
+Ico Doornekamp
+Jaime Velasco Juan
+James Leifer
+Jerry Jorgenson
+Joerg Desch
+Johannes Schindelin
+Johannes Weißl
+John Arthur Kane
+John Keener
+Jonathan Kamens
+Josh Purinton
+Karsten Petersen
+Kevin Rosenberg
+Mark Hills
+Martin Danielsson
+Matthias Teege
+Mikael Ylikoski
+Mika Fischer
+Oliver Braun
+Paramjit Oberoi
+Paul Fox
+Peter Chines
+Peter Jeremy
+Robert Hofer
+Roberto Boati
+Samuel Tardieu
+Sanjoy Mahajan
+Satyaki Das
+Steven Lumos
+Tim Harder
+Tom Doherty
+Vincent Lefevre
+Vladimir V. Kisil
+Will Yardley
+Wolfgang Weisselberg
+
+I apologise to any contributors who have been omitted from this list!
+
diff --git a/src/mairix/COPYING b/src/mairix/COPYING
@@ -0,0 +1,339 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/src/mairix/INSTALL b/src/mairix/INSTALL
@@ -0,0 +1,22 @@
+Installation of mairix goes as follows:
+
+./configure
+make
+make install
+
+You need to be root to run the final step unless you're installing under your
+own home directory somewhere.
+
+However, you might want to tune the options further. The configure script
+shares its common options with the usual autoconf-generated scripts, even
+though it's not autoconf-generated itself. For example, a fuller build could
+use
+
+CC=gcc CFLAGS="-O2 -Wall" ./configure \
+ --prefix=/opt/mairix \
+ --infodir=/usr/share/info
+make
+make install
+
+The final step is to create a ~/.mairixrc file. An example is included in the
+file dotmairixrc.eg. Just copy that to ~/.mairixrc and edit it.
diff --git a/src/mairix/Makefile b/src/mairix/Makefile
@@ -0,0 +1,114 @@
+#########################################################################
+#
+# mairix - message index builder and finder for maildir folders.
+#
+# Copyright (C) Richard P. Curnow 2002-2004,2006
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# =======================================================================
+
+#########################################################################
+# Edit the following variables as required
+CC=gcc
+CFLAGS=-O2 -m64 -DHAS_STDINT_H -DHAS_INTTYPES_H -DUSE_GZIP_MBOX -DUSE_BZIP_MBOX
+CPPFLAGS=
+LDFLAGS=
+LIBS=-lz -lbz2
+
+#######################################################################
+# If you're generating a package, you may want to use
+# make DESTDIR=temporary_dir install
+# to get the software installed to a directory where you can create
+# a mairix.tar.gz from it
+DESTDIR=
+
+#######################################################################
+
+prefix=$(DESTDIR)/usr/local
+bindir=$(DESTDIR)/usr/local/bin
+mandir=$(DESTDIR)/usr/local/man
+man1dir=$(mandir)/man1
+man5dir=$(mandir)/man5
+infodir=$(DESTDIR)/usr/local/info
+docdir=$(DESTDIR)/usr/local/doc/mairix-0.23
+
+#########################################################################
+# Things below this point shouldn't need to be edited.
+
+# Object files that are linked together into the mairix executable.
+OBJ = mairix.o db.o rfc822.o tok.o hash.o dirscan.o writer.o \
+ reader.o search.o stats.o dates.o datescan.o mbox.o md5.o \
+ fromcheck.o glob.o dumper.o expandstr.o dotlock.o \
+ nvp.o nvpscan.o
+
+# First rule in the file, hence the default goal: build the executable.
+all : mairix
+
+# Link every object with the configured linker flags and libraries.
+mairix : $(OBJ)
+ $(CC) -o mairix $(CFLAGS) $(LDFLAGS) $(OBJ) $(LIBS)
+
+# Each object depends on its own source, on three shared headers and on
+# the Makefile, so touching any of those recompiles every object.
+%.o : %.c memmac.h mairix.h reader.h Makefile
+ $(CC) -c $(CFLAGS) $(CPPFLAGS) -o $@ $<
+
+# The three scanners below are generated: dfasyn reads the .nfa input and
+# writes the .c file (-o), the .h file (-ho) and a .report file (-r).
+datescan.c datescan.h : datescan.nfa ./dfasyn/dfasyn
+ ./dfasyn/dfasyn -o datescan.c -ho datescan.h -r datescan.report -u datescan.nfa
+
+fromcheck.c fromcheck.h : fromcheck.nfa ./dfasyn/dfasyn
+ ./dfasyn/dfasyn -o fromcheck.c -ho fromcheck.h -r fromcheck.report -u fromcheck.nfa
+
+nvpscan.c nvpscan.h : nvp.nfa ./dfasyn/dfasyn
+ ./dfasyn/dfasyn -o nvpscan.c -ho nvpscan.h -r nvpscan.report -u nvp.nfa
+
+# These objects need a generated header to exist before they are compiled.
+dates.o : datescan.h
+mbox.o : fromcheck.h
+nvp.o : nvpscan.h
+
+# No prerequisites: the recipe only runs while version.h does not exist.
+# Presumably ./mkversion writes version.h -- confirm against the script.
+version.h:
+ ./mkversion
+
+# Build the dfasyn generator with a recursive make that reuses CC and
+# CFLAGS; fail with a message when the dfasyn subdirectory is missing.
+./dfasyn/dfasyn:
+ if [ -d dfasyn ]; then cd dfasyn ; $(MAKE) CC="$(CC)" CFLAGS="$(CFLAGS)" ; else echo "No dfasyn subdirectory?" ; exit 1 ; fi
+
+# Remove objects, the executable, the listed mairix.* auxiliary files and
+# the generated scanner sources, then clean the dfasyn and test
+# subdirectories when present. version.h is not removed here.
+clean:
+ -rm -f *~ *.o mairix *.s core
+ -rm -f mairix.cp mairix.fn mairix.aux mairix.log mairix.ky mairix.pg mairix.toc mairix.tp mairix.vr
+ -rm -f fromcheck.[ch] datescan.[ch]
+ -rm -f nvpscan.[ch]
+ if [ -d dfasyn ]; then cd dfasyn ; $(MAKE) clean ; fi
+ if [ -d test ]; then cd test ; $(MAKE) clean ; fi
+
+# Everything clean does, plus removal of config.log and of this Makefile
+# itself.
+distclean: clean
+ if [ -d test ]; then cd test ; $(MAKE) distclean ; fi
+ -rm -f Makefile config.log
+
+# Create the destination directories when missing, then copy the binary
+# (mode 555) and both manual pages (mode 444).
+# NOTE: this target has no prerequisites, so mairix must already be built.
+install:
+ [ -d $(prefix) ] || mkdir -p $(prefix)
+ [ -d $(bindir) ] || mkdir -p $(bindir)
+ [ -d $(mandir) ] || mkdir -p $(mandir)
+ [ -d $(man1dir) ] || mkdir -p $(man1dir)
+ [ -d $(man5dir) ] || mkdir -p $(man5dir)
+ cp -f mairix $(bindir)
+ chmod 555 $(bindir)/mairix
+ cp -f mairix.1 $(man1dir)
+ chmod 444 $(man1dir)/mairix.1
+ cp -f mairixrc.5 $(man5dir)
+ chmod 444 $(man5dir)/mairixrc.5
+
+# Build mairix first, then run "make check" inside test/ with the same CC
+# and CFLAGS; fail with a message when test/ is missing.
+check: mairix
+ if [ -d test ]; then cd test ; $(MAKE) CC="$(CC)" CFLAGS="$(CFLAGS)" check ; else echo "No test subdirectory?" ; exit 1 ; fi
+
+# These targets are commands, not files.
+.PHONY : all install clean distclean check
+
+# mairix.o additionally needs version.h to exist.
+mairix.o : version.h
+
+
diff --git a/src/mairix/Makefile.in b/src/mairix/Makefile.in
@@ -0,0 +1,114 @@
+#########################################################################
+#
+# mairix - message index builder and finder for maildir folders.
+#
+# Copyright (C) Richard P. Curnow 2002-2004,2006
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# =======================================================================
+
+#########################################################################
+# Edit the following variables as required
+CC=@cc@
+CFLAGS=@cflags@ @defs@
+CPPFLAGS=@CPPFLAGS@
+LDFLAGS=@LDFLAGS@
+LIBS=@LIBS@
+
+#######################################################################
+# If you're generating a package, you may want to use
+# make DESTDIR=temporary_dir install
+# to get the software installed to a directory where you can create
+# a mairix.tar.gz from it
+DESTDIR=
+
+#######################################################################
+
+prefix=$(DESTDIR)@prefix@
+bindir=$(DESTDIR)@bindir@
+mandir=$(DESTDIR)@mandir@
+man1dir=$(mandir)/man1
+man5dir=$(mandir)/man5
+infodir=$(DESTDIR)@infodir@
+docdir=$(DESTDIR)@docdir@
+
+#########################################################################
+# Things below this point shouldn't need to be edited.
+
+# Object files that are linked together into the mairix executable.
+OBJ = mairix.o db.o rfc822.o tok.o hash.o dirscan.o writer.o \
+ reader.o search.o stats.o dates.o datescan.o mbox.o md5.o \
+ fromcheck.o glob.o dumper.o expandstr.o dotlock.o \
+ nvp.o nvpscan.o
+
+# First rule in the file, hence the default goal: build the executable.
+all : mairix
+
+# Link every object with the configured linker flags and libraries.
+mairix : $(OBJ)
+ $(CC) -o mairix $(CFLAGS) $(LDFLAGS) $(OBJ) $(LIBS)
+
+# Each object depends on its own source, on three shared headers and on
+# the Makefile, so touching any of those recompiles every object.
+%.o : %.c memmac.h mairix.h reader.h Makefile
+ $(CC) -c $(CFLAGS) $(CPPFLAGS) -o $@ $<
+
+# The three scanners below are generated: dfasyn reads the .nfa input and
+# writes the .c file (-o), the .h file (-ho) and a .report file (-r).
+datescan.c datescan.h : datescan.nfa ./dfasyn/dfasyn
+ ./dfasyn/dfasyn -o datescan.c -ho datescan.h -r datescan.report -u datescan.nfa
+
+fromcheck.c fromcheck.h : fromcheck.nfa ./dfasyn/dfasyn
+ ./dfasyn/dfasyn -o fromcheck.c -ho fromcheck.h -r fromcheck.report -u fromcheck.nfa
+
+nvpscan.c nvpscan.h : nvp.nfa ./dfasyn/dfasyn
+ ./dfasyn/dfasyn -o nvpscan.c -ho nvpscan.h -r nvpscan.report -u nvp.nfa
+
+# These objects need a generated header to exist before they are compiled.
+dates.o : datescan.h
+mbox.o : fromcheck.h
+nvp.o : nvpscan.h
+
+# No prerequisites: the recipe only runs while version.h does not exist.
+# Presumably ./mkversion writes version.h -- confirm against the script.
+version.h:
+ ./mkversion
+
+# Build the dfasyn generator with a recursive make that reuses CC and
+# CFLAGS; fail with a message when the dfasyn subdirectory is missing.
+./dfasyn/dfasyn:
+ if [ -d dfasyn ]; then cd dfasyn ; $(MAKE) CC="$(CC)" CFLAGS="$(CFLAGS)" ; else echo "No dfasyn subdirectory?" ; exit 1 ; fi
+
+# Remove objects, the executable, the listed mairix.* auxiliary files and
+# the generated scanner sources, then clean the dfasyn and test
+# subdirectories when present. version.h is not removed here.
+clean:
+ -rm -f *~ *.o mairix *.s core
+ -rm -f mairix.cp mairix.fn mairix.aux mairix.log mairix.ky mairix.pg mairix.toc mairix.tp mairix.vr
+ -rm -f fromcheck.[ch] datescan.[ch]
+ -rm -f nvpscan.[ch]
+ if [ -d dfasyn ]; then cd dfasyn ; $(MAKE) clean ; fi
+ if [ -d test ]; then cd test ; $(MAKE) clean ; fi
+
+# Everything clean does, plus removal of config.log and of the Makefile.
+distclean: clean
+ if [ -d test ]; then cd test ; $(MAKE) distclean ; fi
+ -rm -f Makefile config.log
+
+# Create the destination directories when missing, then copy the binary
+# (mode 555) and both manual pages (mode 444).
+# NOTE: this target has no prerequisites, so mairix must already be built.
+install:
+ [ -d $(prefix) ] || mkdir -p $(prefix)
+ [ -d $(bindir) ] || mkdir -p $(bindir)
+ [ -d $(mandir) ] || mkdir -p $(mandir)
+ [ -d $(man1dir) ] || mkdir -p $(man1dir)
+ [ -d $(man5dir) ] || mkdir -p $(man5dir)
+ cp -f mairix $(bindir)
+ chmod 555 $(bindir)/mairix
+ cp -f mairix.1 $(man1dir)
+ chmod 444 $(man1dir)/mairix.1
+ cp -f mairixrc.5 $(man5dir)
+ chmod 444 $(man5dir)/mairixrc.5
+
+# Build mairix first, then run "make check" inside test/ with the same CC
+# and CFLAGS; fail with a message when test/ is missing.
+check: mairix
+ if [ -d test ]; then cd test ; $(MAKE) CC="$(CC)" CFLAGS="$(CFLAGS)" check ; else echo "No test subdirectory?" ; exit 1 ; fi
+
+# These targets are commands, not files.
+.PHONY : all install clean distclean check
+
+# mairix.o additionally needs version.h to exist.
+mairix.o : version.h
+
+
diff --git a/src/mairix/NEWS b/src/mairix/NEWS
@@ -0,0 +1,317 @@
+NEW IN VERSION 0.23
+===================
+* Allow '=' in message-id search for RFC2822 conformance
+* Add the option -H to force hardlinks
+* Skip .gitignore files
+* Do not interpret special characters [~,/=^] in Message-ID queries
+* Fix faulty mbox message separators
+* Improve reporting of unparsed MIME headers & remove code duplication
+* Allow empty sections in MIME headers
+* Add support for uuencoded attachments
+* Improve the parsing of MIME boundaries
+* Fix SEGV if mbox shrinks
+* Add test suite
+* Fix building in parallel
+
+NEW IN VERSION 0.22
+===================
+
+* Skip symlinks when using mbox (R A Lichtensteiger)
+* Update copyright year info throughout
+* Update ACKNOWLEDGEMENTS and copyright headers where more credit was due
+* Update FSF address in file headers
+* Update COPYING to latest gpl-2.0.txt
+* Improve error message if home directory cannot be determined
+* Honour HOME environment variable (Andreas Amann)
+* MIME types are allowed to have "+" characters in them. (Jonathan Kamens)
+* Fix deficiencies in the parsing of mbox From lines (Jonathan Kamens)
+* Include the existing -x flag in the help message (Mark Hills)
+* Fix documentation nits (Tom Doherty)
+* Remove spurious message when the mtime of a message file has changed
+* Do not export functions already exported through a callback structure. (Samuel Tardieu)
+* Fix two manpages buglets. (Samuel Tardieu)
+* When freeing a struct nvp, do not forget to free the struct nvp_entry. (Samuel Tardieu)
+* Do not leak memory if duplicate fields are present. (Samuel Tardieu)
+* Initialize the date header with a known value. (Samuel Tardieu)
+* Merge two conflicting solutions for bad MIME encoding
+* Fix segfault when last char is not a newline (Mika Fischer)
+* fix for MIME-related crash (Paramjit Oberoi)
+* Add support for claws-mail (Anand Kumria)
+* Add MH sub-type support for ezmlm-archives (Claus Alboege)
+* Detect a trailing -f or -o with no following argument
+* Allow lines starting "From" to occur part-way through the header.
+* Display message-ID in search -x mode
+* Remove execute permission from source files
+* Handle mbox from separators where email address is in angle brackets
+* Fix a bug in rfc822.c: Some headers weren't correctly parsed. (Jaime Velasco Juan)
+
+NEW IN VERSION 0.21
+===================
+
+* Fix make clean target in dfasyn/ (Benj. Mako Hill)
+* Limit number of messages that are examined when an end boundary is missing in
+ an mbox (Chung-chieh Shan)
+* Avoid examining . and .. when traversing MH folder hierarchy (Steven Lumos)
+* Fix various bugs in the name/value parser
+* Add some RFC2231 support to the name/value parser (continuations)
+* Fix indexing when existing database only contains 1 message
+
+NEW IN VERSION 0.20
+===================
+
+* Cache uncompressed mbox data (Chris Mason, further work by me)
+* Fix gaps in date ranges for search
+* Unlock database if mairix is interrupted (Paul Fox)
+* Add fast index option (-F)
+* Fix conditional compilation errors for compressed mbox
+* Reimplement MIME header parsing
+* Add capability to search on names of attachments
+* Add capability to search on state of message flags
+* Create maildir-format mfolder filenames correctly with regard to flags
+* Various bug fixes (Oliver Braun, Matthias Teege)
+
+
+NEW IN VERSION 0.19
+===================
+* mairix.spec fixes (André Costa)
+* bug fix: freeing of message structures (Karsten Petersen)
+* Add new -x (--excerpt-output) option, an alternative mode for searching.
+ This displays the key headers from the matching messages on stdout.
+* Add notes about the mairix-users mailing list and the SourceForge page to
+ README.
+* Fix configuration + compilation to allow building with gzip support but
+ without bzlib support.
+* Rename internal functions like zopen() to avoid name conflicts on MacOS X.
+ (Vincent Lefevre)
+* Remove a spurious ; in bison input file (Vincent Lefevre)
+* Improve output given in various error conditions (based on patch by Karsten
+ Petersen)
+
+NEW IN VERSION 0.18
+===================
+
+* Support bzip2'd mbox folders
+* Fix bugs in parsing mbox folders containing unquoted 'From ' lines inside
+ MIME body parts
+* Fix bug in parsing content-type data containing quotes with whitespace
+ before
+* Clone the message flags (when the source folder and mfolder are both
+ of maildir type)
+* New manpages mairix.1 and mairixrc.5 are included, and the old texinfo-based
+ documentation is deprecated into the old_docs/ directory.
+* Upgrade scanners to new version of dfasyn
+* Support Mew's MH folder subtype
+
+
+NEW IN VERSION 0.17.1
+=====================
+
+* Fix detection of MH folder subtype used by nnml (Gnus)
+* Fix filename format generated in the /cur/ directory for maildir mfolders.
+* Syntax fix in configure script
+
+NEW IN VERSION 0.17
+===================
+
+* Support gzipped mbox folders (any file matched by a mbox= line in the config
+ file is considered as a gzipped mbox if its name ends in .gz)
+* Rework directory traversal for the '...' construct to speed up indexing and
+ the check that mfolder isn't going to overwrite a real folder when searching.
+* Check whether database exists before attempting to do searching.
+* Matched new maildir messages go in /new/ subdirectory of maildir mfolder.
+* Fix lots of compiler warnings generated by gcc4.x
+* Don't create and immediately scrub database entries for empty mbox folders.
+* Fix usage() info for bare word in searching
+* Allow '.' on the ends of numeric filenames in MH folders (to work
+ with Evolution)
+* Update .PHONY target so that 'make install' etc are more reliable.
+* Add X-source-folder header to indicate the original folder of a match found
+ in an mbox.
+* Migration to git for revision control.
+
+NEW IN VERSION 0.16.1
+=====================
+
+* Remove the lockfile if the program terminates for any reason.
+
+NEW IN VERSION 0.16
+===================
+
+* Home directory (~) and environment variable ($foo / ${foo}) expansion in the
+ .mairixrc file
+* Add -Q flag to skip database integrity checks during indexing (equivalently
+ the nochecks option in .mairixrc file). This speeds up indexing but loses
+ some robustness.
+* Add ^ word prefix to require substring search to be left-anchored
+* Split 'make clean' into separate clean and clean_docs
+* Improve some error messages
+* Add online help entries for -o and -d
+* Don't write out the database if there are no changes found during indexing.
+* Fix stale information about the 'and' and 'or' delimiters in the online help.
+* Add the capability to omit particular folders from indexing (omit keyword in
+ .mairixrc file.) This allows broad wildcards to be used with selected
+ folders removed from the wildcard which is much more convenient in many
+ set-ups.
+* Avoid writing matches to any folder on the list of folders to be indexed
+ (affects both mfolder option and argument of -o command line switch.) This
+ prevents disastrous loss of messages in the event of trying to overwrite a
+ wanted folder with the matches.
+* Implement dot-locking on the database file to prevent corruption due to
+ concurrent updates. Add --unlock file to forcibly remove a stray lockfile.
+* Display message path in warning messages from rfc822 parsing.
+
+NEW IN VERSION 0.15
+===================
+
+* Migrate to GNU Arch for hosting the development archive
+* In mbox parsing, handle return path in 'From ' line only being a local part
+ (reported by several people)
+* Don't output number of matched messages in raw mode (to make output more
+ useful to scripts etc) (Samuel Tardieu)
+* Fix vfolder->mfolder in dotmairixrc.eg (reported by several people)
+* Handle spaces in multipart message boundary strings (Chung-chieh Shan)
+* Be more tolerant of bad multipart message boundary separators (Chung-chieh
+ Shan)
+* Add rudimentary database dump command (-d/--dump)
+* Fix bug in handling of per-database hash key
+* Improve standards-compliance of maildir output file names (Jeff King)
+* Remove most compiler warnings
+
+NEW IN VERSION 0.14.1
+=====================
+
+* Bug fix : splitting of messages in mboxes was too strict regarding whitespace
+
+NEW IN VERSION 0.14
+===================
+
+* Fix error in path (p:) searching for messages in mboxes.
+* Improve usage() function
+
+NEW IN VERSION 0.13
+===================
+
+* Fixes to support the mbox format used by Mozilla mail
+* When creating vfolder directories for maildir/mh, remove existing
+ non-directory at the same path, if present. When creating mbox vfolder file,
+ complain if there's already a directory at the same path and exit.
+* Switch from the term "virtual folder" to "match folder"
+* Fix bug in path matches (p:) containing upper-case letters - previously they
+ matched on corresponding all lower-case paths.
+
+NEW IN VERSION 0.12
+===================
+
+! Change in database file format - existing databases need to be destroyed and
+ recreated.
+
+* Indexing of mbox folders in addition to the existing maildir & MH support
+* Output to mbox format vfolder
+* Return exit status 1 if no messages are matched in search mode, and exit
+ status 2 for all error conditions.
+* Allow wildcards to be used in specifying maildir and mh folder paths.
+* Searching on messages having a particular Message-ID (m:msgid expression in
+ search mode).
+* When indexing whole email addresses, '+' is now considered a valid character.
+* Use ',' instead of '+' in search expressions, and '/' instead of ','. This
+ is to allow '+' to be used inside email addresses that are being searched
+ for. The '/' character is traditionally associated with meaning 'or', so it
+ made more sense to move ',' to mean 'and'. (Unfortunately, there were very
+ few metacharacters left which don't have some special meaning to shells, and
+ I wanted to avoid the need to quote or escape the search expressions.)
+* Bug fix checking return status of mmap.
+* Handle ">From " at the start of the message headers
+* Handle mis-formatted encoding strings "7 bit" and "8 bit"
+* Make every database use a random seed for the token hash function (to prevent
+ denial of service attacks against mairix through carefully crafted messages.)
+* Rename some options in the mairixrc file, to put the folder formats on an
+ equal footing.
+* Properly handle the case where a maildir vfolder exists but one or more of
+ the new,tmp,cur subdirectories is missing.
+* Add configure script (not autoconf-based)
+
+NEW IN VERSION 0.11
+===================
+
+* Detect failed malloc (out of memory) conditions properly and report it.
+* Improved date specification syntax for d: option
+* Allow vfolder to be an absolute path or relative to current directory,
+ instead of just relative to base directory.
+
+NEW IN VERSION 0.10
+===================
+
+* Add 'raw' mode for searching.
+* When purging, only print the pass[12] message in verbose mode
+* Add an ACKNOWLEDGEMENTS file.
+* Hack to handle missing NAME_MAX on various non-Linux systems
+* Improve mairix.spec file for RPM building
+* Change default value for prefix in Makefile to make it more standard.
+
+NEW IN VERSION 0.9
+==================
+
+* Fix problem with auditing headers if a uucp/mbox-style "from " header is
+ present at the start.
+* Allow \: sequence in folder names to specify a :
+
+NEW IN VERSION 0.8
+==================
+
+* Fix bug : mairix used to crash if a message had corrupted RFC822 header lines
+
+NEW IN VERSION 0.7
+==================
+
+* Fix bug : mairix likely to crash if a non-existent folder is listed in the
+ conf file.
+* Allow multiple folders and mh_folders lines in the conf file for people who
+ have many separate folders.
+* Print an extra 'comfort' message in verbose mode before starting to scan the
+ directory tree.
+
+NEW IN VERSION 0.6
+==================
+
+* When an unrecognized encoding is found, ignore the body part instead of
+ aborting the run.
+
+NEW IN VERSION 0.5
+==================
+
+* When -a option is used for search, avoid symlinking the same message twice if
+ it matches more than one query.
+* Fixes to rpm spec file.
+* Fix handling of = in base64-encoded attachments.
+* Support non POSIX locales.
+* Support rfc2047 encoding in headers.
+* Create vfolder if it doesn't already exist.
+* Allow searching on complete email addresses as well as individual words in
+ to, cc and from fields.
+* New -o option to allow vfolder name to be given on the command line.
+
+NEW IN VERSION 0.4
+==================
+
+* Support for MH folders
+* Create database with mode 0600 instead of 0644 (better security).
+* Add Makefile target to install whichever forms of the documentation have been
+ built.
+
+NEW IN VERSION 0.3
+==================
+
+* Various bug fixes
+
+NEW IN VERSION 0.2
+==================
+
+* Substrings of message paths can be used as search expressions (p:substring
+ option)
+* = now used instead of / as the delimiter for number of errors in an
+ approximate match (to help with path search)
+* Bug fix when using -t mode for search with unpurged dead messages still in
+ the database.
+
+==================
+# vim:comments-=mb\:*:comments+=fb\:*
diff --git a/src/mairix/README b/src/mairix/README
@@ -0,0 +1,63 @@
+mairix is a program for indexing and searching email messages stored in
+Maildir, MH or mbox folders.
+
+* Indexing is fast. It runs incrementally on new messages - any particular
+ message only gets scanned once in the lifetime of the index file.
+
+* The search mode populates a "virtual" folder with symlinks(*) which
+ point to the real messages. This folder can be opened as usual in your mail
+ program.
+
+* The search mode is very fast.
+
+* Indexing and searching works on the basis of words. The index file tabulates
+ which words occur in which parts (particular headers + body) of which
+ messages.
+
+The program is a very useful complement to mail programs like mutt
+(http://www.mutt.org/, which supports Maildir, MH and mbox folders) and
+Sylpheed (which supports MH folders).
+
+[(*) where the input or output folder is an mbox, a copy of the message is made
+instead of symlinking.]
+
+See also the mairix.txt file.
+
+*********************************************************************
+ Copyright (C) Richard P. Curnow 2002-2004
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of version 2 of the GNU General Public License as
+ published by the Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+*********************************************************************
+
+Suggestions, bug reports, experiences, praise, complaints etc to the author
+please, at <rc@rc0.org.uk>
+
+Since July 2006, there is a mairix-users mailing list. To subscribe or to view
+the archives, visit
+
+ https://lists.sourceforge.net/lists/listinfo/mairix-users
+
+The main website for mairix is
+
+ http://www.rc0.org.uk/mairix
+
+The SourceForge project page is
+
+ http://www.sf.net/projects/mairix
+
+ACKNOWLEDGEMENTS
+================
+
+See the ACKNOWLEDGEMENTS file
diff --git a/src/mairix/config.log b/src/mairix/config.log
@@ -0,0 +1,14 @@
+Test program is
+#include <zlib.h>
+int main () {
+ const char *foo;
+ foo = zlibVersion();
+ return 0;
+}
+Test program is
+#include <bzlib.h>
+int main () {
+ const char *foo;
+ foo = BZ2_bzlibVersion();
+ return 0;
+}
diff --git a/src/mairix/configure b/src/mairix/configure
@@ -0,0 +1,337 @@
+#!/bin/sh
+#########################################################################
+#
+# mairix - message index builder and finder for maildir folders.
+#
+# Copyright (C) Richard P. Curnow 2003,2004,2005
+# Copyright (C) Paramjit Oberoi 2005
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# =======================================================================
+
+if [ -f config.log ]; then rm -f config.log ; fi
+exec 5>config.log
+
+MYCC=${CC:-gcc}
+MYCFLAGS=${CFLAGS:--O2 -Wall}
+MYCPPFLAGS=${CPPFLAGS:-}
+MYLDFLAGS=${LDFLAGS:-}
+
+# =======================================================================
+# Functions
+
+#{{{ cleanup
+cleanup () {
+ if [ -f docheck.c ]; then rm -f docheck.c ; fi
+ if [ -f docheck.o ]; then rm -f docheck.o ; fi
+ if [ -f docheck ]; then rm -f docheck ; fi
+ rm -rf docheck.c docheck.o docheck
+}
+#}}}
+
+#{{{ test_cc : basic compiler sanity check
+test_cc () {
+  # Sanity check: can "$MYCC $MYCFLAGS" build a trivial program at all?
+  # Compiler output goes to config.log (fd 5).  On failure the probe source
+  # is logged as well and configure exits with status 1.
+  #
+  # The compiler and flags are passed as printf arguments, never as part of
+  # the format string, so a '%' or backslash in them is printed literally.
+  printf 'Testing whether your compiler "%s %s" works : ' "$MYCC" "$MYCFLAGS"
+  cat >docheck.c <<EOF;
+#include <stdio.h>
+int main (int argc, char **argv)
+{
+ return 0;
+}
+EOF
+  ${MYCC} ${MYCFLAGS} -o docheck docheck.c 1>&5 2>&5
+  if [ $? -eq 0 ]
+  then
+    printf "it works\n"
+  else
+    printf "it doesn't work\n"
+    printf "Failed program was\n" 1>&5
+    cat docheck.c 1>&5
+    rm -f docheck.c docheck
+    exit 1
+  fi
+  cleanup
+}
+#}}}
+
+#{{{ test_for_stdint_h
+test_for_stdint_h () {
+  # Probe: does a file that includes <stdint.h> compile?
+  # Prints 0 on success and 1 on failure; the caller captures that output.
+  cat >docheck.c <<EOF
+#include <stdint.h>
+int main(int argc, char **argv) {
+ return 0;
+}
+EOF
+
+  if ${MYCC} ${MYCFLAGS} -c -o docheck.o docheck.c >/dev/null 2>&1
+  then
+    result=0
+  else
+    result=1
+  fi
+
+  rm -f docheck.c docheck.o
+  echo $result
+}
+#}}}
+#{{{ test_for_inttypes_h
+test_for_inttypes_h () {
+  # Probe: does a file that includes <inttypes.h> compile?
+  # Prints 0 on success and 1 on failure; the caller captures that output.
+  cat >docheck.c <<EOF
+#include <inttypes.h>
+int main(int argc, char **argv) {
+ return 0;
+}
+EOF
+
+  if ${MYCC} ${MYCFLAGS} -c -o docheck.o docheck.c >/dev/null 2>&1
+  then
+    result=0
+  else
+    result=1
+  fi
+
+  rm -f docheck.c docheck.o
+  echo $result
+}
+#}}}
+#{{{ test_for_zlib
+test_for_zlib () {
+  # Probe: can a program calling zlibVersion() be compiled and linked
+  # with -lz?  The probe source and all compiler output are written to
+  # config.log (fd 5).  Prints 0 on success and 1 on failure.
+  cat > docheck.c <<EOF
+#include <zlib.h>
+int main () {
+ const char *foo;
+ foo = zlibVersion();
+ return 0;
+}
+EOF
+  echo "Test program is" 1>&5
+  cat docheck.c 1>&5
+  if ${MYCC} ${MYCPPFLAGS} ${MYCFLAGS} ${MYLDFLAGS} -o docheck docheck.c -lz 1>&5 2>&1
+  then
+    result=0
+  else
+    result=1
+  fi
+  rm -f docheck.c docheck
+  echo $result
+}
+#}}}
+#{{{ test_for_bzlib
+test_for_bzlib () {
+  # Probe: can a program calling BZ2_bzlibVersion() be compiled and linked
+  # with -lbz2?  The probe source and all compiler output are written to
+  # config.log (fd 5).  Prints 0 on success and 1 on failure.
+  cat > docheck.c <<EOF
+#include <bzlib.h>
+int main () {
+ const char *foo;
+ foo = BZ2_bzlibVersion();
+ return 0;
+}
+EOF
+  echo "Test program is" 1>&5
+  cat docheck.c 1>&5
+  if ${MYCC} ${MYCPPFLAGS} ${MYCFLAGS} ${MYLDFLAGS} -o docheck docheck.c -lbz2 1>&5 2>&1
+  then
+    result=0
+  else
+    result=1
+  fi
+  rm -f docheck.c docheck
+  echo $result
+}
+#}}}
+#{{{ usage
+# Print the help text on stdout.  The here-document delimiter is unquoted,
+# so back-quotes and the literal "$version" are backslash-escaped, while
+# $HOME is expanded by the shell when the text is printed.
+usage () {
+ cat <<EOF;
+\`configure' configures mairix to adapt to many kinds of systems.
+
+Usage: ./configure [OPTION]...
+
+Defaults for the options are specified in brackets.
+
+Configuration:
+ -h, --help display this help and exit
+
+Installation directories:
+ --prefix=PREFIX install architecture-independent files in PREFIX
+ [/usr/local]
+
+By default, \`make install' will install all the files in
+\`/usr/local/bin', \`/usr/local/lib' etc. You can specify
+an installation prefix other than \`/usr/local' using \`--prefix',
+for instance \`--prefix=$HOME'.
+
+Fine tuning of the installation directories:
+ --bindir=DIR user executables [EPREFIX/bin]
+ --infodir=DIR info documentation [PREFIX/info]
+ --mandir=DIR man documentation [PREFIX/man]
+ --docdir=DIR other documentation [PREFIX/doc/mairix-\$version]
+
+Other options:
+ --enable-gzip-mbox attempt to support gzipped mboxes (requires zlib)
+ --disable-gzip-mbox don't attempt to support gzipped mboxes
+ --enable-bzip-mbox attempt to support bzip2ed mboxes (requires bzlib)
+ --disable-bzip-mbox don't attempt to support bzip2ed mboxes
+
+Some influential environment variables:
+ CC C compiler command
+ CFLAGS C compiler flags
+ CPPFLAGS Extra C preprocessor flags, e.g. -I<include dir> if you
+ have header files in a nonstandard directory <include dir>
+ LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a
+ nonstandard directory <lib dir>
+
+Use these variables to override the choices made by \`configure' or to help
+it to find libraries and programs with nonstandard names/locations.
+
+Report bugs to <rc@rc0.org.uk>.
+EOF
+}
+#}}}
+# =======================================================================
+
+# Defaults for variables
+PREFIX=/usr/local
+
+use_readline=yes
+bad_options=no
+use_gzip_mbox=yes
+use_bzip_mbox=yes
+
+# Parse options to configure.
+# For --name=value options the value is everything after the first '='.
+# The option is quoted and fed through printf '%s' so that directory names
+# containing spaces, glob characters, '%' or backslashes survive intact.
+for option
+do
+  case "$option" in
+
+  --prefix=* | --install-prefix=* )
+    PREFIX=`printf '%s\n' "$option" | sed -e 's/[^=]*=//;'`
+    ;;
+  --bindir=* )
+    BINDIR=`printf '%s\n' "$option" | sed -e 's/[^=]*=//;'`
+    ;;
+  --mandir=* )
+    MANDIR=`printf '%s\n' "$option" | sed -e 's/[^=]*=//;'`
+    ;;
+  --infodir=* )
+    INFODIR=`printf '%s\n' "$option" | sed -e 's/[^=]*=//;'`
+    ;;
+  --docdir=* )
+    DOCDIR=`printf '%s\n' "$option" | sed -e 's/[^=]*=//;'`
+    ;;
+  --enable-gzip-mbox )
+    use_gzip_mbox=yes
+    ;;
+  --disable-gzip-mbox )
+    use_gzip_mbox=no
+    ;;
+  --enable-bzip-mbox )
+    use_bzip_mbox=yes
+    ;;
+  --disable-bzip-mbox )
+    use_bzip_mbox=no
+    ;;
+  -h | --help )
+    usage
+    exit 1
+    ;;
+  * )
+    # Keep scanning so every bad option is reported before giving up.
+    printf 'Unrecognized option : %s\n' "$option"
+    bad_options=yes
+    ;;
+  esac
+done
+
+if [ ${bad_options} = yes ]; then
+  exit 1
+fi
+
+# Run the compiler sanity check and the feature probes.  DEFS collects the
+# -D flags and LIBS the link libraries that are later substituted into the
+# Makefile.  Both start empty so nothing leaks in from the environment,
+# and every probe appends to them.
+DEFS=""
+LIBS=""
+test_cc
+
+printf "Checking for <stdint.h> : "
+if [ `test_for_stdint_h` -eq 0 ]; then
+  printf "Yes\n"
+  DEFS="${DEFS} -DHAS_STDINT_H"
+else
+  printf "No\n"
+fi
+
+printf "Checking for <inttypes.h> : "
+if [ `test_for_inttypes_h` -eq 0 ]; then
+  printf "Yes\n"
+  DEFS="${DEFS} -DHAS_INTTYPES_H"
+else
+  printf "No\n"
+fi
+
+# The compression probes only run when the feature was not disabled on the
+# command line; a failed probe silently turns the feature off.
+if [ $use_gzip_mbox = "yes" ]; then
+  printf "Checking for zlib : "
+  if [ `test_for_zlib` -eq 0 ]; then
+    printf "Yes\n";
+    DEFS="${DEFS} -DUSE_GZIP_MBOX"
+    LIBS="${LIBS} -lz"
+  else
+    printf "No (disabled gzipped mbox support)\n";
+  fi
+fi
+
+if [ $use_bzip_mbox = "yes" ]; then
+  printf "Checking for bzlib : "
+  if [ `test_for_bzlib` -eq 0 ]; then
+    printf "Yes\n";
+    DEFS="${DEFS} -DUSE_BZIP_MBOX"
+    LIBS="${LIBS} -lbz2"
+  else
+    printf "No (disabled bzip2ed mbox support)\n";
+  fi
+fi
+
+#{{{ Determine version number of the program.
+# version.txt, when present, supplies the version string; otherwise the
+# tree is labelled as a development build.
+revision="DEVELOPMENT"
+if [ -f version.txt ]; then
+  revision=`cat version.txt`
+fi
+
+#}}}
+# Any installation directory not given on the command line (unset or
+# empty) is derived from PREFIX.
+: "${BINDIR:=${PREFIX}/bin}"
+: "${MANDIR:=${PREFIX}/man}"
+: "${INFODIR:=${PREFIX}/info}"
+: "${DOCDIR:=${PREFIX}/doc/mairix-${revision}}"
+
+echo "Generating Makefile"
+
+rm -f Makefile
+sed -e "s%@cc@%${MYCC}%; \
+ s%@defs@%${DEFS}%; \
+ s%@cflags@%${MYCFLAGS}%; \
+ s%@prefix@%${PREFIX}%; \
+ s%@bindir@%${BINDIR}%; \
+ s%@mandir@%${MANDIR}%; \
+ s%@infodir@%${INFODIR}%; \
+ s%@docdir@%${DOCDIR}%; \
+ s%@LIBS@%${LIBS}%; \
+ s%@CPPFLAGS@%${MYCPPFLAGS}%; \
+ s%@LDFLAGS@%${MYLDFLAGS}%; \
+ " < Makefile.in > Makefile
+
+# Avoid editing Makefile instead of Makefile.in
+chmod ugo-w Makefile
+
+# =======================================================================
+# vim:et:sw=2:ht=2:sts=2:fdm=marker:cms=#%s
+
diff --git a/src/mairix/dates.c b/src/mairix/dates.c
@@ -0,0 +1,404 @@
+/*
+ mairix - message index builder and finder for maildir folders.
+
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2002-2004,2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <ctype.h>
+#include <assert.h>
+#include "mairix.h"
+#include "dates.h"
+#include "datescan.h"
+
+/* Classify the date expression in [first, last) by feeding it, one
+ * character at a time, through the generated datescan state machine.
+ * Returns DS_FAILURE as soon as the machine rejects a character, otherwise
+ * the attribute attached to the state reached at the end of the input. */
+static enum DATESCAN_TYPE discover_type(char *first, char *last)/*{{{*/
+{
+  int state = 0;
+  char *cursor;
+
+  for (cursor = first; cursor < last; cursor++) {
+    /* Index the token table with the byte value taken as unsigned. */
+    int tok = datescan_char2tok[(int)*(unsigned char*)cursor];
+    state = datescan_next_state(state, tok);
+    if (state < 0) {
+      return DS_FAILURE;
+    }
+  }
+
+  return datescan_attr[state];
+}
+/*}}}*/
+/* Map a three-letter English month abbreviation at the start of p
+ * (case-insensitive) to its number, 1 for January up to 12 for December.
+ * Returns 0 when p does not start with a month abbreviation. */
+static int match_month(char *p)/*{{{*/
+{
+  static const char *const abbrev[12] = {
+    "jan", "feb", "mar", "apr", "may", "jun",
+    "jul", "aug", "sep", "oct", "nov", "dec"
+  };
+  int i;
+
+  for (i = 0; i < 12; i++) {
+    if (strncasecmp(p, abbrev[i], 3) == 0) {
+      return i + 1;
+    }
+  }
+  return 0;
+}
+/*}}}*/
+/* Convert a year as typed by the user into a struct tm style year
+ * (years since 1900):
+ *   above 100   -> taken as a full year, 1900 is subtracted
+ *   70 .. 100   -> returned unchanged (1970-1999; 100 itself stays 100)
+ *   below 70    -> 100 is added (2000-2069) */
+static int year_fix(int y)/*{{{*/
+{
+  if (y > 100) {
+    return y - 1900;
+  }
+  if (y >= 70) {
+    return y;
+  }
+  return y + 100;
+}
+/*}}}*/
+/* Return the number of the last day of a month.
+ *   mon : month in [0,11]
+ *   y   : year - 1900 (as in struct tm)
+ * February follows the full Gregorian rule: every 4th year is a leap year,
+ * except century years that are not divisible by 400.  For 1901-2099 this
+ * gives the same answer as a plain "divisible by 4" test. */
+static int last_day(int mon, int y) {/*{{{*/
+  static const unsigned char days[12] = {31,28,31,30,31,30,31,31,30,31,30,31};
+  int year = y + 1900;
+
+  if (mon != 1) {
+    return days[mon];
+  }
+  if ((year % 4) != 0) {
+    return 28;
+  }
+  if ((year % 100) != 0) {
+    return 29;
+  }
+  return ((year % 400) == 0) ? 29 : 28;
+}
+/*}}}*/
+/* Set the day of the month in *x to y.  When y is greater than the day
+ * currently stored in *x, the date is first moved back one month (wrapping
+ * from January to December of the previous year): a bare day number that
+ * has not been reached yet means that day in the previous month. */
+static void set_day(struct tm *x, int y)/*{{{*/
+{
+  if (y > x->tm_mday) {
+    if (x->tm_mon != 0) {
+      x->tm_mon -= 1;
+    } else {
+      x->tm_mon = 11;
+      x->tm_year -= 1;
+    }
+  }
+  x->tm_mday = y; /* stored unconditionally */
+}
+/*}}}*/
+/* Return non-zero when day d of month m (m counted from 1) falls later in
+ * the year than the month/day currently stored in *x. */
+static int is_later_dm(struct tm *x, int m, int d)/*{{{*/
+{
+  int target_mon = m - 1; /* tm_mon counts from 0 */
+
+  if (x->tm_mon != target_mon) {
+    return x->tm_mon < target_mon;
+  }
+  return x->tm_mday < d;
+}
+/*}}}*/
+/* Parse one date expression held in [first, last) and store the result in
+ * *start and/or *end (either may be NULL to skip it).
+ *
+ * Relative expressions (a number followed by d, w, m or y) are subtracted
+ * from the current time; a month counts as 30 days and a year as 365 days.
+ * In that case the whole struct tm, time of day included, is overwritten.
+ * All other forms only set the day, month and year fields that the
+ * expression provides, leaving the remaining fields as the caller set
+ * them.  Forms without a year step back one year when the date named is
+ * later in the year than the one already stored in the struct.
+ *
+ * Returns 0 on success and -1 when the expression cannot be parsed. */
+static int scan_date_expr(char *first, char *last, struct tm *start, struct tm *end)/*{{{*/
+{
+  enum DATESCAN_TYPE type;
+  time_t now;
+
+  time(&now);
+  type = discover_type(first, last);
+
+  if (type == DS_SCALED) {/*{{{*/
+    time_t count; /* number of units; time_t so the scaling cannot overflow int */
+    long unit;    /* seconds per unit */
+    char *p;
+    time_t then;
+
+    p = first;
+    count = 0;
+    /* <ctype.h> functions need the character as an unsigned char value. */
+    while (isdigit((unsigned char)*p)) {
+      count = (count*10) + (*p - '0');
+      p++;
+    }
+    switch(*p) {
+      case 'd': unit = 86400L; break;
+      case 'w': unit = 7L*86400L; break;
+      case 'm': unit = 30L*86400L; break;
+      case 'y': unit = 365L*86400L; break;
+      default:
+        fprintf(stderr, "Unrecognized relative date scaling '%c'\n", *p);
+        return -1;
+    }
+    then = now - count * (time_t)unit;
+    if (start) {
+      *start = *localtime(&then);
+    }
+    if (end) {
+      *end = *localtime(&then);
+    }/*}}}*/
+  } else if (type == DS_FAILURE) {
+    fputs("Cannot parse date expression [", stderr);
+    fwrite(first, sizeof(char), last-first, stderr);
+    fputs("]\n", stderr);
+    return -1;
+  } else {
+    /* Absolute forms: optional number, optional month name, optional number */
+    int v1, v3;
+    int m2; /* decoded month */
+    char *p;
+
+    v1 = v3 = m2 = 0;
+    p = first;
+    while (p < last && isdigit((unsigned char)*p)) {
+      v1 = (v1*10) + (*p - '0');
+      p++;
+    }
+    if (p < last) {
+      m2 = match_month(p);
+      p += 3;
+      if (m2 == 0) {
+        return -1; /* failure */
+      }
+
+    }
+    while (p < last && isdigit((unsigned char)*p)) {
+      v3 = (v3*10) + (*p - '0');
+      p++;
+    }
+    assert(p==last); /* should be true in all cases. */
+
+    switch (type) {
+      case DS_D:/*{{{*/
+        if (start) set_day(start, v1);
+        if (end) set_day(end, v1);
+        break;
+/*}}}*/
+      case DS_Y:/*{{{*/
+        if (start) {
+          start->tm_mday = 1;
+          start->tm_mon = 0; /* january */
+          start->tm_year = year_fix(v1);
+        }
+        if (end) {
+          end->tm_mday = 31;
+          end->tm_mon = 11;
+          end->tm_year = year_fix(v1);
+        }
+        break;
+/*}}}*/
+      case DS_YYMMDD:/*{{{*/
+        if (start) {
+          start->tm_mday = v1 % 100;
+          start->tm_mon = ((v1 / 100) % 100) - 1;
+          start->tm_year = year_fix(v1/10000);
+        }
+        if (end) {
+          end->tm_mday = v1 % 100;
+          end->tm_mon = ((v1 / 100) % 100) - 1;
+          end->tm_year = year_fix(v1/10000);
+        }
+        break;
+/*}}}*/
+      case DS_M:/*{{{*/
+        if (start) {
+          if (m2-1 > start->tm_mon) --start->tm_year; /* shorthand for previous year */
+          start->tm_mon = m2-1;
+          start->tm_mday = 1;
+        }
+        if (end) {
+          if (m2-1 > end->tm_mon) --end->tm_year; /* shorthand for previous year */
+          end->tm_mon = m2-1;
+          end->tm_mday = last_day(m2-1, end->tm_year);
+        }
+        break;
+/*}}}*/
+      case DS_DM:/*{{{*/
+        if (start) {
+          if (is_later_dm(start, m2, v1)) --start->tm_year; /* shorthand for previous year. */
+          start->tm_mon = m2-1;
+          start->tm_mday = v1;
+        }
+        if (end) {
+          if (is_later_dm(end, m2, v1)) --end->tm_year; /* shorthand for previous year. */
+          end->tm_mon = m2-1;
+          end->tm_mday = v1;
+        }
+        break;
+/*}}}*/
+      case DS_MD:/*{{{*/
+        if (start) {
+          if (is_later_dm(start, m2, v3)) --start->tm_year; /* shorthand for previous year. */
+          start->tm_mon = m2-1;
+          start->tm_mday = v3;
+        }
+        if (end) {
+          if (is_later_dm(end, m2, v3)) --end->tm_year; /* shorthand for previous year. */
+          end->tm_mon = m2-1;
+          end->tm_mday = v3;
+        }
+        break;
+/*}}}*/
+      case DS_DMY:/*{{{*/
+        if (start) {
+          start->tm_mon = m2-1;
+          start->tm_mday = v1;
+          start->tm_year = year_fix(v3);
+        }
+        if (end) {
+          end->tm_mon = m2-1;
+          end->tm_mday = v1;
+          end->tm_year = year_fix(v3);
+        }
+        break;
+/*}}}*/
+      case DS_YMD:/*{{{*/
+        if (start) {
+          start->tm_mon = m2-1;
+          start->tm_mday = v3;
+          start->tm_year = year_fix(v1);
+        }
+        if (end) {
+          end->tm_mon = m2-1;
+          end->tm_mday = v3;
+          end->tm_year = year_fix(v1);
+        }
+        break;
+/*}}}*/
+      case DS_MY:/*{{{*/
+        if (start) {
+          start->tm_year = year_fix(v3);
+          start->tm_mon = m2 - 1;
+          start->tm_mday = 1;
+        }
+        if (end) {
+          end->tm_year = year_fix(v3);
+          end->tm_mon = m2 - 1;
+          end->tm_mday = last_day(end->tm_mon, end->tm_year);
+        }
+        break;
+/*}}}*/
+      case DS_YM:/*{{{*/
+        if (start) {
+          start->tm_year = year_fix(v1);
+          start->tm_mon = m2 - 1;
+          start->tm_mday = 1;
+        }
+        if (end) {
+          end->tm_year = year_fix(v1);
+          end->tm_mon = m2 - 1;
+          end->tm_mday = last_day(end->tm_mon, end->tm_year);
+        }
+        break;/*}}}*/
+      case DS_FAILURE:
+        return -1;
+        break;
+
+      case DS_SCALED:
+        /* Handled by the first branch above; cannot be reached here. */
+        assert(0);
+        break;
+
+    }
+  }
+  return 0;
+}
+/*}}}*/
+
+/* Parse a date or date range of the form "expr", "expr-expr", "expr-" or
+ * "-expr" into absolute times.
+ *
+ *   in        : NUL-terminated expression
+ *   start     : receives the start of the range when *has_start is set
+ *   has_start : set to 1 when a start bound was given, else 0
+ *   end       : receives the end of the range when *has_end is set
+ *   has_end   : set to 1 when an end bound was given, else 0
+ *
+ * Without a hyphen the single expression supplies both bounds.  Absolute
+ * dates run from 00:00:00 on the start date to 23:59:59 on the end date.
+ * In a range the end is parsed first and the start is then interpreted
+ * relative to it, so that shorthand start dates (e.g. a bare day number)
+ * refer to the period just before the end date.
+ *
+ * Returns 0 on success, or the non-zero status of scan_date_expr(). */
+int scan_date_string(char *in, time_t *start, int *has_start, time_t *end, int *has_end)/*{{{*/
+{
+  char *hyphen;
+  time_t now;
+  struct tm start_tm, end_tm;
+  char *nullchar;
+  int status;
+
+  *has_start = *has_end = 0;
+
+  nullchar = in;
+  while (*nullchar) nullchar++;
+
+  time(&now);
+  start_tm = end_tm = *localtime(&now);
+  start_tm.tm_hour = 0;
+  start_tm.tm_min = 0;
+  start_tm.tm_sec = 0;
+  end_tm.tm_hour = 23;
+  end_tm.tm_min = 59;
+  end_tm.tm_sec = 59;
+  /* The DST flag describes "now", not the date about to be filled in.
+   * -1 makes mktime() work it out for the final date, so results are not
+   * shifted by an hour across a DST change. */
+  start_tm.tm_isdst = -1;
+  end_tm.tm_isdst = -1;
+
+  hyphen = strchr(in, '-');
+  if (!hyphen) {
+    /* Start and end are the same. */
+    *has_start = *has_end = 1;
+    status = scan_date_expr(in, nullchar, &start_tm, &end_tm);
+    if (status) return status;
+    *start = mktime(&start_tm);
+    *end = mktime(&end_tm);
+    return 0;
+  } else {
+    if (hyphen+1 < nullchar) {
+      *has_end = 1;
+      status = scan_date_expr(hyphen+1, nullchar, NULL, &end_tm);
+      if (status) return status;
+      *end = mktime(&end_tm);
+      /* Start from the end date so shorthand start dates are relative to
+       * it, but go back to the beginning of the day: copying end_tm alone
+       * would leave the start at 23:59:59 and drop almost the whole first
+       * day of the range. */
+      start_tm = end_tm;
+      start_tm.tm_hour = 0;
+      start_tm.tm_min = 0;
+      start_tm.tm_sec = 0;
+      start_tm.tm_isdst = -1;
+    }
+    if (hyphen > in) {
+      *has_start = 1;
+      status = scan_date_expr(in, hyphen, &start_tm, NULL);
+      if (status) return status;
+      *start = mktime(&start_tm);
+    }
+  }
+  return 0;
+}
+/*}}}*/
+
+#ifdef TEST
+/* Test helper: parse one date expression and print the resulting range,
+ * or a failure message.  A bound that the expression did not supply is
+ * printed as "(unbounded)". */
+static void check(char *in)/*{{{*/
+{
+  time_t start, end;
+  int has_start, has_end;
+  int result;
+
+  result = scan_date_string(in, &start, &has_start, &end, &has_end);
+  if (result) printf("Conversion for <%s> failed\n", in);
+  else {
+    char buf1[128], buf2[128];
+
+    if (has_start) {
+      strftime(buf1, sizeof(buf1), "%d-%b-%Y", localtime(&start));
+    } else {
+      strcpy(buf1, "(unbounded)");
+    }
+    if (has_end) {
+      strftime(buf2, sizeof(buf2), "%d-%b-%Y", localtime(&end));
+    } else {
+      strcpy(buf2, "(unbounded)");
+    }
+    printf("Computed range for <%s> : %s - %s\n", in, buf1, buf2);
+  }
+
+}
+/*}}}*/
+/* Test driver (TEST builds only): run check() over a fixed list of sample
+ * date expressions, in order. */
+int main (int argc, char **argv)/*{{{*/
+{
+  static char *samples[] = {
+    "2w-1w",
+    "4m-1w",
+    "2002-2003",
+    "may2002-2003",
+    "2002may-2003",
+    "feb98-15may99",
+    "feb98-15may1999",
+    "2feb98-1y",
+    "02feb98-1y",
+    "970617-20010618"
+  };
+  int n_samples = (int)(sizeof(samples) / sizeof(samples[0]));
+  int i;
+
+  for (i = 0; i < n_samples; i++) {
+    check(samples[i]);
+  }
+
+  return 0;
+}
+/*}}}*/
+#endif
diff --git a/src/mairix/dates.h b/src/mairix/dates.h
@@ -0,0 +1,45 @@
+/*
+ mairix - message index builder and finder for maildir folders.
+
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2002-2004
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+#ifndef DATES_H
+#define DATES_H
+
+enum DATESCAN_TYPE { /* Result tags attached to the date patterns in datescan.nfa. */
+ DS_FAILURE, /* declared as DEFATTR in datescan.nfa; presumably "no pattern matched" -- confirm */
+ DS_D, /* day */
+ DS_Y, /* year, two or four digits */
+ DS_YYMMDD, /* six or eight consecutive digits */
+ DS_SCALED, /* digits followed by a letter */
+ DS_M, /* month, three letters */
+ DS_DM, /* day, month */
+ DS_MD, /* month, day */
+ DS_YM, /* year, month */
+ DS_MY, /* month, year */
+ DS_YMD, /* year, month, day */
+ DS_DMY, /* day, month, year */
+};
+/* Scanner interface named with the "datescan" PREFIX from datescan.nfa. NOTE(review): presumably tool-generated, exitval[] presumably indexed by state -- confirm. */
+extern int datescan_next_state(int current_state, int next_token);
+extern enum DATESCAN_TYPE datescan_exitval[];
+
+
+#endif /* DATES_H */
diff --git a/src/mairix/datescan.nfa b/src/mairix/datescan.nfa
@@ -0,0 +1,112 @@
+#########################################################################
+#
+# mairix - message index builder and finder for maildir folders.
+#
+# Copyright (C) Richard P. Curnow 2002-2004,2006
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# =======================================================================
+
+# NFA description for parsing dates
+
+# Stuff to pass through verbatim
+%{
+#include "dates.h"
+%}
+
+Abbrev A = [a-zA-Z]
+# Match a two-digit day of the month: 10-29 or 30-31
+BLOCK day {
+ State in
+ [12] ; [0-9] -> out
+ [3] ; [01] -> out
+}
+
+# Match 2 digit year: 00-09 or 32-99; 10-31 are not matched here (those are the values the day block accepts)
+BLOCK year {
+ State in
+ [04-9] ; [0-9] -> out
+ [3] ; [2-9] -> out
+}
+# Match any three letters; the letters are not checked against month names in this block
+BLOCK month {
+ State in
+ A ; A ; A -> out
+}
+# Match one or more digits followed by a letter
+BLOCK scaled {
+ State in
+ [0-9] -> in, after_value
+
+ State after_value
+ A -> out
+}
+# Match a four-digit year whose first digit is non-zero
+BLOCK ccyy {
+ State in
+ [1-9] ; [0-9] ; [0-9] ; [0-9] -> out
+}
+# Top level: every accepted date form, each tagged with the DS_* result it yields
+BLOCK main {
+ State in
+ [1-9] = DS_D
+ <day:in->out> = DS_D
+ <year:in->out> = DS_Y
+ <ccyy:in->out> = DS_Y
+ [0-9] ; [0-9] ; [0-9] ; [0-9] ; [0-9] ; [0-9] = DS_YYMMDD
+ [0-9] ; [0-9] ; [0-9] ; [0-9] ;
+ [0-9] ; [0-9] ; [0-9] ; [0-9] = DS_YYMMDD
+ <scaled:in->out> = DS_SCALED
+ <month:in->out> = DS_M
+ [1-9] ; <month:in->out> = DS_DM
+ <day:in->out> ; <month:in->out> = DS_DM
+ <month:in->out> ; [1-9] = DS_MD
+ <month:in->out> ; <day:in->out> = DS_MD
+ <year:in->out> ; <month:in->out> = DS_YM
+ <month:in->out> ; <year:in->out> = DS_MY
+ <ccyy:in->out> ; <month:in->out> = DS_YM
+ <month:in->out> ; <ccyy:in->out> = DS_MY
+
+ <year:in->out> ; <month:in->out> ; [1-9] = DS_YMD
+ <year:in->out> ; <month:in->out> ; <day:in->out> = DS_YMD
+ [1-9] ; <month:in->out> ; <year:in->out> = DS_DMY
+ <day:in->out> ; <month:in->out> ; <year:in->out> = DS_DMY
+
+ <ccyy:in->out> ; <month:in->out> ; [1-9] = DS_YMD
+ <ccyy:in->out> ; <month:in->out> ; <day:in->out> = DS_YMD
+ [1-9] ; <month:in->out> ; <ccyy:in->out> = DS_DMY
+ <day:in->out> ; <month:in->out> ; <ccyy:in->out> = DS_DMY
+}
+# Declare the result tags used in BLOCK main; same names as the enum DATESCAN_TYPE members in dates.h
+ATTR DS_D
+ATTR DS_Y
+ATTR DS_YYMMDD
+ATTR DS_SCALED
+ATTR DS_M
+ATTR DS_DM
+ATTR DS_MD
+ATTR DS_YM
+ATTR DS_MY
+ATTR DS_YMD
+ATTR DS_DMY
+# NOTE(review): presumably the tag reported when no tagged pattern has matched -- confirm
+DEFATTR DS_FAILURE
+# NOTE(review): presumably the C type of the tags and the prefix of the generated symbols -- confirm
+TYPE "enum DATESCAN_TYPE"
+PREFIX datescan
+
+
+# vim:ft=txt:et:sw=4:sts=4:ht=4
+
diff --git a/src/mairix/datescan.report b/src/mairix/datescan.report
@@ -0,0 +1,3303 @@
+NFA state 0 = in
+ [(epsilon)] -> day#37.in
+ 1:[1] -> #55
+ 4:[4-9] -> #55
+ 3:[3] -> #55
+ 2:[2] -> #55
+ [(epsilon)] -> ccyy#32.in
+ [(epsilon)] -> ccyy#30.in
+ [(epsilon)] -> day#27.in
+ 1:[1] -> #43
+ 4:[4-9] -> #43
+ 3:[3] -> #43
+ 2:[2] -> #43
+ [(epsilon)] -> year#22.in
+ [(epsilon)] -> year#20.in
+ [(epsilon)] -> month#18.in
+ [(epsilon)] -> ccyy#16.in
+ [(epsilon)] -> month#14.in
+ [(epsilon)] -> year#12.in
+ [(epsilon)] -> month#10.in
+ [(epsilon)] -> month#9.in
+ [(epsilon)] -> day#7.in
+ 1:[1] -> #21
+ 4:[4-9] -> #21
+ 3:[3] -> #21
+ 2:[2] -> #21
+ [(epsilon)] -> month#5.in
+ [(epsilon)] -> scaled#4.in
+ 0:[0] -> #11
+ 4:[4-9] -> #11
+ 3:[3] -> #11
+ 2:[2] -> #11
+ 1:[1] -> #11
+ 0:[0] -> #5
+ 4:[4-9] -> #5
+ 3:[3] -> #5
+ 2:[2] -> #5
+ 1:[1] -> #5
+ [(epsilon)] -> ccyy#3.in
+ [(epsilon)] -> year#2.in
+ [(epsilon)] -> day#1.in
+ 1:[1] -> #1
+ 4:[4-9] -> #1
+ 3:[3] -> #1
+ 2:[2] -> #1
+ Epsilon closure :
+ (self)
+ day#1.in
+ year#2.in
+ ccyy#3.in
+ scaled#4.in
+ month#5.in
+ day#7.in
+ month#9.in
+ month#10.in
+ year#12.in
+ month#14.in
+ ccyy#16.in
+ month#18.in
+ year#20.in
+ year#22.in
+ day#27.in
+ ccyy#30.in
+ ccyy#32.in
+ day#37.in
+
+NFA state 1 = #1
+ Tags : DS_D
+ Epsilon closure :
+ (self)
+
+NFA state 2 = #2
+ Tags : DS_D
+ Epsilon closure :
+ (self)
+
+NFA state 3 = day#1.in
+ 1:[1] -> day#1.#1
+ 2:[2] -> day#1.#1
+ 3:[3] -> day#1.#2
+ Epsilon closure :
+ (self)
+
+NFA state 4 = day#1.#1
+ 0:[0] -> day#1.out
+ 4:[4-9] -> day#1.out
+ 3:[3] -> day#1.out
+ 2:[2] -> day#1.out
+ 1:[1] -> day#1.out
+ Epsilon closure :
+ (self)
+
+NFA state 5 = day#1.#2
+ 0:[0] -> day#1.out
+ 1:[1] -> day#1.out
+ Epsilon closure :
+ (self)
+
+NFA state 6 = day#1.out
+ [(epsilon)] -> #2
+ Epsilon closure :
+ (self)
+ #2
+
+NFA state 7 = #3
+ Tags : DS_Y
+ Epsilon closure :
+ (self)
+
+NFA state 8 = year#2.in
+ 0:[0] -> year#2.#1
+ 4:[4-9] -> year#2.#1
+ 3:[3] -> year#2.#2
+ Epsilon closure :
+ (self)
+
+NFA state 9 = year#2.#1
+ 0:[0] -> year#2.out
+ 4:[4-9] -> year#2.out
+ 3:[3] -> year#2.out
+ 2:[2] -> year#2.out
+ 1:[1] -> year#2.out
+ Epsilon closure :
+ (self)
+
+NFA state 10 = year#2.#2
+ 2:[2] -> year#2.out
+ 4:[4-9] -> year#2.out
+ 3:[3] -> year#2.out
+ Epsilon closure :
+ (self)
+
+NFA state 11 = year#2.out
+ [(epsilon)] -> #3
+ Epsilon closure :
+ (self)
+ #3
+
+NFA state 12 = #4
+ Tags : DS_Y
+ Epsilon closure :
+ (self)
+
+NFA state 13 = ccyy#3.in
+ 1:[1] -> ccyy#3.#1
+ 4:[4-9] -> ccyy#3.#1
+ 3:[3] -> ccyy#3.#1
+ 2:[2] -> ccyy#3.#1
+ Epsilon closure :
+ (self)
+
+NFA state 14 = ccyy#3.#1
+ 0:[0] -> ccyy#3.#2
+ 4:[4-9] -> ccyy#3.#2
+ 3:[3] -> ccyy#3.#2
+ 2:[2] -> ccyy#3.#2
+ 1:[1] -> ccyy#3.#2
+ Epsilon closure :
+ (self)
+
+NFA state 15 = ccyy#3.#2
+ 0:[0] -> ccyy#3.#3
+ 4:[4-9] -> ccyy#3.#3
+ 3:[3] -> ccyy#3.#3
+ 2:[2] -> ccyy#3.#3
+ 1:[1] -> ccyy#3.#3
+ Epsilon closure :
+ (self)
+
+NFA state 16 = ccyy#3.#3
+ 0:[0] -> ccyy#3.out
+ 4:[4-9] -> ccyy#3.out
+ 3:[3] -> ccyy#3.out
+ 2:[2] -> ccyy#3.out
+ 1:[1] -> ccyy#3.out
+ Epsilon closure :
+ (self)
+
+NFA state 17 = ccyy#3.out
+ [(epsilon)] -> #4
+ Epsilon closure :
+ (self)
+ #4
+
+NFA state 18 = #5
+ 0:[0] -> #6
+ 4:[4-9] -> #6
+ 3:[3] -> #6
+ 2:[2] -> #6
+ 1:[1] -> #6
+ Epsilon closure :
+ (self)
+
+NFA state 19 = #6
+ 0:[0] -> #7
+ 4:[4-9] -> #7
+ 3:[3] -> #7
+ 2:[2] -> #7
+ 1:[1] -> #7
+ Epsilon closure :
+ (self)
+
+NFA state 20 = #7
+ 0:[0] -> #8
+ 4:[4-9] -> #8
+ 3:[3] -> #8
+ 2:[2] -> #8
+ 1:[1] -> #8
+ Epsilon closure :
+ (self)
+
+NFA state 21 = #8
+ 0:[0] -> #9
+ 4:[4-9] -> #9
+ 3:[3] -> #9
+ 2:[2] -> #9
+ 1:[1] -> #9
+ Epsilon closure :
+ (self)
+
+NFA state 22 = #9
+ 0:[0] -> #10
+ 4:[4-9] -> #10
+ 3:[3] -> #10
+ 2:[2] -> #10
+ 1:[1] -> #10
+ Epsilon closure :
+ (self)
+
+NFA state 23 = #10
+ Tags : DS_YYMMDD
+ Epsilon closure :
+ (self)
+
+NFA state 24 = #11
+ 0:[0] -> #12
+ 4:[4-9] -> #12
+ 3:[3] -> #12
+ 2:[2] -> #12
+ 1:[1] -> #12
+ Epsilon closure :
+ (self)
+
+NFA state 25 = #12
+ 0:[0] -> #13
+ 4:[4-9] -> #13
+ 3:[3] -> #13
+ 2:[2] -> #13
+ 1:[1] -> #13
+ Epsilon closure :
+ (self)
+
+NFA state 26 = #13
+ 0:[0] -> #14
+ 4:[4-9] -> #14
+ 3:[3] -> #14
+ 2:[2] -> #14
+ 1:[1] -> #14
+ Epsilon closure :
+ (self)
+
+NFA state 27 = #14
+ 0:[0] -> #15
+ 4:[4-9] -> #15
+ 3:[3] -> #15
+ 2:[2] -> #15
+ 1:[1] -> #15
+ Epsilon closure :
+ (self)
+
+NFA state 28 = #15
+ 0:[0] -> #16
+ 4:[4-9] -> #16
+ 3:[3] -> #16
+ 2:[2] -> #16
+ 1:[1] -> #16
+ Epsilon closure :
+ (self)
+
+NFA state 29 = #16
+ 0:[0] -> #17
+ 4:[4-9] -> #17
+ 3:[3] -> #17
+ 2:[2] -> #17
+ 1:[1] -> #17
+ Epsilon closure :
+ (self)
+
+NFA state 30 = #17
+ 0:[0] -> #18
+ 4:[4-9] -> #18
+ 3:[3] -> #18
+ 2:[2] -> #18
+ 1:[1] -> #18
+ Epsilon closure :
+ (self)
+
+NFA state 31 = #18
+ Tags : DS_YYMMDD
+ Epsilon closure :
+ (self)
+
+NFA state 32 = #19
+ Tags : DS_SCALED
+ Epsilon closure :
+ (self)
+
+NFA state 33 = scaled#4.in
+ 0:[0] -> scaled#4.in
+ 4:[4-9] -> scaled#4.in
+ 3:[3] -> scaled#4.in
+ 2:[2] -> scaled#4.in
+ 1:[1] -> scaled#4.in
+ 0:[0] -> scaled#4.after_value
+ 4:[4-9] -> scaled#4.after_value
+ 3:[3] -> scaled#4.after_value
+ 2:[2] -> scaled#4.after_value
+ 1:[1] -> scaled#4.after_value
+ Epsilon closure :
+ (self)
+
+NFA state 34 = scaled#4.after_value
+ 5:[A-Za-z] -> scaled#4.out
+ Epsilon closure :
+ (self)
+
+NFA state 35 = scaled#4.out
+ [(epsilon)] -> #19
+ Epsilon closure :
+ (self)
+ #19
+
+NFA state 36 = #20
+ Tags : DS_M
+ Epsilon closure :
+ (self)
+
+NFA state 37 = month#5.in
+ 5:[A-Za-z] -> month#5.#1
+ Epsilon closure :
+ (self)
+
+NFA state 38 = month#5.#1
+ 5:[A-Za-z] -> month#5.#2
+ Epsilon closure :
+ (self)
+
+NFA state 39 = month#5.#2
+ 5:[A-Za-z] -> month#5.out
+ Epsilon closure :
+ (self)
+
+NFA state 40 = month#5.out
+ [(epsilon)] -> #20
+ Epsilon closure :
+ (self)
+ #20
+
+NFA state 41 = #21
+ [(epsilon)] -> month#6.in
+ Epsilon closure :
+ (self)
+ month#6.in
+
+NFA state 42 = #22
+ Tags : DS_DM
+ Epsilon closure :
+ (self)
+
+NFA state 43 = month#6.in
+ 5:[A-Za-z] -> month#6.#1
+ Epsilon closure :
+ (self)
+
+NFA state 44 = month#6.#1
+ 5:[A-Za-z] -> month#6.#2
+ Epsilon closure :
+ (self)
+
+NFA state 45 = month#6.#2
+ 5:[A-Za-z] -> month#6.out
+ Epsilon closure :
+ (self)
+
+NFA state 46 = month#6.out
+ [(epsilon)] -> #22
+ Epsilon closure :
+ (self)
+ #22
+
+NFA state 47 = #23
+ [(epsilon)] -> month#8.in
+ Epsilon closure :
+ (self)
+ month#8.in
+
+NFA state 48 = day#7.in
+ 1:[1] -> day#7.#1
+ 2:[2] -> day#7.#1
+ 3:[3] -> day#7.#2
+ Epsilon closure :
+ (self)
+
+NFA state 49 = day#7.#1
+ 0:[0] -> day#7.out
+ 4:[4-9] -> day#7.out
+ 3:[3] -> day#7.out
+ 2:[2] -> day#7.out
+ 1:[1] -> day#7.out
+ Epsilon closure :
+ (self)
+
+NFA state 50 = day#7.#2
+ 0:[0] -> day#7.out
+ 1:[1] -> day#7.out
+ Epsilon closure :
+ (self)
+
+NFA state 51 = day#7.out
+ [(epsilon)] -> #23
+ Epsilon closure :
+ (self)
+ #23
+ month#8.in
+
+NFA state 52 = #24
+ Tags : DS_DM
+ Epsilon closure :
+ (self)
+
+NFA state 53 = month#8.in
+ 5:[A-Za-z] -> month#8.#1
+ Epsilon closure :
+ (self)
+
+NFA state 54 = month#8.#1
+ 5:[A-Za-z] -> month#8.#2
+ Epsilon closure :
+ (self)
+
+NFA state 55 = month#8.#2
+ 5:[A-Za-z] -> month#8.out
+ Epsilon closure :
+ (self)
+
+NFA state 56 = month#8.out
+ [(epsilon)] -> #24
+ Epsilon closure :
+ (self)
+ #24
+
+NFA state 57 = #25
+ 1:[1] -> #26
+ 4:[4-9] -> #26
+ 3:[3] -> #26
+ 2:[2] -> #26
+ Epsilon closure :
+ (self)
+
+NFA state 58 = month#9.in
+ 5:[A-Za-z] -> month#9.#1
+ Epsilon closure :
+ (self)
+
+NFA state 59 = month#9.#1
+ 5:[A-Za-z] -> month#9.#2
+ Epsilon closure :
+ (self)
+
+NFA state 60 = month#9.#2
+ 5:[A-Za-z] -> month#9.out
+ Epsilon closure :
+ (self)
+
+NFA state 61 = month#9.out
+ [(epsilon)] -> #25
+ Epsilon closure :
+ (self)
+ #25
+
+NFA state 62 = #26
+ Tags : DS_MD
+ Epsilon closure :
+ (self)
+
+NFA state 63 = #27
+ [(epsilon)] -> day#11.in
+ Epsilon closure :
+ (self)
+ day#11.in
+
+NFA state 64 = month#10.in
+ 5:[A-Za-z] -> month#10.#1
+ Epsilon closure :
+ (self)
+
+NFA state 65 = month#10.#1
+ 5:[A-Za-z] -> month#10.#2
+ Epsilon closure :
+ (self)
+
+NFA state 66 = month#10.#2
+ 5:[A-Za-z] -> month#10.out
+ Epsilon closure :
+ (self)
+
+NFA state 67 = month#10.out
+ [(epsilon)] -> #27
+ Epsilon closure :
+ (self)
+ #27
+ day#11.in
+
+NFA state 68 = #28
+ Tags : DS_MD
+ Epsilon closure :
+ (self)
+
+NFA state 69 = day#11.in
+ 1:[1] -> day#11.#1
+ 2:[2] -> day#11.#1
+ 3:[3] -> day#11.#2
+ Epsilon closure :
+ (self)
+
+NFA state 70 = day#11.#1
+ 0:[0] -> day#11.out
+ 4:[4-9] -> day#11.out
+ 3:[3] -> day#11.out
+ 2:[2] -> day#11.out
+ 1:[1] -> day#11.out
+ Epsilon closure :
+ (self)
+
+NFA state 71 = day#11.#2
+ 0:[0] -> day#11.out
+ 1:[1] -> day#11.out
+ Epsilon closure :
+ (self)
+
+NFA state 72 = day#11.out
+ [(epsilon)] -> #28
+ Epsilon closure :
+ (self)
+ #28
+
+NFA state 73 = #29
+ [(epsilon)] -> month#13.in
+ Epsilon closure :
+ (self)
+ month#13.in
+
+NFA state 74 = year#12.in
+ 0:[0] -> year#12.#1
+ 4:[4-9] -> year#12.#1
+ 3:[3] -> year#12.#2
+ Epsilon closure :
+ (self)
+
+NFA state 75 = year#12.#1
+ 0:[0] -> year#12.out
+ 4:[4-9] -> year#12.out
+ 3:[3] -> year#12.out
+ 2:[2] -> year#12.out
+ 1:[1] -> year#12.out
+ Epsilon closure :
+ (self)
+
+NFA state 76 = year#12.#2
+ 2:[2] -> year#12.out
+ 4:[4-9] -> year#12.out
+ 3:[3] -> year#12.out
+ Epsilon closure :
+ (self)
+
+NFA state 77 = year#12.out
+ [(epsilon)] -> #29
+ Epsilon closure :
+ (self)
+ #29
+ month#13.in
+
+NFA state 78 = #30
+ Tags : DS_YM
+ Epsilon closure :
+ (self)
+
+NFA state 79 = month#13.in
+ 5:[A-Za-z] -> month#13.#1
+ Epsilon closure :
+ (self)
+
+NFA state 80 = month#13.#1
+ 5:[A-Za-z] -> month#13.#2
+ Epsilon closure :
+ (self)
+
+NFA state 81 = month#13.#2
+ 5:[A-Za-z] -> month#13.out
+ Epsilon closure :
+ (self)
+
+NFA state 82 = month#13.out
+ [(epsilon)] -> #30
+ Epsilon closure :
+ (self)
+ #30
+
+NFA state 83 = #31
+ [(epsilon)] -> year#15.in
+ Epsilon closure :
+ (self)
+ year#15.in
+
+NFA state 84 = month#14.in
+ 5:[A-Za-z] -> month#14.#1
+ Epsilon closure :
+ (self)
+
+NFA state 85 = month#14.#1
+ 5:[A-Za-z] -> month#14.#2
+ Epsilon closure :
+ (self)
+
+NFA state 86 = month#14.#2
+ 5:[A-Za-z] -> month#14.out
+ Epsilon closure :
+ (self)
+
+NFA state 87 = month#14.out
+ [(epsilon)] -> #31
+ Epsilon closure :
+ (self)
+ #31
+ year#15.in
+
+NFA state 88 = #32
+ Tags : DS_MY
+ Epsilon closure :
+ (self)
+
+NFA state 89 = year#15.in
+ 0:[0] -> year#15.#1
+ 4:[4-9] -> year#15.#1
+ 3:[3] -> year#15.#2
+ Epsilon closure :
+ (self)
+
+NFA state 90 = year#15.#1
+ 0:[0] -> year#15.out
+ 4:[4-9] -> year#15.out
+ 3:[3] -> year#15.out
+ 2:[2] -> year#15.out
+ 1:[1] -> year#15.out
+ Epsilon closure :
+ (self)
+
+NFA state 91 = year#15.#2
+ 2:[2] -> year#15.out
+ 4:[4-9] -> year#15.out
+ 3:[3] -> year#15.out
+ Epsilon closure :
+ (self)
+
+NFA state 92 = year#15.out
+ [(epsilon)] -> #32
+ Epsilon closure :
+ (self)
+ #32
+
+NFA state 93 = #33
+ [(epsilon)] -> month#17.in
+ Epsilon closure :
+ (self)
+ month#17.in
+
+NFA state 94 = ccyy#16.in
+ 1:[1] -> ccyy#16.#1
+ 4:[4-9] -> ccyy#16.#1
+ 3:[3] -> ccyy#16.#1
+ 2:[2] -> ccyy#16.#1
+ Epsilon closure :
+ (self)
+
+NFA state 95 = ccyy#16.#1
+ 0:[0] -> ccyy#16.#2
+ 4:[4-9] -> ccyy#16.#2
+ 3:[3] -> ccyy#16.#2
+ 2:[2] -> ccyy#16.#2
+ 1:[1] -> ccyy#16.#2
+ Epsilon closure :
+ (self)
+
+NFA state 96 = ccyy#16.#2
+ 0:[0] -> ccyy#16.#3
+ 4:[4-9] -> ccyy#16.#3
+ 3:[3] -> ccyy#16.#3
+ 2:[2] -> ccyy#16.#3
+ 1:[1] -> ccyy#16.#3
+ Epsilon closure :
+ (self)
+
+NFA state 97 = ccyy#16.#3
+ 0:[0] -> ccyy#16.out
+ 4:[4-9] -> ccyy#16.out
+ 3:[3] -> ccyy#16.out
+ 2:[2] -> ccyy#16.out
+ 1:[1] -> ccyy#16.out
+ Epsilon closure :
+ (self)
+
+NFA state 98 = ccyy#16.out
+ [(epsilon)] -> #33
+ Epsilon closure :
+ (self)
+ #33
+ month#17.in
+
+NFA state 99 = #34
+ Tags : DS_YM
+ Epsilon closure :
+ (self)
+
+NFA state 100 = month#17.in
+ 5:[A-Za-z] -> month#17.#1
+ Epsilon closure :
+ (self)
+
+NFA state 101 = month#17.#1
+ 5:[A-Za-z] -> month#17.#2
+ Epsilon closure :
+ (self)
+
+NFA state 102 = month#17.#2
+ 5:[A-Za-z] -> month#17.out
+ Epsilon closure :
+ (self)
+
+NFA state 103 = month#17.out
+ [(epsilon)] -> #34
+ Epsilon closure :
+ (self)
+ #34
+
+NFA state 104 = #35
+ [(epsilon)] -> ccyy#19.in
+ Epsilon closure :
+ (self)
+ ccyy#19.in
+
+NFA state 105 = month#18.in
+ 5:[A-Za-z] -> month#18.#1
+ Epsilon closure :
+ (self)
+
+NFA state 106 = month#18.#1
+ 5:[A-Za-z] -> month#18.#2
+ Epsilon closure :
+ (self)
+
+NFA state 107 = month#18.#2
+ 5:[A-Za-z] -> month#18.out
+ Epsilon closure :
+ (self)
+
+NFA state 108 = month#18.out
+ [(epsilon)] -> #35
+ Epsilon closure :
+ (self)
+ #35
+ ccyy#19.in
+
+NFA state 109 = #36
+ Tags : DS_MY
+ Epsilon closure :
+ (self)
+
+NFA state 110 = ccyy#19.in
+ 1:[1] -> ccyy#19.#1
+ 4:[4-9] -> ccyy#19.#1
+ 3:[3] -> ccyy#19.#1
+ 2:[2] -> ccyy#19.#1
+ Epsilon closure :
+ (self)
+
+NFA state 111 = ccyy#19.#1
+ 0:[0] -> ccyy#19.#2
+ 4:[4-9] -> ccyy#19.#2
+ 3:[3] -> ccyy#19.#2
+ 2:[2] -> ccyy#19.#2
+ 1:[1] -> ccyy#19.#2
+ Epsilon closure :
+ (self)
+
+NFA state 112 = ccyy#19.#2
+ 0:[0] -> ccyy#19.#3
+ 4:[4-9] -> ccyy#19.#3
+ 3:[3] -> ccyy#19.#3
+ 2:[2] -> ccyy#19.#3
+ 1:[1] -> ccyy#19.#3
+ Epsilon closure :
+ (self)
+
+NFA state 113 = ccyy#19.#3
+ 0:[0] -> ccyy#19.out
+ 4:[4-9] -> ccyy#19.out
+ 3:[3] -> ccyy#19.out
+ 2:[2] -> ccyy#19.out
+ 1:[1] -> ccyy#19.out
+ Epsilon closure :
+ (self)
+
+NFA state 114 = ccyy#19.out
+ [(epsilon)] -> #36
+ Epsilon closure :
+ (self)
+ #36
+
+NFA state 115 = #37
+ [(epsilon)] -> month#21.in
+ Epsilon closure :
+ (self)
+ month#21.in
+
+NFA state 116 = year#20.in
+ 0:[0] -> year#20.#1
+ 4:[4-9] -> year#20.#1
+ 3:[3] -> year#20.#2
+ Epsilon closure :
+ (self)
+
+NFA state 117 = year#20.#1
+ 0:[0] -> year#20.out
+ 4:[4-9] -> year#20.out
+ 3:[3] -> year#20.out
+ 2:[2] -> year#20.out
+ 1:[1] -> year#20.out
+ Epsilon closure :
+ (self)
+
+NFA state 118 = year#20.#2
+ 2:[2] -> year#20.out
+ 4:[4-9] -> year#20.out
+ 3:[3] -> year#20.out
+ Epsilon closure :
+ (self)
+
+NFA state 119 = year#20.out
+ [(epsilon)] -> #37
+ Epsilon closure :
+ (self)
+ #37
+ month#21.in
+
+NFA state 120 = #38
+ 1:[1] -> #39
+ 4:[4-9] -> #39
+ 3:[3] -> #39
+ 2:[2] -> #39
+ Epsilon closure :
+ (self)
+
+NFA state 121 = month#21.in
+ 5:[A-Za-z] -> month#21.#1
+ Epsilon closure :
+ (self)
+
+NFA state 122 = month#21.#1
+ 5:[A-Za-z] -> month#21.#2
+ Epsilon closure :
+ (self)
+
+NFA state 123 = month#21.#2
+ 5:[A-Za-z] -> month#21.out
+ Epsilon closure :
+ (self)
+
+NFA state 124 = month#21.out
+ [(epsilon)] -> #38
+ Epsilon closure :
+ (self)
+ #38
+
+NFA state 125 = #39
+ Tags : DS_YMD
+ Epsilon closure :
+ (self)
+
+NFA state 126 = #40
+ [(epsilon)] -> month#23.in
+ Epsilon closure :
+ (self)
+ month#23.in
+
+NFA state 127 = year#22.in
+ 0:[0] -> year#22.#1
+ 4:[4-9] -> year#22.#1
+ 3:[3] -> year#22.#2
+ Epsilon closure :
+ (self)
+
+NFA state 128 = year#22.#1
+ 0:[0] -> year#22.out
+ 4:[4-9] -> year#22.out
+ 3:[3] -> year#22.out
+ 2:[2] -> year#22.out
+ 1:[1] -> year#22.out
+ Epsilon closure :
+ (self)
+
+NFA state 129 = year#22.#2
+ 2:[2] -> year#22.out
+ 4:[4-9] -> year#22.out
+ 3:[3] -> year#22.out
+ Epsilon closure :
+ (self)
+
+NFA state 130 = year#22.out
+ [(epsilon)] -> #40
+ Epsilon closure :
+ (self)
+ #40
+ month#23.in
+
+NFA state 131 = #41
+ [(epsilon)] -> day#24.in
+ Epsilon closure :
+ (self)
+ day#24.in
+
+NFA state 132 = month#23.in
+ 5:[A-Za-z] -> month#23.#1
+ Epsilon closure :
+ (self)
+
+NFA state 133 = month#23.#1
+ 5:[A-Za-z] -> month#23.#2
+ Epsilon closure :
+ (self)
+
+NFA state 134 = month#23.#2
+ 5:[A-Za-z] -> month#23.out
+ Epsilon closure :
+ (self)
+
+NFA state 135 = month#23.out
+ [(epsilon)] -> #41
+ Epsilon closure :
+ (self)
+ #41
+ day#24.in
+
+NFA state 136 = #42
+ Tags : DS_YMD
+ Epsilon closure :
+ (self)
+
+NFA state 137 = day#24.in
+ 1:[1] -> day#24.#1
+ 2:[2] -> day#24.#1
+ 3:[3] -> day#24.#2
+ Epsilon closure :
+ (self)
+
+NFA state 138 = day#24.#1
+ 0:[0] -> day#24.out
+ 4:[4-9] -> day#24.out
+ 3:[3] -> day#24.out
+ 2:[2] -> day#24.out
+ 1:[1] -> day#24.out
+ Epsilon closure :
+ (self)
+
+NFA state 139 = day#24.#2
+ 0:[0] -> day#24.out
+ 1:[1] -> day#24.out
+ Epsilon closure :
+ (self)
+
+NFA state 140 = day#24.out
+ [(epsilon)] -> #42
+ Epsilon closure :
+ (self)
+ #42
+
+NFA state 141 = #43
+ [(epsilon)] -> month#25.in
+ Epsilon closure :
+ (self)
+ month#25.in
+
+NFA state 142 = #44
+ [(epsilon)] -> year#26.in
+ Epsilon closure :
+ (self)
+ year#26.in
+
+NFA state 143 = month#25.in
+ 5:[A-Za-z] -> month#25.#1
+ Epsilon closure :
+ (self)
+
+NFA state 144 = month#25.#1
+ 5:[A-Za-z] -> month#25.#2
+ Epsilon closure :
+ (self)
+
+NFA state 145 = month#25.#2
+ 5:[A-Za-z] -> month#25.out
+ Epsilon closure :
+ (self)
+
+NFA state 146 = month#25.out
+ [(epsilon)] -> #44
+ Epsilon closure :
+ (self)
+ #44
+ year#26.in
+
+NFA state 147 = #45
+ Tags : DS_DMY
+ Epsilon closure :
+ (self)
+
+NFA state 148 = year#26.in
+ 0:[0] -> year#26.#1
+ 4:[4-9] -> year#26.#1
+ 3:[3] -> year#26.#2
+ Epsilon closure :
+ (self)
+
+NFA state 149 = year#26.#1
+ 0:[0] -> year#26.out
+ 4:[4-9] -> year#26.out
+ 3:[3] -> year#26.out
+ 2:[2] -> year#26.out
+ 1:[1] -> year#26.out
+ Epsilon closure :
+ (self)
+
+NFA state 150 = year#26.#2
+ 2:[2] -> year#26.out
+ 4:[4-9] -> year#26.out
+ 3:[3] -> year#26.out
+ Epsilon closure :
+ (self)
+
+NFA state 151 = year#26.out
+ [(epsilon)] -> #45
+ Epsilon closure :
+ (self)
+ #45
+
+NFA state 152 = #46
+ [(epsilon)] -> month#28.in
+ Epsilon closure :
+ (self)
+ month#28.in
+
+NFA state 153 = day#27.in
+ 1:[1] -> day#27.#1
+ 2:[2] -> day#27.#1
+ 3:[3] -> day#27.#2
+ Epsilon closure :
+ (self)
+
+NFA state 154 = day#27.#1
+ 0:[0] -> day#27.out
+ 4:[4-9] -> day#27.out
+ 3:[3] -> day#27.out
+ 2:[2] -> day#27.out
+ 1:[1] -> day#27.out
+ Epsilon closure :
+ (self)
+
+NFA state 155 = day#27.#2
+ 0:[0] -> day#27.out
+ 1:[1] -> day#27.out
+ Epsilon closure :
+ (self)
+
+NFA state 156 = day#27.out
+ [(epsilon)] -> #46
+ Epsilon closure :
+ (self)
+ #46
+ month#28.in
+
+NFA state 157 = #47
+ [(epsilon)] -> year#29.in
+ Epsilon closure :
+ (self)
+ year#29.in
+
+NFA state 158 = month#28.in
+ 5:[A-Za-z] -> month#28.#1
+ Epsilon closure :
+ (self)
+
+NFA state 159 = month#28.#1
+ 5:[A-Za-z] -> month#28.#2
+ Epsilon closure :
+ (self)
+
+NFA state 160 = month#28.#2
+ 5:[A-Za-z] -> month#28.out
+ Epsilon closure :
+ (self)
+
+NFA state 161 = month#28.out
+ [(epsilon)] -> #47
+ Epsilon closure :
+ (self)
+ #47
+ year#29.in
+
+NFA state 162 = #48
+ Tags : DS_DMY
+ Epsilon closure :
+ (self)
+
+NFA state 163 = year#29.in
+ 0:[0] -> year#29.#1
+ 4:[4-9] -> year#29.#1
+ 3:[3] -> year#29.#2
+ Epsilon closure :
+ (self)
+
+NFA state 164 = year#29.#1
+ 0:[0] -> year#29.out
+ 4:[4-9] -> year#29.out
+ 3:[3] -> year#29.out
+ 2:[2] -> year#29.out
+ 1:[1] -> year#29.out
+ Epsilon closure :
+ (self)
+
+NFA state 165 = year#29.#2
+ 2:[2] -> year#29.out
+ 4:[4-9] -> year#29.out
+ 3:[3] -> year#29.out
+ Epsilon closure :
+ (self)
+
+NFA state 166 = year#29.out
+ [(epsilon)] -> #48
+ Epsilon closure :
+ (self)
+ #48
+
+NFA state 167 = #49
+ [(epsilon)] -> month#31.in
+ Epsilon closure :
+ (self)
+ month#31.in
+
+NFA state 168 = ccyy#30.in
+ 1:[1] -> ccyy#30.#1
+ 4:[4-9] -> ccyy#30.#1
+ 3:[3] -> ccyy#30.#1
+ 2:[2] -> ccyy#30.#1
+ Epsilon closure :
+ (self)
+
+NFA state 169 = ccyy#30.#1
+ 0:[0] -> ccyy#30.#2
+ 4:[4-9] -> ccyy#30.#2
+ 3:[3] -> ccyy#30.#2
+ 2:[2] -> ccyy#30.#2
+ 1:[1] -> ccyy#30.#2
+ Epsilon closure :
+ (self)
+
+NFA state 170 = ccyy#30.#2
+ 0:[0] -> ccyy#30.#3
+ 4:[4-9] -> ccyy#30.#3
+ 3:[3] -> ccyy#30.#3
+ 2:[2] -> ccyy#30.#3
+ 1:[1] -> ccyy#30.#3
+ Epsilon closure :
+ (self)
+
+NFA state 171 = ccyy#30.#3
+ 0:[0] -> ccyy#30.out
+ 4:[4-9] -> ccyy#30.out
+ 3:[3] -> ccyy#30.out
+ 2:[2] -> ccyy#30.out
+ 1:[1] -> ccyy#30.out
+ Epsilon closure :
+ (self)
+
+NFA state 172 = ccyy#30.out
+ [(epsilon)] -> #49
+ Epsilon closure :
+ (self)
+ #49
+ month#31.in
+
+NFA state 173 = #50
+ 1:[1] -> #51
+ 4:[4-9] -> #51
+ 3:[3] -> #51
+ 2:[2] -> #51
+ Epsilon closure :
+ (self)
+
+NFA state 174 = month#31.in
+ 5:[A-Za-z] -> month#31.#1
+ Epsilon closure :
+ (self)
+
+NFA state 175 = month#31.#1
+ 5:[A-Za-z] -> month#31.#2
+ Epsilon closure :
+ (self)
+
+NFA state 176 = month#31.#2
+ 5:[A-Za-z] -> month#31.out
+ Epsilon closure :
+ (self)
+
+NFA state 177 = month#31.out
+ [(epsilon)] -> #50
+ Epsilon closure :
+ (self)
+ #50
+
+NFA state 178 = #51
+ Tags : DS_YMD
+ Epsilon closure :
+ (self)
+
+NFA state 179 = #52
+ [(epsilon)] -> month#33.in
+ Epsilon closure :
+ (self)
+ month#33.in
+
+NFA state 180 = ccyy#32.in
+ 1:[1] -> ccyy#32.#1
+ 4:[4-9] -> ccyy#32.#1
+ 3:[3] -> ccyy#32.#1
+ 2:[2] -> ccyy#32.#1
+ Epsilon closure :
+ (self)
+
+NFA state 181 = ccyy#32.#1
+ 0:[0] -> ccyy#32.#2
+ 4:[4-9] -> ccyy#32.#2
+ 3:[3] -> ccyy#32.#2
+ 2:[2] -> ccyy#32.#2
+ 1:[1] -> ccyy#32.#2
+ Epsilon closure :
+ (self)
+
+NFA state 182 = ccyy#32.#2
+ 0:[0] -> ccyy#32.#3
+ 4:[4-9] -> ccyy#32.#3
+ 3:[3] -> ccyy#32.#3
+ 2:[2] -> ccyy#32.#3
+ 1:[1] -> ccyy#32.#3
+ Epsilon closure :
+ (self)
+
+NFA state 183 = ccyy#32.#3
+ 0:[0] -> ccyy#32.out
+ 4:[4-9] -> ccyy#32.out
+ 3:[3] -> ccyy#32.out
+ 2:[2] -> ccyy#32.out
+ 1:[1] -> ccyy#32.out
+ Epsilon closure :
+ (self)
+
+NFA state 184 = ccyy#32.out
+ [(epsilon)] -> #52
+ Epsilon closure :
+ (self)
+ #52
+ month#33.in
+
+NFA state 185 = #53
+ [(epsilon)] -> day#34.in
+ Epsilon closure :
+ (self)
+ day#34.in
+
+NFA state 186 = month#33.in
+ 5:[A-Za-z] -> month#33.#1
+ Epsilon closure :
+ (self)
+
+NFA state 187 = month#33.#1
+ 5:[A-Za-z] -> month#33.#2
+ Epsilon closure :
+ (self)
+
+NFA state 188 = month#33.#2
+ 5:[A-Za-z] -> month#33.out
+ Epsilon closure :
+ (self)
+
+NFA state 189 = month#33.out
+ [(epsilon)] -> #53
+ Epsilon closure :
+ (self)
+ #53
+ day#34.in
+
+NFA state 190 = #54
+ Tags : DS_YMD
+ Epsilon closure :
+ (self)
+
+NFA state 191 = day#34.in
+ 1:[1] -> day#34.#1
+ 2:[2] -> day#34.#1
+ 3:[3] -> day#34.#2
+ Epsilon closure :
+ (self)
+
+NFA state 192 = day#34.#1
+ 0:[0] -> day#34.out
+ 4:[4-9] -> day#34.out
+ 3:[3] -> day#34.out
+ 2:[2] -> day#34.out
+ 1:[1] -> day#34.out
+ Epsilon closure :
+ (self)
+
+NFA state 193 = day#34.#2
+ 0:[0] -> day#34.out
+ 1:[1] -> day#34.out
+ Epsilon closure :
+ (self)
+
+NFA state 194 = day#34.out
+ [(epsilon)] -> #54
+ Epsilon closure :
+ (self)
+ #54
+
+NFA state 195 = #55
+ [(epsilon)] -> month#35.in
+ Epsilon closure :
+ (self)
+ month#35.in
+
+NFA state 196 = #56
+ [(epsilon)] -> ccyy#36.in
+ Epsilon closure :
+ (self)
+ ccyy#36.in
+
+NFA state 197 = month#35.in
+ 5:[A-Za-z] -> month#35.#1
+ Epsilon closure :
+ (self)
+
+NFA state 198 = month#35.#1
+ 5:[A-Za-z] -> month#35.#2
+ Epsilon closure :
+ (self)
+
+NFA state 199 = month#35.#2
+ 5:[A-Za-z] -> month#35.out
+ Epsilon closure :
+ (self)
+
+NFA state 200 = month#35.out
+ [(epsilon)] -> #56
+ Epsilon closure :
+ (self)
+ #56
+ ccyy#36.in
+
+NFA state 201 = #57
+ Tags : DS_DMY
+ Epsilon closure :
+ (self)
+
+NFA state 202 = ccyy#36.in
+ 1:[1] -> ccyy#36.#1
+ 4:[4-9] -> ccyy#36.#1
+ 3:[3] -> ccyy#36.#1
+ 2:[2] -> ccyy#36.#1
+ Epsilon closure :
+ (self)
+
+NFA state 203 = ccyy#36.#1
+ 0:[0] -> ccyy#36.#2
+ 4:[4-9] -> ccyy#36.#2
+ 3:[3] -> ccyy#36.#2
+ 2:[2] -> ccyy#36.#2
+ 1:[1] -> ccyy#36.#2
+ Epsilon closure :
+ (self)
+
+NFA state 204 = ccyy#36.#2
+ 0:[0] -> ccyy#36.#3
+ 4:[4-9] -> ccyy#36.#3
+ 3:[3] -> ccyy#36.#3
+ 2:[2] -> ccyy#36.#3
+ 1:[1] -> ccyy#36.#3
+ Epsilon closure :
+ (self)
+
+NFA state 205 = ccyy#36.#3
+ 0:[0] -> ccyy#36.out
+ 4:[4-9] -> ccyy#36.out
+ 3:[3] -> ccyy#36.out
+ 2:[2] -> ccyy#36.out
+ 1:[1] -> ccyy#36.out
+ Epsilon closure :
+ (self)
+
+NFA state 206 = ccyy#36.out
+ [(epsilon)] -> #57
+ Epsilon closure :
+ (self)
+ #57
+
+NFA state 207 = #58
+ [(epsilon)] -> month#38.in
+ Epsilon closure :
+ (self)
+ month#38.in
+
+NFA state 208 = day#37.in
+ 1:[1] -> day#37.#1
+ 2:[2] -> day#37.#1
+ 3:[3] -> day#37.#2
+ Epsilon closure :
+ (self)
+
+NFA state 209 = day#37.#1
+ 0:[0] -> day#37.out
+ 4:[4-9] -> day#37.out
+ 3:[3] -> day#37.out
+ 2:[2] -> day#37.out
+ 1:[1] -> day#37.out
+ Epsilon closure :
+ (self)
+
+NFA state 210 = day#37.#2
+ 0:[0] -> day#37.out
+ 1:[1] -> day#37.out
+ Epsilon closure :
+ (self)
+
+NFA state 211 = day#37.out
+ [(epsilon)] -> #58
+ Epsilon closure :
+ (self)
+ #58
+ month#38.in
+
+NFA state 212 = #59
+ [(epsilon)] -> ccyy#39.in
+ Epsilon closure :
+ (self)
+ ccyy#39.in
+
+NFA state 213 = month#38.in
+ 5:[A-Za-z] -> month#38.#1
+ Epsilon closure :
+ (self)
+
+NFA state 214 = month#38.#1
+ 5:[A-Za-z] -> month#38.#2
+ Epsilon closure :
+ (self)
+
+NFA state 215 = month#38.#2
+ 5:[A-Za-z] -> month#38.out
+ Epsilon closure :
+ (self)
+
+NFA state 216 = month#38.out
+ [(epsilon)] -> #59
+ Epsilon closure :
+ (self)
+ #59
+ ccyy#39.in
+
+NFA state 217 = #60
+ Tags : DS_DMY
+ Epsilon closure :
+ (self)
+
+NFA state 218 = ccyy#39.in
+ 1:[1] -> ccyy#39.#1
+ 4:[4-9] -> ccyy#39.#1
+ 3:[3] -> ccyy#39.#1
+ 2:[2] -> ccyy#39.#1
+ Epsilon closure :
+ (self)
+
+NFA state 219 = ccyy#39.#1
+ 0:[0] -> ccyy#39.#2
+ 4:[4-9] -> ccyy#39.#2
+ 3:[3] -> ccyy#39.#2
+ 2:[2] -> ccyy#39.#2
+ 1:[1] -> ccyy#39.#2
+ Epsilon closure :
+ (self)
+
+NFA state 220 = ccyy#39.#2
+ 0:[0] -> ccyy#39.#3
+ 4:[4-9] -> ccyy#39.#3
+ 3:[3] -> ccyy#39.#3
+ 2:[2] -> ccyy#39.#3
+ 1:[1] -> ccyy#39.#3
+ Epsilon closure :
+ (self)
+
+NFA state 221 = ccyy#39.#3
+ 0:[0] -> ccyy#39.out
+ 4:[4-9] -> ccyy#39.out
+ 3:[3] -> ccyy#39.out
+ 2:[2] -> ccyy#39.out
+ 1:[1] -> ccyy#39.out
+ Epsilon closure :
+ (self)
+
+NFA state 222 = ccyy#39.out
+ [(epsilon)] -> #60
+ Epsilon closure :
+ (self)
+ #60
+
+--------------------------------
+DFA structure before compression
+--------------------------------
+DFA state 0
+ NFA states :
+ in
+ day#1.in
+ year#2.in
+ ccyy#3.in
+ scaled#4.in
+ month#5.in
+ day#7.in
+ month#9.in
+ month#10.in
+ year#12.in
+ month#14.in
+ ccyy#16.in
+ month#18.in
+ year#20.in
+ year#22.in
+ day#27.in
+ ccyy#30.in
+ ccyy#32.in
+ day#37.in
+
+ Forward route :
+ (START)->(HERE)
+ Transitions :
+ 0:[0] -> 1
+ 1:[1] -> 2
+ 2:[2] -> 2
+ 3:[3] -> 3
+ 4:[4-9] -> 4
+ 5:[A-Za-z] -> 5
+
+DFA state 1
+ NFA states :
+ year#2.#1
+ #5
+ #11
+ scaled#4.in
+ scaled#4.after_value
+ year#12.#1
+ year#20.#1
+ year#22.#1
+
+ Forward route : (from state 0)
+ (START)->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 6
+ 1:[1] -> 6
+ 2:[2] -> 6
+ 3:[3] -> 6
+ 4:[4-9] -> 6
+ 5:[A-Za-z] -> 7
+
+DFA state 2
+ NFA states :
+ #1
+ day#1.#1
+ ccyy#3.#1
+ #5
+ #11
+ scaled#4.in
+ scaled#4.after_value
+ #21
+ month#6.in
+ day#7.#1
+ ccyy#16.#1
+ #43
+ month#25.in
+ day#27.#1
+ ccyy#30.#1
+ ccyy#32.#1
+ #55
+ month#35.in
+ day#37.#1
+
+ Forward route : (from state 0)
+ (START)->1:[1]->(HERE)
+ Transitions :
+ 0:[0] -> 8
+ 1:[1] -> 8
+ 2:[2] -> 8
+ 3:[3] -> 8
+ 4:[4-9] -> 8
+ 5:[A-Za-z] -> 9
+ NFA exit tags applying :
+ DS_D
+ Attributes for <(DEFAULT)> : DS_D
+
+DFA state 3
+ NFA states :
+ #1
+ day#1.#2
+ year#2.#2
+ ccyy#3.#1
+ #5
+ #11
+ scaled#4.in
+ scaled#4.after_value
+ #21
+ month#6.in
+ day#7.#2
+ year#12.#2
+ ccyy#16.#1
+ year#20.#2
+ year#22.#2
+ #43
+ month#25.in
+ day#27.#2
+ ccyy#30.#1
+ ccyy#32.#1
+ #55
+ month#35.in
+ day#37.#2
+
+ Forward route : (from state 0)
+ (START)->3:[3]->(HERE)
+ Transitions :
+ 0:[0] -> 8
+ 1:[1] -> 8
+ 2:[2] -> 10
+ 3:[3] -> 10
+ 4:[4-9] -> 10
+ 5:[A-Za-z] -> 9
+ NFA exit tags applying :
+ DS_D
+ Attributes for <(DEFAULT)> : DS_D
+
+DFA state 4
+ NFA states :
+ #1
+ year#2.#1
+ ccyy#3.#1
+ #5
+ #11
+ scaled#4.in
+ scaled#4.after_value
+ #21
+ month#6.in
+ year#12.#1
+ ccyy#16.#1
+ year#20.#1
+ year#22.#1
+ #43
+ month#25.in
+ ccyy#30.#1
+ ccyy#32.#1
+ #55
+ month#35.in
+
+ Forward route : (from state 0)
+ (START)->4:[4-9]->(HERE)
+ Transitions :
+ 0:[0] -> 10
+ 1:[1] -> 10
+ 2:[2] -> 10
+ 3:[3] -> 10
+ 4:[4-9] -> 10
+ 5:[A-Za-z] -> 9
+ NFA exit tags applying :
+ DS_D
+ Attributes for <(DEFAULT)> : DS_D
+
+DFA state 5
+ NFA states :
+ month#5.#1
+ month#9.#1
+ month#10.#1
+ month#14.#1
+ month#18.#1
+
+ Forward route : (from state 0)
+ (START)->5:[A-Za-z]->(HERE)
+ Transitions :
+ 5:[A-Za-z] -> 11
+
+DFA state 6
+ NFA states :
+ #3
+ year#2.out
+ #6
+ #12
+ scaled#4.in
+ scaled#4.after_value
+ #29
+ year#12.out
+ month#13.in
+ #37
+ year#20.out
+ month#21.in
+ #40
+ year#22.out
+ month#23.in
+
+ Forward route : (from state 1)
+ (START)->0:[0]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 12
+ 1:[1] -> 12
+ 2:[2] -> 12
+ 3:[3] -> 12
+ 4:[4-9] -> 12
+ 5:[A-Za-z] -> 13
+ NFA exit tags applying :
+ DS_Y
+ Attributes for <(DEFAULT)> : DS_Y
+
+DFA state 7
+ NFA states :
+ #19
+ scaled#4.out
+
+ Forward route : (from state 1)
+ (START)->0:[0]->5:[A-Za-z]->(HERE)
+ Transitions :
+ NFA exit tags applying :
+ DS_SCALED
+ Attributes for <(DEFAULT)> : DS_SCALED
+
+DFA state 8
+ NFA states :
+ #2
+ day#1.out
+ ccyy#3.#2
+ #6
+ #12
+ scaled#4.in
+ scaled#4.after_value
+ #23
+ day#7.out
+ month#8.in
+ ccyy#16.#2
+ #46
+ day#27.out
+ month#28.in
+ ccyy#30.#2
+ ccyy#32.#2
+ #58
+ day#37.out
+ month#38.in
+
+ Forward route : (from state 2)
+ (START)->1:[1]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 14
+ 1:[1] -> 14
+ 2:[2] -> 14
+ 3:[3] -> 14
+ 4:[4-9] -> 14
+ 5:[A-Za-z] -> 15
+ NFA exit tags applying :
+ DS_D
+ Attributes for <(DEFAULT)> : DS_D
+
+DFA state 9
+ NFA states :
+ #19
+ scaled#4.out
+ month#6.#1
+ month#25.#1
+ month#35.#1
+
+ Forward route : (from state 2)
+ (START)->1:[1]->5:[A-Za-z]->(HERE)
+ Transitions :
+ 5:[A-Za-z] -> 16
+ NFA exit tags applying :
+ DS_SCALED
+ Attributes for <(DEFAULT)> : DS_SCALED
+
+DFA state 10
+ NFA states :
+ #3
+ year#2.out
+ ccyy#3.#2
+ #6
+ #12
+ scaled#4.in
+ scaled#4.after_value
+ #29
+ year#12.out
+ month#13.in
+ ccyy#16.#2
+ #37
+ year#20.out
+ month#21.in
+ #40
+ year#22.out
+ month#23.in
+ ccyy#30.#2
+ ccyy#32.#2
+
+ Forward route : (from state 3)
+ (START)->3:[3]->2:[2]->(HERE)
+ Transitions :
+ 0:[0] -> 14
+ 1:[1] -> 14
+ 2:[2] -> 14
+ 3:[3] -> 14
+ 4:[4-9] -> 14
+ 5:[A-Za-z] -> 13
+ NFA exit tags applying :
+ DS_Y
+ Attributes for <(DEFAULT)> : DS_Y
+
+DFA state 11
+ NFA states :
+ month#5.#2
+ month#9.#2
+ month#10.#2
+ month#14.#2
+ month#18.#2
+
+ Forward route : (from state 5)
+ (START)->5:[A-Za-z]->5:[A-Za-z]->(HERE)
+ Transitions :
+ 5:[A-Za-z] -> 17
+
+DFA state 12
+ NFA states :
+ #7
+ #13
+ scaled#4.in
+ scaled#4.after_value
+
+ Forward route : (from state 6)
+ (START)->0:[0]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 18
+ 1:[1] -> 18
+ 2:[2] -> 18
+ 3:[3] -> 18
+ 4:[4-9] -> 18
+ 5:[A-Za-z] -> 7
+
+DFA state 13
+ NFA states :
+ #19
+ scaled#4.out
+ month#13.#1
+ month#21.#1
+ month#23.#1
+
+ Forward route : (from state 6)
+ (START)->0:[0]->0:[0]->5:[A-Za-z]->(HERE)
+ Transitions :
+ 5:[A-Za-z] -> 19
+ NFA exit tags applying :
+ DS_SCALED
+ Attributes for <(DEFAULT)> : DS_SCALED
+
+DFA state 14
+ NFA states :
+ ccyy#3.#3
+ #7
+ #13
+ scaled#4.in
+ scaled#4.after_value
+ ccyy#16.#3
+ ccyy#30.#3
+ ccyy#32.#3
+
+ Forward route : (from state 8)
+ (START)->1:[1]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 20
+ 1:[1] -> 20
+ 2:[2] -> 20
+ 3:[3] -> 20
+ 4:[4-9] -> 20
+ 5:[A-Za-z] -> 7
+
+DFA state 15
+ NFA states :
+ #19
+ scaled#4.out
+ month#8.#1
+ month#28.#1
+ month#38.#1
+
+ Forward route : (from state 8)
+ (START)->1:[1]->0:[0]->5:[A-Za-z]->(HERE)
+ Transitions :
+ 5:[A-Za-z] -> 21
+ NFA exit tags applying :
+ DS_SCALED
+ Attributes for <(DEFAULT)> : DS_SCALED
+
+DFA state 16
+ NFA states :
+ month#6.#2
+ month#25.#2
+ month#35.#2
+
+ Forward route : (from state 9)
+ (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->(HERE)
+ Transitions :
+ 5:[A-Za-z] -> 22
+
+DFA state 17
+ NFA states :
+ #20
+ month#5.out
+ #25
+ month#9.out
+ #27
+ month#10.out
+ day#11.in
+ #31
+ month#14.out
+ year#15.in
+ #35
+ month#18.out
+ ccyy#19.in
+
+ Forward route : (from state 11)
+ (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->(HERE)
+ Transitions :
+ 0:[0] -> 23
+ 1:[1] -> 24
+ 2:[2] -> 24
+ 3:[3] -> 25
+ 4:[4-9] -> 26
+ NFA exit tags applying :
+ DS_M
+ Attributes for <(DEFAULT)> : DS_M
+
+DFA state 18
+ NFA states :
+ #8
+ #14
+ scaled#4.in
+ scaled#4.after_value
+
+ Forward route : (from state 12)
+ (START)->0:[0]->0:[0]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 27
+ 1:[1] -> 27
+ 2:[2] -> 27
+ 3:[3] -> 27
+ 4:[4-9] -> 27
+ 5:[A-Za-z] -> 7
+
+DFA state 19
+ NFA states :
+ month#13.#2
+ month#21.#2
+ month#23.#2
+
+ Forward route : (from state 13)
+ (START)->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->(HERE)
+ Transitions :
+ 5:[A-Za-z] -> 28
+
+DFA state 20
+ NFA states :
+ #4
+ ccyy#3.out
+ #8
+ #14
+ scaled#4.in
+ scaled#4.after_value
+ #33
+ ccyy#16.out
+ month#17.in
+ #49
+ ccyy#30.out
+ month#31.in
+ #52
+ ccyy#32.out
+ month#33.in
+
+ Forward route : (from state 14)
+ (START)->1:[1]->0:[0]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 27
+ 1:[1] -> 27
+ 2:[2] -> 27
+ 3:[3] -> 27
+ 4:[4-9] -> 27
+ 5:[A-Za-z] -> 29
+ NFA exit tags applying :
+ DS_Y
+ Attributes for <(DEFAULT)> : DS_Y
+
+DFA state 21
+ NFA states :
+ month#8.#2
+ month#28.#2
+ month#38.#2
+
+ Forward route : (from state 15)
+ (START)->1:[1]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->(HERE)
+ Transitions :
+ 5:[A-Za-z] -> 30
+
+DFA state 22
+ NFA states :
+ #22
+ month#6.out
+ #44
+ month#25.out
+ year#26.in
+ #56
+ month#35.out
+ ccyy#36.in
+
+ Forward route : (from state 16)
+ (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->(HERE)
+ Transitions :
+ 0:[0] -> 31
+ 1:[1] -> 32
+ 2:[2] -> 32
+ 3:[3] -> 33
+ 4:[4-9] -> 34
+ NFA exit tags applying :
+ DS_DM
+ Attributes for <(DEFAULT)> : DS_DM
+
+DFA state 23
+ NFA states :
+ year#15.#1
+
+ Forward route : (from state 17)
+ (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 35
+ 1:[1] -> 35
+ 2:[2] -> 35
+ 3:[3] -> 35
+ 4:[4-9] -> 35
+
+DFA state 24
+ NFA states :
+ #26
+ day#11.#1
+ ccyy#19.#1
+
+ Forward route : (from state 17)
+ (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->(HERE)
+ Transitions :
+ 0:[0] -> 36
+ 1:[1] -> 36
+ 2:[2] -> 36
+ 3:[3] -> 36
+ 4:[4-9] -> 36
+ NFA exit tags applying :
+ DS_MD
+ Attributes for <(DEFAULT)> : DS_MD
+
+DFA state 25
+ NFA states :
+ #26
+ day#11.#2
+ year#15.#2
+ ccyy#19.#1
+
+ Forward route : (from state 17)
+ (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->(HERE)
+ Transitions :
+ 0:[0] -> 36
+ 1:[1] -> 36
+ 2:[2] -> 37
+ 3:[3] -> 37
+ 4:[4-9] -> 37
+ NFA exit tags applying :
+ DS_MD
+ Attributes for <(DEFAULT)> : DS_MD
+
+DFA state 26
+ NFA states :
+ #26
+ year#15.#1
+ ccyy#19.#1
+
+ Forward route : (from state 17)
+ (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->4:[4-9]->(HERE)
+ Transitions :
+ 0:[0] -> 37
+ 1:[1] -> 37
+ 2:[2] -> 37
+ 3:[3] -> 37
+ 4:[4-9] -> 37
+ NFA exit tags applying :
+ DS_MD
+ Attributes for <(DEFAULT)> : DS_MD
+
+DFA state 27
+ NFA states :
+ #9
+ #15
+ scaled#4.in
+ scaled#4.after_value
+
+ Forward route : (from state 18)
+ (START)->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 38
+ 1:[1] -> 38
+ 2:[2] -> 38
+ 3:[3] -> 38
+ 4:[4-9] -> 38
+ 5:[A-Za-z] -> 7
+
+DFA state 28
+ NFA states :
+ #30
+ month#13.out
+ #38
+ month#21.out
+ #41
+ month#23.out
+ day#24.in
+
+ Forward route : (from state 19)
+ (START)->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->(HERE)
+ Transitions :
+ 1:[1] -> 39
+ 2:[2] -> 39
+ 3:[3] -> 40
+ 4:[4-9] -> 41
+ NFA exit tags applying :
+ DS_YM
+ Attributes for <(DEFAULT)> : DS_YM
+
+DFA state 29
+ NFA states :
+ #19
+ scaled#4.out
+ month#17.#1
+ month#31.#1
+ month#33.#1
+
+ Forward route : (from state 20)
+ (START)->1:[1]->0:[0]->0:[0]->0:[0]->5:[A-Za-z]->(HERE)
+ Transitions :
+ 5:[A-Za-z] -> 42
+ NFA exit tags applying :
+ DS_SCALED
+ Attributes for <(DEFAULT)> : DS_SCALED
+
+DFA state 30
+ NFA states :
+ #24
+ month#8.out
+ #47
+ month#28.out
+ year#29.in
+ #59
+ month#38.out
+ ccyy#39.in
+
+ Forward route : (from state 21)
+ (START)->1:[1]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->(HERE)
+ Transitions :
+ 0:[0] -> 43
+ 1:[1] -> 44
+ 2:[2] -> 44
+ 3:[3] -> 45
+ 4:[4-9] -> 46
+ NFA exit tags applying :
+ DS_DM
+ Attributes for <(DEFAULT)> : DS_DM
+
+DFA state 31
+ NFA states :
+ year#26.#1
+
+ Forward route : (from state 22)
+ (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 47
+ 1:[1] -> 47
+ 2:[2] -> 47
+ 3:[3] -> 47
+ 4:[4-9] -> 47
+
+DFA state 32
+ NFA states :
+ ccyy#36.#1
+
+ Forward route : (from state 22)
+ (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->(HERE)
+ Transitions :
+ 0:[0] -> 48
+ 1:[1] -> 48
+ 2:[2] -> 48
+ 3:[3] -> 48
+ 4:[4-9] -> 48
+
+DFA state 33
+ NFA states :
+ year#26.#2
+ ccyy#36.#1
+
+ Forward route : (from state 22)
+ (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->(HERE)
+ Transitions :
+ 0:[0] -> 48
+ 1:[1] -> 48
+ 2:[2] -> 49
+ 3:[3] -> 49
+ 4:[4-9] -> 49
+
+DFA state 34
+ NFA states :
+ year#26.#1
+ ccyy#36.#1
+
+ Forward route : (from state 22)
+ (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->4:[4-9]->(HERE)
+ Transitions :
+ 0:[0] -> 49
+ 1:[1] -> 49
+ 2:[2] -> 49
+ 3:[3] -> 49
+ 4:[4-9] -> 49
+
+DFA state 35
+ NFA states :
+ #32
+ year#15.out
+
+ Forward route : (from state 23)
+ (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ NFA exit tags applying :
+ DS_MY
+ Attributes for <(DEFAULT)> : DS_MY
+
+DFA state 36
+ NFA states :
+ #28
+ day#11.out
+ ccyy#19.#2
+
+ Forward route : (from state 24)
+ (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 50
+ 1:[1] -> 50
+ 2:[2] -> 50
+ 3:[3] -> 50
+ 4:[4-9] -> 50
+ NFA exit tags applying :
+ DS_MD
+ Attributes for <(DEFAULT)> : DS_MD
+
+DFA state 37
+ NFA states :
+ #32
+ year#15.out
+ ccyy#19.#2
+
+ Forward route : (from state 25)
+ (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->2:[2]->(HERE)
+ Transitions :
+ 0:[0] -> 50
+ 1:[1] -> 50
+ 2:[2] -> 50
+ 3:[3] -> 50
+ 4:[4-9] -> 50
+ NFA exit tags applying :
+ DS_MY
+ Attributes for <(DEFAULT)> : DS_MY
+
+DFA state 38
+ NFA states :
+ #10
+ #16
+ scaled#4.in
+ scaled#4.after_value
+
+ Forward route : (from state 27)
+ (START)->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 51
+ 1:[1] -> 51
+ 2:[2] -> 51
+ 3:[3] -> 51
+ 4:[4-9] -> 51
+ 5:[A-Za-z] -> 7
+ NFA exit tags applying :
+ DS_YYMMDD
+ Attributes for <(DEFAULT)> : DS_YYMMDD
+
+DFA state 39
+ NFA states :
+ #39
+ day#24.#1
+
+ Forward route : (from state 28)
+ (START)->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->(HERE)
+ Transitions :
+ 0:[0] -> 52
+ 1:[1] -> 52
+ 2:[2] -> 52
+ 3:[3] -> 52
+ 4:[4-9] -> 52
+ NFA exit tags applying :
+ DS_YMD
+ Attributes for <(DEFAULT)> : DS_YMD
+
+DFA state 40
+ NFA states :
+ #39
+ day#24.#2
+
+ Forward route : (from state 28)
+ (START)->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->(HERE)
+ Transitions :
+ 0:[0] -> 52
+ 1:[1] -> 52
+ NFA exit tags applying :
+ DS_YMD
+ Attributes for <(DEFAULT)> : DS_YMD
+
+DFA state 41
+ NFA states :
+ #39
+
+ Forward route : (from state 28)
+ (START)->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->4:[4-9]->(HERE)
+ Transitions :
+ NFA exit tags applying :
+ DS_YMD
+ Attributes for <(DEFAULT)> : DS_YMD
+
+DFA state 42
+ NFA states :
+ month#17.#2
+ month#31.#2
+ month#33.#2
+
+ Forward route : (from state 29)
+ (START)->1:[1]->0:[0]->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->(HERE)
+ Transitions :
+ 5:[A-Za-z] -> 53
+
+DFA state 43
+ NFA states :
+ year#29.#1
+
+ Forward route : (from state 30)
+ (START)->1:[1]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 54
+ 1:[1] -> 54
+ 2:[2] -> 54
+ 3:[3] -> 54
+ 4:[4-9] -> 54
+
+DFA state 44
+ NFA states :
+ ccyy#39.#1
+
+ Forward route : (from state 30)
+ (START)->1:[1]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->(HERE)
+ Transitions :
+ 0:[0] -> 55
+ 1:[1] -> 55
+ 2:[2] -> 55
+ 3:[3] -> 55
+ 4:[4-9] -> 55
+
+DFA state 45
+ NFA states :
+ year#29.#2
+ ccyy#39.#1
+
+ Forward route : (from state 30)
+ (START)->1:[1]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->(HERE)
+ Transitions :
+ 0:[0] -> 55
+ 1:[1] -> 55
+ 2:[2] -> 56
+ 3:[3] -> 56
+ 4:[4-9] -> 56
+
+DFA state 46
+ NFA states :
+ year#29.#1
+ ccyy#39.#1
+
+ Forward route : (from state 30)
+ (START)->1:[1]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->4:[4-9]->(HERE)
+ Transitions :
+ 0:[0] -> 56
+ 1:[1] -> 56
+ 2:[2] -> 56
+ 3:[3] -> 56
+ 4:[4-9] -> 56
+
+DFA state 47
+ NFA states :
+ #45
+ year#26.out
+
+ Forward route : (from state 31)
+ (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ NFA exit tags applying :
+ DS_DMY
+ Attributes for <(DEFAULT)> : DS_DMY
+
+DFA state 48
+ NFA states :
+ ccyy#36.#2
+
+ Forward route : (from state 32)
+ (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 57
+ 1:[1] -> 57
+ 2:[2] -> 57
+ 3:[3] -> 57
+ 4:[4-9] -> 57
+
+DFA state 49
+ NFA states :
+ #45
+ year#26.out
+ ccyy#36.#2
+
+ Forward route : (from state 33)
+ (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->2:[2]->(HERE)
+ Transitions :
+ 0:[0] -> 57
+ 1:[1] -> 57
+ 2:[2] -> 57
+ 3:[3] -> 57
+ 4:[4-9] -> 57
+ NFA exit tags applying :
+ DS_DMY
+ Attributes for <(DEFAULT)> : DS_DMY
+
+DFA state 50
+ NFA states :
+ ccyy#19.#3
+
+ Forward route : (from state 36)
+ (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 58
+ 1:[1] -> 58
+ 2:[2] -> 58
+ 3:[3] -> 58
+ 4:[4-9] -> 58
+
+DFA state 51
+ NFA states :
+ #17
+ scaled#4.in
+ scaled#4.after_value
+
+ Forward route : (from state 38)
+ (START)->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 59
+ 1:[1] -> 59
+ 2:[2] -> 59
+ 3:[3] -> 59
+ 4:[4-9] -> 59
+ 5:[A-Za-z] -> 7
+
+DFA state 52
+ NFA states :
+ #42
+ day#24.out
+
+ Forward route : (from state 39)
+ (START)->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->(HERE)
+ Transitions :
+ NFA exit tags applying :
+ DS_YMD
+ Attributes for <(DEFAULT)> : DS_YMD
+
+DFA state 53
+ NFA states :
+ #34
+ month#17.out
+ #50
+ month#31.out
+ #53
+ month#33.out
+ day#34.in
+
+ Forward route : (from state 42)
+ (START)->1:[1]->0:[0]->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->(HERE)
+ Transitions :
+ 1:[1] -> 60
+ 2:[2] -> 60
+ 3:[3] -> 61
+ 4:[4-9] -> 62
+ NFA exit tags applying :
+ DS_YM
+ Attributes for <(DEFAULT)> : DS_YM
+
+DFA state 54
+ NFA states :
+ #48
+ year#29.out
+
+ Forward route : (from state 43)
+ (START)->1:[1]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ NFA exit tags applying :
+ DS_DMY
+ Attributes for <(DEFAULT)> : DS_DMY
+
+DFA state 55
+ NFA states :
+ ccyy#39.#2
+
+ Forward route : (from state 44)
+ (START)->1:[1]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 63
+ 1:[1] -> 63
+ 2:[2] -> 63
+ 3:[3] -> 63
+ 4:[4-9] -> 63
+
+DFA state 56
+ NFA states :
+ #48
+ year#29.out
+ ccyy#39.#2
+
+ Forward route : (from state 45)
+ (START)->1:[1]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->2:[2]->(HERE)
+ Transitions :
+ 0:[0] -> 63
+ 1:[1] -> 63
+ 2:[2] -> 63
+ 3:[3] -> 63
+ 4:[4-9] -> 63
+ NFA exit tags applying :
+ DS_DMY
+ Attributes for <(DEFAULT)> : DS_DMY
+
+DFA state 57
+ NFA states :
+ ccyy#36.#3
+
+ Forward route : (from state 48)
+ (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 64
+ 1:[1] -> 64
+ 2:[2] -> 64
+ 3:[3] -> 64
+ 4:[4-9] -> 64
+
+DFA state 58
+ NFA states :
+ #36
+ ccyy#19.out
+
+ Forward route : (from state 50)
+ (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ NFA exit tags applying :
+ DS_MY
+ Attributes for <(DEFAULT)> : DS_MY
+
+DFA state 59
+ NFA states :
+ #18
+ scaled#4.in
+ scaled#4.after_value
+
+ Forward route : (from state 51)
+ (START)->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 65
+ 1:[1] -> 65
+ 2:[2] -> 65
+ 3:[3] -> 65
+ 4:[4-9] -> 65
+ 5:[A-Za-z] -> 7
+ NFA exit tags applying :
+ DS_YYMMDD
+ Attributes for <(DEFAULT)> : DS_YYMMDD
+
+DFA state 60
+ NFA states :
+ #51
+ day#34.#1
+
+ Forward route : (from state 53)
+ (START)->1:[1]->0:[0]->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->(HERE)
+ Transitions :
+ 0:[0] -> 66
+ 1:[1] -> 66
+ 2:[2] -> 66
+ 3:[3] -> 66
+ 4:[4-9] -> 66
+ NFA exit tags applying :
+ DS_YMD
+ Attributes for <(DEFAULT)> : DS_YMD
+
+DFA state 61
+ NFA states :
+ #51
+ day#34.#2
+
+ Forward route : (from state 53)
+ (START)->1:[1]->0:[0]->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->(HERE)
+ Transitions :
+ 0:[0] -> 66
+ 1:[1] -> 66
+ NFA exit tags applying :
+ DS_YMD
+ Attributes for <(DEFAULT)> : DS_YMD
+
+DFA state 62
+ NFA states :
+ #51
+
+ Forward route : (from state 53)
+ (START)->1:[1]->0:[0]->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->4:[4-9]->(HERE)
+ Transitions :
+ NFA exit tags applying :
+ DS_YMD
+ Attributes for <(DEFAULT)> : DS_YMD
+
+DFA state 63
+ NFA states :
+ ccyy#39.#3
+
+ Forward route : (from state 55)
+ (START)->1:[1]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 67
+ 1:[1] -> 67
+ 2:[2] -> 67
+ 3:[3] -> 67
+ 4:[4-9] -> 67
+
+DFA state 64
+ NFA states :
+ #57
+ ccyy#36.out
+
+ Forward route : (from state 57)
+ (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ NFA exit tags applying :
+ DS_DMY
+ Attributes for <(DEFAULT)> : DS_DMY
+
+DFA state 65
+ NFA states :
+ scaled#4.in
+ scaled#4.after_value
+
+ Forward route : (from state 59)
+ (START)->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 65
+ 1:[1] -> 65
+ 2:[2] -> 65
+ 3:[3] -> 65
+ 4:[4-9] -> 65
+ 5:[A-Za-z] -> 7
+
+DFA state 66
+ NFA states :
+ #54
+ day#34.out
+
+ Forward route : (from state 60)
+ (START)->1:[1]->0:[0]->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->(HERE)
+ Transitions :
+ NFA exit tags applying :
+ DS_YMD
+ Attributes for <(DEFAULT)> : DS_YMD
+
+DFA state 67
+ NFA states :
+ #60
+ ccyy#39.out
+
+ Forward route : (from state 63)
+ (START)->1:[1]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ NFA exit tags applying :
+ DS_DMY
+ Attributes for <(DEFAULT)> : DS_DMY
+
+
+Entry states in DFA:
+Entry <(ONLY ENTRY)> : 0
+Searching for dead states...
+(no dead states found)
+
+-----------------------------
+------ COMPRESSING DFA ------
+-----------------------------
+Old DFA state 0 becomes 0
+Old DFA state 1 becomes 1
+Old DFA state 2 becomes 2
+Old DFA state 3 becomes 3
+Old DFA state 4 becomes 4
+Old DFA state 5 becomes 5
+Old DFA state 6 becomes 6
+Old DFA state 7 becomes 7
+Old DFA state 8 becomes 8
+Old DFA state 9 becomes 9
+Old DFA state 10 becomes 10
+Old DFA state 11 becomes 11
+Old DFA state 12 becomes 12
+Old DFA state 13 becomes 13
+Old DFA state 14 becomes 14
+Old DFA state 15 becomes 9 (formerly 9)
+Old DFA state 16 becomes 15
+Old DFA state 17 becomes 16
+Old DFA state 18 becomes 17
+Old DFA state 19 becomes 18
+Old DFA state 20 becomes 19
+Old DFA state 21 becomes 15 (formerly 16)
+Old DFA state 22 becomes 20
+Old DFA state 23 becomes 21
+Old DFA state 24 becomes 22
+Old DFA state 25 becomes 23
+Old DFA state 26 becomes 24
+Old DFA state 27 becomes 25
+Old DFA state 28 becomes 26
+Old DFA state 29 becomes 13 (formerly 13)
+Old DFA state 30 becomes 20 (formerly 22)
+Old DFA state 31 becomes 27
+Old DFA state 32 becomes 28
+Old DFA state 33 becomes 29
+Old DFA state 34 becomes 30
+Old DFA state 35 becomes 31
+Old DFA state 36 becomes 32
+Old DFA state 37 becomes 33
+Old DFA state 38 becomes 34
+Old DFA state 39 becomes 35
+Old DFA state 40 becomes 36
+Old DFA state 41 becomes 37
+Old DFA state 42 becomes 18 (formerly 19)
+Old DFA state 43 becomes 27 (formerly 31)
+Old DFA state 44 becomes 28 (formerly 32)
+Old DFA state 45 becomes 29 (formerly 33)
+Old DFA state 46 becomes 30 (formerly 34)
+Old DFA state 47 becomes 38
+Old DFA state 48 becomes 39
+Old DFA state 49 becomes 40
+Old DFA state 50 becomes 21 (formerly 23)
+Old DFA state 51 becomes 41
+Old DFA state 52 becomes 37 (formerly 41)
+Old DFA state 53 becomes 26 (formerly 28)
+Old DFA state 54 becomes 38 (formerly 47)
+Old DFA state 55 becomes 39 (formerly 48)
+Old DFA state 56 becomes 40 (formerly 49)
+Old DFA state 57 becomes 27 (formerly 31)
+Old DFA state 58 becomes 31 (formerly 35)
+Old DFA state 59 becomes 42
+Old DFA state 60 becomes 35 (formerly 39)
+Old DFA state 61 becomes 36 (formerly 40)
+Old DFA state 62 becomes 37 (formerly 41)
+Old DFA state 63 becomes 27 (formerly 31)
+Old DFA state 64 becomes 38 (formerly 47)
+Old DFA state 65 becomes 43
+Old DFA state 66 becomes 37 (formerly 41)
+Old DFA state 67 becomes 38 (formerly 47)
+Entry <(ONLY ENTRY)>, formerly state 0, now state 0
+-------------------------------
+DFA structure after compression
+-------------------------------
+DFA state 0
+ Forward route :
+ (START)->(HERE)
+ Transitions :
+ 0:[0] -> 1
+ 1:[1] -> 2
+ 2:[2] -> 2
+ 3:[3] -> 3
+ 4:[4-9] -> 4
+ 5:[A-Za-z] -> 5
+
+DFA state 1
+ Forward route : (from state 0)
+ (START)->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 6
+ 1:[1] -> 6
+ 2:[2] -> 6
+ 3:[3] -> 6
+ 4:[4-9] -> 6
+ 5:[A-Za-z] -> 7
+
+DFA state 2
+ Forward route : (from state 0)
+ (START)->1:[1]->(HERE)
+ Transitions :
+ 0:[0] -> 8
+ 1:[1] -> 8
+ 2:[2] -> 8
+ 3:[3] -> 8
+ 4:[4-9] -> 8
+ 5:[A-Za-z] -> 9
+ NFA exit tags applying :
+ DS_D
+ Attributes for <(DEFAULT)> : DS_D
+
+DFA state 3
+ Forward route : (from state 0)
+ (START)->3:[3]->(HERE)
+ Transitions :
+ 0:[0] -> 8
+ 1:[1] -> 8
+ 2:[2] -> 10
+ 3:[3] -> 10
+ 4:[4-9] -> 10
+ 5:[A-Za-z] -> 9
+ Use state 2 as basis (3 fixups)
+ NFA exit tags applying :
+ DS_D
+ Attributes for <(DEFAULT)> : DS_D
+
+DFA state 4
+ Forward route : (from state 0)
+ (START)->4:[4-9]->(HERE)
+ Transitions :
+ 0:[0] -> 10
+ 1:[1] -> 10
+ 2:[2] -> 10
+ 3:[3] -> 10
+ 4:[4-9] -> 10
+ 5:[A-Za-z] -> 9
+ NFA exit tags applying :
+ DS_D
+ Attributes for <(DEFAULT)> : DS_D
+
+DFA state 5
+ Forward route : (from state 0)
+ (START)->5:[A-Za-z]->(HERE)
+ Transitions :
+ 5:[A-Za-z] -> 11
+
+DFA state 6
+ Forward route : (from state 1)
+ (START)->0:[0]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 12
+ 1:[1] -> 12
+ 2:[2] -> 12
+ 3:[3] -> 12
+ 4:[4-9] -> 12
+ 5:[A-Za-z] -> 13
+ NFA exit tags applying :
+ DS_Y
+ Attributes for <(DEFAULT)> : DS_Y
+
+DFA state 7
+ Forward route : (from state 1)
+ (START)->0:[0]->5:[A-Za-z]->(HERE)
+ Transitions :
+ NFA exit tags applying :
+ DS_SCALED
+ Attributes for <(DEFAULT)> : DS_SCALED
+
+DFA state 8
+ Forward route : (from state 2)
+ (START)->1:[1]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 14
+ 1:[1] -> 14
+ 2:[2] -> 14
+ 3:[3] -> 14
+ 4:[4-9] -> 14
+ 5:[A-Za-z] -> 9
+ NFA exit tags applying :
+ DS_D
+ Attributes for <(DEFAULT)> : DS_D
+
+DFA state 9
+ Forward route : (from state 2)
+ (START)->1:[1]->5:[A-Za-z]->(HERE)
+ Transitions :
+ 5:[A-Za-z] -> 15
+ NFA exit tags applying :
+ DS_SCALED
+ Attributes for <(DEFAULT)> : DS_SCALED
+
+DFA state 10
+ Forward route : (from state 3)
+ (START)->3:[3]->2:[2]->(HERE)
+ Transitions :
+ 0:[0] -> 14
+ 1:[1] -> 14
+ 2:[2] -> 14
+ 3:[3] -> 14
+ 4:[4-9] -> 14
+ 5:[A-Za-z] -> 13
+ Use state 8 as basis (1 fixups)
+ NFA exit tags applying :
+ DS_Y
+ Attributes for <(DEFAULT)> : DS_Y
+
+DFA state 11
+ Forward route : (from state 5)
+ (START)->5:[A-Za-z]->5:[A-Za-z]->(HERE)
+ Transitions :
+ 5:[A-Za-z] -> 16
+
+DFA state 12
+ Forward route : (from state 6)
+ (START)->0:[0]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 17
+ 1:[1] -> 17
+ 2:[2] -> 17
+ 3:[3] -> 17
+ 4:[4-9] -> 17
+ 5:[A-Za-z] -> 7
+
+DFA state 13
+ Forward route : (from state 6)
+ (START)->0:[0]->0:[0]->5:[A-Za-z]->(HERE)
+ Transitions :
+ 5:[A-Za-z] -> 18
+ NFA exit tags applying :
+ DS_SCALED
+ Attributes for <(DEFAULT)> : DS_SCALED
+
+DFA state 14
+ Forward route : (from state 8)
+ (START)->1:[1]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 19
+ 1:[1] -> 19
+ 2:[2] -> 19
+ 3:[3] -> 19
+ 4:[4-9] -> 19
+ 5:[A-Za-z] -> 7
+
+DFA state 15
+ Forward route : (from state 9)
+ (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->(HERE)
+ Transitions :
+ 5:[A-Za-z] -> 20
+
+DFA state 16
+ Forward route : (from state 11)
+ (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->(HERE)
+ Transitions :
+ 0:[0] -> 21
+ 1:[1] -> 22
+ 2:[2] -> 22
+ 3:[3] -> 23
+ 4:[4-9] -> 24
+ NFA exit tags applying :
+ DS_M
+ Attributes for <(DEFAULT)> : DS_M
+
+DFA state 17
+ Forward route : (from state 12)
+ (START)->0:[0]->0:[0]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 25
+ 1:[1] -> 25
+ 2:[2] -> 25
+ 3:[3] -> 25
+ 4:[4-9] -> 25
+ 5:[A-Za-z] -> 7
+
+DFA state 18
+ Forward route : (from state 13)
+ (START)->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->(HERE)
+ Transitions :
+ 5:[A-Za-z] -> 26
+
+DFA state 19
+ Forward route : (from state 14)
+ (START)->1:[1]->0:[0]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 25
+ 1:[1] -> 25
+ 2:[2] -> 25
+ 3:[3] -> 25
+ 4:[4-9] -> 25
+ 5:[A-Za-z] -> 13
+ Use state 17 as basis (1 fixups)
+ NFA exit tags applying :
+ DS_Y
+ Attributes for <(DEFAULT)> : DS_Y
+
+DFA state 20
+ Forward route : (from state 15)
+ (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->(HERE)
+ Transitions :
+ 0:[0] -> 27
+ 1:[1] -> 28
+ 2:[2] -> 28
+ 3:[3] -> 29
+ 4:[4-9] -> 30
+ NFA exit tags applying :
+ DS_DM
+ Attributes for <(DEFAULT)> : DS_DM
+
+DFA state 21
+ Forward route : (from state 16)
+ (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 31
+ 1:[1] -> 31
+ 2:[2] -> 31
+ 3:[3] -> 31
+ 4:[4-9] -> 31
+
+DFA state 22
+ Forward route : (from state 16)
+ (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->(HERE)
+ Transitions :
+ 0:[0] -> 32
+ 1:[1] -> 32
+ 2:[2] -> 32
+ 3:[3] -> 32
+ 4:[4-9] -> 32
+ NFA exit tags applying :
+ DS_MD
+ Attributes for <(DEFAULT)> : DS_MD
+
+DFA state 23
+ Forward route : (from state 16)
+ (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->(HERE)
+ Transitions :
+ 0:[0] -> 32
+ 1:[1] -> 32
+ 2:[2] -> 33
+ 3:[3] -> 33
+ 4:[4-9] -> 33
+ NFA exit tags applying :
+ DS_MD
+ Attributes for <(DEFAULT)> : DS_MD
+
+DFA state 24
+ Forward route : (from state 16)
+ (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->4:[4-9]->(HERE)
+ Transitions :
+ 0:[0] -> 33
+ 1:[1] -> 33
+ 2:[2] -> 33
+ 3:[3] -> 33
+ 4:[4-9] -> 33
+ Use state 23 as basis (2 fixups)
+ NFA exit tags applying :
+ DS_MD
+ Attributes for <(DEFAULT)> : DS_MD
+
+DFA state 25
+ Forward route : (from state 17)
+ (START)->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 34
+ 1:[1] -> 34
+ 2:[2] -> 34
+ 3:[3] -> 34
+ 4:[4-9] -> 34
+ 5:[A-Za-z] -> 7
+
+DFA state 26
+ Forward route : (from state 18)
+ (START)->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->(HERE)
+ Transitions :
+ 1:[1] -> 35
+ 2:[2] -> 35
+ 3:[3] -> 36
+ 4:[4-9] -> 37
+ NFA exit tags applying :
+ DS_YM
+ Attributes for <(DEFAULT)> : DS_YM
+
+DFA state 27
+ Forward route : (from state 20)
+ (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 38
+ 1:[1] -> 38
+ 2:[2] -> 38
+ 3:[3] -> 38
+ 4:[4-9] -> 38
+
+DFA state 28
+ Forward route : (from state 20)
+ (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->(HERE)
+ Transitions :
+ 0:[0] -> 39
+ 1:[1] -> 39
+ 2:[2] -> 39
+ 3:[3] -> 39
+ 4:[4-9] -> 39
+
+DFA state 29
+ Forward route : (from state 20)
+ (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->(HERE)
+ Transitions :
+ 0:[0] -> 39
+ 1:[1] -> 39
+ 2:[2] -> 40
+ 3:[3] -> 40
+ 4:[4-9] -> 40
+
+DFA state 30
+ Forward route : (from state 20)
+ (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->4:[4-9]->(HERE)
+ Transitions :
+ 0:[0] -> 40
+ 1:[1] -> 40
+ 2:[2] -> 40
+ 3:[3] -> 40
+ 4:[4-9] -> 40
+ Use state 29 as basis (2 fixups)
+
+DFA state 31
+ Forward route : (from state 21)
+ (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ NFA exit tags applying :
+ DS_MY
+ Attributes for <(DEFAULT)> : DS_MY
+
+DFA state 32
+ Forward route : (from state 22)
+ (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 21
+ 1:[1] -> 21
+ 2:[2] -> 21
+ 3:[3] -> 21
+ 4:[4-9] -> 21
+ NFA exit tags applying :
+ DS_MD
+ Attributes for <(DEFAULT)> : DS_MD
+
+DFA state 33
+ Forward route : (from state 23)
+ (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->2:[2]->(HERE)
+ Transitions :
+ 0:[0] -> 21
+ 1:[1] -> 21
+ 2:[2] -> 21
+ 3:[3] -> 21
+ 4:[4-9] -> 21
+ Use state 32 as basis (0 fixups)
+ NFA exit tags applying :
+ DS_MY
+ Attributes for <(DEFAULT)> : DS_MY
+
+DFA state 34
+ Forward route : (from state 25)
+ (START)->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 41
+ 1:[1] -> 41
+ 2:[2] -> 41
+ 3:[3] -> 41
+ 4:[4-9] -> 41
+ 5:[A-Za-z] -> 7
+ NFA exit tags applying :
+ DS_YYMMDD
+ Attributes for <(DEFAULT)> : DS_YYMMDD
+
+DFA state 35
+ Forward route : (from state 26)
+ (START)->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->(HERE)
+ Transitions :
+ 0:[0] -> 37
+ 1:[1] -> 37
+ 2:[2] -> 37
+ 3:[3] -> 37
+ 4:[4-9] -> 37
+ NFA exit tags applying :
+ DS_YMD
+ Attributes for <(DEFAULT)> : DS_YMD
+
+DFA state 36
+ Forward route : (from state 26)
+ (START)->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->(HERE)
+ Transitions :
+ 0:[0] -> 37
+ 1:[1] -> 37
+ NFA exit tags applying :
+ DS_YMD
+ Attributes for <(DEFAULT)> : DS_YMD
+
+DFA state 37
+ Forward route : (from state 26)
+ (START)->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->4:[4-9]->(HERE)
+ Transitions :
+ NFA exit tags applying :
+ DS_YMD
+ Attributes for <(DEFAULT)> : DS_YMD
+
+DFA state 38
+ Forward route : (from state 27)
+ (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ NFA exit tags applying :
+ DS_DMY
+ Attributes for <(DEFAULT)> : DS_DMY
+
+DFA state 39
+ Forward route : (from state 28)
+ (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 27
+ 1:[1] -> 27
+ 2:[2] -> 27
+ 3:[3] -> 27
+ 4:[4-9] -> 27
+
+DFA state 40
+ Forward route : (from state 29)
+ (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->2:[2]->(HERE)
+ Transitions :
+ 0:[0] -> 27
+ 1:[1] -> 27
+ 2:[2] -> 27
+ 3:[3] -> 27
+ 4:[4-9] -> 27
+ Use state 39 as basis (0 fixups)
+ NFA exit tags applying :
+ DS_DMY
+ Attributes for <(DEFAULT)> : DS_DMY
+
+DFA state 41
+ Forward route : (from state 34)
+ (START)->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 42
+ 1:[1] -> 42
+ 2:[2] -> 42
+ 3:[3] -> 42
+ 4:[4-9] -> 42
+ 5:[A-Za-z] -> 7
+
+DFA state 42
+ Forward route : (from state 41)
+ (START)->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 43
+ 1:[1] -> 43
+ 2:[2] -> 43
+ 3:[3] -> 43
+ 4:[4-9] -> 43
+ 5:[A-Za-z] -> 7
+ NFA exit tags applying :
+ DS_YYMMDD
+ Attributes for <(DEFAULT)> : DS_YYMMDD
+
+DFA state 43
+ Forward route : (from state 42)
+ (START)->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->(HERE)
+ Transitions :
+ 0:[0] -> 43
+ 1:[1] -> 43
+ 2:[2] -> 43
+ 3:[3] -> 43
+ 4:[4-9] -> 43
+ 5:[A-Za-z] -> 7
+ Use state 42 as basis (0 fixups)
+
+
+Entry states in DFA:
+Entry <(ONLY ENTRY)> : 0
diff --git a/src/mairix/db.c b/src/mairix/db.c
@@ -0,0 +1,1297 @@
+/*
+ mairix - message index builder and finder for maildir folders.
+
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2002,2003,2004,2005,2006,2007,2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+/* Handle complete database */
+
+#include "mairix.h"
+#include "reader.h"
+#include <ctype.h>
+#include <assert.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+/* A token's text paired with the hash-table slot it was found in; used when
+ * sorting the tokens of a table to look for duplicated texts. */
+struct sortable_token {/*{{{*/
+  char *text;
+  int index;
+};
+/*}}}*/
+/* qsort() comparator for struct sortable_token: order by text first and
+ * break ties on ascending slot index. */
+static int compare_sortable_tokens(const void *a, const void *b)/*{{{*/
+{
+  const struct sortable_token *lhs = a;
+  const struct sortable_token *rhs = b;
+  int by_text = strcmp(lhs->text, rhs->text);
+
+  if (by_text != 0) return by_text;
+  if (lhs->index < rhs->index) return -1;
+  if (lhs->index > rhs->index) return +1;
+  return 0;
+}
+/*}}}*/
+/* Sanity-check one token table.
+ *
+ * Every token's encoded chain must decode to a final index equal to its
+ * recorded 'highest' value, that index must be below n_msgs, and no token
+ * text may occur twice in the table.  Problems are reported on stderr and
+ * then trip an assert(). */
+static void check_toktable_enc_integrity(int n_msgs, struct toktable *table)/*{{{*/
+{
+  /* FIXME : Check reachability of tokens that are displaced from their natural
+   * hash bucket (if deletions have occurred during purge). */
+
+  struct sortable_token *sorted;
+  int slot, used, pos;
+  int chain_error = 0;
+  int duplicate_seen = 0;
+
+  /* Pass 1 : decode each chain and compare it with the stored summary. */
+  for (slot = 0; slot < table->size; slot++) {
+    struct token *tok = table->tokens[slot];
+    unsigned char *cursor, *end;
+    int total = 0;
+    int step;
+
+    if (!tok) continue;
+
+    cursor = tok->match0.msginfo;
+    end = tok->match0.msginfo + tok->match0.n;
+    while (cursor < end) {
+      step = read_increment(&cursor);
+      total += step;
+    }
+
+    if (total != tok->match0.highest) {
+      fprintf(stderr, "broken encoding chain for token <%s>, highest=%ld\n", tok->text, tok->match0.highest);
+      fflush(stderr);
+      chain_error = 1;
+    }
+    if (total >= n_msgs) {
+      fprintf(stderr, "end of chain higher than number of message paths (%d) for token <%s>\n", n_msgs, tok->text);
+      fflush(stderr);
+      chain_error = 1;
+    }
+  }
+
+  assert(!chain_error);
+
+  /* Pass 2 : copy out every token text together with its slot, sort, and
+   * look for equal neighbours. */
+  sorted = new_array(struct sortable_token, table->n);
+  used = 0;
+  for (slot = 0; slot < table->size; slot++) {
+    struct token *tok = table->tokens[slot];
+    if (!tok) continue;
+    sorted[used].text = new_string(tok->text);
+    sorted[used].index = slot;
+    used++;
+  }
+  assert(used == table->n);
+
+  qsort(sorted, table->n, sizeof(struct sortable_token), compare_sortable_tokens);
+
+  for (pos = 1; pos < table->n; pos++) {
+    if (strcmp(sorted[pos-1].text, sorted[pos].text) == 0) {
+      fprintf(stderr, "Token table contains duplicated token %s at indices %d and %d\n",
+              sorted[pos-1].text, sorted[pos-1].index, sorted[pos].index);
+      duplicate_seen = 1;
+    }
+  }
+
+  /* Release the private copies before deciding whether to abort. */
+  for (pos = 0; pos < table->n; pos++) {
+    free(sorted[pos].text);
+  }
+  free(sorted);
+
+  if (duplicate_seen) {
+    fprintf(stderr, "Token table contained duplicate entries, aborting\n");
+    assert(0);
+  }
+}
+/*}}}*/
+/* qsort() comparator for an array of pointers to C strings. */
+static int compare_strings(const void *a, const void *b)/*{{{*/
+{
+  const char *const *lhs = a;
+  const char *const *rhs = b;
+
+  return strcmp(*lhs, *rhs);
+}
+/*}}}*/
+/* Verify that no file-per-message path occurs more than once in the
+ * database.  Each repeated path is listed on stderr, after which an assert()
+ * trips. */
+static void check_message_path_integrity(struct database *db)/*{{{*/
+{
+  /* TODO : for now only checks integrity of non-mbox paths. */
+  char **sorted;
+  int msg, pos;
+  int count = 0;
+  int repeated = 0;
+
+  /* Gather the paths of file-type messages only; dead and mbox entries are
+   * skipped. */
+  sorted = new_array(char *, db->n_msgs);
+  for (msg = 0; msg < db->n_msgs; msg++) {
+    if (db->type[msg] == MTY_FILE) {
+      sorted[count++] = db->msgs[msg].src.mpf.path;
+    }
+  }
+
+  /* Once sorted, any duplicates must be neighbours. */
+  qsort(sorted, count, sizeof(char *), compare_strings);
+  for (pos = 1; pos < count; pos++) {
+    if (strcmp(sorted[pos-1], sorted[pos]) == 0) {
+      fprintf(stderr, "Path <%s> repeated\n", sorted[pos]);
+      repeated = 1;
+    }
+  }
+
+  fflush(stderr);
+  assert(!repeated);
+
+  /* Only the pointer array is ours; the strings still belong to db. */
+  free(sorted);
+}
+/*}}}*/
+/* Announce (when verbose) and then check a single token table. */
+static void check_labelled_toktable(const char *banner, int n_msgs, struct toktable *table)
+{
+  if (verbose) fputs(banner, stderr);
+  check_toktable_enc_integrity(n_msgs, table);
+}
+
+/* Run every available consistency check over an in-memory database: first
+ * the message paths, then the encoding chains of each per-field token
+ * table.  Progress is reported on stderr when 'verbose' is set. */
+void check_database_integrity(struct database *db)/*{{{*/
+{
+  if (verbose) fputs("Checking message path integrity\n", stderr);
+  check_message_path_integrity(db);
+
+  /* Just check encoding chains for now */
+  check_labelled_toktable("Checking to\n", db->n_msgs, db->to);
+  check_labelled_toktable("Checking cc\n", db->n_msgs, db->cc);
+  check_labelled_toktable("Checking from\n", db->n_msgs, db->from);
+  check_labelled_toktable("Checking subject\n", db->n_msgs, db->subject);
+  check_labelled_toktable("Checking body\n", db->n_msgs, db->body);
+  check_labelled_toktable("Checking attachment_name\n", db->n_msgs, db->attachment_name);
+}
+/*}}}*/
+/* Allocate a database with fresh token tables and no messages or mboxes.
+ *
+ * hash_key is stored as the database's hash key, unless it equals
+ * CREATE_RANDOM_DATABASE_HASH, in which case a key is derived from the
+ * current time of day and the process id. */
+struct database *new_database(unsigned int hash_key)/*{{{*/
+{
+  struct database *db = new(struct database);
+
+  /* One token table per indexed field ... */
+  db->to = new_toktable();
+  db->cc = new_toktable();
+  db->from = new_toktable();
+  db->subject = new_toktable();
+  db->body = new_toktable();
+  db->attachment_name = new_toktable();
+
+  /* ... plus the two-chain table used for message IDs. */
+  db->msg_ids = new_toktable2();
+
+  if (hash_key == CREATE_RANDOM_DATABASE_HASH) {
+    struct timeval now;
+    pid_t self;
+
+    gettimeofday(&now, NULL);
+    self = getpid();
+    hash_key = now.tv_sec ^ (self ^ (now.tv_usec << 15));
+  }
+  db->hash_key = hash_key;
+
+  /* No messages yet. */
+  db->msgs = NULL;
+  db->type = NULL;
+  db->n_msgs = 0;
+  db->max_msgs = 0;
+
+  /* No mbox folders yet. */
+  db->mboxen = NULL;
+  db->n_mboxen = 0;
+  db->max_mboxen = 0;
+
+  return db;
+}
+/*}}}*/
+/* Release a database: all token tables, the path string of every
+ * file-type message, the message and type arrays, and the database
+ * structure itself.
+ *
+ * NOTE(review): nothing here releases db->mboxen - confirm whether that is
+ * handled elsewhere. */
+void free_database(struct database *db)/*{{{*/
+{
+  int msg;
+
+  free_toktable(db->to);
+  free_toktable(db->cc);
+  free_toktable(db->from);
+  free_toktable(db->subject);
+  free_toktable(db->body);
+  free_toktable(db->attachment_name);
+  free_toktable2(db->msg_ids);
+
+  if (db->msgs) {
+    for (msg = 0; msg < db->n_msgs; msg++) {
+      /* Only file-type messages own a path string; dead and mbox entries
+       * have nothing to release. */
+      if (db->type[msg] == MTY_FILE) {
+        assert(db->msgs[msg].src.mpf.path);
+        free(db->msgs[msg].src.mpf.path);
+      }
+    }
+    free(db->msgs);
+    free(db->type);
+  }
+
+  free(db);
+}
+/*}}}*/
+
+/* Return the larger of two ints. */
+static int get_max (int a, int b) {/*{{{*/
+  if (a > b) return a;
+  return b;
+}
+/*}}}*/
+/* Rebuild an in-memory token table from the form held in a database file.
+ *
+ * data     : base address that the offsets in 'in' are relative to
+ * hash_key : key passed to hashfn() when re-hashing each token text
+ * n_msgs   : number of messages; every chain must end below this index
+ * in       : token count plus per-token offsets of the text and encoding
+ * out      : table to populate (size, mask, n, tokens and hwm are all set)
+ *
+ * If the same token text turns up twice the database is treated as corrupt
+ * and unlock_and_exit(1) is called. */
+static void import_toktable(char *data, unsigned int hash_key, int n_msgs, struct toktable_db *in, struct toktable *out)/*{{{*/
+{
+ int n, size, i;
+
+ /* Pick the smallest power of two that is >= n, then double it. */
+ n = in->n;
+ size = 1;
+ while (size < n) size <<= 1;
+ size <<= 1; /* safe hash table size */
+
+ out->size = size;
+ out->mask = size - 1;
+ out->n = n;
+ out->tokens = new_array(struct token *, size);
+ memset(out->tokens, 0, size * sizeof(struct token *));
+ out->hwm = (n + size) >> 1;
+
+ for (i=0; i<n; i++) {
+ unsigned int hash, index;
+ char *text;
+ unsigned char *enc;
+ int enc_len;
+ struct token *nt;
+ int enc_hi;
+ int idx, incr;
+ unsigned char *j;
+
+ /* Recover enc_len and enc_hi from the data */
+ /* The stored chain is walked up to (not including) a 0xff byte; the sum
+ * of the increments is the highest message index in the chain. */
+ enc = (unsigned char *) data + in->enc_offsets[i];
+ idx = 0;
+ for (j = enc; *j != 0xff; ) {
+ incr = read_increment(&j);
+ idx += incr;
+ }
+ enc_len = j - enc;
+ enc_hi = idx;
+
+ text = data + in->tok_offsets[i];
+ hash = hashfn((unsigned char *) text, strlen(text), hash_key);
+
+ nt = new(struct token);
+ nt->hashval = hash;
+ nt->text = new_string(text);
+ /* Allow a bit of headroom for adding more entries later */
+ nt->match0.max = get_max(16, enc_len + (enc_len >> 1));
+ nt->match0.n = enc_len;
+ nt->match0.highest = enc_hi;
+ assert(nt->match0.highest < n_msgs);
+ nt->match0.msginfo = new_array(unsigned char, nt->match0.max);
+ memcpy(nt->match0.msginfo, enc, nt->match0.n);
+
+ /* Linear probing from the token's natural bucket to the first free one. */
+ index = hash & out->mask;
+ while (out->tokens[index]) {
+ /* Audit to look for corrupt database with multiple entries for the same
+ * string. */
+ if (!strcmp(nt->text, out->tokens[index]->text)) {
+ fprintf(stderr, "\n!!! Corrupt token table found in database, token <%s> duplicated, aborting\n",
+ nt->text);
+ fprintf(stderr, " Delete the database file and rebuild from scratch as a workaround\n");
+ /* No point going on - need to find out why the database got corrupted
+ * in the 1st place. Workaround for user - rebuild database from
+ * scratch by deleting it then rerunning. */
+ unlock_and_exit(1);
+ }
+ ++index;
+ index &= out->mask;
+ }
+
+ out->tokens[index] = nt;
+ }
+}
+/*}}}*/
+/* Rebuild the in-memory two-chain token table (used for message IDs) from
+ * the form held in a database file.
+ *
+ * data     : base address that the offsets in 'in' are relative to
+ * hash_key : key passed to hashfn() when re-hashing each token text
+ * n_msgs   : number of messages; both chains must end below this index
+ * in       : token count plus per-token offsets of the text and of the two
+ *            encodings
+ * out      : table to populate (size, mask, n, tokens and hwm are all set)
+ *
+ * Unlike import_toktable(), no audit for duplicated token texts is made
+ * while inserting. */
+static void import_toktable2(char *data, unsigned int hash_key, int n_msgs, struct toktable2_db *in, struct toktable2 *out)/*{{{*/
+{
+  int n, size, i;
+
+  /* Pick the smallest power of two that is >= n, then double it. */
+  n = in->n;
+  size = 1;
+  while (size < n) size <<= 1;
+  size <<= 1; /* safe hash table size */
+
+  out->size = size;
+  out->mask = size - 1;
+  out->n = n;
+  out->tokens = new_array(struct token2 *, size);
+  /* Size the clear by the element type this table actually stores. */
+  memset(out->tokens, 0, size * sizeof(struct token2 *));
+  out->hwm = (n + size) >> 1;
+
+  for (i=0; i<n; i++) {
+    unsigned int hash, index;
+    char *text;
+    struct token2 *nt;
+    unsigned char *enc0, *enc1;
+    int enc0_len, enc1_len;
+    int enc0_hi, enc1_hi;
+    int idx, incr;
+    unsigned char *j;
+
+    /* Each stored chain is walked up to (not including) a 0xff byte; the
+     * sum of the increments is the highest message index in that chain. */
+/*{{{ do enc0*/
+    enc0 = (unsigned char *) data + in->enc0_offsets[i];
+    idx = 0;
+    for (j = enc0; *j != 0xff; ) {
+      incr = read_increment(&j);
+      idx += incr;
+    }
+    enc0_len = j - enc0;
+    enc0_hi = idx;
+/*}}}*/
+/*{{{ do enc1*/
+    enc1 = (unsigned char *) data + in->enc1_offsets[i];
+    idx = 0;
+    for (j = enc1; *j != 0xff; ) {
+      incr = read_increment(&j);
+      idx += incr;
+    }
+    enc1_len = j - enc1;
+    enc1_hi = idx;
+/*}}}*/
+
+    text = data + in->tok_offsets[i];
+    hash = hashfn((unsigned char *) text, strlen(text), hash_key);
+
+    nt = new(struct token2);
+    nt->hashval = hash;
+    nt->text = new_string(text);
+    /* Allow a bit of headroom for adding more entries later */
+    /*{{{ set up match0 chain */
+    nt->match0.max = get_max(16, enc0_len + (enc0_len >> 1));
+    nt->match0.n = enc0_len;
+    nt->match0.highest = enc0_hi;
+    assert(nt->match0.highest < n_msgs);
+    nt->match0.msginfo = new_array(unsigned char, nt->match0.max);
+    memcpy(nt->match0.msginfo, enc0, nt->match0.n);
+    /*}}}*/
+    /*{{{ set up match1 chain */
+    nt->match1.max = get_max(16, enc1_len + (enc1_len >> 1));
+    nt->match1.n = enc1_len;
+    nt->match1.highest = enc1_hi;
+    assert(nt->match1.highest < n_msgs);
+    nt->match1.msginfo = new_array(unsigned char, nt->match1.max);
+    memcpy(nt->match1.msginfo, enc1, nt->match1.n);
+    /*}}}*/
+
+    /* Linear probing from the token's natural bucket to the first free one. */
+    index = hash & out->mask;
+    while (out->tokens[index]) {
+      ++index;
+      index &= out->mask;
+    }
+
+    out->tokens[index] = nt;
+  }
+}
+/*}}}*/
+/* Build an in-memory database from the file db_filename.
+ *
+ * If open_db() yields nothing, a new empty database is returned instead.
+ * Otherwise the mbox table, the per-message records and all token tables are
+ * copied out of the file image, the file is closed again, and (when
+ * do_integrity_checks is non-zero) check_database_integrity() is run on the
+ * result before it is returned. */
+struct database *new_database_from_file(char *db_filename, int do_integrity_checks)/*{{{*/
+{
+ /* Read existing database from file for doing incremental update */
+ struct database *result;
+ struct read_db *input;
+ int i, n, N;
+
+ result = new_database( CREATE_RANDOM_DATABASE_HASH );
+ input = open_db(db_filename);
+ if (!input) {
+ /* Nothing to initialise */
+ if (verbose) printf("Database file was empty, creating a new database\n");
+ return result;
+ }
+
+ /* Build pathname information */
+ n = result->n_msgs = input->n_msgs;
+ result->max_msgs = input->n_msgs; /* let it be extended as-and-when */
+ result->msgs = new_array(struct msgpath, n);
+ result->type = new_array(enum message_type, n);
+
+ /* The key stored in the file replaces the one new_database() generated. */
+ result->hash_key = input->hash_key;
+
+ /* Set up mbox structures */
+ N = result->n_mboxen = result->max_mboxen = input->n_mboxen;
+ result->mboxen = N ? (new_array(struct mbox, N)) : NULL;
+ for (i=0; i<N; i++) {
+ int nn;
+ if (input->mbox_paths_table[i]) {
+ result->mboxen[i].path = new_string(input->data + input->mbox_paths_table[i]);
+ } else {
+ /* mbox is dead. */
+ result->mboxen[i].path = NULL;
+ }
+ result->mboxen[i].file_mtime = input->mbox_mtime_table[i];
+ result->mboxen[i].file_size = input->mbox_size_table[i];
+ nn = result->mboxen[i].n_msgs = input->mbox_entries_table[i];
+ result->mboxen[i].max_msgs = nn;
+ result->mboxen[i].start = new_array(off_t, nn);
+ result->mboxen[i].len = new_array(size_t, nn);
+ result->mboxen[i].check_all = new_array(checksum_t, nn);
+ /* Copy the entire checksum table in one go. */
+ memcpy(result->mboxen[i].check_all,
+ input->data + input->mbox_checksum_table[i],
+ nn * sizeof(checksum_t));
+ /* start[] and len[] are filled in below, one slot per mbox message, with
+ * n_so_far counting how many have been seen for this mbox. */
+ result->mboxen[i].n_so_far = 0;
+ }
+
+ for (i=0; i<n; i++) {
+ switch (rd_msg_type(input, i)) {
+ case DB_MSG_DEAD:
+ result->type[i] = MTY_DEAD;
+ break;
+ case DB_MSG_FILE:
+ result->type[i] = MTY_FILE;
+ result->msgs[i].src.mpf.path = new_string(input->data + input->path_offsets[i]);
+ result->msgs[i].src.mpf.mtime = input->mtime_table[i];
+ result->msgs[i].src.mpf.size = input->size_table[i];
+ break;
+ case DB_MSG_MBOX:
+ {
+ unsigned int mbi, msgi;
+ int n;
+ struct mbox *mb;
+ result->type[i] = MTY_MBOX;
+ /* For mbox messages the path_offsets entry carries the mbox index and
+ * the message index within it, rather than a string offset. */
+ decode_mbox_indices(input->path_offsets[i], &mbi, &msgi);
+ result->msgs[i].src.mbox.file_index = mbi;
+ mb = &result->mboxen[mbi];
+ /* Messages of one mbox are expected in ascending order with no gaps. */
+ assert(mb->n_so_far == msgi);
+ n = mb->n_so_far;
+ result->msgs[i].src.mbox.msg_index = n;
+ /* The mtime and size tables are reused for the start and length of an
+ * mbox message. */
+ mb->start[n] = input->mtime_table[i];
+ mb->len[n] = input->size_table[i];
+ ++mb->n_so_far;
+ }
+
+ break;
+ }
+ /* Fields common to every message type. */
+ result->msgs[i].seen = (input->msg_type_and_flags[i] & FLAG_SEEN) ? 1:0;
+ result->msgs[i].replied = (input->msg_type_and_flags[i] & FLAG_REPLIED) ? 1:0;
+ result->msgs[i].flagged = (input->msg_type_and_flags[i] & FLAG_FLAGGED) ? 1:0;
+ result->msgs[i].date = input->date_table[i];
+ result->msgs[i].tid = input->tid_table[i];
+ }
+
+ import_toktable(input->data, input->hash_key, result->n_msgs, &input->to, result->to);
+ import_toktable(input->data, input->hash_key, result->n_msgs, &input->cc, result->cc);
+ import_toktable(input->data, input->hash_key, result->n_msgs, &input->from, result->from);
+ import_toktable(input->data, input->hash_key, result->n_msgs, &input->subject, result->subject);
+ import_toktable(input->data, input->hash_key, result->n_msgs, &input->body, result->body);
+ import_toktable(input->data, input->hash_key, result->n_msgs, &input->attachment_name, result->attachment_name);
+ import_toktable2(input->data, input->hash_key, result->n_msgs, &input->msg_ids, result->msg_ids);
+
+ close_db(input);
+
+ if (do_integrity_checks) {
+ check_database_integrity(result);
+ }
+
+ return result;
+}
+/*}}}*/
+
+/* Add every <...> delimited term found in s to the two-chain token table.
+ *
+ * The text between each '<' and the next '>' is passed to
+ * add_token2_in_file() together with file_index, hash_key and
+ * add_to_chain1.  s is patched temporarily while each term is added and is
+ * restored afterwards.  A NULL s is ignored; scanning stops at the first
+ * '<' that has no closing '>'. */
+static void add_angled_terms(int file_index, unsigned int hash_key, struct toktable2 *table, int add_to_chain1, char *s)/*{{{*/
+{
+  char *lt, *gt;
+
+  if (!s) return;
+
+  for (lt = strchr(s, '<'); lt; lt = strchr(gt, '<')) {
+    gt = strchr(lt, '>');
+    if (!gt) break;
+
+    *gt = '\0';
+    add_token2_in_file(file_index, hash_key, lt+1, table, add_to_chain1);
+    *gt = '>'; /* restore */
+  }
+}
+/*}}}*/
+
+/* Table of which non-alphanumeric characters can make up token strings.
+
+ Each entry is a set of bits that char_valid_p() ANDs with the mask it is
+ given; alphanumeric characters are accepted there without consulting this
+ table. The callers in this file pass mask 1 when splitting text into
+ individual words and mask 2 when matching whole addresses.
+
+ The following characters have special meanings:
+ 0x2b + (entry 2)
+ 0x2d - (entry 2)
+ 0x2e . (entry 2)
+ 0x40 @ (entry 2)
+ 0x5f _ (entry 3)
+
+ since they can occur within email addresses and message IDs when considered
+ as a whole rather than as individual words. Underscore (0x5f) is considered
+ a word-character always too, which is why its entry has both bits set.
+
+ */
+static unsigned char special_table[256] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00-0f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10-1f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 2, 0, /* 20-2f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 30-3f */
+ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 40-4f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, /* 50-5f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60-6f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 70-7f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80-8f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90-9f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* a0-af */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* b0-bf */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* c0-cf */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* d0-df */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* e0-ef */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /* f0-ff */
+};
+
+#if 0
+#define CHAR_VALID(x,mask) (isalnum((unsigned char) x) || (special_table[(unsigned int)(unsigned char) x] & mask))
+#endif
+/* Return 1 if x may be part of a token, 0 otherwise.  Alphanumerics always
+ * qualify; any other character qualifies only if its special_table entry
+ * shares a bit with mask. */
+static inline int char_valid_p(char x, unsigned int mask)/*{{{*/
+{
+  /* Go through unsigned char so that bytes >= 0x80 are safe both for
+   * isalnum() and as a table index. */
+  unsigned char uc = (unsigned char) x;
+
+  return (isalnum(uc) || (special_table[(unsigned int) uc] & mask)) ? 1 : 0;
+}
+/*}}}*/
+/* Split data into maximal runs of characters accepted by
+ * char_valid_p(..., match_mask) and add each run to table as a token for
+ * message file_index.  data is patched temporarily (a NUL is written after
+ * each run) and restored before returning. */
+static void tokenise_string(int file_index, unsigned int hash_key, struct toktable *table, char *data, int match_mask)/*{{{*/
+{
+  char *tok_start, *tok_end;
+  char saved;
+
+  for (tok_start = data; ; tok_start = tok_end) {
+    /* Skip separators up to the start of the next token, if any. */
+    while (*tok_start && !char_valid_p(*tok_start, match_mask)) tok_start++;
+    if (*tok_start == '\0') break;
+
+    /* Find the first character past the token. */
+    tok_end = tok_start + 1;
+    while (*tok_end && char_valid_p(*tok_end, match_mask)) tok_end++;
+
+    /* deal with token [tok_start,tok_end) */
+    saved = *tok_end;
+    *tok_end = '\0';
+    /* FIXME: Ought to do this by passing start and length - clean up later */
+    add_token_in_file(file_index, hash_key, tok_start, table);
+    *tok_end = saved;
+
+    if (saved == '\0') break;
+  }
+}
+/*}}}*/
+/* Like tokenise_string() with mask 1, except that anything between a '<'
+ * and the following '>' is skipped when it is met between tokens.  data is
+ * patched temporarily and restored before returning. */
+static void tokenise_html_string(int file_index, unsigned int hash_key, struct toktable *table, char *data)/*{{{*/
+{
+  char *tok_start, *tok_end;
+  char saved;
+
+  /* FIXME : Probably want to rewrite this as an explicit FSM */
+
+  for (tok_start = data; ; tok_start = tok_end) {
+    /* Assume < and > are never valid token characters ! */
+    while (*tok_start && !char_valid_p(*tok_start, 1)) {
+      int opens_tag = (*tok_start == '<');
+      tok_start++;
+      if (opens_tag) {
+        /* Skip over HTML tag; the closing '>' itself is consumed by the
+         * next turn of the enclosing loop. */
+        while (*tok_start && (*tok_start != '>')) tok_start++;
+      }
+    }
+    if (*tok_start == '\0') break;
+
+    tok_end = tok_start + 1;
+    while (*tok_end && char_valid_p(*tok_end, 1)) tok_end++;
+
+    /* deal with token [tok_start,tok_end) */
+    saved = *tok_end;
+    *tok_end = '\0';
+    /* FIXME: Ought to do this by passing start and length - clean up later */
+    add_token_in_file(file_index, hash_key, tok_start, table);
+    *tok_end = saved;
+
+    if (saved == '\0') break;
+  }
+}
+/*}}}*/
+/* Tokenise an address-style header twice: once with mask 1 (individual
+ * words) and once with mask 2 (whole addresses).  A NULL header is
+ * ignored. */
+static void tokenise_address_header(int file_index, unsigned int hash_key, struct toktable *table, char *hdr)
+{
+  if (!hdr) return;
+  tokenise_string(file_index, hash_key, table, hdr, 1);
+  tokenise_string(file_index, hash_key, table, hdr, 2);
+}
+
+/* Add all the tokens of a parsed message to the database under message
+ * index file_index: the To/Cc/From/Subject headers, the text of each
+ * attachment, attachment file names, and the message IDs used for
+ * threading.  Embedded message/rfc822 parts are handled by recursion and
+ * are indexed under the same file_index. */
+void tokenise_message(int file_index, struct database *db, struct rfc822 *msg)/*{{{*/
+{
+  struct attachment *att;
+
+  /* Match on whole addresses in these headers as well as the individual words */
+  tokenise_address_header(file_index, db->hash_key, db->to, msg->hdrs.to);
+  tokenise_address_header(file_index, db->hash_key, db->cc, msg->hdrs.cc);
+  tokenise_address_header(file_index, db->hash_key, db->from, msg->hdrs.from);
+
+  if (msg->hdrs.subject) {
+    tokenise_string(file_index, db->hash_key, db->subject, msg->hdrs.subject, 1);
+  }
+
+  /* The attachments form a circular list headed by msg->atts. */
+  for (att = msg->atts.next; att != &msg->atts; att = att->next) {
+    switch (att->ct) {
+      case CT_TEXT_PLAIN:
+        tokenise_string(file_index, db->hash_key, db->body, att->data.normal.bytes, 1);
+        break;
+      case CT_TEXT_HTML:
+        tokenise_html_string(file_index, db->hash_key, db->body, att->data.normal.bytes);
+        break;
+      case CT_MESSAGE_RFC822:
+        /* Just recurse for now - maybe we should have separate token tables
+         * for tokens occurring in embedded messages? */
+        if (att->data.rfc822) {
+          tokenise_message(file_index, db, att->data.rfc822);
+        }
+        break;
+      default:
+        /* Don't do anything - unknown text format or some nasty binary stuff.
+         * In future, we could have all kinds of 'plug-ins' here, e.g.
+         * something that can parse PDF to get the basic text strings out of
+         * the pages? */
+        break;
+    }
+
+    /* The file name is indexed whole, as a single token. */
+    if (att->filename) {
+      add_token_in_file(file_index, db->hash_key, att->filename, db->attachment_name);
+    }
+  }
+
+  /* Deal with threading information */
+  add_angled_terms(file_index, db->hash_key, db->msg_ids, 1, msg->hdrs.message_id);
+  add_angled_terms(file_index, db->hash_key, db->msg_ids, 0, msg->hdrs.in_reply_to);
+  add_angled_terms(file_index, db->hash_key, db->msg_ids, 0, msg->hdrs.references);
+}
+/*}}}*/
+
+/* Set the seen/replied/flagged bits of m from the flag letters in its path.
+ *
+ * All three bits are cleared first.  If the text starting at the last ':'
+ * in the path begins with ":2,", every following 'S', 'R' or 'F' sets the
+ * seen, replied or flagged bit respectively; other letters are ignored.
+ *
+ * The last ':' is located with strrchr() rather than by stepping a pointer
+ * backwards, which would move it to before the start of the string when the
+ * path is empty or contains no ':'. */
+static void scan_maildir_flags(struct msgpath *m)/*{{{*/
+{
+  const char *info;
+
+  m->seen = 0;
+  m->replied = 0;
+  m->flagged = 0;
+
+  info = strrchr(m->src.mpf.path, ':');
+  if (!info || strncmp(info, ":2,", 3) != 0) {
+    return;
+  }
+
+  for (info += 3; *info; info++) {
+    switch (*info) {
+      case 'F': m->flagged = 1; break;
+      case 'R': m->replied = 1; break;
+      case 'S': m->seen = 1; break;
+      default: break;
+    }
+  }
+}
+/*}}}*/
+/* Parse and index every message from index start_at to the end of the
+ * database.  Each is expected to be a file-type message; paths ending in
+ * "/.gitignore" are passed over silently, and messages that cannot be
+ * parsed are reported on stderr and left unindexed. */
+static void scan_new_messages(struct database *db, int start_at)/*{{{*/
+{
+  int i;
+
+  for (i = start_at; i < db->n_msgs; i++) {
+    char *path = db->msgs[i].src.mpf.path;
+    int path_len = strlen(path);
+    struct rfc822 *parsed = NULL;
+
+    if (path_len > 10 && strcmp(path + path_len - 11, "/.gitignore") == 0) {
+      continue;
+    }
+
+    switch (db->type[i]) {
+      case MTY_FILE:
+        if (verbose) fprintf(stderr, "Scanning <%s>\n", path);
+        parsed = make_rfc822(path);
+        break;
+      case MTY_MBOX:
+        assert(0); /* Should never get here - mbox messages are scanned elsewhere. */
+        break;
+      case MTY_DEAD:
+        assert(0);
+        break;
+    }
+
+    if (!parsed) {
+      fprintf(stderr, "Skipping %s (could not parse message)\n", path);
+      continue;
+    }
+
+    db->msgs[i].date = parsed->hdrs.date;
+    scan_maildir_flags(&db->msgs[i]);
+    tokenise_message(i, db, parsed);
+    free_rfc822(parsed);
+  }
+}
+/*}}}*/
+
+/* Set bit n of the bit vector x.  Only the low 32 bits of each word are
+ * used: the word is n / 32 and the bit within it is n % 32. */
+static inline void set_bit(unsigned long *x, int n)/*{{{*/
+{
+  x[n >> 5] |= (1UL << (n & 31));
+}
+/*}}}*/
+/* Return 1 if bit n of the bit vector x is set, else 0.  Uses the same
+ * 32-bits-per-word layout as set_bit(). */
+static inline int isset_bit(unsigned long *x, int n)/*{{{*/
+{
+  unsigned long word = x[n >> 5];
+
+  return ((word >> (n & 31)) & 1UL) ? 1 : 0;
+}
+/*}}}*/
+/* Follow table[] links from index until reaching an entry that refers to
+ * itself, and return that entry's index. */
+static int find_base(int *table, int index) {/*{{{*/
+  int cur;
+
+  /* TODO : make this compress the path lengths down to the base entry */
+  for (cur = index; table[cur] != cur; cur = table[cur]) {
+    /* keep following the chain */
+  }
+  return cur;
+}
+/*}}}*/
+/* Assign a thread id (tid) to every message in the database.
+ *
+ * Messages that appear together in the match0 chain of any message-ID token
+ * are merged into one group; each group then receives its own tid, numbered
+ * contiguously from 0 in order of the group's lowest message index. */
+static void find_threading(struct database *db)/*{{{*/
+{
+
+ /* ix is a table mapping path array index to the lowest path array index that
+ * is known to share at least one message ID in its hdrs somewhere (i.e. they
+ * must be in the same thread) */
+ int *ix;
+
+ int i, m, np, nm, sm;
+ int next_tid;
+
+ np = db->n_msgs;
+ /* NOTE(review): nm is assigned here but not used again in this function. */
+ nm = db->msg_ids->n;
+ sm = db->msg_ids->size;
+
+ ix = new_array(int, np);
+ for (i=0; i<np; i++) {
+ ix[i] = i; /* default - every message in a thread of its own */
+ }
+
+ /* Visit every occupied slot of the message-ID hash table. */
+ for (m=0; m<sm; m++) {
+ struct token2 *tok = db->msg_ids->tokens[m];
+ if (tok) {
+ unsigned char *j = tok->match0.msginfo;
+ unsigned char *last_char = j + tok->match0.n;
+ int cur = 0, incr, first=1;
+ int new_base=-1, old_base;
+ /* Decode the chain into successive message indices (cur) and join all
+ * of them into one group. */
+ while (j < last_char) {
+ incr = read_increment(&j);
+ cur += incr;
+ if (first) {
+ new_base = find_base(ix, cur);
+ first = 0;
+ } else {
+ old_base = find_base(ix, cur);
+ /* Always link the higher base to the lower one, so that an entry
+ * never refers to an index above itself. */
+ if (old_base < new_base) {
+ ix[new_base] = old_base;
+ new_base = old_base;
+ } else if (old_base > new_base) {
+ assert(new_base != -1);
+ ix[old_base] = new_base;
+ }
+ }
+ }
+ }
+ }
+
+ /* Now make each entry point directly to its base */
+ for (i=0; i<np; i++) {
+ if (ix[i] != i) {
+ /* Sure to work as we're going up from the bottom */
+ ix[i] = ix[ix[i]];
+ }
+ }
+
+ /* Now allocate contiguous thread group numbers */
+ next_tid = 0;
+ for (i=0; i<np; i++) {
+ if (ix[i] == i) {
+ db->msgs[i].tid = next_tid++;
+ } else {
+ /* The base has a lower index, so its tid has already been assigned. */
+ db->msgs[i].tid = db->msgs[ix[i]].tid;
+ }
+ }
+
+ free(ix);
+ return;
+}
+/*}}}*/
+/* Binary-search the first n_msgs entries of sorted_paths (which must be
+ * sorted by path) for key.  Returns the index of the matching entry, or -1
+ * if there is none. */
+static int lookup_msgpath(struct msgpath *sorted_paths, int n_msgs, char *key)/*{{{*/
+{
+  int lo = 0;
+  int hi = n_msgs;
+  int mid = -1;
+
+  while (hi > lo) {
+    int cmp;
+
+    mid = (hi + lo) >> 1;
+    /* Should only get called on 'file' type messages - TBC */
+    cmp = strcmp(sorted_paths[mid].src.mpf.path, key);
+    if (cmp == 0) return mid;
+    /* The range is down to one entry and it did not match. */
+    if (lo == mid) return -1;
+    if (cmp > 0) {
+      hi = mid;
+    } else {
+      lo = mid;
+    }
+  }
+  /* Reached only when the loop never ran, i.e. mid is still -1. */
+  return mid;
+}
+/*}}}*/
+/* Make room for at least one more message.  When the msgs/type arrays are
+ * full, their capacity becomes 256 if it was 128 or less, and otherwise
+ * grows by half. */
+void maybe_grow_message_arrays(struct database *db)/*{{{*/
+{
+  if (db->n_msgs != db->max_msgs) {
+    return; /* still has spare capacity */
+  }
+
+  db->max_msgs = (db->max_msgs <= 128) ? 256
+                                       : db->max_msgs + (db->max_msgs >> 1);
+
+  db->msgs = grow_array(struct msgpath, db->max_msgs, db->msgs);
+  db->type = grow_array(enum message_type, db->max_msgs, db->type);
+}
+/*}}}*/
+/* Append a file-type message to the database, growing the arrays first if
+ * necessary.  The path is copied; mtime and message_size are stored as
+ * given. */
+static void add_msg_path(struct database *db, char *path, time_t mtime, size_t message_size)/*{{{*/
+{
+  struct msgpath *entry;
+
+  maybe_grow_message_arrays(db);
+
+  entry = &db->msgs[db->n_msgs];
+  db->type[db->n_msgs] = MTY_FILE;
+  entry->src.mpf.path = new_string(path);
+  entry->src.mpf.mtime = mtime;
+  entry->src.mpf.size = message_size;
+
+  /* scan_new_messages() only fills these in for a message that it parses
+   * successfully, so give them defined values for entries that it skips.
+   * (Presumably the arrays are not zero-filled when allocated - confirm
+   * against new_array/grow_array.) */
+  entry->date = 0;
+  entry->seen = 0;
+  entry->replied = 0;
+  entry->flagged = 0;
+
+  ++db->n_msgs;
+}
+/*}}}*/
+
+/* stat() the file named by mp.  If it exists and is a regular file, record
+ * its modification time and size in mp and return 1; otherwise leave mp
+ * alone and return 0. */
+static int do_stat(struct msgpath *mp)/*{{{*/
+{
+  struct stat info;
+
+  if (stat(mp->src.mpf.path, &info) < 0) {
+    return 0;
+  }
+  if (!S_ISREG(info.st_mode)) {
+    return 0;
+  }
+
+  mp->src.mpf.mtime = info.st_mtime;
+  mp->src.mpf.size = info.st_size;
+  return 1;
+}
+/*}}}*/
+/* Bring the database into line with a fresh scan of the message folders.
+ *
+ * db            : database to update in place
+ * sorted_paths  : paths found by the scan, sorted so that they can be
+ *                 binary-searched; their mtime and size fields are filled in
+ *                 here by stat()
+ * n_msgs        : number of entries in sorted_paths
+ * do_fast_index : if non-zero, a path that is already in the database is
+ *                 trusted without stat()ing the file
+ *
+ * Returns non-zero if any new message was found or any existing message
+ * was newly marked dead, and zero if nothing changed. */
+int update_database(struct database *db, struct msgpath *sorted_paths, int n_msgs, int do_fast_index)/*{{{*/
+{
+  /* The incoming list must be sorted into order, to make binary searching
+   * possible.  We search for each existing path in the incoming sorted array.
+   * If the date differs, or the file no longer exist, the existing database
+   * entry for that file is nulled.  (These are only recovered if the database
+   * is actively compressed.)  If the date differed, a new entry for the file
+   * is put at the end of the list.  Similarly, any new file goes at the end.
+   * These new entries are all rescanned to find tokens and add them to the
+   * database. */
+
+  char *file_in_db, *file_in_new_list;
+  int matched_index;
+  int i, new_entries_start_at;
+  int any_new, n_newly_pruned, n_already_dead;
+  int status;
+
+  /* file_in_db[k]       : sorted_paths[k] is already in the database
+   * file_in_new_list[k] : database message k is still present and current */
+  file_in_db = new_array(char, n_msgs);
+  file_in_new_list = new_array(char, db->n_msgs);
+  memset(file_in_db, 0, n_msgs);
+  memset(file_in_new_list, 0, db->n_msgs);
+
+  n_already_dead = 0;
+  n_newly_pruned = 0;
+
+  for (i=0; i<db->n_msgs; i++) {
+    switch (db->type[i]) {
+      case MTY_FILE:
+        matched_index = lookup_msgpath(sorted_paths, n_msgs, db->msgs[i].src.mpf.path);
+        if (matched_index >= 0) {
+          if (do_fast_index) {
+            /* Assume the presence of a matching path is good enough without
+             * even bothering to stat the file that's there now. */
+            file_in_db[matched_index] = 1;
+            file_in_new_list[i] = 1;
+          } else {
+            status = do_stat(sorted_paths + matched_index);
+            if (status) {
+              if (sorted_paths[matched_index].src.mpf.mtime == db->msgs[i].src.mpf.mtime) {
+                /* Unchanged file: keep the existing entry. */
+                file_in_db[matched_index] = 1;
+                file_in_new_list[i] = 1;
+              }
+              /* Otherwise the file is stale: leaving both flags clear treats
+               * it as though the path has changed, so the old entry is
+               * pruned and the file is re-added below. */
+            } else {
+              /* This path will get treated as dead, and be re-stated below.
+               * When that stat fails, the path won't get added to the db. */
+            }
+          }
+        }
+        break;
+      case MTY_MBOX:
+        /* Nothing to do on this pass. */
+        break;
+      case MTY_DEAD:
+        break;
+    }
+  }
+
+  /* Add new entries to database */
+  new_entries_start_at = db->n_msgs;
+
+  for (i=0; i<db->n_msgs; i++) {
+    /* Weed dead entries */
+    switch (db->type[i]) {
+      case MTY_FILE:
+        if (!file_in_new_list[i]) {
+          free(db->msgs[i].src.mpf.path);
+          db->msgs[i].src.mpf.path = NULL;
+          db->type[i] = MTY_DEAD;
+          ++n_newly_pruned;
+        }
+        break;
+      case MTY_MBOX:
+        {
+          int msg_index, file_index, number_valid;
+          int mbox_valid;
+          msg_index = db->msgs[i].src.mbox.msg_index;
+          file_index = db->msgs[i].src.mbox.file_index;
+          assert (file_index < db->n_mboxen);
+          /* An mbox message dies if its mbox has no path any more, or if its
+           * position is at or beyond the mbox's n_old_msgs_valid count. */
+          mbox_valid = (db->mboxen[file_index].path) ? 1 : 0;
+          number_valid = db->mboxen[file_index].n_old_msgs_valid;
+          if (!mbox_valid || (msg_index >= number_valid)) {
+            db->type[i] = MTY_DEAD;
+            ++n_newly_pruned;
+          }
+        }
+        break;
+      case MTY_DEAD:
+        /* already dead */
+        ++n_already_dead;
+        break;
+    }
+  }
+
+  if (verbose) {
+    fprintf(stderr, "%d newly dead messages, %d messages now dead in total\n", n_newly_pruned, n_newly_pruned+n_already_dead);
+  }
+
+  any_new = 0;
+  for (i=0; i<n_msgs; i++) {
+    if (!file_in_db[i]) {
+      any_new = 1;
+      /* The 'sorted_paths' array is only used for file-per-message folders. */
+      status = do_stat(sorted_paths + i);
+      if (status) {
+        /* We only add files that could be successfully stat()'d as regular
+         * files. */
+        add_msg_path(db, sorted_paths[i].src.mpf.path, sorted_paths[i].src.mpf.mtime, sorted_paths[i].src.mpf.size);
+      } else {
+        fprintf(stderr, "Cannot add '%s' to database; stat() failed\n", sorted_paths[i].src.mpf.path);
+      }
+    }
+  }
+
+  if (any_new) {
+    scan_new_messages(db, new_entries_start_at);
+  }
+
+  /* Add newly found mbox messages. */
+  any_new |= add_mbox_messages(db);
+
+  if (any_new) {
+    find_threading(db);
+  } else {
+    if (verbose) fprintf(stderr, "No new messages found\n");
+  }
+
+  free(file_in_db);
+  free(file_in_new_list);
+
+  return any_new || (n_newly_pruned > 0);
+}
+/*}}}*/
+/* Rewrite one match chain in terms of new message indices.
+ *
+ * new_idx[old] gives the new index of message 'old', or a negative value if
+ * that message is being dropped.  The chain is decoded, each surviving
+ * index is re-inserted into a freshly allocated encoding, and the old
+ * encoding is freed.  m->n ends up 0 if no message survived. */
+static void recode_encoding(struct matches *m, int *new_idx)/*{{{*/
+{
+ unsigned char *new_enc, *old_enc;
+ unsigned char *j, *last_char;
+ int incr, idx, n_idx;
+
+ /* Remember where the old chain starts and ends before m is reset. */
+ old_enc = m->msginfo;
+ j = old_enc;
+ last_char = old_enc + m->n;
+
+ new_enc = new_array(unsigned char, m->max); /* Probably not bigger than this. */
+ /* Reset m to an empty chain backed by the new buffer, so that the insert
+ * calls below build it up from scratch. */
+ m->n = 0;
+ m->highest = 0;
+ m->msginfo = new_enc;
+ idx = 0;
+
+ while (j < last_char) {
+ incr = read_increment(&j);
+ idx += incr;
+ n_idx = new_idx[idx];
+ if (n_idx >= 0) {
+ check_and_enlarge_encoding(m);
+ insert_index_on_encoding(m, n_idx);
+ }
+ }
+ free(old_enc);
+}
+/*}}}*/
+/* Re-encode every token of tbl according to new_idx (see recode_encoding),
+ * delete tokens whose chain has become empty, and then move the remaining
+ * tokens as needed so that no empty bucket lies between a token's natural
+ * bucket and the bucket it occupies. */
+static void recode_toktable(struct toktable *tbl, int *new_idx)/*{{{*/
+{
+ /* Re-encode the vectors according to the new path indices */
+ int i;
+ int any_dead = 0;
+ int any_moved, pass;
+
+ for (i=0; i<tbl->size; i++) {
+ struct token *tok = tbl->tokens[i];
+ if (tok) {
+ recode_encoding(&tok->match0, new_idx);
+ if (tok->match0.n == 0) {
+ /* Delete this token. Gotcha - there may be tokens further on in the
+ * array that didn't get their natural hash bucket due to collisions.
+ * Need to shuffle such tokens up to guarantee that the buckets between
+ * the natural one and the one where they are now are all occupied, to
+ * prevent their lookups failing. */
+
+#if 0
+ fprintf(stderr, "Token <%s> (bucket %d) no longer has files containing it, deleting\n", tok->text, i);
+#endif
+ free_token(tok);
+ tbl->tokens[i] = NULL;
+ --tbl->n; /* Maintain number in use counter */
+ any_dead = 1;
+ }
+
+ }
+ }
+
+
+ if (any_dead) {
+ /* Now close gaps. This has to be done in a second pass, otherwise we get a
+ * problem with moving entries that need deleting back before the current
+ scan point. */
+
+ /* Sweep the whole table repeatedly until a sweep moves nothing. */
+ pass = 1;
+ for (;;) {
+ int i;
+
+ if (verbose) {
+ fprintf(stderr, "Pass %d\n", pass);
+ }
+
+ any_moved = 0;
+
+ for (i=0; i<tbl->size; i++) {
+ if (tbl->tokens[i]) {
+ int nat_bucket_i;
+ nat_bucket_i = tbl->tokens[i]->hashval & tbl->mask;
+ if (nat_bucket_i != i) {
+ /* Find earliest bucket that we could move i to */
+ /* Probe forwards (wrapping round) from the natural bucket, stopping
+ * on reaching the token's current bucket. */
+ int j = nat_bucket_i;
+ while (j != i) {
+ if (!tbl->tokens[j]) {
+ /* put it here */
+#if 0
+ fprintf(stderr, "Moved <%s> from bucket %d to %d (natural bucket %d)\n", tbl->tokens[i]->text, i, j, nat_bucket_i);
+#endif
+ tbl->tokens[j] = tbl->tokens[i];
+ tbl->tokens[i] = NULL;
+ any_moved = 1;
+ break;
+ } else {
+ j++;
+ j &= tbl->mask;
+ }
+ }
+ if (tbl->tokens[i]) {
+#if 0
+ fprintf(stderr, "NOT moved <%s> from bucket %d (natural bucket %d)\n", tbl->tokens[i]->text, i, nat_bucket_i);
+#endif
+ }
+ }
+ }
+ }
+
+ if (!any_moved) break;
+ pass++;
+ }
+ }
+}
+/*}}}*/
+/* Two-chain counterpart of recode_toktable(): re-encode both chains of
+ * every token according to new_idx, delete tokens for which both chains
+ * have become empty, and then move the remaining tokens as needed so that
+ * no empty bucket lies between a token's natural bucket and the bucket it
+ * occupies. */
+static void recode_toktable2(struct toktable2 *tbl, int *new_idx)/*{{{*/
+{
+ /* Re-encode the vectors according to the new path indices */
+ int i;
+ int any_dead = 0;
+ int any_moved, pass;
+
+ for (i=0; i<tbl->size; i++) {
+ struct token2 *tok = tbl->tokens[i];
+ if (tok) {
+ recode_encoding(&tok->match0, new_idx);
+ recode_encoding(&tok->match1, new_idx);
+ if ((tok->match0.n == 0) && (tok->match1.n == 0)) {
+ /* Delete this token. Gotcha - there may be tokens further on in the
+ * array that didn't get their natural hash bucket due to collisions.
+ * Need to shuffle such tokens up to guarantee that the buckets between
+ * the natural one and the one where they are now are all occupied, to
+ * prevent their lookups failing. */
+
+#if 0
+ fprintf(stderr, "Token <%s> (bucket %d) no longer has files containing it, deleting\n", tok->text, i);
+#endif
+ free_token2(tok);
+ tbl->tokens[i] = NULL;
+ --tbl->n; /* Maintain number in use counter */
+ any_dead = 1;
+ }
+ }
+ }
+
+ if (any_dead) {
+ /* Now close gaps. This has to be done in a second pass, otherwise we get a
+ * problem with moving entries that need deleting back before the current
+ scan point. */
+
+ /* Sweep the whole table repeatedly until a sweep moves nothing. */
+ pass = 1;
+ for (;;) {
+ int i;
+
+ if (verbose) {
+ fprintf(stderr, "Pass %d\n", pass);
+ }
+
+ any_moved = 0;
+
+ for (i=0; i<tbl->size; i++) {
+ if (tbl->tokens[i]) {
+ int nat_bucket_i;
+ nat_bucket_i = tbl->tokens[i]->hashval & tbl->mask;
+ if (nat_bucket_i != i) {
+ /* Find earliest bucket that we could move i to */
+ /* Probe forwards (wrapping round) from the natural bucket, stopping
+ * on reaching the token's current bucket. */
+ int j = nat_bucket_i;
+ while (j != i) {
+ if (!tbl->tokens[j]) {
+ /* put it here */
+#if 0
+ fprintf(stderr, "Moved <%s> from bucket %d to %d (natural bucket %d)\n", tbl->tokens[i]->text, i, j, nat_bucket_i);
+#endif
+ tbl->tokens[j] = tbl->tokens[i];
+ tbl->tokens[i] = NULL;
+ any_moved = 1;
+ break;
+ } else {
+ j++;
+ j &= tbl->mask;
+ }
+ }
+ if (tbl->tokens[i]) {
+#if 0
+ fprintf(stderr, "NOT moved <%s> from bucket %d (natural bucket %d)\n", tbl->tokens[i]->text, i, nat_bucket_i);
+#endif
+ }
+ }
+ }
+ }
+
+ if (!any_moved) break;
+ pass++;
+ }
+ }
+}
+/*}}}*/
+/* Remove dead messages from the database: renumber the live messages
+ * contiguously, re-encode every token table to use the new numbers, compact
+ * the message arrays, and finally cull dead mboxes.
+ *
+ * When do_integrity_checks is non-zero the database is checked before any
+ * change is made.  Returns non-zero if at least one dead message was
+ * found. */
+int cull_dead_messages(struct database *db, int do_integrity_checks)/*{{{*/
+{
+  /* Return true if any culled */
+
+  int *renumber;
+  int old_pos, new_pos, old_count;
+  int found_dead = 0;
+
+  /* Check db is OK before we start on this. (Check afterwards is done in the
+   * writer.c code.) */
+  if (do_integrity_checks) {
+    check_database_integrity(db);
+  }
+
+  if (verbose) {
+    fprintf(stderr, "Culling dead messages\n");
+  }
+
+  old_count = db->n_msgs;
+
+  /* Map each old index to its new index, or to -1 for a dead message. */
+  renumber = new_array(int, old_count);
+  new_pos = 0;
+  for (old_pos = 0; old_pos < old_count; old_pos++) {
+    enum message_type kind = db->type[old_pos];
+    if (kind == MTY_FILE || kind == MTY_MBOX) {
+      renumber[old_pos] = new_pos++;
+    } else if (kind == MTY_DEAD) {
+      renumber[old_pos] = -1;
+      found_dead = 1;
+    }
+  }
+
+  recode_toktable(db->to, renumber);
+  recode_toktable(db->cc, renumber);
+  recode_toktable(db->from, renumber);
+  recode_toktable(db->subject, renumber);
+  recode_toktable(db->body, renumber);
+  recode_toktable(db->attachment_name, renumber);
+  recode_toktable2(db->msg_ids, renumber);
+
+  /* And crunch down the filename table */
+  new_pos = 0;
+  for (old_pos = 0; old_pos < old_count; old_pos++) {
+    enum message_type kind = db->type[old_pos];
+    if (kind == MTY_FILE || kind == MTY_MBOX) {
+      if (old_pos > new_pos) {
+        db->msgs[new_pos] = db->msgs[old_pos];
+        db->type[new_pos] = db->type[old_pos];
+      }
+      new_pos++;
+    }
+  }
+  db->n_msgs = new_pos;
+
+  free(renumber);
+
+  /* .. and cull dead mboxen */
+  cull_dead_mboxen(db);
+
+  return found_dead;
+}
+/*}}}*/
diff --git a/src/mairix/dfasyn/COPYING b/src/mairix/dfasyn/COPYING
@@ -0,0 +1,339 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/src/mairix/dfasyn/INSTALL b/src/mairix/dfasyn/INSTALL
@@ -0,0 +1,19 @@
+There is no real configure mechanism (yet).
+
+To build the program
+
+ make
+
+To install the program (perhaps as root)
+
+ make prefix=/usr/local install
+
+or as yourself you might do
+
+ make prefix=$HOME install
+
+or if your distribution puts manpages in /usr/share/man, you might do
+
+ make prefix=/usr/local mandir=/usr/share/man install
+
+# vim:et:sw=4
diff --git a/src/mairix/dfasyn/Makefile b/src/mairix/dfasyn/Makefile
@@ -0,0 +1,62 @@
+# Makefile for NFA->DFA conversion utility
+#
+# Copyright (C) Richard P. Curnow 2000-2001,2003,2005,2006,2007
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+
+CC=gcc
+#CFLAGS=-g -Wall
+#CFLAGS=-O2 -pg
+CFLAGS=-Wall
+prefix?=/usr/local
+bindir=$(prefix)/bin
+mandir?=$(prefix)/man
+man1dir=$(mandir)/man1
+man5dir=$(mandir)/man5
+
+OBJ = dfasyn.o parse.o scan.o \
+ tokens.o abbrevs.o charclass.o \
+ stimulus.o \
+ blocks.o states.o \
+ n2d.o expr.o evaluator.o \
+ tabcompr.o compdfa.o
+
+all : dfasyn
+
+install : all
+ [ -d $(bindir) ] || mkdir -p $(bindir)
+ [ -d $(man1dir) ] || mkdir -p $(man1dir)
+ [ -d $(man5dir) ] || mkdir -p $(man5dir)
+ cp dfasyn $(bindir)
+ cp dfasyn.1 $(man1dir)
+ cp dfasyn.5 $(man5dir)
+
+dfasyn : $(OBJ)
+ $(CC) $(CFLAGS) -o dfasyn $(OBJ)
+
+parse.c parse.h : parse.y
+ bison -v -d -o parse.c parse.y
+
+parse.o : parse.c dfasyn.h
+
+scan.c : scan.l
+ flex -t -s scan.l > scan.c
+
+scan.o : scan.c parse.h dfasyn.h
+
+$(OBJ) : dfasyn.h
+
+clean:
+ rm -f dfasyn *.o scan.c parse.c parse.h parse.output
+
diff --git a/src/mairix/dfasyn/NEWS b/src/mairix/dfasyn/NEWS
@@ -0,0 +1,5 @@
+New in version 0.2
+==================
+
+* Added README and NEWS files
+
diff --git a/src/mairix/dfasyn/README b/src/mairix/dfasyn/README
@@ -0,0 +1,8 @@
+dfasyn is a tool for constructing state machines. The input language allows a
+lot of generality. For example, it allows repeated elements to be specified
+where the items have constraints between the end of one and the start of the
+next. (I could not find a way to define such an automaton in the lex/flex
+input language, which prompted the writing of the tool.) Currently, you must
+do a fair amount of work yourself to build a parser around the resulting state
+machine.
+
diff --git a/src/mairix/dfasyn/abbrevs.c b/src/mairix/dfasyn/abbrevs.c
@@ -0,0 +1,67 @@
+/***************************************
+ Handle abbreviations
+ ***************************************/
+
+/*
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2000-2003,2005,2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+#include "dfasyn.h"
+
+static struct Abbrev *abbrevtable=NULL;
+static int nabbrevs = 0;
+static int maxabbrevs = 0;
+
+static void grow_abbrevs(void)/*{{{*/
+{
+  /* Make room for a further 32 entries in the abbreviation table. */
+  int bigger = maxabbrevs + 32;
+  abbrevtable = resize_array(struct Abbrev, abbrevtable, bigger);
+  maxabbrevs = bigger;
+}
+/*}}}*/
+struct Abbrev * create_abbrev(const char *name, struct StimulusList *stimuli)/*{{{*/
+{
+  /* Append a new abbreviation to the table.  The name is copied with
+   * new_string(); the stimulus list pointer is stored as given.  Returns a
+   * pointer to the new table entry.
+   * NOTE(review): the result points into the growable table, so it is
+   * presumably invalidated if a later grow_abbrevs() moves the array. */
+  struct Abbrev *slot;
+
+  if (nabbrevs == maxabbrevs) grow_abbrevs();
+
+  slot = &abbrevtable[nabbrevs];
+  nabbrevs++;
+  slot->lhs = new_string(name);
+  slot->stimuli = stimuli;
+  return slot;
+}
+/*}}}*/
+struct Abbrev * lookup_abbrev(char *name)/*{{{*/
+{
+  /* Return the abbreviation called name, or NULL if there is none.  The
+   * table is walked from the newest entry backwards so that, if a name has
+   * been redefined, the most recent definition wins. */
+  int i = nabbrevs;
+
+  while (--i >= 0) {
+    if (strcmp(abbrevtable[i].lhs, name) == 0) {
+      return abbrevtable + i;
+    }
+  }
+
+  return NULL;
+}
+/*}}}*/
+
diff --git a/src/mairix/dfasyn/blocks.c b/src/mairix/dfasyn/blocks.c
@@ -0,0 +1,168 @@
+/***************************************
+ Handle blocks
+ ***************************************/
+
+/*
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2000-2003,2005,2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+#include "dfasyn.h"
+
+
+static Block **blocks = NULL;
+static int nblocks = 0;
+static int maxblocks = 0;
+
+/* ================================================================= */
+
+static void grow_blocks(void)/*{{{*/
+{
+  /* Make room for a further 32 block pointers. */
+  int bigger = maxblocks + 32;
+  blocks = resize_array(Block*, blocks, bigger);
+  maxblocks = bigger;
+}
+/*}}}*/
+static Block * create_block(char *name)/*{{{*/
+{
+  /* Allocate an empty block called name, register it at the end of the
+   * blocks[] table and return it. */
+  Block *blk;
+  int bucket;
+
+  if (nblocks == maxblocks) grow_blocks();
+
+#if 0
+  /* Not especially useful to show this */
+  if (verbose) {
+    fprintf(stderr, " %s", name);
+  }
+#endif
+
+  blk = new(Block);
+  blocks[nblocks] = blk;
+  nblocks++;
+
+  blk->name = new_string(name);
+
+  /* No states yet: every hash bucket and the flat state array start empty. */
+  for (bucket = 0; bucket < HASH_BUCKETS; bucket++) {
+    blk->state_hash[bucket].states = NULL;
+    blk->state_hash[bucket].nstates = 0;
+    blk->state_hash[bucket].maxstates = 0;
+  }
+  blk->states = NULL;
+  blk->nstates = 0;
+  blk->maxstates = 0;
+  blk->eclo = NULL;
+
+  /* Both counters start from 1. */
+  blk->subcount = 1;
+  blk->subblockcount = 1;
+  return blk;
+}
+/*}}}*/
+Block * lookup_block(char *name, int create)/*{{{*/
+{
+  /* Find the block called name, optionally creating it.  The create
+   * argument selects the policy:
+   *   USE_OLD_MUST_EXIST    - the block must already exist, else exit(1)
+   *   CREATE_MUST_NOT_EXIST - the block must be new, else exit(1)
+   *   CREATE_OR_USE_OLD     - return the existing block or make a new one
+   * For any other value the search result (possibly NULL) is returned. */
+  Block *hit = NULL;
+  int idx;
+
+  for (idx = 0; (idx < nblocks) && !hit; idx++) {
+    if (strcmp(blocks[idx]->name, name) == 0) {
+      hit = blocks[idx];
+    }
+  }
+
+  if (create == USE_OLD_MUST_EXIST) {
+    if (!hit) {
+      fprintf(stderr, "Could not find block '%s' to instantiate\n", name);
+      exit(1);
+    }
+  } else if (create == CREATE_MUST_NOT_EXIST) {
+    if (hit) {
+      fprintf(stderr, "Already have a block called '%s', cannot redefine\n", name);
+      exit(1);
+    }
+    hit = create_block(name);
+  } else if (create == CREATE_OR_USE_OLD) {
+    if (!hit) {
+      hit = create_block(name);
+    }
+  }
+
+  return hit;
+}
+/*}}}*/
+/* ================================================================= */
+static void build_instance_name(char *buf, size_t buflen, const char *instance_name, const char *local_name)/*{{{*/
+{
+  /* Write "<instance_name>.<local_name>" into buf (capacity buflen).  The
+   * names come from the user's input file and have no length limit, so a
+   * result that does not fit is reported and the program exits rather than
+   * overrunning the buffer. */
+  int needed = snprintf(buf, buflen, "%s.%s", instance_name, local_name);
+  if ((needed < 0) || ((size_t) needed >= buflen)) {
+    fprintf(stderr, "Name '%s.%s' is too long to instantiate\n", instance_name, local_name);
+    exit(1);
+  }
+}
+/*}}}*/
+void instantiate_block(Block *curblock, char *block_name, char *instance_name)/*{{{*/
+{
+  /* Copy every state of the block called block_name into curblock, with each
+   * state name and each transition destination name prefixed by
+   * "<instance_name>.".  The master block must already exist (lookup_block
+   * exits otherwise).  Exits with an error if a prefixed name would not fit
+   * in the local name buffer. */
+  Block *master = lookup_block(block_name, USE_OLD_MUST_EXIST);
+  char namebuf[1024];
+  int i;
+  for (i=0; i<master->nstates; i++) {
+    State *s = master->states[i];
+    State *new_state;
+    TransList *tl;
+    Stringlist *sl, *ex;
+
+    build_instance_name(namebuf, sizeof(namebuf), instance_name, s->name);
+
+    /* In perverse circumstances, we might already have a state called this */
+    new_state = lookup_state(curblock, namebuf, CREATE_OR_USE_OLD);
+
+    /* Clone the transitions.  Each copy is pushed on the front of the new
+     * state's list; its destination is recorded by name only (ds_ref is
+     * left NULL here). */
+    for (tl=s->transitions; tl; tl=tl->next) {
+      TransList *new_tl = new(TransList);
+      new_tl->type = tl->type;
+      /* Might cause some dangling ref problem later... */
+      new_tl->x = tl->x;
+      build_instance_name(namebuf, sizeof(namebuf), instance_name, tl->ds_name);
+      new_tl->ds_name = new_string(namebuf);
+      new_tl->ds_ref = NULL;
+      new_tl->next = new_state->transitions;
+      new_state->transitions = new_tl;
+    }
+
+    /*{{{ Copy state tags */
+    /* The list nodes are new but the tag strings themselves are shared with
+     * the master state.  The copied list replaces whatever tag list
+     * new_state had before. */
+    ex = NULL;
+    for (sl=s->tags; sl; sl=sl->next) {
+      Stringlist *new_sl = new(Stringlist);
+      new_sl->string = sl->string;
+      new_sl->next = ex;
+      ex = new_sl;
+    }
+    new_state->tags = ex;
+    /*}}}*/
+
+    /* **DON'T** COPY ENTRIES : these are deliberately dropped if they occur
+     * in a block that gets instantiated elsewhere. */
+
+  }
+}
+/*}}}*/
+/* ================================================================= */
+InlineBlock *create_inline_block(char *type, char *in, char *out)/*{{{*/
+{
+  /* Build an InlineBlock record holding private copies (via new_string) of
+   * the three strings supplied. */
+  InlineBlock *ib = new(InlineBlock);
+
+  ib->type = new_string(type);
+  ib->in = new_string(in);
+  ib->out = new_string(out);
+  return ib;
+}
+/*}}}*/
diff --git a/src/mairix/dfasyn/charclass.c b/src/mairix/dfasyn/charclass.c
@@ -0,0 +1,364 @@
+/***************************************
+ Handle character classes
+ ***************************************/
+
+/*
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2001-2003,2005,2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+#include "dfasyn.h"
+#include <ctype.h>
+
+/* Node of the singly-linked master list of character classes (see
+ * add_charclass_to_list and reduce_list). */
+struct cc_list {
+ /* Next node in the list, or NULL at the end. */
+ struct cc_list *next;
+ /* The character class carried by this node. */
+ CharClass *cc;
+};
+
+static struct cc_list *cc_list = NULL;
+static short mapping[256];
+
+int n_charclasses;
+static char *strings[256];
+
+static void set_bit(unsigned long *bitmap, int entry)/*{{{*/
+{
+  /* Set bit number entry in bitmap.  Only the low 32 bits of each array
+   * element are used: element = entry / 32, bit = entry % 32.
+   * The mask is built as unsigned long: shifting a plain int 1 left by 31
+   * overflows, and where long is 64 bits the resulting negative value would
+   * sign-extend and also set the upper half of the word. */
+  unsigned long mask = 1UL << (entry & 31);
+  bitmap[entry >> 5] |= mask;
+}
+/*}}}*/
+static void clear_bit(unsigned long *bitmap, int entry)/*{{{*/
+{
+  /* Clear bit number entry in bitmap (same layout as set_bit); every other
+   * bit of the word is left untouched. */
+  unsigned long mask = 1UL << (entry & 31);
+  bitmap[entry >> 5] &= ~mask;
+}
+/*}}}*/
+int cc_test_bit(const unsigned long *bitmap, int entry)/*{{{*/
+{
+  /* Return 1 if bit number entry is set in bitmap, otherwise 0.  The
+   * unsigned long mask makes sure that exactly one bit is examined, also
+   * for bit 31 of a word. */
+  unsigned long mask = 1UL << (entry & 31);
+  return (bitmap[entry >> 5] & mask) ? 1 : 0;
+}
+/*}}}*/
+CharClass *new_charclass(void)/*{{{*/
+{
+  /* Allocate a character class that is not yet marked as used and whose
+   * character and group bitmaps are both all-zero. */
+  CharClass *cc = new(CharClass);
+
+  memset(cc->group_bitmap, 0, sizeof(cc->group_bitmap));
+  memset(cc->char_bitmap, 0, sizeof(cc->char_bitmap));
+  cc->is_used = 0;
+  return cc;
+}
+/*}}}*/
+void free_charclass(CharClass *what)/*{{{*/
+{
+ /* Release a character class by handing it to free().  The master list is
+  * not consulted here. */
+ free(what);
+}
+/*}}}*/
+void add_charclass_to_list(CharClass *cc)/*{{{*/
+{
+  /* Push cc onto the front of the master list so that it is seen by the
+   * later processing passes. */
+  struct cc_list *node = new(struct cc_list);
+
+  node->cc = cc;
+  node->next = cc_list;
+  cc_list = node;
+}
+/*}}}*/
+void add_singleton_to_charclass(CharClass *towhat, char thechar)/*{{{*/
+{
+  /* Add one character to the class.  The char is taken as unsigned so that
+   * the bit index is always in 0..255. */
+  set_bit(towhat->char_bitmap, (int)(unsigned char) thechar);
+}
+/*}}}*/
+void add_range_to_charclass(CharClass *towhat, char start, char end)/*{{{*/
+{
+  /* Add every character between start and end (both inclusive) to the
+   * class.  The end points are compared as unsigned values and may be given
+   * in either order. */
+  int a = (int)(unsigned char) start;
+  int b = (int)(unsigned char) end;
+  int lo = (a < b) ? a : b;
+  int hi = (a < b) ? b : a;
+  int c;
+
+  for (c = lo; c <= hi; c++) {
+    set_bit(towhat->char_bitmap, c);
+  }
+}
+/*}}}*/
+void invert_charclass(CharClass *what)/*{{{*/
+{
+  /* Complement the class in place.  Only the low 32 bits of each bitmap
+   * word are flipped. */
+  int word = 0;
+
+  while (word < ULONGS_PER_CC) {
+    what->char_bitmap[word] = what->char_bitmap[word] ^ 0xffffffffUL;
+    word++;
+  }
+}
+/*}}}*/
+void diff_charclasses(CharClass *left, CharClass *right)/*{{{*/
+{
+  /* Set difference, in place: remove from left every character that is a
+   * member of right.  right is not modified. */
+  int word = 0;
+
+  while (word < ULONGS_PER_CC) {
+    unsigned long keep = ~(right->char_bitmap[word]);
+    left->char_bitmap[word] &= keep;
+    word++;
+  }
+}
+/*}}}*/
+
+static char *emit_char (char *p, int i)/*{{{*/
+{
+  /* Append a readable rendering of character i at p and return the new end
+   * pointer.  Printable characters are copied as they are, except that a
+   * backslash is doubled and '-' is written in octal; newline, carriage
+   * return, form feed and tab become \n \r \f \t; everything else becomes a
+   * three-digit octal escape.  No terminator is added in the non-octal
+   * cases. */
+  char code = 0; /* second character of a two-character escape, if any */
+
+  if ((i != '\\') && isprint(i) && (i != '-')) {
+    *p++ = i;
+    return p;
+  }
+
+  switch (i) {
+    case '\\': code = '\\'; break;
+    case '\n': code = 'n'; break;
+    case '\r': code = 'r'; break;
+    case '\f': code = 'f'; break;
+    case '\t': code = 't'; break;
+    default: break;
+  }
+
+  if (code) {
+    *p++ = '\\';
+    *p++ = code;
+  } else {
+    p += sprintf(p, "\\%03o", i);
+  }
+  return p;
+}
+/*}}}*/
+static void generate_string(int idx, const unsigned long *x)/*{{{*/
+{
+  /* Render the characters 0..255 that are set in bitmap x as a bracketed
+   * list and store a copy of the text in strings[idx].  A run of three or
+   * more consecutive characters is written as first-last; a run of two is
+   * written as the two characters side by side. */
+  char text[4096];
+  char *out = text;
+  int lo = 0;
+  int hi;
+
+  *out++ = '[';
+  while (lo < 256) {
+    /* Step over characters that are not members. */
+    if (!cc_test_bit(x, lo)) {
+      lo++;
+      continue;
+    }
+
+    /* Stretch hi to the last member of the run that starts at lo. */
+    hi = lo;
+    while ((hi + 1 < 256) && cc_test_bit(x, hi + 1)) hi++;
+
+    out = emit_char(out, lo);
+    if (hi == lo + 1) {
+      out = emit_char(out, hi);
+    } else if (hi > lo + 1) {
+      *out++ = '-';
+      out = emit_char(out, hi);
+    }
+
+    lo = hi + 1;
+  }
+  *out++ = ']';
+  *out = 0;
+  strings[idx] = new_string(text);
+}
+/*}}}*/
+static void combine(unsigned long *into, const unsigned long *with)/*{{{*/
+{
+  /* Set union, in place: OR every word of with into the matching word of
+   * into. */
+  int word = 0;
+
+  while (word < ULONGS_PER_CC) {
+    into[word] = into[word] | with[word];
+    word++;
+  }
+}
+/*}}}*/
+static void set_all(unsigned long *x)/*{{{*/
+{
+  /* Make x the full set: the low 32 bits of every word are set. */
+  int word = 0;
+
+  while (word < ULONGS_PER_CC) {
+    x[word] = 0xffffffffUL;
+    word++;
+  }
+}
+/*}}}*/
+static void clear_all(unsigned long *x)/*{{{*/
+{
+  /* Make x the empty set: every word is zeroed. */
+  int word = 0;
+
+  while (word < ULONGS_PER_CC) {
+    x[word] = 0x0UL;
+    word++;
+  }
+}
+/*}}}*/
+static int find_lowest_bit_set(const unsigned long *x)/*{{{*/
+{
+  /* Return the index of the lowest bit set in bitmap x, or -1 if every word
+   * is zero.  Within the first non-zero word the position is found by
+   * binary search over the low 32 bits: whenever the low half of the
+   * current window is empty, skip it. */
+  static const struct {
+    int shift;
+    unsigned long mask;
+  } steps[] = {
+    { 16, 0xffff }, { 8, 0x00ff }, { 4, 0x000f }, { 2, 0x0003 }, { 1, 0x0001 }
+  };
+  int word;
+
+  for (word = 0; word < ULONGS_PER_CC; word++) {
+    unsigned long val = x[word];
+    int pos = 0;
+    unsigned s;
+
+    if (!val) continue;
+
+    for (s = 0; s < sizeof(steps) / sizeof(steps[0]); s++) {
+      if (!(val & steps[s].mask)) {
+        pos += steps[s].shift;
+        val >>= steps[s].shift;
+      }
+    }
+    return (word << 5) + pos;
+  }
+  return -1;
+}
+/*}}}*/
+
+static void mark_used_in_block(const Block *b)/*{{{*/
+{
+  /* Flag as used every character class that appears on a TT_CHARCLASS
+   * transition of any state in block b.  Other transition types are
+   * ignored. */
+  int n;
+
+  for (n = 0; n < b->nstates; n++) {
+    const TransList *t = b->states[n]->transitions;
+    while (t) {
+      if (t->type == TT_CHARCLASS) {
+        t->x.char_class->is_used = 1;
+      }
+      t = t->next;
+    }
+  }
+}
+/*}}}*/
+static void reduce_list(void)/*{{{*/
+{
+  /* Prune the master list: nodes whose class is not marked as used are
+   * freed together with their class; the remaining nodes are relinked.
+   * Kept nodes are pushed on the front of the new list, so their order ends
+   * up reversed. */
+  struct cc_list *pending = cc_list;
+  struct cc_list *kept = NULL;
+
+  while (pending) {
+    struct cc_list *node = pending;
+    pending = node->next;
+
+    if (!node->cc->is_used) {
+      free(node->cc);
+      free(node);
+      continue;
+    }
+    node->next = kept;
+    kept = node;
+  }
+  cc_list = kept;
+}
+/*}}}*/
+void split_charclasses(const Block *b)/*{{{*/
+{
+ /* Partition the characters covered by the character classes used in block
+  * b into disjoint groups, such that all characters of a group belong to
+  * exactly the same set of classes.  On return:
+  *   n_charclasses          - number of groups found
+  *   mapping[c]             - group number of character c, -1 if c is in
+  *                            no used class
+  *   strings[g]             - printable form of group g
+  *   cc->group_bitmap       - for each used class, the groups it contains
+  * Classes that are not referenced from b are freed from the master list. */
+ unsigned long cc_union[ULONGS_PER_CC];
+ struct cc_list *elt;
+ int i;
+ int any_left;
+
+ /* Keep only the classes that block b really refers to. */
+ mark_used_in_block(b);
+ reduce_list();
+
+ n_charclasses = 0;
+
+ if (!cc_list) {
+ if (verbose) fprintf(stderr, "No charclasses used\n");
+ return;
+ }
+
+ /* Form the union of all used classes: the characters still waiting to be
+  * put into a group. */
+ clear_all(cc_union);
+ for (elt=cc_list; elt; elt=elt->next) {
+ combine(cc_union, elt->cc->char_bitmap);
+ }
+
+ for (i=0; i<256; i++) mapping[i] = -1;
+
+ /* Each pass peels one group off cc_union, until nothing is left. */
+ do {
+ int first_char;
+ /* NOTE: this i shadows the function-level i for the rest of the loop body. */
+ int i;
+ unsigned long pos[ULONGS_PER_CC], neg[ULONGS_PER_CC];
+ first_char = find_lowest_bit_set(cc_union);
+ /* pos = intersection of the classes that contain first_char,
+  * neg = union of the classes that do not contain it. */
+ set_all(pos);
+ clear_all(neg);
+ for (elt=cc_list; elt; elt=elt->next) {
+ if (cc_test_bit(elt->cc->char_bitmap, first_char)) {
+ for (i=0; i<ULONGS_PER_CC; i++) pos[i] &= elt->cc->char_bitmap[i];
+ } else {
+ for (i=0; i<ULONGS_PER_CC; i++) neg[i] |= elt->cc->char_bitmap[i];
+ }
+ }
+
+ /* What remains in pos are the characters that are members of exactly the
+  * same classes as first_char: this is the new group. */
+ for (i=0; i<ULONGS_PER_CC; i++) {
+ pos[i] &= ~neg[i];
+ }
+
+ generate_string(n_charclasses, pos);
+
+ /* Record the group number of each member and take it out of the set of
+  * characters still to be handled. */
+ for (i=0; i<256; i++) {
+ if (cc_test_bit(pos, i)) {
+ mapping[i] = n_charclasses;
+ clear_bit(cc_union, i);
+ }
+ }
+
+ n_charclasses++;
+ any_left = 0;
+ for (i=0; i<ULONGS_PER_CC; i++) {
+ if (cc_union[i]) {
+ any_left = 1;
+ break;
+ }
+ }
+ } while (any_left);
+
+ /* Build group bitmaps: a class contains group g if any of its characters
+  * maps to g. */
+ for (elt=cc_list; elt; elt=elt->next) {
+ for (i=0; i<256; i++) {
+ if (cc_test_bit(elt->cc->char_bitmap, i)) {
+ set_bit(elt->cc->group_bitmap, mapping[i]);
+ }
+ }
+ }
+
+ /* NOTE(review): unlike the "No charclasses used" message above, this one is
+  * printed whether or not verbose is set. */
+ fprintf(stderr, "Got %d character classes\n", n_charclasses);
+
+ return;
+}
+/*}}}*/
+void print_charclass_mapping(FILE *out, FILE *header_out, const char *prefix_under)/*{{{*/
+{
+  /* Write the definition of the 256-entry table <prefix_under>char2tok to
+   * out, 16 values per line, and, if header_out is not NULL, a matching
+   * extern declaration to header_out.  Mapped characters are written as
+   * their group number plus ntokens; unmapped ones keep their negative
+   * value.  Nothing at all is written when the master list is empty. */
+  int ch;
+
+  if (!cc_list) return;
+
+  fprintf(out, "short %schar2tok[256] = {", prefix_under);
+  for (ch = 0; ch < 256; ch++) {
+    int tok = mapping[ch];
+
+    if (ch > 0) fputs(", ", out);
+    if ((ch & 15) == 0) fputs("\n ", out);
+
+    if (tok >= 0) tok += ntokens;
+    fprintf(out, "%3d", tok);
+  }
+  fputs("\n};\n", out);
+
+  if (header_out) {
+    fprintf(header_out, "extern short %schar2tok[256];\n",
+            prefix_under);
+  }
+}
+/*}}}*/
+void print_charclass(FILE *out, int idx)/*{{{*/
+{
+ /* Write "<idx>:<text>" to out, where <text> is the string stored in
+  * strings[idx].
+  * NOTE(review): strings[] is only filled in by generate_string(), so idx
+  * presumably has to be a group number below n_charclasses - confirm
+  * against the callers. */
+ fprintf(out, "%d:%s", idx, strings[idx]);
+}
+/*}}}*/
+
diff --git a/src/mairix/dfasyn/compdfa.c b/src/mairix/dfasyn/compdfa.c
@@ -0,0 +1,479 @@
+/***************************************
+ Routines for compressing the DFA by commoning-up equivalent states
+ ***************************************/
+
+/*
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2001-2003,2005,2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+/*
+ The input to this stage is the 'raw' DFA built from the NFA by the subset
+ construction. Depending on the style of the NFA, there may be large chunks
+ of the DFA that have equivalent functionality, in terms of resulting in the
+ same attributes for the same sequence of input tokens, but which are reached
+ by different prefixes. The idea of this stage is to common up such regions,
+ to reduce the size of the DFA and hence the table sizes that are generated.
+
+ Conceptually, the basis of the algorithm is to assign the DFA states to
+ equivalence classes. If there are N different tags-combinations, there are
+ initially N+1 classes. All states that can exit with a particular value are
+ placed in a class together, and all non-accepting states are placed together.
+ Now, a pass is made over all pairs of states. Two states remain equivalent
+ if for each token, their outbound transitions go to states in the same class.
+ If the states do not stay equivalent, the class they were in is split
+ accordingly. This is repeated again and again until no more bisections
+ occur.
+
+ The algorithm actually used is to assign an ordering to the states based on
+ their current class and outbound transitions. The states are then sorted.
+ This allows all checking to be done on near-neighbours in the sequence
+ generated by the sort, which brings the execution time down to something
+ finite.
+
+ */
+
+#include "dfasyn.h"
+
+static int last_eq_class; /* Next class to assign */
+static int Nt; /* Number of tokens; has to be made static to be visible to comparison fn. */
+
+/* To give 'general_compare' visibility of the current equiv. classes of the
+ destination states */
+static DFANode **local_dfas;
+
+static void calculate_signatures(DFANode **seq, DFANode **dfas, int ndfas)/*{{{*/
+/**** Determine state signatures based on transitions and current classes. ****/
+/* For each of the 'ndfas' states in 'seq', fold the current equivalence class
+ * of every transition destination (looked up in 'dfas') into a fresh
+ * signature via increment() and store it in the state's 'signature' field.
+ * Tokens without a transition (negative map entry) contribute nothing. */
+{
+  int n;
+
+  for (n = 0; n < ndfas; n++) {
+    DFANode *state = seq[n];
+    unsigned long acc = 0UL;
+    int tok;
+
+    for (tok = 0; tok < Nt; tok++) {
+      int target = state->map[tok];
+
+      if (target < 0) continue;
+
+      /* Only the low 4 bits of the class are used: 16 bit pairs in sig */
+      acc = increment(acc, dfas[target]->eq_class & 0xf);
+    }
+
+    state->signature = acc;
+  }
+}
+/*}}}*/
+static int general_compare(const void *a, const void *b)/*{{{*/
+/************************* Do full compare on states *************************/
+/* qsort()-style comparison of two DFANode pointers.  Ordering keys, in turn:
+ * current equivalence class, signature, then - token by token - the current
+ * equivalence class of each transition destination (a missing transition
+ * counts as class -1).  Returns -1, 0 or +1; 0 means the two states are still
+ * equivalent. */
+{
+  Castderef (a, const DFANode *, aa);
+  Castderef (b, const DFANode *, bb);
+  int tok;
+
+  /* Cheap keys first */
+  if (aa->eq_class != bb->eq_class) {
+    return (aa->eq_class < bb->eq_class) ? -1 : +1;
+  }
+  if (aa->signature != bb->signature) {
+    return (aa->signature < bb->signature) ? -1 : +1;
+  }
+
+  /* The hard way: compare the transitions one token at a time, using the
+     class of the destination rather than the destination itself (otherwise
+     the compressor is very pessimistic). */
+  for (tok = 0; tok < Nt; tok++) {
+    int a_dest = aa->map[tok];
+    int b_dest = bb->map[tok];
+    int a_class = (a_dest < 0) ? -1 : local_dfas[a_dest]->eq_class;
+    int b_class = (b_dest < 0) ? -1 : local_dfas[b_dest]->eq_class;
+
+    if (a_class != b_class) {
+      return (a_class < b_class) ? -1 : +1;
+    }
+  }
+
+  /* No difference found: the states are still equivalent */
+  return 0;
+}
+/*}}}*/
+static int split_classes(DFANode **seq, DFANode **dfas, int ndfas)/*{{{*/
+/*********************** Do one pass of class splitting ***********************/
+/* Recomputes the signatures, sorts 'seq' with general_compare() and walks the
+ * sorted sequence: a state that shares its class with its predecessor but
+ * compares unequal to it is moved to a brand new class (++last_eq_class).
+ * New classes are staged in new_eq_class and only copied to eq_class once the
+ * whole pass is done, so every comparison in the pass sees the old classes.
+ * Returns 1 if at least one class was split, 0 otherwise. */
+{
+  int n;
+  int split_happened = 0;
+
+  calculate_signatures(seq, dfas, ndfas);
+  qsort(seq, ndfas, sizeof seq[0], general_compare);
+
+  seq[0]->new_eq_class = seq[0]->eq_class;
+
+  for (n = 1; n < ndfas; n++) {
+    DFANode *cur = seq[n];
+    DFANode *prev = seq[n-1];
+
+    if (cur->eq_class != prev->eq_class) {
+      /* The two states were already separated: class is kept */
+      cur->new_eq_class = cur->eq_class;
+      continue;
+    }
+
+    if (general_compare(&seq[n], &seq[n-1]) == 0) {
+      /* Still equivalent: follow the predecessor, which also works when the
+         predecessor itself has just been given a new class. */
+      cur->new_eq_class = prev->new_eq_class;
+    } else {
+      /* Different transition pattern, split existing equivalent class */
+      cur->new_eq_class = ++last_eq_class;
+      split_happened = 1;
+      if (verbose) fprintf(stderr, "Found %d equivalence classes\r", last_eq_class+1);
+    }
+  }
+
+  /* Commit the staged classes. */
+  for (n = 0; n < ndfas; n++) {
+    seq[n]->eq_class = seq[n]->new_eq_class;
+  }
+
+  return split_happened;
+}
+/*}}}*/
+static int initial_compare(const void *a, const void *b)/*{{{*/
+/************************** Sort based on tags **************************/
+/* qsort()-style comparison of two DFANode pointers by their attributes.  For
+ * each evaluator in turn, a state's attribute is its own attrs[] entry or,
+ * when that is NULL, the evaluator's default attribute.  A missing attribute
+ * sorts before a present one; two present attributes are ordered by strcmp().
+ * Returns -1, 0 or +1; 0 means the attribute sets are identical. */
+{
+  Castderef (a, const DFANode *, aa);
+  Castderef (b, const DFANode *, bb);
+  int ev;
+
+  for (ev = 0; ev < n_evaluators; ev++) {
+    const char *lhs = aa->attrs[ev] ? aa->attrs[ev] : get_defattr(ev);
+    const char *rhs = bb->attrs[ev] ? bb->attrs[ev] : get_defattr(ev);
+    int order;
+
+    /* Neither side has an attribute here: look further up the vectors */
+    if (!lhs && !rhs) continue;
+
+    if (!lhs) return -1;
+    if (!rhs) return +1;
+
+    order = strcmp(lhs, rhs);
+    if (order != 0) {
+      return (order < 0) ? -1 : +1;
+    }
+    /* Equal attributes: look further up the vectors */
+  }
+
+  /* Got here => both states were identical in terms of their attribute sets */
+  return 0;
+}
+/*}}}*/
+static void assign_initial_classes(DFANode **seq, int ndfas)/*{{{*/
+/******************* Determine initial equivalence classes. *******************/
+/* Sorts 'seq' with initial_compare() and numbers the states from class 0
+ * upwards, starting a new class each time a state differs from its
+ * predecessor.  Leaves the highest class number used in last_eq_class. */
+{
+  int n;
+
+  qsort(seq, ndfas, sizeof seq[0], initial_compare);
+
+  last_eq_class = 0;
+  seq[0]->eq_class = last_eq_class;
+
+  for (n = 1; n < ndfas; n++) {
+    /* Not same as previous entry => open a new class */
+    if (initial_compare(&seq[n-1], &seq[n]) != 0) {
+      last_eq_class++;
+    }
+    seq[n]->eq_class = last_eq_class;
+  }
+}
+/*}}}*/
+/*{{{ compress_states() */
+static void compress_states(struct DFA *dfa, int n_dfa_entries, struct DFAEntry *dfa_entries)
+/***** Compress the DFA so there is precisely one state in each eq. class *****/
+/* Rewrites 'dfa' in place, using the eq_class already stored on every state
+ * and the file-scope 'last_eq_class' and 'Nt':
+ *  - the lowest-indexed state of each class becomes its representative;
+ *  - states flagged is_dead get new_index -1 and are dropped;
+ *  - all transitions, the state_number of each of the 'n_dfa_entries'
+ *    elements of 'dfa_entries', and the from_state of the kept states are
+ *    renumbered to the new indices;
+ *  - dfa->s is compacted and dfa->n is set to the number of kept states.
+ * The DFANode objects that are dropped are not freed here. */
+{
+ int *reps;
+ int i, j, t;
+ int neqc;
+ int new_index;
+
+ if (verbose) fprintf(stderr, "%d DFA states before compression\n", dfa->n);
+
+ if (report) {
+ fprintf(report,
+ "\n-----------------------------\n"
+ "------ COMPRESSING DFA ------\n"
+ "-----------------------------\n");
+ }
+
+ /* Classes are numbered 0..last_eq_class inclusive. */
+ neqc = 1 + last_eq_class;
+
+ /* Array containing which state is the representative of each eq. class.
+ Keep the state which had the lowest array index. */
+ reps = new_array(int, neqc);
+
+ for (i=0; i<neqc; i++) reps[i] = -1; /* undefined */
+
+ /* Go through DFA states to find the representative of each class. */
+ for (i=0; i<dfa->n; i++) {
+ int eqc = dfa->s[i]->eq_class;
+ if (reps[eqc] < 0) {
+ reps[eqc] = i;
+ dfa->s[i]->is_rep = 1;
+ } else {
+ dfa->s[i]->is_rep = 0;
+ }
+ }
+
+ /* Go through DFA states and assign new indices. */
+ for (i=0, new_index=0; i<dfa->n; i++) {
+ if (dfa->s[i]->is_dead) {
+ /* Checked first, so a dead state never consumes a new index even if it
+ is the representative of its class. */
+ dfa->s[i]->new_index = -1;
+ if (report) fprintf(report, "Old DFA state %d becomes -1 (dead state)\n", i);
+ } else if (dfa->s[i]->is_rep) {
+ dfa->s[i]->new_index = new_index++;
+ if (report) fprintf(report, "Old DFA state %d becomes %d\n", i, dfa->s[i]->new_index);
+ } else {
+ int eqc = dfa->s[i]->eq_class;
+ int rep = reps[eqc];
+
+ /* This assignment works because the representative for the class
+ must have been done earlier in the loop. */
+ dfa->s[i]->new_index = dfa->s[rep]->new_index;
+
+ if (report) fprintf(report, "Old DFA state %d becomes %d (formerly %d)\n", i, dfa->s[i]->new_index, rep);
+ }
+ }
+
+ /* Go through all transitions and fix them up. */
+ for (i=0; i<dfa->n; i++) {
+ DFANode *s = dfa->s[i];
+ for (t=0; t<Nt; t++) {
+ int dest = s->map[t];
+ if (dest >= 0) {
+ s->map[t] = dfa->s[dest]->new_index;
+ }
+ }
+ }
+
+ /* Go through the entries and fix their states */
+ for (i=0; i<n_dfa_entries; i++) {
+ int ni = dfa->s[dfa_entries[i].state_number]->new_index;
+ if (report) {
+ fprintf(report, "Entry <%s>, formerly state %d, now state %d\n",
+ dfa_entries[i].entry_name,
+ dfa_entries[i].state_number, ni);
+ }
+ /* Same value as 'ni' computed above. */
+ dfa_entries[i].state_number = dfa->s[dfa_entries[i].state_number]->new_index;
+ }
+
+ /* Fix from_state */
+ for (i=0; i<dfa->n; i++) {
+ int old_from_state, new_from_state;
+ /* If we're not going to preserve the state, move along */
+ if (!dfa->s[i]->is_rep) continue;
+ old_from_state = dfa->s[i]->from_state;
+ /* Any entry state ..., move along */
+ if (old_from_state < 0) continue;
+ /* Use the new index of the representative of the old from_state's class. */
+ new_from_state = dfa->s[reps[dfa->s[old_from_state]->eq_class]]->new_index;
+ dfa->s[i]->from_state = new_from_state;
+ }
+
+ /* Go through and crunch the entries in the DFA array, fixing up the indices */
+ /* (in-place compaction: j never overtakes i, so nothing unread is overwritten) */
+ for (i=j=0; i<dfa->n; i++) {
+ if (!dfa->s[i]->is_dead && dfa->s[i]->is_rep) {
+ dfa->s[j] = dfa->s[i];
+ dfa->s[j]->index = dfa->s[j]->new_index;
+ j++;
+ }
+ }
+
+ free(reps);
+ dfa->n = new_index; /* ignore dead states which are completely pruned. */
+ if (verbose) fprintf(stderr, "%d DFA states after compression", dfa->n);
+}
+/*}}}*/
+static void discard_nfa_bitmaps(struct DFA *dfa)/*{{{*/
+/********** Discard the (now inaccurate) NFA bitmaps from the states **********/
+/* Frees the 'nfas' member of each of the dfa->n states and resets it to NULL
+ * so that no stale pointer is left behind. */
+{
+  int k;
+
+  for (k = 0; k < dfa->n; k++) {
+    DFANode *node = dfa->s[k];
+
+    free(node->nfas);
+    node->nfas = NULL;
+  }
+}
+/*}}}*/
+static void print_classes(DFANode **dfas, int ndfas)/*{{{*/
+/* Debug aid: list the equivalence class of each of the 'ndfas' states on the
+ * report stream.  Currently disabled by the unconditional return inside the
+ * '#if 1' section below; even when enabled, nothing is printed if there is
+ * no report stream. */
+{
+  int n;
+#if 1
+  /* Comment out to print this stuff for debug */
+  return;
+#endif
+  if (report == NULL) return;
+
+  fputs("Equivalence classes are :\n", report);
+  for (n = 0; n < ndfas; n++) {
+    fprintf(report, "State %d class %d\n", n, dfas[n]->eq_class);
+  }
+  fputc('\n', report);
+}
+/*}}}*/
+static int has_any_nondefault_attribute(const DFANode *x)/*{{{*/
+/* Returns 1 if, for at least one evaluator, state 'x' has an attribute set
+ * that differs (by strcmp) from that evaluator's default attribute, else 0.
+ * An evaluator whose default attribute is NULL never counts. */
+{
+  int ev;
+
+  for (ev = 0; ev < n_evaluators; ev++) {
+    const char *attr = x->attrs[ev];
+    char *dflt;
+
+    if (!attr) continue;
+
+    dflt = get_defattr(ev);
+    if (dflt && strcmp(dflt, attr) != 0) {
+      return 1;
+    }
+  }
+
+  return 0;
+}
+/*}}}*/
+static void find_dead_states(DFANode **dfas, int ndfas, int ntokens)/*{{{*/
+{
+  /* Mark the dead states among the 'ndfas' states in 'dfas' and prune the
+   * transitions leading to them.  'ntokens' is the number of entries in each
+   * state's map[].
+   *
+   * A state is live if it carries a non-default attribute itself, or if at
+   * least one of its transitions goes to a live state; liveness is propagated
+   * backwards until a full pass changes nothing.  Every state that is still
+   * not live is dead: if you get there, you're guaranteed to be stuck.
+   *
+   * Dead states get is_dead set and from_state/via_token reset to -1 (live
+   * states get is_dead cleared), and every transition that targets a dead
+   * state is replaced by -1.  Since all the transitions of a dead state target
+   * dead states, dead states end up with no transitions at all.
+   * This will force them all into a single class later. */
+
+  int did_any;
+  int i, j;
+  /* Eventually, consider looking for results that are non-default. */
+  char *leads_to_result;
+  int total_found = 0;
+
+  leads_to_result = new_array(char, ndfas);
+  memset(leads_to_result, 0, ndfas);
+
+  if (report) {
+    fprintf(report, "Searching for dead states...\n");
+  }
+
+  /* Iterate to a fixed point: each pass may promote further states to live. */
+  do {
+    did_any = 0;
+    for (i=0; i<ndfas; i++) {
+      if (leads_to_result[i]) continue;
+
+      if (has_any_nondefault_attribute(dfas[i])) {
+        leads_to_result[i] = 1;
+        did_any = 1;
+        continue;
+      }
+
+      for (j=0; j<ntokens; j++) {
+        int next_state = dfas[i]->map[j];
+        if ((next_state >= 0) && leads_to_result[next_state]) {
+          leads_to_result[i] = 1;
+          did_any = 1;
+          break;
+        }
+      }
+    }
+  } while (did_any);
+
+  /* Now prune any transition to states that have no path to a result. */
+  for (i=0; i<ndfas; i++) {
+    if (leads_to_result[i] == 0) {
+      total_found++;
+      if (report) {
+        fprintf(report, "DFA state %d is dead\n", i);
+      }
+      dfas[i]->from_state = -1;
+      dfas[i]->via_token = -1;
+      dfas[i]->is_dead = 1;
+    } else {
+      dfas[i]->is_dead = 0;
+    }
+
+    for (j=0; j<ntokens; j++) {
+      int next_state = dfas[i]->map[j];
+      /* A negative entry means "no transition": it must not be used as an
+       * index into leads_to_result[], and there is nothing to prune. */
+      if ((next_state >= 0) && (leads_to_result[next_state] == 0)) {
+        dfas[i]->map[j] = -1;
+      }
+    }
+  }
+
+  free(leads_to_result);
+
+  if (!total_found && report) {
+    fprintf(report, "(no dead states found)\n");
+  }
+}
+/*}}}*/
+/*{{{ compress_dfa() */
+void compress_dfa(struct DFA *dfa, int ntokens,
+                  int n_dfa_entries, struct DFAEntry *dfa_entries)
+/* Entry point of this file: compress 'dfa' in place.  Dead states are found
+ * and pruned, the states are given initial equivalence classes, the classes
+ * are split repeatedly until a pass splits nothing, and finally each class is
+ * collapsed onto one state, with the 'n_dfa_entries' entry points in
+ * 'dfa_entries' renumbered to match.  Does nothing if the DFA has no states. */
+{
+  DFANode **order;      /* Working copy of the state pointers, gets sorted */
+  int k;
+
+  /* Safety net */
+  if (dfa->n <= 0) return;
+
+  /* Make the state array and token count visible to the comparison fn. */
+  local_dfas = dfa->s;
+  Nt = ntokens;
+
+  order = new_array(DFANode *, dfa->n);
+  for (k = 0; k < dfa->n; k++) {
+    order[k] = dfa->s[k];
+  }
+
+  find_dead_states(dfa->s, dfa->n, ntokens);
+
+  assign_initial_classes(order, dfa->n);
+
+  /* Refine until a whole pass leaves every class intact. */
+  for (;;) {
+    print_classes(dfa->s, dfa->n);
+    if (!split_classes(order, dfa->s, dfa->n)) break;
+  }
+
+  print_classes(dfa->s, dfa->n);
+
+  compress_states(dfa, n_dfa_entries, dfa_entries);
+  discard_nfa_bitmaps(dfa);
+
+  free(order);
+}
+/*}}}*/
+
diff --git a/src/mairix/dfasyn/configure b/src/mairix/dfasyn/configure
@@ -0,0 +1,4 @@
+#!/bin/sh
+# Placeholder configure: just print the lines of INSTALL that do not start
+# with '#'.  Plain grep is used because egrep is obsolescent and the pattern
+# needs no extended regular expression syntax.
+
+grep -v '^#' INSTALL
+
diff --git a/src/mairix/dfasyn/dfasyn.1 b/src/mairix/dfasyn/dfasyn.1
@@ -0,0 +1,154 @@
+.TH DFASYN 1 ""
+.SH NAME
+dfasyn \- generate deterministic finite automata
+.SH SYNOPSIS
+.B dfasyn
+[
+.BR \-o | \-\-output
+.I C-filename
+] [
+.BR \-ho | \-\-header-output
+.I H-filename
+] [
+.BR \-r | \-\-report
+.I report-filename
+] [
+.BR \-p | \-\-prefix
+.I prefix
+] [
+.BR \-u | \-\-uncompressed-tables
+] [
+.BR \-ud | \-\-uncompressed-dfa
+] [
+.BR \-I | \-\-inline-function
+] [
+.BR \-v | \-\-verbose
+] [
+.BR \-h | \-\-help
+]
+.I input-file
+
+.SH DESCRIPTION
+.B dfasyn
+generates a deterministic finite automaton (DFA) from a description file.
+
+.SH OPTIONS
+.SS Options controlling output files
+.TP
+.BI "-o " C-filename
+.br
+.ns
+.TP
+.BI "--output " C-filename
+.br
+Specify the name of the file to which the C program text will be written.
+If this option is not present, the C program text will be written to stdout.
+
+.TP
+.BI "-ho " H-filename
+.br
+.ns
+.TP
+.BI "--header-output " H-filename
+.br
+Specify the name of the file to which the header information will be written.
+
+.TP
+.BI "-r " report-filename
+.br
+.ns
+.TP
+.BI "--report " report-filename
+.br
+Specify the name of the file to which the report on the generated automaton
+will be written. If this option is not present, no report will be written.
+
+.TP
+.I input-file
+.br
+This is the name of the file containing the definition of the automaton. Refer
+to
+.BR dfasyn (5)
+for more information about the format of this file.
+
+.SS Options controlling the generated automaton
+.TP
+.BI "-p " prefix
+.br
+.ns
+.TP
+.BI "--prefix " prefix
+.br
+Specify the prefix to be prepended onto each symbol that
+.B dfasyn
+generates in the output file. This allows multiple automata to be linked into
+the same final program without namespace clashes.
+
+The string prepended is actually
+.I prefix
+followed by an underscore ('_').
+
+.TP
+.BR -u ", " --uncompressed-tables
+.br
+Do not compress the transition tables. By default,
+.B dfasyn
+emits the transition tables compressed, and it emits a next-state function that
+uses a bisection algorithm to search the tables. By contrast, uncompressed
+tables use a simple array indexing algorithm in the next-state algorithm.
+However, the generated tables will be much larger, especially if there is a
+large set of input symbols and the transitions in the automaton are relatively
+sparse. This option therefore represents a speed versus space trade-off in the
+generated DFA.
+
+.TP
+.BR -ud ", " --uncompressed-dfa
+.br
+Do not compress the generated DFA. By default,
+.B dfasyn
+compresses the DFA to combine common states into a single state in the final
+DFA and to remove unreachable states. This option suppresses the compression.
+Giving this option can only be to the detriment of the final DFA, in terms of
+the array sizes of its tables. However, the option is useful for debugging
+.B dfasyn
+and will also reduce the run time of
+.B dfasyn
+since a potentially complex processing step can be omitted.
+
+.TP
+.BR -I ", " --inline-function
+.br
+This causes the next-state function to be emitted as an inline function in the header output.
+Specifying this option without
+.B -ho
+is non-sensical and
+.B dfasyn
+will complain in that situation.
+
+Normally,
+.B dfasyn
+will emit the next_state function in the C program text output. This will
+incur a function call overhead for each input symbol when the DFA is used at
+run-time. If this is significant to the final application, the
+.B -I
+option may be useful to allow the next-state function to be inlined.
+
+.SS General options
+
+.TP
+.BR -v ", " --verbose
+.br
+Make the output more verbose; provide more comfort messages whilst
+.B dfasyn
+is running.
+
+.TP
+.BR -h ", " --help
+.br
+Show usage summary and exit
+
+.SH "SEE ALSO"
+.BR dfasyn (5),
+.BR bison (1),
+.BR flex (1)
+
diff --git a/src/mairix/dfasyn/dfasyn.5 b/src/mairix/dfasyn/dfasyn.5
@@ -0,0 +1,650 @@
+.TH DFASYN 5 ""
+.SH NAME
+dfasyn
+.SH SYNOPSIS
+This page describes the format of the
+.I input-file
+for the
+.B dfasyn
+deterministic finite automaton generator.
+.SH DESCRIPTION
+.SS Overview
+Reserved words may be given in all-lowercase, all-uppercase, initial capitals,
+or 'WikiWord' format (e.g.
+.B endblock
+may be given as
+.BR endblock ", " Endblock ", " EndBlock " or " ENDBLOCK ).
+
+.SS Block declaration
+A
+.B block
+declaration is used to group together a set of state declarations. Blocks are
+useful if there are blocks of states and their interconnections that occur more
+than once in the NFA. In this case it is useful to declare a block, allowing
+that block to be instantiated more than once elsewhere in the input file.
+
+Since state declarations are only allowed inside blocks, there must be at least
+one block declaration in any useful input file.
+
+The syntax of a block declaration is
+.RS
+.B block
+.I block-name
+{
+.br
+.RS 2
+[
+.I instance-declarations
+]
+.br
+[
+.I state-declarations
+]
+.RE
+.br
+}
+.RE
+
+.SS State declarations
+A
+.B state
+declaration gives rise to a state in the input NFA.
+
+The syntax of a state declaration is
+.RS
+.B state
+.I state-name
+[
+.B entry
+.I entry-name
+]
+.br
+.RS 2
+[
+.I transitions
+]
+.RE
+.RE
+
+States are implicitly terminated by the beginning of another type of construct.
+
+.B entry
+.I entry-name
+(if present) defines the name of an entry point into the scanner. In the
+resulting C-code, a symbol called
+.I entry-name
+will be declared. Its value will be the DFA state number of the state
+containing just this NFA state (plus its epsilon closure.) This allows for
+multiple scanners to be generated from the same input file. For example, if
+one scanner is the same as another but with some extra text that must match at
+the beginning, two different
+.B entry
+states can be declared to represent this.
+.B dfasyn
+will be able to common-up all of the common part of the DFA's transition
+tables.
+
+If there are no
+.B entry
+directives anywhere in the input file,
+.B dfasyn
+defaults to the last mentioned state in the last block being the entry state.
+
+.I transitions
+is a whitespace-separated sequence of zero or more transitions. These define which
+of the automaton's input symbols cause a transition from this state to which other
+states.
+
+The same state may be declared more than once inside its block. In this case,
+the transitions given in the second declaration will be merged with those given
+in the first, as though all the transitions had been given in the first place.
+
+.SS Instance declarations
+A block may be instantiated inside another block. This is useful if there is a
+block of states with their transitions that occurs in more than one place
+within the NFA.
+
+The syntax for an instance declaration is
+
+.RS
+.I instance-name
+:
+.I block-name
+.RE
+
+where
+.I instance-name
+is the name of the new instance, and
+.I block-name
+is the name of the block that is being instantiated. This block
+.B must
+have been declared earlier in the input file. For one thing, this prevents
+mutually recursive definitions.
+
+When such an instance has been created, the states inside it may be referred to
+within the enclosing block by prefixing their names with the
+.I instance-name
+followed by a period.
+
+.SS Transitions
+A state-to-state transition is specified as follows.
+
+.RS
+.I transition
+->
+.I destinations
+.RE
+
+.I destinations
+is a comma-separated list of one or more fully-qualified state names. These
+are the states to which the NFA moves if the
+.I transition
+is matched next in the input. The destination state names are allowed to be
+forward-references; just the name is stored during parsing, and a second pass
+later is used to resolve all the names. There is no need for a named
+destination to actually be declared with another state definition; a state just
+comes into being if it is named at all.
+
+A
+.I transition
+defines the inputs that are required to cause the scanner to move
+from one state to another. A
+.I transition
+is a semicolon-separated list of one or more
+.I stimuli.
+(If there is only one stimulus, no semicolon is required.) The transition
+matches as a whole if the stimuli are matched individually in sequential order
+from left to right.
+
+.SS Transitions to a tag
+Where a transition leads to a tagged exit state, the following syntax is used:
+
+.RS
+.I transition
+=
+.I tags
+.RE
+
+where
+.I tags
+is a comma-separated list of one or more tag names. Thus a construction like
+
+.RS
+state foo XXX = TAG1
+.RE
+
+indicates that matching the token XXX leads to a state in which TAG1 applies.
+
+.SS Stimuli
+A
+.B stimulus
+is a pipe-separated list of alternatives. Each alternative may be one of the following:
+.IP "*" 7
+the name of a token
+.IP "*" 7
+a character class
+.IP "*" 7
+the name of an abbreviation
+.IP "*" 7
+an empty string (which gives rise to an
+.BR "epsilon transition" )
+.IP "*" 7
+an inline block instance
+
+.SS Input symbols
+Input symbols can be defined in two ways. The first is to use ASCII characters
+directly. The second is to define a set of
+.I tokens
+and use a front-end module to generate these based on the actual input. You
+can actually mix both types of input symbol. For example, you might wish to
+use ASCII characters mostly, but detect \(dqend-of-file\(dq as an explicit symbol.
+
+.SS ASCII input and character classes.
+
+Single ASCII characters can be given in double-quotes. Sets of ASCII
+characters can be given in square brackets, similar to shell globbing.
+Character classes can be negated and differenced.
+
+.IP [a] 12
+The character "a".
+.IP [abe-h] 12
+Any of the characters "a", "b", "e", "f", "g", "h".
+.IP ~[abc] 12
+Any of the 253 characters excluding "a", "b" and "c"; a negated character class.
+.IP [^abc] 12
+Ditto - another way of expressing a negated character class.
+.IP [a-z]~[c] 12
+Equivalent to [abd-z].
+
+.PP
+The following special cases are available within the square brackets:
+
+.IP \(rs- 8
+A hyphen. Normally the hyphen is used as a range separator. To get a literal
+hyphen, it must be escaped by a back-slash.
+.IP \(rs] 8
+A closing square bracket. The escaping is required to prevent it being handled
+as the end of the character class.
+.IP \(rs\(rs 8
+A literal backslash.
+.IP \(rs^ 8
+A literal "^".
+.IP \(rsn 8
+The same character as "\(rsn" in C.
+.IP \(rsr 8
+The same character as "\(rsr" in C.
+.IP \(rsf 8
+The same character as "\(rsf" in C.
+.IP \(rst 8
+The same character as "\(rst" in C.
+.IP ^A 8
+Generate a control character, in this case ASCII character 1. Defined for ^@
+through to ^Z.
+.IP \(rsxa9 8
+The ASCII character with hex value 0xa9. Upper or lower case hex may be used.
+.IP \(rs234
+The ASCII character with octal value 0234.
+
+.SS Tokens
+To define non-ASCII inputs, at least one
+.B tokens
+directive must be used. The syntax is
+.PP
+.B tokens
+.I list-of-tokens
+.PP
+where
+.I list-of-tokens
+is a space-separated list of token names. Each token name is a string that
+will be acceptable as a C macro name when prefixed by the current prefix string
+plus an underscore.
+
+If more than one
+.B tokens
+line appears in the input file, the 2nd and subsequent lines are treated as
+though their entries were concatenated with the 1st line.
+
+.SS Abbreviations
+An
+.B abbreviation
+provides a convenient way to define a shorthand name for a frequently used
+.B stimulus.
+
+The syntax is
+
+.RS
+.B abbrev
+.I abbrev-name
+=
+.I stimulus
+.RE
+
+For example:
+
+.RS
+abbrev FOO = [aeiouAEIOU] | A_TOKEN | <xyzzy:in->out>
+.RE
+
+.SS Inline block instances
+A
+.B stimulus
+may take the form of a block instance. This is a convenient shorthand when a
+complex sequence of input tokens needs to be matched as part of a transition.
+
+The syntax of an inline block instance is
+.RS
+.RI < block_name : entry_state "->" exit_state >
+.RE
+
+As an example, given a block
+.B double_a
+defined like this
+.RS
+block double_a
+ state in A -> out
+.br
+endblock
+.RE
+
+the following construction
+.RS
+block x
+ state foo <double_a:in->out> ; B ; <double_a:in->out> -> bar
+.br
+endblock
+.RE
+
+is equivalent to
+.RS
+block x
+ aa1 : double_a
+ aa2 : double_a
+ state foo -> aa1.in
+ state aa1.out
+ B -> aa2.in
+ state aa2.out -> bar
+.br
+endblock
+.RE
+
+Note that in the second example, where explicit instances have been created,
+they must have unique names. In the first case,
+.B dfasyn
+will create the two anonymous instances automatically and handle all the
+plumbing to connect up the in and out states. Note there is no requirement for
+the states to be named 'in' and 'out'; that is merely a convention. An
+instanced block may have multiple inputs, with different inputs being used in
+different instantiations of the block, for example.
+
+.SS Tags and attributes
+.B Tags
+are associated with the NFA states in the input. An NFA state may have an
+arbitrary number of tags associated with it, through what amounts to a list of
+strings.
+.B Attributes
+are attached to the DFA states in the output. In the generated C-file, the
+attributes are expressed in terms of an array which is indexed by the DFA state
+number and whose elements are the attribute values applying to the states.
+
+Once the DFA has been generated,
+.B dfasyn
+knows the NFA states that apply in each DFA state. From this, the tags
+associated with a DFA state are given by the union of all the tags applying in
+all the NFA states that apply in that DFA state.
+
+The input file defines how a set of tags applying in a DFA state is to be
+reduced to a single attribute value. A boolean expression language is provided
+for this purpose.
+
+Although the default is to generate a single attribute table,
+.B dfasyn
+can generate arbitrarily many tables if required. This is achieved by using
+.B attribute groups.
+The NFA tag namespace is shared across all such groups. The group syntax is as
+follows:
+
+.RS
+.B group
+.I groupname
+.B {
+.I declaration
+[
+.RI ", " declaration
+\ ...
+]
+.B }
+.RE
+
+where each
+.I declaration
+is one of the following:
+
+.RS
+.B attr
+.I attribute-name
+[
+.RI ", " attribute-name
+\ ... ]
+.br
+.B attr
+.I attribute-name
+.B :
+.I expression
+.br
+.B early
+.B attr
+.I attribute-name
+[
+.RI ", " attribute-name
+\ ... ]
+.br
+.B early
+.B attr
+.I attribute-name
+.B :
+.I expression
+.RE
+
+In the form with no expression, each
+.I attribute-name
+has an implicit expression consisting of just the tag with the same name as
+itself.
+
+.I expression
+is defined in the section
+.B Expressions
+later. The short form
+
+.RS
+.B attr
+foo
+.RE
+
+is short for
+.RS
+.B attr
+foo
+.B :
+foo
+.RE
+
+i.e. it allows an attribute to be defined which has the same name as a tag and
+which is active in the cases where precisely that tag is active.
+
+If an attribute is prefixed by
+.BR early ,
+it means that the C-code you provide to drive the DFA is going to stop scanning
+once this state attribute is detected. For example, this would apply if you
+were coding a "shortest match" scanner.
+.B dfasyn
+will prune all the transitions away from any DFA state having such an
+attribute. This may lead to greater opportunities for
+.B dfasyn
+to compress the DFA.
+
+A default attribute must be declared. This is used to fill all the entries in
+the attribute array for DFA states that end up with no explicit attribute
+defined. (It is also used in determining where the DFA may be optimised to
+remove "dead states".) The syntax is
+
+.RS
+.B defattr
+.I default-attribute-string
+.RE
+
+Finally, the C-type of the attribute must be declared. This becomes the base
+type of the array indexed by the DFA state number. The syntax is
+
+.RS
+.B type
+.I attribute-type-name
+.RE
+
+It is illegal for more than one attribute in a particular attribute group to be
+active in a DFA state. If this situation occurs, it indicates that the
+expression logic for that group is defective.
+
+.SS Expressions
+An
+.I expression
+defines an attribute in terms of a boolean relationship between one or more
+tags. An
+.I expression
+may be any one of the following:
+
+.RS
+.IR expression " & " expression
+.br
+.IR expression " | " expression
+.br
+.IR expression " ^ " expression
+.br
+.IR expression " ? " expression " : " expression
+.br
+.RI ( expression )
+.br
+.RI "~" expression
+.br
+.RI "!" expression
+.br
+.I tag-name
+.RE
+
+Note that
+.RI "~" expression
+and
+.RI "!" expression
+both mean the negation of expression.
+
+The operator precedence is what would be expected for a C-programmer.
+
+.SH Prefix specification
+The
+.B prefix
+used in the generated C-file can optionally be set in the input file using the following syntax:
+
+.RS
+.B prefix
+.I prefix-string
+.RE
+
+where
+.IR prefix-string _
+(i.e. the specific string followed by an underscore) will occur at the start of
+each symbol name in the generated C-file.
+
+If the prefix has been set via the command line using
+.BR -p ,
+the
+.B prefix
+line in the input file will be ignored and a warning given.
+
+.SH "THE GENERATED C-FILE"
+The generated file exports the following symbols that can be used by the calling program:
+
+.TP
+.B short
+.IB prefix_ char2tok
+[256];
+.br
+If character classes have been used, this table maps from ASCII values to the
+internal token numbers used by the generated DFA. This array will be defined
+in the generated C-file. If a header file is being generated, it will be
+declared in there also.
+
+.TP
+.B #define
+.IB prefix_ TOKEN
+.I numeric_value
+.br
+If a
+.B tokens
+directive has been used, each such token will be assigned a number. These
+assignments are emitted by
+.B dfasyn
+as a series of #define lines. Each token name from the input file will have the
+.I prefix
+and an underscore prepended to form the name of the symbol in the #define.
+If a header file is being generated
+.RB ( -ho ),
+these definitions are placed in the header file. Otherwise, they are placed in
+the main output C-file.
+
+.TP 7
+.B int
+.IB prefix_ next_state
+(int current_state, int next_state);
+.br
+This is the prototype for the next state function which the calling program must invoke.
+
+If no
+.B -I
+option has been used, this function will be defined in the generated C-file.
+If a header file is being generated, it will be prototyped in there also.
+
+If
+.B -I
+has been used, the function will be defined in the header file.
+
+.TP
+.B int
+.IB prefix _ entry-name
+.br
+If the
+.B entrystruct
+directive has not been used, this format is used to define the DFA state
+numbers for the defined entry points. The calling program uses these values to
+set the
+.I current_state
+at the start of the scanning process, depending on which entry point is being
+used.
+
+If there is more than one entry, there will be more than one such line.
+
+
+.TP
+.B struct
+.I entrystruct-type
+{ ... }
+.I entrystruct-var
+.br
+If the
+.B entrystruct
+directive has been used, the DFA state numbers for the entry points are
+declared as elements of a struct. The struct member names are identical to the
+entry names used in the
+.B dfasyn
+input file. The declaration of the struct variable containing the state
+numbers will be in the generated C-file. If a header file is being generated
+.RB ( -ho ),
+the definition of the struct type will be in there. Otherwise, it will be in
+the C-file also.
+
+.TP 12
+.I attr-type
+.IB prefix_ attr
+.RI [ #DFA-states ]
+.br
+This defines the attributes for each of the DFA states in the default attribute
+group. If no
+.B type
+.I attr-type
+declaration was in the input file, the default of
+.B short
+will be used.
+
+If other attribute groups are defined, there will be a similar array for each one:
+
+.TP 18
+.I group-attr-type
+.I prefix_group-name
+.RI [ #DFA-states ]
+.br
+For the attribute group declared with
+.B group
+.I group-name
+in the input file, this defines the attribute of each of the DFA states in that
+group.
+
+.SH TEXT PASSTHROUGH
+To pass a block of literal text through to the output file without
+interpretation, enclose it in %{ ... %} like this:
+
+.RS
+%{
+.br
+#include "foo.h"
+.br
+%}
+.RE
+
+The opening and closing patterns must be on lines on their own (trailing
+whitespace is allowed).
+
+
+.SH "SEE ALSO"
+.BR dfasyn (1)
+
+
+
diff --git a/src/mairix/dfasyn/dfasyn.c b/src/mairix/dfasyn/dfasyn.c
@@ -0,0 +1,690 @@
+/***************************************
+ Main program for NFA to DFA table builder program.
+ ***************************************/
+
+/*
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2000-2003,2005,2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+#include "dfasyn.h"
+
+/* Output streams shared by the whole generator.  They are opened in main():
+ * 'report' receives the generator report, 'output' the generated C text and
+ * 'header_output' the generated header.  NULL means "not in use". */
+FILE *report = NULL;
+FILE *output = NULL;
+FILE *header_output = NULL;
+
+/* If non-null this gets prepended onto the names of all the entities that
+ * are generated in the output file. */
+char *prefix = NULL;
+
+/* Parser entry point (defined in the generated parser, not in this file). */
+extern int yyparse(void);
+
+/* ================================================================= */
+/* Struct type name and variable name recorded by define_entrystruct().
+ * Both stay NULL until that function has been called once. */
+static char *entrystruct = NULL;
+static char *entryvar = NULL;
+
+void define_entrystruct(const char *s, const char *v)/*{{{*/
+/* Remember the struct type name (s) and the struct variable name (v) that
+   the entry-point table is to be emitted with.  Private copies of both
+   strings are kept.  A second call is a fatal error: a diagnostic naming the
+   rejected type goes to stderr and the program exits with status 1. */
+{
+  if (entrystruct) {
+    fprintf(stderr, "Can't redefine entrystruct with <%s>\n", s);
+    exit(1);
+  }
+
+  entrystruct = new_string(s);
+  entryvar = new_string(v);
+}
+/*}}}*/
+/* ================================================================= */
+static void print_token_table(void)/*{{{*/
+/* Emit one "#define <stem>_<token> <index>" line for every entry of
+   toktable, where <stem> is the user-supplied prefix or "TOK_" when no
+   prefix was given.  The lines go to the header file when one is being
+   written, otherwise to the main output. */
+{
+  /* Not sure how it makes sense to write this to the C file : maybe if you're going
+   * to include the C file into a bigger one it's reasonable?  Anyway, the intention
+   * is that you're more likely to use this for real if you're writing a header file. */
+  FILE *sink = header_output ? header_output : output;
+  const char *stem = prefix ? prefix : "TOK_";
+  int tok;
+
+  for (tok = 0; tok < ntokens; tok++) {
+    fprintf(sink, "#define %s_%s %d\n", stem, toktable[tok], tok);
+  }
+}
+/*}}}*/
+static void print_attr_tables(struct DFA *dfa, const char *prefix_under)/*{{{*/
+/* For each evaluator (attribute group) write an array to the main output
+   holding one attribute per DFA state.  A state with no attribute of its own
+   gets the group's default.  An unnamed group is emitted under the name
+   "attr".  When a header is being generated, a matching extern declaration
+   is written there as well. */
+{
+  int group;
+
+  for (group = 0; group < n_evaluators; group++) {
+    const int last = dfa->n - 1;
+    char *fallback = get_defattr(group);
+    char *label = get_attr_name(group);
+    int st;
+
+    if (!label) {
+      label = "attr";
+    }
+
+    fprintf(output, "%s %s%s[] = {\n", get_attr_type(group), prefix_under, label);
+    for (st = 0; st < dfa->n; st++) {
+      char *own = dfa->s[st]->attrs[group];
+      /* Every element but the last is followed by a comma. */
+      fprintf(output, " %s%c /* State %d */\n",
+          own ? own : fallback,
+          (st < last) ? ',' : ' ',
+          st);
+    }
+    fputs("};\n\n", output);
+
+    if (header_output) {
+      fprintf(header_output, "extern %s %s%s[];\n", get_attr_type(group), prefix_under, label);
+    }
+  }
+}
+/*}}}*/
+static void check_default_attrs(void)/*{{{*/
+/* Verify that every evaluator has a default attribute.  Each one that lacks
+   a default is reported on stderr (an unnamed evaluator is shown as
+   "(DEFAULT)"); if any was missing the program exits with status 1 after all
+   of them have been reported. */
+{
+  int group;
+  int missing = 0;
+
+  for (group = 0; group < n_evaluators; group++) {
+    char *fallback = get_defattr(group);
+    char *label = get_attr_name(group);
+
+    if (fallback) {
+      continue;
+    }
+    fprintf(stderr, "ERROR: No defattr definition for %s\n",
+        label ? label : "(DEFAULT)");
+    missing = 1;
+  }
+
+  if (missing) {
+    exit(1);
+  }
+}
+/*}}}*/
+static void write_next_state_function_uncompressed(int Nt, int do_inline, const char *prefix_under)/*{{{*/
+/* Write the next_state function that indexes the uncompressed transition
+   table.  Nt is the number of columns (tokens) per state row.  With
+   do_inline set the function is written to the header as 'static inline';
+   otherwise it goes to the main output, and a header (if any) receives an
+   extern prototype for it. */
+{
+  FILE *sink = do_inline ? header_output : output;
+  const char *qualifier = do_inline ? "static inline " : "";
+
+  fprintf(sink, "%sint %snext_state(int current_state, int next_token) {\n",
+      qualifier, prefix_under);
+  fprintf(sink, " if (next_token < 0 || next_token >= %d) return -1;\n", Nt);
+  fprintf(sink, " return %strans[%d*current_state + next_token];\n",
+      prefix_under, Nt);
+  fputs("}\n", sink);
+
+  if (do_inline || !header_output) {
+    return;
+  }
+  fprintf(header_output, "extern int %snext_state(int current_state, int next_token);\n",
+      prefix_under);
+}
+/*}}}*/
+static void print_uncompressed_tables(struct DFA *dfa, int do_inline, const char *prefix_under)/*{{{*/
+/* Print out the state/transition table uncompressed, i.e. every
+   token has an array entry in every state.  This is fast to access
+   but quite wasteful on memory with many states and many tokens.
+   The table is 'static' unless the lookup function is being inlined, in
+   which case the header gets an extern declaration for it.  The matching
+   next_state function is written afterwards. */
+{
+  const int Nt = ntokens + n_charclasses;
+  int emitted = 0;
+  int state, tok;
+
+  fprintf(output, "%sshort %strans[] = {",
+      do_inline ? "" : "static ",
+      prefix_under);
+
+  if (do_inline) {
+    fprintf(header_output, "extern short %strans[];\n", prefix_under);
+  }
+
+  for (state = 0; state < dfa->n; state++) {
+    for (tok = 0; tok < Nt; tok++, emitted++) {
+      /* Comma between elements; start a new line every eight elements. */
+      if (emitted > 0) {
+        fputc(',', output);
+      }
+      fputs((emitted % 8 == 0) ? "\n " : " ", output);
+      fprintf(output, "%4d", dfa->s[state]->map[tok]);
+    }
+  }
+
+  fputs("\n};\n\n", output);
+
+  write_next_state_function_uncompressed(Nt, do_inline, prefix_under);
+}
+/*}}}*/
+static int check_include_char(struct DFA *dfa, int this_state, int token)/*{{{*/
+/* Decide whether the transition of 'this_state' on 'token' needs an explicit
+   entry in the compressed table.  With a default state, only transitions
+   that differ from the default state's are needed; without one, only
+   transitions whose target is non-negative are needed.  Returns non-zero
+   when the entry is needed. */
+{
+  DFANode *node = dfa->s[this_state];
+  int target = node->map[token];
+
+  if (node->defstate < 0) {
+    return (target >= 0);
+  }
+  return (target != dfa->s[node->defstate]->map[token]);
+}
+/*}}}*/
+static void write_next_state_function_compressed(int do_inline, const char *prefix_under)/*{{{*/
+/* Write the next_state function for traversing compressed tables into the
+ output file.
+
+ With do_inline set the function is written to the header as 'static inline';
+ otherwise it goes to the main output and the header (if any) receives an
+ extern prototype.
+
+ The emitted function bisects the slice base[s] .. base[s+1] of the token
+ table looking for next_token.  On a hit it returns the parallel entry of the
+ nextstate table.  On a miss it retries in defstate[s], and returns -1 once
+ the state number has gone negative. */
+{
+ FILE *dest;
+ dest = do_inline ? header_output : output;
+
+ fprintf(dest, "%sint %snext_state(int current_state, int next_token) {\n",
+ do_inline ? "static inline " : "",
+ prefix_under);
+ fprintf(dest, " int h, l, m, xm;\n");
+ fprintf(dest, " while (current_state >= 0) {\n");
+ fprintf(dest, " l = %sbase[current_state], h = %sbase[current_state+1];\n", prefix_under, prefix_under);
+ fprintf(dest, " while (h > l) {\n");
+ fprintf(dest, " m = (h + l) >> 1; xm = %stoken[m];\n", prefix_under);
+ fprintf(dest, " if (xm == next_token) goto done;\n");
+ /* m == l means the interval has shrunk to one element that did not match */
+ fprintf(dest, " if (m == l) break;\n");
+ fprintf(dest, " if (xm > next_token) h = m;\n");
+ fprintf(dest, " else l = m;\n");
+ fprintf(dest, " }\n");
+ /* no explicit entry in this state: fall back to its default state */
+ fprintf(dest, " current_state = %sdefstate[current_state];\n", prefix_under);
+ fprintf(dest, " }\n");
+ fprintf(dest, " return -1;\n");
+ fprintf(dest, " done:\n");
+ fprintf(dest, " return %snextstate[m];\n", prefix_under);
+ fprintf(dest, "}\n");
+ if (!do_inline && header_output) {
+ fprintf(header_output, "extern int %snext_state(int current_state, int next_token);\n",
+ prefix_under);
+ }
+
+}
+/*}}}*/
+static void print_compressed_tables(struct DFA *dfa, int do_inline, const char *prefix_under)/*{{{*/
+/* Print state/transition table in compressed form. This is more
+ economical on storage, but requires a bisection search to find
+ the next state for a given current state & token
+
+ Four arrays are written to the main output, in this order:
+ token[] - token numbers of the transitions kept for each state
+ nextstate[] - destination state for each kept transition (parallel to token[])
+ base[] - for each state, the index of its first kept transition, plus
+ one final element holding the total count
+ defstate[] - each state's default state
+ The arrays are 'static' unless do_inline is set, in which case the header
+ gets extern declarations for them. Finally the matching next_state function
+ is written.
+
+ NOTE(review): token[] is emitted as 'unsigned char' and base[] as
+ 'unsigned short'; nothing here checks that the values fit - confirm the
+ limits are enforced elsewhere. */
+{
+ /* basetab[i] = number of transitions emitted before state i; one extra slot for the total */
+ int *basetab = new_array(int, dfa->n + 1);
+ int Nt = ntokens + n_charclasses;
+ int n, i, j;
+
+ /* In every table below, n counts the elements written so far: it decides
+ whether a comma is needed and when to start a new line (every 8 elements). */
+ n = 0;
+ fprintf(output, "%sunsigned char %stoken[] = {",
+ do_inline ? "" : "static ",
+ prefix_under);
+ for (i=0; i<dfa->n; i++) {
+ for (j=0; j<Nt; j++) {
+ if (check_include_char(dfa, i, j)) {
+ if (n>0) fputc (',', output);
+ if (n%8 == 0) {
+ fprintf(output, "\n ");
+ } else {
+ fputc(' ', output);
+ }
+ n++;
+ fprintf(output, "%3d", j);
+ }
+ }
+ }
+ fprintf(output, "\n};\n\n");
+
+ /* Same traversal and same filter as above, so nextstate[k] pairs with token[k] */
+ n = 0;
+ fprintf(output, "%sshort %snextstate[] = {",
+ do_inline ? "" : "static ",
+ prefix_under);
+ for (i=0; i<dfa->n; i++) {
+ basetab[i] = n;
+ for (j=0; j<Nt; j++) {
+ if (check_include_char(dfa, i, j)) {
+ if (n>0) fputc (',', output);
+ if (n%8 == 0) {
+ fprintf(output, "\n ");
+ } else {
+ fputc(' ', output);
+ }
+ n++;
+ fprintf(output, "%5d", dfa->s[i]->map[j]);
+ }
+ }
+ }
+ fprintf(output, "\n};\n\n");
+ /* Terminating entry, so that base[s+1] is valid for the last state */
+ basetab[dfa->n] = n;
+
+ n = 0;
+ fprintf(output, "%sunsigned short %sbase[] = {",
+ do_inline ? "" : "static ",
+ prefix_under);
+ for (i=0; i<=dfa->n; i++) {
+ if (n>0) fputc (',', output);
+ if (n%8 == 0) {
+ fprintf(output, "\n ");
+ } else {
+ fputc(' ', output);
+ }
+ n++;
+ fprintf(output, "%5d", basetab[i]);
+ }
+ fprintf(output, "\n};\n\n");
+
+ n = 0;
+ fprintf(output, "%sshort %sdefstate[] = {",
+ do_inline ? "" : "static ",
+ prefix_under);
+ for (i=0; i<dfa->n; i++) {
+ if (n>0) fputc (',', output);
+ if (n%8 == 0) {
+ fprintf(output, "\n ");
+ } else {
+ fputc(' ', output);
+ }
+ n++;
+ fprintf(output, "%5d", dfa->s[i]->defstate);
+ }
+ fprintf(output, "\n};\n\n");
+
+ /* An inlined next_state lives in the header, so the tables must be visible from there */
+ if (do_inline) {
+ fprintf(header_output, "extern unsigned char %stoken[];\n", prefix_under);
+ fprintf(header_output, "extern short %snextstate[];\n", prefix_under);
+ fprintf(header_output, "extern unsigned short %sbase[];\n", prefix_under);
+ fprintf(header_output, "extern short %sdefstate[];\n", prefix_under);
+ }
+ free(basetab);
+
+ write_next_state_function_compressed(do_inline, prefix_under);
+}
+/*}}}*/
+static void print_entries_table(const char *prefix_under)/*{{{*/
+/* Emit the DFA state numbers of the entry points.
+   Without an entrystruct, each entry becomes its own
+   "int <prefix_under><entry> = <state>;" definition in the main output, with
+   an extern declaration in the header if one is being written.
+   With an entrystruct, the entries become int members of a single struct:
+   the main output gets the struct definition plus an initialised variable,
+   and the header (if any) gets an extern declaration of the same struct and
+   variable. */
+{
+  int k;
+
+  if (!entrystruct) {
+    for (k = 0; k < n_dfa_entries; k++) {
+      const struct DFAEntry *ent = &dfa_entries[k];
+
+      fprintf(output, "int %s%s = %d;\n",
+          prefix_under, ent->entry_name, ent->state_number);
+      if (header_output) {
+        fprintf(header_output, "extern int %s%s;\n",
+            prefix_under, ent->entry_name);
+      }
+    }
+    return;
+  }
+
+  /* If we write the struct defn to the header file, we ought not to emit the
+   * full struct defn again in the main output.  This is tricky unless we can
+   * guarantee the header will get included, though. */
+  fprintf(output, "struct %s {\n", entrystruct);
+  if (header_output) {
+    fprintf(header_output, "extern struct %s {\n", entrystruct);
+  }
+
+  /* One int member per entry point, named after the entry. */
+  for (k = 0; k < n_dfa_entries; k++) {
+    const char *member = dfa_entries[k].entry_name;
+
+    fprintf(output, " int %s;\n", member);
+    if (header_output) {
+      fprintf(header_output, " int %s;\n", member);
+    }
+  }
+
+  fprintf(output, "} %s = {\n", entryvar);
+  if (header_output) {
+    fprintf(header_output, "} %s;\n", entryvar);
+  }
+
+  /* Initialisers, in member order, separated by ",\n". */
+  for (k = 0; k < n_dfa_entries; k++) {
+    if (k > 0) {
+      fputs(",\n", output);
+    }
+    fprintf(output, " %d", dfa_entries[k].state_number);
+  }
+  fputs("\n};\n", output);
+}
+/*}}}*/
+/* ================================================================= */
+static void deal_with_multiple_entries(Block **blk, struct DFA **dfa)/*{{{*/
+/* Build one DFA that serves every declared entry point.
+
+   The distinct blocks that own the entry states are merged into a single
+   "jumbo" block.  Its states are the union of theirs, renamed to
+   "<block>.<state>" and re-indexed.  The usual pipeline is then run on the
+   jumbo block (charclass split/expansion, epsilon closure, transition map)
+   and the global dfa_entries table is filled with one record per entry point
+   before the DFA is built.
+
+   On return *blk is the jumbo block and *dfa the DFA built from it. */
+{
+  /* Get the list of blocks that are to be combined to form a union of all their states. */
+  struct Entrylist *e;
+  int Ne;
+  Block **blocks;
+  Block *jumbo;
+  int bi, Nb, Ns, si, ei;
+
+  for (Ne=0, e=entries; e; e=e->next) Ne++;
+  if (report) {
+    fprintf(report, "Processing %d separate entry points\n", Ne);
+  }
+
+  /* Collect each parent block once, in order of first appearance.  There can
+   * be no more distinct blocks than entries, hence the array size. */
+  blocks = new_array(Block*, Ne);
+  for (Nb=0, e=entries; e; e=e->next) {
+    int matched = 0;
+    for (bi=0; bi<Nb; bi++) {
+      if (e->state->parent == blocks[bi]) {
+        matched = 1;
+        break;
+      }
+    }
+    if (!matched) {
+      blocks[Nb++] = e->state->parent;
+    }
+  }
+  for (Ns=0, bi=0; bi<Nb; bi++) {
+    Ns += blocks[bi]->nstates;
+  }
+
+  if (report) {
+    fprintf(report, "Entries in %d blocks, total of %d states\n",
+        Nb, Ns);
+  }
+
+  jumbo = new(Block);
+  /* Start from an all-zero block so that the members not assigned below
+   * (state_hash, subcount, subblockcount) hold defined values should any
+   * later stage read them. */
+  memset(jumbo, 0, sizeof(*jumbo));
+  jumbo->name = "(UNION OF MULTIPLE BLOCKS)";
+  jumbo->nstates = jumbo->maxstates = Ns;
+  jumbo->states = new_array(State *, Ns);
+  jumbo->eclo = NULL;
+
+  for (bi=0, si=0; bi<Nb; bi++) {
+    int ns = blocks[bi]->nstates;
+    int i;
+    int block_name_len;
+    /* The State objects are shared with the source block, not copied. */
+    memcpy(jumbo->states + si, blocks[bi]->states, sizeof(State *) * ns);
+    block_name_len = strlen(blocks[bi]->name);
+    for (i=0; i<ns; i++) {
+      int len;
+      char *new_name;
+      State *s = jumbo->states[si + i];
+      /* +2 : one for the '.' separator, one for the terminating NUL */
+      len = block_name_len + strlen(s->name) + 2;
+      new_name = new_array(char, len);
+      strcpy(new_name, blocks[bi]->name);
+      strcat(new_name, ".");
+      strcat(new_name, s->name);
+      free(s->name);
+      s->name = new_name;
+    }
+    si += ns;
+  }
+
+  /* The block list was only scratch space for the merge; release it. */
+  free(blocks);
+
+  /* Reindex all the states */
+  for (si=0; si<Ns; si++) {
+    jumbo->states[si]->index = si;
+  }
+
+  split_charclasses(jumbo);
+  expand_charclass_transitions(jumbo);
+
+  if (verbose) fprintf(stderr, "Computing epsilon closure...\n");
+  generate_epsilon_closure(jumbo);
+  print_nfa(jumbo);
+  build_transmap(jumbo);
+
+  if (verbose) fprintf(stderr, "Building DFA...\n");
+  n_dfa_entries = Ne;
+  dfa_entries = new_array(struct DFAEntry, Ne);
+  for (e=entries, ei=0; e; e=e->next, ei++) {
+    dfa_entries[ei].entry_name = new_string(e->entry_name);
+    /* The re-indexed NFA state number of the entry state. */
+    dfa_entries[ei].state_number = e->state->index;
+  }
+  *dfa = build_dfa(jumbo);
+  *blk = jumbo;
+
+}
+/*}}}*/
+/* ================================================================= */
+static void usage(void)/*{{{*/
+/* Write the copyright banner and the command line summary to stderr. */
+{
+  static const char help_text[] =
+    "dfasyn, Copyright (C) 2001-2003,2005,2006 Richard P. Curnow\n"
+    "\n"
+    "dfasyn comes with ABSOLUTELY NO WARRANTY.\n"
+    "This is free software, and you are welcome to redistribute it\n"
+    "under certain conditions; see the GNU General Public License for details.\n"
+    "\n"
+    "Usage: dfasyn [OPTION]... FILE\n"
+    "Read state-machine description from FILE and generate a deterministic automaton.\n"
+    "Write results to stdout unless options dictate otherwise.\n"
+    "\n"
+    "Output files:\n"
+    " -o, --output FILE Define the name of the output file (e.g. foobar.c)\n"
+    " -ho, --header-output FILE Define the name of the header output file (e.g. foobar.h)\n"
+    " -r, --report FILE Define the name where the full generator report goes (e.g. foobar.report)\n"
+    "\n"
+    "Generated automaton:\n"
+    " -p, --prefix PREFIX Specify a prefix for the variables and functions in the generated file(s)\n"
+    " -u, --uncompressed-tables Don't compress the generated transition tables\n"
+    " -ud, --uncompressed-dfa Don't common-up identical states in the DFA\n"
+    " -I, --inline-function Make the next_state function inline (requires -ho)\n"
+    "\n"
+    "General:\n"
+    " -v, --verbose Be verbose\n"
+    " -h, --help Display this help message\n";
+
+  /* The text contains no conversion specifications, so it is written verbatim. */
+  fputs(help_text, stderr);
+}
+/*}}}*/
+/* ================================================================= */
+/* Return the value that follows the option currently at **argv, stepping the
+ * argument cursor on to it.  *argc counts the current word plus everything
+ * after it, so a value is only present when it is at least 2.  Exits with a
+ * diagnostic otherwise, rather than reading past the end of argv. */
+static char *take_option_value(int *argc, char ***argv)/*{{{*/
+{
+  if (*argc < 2) {
+    fprintf(stderr, "Option %s requires an argument\n", **argv);
+    exit(1);
+  }
+  ++*argv, --*argc;
+  return **argv;
+}
+/*}}}*/
+int main (int argc, char **argv)/*{{{*/
+/* Program entry point: parse the command line, open the input/output/header/
+   report streams, parse the state-machine description, build and compress
+   the DFA, then write the generated tables and lookup function.
+
+   Exit status: 0 on success (the value returned by yyparse), 1 on a usage,
+   file-open, parse or missing-defattr error, 2 when accepting states had
+   ambiguous attribute values. */
+{
+  int result;
+
+  Block *main_block;
+  char *input_name = NULL;
+  char *output_name = NULL;
+  char *header_output_name = NULL;
+  char *report_name = NULL;
+  int uncompressed_tables = 0;
+  int uncompressed_dfa = 0; /* Useful for debug */
+  int do_inline = 0;
+  char *prefix_under;
+  FILE *input = NULL;
+  struct DFA *dfa;
+  /* Entry record for the legacy single-entry case.  It lives at function
+   * scope because dfa_entries keeps pointing at it until compress_dfa() has
+   * run, well after the block that fills it in has been left. */
+  struct DFAEntry only_entry;
+
+  verbose = 0;
+  report = NULL;
+
+  /*{{{ Parse cmd line arguments */
+  /* '> 0' also stops the loop cleanly if the program is started with argc == 0 */
+  while (++argv, --argc > 0) {
+    if (!strcmp(*argv, "-h") || !strcmp(*argv, "--help")) {
+      usage();
+      exit(0);
+    } else if (!strcmp(*argv, "-v") || !strcmp(*argv, "--verbose")) {
+      verbose = 1;
+    } else if (!strcmp(*argv, "-o") || !strcmp(*argv, "--output")) {
+      output_name = take_option_value(&argc, &argv);
+    } else if (!strcmp(*argv, "-ho") || !strcmp(*argv, "--header-output")) {
+      header_output_name = take_option_value(&argc, &argv);
+    } else if (!strcmp(*argv, "-r") || !strcmp(*argv, "--report")) {
+      report_name = take_option_value(&argc, &argv);
+    } else if (!strcmp(*argv, "-u") || !strcmp(*argv, "--uncompressed-tables")) {
+      uncompressed_tables = 1;
+    } else if (!strcmp(*argv, "-ud") || !strcmp(*argv, "--uncompressed-dfa")) {
+      uncompressed_dfa = 1;
+    } else if (!strcmp(*argv, "-I") || !strcmp(*argv, "--inline-function")) {
+      do_inline = 1;
+    } else if (!strcmp(*argv, "-p") || !strcmp(*argv, "--prefix")) {
+      prefix = take_option_value(&argc, &argv);
+    } else if ((*argv)[0] == '-') {
+      /* Unknown options are reported but deliberately not fatal. */
+      fprintf(stderr, "Unrecognized command line option %s\n", *argv);
+    } else {
+      /* Last non-option word wins. */
+      input_name = *argv;
+    }
+  }
+  /*}}}*/
+
+  if (do_inline && !header_output_name) {/*{{{*/
+    fprintf(stderr,
+        "--------------------------------------------------------------\n"
+        "It doesn't make sense to try inlining if you're not generating\n"
+        "a separate header file.\n"
+        "Not inlining the transition function.\n"
+        "--------------------------------------------------------------\n"
+        );
+    do_inline = 0;
+  }
+  /*}}}*/
+  if (input_name) {/*{{{*/
+    input = fopen(input_name, "r");
+    if (!input) {
+      fprintf(stderr, "Can't open %s for input, exiting\n", input_name);
+      exit(1);
+    }
+  } else {
+    input = stdin;
+  }
+  /*}}}*/
+  if (output_name) {/*{{{*/
+    output = fopen(output_name, "w");
+    if (!output) {
+      fprintf(stderr, "Can't open %s for writing, exiting\n", output_name);
+      exit(1);
+    }
+  } else {
+    output = stdout;
+  }
+  /*}}}*/
+  if (header_output_name) {/*{{{*/
+    header_output = fopen(header_output_name, "w");
+    if (!header_output) {
+      fprintf(stderr, "Can't open %s for writing, exiting\n", header_output_name);
+      exit(1);
+    }
+  }
+  /* otherwise the header stuff just goes to the same fd as the main output. */
+
+  /*}}}*/
+  if (report_name) {/*{{{*/
+    report = fopen(report_name, "w");
+    if (!report) {
+      /* Not fatal: carry on without a report. */
+      fprintf(stderr, "Can't open %s for writing, no report will be created\n", report_name);
+    }
+  }
+  /*}}}*/
+
+  if (verbose) {
+    fprintf(stderr, "General-purpose automaton builder\n");
+    fprintf(stderr, "Copyright (C) Richard P. Curnow 2000-2003,2005,2006\n");
+  }
+
+  eval_initialise();
+
+  if (verbose) fprintf(stderr, "Parsing input...");
+  yyin = input;
+
+  /* Set yyout.  This means that if anything leaks from the scanner, or appears
+     in a %{ .. %} block, it goes to the right place. */
+  yyout = output;
+
+  result = yyparse();
+  if (result > 0) exit(1);
+  if (verbose) fprintf(stderr, "\n");
+
+  make_evaluator_array();
+  check_default_attrs();
+
+  if (!entries) {
+    /* Support legacy method : the last state to be current in the input file
+     * is the entry state of the NFA */
+    State *start_state;
+    start_state = get_curstate();
+    main_block = start_state->parent;
+    split_charclasses(main_block);
+    expand_charclass_transitions(main_block);
+    if (verbose) fprintf(stderr, "Computing epsilon closure...\n");
+    generate_epsilon_closure(main_block);
+    print_nfa(main_block);
+    build_transmap(main_block);
+
+    if (verbose) fprintf(stderr, "Building DFA...\n");
+    n_dfa_entries = 1;
+    dfa_entries = &only_entry;
+    only_entry.entry_name = "(ONLY ENTRY)";
+    only_entry.state_number = start_state->index;
+    dfa = build_dfa(main_block);
+  } else {
+    /* Allow generation of multiple entry states, so you can use the same input file when
+     * you need several automata that have a lot of logic in common. */
+    deal_with_multiple_entries(&main_block, &dfa);
+  }
+  if (report) {
+    fprintf(report, "--------------------------------\n"
+        "DFA structure before compression\n"
+        "--------------------------------\n");
+  }
+  print_dfa(dfa);
+
+  if (had_ambiguous_result) {
+    fprintf(stderr, "No output written, there were ambiguous attribute values for accepting states\n");
+    exit(2);
+  }
+
+  if (!uncompressed_dfa) {
+    if (verbose) fprintf(stderr, "\nCompressing DFA...\n");
+    compress_dfa(dfa, ntokens + n_charclasses, n_dfa_entries, dfa_entries);
+  }
+
+  if (verbose) fprintf(stderr, "\nCompressing transition tables...\n");
+  compress_transition_table(dfa, ntokens + n_charclasses);
+
+  if (report) {
+    fprintf(report, "-------------------------------\n"
+        "DFA structure after compression\n"
+        "-------------------------------\n");
+  }
+  if (verbose) fprintf(stderr, "Writing outputs...\n");
+  print_dfa(dfa);
+
+  /* Generated identifiers are "<prefix>_<name>", or just "<name>" with no prefix. */
+  if (prefix) {
+    /* +2 : one for the '_', one for the terminating NUL */
+    prefix_under = new_array(char, 2 + strlen(prefix));
+    strcpy(prefix_under, prefix);
+    strcat(prefix_under, "_");
+  } else {
+    prefix_under = "";
+  }
+
+  if (header_output) {
+    fprintf(header_output, "#ifndef %sHEADER_H\n", prefix_under);
+    fprintf(header_output, "#define %sHEADER_H\n", prefix_under);
+  }
+
+  print_token_table();
+  print_charclass_mapping(output, header_output, prefix_under);
+  print_attr_tables(dfa, prefix_under);
+
+  if (uncompressed_tables) {
+    print_uncompressed_tables(dfa, do_inline, prefix_under);
+  } else {
+    print_compressed_tables(dfa, do_inline, prefix_under);
+  }
+
+  if (entries) {
+    /* Emit entry table */
+    print_entries_table(prefix_under);
+  } else {
+    /* Legacy behaviour - DFA state 0 is implicitly the single entry state. */
+  }
+
+  if (report) {
+    fclose(report);
+    report = NULL;
+  }
+
+  report_unused_tags();
+
+  if (header_output) {
+    fprintf(header_output, "#endif\n");
+  }
+
+  return result;
+}
+/*}}}*/
diff --git a/src/mairix/dfasyn/dfasyn.h b/src/mairix/dfasyn/dfasyn.h
@@ -0,0 +1,365 @@
+/***************************************
+ Header file for NFA->DFA conversion utility.
+ ***************************************/
+
+/*
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2001-2003,2005,2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+#ifndef N2D_H
+#define N2D_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Allocation helpers. Each expands to a plain malloc/realloc call and yields
+ * a pointer of the requested element type. None of them checks for a NULL
+ * result, so the caller gets NULL back on allocation failure. */
+
+/* Allocate storage for one object of type T. */
+#define new(T) ((T *) malloc(sizeof(T)))
+/* Allocate storage for an array of N objects of type T. */
+#define new_array(T,N) ((T *) malloc((N) * sizeof(T)))
+/* Grow/shrink arr to newN elements of type T; a NULL arr is allocated afresh. */
+#define resize_array(T,arr,newN) ((T *) ((arr) ? realloc(arr,(newN)*sizeof(T)) : malloc((newN)*sizeof(T))))
+/* Duplicate the string s into fresh storage. Note that s is evaluated twice. */
+#define new_string(s) strcpy((char *)malloc((strlen(s)+1)*sizeof(char)),s)
+
+/* For typecasting, especially useful for declarations of local ptrs to args
+ of a qsort comparison fn */
+/* Declares nx of type T, initialised with x cast to T. */
+#define Castdecl(x, T, nx) T nx = (T) x
+
+/* Declares nx of type T, initialised with the T that x points at. */
+#define Castderef(x, T, nx) T nx = *(T*) x
+
+/* Globally visible options to control reporting */
+/* Output streams: generator report, generated C text, generated header. */
+extern FILE *report;
+extern FILE *output;
+extern FILE *header_output;
+
+/* Bison interface. */
+extern FILE *yyin;
+extern FILE *yyout;
+
+/* Non-zero enables progress messages on stderr. */
+extern int verbose;
+
+/* Optional prefix for the names of the generated entities (NULL if none). */
+extern char *prefix;
+
+/* Temporary - this will be done better when the charclass stuff is
+ * added. */
+/* Token names, indexed by token number, and how many there are. */
+extern char **toktable;
+extern int ntokens;
+
+extern int n_charclasses;
+
+extern int had_ambiguous_result;
+
+/* Table of entry points into the DFA, and its length. */
+extern int n_dfa_entries;
+extern struct DFAEntry *dfa_entries;
+
+/* Forward declarations for types that are referred to before their
+ * definitions further down this header. */
+struct State;
+struct Block;
+struct StimulusList;
+
+/* A named abbreviation: 'lhs' is the name being defined and 'stimuli' is the
+ * list of stimuli it stands for. */
+struct Abbrev {/*{{{*/
+ char *lhs; /* Defined name */
+ struct StimulusList *stimuli;
+#if 0
+ char **rhs; /* Token/define */
+ int nrhs;
+ int maxrhs;
+#endif
+};
+/*}}}*/
+
+/* Discriminator for struct Stimulus: tells which kind of stimulus it is and
+ * hence which member of its union is meaningful. */
+typedef enum StimulusType {/*{{{*/
+ T_EPSILON,
+ T_TOKEN,
+ T_ABBREV,
+ T_INLINEBLOCK,
+ T_CHARCLASS
+} StimulusType;
+/*}}}*/
+/* Description of a block used inline as a stimulus: the type of block and the
+ * names of the nodes through which it is entered and left. */
+typedef struct InlineBlock {/*{{{*/
+ char *type; /* Block type */
+ char *in; /* Name of input node */
+ char *out; /* Name of output node */
+} InlineBlock;
+/*}}}*/
+
+/* Number of unsigned longs in each CharClass bitmap. */
+#define ULONGS_PER_CC 8
+
+/* A character class, held as two bitmaps of ULONGS_PER_CC unsigned longs each.
+ * NOTE(review): presumably char_bitmap has one bit per character in the class
+ * and group_bitmap one bit per character group it covers - confirm against
+ * charclass.c. */
+typedef struct CharClass {/*{{{*/
+ int is_used;
+ unsigned long char_bitmap[ULONGS_PER_CC];
+ unsigned long group_bitmap[ULONGS_PER_CC];
+} CharClass;
+/*}}}*/
+/* One stimulus: a tagged union in which 'type' says which member of 'x' is
+ * in use. */
+typedef struct Stimulus {/*{{{*/
+ StimulusType type;
+ union {
+ /* TODO : token should eventually become a struct ref ? */
+ int token;
+ struct Abbrev *abbrev;
+ /* placeholders */
+ InlineBlock *inline_block;
+ CharClass *char_class;
+ } x;
+} Stimulus;
+/*}}}*/
+/* Node of a singly linked list of stimuli. */
+typedef struct StimulusList {/*{{{*/
+ struct StimulusList *next;
+ Stimulus *stimulus;
+} StimulusList;
+/*}}}*/
+/* Discriminator for struct TransList: the kind of stimulus a transition is
+ * taken on. */
+typedef enum TransType {/*{{{*/
+ TT_EPSILON,
+ TT_TOKEN,
+ TT_CHARCLASS
+} TransType;
+/*}}}*/
+/* Node of a singly linked list of transitions out of a state. 'type' selects
+ * the live member of 'x'. The destination state is held both by name
+ * (ds_name) and by pointer (ds_ref).
+ * NOTE(review): presumably ds_ref is filled in from ds_name by
+ * fixup_state_refs() - confirm. */
+typedef struct TransList {/*{{{*/
+ struct TransList *next;
+ TransType type;
+ union {
+ int token;
+ CharClass *char_class;
+ } x;
+ char *ds_name;
+ struct State *ds_ref;
+} TransList;
+/*}}}*/
+/* Node of a singly linked list of strings. */
+typedef struct Stringlist {/*{{{*/
+ struct Stringlist *next;
+ char *string;
+} Stringlist;
+/*}}}*/
+
+#if 0
+typedef struct InlineBlockList {/*{{{*/
+ struct InlineBlockList *next;
+ InlineBlock *ib;
+} InlineBlockList;
+/*}}}*/
+#endif
+
+/* One NFA state. It belongs to a block ('parent'), is identified within that
+ * block by 'index', and carries its outgoing transitions plus the lists of
+ * tags and entry names attached to it. */
+typedef struct State {/*{{{*/
+ char *name;
+ int index; /* Array index in containing block */
+ struct Block *parent;
+ TransList *transitions;
+ Stringlist *tags;
+ Stringlist *entries;
+
+ /* Pointers to the nodes in the 'transitions' list, sorted into canonical order */
+ TransList **ordered_trans;
+ int n_transitions;
+
+ unsigned char removed; /* Flag indicating state has been pruned by compression stage */
+} State;
+/*}}}*/
+/* A growable array of state pointers: 'nstates' entries are in use out of
+ * 'maxstates' allocated. Used for the hash buckets in struct Block. */
+typedef struct S_Stateset {/*{{{*/
+ State **states;
+ int nstates;
+ int maxstates;
+} Stateset;
+/*}}}*/
+/* Size of the per-block state hash table. HASH_MASK relies on HASH_BUCKETS
+ * being a power of two. */
+#define HASH_BUCKETS 64
+#define HASH_MASK (HASH_BUCKETS-1)
+
+/* A named block of NFA states. */
+typedef struct Block {/*{{{*/
+ char *name;
+
+ /* The master table of states within this block. This has to be in a flat
+ array because we have to work with respect to state indices when doing the
+ 2D bitmap stuff for the subset construction. */
+ State **states;
+ int nstates;
+ int maxstates;
+
+ /* epsilon closure for this block (treating it as a top-level block.) */
+ unsigned long **eclo;
+
+ /* Hash table for getting rapid access to a state within the block, given
+ its name */
+ Stateset state_hash[HASH_BUCKETS];
+
+ int subcount; /* Number for generating substates */
+ int subblockcount; /* Number for generating inline subblocks */
+} Block;
+/*}}}*/
+/* Node of the singly linked list of declared entry points: the entry's name
+ * and the NFA state it starts in. */
+struct Entrylist {/*{{{*/
+ struct Entrylist *next;
+ char *entry_name;
+ State *state;
+};
+/*}}}*/
+/* Head of the entry point list; NULL when the input declared no entries. */
+extern struct Entrylist *entries;
+
+/* One DFA state, together with the working fields that the later compression
+ * passes (compdfa.c, tabcompr.c) store in it. */
+typedef struct DFANode {/*{{{*/
+ unsigned long *nfas;
+ unsigned long signature; /* All the longwords in the nfas array xor'ed together */
+ int index; /* Entry's own index in the array */
+ int *map; /* index by token code */
+ int from_state; /* the state which provided the first transition to this one (leading to its creation) */
+ int via_token; /* the token through which we got to this state the first time. */
+ Stringlist *nfa_exit_sl; /* NFA exit values */
+ Stringlist *nfa_attr_sl; /* NOTE(review): was also commented "NFA exit values"; presumably the NFA attribute values - confirm */
+ char **attrs; /* Attributes, computed by boolean expressions defined in input text */
+ int has_early_exit; /* If !=0, the scanner is expected to exit immediately this DFA state is entered.
+ It means that no out-bound transitions have to be created. */
+
+ /* Fields calculated in compdfa.c */
+
+ /* The equivalence class the state is in. */
+ int eq_class;
+
+ /* Temp. storage for the new eq. class within a single pass of the splitting alg. */
+ int new_eq_class;
+
+ /* Signature field from above is also re-used. */
+
+ int is_rep; /* Set if state is chosen as the representative of its equivalence class. */
+ int is_dead; /* Set if the state has no path to a non-default result */
+ int new_index; /* New index assigned to the state. */
+
+ /* Fields calculated in tabcompr.c */
+
+ unsigned long transition_sig;
+
+ /* Default state, i.e. the one that supplies transitions for tokens not
+ explicitly listed for this one. */
+ int defstate;
+
+ /* Number of transitions that this state has different to those in the
+ default state. */
+ int best_diff;
+
+} DFANode;
+/*}}}*/
+/* One entry point into the generated automaton: its name and the number of
+ * the state at which scanning starts for it. */
+struct DFAEntry {/*{{{*/
+ char *entry_name;
+ /* Initially the NFA number, overwritten with DFA number by build_dfa */
+ int state_number;
+};
+/*}}}*/
+/* The DFA as a whole: an array of 'n' state pointers, with 'max' recording
+ * the array's capacity (NOTE(review): capacity semantics assumed from the
+ * name - confirm). */
+struct DFA {/*{{{*/
+ DFANode **s; /* states */
+ int n;
+ int max;
+
+ /* the original block that the DFA comes from. */
+ Block *b;
+};
+/*}}}*/
+
+void yyerror(const char *s);
+extern int yylex(void);
+
+/* Constants for 'create' args */
+#define USE_OLD_MUST_EXIST 0
+#define CREATE_MUST_NOT_EXIST 1
+#define CREATE_OR_USE_OLD 2
+
+State *get_curstate(void);
+
+struct Abbrev;
+extern struct Abbrev * create_abbrev(const char *name, struct StimulusList *stimuli);
+
+int lookup_token(char *name, int create);
+Block *lookup_block(char *name, int create);
+State *lookup_state(Block *in_block, char *name, int create);
+void add_entry_to_state(State *curstate, const char *entry);
+void define_entrystruct(const char *s, const char *v);
+Stringlist * add_string_to_list(Stringlist *existing, const char *token);
+void add_transitions(Block *curblock, State *curstate, StimulusList *stimuli, char *destination);
+State * add_transitions_to_internal(Block *curblock, State *addtostate, StimulusList *stimuli);
+void add_tags(State *curstate, Stringlist *sl);
+InlineBlock *create_inline_block(char *type, char *in, char *out);
+void instantiate_block(Block *curblock, char *block_name, char *instance_name);
+void fixup_state_refs(Block *b);
+void expand_charclass_transitions(Block *b);
+
+void compress_nfa(Block *b);
+
+extern void generate_epsilon_closure(Block *b);
+extern void print_nfa(Block *b);
+extern void build_transmap(Block *b);
+extern struct DFA *build_dfa(Block *b);
+extern void print_dfa(struct DFA *dfa);
+
+/* In expr.c */
+typedef struct Expr Expr;
+
+Expr * new_not_expr(Expr *c);
+Expr * new_and_expr(Expr *c1, Expr *c2);
+Expr * new_or_expr(Expr *c1, Expr *c2);
+Expr * new_xor_expr(Expr *c1, Expr *c2);
+Expr * new_cond_expr(Expr *c1, Expr *c2, Expr *c3);
+Expr * new_tag_expr(char *tag_name);
+extern int eval(Expr *e);
+void define_tag(char *name, Expr *e);
+void clear_tag_values(void);
+void report_unused_tags(void);
+
+/* In evaluator.c */
+/* An evaluator is an attribute group; the type is opaque outside evaluator.c. */
+typedef struct evaluator Evaluator;
+extern int n_evaluators;
+extern Evaluator *default_evaluator;
+extern Evaluator *start_evaluator(const char *name);
+void define_attr(Evaluator *x, char *string, Expr *e, int early);
+void define_defattr(Evaluator *x, char *string);
+void set_tag_value(char *tag_name);
+int evaluate_attrs(char ***, int *);
+int evaluator_is_used(Evaluator *x);
+void define_type(Evaluator *x, char *text);
+/* Accessors taking the evaluator's index (0 .. n_evaluators-1). */
+char* get_defattr(int i);
+char* get_attr_type(int i);
+char* get_attr_name(int i);
+void make_evaluator_array(void);
+void emit_dfa_attr_report(char **results, FILE *out);
+void eval_initialise(void);
+
+void compress_transition_table(struct DFA *dfa, int ntokens);
+unsigned long increment(unsigned long x, int field);
+unsigned long count_bits_set(unsigned long x);
+
+/* in abbrevs.c */
+struct Abbrev * lookup_abbrev(char *name);
+
+/* in stimulus.c */
+extern Stimulus *stimulus_from_epsilon(void);
+extern Stimulus *stimulus_from_string(char *str);
+extern Stimulus *stimulus_from_inline_block(InlineBlock *block);
+extern Stimulus *stimulus_from_char_class(CharClass *char_class);
+extern StimulusList *append_stimulus_to_list(StimulusList *existing, Stimulus *stim);
+
+/* in charclass.c */
+extern int cc_test_bit(const unsigned long *bitmap, int entry);
+extern CharClass *new_charclass(void);
+extern void free_charclass(CharClass *what);
+extern void add_charclass_to_list(CharClass *cc);
+extern void add_singleton_to_charclass(CharClass *towhat, char thechar);
+extern void add_range_to_charclass(CharClass *towhat, char star, char end);
+extern void invert_charclass(CharClass *what);
+extern void diff_charclasses(CharClass *left, CharClass *right);
+extern void split_charclasses(const Block *b);
+extern void print_charclass_mapping(FILE *out, FILE *header_out, const char *prefix_under);
+extern void print_charclass(FILE *out, int idx);
+
+/* Compress the DFA in place. ntokens is the number of token columns per
+ * state; dfa_entries/n_dfa_entries is the entry point table.
+ * NOTE(review): this comment used to read "Return new number of DFA states",
+ * but the function is declared void; presumably the new state count is left
+ * in dfa->n - confirm against compdfa.c. */
+extern void compress_dfa(struct DFA *dfa, int ntokens,
+ int n_dfa_entries, struct DFAEntry *dfa_entries);
+
+#endif /* N2D_H */
+
diff --git a/src/mairix/dfasyn/dfasyn.texi b/src/mairix/dfasyn/dfasyn.texi
@@ -0,0 +1,85 @@
+@setfilename dfasyn.info
+@settitle User guide for the dfasyn DFA construction utility
+
+@titlepage
+@title dfasyn user guide
+@subtitle This manual describes how to use dfasyn.
+@author Richard P. Curnow
+@page
+@end titlepage
+
+@c{{{ Top node
+@node Top
+@top
+@menu
+* Introduction:: The introduction
+* Input file format:: A reference for the input file
+* Concept Index:: Index of concepts
+@end menu
+@c}}}
+@c{{{ ch:Introduction
+@node Introduction
+@chapter Introduction
+
+@menu
+* Uses for dfasyn:: The types of problem to which dfasyn is well-suited
+@end menu
+
+@node Uses for dfasyn
+@section Uses for dfasyn
+dfasyn is particularly suited to the following types of scanning problem, all of
+which exceed flex's capabilities:
+
+@itemize @bullet
+@item When the pattern describing a token cannot be written as a regular
+expression. For example, there may be iteration but with constraints between
+the end of one iteration and the start of the next.
+@item When more than 1 rule matches in a flex input file, flex chooses between
+them based on
+
+ @itemize -
+ @item Longest match first
+ @item Earliest rule in the file if more than 1 match of the same length exists
+ @end itemize
+
+dfasyn allows for a more general method of resolving multiple matches.
+Conceptually, it works out which rules match, giving a true/false status for
+each rule. The input file defines an arbitrarily complex set of boolean
+expressions to reduce the multiple matches down to one unique one. (If more than
+one of the boolean expressions evaluates true, this is an error.)
+
+@item When a customised method is required to construct the input tokens that
+pass to the scanner. For example, if the tokens are the characters in a string
+(rather than coming from a file), or if some special logic has to be used to
+generate the tokens from the input character stream.
+
+@item If you want to add actions to the scanning loop, e.g. to remember special
+locations within the word being scanned.
+
+@end itemize
+
+@node Non-uses for dfasyn
+@section Cases where flex might be better
+
+In general, flex is easier and more convenient to use. Where it is applicable
+to your problem, there are no obvious benefits to using dfasyn.
+
+@node Why written
+@section Why was dfasyn written?
+@c}}}
+
+@c{{{ ch:Input file format
+@node Input file format
+@chapter Input file format
+This chapter describes the format of the input file.
+
+@c}}}
+
+
+@node Concept Index
+@unnumbered Concept Index
+@printindex cp
+@bye
+
+@c vim:syntax=OFF:fdm=marker:fdc=4:cms=@c%s
+
diff --git a/src/mairix/dfasyn/evaluator.c b/src/mairix/dfasyn/evaluator.c
@@ -0,0 +1,248 @@
+/***************************************
+ Routines for merging and prioritising exit tags and attribute tags
+ ***************************************/
+
+/*
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2001-2003,2005,2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+/* Handle boolean expressions used to determine the final scanner result from
+ the set of NFA accepting states that are simultaneously active at the end of
+ the scan. */
+
+#include "dfasyn.h"
+
+struct Attr { /* One candidate attribute (result) owned by an evaluator */
+ char *attr; /* The string to write to the output file */
+ /* The boolean expression that defines whether the attribute is active */
+ Expr *e;
+ /* If != 0, assume the state machine that the program's output is embedded in
+ will exit immediately if this result occurs. This may allow lots of
+ states to be culled from the DFA. */
+ int early;
+};
+
+typedef struct Attr Attr;
+struct evaluator { /* A named attribute group; evaluate_attrs() picks its matching attribute */
+ Attr *attrs; /* Growable array of attribute definitions, see grow_attrs() */
+ int is_used; /* Set if any input rules reference this evaluator */
+ int n_attrs; /* Number of entries of attrs[] in use */
+ int max_attrs; /* Allocated capacity of attrs[] */
+ char *name; /* Group name; NULL for the default (anonymous) group */
+ char *defattr; /* Copy of the text given to define_defattr(), else NULL */
+ char *attr_type; /* Copy of the text given to define_type(); get_attr_type() falls back to "short" */
+};
+
+Evaluator *default_evaluator; /* The anonymous group; assigned by eval_initialise() */
+
+struct evaluator_list { /* Node of the singly linked list of all evaluators */
+ struct evaluator_list *next;
+ Evaluator *evaluator;
+};
+
+static struct evaluator_list *evaluator_list = NULL; /* List head; start_evaluator() prepends */
+
+/* Array pointer */
+static struct evaluator **evaluators = NULL; /* Allocated and filled by make_evaluator_array() */
+int n_evaluators = 0; /* Number of entries in evaluators[] */
+
+Evaluator* start_evaluator(const char *name)/*{{{*/
+/* Return the evaluator registered under `name' (NULL selects the anonymous
+   default group).  If none exists yet, an empty one is allocated, pushed on
+   the front of evaluator_list and returned. */
+{
+  struct evaluator_list *node;
+  Evaluator *fresh;
+
+  for (node = evaluator_list; node != NULL; node = node->next) {
+    const char *known = node->evaluator->name;
+    if (known == NULL || name == NULL) {
+      /* An anonymous group only ever matches an anonymous request */
+      if (known == NULL && name == NULL) {
+        return node->evaluator;
+      }
+    } else if (strcmp(known, name) == 0) {
+      return node->evaluator;
+    }
+  }
+
+  /* Not seen before: build a blank evaluator ... */
+  fresh = new(struct evaluator);
+  fresh->attrs = NULL;
+  fresh->n_attrs = 0;
+  fresh->max_attrs = 0;
+  fresh->is_used = 0;
+  fresh->name = (name != NULL) ? new_string(name) : NULL;
+  fresh->defattr = NULL;
+  fresh->attr_type = NULL;
+
+  /* ... and register it at the head of the list */
+  node = new(struct evaluator_list);
+  node->evaluator = fresh;
+  node->next = evaluator_list;
+  evaluator_list = node;
+  return fresh;
+}
+/*}}}*/
+void destroy_evaluator(Evaluator *x)/*{{{*/
+/* Deliberately a no-op: memory is just leaked for now, so nothing belonging
+   to x is released. */
+{
+  (void) x;
+}
+/*}}}*/
+void define_defattr(Evaluator *x, char *text)/*{{{*/
+/* Store a private copy of `text' as the default attribute of evaluator x
+   (the default evaluator when x is NULL) and mark that evaluator as used. */
+{
+  Evaluator *target = x;
+  if (!target) {
+    target = default_evaluator;
+  }
+  target->defattr = new_string(text);
+  target->is_used = 1;
+}
+/*}}}*/
+void define_type(Evaluator *x, char *text)/*{{{*/
+/* Store a private copy of `text' as the attribute type of evaluator x (the
+   default evaluator when x is NULL) and mark that evaluator as used. */
+{
+  Evaluator *target = x;
+  if (!target) {
+    target = default_evaluator;
+  }
+  target->attr_type = new_string(text);
+  target->is_used = 1;
+}
+/*}}}*/
+char* get_defattr(int i)/*{{{*/
+/* Default attribute text of entry i of the evaluators[] array, or NULL if
+   none has been defined for it. */
+{
+  return evaluators[i]->defattr;
+}
+/*}}}*/
+char* get_attr_type(int i)/*{{{*/
+/* Attribute type text of entry i of the evaluators[] array; "short" when no
+   type has been defined for it. */
+{
+  char *declared = evaluators[i]->attr_type;
+  if (declared) {
+    return declared;
+  }
+  return "short";
+}
+/*}}}*/
+char* get_attr_name(int i)/*{{{*/
+/* Name of entry i of the evaluators[] array; NULL for the anonymous group. */
+{
+  return evaluators[i]->name;
+}
+/*}}}*/
+static void grow_attrs(Evaluator *x)/*{{{*/
+/* Make sure x->attrs[] has room for one more entry, extending it by 32 slots
+   whenever it is completely full. */
+{
+  if (x->n_attrs != x->max_attrs) {
+    return;
+  }
+  x->max_attrs += 32;
+  x->attrs = resize_array(Attr, x->attrs, x->max_attrs);
+}
+/*}}}*/
+
+void define_attr(Evaluator *x, char *string, Expr *e, int early)/*{{{*/
+/*++++++++++++++++++++
+  Append an attribute definition to evaluator x (the default evaluator when x
+  is NULL) and mark it as used.  The attribute keeps its own copy of string.
+  With a null expression the attribute is governed by a tag expression built
+  from the attribute string itself.
+  ++++++++++++++++++++*/
+{
+  Evaluator *owner = x ? x : default_evaluator;
+  Attr *slot;
+
+  owner->is_used = 1;
+  grow_attrs(owner);
+  slot = &owner->attrs[owner->n_attrs];
+  owner->n_attrs++;
+
+  slot->attr = new_string(string);
+  slot->early = early;
+  slot->e = e ? e : new_tag_expr(string);
+}
+/*}}}*/
+
+void make_evaluator_array(void)/*{{{*/
+/* Flatten the linked list of evaluators into the evaluators[] array, in list
+   order, and record its length in n_evaluators. */
+{
+  struct evaluator_list *node;
+  int count = 0;
+  int slot = 0;
+
+  for (node = evaluator_list; node; node = node->next) {
+    count++;
+  }
+  evaluators = new_array(struct evaluator *, count);
+  n_evaluators = count;
+  for (node = evaluator_list; node; node = node->next) {
+    evaluators[slot++] = node->evaluator;
+  }
+}
+/*}}}*/
+int evaluate_attrs(char ***attrs, int *attr_early)/*{{{*/
+/*++++++++++++++++++++
+  Evaluate, for every evaluator, the attr which holds given the tags that are
+  set.
+
+  attrs       Receives a newly allocated array of n_evaluators strings.  Entry
+              j is the attribute text selected for evaluators[j] (shared with
+              the evaluator, not copied), or NULL when no attribute matched.
+  attr_early  If non-NULL, receives the OR of the `early' flags of all the
+              selected attributes.
+
+  Returns 1 if every evaluator matched at most one attribute, 0 if any
+  evaluator had two or more attributes whose expressions evaluated true.
+  ++++++++++++++++++++*/
+{
+  int i, j;
+  int status = 1;
+
+  if (attr_early) *attr_early = 0;
+
+  *attrs = new_array(char *, n_evaluators);
+
+  for (j=0; j<n_evaluators; j++) {
+    struct evaluator *x = evaluators[j];
+    int matched = -1;
+
+    for (i=0; i<x->n_attrs; i++) {
+      if (eval(x->attrs[i].e)) {
+        if (matched >= 0) {
+          /* Second match: flag the ambiguity and stop scanning this group.
+             NOTE(review): the first match is still reported below, exactly
+             as before this cleanup; confirm whether an ambiguous group was
+             meant to yield NULL instead. */
+          status = 0;
+          break;
+        }
+        matched = i;
+      }
+    }
+
+    if (matched < 0) {
+      (*attrs)[j] = NULL;
+    } else {
+      (*attrs)[j] = x->attrs[matched].attr;
+      if (attr_early) *attr_early |= x->attrs[matched].early;
+    }
+  }
+
+  return status;
+}
+/*}}}*/
+int evaluator_is_used(Evaluator *x)/*{{{*/
+/* Non-zero once define_defattr(), define_type() or define_attr() has been
+   applied to evaluator x. */
+{
+  const int used = x->is_used;
+  return used;
+}
+/*}}}*/
+void emit_dfa_attr_report(char **attrs, FILE *out)/*{{{*/
+/* Write one line to `out' for every evaluator whose entry in attrs[] is
+   non-NULL, labelling the anonymous evaluator "(DEFAULT)". */
+{
+  int idx = 0;
+
+  while (idx < n_evaluators) {
+    char *text = attrs[idx];
+    if (text != NULL) {
+      const char *label = evaluators[idx]->name;
+      if (label == NULL) {
+        label = "(DEFAULT)";
+      }
+      fprintf(out, " Attributes for <%s> : %s\n", label, text);
+    }
+    idx++;
+  }
+}
+/*}}}*/
+/* Initialisation */
+void eval_initialise(void)/*{{{*/
+/* Create (or look up) the anonymous evaluator and remember it as the
+   default one. */
+{
+  Evaluator *anonymous = start_evaluator(NULL);
+  default_evaluator = anonymous;
+}
+/*}}}*/
diff --git a/src/mairix/dfasyn/expr.c b/src/mairix/dfasyn/expr.c
@@ -0,0 +1,243 @@
+/***************************************
+ Boolean expression trees and the table of tags they are evaluated against
+ ***************************************/
+
+/*
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2001-2003,2005,2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+/* Handle boolean expressions used to determine the final scanner result from
+ the set of NFA accepting states that are simultaneously active at the end of
+ the scan. */
+
+#include "dfasyn.h"
+
+enum ExprType { /* Node kinds of a boolean expression tree; eval() switches on these */
+ E_AND, E_OR, E_XOR, E_COND, E_NOT, E_TAG
+};
+
+struct Tag;
+
+struct Expr { /* One node of a boolean expression tree */
+ enum ExprType type; /* Selects which member of data is in use */
+ union {
+ struct { struct Expr *c1, *c2; } and;
+ struct { struct Expr *c1, *c2; } or;
+ struct { struct Expr *c1, *c2; } xor;
+ struct { struct Expr *c1, *c2, *c3; } cond; /* eval() computes c1 ? c2 : c3 */
+ struct { struct Expr *c1; } not;
+ struct { char *name; struct Tag *s; } tag; /* s stays NULL until eval() binds it by name */
+ } data;
+};
+
+struct Tag { /* Named boolean: either a plain value or defined by an expression */
+ char *name;
+ int is_expr; /* Non-zero: data.e is in use; zero: data.val is in use */
+ union {
+ Expr *e;
+ int val;
+ } data;
+ int is_used; /* Set by eval() when an expression reads this tag */
+};
+
+struct TagList { /* Node of the singly linked list of every known tag */
+ struct TagList *next;
+ struct Tag *tag;
+};
+
+typedef struct Tag Tag;
+typedef struct TagList TagList;
+
+static TagList *tags = NULL; /* List head; add_new_tag() prepends */
+
+Expr * new_not_expr(Expr *c)/*{{{*/
+/* Allocate a node that evaluates to the logical negation of c. */
+{
+  Expr *node = new(Expr);
+  node->data.not.c1 = c;
+  node->type = E_NOT;
+  return node;
+}
+/*}}}*/
+Expr * new_and_expr(Expr *c1, Expr *c2)/*{{{*/
+/* Allocate a node that evaluates to c1 AND c2. */
+{
+  Expr *node = new(Expr);
+  node->data.and.c1 = c1;
+  node->data.and.c2 = c2;
+  node->type = E_AND;
+  return node;
+}
+/*}}}*/
+Expr * new_or_expr(Expr *c1, Expr *c2)/*{{{*/
+/* Allocate a node that evaluates to c1 OR c2. */
+{
+  Expr *node = new(Expr);
+  node->data.or.c1 = c1;
+  node->data.or.c2 = c2;
+  node->type = E_OR;
+  return node;
+}
+/*}}}*/
+Expr * new_xor_expr(Expr *c1, Expr *c2)/*{{{*/
+/* Allocate a node that evaluates to c1 XOR c2. */
+{
+  Expr *node = new(Expr);
+  node->data.xor.c1 = c1;
+  node->data.xor.c2 = c2;
+  node->type = E_XOR;
+  return node;
+}
+/*}}}*/
+Expr * new_cond_expr(Expr *c1, Expr *c2, Expr *c3)/*{{{*/
+/* Allocate a node that evaluates to c2 when c1 is true and to c3 otherwise. */
+{
+  Expr *node = new(Expr);
+  node->data.cond.c1 = c1;
+  node->data.cond.c2 = c2;
+  node->data.cond.c3 = c3;
+  node->type = E_COND;
+  return node;
+}
+/*}}}*/
+
+Expr * new_tag_expr(char *tag_name)/*{{{*/
+/* Build a leaf node referring to the tag called tag_name.  The node keeps its
+   own copy of the name and is not bound to a tag instance yet: at the stage
+   of parsing where this is used the tag table is not known, so eval() does
+   the lookup the first time the node is evaluated. */
+{
+  Expr *leaf = new(Expr);
+
+  leaf->type = E_TAG;
+  leaf->data.tag.s = NULL; /* Force binding at first use */
+  leaf->data.tag.name = new_string(tag_name);
+  return leaf;
+}
+/*}}}*/
+static void add_new_tag(Tag *s)/*{{{*/
+/* Push tag s onto the front of the global tag list. */
+{
+  TagList *link = new(TagList);
+  link->next = tags;
+  link->tag = s;
+  tags = link;
+}
+ /*}}}*/
+static Tag * find_tag_or_create(char *tag_name)/*{{{*/
+/* Return the tag named tag_name.  An unknown name yields a new plain-valued
+   tag (value 0, not yet used) that is added to the tag list. */
+{
+  TagList *link = tags;
+  Tag *fresh;
+
+  while (link) {
+    if (strcmp(link->tag->name, tag_name) == 0) {
+      return link->tag;
+    }
+    link = link->next;
+  }
+
+  fresh = new(Tag);
+  add_new_tag(fresh);
+  fresh->name = new_string(tag_name);
+  fresh->is_expr = 0; /* Until proven otherwise */
+  fresh->data.val = 0; /* Force initial value to be well-defined */
+  fresh->is_used = 0;
+  return fresh;
+}
+/*}}}*/
+void define_tag(char *name, Expr *e)/*{{{*/
+/*++++++++++++++++++++
+  Bind the tag called name (created if unknown) to expression e, so that its
+  value is computed from e instead of being set directly.
+  ++++++++++++++++++++*/
+{
+  Tag *target = find_tag_or_create(name);
+  target->is_expr = 1;
+  target->data.e = e;
+}
+/*}}}*/
+
+void clear_tag_values(void)/*{{{*/
+/* Reset every plain-valued tag to 0; expression-defined tags are untouched. */
+{
+  TagList *link = tags;
+
+  while (link) {
+    if (!link->tag->is_expr) {
+      link->tag->data.val = 0;
+    }
+    link = link->next;
+  }
+}
+/*}}}*/
+void set_tag_value(char *tag_name)/*{{{*/
+/* Set the plain-valued tag tag_name (created if unknown) to 1.  If the tag is
+   defined by an expression instead, complain on stderr and exit with
+   status 2. */
+{
+  Tag *target = find_tag_or_create(tag_name);
+
+  if (!target->is_expr) {
+    target->data.val = 1;
+    return;
+  }
+  fprintf(stderr, "Cannot set value for tag '%s', it is defined by an expression\n", target->name);
+  exit(2);
+}
+/*}}}*/
+int eval(Expr *e)/*{{{*/
+/*++++++++++++++++++++
+  Evaluate the boolean expression tree rooted at e against the current tag
+  values and return the result.
+
+  AND, OR and the conditional only evaluate the operands C's &&, || and ?:
+  would evaluate.  A tag leaf is bound to its Tag by name on first use
+  (creating the tag if it is unknown) and marks that tag as used; a tag
+  defined by an expression is evaluated recursively.  A node of unknown type
+  is reported on stderr and ends the program with status 2.
+  ++++++++++++++++++++*/
+{
+  switch (e->type) {
+    case E_AND:
+      return eval(e->data.and.c1) && eval(e->data.and.c2);
+    case E_OR:
+      return eval(e->data.or.c1) || eval(e->data.or.c2);
+    case E_XOR:
+      return eval(e->data.xor.c1) ^ eval(e->data.xor.c2);
+    case E_COND:
+      return eval(e->data.cond.c1) ? eval(e->data.cond.c2) : eval(e->data.cond.c3);
+    case E_NOT:
+      return !eval(e->data.not.c1);
+    case E_TAG:
+      {
+        Tag *s = e->data.tag.s;
+        int result;
+        if (!s) {
+          /* Not bound yet */
+          e->data.tag.s = s = find_tag_or_create(e->data.tag.name);
+        }
+        if (s->is_expr) {
+          result = eval(s->data.e);
+        } else {
+          result = s->data.val;
+        }
+        s->is_used = 1;
+        return result;
+      }
+    default:
+      /* Message spelling corrected ("Interal" -> "Internal") */
+      fprintf(stderr, "Internal error : Can't get here!\n");
+      exit(2);
+  }
+}
+/*}}}*/
+void report_unused_tags(void)/*{{{*/
+/* Warn on stderr about each known tag that eval() has never read. */
+{
+  TagList *link;
+
+  for (link = tags; link != NULL; link = link->next) {
+    const Tag *candidate = link->tag;
+    if (candidate->is_used) {
+      continue;
+    }
+    fprintf(stderr, "Warning: tag <%s> not referenced by any attribute expression\n", candidate->name);
+  }
+}
+/*}}}*/
diff --git a/src/mairix/dfasyn/n2d.c b/src/mairix/dfasyn/n2d.c
@@ -0,0 +1,696 @@
+/***************************************
+ Convert NFA to DFA
+ ***************************************/
+
+/*
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2000-2003,2005,2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+/* {{{ General comments
+ Convert a nondeterministic finite automaton (NFA) into a deterministic finite
+ automaton (DFA).
+
+ The NFA is defined in terms of a set of states, with transitions between the
+ states. The transitions may occur on any one of a set of symbols (specified
+ with | characters between the options), or may be 'epsilon' transitions, i.e.
+ occurring without consumption of any input. A state may have multiple
+ transitions for the same input symbol (hence 'nondeterministic'). The final
+ state encountered within the final block defined in the input file is taken
+ to be the start state of the whole NFA. A state may be entered more than
+ once in the file; the transitions in the multiple definitions are combined to
+ give the complete transition set. A state may have 1 or more tags assigned
+ (with =); this is the return value of the automaton if the end of string is
+ encountered when in that state.
+ }}} */
+
+#include <ctype.h>
+#include "dfasyn.h"
+#include <assert.h>
+
+/* Globally visible options to control reporting */
+int verbose; /* Non-zero makes add_dfa() report each new DFA state on stderr */
+
+struct Entrylist *entries = NULL;
+
+/* ================================================================= */
+static inline int round_up(const int x) {/*{{{*/
+  /* Number of 32-bit groups needed to hold x bits: (x + 31) / 32 for x >= 0 */
+  const int padded = x + 31;
+  return padded >> 5;
+}
+/*}}}*/
+static inline void set_bit(unsigned long *x, int n)/*{{{*/
+/* Turn on bit n of bitmap x, which stores 32 bits per array element. */
+{
+  x[n >> 5] |= 1UL << (n & 31);
+}
+/*}}}*/
+static inline int is_set(unsigned long *x, int n)/*{{{*/
+/* Return 1 if bit n of bitmap x (32 bits per array element) is on, else 0. */
+{
+  const unsigned long word = x[n >> 5];
+  return (int) ((word >> (n & 31)) & 1UL);
+}
+/*}}}*/
+/* ================================================================= */
+static void transitively_close_eclo(unsigned long **eclo, int N)/*{{{*/
+/* Replace each of the N rows of the bit matrix eclo (32 bits per word; row r
+   flags the states reachable from state r) by its transitive closure.
+
+   For every row a worklist bitmap `todo' flags the states whose own rows
+   still have to be OR-ed in.  Bits that are new to the row go back on the
+   worklist, and passes repeat until one adds nothing.  The worklist is
+   scratch storage and is released before returning. */
+{
+  int from;
+  unsigned long *from_row;
+  unsigned long *todo, this_todo;
+  int Nru;
+  int i, i32, j, k, merge_idx;
+  int j_limit;
+  int any_changes;
+
+  Nru = round_up(N);
+  todo = new_array(unsigned long, Nru);
+
+  for (from=0; from<N; from++) {
+    from_row = eclo[from];
+    for (i=0; i<Nru; i++) {
+      todo[i] = from_row[i];
+    }
+    any_changes = 1;
+    while (any_changes) {
+      any_changes = 0;
+      for (i=0; i<Nru; i++) { /* loop over words in bitvector */
+        i32 = i<<5;
+        this_todo = todo[i];
+        todo[i] = 0UL; /* reset to avoid oo-loop */
+        if (!this_todo) continue; /* none to do in this block */
+        j_limit = N - i32;
+        if (j_limit > 32) j_limit = 32;
+
+        for (j=0; j<j_limit;) { /* loop over bits in this word */
+          if (this_todo & 1) {
+            /* Merge in */
+            merge_idx = i32 + j;
+            for (k=0; k<Nru; k++) {
+              unsigned long to_merge = eclo[merge_idx][k];
+              unsigned long orig = from_row[k];
+              unsigned long diffs = to_merge & (~orig);
+              from_row[k] |= to_merge;
+              if (diffs) any_changes = 1;
+              todo[k] |= diffs;
+            }
+          }
+          this_todo >>= 1;
+          if (!this_todo) break; /* Workload reduction at end */
+          j++;
+        }
+      }
+    }
+  }
+
+  free(todo); /* Previously leaked on every call */
+}
+/*}}}*/
+void generate_epsilon_closure(Block *b)/*{{{*/
+/* Allocate b->eclo as one bitmap row per NFA state and fill the row of each
+   state with its epsilon closure: the state itself plus every state that
+   can be reached from it through epsilon transitions alone. */
+{
+  const int n_states = b->nstates;
+  const int n_words = round_up(n_states);
+  int row, w;
+
+  b->eclo = new_array(unsigned long*, n_states);
+  for (row = 0; row < n_states; row++) {
+    unsigned long *bits = new_array(unsigned long, n_words);
+    for (w = 0; w < n_words; w++) {
+      bits[w] = 0;
+    }
+    b->eclo[row] = bits;
+  }
+
+  /* Determine initial immediate transitions */
+  for (row = 0; row < n_states; row++) {
+    State *state = b->states[row];
+    unsigned long *closure = b->eclo[state->index];
+    TransList *edge;
+
+    set_bit(closure, state->index); /* Always reflexive */
+
+    for (edge = state->transitions; edge; edge = edge->next) {
+      if (edge->type == TT_EPSILON) {
+        set_bit(closure, edge->ds_ref->index);
+      } else if (edge->type == TT_TOKEN) {
+        /* smoke out old method of indicating an epsilon trans */
+        assert(edge->x.token >= 0);
+      } else {
+        assert(0);
+      }
+    }
+  }
+
+  transitively_close_eclo(b->eclo, n_states);
+}
+/*}}}*/
+void print_nfa(Block *b)/*{{{*/
+/* Write a description of every NFA state in block b to the `report' stream:
+   its name and entry names, its outgoing transitions, its tags (separated by
+   '|') and the other states in its epsilon closure.  Does nothing when
+   report is NULL.  b->eclo must already have been generated. */
+{
+  int i, j, N;
+  N = b->nstates;
+
+  if (!report) return;
+
+  for (i=0; i<N; i++) {
+    State *s = b->states[i];
+    TransList *tl;
+    Stringlist *sl;
+    fprintf(report, "NFA state %d = %s", i, s->name);
+    if (s->entries) {
+      int first = 1;
+      Stringlist *e = s->entries;
+      fputs(" [Entries: ", report);
+      while (e) {
+        if (!first) {
+          fputc(',', report);
+        }
+        first = 0;
+        fputs(e->string, report);
+        e = e->next;
+      }
+      fputc(']', report);
+    }
+    fputc('\n', report);
+    for (tl=s->transitions; tl; tl=tl->next) {
+      switch (tl->type) {
+        case TT_EPSILON:
+          fprintf(report, " [(epsilon)] -> ");
+          break;
+        case TT_TOKEN:
+          assert(tl->x.token >= 0);
+          if (tl->x.token >= ntokens) {
+            /* Symbols numbered from ntokens upwards are character classes */
+            fprintf(report, " ");
+            print_charclass(report, tl->x.token - ntokens);
+            fprintf(report, " -> ");
+          } else {
+            fprintf(report, " %s -> ", toktable[tl->x.token]);
+          }
+          break;
+        default:
+          assert(0);
+          break;
+      }
+      fprintf(report, "%s\n", tl->ds_name);
+    }
+    if (s->tags) {
+      int first = 1;
+      fprintf(report, " Tags : ");
+      for (sl=s->tags; sl; sl=sl->next) {
+        fprintf(report, "%s%s",
+                first ? "" : "|",
+                sl->string);
+        /* Without this the '|' separator was never emitted and the tag
+           names ran together */
+        first = 0;
+      }
+      fprintf(report, "\n");
+    }
+    fprintf(report, " Epsilon closure :\n (self)\n");
+    for (j=0; j<N; j++) {
+      if (i!=j && is_set(b->eclo[i], j)) {
+        fprintf(report, " %s\n", b->states[j]->name);
+      }
+    }
+
+    fprintf(report, "\n");
+  }
+
+}
+/*}}}*/
+/* ================================================================= */
+
+/* Indexed [from_state][token][to_state], flag set if there is
+ a transition from from_state to to_state, via token then zero or more
+ epsilon transitions */
+
+static unsigned long ***transmap; /* Allocated and filled by build_transmap(); read by build_dfa() */
+
+/* Index [from_nfa_state][token], flag set if there is a transition
+ to any destination nfa state for that token. */
+static unsigned long **anytrans; /* Allocated and filled by build_transmap(); read by build_dfa() */
+
+/* ================================================================= */
+void build_transmap(Block *b)/*{{{ Allocate and fill transmap and anytrans from the NFA in b; reads b->eclo */
+{
+ int N = b->nstates;
+ int Nt = ntokens + n_charclasses; /* Symbol count: tokens plus character classes */
+ int i, j, k, m, dest;
+
+ transmap = new_array(unsigned long **, N);
+ anytrans = new_array(unsigned long *, N);
+ for (i=0; i<N; i++) { /* Allocate and zero every bitmap */
+ transmap[i] = new_array(unsigned long *, Nt);
+ anytrans[i] = new_array(unsigned long, round_up(Nt)); /* One bit per symbol */
+ for (j=0; j<round_up(Nt); j++) {
+ anytrans[i][j] = 0UL;
+ }
+ for (j=0; j<Nt; j++) {
+ transmap[i][j] = new_array(unsigned long, round_up(N)); /* One bit per NFA state */
+ for (k=0; k<round_up(N); k++) {
+ transmap[i][j][k] = 0UL;
+ }
+ }
+ }
+
+ for (i=0; i<N; i++) { /* NOTE(review): rows are indexed by position i, not s->index; presumably equal - confirm */
+ State *s = b->states[i];
+ TransList *tl;
+ for (tl=s->transitions; tl; tl=tl->next) {
+ switch (tl->type) {
+ case TT_EPSILON: /* Epsilon moves are already folded into b->eclo */
+ break;
+ case TT_TOKEN:
+ {
+ assert(tl->x.token >= 0);
+ dest = tl->ds_ref->index;
+ for (m=0; m<round_up(N); m++) { /* OR in the epsilon closure of the destination */
+ unsigned long x = b->eclo[dest][m];
+ transmap[i][tl->x.token][m] |= x;
+ if (!!x) set_bit(anytrans[i], tl->x.token); /* Remember that this symbol leads somewhere */
+ }
+ }
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+ }
+}
+/*}}}*/
+/* ================================================================= */
+
+int had_ambiguous_result = 0; /* Set to 1 by add_dfa() when evaluate_attrs() reports an ambiguous state */
+
+/* ================================================================= */
+
+/* Implement an array of linked lists to access DFA states directly. The
+ * hashes are given by folding the signatures down to single bytes. */
+
+struct DFAList { /* Chain node of one hash bucket */
+ struct DFAList *next;
+ DFANode *dfa;
+};
+
+#define DFA_HASHSIZE 256 /* fold_signature() yields values 0..255 */
+static struct DFAList *dfa_hashtable[DFA_HASHSIZE]; /* Cleared by build_dfa(); add_dfa() inserts, find_dfa() searches */
+
+/* ================================================================= */
+
+int n_dfa_entries; /* Number of elements in dfa_entries[] */
+struct DFAEntry *dfa_entries = NULL; /* build_dfa() replaces each state_number by a DFA state index */
+
+/* ================================================================= */
+static void grow_dfa(struct DFA *dfa)/*{{{*/
+/* Enlarge the DFA's array of state pointers by 32 slots. */
+{
+  const int enlarged = dfa->max + 32;
+  dfa->max = enlarged;
+  dfa->s = resize_array(DFANode*, dfa->s, enlarged);
+}
+/*}}}*/
+static unsigned long fold_signature(unsigned long sig)/*{{{*/
+/* Reduce a signature word to a value in 0..255: XOR it with itself shifted
+   right by 16, XOR that with itself shifted right by 8, keep the low byte. */
+{
+  const unsigned long halves = sig ^ (sig >> 16);
+  return (halves ^ (halves >> 8)) & 0xff;
+}
+/*}}}*/
+/* ================================================================= */
+static int find_dfa(unsigned long *nfas, int N)/*{{{*/
+/* Return the index of the existing DFA state whose NFA-state bitmap equals
+   nfas (N bits), or -1 if there is none.  Only the hash bucket selected by
+   the folded signature is walked, and the full signature is compared first
+   to get rapid rejection of any DFA state that can't possibly match. */
+{
+  const int n_words = round_up(N);
+  unsigned long sig = 0UL;
+  struct DFAList *cand;
+  int w;
+
+  for (w = 0; w < n_words; w++) {
+    sig ^= nfas[w];
+  }
+
+  for (cand = dfa_hashtable[fold_signature(sig)]; cand; cand = cand->next) {
+    DFANode *node = cand->dfa;
+
+    if (node->signature != sig) continue;
+
+    /* Stop at the first differing word; reaching the end means equality */
+    for (w = 0; w < n_words && nfas[w] == node->nfas[w]; w++) ;
+    if (w == n_words) {
+      return node->index;
+    }
+  }
+  return -1;
+}
+/*}}}*/
+
+/*{{{ add_dfa() : append a new DFA state for the NFA-state set nfas and return its index */
+static int add_dfa(Block *b, struct DFA *dfa, unsigned long *nfas, int N, int Nt, int from_state, int via_token)
+{
+ int j;
+ int result = dfa->n; /* Index the new state will occupy */
+ int this_result_unambiguous;
+
+ Stringlist *ex;
+ unsigned long signature = 0UL, folded_signature;
+ struct DFAList *dfal;
+
+ if (verbose) {
+ fprintf(stderr, "Adding DFA state %d\r", dfa->n);
+ fflush(stderr);
+ }
+
+ if (dfa->max == dfa->n) { /* State array is full */
+ grow_dfa(dfa);
+ }
+
+ dfa->s[dfa->n] = new(DFANode);
+ dfa->s[dfa->n]->nfas = new_array(unsigned long, round_up(N));
+ dfa->s[dfa->n]->map = new_array(int, Nt);
+ for (j=0; j<Nt; j++) dfa->s[dfa->n]->map[j] = -1; /* -1 : no transition on symbol j */
+ dfa->s[dfa->n]->index = dfa->n;
+ dfa->s[dfa->n]->defstate = -1;
+
+ dfa->s[dfa->n]->from_state = from_state; /* Kept so display_route() can retrace how the state was reached */
+ dfa->s[dfa->n]->via_token = via_token;
+
+ for (j=0; j<round_up(N); j++) { /* Copy the bitmap; signature is the XOR of all its words */
+ unsigned long x = nfas[j];
+ signature ^= x;
+ dfa->s[dfa->n]->nfas[j] = x;
+ }
+ dfa->s[dfa->n]->signature = signature;
+
+ folded_signature = fold_signature(signature);
+ dfal = new(struct DFAList);
+ dfal->dfa = dfa->s[dfa->n];
+ dfal->next = dfa_hashtable[folded_signature]; /* Prepend to the hash bucket used by find_dfa() */
+ dfa_hashtable[folded_signature] = dfal;
+
+ /* {{{ Boolean reductions to get attributes */
+ ex = NULL;
+ clear_tag_values(); /* Start from all plain tags cleared */
+ for (j=0; j<N; j++) {
+ if (is_set(dfa->s[dfa->n]->nfas, j)) { /* NFA state j belongs to this DFA state */
+ Stringlist *sl;
+ State *s = b->states[j];
+ for (sl = s->tags; sl; sl = sl->next) {
+ Stringlist *new_sl;
+ new_sl = new(Stringlist);
+ new_sl->string = sl->string; /* Shares the string with the NFA state's tag list */
+ new_sl->next = ex;
+ ex = new_sl;
+
+ set_tag_value(sl->string);
+ }
+ }
+ }
+
+ dfa->s[dfa->n]->nfa_exit_sl = ex;
+
+ this_result_unambiguous =
+ evaluate_attrs(&dfa->s[dfa->n]->attrs, &dfa->s[dfa->n]->has_early_exit);
+
+ if (!this_result_unambiguous) { /* evaluate_attrs() returned 0 */
+ Stringlist *sl;
+ fprintf(stderr, "WARNING : Ambiguous exit state abandoned for DFA state %d\n", dfa->n);
+ fprintf(stderr, "NFA exit tags applying in this stage :\n");
+ for (sl = ex; sl; sl = sl->next) {
+ fprintf(stderr, " %s\n", sl->string);
+ }
+ had_ambiguous_result = 1;
+ }
+ /*}}}*/
+
+ ++dfa->n; /* Commit the new state */
+ return result;
+}
+/*}}}*/
+static void clear_nfas(unsigned long *nfas, int N)/*{{{*/
+/* Zero the bitmap nfas, which is sized to hold N bits (32 per element). */
+{
+  int left = round_up(N);
+  while (left-- > 0) {
+    nfas[left] = 0;
+  }
+}
+/*}}}*/
+struct DFA *build_dfa(Block *b)/*{{{*/
+/* Run the subset construction over the NFA in block b and return the
+   resulting, newly allocated DFA.
+
+   b->eclo must hold the epsilon closures, and transmap/anytrans must already
+   have been filled by build_transmap().  Each element of dfa_entries[]
+   supplies one start state: on entry its state_number is an NFA state
+   index, on return it is the index of the corresponding DFA state.  DFA
+   states flagged has_early_exit get no outgoing transitions. */
+{
+  unsigned long **nfas;
+  int i;
+  int j;
+  int N, Nt;
+  int next_to_do;
+  int *found_any;
+  int rup_N;
+  struct DFA *dfa;
+
+  dfa = new(struct DFA);
+  dfa->n = 0;
+  dfa->max = 0;
+  dfa->s = NULL;
+  dfa->b = b;
+
+  for (i=0; i<DFA_HASHSIZE; i++) dfa_hashtable[i] = NULL;
+
+  N = b->nstates;
+  rup_N = round_up(N);
+  Nt = ntokens + n_charclasses;
+
+  /* One scratch bitmap of destination NFA states per transition symbol */
+  nfas = new_array(unsigned long *, Nt);
+  for (i=0; i<Nt; i++) {
+    nfas[i] = new_array(unsigned long, round_up(N));
+  }
+
+  /* Add initial states */
+  for (j=0; j<n_dfa_entries; j++) {
+    int idx;
+    clear_nfas(nfas[0], N);
+    for (i=0; i<round_up(N); i++) {
+      nfas[0][i] |= b->eclo[dfa_entries[j].state_number][i];
+    }
+    /* Must handle the case where >=2 of the start states are actually identical;
+     * nothing in the input language prevents this. */
+    idx = find_dfa(nfas[0], N);
+    if (idx < 0) {
+      idx = dfa->n;
+      add_dfa(b, dfa, nfas[0], N, Nt, -1, -1);
+    }
+    dfa_entries[j].state_number = idx;
+  }
+
+  next_to_do = 0;
+  found_any = new_array(int, Nt);
+
+  /* Now the heart of the program : the subset construction to turn the NFA
+     into a DFA.  This is a major performance hog in the program, so there are
+     lots of tricks to speed this up (particularly, hoisting intermediate
+     pointer computations out of the loop to assert the fact that there is no
+     aliasing between the arrays.) */
+
+  while (next_to_do < dfa->n) {
+
+    int t; /* token index */
+    int j0, j0_5, j1, k;
+    int idx;
+    unsigned long *current_nfas;
+    unsigned long block_bitmap;
+    /* Unsigned, as in print_dfa(): a signed int mask overflows when it is
+       shifted up to bit 31, which is undefined behaviour. */
+    unsigned long mask;
+
+    /* If the next DFA state has the result_early flag set, it means that the scanner will
+     * always exit straight away when that state is reached, so there's no need to compute
+     * any transitions out of it. */
+
+    if (dfa->s[next_to_do]->has_early_exit) {
+      next_to_do++;
+      continue;
+    }
+
+    for (j=0; j<Nt; j++) {
+      clear_nfas(nfas[j], N);
+      found_any[j] = 0;
+    }
+
+    current_nfas = dfa->s[next_to_do]->nfas;
+    for (j0=0; j0<rup_N; j0++) { /* Loop over NFA states which may be in this DFA state */
+      block_bitmap = current_nfas[j0];
+      if (!block_bitmap) continue;
+      j0_5 = j0 << 5;
+      for (mask=1UL, j1=0; j1<32; mask<<=1, j1++) {
+        j = j0_5 + j1;
+        if (block_bitmap & mask) { /* Is NFA state in DFA */
+          unsigned long **transmap_j = transmap[j];
+          unsigned long *anytrans_j = anytrans[j];
+          for (t=0; t<Nt; t++) { /* Loop over transition symbols */
+            unsigned long *transmap_t;
+            unsigned long *nfas_t;
+            unsigned long found_any_t;
+            if (!is_set(anytrans_j, t)) continue;
+            transmap_t = transmap_j[t];
+            nfas_t = nfas[t];
+            found_any_t = found_any[t];
+            for (k=0; k<rup_N; k++) { /* Loop over destination NFA states */
+              unsigned long x;
+              x = transmap_t[k];
+              nfas_t[k] |= x;
+              found_any_t |= !!x;
+            }
+            found_any[t] = found_any_t;
+          }
+        }
+      }
+    }
+
+    /* Map every symbol that led somewhere to an existing or new DFA state */
+    for (t=0; t<Nt; t++) {
+      if (found_any[t]) {
+        idx = find_dfa(nfas[t], N);
+        if (idx < 0) {
+          idx = add_dfa(b, dfa, nfas[t], N, Nt, next_to_do, t);
+        }
+      } else {
+        idx = -1;
+      }
+      dfa->s[next_to_do]->map[t] = idx;
+    }
+
+    next_to_do++;
+  }
+
+  free(found_any);
+  for (i=0; i<Nt; i++) free(nfas[i]);
+  free(nfas);
+  return dfa;
+}
+/*}}}*/
+/* ================================================================= */
+static void display_route(struct DFA *dfa, int idx, FILE *out)/*{{{*/
+/* Print to out the chain of symbols recorded as having led to DFA state idx,
+   each step followed by "->", by following from_state links back to a state
+   that has none. */
+{
+  DFANode *node = dfa->s[idx];
+  const int origin = node->from_state;
+  int symbol;
+
+  if (origin >= 0) {
+    /* Print the route to the predecessor first */
+    display_route(dfa, origin, out);
+    fputs("->", out);
+  }
+
+  symbol = dfa->s[idx]->via_token;
+  if (symbol >= ntokens) {
+    print_charclass(out, symbol - ntokens);
+    return;
+  }
+  if (symbol >= 0) {
+    fprintf(out, "%s", toktable[symbol]);
+  }
+}
+/*}}}*/
+void print_dfa(struct DFA *dfa)/*{{{ Describe every DFA state and the entry states on the report stream */
+{
+ int N = dfa->b->nstates;
+ int Nt = ntokens + n_charclasses; /* Symbol count: tokens plus character classes */
+
+ int i, j0, j0_5, j1, t;
+ unsigned long mask;
+ unsigned long current_nfas;
+ int rup_N = round_up(N);
+ int from_state, this_state;
+
+ if (!report) return; /* No report stream: nothing to print */
+
+ for (i=0; i<dfa->n; i++) {
+ fprintf(report, "DFA state %d\n", i);
+ if (dfa->s[i]->nfas) {
+ fprintf(report, " NFA states :\n");
+ for (j0=0; j0<rup_N; j0++) { /* Walk the bitmap one 32-bit group at a time */
+ current_nfas = dfa->s[i]->nfas[j0];
+ if (!current_nfas) continue;
+ j0_5 = j0<<5;
+ for (j1=0, mask=1UL; j1<32; mask<<=1, j1++) {
+ if (current_nfas & mask) {
+ fprintf(report, " %s\n", dfa->b->states[j0_5 + j1]->name);
+ }
+ }
+ }
+ fprintf(report, "\n");
+ }
+ fprintf(report, " Forward route :");
+ this_state = i; /* Assigned but not read again in this function */
+ from_state = dfa->s[i]->from_state;
+ if (from_state >= 0) {
+ fprintf(report, " (from state %d)", from_state);
+ }
+ fputs("\n (START)", report);
+ display_route(dfa, i, report);
+ fputs("->(HERE)", report);
+ fprintf(report, "\n");
+
+ fprintf(report, " Transitions :\n");
+ for (t=0; t<Nt; t++) {
+ int dest = dfa->s[i]->map[t];
+ if (dest >= 0) { /* Negative entries mean no transition on symbol t */
+ if (t >= ntokens) { /* Symbols from ntokens upwards are character classes */
+ fprintf(report, " ");
+ print_charclass(report, t - ntokens);
+ fprintf(report, " -> %d\n", dest);
+ } else {
+ fprintf(report, " %s -> %d\n", toktable[t], dest);
+ }
+ }
+ }
+ if (dfa->s[i]->defstate >= 0) { /* add_dfa() initialises defstate to -1 */
+ fprintf(report, " Use state %d as basis (%d fixups)\n",
+ dfa->s[i]->defstate, dfa->s[i]->best_diff);
+ }
+ if (dfa->s[i]->nfa_exit_sl) {
+ Stringlist *sl;
+ fprintf(report, " NFA exit tags applying :\n");
+ for (sl=dfa->s[i]->nfa_exit_sl; sl; sl = sl->next) {
+ fprintf(report, " %s\n", sl->string);
+ }
+ }
+
+ emit_dfa_attr_report(dfa->s[i]->attrs, report);
+ fprintf(report, "\n");
+ }
+ fprintf(report, "\nEntry states in DFA:\n");
+ for (i=0; i<n_dfa_entries; i++) {
+ fprintf(report, "Entry <%s> : %d\n",
+ dfa_entries[i].entry_name,
+ dfa_entries[i].state_number);
+ }
+
+}
+/*}}}*/
+/* ================================================================= */
+void yyerror (const char *s)/*{{{*/
+/* Print message s followed by the current value of the external lineno
+   counter on stderr. */
+{
+  extern int lineno;
+  const int where = lineno;
+
+  fprintf(stderr, "%s at line %d\n", s, where);
+}
+/*}}}*/
+int yywrap(void) /*{{{*/
+/* Always returns -1.  NOTE(review): lex-style scanners treat a non-zero
+   yywrap() result as "no further input" - confirm that is the intent. */
+{
+  const int result = -1;
+  return result;
+}
+/*}}}*/
+/* ================================================================= */
+
diff --git a/src/mairix/dfasyn/n2d.h b/src/mairix/dfasyn/n2d.h
@@ -0,0 +1,226 @@
+/***************************************
+ $Header: /cvs/src/dfasyn/n2d.h,v 1.2 2003/03/02 23:42:11 richard Exp $
+
+ Header file for NFA->DFA conversion utility.
+ ***************************************/
+
+/*
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2001-2003,2005
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+#ifndef N2D_H
+#define N2D_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define new(T) ((T *) malloc(sizeof(T))) /* allocate one T; result is neither checked nor zeroed */
+#define new_array(T,N) ((T *) malloc((N) * sizeof(T))) /* allocate room for N objects of type T */
+#define resize_array(T,arr,newN) ((T *) ((arr) ? realloc(arr,(newN)*sizeof(T)) : malloc((newN)*sizeof(T)))) /* resize arr to newN elements; a NULL arr is malloc'ed afresh */
+#define new_string(s) strcpy((char *)malloc((strlen(s)+1)*sizeof(char)),s) /* heap copy of the C string s; s is evaluated twice */
+
+/* For typecasting, especially useful for declarations of local ptrs to args
+ of a qsort comparison fn */
+#define Castdecl(x, T, nx) T nx = (T) x /* declares `T nx`, initialised with x cast to T */
+
+#define Castderef(x, T, nx) T nx = *(T*) x /* declares `T nx`, initialised by reading x as a pointer to T */
+
+/* Globally visible options to control reporting */
+extern FILE *report;
+extern int verbose;
+
+struct State;
+struct Block;
+
+typedef struct Translist { /* One node of a singly linked list of transitions */
+ struct Translist *next;
+ int token;
+ char *ds_name; /* NOTE(review): presumably the destination state's name -- confirm */
+ struct State *ds_ref; /* NOTE(review): presumably the destination state once resolved -- confirm */
+} Translist;
+
+typedef struct Stringlist { /* One node of a singly linked list of strings */
+ struct Stringlist *next;
+ char *string;
+} Stringlist;
+
+typedef struct InlineBlock { /* The three names that describe one inline block reference */
+ char *type; /* Block type */
+ char *in; /* Name of input node */
+ char *out; /* Name of output node */
+} InlineBlock;
+
+typedef struct InlineBlockList { /* One node of a singly linked list of InlineBlock pointers */
+ struct InlineBlockList *next;
+ InlineBlock *ib;
+} InlineBlockList;
+
+typedef struct State { /* One named NFA state belonging to a Block */
+ char *name;
+ int index; /* Array index in containing block */
+ struct Block *parent; /* NOTE(review): presumably the block that contains this state -- confirm */
+ Translist *transitions; /* Head of this state's transition list */
+ Stringlist *exitvals;
+ Stringlist *attributes;
+
+ /* Pointers to the nodes in the 'transitions' list, sorted into canonical order */
+ Translist **ordered_trans;
+ int n_transitions; /* NOTE(review): presumably the number of entries in ordered_trans -- confirm */
+
+ unsigned char removed; /* Flag indicating state has been pruned by compression stage */
+} State;
+
+typedef struct S_Stateset { /* Array of State pointers with separate used/allocated counts */
+ State **states;
+ int nstates; /* NOTE(review): presumably the number of slots in use -- confirm */
+ int maxstates; /* NOTE(review): presumably the number of slots allocated -- confirm */
+} Stateset;
+
+#define HASH_BUCKETS 64 /* must remain a power of two: HASH_MASK is derived as BUCKETS-1 */
+#define HASH_MASK (HASH_BUCKETS-1)
+
+typedef struct Block { /* A named collection of states */
+ char *name;
+
+ /* The master table of states within this block. This has to be in a flat
+ array because we have to work with respect to state indices when doing the
+ 2D bitmap stuff for the subset construction. */
+ State **states;
+ int nstates;
+ int maxstates;
+
+ /* Hash table for getting rapid access to a state within the block, given
+ its name */
+ Stateset state_hash[HASH_BUCKETS];
+
+ int subcount; /* Number for generating substates */
+ int subblockcount; /* Number for generating inline subblocks */
+} Block;
+
+typedef struct { /* One DFA state plus the bookkeeping used by the later compression passes */
+ unsigned long *nfas; /* NOTE(review): looks like a bitmap of member NFA states, 32 per word -- confirm */
+ unsigned long signature; /* All the longwords in the nfas array xor'ed together */
+ int index; /* Entry's own index in the array */
+ int *map; /* index by token code */
+ int from_state; /* the state which provided the first transition to this one (leading to its creation) */
+ int via_token; /* the token through which we got to this state the first time. */
+ Stringlist *nfa_exit_sl; /* NFA exit values */
+ Stringlist *nfa_attr_sl; /* NFA attribute values -- NOTE(review): inferred from the field name; confirm */
+ char *result; /* Result token, computed by boolean expressions defined in input text */
+ int result_early; /* If !=0, the scanner is expected to exit immediately this DFA state is entered.
+ It means that no out-bound transitions have to be created. */
+ char *attribute; /* Attribute token, computed by boolean expressions defined in input text */
+
+ /* Fields calculated in compdfa.c */
+
+ /* The equivalence class the state is in. */
+ int eq_class;
+
+ /* Temp. storage for the new eq. class within a single pass of the splitting alg. */
+ int new_eq_class;
+
+ /* Signature field from above is also re-used. */
+
+ int is_rep; /* Set if state is chosen as the representative of its equivalence class. */
+ int new_index; /* New index assigned to the state. */
+
+ /* Fields calculated in tabcompr.c */
+
+ unsigned long transition_sig;
+
+ /* Default state, i.e. the one that supplies transitions for tokens not
+ explicitly listed for this one. */
+ int defstate;
+
+ /* Number of transitions that this state has different to those in the
+ default state. */
+ int best_diff;
+
+} DFANode;
+
+
+void yyerror(const char *s);
+extern int yylex(void);
+
+/* Constants for 'create' args */
+#define USE_OLD_MUST_EXIST 0
+#define CREATE_MUST_NOT_EXIST 1
+#define CREATE_OR_USE_OLD 2
+
+State *get_curstate(void);
+
+struct Abbrev;
+extern struct Abbrev * create_abbrev(char *name);
+extern void add_tok_to_abbrev(struct Abbrev *abbrev, char *tok);
+
+int lookup_token(char *name, int create);
+Block *lookup_block(char *name, int create);
+State *lookup_state(Block *in_block, char *name, int create);
+Stringlist * add_token(Stringlist *existing, char *token);
+void add_transitions(State *curstate, Stringlist *tokens, char *destination);
+State * add_transitions_to_internal(Block *curblock, State *addtostate, Stringlist *tokens);
+void add_exit_value(State *curstate, char *value);
+void set_state_attribute(State *curstate, char *name);
+InlineBlock *create_inline_block(char *type, char *in, char *out);
+InlineBlockList *add_inline_block(InlineBlockList *existing, InlineBlock *nib);
+State * add_inline_block_transitions(Block *curblock, State *addtostate, InlineBlockList *ibl);
+void instantiate_block(Block *curblock, char *block_name, char *instance_name);
+void fixup_state_refs(Block *b);
+
+void compress_nfa(Block *b);
+
+/* In expr.c */
+typedef struct Expr Expr;
+
+typedef struct evaluator Evaluator;
+extern Evaluator *exit_evaluator;
+extern Evaluator *attr_evaluator;
+
+Expr * new_wild_expr(void);
+Expr * new_not_expr(Expr *c);
+Expr * new_and_expr(Expr *c1, Expr *c2);
+Expr * new_or_expr(Expr *c1, Expr *c2);
+Expr * new_xor_expr(Expr *c1, Expr *c2);
+Expr * new_cond_expr(Expr *c1, Expr *c2, Expr *c3);
+Expr * new_sym_expr(char *sym_name);
+
+void define_symbol(Evaluator *x, char *name, Expr *e);
+void define_result(Evaluator *x, char *string, Expr *e, int early);
+void define_symresult(Evaluator *x, char *string, Expr *e, int early);
+void define_defresult(Evaluator *x, char *string);
+void clear_symbol_values(Evaluator *x);
+void set_symbol_value(Evaluator *x, char *sym_name);
+int evaluate_result(Evaluator *x, char **, int *);
+int evaluator_is_used(Evaluator *x);
+void define_defresult(Evaluator *x, char *text);
+void define_type(Evaluator *x, char *text);
+char* get_defresult(Evaluator *x);
+char* get_result_type(Evaluator *x);
+void eval_initialise(void);
+
+void compress_transition_table(DFANode **dfas, int ndfas, int ntokens);
+unsigned long increment(unsigned long x, int field);
+unsigned long count_bits_set(unsigned long x);
+
+/* Return new number of DFA states */
+int compress_dfa(DFANode **dfas, int ndfas, int ntokens);
+
+#endif /* N2D_H */
+
diff --git a/src/mairix/dfasyn/parse.y b/src/mairix/dfasyn/parse.y
@@ -0,0 +1,262 @@
+/**********************************************************************
+ Grammar definition for input files defining an NFA
+ *********************************************************************/
+
+/*
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2001-2003,2005,2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+%{
+#include "dfasyn.h"
+
+static Block *curblock = NULL; /* Current block being built */
+static State *curstate = NULL; /* Current state being worked on */
+static State *addtostate = NULL; /* Current state (incl ext) to which transitions are added */
+static StimulusList *curtranslist = NULL; /* Final option set of stimuli prior to ARROW */
+static CharClass *curcharclass = NULL; /* Character class being filled in between its opening bracket and RSQUARE */
+static Evaluator *current_evaluator = NULL; /* Evaluator of the GROUP being parsed; NULL outside a group */
+
+State *get_curstate(void) { return curstate; } /* read-only accessor for the parser's current state */
+
+%}
+
+%union { /* Semantic value types; the %type lines bind each member to symbols */
+ char c; /* CHAR */
+ char *s; /* STRING */
+ int i; /* not bound to any symbol by the %type lines in this file */
+ Stringlist *sl; /* tag_seq */
+ Stimulus *st; /* stimulus */
+ StimulusList *stl; /* stimulus_seq, transition_seq */
+ InlineBlock *ib; /* inline_block */
+ CharClass *cc; /* char_class and its three variants */
+ Expr *e; /* expr */
+}
+
+%token STRING STATE TOKENS PREFIX ARROW BLOCK ENDBLOCK COLON EQUAL SEMICOLON COMMA
+%token ABBREV DEFINE
+%type<s> STRING
+%type<st> stimulus
+%type<sl> tag_seq
+%type<stl> stimulus_seq
+%type<stl> transition_seq
+%type<e> expr
+%type<ib> inline_block
+%type<c> CHAR
+%type<cc> char_class simple_char_class negated_char_class char_class_diff
+
+%token ATTR TAG
+%token DEFATTR
+%token EARLY
+%token TYPE
+%token ENTRY
+%token ENTRYSTRUCT
+%token GROUP
+%token LBRACE RBRACE
+
+%token LSQUARE RSQUARE
+%token LSQUARE_CARET
+%token CHAR HYPHEN
+
+%right QUERY COLON /* operator precedence: yacc ranks each later line above the earlier ones */
+%left PIPE
+%left XOR
+%left AND
+%left NOT
+%left LPAREN RPAREN
+%left LANGLE RANGLE
+
+%%
+
+all : decl_seq ; /* an input file is a (possibly empty) sequence of declarations */
+
+decl_seq : /* empty */ | decl_seq decl ;
+
+decl : block_decl
+ | tokens_decl | abbrev_decl
+ | attr_decl | group_decl | tag_decl
+ | prefix_decl | entrystruct_decl ;
+
+/* Don't invalidate curstate at the end, this is the means of working out the
+ starting state of the NFA */
+block_decl : block1 block2 { fixup_state_refs(curblock); curblock = NULL; } ;
+
+block1 : BLOCK STRING LBRACE { curblock = lookup_block($2, CREATE_MUST_NOT_EXIST); addtostate = curstate = NULL; } ;
+
+block2 : instance_decl_seq state_decl_seq RBRACE ;
+
+prefix_decl : PREFIX STRING
+ { if (!prefix) { /* only honoured while no prefix has been set yet */
+ prefix = $2;
+ } else {
+ fprintf(stderr, "\n\nWarning: prefix declaration ignored; already set on the command line\n");
+ }
+ };
+
+tokens_decl : TOKENS token_seq ;
+
+abbrev_decl : ABBREV STRING EQUAL stimulus_seq
+ { create_abbrev($2, $4); }
+ ;
+
+token_seq : token_seq token | token ;
+
+token : STRING { (void) lookup_token($1, CREATE_MUST_NOT_EXIST); } ;
+
+instance_decl_seq : /* empty */ | instance_decl_seq instance_decl ;
+
+state_decl_seq : /* empty */ | state_decl_seq state_decl ;
+
+state_decl : STATE STRING { addtostate = curstate = lookup_state(curblock, $2, CREATE_OR_USE_OLD); }
+ sdecl_seq
+ | STATE STRING ENTRY STRING { addtostate = curstate = lookup_state(curblock, $2, CREATE_OR_USE_OLD);
+ add_entry_to_state(curstate, $4); }
+ sdecl_seq
+ ;
+
+sdecl_seq : /* empty */ | sdecl_seq sdecl ;
+
+sdecl : transition_decl ;
+
+instance_decl : STRING COLON STRING { instantiate_block(curblock, $3 /* master_block_name */, $1 /* instance_name */ ); } ;
+
+transition_decl : transition_seq ARROW { curtranslist = $1; } destination_seq { addtostate = curstate; /* rewind to the declared state for the next transition */ }
+ | transition_seq EQUAL tag_seq { addtostate = add_transitions_to_internal(curblock, addtostate, $1);
+ add_tags(addtostate, $3);
+ addtostate = curstate; }
+ ;
+
+destination_seq : STRING { add_transitions(curblock, addtostate, curtranslist, $1); }
+ | destination_seq COMMA STRING { add_transitions(curblock, addtostate, curtranslist, $3); }
+ ;
+
+transition_seq : stimulus_seq { $$ = $1; }
+ | transition_seq SEMICOLON stimulus_seq
+ { /* the earlier stimuli lead into a freshly generated internal state */
+ addtostate = add_transitions_to_internal(curblock, addtostate, $1);
+ $$ = $3;
+ }
+ ;
+
+tag_seq : STRING { $$ = add_string_to_list(NULL, $1); }
+ | tag_seq COMMA STRING { $$ = add_string_to_list($1, $3); }
+ ;
+
+stimulus_seq : stimulus
+ { $$ = append_stimulus_to_list(NULL, $1); }
+ | stimulus_seq PIPE stimulus
+ { $$ = append_stimulus_to_list($1, $3); }
+ ;
+
+/* A 'thing' that will make the DFA move from one state to another */
+stimulus : STRING
+ { $$ = stimulus_from_string($1); }
+ | inline_block
+ { $$ = stimulus_from_inline_block($1); }
+ | char_class
+ { add_charclass_to_list($1); /* freeze it into the list. */
+ $$ = stimulus_from_char_class($1); }
+ | /* empty */
+ { $$ = stimulus_from_epsilon(); }
+ ;
+
+inline_block : LANGLE STRING COLON STRING ARROW STRING RANGLE
+ { $$ = create_inline_block($2, $4, $6); /* arguments: type, in, out */ }
+ ;
+
+char_class : simple_char_class
+ | negated_char_class
+ | char_class_diff
+ ;
+
+negated_char_class : NOT simple_char_class
+ { invert_charclass($2); $$ = $2; }
+ ;
+
+char_class_diff : simple_char_class NOT simple_char_class
+ { diff_charclasses($1, $3);
+ free_charclass($3);
+ $$ = $1;
+ }
+ ;
+
+simple_char_class : LSQUARE { curcharclass = new_charclass(); }
+ cc_body
+ RSQUARE { $$ = curcharclass;
+ curcharclass = NULL; }
+ | LSQUARE_CARET { curcharclass = new_charclass(); }
+ cc_body
+ RSQUARE { $$ = curcharclass;
+ invert_charclass($$);
+ curcharclass = NULL; }
+ ;
+
+cc_body : CHAR { add_singleton_to_charclass(curcharclass, $1); }
+ | CHAR HYPHEN CHAR { add_range_to_charclass(curcharclass, $1, $3); }
+ | cc_body CHAR { add_singleton_to_charclass(curcharclass, $2); }
+ | cc_body CHAR HYPHEN CHAR { add_range_to_charclass(curcharclass, $2, $4); }
+ ;
+
+attr_decl : ATTR simple_attr_seq
+ | ATTR STRING COLON expr { define_attr(current_evaluator, $2, $4, 0); }
+ | EARLY ATTR early_attr_seq
+ | EARLY ATTR STRING COLON expr { define_attr(current_evaluator, $3, $5, 1); /* same as ATTR but with the last argument set to 1 */ }
+ | DEFATTR STRING { define_defattr(current_evaluator, $2); }
+ | TYPE STRING { define_type(current_evaluator, $2); }
+ ;
+
+simple_attr_seq : STRING
+ { define_attr(current_evaluator, $1, NULL, 0); }
+ | simple_attr_seq COMMA STRING
+ { define_attr(current_evaluator, $3, NULL, 0); }
+ ;
+
+early_attr_seq : STRING
+ { define_attr(current_evaluator, $1, NULL, 1); }
+ | early_attr_seq COMMA STRING
+ { define_attr(current_evaluator, $3, NULL, 1); }
+ ;
+
+group_decl : GROUP STRING LBRACE { current_evaluator = start_evaluator($2); }
+ attr_decl_seq
+ RBRACE { current_evaluator = NULL; }
+ ;
+
+attr_decl_seq : /* empty */
+ | attr_decl_seq attr_decl
+ ;
+
+tag_decl : TAG STRING EQUAL expr { define_tag($2, $4); }
+ ;
+
+entrystruct_decl :
+ ENTRYSTRUCT STRING STRING { define_entrystruct($2, $3); }
+ ;
+
+expr : NOT expr { $$ = new_not_expr($2); }
+ | expr AND expr { $$ = new_and_expr($1, $3); }
+ | expr PIPE /* OR */ expr { $$ = new_or_expr($1, $3); }
+ | expr XOR expr { $$ = new_xor_expr($1, $3); }
+ | expr QUERY expr COLON expr { $$ = new_cond_expr($1, $3, $5); }
+ | LPAREN expr RPAREN { $$ = $2; /* parentheses only group */ }
+ | STRING { $$ = new_tag_expr($1); }
+ ;
+
+/* vim:et
+*/
+
diff --git a/src/mairix/dfasyn/scan.l b/src/mairix/dfasyn/scan.l
@@ -0,0 +1,111 @@
+/**********************************************************************
+ Lexical analyser definition for input files defining an NFA
+ *********************************************************************/
+
+/*
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2001-2003,2005,2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+%{
+#include "dfasyn.h"
+#include "parse.h"
+
+/* yyunput() not used - define this to avoid compiler warnings */
+#define YY_NO_UNPUT
+
+int lineno = 1; /* Current input line number, starting at 1; the newline rules increment it */
+%}
+
+%x PASSTHRU
+%x STR
+%x CHARCLASS
+
+%%
+
+STATE|State|state       { /* keywords: upper-, capitalised and lower-case spellings only */ return STATE; }
+ABBREV|Abbrev|abbrev    { return ABBREV; }
+DEFINE|Define|define    { return DEFINE; }
+TOKENS|Tokens|tokens    { return TOKENS; }
+PREFIX|Prefix|prefix    { return PREFIX; }
+BLOCK|Block|block       { return BLOCK; }
+TYPE|Type|type          { return TYPE; }
+ENTRY|Entry|entry       { return ENTRY; }
+ENTRYSTRUCT             { return ENTRYSTRUCT; }
+EntryStruct             { return ENTRYSTRUCT; }
+Entrystruct             { return ENTRYSTRUCT; }
+entrystruct             { return ENTRYSTRUCT; }
+ATTR|Attr|attr          { return ATTR; }
+EARLY|Early|early       { return EARLY; }
+DEFATTR|DefAttr         { return DEFATTR; }
+Defattr|defattr         { return DEFATTR; }
+TAG|Tag|tag             { return TAG; }
+GROUP|Group|group       { return GROUP; }
+[A-Za-z0-9_.]+          { /* any other bare word: its heap copy is the STRING value */ yylval.s = new_string(yytext); return STRING; }
+\#.*$                   { /* strip comments */ }
+\-\>                    { return ARROW; }
+=                       { return EQUAL; }
+\|                      { return PIPE; /* OR */ }
+\&                      { return AND; }
+\~                      { return NOT; }
+\!                      { return NOT; }
+\^                      { return XOR; }
+\?                      { return QUERY; }
+\:                      { return COLON; }
+\;                      { return SEMICOLON; }
+\(                      { return LPAREN; }
+\)                      { return RPAREN; }
+\{                      { return LBRACE; }
+\}                      { return RBRACE; }
+\<                      { return LANGLE; }
+\>                      { return RANGLE; }
+\[                      { /* character class body is scanned in its own start condition */ BEGIN CHARCLASS; return LSQUARE; }
+\[\^                    { BEGIN CHARCLASS; return LSQUARE_CARET; }
+\,                      { return COMMA; }
+\n                      { lineno++; }
+[ \t]+                  { /* ignore */ }
+^\%\{[ \t]*\n           { /* this pattern swallows the newline itself, so count it here */ lineno++; BEGIN PASSTHRU; }
+\"                      { BEGIN STR; }
+.                       { printf("Unmatched input <%s> at line %d\n", yytext, lineno); exit (1); }
+
+<PASSTHRU>^\%\}[ \t]*\n  { /* this pattern swallows the newline itself, so count it here */ lineno++; BEGIN INITIAL; }
+<PASSTHRU>\n             { /* passthrough text is copied verbatim to yyout */ fputs(yytext, yyout); lineno++; }
+<PASSTHRU>.+             { fputs(yytext, yyout); }
+
+<STR>\"                  { BEGIN INITIAL; }
+<STR>[^"]*               { yylval.s = new_string(yytext); return STRING; }
+
+<CHARCLASS>\]            { BEGIN INITIAL; return RSQUARE; }
+<CHARCLASS>\-            { return HYPHEN; }
+<CHARCLASS>\\-           { yylval.c = '-'; return CHAR; }
+<CHARCLASS>\\]           { yylval.c = ']'; return CHAR; }
+<CHARCLASS>\\^           { yylval.c = '^'; return CHAR; }
+<CHARCLASS>\\n           { yylval.c = '\n'; return CHAR; }
+<CHARCLASS>\\r           { yylval.c = '\r'; return CHAR; }
+<CHARCLASS>\\f           { yylval.c = '\f'; return CHAR; }
+<CHARCLASS>\\t           { yylval.c = '\t'; return CHAR; }
+<CHARCLASS>\\\\          { yylval.c = '\\'; return CHAR; }
+<CHARCLASS>\^[@A-Z]      { /* caret notation: ^@ is 0, ^A is 1, ... ^Z is 26 */ yylval.c = yytext[1] - '@'; return CHAR; }
+<CHARCLASS>\\x[0-9a-fA-F][0-9a-fA-F] { unsigned int foo; sscanf(yytext+2,"%x",&foo); yylval.c = (char) foo; return CHAR; }
+<CHARCLASS>\\[0-7][0-7][0-7] { unsigned int foo; sscanf(yytext+1,"%o",&foo); yylval.c = (char) foo; return CHAR; }
+<CHARCLASS>.             { yylval.c = yytext[0]; return CHAR; }
+
+%{
+/* vim:et
+*/
+%}
diff --git a/src/mairix/dfasyn/states.c b/src/mairix/dfasyn/states.c
@@ -0,0 +1,303 @@
+/***************************************
+ Handle state-related stuff
+ ***************************************/
+
+/*
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2000-2003,2005,2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+#include "dfasyn.h"
+
+static void maybe_grow_states(Block *b, int hash)/*{{{*/
+/* Make room for one more state: enlarge hash bucket `hash` (in steps of 8)
+   and the block's flat state table (in steps of 32) whenever either is full. */
+{
+  Stateset *bucket = &b->state_hash[hash];
+
+  if (bucket->nstates == bucket->maxstates) {
+    bucket->maxstates += 8;
+    bucket->states = resize_array(State*, bucket->states, bucket->maxstates);
+  }
+
+  if (b->nstates == b->maxstates) {
+    b->maxstates += 32;
+    b->states = resize_array(State*, b->states, b->maxstates);
+  }
+}
+/*}}}*/
+static unsigned long hashfn(const char *s)/*{{{*/
+/* Hash the NUL-terminated string `s` to a bucket number in 0..HASH_MASK.
+   Each byte is folded in as  acc = ((acc << 13) ^ (~acc >> 6)) + byte. */
+{
+  const unsigned char *p = (const unsigned char *) s;
+  unsigned long acc = 0UL;
+
+  for (; *p; p++) {
+    acc = ((acc << 13) ^ ((~acc) >> 6)) + (unsigned long) *p;
+  }
+
+  acc ^= (acc >> 13);
+  return acc & HASH_MASK;
+}
+/*}}}*/
+static State * create_state(Block *b, char *name)/*{{{*/
+/* Allocate a State called `name` (the name is copied), enter it in both the
+   matching hash bucket and the block's flat state table, and return it with
+   its list fields empty and its index set to its slot in b->states. */
+{
+  int bucket_no = hashfn(name);
+  Stateset *bucket;
+  State *st;
+
+  maybe_grow_states(b, bucket_no);
+  bucket = &b->state_hash[bucket_no];
+
+  st = new(State);
+  bucket->states[bucket->nstates++] = st;
+  st->index = b->nstates;          /* the slot taken on the next line */
+  b->states[b->nstates++] = st;
+
+  st->name = new_string(name);
+  st->parent = b;
+  st->transitions = NULL;
+  st->tags = NULL;
+  st->entries = NULL;
+  st->ordered_trans = NULL;
+  st->n_transitions = 0;
+  st->removed = 0;
+
+  return st;
+}
+/*}}}*/
+State * lookup_state(Block *b, char *name, int create)/*{{{*/
+/* Look for the state called `name` in block `b`; `create` decides what
+   happens afterwards:
+     USE_OLD_MUST_EXIST    - a missing state is fatal (message, then exit(1));
+     CREATE_MUST_NOT_EXIST - warn if it already exists, otherwise create it;
+     CREATE_OR_USE_OLD     - create it only when it is missing.
+   Any other `create` value simply returns the search result, possibly NULL. */
+{
+  Stateset *bucket = b->state_hash + hashfn(name);
+  State *st = NULL;
+  int k;
+
+  for (k = 0; k < bucket->nstates && !st; k++) {
+    if (strcmp(bucket->states[k]->name, name) == 0) {
+      st = bucket->states[k];
+    }
+  }
+
+  if (create == USE_OLD_MUST_EXIST) {
+    if (!st) {
+      fprintf(stderr, "Could not find a state '%s' in block '%s' to transition to\n", name, b->name);
+      exit(1);
+    }
+  } else if (create == CREATE_MUST_NOT_EXIST) {
+    if (st) {
+      fprintf(stderr, "Warning : already have a state '%s' in block '%s'\n", name, b->name);
+    } else {
+      st = create_state(b, name);
+    }
+  } else if (create == CREATE_OR_USE_OLD) {
+    if (!st) {
+      st = create_state(b, name);
+    }
+  }
+
+  return st;
+}
+/*}}}*/
+void add_entry_to_state(State *curstate, const char *entry_tag)/*{{{*/
+/* Register `entry_tag` as an entry point at `curstate`: push a new node onto
+   the `entries` list and add the tag to the state's own list of entries. */
+{
+  struct Entrylist *node = new(struct Entrylist);
+
+  node->state = curstate;
+  node->entry_name = new_string(entry_tag);
+  node->next = entries;
+  entries = node;
+
+  curstate->entries = add_string_to_list(curstate->entries, entry_tag);
+}
+/*}}}*/
+/* ================================================================= */
+static void add_transition(Block *curblock, State *curstate, Stimulus *stimulus, char *destination);
+/* ================================================================= */
+Stringlist * add_string_to_list(Stringlist *existing, const char *token)/*{{{*/
+/* Put a new node in front of `existing` and return it as the new head.  The
+   node holds its own copy of `token`, or NULL when `token` is NULL. */
+{
+  Stringlist *head = new(Stringlist);
+
+  head->next = existing;
+  head->string = token ? new_string(token) : NULL;
+  return head;
+}
+/*}}}*/
+static TransList *new_translist(struct TransList *existing, char *destination)/*{{{*/
+/* Allocate a transition node that links to `existing` and carries a copy of
+   the destination state name.  The caller fills in the remaining fields. */
+{
+  TransList *node = new(TransList);
+
+  node->ds_name = new_string(destination);
+  node->next = existing;
+  return node;
+}
+/*}}}*/
+static void add_epsilon_transition(State *curstate, char *destination)/*{{{*/
+/* Put an epsilon transition to `destination` at the head of the state's
+   transition list. */
+{
+  TransList *node = new_translist(curstate->transitions, destination);
+
+  curstate->transitions = node;
+  node->type = TT_EPSILON;
+}
+/*}}}*/
+static void add_token_transition(State *curstate, int token, char *destination)/*{{{*/
+/* Put a transition on `token` to `destination` at the head of the state's
+   transition list. */
+{
+  TransList *node = new_translist(curstate->transitions, destination);
+
+  curstate->transitions = node;
+  node->x.token = token;
+  node->type = TT_TOKEN;
+}
+/*}}}*/
+static void add_abbrev_transition(Block *curblock, State *curstate, struct Abbrev *abbrev, char *destination)/*{{{*/
+/* Expand an abbreviation: add one transition to `destination` for every
+   stimulus on the abbreviation's list. */
+{
+  StimulusList *item = abbrev->stimuli;
+
+  while (item) {
+    add_transition(curblock, curstate, item->stimulus, destination);
+    item = item->next;
+  }
+}
+/*}}}*/
+static void add_inline_block_transition(Block *curblock, State *curstate, InlineBlock *ib, char *destination)/*{{{*/
+/* Splice a fresh instance of inline block `ib` between `curstate` and
+   `destination`.  The instance is named "<type>#<n>", with n taken from the
+   block's running subblockcount.  One epsilon transition leads from
+   `curstate` to the instance's input state and another from its output state
+   to `destination`.
+   The generated names are formatted with a size limit: a name that does not
+   fit its buffer is reported and the program exits, instead of writing past
+   the end of the stack buffer. */
+{
+  char block_name[1024];
+  char input_name[1024];
+  char output_name[1024];
+  State *output_state;
+  int len;
+
+  len = snprintf(block_name, sizeof(block_name), "%s#%d", ib->type, curblock->subblockcount++);
+  if (len < 0 || (size_t) len >= sizeof(block_name)) goto too_long;
+  instantiate_block(curblock, ib->type, block_name);
+
+  len = snprintf(input_name, sizeof(input_name), "%s.%s", block_name, ib->in);
+  if (len < 0 || (size_t) len >= sizeof(input_name)) goto too_long;
+
+  len = snprintf(output_name, sizeof(output_name), "%s.%s", block_name, ib->out);
+  if (len < 0 || (size_t) len >= sizeof(output_name)) goto too_long;
+
+  output_state = lookup_state(curblock, output_name, CREATE_OR_USE_OLD);
+  add_epsilon_transition(curstate, input_name);
+  add_epsilon_transition(output_state, destination);
+  return;
+
+too_long:
+  fprintf(stderr, "State name generated for inline block of type '%s' is too long\n", ib->type);
+  exit(1);
+}
+/*}}}*/
+static void add_char_class_transition(State *curstate, CharClass *cc, char *destination)/*{{{*/
+/* Put a transition on character class `cc` to `destination` at the head of
+   the state's transition list. */
+{
+  TransList *node = new_translist(curstate->transitions, destination);
+
+  curstate->transitions = node;
+  node->x.char_class = cc;
+  node->type = TT_CHARCLASS;
+}
+/*}}}*/
+static void add_transition(Block *curblock, State *curstate, Stimulus *stimulus, char *destination)/*{{{*/
+/* Add one transition from `curstate` to `destination`, choosing the helper
+   that matches the stimulus kind.  Abbreviations expand back through this
+   function, so definitions may be recursive.  A stimulus of any other kind
+   adds nothing. */
+{
+  if (stimulus->type == T_EPSILON) {
+    add_epsilon_transition(curstate, destination);
+  } else if (stimulus->type == T_TOKEN) {
+    add_token_transition(curstate, stimulus->x.token, destination);
+  } else if (stimulus->type == T_ABBREV) {
+    add_abbrev_transition(curblock, curstate, stimulus->x.abbrev, destination);
+  } else if (stimulus->type == T_INLINEBLOCK) {
+    add_inline_block_transition(curblock, curstate, stimulus->x.inline_block, destination);
+  } else if (stimulus->type == T_CHARCLASS) {
+    add_char_class_transition(curstate, stimulus->x.char_class, destination);
+  }
+}
+/*}}}*/
+void add_transitions(Block *curblock, State *curstate, StimulusList *stimuli, char *destination)/*{{{*/
+/* Add a transition from `curstate` to `destination` for every stimulus on
+   the list `stimuli`. */
+{
+  StimulusList *item = stimuli;
+
+  while (item) {
+    add_transition(curblock, curstate, item->stimulus, destination);
+    item = item->next;
+  }
+}
+/*}}}*/
+State * add_transitions_to_internal(Block *curblock, State *addtostate, StimulusList *stimuli)/*{{{*/
+/* Look up (normally creating) an internal state named "#<n>", with n taken
+   from the block's running subcount, add transitions from `addtostate` to it
+   for every stimulus, and return that state. */
+{
+  char name[1024];
+  State *internal;
+
+  sprintf(name, "#%d", curblock->subcount++);
+  internal = lookup_state(curblock, name, CREATE_MUST_NOT_EXIST);
+  add_transitions(curblock, addtostate, stimuli, internal->name);
+
+  return internal;
+}
+/*}}}*/
+void add_tags(State *curstate, Stringlist *sl)/*{{{*/
+/* Attach the tag list `sl` to `curstate`.  Tags already on the state are
+   kept: they are chained onto the tail of `sl`, and `sl` becomes the new head.
+   A NULL `sl` leaves the state's tags as they are; it used to be dereferenced
+   whenever the state already had tags. */
+{
+  Stringlist *tail;
+
+  if (!sl) {
+    return;
+  }
+
+  if (curstate->tags) {
+    /* If we already have some, stick them on the end of the new list */
+    for (tail = sl; tail->next; tail = tail->next) {
+      /* walk to the last node */
+    }
+    tail->next = curstate->tags;
+  }
+  curstate->tags = sl;
+}
+/*}}}*/
+/* ================================================================= */
+void fixup_state_refs(Block *b)/*{{{*/
+/* Resolve every transition's destination name to a State pointer.  A name
+   with no state yet gets one created, because the lookup runs in
+   CREATE_OR_USE_OLD mode. */
+{
+  int n;
+
+  for (n = 0; n < b->nstates; n++) {
+    TransList *t = b->states[n]->transitions;
+
+    while (t) {
+      t->ds_ref = lookup_state(b, t->ds_name, CREATE_OR_USE_OLD);
+      t = t->next;
+    }
+  }
+}
+/*}}}*/
+/* ================================================================= */
+void expand_charclass_transitions(Block *b)/*{{{*/
+/* Replace each character-class transition by plain token transitions, one
+   for every bit (0..255) set in the class's group bitmap, using token number
+   ntokens + bit.  The first set bit re-uses the existing node; each later one
+   gets a new node linked in directly behind it. */
+{
+  int si;
+
+  for (si = 0; si < b->nstates; si++) {
+    TransList *tl;
+
+    for (tl = b->states[si]->transitions; tl; tl = tl->next) {
+      CharClass *cc;
+      int group;
+      int converted = 0;
+
+      if (tl->type != TT_CHARCLASS) continue;
+
+      cc = tl->x.char_class;
+      for (group = 0; group < 256; group++) {
+        /* Insert separate transitions for each subclass of the charclass */
+        if (!cc_test_bit(cc->group_bitmap, group)) continue;
+
+        if (!converted) {
+          tl->type = TT_TOKEN;
+          tl->x.token = ntokens + group;
+          converted = 1;
+        } else {
+          TransList *extra = new(TransList);
+          extra->next = tl->next;
+          extra->ds_name = new_string(tl->ds_name);
+          extra->ds_ref = tl->ds_ref;
+          extra->type = TT_TOKEN;
+          extra->x.token = ntokens + group;
+          tl->next = extra;
+        }
+      }
+    }
+  }
+}
+/*}}}*/
+/* ================================================================= */
diff --git a/src/mairix/dfasyn/stimulus.c b/src/mairix/dfasyn/stimulus.c
@@ -0,0 +1,87 @@
+/***************************************
+ Handle stimulus-related stuff
+ ***************************************/
+
+/*
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2005,2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+#include "dfasyn.h"
+
+Stimulus *stimulus_from_epsilon(void)/*{{{*/
+/* Allocate a stimulus of kind T_EPSILON. */
+{
+  Stimulus *stim = new(Stimulus);
+
+  stim->type = T_EPSILON;
+  return stim;
+}
+/*}}}*/
+Stimulus *stimulus_from_string(char *str)/*{{{*/
+/* Build a stimulus from a name: an abbreviation if one called `str` exists,
+   otherwise the token called `str`, looked up in USE_OLD_MUST_EXIST mode. */
+{
+  Stimulus *stim = new(Stimulus);
+  struct Abbrev *abbrev = lookup_abbrev(str);
+
+  if (!abbrev) {
+    /* Token: lookup_token will have bombed if it wasn't found. */
+    int token = lookup_token(str, USE_OLD_MUST_EXIST);
+    stim->type = T_TOKEN;
+    stim->x.token = token;
+    return stim;
+  }
+
+  stim->type = T_ABBREV;
+  stim->x.abbrev = abbrev;
+  return stim;
+}
+/*}}}*/
+Stimulus *stimulus_from_inline_block(InlineBlock *block)/*{{{*/
+/* Allocate a stimulus of kind T_INLINEBLOCK that refers to `block`. */
+{
+  Stimulus *stim = new(Stimulus);
+
+  stim->x.inline_block = block;
+  stim->type = T_INLINEBLOCK;
+  return stim;
+}
+/*}}}*/
+Stimulus *stimulus_from_char_class(CharClass *char_class)/*{{{*/
+/* Allocate a stimulus of kind T_CHARCLASS that refers to `char_class`. */
+{
+  Stimulus *stim = new(Stimulus);
+
+  stim->x.char_class = char_class;
+  stim->type = T_CHARCLASS;
+  return stim;
+}
+/*}}}*/
+StimulusList *append_stimulus_to_list(StimulusList *existing, Stimulus *stim)/*{{{*/
+/* Allocate a list node for `stim` and return it as the new head.  Despite the
+   name, the node is linked in front of `existing`. */
+{
+  StimulusList *head = new(StimulusList);
+
+  head->stimulus = stim;
+  head->next = existing;
+  return head;
+}
+/*}}}*/
diff --git a/src/mairix/dfasyn/tabcompr.c b/src/mairix/dfasyn/tabcompr.c
@@ -0,0 +1,181 @@
+/***************************************
+ Routines to compress the DFA transition tables, by identifying where two DFA
+ states have a lot of transitions the same.
+ ***************************************/
+
+/*
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2001-2003,2005,2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+#include "dfasyn.h"
+
+/* ================================================================= */
+/* Treat 'x' as a set of 16 bit pairs, with field (0..15) specifying
+ which. Increment the field'th bit pair as a gray code, in the
+ pattern 00->01->11->10->00 */
+
+unsigned long increment(unsigned long x, int field)
+{
+  /* XOR mask to apply, indexed by the current value of the bit pair:
+     00->01, 01->11, 11->10, 10->00. */
+  static unsigned char step[4] = {1, 2, 2, 1};
+  int shift = 2 * field;
+  unsigned long pair = (x >> shift) & 3;
+  unsigned long mask = step[pair];
+
+  return x ^ (mask << shift);
+}
+
+/* ================================================================= */
+/* Calculate the number of bits set in an unsigned long. */
+
+/* Count the bits set in the low 32 bits of 'y' by repeatedly summing
+   adjacent fields of doubling width.  'y' must not have bits set above
+   bit 31.  The result lies in 0..32, so six bits are needed to hold it. */
+static unsigned long count_bits_set_32(unsigned long y)
+{
+  y = ((y >> 1) & 0x55555555UL) + (y & 0x55555555UL);
+  y = ((y >> 2) & 0x33333333UL) + (y & 0x33333333UL);
+  y = ((y >> 4) + y) & 0x0f0f0f0fUL;
+  y = (y >> 8) + y;
+  y = (y >> 16) + y;
+  return y & 0x3f;
+}
+
+/* Return the number of bits set in 'x'.
+
+   The value is processed in 32-bit slices so that the answer is right
+   whatever the width of unsigned long.  The earlier version masked the
+   final sum with 0x1f, which turned a count of 32 (x == 0xffffffff) into 0,
+   and it silently ignored any bits above bit 31. */
+unsigned long count_bits_set(unsigned long x)
+{
+  unsigned long total = 0;
+
+  while (x) {
+    total += count_bits_set_32(x & 0xffffffffUL);
+    /* Two 16-bit shifts: a single shift by 32 would be undefined where
+       unsigned long is only 32 bits wide. */
+    x = (x >> 16) >> 16;
+  }
+  return total;
+}
+
+/* ================================================================= */
+/* Compute 'signatures' of the transitions out of a particular state.
+ The signature is given by considering the destination state numbers mod 16,
+ and counting how many transitions there are in each resulting equivalence
+ class. The number is encoded using the gray code implied by the increment
+ fn. */
+
+/* Fill in transition_sig for every state of 'dfa'.  For each of the
+   'ntokens' entries in a state's map, the low four bits of the destination
+   pick one of the 16 bit pairs in the signature, and that pair is advanced
+   one step with increment(). */
+static void
+compute_transition_sigs(struct DFA *dfa, int ntokens)
+{
+  int state;
+
+  for (state = 0; state < dfa->n; state++) {
+    unsigned long sig = 0UL;
+    int tok;
+
+    for (tok = 0; tok < ntokens; tok++) {
+      unsigned long slot = dfa->s[state]->map[tok];
+
+      slot &= 0xf; /* 16 bit pairs in the signature */
+      sig = increment(sig, slot);
+    }
+    dfa->s[state]->transition_sig = sig;
+  }
+}
+
+
+/* ================================================================= */
+
+#define REQUIRED_BENEFIT 2
+
+/* Choose a 'default state' for every state of 'dfa'.
+
+   For state i, the candidates are the earlier states j < i that have no
+   default state of their own.  A candidate is accepted when the number of
+   tokens on which the two maps differ is smaller than that of the best
+   candidate so far and smaller than (number of transitions out of i) minus
+   REQUIRED_BENEFIT.  The winner's index is stored in defstate (-1 when
+   there is none) and the corresponding difference count in best_diff
+   (ntokens + 1 when there is none). */
+static void
+find_default_states(struct DFA *dfa, int ntokens)
+{
+  int cur;
+
+  for (cur = 0; cur < dfa->n; cur++) {
+    int tok, cand;
+    int n_trans = 0;               /* Number of transitions in working state */
+    int chosen = -1;
+    int chosen_diff = ntokens + 1; /* Worse than any computed value */
+    unsigned long cur_sig;
+
+    for (tok = 0; tok < ntokens; tok++) {
+      if (dfa->s[cur]->map[tok] >= 0) n_trans++;
+    }
+
+    dfa->s[cur]->defstate = -1; /* not defaulted */
+    cur_sig = dfa->s[cur]->transition_sig;
+
+    for (cand = 0; cand < cur; cand++) {
+      unsigned long cand_sig;
+      int estimate, exact;
+
+      /* Avoid chains of defstates */
+      if (dfa->s[cand]->defstate >= 0) continue;
+      cand_sig = dfa->s[cand]->transition_sig;
+
+      /* This is the heart of the technique: XOR the two gray-coded
+         signatures and count the bits set in the result.  That gives the
+         sum of absolute differences of the bit pairs, which is a lower
+         bound on the number of outgoing transitions that differ.  The true
+         number may be much greater ('false matches'), but the bound is a
+         quick way of discarding most useless candidates. */
+      estimate = count_bits_set(cur_sig ^ cand_sig);
+      if ((estimate >= chosen_diff) || (estimate >= n_trans)) continue;
+
+      /* The candidate survived the filter, so do an exact comparison. */
+      exact = 0;
+      for (tok = 0; tok < ntokens; tok++) {
+        if (dfa->s[cur]->map[tok] != dfa->s[cand]->map[tok]) exact++;
+      }
+
+      if (((chosen < 0) || (exact < chosen_diff)) &&
+          (exact < (n_trans - REQUIRED_BENEFIT))) {
+        chosen = cand;
+        chosen_diff = exact;
+      }
+    }
+
+    dfa->s[cur]->defstate = chosen;
+    dfa->s[cur]->best_diff = chosen_diff;
+  }
+}
+
+/* ================================================================= */
+
+/* Entry point for transition table compression.  First computes the
+   transition signature of every state in 'dfa', then uses those signatures
+   to pick a default state for each state.  'ntokens' is the number of
+   entries in each state's transition map. */
+void
+compress_transition_table(struct DFA *dfa, int ntokens)
+{
+ compute_transition_sigs(dfa, ntokens);
+ find_default_states(dfa, ntokens);
+}
+
+/* ================================================================= */
+
+#ifdef TEST
+/* Stand-alone self-test, built only when TEST is defined.  Steps bit pair 2
+   through the complete gray code cycle and prints each intermediate value,
+   then prints the bit counts of a few sample words next to the expected
+   answer.  The values are unsigned long, so they are printed with %lu
+   (the earlier %d did not match the argument type). */
+int main(void) {
+  unsigned long x = 0;
+  unsigned long x1, x2, x3, x4;
+  x1 = increment(x, 2);
+  x2 = increment(x1, 2);
+  x3 = increment(x2, 2);
+  x4 = increment(x3, 2);
+  printf("%lu %lu %lu %lu %lu\n", x, x1, x2, x3, x4);
+
+  printf("1=%lu\n", count_bits_set(0x00000001));
+  printf("2=%lu\n", count_bits_set(0x00000003));
+  printf("3=%lu\n", count_bits_set(0x00000007));
+  printf("4=%lu\n", count_bits_set(0x0000000f));
+  printf("4=%lu\n", count_bits_set(0xf0000000));
+
+  return 0;
+}
+#endif
+
+
diff --git a/src/mairix/dfasyn/tokens.c b/src/mairix/dfasyn/tokens.c
@@ -0,0 +1,85 @@
+/***************************************
+ Handle token-related stuff
+ ***************************************/
+
+/*
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2000-2003,2005,2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+#include "dfasyn.h"
+
+/* Token names, indexed by token number.  Grown by grow_tokens(). */
+char **toktable=NULL;
+/* Number of entries of toktable that are in use. */
+int ntokens = 0;
+/* Number of entries toktable currently has room for. */
+static int maxtokens = 0;
+/* ================================================================= */
+/* Make room for 32 more entries in toktable. */
+static void grow_tokens(void)/*{{{*/
+{
+  enum { TOKEN_TABLE_STEP = 32 };
+
+  maxtokens = maxtokens + TOKEN_TABLE_STEP;
+  toktable = resize_array(char *, toktable, maxtokens);
+}
+/*}}}*/
+/* Add 'name' at the end of toktable, growing the table first if it is full.
+   A copy of the name is stored.  Returns the number of the new token.  No
+   check for duplicates is made here. */
+static int create_token(char *name)/*{{{*/
+{
+  int slot = ntokens;
+
+  if (slot == maxtokens) {
+    grow_tokens();
+  }
+  toktable[slot] = new_string(name);
+  ntokens = slot + 1;
+  return slot;
+}
+/*}}}*/
+/* Find the number of the token called 'name'.  'create' selects what happens
+   depending on whether the token is already in toktable:
+
+     USE_OLD_MUST_EXIST    - the token must exist; otherwise an error is
+                             printed and the program exits.
+     CREATE_MUST_NOT_EXIST - the token must not exist yet and is created;
+                             if it does exist, an error is printed and the
+                             program exits.
+     CREATE_OR_USE_OLD     - the token is created if it does not exist.
+
+   Returns the token number.  For any other value of 'create' the result of
+   the search is returned as it is, i.e. -1 when the name is unknown. */
+int lookup_token(char *name, int create)/*{{{*/
+{
+  int idx;
+  int found = -1;
+
+  for (idx = 0; (idx < ntokens) && (found < 0); idx++) {
+    if (strcmp(toktable[idx], name) == 0) {
+      found = idx;
+    }
+  }
+
+  if (create == USE_OLD_MUST_EXIST) {
+    if (found < 0) {
+      fprintf(stderr, "Token '%s' was never declared\n", name);
+      exit(1);
+    }
+  } else if (create == CREATE_MUST_NOT_EXIST) {
+    if (found >= 0) {
+      fprintf(stderr, "Token '%s' already declared\n", name);
+      exit(1);
+    }
+    found = create_token(name);
+  } else if (create == CREATE_OR_USE_OLD) {
+    if (found < 0) {
+      found = create_token(name);
+    }
+  }
+
+  return found;
+}
+/*}}}*/
+
+
diff --git a/src/mairix/dirscan.c b/src/mairix/dirscan.c
@@ -0,0 +1,420 @@
+/*
+ mairix - message index builder and finder for maildir folders.
+
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2002,2003,2004,2005,2006,2007
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+/* Traverse a directory tree and find maildirs, then list files in them. */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <assert.h>
+#include "mairix.h"
+
+/* Allocate an empty message path array: no entries, no capacity, and both
+   the 'paths' and 'type' tables set to NULL. */
+struct msgpath_array *new_msgpath_array(void)/*{{{*/
+{
+  struct msgpath_array *arr = new(struct msgpath_array);
+
+  arr->n = 0;
+  arr->max = 0;
+  arr->paths = NULL;
+  arr->type = NULL;
+  return arr;
+}
+/*}}}*/
+/* Release 'x' and the tables it holds.  The path string of every MTY_FILE
+   entry is freed as well; nothing is freed per entry for the other message
+   types (MTY_MBOX, MTY_DEAD).  'x' itself must not be NULL. */
+void free_msgpath_array(struct msgpath_array *x)/*{{{*/
+{
+  if (x->paths) {
+    int k;
+
+    for (k = 0; k < x->n; k++) {
+      if (x->type[k] == MTY_FILE) {
+        free(x->paths[k].src.mpf.path);
+      }
+    }
+    free(x->type);
+    free(x->paths);
+  }
+  free(x);
+}
+/*}}}*/
+/* Append a copy of the path 'x' to 'arr' as an MTY_FILE entry.  When the
+   array is full, both of its tables are enlarged by 1024 entries first. */
+static void add_file_to_list(char *x, struct msgpath_array *arr) {/*{{{*/
+  char *copy = new_string(x);
+
+  if (arr->max == arr->n) {
+    arr->max += 1024;
+    arr->paths = grow_array(struct msgpath, arr->max, arr->paths);
+    arr->type = grow_array(enum message_type, arr->max, arr->type);
+  }
+
+  arr->paths[arr->n].src.mpf.path = copy;
+  arr->type[arr->n] = MTY_FILE;
+  arr->n++;
+}
+/*}}}*/
+/* Add every entry found in the "new" and "cur" subdirectories of the
+   maildir 'folder' to 'arr', as "<folder>/<subdir>/<name>".  Only "." and
+   ".." are skipped.  A subdirectory that cannot be opened is silently
+   ignored. */
+static void get_maildir_message_paths(char *folder, struct msgpath_array *arr)/*{{{*/
+{
+  static char *subdirs[] = {"new", "cur"};
+  int folder_len = strlen(folder);
+  /* Room for "<folder>/new" or "<folder>/cur" plus the terminator. */
+  char *dirpath = new_array(char, folder_len + 6);
+  /* Room for the directory path, a '/', and a name of up to NAME_MAX. */
+  char *msgpath = new_array(char, folder_len + 8 + NAME_MAX);
+  int which;
+
+  /* FIXME : just store mdir-rooted paths in array and have common prefix elsewhere. */
+
+  for (which = 0; which < 2; which++) {
+    DIR *dir;
+    struct dirent *entry;
+
+    strcpy(dirpath, folder);
+    strcat(dirpath, "/");
+    strcat(dirpath, subdirs[which]);
+
+    dir = opendir(dirpath);
+    if (!dir) continue;
+
+    while ((entry = readdir(dir)) != NULL) {
+      /* TODO : Perhaps we ought to do some validation on the path here?
+         i.e. check that the filename looks valid for a maildir message. */
+      if ((strcmp(entry->d_name, ".") == 0) ||
+          (strcmp(entry->d_name, "..") == 0)) {
+        continue;
+      }
+      strcpy(msgpath, dirpath);
+      strcat(msgpath, "/");
+      strcat(msgpath, entry->d_name);
+      add_file_to_list(msgpath, arr);
+    }
+    closedir(dir);
+  }
+
+  free(dirpath);
+  free(msgpath);
+}
+/*}}}*/
+/* Return 1 if 'x' looks like the name of an MH message file, 0 otherwise.
+
+   A valid name is non-empty and consists of decimal digits, optionally
+   followed by a single '.' as its last character (MH folders written by
+   Evolution have a '.' on the end of the numerical file names).  Note that
+   a lone "." also satisfies this rule, so callers that care have to filter
+   it out themselves.
+
+   Characters are converted to unsigned char before being handed to
+   isdigit(): file names may contain bytes outside the ASCII range, and
+   passing a negative char value to isdigit() is undefined. */
+int valid_mh_filename_p(const char *x)/*{{{*/
+{
+  const char *p;
+
+  if (!*x) return 0; /* Must not be empty */
+
+  for (p = x; *p; p++) {
+    if (isdigit((unsigned char) *p)) continue;
+    /* The first non-digit is only acceptable as a trailing '.' */
+    return ((p[0] == '.') && (p[1] == 0)) ? 1 : 0;
+  }
+  return 1;
+}
+/*}}}*/
+/* Add every entry of the directory 'folder' whose name passes
+   valid_mh_filename_p() to 'arr', as "<folder>/<name>".  "." and ".." are
+   skipped.  If the directory cannot be opened, nothing is added. */
+static void get_mh_message_paths(char *folder, struct msgpath_array *arr)/*{{{*/
+{
+  int folder_len = strlen(folder);
+  /* Room for the folder, a '/', and a name of up to NAME_MAX characters. */
+  char *msgpath = new_array(char, folder_len + 8 + NAME_MAX);
+  DIR *dir = opendir(folder);
+
+  if (dir) {
+    struct dirent *entry;
+
+    while ((entry = readdir(dir)) != NULL) {
+      const char *name = entry->d_name;
+
+      if ((strcmp(name, ".") == 0) || (strcmp(name, "..") == 0)) continue;
+      if (!valid_mh_filename_p(name)) continue;
+
+      strcpy(msgpath, folder);
+      strcat(msgpath, "/");
+      strcat(msgpath, name);
+      add_file_to_list(msgpath, arr);
+    }
+    closedir(dir);
+  }
+
+  free(msgpath);
+}
+/*}}}*/
+/* stat() the path "<base>/<child>" into 'sb'.  Returns whatever stat()
+   returned. */
+static int child_stat(const char *base, const char *child, struct stat *sb)/*{{{*/
+{
+  /* Both names, the separating '/', and the terminator. */
+  int len = strlen(base) + strlen(child) + 2;
+  char *joined = new_array(char, len);
+  int status;
+
+  strcpy(joined, base);
+  strcat(joined, "/");
+  strcat(joined, child);
+
+  status = stat(joined, sb);
+  free(joined);
+  return status;
+}
+/*}}}*/
+/* Return 1 if "<base>/<child>" can be stat()ed and is a regular file,
+   otherwise 0. */
+static int has_child_file(const char *base, const char *child)/*{{{*/
+{
+  struct stat sb;
+
+  if (child_stat(base, child, &sb) < 0) return 0;
+  return S_ISREG(sb.st_mode) ? 1 : 0;
+}
+/*}}}*/
+/* Return 1 if "<base>/<child>" can be stat()ed and is a directory,
+   otherwise 0. */
+static int has_child_dir(const char *base, const char *child)/*{{{*/
+{
+  struct stat sb;
+
+  if (child_stat(base, child, &sb) < 0) return 0;
+  return S_ISDIR(sb.st_mode) ? 1 : 0;
+}
+/*}}}*/
+/* Classify the directory entry 'de_name'.  Inside a maildir, its own "new",
+   "cur" and "tmp" subdirectories are ignored; every other entry, and every
+   entry of a directory that is not a maildir, is to be processed. */
+static enum traverse_check scrutinize_maildir_entry(int parent_is_maildir, const char *de_name)/*{{{*/
+{
+  if (!parent_is_maildir) {
+    return TRAV_PROCESS;
+  }
+
+  if ((strcmp(de_name, "new") == 0) ||
+      (strcmp(de_name, "cur") == 0) ||
+      (strcmp(de_name, "tmp") == 0)) {
+    return TRAV_IGNORE;
+  }
+
+  /* Any other subdirectory is not part of this maildir itself. */
+  return TRAV_PROCESS;
+}
+/*}}}*/
+/* Return 1 if 'path' (whose stat data is in 'sb') is a directory that has
+   "new", "tmp" and "cur" subdirectories, otherwise 0. */
+static int filter_is_maildir(const char *path, const struct stat *sb)/*{{{*/
+{
+  if (!S_ISDIR(sb->st_mode)) {
+    return 0;
+  }
+  return (has_child_dir(path, "new") &&
+          has_child_dir(path, "tmp") &&
+          has_child_dir(path, "cur")) ? 1 : 0;
+}
+/*}}}*/
+/* Callbacks handed to glob_and_expand_paths() for maildir folders: 'filter'
+   reports whether a path is a maildir, 'scrutinize' classifies a directory
+   entry as TRAV_IGNORE or TRAV_PROCESS. */
+struct traverse_methods maildir_traverse_methods = {/*{{{*/
+ .filter = filter_is_maildir,
+ .scrutinize = scrutinize_maildir_entry
+};
+/*}}}*/
+/* Classify the directory entry 'de_name': names that look like MH message
+   files are ignored, everything else is to be processed.  'parent_is_mh' is
+   not consulted, because sub-folders have to be allowed within a folder
+   until a better solution is thought of. */
+static enum traverse_check scrutinize_mh_entry(int parent_is_mh, const char *de_name)/*{{{*/
+{
+  return valid_mh_filename_p(de_name) ? TRAV_IGNORE : TRAV_PROCESS;
+}
+/*}}}*/
+/* Return 1 if 'path' (whose stat data is in 'sb') is a directory containing
+   at least one of the marker files that the various MH-style mail programs
+   leave in their folders, otherwise 0.  The markers are probed in the order
+   listed and the search stops at the first one found. */
+static int filter_is_mh(const char *path, const struct stat *sb)/*{{{*/
+{
+  /* TODO : find a way of making this more scalable?  e.g. if a folder of a
+   * particular subtype is found once, try that subtype first later, since
+   * the user presumably uses a consistent MH-subtype (i.e. a single MUA). */
+  static const char *const markers[] = {
+    ".xmhcache",
+    ".mh_sequences",
+    /* Sylpheed */
+    ".sylpheed_cache",
+    ".sylpheed_mark",
+    /* claws-mail */
+    ".claws_cache",
+    ".claws_mark",
+    /* NNML (Gnus) */
+    ".marks",
+    ".overview",
+    /* Evolution */
+    "cmeta",
+    "summary",
+    /* Mew */
+    ".mew-summary",
+    /* ezmlm/archive */
+    "index"
+  };
+  unsigned int k;
+
+  if (!S_ISDIR(sb->st_mode)) {
+    return 0;
+  }
+
+  for (k = 0; k < sizeof(markers) / sizeof(markers[0]); k++) {
+    if (has_child_file(path, markers[k])) {
+      return 1;
+    }
+  }
+  return 0;
+}
+/*}}}*/
+/* Callbacks handed to glob_and_expand_paths() for MH folders: 'filter'
+   reports whether a path is an MH folder, 'scrutinize' classifies a
+   directory entry as TRAV_IGNORE or TRAV_PROCESS. */
+struct traverse_methods mh_traverse_methods = {/*{{{*/
+ .filter = filter_is_mh,
+ .scrutinize = scrutinize_mh_entry
+};
+/*}}}*/
+#if 0
+/* Disabled code (inside #if 0): recursive scan of "<folder_base>/<this_folder>"
+   that collects the messages of the folder itself and then descends into
+   every subdirectory.
+
+   NOTE(review): this no longer matches the rest of the file.  It calls
+   get_maildir_message_paths() and get_mh_message_paths() with three
+   arguments although both take two, and it uses looks_like_maildir(), which
+   is not defined in the visible part of this file. */
+static void scan_directory(char *folder_base, char *this_folder, enum folder_type ft, struct msgpath_array *arr)/*{{{*/
+{
+ DIR *d;
+ struct dirent *de;
+ struct stat sb;
+ char *fname, *sname;
+ char *name;
+ int folder_base_len = strlen(folder_base);
+ int this_folder_len = strlen(this_folder);
+
+ /* Full path of the folder being scanned. */
+ name = new_array(char, folder_base_len + this_folder_len + 2);
+ strcpy(name, folder_base);
+ strcat(name, "/");
+ strcat(name, this_folder);
+
+ switch (ft) {
+ case FT_MAILDIR:
+ if (looks_like_maildir(folder_base, this_folder)) {
+ get_maildir_message_paths(folder_base, this_folder, arr);
+ }
+ break;
+ case FT_MH:
+ get_mh_message_paths(folder_base, this_folder, arr);
+ break;
+ default:
+ break;
+ }
+
+ /* fname: full path of an entry; sname: the same entry relative to
+    folder_base, as needed for the recursive call. */
+ fname = new_array(char, strlen(name) + 2 + NAME_MAX);
+ sname = new_array(char, this_folder_len + 2 + NAME_MAX);
+
+ d = opendir(name);
+ if (d) {
+ while ((de = readdir(d))) {
+ if (!strcmp(de->d_name, ".") ||
+ !strcmp(de->d_name, "..")) {
+ continue;
+ }
+
+ strcpy(fname, name);
+ strcat(fname, "/");
+ strcat(fname, de->d_name);
+
+ strcpy(sname, this_folder);
+ strcat(sname, "/");
+ strcat(sname, de->d_name);
+
+ if (stat(fname, &sb) >= 0) {
+ if (S_ISDIR(sb.st_mode)) {
+ scan_directory(folder_base, sname, ft, arr);
+ }
+ }
+ }
+ closedir(d);
+ }
+
+ free(fname);
+ free(sname);
+ free(name);
+ return;
+}
+/*}}}*/
+#endif
+/* qsort() comparison callback: orders two struct msgpath entries by the
+   strcmp() ordering of their file paths. */
+static int message_compare(const void *a, const void *b)/*{{{*/
+{
+  /* FIXME : Is this a sensible way to do this with mbox messages in the picture? */
+  const struct msgpath *lhs = a;
+  const struct msgpath *rhs = b;
+
+  /* This should only get called on 'file' type messages - TBC! */
+  return strcmp(lhs->src.mpf.path, rhs->src.mpf.path);
+}
+/*}}}*/
+/* Sort the entries of 'arr' into path order (see message_compare()).  Only
+   the 'paths' table is rearranged; the 'type' table is left as it is.
+
+   Arrays with fewer than two entries are left alone.  Apart from there
+   being nothing to sort, 'paths' is still NULL when no message has been
+   added, and the C standard requires valid pointer arguments for qsort()
+   even when the element count is zero. */
+static void sort_message_list(struct msgpath_array *arr)/*{{{*/
+{
+  if (arr->n < 2) {
+    return;
+  }
+  qsort(arr->paths, arr->n, sizeof(struct msgpath), message_compare);
+}
+/*}}}*/
+/*{{{ void build_message_list */
+/* Collect the message files of all folders selected by 'folders' into 'msgs'
+   and sort the result.
+
+   folder_base - directory handed to glob_and_expand_paths() as the base
+   folders     - colon-separated list of folder patterns
+   ft          - FT_MAILDIR or FT_MH; any other value trips an assertion
+   msgs        - array the message paths are appended to
+   omit_globs  - passed through to glob_and_expand_paths()
+
+   'paths' and 'n_paths' start out as NULL and 0 so that the clean-up below
+   is well defined even on the default branch when assertions are compiled
+   out (NDEBUG); they used to be read uninitialised in that case.
+
+   NOTE(review): neither raw_paths nor the individual strings in 'paths' are
+   released here - confirm who owns them. */
+void build_message_list(char *folder_base, char *folders, enum folder_type ft,
+                        struct msgpath_array *msgs,
+                        struct globber_array *omit_globs)
+{
+  char **raw_paths, **paths = NULL;
+  int n_raw_paths, n_paths = 0, i;
+
+  split_on_colons(folders, &n_raw_paths, &raw_paths);
+  switch (ft) {
+    case FT_MAILDIR:
+      glob_and_expand_paths(folder_base, raw_paths, n_raw_paths, &paths, &n_paths, &maildir_traverse_methods, omit_globs);
+      for (i=0; i<n_paths; i++) {
+        get_maildir_message_paths(paths[i], msgs);
+      }
+      break;
+    case FT_MH:
+      glob_and_expand_paths(folder_base, raw_paths, n_raw_paths, &paths, &n_paths, &mh_traverse_methods, omit_globs);
+      for (i=0; i<n_paths; i++) {
+        get_mh_message_paths(paths[i], msgs);
+      }
+      break;
+    default:
+      assert(0);
+      break;
+  }
+
+  free(paths); /* free(NULL) is a no-op */
+
+  sort_message_list(msgs);
+  return;
+}
+/*}}}*/
+
+#ifdef TEST
+/* Stand-alone test driver, built only when TEST is defined: lists the
+   messages found below the current directory.
+
+   NOTE(review): this looks stale.  build_message_list() as defined in this
+   file takes five arguments and returns void, and the paths of file
+   messages are reached as paths[i].src.mpf.path elsewhere in this file, so
+   the call and the printf() below presumably no longer compile - confirm
+   and update or remove. */
+int main (int argc, char **argv)
+{
+ int i;
+ struct msgpath_array *arr;
+
+ arr = build_message_list(".");
+
+ for (i=0; i<arr->n; i++) {
+ printf("%08lx %s\n", arr->paths[i].mtime, arr->paths[i].path);
+ }
+
+ free_msgpath_array(arr);
+
+ return 0;
+}
+#endif
+
+
diff --git a/src/mairix/dotlock.c b/src/mairix/dotlock.c
@@ -0,0 +1,116 @@
+/*
+ mairix - message index builder and finder for maildir folders.
+
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2005
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+#include "mairix.h"
+#include <sys/utsname.h>
+#include <sys/types.h>
+#include <pwd.h>
+#include <unistd.h>
+
+/* Name of the lock file ("<database path>.lock").  NULL until
+   lock_database() has been called; unlock_database() removes the file of
+   this name. */
+static char *lock_file_name = NULL;
+
+/* This locking code was originally written for tdl */
+
+/* Take the lock for the database file 'path'.
+
+   The lock is the file "<path>.lock".  It is taken by writing
+   "pid,node,user" to a uniquely named temporary file and hard-linking that
+   file to the lock name.  If link() reports failure, the link count of the
+   temporary file is checked, and a count of 2 is accepted as success.
+
+   If 'forced_unlock' is non-zero, any existing lock file is removed before
+   the attempt is made.
+
+   On every failure a message is printed and the program exits with
+   status 1; in particular, if the database is locked by someone else, the
+   holder's details are read from the lock file and reported.  The name of
+   the lock file is remembered in lock_file_name for unlock_database(). */
+void lock_database(char *path, int forced_unlock)/*{{{*/
+{
+  struct utsname uu;
+  struct passwd *pw;
+  int pid;
+  int len;
+  char *tname;
+  struct stat sb;
+  FILE *out;
+
+  if (uname(&uu) < 0) {
+    perror("uname");
+    exit(1);
+  }
+  pw = getpwuid(getuid());
+  if (!pw) {
+    perror("getpwuid");
+    exit(1);
+  }
+  pid = getpid();
+  /* path + ".lock" + terminator */
+  len = 1 + strlen(path) + 5;
+  lock_file_name = new_array(char, len);
+  sprintf(lock_file_name, "%s.lock", path);
+
+  if (forced_unlock) {
+    unlock_database();
+  }
+
+  len += strlen(uu.nodename);
+  /* add on max width of pid field (allow up to 32 bit pid_t) + 2 '.' chars */
+  len += (10 + 2);
+  tname = new_array(char, len);
+  sprintf(tname, "%s.%d.%s", lock_file_name, pid, uu.nodename);
+  out = fopen(tname, "w");
+  if (!out) {
+    fprintf(stderr, "Cannot open lock file %s for writing\n", tname);
+    exit(1);
+  }
+  fprintf(out, "%d,%s,%s\n", pid, uu.nodename, pw->pw_name);
+  fclose(out);
+
+  if (link(tname, lock_file_name) < 0) {
+    /* check if link count==2 */
+    if (stat(tname, &sb) < 0) {
+      fprintf(stderr, "Could not stat the lock file\n");
+      unlink(tname);
+      exit(1);
+    } else {
+      if (sb.st_nlink != 2) {
+        FILE *in;
+        in = fopen(lock_file_name, "r");
+        if (in) {
+          char line[2048];
+          size_t n;
+
+          /* The lock file may be empty or unreadable; never look at 'line'
+             unless fgets() actually filled it in. */
+          if (!fgets(line, sizeof(line), in)) {
+            strcpy(line, "unknown");
+          }
+          fclose(in);
+
+          /* strip trailing newline, if there is one */
+          n = strlen(line);
+          if ((n > 0) && (line[n-1] == '\n')) {
+            line[n-1] = 0;
+          }
+          fprintf(stderr, "Database %s appears to be locked by (pid,node,user)=(%s)\n", path, line);
+          unlink(tname);
+          exit(1);
+        }
+        /* NOTE(review): if the lock file cannot be opened, control falls
+           through and the caller carries on as though the lock had been
+           taken, although link() failed - confirm whether that is intended. */
+      } else {
+        /* lock succeeded apparently */
+      }
+    }
+  } else {
+    /* lock succeeded apparently */
+  }
+  unlink(tname);
+  free(tname);
+  return;
+}
+/*}}}*/
+/* Remove the lock file, if lock_database() has recorded its name.  The
+   result of unlink() is not examined. */
+void unlock_database(void)/*{{{*/
+{
+  if (lock_file_name != NULL) {
+    unlink(lock_file_name);
+  }
+}
+/*}}}*/
+/* Remove the database lock file and terminate the program with exit status
+   'code'.  Does not return. */
+void unlock_and_exit(int code)/*{{{*/
+{
+ unlock_database();
+ exit(code);
+}
+/*}}}*/
diff --git a/src/mairix/dotmairixrc.eg b/src/mairix/dotmairixrc.eg
@@ -0,0 +1,41 @@
+#######################################################################
+#
+# Example ~/.mairixrc file
+#
+# Any line starting with # is a comment.
+#
+#######################################################################
+# Set this to the directory where your maildir folders live
+base=/home/richard/mail
+
+#######################################################################
+# You need to define at least one of maildir, mh and mbox. You probably don't
+# need to define all three! You can use more than one line for any of these.
+
+# Set this to a list of maildir folders within 'base'. 3 dots at the end means
+# there are sub-folders within this folder.
+maildir=inbox:archive...
+maildir=lists...
+
+# Set this to a list of MH folders within 'base'. 3 dots at the end means
+# there are sub-folders within this folder.
+mh=mh_archive...
+
+# Set this to a list of mbox folders within 'base'.
+mbox=mboxen/folder1:mboxen/folder2:mboxen/foobar
+
+#######################################################################
+# Set this to the folder within 'base' where you want the search mode
+# to write its output.
+mfolder=mfolder
+
+# Set this if you want the format of mfolder to be mh or mbox (the default is
+# maildir).
+#
+# mformat=mh
+# mformat=mbox
+
+#######################################################################
+# Set this to the path where the index database file will be kept
+database=/home/richard/mail/mairix_database
+
diff --git a/src/mairix/dumper.c b/src/mairix/dumper.c
@@ -0,0 +1,151 @@
+/*
+ mairix - message index builder and finder for maildir folders.
+
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2004, 2005
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+/* Database dumper */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <assert.h>
+#include <sys/mman.h>
+
+#include "mairix.h"
+#include "reader.h"
+#include "memmac.h"
+
+/* Print 'n' words together with their encoded number chains.
+
+   Word i is the string at db->data + tok_offsets[i]; its chain starts at
+   db->data + enc_offsets[i] and runs up to a 0xff byte.  Every element
+   decoded with read_increment() is added to a running total, and the totals
+   are printed, starting a new output line once 16 values have been printed
+   on the current one. */
+static void dump_token_chain(struct read_db *db, unsigned int n, unsigned int *tok_offsets, unsigned int *enc_offsets)
+{
+  int word;
+
+  printf("%d entries\n", n);
+  for (word = 0; word < n; word++) {
+    unsigned char *cursor;
+    int running = 0;
+    int on_line;
+
+    printf("Word %d : <%s>\n", word, db->data + tok_offsets[word]);
+    cursor = (unsigned char *) db->data + enc_offsets[word];
+    printf(" ");
+    for (on_line = 0; *cursor != 0xff; on_line++) {
+      int step;
+
+      if (on_line > 15) {
+        printf("\n");
+        on_line = 0;
+      }
+      step = read_increment(&cursor);
+      running += step;
+      printf("%d ", running);
+    }
+    printf("\n");
+  }
+}
+
+/* Print a heading containing 'title', followed by every word of the token
+   table 'tbl' with its chain (see dump_token_chain()). */
+static void dump_toktable(struct read_db *db, struct toktable_db *tbl, const char *title)
+{
+ printf("Contents of <%s> table\n", title);
+ dump_token_chain( db, tbl->n, tbl->tok_offsets, tbl->enc_offsets);
+}
+
+/* Print a heading containing 'title', followed by the words of the token
+   table 'tbl' twice: first with the chains found through enc0_offsets
+   ("Chain 0"), then with those found through enc1_offsets ("Chain 1"). */
+static void dump_toktable2(struct read_db *db, struct toktable2_db *tbl, const char *title)
+{
+  printf("Contents of <%s> table\n", title);
+
+  printf("Chain 0\n");
+  dump_token_chain(db, tbl->n, tbl->tok_offsets, tbl->enc0_offsets);
+
+  printf("Chain 1\n");
+  dump_token_chain(db, tbl->n, tbl->tok_offsets, tbl->enc1_offsets);
+}
+
+/* Print the contents of the database file 'filename' on standard output:
+   one line per message (type, details and flags), the mbox table if there
+   are any mboxes, the hash key, and finally every token table. */
+void dump_database(char *filename)
+{
+  struct read_db *db = open_db(filename);
+  /* The single-chain token tables, in the order they are printed. */
+  const struct {
+    struct toktable_db *table;
+    const char *title;
+  } plain_tables[] = {
+    { &db->to,              "To" },
+    { &db->cc,              "Cc" },
+    { &db->from,            "From" },
+    { &db->subject,         "Subject" },
+    { &db->body,            "Body" },
+    { &db->attachment_name, "Attachment names" }
+  };
+  unsigned int t;
+  int msg, box;
+
+  printf("Dump of %s\n", filename);
+
+  /* Messages */
+  printf("%d messages\n", db->n_msgs);
+  for (msg = 0; msg < db->n_msgs; msg++) {
+    printf("%6d: ", msg);
+    switch (rd_msg_type(db, msg)) {
+      case DB_MSG_DEAD:
+        printf("DEAD");
+        break;
+      case DB_MSG_FILE:
+        printf("FILE %s, size=%d, tid=%d",
+            db->data + db->path_offsets[msg], db->size_table[msg], db->tid_table[msg]);
+        break;
+      case DB_MSG_MBOX:
+        {
+          unsigned int mbix, msgix;
+          decode_mbox_indices(db->path_offsets[msg], &mbix, &msgix);
+
+          printf("MBOX %d, msg %d, offset=%d, size=%d, tid=%d",
+              mbix, msgix, db->mtime_table[msg], db->size_table[msg], db->tid_table[msg]);
+        }
+        break;
+    }
+    if (db->msg_type_and_flags[msg] & FLAG_SEEN) printf(" seen");
+    if (db->msg_type_and_flags[msg] & FLAG_REPLIED) printf(" replied");
+    if (db->msg_type_and_flags[msg] & FLAG_FLAGGED) printf(" flagged");
+    printf("\n");
+  }
+  printf("\n");
+
+  /* Mboxes; an entry without a path is reported as dead */
+  if (db->n_mboxen > 0) {
+    printf("\nMBOX INFORMATION\n");
+    printf("%d mboxen\n", db->n_mboxen);
+    for (box = 0; box < db->n_mboxen; box++) {
+      if (!db->mbox_paths_table[box]) {
+        printf("%4d: dead\n", box);
+      } else {
+        printf("%4d: %d msgs in %s\n", box, db->mbox_entries_table[box], db->data + db->mbox_paths_table[box]);
+      }
+    }
+    printf("\n");
+  }
+
+  printf("Hash key %08x\n\n", db->hash_key);
+
+  /* Token tables, each preceded by a rule */
+  for (t = 0; t < sizeof(plain_tables) / sizeof(plain_tables[0]); t++) {
+    printf("--------------------------------\n");
+    dump_toktable(db, plain_tables[t].table, plain_tables[t].title);
+  }
+  printf("--------------------------------\n");
+  dump_toktable2(db, &db->msg_ids, "Message Ids");
+  printf("--------------------------------\n");
+
+  close_db(db);
+}
+
diff --git a/src/mairix/expandstr.c b/src/mairix/expandstr.c
@@ -0,0 +1,196 @@
+/*
+ mairix - message index builder and finder for maildir folders.
+
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2004
+ * Copyright (C) Andreas Amann 2010
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+#include "mairix.h"
+#include <stdlib.h>
+#include <sys/types.h>
+#include <pwd.h>
+#include <ctype.h>
+#include <unistd.h>
+
+/* Return 1 if 'x' may be part of an environment variable name, i.e. if it
+   is alphanumeric or an underscore; otherwise 0. */
+static int isenv(unsigned char x)/*{{{*/
+{
+  return (isalnum(x) || (x == '_')) ? 1 : 0;
+}
+/*}}}*/
+/* Return the length of the user's home directory path: the value of $HOME
+   if that is set, otherwise the home directory from the password database.
+
+   If neither is available the result is 0, i.e. "~" expands to nothing.
+   getpwuid() returns NULL when the user has no password entry, and that
+   result used to be dereferenced unconditionally. */
+static int home_dir_len(void)/*{{{*/
+{
+  struct passwd *pw;
+  char *home = getenv("HOME");
+
+  if (home) {
+    return strlen(home);
+  }
+
+  pw = getpwuid(getuid());
+  if (!pw || !pw->pw_dir) {
+    return 0;
+  }
+  return strlen(pw->pw_dir);
+}
+/*}}}*/
+/* Look up the environment variable whose name is the text from 'p' up to
+   (but not including) 'q'.  Returns a newly allocated copy of its value,
+   which the caller has to free, or NULL if the variable is not set. */
+static char *env_lookup(const char *p, const char *q)/*{{{*/
+{
+  size_t name_len = q - p;
+  char *name = new_array(char, name_len+1);
+  char *value;
+  char *copy = NULL;
+
+  memcpy(name, p, name_len);
+  name[name_len] = 0;
+
+  value = getenv(name);
+  if (value) {
+    copy = new_string(value);
+  }
+  free(name);
+  return copy;
+}
+/*}}}*/
+/* Return the length of the value of the environment variable named by the
+   text from 'p' up to 'q', or 0 if the variable is not set. */
+static int env_lookup_len(const char *p, const char *q) {/*{{{*/
+  char *value = env_lookup(p, q);
+  int len;
+
+  if (!value) {
+    return 0;
+  }
+  len = strlen(value);
+  free(value);
+  return len;
+}
+/*}}}*/
+/* Return the length (terminator not included) that 'p' has once expanded.
+   The rules are the same as in do_expand(), and the two functions have to
+   be kept in step: a "~/" at the very start counts as the home directory,
+   "${NAME}" and "$NAME" count as the value of that environment variable
+   (nothing if it is unset), and any other character counts as itself. */
+static int compute_length(const char *p)/*{{{*/
+{
+  const char *start = p;
+  const char *end;
+  int total = 0;
+
+  while (*p) {
+    if ((p == start) && (p[0] == '~') && (p[1] == '/')) {
+      /* Make no attempt to expand ~other_user form */
+      total += home_dir_len();
+      p++;
+    } else if ((p[0] == '$') && (p[1] == '{')) {
+      p += 2;
+      for (end = p; *end && (*end != '}'); end++)
+        ;
+      total += env_lookup_len(p, end);
+      /* Step over the closing brace, if there is one. */
+      p = *end ? (end + 1) : end;
+    } else if (p[0] == '$') {
+      p++;
+      for (end = p; *end && isenv((unsigned char) *end); end++)
+        ;
+      total += env_lookup_len(p, end);
+      p = end;
+    } else {
+      total++;
+      p++;
+    }
+  }
+  return total;
+}
+/*}}}*/
+/* Copy the user's home directory path to 'to' and return a pointer to the
+   terminator written after it.  The path is the value of $HOME if that is
+   set, otherwise the home directory from the password database.
+
+   If neither is available, an empty string is written and 'to' is returned
+   unchanged.  getpwuid() returns NULL when the user has no password entry,
+   and that result used to be dereferenced unconditionally. */
+static char *append_home_dir(char *to)/*{{{*/
+{
+  const char *home = getenv("HOME");
+
+  if (!home) {
+    struct passwd *pw = getpwuid(getuid());
+
+    if (!pw || !pw->pw_dir) {
+      *to = 0;
+      return to;
+    }
+    home = pw->pw_dir;
+  }
+
+  strcpy(to, home);
+  return to + strlen(home);
+}
+/*}}}*/
+/* Copy the value of the environment variable named by the text from 'p' up
+   to 'q' to 'to', and return a pointer to the terminator written after it.
+   If the variable is not set, nothing is written and 'to' is returned. */
+static char *append_env(char *to, const char *p, const char *q)/*{{{*/
+{
+  char *value = env_lookup(p, q);
+  int len;
+
+  if (!value) {
+    return to;
+  }
+
+  len = strlen(value);
+  strcpy(to, value);
+  free(value);
+  return to + len;
+}
+/*}}}*/
+/* Write the expansion of 'p' to 'result', which must have room for
+   compute_length(p) + 1 characters.  A "~/" at the very start is replaced
+   by the home directory, "${NAME}" and "$NAME" are replaced by the value of
+   that environment variable (nothing if it is unset), and every other
+   character is copied unchanged. */
+static void do_expand(const char *p, char *result)/*{{{*/
+{
+  const char *start = p;
+  const char *end;
+
+  while (*p) {
+    if ((p == start) && (p[0] == '~') && (p[1] == '/')) {
+      /* Only the '~' is consumed; the '/' is copied on the next round. */
+      result = append_home_dir(result);
+      p++;
+    } else if ((p[0] == '$') && (p[1] == '{')) {
+      p += 2;
+      for (end = p; *end && (*end != '}'); end++)
+        ;
+      result = append_env(result, p, end);
+      /* Step over the closing brace, if there is one. */
+      p = *end ? (end + 1) : end;
+    } else if (p[0] == '$') {
+      p++;
+      for (end = p; *end && isenv((unsigned char) *end); end++)
+        ;
+      result = append_env(result, p, end);
+      p = end;
+    } else {
+      *result++ = *p++;
+    }
+  }
+  *result = 0;
+}
+/*}}}*/
+/* Return a newly allocated copy of 'p' in which
+
+     a leading ~/ has the ~ replaced by the user's home directory, and
+     $env or ${env} is replaced by the value of that environment variable.
+
+   The caller owns the returned string. */
+char *expand_string(const char *p)/*{{{*/
+{
+  /* Size the buffer first, then expand into it. */
+  char *expanded = new_array(char, compute_length(p)+1);
+
+  do_expand(p, expanded);
+  return expanded;
+}
+/*}}}*/
diff --git a/src/mairix/from.h b/src/mairix/from.h
@@ -0,0 +1,32 @@
+/*
+ mairix - message index builder and finder for maildir folders.
+
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2002-2004,2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+#ifndef _FROM_H
+#define _FROM_H
+
+enum fromcheck_result { /* result type of the scanner generated from fromcheck.nfa (its TYPE directive) */
+ FROMCHECK_PASS, /* attribute attached to the accepting states of the From-line grammar */
+ FROMCHECK_FAIL /* default attribute (DEFATTR) for every other state */
+};
+
+#endif
+
diff --git a/src/mairix/fromcheck.nfa b/src/mairix/fromcheck.nfa
@@ -0,0 +1,218 @@
+#########################################################################
+#
+# mairix - message index builder and finder for maildir folders.
+#
+# Copyright (C) Richard P. Curnow 2002-2004,2006
+# Copyright (C) Jonathan Kamens 2010
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# =======================================================================
+
+%{
+#include "from.h"
+%}
+
+
+# Define tokens
+# LF, CR : \n, \r
+# DIGIT : [0-9]
+# AT : @
+# COLON : :
+# WHITE : ' ', \t
+# LOWER : [a-z]
+# UPPER : [A-Z]
+# PLUSMINUS : [+-]
+# OTHER_EMAIL : other stuff valid in the LHS of an address
+# DOMAIN : stuff valid in the RHS of an address
+
+Abbrev LF = [\n]
+Abbrev CR = [\r]
+Abbrev DIGIT = [0-9]
+Abbrev PERIOD = [.]
+Abbrev AT = [@]
+Abbrev LOWER = [a-z]
+Abbrev UPPER = [A-Z]
+Abbrev COLON = [:]
+Abbrev WHITE = [ \t]
+Abbrev PLUSMINUS = [+\-]
+# Explained clearly at
+# http://en.wikipedia.org/wiki/E-mail_address#RFC_specification
+Abbrev OTHER_EMAIL = [.!#$%&'*/=?^_`{|}~]
+Abbrev LT = [<]
+Abbrev GT = [>]
+Abbrev EMAIL = LOWER | UPPER | DIGIT | PLUSMINUS | OTHER_EMAIL
+Abbrev OTHER_DOMAIN = [\-_.]
+Abbrev DOMAIN = LOWER | UPPER | DIGIT | OTHER_DOMAIN
+Abbrev DQUOTE = ["]
+Abbrev OTHER_QUOTED = [@:<>]
+Abbrev LEFTSQUARE = [[]
+Abbrev RIGHTSQUARE = [\]]
+
+BLOCK email {
+ STATE in
+ EMAIL -> in, before_at
+ DQUOTE -> quoted_before_at
+ AT -> domain_route
+
+ STATE domain_route
+ DOMAIN -> domain_route
+ COLON -> in
+
+ STATE quoted_before_at
+ EMAIL | WHITE | OTHER_QUOTED -> quoted_before_at
+ DQUOTE -> before_at
+
+ STATE before_at
+ EMAIL -> before_at
+ DQUOTE -> quoted_before_at
+ # Local part only : >=1 characters will suffice, which we've already
+ # matched.
+ -> out
+ AT -> start_of_domain
+
+ STATE start_of_domain
+ LEFTSQUARE -> dotted_quad
+ DOMAIN -> after_at
+
+ STATE dotted_quad
+ DIGIT | PERIOD -> dotted_quad
+ RIGHTSQUARE -> out
+
+ STATE after_at
+ DOMAIN -> after_at, out
+
+}
+
+BLOCK angled_email {
+ STATE in
+ LT -> in_angles
+
+ STATE in_angles
+ <email:in->out> -> before_gt
+
+ STATE before_gt
+ GT -> out
+}
+
+BLOCK zone {
+ # Deliberately lenient: one capital letter, or a capital/sign followed by any letters and digits
+ STATE in
+ UPPER -> zone2
+ UPPER -> out
+ PLUSMINUS -> zone2
+
+ STATE zone2
+ UPPER | LOWER -> zone2, out
+ DIGIT -> zone2, out
+}
+
+BLOCK date {
+ STATE in
+ WHITE -> in, before_weekday
+
+ STATE before_weekday
+ UPPER ; LOWER ; LOWER ; WHITE -> after_weekday
+
+ STATE after_weekday
+ WHITE -> after_weekday
+ UPPER ; LOWER ; LOWER ; WHITE -> after_month
+
+ STATE after_month
+ WHITE -> after_month
+ DIGIT ; WHITE -> after_day
+ DIGIT ; DIGIT ; WHITE -> after_day
+
+ STATE after_day
+ WHITE -> after_day
+ # Accept HH:MM:SS
+ DIGIT ; DIGIT ; COLON ; DIGIT ; DIGIT ; COLON ; DIGIT ; DIGIT ; WHITE -> after_time
+ # Accept HH:MM
+ DIGIT ; DIGIT ; COLON ; DIGIT ; DIGIT ; WHITE -> after_time
+
+ # Allow either 1 or 2 words of timezone
+ STATE after_time
+ WHITE -> after_time
+ -> after_timezone
+ <zone:in->out> ; WHITE -> after_timezone
+ <zone:in->out> ; WHITE -> after_timezone_1
+
+ # It appears that Pine puts the timezone after the year
+ DIGIT ; DIGIT ; DIGIT ; DIGIT -> after_year_before_zone
+
+ STATE after_year_before_zone
+ WHITE -> after_year_before_zone
+ <zone:in->out> -> after_timezone_after_year
+ <zone:in->out> ; WHITE -> after_timezone_after_year_1
+
+ STATE after_timezone_after_year_1
+ WHITE -> after_timezone_after_year_1
+ <zone:in->out> -> after_timezone_after_year
+
+ STATE after_timezone_after_year
+ WHITE -> after_timezone_after_year
+ -> out
+
+ STATE after_timezone_1
+ WHITE -> after_timezone_1
+ <zone:in->out> ; WHITE -> after_timezone
+
+ STATE after_timezone
+ WHITE -> after_timezone
+ DIGIT ; DIGIT ; DIGIT ; DIGIT -> after_year
+
+ STATE after_year
+ WHITE -> after_year
+ -> out
+
+}
+
+# Assume the earlier code has identified the '\nFrom ' sequence,
+# and the validator starts scanning from the character beyond the space
+
+BLOCK main {
+
+ STATE in
+ # Real return address.
+ WHITE -> in
+ <email:in->out> -> before_date
+ <angled_email:in->out> -> before_date
+
+ # Cope with Mozilla mbox folder format which just uses a '-' as
+ # the return address field.
+ PLUSMINUS -> before_date
+
+ # Empty return address
+ -> before_date
+
+ STATE before_date
+ <date:in->out> ; LF = FROMCHECK_PASS
+
+ # Cope with mozilla mbox format
+ <date:in->out> ; CR ; LF = FROMCHECK_PASS
+
+ # Mention this state last : the last mentioned state in the last defined
+ # block becomes the entry state of the scanner.
+
+ STATE in
+
+}
+
+ATTR FROMCHECK_PASS
+ATTR FROMCHECK_FAIL
+DEFATTR FROMCHECK_FAIL
+PREFIX fromcheck
+TYPE "enum fromcheck_result"
+
+# vim:ft=txt:et:sw=4:sts=4:ht=4
diff --git a/src/mairix/fromcheck.report b/src/mairix/fromcheck.report
@@ -0,0 +1,3222 @@
+NFA state 0 = in
+ [(epsilon)] -> before_date
+ 5:[+] -> before_date
+ 6:[\055] -> before_date
+ [(epsilon)] -> angled_email#2.in
+ [(epsilon)] -> email#1.in
+ 0:[\t ] -> in
+ Epsilon closure :
+ (self)
+ email#1.in
+ angled_email#2.in
+ before_date
+ date#3.in
+ date#4.in
+
+NFA state 1 = email#1.in
+ 3:[!#-'*/=?^`{-~] -> email#1.in
+ 16:[_] -> email#1.in
+ 7:[.] -> email#1.in
+ 5:[+] -> email#1.in
+ 6:[\055] -> email#1.in
+ 8:[0-9] -> email#1.in
+ 13:[A-Z] -> email#1.in
+ 17:[a-z] -> email#1.in
+ 3:[!#-'*/=?^`{-~] -> email#1.before_at
+ 16:[_] -> email#1.before_at
+ 7:[.] -> email#1.before_at
+ 5:[+] -> email#1.before_at
+ 6:[\055] -> email#1.before_at
+ 8:[0-9] -> email#1.before_at
+ 13:[A-Z] -> email#1.before_at
+ 17:[a-z] -> email#1.before_at
+ 4:["] -> email#1.quoted_before_at
+ 12:[@] -> email#1.domain_route
+ Epsilon closure :
+ (self)
+
+NFA state 2 = email#1.domain_route
+ 6:[\055] -> email#1.domain_route
+ 16:[_] -> email#1.domain_route
+ 7:[.] -> email#1.domain_route
+ 8:[0-9] -> email#1.domain_route
+ 13:[A-Z] -> email#1.domain_route
+ 17:[a-z] -> email#1.domain_route
+ 9:[:] -> email#1.in
+ Epsilon closure :
+ (self)
+
+NFA state 3 = email#1.quoted_before_at
+ 9:[:] -> email#1.quoted_before_at
+ 12:[@] -> email#1.quoted_before_at
+ 11:[>] -> email#1.quoted_before_at
+ 10:[<] -> email#1.quoted_before_at
+ 0:[\t ] -> email#1.quoted_before_at
+ 3:[!#-'*/=?^`{-~] -> email#1.quoted_before_at
+ 16:[_] -> email#1.quoted_before_at
+ 7:[.] -> email#1.quoted_before_at
+ 5:[+] -> email#1.quoted_before_at
+ 6:[\055] -> email#1.quoted_before_at
+ 8:[0-9] -> email#1.quoted_before_at
+ 13:[A-Z] -> email#1.quoted_before_at
+ 17:[a-z] -> email#1.quoted_before_at
+ 4:["] -> email#1.before_at
+ Epsilon closure :
+ (self)
+
+NFA state 4 = email#1.before_at
+ 3:[!#-'*/=?^`{-~] -> email#1.before_at
+ 16:[_] -> email#1.before_at
+ 7:[.] -> email#1.before_at
+ 5:[+] -> email#1.before_at
+ 6:[\055] -> email#1.before_at
+ 8:[0-9] -> email#1.before_at
+ 13:[A-Z] -> email#1.before_at
+ 17:[a-z] -> email#1.before_at
+ 4:["] -> email#1.quoted_before_at
+ [(epsilon)] -> email#1.out
+ 12:[@] -> email#1.start_of_domain
+ Epsilon closure :
+ (self)
+ email#1.out
+ before_date
+ date#3.in
+ date#4.in
+
+NFA state 5 = email#1.start_of_domain
+ 14:[[] -> email#1.dotted_quad
+ 6:[\055] -> email#1.after_at
+ 16:[_] -> email#1.after_at
+ 7:[.] -> email#1.after_at
+ 8:[0-9] -> email#1.after_at
+ 13:[A-Z] -> email#1.after_at
+ 17:[a-z] -> email#1.after_at
+ Epsilon closure :
+ (self)
+
+NFA state 6 = email#1.dotted_quad
+ 7:[.] -> email#1.dotted_quad
+ 8:[0-9] -> email#1.dotted_quad
+ 15:[]] -> email#1.out
+ Epsilon closure :
+ (self)
+
+NFA state 7 = email#1.after_at
+ 6:[\055] -> email#1.after_at
+ 16:[_] -> email#1.after_at
+ 7:[.] -> email#1.after_at
+ 8:[0-9] -> email#1.after_at
+ 13:[A-Z] -> email#1.after_at
+ 17:[a-z] -> email#1.after_at
+ 6:[\055] -> email#1.out
+ 16:[_] -> email#1.out
+ 7:[.] -> email#1.out
+ 8:[0-9] -> email#1.out
+ 13:[A-Z] -> email#1.out
+ 17:[a-z] -> email#1.out
+ Epsilon closure :
+ (self)
+
+NFA state 8 = email#1.out
+ [(epsilon)] -> before_date
+ Epsilon closure :
+ (self)
+ before_date
+ date#3.in
+ date#4.in
+
+NFA state 9 = angled_email#2.in
+ 10:[<] -> angled_email#2.in_angles
+ Epsilon closure :
+ (self)
+
+NFA state 10 = angled_email#2.in_angles
+ [(epsilon)] -> angled_email#2.email#1.in
+ Epsilon closure :
+ (self)
+ angled_email#2.email#1.in
+
+NFA state 11 = angled_email#2.email#1.in
+ 12:[@] -> angled_email#2.email#1.domain_route
+ 4:["] -> angled_email#2.email#1.quoted_before_at
+ 17:[a-z] -> angled_email#2.email#1.before_at
+ 13:[A-Z] -> angled_email#2.email#1.before_at
+ 8:[0-9] -> angled_email#2.email#1.before_at
+ 5:[+] -> angled_email#2.email#1.before_at
+ 6:[\055] -> angled_email#2.email#1.before_at
+ 3:[!#-'*/=?^`{-~] -> angled_email#2.email#1.before_at
+ 16:[_] -> angled_email#2.email#1.before_at
+ 7:[.] -> angled_email#2.email#1.before_at
+ 17:[a-z] -> angled_email#2.email#1.in
+ 13:[A-Z] -> angled_email#2.email#1.in
+ 8:[0-9] -> angled_email#2.email#1.in
+ 5:[+] -> angled_email#2.email#1.in
+ 6:[\055] -> angled_email#2.email#1.in
+ 3:[!#-'*/=?^`{-~] -> angled_email#2.email#1.in
+ 16:[_] -> angled_email#2.email#1.in
+ 7:[.] -> angled_email#2.email#1.in
+ Epsilon closure :
+ (self)
+
+NFA state 12 = angled_email#2.email#1.domain_route
+ 9:[:] -> angled_email#2.email#1.in
+ 17:[a-z] -> angled_email#2.email#1.domain_route
+ 13:[A-Z] -> angled_email#2.email#1.domain_route
+ 8:[0-9] -> angled_email#2.email#1.domain_route
+ 6:[\055] -> angled_email#2.email#1.domain_route
+ 16:[_] -> angled_email#2.email#1.domain_route
+ 7:[.] -> angled_email#2.email#1.domain_route
+ Epsilon closure :
+ (self)
+
+NFA state 13 = angled_email#2.email#1.quoted_before_at
+ 4:["] -> angled_email#2.email#1.before_at
+ 17:[a-z] -> angled_email#2.email#1.quoted_before_at
+ 13:[A-Z] -> angled_email#2.email#1.quoted_before_at
+ 8:[0-9] -> angled_email#2.email#1.quoted_before_at
+ 5:[+] -> angled_email#2.email#1.quoted_before_at
+ 6:[\055] -> angled_email#2.email#1.quoted_before_at
+ 3:[!#-'*/=?^`{-~] -> angled_email#2.email#1.quoted_before_at
+ 16:[_] -> angled_email#2.email#1.quoted_before_at
+ 7:[.] -> angled_email#2.email#1.quoted_before_at
+ 0:[\t ] -> angled_email#2.email#1.quoted_before_at
+ 9:[:] -> angled_email#2.email#1.quoted_before_at
+ 12:[@] -> angled_email#2.email#1.quoted_before_at
+ 11:[>] -> angled_email#2.email#1.quoted_before_at
+ 10:[<] -> angled_email#2.email#1.quoted_before_at
+ Epsilon closure :
+ (self)
+
+NFA state 14 = angled_email#2.email#1.before_at
+ 12:[@] -> angled_email#2.email#1.start_of_domain
+ [(epsilon)] -> angled_email#2.email#1.out
+ 4:["] -> angled_email#2.email#1.quoted_before_at
+ 17:[a-z] -> angled_email#2.email#1.before_at
+ 13:[A-Z] -> angled_email#2.email#1.before_at
+ 8:[0-9] -> angled_email#2.email#1.before_at
+ 5:[+] -> angled_email#2.email#1.before_at
+ 6:[\055] -> angled_email#2.email#1.before_at
+ 3:[!#-'*/=?^`{-~] -> angled_email#2.email#1.before_at
+ 16:[_] -> angled_email#2.email#1.before_at
+ 7:[.] -> angled_email#2.email#1.before_at
+ Epsilon closure :
+ (self)
+ angled_email#2.email#1.out
+ angled_email#2.before_gt
+
+NFA state 15 = angled_email#2.email#1.start_of_domain
+ 17:[a-z] -> angled_email#2.email#1.after_at
+ 13:[A-Z] -> angled_email#2.email#1.after_at
+ 8:[0-9] -> angled_email#2.email#1.after_at
+ 6:[\055] -> angled_email#2.email#1.after_at
+ 16:[_] -> angled_email#2.email#1.after_at
+ 7:[.] -> angled_email#2.email#1.after_at
+ 14:[[] -> angled_email#2.email#1.dotted_quad
+ Epsilon closure :
+ (self)
+
+NFA state 16 = angled_email#2.email#1.dotted_quad
+ 15:[]] -> angled_email#2.email#1.out
+ 8:[0-9] -> angled_email#2.email#1.dotted_quad
+ 7:[.] -> angled_email#2.email#1.dotted_quad
+ Epsilon closure :
+ (self)
+
+NFA state 17 = angled_email#2.email#1.after_at
+ 17:[a-z] -> angled_email#2.email#1.out
+ 13:[A-Z] -> angled_email#2.email#1.out
+ 8:[0-9] -> angled_email#2.email#1.out
+ 6:[\055] -> angled_email#2.email#1.out
+ 16:[_] -> angled_email#2.email#1.out
+ 7:[.] -> angled_email#2.email#1.out
+ 17:[a-z] -> angled_email#2.email#1.after_at
+ 13:[A-Z] -> angled_email#2.email#1.after_at
+ 8:[0-9] -> angled_email#2.email#1.after_at
+ 6:[\055] -> angled_email#2.email#1.after_at
+ 16:[_] -> angled_email#2.email#1.after_at
+ 7:[.] -> angled_email#2.email#1.after_at
+ Epsilon closure :
+ (self)
+
+NFA state 18 = angled_email#2.email#1.out
+ [(epsilon)] -> angled_email#2.before_gt
+ Epsilon closure :
+ (self)
+ angled_email#2.before_gt
+
+NFA state 19 = angled_email#2.before_gt
+ 11:[>] -> angled_email#2.out
+ Epsilon closure :
+ (self)
+
+NFA state 20 = angled_email#2.out
+ [(epsilon)] -> before_date
+ Epsilon closure :
+ (self)
+ before_date
+ date#3.in
+ date#4.in
+
+NFA state 21 = before_date
+ [(epsilon)] -> date#4.in
+ [(epsilon)] -> date#3.in
+ Epsilon closure :
+ (self)
+ date#3.in
+ date#4.in
+
+NFA state 22 = #1
+ 1:[\n] -> #2
+ Epsilon closure :
+ (self)
+
+NFA state 23 = date#3.in
+ 0:[\t ] -> date#3.in
+ 0:[\t ] -> date#3.before_weekday
+ Epsilon closure :
+ (self)
+
+NFA state 24 = date#3.before_weekday
+ 13:[A-Z] -> date#3.#1
+ Epsilon closure :
+ (self)
+
+NFA state 25 = date#3.#1
+ 17:[a-z] -> date#3.#2
+ Epsilon closure :
+ (self)
+
+NFA state 26 = date#3.#2
+ 17:[a-z] -> date#3.#3
+ Epsilon closure :
+ (self)
+
+NFA state 27 = date#3.#3
+ 0:[\t ] -> date#3.after_weekday
+ Epsilon closure :
+ (self)
+
+NFA state 28 = date#3.after_weekday
+ 0:[\t ] -> date#3.after_weekday
+ 13:[A-Z] -> date#3.#4
+ Epsilon closure :
+ (self)
+
+NFA state 29 = date#3.#4
+ 17:[a-z] -> date#3.#5
+ Epsilon closure :
+ (self)
+
+NFA state 30 = date#3.#5
+ 17:[a-z] -> date#3.#6
+ Epsilon closure :
+ (self)
+
+NFA state 31 = date#3.#6
+ 0:[\t ] -> date#3.after_month
+ Epsilon closure :
+ (self)
+
+NFA state 32 = date#3.after_month
+ 0:[\t ] -> date#3.after_month
+ 8:[0-9] -> date#3.#7
+ 8:[0-9] -> date#3.#8
+ Epsilon closure :
+ (self)
+
+NFA state 33 = date#3.#7
+ 0:[\t ] -> date#3.after_day
+ Epsilon closure :
+ (self)
+
+NFA state 34 = date#3.#8
+ 8:[0-9] -> date#3.#9
+ Epsilon closure :
+ (self)
+
+NFA state 35 = date#3.#9
+ 0:[\t ] -> date#3.after_day
+ Epsilon closure :
+ (self)
+
+NFA state 36 = date#3.after_day
+ 0:[\t ] -> date#3.after_day
+ 8:[0-9] -> date#3.#10
+ 8:[0-9] -> date#3.#18
+ Epsilon closure :
+ (self)
+
+NFA state 37 = date#3.#10
+ 8:[0-9] -> date#3.#11
+ Epsilon closure :
+ (self)
+
+NFA state 38 = date#3.#11
+ 9:[:] -> date#3.#12
+ Epsilon closure :
+ (self)
+
+NFA state 39 = date#3.#12
+ 8:[0-9] -> date#3.#13
+ Epsilon closure :
+ (self)
+
+NFA state 40 = date#3.#13
+ 8:[0-9] -> date#3.#14
+ Epsilon closure :
+ (self)
+
+NFA state 41 = date#3.#14
+ 9:[:] -> date#3.#15
+ Epsilon closure :
+ (self)
+
+NFA state 42 = date#3.#15
+ 8:[0-9] -> date#3.#16
+ Epsilon closure :
+ (self)
+
+NFA state 43 = date#3.#16
+ 8:[0-9] -> date#3.#17
+ Epsilon closure :
+ (self)
+
+NFA state 44 = date#3.#17
+ 0:[\t ] -> date#3.after_time
+ Epsilon closure :
+ (self)
+
+NFA state 45 = date#3.#18
+ 8:[0-9] -> date#3.#19
+ Epsilon closure :
+ (self)
+
+NFA state 46 = date#3.#19
+ 9:[:] -> date#3.#20
+ Epsilon closure :
+ (self)
+
+NFA state 47 = date#3.#20
+ 8:[0-9] -> date#3.#21
+ Epsilon closure :
+ (self)
+
+NFA state 48 = date#3.#21
+ 8:[0-9] -> date#3.#22
+ Epsilon closure :
+ (self)
+
+NFA state 49 = date#3.#22
+ 0:[\t ] -> date#3.after_time
+ Epsilon closure :
+ (self)
+
+NFA state 50 = date#3.after_time
+ 0:[\t ] -> date#3.after_time
+ [(epsilon)] -> date#3.after_timezone
+ [(epsilon)] -> date#3.zone#1.in
+ [(epsilon)] -> date#3.zone#2.in
+ 8:[0-9] -> date#3.#25
+ Epsilon closure :
+ (self)
+ date#3.zone#1.in
+ date#3.zone#2.in
+ date#3.after_timezone
+
+NFA state 51 = date#3.#23
+ 0:[\t ] -> date#3.after_timezone
+ Epsilon closure :
+ (self)
+
+NFA state 52 = date#3.zone#1.in
+ 5:[+] -> date#3.zone#1.zone2
+ 6:[\055] -> date#3.zone#1.zone2
+ 13:[A-Z] -> date#3.zone#1.out
+ 13:[A-Z] -> date#3.zone#1.zone2
+ Epsilon closure :
+ (self)
+
+NFA state 53 = date#3.zone#1.zone2
+ 8:[0-9] -> date#3.zone#1.out
+ 8:[0-9] -> date#3.zone#1.zone2
+ 13:[A-Z] -> date#3.zone#1.out
+ 17:[a-z] -> date#3.zone#1.out
+ 13:[A-Z] -> date#3.zone#1.zone2
+ 17:[a-z] -> date#3.zone#1.zone2
+ Epsilon closure :
+ (self)
+
+NFA state 54 = date#3.zone#1.out
+ [(epsilon)] -> date#3.#23
+ Epsilon closure :
+ (self)
+ date#3.#23
+
+NFA state 55 = date#3.#24
+ 0:[\t ] -> date#3.after_timezone_1
+ Epsilon closure :
+ (self)
+
+NFA state 56 = date#3.zone#2.in
+ 5:[+] -> date#3.zone#2.zone2
+ 6:[\055] -> date#3.zone#2.zone2
+ 13:[A-Z] -> date#3.zone#2.out
+ 13:[A-Z] -> date#3.zone#2.zone2
+ Epsilon closure :
+ (self)
+
+NFA state 57 = date#3.zone#2.zone2
+ 8:[0-9] -> date#3.zone#2.out
+ 8:[0-9] -> date#3.zone#2.zone2
+ 13:[A-Z] -> date#3.zone#2.out
+ 17:[a-z] -> date#3.zone#2.out
+ 13:[A-Z] -> date#3.zone#2.zone2
+ 17:[a-z] -> date#3.zone#2.zone2
+ Epsilon closure :
+ (self)
+
+NFA state 58 = date#3.zone#2.out
+ [(epsilon)] -> date#3.#24
+ Epsilon closure :
+ (self)
+ date#3.#24
+
+NFA state 59 = date#3.#25
+ 8:[0-9] -> date#3.#26
+ Epsilon closure :
+ (self)
+
+NFA state 60 = date#3.#26
+ 8:[0-9] -> date#3.#27
+ Epsilon closure :
+ (self)
+
+NFA state 61 = date#3.#27
+ 8:[0-9] -> date#3.after_year_before_zone
+ Epsilon closure :
+ (self)
+
+NFA state 62 = date#3.after_year_before_zone
+ 0:[\t ] -> date#3.after_year_before_zone
+ [(epsilon)] -> date#3.zone#3.in
+ [(epsilon)] -> date#3.zone#4.in
+ Epsilon closure :
+ (self)
+ date#3.zone#3.in
+ date#3.zone#4.in
+
+NFA state 63 = date#3.zone#3.in
+ 5:[+] -> date#3.zone#3.zone2
+ 6:[\055] -> date#3.zone#3.zone2
+ 13:[A-Z] -> date#3.zone#3.out
+ 13:[A-Z] -> date#3.zone#3.zone2
+ Epsilon closure :
+ (self)
+
+NFA state 64 = date#3.zone#3.zone2
+ 8:[0-9] -> date#3.zone#3.out
+ 8:[0-9] -> date#3.zone#3.zone2
+ 13:[A-Z] -> date#3.zone#3.out
+ 17:[a-z] -> date#3.zone#3.out
+ 13:[A-Z] -> date#3.zone#3.zone2
+ 17:[a-z] -> date#3.zone#3.zone2
+ Epsilon closure :
+ (self)
+
+NFA state 65 = date#3.zone#3.out
+ [(epsilon)] -> date#3.after_timezone_after_year
+ Epsilon closure :
+ (self)
+ #1
+ date#3.after_timezone_after_year
+ date#3.out
+
+NFA state 66 = date#3.#28
+ 0:[\t ] -> date#3.after_timezone_after_year_1
+ Epsilon closure :
+ (self)
+
+NFA state 67 = date#3.zone#4.in
+ 5:[+] -> date#3.zone#4.zone2
+ 6:[\055] -> date#3.zone#4.zone2
+ 13:[A-Z] -> date#3.zone#4.out
+ 13:[A-Z] -> date#3.zone#4.zone2
+ Epsilon closure :
+ (self)
+
+NFA state 68 = date#3.zone#4.zone2
+ 8:[0-9] -> date#3.zone#4.out
+ 8:[0-9] -> date#3.zone#4.zone2
+ 13:[A-Z] -> date#3.zone#4.out
+ 17:[a-z] -> date#3.zone#4.out
+ 13:[A-Z] -> date#3.zone#4.zone2
+ 17:[a-z] -> date#3.zone#4.zone2
+ Epsilon closure :
+ (self)
+
+NFA state 69 = date#3.zone#4.out
+ [(epsilon)] -> date#3.#28
+ Epsilon closure :
+ (self)
+ date#3.#28
+
+NFA state 70 = date#3.after_timezone_after_year_1
+ 0:[\t ] -> date#3.after_timezone_after_year_1
+ [(epsilon)] -> date#3.zone#5.in
+ Epsilon closure :
+ (self)
+ date#3.zone#5.in
+
+NFA state 71 = date#3.zone#5.in
+ 5:[+] -> date#3.zone#5.zone2
+ 6:[\055] -> date#3.zone#5.zone2
+ 13:[A-Z] -> date#3.zone#5.out
+ 13:[A-Z] -> date#3.zone#5.zone2
+ Epsilon closure :
+ (self)
+
+NFA state 72 = date#3.zone#5.zone2
+ 8:[0-9] -> date#3.zone#5.out
+ 8:[0-9] -> date#3.zone#5.zone2
+ 13:[A-Z] -> date#3.zone#5.out
+ 17:[a-z] -> date#3.zone#5.out
+ 13:[A-Z] -> date#3.zone#5.zone2
+ 17:[a-z] -> date#3.zone#5.zone2
+ Epsilon closure :
+ (self)
+
+NFA state 73 = date#3.zone#5.out
+ [(epsilon)] -> date#3.after_timezone_after_year
+ Epsilon closure :
+ (self)
+ #1
+ date#3.after_timezone_after_year
+ date#3.out
+
+NFA state 74 = date#3.after_timezone_after_year
+ 0:[\t ] -> date#3.after_timezone_after_year
+ [(epsilon)] -> date#3.out
+ Epsilon closure :
+ (self)
+ #1
+ date#3.out
+
+NFA state 75 = date#3.after_timezone_1
+ 0:[\t ] -> date#3.after_timezone_1
+ [(epsilon)] -> date#3.zone#6.in
+ Epsilon closure :
+ (self)
+ date#3.zone#6.in
+
+NFA state 76 = date#3.#29
+ 0:[\t ] -> date#3.after_timezone
+ Epsilon closure :
+ (self)
+
+NFA state 77 = date#3.zone#6.in
+ 5:[+] -> date#3.zone#6.zone2
+ 6:[\055] -> date#3.zone#6.zone2
+ 13:[A-Z] -> date#3.zone#6.out
+ 13:[A-Z] -> date#3.zone#6.zone2
+ Epsilon closure :
+ (self)
+
+NFA state 78 = date#3.zone#6.zone2
+ 8:[0-9] -> date#3.zone#6.out
+ 8:[0-9] -> date#3.zone#6.zone2
+ 13:[A-Z] -> date#3.zone#6.out
+ 17:[a-z] -> date#3.zone#6.out
+ 13:[A-Z] -> date#3.zone#6.zone2
+ 17:[a-z] -> date#3.zone#6.zone2
+ Epsilon closure :
+ (self)
+
+NFA state 79 = date#3.zone#6.out
+ [(epsilon)] -> date#3.#29
+ Epsilon closure :
+ (self)
+ date#3.#29
+
+NFA state 80 = date#3.after_timezone
+ 0:[\t ] -> date#3.after_timezone
+ 8:[0-9] -> date#3.#30
+ Epsilon closure :
+ (self)
+
+NFA state 81 = date#3.#30
+ 8:[0-9] -> date#3.#31
+ Epsilon closure :
+ (self)
+
+NFA state 82 = date#3.#31
+ 8:[0-9] -> date#3.#32
+ Epsilon closure :
+ (self)
+
+NFA state 83 = date#3.#32
+ 8:[0-9] -> date#3.after_year
+ Epsilon closure :
+ (self)
+
+NFA state 84 = date#3.after_year
+ 0:[\t ] -> date#3.after_year
+ [(epsilon)] -> date#3.out
+ Epsilon closure :
+ (self)
+ #1
+ date#3.out
+
+NFA state 85 = date#3.out
+ [(epsilon)] -> #1
+ Epsilon closure :
+ (self)
+ #1
+
+NFA state 86 = #2
+ Tags : FROMCHECK_PASS
+ Epsilon closure :
+ (self)
+
+NFA state 87 = #3
+ 2:[\r] -> #4
+ Epsilon closure :
+ (self)
+
+NFA state 88 = date#4.in
+ 0:[\t ] -> date#4.in
+ 0:[\t ] -> date#4.before_weekday
+ Epsilon closure :
+ (self)
+
+NFA state 89 = date#4.before_weekday
+ 13:[A-Z] -> date#4.#1
+ Epsilon closure :
+ (self)
+
+NFA state 90 = date#4.#1
+ 17:[a-z] -> date#4.#2
+ Epsilon closure :
+ (self)
+
+NFA state 91 = date#4.#2
+ 17:[a-z] -> date#4.#3
+ Epsilon closure :
+ (self)
+
+NFA state 92 = date#4.#3
+ 0:[\t ] -> date#4.after_weekday
+ Epsilon closure :
+ (self)
+
+NFA state 93 = date#4.after_weekday
+ 0:[\t ] -> date#4.after_weekday
+ 13:[A-Z] -> date#4.#4
+ Epsilon closure :
+ (self)
+
+NFA state 94 = date#4.#4
+ 17:[a-z] -> date#4.#5
+ Epsilon closure :
+ (self)
+
+NFA state 95 = date#4.#5
+ 17:[a-z] -> date#4.#6
+ Epsilon closure :
+ (self)
+
+NFA state 96 = date#4.#6
+ 0:[\t ] -> date#4.after_month
+ Epsilon closure :
+ (self)
+
+NFA state 97 = date#4.after_month
+ 0:[\t ] -> date#4.after_month
+ 8:[0-9] -> date#4.#7
+ 8:[0-9] -> date#4.#8
+ Epsilon closure :
+ (self)
+
+NFA state 98 = date#4.#7
+ 0:[\t ] -> date#4.after_day
+ Epsilon closure :
+ (self)
+
+NFA state 99 = date#4.#8
+ 8:[0-9] -> date#4.#9
+ Epsilon closure :
+ (self)
+
+NFA state 100 = date#4.#9
+ 0:[\t ] -> date#4.after_day
+ Epsilon closure :
+ (self)
+
+NFA state 101 = date#4.after_day
+ 0:[\t ] -> date#4.after_day
+ 8:[0-9] -> date#4.#10
+ 8:[0-9] -> date#4.#18
+ Epsilon closure :
+ (self)
+
+NFA state 102 = date#4.#10
+ 8:[0-9] -> date#4.#11
+ Epsilon closure :
+ (self)
+
+NFA state 103 = date#4.#11
+ 9:[:] -> date#4.#12
+ Epsilon closure :
+ (self)
+
+NFA state 104 = date#4.#12
+ 8:[0-9] -> date#4.#13
+ Epsilon closure :
+ (self)
+
+NFA state 105 = date#4.#13
+ 8:[0-9] -> date#4.#14
+ Epsilon closure :
+ (self)
+
+NFA state 106 = date#4.#14
+ 9:[:] -> date#4.#15
+ Epsilon closure :
+ (self)
+
+NFA state 107 = date#4.#15
+ 8:[0-9] -> date#4.#16
+ Epsilon closure :
+ (self)
+
+NFA state 108 = date#4.#16
+ 8:[0-9] -> date#4.#17
+ Epsilon closure :
+ (self)
+
+NFA state 109 = date#4.#17
+ 0:[\t ] -> date#4.after_time
+ Epsilon closure :
+ (self)
+
+NFA state 110 = date#4.#18
+ 8:[0-9] -> date#4.#19
+ Epsilon closure :
+ (self)
+
+NFA state 111 = date#4.#19
+ 9:[:] -> date#4.#20
+ Epsilon closure :
+ (self)
+
+NFA state 112 = date#4.#20
+ 8:[0-9] -> date#4.#21
+ Epsilon closure :
+ (self)
+
+NFA state 113 = date#4.#21
+ 8:[0-9] -> date#4.#22
+ Epsilon closure :
+ (self)
+
+NFA state 114 = date#4.#22
+ 0:[\t ] -> date#4.after_time
+ Epsilon closure :
+ (self)
+
+NFA state 115 = date#4.after_time
+ 0:[\t ] -> date#4.after_time
+ [(epsilon)] -> date#4.after_timezone
+ [(epsilon)] -> date#4.zone#1.in
+ [(epsilon)] -> date#4.zone#2.in
+ 8:[0-9] -> date#4.#25
+ Epsilon closure :
+ (self)
+ date#4.zone#1.in
+ date#4.zone#2.in
+ date#4.after_timezone
+
+NFA state 116 = date#4.#23
+ 0:[\t ] -> date#4.after_timezone
+ Epsilon closure :
+ (self)
+
+NFA state 117 = date#4.zone#1.in
+ 5:[+] -> date#4.zone#1.zone2
+ 6:[\055] -> date#4.zone#1.zone2
+ 13:[A-Z] -> date#4.zone#1.out
+ 13:[A-Z] -> date#4.zone#1.zone2
+ Epsilon closure :
+ (self)
+
+NFA state 118 = date#4.zone#1.zone2
+ 8:[0-9] -> date#4.zone#1.out
+ 8:[0-9] -> date#4.zone#1.zone2
+ 13:[A-Z] -> date#4.zone#1.out
+ 17:[a-z] -> date#4.zone#1.out
+ 13:[A-Z] -> date#4.zone#1.zone2
+ 17:[a-z] -> date#4.zone#1.zone2
+ Epsilon closure :
+ (self)
+
+NFA state 119 = date#4.zone#1.out
+ [(epsilon)] -> date#4.#23
+ Epsilon closure :
+ (self)
+ date#4.#23
+
+NFA state 120 = date#4.#24
+ 0:[\t ] -> date#4.after_timezone_1
+ Epsilon closure :
+ (self)
+
+NFA state 121 = date#4.zone#2.in
+ 5:[+] -> date#4.zone#2.zone2
+ 6:[\055] -> date#4.zone#2.zone2
+ 13:[A-Z] -> date#4.zone#2.out
+ 13:[A-Z] -> date#4.zone#2.zone2
+ Epsilon closure :
+ (self)
+
+NFA state 122 = date#4.zone#2.zone2
+ 8:[0-9] -> date#4.zone#2.out
+ 8:[0-9] -> date#4.zone#2.zone2
+ 13:[A-Z] -> date#4.zone#2.out
+ 17:[a-z] -> date#4.zone#2.out
+ 13:[A-Z] -> date#4.zone#2.zone2
+ 17:[a-z] -> date#4.zone#2.zone2
+ Epsilon closure :
+ (self)
+
+NFA state 123 = date#4.zone#2.out
+ [(epsilon)] -> date#4.#24
+ Epsilon closure :
+ (self)
+ date#4.#24
+
+NFA state 124 = date#4.#25
+ 8:[0-9] -> date#4.#26
+ Epsilon closure :
+ (self)
+
+NFA state 125 = date#4.#26
+ 8:[0-9] -> date#4.#27
+ Epsilon closure :
+ (self)
+
+NFA state 126 = date#4.#27
+ 8:[0-9] -> date#4.after_year_before_zone
+ Epsilon closure :
+ (self)
+
+NFA state 127 = date#4.after_year_before_zone
+ 0:[\t ] -> date#4.after_year_before_zone
+ [(epsilon)] -> date#4.zone#3.in
+ [(epsilon)] -> date#4.zone#4.in
+ Epsilon closure :
+ (self)
+ date#4.zone#3.in
+ date#4.zone#4.in
+
+NFA state 128 = date#4.zone#3.in
+ 5:[+] -> date#4.zone#3.zone2
+ 6:[\055] -> date#4.zone#3.zone2
+ 13:[A-Z] -> date#4.zone#3.out
+ 13:[A-Z] -> date#4.zone#3.zone2
+ Epsilon closure :
+ (self)
+
+NFA state 129 = date#4.zone#3.zone2
+ 8:[0-9] -> date#4.zone#3.out
+ 8:[0-9] -> date#4.zone#3.zone2
+ 13:[A-Z] -> date#4.zone#3.out
+ 17:[a-z] -> date#4.zone#3.out
+ 13:[A-Z] -> date#4.zone#3.zone2
+ 17:[a-z] -> date#4.zone#3.zone2
+ Epsilon closure :
+ (self)
+
+NFA state 130 = date#4.zone#3.out
+ [(epsilon)] -> date#4.after_timezone_after_year
+ Epsilon closure :
+ (self)
+ #3
+ date#4.after_timezone_after_year
+ date#4.out
+
+NFA state 131 = date#4.#28
+ 0:[\t ] -> date#4.after_timezone_after_year_1
+ Epsilon closure :
+ (self)
+
+NFA state 132 = date#4.zone#4.in
+ 5:[+] -> date#4.zone#4.zone2
+ 6:[\055] -> date#4.zone#4.zone2
+ 13:[A-Z] -> date#4.zone#4.out
+ 13:[A-Z] -> date#4.zone#4.zone2
+ Epsilon closure :
+ (self)
+
+NFA state 133 = date#4.zone#4.zone2
+ 8:[0-9] -> date#4.zone#4.out
+ 8:[0-9] -> date#4.zone#4.zone2
+ 13:[A-Z] -> date#4.zone#4.out
+ 17:[a-z] -> date#4.zone#4.out
+ 13:[A-Z] -> date#4.zone#4.zone2
+ 17:[a-z] -> date#4.zone#4.zone2
+ Epsilon closure :
+ (self)
+
+NFA state 134 = date#4.zone#4.out
+ [(epsilon)] -> date#4.#28
+ Epsilon closure :
+ (self)
+ date#4.#28
+
+NFA state 135 = date#4.after_timezone_after_year_1
+ 0:[\t ] -> date#4.after_timezone_after_year_1
+ [(epsilon)] -> date#4.zone#5.in
+ Epsilon closure :
+ (self)
+ date#4.zone#5.in
+
+NFA state 136 = date#4.zone#5.in
+ 5:[+] -> date#4.zone#5.zone2
+ 6:[\055] -> date#4.zone#5.zone2
+ 13:[A-Z] -> date#4.zone#5.out
+ 13:[A-Z] -> date#4.zone#5.zone2
+ Epsilon closure :
+ (self)
+
+NFA state 137 = date#4.zone#5.zone2
+ 8:[0-9] -> date#4.zone#5.out
+ 8:[0-9] -> date#4.zone#5.zone2
+ 13:[A-Z] -> date#4.zone#5.out
+ 17:[a-z] -> date#4.zone#5.out
+ 13:[A-Z] -> date#4.zone#5.zone2
+ 17:[a-z] -> date#4.zone#5.zone2
+ Epsilon closure :
+ (self)
+
+NFA state 138 = date#4.zone#5.out
+ [(epsilon)] -> date#4.after_timezone_after_year
+ Epsilon closure :
+ (self)
+ #3
+ date#4.after_timezone_after_year
+ date#4.out
+
+NFA state 139 = date#4.after_timezone_after_year
+ 0:[\t ] -> date#4.after_timezone_after_year
+ [(epsilon)] -> date#4.out
+ Epsilon closure :
+ (self)
+ #3
+ date#4.out
+
+NFA state 140 = date#4.after_timezone_1
+ 0:[\t ] -> date#4.after_timezone_1
+ [(epsilon)] -> date#4.zone#6.in
+ Epsilon closure :
+ (self)
+ date#4.zone#6.in
+
+NFA state 141 = date#4.#29
+ 0:[\t ] -> date#4.after_timezone
+ Epsilon closure :
+ (self)
+
+NFA state 142 = date#4.zone#6.in
+ 5:[+] -> date#4.zone#6.zone2
+ 6:[\055] -> date#4.zone#6.zone2
+ 13:[A-Z] -> date#4.zone#6.out
+ 13:[A-Z] -> date#4.zone#6.zone2
+ Epsilon closure :
+ (self)
+
+NFA state 143 = date#4.zone#6.zone2
+ 8:[0-9] -> date#4.zone#6.out
+ 8:[0-9] -> date#4.zone#6.zone2
+ 13:[A-Z] -> date#4.zone#6.out
+ 17:[a-z] -> date#4.zone#6.out
+ 13:[A-Z] -> date#4.zone#6.zone2
+ 17:[a-z] -> date#4.zone#6.zone2
+ Epsilon closure :
+ (self)
+
+NFA state 144 = date#4.zone#6.out
+ [(epsilon)] -> date#4.#29
+ Epsilon closure :
+ (self)
+ date#4.#29
+
+NFA state 145 = date#4.after_timezone
+ 0:[\t ] -> date#4.after_timezone
+ 8:[0-9] -> date#4.#30
+ Epsilon closure :
+ (self)
+
+NFA state 146 = date#4.#30
+ 8:[0-9] -> date#4.#31
+ Epsilon closure :
+ (self)
+
+NFA state 147 = date#4.#31
+ 8:[0-9] -> date#4.#32
+ Epsilon closure :
+ (self)
+
+NFA state 148 = date#4.#32
+ 8:[0-9] -> date#4.after_year
+ Epsilon closure :
+ (self)
+
+NFA state 149 = date#4.after_year
+ 0:[\t ] -> date#4.after_year
+ [(epsilon)] -> date#4.out
+ Epsilon closure :
+ (self)
+ #3
+ date#4.out
+
+NFA state 150 = date#4.out
+ [(epsilon)] -> #3
+ Epsilon closure :
+ (self)
+ #3
+
+NFA state 151 = #4
+ 1:[\n] -> #5
+ Epsilon closure :
+ (self)
+
+NFA state 152 = #5
+ Tags : FROMCHECK_PASS
+ Epsilon closure :
+ (self)
+
+--------------------------------
+DFA structure before compression
+--------------------------------
+DFA state 0
+ NFA states :
+ in
+ email#1.in
+ angled_email#2.in
+ before_date
+ date#3.in
+ date#4.in
+
+ Forward route :
+ (START)->(HERE)
+ Transitions :
+ 0:[\t ] -> 1
+ 3:[!#-'*/=?^`{-~] -> 2
+ 4:["] -> 3
+ 5:[+] -> 2
+ 6:[\055] -> 2
+ 7:[.] -> 2
+ 8:[0-9] -> 2
+ 10:[<] -> 4
+ 12:[@] -> 5
+ 13:[A-Z] -> 2
+ 16:[_] -> 2
+ 17:[a-z] -> 2
+
+DFA state 1
+ NFA states :
+ in
+ email#1.in
+ angled_email#2.in
+ before_date
+ date#3.in
+ date#3.before_weekday
+ date#4.in
+ date#4.before_weekday
+
+ Forward route : (from state 0)
+ (START)->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 1
+ 3:[!#-'*/=?^`{-~] -> 2
+ 4:["] -> 3
+ 5:[+] -> 2
+ 6:[\055] -> 2
+ 7:[.] -> 2
+ 8:[0-9] -> 2
+ 10:[<] -> 4
+ 12:[@] -> 5
+ 13:[A-Z] -> 6
+ 16:[_] -> 2
+ 17:[a-z] -> 2
+
+DFA state 2
+ NFA states :
+ email#1.in
+ email#1.before_at
+ email#1.out
+ before_date
+ date#3.in
+ date#4.in
+
+ Forward route : (from state 0)
+ (START)->3:[!#-'*/=?^`{-~]->(HERE)
+ Transitions :
+ 0:[\t ] -> 7
+ 3:[!#-'*/=?^`{-~] -> 2
+ 4:["] -> 3
+ 5:[+] -> 2
+ 6:[\055] -> 2
+ 7:[.] -> 2
+ 8:[0-9] -> 2
+ 12:[@] -> 8
+ 13:[A-Z] -> 2
+ 16:[_] -> 2
+ 17:[a-z] -> 2
+
+DFA state 3
+ NFA states :
+ email#1.quoted_before_at
+
+ Forward route : (from state 0)
+ (START)->4:["]->(HERE)
+ Transitions :
+ 0:[\t ] -> 3
+ 3:[!#-'*/=?^`{-~] -> 3
+ 4:["] -> 9
+ 5:[+] -> 3
+ 6:[\055] -> 3
+ 7:[.] -> 3
+ 8:[0-9] -> 3
+ 9:[:] -> 3
+ 10:[<] -> 3
+ 11:[>] -> 3
+ 12:[@] -> 3
+ 13:[A-Z] -> 3
+ 16:[_] -> 3
+ 17:[a-z] -> 3
+
+DFA state 4
+ NFA states :
+ angled_email#2.in_angles
+ angled_email#2.email#1.in
+
+ Forward route : (from state 0)
+ (START)->10:[<]->(HERE)
+ Transitions :
+ 3:[!#-'*/=?^`{-~] -> 10
+ 4:["] -> 11
+ 5:[+] -> 10
+ 6:[\055] -> 10
+ 7:[.] -> 10
+ 8:[0-9] -> 10
+ 12:[@] -> 12
+ 13:[A-Z] -> 10
+ 16:[_] -> 10
+ 17:[a-z] -> 10
+
+DFA state 5
+ NFA states :
+ email#1.domain_route
+
+ Forward route : (from state 0)
+ (START)->12:[@]->(HERE)
+ Transitions :
+ 6:[\055] -> 5
+ 7:[.] -> 5
+ 8:[0-9] -> 5
+ 9:[:] -> 13
+ 13:[A-Z] -> 5
+ 16:[_] -> 5
+ 17:[a-z] -> 5
+
+DFA state 6
+ NFA states :
+ email#1.in
+ email#1.before_at
+ email#1.out
+ before_date
+ date#3.in
+ date#3.#1
+ date#4.in
+ date#4.#1
+
+ Forward route : (from state 1)
+ (START)->0:[\t ]->13:[A-Z]->(HERE)
+ Transitions :
+ 0:[\t ] -> 7
+ 3:[!#-'*/=?^`{-~] -> 2
+ 4:["] -> 3
+ 5:[+] -> 2
+ 6:[\055] -> 2
+ 7:[.] -> 2
+ 8:[0-9] -> 2
+ 12:[@] -> 8
+ 13:[A-Z] -> 2
+ 16:[_] -> 2
+ 17:[a-z] -> 14
+
+DFA state 7
+ NFA states :
+ date#3.in
+ date#3.before_weekday
+ date#4.in
+ date#4.before_weekday
+
+ Forward route : (from state 2)
+ (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 7
+ 13:[A-Z] -> 15
+
+DFA state 8
+ NFA states :
+ email#1.domain_route
+ email#1.start_of_domain
+
+ Forward route : (from state 2)
+ (START)->3:[!#-'*/=?^`{-~]->12:[@]->(HERE)
+ Transitions :
+ 6:[\055] -> 16
+ 7:[.] -> 16
+ 8:[0-9] -> 16
+ 9:[:] -> 13
+ 13:[A-Z] -> 16
+ 14:[[] -> 17
+ 16:[_] -> 16
+ 17:[a-z] -> 16
+
+DFA state 9
+ NFA states :
+ email#1.before_at
+ email#1.out
+ before_date
+ date#3.in
+ date#4.in
+
+ Forward route : (from state 3)
+ (START)->4:["]->4:["]->(HERE)
+ Transitions :
+ 0:[\t ] -> 7
+ 3:[!#-'*/=?^`{-~] -> 9
+ 4:["] -> 3
+ 5:[+] -> 9
+ 6:[\055] -> 9
+ 7:[.] -> 9
+ 8:[0-9] -> 9
+ 12:[@] -> 18
+ 13:[A-Z] -> 9
+ 16:[_] -> 9
+ 17:[a-z] -> 9
+
+DFA state 10
+ NFA states :
+ angled_email#2.email#1.in
+ angled_email#2.email#1.before_at
+ angled_email#2.email#1.out
+ angled_email#2.before_gt
+
+ Forward route : (from state 4)
+ (START)->10:[<]->3:[!#-'*/=?^`{-~]->(HERE)
+ Transitions :
+ 3:[!#-'*/=?^`{-~] -> 10
+ 4:["] -> 11
+ 5:[+] -> 10
+ 6:[\055] -> 10
+ 7:[.] -> 10
+ 8:[0-9] -> 10
+ 11:[>] -> 19
+ 12:[@] -> 20
+ 13:[A-Z] -> 10
+ 16:[_] -> 10
+ 17:[a-z] -> 10
+
+DFA state 11
+ NFA states :
+ angled_email#2.email#1.quoted_before_at
+
+ Forward route : (from state 4)
+ (START)->10:[<]->4:["]->(HERE)
+ Transitions :
+ 0:[\t ] -> 11
+ 3:[!#-'*/=?^`{-~] -> 11
+ 4:["] -> 21
+ 5:[+] -> 11
+ 6:[\055] -> 11
+ 7:[.] -> 11
+ 8:[0-9] -> 11
+ 9:[:] -> 11
+ 10:[<] -> 11
+ 11:[>] -> 11
+ 12:[@] -> 11
+ 13:[A-Z] -> 11
+ 16:[_] -> 11
+ 17:[a-z] -> 11
+
+DFA state 12
+ NFA states :
+ angled_email#2.email#1.domain_route
+
+ Forward route : (from state 4)
+ (START)->10:[<]->12:[@]->(HERE)
+ Transitions :
+ 6:[\055] -> 12
+ 7:[.] -> 12
+ 8:[0-9] -> 12
+ 9:[:] -> 22
+ 13:[A-Z] -> 12
+ 16:[_] -> 12
+ 17:[a-z] -> 12
+
+DFA state 13
+ NFA states :
+ email#1.in
+
+ Forward route : (from state 5)
+ (START)->12:[@]->9:[:]->(HERE)
+ Transitions :
+ 3:[!#-'*/=?^`{-~] -> 2
+ 4:["] -> 3
+ 5:[+] -> 2
+ 6:[\055] -> 2
+ 7:[.] -> 2
+ 8:[0-9] -> 2
+ 12:[@] -> 5
+ 13:[A-Z] -> 2
+ 16:[_] -> 2
+ 17:[a-z] -> 2
+
+DFA state 14
+ NFA states :
+ email#1.in
+ email#1.before_at
+ email#1.out
+ before_date
+ date#3.in
+ date#3.#2
+ date#4.in
+ date#4.#2
+
+ Forward route : (from state 6)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->(HERE)
+ Transitions :
+ 0:[\t ] -> 7
+ 3:[!#-'*/=?^`{-~] -> 2
+ 4:["] -> 3
+ 5:[+] -> 2
+ 6:[\055] -> 2
+ 7:[.] -> 2
+ 8:[0-9] -> 2
+ 12:[@] -> 8
+ 13:[A-Z] -> 2
+ 16:[_] -> 2
+ 17:[a-z] -> 23
+
+DFA state 15
+ NFA states :
+ date#3.#1
+ date#4.#1
+
+ Forward route : (from state 7)
+ (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->(HERE)
+ Transitions :
+ 17:[a-z] -> 24
+
+DFA state 16
+ NFA states :
+ email#1.domain_route
+ email#1.after_at
+
+ Forward route : (from state 8)
+ (START)->3:[!#-'*/=?^`{-~]->12:[@]->6:[\055]->(HERE)
+ Transitions :
+ 6:[\055] -> 25
+ 7:[.] -> 25
+ 8:[0-9] -> 25
+ 9:[:] -> 13
+ 13:[A-Z] -> 25
+ 16:[_] -> 25
+ 17:[a-z] -> 25
+
+DFA state 17
+ NFA states :
+ email#1.dotted_quad
+
+ Forward route : (from state 8)
+ (START)->3:[!#-'*/=?^`{-~]->12:[@]->14:[[]->(HERE)
+ Transitions :
+ 7:[.] -> 17
+ 8:[0-9] -> 17
+ 15:[]] -> 26
+
+DFA state 18
+ NFA states :
+ email#1.start_of_domain
+
+ Forward route : (from state 9)
+ (START)->4:["]->4:["]->12:[@]->(HERE)
+ Transitions :
+ 6:[\055] -> 27
+ 7:[.] -> 27
+ 8:[0-9] -> 27
+ 13:[A-Z] -> 27
+ 14:[[] -> 17
+ 16:[_] -> 27
+ 17:[a-z] -> 27
+
+DFA state 19
+ NFA states :
+ angled_email#2.out
+ before_date
+ date#3.in
+ date#4.in
+
+ Forward route : (from state 10)
+ (START)->10:[<]->3:[!#-'*/=?^`{-~]->11:[>]->(HERE)
+ Transitions :
+ 0:[\t ] -> 7
+
+DFA state 20
+ NFA states :
+ angled_email#2.email#1.domain_route
+ angled_email#2.email#1.start_of_domain
+
+ Forward route : (from state 10)
+ (START)->10:[<]->3:[!#-'*/=?^`{-~]->12:[@]->(HERE)
+ Transitions :
+ 6:[\055] -> 28
+ 7:[.] -> 28
+ 8:[0-9] -> 28
+ 9:[:] -> 22
+ 13:[A-Z] -> 28
+ 14:[[] -> 29
+ 16:[_] -> 28
+ 17:[a-z] -> 28
+
+DFA state 21
+ NFA states :
+ angled_email#2.email#1.before_at
+ angled_email#2.email#1.out
+ angled_email#2.before_gt
+
+ Forward route : (from state 11)
+ (START)->10:[<]->4:["]->4:["]->(HERE)
+ Transitions :
+ 3:[!#-'*/=?^`{-~] -> 21
+ 4:["] -> 11
+ 5:[+] -> 21
+ 6:[\055] -> 21
+ 7:[.] -> 21
+ 8:[0-9] -> 21
+ 11:[>] -> 19
+ 12:[@] -> 30
+ 13:[A-Z] -> 21
+ 16:[_] -> 21
+ 17:[a-z] -> 21
+
+DFA state 22
+ NFA states :
+ angled_email#2.email#1.in
+
+ Forward route : (from state 12)
+ (START)->10:[<]->12:[@]->9:[:]->(HERE)
+ Transitions :
+ 3:[!#-'*/=?^`{-~] -> 10
+ 4:["] -> 11
+ 5:[+] -> 10
+ 6:[\055] -> 10
+ 7:[.] -> 10
+ 8:[0-9] -> 10
+ 12:[@] -> 12
+ 13:[A-Z] -> 10
+ 16:[_] -> 10
+ 17:[a-z] -> 10
+
+DFA state 23
+ NFA states :
+ email#1.in
+ email#1.before_at
+ email#1.out
+ before_date
+ date#3.in
+ date#3.#3
+ date#4.in
+ date#4.#3
+
+ Forward route : (from state 14)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->(HERE)
+ Transitions :
+ 0:[\t ] -> 31
+ 3:[!#-'*/=?^`{-~] -> 2
+ 4:["] -> 3
+ 5:[+] -> 2
+ 6:[\055] -> 2
+ 7:[.] -> 2
+ 8:[0-9] -> 2
+ 12:[@] -> 8
+ 13:[A-Z] -> 2
+ 16:[_] -> 2
+ 17:[a-z] -> 2
+
+DFA state 24
+ NFA states :
+ date#3.#2
+ date#4.#2
+
+ Forward route : (from state 15)
+ (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->(HERE)
+ Transitions :
+ 17:[a-z] -> 32
+
+DFA state 25
+ NFA states :
+ email#1.domain_route
+ email#1.after_at
+ email#1.out
+ before_date
+ date#3.in
+ date#4.in
+
+ Forward route : (from state 16)
+ (START)->3:[!#-'*/=?^`{-~]->12:[@]->6:[\055]->6:[\055]->(HERE)
+ Transitions :
+ 0:[\t ] -> 7
+ 6:[\055] -> 25
+ 7:[.] -> 25
+ 8:[0-9] -> 25
+ 9:[:] -> 13
+ 13:[A-Z] -> 25
+ 16:[_] -> 25
+ 17:[a-z] -> 25
+
+DFA state 26
+ NFA states :
+ email#1.out
+ before_date
+ date#3.in
+ date#4.in
+
+ Forward route : (from state 17)
+ (START)->3:[!#-'*/=?^`{-~]->12:[@]->14:[[]->15:[]]->(HERE)
+ Transitions :
+ 0:[\t ] -> 7
+
+DFA state 27
+ NFA states :
+ email#1.after_at
+
+ Forward route : (from state 18)
+ (START)->4:["]->4:["]->12:[@]->6:[\055]->(HERE)
+ Transitions :
+ 6:[\055] -> 33
+ 7:[.] -> 33
+ 8:[0-9] -> 33
+ 13:[A-Z] -> 33
+ 16:[_] -> 33
+ 17:[a-z] -> 33
+
+DFA state 28
+ NFA states :
+ angled_email#2.email#1.domain_route
+ angled_email#2.email#1.after_at
+
+ Forward route : (from state 20)
+ (START)->10:[<]->3:[!#-'*/=?^`{-~]->12:[@]->6:[\055]->(HERE)
+ Transitions :
+ 6:[\055] -> 34
+ 7:[.] -> 34
+ 8:[0-9] -> 34
+ 9:[:] -> 22
+ 13:[A-Z] -> 34
+ 16:[_] -> 34
+ 17:[a-z] -> 34
+
+DFA state 29
+ NFA states :
+ angled_email#2.email#1.dotted_quad
+
+ Forward route : (from state 20)
+ (START)->10:[<]->3:[!#-'*/=?^`{-~]->12:[@]->14:[[]->(HERE)
+ Transitions :
+ 7:[.] -> 29
+ 8:[0-9] -> 29
+ 15:[]] -> 35
+
+DFA state 30
+ NFA states :
+ angled_email#2.email#1.start_of_domain
+
+ Forward route : (from state 21)
+ (START)->10:[<]->4:["]->4:["]->12:[@]->(HERE)
+ Transitions :
+ 6:[\055] -> 36
+ 7:[.] -> 36
+ 8:[0-9] -> 36
+ 13:[A-Z] -> 36
+ 14:[[] -> 29
+ 16:[_] -> 36
+ 17:[a-z] -> 36
+
+DFA state 31
+ NFA states :
+ date#3.in
+ date#3.before_weekday
+ date#3.after_weekday
+ date#4.in
+ date#4.before_weekday
+ date#4.after_weekday
+
+ Forward route : (from state 23)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 31
+ 13:[A-Z] -> 37
+
+DFA state 32
+ NFA states :
+ date#3.#3
+ date#4.#3
+
+ Forward route : (from state 24)
+ (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->(HERE)
+ Transitions :
+ 0:[\t ] -> 38
+
+DFA state 33
+ NFA states :
+ email#1.after_at
+ email#1.out
+ before_date
+ date#3.in
+ date#4.in
+
+ Forward route : (from state 27)
+ (START)->4:["]->4:["]->12:[@]->6:[\055]->6:[\055]->(HERE)
+ Transitions :
+ 0:[\t ] -> 7
+ 6:[\055] -> 33
+ 7:[.] -> 33
+ 8:[0-9] -> 33
+ 13:[A-Z] -> 33
+ 16:[_] -> 33
+ 17:[a-z] -> 33
+
+DFA state 34
+ NFA states :
+ angled_email#2.email#1.domain_route
+ angled_email#2.email#1.after_at
+ angled_email#2.email#1.out
+ angled_email#2.before_gt
+
+ Forward route : (from state 28)
+ (START)->10:[<]->3:[!#-'*/=?^`{-~]->12:[@]->6:[\055]->6:[\055]->(HERE)
+ Transitions :
+ 6:[\055] -> 34
+ 7:[.] -> 34
+ 8:[0-9] -> 34
+ 9:[:] -> 22
+ 11:[>] -> 19
+ 13:[A-Z] -> 34
+ 16:[_] -> 34
+ 17:[a-z] -> 34
+
+DFA state 35
+ NFA states :
+ angled_email#2.email#1.out
+ angled_email#2.before_gt
+
+ Forward route : (from state 29)
+ (START)->10:[<]->3:[!#-'*/=?^`{-~]->12:[@]->14:[[]->15:[]]->(HERE)
+ Transitions :
+ 11:[>] -> 19
+
+DFA state 36
+ NFA states :
+ angled_email#2.email#1.after_at
+
+ Forward route : (from state 30)
+ (START)->10:[<]->4:["]->4:["]->12:[@]->6:[\055]->(HERE)
+ Transitions :
+ 6:[\055] -> 39
+ 7:[.] -> 39
+ 8:[0-9] -> 39
+ 13:[A-Z] -> 39
+ 16:[_] -> 39
+ 17:[a-z] -> 39
+
+DFA state 37
+ NFA states :
+ date#3.#1
+ date#3.#4
+ date#4.#1
+ date#4.#4
+
+ Forward route : (from state 31)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->(HERE)
+ Transitions :
+ 17:[a-z] -> 40
+
+DFA state 38
+ NFA states :
+ date#3.after_weekday
+ date#4.after_weekday
+
+ Forward route : (from state 32)
+ (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 38
+ 13:[A-Z] -> 41
+
+DFA state 39
+ NFA states :
+ angled_email#2.email#1.after_at
+ angled_email#2.email#1.out
+ angled_email#2.before_gt
+
+ Forward route : (from state 36)
+ (START)->10:[<]->4:["]->4:["]->12:[@]->6:[\055]->6:[\055]->(HERE)
+ Transitions :
+ 6:[\055] -> 39
+ 7:[.] -> 39
+ 8:[0-9] -> 39
+ 11:[>] -> 19
+ 13:[A-Z] -> 39
+ 16:[_] -> 39
+ 17:[a-z] -> 39
+
+DFA state 40
+ NFA states :
+ date#3.#2
+ date#3.#5
+ date#4.#2
+ date#4.#5
+
+ Forward route : (from state 37)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->(HERE)
+ Transitions :
+ 17:[a-z] -> 42
+
+DFA state 41
+ NFA states :
+ date#3.#4
+ date#4.#4
+
+ Forward route : (from state 38)
+ (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->(HERE)
+ Transitions :
+ 17:[a-z] -> 43
+
+DFA state 42
+ NFA states :
+ date#3.#3
+ date#3.#6
+ date#4.#3
+ date#4.#6
+
+ Forward route : (from state 40)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->(HERE)
+ Transitions :
+ 0:[\t ] -> 44
+
+DFA state 43
+ NFA states :
+ date#3.#5
+ date#4.#5
+
+ Forward route : (from state 41)
+ (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->(HERE)
+ Transitions :
+ 17:[a-z] -> 45
+
+DFA state 44
+ NFA states :
+ date#3.after_weekday
+ date#3.after_month
+ date#4.after_weekday
+ date#4.after_month
+
+ Forward route : (from state 42)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 44
+ 8:[0-9] -> 46
+ 13:[A-Z] -> 41
+
+DFA state 45
+ NFA states :
+ date#3.#6
+ date#4.#6
+
+ Forward route : (from state 43)
+ (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->(HERE)
+ Transitions :
+ 0:[\t ] -> 47
+
+DFA state 46
+ NFA states :
+ date#3.#7
+ date#3.#8
+ date#4.#7
+ date#4.#8
+
+ Forward route : (from state 44)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->(HERE)
+ Transitions :
+ 0:[\t ] -> 48
+ 8:[0-9] -> 49
+
+DFA state 47
+ NFA states :
+ date#3.after_month
+ date#4.after_month
+
+ Forward route : (from state 45)
+ (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 47
+ 8:[0-9] -> 46
+
+DFA state 48
+ NFA states :
+ date#3.after_day
+ date#4.after_day
+
+ Forward route : (from state 46)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 48
+ 8:[0-9] -> 50
+
+DFA state 49
+ NFA states :
+ date#3.#9
+ date#4.#9
+
+ Forward route : (from state 46)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->8:[0-9]->(HERE)
+ Transitions :
+ 0:[\t ] -> 48
+
+DFA state 50
+ NFA states :
+ date#3.#10
+ date#3.#18
+ date#4.#10
+ date#4.#18
+
+ Forward route : (from state 48)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->(HERE)
+ Transitions :
+ 8:[0-9] -> 51
+
+DFA state 51
+ NFA states :
+ date#3.#11
+ date#3.#19
+ date#4.#11
+ date#4.#19
+
+ Forward route : (from state 50)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->(HERE)
+ Transitions :
+ 9:[:] -> 52
+
+DFA state 52
+ NFA states :
+ date#3.#12
+ date#3.#20
+ date#4.#12
+ date#4.#20
+
+ Forward route : (from state 51)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->(HERE)
+ Transitions :
+ 8:[0-9] -> 53
+
+DFA state 53
+ NFA states :
+ date#3.#13
+ date#3.#21
+ date#4.#13
+ date#4.#21
+
+ Forward route : (from state 52)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->(HERE)
+ Transitions :
+ 8:[0-9] -> 54
+
+DFA state 54
+ NFA states :
+ date#3.#14
+ date#3.#22
+ date#4.#14
+ date#4.#22
+
+ Forward route : (from state 53)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->(HERE)
+ Transitions :
+ 0:[\t ] -> 55
+ 9:[:] -> 56
+
+DFA state 55
+ NFA states :
+ date#3.after_time
+ date#3.zone#1.in
+ date#3.zone#2.in
+ date#3.after_timezone
+ date#4.after_time
+ date#4.zone#1.in
+ date#4.zone#2.in
+ date#4.after_timezone
+
+ Forward route : (from state 54)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 55
+ 5:[+] -> 57
+ 6:[\055] -> 57
+ 8:[0-9] -> 58
+ 13:[A-Z] -> 59
+
+DFA state 56
+ NFA states :
+ date#3.#15
+ date#4.#15
+
+ Forward route : (from state 54)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->9:[:]->(HERE)
+ Transitions :
+ 8:[0-9] -> 60
+
+DFA state 57
+ NFA states :
+ date#3.zone#1.zone2
+ date#3.zone#2.zone2
+ date#4.zone#1.zone2
+ date#4.zone#2.zone2
+
+ Forward route : (from state 55)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->5:[+]->(HERE)
+ Transitions :
+ 8:[0-9] -> 59
+ 13:[A-Z] -> 59
+ 17:[a-z] -> 59
+
+DFA state 58
+ NFA states :
+ date#3.#25
+ date#3.#30
+ date#4.#25
+ date#4.#30
+
+ Forward route : (from state 55)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->(HERE)
+ Transitions :
+ 8:[0-9] -> 61
+
+DFA state 59
+ NFA states :
+ date#3.#23
+ date#3.zone#1.zone2
+ date#3.zone#1.out
+ date#3.#24
+ date#3.zone#2.zone2
+ date#3.zone#2.out
+ date#4.#23
+ date#4.zone#1.zone2
+ date#4.zone#1.out
+ date#4.#24
+ date#4.zone#2.zone2
+ date#4.zone#2.out
+
+ Forward route : (from state 55)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->(HERE)
+ Transitions :
+ 0:[\t ] -> 62
+ 8:[0-9] -> 59
+ 13:[A-Z] -> 59
+ 17:[a-z] -> 59
+
+DFA state 60
+ NFA states :
+ date#3.#16
+ date#4.#16
+
+ Forward route : (from state 56)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->(HERE)
+ Transitions :
+ 8:[0-9] -> 63
+
+DFA state 61
+ NFA states :
+ date#3.#26
+ date#3.#31
+ date#4.#26
+ date#4.#31
+
+ Forward route : (from state 58)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->(HERE)
+ Transitions :
+ 8:[0-9] -> 64
+
+DFA state 62
+ NFA states :
+ date#3.after_timezone_1
+ date#3.zone#6.in
+ date#3.after_timezone
+ date#4.after_timezone_1
+ date#4.zone#6.in
+ date#4.after_timezone
+
+ Forward route : (from state 59)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 62
+ 5:[+] -> 65
+ 6:[\055] -> 65
+ 8:[0-9] -> 66
+ 13:[A-Z] -> 67
+
+DFA state 63
+ NFA states :
+ date#3.#17
+ date#4.#17
+
+ Forward route : (from state 60)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->(HERE)
+ Transitions :
+ 0:[\t ] -> 55
+
+DFA state 64
+ NFA states :
+ date#3.#27
+ date#3.#32
+ date#4.#27
+ date#4.#32
+
+ Forward route : (from state 61)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->(HERE)
+ Transitions :
+ 8:[0-9] -> 68
+
+DFA state 65
+ NFA states :
+ date#3.zone#6.zone2
+ date#4.zone#6.zone2
+
+ Forward route : (from state 62)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->5:[+]->(HERE)
+ Transitions :
+ 8:[0-9] -> 67
+ 13:[A-Z] -> 67
+ 17:[a-z] -> 67
+
+DFA state 66
+ NFA states :
+ date#3.#30
+ date#4.#30
+
+ Forward route : (from state 62)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->8:[0-9]->(HERE)
+ Transitions :
+ 8:[0-9] -> 69
+
+DFA state 67
+ NFA states :
+ date#3.#29
+ date#3.zone#6.zone2
+ date#3.zone#6.out
+ date#4.#29
+ date#4.zone#6.zone2
+ date#4.zone#6.out
+
+ Forward route : (from state 62)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->13:[A-Z]->(HERE)
+ Transitions :
+ 0:[\t ] -> 70
+ 8:[0-9] -> 67
+ 13:[A-Z] -> 67
+ 17:[a-z] -> 67
+
+DFA state 68
+ NFA states :
+ #1
+ date#3.after_year_before_zone
+ date#3.zone#3.in
+ date#3.zone#4.in
+ date#3.after_year
+ date#3.out
+ #3
+ date#4.after_year_before_zone
+ date#4.zone#3.in
+ date#4.zone#4.in
+ date#4.after_year
+ date#4.out
+
+ Forward route : (from state 64)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->(HERE)
+ Transitions :
+ 0:[\t ] -> 68
+ 1:[\n] -> 71
+ 2:[\r] -> 72
+ 5:[+] -> 73
+ 6:[\055] -> 73
+ 13:[A-Z] -> 74
+
+DFA state 69
+ NFA states :
+ date#3.#31
+ date#4.#31
+
+ Forward route : (from state 66)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->8:[0-9]->8:[0-9]->(HERE)
+ Transitions :
+ 8:[0-9] -> 75
+
+DFA state 70
+ NFA states :
+ date#3.after_timezone
+ date#4.after_timezone
+
+ Forward route : (from state 67)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->13:[A-Z]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 70
+ 8:[0-9] -> 66
+
+DFA state 71
+ NFA states :
+ #2
+
+ Forward route : (from state 68)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->1:[\n]->(HERE)
+ Transitions :
+ NFA exit tags applying :
+ FROMCHECK_PASS
+ Attributes for <(DEFAULT)> : FROMCHECK_PASS
+
+DFA state 72
+ NFA states :
+ #4
+
+ Forward route : (from state 68)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->2:[\r]->(HERE)
+ Transitions :
+ 1:[\n] -> 76
+
+DFA state 73
+ NFA states :
+ date#3.zone#3.zone2
+ date#3.zone#4.zone2
+ date#4.zone#3.zone2
+ date#4.zone#4.zone2
+
+ Forward route : (from state 68)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->5:[+]->(HERE)
+ Transitions :
+ 8:[0-9] -> 74
+ 13:[A-Z] -> 74
+ 17:[a-z] -> 74
+
+DFA state 74
+ NFA states :
+ #1
+ date#3.zone#3.zone2
+ date#3.zone#3.out
+ date#3.#28
+ date#3.zone#4.zone2
+ date#3.zone#4.out
+ date#3.after_timezone_after_year
+ date#3.out
+ #3
+ date#4.zone#3.zone2
+ date#4.zone#3.out
+ date#4.#28
+ date#4.zone#4.zone2
+ date#4.zone#4.out
+ date#4.after_timezone_after_year
+ date#4.out
+
+ Forward route : (from state 68)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->13:[A-Z]->(HERE)
+ Transitions :
+ 0:[\t ] -> 77
+ 1:[\n] -> 71
+ 2:[\r] -> 72
+ 8:[0-9] -> 74
+ 13:[A-Z] -> 74
+ 17:[a-z] -> 74
+
+DFA state 75
+ NFA states :
+ date#3.#32
+ date#4.#32
+
+ Forward route : (from state 69)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->(HERE)
+ Transitions :
+ 8:[0-9] -> 78
+
+DFA state 76
+ NFA states :
+ #5
+
+ Forward route : (from state 72)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->2:[\r]->1:[\n]->(HERE)
+ Transitions :
+ NFA exit tags applying :
+ FROMCHECK_PASS
+ Attributes for <(DEFAULT)> : FROMCHECK_PASS
+
+DFA state 77
+ NFA states :
+ #1
+ date#3.after_timezone_after_year_1
+ date#3.zone#5.in
+ date#3.after_timezone_after_year
+ date#3.out
+ #3
+ date#4.after_timezone_after_year_1
+ date#4.zone#5.in
+ date#4.after_timezone_after_year
+ date#4.out
+
+ Forward route : (from state 74)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->13:[A-Z]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 77
+ 1:[\n] -> 71
+ 2:[\r] -> 72
+ 5:[+] -> 79
+ 6:[\055] -> 79
+ 13:[A-Z] -> 80
+
+DFA state 78
+ NFA states :
+ #1
+ date#3.after_year
+ date#3.out
+ #3
+ date#4.after_year
+ date#4.out
+
+ Forward route : (from state 75)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->(HERE)
+ Transitions :
+ 0:[\t ] -> 78
+ 1:[\n] -> 71
+ 2:[\r] -> 72
+
+DFA state 79
+ NFA states :
+ date#3.zone#5.zone2
+ date#4.zone#5.zone2
+
+ Forward route : (from state 77)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->13:[A-Z]->0:[\t ]->5:[+]->(HERE)
+ Transitions :
+ 8:[0-9] -> 80
+ 13:[A-Z] -> 80
+ 17:[a-z] -> 80
+
+DFA state 80
+ NFA states :
+ #1
+ date#3.zone#5.zone2
+ date#3.zone#5.out
+ date#3.after_timezone_after_year
+ date#3.out
+ #3
+ date#4.zone#5.zone2
+ date#4.zone#5.out
+ date#4.after_timezone_after_year
+ date#4.out
+
+ Forward route : (from state 77)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->13:[A-Z]->0:[\t ]->13:[A-Z]->(HERE)
+ Transitions :
+ 0:[\t ] -> 81
+ 1:[\n] -> 71
+ 2:[\r] -> 72
+ 8:[0-9] -> 80
+ 13:[A-Z] -> 80
+ 17:[a-z] -> 80
+
+DFA state 81
+ NFA states :
+ #1
+ date#3.after_timezone_after_year
+ date#3.out
+ #3
+ date#4.after_timezone_after_year
+ date#4.out
+
+ Forward route : (from state 80)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->13:[A-Z]->0:[\t ]->13:[A-Z]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 81
+ 1:[\n] -> 71
+ 2:[\r] -> 72
+
+
+Entry states in DFA:
+Entry <(ONLY ENTRY)> : 0
+Searching for dead states...
+(no dead states found)
+
+-----------------------------
+------ COMPRESSING DFA ------
+-----------------------------
+Old DFA state 0 becomes 0
+Old DFA state 1 becomes 1
+Old DFA state 2 becomes 2
+Old DFA state 3 becomes 3
+Old DFA state 4 becomes 4
+Old DFA state 5 becomes 5
+Old DFA state 6 becomes 6
+Old DFA state 7 becomes 7
+Old DFA state 8 becomes 8
+Old DFA state 9 becomes 9
+Old DFA state 10 becomes 10
+Old DFA state 11 becomes 11
+Old DFA state 12 becomes 12
+Old DFA state 13 becomes 13
+Old DFA state 14 becomes 14
+Old DFA state 15 becomes 15
+Old DFA state 16 becomes 16
+Old DFA state 17 becomes 17
+Old DFA state 18 becomes 18
+Old DFA state 19 becomes 19
+Old DFA state 20 becomes 20
+Old DFA state 21 becomes 21
+Old DFA state 22 becomes 4 (formerly 4)
+Old DFA state 23 becomes 22
+Old DFA state 24 becomes 23
+Old DFA state 25 becomes 24
+Old DFA state 26 becomes 19 (formerly 19)
+Old DFA state 27 becomes 25
+Old DFA state 28 becomes 26
+Old DFA state 29 becomes 27
+Old DFA state 30 becomes 28
+Old DFA state 31 becomes 29
+Old DFA state 32 becomes 30
+Old DFA state 33 becomes 31
+Old DFA state 34 becomes 32
+Old DFA state 35 becomes 33
+Old DFA state 36 becomes 34
+Old DFA state 37 becomes 35
+Old DFA state 38 becomes 36
+Old DFA state 39 becomes 37
+Old DFA state 40 becomes 38
+Old DFA state 41 becomes 39
+Old DFA state 42 becomes 40
+Old DFA state 43 becomes 41
+Old DFA state 44 becomes 42
+Old DFA state 45 becomes 43
+Old DFA state 46 becomes 44
+Old DFA state 47 becomes 45
+Old DFA state 48 becomes 46
+Old DFA state 49 becomes 47
+Old DFA state 50 becomes 48
+Old DFA state 51 becomes 49
+Old DFA state 52 becomes 50
+Old DFA state 53 becomes 51
+Old DFA state 54 becomes 52
+Old DFA state 55 becomes 53
+Old DFA state 56 becomes 54
+Old DFA state 57 becomes 55
+Old DFA state 58 becomes 56
+Old DFA state 59 becomes 57
+Old DFA state 60 becomes 58
+Old DFA state 61 becomes 59
+Old DFA state 62 becomes 60
+Old DFA state 63 becomes 61
+Old DFA state 64 becomes 62
+Old DFA state 65 becomes 63
+Old DFA state 66 becomes 64
+Old DFA state 67 becomes 65
+Old DFA state 68 becomes 66
+Old DFA state 69 becomes 67
+Old DFA state 70 becomes 68
+Old DFA state 71 becomes 69
+Old DFA state 72 becomes 70
+Old DFA state 73 becomes 71
+Old DFA state 74 becomes 72
+Old DFA state 75 becomes 73
+Old DFA state 76 becomes 69 (formerly 71)
+Old DFA state 77 becomes 74
+Old DFA state 78 becomes 75
+Old DFA state 79 becomes 76
+Old DFA state 80 becomes 77
+Old DFA state 81 becomes 75 (formerly 78)
+Entry <(ONLY ENTRY)>, formerly state 0, now state 0
+-------------------------------
+DFA structure after compression
+-------------------------------
+DFA state 0
+ Forward route :
+ (START)->(HERE)
+ Transitions :
+ 0:[\t ] -> 1
+ 3:[!#-'*/=?^`{-~] -> 2
+ 4:["] -> 3
+ 5:[+] -> 2
+ 6:[\055] -> 2
+ 7:[.] -> 2
+ 8:[0-9] -> 2
+ 10:[<] -> 4
+ 12:[@] -> 5
+ 13:[A-Z] -> 2
+ 16:[_] -> 2
+ 17:[a-z] -> 2
+
+DFA state 1
+ Forward route : (from state 0)
+ (START)->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 1
+ 3:[!#-'*/=?^`{-~] -> 2
+ 4:["] -> 3
+ 5:[+] -> 2
+ 6:[\055] -> 2
+ 7:[.] -> 2
+ 8:[0-9] -> 2
+ 10:[<] -> 4
+ 12:[@] -> 5
+ 13:[A-Z] -> 6
+ 16:[_] -> 2
+ 17:[a-z] -> 2
+ Use state 0 as basis (1 fixups)
+
+DFA state 2
+ Forward route : (from state 0)
+ (START)->3:[!#-'*/=?^`{-~]->(HERE)
+ Transitions :
+ 0:[\t ] -> 7
+ 3:[!#-'*/=?^`{-~] -> 2
+ 4:["] -> 3
+ 5:[+] -> 2
+ 6:[\055] -> 2
+ 7:[.] -> 2
+ 8:[0-9] -> 2
+ 12:[@] -> 8
+ 13:[A-Z] -> 2
+ 16:[_] -> 2
+ 17:[a-z] -> 2
+ Use state 0 as basis (3 fixups)
+
+DFA state 3
+ Forward route : (from state 0)
+ (START)->4:["]->(HERE)
+ Transitions :
+ 0:[\t ] -> 3
+ 3:[!#-'*/=?^`{-~] -> 3
+ 4:["] -> 9
+ 5:[+] -> 3
+ 6:[\055] -> 3
+ 7:[.] -> 3
+ 8:[0-9] -> 3
+ 9:[:] -> 3
+ 10:[<] -> 3
+ 11:[>] -> 3
+ 12:[@] -> 3
+ 13:[A-Z] -> 3
+ 16:[_] -> 3
+ 17:[a-z] -> 3
+
+DFA state 4
+ Forward route : (from state 0)
+ (START)->10:[<]->(HERE)
+ Transitions :
+ 3:[!#-'*/=?^`{-~] -> 10
+ 4:["] -> 11
+ 5:[+] -> 10
+ 6:[\055] -> 10
+ 7:[.] -> 10
+ 8:[0-9] -> 10
+ 12:[@] -> 12
+ 13:[A-Z] -> 10
+ 16:[_] -> 10
+ 17:[a-z] -> 10
+
+DFA state 5
+ Forward route : (from state 0)
+ (START)->12:[@]->(HERE)
+ Transitions :
+ 6:[\055] -> 5
+ 7:[.] -> 5
+ 8:[0-9] -> 5
+ 9:[:] -> 13
+ 13:[A-Z] -> 5
+ 16:[_] -> 5
+ 17:[a-z] -> 5
+
+DFA state 6
+ Forward route : (from state 1)
+ (START)->0:[\t ]->13:[A-Z]->(HERE)
+ Transitions :
+ 0:[\t ] -> 7
+ 3:[!#-'*/=?^`{-~] -> 2
+ 4:["] -> 3
+ 5:[+] -> 2
+ 6:[\055] -> 2
+ 7:[.] -> 2
+ 8:[0-9] -> 2
+ 12:[@] -> 8
+ 13:[A-Z] -> 2
+ 16:[_] -> 2
+ 17:[a-z] -> 14
+ Use state 0 as basis (4 fixups)
+
+DFA state 7
+ Forward route : (from state 2)
+ (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 7
+ 13:[A-Z] -> 15
+
+DFA state 8
+ Forward route : (from state 2)
+ (START)->3:[!#-'*/=?^`{-~]->12:[@]->(HERE)
+ Transitions :
+ 6:[\055] -> 16
+ 7:[.] -> 16
+ 8:[0-9] -> 16
+ 9:[:] -> 13
+ 13:[A-Z] -> 16
+ 14:[[] -> 17
+ 16:[_] -> 16
+ 17:[a-z] -> 16
+
+DFA state 9
+ Forward route : (from state 3)
+ (START)->4:["]->4:["]->(HERE)
+ Transitions :
+ 0:[\t ] -> 7
+ 3:[!#-'*/=?^`{-~] -> 9
+ 4:["] -> 3
+ 5:[+] -> 9
+ 6:[\055] -> 9
+ 7:[.] -> 9
+ 8:[0-9] -> 9
+ 12:[@] -> 18
+ 13:[A-Z] -> 9
+ 16:[_] -> 9
+ 17:[a-z] -> 9
+
+DFA state 10
+ Forward route : (from state 4)
+ (START)->10:[<]->3:[!#-'*/=?^`{-~]->(HERE)
+ Transitions :
+ 3:[!#-'*/=?^`{-~] -> 10
+ 4:["] -> 11
+ 5:[+] -> 10
+ 6:[\055] -> 10
+ 7:[.] -> 10
+ 8:[0-9] -> 10
+ 11:[>] -> 19
+ 12:[@] -> 20
+ 13:[A-Z] -> 10
+ 16:[_] -> 10
+ 17:[a-z] -> 10
+ Use state 4 as basis (2 fixups)
+
+DFA state 11
+ Forward route : (from state 4)
+ (START)->10:[<]->4:["]->(HERE)
+ Transitions :
+ 0:[\t ] -> 11
+ 3:[!#-'*/=?^`{-~] -> 11
+ 4:["] -> 21
+ 5:[+] -> 11
+ 6:[\055] -> 11
+ 7:[.] -> 11
+ 8:[0-9] -> 11
+ 9:[:] -> 11
+ 10:[<] -> 11
+ 11:[>] -> 11
+ 12:[@] -> 11
+ 13:[A-Z] -> 11
+ 16:[_] -> 11
+ 17:[a-z] -> 11
+
+DFA state 12
+ Forward route : (from state 4)
+ (START)->10:[<]->12:[@]->(HERE)
+ Transitions :
+ 6:[\055] -> 12
+ 7:[.] -> 12
+ 8:[0-9] -> 12
+ 9:[:] -> 4
+ 13:[A-Z] -> 12
+ 16:[_] -> 12
+ 17:[a-z] -> 12
+
+DFA state 13
+ Forward route : (from state 5)
+ (START)->12:[@]->9:[:]->(HERE)
+ Transitions :
+ 3:[!#-'*/=?^`{-~] -> 2
+ 4:["] -> 3
+ 5:[+] -> 2
+ 6:[\055] -> 2
+ 7:[.] -> 2
+ 8:[0-9] -> 2
+ 12:[@] -> 5
+ 13:[A-Z] -> 2
+ 16:[_] -> 2
+ 17:[a-z] -> 2
+ Use state 0 as basis (2 fixups)
+
+DFA state 14
+ Forward route : (from state 6)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->(HERE)
+ Transitions :
+ 0:[\t ] -> 7
+ 3:[!#-'*/=?^`{-~] -> 2
+ 4:["] -> 3
+ 5:[+] -> 2
+ 6:[\055] -> 2
+ 7:[.] -> 2
+ 8:[0-9] -> 2
+ 12:[@] -> 8
+ 13:[A-Z] -> 2
+ 16:[_] -> 2
+ 17:[a-z] -> 22
+ Use state 0 as basis (4 fixups)
+
+DFA state 15
+ Forward route : (from state 7)
+ (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->(HERE)
+ Transitions :
+ 17:[a-z] -> 23
+
+DFA state 16
+ Forward route : (from state 8)
+ (START)->3:[!#-'*/=?^`{-~]->12:[@]->6:[\055]->(HERE)
+ Transitions :
+ 6:[\055] -> 24
+ 7:[.] -> 24
+ 8:[0-9] -> 24
+ 9:[:] -> 13
+ 13:[A-Z] -> 24
+ 16:[_] -> 24
+ 17:[a-z] -> 24
+
+DFA state 17
+ Forward route : (from state 8)
+ (START)->3:[!#-'*/=?^`{-~]->12:[@]->14:[[]->(HERE)
+ Transitions :
+ 7:[.] -> 17
+ 8:[0-9] -> 17
+ 15:[]] -> 19
+
+DFA state 18
+ Forward route : (from state 9)
+ (START)->4:["]->4:["]->12:[@]->(HERE)
+ Transitions :
+ 6:[\055] -> 25
+ 7:[.] -> 25
+ 8:[0-9] -> 25
+ 13:[A-Z] -> 25
+ 14:[[] -> 17
+ 16:[_] -> 25
+ 17:[a-z] -> 25
+
+DFA state 19
+ Forward route : (from state 10)
+ (START)->10:[<]->3:[!#-'*/=?^`{-~]->11:[>]->(HERE)
+ Transitions :
+ 0:[\t ] -> 7
+
+DFA state 20
+ Forward route : (from state 10)
+ (START)->10:[<]->3:[!#-'*/=?^`{-~]->12:[@]->(HERE)
+ Transitions :
+ 6:[\055] -> 26
+ 7:[.] -> 26
+ 8:[0-9] -> 26
+ 9:[:] -> 4
+ 13:[A-Z] -> 26
+ 14:[[] -> 27
+ 16:[_] -> 26
+ 17:[a-z] -> 26
+
+DFA state 21
+ Forward route : (from state 11)
+ (START)->10:[<]->4:["]->4:["]->(HERE)
+ Transitions :
+ 3:[!#-'*/=?^`{-~] -> 21
+ 4:["] -> 11
+ 5:[+] -> 21
+ 6:[\055] -> 21
+ 7:[.] -> 21
+ 8:[0-9] -> 21
+ 11:[>] -> 19
+ 12:[@] -> 28
+ 13:[A-Z] -> 21
+ 16:[_] -> 21
+ 17:[a-z] -> 21
+
+DFA state 22
+ Forward route : (from state 14)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->(HERE)
+ Transitions :
+ 0:[\t ] -> 29
+ 3:[!#-'*/=?^`{-~] -> 2
+ 4:["] -> 3
+ 5:[+] -> 2
+ 6:[\055] -> 2
+ 7:[.] -> 2
+ 8:[0-9] -> 2
+ 12:[@] -> 8
+ 13:[A-Z] -> 2
+ 16:[_] -> 2
+ 17:[a-z] -> 2
+ Use state 0 as basis (3 fixups)
+
+DFA state 23
+ Forward route : (from state 15)
+ (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->(HERE)
+ Transitions :
+ 17:[a-z] -> 30
+
+DFA state 24
+ Forward route : (from state 16)
+ (START)->3:[!#-'*/=?^`{-~]->12:[@]->6:[\055]->6:[\055]->(HERE)
+ Transitions :
+ 0:[\t ] -> 7
+ 6:[\055] -> 24
+ 7:[.] -> 24
+ 8:[0-9] -> 24
+ 9:[:] -> 13
+ 13:[A-Z] -> 24
+ 16:[_] -> 24
+ 17:[a-z] -> 24
+ Use state 16 as basis (1 fixups)
+
+DFA state 25
+ Forward route : (from state 18)
+ (START)->4:["]->4:["]->12:[@]->6:[\055]->(HERE)
+ Transitions :
+ 6:[\055] -> 31
+ 7:[.] -> 31
+ 8:[0-9] -> 31
+ 13:[A-Z] -> 31
+ 16:[_] -> 31
+ 17:[a-z] -> 31
+
+DFA state 26
+ Forward route : (from state 20)
+ (START)->10:[<]->3:[!#-'*/=?^`{-~]->12:[@]->6:[\055]->(HERE)
+ Transitions :
+ 6:[\055] -> 32
+ 7:[.] -> 32
+ 8:[0-9] -> 32
+ 9:[:] -> 4
+ 13:[A-Z] -> 32
+ 16:[_] -> 32
+ 17:[a-z] -> 32
+
+DFA state 27
+ Forward route : (from state 20)
+ (START)->10:[<]->3:[!#-'*/=?^`{-~]->12:[@]->14:[[]->(HERE)
+ Transitions :
+ 7:[.] -> 27
+ 8:[0-9] -> 27
+ 15:[]] -> 33
+
+DFA state 28
+ Forward route : (from state 21)
+ (START)->10:[<]->4:["]->4:["]->12:[@]->(HERE)
+ Transitions :
+ 6:[\055] -> 34
+ 7:[.] -> 34
+ 8:[0-9] -> 34
+ 13:[A-Z] -> 34
+ 14:[[] -> 27
+ 16:[_] -> 34
+ 17:[a-z] -> 34
+
+DFA state 29
+ Forward route : (from state 22)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 29
+ 13:[A-Z] -> 35
+
+DFA state 30
+ Forward route : (from state 23)
+ (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->(HERE)
+ Transitions :
+ 0:[\t ] -> 36
+
+DFA state 31
+ Forward route : (from state 25)
+ (START)->4:["]->4:["]->12:[@]->6:[\055]->6:[\055]->(HERE)
+ Transitions :
+ 0:[\t ] -> 7
+ 6:[\055] -> 31
+ 7:[.] -> 31
+ 8:[0-9] -> 31
+ 13:[A-Z] -> 31
+ 16:[_] -> 31
+ 17:[a-z] -> 31
+ Use state 25 as basis (1 fixups)
+
+DFA state 32
+ Forward route : (from state 26)
+ (START)->10:[<]->3:[!#-'*/=?^`{-~]->12:[@]->6:[\055]->6:[\055]->(HERE)
+ Transitions :
+ 6:[\055] -> 32
+ 7:[.] -> 32
+ 8:[0-9] -> 32
+ 9:[:] -> 4
+ 11:[>] -> 19
+ 13:[A-Z] -> 32
+ 16:[_] -> 32
+ 17:[a-z] -> 32
+ Use state 26 as basis (1 fixups)
+
+DFA state 33
+ Forward route : (from state 27)
+ (START)->10:[<]->3:[!#-'*/=?^`{-~]->12:[@]->14:[[]->15:[]]->(HERE)
+ Transitions :
+ 11:[>] -> 19
+
+DFA state 34
+ Forward route : (from state 28)
+ (START)->10:[<]->4:["]->4:["]->12:[@]->6:[\055]->(HERE)
+ Transitions :
+ 6:[\055] -> 37
+ 7:[.] -> 37
+ 8:[0-9] -> 37
+ 13:[A-Z] -> 37
+ 16:[_] -> 37
+ 17:[a-z] -> 37
+
+DFA state 35
+ Forward route : (from state 29)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->(HERE)
+ Transitions :
+ 17:[a-z] -> 38
+
+DFA state 36
+ Forward route : (from state 30)
+ (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 36
+ 13:[A-Z] -> 39
+
+DFA state 37
+ Forward route : (from state 34)
+ (START)->10:[<]->4:["]->4:["]->12:[@]->6:[\055]->6:[\055]->(HERE)
+ Transitions :
+ 6:[\055] -> 37
+ 7:[.] -> 37
+ 8:[0-9] -> 37
+ 11:[>] -> 19
+ 13:[A-Z] -> 37
+ 16:[_] -> 37
+ 17:[a-z] -> 37
+ Use state 34 as basis (1 fixups)
+
+DFA state 38
+ Forward route : (from state 35)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->(HERE)
+ Transitions :
+ 17:[a-z] -> 40
+
+DFA state 39
+ Forward route : (from state 36)
+ (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->(HERE)
+ Transitions :
+ 17:[a-z] -> 41
+
+DFA state 40
+ Forward route : (from state 38)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->(HERE)
+ Transitions :
+ 0:[\t ] -> 42
+
+DFA state 41
+ Forward route : (from state 39)
+ (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->(HERE)
+ Transitions :
+ 17:[a-z] -> 43
+
+DFA state 42
+ Forward route : (from state 40)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 42
+ 8:[0-9] -> 44
+ 13:[A-Z] -> 39
+
+DFA state 43
+ Forward route : (from state 41)
+ (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->(HERE)
+ Transitions :
+ 0:[\t ] -> 45
+
+DFA state 44
+ Forward route : (from state 42)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->(HERE)
+ Transitions :
+ 0:[\t ] -> 46
+ 8:[0-9] -> 47
+
+DFA state 45
+ Forward route : (from state 43)
+ (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 45
+ 8:[0-9] -> 44
+
+DFA state 46
+ Forward route : (from state 44)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 46
+ 8:[0-9] -> 48
+
+DFA state 47
+ Forward route : (from state 44)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->8:[0-9]->(HERE)
+ Transitions :
+ 0:[\t ] -> 46
+
+DFA state 48
+ Forward route : (from state 46)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->(HERE)
+ Transitions :
+ 8:[0-9] -> 49
+
+DFA state 49
+ Forward route : (from state 48)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->(HERE)
+ Transitions :
+ 9:[:] -> 50
+
+DFA state 50
+ Forward route : (from state 49)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->(HERE)
+ Transitions :
+ 8:[0-9] -> 51
+
+DFA state 51
+ Forward route : (from state 50)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->(HERE)
+ Transitions :
+ 8:[0-9] -> 52
+
+DFA state 52
+ Forward route : (from state 51)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->(HERE)
+ Transitions :
+ 0:[\t ] -> 53
+ 9:[:] -> 54
+
+DFA state 53
+ Forward route : (from state 52)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 53
+ 5:[+] -> 55
+ 6:[\055] -> 55
+ 8:[0-9] -> 56
+ 13:[A-Z] -> 57
+
+DFA state 54
+ Forward route : (from state 52)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->9:[:]->(HERE)
+ Transitions :
+ 8:[0-9] -> 58
+
+DFA state 55
+ Forward route : (from state 53)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->5:[+]->(HERE)
+ Transitions :
+ 8:[0-9] -> 57
+ 13:[A-Z] -> 57
+ 17:[a-z] -> 57
+
+DFA state 56
+ Forward route : (from state 53)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->(HERE)
+ Transitions :
+ 8:[0-9] -> 59
+
+DFA state 57
+ Forward route : (from state 53)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->(HERE)
+ Transitions :
+ 0:[\t ] -> 60
+ 8:[0-9] -> 57
+ 13:[A-Z] -> 57
+ 17:[a-z] -> 57
+ Use state 55 as basis (1 fixups)
+
+DFA state 58
+ Forward route : (from state 54)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->(HERE)
+ Transitions :
+ 8:[0-9] -> 61
+
+DFA state 59
+ Forward route : (from state 56)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->(HERE)
+ Transitions :
+ 8:[0-9] -> 62
+
+DFA state 60
+ Forward route : (from state 57)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 60
+ 5:[+] -> 63
+ 6:[\055] -> 63
+ 8:[0-9] -> 64
+ 13:[A-Z] -> 65
+
+DFA state 61
+ Forward route : (from state 58)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->(HERE)
+ Transitions :
+ 0:[\t ] -> 53
+
+DFA state 62
+ Forward route : (from state 59)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->(HERE)
+ Transitions :
+ 8:[0-9] -> 66
+
+DFA state 63
+ Forward route : (from state 60)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->5:[+]->(HERE)
+ Transitions :
+ 8:[0-9] -> 65
+ 13:[A-Z] -> 65
+ 17:[a-z] -> 65
+
+DFA state 64
+ Forward route : (from state 60)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->8:[0-9]->(HERE)
+ Transitions :
+ 8:[0-9] -> 67
+
+DFA state 65
+ Forward route : (from state 60)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->13:[A-Z]->(HERE)
+ Transitions :
+ 0:[\t ] -> 68
+ 8:[0-9] -> 65
+ 13:[A-Z] -> 65
+ 17:[a-z] -> 65
+ Use state 63 as basis (1 fixups)
+
+DFA state 66
+ Forward route : (from state 62)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->(HERE)
+ Transitions :
+ 0:[\t ] -> 66
+ 1:[\n] -> 69
+ 2:[\r] -> 70
+ 5:[+] -> 71
+ 6:[\055] -> 71
+ 13:[A-Z] -> 72
+
+DFA state 67
+ Forward route : (from state 64)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->8:[0-9]->8:[0-9]->(HERE)
+ Transitions :
+ 8:[0-9] -> 73
+
+DFA state 68
+ Forward route : (from state 65)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->13:[A-Z]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 68
+ 8:[0-9] -> 64
+
+DFA state 69
+ Forward route : (from state 66)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->1:[\n]->(HERE)
+ Transitions :
+ NFA exit tags applying :
+ FROMCHECK_PASS
+ Attributes for <(DEFAULT)> : FROMCHECK_PASS
+
+DFA state 70
+ Forward route : (from state 66)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->2:[\r]->(HERE)
+ Transitions :
+ 1:[\n] -> 69
+
+DFA state 71
+ Forward route : (from state 66)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->5:[+]->(HERE)
+ Transitions :
+ 8:[0-9] -> 72
+ 13:[A-Z] -> 72
+ 17:[a-z] -> 72
+
+DFA state 72
+ Forward route : (from state 66)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->13:[A-Z]->(HERE)
+ Transitions :
+ 0:[\t ] -> 74
+ 1:[\n] -> 69
+ 2:[\r] -> 70
+ 8:[0-9] -> 72
+ 13:[A-Z] -> 72
+ 17:[a-z] -> 72
+ Use state 71 as basis (3 fixups)
+
+DFA state 73
+ Forward route : (from state 67)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->(HERE)
+ Transitions :
+ 8:[0-9] -> 75
+
+DFA state 74
+ Forward route : (from state 72)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->13:[A-Z]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 74
+ 1:[\n] -> 69
+ 2:[\r] -> 70
+ 5:[+] -> 76
+ 6:[\055] -> 76
+ 13:[A-Z] -> 77
+
+DFA state 75
+ Forward route : (from state 73)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->(HERE)
+ Transitions :
+ 0:[\t ] -> 75
+ 1:[\n] -> 69
+ 2:[\r] -> 70
+
+DFA state 76
+ Forward route : (from state 74)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->13:[A-Z]->0:[\t ]->5:[+]->(HERE)
+ Transitions :
+ 8:[0-9] -> 77
+ 13:[A-Z] -> 77
+ 17:[a-z] -> 77
+
+DFA state 77
+ Forward route : (from state 74)
+ (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->13:[A-Z]->0:[\t ]->13:[A-Z]->(HERE)
+ Transitions :
+ 0:[\t ] -> 75
+ 1:[\n] -> 69
+ 2:[\r] -> 70
+ 8:[0-9] -> 77
+ 13:[A-Z] -> 77
+ 17:[a-z] -> 77
+ Use state 75 as basis (3 fixups)
+
+
+Entry states in DFA:
+Entry <(ONLY ENTRY)> : 0
diff --git a/src/mairix/glob.c b/src/mairix/glob.c
@@ -0,0 +1,393 @@
+/*
+ mairix - message index builder and finder for maildir folders.
+
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2003,2004,2005
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <ctype.h>
+#include "mairix.h"
+
+
+/* A compiled glob pattern, held as bit masks indexed by pattern position
+ * (bit n corresponds to the n-th matching element of the pattern). */
+struct globber {
+ unsigned int pat[256]; /* for each input byte value: positions that accept that byte */
+ unsigned int starpat; /* positions at which a single star wildcard sits */
+ unsigned int twostarpat; /* positions at which a double star wildcard sits */
+ unsigned int hit; /* the bit that is set once every pattern element has been consumed */
+};
+
+/* A counted collection of compiled glob patterns. */
+struct globber_array {
+ int n; /* number of entries in globs */
+ struct globber **globs; /* the compiled patterns, one per entry */
+};
+
+/* Parse a bracket expression and record it in result->pat.
+ *
+ * 'in' points at the opening '['.  For every byte value accepted by the
+ * class, 'mask' is OR-ed into result->pat[value].
+ *
+ * Rules:
+ *  - a ']' directly after the '[' is a literal member of the class;
+ *  - a '-' is literal when it is first, last (followed by ']' or the end of
+ *    the string) or comes right after a completed range, otherwise it forms
+ *    a range between its neighbours (the endpoints may be given in either
+ *    order).
+ *
+ * Returns a pointer to the closing ']', or to the terminating NUL if the
+ * class was never closed. */
+static const char *parse_charclass(const char *in, struct globber *result, unsigned int mask)/*{{{*/
+{
+  int first = 1;
+  int prev = -1; /* previous literal member as 0..255, or -1 if none usable */
+
+  in++; /* Advance over '[' */
+  while (*in) {
+    /* Work with unsigned byte values so that bytes >= 0x80 neither look like
+     * "no previous character" nor produce inverted ranges. */
+    int cur = (unsigned char) *in;
+
+    if ((cur == ']') && !first) {
+      return in;
+    }
+
+    if ((cur == '-') && (prev >= 0) && in[1] && (in[1] != ']')) {
+      int next = (unsigned char) in[1];
+      int lo = (prev < next) ? prev : next;
+      int hi = (prev < next) ? next : prev;
+      int i;
+      for (i = lo; i <= hi; i++) {
+        result->pat[i] |= mask;
+      }
+      /* Step over both the '-' and the range's end character. */
+      in += 2;
+      /* The end of a range must not start another one, so that junk like
+       * [a-e-z] treats the second '-' as a literal. */
+      prev = -1;
+    } else {
+      result->pat[cur] |= mask;
+      prev = cur;
+      in++;
+    }
+    first = 0;
+  }
+  return in;
+}
+/*}}}*/
+
+/* Compile a glob pattern into a newly allocated struct globber.
+ *
+ * Supported syntax: '?' (any one byte), bracket classes, a single star
+ * and a double star (the matcher treats them differently with respect to
+ * '/'), and literal bytes.
+ *
+ * Each matching element occupies one bit of an unsigned int and one more bit
+ * is needed for the final "hit" state.  A pattern with more elements than
+ * that cannot be represented; it is compiled so that it never matches
+ * (hit == 0) rather than shifting out of range.
+ *
+ * The caller releases the result with free_globber(). */
+struct globber *make_globber(const char *wildstring)/*{{{*/
+{
+  /* Highest usable bit index; assumes 8-bit bytes. */
+  const int max_positions = (int)(8 * sizeof(unsigned int)) - 1;
+  struct globber *result;
+  const char *p;
+  int n = 0;
+  int too_long = 0;
+  int i;
+
+  result = new(struct globber);
+  memset(result->pat, 0x00, sizeof(result->pat));
+  result->starpat = 0;
+  result->twostarpat = 0;
+
+  for (p = wildstring; *p; p++) {
+    /* n never exceeds max_positions here, so the shift is well defined. */
+    unsigned int mask = 1u << n;
+
+    if (*p == '*') {
+      /* Stars do not consume a position; they annotate the current one. */
+      if (p[1] == '*') {
+        result->twostarpat |= mask;
+        p++;
+      } else {
+        /* Match zero or more of anything */
+        result->starpat |= mask;
+      }
+      continue;
+    }
+
+    if (n >= max_positions) {
+      too_long = 1;
+      break;
+    }
+
+    if (*p == '[') {
+      p = parse_charclass(p, result, mask);
+      n++;
+      if (!*p) {
+        /* Unterminated class: p is at the NUL, so stop here instead of
+         * letting the loop increment step past the end of the string. */
+        break;
+      }
+    } else if (*p == '?') {
+      for (i = 0; i < 256; i++) {
+        result->pat[i] |= mask;
+      }
+      n++;
+    } else {
+      result->pat[(unsigned char) *p] |= mask;
+      n++;
+    }
+  }
+
+  result->hit = too_long ? 0 : (1u << n);
+  return result;
+}
+/*}}}*/
+/* Release a globber.  The structure holds no pointers of its own, so a
+ * single free() is all that is needed. */
+void free_globber(struct globber *old)/*{{{*/
+{
+ free(old);
+}
+/*}}}*/
+
+#define DODEBUG 0
+
+/* Test whether the whole of string 's' matches the compiled pattern 'g'.
+ *
+ * 'reg' is the set of pattern positions that are currently reachable, one
+ * bit per position, starting with only position 0.  For each input byte the
+ * positions that accept the byte advance by one; a position carrying a star
+ * also stays where it is.  A single star does not stay put across a '/',
+ * a double star does.
+ *
+ * Returns 1 if the final position (g->hit) is reachable after the last
+ * byte, otherwise 0. */
+int is_glob_match(struct globber *g, const char *s)/*{{{*/
+{
+  unsigned int reg = 0x1;
+
+  for (; *s; s++) {
+    int index = 0xff & (int) *s;
+    unsigned int stars = reg & g->starpat;
+    unsigned int twostars = reg & g->twostarpat;
+    unsigned int stars2 = (index != '/') ? (stars | twostars) : twostars;
+#if DODEBUG
+    /* All of these values are unsigned int, hence %x rather than %lx. */
+    printf("*s=%c index=%02x old_reg=%08x pat=%08x //",
+        *s, (unsigned int) index, reg, g->pat[index]);
+#endif
+    reg &= g->pat[index];
+    reg <<= 1;
+    reg |= stars2;
+#if DODEBUG
+    printf(" new_reg=%08x ", reg);
+    printf("starpat=%08x stars=%08x stars2=%08x\n", g->starpat, stars, stars2);
+#endif
+  }
+
+#if DODEBUG
+  printf("reg=%08x hit=%08x\n", reg, g->hit);
+#endif
+  return (reg & g->hit) ? 1 : 0;
+}
+/*}}}*/
+
+/* Split 'in' at its unescaped colons and compile every piece as a glob.
+ * Returns a newly allocated array holding one globber per piece; the
+ * intermediate strings are freed before returning. */
+struct globber_array *colon_sep_string_to_globber_array(const char *in)/*{{{*/
+{
+  struct globber_array *ga;
+  char **parts;
+  int count;
+  int k;
+
+  split_on_colons(in, &count, &parts);
+
+  ga = new(struct globber_array);
+  ga->n = count;
+  ga->globs = new_array(struct globber *, count);
+
+  k = 0;
+  while (k < count) {
+    ga->globs[k] = make_globber(parts[k]);
+    free(parts[k]);
+    k++;
+  }
+
+  free(parts);
+  return ga;
+}
+/*}}}*/
+/* Return 1 if 's' matches at least one pattern in 'ga', otherwise 0.
+ * A null 'ga' matches nothing. */
+int is_globber_array_match(struct globber_array *ga, const char *s)/*{{{*/
+{
+  int idx;
+
+  if (ga) {
+    for (idx = 0; idx < ga->n; idx++) {
+      if (is_glob_match(ga->globs[idx], s)) {
+        return 1;
+      }
+    }
+  }
+  return 0;
+}
+/*}}}*/
+/* Release a globber array together with everything it owns: each compiled
+ * pattern, the pointer table, and the array header itself.
+ * A null 'in' is accepted and ignored, mirroring is_globber_array_match(). */
+void free_globber_array(struct globber_array *in)/*{{{*/
+{
+  int i;
+
+  if (!in) return;
+
+  for (i=0; i<in->n; i++) {
+    free_globber(in->globs[i]);
+  }
+  /* The pointer table is a separate allocation and must be freed too. */
+  free(in->globs);
+  free(in);
+}
+/*}}}*/
+
+/* Return a newly allocated copy of a folder name.
+ *
+ * The text starts at 'start'.  If 'end' is non-null it points one past the
+ * last character to copy; if it is null, 'start' is an ordinary
+ * NUL-terminated string and all of it is copied.
+ *
+ * A backslash that is immediately followed by a colon is dropped, so the
+ * escape sequence collapses to a plain colon.  Any other backslash is
+ * copied unchanged. */
+static char *copy_folder_name(const char *start, const char *end)/*{{{*/
+{
+  const char *stop;
+  const char *src;
+  char *copy;
+  char *dst;
+  int len;
+
+  if (end) {
+    stop = end;
+    len = end - start;
+  } else {
+    len = strlen(start);
+    stop = start + len;
+  }
+
+  copy = new_array(char, len + 1);
+  dst = copy;
+  for (src = start; src < stop; src++) {
+    if ((src[0] == '\\') && (src[1] == ':')) {
+      /* Escaped colon : skip the backslash, the colon follows next time round */
+      continue;
+    }
+    *dst++ = *src;
+  }
+  *dst = '\0';
+  return copy;
+}
+/*}}}*/
+/* Convert a circular string list into a plain array.
+ *
+ * 'list' is the sentinel head; its cells are walked in order, each cell's
+ * data pointer is moved into the new array and the cell itself is freed.
+ * The head is not modified, so its links are stale afterwards.
+ * On return *n holds the number of strings and *arr the new array. */
+void string_list_to_array(struct string_list *list, int *n, char ***arr)/*{{{*/
+{
+  struct string_list *cell;
+  struct string_list *following;
+  char **out;
+  int count = 0;
+  int k;
+
+  /* First pass: count the cells. */
+  cell = list->next;
+  while (cell != list) {
+    count++;
+    cell = cell->next;
+  }
+
+  /* Second pass: hand over the strings and dispose of the cells. */
+  out = new_array(char *, count);
+  cell = list->next;
+  for (k = 0; k < count; k++) {
+    following = cell->next;
+    out[k] = cell->data;
+    free(cell);
+    cell = following;
+  }
+
+  *arr = out;
+  *n = count;
+}
+/*}}}*/
+/* Split 'str' into pieces at every colon that is not preceded by a
+ * backslash.  Each piece is copied with copy_folder_name(), which also
+ * removes the escaping backslashes.  At least one piece is always produced,
+ * even for an empty string.
+ * On return *n holds the number of pieces and *arr a newly allocated array
+ * of newly allocated strings. */
+void split_on_colons(const char *str, int *n, char ***arr)/*{{{*/
+{
+  struct string_list head;
+  struct string_list *cell;
+  const char *rest = str;
+
+  head.next = &head;
+  head.prev = &head;
+
+  do {
+    char *piece;
+    char *sep = strchr(rest, ':');
+
+    /* Allow backslash-escaped colons in filenames: keep looking while the
+       colon just found has a backslash in front of it. */
+    while (sep && (sep > rest) && (sep[-1] == '\\')) {
+      sep = strchr(sep + 1, ':');
+    }
+    /* 'sep' is now the first unescaped colon, or null if none remains. */
+
+    piece = copy_folder_name(rest, sep);
+    rest = sep ? (sep + 1) : (rest + strlen(rest));
+
+    /* Append the piece at the tail of the circular list. */
+    cell = new(struct string_list);
+    cell->data = piece;
+    cell->prev = head.prev;
+    cell->next = &head;
+    head.prev->next = cell;
+    head.prev = cell;
+  } while (*rest);
+
+  string_list_to_array(&head, n, arr);
+}
+/*}}}*/
+
+#if defined (TEST)
+/* Self-test helper: compile 'ref', match it against 's' and print the
+ * outcome, flagging it with question marks when it differs from 'expected'. */
+void run1(char *ref, char *s, int expected)/*{{{*/
+{
+  struct globber *glob = make_globber(ref);
+  int matched = is_glob_match(glob, s);
+  const char *verdict = matched ? "MATCHED" : "not matched";
+  const char *flag = (expected == matched) ? "" : "??????";
+
+  printf("ref=%s, str=%s, %s %s\n", ref, s, verdict, flag);
+  free_globber(glob);
+}
+/*}}}*/
+/* Self-test driver: run every pattern/string pair below through run1()
+ * in order.  Always returns 0; mismatches are only flagged in the output. */
+int main (int argc, char **argv)/*{{{*/
+{
+  static const struct {
+    char *ref;
+    char *str;
+    int expected;
+  } cases[] = {
+    { "ab?de", "abdde", 1 },
+    { "ab?de", "abcde", 1 },
+    { "ab?de", "Abcde", 0 },
+    { "ab?de", "abcd", 0 },
+    { "ab?de", "abc", 0 },
+    { "ab[cd]de", "abdde", 1 },
+    { "ab[cd]de", "abbde", 0 },
+    { "ab[cd]de", "abcde", 1 },
+    { "ab*de", "ade", 0 },
+    { "ab*de", "abde", 1 },
+    { "ab*de", "abcde", 1 },
+    { "ab*de", "abccde", 1 },
+    { "ab*de", "abccdfde", 1 },
+    { "ab*de", "abccdedf", 0 },
+    { "ab[b-d]de", "abade", 0 },
+    { "ab[b-d]de", "abcDe", 0 },
+    { "ab[b-d]de", "abcde", 1 },
+    { "ab[b-d]de", "abdde", 1 },
+    { "ab[b-d]de", "abEde", 0 },
+    { "[a-z][0-9A-F][]a-f-]", "yE]", 1 },
+    { "[a-z][0-9A-F][]a-f-]", "uE[", 0 },
+    { "[a-z][0-9A-F][]a-f-]", "vG-", 0 },
+    { "[a-z][0-9A-F][]a-f-]", "w8-", 1 },
+    { "*", "a", 1 },
+    { "*", "", 1 },
+    { "a*", "a", 1 },
+    { "a*", "aa", 1 },
+    { "a*", "aaA", 1 },
+    { "*a", "aaa", 1 },
+    { "*a", "a", 1 },
+    { "x*abc", "xabdxabc", 1 },
+    { "*", "", 1 },
+    { "a*", "", 0 },
+    { "*a", "", 0 },
+    { "a", "", 0 },
+
+    { "*abc*", "x/abc/y", 0 },
+    { "**abc**", "x/abc/y", 1 },
+    { "x/*/abc**", "x/z/abc/y", 1 },
+    { "x/*/abc**", "x/z/w/abc/y", 0 },
+    { "x/*/abc**", "x/zz/w/abc/y", 0 },
+    { "x/*/abc**", "x/z/ww/abc/y", 0 },
+    { "x/**/abc**", "x/z/w/abc/y", 1 },
+    { "x/**/abc**", "x/zz/w/abc/y", 1 }
+  };
+  int total = (int)(sizeof(cases) / sizeof(cases[0]));
+  int k;
+
+  for (k = 0; k < total; k++) {
+    run1(cases[k].ref, cases[k].str, cases[k].expected);
+  }
+
+  return 0;
+}
+/*}}}*/
+#endif
+
diff --git a/src/mairix/hash.c b/src/mairix/hash.c
@@ -0,0 +1,143 @@
+/* Hash function */
+
+#include "mairix.h"
+
+/*
+--------------------------------------------------------------------
+lookup2.c, by Bob Jenkins, December 1996, Public Domain.
+hash(), hash2(), hash3, and mix() are externally useful functions.
+Routines to test the hash are included if SELF_TEST is defined.
+You can use this free for any purpose. It has no warranty.
+--------------------------------------------------------------------
+*/
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+/* Number of slots in a table indexed by an n-bit hash value: 2 to the n. */
+#define hashsize(n) ((unsigned int)1<<(n))
+/* Bit mask that keeps the low n bits of a hash value. */
+#define hashmask(n) (hashsize(n)-1)
+
+/*
+--------------------------------------------------------------------
+mix -- mix 3 32-bit values reversibly.
+For every delta with one or two bit set, and the deltas of all three
+ high bits or all three low bits, whether the original value of a,b,c
+ is almost all zero or is uniformly distributed,
+* If mix() is run forward or backward, at least 32 bits in a,b,c
+ have at least 1/4 probability of changing.
+* If mix() is run forward, every bit of c will change between 1/3 and
+ 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
+mix() was built out of 36 single-cycle latency instructions in a
+ structure that could support 2x parallelism, like so:
+ a -= b;
+ a -= c; x = (c>>13);
+ b -= c; a ^= x;
+ b -= a; x = (a<<8);
+ c -= a; b ^= x;
+ c -= b; x = (b>>13);
+ ...
+ Unfortunately, superscalar Pentiums and Sparcs can't take advantage
+ of that parallelism. They've also turned some of those single-cycle
+ latency instructions into multi-cycle latency instructions. Still,
+ this is the fastest good hash I could find. There were about 2^^68
+ to choose from. I only looked at a billion or so.
+--------------------------------------------------------------------
+*/
+#define mix(a,b,c) \
+{ \
+ a -= b; a -= c; a ^= (c>>13); \
+ b -= c; b -= a; b ^= (a<<8); \
+ c -= a; c -= b; c ^= (b>>13); \
+ a -= b; a -= c; a ^= (c>>12); \
+ b -= c; b -= a; b ^= (a<<16); \
+ c -= a; c -= b; c ^= (b>>5); \
+ a -= b; a -= c; a ^= (c>>3); \
+ b -= c; b -= a; b ^= (a<<10); \
+ c -= a; c -= b; c ^= (b>>15); \
+}
+
+/* same, but slower, works on systems that might have 8 byte ub4's */
+#define mix2(a,b,c) \
+{ \
+ a -= b; a -= c; a ^= (c>>13); \
+ b -= c; b -= a; b ^= (a<< 8); \
+ c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \
+ a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \
+ b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \
+ c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \
+ a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \
+ b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \
+ c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \
+}
+
+/*
+--------------------------------------------------------------------
+hash() -- hash a variable-length key into a 32-bit value
+ k : the key (the unaligned variable-length array of bytes)
+ len : the length of the key, counting by bytes
+ level : can be any 4-byte value
+Returns a 32-bit value. Every bit of the key affects every bit of
+the return value. Every 1-bit and 2-bit delta achieves avalanche.
+About 36+6len instructions.
+
+The best hash table sizes are powers of 2. There is no need to do
+mod a prime (mod is sooo slow!). If you need less than 32 bits,
+use a bitmask. For example, if you need only 10 bits, do
+ h = (h & hashmask(10));
+In which case, the hash table should have hashsize(10) elements.
+
+If you are hashing n strings (ub1 **)k, do it like this:
+ for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
+
+By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
+code any way you wish, private, educational, or commercial. It's free.
+
+See http://burtleburtle.net/bob/hash/evahash.html
+Use for hash table lookup, or anything where one collision in 2^32 is
+acceptable. Do NOT use for cryptographic purposes.
+--------------------------------------------------------------------
+*/
+
+/* Combine four consecutive key bytes into one word, lowest byte first. */
+static unsigned int hash_word(const unsigned char *p)
+{
+  return p[0] + ((unsigned int)p[1]<<8) + ((unsigned int)p[2]<<16) + ((unsigned int)p[3]<<24);
+}
+
+/* Hash 'length' bytes starting at 'k'.
+ *   k       : the key bytes
+ *   length  : number of bytes in the key
+ *   initval : previous hash value, or any seed
+ * Returns the hash value (the final state of c). */
+unsigned int hashfn( unsigned char *k, unsigned int length, unsigned int initval)
+{
+  unsigned int a = 0x9e3779b9; /* the golden ratio; an arbitrary value */
+  unsigned int b = 0x9e3779b9;
+  unsigned int c = initval;    /* the previous hash value */
+  unsigned int remaining;
+  unsigned int i;
+
+  /* Consume the key twelve bytes (three words) at a time. */
+  for (remaining = length; remaining >= 12; remaining -= 12, k += 12) {
+    a += hash_word(k);
+    b += hash_word(k + 4);
+    c += hash_word(k + 8);
+    mix(a,b,c);
+  }
+
+  /* Fold in the length and the trailing 0..11 bytes.  Bytes 0-3 go to a and
+   * bytes 4-7 to b, lowest byte first.  Bytes 8-10 go to c shifted up by one
+   * byte, because the lowest byte of c is reserved for the length. */
+  c += length;
+  for (i = 0; i < remaining; i++) {
+    unsigned int byte = k[i];
+    if (i < 4) {
+      a += byte << (8 * i);
+    } else if (i < 8) {
+      b += byte << (8 * (i - 4));
+    } else {
+      c += byte << (8 * (i - 7));
+    }
+  }
+  mix(a,b,c);
+
+  return c;
+}
+
+
diff --git a/src/mairix/mairix.1 b/src/mairix/mairix.1
@@ -0,0 +1,673 @@
+.TH MAIRIX 1 "January 2006"
+.de Sx
+.PP
+.ne \\$1
+.nf
+.na
+.RS 7
+..
+.de Ex
+.RE
+.fi
+.ad
+.PP
+..
+.de Sy
+.PP
+.ne \\$1
+.nf
+.na
+.RS 12
+..
+.de Ey
+.RE
+.fi
+.ad
+.IP "" 7
+..
+.SH NAME
+mairix \- index and search mail folders
+.SH SYNOPSIS
+.SS Indexing
+.B mairix
+[
+.BR \-v | \-\-verbose
+] [
+.BR \-p | \-\-purge
+] [
+.BR \-f | \-\-rcfile
+.I mairixrc
+] [
+.BR \-F | \-\-fast-index
+] [
+.BR \-\-force-hash-key-new-database
+.I hash
+]
+
+.SS Searching
+.B mairix
+[
+.BR \-v | \-\-verbose
+] [
+.BR \-f | \-\-rcfile
+.I mairixrc
+] [
+.BR \-r | \-\-raw-output
+] [
+.BR \-x | \-\-excerpt-output
+] [
+.BR \-H | \-\-force-hardlinks
+] [
+.BR \-o | \-\-mfolder
+.I mfolder
+] [
+.BR \-a | \-\-augment
+] [
+.BR \-t | \-\-threads
+]
+.I search-patterns
+
+.SS Other
+.B mairix
+[
+.BR \-h | \-\-help
+]
+
+.B mairix
+[
+.BR \-V | \-\-version
+]
+
+.B mairix
+[
+.BR \-d | \-\-dump
+]
+
+.SH DESCRIPTION
+.I mairix
+indexes and searches a collection of email messages. The folders containing
+the messages for indexing are defined in the configuration file. The indexing
+stage produces a database file. The database file provides rapid access to
+details of the indexed messages during searching operations. A search normally
+produces a folder (so-called
+.BR mfolder )
+containing the matched messages. However, a raw mode
+.RB ( \-r )
+exists which just lists the matched messages instead.
+.PP
+It can operate with the following folder types
+.IP *
+maildir
+.IP *
+MH (compatible with the MH folder formats used by xmh, sylpheed, claws-mail, nnml (Gnus) and evolution)
+.IP *
+mbox (including mboxes that have been compressed with gzip or bzip2)
+.PP
+If maildir or MH source folders are used, and a search outputs its matches to
+an mfolder in maildir or MH format, symbolic links are used to reference the
+original messages inside the mfolder. However, if mbox folders are involved,
+copies of messages are made instead.
+
+.SH OPTIONS
+
+.B mairix
+decides whether indexing or searching is required by looking for the presence of any
+.I search-patterns
+on the command line.
+
+.SS Special modes
+.TP
+.B -h, --help
+.br
+Show usage summary and exit
+
+.TP
+.B -V, --version
+Show program version and exit
+
+.TP
+.B -d, --dump
+.br
+Dump the database's contents in human-readable form to stdout.
+
+.SS General options
+.TP
+.BI "-f " mairixrc
+.br
+.ns
+.TP
+.BI "--rcfile " mairixrc
+.br
+Specify an alternative configuration file to use. The default configuration file is
+.IR ~/.mairixrc .
+
+.TP
+.B -v, --verbose
+.br
+Make the output more verbose
+
+.TP
+.B -Q, --no-integrity-checks
+.br
+Normally
+.I mairix
+will do some internal integrity tests on the database. The
+.B -Q
+option removes these checks, making
+.I mairix
+run faster, but it will be less likely to detect internal problems if any bugs creep in.
+
+The
+.I nochecks
+directive in the rc file has the same effect.
+
+.TP
+.B \-\-unlock
+.br
+.I mairix
+locks its database file during any indexing or searching operation to prevent
+multiple indexing runs interfering with each other, or an indexing run
+interfering with search runs. The
+.B --unlock
+option removes the lockfile before doing the requested indexing or searching
+operation. This is a convenient way of cleaning up a stale lockfile if an
+earlier run crashed for some reason or was aborted.
+
+.SS Indexing options
+
+.TP
+.B -p, --purge
+.br
+Cause stale (dead) messages to be purged from the database during an indexing
+run. (Normally, stale messages are left in the database because of the
+additional cost of compacting away the storage that they take up.)
+
+.TP
+.B -F, --fast-index
+.br
+When processing maildir and MH folders,
+.I mairix
+normally compares the mtime and size of each message against the values stored
+in the database. If they have changed, the message will be rescanned. This
+check requires each message file to be stat'ed. For large numbers of messages
+in these folder types, this can be a sizeable overhead.
+
+This option tells
+.I mairix
+that when a message currently on-disc has a name matching one already
+in the database, it should assume the message is unchanged.
+
+A later indexing run without using this option will fix up any rescans that
+were missed due to its use.
+
+.TP
+.BI "--force-hash-key-new-database " hash
+.br
+This option should only be used for debugging.
+.br
+If a new database is created,
+.I hash
+is used as hash key, instead of a random hash.
+
+.SS Search options
+.TP
+.B -a, --augment
+.br
+Append newly matched messages to the current mfolder instead of creating the
+mfolder from scratch.
+
+.TP
+.B -t, --threads
+.br
+As well as returning the matched messages, also return every message in the
+same thread as one of the real matches.
+
+.TP
+.B -r, --raw-output
+.br
+Instead of creating an mfolder containing the matched messages, just show their
+paths on stdout.
+
+.TP
+.B -x, --excerpt-output
+.br
+Instead of creating an mfolder containing the matched messages, display an
+excerpt from their headers on stdout. The excerpt shows To, Cc, From, Subject
+and Date.
+
+.TP
+.B -H, --force-hardlinks
+.br
+Instead of creating symbolic links, force the use of hardlinks. This helps
+mailers such as alpine to realize that there are new mails in the search
+folder.
+
+.TP
+.BI "-o " mfolder
+.br
+.ns
+.TP
+.BI "--mfolder " mfolder
+.br
+Specify a temporary alternative path for the mfolder to use, overriding the
+.I mfolder
+directive in the rc file.
+
+.B mairix
+will refuse to output search results into any folder that appears to be amongst
+those that are indexed. This is to prevent accidental deletion of emails.
+
+.SS Search patterns
+.TP
+.BI t: word
+.br
+Match
+.I word
+in the To: header.
+
+.TP
+.BI c: word
+.br
+Match
+.I word
+in the Cc: header.
+
+.TP
+.BI f: word
+.br
+Match
+.I word
+in the From: header.
+
+.TP
+.BI s: word
+.br
+Match
+.I word
+in the Subject: header.
+
+.TP
+.BI m: word
+.br
+Match
+.I word
+in the Message-ID: header.
+
+.TP
+.BI b: word
+.br
+Match
+.I word
+in the message body.
+
+.B Message body
+is taken to mean any body part of type text/plain or text/html. For text/html,
+text within meta tags is ignored. In particular, the URLs inside <A
+HREF="..."> tags are not currently indexed. Non-text attachments are ignored.
+If there's an attachment of type message/rfc822, this is parsed and the match
+is performed on this sub-message too. If a hit occurs, the enclosing message
+is treated as having a hit.
+
+.TP
+.BI d: "[start-datespec]" - "[end-datespec]"
+.br
+Match messages with Date: headers lying in the specific range.
+
+.TP
+.BI z: "[low-size]" - "[high-size]"
+.br
+Match messages whose size lies in the specified range. If the
+.I low-size
+argument is omitted it defaults to zero. If the
+.I high-size
+argument is omitted it defaults to infinite size.
+
+For example, to match messages between 10kilobytes and 20kilobytes in size, the
+following search term can be used:
+.Sy 1
+mairix z:10k-20k
+.Ey
+
+The suffix 'k' on a number means multiply by 1024, and the suffix 'M' on a
+number means multiply by 1024*1024.
+
+.TP
+.BI n: word
+.br
+Match
+.I word
+occurring as the name of an attachment in the message. Since attachment names
+are usually long, this option would usually be used in the substring form. So
+.Sy 1
+mairix n:mairix=
+.Ey
+
+would match all messages which have attachments whose names contain the
+substring
+.IR mairix .
+
+The attachment name is determined from the name=xxx or filename=xxx qualifiers
+on the Content-Type: and Content-Disposition: headers respectively.
+
+.TP
+.BI F: flags
+.br
+Match messages with particular flag settings. The available flags are 's'
+meaning seen, 'r' meaning replied, and 'f' meaning flagged. The flags are
+case-insensitive. A flag letter may be prefixed by a '-' to negate its sense. Thus
+
+.Sy 1
+mairix F:-s d:1w-
+.Ey
+
+would match any unread message less than a week old, and
+
+.Sy 1
+mairix F:f-r d:-1m
+.Ey
+
+would match any flagged message older than a month which you haven't replied to yet.
+
+Note that the flag characters and their meanings agree with those used as the
+suffix letters on message filenames in maildir folders.
+
+.SS Searching for a match amongst more than one part of a message
+.PP
+Multiple message parts may be grouped together, if a match in any of them is
+sought. Common examples follow.
+
+.TP
+.BI tc: word
+.br
+Match
+.I word
+in either the To: or Cc: headers (or both).
+
+.TP
+.BI bs: word
+.br
+Match
+.I word
+in either the Subject: header or the message body (or both).
+
+.PP
+The
+.B a:
+search pattern is an abbreviation for
+.BR tcf: ;
+i.e. match the word in the To:, Cc: or From: headers. ("a" stands for
+"address" in this case.)
+
+.SS Match words
+The
+.I word
+argument to the search strings can take various forms.
+
+.TP
+.I ~word
+.br
+Match messages
+.B not
+containing the word.
+
+.TP
+.I word1,word2
+.br
+This matches if both the words are matched in the specified message part.
+
+.TP
+.I word1/word2
+.br
+This matches if either of the words are matched in the specified message part.
+
+.TP
+.I substring=
+.br
+Match any word containing
+.I substring
+as a substring
+
+.TP
+.I substring=N
+.br
+Match any word containing
+.IR substring ,
+allowing up to
+.I N
+errors in the match. For example, if
+.I N
+is 1, a single error is allowed, where an error can be
+.IP *
+a missing letter
+.IP *
+an extra letter
+.IP *
+a different letter.
+
+.TP
+.I ^substring=
+.br
+Match any word containing
+.I substring
+as a substring, with the requirement that
+.I substring
+occurs at the beginning of the matched word.
+
+.SS Precedence matters
+
+The binding order of the constructions is:
+
+.IP "1."
+Individual command line arguments define separate conditions which are AND-ed
+together
+
+.IP "2."
+Within a single argument, the letters before the colon define which message
+parts the expression applies to. If there is no colon, the expression applies
+to all the headers listed earlier and the body.
+
+.IP "3."
+After the colon, slashes delineate separate disjuncts, which are
+OR-ed together.
+
+.IP "4."
+Each disjunct may contain separate conjuncts, which are separated
+by commas. These conditions are AND-ed together.
+
+.IP "5."
+Each conjunct may start with a tilde to negate it, and may be
+followed by an equals sign to indicate a substring match, optionally
+followed by an integer to define the maximum number of errors
+allowed.
+
+.SS Date specification
+.PP
+This section describes the syntax used for specifying dates when
+searching using the `d:' option.
+
+Dates are specified as a range. The start and end of the range can both be
+specified. Alternatively, if the start is omitted, it is treated as being the
+beginning of time. If the end is omitted, it is treated as the current time.
+
+There are 4 basic formats:
+.TP
+.BI d: start-end
+.br
+Specify both start and end explicitly
+.TP
+.BI d: start-
+Specify start, end is the current time
+.TP
+.BI d: -end
+Specify end, start is 'a long time ago' (i.e. early enough to include any
+message).
+.TP
+.BI d: period
+Specify start and end implicitly, as the start and end of the
+period given.
+
+.PP
+The start and end can be specified either absolute or relative. A relative
+endpoint is given as a number followed by a single letter defining the scaling:
+
+.TS
+box tab(&);
+lb | lb | lb | lb.
+letter & short for & example & meaning
+=
+.T&
+l | l | l | l.
+d & days & 3d & 3 days
+w & weeks & 2w & 2 weeks (14 days)
+m & months & 5m & 5 months (150 days)
+y & years & 4y & 4 years (4*365 days)
+.TE
+
+.PP
+Months are always treated as 30 days, and years as 365 days, for
+this purpose.
+
+Absolute times can be specified in many forms. Some forms have different
+meanings when they define a start date from that when they define an end date.
+Where a single expression specifies both the start and end (i.e. where the
+argument to d: doesn't contain a `-'), it will usually have different
+interpretations in the two cases.
+
+In the examples below, suppose the current date is Sunday May 18th,
+2003 (when I started to write this material.)
+
+.TS
+box tab(&);
+l | l | l | l.
+Example & Start date & End date & Notes
+=
+d:20030301\-20030425 & March 1st, 2003 & 25th April, 2003
+d:030301\-030425 & March 1st, 2003 & April 25th, 2003 & century assumed
+d:mar1\-apr25 & March 1st, 2003 & April 25th, 2003
+d:Mar1\-Apr25 & March 1st, 2003 & April 25th, 2003 & case insensitive
+d:MAR1\-APR25 & March 1st, 2003 & April 25th, 2003 & case insensitive
+d:1mar\-25apr & March 1st, 2003 & April 25th, 2003 & date and month in either order
+d:2002 & January 1st, 2002 & December 31st, 2002 & whole year
+d:mar & March 1st, 2003 & March 31st, 2003 & most recent March
+d:oct & October 1st, 2002 & October 31st, 2002 & most recent October
+d:21oct\-mar & October 21st, 2002 & March 31st, 2003 & start before end
+d:21apr\-mar & April 21st, 2002 & March 31st, 2003 & start before end
+d:21apr\- & April 21st, 2003 & May 18th, 2003 & end omitted
+d:\-21apr & January 1st, 1900 & April 21st, 2003 & start omitted
+d:6w\-2w & April 6th, 2003 & May 4th, 2003 & both dates relative
+d:21apr\-1w & April 21st, 2003 & May 11th, 2003 & one date relative
+d:21apr\-2y & April 21st, 2001 & May 11th, 2001 & start before end
+d:99\-11 & January 1st, 1999 & May 11th, 2003 &T{
+2 digits are a day of the month if possible, otherwise a year
+T}
+d:99oct\-1oct & October 1st, 1999 & October 1st, 2002 &T{
+end before now, single digit is a day of the month
+T}
+d:99oct\-01oct & October 1st, 1999 & October 31st, 2001 &T{
+2 digits starting with zero treated as a year
+T}
+d:oct99\-oct1 & October 1st, 1999 & October 1st, 2002 &T{
+day and month in either order
+T}
+d:oct99\-oct01 & October 1st, 1999 & October 31st, 2001 &T{
+year and month in either order
+T}
+.TE
+
+.PP
+The principles in the table work as follows.
+.IP \(bu
+When the expression defines a period of more than a day (i.e. if a month or
+year is specified), the earliest day in the period is taken when the start date
+is defined, and the last day in the period if the end of the range is being
+defined.
+.IP \(bu
+The end date is always taken to be on or before the current date.
+.IP \(bu
+The start date is always taken to be on or before the end date.
+
+.SH "SETTING UP THE MATCH FOLDER"
+
+If the match folder does not exist when running in search mode, it is
+automatically created. For 'mformat=maildir' (the default), this
+should be all you need to do. If you use 'mformat=mh', you may have to
+run some commands before your mailer will recognize the folder. e.g.
+for mutt, you could do
+.Sx 2
+mkdir -p /home/richard/Mail/mfolder
+touch /home/richard/Mail/mfolder/.mh_sequences
+.Ex
+which seems to work. Alternatively, within mutt, you could set MBOX_TYPE to
+'mh' and save a message to '+mfolder' to have mutt set up the structure for you
+in advance.
+
+If you use Sylpheed, the best way seems to be to create the new folder from
+within Sylpheed before letting mairix write into it.
+
+.SH EXAMPLES
+.PP
+Suppose my email address is <richard@doesnt.exist>.
+
+Either of the following will match all messages newer than 3 months from me
+with the word 'chrony' in the subject line:
+.Sx 2
+mairix d:3m- f:richard,doesnt,exist s:chrony
+mairix d:3m- f:richard@doesnt.exist s:chrony
+.Ex
+Suppose I don't mind a few spurious matches on the address, I want a wider date
+range, and I suspect that some messages I replied to might have had the subject
+keyword spelt wrongly (let's allow up to 2 errors):
+.Sx 1
+mairix d:6m- f:richard s:chrony=2
+.Ex
+
+.SH NOTES
+.PP
+.B mairix
+works exclusively in terms of
+.IR words .
+The index that's built
+in indexing mode contains a table of which words occur in which
+messages. Hence, the search capability is based on finding messages
+that contain particular words.
+.B mairix
+defines a word as any string of alphanumeric characters + underscore. Any
+whitespace, punctuation, hyphens etc are treated as word boundaries.
+
+.B mairix
+has special handling for the To:, Cc: and From: headers.
+Besides the normal word scan, these headers are scanned a second time,
+where the characters '@', '-' and '.' are also treated as word
+characters. This allows most (if not all) email addresses to appear in
+the database as single words. So if you have a mail from
+wibble@foobar.zzz, it will match on both these searches
+
+.Sx 2
+mairix f:foobar
+mairix f:wibble@foobar.zzz
+.Ex
+It should be clear by now that the searching cannot be used to find messages
+matching general regular expressions. This has never been much of a
+limitation. Most searches are for particular keywords that were in the
+messages, or details of the recipients, or the approximate date.
+
+It's also worth pointing out that there is no 'locality' information
+stored, so you can't search for messages that have one word 'close' to
+some other word. For every message and every word, there is a simple
+yes/no condition stored - whether the message contains the word in a
+particular header or in the body. So far this has proved to be
+adequate.
+.B mairix
+has a similar feel to using an Internet search engine.
+
+.SH FILES
+.I ~/.mairixrc
+
+.SH AUTHOR
+Copyright (C) 2002-2006 Richard P. Curnow <rc@rc0.org.uk>
+.SH "SEE ALSO"
+mairixrc(5)
+.SH BUGS
+.PP
+We need a plugin scheme to allow more types of attachment to be scanned and indexed.
+
diff --git a/src/mairix/mairix.32 b/src/mairix/mairix.32
Binary files differ.
diff --git a/src/mairix/mairix.64 b/src/mairix/mairix.64
Binary files differ.
diff --git a/src/mairix/mairix.c b/src/mairix/mairix.c
@@ -0,0 +1,774 @@
+/*
+ mairix - message index builder and finder for maildir folders.
+
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2002,2003,2004,2005,2006,2007,2008
+ * Copyright (C) Sanjoy Mahajan 2005
+ * - mfolder validation code
+ * Copyright (C) James Cameron 2005
+ * Copyright (C) Paul Fox 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+#include "mairix.h"
+#include "version.h"
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <pwd.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <locale.h>
+#include <signal.h>
+
+#ifdef TEST_OOM
+int total_bytes=0;
+#endif
+
+/* Command-line switches that are visible outside this file. */
+int verbose = 0; /* set to 1 by -v / --verbose */
+int do_hardlinks = 0; /* set to 1 by -H / --force-hardlinks */
+
+/* Settings filled in by parse_rc_file(); NULL means "not given". */
+static char *folder_base = NULL; /* value of the rc file's "base" line */
+static char *maildir_folders = NULL; /* colon-joined values of "maildir=" (and deprecated "folders") */
+static char *mh_folders = NULL; /* colon-joined values of "mh=" (and deprecated "mh_folders=") */
+static char *mboxen = NULL; /* colon-joined values of "mbox=" */
+static char *mfolder = NULL; /* value of "mfolder=" */
+static char *omit = NULL; /* colon-joined values of "omit=" */
+static char *database_path = NULL; /* value of "database=" */
+static enum folder_type output_folder_type = FT_MAILDIR; /* changed by "mformat=" */
+static int skip_integrity_checks = 0; /* set to 1 by a "nochecks" line */
+
+/* Result of classify_file(): M_NONE when stat() fails, M_FILE for a regular
+   file, M_DIR for a directory, M_OTHER for anything else. */
+enum filetype {
+ M_NONE, M_FILE, M_DIR, M_OTHER
+};
+
+static enum filetype classify_file(char *name)/*{{{*/
+{
+  /* Classify NAME according to what stat() reports about it. */
+  struct stat info;
+
+  if (stat(name, &info) < 0)
+    return M_NONE;              /* stat() failed */
+  if (S_ISREG(info.st_mode))
+    return M_FILE;
+  return S_ISDIR(info.st_mode) ? M_DIR : M_OTHER;
+}
+/*}}}*/
+/*{{{ member of*/
+/* returns 1 iff COMPLETE_MFOLDER (i.e. the match folder with
+   folder_base prepended if needed) matches one of the FOLDERS after
+   expanding the wildcards and recursion.  Used to make sure that the
+   match folder will not overwrite a valuable mail file or
+   directory.
+
+   complete_mfolder : full path of the match folder
+   folder_base      : base directory that the entries of FOLDERS hang off
+   folders          : colon-separated folder list; NULL means "none", giving 0
+   ft               : which kind of folder FOLDERS describes
+   omit_globs       : passed through to glob_and_expand_paths()
+
+   A match is either an identical path string, or two paths that stat() to
+   the same device AND inode (inode numbers alone are only unique within
+   one filesystem).
+   NOTE(review): raw_paths/paths are not released here, as in the original;
+   confirm who owns them. */
+int member_of (const char *complete_mfolder,
+               const char *folder_base,
+               const char *folders,
+               enum folder_type ft,
+               struct globber_array *omit_globs) {
+  char **raw_paths, **paths = NULL;
+  int n_raw_paths, n_paths = 0, i;
+  struct stat mfolder_sb;
+  int have_mfolder_sb;
+
+  if (!folders)
+    return 0;
+  split_on_colons(folders, &n_raw_paths, &raw_paths);
+  switch (ft) {
+    case FT_MAILDIR:
+      glob_and_expand_paths(folder_base, raw_paths, n_raw_paths, &paths, &n_paths, &maildir_traverse_methods, omit_globs);
+      break;
+    case FT_MH:
+      glob_and_expand_paths(folder_base, raw_paths, n_raw_paths, &paths, &n_paths, &mh_traverse_methods, omit_globs);
+      break;
+    case FT_MBOX:
+      glob_and_expand_paths(folder_base, raw_paths, n_raw_paths, &paths, &n_paths, &mbox_traverse_methods, omit_globs);
+      break;
+    case FT_RAW: /* cannot happen but to keep compiler happy */
+    case FT_EXCERPT:
+      /* paths/n_paths keep their empty initial values, so the loop below
+         is skipped instead of reading uninitialised variables. */
+      break;
+  }
+
+  /* The match folder does not change inside the loop: stat it once.  If it
+     cannot be stat()ed we are probably not wrecking any mail files or dirs,
+     so only the textual comparison is done. */
+  have_mfolder_sb = (stat (complete_mfolder, &mfolder_sb) == 0);
+
+  for (i=0; i<n_paths; i++) {
+    struct stat src_folder_sb; /* for checking device/inode numbers */
+
+    /* if the complete path names are the same, definitely a match */
+    if (strcmp (complete_mfolder, paths[i]) == 0)
+      return 1;
+    /* also a match if they point to the same file or directory but
+       via different routes (e.g. absolute path for one but path with
+       ../.. for the other), so check device and inode numbers */
+    if (!have_mfolder_sb || stat (paths[i], &src_folder_sb) != 0)
+      continue;
+    if (mfolder_sb.st_dev == src_folder_sb.st_dev &&
+        mfolder_sb.st_ino == src_folder_sb.st_ino)
+      return 1;
+  }
+  return 0;
+}
+/*}}}*/
+static char *copy_value(char *text)/*{{{*/
+{
+  /* Hand whatever follows the first '=' in TEXT to expand_string() and
+     return its result; return NULL when TEXT contains no '='. */
+  char *equals = strchr(text, '=');
+
+  if (!equals) return NULL;
+  return expand_string(equals + 1);
+}
+/*}}}*/
+static void add_folders(char **folders, char *extra_folders)/*{{{*/
+{
+  /* Append EXTRA_FOLDERS to the colon-separated list held in *FOLDERS.
+   *
+   * folders       : address of the list; *folders may be NULL (empty list)
+   * extra_folders : free()-able string to add, or NULL to add nothing
+   *
+   * Ownership of EXTRA_FOLDERS passes to this routine: it is either adopted
+   * as the new list or copied and released, so the caller's pointer is
+   * stale once this routine returns. */
+
+  if (!extra_folders) {
+    /* e.g. copy_value() found no '=' on the line: nothing to add */
+    return;
+  }
+
+  if (!*folders) {
+    *folders = extra_folders;
+  } else {
+    char *old_folders = *folders;
+    char *new_folders;
+    size_t old_len, extra_len;
+    old_len = strlen(old_folders);
+    extra_len = strlen(extra_folders);
+    /* +2 : one for the ':' separator, one for the terminating NUL */
+    new_folders = new_array(char, old_len + extra_len + 2);
+    strcpy(new_folders, old_folders);
+    strcpy(new_folders + old_len, ":");
+    strcpy(new_folders + old_len + 1, extra_folders);
+    *folders = new_folders;
+    free(old_folders);
+    free(extra_folders); /* copied above; previously leaked */
+  }
+}
+/*}}}*/
+static void parse_output_folder(char *p)/*{{{*/
+{
+  /* Set output_folder_type from an rc-file line P of the form
+   * "mformat=<value>".  The value is matched case-insensitively by prefix,
+   * in the order listed below.  An unknown value is reported on stderr and
+   * output_folder_type is left unchanged. */
+  static const struct {
+    const char *prefix;
+    size_t len;               /* number of leading characters compared */
+    enum folder_type type;
+  } formats[] = {
+    { "mh",      2, FT_MH },
+    { "maildir", 7, FT_MAILDIR },
+    { "raw",     3, FT_RAW },
+    /* Only "exc" is compared, as before; kept so that rc files relying on
+       the abbreviation continue to work. */
+    { "excerpt", 3, FT_EXCERPT },
+    { "mbox",    4, FT_MBOX }
+  };
+  const size_t n_formats = sizeof(formats) / sizeof(formats[0]);
+  char *temp;
+  size_t i;
+
+  temp = copy_value(p);
+  if (!temp) {
+    /* copy_value() yields NULL when there is no '=' to read a value from */
+    fprintf(stderr, "No value given for mformat\n");
+    return;
+  }
+
+  for (i = 0; i < n_formats; i++) {
+    if (!strncasecmp(temp, formats[i].prefix, formats[i].len)) {
+      output_folder_type = formats[i].type;
+      break;
+    }
+  }
+  if (i == n_formats) {
+    fprintf(stderr, "Unrecognized mformat <%s>\n", temp);
+  }
+  free(temp);
+}
+/*}}}*/
+static void parse_rc_file(char *name)/*{{{*/
+{
+  /* Read the rc file NAME and store its settings in this file's static
+   * configuration variables.
+   *
+   * name : path of the rc file, or NULL to use "$HOME/.mairixrc" (falling
+   *        back to the password database's home directory if HOME is unset).
+   *
+   * Exits with status 2 if the home directory cannot be determined, the
+   * file cannot be opened, or a line is too long for the line buffer.
+   * Unrecognized lines are only reported when verbose is set. */
+  FILE *in;
+  char line[4096], *p;
+  size_t len;
+  int lineno;
+  int all_blank;
+  int used_default_name = 0;
+
+  if (!name) {
+    /* open default file */
+    struct passwd *pw;
+    char *home;
+    home = getenv("HOME");
+    if (!home) {
+      pw = getpwuid(getuid());
+      if (!pw) {
+        fprintf(stderr, "Cannot determine home directory\n");
+        exit(2);
+      }
+      home = pw->pw_dir;
+    }
+    name = new_array(char, strlen(home) + 12);
+    strcpy(name, home);
+    strcat(name, "/.mairixrc");
+    used_default_name = 1;
+  }
+
+  in = fopen(name, "r");
+  if (!in) {
+    fprintf(stderr, "Cannot open %s, exiting\n", name);
+    exit(2);
+  }
+
+  lineno = 0;
+  while(fgets(line, sizeof(line), in)) {
+    lineno++;
+    len = strlen(line);
+    if (len > sizeof(line) - 4) {
+      fprintf(stderr, "Line %d in %s too long, exiting\n", lineno, name);
+      exit(2);
+    }
+
+    /* len can be 0 if the line starts with a NUL byte; do not look at
+       line[-1] in that case. */
+    if (len > 0 && line[len-1] == '\n') {
+      line[len-1] = '\0';
+    }
+
+    /* Strip trailing comments. */
+    for (p=line; *p && !strchr("#!;%", *p); p++) ;
+    if (*p) *p = '\0';
+
+    /* Discard blank lines; on a non-blank line p is left pointing at the
+       first non-space character. */
+    all_blank = 1;
+    for (p=line; *p; p++) {
+      if (!isspace(*(unsigned char *)p)) {
+        all_blank = 0;
+        break;
+      }
+    }
+
+    if (all_blank) continue;
+
+    /* Now a real line to parse */
+    if (!strncasecmp(p, "base", 4)) folder_base = copy_value(p);
+    else if (!strncasecmp(p, "folders", 7)) {
+      char *value;
+      fprintf(stderr, "'folders=' option in rc file is depracated, use 'maildir='\n");
+      /* The keyword is matched without its '=', so the line may carry no
+         value at all; only pass a real value on. */
+      value = copy_value(p);
+      if (value) add_folders(&maildir_folders, value);
+    }
+    else if (!strncasecmp(p, "maildir=", 8)) add_folders(&maildir_folders, copy_value(p));
+    else if (!strncasecmp(p, "mh_folders=", 11)) {
+      fprintf(stderr, "'mh_folders=' option in rc file is depracated, use 'mh='\n");
+      add_folders(&mh_folders, copy_value(p));
+    }
+    else if (!strncasecmp(p, "mh=", 3)) add_folders(&mh_folders, copy_value(p));
+    else if (!strncasecmp(p, "mbox=", 5)) add_folders(&mboxen, copy_value(p));
+    else if (!strncasecmp(p, "omit=", 5)) add_folders(&omit, copy_value(p));
+
+    else if (!strncasecmp(p, "mformat=", 8)) {
+      parse_output_folder(p);
+    }
+    else if (!strncasecmp(p, "mfolder=", 8)) mfolder = copy_value(p);
+    else if (!strncasecmp(p, "database=", 9)) database_path = copy_value(p);
+    else if (!strncasecmp(p, "nochecks", 8)) skip_integrity_checks = 1;
+    else {
+      if (verbose) {
+        fprintf(stderr, "Unrecognized option at line %d in %s\n", lineno, name);
+      }
+    }
+  }
+
+  fclose(in);
+
+  /* name was allocated here only when the default path was built */
+  if (used_default_name) free(name);
+}
+/*}}}*/
+static int compare_strings(const void *a, const void *b)/*{{{*/
+{
+  /* qsort() comparator: A and B each point at a char* element; order the
+     elements by strcmp() of the strings they point to. */
+  const char *lhs = *(const char * const *) a;
+  const char *rhs = *(const char * const *) b;
+
+  return strcmp(lhs, rhs);
+}
+/*}}}*/
+static int check_message_list_for_duplicates(struct msgpath_array *msgs)/*{{{*/
+{
+  /* Return 1 if two MTY_FILE entries of MSGS have the same path, else 0.
+     MTY_MBOX entries are not examined; an MTY_DEAD entry trips an
+     assertion. */
+  char **file_paths;
+  int total = msgs->n;
+  int n_files = 0;
+  int found_duplicate = 0;
+  int k;
+
+  /* Gather the path of every file-per-message entry. */
+  file_paths = new_array(char *, total);
+  for (k = 0; k < total; k++) {
+    if (msgs->type[k] == MTY_FILE) {
+      file_paths[n_files++] = msgs->paths[k].src.mpf.path;
+    } else if (msgs->type[k] == MTY_DEAD) {
+      assert(0);
+    }
+  }
+
+  /* After sorting, equal paths sit next to each other. */
+  qsort(file_paths, n_files, sizeof(char *), compare_strings);
+
+  k = 1;
+  while (k < n_files && !found_duplicate) {
+    if (strcmp(file_paths[k-1], file_paths[k]) == 0) {
+      found_duplicate = 1;
+    }
+    k++;
+  }
+
+  free(file_paths);
+  return found_duplicate;
+}
+/*}}}*/
+
+static void emit_int(int x)/*{{{*/
+{
+  /* Write X in decimal to file descriptor 2 using only write(), so that it
+   * can be called when the heap is exhausted (no stdio, no allocation).
+   *
+   * Zero is printed as "0", and the magnitude is computed in unsigned
+   * arithmetic so that the most negative int does not overflow on
+   * negation. */
+  char digits[20]; /* decimal digits, least significant first */
+  char out[21];    /* optional sign followed by digits, most significant first */
+  unsigned int magnitude;
+  int n_digits = 0;
+  int n_out = 0;
+
+  magnitude = (x < 0) ? (0u - (unsigned int) x) : (unsigned int) x;
+
+  /* do/while so that at least one digit is produced, even for zero */
+  do {
+    digits[n_digits++] = (char) ('0' + (magnitude % 10));
+    magnitude /= 10;
+  } while (magnitude);
+
+  if (x < 0) out[n_out++] = '-';
+  while (n_digits > 0) {
+    out[n_out++] = digits[--n_digits];
+  }
+  /* Nothing useful can be done if this write fails. */
+  write(2, out, n_out);
+  return;
+}
+/*}}}*/
+void out_of_mem(char *file, int line, size_t size)/*{{{*/
+{
+  /* Report a failed allocation on file descriptor 2 and exit with status 2.
+   *
+   * file : name of the source file making the request
+   * line : line number of the request
+   * size : number of bytes that were requested
+   *
+   * Hairy coding ahead - can't use any [s]printf, itoa etc because
+   * those might try to use the heap! */
+
+  int filelen;
+  char *p;
+
+  static char msg1[] = "Out of memory (at ";
+  static char msg2[] = " bytes)\n";
+  /* Perhaps even strlen is unsafe in this situation? */
+  p = file;
+  while (*p) p++;
+  filelen = p - file;
+  /* sizeof - 1: do not send the string's terminating NUL to the terminal */
+  write(2, msg1, sizeof(msg1) - 1);
+  write(2, file, filelen);
+  write(2, ":", 1);
+  emit_int(line);
+  write(2, ", ", 2);
+  /* NOTE(review): emit_int() takes an int, so a request larger than INT_MAX
+     is shown truncated. */
+  emit_int(size);
+  write(2, msg2, sizeof(msg2) - 1);
+  exit(2);
+}
+/*}}}*/
+void report_error(const char *str, const char *filename)/*{{{*/
+{
+  /* Print a perror()-style message on stderr for the current errno.
+   *
+   * str      : description of what failed
+   * filename : file the failure relates to, or NULL if there is none
+   *
+   * With a filename the output is "<str> '<filename>': <error text>\n",
+   * otherwise it is whatever perror(str) prints.
+   *
+   * errno is captured before anything else is called, and the message is
+   * formatted directly instead of through a heap buffer, so reporting an
+   * error can neither disturb the error code nor fail for lack of memory. */
+  int saved_errno = errno;
+
+  if (filename) {
+    fprintf(stderr, "%s '%s': %s\n", str, filename, strerror(saved_errno));
+  } else {
+    perror(str);
+  }
+}
+/*}}}*/
+static void print_copyright(void)/*{{{*/
+{
+  /* Version/copyright line followed by the warranty notice, on stderr. */
+  fprintf(stderr, "mairix %s, Copyright (C) 2002-2010 Richard P. Curnow\n", PROGRAM_VERSION);
+  fputs("mairix comes with ABSOLUTELY NO WARRANTY.\n"
+        "This is free software, and you are welcome to redistribute it\n"
+        "under certain conditions; see the GNU General Public License for details.\n\n",
+        stderr);
+}
+/*}}}*/
+static void print_version(void)/*{{{*/
+{
+  /* Program name and version on stdout. */
+  printf("mairix %s\n", PROGRAM_VERSION);
+}
+/*}}}*/
+static void handlesig(int signo)/*{{{*/
+{
+  /* Signal handler: whatever the signal, leave via unlock_and_exit()
+     with status 7. */
+  (void) signo;
+  unlock_and_exit(7);
+}
+/*}}}*/
+static void usage(void)/*{{{*/
+{
+  /* Copyright banner (stderr) followed by the command-line summary (stdout). */
+  static const char help_text[] =
+    "mairix [-h] : Show help\n"
+    "mairix [-f <rcfile>] [-v] [-p] [-F] : Build index\n"
+    "mairix [-f <rcfile>] [-a] [-t] expr1 ... exprN : Run search\n"
+    "mairix [-f <rcfile>] -d : Dump database to stdout\n"
+    "-h : show this help\n"
+    "-f <rcfile> : use alternative rc file (default ~/.mairixrc)\n"
+    "-V : show version\n"
+    "-v : be verbose\n"
+    "-p : purge messages that no longer exist\n"
+    "-F : fast scan for maildir and MH folders (no mtime or size checks)\n"
+    "-a : add new matches to match folder (default : clear it first)\n"
+    "-x : display excerpt of message headers (default : use match folder)\n"
+    "-t : include all messages in same threads as matching messages\n"
+    "-o <mfolder> : override setting of mfolder from mairixrc file\n"
+    "-r : force raw output regardless of mformat setting in mairixrc file\n"
+    "-H : force hard links rather than symbolic ones\n"
+    "expr_i : search expression (all expr's AND'ed together):\n"
+    " word : match word in message body and major headers\n"
+    " t:word : match word in To: header\n"
+    " c:word : match word in Cc: header\n"
+    " f:word : match word in From: header\n"
+    " a:word : match word in To:, Cc: or From: headers (address)\n"
+    " s:word : match word in Subject: header\n"
+    " b:word : match word in message body\n"
+    " m:word : match word in Message-ID: header\n"
+    " n:word : match name of attachment within message\n"
+    " F:flags : match on message flags (s=seen,r=replied,f=flagged,-=negate)\n"
+    " p:substring : match substring of path\n"
+    " d:start-end : match date range\n"
+    " z:low-high : match messages in size range\n"
+    " bs:word : match word in Subject: header or body (or any other group of prefixes)\n"
+    " s:word1,word2 : match both words in Subject:\n"
+    " s:word1/word2 : match either word or both words in Subject:\n"
+    " s:~word : match messages not containing word in Subject:\n"
+    " s:substring= : match substring in any word in Subject:\n"
+    " s:^substring= : match left-anchored substring in any word in Subject:\n"
+    " s:substring=2 : match substring with <=2 errors in any word in Subject:\n"
+    "\n"
+    " (See documentation for more examples)\n";
+
+  print_copyright();
+
+  /* The text contains no conversion specifications, so it is written
+     out verbatim. */
+  fputs(help_text, stdout);
+}
+ /*}}}*/
+/* Notes on folder management: {{{
+
+ Assumption is that the user wants to keep the 'mfolder' directories under a
+ common root with the real maildir folders. This allows a common value for
+ mutt's 'folder' variable => the '+' and '=' prefixes work better. This
+ means the indexer here can't just scan down all subdirectories of a single
+ ancestor, because it'll pick up its own mfolders. So, use environment
+ variables to tailor the folders.
+
+ MAIRIX_FOLDER_BASE is the common parent directory of the folders (aka
+ mutt's 'folder' variable)
+
+ MAIRIX_MAILDIR_FOLDERS, MAIRIX_MH_FOLDERS, MAIRIX_MBOXEN are
+ colon-separated lists of folders to index, with '...' after a
+ component meaning any maildir underneath it.
+
+ MAIRIX_MFOLDER is the folder to put the match data.
+
+ For example, if
+ MAIRIX_FOLDER_BASE = "/home/foobar/mail"
+ MAIRIX_MAILDIR_FOLDERS = "inbox:lists...:action:archive..."
+ MAIRIX_MFOLDER = "mf"
+
+ then /home/foobar/mail/mf/{new,cur,tmp} contain the output of the search.
+ }}} */
+/* main: parse the command line, apply the rc file and then the MAIRIX_* environment settings, lock the database, and run one of dump / search / index (index is the default). Returns 0 after a dump or an indexing run, and search_top()'s result after a search; configuration errors exit with status 1, 2 or 3. */
+int main (int argc, char **argv)/*{{{*/
+{
+  struct msgpath_array *msgs;
+  struct database *db = NULL;
+  /* Values taken from the command line; NULL means the option was not given. */
+  char *arg_rc_file_path = NULL;
+  char *arg_mfolder = NULL;
+  char *e; /* scratch: result of the most recent getenv() */
+  int do_augment = 0;
+  int do_threads = 0;
+  int do_search = 0;
+  int do_purge = 0;
+  int any_updates = 0;
+  int any_purges = 0;
+  int do_help = 0;
+  int do_raw_output = 0;
+  int do_excerpt_output = 0;
+  int do_dump = 0;
+  int do_integrity_checks = 1;
+  int do_forced_unlock = 0;
+  int do_fast_index = 0;
+
+  unsigned int forced_hash_key = CREATE_RANDOM_DATABASE_HASH;
+
+  struct globber_array *omit_globs;
+
+  int result;
+
+  setlocale(LC_CTYPE, "");
+  /* Consume options up to the first word that is not one; whatever is left in argv is the search expression. */
+  while (++argv, --argc) {
+    if (!*argv) {
+      break;
+    } else if (!strcmp(*argv, "-f") || !strcmp(*argv, "--rcfile")) {
+      ++argv, --argc;
+      if (!argc) {
+        fprintf(stderr, "No filename given after -f argument\n");
+        exit(1);
+      }
+      arg_rc_file_path = *argv;
+    } else if (!strcmp(*argv, "-t") || !strcmp(*argv, "--threads")) {
+      do_search = 1;
+      do_threads = 1;
+    } else if (!strcmp(*argv, "-a") || !strcmp(*argv, "--augment")) {
+      do_search = 1;
+      do_augment = 1;
+    } else if (!strcmp(*argv, "-o") || !strcmp(*argv, "--mfolder")) {
+      ++argv, --argc;
+      if (!argc) {
+        fprintf(stderr, "No folder name given after -o argument\n");
+        exit(1);
+      }
+      arg_mfolder = *argv;
+    } else if (!strcmp(*argv, "-p") || !strcmp(*argv, "--purge")) {
+      do_purge = 1;
+    } else if (!strcmp(*argv, "-d") || !strcmp(*argv, "--dump")) {
+      do_dump = 1;
+    } else if (!strcmp(*argv, "-r") || !strcmp(*argv, "--raw-output")) {
+      do_raw_output = 1;
+    } else if (!strcmp(*argv, "-x") || !strcmp(*argv, "--excerpt-output")) {
+      do_excerpt_output = 1;
+    } else if (!strcmp(*argv, "-H") || !strcmp(*argv, "--force-hardlinks")) {
+      do_hardlinks = 1;
+    } else if (!strcmp(*argv, "-Q") || !strcmp(*argv, "--no-integrity-checks")) {
+      do_integrity_checks = 0;
+    } else if (!strcmp(*argv, "--unlock")) {
+      do_forced_unlock = 1;
+    } else if (!strcmp(*argv, "-F") ||
+               !strcmp(*argv, "--fast-index")) {
+      do_fast_index = 1;
+    } else if (!strcmp(*argv, "--force-hash-key-new-database")) {
+      ++argv, --argc;
+      if (!argc) {
+        fprintf(stderr, "No hash key given after --force-hash-key-new-database\n");
+        exit(1);
+      }
+      if ( 1 != sscanf(*argv, "%u", &forced_hash_key) )
+      {
+        fprintf(stderr, "Hash key given after --force-hash-key-new-database could not be parsed\n");
+        exit(1);
+      }
+    } else if (!strcmp(*argv, "-v") || !strcmp(*argv, "--verbose")) {
+      verbose = 1;
+    } else if (!strcmp(*argv, "-V") || !strcmp(*argv, "--version")) {
+      print_version();
+      exit(0);
+    } else if (!strcmp(*argv, "-h") ||
+               !strcmp(*argv, "--help")) {
+      do_help = 1;
+    } else if (!strcmp(*argv, "--")) {
+      ++argv, --argc; /* End of options: tested before the generic '-' case, and stepped over so "--" is not taken as a search term */
+      break;
+    } else if ((*argv)[0] == '-') {
+      fprintf(stderr, "Unrecognized option %s\n", *argv);
+    } else {
+      /* First non-option word: the search expression starts here */
+      break;
+    }
+  }
+
+  if (do_help) {
+    usage();
+    exit(0);
+  }
+
+  if (verbose) {
+    print_copyright();
+  }
+
+  if (*argv) {
+    /* Words remain after the options, so this is a search run */
+    do_search = 1;
+  }
+
+  parse_rc_file(arg_rc_file_path);
+  /* Environment settings are applied after the rc file has been parsed; each variable is read exactly once. */
+  if ((e = getenv("MAIRIX_FOLDER_BASE"))) {
+    folder_base = e;
+  }
+
+  if ((e = getenv("MAIRIX_MAILDIR_FOLDERS"))) {
+    maildir_folders = e;
+  }
+
+  if ((e = getenv("MAIRIX_MH_FOLDERS"))) {
+    mh_folders = e;
+  }
+
+  if ((e = getenv("MAIRIX_MBOXEN"))) {
+    mboxen = e;
+  }
+
+  if ((e = getenv("MAIRIX_MFOLDER"))) {
+    mfolder = e;
+  }
+
+  if ((e = getenv("MAIRIX_DATABASE"))) {
+    database_path = e;
+  }
+  /* A -o/--mfolder argument takes precedence over both the rc file and the environment. */
+  if (arg_mfolder) {
+    mfolder = arg_mfolder;
+  }
+
+  if (skip_integrity_checks) {
+    do_integrity_checks = 0;
+  }
+
+  if (!folder_base) {
+    fprintf(stderr, "No folder_base/MAIRIX_FOLDER_BASE set\n");
+    exit(2);
+  }
+
+  if (!database_path) {
+    fprintf(stderr, "No database/MAIRIX_DATABASE set\n");
+    exit(2);
+  }
+
+  if (do_raw_output) {
+    output_folder_type = FT_RAW;
+  } else if (do_excerpt_output) {
+    output_folder_type = FT_EXCERPT;
+  }
+
+  if (omit) {
+    omit_globs = colon_sep_string_to_globber_array(omit);
+  } else {
+    omit_globs = NULL;
+  }
+
+  /* Lock database.
+   * Prevent concurrent updates due to parallel indexing (e.g. due to stuck
+   * cron jobs).
+   * Prevent concurrent searching and indexing. */
+
+  signal(SIGHUP, handlesig);
+  signal(SIGINT, handlesig);
+  signal(SIGQUIT, handlesig);
+
+  lock_database(database_path, do_forced_unlock);
+
+  if (do_dump) {
+    dump_database(database_path);
+    result = 0;
+
+  } else if (do_search) {
+    int len;
+    char *complete_mfolder;
+    enum filetype ftype;
+
+    if (!mfolder) {
+      switch (output_folder_type) {
+        case FT_RAW:
+        case FT_EXCERPT:
+          break;
+        default:
+          fprintf(stderr, "No mfolder/MAIRIX_MFOLDER set\n");
+          unlock_and_exit(2);
+      }
+      mfolder = new_string("");
+    }
+
+    /* complete_mfolder is needed by search_top() and member_of() so
+       compute it once here rather than in search_top() as well */
+    if ((mfolder[0] == '/') ||
+        ((mfolder[0] == '.') && (mfolder[1] == '/'))) {
+      complete_mfolder = new_string(mfolder);
+    } else {
+      len = strlen(folder_base) + strlen(mfolder) + 2;
+      complete_mfolder = new_array(char, len);
+      strcpy(complete_mfolder, folder_base);
+      strcat(complete_mfolder, "/");
+      strcat(complete_mfolder, mfolder);
+    }
+    /* check whether mfolder output would destroy a mail folder or mbox */
+    switch (output_folder_type) {
+      case FT_RAW:
+      case FT_EXCERPT:
+        break;
+      default:
+        if ((member_of(complete_mfolder,folder_base, maildir_folders, FT_MAILDIR, omit_globs)||
+             member_of (complete_mfolder, folder_base, mh_folders, FT_MH, omit_globs) ||
+             member_of (complete_mfolder, folder_base, mboxen, FT_MBOX, omit_globs))) {
+          fprintf (stderr,
+              "You asked search results to go to the folder '%s'.\n"
+              "That folder appears to be one of the indexed mail folders!\n"
+              "For your own good, I refuse to output search results to an indexed mail folder.\n",
+              mfolder);
+          unlock_and_exit(3);
+        }
+    }
+
+    ftype = classify_file(database_path);
+    if (ftype != M_FILE) {
+      fprintf(stderr, "No database file '%s' is present.\nYou need to do an indexing run first.\n",
+          database_path);
+      unlock_and_exit(3);
+    }
+    result = search_top(do_threads, do_augment, database_path, complete_mfolder, argv, output_folder_type, verbose);
+
+  } else {
+    enum filetype ftype;
+
+    if (!maildir_folders && !mh_folders && !mboxen) {
+      fprintf(stderr, "No [mh_]folders/mboxen/MAIRIX_[MH_]FOLDERS set\n");
+      unlock_and_exit(2);
+    }
+
+    if (verbose) printf("Finding all currently existing messages...\n");
+    msgs = new_msgpath_array();
+    if (maildir_folders) {
+      build_message_list(folder_base, maildir_folders, FT_MAILDIR, msgs, omit_globs);
+    }
+    if (mh_folders) {
+      build_message_list(folder_base, mh_folders, FT_MH, msgs, omit_globs);
+    }
+
+    /* The next call sorts the msgs array as part of looking for duplicates. */
+    if (check_message_list_for_duplicates(msgs)) {
+      fprintf(stderr, "Message list contains duplicates - check your 'folders' setting\n");
+      unlock_and_exit(2);
+    }
+
+    /* Try to open existing database */
+    ftype = classify_file(database_path);
+    if (ftype == M_FILE) {
+      if (verbose) printf("Reading existing database...\n");
+      db = new_database_from_file(database_path, do_integrity_checks);
+      if (verbose) printf("Loaded %d existing messages\n", db->n_msgs);
+    } else if (ftype == M_NONE) {
+      if (verbose) printf("Starting new database\n");
+      db = new_database( forced_hash_key );
+    } else {
+      fprintf(stderr, "database path %s is not a file; you can't put the database there\n", database_path);
+      unlock_and_exit(2);
+    }
+
+    build_mbox_lists(db, folder_base, mboxen, omit_globs);
+
+    any_updates = update_database(db, msgs->paths, msgs->n, do_fast_index);
+    if (do_purge) {
+      any_purges = cull_dead_messages(db, do_integrity_checks);
+    }
+    if (any_updates || any_purges) {
+      /* Rewrite the database file only when indexing or purging reported a change. */
+      write_database(db, database_path, do_integrity_checks);
+    }
+
+#if 0
+    get_db_stats(db);
+#endif
+
+    free_database(db);
+    free_msgpath_array(msgs);
+
+    result = 0;
+  }
+
+  unlock_database();
+
+  return result;
+}
+/*}}}*/
diff --git a/src/mairix/mairix.h b/src/mairix/mairix.h
@@ -0,0 +1,402 @@
+/*
+ mairix - message index builder and finder for maildir folders.
+
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2002,2003,2004,2005,2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+
+#ifndef MAIRIX_H
+#define MAIRIX_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "memmac.h"
+
+struct msgpath {/*{{{*/
+ /* The 'selector' for this union is the corresponding entry of type 'enum
+ * message_type' */
+ union {
+ struct {
+ char *path;
+ size_t size; /* size of the message in bytes */
+ time_t mtime; /* mtime of message file on disc */
+ } mpf; /* message per file */
+ struct {
+ int file_index; /* index into table of mbox files */
+ int msg_index; /* index of message within the file */
+ } mbox; /* for messages in mbox format folders */
+ } src;
+
+ /* Now fields that are common to both types of message. */
+ time_t date; /* representation of Date: header in message */
+ int tid; /* thread-id */
+
+ /* Message flags. */
+ unsigned int seen:1;
+ unsigned int replied:1;
+ unsigned int flagged:1;
+
+ /* + other stuff eventually */
+};
+/*}}}*/
+
+enum message_type {/*{{{*/
+ MTY_DEAD, /* msg no longer exists, i.e. don't report in searches,
+ prune it on a '-p' run. */
+ MTY_FILE, /* msg <-> file in 1-1 correspondence e.g. maildir, MH */
+ MTY_MBOX /* multiple msgs per file : MBOX format file */
+};
+/*}}}*/
+struct msgpath_array {/*{{{*/
+ enum message_type *type;
+ struct msgpath *paths;
+ int n;
+ int max;
+};
+/*}}}*/
+
+struct matches {/*{{{*/
+ unsigned char *msginfo;
+ int n; /* bytes in use */
+ int max; /* bytes allocated */
+ unsigned long highest;
+};
+/*}}}*/
+struct token {/*{{{*/
+ char *text;
+ unsigned long hashval;
+ /* to store delta-compressed info of which msgpaths match the token */
+ struct matches match0;
+};
+/*}}}*/
+struct token2 {/*{{{*/
+ char *text;
+ unsigned long hashval;
+ /* to store delta-compressed info of which msgpaths match the token */
+ struct matches match0;
+ struct matches match1;
+};
+/*}}}*/
+struct toktable {/*{{{*/
+ struct token **tokens;
+ int n; /* # in use */
+ int size; /* # allocated */
+ unsigned int mask; /* for masking down hash values */
+ int hwm; /* number to have before expanding */
+};
+/*}}}*/
+struct toktable2 {/*{{{*/
+ struct token2 **tokens;
+ int n; /* # in use */
+ int size; /* # allocated */
+ unsigned int mask; /* for masking down hash values */
+ int hwm; /* number to have before expanding */
+};
+/*}}}*/
+
+enum content_type {/*{{{*/
+ CT_TEXT_PLAIN,
+ CT_TEXT_HTML,
+ CT_TEXT_OTHER,
+ CT_MESSAGE_RFC822,
+ CT_OTHER
+};
+/*}}}*/
+struct rfc822;
+struct attachment {/*{{{*/
+ struct attachment *next;
+ struct attachment *prev;
+ enum content_type ct;
+ char *filename;
+ union attachment_body {
+ struct normal_attachment_body {
+ int len;
+ char *bytes;
+ } normal;
+ struct rfc822 *rfc822;
+ } data;
+};
+/*}}}*/
+struct headers {/*{{{*/
+ char *to;
+ char *cc;
+ char *from;
+ char *subject;
+
+ /* The following are needed to support threading */
+ char *message_id;
+ char *in_reply_to;
+ char *references;
+
+ struct {
+ unsigned int seen:1;
+ unsigned int replied:1;
+ unsigned int flagged:1;
+ } flags;
+
+ time_t date;
+};
+/*}}}*/
+struct rfc822 {/*{{{*/
+ struct headers hdrs;
+ struct attachment atts;
+};
+/*}}}*/
+
+typedef char checksum_t[16];
+
+struct message_list {/*{{{*/
+ struct message_list *next;
+ off_t start;
+ size_t len;
+};
+/*}}}*/
+struct mbox {/*{{{*/
+ /* State kept for one mbox file. If path==NULL, the mbox is dead, i.e. it no
+ * longer exists. */
+ char *path;
+ /* As read in from database (i.e. as of the last time a mairix scan was run.) */
+ time_t file_mtime;
+ size_t file_size;
+ /* As found in the filesystem now. */
+ time_t current_mtime;
+ size_t current_size;
+ /* After reconciling a loaded database with what's on the disc, this entry
+ stores how many of the msgs that used to be there last time are still
+ present at the head of the file. Thus, all messages beyond that are
+ treated as dead, and scanning starts at that point to find 'new' messages
+ (which may actually be old ones that have moved, but they're treated as
+ new.) */
+ int n_old_msgs_valid;
+
+ /* Hold list of new messages and their number. Number is temporary -
+ * eventually just list walking in case >=2 have to be reattached. */
+ struct message_list *new_msgs;
+ int n_new_msgs;
+
+ int n_so_far; /* Used during database load. */
+
+ int n_msgs; /* Number of entries in 'start' and 'len' */
+ int max_msgs; /* Allocated size of 'start' and 'len' */
+ /* File offset to the start of each message (first line of real header, not to mbox 'From ' line) */
+ off_t *start;
+ /* Length of each message */
+ size_t *len;
+ /* Checksums on whole messages. */
+ checksum_t *check_all;
+
+};
+/*}}}*/
+struct database {/*{{{*/
+ /* Used to hold an entire mapping between an array of filenames, each
+ containing a single message, and the sets of tokens that occur in various
+ parts of those messages */
+
+ enum message_type *type;
+ struct msgpath *msgs; /* Paths to messages */
+ int n_msgs; /* Number in use */
+ int max_msgs; /* Space allocated */
+
+ struct mbox *mboxen;
+ int n_mboxen; /* number in use. */
+ int max_mboxen; /* space allocated */
+
+ /* Seed for hashing in the token tables. Randomly created for
+ * each new database - avoid DoS attacks through carefully
+ * crafted messages. */
+ unsigned int hash_key;
+
+ /* Token tables */
+ struct toktable *to;
+ struct toktable *cc;
+ struct toktable *from;
+ struct toktable *subject;
+ struct toktable *body;
+ struct toktable *attachment_name;
+
+ /* Encoding chain 0 stores all msgids appearing in the following message headers:
+ * Message-Id, In-Reply-To, References. Used for thread reconciliation.
+ * Encoding chain 1 stores just the Message-Id. Used for search by message ID.
+ */
+ struct toktable2 *msg_ids;
+};
+/*}}}*/
+
+enum folder_type {/*{{{*/
+ FT_MAILDIR, /* maildir folders (the maildir_folders list in main) */
+ FT_MH, /* MH folders (the mh_folders list in main) */
+ FT_MBOX, /* mbox files (the mboxen list in main) */
+ FT_RAW, /* output type chosen by -r/--raw-output; main does not require an mfolder for it */
+ FT_EXCERPT /* output type chosen by -x/--excerpt-output; main does not require an mfolder for it */
+};
+/*}}}*/
+
+struct string_list {/*{{{*/
+ struct string_list *next;
+ struct string_list *prev;
+ char *data;
+};
+/*}}}*/
+
+struct msg_src {
+ enum {MS_FILE, MS_MBOX} type;
+ char *filename;
+ off_t start;
+ size_t len;
+};
+
+/* Outcomes of checking a filename/dirname to see whether to keep on looking
+ * at filenames within this dir. */
+enum traverse_check {
+ TRAV_PROCESS, /* Continue looking at this entry */
+ TRAV_IGNORE, /* Ignore just this dir entry */
+ TRAV_FINISH /* Ignore this dir entry and don't bother looking at the rest of the directory */
+};
+
+struct traverse_methods {
+ int (*filter)(const char *, const struct stat *);
+ enum traverse_check (*scrutinize)(int, const char *);
+};
+
+extern struct traverse_methods maildir_traverse_methods;
+extern struct traverse_methods mh_traverse_methods;
+extern struct traverse_methods mbox_traverse_methods;
+
+extern int verbose; /* cmd line -v switch */
+extern int do_hardlinks; /* cmd line -H switch */
+
+/* Lame fix for systems where NAME_MAX isn't defined after including the above
+ * set of .h files (Solaris, FreeBSD so far). Probably grossly oversized but
+ * it'll do. */
+
+#if !defined(NAME_MAX)
+#define NAME_MAX 4096
+#endif
+
+/* In glob.c */
+struct globber;
+struct globber_array;
+
+struct globber *make_globber(const char *wildstring);
+void free_globber(struct globber *old);
+int is_glob_match(struct globber *g, const char *s);
+struct globber_array *colon_sep_string_to_globber_array(const char *in);
+int is_globber_array_match(struct globber_array *ga, const char *s);
+void free_globber_array(struct globber_array *in);
+
+/* In hash.c */
+unsigned int hashfn( unsigned char *k, unsigned int length, unsigned int initval);
+
+/* In dirscan.c */
+struct msgpath_array *new_msgpath_array(void);
+int valid_mh_filename_p(const char *x);
+void free_msgpath_array(struct msgpath_array *x);
+void string_list_to_array(struct string_list *list, int *n, char ***arr);
+void split_on_colons(const char *str, int *n, char ***arr);
+void build_message_list(char *folder_base, char *folders, enum folder_type ft,
+ struct msgpath_array *msgs, struct globber_array *omit_globs);
+
+/* In rfc822.c */
+struct rfc822 *make_rfc822(char *filename);
+void free_rfc822(struct rfc822 *msg);
+enum data_to_rfc822_error {
+ DTR8_OK,
+ DTR8_MISSING_END, /* missing endpoint marker. */
+ DTR8_MULTIPART_SANS_BOUNDARY, /* multipart with no boundary string defined */
+ DTR8_BAD_HEADERS, /* corrupt headers */
+ DTR8_BAD_ATTACHMENT /* corrupt attachment (e.g. no body part) */
+};
+struct rfc822 *data_to_rfc822(struct msg_src *src, char *data, int length, enum data_to_rfc822_error *error);
+void create_ro_mapping(const char *filename, unsigned char **data, int *len);
+void free_ro_mapping(unsigned char *data, int len);
+char *format_msg_src(struct msg_src *src);
+
+/* In tok.c */
+struct toktable *new_toktable(void);
+struct toktable2 *new_toktable2(void);
+void free_token(struct token *x);
+void free_token2(struct token2 *x);
+void free_toktable(struct toktable *x);
+void free_toktable2(struct toktable2 *x);
+void add_token_in_file(int file_index, unsigned int hash_key, char *tok_text, struct toktable *table);
+void check_and_enlarge_encoding(struct matches *m);
+void insert_index_on_encoding(struct matches *m, int idx);
+void add_token2_in_file(int file_index, unsigned int hash_key, char *tok_text, struct toktable2 *table, int add_to_chain1);
+
+/* In db.c */
+#define CREATE_RANDOM_DATABASE_HASH 0
+struct database *new_database(unsigned int hash_key);
+struct database *new_database_from_file(char *db_filename, int do_integrity_checks);
+void free_database(struct database *db);
+void maybe_grow_message_arrays(struct database *db);
+void tokenise_message(int file_index, struct database *db, struct rfc822 *msg);
+int update_database(struct database *db, struct msgpath *sorted_paths, int n_paths, int do_fast_index);
+void check_database_integrity(struct database *db);
+int cull_dead_messages(struct database *db, int do_integrity_checks);
+
+/* In mbox.c */
+void build_mbox_lists(struct database *db, const char *folder_base,
+ const char *mboxen_paths, struct globber_array *omit_globs);
+int add_mbox_messages(struct database *db);
+void compute_checksum(const char *data, size_t len, checksum_t *csum);
+void cull_dead_mboxen(struct database *db);
+unsigned int encode_mbox_indices(unsigned int mb, unsigned int msg);
+void decode_mbox_indices(unsigned int index, unsigned int *mb, unsigned int *msg);
+int verify_mbox_size_constraints(struct database *db);
+void glob_and_expand_paths(const char *folder_base, char **paths_in, int n_in, char ***paths_out, int *n_out, const struct traverse_methods *methods, struct globber_array *omit_globs);
+
+/* In glob.c (NOTE: these repeat the identical declarations made earlier in this header; redundant but harmless) */
+struct globber;
+
+struct globber *make_globber(const char *wildstring);
+void free_globber(struct globber *old);
+int is_glob_match(struct globber *g, const char *s);
+
+/* In writer.c */
+void write_database(struct database *db, char *filename, int do_integrity_checks);
+
+/* In search.c */
+int search_top(int do_threads, int do_augment, char *database_path, char *complete_mfolder, char **argv, enum folder_type ft, int verbose);
+
+/* In stats.c */
+void get_db_stats(struct database *db);
+
+/* In dates.c */
+int scan_date_string(char *in, time_t *start, int *has_start, time_t *end, int *has_end);
+
+/* In dumper.c */
+void dump_database(char *filename);
+
+/* In strexpand.c */
+char *expand_string(const char *p);
+
+/* In dotlock.c */
+void lock_database(char *path, int forced_unlock);
+void unlock_database(void);
+void unlock_and_exit(int code);
+
+/* In mairix.c */
+void report_error(const char *str, const char *filename);
+
+#endif /* MAIRIX_H */
diff --git a/src/mairix/mairix.spec b/src/mairix/mairix.spec
@@ -0,0 +1,45 @@
+Name: mairix
+Summary: A maildir indexer and searcher
+Version: 0.23
+Release: 1
+Source: %{name}-%{version}.tar.gz
+License: GPL
+Group: Application/Internet
+Packager: Richard P. Curnow
+BuildRoot: %{_tmppath}/%{name}-%{version}-root-%(id -u -n)
+Requires: info
+URL: http://www.rc0.org.uk/mairix
+
+%description
+mairix is a tool for indexing email messages stored in maildir format folders
+and performing fast searches on the resulting index. The output is a new
+maildir folder containing symbolic links to the matched messages.
+
+%prep
+%setup -q
+
+%build
+CFLAGS="$RPM_OPT_FLAGS" ./configure --prefix=%{_prefix}
+make
+
+%install
+rm -rf $RPM_BUILD_ROOT
+cd $RPM_BUILD_DIR/mairix-%{version}
+make install DESTDIR=$RPM_BUILD_ROOT mandir=$RPM_BUILD_ROOT/%{_mandir}
+cp README dotmairixrc.eg ..
+
+%files
+%{_bindir}/mairix
+%doc README
+%doc dotmairixrc.eg
+%doc %{_mandir}/man1/mairix.1.gz
+%doc %{_mandir}/man5/mairixrc.5.gz
+
+%changelog
+* Fri Mar 24 2006 Andre Costa <blueser@gmail.com> - 0.18
+- Updated to version 0.18
+- Included URL on header
+- removed references to 'mairix.txt', 'mairix.html' and 'mairix.info'
+- .info files have been deprecated
+- removed useless 'post' section
+- makefile's "mandir" is pointing to /usr/man instead of /usr/share/man
diff --git a/src/mairix/mairixrc.5 b/src/mairix/mairixrc.5
@@ -0,0 +1,405 @@
+.TH MAIRIXRC 5 "January 2006"
+.de Sx
+.PP
+.ne \\$1
+.nf
+.na
+.RS 12
+..
+.de Ex
+.RE
+.fi
+.ad
+.IP "" 7
+..
+.SH NAME
+mairixrc \- configuration file for mairix(1)
+.SH SYNOPSIS
+$HOME/.mairixrc
+.SH DESCRIPTION
+.PP
+The
+.I mairixrc
+file tells
+.B mairix
+where your mail folders are located. It also tells
+.B mairix
+where the results of searches are to be written.
+
+.B mairix
+searches for this file at
+.I $HOME/.mairixrc
+unless the
+.B -f
+option is used.
+
+The directives
+.BR base ,
+.BR mfolder ,
+and
+.B database
+must always appear in the file. There must also be some folder definitions
+(using the
+.BR maildir ,
+.BR mh ,
+or
+.BR mbox )
+directives.
+
+.SS Comments
+Any line starting with a '#' character is treated as a comment.
+
+.SS Directives
+.TP
+.BI base= base-directory
+.br
+This defines the path to the common parent directory of all your
+maildir folders.
+
+If the path is relative, it is treated as relative to the location of the
+.I mairixrc
+file.
+
+.TP
+.BI maildir= list-of-folder-specifications
+This is a colon-separated list of the Maildir folders (relative to
+`base') that you want indexed. Any entry that ends `...' is
+recursively scanned to find any Maildir folders underneath it.
+
+More than one line starting with `maildir' can be included. In
+this case, mairix joins the lines together with colons as though a
+single list of folders had been given on a single very long line.
+
+Each colon-separated entry may be a wildcard. See the discussion
+under mbox (below) for the wildcard syntax. For example
+.Sx 1
+maildir=zzz/foo*...
+.Ex
+will match maildir folders like these (relative to the
+.IR base-directory )
+.Sx 4
+zzz/foobar/xyz
+zzz/fooquux
+zzz/foo
+zzz/fooabc/u/v/w
+.Ex
+
+and
+.Sx 1
+maildir=zzz/foo[abc]*
+.Ex
+will match maildir folders like these (relative to the folder_base)
+.Sx 4
+zzz/fooa
+zzz/fooaaaxyz
+zzz/foobcd
+zzz/fooccccccc
+.Ex
+If a folder name contains a colon, you can write this by using the
+sequence '\\:' to escape the colon. Otherwise, the backslash
+character is treated normally. (If the folder name actually
+contains the sequence '\\:', you're out of luck.)
+
+.TP
+.BI mh= list-of-folder-specifications
+.br
+This is a colon-separated list of the MH folders (relative to
+`base') that you want indexed. Any entry that ends '...' is
+recursively scanned to find any MH folders underneath it.
+
+More than one line starting with 'mh' can be included. In this
+case, mairix joins the lines together with colons as though a
+single list of folders had been given on a single very long line.
+
+Each colon-separated entry may be a wildcard, see the discussion
+under maildir (above) and mbox (below) for the syntax and
+semantics of specifying wildcards.
+
+.B mairix
+recognizes the types of MH folders created by the following email applications:
+.RS 7
+.IP "*"
+xmh
+.IP "*"
+sylpheed
+.IP "*"
+claws-mail
+.IP "*"
+evolution
+.IP "*"
+NNML
+.IP "*"
+Mew
+.RE
+
+.TP
+.BI mbox= list-of-folder-specifications
+.br
+This is a colon-separated list of the mbox folders (relative to
+`base') that you want indexed.
+
+Each colon-separated item in the list can be suffixed by '...'.
+If the item matches a regular file, that file is treated as a mbox
+folder and the '...' suffix is ignored. If the item matches a
+directory, a recursive scan of everything inside that directory is
+made, and all regular files are initially considered as mbox
+folders. (Any directories found in this scan are themselves
+scanned, since the scan is recursive.)
+
+Each colon-separated item may contain wildcard operators, but only
+in its final path component. The wildcard operators currently
+supported are
+
+.TP
+*
+.br
+Match zero or more characters (each character matched is
+arbitrary)
+
+.TP
+?
+.br
+Match exactly one arbitrary character
+
+.TP
+[abcs-z]
+.br
+Character class : match a single character from the set a, b,
+c, s, t, u, v, w, x, y and z.
+
+To include a literal ']' in the class, place it immediately
+after the opening '['. To include a literal '-' in the
+class, place it immediately before the closing ']'.
+
+If these metacharacters are included in non-final path components,
+they have no special meaning.
+
+Here are some examples
+
+.TP
+mbox=foo/bar*
+.br
+matches 'foo/bar', 'foo/bar1', 'foo/barrrr' etc
+
+.TP
+mbox=foo*/bar*
+.br
+matches 'foo*/bar', 'foo*/bar1', 'foo*/barrrr' etc
+
+.TP
+mbox=foo/*
+.br
+matches 'foo/bar', 'foo/bar1', 'foo/barrrr', 'foo/foo',
+\'foo/x' etc
+
+.TP
+mbox=foo...
+.br
+matches any regular file in the tree rooted at 'foo'
+
+.TP
+mbox=foo/*...
+.br
+same as before
+
+.TP
+mbox=foo/[a-z]*...
+.br
+matches 'foo/a', 'foo/aardvark/xxx', 'foo/zzz/foobar',
+\'foo/w/x/y/zzz', but not 'foo/A/foobar'
+
+Regular files that are mbox folder candidates are examined
+internally. Only files containing standard mbox 'From ' separator
+lines will be scanned for messages.
+
+If a regular file has a name ending in '.gz', and gzip support is
+compiled into the
+.B mairix
+binary, the file will be treated as a gzipped mbox.
+
+If a regular file has a name ending in '.bz2', and bzip support is
+compiled into the
+.B mairix
+binary, the file will be treated as a bzip2'd mbox.
+
+More than one line starting with 'mbox' can be included. In this
+case,
+.B mairix
+joins the lines together with colons as though a
+single list of folders had been given on a single very long line.
+
+.B mairix
+performs no locking of mbox folders when it is accessing
+them. If a mail delivery program is modifying the mbox at the
+same time, it is likely that one or more messages in the mbox will
+never get indexed by
+.B mairix
+(until the database is removed and recreated from scratch, anyway.) The
+assumption is that
+.B mairix
+will be used to index archive folders rather than incoming ones, so this is
+unlikely to be much of a problem in reality.
+
+.B mairix
+can support a maximum of 65536 separate mboxes, and a
+maximum of 65536 messages within any one mbox.
+
+.TP
+.BI omit= list-of-glob-patterns
+This is a colon-separated list of glob patterns for folders to be omitted from
+the indexing. This allows wide wildcards and recursive elements to be used
+in the
+.BR maildir ", " mh ", and " mbox
+directives, with the
+.B omit
+option used to selectively remove unwanted folders from the folder
+lists.
+
+Within the glob patterns, a single '*' matches any
+sequence of characters other than '/'. However '**' matches any
+sequence of characters including '/'. This allows glob patterns
+to be constructed which have a wildcard for just one directory
+component, or for any number of directory components.
+
+The \fBomit\fP option can be specified as many times as required so
+that the list of patterns doesn't all have to fit on one line.
+
+As an example,
+.Sx 2
+mbox=bulk...
+omit=bulk/spam*
+.Ex
+will index all mbox folders at any level under the 'bulk'
+subdirectory of the base folder, except for those folders whose
+names start 'bulk/spam', e.g. 'bulk/spam', 'bulk/spam2005' etc.
+
+In contrast,
+.Sx 2
+mbox=bulk...
+omit=bulk/spam**
+.Ex
+will index all mbox folders at any level under the 'bulk'
+subdirectory of the base folder, except for those folders whose
+names start 'bulk/spam', e.g. 'bulk/spam', 'bulk/spam2005',
+\'bulk/spam/2005', 'bulk/spam/2005/jan' etc.
+
+.TP
+.B nochecks
+This takes no arguments. If a line starting with
+.B nochecks
+is present, it is the equivalent of specifying the
+.B -Q
+flag to every indexing run.
+
+.TP
+.BI mfolder= match-folder-name
+This defines the name of the folder (within the directory
+specified by
+.BR base )
+into which the search mode writes its output. (If the
+.B mformat
+used is 'raw' or 'excerpt', then this setting is not used and may be omitted.)
+
+The
+.B mfolder
+setting may be over-ridden for a particular search by using the
+.B -o
+option to
+.BR mairix .
+
+.B mairix
+will refuse to output search results to a folder that appears to be amongst
+those that are indexed. This is to prevent accidental deletion of emails.
+
+If the mfolder value starts with '/' or with './', it is
+taken as a pathname in its own right. This allows you to specify
+absolute paths and paths relative to the current directory where
+the mfolder should be written. Otherwise, the value of mfolder is
+appended to the value of base, in the same way as for the source
+folders.
+
+.TP
+.BI mformat= format
+This defines the type of folder used for the match folder where
+the search results go. There are five valid settings for
+.IR format ,
+namely 'maildir', 'mh', 'mbox', 'raw' or 'excerpt'. If the 'raw' setting is
+used then
+.B mairix
+will just print out the path names of the files that match and no match folder
+will be created. If the 'excerpt' setting is used,
+.B mairix
+will also print out the To:, Cc:, From:, Subject: and Date: headers of the
+matching messages. 'maildir' is the default if this option is not
+defined. The setting is case-insensitive.
+
+.TP
+.BI database= path-to-database
+.br
+This defines the path where
+.BR mairix 's
+index database is kept. You can keep this file anywhere you like.
+
+Currently,
+.B mairix
+will place a single database file at the location indicated by
+.IR path-to-database .
+However, a future version of
+.B mairix
+may instead place a directory containing several files at this location.
+
+.I path-to-database
+should be an absolute pathname (starting with '/'). If a relative pathname is
+used, it will be interpreted relative to the current directory at the time
+.B mairix
+is run,
+.RB ( not
+relative to the location of the
+.I mairixrc
+file or anything like that.)
+
+.SS Expansions
+
+The part of each line in '.mairixrc' following the equals sign can
+contain the following types of expansion:
+
+.TP
+.B Home directory expansion
+If the sequence '~/' appears at the start of the text after the
+equals sign, it is expanded to the user's home directory. Example:
+.Sx 1
+database=~/Mail/mairix_database
+.Ex
+.TP
+.B Environment expansion
+If a '$' is followed by a sequence of alpha-numeric characters (or
+\'_'), the whole string is replaced by looking up the corresponding
+environment variable. Similarly, if '$' is followed by an open
+brace ('{'), everything up to the next close brace is looked up as
+an environment variable and the result replaces the entire
+sequence.
+
+Suppose in the shell we do
+.Sx 1
+export FOO=bar
+.Ex
+and the '.mairixrc' file contains
+.Sx 2
+maildir=xxx/$FOO
+mbox=yyy/a${FOO}b
+.Ex
+this is equivalent to
+.Sx 2
+maildir=xxx/bar
+mbox=yyy/abarb
+.Ex
+If the specified environment variable is not set, the replacement
+is the empty string.
+
+.SH NOTES
+.PP
+An alternative path to the configuration file may be given with the
+.B \-f
+option to mairix(1).
+
+
diff --git a/src/mairix/mbox.c b/src/mairix/mbox.c
@@ -0,0 +1,1060 @@
+/*
+ mairix - message index builder and finder for maildir folders.
+
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2003,2004,2005,2006,2007
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <assert.h>
+#include <dirent.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include "mairix.h"
+#include "from.h"
+#include "fromcheck.h"
+#include "md5.h"
+
+/* One mbox file found on disk during the current scan, before it has been
+ * reconciled with the mbox table held in the database. */
+struct extant_mbox {/*{{{*/
+ char *full_path; /* path of the mbox file; also the sort and lookup key */
+ time_t mtime; /* modification time taken from lstat() */
+ size_t size; /* file size taken from lstat() */
+ int db_index; /* index into db->mboxen, or -1 while unmatched */
+ /* + stuff to store positions etc of individual messages. */
+};
+/*}}}*/
+/* qsort() comparator: orders two struct extant_mbox records by the strcmp()
+ * ordering of their full_path strings. */
+static int compare_extant_mboxen(const void *a, const void *b)/*{{{*/
+{
+  const struct extant_mbox *lhs = a;
+  const struct extant_mbox *rhs = b;
+
+  return strcmp(lhs->full_path, rhs->full_path);
+}
+/*}}}*/
+/* Bisection search for 'key' among the n_extant entries of sorted_mboxen,
+ * which must already be sorted by full_path.  Returns the index of the
+ * matching entry, or -1 if there is none (including when the table is
+ * empty). */
+static int lookup_extant_mbox(struct extant_mbox *sorted_mboxen, int n_extant, char *key)/*{{{*/
+{
+  int lo = 0;
+  int hi = n_extant;
+  int mid = -1;
+
+  while (lo < hi) {
+    int cmp;
+
+    mid = (lo + hi) >> 1;
+    /* Should only get called on 'file' type messages - TBC */
+    cmp = strcmp(sorted_mboxen[mid].full_path, key);
+    if (cmp == 0) {
+      return mid;
+    }
+    if (mid == lo) {
+      /* The interval has shrunk to a single non-matching entry. */
+      return -1;
+    }
+    if (cmp > 0) {
+      hi = mid;
+    } else {
+      lo = mid;
+    }
+  }
+  return mid;
+}
+/*}}}*/
+/* Append a fresh db->mboxen entry for every extant mbox whose db_index is
+ * still negative (i.e. it was not matched to an existing database entry),
+ * growing the table if required.  New entries start with no messages and
+ * zero recorded file mtime/size. */
+static void append_new_mboxen_to_db(struct database *db, struct extant_mbox *extant_mboxen, int n_extant)/*{{{*/
+{
+  int n_new = 0;
+  int total;
+  int src, dst;
+
+  /* Count the mboxen that need a new slot. */
+  for (src = 0; src < n_extant; src++) {
+    if (extant_mboxen[src].db_index < 0) {
+      n_new++;
+    }
+  }
+
+  total = db->n_mboxen + n_new;
+  if (total > db->max_mboxen) {
+    db->max_mboxen = total;
+    db->mboxen = grow_array(struct mbox, total, db->mboxen);
+  }
+
+  /* Fill the new slots, in the order the unmatched mboxen appear. */
+  dst = db->n_mboxen;
+  for (src = 0; src < n_extant; src++) {
+    const struct extant_mbox *em = &extant_mboxen[src];
+    struct mbox *mb;
+
+    if (em->db_index >= 0) {
+      continue;
+    }
+    mb = &db->mboxen[dst++];
+    mb->path = new_string(em->full_path);
+    mb->current_mtime = em->mtime;
+    mb->current_size = em->size;
+    mb->file_mtime = 0;
+    mb->file_size = 0;
+    mb->n_msgs = 0;
+    mb->n_old_msgs_valid = 0;
+    mb->max_msgs = 0;
+    mb->start = NULL;
+    mb->len = NULL;
+    mb->check_all = NULL;
+  }
+
+  db->n_mboxen = total;
+}
+/*}}}*/
+/* Compute the MD5 digest of the 'len' bytes at 'data' and copy it into
+ * *csum. */
+void compute_checksum(const char *data, size_t len, checksum_t *csum)/*{{{*/
+{
+  MD5_CTX ctx;
+
+  MD5Init(&ctx);
+  MD5Update(&ctx, (unsigned char *) data, len);
+  MD5Final(&ctx);
+  memcpy(csum, ctx.digest, sizeof(ctx.digest));
+}
+/*}}}*/
+/* Return 1 if message 'idx' of 'mb' still lies wholly inside the mapped
+ * file va[0..len) and its bytes still hash to the stored checksum; return 0
+ * otherwise. */
+static int message_is_intact(struct mbox *mb, int idx, char *va, size_t len)/*{{{*/
+{
+  /* TODO : later, look at whether to optimise this in some way, e.g. by doing
+     an initial check on just the first 1k of a message, this will detect
+     failures much faster at the cost of extra storage. */
+  checksum_t current;
+
+  /* Message overruns the end of the file - can't possibly be intact. */
+  if (mb->start[idx] + mb->len[idx] > len) {
+    return 0;
+  }
+
+  compute_checksum(va + mb->start[idx], mb->len[idx], &current);
+  return (memcmp(mb->check_all[idx], &current, sizeof(checksum_t)) == 0) ? 1 : 0;
+}
+/*}}}*/
+/* Return how many of the previously indexed messages of 'mb' (counted from
+ * the start of the file) are still intact in the mapped file va[0..len). */
+static int find_number_intact(struct mbox *mb, char *va, size_t len)/*{{{*/
+{
+  int lo, hi;
+
+  if (mb->n_msgs == 0) {
+    return 0;
+  }
+  /* Pick up the common obvious case first - where new messages have been
+     appended to the end of the mbox */
+  if (message_is_intact(mb, mb->n_msgs - 1, va, len)) {
+    return mb->n_msgs; /* The lot */
+  }
+  if (!message_is_intact(mb, 0, va, len)) {
+    return 0; /* None of them. */
+  }
+
+  /* Looks like a deletion has occurred earlier in the file => binary chop
+     search to find the last message that's still valid. Assume that
+     everything below a valid message is still valid itself (possibly
+     dangerous assumption, time will tell.) */
+  lo = 0;
+  hi = mb->n_msgs;
+  /* Loop invariant : always, message[lo] is intact, message[hi] isn't. */
+  while (lo < hi) {
+    int mid = (lo + hi) >> 1;
+
+    if (mid == lo) {
+      break;
+    }
+    if (message_is_intact(mb, mid, va, len)) {
+      lo = mid;
+    } else {
+      hi = mid;
+    }
+  }
+  /* By loop invariant, message[lo] is the highest valid one. */
+  return lo + 1;
+}
+/*}}}*/
+
+
+static int fromtab_inited = 0;
+static signed char fromtab[256];
+
+/* Fill fromtab[] for the separator scanner: the entry for the k-th
+ * character of "\nFrom " has only bit k clear, every other byte value maps
+ * to all-ones. */
+static void init_fromtab(void)/*{{{*/
+{
+  static const char separator[] = "\nFrom ";
+  int bit;
+
+  memset(fromtab, 0xff, 256);
+  for (bit = 0; separator[bit] != '\0'; bit++) {
+    fromtab[(int)(unsigned char)separator[bit]] = ~(1 << bit);
+  }
+}
+/*}}}*/
+
+/* REAL CHECKING : need to see if the line looks like this:
+ * From [ <return-path> ] <weekday> <month> <day> <time> [ <timezone> ] <year>
+ (from the mutt sources).
+ * where timezone can be two words rather than one sometimes. */
+
+#undef DEBUG_DFA
+
+/* Run the fromcheck state machine over the text following the space at
+ * offset n (the space that ends a candidate "From ").  Returns 1 if the
+ * machine reaches a FROMCHECK_PASS state, 0 if it rejects or the end of the
+ * mapping is reached first.  When 'verbose' is non-zero each transition is
+ * printed to stdout. */
+static int looks_like_from_separator(off_t n, char *va, size_t len, int verbose)/*{{{*/
+{
+  int state = 0;
+  int result = 0;
+
+  n++; /* look beyond the space. */
+
+  while (n < len) {
+    char ch = va[n++];
+
+    if (verbose) {
+      printf("current_state=%d, p=%02x (%1c) ", state, (int)(unsigned char)ch, ((ch>=32)&&(ch<=126))?ch:'.');
+    }
+    state = fromcheck_next_state(state, (int)fromcheck_char2tok[(int)(unsigned char)ch]);
+    if (verbose) {
+      printf("next_state=%d\n", state);
+    }
+    if (state < 0) {
+      /* not matched */
+      break;
+    }
+    if (fromcheck_attr[state] == FROMCHECK_PASS) {
+      /* matched good separator */
+      result = 1;
+      break;
+    }
+  }
+
+  /* If we hit the end of the file, it doesn't look like a real 'From' line. */
+#ifdef DEBUG_DFA
+  unlock_and_exit(0);
+#endif
+  return result;
+}
+/*}}}*/
+
+/* Find the next mbox 'From ' separator line at or after offset n in the
+ * mapped file va[0..len).  Returns the offset of the 'F', or -1 if no
+ * separator is found.
+ *
+ * The scan is a shift-register matcher driven by fromtab[]: the table entry
+ * for the k-th character of "\nFrom " has only bit k clear and every other
+ * byte maps to all-ones.  'reg' is shifted left one place per input byte and
+ * ORed with the table entry, so bit 5 of 'reg' is clear only when the last
+ * six bytes read were exactly "\nFrom ".  Each such candidate is then
+ * confirmed with looks_like_from_separator().  fromtab[] must have been set
+ * up (init_fromtab) before this is called. */
+static off_t find_next_from(off_t n, char *va, size_t len)/*{{{*/
+{
+ unsigned char c;
+ unsigned long hit;
+ unsigned long reg;
+ unsigned long mask;
+
+ /* At offset 0 there is no preceding '\n' for the matcher to see, so a
+ * leading "From " is accepted directly, without the stricter check. */
+ if (!n) {
+ if ((len >= 5) && !strncmp(va, "From ", 5)) {
+ return 0;
+ }
+ }
+
+scan_again:
+
+ /* All-ones means "nothing matched so far". */
+ reg = (unsigned long) -1;
+ /* Everything except bit 5 set: ~(reg|hit) is non-zero iff bit 5 of reg is clear. */
+ hit = ~(1<<5);
+ while (n < len) {
+ c = va[n];
+ mask = (unsigned long)(signed long) fromtab[(int)c];
+ reg = (reg << 1) | mask;
+ if (~(reg|hit)) {
+ if (looks_like_from_separator(n, va, len, 0)) {
+ /* n indexes the space that ends "From ", so the 'F' is 4 bytes back. */
+ return (n-4);
+ } else {
+#if 0
+ int nn;
+ printf("Rejecting from line at %d\n", n);
+ nn = n;
+ printf(" >> ");
+ while (nn < len) {
+ unsigned char c = va[nn++];
+ putchar(c);
+ if (c=='\n') break;
+ }
+ looks_like_from_separator(n, va, len, 1);
+#endif
+ /* Rejected candidate: restart with a cleared shift register.  n is not
+ * advanced here; the space at n is re-read, cannot complete a match by
+ * itself, and the loop then moves on. */
+ goto scan_again;
+ }
+ }
+ n++;
+ }
+ return -1;
+}
+/*}}}*/
+/* Starting at offset n, skip to just past the next '\n' in va[0..len).
+ * Returns the offset of the first byte of the following line, or -1 if the
+ * end of the data is reached (including when the newline is the last byte). */
+static off_t start_of_next_line(off_t n, char *va, size_t len)/*{{{*/
+{
+  /* We are always starting from 'From ' so we can advance before testing */
+  for (;;) {
+    unsigned char ch = va[n];
+
+    n++;
+    if (!(n < len)) {
+      break;
+    }
+    if (ch == '\n') {
+      break;
+    }
+  }
+
+  if (n == len) {
+    return -1;
+  }
+  return n;
+}
+/*}}}*/
+
+
+/* Scan the part of the mapped mbox va[0..len) that lies beyond the first
+ * mb->n_old_msgs_valid messages and build a singly linked list with one
+ * node per 'From '-delimited chunk.  Each node records the offset of the
+ * line after the 'From ' line and the length up to the next separator (or
+ * to the end of the file).  The number of nodes is stored in *n_messages;
+ * the list head (NULL if nothing was found) is returned.  The nodes are
+ * allocated here and released later by add_mbox_messages(). */
+static struct message_list *build_new_message_list(struct mbox *mb, char *va, size_t len, int *n_messages)/*{{{*/
+{
+ struct message_list *result, *here, *next;
+ off_t start_from, start_pos, end_from;
+ int old_percent = -100;
+ int N;
+
+/* Minimum change in percentage between two progress reports. */
+#define PERCENT_GRAN 2
+
+ *n_messages = 0;
+
+ result = here = NULL;
+ N = mb->n_old_msgs_valid;
+ if (N == 0) {
+ start_from = 0;
+ } else {
+ /* Must point to the \n at the end of the preceding message, otherwise the
+ 'From ' at the start of the first message in the section to be rescanned
+ won't get detected and that message won't get indexed. */
+ start_from = mb->start[N - 1] + mb->len[N - 1] - 1;
+ }
+
+ /* The separator scanner's lookup table is built lazily, once per process. */
+ if (!fromtab_inited) {
+ init_fromtab();
+ fromtab_inited = 1;
+ }
+
+ /* Locate next 'From ' at the start of a line */
+ start_from = find_next_from(start_from, va, len);
+ while (start_from != -1) {
+ /* The message proper begins on the line after the 'From ' line. */
+ start_pos = start_of_next_line(start_from, va, len);
+ if (start_pos == -1) {
+ /* Something is awry. */
+ goto done;
+ }
+ /* 'verbose' is not a parameter or local of this function; it is declared elsewhere. */
+ if (verbose) {
+ int percent;
+ percent = (int)(0.5 + 100.0 * (double) start_pos / (double) len);
+ if (percent > (old_percent+PERCENT_GRAN)) {
+ printf("Scanning mbox %s : %3d%% done\r", mb->path, percent);
+ fflush(stdout);
+ old_percent = percent;
+ }
+ }
+
+ /* The next separator (if any) marks the end of this chunk. */
+ end_from = find_next_from(start_pos, va, len);
+ next = new(struct message_list);
+ next->next = NULL;
+ next->start = start_pos;
+ if (end_from == -1) {
+ /* message runs through to end of file. */
+ next->len = len - start_pos;
+ } else {
+ next->len = end_from - start_pos;
+ }
+ /* Append the node to the tail of the list. */
+ if (!result) {
+ result = here = next;
+ } else {
+ here->next = next;
+ here = next;
+ }
+ ++*n_messages;
+ start_from = end_from;
+ }
+
+done:
+ if (verbose) {
+ printf("Scanning mbox %s : 100%% done\n", mb->path);
+ fflush(stdout);
+ }
+ return result;
+
+}
+/*}}}*/
+/* Re-examine an mbox whose file still exists but whose mtime or size has
+ * changed since the last mairix run: work out how many of the already
+ * indexed messages are still intact, then list the chunks that follow
+ * them. */
+static void rescan_mbox(struct mbox *mb, char *va, size_t len)/*{{{*/
+{
+  int n_intact;
+
+  /* Find the last message in the box that appears to be intact.  This must
+   * be stored before the list is built, because the scan starts from it. */
+  n_intact = find_number_intact(mb, va, len);
+  mb->n_old_msgs_valid = n_intact;
+
+  mb->new_msgs = build_new_message_list(mb, va, len, &mb->n_new_msgs);
+}
+/*}}}*/
+/* Mark an mbox entry as dead: drop its message counts, release its path
+ * (a NULL path is what identifies a dead entry) and release the
+ * per-message arrays.
+ *
+ * The array pointers are reset to NULL together with max_msgs so that the
+ * entry ends up in the same state as a freshly initialised one; leaving
+ * them dangling would make any later grow or free of this entry undefined
+ * behaviour. */
+static void deaden_mbox(struct mbox *mb)/*{{{*/
+{
+  mb->n_old_msgs_valid = 0;
+  mb->n_msgs = 0;
+
+  free(mb->path);
+  mb->path = NULL;
+
+  if (mb->max_msgs > 0) {
+    free(mb->start);
+    free(mb->len);
+    free(mb->check_all);
+    mb->start = NULL;
+    mb->len = NULL;
+    mb->check_all = NULL;
+    mb->max_msgs = 0;
+  }
+}
+/*}}}*/
+/* Reconcile the mboxen already in the database with the sorted list of
+ * mboxen found on disk.  Matched database entries pick up the current
+ * mtime/size, database entries with no match on disk are deadened, and
+ * on-disk mboxen with no database entry are appended as new entries. */
+static void marry_up_mboxen(struct database *db, struct extant_mbox *extant_mboxen, int n_extant)/*{{{*/
+{
+  int *old_to_new_idx = NULL;
+  int i;
+
+  for (i = 0; i < n_extant; i++) {
+    extant_mboxen[i].db_index = -1;
+  }
+
+  if (db->n_mboxen > 0) {
+    old_to_new_idx = new_array(int, db->n_mboxen);
+
+    for (i = 0; i < db->n_mboxen; i++) {
+      struct mbox *mb = &db->mboxen[i];
+      int idx;
+
+      old_to_new_idx[i] = -1;
+      if (!mb->path) {
+        continue;
+      }
+      idx = lookup_extant_mbox(extant_mboxen, n_extant, mb->path);
+      if (idx < 0) {
+        continue;
+      }
+      old_to_new_idx[i] = idx;
+      extant_mboxen[idx].db_index = i;
+      mb->current_mtime = extant_mboxen[idx].mtime;
+      mb->current_size = extant_mboxen[idx].size;
+    }
+  }
+
+  for (i = 0; i < db->n_mboxen; i++) {
+    if (old_to_new_idx[i] < 0) {
+      /* old mbox is no more. */
+      deaden_mbox(&db->mboxen[i]);
+    }
+  }
+
+  /* Append entries for newly discovered mboxen */
+  append_new_mboxen_to_db(db, extant_mboxen, n_extant);
+
+  /* From here on, everything we need is in the db */
+  free(old_to_new_idx);
+}
+/*}}}*/
+/* Report every mbox path that appears more than once in the (sorted) list
+ * and, if any duplicate was found, exit through unlock_and_exit(1). */
+static void check_duplicates(struct extant_mbox *extant_mboxen, int n_extant)/*{{{*/
+{
+  /* Note, list is sorted at this point, so duplicates are adjacent. */
+  int found = 0;
+  int i;
+
+  for (i = 1; i < n_extant; i++) {
+    const char *prev = extant_mboxen[i - 1].full_path;
+
+    if (strcmp(prev, extant_mboxen[i].full_path) == 0) {
+      printf("mbox %s is listed twice in the mairixrc file\n", prev);
+      found = 1;
+    }
+  }
+
+  if (!found) {
+    return;
+  }
+  printf("Exiting, the mairixrc file needs fixing\n");
+  unlock_and_exit(1);
+}
+/*}}}*/
+/* Return a pointer to the last '/' in the string 'in', or NULL if it
+ * contains none. */
+static char *find_last_slash(char *in)/*{{{*/
+{
+  return strrchr(in, '/');
+}
+/*}}}*/
+/* Append 'path' to the circular list headed by 'list' if the traversal
+ * filter accepts it and the part of the path beyond base_len does not match
+ * any omit glob.  Returns 1 if the path was appended, 0 otherwise. */
+static int append_shallow(char *path, int base_len, struct stat *sb, struct string_list *list, /*{{{*/
+    const struct traverse_methods *methods,
+    struct globber_array *omit_globs)
+{
+  struct string_list *node;
+
+  if (!(methods->filter)(path, sb)) {
+    return 0;
+  }
+  if (is_globber_array_match(omit_globs, path + base_len)) {
+    return 0;
+  }
+
+  node = new(struct string_list);
+  node->data = new_string(path);
+
+  /* Link the node in just before the list head, i.e. at the tail. */
+  node->next = list;
+  node->prev = list->prev;
+  list->prev->next = node;
+  list->prev = node;
+
+  return 1;
+}
+/*}}}*/
+/* Recursive counterpart of append_shallow(): offer 'path' itself to
+ * append_shallow() and, if it is a directory, walk its entries, appending
+ * regular files and recursing into sub-directories.  The traversal's
+ * 'scrutinize' callback decides per entry whether to process it, ignore it,
+ * or stop reading the directory.  Returns non-zero if anything was
+ * appended at this level or below. */
+static int append_deep(char *path, int base_len, struct stat *sb, struct string_list *list, /*{{{*/
+ const struct traverse_methods *methods,
+ struct globber_array *omit_globs)
+{
+ /* path is dir : read its contents, call append_shallow or self accordingly. */
+ /* path is file : call append_shallow. */
+ struct stat sb2;
+ char *xpath;
+ DIR *d;
+ struct dirent *de;
+ int appended_any = 0;
+ int this_file_matched;
+
+ this_file_matched = append_shallow(path, base_len, sb, list, methods, omit_globs);
+ appended_any |= this_file_matched;
+
+ if (S_ISDIR(sb->st_mode)) {
+ /* Room for path + '/' + an entry name of up to NAME_MAX bytes + NUL. */
+ xpath = new_array(char, strlen(path) + 2 + NAME_MAX);
+ d = opendir(path);
+ if (d) {
+ while ((de = readdir(d))) {
+ enum traverse_check status;
+ if (!strcmp(de->d_name, ".")) continue;
+ if (!strcmp(de->d_name, "..")) continue;
+ strcpy(xpath, path);
+ strcat(xpath, "/");
+ strcat(xpath, de->d_name);
+ if (!is_globber_array_match(omit_globs, xpath+base_len)) {
+ /* Filter out omissions at this point, e.g. to avoid wasting time on
+ * a recursive expansion of a tree that's going to get pruned in at
+ * the deepest level anyway. */
+ status = (methods->scrutinize)(this_file_matched, de->d_name);
+#if 0
+ /* debugging */
+ fprintf(stderr, "scrutinize for %s in %s returned %s\n",
+ de->d_name,
+ path,
+ (status == TRAV_FINISH) ? "FINISH" :
+ (status == TRAV_IGNORE) ? "IGNORE" : "PROCESS");
+#endif
+ switch (status) {
+ case TRAV_FINISH:
+ /* Stop reading this directory altogether. */
+ goto done_this_dir;
+ case TRAV_IGNORE:
+ /* Skip just this entry. */
+ goto next_path;
+ case TRAV_PROCESS:
+ /* Entries that cannot be stat()ed, or that are neither regular files
+ * nor directories, are silently skipped. */
+ if (stat(xpath, &sb2) >= 0) {
+ if (S_ISREG(sb2.st_mode)) {
+ appended_any |= append_shallow(xpath, base_len, &sb2, list, methods, omit_globs);
+ } else if (S_ISDIR(sb2.st_mode)) {
+ appended_any |= append_deep(xpath, base_len, &sb2, list, methods, omit_globs);
+ }
+ }
+ break;
+ }
+ }
+next_path:
+ /* A label must be followed by a statement. */
+ (void) 0;
+ }
+done_this_dir:
+ closedir(d);
+ }
+ free(xpath);
+ }
+ return appended_any;
+}
+/*}}}*/
+/* Expand a path whose final component contains wildcards.  The parent
+ * directory of 'last_comp' is read, each entry matching the wildcard (and
+ * not matching an omit glob) is stat()ed and handed to the 'append'
+ * callback.  Warnings are written to stderr if the directory cannot be
+ * opened or nothing matched. */
+static void handle_wild(char *path, int base_len, char *last_comp, struct string_list *list,/*{{{*/
+ int (*append)(char *, int, struct stat *, struct string_list *,
+ const struct traverse_methods *, struct globber_array *),
+ const struct traverse_methods *methods,
+ struct globber_array *omit_globs)
+{
+ /* last_comp is the character within 'path' where the wildcard stuff starts. */
+ struct globber *gg;
+ char *temp_path, *xpath;
+ DIR *d;
+ struct dirent *de;
+ int had_matches;
+
+ gg = make_globber(last_comp);
+
+ /* Null-terminate parent directory, i.e. null the character where the trailing / is */
+ if (last_comp > path) {
+ /* len counts the parent directory text plus its trailing '/'. */
+ int len = last_comp - path;
+ temp_path = new_array(char, len);
+ memcpy(temp_path, path, len-1);
+ temp_path[len-1] = '\0';
+ /* NOTE(review): for a wildcard directly under the root (e.g. "/x*") len is
+ * 1, so temp_path becomes the empty string rather than "/" and the
+ * opendir() below fails - confirm whether that case needs support. */
+ xpath = new_array(char, len + 2 + NAME_MAX);
+ } else {
+ /* No directory part: search the current directory. */
+ temp_path = new_string(".");
+ xpath = new_array(char, 3 + NAME_MAX);
+ }
+
+ d = opendir(temp_path);
+ had_matches = 0;
+ if (d) {
+ while ((de = readdir(d))) {
+ if (!strcmp(de->d_name, ".")) continue;
+ if (!strcmp(de->d_name, "..")) continue;
+ if (is_glob_match(gg, de->d_name)) {
+ struct stat xsb;
+ strcpy(xpath, temp_path);
+ strcat(xpath, "/");
+ strcat(xpath, de->d_name);
+ if (!is_globber_array_match(omit_globs, xpath+base_len)) {
+ /* Filter out omissions at this point, e.g. to avoid wasting time on
+ * a recursive expansion of a tree that's going to get pruned in full
+ * later anyway. */
+ /* Counted as a match even if the stat() below fails. */
+ had_matches = 1;
+ if (stat(xpath, &xsb) >= 0) {
+ (*append)(xpath, base_len, &xsb, list, methods, omit_globs);
+ }
+ }
+ }
+ }
+ closedir(d);
+ if (!had_matches) {
+ fprintf(stderr, "WARNING: Wildcard \"%s\" matched nothing in %s\n", last_comp, temp_path);
+ }
+ } else {
+ fprintf(stderr, "WARNING: Folder path %s does not exist\n", temp_path);
+ }
+
+
+ free(temp_path);
+ free(xpath);
+ /* NOTE(review): presumably the globber owns no further allocations, so a
+ * plain free() is enough - verify against make_globber(). */
+ free(gg);
+}
+/*}}}*/
+/* Handle a path with no wildcard in it: stat() it and pass it to the
+ * 'append' callback, or warn on stderr if the stat() fails. */
+static void handle_single(char *path, int base_len, struct string_list *list,/*{{{*/
+    int (*append)(char *, int, struct stat *, struct string_list *,
+                  const struct traverse_methods *, struct globber_array *),
+    const struct traverse_methods *methods,
+    struct globber_array *omit_globs)
+{
+  struct stat info;
+
+  if (stat(path, &info) < 0) {
+    fprintf(stderr, "WARNING: Folder path %s does not exist\n", path);
+    return;
+  }
+  (*append)(path, base_len, &info, list, methods, omit_globs);
+}
+/*}}}*/
+/* Traversal filter for mboxen: accept an entry only if its stat data says
+ * it is a regular file.  The path argument 'x' is not examined. */
+static int filter_is_file(const char *x, const struct stat *sb)/*{{{*/
+{
+  return S_ISREG(sb->st_mode) ? 1 : 0;
+}
+/*}}}*/
+/* Traversal 'scrutinize' callback for mboxen.  Neither argument influences
+ * the answer: every directory entry is to be processed. */
+enum traverse_check scrutinize_mbox_entry(int parent_is_mbox, const char *de_name)/*{{{*/
+{
+  (void) parent_is_mbox;
+  (void) de_name;
+
+  /* We have to keep looking at everything in this case. */
+  return TRAV_PROCESS;
+}
+/*}}}*/
+/* Traversal callbacks used when expanding mbox paths: only regular files
+ * are accepted, and every directory entry is processed. */
+struct traverse_methods mbox_traverse_methods = {/*{{{*/
+ .filter = filter_is_file,
+ .scrutinize = scrutinize_mbox_entry
+};
+/*}}}*/
+/* Return 1 if the string 'x' contains any of the wildcard metacharacters
+ * '[', '*' or '?', otherwise 0. */
+static int is_wild(const char *x)/*{{{*/
+{
+  return strpbrk(x, "[*?") != NULL;
+}
+/*}}}*/
+/*{{{ handle_one_path() */
+static void handle_one_path(const char *folder_base,
+    const char *path,
+    struct string_list *list,
+    const struct traverse_methods *methods,
+    struct globber_array *omit_globs)
+{
+  /* Valid syntaxen ([.]=optional):
+   * [xxx/]foo : single path
+   * [xxx/]foo... : if foo is a file, as before; if a directory, every ordinary file under it
+   * [xxx/]wild : any single path matching the wildcard
+   * [xxx/]wild... : consider each match of the wildcard by the rule 2 lines above
+
+   * <wild> contains any of these shell-like metacharacters
+   * * : any string of 1 or more arbitrary characters
+   * ? : any 1 arbitrary character
+   * [a-z] : character class
+   * [^a-z] : negated character class.
+
+   * Relative paths are taken relative to folder_base; absolute paths are
+   * used as given.
+   */
+  int (*append)(char *, int, struct stat *, struct string_list *,
+                const struct traverse_methods *, struct globber_array *);
+  char *full_path;
+  char *slash;
+  char *last_comp;
+  int base_len;
+  int len;
+
+  if (path[0] == '/') {
+    full_path = new_string(path);
+    base_len = 0;
+  } else {
+    int folder_base_len = strlen(folder_base);
+
+    full_path = new_array(char, folder_base_len + strlen(path) + 2);
+    strcpy(full_path, folder_base);
+    strcat(full_path, "/");
+    strcat(full_path, path);
+    base_len = folder_base_len + 1;
+  }
+
+  len = strlen(full_path);
+  slash = find_last_slash(full_path);
+  last_comp = slash ? (slash + 1) : full_path;
+
+  /* A trailing "..." selects the recursive expansion; it is cut off the
+   * path before the path is used. */
+  if ((len >= 4) && !strcmp(full_path + (len - 3), "...")) {
+    full_path[len - 3] = '\0';
+    append = append_deep;
+  } else {
+    append = append_shallow;
+  }
+
+  if (is_wild(last_comp)) {
+    handle_wild(full_path, base_len, last_comp, list, append, methods, omit_globs);
+  } else {
+    handle_single(full_path, base_len, list, append, methods, omit_globs);
+  }
+
+  free(full_path);
+}
+/*}}}*/
+/*{{{ glob_and_expand_paths() */
+/* Expand each of the n_in entries of paths_in (relative to folder_base
+ * unless absolute) and return the collected paths through *paths_out and
+ * *n_out. */
+void glob_and_expand_paths(const char *folder_base,
+    char **paths_in, int n_in,
+    char ***paths_out, int *n_out,
+    const struct traverse_methods *methods,
+    struct globber_array *omit_globs)
+{
+  struct string_list head;
+  int i;
+
+  /* An empty circular list: the head points at itself in both directions. */
+  head.prev = &head;
+  head.next = &head;
+
+  for (i = 0; i < n_in; i++) {
+    handle_one_path(folder_base, paths_in[i], &head, methods, omit_globs);
+  }
+
+  string_list_to_array(&head, n_out, paths_out);
+}
+/*}}}*/
+
+/* Bring db->mboxen up to date with the mbox files currently on disk.
+ *
+ * mboxen_paths is a colon-separated list of paths (NULL means no mboxen).
+ * The paths are expanded, symbolic links are skipped, the survivors are
+ * sorted and checked for duplicates, and the result is reconciled with the
+ * database.  Every live mbox whose mtime or size differs from the recorded
+ * values is then mapped and rescanned to find how many old messages are
+ * still valid and which new chunks follow them. */
+void build_mbox_lists(struct database *db, const char *folder_base, /*{{{*/
+ const char *mboxen_paths, struct globber_array *omit_globs)
+{
+ char **raw_paths, **paths;
+ int n_raw_paths, i;
+ int n_paths;
+ struct stat sb;
+
+ int n_extant;
+ struct extant_mbox *extant_mboxen;
+
+ n_extant = 0;
+
+ if (mboxen_paths) {
+ /* NOTE(review): raw_paths is not released anywhere in this function. */
+ split_on_colons(mboxen_paths, &n_raw_paths, &raw_paths);
+ glob_and_expand_paths(folder_base, raw_paths, n_raw_paths, &paths, &n_paths, &mbox_traverse_methods, omit_globs);
+ extant_mboxen = new_array(struct extant_mbox, n_paths);
+ } else {
+ n_paths = 0;
+ paths = NULL;
+ extant_mboxen = NULL;
+ }
+
+ /* Assume maximal size array. TODO : new strategy when globbing is included.
+ * */
+
+ /* TODO TODO ::: Build a sorted list of the paths and check that there aren't
+ any duplicates!! */
+
+ for (i=0; i<n_paths; i++) {
+ char *path = paths[i];
+ /* lstat() rather than stat(), so that symbolic links can be detected. */
+ if (lstat(path, &sb) < 0) {
+ /* can't stat */
+ } else {
+ if (S_ISLNK(sb.st_mode)) {
+ /* Skip mbox if symlink */
+ if (verbose) {
+ printf("%s is a link - skipping\n", path);
+ }
+ } else {
+ extant_mboxen[n_extant].full_path = new_string(path);
+ extant_mboxen[n_extant].mtime = sb.st_mtime;
+ extant_mboxen[n_extant].size = sb.st_size;
+ n_extant++;
+ }
+ }
+ free(paths[i]);
+ }
+ if (paths) {
+ free(paths);
+ paths=NULL;
+ }
+
+ /* Reconcile list against that in the db. : sort, match etc. */
+ if (n_extant) {
+ qsort(extant_mboxen, n_extant, sizeof(struct extant_mbox), compare_extant_mboxen);
+ }
+
+ /* Exits the program if the same mbox is listed twice. */
+ check_duplicates(extant_mboxen, n_extant);
+
+ /* NOTE(review): extant_mboxen and the full_path copies it holds are not
+ * released in this function. */
+ marry_up_mboxen(db, extant_mboxen, n_extant);
+
+ /* Now look for new/modified mboxen, find how many of the old messages are
+ * still valid and scan the remainder. */
+
+ for (i=0; i<db->n_mboxen; i++) {
+ struct mbox *mb = &db->mboxen[i];
+ mb->new_msgs = NULL;
+ /* Dead entries (NULL path) are left alone. */
+ if (mb->path) {
+ if ((mb->current_mtime == mb->file_mtime) &&
+ (mb->current_size == mb->file_size)) {
+ /* Unchanged since it was last indexed: every old message is still valid. */
+ mb->n_old_msgs_valid = mb->n_msgs;
+ } else {
+ unsigned char *va;
+ int len;
+ create_ro_mapping(mb->path, &va, &len);
+ if (va) {
+ rescan_mbox(mb, (char *) va, len);
+ free_ro_mapping(va, len);
+ } else if (!len) {
+ /* No mapping and a zero length: presumably an empty file - confirm
+ * against create_ro_mapping().  The mbox stays alive with no messages. */
+ mb->n_old_msgs_valid = mb->n_msgs = 0;
+ } else {
+ /* Treat as dead mbox */
+ deaden_mbox(mb);
+ }
+ }
+ }
+ }
+
+ /* At the end of this, we want the db->mboxen table to contain up to date info about
+ * the mboxen, together with how much of the old info was still current. */
+}
+/*}}}*/
+
+/* Describe a message held inside an mbox file.  The description lives in a
+ * single function-local static object, so the returned pointer is only
+ * valid until the next call, which overwrites it. */
+static struct msg_src *setup_msg_src(char *filename, off_t start, size_t len)/*{{{*/
+{
+  static struct msg_src shared;
+
+  shared.len = len;
+  shared.start = start;
+  shared.filename = filename;
+  shared.type = MS_MBOX;
+
+  return &shared;
+}
+/*}}}*/
+/* Index the new chunks found by build_mbox_lists() in every mbox.
+ *
+ * For each mbox with a pending new_msgs list, the file is mapped and each
+ * chunk is parsed as an rfc822 message, coalescing following chunks while
+ * the parser reports a missing MIME end boundary.  The position, length
+ * and checksum of each resulting message are recorded in the mbox, a
+ * database message entry is added, and the list nodes are freed.
+ * Returns 1 if any message was added, 0 otherwise.  Exits through
+ * unlock_and_exit(1) if an mbox cannot be mapped. */
+int add_mbox_messages(struct database *db)/*{{{*/
+{
+ int i, j;
+ int any_new = 0;
+ int N;
+ unsigned char *va;
+ int valen;
+ enum data_to_rfc822_error error;
+
+ for (i=0; i<db->n_mboxen; i++) {
+ struct mbox *mb = &db->mboxen[i];
+ struct message_list *here, *next;
+
+ if (mb->new_msgs) {
+ /* Upper bound : we may need to coalesce 2 or more messages if false
+ * matches on From lines have occurred inside MIME encoded body parts. */
+ N = mb->n_old_msgs_valid + mb->n_new_msgs;
+ if (N > mb->max_msgs) {
+ mb->max_msgs = N;
+ mb->start = grow_array(off_t, N, mb->start);
+ mb->len = grow_array(size_t, N, mb->len);
+ mb->check_all = grow_array(checksum_t, N, mb->check_all);
+ }
+
+ va = NULL; /* lazy mmap */
+ /* j is the index the next message gets within this mbox; 'here' walks the
+ * list of chunks, jumping over any that get coalesced. */
+ for (j=mb->n_old_msgs_valid, here=mb->new_msgs; here; j++, here=next) {
+ int n;
+ int trials = 0;
+ off_t start;
+ size_t len;
+ struct rfc822 *r8;
+ struct msg_src *msg_src;
+ struct message_list *last, *xx, *xn;
+
+ next = here->next;
+
+ if (!va) {
+ create_ro_mapping(mb->path, &va, &valen);
+ }
+ if (!va) {
+ fprintf(stderr, "Couldn't create mapping of file %s\n", mb->path);
+ unlock_and_exit(1);
+ }
+
+
+ /* Try to parse the next 'From' -to- 'From' chunk as an rfc822 message.
+ * If we get an unterminated MIME encoding, coalesce the next chunk
+ * onto the current one and try again. Keep going until it works, or
+ * we run out of chunks. If we run out, back up to just using the
+ * first chunk and assume it is broken.
+ *
+ * This is to deal with cases such as having a text/plain attachment
+ * that is actually an mbox file in its own right, i.e. will have
+ * embedded '^From ' lines in it.
+ *
+ * 'last' is the last chunk currently in the putative message. */
+ last = here;
+ do {
+ /* Length from the start of 'here' to the end of 'last'. */
+ len = last->start + last->len - here->start;
+ msg_src = setup_msg_src(mb->path, here->start, len);
+ r8 = data_to_rfc822(msg_src, (char *) va + here->start, len, &error);
+ if (error == DTR8_MISSING_END) {
+ if (r8) free_rfc822(r8);
+ r8 = NULL;
+ last = last->next; /* Try with another chunk on the end */
+ ++trials;
+ } else {
+ /* Treat as success */
+ next = last->next;
+ break;
+ }
+ } while (last && trials < 100);
+
+ /* 'last' is NULL if the chunks ran out; trials reaches 100 if the
+ * coalescing limit was hit.  Either way fall back to the first chunk. */
+ if (last && trials < 100) {
+ start = mb->start[j] = here->start;
+ mb->len[j] = len;
+ compute_checksum((char *) va + here->start, len, &mb->check_all[j]);
+ } else {
+ /* Faulty message or last message in the file */
+ start = mb->start[j] = here->start;
+ len = mb->len[j] = here->len;
+ compute_checksum((char *) va + here->start, len, &mb->check_all[j]);
+ msg_src = setup_msg_src(mb->path, start, len);
+ r8 = data_to_rfc822(msg_src, (char *) va + start, len, &error);
+ if (error == DTR8_MISSING_END) {
+ fprintf(stderr, "Can't find end boundary in multipart message %s\n",
+ format_msg_src(msg_src));
+ }
+ }
+
+ /* Release all the list entries in the half-open range [here,next) */
+ for (xx=here; xx!=next; xx=xn) {
+ xn = xx->next;
+ free(xx);
+ }
+
+ /* Add the database entry for this message.  This happens whether or not
+ * parsing produced an rfc822 structure. */
+ maybe_grow_message_arrays(db);
+ n = db->n_msgs;
+ db->type[n] = MTY_MBOX;
+ db->msgs[n].src.mbox.file_index = i;
+ db->msgs[n].src.mbox.msg_index = j;
+
+ if (r8) {
+ if (verbose) {
+ printf("Scanning %s[%d] at [%d,%d)\n", mb->path, j, (int)start, (int)(start + len));
+ }
+ db->msgs[n].date = r8->hdrs.date;
+ db->msgs[n].seen = r8->hdrs.flags.seen;
+ db->msgs[n].replied = r8->hdrs.flags.replied;
+ db->msgs[n].flagged = r8->hdrs.flags.flagged;
+ tokenise_message(n, db, r8);
+ free_rfc822(r8);
+ } else {
+ printf("Message in %s at [%d,%d) is misformatted\n", mb->path, (int)start, (int)(start + len));
+ }
+
+ ++db->n_msgs;
+ any_new = 1;
+ }
+ /* NOTE(review): mb->new_msgs still points at the nodes freed above; it is
+ * reset to NULL at the start of the next build_mbox_lists() pass. */
+ mb->n_msgs = j;
+ if (va) {
+ free_ro_mapping(va, valen);
+ }
+ }
+ }
+ return any_new;
+}
+/*}}}*/
+
+/* OTHER */
+/* Compact db->mboxen by dropping the dead entries (those with a NULL path)
+ * and renumber the file_index of every mbox-type message to match.  Does
+ * nothing if no entry is dead. */
+void cull_dead_mboxen(struct database *db)/*{{{*/
+{
+  int total = db->n_mboxen;
+  int n_alive = 0;
+  int i, j;
+  int *old_to_new;
+  struct mbox *newtab;
+
+  for (i = 0; i < total; i++) {
+    if (db->mboxen[i].path) {
+      n_alive++;
+    }
+  }
+
+  /* Simple case - no dead mboxen */
+  if (n_alive == total) {
+    return;
+  }
+
+  newtab = new_array(struct mbox, n_alive);
+  old_to_new = new_array(int, total);
+
+  j = 0;
+  for (i = 0; i < total; i++) {
+    struct mbox *old = &db->mboxen[i];
+
+    if (!old->path) {
+      printf("Pruning old mbox[%d], dead\n", i);
+      old_to_new[i] = -1;
+      continue;
+    }
+    old_to_new[i] = j;
+    newtab[j] = *old;
+    printf("Copying mbox[%d] to [%d], path=%s\n", i, j, old->path);
+    j++;
+  }
+
+  /* Renumber file indices in messages */
+  for (i = 0; i < db->n_msgs; i++) {
+    int old_idx;
+
+    if (db->type[i] != MTY_MBOX) {
+      continue;
+    }
+    old_idx = db->msgs[i].src.mbox.file_index;
+    assert(old_to_new[old_idx] != -1);
+    db->msgs[i].src.mbox.file_index = old_to_new[old_idx];
+  }
+
+  /* Fix up pointers */
+  db->n_mboxen = db->max_mboxen = n_alive;
+  free(db->mboxen);
+  db->mboxen = newtab;
+  free(old_to_new);
+}
+/*}}}*/
+
+/* Pack an mbox index and a message index into one value: the low 16 bits
+ * of 'mb' go into bits 16..31 and the low 16 bits of 'msg' into bits
+ * 0..15. */
+unsigned int encode_mbox_indices(unsigned int mb, unsigned int msg)/*{{{*/
+{
+  unsigned int high = (mb & 0xffff) << 16;
+  unsigned int low = msg & 0xffff;
+
+  return high | low;
+}
+/*}}}*/
+/* Inverse of encode_mbox_indices(): store bits 16..31 of 'index' in *mb
+ * and bits 0..15 in *msg. */
+void decode_mbox_indices(unsigned int index, unsigned int *mb, unsigned int *msg)/*{{{*/
+{
+  unsigned int high = index >> 16;
+  unsigned int low = index;
+
+  *mb = high & 0xffff;
+  *msg = low & 0xffff;
+}
+/*}}}*/
+/* Check that the number of mboxen, and the number of messages in each
+ * mbox, do not exceed 65536.  Problems are reported on stderr.  Returns 1
+ * if all limits are respected, 0 otherwise. */
+int verify_mbox_size_constraints(struct database *db)/*{{{*/
+{
+  int ok = 1;
+  int i;
+
+  if (db->n_mboxen > 65536) {
+    fprintf(stderr, "Too many mboxes (max 65536, you have %d)\n", db->n_mboxen);
+    return 0;
+  }
+
+  /* Report every oversized mbox rather than stopping at the first. */
+  for (i = 0; i < db->n_mboxen; i++) {
+    const struct mbox *mb = &db->mboxen[i];
+
+    if (mb->n_msgs > 65536) {
+      fprintf(stderr, "Too many messages in mbox %s (max 65536, you have %d)\n",
+              mb->path, mb->n_msgs);
+      ok = 0;
+    }
+  }
+
+  return ok;
+}
+/*}}}*/
+
diff --git a/src/mairix/md5.c b/src/mairix/md5.c
@@ -0,0 +1,322 @@
+/*
+ ***********************************************************************
+ ** md5.c -- the source code for MD5 routines **
+ ** RSA Data Security, Inc. MD5 Message-Digest Algorithm **
+ ** Created: 2/17/90 RLR **
+ ** Revised: 1/91 SRD,AJ,BSK,JT Reference C Version **
+ ** Revised (for MD5): RLR 4/27/91 **
+ ** -- G modified to have y&~z instead of y&z **
+ ** -- FF, GG, HH modified to add in last register done **
+ ** -- Access pattern: round 2 works mod 5, round 3 works mod 3 **
+ ** -- distinct additive constant for each step **
+ ** -- round 4 added, working mod 7 **
+ ***********************************************************************
+ */
+
+/*
+ ***********************************************************************
+ ** Copyright (C) 1990, RSA Data Security, Inc. All rights reserved. **
+ ** **
+ ** License to copy and use this software is granted provided that **
+ ** it is identified as the "RSA Data Security, Inc. MD5 Message- **
+ ** Digest Algorithm" in all material mentioning or referencing this **
+ ** software or this function. **
+ ** **
+ ** License is also granted to make and use derivative works **
+ ** provided that such works are identified as "derived from the RSA **
+ ** Data Security, Inc. MD5 Message-Digest Algorithm" in all **
+ ** material mentioning or referencing the derived work. **
+ ** **
+ ** RSA Data Security, Inc. makes no representations concerning **
+ ** either the merchantability of this software or the suitability **
+ ** of this software for any particular purpose. It is provided "as **
+ ** is" without express or implied warranty of any kind. **
+ ** **
+ ** These notices must be retained in any copies of any part of this **
+ ** documentation and/or software. **
+ ***********************************************************************
+ */
+
+#include "md5.h"
+
+/*
+ ***********************************************************************
+ ** Message-digest routines: **
+ ** To form the message digest for a message M **
+ ** (1) Initialize a context buffer mdContext using MD5Init **
+ ** (2) Call MD5Update on mdContext and M **
+ ** (3) Call MD5Final on mdContext **
+ ** The message digest is now in mdContext->digest[0...15] **
+ ***********************************************************************
+ */
+
+/* forward declaration: Transform is defined at the end of this file but is
+   called by MD5Update and MD5Final above it */
+static void Transform (UINT4 *, UINT4 *);
+
+/* Padding bytes fed to MD5Update by MD5Final: a single 0x80 byte followed
+   by 63 zero bytes.  The table is declared const only when the compiler
+   defines __STDC__. */
+#ifdef __STDC__
+static const
+#else
+static
+#endif
+unsigned char PADDING[64] = {
+ 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+/* F, G, H and I are basic MD5 functions.
+   Every use of an argument is parenthesised, including the operand of '~',
+   so a compound expression such as (a | b) can be passed safely.  Each
+   macro evaluates some of its arguments more than once, so arguments must
+   be free of side effects. */
+#define F(x, y, z) (((x) & (y)) | ((~(x)) & (z)))
+#define G(x, y, z) (((x) & (z)) | ((y) & (~(z))))
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+#define I(x, y, z) ((y) ^ ((x) | (~(z))))
+
+/* ROTATE_LEFT rotates x left n bits.
+   Two implementations are provided; which one is compiled is decided by
+   the preprocessor test below. */
+#if defined(FAST_MD5) && defined(__GNUC__) && defined(mc68000)
+/*
+ * If we're on a 68000 based CPU and using a GNU C compiler with
+ * inline assembly code, we can speed this up a bit.
+ */
+inline UINT4 ROTATE_LEFT(UINT4 x, int n)
+{
+ asm("roll %2,%0" : "=d" (x) : "0" (x), "Ir" (n));
+ return x;
+}
+#else
+/* Portable form.  x is evaluated twice, and the right shift is by 32-n,
+   so this is only meaningful for a 32-bit x and 0 < n < 32 (every shift
+   count used in this file, S11..S44, lies in that range). */
+#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
+#endif
+
+
+/* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4 */
+/* Rotation is separate from addition to prevent recomputation */
+#define FF(a, b, c, d, x, s, ac) \
+ {(a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \
+ (a) = ROTATE_LEFT ((a), (s)); \
+ (a) += (b); \
+ }
+#define GG(a, b, c, d, x, s, ac) \
+ {(a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \
+ (a) = ROTATE_LEFT ((a), (s)); \
+ (a) += (b); \
+ }
+#define HH(a, b, c, d, x, s, ac) \
+ {(a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \
+ (a) = ROTATE_LEFT ((a), (s)); \
+ (a) += (b); \
+ }
+#define II(a, b, c, d, x, s, ac) \
+ {(a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \
+ (a) = ROTATE_LEFT ((a), (s)); \
+ (a) += (b); \
+ }
+
+/* The routine MD5Init initializes the message-digest context
+   mdContext: the 64-bit bit counter i[] is cleared and the four state
+   words buf[] are loaded with the MD5 initial values.  The in[] and
+   digest[] arrays are left untouched.
+ */
+void MD5Init (MD5_CTX *mdContext)
+{
+  mdContext->i[0] = mdContext->i[1] = (UINT4)0;
+
+  /* Load magic initialization constants.
+   */
+  mdContext->buf[0] = (UINT4)0x67452301;
+  mdContext->buf[1] = (UINT4)0xefcdab89;
+  mdContext->buf[2] = (UINT4)0x98badcfe;
+  mdContext->buf[3] = (UINT4)0x10325476;
+}
+
+/* The routine MD5Update updates the message-digest context to
+   account for the presence of each of the characters inBuf[0..inLen-1]
+   in the message whose digest is being computed.
+
+   Bytes are accumulated in mdContext->in[]; each time 64 bytes are
+   available they are assembled into sixteen little-endian 32-bit words
+   and folded into mdContext->buf[] by Transform.  A partial block stays
+   buffered for the next call.
+ */
+void MD5Update (MD5_CTX *mdContext, const unsigned char *inBuf, unsigned int inLen)
+{
+  UINT4 in[16];
+  int mdi;
+  unsigned int i, ii;
+
+  /* compute number of bytes mod 64 */
+  mdi = (int)((mdContext->i[0] >> 3) & 0x3F);
+
+  /* update number of bits; i[0] is the low word and i[1] the high word.
+     The first test detects a carry out of the low word. */
+  if ((mdContext->i[0] + ((UINT4)inLen << 3)) < mdContext->i[0])
+    mdContext->i[1]++;
+  mdContext->i[0] += ((UINT4)inLen << 3);
+  mdContext->i[1] += ((UINT4)inLen >> 29);
+
+  while (inLen--) {
+    /* add new character to buffer, increment mdi */
+    mdContext->in[mdi++] = *inBuf++;
+
+    /* transform if necessary */
+    if (mdi == 0x40) {
+      for (i = 0, ii = 0; i < 16; i++, ii += 4)
+        in[i] = (((UINT4)mdContext->in[ii+3]) << 24) |
+                (((UINT4)mdContext->in[ii+2]) << 16) |
+                (((UINT4)mdContext->in[ii+1]) << 8) |
+                ((UINT4)mdContext->in[ii]);
+      Transform (mdContext->buf, in);
+      mdi = 0;
+    }
+  }
+}
+
+/* The routine MD5Final terminates the message-digest computation and
+   ends with the desired message digest in mdContext->digest[0...15].
+
+   The bit count is captured before padding, the message is padded so that
+   its length is 56 mod 64 bytes, and the final block (14 data words plus
+   the two count words) is transformed.  The four state words are then
+   written to digest[] least-significant byte first.
+ */
+
+void MD5Final (MD5_CTX *mdContext)
+{
+  UINT4 in[16];
+  int mdi;
+  unsigned int i, ii;
+  unsigned int padLen;
+
+  /* save number of bits (before the padding below changes the count) */
+  in[14] = mdContext->i[0];
+  in[15] = mdContext->i[1];
+
+  /* compute number of bytes mod 64 */
+  mdi = (int)((mdContext->i[0] >> 3) & 0x3F);
+
+  /* pad out to 56 mod 64 */
+  padLen = (mdi < 56) ? (56 - mdi) : (120 - mdi);
+  MD5Update (mdContext, PADDING, padLen);
+
+  /* append length in bits and transform */
+  for (i = 0, ii = 0; i < 14; i++, ii += 4)
+    in[i] = (((UINT4)mdContext->in[ii+3]) << 24) |
+            (((UINT4)mdContext->in[ii+2]) << 16) |
+            (((UINT4)mdContext->in[ii+1]) << 8) |
+            ((UINT4)mdContext->in[ii]);
+  Transform (mdContext->buf, in);
+
+  /* store buffer in digest */
+  for (i = 0, ii = 0; i < 4; i++, ii += 4) {
+    mdContext->digest[ii] = (unsigned char)(mdContext->buf[i] & 0xFF);
+    mdContext->digest[ii+1] =
+      (unsigned char)((mdContext->buf[i] >> 8) & 0xFF);
+    mdContext->digest[ii+2] =
+      (unsigned char)((mdContext->buf[i] >> 16) & 0xFF);
+    mdContext->digest[ii+3] =
+      (unsigned char)((mdContext->buf[i] >> 24) & 0xFF);
+  }
+}
+
+/* Basic MD5 step. Transforms buf based on in.
+   buf holds the four 32-bit state words and is updated in place; in holds
+   the sixteen 32-bit words of one 64-byte block and is only read.  The
+   definition uses a prototype matching the forward declaration at the top
+   of the file.
+ */
+static void Transform (UINT4 *buf, UINT4 *in)
+{
+  UINT4 a = buf[0], b = buf[1], c = buf[2], d = buf[3];
+
+  /* Round 1 */
+#define S11 7
+#define S12 12
+#define S13 17
+#define S14 22
+
+  FF ( a, b, c, d, in[ 0], S11, 0xd76aa478); /* 1 */
+  FF ( d, a, b, c, in[ 1], S12, 0xe8c7b756); /* 2 */
+  FF ( c, d, a, b, in[ 2], S13, 0x242070db); /* 3 */
+  FF ( b, c, d, a, in[ 3], S14, 0xc1bdceee); /* 4 */
+  FF ( a, b, c, d, in[ 4], S11, 0xf57c0faf); /* 5 */
+  FF ( d, a, b, c, in[ 5], S12, 0x4787c62a); /* 6 */
+  FF ( c, d, a, b, in[ 6], S13, 0xa8304613); /* 7 */
+  FF ( b, c, d, a, in[ 7], S14, 0xfd469501); /* 8 */
+  FF ( a, b, c, d, in[ 8], S11, 0x698098d8); /* 9 */
+  FF ( d, a, b, c, in[ 9], S12, 0x8b44f7af); /* 10 */
+  FF ( c, d, a, b, in[10], S13, 0xffff5bb1); /* 11 */
+  FF ( b, c, d, a, in[11], S14, 0x895cd7be); /* 12 */
+  FF ( a, b, c, d, in[12], S11, 0x6b901122); /* 13 */
+  FF ( d, a, b, c, in[13], S12, 0xfd987193); /* 14 */
+  FF ( c, d, a, b, in[14], S13, 0xa679438e); /* 15 */
+  FF ( b, c, d, a, in[15], S14, 0x49b40821); /* 16 */
+
+  /* Round 2 */
+#define S21 5
+#define S22 9
+#define S23 14
+#define S24 20
+  GG ( a, b, c, d, in[ 1], S21, 0xf61e2562); /* 17 */
+  GG ( d, a, b, c, in[ 6], S22, 0xc040b340); /* 18 */
+  GG ( c, d, a, b, in[11], S23, 0x265e5a51); /* 19 */
+  GG ( b, c, d, a, in[ 0], S24, 0xe9b6c7aa); /* 20 */
+  GG ( a, b, c, d, in[ 5], S21, 0xd62f105d); /* 21 */
+  GG ( d, a, b, c, in[10], S22, 0x2441453); /* 22 */
+  GG ( c, d, a, b, in[15], S23, 0xd8a1e681); /* 23 */
+  GG ( b, c, d, a, in[ 4], S24, 0xe7d3fbc8); /* 24 */
+  GG ( a, b, c, d, in[ 9], S21, 0x21e1cde6); /* 25 */
+  GG ( d, a, b, c, in[14], S22, 0xc33707d6); /* 26 */
+  GG ( c, d, a, b, in[ 3], S23, 0xf4d50d87); /* 27 */
+  GG ( b, c, d, a, in[ 8], S24, 0x455a14ed); /* 28 */
+  GG ( a, b, c, d, in[13], S21, 0xa9e3e905); /* 29 */
+  GG ( d, a, b, c, in[ 2], S22, 0xfcefa3f8); /* 30 */
+  GG ( c, d, a, b, in[ 7], S23, 0x676f02d9); /* 31 */
+  GG ( b, c, d, a, in[12], S24, 0x8d2a4c8a); /* 32 */
+
+  /* Round 3 */
+#define S31 4
+#define S32 11
+#define S33 16
+#define S34 23
+  HH ( a, b, c, d, in[ 5], S31, 0xfffa3942); /* 33 */
+  HH ( d, a, b, c, in[ 8], S32, 0x8771f681); /* 34 */
+  HH ( c, d, a, b, in[11], S33, 0x6d9d6122); /* 35 */
+  HH ( b, c, d, a, in[14], S34, 0xfde5380c); /* 36 */
+  HH ( a, b, c, d, in[ 1], S31, 0xa4beea44); /* 37 */
+  HH ( d, a, b, c, in[ 4], S32, 0x4bdecfa9); /* 38 */
+  HH ( c, d, a, b, in[ 7], S33, 0xf6bb4b60); /* 39 */
+  HH ( b, c, d, a, in[10], S34, 0xbebfbc70); /* 40 */
+  HH ( a, b, c, d, in[13], S31, 0x289b7ec6); /* 41 */
+  HH ( d, a, b, c, in[ 0], S32, 0xeaa127fa); /* 42 */
+  HH ( c, d, a, b, in[ 3], S33, 0xd4ef3085); /* 43 */
+  HH ( b, c, d, a, in[ 6], S34, 0x4881d05); /* 44 */
+  HH ( a, b, c, d, in[ 9], S31, 0xd9d4d039); /* 45 */
+  HH ( d, a, b, c, in[12], S32, 0xe6db99e5); /* 46 */
+  HH ( c, d, a, b, in[15], S33, 0x1fa27cf8); /* 47 */
+  HH ( b, c, d, a, in[ 2], S34, 0xc4ac5665); /* 48 */
+
+  /* Round 4 */
+#define S41 6
+#define S42 10
+#define S43 15
+#define S44 21
+  II ( a, b, c, d, in[ 0], S41, 0xf4292244); /* 49 */
+  II ( d, a, b, c, in[ 7], S42, 0x432aff97); /* 50 */
+  II ( c, d, a, b, in[14], S43, 0xab9423a7); /* 51 */
+  II ( b, c, d, a, in[ 5], S44, 0xfc93a039); /* 52 */
+  II ( a, b, c, d, in[12], S41, 0x655b59c3); /* 53 */
+  II ( d, a, b, c, in[ 3], S42, 0x8f0ccc92); /* 54 */
+  II ( c, d, a, b, in[10], S43, 0xffeff47d); /* 55 */
+  II ( b, c, d, a, in[ 1], S44, 0x85845dd1); /* 56 */
+  II ( a, b, c, d, in[ 8], S41, 0x6fa87e4f); /* 57 */
+  II ( d, a, b, c, in[15], S42, 0xfe2ce6e0); /* 58 */
+  II ( c, d, a, b, in[ 6], S43, 0xa3014314); /* 59 */
+  II ( b, c, d, a, in[13], S44, 0x4e0811a1); /* 60 */
+  II ( a, b, c, d, in[ 4], S41, 0xf7537e82); /* 61 */
+  II ( d, a, b, c, in[11], S42, 0xbd3af235); /* 62 */
+  II ( c, d, a, b, in[ 2], S43, 0x2ad7d2bb); /* 63 */
+  II ( b, c, d, a, in[ 9], S44, 0xeb86d391); /* 64 */
+
+  /* Fold this block's result into the running state. */
+  buf[0] += a;
+  buf[1] += b;
+  buf[2] += c;
+  buf[3] += d;
+}
+
+/*
+ ***********************************************************************
+ ** End of md5.c **
+ ******************************** (cut) ********************************
+ */
diff --git a/src/mairix/md5.h b/src/mairix/md5.h
@@ -0,0 +1,62 @@
+/*
+ ***********************************************************************
+ ** md5.h -- header file for implementation of MD5 **
+ ** RSA Data Security, Inc. MD5 Message-Digest Algorithm **
+ ** Created: 2/17/90 RLR **
+ ** Revised: 12/27/90 SRD,AJ,BSK,JT Reference C version **
+ ** Revised (for MD5): RLR 4/27/91 **
+ ***********************************************************************
+ */
+
+/*
+ ***********************************************************************
+ ** Copyright (C) 1990, RSA Data Security, Inc. All rights reserved. **
+ ** **
+ ** License to copy and use this software is granted provided that **
+ ** it is identified as the "RSA Data Security, Inc. MD5 Message- **
+ ** Digest Algorithm" in all material mentioning or referencing this **
+ ** software or this function. **
+ ** **
+ ** License is also granted to make and use derivative works **
+ ** provided that such works are identified as "derived from the RSA **
+ ** Data Security, Inc. MD5 Message-Digest Algorithm" in all **
+ ** material mentioning or referencing the derived work. **
+ ** **
+ ** RSA Data Security, Inc. makes no representations concerning **
+ ** either the merchantability of this software or the suitability **
+ ** of this software for any particular purpose. It is provided "as **
+ ** is" without express or implied warranty of any kind. **
+ ** **
+ ** These notices must be retained in any copies of any part of this **
+ ** documentation and/or software. **
+ ***********************************************************************
+ */
+
+/* Include guard: MD5_CTX is a typedef of an unnamed struct, so including
+   this header twice in one translation unit would otherwise be an error. */
+#ifndef MD5_H
+#define MD5_H
+
+#ifdef HAS_STDINT_H
+#include <stdint.h>
+#elif defined(HAS_INTTYPES_H)
+#include <inttypes.h>
+#else
+#error "No <stdint.h> or <inttypes.h>"
+#endif
+
+/* typedef a 32-bit type */
+typedef uint32_t UINT4;
+
+/* Data structure for MD5 (Message-Digest) computation */
+typedef struct {
+  UINT4 i[2]; /* number of _bits_ handled mod 2^64 */
+  UINT4 buf[4]; /* scratch buffer */
+  unsigned char in[64]; /* input buffer */
+  unsigned char digest[16]; /* actual digest after MD5Final call */
+} MD5_CTX;
+
+/* Usage: MD5Init once, MD5Update for each chunk of the message, then
+   MD5Final; the digest is then available in the context's digest[]. */
+void MD5Init (MD5_CTX *mdContext);
+void MD5Update (MD5_CTX *, unsigned const char *, unsigned int);
+void MD5Final (MD5_CTX *);
+
+#endif /* MD5_H */
+
+/*
+ ***********************************************************************
+ ** End of md5.h **
+ ******************************** (cut) ********************************
+ */
diff --git a/src/mairix/memmac.h b/src/mairix/memmac.h
@@ -0,0 +1,72 @@
+/*
+ mairix - message index builder and finder for maildir folders.
+
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2002-2004
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+
+#ifndef MEMMAC_H
+#define MEMMAC_H
+
+/*{{{ Safe alloc helpers (GCC extensions) */
+extern void out_of_mem(char *file, int line, size_t size);
+
+#undef TEST_OOM
+
+#ifdef TEST_OOM
+extern int total_bytes;
+#endif
+
+/* Allocate s bytes, calling out_of_mem(file, line, s) if the allocation
+   fails.  file and line identify the call site (see the Malloc macro). */
+static __inline__ void* safe_malloc(char *file, int line, size_t s)/*{{{*/
+{
+  /* malloc(0) is allowed to return NULL without being out of memory, so a
+     zero-byte request is rounded up to one byte; a NULL result then always
+     means a genuine allocation failure. */
+  void *x = malloc(s ? s : 1);
+#ifdef TEST_OOM
+  total_bytes += s;
+  if (total_bytes > 131072) x = NULL;
+#endif
+  if (!x) out_of_mem(file, line, s);
+  return x;
+}
+/*}}}*/
+/* Resize old_ptr to s bytes, calling out_of_mem(file, line, s) if the
+   reallocation fails.  file and line identify the call site (see the
+   Realloc macro). */
+static __inline__ void* safe_realloc(char *file, int line, void *old_ptr, size_t s)/*{{{*/
+{
+  /* realloc(p, 0) may free p and return NULL, which would be mistaken for
+     an out-of-memory condition; a zero-byte request is therefore rounded
+     up to one byte. */
+  void *x = realloc(old_ptr, s ? s : 1);
+  if (!x) out_of_mem(file, line, s);
+  return x;
+}
+/*}}}*/
+/* Malloc/Realloc: checked allocation in normal builds, plain libc calls
+   when TEST is defined. */
+#ifndef TEST
+#define Malloc(s) safe_malloc(__FILE__, __LINE__, (s))
+#define Realloc(xx,s) safe_realloc(__FILE__, __LINE__, (xx), (s))
+#else
+#define Malloc(s) malloc(s)
+#define Realloc(xx,s) realloc((xx), (s))
+#endif
+/*}}}*/
+
+/*{{{ Memory macros*/
+/* Each expansion is wrapped in parentheses so that the result can be used
+   directly in a larger expression, e.g. new_array(int, n)[0] or
+   new(struct foo)->field; without them the postfix operator would bind to
+   the void * returned by the allocator instead of to the cast result.
+     new_string(s)         heap copy of the string s
+     extend_string(x,s)    grow heap string x and append s (x evaluated twice)
+     new(T)                storage for one T
+     new_array(T, n)       storage for n objects of type T
+     grow_array(T, n, old) resize old to n objects; old may be NULL
+     EMPTY(x)              initializer made of two pointers to x */
+#define new_string(s) (strcpy((char *) Malloc(1+strlen(s)), (s)))
+#define extend_string(x,s) (strcat((char *) Realloc((x), (strlen(x)+strlen(s)+1)), (s)))
+#define new(T) ((T *) Malloc(sizeof(T)))
+#define new_array(T, n) ((T *) Malloc(sizeof(T) * (n)))
+#define grow_array(T, n, oldX) ((T *) ((oldX) ? Realloc((oldX), (sizeof(T) * (n))) : Malloc(sizeof(T) * (n))))
+#define EMPTY(x) {&(x), &(x)}
+/*}}}*/
+
+#endif /* MEMMAC_H */
diff --git a/src/mairix/mkversion b/src/mairix/mkversion
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+# Regenerate version.h from scratch.  PROGRAM_VERSION is the contents of
+# version.txt when that file exists, and the string DEVELOPMENT otherwise.
+rm -f version.h
+
+{
+    echo "#ifndef VERSION_H"
+    echo "#define VERSION_H 1"
+    if [ -f version.txt ]; then
+        ver=`cat version.txt`
+        echo "#define PROGRAM_VERSION \"$ver\""
+    else
+        echo "#define PROGRAM_VERSION \"DEVELOPMENT\""
+    fi
+    echo "#endif /* VERSION_H */"
+} > version.h
+
+
diff --git a/src/mairix/nvp.c b/src/mairix/nvp.c
@@ -0,0 +1,416 @@
+/*
+ mairix - message index builder and finder for maildir folders.
+
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2006,2007
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+#ifdef VERBOSE_TEST
+#define TEST 1
+#endif
+
+/* Parse name/value pairs from mail headers into a lookup table. */
+#include <stdio.h>
+#include <ctype.h>
+#include "mairix.h"
+#include "nvptypes.h"
+#include "nvpscan.h"
+#include "nvp.h"
+
+/* Kind of item stored in one nvp_entry. */
+enum nvp_type {/*{{{*/
+ NVP_NAME, /* bare name: only lhs is set (see append_name) */
+ NVP_MAJORMINOR, /* major/minor pair: lhs = major, rhs = minor */
+ NVP_NAMEVALUE /* name=value pair: lhs = name, rhs = value */
+};
+/*}}}*/
+/* One parsed item; a node of the doubly-linked list owned by struct nvp.
+   lhs and rhs are heap strings created with new_string(). */
+struct nvp_entry {/*{{{*/
+ struct nvp_entry *next;
+ struct nvp_entry *prev;
+ enum nvp_type type;
+ char *lhs;
+ char *rhs;
+};
+/*}}}*/
+/* The parse result: head and tail of the entry list, both NULL when the
+   list is empty. */
+struct nvp {/*{{{*/
+ struct nvp_entry *first, *last;
+};
+/*}}}*/
+static void append(struct nvp *nvp, struct nvp_entry *ne)/*{{{*/
+{
+  /* Link ne in as the new tail of nvp's doubly-linked entry list. */
+  struct nvp_entry *tail = nvp->last;
+
+  ne->prev = tail;
+  ne->next = NULL;
+  if (tail == NULL) {
+    nvp->first = ne;
+  } else {
+    tail->next = ne;
+  }
+  nvp->last = ne;
+}
+/*}}}*/
+static void append_name(struct nvp *nvp, char *name)/*{{{*/
+{
+  /* Append a copy of name as an NVP_NAME entry.  A bare name has no
+   * right-hand side, so rhs is set to NULL rather than left holding an
+   * indeterminate pointer. */
+  struct nvp_entry *ne;
+  ne = new(struct nvp_entry);
+  ne->type = NVP_NAME;
+  ne->lhs = new_string(name);
+  ne->rhs = NULL;
+  append(nvp, ne);
+}
+/*}}}*/
+static void append_majorminor(struct nvp *nvp, char *major, char *minor)/*{{{*/
+{
+  /* Append copies of major and minor as one NVP_MAJORMINOR entry. */
+  struct nvp_entry *entry = new(struct nvp_entry);
+
+  entry->lhs = new_string(major);
+  entry->rhs = new_string(minor);
+  entry->type = NVP_MAJORMINOR;
+  append(nvp, entry);
+}
+/*}}}*/
+static void append_namevalue(struct nvp *nvp, char *name, char *value)/*{{{*/
+{
+  /* Append copies of name and value as one NVP_NAMEVALUE entry. */
+  struct nvp_entry *entry = new(struct nvp_entry);
+
+  entry->lhs = new_string(name);
+  entry->rhs = new_string(value);
+  entry->type = NVP_NAMEVALUE;
+  append(nvp, entry);
+}
+/*}}}*/
+static void combine_namevalue(struct nvp *nvp, char *name, char *value)/*{{{*/
+{
+  /* Handle one piece of a continued parameter: if a name=value entry
+   * called name already exists, value is appended to its existing value;
+   * otherwise a new name=value entry is created. */
+  struct nvp_entry *cur = nvp->first;
+
+  while (cur) {
+    if (cur->type == NVP_NAMEVALUE && strcmp(cur->lhs, name) == 0) {
+      size_t old_len = strlen(cur->rhs);
+      char *joined = new_array(char, old_len + strlen(value) + 1);
+
+      strcpy(joined, cur->rhs);
+      strcpy(joined + old_len, value);
+      free(cur->rhs);
+      cur->rhs = joined;
+      return;
+    }
+    cur = cur->next;
+  }
+
+  /* No existing entry: this is the first piece. */
+  append_namevalue(nvp, name, value);
+}
+/*}}}*/
+static void release_nvp(struct nvp *nvp)/*{{{*/
+{
+  /* Free a partially built list when parsing is abandoned.  The teardown
+   * is exactly what the public free_nvp() (declared in nvp.h) does, so
+   * delegate to it instead of keeping a second copy of the loop. */
+  free_nvp(nvp);
+}
+/*}}}*/
+/* Parse the header text s into a list of names, major/minor pairs and
+ * name=value pairs.
+ *
+ *   src  message the header came from; used only in the diagnostic
+ *   s    header text, expected to start with pfx
+ *   pfx  header prefix, compared case-insensitively
+ *
+ * Returns NULL if s does not start with pfx, or if the scanner rejects the
+ * text (a diagnostic is then written to stderr).  Otherwise returns a
+ * newly allocated list that the caller releases with free_nvp().
+ *
+ * The header is untrusted input, so the three fixed-size scratch buffers
+ * are bounds-checked: a name, minor or value longer than the buffer is
+ * truncated rather than written past the end of the array. */
+struct nvp *make_nvp(struct msg_src *src, char *s, const char *pfx)/*{{{*/
+{
+  int current_state;
+  unsigned int tok;
+  char *q;
+  unsigned char qq;
+  char name[256];
+  char minor[256];
+  char value[256];
+  /* Last slot of each buffer, always kept free for the terminating NUL. */
+  char *const name_end = name + sizeof(name) - 1;
+  char *const minor_end = minor + sizeof(minor) - 1;
+  char *const value_end = value + sizeof(value) - 1;
+  enum nvp_action last_action, current_action;
+  struct nvp *result;
+  size_t pfxlen;
+  char *nn, *mm, *vv;
+
+  pfxlen = strlen(pfx);
+  if (strncasecmp(pfx, s, pfxlen))
+    return NULL;
+  s += pfxlen;
+
+  result = new(struct nvp);
+  result->first = result->last = NULL;
+
+  current_state = nvp_in;
+
+  q = s;
+  nn = name;
+  mm = minor;
+  vv = value;
+  last_action = GOT_NOTHING;
+  do {
+    qq = *(unsigned char *) q;
+    if (qq) {
+      tok = nvp_char2tok[qq];
+    } else {
+      tok = nvp_EOS;
+    }
+    current_state = nvp_next_state(current_state, tok);
+#ifdef VERBOSE_TEST
+    fprintf(stderr, "Char %02x (%c) tok=%d new_current_state=%d\n",
+        qq, ((qq>=32) && (qq<=126)) ? qq : '.',
+        tok, current_state);
+#endif
+
+    if (current_state < 0) {
+#ifdef TEST
+      fprintf(stderr, "'%s' could not be parsed\n", s);
+#else
+      fprintf(stderr, "Header '%s%s' in %s could not be parsed\n",
+          pfx, s, format_msg_src(src));
+#endif
+      release_nvp(result);
+      return NULL;
+    }
+
+    /* Copy the current character into whichever buffer the new state
+     * selects, dropping it if that buffer is already full. */
+    switch (nvp_copier[current_state]) {
+      case COPY_TO_NAME:
+#ifdef VERBOSE_TEST
+        fprintf(stderr, "  COPY_TO_NAME\n");
+#endif
+        if (nn < name_end) *nn++ = *q;
+        break;
+      case COPY_TO_MINOR:
+#ifdef VERBOSE_TEST
+        fprintf(stderr, "  COPY_TO_MINOR\n");
+#endif
+        if (mm < minor_end) *mm++ = *q;
+        break;
+      case COPY_TO_VALUE:
+#ifdef VERBOSE_TEST
+        fprintf(stderr, "  COPY_TO_VALUE\n");
+#endif
+        if (vv < value_end) *vv++ = *q;
+        break;
+      case COPY_NOWHERE:
+        break;
+    }
+
+    current_action = nvp_action[current_state];
+    switch (current_action) {
+      case GOT_NAME:
+      case GOT_NAME_TRAILING_SPACE:
+      case GOT_MAJORMINOR:
+      case GOT_NAMEVALUE:
+      case GOT_NAMEVALUE_CONT:
+#ifdef VERBOSE_TEST
+        fprintf(stderr, "   Setting last action to %d\n", current_action);
+#endif
+        last_action = current_action;
+        break;
+      case GOT_TERMINATOR:
+#ifdef VERBOSE_TEST
+        fprintf(stderr, "   Hit terminator; last_action=%d\n", last_action);
+#endif
+        /* End of one component: emit whatever was recognised last. */
+        switch (last_action) {
+          case GOT_NAME:
+            *nn = 0;
+            append_name(result, name);
+            break;
+          case GOT_NAME_TRAILING_SPACE:
+            /* Strip trailing white space, never stepping below the start
+             * of the buffer; the cast keeps isspace() defined for bytes
+             * with the top bit set. */
+            while (nn > name && isspace((unsigned char) nn[-1])) nn--;
+            *nn = 0;
+            append_name(result, name);
+            break;
+          case GOT_MAJORMINOR:
+            *nn = 0;
+            *mm = 0;
+            append_majorminor(result, name, minor);
+            break;
+          case GOT_NAMEVALUE:
+            *nn = 0;
+            *vv = 0;
+            append_namevalue(result, name, value);
+            break;
+          case GOT_NAMEVALUE_CONT:
+            *nn = 0;
+            *vv = 0;
+            combine_namevalue(result, name, value);
+            break;
+          default:
+            break;
+        }
+        /* Start the next component with empty buffers. */
+        nn = name;
+        mm = minor;
+        vv = value;
+        break;
+      case GOT_NOTHING:
+        break;
+    }
+
+    q++;
+  } while (tok != nvp_EOS);
+
+  return result;
+}
+/*}}}*/
+void free_nvp(struct nvp *nvp)/*{{{*/
+{
+  /* Release every entry of the list, the strings each entry owns, and
+   * finally the list header itself. */
+  struct nvp_entry *cur = nvp->first;
+
+  while (cur) {
+    struct nvp_entry *following = cur->next;
+
+    if (cur->type == NVP_NAME) {
+      /* A bare name owns only its left-hand side. */
+      free(cur->lhs);
+    } else if (cur->type == NVP_MAJORMINOR || cur->type == NVP_NAMEVALUE) {
+      free(cur->lhs);
+      free(cur->rhs);
+    }
+    free(cur);
+    cur = following;
+  }
+  free(nvp);
+}
+/*}}}*/
+const char *nvp_lookup(struct nvp *nvp, const char *name)/*{{{*/
+{
+  /* Return the value of the first name=value entry whose name equals name
+   * exactly (case-sensitive), or NULL if there is no such entry. */
+  struct nvp_entry *cur = nvp->first;
+
+  while (cur) {
+    if (cur->type == NVP_NAMEVALUE && strcmp(cur->lhs, name) == 0) {
+      return cur->rhs;
+    }
+    cur = cur->next;
+  }
+  return NULL;
+}
+/*}}}*/
+const char *nvp_lookupcase(struct nvp *nvp, const char *name)/*{{{*/
+{
+  /* Return the value of the first name=value entry whose name matches
+   * name ignoring case, or NULL if there is no such entry. */
+  struct nvp_entry *cur = nvp->first;
+
+  while (cur) {
+    if (cur->type == NVP_NAMEVALUE && strcasecmp(cur->lhs, name) == 0) {
+      return cur->rhs;
+    }
+    cur = cur->next;
+  }
+  return NULL;
+}
+/*}}}*/
+
+void nvp_dump(struct nvp *nvp, FILE *out)/*{{{*/
+{
+  /* Write a separator line followed by one line per entry, in list order,
+   * to out. */
+  struct nvp_entry *cur;
+
+  fprintf(out, "----\n");
+  cur = nvp->first;
+  while (cur) {
+    if (cur->type == NVP_NAME) {
+      fprintf(out, "NAME: %s\n", cur->lhs);
+    } else if (cur->type == NVP_MAJORMINOR) {
+      fprintf(out, "MAJORMINOR: %s/%s\n", cur->lhs, cur->rhs);
+    } else if (cur->type == NVP_NAMEVALUE) {
+      fprintf(out, "NAMEVALUE: %s=%s\n", cur->lhs, cur->rhs);
+    }
+    cur = cur->next;
+  }
+}
+/*}}}*/
+
+/* In these cases, we only look at the first entry */
+const char *nvp_major(struct nvp *nvp)/*{{{*/
+{
+  /* Major part of the first entry, provided that entry is a major/minor
+   * pair; NULL if the list is empty or starts with anything else. */
+  struct nvp_entry *head = nvp->first;
+
+  if (head && head->type == NVP_MAJORMINOR) return head->lhs;
+  return NULL;
+}
+/*}}}*/
+const char *nvp_minor(struct nvp *nvp)/*{{{*/
+{
+  /* Minor part of the first entry, provided that entry is a major/minor
+   * pair; NULL if the list is empty or starts with anything else. */
+  struct nvp_entry *head = nvp->first;
+
+  if (head && head->type == NVP_MAJORMINOR) return head->rhs;
+  return NULL;
+}
+/*}}}*/
+const char *nvp_first(struct nvp *nvp)/*{{{*/
+{
+  /* The first entry's name, provided that entry is a bare name; NULL if
+   * the list is empty or starts with anything else. */
+  struct nvp_entry *head = nvp->first;
+
+  if (head && head->type == NVP_NAME) return head->lhs;
+  return NULL;
+}
+/*}}}*/
+
+#ifdef TEST
+
+static void do_test(char *s)
+{
+  /* Parse s with an empty prefix; on success dump the result to stderr
+   * and release it.  Nothing is dumped when parsing fails. */
+  struct nvp *parsed = make_nvp(NULL, s, "");
+
+  if (!parsed) return;
+  nvp_dump(parsed, stderr);
+  free_nvp(parsed);
+}
+
+
+/* Stand-alone test driver, compiled only when TEST is defined: runs the
+   enabled sample headers through do_test().  The command line is not
+   used. */
+int main (int argc, char **argv) {
+  (void) argc;
+  (void) argv;
+#if 0
+  do_test("attachment; filename=\"foo.c\"; prot=ro");
+  do_test("attachment; filename= \"foo bar.c\" ;prot=ro");
+  do_test("attachment ; filename= \"foo bar.c\" ;prot= ro");
+  do_test("attachment ; filename= \"foo bar.c\" ;prot= ro");
+  do_test("attachment ; filename= \"foo ; bar.c\" ;prot= ro");
+  do_test("attachment ; x*0=\"hi \"; x*1=\"there\"");
+#endif
+
+  do_test("application/vnd.ms-excel; name=\"thequiz.xls\"");
+#if 0
+  do_test("inline; filename*0=\"aaaa bbbb cccc dddd eeee ffff gggg hhhh iiii jjjj\t kkkkllll\"");
+  do_test(" text/plain ; name= \"foo bar.c\" ;prot= ro/rw; read/write; read= foo bar");
+#endif
+  return 0;
+}
+#endif
+
+
+
+
diff --git a/src/mairix/nvp.h b/src/mairix/nvp.h
@@ -0,0 +1,38 @@
+/*
+ mairix - message index builder and finder for maildir folders.
+
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2006,2010
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+#ifndef NVP_H
+#define NVP_H
+
+#include <stdio.h> /* FILE, used by nvp_dump(); keeps this header self-contained */
+
+/* Opaque parse result; the layout is private to nvp.c. */
+struct nvp;
+struct msg_src;
+
+/* Parse a header into a list; returns NULL on prefix mismatch or parse
+   failure.  Release the result with free_nvp(). */
+extern struct nvp *make_nvp(struct msg_src *, char *, const char *);
+extern void free_nvp(struct nvp *);
+/* Write a human-readable listing of the entries to out. */
+extern void nvp_dump(struct nvp *nvp, FILE *out);
+/* Accessors that look at the first entry only; NULL if it has another kind. */
+extern const char *nvp_major(struct nvp *n);
+extern const char *nvp_minor(struct nvp *n);
+extern const char *nvp_first(struct nvp *n);
+/* Value of the first name=value entry called name (exact / ignoring case),
+   or NULL when there is none. */
+extern const char *nvp_lookup(struct nvp *n, const char *name);
+extern const char *nvp_lookupcase(struct nvp *n, const char *name);
+
+#endif
+
diff --git a/src/mairix/nvp.nfa b/src/mairix/nvp.nfa
@@ -0,0 +1,197 @@
+#########################################################################
+#
+# mairix - message index builder and finder for maildir folders.
+#
+# Copyright (C) Richard P. Curnow 2006,2007
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# =======================================================================
+
+# Explicit input token and the character-class abbreviations used by the
+# blocks below.
+# NOTE(review): the readings given for each class assume the usual dfasyn
+# character-class syntax (octal escapes, '~' as set difference) -- confirm
+# against the dfasyn documentation.
+# EOS is the token make_nvp() feeds in when it reaches the terminating NUL.
+Tokens EOS
+# VALUE: presumably '!' through '~' except backslash, double quote and ';'.
+Abbrev VALUE = [\041-~]~[\\";]
+# QVALUE: anything allowed inside quotes: a VALUE character, tab, space,
+# ';', or a backslash escape.
+Abbrev QVALUE = VALUE | [\011\040;] | <escape:in->out>
+# NAME1: one character of a name: digit, letter, '_' or '-'.
+Abbrev NAME1 = [0-9a-zA-Z_\-]
+# MINOR: one character of a minor type: a NAME1 character, '.', '-' or '+'.
+Abbrev MINOR = NAME1 | [\.\-+]
+# OWS: optional white space (see Block optwhite).
+Abbrev OWS = <optwhite:in->out>
+
+%{
+#include "nvptypes.h"
+%}
+
+# A two-character escape sequence: a backslash followed by either another
+# backslash or a double quote.
+Block escape {
+ State in
+ [\\] ; [\\] -> out
+ [\\] ; ["] -> out
+}
+
+# Optional white space: the empty transition to out lets the block match
+# nothing, and the loop on 'in' consumes any run of space, tab or carriage
+# return.
+Block optwhite {
+ State in
+ -> out
+ # I have seen headers with ^M in them...
+ [ \t\r] -> in
+}
+
+# A name: starts with a NAME1 character and may contain embedded spaces or
+# tabs.  Both name1 and name2 carry COPY_TO_NAME, so every character
+# (including the white space) is copied to the name buffer by make_nvp().
+# The action attribute records whether the name currently ends in a NAME1
+# character (GOT_NAME) or in white space (GOT_NAME_TRAILING_SPACE, which
+# make_nvp() then strips).
+Block name {
+ # This needs to cope with embedded spaces, e.g. for mailers that write '7
+ # bit' instead of '7bit'
+ State in
+ NAME1 -> name1
+
+ State name1
+ = COPY_TO_NAME
+ = GOT_NAME
+ NAME1 -> name1
+ [ \t] -> name2
+ -> out
+
+ State name2
+ = COPY_TO_NAME
+ = GOT_NAME_TRAILING_SPACE
+ [ \t] -> name2
+ NAME1 -> name1
+ -> out
+
+ State out
+}
+
+# An unquoted value: one or more VALUE characters, each copied to the
+# value buffer (COPY_TO_VALUE).
+Block value {
+ State in
+ VALUE -> v1
+ State v1
+ = COPY_TO_VALUE
+ -> out
+ VALUE -> v1
+}
+
+# A quoted value: an opening double quote, one or more QVALUE items, and a
+# closing double quote.  Only state qv1 carries COPY_TO_VALUE, so the
+# enclosing quotes themselves are not copied.
+# NOTE(review): qv0 has no direct path to qv2, so an empty quoted string
+# ("") does not appear to be accepted -- confirm whether that is intended.
+Block qvalue {
+ State in
+ ["] -> qv0
+
+ State qv0
+ QVALUE -> qv1
+
+ State qv1
+ = COPY_TO_VALUE
+ QVALUE -> qv1
+ -> qv2
+
+ State qv2
+ ["] -> out
+}
+
+# One or more decimal digits (used for the section number in name*N=value).
+Block digits {
+ State in
+ [0-9] -> out
+ [0-9] -> in
+}
+
+# A parameter of the form name=value.  Two shapes are recognised:
+#   name = value          -> rhs_normal   -> out_normal   (GOT_NAMEVALUE)
+#   name*digits = value   -> rhs_continue -> out_continue (GOT_NAMEVALUE_CONT)
+# The value may be quoted or unquoted.  The normal form additionally has a
+# rule that ends at EOS without a value block; the continued form does not.
+# make_nvp() turns GOT_NAMEVALUE into a new entry and GOT_NAMEVALUE_CONT
+# into an append to an existing entry of the same name.
+Block namevalue {
+ State in
+ OWS ; <name:in->out> ; OWS ; [=] -> rhs_normal
+ OWS ; <name:in->out> ; [*] ; <digits:in->out> ; OWS ; [=] -> rhs_continue
+
+ State rhs_normal
+ OWS ; <qvalue:in->out> ; OWS -> out_normal
+ OWS ; <value:in->out> ; OWS -> out_normal
+ OWS ; ; EOS -> out_normal
+
+ State rhs_continue
+ OWS ; <qvalue:in->out> ; OWS -> out_continue
+ OWS ; <value:in->out> ; OWS -> out_continue
+
+ State out_normal = GOT_NAMEVALUE
+ -> out
+ State out_continue = GOT_NAMEVALUE_CONT
+ -> out
+}
+
+# The major part of a major/minor pair: one or more NAME1 characters.
+# NOTE(review): this block sets no copy attribute of its own; the name
+# buffer is presumably filled because the 'name' block is matched in
+# parallel on the same characters (see Block component) -- confirm.
+Block major {
+ State in
+ NAME1 -> name1
+
+ State name1
+ NAME1 -> name1
+ -> out
+}
+
+# The minor part of a major/minor pair: one or more MINOR characters, each
+# copied to the minor buffer (COPY_TO_MINOR).
+Block minor {
+ State in
+ MINOR -> minor1
+
+ State minor1
+ = COPY_TO_MINOR
+ MINOR -> minor1
+ -> out
+}
+
+# A major/minor pair: a major part, a '/', then a minor part.  Reaching
+# 'out' flags GOT_MAJORMINOR.
+Block majorminor {
+ State in
+ <major:in->out> -> foo
+
+ State foo
+ [/] -> bar
+
+ State bar
+ <minor:in->out> -> out
+
+ State out = GOT_MAJORMINOR
+}
+
+# One ';'-separated component of the header: a name=value parameter, a
+# bare name, or a major/minor pair.
+Block component {
+ State in
+ <namevalue:in->out> -> out
+ <name:in->out> -> out
+ <majorminor:in->out> -> out
+}
+
+# Top level (entry state 'in'): a sequence of components separated by ';',
+# with optional white space around each, ending at EOS; a trailing ';'
+# before EOS is accepted.  Both in2 (after a ';') and out2 (at EOS) flag
+# GOT_TERMINATOR, which is where make_nvp() emits the component just read.
+Block main {
+ State in Entry in
+ OWS ; <component:in->out> ; OWS ; EOS -> out2
+ OWS ; <component:in->out> ; OWS ; [;] ; OWS ; EOS -> out2
+ OWS ; <component:in->out> ; OWS ; [;] -> in2
+
+ State in2
+ = GOT_TERMINATOR
+ -> in
+
+ State out2
+ = GOT_TERMINATOR
+ -> out
+}
+
+# Global generator settings.
+# NOTE(review): 'Prefix nvp' presumably sets the prefix of the generated C
+# identifiers (nvp.c uses nvp_next_state, nvp_char2tok, nvp_EOS, ...);
+# the exact meaning of the global 'Defattr 0' should be confirmed against
+# the dfasyn documentation.
+Defattr 0
+Prefix nvp
+
+# Attribute group read by make_nvp() through nvp_action[state]: what, if
+# anything, has just been recognised.  States that set none of these get
+# GOT_NOTHING.
+Group action {
+ Attr GOT_NAMEVALUE
+ Attr GOT_NAMEVALUE_CONT
+ Attr GOT_NAME
+ Attr GOT_NAME_TRAILING_SPACE
+ Attr GOT_MAJORMINOR
+ Attr GOT_TERMINATOR
+ Defattr GOT_NOTHING
+ Type "enum nvp_action"
+}
+
+# Attribute group read by make_nvp() through nvp_copier[state]: which
+# scratch buffer the current input character is copied to.  States that
+# set none of these get COPY_NOWHERE.
+Group copier {
+ Attr COPY_TO_NAME
+ Attr COPY_TO_MINOR
+ Attr COPY_TO_VALUE
+ Defattr COPY_NOWHERE
+ Type "enum nvp_copier"
+}
+
+# vim:et:sts=4:sw=4:ht=8
+
diff --git a/src/mairix/nvpscan.report b/src/mairix/nvpscan.report
@@ -0,0 +1,6352 @@
+Processing 1 separate entry points
+Entries in 1 blocks, total of 415 states
+NFA state 0 = main.in [Entries: in]
+ [(epsilon)] -> optwhite#8.in
+ [(epsilon)] -> optwhite#4.in
+ [(epsilon)] -> optwhite#1.in
+ Epsilon closure :
+ (self)
+ main.#1
+ main.optwhite#1.in
+ main.optwhite#1.out
+ main.component#2.in
+ main.component#2.namevalue#1.in
+ main.component#2.namevalue#1.#1
+ main.component#2.namevalue#1.optwhite#1.in
+ main.component#2.namevalue#1.optwhite#1.out
+ main.component#2.namevalue#1.name#2.in
+ main.component#2.namevalue#1.#4
+ main.component#2.namevalue#1.optwhite#4.in
+ main.component#2.namevalue#1.optwhite#4.out
+ main.component#2.namevalue#1.name#5.in
+ main.component#2.name#2.in
+ main.component#2.majorminor#3.in
+ main.component#2.majorminor#3.major#1.in
+ main.#4
+ main.optwhite#4.in
+ main.optwhite#4.out
+ main.component#5.in
+ main.component#5.namevalue#1.in
+ main.component#5.namevalue#1.#1
+ main.component#5.namevalue#1.optwhite#1.in
+ main.component#5.namevalue#1.optwhite#1.out
+ main.component#5.namevalue#1.name#2.in
+ main.component#5.namevalue#1.#4
+ main.component#5.namevalue#1.optwhite#4.in
+ main.component#5.namevalue#1.optwhite#4.out
+ main.component#5.namevalue#1.name#5.in
+ main.component#5.name#2.in
+ main.component#5.majorminor#3.in
+ main.component#5.majorminor#3.major#1.in
+ main.#9
+ main.optwhite#8.in
+ main.optwhite#8.out
+ main.component#9.in
+ main.component#9.namevalue#1.in
+ main.component#9.namevalue#1.#1
+ main.component#9.namevalue#1.optwhite#1.in
+ main.component#9.namevalue#1.optwhite#1.out
+ main.component#9.namevalue#1.name#2.in
+ main.component#9.namevalue#1.#4
+ main.component#9.namevalue#1.optwhite#4.in
+ main.component#9.namevalue#1.optwhite#4.out
+ main.component#9.namevalue#1.name#5.in
+ main.component#9.name#2.in
+ main.component#9.majorminor#3.in
+ main.component#9.majorminor#3.major#1.in
+
+NFA state 1 = main.#1
+ [(epsilon)] -> component#2.in
+ Epsilon closure :
+ (self)
+ main.component#2.in
+ main.component#2.namevalue#1.in
+ main.component#2.namevalue#1.#1
+ main.component#2.namevalue#1.optwhite#1.in
+ main.component#2.namevalue#1.optwhite#1.out
+ main.component#2.namevalue#1.name#2.in
+ main.component#2.namevalue#1.#4
+ main.component#2.namevalue#1.optwhite#4.in
+ main.component#2.namevalue#1.optwhite#4.out
+ main.component#2.namevalue#1.name#5.in
+ main.component#2.name#2.in
+ main.component#2.majorminor#3.in
+ main.component#2.majorminor#3.major#1.in
+
+NFA state 2 = main.optwhite#1.in
+ [(epsilon)] -> optwhite#1.out
+ 0:[\t ] -> optwhite#1.in
+ 1:[\r] -> optwhite#1.in
+ Epsilon closure :
+ (self)
+ main.#1
+ main.optwhite#1.out
+ main.component#2.in
+ main.component#2.namevalue#1.in
+ main.component#2.namevalue#1.#1
+ main.component#2.namevalue#1.optwhite#1.in
+ main.component#2.namevalue#1.optwhite#1.out
+ main.component#2.namevalue#1.name#2.in
+ main.component#2.namevalue#1.#4
+ main.component#2.namevalue#1.optwhite#4.in
+ main.component#2.namevalue#1.optwhite#4.out
+ main.component#2.namevalue#1.name#5.in
+ main.component#2.name#2.in
+ main.component#2.majorminor#3.in
+ main.component#2.majorminor#3.major#1.in
+
+NFA state 3 = main.optwhite#1.out
+ [(epsilon)] -> #1
+ Epsilon closure :
+ (self)
+ main.#1
+ main.component#2.in
+ main.component#2.namevalue#1.in
+ main.component#2.namevalue#1.#1
+ main.component#2.namevalue#1.optwhite#1.in
+ main.component#2.namevalue#1.optwhite#1.out
+ main.component#2.namevalue#1.name#2.in
+ main.component#2.namevalue#1.#4
+ main.component#2.namevalue#1.optwhite#4.in
+ main.component#2.namevalue#1.optwhite#4.out
+ main.component#2.namevalue#1.name#5.in
+ main.component#2.name#2.in
+ main.component#2.majorminor#3.in
+ main.component#2.majorminor#3.major#1.in
+
+NFA state 4 = main.#2
+ [(epsilon)] -> optwhite#3.in
+ Epsilon closure :
+ (self)
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 5 = main.component#2.in
+ [(epsilon)] -> component#2.namevalue#1.in
+ [(epsilon)] -> component#2.name#2.in
+ [(epsilon)] -> component#2.majorminor#3.in
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.in
+ main.component#2.namevalue#1.#1
+ main.component#2.namevalue#1.optwhite#1.in
+ main.component#2.namevalue#1.optwhite#1.out
+ main.component#2.namevalue#1.name#2.in
+ main.component#2.namevalue#1.#4
+ main.component#2.namevalue#1.optwhite#4.in
+ main.component#2.namevalue#1.optwhite#4.out
+ main.component#2.namevalue#1.name#5.in
+ main.component#2.name#2.in
+ main.component#2.majorminor#3.in
+ main.component#2.majorminor#3.major#1.in
+
+NFA state 6 = main.component#2.namevalue#1.in
+ [(epsilon)] -> component#2.namevalue#1.optwhite#4.in
+ [(epsilon)] -> component#2.namevalue#1.optwhite#1.in
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#1
+ main.component#2.namevalue#1.optwhite#1.in
+ main.component#2.namevalue#1.optwhite#1.out
+ main.component#2.namevalue#1.name#2.in
+ main.component#2.namevalue#1.#4
+ main.component#2.namevalue#1.optwhite#4.in
+ main.component#2.namevalue#1.optwhite#4.out
+ main.component#2.namevalue#1.name#5.in
+
+NFA state 7 = main.component#2.namevalue#1.#1
+ [(epsilon)] -> component#2.namevalue#1.name#2.in
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.name#2.in
+
+NFA state 8 = main.component#2.namevalue#1.optwhite#1.in
+ [(epsilon)] -> component#2.namevalue#1.optwhite#1.out
+ 0:[\t ] -> component#2.namevalue#1.optwhite#1.in
+ 1:[\r] -> component#2.namevalue#1.optwhite#1.in
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#1
+ main.component#2.namevalue#1.optwhite#1.out
+ main.component#2.namevalue#1.name#2.in
+
+NFA state 9 = main.component#2.namevalue#1.optwhite#1.out
+ [(epsilon)] -> component#2.namevalue#1.#1
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#1
+ main.component#2.namevalue#1.name#2.in
+
+NFA state 10 = main.component#2.namevalue#1.#2
+ [(epsilon)] -> component#2.namevalue#1.optwhite#3.in
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#3
+ main.component#2.namevalue#1.optwhite#3.in
+ main.component#2.namevalue#1.optwhite#3.out
+
+NFA state 11 = main.component#2.namevalue#1.name#2.in
+ 6:[\055] -> component#2.namevalue#1.name#2.name1
+ 11:[A-Z_a-z] -> component#2.namevalue#1.name#2.name1
+ 8:[0-9] -> component#2.namevalue#1.name#2.name1
+ Epsilon closure :
+ (self)
+
+NFA state 12 = main.component#2.namevalue#1.name#2.name1
+ [(epsilon)] -> component#2.namevalue#1.name#2.#1
+ [(epsilon)] -> component#2.namevalue#1.name#2.#2
+ 6:[\055] -> component#2.namevalue#1.name#2.name1
+ 11:[A-Z_a-z] -> component#2.namevalue#1.name#2.name1
+ 8:[0-9] -> component#2.namevalue#1.name#2.name1
+ 0:[\t ] -> component#2.namevalue#1.name#2.name2
+ [(epsilon)] -> component#2.namevalue#1.name#2.out
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#2
+ main.component#2.namevalue#1.name#2.#1
+ main.component#2.namevalue#1.name#2.#2
+ main.component#2.namevalue#1.name#2.out
+ main.component#2.namevalue#1.#3
+ main.component#2.namevalue#1.optwhite#3.in
+ main.component#2.namevalue#1.optwhite#3.out
+
+NFA state 13 = main.component#2.namevalue#1.name#2.#1
+ Tags : COPY_TO_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 14 = main.component#2.namevalue#1.name#2.#2
+ Tags : GOT_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 15 = main.component#2.namevalue#1.name#2.name2
+ [(epsilon)] -> component#2.namevalue#1.name#2.#3
+ [(epsilon)] -> component#2.namevalue#1.name#2.#4
+ 0:[\t ] -> component#2.namevalue#1.name#2.name2
+ 6:[\055] -> component#2.namevalue#1.name#2.name1
+ 11:[A-Z_a-z] -> component#2.namevalue#1.name#2.name1
+ 8:[0-9] -> component#2.namevalue#1.name#2.name1
+ [(epsilon)] -> component#2.namevalue#1.name#2.out
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#2
+ main.component#2.namevalue#1.name#2.#3
+ main.component#2.namevalue#1.name#2.#4
+ main.component#2.namevalue#1.name#2.out
+ main.component#2.namevalue#1.#3
+ main.component#2.namevalue#1.optwhite#3.in
+ main.component#2.namevalue#1.optwhite#3.out
+
+NFA state 16 = main.component#2.namevalue#1.name#2.#3
+ Tags : COPY_TO_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 17 = main.component#2.namevalue#1.name#2.#4
+ Tags : GOT_NAME_TRAILING_SPACE
+ Epsilon closure :
+ (self)
+
+NFA state 18 = main.component#2.namevalue#1.name#2.out
+ [(epsilon)] -> component#2.namevalue#1.#2
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#2
+ main.component#2.namevalue#1.#3
+ main.component#2.namevalue#1.optwhite#3.in
+ main.component#2.namevalue#1.optwhite#3.out
+
+NFA state 19 = main.component#2.namevalue#1.#3
+ 10:[=] -> component#2.namevalue#1.rhs_normal
+ Epsilon closure :
+ (self)
+
+NFA state 20 = main.component#2.namevalue#1.optwhite#3.in
+ [(epsilon)] -> component#2.namevalue#1.optwhite#3.out
+ 0:[\t ] -> component#2.namevalue#1.optwhite#3.in
+ 1:[\r] -> component#2.namevalue#1.optwhite#3.in
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#3
+ main.component#2.namevalue#1.optwhite#3.out
+
+NFA state 21 = main.component#2.namevalue#1.optwhite#3.out
+ [(epsilon)] -> component#2.namevalue#1.#3
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#3
+
+NFA state 22 = main.component#2.namevalue#1.#4
+ [(epsilon)] -> component#2.namevalue#1.name#5.in
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.name#5.in
+
+NFA state 23 = main.component#2.namevalue#1.optwhite#4.in
+ [(epsilon)] -> component#2.namevalue#1.optwhite#4.out
+ 0:[\t ] -> component#2.namevalue#1.optwhite#4.in
+ 1:[\r] -> component#2.namevalue#1.optwhite#4.in
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#4
+ main.component#2.namevalue#1.optwhite#4.out
+ main.component#2.namevalue#1.name#5.in
+
+NFA state 24 = main.component#2.namevalue#1.optwhite#4.out
+ [(epsilon)] -> component#2.namevalue#1.#4
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#4
+ main.component#2.namevalue#1.name#5.in
+
+NFA state 25 = main.component#2.namevalue#1.#5
+ 4:[*] -> component#2.namevalue#1.#6
+ Epsilon closure :
+ (self)
+
+NFA state 26 = main.component#2.namevalue#1.name#5.in
+ 6:[\055] -> component#2.namevalue#1.name#5.name1
+ 11:[A-Z_a-z] -> component#2.namevalue#1.name#5.name1
+ 8:[0-9] -> component#2.namevalue#1.name#5.name1
+ Epsilon closure :
+ (self)
+
+NFA state 27 = main.component#2.namevalue#1.name#5.name1
+ [(epsilon)] -> component#2.namevalue#1.name#5.#1
+ [(epsilon)] -> component#2.namevalue#1.name#5.#2
+ 6:[\055] -> component#2.namevalue#1.name#5.name1
+ 11:[A-Z_a-z] -> component#2.namevalue#1.name#5.name1
+ 8:[0-9] -> component#2.namevalue#1.name#5.name1
+ 0:[\t ] -> component#2.namevalue#1.name#5.name2
+ [(epsilon)] -> component#2.namevalue#1.name#5.out
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#5
+ main.component#2.namevalue#1.name#5.#1
+ main.component#2.namevalue#1.name#5.#2
+ main.component#2.namevalue#1.name#5.out
+
+NFA state 28 = main.component#2.namevalue#1.name#5.#1
+ Tags : COPY_TO_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 29 = main.component#2.namevalue#1.name#5.#2
+ Tags : GOT_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 30 = main.component#2.namevalue#1.name#5.name2
+ [(epsilon)] -> component#2.namevalue#1.name#5.#3
+ [(epsilon)] -> component#2.namevalue#1.name#5.#4
+ 0:[\t ] -> component#2.namevalue#1.name#5.name2
+ 6:[\055] -> component#2.namevalue#1.name#5.name1
+ 11:[A-Z_a-z] -> component#2.namevalue#1.name#5.name1
+ 8:[0-9] -> component#2.namevalue#1.name#5.name1
+ [(epsilon)] -> component#2.namevalue#1.name#5.out
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#5
+ main.component#2.namevalue#1.name#5.#3
+ main.component#2.namevalue#1.name#5.#4
+ main.component#2.namevalue#1.name#5.out
+
+NFA state 31 = main.component#2.namevalue#1.name#5.#3
+ Tags : COPY_TO_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 32 = main.component#2.namevalue#1.name#5.#4
+ Tags : GOT_NAME_TRAILING_SPACE
+ Epsilon closure :
+ (self)
+
+NFA state 33 = main.component#2.namevalue#1.name#5.out
+ [(epsilon)] -> component#2.namevalue#1.#5
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#5
+
+NFA state 34 = main.component#2.namevalue#1.#6
+ [(epsilon)] -> component#2.namevalue#1.digits#6.in
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.digits#6.in
+
+NFA state 35 = main.component#2.namevalue#1.#7
+ [(epsilon)] -> component#2.namevalue#1.optwhite#7.in
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#8
+ main.component#2.namevalue#1.optwhite#7.in
+ main.component#2.namevalue#1.optwhite#7.out
+
+NFA state 36 = main.component#2.namevalue#1.digits#6.in
+ 8:[0-9] -> component#2.namevalue#1.digits#6.out
+ 8:[0-9] -> component#2.namevalue#1.digits#6.in
+ Epsilon closure :
+ (self)
+
+NFA state 37 = main.component#2.namevalue#1.digits#6.out
+ [(epsilon)] -> component#2.namevalue#1.#7
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#7
+ main.component#2.namevalue#1.#8
+ main.component#2.namevalue#1.optwhite#7.in
+ main.component#2.namevalue#1.optwhite#7.out
+
+NFA state 38 = main.component#2.namevalue#1.#8
+ 10:[=] -> component#2.namevalue#1.rhs_continue
+ Epsilon closure :
+ (self)
+
+NFA state 39 = main.component#2.namevalue#1.optwhite#7.in
+ [(epsilon)] -> component#2.namevalue#1.optwhite#7.out
+ 0:[\t ] -> component#2.namevalue#1.optwhite#7.in
+ 1:[\r] -> component#2.namevalue#1.optwhite#7.in
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#8
+ main.component#2.namevalue#1.optwhite#7.out
+
+NFA state 40 = main.component#2.namevalue#1.optwhite#7.out
+ [(epsilon)] -> component#2.namevalue#1.#8
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#8
+
+NFA state 41 = main.component#2.namevalue#1.rhs_normal
+ [(epsilon)] -> component#2.namevalue#1.optwhite#14.in
+ [(epsilon)] -> component#2.namevalue#1.optwhite#11.in
+ [(epsilon)] -> component#2.namevalue#1.optwhite#8.in
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#9
+ main.component#2.namevalue#1.optwhite#8.in
+ main.component#2.namevalue#1.optwhite#8.out
+ main.component#2.namevalue#1.qvalue#9.in
+ main.component#2.namevalue#1.#11
+ main.component#2.namevalue#1.optwhite#11.in
+ main.component#2.namevalue#1.optwhite#11.out
+ main.component#2.namevalue#1.value#12.in
+ main.component#2.namevalue#1.#13
+ main.component#2.namevalue#1.optwhite#14.in
+ main.component#2.namevalue#1.optwhite#14.out
+ main.component#2.namevalue#1.#14
+
+NFA state 42 = main.component#2.namevalue#1.#9
+ [(epsilon)] -> component#2.namevalue#1.qvalue#9.in
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.qvalue#9.in
+
+NFA state 43 = main.component#2.namevalue#1.optwhite#8.in
+ [(epsilon)] -> component#2.namevalue#1.optwhite#8.out
+ 0:[\t ] -> component#2.namevalue#1.optwhite#8.in
+ 1:[\r] -> component#2.namevalue#1.optwhite#8.in
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#9
+ main.component#2.namevalue#1.optwhite#8.out
+ main.component#2.namevalue#1.qvalue#9.in
+
+NFA state 44 = main.component#2.namevalue#1.optwhite#8.out
+ [(epsilon)] -> component#2.namevalue#1.#9
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#9
+ main.component#2.namevalue#1.qvalue#9.in
+
+NFA state 45 = main.component#2.namevalue#1.#10
+ [(epsilon)] -> component#2.namevalue#1.optwhite#10.in
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.namevalue#1.optwhite#10.in
+ main.component#2.namevalue#1.optwhite#10.out
+ main.component#2.namevalue#1.out_normal
+ main.component#2.namevalue#1.#19
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 46 = main.component#2.namevalue#1.qvalue#9.in
+ 3:["] -> component#2.namevalue#1.qvalue#9.qv0
+ Epsilon closure :
+ (self)
+
+NFA state 47 = main.component#2.namevalue#1.qvalue#9.qv0
+ [(epsilon)] -> component#2.namevalue#1.qvalue#9.escape#1.in
+ 0:[\t ] -> component#2.namevalue#1.qvalue#9.qv1
+ 9:[;] -> component#2.namevalue#1.qvalue#9.qv1
+ 2:[!#-),:<>-@[]^`{-~] -> component#2.namevalue#1.qvalue#9.qv1
+ 11:[A-Z_a-z] -> component#2.namevalue#1.qvalue#9.qv1
+ 10:[=] -> component#2.namevalue#1.qvalue#9.qv1
+ 8:[0-9] -> component#2.namevalue#1.qvalue#9.qv1
+ 7:[/] -> component#2.namevalue#1.qvalue#9.qv1
+ 6:[\055] -> component#2.namevalue#1.qvalue#9.qv1
+ 5:[+.] -> component#2.namevalue#1.qvalue#9.qv1
+ 4:[*] -> component#2.namevalue#1.qvalue#9.qv1
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.qvalue#9.escape#1.in
+
+NFA state 48 = main.component#2.namevalue#1.qvalue#9.escape#1.in
+ 12:[\\] -> component#2.namevalue#1.qvalue#9.escape#1.#2
+ 12:[\\] -> component#2.namevalue#1.qvalue#9.escape#1.#1
+ Epsilon closure :
+ (self)
+
+NFA state 49 = main.component#2.namevalue#1.qvalue#9.escape#1.#1
+ 12:[\\] -> component#2.namevalue#1.qvalue#9.escape#1.out
+ Epsilon closure :
+ (self)
+
+NFA state 50 = main.component#2.namevalue#1.qvalue#9.escape#1.#2
+ 3:["] -> component#2.namevalue#1.qvalue#9.escape#1.out
+ Epsilon closure :
+ (self)
+
+NFA state 51 = main.component#2.namevalue#1.qvalue#9.escape#1.out
+ [(epsilon)] -> component#2.namevalue#1.qvalue#9.qv1
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.qvalue#9.qv1
+ main.component#2.namevalue#1.qvalue#9.#1
+ main.component#2.namevalue#1.qvalue#9.escape#2.in
+ main.component#2.namevalue#1.qvalue#9.qv2
+
+NFA state 52 = main.component#2.namevalue#1.qvalue#9.qv1
+ [(epsilon)] -> component#2.namevalue#1.qvalue#9.#1
+ [(epsilon)] -> component#2.namevalue#1.qvalue#9.escape#2.in
+ 0:[\t ] -> component#2.namevalue#1.qvalue#9.qv1
+ 9:[;] -> component#2.namevalue#1.qvalue#9.qv1
+ 2:[!#-),:<>-@[]^`{-~] -> component#2.namevalue#1.qvalue#9.qv1
+ 11:[A-Z_a-z] -> component#2.namevalue#1.qvalue#9.qv1
+ 10:[=] -> component#2.namevalue#1.qvalue#9.qv1
+ 8:[0-9] -> component#2.namevalue#1.qvalue#9.qv1
+ 7:[/] -> component#2.namevalue#1.qvalue#9.qv1
+ 6:[\055] -> component#2.namevalue#1.qvalue#9.qv1
+ 5:[+.] -> component#2.namevalue#1.qvalue#9.qv1
+ 4:[*] -> component#2.namevalue#1.qvalue#9.qv1
+ [(epsilon)] -> component#2.namevalue#1.qvalue#9.qv2
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.qvalue#9.#1
+ main.component#2.namevalue#1.qvalue#9.escape#2.in
+ main.component#2.namevalue#1.qvalue#9.qv2
+
+NFA state 53 = main.component#2.namevalue#1.qvalue#9.#1
+ Tags : COPY_TO_VALUE
+ Epsilon closure :
+ (self)
+
+NFA state 54 = main.component#2.namevalue#1.qvalue#9.escape#2.in
+ 12:[\\] -> component#2.namevalue#1.qvalue#9.escape#2.#2
+ 12:[\\] -> component#2.namevalue#1.qvalue#9.escape#2.#1
+ Epsilon closure :
+ (self)
+
+NFA state 55 = main.component#2.namevalue#1.qvalue#9.escape#2.#1
+ 12:[\\] -> component#2.namevalue#1.qvalue#9.escape#2.out
+ Epsilon closure :
+ (self)
+
+NFA state 56 = main.component#2.namevalue#1.qvalue#9.escape#2.#2
+ 3:["] -> component#2.namevalue#1.qvalue#9.escape#2.out
+ Epsilon closure :
+ (self)
+
+NFA state 57 = main.component#2.namevalue#1.qvalue#9.escape#2.out
+ [(epsilon)] -> component#2.namevalue#1.qvalue#9.qv1
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.qvalue#9.qv1
+ main.component#2.namevalue#1.qvalue#9.#1
+ main.component#2.namevalue#1.qvalue#9.escape#2.in
+ main.component#2.namevalue#1.qvalue#9.qv2
+
+NFA state 58 = main.component#2.namevalue#1.qvalue#9.qv2
+ 3:["] -> component#2.namevalue#1.qvalue#9.out
+ Epsilon closure :
+ (self)
+
+NFA state 59 = main.component#2.namevalue#1.qvalue#9.out
+ [(epsilon)] -> component#2.namevalue#1.#10
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.namevalue#1.#10
+ main.component#2.namevalue#1.optwhite#10.in
+ main.component#2.namevalue#1.optwhite#10.out
+ main.component#2.namevalue#1.out_normal
+ main.component#2.namevalue#1.#19
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 60 = main.component#2.namevalue#1.optwhite#10.in
+ [(epsilon)] -> component#2.namevalue#1.optwhite#10.out
+ 0:[\t ] -> component#2.namevalue#1.optwhite#10.in
+ 1:[\r] -> component#2.namevalue#1.optwhite#10.in
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.namevalue#1.optwhite#10.out
+ main.component#2.namevalue#1.out_normal
+ main.component#2.namevalue#1.#19
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 61 = main.component#2.namevalue#1.optwhite#10.out
+ [(epsilon)] -> component#2.namevalue#1.out_normal
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.namevalue#1.out_normal
+ main.component#2.namevalue#1.#19
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 62 = main.component#2.namevalue#1.#11
+ [(epsilon)] -> component#2.namevalue#1.value#12.in
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.value#12.in
+
+NFA state 63 = main.component#2.namevalue#1.optwhite#11.in
+ [(epsilon)] -> component#2.namevalue#1.optwhite#11.out
+ 0:[\t ] -> component#2.namevalue#1.optwhite#11.in
+ 1:[\r] -> component#2.namevalue#1.optwhite#11.in
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#11
+ main.component#2.namevalue#1.optwhite#11.out
+ main.component#2.namevalue#1.value#12.in
+
+NFA state 64 = main.component#2.namevalue#1.optwhite#11.out
+ [(epsilon)] -> component#2.namevalue#1.#11
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#11
+ main.component#2.namevalue#1.value#12.in
+
+NFA state 65 = main.component#2.namevalue#1.#12
+ [(epsilon)] -> component#2.namevalue#1.optwhite#13.in
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.namevalue#1.optwhite#13.in
+ main.component#2.namevalue#1.optwhite#13.out
+ main.component#2.namevalue#1.out_normal
+ main.component#2.namevalue#1.#19
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 66 = main.component#2.namevalue#1.value#12.in
+ 2:[!#-),:<>-@[]^`{-~] -> component#2.namevalue#1.value#12.v1
+ 11:[A-Z_a-z] -> component#2.namevalue#1.value#12.v1
+ 10:[=] -> component#2.namevalue#1.value#12.v1
+ 8:[0-9] -> component#2.namevalue#1.value#12.v1
+ 7:[/] -> component#2.namevalue#1.value#12.v1
+ 6:[\055] -> component#2.namevalue#1.value#12.v1
+ 5:[+.] -> component#2.namevalue#1.value#12.v1
+ 4:[*] -> component#2.namevalue#1.value#12.v1
+ Epsilon closure :
+ (self)
+
+NFA state 67 = main.component#2.namevalue#1.value#12.v1
+ [(epsilon)] -> component#2.namevalue#1.value#12.#1
+ [(epsilon)] -> component#2.namevalue#1.value#12.out
+ 2:[!#-),:<>-@[]^`{-~] -> component#2.namevalue#1.value#12.v1
+ 11:[A-Z_a-z] -> component#2.namevalue#1.value#12.v1
+ 10:[=] -> component#2.namevalue#1.value#12.v1
+ 8:[0-9] -> component#2.namevalue#1.value#12.v1
+ 7:[/] -> component#2.namevalue#1.value#12.v1
+ 6:[\055] -> component#2.namevalue#1.value#12.v1
+ 5:[+.] -> component#2.namevalue#1.value#12.v1
+ 4:[*] -> component#2.namevalue#1.value#12.v1
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.namevalue#1.#12
+ main.component#2.namevalue#1.value#12.#1
+ main.component#2.namevalue#1.value#12.out
+ main.component#2.namevalue#1.optwhite#13.in
+ main.component#2.namevalue#1.optwhite#13.out
+ main.component#2.namevalue#1.out_normal
+ main.component#2.namevalue#1.#19
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 68 = main.component#2.namevalue#1.value#12.#1
+ Tags : COPY_TO_VALUE
+ Epsilon closure :
+ (self)
+
+NFA state 69 = main.component#2.namevalue#1.value#12.out
+ [(epsilon)] -> component#2.namevalue#1.#12
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.namevalue#1.#12
+ main.component#2.namevalue#1.optwhite#13.in
+ main.component#2.namevalue#1.optwhite#13.out
+ main.component#2.namevalue#1.out_normal
+ main.component#2.namevalue#1.#19
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 70 = main.component#2.namevalue#1.optwhite#13.in
+ [(epsilon)] -> component#2.namevalue#1.optwhite#13.out
+ 0:[\t ] -> component#2.namevalue#1.optwhite#13.in
+ 1:[\r] -> component#2.namevalue#1.optwhite#13.in
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.namevalue#1.optwhite#13.out
+ main.component#2.namevalue#1.out_normal
+ main.component#2.namevalue#1.#19
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 71 = main.component#2.namevalue#1.optwhite#13.out
+ [(epsilon)] -> component#2.namevalue#1.out_normal
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.namevalue#1.out_normal
+ main.component#2.namevalue#1.#19
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 72 = main.component#2.namevalue#1.#13
+ [(epsilon)] -> component#2.namevalue#1.#14
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#14
+
+NFA state 73 = main.component#2.namevalue#1.optwhite#14.in
+ [(epsilon)] -> component#2.namevalue#1.optwhite#14.out
+ 0:[\t ] -> component#2.namevalue#1.optwhite#14.in
+ 1:[\r] -> component#2.namevalue#1.optwhite#14.in
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#13
+ main.component#2.namevalue#1.optwhite#14.out
+ main.component#2.namevalue#1.#14
+
+NFA state 74 = main.component#2.namevalue#1.optwhite#14.out
+ [(epsilon)] -> component#2.namevalue#1.#13
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#13
+ main.component#2.namevalue#1.#14
+
+NFA state 75 = main.component#2.namevalue#1.#14
+ EOS -> component#2.namevalue#1.out_normal
+ Epsilon closure :
+ (self)
+
+NFA state 76 = main.component#2.namevalue#1.rhs_continue
+ [(epsilon)] -> component#2.namevalue#1.optwhite#18.in
+ [(epsilon)] -> component#2.namevalue#1.optwhite#15.in
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#15
+ main.component#2.namevalue#1.optwhite#15.in
+ main.component#2.namevalue#1.optwhite#15.out
+ main.component#2.namevalue#1.qvalue#16.in
+ main.component#2.namevalue#1.#17
+ main.component#2.namevalue#1.optwhite#18.in
+ main.component#2.namevalue#1.optwhite#18.out
+ main.component#2.namevalue#1.value#19.in
+
+NFA state 77 = main.component#2.namevalue#1.#15
+ [(epsilon)] -> component#2.namevalue#1.qvalue#16.in
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.qvalue#16.in
+
+NFA state 78 = main.component#2.namevalue#1.optwhite#15.in
+ [(epsilon)] -> component#2.namevalue#1.optwhite#15.out
+ 0:[\t ] -> component#2.namevalue#1.optwhite#15.in
+ 1:[\r] -> component#2.namevalue#1.optwhite#15.in
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#15
+ main.component#2.namevalue#1.optwhite#15.out
+ main.component#2.namevalue#1.qvalue#16.in
+
+NFA state 79 = main.component#2.namevalue#1.optwhite#15.out
+ [(epsilon)] -> component#2.namevalue#1.#15
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#15
+ main.component#2.namevalue#1.qvalue#16.in
+
+NFA state 80 = main.component#2.namevalue#1.#16
+ [(epsilon)] -> component#2.namevalue#1.optwhite#17.in
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.namevalue#1.optwhite#17.in
+ main.component#2.namevalue#1.optwhite#17.out
+ main.component#2.namevalue#1.out_continue
+ main.component#2.namevalue#1.#20
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 81 = main.component#2.namevalue#1.qvalue#16.in
+ 3:["] -> component#2.namevalue#1.qvalue#16.qv0
+ Epsilon closure :
+ (self)
+
+NFA state 82 = main.component#2.namevalue#1.qvalue#16.qv0
+ [(epsilon)] -> component#2.namevalue#1.qvalue#16.escape#1.in
+ 0:[\t ] -> component#2.namevalue#1.qvalue#16.qv1
+ 9:[;] -> component#2.namevalue#1.qvalue#16.qv1
+ 2:[!#-),:<>-@[]^`{-~] -> component#2.namevalue#1.qvalue#16.qv1
+ 11:[A-Z_a-z] -> component#2.namevalue#1.qvalue#16.qv1
+ 10:[=] -> component#2.namevalue#1.qvalue#16.qv1
+ 8:[0-9] -> component#2.namevalue#1.qvalue#16.qv1
+ 7:[/] -> component#2.namevalue#1.qvalue#16.qv1
+ 6:[\055] -> component#2.namevalue#1.qvalue#16.qv1
+ 5:[+.] -> component#2.namevalue#1.qvalue#16.qv1
+ 4:[*] -> component#2.namevalue#1.qvalue#16.qv1
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.qvalue#16.escape#1.in
+
+NFA state 83 = main.component#2.namevalue#1.qvalue#16.escape#1.in
+ 12:[\\] -> component#2.namevalue#1.qvalue#16.escape#1.#2
+ 12:[\\] -> component#2.namevalue#1.qvalue#16.escape#1.#1
+ Epsilon closure :
+ (self)
+
+NFA state 84 = main.component#2.namevalue#1.qvalue#16.escape#1.#1
+ 12:[\\] -> component#2.namevalue#1.qvalue#16.escape#1.out
+ Epsilon closure :
+ (self)
+
+NFA state 85 = main.component#2.namevalue#1.qvalue#16.escape#1.#2
+ 3:["] -> component#2.namevalue#1.qvalue#16.escape#1.out
+ Epsilon closure :
+ (self)
+
+NFA state 86 = main.component#2.namevalue#1.qvalue#16.escape#1.out
+ [(epsilon)] -> component#2.namevalue#1.qvalue#16.qv1
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.qvalue#16.qv1
+ main.component#2.namevalue#1.qvalue#16.#1
+ main.component#2.namevalue#1.qvalue#16.escape#2.in
+ main.component#2.namevalue#1.qvalue#16.qv2
+
+NFA state 87 = main.component#2.namevalue#1.qvalue#16.qv1
+ [(epsilon)] -> component#2.namevalue#1.qvalue#16.#1
+ [(epsilon)] -> component#2.namevalue#1.qvalue#16.escape#2.in
+ 0:[\t ] -> component#2.namevalue#1.qvalue#16.qv1
+ 9:[;] -> component#2.namevalue#1.qvalue#16.qv1
+ 2:[!#-),:<>-@[]^`{-~] -> component#2.namevalue#1.qvalue#16.qv1
+ 11:[A-Z_a-z] -> component#2.namevalue#1.qvalue#16.qv1
+ 10:[=] -> component#2.namevalue#1.qvalue#16.qv1
+ 8:[0-9] -> component#2.namevalue#1.qvalue#16.qv1
+ 7:[/] -> component#2.namevalue#1.qvalue#16.qv1
+ 6:[\055] -> component#2.namevalue#1.qvalue#16.qv1
+ 5:[+.] -> component#2.namevalue#1.qvalue#16.qv1
+ 4:[*] -> component#2.namevalue#1.qvalue#16.qv1
+ [(epsilon)] -> component#2.namevalue#1.qvalue#16.qv2
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.qvalue#16.#1
+ main.component#2.namevalue#1.qvalue#16.escape#2.in
+ main.component#2.namevalue#1.qvalue#16.qv2
+
+NFA state 88 = main.component#2.namevalue#1.qvalue#16.#1
+ Tags : COPY_TO_VALUE
+ Epsilon closure :
+ (self)
+
+NFA state 89 = main.component#2.namevalue#1.qvalue#16.escape#2.in
+ 12:[\\] -> component#2.namevalue#1.qvalue#16.escape#2.#2
+ 12:[\\] -> component#2.namevalue#1.qvalue#16.escape#2.#1
+ Epsilon closure :
+ (self)
+
+NFA state 90 = main.component#2.namevalue#1.qvalue#16.escape#2.#1
+ 12:[\\] -> component#2.namevalue#1.qvalue#16.escape#2.out
+ Epsilon closure :
+ (self)
+
+NFA state 91 = main.component#2.namevalue#1.qvalue#16.escape#2.#2
+ 3:["] -> component#2.namevalue#1.qvalue#16.escape#2.out
+ Epsilon closure :
+ (self)
+
+NFA state 92 = main.component#2.namevalue#1.qvalue#16.escape#2.out
+ [(epsilon)] -> component#2.namevalue#1.qvalue#16.qv1
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.qvalue#16.qv1
+ main.component#2.namevalue#1.qvalue#16.#1
+ main.component#2.namevalue#1.qvalue#16.escape#2.in
+ main.component#2.namevalue#1.qvalue#16.qv2
+
+NFA state 93 = main.component#2.namevalue#1.qvalue#16.qv2
+ 3:["] -> component#2.namevalue#1.qvalue#16.out
+ Epsilon closure :
+ (self)
+
+NFA state 94 = main.component#2.namevalue#1.qvalue#16.out
+ [(epsilon)] -> component#2.namevalue#1.#16
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.namevalue#1.#16
+ main.component#2.namevalue#1.optwhite#17.in
+ main.component#2.namevalue#1.optwhite#17.out
+ main.component#2.namevalue#1.out_continue
+ main.component#2.namevalue#1.#20
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 95 = main.component#2.namevalue#1.optwhite#17.in
+ [(epsilon)] -> component#2.namevalue#1.optwhite#17.out
+ 0:[\t ] -> component#2.namevalue#1.optwhite#17.in
+ 1:[\r] -> component#2.namevalue#1.optwhite#17.in
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.namevalue#1.optwhite#17.out
+ main.component#2.namevalue#1.out_continue
+ main.component#2.namevalue#1.#20
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 96 = main.component#2.namevalue#1.optwhite#17.out
+ [(epsilon)] -> component#2.namevalue#1.out_continue
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.namevalue#1.out_continue
+ main.component#2.namevalue#1.#20
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 97 = main.component#2.namevalue#1.#17
+ [(epsilon)] -> component#2.namevalue#1.value#19.in
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.value#19.in
+
+NFA state 98 = main.component#2.namevalue#1.optwhite#18.in
+ [(epsilon)] -> component#2.namevalue#1.optwhite#18.out
+ 0:[\t ] -> component#2.namevalue#1.optwhite#18.in
+ 1:[\r] -> component#2.namevalue#1.optwhite#18.in
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#17
+ main.component#2.namevalue#1.optwhite#18.out
+ main.component#2.namevalue#1.value#19.in
+
+NFA state 99 = main.component#2.namevalue#1.optwhite#18.out
+ [(epsilon)] -> component#2.namevalue#1.#17
+ Epsilon closure :
+ (self)
+ main.component#2.namevalue#1.#17
+ main.component#2.namevalue#1.value#19.in
+
+NFA state 100 = main.component#2.namevalue#1.#18
+ [(epsilon)] -> component#2.namevalue#1.optwhite#20.in
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.namevalue#1.optwhite#20.in
+ main.component#2.namevalue#1.optwhite#20.out
+ main.component#2.namevalue#1.out_continue
+ main.component#2.namevalue#1.#20
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 101 = main.component#2.namevalue#1.value#19.in
+ 2:[!#-),:<>-@[]^`{-~] -> component#2.namevalue#1.value#19.v1
+ 11:[A-Z_a-z] -> component#2.namevalue#1.value#19.v1
+ 10:[=] -> component#2.namevalue#1.value#19.v1
+ 8:[0-9] -> component#2.namevalue#1.value#19.v1
+ 7:[/] -> component#2.namevalue#1.value#19.v1
+ 6:[\055] -> component#2.namevalue#1.value#19.v1
+ 5:[+.] -> component#2.namevalue#1.value#19.v1
+ 4:[*] -> component#2.namevalue#1.value#19.v1
+ Epsilon closure :
+ (self)
+
+NFA state 102 = main.component#2.namevalue#1.value#19.v1
+ [(epsilon)] -> component#2.namevalue#1.value#19.#1
+ [(epsilon)] -> component#2.namevalue#1.value#19.out
+ 2:[!#-),:<>-@[]^`{-~] -> component#2.namevalue#1.value#19.v1
+ 11:[A-Z_a-z] -> component#2.namevalue#1.value#19.v1
+ 10:[=] -> component#2.namevalue#1.value#19.v1
+ 8:[0-9] -> component#2.namevalue#1.value#19.v1
+ 7:[/] -> component#2.namevalue#1.value#19.v1
+ 6:[\055] -> component#2.namevalue#1.value#19.v1
+ 5:[+.] -> component#2.namevalue#1.value#19.v1
+ 4:[*] -> component#2.namevalue#1.value#19.v1
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.namevalue#1.#18
+ main.component#2.namevalue#1.value#19.#1
+ main.component#2.namevalue#1.value#19.out
+ main.component#2.namevalue#1.optwhite#20.in
+ main.component#2.namevalue#1.optwhite#20.out
+ main.component#2.namevalue#1.out_continue
+ main.component#2.namevalue#1.#20
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 103 = main.component#2.namevalue#1.value#19.#1
+ Tags : COPY_TO_VALUE
+ Epsilon closure :
+ (self)
+
+NFA state 104 = main.component#2.namevalue#1.value#19.out
+ [(epsilon)] -> component#2.namevalue#1.#18
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.namevalue#1.#18
+ main.component#2.namevalue#1.optwhite#20.in
+ main.component#2.namevalue#1.optwhite#20.out
+ main.component#2.namevalue#1.out_continue
+ main.component#2.namevalue#1.#20
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 105 = main.component#2.namevalue#1.optwhite#20.in
+ [(epsilon)] -> component#2.namevalue#1.optwhite#20.out
+ 0:[\t ] -> component#2.namevalue#1.optwhite#20.in
+ 1:[\r] -> component#2.namevalue#1.optwhite#20.in
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.namevalue#1.optwhite#20.out
+ main.component#2.namevalue#1.out_continue
+ main.component#2.namevalue#1.#20
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 106 = main.component#2.namevalue#1.optwhite#20.out
+ [(epsilon)] -> component#2.namevalue#1.out_continue
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.namevalue#1.out_continue
+ main.component#2.namevalue#1.#20
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 107 = main.component#2.namevalue#1.out_normal
+ [(epsilon)] -> component#2.namevalue#1.out
+ [(epsilon)] -> component#2.namevalue#1.#19
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.namevalue#1.#19
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 108 = main.component#2.namevalue#1.#19
+ Tags : GOT_NAMEVALUE
+ Epsilon closure :
+ (self)
+
+NFA state 109 = main.component#2.namevalue#1.out_continue
+ [(epsilon)] -> component#2.namevalue#1.out
+ [(epsilon)] -> component#2.namevalue#1.#20
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.namevalue#1.#20
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 110 = main.component#2.namevalue#1.#20
+ Tags : GOT_NAMEVALUE_CONT
+ Epsilon closure :
+ (self)
+
+NFA state 111 = main.component#2.namevalue#1.out
+ [(epsilon)] -> component#2.out
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 112 = main.component#2.name#2.in
+ 6:[\055] -> component#2.name#2.name1
+ 11:[A-Z_a-z] -> component#2.name#2.name1
+ 8:[0-9] -> component#2.name#2.name1
+ Epsilon closure :
+ (self)
+
+NFA state 113 = main.component#2.name#2.name1
+ [(epsilon)] -> component#2.name#2.out
+ 0:[\t ] -> component#2.name#2.name2
+ 6:[\055] -> component#2.name#2.name1
+ 11:[A-Z_a-z] -> component#2.name#2.name1
+ 8:[0-9] -> component#2.name#2.name1
+ [(epsilon)] -> component#2.name#2.#2
+ [(epsilon)] -> component#2.name#2.#1
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.name#2.#1
+ main.component#2.name#2.#2
+ main.component#2.name#2.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 114 = main.component#2.name#2.#1
+ Tags : COPY_TO_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 115 = main.component#2.name#2.#2
+ Tags : GOT_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 116 = main.component#2.name#2.name2
+ [(epsilon)] -> component#2.name#2.out
+ 6:[\055] -> component#2.name#2.name1
+ 11:[A-Z_a-z] -> component#2.name#2.name1
+ 8:[0-9] -> component#2.name#2.name1
+ 0:[\t ] -> component#2.name#2.name2
+ [(epsilon)] -> component#2.name#2.#4
+ [(epsilon)] -> component#2.name#2.#3
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.name#2.#3
+ main.component#2.name#2.#4
+ main.component#2.name#2.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 117 = main.component#2.name#2.#3
+ Tags : COPY_TO_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 118 = main.component#2.name#2.#4
+ Tags : GOT_NAME_TRAILING_SPACE
+ Epsilon closure :
+ (self)
+
+NFA state 119 = main.component#2.name#2.out
+ [(epsilon)] -> component#2.out
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 120 = main.component#2.majorminor#3.in
+ [(epsilon)] -> component#2.majorminor#3.major#1.in
+ Epsilon closure :
+ (self)
+ main.component#2.majorminor#3.major#1.in
+
+NFA state 121 = main.component#2.majorminor#3.major#1.in
+ 6:[\055] -> component#2.majorminor#3.major#1.name1
+ 11:[A-Z_a-z] -> component#2.majorminor#3.major#1.name1
+ 8:[0-9] -> component#2.majorminor#3.major#1.name1
+ Epsilon closure :
+ (self)
+
+NFA state 122 = main.component#2.majorminor#3.major#1.name1
+ 6:[\055] -> component#2.majorminor#3.major#1.name1
+ 11:[A-Z_a-z] -> component#2.majorminor#3.major#1.name1
+ 8:[0-9] -> component#2.majorminor#3.major#1.name1
+ [(epsilon)] -> component#2.majorminor#3.major#1.out
+ Epsilon closure :
+ (self)
+ main.component#2.majorminor#3.major#1.out
+ main.component#2.majorminor#3.foo
+
+NFA state 123 = main.component#2.majorminor#3.major#1.out
+ [(epsilon)] -> component#2.majorminor#3.foo
+ Epsilon closure :
+ (self)
+ main.component#2.majorminor#3.foo
+
+NFA state 124 = main.component#2.majorminor#3.foo
+ 7:[/] -> component#2.majorminor#3.bar
+ Epsilon closure :
+ (self)
+
+NFA state 125 = main.component#2.majorminor#3.bar
+ [(epsilon)] -> component#2.majorminor#3.minor#2.in
+ Epsilon closure :
+ (self)
+ main.component#2.majorminor#3.minor#2.in
+
+NFA state 126 = main.component#2.majorminor#3.minor#2.in
+ 5:[+.] -> component#2.majorminor#3.minor#2.minor1
+ 12:[\\] -> component#2.majorminor#3.minor#2.minor1
+ 6:[\055] -> component#2.majorminor#3.minor#2.minor1
+ 6:[\055] -> component#2.majorminor#3.minor#2.minor1
+ 11:[A-Z_a-z] -> component#2.majorminor#3.minor#2.minor1
+ 8:[0-9] -> component#2.majorminor#3.minor#2.minor1
+ Epsilon closure :
+ (self)
+
+NFA state 127 = main.component#2.majorminor#3.minor#2.minor1
+ [(epsilon)] -> component#2.majorminor#3.minor#2.#1
+ 5:[+.] -> component#2.majorminor#3.minor#2.minor1
+ 12:[\\] -> component#2.majorminor#3.minor#2.minor1
+ 6:[\055] -> component#2.majorminor#3.minor#2.minor1
+ 6:[\055] -> component#2.majorminor#3.minor#2.minor1
+ 11:[A-Z_a-z] -> component#2.majorminor#3.minor#2.minor1
+ 8:[0-9] -> component#2.majorminor#3.minor#2.minor1
+ [(epsilon)] -> component#2.majorminor#3.minor#2.out
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.majorminor#3.minor#2.#1
+ main.component#2.majorminor#3.minor#2.out
+ main.component#2.majorminor#3.out
+ main.component#2.majorminor#3.#1
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 128 = main.component#2.majorminor#3.minor#2.#1
+ Tags : COPY_TO_MINOR
+ Epsilon closure :
+ (self)
+
+NFA state 129 = main.component#2.majorminor#3.minor#2.out
+ [(epsilon)] -> component#2.majorminor#3.out
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.majorminor#3.out
+ main.component#2.majorminor#3.#1
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 130 = main.component#2.majorminor#3.out
+ [(epsilon)] -> component#2.majorminor#3.#1
+ [(epsilon)] -> component#2.out
+ Epsilon closure :
+ (self)
+ main.#2
+ main.component#2.majorminor#3.#1
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 131 = main.component#2.majorminor#3.#1
+ Tags : GOT_MAJORMINOR
+ Epsilon closure :
+ (self)
+
+NFA state 132 = main.component#2.out
+ [(epsilon)] -> #2
+ Epsilon closure :
+ (self)
+ main.#2
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+
+NFA state 133 = main.#3
+ EOS -> out2
+ Epsilon closure :
+ (self)
+
+NFA state 134 = main.optwhite#3.in
+ [(epsilon)] -> optwhite#3.out
+ 0:[\t ] -> optwhite#3.in
+ 1:[\r] -> optwhite#3.in
+ Epsilon closure :
+ (self)
+ main.#3
+ main.optwhite#3.out
+
+NFA state 135 = main.optwhite#3.out
+ [(epsilon)] -> #3
+ Epsilon closure :
+ (self)
+ main.#3
+
+NFA state 136 = main.#4
+ [(epsilon)] -> component#5.in
+ Epsilon closure :
+ (self)
+ main.component#5.in
+ main.component#5.namevalue#1.in
+ main.component#5.namevalue#1.#1
+ main.component#5.namevalue#1.optwhite#1.in
+ main.component#5.namevalue#1.optwhite#1.out
+ main.component#5.namevalue#1.name#2.in
+ main.component#5.namevalue#1.#4
+ main.component#5.namevalue#1.optwhite#4.in
+ main.component#5.namevalue#1.optwhite#4.out
+ main.component#5.namevalue#1.name#5.in
+ main.component#5.name#2.in
+ main.component#5.majorminor#3.in
+ main.component#5.majorminor#3.major#1.in
+
+NFA state 137 = main.optwhite#4.in
+ [(epsilon)] -> optwhite#4.out
+ 0:[\t ] -> optwhite#4.in
+ 1:[\r] -> optwhite#4.in
+ Epsilon closure :
+ (self)
+ main.#4
+ main.optwhite#4.out
+ main.component#5.in
+ main.component#5.namevalue#1.in
+ main.component#5.namevalue#1.#1
+ main.component#5.namevalue#1.optwhite#1.in
+ main.component#5.namevalue#1.optwhite#1.out
+ main.component#5.namevalue#1.name#2.in
+ main.component#5.namevalue#1.#4
+ main.component#5.namevalue#1.optwhite#4.in
+ main.component#5.namevalue#1.optwhite#4.out
+ main.component#5.namevalue#1.name#5.in
+ main.component#5.name#2.in
+ main.component#5.majorminor#3.in
+ main.component#5.majorminor#3.major#1.in
+
+NFA state 138 = main.optwhite#4.out
+ [(epsilon)] -> #4
+ Epsilon closure :
+ (self)
+ main.#4
+ main.component#5.in
+ main.component#5.namevalue#1.in
+ main.component#5.namevalue#1.#1
+ main.component#5.namevalue#1.optwhite#1.in
+ main.component#5.namevalue#1.optwhite#1.out
+ main.component#5.namevalue#1.name#2.in
+ main.component#5.namevalue#1.#4
+ main.component#5.namevalue#1.optwhite#4.in
+ main.component#5.namevalue#1.optwhite#4.out
+ main.component#5.namevalue#1.name#5.in
+ main.component#5.name#2.in
+ main.component#5.majorminor#3.in
+ main.component#5.majorminor#3.major#1.in
+
+NFA state 139 = main.#5
+ [(epsilon)] -> optwhite#6.in
+ Epsilon closure :
+ (self)
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 140 = main.component#5.in
+ [(epsilon)] -> component#5.namevalue#1.in
+ [(epsilon)] -> component#5.name#2.in
+ [(epsilon)] -> component#5.majorminor#3.in
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.in
+ main.component#5.namevalue#1.#1
+ main.component#5.namevalue#1.optwhite#1.in
+ main.component#5.namevalue#1.optwhite#1.out
+ main.component#5.namevalue#1.name#2.in
+ main.component#5.namevalue#1.#4
+ main.component#5.namevalue#1.optwhite#4.in
+ main.component#5.namevalue#1.optwhite#4.out
+ main.component#5.namevalue#1.name#5.in
+ main.component#5.name#2.in
+ main.component#5.majorminor#3.in
+ main.component#5.majorminor#3.major#1.in
+
+NFA state 141 = main.component#5.namevalue#1.in
+ [(epsilon)] -> component#5.namevalue#1.optwhite#4.in
+ [(epsilon)] -> component#5.namevalue#1.optwhite#1.in
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#1
+ main.component#5.namevalue#1.optwhite#1.in
+ main.component#5.namevalue#1.optwhite#1.out
+ main.component#5.namevalue#1.name#2.in
+ main.component#5.namevalue#1.#4
+ main.component#5.namevalue#1.optwhite#4.in
+ main.component#5.namevalue#1.optwhite#4.out
+ main.component#5.namevalue#1.name#5.in
+
+NFA state 142 = main.component#5.namevalue#1.#1
+ [(epsilon)] -> component#5.namevalue#1.name#2.in
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.name#2.in
+
+NFA state 143 = main.component#5.namevalue#1.optwhite#1.in
+ [(epsilon)] -> component#5.namevalue#1.optwhite#1.out
+ 0:[\t ] -> component#5.namevalue#1.optwhite#1.in
+ 1:[\r] -> component#5.namevalue#1.optwhite#1.in
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#1
+ main.component#5.namevalue#1.optwhite#1.out
+ main.component#5.namevalue#1.name#2.in
+
+NFA state 144 = main.component#5.namevalue#1.optwhite#1.out
+ [(epsilon)] -> component#5.namevalue#1.#1
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#1
+ main.component#5.namevalue#1.name#2.in
+
+NFA state 145 = main.component#5.namevalue#1.#2
+ [(epsilon)] -> component#5.namevalue#1.optwhite#3.in
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#3
+ main.component#5.namevalue#1.optwhite#3.in
+ main.component#5.namevalue#1.optwhite#3.out
+
+NFA state 146 = main.component#5.namevalue#1.name#2.in
+ 6:[\055] -> component#5.namevalue#1.name#2.name1
+ 11:[A-Z_a-z] -> component#5.namevalue#1.name#2.name1
+ 8:[0-9] -> component#5.namevalue#1.name#2.name1
+ Epsilon closure :
+ (self)
+
+NFA state 147 = main.component#5.namevalue#1.name#2.name1
+ [(epsilon)] -> component#5.namevalue#1.name#2.#1
+ [(epsilon)] -> component#5.namevalue#1.name#2.#2
+ 6:[\055] -> component#5.namevalue#1.name#2.name1
+ 11:[A-Z_a-z] -> component#5.namevalue#1.name#2.name1
+ 8:[0-9] -> component#5.namevalue#1.name#2.name1
+ 0:[\t ] -> component#5.namevalue#1.name#2.name2
+ [(epsilon)] -> component#5.namevalue#1.name#2.out
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#2
+ main.component#5.namevalue#1.name#2.#1
+ main.component#5.namevalue#1.name#2.#2
+ main.component#5.namevalue#1.name#2.out
+ main.component#5.namevalue#1.#3
+ main.component#5.namevalue#1.optwhite#3.in
+ main.component#5.namevalue#1.optwhite#3.out
+
+NFA state 148 = main.component#5.namevalue#1.name#2.#1
+ Tags : COPY_TO_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 149 = main.component#5.namevalue#1.name#2.#2
+ Tags : GOT_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 150 = main.component#5.namevalue#1.name#2.name2
+ [(epsilon)] -> component#5.namevalue#1.name#2.#3
+ [(epsilon)] -> component#5.namevalue#1.name#2.#4
+ 0:[\t ] -> component#5.namevalue#1.name#2.name2
+ 6:[\055] -> component#5.namevalue#1.name#2.name1
+ 11:[A-Z_a-z] -> component#5.namevalue#1.name#2.name1
+ 8:[0-9] -> component#5.namevalue#1.name#2.name1
+ [(epsilon)] -> component#5.namevalue#1.name#2.out
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#2
+ main.component#5.namevalue#1.name#2.#3
+ main.component#5.namevalue#1.name#2.#4
+ main.component#5.namevalue#1.name#2.out
+ main.component#5.namevalue#1.#3
+ main.component#5.namevalue#1.optwhite#3.in
+ main.component#5.namevalue#1.optwhite#3.out
+
+NFA state 151 = main.component#5.namevalue#1.name#2.#3
+ Tags : COPY_TO_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 152 = main.component#5.namevalue#1.name#2.#4
+ Tags : GOT_NAME_TRAILING_SPACE
+ Epsilon closure :
+ (self)
+
+NFA state 153 = main.component#5.namevalue#1.name#2.out
+ [(epsilon)] -> component#5.namevalue#1.#2
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#2
+ main.component#5.namevalue#1.#3
+ main.component#5.namevalue#1.optwhite#3.in
+ main.component#5.namevalue#1.optwhite#3.out
+
+NFA state 154 = main.component#5.namevalue#1.#3
+ 10:[=] -> component#5.namevalue#1.rhs_normal
+ Epsilon closure :
+ (self)
+
+NFA state 155 = main.component#5.namevalue#1.optwhite#3.in
+ [(epsilon)] -> component#5.namevalue#1.optwhite#3.out
+ 0:[\t ] -> component#5.namevalue#1.optwhite#3.in
+ 1:[\r] -> component#5.namevalue#1.optwhite#3.in
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#3
+ main.component#5.namevalue#1.optwhite#3.out
+
+NFA state 156 = main.component#5.namevalue#1.optwhite#3.out
+ [(epsilon)] -> component#5.namevalue#1.#3
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#3
+
+NFA state 157 = main.component#5.namevalue#1.#4
+ [(epsilon)] -> component#5.namevalue#1.name#5.in
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.name#5.in
+
+NFA state 158 = main.component#5.namevalue#1.optwhite#4.in
+ [(epsilon)] -> component#5.namevalue#1.optwhite#4.out
+ 0:[\t ] -> component#5.namevalue#1.optwhite#4.in
+ 1:[\r] -> component#5.namevalue#1.optwhite#4.in
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#4
+ main.component#5.namevalue#1.optwhite#4.out
+ main.component#5.namevalue#1.name#5.in
+
+NFA state 159 = main.component#5.namevalue#1.optwhite#4.out
+ [(epsilon)] -> component#5.namevalue#1.#4
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#4
+ main.component#5.namevalue#1.name#5.in
+
+NFA state 160 = main.component#5.namevalue#1.#5
+ 4:[*] -> component#5.namevalue#1.#6
+ Epsilon closure :
+ (self)
+
+NFA state 161 = main.component#5.namevalue#1.name#5.in
+ 6:[\055] -> component#5.namevalue#1.name#5.name1
+ 11:[A-Z_a-z] -> component#5.namevalue#1.name#5.name1
+ 8:[0-9] -> component#5.namevalue#1.name#5.name1
+ Epsilon closure :
+ (self)
+
+NFA state 162 = main.component#5.namevalue#1.name#5.name1
+ [(epsilon)] -> component#5.namevalue#1.name#5.#1
+ [(epsilon)] -> component#5.namevalue#1.name#5.#2
+ 6:[\055] -> component#5.namevalue#1.name#5.name1
+ 11:[A-Z_a-z] -> component#5.namevalue#1.name#5.name1
+ 8:[0-9] -> component#5.namevalue#1.name#5.name1
+ 0:[\t ] -> component#5.namevalue#1.name#5.name2
+ [(epsilon)] -> component#5.namevalue#1.name#5.out
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#5
+ main.component#5.namevalue#1.name#5.#1
+ main.component#5.namevalue#1.name#5.#2
+ main.component#5.namevalue#1.name#5.out
+
+NFA state 163 = main.component#5.namevalue#1.name#5.#1
+ Tags : COPY_TO_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 164 = main.component#5.namevalue#1.name#5.#2
+ Tags : GOT_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 165 = main.component#5.namevalue#1.name#5.name2
+ [(epsilon)] -> component#5.namevalue#1.name#5.#3
+ [(epsilon)] -> component#5.namevalue#1.name#5.#4
+ 0:[\t ] -> component#5.namevalue#1.name#5.name2
+ 6:[\055] -> component#5.namevalue#1.name#5.name1
+ 11:[A-Z_a-z] -> component#5.namevalue#1.name#5.name1
+ 8:[0-9] -> component#5.namevalue#1.name#5.name1
+ [(epsilon)] -> component#5.namevalue#1.name#5.out
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#5
+ main.component#5.namevalue#1.name#5.#3
+ main.component#5.namevalue#1.name#5.#4
+ main.component#5.namevalue#1.name#5.out
+
+NFA state 166 = main.component#5.namevalue#1.name#5.#3
+ Tags : COPY_TO_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 167 = main.component#5.namevalue#1.name#5.#4
+ Tags : GOT_NAME_TRAILING_SPACE
+ Epsilon closure :
+ (self)
+
+NFA state 168 = main.component#5.namevalue#1.name#5.out
+ [(epsilon)] -> component#5.namevalue#1.#5
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#5
+
+NFA state 169 = main.component#5.namevalue#1.#6
+ [(epsilon)] -> component#5.namevalue#1.digits#6.in
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.digits#6.in
+
+NFA state 170 = main.component#5.namevalue#1.#7
+ [(epsilon)] -> component#5.namevalue#1.optwhite#7.in
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#8
+ main.component#5.namevalue#1.optwhite#7.in
+ main.component#5.namevalue#1.optwhite#7.out
+
+NFA state 171 = main.component#5.namevalue#1.digits#6.in
+ 8:[0-9] -> component#5.namevalue#1.digits#6.out
+ 8:[0-9] -> component#5.namevalue#1.digits#6.in
+ Epsilon closure :
+ (self)
+
+NFA state 172 = main.component#5.namevalue#1.digits#6.out
+ [(epsilon)] -> component#5.namevalue#1.#7
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#7
+ main.component#5.namevalue#1.#8
+ main.component#5.namevalue#1.optwhite#7.in
+ main.component#5.namevalue#1.optwhite#7.out
+
+NFA state 173 = main.component#5.namevalue#1.#8
+ 10:[=] -> component#5.namevalue#1.rhs_continue
+ Epsilon closure :
+ (self)
+
+NFA state 174 = main.component#5.namevalue#1.optwhite#7.in
+ [(epsilon)] -> component#5.namevalue#1.optwhite#7.out
+ 0:[\t ] -> component#5.namevalue#1.optwhite#7.in
+ 1:[\r] -> component#5.namevalue#1.optwhite#7.in
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#8
+ main.component#5.namevalue#1.optwhite#7.out
+
+NFA state 175 = main.component#5.namevalue#1.optwhite#7.out
+ [(epsilon)] -> component#5.namevalue#1.#8
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#8
+
+NFA state 176 = main.component#5.namevalue#1.rhs_normal
+ [(epsilon)] -> component#5.namevalue#1.optwhite#14.in
+ [(epsilon)] -> component#5.namevalue#1.optwhite#11.in
+ [(epsilon)] -> component#5.namevalue#1.optwhite#8.in
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#9
+ main.component#5.namevalue#1.optwhite#8.in
+ main.component#5.namevalue#1.optwhite#8.out
+ main.component#5.namevalue#1.qvalue#9.in
+ main.component#5.namevalue#1.#11
+ main.component#5.namevalue#1.optwhite#11.in
+ main.component#5.namevalue#1.optwhite#11.out
+ main.component#5.namevalue#1.value#12.in
+ main.component#5.namevalue#1.#13
+ main.component#5.namevalue#1.optwhite#14.in
+ main.component#5.namevalue#1.optwhite#14.out
+ main.component#5.namevalue#1.#14
+
+NFA state 177 = main.component#5.namevalue#1.#9
+ [(epsilon)] -> component#5.namevalue#1.qvalue#9.in
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.qvalue#9.in
+
+NFA state 178 = main.component#5.namevalue#1.optwhite#8.in
+ [(epsilon)] -> component#5.namevalue#1.optwhite#8.out
+ 0:[\t ] -> component#5.namevalue#1.optwhite#8.in
+ 1:[\r] -> component#5.namevalue#1.optwhite#8.in
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#9
+ main.component#5.namevalue#1.optwhite#8.out
+ main.component#5.namevalue#1.qvalue#9.in
+
+NFA state 179 = main.component#5.namevalue#1.optwhite#8.out
+ [(epsilon)] -> component#5.namevalue#1.#9
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#9
+ main.component#5.namevalue#1.qvalue#9.in
+
+NFA state 180 = main.component#5.namevalue#1.#10
+ [(epsilon)] -> component#5.namevalue#1.optwhite#10.in
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.namevalue#1.optwhite#10.in
+ main.component#5.namevalue#1.optwhite#10.out
+ main.component#5.namevalue#1.out_normal
+ main.component#5.namevalue#1.#19
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 181 = main.component#5.namevalue#1.qvalue#9.in
+ 3:["] -> component#5.namevalue#1.qvalue#9.qv0
+ Epsilon closure :
+ (self)
+
+NFA state 182 = main.component#5.namevalue#1.qvalue#9.qv0
+ [(epsilon)] -> component#5.namevalue#1.qvalue#9.escape#1.in
+ 0:[\t ] -> component#5.namevalue#1.qvalue#9.qv1
+ 9:[;] -> component#5.namevalue#1.qvalue#9.qv1
+ 2:[!#-),:<>-@[]^`{-~] -> component#5.namevalue#1.qvalue#9.qv1
+ 11:[A-Z_a-z] -> component#5.namevalue#1.qvalue#9.qv1
+ 10:[=] -> component#5.namevalue#1.qvalue#9.qv1
+ 8:[0-9] -> component#5.namevalue#1.qvalue#9.qv1
+ 7:[/] -> component#5.namevalue#1.qvalue#9.qv1
+ 6:[\055] -> component#5.namevalue#1.qvalue#9.qv1
+ 5:[+.] -> component#5.namevalue#1.qvalue#9.qv1
+ 4:[*] -> component#5.namevalue#1.qvalue#9.qv1
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.qvalue#9.escape#1.in
+
+NFA state 183 = main.component#5.namevalue#1.qvalue#9.escape#1.in
+ 12:[\\] -> component#5.namevalue#1.qvalue#9.escape#1.#2
+ 12:[\\] -> component#5.namevalue#1.qvalue#9.escape#1.#1
+ Epsilon closure :
+ (self)
+
+NFA state 184 = main.component#5.namevalue#1.qvalue#9.escape#1.#1
+ 12:[\\] -> component#5.namevalue#1.qvalue#9.escape#1.out
+ Epsilon closure :
+ (self)
+
+NFA state 185 = main.component#5.namevalue#1.qvalue#9.escape#1.#2
+ 3:["] -> component#5.namevalue#1.qvalue#9.escape#1.out
+ Epsilon closure :
+ (self)
+
+NFA state 186 = main.component#5.namevalue#1.qvalue#9.escape#1.out
+ [(epsilon)] -> component#5.namevalue#1.qvalue#9.qv1
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.qvalue#9.qv1
+ main.component#5.namevalue#1.qvalue#9.#1
+ main.component#5.namevalue#1.qvalue#9.escape#2.in
+ main.component#5.namevalue#1.qvalue#9.qv2
+
+NFA state 187 = main.component#5.namevalue#1.qvalue#9.qv1
+ [(epsilon)] -> component#5.namevalue#1.qvalue#9.#1
+ [(epsilon)] -> component#5.namevalue#1.qvalue#9.escape#2.in
+ 0:[\t ] -> component#5.namevalue#1.qvalue#9.qv1
+ 9:[;] -> component#5.namevalue#1.qvalue#9.qv1
+ 2:[!#-),:<>-@[]^`{-~] -> component#5.namevalue#1.qvalue#9.qv1
+ 11:[A-Z_a-z] -> component#5.namevalue#1.qvalue#9.qv1
+ 10:[=] -> component#5.namevalue#1.qvalue#9.qv1
+ 8:[0-9] -> component#5.namevalue#1.qvalue#9.qv1
+ 7:[/] -> component#5.namevalue#1.qvalue#9.qv1
+ 6:[\055] -> component#5.namevalue#1.qvalue#9.qv1
+ 5:[+.] -> component#5.namevalue#1.qvalue#9.qv1
+ 4:[*] -> component#5.namevalue#1.qvalue#9.qv1
+ [(epsilon)] -> component#5.namevalue#1.qvalue#9.qv2
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.qvalue#9.#1
+ main.component#5.namevalue#1.qvalue#9.escape#2.in
+ main.component#5.namevalue#1.qvalue#9.qv2
+
+NFA state 188 = main.component#5.namevalue#1.qvalue#9.#1
+ Tags : COPY_TO_VALUE
+ Epsilon closure :
+ (self)
+
+NFA state 189 = main.component#5.namevalue#1.qvalue#9.escape#2.in
+ 12:[\\] -> component#5.namevalue#1.qvalue#9.escape#2.#2
+ 12:[\\] -> component#5.namevalue#1.qvalue#9.escape#2.#1
+ Epsilon closure :
+ (self)
+
+NFA state 190 = main.component#5.namevalue#1.qvalue#9.escape#2.#1
+ 12:[\\] -> component#5.namevalue#1.qvalue#9.escape#2.out
+ Epsilon closure :
+ (self)
+
+NFA state 191 = main.component#5.namevalue#1.qvalue#9.escape#2.#2
+ 3:["] -> component#5.namevalue#1.qvalue#9.escape#2.out
+ Epsilon closure :
+ (self)
+
+NFA state 192 = main.component#5.namevalue#1.qvalue#9.escape#2.out
+ [(epsilon)] -> component#5.namevalue#1.qvalue#9.qv1
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.qvalue#9.qv1
+ main.component#5.namevalue#1.qvalue#9.#1
+ main.component#5.namevalue#1.qvalue#9.escape#2.in
+ main.component#5.namevalue#1.qvalue#9.qv2
+
+NFA state 193 = main.component#5.namevalue#1.qvalue#9.qv2
+ 3:["] -> component#5.namevalue#1.qvalue#9.out
+ Epsilon closure :
+ (self)
+
+NFA state 194 = main.component#5.namevalue#1.qvalue#9.out
+ [(epsilon)] -> component#5.namevalue#1.#10
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.namevalue#1.#10
+ main.component#5.namevalue#1.optwhite#10.in
+ main.component#5.namevalue#1.optwhite#10.out
+ main.component#5.namevalue#1.out_normal
+ main.component#5.namevalue#1.#19
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 195 = main.component#5.namevalue#1.optwhite#10.in
+ [(epsilon)] -> component#5.namevalue#1.optwhite#10.out
+ 0:[\t ] -> component#5.namevalue#1.optwhite#10.in
+ 1:[\r] -> component#5.namevalue#1.optwhite#10.in
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.namevalue#1.optwhite#10.out
+ main.component#5.namevalue#1.out_normal
+ main.component#5.namevalue#1.#19
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 196 = main.component#5.namevalue#1.optwhite#10.out
+ [(epsilon)] -> component#5.namevalue#1.out_normal
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.namevalue#1.out_normal
+ main.component#5.namevalue#1.#19
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 197 = main.component#5.namevalue#1.#11
+ [(epsilon)] -> component#5.namevalue#1.value#12.in
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.value#12.in
+
+NFA state 198 = main.component#5.namevalue#1.optwhite#11.in
+ [(epsilon)] -> component#5.namevalue#1.optwhite#11.out
+ 0:[\t ] -> component#5.namevalue#1.optwhite#11.in
+ 1:[\r] -> component#5.namevalue#1.optwhite#11.in
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#11
+ main.component#5.namevalue#1.optwhite#11.out
+ main.component#5.namevalue#1.value#12.in
+
+NFA state 199 = main.component#5.namevalue#1.optwhite#11.out
+ [(epsilon)] -> component#5.namevalue#1.#11
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#11
+ main.component#5.namevalue#1.value#12.in
+
+NFA state 200 = main.component#5.namevalue#1.#12
+ [(epsilon)] -> component#5.namevalue#1.optwhite#13.in
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.namevalue#1.optwhite#13.in
+ main.component#5.namevalue#1.optwhite#13.out
+ main.component#5.namevalue#1.out_normal
+ main.component#5.namevalue#1.#19
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 201 = main.component#5.namevalue#1.value#12.in
+ 2:[!#-),:<>-@[]^`{-~] -> component#5.namevalue#1.value#12.v1
+ 11:[A-Z_a-z] -> component#5.namevalue#1.value#12.v1
+ 10:[=] -> component#5.namevalue#1.value#12.v1
+ 8:[0-9] -> component#5.namevalue#1.value#12.v1
+ 7:[/] -> component#5.namevalue#1.value#12.v1
+ 6:[\055] -> component#5.namevalue#1.value#12.v1
+ 5:[+.] -> component#5.namevalue#1.value#12.v1
+ 4:[*] -> component#5.namevalue#1.value#12.v1
+ Epsilon closure :
+ (self)
+
+NFA state 202 = main.component#5.namevalue#1.value#12.v1
+ [(epsilon)] -> component#5.namevalue#1.value#12.#1
+ [(epsilon)] -> component#5.namevalue#1.value#12.out
+ 2:[!#-),:<>-@[]^`{-~] -> component#5.namevalue#1.value#12.v1
+ 11:[A-Z_a-z] -> component#5.namevalue#1.value#12.v1
+ 10:[=] -> component#5.namevalue#1.value#12.v1
+ 8:[0-9] -> component#5.namevalue#1.value#12.v1
+ 7:[/] -> component#5.namevalue#1.value#12.v1
+ 6:[\055] -> component#5.namevalue#1.value#12.v1
+ 5:[+.] -> component#5.namevalue#1.value#12.v1
+ 4:[*] -> component#5.namevalue#1.value#12.v1
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.namevalue#1.#12
+ main.component#5.namevalue#1.value#12.#1
+ main.component#5.namevalue#1.value#12.out
+ main.component#5.namevalue#1.optwhite#13.in
+ main.component#5.namevalue#1.optwhite#13.out
+ main.component#5.namevalue#1.out_normal
+ main.component#5.namevalue#1.#19
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 203 = main.component#5.namevalue#1.value#12.#1
+ Tags : COPY_TO_VALUE
+ Epsilon closure :
+ (self)
+
+NFA state 204 = main.component#5.namevalue#1.value#12.out
+ [(epsilon)] -> component#5.namevalue#1.#12
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.namevalue#1.#12
+ main.component#5.namevalue#1.optwhite#13.in
+ main.component#5.namevalue#1.optwhite#13.out
+ main.component#5.namevalue#1.out_normal
+ main.component#5.namevalue#1.#19
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 205 = main.component#5.namevalue#1.optwhite#13.in
+ [(epsilon)] -> component#5.namevalue#1.optwhite#13.out
+ 0:[\t ] -> component#5.namevalue#1.optwhite#13.in
+ 1:[\r] -> component#5.namevalue#1.optwhite#13.in
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.namevalue#1.optwhite#13.out
+ main.component#5.namevalue#1.out_normal
+ main.component#5.namevalue#1.#19
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 206 = main.component#5.namevalue#1.optwhite#13.out
+ [(epsilon)] -> component#5.namevalue#1.out_normal
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.namevalue#1.out_normal
+ main.component#5.namevalue#1.#19
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 207 = main.component#5.namevalue#1.#13
+ [(epsilon)] -> component#5.namevalue#1.#14
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#14
+
+NFA state 208 = main.component#5.namevalue#1.optwhite#14.in
+ [(epsilon)] -> component#5.namevalue#1.optwhite#14.out
+ 0:[\t ] -> component#5.namevalue#1.optwhite#14.in
+ 1:[\r] -> component#5.namevalue#1.optwhite#14.in
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#13
+ main.component#5.namevalue#1.optwhite#14.out
+ main.component#5.namevalue#1.#14
+
+NFA state 209 = main.component#5.namevalue#1.optwhite#14.out
+ [(epsilon)] -> component#5.namevalue#1.#13
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#13
+ main.component#5.namevalue#1.#14
+
+NFA state 210 = main.component#5.namevalue#1.#14
+ EOS -> component#5.namevalue#1.out_normal
+ Epsilon closure :
+ (self)
+
+NFA state 211 = main.component#5.namevalue#1.rhs_continue
+ [(epsilon)] -> component#5.namevalue#1.optwhite#18.in
+ [(epsilon)] -> component#5.namevalue#1.optwhite#15.in
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#15
+ main.component#5.namevalue#1.optwhite#15.in
+ main.component#5.namevalue#1.optwhite#15.out
+ main.component#5.namevalue#1.qvalue#16.in
+ main.component#5.namevalue#1.#17
+ main.component#5.namevalue#1.optwhite#18.in
+ main.component#5.namevalue#1.optwhite#18.out
+ main.component#5.namevalue#1.value#19.in
+
+NFA state 212 = main.component#5.namevalue#1.#15
+ [(epsilon)] -> component#5.namevalue#1.qvalue#16.in
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.qvalue#16.in
+
+NFA state 213 = main.component#5.namevalue#1.optwhite#15.in
+ [(epsilon)] -> component#5.namevalue#1.optwhite#15.out
+ 0:[\t ] -> component#5.namevalue#1.optwhite#15.in
+ 1:[\r] -> component#5.namevalue#1.optwhite#15.in
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#15
+ main.component#5.namevalue#1.optwhite#15.out
+ main.component#5.namevalue#1.qvalue#16.in
+
+NFA state 214 = main.component#5.namevalue#1.optwhite#15.out
+ [(epsilon)] -> component#5.namevalue#1.#15
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#15
+ main.component#5.namevalue#1.qvalue#16.in
+
+NFA state 215 = main.component#5.namevalue#1.#16
+ [(epsilon)] -> component#5.namevalue#1.optwhite#17.in
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.namevalue#1.optwhite#17.in
+ main.component#5.namevalue#1.optwhite#17.out
+ main.component#5.namevalue#1.out_continue
+ main.component#5.namevalue#1.#20
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 216 = main.component#5.namevalue#1.qvalue#16.in
+ 3:["] -> component#5.namevalue#1.qvalue#16.qv0
+ Epsilon closure :
+ (self)
+
+NFA state 217 = main.component#5.namevalue#1.qvalue#16.qv0
+ [(epsilon)] -> component#5.namevalue#1.qvalue#16.escape#1.in
+ 0:[\t ] -> component#5.namevalue#1.qvalue#16.qv1
+ 9:[;] -> component#5.namevalue#1.qvalue#16.qv1
+ 2:[!#-),:<>-@[]^`{-~] -> component#5.namevalue#1.qvalue#16.qv1
+ 11:[A-Z_a-z] -> component#5.namevalue#1.qvalue#16.qv1
+ 10:[=] -> component#5.namevalue#1.qvalue#16.qv1
+ 8:[0-9] -> component#5.namevalue#1.qvalue#16.qv1
+ 7:[/] -> component#5.namevalue#1.qvalue#16.qv1
+ 6:[\055] -> component#5.namevalue#1.qvalue#16.qv1
+ 5:[+.] -> component#5.namevalue#1.qvalue#16.qv1
+ 4:[*] -> component#5.namevalue#1.qvalue#16.qv1
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.qvalue#16.escape#1.in
+
+NFA state 218 = main.component#5.namevalue#1.qvalue#16.escape#1.in
+ 12:[\\] -> component#5.namevalue#1.qvalue#16.escape#1.#2
+ 12:[\\] -> component#5.namevalue#1.qvalue#16.escape#1.#1
+ Epsilon closure :
+ (self)
+
+NFA state 219 = main.component#5.namevalue#1.qvalue#16.escape#1.#1
+ 12:[\\] -> component#5.namevalue#1.qvalue#16.escape#1.out
+ Epsilon closure :
+ (self)
+
+NFA state 220 = main.component#5.namevalue#1.qvalue#16.escape#1.#2
+ 3:["] -> component#5.namevalue#1.qvalue#16.escape#1.out
+ Epsilon closure :
+ (self)
+
+NFA state 221 = main.component#5.namevalue#1.qvalue#16.escape#1.out
+ [(epsilon)] -> component#5.namevalue#1.qvalue#16.qv1
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.qvalue#16.qv1
+ main.component#5.namevalue#1.qvalue#16.#1
+ main.component#5.namevalue#1.qvalue#16.escape#2.in
+ main.component#5.namevalue#1.qvalue#16.qv2
+
+NFA state 222 = main.component#5.namevalue#1.qvalue#16.qv1
+ [(epsilon)] -> component#5.namevalue#1.qvalue#16.#1
+ [(epsilon)] -> component#5.namevalue#1.qvalue#16.escape#2.in
+ 0:[\t ] -> component#5.namevalue#1.qvalue#16.qv1
+ 9:[;] -> component#5.namevalue#1.qvalue#16.qv1
+ 2:[!#-),:<>-@[]^`{-~] -> component#5.namevalue#1.qvalue#16.qv1
+ 11:[A-Z_a-z] -> component#5.namevalue#1.qvalue#16.qv1
+ 10:[=] -> component#5.namevalue#1.qvalue#16.qv1
+ 8:[0-9] -> component#5.namevalue#1.qvalue#16.qv1
+ 7:[/] -> component#5.namevalue#1.qvalue#16.qv1
+ 6:[\055] -> component#5.namevalue#1.qvalue#16.qv1
+ 5:[+.] -> component#5.namevalue#1.qvalue#16.qv1
+ 4:[*] -> component#5.namevalue#1.qvalue#16.qv1
+ [(epsilon)] -> component#5.namevalue#1.qvalue#16.qv2
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.qvalue#16.#1
+ main.component#5.namevalue#1.qvalue#16.escape#2.in
+ main.component#5.namevalue#1.qvalue#16.qv2
+
+NFA state 223 = main.component#5.namevalue#1.qvalue#16.#1
+ Tags : COPY_TO_VALUE
+ Epsilon closure :
+ (self)
+
+NFA state 224 = main.component#5.namevalue#1.qvalue#16.escape#2.in
+ 12:[\\] -> component#5.namevalue#1.qvalue#16.escape#2.#2
+ 12:[\\] -> component#5.namevalue#1.qvalue#16.escape#2.#1
+ Epsilon closure :
+ (self)
+
+NFA state 225 = main.component#5.namevalue#1.qvalue#16.escape#2.#1
+ 12:[\\] -> component#5.namevalue#1.qvalue#16.escape#2.out
+ Epsilon closure :
+ (self)
+
+NFA state 226 = main.component#5.namevalue#1.qvalue#16.escape#2.#2
+ 3:["] -> component#5.namevalue#1.qvalue#16.escape#2.out
+ Epsilon closure :
+ (self)
+
+NFA state 227 = main.component#5.namevalue#1.qvalue#16.escape#2.out
+ [(epsilon)] -> component#5.namevalue#1.qvalue#16.qv1
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.qvalue#16.qv1
+ main.component#5.namevalue#1.qvalue#16.#1
+ main.component#5.namevalue#1.qvalue#16.escape#2.in
+ main.component#5.namevalue#1.qvalue#16.qv2
+
+NFA state 228 = main.component#5.namevalue#1.qvalue#16.qv2
+ 3:["] -> component#5.namevalue#1.qvalue#16.out
+ Epsilon closure :
+ (self)
+
+NFA state 229 = main.component#5.namevalue#1.qvalue#16.out
+ [(epsilon)] -> component#5.namevalue#1.#16
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.namevalue#1.#16
+ main.component#5.namevalue#1.optwhite#17.in
+ main.component#5.namevalue#1.optwhite#17.out
+ main.component#5.namevalue#1.out_continue
+ main.component#5.namevalue#1.#20
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 230 = main.component#5.namevalue#1.optwhite#17.in
+ [(epsilon)] -> component#5.namevalue#1.optwhite#17.out
+ 0:[\t ] -> component#5.namevalue#1.optwhite#17.in
+ 1:[\r] -> component#5.namevalue#1.optwhite#17.in
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.namevalue#1.optwhite#17.out
+ main.component#5.namevalue#1.out_continue
+ main.component#5.namevalue#1.#20
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 231 = main.component#5.namevalue#1.optwhite#17.out
+ [(epsilon)] -> component#5.namevalue#1.out_continue
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.namevalue#1.out_continue
+ main.component#5.namevalue#1.#20
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 232 = main.component#5.namevalue#1.#17
+ [(epsilon)] -> component#5.namevalue#1.value#19.in
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.value#19.in
+
+NFA state 233 = main.component#5.namevalue#1.optwhite#18.in
+ [(epsilon)] -> component#5.namevalue#1.optwhite#18.out
+ 0:[\t ] -> component#5.namevalue#1.optwhite#18.in
+ 1:[\r] -> component#5.namevalue#1.optwhite#18.in
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#17
+ main.component#5.namevalue#1.optwhite#18.out
+ main.component#5.namevalue#1.value#19.in
+
+NFA state 234 = main.component#5.namevalue#1.optwhite#18.out
+ [(epsilon)] -> component#5.namevalue#1.#17
+ Epsilon closure :
+ (self)
+ main.component#5.namevalue#1.#17
+ main.component#5.namevalue#1.value#19.in
+
+NFA state 235 = main.component#5.namevalue#1.#18
+ [(epsilon)] -> component#5.namevalue#1.optwhite#20.in
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.namevalue#1.optwhite#20.in
+ main.component#5.namevalue#1.optwhite#20.out
+ main.component#5.namevalue#1.out_continue
+ main.component#5.namevalue#1.#20
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 236 = main.component#5.namevalue#1.value#19.in
+ 2:[!#-),:<>-@[]^`{-~] -> component#5.namevalue#1.value#19.v1
+ 11:[A-Z_a-z] -> component#5.namevalue#1.value#19.v1
+ 10:[=] -> component#5.namevalue#1.value#19.v1
+ 8:[0-9] -> component#5.namevalue#1.value#19.v1
+ 7:[/] -> component#5.namevalue#1.value#19.v1
+ 6:[\055] -> component#5.namevalue#1.value#19.v1
+ 5:[+.] -> component#5.namevalue#1.value#19.v1
+ 4:[*] -> component#5.namevalue#1.value#19.v1
+ Epsilon closure :
+ (self)
+
+NFA state 237 = main.component#5.namevalue#1.value#19.v1
+ [(epsilon)] -> component#5.namevalue#1.value#19.#1
+ [(epsilon)] -> component#5.namevalue#1.value#19.out
+ 2:[!#-),:<>-@[]^`{-~] -> component#5.namevalue#1.value#19.v1
+ 11:[A-Z_a-z] -> component#5.namevalue#1.value#19.v1
+ 10:[=] -> component#5.namevalue#1.value#19.v1
+ 8:[0-9] -> component#5.namevalue#1.value#19.v1
+ 7:[/] -> component#5.namevalue#1.value#19.v1
+ 6:[\055] -> component#5.namevalue#1.value#19.v1
+ 5:[+.] -> component#5.namevalue#1.value#19.v1
+ 4:[*] -> component#5.namevalue#1.value#19.v1
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.namevalue#1.#18
+ main.component#5.namevalue#1.value#19.#1
+ main.component#5.namevalue#1.value#19.out
+ main.component#5.namevalue#1.optwhite#20.in
+ main.component#5.namevalue#1.optwhite#20.out
+ main.component#5.namevalue#1.out_continue
+ main.component#5.namevalue#1.#20
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 238 = main.component#5.namevalue#1.value#19.#1
+ Tags : COPY_TO_VALUE
+ Epsilon closure :
+ (self)
+
+NFA state 239 = main.component#5.namevalue#1.value#19.out
+ [(epsilon)] -> component#5.namevalue#1.#18
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.namevalue#1.#18
+ main.component#5.namevalue#1.optwhite#20.in
+ main.component#5.namevalue#1.optwhite#20.out
+ main.component#5.namevalue#1.out_continue
+ main.component#5.namevalue#1.#20
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 240 = main.component#5.namevalue#1.optwhite#20.in
+ [(epsilon)] -> component#5.namevalue#1.optwhite#20.out
+ 0:[\t ] -> component#5.namevalue#1.optwhite#20.in
+ 1:[\r] -> component#5.namevalue#1.optwhite#20.in
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.namevalue#1.optwhite#20.out
+ main.component#5.namevalue#1.out_continue
+ main.component#5.namevalue#1.#20
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 241 = main.component#5.namevalue#1.optwhite#20.out
+ [(epsilon)] -> component#5.namevalue#1.out_continue
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.namevalue#1.out_continue
+ main.component#5.namevalue#1.#20
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 242 = main.component#5.namevalue#1.out_normal
+ [(epsilon)] -> component#5.namevalue#1.out
+ [(epsilon)] -> component#5.namevalue#1.#19
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.namevalue#1.#19
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 243 = main.component#5.namevalue#1.#19
+ Tags : GOT_NAMEVALUE
+ Epsilon closure :
+ (self)
+
+NFA state 244 = main.component#5.namevalue#1.out_continue
+ [(epsilon)] -> component#5.namevalue#1.out
+ [(epsilon)] -> component#5.namevalue#1.#20
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.namevalue#1.#20
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 245 = main.component#5.namevalue#1.#20
+ Tags : GOT_NAMEVALUE_CONT
+ Epsilon closure :
+ (self)
+
+NFA state 246 = main.component#5.namevalue#1.out
+ [(epsilon)] -> component#5.out
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 247 = main.component#5.name#2.in
+ 6:[\055] -> component#5.name#2.name1
+ 11:[A-Z_a-z] -> component#5.name#2.name1
+ 8:[0-9] -> component#5.name#2.name1
+ Epsilon closure :
+ (self)
+
+NFA state 248 = main.component#5.name#2.name1
+ [(epsilon)] -> component#5.name#2.out
+ 0:[\t ] -> component#5.name#2.name2
+ 6:[\055] -> component#5.name#2.name1
+ 11:[A-Z_a-z] -> component#5.name#2.name1
+ 8:[0-9] -> component#5.name#2.name1
+ [(epsilon)] -> component#5.name#2.#2
+ [(epsilon)] -> component#5.name#2.#1
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.name#2.#1
+ main.component#5.name#2.#2
+ main.component#5.name#2.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 249 = main.component#5.name#2.#1
+ Tags : COPY_TO_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 250 = main.component#5.name#2.#2
+ Tags : GOT_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 251 = main.component#5.name#2.name2
+ [(epsilon)] -> component#5.name#2.out
+ 6:[\055] -> component#5.name#2.name1
+ 11:[A-Z_a-z] -> component#5.name#2.name1
+ 8:[0-9] -> component#5.name#2.name1
+ 0:[\t ] -> component#5.name#2.name2
+ [(epsilon)] -> component#5.name#2.#4
+ [(epsilon)] -> component#5.name#2.#3
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.name#2.#3
+ main.component#5.name#2.#4
+ main.component#5.name#2.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 252 = main.component#5.name#2.#3
+ Tags : COPY_TO_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 253 = main.component#5.name#2.#4
+ Tags : GOT_NAME_TRAILING_SPACE
+ Epsilon closure :
+ (self)
+
+NFA state 254 = main.component#5.name#2.out
+ [(epsilon)] -> component#5.out
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 255 = main.component#5.majorminor#3.in
+ [(epsilon)] -> component#5.majorminor#3.major#1.in
+ Epsilon closure :
+ (self)
+ main.component#5.majorminor#3.major#1.in
+
+NFA state 256 = main.component#5.majorminor#3.major#1.in
+ 6:[\055] -> component#5.majorminor#3.major#1.name1
+ 11:[A-Z_a-z] -> component#5.majorminor#3.major#1.name1
+ 8:[0-9] -> component#5.majorminor#3.major#1.name1
+ Epsilon closure :
+ (self)
+
+NFA state 257 = main.component#5.majorminor#3.major#1.name1
+ 6:[\055] -> component#5.majorminor#3.major#1.name1
+ 11:[A-Z_a-z] -> component#5.majorminor#3.major#1.name1
+ 8:[0-9] -> component#5.majorminor#3.major#1.name1
+ [(epsilon)] -> component#5.majorminor#3.major#1.out
+ Epsilon closure :
+ (self)
+ main.component#5.majorminor#3.major#1.out
+ main.component#5.majorminor#3.foo
+
+NFA state 258 = main.component#5.majorminor#3.major#1.out
+ [(epsilon)] -> component#5.majorminor#3.foo
+ Epsilon closure :
+ (self)
+ main.component#5.majorminor#3.foo
+
+NFA state 259 = main.component#5.majorminor#3.foo
+ 7:[/] -> component#5.majorminor#3.bar
+ Epsilon closure :
+ (self)
+
+NFA state 260 = main.component#5.majorminor#3.bar
+ [(epsilon)] -> component#5.majorminor#3.minor#2.in
+ Epsilon closure :
+ (self)
+ main.component#5.majorminor#3.minor#2.in
+
+NFA state 261 = main.component#5.majorminor#3.minor#2.in
+ 5:[+.] -> component#5.majorminor#3.minor#2.minor1
+ 12:[\\] -> component#5.majorminor#3.minor#2.minor1
+ 6:[\055] -> component#5.majorminor#3.minor#2.minor1
+ 6:[\055] -> component#5.majorminor#3.minor#2.minor1
+ 11:[A-Z_a-z] -> component#5.majorminor#3.minor#2.minor1
+ 8:[0-9] -> component#5.majorminor#3.minor#2.minor1
+ Epsilon closure :
+ (self)
+
+NFA state 262 = main.component#5.majorminor#3.minor#2.minor1
+ [(epsilon)] -> component#5.majorminor#3.minor#2.#1
+ 5:[+.] -> component#5.majorminor#3.minor#2.minor1
+ 12:[\\] -> component#5.majorminor#3.minor#2.minor1
+ 6:[\055] -> component#5.majorminor#3.minor#2.minor1
+ 6:[\055] -> component#5.majorminor#3.minor#2.minor1
+ 11:[A-Z_a-z] -> component#5.majorminor#3.minor#2.minor1
+ 8:[0-9] -> component#5.majorminor#3.minor#2.minor1
+ [(epsilon)] -> component#5.majorminor#3.minor#2.out
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.majorminor#3.minor#2.#1
+ main.component#5.majorminor#3.minor#2.out
+ main.component#5.majorminor#3.out
+ main.component#5.majorminor#3.#1
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 263 = main.component#5.majorminor#3.minor#2.#1
+ Tags : COPY_TO_MINOR
+ Epsilon closure :
+ (self)
+
+NFA state 264 = main.component#5.majorminor#3.minor#2.out
+ [(epsilon)] -> component#5.majorminor#3.out
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.majorminor#3.out
+ main.component#5.majorminor#3.#1
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 265 = main.component#5.majorminor#3.out
+ [(epsilon)] -> component#5.majorminor#3.#1
+ [(epsilon)] -> component#5.out
+ Epsilon closure :
+ (self)
+ main.#5
+ main.component#5.majorminor#3.#1
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 266 = main.component#5.majorminor#3.#1
+ Tags : GOT_MAJORMINOR
+ Epsilon closure :
+ (self)
+
+NFA state 267 = main.component#5.out
+ [(epsilon)] -> #5
+ Epsilon closure :
+ (self)
+ main.#5
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+
+NFA state 268 = main.#6
+ 9:[;] -> #7
+ Epsilon closure :
+ (self)
+
+NFA state 269 = main.optwhite#6.in
+ [(epsilon)] -> optwhite#6.out
+ 0:[\t ] -> optwhite#6.in
+ 1:[\r] -> optwhite#6.in
+ Epsilon closure :
+ (self)
+ main.#6
+ main.optwhite#6.out
+
+NFA state 270 = main.optwhite#6.out
+ [(epsilon)] -> #6
+ Epsilon closure :
+ (self)
+ main.#6
+
+NFA state 271 = main.#7
+ [(epsilon)] -> optwhite#7.in
+ Epsilon closure :
+ (self)
+ main.#8
+ main.optwhite#7.in
+ main.optwhite#7.out
+
+NFA state 272 = main.#8
+ EOS -> out2
+ Epsilon closure :
+ (self)
+
+NFA state 273 = main.optwhite#7.in
+ [(epsilon)] -> optwhite#7.out
+ 0:[\t ] -> optwhite#7.in
+ 1:[\r] -> optwhite#7.in
+ Epsilon closure :
+ (self)
+ main.#8
+ main.optwhite#7.out
+
+NFA state 274 = main.optwhite#7.out
+ [(epsilon)] -> #8
+ Epsilon closure :
+ (self)
+ main.#8
+
+NFA state 275 = main.#9
+ [(epsilon)] -> component#9.in
+ Epsilon closure :
+ (self)
+ main.component#9.in
+ main.component#9.namevalue#1.in
+ main.component#9.namevalue#1.#1
+ main.component#9.namevalue#1.optwhite#1.in
+ main.component#9.namevalue#1.optwhite#1.out
+ main.component#9.namevalue#1.name#2.in
+ main.component#9.namevalue#1.#4
+ main.component#9.namevalue#1.optwhite#4.in
+ main.component#9.namevalue#1.optwhite#4.out
+ main.component#9.namevalue#1.name#5.in
+ main.component#9.name#2.in
+ main.component#9.majorminor#3.in
+ main.component#9.majorminor#3.major#1.in
+
+NFA state 276 = main.optwhite#8.in
+ [(epsilon)] -> optwhite#8.out
+ 0:[\t ] -> optwhite#8.in
+ 1:[\r] -> optwhite#8.in
+ Epsilon closure :
+ (self)
+ main.#9
+ main.optwhite#8.out
+ main.component#9.in
+ main.component#9.namevalue#1.in
+ main.component#9.namevalue#1.#1
+ main.component#9.namevalue#1.optwhite#1.in
+ main.component#9.namevalue#1.optwhite#1.out
+ main.component#9.namevalue#1.name#2.in
+ main.component#9.namevalue#1.#4
+ main.component#9.namevalue#1.optwhite#4.in
+ main.component#9.namevalue#1.optwhite#4.out
+ main.component#9.namevalue#1.name#5.in
+ main.component#9.name#2.in
+ main.component#9.majorminor#3.in
+ main.component#9.majorminor#3.major#1.in
+
+NFA state 277 = main.optwhite#8.out
+ [(epsilon)] -> #9
+ Epsilon closure :
+ (self)
+ main.#9
+ main.component#9.in
+ main.component#9.namevalue#1.in
+ main.component#9.namevalue#1.#1
+ main.component#9.namevalue#1.optwhite#1.in
+ main.component#9.namevalue#1.optwhite#1.out
+ main.component#9.namevalue#1.name#2.in
+ main.component#9.namevalue#1.#4
+ main.component#9.namevalue#1.optwhite#4.in
+ main.component#9.namevalue#1.optwhite#4.out
+ main.component#9.namevalue#1.name#5.in
+ main.component#9.name#2.in
+ main.component#9.majorminor#3.in
+ main.component#9.majorminor#3.major#1.in
+
+NFA state 278 = main.#10
+ [(epsilon)] -> optwhite#10.in
+ Epsilon closure :
+ (self)
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 279 = main.component#9.in
+ [(epsilon)] -> component#9.namevalue#1.in
+ [(epsilon)] -> component#9.name#2.in
+ [(epsilon)] -> component#9.majorminor#3.in
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.in
+ main.component#9.namevalue#1.#1
+ main.component#9.namevalue#1.optwhite#1.in
+ main.component#9.namevalue#1.optwhite#1.out
+ main.component#9.namevalue#1.name#2.in
+ main.component#9.namevalue#1.#4
+ main.component#9.namevalue#1.optwhite#4.in
+ main.component#9.namevalue#1.optwhite#4.out
+ main.component#9.namevalue#1.name#5.in
+ main.component#9.name#2.in
+ main.component#9.majorminor#3.in
+ main.component#9.majorminor#3.major#1.in
+
+NFA state 280 = main.component#9.namevalue#1.in
+ [(epsilon)] -> component#9.namevalue#1.optwhite#4.in
+ [(epsilon)] -> component#9.namevalue#1.optwhite#1.in
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#1
+ main.component#9.namevalue#1.optwhite#1.in
+ main.component#9.namevalue#1.optwhite#1.out
+ main.component#9.namevalue#1.name#2.in
+ main.component#9.namevalue#1.#4
+ main.component#9.namevalue#1.optwhite#4.in
+ main.component#9.namevalue#1.optwhite#4.out
+ main.component#9.namevalue#1.name#5.in
+
+NFA state 281 = main.component#9.namevalue#1.#1
+ [(epsilon)] -> component#9.namevalue#1.name#2.in
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.name#2.in
+
+NFA state 282 = main.component#9.namevalue#1.optwhite#1.in
+ [(epsilon)] -> component#9.namevalue#1.optwhite#1.out
+ 0:[\t ] -> component#9.namevalue#1.optwhite#1.in
+ 1:[\r] -> component#9.namevalue#1.optwhite#1.in
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#1
+ main.component#9.namevalue#1.optwhite#1.out
+ main.component#9.namevalue#1.name#2.in
+
+NFA state 283 = main.component#9.namevalue#1.optwhite#1.out
+ [(epsilon)] -> component#9.namevalue#1.#1
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#1
+ main.component#9.namevalue#1.name#2.in
+
+NFA state 284 = main.component#9.namevalue#1.#2
+ [(epsilon)] -> component#9.namevalue#1.optwhite#3.in
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#3
+ main.component#9.namevalue#1.optwhite#3.in
+ main.component#9.namevalue#1.optwhite#3.out
+
+NFA state 285 = main.component#9.namevalue#1.name#2.in
+ 6:[\055] -> component#9.namevalue#1.name#2.name1
+ 11:[A-Z_a-z] -> component#9.namevalue#1.name#2.name1
+ 8:[0-9] -> component#9.namevalue#1.name#2.name1
+ Epsilon closure :
+ (self)
+
+NFA state 286 = main.component#9.namevalue#1.name#2.name1
+ [(epsilon)] -> component#9.namevalue#1.name#2.#1
+ [(epsilon)] -> component#9.namevalue#1.name#2.#2
+ 6:[\055] -> component#9.namevalue#1.name#2.name1
+ 11:[A-Z_a-z] -> component#9.namevalue#1.name#2.name1
+ 8:[0-9] -> component#9.namevalue#1.name#2.name1
+ 0:[\t ] -> component#9.namevalue#1.name#2.name2
+ [(epsilon)] -> component#9.namevalue#1.name#2.out
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#2
+ main.component#9.namevalue#1.name#2.#1
+ main.component#9.namevalue#1.name#2.#2
+ main.component#9.namevalue#1.name#2.out
+ main.component#9.namevalue#1.#3
+ main.component#9.namevalue#1.optwhite#3.in
+ main.component#9.namevalue#1.optwhite#3.out
+
+NFA state 287 = main.component#9.namevalue#1.name#2.#1
+ Tags : COPY_TO_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 288 = main.component#9.namevalue#1.name#2.#2
+ Tags : GOT_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 289 = main.component#9.namevalue#1.name#2.name2
+ [(epsilon)] -> component#9.namevalue#1.name#2.#3
+ [(epsilon)] -> component#9.namevalue#1.name#2.#4
+ 0:[\t ] -> component#9.namevalue#1.name#2.name2
+ 6:[\055] -> component#9.namevalue#1.name#2.name1
+ 11:[A-Z_a-z] -> component#9.namevalue#1.name#2.name1
+ 8:[0-9] -> component#9.namevalue#1.name#2.name1
+ [(epsilon)] -> component#9.namevalue#1.name#2.out
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#2
+ main.component#9.namevalue#1.name#2.#3
+ main.component#9.namevalue#1.name#2.#4
+ main.component#9.namevalue#1.name#2.out
+ main.component#9.namevalue#1.#3
+ main.component#9.namevalue#1.optwhite#3.in
+ main.component#9.namevalue#1.optwhite#3.out
+
+NFA state 290 = main.component#9.namevalue#1.name#2.#3
+ Tags : COPY_TO_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 291 = main.component#9.namevalue#1.name#2.#4
+ Tags : GOT_NAME_TRAILING_SPACE
+ Epsilon closure :
+ (self)
+
+NFA state 292 = main.component#9.namevalue#1.name#2.out
+ [(epsilon)] -> component#9.namevalue#1.#2
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#2
+ main.component#9.namevalue#1.#3
+ main.component#9.namevalue#1.optwhite#3.in
+ main.component#9.namevalue#1.optwhite#3.out
+
+NFA state 293 = main.component#9.namevalue#1.#3
+ 10:[=] -> component#9.namevalue#1.rhs_normal
+ Epsilon closure :
+ (self)
+
+NFA state 294 = main.component#9.namevalue#1.optwhite#3.in
+ [(epsilon)] -> component#9.namevalue#1.optwhite#3.out
+ 0:[\t ] -> component#9.namevalue#1.optwhite#3.in
+ 1:[\r] -> component#9.namevalue#1.optwhite#3.in
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#3
+ main.component#9.namevalue#1.optwhite#3.out
+
+NFA state 295 = main.component#9.namevalue#1.optwhite#3.out
+ [(epsilon)] -> component#9.namevalue#1.#3
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#3
+
+NFA state 296 = main.component#9.namevalue#1.#4
+ [(epsilon)] -> component#9.namevalue#1.name#5.in
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.name#5.in
+
+NFA state 297 = main.component#9.namevalue#1.optwhite#4.in
+ [(epsilon)] -> component#9.namevalue#1.optwhite#4.out
+ 0:[\t ] -> component#9.namevalue#1.optwhite#4.in
+ 1:[\r] -> component#9.namevalue#1.optwhite#4.in
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#4
+ main.component#9.namevalue#1.optwhite#4.out
+ main.component#9.namevalue#1.name#5.in
+
+NFA state 298 = main.component#9.namevalue#1.optwhite#4.out
+ [(epsilon)] -> component#9.namevalue#1.#4
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#4
+ main.component#9.namevalue#1.name#5.in
+
+NFA state 299 = main.component#9.namevalue#1.#5
+ 4:[*] -> component#9.namevalue#1.#6
+ Epsilon closure :
+ (self)
+
+NFA state 300 = main.component#9.namevalue#1.name#5.in
+ 6:[\055] -> component#9.namevalue#1.name#5.name1
+ 11:[A-Z_a-z] -> component#9.namevalue#1.name#5.name1
+ 8:[0-9] -> component#9.namevalue#1.name#5.name1
+ Epsilon closure :
+ (self)
+
+NFA state 301 = main.component#9.namevalue#1.name#5.name1
+ [(epsilon)] -> component#9.namevalue#1.name#5.#1
+ [(epsilon)] -> component#9.namevalue#1.name#5.#2
+ 6:[\055] -> component#9.namevalue#1.name#5.name1
+ 11:[A-Z_a-z] -> component#9.namevalue#1.name#5.name1
+ 8:[0-9] -> component#9.namevalue#1.name#5.name1
+ 0:[\t ] -> component#9.namevalue#1.name#5.name2
+ [(epsilon)] -> component#9.namevalue#1.name#5.out
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#5
+ main.component#9.namevalue#1.name#5.#1
+ main.component#9.namevalue#1.name#5.#2
+ main.component#9.namevalue#1.name#5.out
+
+NFA state 302 = main.component#9.namevalue#1.name#5.#1
+ Tags : COPY_TO_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 303 = main.component#9.namevalue#1.name#5.#2
+ Tags : GOT_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 304 = main.component#9.namevalue#1.name#5.name2
+ [(epsilon)] -> component#9.namevalue#1.name#5.#3
+ [(epsilon)] -> component#9.namevalue#1.name#5.#4
+ 0:[\t ] -> component#9.namevalue#1.name#5.name2
+ 6:[\055] -> component#9.namevalue#1.name#5.name1
+ 11:[A-Z_a-z] -> component#9.namevalue#1.name#5.name1
+ 8:[0-9] -> component#9.namevalue#1.name#5.name1
+ [(epsilon)] -> component#9.namevalue#1.name#5.out
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#5
+ main.component#9.namevalue#1.name#5.#3
+ main.component#9.namevalue#1.name#5.#4
+ main.component#9.namevalue#1.name#5.out
+
+NFA state 305 = main.component#9.namevalue#1.name#5.#3
+ Tags : COPY_TO_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 306 = main.component#9.namevalue#1.name#5.#4
+ Tags : GOT_NAME_TRAILING_SPACE
+ Epsilon closure :
+ (self)
+
+NFA state 307 = main.component#9.namevalue#1.name#5.out
+ [(epsilon)] -> component#9.namevalue#1.#5
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#5
+
+NFA state 308 = main.component#9.namevalue#1.#6
+ [(epsilon)] -> component#9.namevalue#1.digits#6.in
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.digits#6.in
+
+NFA state 309 = main.component#9.namevalue#1.#7
+ [(epsilon)] -> component#9.namevalue#1.optwhite#7.in
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#8
+ main.component#9.namevalue#1.optwhite#7.in
+ main.component#9.namevalue#1.optwhite#7.out
+
+NFA state 310 = main.component#9.namevalue#1.digits#6.in
+ 8:[0-9] -> component#9.namevalue#1.digits#6.out
+ 8:[0-9] -> component#9.namevalue#1.digits#6.in
+ Epsilon closure :
+ (self)
+
+NFA state 311 = main.component#9.namevalue#1.digits#6.out
+ [(epsilon)] -> component#9.namevalue#1.#7
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#7
+ main.component#9.namevalue#1.#8
+ main.component#9.namevalue#1.optwhite#7.in
+ main.component#9.namevalue#1.optwhite#7.out
+
+NFA state 312 = main.component#9.namevalue#1.#8
+ 10:[=] -> component#9.namevalue#1.rhs_continue
+ Epsilon closure :
+ (self)
+
+NFA state 313 = main.component#9.namevalue#1.optwhite#7.in
+ [(epsilon)] -> component#9.namevalue#1.optwhite#7.out
+ 0:[\t ] -> component#9.namevalue#1.optwhite#7.in
+ 1:[\r] -> component#9.namevalue#1.optwhite#7.in
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#8
+ main.component#9.namevalue#1.optwhite#7.out
+
+NFA state 314 = main.component#9.namevalue#1.optwhite#7.out
+ [(epsilon)] -> component#9.namevalue#1.#8
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#8
+
+NFA state 315 = main.component#9.namevalue#1.rhs_normal
+ [(epsilon)] -> component#9.namevalue#1.optwhite#14.in
+ [(epsilon)] -> component#9.namevalue#1.optwhite#11.in
+ [(epsilon)] -> component#9.namevalue#1.optwhite#8.in
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#9
+ main.component#9.namevalue#1.optwhite#8.in
+ main.component#9.namevalue#1.optwhite#8.out
+ main.component#9.namevalue#1.qvalue#9.in
+ main.component#9.namevalue#1.#11
+ main.component#9.namevalue#1.optwhite#11.in
+ main.component#9.namevalue#1.optwhite#11.out
+ main.component#9.namevalue#1.value#12.in
+ main.component#9.namevalue#1.#13
+ main.component#9.namevalue#1.optwhite#14.in
+ main.component#9.namevalue#1.optwhite#14.out
+ main.component#9.namevalue#1.#14
+
+NFA state 316 = main.component#9.namevalue#1.#9
+ [(epsilon)] -> component#9.namevalue#1.qvalue#9.in
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.qvalue#9.in
+
+NFA state 317 = main.component#9.namevalue#1.optwhite#8.in
+ [(epsilon)] -> component#9.namevalue#1.optwhite#8.out
+ 0:[\t ] -> component#9.namevalue#1.optwhite#8.in
+ 1:[\r] -> component#9.namevalue#1.optwhite#8.in
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#9
+ main.component#9.namevalue#1.optwhite#8.out
+ main.component#9.namevalue#1.qvalue#9.in
+
+NFA state 318 = main.component#9.namevalue#1.optwhite#8.out
+ [(epsilon)] -> component#9.namevalue#1.#9
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#9
+ main.component#9.namevalue#1.qvalue#9.in
+
+NFA state 319 = main.component#9.namevalue#1.#10
+ [(epsilon)] -> component#9.namevalue#1.optwhite#10.in
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.namevalue#1.optwhite#10.in
+ main.component#9.namevalue#1.optwhite#10.out
+ main.component#9.namevalue#1.out_normal
+ main.component#9.namevalue#1.#19
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 320 = main.component#9.namevalue#1.qvalue#9.in
+ 3:["] -> component#9.namevalue#1.qvalue#9.qv0
+ Epsilon closure :
+ (self)
+
+NFA state 321 = main.component#9.namevalue#1.qvalue#9.qv0
+ [(epsilon)] -> component#9.namevalue#1.qvalue#9.escape#1.in
+ 0:[\t ] -> component#9.namevalue#1.qvalue#9.qv1
+ 9:[;] -> component#9.namevalue#1.qvalue#9.qv1
+ 2:[!#-),:<>-@[]^`{-~] -> component#9.namevalue#1.qvalue#9.qv1
+ 11:[A-Z_a-z] -> component#9.namevalue#1.qvalue#9.qv1
+ 10:[=] -> component#9.namevalue#1.qvalue#9.qv1
+ 8:[0-9] -> component#9.namevalue#1.qvalue#9.qv1
+ 7:[/] -> component#9.namevalue#1.qvalue#9.qv1
+ 6:[\055] -> component#9.namevalue#1.qvalue#9.qv1
+ 5:[+.] -> component#9.namevalue#1.qvalue#9.qv1
+ 4:[*] -> component#9.namevalue#1.qvalue#9.qv1
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.qvalue#9.escape#1.in
+
+NFA state 322 = main.component#9.namevalue#1.qvalue#9.escape#1.in
+ 12:[\\] -> component#9.namevalue#1.qvalue#9.escape#1.#2
+ 12:[\\] -> component#9.namevalue#1.qvalue#9.escape#1.#1
+ Epsilon closure :
+ (self)
+
+NFA state 323 = main.component#9.namevalue#1.qvalue#9.escape#1.#1
+ 12:[\\] -> component#9.namevalue#1.qvalue#9.escape#1.out
+ Epsilon closure :
+ (self)
+
+NFA state 324 = main.component#9.namevalue#1.qvalue#9.escape#1.#2
+ 3:["] -> component#9.namevalue#1.qvalue#9.escape#1.out
+ Epsilon closure :
+ (self)
+
+NFA state 325 = main.component#9.namevalue#1.qvalue#9.escape#1.out
+ [(epsilon)] -> component#9.namevalue#1.qvalue#9.qv1
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.qvalue#9.qv1
+ main.component#9.namevalue#1.qvalue#9.#1
+ main.component#9.namevalue#1.qvalue#9.escape#2.in
+ main.component#9.namevalue#1.qvalue#9.qv2
+
+NFA state 326 = main.component#9.namevalue#1.qvalue#9.qv1
+ [(epsilon)] -> component#9.namevalue#1.qvalue#9.#1
+ [(epsilon)] -> component#9.namevalue#1.qvalue#9.escape#2.in
+ 0:[\t ] -> component#9.namevalue#1.qvalue#9.qv1
+ 9:[;] -> component#9.namevalue#1.qvalue#9.qv1
+ 2:[!#-),:<>-@[]^`{-~] -> component#9.namevalue#1.qvalue#9.qv1
+ 11:[A-Z_a-z] -> component#9.namevalue#1.qvalue#9.qv1
+ 10:[=] -> component#9.namevalue#1.qvalue#9.qv1
+ 8:[0-9] -> component#9.namevalue#1.qvalue#9.qv1
+ 7:[/] -> component#9.namevalue#1.qvalue#9.qv1
+ 6:[\055] -> component#9.namevalue#1.qvalue#9.qv1
+ 5:[+.] -> component#9.namevalue#1.qvalue#9.qv1
+ 4:[*] -> component#9.namevalue#1.qvalue#9.qv1
+ [(epsilon)] -> component#9.namevalue#1.qvalue#9.qv2
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.qvalue#9.#1
+ main.component#9.namevalue#1.qvalue#9.escape#2.in
+ main.component#9.namevalue#1.qvalue#9.qv2
+
+NFA state 327 = main.component#9.namevalue#1.qvalue#9.#1
+ Tags : COPY_TO_VALUE
+ Epsilon closure :
+ (self)
+
+NFA state 328 = main.component#9.namevalue#1.qvalue#9.escape#2.in
+ 12:[\\] -> component#9.namevalue#1.qvalue#9.escape#2.#2
+ 12:[\\] -> component#9.namevalue#1.qvalue#9.escape#2.#1
+ Epsilon closure :
+ (self)
+
+NFA state 329 = main.component#9.namevalue#1.qvalue#9.escape#2.#1
+ 12:[\\] -> component#9.namevalue#1.qvalue#9.escape#2.out
+ Epsilon closure :
+ (self)
+
+NFA state 330 = main.component#9.namevalue#1.qvalue#9.escape#2.#2
+ 3:["] -> component#9.namevalue#1.qvalue#9.escape#2.out
+ Epsilon closure :
+ (self)
+
+NFA state 331 = main.component#9.namevalue#1.qvalue#9.escape#2.out
+ [(epsilon)] -> component#9.namevalue#1.qvalue#9.qv1
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.qvalue#9.qv1
+ main.component#9.namevalue#1.qvalue#9.#1
+ main.component#9.namevalue#1.qvalue#9.escape#2.in
+ main.component#9.namevalue#1.qvalue#9.qv2
+
+NFA state 332 = main.component#9.namevalue#1.qvalue#9.qv2
+ 3:["] -> component#9.namevalue#1.qvalue#9.out
+ Epsilon closure :
+ (self)
+
+NFA state 333 = main.component#9.namevalue#1.qvalue#9.out
+ [(epsilon)] -> component#9.namevalue#1.#10
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.namevalue#1.#10
+ main.component#9.namevalue#1.optwhite#10.in
+ main.component#9.namevalue#1.optwhite#10.out
+ main.component#9.namevalue#1.out_normal
+ main.component#9.namevalue#1.#19
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 334 = main.component#9.namevalue#1.optwhite#10.in
+ [(epsilon)] -> component#9.namevalue#1.optwhite#10.out
+ 0:[\t ] -> component#9.namevalue#1.optwhite#10.in
+ 1:[\r] -> component#9.namevalue#1.optwhite#10.in
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.namevalue#1.optwhite#10.out
+ main.component#9.namevalue#1.out_normal
+ main.component#9.namevalue#1.#19
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 335 = main.component#9.namevalue#1.optwhite#10.out
+ [(epsilon)] -> component#9.namevalue#1.out_normal
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.namevalue#1.out_normal
+ main.component#9.namevalue#1.#19
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 336 = main.component#9.namevalue#1.#11
+ [(epsilon)] -> component#9.namevalue#1.value#12.in
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.value#12.in
+
+NFA state 337 = main.component#9.namevalue#1.optwhite#11.in
+ [(epsilon)] -> component#9.namevalue#1.optwhite#11.out
+ 0:[\t ] -> component#9.namevalue#1.optwhite#11.in
+ 1:[\r] -> component#9.namevalue#1.optwhite#11.in
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#11
+ main.component#9.namevalue#1.optwhite#11.out
+ main.component#9.namevalue#1.value#12.in
+
+NFA state 338 = main.component#9.namevalue#1.optwhite#11.out
+ [(epsilon)] -> component#9.namevalue#1.#11
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#11
+ main.component#9.namevalue#1.value#12.in
+
+NFA state 339 = main.component#9.namevalue#1.#12
+ [(epsilon)] -> component#9.namevalue#1.optwhite#13.in
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.namevalue#1.optwhite#13.in
+ main.component#9.namevalue#1.optwhite#13.out
+ main.component#9.namevalue#1.out_normal
+ main.component#9.namevalue#1.#19
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 340 = main.component#9.namevalue#1.value#12.in
+ 2:[!#-),:<>-@[]^`{-~] -> component#9.namevalue#1.value#12.v1
+ 11:[A-Z_a-z] -> component#9.namevalue#1.value#12.v1
+ 10:[=] -> component#9.namevalue#1.value#12.v1
+ 8:[0-9] -> component#9.namevalue#1.value#12.v1
+ 7:[/] -> component#9.namevalue#1.value#12.v1
+ 6:[\055] -> component#9.namevalue#1.value#12.v1
+ 5:[+.] -> component#9.namevalue#1.value#12.v1
+ 4:[*] -> component#9.namevalue#1.value#12.v1
+ Epsilon closure :
+ (self)
+
+NFA state 341 = main.component#9.namevalue#1.value#12.v1
+ [(epsilon)] -> component#9.namevalue#1.value#12.#1
+ [(epsilon)] -> component#9.namevalue#1.value#12.out
+ 2:[!#-),:<>-@[]^`{-~] -> component#9.namevalue#1.value#12.v1
+ 11:[A-Z_a-z] -> component#9.namevalue#1.value#12.v1
+ 10:[=] -> component#9.namevalue#1.value#12.v1
+ 8:[0-9] -> component#9.namevalue#1.value#12.v1
+ 7:[/] -> component#9.namevalue#1.value#12.v1
+ 6:[\055] -> component#9.namevalue#1.value#12.v1
+ 5:[+.] -> component#9.namevalue#1.value#12.v1
+ 4:[*] -> component#9.namevalue#1.value#12.v1
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.namevalue#1.#12
+ main.component#9.namevalue#1.value#12.#1
+ main.component#9.namevalue#1.value#12.out
+ main.component#9.namevalue#1.optwhite#13.in
+ main.component#9.namevalue#1.optwhite#13.out
+ main.component#9.namevalue#1.out_normal
+ main.component#9.namevalue#1.#19
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 342 = main.component#9.namevalue#1.value#12.#1
+ Tags : COPY_TO_VALUE
+ Epsilon closure :
+ (self)
+
+NFA state 343 = main.component#9.namevalue#1.value#12.out
+ [(epsilon)] -> component#9.namevalue#1.#12
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.namevalue#1.#12
+ main.component#9.namevalue#1.optwhite#13.in
+ main.component#9.namevalue#1.optwhite#13.out
+ main.component#9.namevalue#1.out_normal
+ main.component#9.namevalue#1.#19
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 344 = main.component#9.namevalue#1.optwhite#13.in
+ [(epsilon)] -> component#9.namevalue#1.optwhite#13.out
+ 0:[\t ] -> component#9.namevalue#1.optwhite#13.in
+ 1:[\r] -> component#9.namevalue#1.optwhite#13.in
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.namevalue#1.optwhite#13.out
+ main.component#9.namevalue#1.out_normal
+ main.component#9.namevalue#1.#19
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 345 = main.component#9.namevalue#1.optwhite#13.out
+ [(epsilon)] -> component#9.namevalue#1.out_normal
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.namevalue#1.out_normal
+ main.component#9.namevalue#1.#19
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 346 = main.component#9.namevalue#1.#13
+ [(epsilon)] -> component#9.namevalue#1.#14
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#14
+
+NFA state 347 = main.component#9.namevalue#1.optwhite#14.in
+ [(epsilon)] -> component#9.namevalue#1.optwhite#14.out
+ 0:[\t ] -> component#9.namevalue#1.optwhite#14.in
+ 1:[\r] -> component#9.namevalue#1.optwhite#14.in
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#13
+ main.component#9.namevalue#1.optwhite#14.out
+ main.component#9.namevalue#1.#14
+
+NFA state 348 = main.component#9.namevalue#1.optwhite#14.out
+ [(epsilon)] -> component#9.namevalue#1.#13
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#13
+ main.component#9.namevalue#1.#14
+
+NFA state 349 = main.component#9.namevalue#1.#14
+ EOS -> component#9.namevalue#1.out_normal
+ Epsilon closure :
+ (self)
+
+NFA state 350 = main.component#9.namevalue#1.rhs_continue
+ [(epsilon)] -> component#9.namevalue#1.optwhite#18.in
+ [(epsilon)] -> component#9.namevalue#1.optwhite#15.in
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#15
+ main.component#9.namevalue#1.optwhite#15.in
+ main.component#9.namevalue#1.optwhite#15.out
+ main.component#9.namevalue#1.qvalue#16.in
+ main.component#9.namevalue#1.#17
+ main.component#9.namevalue#1.optwhite#18.in
+ main.component#9.namevalue#1.optwhite#18.out
+ main.component#9.namevalue#1.value#19.in
+
+NFA state 351 = main.component#9.namevalue#1.#15
+ [(epsilon)] -> component#9.namevalue#1.qvalue#16.in
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.qvalue#16.in
+
+NFA state 352 = main.component#9.namevalue#1.optwhite#15.in
+ [(epsilon)] -> component#9.namevalue#1.optwhite#15.out
+ 0:[\t ] -> component#9.namevalue#1.optwhite#15.in
+ 1:[\r] -> component#9.namevalue#1.optwhite#15.in
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#15
+ main.component#9.namevalue#1.optwhite#15.out
+ main.component#9.namevalue#1.qvalue#16.in
+
+NFA state 353 = main.component#9.namevalue#1.optwhite#15.out
+ [(epsilon)] -> component#9.namevalue#1.#15
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#15
+ main.component#9.namevalue#1.qvalue#16.in
+
+NFA state 354 = main.component#9.namevalue#1.#16
+ [(epsilon)] -> component#9.namevalue#1.optwhite#17.in
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.namevalue#1.optwhite#17.in
+ main.component#9.namevalue#1.optwhite#17.out
+ main.component#9.namevalue#1.out_continue
+ main.component#9.namevalue#1.#20
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 355 = main.component#9.namevalue#1.qvalue#16.in
+ 3:["] -> component#9.namevalue#1.qvalue#16.qv0
+ Epsilon closure :
+ (self)
+
+NFA state 356 = main.component#9.namevalue#1.qvalue#16.qv0
+ [(epsilon)] -> component#9.namevalue#1.qvalue#16.escape#1.in
+ 0:[\t ] -> component#9.namevalue#1.qvalue#16.qv1
+ 9:[;] -> component#9.namevalue#1.qvalue#16.qv1
+ 2:[!#-),:<>-@[]^`{-~] -> component#9.namevalue#1.qvalue#16.qv1
+ 11:[A-Z_a-z] -> component#9.namevalue#1.qvalue#16.qv1
+ 10:[=] -> component#9.namevalue#1.qvalue#16.qv1
+ 8:[0-9] -> component#9.namevalue#1.qvalue#16.qv1
+ 7:[/] -> component#9.namevalue#1.qvalue#16.qv1
+ 6:[\055] -> component#9.namevalue#1.qvalue#16.qv1
+ 5:[+.] -> component#9.namevalue#1.qvalue#16.qv1
+ 4:[*] -> component#9.namevalue#1.qvalue#16.qv1
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.qvalue#16.escape#1.in
+
+NFA state 357 = main.component#9.namevalue#1.qvalue#16.escape#1.in
+ 12:[\\] -> component#9.namevalue#1.qvalue#16.escape#1.#2
+ 12:[\\] -> component#9.namevalue#1.qvalue#16.escape#1.#1
+ Epsilon closure :
+ (self)
+
+NFA state 358 = main.component#9.namevalue#1.qvalue#16.escape#1.#1
+ 12:[\\] -> component#9.namevalue#1.qvalue#16.escape#1.out
+ Epsilon closure :
+ (self)
+
+NFA state 359 = main.component#9.namevalue#1.qvalue#16.escape#1.#2
+ 3:["] -> component#9.namevalue#1.qvalue#16.escape#1.out
+ Epsilon closure :
+ (self)
+
+NFA state 360 = main.component#9.namevalue#1.qvalue#16.escape#1.out
+ [(epsilon)] -> component#9.namevalue#1.qvalue#16.qv1
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.qvalue#16.qv1
+ main.component#9.namevalue#1.qvalue#16.#1
+ main.component#9.namevalue#1.qvalue#16.escape#2.in
+ main.component#9.namevalue#1.qvalue#16.qv2
+
+NFA state 361 = main.component#9.namevalue#1.qvalue#16.qv1
+ [(epsilon)] -> component#9.namevalue#1.qvalue#16.#1
+ [(epsilon)] -> component#9.namevalue#1.qvalue#16.escape#2.in
+ 0:[\t ] -> component#9.namevalue#1.qvalue#16.qv1
+ 9:[;] -> component#9.namevalue#1.qvalue#16.qv1
+ 2:[!#-),:<>-@[]^`{-~] -> component#9.namevalue#1.qvalue#16.qv1
+ 11:[A-Z_a-z] -> component#9.namevalue#1.qvalue#16.qv1
+ 10:[=] -> component#9.namevalue#1.qvalue#16.qv1
+ 8:[0-9] -> component#9.namevalue#1.qvalue#16.qv1
+ 7:[/] -> component#9.namevalue#1.qvalue#16.qv1
+ 6:[\055] -> component#9.namevalue#1.qvalue#16.qv1
+ 5:[+.] -> component#9.namevalue#1.qvalue#16.qv1
+ 4:[*] -> component#9.namevalue#1.qvalue#16.qv1
+ [(epsilon)] -> component#9.namevalue#1.qvalue#16.qv2
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.qvalue#16.#1
+ main.component#9.namevalue#1.qvalue#16.escape#2.in
+ main.component#9.namevalue#1.qvalue#16.qv2
+
+NFA state 362 = main.component#9.namevalue#1.qvalue#16.#1
+ Tags : COPY_TO_VALUE
+ Epsilon closure :
+ (self)
+
+NFA state 363 = main.component#9.namevalue#1.qvalue#16.escape#2.in
+ 12:[\\] -> component#9.namevalue#1.qvalue#16.escape#2.#2
+ 12:[\\] -> component#9.namevalue#1.qvalue#16.escape#2.#1
+ Epsilon closure :
+ (self)
+
+NFA state 364 = main.component#9.namevalue#1.qvalue#16.escape#2.#1
+ 12:[\\] -> component#9.namevalue#1.qvalue#16.escape#2.out
+ Epsilon closure :
+ (self)
+
+NFA state 365 = main.component#9.namevalue#1.qvalue#16.escape#2.#2
+ 3:["] -> component#9.namevalue#1.qvalue#16.escape#2.out
+ Epsilon closure :
+ (self)
+
+NFA state 366 = main.component#9.namevalue#1.qvalue#16.escape#2.out
+ [(epsilon)] -> component#9.namevalue#1.qvalue#16.qv1
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.qvalue#16.qv1
+ main.component#9.namevalue#1.qvalue#16.#1
+ main.component#9.namevalue#1.qvalue#16.escape#2.in
+ main.component#9.namevalue#1.qvalue#16.qv2
+
+NFA state 367 = main.component#9.namevalue#1.qvalue#16.qv2
+ 3:["] -> component#9.namevalue#1.qvalue#16.out
+ Epsilon closure :
+ (self)
+
+NFA state 368 = main.component#9.namevalue#1.qvalue#16.out
+ [(epsilon)] -> component#9.namevalue#1.#16
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.namevalue#1.#16
+ main.component#9.namevalue#1.optwhite#17.in
+ main.component#9.namevalue#1.optwhite#17.out
+ main.component#9.namevalue#1.out_continue
+ main.component#9.namevalue#1.#20
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 369 = main.component#9.namevalue#1.optwhite#17.in
+ [(epsilon)] -> component#9.namevalue#1.optwhite#17.out
+ 0:[\t ] -> component#9.namevalue#1.optwhite#17.in
+ 1:[\r] -> component#9.namevalue#1.optwhite#17.in
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.namevalue#1.optwhite#17.out
+ main.component#9.namevalue#1.out_continue
+ main.component#9.namevalue#1.#20
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 370 = main.component#9.namevalue#1.optwhite#17.out
+ [(epsilon)] -> component#9.namevalue#1.out_continue
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.namevalue#1.out_continue
+ main.component#9.namevalue#1.#20
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 371 = main.component#9.namevalue#1.#17
+ [(epsilon)] -> component#9.namevalue#1.value#19.in
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.value#19.in
+
+NFA state 372 = main.component#9.namevalue#1.optwhite#18.in
+ [(epsilon)] -> component#9.namevalue#1.optwhite#18.out
+ 0:[\t ] -> component#9.namevalue#1.optwhite#18.in
+ 1:[\r] -> component#9.namevalue#1.optwhite#18.in
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#17
+ main.component#9.namevalue#1.optwhite#18.out
+ main.component#9.namevalue#1.value#19.in
+
+NFA state 373 = main.component#9.namevalue#1.optwhite#18.out
+ [(epsilon)] -> component#9.namevalue#1.#17
+ Epsilon closure :
+ (self)
+ main.component#9.namevalue#1.#17
+ main.component#9.namevalue#1.value#19.in
+
+NFA state 374 = main.component#9.namevalue#1.#18
+ [(epsilon)] -> component#9.namevalue#1.optwhite#20.in
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.namevalue#1.optwhite#20.in
+ main.component#9.namevalue#1.optwhite#20.out
+ main.component#9.namevalue#1.out_continue
+ main.component#9.namevalue#1.#20
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 375 = main.component#9.namevalue#1.value#19.in
+ 2:[!#-),:<>-@[]^`{-~] -> component#9.namevalue#1.value#19.v1
+ 11:[A-Z_a-z] -> component#9.namevalue#1.value#19.v1
+ 10:[=] -> component#9.namevalue#1.value#19.v1
+ 8:[0-9] -> component#9.namevalue#1.value#19.v1
+ 7:[/] -> component#9.namevalue#1.value#19.v1
+ 6:[\055] -> component#9.namevalue#1.value#19.v1
+ 5:[+.] -> component#9.namevalue#1.value#19.v1
+ 4:[*] -> component#9.namevalue#1.value#19.v1
+ Epsilon closure :
+ (self)
+
+NFA state 376 = main.component#9.namevalue#1.value#19.v1
+ [(epsilon)] -> component#9.namevalue#1.value#19.#1
+ [(epsilon)] -> component#9.namevalue#1.value#19.out
+ 2:[!#-),:<>-@[]^`{-~] -> component#9.namevalue#1.value#19.v1
+ 11:[A-Z_a-z] -> component#9.namevalue#1.value#19.v1
+ 10:[=] -> component#9.namevalue#1.value#19.v1
+ 8:[0-9] -> component#9.namevalue#1.value#19.v1
+ 7:[/] -> component#9.namevalue#1.value#19.v1
+ 6:[\055] -> component#9.namevalue#1.value#19.v1
+ 5:[+.] -> component#9.namevalue#1.value#19.v1
+ 4:[*] -> component#9.namevalue#1.value#19.v1
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.namevalue#1.#18
+ main.component#9.namevalue#1.value#19.#1
+ main.component#9.namevalue#1.value#19.out
+ main.component#9.namevalue#1.optwhite#20.in
+ main.component#9.namevalue#1.optwhite#20.out
+ main.component#9.namevalue#1.out_continue
+ main.component#9.namevalue#1.#20
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 377 = main.component#9.namevalue#1.value#19.#1
+ Tags : COPY_TO_VALUE
+ Epsilon closure :
+ (self)
+
+NFA state 378 = main.component#9.namevalue#1.value#19.out
+ [(epsilon)] -> component#9.namevalue#1.#18
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.namevalue#1.#18
+ main.component#9.namevalue#1.optwhite#20.in
+ main.component#9.namevalue#1.optwhite#20.out
+ main.component#9.namevalue#1.out_continue
+ main.component#9.namevalue#1.#20
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 379 = main.component#9.namevalue#1.optwhite#20.in
+ [(epsilon)] -> component#9.namevalue#1.optwhite#20.out
+ 0:[\t ] -> component#9.namevalue#1.optwhite#20.in
+ 1:[\r] -> component#9.namevalue#1.optwhite#20.in
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.namevalue#1.optwhite#20.out
+ main.component#9.namevalue#1.out_continue
+ main.component#9.namevalue#1.#20
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 380 = main.component#9.namevalue#1.optwhite#20.out
+ [(epsilon)] -> component#9.namevalue#1.out_continue
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.namevalue#1.out_continue
+ main.component#9.namevalue#1.#20
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 381 = main.component#9.namevalue#1.out_normal
+ [(epsilon)] -> component#9.namevalue#1.out
+ [(epsilon)] -> component#9.namevalue#1.#19
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.namevalue#1.#19
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 382 = main.component#9.namevalue#1.#19
+ Tags : GOT_NAMEVALUE
+ Epsilon closure :
+ (self)
+
+NFA state 383 = main.component#9.namevalue#1.out_continue
+ [(epsilon)] -> component#9.namevalue#1.out
+ [(epsilon)] -> component#9.namevalue#1.#20
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.namevalue#1.#20
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 384 = main.component#9.namevalue#1.#20
+ Tags : GOT_NAMEVALUE_CONT
+ Epsilon closure :
+ (self)
+
+NFA state 385 = main.component#9.namevalue#1.out
+ [(epsilon)] -> component#9.out
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 386 = main.component#9.name#2.in
+ 6:[\055] -> component#9.name#2.name1
+ 11:[A-Z_a-z] -> component#9.name#2.name1
+ 8:[0-9] -> component#9.name#2.name1
+ Epsilon closure :
+ (self)
+
+NFA state 387 = main.component#9.name#2.name1
+ [(epsilon)] -> component#9.name#2.out
+ 0:[\t ] -> component#9.name#2.name2
+ 6:[\055] -> component#9.name#2.name1
+ 11:[A-Z_a-z] -> component#9.name#2.name1
+ 8:[0-9] -> component#9.name#2.name1
+ [(epsilon)] -> component#9.name#2.#2
+ [(epsilon)] -> component#9.name#2.#1
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.name#2.#1
+ main.component#9.name#2.#2
+ main.component#9.name#2.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 388 = main.component#9.name#2.#1
+ Tags : COPY_TO_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 389 = main.component#9.name#2.#2
+ Tags : GOT_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 390 = main.component#9.name#2.name2
+ [(epsilon)] -> component#9.name#2.out
+ 6:[\055] -> component#9.name#2.name1
+ 11:[A-Z_a-z] -> component#9.name#2.name1
+ 8:[0-9] -> component#9.name#2.name1
+ 0:[\t ] -> component#9.name#2.name2
+ [(epsilon)] -> component#9.name#2.#4
+ [(epsilon)] -> component#9.name#2.#3
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.name#2.#3
+ main.component#9.name#2.#4
+ main.component#9.name#2.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 391 = main.component#9.name#2.#3
+ Tags : COPY_TO_NAME
+ Epsilon closure :
+ (self)
+
+NFA state 392 = main.component#9.name#2.#4
+ Tags : GOT_NAME_TRAILING_SPACE
+ Epsilon closure :
+ (self)
+
+NFA state 393 = main.component#9.name#2.out
+ [(epsilon)] -> component#9.out
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 394 = main.component#9.majorminor#3.in
+ [(epsilon)] -> component#9.majorminor#3.major#1.in
+ Epsilon closure :
+ (self)
+ main.component#9.majorminor#3.major#1.in
+
+NFA state 395 = main.component#9.majorminor#3.major#1.in
+ 6:[\055] -> component#9.majorminor#3.major#1.name1
+ 11:[A-Z_a-z] -> component#9.majorminor#3.major#1.name1
+ 8:[0-9] -> component#9.majorminor#3.major#1.name1
+ Epsilon closure :
+ (self)
+
+NFA state 396 = main.component#9.majorminor#3.major#1.name1
+ 6:[\055] -> component#9.majorminor#3.major#1.name1
+ 11:[A-Z_a-z] -> component#9.majorminor#3.major#1.name1
+ 8:[0-9] -> component#9.majorminor#3.major#1.name1
+ [(epsilon)] -> component#9.majorminor#3.major#1.out
+ Epsilon closure :
+ (self)
+ main.component#9.majorminor#3.major#1.out
+ main.component#9.majorminor#3.foo
+
+NFA state 397 = main.component#9.majorminor#3.major#1.out
+ [(epsilon)] -> component#9.majorminor#3.foo
+ Epsilon closure :
+ (self)
+ main.component#9.majorminor#3.foo
+
+NFA state 398 = main.component#9.majorminor#3.foo
+ 7:[/] -> component#9.majorminor#3.bar
+ Epsilon closure :
+ (self)
+
+NFA state 399 = main.component#9.majorminor#3.bar
+ [(epsilon)] -> component#9.majorminor#3.minor#2.in
+ Epsilon closure :
+ (self)
+ main.component#9.majorminor#3.minor#2.in
+
+NFA state 400 = main.component#9.majorminor#3.minor#2.in
+ 5:[+.] -> component#9.majorminor#3.minor#2.minor1
+ 12:[\\] -> component#9.majorminor#3.minor#2.minor1
+ 6:[\055] -> component#9.majorminor#3.minor#2.minor1
+ 6:[\055] -> component#9.majorminor#3.minor#2.minor1
+ 11:[A-Z_a-z] -> component#9.majorminor#3.minor#2.minor1
+ 8:[0-9] -> component#9.majorminor#3.minor#2.minor1
+ Epsilon closure :
+ (self)
+
+NFA state 401 = main.component#9.majorminor#3.minor#2.minor1
+ [(epsilon)] -> component#9.majorminor#3.minor#2.#1
+ 5:[+.] -> component#9.majorminor#3.minor#2.minor1
+ 12:[\\] -> component#9.majorminor#3.minor#2.minor1
+ 6:[\055] -> component#9.majorminor#3.minor#2.minor1
+ 6:[\055] -> component#9.majorminor#3.minor#2.minor1
+ 11:[A-Z_a-z] -> component#9.majorminor#3.minor#2.minor1
+ 8:[0-9] -> component#9.majorminor#3.minor#2.minor1
+ [(epsilon)] -> component#9.majorminor#3.minor#2.out
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.majorminor#3.minor#2.#1
+ main.component#9.majorminor#3.minor#2.out
+ main.component#9.majorminor#3.out
+ main.component#9.majorminor#3.#1
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 402 = main.component#9.majorminor#3.minor#2.#1
+ Tags : COPY_TO_MINOR
+ Epsilon closure :
+ (self)
+
+NFA state 403 = main.component#9.majorminor#3.minor#2.out
+ [(epsilon)] -> component#9.majorminor#3.out
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.majorminor#3.out
+ main.component#9.majorminor#3.#1
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 404 = main.component#9.majorminor#3.out
+ [(epsilon)] -> component#9.majorminor#3.#1
+ [(epsilon)] -> component#9.out
+ Epsilon closure :
+ (self)
+ main.#10
+ main.component#9.majorminor#3.#1
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 405 = main.component#9.majorminor#3.#1
+ Tags : GOT_MAJORMINOR
+ Epsilon closure :
+ (self)
+
+NFA state 406 = main.component#9.out
+ [(epsilon)] -> #10
+ Epsilon closure :
+ (self)
+ main.#10
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+NFA state 407 = main.#11
+ 9:[;] -> in2
+ Epsilon closure :
+ (self)
+
+NFA state 408 = main.optwhite#10.in
+ [(epsilon)] -> optwhite#10.out
+ 0:[\t ] -> optwhite#10.in
+ 1:[\r] -> optwhite#10.in
+ Epsilon closure :
+ (self)
+ main.#11
+ main.optwhite#10.out
+
+NFA state 409 = main.optwhite#10.out
+ [(epsilon)] -> #11
+ Epsilon closure :
+ (self)
+ main.#11
+
+NFA state 410 = main.in2
+ [(epsilon)] -> in
+ [(epsilon)] -> #12
+ Epsilon closure :
+ (self)
+ main.in
+ main.#1
+ main.optwhite#1.in
+ main.optwhite#1.out
+ main.component#2.in
+ main.component#2.namevalue#1.in
+ main.component#2.namevalue#1.#1
+ main.component#2.namevalue#1.optwhite#1.in
+ main.component#2.namevalue#1.optwhite#1.out
+ main.component#2.namevalue#1.name#2.in
+ main.component#2.namevalue#1.#4
+ main.component#2.namevalue#1.optwhite#4.in
+ main.component#2.namevalue#1.optwhite#4.out
+ main.component#2.namevalue#1.name#5.in
+ main.component#2.name#2.in
+ main.component#2.majorminor#3.in
+ main.component#2.majorminor#3.major#1.in
+ main.#4
+ main.optwhite#4.in
+ main.optwhite#4.out
+ main.component#5.in
+ main.component#5.namevalue#1.in
+ main.component#5.namevalue#1.#1
+ main.component#5.namevalue#1.optwhite#1.in
+ main.component#5.namevalue#1.optwhite#1.out
+ main.component#5.namevalue#1.name#2.in
+ main.component#5.namevalue#1.#4
+ main.component#5.namevalue#1.optwhite#4.in
+ main.component#5.namevalue#1.optwhite#4.out
+ main.component#5.namevalue#1.name#5.in
+ main.component#5.name#2.in
+ main.component#5.majorminor#3.in
+ main.component#5.majorminor#3.major#1.in
+ main.#9
+ main.optwhite#8.in
+ main.optwhite#8.out
+ main.component#9.in
+ main.component#9.namevalue#1.in
+ main.component#9.namevalue#1.#1
+ main.component#9.namevalue#1.optwhite#1.in
+ main.component#9.namevalue#1.optwhite#1.out
+ main.component#9.namevalue#1.name#2.in
+ main.component#9.namevalue#1.#4
+ main.component#9.namevalue#1.optwhite#4.in
+ main.component#9.namevalue#1.optwhite#4.out
+ main.component#9.namevalue#1.name#5.in
+ main.component#9.name#2.in
+ main.component#9.majorminor#3.in
+ main.component#9.majorminor#3.major#1.in
+ main.#12
+
+NFA state 411 = main.#12
+ Tags : GOT_TERMINATOR
+ Epsilon closure :
+ (self)
+
+NFA state 412 = main.out2
+ [(epsilon)] -> out
+ [(epsilon)] -> #13
+ Epsilon closure :
+ (self)
+ main.#13
+ main.out
+
+NFA state 413 = main.#13
+ Tags : GOT_TERMINATOR
+ Epsilon closure :
+ (self)
+
+NFA state 414 = main.out
+ Epsilon closure :
+ (self)
+
+--------------------------------
+DFA structure before compression
+--------------------------------
+DFA state 0
+ NFA states :
+ main.in
+ main.#1
+ main.optwhite#1.in
+ main.optwhite#1.out
+ main.component#2.in
+ main.component#2.namevalue#1.in
+ main.component#2.namevalue#1.#1
+ main.component#2.namevalue#1.optwhite#1.in
+ main.component#2.namevalue#1.optwhite#1.out
+ main.component#2.namevalue#1.name#2.in
+ main.component#2.namevalue#1.#4
+ main.component#2.namevalue#1.optwhite#4.in
+ main.component#2.namevalue#1.optwhite#4.out
+ main.component#2.namevalue#1.name#5.in
+ main.component#2.name#2.in
+ main.component#2.majorminor#3.in
+ main.component#2.majorminor#3.major#1.in
+ main.#4
+ main.optwhite#4.in
+ main.optwhite#4.out
+ main.component#5.in
+ main.component#5.namevalue#1.in
+ main.component#5.namevalue#1.#1
+ main.component#5.namevalue#1.optwhite#1.in
+ main.component#5.namevalue#1.optwhite#1.out
+ main.component#5.namevalue#1.name#2.in
+ main.component#5.namevalue#1.#4
+ main.component#5.namevalue#1.optwhite#4.in
+ main.component#5.namevalue#1.optwhite#4.out
+ main.component#5.namevalue#1.name#5.in
+ main.component#5.name#2.in
+ main.component#5.majorminor#3.in
+ main.component#5.majorminor#3.major#1.in
+ main.#9
+ main.optwhite#8.in
+ main.optwhite#8.out
+ main.component#9.in
+ main.component#9.namevalue#1.in
+ main.component#9.namevalue#1.#1
+ main.component#9.namevalue#1.optwhite#1.in
+ main.component#9.namevalue#1.optwhite#1.out
+ main.component#9.namevalue#1.name#2.in
+ main.component#9.namevalue#1.#4
+ main.component#9.namevalue#1.optwhite#4.in
+ main.component#9.namevalue#1.optwhite#4.out
+ main.component#9.namevalue#1.name#5.in
+ main.component#9.name#2.in
+ main.component#9.majorminor#3.in
+ main.component#9.majorminor#3.major#1.in
+
+ Forward route :
+ (START)->(HERE)
+ Transitions :
+ 0:[\t ] -> 1
+ 1:[\r] -> 1
+ 6:[\055] -> 2
+ 8:[0-9] -> 2
+ 11:[A-Z_a-z] -> 2
+
+DFA state 1
+ NFA states :
+ main.#1
+ main.optwhite#1.in
+ main.optwhite#1.out
+ main.component#2.in
+ main.component#2.namevalue#1.in
+ main.component#2.namevalue#1.#1
+ main.component#2.namevalue#1.optwhite#1.in
+ main.component#2.namevalue#1.optwhite#1.out
+ main.component#2.namevalue#1.name#2.in
+ main.component#2.namevalue#1.#4
+ main.component#2.namevalue#1.optwhite#4.in
+ main.component#2.namevalue#1.optwhite#4.out
+ main.component#2.namevalue#1.name#5.in
+ main.component#2.name#2.in
+ main.component#2.majorminor#3.in
+ main.component#2.majorminor#3.major#1.in
+ main.#4
+ main.optwhite#4.in
+ main.optwhite#4.out
+ main.component#5.in
+ main.component#5.namevalue#1.in
+ main.component#5.namevalue#1.#1
+ main.component#5.namevalue#1.optwhite#1.in
+ main.component#5.namevalue#1.optwhite#1.out
+ main.component#5.namevalue#1.name#2.in
+ main.component#5.namevalue#1.#4
+ main.component#5.namevalue#1.optwhite#4.in
+ main.component#5.namevalue#1.optwhite#4.out
+ main.component#5.namevalue#1.name#5.in
+ main.component#5.name#2.in
+ main.component#5.majorminor#3.in
+ main.component#5.majorminor#3.major#1.in
+ main.#9
+ main.optwhite#8.in
+ main.optwhite#8.out
+ main.component#9.in
+ main.component#9.namevalue#1.in
+ main.component#9.namevalue#1.#1
+ main.component#9.namevalue#1.optwhite#1.in
+ main.component#9.namevalue#1.optwhite#1.out
+ main.component#9.namevalue#1.name#2.in
+ main.component#9.namevalue#1.#4
+ main.component#9.namevalue#1.optwhite#4.in
+ main.component#9.namevalue#1.optwhite#4.out
+ main.component#9.namevalue#1.name#5.in
+ main.component#9.name#2.in
+ main.component#9.majorminor#3.in
+ main.component#9.majorminor#3.major#1.in
+
+ Forward route : (from state 0)
+ (START)->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 1
+ 1:[\r] -> 1
+ 6:[\055] -> 2
+ 8:[0-9] -> 2
+ 11:[A-Z_a-z] -> 2
+
+DFA state 2
+ NFA states :
+ main.#2
+ main.component#2.namevalue#1.#2
+ main.component#2.namevalue#1.name#2.name1
+ main.component#2.namevalue#1.name#2.#1
+ main.component#2.namevalue#1.name#2.#2
+ main.component#2.namevalue#1.name#2.out
+ main.component#2.namevalue#1.#3
+ main.component#2.namevalue#1.optwhite#3.in
+ main.component#2.namevalue#1.optwhite#3.out
+ main.component#2.namevalue#1.#5
+ main.component#2.namevalue#1.name#5.name1
+ main.component#2.namevalue#1.name#5.#1
+ main.component#2.namevalue#1.name#5.#2
+ main.component#2.namevalue#1.name#5.out
+ main.component#2.name#2.name1
+ main.component#2.name#2.#1
+ main.component#2.name#2.#2
+ main.component#2.name#2.out
+ main.component#2.majorminor#3.major#1.name1
+ main.component#2.majorminor#3.major#1.out
+ main.component#2.majorminor#3.foo
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+ main.#5
+ main.component#5.namevalue#1.#2
+ main.component#5.namevalue#1.name#2.name1
+ main.component#5.namevalue#1.name#2.#1
+ main.component#5.namevalue#1.name#2.#2
+ main.component#5.namevalue#1.name#2.out
+ main.component#5.namevalue#1.#3
+ main.component#5.namevalue#1.optwhite#3.in
+ main.component#5.namevalue#1.optwhite#3.out
+ main.component#5.namevalue#1.#5
+ main.component#5.namevalue#1.name#5.name1
+ main.component#5.namevalue#1.name#5.#1
+ main.component#5.namevalue#1.name#5.#2
+ main.component#5.namevalue#1.name#5.out
+ main.component#5.name#2.name1
+ main.component#5.name#2.#1
+ main.component#5.name#2.#2
+ main.component#5.name#2.out
+ main.component#5.majorminor#3.major#1.name1
+ main.component#5.majorminor#3.major#1.out
+ main.component#5.majorminor#3.foo
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+ main.#10
+ main.component#9.namevalue#1.#2
+ main.component#9.namevalue#1.name#2.name1
+ main.component#9.namevalue#1.name#2.#1
+ main.component#9.namevalue#1.name#2.#2
+ main.component#9.namevalue#1.name#2.out
+ main.component#9.namevalue#1.#3
+ main.component#9.namevalue#1.optwhite#3.in
+ main.component#9.namevalue#1.optwhite#3.out
+ main.component#9.namevalue#1.#5
+ main.component#9.namevalue#1.name#5.name1
+ main.component#9.namevalue#1.name#5.#1
+ main.component#9.namevalue#1.name#5.#2
+ main.component#9.namevalue#1.name#5.out
+ main.component#9.name#2.name1
+ main.component#9.name#2.#1
+ main.component#9.name#2.#2
+ main.component#9.name#2.out
+ main.component#9.majorminor#3.major#1.name1
+ main.component#9.majorminor#3.major#1.out
+ main.component#9.majorminor#3.foo
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+ Forward route : (from state 0)
+ (START)->6:[\055]->(HERE)
+ Transitions :
+ EOS -> 3
+ 0:[\t ] -> 4
+ 1:[\r] -> 5
+ 4:[*] -> 6
+ 6:[\055] -> 2
+ 7:[/] -> 7
+ 8:[0-9] -> 2
+ 9:[;] -> 8
+ 10:[=] -> 9
+ 11:[A-Z_a-z] -> 2
+ NFA exit tags applying :
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ Attributes for <copier> : COPY_TO_NAME
+ Attributes for <action> : GOT_NAME
+
+DFA state 3
+ NFA states :
+ main.out2
+ main.#13
+ main.out
+
+ Forward route : (from state 2)
+ (START)->6:[\055]->EOS->(HERE)
+ Transitions :
+ NFA exit tags applying :
+ GOT_TERMINATOR
+ Attributes for <action> : GOT_TERMINATOR
+
+DFA state 4
+ NFA states :
+ main.#2
+ main.component#2.namevalue#1.#2
+ main.component#2.namevalue#1.name#2.name2
+ main.component#2.namevalue#1.name#2.#3
+ main.component#2.namevalue#1.name#2.#4
+ main.component#2.namevalue#1.name#2.out
+ main.component#2.namevalue#1.#3
+ main.component#2.namevalue#1.optwhite#3.in
+ main.component#2.namevalue#1.optwhite#3.out
+ main.component#2.namevalue#1.#5
+ main.component#2.namevalue#1.name#5.name2
+ main.component#2.namevalue#1.name#5.#3
+ main.component#2.namevalue#1.name#5.#4
+ main.component#2.namevalue#1.name#5.out
+ main.component#2.name#2.name2
+ main.component#2.name#2.#3
+ main.component#2.name#2.#4
+ main.component#2.name#2.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+ main.#5
+ main.component#5.namevalue#1.#2
+ main.component#5.namevalue#1.name#2.name2
+ main.component#5.namevalue#1.name#2.#3
+ main.component#5.namevalue#1.name#2.#4
+ main.component#5.namevalue#1.name#2.out
+ main.component#5.namevalue#1.#3
+ main.component#5.namevalue#1.optwhite#3.in
+ main.component#5.namevalue#1.optwhite#3.out
+ main.component#5.namevalue#1.#5
+ main.component#5.namevalue#1.name#5.name2
+ main.component#5.namevalue#1.name#5.#3
+ main.component#5.namevalue#1.name#5.#4
+ main.component#5.namevalue#1.name#5.out
+ main.component#5.name#2.name2
+ main.component#5.name#2.#3
+ main.component#5.name#2.#4
+ main.component#5.name#2.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+ main.#10
+ main.component#9.namevalue#1.#2
+ main.component#9.namevalue#1.name#2.name2
+ main.component#9.namevalue#1.name#2.#3
+ main.component#9.namevalue#1.name#2.#4
+ main.component#9.namevalue#1.name#2.out
+ main.component#9.namevalue#1.#3
+ main.component#9.namevalue#1.optwhite#3.in
+ main.component#9.namevalue#1.optwhite#3.out
+ main.component#9.namevalue#1.#5
+ main.component#9.namevalue#1.name#5.name2
+ main.component#9.namevalue#1.name#5.#3
+ main.component#9.namevalue#1.name#5.#4
+ main.component#9.namevalue#1.name#5.out
+ main.component#9.name#2.name2
+ main.component#9.name#2.#3
+ main.component#9.name#2.#4
+ main.component#9.name#2.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+ Forward route : (from state 2)
+ (START)->6:[\055]->0:[\t ]->(HERE)
+ Transitions :
+ EOS -> 3
+ 0:[\t ] -> 4
+ 1:[\r] -> 5
+ 4:[*] -> 6
+ 6:[\055] -> 10
+ 8:[0-9] -> 10
+ 9:[;] -> 8
+ 10:[=] -> 9
+ 11:[A-Z_a-z] -> 10
+ NFA exit tags applying :
+ GOT_NAME_TRAILING_SPACE
+ COPY_TO_NAME
+ GOT_NAME_TRAILING_SPACE
+ COPY_TO_NAME
+ GOT_NAME_TRAILING_SPACE
+ COPY_TO_NAME
+ GOT_NAME_TRAILING_SPACE
+ COPY_TO_NAME
+ GOT_NAME_TRAILING_SPACE
+ COPY_TO_NAME
+ GOT_NAME_TRAILING_SPACE
+ COPY_TO_NAME
+ GOT_NAME_TRAILING_SPACE
+ COPY_TO_NAME
+ GOT_NAME_TRAILING_SPACE
+ COPY_TO_NAME
+ GOT_NAME_TRAILING_SPACE
+ COPY_TO_NAME
+ Attributes for <copier> : COPY_TO_NAME
+ Attributes for <action> : GOT_NAME_TRAILING_SPACE
+
+DFA state 5
+ NFA states :
+ main.component#2.namevalue#1.#3
+ main.component#2.namevalue#1.optwhite#3.in
+ main.component#2.namevalue#1.optwhite#3.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+ main.component#5.namevalue#1.#3
+ main.component#5.namevalue#1.optwhite#3.in
+ main.component#5.namevalue#1.optwhite#3.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+ main.component#9.namevalue#1.#3
+ main.component#9.namevalue#1.optwhite#3.in
+ main.component#9.namevalue#1.optwhite#3.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+ Forward route : (from state 2)
+ (START)->6:[\055]->1:[\r]->(HERE)
+ Transitions :
+ EOS -> 3
+ 0:[\t ] -> 5
+ 1:[\r] -> 5
+ 9:[;] -> 8
+ 10:[=] -> 9
+
+DFA state 6
+ NFA states :
+ main.component#2.namevalue#1.#6
+ main.component#2.namevalue#1.digits#6.in
+ main.component#5.namevalue#1.#6
+ main.component#5.namevalue#1.digits#6.in
+ main.component#9.namevalue#1.#6
+ main.component#9.namevalue#1.digits#6.in
+
+ Forward route : (from state 2)
+ (START)->6:[\055]->4:[*]->(HERE)
+ Transitions :
+ 8:[0-9] -> 11
+
+DFA state 7
+ NFA states :
+ main.component#2.majorminor#3.bar
+ main.component#2.majorminor#3.minor#2.in
+ main.component#5.majorminor#3.bar
+ main.component#5.majorminor#3.minor#2.in
+ main.component#9.majorminor#3.bar
+ main.component#9.majorminor#3.minor#2.in
+
+ Forward route : (from state 2)
+ (START)->6:[\055]->7:[/]->(HERE)
+ Transitions :
+ 5:[+.] -> 12
+ 6:[\055] -> 12
+ 8:[0-9] -> 12
+ 11:[A-Z_a-z] -> 12
+ 12:[\\] -> 12
+
+DFA state 8
+ NFA states :
+ main.in
+ main.#1
+ main.optwhite#1.in
+ main.optwhite#1.out
+ main.component#2.in
+ main.component#2.namevalue#1.in
+ main.component#2.namevalue#1.#1
+ main.component#2.namevalue#1.optwhite#1.in
+ main.component#2.namevalue#1.optwhite#1.out
+ main.component#2.namevalue#1.name#2.in
+ main.component#2.namevalue#1.#4
+ main.component#2.namevalue#1.optwhite#4.in
+ main.component#2.namevalue#1.optwhite#4.out
+ main.component#2.namevalue#1.name#5.in
+ main.component#2.name#2.in
+ main.component#2.majorminor#3.in
+ main.component#2.majorminor#3.major#1.in
+ main.#4
+ main.optwhite#4.in
+ main.optwhite#4.out
+ main.component#5.in
+ main.component#5.namevalue#1.in
+ main.component#5.namevalue#1.#1
+ main.component#5.namevalue#1.optwhite#1.in
+ main.component#5.namevalue#1.optwhite#1.out
+ main.component#5.namevalue#1.name#2.in
+ main.component#5.namevalue#1.#4
+ main.component#5.namevalue#1.optwhite#4.in
+ main.component#5.namevalue#1.optwhite#4.out
+ main.component#5.namevalue#1.name#5.in
+ main.component#5.name#2.in
+ main.component#5.majorminor#3.in
+ main.component#5.majorminor#3.major#1.in
+ main.#7
+ main.#8
+ main.optwhite#7.in
+ main.optwhite#7.out
+ main.#9
+ main.optwhite#8.in
+ main.optwhite#8.out
+ main.component#9.in
+ main.component#9.namevalue#1.in
+ main.component#9.namevalue#1.#1
+ main.component#9.namevalue#1.optwhite#1.in
+ main.component#9.namevalue#1.optwhite#1.out
+ main.component#9.namevalue#1.name#2.in
+ main.component#9.namevalue#1.#4
+ main.component#9.namevalue#1.optwhite#4.in
+ main.component#9.namevalue#1.optwhite#4.out
+ main.component#9.namevalue#1.name#5.in
+ main.component#9.name#2.in
+ main.component#9.majorminor#3.in
+ main.component#9.majorminor#3.major#1.in
+ main.in2
+ main.#12
+
+ Forward route : (from state 2)
+ (START)->6:[\055]->9:[;]->(HERE)
+ Transitions :
+ EOS -> 3
+ 0:[\t ] -> 13
+ 1:[\r] -> 13
+ 6:[\055] -> 2
+ 8:[0-9] -> 2
+ 11:[A-Z_a-z] -> 2
+ NFA exit tags applying :
+ GOT_TERMINATOR
+ Attributes for <action> : GOT_TERMINATOR
+
+DFA state 9
+ NFA states :
+ main.component#2.namevalue#1.rhs_normal
+ main.component#2.namevalue#1.#9
+ main.component#2.namevalue#1.optwhite#8.in
+ main.component#2.namevalue#1.optwhite#8.out
+ main.component#2.namevalue#1.qvalue#9.in
+ main.component#2.namevalue#1.#11
+ main.component#2.namevalue#1.optwhite#11.in
+ main.component#2.namevalue#1.optwhite#11.out
+ main.component#2.namevalue#1.value#12.in
+ main.component#2.namevalue#1.#13
+ main.component#2.namevalue#1.optwhite#14.in
+ main.component#2.namevalue#1.optwhite#14.out
+ main.component#2.namevalue#1.#14
+ main.component#5.namevalue#1.rhs_normal
+ main.component#5.namevalue#1.#9
+ main.component#5.namevalue#1.optwhite#8.in
+ main.component#5.namevalue#1.optwhite#8.out
+ main.component#5.namevalue#1.qvalue#9.in
+ main.component#5.namevalue#1.#11
+ main.component#5.namevalue#1.optwhite#11.in
+ main.component#5.namevalue#1.optwhite#11.out
+ main.component#5.namevalue#1.value#12.in
+ main.component#5.namevalue#1.#13
+ main.component#5.namevalue#1.optwhite#14.in
+ main.component#5.namevalue#1.optwhite#14.out
+ main.component#5.namevalue#1.#14
+ main.component#9.namevalue#1.rhs_normal
+ main.component#9.namevalue#1.#9
+ main.component#9.namevalue#1.optwhite#8.in
+ main.component#9.namevalue#1.optwhite#8.out
+ main.component#9.namevalue#1.qvalue#9.in
+ main.component#9.namevalue#1.#11
+ main.component#9.namevalue#1.optwhite#11.in
+ main.component#9.namevalue#1.optwhite#11.out
+ main.component#9.namevalue#1.value#12.in
+ main.component#9.namevalue#1.#13
+ main.component#9.namevalue#1.optwhite#14.in
+ main.component#9.namevalue#1.optwhite#14.out
+ main.component#9.namevalue#1.#14
+
+ Forward route : (from state 2)
+ (START)->6:[\055]->10:[=]->(HERE)
+ Transitions :
+ EOS -> 14
+ 0:[\t ] -> 15
+ 1:[\r] -> 15
+ 2:[!#-),:<>-@[]^`{-~] -> 16
+ 3:["] -> 17
+ 4:[*] -> 16
+ 5:[+.] -> 16
+ 6:[\055] -> 16
+ 7:[/] -> 16
+ 8:[0-9] -> 16
+ 10:[=] -> 16
+ 11:[A-Z_a-z] -> 16
+
+DFA state 10
+ NFA states :
+ main.#2
+ main.component#2.namevalue#1.#2
+ main.component#2.namevalue#1.name#2.name1
+ main.component#2.namevalue#1.name#2.#1
+ main.component#2.namevalue#1.name#2.#2
+ main.component#2.namevalue#1.name#2.out
+ main.component#2.namevalue#1.#3
+ main.component#2.namevalue#1.optwhite#3.in
+ main.component#2.namevalue#1.optwhite#3.out
+ main.component#2.namevalue#1.#5
+ main.component#2.namevalue#1.name#5.name1
+ main.component#2.namevalue#1.name#5.#1
+ main.component#2.namevalue#1.name#5.#2
+ main.component#2.namevalue#1.name#5.out
+ main.component#2.name#2.name1
+ main.component#2.name#2.#1
+ main.component#2.name#2.#2
+ main.component#2.name#2.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+ main.#5
+ main.component#5.namevalue#1.#2
+ main.component#5.namevalue#1.name#2.name1
+ main.component#5.namevalue#1.name#2.#1
+ main.component#5.namevalue#1.name#2.#2
+ main.component#5.namevalue#1.name#2.out
+ main.component#5.namevalue#1.#3
+ main.component#5.namevalue#1.optwhite#3.in
+ main.component#5.namevalue#1.optwhite#3.out
+ main.component#5.namevalue#1.#5
+ main.component#5.namevalue#1.name#5.name1
+ main.component#5.namevalue#1.name#5.#1
+ main.component#5.namevalue#1.name#5.#2
+ main.component#5.namevalue#1.name#5.out
+ main.component#5.name#2.name1
+ main.component#5.name#2.#1
+ main.component#5.name#2.#2
+ main.component#5.name#2.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+ main.#10
+ main.component#9.namevalue#1.#2
+ main.component#9.namevalue#1.name#2.name1
+ main.component#9.namevalue#1.name#2.#1
+ main.component#9.namevalue#1.name#2.#2
+ main.component#9.namevalue#1.name#2.out
+ main.component#9.namevalue#1.#3
+ main.component#9.namevalue#1.optwhite#3.in
+ main.component#9.namevalue#1.optwhite#3.out
+ main.component#9.namevalue#1.#5
+ main.component#9.namevalue#1.name#5.name1
+ main.component#9.namevalue#1.name#5.#1
+ main.component#9.namevalue#1.name#5.#2
+ main.component#9.namevalue#1.name#5.out
+ main.component#9.name#2.name1
+ main.component#9.name#2.#1
+ main.component#9.name#2.#2
+ main.component#9.name#2.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+ Forward route : (from state 4)
+ (START)->6:[\055]->0:[\t ]->6:[\055]->(HERE)
+ Transitions :
+ EOS -> 3
+ 0:[\t ] -> 4
+ 1:[\r] -> 5
+ 4:[*] -> 6
+ 6:[\055] -> 10
+ 8:[0-9] -> 10
+ 9:[;] -> 8
+ 10:[=] -> 9
+ 11:[A-Z_a-z] -> 10
+ NFA exit tags applying :
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ Attributes for <copier> : COPY_TO_NAME
+ Attributes for <action> : GOT_NAME
+
+DFA state 11
+ NFA states :
+ main.component#2.namevalue#1.#7
+ main.component#2.namevalue#1.digits#6.in
+ main.component#2.namevalue#1.digits#6.out
+ main.component#2.namevalue#1.#8
+ main.component#2.namevalue#1.optwhite#7.in
+ main.component#2.namevalue#1.optwhite#7.out
+ main.component#5.namevalue#1.#7
+ main.component#5.namevalue#1.digits#6.in
+ main.component#5.namevalue#1.digits#6.out
+ main.component#5.namevalue#1.#8
+ main.component#5.namevalue#1.optwhite#7.in
+ main.component#5.namevalue#1.optwhite#7.out
+ main.component#9.namevalue#1.#7
+ main.component#9.namevalue#1.digits#6.in
+ main.component#9.namevalue#1.digits#6.out
+ main.component#9.namevalue#1.#8
+ main.component#9.namevalue#1.optwhite#7.in
+ main.component#9.namevalue#1.optwhite#7.out
+
+ Forward route : (from state 6)
+ (START)->6:[\055]->4:[*]->8:[0-9]->(HERE)
+ Transitions :
+ 0:[\t ] -> 18
+ 1:[\r] -> 18
+ 8:[0-9] -> 11
+ 10:[=] -> 19
+
+DFA state 12
+ NFA states :
+ main.#2
+ main.component#2.majorminor#3.minor#2.minor1
+ main.component#2.majorminor#3.minor#2.#1
+ main.component#2.majorminor#3.minor#2.out
+ main.component#2.majorminor#3.out
+ main.component#2.majorminor#3.#1
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+ main.#5
+ main.component#5.majorminor#3.minor#2.minor1
+ main.component#5.majorminor#3.minor#2.#1
+ main.component#5.majorminor#3.minor#2.out
+ main.component#5.majorminor#3.out
+ main.component#5.majorminor#3.#1
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+ main.#10
+ main.component#9.majorminor#3.minor#2.minor1
+ main.component#9.majorminor#3.minor#2.#1
+ main.component#9.majorminor#3.minor#2.out
+ main.component#9.majorminor#3.out
+ main.component#9.majorminor#3.#1
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+ Forward route : (from state 7)
+ (START)->6:[\055]->7:[/]->5:[+.]->(HERE)
+ Transitions :
+ EOS -> 3
+ 0:[\t ] -> 20
+ 1:[\r] -> 20
+ 5:[+.] -> 12
+ 6:[\055] -> 12
+ 8:[0-9] -> 12
+ 9:[;] -> 8
+ 11:[A-Z_a-z] -> 12
+ 12:[\\] -> 12
+ NFA exit tags applying :
+ GOT_MAJORMINOR
+ COPY_TO_MINOR
+ GOT_MAJORMINOR
+ COPY_TO_MINOR
+ GOT_MAJORMINOR
+ COPY_TO_MINOR
+ Attributes for <copier> : COPY_TO_MINOR
+ Attributes for <action> : GOT_MAJORMINOR
+
+DFA state 13
+ NFA states :
+ main.#1
+ main.optwhite#1.in
+ main.optwhite#1.out
+ main.component#2.in
+ main.component#2.namevalue#1.in
+ main.component#2.namevalue#1.#1
+ main.component#2.namevalue#1.optwhite#1.in
+ main.component#2.namevalue#1.optwhite#1.out
+ main.component#2.namevalue#1.name#2.in
+ main.component#2.namevalue#1.#4
+ main.component#2.namevalue#1.optwhite#4.in
+ main.component#2.namevalue#1.optwhite#4.out
+ main.component#2.namevalue#1.name#5.in
+ main.component#2.name#2.in
+ main.component#2.majorminor#3.in
+ main.component#2.majorminor#3.major#1.in
+ main.#4
+ main.optwhite#4.in
+ main.optwhite#4.out
+ main.component#5.in
+ main.component#5.namevalue#1.in
+ main.component#5.namevalue#1.#1
+ main.component#5.namevalue#1.optwhite#1.in
+ main.component#5.namevalue#1.optwhite#1.out
+ main.component#5.namevalue#1.name#2.in
+ main.component#5.namevalue#1.#4
+ main.component#5.namevalue#1.optwhite#4.in
+ main.component#5.namevalue#1.optwhite#4.out
+ main.component#5.namevalue#1.name#5.in
+ main.component#5.name#2.in
+ main.component#5.majorminor#3.in
+ main.component#5.majorminor#3.major#1.in
+ main.#8
+ main.optwhite#7.in
+ main.optwhite#7.out
+ main.#9
+ main.optwhite#8.in
+ main.optwhite#8.out
+ main.component#9.in
+ main.component#9.namevalue#1.in
+ main.component#9.namevalue#1.#1
+ main.component#9.namevalue#1.optwhite#1.in
+ main.component#9.namevalue#1.optwhite#1.out
+ main.component#9.namevalue#1.name#2.in
+ main.component#9.namevalue#1.#4
+ main.component#9.namevalue#1.optwhite#4.in
+ main.component#9.namevalue#1.optwhite#4.out
+ main.component#9.namevalue#1.name#5.in
+ main.component#9.name#2.in
+ main.component#9.majorminor#3.in
+ main.component#9.majorminor#3.major#1.in
+
+ Forward route : (from state 8)
+ (START)->6:[\055]->9:[;]->0:[\t ]->(HERE)
+ Transitions :
+ EOS -> 3
+ 0:[\t ] -> 13
+ 1:[\r] -> 13
+ 6:[\055] -> 2
+ 8:[0-9] -> 2
+ 11:[A-Z_a-z] -> 2
+
+DFA state 14
+ NFA states :
+ main.#2
+ main.component#2.namevalue#1.out_normal
+ main.component#2.namevalue#1.#19
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+ main.#5
+ main.component#5.namevalue#1.out_normal
+ main.component#5.namevalue#1.#19
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+ main.#10
+ main.component#9.namevalue#1.out_normal
+ main.component#9.namevalue#1.#19
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+ Forward route : (from state 9)
+ (START)->6:[\055]->10:[=]->EOS->(HERE)
+ Transitions :
+ EOS -> 3
+ 0:[\t ] -> 20
+ 1:[\r] -> 20
+ 9:[;] -> 8
+ NFA exit tags applying :
+ GOT_NAMEVALUE
+ GOT_NAMEVALUE
+ GOT_NAMEVALUE
+ Attributes for <action> : GOT_NAMEVALUE
+
+DFA state 15
+ NFA states :
+ main.component#2.namevalue#1.#9
+ main.component#2.namevalue#1.optwhite#8.in
+ main.component#2.namevalue#1.optwhite#8.out
+ main.component#2.namevalue#1.qvalue#9.in
+ main.component#2.namevalue#1.#11
+ main.component#2.namevalue#1.optwhite#11.in
+ main.component#2.namevalue#1.optwhite#11.out
+ main.component#2.namevalue#1.value#12.in
+ main.component#2.namevalue#1.#13
+ main.component#2.namevalue#1.optwhite#14.in
+ main.component#2.namevalue#1.optwhite#14.out
+ main.component#2.namevalue#1.#14
+ main.component#5.namevalue#1.#9
+ main.component#5.namevalue#1.optwhite#8.in
+ main.component#5.namevalue#1.optwhite#8.out
+ main.component#5.namevalue#1.qvalue#9.in
+ main.component#5.namevalue#1.#11
+ main.component#5.namevalue#1.optwhite#11.in
+ main.component#5.namevalue#1.optwhite#11.out
+ main.component#5.namevalue#1.value#12.in
+ main.component#5.namevalue#1.#13
+ main.component#5.namevalue#1.optwhite#14.in
+ main.component#5.namevalue#1.optwhite#14.out
+ main.component#5.namevalue#1.#14
+ main.component#9.namevalue#1.#9
+ main.component#9.namevalue#1.optwhite#8.in
+ main.component#9.namevalue#1.optwhite#8.out
+ main.component#9.namevalue#1.qvalue#9.in
+ main.component#9.namevalue#1.#11
+ main.component#9.namevalue#1.optwhite#11.in
+ main.component#9.namevalue#1.optwhite#11.out
+ main.component#9.namevalue#1.value#12.in
+ main.component#9.namevalue#1.#13
+ main.component#9.namevalue#1.optwhite#14.in
+ main.component#9.namevalue#1.optwhite#14.out
+ main.component#9.namevalue#1.#14
+
+ Forward route : (from state 9)
+ (START)->6:[\055]->10:[=]->0:[\t ]->(HERE)
+ Transitions :
+ EOS -> 14
+ 0:[\t ] -> 15
+ 1:[\r] -> 15
+ 2:[!#-),:<>-@[]^`{-~] -> 16
+ 3:["] -> 17
+ 4:[*] -> 16
+ 5:[+.] -> 16
+ 6:[\055] -> 16
+ 7:[/] -> 16
+ 8:[0-9] -> 16
+ 10:[=] -> 16
+ 11:[A-Z_a-z] -> 16
+
+DFA state 16
+ NFA states :
+ main.#2
+ main.component#2.namevalue#1.#12
+ main.component#2.namevalue#1.value#12.v1
+ main.component#2.namevalue#1.value#12.#1
+ main.component#2.namevalue#1.value#12.out
+ main.component#2.namevalue#1.optwhite#13.in
+ main.component#2.namevalue#1.optwhite#13.out
+ main.component#2.namevalue#1.out_normal
+ main.component#2.namevalue#1.#19
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+ main.#5
+ main.component#5.namevalue#1.#12
+ main.component#5.namevalue#1.value#12.v1
+ main.component#5.namevalue#1.value#12.#1
+ main.component#5.namevalue#1.value#12.out
+ main.component#5.namevalue#1.optwhite#13.in
+ main.component#5.namevalue#1.optwhite#13.out
+ main.component#5.namevalue#1.out_normal
+ main.component#5.namevalue#1.#19
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+ main.#10
+ main.component#9.namevalue#1.#12
+ main.component#9.namevalue#1.value#12.v1
+ main.component#9.namevalue#1.value#12.#1
+ main.component#9.namevalue#1.value#12.out
+ main.component#9.namevalue#1.optwhite#13.in
+ main.component#9.namevalue#1.optwhite#13.out
+ main.component#9.namevalue#1.out_normal
+ main.component#9.namevalue#1.#19
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+ Forward route : (from state 9)
+ (START)->6:[\055]->10:[=]->2:[!#-),:<>-@[]^`{-~]->(HERE)
+ Transitions :
+ EOS -> 3
+ 0:[\t ] -> 21
+ 1:[\r] -> 21
+ 2:[!#-),:<>-@[]^`{-~] -> 16
+ 4:[*] -> 16
+ 5:[+.] -> 16
+ 6:[\055] -> 16
+ 7:[/] -> 16
+ 8:[0-9] -> 16
+ 9:[;] -> 8
+ 10:[=] -> 16
+ 11:[A-Z_a-z] -> 16
+ NFA exit tags applying :
+ GOT_NAMEVALUE
+ COPY_TO_VALUE
+ GOT_NAMEVALUE
+ COPY_TO_VALUE
+ GOT_NAMEVALUE
+ COPY_TO_VALUE
+ Attributes for <copier> : COPY_TO_VALUE
+ Attributes for <action> : GOT_NAMEVALUE
+
+DFA state 17
+ NFA states :
+ main.component#2.namevalue#1.qvalue#9.qv0
+ main.component#2.namevalue#1.qvalue#9.escape#1.in
+ main.component#5.namevalue#1.qvalue#9.qv0
+ main.component#5.namevalue#1.qvalue#9.escape#1.in
+ main.component#9.namevalue#1.qvalue#9.qv0
+ main.component#9.namevalue#1.qvalue#9.escape#1.in
+
+ Forward route : (from state 9)
+ (START)->6:[\055]->10:[=]->3:["]->(HERE)
+ Transitions :
+ 0:[\t ] -> 22
+ 2:[!#-),:<>-@[]^`{-~] -> 22
+ 4:[*] -> 22
+ 5:[+.] -> 22
+ 6:[\055] -> 22
+ 7:[/] -> 22
+ 8:[0-9] -> 22
+ 9:[;] -> 22
+ 10:[=] -> 22
+ 11:[A-Z_a-z] -> 22
+ 12:[\\] -> 23
+
+DFA state 18
+ NFA states :
+ main.component#2.namevalue#1.#8
+ main.component#2.namevalue#1.optwhite#7.in
+ main.component#2.namevalue#1.optwhite#7.out
+ main.component#5.namevalue#1.#8
+ main.component#5.namevalue#1.optwhite#7.in
+ main.component#5.namevalue#1.optwhite#7.out
+ main.component#9.namevalue#1.#8
+ main.component#9.namevalue#1.optwhite#7.in
+ main.component#9.namevalue#1.optwhite#7.out
+
+ Forward route : (from state 11)
+ (START)->6:[\055]->4:[*]->8:[0-9]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 18
+ 1:[\r] -> 18
+ 10:[=] -> 19
+
+DFA state 19
+ NFA states :
+ main.component#2.namevalue#1.rhs_continue
+ main.component#2.namevalue#1.#15
+ main.component#2.namevalue#1.optwhite#15.in
+ main.component#2.namevalue#1.optwhite#15.out
+ main.component#2.namevalue#1.qvalue#16.in
+ main.component#2.namevalue#1.#17
+ main.component#2.namevalue#1.optwhite#18.in
+ main.component#2.namevalue#1.optwhite#18.out
+ main.component#2.namevalue#1.value#19.in
+ main.component#5.namevalue#1.rhs_continue
+ main.component#5.namevalue#1.#15
+ main.component#5.namevalue#1.optwhite#15.in
+ main.component#5.namevalue#1.optwhite#15.out
+ main.component#5.namevalue#1.qvalue#16.in
+ main.component#5.namevalue#1.#17
+ main.component#5.namevalue#1.optwhite#18.in
+ main.component#5.namevalue#1.optwhite#18.out
+ main.component#5.namevalue#1.value#19.in
+ main.component#9.namevalue#1.rhs_continue
+ main.component#9.namevalue#1.#15
+ main.component#9.namevalue#1.optwhite#15.in
+ main.component#9.namevalue#1.optwhite#15.out
+ main.component#9.namevalue#1.qvalue#16.in
+ main.component#9.namevalue#1.#17
+ main.component#9.namevalue#1.optwhite#18.in
+ main.component#9.namevalue#1.optwhite#18.out
+ main.component#9.namevalue#1.value#19.in
+
+ Forward route : (from state 11)
+ (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->(HERE)
+ Transitions :
+ 0:[\t ] -> 24
+ 1:[\r] -> 24
+ 2:[!#-),:<>-@[]^`{-~] -> 25
+ 3:["] -> 26
+ 4:[*] -> 25
+ 5:[+.] -> 25
+ 6:[\055] -> 25
+ 7:[/] -> 25
+ 8:[0-9] -> 25
+ 10:[=] -> 25
+ 11:[A-Z_a-z] -> 25
+
+DFA state 20
+ NFA states :
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+ Forward route : (from state 12)
+ (START)->6:[\055]->7:[/]->5:[+.]->0:[\t ]->(HERE)
+ Transitions :
+ EOS -> 3
+ 0:[\t ] -> 20
+ 1:[\r] -> 20
+ 9:[;] -> 8
+
+DFA state 21
+ NFA states :
+ main.#2
+ main.component#2.namevalue#1.optwhite#13.in
+ main.component#2.namevalue#1.optwhite#13.out
+ main.component#2.namevalue#1.out_normal
+ main.component#2.namevalue#1.#19
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+ main.#5
+ main.component#5.namevalue#1.optwhite#13.in
+ main.component#5.namevalue#1.optwhite#13.out
+ main.component#5.namevalue#1.out_normal
+ main.component#5.namevalue#1.#19
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+ main.#10
+ main.component#9.namevalue#1.optwhite#13.in
+ main.component#9.namevalue#1.optwhite#13.out
+ main.component#9.namevalue#1.out_normal
+ main.component#9.namevalue#1.#19
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+ Forward route : (from state 16)
+ (START)->6:[\055]->10:[=]->2:[!#-),:<>-@[]^`{-~]->0:[\t ]->(HERE)
+ Transitions :
+ EOS -> 3
+ 0:[\t ] -> 21
+ 1:[\r] -> 21
+ 9:[;] -> 8
+ NFA exit tags applying :
+ GOT_NAMEVALUE
+ GOT_NAMEVALUE
+ GOT_NAMEVALUE
+ Attributes for <action> : GOT_NAMEVALUE
+
+DFA state 22
+ NFA states :
+ main.component#2.namevalue#1.qvalue#9.qv1
+ main.component#2.namevalue#1.qvalue#9.#1
+ main.component#2.namevalue#1.qvalue#9.escape#2.in
+ main.component#2.namevalue#1.qvalue#9.qv2
+ main.component#5.namevalue#1.qvalue#9.qv1
+ main.component#5.namevalue#1.qvalue#9.#1
+ main.component#5.namevalue#1.qvalue#9.escape#2.in
+ main.component#5.namevalue#1.qvalue#9.qv2
+ main.component#9.namevalue#1.qvalue#9.qv1
+ main.component#9.namevalue#1.qvalue#9.#1
+ main.component#9.namevalue#1.qvalue#9.escape#2.in
+ main.component#9.namevalue#1.qvalue#9.qv2
+
+ Forward route : (from state 17)
+ (START)->6:[\055]->10:[=]->3:["]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 22
+ 2:[!#-),:<>-@[]^`{-~] -> 22
+ 3:["] -> 27
+ 4:[*] -> 22
+ 5:[+.] -> 22
+ 6:[\055] -> 22
+ 7:[/] -> 22
+ 8:[0-9] -> 22
+ 9:[;] -> 22
+ 10:[=] -> 22
+ 11:[A-Z_a-z] -> 22
+ 12:[\\] -> 28
+ NFA exit tags applying :
+ COPY_TO_VALUE
+ COPY_TO_VALUE
+ COPY_TO_VALUE
+ Attributes for <copier> : COPY_TO_VALUE
+
+DFA state 23
+ NFA states :
+ main.component#2.namevalue#1.qvalue#9.escape#1.#1
+ main.component#2.namevalue#1.qvalue#9.escape#1.#2
+ main.component#5.namevalue#1.qvalue#9.escape#1.#1
+ main.component#5.namevalue#1.qvalue#9.escape#1.#2
+ main.component#9.namevalue#1.qvalue#9.escape#1.#1
+ main.component#9.namevalue#1.qvalue#9.escape#1.#2
+
+ Forward route : (from state 17)
+ (START)->6:[\055]->10:[=]->3:["]->12:[\\]->(HERE)
+ Transitions :
+ 3:["] -> 29
+ 12:[\\] -> 29
+
+DFA state 24
+ NFA states :
+ main.component#2.namevalue#1.#15
+ main.component#2.namevalue#1.optwhite#15.in
+ main.component#2.namevalue#1.optwhite#15.out
+ main.component#2.namevalue#1.qvalue#16.in
+ main.component#2.namevalue#1.#17
+ main.component#2.namevalue#1.optwhite#18.in
+ main.component#2.namevalue#1.optwhite#18.out
+ main.component#2.namevalue#1.value#19.in
+ main.component#5.namevalue#1.#15
+ main.component#5.namevalue#1.optwhite#15.in
+ main.component#5.namevalue#1.optwhite#15.out
+ main.component#5.namevalue#1.qvalue#16.in
+ main.component#5.namevalue#1.#17
+ main.component#5.namevalue#1.optwhite#18.in
+ main.component#5.namevalue#1.optwhite#18.out
+ main.component#5.namevalue#1.value#19.in
+ main.component#9.namevalue#1.#15
+ main.component#9.namevalue#1.optwhite#15.in
+ main.component#9.namevalue#1.optwhite#15.out
+ main.component#9.namevalue#1.qvalue#16.in
+ main.component#9.namevalue#1.#17
+ main.component#9.namevalue#1.optwhite#18.in
+ main.component#9.namevalue#1.optwhite#18.out
+ main.component#9.namevalue#1.value#19.in
+
+ Forward route : (from state 19)
+ (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 24
+ 1:[\r] -> 24
+ 2:[!#-),:<>-@[]^`{-~] -> 25
+ 3:["] -> 26
+ 4:[*] -> 25
+ 5:[+.] -> 25
+ 6:[\055] -> 25
+ 7:[/] -> 25
+ 8:[0-9] -> 25
+ 10:[=] -> 25
+ 11:[A-Z_a-z] -> 25
+
+DFA state 25
+ NFA states :
+ main.#2
+ main.component#2.namevalue#1.#18
+ main.component#2.namevalue#1.value#19.v1
+ main.component#2.namevalue#1.value#19.#1
+ main.component#2.namevalue#1.value#19.out
+ main.component#2.namevalue#1.optwhite#20.in
+ main.component#2.namevalue#1.optwhite#20.out
+ main.component#2.namevalue#1.out_continue
+ main.component#2.namevalue#1.#20
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+ main.#5
+ main.component#5.namevalue#1.#18
+ main.component#5.namevalue#1.value#19.v1
+ main.component#5.namevalue#1.value#19.#1
+ main.component#5.namevalue#1.value#19.out
+ main.component#5.namevalue#1.optwhite#20.in
+ main.component#5.namevalue#1.optwhite#20.out
+ main.component#5.namevalue#1.out_continue
+ main.component#5.namevalue#1.#20
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+ main.#10
+ main.component#9.namevalue#1.#18
+ main.component#9.namevalue#1.value#19.v1
+ main.component#9.namevalue#1.value#19.#1
+ main.component#9.namevalue#1.value#19.out
+ main.component#9.namevalue#1.optwhite#20.in
+ main.component#9.namevalue#1.optwhite#20.out
+ main.component#9.namevalue#1.out_continue
+ main.component#9.namevalue#1.#20
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+ Forward route : (from state 19)
+ (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->2:[!#-),:<>-@[]^`{-~]->(HERE)
+ Transitions :
+ EOS -> 3
+ 0:[\t ] -> 30
+ 1:[\r] -> 30
+ 2:[!#-),:<>-@[]^`{-~] -> 25
+ 4:[*] -> 25
+ 5:[+.] -> 25
+ 6:[\055] -> 25
+ 7:[/] -> 25
+ 8:[0-9] -> 25
+ 9:[;] -> 8
+ 10:[=] -> 25
+ 11:[A-Z_a-z] -> 25
+ NFA exit tags applying :
+ GOT_NAMEVALUE_CONT
+ COPY_TO_VALUE
+ GOT_NAMEVALUE_CONT
+ COPY_TO_VALUE
+ GOT_NAMEVALUE_CONT
+ COPY_TO_VALUE
+ Attributes for <copier> : COPY_TO_VALUE
+ Attributes for <action> : GOT_NAMEVALUE_CONT
+
+DFA state 26
+ NFA states :
+ main.component#2.namevalue#1.qvalue#16.qv0
+ main.component#2.namevalue#1.qvalue#16.escape#1.in
+ main.component#5.namevalue#1.qvalue#16.qv0
+ main.component#5.namevalue#1.qvalue#16.escape#1.in
+ main.component#9.namevalue#1.qvalue#16.qv0
+ main.component#9.namevalue#1.qvalue#16.escape#1.in
+
+ Forward route : (from state 19)
+ (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->3:["]->(HERE)
+ Transitions :
+ 0:[\t ] -> 31
+ 2:[!#-),:<>-@[]^`{-~] -> 31
+ 4:[*] -> 31
+ 5:[+.] -> 31
+ 6:[\055] -> 31
+ 7:[/] -> 31
+ 8:[0-9] -> 31
+ 9:[;] -> 31
+ 10:[=] -> 31
+ 11:[A-Z_a-z] -> 31
+ 12:[\\] -> 32
+
+DFA state 27
+ NFA states :
+ main.#2
+ main.component#2.namevalue#1.#10
+ main.component#2.namevalue#1.qvalue#9.out
+ main.component#2.namevalue#1.optwhite#10.in
+ main.component#2.namevalue#1.optwhite#10.out
+ main.component#2.namevalue#1.out_normal
+ main.component#2.namevalue#1.#19
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+ main.#5
+ main.component#5.namevalue#1.#10
+ main.component#5.namevalue#1.qvalue#9.out
+ main.component#5.namevalue#1.optwhite#10.in
+ main.component#5.namevalue#1.optwhite#10.out
+ main.component#5.namevalue#1.out_normal
+ main.component#5.namevalue#1.#19
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+ main.#10
+ main.component#9.namevalue#1.#10
+ main.component#9.namevalue#1.qvalue#9.out
+ main.component#9.namevalue#1.optwhite#10.in
+ main.component#9.namevalue#1.optwhite#10.out
+ main.component#9.namevalue#1.out_normal
+ main.component#9.namevalue#1.#19
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+ Forward route : (from state 22)
+ (START)->6:[\055]->10:[=]->3:["]->0:[\t ]->3:["]->(HERE)
+ Transitions :
+ EOS -> 3
+ 0:[\t ] -> 33
+ 1:[\r] -> 33
+ 9:[;] -> 8
+ NFA exit tags applying :
+ GOT_NAMEVALUE
+ GOT_NAMEVALUE
+ GOT_NAMEVALUE
+ Attributes for <action> : GOT_NAMEVALUE
+
+DFA state 28
+ NFA states :
+ main.component#2.namevalue#1.qvalue#9.escape#2.#1
+ main.component#2.namevalue#1.qvalue#9.escape#2.#2
+ main.component#5.namevalue#1.qvalue#9.escape#2.#1
+ main.component#5.namevalue#1.qvalue#9.escape#2.#2
+ main.component#9.namevalue#1.qvalue#9.escape#2.#1
+ main.component#9.namevalue#1.qvalue#9.escape#2.#2
+
+ Forward route : (from state 22)
+ (START)->6:[\055]->10:[=]->3:["]->0:[\t ]->12:[\\]->(HERE)
+ Transitions :
+ 3:["] -> 34
+ 12:[\\] -> 34
+
+DFA state 29
+ NFA states :
+ main.component#2.namevalue#1.qvalue#9.escape#1.out
+ main.component#2.namevalue#1.qvalue#9.qv1
+ main.component#2.namevalue#1.qvalue#9.#1
+ main.component#2.namevalue#1.qvalue#9.escape#2.in
+ main.component#2.namevalue#1.qvalue#9.qv2
+ main.component#5.namevalue#1.qvalue#9.escape#1.out
+ main.component#5.namevalue#1.qvalue#9.qv1
+ main.component#5.namevalue#1.qvalue#9.#1
+ main.component#5.namevalue#1.qvalue#9.escape#2.in
+ main.component#5.namevalue#1.qvalue#9.qv2
+ main.component#9.namevalue#1.qvalue#9.escape#1.out
+ main.component#9.namevalue#1.qvalue#9.qv1
+ main.component#9.namevalue#1.qvalue#9.#1
+ main.component#9.namevalue#1.qvalue#9.escape#2.in
+ main.component#9.namevalue#1.qvalue#9.qv2
+
+ Forward route : (from state 23)
+ (START)->6:[\055]->10:[=]->3:["]->12:[\\]->3:["]->(HERE)
+ Transitions :
+ 0:[\t ] -> 22
+ 2:[!#-),:<>-@[]^`{-~] -> 22
+ 3:["] -> 27
+ 4:[*] -> 22
+ 5:[+.] -> 22
+ 6:[\055] -> 22
+ 7:[/] -> 22
+ 8:[0-9] -> 22
+ 9:[;] -> 22
+ 10:[=] -> 22
+ 11:[A-Z_a-z] -> 22
+ 12:[\\] -> 28
+ NFA exit tags applying :
+ COPY_TO_VALUE
+ COPY_TO_VALUE
+ COPY_TO_VALUE
+ Attributes for <copier> : COPY_TO_VALUE
+
+DFA state 30
+ NFA states :
+ main.#2
+ main.component#2.namevalue#1.optwhite#20.in
+ main.component#2.namevalue#1.optwhite#20.out
+ main.component#2.namevalue#1.out_continue
+ main.component#2.namevalue#1.#20
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+ main.#5
+ main.component#5.namevalue#1.optwhite#20.in
+ main.component#5.namevalue#1.optwhite#20.out
+ main.component#5.namevalue#1.out_continue
+ main.component#5.namevalue#1.#20
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+ main.#10
+ main.component#9.namevalue#1.optwhite#20.in
+ main.component#9.namevalue#1.optwhite#20.out
+ main.component#9.namevalue#1.out_continue
+ main.component#9.namevalue#1.#20
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+ Forward route : (from state 25)
+ (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->2:[!#-),:<>-@[]^`{-~]->0:[\t ]->(HERE)
+ Transitions :
+ EOS -> 3
+ 0:[\t ] -> 30
+ 1:[\r] -> 30
+ 9:[;] -> 8
+ NFA exit tags applying :
+ GOT_NAMEVALUE_CONT
+ GOT_NAMEVALUE_CONT
+ GOT_NAMEVALUE_CONT
+ Attributes for <action> : GOT_NAMEVALUE_CONT
+
+DFA state 31
+ NFA states :
+ main.component#2.namevalue#1.qvalue#16.qv1
+ main.component#2.namevalue#1.qvalue#16.#1
+ main.component#2.namevalue#1.qvalue#16.escape#2.in
+ main.component#2.namevalue#1.qvalue#16.qv2
+ main.component#5.namevalue#1.qvalue#16.qv1
+ main.component#5.namevalue#1.qvalue#16.#1
+ main.component#5.namevalue#1.qvalue#16.escape#2.in
+ main.component#5.namevalue#1.qvalue#16.qv2
+ main.component#9.namevalue#1.qvalue#16.qv1
+ main.component#9.namevalue#1.qvalue#16.#1
+ main.component#9.namevalue#1.qvalue#16.escape#2.in
+ main.component#9.namevalue#1.qvalue#16.qv2
+
+ Forward route : (from state 26)
+ (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->3:["]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 31
+ 2:[!#-),:<>-@[]^`{-~] -> 31
+ 3:["] -> 35
+ 4:[*] -> 31
+ 5:[+.] -> 31
+ 6:[\055] -> 31
+ 7:[/] -> 31
+ 8:[0-9] -> 31
+ 9:[;] -> 31
+ 10:[=] -> 31
+ 11:[A-Z_a-z] -> 31
+ 12:[\\] -> 36
+ NFA exit tags applying :
+ COPY_TO_VALUE
+ COPY_TO_VALUE
+ COPY_TO_VALUE
+ Attributes for <copier> : COPY_TO_VALUE
+
+DFA state 32
+ NFA states :
+ main.component#2.namevalue#1.qvalue#16.escape#1.#1
+ main.component#2.namevalue#1.qvalue#16.escape#1.#2
+ main.component#5.namevalue#1.qvalue#16.escape#1.#1
+ main.component#5.namevalue#1.qvalue#16.escape#1.#2
+ main.component#9.namevalue#1.qvalue#16.escape#1.#1
+ main.component#9.namevalue#1.qvalue#16.escape#1.#2
+
+ Forward route : (from state 26)
+ (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->3:["]->12:[\\]->(HERE)
+ Transitions :
+ 3:["] -> 37
+ 12:[\\] -> 37
+
+DFA state 33
+ NFA states :
+ main.#2
+ main.component#2.namevalue#1.optwhite#10.in
+ main.component#2.namevalue#1.optwhite#10.out
+ main.component#2.namevalue#1.out_normal
+ main.component#2.namevalue#1.#19
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+ main.#5
+ main.component#5.namevalue#1.optwhite#10.in
+ main.component#5.namevalue#1.optwhite#10.out
+ main.component#5.namevalue#1.out_normal
+ main.component#5.namevalue#1.#19
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+ main.#10
+ main.component#9.namevalue#1.optwhite#10.in
+ main.component#9.namevalue#1.optwhite#10.out
+ main.component#9.namevalue#1.out_normal
+ main.component#9.namevalue#1.#19
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+ Forward route : (from state 27)
+ (START)->6:[\055]->10:[=]->3:["]->0:[\t ]->3:["]->0:[\t ]->(HERE)
+ Transitions :
+ EOS -> 3
+ 0:[\t ] -> 33
+ 1:[\r] -> 33
+ 9:[;] -> 8
+ NFA exit tags applying :
+ GOT_NAMEVALUE
+ GOT_NAMEVALUE
+ GOT_NAMEVALUE
+ Attributes for <action> : GOT_NAMEVALUE
+
+DFA state 34
+ NFA states :
+ main.component#2.namevalue#1.qvalue#9.qv1
+ main.component#2.namevalue#1.qvalue#9.#1
+ main.component#2.namevalue#1.qvalue#9.escape#2.in
+ main.component#2.namevalue#1.qvalue#9.escape#2.out
+ main.component#2.namevalue#1.qvalue#9.qv2
+ main.component#5.namevalue#1.qvalue#9.qv1
+ main.component#5.namevalue#1.qvalue#9.#1
+ main.component#5.namevalue#1.qvalue#9.escape#2.in
+ main.component#5.namevalue#1.qvalue#9.escape#2.out
+ main.component#5.namevalue#1.qvalue#9.qv2
+ main.component#9.namevalue#1.qvalue#9.qv1
+ main.component#9.namevalue#1.qvalue#9.#1
+ main.component#9.namevalue#1.qvalue#9.escape#2.in
+ main.component#9.namevalue#1.qvalue#9.escape#2.out
+ main.component#9.namevalue#1.qvalue#9.qv2
+
+ Forward route : (from state 28)
+ (START)->6:[\055]->10:[=]->3:["]->0:[\t ]->12:[\\]->3:["]->(HERE)
+ Transitions :
+ 0:[\t ] -> 22
+ 2:[!#-),:<>-@[]^`{-~] -> 22
+ 3:["] -> 27
+ 4:[*] -> 22
+ 5:[+.] -> 22
+ 6:[\055] -> 22
+ 7:[/] -> 22
+ 8:[0-9] -> 22
+ 9:[;] -> 22
+ 10:[=] -> 22
+ 11:[A-Z_a-z] -> 22
+ 12:[\\] -> 28
+ NFA exit tags applying :
+ COPY_TO_VALUE
+ COPY_TO_VALUE
+ COPY_TO_VALUE
+ Attributes for <copier> : COPY_TO_VALUE
+
+DFA state 35
+ NFA states :
+ main.#2
+ main.component#2.namevalue#1.#16
+ main.component#2.namevalue#1.qvalue#16.out
+ main.component#2.namevalue#1.optwhite#17.in
+ main.component#2.namevalue#1.optwhite#17.out
+ main.component#2.namevalue#1.out_continue
+ main.component#2.namevalue#1.#20
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+ main.#5
+ main.component#5.namevalue#1.#16
+ main.component#5.namevalue#1.qvalue#16.out
+ main.component#5.namevalue#1.optwhite#17.in
+ main.component#5.namevalue#1.optwhite#17.out
+ main.component#5.namevalue#1.out_continue
+ main.component#5.namevalue#1.#20
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+ main.#10
+ main.component#9.namevalue#1.#16
+ main.component#9.namevalue#1.qvalue#16.out
+ main.component#9.namevalue#1.optwhite#17.in
+ main.component#9.namevalue#1.optwhite#17.out
+ main.component#9.namevalue#1.out_continue
+ main.component#9.namevalue#1.#20
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+ Forward route : (from state 31)
+ (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->3:["]->0:[\t ]->3:["]->(HERE)
+ Transitions :
+ EOS -> 3
+ 0:[\t ] -> 38
+ 1:[\r] -> 38
+ 9:[;] -> 8
+ NFA exit tags applying :
+ GOT_NAMEVALUE_CONT
+ GOT_NAMEVALUE_CONT
+ GOT_NAMEVALUE_CONT
+ Attributes for <action> : GOT_NAMEVALUE_CONT
+
+DFA state 36
+ NFA states :
+ main.component#2.namevalue#1.qvalue#16.escape#2.#1
+ main.component#2.namevalue#1.qvalue#16.escape#2.#2
+ main.component#5.namevalue#1.qvalue#16.escape#2.#1
+ main.component#5.namevalue#1.qvalue#16.escape#2.#2
+ main.component#9.namevalue#1.qvalue#16.escape#2.#1
+ main.component#9.namevalue#1.qvalue#16.escape#2.#2
+
+ Forward route : (from state 31)
+ (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->3:["]->0:[\t ]->12:[\\]->(HERE)
+ Transitions :
+ 3:["] -> 39
+ 12:[\\] -> 39
+
+DFA state 37
+ NFA states :
+ main.component#2.namevalue#1.qvalue#16.escape#1.out
+ main.component#2.namevalue#1.qvalue#16.qv1
+ main.component#2.namevalue#1.qvalue#16.#1
+ main.component#2.namevalue#1.qvalue#16.escape#2.in
+ main.component#2.namevalue#1.qvalue#16.qv2
+ main.component#5.namevalue#1.qvalue#16.escape#1.out
+ main.component#5.namevalue#1.qvalue#16.qv1
+ main.component#5.namevalue#1.qvalue#16.#1
+ main.component#5.namevalue#1.qvalue#16.escape#2.in
+ main.component#5.namevalue#1.qvalue#16.qv2
+ main.component#9.namevalue#1.qvalue#16.escape#1.out
+ main.component#9.namevalue#1.qvalue#16.qv1
+ main.component#9.namevalue#1.qvalue#16.#1
+ main.component#9.namevalue#1.qvalue#16.escape#2.in
+ main.component#9.namevalue#1.qvalue#16.qv2
+
+ Forward route : (from state 32)
+ (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->3:["]->12:[\\]->3:["]->(HERE)
+ Transitions :
+ 0:[\t ] -> 31
+ 2:[!#-),:<>-@[]^`{-~] -> 31
+ 3:["] -> 35
+ 4:[*] -> 31
+ 5:[+.] -> 31
+ 6:[\055] -> 31
+ 7:[/] -> 31
+ 8:[0-9] -> 31
+ 9:[;] -> 31
+ 10:[=] -> 31
+ 11:[A-Z_a-z] -> 31
+ 12:[\\] -> 36
+ NFA exit tags applying :
+ COPY_TO_VALUE
+ COPY_TO_VALUE
+ COPY_TO_VALUE
+ Attributes for <copier> : COPY_TO_VALUE
+
+DFA state 38
+ NFA states :
+ main.#2
+ main.component#2.namevalue#1.optwhite#17.in
+ main.component#2.namevalue#1.optwhite#17.out
+ main.component#2.namevalue#1.out_continue
+ main.component#2.namevalue#1.#20
+ main.component#2.namevalue#1.out
+ main.component#2.out
+ main.#3
+ main.optwhite#3.in
+ main.optwhite#3.out
+ main.#5
+ main.component#5.namevalue#1.optwhite#17.in
+ main.component#5.namevalue#1.optwhite#17.out
+ main.component#5.namevalue#1.out_continue
+ main.component#5.namevalue#1.#20
+ main.component#5.namevalue#1.out
+ main.component#5.out
+ main.#6
+ main.optwhite#6.in
+ main.optwhite#6.out
+ main.#10
+ main.component#9.namevalue#1.optwhite#17.in
+ main.component#9.namevalue#1.optwhite#17.out
+ main.component#9.namevalue#1.out_continue
+ main.component#9.namevalue#1.#20
+ main.component#9.namevalue#1.out
+ main.component#9.out
+ main.#11
+ main.optwhite#10.in
+ main.optwhite#10.out
+
+ Forward route : (from state 35)
+ (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->3:["]->0:[\t ]->3:["]->0:[\t ]->(HERE)
+ Transitions :
+ EOS -> 3
+ 0:[\t ] -> 38
+ 1:[\r] -> 38
+ 9:[;] -> 8
+ NFA exit tags applying :
+ GOT_NAMEVALUE_CONT
+ GOT_NAMEVALUE_CONT
+ GOT_NAMEVALUE_CONT
+ Attributes for <action> : GOT_NAMEVALUE_CONT
+
+DFA state 39
+ NFA states :
+ main.component#2.namevalue#1.qvalue#16.qv1
+ main.component#2.namevalue#1.qvalue#16.#1
+ main.component#2.namevalue#1.qvalue#16.escape#2.in
+ main.component#2.namevalue#1.qvalue#16.escape#2.out
+ main.component#2.namevalue#1.qvalue#16.qv2
+ main.component#5.namevalue#1.qvalue#16.qv1
+ main.component#5.namevalue#1.qvalue#16.#1
+ main.component#5.namevalue#1.qvalue#16.escape#2.in
+ main.component#5.namevalue#1.qvalue#16.escape#2.out
+ main.component#5.namevalue#1.qvalue#16.qv2
+ main.component#9.namevalue#1.qvalue#16.qv1
+ main.component#9.namevalue#1.qvalue#16.#1
+ main.component#9.namevalue#1.qvalue#16.escape#2.in
+ main.component#9.namevalue#1.qvalue#16.escape#2.out
+ main.component#9.namevalue#1.qvalue#16.qv2
+
+ Forward route : (from state 36)
+ (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->3:["]->0:[\t ]->12:[\\]->3:["]->(HERE)
+ Transitions :
+ 0:[\t ] -> 31
+ 2:[!#-),:<>-@[]^`{-~] -> 31
+ 3:["] -> 35
+ 4:[*] -> 31
+ 5:[+.] -> 31
+ 6:[\055] -> 31
+ 7:[/] -> 31
+ 8:[0-9] -> 31
+ 9:[;] -> 31
+ 10:[=] -> 31
+ 11:[A-Z_a-z] -> 31
+ 12:[\\] -> 36
+ NFA exit tags applying :
+ COPY_TO_VALUE
+ COPY_TO_VALUE
+ COPY_TO_VALUE
+ Attributes for <copier> : COPY_TO_VALUE
+
+
+Entry states in DFA:
+Entry <in> : 0
+Searching for dead states...
+(no dead states found)
+
+-----------------------------
+------ COMPRESSING DFA ------
+-----------------------------
+Old DFA state 0 becomes 0
+Old DFA state 1 becomes 0 (formerly 0)
+Old DFA state 2 becomes 1
+Old DFA state 3 becomes 2
+Old DFA state 4 becomes 3
+Old DFA state 5 becomes 4
+Old DFA state 6 becomes 5
+Old DFA state 7 becomes 6
+Old DFA state 8 becomes 7
+Old DFA state 9 becomes 8
+Old DFA state 10 becomes 9
+Old DFA state 11 becomes 10
+Old DFA state 12 becomes 11
+Old DFA state 13 becomes 12
+Old DFA state 14 becomes 13
+Old DFA state 15 becomes 8 (formerly 9)
+Old DFA state 16 becomes 14
+Old DFA state 17 becomes 15
+Old DFA state 18 becomes 16
+Old DFA state 19 becomes 17
+Old DFA state 20 becomes 18
+Old DFA state 21 becomes 19
+Old DFA state 22 becomes 20
+Old DFA state 23 becomes 21
+Old DFA state 24 becomes 17 (formerly 19)
+Old DFA state 25 becomes 22
+Old DFA state 26 becomes 23
+Old DFA state 27 becomes 19 (formerly 21)
+Old DFA state 28 becomes 21 (formerly 23)
+Old DFA state 29 becomes 20 (formerly 22)
+Old DFA state 30 becomes 24
+Old DFA state 31 becomes 25
+Old DFA state 32 becomes 26
+Old DFA state 33 becomes 19 (formerly 21)
+Old DFA state 34 becomes 20 (formerly 22)
+Old DFA state 35 becomes 24 (formerly 30)
+Old DFA state 36 becomes 26 (formerly 32)
+Old DFA state 37 becomes 25 (formerly 31)
+Old DFA state 38 becomes 24 (formerly 30)
+Old DFA state 39 becomes 25 (formerly 31)
+Entry <in>, formerly state 0, now state 0
+-------------------------------
+DFA structure after compression
+-------------------------------
+DFA state 0
+ Forward route :
+ (START)->(HERE)
+ Transitions :
+ 0:[\t ] -> 0
+ 1:[\r] -> 0
+ 6:[\055] -> 1
+ 8:[0-9] -> 1
+ 11:[A-Z_a-z] -> 1
+
+DFA state 1
+ Forward route : (from state 0)
+ (START)->6:[\055]->(HERE)
+ Transitions :
+ EOS -> 2
+ 0:[\t ] -> 3
+ 1:[\r] -> 4
+ 4:[*] -> 5
+ 6:[\055] -> 1
+ 7:[/] -> 6
+ 8:[0-9] -> 1
+ 9:[;] -> 7
+ 10:[=] -> 8
+ 11:[A-Z_a-z] -> 1
+ NFA exit tags applying :
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ Attributes for <copier> : COPY_TO_NAME
+ Attributes for <action> : GOT_NAME
+
+DFA state 2
+ Forward route : (from state 1)
+ (START)->6:[\055]->EOS->(HERE)
+ Transitions :
+ NFA exit tags applying :
+ GOT_TERMINATOR
+ Attributes for <action> : GOT_TERMINATOR
+
+DFA state 3
+ Forward route : (from state 1)
+ (START)->6:[\055]->0:[\t ]->(HERE)
+ Transitions :
+ EOS -> 2
+ 0:[\t ] -> 3
+ 1:[\r] -> 4
+ 4:[*] -> 5
+ 6:[\055] -> 9
+ 8:[0-9] -> 9
+ 9:[;] -> 7
+ 10:[=] -> 8
+ 11:[A-Z_a-z] -> 9
+ Use state 1 as basis (4 fixups)
+ NFA exit tags applying :
+ GOT_NAME_TRAILING_SPACE
+ COPY_TO_NAME
+ GOT_NAME_TRAILING_SPACE
+ COPY_TO_NAME
+ GOT_NAME_TRAILING_SPACE
+ COPY_TO_NAME
+ GOT_NAME_TRAILING_SPACE
+ COPY_TO_NAME
+ GOT_NAME_TRAILING_SPACE
+ COPY_TO_NAME
+ GOT_NAME_TRAILING_SPACE
+ COPY_TO_NAME
+ GOT_NAME_TRAILING_SPACE
+ COPY_TO_NAME
+ GOT_NAME_TRAILING_SPACE
+ COPY_TO_NAME
+ GOT_NAME_TRAILING_SPACE
+ COPY_TO_NAME
+ Attributes for <copier> : COPY_TO_NAME
+ Attributes for <action> : GOT_NAME_TRAILING_SPACE
+
+DFA state 4
+ Forward route : (from state 1)
+ (START)->6:[\055]->1:[\r]->(HERE)
+ Transitions :
+ EOS -> 2
+ 0:[\t ] -> 4
+ 1:[\r] -> 4
+ 9:[;] -> 7
+ 10:[=] -> 8
+
+DFA state 5
+ Forward route : (from state 1)
+ (START)->6:[\055]->4:[*]->(HERE)
+ Transitions :
+ 8:[0-9] -> 10
+
+DFA state 6
+ Forward route : (from state 1)
+ (START)->6:[\055]->7:[/]->(HERE)
+ Transitions :
+ 5:[+.] -> 11
+ 6:[\055] -> 11
+ 8:[0-9] -> 11
+ 11:[A-Z_a-z] -> 11
+ 12:[\\] -> 11
+
+DFA state 7
+ Forward route : (from state 1)
+ (START)->6:[\055]->9:[;]->(HERE)
+ Transitions :
+ EOS -> 2
+ 0:[\t ] -> 12
+ 1:[\r] -> 12
+ 6:[\055] -> 1
+ 8:[0-9] -> 1
+ 11:[A-Z_a-z] -> 1
+ NFA exit tags applying :
+ GOT_TERMINATOR
+ Attributes for <action> : GOT_TERMINATOR
+
+DFA state 8
+ Forward route : (from state 1)
+ (START)->6:[\055]->10:[=]->(HERE)
+ Transitions :
+ EOS -> 13
+ 0:[\t ] -> 8
+ 1:[\r] -> 8
+ 2:[!#-),:<>-@[]^`{-~] -> 14
+ 3:["] -> 15
+ 4:[*] -> 14
+ 5:[+.] -> 14
+ 6:[\055] -> 14
+ 7:[/] -> 14
+ 8:[0-9] -> 14
+ 10:[=] -> 14
+ 11:[A-Z_a-z] -> 14
+
+DFA state 9
+ Forward route : (from state 3)
+ (START)->6:[\055]->0:[\t ]->6:[\055]->(HERE)
+ Transitions :
+ EOS -> 2
+ 0:[\t ] -> 3
+ 1:[\r] -> 4
+ 4:[*] -> 5
+ 6:[\055] -> 9
+ 8:[0-9] -> 9
+ 9:[;] -> 7
+ 10:[=] -> 8
+ 11:[A-Z_a-z] -> 9
+ Use state 1 as basis (4 fixups)
+ NFA exit tags applying :
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ GOT_NAME
+ COPY_TO_NAME
+ Attributes for <copier> : COPY_TO_NAME
+ Attributes for <action> : GOT_NAME
+
+DFA state 10
+ Forward route : (from state 5)
+ (START)->6:[\055]->4:[*]->8:[0-9]->(HERE)
+ Transitions :
+ 0:[\t ] -> 16
+ 1:[\r] -> 16
+ 8:[0-9] -> 10
+ 10:[=] -> 17
+
+DFA state 11
+ Forward route : (from state 6)
+ (START)->6:[\055]->7:[/]->5:[+.]->(HERE)
+ Transitions :
+ EOS -> 2
+ 0:[\t ] -> 18
+ 1:[\r] -> 18
+ 5:[+.] -> 11
+ 6:[\055] -> 11
+ 8:[0-9] -> 11
+ 9:[;] -> 7
+ 11:[A-Z_a-z] -> 11
+ 12:[\\] -> 11
+ Use state 6 as basis (4 fixups)
+ NFA exit tags applying :
+ GOT_MAJORMINOR
+ COPY_TO_MINOR
+ GOT_MAJORMINOR
+ COPY_TO_MINOR
+ GOT_MAJORMINOR
+ COPY_TO_MINOR
+ Attributes for <copier> : COPY_TO_MINOR
+ Attributes for <action> : GOT_MAJORMINOR
+
+DFA state 12
+ Forward route : (from state 7)
+ (START)->6:[\055]->9:[;]->0:[\t ]->(HERE)
+ Transitions :
+ EOS -> 2
+ 0:[\t ] -> 12
+ 1:[\r] -> 12
+ 6:[\055] -> 1
+ 8:[0-9] -> 1
+ 11:[A-Z_a-z] -> 1
+ Use state 7 as basis (0 fixups)
+
+DFA state 13
+ Forward route : (from state 8)
+ (START)->6:[\055]->10:[=]->EOS->(HERE)
+ Transitions :
+ EOS -> 2
+ 0:[\t ] -> 18
+ 1:[\r] -> 18
+ 9:[;] -> 7
+ NFA exit tags applying :
+ GOT_NAMEVALUE
+ GOT_NAMEVALUE
+ GOT_NAMEVALUE
+ Attributes for <action> : GOT_NAMEVALUE
+
+DFA state 14
+ Forward route : (from state 8)
+ (START)->6:[\055]->10:[=]->2:[!#-),:<>-@[]^`{-~]->(HERE)
+ Transitions :
+ EOS -> 2
+ 0:[\t ] -> 19
+ 1:[\r] -> 19
+ 2:[!#-),:<>-@[]^`{-~] -> 14
+ 4:[*] -> 14
+ 5:[+.] -> 14
+ 6:[\055] -> 14
+ 7:[/] -> 14
+ 8:[0-9] -> 14
+ 9:[;] -> 7
+ 10:[=] -> 14
+ 11:[A-Z_a-z] -> 14
+ Use state 8 as basis (5 fixups)
+ NFA exit tags applying :
+ GOT_NAMEVALUE
+ COPY_TO_VALUE
+ GOT_NAMEVALUE
+ COPY_TO_VALUE
+ GOT_NAMEVALUE
+ COPY_TO_VALUE
+ Attributes for <copier> : COPY_TO_VALUE
+ Attributes for <action> : GOT_NAMEVALUE
+
+DFA state 15
+ Forward route : (from state 8)
+ (START)->6:[\055]->10:[=]->3:["]->(HERE)
+ Transitions :
+ 0:[\t ] -> 20
+ 2:[!#-),:<>-@[]^`{-~] -> 20
+ 4:[*] -> 20
+ 5:[+.] -> 20
+ 6:[\055] -> 20
+ 7:[/] -> 20
+ 8:[0-9] -> 20
+ 9:[;] -> 20
+ 10:[=] -> 20
+ 11:[A-Z_a-z] -> 20
+ 12:[\\] -> 21
+
+DFA state 16
+ Forward route : (from state 10)
+ (START)->6:[\055]->4:[*]->8:[0-9]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 16
+ 1:[\r] -> 16
+ 10:[=] -> 17
+
+DFA state 17
+ Forward route : (from state 10)
+ (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->(HERE)
+ Transitions :
+ 0:[\t ] -> 17
+ 1:[\r] -> 17
+ 2:[!#-),:<>-@[]^`{-~] -> 22
+ 3:["] -> 23
+ 4:[*] -> 22
+ 5:[+.] -> 22
+ 6:[\055] -> 22
+ 7:[/] -> 22
+ 8:[0-9] -> 22
+ 10:[=] -> 22
+ 11:[A-Z_a-z] -> 22
+
+DFA state 18
+ Forward route : (from state 11)
+ (START)->6:[\055]->7:[/]->5:[+.]->0:[\t ]->(HERE)
+ Transitions :
+ EOS -> 2
+ 0:[\t ] -> 18
+ 1:[\r] -> 18
+ 9:[;] -> 7
+ Use state 13 as basis (0 fixups)
+
+DFA state 19
+ Forward route : (from state 14)
+ (START)->6:[\055]->10:[=]->2:[!#-),:<>-@[]^`{-~]->0:[\t ]->(HERE)
+ Transitions :
+ EOS -> 2
+ 0:[\t ] -> 19
+ 1:[\r] -> 19
+ 9:[;] -> 7
+ NFA exit tags applying :
+ GOT_NAMEVALUE
+ GOT_NAMEVALUE
+ GOT_NAMEVALUE
+ Attributes for <action> : GOT_NAMEVALUE
+
+DFA state 20
+ Forward route : (from state 15)
+ (START)->6:[\055]->10:[=]->3:["]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 20
+ 2:[!#-),:<>-@[]^`{-~] -> 20
+ 3:["] -> 19
+ 4:[*] -> 20
+ 5:[+.] -> 20
+ 6:[\055] -> 20
+ 7:[/] -> 20
+ 8:[0-9] -> 20
+ 9:[;] -> 20
+ 10:[=] -> 20
+ 11:[A-Z_a-z] -> 20
+ 12:[\\] -> 21
+ Use state 15 as basis (1 fixups)
+ NFA exit tags applying :
+ COPY_TO_VALUE
+ COPY_TO_VALUE
+ COPY_TO_VALUE
+ Attributes for <copier> : COPY_TO_VALUE
+
+DFA state 21
+ Forward route : (from state 15)
+ (START)->6:[\055]->10:[=]->3:["]->12:[\\]->(HERE)
+ Transitions :
+ 3:["] -> 20
+ 12:[\\] -> 20
+
+DFA state 22
+ Forward route : (from state 17)
+ (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->2:[!#-),:<>-@[]^`{-~]->(HERE)
+ Transitions :
+ EOS -> 2
+ 0:[\t ] -> 24
+ 1:[\r] -> 24
+ 2:[!#-),:<>-@[]^`{-~] -> 22
+ 4:[*] -> 22
+ 5:[+.] -> 22
+ 6:[\055] -> 22
+ 7:[/] -> 22
+ 8:[0-9] -> 22
+ 9:[;] -> 7
+ 10:[=] -> 22
+ 11:[A-Z_a-z] -> 22
+ Use state 17 as basis (5 fixups)
+ NFA exit tags applying :
+ GOT_NAMEVALUE_CONT
+ COPY_TO_VALUE
+ GOT_NAMEVALUE_CONT
+ COPY_TO_VALUE
+ GOT_NAMEVALUE_CONT
+ COPY_TO_VALUE
+ Attributes for <copier> : COPY_TO_VALUE
+ Attributes for <action> : GOT_NAMEVALUE_CONT
+
+DFA state 23
+ Forward route : (from state 17)
+ (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->3:["]->(HERE)
+ Transitions :
+ 0:[\t ] -> 25
+ 2:[!#-),:<>-@[]^`{-~] -> 25
+ 4:[*] -> 25
+ 5:[+.] -> 25
+ 6:[\055] -> 25
+ 7:[/] -> 25
+ 8:[0-9] -> 25
+ 9:[;] -> 25
+ 10:[=] -> 25
+ 11:[A-Z_a-z] -> 25
+ 12:[\\] -> 26
+
+DFA state 24
+ Forward route : (from state 22)
+ (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->2:[!#-),:<>-@[]^`{-~]->0:[\t ]->(HERE)
+ Transitions :
+ EOS -> 2
+ 0:[\t ] -> 24
+ 1:[\r] -> 24
+ 9:[;] -> 7
+ NFA exit tags applying :
+ GOT_NAMEVALUE_CONT
+ GOT_NAMEVALUE_CONT
+ GOT_NAMEVALUE_CONT
+ Attributes for <action> : GOT_NAMEVALUE_CONT
+
+DFA state 25
+ Forward route : (from state 23)
+ (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->3:["]->0:[\t ]->(HERE)
+ Transitions :
+ 0:[\t ] -> 25
+ 2:[!#-),:<>-@[]^`{-~] -> 25
+ 3:["] -> 24
+ 4:[*] -> 25
+ 5:[+.] -> 25
+ 6:[\055] -> 25
+ 7:[/] -> 25
+ 8:[0-9] -> 25
+ 9:[;] -> 25
+ 10:[=] -> 25
+ 11:[A-Z_a-z] -> 25
+ 12:[\\] -> 26
+ Use state 23 as basis (1 fixups)
+ NFA exit tags applying :
+ COPY_TO_VALUE
+ COPY_TO_VALUE
+ COPY_TO_VALUE
+ Attributes for <copier> : COPY_TO_VALUE
+
+DFA state 26
+ Forward route : (from state 23)
+ (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->3:["]->12:[\\]->(HERE)
+ Transitions :
+ 3:["] -> 25
+ 12:[\\] -> 25
+
+
+Entry states in DFA:
+Entry <in> : 0
diff --git a/src/mairix/nvptypes.h b/src/mairix/nvptypes.h
@@ -0,0 +1,43 @@
+/*
+ mairix - message index builder and finder for maildir folders.
+
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2006,2007
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+#ifndef NVPTYPES_H
+#define NVPTYPES_H
+
+enum nvp_action { /* values of the <action> attribute tagged on scanner DFA states */
+ GOT_NAMEVALUE, /* name=value seen (value bare or double-quoted) */
+ GOT_NAMEVALUE_CONT, /* name*<digits>=value seen; presumably an RFC 2231 continuation -- confirm */
+ GOT_NAME, /* name seen with no '=' following it */
+ GOT_NAME_TRAILING_SPACE, /* name followed by whitespace */
+ GOT_MAJORMINOR, /* major/minor form (name, '/', minor part) */
+ GOT_TERMINATOR, /* ';' separator or end of string reached */
+ GOT_NOTHING /* presumably the "no action" default -- TODO confirm against the scanner */
+};
+
+enum nvp_copier { /* values of the <copier> attribute; presumably where the scanned char is copied */
+ COPY_TO_NAME, /* tagged on states reached while scanning name characters */
+ COPY_TO_MINOR, /* tagged on states reached while scanning the part after '/' */
+ COPY_TO_VALUE, /* tagged on states reached while scanning a bare or quoted value */
+ COPY_NOWHERE /* presumably the "do not copy" default -- TODO confirm against the scanner */
+};
+
+#endif
diff --git a/src/mairix/old_docs/mairix.texi b/src/mairix/old_docs/mairix.texi
@@ -0,0 +1,885 @@
+\input texinfo
+@c {{{ Main header stuff
+@afourwide
+@paragraphindent 0
+@setfilename mairix.info
+@settitle User guide for the mairix program
+@c @setchapternewpage off
+
+@ifinfo
+@dircategory Utilities
+@direntry
+* mairix: (mairix). Indexing/searching utility for maildir folders
+@end direntry
+@end ifinfo
+
+@titlepage
+@sp 10
+@title The mairix program
+@subtitle This manual describes how to use
+@subtitle the mairix program for indexing and
+@subtitle searching email messages stored in maildir folders.
+@author Richard P. Curnow
+@page
+@vskip 0pt plus 1filll
+Copyright @copyright{} 2002,2003,2004,2005 Richard P. Curnow
+@end titlepage
+
+@contents
+@c }}}
+
+@ifnottex
+@node Top
+@top
+@menu
+* Introduction::
+* Installation:: Compiling and installing the software
+* Use:: Quickstart guide and examples of use
+@end menu
+@end ifnottex
+
+@node Introduction
+@chapter Introduction
+@menu
+* Background:: How mairix came to be written.
+@end menu
+
+@node Background
+@section Background
+The @emph{mairix} program arose from a need to index and search 100's or 1000's
+of email messages in an efficient way. It began life supporting just Maildir
+format folders, but now MH and mbox formats are also supported.
+
+I use the @emph{mutt} email client. @emph{mutt} has a feature called
+@emph{limit}, where the display of messages in the current folder can be
+filtered based on matching regular expressions in particular parts of the
+messages. I find this really useful. But there is a snag - it only works on
+the current folder. If you have messages spread across many folders, you're
+out of luck with limit. OK - so why not keep all messages in a single folder?
+The problem is that the performance drops badly. This is true regardless of
+folder format - mbox, maildir etc, though probably worse for some formats than
+others depending on the sizes of messages in the folders.
+
+So on the one hand, we want small folders to keep the performance high. But on
+the other hand, we want useful searching.
+
+I use the maildir format for my incoming folders. This scheme has one file per
+message. On my inboxes@footnote{of which I have many, because I (naturally)
+use @emph{procmail} to split my incoming mail}, I like this for 2 reasons :
+
+@itemize @bullet
+@item Fast deletion of messages I don't want to keep (spam, circulars, mailing
+list threads I'm not interested in etc). (Compare mbox, where the whole file
+would need to be rewritten.)
+@item No locking issues whatever. (Maybe I'm over cautious, but I don't really
+trust all that locking stuff to protect a single mbox file in all cases, and a
+single file seems just too vulnerable to corruption.) Also, I sometimes read
+the mail over NFS mounted filesystems, where locking tends to be a real
+disaster area.
+@end itemize
+
+Since I'm using maildir for inboxes, I've traditionally used it for all my
+folders, for uniformity.
+
+So, I hear you ask, if you use a one-file-per-message format, why not just use
+find + egrep to search for messages? I saw the following problems with this:
+
+@itemize @bullet
+@item What if I want to find all messages to/cc me, from Homer Simpson, dated
+between 1 and 2 months ago, with the word "wubble" in the body? This would
+involve a pretty nasty set of regexps in a pipeline of separate egreps (and
+bear in mind, headers could be split over line boundaries...)
+@item What if the message body has quoted-printable (or worse, base64) transfer
+encoding? The egrep for "wubble" could come very unstuck.
+@item How would the matching messages be conveniently arranged into a new
+folder to allow browsing with mutt?
+@item What if I wanted to see all messages in the same threads as those
+matching the above condition?
+@item If I had 1000's of messages, this wasn't going to be quick, especially if
+I wanted to keep tuning the search condition.@footnote{This may be a non-issue
+for people with the latest technology under their desk, but at the time I
+started writing mairix, I had a 1996 model 486 at home}.
+@end itemize
+
+So find + egrep was a non-starter. I looked around for other technology. I
+found @emph{grepmail}, but this only works for mbox format folders, and
+involved scanning each message every time (so lost on the speed issue).
+
+I decided that this was going to be my next project, and mairix was born. By
+the way, the name originally came from abbreviating @emph{MAildIR IndeX}, but
+this is now an anachronism since MH and mbox are supported too.
+
+@node Installation
+@chapter Installation
+
+There is not much to this. In the simplest case you can just do
+
+@example
+./configure
+make
+make install
+@end example
+
+You need to be root to run the final step unless you're installing under your
+own home directory somewhere.
+
+However, you might want to tune the options further. The @file{configure}
+script shares its common options with the usual autoconf-generated scripts,
+even though it's not autoconf-generated itself. For example, a fuller build could use
+
+@example
+CC=gcc CFLAGS="-O2 -Wall" ./configure \
+ --prefix=/opt/mairix \
+ --infodir=/usr/share/info
+make
+make install
+make docs
+make install_docs
+@end example
+
+The final step is to create a @file{~/.mairixrc} file. An example is included
+in the file @file{dotmairixrc.eg}. Just copy that to @file{~/.mairixrc} and edit
+it.
+
+@node Use
+@chapter Use
+
+@menu
+* use_intro:: Overview of use
+* capabilities:: Indexing strategy and search capabilities
+* mairixrc:: The @file{~/.mairixrc} file
+* mfolder_setup:: Setting up the match folder
+* command_line:: Command line options
+* date_syntax:: Syntax used for date searches
+@end menu
+
+@node use_intro
+@section Overview of use
+
+@emph{mairix} has two modes of use : index building and searching. The
+searching mode runs whenever the command line contains any expressions to
+search for. Otherwise, the indexing mode is run.
+
+To begin with, an indexing run must be performed before searching will work at
+all. Otherwise your search will be operating on an empty database and won't
+produce any output.
+
+The output of the search mode is usually placed in a @emph{match folder}. You
+can select the type of folder that is used. For Maildir, it is just a normal
+maildir directory (i.e. containing @file{new}, @file{tmp} and @file{cur}
+subdirectories). If you select MH it is a directory containing entries with
+numerical filenames, so you can open it as a normal MH folder in your mail
+program. If you select mbox, it is a single file in mbox format.
+
+You configure the path for the match folder in your @file{~/.mairixrc} file.
+When writing to a mfolder in maildir or MH format, mairix will populate it with
+symbolic links pointing to the paths of the real messages that were matched by
+the search expression.@footnote{Although symlinks use up more inodes than hard
+links, I decided they were more useful because it makes it possible to see the
+filenames of the original messages via @command{ls -l}.} If a message in a
+mbox folder matches, mairix will copy the message contents to a single file in
+the mfolder directory.
+
+If the mfolder is in mbox format, mairix will copy the message contents of each
+matching message into the mfolder file. (There is no way of exploiting
+symlinks to avoid the copying in this case.)
+
+If desired, mairix can produce just a list of files that match the search
+expression and omit the building of the match folder (the so-called 'raw'
+output mode). This mode of operation may be useful in communicating the
+results of the search to other programs.
+
+@node capabilities
+@section Indexing strategy and search capabilities
+
+@emph{mairix} works exclusively in terms of @emph{words}. The index that's
+built in non-search mode contains a table of which words occur in which
+messages. Hence, the search capability is based on finding messages that
+contain particular words. @emph{mairix} defines a word as any string of
+alphanumeric characters + underscore. Any whitespace, punctuation, hyphens etc
+are treated as word boundaries.
+
+@emph{mairix} has special handling for the @t{To:}, @t{Cc:} and @t{From:}
+headers. Besides the normal word scan, these headers are scanned a second
+time, where the characters @samp{@@}, @samp{-} and @samp{.} are also treated as
+word characters. This allows most (if not all) email addresses to appear in
+the database as single words. So if you have a mail from
+@t{wibble@@foobar.zzz}, it will match on both these searches
+
+@example
+mairix f:foobar
+mairix f:wibble@@foobar.zzz
+@end example
+
+It should be clear by now that the searching cannot be used to find messages
+matching general regular expressions. Personally, I don't find that much use
+anyway for locating old messages - I'm far more likely to remember particular
+keywords that were in the messages, or details of the recipients, or the
+approximate date.
+
+It's also worth pointing out that there is no 'locality' information stored, so
+you can't search for messages that have one word 'close' to some other word.
+For every message and every word, there is a simple yes/no condition stored -
+whether the message contains the word in a particular header or in the body.
+So far this has proved to be adequate. mairix has a similar feel to using an
+Internet search engine.
+
+There are three further searching criteria that are supported (besides word
+searching):
+
+@itemize @bullet
+@item Searching for messages whose @t{Date:} header is in a particular range
+@item Searching for messages whose size is in a particular range. (I see this
+being used mainly for finding 'huge' messages, as you're most likely to want to
+cull these to recover disc space.)
+@item Searching for messages with a particular substring in their paths. You
+can use this feature to limit the search to particular folders in your mail
+hierarchy, for example.
+@end itemize
+
+@node mairixrc
+@section The @file{~/.mairixrc} file
+
+@subsection Overview
+
+This file contains information about where you keep your mail folders, where
+you want the index file to be stored and where you want the match folder to
+be, into which the search mode places the symlinks.
+
+mairix searches for this file at @file{~/.mairixrc} unless you specify the
+@samp{-f} command line option.
+
+If a # character appears in the file, the rest of that line is ignored. This
+allows you to specify comments.
+
+There are 3 entries (@samp{base}, @samp{mfolder} and @samp{database}) that must
+appear in the file. Also at least one of @samp{maildir}, @samp{mh} and
+@samp{mbox} must appear. Optionally, the @samp{mformat} entry may
+appear. An example illustrates:
+
+@example
+base=/home/richard/mail
+maildir=new-mail:new-chrony
+maildir=recent...:ancient...
+mh=an_mh_folder
+mbox=archive1:archive2
+mfolder=mfolder
+mformat=maildir
+database=/home/richard/.mairix_database
+@end example
+
+@subsection mairixrc file keys
+The keys are as follows:
+
+@table @asis
+@item base
+This is the path to the common parent directory of all your maildir folders.
+@item maildir
+This is a colon-separated list of the Maildir folders (relative to @samp{base})
+that you want indexed. Any entry that ends @samp{...} is recursively scanned
+to find any Maildir folders underneath it.
+
+More than one line starting with @samp{maildir} can be included. In this case,
+mairix joins the lines together with colons as though a single list of folders had
+been given on a single very long line.
+
+Each colon-separated entry may be a wildcard. See the discussion under mbox (below) for the
+wildcard syntax. For example
+
+@example
+maildir=zzz/foo*...
+@end example
+
+will match maildir folders like these (relative to the folder_base)
+
+@example
+zzz/foobar/xyz
+zzz/fooquux
+zzz/foo
+zzz/fooabc/u/v/w
+@end example
+
+and
+
+@example
+maildir=zzz/foo[abc]*
+@end example
+
+will match maildir folders like these (relative to the folder_base)
+
+@example
+zzz/fooa
+zzz/fooaaaxyz
+zzz/foobcd
+zzz/fooccccccc
+@end example
+
+If a folder name contains a colon, you can write this by using the sequence
+@samp{\:} to escape the colon. Otherwise, the backslash character is treated
+normally. (If the folder name actually contains the sequence @samp{\:}, you're
+out of luck.)
+
+@item mh
+This is a colon-separated list of the MH folders (relative to @samp{base}) that
+you want indexed. Any entry that ends @samp{...} is recursively scanned to
+find any MH folders underneath it.
+
+More than one line starting with @samp{mh} can be included. In this case,
+mairix joins the lines together with colons as though a single list of folders had
+been given on a single very long line.
+
+Each colon-separated entry may be a wildcard, see the discussion under maildir
+(above) and mbox (below) for the syntax and semantics of specifying wildcards.
+
+@item mbox
+This is a colon-separated list of the mbox folders (relative to @samp{base}) that
+you want indexed.
+
+Each colon-separated item in the list can be suffixed by @samp{...}. If the
+item matches a regular file, that file is treated as a mbox folder and the
+@samp{...} suffix is ignored. If the item matches a directory, a recursive
+scan of everything inside that directory is made, and all regular files are
+initially considered as mbox folders. (Any directories found in this scan are
+themselves scanned, since the scan is recursive.)
+
+Each colon-separated item may contain wildcard operators, but only in its final
+path component. The wildcard operators currently supported are
+
+@table @asis
+@item *
+Match zero or more characters (each character matched is arbitrary)
+@item ?
+Match exactly one arbitrary character
+@item [abcs-z]
+Character class : match a single character from the set a, b, c, s, t, u, v, w,
+x, y and z.
+
+To include a literal @samp{]} in the class, place it immediately after the opening @samp{[}.
+To include a literal @samp{-} in the class, place it immediately before the closing @samp{]}.
+
+@end table
+
+If these metacharacters are included in non-final path components, they have no
+special meaning.
+
+Here are some examples
+
+@table @asis
+@item mbox=foo/bar*
+matches @file{foo/bar}, @file{foo/bar1}, @file{foo/barrrr} etc
+@item mbox=foo*/bar*
+matches @file{foo*/bar}, @file{foo*/bar1}, @file{foo*/barrrr} etc
+@item mbox=foo/*
+matches @file{foo/bar}, @file{foo/bar1}, @file{foo/barrrr}, @file{foo/foo}, @file{foo/x} etc
+@item mbox=foo...
+matches any regular file in the tree rooted at @file{foo}
+@item mbox=foo/*...
+same as before
+@item mbox=foo/[a-z]*...
+matches @file{foo/a}, @file{foo/aardvark/xxx}, @file{foo/zzz/foobar},
+@file{foo/w/x/y/zzz}, but @b{not} @file{foo/A/foobar}
+@end table
+
+Regular files that are mbox folder candidates are examined internally. Only
+files containing standard mbox @samp{From } separator lines will be scanned for
+messages.
+
+If a regular file has a name ending in @file{.gz}, and gzip support is compiled
+into the mairix binary, the file will be treated as a gzipped mbox.
+
+If a regular file has a name ending in @file{.bz2}, and bzip support is compiled
+into the mairix binary, the file will be treated as a bzip2'd mbox.
+
+More than one line starting with @samp{mbox} can be included. In this case,
+mairix joins the lines together with colons as though a single list of folders had
+been given on a single very long line.
+
+mairix performs @b{no} locking of mbox folders when it is accessing them. If a
+mail delivery program is modifying the mbox at the same time, it is likely that
+one or more messages in the mbox will never get indexed by mairix (until the
+database is removed and recreated from scratch, anyway.) The assumption is
+that mairix will be used to index archive folders rather than incoming ones, so
+this is unlikely to be much of a problem in reality.
+
+@emph{mairix} can support a maximum of 65536 separate mboxes, and a maximum of
+65536 messages within any one mbox.
+
+@item omit
+This is a colon-separated list of glob patterns for folders to be
+omitted from the indexing. This allows wide wildcards to be used in the
+@emph{maildir}, @emph{mh} and @emph{mbox} arguments, with the @emph{omit}
+option used to selectively remove unwanted folders from the folder lists.
+Within the glob patterns, a single @samp{*} matches any sequence of characters
+other than @samp{/}. However @samp{**} matches any sequence of characters
+including @samp{/}. This allows glob patterns to be constructed which have a
+wildcard for just one directory component, or for any number of directory
+components.
+
+The @emph{omit} option can be specified as many times as required so that the
+list of patterns doesn't all have to fit on one line.
+
+As an example,
+
+@example
+mbox=bulk...
+omit=bulk/spam*
+@end example
+
+will index all mbox folders at any level under the @file{bulk} subdirectory of
+the base folder, except for those folders whose names start @file{bulk/spam},
+e.g. @file{bulk/spam}, @file{bulk/spam2005} etc. In contrast,
+
+@example
+mbox=bulk...
+omit=bulk/spam**
+@end example
+
+will index all mbox folders at any level under the @file{bulk} subdirectory of
+the base folder, except for those folders whose names start @file{bulk/spam},
+e.g. @file{bulk/spam}, @file{bulk/spam2005}, @file{bulk/spam/2005},
+@file{bulk/spam/2005/jan} etc.
+
+@item nochecks
+This takes no arguments. If a line starting with @samp{nochecks} is present,
+it is the equivalent of specifying the @samp{-Q} flag to every indexing run.
+
+@item mfolder
+This defines the name of the @emph{match} folder (within the directory
+specified by @samp{base}) into which the search mode writes its output.
+(If the mformat used is @samp{raw}, then this setting is not
+used and may be excluded.)
+
+If the first character of the @b{mfolder} value is @samp{/} or @samp{.}, it is
+taken as a pathname in its own right. This allows you to specify absolute
+paths and paths relative to the current directory where the mfolder should be
+written. Otherwise, the value of @b{mfolder} is appended to the value of
+@b{base}, in the same way as for the source folders.
+
+@item mformat
+This defines the type of folder used for the @emph{match folder} where the
+search results go. There are four valid settings for this: @samp{mh},
+@samp{maildir}, @samp{mbox} or @samp{raw}. If the @samp{raw} setting is used then
+mairix will just print out the path names of the files that match and
+no match folder will be created. @samp{maildir} is the default if this
+option is not defined. The setting is case-insensitive.
+
+@item database
+This defines the path where mairix's index database is kept. You can keep this
+file anywhere you like.
+@end table
+
+It is illegal to have a folder listed twice. Once mairix has built a list of
+all the messages currently in your folders, it will search for duplicates
+before proceeding. If any duplicates are found (arising from the same folder
+being specified twice), it will give an error message and exit. This is to
+prevent corrupting the index database file.
+
+@subsection mairixrc expansions
+
+The part of each line in @file{.mairixrc} following the equals sign can contain
+the following types of expansion:
+
+@table @asis
+@item Home directory expansion
+If the sequence @samp{~/} appears at the start of the text after the equals
+sign, it is expanded to the user's home directory. Example:
+
+@example
+database=~/Mail/mairix_database
+@end example
+
+@item Environment expansion
+If a @samp{$} is followed by a sequence of alpha-numeric characters (or
+@samp{_}), the whole string is replaced by looking up the corresponding
+environment variable. Similarly, if @samp{$} is followed by an open brace
+(@samp{@{}), everything up to the next close brace is looked up as an
+environment variable and the result replaces the entire sequence.
+
+Suppose in the shell we do
+@example
+export FOO=bar
+@end example
+
+and the @file{.mairixrc} file contains
+@example
+maildir=xxx/$FOO
+mbox=yyy/a$@{FOO@}b
+@end example
+
+this is equivalent to
+@example
+maildir=xxx/bar
+mbox=yyy/abarb
+@end example
+
+If the specified environment variable is not set, the replacement is the empty
+string.
+
+@end table
+
+@node mfolder_setup
+@section Setting up the match folder
+If the match folder does not exist when running in search mode, it is
+automatically created. For @samp{mformat=maildir} (the default), this
+should be all you need to do. If you use @samp{mformat=mh}, you may
+have to run some commands before your mailer will recognize the folder. e.g.
+for mutt, you could do
+
+@example
+mkdir -p /home/richard/Mail/mfolder
+touch /home/richard/Mail/mfolder/.mh_sequences
+@end example
+
+which seems to work. Alternatively, within mutt, you could set @var{mbox_type}
+to @samp{mh} and save a message to @samp{+mfolder} to have mutt set up the
+structure for you in advance.
+
+If you use Sylpheed, the best way seems to be to create the new folder from
+within Sylpheed before letting mairix write into it. This seems to be all you
+need to do.
+
+@node command_line
+@section Command line options
+
+The command line syntax is
+
+For indexing mode:
+@example
+mairix [-f path] [-p] [-v] [-Q]
+@end example
+For search mode
+@example
+mairix [-f path] [-t] [-v] [-a] [-r] [-o mfolder] expr1 [expr2] ... [exprn]
+@end example
+For database dump mode
+@example
+mairix [-f path] -d
+@end example
+
+The @samp{-f} or @samp{--rcfile} flag allows a different path to the
+@file{mairixrc} file to be given, replacing the default of @file{~/.mairixrc}.
+
+The @samp{-p} or @samp{--purge} flag is used in indexing mode. Indexing works
+incrementally. When new messages are found, they are scanned and information
+about the words they contain is appended onto the existing information. When
+messages are deleted, holes are normally left in the message sequence. These
+holes take up space in the database file. This flag will compress the deleted
+paths out of the database to save space. Additionally, where @samp{mbox}
+folders are in use, information in the database about folders that no longer
+exist, or which are no longer referenced in the rc-file, will be purged also.
+
+The @samp{-v} or @samp{--verbose} flag is used in indexing mode. It causes
+more information to be shown during the indexing process. In search mode, it
+causes debug information to be shown if there are problems creating the
+symlinks. (Normally this would be an annoyance. If a message matches multiple
+queries when using @samp{-a}, mairix will try to create the same symlink
+multiple times. This prevents the same message being shown multiple times in
+the match folder.)
+
+The @samp{-Q} or @samp{--no-integrity-checks} flag is used in indexing mode.
+Normally, mairix will do various integrity checks on the database after loading
+it in, and before writing the modified database out again. The checking helps
+to detect mairix bugs much earlier, but it has a performance penalty. This
+flag skips the checks, at the cost of some loss in robustness. See also the
+@samp{nochecks} directive in @ref{mairixrc}.
+
+The @samp{--unlock} flag is used in any mode. mairix dot-locks the database
+file to prevent corruption due to concurrent accesses. If the process holding
+the lock exits prematurely for any reason, the lockfile will be left behind.
+By using the @samp{--unlock} option, an unwanted lockfile can be conveniently
+removed.
+
+The @samp{-t} or @samp{--threads} option applies to search mode. Normally,
+only the messages matching all the specified expressions are included in the
+@emph{match folder} that is built. With the @samp{-t} flag, any message in
+the same thread as one of the matched messages will be included too. Note, the
+threading is based on processing the @t{Message-ID}, @t{In-Reply-To} and
+@t{References} headers in the messages. Some mailers don't generate these
+headers in a co-operative way and will cause problems with this threading
+support. (Outlook seems to be one culprit.) If you are plagued by this
+problem, the 'edit threads' patch to mutt may be useful to you.
+
+The @samp{-d} or @samp{--dump} option causes mairix to dump the database
+contents in human-readable form to stdout. It is mainly for use in debugging.
+If this option is specified, neither indexing nor searching are performed.
+
+The @samp{-a} or @samp{--augment} option also applies to search mode.
+Normally, the first action of the search mode is to clear any existing message
+links from the match folder. With the @samp{-a} flag, this step is
+suppressed. It allows the folder contents to be built up by matching with 2 or
+more diverse sets of match expressions. If this mode is used, and a message
+matches multiple queries, only a single symlink will be created for it.
+
+The @samp{-r} or @samp{--raw-output} option is used to force the raw output
+mode for a particular search, in preference to the output format defined by the
+@samp{mformat} line in the @file{mairixrc} file. This may be useful for
+identifying which mbox contains a particular match, since there is no way to see
+this when the matching messages are placed in the mfolder in this case. (Note
+for matches in maildir and MH folders when @samp{mformat} is maildir or MH, the
+symbolic links in the mfolder will show the path to the matching message.)
+
+The @samp{-o} or @samp{--mfolder} option is used in search mode to specify a
+match folder different to the one specified in the @file{mairixrc} to be
+used. The path given by the @samp{mfolder} argument after this flag is
+relative to the folder base directory given in the @file{mairixrc} file, in the
+same way as the directory in the mfolder specification in that file is. So if
+your @file{mairixrc} file contains
+
+@example
+base=/home/foobar/Mail
+@end example
+
+and you run mairix like this
+
+@example
+mairix -o mfolder2 make,money,fast
+@end example
+
+mairix will find all of your saved junk emails containing these three words and
+put the results into @file{/home/foobar/Mail/mfolder2}.
+
+The @samp{-o} argument obeys the same conventions regarding initial @samp{/}
+and @samp{.} characters as the @b{mfolder} line in the @file{.mairixrc} file
+does.
+
+@emph{Mairix} will refuse to output search results (whether specified
+by the @samp{-o} or in the @file{.mairixrc} file) into one of the
+folders that are indexed; it figures out that list by looking in the
+@file{.mairixrc} file, or in the file you specify using the @samp{-f}
+option. This sanity check prevents you inadvertently destroying one
+of your important folders (but won't catch all such cases, sadly).
+
+The search mode runs when there is at least one search expression. Search
+expressions can take forms such as (in increasing order of complexity):
+
+@itemize @bullet
+@item A date expression. The format for specifying the date is described in section @ref{date_syntax}.
+
+@item A size expression. This matches all messages whose size in bytes is in a
+particular range. For example, to match all messages bigger than 1 Megabyte
+the following command can be used
+
+@example
+mairix z:1m-
+@end example
+
+To match all messages between 10kbytes and 20kbytes in size, the following
+command can be used:
+
+@example
+mairix z:10k-20k
+@end example
+
+@item A word, e.g. @samp{pointer}. This matches any message with the word
+@samp{pointer} in the @t{To}, @t{Cc}, @t{From} or @t{Subject} headers, or in
+the message body.@footnote{Message body is taken to mean any body part of type
+text/plain or text/html. For text/html, text within meta tags is ignored. In
+particular, the URLs inside <A HREF="..."> tags are not currently indexed.
+Non-text attachments are ignored. If there's an attachment of type
+message/rfc822, this is parsed and the match is performed on this sub-message
+too. If a hit occurs, the enclosing message is treated as having a hit.}
+
+@item A word in a particular part of the message, e.g. @samp{s:pointer}. This
+matches any message with the word @samp{pointer} in the subject. The
+qualifiers for this are :
+
+@table @asis
+@item @t{t:pointer}
+to match @samp{pointer} in the @t{To:} header,
+@item @t{c:pointer}
+to match @samp{pointer} in the @t{Cc:} header,
+@item @t{a:pointer}
+to match @samp{pointer} in the @t{To:}, @t{Cc:} or @t{From:} headers (@samp{a} meaning @samp{address}),
+@item @t{f:pointer}
+to match @samp{pointer} in the @t{From:} header,
+@item @t{s:pointer}
+to match @samp{pointer} in the @t{Subject:} header,
+@item @t{b:pointer}
+to match @samp{pointer} in the message body.
+@item @t{m:pointer}
+to match messages having a Message-ID header of @samp{pointer}.
+@end table
+
+Multiple fields may be specified, e.g. @t{sb:pointer} to match in the
+@t{Subject:} header or the body.
+
+@item A negated word, e.g. @samp{s:~pointer}. This matches all messages that
+don't have the word @samp{pointer} in the subject line.
+
+@item A substring match, e.g. @samp{s:point=}. This matches all messages
+containing a word in their subject line where the word has @samp{point} as a
+substring, e.g. @samp{pointer}, @samp{disappoint}.
+
+@item An approximate match, e.g. @samp{s:point=1}. This matches all messages
+containing a word in their subject line where the word has @samp{point} as a
+substring with at most one error, e.g. @samp{jointed} contains @samp{joint}
+which can be got from @samp{point} with one letter changed. An error can be a
+single letter changed, inserted or deleted.
+
+@item A left-anchored substring match, e.g. @samp{s:^point=}. This matches all
+messages containing a word in their subject line where the word begins with the
+string @samp{point}. (This feature is intended to be useful for inflected
+languages where the substring search is used to avoid the grammatical ending on
+the word.) This left-anchored facility can be combined with the approximate
+match facility, e.g. @samp{s:^point=1}.
+
+Note, if the @samp{^} prefix is used without the @samp{=} suffix, it is ignored.
+For example, @samp{s:^point} means the same thing as @samp{s:point}.
+
+@item A disjunction, e.g. @samp{s:pointer/dereference}. This matches all
+messages with one or both of the words @samp{pointer} and @samp{dereference} in
+their subject lines.
+
+@item Each disjunction may be a conjunction, e.g.
+@samp{s:null,pointer/dereference=2} matches all messages whose subject lines
+either contain both the words @samp{null} and @samp{pointer}, or contain the
+word @samp{dereference} with up to 2 errors (or both).
+
+@item A path expression. This matches all messages with a particular substring
+in their path. The syntax is very similar to that for words within the message
+(above), and all the rules for @samp{/}, @samp{,}, approximate matching etc are
+the same. The word prefix used for a path expression is @samp{p:}. Examples:
+
+@example
+mairix p:/archive/
+@end example
+
+matches all messages with @samp{/archive/} in their path, and
+
+@example
+mairix p:wibble=1 s:wibble=1
+@end example
+
+matches all messages with @samp{wibble} in their path and in their subject
+line, allowing up to 1 error in each case (the errors may be different for a
+particular message.)
+
+Path expressions always use substring matches and never exact matches (it's
+very unlikely you want to type in the whole of a message path as a search
+expression!) The matches are always @b{case-sensitive}. (All matches on words
+within messages are case-insensitive.) There is a limit of 32 characters on
+the match expression.
+
+@end itemize
+
+The binding order of the constructions is:
+
+@enumerate
+@item Individual command line arguments define separate conditions which are
+AND-ed together
+
+@item Within a single argument, the letters before the colon define which
+message parts the expression applies to. If there is no colon, the expression
+applies to all the headers listed earlier and the body.
+
+@item After the colon, commas delineate separate disjuncts, which are OR-ed together.
+
+@item Each disjunct may contain separate conjuncts, which are separated by
+commas. These conditions are AND-ed together.
+
+@item Each conjunct may start with a tilde to negate it, and may be followed by
+an equals sign to indicate a substring match, optionally followed by an integer
+to define the maximum number of errors allowed.
+
+@end enumerate
+
+Now some examples. Suppose my email address is @email{richard@@doesnt.exist}.
+
+The following will match all messages newer than 3 months from me with the word
+@samp{chrony} in the subject line:
+
+@example
+mairix d:3m- f:richard,doesnt,exist s:chrony
+@end example
+
+Suppose I don't mind a few spurious matches on the address, I want a wider date
+range, and I suspect that some messages I replied to might have had the subject
+keyword spelt wrongly (let's allow up to 2 errors):
+
+@example
+mairix d:6m- f:richard s:chrony=2
+@end example
+
+@node date_syntax
+@section Syntax used for specifying dates
+This section describes the syntax used for specifying dates when searching
+using the @samp{d:} option.
+
+Dates are specified as a range. The start and end of the range can both be
+specified. Alternatively, if the start is omitted, it is treated as being the
+beginning of time. If the end is omitted, it is treated as the current time.
+
+There are 4 basic formats:
+@table @samp
+@item d:start-end
+Specify both start and end explicitly
+@item d:start-
+Specify start, end is the current time
+@item d:-end
+Specify end, start is 'a long time ago' (i.e. early enough to include any message).
+@item d:period
+Specify start and end implicitly, as the start and end of the period given.
+@end table
+
+The start and end can be specified either absolute or relative. A relative
+endpoint is given as a number followed by a single letter defining the scaling:
+
+@multitable @columnfractions 0.15 0.2 0.2 0.45
+@item @b{letter} @tab @b{meaning} @tab @b{example} @tab @b{meaning}
+@item d @tab days @tab 3d @tab 3 days
+@item w @tab weeks @tab 2w @tab 2 weeks (14 days)
+@item m @tab months @tab 5m @tab 5 months (150 days)
+@item y @tab years @tab 4y @tab 4 years (4*365 days)
+@end multitable
+
+Months are always treated as 30 days, and years as 365 days, for this purpose.
+
+Absolute times can be specified in a lot of forms. Some forms mean something
+different when they define a start date than when they define an end date.
+Where a single expression specifies both the start and end (i.e. where the
+argument to d: doesn't contain a @samp{-}), it will usually have different
+interpretations in the two cases.
+
+In the examples below, suppose the current date is Sunday May 18th, 2003 (when
+I started to write this material.)
+
+@multitable @columnfractions 0.24 0.24 0.24 0.28
+@item @b{Example} @tab @b{Start date} @tab @b{End date} @tab @b{Notes}
+@item d:20030301@minus{}20030425 @tab March 1st, 2003 @tab April 25th, 2003
+@item d:030301@minus{}030425 @tab March 1st, 2003 @tab April 25th, 2003 @tab century assumed
+@item d:mar1@minus{}apr25 @tab March 1st, 2003 @tab April 25th, 2003
+@item d:Mar1@minus{}Apr25 @tab March 1st, 2003 @tab April 25th, 2003 @tab case insensitive
+@item d:MAR1@minus{}APR25 @tab March 1st, 2003 @tab April 25th, 2003 @tab case insensitive
+@item d:1mar@minus{}25apr @tab March 1st, 2003 @tab April 25th, 2003 @tab date and month in either order
+@item d:2002 @tab January 1st, 2002 @tab December 31st, 2002 @tab whole year
+@item d:mar @tab March 1st, 2003 @tab March 31st, 2003 @tab most recent March
+@item d:oct @tab October 1st, 2002 @tab October 31st, 2002 @tab most recent October
+@item d:21oct@minus{}mar @tab October 21st, 2002 @tab March 31st, 2003 @tab start before end
+@item d:21apr@minus{}mar @tab April 21st, 2002 @tab March 31st, 2003 @tab start before end
+@item d:21apr@minus{} @tab April 21st, 2003 @tab May 18th, 2003 @tab end omitted
+@item d:@minus{}21apr @tab January 1st, 1900 @tab April 21st, 2003 @tab start omitted
+@item d:6w@minus{}2w @tab April 6th, 2003 @tab May 4th, 2003 @tab both dates relative
+@item d:21apr@minus{}1w @tab April 21st, 2003 @tab May 11th, 2003 @tab one date relative
+@item d:21apr@minus{}2y @tab April 21st, 2001 @tab May 18th, 2001 @tab start before end
+@item d:99@minus{}11 @tab January 1st, 1999 @tab May 11th, 2003 @tab 2 digits are a day of the month if possible, otherwise a year
+@item d:99oct@minus{}1oct @tab October 1st, 1999 @tab October 1st, 2002 @tab end before now, single digit is a day of the month
+@item d:99oct@minus{}01oct @tab October 1st, 1999 @tab October 31st, 2001 @tab 2 digits starting with zero treated as a year
+@item d:oct99@minus{}oct1 @tab October 1st, 1999 @tab October 1st, 2002 @tab day and month in either order
+@item d:oct99@minus{}oct01 @tab October 1st, 1999 @tab October 31st, 2001 @tab year and month in either order
+@end multitable
+
+The principles in the table work as follows.
+@itemize @bullet
+@item
+When the expression defines a period of more than a day (i.e. if a month or
+year is specified), the earliest day in the period is taken when the start date
+is defined, and the last day in the period if the end of the range is being
+defined.
+@item
+The end date is always taken to be on or before the current date.
+@item
+The start date is always taken to be on or before the end date.
+@end itemize
+
+@bye
+@c vim:cms=@c\ %s:fdm=marker:fdc=5:syntax=off
diff --git a/src/mairix/reader.c b/src/mairix/reader.c
@@ -0,0 +1,212 @@
+/*
+ mairix - message index builder and finder for maildir folders.
+
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2002,2003,2004,2005
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+/* Database reader */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <assert.h>
+#include <sys/mman.h>
+
+#include "reader.h"
+#include "memmac.h"
+#include "mairix.h"
+
+int read_increment(unsigned char **encpos) {/*{{{*/
+  /* Decode one variable-length unsigned value at *encpos and advance *encpos
+   * past it.  The top bits of the first byte select the width:
+   *   11xxxxxx : 4 bytes, 30-bit payload
+   *   10xxxxxx : 2 bytes, 14-bit payload
+   *   0xxxxxxx : 1 byte,   7-bit payload */
+  unsigned char *cursor = *encpos;
+  unsigned char lead = *cursor++;
+  int value;
+
+  if ((lead & 0xc0) == 0xc0) {
+    value = (lead & 0x3f) << 24;
+    value += *cursor++ << 16;
+    value += *cursor++ << 8;
+    value += *cursor++;
+  } else if ((lead & 0x80) != 0) {
+    value = (lead & 0x7f) << 8;
+    value += *cursor++;
+  } else {
+    value = lead;
+  }
+
+  *encpos = cursor;
+  return value;
+}
+/*}}}*/
+static void read_toktable_db(char *data, struct toktable_db *toktable, int start, unsigned int *uidata)/*{{{*/
+{
+  /* Fill in *toktable from the three header words uidata[start..start+2]:
+   * the entry count, then the positions (relative to uidata, in units of
+   * unsigned int) of the token-offset and encoding-offset tables.
+   * 'data' is not used; the parameter is kept so callers need not change. */
+  (void) data;
+  toktable->n = uidata[start];
+  toktable->tok_offsets = uidata + uidata[start+1];
+  toktable->enc_offsets = uidata + uidata[start+2];
+}
+/*}}}*/
+static void read_toktable2_db(char *data, struct toktable2_db *toktable, int start, unsigned int *uidata)/*{{{*/
+{
+  /* Fill in *toktable from the four header words uidata[start..start+3]:
+   * the entry count, then the positions (relative to uidata, in units of
+   * unsigned int) of the token-offset table and of the two encoding-offset
+   * tables.  'data' is not used; it is kept so callers need not change. */
+  (void) data;
+  toktable->n = uidata[start];
+  toktable->tok_offsets = uidata + uidata[start+1];
+  toktable->enc0_offsets = uidata + uidata[start+2];
+  toktable->enc1_offsets = uidata + uidata[start+3];
+}
+/*}}}*/
+struct read_db *open_db(char *filename)/*{{{*/
+{
+  /* Map the database file 'filename' read-only and build a struct read_db
+   * whose table pointers point into the mapping.
+   *
+   * Returns NULL if the file is empty; otherwise a structure that is released
+   * with close_db().  An I/O failure, or a header that is truncated, carries
+   * the wrong magic/version bytes or has the wrong endianness, is reported on
+   * stderr and ends the program through unlock_and_exit(2). */
+  int fd, len;
+  char *data;
+  struct stat sb;
+  struct read_db *result;
+  unsigned int *uidata;
+  unsigned char *ucdata;
+
+  fd = open(filename, O_RDONLY);
+  if (fd < 0) {
+    report_error("open", filename);
+    unlock_and_exit (2);
+  }
+
+  if (fstat(fd, &sb) < 0) {
+    report_error("stat", filename);
+    unlock_and_exit(2);
+  }
+
+  len = sb.st_size;
+
+  if (len == 0) {
+    /* Empty file opened => database corrupt for sure.  This must be caught
+     * before mmap(): a zero-length mapping fails with EINVAL rather than
+     * yielding a null pointer, so testing the mapping afterwards never
+     * reaches this case. */
+    if (close(fd) < 0) {
+      report_error("close", filename);
+      unlock_and_exit(2);
+    }
+    return NULL;
+  }
+
+  if ((size_t) len < (UI_MSGID_ENC1 + 1) * sizeof(unsigned int)) {
+    /* Every header word up to UI_MSGID_ENC1 is read below; a shorter file
+     * cannot be a complete database. */
+    fprintf(stderr, "The existing database is truncated (or corrupt)! Please rebuild.\n");
+    unlock_and_exit(2);
+  }
+
+  data = (char *) mmap(0, len, PROT_READ, MAP_SHARED, fd, 0);
+  if (data == MAP_FAILED) {
+    report_error("reader:mmap", filename);
+    unlock_and_exit(2);
+  }
+
+  /* The mapping stays valid after the descriptor is closed. */
+  if (close(fd) < 0) {
+    report_error("close", filename);
+    unlock_and_exit(2);
+  }
+
+  result = new(struct read_db);
+  uidata = (unsigned int *) data; /* alignment is assured */
+  ucdata = (unsigned char *) data;
+  result->len = len;
+  result->data = data;
+
+  /*{{{ Magic number check */
+  /* All three identification bytes must match; only then is a differing
+   * fourth byte a version mismatch rather than a foreign file. */
+  if (ucdata[0] == HEADER_MAGIC0 &&
+      ucdata[1] == HEADER_MAGIC1 &&
+      ucdata[2] == HEADER_MAGIC2) {
+    if (ucdata[3] != HEADER_MAGIC3) {
+      fprintf(stderr, "Another version of this program produced the existing database! Please rebuild.\n");
+      unlock_and_exit(2);
+    }
+  } else {
+    fprintf(stderr, "The existing database wasn't produced by this program! Please rebuild.\n");
+    unlock_and_exit(2);
+  }
+  /*}}}*/
+  /* {{{ Endianness check */
+  if (uidata[UI_ENDIAN] == 0x11223344) {
+    fprintf(stderr, "The endianness of the database is reversed for this machine\n");
+    unlock_and_exit(2);
+  } else if (uidata[UI_ENDIAN] != 0x44332211) {
+    fprintf(stderr, "The endianness of this machine is strange (or database is corrupt)\n");
+    unlock_and_exit(2);
+  }
+  /* }}} */
+
+  /* Now build tables of where things are in the file */
+  result->n_msgs = uidata[UI_N_MSGS];
+  result->msg_type_and_flags = ucdata + uidata[UI_MSG_TYPE_AND_FLAGS];
+  result->path_offsets = uidata + uidata[UI_MSG_CDATA];
+  result->mtime_table = uidata + uidata[UI_MSG_MTIME];
+  result->size_table = uidata + uidata[UI_MSG_SIZE];
+  result->date_table = uidata + uidata[UI_MSG_DATE];
+  result->tid_table = uidata + uidata[UI_MSG_TID];
+
+  result->n_mboxen = uidata[UI_MBOX_N];
+  result->mbox_paths_table = uidata + uidata[UI_MBOX_PATHS];
+  result->mbox_entries_table = uidata + uidata[UI_MBOX_ENTRIES];
+  result->mbox_mtime_table = uidata + uidata[UI_MBOX_MTIME];
+  result->mbox_size_table = uidata + uidata[UI_MBOX_SIZE];
+  result->mbox_checksum_table = uidata + uidata[UI_MBOX_CKSUM];
+
+  result->hash_key = uidata[UI_HASH_KEY];
+
+  read_toktable_db(data, &result->to, UI_TO_BASE, uidata);
+  read_toktable_db(data, &result->cc, UI_CC_BASE, uidata);
+  read_toktable_db(data, &result->from, UI_FROM_BASE, uidata);
+  read_toktable_db(data, &result->subject, UI_SUBJECT_BASE, uidata);
+  read_toktable_db(data, &result->body, UI_BODY_BASE, uidata);
+  read_toktable_db(data, &result->attachment_name, UI_ATTACHMENT_NAME_BASE, uidata);
+  read_toktable2_db(data, &result->msg_ids, UI_MSGID_BASE, uidata);
+
+  return result;
+}
+/*}}}*/
+static void free_toktable_db(struct toktable_db *x)/*{{{*/
+{
+ /* Nothing to do: this function frees nothing and leaves *x untouched */
+}
+/*}}}*/
+static void free_toktable2_db(struct toktable2_db *x)/*{{{*/
+{
+ /* Nothing to do: this function frees nothing and leaves *x untouched */
+}
+/*}}}*/
+void close_db(struct read_db *x)/*{{{*/
+{
+  /* Release a database obtained from open_db(): run the per-table release
+   * hooks, unmap the file and free the descriptor structure.  A failing
+   * munmap() is reported with perror() and ends the program through
+   * unlock_and_exit(2). */
+  struct toktable_db *tables[] = {
+    &x->to, &x->cc, &x->from, &x->subject, &x->body, &x->attachment_name
+  };
+  size_t i;
+  int unmap_status;
+
+  for (i = 0; i < sizeof tables / sizeof tables[0]; i++) {
+    free_toktable_db(tables[i]);
+  }
+  free_toktable2_db(&x->msg_ids);
+
+  unmap_status = munmap(x->data, x->len);
+  if (unmap_status < 0) {
+    perror("munmap");
+    unlock_and_exit(2);
+  }
+  free(x);
+}
+/*}}}*/
+
diff --git a/src/mairix/reader.h b/src/mairix/reader.h
@@ -0,0 +1,182 @@
+/*
+ mairix - message index builder and finder for maildir folders.
+
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2002-2004,2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+#ifndef READER_H
+#define READER_H
+
+/* MX, then a high byte, then the version no. */
+#define HEADER_MAGIC0 'M'
+#define HEADER_MAGIC1 'X'
+#define HEADER_MAGIC2 0xA5
+#define HEADER_MAGIC3 0x03
+
+/*{{{ Constants for file data positions */
+#define UI_ENDIAN 1
+#define UI_N_MSGS 2
+
+/* Offset to byte-per-message table encoding message types */
+#define UI_MSG_TYPE_AND_FLAGS 3
+
+/* Header positions containing offsets to the per-message tables. */
+/* Character data:
+ * for maildir/MH : the path of the box.
+ * for mbox : index of mbox containing the message */
+
+#define UI_MSG_CDATA 4
+/* For maildir/MH : mtime of file containing message */
+#define UI_MSG_MTIME 5
+/* For mbox msgs : the offset into the file */
+#define UI_MSG_OFFSET 5
+/* For all formats : message size */
+#define UI_MSG_SIZE 6
+/* For mbox msgs : offset into file */
+#define UI_MSG_START 6
+/* These are common to Maildir,MH,mbox messages */
+#define UI_MSG_DATE 7
+#define UI_MSG_TID 8
+
+/* Header positions for mbox (file-level) information */
+/* Number of mboxes */
+#define UI_MBOX_N 9
+#define UI_MBOX_PATHS 10
+#define UI_MBOX_ENTRIES 11
+/* mtime of mboxes */
+#define UI_MBOX_MTIME 12
+/* Size in bytes */
+#define UI_MBOX_SIZE 13
+/* Base of checksums for messages in each mbox */
+#define UI_MBOX_CKSUM 14
+
+#define UI_HASH_KEY 15
+
+/* Header positions for token tables */
+#define UI_TO_BASE 16
+#define UI_CC_BASE 19
+#define UI_FROM_BASE 22
+#define UI_SUBJECT_BASE 25
+#define UI_BODY_BASE 28
+#define UI_ATTACHMENT_NAME_BASE 31
+#define UI_MSGID_BASE 34
+
+/* Larger than the last table offset. */
+#define UI_HEADER_LEN 40
+#define UC_HEADER_LEN ((UI_HEADER_LEN) << 2)
+
+#define UI_N_OFFSET 0
+#define UI_TOK_OFFSET 1
+#define UI_ENC_OFFSET 2
+
+#define UI_TO_N (UI_TO_BASE + UI_N_OFFSET)
+#define UI_TO_TOK (UI_TO_BASE + UI_TOK_OFFSET)
+#define UI_TO_ENC (UI_TO_BASE + UI_ENC_OFFSET)
+#define UI_CC_N (UI_CC_BASE + UI_N_OFFSET)
+#define UI_CC_TOK (UI_CC_BASE + UI_TOK_OFFSET)
+#define UI_CC_ENC (UI_CC_BASE + UI_ENC_OFFSET)
+#define UI_FROM_N (UI_FROM_BASE + UI_N_OFFSET)
+#define UI_FROM_TOK (UI_FROM_BASE + UI_TOK_OFFSET)
+#define UI_FROM_ENC (UI_FROM_BASE + UI_ENC_OFFSET)
+#define UI_SUBJECT_N (UI_SUBJECT_BASE + UI_N_OFFSET)
+#define UI_SUBJECT_TOK (UI_SUBJECT_BASE + UI_TOK_OFFSET)
+#define UI_SUBJECT_ENC (UI_SUBJECT_BASE + UI_ENC_OFFSET)
+#define UI_BODY_N (UI_BODY_BASE + UI_N_OFFSET)
+#define UI_BODY_TOK (UI_BODY_BASE + UI_TOK_OFFSET)
+#define UI_BODY_ENC (UI_BODY_BASE + UI_ENC_OFFSET)
+#define UI_ATTACHMENT_NAME_N (UI_ATTACHMENT_NAME_BASE + UI_N_OFFSET)
+#define UI_ATTACHMENT_NAME_TOK (UI_ATTACHMENT_NAME_BASE + UI_TOK_OFFSET)
+#define UI_ATTACHMENT_NAME_ENC (UI_ATTACHMENT_NAME_BASE + UI_ENC_OFFSET)
+#define UI_MSGID_N (UI_MSGID_BASE + UI_N_OFFSET)
+#define UI_MSGID_TOK (UI_MSGID_BASE + UI_TOK_OFFSET)
+#define UI_MSGID_ENC0 (UI_MSGID_BASE + UI_ENC_OFFSET)
+#define UI_MSGID_ENC1 (UI_MSGID_ENC0 + 1)
+
+/*}}}*/
+
+/*{{{ Literals used for encoding messages types in database file */
+#define DB_MSG_DEAD 0
+/* maildir/MH : one file per message */
+#define DB_MSG_FILE 1
+/* mbox : multiple messages per file */
+#define DB_MSG_MBOX 2
+/*}}}*/
+
+#define FLAG_SEEN (1<<3)
+#define FLAG_REPLIED (1<<4)
+#define FLAG_FLAGGED (1<<5)
+
+struct toktable_db {/*{{{*/ /* One token table of the database, as located by read_toktable_db() */
+ unsigned int n; /* number of entries in this table */
+ unsigned int *tok_offsets; /* points into the mapped file, at the table of token offsets */
+ unsigned int *enc_offsets; /* points into the mapped file, at the table of encoding offsets */
+};
+/*}}}*/
+struct toktable2_db {/*{{{*/ /* Token table with two encoding tables, as located by read_toktable2_db() */
+ unsigned int n; /* number of entries in this table */
+ unsigned int *tok_offsets; /* points into the mapped file, at the table of token offsets */
+ unsigned int *enc0_offsets; /* points into the mapped file, at the first table of encoding offsets */
+ unsigned int *enc1_offsets; /* points into the mapped file, at the second table of encoding offsets */
+};
+/*}}}*/
+struct read_db {/*{{{*/ /* Read-only view of a database file, built by open_db() */
+ /* Raw file parameters, needed later for munmap */
+ char *data; /* start of the mapped file */
+ int len; /* length of the mapping in bytes */
+
+ /* Pathname information */
+ int n_msgs; /* taken from header word UI_N_MSGS */
+ unsigned char *msg_type_and_flags; /* one byte per message; see rd_msg_type() and FLAG_* */
+ unsigned int *path_offsets; /* or (mbox index, msg index) */
+ unsigned int *mtime_table; /* or offset into mbox */
+ unsigned int *size_table; /* either file size or span inside mbox */
+ unsigned int *date_table;
+ unsigned int *tid_table;
+
+ int n_mboxen; /* number of mboxes, from header word UI_MBOX_N */
+ unsigned int *mbox_paths_table;
+ unsigned int *mbox_entries_table; /* table of number of messages per mbox */
+ unsigned int *mbox_mtime_table;
+ unsigned int *mbox_size_table;
+ unsigned int *mbox_checksum_table;
+
+ unsigned int hash_key; /* taken from header word UI_HASH_KEY */
+
+ struct toktable_db to;
+ struct toktable_db cc;
+ struct toktable_db from;
+ struct toktable_db subject;
+ struct toktable_db body;
+ struct toktable_db attachment_name;
+ struct toktable2_db msg_ids;
+
+};
+/*}}}*/
+
+struct read_db *open_db(char *filename);
+void close_db(struct read_db *x);
+
+static inline int rd_msg_type(struct read_db *db, int i) { /* type of message i: one of the DB_MSG_* values */
+ return db->msg_type_and_flags[i] & 0x7; /* low three bits only; the FLAG_* bits start at bit 3 */
+}
+
+/* Common to search and db reader. */
+int read_increment(unsigned char **encpos);
+
+#endif /* READER_H */
diff --git a/src/mairix/rfc822.c b/src/mairix/rfc822.c
@@ -0,0 +1,1536 @@
+/*
+ mairix - message index builder and finder for maildir folders.
+
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2002,2003,2004,2005,2006,2007,2010
+ * rfc2047 decode:
+ * Copyright (C) Mikael Ylikoski 2002
+ * gzip mbox support:
+ * Copyright (C) Ico Doornekamp 2005
+ * Copyright (C) Felipe Gustavo de Almeida 2005
+ * bzip2 mbox support:
+ * Copyright (C) Paramjit Oberoi 2005
+ * caching uncompressed mbox data:
+ * Copyright (C) Chris Mason 2006
+ * memory leak fixes:
+ * Copyright (C) Samuel Tardieu 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+#include "mairix.h"
+#include "nvp.h"
+
+#include <assert.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#ifdef USE_GZIP_MBOX
+# include <zlib.h>
+#endif
+#ifdef USE_BZIP_MBOX
+# include <bzlib.h>
+#endif
+
+struct DLL {/*{{{*/ /* Link pair that enqueue() overlays on the start of any list node */
+ struct DLL *next;
+ struct DLL *prev;
+};
+/*}}}*/
+static void enqueue(void *head, void *x)/*{{{*/
+{
+  /* Insert x immediately before 'head' in a circular doubly linked list,
+   * i.e. at the tail when 'head' is the list's sentinel.  Both arguments are
+   * accessed as struct DLL, so any node type whose first two members are its
+   * next and prev pointers can be queued. */
+  struct DLL *sentinel = head;
+  struct DLL *node = x;
+  struct DLL *tail = sentinel->prev;
+
+  node->prev = tail;
+  node->next = sentinel;
+  tail->next = node;
+  sentinel->prev = node;
+}
+/*}}}*/
+
+enum encoding_type {/*{{{*/ /* Transfer encodings as classified by decode_encoding_type() */
+ ENC_UNKNOWN, /* an encoding was named but not recognised */
+ ENC_NONE, /* no encoding string was supplied */
+ ENC_BINARY,
+ ENC_7BIT,
+ ENC_8BIT,
+ ENC_QUOTED_PRINTABLE,
+ ENC_BASE64,
+ ENC_UUENCODE /* spelt "x-uuencode" in the message */
+};
+/*}}}*/
+struct content_type_header {/*{{{*/ /* Fields filled in by parse_content_type() */
+ const char *major; /* e.g. text */
+ const char *minor; /* e.g. plain */
+ const char *boundary; /* for multipart */
+ /* charset? */
+};
+/*}}}*/
+struct line {/*{{{*/ /* One header line; next/prev come first so enqueue() can link it */
+ struct line *next;
+ struct line *prev;
+ char *text; /* heap-allocated, NUL-terminated line without its newline */
+};
+/*}}}*/
+
+static void init_headers(struct headers *hdrs)/*{{{*/
+{
+  /* Put *hdrs into its empty state: no header strings, a zero date and all
+   * status flags cleared. */
+  hdrs->to = NULL;
+  hdrs->cc = NULL;
+  hdrs->from = NULL;
+  hdrs->subject = NULL;
+  hdrs->message_id = NULL;
+  hdrs->in_reply_to = NULL;
+  hdrs->references = NULL;
+  hdrs->date = 0;
+  hdrs->flags.seen = 0;
+  hdrs->flags.replied = 0;
+  hdrs->flags.flagged = 0;
+}
+/*}}}*/
+static void splice_header_lines(struct line *header)/*{{{*/
+{
+  /* Unfold continuation lines: every line that starts with whitespace is
+   * appended to the line before it, keeping a single whitespace character as
+   * the separator, and is then removed from the list and freed.
+   *
+   * 'header' is the list sentinel and carries no text of its own. */
+  struct line *x, *next;
+  for (x=header->next; x!=header; x=next) {
+    char *p, *newbuf, *oldbuf;
+    struct line *y;
+
+    /* Remember the successor first: x may be freed below */
+    next = x->next;
+    if (!isspace(x->text[0] & 0xff)) continue;
+
+    y = x->prev;
+    if (y == header) {
+      /* A continuation line with nothing before it.  There is no previous
+       * text to glue it to (the sentinel has none), so leave it in place;
+       * audit_header() rejects a header whose first line starts with
+       * whitespace. */
+      continue;
+    }
+
+    /* Glue to previous line */
+    for (p=x->text; *p; p++) {
+      if (!isspace(*(unsigned char *)p)) break;
+    }
+    p--; /* point to final space */
+
+    newbuf = new_array(char, strlen(y->text) + strlen(p) + 1);
+    strcpy(newbuf, y->text);
+    strcat(newbuf, p);
+    oldbuf = y->text;
+    y->text = newbuf;
+    free(oldbuf);
+
+    /* Unlink and release the continuation line */
+    y->next = x->next;
+    x->next->prev = y;
+    free(x->text);
+    free(x);
+  }
+  return;
+}
+/*}}}*/
+static int audit_header(struct line *header)/*{{{*/
+{
+ /* Check for obvious broken-ness
+ * 1st line has no leading spaces, single word then colon
+ * following lines have leading spaces or single word followed by colon
+ * Lines starting "From " or ">From " are skipped and do not count as the 1st.
+ * Returns 1 if every remaining line passes, 0 at the first line that fails.
+ * */
+ struct line *x;
+ int first=1;
+ int count=1; /* only referenced by the disabled debug output below */
+ for (x=header->next; x!=header; x=x->next) {
+ int has_leading_space=0;
+ int is_blank;
+ int has_word_colon=0;
+
+ if (1 || first) { /* always true: From lines are skipped wherever they occur */
+ /* Ignore any UUCP or mbox style From line at the start */
+ if (!strncmp("From ", x->text, 5)) {
+ continue;
+ }
+ /* Ignore escaped From line at the start */
+ if (!strncmp(">From ", x->text, 6)) {
+ continue;
+ }
+ }
+
+ is_blank = !(x->text[0]);
+ if (!is_blank) {
+ char *p;
+ int saw_char = 0;
+ has_leading_space = isspace(x->text[0] & 0xff);
+ has_word_colon = 0; /* default */
+ p = x->text;
+ while(*p) { /* scan up to the first ':' or whitespace */
+ if(*p == ':') {
+ has_word_colon = saw_char; /* a colon only counts if a character precedes it */
+ break;
+ } else if (isspace(*(unsigned char *) p)) {
+ has_word_colon = 0;
+ break;
+ } else {
+ saw_char = 1;
+ }
+ p++;
+ }
+ }
+
+ if (( first && (is_blank || has_leading_space || !has_word_colon)) ||
+ (!first && (is_blank || !(has_leading_space || has_word_colon)))) {
+#if 0
+ fprintf(stderr, "Header line %d <%s> fails because:", count, x->text);
+ if (first && is_blank) { fprintf(stderr, " [first && is_blank]"); }
+ if (first && has_leading_space) { fprintf(stderr, " [first && has_leading_space]"); }
+ if (first && !has_word_colon) { fprintf(stderr, " [first && !has_word_colon]"); }
+ if (!first && is_blank) { fprintf(stderr, " [!first && is_blank]"); }
+ if (!first && !(has_leading_space||has_word_colon)) { fprintf(stderr, " [!first && !has_leading_space||has_word_colon]"); }
+ fprintf(stderr, "\n");
+#endif
+ /* Header fails the audit */
+ return 0;
+ }
+ first = 0;
+ count++;
+ }
+ /* If we get here the header must have been OK */
+ return 1;
+}/*}}}*/
+static int match_string(const char *ref, const char *candidate)/*{{{*/
+{
+  /* Non-zero when 'candidate' begins with 'ref', compared without regard to
+   * case; an empty 'ref' matches everything. */
+  size_t prefix_len = strlen(ref);
+
+  if (strncasecmp(ref, candidate, prefix_len) != 0) {
+    return 0;
+  }
+  return 1;
+}
+/*}}}*/
+
+static char equal_table[] = {/*{{{*/ /* indexed by byte value: 1 only for '=' (0x3d), used to count base64 padding */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00-0f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10-1f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 20-2f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, /* 30-3f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 40-4f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 50-5f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60-6f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 70-7f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80-8f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90-9f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* a0-af */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* b0-bf */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* c0-cf */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* d0-df */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* e0-ef */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /* f0-ff */
+};
+/*}}}*/
+static int base64_table[] = {/*{{{*/ /* indexed by byte value: 6-bit base64 value, -1 for bytes to skip; '=' (0x3d) maps to 0 */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 00-0f */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 10-1f */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, /* 20-2f */
+ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, 0, -1, -1, /* 30-3f */
+ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 40-4f */
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, /* 50-5f */
+ -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, /* 60-6f */
+ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, /* 70-7f */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 80-8f */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 90-9f */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a0-af */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* b0-bf */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* c0-cf */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* d0-df */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* e0-ef */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 /* f0-ff */
+};
+/*}}}*/
+static int hex_to_val(char x) {/*{{{*/
+  /* Numeric value of one hexadecimal digit, in either case.  Any other
+   * character yields 0 rather than an error. */
+  if (x >= '0' && x <= '9') {
+    return x - '0';
+  }
+  if (x >= 'a' && x <= 'f') {
+    return 10 + (x - 'a');
+  }
+  if (x >= 'A' && x <= 'F') {
+    return 10 + (x - 'A');
+  }
+  return 0;
+}
+/*}}}*/
+static void decode_header_value(char *text){/*{{{*/
+ /* rfc2047 decode, written by Mikael Ylikoski
+ * Decodes "=?charset?Q?...?=" and "=?charset?B?...?=" encoded-words in place:
+ * the decoded bytes overwrite 'text'.  The charset name is skipped, not
+ * interpreted.  p is the write position, q the next input character that
+ * has not been copied yet, s the position the search resumes from. */
+
+ char *s, *a, *b, *e, *p, *q;
+
+ for (p = q = s = text; (s = strstr(s, "=?")); s = e + 2) {
+ if (p == q)
+ p = q = s; /* nothing has moved yet: skip the literal text without copying */
+ else
+ while (q != s)
+ *p++ = *q++; /* shift the literal text down to the write position */
+ s += 2;
+ a = strchr(s, '?'); /* end of the charset field */
+ if (!a) break;
+ a++; /* a: the encoding letter */
+ b = strchr(a, '?');
+ if (!b) break;
+ b++; /* b: start of the encoded text */
+ e = strstr(b, "?="); /* e: terminator of the encoded-word */
+ if (!e) break;
+ /* have found an encoded-word */
+ if (b - a != 2)
+ continue; /* unknown encoding */
+ if (*a == 'q' || *a == 'Q') {
+ int val;
+ q = b;
+ while (q < e) {
+ if (*q == '_') {
+ *p++ = 0x20; /* underscore stands for a space */
+ q++;
+ } else if (*q == '=') {
+ q++;
+ val = hex_to_val(*q++) << 4; /* "=XX": two hex digits give one byte */
+ val += hex_to_val(*q++);
+ *p++ = val;
+ } else
+ *p++ = *q++;
+ }
+ } else if (*a == 'b' || *a == 'B') {
+ int reg, nc, eq; /* register, #characters in reg, #equals */
+ int dc; /* decoded character */
+ eq = reg = nc = 0;
+ for (q = b; q < e; q++) {
+ unsigned char cq = *(unsigned char *)q;
+ dc = base64_table[cq];
+ eq += equal_table[cq];
+
+ if (dc >= 0) { /* bytes the table maps to -1 are ignored */
+ reg <<= 6;
+ reg += dc;
+ nc++;
+ if (nc == 4) { /* four sextets collected: emit up to three bytes */
+ *p++ = ((reg >> 16) & 0xff);
+ if (eq < 2) *p++ = ((reg >> 8) & 0xff);
+ if (eq < 1) *p++ = reg & 0xff;
+ nc = reg = 0;
+ if (eq) break; /* padding seen: end of the base64 data */
+ }
+ }
+ }
+ } else {
+ continue; /* unknown encoding */
+ }
+ q = e + 2; /* resume reading after the "?=" */
+ }
+ if (p == q) return; /* read and write positions coincide: the tail is already in place */
+ while (*q != '\0')
+ *p++ = *q++;
+ *p = '\0';
+}
+/*}}}*/
+static char *copy_header_value(char *text){/*{{{*/
+  /* Return a newly allocated, RFC 2047-decoded copy of whatever follows the
+   * first ':' in 'text', or NULL when the line contains no colon. */
+  char *colon = strchr(text, ':');
+  char *value;
+
+  if (colon == NULL) {
+    return NULL;
+  }
+  value = new_string(colon + 1);
+  decode_header_value(value);
+  return value;
+}
+/*}}}*/
+static void copy_or_concat_header_value(char **previous, char *text){/*{{{*/
+  /* Store the decoded value of header line 'text' in *previous.  If a value
+   * is already held there (the header occurred more than once) the new one
+   * is appended after ", "; otherwise *previous takes ownership of the new
+   * string. */
+  char *p = copy_header_value(text);
+
+  /* copy_header_value() returns NULL for a line without a colon.  Leave
+   * *previous alone in that case instead of handing NULL to extend_string()
+   * (not visible here; presumably it expects a valid string). */
+  if (!p) return;
+
+  if (*previous)
+  {
+    *previous = extend_string(*previous, ", ");
+    *previous = extend_string(*previous, p);
+    free(p);
+  }
+  else
+    *previous = p;
+}
+/*}}}*/
+static enum encoding_type decode_encoding_type(const char *e)/*{{{*/
+{
+  /* Classify a Content-Transfer-Encoding value.  NULL means ENC_NONE.
+   * Otherwise leading whitespace is skipped and the rest is compared, as a
+   * case-insensitive prefix, against each known spelling.  A value matching
+   * none of them is reported on stderr and classified as ENC_UNKNOWN. */
+  static const struct {
+    const char *prefix;
+    enum encoding_type type;
+  } known[] = {
+    { "7bit",             ENC_7BIT },
+    { "7-bit",            ENC_7BIT },
+    { "7 bit",            ENC_7BIT },
+    { "8bit",             ENC_8BIT },
+    { "8-bit",            ENC_8BIT },
+    { "8 bit",            ENC_8BIT },
+    { "quoted-printable", ENC_QUOTED_PRINTABLE },
+    { "base64",           ENC_BASE64 },
+    { "binary",           ENC_BINARY },
+    { "x-uuencode",       ENC_UUENCODE }
+  };
+  const char *p;
+  size_t i;
+
+  if (!e) {
+    return ENC_NONE;
+  }
+
+  p = e;
+  while (*p && isspace(*(unsigned char *)p)) {
+    p++;
+  }
+
+  for (i = 0; i < sizeof known / sizeof known[0]; i++) {
+    if (match_string(known[i].prefix, p)) {
+      return known[i].type;
+    }
+  }
+
+  fprintf(stderr, "Warning: unknown encoding type: '%s'\n", e);
+  return ENC_UNKNOWN;
+}
+/*}}}*/
+static void parse_content_type(struct nvp *ct_nvp, struct content_type_header *result)/*{{{*/
+{
+  /* Fill *result from a parsed Content-Type header.  When nvp_major() yields
+   * a value it becomes the major type and nvp_minor() supplies the minor
+   * type; otherwise the major type falls back to nvp_first() and the minor
+   * type is NULL.  The boundary is whatever the "boundary" lookup returns. */
+  const char *major = nvp_major(ct_nvp);
+
+  if (major) {
+    result->major = major;
+    result->minor = nvp_minor(ct_nvp);
+  } else {
+    result->major = nvp_first(ct_nvp);
+    result->minor = NULL;
+  }
+
+  result->boundary = nvp_lookupcase(ct_nvp, "boundary");
+}
+
+/*}}}*/
+static char *looking_at_ws_then_newline(char *start)/*{{{*/
+{
+  /* Scan forward from 'start' over whitespace.  Returns a pointer to the
+   * first '\n' reached, or NULL as soon as a non-whitespace character
+   * (including the terminating NUL) turns up before any newline. */
+  char *cursor;
+
+  for (cursor = start; *cursor != '\n'; cursor++) {
+    if (!isspace(*(unsigned char *) cursor)) {
+      return NULL;
+    }
+  }
+  return cursor;
+}
+/*}}}*/
+
+static char *unencode_data(struct msg_src *src, char *input, int input_len, const char *enc, int *output_len)/*{{{*/
+{
+  /* Decode the 'input_len' bytes at 'input' according to the transfer
+   * encoding named by 'enc' (NULL means the data is not encoded).
+   *
+   * Returns a buffer obtained from new_array() holding the decoded bytes
+   * followed by a NUL, and stores the decoded length (not counting the NUL)
+   * in *output_len.  Data in an unrecognised encoding is dropped, giving an
+   * empty result; 'src' is only used to name the message in that diagnostic.
+   *
+   * No decoder reads at or beyond input + input_len, and none produces more
+   * than input_len bytes, so truncated or malformed data cannot overrun
+   * either buffer. */
+  enum encoding_type encoding;
+  char *result, *end_result;
+  char *end_input;
+
+  encoding = decode_encoding_type(enc);
+  end_input = input + input_len;
+
+  /* Every supported encoding is at least as long as the data it represents,
+   * so input_len bytes (plus the terminator) always suffice for the output */
+  result = new_array(char, input_len + 1);
+
+  /* Now decode */
+  switch (encoding) {
+    case ENC_7BIT:/*{{{*/
+    case ENC_8BIT:
+    case ENC_BINARY:
+    case ENC_NONE:
+      {
+        memcpy(result, input, input_len);
+        end_result = result + input_len;
+      }
+      break;
+/*}}}*/
+    case ENC_QUOTED_PRINTABLE:/*{{{*/
+      {
+        char *p, *q;
+        for (p=result, q=input; q<end_input; ) {
+          char *r;
+          int val;
+
+          if (*q != '=') {
+            /* Normal character */
+            *p++ = *q++;
+            continue;
+          }
+          q++;
+
+          /* followed by optional whitespace then \n? discard them.  The scan
+           * is bounded by end_input because the data need not contain a
+           * newline or a NUL. */
+          for (r = q; r < end_input && *r != '\n' && isspace(*(unsigned char *) r); r++)
+            ;
+          if (r < end_input && *r == '\n') {
+            q = r + 1; /* Point into next line */
+            continue;
+          }
+
+          /* not that case: two hex digits must follow.  An escape cut short
+           * by the end of the data is dropped. */
+          if (end_input - q < 2) break;
+          val = hex_to_val(*q++) << 4;
+          val += hex_to_val(*q++);
+          *p++ = val;
+        }
+        end_result = p;
+      }
+      break;
+/*}}}*/
+    case ENC_BASE64:/*{{{*/
+      {
+        char *p, *q;
+        int reg, nc, eq; /* register, #characters in reg, #equals */
+        int dc; /* decoded character */
+        eq = reg = nc = 0;
+        for (q=input, p=result; q<end_input; q++) {
+          unsigned char cq = * (unsigned char *)q;
+          dc = base64_table[cq];
+          eq += equal_table[cq];
+
+          if (dc >= 0) {
+            reg <<= 6;
+            reg += dc;
+            nc++;
+            if (nc == 4) {
+              *p++ = ((reg >> 16) & 0xff);
+              if (eq < 2) *p++ = ((reg >> 8) & 0xff);
+              if (eq < 1) *p++ = reg & 0xff;
+              nc = reg = 0;
+              if (eq) goto done_base_64;
+            }
+          }
+        }
+      done_base_64:
+        end_result = p;
+      }
+      break;
+    /*}}}*/
+    case ENC_UUENCODE:/*{{{*/
+      {
+        char *p, *q;
+        /* Find 'begin ' */
+        for (q = input; end_input - q > 6 && memcmp(q, "begin ", 6); q++)
+          ;
+        /* Step over it.  If it was not found there is nothing to decode. */
+        q = (end_input - q > 6) ? q + 6 : end_input;
+        /* skip to EOL */
+        while (q < end_input && *q != '\n')
+          q++;
+        p = result;
+        while (q < end_input) { /* process line */
+#define DEC(c) (((c) - ' ') & 077)
+          int len;
+
+          /* q is on the '\n' that ended the previous line: step over it so
+           * that the length is taken from the first character of this line */
+          q++;
+          if (q >= end_input)
+            break;
+          len = DEC(*q++);
+          if (len == 0)
+            break;
+          while (len > 0) {
+            /* A full group is 4 characters giving 3 bytes; the last group of
+             * a line may give only 1 or 2 bytes and then needs only 2 or 3 */
+            int need = (len >= 3) ? 4 : len + 1;
+            if (end_input - q < need)
+              break; /* data ends inside this group */
+            *p++ = DEC(q[0]) << 2 | DEC(q[1]) >> 4;
+            if (len >= 2)
+              *p++ = DEC(q[1]) << 4 | DEC(q[2]) >> 2;
+            if (len >= 3)
+              *p++ = DEC(q[2]) << 6 | DEC(q[3]);
+            q += need;
+            len -= 3;
+          }
+          while (q < end_input && *q != '\n')
+            q++;
+        }
+        end_result = p;
+      }
+      break;
+    /*}}}*/
+    case ENC_UNKNOWN:/*{{{*/
+      fprintf(stderr, "Unknown encoding type in %s\n", format_msg_src(src));
+      /* fall through - ignore this data */
+    /*}}}*/
+    default:/*{{{*/
+      end_result = result;
+      break;
+      /*}}}*/
+  }
+  *output_len = end_result - result;
+  result[*output_len] = '\0'; /* for convenience with text/plain etc to make it printable */
+  return result;
+}
+/*}}}*/
+/* Produce a printable description of where a message came from.
+ *
+ * For an MS_FILE source the filename pointer itself is returned.  For an
+ * MS_MBOX source the result is "filename[start,end)" written into a static
+ * buffer that is reused (and regrown when too small) by later calls, so the
+ * string is only valid until the next call.  Any other type yields NULL. */
+char *format_msg_src(struct msg_src *src)/*{{{*/
+{
+  static char *buffer = NULL;
+  static int buffer_len = 0;
+
+  if (src->type == MS_FILE) {
+    return src->filename;
+  }
+
+  if (src->type == MS_MBOX) {
+    /* Filename plus slack for the "[start,end)" decoration */
+    int needed = strlen(src->filename);
+    needed += 32;
+    if (!buffer || (needed > buffer_len)) {
+      free(buffer);
+      buffer = new_array(char, needed);
+      buffer_len = needed;
+    }
+    sprintf(buffer, "%s[%d,%d)", src->filename,
+        (int) src->start, (int) (src->start + src->len));
+    return buffer;
+  }
+
+  return NULL;
+}
+/*}}}*/
+/* Free every line currently queued on HEADER and reset it to an empty list. */
+static void discard_header_lines(struct line *header)
+{
+  struct line *x, *nx;
+  for (x = header->next; x != header; x = nx) {
+    nx = x->next;
+    free(x->text);
+    free(x);
+  }
+  header->next = header->prev = header;
+}
+
+/* Split the header section at the start of DATA into one list entry per
+ * non-blank line, stopping after the first blank (whitespace-only) line.
+ *
+ * src        - message source, used only in diagnostics
+ * data       - NUL-terminated message text
+ * header     - list head to fill; reset to empty on entry
+ * body_start - on success, receives the address of the text after the headers
+ *
+ * Returns 0 once the lines have passed audit_header() and been handed to
+ * splice_header_lines().  Returns -1 if a NUL is met in the middle of a line
+ * or the audit fails; the queued lines are freed here in that case, because
+ * the callers return straight away without walking the list. */
+static int split_and_splice_header(struct msg_src *src, char *data, struct line *header, char **body_start)/*{{{*/
+{
+  char *sol, *eol;
+  int blank_line;
+
+  header->next = header->prev = header;
+  sol = data;
+  do {
+    if (!*sol) break;
+    blank_line = 1; /* until proven otherwise */
+    for (eol = sol; *eol && (*eol != '\n'); eol++) {
+      if (!isspace(*(unsigned char *) eol)) blank_line = 0;
+    }
+    if (*eol != '\n') {
+      /* must be null char */
+      fprintf(stderr, "Got null character whilst processing header of %s\n",
+          format_msg_src(src));
+      discard_header_lines(header);
+      return -1;
+    }
+    if (!blank_line) {
+      int line_length = eol - sol;
+      char *line_text = new_array(char, 1 + line_length);
+      struct line *new_header;
+
+      /* The span [sol,eol) holds no NUL, so a plain byte copy is exact */
+      memcpy(line_text, sol, line_length);
+      line_text[line_length] = '\0';
+      new_header = new(struct line);
+      new_header->text = line_text;
+      enqueue(header, new_header);
+    }
+    sol = eol + 1; /* Start of next line */
+  } while (!blank_line);
+
+  *body_start = sol;
+
+  if (!audit_header(header)) {
+    /* Caller generates the message about bad rfc822 headers */
+    discard_header_lines(header);
+    return -1;
+  }
+
+  splice_header_lines(header);
+  return 0;
+}
+/*}}}*/
+
+/* Forward prototypes */
+static void do_multipart(struct msg_src *src, char *input, int input_len,
+ const char *boundary, struct attachment *atts,
+ enum data_to_rfc822_error *error);
+
+/*{{{ do_body() */
+/* Decode one message body and append what it contains to ATTS.
+ *
+ * The body is first run through unencode_data() using the first value of the
+ * content-transfer-encoding header (if any).  Then:
+ *   - no content-type header: the data is stored as one text/plain part;
+ *   - multipart/...: the data is handed to do_multipart();
+ *   - anything else: one attachment is created, classified from the
+ *     content-type, and message/rfc822 data is parsed recursively.
+ * If the content-transfer-encoding header cannot be parsed, a diagnostic is
+ * printed and nothing is added. */
+static void do_body(struct msg_src *src,
+    char *body_start, int body_len,
+    struct nvp *ct_nvp, struct nvp *cte_nvp,
+    struct nvp *cd_nvp,
+    struct attachment *atts,
+    enum data_to_rfc822_error *error)
+{
+  struct content_type_header ct;
+  struct attachment *new_att;
+  const char *encoding = NULL;
+  const char *disposition;
+  char *decoded;
+  int decoded_len;
+
+  if (cte_nvp) {
+    encoding = nvp_first(cte_nvp);
+    if (!encoding) {
+      fprintf(stderr, "Giving up on %s, content_transfer_encoding header not parseable\n",
+          format_msg_src(src));
+      return;
+    }
+  }
+
+  decoded = unencode_data(src, body_start, body_len, encoding, &decoded_len);
+
+  if (!ct_nvp) {
+    /* No content-type header: treat as text/plain {{{*/
+    new_att = new(struct attachment);
+    new_att->filename = NULL;
+    new_att->ct = CT_TEXT_PLAIN;
+    new_att->data.normal.len = decoded_len;
+    /* Copy including the terminator that follows the decoded bytes */
+    new_att->data.normal.bytes = new_array(char, decoded_len + 1);
+    memcpy(new_att->data.normal.bytes, decoded, decoded_len + 1);
+    free(decoded);
+    enqueue(atts, new_att);/*}}}*/
+    return;
+  }
+
+  parse_content_type(ct_nvp, &ct);
+
+  if (ct.major && !strcasecmp(ct.major, "multipart")) {
+    do_multipart(src, decoded, decoded_len, ct.boundary, atts, error);
+    /* The parts took their own copies, so the decoded buffer can go */
+    free(decoded);
+    return;
+  }
+
+  /* unipart */
+  new_att = new(struct attachment);
+  new_att->filename = NULL;
+  disposition = cd_nvp ? nvp_first(cd_nvp) : NULL;
+  if (disposition && !strcasecmp(disposition, "attachment")) {
+    const char *name = nvp_lookupcase(cd_nvp, "filename");
+    if (!name) {
+      /* Some messages have name=... in content-type: instead of
+       * filename=... in content-disposition. */
+      name = nvp_lookup(ct_nvp, "name");
+    }
+    if (name) {
+      new_att->filename = new_string(name);
+    }
+  }
+
+  if (ct.major && !strcasecmp(ct.major, "text")) {
+    if (ct.minor && !strcasecmp(ct.minor, "plain")) {
+      new_att->ct = CT_TEXT_PLAIN;
+    } else if (ct.minor && !strcasecmp(ct.minor, "html")) {
+      new_att->ct = CT_TEXT_HTML;
+    } else {
+      new_att->ct = CT_TEXT_OTHER;
+    }
+  } else if (ct.major && !strcasecmp(ct.major, "message") &&
+             ct.minor && !strcasecmp(ct.minor, "rfc822")) {
+    new_att->ct = CT_MESSAGE_RFC822;
+  } else {
+    new_att->ct = CT_OTHER;
+  }
+
+  if (new_att->ct == CT_MESSAGE_RFC822) {
+    new_att->data.rfc822 = data_to_rfc822(src, decoded, decoded_len, error);
+    free(decoded); /* data no longer needed */
+  } else {
+    /* The attachment takes ownership of the decoded buffer */
+    new_att->data.normal.len = decoded_len;
+    new_att->data.normal.bytes = decoded;
+  }
+  enqueue(atts, new_att);
+}
+/*}}}*/
+/*{{{ do_attachment() */
+/* Decode one part of a multipart body and append it to ATTS.
+ *
+ * start     - first byte of the part (its own header block)
+ * after_end - one past the last byte of the part
+ *
+ * The part's headers are split off, the content-type,
+ * content-transfer-encoding and content-disposition headers are parsed, and
+ * the remainder is passed to do_body().  Errors inside the nested body are
+ * not reported to the caller.  If a header is repeated the last occurrence
+ * is used; the earlier parse is released rather than leaked. */
+static void do_attachment(struct msg_src *src,
+    char *start, char *after_end,
+    struct attachment *atts)
+{
+  struct line header, *x, *nx;
+  char *body_start;
+  int body_len;
+
+  struct nvp *ct_nvp, *cte_nvp, *cd_nvp, *nvp;
+
+  if (split_and_splice_header(src, start, &header, &body_start) < 0) {
+    fprintf(stderr, "Giving up on attachment with bad header in %s\n",
+        format_msg_src(src));
+    return;
+  }
+
+  /* Extract key headers */
+  ct_nvp = cte_nvp = cd_nvp = NULL;
+  for (x=header.next; x!=&header; x=x->next) {
+    if ((nvp = make_nvp(src, x->text, "content-type:"))) {
+      if (ct_nvp) free_nvp(ct_nvp);
+      ct_nvp = nvp;
+    } else if ((nvp = make_nvp(src, x->text, "content-transfer-encoding:"))) {
+      if (cte_nvp) free_nvp(cte_nvp);
+      cte_nvp = nvp;
+    } else if ((nvp = make_nvp(src, x->text, "content-disposition:"))) {
+      if (cd_nvp) free_nvp(cd_nvp);
+      cd_nvp = nvp;
+    }
+  }
+
+  if (body_start > after_end) {
+    /* This is a (maliciously?) b0rken attachment, e.g. maybe empty */
+    if (verbose) {
+      fprintf(stderr, "Message %s contains an invalid attachment, length=%d bytes\n",
+          format_msg_src(src), (int)(after_end - start));
+    }
+  } else {
+    body_len = after_end - body_start;
+    /* Ignore errors in nested body parts. */
+    do_body(src, body_start, body_len, ct_nvp, cte_nvp, cd_nvp, atts, NULL);
+  }
+
+  /* Free header memory */
+  for (x=header.next; x!=&header; x=nx) {
+    nx = x->next;
+    free(x->text);
+    free(x);
+  }
+
+  if (ct_nvp) free_nvp(ct_nvp);
+  if (cte_nvp) free_nvp(cte_nvp);
+  if (cd_nvp) free_nvp(cd_nvp);
+}
+/*}}}*/
+/*{{{ do_multipart() */
+/* Split a multipart body into its parts and hand each one to do_attachment().
+ *
+ * input/input_len - the decoded multipart body
+ * boundary        - boundary string from the content-type header (may be NULL)
+ * atts            - attachment list that do_attachment() appends to
+ * error           - optional; set to DTR8_MULTIPART_SANS_BOUNDARY when no
+ *                   boundary string was supplied, or DTR8_MISSING_END when the
+ *                   scan runs out of input before finding another boundary
+ *
+ * A boundary is "--" followed by the boundary string.  Text before the first
+ * boundary (the preamble) is not treated as a part.  Scanning stops after a
+ * boundary that is followed by "--". */
+static void do_multipart(struct msg_src *src,
+ char *input, int input_len,
+ const char *boundary,
+ struct attachment *atts,
+ enum data_to_rfc822_error *error)
+{
+ /* b0: previous accepted boundary, b1: boundary being examined,
+ * be: one past the end of input, bx: scan cursor */
+ char *b0, *b1, *be, *bx;
+ char *line_after_b0, *start_b1_search_from;
+ int boundary_len;
+ int looking_at_end_boundary;
+
+ if (!boundary) {
+ fprintf(stderr, "Can't process multipart message %s with no boundary string\n",
+ format_msg_src(src));
+ if (error) *error = DTR8_MULTIPART_SANS_BOUNDARY;
+ return;
+ }
+
+ boundary_len = strlen(boundary);
+
+ b0 = NULL;
+ line_after_b0 = input;
+ be = input + input_len;
+
+ /* Outer loop: one iteration per accepted boundary */
+ do {
+ int boundary_ok;
+ start_b1_search_from = line_after_b0;
+ /* Inner loop: keep searching until a candidate occupies a whole line */
+ do {
+ /* reject boundaries that aren't a whole line */
+ b1 = NULL;
+ /* The bound leaves room to read the two characters after the boundary */
+ for (bx = start_b1_search_from; bx < be - (boundary_len + 4); bx++) {
+ if (bx[0] == '-' && bx[1] == '-' &&
+ !strncmp(bx+2, boundary, boundary_len)) {
+ b1 = bx;
+ break;
+ }
+ }
+ if (!b1) {
+ if (error)
+ *error = DTR8_MISSING_END;
+ return;
+ }
+
+ /* A trailing "--" marks the closing boundary */
+ looking_at_end_boundary = (b1[boundary_len+2] == '-' &&
+ b1[boundary_len+3] == '-');
+ boundary_ok = 1;
+ /* Must start at the beginning of a line ... */
+ if ((b1 > input) && (*(b1-1) != '\n'))
+ boundary_ok = 0;
+ /* ... and, unless it is the closing boundary, be followed by a newline */
+ if (!looking_at_end_boundary && (b1 + boundary_len + 2 < input + input_len) && (*(b1 + boundary_len + 2) != '\n'))
+ boundary_ok = 0;
+ if (!boundary_ok) {
+ /* Resume the search on the line after the rejected candidate */
+ char *eol = strchr(b1, '\n');
+ if (!eol) {
+ fprintf(stderr, "Oops, didn't find another normal boundary in %s\n",
+ format_msg_src(src));
+ return;
+ }
+ start_b1_search_from = 1 + eol;
+ }
+ } while (!boundary_ok);
+
+ /* b1 is now looking at a good boundary, which might be the final one */
+
+ if (b0) {
+ /* don't treat preamble as an attachment */
+ do_attachment(src, line_after_b0, b1, atts);
+ }
+
+ /* The next part starts on the line following this boundary */
+ b0 = b1;
+ line_after_b0 = strchr(b0, '\n');
+ if (line_after_b0 == 0)
+ line_after_b0 = b0 + strlen(b0);
+ else
+ ++line_after_b0;
+ } while (b1 < be && !looking_at_end_boundary);
+}
+/*}}}*/
+/* Convert an RFC 822 date header value to a time_t.
+ *
+ * Expected format: [weekday ,] day-of-month month year hour:minute:second
+ * timezone.  Only the day, month and year are used; the time of day and the
+ * timezone are ignored because searching only needs the nearest day, so the
+ * result is midnight on that date as computed by mktime().  Years below 70
+ * are taken as 20xx, years from 1900 upwards as four-digit years, and the
+ * remaining values are passed to mktime() unchanged as years since 1900.
+ *
+ * Returns (time_t) -1 if the string does not have the expected shape,
+ * including when it ends before the time-of-day field.
+ *
+ * Some of the ideas, sanity checks etc taken from parse.c in the mutt
+ * sources, credit to Michael R. Elkins et al */
+static time_t parse_rfc822_date(char *date_string)/*{{{*/
+{
+  static const char months[12][4] = {
+    "jan", "feb", "mar", "apr", "may", "jun",
+    "jul", "aug", "sep", "oct", "nov", "dec"
+  };
+  struct tm tm = {0};
+  char *s, *z;
+  int m;
+
+  /* Skip an optional "weekday," prefix */
+  s = date_string;
+  z = strchr(s, ',');
+  if (z) s = z + 1;
+  while (isspace((unsigned char) *s)) s++;
+
+  /* Should now be looking at day number */
+  if (!isdigit((unsigned char) *s)) goto tough_cheese;
+  tm.tm_mday = atoi(s);
+  if (tm.tm_mday > 31) goto tough_cheese;
+
+  while (isdigit((unsigned char) *s)) s++;
+  while (isspace((unsigned char) *s)) s++;
+  if (!*s) goto tough_cheese;
+
+  /* Month name, matched on its first three letters */
+  for (m = 0; m < 12; m++) {
+    if (!strncasecmp(s, months[m], 3)) break;
+  }
+  if (m == 12) goto tough_cheese;
+  tm.tm_mon = m;
+
+  /* Skip the rest of the month word; stop at the terminator so a string
+   * that ends here cannot be overrun */
+  while (*s && !isspace((unsigned char) *s)) s++;
+  while (isspace((unsigned char) *s)) s++;
+  if (!isdigit((unsigned char) *s)) goto tough_cheese;
+  tm.tm_year = atoi(s);
+  if (tm.tm_year < 70) {
+    tm.tm_year += 100;
+  } else if (tm.tm_year >= 1900) {
+    tm.tm_year -= 1900;
+  }
+
+  while (isdigit((unsigned char) *s)) s++;
+  while (isspace((unsigned char) *s)) s++;
+  if (!*s) goto tough_cheese;
+
+  /* Now looking at hms */
+  /* For now, forget this.  The searching will be vague enough that nearest day is good enough. */
+
+  tm.tm_hour = 0;
+  tm.tm_min = 0;
+  tm.tm_sec = 0;
+  tm.tm_isdst = 0;
+  return mktime(&tm);
+
+tough_cheese:
+  return (time_t) -1; /* default value */
+}
+/*}}}*/
+
+/* Set message flags from the letters of a Status / X-Status header value:
+ * 'R' marks the message seen, 'A' replied and 'F' flagged.  Every other
+ * character is ignored. */
+static void scan_status_flags(const char *s, struct headers *hdrs)/*{{{*/
+{
+  for (; *s; s++) {
+    if (*s == 'R') {
+      hdrs->flags.seen = 1;
+    } else if (*s == 'A') {
+      hdrs->flags.replied = 1;
+    } else if (*s == 'F') {
+      hdrs->flags.flagged = 1;
+    }
+  }
+}
+/*}}}*/
+
+/*{{{ data_to_rfc822() */
+/* Parse one complete message held in memory.
+ *
+ * src    - where the message came from, used in diagnostics
+ * data   - NUL-terminated message text
+ * length - number of bytes of message in DATA
+ * error  - optional; set to DTR8_OK on entry and to DTR8_BAD_HEADERS when the
+ *          header block is rejected (do_body() may set other values)
+ *
+ * Returns a newly allocated message to be released with free_rfc822(), or
+ * NULL when the headers are unusable.  The message is only allocated once
+ * the headers have been accepted, so the failure path allocates nothing. */
+struct rfc822 *data_to_rfc822(struct msg_src *src,
+    char *data, int length,
+    enum data_to_rfc822_error *error)
+{
+  struct rfc822 *result;
+  char *body_start;
+  struct line header;
+  struct line *x, *nx;
+  struct nvp *ct_nvp, *cte_nvp, *cd_nvp, *nvp;
+  int body_len;
+
+  if (error) *error = DTR8_OK; /* default */
+
+  if (split_and_splice_header(src, data, &header, &body_start) < 0) {
+    if (verbose) {
+      fprintf(stderr, "Giving up on message %s with bad header\n",
+          format_msg_src(src));
+    }
+    if (error) *error = DTR8_BAD_HEADERS;
+    return NULL;
+  }
+
+  result = new(struct rfc822);
+  init_headers(&result->hdrs);
+  result->atts.next = result->atts.prev = &result->atts;
+
+  /* Extract key headers {{{*/
+  ct_nvp = cte_nvp = cd_nvp = NULL;
+  for (x=header.next; x!=&header; x=x->next) {
+    if (match_string("to", x->text))
+      copy_or_concat_header_value(&result->hdrs.to, x->text);
+    else if (match_string("cc", x->text))
+      copy_or_concat_header_value(&result->hdrs.cc, x->text);
+    else if (!result->hdrs.from && match_string("from", x->text))
+      result->hdrs.from = copy_header_value(x->text);
+    else if (!result->hdrs.subject && match_string("subject", x->text))
+      result->hdrs.subject = copy_header_value(x->text);
+    else if (!ct_nvp && (nvp = make_nvp(src, x->text, "content-type:")))
+      ct_nvp = nvp;
+    else if (!cte_nvp && (nvp = make_nvp(src, x->text, "content-transfer-encoding:")))
+      cte_nvp = nvp;
+    else if (!cd_nvp && (nvp = make_nvp(src, x->text, "content-disposition:")))
+      cd_nvp = nvp;
+    else if (!result->hdrs.date && match_string("date", x->text)) {
+      char *date_string = copy_header_value(x->text);
+      result->hdrs.date = parse_rfc822_date(date_string);
+      free(date_string);
+    } else if (!result->hdrs.message_id && match_string("message-id", x->text))
+      result->hdrs.message_id = copy_header_value(x->text);
+    else if (!result->hdrs.in_reply_to && match_string("in-reply-to", x->text))
+      result->hdrs.in_reply_to = copy_header_value(x->text);
+    else if (!result->hdrs.references && match_string("references", x->text))
+      result->hdrs.references = copy_header_value(x->text);
+    /* Start scanning right after the "status:" / "x-status:" prefix
+     * (sizeof counts the terminator, hence the -1).  Skipping one more
+     * character would miss the first flag when no space follows the colon
+     * and would step past the terminator of a header with an empty value. */
+    else if (match_string("status", x->text))
+      scan_status_flags(x->text + sizeof("status:") - 1, &result->hdrs);
+    else if (match_string("x-status", x->text))
+      scan_status_flags(x->text + sizeof("x-status:") - 1, &result->hdrs);
+  }
+/*}}}*/
+
+  /* Process body */
+  body_len = length - (body_start - data);
+  do_body(src, body_start, body_len, ct_nvp, cte_nvp, cd_nvp, &result->atts, error);
+
+  /* Free header memory */
+  for (x=header.next; x!=&header; x=nx) {
+    nx = x->next;
+    free(x->text);
+    free(x);
+  }
+
+  if (ct_nvp) free_nvp(ct_nvp);
+  if (cte_nvp) free_nvp(cte_nvp);
+  if (cd_nvp) free_nvp(cd_nvp);
+
+  return result;
+
+}
+/*}}}*/
+
+/* How the buffer most recently returned by create_ro_mapping() was obtained;
+ * free_ro_mapping() consults this to decide how to release it. */
+#define ALLOC_NONE 1 /* nothing for the caller to release */
+#define ALLOC_MMAP 2 /* released with munmap() */
+#define ALLOC_MALLOC 3 /* released with free() */
+
+/* One of the ALLOC_* values above.  A single global, so it describes only
+ * the latest mapping that was created. */
+int data_alloc_type;
+
+#if USE_GZIP_MBOX || USE_BZIP_MBOX
+
+/* Number of bytes decompressed per read when unpacking a compressed mbox */
+#define SIZE_STEP (8 * 1024 * 1024)
+
+/* Values returned by get_compression_type() and stored in struct zFile.type */
+#define COMPRESSION_NONE 0
+#define COMPRESSION_GZIP 1
+#define COMPRESSION_BZIP 2
+
+/* Classify FILENAME by its extension, ignoring case: ".gz" gives
+ * COMPRESSION_GZIP and ".bz2" gives COMPRESSION_BZIP, each only when support
+ * for that format is compiled in.  Names of three characters or fewer, and
+ * everything else, give COMPRESSION_NONE. */
+static int get_compression_type(const char *filename) {/*{{{*/
+  size_t name_len = strlen(filename);
+
+#ifdef USE_GZIP_MBOX
+  if (name_len > 3 && strncasecmp(filename + (name_len - 3), ".gz", 3) == 0) {
+    return COMPRESSION_GZIP;
+  }
+#endif
+
+#ifdef USE_BZIP_MBOX
+  if (name_len > 3 && strncasecmp(filename + (name_len - 4), ".bz2", 4) == 0) {
+    return COMPRESSION_BZIP;
+  }
+#endif
+
+  return COMPRESSION_NONE;
+}
+/*}}}*/
+
+/* Non-zero when get_compression_type() recognises FILENAME as compressed. */
+static int is_compressed(const char *filename) {/*{{{*/
+  int kind = get_compression_type(filename);
+  return kind != COMPRESSION_NONE;
+}
+/*}}}*/
+
+/* Handle for a compressed file opened through either zlib or bzlib. */
+struct zFile {/*{{{*/
+ union {
+ /* Both gzFile and BZFILE* are defined as void pointers
+ * in their respective header files.
+ */
+#ifdef USE_GZIP_MBOX
+ gzFile gzf; /* used when type is COMPRESSION_GZIP */
+#endif
+#ifdef USE_BZIP_MBOX
+ BZFILE *bzf; /* used when type is COMPRESSION_BZIP */
+#endif
+ void *zptr; /* library-independent view, used by xx_zopen() to test for NULL */
+ } foo;
+ int type; /* one of the COMPRESSION_* values */
+};
+/*}}}*/
+
+/* Open FILENAME for reading with the library that matches its extension.
+ * MODE is accepted but not used: the file is always opened with "rb".
+ * Returns a new handle to be released with xx_zclose(), or 0 when the file
+ * is not a recognised compressed type or the library could not open it. */
+static struct zFile * xx_zopen(const char *filename, const char *mode) {/*{{{*/
+  struct zFile *handle = new(struct zFile);
+
+  handle->foo.zptr = NULL;
+  handle->type = get_compression_type(filename);
+
+#ifdef USE_GZIP_MBOX
+  if (handle->type == COMPRESSION_GZIP) {
+    handle->foo.gzf = gzopen(filename, "rb");
+  }
+#endif
+#ifdef USE_BZIP_MBOX
+  if (handle->type == COMPRESSION_BZIP) {
+    handle->foo.bzf = BZ2_bzopen(filename, "rb");
+  }
+#endif
+
+  if (handle->foo.zptr) {
+    return handle;
+  }
+
+  free(handle);
+  return 0;
+}
+/*}}}*/
+/* Close the underlying compressed stream, if any, and free the handle. */
+static void xx_zclose(struct zFile *zf) {/*{{{*/
+  int closed = 0;
+
+#ifdef USE_GZIP_MBOX
+  if (zf->type == COMPRESSION_GZIP) {
+    gzclose(zf->foo.gzf);
+    closed = 1;
+  }
+#endif
+#ifdef USE_BZIP_MBOX
+  if (zf->type == COMPRESSION_BZIP) {
+    BZ2_bzclose(zf->foo.bzf);
+    closed = 1;
+  }
+#endif
+  if (!closed) {
+    zf->foo.zptr = NULL;
+  }
+  free(zf);
+}
+/*}}}*/
+/* Read up to LEN decompressed bytes into BUF and return whatever the
+ * underlying library's read call returns.  A handle of an unrecognised type
+ * reads as 0 bytes. */
+static int xx_zread(struct zFile *zf, void *buf, int len) {/*{{{*/
+#ifdef USE_GZIP_MBOX
+  if (zf->type == COMPRESSION_GZIP) {
+    return gzread(zf->foo.gzf, buf, len);
+  }
+#endif
+#ifdef USE_BZIP_MBOX
+  if (zf->type == COMPRESSION_BZIP) {
+    return BZ2_bzread(zf->foo.bzf, buf, len);
+  }
+#endif
+  return 0;
+}
+/*}}}*/
+#endif
+
+#if USE_GZIP_MBOX || USE_BZIP_MBOX
+/* do we need ROCACHE_SIZE > 1? the code supports any number here */
+#define ROCACHE_SIZE 1
+/* One cached read-only mapping of a decompressed file */
+struct ro_mapping {
+ char *filename; /* name of the compressed file, copied with new_string() */
+ unsigned char *map; /* mmap()ed decompressed contents; NULL marks a free slot */
+ size_t len; /* length of the mapping in bytes */
+};
+/* Set to 1 by add_ro_cache() once the cache array has been zeroed */
+static int ro_cache_init = 0;
+static struct ro_mapping ro_mapping_cache[ROCACHE_SIZE];
+
+/* Look FILENAME up in the mapping cache.
+ *
+ * Returns the matching entry, or NULL when there is none.  When LASTI is
+ * non-NULL it receives the index of the slot to use for an insertion: the
+ * first unused slot, or 0 when the cache is uninitialised or full.  The
+ * caller must check whether that slot is currently in use. */
+static struct ro_mapping *find_ro_cache(const char *filename, int *lasti)
+{
+  struct ro_mapping *slot;
+  int idx;
+
+  if (lasti)
+    *lasti = 0;
+  if (!ro_cache_init)
+    return NULL;
+
+  for (idx = 0, slot = ro_mapping_cache; idx < ROCACHE_SIZE; idx++, slot++) {
+    if (!slot->map) {
+      /* Slots fill in order, so an empty one ends the search */
+      if (lasti)
+        *lasti = idx;
+      return NULL;
+    }
+    if (strcmp(filename, slot->filename) == 0)
+      return slot;
+  }
+
+  /* if we're here, the map is full. They will reuse slot 0 */
+  return NULL;
+}
+
+/*
+ * Map LEN bytes of the file open on FD read-only and record the mapping in
+ * the cache under FILENAME.  The mapping is created here, so the file handle
+ * can safely be closed after this returns.  An entry already occupying the
+ * chosen slot is unmapped and replaced.  Returns the cache entry, or NULL if
+ * FILENAME is already cached or mmap() fails.
+ */
+static struct ro_mapping *add_ro_cache(const char *filename, int fd, size_t len)
+{
+  struct ro_mapping *slot;
+  int slot_index = 0;
+  void *mapped;
+
+  if (!ro_cache_init) {
+    memset(&ro_mapping_cache, 0, sizeof(ro_mapping_cache));
+    ro_cache_init = 1;
+  }
+
+  if (find_ro_cache(filename, &slot_index)) {
+    fprintf(stderr, "%s already in ro cache\n", filename);
+    return NULL;
+  }
+
+  slot = &ro_mapping_cache[slot_index];
+  if (slot->map) {
+    /* Evict the previous occupant of this slot */
+    munmap(slot->map, slot->len);
+    slot->map = NULL;
+    free(slot->filename);
+  }
+
+  mapped = mmap(0, len, PROT_READ, MAP_SHARED, fd, 0);
+  if (mapped == MAP_FAILED) {
+    slot->map = NULL;
+    perror("rfc822:mmap");
+    return NULL;
+  }
+
+  slot->map = (unsigned char *) mapped;
+  slot->len = len;
+  slot->filename = new_string(filename);
+  return slot;
+}
+#endif /* USE_GZIP_MBOX || USE_BZIP_MBOX */
+
+#if USE_GZIP_MBOX || USE_BZIP_MBOX
+/* Make the decompressed contents of FILENAME available as a read-only
+ * mapping.  A mapping already in the cache is reused; otherwise the file is
+ * unpacked SIZE_STEP bytes at a time into a temporary file which is then
+ * mapped through add_ro_cache().  On any failure, and for an archive that
+ * decompresses to nothing, *data is NULL and *len is 0.  The chunk buffer,
+ * the decompression handle and the temporary file are released on every
+ * path. */
+static void map_compressed_file(const char *filename, unsigned char **data, int *len)
+{
+  struct ro_mapping *ro;
+  struct zFile *zf = NULL;
+  unsigned char *chunk = NULL;
+  FILE *tmpf = NULL;
+  int total = 0;
+  int got;
+
+  /* this branch never returns things that are freeable */
+  data_alloc_type = ALLOC_NONE;
+  *data = NULL;
+  *len = 0;
+
+  ro = find_ro_cache(filename, NULL);
+  if (ro) {
+    *data = ro->map;
+    *len = ro->len;
+    return;
+  }
+
+  if (verbose) {
+    fprintf(stderr, "Decompressing %s...\n", filename);
+  }
+
+  tmpf = tmpfile();
+  if (!tmpf) {
+    perror("tmpfile");
+    goto out;
+  }
+  zf = xx_zopen(filename, "rb");
+  if (!zf) {
+    fprintf(stderr, "Could not open %s\n", filename);
+    goto out;
+  }
+  chunk = new_array(unsigned char, SIZE_STEP);
+
+  /* The read result stays signed so an error return is never mistaken for
+   * a byte count */
+  while ((got = xx_zread(zf, chunk, SIZE_STEP)) > 0) {
+    if (fwrite(chunk, (size_t) got, 1, tmpf) != 1) {
+      fprintf(stderr, "failed writing to temp file for %s\n", filename);
+      goto out;
+    }
+    total += got;
+  }
+  if (got < 0) {
+    fprintf(stderr, "Error decompressing %s\n", filename);
+    goto out;
+  }
+
+  /* Push stdio's buffered tail into the file before it is mapped */
+  if (fflush(tmpf) != 0) {
+    fprintf(stderr, "failed writing to temp file for %s\n", filename);
+    goto out;
+  }
+
+  if (total > 0) {
+    ro = add_ro_cache(filename, fileno(tmpf), total);
+    if (ro) {
+      *data = ro->map;
+      *len = ro->len;
+    }
+  }
+
+out:
+  free(chunk);
+  if (zf)
+    xx_zclose(zf);
+  if (tmpf)
+    fclose(tmpf);
+}
+#endif /* USE_GZIP_MBOX || USE_BZIP_MBOX */
+
+/* Make the contents of FILENAME readable in memory.
+ *
+ * On success *data points at the bytes and *len holds their count, and
+ * data_alloc_type records how free_ro_mapping() must release them.  *data is
+ * NULL when the file cannot be examined, opened or mapped, is empty, or is
+ * not a regular file.  Compressed files (when support is compiled in) are
+ * served from a cache of decompressed mappings owned by this module. */
+void create_ro_mapping(const char *filename, unsigned char **data, int *len)/*{{{*/
+{
+  struct stat sb;
+  int fd;
+
+  if (stat(filename, &sb) < 0)
+  {
+    report_error("stat", filename);
+    *data = NULL;
+    return;
+  }
+
+#if USE_GZIP_MBOX || USE_BZIP_MBOX
+  if (is_compressed(filename)) {
+    map_compressed_file(filename, data, len);
+    return;
+  }
+#endif /* USE_GZIP_MBOX || USE_BZIP_MBOX */
+
+  *len = sb.st_size;
+  if (*len == 0) {
+    *data = NULL;
+    return;
+  }
+
+  if (!S_ISREG(sb.st_mode)) {
+    *data = NULL;
+    return;
+  }
+
+  fd = open(filename, O_RDONLY);
+  if (fd < 0)
+  {
+    report_error("open", filename);
+    *data = NULL;
+    return;
+  }
+
+  /* The mapping stays valid after the descriptor is closed */
+  *data = (unsigned char *) mmap(0, *len, PROT_READ, MAP_SHARED, fd, 0);
+  if (close(fd) < 0)
+    report_error("close", filename);
+  if (*data == MAP_FAILED) {
+    report_error("rfc822:mmap", filename);
+    *data = NULL;
+    return;
+  }
+  data_alloc_type = ALLOC_MMAP;
+}
+/*}}}*/
+/* Release a buffer obtained from create_ro_mapping(), using the method
+ * recorded in data_alloc_type: free() for ALLOC_MALLOC, munmap() for
+ * ALLOC_MMAP, nothing otherwise.  A failing munmap() terminates the
+ * program. */
+void free_ro_mapping(unsigned char *data, int len)/*{{{*/
+{
+  switch (data_alloc_type) {
+    case ALLOC_MALLOC:
+      free(data);
+      break;
+    case ALLOC_MMAP:
+      if (munmap(data, len) < 0) {
+        fprintf(stderr, "munmap() errord\n");
+        exit(1);
+      }
+      break;
+    default:
+      break;
+  }
+}
+/*}}}*/
+
+/* Describe FILENAME as a one-message-per-file source.  The descriptor lives
+ * in static storage, so each call overwrites the previous result, and it
+ * keeps the FILENAME pointer rather than a copy. */
+static struct msg_src *setup_msg_src(char *filename)/*{{{*/
+{
+  static struct msg_src file_src;
+
+  file_src.filename = filename;
+  file_src.type = MS_FILE;
+  return &file_src;
+}
+/*}}}*/
+/* Read and parse the single message stored in FILENAME.  Returns NULL when
+ * the file yields no data (for example because it is empty) or the message
+ * cannot be parsed. */
+struct rfc822 *make_rfc822(char *filename)/*{{{*/
+{
+  unsigned char *data;
+  int len;
+  struct rfc822 *parsed;
+
+  create_ro_mapping(filename, &data, &len);
+
+  /* Don't process empty files */
+  if (!data) {
+    return NULL;
+  }
+
+  /* For one message per file, ignore missing end boundary condition. */
+  parsed = data_to_rfc822(setup_msg_src(filename), (char *) data, len, NULL);
+  free_ro_mapping(data, len);
+
+  return parsed;
+}
+/*}}}*/
+/* Release a message together with its header strings and every attachment,
+ * recursing into nested message/rfc822 parts.  A NULL message is ignored. */
+void free_rfc822(struct rfc822 *msg)/*{{{*/
+{
+  struct attachment *att, *next_att;
+
+  if (!msg) return;
+
+  /* free() ignores NULL, so unset headers need no special casing */
+  free(msg->hdrs.to);
+  free(msg->hdrs.cc);
+  free(msg->hdrs.from);
+  free(msg->hdrs.subject);
+  free(msg->hdrs.message_id);
+  free(msg->hdrs.in_reply_to);
+  free(msg->hdrs.references);
+
+  att = msg->atts.next;
+  while (att != &msg->atts) {
+    next_att = att->next;
+    free(att->filename);
+    if (att->ct == CT_MESSAGE_RFC822) {
+      free_rfc822(att->data.rfc822);
+    } else {
+      free(att->data.normal.bytes);
+    }
+    free(att);
+    att = next_att;
+  }
+
+  free(msg);
+}
+/*}}}*/
+
+#ifdef TEST
+
+/* Write INDENT spaces to stdout; nothing is written when INDENT is not
+ * positive. */
+static void do_indent(int indent)/*{{{*/
+{
+  while (indent > 0) {
+    putchar(' ');
+    indent--;
+  }
+}
+/*}}}*/
+/* Print "tag: value" indented by INDENT spaces; a NULL value prints
+ * nothing. */
+static void show_header(char *tag, char *x, int indent)/*{{{*/
+{
+  if (!x) {
+    return;
+  }
+  do_indent(indent);
+  printf("%s: %s\n", tag, x);
+}
+/*}}}*/
+/* Dump a parsed message to stdout: its main headers, then each attachment's
+ * type, its size (for everything except nested messages) and its text (for
+ * text parts); nested message/rfc822 parts are dumped recursively with four
+ * more spaces of indentation.  A NULL message, which is what a nested part
+ * holds when its own parse failed, prints nothing. */
+static void show_rfc822(struct rfc822 *msg, int indent)/*{{{*/
+{
+  struct attachment *a;
+
+  if (!msg) return;
+
+  show_header("From", msg->hdrs.from, indent);
+  show_header("To", msg->hdrs.to, indent);
+  show_header("Cc", msg->hdrs.cc, indent);
+  /* The date is a time_t, not a string, so it has to be formatted before
+   * printing.  Only the day is shown because the parser discards the time
+   * of day.  Zero is skipped (the parser treats it as "no date yet"), as is
+   * the -1 the date parser returns on failure. */
+  if (msg->hdrs.date && msg->hdrs.date != (time_t) -1) {
+    char stamp[32];
+    time_t when = msg->hdrs.date;
+    struct tm *broken_down = localtime(&when);
+    if (broken_down && strftime(stamp, sizeof(stamp), "%Y-%m-%d", broken_down) > 0) {
+      show_header("Date", stamp, indent);
+    }
+  }
+  show_header("Subject", msg->hdrs.subject, indent);
+
+  for (a = msg->atts.next; a != &msg->atts; a=a->next) {
+    printf("========================\n");
+    switch (a->ct) {
+      case CT_TEXT_PLAIN: printf("Attachment type text/plain\n"); break;
+      case CT_TEXT_HTML: printf("Attachment type text/html\n"); break;
+      case CT_TEXT_OTHER: printf("Attachment type text/non-plain\n"); break;
+      case CT_MESSAGE_RFC822: printf("Attachment type message/rfc822\n"); break;
+      case CT_OTHER: printf("Attachment type other\n"); break;
+    }
+    if (a->ct != CT_MESSAGE_RFC822) {
+      printf("%d bytes\n", a->data.normal.len);
+    }
+    if ((a->ct == CT_TEXT_PLAIN) || (a->ct == CT_TEXT_HTML) || (a->ct == CT_TEXT_OTHER)) {
+      printf("----------\n");
+      printf("%s\n", a->data.normal.bytes);
+    }
+    if (a->ct == CT_MESSAGE_RFC822) {
+      show_rfc822(a->data.rfc822, indent + 4);
+    }
+  }
+}
+/*}}}*/
+
+/* Test driver: parse the message file named by the first argument and dump
+ * it.  Exits with status 2 when no path is given and returns 1 when the file
+ * yields no message (make_rfc822() returns NULL for empty, unreadable or
+ * unparseable files). */
+int main (int argc, char **argv)/*{{{*/
+{
+  struct rfc822 *msg;
+
+  if (argc < 2) {
+    fprintf(stderr, "Need a path\n");
+    unlock_and_exit(2);
+  }
+
+  msg = make_rfc822(argv[1]);
+  if (!msg) {
+    fprintf(stderr, "Could not parse %s\n", argv[1]);
+    return 1;
+  }
+
+  show_rfc822(msg, 0);
+  free_rfc822(msg);
+
+  return 0;
+}
+/*}}}*/
+#endif /* TEST */
diff --git a/src/mairix/search.c b/src/mairix/search.c
@@ -0,0 +1,1482 @@
+/*
+ mairix - message index builder and finder for maildir folders.
+
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2002,2003,2004,2005,2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <assert.h>
+#include <dirent.h>
+#include <errno.h>
+
+/* Lame fix for systems where NAME_MAX isn't defined after including the above
+ * set of .h files (Solaris, FreeBSD so far). Probably grossly oversized but
+ * it'll do. */
+
+#if !defined(NAME_MAX)
+#define NAME_MAX 4096
+#endif
+
+#include "mairix.h"
+#include "reader.h"
+#include "memmac.h"
+
+/* Set hits[] to 1 for every message listed under token HIT_TOK of table TT.
+ * The list is stored in the database as a sequence of increments, decoded
+ * with read_increment() and accumulated into a message index, and ends at a
+ * 0xff byte. */
+static void mark_hits_in_table(struct read_db *db, struct toktable_db *tt, int hit_tok, char *hits)/*{{{*/
+{
+  unsigned char *cursor = (unsigned char *) db->data + tt->enc_offsets[hit_tok];
+  int msg_index = 0;
+
+  while (*cursor != 0xff) {
+    msg_index += read_increment(&cursor);
+    assert(msg_index < db->n_msgs);
+    hits[msg_index] = 1;
+  }
+}
+/*}}}*/
+/* Set hits[] to 1 for every message listed under token HIT_TOK of the
+ * two-chain table TT, reading the chain located by enc1_offsets.  The list
+ * is a sequence of increments decoded with read_increment() and ends at a
+ * 0xff byte. */
+static void mark_hits_in_table2(struct read_db *db, struct toktable2_db *tt, int hit_tok, char *hits)/*{{{*/
+{
+  unsigned char *cursor = (unsigned char *) db->data + tt->enc1_offsets[hit_tok];
+  int msg_index = 0;
+
+  while (*cursor != 0xff) {
+    msg_index += read_increment(&cursor);
+    assert(msg_index < db->n_msgs);
+    hits[msg_index] = 1;
+  }
+}
+/*}}}*/
+
+/* See "Fast text searching with errors", Sun Wu and Udi Manber, TR 91-11,
+ University of Arizona. I have been informed that this algorithm is NOT
+ patented. This implementation of it is entirely the work of Richard P.
+ Curnow - I haven't looked at any related source (webglimpse, agrep etc) in
+ writing this.
+*/
+/* Prepare the bit masks used by the substring_match_* functions.
+ *
+ * a   - 256-entry table; on return a[c] has bit i clear exactly when
+ *       substring[i] == c, and every other bit set
+ * hit - receives a mask with only bit (length - 1) clear
+ *
+ * An empty pattern or one longer than 31 characters is a fatal error. */
+static void build_match_vector(char *substring, unsigned long *a, unsigned long *hit)/*{{{*/
+{
+  int len = strlen(substring);
+  int pos;
+
+  if (len == 0 || len > 31) {
+    fprintf(stderr, "Can't match patterns longer than 31 characters or empty\n");
+    unlock_and_exit(2);
+  }
+
+  memset(a, 0xff, 256 * sizeof(unsigned long));
+  for (pos = 0; substring[pos]; pos++) {
+    unsigned int ch = (unsigned char) substring[pos];
+    a[ch] &= ~(1UL << pos);
+  }
+  *hit = ~(1UL << (len-1));
+}
+/*}}}*/
+static int substring_match_0(unsigned long *a, unsigned long hit, int left_anchor, char *token)/*{{{*/
+{
+ int got_hit=0;
+ char *p;
+ unsigned long r0;
+ unsigned long anchor, anchor1;
+
+ r0 = ~0;
+ got_hit = 0;
+ anchor = 0;
+ anchor1 = left_anchor ? 0x1 : 0x0;
+ for(p=token; *p; p++) {
+ int idx = (unsigned int) *(unsigned char *)p;
+ r0 = (r0<<1) | anchor | a[idx];
+ if (~(r0 | hit)) {
+ got_hit = 1;
+ break;
+ }
+ anchor = anchor1;
+ }
+ return got_hit;
+}
+/*}}}*/
+/* Bit-parallel search of TOKEN used by the callers when max_errors is 1.
+ *
+ * a, hit      - masks prepared by build_match_vector() for the pattern
+ * left_anchor - non-zero sets bit 0 of the update for every character after
+ *               the first
+ *
+ * Returns 1 as soon as the hit bit is clear in r0 or r1, otherwise 0.
+ * NOTE(review): r0 presumably tracks exact prefix matches and r1 matches
+ * with up to one error, per the Wu/Manber report cited in this file. */
+static int substring_match_1(unsigned long *a, unsigned long hit, int left_anchor, char *token)/*{{{*/
+{
+ int got_hit=0;
+ char *p;
+ unsigned long r0, r1, nr0;
+ unsigned long anchor, anchor1;
+
+ r0 = ~0;
+ r1 = r0<<1;
+ got_hit = 0;
+ anchor = 0;
+ anchor1 = left_anchor ? 0x1 : 0x0;
+ for(p=token; *p; p++) {
+ int idx = (unsigned int) *(unsigned char *)p;
+ /* nr0 is kept separate because the r1 update needs both the old and the
+ * new value of r0 */
+ nr0 = (r0<<1) | anchor | a[idx];
+ r1 = ((r1<<1) | anchor | a[idx]) & ((r0 & nr0) << 1) & r0;
+ r0 = nr0;
+ if (~((r0 & r1) | hit)) {
+ got_hit = 1;
+ break;
+ }
+ /* anchor is zero only while the first character is processed */
+ anchor = anchor1;
+ }
+ return got_hit;
+}
+/*}}}*/
+/* Bit-parallel search of TOKEN used by the callers when max_errors is 2.
+ *
+ * a, hit      - masks prepared by build_match_vector() for the pattern
+ * left_anchor - non-zero sets bit 0 of the update for every character after
+ *               the first
+ *
+ * Returns 1 as soon as the hit bit is clear in r0, r1 or r2, otherwise 0.
+ * NOTE(review): r0..r2 presumably track matches with up to 0, 1 and 2
+ * errors, per the Wu/Manber report cited in this file. */
+static int substring_match_2(unsigned long *a, unsigned long hit, int left_anchor, char *token)/*{{{*/
+{
+ int got_hit=0;
+ char *p;
+ unsigned long r0, r1, r2, nr0, nr1;
+ unsigned long anchor, anchor1;
+
+ r0 = ~0;
+ r1 = r0<<1;
+ r2 = r1<<1;
+ got_hit = 0;
+ anchor = 0;
+ anchor1 = left_anchor ? 0x1 : 0x0;
+ for(p=token; *p; p++) {
+ int idx = (unsigned int) *(unsigned char *)p;
+ /* Each level's update reads both the old and the new value of the level
+ * below, so the new values are held in nr0/nr1 until all are computed */
+ nr0 = (r0<<1) | anchor | a[idx];
+ nr1 = ((r1<<1) | anchor | a[idx]) & ((r0 & nr0) << 1) & r0;
+ r2 = ((r2<<1) | anchor | a[idx]) & ((r1 & nr1) << 1) & r1;
+ r0 = nr0;
+ r1 = nr1;
+ if (~((r0 & r1 & r2) | hit)) {
+ got_hit = 1;
+ break;
+ }
+ /* anchor is zero only while the first character is processed */
+ anchor = anchor1;
+ }
+ return got_hit;
+}
+/*}}}*/
+/* Bit-parallel search of TOKEN used by the callers when max_errors is 3.
+ *
+ * a, hit      - masks prepared by build_match_vector() for the pattern
+ * left_anchor - non-zero sets bit 0 of the update for every character after
+ *               the first
+ *
+ * Returns 1 as soon as the hit bit is clear in any of r0..r3, otherwise 0.
+ * NOTE(review): r0..r3 presumably track matches with up to 0, 1, 2 and 3
+ * errors, per the Wu/Manber report cited in this file. */
+static int substring_match_3(unsigned long *a, unsigned long hit, int left_anchor, char *token)/*{{{*/
+{
+ int got_hit=0;
+ char *p;
+ unsigned long r0, r1, r2, r3, nr0, nr1, nr2;
+ unsigned long anchor, anchor1;
+
+ r0 = ~0;
+ r1 = r0<<1;
+ r2 = r1<<1;
+ r3 = r2<<1;
+ got_hit = 0;
+ anchor = 0;
+ anchor1 = left_anchor ? 0x1 : 0x0;
+ for(p=token; *p; p++) {
+ int idx = (unsigned int) *(unsigned char *)p;
+ /* Each level's update reads both the old and the new value of the level
+ * below, so the new values are held in nr0..nr2 until all are computed */
+ nr0 = (r0<<1) | anchor | a[idx];
+ nr1 = ((r1<<1) | anchor | a[idx]) & ((r0 & nr0) << 1) & r0;
+ nr2 = ((r2<<1) | anchor | a[idx]) & ((r1 & nr1) << 1) & r1;
+ r3 = ((r3<<1) | anchor | a[idx]) & ((r2 & nr2) << 1) & r2;
+ r0 = nr0;
+ r1 = nr1;
+ r2 = nr2;
+ if (~((r0 & r1 & r2 & r3) | hit)) {
+ got_hit = 1;
+ break;
+ }
+ /* anchor is zero only while the first character is processed */
+ anchor = anchor1;
+ }
+ return got_hit;
+}
+/*}}}*/
+/* Bit-parallel search of TOKEN for any number of allowed errors; the callers
+ * use it when max_errors is greater than 3.
+ *
+ * a, hit      - masks prepared by build_match_vector() for the pattern
+ * left_anchor - non-zero sets bit 0 of the update for every character after
+ *               the first
+ * max_errors  - number of error levels tracked in addition to level 0
+ * r, nr       - caller-supplied scratch arrays of (1 + max_errors) words
+ *
+ * Returns 1 as soon as the hit bit is clear in any level, otherwise 0.
+ * The combined word is unsigned long, the same type as the masks it is
+ * folded from, so no bits are dropped before the hit test. */
+static int substring_match_general(unsigned long *a, unsigned long hit, int left_anchor, char *token, int max_errors, unsigned long *r, unsigned long *nr)/*{{{*/
+{
+  int got_hit=0;
+  char *p;
+  int j;
+  unsigned long anchor, anchor1;
+
+  r[0] = ~0UL;
+  anchor = 0;
+  anchor1 = left_anchor ? 0x1 : 0x0;
+  for (j=1; j<=max_errors; j++) {
+    r[j] = r[j-1] << 1;
+  }
+  got_hit = 0;
+  for(p=token; *p; p++) {
+    int idx = (unsigned int) *(unsigned char *)p;
+    int d;
+    unsigned long compo;
+
+    /* Level d needs the old and new values of level d-1, hence the
+     * separate nr[] array that is copied back afterwards */
+    compo = nr[0] = ((r[0]<<1) | anchor | a[idx]);
+    for (d=1; d<=max_errors; d++) {
+      nr[d] = ((r[d]<<1) | anchor | a[idx])
+            & ((r[d-1] & nr[d-1])<<1)
+            & r[d-1];
+      compo &= nr[d];
+    }
+    memcpy(r, nr, (1 + max_errors) * sizeof(unsigned long));
+    if (~(compo | hit)) {
+      got_hit = 1;
+      break;
+    }
+    /* anchor is zero only while the first character is processed */
+    anchor = anchor1;
+  }
+  return got_hit;
+}
+/*}}}*/
+
+/* Search every token of table TT for SUBSTRING, allowing up to MAX_ERRORS
+ * errors, and mark in hits[] the messages listed under each matching token.
+ * LEFT_ANCHOR is passed through to the matcher.  Error counts 0 to 3 use the
+ * specialised matchers; any other value uses the general one, with scratch
+ * arrays that are allocated only when MAX_ERRORS exceeds 3. */
+static void match_substring_in_table(struct read_db *db, struct toktable_db *tt, char *substring, int max_errors, int left_anchor, char *hits)/*{{{*/
+{
+  unsigned long a[256];
+  unsigned long hit;
+  unsigned long *r = NULL, *nr = NULL;
+  int tok;
+
+  build_match_vector(substring, a, &hit);
+
+  if (max_errors > 3) {
+    r = new_array(unsigned long, 1 + max_errors);
+    nr = new_array(unsigned long, 1 + max_errors);
+  }
+
+  for (tok = 0; tok < tt->n; tok++) {
+    char *token = db->data + tt->tok_offsets[tok];
+    int matched;
+
+    /* Specialised matchers let the optimizer keep the bitmaps in registers */
+    if (max_errors == 0) {
+      matched = substring_match_0(a, hit, left_anchor, token);
+    } else if (max_errors == 1) {
+      matched = substring_match_1(a, hit, left_anchor, token);
+    } else if (max_errors == 2) {
+      matched = substring_match_2(a, hit, left_anchor, token);
+    } else if (max_errors == 3) {
+      matched = substring_match_3(a, hit, left_anchor, token);
+    } else {
+      matched = substring_match_general(a, hit, left_anchor, token, max_errors, r, nr);
+    }
+
+    if (matched) {
+      mark_hits_in_table(db, tt, tok, hits);
+    }
+  }
+
+  if (r) free(r);
+  if (nr) free(nr);
+}
+/*}}}*/
+/* Search every token of the two-chain table TT for SUBSTRING, allowing up to
+ * MAX_ERRORS errors, and mark in hits[] the messages listed under each
+ * matching token via mark_hits_in_table2().  LEFT_ANCHOR is passed through
+ * to the matcher.  Error counts 0 to 3 use the specialised matchers; any
+ * other value uses the general one, with scratch arrays that are allocated
+ * only when MAX_ERRORS exceeds 3. */
+static void match_substring_in_table2(struct read_db *db, struct toktable2_db *tt, char *substring, int max_errors, int left_anchor, char *hits)/*{{{*/
+{
+  unsigned long a[256];
+  unsigned long hit;
+  unsigned long *r = NULL, *nr = NULL;
+  int tok;
+
+  build_match_vector(substring, a, &hit);
+
+  if (max_errors > 3) {
+    r = new_array(unsigned long, 1 + max_errors);
+    nr = new_array(unsigned long, 1 + max_errors);
+  }
+
+  for (tok = 0; tok < tt->n; tok++) {
+    char *token = db->data + tt->tok_offsets[tok];
+    int matched;
+
+    /* Specialised matchers let the optimizer keep the bitmaps in registers */
+    if (max_errors == 0) {
+      matched = substring_match_0(a, hit, left_anchor, token);
+    } else if (max_errors == 1) {
+      matched = substring_match_1(a, hit, left_anchor, token);
+    } else if (max_errors == 2) {
+      matched = substring_match_2(a, hit, left_anchor, token);
+    } else if (max_errors == 3) {
+      matched = substring_match_3(a, hit, left_anchor, token);
+    } else {
+      matched = substring_match_general(a, hit, left_anchor, token, max_errors, r, nr);
+    }
+
+    if (matched) {
+      mark_hits_in_table2(db, tt, tok, hits);
+    }
+  }
+
+  if (r) free(r);
+  if (nr) free(nr);
+}
+/*}}}*/
+/* Approximate substring search over message paths.
+ *
+ * For every message in the database the path string is matched against
+ * 'substring' with up to 'max_errors' errors, and the result is stored
+ * directly in hits[i].  File messages use their own path; mbox messages use
+ * the path of the containing mbox; dead messages never match.
+ *
+ * A negative 'max_errors' is treated as 0: it used to reach the general
+ * matcher with the scratch arrays still NULL. */
+static void match_substring_in_paths(struct read_db *db, char *substring, int max_errors, int left_anchor, char *hits)/*{{{*/
+{
+  int i;
+  unsigned long a[256];
+  unsigned long *r=NULL, *nr=NULL;
+  unsigned long hit;
+
+  /* max_errors comes from user input; never let a negative value select the
+   * general matcher, which needs r and nr to be allocated. */
+  if (max_errors < 0) {
+    max_errors = 0;
+  }
+
+  build_match_vector(substring, a, &hit);
+
+  /* Scratch state is only needed by the general (more than 3 errors) matcher */
+  if (max_errors > 3) {
+    r = new_array(unsigned long, 1 + max_errors);
+    nr = new_array(unsigned long, 1 + max_errors);
+  }
+  for (i=0; i<db->n_msgs; i++) {
+    char *token = NULL;
+    unsigned int mbix, msgix;
+    switch (rd_msg_type(db, i)) {
+      case DB_MSG_FILE:
+        token = db->data + db->path_offsets[i];
+        break;
+      case DB_MSG_MBOX:
+        decode_mbox_indices(db->path_offsets[i], &mbix, &msgix);
+        token = db->data + db->mbox_paths_table[mbix];
+        break;
+      case DB_MSG_DEAD:
+        hits[i] = 0; /* never match on dead paths */
+        continue;    /* applies to the enclosing for loop */
+    }
+
+    assert(token);
+
+    switch (max_errors) {
+      /* Optimise common cases for few errors to allow optimizer to keep bitmaps
+       * in registers */
+      case 0:
+        hits[i] = substring_match_0(a, hit, left_anchor, token);
+        break;
+      case 1:
+        hits[i] = substring_match_1(a, hit, left_anchor, token);
+        break;
+      case 2:
+        hits[i] = substring_match_2(a, hit, left_anchor, token);
+        break;
+      case 3:
+        hits[i] = substring_match_3(a, hit, left_anchor, token);
+        break;
+      default:
+        hits[i] = substring_match_general(a, hit, left_anchor, token, max_errors, r, nr);
+        break;
+    }
+  }
+
+  /* free(NULL) is a no-op, so no guards are needed */
+  free(r);
+  free(nr);
+}
+/*}}}*/
+/* Exact-match lookup in a token table: calls mark_hits_in_table() for every
+ * token of 'tt' whose text compares equal to 'key'. */
+static void match_string_in_table(struct read_db *db, struct toktable_db *tt, char *key, char *hits)/*{{{*/
+{
+  /* TODO : replace with binary search? */
+  int idx = 0;
+
+  while (idx < tt->n) {
+    const char *candidate = db->data + tt->tok_offsets[idx];
+    if (strcmp(key, candidate) == 0) {
+      /* get all matching files */
+      mark_hits_in_table(db, tt, idx, hits);
+    }
+    idx++;
+  }
+}
+/*}}}*/
+/* Exact-match lookup in a 'toktable2_db' table: calls mark_hits_in_table2()
+ * for every token of 'tt' whose text compares equal to 'key'. */
+static void match_string_in_table2(struct read_db *db, struct toktable2_db *tt, char *key, char *hits)/*{{{*/
+{
+  /* TODO : replace with binary search? */
+  int idx = 0;
+
+  while (idx < tt->n) {
+    const char *candidate = db->data + tt->tok_offsets[idx];
+    if (strcmp(key, candidate) == 0) {
+      /* get all matching files */
+      mark_hits_in_table2(db, tt, idx, hits);
+    }
+    idx++;
+  }
+}
+/*}}}*/
+/* Parse a size such as "123", "10k" or "2M".
+ *
+ * A leading decimal integer is required.  If it is directly followed by 'k'
+ * or 'K' the value is shifted left by 10 bits, by 'm' or 'M' by 20 bits; any
+ * other following character is ignored.  Returns the resulting value, or -1
+ * (after printing a message to stderr) when no integer could be read. */
+static int parse_size_expr(char *x)/*{{{*/
+{
+  int value;
+  int consumed;
+
+  if (sscanf(x, "%d%n", &value, &consumed) != 1) {
+    fprintf(stderr, "Could not parse message size expression <%s>\n", x);
+    return -1;
+  }
+
+  /* x[consumed] is the first character after the number (possibly NUL) */
+  switch (x[consumed]) {
+    case 'k':
+    case 'K':
+      value <<= 10;
+      break;
+    case 'm':
+    case 'M':
+      value <<= 20;
+      break;
+    default:
+      break;
+  }
+
+  return value;
+}
+/*}}}*/
+/* Parse a size range of the form "A-B", "A-", "-B" or "B".
+ *
+ * An optional leading ':' is skipped.  On return *has_start / *has_end say
+ * whether a lower / upper bound was given, and *start / *end hold the values
+ * produced by parse_size_expr() for the bounds that are present (which is -1
+ * for a bound that fails to parse).  A bare value with no dash is taken as an
+ * upper bound only.
+ *
+ * The no-dash case now parses the text after the optional ':' like the other
+ * cases do; it used to parse the original argument, colon included. */
+static void parse_size_range(char *size_expr, int *has_start, int *start, int *has_end, int *end)/*{{{*/
+{
+  char *x = size_expr;
+  char *dash;
+
+  if (*x == ':') x++;
+  dash = strchr(x, '-');
+  *has_start = *has_end = 0;
+
+  if (!dash) {
+    /* Single value : upper bound only */
+    *end = parse_size_expr(x);
+    *has_end = 1;
+    return;
+  }
+
+  if (dash > x) {
+    /* Copy the text before the dash so that it can be parsed (and quoted in
+     * any error message) on its own. */
+    size_t n = dash - x;
+    char *s = new_array(char, n + 1);
+    memcpy(s, x, n);
+    s[n] = 0;
+    *start = parse_size_expr(s);
+    *has_start = 1;
+    free(s);
+  }
+  if (dash[1]) { /* dash not at end of arg */
+    /* The upper bound runs to the end of the string, so no copy is needed */
+    *end = parse_size_expr(dash + 1);
+    *has_end = 1;
+  }
+}
+/*}}}*/
+/* Set hits[i] = 1 for every message whose size_table entry lies strictly
+ * between the bounds given by 'size_expr' (see parse_size_range()).  A
+ * missing bound does not constrain the match.  Entries of 'hits' for
+ * non-matching messages are left untouched. */
+static void find_size_matches_in_table(struct read_db *db, char *size_expr, char *hits)/*{{{*/
+{
+  int lo = -1, hi = -1; /* avoid compiler warning about uninitialised variables. */
+  int have_lo, have_hi;
+  int i;
+
+  parse_size_range(size_expr, &have_lo, &lo, &have_hi, &hi);
+
+  /* Allow user to put the endpoints in backwards */
+  if (have_lo && have_hi && (lo > hi)) {
+    int swap = lo;
+    lo = hi;
+    hi = swap;
+  }
+
+  for (i=0; i<db->n_msgs; i++) {
+    if (have_lo && !(db->size_table[i] > lo)) continue;
+    if (have_hi && !(db->size_table[i] < hi)) continue;
+    hits[i] = 1;
+  }
+}
+/*}}}*/
+/* Set hits[i] = 1 for every message whose date_table entry lies strictly
+ * between the bounds scan_date_string() extracts from 'date_expr'.  A missing
+ * bound does not constrain the match.  If scan_date_string() reports a
+ * non-zero status, unlock_and_exit(2) is called. */
+static void find_date_matches_in_table(struct read_db *db, char *date_expr, char *hits)/*{{{*/
+{
+  time_t lo, hi;
+  int have_lo, have_hi;
+  int i;
+
+  if (scan_date_string(date_expr, &lo, &have_lo, &hi, &have_hi)) {
+    unlock_and_exit (2);
+  }
+
+  /* Allow user to put the endpoints in backwards */
+  if (have_lo && have_hi && (lo > hi)) {
+    time_t swap = lo;
+    lo = hi;
+    hi = swap;
+  }
+
+  for (i=0; i<db->n_msgs; i++) {
+    if (have_lo && !(db->date_table[i] > lo)) continue;
+    if (have_hi && !(db->date_table[i] < hi)) continue;
+    hits[i] = 1;
+  }
+}
+/*}}}*/
+/* Match messages on their seen / replied / flagged status.
+ *
+ * 'flag_expr' is a sequence of the letters s, r and f (either case).  A
+ * letter on its own requires the flag to be set; a letter preceded by '-'
+ * requires it to be clear.  Unrecognised characters are reported on stderr
+ * and otherwise ignored.  hits[i] is set to 1 for each message satisfying
+ * every requirement; other entries are left untouched. */
+static void find_flag_matches_in_table(struct read_db *db, char *flag_expr, char *hits)/*{{{*/
+{
+  int pos_seen, neg_seen;
+  int pos_replied, neg_replied;
+  int pos_flagged, neg_flagged;
+  int negate;
+  char *p;
+  int i;
+
+  negate = 0;
+  pos_seen = neg_seen = 0;
+  pos_replied = neg_replied = 0;
+  pos_flagged = neg_flagged = 0;
+  for (p=flag_expr; *p; p++) {
+    switch (*p) {
+      case '-':
+        /* applies to the next flag letter only */
+        negate = 1;
+        break;
+      case 's':
+      case 'S':
+        if (negate) neg_seen = 1;
+        else pos_seen = 1;
+        negate = 0;
+        break;
+      case 'r':
+      case 'R':
+        if (negate) neg_replied = 1;
+        else pos_replied = 1;
+        negate = 0;
+        break;
+      case 'f':
+      case 'F':
+        if (negate) neg_flagged = 1;
+        else pos_flagged = 1;
+        negate = 0;
+        break;
+      default:
+        /* The <ctype.h> functions need a value in unsigned char range; a
+         * plain char may be negative for bytes >= 0x80. */
+        fprintf(stderr, "Did not understand the character '%c' (0x%02x) in the flags argument F:%s\n",
+            isprint((unsigned char) *p) ? *p : '.',
+            (int) *(unsigned char *) p,
+            flag_expr);
+        break;
+    }
+  }
+
+  for (i=0; i<db->n_msgs; i++) {
+    if ((!pos_seen || (db->msg_type_and_flags[i] & FLAG_SEEN)) &&
+        (!neg_seen || !(db->msg_type_and_flags[i] & FLAG_SEEN)) &&
+        (!pos_replied || (db->msg_type_and_flags[i] & FLAG_REPLIED)) &&
+        (!neg_replied || !(db->msg_type_and_flags[i] & FLAG_REPLIED)) &&
+        (!pos_flagged || (db->msg_type_and_flags[i] & FLAG_FLAGGED)) &&
+        (!neg_flagged || !(db->msg_type_and_flags[i] & FLAG_FLAGGED))) {
+      hits[i] = 1;
+    }
+  }
+}
+/*}}}*/
+
+/* Build the path of a result message inside the maildir 'output_dir'.
+ *
+ * The name is "<output_dir>/new/" or "<output_dir>/cur/" (chosen by
+ * 'is_in_new') followed by "123456789.<token>.mairix" and, when any flag is
+ * set, a ":2," suffix carrying the letters F, R and S in that order.
+ * Returns a newly allocated string which the caller must free. */
+static char *mk_maildir_path(int token, char *output_dir, int is_in_new,
+    int is_seen, int is_replied, int is_flagged)/*{{{*/
+{
+  char suffix[sizeof(":2,FRS")];
+  char *result;
+  int len;
+
+  /* Assemble the optional flag suffix first */
+  suffix[0] = 0;
+  if (is_seen || is_replied || is_flagged) {
+    strcpy(suffix, ":2,");
+    if (is_flagged) strcat(suffix, "F");
+    if (is_replied) strcat(suffix, "R");
+    if (is_seen) strcat(suffix, "S");
+  }
+
+  len = strlen(output_dir) + 64; /* oversize */
+  result = new_array(char, len + 1 + sizeof(":2,FRS"));
+  sprintf(result, "%s%s123456789.%d.mairix%s",
+      output_dir,
+      is_in_new ? "/new/" : "/cur/",
+      token,
+      suffix);
+  return result;
+}
+/*}}}*/
+/* Build the path "<output_dir>/<token+1>" for a result message in an MH
+ * folder.  Returns a newly allocated string which the caller must free.
+ *
+ * The buffer is sized for any int value; the previous fixed sizes overflowed
+ * once the number needed 8 or more digits. */
+static char *mk_mh_path(int token, char *output_dir)/*{{{*/
+{
+  /* '/' + decimal digits of an int (at most 3 per byte, plus a sign) + NUL */
+  size_t len = strlen(output_dir) + 1 + (3 * sizeof(int) + 1) + 1;
+  char *result = new_array(char, len);
+
+  sprintf(result, "%s/%d", output_dir, token+1);
+  return result;
+}
+/*}}}*/
+/* Return 1 if the last directory component of path 'p' is "new", i.e. the
+ * path has the form "<something>/new/<name>" with a non-empty <something>;
+ * return 0 otherwise. */
+static int looks_like_maildir_new_p(const char *p)/*{{{*/
+{
+  const char *last_slash = strrchr(p, '/');
+
+  if (last_slash == NULL) return 0;
+  /* "/new" must fit before the final slash and must not start at p itself */
+  if (last_slash - p <= 4) return 0;
+  return (strncmp(last_slash - 4, "/new/", 5) == 0) ? 1 : 0;
+}
+/*}}}*/
+/* Create 'new_link' referring to 'link_target': a hard link when the
+ * do_hardlinks option is set, a symbolic link otherwise.  Failures are only
+ * reported (on stderr) when verbose is set.
+ *
+ * Exactly one of link() / symlink() is attempted.  Previously a successful
+ * symlink() was followed by a link() call on the same name, which failed
+ * because the name already existed and produced a bogus error report. */
+static void create_symlink(char *link_target, char *new_link)/*{{{*/
+{
+  int status;
+
+  if (do_hardlinks) {
+    status = link(link_target, new_link);
+  } else {
+    status = symlink(link_target, new_link);
+  }
+
+  if (status < 0) {
+    if (verbose) {
+      perror("symlink");
+      fprintf(stderr, "Failed path <%s> -> <%s>\n", link_target, new_link);
+    }
+  }
+}
+/*}}}*/
+/* Write to 'out' the newline(s) needed after a message body of 'len' bytes
+ * at 'data' when it is stored in an mbox:
+ *   - empty body                         : one newline
+ *   - single byte that is not a newline  : one newline
+ *   - body not ending in a newline       : two newlines
+ *   - body ending in exactly one newline : one newline
+ *   - otherwise                          : nothing */
+static void mbox_terminate(const unsigned char *data, int len, FILE *out)/*{{{*/
+{
+  const char *padding;
+
+  if (len == 0) {
+    padding = "\n";
+  } else if (len == 1) {
+    padding = (data[0] != '\n') ? "\n" : "";
+  } else if (data[len-1] != '\n') {
+    padding = "\n\n";
+  } else if (data[len-2] != '\n') {
+    padding = "\n";
+  } else {
+    padding = "";
+  }
+
+  if (*padding) {
+    fputs(padding, out);
+  }
+}
+/*}}}*/
+/* Append the message stored in the file 'path' to the mbox stream 'out'.
+ * The message is preceded by an artificial "From " line and an
+ * X-source-folder header naming 'path', and followed by whatever
+ * mbox_terminate() adds.  Nothing is written if the file cannot be mapped. */
+static void append_file_to_mbox(const char *path, FILE *out)/*{{{*/
+{
+  unsigned char *contents;
+  int n_bytes;
+
+  create_ro_mapping(path, &contents, &n_bytes);
+  if (!contents) {
+    return;
+  }
+
+  fprintf(out, "From mairix@mairix Mon Jan 1 12:34:56 1970\n");
+  fprintf(out, "X-source-folder: %s\n", path);
+  fwrite (contents, sizeof(unsigned char), n_bytes, out);
+  mbox_terminate(contents, n_bytes, out);
+  free_ro_mapping(contents, n_bytes);
+}
+/*}}}*/
+
+static int had_failed_checksum;
+
+/* Map the mbox file that holds message 'msg_index' and locate the message
+ * inside it.
+ *
+ * Outputs:
+ * *mbox_index - index of the mbox, decoded from path_offsets[msg_index]
+ * *mbox_data, *mbox_len - the mapping created by create_ro_mapping(); when
+ * *mbox_data is non-NULL the caller has to release it with
+ * free_ro_mapping()
+ * *msg_data - start of the message inside the mapping, or NULL if the
+ * mapping failed or the checksum does not match the stored one
+ * *msg_len - message length, clipped to the end of the mapping
+ *
+ * On a checksum mismatch the file-scope flag had_failed_checksum is set. */
+static void get_validated_mbox_msg(struct read_db *db, int msg_index,/*{{{*/
+ int *mbox_index,
+ unsigned char **mbox_data, int *mbox_len,
+ unsigned char **msg_data, int *msg_len)
+{
+ /* msg_data==NULL if checksum mismatches */
+ unsigned char *start;
+ checksum_t csum;
+ unsigned int mbi, msgi;
+
+ *msg_data = NULL;
+ *msg_len = 0;
+
+ decode_mbox_indices(db->path_offsets[msg_index], &mbi, &msgi);
+ *mbox_index = mbi;
+
+ create_ro_mapping(db->data + db->mbox_paths_table[mbi], mbox_data, mbox_len);
+ if (!*mbox_data) return;
+
+ /* For mbox messages the mtime_table entry is used here as the byte offset of
+ * the message within the mbox, and the size_table entry as its length. */
+ start = *mbox_data + db->mtime_table[msg_index];
+
+ /* Ensure that we don't run off the end of the mmap'd file */
+ if (db->mtime_table[msg_index] >= *mbox_len)
+ *msg_len = 0;
+ else if (db->mtime_table[msg_index] + db->size_table[msg_index] >= *mbox_len)
+ *msg_len = *mbox_len - db->mtime_table[msg_index];
+ else
+ *msg_len = db->size_table[msg_index];
+
+ /* Compare against the checksum stored for message 'msgi' of this mbox */
+ compute_checksum((char *)start, *msg_len, &csum);
+ if (!memcmp((db->data + db->mbox_checksum_table[mbi] + (msgi * sizeof(checksum_t))), &csum, sizeof(checksum_t))) {
+ *msg_data = start;
+ } else {
+ had_failed_checksum = 1;
+ }
+ return;
+}
+/*}}}*/
+/* Append message 'msg_index', which lives inside an mbox, to the mbox stream
+ * 'out'.  Nothing is written when get_validated_mbox_msg() does not return a
+ * message (mapping failure or checksum mismatch). */
+static void append_mboxmsg_to_mbox(struct read_db *db, int msg_index, FILE *out)/*{{{*/
+{
+  /* Need to common up code with try_copy_to_path */
+  unsigned char *mapping, *msg;
+  int mapping_len, msg_len;
+  int which_mbox;
+
+  get_validated_mbox_msg(db, msg_index, &which_mbox, &mapping, &mapping_len, &msg, &msg_len);
+
+  if (msg != NULL) {
+    const char *source_folder = db->data + db->mbox_paths_table[which_mbox];
+    /* Artificial from line, we don't have the envelope sender so this is
+       going to be artificial anyway. */
+    fprintf(out, "From mairix@mairix Mon Jan 1 12:34:56 1970\n");
+    fprintf(out, "X-source-folder: %s\n", source_folder);
+    fwrite(msg, sizeof(unsigned char), msg_len, out);
+    mbox_terminate(msg, msg_len, out);
+  }
+
+  if (mapping != NULL) {
+    free_ro_mapping(mapping, mapping_len);
+  }
+}
+/*}}}*/
+/* Copy message 'msg_index', which lives inside an mbox, into a new file
+ * 'target_path', preceded by an X-source-folder header.  All but the last
+ * byte of the message is written.  Nothing is reported if the message cannot
+ * be obtained or the target cannot be opened. */
+static void try_copy_to_path(struct read_db *db, int msg_index, char *target_path)/*{{{*/
+{
+  unsigned char *mapping;
+  unsigned char *msg;
+  int mapping_len, msg_len;
+  int which_mbox;
+  FILE *out;
+
+  get_validated_mbox_msg(db, msg_index, &which_mbox, &mapping, &mapping_len, &msg, &msg_len);
+
+  if (msg && (out = fopen(target_path, "wb")) != NULL) {
+    /* NOTE(review): the final byte is dropped, presumably a trailing
+     * separator newline - confirm against the indexer. */
+    size_t to_write = msg_len ? msg_len-1 : 0;
+    fprintf(out, "X-source-folder: %s\n",
+        db->data + db->mbox_paths_table[which_mbox]);
+    fwrite(msg, sizeof(char), to_write, out);
+    fclose(out);
+  }
+
+  if (mapping) {
+    free_ro_mapping(mapping, mapping_len);
+  }
+}
+/*}}}*/
+/* Fill in and return a message-source descriptor for a message inside an
+ * mbox.  The descriptor is a single static object, so each call overwrites
+ * the values returned by the previous one. */
+static struct msg_src *setup_mbox_msg_src(char *filename, off_t start, size_t len)/*{{{*/
+{
+  static struct msg_src shared;
+  struct msg_src *src = &shared;
+
+  src->type = MS_MBOX;
+  src->filename = filename;
+  src->start = start;
+  src->len = len;
+  return src;
+}
+/*}}}*/
+
+/* Report the seen / replied / flagged bits recorded for message 'idx' as
+ * 0-or-1 values. */
+static void get_flags_from_file(struct read_db *db, int idx, int *is_seen, int *is_replied, int *is_flagged)
+{
+  *is_seen    = !!(db->msg_type_and_flags[idx] & FLAG_SEEN);
+  *is_replied = !!(db->msg_type_and_flags[idx] & FLAG_REPLIED);
+  *is_flagged = !!(db->msg_type_and_flags[idx] & FLAG_FLAGGED);
+}
+
+/* Convert the NUL-terminated string 'str' to lower case in place. */
+static void string_tolower(char *str)
+{
+  unsigned char *cursor = (unsigned char *) str;
+
+  while (*cursor) {
+    *cursor = tolower(*cursor);
+    cursor++;
+  }
+}
+
+/* Evaluate the search expressions in 'args' (a NULL-terminated array) against
+ * the database and emit the matching messages in the form selected by 'ft':
+ * FT_MAILDIR / FT_MH - links or copies are created under 'output_path'
+ * FT_MBOX - messages are appended to the file 'output_path'
+ * FT_RAW - one line per message is printed on stdout
+ * FT_EXCERPT - the location plus selected headers are printed on stdout
+ * If 'show_threads' is set, every message sharing a tid_table value with a
+ * matching message is included as well. The 'verbose' parameter is not
+ * referenced in this function body.
+ *
+ * Returns 0 if at least one message was emitted, 1 otherwise. */
+static int do_search(struct read_db *db, char **args, char *output_path, int show_threads, enum folder_type ft, int verbose)/*{{{*/
+{
+ char *colon, *start_words;
+ int do_body, do_subject, do_from, do_to, do_cc, do_date, do_size;
+ int do_att_name;
+ int do_flags;
+ int do_path, do_msgid;
+ char *key;
+ char *hit0, *hit1, *hit2, *hit3;
+ int i;
+ int n_hits;
+ int left_anchor;
+
+ had_failed_checksum = 0;
+
+ /* One byte per message in each of the match vectors */
+ hit0 = new_array(char, db->n_msgs);
+ hit1 = new_array(char, db->n_msgs);
+ hit2 = new_array(char, db->n_msgs);
+ hit3 = new_array(char, db->n_msgs);
+
+ /* Argument structure is
+ * x:tokena,tokenb/~tokenc/tokend,tokene
+ *
+ * , (and) binds more tightly than /
+ * / (or) binds more tightly than separate args
+ *
+ *
+ * hit0 holds the matches for the single word being processed
+ * hit1 gathers the tokens and'ed with ,
+ * hit2 gathers the tokens or'ed with /
+ * hit3 gathers the separate args and'ed with <gap>
+ * */
+
+
+ /* Everything matches until proven otherwise */
+ memset(hit3, 1, db->n_msgs);
+
+ while (*args) {
+ /* key is a single argument, separate args are and-ed together */
+ key = *args++;
+
+ memset(hit2, 0, db->n_msgs);
+ memset(hit1, 1, db->n_msgs);
+
+ do_to = 0;
+ do_cc = 0;
+ do_from = 0;
+ do_subject = 0;
+ do_body = 0;
+ do_date = 0;
+ do_size = 0;
+ do_path = 0;
+ do_msgid = 0;
+ do_att_name = 0;
+ do_flags = 0;
+
+ colon = strchr(key, ':');
+
+ if (colon) {
+ /* The letters before the colon select what to search in */
+ char *p;
+ for (p=key; p<colon; p++) {
+ switch(*p) {
+ case 'b': do_body = 1; break;
+ case 's': do_subject = 1; break;
+ case 't': do_to = 1; break;
+ case 'c': do_cc = 1; break;
+ case 'f': do_from = 1; break;
+ case 'r': do_to = do_cc = 1; break;
+ case 'a': do_to = do_cc = do_from = 1; break;
+ case 'd': do_date = 1; break;
+ case 'z': do_size = 1; break;
+ case 'p': do_path = 1; break;
+ case 'm': do_msgid = 1; break;
+ case 'n': do_att_name = 1; break;
+ case 'F': do_flags = 1; break;
+ default: fprintf(stderr, "Unknown key type <%c>\n", *p); break;
+ }
+ }
+ if (do_msgid && (p-key) > 1) {
+ fprintf(stderr, "Message-ID key <m> can't be used with other keys\n");
+ unlock_and_exit(2);
+ }
+ start_words = 1 + colon;
+ } else {
+ /* No key letters : search the body and the main headers */
+ do_body = do_subject = do_to = do_cc = do_from = 1;
+ start_words = key;
+ }
+
+ if (do_date || do_size || do_flags) {
+ /* These take the whole remainder of the argument as one expression; only
+ * the first of date, size, flags that is set gets evaluated. */
+ memset(hit0, 0, db->n_msgs);
+ if (do_date) {
+ find_date_matches_in_table(db, start_words, hit0);
+ } else if (do_size) {
+ find_size_matches_in_table(db, start_words, hit0);
+ } else if (do_flags) {
+ find_flag_matches_in_table(db, start_words, hit0);
+ }
+
+ /* AND-combine match vectors */
+ for (i=0; i<db->n_msgs; i++) {
+ hit1[i] &= hit0[i];
+ }
+ } else if (do_msgid) {
+ /* Message-ID : exact match on the lower-cased remainder of the argument */
+ char *lower_word = new_string(start_words);
+ string_tolower(lower_word);
+ memset(hit0, 0, db->n_msgs);
+ match_string_in_table2(db, &db->msg_ids, lower_word, hit0);
+ free(lower_word);
+ /* AND-combine match vectors */
+ for (i=0; i<db->n_msgs; i++) {
+ hit1[i] &= hit0[i];
+ }
+ } else {
+/*{{{ Scan over separate words within this argument */
+
+ do {
+ /* / = 'or' separator
+ * , = 'and' separator */
+ char *orsep;
+ char *andsep;
+ char *word, *orig_word, *lower_word;
+ char *equal;
+ char *p;
+ int negate;
+ int had_orsep;
+ int max_errors;
+
+ orsep = strchr(start_words, '/');
+ andsep = strchr(start_words, ',');
+ had_orsep = 0;
+
+ /* Copy the next word (up to the nearest separator), dropping any
+ * whitespace, and advance start_words past it. */
+ if (andsep && (!orsep || (andsep < orsep))) {
+ char *p, *q;
+ word = new_array(char, 1 + (andsep - start_words)); /* maybe oversize */
+ for (p=word, q=start_words; q < andsep; q++) {
+ if (!isspace(*(unsigned char *)q)) {
+ *p++ = *q;
+ }
+ }
+ *p = 0;
+ start_words = andsep + 1;
+ } else if (orsep) { /* comes before , if there's a , */
+ char *p, *q;
+ word = new_array(char, 1 + (orsep - start_words)); /* maybe oversize */
+ for (p=word, q=start_words; q < orsep; q++) {
+ if (!isspace(*(unsigned char *)q)) {
+ *p++ = *q;
+ }
+ }
+ *p = 0;
+ start_words = orsep + 1;
+ had_orsep = 1;
+
+ } else {
+ word = new_string(start_words);
+ while (*start_words) ++start_words;
+ }
+
+ /* word is advanced past any prefix characters below; keep the original
+ * pointer for free() */
+ orig_word = word;
+
+ if (word[0] == '~') {
+ negate = 1;
+ word++;
+ } else {
+ negate = 0;
+ }
+
+ if (word[0] == '^') {
+ left_anchor = 1;
+ word++;
+ } else {
+ left_anchor = 0;
+ }
+
+ /* "word=N" requests a substring match with N errors allowed */
+ equal = strchr(word, '=');
+ if (equal) {
+ *equal = 0;
+ max_errors = atoi(equal + 1);
+ /* Extend this to do anchoring etc */
+ } else {
+ max_errors = 0; /* keep GCC quiet */
+ }
+
+ /* Canonicalise search string to lowercase, since the database has all
+ * tokens handled that way. But not for path search! */
+ lower_word = new_string(word);
+ string_tolower(lower_word);
+
+ memset(hit0, 0, db->n_msgs);
+ if (equal) {
+ if (do_to) match_substring_in_table(db, &db->to, lower_word, max_errors, left_anchor, hit0);
+ if (do_cc) match_substring_in_table(db, &db->cc, lower_word, max_errors, left_anchor, hit0);
+ if (do_from) match_substring_in_table(db, &db->from, lower_word, max_errors, left_anchor, hit0);
+ if (do_subject) match_substring_in_table(db, &db->subject, lower_word, max_errors, left_anchor, hit0);
+ if (do_body) match_substring_in_table(db, &db->body, lower_word, max_errors, left_anchor, hit0);
+ if (do_att_name) match_substring_in_table(db, &db->attachment_name, lower_word, max_errors, left_anchor, hit0);
+ if (do_path) match_substring_in_paths(db, word, max_errors, left_anchor, hit0);
+ } else {
+ if (do_to) match_string_in_table(db, &db->to, lower_word, hit0);
+ if (do_cc) match_string_in_table(db, &db->cc, lower_word, hit0);
+ if (do_from) match_string_in_table(db, &db->from, lower_word, hit0);
+ if (do_subject) match_string_in_table(db, &db->subject, lower_word, hit0);
+ if (do_body) match_string_in_table(db, &db->body, lower_word, hit0);
+ if (do_att_name) match_string_in_table(db, &db->attachment_name, lower_word, hit0);
+ /* FIXME */
+ if (do_path) match_substring_in_paths(db, word, 0, left_anchor, hit0);
+ }
+
+ free(lower_word);
+
+ /* AND-combine match vectors */
+ for (i=0; i<db->n_msgs; i++) {
+ if (negate) {
+ hit1[i] &= !hit0[i];
+ } else {
+ hit1[i] &= hit0[i];
+ }
+ }
+
+ if (had_orsep) {
+ /* OR-combine match vectors */
+ for (i=0; i<db->n_msgs; i++) {
+ hit2[i] |= hit1[i];
+ }
+ /* Start a fresh and-group for the words after the '/' */
+ memset(hit1, 1, db->n_msgs);
+ }
+
+ free(orig_word);
+
+ } while (*start_words);
+/*}}}*/
+ }
+
+ /* OR-combine match vectors */
+ for (i=0; i<db->n_msgs; i++) {
+ hit2[i] |= hit1[i];
+ }
+
+ /* AND-combine match vectors */
+ for (i=0; i<db->n_msgs; i++) {
+ hit3[i] &= hit2[i];
+ }
+ }
+
+ n_hits = 0;
+
+ if (show_threads) {/*{{{*/
+ /* Widen the result to every message whose tid_table value equals that of
+ * some matching message. */
+ char *tids;
+ tids = new_array(char, db->n_msgs);
+ memset(tids, 0, db->n_msgs);
+ for (i=0; i<db->n_msgs; i++) {
+ if (hit3[i]) {
+ tids[db->tid_table[i]] = 1;
+ }
+ }
+ for (i=0; i<db->n_msgs; i++) {
+ if (tids[db->tid_table[i]]) {
+ hit3[i] = 1;
+ }
+ }
+ free(tids);
+ }
+/*}}}*/
+ /* Emit the matching messages. Dead messages are skipped and not counted. */
+ switch (ft) {
+ case FT_MAILDIR:/*{{{*/
+ for (i=0; i<db->n_msgs; i++) {
+ if (hit3[i]) {
+ int is_seen, is_replied, is_flagged;
+ get_flags_from_file(db, i, &is_seen, &is_replied, &is_flagged);
+ switch (rd_msg_type(db, i)) {
+ case DB_MSG_FILE:
+ {
+ char *target_path;
+ char *message_path;
+ int is_in_new;
+ message_path = db->data + db->path_offsets[i];
+ is_in_new = looks_like_maildir_new_p(message_path);
+ target_path = mk_maildir_path(i, output_path, is_in_new, is_seen, is_replied, is_flagged);
+ create_symlink(message_path, target_path);
+ free(target_path);
+ ++n_hits;
+ }
+ break;
+ case DB_MSG_MBOX:
+ {
+ /* Unseen mbox messages go to new/, seen ones to cur/ */
+ char *target_path = mk_maildir_path(i, output_path, !is_seen, is_seen, is_replied, is_flagged);
+ try_copy_to_path(db, i, target_path);
+ free(target_path);
+ ++n_hits;
+ }
+ break;
+ case DB_MSG_DEAD:
+ break;
+ }
+ }
+ }
+ break;
+/*}}}*/
+ case FT_MH:/*{{{*/
+ for (i=0; i<db->n_msgs; i++) {
+ if (hit3[i]) {
+ switch (rd_msg_type(db, i)) {
+ case DB_MSG_FILE:
+ {
+ char *target_path = mk_mh_path(i, output_path);
+ create_symlink(db->data + db->path_offsets[i], target_path);
+ free(target_path);
+ ++n_hits;
+ }
+ break;
+ case DB_MSG_MBOX:
+ {
+ char *target_path = mk_mh_path(i, output_path);
+ try_copy_to_path(db, i, target_path);
+ free(target_path);
+ ++n_hits;
+ }
+ break;
+ case DB_MSG_DEAD:
+ break;
+ }
+ }
+ }
+ break;
+/*}}}*/
+ case FT_MBOX:/*{{{*/
+ {
+ FILE *out;
+ out = fopen(output_path, "ab");
+ if (!out) {
+ fprintf(stderr, "Cannot open output folder %s\n", output_path);
+ unlock_and_exit(1);
+ }
+
+ for (i=0; i<db->n_msgs; i++) {
+ if (hit3[i]) {
+ switch (rd_msg_type(db, i)) {
+ case DB_MSG_FILE:
+ {
+ append_file_to_mbox(db->data + db->path_offsets[i], out);
+ ++n_hits;
+ }
+ break;
+ case DB_MSG_MBOX:
+ {
+ append_mboxmsg_to_mbox(db, i, out);
+ ++n_hits;
+ }
+ break;
+ case DB_MSG_DEAD:
+ break;
+ }
+ }
+ }
+ fclose(out);
+ }
+
+ break;
+/*}}}*/
+ case FT_RAW:/*{{{*/
+ for (i=0; i<db->n_msgs; i++) {
+ if (hit3[i]) {
+ switch (rd_msg_type(db, i)) {
+ case DB_MSG_FILE:
+ {
+ ++n_hits;
+ printf("%s\n", db->data + db->path_offsets[i]);
+ }
+ break;
+ case DB_MSG_MBOX:
+ {
+ /* For mbox messages, print the mbox path and the half-open range
+ * built from the mtime_table and size_table entries */
+ unsigned int mbix, msgix;
+ int start, len, after_end;
+ start = db->mtime_table[i];
+ len = db->size_table[i];
+ after_end = start + len;
+ ++n_hits;
+ decode_mbox_indices(db->path_offsets[i], &mbix, &msgix);
+ printf("mbox:%s [%d,%d)\n", db->data + db->mbox_paths_table[mbix], start, after_end);
+ }
+ break;
+ case DB_MSG_DEAD:
+ break;
+ }
+ }
+ }
+ break;
+/*}}}*/
+ case FT_EXCERPT:/*{{{*/
+ for (i=0; i<db->n_msgs; i++) {
+ if (hit3[i]) {
+ struct rfc822 *parsed = NULL;
+ switch (rd_msg_type(db, i)) {
+ case DB_MSG_FILE:
+ {
+ char *filename;
+ ++n_hits;
+ printf("---------------------------------\n");
+ filename = db->data + db->path_offsets[i];
+ printf("%s\n", filename);
+ parsed = make_rfc822(filename);
+ }
+ break;
+ case DB_MSG_MBOX:
+ {
+ unsigned int mbix, msgix;
+ int start, len, after_end;
+ unsigned char *mbox_start, *msg_start;
+ int mbox_len, msg_len;
+ int mbox_index;
+
+ start = db->mtime_table[i];
+ len = db->size_table[i];
+ after_end = start + len;
+ ++n_hits;
+ printf("---------------------------------\n");
+ decode_mbox_indices(db->path_offsets[i], &mbix, &msgix);
+ printf("mbox:%s [%d,%d)\n", db->data + db->mbox_paths_table[mbix], start, after_end);
+
+ /* Only parse the message if its checksum still matches */
+ get_validated_mbox_msg(db, i, &mbox_index, &mbox_start, &mbox_len, &msg_start, &msg_len);
+ if (msg_start) {
+ enum data_to_rfc822_error error;
+ struct msg_src *msg_src;
+ msg_src = setup_mbox_msg_src(db->data + db->mbox_paths_table[mbix], start, msg_len);
+ parsed = data_to_rfc822(msg_src, (char *) msg_start, msg_len, &error);
+ }
+ if (mbox_start) {
+ free_ro_mapping(mbox_start, mbox_len);
+ }
+ }
+ break;
+ case DB_MSG_DEAD:
+ break;
+ }
+
+ if (parsed) {
+ char datebuf[64];
+ struct tm *thetm;
+ if (parsed->hdrs.to) printf(" To: %s\n", parsed->hdrs.to);
+ if (parsed->hdrs.cc) printf(" Cc: %s\n", parsed->hdrs.cc);
+ if (parsed->hdrs.from) printf(" From: %s\n", parsed->hdrs.from);
+ if (parsed->hdrs.subject) printf(" Subject: %s\n", parsed->hdrs.subject);
+ if (parsed->hdrs.message_id)
+ printf(" Message-ID: %s\n", parsed->hdrs.message_id);
+ thetm = gmtime(&parsed->hdrs.date);
+ strftime(datebuf, sizeof(datebuf), "%a, %d %b %Y", thetm);
+ printf(" Date: %s\n", datebuf);
+ free_rfc822(parsed);
+ }
+ }
+ }
+ break;
+/*}}}*/
+ default:
+ assert(0);
+ break;
+ }
+
+ free(hit0);
+ free(hit1);
+ free(hit2);
+ free(hit3);
+ /* The summary line is omitted for the output modes that print to stdout */
+ if ((ft != FT_RAW) && (ft != FT_EXCERPT)) {
+ printf("Matched %d messages\n", n_hits);
+ }
+ fflush(stdout);
+
+ if (had_failed_checksum) {
+ fprintf(stderr,
+ "WARNING : \n"
+ "Matches were found in mbox folders but the message checksums failed.\n"
+ "You may need to run mairix in indexing mode then repeat your search.\n");
+ }
+
+ /* Return error code 1 to the shell if no messages were matched. */
+ return (n_hits == 0) ? 1 : 0;
+}
+/*}}}*/
+
+/* Return 1 if 'name' exists and is a directory.  Otherwise return 0; if
+ * 'name' exists but is something other than a directory, an attempt is made
+ * to unlink it first. */
+static int directory_exists_remove_other(char *name)/*{{{*/
+{
+  struct stat info;
+
+  if (stat(name, &info) < 0) {
+    return 0;
+  }
+  if (!S_ISDIR(info.st_mode)) {
+    /* Try to remove. */
+    unlink(name);
+    return 0;
+  }
+  return 1;
+}
+/*}}}*/
+/* Create the directory 'path' with mode 0700 and announce it on stderr.  On
+ * failure a message is printed and unlock_and_exit(2) is called. */
+static void create_dir(char *path)/*{{{*/
+{
+  int status = mkdir(path, 0700);
+
+  if (status < 0) {
+    fprintf(stderr, "Could not create directory %s\n", path);
+    unlock_and_exit(2);
+  }
+  fprintf(stderr, "Created directory %s\n", path);
+}
+/*}}}*/
+/* Make sure 'path' is a maildir: create the directory itself and its cur,
+ * new and tmp subdirectories (in that order) wherever they do not already
+ * exist as directories. */
+static void maybe_create_maildir(char *path)/*{{{*/
+{
+  static const char *const parts[] = { "cur", "new", "tmp" };
+  char *subdir;
+  int len;
+  int k;
+
+  if (!directory_exists_remove_other(path)) {
+    create_dir(path);
+  }
+
+  len = strlen(path);
+  /* path + '/' + 3-letter name + NUL */
+  subdir = new_array(char, len + 5);
+
+  for (k = 0; k < 3; k++) {
+    sprintf(subdir, "%s/%s", path, parts[k]);
+    if (!directory_exists_remove_other(subdir)) {
+      create_dir(subdir);
+    }
+  }
+  free(subdir);
+}
+/*}}}*/
+/* Remove every symbolic link and regular file found directly inside
+ * "<path>/<subdir>".  Other kinds of entry are left alone; a failing unlink
+ * is reported on stderr.  Nothing happens if the directory cannot be opened. */
+static void clear_maildir_subfolder(char *path, char *subdir)/*{{{*/
+{
+  char *sdir;
+  char *fpath;
+  int len;
+  DIR *d;
+  struct dirent *de;
+  struct stat sb;
+
+  len = strlen(path) + strlen(subdir);
+
+  sdir = new_array(char, len + 2);
+  fpath = new_array(char, len + 3 + NAME_MAX);
+  sprintf(sdir, "%s/%s", path, subdir);
+
+  d = opendir(sdir);
+  if (d) {
+    for (de = readdir(d); de; de = readdir(d)) {
+      sprintf(fpath, "%s/%s", sdir, de->d_name);
+      if (lstat(fpath, &sb) < 0) continue;
+      /* Deal with both symlinks to maildir/MH messages as well as real files
+       * where mbox messages have been written. */
+      if (!S_ISLNK(sb.st_mode) && !S_ISREG(sb.st_mode)) continue;
+      /* FIXME : Can you unlink from a directory while doing a readdir loop over it? */
+      if (unlink(fpath) < 0) {
+        fprintf(stderr, "Unlinking %s failed\n", fpath);
+      }
+    }
+    closedir(d);
+  }
+
+  free(fpath);
+  free(sdir);
+}
+/*}}}*/
+/* Remove every symbolic link and regular file in the directory 'path' whose
+ * name is accepted by valid_mh_filename_p().  A failing unlink is reported on
+ * stderr.  Nothing happens if the directory cannot be opened. */
+static void clear_mh_folder(char *path)/*{{{*/
+{
+  char *fpath;
+  int len;
+  DIR *d;
+  struct dirent *de;
+  struct stat sb;
+
+  len = strlen(path);
+
+  fpath = new_array(char, len + 3 + NAME_MAX);
+
+  d = opendir(path);
+  if (d) {
+    for (de = readdir(d); de; de = readdir(d)) {
+      if (!valid_mh_filename_p(de->d_name)) continue;
+      sprintf(fpath, "%s/%s", path, de->d_name);
+      if (lstat(fpath, &sb) < 0) continue;
+      /* Both symlinks to maildir/MH messages and real files where mbox
+       * messages have been written are removed. */
+      if (!S_ISLNK(sb.st_mode) && !S_ISREG(sb.st_mode)) continue;
+      /* FIXME : Can you unlink from a directory while doing a readdir loop over it? */
+      if (unlink(fpath) < 0) {
+        fprintf(stderr, "Unlinking %s failed\n", fpath);
+      }
+    }
+    closedir(d);
+  }
+
+  free(fpath);
+}
+/*}}}*/
+/* Empty an mbox results folder by deleting the file 'path'.  The result of
+ * the unlink is deliberately not checked. */
+static void clear_mbox_folder(char *path)/*{{{*/
+{
+  (void) unlink(path);
+}
+/*}}}*/
+
+/* Entry point for search mode.
+ *
+ * Opens the database at 'database_path', prepares the results folder
+ * 'complete_mfolder' for the folder type 'ft' (creating it if needed and,
+ * unless 'do_augment' is set, emptying it), runs do_search() on 'argv' and
+ * returns its result.  'complete_mfolder' is freed before returning. */
+int search_top(int do_threads, int do_augment, char *database_path, char *complete_mfolder, char **argv, enum folder_type ft, int verbose)/*{{{*/
+{
+  struct read_db *db = open_db(database_path);
+  int result;
+
+  /* Step 1 : make sure the output folder exists */
+  if (ft == FT_MAILDIR) {
+    maybe_create_maildir(complete_mfolder);
+  } else if (ft == FT_MH) {
+    if (!directory_exists_remove_other(complete_mfolder)) {
+      create_dir(complete_mfolder);
+    }
+  } else if ((ft == FT_MBOX) || (ft == FT_RAW) || (ft == FT_EXCERPT)) {
+    /* Nothing to do */
+  } else {
+    assert(0);
+  }
+
+  /* Step 2 : throw away earlier results unless augmenting */
+  if (!do_augment) {
+    if (ft == FT_MAILDIR) {
+      clear_maildir_subfolder(complete_mfolder, "new");
+      clear_maildir_subfolder(complete_mfolder, "cur");
+    } else if (ft == FT_MH) {
+      clear_mh_folder(complete_mfolder);
+    } else if (ft == FT_MBOX) {
+      clear_mbox_folder(complete_mfolder);
+    } else if ((ft == FT_RAW) || (ft == FT_EXCERPT)) {
+      /* Nothing to clear */
+    } else {
+      assert(0);
+    }
+  }
+
+  /* Step 3 : run the search and release what we hold */
+  result = do_search(db, argv, complete_mfolder, do_threads, ft, verbose);
+  free(complete_mfolder);
+  close_db(db);
+  return result;
+}
+/*}}}*/
+
+
diff --git a/src/mairix/stats.c b/src/mairix/stats.c
@@ -0,0 +1,128 @@
+/*
+ mairix - message index builder and finder for maildir folders.
+
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2002-2004
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+#include "mairix.h"
+#include "memmac.h"
+#include "reader.h"
+
+/* Accumulate statistics for one token table into three histograms, each of
+ * 'size' entries:
+ *   lc[]  - count of tokens by text length
+ *   elc[] - count of tokens by encoding length (match0.n)
+ *   ec[]  - count of the values returned by read_increment() over each
+ *           token's match0.msginfo bytes
+ * *ml, *mel and *me are raised to the largest index used in the respective
+ * histogram.  Values that do not fit (>= size) are reported on stderr and not
+ * counted; the previous '>' test let a value equal to 'size' write one
+ * element past the end of the array. */
+static void do_toktable(struct toktable *x, int *lc, int *elc, int *ec, int size, int *ml, int *mel, int *me)
+{
+  int i;
+  for (i=0; i<x->size; i++) {
+    struct token *tok = x->tokens[i];
+    unsigned char *j, *last_char;
+    int incr;
+    int len;
+
+    if (!tok) continue; /* empty slot */
+
+    len = strlen(tok->text);
+    if (len >= size) {
+      fprintf(stderr, "Token length %d exceeds size\n", len);
+    } else {
+      lc[len]++;
+      if (len > *ml) *ml = len;
+    }
+
+    /* Deal with encoding length */
+    if (tok->match0.n >= size) {
+      fprintf(stderr, "Token encoding length %d exceeds size\n", tok->match0.n);
+    } else {
+      elc[tok->match0.n]++;
+      if (tok->match0.n > *mel) *mel = tok->match0.n;
+    }
+
+    /* Deal with encoding */
+    j = tok->match0.msginfo;
+    last_char = j + tok->match0.n;
+    while (j < last_char) {
+      /* read_increment() is handed &j; it is expected to advance j past the
+       * bytes it consumes, otherwise this loop would not terminate */
+      incr = read_increment(&j);
+      if (incr >= size) {
+        fprintf(stderr, "Encoding increment %d exceeds size\n", incr);
+      } else {
+        ec[incr]++;
+        if (incr > *me) *me = incr;
+      }
+    }
+  }
+}
+
+/* Print the histogram x[0..max] on stdout, one line per index.  Each line
+ * shows the index, the count, and the cumulative share of the total scaled
+ * to 0..256, both as the step contributed by this entry and as the running
+ * value.
+ *
+ * When every count is zero the cumulative share is printed as 0 rather than
+ * dividing by zero. */
+void print_table(int *x, int max) {
+  int total, sum;
+  int i;
+  int kk, kk1;
+
+  total = 0;
+  for (i = 0; i<=max; i++) {
+    total += x[i];
+  }
+  sum = 0;
+  kk1 = 0;
+  for (i = 0; i<=max; i++) {
+    sum += x[i];
+    /* An empty table has total == 0; 0/0 would give a NaN whose conversion
+     * to int is undefined. */
+    kk = (total != 0) ? (int)((double)sum*256.0/(double)total) : 0;
+    printf("%5d : %5d %3d %3d\n", i, x[i], kk-kk1, kk);
+    kk1 = kk;
+  }
+}
+
+/* Print token statistics for the database on stdout: histograms of token
+ * length, encoding vector length and encoding increment, gathered over the
+ * to, cc, from, subject and body token tables.
+ *
+ * The three working histograms are released before returning; they used to
+ * be leaked. */
+void get_db_stats(struct database *db)
+{
+  /* Deal with paths later - problem is, they will be biased by length of folder_base at the moment. */
+
+  int size = 4096; /* number of entries in each histogram */
+  int *len_counts, *enc_len_counts, *enc_counts;
+  int max_len, max_enc_len, max_enc;
+
+  max_len = 0;
+  max_enc_len = 0;
+  max_enc = 0;
+
+  len_counts = new_array(int, size);
+  memset(len_counts, 0, size * sizeof(int));
+  enc_len_counts = new_array(int, size);
+  memset(enc_len_counts, 0, size * sizeof(int));
+  enc_counts = new_array(int, size);
+  memset(enc_counts, 0, size * sizeof(int));
+
+  do_toktable(db->to, len_counts, enc_len_counts, enc_counts, size, &max_len, &max_enc_len, &max_enc);
+  do_toktable(db->cc, len_counts, enc_len_counts, enc_counts, size, &max_len, &max_enc_len, &max_enc);
+  do_toktable(db->from, len_counts, enc_len_counts, enc_counts, size, &max_len, &max_enc_len, &max_enc);
+  do_toktable(db->subject, len_counts, enc_len_counts, enc_counts, size, &max_len, &max_enc_len, &max_enc);
+  do_toktable(db->body, len_counts, enc_len_counts, enc_counts, size, &max_len, &max_enc_len, &max_enc);
+#if 0
+  /* no longer works now that the msg_ids table has 2 encoding chains. fix
+   * this when required. */
+  do_toktable(db->msg_ids, len_counts, enc_len_counts, enc_counts, size, &max_len, &max_enc_len, &max_enc);
+#endif
+
+  printf("Max token length : %d\n", max_len);
+  print_table(len_counts, max_len);
+
+  printf("Max encoding vector length : %d\n", max_enc_len);
+  print_table(enc_len_counts, max_enc_len);
+
+  printf("Max encoding increment : %d\n", max_enc);
+  print_table(enc_counts, max_enc);
+
+  free(len_counts);
+  free(enc_len_counts);
+  free(enc_counts);
+}
+
diff --git a/src/mairix/tok.c b/src/mairix/tok.c
@@ -0,0 +1,344 @@
+/*
+ mairix - message index builder and finder for maildir folders.
+
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2002-2004, 2005
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+/* Functions for handling tokens */
+
+#include <assert.h>
+#include <ctype.h>
+#include "mairix.h"
+
+/* Put a match chain into its empty state: no encoding buffer, zero bytes
+ * used, zero capacity and no index recorded so far. */
+static void init_matches(struct matches *m) {/*{{{*/
+  m->highest = 0;
+  m->max = 0;
+  m->n = 0;
+  m->msginfo = NULL;
+}
+/*}}}*/
+/* Allocate a token with no text and an empty match chain.  Only the text
+ * pointer and match0 are initialised here; the caller fills in the rest. */
+struct token *new_token(void)/*{{{*/
+{
+  struct token *tok = new(struct token);
+
+  init_matches(&tok->match0);
+  tok->text = NULL;
+  return tok;
+}
+/*}}}*/
+/* Allocate a two-chain token with no text and both match chains empty.  Only
+ * the text pointer, match0 and match1 are initialised here. */
+struct token2 *new_token2(void)/*{{{*/
+{
+  struct token2 *tok = new(struct token2);
+
+  init_matches(&tok->match1);
+  init_matches(&tok->match0);
+  tok->text = NULL;
+  return tok;
+}
+/*}}}*/
+/* Release a token together with its text and its match encoding buffer.
+ * x itself must not be NULL; its text and buffer may be. */
+void free_token(struct token *x)/*{{{*/
+{
+  /* free(NULL) is a no-op, so unset members need no special casing. */
+  free(x->match0.msginfo);
+  free(x->text);
+  free(x);
+}
+/*}}}*/
+/* Release a two-chain token together with its text and both match encoding
+ * buffers.  x itself must not be NULL; its members may be. */
+void free_token2(struct token2 *x)/*{{{*/
+{
+  /* free(NULL) is a no-op, so unset members need no special casing. */
+  free(x->match1.msginfo);
+  free(x->match0.msginfo);
+  free(x->text);
+  free(x);
+}
+/*}}}*/
+/* Allocate an empty token table.  No slot array exists yet: size, n and hwm
+ * all start at zero and tokens is NULL. */
+struct toktable *new_toktable(void)/*{{{*/
+{
+  struct toktable *tab = new(struct toktable);
+
+  tab->size = 0;
+  tab->hwm = 0;
+  tab->n = 0;
+  tab->tokens = NULL;
+  return tab;
+}
+/*}}}*/
+/* Allocate an empty two-chain token table.  No slot array exists yet: size, n
+ * and hwm all start at zero and tokens is NULL. */
+struct toktable2 *new_toktable2(void)/*{{{*/
+{
+  struct toktable2 *tab = new(struct toktable2);
+
+  tab->size = 0;
+  tab->hwm = 0;
+  tab->n = 0;
+  tab->tokens = NULL;
+  return tab;
+}
+/*}}}*/
+/* Release a token table: every token stored in its slot array, the slot
+ * array itself, and finally the table structure. */
+void free_toktable(struct toktable *x)/*{{{*/
+{
+  if (x->tokens != NULL) {
+    struct token **slot = x->tokens;
+    struct token **end = x->tokens + x->size;
+
+    for (; slot < end; slot++) {
+      if (*slot != NULL) {
+        free_token(*slot);
+      }
+    }
+    free(x->tokens);
+  }
+  free(x);
+}
+/*}}}*/
+/* Release a two-chain token table: every token stored in its slot array, the
+ * slot array itself, and finally the table structure. */
+void free_toktable2(struct toktable2 *x)/*{{{*/
+{
+  if (x->tokens != NULL) {
+    struct token2 **slot = x->tokens;
+    struct token2 **end = x->tokens + x->size;
+
+    for (; slot < end; slot++) {
+      if (*slot != NULL) {
+        free_token2(*slot);
+      }
+    }
+    free(x->tokens);
+  }
+  free(x);
+}
+/*}}}*/
+/* FIXME : This stuff really needs cleaning up. */
+/* Grow the open-addressed hash table behind 'table'.
+ *
+ * The first call allocates 1024 slots; each later call doubles the slot
+ * count and re-inserts every existing token at (hashval & mask), probing
+ * linearly past occupied slots.  Afterwards hwm is set to 3/8 of the new
+ * slot count. */
+static void enlarge_toktable(struct toktable *table)/*{{{*/
+{
+  struct token **previous = table->tokens;
+  int previous_size = table->size;
+  int slot;
+
+  table->size = (previous_size == 0) ? 1024 : (previous_size << 1);
+  table->mask = table->size - 1;
+  table->tokens = new_array(struct token *, table->size);
+  for (slot = 0; slot < table->size; slot++) {
+    table->tokens[slot] = NULL;
+  }
+
+  if (previous_size != 0) {
+    /* Move the old entries across; the token objects themselves are reused. */
+    for (slot = 0; slot < previous_size; slot++) {
+      struct token *tok = previous[slot];
+      unsigned long home;
+
+      if (!tok) {
+        continue;
+      }
+      home = tok->hashval & table->mask;
+      while (table->tokens[home]) {
+        home = (home + 1) & table->mask;
+      }
+      table->tokens[home] = tok;
+    }
+    free(previous);
+  }
+
+  table->hwm = (table->size >> 2) + (table->size >> 3); /* allow 3/8 of nodes to be used */
+}
+/*}}}*/
+/* Grow the open-addressed hash table behind a two-chain 'table'.
+ *
+ * The first call allocates 1024 slots; each later call doubles the slot
+ * count and re-inserts every existing token at (hashval & mask), probing
+ * linearly past occupied slots.  Afterwards hwm is set to 3/8 of the new
+ * slot count. */
+static void enlarge_toktable2(struct toktable2 *table)/*{{{*/
+{
+  struct token2 **previous = table->tokens;
+  int previous_size = table->size;
+  int slot;
+
+  table->size = (previous_size == 0) ? 1024 : (previous_size << 1);
+  table->mask = table->size - 1;
+  table->tokens = new_array(struct token2 *, table->size);
+  for (slot = 0; slot < table->size; slot++) {
+    table->tokens[slot] = NULL;
+  }
+
+  if (previous_size != 0) {
+    /* Move the old entries across; the token objects themselves are reused. */
+    for (slot = 0; slot < previous_size; slot++) {
+      struct token2 *tok = previous[slot];
+      unsigned long home;
+
+      if (!tok) {
+        continue;
+      }
+      home = tok->hashval & table->mask;
+      while (table->tokens[home]) {
+        home = (home + 1) & table->mask;
+      }
+      table->tokens[home] = tok;
+    }
+    free(previous);
+  }
+
+  table->hwm = (table->size >> 2) + (table->size >> 3); /* allow 3/8 of nodes to be used */
+}
+/*}}}*/
+/* Store a non-negative value at x using a variable-length, most significant
+ * byte first encoding, and return the number of bytes written:
+ *
+ *   0 .. 127      1 byte,  top bit clear
+ *   128 .. 16383  2 bytes, first byte has top bits 10
+ *   larger        4 bytes, first byte has top bits 11 (value must fit 30 bits)
+ *
+ * The caller must provide room for up to 4 bytes at x. */
+static int insert_value(unsigned char *x, int val)/*{{{*/
+{
+  assert(val >= 0);
+
+  if (val <= 127) {
+    x[0] = val;
+    return 1;
+  }
+
+  if (val <= 16383) {
+    x[0] = (val >> 8) | 0x80;
+    x[1] = (val & 0xff);
+    return 2;
+  }
+
+  {
+    int top = (val >> 24);
+    assert (top <= 63);
+    x[0] = top | 0xc0;
+    x[1] = ((val >> 16) & 0xff);
+    x[2] = ((val >> 8) & 0xff);
+    x[3] = (val & 0xff);
+    return 4;
+  }
+}
+/*}}}*/
+/* Grow the encoding buffer of m when fewer than five bytes remain free
+ * (n + 4 >= max).  An empty buffer starts at 16 bytes; otherwise the capacity
+ * is increased by half.  Call this before appending an encoded value, which
+ * takes at most 4 bytes. */
+void check_and_enlarge_encoding(struct matches *m)/*{{{*/
+{
+  if (m->n + 4 < m->max) {
+    return; /* enough headroom already */
+  }
+
+  m->max = (m->max == 0) ? 16 : (m->max + (m->max >> 1));
+  m->msginfo = grow_array(unsigned char, m->max, m->msginfo);
+}
+/*}}}*/
+/* Append message index idx to the delta-encoded chain m.
+ *
+ * The first entry of a chain stores idx itself; later entries store the
+ * increase over the previously recorded index.  Indices must arrive in
+ * non-decreasing order (asserted); a repeat of the last index adds nothing.
+ * The caller must already have ensured buffer space, e.g. through
+ * check_and_enlarge_encoding(). */
+void insert_index_on_encoding(struct matches *m, int idx)/*{{{*/
+{
+  if (m->n != 0) {
+    assert(idx >= m->highest);
+    if (idx > m->highest) {
+      m->n += insert_value(m->msginfo + m->n, idx - m->highest);
+    }
+    /* otherwise the token has already been seen in this file */
+  } else {
+    /* Chain is empty: always encode the value itself. */
+    m->n += insert_value(m->msginfo + m->n, idx);
+  }
+  m->highest = idx;
+}
+/*}}}*/
+/* Record that the token tok_text occurs in message number file_index.
+ *
+ * A lower-cased copy of the text is hashed with hash_key and looked up in
+ * 'table' (open addressing, linear probing).  If the token is not present a
+ * new entry is created which takes ownership of the lower-cased copy;
+ * otherwise the copy is freed.  The message index is then appended to the
+ * token's match0 chain.  tok_text itself is never modified. */
+void add_token_in_file(int file_index, unsigned int hash_key, char *tok_text, struct toktable *table)/*{{{*/
+{
+  char *folded = new_string((char*)tok_text);
+  char *cursor = folded;
+  unsigned long hash;
+  int slot;
+  struct token *tok;
+
+  while (*cursor) {
+    *cursor = tolower(*(unsigned char *) cursor);
+    cursor++;
+  }
+  /* cursor now sits on the terminator, so the difference is the length. */
+  hash = hashfn((unsigned char *) folded, cursor - folded, hash_key);
+
+  /* Grow first: this also creates the slot array on the very first insert. */
+  if (table->n >= table->hwm) {
+    enlarge_toktable(table);
+  }
+
+  slot = hash & table->mask;
+  for (;;) {
+    tok = table->tokens[slot];
+    /* strcmp ok as text has been tolower'd earlier */
+    if (tok == NULL || !strcmp(folded, tok->text)) {
+      break;
+    }
+    slot = (slot + 1) & table->mask;
+  }
+
+  if (tok != NULL) {
+    free(folded);
+  } else {
+    /* New token takes ownership of the folded text, no need to free it later. */
+    tok = new_token();
+    tok->text = (char *) folded;
+    tok->hashval = hash; /* save full width for later */
+    table->tokens[slot] = tok;
+    ++table->n;
+  }
+
+  check_and_enlarge_encoding(&tok->match0);
+  insert_index_on_encoding(&tok->match0, file_index);
+}
+/*}}}*/
+/* Record that the token tok_text occurs in message number file_index, in a
+ * table whose tokens carry two match chains.
+ *
+ * A lower-cased copy of the text is hashed with hash_key and looked up in
+ * 'table' (open addressing, linear probing).  If the token is not present a
+ * new entry is created which takes ownership of the lower-cased copy;
+ * otherwise the copy is freed.  The message index is always appended to the
+ * match0 chain, and to the match1 chain as well when add_to_chain1 is
+ * non-zero.  tok_text itself is never modified. */
+void add_token2_in_file(int file_index, unsigned int hash_key, char *tok_text, struct toktable2 *table, int add_to_chain1)/*{{{*/
+{
+  char *folded = new_string(tok_text);
+  char *cursor = folded;
+  unsigned long hash;
+  int slot;
+  struct token2 *tok;
+
+  while (*cursor) {
+    *cursor = tolower(*(unsigned char *) cursor);
+    cursor++;
+  }
+  /* cursor now sits on the terminator, so the difference is the length. */
+  hash = hashfn((unsigned char *) folded, cursor - folded, hash_key);
+
+  /* Grow first: this also creates the slot array on the very first insert. */
+  if (table->n >= table->hwm) {
+    enlarge_toktable2(table);
+  }
+
+  slot = hash & table->mask;
+  for (;;) {
+    tok = table->tokens[slot];
+    /* strcmp ok as text has been tolower'd earlier */
+    if (tok == NULL || !strcmp(folded, tok->text)) {
+      break;
+    }
+    slot = (slot + 1) & table->mask;
+  }
+
+  if (tok != NULL) {
+    free(folded);
+  } else {
+    /* New token takes ownership of the folded text, no need to free it later. */
+    tok = new_token2();
+    tok->text = folded;
+    tok->hashval = hash; /* save full width for later */
+    table->tokens[slot] = tok;
+    ++table->n;
+  }
+
+  check_and_enlarge_encoding(&tok->match0);
+  insert_index_on_encoding(&tok->match0, file_index);
+  if (add_to_chain1) {
+    check_and_enlarge_encoding(&tok->match1);
+    insert_index_on_encoding(&tok->match1, file_index);
+  }
+}
+/*}}}*/
+
+
+
+
diff --git a/src/mairix/version.h b/src/mairix/version.h
@@ -0,0 +1,4 @@
+#ifndef VERSION_H
+#define VERSION_H 1
+#define PROGRAM_VERSION "0.23"
+#endif /* VERSION_H */
diff --git a/src/mairix/version.txt b/src/mairix/version.txt
@@ -0,0 +1 @@
+0.23
diff --git a/src/mairix/writer.c b/src/mairix/writer.c
@@ -0,0 +1,614 @@
+/*
+ mairix - message index builder and finder for maildir folders.
+
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow 2002,2003,2004,2005,2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ **********************************************************************
+ */
+
+/* Write the database to disc. */
+
+#include "mairix.h"
+#include "reader.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <assert.h>
+#include <sys/mman.h>
+
+/* Where the two index tables of one single-chain token table live in the
+ * output file.  Both members are positions in the unsigned-int view of the
+ * file (4 byte units from the start of the file), as assigned by
+ * compute_mapping(). */
+struct write_map_toktable {/*{{{*/
+
+  /* Start of the table whose entries are byte offsets of the
+   * null-terminated token texts */
+  int tok_offset;
+
+  /* Start of the table whose entries are byte offsets of the byte strings
+   * containing the compressed delta-encoding of file indices matching the
+   * token */
+  int enc_offset;
+};/*}}}*/
+/* Where the three index tables of one two-chain token table live in the
+ * output file.  All members are positions in the unsigned-int view of the
+ * file (4 byte units from the start of the file), as assigned by
+ * compute_mapping(). */
+struct write_map_toktable2 {/*{{{*/
+
+  /* Start of the table whose entries are byte offsets of the
+   * null-terminated token texts */
+  int tok_offset;
+
+  /* Starts of the tables whose entries are byte offsets of the byte strings
+   * containing the compressed delta-encoding of file indices matching the
+   * token: one table for the match0 chain, one for the match1 chain */
+  int enc0_offset;
+  int enc1_offset;
+};/*}}}*/
+
+/* Layout of the output file, filled in by compute_mapping() and consumed by
+ * the write_* functions. */
+struct write_map {/*{{{*/
+/* Contains offset information for the various tables.
+   UI (unsigned int) tables are located in 4 byte units relative to the base
+   address.  Character data is located in byte units relative to the base
+   address. */
+
+  /* Per-message tables, one entry per message */
+  int path_offset;  /* Byte offsets of path strings (maildir/mh), encoded mbox indices (mbox) */
+  int mtime_offset; /* Message file mtimes (maildir/mh), mbox number (mbox) */
+  int size_offset;  /* Message sizes (maildir/mh), entry in respective mbox (mbox) */
+  int date_offset;  /* Message dates (all folder types) */
+  int tid_offset;   /* Thread group index table (all folder types) */
+
+  /* Per-mbox tables, one entry per mbox */
+  int mbox_paths_offset;
+  int mbox_entries_offset;
+  int mbox_mtime_offset;
+  int mbox_size_offset;
+  /* Character offset to checksum of first msg in the mbox.  Positions of
+   * subsequent messages computed by indexing - no explicit table entries
+   * anywhere. */
+  int mbox_checksum_offset;
+
+  /* Index tables for each token table */
+  struct write_map_toktable to;
+  struct write_map_toktable cc;
+  struct write_map_toktable from;
+  struct write_map_toktable subject;
+  struct write_map_toktable body;
+  struct write_map_toktable attachment_name;
+  struct write_map_toktable2 msg_ids;
+
+  /* To get base address for character data: the first 4 byte unit past the
+   * last unsigned-int table */
+  int beyond_last_ui_offset;
+};
+/*}}}*/
+
+/* Open (creating if necessary, mode 0600) the file 'filename', force it to be
+ * exactly len bytes long and map it read/write and shared.
+ *
+ * filename  path of the file to map
+ * len       required file and mapping length in bytes
+ * out_fd    receives the open file descriptor
+ * out_data  receives the base address of the mapping
+ *
+ * Every failure is reported through report_error() and followed by
+ * unlock_and_exit(2). */
+static void create_rw_mapping(char *filename, size_t len, int *out_fd, char **out_data)/*{{{*/
+{
+  struct stat info;
+  char *mapped;
+  int handle;
+
+  handle = open(filename, O_RDWR | O_CREAT, 0600);
+  if (handle < 0) {
+    report_error("open", filename);
+    unlock_and_exit(2);
+  }
+
+  if (fstat(handle, &info) < 0) {
+    report_error("stat", filename);
+    unlock_and_exit(2);
+  }
+
+  if (info.st_size > len) {
+    /* Too long: cut the tail off. */
+    if (ftruncate(handle, len) < 0) {
+      report_error("ftruncate", filename);
+      unlock_and_exit(2);
+    }
+  } else if (info.st_size < len) {
+    /* Too short: extend by writing a single zero byte at the last position. */
+    if (lseek(handle, len - 1, SEEK_SET) < 0) {
+      report_error("lseek", filename);
+      unlock_and_exit(2);
+    }
+    if (write(handle, "\000", 1) < 0) {
+      report_error("write", filename);
+      unlock_and_exit(2);
+    }
+  }
+  /* else: exactly the right length already - nothing to do */
+
+  mapped = mmap(0, len, PROT_READ|PROT_WRITE, MAP_SHARED, handle, 0);
+  if (mapped == MAP_FAILED) {
+    report_error("writer:mmap", filename);
+    unlock_and_exit(2);
+  }
+
+  *out_fd = handle;
+  *out_data = mapped;
+}
+/*}}}*/
+
+/* Number of bytes of character data needed for a token table: for every
+ * token its text plus a terminating null, and its match0 encoding plus one
+ * terminator byte. */
+static int toktable_char_length(struct toktable *tab)/*{{{*/
+{
+  int bytes = 0;
+  int slot;
+
+  for (slot = 0; slot < tab->size; slot++) {
+    struct token *tok = tab->tokens[slot];
+    if (!tok) {
+      continue;
+    }
+    bytes += (1 + strlen(tok->text));
+    bytes += (1 + tok->match0.n);
+  }
+  return bytes;
+}
+/*}}}*/
+/* Number of bytes of character data needed for a two-chain token table: for
+ * every token its text plus a terminating null, and each of its two match
+ * encodings plus one terminator byte apiece. */
+static int toktable2_char_length(struct toktable2 *tab)/*{{{*/
+{
+  int bytes = 0;
+  int slot;
+
+  for (slot = 0; slot < tab->size; slot++) {
+    struct token2 *tok = tab->tokens[slot];
+    if (!tok) {
+      continue;
+    }
+    bytes += (1 + strlen(tok->text));
+    bytes += (1 + tok->match0.n);
+    bytes += (1 + tok->match1.n);
+  }
+  return bytes;
+}
+/*}}}*/
+/* Return the total number of bytes of character data the database needs in
+ * the output file: one type/flag byte per message, the path string of every
+ * file-based message, the checksums and path of every mbox, and the text and
+ * encoding bytes of all seven token tables. */
+static int char_length(struct database *db)/*{{{*/
+{
+  int bytes;
+  int k;
+
+  /* One byte per message for the type table. */
+  bytes = db->n_msgs;
+
+  /* Only file-based messages carry a path string; dead and mbox messages
+   * contribute nothing here. */
+  for (k = 0; k < db->n_msgs; k++) {
+    if (db->type[k] == MTY_FILE) {
+      assert(db->msgs[k].src.mpf.path);
+      bytes += (1 + strlen(db->msgs[k].src.mpf.path));
+    }
+  }
+
+  for (k = 0; k < db->n_mboxen; k++) {
+    struct mbox *box = &db->mboxen[k];
+    bytes += box->n_msgs * sizeof(checksum_t);
+    if (box->path) {
+      bytes += (1 + strlen(box->path));
+    }
+  }
+
+  bytes += toktable_char_length(db->to);
+  bytes += toktable_char_length(db->cc);
+  bytes += toktable_char_length(db->from);
+  bytes += toktable_char_length(db->subject);
+  bytes += toktable_char_length(db->body);
+  bytes += toktable_char_length(db->attachment_name);
+  bytes += toktable2_char_length(db->msg_ids);
+
+  return bytes;
+}
+/*}}}*/
+
+/* Assign every unsigned-int table its starting position in the output file.
+ *
+ * Positions are counted in unsigned-int entries and begin right after the
+ * fixed header (UI_HEADER_LEN).  Tables are laid out back to back in the
+ * order below; map->beyond_last_ui_offset ends up as the first position past
+ * the last table. */
+static void compute_mapping(struct database *db, struct write_map *map)/*{{{*/
+{
+  int next = UI_HEADER_LEN;
+
+  /* Per-message tables */
+  map->path_offset = next;
+  next += db->n_msgs;
+  map->mtime_offset = next;
+  next += db->n_msgs;
+  map->date_offset = next;
+  next += db->n_msgs;
+  map->size_offset = next;
+  next += db->n_msgs;
+  map->tid_offset = next;
+  next += db->n_msgs;
+
+  /* Per-mbox tables */
+  map->mbox_paths_offset = next;
+  next += db->n_mboxen;
+  map->mbox_entries_offset = next;
+  next += db->n_mboxen;
+  map->mbox_mtime_offset = next;
+  next += db->n_mboxen;
+  map->mbox_size_offset = next;
+  next += db->n_mboxen;
+  map->mbox_checksum_offset = next;
+  next += db->n_mboxen;
+
+  /* Token tables: a text-offset table followed by an encoding-offset table */
+  map->to.tok_offset = next;
+  next += db->to->n;
+  map->to.enc_offset = next;
+  next += db->to->n;
+
+  map->cc.tok_offset = next;
+  next += db->cc->n;
+  map->cc.enc_offset = next;
+  next += db->cc->n;
+
+  map->from.tok_offset = next;
+  next += db->from->n;
+  map->from.enc_offset = next;
+  next += db->from->n;
+
+  map->subject.tok_offset = next;
+  next += db->subject->n;
+  map->subject.enc_offset = next;
+  next += db->subject->n;
+
+  map->body.tok_offset = next;
+  next += db->body->n;
+  map->body.enc_offset = next;
+  next += db->body->n;
+
+  map->attachment_name.tok_offset = next;
+  next += db->attachment_name->n;
+  map->attachment_name.enc_offset = next;
+  next += db->attachment_name->n;
+
+  /* The message-id table has two encoding chains, hence three tables. */
+  map->msg_ids.tok_offset = next;
+  next += db->msg_ids->n;
+  map->msg_ids.enc0_offset = next;
+  next += db->msg_ids->n;
+  map->msg_ids.enc1_offset = next;
+  next += db->msg_ids->n;
+
+  map->beyond_last_ui_offset = next;
+}
+/*}}}*/
+/* Fill in the fixed header at the start of the mapped file: the four magic
+ * bytes, the endianness marker, the message and mbox counts, the hash key,
+ * and for every table its entry count and/or starting position taken from
+ * 'map'.
+ *
+ * data    base of the mapping, viewed as bytes
+ * uidata  the same base address, viewed as unsigned ints */
+static void write_header(char *data, unsigned int *uidata, struct database *db, struct write_map *map)/*{{{*/
+{
+  /* The magic number is written byte by byte so that it is recognized even
+   * if the database is later read on a machine of opposite endianness. */
+  unsigned char *magic = (unsigned char *) data;
+
+  magic[0] = HEADER_MAGIC0;
+  magic[1] = HEADER_MAGIC1;
+  magic[2] = HEADER_MAGIC2;
+  magic[3] = HEADER_MAGIC3;
+
+  uidata[UI_ENDIAN] = 0x44332211; /* For checking reversed endianness on read */
+
+  /* Per-message tables */
+  uidata[UI_N_MSGS] = db->n_msgs;
+  uidata[UI_MSG_CDATA] = map->path_offset;  /* table of ptrs to filenames */
+  uidata[UI_MSG_MTIME] = map->mtime_offset; /* mtime table */
+  uidata[UI_MSG_DATE] = map->date_offset;   /* message Date: header lines as time_t */
+  uidata[UI_MSG_SIZE] = map->size_offset;   /* message sizes in bytes */
+  uidata[UI_MSG_TID] = map->tid_offset;     /* thread group numbers */
+
+  /* Per-mbox tables */
+  uidata[UI_MBOX_N] = db->n_mboxen;
+  uidata[UI_MBOX_PATHS] = map->mbox_paths_offset;
+  uidata[UI_MBOX_ENTRIES] = map->mbox_entries_offset;
+  uidata[UI_MBOX_MTIME] = map->mbox_mtime_offset;
+  uidata[UI_MBOX_SIZE] = map->mbox_size_offset;
+  uidata[UI_MBOX_CKSUM] = map->mbox_checksum_offset;
+
+  uidata[UI_HASH_KEY] = db->hash_key;
+
+  /* Token tables: count, text-offset table, encoding-offset table(s) */
+  uidata[UI_TO_N] = db->to->n;
+  uidata[UI_TO_TOK] = map->to.tok_offset;
+  uidata[UI_TO_ENC] = map->to.enc_offset;
+
+  uidata[UI_CC_N] = db->cc->n;
+  uidata[UI_CC_TOK] = map->cc.tok_offset;
+  uidata[UI_CC_ENC] = map->cc.enc_offset;
+
+  uidata[UI_FROM_N] = db->from->n;
+  uidata[UI_FROM_TOK] = map->from.tok_offset;
+  uidata[UI_FROM_ENC] = map->from.enc_offset;
+
+  uidata[UI_SUBJECT_N] = db->subject->n;
+  uidata[UI_SUBJECT_TOK] = map->subject.tok_offset;
+  uidata[UI_SUBJECT_ENC] = map->subject.enc_offset;
+
+  uidata[UI_BODY_N] = db->body->n;
+  uidata[UI_BODY_TOK] = map->body.tok_offset;
+  uidata[UI_BODY_ENC] = map->body.enc_offset;
+
+  uidata[UI_ATTACHMENT_NAME_N] = db->attachment_name->n;
+  uidata[UI_ATTACHMENT_NAME_TOK] = map->attachment_name.tok_offset;
+  uidata[UI_ATTACHMENT_NAME_ENC] = map->attachment_name.enc_offset;
+
+  uidata[UI_MSGID_N] = db->msg_ids->n;
+  uidata[UI_MSGID_TOK] = map->msg_ids.tok_offset;
+  uidata[UI_MSGID_ENC0] = map->msg_ids.enc0_offset;
+  uidata[UI_MSGID_ENC1] = map->msg_ids.enc1_offset;
+}
+/*}}}*/
+/* Write one byte per message at cdata holding the message type (file, mbox
+ * or dead) OR-ed with its seen/replied/flagged bits, and record the byte
+ * offset of this table in the header slot UI_MSG_TYPE_AND_FLAGS.
+ *
+ * Returns the position just past the table, i.e. cdata + db->n_msgs. */
+static char *write_type_and_flag_table(struct database *db, unsigned int *uidata, char *data, char *cdata)/*{{{*/
+{
+  char *entry = cdata;
+  int k;
+
+  for (k = 0; k < db->n_msgs; k++, entry++) {
+    struct msgpath *msg = &db->msgs[k];
+
+    switch (db->type[k]) {
+      case MTY_DEAD:
+        *entry = DB_MSG_DEAD;
+        break;
+      case MTY_MBOX:
+        *entry = DB_MSG_MBOX;
+        break;
+      case MTY_FILE:
+        *entry = DB_MSG_FILE;
+        break;
+    }
+
+    if (msg->seen) {
+      *entry |= FLAG_SEEN;
+    }
+    if (msg->replied) {
+      *entry |= FLAG_REPLIED;
+    }
+    if (msg->flagged) {
+      *entry |= FLAG_FLAGGED;
+    }
+  }
+
+  uidata[UI_MSG_TYPE_AND_FLAGS] = cdata - data;
+  return cdata + db->n_msgs;
+}
+/*}}}*/
+/* Fill in the five per-message tables (path, mtime, size, date, thread id)
+ * and append the path strings of file-based messages to the character data.
+ *
+ * db      database being written
+ * map     table positions computed by compute_mapping()
+ * uidata  base of the mapping, viewed as unsigned ints
+ * data    base of the mapping, viewed as bytes
+ * cdata   where the next character data is to be written
+ *
+ * For mbox messages the path slot holds the encoded (mbox, message) index
+ * pair and the mtime/size slots hold the message's start and length within
+ * the mbox.  Dead messages get every slot written explicitly, because the
+ * mapping may sit on top of an older database file whose bytes would
+ * otherwise show through.
+ *
+ * Returns the position just past the character data written. */
+static char *write_messages(struct database *db, struct write_map *map, unsigned int *uidata, char *data, char *cdata)/*{{{*/
+{
+  int i;
+  char *start_cdata = cdata;
+
+  for (i=0; i<db->n_msgs; i++) {
+    int slen;
+    switch (db->type[i]) {
+      case MTY_FILE:
+        slen = strlen(db->msgs[i].src.mpf.path);
+        uidata[map->path_offset + i] = cdata - data;
+        uidata[map->mtime_offset + i] = db->msgs[i].src.mpf.mtime;
+        uidata[map->size_offset + i] = db->msgs[i].src.mpf.size;
+        uidata[map->date_offset + i] = db->msgs[i].date;
+        uidata[map->tid_offset + i] = db->msgs[i].tid;
+        memcpy(cdata, db->msgs[i].src.mpf.path, 1 + slen); /* include trailing null */
+        cdata += (1 + slen);
+        break;
+      case MTY_MBOX:
+        {
+          int mbno = db->msgs[i].src.mbox.file_index;
+          int msgno = db->msgs[i].src.mbox.msg_index;
+          struct mbox *mb = &db->mboxen[mbno];
+          uidata[map->path_offset + i] = encode_mbox_indices(mbno, msgno);
+          uidata[map->mtime_offset + i] = mb->start[msgno];
+          uidata[map->size_offset + i] = mb->len[msgno];
+          uidata[map->date_offset + i] = db->msgs[i].date;
+          uidata[map->tid_offset + i] = db->msgs[i].tid;
+        }
+        break;
+      case MTY_DEAD:
+        uidata[map->path_offset + i] = 0; /* Can't ever happen for real */
+        uidata[map->mtime_offset + i] = 0; /* For cleanliness */
+        uidata[map->size_offset + i] = 0; /* For cleanliness */
+        /* Without this the date slot keeps whatever bytes the file held
+         * before, making the output depend on the previous file contents. */
+        uidata[map->date_offset + i] = 0;
+        /* The following line is necessary, otherwise 'random' tid
+         * information is written to the database, which can crash the search
+         * functions. */
+        uidata[map->tid_offset + i] = db->msgs[i].tid;
+        break;
+    }
+  }
+  if (verbose) {
+    printf("Wrote %d messages (%d bytes of tables, %d bytes of text)\n",
+        db->n_msgs, 4*5*db->n_msgs, (int)(cdata - start_cdata));
+  }
+  return cdata; /* new value */
+}
+/*}}}*/
+#if 0
+/* Disabled: qsort() comparison callback ordering two 'struct token *' array
+ * elements by their text with strcmp().  Its only users are the qsort()
+ * calls in write_toktable() and write_toktable2(), which are compiled out
+ * as well. */
+static int compare_tokens(const void *a, const void *b)/*{{{*/
+{
+  const struct token **aa = (const struct token **) a;
+  const struct token **bb = (const struct token **) b;
+  return strcmp((*aa)->text, (*bb)->text);
+}
+/*}}}*/
+#endif
+
+/* Fill in the per-mbox entry-count, mtime, size and path tables, appending
+ * each mbox path string to the character data.  An mbox without a path gets
+ * a path slot of 0.
+ *
+ * Returns the position just past the character data written. */
+static char *write_mbox_headers(struct database *db, struct write_map *map, unsigned int *uidata, char *data, char *cdata)/*{{{*/
+{
+  char *first = cdata;
+  int k, bytes;
+
+  for (k = 0; k < db->n_mboxen; k++) {
+    struct mbox *box = &db->mboxen[k];
+
+    uidata[map->mbox_entries_offset + k] = box->n_msgs;
+    uidata[map->mbox_mtime_offset + k] = box->current_mtime;
+    uidata[map->mbox_size_offset + k] = box->current_size;
+
+    if (!box->path) {
+      uidata[map->mbox_paths_offset + k] = 0;
+      continue;
+    }
+
+    uidata[map->mbox_paths_offset + k] = cdata - data;
+    bytes = 1 + strlen(box->path); /* include trailing null */
+    memcpy(cdata, box->path, bytes);
+    cdata += bytes;
+  }
+
+  if (verbose) {
+    printf("Wrote %d mbox headers (%d bytes of tables, %d bytes of paths)\n",
+        db->n_mboxen, 4*4*db->n_mboxen, (int)(cdata - first));
+  }
+  return cdata;
+}
+/*}}}*/
+/* Append the message checksums of every mbox to the character data, one
+ * checksum_t per message, and store in the per-mbox checksum table the byte
+ * offset at which each mbox's run of checksums begins.
+ *
+ * Returns the position just past the character data written. */
+static char * write_mbox_checksums(struct database *db, struct write_map *map, unsigned int *uidata, char *data, char *cdata)/*{{{*/
+{
+  char *first = cdata;
+  int box_no, msg_no;
+
+  for (box_no = 0; box_no < db->n_mboxen; box_no++) {
+    struct mbox *box = &db->mboxen[box_no];
+
+    uidata[map->mbox_checksum_offset + box_no] = cdata - data;
+    for (msg_no = 0; msg_no < box->n_msgs; msg_no++, cdata += sizeof(checksum_t)) {
+      memcpy(cdata, box->check_all[msg_no], sizeof(checksum_t));
+    }
+  }
+
+  if (verbose) {
+    printf("Wrote %d bytes of mbox message checksums\n",
+        (int)(cdata - first));
+  }
+  return cdata;
+}
+/*}}}*/
+
+/* Write one single-chain token table.
+ *
+ * The tokens are first gathered from the sparse hash slots into a dense
+ * array.  All token texts (null-terminated) are then appended to the
+ * character data, followed by all match0 encodings, each terminated by a
+ * 0xff byte.  The byte offset of every text and every encoding is stored in
+ * the two index tables located by 'map'.
+ *
+ * header_name is only used to label the verbose report.
+ * Returns the position just past the character data written. */
+static char *write_toktable(struct toktable *tab, struct write_map_toktable *map, unsigned int *uidata, char *data, char *cdata, char *header_name)/*{{{*/
+{
+  int n = tab->n;
+  int slot, used, k;
+  char *text_start, *enc_start;
+  struct token **packed = new_array(struct token *, tab->n);
+
+  used = 0;
+  for (slot = 0; slot < tab->size; slot++) {
+    if (tab->tokens[slot]) {
+      packed[used++] = tab->tokens[slot];
+    }
+  }
+
+  assert(used == n);
+
+#if 0
+  /* The search functions don't rely on the tokens being sorted.  So not
+   * sorting here will save time. */
+  qsort(packed, n, sizeof(struct token *), compare_tokens);
+#endif
+
+  /* FIXME : Eventually, the tokens have to be sorted - need to feed them from
+   * a different data structure (array with no holes) */
+  text_start = cdata;
+  for (k = 0; k < n; k++) {
+    int bytes = 1 + strlen(packed[k]->text); /* include trailing null */
+    uidata[map->tok_offset + k] = cdata - data;
+    memcpy(cdata, packed[k]->text, bytes);
+    cdata += bytes;
+  }
+
+  enc_start = cdata;
+  for (k = 0; k < n; k++) {
+    int bytes = packed[k]->match0.n;
+    uidata[map->enc_offset + k] = cdata - data;
+    memcpy(cdata, packed[k]->match0.msginfo, bytes);
+    cdata += bytes;
+    *cdata++ = 0xff; /* termination character */
+  }
+
+  if (verbose) {
+    printf("%s: Wrote %d tokens (%d bytes of tables, %d bytes of text, %d bytes of hit encoding)\n",
+        header_name, n, 2*4*n, (int)(enc_start - text_start), (int)(cdata - enc_start));
+  }
+
+  free(packed);
+  return cdata;
+}
+/*}}}*/
+/* Write one two-chain token table.
+ *
+ * The tokens are first gathered from the sparse hash slots into a dense
+ * array.  All token texts (null-terminated) are then appended to the
+ * character data, followed by all match0 encodings and then all match1
+ * encodings, each terminated by a 0xff byte.  The byte offset of every text
+ * and every encoding is stored in the three index tables located by 'map'.
+ *
+ * A chain that never received an entry has no buffer (msginfo is NULL, n is
+ * 0); such a chain is written as just its terminator byte.
+ *
+ * header_name is only used to label the verbose report.
+ * Returns the position just past the character data written. */
+static char *write_toktable2(struct toktable2 *tab, struct write_map_toktable2 *map, unsigned int *uidata, char *data, char *cdata, char *header_name)/*{{{*/
+{
+  int i, j, n, max;
+  char *start_cdata, *mid_cdata;
+  struct token2 **stok;
+  stok = new_array(struct token2 *, tab->n);
+  max = tab->size;
+  n = tab->n;
+
+  for (i=0, j=0; i<max; i++) {
+    struct token2 *tok = tab->tokens[i];
+    if (tok) {
+      stok[j++] = tok;
+    }
+  }
+
+  assert(j == n);
+
+#if 0
+  /* The search functions don't rely on the tokens being sorted.  So not
+   * sorting here will save time. */
+  qsort(stok, n, sizeof(struct token *), compare_tokens);
+#endif
+
+  start_cdata = cdata;
+
+  /* FIXME : Eventually, the tokens have to be sorted - need to feed them from
+   * a different data structure (array with no holes) */
+  for (i=0; i<n; i++) {
+    int slen;
+    uidata[map->tok_offset + i] = cdata - data;
+    slen = strlen(stok[i]->text);
+    memcpy(cdata, stok[i]->text, 1 + slen);
+    cdata += (1 + slen);
+  }
+
+  mid_cdata = cdata;
+
+  for (i=0; i<n; i++) {
+    int dlen;
+    dlen = stok[i]->match0.n;
+    uidata[map->enc0_offset + i] = cdata - data;
+    /* memcpy() must not be handed a NULL source, even for zero bytes. */
+    if (dlen > 0) {
+      memcpy(cdata, stok[i]->match0.msginfo, dlen);
+    }
+    cdata += dlen;
+    *cdata++ = 0xff; /* termination character */
+  }
+
+  for (i=0; i<n; i++) {
+    int dlen;
+    dlen = stok[i]->match1.n;
+    uidata[map->enc1_offset + i] = cdata - data;
+    /* match1 is empty (and unallocated) for tokens never added to chain 1. */
+    if (dlen > 0) {
+      memcpy(cdata, stok[i]->match1.msginfo, dlen);
+    }
+    cdata += dlen;
+    *cdata++ = 0xff; /* termination character */
+  }
+
+  if (verbose) {
+    /* Three index tables (text, enc0, enc1) of n 4-byte entries each. */
+    printf("%s: Wrote %d tokens (%d bytes of tables, %d bytes of text, %d bytes of hit encoding)\n",
+        header_name, n, 3*4*n, (int)(mid_cdata - start_cdata), (int)(cdata - mid_cdata));
+  }
+
+  free(stok);
+  return cdata;
+}
+/*}}}*/
+/* Write the in-memory database 'db' to the file 'filename'.
+ *
+ * After the optional integrity check and the mbox size constraint check, the
+ * file layout is computed, the file is sized and mapped read/write, and the
+ * header, per-message tables, mbox tables and all token tables are written
+ * through the mapping in that order.  The mapping is then removed and the
+ * file synced and closed.
+ *
+ * A failed constraint check ends in unlock_and_exit(1); a failing munmap,
+ * fsync or close is reported and ends in unlock_and_exit(2). */
+void write_database(struct database *db, char *filename, int do_integrity_checks)/*{{{*/
+{
+  struct write_map layout;
+  unsigned int *words;
+  char *base, *cursor;
+  int ui_bytes, total_bytes;
+  int handle;
+
+  if (do_integrity_checks) {
+    check_database_integrity(db);
+  }
+
+  if (!verify_mbox_size_constraints(db)) {
+    unlock_and_exit(1);
+  }
+
+  /* Work out where everything goes: unsigned-int tables first, character
+   * data directly behind them. */
+  compute_mapping(db, &layout);
+  ui_bytes = 4 * layout.beyond_last_ui_offset;
+  total_bytes = char_length(db) + ui_bytes;
+
+  create_rw_mapping(filename, total_bytes, &handle, &base);
+  words = (unsigned int *) base; /* align(int) < align(page)! */
+  cursor = base + ui_bytes;
+
+  write_header(base, words, db, &layout);
+
+  /* Each writer returns where the next piece of character data starts. */
+  cursor = write_type_and_flag_table(db, words, base, cursor);
+  cursor = write_messages(db, &layout, words, base, cursor);
+  cursor = write_mbox_headers(db, &layout, words, base, cursor);
+  cursor = write_mbox_checksums(db, &layout, words, base, cursor);
+  cursor = write_toktable(db->to, &layout.to, words, base, cursor, "To");
+  cursor = write_toktable(db->cc, &layout.cc, words, base, cursor, "Cc");
+  cursor = write_toktable(db->from, &layout.from, words, base, cursor, "From");
+  cursor = write_toktable(db->subject, &layout.subject, words, base, cursor, "Subject");
+  cursor = write_toktable(db->body, &layout.body, words, base, cursor, "Body");
+  cursor = write_toktable(db->attachment_name, &layout.attachment_name, words, base, cursor, "Attachment Name");
+  cursor = write_toktable2(db->msg_ids, &layout.msg_ids, words, base, cursor, "(Threading)");
+
+  /* Drop the mapping, then push the data to disc and close the file. */
+  if (munmap(base, total_bytes) < 0) {
+    report_error("munmap", filename);
+    unlock_and_exit(2);
+  }
+  if (fsync(handle) < 0) {
+    report_error("fsync", filename);
+    unlock_and_exit(2);
+  }
+  if (close(handle) < 0) {
+    report_error("close", filename);
+    unlock_and_exit(2);
+  }
+}
+ /*}}}*/