jaromail

a commandline tool to easily and privately handle your e-mail
git clone git://parazyd.org/jaromail.git
Log | Files | Refs | Submodules | README

commit f959567ceb7d1c1d26c8e1d5b744316c3f70c986
parent bde69469efb18bdd8c90ded91f1eb7c49464f499
Author: Jaromil <jaromil@dyne.org>
Date:   Fri, 15 Jun 2012 22:46:54 +0200

New search function using mairix

Diffstat:
Mbuild/build-osx.sh | 2+-
Msrc/jaro | 49++++++++++++++++++++++++++++++++++++++++++++++---
Asrc/mairix/ACKNOWLEDGEMENTS | 60++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/COPYING | 339+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/INSTALL | 22++++++++++++++++++++++
Asrc/mairix/Makefile | 114+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/Makefile.in | 114+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/NEWS | 317+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/README | 63+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/config.log | 14++++++++++++++
Asrc/mairix/configure | 337+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dates.c | 404+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dates.h | 45+++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/datescan.nfa | 112+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/datescan.report | 3303+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/db.c | 1297+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dfasyn/COPYING | 339+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dfasyn/INSTALL | 19+++++++++++++++++++
Asrc/mairix/dfasyn/Makefile | 62++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dfasyn/NEWS | 5+++++
Asrc/mairix/dfasyn/README | 8++++++++
Asrc/mairix/dfasyn/abbrevs.c | 67+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dfasyn/blocks.c | 168+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dfasyn/charclass.c | 364+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dfasyn/compdfa.c | 479+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dfasyn/configure | 4++++
Asrc/mairix/dfasyn/dfasyn.1 | 154+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dfasyn/dfasyn.5 | 650+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dfasyn/dfasyn.c | 690+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dfasyn/dfasyn.h | 365+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dfasyn/dfasyn.texi | 85+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dfasyn/evaluator.c | 248+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dfasyn/expr.c | 243+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dfasyn/n2d.c | 696+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dfasyn/n2d.h | 226+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dfasyn/parse.y | 262+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dfasyn/scan.l | 111+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dfasyn/states.c | 303+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dfasyn/stimulus.c | 87+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dfasyn/tabcompr.c | 181+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dfasyn/tokens.c | 85+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dirscan.c | 420+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dotlock.c | 116+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dotmairixrc.eg | 41+++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/dumper.c | 151++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/expandstr.c | 196+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/from.h | 32++++++++++++++++++++++++++++++++
Asrc/mairix/fromcheck.nfa | 218+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/fromcheck.report | 3222+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/glob.c | 393+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/hash.c | 143+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/mairix.1 | 673+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/mairix.32 | 0
Asrc/mairix/mairix.64 | 0
Asrc/mairix/mairix.c | 774+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/mairix.h | 402+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/mairix.spec | 45+++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/mairixrc.5 | 405+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/mbox.c | 1060+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/md5.c | 322+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/md5.h | 62++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/memmac.h | 72++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/mkversion | 15+++++++++++++++
Asrc/mairix/nvp.c | 416+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/nvp.h | 38++++++++++++++++++++++++++++++++++++++
Asrc/mairix/nvp.nfa | 197+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/nvpscan.report | 6352+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/nvptypes.h | 43+++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/old_docs/mairix.texi | 885+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/reader.c | 212+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/reader.h | 182+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/rfc822.c | 1536+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/search.c | 1482+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/stats.c | 128+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/tok.c | 344+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mairix/version.h | 4++++
Asrc/mairix/version.txt | 1+
Asrc/mairix/writer.c | 614+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
78 files changed, 33685 insertions(+), 4 deletions(-)

diff --git a/build/build-osx.sh b/build/build-osx.sh @@ -102,7 +102,7 @@ lipo mairix.32 mairix.64 -create -output mairix 2>&1 > /dev/null cd - cp src/fetchaddr build/osx/ - +cp src/mairix/mairix build/osx/ copydeps bin/mutt copydeps bin/mutt_dotlock copydeps bin/msmtp diff --git a/src/jaro b/src/jaro @@ -583,7 +583,7 @@ autostart() { mutt -F $MUTTDIR/rc "${1}" return 0 } - # or a path to folder + # or a path to folder { test -r ${1} } && { mutt -F $MUTTDIR/rc -f ${1} return 0 @@ -997,12 +997,12 @@ source $MUTTDIR/mboxes # specific configuration files source $MUTTDIR/crypto -source $MUTTDIR/colors source $MUTTDIR/general source $MUTTDIR/formats source $MUTTDIR/keybindings source $MUTTDIR/identity source $MUTTDIR/password +source $MUTTDIR/colors source $WORKDIR/Mutt.txt ## end of Jaro Mail generated muttrc #################################### @@ -1319,7 +1319,48 @@ editor() { return $? } - +####################### +## Search into maildirs +# using mairix +search() { + { which mairix > /dev/null } || { return 1 } + id=$RANDOM + rc=$WORKDIR/tmp/search.conf.$id + + # make index if no params given + ml=""; c=0 + for i in `ls $MAILDIRS`; do + # is it a maildir? + { test -r $MAILDIRS/${i}/cur } \ + && { test -r $MAILDIRS/${i}/new } \ + && { test -r $MAILDIRS/${i}/tmp } \ + && { c=`expr $c + 1`; ml="$ml:$i" } + done + func "searching maildirs: $ml" + cat <<EOF > $rc +base=$MAILDIRS +database=$WORKDIR/search.db +maildir=${ml} +mfolder=$WORKDIR/tmp/search.result.$id +mformat=maildir +EOF + # just index + { test ${#PARAM} = 0 } && { + act "Indexing $c maildirs for search" + act "please be patient..." + mairix -F -f $rc + rm -f $rc + exitcode=$? + { test $exitcode = 0 } && { notice "Done." } || { error "Error, indexing aborted." } + return $exitcode + } + act "Searching maildirs for: $PARAM" + act -n "" + mairix -F -f $rc ${=PARAM} 2> /dev/null + { test $? 
= 0 } && { jaro -q $WORKDIR/tmp/search.result.$id } + rm -rf $WORKDIR/tmp/search.result.$id + rm -f $rc +} ############## ## Open a File open_file() { @@ -1563,6 +1604,7 @@ main() subcommands_opts[update]="" subcommands_opts[stats]="" + subcommands_opts[search]="" subcommands_opts[addr]="" subcommands_opts[query]="" subcommands_opts[learn]="" @@ -1672,6 +1714,7 @@ main() update) update ;; + search) CLEANEXIT=0; search ${PARAM} ;; stats) CLEANEXIT=0; stats ;; addr) CLEANEXIT=0; address ${PARAM} ;; query) CLEANEXIT=0; query ${PARAM} ;; diff --git a/src/mairix/ACKNOWLEDGEMENTS b/src/mairix/ACKNOWLEDGEMENTS @@ -0,0 +1,60 @@ +These people have contributed useful patches, ideas and suggestions: + +Anand Kumria +André Costa +Andreas Amann +Andre Costa +Aredridel +Balázs Szabó +Bardur Arantsson +Benj. Mako Hill +Chris Mason +Christoph Dworzak +Christopher Rosado +Chung-chieh Shan +Claus Alboege +Corrin Lakeland +Dan Egnor +Daniel Jacobowitz +Dirk Huebner +Ed Blackman +Emil Sit +Felipe Gustavo de Almeida +Ico Doornekamp +Jaime Velasco Juan +James Leifer +Jerry Jorgenson +Joerg Desch +Johannes Schindelin +Johannes Weißl +John Arthur Kane +John Keener +Jonathan Kamens +Josh Purinton +Karsten Petersen +Kevin Rosenberg +Mark Hills +Martin Danielsson +Matthias Teege +Mikael Ylikoski +Mika Fischer +Oliver Braun +Paramjit Oberoi +Paul Fox +Peter Chines +Peter Jeremy +Robert Hofer +Roberto Boati +Samuel Tardieu +Sanjoy Mahajan +Satyaki Das +Steven Lumos +Tim Harder +Tom Doherty +Vincent Lefevre +Vladimir V. Kisil +Will Yardley +Wolfgang Weisselberg + +I apologise to any contributors who have been omitted from this list! 
+ diff --git a/src/mairix/COPYING b/src/mairix/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. 
+ + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. 
+ + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/src/mairix/INSTALL b/src/mairix/INSTALL @@ -0,0 +1,22 @@ +Installation of mairix goes as follows: + +./configure +make +make install + +You need to be root to run the final step unless you're installing under your +own home directory somewhere. + +However, you might want to tune the options further. The configure script +shares its common options with the usual autoconf-generated scripts, even +though it's not autoconf-generated itself. For example, a fuller build could +use + +CC=gcc CFLAGS="-O2 -Wall" ./configure \ + --prefix=/opt/mairix \ + --infodir=/usr/share/info +make +make install + +The final step is to create a ~/.mairixrc file. An example is included in the +file dotmairixrc.eg. Just copy that to ~/.mairixrc and edit it. diff --git a/src/mairix/Makefile b/src/mairix/Makefile @@ -0,0 +1,114 @@ +######################################################################### +# +# mairix - message index builder and finder for maildir folders. +# +# Copyright (C) Richard P. Curnow 2002-2004,2006 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# ======================================================================= + +######################################################################### +# Edit the following variables as required +CC=gcc +CFLAGS=-O2 -m64 -DHAS_STDINT_H -DHAS_INTTYPES_H -DUSE_GZIP_MBOX -DUSE_BZIP_MBOX +CPPFLAGS= +LDFLAGS= +LIBS=-lz -lbz2 + +####################################################################### +# If you're generating a package, you may want to use +# make DESTDIR=temporary_dir install +# to get the software installed to a directory where you can create +# a tdl.tar.gz from it +DESTDIR= + +####################################################################### + +prefix=$(DESTDIR)/usr/local +bindir=$(DESTDIR)/usr/local/bin +mandir=$(DESTDIR)/usr/local/man +man1dir=$(mandir)/man1 +man5dir=$(mandir)/man5 +infodir=$(DESTDIR)/usr/local/info +docdir=$(DESTDIR)/usr/local/doc/mairix-0.23 + +######################################################################### +# Things below this point shouldn't need to be edited. 
+ +OBJ = mairix.o db.o rfc822.o tok.o hash.o dirscan.o writer.o \ + reader.o search.o stats.o dates.o datescan.o mbox.o md5.o \ + fromcheck.o glob.o dumper.o expandstr.o dotlock.o \ + nvp.o nvpscan.o + +all : mairix + +mairix : $(OBJ) + $(CC) -o mairix $(CFLAGS) $(LDFLAGS) $(OBJ) $(LIBS) + +%.o : %.c memmac.h mairix.h reader.h Makefile + $(CC) -c $(CFLAGS) $(CPPFLAGS) -o $@ $< + +datescan.c datescan.h : datescan.nfa ./dfasyn/dfasyn + ./dfasyn/dfasyn -o datescan.c -ho datescan.h -r datescan.report -u datescan.nfa + +fromcheck.c fromcheck.h : fromcheck.nfa ./dfasyn/dfasyn + ./dfasyn/dfasyn -o fromcheck.c -ho fromcheck.h -r fromcheck.report -u fromcheck.nfa + +nvpscan.c nvpscan.h : nvp.nfa ./dfasyn/dfasyn + ./dfasyn/dfasyn -o nvpscan.c -ho nvpscan.h -r nvpscan.report -u nvp.nfa + +dates.o : datescan.h +mbox.o : fromcheck.h +nvp.o : nvpscan.h + +version.h: + ./mkversion + +./dfasyn/dfasyn: + if [ -d dfasyn ]; then cd dfasyn ; $(MAKE) CC="$(CC)" CFLAGS="$(CFLAGS)" ; else echo "No dfasyn subdirectory?" ; exit 1 ; fi + +clean: + -rm -f *~ *.o mairix *.s core + -rm -f mairix.cp mairix.fn mairix.aux mairix.log mairix.ky mairix.pg mairix.toc mairix.tp mairix.vr + -rm -f fromcheck.[ch] datescan.[ch] + -rm -f nvpscan.[ch] + if [ -d dfasyn ]; then cd dfasyn ; $(MAKE) clean ; fi + if [ -d test ]; then cd test ; $(MAKE) clean ; fi + +distclean: clean + if [ -d test ]; then cd test ; $(MAKE) distclean ; fi + -rm -f Makefile config.log + +install: + [ -d $(prefix) ] || mkdir -p $(prefix) + [ -d $(bindir) ] || mkdir -p $(bindir) + [ -d $(mandir) ] || mkdir -p $(mandir) + [ -d $(man1dir) ] || mkdir -p $(man1dir) + [ -d $(man5dir) ] || mkdir -p $(man5dir) + cp -f mairix $(bindir) + chmod 555 $(bindir)/mairix + cp -f mairix.1 $(man1dir) + chmod 444 $(man1dir)/mairix.1 + cp -f mairixrc.5 $(man5dir) + chmod 444 $(man5dir)/mairixrc.5 + +check: mairix + if [ -d test ]; then cd test ; $(MAKE) CC="$(CC)" CFLAGS="$(CFLAGS)" check ; else echo "No test subdirectory?" 
; exit 1 ; fi + +.PHONY : all install clean distclean check + +mairix.o : version.h + + diff --git a/src/mairix/Makefile.in b/src/mairix/Makefile.in @@ -0,0 +1,114 @@ +######################################################################### +# +# mairix - message index builder and finder for maildir folders. +# +# Copyright (C) Richard P. Curnow 2002-2004,2006 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# ======================================================================= + +######################################################################### +# Edit the following variables as required +CC=@cc@ +CFLAGS=@cflags@ @defs@ +CPPFLAGS=@CPPFLAGS@ +LDFLAGS=@LDFLAGS@ +LIBS=@LIBS@ + +####################################################################### +# If you're generating a package, you may want to use +# make DESTDIR=temporary_dir install +# to get the software installed to a directory where you can create +# a tdl.tar.gz from it +DESTDIR= + +####################################################################### + +prefix=$(DESTDIR)@prefix@ +bindir=$(DESTDIR)@bindir@ +mandir=$(DESTDIR)@mandir@ +man1dir=$(mandir)/man1 +man5dir=$(mandir)/man5 +infodir=$(DESTDIR)@infodir@ +docdir=$(DESTDIR)@docdir@ + +######################################################################### +# Things below this point shouldn't need to be edited. 
+ +OBJ = mairix.o db.o rfc822.o tok.o hash.o dirscan.o writer.o \ + reader.o search.o stats.o dates.o datescan.o mbox.o md5.o \ + fromcheck.o glob.o dumper.o expandstr.o dotlock.o \ + nvp.o nvpscan.o + +all : mairix + +mairix : $(OBJ) + $(CC) -o mairix $(CFLAGS) $(LDFLAGS) $(OBJ) $(LIBS) + +%.o : %.c memmac.h mairix.h reader.h Makefile + $(CC) -c $(CFLAGS) $(CPPFLAGS) -o $@ $< + +datescan.c datescan.h : datescan.nfa ./dfasyn/dfasyn + ./dfasyn/dfasyn -o datescan.c -ho datescan.h -r datescan.report -u datescan.nfa + +fromcheck.c fromcheck.h : fromcheck.nfa ./dfasyn/dfasyn + ./dfasyn/dfasyn -o fromcheck.c -ho fromcheck.h -r fromcheck.report -u fromcheck.nfa + +nvpscan.c nvpscan.h : nvp.nfa ./dfasyn/dfasyn + ./dfasyn/dfasyn -o nvpscan.c -ho nvpscan.h -r nvpscan.report -u nvp.nfa + +dates.o : datescan.h +mbox.o : fromcheck.h +nvp.o : nvpscan.h + +version.h: + ./mkversion + +./dfasyn/dfasyn: + if [ -d dfasyn ]; then cd dfasyn ; $(MAKE) CC="$(CC)" CFLAGS="$(CFLAGS)" ; else echo "No dfasyn subdirectory?" ; exit 1 ; fi + +clean: + -rm -f *~ *.o mairix *.s core + -rm -f mairix.cp mairix.fn mairix.aux mairix.log mairix.ky mairix.pg mairix.toc mairix.tp mairix.vr + -rm -f fromcheck.[ch] datescan.[ch] + -rm -f nvpscan.[ch] + if [ -d dfasyn ]; then cd dfasyn ; $(MAKE) clean ; fi + if [ -d test ]; then cd test ; $(MAKE) clean ; fi + +distclean: clean + if [ -d test ]; then cd test ; $(MAKE) distclean ; fi + -rm -f Makefile config.log + +install: + [ -d $(prefix) ] || mkdir -p $(prefix) + [ -d $(bindir) ] || mkdir -p $(bindir) + [ -d $(mandir) ] || mkdir -p $(mandir) + [ -d $(man1dir) ] || mkdir -p $(man1dir) + [ -d $(man5dir) ] || mkdir -p $(man5dir) + cp -f mairix $(bindir) + chmod 555 $(bindir)/mairix + cp -f mairix.1 $(man1dir) + chmod 444 $(man1dir)/mairix.1 + cp -f mairixrc.5 $(man5dir) + chmod 444 $(man5dir)/mairixrc.5 + +check: mairix + if [ -d test ]; then cd test ; $(MAKE) CC="$(CC)" CFLAGS="$(CFLAGS)" check ; else echo "No test subdirectory?" 
; exit 1 ; fi + +.PHONY : all install clean distclean check + +mairix.o : version.h + + diff --git a/src/mairix/NEWS b/src/mairix/NEWS @@ -0,0 +1,317 @@ +NEW IN VERSION 0.23 +=================== +* Allow '=' in message-id search for RFC2822 conformance +* Add the option -H to force hardlinks +* Skip .gitignore files +* Do not interpret special characters [~,/=^] in Message-ID queries +* Fix faulty mbox message separators +* Improve reporting of unparsed MIME headers & remove code duplication +* Allow empty sections in MIME headers +* Add support for uuencoded attachments +* Improve the parsing of MIME boundaries +* Fix SEGV if mbox shrinks +* Add test suite +* Fix building in parallel + +NEW IN VERSION 0.22 +=================== + +* Skip symlinks when using mbox (R A Lichtensteiger) +* Update copyright year info throughout +* Update ACKNOWLEDGEMENTS and copyright headers where more credit was due +* Update FSF address in file headers +* Update COPYING to latest gpl-2.0.txt +* Improve error message if home directory cannot be determined +* Honour HOME environment variable (Andreas Amann) +* MIME types are allowed to have "+" characters in them. (Jonathan Kamens) +* Fix deficiencies in the parsing of mbox From lines (Jonathan Kamens) +* Include the existing -x flag in the help message (Mark Hills) +* Fix documentation nits (Tom Doherty) +* Remove spurious message when the mtime of a message file has changed +* Do not export functions already exported through a callback structure. (Samuel Tardieu) +* Fix two manpages buglets. (Samuel Tardieu) +* When freeing a struct nvp, do not forget to free the struct nvp_entry. (Samuel Tardieu) +* Do not leak memory if duplicate fields are present. (Samuel Tardieu) +* Initialize the date header with a known value.
(Samuel Tardieu) +* Merge two conflicting solutions for bad MIME encoding +* Fix segfault when last char is not a newline (Mika Fischer) +* fix for MIME-related crash (Paramjit Oberoi) +* Add support for claws-mail (Anand Kumria) +* Add MH sub-type support for ezmlm-archives (Claus Alboege) +* Detect a trailing -f or -o with no following argument +* Allow lines starting "From" to occur part-way through the header +* Display message-ID in search -x mode +* Remove execute permission from source files +* Handle mbox from separators where email address is in angle brackets +* Fix a bug in rfc822.c: Some headers weren't correctly parsed. (Jaime Velasco Juan) + +NEW IN VERSION 0.21 +=================== + +* Fix make clean target in dfasyn/ (Benj. Mako Hill) +* Limit number of messages that are examined when an end boundary is missing in + an mbox (Chung-chieh Shan) +* Avoid examining . and .. when traversing MH folder hierarchy (Steven Lumos) +* Fix various bugs in the name/value parser +* Add some RFC2231 support to the name/value parser (continuations) +* Fix indexing when existing database only contains 1 message + +NEW IN VERSION 0.20 +=================== + +* Cache uncompressed mbox data (Chris Mason, further work by me) +* Fix gaps in date ranges for search +* Unlock database if mairix is interrupted (Paul Fox) +* Add fast index option (-F) +* Fix conditional compilation errors for compressed mbox +* Reimplement MIME header parsing +* Add capability to search on names of attachments +* Add capability to search on state of message flags +* Create maildir-format mfolder filenames correctly with regard to flags +* Various bug fixes (Oliver Braun, Matthias Teege) + + +NEW IN VERSION 0.19 +=================== +* mairix.spec fixes (André Costa) +* bug fix: freeing of message structures (Karsten Petersen) +* Add new -x (--excerpt-output) option, an alternative mode for searching. + This displays the key headers from the matching messages on stdout.
+* Add notes about the mairix-users mailing list and the SourceForge page to + README. +* Fix configuration + compilation to allow building with gzip support but + without bzlib support. +* Rename internal functions like zopen() to avoid name conflicts on MacOS X. + (Vincent Lefevre) +* Remove a spurious ; in bison input file (Vincent Lefevre) +* Improve output given in various error conditions (based on patch by Karsten + Petersen) + +NEW IN VERSION 0.18 +=================== + +* Support bzip2'd mbox folders +* Fix bugs in parsing mbox folders containing unquoted 'From ' lines inside + MIME body parts +* Fix bug in parsing content-type data containing quotes with whitespace + before +* Clone the message flags (when both the source folder and mfolder are both + of maildir type) +* New manpages mairix.1 and mairixrc.5 are included, and the old texinfo-based + documentation is deprecated into the old_docs/ directory. +* Upgrade scanners to new version of dfasyn +* Support Mew's MH folder subtype + + +NEW IN VERSION 0.17.1 +===================== + +* Fix detection of MH folder subtype used by nnml (Gnus) +* Fix filename format generated in the /cur/ directory for maildir mfolders. +* Syntax fix in configure script + +NEW IN VERSION 0.17 +=================== + +* Support gzipped mbox folders (any file matched by a mbox= line in the config + file is considered as a gzipped mbox if its name ends in .gz) +* Rework directory traversal for the '...' construct to speed up indexing and + the check that mfolder isn't going to overwrite a real folder when searching. +* Check whether database exists before attempting to do searching. +* Matched new maildir messages go in /new/ subdirectory of maildir mfolder. +* Fix lots of compiler warnings generated by gcc4.x +* Don't create and immediately scrub database entries for empty mbox folders. +* Fix usage() info for bare word in searching +* Allow '.' 
on the ends of numeric filenames in MH folders (to work + with Evolution) +* Update .PHONY target so that 'make install' etc are more reliable. +* Add X-source-folder header to indicate the original folder of a match found + in an mbox. +* Migration to git for revision control. + +NEW IN VERSION 0.16.1 +===================== + +* Remove the lockfile if the program terminates for any reason. + +NEW IN VERSION 0.16 +=================== + +* Home directory (~) and environment variable ($foo / ${foo}) expansion in the + .mairixrc file +* Add -Q flag to skip database integrity checks during indexing (equivalently + the nochecks option in .mairixrc file). This speeds up indexing but loses + some robustness. +* Add ^ word prefix to require substring search to be left-anchored +* Split 'make clean' into separate clean and clean_docs +* Improve some error messages +* Add online help entries for -o and -d +* Don't write out the database if there are no changes found during indexing. +* Fix stale information about the 'and' and 'or' delimiters in the online help. +* Add the capability to omit particular folders from indexing (omit keyword in + .mairixrc file.) This allows broad wildcards to be used with selected + folders removed from the wildcard which is much more convenient in many + set-ups. +* Avoid writing matches to any folder on the list of folders to be indexed + (affects both mfolder option and argument of -o command line switch.) This + prevents disastrous loss of messages in the event of trying to overwrite a + wanted folder with the matches. +* Implement dot-locking on the database file to prevent corruption due to + concurrent updates. Add --unlock file to forcibly remove a stray lockfile. +* Display message path in warning messages from rfc822 parsing.
+ +NEW IN VERSION 0.15 +=================== + +* Migrate to GNU Arch for hosting the development archive +* In mbox parsing, handle return path in 'From ' line only being a local part + (reported by several people) +* Don't output number of matched messages in raw mode (to make output more + useful to scripts etc) (Samuel Tardieu) +* Fix vfolder->mfolder in dotmairixrc.eg (reported by several people) +* Handle spaces in multipart message boundary strings (Chung-chieh Shan) +* Be more tolerant of bad multipart message boundary separators (Chung-chieh + Shan) +* Add rudimentary database dump command (-d/--dump) +* Fix bug in handling of per-database hash key +* Improve standards-compliance of maildir output file names (Jeff King) +* Remove most compiler warnings + +NEW IN VERSION 0.14.1 +===================== + +* Bug fix : splitting of messages in mboxes was too strict regarding whitespace + +NEW IN VERSION 0.14 +=================== + +* Fix error in path (p:) searching for messages in mboxes. +* Improve usage() function + +NEW IN VERSION 0.13 +=================== + +* Fixes to support the mbox format used by Mozilla mail +* When creating vfolder directories for maildir/mh, remove existing + non-directory at the same path, if present. When creating mbox vfolder file, + complain if there's already a directory at the same path and exit. +* Switch from the term "virtual folder" to "match folder" +* Fix bug in path matches (p:) containing upper-case letters - previously they + matched on corresponding all lower-case paths. + +NEW IN VERSION 0.12 +=================== + +! Change in database file format - existing databases need to be destroyed and + recreated. + +* Indexing of mbox folders in addition to the existing maildir & MH support +* Output to mbox format vfolder +* Return exit status 1 if no messages are matched in search mode, and exit + status 2 for all error conditions. +* Allow wildcards to be used in specifying maildir and mh folder paths. 
+* Searching on messages having a particular Message-ID (m:msgid expression in + search mode). +* When indexing whole email addresses, '+' is now considered a valid character. +* Use ',' instead of '+' in search expressions, and '/' instead of ','. This + is to allow '+' to be used inside email addresses that are being searched + for. The '/' character is traditionally associated with meaning 'or', so it + made more sense to move ',' to mean 'and'. (Unfortunately, there were very + few metacharacters left which don't have some special meaning to shells, and + I wanted to avoid the need to quote or escape the search expressions.) +* Bug fix checking return status of mmap. +* Handle ">From " at the start of the message headers +* Handle mis-formatted encoding strings "7 bit" and "8 bit" +* Make every database use a random seed for the token hash function (to prevent + denial of service attacks against mairix through carefully crafted messages.) +* Rename some options in the mairixrc file, to put the folder formats on an + equal footing. +* Properly handle the case where a maildir vfolder exists but one or more of + the new,tmp,cur subdirectories is missing. +* Add configure script (not autoconf-based) + +NEW IN VERSION 0.11 +=================== + +* Detect failed malloc (out of memory) conditions properly and report it. +* Improved date specification syntax for d: option +* Allow vfolder to be an absolute path or relative to current directory, + instead of just relative to base directory. + +NEW IN VERSION 0.10 +=================== + +* Add 'raw' mode for searching. +* When purging, only print the pass[12] message in verbose mode +* Add an ACKNOWLEDGEMENTS file. +* Hack to handle missing NAME_MAX on various non-Linux systems +* Improve mairix.spec file for RPM building +* Change default value for prefix in Makefile to make it more standard. 
+ +NEW IN VERSION 0.9 +================== + +* Fix problem with auditing headers if a uucp/mbox-style "from " header is + present at the start. +* Allow \: sequence in folder names to specify a : + +NEW IN VERSION 0.8 +================== + +* Fix bug : mairix used to crash if a message had corrupted RFC822 header lines + +NEW IN VERSION 0.7 +================== + +* Fix bug : mairix likely to crash if a non-existent folder is listed in the + conf file. +* Allow multiple folders and mh_folders lines in the conf file for people who + have many separate folders. +* Print an extra 'comfort' message in verbose mode before starting to scan the + directory tree. + +NEW IN VERSION 0.6 +================== + +* When an unrecognized encoding is found, ignore the body part instead of + aborting the run. + +NEW IN VERSION 0.5 +================== + +* When -a option is used for search, avoid symlinking the same message twice if + it matches more than one query. +* Fixes to rpm spec file. +* Fix handling of = in base64-encoded attachments. +* Support non POSIX locales. +* Support rfc2047 encoding in headers. +* Create vfolder if it doesn't already exist. +* Allow searching on complete email addresses as well as individual words in + to, cc and from fields. +* New -o option to allow vfolder name to be given on the command line. + +NEW IN VERSION 0.4 +================== + +* Support for MH folders +* Create database with mode 0600 instead of 0644 (better security). +* Add Makefile target to install whichever forms of the documentation have been + built. + +NEW IN VERSION 0.3 +================== + +* Various bug fixes + +NEW IN VERSION 0.2 +================== + +* Substrings of message paths can be used as search expressions (p:substring + option) +* = now used instead of / as the delimiter for number of errors in an + approximate match (to help with path search) +* Bug fix when using -t mode for search with unpurged dead messages still in + the database.
+ +================== +# vim:comments-=mb\:*:comments+=fb\:* diff --git a/src/mairix/README b/src/mairix/README @@ -0,0 +1,63 @@ +mairix is a program for indexing and searching email messages stored in +Maildir, MH or mbox folders. + +* Indexing is fast. It runs incrementally on new messages - any particular + message only gets scanned once in the lifetime of the index file. + +* The search mode populates a "virtual" folder with symlinks(*) which + point to the real messages. This folder can be opened as usual in your mail + program. + +* The search mode is very fast. + +* Indexing and searching works on the basis of words. The index file tabulates + which words occur in which parts (particular headers + body) of which + messages. + +The program is a very useful complement to mail programs like mutt +(http://www.mutt.org/, which supports Maildir, MH and mbox folders) and +Sylpheed (which supports MH folders). + +[(*) where the input or output folder is an mbox, a copy of the message is made +instead of symlinking.] + +See also the mairix.txt file. + +********************************************************************* + Copyright (C) Richard P. Curnow 2002-2004 + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +********************************************************************* + +Suggestions, bug reports, experiences, praise, complaints etc to the author +please, at <rc@rc0.org.uk> + +Since July 2006, there is a mairix-users mailing list. To subscribe or to view +the archives, visit + + https://lists.sourceforge.net/lists/listinfo/mairix-users + +The main website for mairix is + + http://www.rc0.org.uk/mairix + +The SourceForge project page is + + http://www.sf.net/projects/mairix + +ACKNOWLEDGEMENTS +================ + +See the ACKNOWLEDGEMENTS file diff --git a/src/mairix/config.log b/src/mairix/config.log @@ -0,0 +1,14 @@ +Test program is +#include <zlib.h> +int main () { + const char *foo; + foo = zlibVersion(); + return 0; +} +Test program is +#include <bzlib.h> +int main () { + const char *foo; + foo = BZ2_bzlibVersion(); + return 0; +} diff --git a/src/mairix/configure b/src/mairix/configure @@ -0,0 +1,337 @@ +#!/bin/sh +######################################################################### +# +# mairix - message index builder and finder for maildir folders. +# +# Copyright (C) Richard P. Curnow 2003,2004,2005 +# Copyright (C) Paramjit Oberoi 2005 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +# ======================================================================= + +if [ -f config.log ]; then rm -f config.log ; fi +exec 5>config.log + +MYCC=${CC:-gcc} +MYCFLAGS=${CFLAGS:--O2 -Wall} +MYCPPFLAGS=${CPPFLAGS:-} +MYLDFLAGS=${LDFLAGS:-} + +# ======================================================================= +# Functions + +#{{{ cleanup +cleanup () { + if [ -f docheck.c ]; then rm -f docheck.c ; fi + if [ -f docheck.o ]; then rm -f docheck.o ; fi + if [ -f docheck ]; then rm -f docheck ; fi + rm -rf docheck.c docheck.o docheck +} +#}}} + +#{{{ test_cc : basic compiler sanity check +test_cc () { + printf "Testing whether your compiler \"$MYCC $MYCFLAGS\" works : " + cat >docheck.c <<EOF; +#include <stdio.h> +int main (int argc, char **argv) +{ + return 0; +} +EOF + ${MYCC} ${MYCFLAGS} -o docheck docheck.c 1>&5 2>&5 + if [ $? -eq 0 ] + then + printf "it works\n" + else + printf "it doesn't work\n" + printf "Failed program was\n" 1>&5 + cat docheck.c 1>&5 + rm -f docheck.c docheck + exit 1 + fi + cleanup +} +#}}} + +#{{{ test_for_stdint_h +test_for_stdint_h () { + cat >docheck.c <<EOF; +#include <stdint.h> +int main(int argc, char **argv) { + return 0; +} +EOF + + ${MYCC} ${MYCFLAGS} -c -o docheck.o docheck.c >/dev/null 2>&1 + if [ $? -eq 0 ] + then + result=0 + else + result=1 + fi + + rm -f docheck.c docheck.o + echo $result +} +#}}} +#{{{ test_for_inttypes_h +test_for_inttypes_h () { + cat >docheck.c <<EOF; +#include <inttypes.h> +int main(int argc, char **argv) { + return 0; +} +EOF + + ${MYCC} ${MYCFLAGS} -c -o docheck.o docheck.c >/dev/null 2>&1 + if [ $? 
-eq 0 ] + then + result=0 + else + result=1 + fi + + rm -f docheck.c docheck.o + echo $result +} +#}}} +#{{{ test_for_zlib +test_for_zlib () { + cat > docheck.c <<EOF; +#include <zlib.h> +int main () { + const char *foo; + foo = zlibVersion(); + return 0; +} +EOF + echo "Test program is" 1>&5 + cat docheck.c 1>&5 + ${MYCC} ${MYCPPFLAGS} ${MYCFLAGS} ${MYLDFLAGS} -o docheck docheck.c -lz 1>&5 2>&1 + if [ $? -eq 0 ] + then + result=0 + else + result=1 + fi + rm -f docheck.c docheck + echo $result +} +#}}} +#{{{ test_for_bzlib +test_for_bzlib () { + cat > docheck.c <<EOF; +#include <bzlib.h> +int main () { + const char *foo; + foo = BZ2_bzlibVersion(); + return 0; +} +EOF + echo "Test program is" 1>&5 + cat docheck.c 1>&5 + ${MYCC} ${MYCPPFLAGS} ${MYCFLAGS} ${MYLDFLAGS} -o docheck docheck.c -lbz2 1>&5 2>&1 + if [ $? -eq 0 ] + then + result=0 + else + result=1 + fi + rm -f docheck.c docheck + echo $result +} +#}}} +#{{{ usage +usage () { + cat <<EOF; +\`configure' configures tdl to adapt to many kinds of systems. + +Usage: ./configure [OPTION]... + +Defaults for the options are specified in brackets. + +Configuration: + -h, --help display this help and exit + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [/usr/local] + +By default, \`make install' will install all the files in +\`/usr/local/bin', \`/usr/local/lib' etc. You can specify +an installation prefix other than \`/usr/local' using \`--prefix', +for instance \`--prefix=$HOME'. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --infodir=DIR info documentation [PREFIX/info] + --mandir=DIR man documentation [PREFIX/man] + --docdir=DIR other documentation [PREFIX/doc/mairix-\$version] + +Other options: + --enable-gzip-mbox attempt to support gzipped mboxes (requires zlib) + --disable-gzip-mbox don't attempt to support gzipped mboxes + --enable-bzip-mbox attempt to support bzip2ed mboxes (requires bzlib) + --disable-bzip-mbox don't attempt to support bzip2ed mboxes + +Some influential environment variables: + CC C compiler command + CFLAGS C compiler flags + CPPFLAGS Extra C preprocessor flags, e.g. -I<include dir> if you + have header files in a nonstandard directory <include dir> + LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a + nonstandard directory <lib dir> + +Use these variables to override the choices made by \`configure' or to help +it to find libraries and programs with nonstandard names/locations. + +Report bugs to <rc@rc0.org.uk>. 
+EOF +} +#}}} +# ======================================================================= + +# Defaults for variables +PREFIX=/usr/local + +use_readline=yes +bad_options=no +use_gzip_mbox=yes +use_bzip_mbox=yes + +# Parse options to configure +for option +do + case "$option" in + + --prefix=* | --install-prefix=* ) + PREFIX=`echo $option | sed -e 's/[^=]*=//;'` + ;; + --bindir=* ) + BINDIR=`echo $option | sed -e 's/[^=]*=//;'` + ;; + --mandir=* ) + MANDIR=`echo $option | sed -e 's/[^=]*=//;'` + ;; + --infodir=* ) + INFODIR=`echo $option | sed -e 's/[^=]*=//;'` + ;; + --docdir=* ) + DOCDIR=`echo $option | sed -e 's/[^=]*=//;'` + ;; + --enable-gzip-mbox ) + use_gzip_mbox=yes + ;; + --disable-gzip-mbox ) + use_gzip_mbox=no + ;; + --enable-bzip-mbox ) + use_bzip_mbox=yes + ;; + --disable-bzip-mbox ) + use_bzip_mbox=no + ;; + -h | --help ) + usage + exit 1 + ;; + * ) + printf "Unrecognized option : $option\n" + bad_options=yes + ;; + esac +done + +if [ ${bad_options} = yes ]; then + exit 1 +fi + +DEFS="" +test_cc + +printf "Checking for <stdint.h> : " +if [ `test_for_stdint_h` -eq 0 ]; then + printf "Yes\n" + DEFS="${DEFS} -DHAS_STDINT_H" +else + printf "No\n" +fi + +printf "Checking for <inttypes.h> : " +if [ `test_for_inttypes_h` -eq 0 ]; then + printf "Yes\n" + DEFS="${DEFS} -DHAS_INTTYPES_H" +else + printf "No\n" +fi + +if [ $use_gzip_mbox = "yes" ]; then + printf "Checking for zlib : " + if [ `test_for_zlib` -eq 0 ]; then + printf "Yes\n"; + DEFS="${DEFS} -DUSE_GZIP_MBOX" + LIBS="-lz" + else + printf "No (disabled gzipped mbox support)\n"; + fi +fi + +if [ $use_bzip_mbox = "yes" ]; then + printf "Checking for bzlib : " + if [ `test_for_bzlib` -eq 0 ]; then + printf "Yes\n"; + DEFS="${DEFS} -DUSE_BZIP_MBOX" + LIBS="${LIBS} -lbz2" + else + printf "No (disabled bzip2ed mbox support)\n"; + fi +fi + +#{{{ Determine version number of the program. 
+if [ -f version.txt ]; then + revision=`cat version.txt` +else + revision="DEVELOPMENT" +fi + +#}}} +if [ "x" = "x${BINDIR}" ]; then BINDIR=${PREFIX}/bin ; fi +if [ "x" = "x${MANDIR}" ]; then MANDIR=${PREFIX}/man ; fi +if [ "x" = "x${INFODIR}" ]; then INFODIR=${PREFIX}/info ; fi +if [ "x" = "x${DOCDIR}" ]; then DOCDIR=${PREFIX}/doc/mairix-${revision} ; fi + +echo "Generating Makefile" + +rm -f Makefile +sed -e "s%@cc@%${MYCC}%; \ + s%@defs@%${DEFS}%; \ + s%@cflags@%${MYCFLAGS}%; \ + s%@prefix@%${PREFIX}%; \ + s%@bindir@%${BINDIR}%; \ + s%@mandir@%${MANDIR}%; \ + s%@infodir@%${INFODIR}%; \ + s%@docdir@%${DOCDIR}%; \ + s%@LIBS@%${LIBS}%; \ + s%@CPPFLAGS@%${MYCPPFLAGS}%; \ + s%@LDFLAGS@%${MYLDFLAGS}%; \ + " < Makefile.in > Makefile + +# Avoid editing Makefile instead of Makefile.in +chmod ugo-w Makefile + +# ======================================================================= +# vim:et:sw=2:ht=2:sts=2:fdm=marker:cms=#%s + diff --git a/src/mairix/dates.c b/src/mairix/dates.c @@ -0,0 +1,404 @@ +/* + mairix - message index builder and finder for maildir folders. + + ********************************************************************** + * Copyright (C) Richard P. Curnow 2002-2004,2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + ********************************************************************** + */ + +#include <stdio.h> +#include <string.h> +#include <time.h> +#include <ctype.h> +#include <assert.h> +#include "mairix.h" +#include "dates.h" +#include "datescan.h" + +static enum DATESCAN_TYPE discover_type(char *first, char *last)/*{{{*/ +{ + int current_state = 0; + int token; + char *p; + p = first; + while (p < last) { + token = datescan_char2tok[(int)*(unsigned char*)p]; + current_state = datescan_next_state(current_state, token); + if (current_state < 0) break; + p++; + } + + if (current_state < 0) { + return DS_FAILURE; + } else { + return datescan_attr[current_state]; + } +} +/*}}}*/ +static int match_month(char *p)/*{{{*/ +{ + if (!strncasecmp(p, "jan", 3)) return 1; + if (!strncasecmp(p, "feb", 3)) return 2; + if (!strncasecmp(p, "mar", 3)) return 3; + if (!strncasecmp(p, "apr", 3)) return 4; + if (!strncasecmp(p, "may", 3)) return 5; + if (!strncasecmp(p, "jun", 3)) return 6; + if (!strncasecmp(p, "jul", 3)) return 7; + if (!strncasecmp(p, "aug", 3)) return 8; + if (!strncasecmp(p, "sep", 3)) return 9; + if (!strncasecmp(p, "oct", 3)) return 10; + if (!strncasecmp(p, "nov", 3)) return 11; + if (!strncasecmp(p, "dec", 3)) return 12; + return 0; +} +/*}}}*/ +static int year_fix(int y)/*{{{*/ +{ + if (y>100) { + return y-1900; + } else if (y < 70) { + /* 2000-2069 */ + return y+100; + } else { + /* 1970-1999 */ + return y; + } +} +/*}}}*/ +static int last_day(int mon, int y) {/*{{{*/ + /* mon in [0,11], y=year-1900 */ + + static unsigned char days[12] = {31,28,31,30,31,30,31,31,30,31,30,31}; + if (mon != 1) { + return days[mon]; + } else { + /* Because 2000 was a leap year, we don't have to bother about the %100 + * rule, at least not in this range of dates. 
*/ + if ((y % 4) == 0) { + return 29; + } else { + return 28; + } + } +} +/*}}}*/ +static void set_day(struct tm *x, int y)/*{{{*/ +{ + if (y > x->tm_mday) { + /* Shorthand for that day in previous month */ + if (x->tm_mon == 0) { + x->tm_mon = 11; + --x->tm_year; + } else { + --x->tm_mon; + } + } + x->tm_mday = y; /* Always */ +} +/*}}}*/ +static int is_later_dm(struct tm *x, int m, int d)/*{{{*/ +{ + int m1 = m-1; + return ((x->tm_mon < m1) || ((x->tm_mon == m1) && (x->tm_mday < d))); +} +/*}}}*/ +static int scan_date_expr(char *first, char *last, struct tm *start, struct tm *end)/*{{{*/ +{ + enum DATESCAN_TYPE type; + time_t now; + + time(&now); + type = discover_type(first, last); + + if (type == DS_SCALED) {/*{{{*/ + int v; + char *p; + time_t then; + + p = first; + v = 0; + while (isdigit(*p)) { + v = (v*10) + (*p - '0'); + p++; + } + switch(*p) { + case 'd': v *= 86400; break; + case 'w': v *= 7*86400; break; + case 'm': v *= 30*86400; break; + case 'y': v *= 365*86400; break; + default: + fprintf(stderr, "Unrecognized relative date scaling '%c'\n", *p); + return -1; + } + then = now - v; + if (start) { + *start = *localtime(&then); + } + if (end) { + *end = *localtime(&then); + }/*}}}*/ + } else if (type == DS_FAILURE) { + fputs("Cannot parse date expression [", stderr); + fwrite(first, sizeof(char), last-first, stderr); + fputs("]\n", stderr); + return -1; + } else { + /* something else */ + int v1, v3; + int m2; /* decoded month */ + char *p; + + v1 = v3 = m2 = 0; + p = first; + while (p < last && isdigit(*p)) { + v1 = (v1*10) + (*p - '0'); + p++; + } + if (p < last) { + m2 = match_month(p); + p += 3; + if (m2 == 0) { + return -1; /* failure */ + } + + } + while (p < last && isdigit(*p)) { + v3 = (v3*10) + (*p - '0'); + p++; + } + assert(p==last); /* should be true in all cases. 
*/ + + switch (type) { + case DS_D:/*{{{*/ + if (start) set_day(start, v1); + if (end) set_day(end, v1); + break; +/*}}}*/ + case DS_Y:/*{{{*/ + if (start) { + start->tm_mday = 1; + start->tm_mon = 0; /* january */ + start->tm_year = year_fix(v1); + } + if (end) { + end->tm_mday = 31; + end->tm_mon = 11; + end->tm_year = year_fix(v1); + } + break; +/*}}}*/ + case DS_YYMMDD:/*{{{*/ + if (start) { + start->tm_mday = v1 % 100; + start->tm_mon = ((v1 / 100) % 100) - 1; + start->tm_year = year_fix(v1/10000); + } + if (end) { + end->tm_mday = v1 % 100; + end->tm_mon = ((v1 / 100) % 100) - 1; + end->tm_year = year_fix(v1/10000); + } + break; +/*}}}*/ + case DS_M:/*{{{*/ + if (start) { + if (m2-1 > start->tm_mon) --start->tm_year; /* shorthand for previous year */ + start->tm_mon = m2-1; + start->tm_mday = 1; + } + if (end) { + if (m2-1 > end->tm_mon) --end->tm_year; /* shorthand for previous year */ + end->tm_mon = m2-1; + end->tm_mday = last_day(m2-1, end->tm_year); + } + break; +/*}}}*/ + case DS_DM:/*{{{*/ + if (start) { + if (is_later_dm(start, m2, v1)) --start->tm_year; /* shorthand for previous year. */ + start->tm_mon = m2-1; + start->tm_mday = v1; + } + if (end) { + if (is_later_dm(end, m2, v1)) --end->tm_year; /* shorthand for previous year. */ + end->tm_mon = m2-1; + end->tm_mday = v1; + } + break; +/*}}}*/ + case DS_MD:/*{{{*/ + if (start) { + if (is_later_dm(start, m2, v3)) --start->tm_year; /* shorthand for previous year. */ + start->tm_mon = m2-1; + start->tm_mday = v3; + } + if (end) { + if (is_later_dm(end, m2, v3)) --end->tm_year; /* shorthand for previous year. 
*/ + end->tm_mon = m2-1; + end->tm_mday = v3; + } + break; +/*}}}*/ + case DS_DMY:/*{{{*/ + if (start) { + start->tm_mon = m2-1; + start->tm_mday = v1; + start->tm_year = year_fix(v3); + } + if (end) { + end->tm_mon = m2-1; + end->tm_mday = v1; + end->tm_year = year_fix(v3); + } + break; +/*}}}*/ + case DS_YMD:/*{{{*/ + if (start) { + start->tm_mon = m2-1; + start->tm_mday = v3; + start->tm_year = year_fix(v1); + } + if (end) { + end->tm_mon = m2-1; + end->tm_mday = v3; + end->tm_year = year_fix(v1); + } + break; +/*}}}*/ + case DS_MY:/*{{{*/ + if (start) { + start->tm_year = year_fix(v3); + start->tm_mon = m2 - 1; + start->tm_mday = 1; + } + if (end) { + end->tm_year = year_fix(v3); + end->tm_mon = m2 - 1; + end->tm_mday = last_day(end->tm_mon, end->tm_year); + } + break; +/*}}}*/ + case DS_YM:/*{{{*/ + if (start) { + start->tm_year = year_fix(v1); + start->tm_mon = m2 - 1; + start->tm_mday = 1; + } + if (end) { + end->tm_year = year_fix(v1); + end->tm_mon = m2 - 1; + end->tm_mday = last_day(end->tm_mon, end->tm_year); + } + break;/*}}}*/ + case DS_FAILURE: + return -1; + break; + + case DS_SCALED: + assert(0); + break; + + } + } + return 0; +} +/*}}}*/ + +int scan_date_string(char *in, time_t *start, int *has_start, time_t *end, int *has_end)/*{{{*/ +{ + char *hyphen; + time_t now; + struct tm start_tm, end_tm; + char *nullchar; + int status; + + *has_start = *has_end = 0; + + nullchar = in; + while (*nullchar) nullchar++; + + time(&now); + start_tm = end_tm = *localtime(&now); + start_tm.tm_hour = 0; + start_tm.tm_min = 0; + start_tm.tm_sec = 0; + end_tm.tm_hour = 23; + end_tm.tm_min = 59; + end_tm.tm_sec = 59; + + hyphen = strchr(in, '-'); + if (!hyphen) { + /* Start and end are the same. 
*/ + *has_start = *has_end = 1; + status = scan_date_expr(in, nullchar, &start_tm, &end_tm); + if (status) return status; + *start = mktime(&start_tm); + *end = mktime(&end_tm); + return 0; + } else { + if (hyphen+1 < nullchar) { + *has_end = 1; + status = scan_date_expr(hyphen+1, nullchar, NULL, &end_tm); + if (status) return status; + *end = mktime(&end_tm); + start_tm = end_tm; + } + if (hyphen > in) { + *has_start = 1; + status = scan_date_expr(in, hyphen, &start_tm, NULL); + if (status) return status; + *start = mktime(&start_tm); + } + } + return 0; +} +/*}}}*/ + +#ifdef TEST +static void check(char *in)/*{{{*/ +{ + struct tm start, end; + int result; + result = scan_date_string(in, &start, &end); + if (result) printf("Conversion for <%s> failed\n", in); + else { + char buf1[128], buf2[128]; + strftime(buf1, 128, "%d-%b-%Y", &start); + strftime(buf2, 128, "%d-%b-%Y", &end); + printf("Computed range for <%s> : %s - %s\n", in, buf1, buf2); + } + +} +/*}}}*/ +int main (int argc, char **argv)/*{{{*/ +{ + + check("2w-1w"); + check("4m-1w"); + check("2002-2003"); + check("may2002-2003"); + check("2002may-2003"); + check("feb98-15may99"); + check("feb98-15may1999"); + check("2feb98-1y"); + check("02feb98-1y"); + check("970617-20010618"); + + return 0; +} +/*}}}*/ +#endif diff --git a/src/mairix/dates.h b/src/mairix/dates.h @@ -0,0 +1,45 @@ +/* + mairix - message index builder and finder for maildir folders. + + ********************************************************************** + * Copyright (C) Richard P. Curnow 2002-2004 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +#ifndef DATES_H +#define DATES_H + +enum DATESCAN_TYPE { /* Kind of date expression recognised by the scanner generated from datescan.nfa. */ + DS_FAILURE, /* default attribute: the text is not a recognised date expression */ + DS_D, /* day of month only */ + DS_Y, /* year only, two or four digits */ + DS_YYMMDD, /* six or eight consecutive digits */ + DS_SCALED, /* digits followed by one letter, a relative offset */ + DS_M, /* three letter month only */ + DS_DM, /* day then month */ + DS_MD, /* month then day */ + DS_YM, /* year then month */ + DS_MY, /* month then year */ + DS_YMD, /* year, month, day */ + DS_DMY, /* day, month, year */ +}; + +extern int datescan_next_state(int current_state, int next_token); /* scanner transition function; dates.c stops scanning when it returns a negative state */ +extern enum DATESCAN_TYPE datescan_exitval[]; /* NOTE(review): dates.c reads datescan_attr, not this table; confirm this declaration is still needed */ + + +#endif /* DATES_H */ diff --git a/src/mairix/datescan.nfa b/src/mairix/datescan.nfa @@ -0,0 +1,112 @@ +######################################################################### +# +# mairix - message index builder and finder for maildir folders. +# +# Copyright (C) Richard P. Curnow 2002-2004,2006 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +# ======================================================================= + +# NFA description for parsing dates + +# Stuff to pass through verbatim +%{ +#include "dates.h" +%} + +Abbrev A = [a-zA-Z] + +BLOCK day { + State in + [12] ; [0-9] -> out + [3] ; [01] -> out +} + +# Match 2 digit year +BLOCK year { + State in + [04-9] ; [0-9] -> out + [3] ; [2-9] -> out +} + +BLOCK month { + State in + A ; A ; A -> out +} + +BLOCK scaled { + State in + [0-9] -> in, after_value + + State after_value + A -> out +} + +BLOCK ccyy { + State in + [1-9] ; [0-9] ; [0-9] ; [0-9] -> out +} + +BLOCK main { + State in + [1-9] = DS_D + <day:in->out> = DS_D + <year:in->out> = DS_Y + <ccyy:in->out> = DS_Y + [0-9] ; [0-9] ; [0-9] ; [0-9] ; [0-9] ; [0-9] = DS_YYMMDD + [0-9] ; [0-9] ; [0-9] ; [0-9] ; + [0-9] ; [0-9] ; [0-9] ; [0-9] = DS_YYMMDD + <scaled:in->out> = DS_SCALED + <month:in->out> = DS_M + [1-9] ; <month:in->out> = DS_DM + <day:in->out> ; <month:in->out> = DS_DM + <month:in->out> ; [1-9] = DS_MD + <month:in->out> ; <day:in->out> = DS_MD + <year:in->out> ; <month:in->out> = DS_YM + <month:in->out> ; <year:in->out> = DS_MY + <ccyy:in->out> ; <month:in->out> = DS_YM + <month:in->out> ; <ccyy:in->out> = DS_MY + + <year:in->out> ; <month:in->out> ; [1-9] = DS_YMD + <year:in->out> ; <month:in->out> ; <day:in->out> = DS_YMD + [1-9] ; <month:in->out> ; <year:in->out> = DS_DMY + <day:in->out> ; <month:in->out> ; <year:in->out> = DS_DMY + + <ccyy:in->out> ; <month:in->out> ; [1-9] = DS_YMD + <ccyy:in->out> ; <month:in->out> ; <day:in->out> = DS_YMD + [1-9] ; <month:in->out> ; <ccyy:in->out> = DS_DMY + <day:in->out> ; <month:in->out> ; <ccyy:in->out> = DS_DMY +} + +ATTR DS_D +ATTR DS_Y +ATTR DS_YYMMDD +ATTR DS_SCALED +ATTR DS_M +ATTR DS_DM +ATTR DS_MD +ATTR DS_YM +ATTR DS_MY +ATTR DS_YMD +ATTR DS_DMY + +DEFATTR DS_FAILURE + +TYPE "enum DATESCAN_TYPE" +PREFIX datescan + + +# vim:ft=txt:et:sw=4:sts=4:ht=4 + diff --git a/src/mairix/datescan.report b/src/mairix/datescan.report @@ -0,0 
+1,3303 @@ +NFA state 0 = in + [(epsilon)] -> day#37.in + 1:[1] -> #55 + 4:[4-9] -> #55 + 3:[3] -> #55 + 2:[2] -> #55 + [(epsilon)] -> ccyy#32.in + [(epsilon)] -> ccyy#30.in + [(epsilon)] -> day#27.in + 1:[1] -> #43 + 4:[4-9] -> #43 + 3:[3] -> #43 + 2:[2] -> #43 + [(epsilon)] -> year#22.in + [(epsilon)] -> year#20.in + [(epsilon)] -> month#18.in + [(epsilon)] -> ccyy#16.in + [(epsilon)] -> month#14.in + [(epsilon)] -> year#12.in + [(epsilon)] -> month#10.in + [(epsilon)] -> month#9.in + [(epsilon)] -> day#7.in + 1:[1] -> #21 + 4:[4-9] -> #21 + 3:[3] -> #21 + 2:[2] -> #21 + [(epsilon)] -> month#5.in + [(epsilon)] -> scaled#4.in + 0:[0] -> #11 + 4:[4-9] -> #11 + 3:[3] -> #11 + 2:[2] -> #11 + 1:[1] -> #11 + 0:[0] -> #5 + 4:[4-9] -> #5 + 3:[3] -> #5 + 2:[2] -> #5 + 1:[1] -> #5 + [(epsilon)] -> ccyy#3.in + [(epsilon)] -> year#2.in + [(epsilon)] -> day#1.in + 1:[1] -> #1 + 4:[4-9] -> #1 + 3:[3] -> #1 + 2:[2] -> #1 + Epsilon closure : + (self) + day#1.in + year#2.in + ccyy#3.in + scaled#4.in + month#5.in + day#7.in + month#9.in + month#10.in + year#12.in + month#14.in + ccyy#16.in + month#18.in + year#20.in + year#22.in + day#27.in + ccyy#30.in + ccyy#32.in + day#37.in + +NFA state 1 = #1 + Tags : DS_D + Epsilon closure : + (self) + +NFA state 2 = #2 + Tags : DS_D + Epsilon closure : + (self) + +NFA state 3 = day#1.in + 1:[1] -> day#1.#1 + 2:[2] -> day#1.#1 + 3:[3] -> day#1.#2 + Epsilon closure : + (self) + +NFA state 4 = day#1.#1 + 0:[0] -> day#1.out + 4:[4-9] -> day#1.out + 3:[3] -> day#1.out + 2:[2] -> day#1.out + 1:[1] -> day#1.out + Epsilon closure : + (self) + +NFA state 5 = day#1.#2 + 0:[0] -> day#1.out + 1:[1] -> day#1.out + Epsilon closure : + (self) + +NFA state 6 = day#1.out + [(epsilon)] -> #2 + Epsilon closure : + (self) + #2 + +NFA state 7 = #3 + Tags : DS_Y + Epsilon closure : + (self) + +NFA state 8 = year#2.in + 0:[0] -> year#2.#1 + 4:[4-9] -> year#2.#1 + 3:[3] -> year#2.#2 + Epsilon closure : + (self) + +NFA state 9 = year#2.#1 + 0:[0] -> year#2.out + 
4:[4-9] -> year#2.out + 3:[3] -> year#2.out + 2:[2] -> year#2.out + 1:[1] -> year#2.out + Epsilon closure : + (self) + +NFA state 10 = year#2.#2 + 2:[2] -> year#2.out + 4:[4-9] -> year#2.out + 3:[3] -> year#2.out + Epsilon closure : + (self) + +NFA state 11 = year#2.out + [(epsilon)] -> #3 + Epsilon closure : + (self) + #3 + +NFA state 12 = #4 + Tags : DS_Y + Epsilon closure : + (self) + +NFA state 13 = ccyy#3.in + 1:[1] -> ccyy#3.#1 + 4:[4-9] -> ccyy#3.#1 + 3:[3] -> ccyy#3.#1 + 2:[2] -> ccyy#3.#1 + Epsilon closure : + (self) + +NFA state 14 = ccyy#3.#1 + 0:[0] -> ccyy#3.#2 + 4:[4-9] -> ccyy#3.#2 + 3:[3] -> ccyy#3.#2 + 2:[2] -> ccyy#3.#2 + 1:[1] -> ccyy#3.#2 + Epsilon closure : + (self) + +NFA state 15 = ccyy#3.#2 + 0:[0] -> ccyy#3.#3 + 4:[4-9] -> ccyy#3.#3 + 3:[3] -> ccyy#3.#3 + 2:[2] -> ccyy#3.#3 + 1:[1] -> ccyy#3.#3 + Epsilon closure : + (self) + +NFA state 16 = ccyy#3.#3 + 0:[0] -> ccyy#3.out + 4:[4-9] -> ccyy#3.out + 3:[3] -> ccyy#3.out + 2:[2] -> ccyy#3.out + 1:[1] -> ccyy#3.out + Epsilon closure : + (self) + +NFA state 17 = ccyy#3.out + [(epsilon)] -> #4 + Epsilon closure : + (self) + #4 + +NFA state 18 = #5 + 0:[0] -> #6 + 4:[4-9] -> #6 + 3:[3] -> #6 + 2:[2] -> #6 + 1:[1] -> #6 + Epsilon closure : + (self) + +NFA state 19 = #6 + 0:[0] -> #7 + 4:[4-9] -> #7 + 3:[3] -> #7 + 2:[2] -> #7 + 1:[1] -> #7 + Epsilon closure : + (self) + +NFA state 20 = #7 + 0:[0] -> #8 + 4:[4-9] -> #8 + 3:[3] -> #8 + 2:[2] -> #8 + 1:[1] -> #8 + Epsilon closure : + (self) + +NFA state 21 = #8 + 0:[0] -> #9 + 4:[4-9] -> #9 + 3:[3] -> #9 + 2:[2] -> #9 + 1:[1] -> #9 + Epsilon closure : + (self) + +NFA state 22 = #9 + 0:[0] -> #10 + 4:[4-9] -> #10 + 3:[3] -> #10 + 2:[2] -> #10 + 1:[1] -> #10 + Epsilon closure : + (self) + +NFA state 23 = #10 + Tags : DS_YYMMDD + Epsilon closure : + (self) + +NFA state 24 = #11 + 0:[0] -> #12 + 4:[4-9] -> #12 + 3:[3] -> #12 + 2:[2] -> #12 + 1:[1] -> #12 + Epsilon closure : + (self) + +NFA state 25 = #12 + 0:[0] -> #13 + 4:[4-9] -> #13 + 3:[3] -> #13 + 
2:[2] -> #13 + 1:[1] -> #13 + Epsilon closure : + (self) + +NFA state 26 = #13 + 0:[0] -> #14 + 4:[4-9] -> #14 + 3:[3] -> #14 + 2:[2] -> #14 + 1:[1] -> #14 + Epsilon closure : + (self) + +NFA state 27 = #14 + 0:[0] -> #15 + 4:[4-9] -> #15 + 3:[3] -> #15 + 2:[2] -> #15 + 1:[1] -> #15 + Epsilon closure : + (self) + +NFA state 28 = #15 + 0:[0] -> #16 + 4:[4-9] -> #16 + 3:[3] -> #16 + 2:[2] -> #16 + 1:[1] -> #16 + Epsilon closure : + (self) + +NFA state 29 = #16 + 0:[0] -> #17 + 4:[4-9] -> #17 + 3:[3] -> #17 + 2:[2] -> #17 + 1:[1] -> #17 + Epsilon closure : + (self) + +NFA state 30 = #17 + 0:[0] -> #18 + 4:[4-9] -> #18 + 3:[3] -> #18 + 2:[2] -> #18 + 1:[1] -> #18 + Epsilon closure : + (self) + +NFA state 31 = #18 + Tags : DS_YYMMDD + Epsilon closure : + (self) + +NFA state 32 = #19 + Tags : DS_SCALED + Epsilon closure : + (self) + +NFA state 33 = scaled#4.in + 0:[0] -> scaled#4.in + 4:[4-9] -> scaled#4.in + 3:[3] -> scaled#4.in + 2:[2] -> scaled#4.in + 1:[1] -> scaled#4.in + 0:[0] -> scaled#4.after_value + 4:[4-9] -> scaled#4.after_value + 3:[3] -> scaled#4.after_value + 2:[2] -> scaled#4.after_value + 1:[1] -> scaled#4.after_value + Epsilon closure : + (self) + +NFA state 34 = scaled#4.after_value + 5:[A-Za-z] -> scaled#4.out + Epsilon closure : + (self) + +NFA state 35 = scaled#4.out + [(epsilon)] -> #19 + Epsilon closure : + (self) + #19 + +NFA state 36 = #20 + Tags : DS_M + Epsilon closure : + (self) + +NFA state 37 = month#5.in + 5:[A-Za-z] -> month#5.#1 + Epsilon closure : + (self) + +NFA state 38 = month#5.#1 + 5:[A-Za-z] -> month#5.#2 + Epsilon closure : + (self) + +NFA state 39 = month#5.#2 + 5:[A-Za-z] -> month#5.out + Epsilon closure : + (self) + +NFA state 40 = month#5.out + [(epsilon)] -> #20 + Epsilon closure : + (self) + #20 + +NFA state 41 = #21 + [(epsilon)] -> month#6.in + Epsilon closure : + (self) + month#6.in + +NFA state 42 = #22 + Tags : DS_DM + Epsilon closure : + (self) + +NFA state 43 = month#6.in + 5:[A-Za-z] -> month#6.#1 + Epsilon closure : 
+ (self) + +NFA state 44 = month#6.#1 + 5:[A-Za-z] -> month#6.#2 + Epsilon closure : + (self) + +NFA state 45 = month#6.#2 + 5:[A-Za-z] -> month#6.out + Epsilon closure : + (self) + +NFA state 46 = month#6.out + [(epsilon)] -> #22 + Epsilon closure : + (self) + #22 + +NFA state 47 = #23 + [(epsilon)] -> month#8.in + Epsilon closure : + (self) + month#8.in + +NFA state 48 = day#7.in + 1:[1] -> day#7.#1 + 2:[2] -> day#7.#1 + 3:[3] -> day#7.#2 + Epsilon closure : + (self) + +NFA state 49 = day#7.#1 + 0:[0] -> day#7.out + 4:[4-9] -> day#7.out + 3:[3] -> day#7.out + 2:[2] -> day#7.out + 1:[1] -> day#7.out + Epsilon closure : + (self) + +NFA state 50 = day#7.#2 + 0:[0] -> day#7.out + 1:[1] -> day#7.out + Epsilon closure : + (self) + +NFA state 51 = day#7.out + [(epsilon)] -> #23 + Epsilon closure : + (self) + #23 + month#8.in + +NFA state 52 = #24 + Tags : DS_DM + Epsilon closure : + (self) + +NFA state 53 = month#8.in + 5:[A-Za-z] -> month#8.#1 + Epsilon closure : + (self) + +NFA state 54 = month#8.#1 + 5:[A-Za-z] -> month#8.#2 + Epsilon closure : + (self) + +NFA state 55 = month#8.#2 + 5:[A-Za-z] -> month#8.out + Epsilon closure : + (self) + +NFA state 56 = month#8.out + [(epsilon)] -> #24 + Epsilon closure : + (self) + #24 + +NFA state 57 = #25 + 1:[1] -> #26 + 4:[4-9] -> #26 + 3:[3] -> #26 + 2:[2] -> #26 + Epsilon closure : + (self) + +NFA state 58 = month#9.in + 5:[A-Za-z] -> month#9.#1 + Epsilon closure : + (self) + +NFA state 59 = month#9.#1 + 5:[A-Za-z] -> month#9.#2 + Epsilon closure : + (self) + +NFA state 60 = month#9.#2 + 5:[A-Za-z] -> month#9.out + Epsilon closure : + (self) + +NFA state 61 = month#9.out + [(epsilon)] -> #25 + Epsilon closure : + (self) + #25 + +NFA state 62 = #26 + Tags : DS_MD + Epsilon closure : + (self) + +NFA state 63 = #27 + [(epsilon)] -> day#11.in + Epsilon closure : + (self) + day#11.in + +NFA state 64 = month#10.in + 5:[A-Za-z] -> month#10.#1 + Epsilon closure : + (self) + +NFA state 65 = month#10.#1 + 5:[A-Za-z] -> month#10.#2 + 
Epsilon closure : + (self) + +NFA state 66 = month#10.#2 + 5:[A-Za-z] -> month#10.out + Epsilon closure : + (self) + +NFA state 67 = month#10.out + [(epsilon)] -> #27 + Epsilon closure : + (self) + #27 + day#11.in + +NFA state 68 = #28 + Tags : DS_MD + Epsilon closure : + (self) + +NFA state 69 = day#11.in + 1:[1] -> day#11.#1 + 2:[2] -> day#11.#1 + 3:[3] -> day#11.#2 + Epsilon closure : + (self) + +NFA state 70 = day#11.#1 + 0:[0] -> day#11.out + 4:[4-9] -> day#11.out + 3:[3] -> day#11.out + 2:[2] -> day#11.out + 1:[1] -> day#11.out + Epsilon closure : + (self) + +NFA state 71 = day#11.#2 + 0:[0] -> day#11.out + 1:[1] -> day#11.out + Epsilon closure : + (self) + +NFA state 72 = day#11.out + [(epsilon)] -> #28 + Epsilon closure : + (self) + #28 + +NFA state 73 = #29 + [(epsilon)] -> month#13.in + Epsilon closure : + (self) + month#13.in + +NFA state 74 = year#12.in + 0:[0] -> year#12.#1 + 4:[4-9] -> year#12.#1 + 3:[3] -> year#12.#2 + Epsilon closure : + (self) + +NFA state 75 = year#12.#1 + 0:[0] -> year#12.out + 4:[4-9] -> year#12.out + 3:[3] -> year#12.out + 2:[2] -> year#12.out + 1:[1] -> year#12.out + Epsilon closure : + (self) + +NFA state 76 = year#12.#2 + 2:[2] -> year#12.out + 4:[4-9] -> year#12.out + 3:[3] -> year#12.out + Epsilon closure : + (self) + +NFA state 77 = year#12.out + [(epsilon)] -> #29 + Epsilon closure : + (self) + #29 + month#13.in + +NFA state 78 = #30 + Tags : DS_YM + Epsilon closure : + (self) + +NFA state 79 = month#13.in + 5:[A-Za-z] -> month#13.#1 + Epsilon closure : + (self) + +NFA state 80 = month#13.#1 + 5:[A-Za-z] -> month#13.#2 + Epsilon closure : + (self) + +NFA state 81 = month#13.#2 + 5:[A-Za-z] -> month#13.out + Epsilon closure : + (self) + +NFA state 82 = month#13.out + [(epsilon)] -> #30 + Epsilon closure : + (self) + #30 + +NFA state 83 = #31 + [(epsilon)] -> year#15.in + Epsilon closure : + (self) + year#15.in + +NFA state 84 = month#14.in + 5:[A-Za-z] -> month#14.#1 + Epsilon closure : + (self) + +NFA state 85 = 
month#14.#1 + 5:[A-Za-z] -> month#14.#2 + Epsilon closure : + (self) + +NFA state 86 = month#14.#2 + 5:[A-Za-z] -> month#14.out + Epsilon closure : + (self) + +NFA state 87 = month#14.out + [(epsilon)] -> #31 + Epsilon closure : + (self) + #31 + year#15.in + +NFA state 88 = #32 + Tags : DS_MY + Epsilon closure : + (self) + +NFA state 89 = year#15.in + 0:[0] -> year#15.#1 + 4:[4-9] -> year#15.#1 + 3:[3] -> year#15.#2 + Epsilon closure : + (self) + +NFA state 90 = year#15.#1 + 0:[0] -> year#15.out + 4:[4-9] -> year#15.out + 3:[3] -> year#15.out + 2:[2] -> year#15.out + 1:[1] -> year#15.out + Epsilon closure : + (self) + +NFA state 91 = year#15.#2 + 2:[2] -> year#15.out + 4:[4-9] -> year#15.out + 3:[3] -> year#15.out + Epsilon closure : + (self) + +NFA state 92 = year#15.out + [(epsilon)] -> #32 + Epsilon closure : + (self) + #32 + +NFA state 93 = #33 + [(epsilon)] -> month#17.in + Epsilon closure : + (self) + month#17.in + +NFA state 94 = ccyy#16.in + 1:[1] -> ccyy#16.#1 + 4:[4-9] -> ccyy#16.#1 + 3:[3] -> ccyy#16.#1 + 2:[2] -> ccyy#16.#1 + Epsilon closure : + (self) + +NFA state 95 = ccyy#16.#1 + 0:[0] -> ccyy#16.#2 + 4:[4-9] -> ccyy#16.#2 + 3:[3] -> ccyy#16.#2 + 2:[2] -> ccyy#16.#2 + 1:[1] -> ccyy#16.#2 + Epsilon closure : + (self) + +NFA state 96 = ccyy#16.#2 + 0:[0] -> ccyy#16.#3 + 4:[4-9] -> ccyy#16.#3 + 3:[3] -> ccyy#16.#3 + 2:[2] -> ccyy#16.#3 + 1:[1] -> ccyy#16.#3 + Epsilon closure : + (self) + +NFA state 97 = ccyy#16.#3 + 0:[0] -> ccyy#16.out + 4:[4-9] -> ccyy#16.out + 3:[3] -> ccyy#16.out + 2:[2] -> ccyy#16.out + 1:[1] -> ccyy#16.out + Epsilon closure : + (self) + +NFA state 98 = ccyy#16.out + [(epsilon)] -> #33 + Epsilon closure : + (self) + #33 + month#17.in + +NFA state 99 = #34 + Tags : DS_YM + Epsilon closure : + (self) + +NFA state 100 = month#17.in + 5:[A-Za-z] -> month#17.#1 + Epsilon closure : + (self) + +NFA state 101 = month#17.#1 + 5:[A-Za-z] -> month#17.#2 + Epsilon closure : + (self) + +NFA state 102 = month#17.#2 + 5:[A-Za-z] -> month#17.out + 
Epsilon closure : + (self) + +NFA state 103 = month#17.out + [(epsilon)] -> #34 + Epsilon closure : + (self) + #34 + +NFA state 104 = #35 + [(epsilon)] -> ccyy#19.in + Epsilon closure : + (self) + ccyy#19.in + +NFA state 105 = month#18.in + 5:[A-Za-z] -> month#18.#1 + Epsilon closure : + (self) + +NFA state 106 = month#18.#1 + 5:[A-Za-z] -> month#18.#2 + Epsilon closure : + (self) + +NFA state 107 = month#18.#2 + 5:[A-Za-z] -> month#18.out + Epsilon closure : + (self) + +NFA state 108 = month#18.out + [(epsilon)] -> #35 + Epsilon closure : + (self) + #35 + ccyy#19.in + +NFA state 109 = #36 + Tags : DS_MY + Epsilon closure : + (self) + +NFA state 110 = ccyy#19.in + 1:[1] -> ccyy#19.#1 + 4:[4-9] -> ccyy#19.#1 + 3:[3] -> ccyy#19.#1 + 2:[2] -> ccyy#19.#1 + Epsilon closure : + (self) + +NFA state 111 = ccyy#19.#1 + 0:[0] -> ccyy#19.#2 + 4:[4-9] -> ccyy#19.#2 + 3:[3] -> ccyy#19.#2 + 2:[2] -> ccyy#19.#2 + 1:[1] -> ccyy#19.#2 + Epsilon closure : + (self) + +NFA state 112 = ccyy#19.#2 + 0:[0] -> ccyy#19.#3 + 4:[4-9] -> ccyy#19.#3 + 3:[3] -> ccyy#19.#3 + 2:[2] -> ccyy#19.#3 + 1:[1] -> ccyy#19.#3 + Epsilon closure : + (self) + +NFA state 113 = ccyy#19.#3 + 0:[0] -> ccyy#19.out + 4:[4-9] -> ccyy#19.out + 3:[3] -> ccyy#19.out + 2:[2] -> ccyy#19.out + 1:[1] -> ccyy#19.out + Epsilon closure : + (self) + +NFA state 114 = ccyy#19.out + [(epsilon)] -> #36 + Epsilon closure : + (self) + #36 + +NFA state 115 = #37 + [(epsilon)] -> month#21.in + Epsilon closure : + (self) + month#21.in + +NFA state 116 = year#20.in + 0:[0] -> year#20.#1 + 4:[4-9] -> year#20.#1 + 3:[3] -> year#20.#2 + Epsilon closure : + (self) + +NFA state 117 = year#20.#1 + 0:[0] -> year#20.out + 4:[4-9] -> year#20.out + 3:[3] -> year#20.out + 2:[2] -> year#20.out + 1:[1] -> year#20.out + Epsilon closure : + (self) + +NFA state 118 = year#20.#2 + 2:[2] -> year#20.out + 4:[4-9] -> year#20.out + 3:[3] -> year#20.out + Epsilon closure : + (self) + +NFA state 119 = year#20.out + [(epsilon)] -> #37 + Epsilon closure : + 
(self) + #37 + month#21.in + +NFA state 120 = #38 + 1:[1] -> #39 + 4:[4-9] -> #39 + 3:[3] -> #39 + 2:[2] -> #39 + Epsilon closure : + (self) + +NFA state 121 = month#21.in + 5:[A-Za-z] -> month#21.#1 + Epsilon closure : + (self) + +NFA state 122 = month#21.#1 + 5:[A-Za-z] -> month#21.#2 + Epsilon closure : + (self) + +NFA state 123 = month#21.#2 + 5:[A-Za-z] -> month#21.out + Epsilon closure : + (self) + +NFA state 124 = month#21.out + [(epsilon)] -> #38 + Epsilon closure : + (self) + #38 + +NFA state 125 = #39 + Tags : DS_YMD + Epsilon closure : + (self) + +NFA state 126 = #40 + [(epsilon)] -> month#23.in + Epsilon closure : + (self) + month#23.in + +NFA state 127 = year#22.in + 0:[0] -> year#22.#1 + 4:[4-9] -> year#22.#1 + 3:[3] -> year#22.#2 + Epsilon closure : + (self) + +NFA state 128 = year#22.#1 + 0:[0] -> year#22.out + 4:[4-9] -> year#22.out + 3:[3] -> year#22.out + 2:[2] -> year#22.out + 1:[1] -> year#22.out + Epsilon closure : + (self) + +NFA state 129 = year#22.#2 + 2:[2] -> year#22.out + 4:[4-9] -> year#22.out + 3:[3] -> year#22.out + Epsilon closure : + (self) + +NFA state 130 = year#22.out + [(epsilon)] -> #40 + Epsilon closure : + (self) + #40 + month#23.in + +NFA state 131 = #41 + [(epsilon)] -> day#24.in + Epsilon closure : + (self) + day#24.in + +NFA state 132 = month#23.in + 5:[A-Za-z] -> month#23.#1 + Epsilon closure : + (self) + +NFA state 133 = month#23.#1 + 5:[A-Za-z] -> month#23.#2 + Epsilon closure : + (self) + +NFA state 134 = month#23.#2 + 5:[A-Za-z] -> month#23.out + Epsilon closure : + (self) + +NFA state 135 = month#23.out + [(epsilon)] -> #41 + Epsilon closure : + (self) + #41 + day#24.in + +NFA state 136 = #42 + Tags : DS_YMD + Epsilon closure : + (self) + +NFA state 137 = day#24.in + 1:[1] -> day#24.#1 + 2:[2] -> day#24.#1 + 3:[3] -> day#24.#2 + Epsilon closure : + (self) + +NFA state 138 = day#24.#1 + 0:[0] -> day#24.out + 4:[4-9] -> day#24.out + 3:[3] -> day#24.out + 2:[2] -> day#24.out + 1:[1] -> day#24.out + Epsilon closure : + 
(self) + +NFA state 139 = day#24.#2 + 0:[0] -> day#24.out + 1:[1] -> day#24.out + Epsilon closure : + (self) + +NFA state 140 = day#24.out + [(epsilon)] -> #42 + Epsilon closure : + (self) + #42 + +NFA state 141 = #43 + [(epsilon)] -> month#25.in + Epsilon closure : + (self) + month#25.in + +NFA state 142 = #44 + [(epsilon)] -> year#26.in + Epsilon closure : + (self) + year#26.in + +NFA state 143 = month#25.in + 5:[A-Za-z] -> month#25.#1 + Epsilon closure : + (self) + +NFA state 144 = month#25.#1 + 5:[A-Za-z] -> month#25.#2 + Epsilon closure : + (self) + +NFA state 145 = month#25.#2 + 5:[A-Za-z] -> month#25.out + Epsilon closure : + (self) + +NFA state 146 = month#25.out + [(epsilon)] -> #44 + Epsilon closure : + (self) + #44 + year#26.in + +NFA state 147 = #45 + Tags : DS_DMY + Epsilon closure : + (self) + +NFA state 148 = year#26.in + 0:[0] -> year#26.#1 + 4:[4-9] -> year#26.#1 + 3:[3] -> year#26.#2 + Epsilon closure : + (self) + +NFA state 149 = year#26.#1 + 0:[0] -> year#26.out + 4:[4-9] -> year#26.out + 3:[3] -> year#26.out + 2:[2] -> year#26.out + 1:[1] -> year#26.out + Epsilon closure : + (self) + +NFA state 150 = year#26.#2 + 2:[2] -> year#26.out + 4:[4-9] -> year#26.out + 3:[3] -> year#26.out + Epsilon closure : + (self) + +NFA state 151 = year#26.out + [(epsilon)] -> #45 + Epsilon closure : + (self) + #45 + +NFA state 152 = #46 + [(epsilon)] -> month#28.in + Epsilon closure : + (self) + month#28.in + +NFA state 153 = day#27.in + 1:[1] -> day#27.#1 + 2:[2] -> day#27.#1 + 3:[3] -> day#27.#2 + Epsilon closure : + (self) + +NFA state 154 = day#27.#1 + 0:[0] -> day#27.out + 4:[4-9] -> day#27.out + 3:[3] -> day#27.out + 2:[2] -> day#27.out + 1:[1] -> day#27.out + Epsilon closure : + (self) + +NFA state 155 = day#27.#2 + 0:[0] -> day#27.out + 1:[1] -> day#27.out + Epsilon closure : + (self) + +NFA state 156 = day#27.out + [(epsilon)] -> #46 + Epsilon closure : + (self) + #46 + month#28.in + +NFA state 157 = #47 + [(epsilon)] -> year#29.in + Epsilon closure : + 
(self) + year#29.in + +NFA state 158 = month#28.in + 5:[A-Za-z] -> month#28.#1 + Epsilon closure : + (self) + +NFA state 159 = month#28.#1 + 5:[A-Za-z] -> month#28.#2 + Epsilon closure : + (self) + +NFA state 160 = month#28.#2 + 5:[A-Za-z] -> month#28.out + Epsilon closure : + (self) + +NFA state 161 = month#28.out + [(epsilon)] -> #47 + Epsilon closure : + (self) + #47 + year#29.in + +NFA state 162 = #48 + Tags : DS_DMY + Epsilon closure : + (self) + +NFA state 163 = year#29.in + 0:[0] -> year#29.#1 + 4:[4-9] -> year#29.#1 + 3:[3] -> year#29.#2 + Epsilon closure : + (self) + +NFA state 164 = year#29.#1 + 0:[0] -> year#29.out + 4:[4-9] -> year#29.out + 3:[3] -> year#29.out + 2:[2] -> year#29.out + 1:[1] -> year#29.out + Epsilon closure : + (self) + +NFA state 165 = year#29.#2 + 2:[2] -> year#29.out + 4:[4-9] -> year#29.out + 3:[3] -> year#29.out + Epsilon closure : + (self) + +NFA state 166 = year#29.out + [(epsilon)] -> #48 + Epsilon closure : + (self) + #48 + +NFA state 167 = #49 + [(epsilon)] -> month#31.in + Epsilon closure : + (self) + month#31.in + +NFA state 168 = ccyy#30.in + 1:[1] -> ccyy#30.#1 + 4:[4-9] -> ccyy#30.#1 + 3:[3] -> ccyy#30.#1 + 2:[2] -> ccyy#30.#1 + Epsilon closure : + (self) + +NFA state 169 = ccyy#30.#1 + 0:[0] -> ccyy#30.#2 + 4:[4-9] -> ccyy#30.#2 + 3:[3] -> ccyy#30.#2 + 2:[2] -> ccyy#30.#2 + 1:[1] -> ccyy#30.#2 + Epsilon closure : + (self) + +NFA state 170 = ccyy#30.#2 + 0:[0] -> ccyy#30.#3 + 4:[4-9] -> ccyy#30.#3 + 3:[3] -> ccyy#30.#3 + 2:[2] -> ccyy#30.#3 + 1:[1] -> ccyy#30.#3 + Epsilon closure : + (self) + +NFA state 171 = ccyy#30.#3 + 0:[0] -> ccyy#30.out + 4:[4-9] -> ccyy#30.out + 3:[3] -> ccyy#30.out + 2:[2] -> ccyy#30.out + 1:[1] -> ccyy#30.out + Epsilon closure : + (self) + +NFA state 172 = ccyy#30.out + [(epsilon)] -> #49 + Epsilon closure : + (self) + #49 + month#31.in + +NFA state 173 = #50 + 1:[1] -> #51 + 4:[4-9] -> #51 + 3:[3] -> #51 + 2:[2] -> #51 + Epsilon closure : + (self) + +NFA state 174 = month#31.in + 5:[A-Za-z] -> 
month#31.#1 + Epsilon closure : + (self) + +NFA state 175 = month#31.#1 + 5:[A-Za-z] -> month#31.#2 + Epsilon closure : + (self) + +NFA state 176 = month#31.#2 + 5:[A-Za-z] -> month#31.out + Epsilon closure : + (self) + +NFA state 177 = month#31.out + [(epsilon)] -> #50 + Epsilon closure : + (self) + #50 + +NFA state 178 = #51 + Tags : DS_YMD + Epsilon closure : + (self) + +NFA state 179 = #52 + [(epsilon)] -> month#33.in + Epsilon closure : + (self) + month#33.in + +NFA state 180 = ccyy#32.in + 1:[1] -> ccyy#32.#1 + 4:[4-9] -> ccyy#32.#1 + 3:[3] -> ccyy#32.#1 + 2:[2] -> ccyy#32.#1 + Epsilon closure : + (self) + +NFA state 181 = ccyy#32.#1 + 0:[0] -> ccyy#32.#2 + 4:[4-9] -> ccyy#32.#2 + 3:[3] -> ccyy#32.#2 + 2:[2] -> ccyy#32.#2 + 1:[1] -> ccyy#32.#2 + Epsilon closure : + (self) + +NFA state 182 = ccyy#32.#2 + 0:[0] -> ccyy#32.#3 + 4:[4-9] -> ccyy#32.#3 + 3:[3] -> ccyy#32.#3 + 2:[2] -> ccyy#32.#3 + 1:[1] -> ccyy#32.#3 + Epsilon closure : + (self) + +NFA state 183 = ccyy#32.#3 + 0:[0] -> ccyy#32.out + 4:[4-9] -> ccyy#32.out + 3:[3] -> ccyy#32.out + 2:[2] -> ccyy#32.out + 1:[1] -> ccyy#32.out + Epsilon closure : + (self) + +NFA state 184 = ccyy#32.out + [(epsilon)] -> #52 + Epsilon closure : + (self) + #52 + month#33.in + +NFA state 185 = #53 + [(epsilon)] -> day#34.in + Epsilon closure : + (self) + day#34.in + +NFA state 186 = month#33.in + 5:[A-Za-z] -> month#33.#1 + Epsilon closure : + (self) + +NFA state 187 = month#33.#1 + 5:[A-Za-z] -> month#33.#2 + Epsilon closure : + (self) + +NFA state 188 = month#33.#2 + 5:[A-Za-z] -> month#33.out + Epsilon closure : + (self) + +NFA state 189 = month#33.out + [(epsilon)] -> #53 + Epsilon closure : + (self) + #53 + day#34.in + +NFA state 190 = #54 + Tags : DS_YMD + Epsilon closure : + (self) + +NFA state 191 = day#34.in + 1:[1] -> day#34.#1 + 2:[2] -> day#34.#1 + 3:[3] -> day#34.#2 + Epsilon closure : + (self) + +NFA state 192 = day#34.#1 + 0:[0] -> day#34.out + 4:[4-9] -> day#34.out + 3:[3] -> day#34.out + 2:[2] -> 
day#34.out + 1:[1] -> day#34.out + Epsilon closure : + (self) + +NFA state 193 = day#34.#2 + 0:[0] -> day#34.out + 1:[1] -> day#34.out + Epsilon closure : + (self) + +NFA state 194 = day#34.out + [(epsilon)] -> #54 + Epsilon closure : + (self) + #54 + +NFA state 195 = #55 + [(epsilon)] -> month#35.in + Epsilon closure : + (self) + month#35.in + +NFA state 196 = #56 + [(epsilon)] -> ccyy#36.in + Epsilon closure : + (self) + ccyy#36.in + +NFA state 197 = month#35.in + 5:[A-Za-z] -> month#35.#1 + Epsilon closure : + (self) + +NFA state 198 = month#35.#1 + 5:[A-Za-z] -> month#35.#2 + Epsilon closure : + (self) + +NFA state 199 = month#35.#2 + 5:[A-Za-z] -> month#35.out + Epsilon closure : + (self) + +NFA state 200 = month#35.out + [(epsilon)] -> #56 + Epsilon closure : + (self) + #56 + ccyy#36.in + +NFA state 201 = #57 + Tags : DS_DMY + Epsilon closure : + (self) + +NFA state 202 = ccyy#36.in + 1:[1] -> ccyy#36.#1 + 4:[4-9] -> ccyy#36.#1 + 3:[3] -> ccyy#36.#1 + 2:[2] -> ccyy#36.#1 + Epsilon closure : + (self) + +NFA state 203 = ccyy#36.#1 + 0:[0] -> ccyy#36.#2 + 4:[4-9] -> ccyy#36.#2 + 3:[3] -> ccyy#36.#2 + 2:[2] -> ccyy#36.#2 + 1:[1] -> ccyy#36.#2 + Epsilon closure : + (self) + +NFA state 204 = ccyy#36.#2 + 0:[0] -> ccyy#36.#3 + 4:[4-9] -> ccyy#36.#3 + 3:[3] -> ccyy#36.#3 + 2:[2] -> ccyy#36.#3 + 1:[1] -> ccyy#36.#3 + Epsilon closure : + (self) + +NFA state 205 = ccyy#36.#3 + 0:[0] -> ccyy#36.out + 4:[4-9] -> ccyy#36.out + 3:[3] -> ccyy#36.out + 2:[2] -> ccyy#36.out + 1:[1] -> ccyy#36.out + Epsilon closure : + (self) + +NFA state 206 = ccyy#36.out + [(epsilon)] -> #57 + Epsilon closure : + (self) + #57 + +NFA state 207 = #58 + [(epsilon)] -> month#38.in + Epsilon closure : + (self) + month#38.in + +NFA state 208 = day#37.in + 1:[1] -> day#37.#1 + 2:[2] -> day#37.#1 + 3:[3] -> day#37.#2 + Epsilon closure : + (self) + +NFA state 209 = day#37.#1 + 0:[0] -> day#37.out + 4:[4-9] -> day#37.out + 3:[3] -> day#37.out + 2:[2] -> day#37.out + 1:[1] -> day#37.out + Epsilon 
closure : + (self) + +NFA state 210 = day#37.#2 + 0:[0] -> day#37.out + 1:[1] -> day#37.out + Epsilon closure : + (self) + +NFA state 211 = day#37.out + [(epsilon)] -> #58 + Epsilon closure : + (self) + #58 + month#38.in + +NFA state 212 = #59 + [(epsilon)] -> ccyy#39.in + Epsilon closure : + (self) + ccyy#39.in + +NFA state 213 = month#38.in + 5:[A-Za-z] -> month#38.#1 + Epsilon closure : + (self) + +NFA state 214 = month#38.#1 + 5:[A-Za-z] -> month#38.#2 + Epsilon closure : + (self) + +NFA state 215 = month#38.#2 + 5:[A-Za-z] -> month#38.out + Epsilon closure : + (self) + +NFA state 216 = month#38.out + [(epsilon)] -> #59 + Epsilon closure : + (self) + #59 + ccyy#39.in + +NFA state 217 = #60 + Tags : DS_DMY + Epsilon closure : + (self) + +NFA state 218 = ccyy#39.in + 1:[1] -> ccyy#39.#1 + 4:[4-9] -> ccyy#39.#1 + 3:[3] -> ccyy#39.#1 + 2:[2] -> ccyy#39.#1 + Epsilon closure : + (self) + +NFA state 219 = ccyy#39.#1 + 0:[0] -> ccyy#39.#2 + 4:[4-9] -> ccyy#39.#2 + 3:[3] -> ccyy#39.#2 + 2:[2] -> ccyy#39.#2 + 1:[1] -> ccyy#39.#2 + Epsilon closure : + (self) + +NFA state 220 = ccyy#39.#2 + 0:[0] -> ccyy#39.#3 + 4:[4-9] -> ccyy#39.#3 + 3:[3] -> ccyy#39.#3 + 2:[2] -> ccyy#39.#3 + 1:[1] -> ccyy#39.#3 + Epsilon closure : + (self) + +NFA state 221 = ccyy#39.#3 + 0:[0] -> ccyy#39.out + 4:[4-9] -> ccyy#39.out + 3:[3] -> ccyy#39.out + 2:[2] -> ccyy#39.out + 1:[1] -> ccyy#39.out + Epsilon closure : + (self) + +NFA state 222 = ccyy#39.out + [(epsilon)] -> #60 + Epsilon closure : + (self) + #60 + +-------------------------------- +DFA structure before compression +-------------------------------- +DFA state 0 + NFA states : + in + day#1.in + year#2.in + ccyy#3.in + scaled#4.in + month#5.in + day#7.in + month#9.in + month#10.in + year#12.in + month#14.in + ccyy#16.in + month#18.in + year#20.in + year#22.in + day#27.in + ccyy#30.in + ccyy#32.in + day#37.in + + Forward route : + (START)->(HERE) + Transitions : + 0:[0] -> 1 + 1:[1] -> 2 + 2:[2] -> 2 + 3:[3] -> 3 + 4:[4-9] -> 4 + 
5:[A-Za-z] -> 5 + +DFA state 1 + NFA states : + year#2.#1 + #5 + #11 + scaled#4.in + scaled#4.after_value + year#12.#1 + year#20.#1 + year#22.#1 + + Forward route : (from state 0) + (START)->0:[0]->(HERE) + Transitions : + 0:[0] -> 6 + 1:[1] -> 6 + 2:[2] -> 6 + 3:[3] -> 6 + 4:[4-9] -> 6 + 5:[A-Za-z] -> 7 + +DFA state 2 + NFA states : + #1 + day#1.#1 + ccyy#3.#1 + #5 + #11 + scaled#4.in + scaled#4.after_value + #21 + month#6.in + day#7.#1 + ccyy#16.#1 + #43 + month#25.in + day#27.#1 + ccyy#30.#1 + ccyy#32.#1 + #55 + month#35.in + day#37.#1 + + Forward route : (from state 0) + (START)->1:[1]->(HERE) + Transitions : + 0:[0] -> 8 + 1:[1] -> 8 + 2:[2] -> 8 + 3:[3] -> 8 + 4:[4-9] -> 8 + 5:[A-Za-z] -> 9 + NFA exit tags applying : + DS_D + Attributes for <(DEFAULT)> : DS_D + +DFA state 3 + NFA states : + #1 + day#1.#2 + year#2.#2 + ccyy#3.#1 + #5 + #11 + scaled#4.in + scaled#4.after_value + #21 + month#6.in + day#7.#2 + year#12.#2 + ccyy#16.#1 + year#20.#2 + year#22.#2 + #43 + month#25.in + day#27.#2 + ccyy#30.#1 + ccyy#32.#1 + #55 + month#35.in + day#37.#2 + + Forward route : (from state 0) + (START)->3:[3]->(HERE) + Transitions : + 0:[0] -> 8 + 1:[1] -> 8 + 2:[2] -> 10 + 3:[3] -> 10 + 4:[4-9] -> 10 + 5:[A-Za-z] -> 9 + NFA exit tags applying : + DS_D + Attributes for <(DEFAULT)> : DS_D + +DFA state 4 + NFA states : + #1 + year#2.#1 + ccyy#3.#1 + #5 + #11 + scaled#4.in + scaled#4.after_value + #21 + month#6.in + year#12.#1 + ccyy#16.#1 + year#20.#1 + year#22.#1 + #43 + month#25.in + ccyy#30.#1 + ccyy#32.#1 + #55 + month#35.in + + Forward route : (from state 0) + (START)->4:[4-9]->(HERE) + Transitions : + 0:[0] -> 10 + 1:[1] -> 10 + 2:[2] -> 10 + 3:[3] -> 10 + 4:[4-9] -> 10 + 5:[A-Za-z] -> 9 + NFA exit tags applying : + DS_D + Attributes for <(DEFAULT)> : DS_D + +DFA state 5 + NFA states : + month#5.#1 + month#9.#1 + month#10.#1 + month#14.#1 + month#18.#1 + + Forward route : (from state 0) + (START)->5:[A-Za-z]->(HERE) + Transitions : + 5:[A-Za-z] -> 11 + +DFA state 6 + 
NFA states : + #3 + year#2.out + #6 + #12 + scaled#4.in + scaled#4.after_value + #29 + year#12.out + month#13.in + #37 + year#20.out + month#21.in + #40 + year#22.out + month#23.in + + Forward route : (from state 1) + (START)->0:[0]->0:[0]->(HERE) + Transitions : + 0:[0] -> 12 + 1:[1] -> 12 + 2:[2] -> 12 + 3:[3] -> 12 + 4:[4-9] -> 12 + 5:[A-Za-z] -> 13 + NFA exit tags applying : + DS_Y + Attributes for <(DEFAULT)> : DS_Y + +DFA state 7 + NFA states : + #19 + scaled#4.out + + Forward route : (from state 1) + (START)->0:[0]->5:[A-Za-z]->(HERE) + Transitions : + NFA exit tags applying : + DS_SCALED + Attributes for <(DEFAULT)> : DS_SCALED + +DFA state 8 + NFA states : + #2 + day#1.out + ccyy#3.#2 + #6 + #12 + scaled#4.in + scaled#4.after_value + #23 + day#7.out + month#8.in + ccyy#16.#2 + #46 + day#27.out + month#28.in + ccyy#30.#2 + ccyy#32.#2 + #58 + day#37.out + month#38.in + + Forward route : (from state 2) + (START)->1:[1]->0:[0]->(HERE) + Transitions : + 0:[0] -> 14 + 1:[1] -> 14 + 2:[2] -> 14 + 3:[3] -> 14 + 4:[4-9] -> 14 + 5:[A-Za-z] -> 15 + NFA exit tags applying : + DS_D + Attributes for <(DEFAULT)> : DS_D + +DFA state 9 + NFA states : + #19 + scaled#4.out + month#6.#1 + month#25.#1 + month#35.#1 + + Forward route : (from state 2) + (START)->1:[1]->5:[A-Za-z]->(HERE) + Transitions : + 5:[A-Za-z] -> 16 + NFA exit tags applying : + DS_SCALED + Attributes for <(DEFAULT)> : DS_SCALED + +DFA state 10 + NFA states : + #3 + year#2.out + ccyy#3.#2 + #6 + #12 + scaled#4.in + scaled#4.after_value + #29 + year#12.out + month#13.in + ccyy#16.#2 + #37 + year#20.out + month#21.in + #40 + year#22.out + month#23.in + ccyy#30.#2 + ccyy#32.#2 + + Forward route : (from state 3) + (START)->3:[3]->2:[2]->(HERE) + Transitions : + 0:[0] -> 14 + 1:[1] -> 14 + 2:[2] -> 14 + 3:[3] -> 14 + 4:[4-9] -> 14 + 5:[A-Za-z] -> 13 + NFA exit tags applying : + DS_Y + Attributes for <(DEFAULT)> : DS_Y + +DFA state 11 + NFA states : + month#5.#2 + month#9.#2 + month#10.#2 + month#14.#2 + 
month#18.#2 + + Forward route : (from state 5) + (START)->5:[A-Za-z]->5:[A-Za-z]->(HERE) + Transitions : + 5:[A-Za-z] -> 17 + +DFA state 12 + NFA states : + #7 + #13 + scaled#4.in + scaled#4.after_value + + Forward route : (from state 6) + (START)->0:[0]->0:[0]->0:[0]->(HERE) + Transitions : + 0:[0] -> 18 + 1:[1] -> 18 + 2:[2] -> 18 + 3:[3] -> 18 + 4:[4-9] -> 18 + 5:[A-Za-z] -> 7 + +DFA state 13 + NFA states : + #19 + scaled#4.out + month#13.#1 + month#21.#1 + month#23.#1 + + Forward route : (from state 6) + (START)->0:[0]->0:[0]->5:[A-Za-z]->(HERE) + Transitions : + 5:[A-Za-z] -> 19 + NFA exit tags applying : + DS_SCALED + Attributes for <(DEFAULT)> : DS_SCALED + +DFA state 14 + NFA states : + ccyy#3.#3 + #7 + #13 + scaled#4.in + scaled#4.after_value + ccyy#16.#3 + ccyy#30.#3 + ccyy#32.#3 + + Forward route : (from state 8) + (START)->1:[1]->0:[0]->0:[0]->(HERE) + Transitions : + 0:[0] -> 20 + 1:[1] -> 20 + 2:[2] -> 20 + 3:[3] -> 20 + 4:[4-9] -> 20 + 5:[A-Za-z] -> 7 + +DFA state 15 + NFA states : + #19 + scaled#4.out + month#8.#1 + month#28.#1 + month#38.#1 + + Forward route : (from state 8) + (START)->1:[1]->0:[0]->5:[A-Za-z]->(HERE) + Transitions : + 5:[A-Za-z] -> 21 + NFA exit tags applying : + DS_SCALED + Attributes for <(DEFAULT)> : DS_SCALED + +DFA state 16 + NFA states : + month#6.#2 + month#25.#2 + month#35.#2 + + Forward route : (from state 9) + (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->(HERE) + Transitions : + 5:[A-Za-z] -> 22 + +DFA state 17 + NFA states : + #20 + month#5.out + #25 + month#9.out + #27 + month#10.out + day#11.in + #31 + month#14.out + year#15.in + #35 + month#18.out + ccyy#19.in + + Forward route : (from state 11) + (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->(HERE) + Transitions : + 0:[0] -> 23 + 1:[1] -> 24 + 2:[2] -> 24 + 3:[3] -> 25 + 4:[4-9] -> 26 + NFA exit tags applying : + DS_M + Attributes for <(DEFAULT)> : DS_M + +DFA state 18 + NFA states : + #8 + #14 + scaled#4.in + scaled#4.after_value + + Forward route : (from state 12) + 
(START)->0:[0]->0:[0]->0:[0]->0:[0]->(HERE) + Transitions : + 0:[0] -> 27 + 1:[1] -> 27 + 2:[2] -> 27 + 3:[3] -> 27 + 4:[4-9] -> 27 + 5:[A-Za-z] -> 7 + +DFA state 19 + NFA states : + month#13.#2 + month#21.#2 + month#23.#2 + + Forward route : (from state 13) + (START)->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->(HERE) + Transitions : + 5:[A-Za-z] -> 28 + +DFA state 20 + NFA states : + #4 + ccyy#3.out + #8 + #14 + scaled#4.in + scaled#4.after_value + #33 + ccyy#16.out + month#17.in + #49 + ccyy#30.out + month#31.in + #52 + ccyy#32.out + month#33.in + + Forward route : (from state 14) + (START)->1:[1]->0:[0]->0:[0]->0:[0]->(HERE) + Transitions : + 0:[0] -> 27 + 1:[1] -> 27 + 2:[2] -> 27 + 3:[3] -> 27 + 4:[4-9] -> 27 + 5:[A-Za-z] -> 29 + NFA exit tags applying : + DS_Y + Attributes for <(DEFAULT)> : DS_Y + +DFA state 21 + NFA states : + month#8.#2 + month#28.#2 + month#38.#2 + + Forward route : (from state 15) + (START)->1:[1]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->(HERE) + Transitions : + 5:[A-Za-z] -> 30 + +DFA state 22 + NFA states : + #22 + month#6.out + #44 + month#25.out + year#26.in + #56 + month#35.out + ccyy#36.in + + Forward route : (from state 16) + (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->(HERE) + Transitions : + 0:[0] -> 31 + 1:[1] -> 32 + 2:[2] -> 32 + 3:[3] -> 33 + 4:[4-9] -> 34 + NFA exit tags applying : + DS_DM + Attributes for <(DEFAULT)> : DS_DM + +DFA state 23 + NFA states : + year#15.#1 + + Forward route : (from state 17) + (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->0:[0]->(HERE) + Transitions : + 0:[0] -> 35 + 1:[1] -> 35 + 2:[2] -> 35 + 3:[3] -> 35 + 4:[4-9] -> 35 + +DFA state 24 + NFA states : + #26 + day#11.#1 + ccyy#19.#1 + + Forward route : (from state 17) + (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->(HERE) + Transitions : + 0:[0] -> 36 + 1:[1] -> 36 + 2:[2] -> 36 + 3:[3] -> 36 + 4:[4-9] -> 36 + NFA exit tags applying : + DS_MD + Attributes for <(DEFAULT)> : DS_MD + +DFA state 25 + NFA states : + #26 + day#11.#2 + year#15.#2 + 
ccyy#19.#1 + + Forward route : (from state 17) + (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->(HERE) + Transitions : + 0:[0] -> 36 + 1:[1] -> 36 + 2:[2] -> 37 + 3:[3] -> 37 + 4:[4-9] -> 37 + NFA exit tags applying : + DS_MD + Attributes for <(DEFAULT)> : DS_MD + +DFA state 26 + NFA states : + #26 + year#15.#1 + ccyy#19.#1 + + Forward route : (from state 17) + (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->4:[4-9]->(HERE) + Transitions : + 0:[0] -> 37 + 1:[1] -> 37 + 2:[2] -> 37 + 3:[3] -> 37 + 4:[4-9] -> 37 + NFA exit tags applying : + DS_MD + Attributes for <(DEFAULT)> : DS_MD + +DFA state 27 + NFA states : + #9 + #15 + scaled#4.in + scaled#4.after_value + + Forward route : (from state 18) + (START)->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->(HERE) + Transitions : + 0:[0] -> 38 + 1:[1] -> 38 + 2:[2] -> 38 + 3:[3] -> 38 + 4:[4-9] -> 38 + 5:[A-Za-z] -> 7 + +DFA state 28 + NFA states : + #30 + month#13.out + #38 + month#21.out + #41 + month#23.out + day#24.in + + Forward route : (from state 19) + (START)->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->(HERE) + Transitions : + 1:[1] -> 39 + 2:[2] -> 39 + 3:[3] -> 40 + 4:[4-9] -> 41 + NFA exit tags applying : + DS_YM + Attributes for <(DEFAULT)> : DS_YM + +DFA state 29 + NFA states : + #19 + scaled#4.out + month#17.#1 + month#31.#1 + month#33.#1 + + Forward route : (from state 20) + (START)->1:[1]->0:[0]->0:[0]->0:[0]->5:[A-Za-z]->(HERE) + Transitions : + 5:[A-Za-z] -> 42 + NFA exit tags applying : + DS_SCALED + Attributes for <(DEFAULT)> : DS_SCALED + +DFA state 30 + NFA states : + #24 + month#8.out + #47 + month#28.out + year#29.in + #59 + month#38.out + ccyy#39.in + + Forward route : (from state 21) + (START)->1:[1]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->(HERE) + Transitions : + 0:[0] -> 43 + 1:[1] -> 44 + 2:[2] -> 44 + 3:[3] -> 45 + 4:[4-9] -> 46 + NFA exit tags applying : + DS_DM + Attributes for <(DEFAULT)> : DS_DM + +DFA state 31 + NFA states : + year#26.#1 + + Forward route : (from state 22) + 
(START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->0:[0]->(HERE) + Transitions : + 0:[0] -> 47 + 1:[1] -> 47 + 2:[2] -> 47 + 3:[3] -> 47 + 4:[4-9] -> 47 + +DFA state 32 + NFA states : + ccyy#36.#1 + + Forward route : (from state 22) + (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->(HERE) + Transitions : + 0:[0] -> 48 + 1:[1] -> 48 + 2:[2] -> 48 + 3:[3] -> 48 + 4:[4-9] -> 48 + +DFA state 33 + NFA states : + year#26.#2 + ccyy#36.#1 + + Forward route : (from state 22) + (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->(HERE) + Transitions : + 0:[0] -> 48 + 1:[1] -> 48 + 2:[2] -> 49 + 3:[3] -> 49 + 4:[4-9] -> 49 + +DFA state 34 + NFA states : + year#26.#1 + ccyy#36.#1 + + Forward route : (from state 22) + (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->4:[4-9]->(HERE) + Transitions : + 0:[0] -> 49 + 1:[1] -> 49 + 2:[2] -> 49 + 3:[3] -> 49 + 4:[4-9] -> 49 + +DFA state 35 + NFA states : + #32 + year#15.out + + Forward route : (from state 23) + (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->0:[0]->0:[0]->(HERE) + Transitions : + NFA exit tags applying : + DS_MY + Attributes for <(DEFAULT)> : DS_MY + +DFA state 36 + NFA states : + #28 + day#11.out + ccyy#19.#2 + + Forward route : (from state 24) + (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->(HERE) + Transitions : + 0:[0] -> 50 + 1:[1] -> 50 + 2:[2] -> 50 + 3:[3] -> 50 + 4:[4-9] -> 50 + NFA exit tags applying : + DS_MD + Attributes for <(DEFAULT)> : DS_MD + +DFA state 37 + NFA states : + #32 + year#15.out + ccyy#19.#2 + + Forward route : (from state 25) + (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->2:[2]->(HERE) + Transitions : + 0:[0] -> 50 + 1:[1] -> 50 + 2:[2] -> 50 + 3:[3] -> 50 + 4:[4-9] -> 50 + NFA exit tags applying : + DS_MY + Attributes for <(DEFAULT)> : DS_MY + +DFA state 38 + NFA states : + #10 + #16 + scaled#4.in + scaled#4.after_value + + Forward route : (from state 27) + (START)->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->(HERE) + Transitions : + 0:[0] -> 51 + 
1:[1] -> 51 + 2:[2] -> 51 + 3:[3] -> 51 + 4:[4-9] -> 51 + 5:[A-Za-z] -> 7 + NFA exit tags applying : + DS_YYMMDD + Attributes for <(DEFAULT)> : DS_YYMMDD + +DFA state 39 + NFA states : + #39 + day#24.#1 + + Forward route : (from state 28) + (START)->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->(HERE) + Transitions : + 0:[0] -> 52 + 1:[1] -> 52 + 2:[2] -> 52 + 3:[3] -> 52 + 4:[4-9] -> 52 + NFA exit tags applying : + DS_YMD + Attributes for <(DEFAULT)> : DS_YMD + +DFA state 40 + NFA states : + #39 + day#24.#2 + + Forward route : (from state 28) + (START)->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->(HERE) + Transitions : + 0:[0] -> 52 + 1:[1] -> 52 + NFA exit tags applying : + DS_YMD + Attributes for <(DEFAULT)> : DS_YMD + +DFA state 41 + NFA states : + #39 + + Forward route : (from state 28) + (START)->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->4:[4-9]->(HERE) + Transitions : + NFA exit tags applying : + DS_YMD + Attributes for <(DEFAULT)> : DS_YMD + +DFA state 42 + NFA states : + month#17.#2 + month#31.#2 + month#33.#2 + + Forward route : (from state 29) + (START)->1:[1]->0:[0]->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->(HERE) + Transitions : + 5:[A-Za-z] -> 53 + +DFA state 43 + NFA states : + year#29.#1 + + Forward route : (from state 30) + (START)->1:[1]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->0:[0]->(HERE) + Transitions : + 0:[0] -> 54 + 1:[1] -> 54 + 2:[2] -> 54 + 3:[3] -> 54 + 4:[4-9] -> 54 + +DFA state 44 + NFA states : + ccyy#39.#1 + + Forward route : (from state 30) + (START)->1:[1]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->(HERE) + Transitions : + 0:[0] -> 55 + 1:[1] -> 55 + 2:[2] -> 55 + 3:[3] -> 55 + 4:[4-9] -> 55 + +DFA state 45 + NFA states : + year#29.#2 + ccyy#39.#1 + + Forward route : (from state 30) + (START)->1:[1]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->(HERE) + Transitions : + 0:[0] -> 55 + 1:[1] -> 55 + 2:[2] -> 56 + 3:[3] -> 56 + 4:[4-9] -> 56 + +DFA state 46 + NFA states : + year#29.#1 + 
ccyy#39.#1 + + Forward route : (from state 30) + (START)->1:[1]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->4:[4-9]->(HERE) + Transitions : + 0:[0] -> 56 + 1:[1] -> 56 + 2:[2] -> 56 + 3:[3] -> 56 + 4:[4-9] -> 56 + +DFA state 47 + NFA states : + #45 + year#26.out + + Forward route : (from state 31) + (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->0:[0]->0:[0]->(HERE) + Transitions : + NFA exit tags applying : + DS_DMY + Attributes for <(DEFAULT)> : DS_DMY + +DFA state 48 + NFA states : + ccyy#36.#2 + + Forward route : (from state 32) + (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->(HERE) + Transitions : + 0:[0] -> 57 + 1:[1] -> 57 + 2:[2] -> 57 + 3:[3] -> 57 + 4:[4-9] -> 57 + +DFA state 49 + NFA states : + #45 + year#26.out + ccyy#36.#2 + + Forward route : (from state 33) + (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->2:[2]->(HERE) + Transitions : + 0:[0] -> 57 + 1:[1] -> 57 + 2:[2] -> 57 + 3:[3] -> 57 + 4:[4-9] -> 57 + NFA exit tags applying : + DS_DMY + Attributes for <(DEFAULT)> : DS_DMY + +DFA state 50 + NFA states : + ccyy#19.#3 + + Forward route : (from state 36) + (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->0:[0]->(HERE) + Transitions : + 0:[0] -> 58 + 1:[1] -> 58 + 2:[2] -> 58 + 3:[3] -> 58 + 4:[4-9] -> 58 + +DFA state 51 + NFA states : + #17 + scaled#4.in + scaled#4.after_value + + Forward route : (from state 38) + (START)->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->(HERE) + Transitions : + 0:[0] -> 59 + 1:[1] -> 59 + 2:[2] -> 59 + 3:[3] -> 59 + 4:[4-9] -> 59 + 5:[A-Za-z] -> 7 + +DFA state 52 + NFA states : + #42 + day#24.out + + Forward route : (from state 39) + (START)->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->(HERE) + Transitions : + NFA exit tags applying : + DS_YMD + Attributes for <(DEFAULT)> : DS_YMD + +DFA state 53 + NFA states : + #34 + month#17.out + #50 + month#31.out + #53 + month#33.out + day#34.in + + Forward route : (from state 42) + 
(START)->1:[1]->0:[0]->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->(HERE) + Transitions : + 1:[1] -> 60 + 2:[2] -> 60 + 3:[3] -> 61 + 4:[4-9] -> 62 + NFA exit tags applying : + DS_YM + Attributes for <(DEFAULT)> : DS_YM + +DFA state 54 + NFA states : + #48 + year#29.out + + Forward route : (from state 43) + (START)->1:[1]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->0:[0]->0:[0]->(HERE) + Transitions : + NFA exit tags applying : + DS_DMY + Attributes for <(DEFAULT)> : DS_DMY + +DFA state 55 + NFA states : + ccyy#39.#2 + + Forward route : (from state 44) + (START)->1:[1]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->(HERE) + Transitions : + 0:[0] -> 63 + 1:[1] -> 63 + 2:[2] -> 63 + 3:[3] -> 63 + 4:[4-9] -> 63 + +DFA state 56 + NFA states : + #48 + year#29.out + ccyy#39.#2 + + Forward route : (from state 45) + (START)->1:[1]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->2:[2]->(HERE) + Transitions : + 0:[0] -> 63 + 1:[1] -> 63 + 2:[2] -> 63 + 3:[3] -> 63 + 4:[4-9] -> 63 + NFA exit tags applying : + DS_DMY + Attributes for <(DEFAULT)> : DS_DMY + +DFA state 57 + NFA states : + ccyy#36.#3 + + Forward route : (from state 48) + (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->0:[0]->(HERE) + Transitions : + 0:[0] -> 64 + 1:[1] -> 64 + 2:[2] -> 64 + 3:[3] -> 64 + 4:[4-9] -> 64 + +DFA state 58 + NFA states : + #36 + ccyy#19.out + + Forward route : (from state 50) + (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->0:[0]->0:[0]->(HERE) + Transitions : + NFA exit tags applying : + DS_MY + Attributes for <(DEFAULT)> : DS_MY + +DFA state 59 + NFA states : + #18 + scaled#4.in + scaled#4.after_value + + Forward route : (from state 51) + (START)->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->(HERE) + Transitions : + 0:[0] -> 65 + 1:[1] -> 65 + 2:[2] -> 65 + 3:[3] -> 65 + 4:[4-9] -> 65 + 5:[A-Za-z] -> 7 + NFA exit tags applying : + DS_YYMMDD + Attributes for <(DEFAULT)> : DS_YYMMDD + +DFA state 60 + NFA states : + #51 + 
day#34.#1 + + Forward route : (from state 53) + (START)->1:[1]->0:[0]->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->(HERE) + Transitions : + 0:[0] -> 66 + 1:[1] -> 66 + 2:[2] -> 66 + 3:[3] -> 66 + 4:[4-9] -> 66 + NFA exit tags applying : + DS_YMD + Attributes for <(DEFAULT)> : DS_YMD + +DFA state 61 + NFA states : + #51 + day#34.#2 + + Forward route : (from state 53) + (START)->1:[1]->0:[0]->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->(HERE) + Transitions : + 0:[0] -> 66 + 1:[1] -> 66 + NFA exit tags applying : + DS_YMD + Attributes for <(DEFAULT)> : DS_YMD + +DFA state 62 + NFA states : + #51 + + Forward route : (from state 53) + (START)->1:[1]->0:[0]->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->4:[4-9]->(HERE) + Transitions : + NFA exit tags applying : + DS_YMD + Attributes for <(DEFAULT)> : DS_YMD + +DFA state 63 + NFA states : + ccyy#39.#3 + + Forward route : (from state 55) + (START)->1:[1]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->0:[0]->(HERE) + Transitions : + 0:[0] -> 67 + 1:[1] -> 67 + 2:[2] -> 67 + 3:[3] -> 67 + 4:[4-9] -> 67 + +DFA state 64 + NFA states : + #57 + ccyy#36.out + + Forward route : (from state 57) + (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->0:[0]->0:[0]->(HERE) + Transitions : + NFA exit tags applying : + DS_DMY + Attributes for <(DEFAULT)> : DS_DMY + +DFA state 65 + NFA states : + scaled#4.in + scaled#4.after_value + + Forward route : (from state 59) + (START)->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->(HERE) + Transitions : + 0:[0] -> 65 + 1:[1] -> 65 + 2:[2] -> 65 + 3:[3] -> 65 + 4:[4-9] -> 65 + 5:[A-Za-z] -> 7 + +DFA state 66 + NFA states : + #54 + day#34.out + + Forward route : (from state 60) + (START)->1:[1]->0:[0]->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->(HERE) + Transitions : + NFA exit tags applying : + DS_YMD + Attributes for <(DEFAULT)> : DS_YMD + +DFA state 67 + NFA states : + #60 + ccyy#39.out + + Forward route : 
(from state 63) + (START)->1:[1]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->0:[0]->0:[0]->(HERE) + Transitions : + NFA exit tags applying : + DS_DMY + Attributes for <(DEFAULT)> : DS_DMY + + +Entry states in DFA: +Entry <(ONLY ENTRY)> : 0 +Searching for dead states... +(no dead states found) + +----------------------------- +------ COMPRESSING DFA ------ +----------------------------- +Old DFA state 0 becomes 0 +Old DFA state 1 becomes 1 +Old DFA state 2 becomes 2 +Old DFA state 3 becomes 3 +Old DFA state 4 becomes 4 +Old DFA state 5 becomes 5 +Old DFA state 6 becomes 6 +Old DFA state 7 becomes 7 +Old DFA state 8 becomes 8 +Old DFA state 9 becomes 9 +Old DFA state 10 becomes 10 +Old DFA state 11 becomes 11 +Old DFA state 12 becomes 12 +Old DFA state 13 becomes 13 +Old DFA state 14 becomes 14 +Old DFA state 15 becomes 9 (formerly 9) +Old DFA state 16 becomes 15 +Old DFA state 17 becomes 16 +Old DFA state 18 becomes 17 +Old DFA state 19 becomes 18 +Old DFA state 20 becomes 19 +Old DFA state 21 becomes 15 (formerly 16) +Old DFA state 22 becomes 20 +Old DFA state 23 becomes 21 +Old DFA state 24 becomes 22 +Old DFA state 25 becomes 23 +Old DFA state 26 becomes 24 +Old DFA state 27 becomes 25 +Old DFA state 28 becomes 26 +Old DFA state 29 becomes 13 (formerly 13) +Old DFA state 30 becomes 20 (formerly 22) +Old DFA state 31 becomes 27 +Old DFA state 32 becomes 28 +Old DFA state 33 becomes 29 +Old DFA state 34 becomes 30 +Old DFA state 35 becomes 31 +Old DFA state 36 becomes 32 +Old DFA state 37 becomes 33 +Old DFA state 38 becomes 34 +Old DFA state 39 becomes 35 +Old DFA state 40 becomes 36 +Old DFA state 41 becomes 37 +Old DFA state 42 becomes 18 (formerly 19) +Old DFA state 43 becomes 27 (formerly 31) +Old DFA state 44 becomes 28 (formerly 32) +Old DFA state 45 becomes 29 (formerly 33) +Old DFA state 46 becomes 30 (formerly 34) +Old DFA state 47 becomes 38 +Old DFA state 48 becomes 39 +Old DFA state 49 becomes 40 +Old DFA state 50 becomes 21 (formerly 23) 
+Old DFA state 51 becomes 41 +Old DFA state 52 becomes 37 (formerly 41) +Old DFA state 53 becomes 26 (formerly 28) +Old DFA state 54 becomes 38 (formerly 47) +Old DFA state 55 becomes 39 (formerly 48) +Old DFA state 56 becomes 40 (formerly 49) +Old DFA state 57 becomes 27 (formerly 31) +Old DFA state 58 becomes 31 (formerly 35) +Old DFA state 59 becomes 42 +Old DFA state 60 becomes 35 (formerly 39) +Old DFA state 61 becomes 36 (formerly 40) +Old DFA state 62 becomes 37 (formerly 41) +Old DFA state 63 becomes 27 (formerly 31) +Old DFA state 64 becomes 38 (formerly 47) +Old DFA state 65 becomes 43 +Old DFA state 66 becomes 37 (formerly 41) +Old DFA state 67 becomes 38 (formerly 47) +Entry <(ONLY ENTRY)>, formerly state 0, now state 0 +------------------------------- +DFA structure after compression +------------------------------- +DFA state 0 + Forward route : + (START)->(HERE) + Transitions : + 0:[0] -> 1 + 1:[1] -> 2 + 2:[2] -> 2 + 3:[3] -> 3 + 4:[4-9] -> 4 + 5:[A-Za-z] -> 5 + +DFA state 1 + Forward route : (from state 0) + (START)->0:[0]->(HERE) + Transitions : + 0:[0] -> 6 + 1:[1] -> 6 + 2:[2] -> 6 + 3:[3] -> 6 + 4:[4-9] -> 6 + 5:[A-Za-z] -> 7 + +DFA state 2 + Forward route : (from state 0) + (START)->1:[1]->(HERE) + Transitions : + 0:[0] -> 8 + 1:[1] -> 8 + 2:[2] -> 8 + 3:[3] -> 8 + 4:[4-9] -> 8 + 5:[A-Za-z] -> 9 + NFA exit tags applying : + DS_D + Attributes for <(DEFAULT)> : DS_D + +DFA state 3 + Forward route : (from state 0) + (START)->3:[3]->(HERE) + Transitions : + 0:[0] -> 8 + 1:[1] -> 8 + 2:[2] -> 10 + 3:[3] -> 10 + 4:[4-9] -> 10 + 5:[A-Za-z] -> 9 + Use state 2 as basis (3 fixups) + NFA exit tags applying : + DS_D + Attributes for <(DEFAULT)> : DS_D + +DFA state 4 + Forward route : (from state 0) + (START)->4:[4-9]->(HERE) + Transitions : + 0:[0] -> 10 + 1:[1] -> 10 + 2:[2] -> 10 + 3:[3] -> 10 + 4:[4-9] -> 10 + 5:[A-Za-z] -> 9 + NFA exit tags applying : + DS_D + Attributes for <(DEFAULT)> : DS_D + +DFA state 5 + Forward route : (from state 0) + 
(START)->5:[A-Za-z]->(HERE) + Transitions : + 5:[A-Za-z] -> 11 + +DFA state 6 + Forward route : (from state 1) + (START)->0:[0]->0:[0]->(HERE) + Transitions : + 0:[0] -> 12 + 1:[1] -> 12 + 2:[2] -> 12 + 3:[3] -> 12 + 4:[4-9] -> 12 + 5:[A-Za-z] -> 13 + NFA exit tags applying : + DS_Y + Attributes for <(DEFAULT)> : DS_Y + +DFA state 7 + Forward route : (from state 1) + (START)->0:[0]->5:[A-Za-z]->(HERE) + Transitions : + NFA exit tags applying : + DS_SCALED + Attributes for <(DEFAULT)> : DS_SCALED + +DFA state 8 + Forward route : (from state 2) + (START)->1:[1]->0:[0]->(HERE) + Transitions : + 0:[0] -> 14 + 1:[1] -> 14 + 2:[2] -> 14 + 3:[3] -> 14 + 4:[4-9] -> 14 + 5:[A-Za-z] -> 9 + NFA exit tags applying : + DS_D + Attributes for <(DEFAULT)> : DS_D + +DFA state 9 + Forward route : (from state 2) + (START)->1:[1]->5:[A-Za-z]->(HERE) + Transitions : + 5:[A-Za-z] -> 15 + NFA exit tags applying : + DS_SCALED + Attributes for <(DEFAULT)> : DS_SCALED + +DFA state 10 + Forward route : (from state 3) + (START)->3:[3]->2:[2]->(HERE) + Transitions : + 0:[0] -> 14 + 1:[1] -> 14 + 2:[2] -> 14 + 3:[3] -> 14 + 4:[4-9] -> 14 + 5:[A-Za-z] -> 13 + Use state 8 as basis (1 fixups) + NFA exit tags applying : + DS_Y + Attributes for <(DEFAULT)> : DS_Y + +DFA state 11 + Forward route : (from state 5) + (START)->5:[A-Za-z]->5:[A-Za-z]->(HERE) + Transitions : + 5:[A-Za-z] -> 16 + +DFA state 12 + Forward route : (from state 6) + (START)->0:[0]->0:[0]->0:[0]->(HERE) + Transitions : + 0:[0] -> 17 + 1:[1] -> 17 + 2:[2] -> 17 + 3:[3] -> 17 + 4:[4-9] -> 17 + 5:[A-Za-z] -> 7 + +DFA state 13 + Forward route : (from state 6) + (START)->0:[0]->0:[0]->5:[A-Za-z]->(HERE) + Transitions : + 5:[A-Za-z] -> 18 + NFA exit tags applying : + DS_SCALED + Attributes for <(DEFAULT)> : DS_SCALED + +DFA state 14 + Forward route : (from state 8) + (START)->1:[1]->0:[0]->0:[0]->(HERE) + Transitions : + 0:[0] -> 19 + 1:[1] -> 19 + 2:[2] -> 19 + 3:[3] -> 19 + 4:[4-9] -> 19 + 5:[A-Za-z] -> 7 + +DFA state 15 + Forward 
route : (from state 9) + (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->(HERE) + Transitions : + 5:[A-Za-z] -> 20 + +DFA state 16 + Forward route : (from state 11) + (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->(HERE) + Transitions : + 0:[0] -> 21 + 1:[1] -> 22 + 2:[2] -> 22 + 3:[3] -> 23 + 4:[4-9] -> 24 + NFA exit tags applying : + DS_M + Attributes for <(DEFAULT)> : DS_M + +DFA state 17 + Forward route : (from state 12) + (START)->0:[0]->0:[0]->0:[0]->0:[0]->(HERE) + Transitions : + 0:[0] -> 25 + 1:[1] -> 25 + 2:[2] -> 25 + 3:[3] -> 25 + 4:[4-9] -> 25 + 5:[A-Za-z] -> 7 + +DFA state 18 + Forward route : (from state 13) + (START)->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->(HERE) + Transitions : + 5:[A-Za-z] -> 26 + +DFA state 19 + Forward route : (from state 14) + (START)->1:[1]->0:[0]->0:[0]->0:[0]->(HERE) + Transitions : + 0:[0] -> 25 + 1:[1] -> 25 + 2:[2] -> 25 + 3:[3] -> 25 + 4:[4-9] -> 25 + 5:[A-Za-z] -> 13 + Use state 17 as basis (1 fixups) + NFA exit tags applying : + DS_Y + Attributes for <(DEFAULT)> : DS_Y + +DFA state 20 + Forward route : (from state 15) + (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->(HERE) + Transitions : + 0:[0] -> 27 + 1:[1] -> 28 + 2:[2] -> 28 + 3:[3] -> 29 + 4:[4-9] -> 30 + NFA exit tags applying : + DS_DM + Attributes for <(DEFAULT)> : DS_DM + +DFA state 21 + Forward route : (from state 16) + (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->0:[0]->(HERE) + Transitions : + 0:[0] -> 31 + 1:[1] -> 31 + 2:[2] -> 31 + 3:[3] -> 31 + 4:[4-9] -> 31 + +DFA state 22 + Forward route : (from state 16) + (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->(HERE) + Transitions : + 0:[0] -> 32 + 1:[1] -> 32 + 2:[2] -> 32 + 3:[3] -> 32 + 4:[4-9] -> 32 + NFA exit tags applying : + DS_MD + Attributes for <(DEFAULT)> : DS_MD + +DFA state 23 + Forward route : (from state 16) + (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->(HERE) + Transitions : + 0:[0] -> 32 + 1:[1] -> 32 + 2:[2] -> 33 + 3:[3] -> 33 + 4:[4-9] -> 33 + NFA exit tags applying : + DS_MD + 
Attributes for <(DEFAULT)> : DS_MD + +DFA state 24 + Forward route : (from state 16) + (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->4:[4-9]->(HERE) + Transitions : + 0:[0] -> 33 + 1:[1] -> 33 + 2:[2] -> 33 + 3:[3] -> 33 + 4:[4-9] -> 33 + Use state 23 as basis (2 fixups) + NFA exit tags applying : + DS_MD + Attributes for <(DEFAULT)> : DS_MD + +DFA state 25 + Forward route : (from state 17) + (START)->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->(HERE) + Transitions : + 0:[0] -> 34 + 1:[1] -> 34 + 2:[2] -> 34 + 3:[3] -> 34 + 4:[4-9] -> 34 + 5:[A-Za-z] -> 7 + +DFA state 26 + Forward route : (from state 18) + (START)->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->(HERE) + Transitions : + 1:[1] -> 35 + 2:[2] -> 35 + 3:[3] -> 36 + 4:[4-9] -> 37 + NFA exit tags applying : + DS_YM + Attributes for <(DEFAULT)> : DS_YM + +DFA state 27 + Forward route : (from state 20) + (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->0:[0]->(HERE) + Transitions : + 0:[0] -> 38 + 1:[1] -> 38 + 2:[2] -> 38 + 3:[3] -> 38 + 4:[4-9] -> 38 + +DFA state 28 + Forward route : (from state 20) + (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->(HERE) + Transitions : + 0:[0] -> 39 + 1:[1] -> 39 + 2:[2] -> 39 + 3:[3] -> 39 + 4:[4-9] -> 39 + +DFA state 29 + Forward route : (from state 20) + (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->(HERE) + Transitions : + 0:[0] -> 39 + 1:[1] -> 39 + 2:[2] -> 40 + 3:[3] -> 40 + 4:[4-9] -> 40 + +DFA state 30 + Forward route : (from state 20) + (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->4:[4-9]->(HERE) + Transitions : + 0:[0] -> 40 + 1:[1] -> 40 + 2:[2] -> 40 + 3:[3] -> 40 + 4:[4-9] -> 40 + Use state 29 as basis (2 fixups) + +DFA state 31 + Forward route : (from state 21) + (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->0:[0]->0:[0]->(HERE) + Transitions : + NFA exit tags applying : + DS_MY + Attributes for <(DEFAULT)> : DS_MY + +DFA state 32 + Forward route : (from state 22) + 
(START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->(HERE) + Transitions : + 0:[0] -> 21 + 1:[1] -> 21 + 2:[2] -> 21 + 3:[3] -> 21 + 4:[4-9] -> 21 + NFA exit tags applying : + DS_MD + Attributes for <(DEFAULT)> : DS_MD + +DFA state 33 + Forward route : (from state 23) + (START)->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->2:[2]->(HERE) + Transitions : + 0:[0] -> 21 + 1:[1] -> 21 + 2:[2] -> 21 + 3:[3] -> 21 + 4:[4-9] -> 21 + Use state 32 as basis (0 fixups) + NFA exit tags applying : + DS_MY + Attributes for <(DEFAULT)> : DS_MY + +DFA state 34 + Forward route : (from state 25) + (START)->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->(HERE) + Transitions : + 0:[0] -> 41 + 1:[1] -> 41 + 2:[2] -> 41 + 3:[3] -> 41 + 4:[4-9] -> 41 + 5:[A-Za-z] -> 7 + NFA exit tags applying : + DS_YYMMDD + Attributes for <(DEFAULT)> : DS_YYMMDD + +DFA state 35 + Forward route : (from state 26) + (START)->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->(HERE) + Transitions : + 0:[0] -> 37 + 1:[1] -> 37 + 2:[2] -> 37 + 3:[3] -> 37 + 4:[4-9] -> 37 + NFA exit tags applying : + DS_YMD + Attributes for <(DEFAULT)> : DS_YMD + +DFA state 36 + Forward route : (from state 26) + (START)->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->(HERE) + Transitions : + 0:[0] -> 37 + 1:[1] -> 37 + NFA exit tags applying : + DS_YMD + Attributes for <(DEFAULT)> : DS_YMD + +DFA state 37 + Forward route : (from state 26) + (START)->0:[0]->0:[0]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->4:[4-9]->(HERE) + Transitions : + NFA exit tags applying : + DS_YMD + Attributes for <(DEFAULT)> : DS_YMD + +DFA state 38 + Forward route : (from state 27) + (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->0:[0]->0:[0]->(HERE) + Transitions : + NFA exit tags applying : + DS_DMY + Attributes for <(DEFAULT)> : DS_DMY + +DFA state 39 + Forward route : (from state 28) + (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->1:[1]->0:[0]->(HERE) + Transitions : + 0:[0] -> 27 + 1:[1] -> 27 + 2:[2] -> 27 + 3:[3] -> 27 + 
4:[4-9] -> 27 + +DFA state 40 + Forward route : (from state 29) + (START)->1:[1]->5:[A-Za-z]->5:[A-Za-z]->5:[A-Za-z]->3:[3]->2:[2]->(HERE) + Transitions : + 0:[0] -> 27 + 1:[1] -> 27 + 2:[2] -> 27 + 3:[3] -> 27 + 4:[4-9] -> 27 + Use state 39 as basis (0 fixups) + NFA exit tags applying : + DS_DMY + Attributes for <(DEFAULT)> : DS_DMY + +DFA state 41 + Forward route : (from state 34) + (START)->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->(HERE) + Transitions : + 0:[0] -> 42 + 1:[1] -> 42 + 2:[2] -> 42 + 3:[3] -> 42 + 4:[4-9] -> 42 + 5:[A-Za-z] -> 7 + +DFA state 42 + Forward route : (from state 41) + (START)->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->(HERE) + Transitions : + 0:[0] -> 43 + 1:[1] -> 43 + 2:[2] -> 43 + 3:[3] -> 43 + 4:[4-9] -> 43 + 5:[A-Za-z] -> 7 + NFA exit tags applying : + DS_YYMMDD + Attributes for <(DEFAULT)> : DS_YYMMDD + +DFA state 43 + Forward route : (from state 42) + (START)->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->0:[0]->(HERE) + Transitions : + 0:[0] -> 43 + 1:[1] -> 43 + 2:[2] -> 43 + 3:[3] -> 43 + 4:[4-9] -> 43 + 5:[A-Za-z] -> 7 + Use state 42 as basis (0 fixups) + + +Entry states in DFA: +Entry <(ONLY ENTRY)> : 0 diff --git a/src/mairix/db.c b/src/mairix/db.c @@ -0,0 +1,1297 @@ +/* + mairix - message index builder and finder for maildir folders. + + ********************************************************************** + * Copyright (C) Richard P. Curnow 2002,2003,2004,2005,2006,2007,2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +/* Handle complete database */ + +#include "mairix.h" +#include "reader.h" +#include <ctype.h> +#include <assert.h> +#include <sys/time.h> +#include <unistd.h> + +struct sortable_token {/*{{{*/ + char *text; + int index; +}; +/*}}}*/ +static int compare_sortable_tokens(const void *a, const void *b)/*{{{*/ +{ + const struct sortable_token *aa = (const struct sortable_token *) a; + const struct sortable_token *bb = (const struct sortable_token *) b; + int foo; + foo = strcmp(aa->text, bb->text); + if (foo) { + return foo; + } else { + if (aa->index < bb->index) return -1; + else if (aa->index > bb->index) return +1; + else return 0; + } +} +/*}}}*/ +static void check_toktable_enc_integrity(int n_msgs, struct toktable *table)/*{{{*/ +{ + /* FIXME : Check reachability of tokens that are displaced from their natural + * hash bucket (if deletions have occurred during purge). 
*/ + + int idx, incr; + int i, k; + unsigned char *j, *last_char; + int broken_chains = 0; + struct sortable_token *sort_list; + int any_duplicates; + + for (i=0; i<table->size; i++) { + struct token *tok = table->tokens[i]; + if (tok) { + idx = 0; + incr = 0; + last_char = tok->match0.msginfo + tok->match0.n; + for (j = tok->match0.msginfo; j < last_char; ) { + incr = read_increment(&j); + idx += incr; + } + if (idx != tok->match0.highest) { + fprintf(stderr, "broken encoding chain for token <%s>, highest=%ld\n", tok->text, tok->match0.highest); + fflush(stderr); + broken_chains = 1; + } + if (idx >= n_msgs) { + fprintf(stderr, "end of chain higher than number of message paths (%d) for token <%s>\n", n_msgs, tok->text); + fflush(stderr); + broken_chains = 1; + } + } + } + + assert(!broken_chains); + + /* Check there are no duplicated tokens in the table. */ + sort_list = new_array(struct sortable_token, table->n); + k = 0; + for (i=0; i<table->size; i++) { + struct token *tok = table->tokens[i]; + if (tok) { + sort_list[k].text = new_string(tok->text); + sort_list[k].index = i; + k++; + } + } + assert(k == table->n); + + qsort(sort_list, table->n, sizeof(struct sortable_token), compare_sortable_tokens); + /* Check for uniqueness of neighbouring token texts */ + any_duplicates = 0; + for (i=0; i<(table->n - 1); i++) { + if (!strcmp(sort_list[i].text, sort_list[i+1].text)) { + fprintf(stderr, "Token table contains duplicated token %s at indices %d and %d\n", + sort_list[i].text, sort_list[i].index, sort_list[i+1].index); + any_duplicates = 1; + } + } + + /* release */ + for (i=0; i<table->n; i++) { + free(sort_list[i].text); + } + free(sort_list); + + if (any_duplicates) { + fprintf(stderr, "Token table contained duplicate entries, aborting\n"); + assert(0); + } +} +/*}}}*/ +static int compare_strings(const void *a, const void *b)/*{{{*/ +{ + const char **aa = (const char **) a; + const char **bb = (const char **) b; + return strcmp(*aa, *bb); +} +/*}}}*/ +static 
void check_message_path_integrity(struct database *db)/*{{{*/ +{ + /* TODO : for now only checks integrity of non-mbox paths. */ + /* Check there are no duplicates */ + int i; + int n; + int has_duplicate = 0; + + char **paths; + paths = new_array(char *, db->n_msgs); + for (i=0, n=0; i<db->n_msgs; i++) { + switch (db->type[i]) { + case MTY_DEAD: + case MTY_MBOX: + break; + case MTY_FILE: + paths[n++] = db->msgs[i].src.mpf.path; + break; + } + } + + qsort(paths, n, sizeof(char *), compare_strings); + + for (i=1; i<n; i++) { + if (!strcmp(paths[i-1], paths[i])) { + fprintf(stderr, "Path <%s> repeated\n", paths[i]); + has_duplicate = 1; + } + } + + fflush(stderr); + assert(!has_duplicate); + + free(paths); + return; +} +/*}}}*/ +void check_database_integrity(struct database *db)/*{{{*/ +{ + if (verbose) fprintf(stderr, "Checking message path integrity\n"); + check_message_path_integrity(db); + + /* Just check encoding chains for now */ + if (verbose) fprintf(stderr, "Checking to\n"); + check_toktable_enc_integrity(db->n_msgs, db->to); + if (verbose) fprintf(stderr, "Checking cc\n"); + check_toktable_enc_integrity(db->n_msgs, db->cc); + if (verbose) fprintf(stderr, "Checking from\n"); + check_toktable_enc_integrity(db->n_msgs, db->from); + if (verbose) fprintf(stderr, "Checking subject\n"); + check_toktable_enc_integrity(db->n_msgs, db->subject); + if (verbose) fprintf(stderr, "Checking body\n"); + check_toktable_enc_integrity(db->n_msgs, db->body); + if (verbose) fprintf(stderr, "Checking attachment_name\n"); + check_toktable_enc_integrity(db->n_msgs, db->attachment_name); +} +/*}}}*/ +struct database *new_database(unsigned int hash_key)/*{{{*/ +{ + struct database *result = new(struct database); + struct timeval tv; + pid_t pid; + + result->to = new_toktable(); + result->cc = new_toktable(); + result->from = new_toktable(); + result->subject = new_toktable(); + result->body = new_toktable(); + result->attachment_name = new_toktable(); + + result->msg_ids = 
new_toktable2(); + + if ( hash_key == CREATE_RANDOM_DATABASE_HASH ) + { + gettimeofday(&tv, NULL); + pid = getpid(); + hash_key = tv.tv_sec ^ (pid ^ (tv.tv_usec << 15)); + } + result->hash_key = hash_key; + + result->msgs = NULL; + result->type = NULL; + result->n_msgs = 0; + result->max_msgs = 0; + + result->mboxen = NULL; + result->n_mboxen = 0; + result->max_mboxen = 0; + + return result; +} +/*}}}*/ +void free_database(struct database *db)/*{{{*/ +{ + int i; + + free_toktable(db->to); + free_toktable(db->cc); + free_toktable(db->from); + free_toktable(db->subject); + free_toktable(db->body); + free_toktable(db->attachment_name); + free_toktable2(db->msg_ids); + + if (db->msgs) { + for (i=0; i<db->n_msgs; i++) { + switch (db->type[i]) { + case MTY_DEAD: + break; + case MTY_MBOX: + break; + case MTY_FILE: + assert(db->msgs[i].src.mpf.path); + free(db->msgs[i].src.mpf.path); + break; + } + } + free(db->msgs); + free(db->type); + } + + free(db); +} +/*}}}*/ + +static int get_max (int a, int b) {/*{{{*/ + return (a > b) ? 
a : b; +} +/*}}}*/ +static void import_toktable(char *data, unsigned int hash_key, int n_msgs, struct toktable_db *in, struct toktable *out)/*{{{*/ +{ + int n, size, i; + + n = in->n; + size = 1; + while (size < n) size <<= 1; + size <<= 1; /* safe hash table size */ + + out->size = size; + out->mask = size - 1; + out->n = n; + out->tokens = new_array(struct token *, size); + memset(out->tokens, 0, size * sizeof(struct token *)); + out->hwm = (n + size) >> 1; + + for (i=0; i<n; i++) { + unsigned int hash, index; + char *text; + unsigned char *enc; + int enc_len; + struct token *nt; + int enc_hi; + int idx, incr; + unsigned char *j; + + /* Recover enc_len and enc_hi from the data */ + enc = (unsigned char *) data + in->enc_offsets[i]; + idx = 0; + for (j = enc; *j != 0xff; ) { + incr = read_increment(&j); + idx += incr; + } + enc_len = j - enc; + enc_hi = idx; + + text = data + in->tok_offsets[i]; + hash = hashfn((unsigned char *) text, strlen(text), hash_key); + + nt = new(struct token); + nt->hashval = hash; + nt->text = new_string(text); + /* Allow a bit of headroom for adding more entries later */ + nt->match0.max = get_max(16, enc_len + (enc_len >> 1)); + nt->match0.n = enc_len; + nt->match0.highest = enc_hi; + assert(nt->match0.highest < n_msgs); + nt->match0.msginfo = new_array(unsigned char, nt->match0.max); + memcpy(nt->match0.msginfo, enc, nt->match0.n); + + index = hash & out->mask; + while (out->tokens[index]) { + /* Audit to look for corrupt database with multiple entries for the same + * string. */ + if (!strcmp(nt->text, out->tokens[index]->text)) { + fprintf(stderr, "\n!!! Corrupt token table found in database, token <%s> duplicated, aborting\n", + nt->text); + fprintf(stderr, " Delete the database file and rebuild from scratch as a workaround\n"); + /* No point going on - need to find out why the database got corrupted + * in the 1st place. Workaround for user - rebuild database from + * scratch by deleting it then rerunning. 
*/ + unlock_and_exit(1); + } + ++index; + index &= out->mask; + } + + out->tokens[index] = nt; + } +} +/*}}}*/ +static void import_toktable2(char *data, unsigned int hash_key, int n_msgs, struct toktable2_db *in, struct toktable2 *out)/*{{{*/ +{ + int n, size, i; + + n = in->n; + size = 1; + while (size < n) size <<= 1; + size <<= 1; /* safe hash table size */ + + out->size = size; + out->mask = size - 1; + out->n = n; + out->tokens = new_array(struct token2 *, size); + memset(out->tokens, 0, size * sizeof(struct token *)); + out->hwm = (n + size) >> 1; + + for (i=0; i<n; i++) { + unsigned int hash, index; + char *text; + struct token2 *nt; + unsigned char *enc0, *enc1; + int enc0_len, enc1_len; + int enc0_hi, enc1_hi; + int idx, incr; + unsigned char *j; + +/*{{{ do enc0*/ + enc0 = (unsigned char *) data + in->enc0_offsets[i]; + idx = 0; + for (j = enc0; *j != 0xff; ) { + incr = read_increment(&j); + idx += incr; + } + enc0_len = j - enc0; + enc0_hi = idx; +/*}}}*/ +/*{{{ do enc1*/ + enc1 = (unsigned char *) data + in->enc1_offsets[i]; + idx = 0; + for (j = enc1; *j != 0xff; ) { + incr = read_increment(&j); + idx += incr; + } + enc1_len = j - enc1; + enc1_hi = idx; +/*}}}*/ + + text = data + in->tok_offsets[i]; + hash = hashfn((unsigned char *) text, strlen(text), hash_key); + + nt = new(struct token2); + nt->hashval = hash; + nt->text = new_string(text); + /* Allow a bit of headroom for adding more entries later */ + /*{{{ set up match0 chain */ + nt->match0.max = get_max(16, enc0_len + (enc0_len >> 1)); + nt->match0.n = enc0_len; + nt->match0.highest = enc0_hi; + assert(nt->match0.highest < n_msgs); + nt->match0.msginfo = new_array(unsigned char, nt->match0.max); + memcpy(nt->match0.msginfo, enc0, nt->match0.n); + /*}}}*/ + /*{{{ set up match1 chain */ + nt->match1.max = get_max(16, enc1_len + (enc1_len >> 1)); + nt->match1.n = enc1_len; + nt->match1.highest = enc1_hi; + assert(nt->match1.highest < n_msgs); + nt->match1.msginfo = new_array(unsigned char, 
nt->match1.max); + memcpy(nt->match1.msginfo, enc1, nt->match1.n); + /*}}}*/ + + index = hash & out->mask; + while (out->tokens[index]) { + ++index; + index &= out->mask; + } + + out->tokens[index] = nt; + } +} +/*}}}*/ +struct database *new_database_from_file(char *db_filename, int do_integrity_checks)/*{{{*/ +{ + /* Read existing database from file for doing incremental update */ + struct database *result; + struct read_db *input; + int i, n, N; + + result = new_database( CREATE_RANDOM_DATABASE_HASH ); + input = open_db(db_filename); + if (!input) { + /* Nothing to initialise */ + if (verbose) printf("Database file was empty, creating a new database\n"); + return result; + } + + /* Build pathname information */ + n = result->n_msgs = input->n_msgs; + result->max_msgs = input->n_msgs; /* let it be extended as-and-when */ + result->msgs = new_array(struct msgpath, n); + result->type = new_array(enum message_type, n); + + result->hash_key = input->hash_key; + + /* Set up mbox structures */ + N = result->n_mboxen = result->max_mboxen = input->n_mboxen; + result->mboxen = N ? (new_array(struct mbox, N)) : NULL; + for (i=0; i<N; i++) { + int nn; + if (input->mbox_paths_table[i]) { + result->mboxen[i].path = new_string(input->data + input->mbox_paths_table[i]); + } else { + /* mbox is dead. */ + result->mboxen[i].path = NULL; + } + result->mboxen[i].file_mtime = input->mbox_mtime_table[i]; + result->mboxen[i].file_size = input->mbox_size_table[i]; + nn = result->mboxen[i].n_msgs = input->mbox_entries_table[i]; + result->mboxen[i].max_msgs = nn; + result->mboxen[i].start = new_array(off_t, nn); + result->mboxen[i].len = new_array(size_t, nn); + result->mboxen[i].check_all = new_array(checksum_t, nn); + /* Copy the entire checksum table in one go. 
*/ + memcpy(result->mboxen[i].check_all, + input->data + input->mbox_checksum_table[i], + nn * sizeof(checksum_t)); + result->mboxen[i].n_so_far = 0; + } + + for (i=0; i<n; i++) { + switch (rd_msg_type(input, i)) { + case DB_MSG_DEAD: + result->type[i] = MTY_DEAD; + break; + case DB_MSG_FILE: + result->type[i] = MTY_FILE; + result->msgs[i].src.mpf.path = new_string(input->data + input->path_offsets[i]); + result->msgs[i].src.mpf.mtime = input->mtime_table[i]; + result->msgs[i].src.mpf.size = input->size_table[i]; + break; + case DB_MSG_MBOX: + { + unsigned int mbi, msgi; + int n; + struct mbox *mb; + result->type[i] = MTY_MBOX; + decode_mbox_indices(input->path_offsets[i], &mbi, &msgi); + result->msgs[i].src.mbox.file_index = mbi; + mb = &result->mboxen[mbi]; + assert(mb->n_so_far == msgi); + n = mb->n_so_far; + result->msgs[i].src.mbox.msg_index = n; + mb->start[n] = input->mtime_table[i]; + mb->len[n] = input->size_table[i]; + ++mb->n_so_far; + } + + break; + } + result->msgs[i].seen = (input->msg_type_and_flags[i] & FLAG_SEEN) ? 1:0; + result->msgs[i].replied = (input->msg_type_and_flags[i] & FLAG_REPLIED) ? 1:0; + result->msgs[i].flagged = (input->msg_type_and_flags[i] & FLAG_FLAGGED) ? 
1:0; + result->msgs[i].date = input->date_table[i]; + result->msgs[i].tid = input->tid_table[i]; + } + + import_toktable(input->data, input->hash_key, result->n_msgs, &input->to, result->to); + import_toktable(input->data, input->hash_key, result->n_msgs, &input->cc, result->cc); + import_toktable(input->data, input->hash_key, result->n_msgs, &input->from, result->from); + import_toktable(input->data, input->hash_key, result->n_msgs, &input->subject, result->subject); + import_toktable(input->data, input->hash_key, result->n_msgs, &input->body, result->body); + import_toktable(input->data, input->hash_key, result->n_msgs, &input->attachment_name, result->attachment_name); + import_toktable2(input->data, input->hash_key, result->n_msgs, &input->msg_ids, result->msg_ids); + + close_db(input); + + if (do_integrity_checks) { + check_database_integrity(result); + } + + return result; +} +/*}}}*/ + +static void add_angled_terms(int file_index, unsigned int hash_key, struct toktable2 *table, int add_to_chain1, char *s)/*{{{*/ +{ + char *left, *right; + + if (s) { + left = strchr(s, '<'); + while (left) { + right = strchr(left, '>'); + if (right) { + *right = '\0'; + add_token2_in_file(file_index, hash_key, left+1, table, add_to_chain1); + *right = '>'; /* restore */ + } else { + break; + } + left = strchr(right, '<'); + } + } +} +/*}}}*/ + +/* Macro for what characters can make up token strings. + + The following characters have special meanings: + 0x2b + + 0x2d - + 0x2e . + 0x40 @ + 0x5f _ + + since they can occur within email addresses and message IDs when considered + as a whole rather than as individual words. Underscore (0x5f) is considered + a word-character always too. 
+ + */ +static unsigned char special_table[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00-0f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10-1f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 2, 0, /* 20-2f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 30-3f */ + 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 40-4f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, /* 50-5f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60-6f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 70-7f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80-8f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90-9f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* a0-af */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* b0-bf */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* c0-cf */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* d0-df */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* e0-ef */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /* f0-ff */ +}; + +#if 0 +#define CHAR_VALID(x,mask) (isalnum((unsigned char) x) || (special_table[(unsigned int)(unsigned char) x] & mask)) +#endif +static inline int char_valid_p(char x, unsigned int mask)/*{{{*/ +{ + unsigned char xx = (unsigned char) x; + if (isalnum(xx)) return 1; + else if (special_table[(unsigned int) xx] & mask) return 1; + else return 0; +} +/*}}}*/ +static void tokenise_string(int file_index, unsigned int hash_key, struct toktable *table, char *data, int match_mask)/*{{{*/ +{ + char *ss, *es, old_es; + ss = data; + for (;;) { + while (*ss && !char_valid_p(*ss,match_mask)) ss++; + if (!*ss) break; + es = ss + 1; + while (*es && char_valid_p(*es,match_mask)) es++; + + /* deal with token [ss,es) */ + old_es = *es; + *es = '\0'; + /* FIXME: Ought to do this by passing start and length - clean up later */ + add_token_in_file(file_index, hash_key, ss, table); + *es = old_es; + + if (!*es) break; + ss = es; + } +} +/*}}}*/ +static 
void tokenise_html_string(int file_index, unsigned int hash_key, struct toktable *table, char *data)/*{{{*/ +{ + char *ss, *es, old_es; + + /* FIXME : Probably want to rewrite this as an explicit FSM */ + + ss = data; + for (;;) { + /* Assume < and > are never valid token characters ! */ + while (*ss && !char_valid_p(*ss, 1)) { + if (*ss++ == '<') { + /* Skip over HTML tag */ + while (*ss && (*ss != '>')) ss++; + } + } + if (!*ss) break; + + es = ss + 1; + while (*es && char_valid_p(*es, 1)) es++; + + /* deal with token [ss,es) */ + old_es = *es; + *es = '\0'; + /* FIXME: Ought to do this by passing start and length - clean up later */ + add_token_in_file(file_index, hash_key, ss, table); + *es = old_es; + + if (!*es) break; + ss = es; + } +} +/*}}}*/ +void tokenise_message(int file_index, struct database *db, struct rfc822 *msg)/*{{{*/ +{ + struct attachment *a; + + /* Match on whole addresses in these headers as well as the individual words */ + if (msg->hdrs.to) { + tokenise_string(file_index, db->hash_key, db->to, msg->hdrs.to, 1); + tokenise_string(file_index, db->hash_key, db->to, msg->hdrs.to, 2); + } + if (msg->hdrs.cc) { + tokenise_string(file_index, db->hash_key, db->cc, msg->hdrs.cc, 1); + tokenise_string(file_index, db->hash_key, db->cc, msg->hdrs.cc, 2); + } + if (msg->hdrs.from) { + tokenise_string(file_index, db->hash_key, db->from, msg->hdrs.from, 1); + tokenise_string(file_index, db->hash_key, db->from, msg->hdrs.from, 2); + } + if (msg->hdrs.subject) tokenise_string(file_index, db->hash_key, db->subject, msg->hdrs.subject, 1); + + for (a=msg->atts.next; a!=&msg->atts; a=a->next) { + switch (a->ct) { + case CT_TEXT_PLAIN: + tokenise_string(file_index, db->hash_key, db->body, a->data.normal.bytes, 1); + break; + case CT_TEXT_HTML: + tokenise_html_string(file_index, db->hash_key, db->body, a->data.normal.bytes); + break; + case CT_MESSAGE_RFC822: + /* Just recurse for now - maybe we should have separate token tables + * for tokens occurring in 
embedded messages? */ + + if (a->data.rfc822) { + tokenise_message(file_index, db, a->data.rfc822); + } + break; + default: + /* Don't do anything - unknown text format or some nasty binary stuff. + * In future, we could have all kinds of 'plug-ins' here, e.g. + * something that can parse PDF to get the basic text strings out of + * the pages? */ + break; + } + + if (a->filename) { + add_token_in_file(file_index, db->hash_key, a->filename, db->attachment_name); + } + + } + + /* Deal with threading information */ + add_angled_terms(file_index, db->hash_key, db->msg_ids, 1, msg->hdrs.message_id); + add_angled_terms(file_index, db->hash_key, db->msg_ids, 0, msg->hdrs.in_reply_to); + add_angled_terms(file_index, db->hash_key, db->msg_ids, 0, msg->hdrs.references); +} +/*}}}*/ + +static void scan_maildir_flags(struct msgpath *m)/*{{{*/ +{ + const char *p, *start; + start = m->src.mpf.path; + m->seen = 0; + m->replied = 0; + m->flagged = 0; + for (p=start; *p; p++) {} + for (p--; (p >= start) && ((*p) != ':'); p--) {} + if (p >= start) { + if (!strncmp(p, ":2,", 3)) { + p += 3; + while (*p) { + switch (*p) { + case 'F': m->flagged = 1; break; + case 'R': m->replied = 1; break; + case 'S': m->seen = 1; break; + default: break; + } + p++; + } + } + } +} +/*}}}*/ +static void scan_new_messages(struct database *db, int start_at)/*{{{*/ +{ + int i; + for (i=start_at; i<db->n_msgs; i++) { + struct rfc822 *msg = NULL; + int len = strlen(db->msgs[i].src.mpf.path); + + if (len > 10 && !strcmp(db->msgs[i].src.mpf.path + len - 11, "/.gitignore")) + continue; + + switch (db->type[i]) { + case MTY_DEAD: + assert(0); + break; + case MTY_MBOX: + assert(0); /* Should never get here - mbox messages are scanned elsewhere. 
*/ + break; + case MTY_FILE: + if (verbose) fprintf(stderr, "Scanning <%s>\n", db->msgs[i].src.mpf.path); + msg = make_rfc822(db->msgs[i].src.mpf.path); + break; + } + if(msg) + { + db->msgs[i].date = msg->hdrs.date; + scan_maildir_flags(&db->msgs[i]); + tokenise_message(i, db, msg); + free_rfc822(msg); + } + else + fprintf(stderr, "Skipping %s (could not parse message)\n", db->msgs[i].src.mpf.path); + } +} +/*}}}*/ + +static inline void set_bit(unsigned long *x, int n)/*{{{*/ +{ + int set; + unsigned long mask; + set = (n >> 5); + mask = (1UL << (n & 31)); + x[set] |= mask; +} +/*}}}*/ +static inline int isset_bit(unsigned long *x, int n)/*{{{*/ +{ + int set; + unsigned long mask; + set = (n >> 5); + mask = (1UL << (n & 31)); + return (x[set] & mask) ? 1 : 0; +} +/*}}}*/ +static int find_base(int *table, int index) {/*{{{*/ + int a = index; + + /* TODO : make this compress the path lengths down to the base entry */ + while (table[a] != a) { + a = table[a]; + } + return a; +} +/*}}}*/ +static void find_threading(struct database *db)/*{{{*/ +{ + + /* ix is a table mapping path array index to the lowest path array index that + * is known to share at least one message ID in its hdrs somewhere (i.e. 
they + * must be in the same thread) */ + int *ix; + + int i, m, np, nm, sm; + int next_tid; + + np = db->n_msgs; + nm = db->msg_ids->n; + sm = db->msg_ids->size; + + ix = new_array(int, np); + for (i=0; i<np; i++) { + ix[i] = i; /* default - every message in a thread of its own */ + } + + for (m=0; m<sm; m++) { + struct token2 *tok = db->msg_ids->tokens[m]; + if (tok) { + unsigned char *j = tok->match0.msginfo; + unsigned char *last_char = j + tok->match0.n; + int cur = 0, incr, first=1; + int new_base=-1, old_base; + while (j < last_char) { + incr = read_increment(&j); + cur += incr; + if (first) { + new_base = find_base(ix, cur); + first = 0; + } else { + old_base = find_base(ix, cur); + if (old_base < new_base) { + ix[new_base] = old_base; + new_base = old_base; + } else if (old_base > new_base) { + assert(new_base != -1); + ix[old_base] = new_base; + } + } + } + } + } + + /* Now make each entry point directly to its base */ + for (i=0; i<np; i++) { + if (ix[i] != i) { + /* Sure to work as we're going up from the bottom */ + ix[i] = ix[ix[i]]; + } + } + + /* Now allocate contiguous thread group numbers */ + next_tid = 0; + for (i=0; i<np; i++) { + if (ix[i] == i) { + db->msgs[i].tid = next_tid++; + } else { + db->msgs[i].tid = db->msgs[ix[i]].tid; + } + } + + free(ix); + return; +} +/*}}}*/ +static int lookup_msgpath(struct msgpath *sorted_paths, int n_msgs, char *key)/*{{{*/ +{ + /* Implement bisection search */ + int l, h, m, r; + l = 0, h = n_msgs; + m = -1; + while (h > l) { + m = (h + l) >> 1; + /* Should only get called on 'file' type messages - TBC */ + r = strcmp(sorted_paths[m].src.mpf.path, key); + if (r == 0) break; + if (l == m) return -1; + if (r > 0) h = m; + else l = m; + } + return m; +} +/*}}}*/ +void maybe_grow_message_arrays(struct database *db)/*{{{*/ +{ + if (db->n_msgs == db->max_msgs) { + if (db->max_msgs <= 128) { + db->max_msgs = 256; + } else { + db->max_msgs += (db->max_msgs >> 1); + } + db->msgs = grow_array(struct msgpath, 
db->max_msgs, db->msgs); + db->type = grow_array(enum message_type, db->max_msgs, db->type); + } +} +/*}}}*/ +static void add_msg_path(struct database *db, char *path, time_t mtime, size_t message_size)/*{{{*/ +{ + maybe_grow_message_arrays(db); + db->type[db->n_msgs] = MTY_FILE; + db->msgs[db->n_msgs].src.mpf.path = new_string(path); + db->msgs[db->n_msgs].src.mpf.mtime = mtime; + db->msgs[db->n_msgs].src.mpf.size = message_size; + ++db->n_msgs; +} +/*}}}*/ + +static int do_stat(struct msgpath *mp)/*{{{*/ +{ + struct stat sb; + int status; + status = stat(mp->src.mpf.path, &sb); + if ((status < 0) || + !S_ISREG(sb.st_mode)) { + return 0; + } else { + mp->src.mpf.mtime = sb.st_mtime; + mp->src.mpf.size = sb.st_size; + return 1; + } +} +/*}}}*/ +int update_database(struct database *db, struct msgpath *sorted_paths, int n_msgs, int do_fast_index)/*{{{*/ +{ + /* The incoming list must be sorted into order, to make binary searching + * possible. We search for each existing path in the incoming sorted array. + * If the date differs, or the file no longer exist, the existing database + * entry for that file is nulled. (These are only recovered if the database + * is actively compressed.) If the date differed, a new entry for the file + * is put at the end of the list. Similarly, any new file goes at the end. + * These new entries are all rescanned to find tokens and add them to the + * database. 
*/ + + char *file_in_db, *file_in_new_list; + int matched_index; + int i, new_entries_start_at; + int any_new, n_newly_pruned, n_already_dead; + int status; + + file_in_db = new_array(char, n_msgs); + file_in_new_list = new_array(char, db->n_msgs); + bzero(file_in_db, n_msgs); + bzero(file_in_new_list, db->n_msgs); + + n_already_dead = 0; + n_newly_pruned = 0; + + for (i=0; i<db->n_msgs; i++) { + switch (db->type[i]) { + case MTY_FILE: + matched_index = lookup_msgpath(sorted_paths, n_msgs, db->msgs[i].src.mpf.path); + if (matched_index >= 0) { + if (do_fast_index) { + /* Assume the presence of a matching path is good enough without + * even bothering to stat the file that's there now. */ + file_in_db[matched_index] = 1; + file_in_new_list[i] = 1; + } else { + status = do_stat(sorted_paths + matched_index); + if (status) { + if (sorted_paths[matched_index].src.mpf.mtime == db->msgs[i].src.mpf.mtime) { + /* Treat stale files as though the path has changed. */ + file_in_db[matched_index] = 1; + file_in_new_list[i] = 1; + } + } else { + /* This path will get treated as dead, and be re-stated below. + * When that stat fails, the path won't get added to the db. */ + } + } + } + break; + case MTY_MBOX: + /* Nothing to do on this pass. */ + break; + case MTY_DEAD: + break; + } + } + + /* Add new entries to database */ + new_entries_start_at = db->n_msgs; + + for (i=0; i<db->n_msgs; i++) { + /* Weed dead entries */ + switch (db->type[i]) { + case MTY_FILE: + if (!file_in_new_list[i]) { + free(db->msgs[i].src.mpf.path); + db->msgs[i].src.mpf.path = NULL; + db->type[i] = MTY_DEAD; + ++n_newly_pruned; + } + break; + case MTY_MBOX: + { + int msg_index, file_index, number_valid; + int mbox_valid; + msg_index = db->msgs[i].src.mbox.msg_index; + file_index = db->msgs[i].src.mbox.file_index; + assert (file_index < db->n_mboxen); + mbox_valid = (db->mboxen[file_index].path) ? 
1 : 0; + number_valid = db->mboxen[file_index].n_old_msgs_valid; + if (!mbox_valid || (msg_index >= number_valid)) { + db->type[i] = MTY_DEAD; + ++n_newly_pruned; + } + } + break; + case MTY_DEAD: + /* already dead */ + ++n_already_dead; + break; + } + } + + if (verbose) { + fprintf(stderr, "%d newly dead messages, %d messages now dead in total\n", n_newly_pruned, n_newly_pruned+n_already_dead); + } + + any_new = 0; + for (i=0; i<n_msgs; i++) { + if (!file_in_db[i]) { + int status; + any_new = 1; + /* The 'sorted_paths' array is only used for file-per-message folders. */ + status = do_stat(sorted_paths + i); + if (status) { + /* We only add files that could be successfully stat()'d as regular + * files. */ + add_msg_path(db, sorted_paths[i].src.mpf.path, sorted_paths[i].src.mpf.mtime, sorted_paths[i].src.mpf.size); + } else { + fprintf(stderr, "Cannot add '%s' to database; stat() failed\n", sorted_paths[i].src.mpf.path); + } + } + } + + if (any_new) { + scan_new_messages(db, new_entries_start_at); + } + + /* Add newly found mbox messages. */ + any_new |= add_mbox_messages(db); + + if (any_new) { + find_threading(db); + } else { + if (verbose) fprintf(stderr, "No new messages found\n"); + } + + free(file_in_db); + free(file_in_new_list); + + return any_new || (n_newly_pruned > 0); +} +/*}}}*/ +static void recode_encoding(struct matches *m, int *new_idx)/*{{{*/ +{ + unsigned char *new_enc, *old_enc; + unsigned char *j, *last_char; + int incr, idx, n_idx; + + old_enc = m->msginfo; + j = old_enc; + last_char = old_enc + m->n; + + new_enc = new_array(unsigned char, m->max); /* Probably not bigger than this. 
*/ + m->n = 0; + m->highest = 0; + m->msginfo = new_enc; + idx = 0; + + while (j < last_char) { + incr = read_increment(&j); + idx += incr; + n_idx = new_idx[idx]; + if (n_idx >= 0) { + check_and_enlarge_encoding(m); + insert_index_on_encoding(m, n_idx); + } + } + free(old_enc); +} +/*}}}*/ +static void recode_toktable(struct toktable *tbl, int *new_idx)/*{{{*/ +{ + /* Re-encode the vectors according to the new path indices */ + int i; + int any_dead = 0; + int any_moved, pass; + + for (i=0; i<tbl->size; i++) { + struct token *tok = tbl->tokens[i]; + if (tok) { + recode_encoding(&tok->match0, new_idx); + if (tok->match0.n == 0) { + /* Delete this token. Gotcha - there may be tokens further on in the + * array that didn't get their natural hash bucket due to collisions. + * Need to shuffle such tokens up to guarantee that the buckets between + * the natural one and the one where they are now are all occupied, to + * prevent their lookups failing. */ + +#if 0 + fprintf(stderr, "Token <%s> (bucket %d) no longer has files containing it, deleting\n", tok->text, i); +#endif + free_token(tok); + tbl->tokens[i] = NULL; + --tbl->n; /* Maintain number in use counter */ + any_dead = 1; + } + + } + } + + + if (any_dead) { + /* Now close gaps. This has to be done in a second pass, otherwise we get a + * problem with moving entries that need deleting back before the current + scan point. 
*/ + + pass = 1; + for (;;) { + int i; + + if (verbose) { + fprintf(stderr, "Pass %d\n", pass); + } + + any_moved = 0; + + for (i=0; i<tbl->size; i++) { + if (tbl->tokens[i]) { + int nat_bucket_i; + nat_bucket_i = tbl->tokens[i]->hashval & tbl->mask; + if (nat_bucket_i != i) { + /* Find earliest bucket that we could move i to */ + int j = nat_bucket_i; + while (j != i) { + if (!tbl->tokens[j]) { + /* put it here */ +#if 0 + fprintf(stderr, "Moved <%s> from bucket %d to %d (natural bucket %d)\n", tbl->tokens[i]->text, i, j, nat_bucket_i); +#endif + tbl->tokens[j] = tbl->tokens[i]; + tbl->tokens[i] = NULL; + any_moved = 1; + break; + } else { + j++; + j &= tbl->mask; + } + } + if (tbl->tokens[i]) { +#if 0 + fprintf(stderr, "NOT moved <%s> from bucket %d (natural bucket %d)\n", tbl->tokens[i]->text, i, nat_bucket_i); +#endif + } + } + } + } + + if (!any_moved) break; + pass++; + } + } +} +/*}}}*/ +static void recode_toktable2(struct toktable2 *tbl, int *new_idx)/*{{{*/ +{ + /* Re-encode the vectors according to the new path indices */ + int i; + int any_dead = 0; + int any_moved, pass; + + for (i=0; i<tbl->size; i++) { + struct token2 *tok = tbl->tokens[i]; + if (tok) { + recode_encoding(&tok->match0, new_idx); + recode_encoding(&tok->match1, new_idx); + if ((tok->match0.n == 0) && (tok->match1.n == 0)) { + /* Delete this token. Gotcha - there may be tokens further on in the + * array that didn't get their natural hash bucket due to collisions. + * Need to shuffle such tokens up to guarantee that the buckets between + * the natural one and the one where they are now are all occupied, to + * prevent their lookups failing. */ + +#if 0 + fprintf(stderr, "Token <%s> (bucket %d) no longer has files containing it, deleting\n", tok->text, i); +#endif + free_token2(tok); + tbl->tokens[i] = NULL; + --tbl->n; /* Maintain number in use counter */ + any_dead = 1; + } + } + } + + if (any_dead) { + /* Now close gaps. 
This has to be done in a second pass, otherwise we get a + * problem with moving entries that need deleting back before the current + scan point. */ + + pass = 1; + for (;;) { + int i; + + if (verbose) { + fprintf(stderr, "Pass %d\n", pass); + } + + any_moved = 0; + + for (i=0; i<tbl->size; i++) { + if (tbl->tokens[i]) { + int nat_bucket_i; + nat_bucket_i = tbl->tokens[i]->hashval & tbl->mask; + if (nat_bucket_i != i) { + /* Find earliest bucket that we could move i to */ + int j = nat_bucket_i; + while (j != i) { + if (!tbl->tokens[j]) { + /* put it here */ +#if 0 + fprintf(stderr, "Moved <%s> from bucket %d to %d (natural bucket %d)\n", tbl->tokens[i]->text, i, j, nat_bucket_i); +#endif + tbl->tokens[j] = tbl->tokens[i]; + tbl->tokens[i] = NULL; + any_moved = 1; + break; + } else { + j++; + j &= tbl->mask; + } + } + if (tbl->tokens[i]) { +#if 0 + fprintf(stderr, "NOT moved <%s> from bucket %d (natural bucket %d)\n", tbl->tokens[i]->text, i, nat_bucket_i); +#endif + } + } + } + } + + if (!any_moved) break; + pass++; + } + } +} +/*}}}*/ +int cull_dead_messages(struct database *db, int do_integrity_checks)/*{{{*/ +{ + /* Return true if any culled */ + + int *new_idx, i, j, n_old; + int any_culled = 0; + + /* Check db is OK before we start on this. (Check afterwards is done in the + * writer.c code.) 
*/ + if (do_integrity_checks) { + check_database_integrity(db); + } + + if (verbose) { + fprintf(stderr, "Culling dead messages\n"); + } + + n_old = db->n_msgs; + + new_idx = new_array(int, n_old); + for (i=0, j=0; i<n_old; i++) { + switch (db->type[i]) { + case MTY_FILE: + case MTY_MBOX: + new_idx[i] = j++; + break; + case MTY_DEAD: + new_idx[i] = -1; + any_culled = 1; + break; + } + } + + recode_toktable(db->to, new_idx); + recode_toktable(db->cc, new_idx); + recode_toktable(db->from, new_idx); + recode_toktable(db->subject, new_idx); + recode_toktable(db->body, new_idx); + recode_toktable(db->attachment_name, new_idx); + recode_toktable2(db->msg_ids, new_idx); + + /* And crunch down the filename table */ + for (i=0, j=0; i<n_old; i++) { + switch (db->type[i]) { + case MTY_DEAD: + break; + case MTY_FILE: + case MTY_MBOX: + if (i > j) { + db->msgs[j] = db->msgs[i]; + db->type[j] = db->type[i]; + } + j++; + break; + } + } + db->n_msgs = j; + + free(new_idx); + + /* .. and cull dead mboxen */ + cull_dead_mboxen(db); + + return any_culled; +} +/*}}}*/ diff --git a/src/mairix/dfasyn/COPYING b/src/mairix/dfasyn/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) 
You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. 
+ + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. 
+ + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/src/mairix/dfasyn/INSTALL b/src/mairix/dfasyn/INSTALL @@ -0,0 +1,19 @@ +There is no real configure mechanism (yet). + +To build the program + + make + +To install the program (perhaps as root) + + make prefix=/usr/local install + +or as yourself you might do + + make prefix=$HOME install + +or if your distribution puts manpages in /usr/share/man, you might do + + make prefix=/usr/local mandir=/usr/share/man install + +# vim:et:sw=4 diff --git a/src/mairix/dfasyn/Makefile b/src/mairix/dfasyn/Makefile @@ -0,0 +1,62 @@ +# Makefile for NFA->DFA conversion utility +# +# Copyright (C) Richard P. Curnow 2000-2001,2003,2005,2006,2007 +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# + +CC=gcc +#CFLAGS=-g -Wall +#CFLAGS=-O2 -pg +CFLAGS=-Wall +prefix?=/usr/local +bindir=$(prefix)/bin +mandir?=$(prefix)/man +man1dir=$(mandir)/man1 +man5dir=$(mandir)/man5 + +OBJ = dfasyn.o parse.o scan.o \ + tokens.o abbrevs.o charclass.o \ + stimulus.o \ + blocks.o states.o \ + n2d.o expr.o evaluator.o \ + tabcompr.o compdfa.o + +all : dfasyn + +install : all + [ -d $(bindir) ] || mkdir -p $(bindir) + [ -d $(man1dir) ] || mkdir -p $(man1dir) + [ -d $(man5dir) ] || mkdir -p $(man5dir) + cp dfasyn $(bindir) + cp dfasyn.1 $(man1dir) + cp dfasyn.5 $(man5dir) + +dfasyn : $(OBJ) + $(CC) $(CFLAGS) -o dfasyn $(OBJ) + +parse.c parse.h : parse.y + bison -v -d -o parse.c parse.y + +parse.o : parse.c dfasyn.h + +scan.c : scan.l + flex -t -s scan.l > scan.c + +scan.o : scan.c parse.h dfasyn.h + +$(OBJ) : dfasyn.h + +clean: + rm -f dfasyn *.o scan.c parse.c parse.h parse.output + diff --git a/src/mairix/dfasyn/NEWS b/src/mairix/dfasyn/NEWS @@ -0,0 +1,5 @@ +New in version 0.2 +================== + +* Added README and NEWS files + diff --git a/src/mairix/dfasyn/README b/src/mairix/dfasyn/README @@ -0,0 +1,8 @@ +dfasyn is a tool for constructing state machines. The input language allows a +lot of generality. For example, it allows repeated elements to be specified +where the items have constraints between the end of one and the start of the +next. (I could not find a way to define such an automaton in the lex/flex +input language, which prompted the writing of the tool.) Currently, you must +do a fair amount of work yourself to build a parser around the resulting state +machine. + diff --git a/src/mairix/dfasyn/abbrevs.c b/src/mairix/dfasyn/abbrevs.c @@ -0,0 +1,67 @@ +/*************************************** + Handle state-related stuff + ***************************************/ + +/* + ********************************************************************** + * Copyright (C) Richard P. 
Curnow 2000-2003,2005,2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +#include "dfasyn.h" + +static struct Abbrev *abbrevtable=NULL; +static int nabbrevs = 0; +static int maxabbrevs = 0; + +static void grow_abbrevs(void)/*{{{*/ +{ + maxabbrevs += 32; + abbrevtable = resize_array(struct Abbrev, abbrevtable, maxabbrevs); +} +/*}}}*/ +struct Abbrev * create_abbrev(const char *name, struct StimulusList *stimuli)/*{{{*/ +{ + struct Abbrev *result; + if (nabbrevs == maxabbrevs) { + grow_abbrevs(); + } + result = abbrevtable + (nabbrevs++); + result->lhs = new_string(name); + result->stimuli = stimuli; + return result; +} +/*}}}*/ +struct Abbrev * lookup_abbrev(char *name)/*{{{*/ +{ + int found = -1; + int i; + struct Abbrev *result = NULL; + /* Scan table in reverse order. If a name has been redefined, + make sure the most recent definition is picked up. 
*/ + for (i=nabbrevs-1; i>=0; i--) { + if (!strcmp(abbrevtable[i].lhs, name)) { + found = i; + result = abbrevtable + found; + break; + } + } + + return result; +} +/*}}}*/ + diff --git a/src/mairix/dfasyn/blocks.c b/src/mairix/dfasyn/blocks.c @@ -0,0 +1,168 @@ +/*************************************** + Handle blocks + ***************************************/ + +/* + ********************************************************************** + * Copyright (C) Richard P. Curnow 2000-2003,2005,2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + ********************************************************************** + */ + +#include "dfasyn.h" + + +static Block **blocks = NULL; +static int nblocks = 0; +static int maxblocks = 0; + +/* ================================================================= */ + +static void grow_blocks(void)/*{{{*/ +{ + maxblocks += 32; + blocks = resize_array(Block*, blocks, maxblocks); +} +/*}}}*/ +static Block * create_block(char *name)/*{{{*/ +{ + Block *result; + int i; + + if (nblocks == maxblocks) { + grow_blocks(); + } + +#if 0 + /* Not especially useful to show this */ + if (verbose) { + fprintf(stderr, " %s", name); + } +#endif + + result = blocks[nblocks++] = new(Block); + result->name = new_string(name); + for (i=0; i<HASH_BUCKETS; i++) { + result->state_hash[i].states = NULL; + result->state_hash[i].nstates = 0; + result->state_hash[i].maxstates = 0; + } + result->states = NULL; + result->nstates = result->maxstates = 0; + result->eclo = NULL; + + result->subcount = 1; + result->subblockcount = 1; + return result; +} +/*}}}*/ +Block * lookup_block(char *name, int create)/*{{{*/ +{ + Block *found = NULL; + int i; + for (i=0; i<nblocks; i++) { + if (!strcmp(blocks[i]->name, name)) { + found = blocks[i]; + break; + } + } + + switch (create) { + case USE_OLD_MUST_EXIST: + if (!found) { + fprintf(stderr, "Could not find block '%s' to instantiate\n", name); + exit(1); + } + break; + case CREATE_MUST_NOT_EXIST: + if (found) { + fprintf(stderr, "Already have a block called '%s', cannot redefine\n", name); + exit(1); + } else { + found = create_block(name); + } + break; + case CREATE_OR_USE_OLD: + if (!found) { + found = create_block(name); + } + break; + } + + return found; +} +/*}}}*/ +/* ================================================================= */ +void instantiate_block(Block *curblock, char *block_name, char *instance_name)/*{{{*/ +{ + Block *master = lookup_block(block_name, USE_OLD_MUST_EXIST); + char namebuf[1024]; + int i; + for (i=0; i<master->nstates; 
i++) { + State *s = master->states[i]; + State *new_state; + TransList *tl; + Stringlist *sl, *ex; + + strcpy(namebuf, instance_name); + strcat(namebuf, "."); + strcat(namebuf, s->name); + + /* In perverse circumstances, we might already have a state called this */ + new_state = lookup_state(curblock, namebuf, CREATE_OR_USE_OLD); + + for (tl=s->transitions; tl; tl=tl->next) { + TransList *new_tl = new(TransList); + new_tl->type = tl->type; + /* Might cause some dangling ref problem later... */ + new_tl->x = tl->x; + strcpy(namebuf, instance_name); + strcat(namebuf, "."); + strcat(namebuf, tl->ds_name); + new_tl->ds_name = new_string(namebuf); + new_tl->ds_ref = NULL; + new_tl->next = new_state->transitions; + new_state->transitions = new_tl; + } + + /*{{{ Copy state tags */ + ex = NULL; + for (sl=s->tags; sl; sl=sl->next) { + Stringlist *new_sl = new(Stringlist); + new_sl->string = sl->string; + new_sl->next = ex; + ex = new_sl; + } + new_state->tags = ex; + /*}}}*/ + + /* **DON'T** COPY ENTRIES : these are deliberately dropped if they occur + * in a block that gets instantiated elsewhere. */ + + } +} +/*}}}*/ +/* ================================================================= */ +InlineBlock *create_inline_block(char *type, char *in, char *out)/*{{{*/ +{ + InlineBlock *result; + result = new(InlineBlock); + result->type = new_string(type); + result->in = new_string(in); + result->out = new_string(out); + return result; +} +/*}}}*/ diff --git a/src/mairix/dfasyn/charclass.c b/src/mairix/dfasyn/charclass.c @@ -0,0 +1,364 @@ +/*************************************** + Handle character classes + ***************************************/ + +/* + ********************************************************************** + * Copyright (C) Richard P. Curnow 2001-2003,2005,2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +#include "dfasyn.h" +#include <ctype.h> + +struct cc_list { + struct cc_list *next; + CharClass *cc; +}; + +static struct cc_list *cc_list = NULL; +static short mapping[256]; + +int n_charclasses; +static char *strings[256]; + +static void set_bit(unsigned long *bitmap, int entry)/*{{{*/ +{ + int i, j, mask; + i = (entry >> 5); + j = entry & 31; + mask = 1<<j; + bitmap[i] |= mask; +} +/*}}}*/ +static void clear_bit(unsigned long *bitmap, int entry)/*{{{*/ +{ + int i, j, mask; + i = (entry >> 5); + j = entry & 31; + mask = 1<<j; + bitmap[i] &= ~mask; +} +/*}}}*/ +int cc_test_bit(const unsigned long *bitmap, int entry)/*{{{*/ +{ + int i, j, mask; + i = (entry >> 5); + j = entry & 31; + mask = 1<<j; + return (bitmap[i] & mask) ? 1 : 0; +} +/*}}}*/ +CharClass *new_charclass(void)/*{{{*/ +{ + CharClass *result = new(CharClass); + result->is_used = 0; + memset(result->char_bitmap, 0, sizeof(result->char_bitmap)); + memset(result->group_bitmap, 0, sizeof(result->group_bitmap)); + return result; +} +/*}}}*/ +void free_charclass(CharClass *what)/*{{{*/ +{ + free(what); +} +/*}}}*/ +void add_charclass_to_list(CharClass *cc)/*{{{*/ +{ + /* Add the cc to the master list for later processing. 
*/ + struct cc_list *elt = new(struct cc_list); + elt->next = cc_list; + elt->cc = cc; + cc_list = elt; +} +/*}}}*/ +void add_singleton_to_charclass(CharClass *towhat, char thechar)/*{{{*/ +{ + int x; + x = (int)(unsigned char) thechar; + set_bit(towhat->char_bitmap, x); +} +/*}}}*/ +void add_range_to_charclass(CharClass *towhat, char start, char end)/*{{{*/ +{ + int sx, ex, t; + sx = (int)(unsigned char) start; + ex = (int)(unsigned char) end; + if (sx > ex) { + t = sx, sx = ex, ex = t; + } + for (t=sx; t<=ex; t++) { + set_bit(towhat->char_bitmap, t); + } +} +/*}}}*/ +void invert_charclass(CharClass *what)/*{{{*/ +{ + int i; + for (i=0; i<ULONGS_PER_CC; i++) { + what->char_bitmap[i] ^= 0xffffffffUL; + } +} +/*}}}*/ +void diff_charclasses(CharClass *left, CharClass *right)/*{{{*/ +{ + /* Compute set difference */ + int i; + for (i=0; i<ULONGS_PER_CC; i++) { + left->char_bitmap[i] &= ~(right->char_bitmap[i]); + } +} +/*}}}*/ + +static char *emit_char (char *p, int i)/*{{{*/ +{ + if (i == '\\') { + *p++ = '\\'; + *p++ = '\\'; + } else if (isprint(i) && (i != '-')) { + *p++ = i; + } else if (i == '\n') { + *p++ = '\\'; + *p++ = 'n'; + } else if (i == '\r') { + *p++ = '\\'; + *p++ = 'r'; + } else if (i == '\f') { + *p++ = '\\'; + *p++ = 'f'; + } else if (i == '\t') { + *p++ = '\\'; + *p++ = 't'; + } else { + p += sprintf(p, "\\%03o", i); + } + return p; +} +/*}}}*/ +static void generate_string(int idx, const unsigned long *x)/*{{{*/ +{ + int i, j; + char buffer[4096]; + char *p; + + p = buffer; + *p++ = '['; + /* Force '-' to be shown at the start. 
*/ + i = 0; + do { + while ((i < 256) && !cc_test_bit(x,i)) i++; + if (i>=256) break; + + j = i + 1; + while ((j < 256) && cc_test_bit(x,j)) j++; + j--; + + p = emit_char(p, i); + if (j == (i + 1)) { + p = emit_char(p, j); + } else if (j > (i + 1)) { + *p++ = '-'; + p = emit_char(p, j); + } + + i = j + 1; + } while (i < 256); + *p++ = ']'; + *p = 0; + strings[idx] = new_string(buffer); + return; +} +/*}}}*/ +static void combine(unsigned long *into, const unsigned long *with)/*{{{*/ +{ + int i; + for (i=0; i<ULONGS_PER_CC; i++) into[i] |= with[i]; +} +/*}}}*/ +static void set_all(unsigned long *x)/*{{{*/ +{ + int i; + for (i=0; i<ULONGS_PER_CC; i++) x[i] = 0xffffffffUL; +} +/*}}}*/ +static void clear_all(unsigned long *x)/*{{{*/ +{ + int i; + for (i=0; i<ULONGS_PER_CC; i++) x[i] = 0x0UL; +} +/*}}}*/ +static int find_lowest_bit_set(const unsigned long *x)/*{{{*/ +{ + int i; + for (i=0; i<ULONGS_PER_CC; i++) { + if (x[i]) { + int pos = 0; + unsigned long val = x[i]; + if (!(val & 0xffff)) pos += 16, val >>= 16; + if (!(val & 0x00ff)) pos += 8, val >>= 8; + if (!(val & 0x000f)) pos += 4, val >>= 4; + if (!(val & 0x0003)) pos += 2, val >>= 2; + if (!(val & 0x0001)) pos += 1; + return (i << 5) + pos; + } + } + return -1; +} +/*}}}*/ + +static void mark_used_in_block(const Block *b)/*{{{*/ +{ + int i; + + for (i=0; i<b->nstates; i++) { + const State *s = b->states[i]; + const TransList *tl; + for (tl=s->transitions; tl; tl=tl->next) { + switch (tl->type) { + case TT_CHARCLASS: + tl->x.char_class->is_used = 1; + break; + default: + break; + } + } + } +} +/*}}}*/ +static void reduce_list(void)/*{{{*/ +{ + struct cc_list *ccl, *next_ccl; + ccl = cc_list; + cc_list = NULL; + while (ccl) { + next_ccl = ccl->next; + if (ccl->cc->is_used) { + ccl->next = cc_list; + cc_list = ccl; + } else { + free(ccl->cc); + free(ccl); + } + ccl = next_ccl; + } +} +/*}}}*/ +void split_charclasses(const Block *b)/*{{{*/ +{ + unsigned long cc_union[ULONGS_PER_CC]; + struct cc_list *elt; + int i; 
+ int any_left; + + mark_used_in_block(b); + reduce_list(); + + n_charclasses = 0; + + if (!cc_list) { + if (verbose) fprintf(stderr, "No charclasses used\n"); + return; + } + + /* Form union */ + clear_all(cc_union); + for (elt=cc_list; elt; elt=elt->next) { + combine(cc_union, elt->cc->char_bitmap); + } + + for (i=0; i<256; i++) mapping[i] = -1; + + do { + int first_char; + int i; + unsigned long pos[ULONGS_PER_CC], neg[ULONGS_PER_CC]; + first_char = find_lowest_bit_set(cc_union); + set_all(pos); + clear_all(neg); + for (elt=cc_list; elt; elt=elt->next) { + if (cc_test_bit(elt->cc->char_bitmap, first_char)) { + for (i=0; i<ULONGS_PER_CC; i++) pos[i] &= elt->cc->char_bitmap[i]; + } else { + for (i=0; i<ULONGS_PER_CC; i++) neg[i] |= elt->cc->char_bitmap[i]; + } + } + + for (i=0; i<ULONGS_PER_CC; i++) { + pos[i] &= ~neg[i]; + } + + generate_string(n_charclasses, pos); + + for (i=0; i<256; i++) { + if (cc_test_bit(pos, i)) { + mapping[i] = n_charclasses; + clear_bit(cc_union, i); + } + } + + n_charclasses++; + any_left = 0; + for (i=0; i<ULONGS_PER_CC; i++) { + if (cc_union[i]) { + any_left = 1; + break; + } + } + } while (any_left); + + /* Build group bitmaps */ + for (elt=cc_list; elt; elt=elt->next) { + for (i=0; i<256; i++) { + if (cc_test_bit(elt->cc->char_bitmap, i)) { + set_bit(elt->cc->group_bitmap, mapping[i]); + } + } + } + + fprintf(stderr, "Got %d character classes\n", n_charclasses); + + return; +} +/*}}}*/ +void print_charclass_mapping(FILE *out, FILE *header_out, const char *prefix_under)/*{{{*/ +{ + int i; + if (!cc_list) return; + fprintf(out, "short %schar2tok[256] = {", prefix_under); + for (i=0; i<256; i++) { + if (i > 0) fputs(", ", out); + if ((i & 15) == 0) fputs("\n ", out); + if (mapping[i] >= 0) { + fprintf(out, "%3d", mapping[i] + ntokens); + } else { + fprintf(out, "%3d", mapping[i]); + } + } + fputs("\n};\n", out); + if (header_out) { + fprintf(header_out, "extern short %schar2tok[256];\n", + prefix_under); + } + return; +} +/*}}}*/ +void 
print_charclass(FILE *out, int idx)/*{{{*/ +{ + fprintf(out, "%d:%s", idx, strings[idx]); +} +/*}}}*/ + diff --git a/src/mairix/dfasyn/compdfa.c b/src/mairix/dfasyn/compdfa.c @@ -0,0 +1,479 @@ +/*************************************** + Routines for compressing the DFA by commoning-up equivalent states + ***************************************/ + +/* + ********************************************************************** + * Copyright (C) Richard P. Curnow 2001-2003,2005,2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +/* + The input to this stage is the 'raw' DFA build from the NFA by the subset + construction. Depending on the style of the NFA, there may be large chunks + of the DFA that have equivalent functionality, in terms of resulting in the + same attributes for the same sequence of input tokens, but which are reached + by different prefixes. The idea of this stage is to common up such regions, + to reduce the size of the DFA and hence the table sizes that are generated. + + Conceptually, the basis of the algorithm is to assign the DFA states to + equivalence classes. If there are N different tags-combinations, there are + initially N+1 classes. 
All states that can exit with a particular value are + placed in a class together, and all non-accepting states are placed together. + Now, a pass is made over all pairs of states. Two states remain equivalent + if for each token, their outbound transitions go to states in the same class. + If the states do not stay equivalent, the class they were in is split + accordingly. This is repeated again and again until no more bisections + occur. + + The algorithm actually used is to assign an ordering to the states based on + their current class and outbound transitions. The states are then sorted. + This allows all checking to be done on near-neighbours in the sequence + generated by the sort, which brings the execution time down to something + finite. + + */ + +#include "dfasyn.h" + +static int last_eq_class; /* Next class to assign */ +static int Nt; /* Number of tokens; has to be made static to be visible to comparison fn. */ + +/* To give 'general_compre' visibility of the current equiv. classes of the + destination states */ +static DFANode **local_dfas; + +static void calculate_signatures(DFANode **seq, DFANode **dfas, int ndfas)/*{{{*/ +/**** Determine state signatures based on transitions and current classes. 
****/ +{ + unsigned long sig; + int i, t; + + for (i=0; i<ndfas; i++) { + DFANode *s = seq[i]; + sig = 0UL; + for (t=0; t<Nt; t++) { + int di = s->map[t]; + if (di >= 0) { + DFANode *d = dfas[di]; + int deq_class = d->eq_class; + + sig = increment(sig, deq_class & 0xf); /* 16 bit pairs in sig */ + } + } + + s->signature = sig; + } +} +/*}}}*/ +static int general_compare(const void *a, const void *b)/*{{{*/ +/************************* Do full compare on states *************************/ +{ + Castderef (a, const DFANode *, aa); + Castderef (b, const DFANode *, bb); + + if (aa->eq_class < bb->eq_class) { + return -1; + } else if (aa->eq_class > bb->eq_class) { + return +1; + } else if (aa->signature < bb->signature) { + return -1; + } else if (aa->signature > bb->signature) { + return +1; + } else { + /* The hard way... */ + int i; + for (i=0; i<Nt; i++) { + int am = aa->map[i]; + int bm = bb->map[i]; + + /* Map transition destinations to the current equivalence class of the + destination state (otherwise compressor is very pessimistic). */ + am = (am>=0) ? local_dfas[am]->eq_class: -1; + bm = (bm>=0) ? 
local_dfas[bm]->eq_class: -1; + + if (am < bm) return -1; + else if (am > bm) return +1; + } + + } + + /* If you get here, the states are still equivalent */ + return 0; + +} +/*}}}*/ +static int split_classes(DFANode **seq, DFANode **dfas, int ndfas)/*{{{*/ +/*********************** Do one pass of class splitting ***********************/ +{ + int i; + int had_to_split = 0; + + calculate_signatures(seq, dfas, ndfas); + qsort(seq, ndfas, sizeof(DFANode *), general_compare); + + seq[0]->new_eq_class = seq[0]->eq_class; + + for (i=1; i<ndfas; i++) { + seq[i]->new_eq_class = seq[i]->eq_class; + + if (seq[i]->eq_class == seq[i-1]->eq_class) { + /* May need to split, otherwise states were previously separated anyway + */ + + if (general_compare(seq+i, seq+i-1) != 0) { + /* Different transition pattern, split existing equivalent class */ + had_to_split = 1; + seq[i]->new_eq_class = ++last_eq_class; + if (verbose) fprintf(stderr, "Found %d equivalence classes\r", last_eq_class+1); + } else { + /* This works even if seq[i-1] was assigned a new class due to + splitting from seq[i-2] etc. */ + seq[i]->new_eq_class = seq[i-1]->new_eq_class; + } + } + } + + /* Set classes to new class values. */ + for (i=0; i<ndfas; i++) { + seq[i]->eq_class = seq[i]->new_eq_class; + } + + return had_to_split; + +} +/*}}}*/ +static int initial_compare(const void *a, const void *b)/*{{{*/ +/************************** Sort based on tags **************************/ +{ + Castderef (a, const DFANode *, aa); + Castderef (b, const DFANode *, bb); + int status; + int i; + + for (i=0; i<n_evaluators; i++) { + + const char *ar = aa->attrs[i], *br = bb->attrs[i]; + if (!ar) ar = get_defattr(i); + if (!br) br = get_defattr(i); + + /* Sort so that states with identical attributes appear together. 
*/ + if (!ar && br) { + return -1; + } else if (ar && !br) { + return +1; + } else { + if (ar && br) { + status = strcmp(ar, br); + if (status < 0) return -1; + else if (status > 0) return +1; + } + + /* So neither had an attribute at all, or both did and they were equal. + * i.e. need to look at attributes further up the vectors */ + } + } + + /* Got here => both states were identical in terms of their attribute sets */ + return 0; +} +/*}}}*/ +static void assign_initial_classes(DFANode **seq, int ndfas)/*{{{*/ +/******************* Determine initial equivalence classes. *******************/ +{ + int i; + qsort(seq, ndfas, sizeof(DFANode *), initial_compare); + + last_eq_class = 0; + + seq[0]->eq_class = last_eq_class; + + for (i=1; i<ndfas; i++) { + if (initial_compare(seq+i-1, seq+i) != 0) { + /* Not same as previous entry, assign a new class */ + seq[i]->eq_class = ++last_eq_class; + } else { + /* Same class as last entry */ + seq[i]->eq_class = last_eq_class; + } + } +} +/*}}}*/ +/*{{{ compress_states() */ +static void compress_states(struct DFA *dfa, int n_dfa_entries, struct DFAEntry *dfa_entries) +/***** Compress the DFA so there is precisely one state in each eq. class *****/ +{ + int *reps; + int i, j, t; + int neqc; + int new_index; + + if (verbose) fprintf(stderr, "%d DFA states before compression\n", dfa->n); + + if (report) { + fprintf(report, + "\n-----------------------------\n" + "------ COMPRESSING DFA ------\n" + "-----------------------------\n"); + } + + neqc = 1 + last_eq_class; + + /* Array containing which state is the representative of each eq. class. + Keep the state which had the lowest array index. */ + reps = new_array(int, neqc); + + for (i=0; i<neqc; i++) reps[i] = -1; /* undefined */ + + /* Go through DFA states to find the representative of each class. 
*/ + for (i=0; i<dfa->n; i++) { + int eqc = dfa->s[i]->eq_class; + if (reps[eqc] < 0) { + reps[eqc] = i; + dfa->s[i]->is_rep = 1; + } else { + dfa->s[i]->is_rep = 0; + } + } + + /* Go through DFA states and assign new indices. */ + for (i=0, new_index=0; i<dfa->n; i++) { + if (dfa->s[i]->is_dead) { + dfa->s[i]->new_index = -1; + if (report) fprintf(report, "Old DFA state %d becomes -1 (dead state)\n", i); + } else if (dfa->s[i]->is_rep) { + dfa->s[i]->new_index = new_index++; + if (report) fprintf(report, "Old DFA state %d becomes %d\n", i, dfa->s[i]->new_index); + } else { + int eqc = dfa->s[i]->eq_class; + int rep = reps[eqc]; + + /* This assignment works because the representative for the class + must have been done earlier in the loop. */ + dfa->s[i]->new_index = dfa->s[rep]->new_index; + + if (report) fprintf(report, "Old DFA state %d becomes %d (formerly %d)\n", i, dfa->s[i]->new_index, rep); + } + } + + /* Go through all transitions and fix them up. */ + for (i=0; i<dfa->n; i++) { + DFANode *s = dfa->s[i]; + for (t=0; t<Nt; t++) { + int dest = s->map[t]; + if (dest >= 0) { + s->map[t] = dfa->s[dest]->new_index; + } + } + } + + /* Go through the entries and fix their states */ + for (i=0; i<n_dfa_entries; i++) { + int ni = dfa->s[dfa_entries[i].state_number]->new_index; + if (report) { + fprintf(report, "Entry <%s>, formerly state %d, now state %d\n", + dfa_entries[i].entry_name, + dfa_entries[i].state_number, ni); + } + dfa_entries[i].state_number = dfa->s[dfa_entries[i].state_number]->new_index; + } + + /* Fix from_state */ + for (i=0; i<dfa->n; i++) { + int old_from_state, new_from_state; + /* If we're not going to preserve the state, move along */ + if (!dfa->s[i]->is_rep) continue; + old_from_state = dfa->s[i]->from_state; + /* Any entry state ..., move along */ + if (old_from_state < 0) continue; + new_from_state = dfa->s[reps[dfa->s[old_from_state]->eq_class]]->new_index; + dfa->s[i]->from_state = new_from_state; + } + + /* Go through and crunch the 
entries in the DFA array, fixing up the indices */ + for (i=j=0; i<dfa->n; i++) { + if (!dfa->s[i]->is_dead && dfa->s[i]->is_rep) { + dfa->s[j] = dfa->s[i]; + dfa->s[j]->index = dfa->s[j]->new_index; + j++; + } + } + + free(reps); + dfa->n = new_index; /* ignore dead states which are completely pruned. */ + if (verbose) fprintf(stderr, "%d DFA states after compression", dfa->n); +} +/*}}}*/ +static void discard_nfa_bitmaps(struct DFA *dfa)/*{{{*/ +/********** Discard the (now inaccurate) NFA bitmaps from the states **********/ +{ + int i; + for (i=0; i<dfa->n; i++) { + free(dfa->s[i]->nfas); + dfa->s[i]->nfas = NULL; + } + return; +} +/*}}}*/ +static void print_classes(DFANode **dfas, int ndfas)/*{{{*/ +{ + int i; +#if 1 + /* Comment out to print this stuff for debug */ + return; +#endif + if (!report) return; + fprintf(report, "Equivalence classes are :\n"); + for (i=0; i<ndfas; i++) { + fprintf(report, "State %d class %d\n", i, dfas[i]->eq_class); + } + fprintf(report, "\n"); + return; +} +/*}}}*/ +static int has_any_nondefault_attribute(const DFANode *x)/*{{{*/ +{ + int result = 0; + int i; + for (i=0; i<n_evaluators; i++) { + if (x->attrs[i]) { + char *defattr; + defattr = get_defattr(i); + if (defattr && strcmp(defattr, x->attrs[i])) { + result = 1; + break; + } + } + } + return result; +} +/*}}}*/ +static void find_dead_states(DFANode **dfas, int ndfas, int ntokens)/*{{{*/ +{ + /* Find any state that has no transitions out of it and no attribute. + * If you get there, you're guaranteed to be stuck. + * Then, repeatedly look for states which are such that all transitions from + * them lead to dead states. Mark these dead too. + * Then, go through all the dead states and remove their transitions. + * This will force them all into a single class later. */ + + int did_any; + int i, j; + /* Eventually, consider looking for results that are non-default. 
*/ + char *leads_to_result; + int total_found = 0; + + leads_to_result = new_array(char, ndfas); + memset(leads_to_result, 0, ndfas); + + if (report) { + fprintf(report, "Searching for dead states...\n"); + } + + do { + did_any = 0; + for (i=0; i<ndfas; i++) { + if (leads_to_result[i] == 0) { + if (has_any_nondefault_attribute(dfas[i])) { + leads_to_result[i] = 1; + did_any = 1; + continue; + } + + for (j=0; j<ntokens; j++) { + int next_state = dfas[i]->map[j]; + if ((next_state >= 0) && leads_to_result[next_state]) { + leads_to_result[i] = 1; + did_any = 1; + goto do_next_dfa_state; + } + } + } +do_next_dfa_state: + (void) 0; + } + } while (did_any); + + + /* Now prune any transition to states that have no path to a result. */ + for (i=0; i<ndfas; i++) { + if (leads_to_result[i] == 0) { + total_found++; + if (report) { + fprintf(report, "DFA state %d is dead\n", i); + } + dfas[i]->from_state = -1; + dfas[i]->via_token = -1; + dfas[i]->is_dead = 1; + } else { + dfas[i]->is_dead = 0; + } + + for (j=0; j<ntokens; j++) { + int next_state = dfas[i]->map[j]; + if (leads_to_result[next_state] == 0) { + dfas[i]->map[j] = -1; + } + } + } + + free(leads_to_result); + + if (!total_found && report) { + fprintf(report, "(no dead states found)\n"); + } +} +/*}}}*/ +/*{{{ compress_dfa() */ +void compress_dfa(struct DFA *dfa, int ntokens, + int n_dfa_entries, struct DFAEntry *dfa_entries) +{ + DFANode **seq; /* Storage for node sequence */ + int i; + int had_to_split; + + /* Safety net */ + if (dfa->n <= 0) return; + + local_dfas = dfa->s; + Nt = ntokens; + + seq = new_array(DFANode *, dfa->n); + for (i=0; i<dfa->n; i++) { + seq[i] = dfa->s[i]; + } + + find_dead_states(dfa->s, dfa->n, ntokens); + + assign_initial_classes(seq, dfa->n); + + do { + print_classes(dfa->s, dfa->n); + had_to_split = split_classes(seq, dfa->s, dfa->n); + } while (had_to_split); + + print_classes(dfa->s, dfa->n); + + compress_states(dfa, n_dfa_entries, dfa_entries); + discard_nfa_bitmaps(dfa); + + 
free(seq); + return; + +} +/*}}}*/ + diff --git a/src/mairix/dfasyn/configure b/src/mairix/dfasyn/configure @@ -0,0 +1,4 @@ +#!/bin/sh + +egrep -v '^#' INSTALL + diff --git a/src/mairix/dfasyn/dfasyn.1 b/src/mairix/dfasyn/dfasyn.1 @@ -0,0 +1,154 @@ +.TH DFASYN 1 "" +.SH NAME +dfasyn \- generate deterministic finite automata +.SH SYNOPSYS +.B dfasyn +[ +.BR \-o | \-\-output +.I C-filename +] [ +.BR \-ho | \-\-header-output +.I H-filename +] [ +.BR \-r | \-\-report +.I report-filename +] [ +.BR \-p | \-\-prefix +.I prefix +] [ +.BR \-u | \-\-uncompressed-tables +] [ +.BR \-ud | \-\-uncompressed-dfa +] [ +.BR \-I | \-\-inline-function +] [ +.BR \-v | \-\-verbose +] [ +.BR \-h | \-\-help +] +.I input-file + +.SH DESCRIPTION +.B dfasyn +generates a deterministic finite automaton (DFA) from a description file. + +.SH OPTIONS +.SS Options controlling output files +.TP +.BI "-o " C-filename +.br +.ns +.TP +.BI "--output " C-filename +.br +Specify the name of the file to which the C program text will be written. +If this option is not present, the C program text will be written to stdout. + +.TP +.BI "-ho " H-filename +.br +.ns +.TP +.BI "--header-output " H-filename +.br +Specify the name of the file to which the header information will be written. + +.TP +.BI "-r " report-filename +.br +.ns +.TP +.BI "--report " report-filename +.br +Specify the name of the file to which the report on the generated automaton +will be written. If this option is not present, no report will be written. + +.TP +.I input-file +.br +This is the name of the file containing the definition of the automaton. Refer +to +.BR dfasyn (5) +for more information about the format of this file. + +.SS Options controlling the generated automaton +.TP +.BI "-p " prefix +.br +.ns +.TP +.BI "--prefix " prefix +.br +Specify the prefix to be prepended onto each symbol that +.B dfasyn +generates in the output file. This allows multiple automata to be linked into +the same final program without namespace clashes. 
+ +The string prepended is actually +.I prefix +followed by an underscore ('_'). + +.TP +.BR -u ", " --uncompressed-tables +.br +Do not compress the transition tables. By default, +.B dfasyn +emits the transition tables compressed, and it emits a next-state function that +uses a bisection algorithm to search the tables. By contrast, uncompressed +tables use a simple array indexing algorithm in the next-state algorithm. +However, the generated tables will be much larger, especially if there is a +large set of input symbols and the transitions in the automaton are relatively +sparse. This option therefore represents a speed versus space trade-off in the +generated DFA. + +.TP +.BR -ud ", " --uncompressed-dfa +.br +Do not compress the generated DFA. By default, +.B dfasyn +compresses the DFA to combine common states into a single state in the final +DFA and to remove unreachable states. This option suppresses the compression. +Giving this option can only be to the detriment of the final DFA, in terms of +the array sizes of its tables. However, the option is useful for debugging +.B dfasyn +and will also reduce the run time of +.B dfasyn +since a potentially complex processing step can be omitted. + +.TP +.BR -I ", " --inline-function +.br +This causes the next-state function to be emitted as an inline function in the header output. +Specifying this option without +.B -ho +is nonsensical and +.B dfasyn +will complain in that situation. + +Normally, +.B dfasyn +will emit the next_state function in the C program text output. This will +incur a function call overhead for each input symbol when the DFA is used at +run-time. If this is significant to the final application, the +.B -I +option may be useful to allow the next-state function to be inlined. + +.SS General options + +.TP +.BR -v ", " --verbose +.br +Make the output more verbose; provide more comfort messages whilst +.B dfasyn +is running. 
+ +.TP +.BR -h ", " --help +.br +Show usage summary and exit + +.SH "SEE ALSO" +.BR dfasyn (5), +.BR bison (1), +.BR flex (1) + diff --git a/src/mairix/dfasyn/dfasyn.5 b/src/mairix/dfasyn/dfasyn.5 @@ -0,0 +1,650 @@ +.TH DFASYN 5 "" +.SH NAME +dfasyn +.SH SYNOPSIS +This page describes the format of the +.I input-file +for the +.B dfasyn +deterministic finite automaton generator. +.SH DESCRIPTION +.SS Overview +Reserved words may be given in all-lowercase, all-uppercase, initial capitals, +or 'WikiWord' format (e.g. +.B endblock +may be given as +.BR endblock ", " Endblock ", " EndBlock " or " ENDBLOCK ). + +.SS Block declaration +A +.B block +declaration is used to group together a set of state declarations. Blocks are +useful if there are blocks of states and their interconnections that occur more +than once in the NFA. In this case it is useful to declare a block, allowing +that block to be instantiated more than once elsewhere in the input file. + +Since state declarations are only allowed inside blocks, there must be at least +one block declaration in any useful input file. + +The syntax of a block declaration is +.RS +.B block +.I block-name +{ +.br +.RS 2 +[ +.I instance-declarations +] +.br +[ +.I state-declarations +] +.RE +.br +} +.RE + +.SS State declarations +A +.B state +declaration gives rise to a state in the input NFA. + +The syntax of a state declaration is +.RS +.B state +.I state-name +[ +.B entry +.I entry-name +] +.br +.RS 2 +[ +.I transitions +] +.RE +.RE + +States are implicitly terminated by the beginning of another type of construct. + +.B entry +.I entry-name +(if present) defines the name of an entry point into the scanner. In the +resulting C-code, a symbol called +.I entry-name +will be declared. Its value will be the DFA state number of the state +containing just this NFA state (plus its epsilon closure.) This allows for +multiple scanners to be generated from the same input file. 
For example, if +one scanner is the same as another but with some extra text that must match at +the beginning, two different +.B entry +states can be declared to represent this. +.B dfasyn +will be able to common-up all of the common part of the DFA's transition +tables. + +If there are no +.B entry +directives anywhere in the input file, +.B dfasyn +defaults to the last mentioned state in the last block being the entry state. + +.I transitions +is a whitespace-separated sequence of zero or more transitions. These define which +of the automaton's input symbols cause a transition from this state to which other +states. + +The same state may be declared more than once inside its block. In this case, +the transitions given in the second declaration will be merged with those given +in the first, as though all the transitions had been given in the first place. + +.SS Instance declarations +A block may be instantiated inside another block. This is useful if there is a +block of states with their transitions that occurs in more than one place +within the NFA. + +The syntax for an instance declaration is + +.RS +.I instance-name +: +.I block-name +.RE + +where +.I instance-name +is the name of the new instance, and +.I block-name +is the name of the block that is being instantiated. This block +.B must +have been declared earlier in the input file. For one thing, this prevents +mutually recursive definitions. + +When such an instance has been created, the states inside it may be referred to +within the enclosing block by prefixing their names with the +.I instance-name +followed by a period. + +.SS Transitions +A state-to-state transition is specified as follows. + +.RS +.I transition +-> +.I destinations +.RE + +.I destinations +is a comma-separated list of one or more fully-qualified state names. These +are the states to which the NFA moves if the +.I transition +is matched next in the input. 
The destination state names are allowed to be +forward-references; just the name is stored during parsing, and a second pass +later is used to resolve all the names. There is no need for a named +destination to actually be declared with another state definition; a state just +comes into being if it is named at all. + +A +.I transition +defines the inputs that are required to cause the scanner to move +from one state to another. A +.I transition +is a semicolon-separated list of one or more +.I stimuli. +(If there is only one stimulus, no semicolon is required.) The transition +matches as a whole if the stimuli are matched individually in sequential order +from left to right. + +.SS Transitions to a tag +Where a transition leads to a tagged exit state, the following syntax is used: + +.RS +.I transition += +.I tags +.RE + +where +.I tags +is a comma-separated list of one or more tag names. Thus a construction like + +.RS +state foo XXX = TAG1 +.RE + +indicates that matching the token XXX leads to a state in which TAG1 applies. + +.SS Stimuli +A +.B stimulus +is a pipe-separated list of alternatives. Each alternative may be one of the following: +.IP "*" 7 +the name of a token +.IP "*" 7 +a character class +.IP "*" 7 +the name of an abbreviation +.IP "*" 7 +an empty string (which gives rise to an +.B epsilon transition +) +.IP "*" 7 +an inline block instance + +.SS Input symbols +Input symbols can be defined in two ways. The first is to use ASCII characters +directly. The second is to define a set of +.I tokens +and use a front-end module to generate these based on the actual input. You +can actually mix both types of input symbol. For example, you might wish to +use ASCII characters mostly, but detect \(dqend-of-file\(dq as an explicit symbol. + +.SS ASCII input and character classes. + +Single ASCII characters can be given in double-quotes. Sets of ASCII +characters can be given in square brackets, similar to shell globbing. 
+Character classes can be negated and differenced. + +.IP [a] 12 +The character "a". +.IP [abe-h] 12 +Any of the characters "a", "b", "e", "f", "g", "h". +.IP ~[abc] 12 +Any of the 253 characters excluding "a", "b" and "c"; a negated character class. +.IP [^abc] 12 +Ditto - another way of expressing a negated character class. +.IP [a-z]~[c] 12 +Equivalent to [abd-z]. + +.PP +The following special cases are available within the square brackets: + +.IP \(rs- 8 +A hyphen. Normally the hyphen is used as a range separator. To get a literal +hyphen, it must be escaped by a back-slash. +.IP \(rs] 8 +A closing square bracket. The escaping is required to prevent it being handled +as the end of the character class. +.IP \(rs\(rs 8 +A literal backslash. +.IP \(rs^ 8 +A literal "^". +.IP \(rsn 8 +The same character as "\(rsn" in C. +.IP \(rsr 8 +The same character as "\(rsr" in C. +.IP \(rsf 8 +The same character as "\(rsf" in C. +.IP \(rst 8 +The same character as "\(rst" in C. +.IP ^A 8 +Generate a control character, in this case ASCII character 1. Defined for ^@ +through to ^Z. +.IP \(rsxa9 8 +The ASCII character with hex value 0xa9. Upper or lower case hex may be used. +.IP \(rs234 +The ASCII character with octal value 0234. + +.SS Tokens +To define non-ASCII inputs, at least one +.B tokens +directive must be used. The syntax is +.PP +.B tokens +.I list-of-tokens +.PP +where +.I list-of-tokens +is a space-separated list of token names. Each token name is a string that +will be acceptable as a C macro name when prefixed by the current prefix string +plus an underscore. + +If more than one +.B tokens +line appears in the input file, the 2nd and subsequent lines are treated as +though their entries were concatenated with the 1st line. + +.SS Abbreviations +An +.B abbreviation +provides a convenient way to define a shorthand name for a frequently used +.B stimulus. 
+ +The syntax is + +.RS +.B abbrev +.I abbrev-name += +.I stimulus +.RE + +For example: + +.RS +abbrev FOO = [aeiouAEIOU] | A_TOKEN | <xyzzy:in->out> +.RE + +.SS Inline block instances +A +.B stimulus +may take the form of a block instance. This is a convenient shorthand when a +complex sequence of input tokens needs to be matched as part of a transition. + +The syntax of an inline block instance is +.RS +.RI < block_name : entry_state "->" exit_state > +.RE + +As an example, given a block +.B double_a +defined like this +.RS +block double_a + state in A -> out +.br +endblock +.RE + +the following construction +.RS +block x + state foo <double_a:in->out> ; B ; <double_a:in->out> -> bar +.br +endblock +.RE + +is equivalent to +.RS +block x + aa1 : double_a + aa2 : double_a + state foo -> aa1.in + state aa1.out + B -> aa2.in + state aa2.out -> bar +.br +endblock +.RE + +Note that in the second example, where explicit instances have been created, +they must have unique names. In the first case, +.B dfasyn +will create the two anonymous instances automatically and handle all the +plumbing to connect up the in and out states. Note there is no requirement for +the states to be named 'in' and 'out'; that is merely a convention. An +instanced block may have multiple inputs, with different inputs being used in +different instantiations of the block, for example. + +.SS Tags and attributes +.B Tags +are associated with the NFA states in the input. An NFA state may have an +arbitrary number of tags associated with it, through what amounts to a list of +strings. +.B Attributes +are attached to the DFA states in the output. In the generated C-file, the +attributes are expressed in terms of an array which is indexed by the DFA state +number and whose elements are the attribute values applying to the states. + +Once the DFA has been generated, +.B dfasyn +knows the NFA states that apply in each DFA state. 
From this, the tags +associated with a DFA state are given by the union of all the tags applying in +all the NFA states that apply in that DFA state. + +The input file defines how a set of tags applying in a DFA state is to be +reduced to a single attribute value. A boolean expression language is provided +for this purpose. + +Although the default is to generate a single attribute table, +.B dfasyn +can generate arbitrarily many tables if required. This is achieved by using +.B attribute groups. +The NFA tag namespace is shared across all such groups. The group syntax is as +follows: + +.RS +.B group +.I groupname +.B { +.I declaration +[ +.RI ", " declaration +\ ... +] +.B } +.RE + +where each +.I declaration +is one of the following: + +.RS +.B attr +.I attribute-name +[ +.RI ", " attribute-name +\ ... ] +.br +.B attr +.I attribute-name +.B : +.I expression +.br +.B early +.B attr +.I attribute-name +[ +.RI ", " attribute-name +\ ... ] +.br +.B early +.B attr +.I attribute-name +.B : +.I expression +.RE + +In the form with no expression, each +.I attribute-name +has an implicit expression consisting of just the tag with the same name as +itself. + +.I expression +is defined in the section +.B Expressions +later. The short form + +.RS +.B attr +foo +.RE + +is short for +.RS +.B attr +foo +.B : +foo +.RE + +i.e. it allows an attribute to be defined which has the same name as a tag and +which is active in the cases where precisely that tag is active. + +If an attribute is prefixed by +.BR early , +it means that the C-code you provide to drive the DFA is going to stop scanning +once this state attribute is detected. For example, this would apply if you +were coding a "shortest match" scanner. +.B dfasyn +will prune all the transitions away from any DFA state having such an +attribute. This may lead to greater opportunities for +.B dfasyn +to compress the DFA. + +A default attribute must be declared. 
This is used to fill all the entries in +the attribute array for DFA states that end up with no explicit attribute +defined. (It is also used in determining where the DFA may be optimised to +remove "dead states".) The syntax is + +.RS +.B defattr +.I default-attribute-string +.RE + +Finally, the C-type of the attribute must be declared. This becomes the base +type of the array indexed by the DFA state number. The syntax is + +.RS +.B type +.I attribute-type-name +.RE + +It is illegal for more than one attribute in a particular attribute group to be +active in a DFA state. If this situation occurs, it indicates that the +expression logic for that group is defective. + +.SS Expressions +An +.I expression +defines an attribute in terms of a boolean relationship between one or more +tags. An +.I expression +may be any one of the following: + +.RS +.IR expression " & " expression +.br +.IR expression " | " expression +.br +.IR expression " ^ " expression +.br +.IR expression " ? " expression " : " expression +.br +.RI ( expression ) +.br +.RI "~" expression +.br +.RI "!" expression +.br +.I tag-name +.RE + +Note that +.RI "~" expression +and +.RI "!" expression +both mean the negation of expression. + +The operator precedence is what would be expected for a C-programmer. + +.SH Prefix specification +The +.B prefix +used in the generated C-file can optionally be set in the input file using the following syntax: + +.RS +.B prefix +.I prefix-string +.RE + +where +.IR prefix-string _ +(i.e. the specific string followed by an underscore) will occur at the start of +each symbol name in the generated C-file. + +If the prefix has been set via the command line using +.BR -p , +the +.B prefix +line in the input file will be ignored and a warning given. 
+ +.SH "THE GENERATED C-FILE" +The generated file exports the following symbols that can be used by the calling program: + +.TP +.B short +.IB prefix_ char2tok +[256]; +.br +If character classes have been used, this table maps from ASCII values to the +internal token numbers used by the generated DFA. This array will be defined +in the generated C-file. If a header file is being generated, it will be +declared in there also. + +.TP +.B #define +.IB prefix_ TOKEN +.I numeric_value +.br +If a +.B tokens +directive has been used, each such token will be assigned a number. These +assignments are emitted by +.B dfasyn +as a series of #define lines. Each token name from the input file will have the +.I prefix +and an underscore prepended to form the name of the symbol in the #define. +If a header file is being generated +.RB ( -ho ), +these definitions are placed in the header file. Otherwise, they are placed in +the main output C-file. + +.TP 7 +.B int +.IB prefix_ next_state +(int current_state, int next_token); +.br +This is the prototype for the next state function which the calling program must invoke. + +If no +.B -I +option has been used, this function will be defined in the generated C-file. +If a header file is being generated, it will be prototyped in there also. + +If +.B -I +has been used, the function will be defined in the header file. + +.TP +.B int +.IB prefix _ entry-name +.br +If the +.B entrystruct +directive has not been used, this format is used to define the DFA state +numbers for the defined entry points. The calling program uses these values to +set the +.I current_state +at the start of the scanning process, depending on which entry point is being +used. + +If there is more than one entry, there will be more than one such line. + + +.TP +.B struct +.I entrystruct-type +{ ... } +.I entrystruct-var +.br +If the +.B entrystruct +directive has been used, the DFA state numbers for the entry points are +declared as elements of a struct. 
The struct member names are identical to the +entry names used in the +.B dfasyn +input file. The declaration of the struct variable containing the state +numbers will be in the generated C-file. If a header file is being generated +.RB ( -ho ), +the definition of the struct type will be in there. Otherwise, it will be in +the C-file also. + +.TP 12 +.I attr-type +.IB prefix_ attr +.RI [ #DFA-states ] +.br +This defines the attributes for each of the DFA states in the default attribute +group. If no +.B type +.I attr-type +declaration was in the input file, the default of +.B short +will be used. + +If other attribute groups are defined, there will be a similar array for each one: + +.TP 18 +.I group-attr-type +.I prefix_group-name +.RI [ #DFA-states ] +.br +For the attribute group declared with +.B group +.I group-name +in the input file, this defines the attribute of each of the DFA states in that +group. + +.SH TEXT PASSTHROUGH +To pass a block of literal text through to the output file without +interpretation, enclose it in %{ ... %} like this: + +.RS +%{ +.br +#include "foo.h" +.br +%} +.RE + +The opening and closing patterns must be on lines on their own (trailing +whitespace is allowed). + + +.SH "SEE ALSO" +.BR dfasyn (1) + + + diff --git a/src/mairix/dfasyn/dfasyn.c b/src/mairix/dfasyn/dfasyn.c @@ -0,0 +1,690 @@ +/*************************************** + Main program for NFA to DFA table builder program. + ***************************************/ + +/* + ********************************************************************** + * Copyright (C) Richard P. Curnow 2000-2003,2005,2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +#include "dfasyn.h" + +FILE *report = NULL; +FILE *output = NULL; +FILE *header_output = NULL; + +/* If non-null this gets prepended onto the names of the all the entities that + * are generated in the output file. */ +char *prefix = NULL; + +extern int yyparse(void); + +/* ================================================================= */ +static char *entrystruct = NULL; +static char *entryvar = NULL; + +void define_entrystruct(const char *s, const char *v)/*{{{*/ +{ + if (!entrystruct) { + entrystruct = new_string(s); + entryvar = new_string(v); + } else { + fprintf(stderr, "Can't redefine entrystruct with <%s>\n", s); + exit(1); + } +} +/*}}}*/ +/* ================================================================= */ +static void print_token_table(void)/*{{{*/ +{ + FILE *dest; + int i; + extern char *prefix; + + dest = header_output ? header_output : output; + /* Not sure how it makes sense to write this to the C file : maybe if you're going + * to include the C file into a bigger one it's reasonable? Anyway, the intention + * is that you're more likely to use this for real if you're writing a header file. */ + + for (i=0; i<ntokens; i++) { + fprintf(dest, "#define %s_%s %d\n", + prefix ? 
prefix : "TOK_", + toktable[i], i); + } +} +/*}}}*/ +static void print_attr_tables(struct DFA *dfa, const char *prefix_under)/*{{{*/ +{ + int i, tab; + + for (tab=0; tab<n_evaluators; tab++) { + char *defattr = get_defattr(tab); + char *attrname = get_attr_name(tab); + if (!attrname) attrname = "attr"; + fprintf(output, "%s %s%s[] = {\n", get_attr_type(tab), prefix_under, attrname); + for (i=0; i<dfa->n; i++) { + char *attr = dfa->s[i]->attrs[tab]; + fprintf(output, " %s", attr ? attr : defattr); + fputc ((i<(dfa->n - 1)) ? ',' : ' ', output); + fprintf(output, " /* State %d */\n", i); + } + fprintf(output, "};\n\n"); + if (header_output) { + fprintf(header_output, "extern %s %s%s[];\n", get_attr_type(tab), prefix_under, attrname); + } + } +} +/*}}}*/ +static void check_default_attrs(void)/*{{{*/ +{ + int tab; + int fail = 0; + + for (tab=0; tab<n_evaluators; tab++) { + char *defattr = get_defattr(tab); + char *attrname = get_attr_name(tab); + attrname = attrname ? attrname : "(DEFAULT)"; + if (!defattr) { + fprintf(stderr, "ERROR: No defattr definition for %s\n", attrname); + fail = 1; + } + } + if (fail) { + exit(1); + } +} +/*}}}*/ +static void write_next_state_function_uncompressed(int Nt, int do_inline, const char *prefix_under)/*{{{*/ +{ + FILE *dest; + + dest = do_inline ? header_output : output; + + fprintf(dest, "%sint %snext_state(int current_state, int next_token) {\n", + do_inline ? "static inline " : "", + prefix_under); + fprintf(dest, " if (next_token < 0 || next_token >= %d) return -1;\n", Nt); + fprintf(dest, " return %strans[%d*current_state + next_token];\n", + prefix_under, Nt); + fprintf(dest, "}\n"); + if (!do_inline && header_output) { + fprintf(header_output, "extern int %snext_state(int current_state, int next_token);\n", + prefix_under); + } +} +/*}}}*/ +static void print_uncompressed_tables(struct DFA *dfa, int do_inline, const char *prefix_under)/*{{{*/ +/* Print out the state/transition table uncompressed, i.e. 
every + token has an array entry in every state. This is fast to access + but quite wasteful on memory with many states and many tokens. */ +{ + int Nt = ntokens + n_charclasses; + int n, i, j; + + n = 0; + fprintf(output, "%sshort %strans[] = {", + do_inline ? "" : "static ", + prefix_under); + + if (do_inline) { + fprintf(header_output, "extern short %strans[];\n", + prefix_under); + } + + for (i=0; i<dfa->n; i++) { + for (j=0; j<Nt; j++) { + if (n>0) fputc (',', output); + if (n%8 == 0) { + fprintf(output, "\n "); + } else { + fputc(' ', output); + } + n++; + fprintf(output, "%4d", dfa->s[i]->map[j]); + } + } + + fprintf(output, "\n};\n\n"); + + write_next_state_function_uncompressed(Nt, do_inline, prefix_under); + +} +/*}}}*/ +static int check_include_char(struct DFA *dfa, int this_state, int token)/*{{{*/ +{ + if (dfa->s[this_state]->defstate >= 0) { + return (dfa->s[this_state]->map[token] != + dfa->s[dfa->s[this_state]->defstate]->map[token]); + } else { + return (dfa->s[this_state]->map[token] >= 0); + } +} +/*}}}*/ +static void write_next_state_function_compressed(int do_inline, const char *prefix_under)/*{{{*/ +/* Write the next_state function for traversing compressed tables into the + output file. */ +{ + FILE *dest; + dest = do_inline ? header_output : output; + + fprintf(dest, "%sint %snext_state(int current_state, int next_token) {\n", + do_inline ? 
"static inline " : "", + prefix_under); + fprintf(dest, " int h, l, m, xm;\n"); + fprintf(dest, " while (current_state >= 0) {\n"); + fprintf(dest, " l = %sbase[current_state], h = %sbase[current_state+1];\n", prefix_under, prefix_under); + fprintf(dest, " while (h > l) {\n"); + fprintf(dest, " m = (h + l) >> 1; xm = %stoken[m];\n", prefix_under); + fprintf(dest, " if (xm == next_token) goto done;\n"); + fprintf(dest, " if (m == l) break;\n"); + fprintf(dest, " if (xm > next_token) h = m;\n"); + fprintf(dest, " else l = m;\n"); + fprintf(dest, " }\n"); + fprintf(dest, " current_state = %sdefstate[current_state];\n", prefix_under); + fprintf(dest, " }\n"); + fprintf(dest, " return -1;\n"); + fprintf(dest, " done:\n"); + fprintf(dest, " return %snextstate[m];\n", prefix_under); + fprintf(dest, "}\n"); + if (!do_inline && header_output) { + fprintf(header_output, "extern int %snext_state(int current_state, int next_token);\n", + prefix_under); + } + +} +/*}}}*/ +static void print_compressed_tables(struct DFA *dfa, int do_inline, const char *prefix_under)/*{{{*/ +/* Print state/transition table in compressed form. This is more + economical on storage, but requires a bisection search to find + the next state for a given current state & token */ +{ + int *basetab = new_array(int, dfa->n + 1); + int Nt = ntokens + n_charclasses; + int n, i, j; + + n = 0; + fprintf(output, "%sunsigned char %stoken[] = {", + do_inline ? "" : "static ", + prefix_under); + for (i=0; i<dfa->n; i++) { + for (j=0; j<Nt; j++) { + if (check_include_char(dfa, i, j)) { + if (n>0) fputc (',', output); + if (n%8 == 0) { + fprintf(output, "\n "); + } else { + fputc(' ', output); + } + n++; + fprintf(output, "%3d", j); + } + } + } + fprintf(output, "\n};\n\n"); + + n = 0; + fprintf(output, "%sshort %snextstate[] = {", + do_inline ? 
"" : "static ", + prefix_under); + for (i=0; i<dfa->n; i++) { + basetab[i] = n; + for (j=0; j<Nt; j++) { + if (check_include_char(dfa, i, j)) { + if (n>0) fputc (',', output); + if (n%8 == 0) { + fprintf(output, "\n "); + } else { + fputc(' ', output); + } + n++; + fprintf(output, "%5d", dfa->s[i]->map[j]); + } + } + } + fprintf(output, "\n};\n\n"); + basetab[dfa->n] = n; + + n = 0; + fprintf(output, "%sunsigned short %sbase[] = {", + do_inline ? "" : "static ", + prefix_under); + for (i=0; i<=dfa->n; i++) { + if (n>0) fputc (',', output); + if (n%8 == 0) { + fprintf(output, "\n "); + } else { + fputc(' ', output); + } + n++; + fprintf(output, "%5d", basetab[i]); + } + fprintf(output, "\n};\n\n"); + + n = 0; + fprintf(output, "%sshort %sdefstate[] = {", + do_inline ? "" : "static ", + prefix_under); + for (i=0; i<dfa->n; i++) { + if (n>0) fputc (',', output); + if (n%8 == 0) { + fprintf(output, "\n "); + } else { + fputc(' ', output); + } + n++; + fprintf(output, "%5d", dfa->s[i]->defstate); + } + fprintf(output, "\n};\n\n"); + + if (do_inline) { + fprintf(header_output, "extern unsigned char %stoken[];\n", prefix_under); + fprintf(header_output, "extern short %snextstate[];\n", prefix_under); + fprintf(header_output, "extern unsigned short %sbase[];\n", prefix_under); + fprintf(header_output, "extern short %sdefstate[];\n", prefix_under); + } + free(basetab); + + write_next_state_function_compressed(do_inline, prefix_under); +} +/*}}}*/ +static void print_entries_table(const char *prefix_under)/*{{{*/ +{ + int i; + if (entrystruct) { + int first; + /* If we write the struct defn to the header file, we ought not to emit the + * full struct defn again in the main output. This is tricky unless we can + * guarantee the header will get included, though. 
*/ + fprintf(output, "struct %s {\n", entrystruct); + if (header_output) { + fprintf(header_output, "extern struct %s {\n", entrystruct); + } + for (i=0; i<n_dfa_entries; i++) { + fprintf(output, " int %s;\n", dfa_entries[i].entry_name); + if (header_output) { + fprintf(header_output, " int %s;\n", dfa_entries[i].entry_name); + } + } + fprintf(output, "} %s = {\n", entryvar); + if (header_output) { + fprintf(header_output, "} %s;\n", entryvar); + } + for (i=0, first=1; i<n_dfa_entries; i++, first=0) { + if (!first) { + fputs(",\n", output); + } + fprintf(output, " %d", dfa_entries[i].state_number); + } + fputs("\n};\n", output); + } else { + for (i=0; i<n_dfa_entries; i++) { + fprintf(output, "int %s%s = %d;\n", + prefix_under, + dfa_entries[i].entry_name, dfa_entries[i].state_number); + if (header_output) { + fprintf(header_output, "extern int %s%s;\n", + prefix_under, + dfa_entries[i].entry_name); + } + } + } +} +/*}}}*/ +/* ================================================================= */ +static void deal_with_multiple_entries(Block **blk, struct DFA **dfa)/*{{{*/ +{ + /* Get the list of blocks that are to be combined to form a union of all their states. 
*/ + struct Entrylist *e; + int Ne; + Block **blocks; + Block *jumbo; + int bi, Nb, Ns, si, ei; + + for (Ne=0, e=entries; e; e=e->next) Ne++; + if (report) { + fprintf(report, "Processing %d separate entry points\n", Ne); + } + blocks = new_array(Block*, Ne); + for (Nb=0, e=entries; e; e=e->next) { + int matched = 0; + for (bi=0; bi<Nb; bi++) { + if (e->state->parent == blocks[bi]) { + matched = 1; + break; + } + } + if (!matched) { + blocks[Nb++] = e->state->parent; + } + } + for (Ns=0, bi=0; bi<Nb; bi++) { + Ns += blocks[bi]->nstates; + } + + if (report) { + fprintf(report, "Entries in %d blocks, total of %d states\n", + Nb, Ns); + } + + jumbo = new(Block); + jumbo->name = "(UNION OF MULTIPLE BLOCKS)"; + jumbo->nstates = jumbo->maxstates = Ns; + jumbo->states = new_array(State *, Ns); + jumbo->eclo = NULL; + + for (bi=0, si=0; bi<Nb; bi++) { + int ns = blocks[bi]->nstates; + int i; + int block_name_len; + memcpy(jumbo->states + si, blocks[bi]->states, sizeof(State *) * ns); + block_name_len = strlen(blocks[bi]->name); + for (i=0; i<ns; i++) { + int len; + char *new_name; + State *s = jumbo->states[si + i]; + len = block_name_len + strlen(s->name) + 2; + new_name = new_array(char, len); + strcpy(new_name, blocks[bi]->name); + strcat(new_name, "."); + strcat(new_name, s->name); + free(s->name); + s->name = new_name; + } + si += ns; + } + + /* Reindex all the states */ + for (si=0; si<Ns; si++) { + jumbo->states[si]->index = si; + } + + split_charclasses(jumbo); + expand_charclass_transitions(jumbo); + + if (verbose) fprintf(stderr, "Computing epsilon closure...\n"); + generate_epsilon_closure(jumbo); + print_nfa(jumbo); + build_transmap(jumbo); + + if (verbose) fprintf(stderr, "Building DFA...\n"); + n_dfa_entries = Ne; + dfa_entries = new_array(struct DFAEntry, Ne); + for (e=entries, ei=0; e; e=e->next, ei++) { + dfa_entries[ei].entry_name = new_string(e->entry_name); + dfa_entries[ei].state_number = e->state->index; + } + *dfa = build_dfa(jumbo); + *blk = jumbo; 
+ +} +/*}}}*/ +/* ================================================================= */ +static void usage(void)/*{{{*/ +{ + fprintf(stderr, + "dfasyn, Copyright (C) 2001-2003,2005,2006 Richard P. Curnow\n" + "\n" + "dfasyn comes with ABSOLUTELY NO WARRANTY.\n" + "This is free software, and you are welcome to redistribute it\n" + "under certain conditions; see the GNU General Public License for details.\n" + "\n" + "Usage: dfasyn [OPTION]... FILE\n" + "Read state-machine description from FILE and generate a deterministic automaton.\n" + "Write results to stdout unless options dictate otherwise.\n" + "\n" + "Output files:\n" + " -o, --output FILE Define the name of the output file (e.g. foobar.c)\n" + " -ho, --header-output FILE Define the name of the header output file (e.g. foobar.h)\n" + " -r, --report FILE Define the name where the full generator report goes (e.g. foobar.report)\n" + "\n" + "Generated automaton:\n" + " -p, --prefix PREFIX Specify a prefix for the variables and functions in the generated file(s)\n" + " -u, --uncompressed-tables Don't compress the generated transition tables\n" + " -ud, --uncompressed-dfa Don't common-up identical states in the DFA\n" + " -I, --inline-function Make the next_state function inline (requires -ho)\n" + "\n" + "General:\n" + " -v, --verbose Be verbose\n" + " -h, --help Display this help message\n" + ); + +} +/*}}}*/ +/* ================================================================= */ +int main (int argc, char **argv)/*{{{*/ +{ + int result; + + Block *main_block; + char *input_name = NULL; + char *output_name = NULL; + char *header_output_name = NULL; + char *report_name = NULL; + int uncompressed_tables = 0; + int uncompressed_dfa = 0; /* Useful for debug */ + int do_inline = 0; + extern char *prefix; + char *prefix_under; + FILE *input = NULL; + struct DFA *dfa; + + verbose = 0; + report = NULL; + + /*{{{ Parse cmd line arguments */ + while (++argv, --argc) { + if (!strcmp(*argv, "-h") || !strcmp(*argv, "--help")) 
{ + usage(); + exit(0); + } else if (!strcmp(*argv, "-v") || !strcmp(*argv, "--verbose")) { + verbose = 1; + } else if (!strcmp(*argv, "-o") || !strcmp(*argv, "--output")) { + ++argv, --argc; + output_name = *argv; + } else if (!strcmp(*argv, "-ho") || !strcmp(*argv, "--header-output")) { + ++argv, --argc; + header_output_name = *argv; + } else if (!strcmp(*argv, "-r") || !strcmp(*argv, "--report")) { + ++argv, --argc; + report_name = *argv; + } else if (!strcmp(*argv, "-u") || !strcmp(*argv, "--uncompressed-tables")) { + uncompressed_tables = 1; + } else if (!strcmp(*argv, "-ud") || !strcmp(*argv, "--uncompressed-dfa")) { + uncompressed_dfa = 1; + } else if (!strcmp(*argv, "-I") || !strcmp(*argv, "--inline-function")) { + do_inline = 1; + } else if (!strcmp(*argv, "-p") || !strcmp(*argv, "--prefix")) { + ++argv, --argc; + prefix = *argv; + } else if ((*argv)[0] == '-') { + fprintf(stderr, "Unrecognized command line option %s\n", *argv); + } else { + input_name = *argv; + } + } + /*}}}*/ + + if (do_inline && !header_output_name) {/*{{{*/ + fprintf(stderr, + "--------------------------------------------------------------\n" + "It doesn't make sense to try inlining if you're not generating\n" + "a separate header file.\n" + "Not inlining the transition function.\n" + "--------------------------------------------------------------\n" + ); + do_inline = 0; + } +/*}}}*/ + if (input_name) {/*{{{*/ + input = fopen(input_name, "r"); + if (!input) { + fprintf(stderr, "Can't open %s for input, exiting\n", input_name); + exit(1); + } + } else { + input = stdin; + } + /*}}}*/ + if (output_name) {/*{{{*/ + output = fopen(output_name, "w"); + if (!output) { + fprintf(stderr, "Can't open %s for writing, exiting\n", output_name); + exit(1); + } + } else { + output = stdout; + } +/*}}}*/ + if (header_output_name) {/*{{{*/ + header_output = fopen(header_output_name, "w"); + if (!header_output) { + fprintf(stderr, "Can't open %s for writing, exiting\n", header_output_name); + 
exit(1); + } + } + /* otherwise the header stuff just goes to the same fd as the main output. */ + +/*}}}*/ + if (report_name) {/*{{{*/ + report = fopen(report_name, "w"); + if (!report) { + fprintf(stderr, "Can't open %s for writing, no report will be created\n", report_name); + } + } +/*}}}*/ + + if (verbose) { + fprintf(stderr, "General-purpose automaton builder\n"); + fprintf(stderr, "Copyright (C) Richard P. Curnow 2000-2003,2005,2006\n"); + } + + eval_initialise(); + + if (verbose) fprintf(stderr, "Parsing input..."); + yyin = input; + + /* Set yyout. This means that if anything leaks from the scanner, or appears + in a %{ .. %} block, it goes to the right place. */ + yyout = output; + + result = yyparse(); + if (result > 0) exit(1); + if (verbose) fprintf(stderr, "\n"); + + make_evaluator_array(); + check_default_attrs(); + + if (!entries) { + /* Support legacy method : the last state to be current in the input file + * is the entry state of the NFA */ + State *start_state; + start_state = get_curstate(); + main_block = start_state->parent; + split_charclasses(main_block); + expand_charclass_transitions(main_block); + if (verbose) fprintf(stderr, "Computing epsilon closure...\n"); + generate_epsilon_closure(main_block); + print_nfa(main_block); + build_transmap(main_block); + + if (verbose) fprintf(stderr, "Building DFA...\n"); + { + struct DFAEntry entry[1]; + n_dfa_entries = 1; + dfa_entries = entry; + entry[0].entry_name = "(ONLY ENTRY)"; + entry[0].state_number = start_state->index; + dfa = build_dfa(main_block); + } + } else { + /* Allow generation of multiple entry states, so you can use the same input file when + * you need several automata that have a lot of logic in common. 
*/ + deal_with_multiple_entries(&main_block, &dfa); + } + if (report) { + fprintf(report, "--------------------------------\n" + "DFA structure before compression\n" + "--------------------------------\n"); + } + print_dfa(dfa); + + if (had_ambiguous_result) { + fprintf(stderr, "No output written, there were ambiguous attribute values for accepting states\n"); + exit(2); + } + + if (!uncompressed_dfa) { + if (verbose) fprintf(stderr, "\nCompressing DFA...\n"); + compress_dfa(dfa, ntokens + n_charclasses, n_dfa_entries, dfa_entries); + } + + if (verbose) fprintf(stderr, "\nCompressing transition tables...\n"); + compress_transition_table(dfa, ntokens + n_charclasses); + + if (report) { + fprintf(report, "-------------------------------\n" + "DFA structure after compression\n" + "-------------------------------\n"); + } + if (verbose) fprintf(stderr, "Writing outputs...\n"); + print_dfa(dfa); + + if (prefix) { + prefix_under = new_array(char, 2 + strlen(prefix)); + strcpy(prefix_under, prefix); + strcat(prefix_under, "_"); + } else { + prefix_under = ""; + } + + if (header_output) { + fprintf(header_output, "#ifndef %sHEADER_H\n", prefix_under); + fprintf(header_output, "#define %sHEADER_H\n", prefix_under); + } + + print_token_table(); + print_charclass_mapping(output, header_output, prefix_under); + print_attr_tables(dfa, prefix_under); + + if (uncompressed_tables) { + print_uncompressed_tables(dfa, do_inline, prefix_under); + } else { + print_compressed_tables(dfa, do_inline, prefix_under); + } + + if (entries) { + /* Emit entry table */ + print_entries_table(prefix_under); + } else { + /* Legacy behaviour - DFA state 0 is implicitly the single entry state. 
*/ + } + + if (report) { + fclose(report); + report = NULL; + } + + report_unused_tags(); + + if (header_output) { + fprintf(header_output, "#endif\n"); + } + + return result; +} +/*}}}*/ diff --git a/src/mairix/dfasyn/dfasyn.h b/src/mairix/dfasyn/dfasyn.h @@ -0,0 +1,365 @@ +/*************************************** + Header file for NFA->DFA conversion utility. + ***************************************/ + +/* + ********************************************************************** + * Copyright (C) Richard P. Curnow 2001-2003,2005,2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +#ifndef N2D_H +#define N2D_H + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define new(T) ((T *) malloc(sizeof(T))) +#define new_array(T,N) ((T *) malloc((N) * sizeof(T))) +#define resize_array(T,arr,newN) ((T *) ((arr) ? 
realloc(arr,(newN)*sizeof(T)) : malloc((newN)*sizeof(T)))) +#define new_string(s) strcpy((char *)malloc((strlen(s)+1)*sizeof(char)),s) + +/* For typecasting, especially useful for declarations of local ptrs to args + of a qsort comparison fn */ +#define Castdecl(x, T, nx) T nx = (T) x + +#define Castderef(x, T, nx) T nx = *(T*) x + +/* Globally visible options to control reporting */ +extern FILE *report; +extern FILE *report; +extern FILE *output; +extern FILE *header_output; + +/* Bison interface. */ +extern FILE *yyin; +extern FILE *yyout; + +extern int verbose; + +extern char *prefix; + +/* Temporary - this will be done better when the charclass stuff is + * added. */ +extern char **toktable; +extern int ntokens; + +extern int n_charclasses; + +extern int had_ambiguous_result; + +extern int n_dfa_entries; +extern struct DFAEntry *dfa_entries; + +struct State; +struct Block; +struct StimulusList; + +struct Abbrev {/*{{{*/ + char *lhs; /* Defined name */ + struct StimulusList *stimuli; +#if 0 + char **rhs; /* Token/define */ + int nrhs; + int maxrhs; +#endif +}; +/*}}}*/ + +typedef enum StimulusType {/*{{{*/ + T_EPSILON, + T_TOKEN, + T_ABBREV, + T_INLINEBLOCK, + T_CHARCLASS +} StimulusType; +/*}}}*/ +typedef struct InlineBlock {/*{{{*/ + char *type; /* Block type */ + char *in; /* Name of input node */ + char *out; /* Name of output node */ +} InlineBlock; +/*}}}*/ + +#define ULONGS_PER_CC 8 + +typedef struct CharClass {/*{{{*/ + int is_used; + unsigned long char_bitmap[ULONGS_PER_CC]; + unsigned long group_bitmap[ULONGS_PER_CC]; +} CharClass; +/*}}}*/ +typedef struct Stimulus {/*{{{*/ + StimulusType type; + union { + /* TODO : token should eventually become a struct ref ? 
*/ + int token; + struct Abbrev *abbrev; + /* placeholders */ + InlineBlock *inline_block; + CharClass *char_class; + } x; +} Stimulus; +/*}}}*/ +typedef struct StimulusList {/*{{{*/ + struct StimulusList *next; + Stimulus *stimulus; +} StimulusList; +/*}}}*/ +typedef enum TransType {/*{{{*/ + TT_EPSILON, + TT_TOKEN, + TT_CHARCLASS +} TransType; +/*}}}*/ +typedef struct TransList {/*{{{*/ + struct TransList *next; + TransType type; + union { + int token; + CharClass *char_class; + } x; + char *ds_name; + struct State *ds_ref; +} TransList; +/*}}}*/ +typedef struct Stringlist {/*{{{*/ + struct Stringlist *next; + char *string; +} Stringlist; +/*}}}*/ + +#if 0 +typedef struct InlineBlockList {/*{{{*/ + struct InlineBlockList *next; + InlineBlock *ib; +} InlineBlockList; +/*}}}*/ +#endif + +typedef struct State {/*{{{*/ + char *name; + int index; /* Array index in containing block */ + struct Block *parent; + TransList *transitions; + Stringlist *tags; + Stringlist *entries; + + /* Pointers to the nodes in the 'transitions' list, sorted into canonical order */ + TransList **ordered_trans; + int n_transitions; + + unsigned char removed; /* Flag indicating state has been pruned by compression stage */ +} State; +/*}}}*/ +typedef struct S_Stateset {/*{{{*/ + State **states; + int nstates; + int maxstates; +} Stateset; +/*}}}*/ +#define HASH_BUCKETS 64 +#define HASH_MASK (HASH_BUCKETS-1) + +typedef struct Block {/*{{{*/ + char *name; + + /* The master table of states within this block. This has to be in a flat + array because we have to work with respect to state indices when doing the + 2D bitmap stuff for the subset construction. */ + State **states; + int nstates; + int maxstates; + + /* epsilon closure for this block (treating it as a top-level block.) 
*/ + unsigned long **eclo; + + /* Hash table for getting rapid access to a state within the block, given + its name */ + Stateset state_hash[HASH_BUCKETS]; + + int subcount; /* Number for generating substates */ + int subblockcount; /* Number for generating inline subblocks */ +} Block; +/*}}}*/ +struct Entrylist {/*{{{*/ + struct Entrylist *next; + char *entry_name; + State *state; +}; +/*}}}*/ +extern struct Entrylist *entries; + +typedef struct DFANode {/*{{{*/ + unsigned long *nfas; + unsigned long signature; /* All the longwords in the nfas array xor'ed together */ + int index; /* Entry's own index in the array */ + int *map; /* index by token code */ + int from_state; /* the state which provided the first transition to this one (leading to its creation) */ + int via_token; /* the token through which we got to this state the first time. */ + Stringlist *nfa_exit_sl; /* NFA exit values */ + Stringlist *nfa_attr_sl; /* NFA exit values */ + char **attrs; /* Attributes, computed by boolean expressions defined in input text */ + int has_early_exit; /* If !=0, the scanner is expected to exit immediately this DFA state is entered. + It means that no out-bound transitions have to be created. */ + + /* Fields calculated in compdfa.c */ + + /* The equivalence class the state is in. */ + int eq_class; + + /* Temp. storage for the new eq. class within a single pass of the splitting alg. */ + int new_eq_class; + + /* Signature field from above is also re-used. */ + + int is_rep; /* Set if state is chosen as the representative of its equivalence class. */ + int is_dead; /* Set if the state has no path to a non-default result */ + int new_index; /* New index assigned to the state. */ + + /* Fields calculated in tabcompr.c */ + + unsigned long transition_sig; + + /* Default state, i.e. the one that supplies transitions for tokens not + explicitly listed for this one. */ + int defstate; + + /* Number of transitions that this state has different to those in the + default state. 
*/ + int best_diff; + +} DFANode; +/*}}}*/ +struct DFAEntry {/*{{{*/ + char *entry_name; + /* Initially the NFA number, overwritten with DFA number by build_dfa */ + int state_number; +}; +/*}}}*/ +struct DFA {/*{{{*/ + DFANode **s; /* states */ + int n; + int max; + + /* the original block that the DFA comes from. */ + Block *b; +}; +/*}}}*/ + +void yyerror(const char *s); +extern int yylex(void); + +/* Constants for 'create' args */ +#define USE_OLD_MUST_EXIST 0 +#define CREATE_MUST_NOT_EXIST 1 +#define CREATE_OR_USE_OLD 2 + +State *get_curstate(void); + +struct Abbrev; +extern struct Abbrev * create_abbrev(const char *name, struct StimulusList *stimuli); + +int lookup_token(char *name, int create); +Block *lookup_block(char *name, int create); +State *lookup_state(Block *in_block, char *name, int create); +void add_entry_to_state(State *curstate, const char *entry); +void define_entrystruct(const char *s, const char *v); +Stringlist * add_string_to_list(Stringlist *existing, const char *token); +void add_transitions(Block *curblock, State *curstate, StimulusList *stimuli, char *destination); +State * add_transitions_to_internal(Block *curblock, State *addtostate, StimulusList *stimuli); +void add_tags(State *curstate, Stringlist *sl); +InlineBlock *create_inline_block(char *type, char *in, char *out); +void instantiate_block(Block *curblock, char *block_name, char *instance_name); +void fixup_state_refs(Block *b); +void expand_charclass_transitions(Block *b); + +void compress_nfa(Block *b); + +extern void generate_epsilon_closure(Block *b); +extern void print_nfa(Block *b); +extern void build_transmap(Block *b); +extern struct DFA *build_dfa(Block *b); +extern void print_dfa(struct DFA *dfa); + +/* In expr.c */ +typedef struct Expr Expr; + +Expr * new_not_expr(Expr *c); +Expr * new_and_expr(Expr *c1, Expr *c2); +Expr * new_or_expr(Expr *c1, Expr *c2); +Expr * new_xor_expr(Expr *c1, Expr *c2); +Expr * new_cond_expr(Expr *c1, Expr *c2, Expr *c3); +Expr * 
new_tag_expr(char *tag_name); +extern int eval(Expr *e); +void define_tag(char *name, Expr *e); +void clear_tag_values(void); +void report_unused_tags(void); + +/* In evaluator.c */ +typedef struct evaluator Evaluator; +extern int n_evaluators; +extern Evaluator *default_evaluator; +extern Evaluator *start_evaluator(const char *name); +void define_attr(Evaluator *x, char *string, Expr *e, int early); +void define_defattr(Evaluator *x, char *string); +void set_tag_value(char *tag_name); +int evaluate_attrs(char ***, int *); +int evaluator_is_used(Evaluator *x); +void define_defattr(Evaluator *x, char *text); +void define_type(Evaluator *x, char *text); +char* get_defattr(int i); +char* get_attr_type(int i); +char* get_attr_name(int i); +void make_evaluator_array(void); +void emit_dfa_attr_report(char **results, FILE *out); +void eval_initialise(void); + +void compress_transition_table(struct DFA *dfa, int ntokens); +unsigned long increment(unsigned long x, int field); +unsigned long count_bits_set(unsigned long x); + +/* in abbrevs.c */ +struct Abbrev * lookup_abbrev(char *name); + +/* in stimulus.c */ +extern Stimulus *stimulus_from_epsilon(void); +extern Stimulus *stimulus_from_string(char *str); +extern Stimulus *stimulus_from_inline_block(InlineBlock *block); +extern Stimulus *stimulus_from_char_class(CharClass *char_class); +extern StimulusList *append_stimulus_to_list(StimulusList *existing, Stimulus *stim); + +/* in charclass.c */ +extern int cc_test_bit(const unsigned long *bitmap, int entry); +extern CharClass *new_charclass(void); +extern void free_charclass(CharClass *what); +extern void add_charclass_to_list(CharClass *cc); +extern void add_singleton_to_charclass(CharClass *towhat, char thechar); +extern void add_range_to_charclass(CharClass *towhat, char star, char end); +extern void invert_charclass(CharClass *what); +extern void diff_charclasses(CharClass *left, CharClass *right); +extern void split_charclasses(const Block *b); +extern void 
print_charclass_mapping(FILE *out, FILE *header_out, const char *prefix_under); +extern void print_charclass(FILE *out, int idx); + +/* NOTE(review): declared void, so the new number of DFA states is not returned here; presumably it is left in dfa->n - confirm against the definition */ +extern void compress_dfa(struct DFA *dfa, int ntokens, + int n_dfa_entries, struct DFAEntry *dfa_entries); + +#endif /* N2D_H */ + diff --git a/src/mairix/dfasyn/dfasyn.texi b/src/mairix/dfasyn/dfasyn.texi @@ -0,0 +1,85 @@ +@setfilename dfasyn.info +@settitle User guide for the dfasyn DFA construction utility + +@titlepage +@title dfasyn user guide +@subtitle This manual describes how to use dfasyn. +@author Richard P. Curnow +@page +@end titlepage + +@c{{{ Top node +@node Top +@top +@menu +* Introduction:: The introduction +* Input file format:: A reference for the input file +* Concept Index:: Index of concepts +@end menu +@c}}} +@c{{{ ch:Introduction +@node Introduction +@chapter Introduction + +@menu +* Uses for dfasyn:: The types of problem to which dfasyn is well-suited +@end menu + +@node Uses for dfasyn +@section Uses for dfasyn +dfasyn is particularly suited to the following types of scanning problem, +which exceed flex's capabilities: + +@itemize @bullet +@item When the pattern describing a token cannot be written as a regular +expression. For example, there may be iteration but with constraints between +the end of one iteration and the start of the next. +@item When more than 1 rule matches in a flex input file, flex chooses between +them based on: + + @itemize - + @item Longest match first + @item Earliest rule in the file if more than 1 match of the same length exists + @end itemize + +dfasyn allows for a more general method of resolving multiple matches. +Conceptually, it works out which rules match, giving a true/false status for +each rule. The input file defines an arbitrarily complex set of boolean +expressions to reduce the multiple matches down to one unique one. (If more than +one of the boolean expressions evaluates true, this is an error.) 
+ +@item When a customised method is required to construct the input tokens that +pass to the scanner. For example, if the tokens are the characters in a string +(rather than coming from a file), or if some special logic has to be used to +generate the tokens from the input character stream. + +@item If you want to add actions to the scanning loop, e.g. to remember special +locations within the word being scanned. + +@end itemize + +@node Non-uses for dfasyn +@section Cases where flex might be better + +In general, flex is easier and more convenient to use. Where it is applicable +to your problem, there are no obvious benefits to using dfasyn. + +@node Why written +@section Why was dfasyn written? +@c}}} + +@c{{{ ch:Input file format +@node Input file format +@chapter Input file format +This section describes the format of the input file. + +@c}}} + + +@node Concept Index +@unnumbered Concept Index +@printindex cp +@bye + +@c vim:syntax=OFF:fdm=marker:fdc=4:cms=@c%s + diff --git a/src/mairix/dfasyn/evaluator.c b/src/mairix/dfasyn/evaluator.c @@ -0,0 +1,248 @@ +/*************************************** + Routines for merging and prioritising exit tags and attribute tags + ***************************************/ + +/* + ********************************************************************** + * Copyright (C) Richard P. Curnow 2001-2003,2005,2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +/* Handle boolean expressions used to determine the final scanner result from + the set of NFA accepting states that are simultaneously active at the end of + the scan. */ + +#include "dfasyn.h" + +struct Attr { + char *attr; /* The string to write to the output file */ + /* The boolean expression that defines whether the attribute is active */ + Expr *e; + /* If != 0, assume the state machine that the program's output is embedded in + will exit immediately if this result occurs. This may allow lots of + states to be culled from the DFA. */ + int early; +}; + +typedef struct Attr Attr; +struct evaluator { + Attr *attrs; + int is_used; /* Set if any input rules reference this evaluator */ + int n_attrs; + int max_attrs; + char *name; + char *defattr; + char *attr_type; +}; + +Evaluator *default_evaluator; + +struct evaluator_list { + struct evaluator_list *next; + Evaluator *evaluator; +}; + +static struct evaluator_list *evaluator_list = NULL; + +/* Array pointer */ +static struct evaluator **evaluators = NULL; +int n_evaluators = 0; + +Evaluator* start_evaluator(const char *name)/*{{{*/ +{ + Evaluator *x = NULL; + struct evaluator_list *el; + for (el=evaluator_list; el; el=el->next) { + /* name is null for the default (anonymous) attribute group */ + const char *een = el->evaluator->name; + if ((!een && !name) || + (een && name && !strcmp(een, name))) { + x = el->evaluator; + break; + } + } + if (!x) { + struct evaluator_list *nel; + x = new(struct evaluator); + x->attrs = NULL; + x->is_used = 0; + x->n_attrs = x->max_attrs = 0; + x->name = name ? 
new_string(name) : NULL; + x->defattr = NULL; + x->attr_type = NULL; + nel = new(struct evaluator_list); + nel->next = evaluator_list; + nel->evaluator = x; + evaluator_list = nel; + } + return x; +} +/*}}}*/ +void destroy_evaluator(Evaluator *x)/*{{{*/ +{ + /* Just leak memory for now, no need to clean up. */ + return; +} +/*}}}*/ +void define_defattr(Evaluator *x, char *text)/*{{{*/ +{ + x = x ? x : default_evaluator; + x->defattr = new_string(text); + x->is_used = 1; +} +/*}}}*/ +void define_type(Evaluator *x, char *text)/*{{{*/ +{ + x = x ? x : default_evaluator; + x->attr_type = new_string(text); + x->is_used = 1; +} +/*}}}*/ +char* get_defattr(int i)/*{{{*/ +{ + Evaluator *x = evaluators[i]; + return x->defattr; +} +/*}}}*/ +char* get_attr_type(int i)/*{{{*/ +{ + Evaluator *x = evaluators[i]; + return x->attr_type ? x->attr_type : "short"; +} +/*}}}*/ +char* get_attr_name(int i)/*{{{*/ +{ + Evaluator *x = evaluators[i]; + return x->name ? x->name : NULL; +} +/*}}}*/ +static void grow_attrs(Evaluator *x)/*{{{*/ +{ + if (x->n_attrs == x->max_attrs) { + x->max_attrs += 32; + x->attrs = resize_array(Attr, x->attrs, x->max_attrs); + } +} +/*}}}*/ + +void define_attr(Evaluator *x, char *string, Expr *e, int early)/*{{{*/ +/*++++++++++++++++++++ + Add a attr defn. If the expr is null, it means build a single expr corr. + to the value of the tag with the same name as the attr string. + ++++++++++++++++++++*/ +{ + Attr *r; + + x = x ? 
x : default_evaluator; + + x->is_used = 1; + grow_attrs(x); + r = &(x->attrs[x->n_attrs++]); + r->attr = new_string(string); + r->early = early; + if (e) { + r->e = e; + } else { + Expr *ne; + ne = new_tag_expr(string); + r->e = ne; + } + + return; +} +/*}}}*/ + +void make_evaluator_array(void)/*{{{*/ +{ + int n; + struct evaluator_list *el; + for (el=evaluator_list, n=0; el; el=el->next, n++) ; + evaluators = new_array(struct evaluator *, n); + n_evaluators = n; + for (el=evaluator_list, n=0; el; el=el->next, n++) { + evaluators[n] = el->evaluator; + } +} +/*}}}*/ +int evaluate_attrs(char ***attrs, int *attr_early)/*{{{*/ +/*++++++++++++++++++++ + Evaluate the attr which holds given the tags that are set + ++++++++++++++++++++*/ +{ + int i, j; + int status; + + if (attr_early) *attr_early = 0; + status = 1; + + *attrs = new_array(char *, n_evaluators); + + for (j=0; j<n_evaluators; j++) { + char **attr; + struct evaluator *x; + int any_attrs_so_far = 0; + int matched = -1; + + attr = &(*attrs)[j]; + x = evaluators[j]; + + for (i=0; i<x->n_attrs; i++) { + if (eval(x->attrs[i].e)) { + if (matched >= 0) { + *attr = NULL; + status = 0; + break; + } else { + any_attrs_so_far = 1; + matched = i; + } + } + } + if (matched < 0) { + *attr = NULL; + } else { + *attr = x->attrs[matched].attr; + if (attr_early) *attr_early |= x->attrs[matched].early; + } + } + + return status; +} +/*}}}*/ +int evaluator_is_used(Evaluator *x)/*{{{*/ +{ + return x->is_used; +} +/*}}}*/ +void emit_dfa_attr_report(char **attrs, FILE *out)/*{{{*/ +{ + int i; + for (i=0; i<n_evaluators; i++) { + if (attrs[i]) { + const char *name = evaluators[i]->name; + fprintf(out, " Attributes for <%s> : %s\n", + name ? 
name : "(DEFAULT)", attrs[i]); + } + } +} +/*}}}*/ +/* Initialisation */ +void eval_initialise(void)/*{{{*/ +{ + default_evaluator = start_evaluator(NULL); +} +/*}}}*/ diff --git a/src/mairix/dfasyn/expr.c b/src/mairix/dfasyn/expr.c @@ -0,0 +1,243 @@ +/*************************************** + Boolean expression trees over tags, and the tag table used to evaluate them + ***************************************/ + +/* + ********************************************************************** + * Copyright (C) Richard P. Curnow 2001-2003,2005,2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +/* Handle boolean expressions used to determine the final scanner result from + the set of NFA accepting states that are simultaneously active at the end of + the scan. 
*/ + +#include "dfasyn.h" + +enum ExprType { + E_AND, E_OR, E_XOR, E_COND, E_NOT, E_TAG +}; + +struct Tag; + +struct Expr { + enum ExprType type; + union { + struct { struct Expr *c1, *c2; } and; + struct { struct Expr *c1, *c2; } or; + struct { struct Expr *c1, *c2; } xor; + struct { struct Expr *c1, *c2, *c3; } cond; + struct { struct Expr *c1; } not; + struct { char *name; struct Tag *s; } tag; + } data; +}; + +struct Tag { + char *name; + int is_expr; + union { + Expr *e; + int val; + } data; + int is_used; +}; + +struct TagList { + struct TagList *next; + struct Tag *tag; +}; + +typedef struct Tag Tag; +typedef struct TagList TagList; + +static TagList *tags = NULL; + +Expr * new_not_expr(Expr *c)/*{{{*/ +{ + Expr *r = new(Expr); + r->type = E_NOT; + r->data.not.c1 = c; + return r; +} +/*}}}*/ +Expr * new_and_expr(Expr *c1, Expr *c2)/*{{{*/ +{ + Expr *r = new(Expr); + r->type = E_AND; + r->data.and.c1 = c1; + r->data.and.c2 = c2; + return r; +} +/*}}}*/ +Expr * new_or_expr(Expr *c1, Expr *c2)/*{{{*/ +{ + Expr *r = new(Expr); + r->type = E_OR; + r->data.or.c1 = c1; + r->data.or.c2 = c2; + return r; +} +/*}}}*/ +Expr * new_xor_expr(Expr *c1, Expr *c2)/*{{{*/ +{ + Expr *r = new(Expr); + r->type = E_XOR; + r->data.xor.c1 = c1; + r->data.xor.c2 = c2; + return r; +} +/*}}}*/ +Expr * new_cond_expr(Expr *c1, Expr *c2, Expr *c3)/*{{{*/ +{ + Expr *r = new(Expr); + r->type = E_COND; + r->data.cond.c1 = c1; + r->data.cond.c2 = c2; + r->data.cond.c3 = c3; + return r; +} +/*}}}*/ + +Expr * new_tag_expr(char *tag_name)/*{{{*/ +/* Return expr for tag name if it already exist, else create. Don't bind to + actual tag instance yet. At the stage of parsing where this function is + used, we don't know yet which tag table the tag has to exist in. 
*/ +{ + Expr *r; + + r = new(Expr); + r->type = E_TAG; + r->data.tag.name = new_string(tag_name); + r->data.tag.s = NULL; /* Force binding at first use */ + return r; +} +/*}}}*/ +static void add_new_tag(Tag *s)/*{{{*/ +{ + TagList *nsl = new(TagList); + nsl->tag = s; + nsl->next = tags; + tags = nsl; +} + /*}}}*/ +static Tag * find_tag_or_create(char *tag_name)/*{{{*/ +{ + Tag *s; + TagList *sl; + for (sl=tags; sl; sl=sl->next) { + s = sl->tag; + if (!strcmp(s->name, tag_name)) { + return s; + } + } + + s = new(Tag); + add_new_tag(s); + s->is_expr = 0; /* Until proven otherwise */ + s->data.val = 0; /* Force initial value to be well-defined */ + s->name = new_string(tag_name); + s->is_used = 0; + return s; +} +/*}}}*/ +void define_tag(char *name, Expr *e)/*{{{*/ +/*++++++++++++++++++++ + Define an entry in the tag table. + ++++++++++++++++++++*/ +{ + Tag *s; + s = find_tag_or_create(name); + s->data.e = e; + s->is_expr = 1; + return; +} +/*}}}*/ + +void clear_tag_values(void)/*{{{*/ +{ + TagList *sl; + for (sl=tags; sl; sl=sl->next) { + Tag *s = sl->tag; + if (0 == s->is_expr) { + s->data.val = 0; + } + } +} +/*}}}*/ +void set_tag_value(char *tag_name)/*{{{*/ +{ + Tag *s; + + s = find_tag_or_create(tag_name); + if (s->is_expr) { + fprintf(stderr, "Cannot set value for tag '%s', it is defined by an expression\n", s->name); + exit(2); + } else { + s->data.val = 1; + } +} +/*}}}*/ +int eval(Expr *e)/*{{{*/ +/*++++++++++++++++++++ + Recursively evaluate expression tree e and return its truth value. A tag leaf is bound to its tag table entry on first use and that entry is marked as used; an unknown node type prints a diagnostic and exits with status 2. + ++++++++++++++++++++*/ +{ + switch (e->type) { + case E_AND: + return eval(e->data.and.c1) && eval(e->data.and.c2); + case E_OR: + return eval(e->data.or.c1) || eval(e->data.or.c2); + case E_XOR: + return eval(e->data.xor.c1) ^ eval(e->data.xor.c2); + case E_COND: + return eval(e->data.cond.c1) ? 
eval(e->data.cond.c2) : eval(e->data.cond.c3); + case E_NOT: + return !eval(e->data.not.c1); + case E_TAG: + { + Tag *s = e->data.tag.s; + int result; + if (!s) { + /* Not bound yet */ + e->data.tag.s = s = find_tag_or_create(e->data.tag.name); + } + if (s->is_expr) { + result = eval(s->data.e); + } else { + result = s->data.val; + } + s->is_used = 1; + return result; + } + default: + fprintf(stderr, "Internal error : Can't get here!\n"); + exit(2); + } +} +/*}}}*/ +void report_unused_tags(void)/*{{{*/ +{ + Tag *s; + TagList *sl; + for (sl=tags; sl; sl=sl->next) { + s = sl->tag; + if (!s->is_used) { + fprintf(stderr, "Warning: tag <%s> not referenced by any attribute expression\n", s->name); + } + } +} +/*}}}*/ diff --git a/src/mairix/dfasyn/n2d.c b/src/mairix/dfasyn/n2d.c @@ -0,0 +1,696 @@ +/*************************************** + Convert NFA to DFA + ***************************************/ + +/* + ********************************************************************** + * Copyright (C) Richard P. Curnow 2000-2003,2005,2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +/* {{{ General comments + Convert a nondeterministic finite automaton (NFA) into a deterministic finite + automaton (DFA). 
+ + The NFA is defined in terms of a set of states, with transitions between the + states. The transitions may occur on any one of a set of symbols (specified + with | characters between the options), or may be 'epsilon' transitions, i.e. + occurring without consumption of any input. A state may have multiple + transitions for the same input symbol (hence 'nondeterministic'). The final + state encountered within the final block defined in the input file is taken + to be the start state of the whole NFA. A state may be entered more than + once in the file; the transitions in the multiple definitions are combined to + give the complete transition set. A state may have 1 or more tags assigned + (with =); this is the return value of the automaton if the end of string is + encountered when in that state. + }}} */ + +#include <ctype.h> +#include "dfasyn.h" +#include <assert.h> + +/* Globally visible options to control reporting */ +int verbose; + +struct Entrylist *entries = NULL; + +/* ================================================================= */ +static inline int round_up(const int x) {/*{{{*/ + return (x+31)>>5; +} +/*}}}*/ +static inline void set_bit(unsigned long *x, int n)/*{{{*/ +{ + int r = n>>5; + unsigned long m = 1UL<<(n&31); + x[r] |= m; +} +/*}}}*/ +static inline int is_set(unsigned long *x, int n)/*{{{*/ +{ + int r = n>>5; + unsigned long m = 1UL<<(n&31); + return !!(x[r] & m); +} +/*}}}*/ +/* ================================================================= */ +static void transitively_close_eclo(unsigned long **eclo, int N)/*{{{*/ +{ + int from; + unsigned long *from_row; + unsigned long *todo, this_todo; + int Nru; + int i, i32, j, k, merge_idx; + int j_limit; + int any_changes; + + Nru = round_up(N); + todo = new_array(unsigned long, Nru); + + for (from=0; from<N; from++) { + from_row = eclo[from]; + for (i=0; i<Nru; i++) { + todo[i] = from_row[i]; + } + any_changes = 1; + while (any_changes) { + any_changes = 0; + for (i=0; i<Nru; i++) { /* 
loop over words in bitvector */ + i32 = i<<5; + this_todo = todo[i]; + todo[i] = 0UL; /* reset to avoid oo-loop */ + if (!this_todo) continue; /* none to do in this block */ + j_limit = N - i32; + if (j_limit > 32) j_limit = 32; + + for (j=0; j<j_limit;) { /* loop over bits in this word */ + if (this_todo & 1) { + /* Merge in */ + merge_idx = i32 + j; + for (k=0; k<Nru; k++) { + unsigned long to_merge = eclo[merge_idx][k]; + unsigned long orig = from_row[k]; + unsigned long diffs = to_merge & (~orig); + from_row[k] |= to_merge; + if (diffs) any_changes = 1; + todo[k] |= diffs; + } + } + this_todo >>= 1; + if (!this_todo) break; /* Workload reduction at end */ + j++; + } + } + } + } +} +/*}}}*/ +void generate_epsilon_closure(Block *b)/*{{{*/ +{ + int i, j, N; + + N = b->nstates; + b->eclo = new_array(unsigned long*, N); + for (i=0; i<N; i++) { + b->eclo[i] = new_array(unsigned long, round_up(N)); + for (j=0; j<round_up(N); j++) { + b->eclo[i][j] = 0; + } + } + + /* Determine initial immediate transitions */ + for (i=0; i<N; i++) { + State *s = b->states[i]; + TransList *tl; + int from_state = s->index; + set_bit(b->eclo[from_state], from_state); /* Always reflexive */ + + for (tl=s->transitions; tl; tl=tl->next) { + switch (tl->type) { + case TT_EPSILON: + { + int to_state = tl->ds_ref->index; + set_bit(b->eclo[from_state], to_state); + } + break; + case TT_TOKEN: + /* smoke out old method of indicating an epsilon trans */ + assert(tl->x.token >= 0); + break; + default: + assert(0); + break; + } + } + } + + transitively_close_eclo(b->eclo, N); + +} +/*}}}*/ +void print_nfa(Block *b)/*{{{*/ /* For each state of b, write its name, entries, transitions, tags and epsilon closure to the report file; does nothing if no report file is open */ +{ + int i, j, N; + N = b->nstates; + + if (!report) return; + + for (i=0; i<N; i++) { + State *s = b->states[i]; + TransList *tl; + Stringlist *sl; + fprintf(report, "NFA state %d = %s", i, s->name); + if (s->entries) { + int first = 1; + Stringlist *e = s->entries; + fputs(" [Entries: ", report); + while (e) { + if (!first) { + fputc(',', report); + } + first = 0; + 
fputs(e->string, report); + e = e->next; + } + fputc(']', report); + } + fputc('\n', report); + for (tl=s->transitions; tl; tl=tl->next) { + switch (tl->type) { + case TT_EPSILON: + fprintf(report, " [(epsilon)] -> "); + break; + case TT_TOKEN: + assert(tl->x.token >= 0); + if (tl->x.token >= ntokens) { /* codes at or above ntokens are printed as character classes */ + fprintf(report, " "); + print_charclass(report, tl->x.token - ntokens); + fprintf(report, " -> "); + } else { + fprintf(report, " %s -> ", toktable[tl->x.token]); + } + break; + default: + assert(0); + break; + } + fprintf(report, "%s\n", tl->ds_name); + } + if (s->tags) { + int first = 1; /* cleared after the first tag so that later tags are separated by "|" */ + fprintf(report, " Tags : "); + for (sl=s->tags; sl; sl=sl->next, first=0) { + fprintf(report, "%s%s", + first ? "" : "|", + sl->string); + } + fprintf(report, "\n"); + } + fprintf(report, " Epsilon closure :\n (self)\n"); + for (j=0; j<N; j++) { + if (i!=j && is_set(b->eclo[i], j)) { + fprintf(report, " %s\n", b->states[j]->name); + } + } + + fprintf(report, "\n"); + } + +} +/*}}}*/ +/* ================================================================= */ + +/* Indexed [from_state][token][to_state], flag set if there is + a transition from from_state to to_state, via token then zero or more + epsilon transitions */ + +static unsigned long ***transmap; + +/* Index [from_nfa_state][token], flag set if there is a transition + to any destination nfa state for that token. 
*/ +static unsigned long **anytrans; + +/* ================================================================= */ +void build_transmap(Block *b)/*{{{*/ +{ + int N = b->nstates; + int Nt = ntokens + n_charclasses; + int i, j, k, m, dest; + + transmap = new_array(unsigned long **, N); + anytrans = new_array(unsigned long *, N); + for (i=0; i<N; i++) { + transmap[i] = new_array(unsigned long *, Nt); + anytrans[i] = new_array(unsigned long, round_up(Nt)); + for (j=0; j<round_up(Nt); j++) { + anytrans[i][j] = 0UL; + } + for (j=0; j<Nt; j++) { + transmap[i][j] = new_array(unsigned long, round_up(N)); + for (k=0; k<round_up(N); k++) { + transmap[i][j][k] = 0UL; + } + } + } + + for (i=0; i<N; i++) { + State *s = b->states[i]; + TransList *tl; + for (tl=s->transitions; tl; tl=tl->next) { + switch (tl->type) { + case TT_EPSILON: + break; + case TT_TOKEN: + { + assert(tl->x.token >= 0); + dest = tl->ds_ref->index; + for (m=0; m<round_up(N); m++) { + unsigned long x = b->eclo[dest][m]; + transmap[i][tl->x.token][m] |= x; + if (!!x) set_bit(anytrans[i], tl->x.token); + } + } + break; + default: + assert(0); + break; + } + } + } +} +/*}}}*/ +/* ================================================================= */ + +int had_ambiguous_result = 0; + +/* ================================================================= */ + +/* Implement an array of linked lists to access DFA states directly. The + * hashes are given by folding the signatures down to single bytes. 
*/ + +struct DFAList { + struct DFAList *next; + DFANode *dfa; +}; + +#define DFA_HASHSIZE 256 +static struct DFAList *dfa_hashtable[DFA_HASHSIZE]; + +/* ================================================================= */ + +int n_dfa_entries; +struct DFAEntry *dfa_entries = NULL; + +/* ================================================================= */ +static void grow_dfa(struct DFA *dfa)/*{{{*/ +{ + dfa->max += 32; + dfa->s = resize_array(DFANode*, dfa->s, dfa->max); +} +/*}}}*/ +static unsigned long fold_signature(unsigned long sig)/*{{{*/ +{ + unsigned long folded; + folded = sig ^ (sig >> 16); + folded ^= (folded >> 8); + folded &= 0xff; + return folded; +} +/*}}}*/ +/* ================================================================= */ +static int find_dfa(unsigned long *nfas, int N)/*{{{*/ +/* Simple linear search. Use 'signatures' to get rapid rejection + of any DFA state that can't possibly match */ +{ + int j; + unsigned long signature = 0UL; + unsigned long folded_signature; + struct DFAList *dfal; + + for (j=0; j<round_up(N); j++) { + signature ^= nfas[j]; + } + folded_signature = fold_signature(signature); + + for(dfal=dfa_hashtable[folded_signature]; dfal; dfal = dfal->next) { + DFANode *dfa = dfal->dfa; + int matched; + + if (signature != dfa->signature) continue; + + matched=1; + + for (j=0; j<round_up(N); j++) { + if (nfas[j] != dfa->nfas[j]) { + matched = 0; + break; + } + } + if (matched) { + return dfa->index; + } + } + return -1; +} +/*}}}*/ + +/*{{{ add_dfa() */ +static int add_dfa(Block *b, struct DFA *dfa, unsigned long *nfas, int N, int Nt, int from_state, int via_token) +{ + int j; + int result = dfa->n; + int this_result_unambiguous; + + Stringlist *ex; + unsigned long signature = 0UL, folded_signature; + struct DFAList *dfal; + + if (verbose) { + fprintf(stderr, "Adding DFA state %d\r", dfa->n); + fflush(stderr); + } + + if (dfa->max == dfa->n) { + grow_dfa(dfa); + } + + dfa->s[dfa->n] = new(DFANode); + dfa->s[dfa->n]->nfas = 
new_array(unsigned long, round_up(N)); + dfa->s[dfa->n]->map = new_array(int, Nt); + for (j=0; j<Nt; j++) dfa->s[dfa->n]->map[j] = -1; + dfa->s[dfa->n]->index = dfa->n; + dfa->s[dfa->n]->defstate = -1; + + dfa->s[dfa->n]->from_state = from_state; + dfa->s[dfa->n]->via_token = via_token; + + for (j=0; j<round_up(N); j++) { + unsigned long x = nfas[j]; + signature ^= x; + dfa->s[dfa->n]->nfas[j] = x; + } + dfa->s[dfa->n]->signature = signature; + + folded_signature = fold_signature(signature); + dfal = new(struct DFAList); + dfal->dfa = dfa->s[dfa->n]; + dfal->next = dfa_hashtable[folded_signature]; + dfa_hashtable[folded_signature] = dfal; + + /* {{{ Boolean reductions to get attributes */ + ex = NULL; + clear_tag_values(); + for (j=0; j<N; j++) { + if (is_set(dfa->s[dfa->n]->nfas, j)) { + Stringlist *sl; + State *s = b->states[j]; + for (sl = s->tags; sl; sl = sl->next) { + Stringlist *new_sl; + new_sl = new(Stringlist); + new_sl->string = sl->string; + new_sl->next = ex; + ex = new_sl; + + set_tag_value(sl->string); + } + } + } + + dfa->s[dfa->n]->nfa_exit_sl = ex; + + this_result_unambiguous = + evaluate_attrs(&dfa->s[dfa->n]->attrs, &dfa->s[dfa->n]->has_early_exit); + + if (!this_result_unambiguous) { + Stringlist *sl; + fprintf(stderr, "WARNING : Ambiguous exit state abandoned for DFA state %d\n", dfa->n); + fprintf(stderr, "NFA exit tags applying in this stage :\n"); + for (sl = ex; sl; sl = sl->next) { + fprintf(stderr, " %s\n", sl->string); + } + had_ambiguous_result = 1; + } + /*}}}*/ + + ++dfa->n; + return result; +} +/*}}}*/ +static void clear_nfas(unsigned long *nfas, int N)/*{{{*/ +{ + int i; + for (i=0; i<round_up(N); i++) { + nfas[i] = 0; + } +} +/*}}}*/ +struct DFA *build_dfa(Block *b)/*{{{*/ +{ + unsigned long **nfas; + int i; + int j; + int N, Nt; + int next_to_do; + int *found_any; + int rup_N; + struct DFA *dfa; + + dfa = new(struct DFA); + dfa->n = 0; + dfa->max = 0; + dfa->s = NULL; + dfa->b = b; + + for (i=0; i<DFA_HASHSIZE; i++) 
dfa_hashtable[i] = NULL; + + N = b->nstates; + rup_N = round_up(N); + Nt = ntokens + n_charclasses; + + nfas = new_array(unsigned long *, Nt); + for (i=0; i<Nt; i++) { + nfas[i] = new_array(unsigned long, round_up(N)); + } + + /* Add initial states */ + for (j=0; j<n_dfa_entries; j++) { + int idx; + clear_nfas(nfas[0], N); + for (i=0; i<round_up(N); i++) { + nfas[0][i] |= b->eclo[dfa_entries[j].state_number][i]; + } + /* Must handle the case where >=2 of the start states are actually identical; + * nothing in the input language prevents this. */ + idx = find_dfa(nfas[0], N); + if (idx < 0) { + idx = dfa->n; + add_dfa(b, dfa, nfas[0], N, Nt, -1, -1); + } + dfa_entries[j].state_number = idx; + } + + next_to_do = 0; + found_any = new_array(int, Nt); + + /* Now the heart of the program : the subset construction to turn the NFA + into a DFA. This is a major performance hog in the program, so there are + lots of tricks to speed this up (particularly, hoisting intermediate + pointer computations out of the loop to assert the fact that there is no + aliasing between the arrays.) */ + + while (next_to_do < dfa->n) { + + int t; /* token index */ + int j0, j0_5, j1, j, mask, k; + int idx; + unsigned long *current_nfas; + unsigned long block_bitmap; + + /* If the next DFA state has the result_early flag set, it means that the scanner will + * always exit straight away when that state is reached, so there's no need to compute + * any transitions out of it. 
*/ + + if (dfa->s[next_to_do]->has_early_exit) { + next_to_do++; + continue; + } + + for (j=0; j<Nt; j++) { + clear_nfas(nfas[j], N); + found_any[j] = 0; + } + + current_nfas = dfa->s[next_to_do]->nfas; + for (j0=0; j0<rup_N; j0++) { /* Loop over NFA states which may be in this DFA state */ + block_bitmap = current_nfas[j0]; + if (!block_bitmap) continue; + j0_5 = j0 << 5; + for (mask=1UL, j1=0; j1<32; mask<<=1, j1++) { + j = j0_5 + j1; + if (block_bitmap & mask) { /* Is NFA state in DFA */ + unsigned long **transmap_j = transmap[j]; + unsigned long *anytrans_j = anytrans[j]; + for (t=0; t<Nt; t++) { /* Loop over transition symbols */ + unsigned long *transmap_t; + unsigned long *nfas_t; + unsigned long found_any_t; + if (!is_set(anytrans_j, t)) continue; + transmap_t = transmap_j[t]; + nfas_t = nfas[t]; + found_any_t = found_any[t]; + for (k=0; k<rup_N; k++) { /* Loop over destination NFA states */ + unsigned long x; + x = transmap_t[k]; + nfas_t[k] |= x; + found_any_t |= !!x; + } + found_any[t] = found_any_t; + } + } + } + } + + for (t=0; t<Nt; t++) { + if (found_any[t]) { + idx = find_dfa(nfas[t], N); + if (idx < 0) { + idx = add_dfa(b, dfa, nfas[t], N, Nt, next_to_do, t); + } + } else { + idx = -1; + } + dfa->s[next_to_do]->map[t] = idx; + } + + next_to_do++; + } + + free(found_any); + for (i=0; i<Nt; i++) free(nfas[i]); + free(nfas); + return dfa; +} +/*}}}*/ +/* ================================================================= */ +static void display_route(struct DFA *dfa, int idx, FILE *out)/*{{{*/ +{ + int from_state, via_token; + from_state = dfa->s[idx]->from_state; + if (from_state >= 0) { + display_route(dfa, from_state, out); + fputs("->", out); + } + + via_token = dfa->s[idx]->via_token; + if (via_token >= ntokens) { + print_charclass(out, via_token - ntokens); + } else if (via_token >= 0) { + fprintf(out, "%s", toktable[via_token]); + } +} +/*}}}*/ +void print_dfa(struct DFA *dfa)/*{{{*/ +{ + int N = dfa->b->nstates; + int Nt = ntokens + 
n_charclasses; + + int i, j0, j0_5, j1, t; + unsigned long mask; + unsigned long current_nfas; + int rup_N = round_up(N); + int from_state, this_state; + + if (!report) return; + + for (i=0; i<dfa->n; i++) { + fprintf(report, "DFA state %d\n", i); + if (dfa->s[i]->nfas) { + fprintf(report, " NFA states :\n"); + for (j0=0; j0<rup_N; j0++) { + current_nfas = dfa->s[i]->nfas[j0]; + if (!current_nfas) continue; + j0_5 = j0<<5; + for (j1=0, mask=1UL; j1<32; mask<<=1, j1++) { + if (current_nfas & mask) { + fprintf(report, " %s\n", dfa->b->states[j0_5 + j1]->name); + } + } + } + fprintf(report, "\n"); + } + fprintf(report, " Forward route :"); + this_state = i; + from_state = dfa->s[i]->from_state; + if (from_state >= 0) { + fprintf(report, " (from state %d)", from_state); + } + fputs("\n (START)", report); + display_route(dfa, i, report); + fputs("->(HERE)", report); + fprintf(report, "\n"); + + fprintf(report, " Transitions :\n"); + for (t=0; t<Nt; t++) { + int dest = dfa->s[i]->map[t]; + if (dest >= 0) { + if (t >= ntokens) { + fprintf(report, " "); + print_charclass(report, t - ntokens); + fprintf(report, " -> %d\n", dest); + } else { + fprintf(report, " %s -> %d\n", toktable[t], dest); + } + } + } + if (dfa->s[i]->defstate >= 0) { + fprintf(report, " Use state %d as basis (%d fixups)\n", + dfa->s[i]->defstate, dfa->s[i]->best_diff); + } + if (dfa->s[i]->nfa_exit_sl) { + Stringlist *sl; + fprintf(report, " NFA exit tags applying :\n"); + for (sl=dfa->s[i]->nfa_exit_sl; sl; sl = sl->next) { + fprintf(report, " %s\n", sl->string); + } + } + + emit_dfa_attr_report(dfa->s[i]->attrs, report); + fprintf(report, "\n"); + } + fprintf(report, "\nEntry states in DFA:\n"); + for (i=0; i<n_dfa_entries; i++) { + fprintf(report, "Entry <%s> : %d\n", + dfa_entries[i].entry_name, + dfa_entries[i].state_number); + } + +} +/*}}}*/ +/* ================================================================= */ +void yyerror (const char *s)/*{{{*/ +{ + extern int lineno; + fprintf(stderr, "%s 
at line %d\n", s, lineno); +} +/*}}}*/ +int yywrap(void) /*{{{*/ +{ + return -1; +} +/*}}}*/ +/* ================================================================= */ + diff --git a/src/mairix/dfasyn/n2d.h b/src/mairix/dfasyn/n2d.h @@ -0,0 +1,226 @@ +/*************************************** + $Header: /cvs/src/dfasyn/n2d.h,v 1.2 2003/03/02 23:42:11 richard Exp $ + + Header file for NFA->DFA conversion utility. + ***************************************/ + +/* + ********************************************************************** + * Copyright (C) Richard P. Curnow 2001-2003,2005 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +#ifndef N2D_H +#define N2D_H + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define new(T) ((T *) malloc(sizeof(T))) +#define new_array(T,N) ((T *) malloc((N) * sizeof(T))) +#define resize_array(T,arr,newN) ((T *) ((arr) ? 
realloc(arr,(newN)*sizeof(T)) : malloc((newN)*sizeof(T)))) +#define new_string(s) strcpy((char *)malloc((strlen(s)+1)*sizeof(char)),s) + +/* For typecasting, especially useful for declarations of local ptrs to args + of a qsort comparison fn */ +#define Castdecl(x, T, nx) T nx = (T) x + +#define Castderef(x, T, nx) T nx = *(T*) x + +/* Globally visible options to control reporting */ +extern FILE *report; +extern int verbose; + +struct State; +struct Block; + +typedef struct Translist { + struct Translist *next; + int token; + char *ds_name; + struct State *ds_ref; +} Translist; + +typedef struct Stringlist { + struct Stringlist *next; + char *string; +} Stringlist; + +typedef struct InlineBlock { + char *type; /* Block type */ + char *in; /* Name of input node */ + char *out; /* Name of output node */ +} InlineBlock; + +typedef struct InlineBlockList { + struct InlineBlockList *next; + InlineBlock *ib; +} InlineBlockList; + +typedef struct State { + char *name; + int index; /* Array index in containing block */ + struct Block *parent; + Translist *transitions; + Stringlist *exitvals; + Stringlist *attributes; + + /* Pointers to the nodes in the 'transitions' list, sorted into canonical order */ + Translist **ordered_trans; + int n_transitions; + + unsigned char removed; /* Flag indicating state has been pruned by compression stage */ +} State; + +typedef struct S_Stateset { + State **states; + int nstates; + int maxstates; +} Stateset; + +#define HASH_BUCKETS 64 +#define HASH_MASK (HASH_BUCKETS-1) + +typedef struct Block { + char *name; + + /* The master table of states within this block. This has to be in a flat + array because we have to work with respect to state indices when doing the + 2D bitmap stuff for the subset construction. 
*/ + State **states; + int nstates; + int maxstates; + + /* Hash table for getting rapid access to a state within the block, given + its name */ + Stateset state_hash[HASH_BUCKETS]; + + int subcount; /* Number for generating substates */ + int subblockcount; /* Number for generating inline subblocks */ +} Block; + +typedef struct { + unsigned long *nfas; + unsigned long signature; /* All the longwords in the nfas array xor'ed together */ + int index; /* Entry's own index in the array */ + int *map; /* index by token code */ + int from_state; /* the state which provided the first transition to this one (leading to its creation) */ + int via_token; /* the token through which we got to this state the first time. */ + Stringlist *nfa_exit_sl; /* NFA exit values */ + Stringlist *nfa_attr_sl; /* NFA exit values */ + char *result; /* Result token, computed by boolean expressions defined in input text */ + int result_early; /* If !=0, the scanner is expected to exit immediately this DFA state is entered. + It means that no out-bound transitions have to be created. */ + char *attribute; /* Attribute token, computed by boolean expressions defined in input text */ + + /* Fields calculated in compdfa.c */ + + /* The equivalence class the state is in. */ + int eq_class; + + /* Temp. storage for the new eq. class within a single pass of the splitting alg. */ + int new_eq_class; + + /* Signature field from above is also re-used. */ + + int is_rep; /* Set if state is chosen as the representative of its equivalence class. */ + int new_index; /* New index assigned to the state. */ + + /* Fields calculated in tabcompr.c */ + + unsigned long transition_sig; + + /* Default state, i.e. the one that supplies transitions for tokens not + explicitly listed for this one. */ + int defstate; + + /* Number of transitions that this state has different to those in the + default state. 
*/ + int best_diff; + +} DFANode; + + +void yyerror(const char *s); +extern int yylex(void); + +/* Constants for 'create' args */ +#define USE_OLD_MUST_EXIST 0 +#define CREATE_MUST_NOT_EXIST 1 +#define CREATE_OR_USE_OLD 2 + +State *get_curstate(void); + +struct Abbrev; +extern struct Abbrev * create_abbrev(char *name); +extern void add_tok_to_abbrev(struct Abbrev *abbrev, char *tok); + +int lookup_token(char *name, int create); +Block *lookup_block(char *name, int create); +State *lookup_state(Block *in_block, char *name, int create); +Stringlist * add_token(Stringlist *existing, char *token); +void add_transitions(State *curstate, Stringlist *tokens, char *destination); +State * add_transitions_to_internal(Block *curblock, State *addtostate, Stringlist *tokens); +void add_exit_value(State *curstate, char *value); +void set_state_attribute(State *curstate, char *name); +InlineBlock *create_inline_block(char *type, char *in, char *out); +InlineBlockList *add_inline_block(InlineBlockList *existing, InlineBlock *nib); +State * add_inline_block_transitions(Block *curblock, State *addtostate, InlineBlockList *ibl); +void instantiate_block(Block *curblock, char *block_name, char *instance_name); +void fixup_state_refs(Block *b); + +void compress_nfa(Block *b); + +/* In expr.c */ +typedef struct Expr Expr; + +typedef struct evaluator Evaluator; +extern Evaluator *exit_evaluator; +extern Evaluator *attr_evaluator; + +Expr * new_wild_expr(void); +Expr * new_not_expr(Expr *c); +Expr * new_and_expr(Expr *c1, Expr *c2); +Expr * new_or_expr(Expr *c1, Expr *c2); +Expr * new_xor_expr(Expr *c1, Expr *c2); +Expr * new_cond_expr(Expr *c1, Expr *c2, Expr *c3); +Expr * new_sym_expr(char *sym_name); + +void define_symbol(Evaluator *x, char *name, Expr *e); +void define_result(Evaluator *x, char *string, Expr *e, int early); +void define_symresult(Evaluator *x, char *string, Expr *e, int early); +void define_defresult(Evaluator *x, char *string); +void clear_symbol_values(Evaluator *x); 
+void set_symbol_value(Evaluator *x, char *sym_name); +int evaluate_result(Evaluator *x, char **, int *); +int evaluator_is_used(Evaluator *x); +void define_defresult(Evaluator *x, char *text); +void define_type(Evaluator *x, char *text); +char* get_defresult(Evaluator *x); +char* get_result_type(Evaluator *x); +void eval_initialise(void); + +void compress_transition_table(DFANode **dfas, int ndfas, int ntokens); +unsigned long increment(unsigned long x, int field); +unsigned long count_bits_set(unsigned long x); + +/* Return new number of DFA states */ +int compress_dfa(DFANode **dfas, int ndfas, int ntokens); + +#endif /* N2D_H */ + diff --git a/src/mairix/dfasyn/parse.y b/src/mairix/dfasyn/parse.y @@ -0,0 +1,262 @@ +/********************************************************************** + Grammar definition for input files defining an NFA + *********************************************************************/ + +/* + ********************************************************************** + * Copyright (C) Richard P. Curnow 2001-2003,2005,2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + ********************************************************************** + */ + +%{ +#include "dfasyn.h" + +static Block *curblock = NULL; /* Current block being built */ +static State *curstate = NULL; /* Current state being worked on */ +static State *addtostate = NULL; /* Current state (incl ext) to which transitions are added */ +static StimulusList *curtranslist = NULL; /* Final option set of stimuli prior to ARROW */ +static CharClass *curcharclass = NULL; +static Evaluator *current_evaluator = NULL; + +State *get_curstate(void) { return curstate; } + +%} + +%union { + char c; + char *s; + int i; + Stringlist *sl; + Stimulus *st; + StimulusList *stl; + InlineBlock *ib; + CharClass *cc; + Expr *e; +} + +%token STRING STATE TOKENS PREFIX ARROW BLOCK ENDBLOCK COLON EQUAL SEMICOLON COMMA +%token ABBREV DEFINE +%type<s> STRING +%type<st> stimulus +%type<sl> tag_seq +%type<stl> stimulus_seq +%type<stl> transition_seq +%type<e> expr +%type<ib> inline_block +%type<c> CHAR +%type<cc> char_class simple_char_class negated_char_class char_class_diff + +%token ATTR TAG +%token DEFATTR +%token EARLY +%token TYPE +%token ENTRY +%token ENTRYSTRUCT +%token GROUP +%token LBRACE RBRACE + +%token LSQUARE RSQUARE +%token LSQUARE_CARET +%token CHAR HYPHEN + +%right QUERY COLON +%left PIPE +%left XOR +%left AND +%left NOT +%left LPAREN RPAREN +%left LANGLE RANGLE + +%% + +all : decl_seq ; + +decl_seq : /* empty */ | decl_seq decl ; + +decl : block_decl + | tokens_decl | abbrev_decl + | attr_decl | group_decl | tag_decl + | prefix_decl | entrystruct_decl ; + +/* Don't invalidate curstate at the end, this is the means of working out the + starting state of the NFA */ +block_decl : block1 block2 { fixup_state_refs(curblock); curblock = NULL; } ; + +block1 : BLOCK STRING LBRACE { curblock = lookup_block($2, CREATE_MUST_NOT_EXIST); addtostate = curstate = NULL; } ; + +block2 : instance_decl_seq state_decl_seq RBRACE ; + +prefix_decl : PREFIX STRING + { if (!prefix) { + prefix = $2; 
+ } else { + fprintf(stderr, "\n\nWarning: prefix declaration ignored; already set on the command line\n"); + } + }; + +tokens_decl : TOKENS token_seq ; + +abbrev_decl : ABBREV STRING EQUAL stimulus_seq + { create_abbrev($2, $4); } + ; + +token_seq : token_seq token | token ; + +token : STRING { (void) lookup_token($1, CREATE_MUST_NOT_EXIST); } ; + +instance_decl_seq : /* empty */ | instance_decl_seq instance_decl ; + +state_decl_seq : /* empty */ | state_decl_seq state_decl ; + +state_decl : STATE STRING { addtostate = curstate = lookup_state(curblock, $2, CREATE_OR_USE_OLD); } + sdecl_seq + | STATE STRING ENTRY STRING { addtostate = curstate = lookup_state(curblock, $2, CREATE_OR_USE_OLD); + add_entry_to_state(curstate, $4); } + sdecl_seq + ; + +sdecl_seq : /* empty */ | sdecl_seq sdecl ; + +sdecl : transition_decl ; + +instance_decl : STRING COLON STRING { instantiate_block(curblock, $3 /* master_block_name */, $1 /* instance_name */ ); } ; + +transition_decl : transition_seq ARROW { curtranslist = $1; } destination_seq { addtostate = curstate; } + | transition_seq EQUAL tag_seq { addtostate = add_transitions_to_internal(curblock, addtostate, $1); + add_tags(addtostate, $3); + addtostate = curstate; } + ; + +destination_seq : STRING { add_transitions(curblock, addtostate, curtranslist, $1); } + | destination_seq COMMA STRING { add_transitions(curblock, addtostate, curtranslist, $3); } + ; + +transition_seq : stimulus_seq { $$ = $1; } + | transition_seq SEMICOLON stimulus_seq + { + addtostate = add_transitions_to_internal(curblock, addtostate, $1); + $$ = $3; + } + ; + +tag_seq : STRING { $$ = add_string_to_list(NULL, $1); } + | tag_seq COMMA STRING { $$ = add_string_to_list($1, $3); } + ; + +stimulus_seq : stimulus + { $$ = append_stimulus_to_list(NULL, $1); } + | stimulus_seq PIPE stimulus + { $$ = append_stimulus_to_list($1, $3); } + ; + +/* A 'thing' that will make the DFA move from one state to another */ +stimulus : STRING + { $$ = stimulus_from_string($1); 
} + | inline_block + { $$ = stimulus_from_inline_block($1); } + | char_class + { add_charclass_to_list($1); /* freeze it into the list. */ + $$ = stimulus_from_char_class($1); } + | /* empty */ + { $$ = stimulus_from_epsilon(); } + ; + +inline_block : LANGLE STRING COLON STRING ARROW STRING RANGLE + { $$ = create_inline_block($2, $4, $6); } + ; + +char_class : simple_char_class + | negated_char_class + | char_class_diff + ; + +negated_char_class : NOT simple_char_class + { invert_charclass($2); $$ = $2; } + ; + +char_class_diff : simple_char_class NOT simple_char_class + { diff_charclasses($1, $3); + free_charclass($3); + $$ = $1; + } + ; + +simple_char_class : LSQUARE { curcharclass = new_charclass(); } + cc_body + RSQUARE { $$ = curcharclass; + curcharclass = NULL; } + | LSQUARE_CARET { curcharclass = new_charclass(); } + cc_body + RSQUARE { $$ = curcharclass; + invert_charclass($$); + curcharclass = NULL; } + ; + +cc_body : CHAR { add_singleton_to_charclass(curcharclass, $1); } + | CHAR HYPHEN CHAR { add_range_to_charclass(curcharclass, $1, $3); } + | cc_body CHAR { add_singleton_to_charclass(curcharclass, $2); } + | cc_body CHAR HYPHEN CHAR { add_range_to_charclass(curcharclass, $2, $4); } + ; + +attr_decl : ATTR simple_attr_seq + | ATTR STRING COLON expr { define_attr(current_evaluator, $2, $4, 0); } + | EARLY ATTR early_attr_seq + | EARLY ATTR STRING COLON expr { define_attr(current_evaluator, $3, $5, 1); } + | DEFATTR STRING { define_defattr(current_evaluator, $2); } + | TYPE STRING { define_type(current_evaluator, $2); } + ; + +simple_attr_seq : STRING + { define_attr(current_evaluator, $1, NULL, 0); } + | simple_attr_seq COMMA STRING + { define_attr(current_evaluator, $3, NULL, 0); } + ; + +early_attr_seq : STRING + { define_attr(current_evaluator, $1, NULL, 1); } + | early_attr_seq COMMA STRING + { define_attr(current_evaluator, $3, NULL, 1); } + ; + +group_decl : GROUP STRING LBRACE { current_evaluator = start_evaluator($2); } + attr_decl_seq + RBRACE { 
current_evaluator = NULL; } + ; + +attr_decl_seq : /* empty */ + | attr_decl_seq attr_decl + ; + +tag_decl : TAG STRING EQUAL expr { define_tag($2, $4); } + ; + +entrystruct_decl : + ENTRYSTRUCT STRING STRING { define_entrystruct($2, $3); } + ; + +expr : NOT expr { $$ = new_not_expr($2); } + | expr AND expr { $$ = new_and_expr($1, $3); } + | expr PIPE /* OR */ expr { $$ = new_or_expr($1, $3); } + | expr XOR expr { $$ = new_xor_expr($1, $3); } + | expr QUERY expr COLON expr { $$ = new_cond_expr($1, $3, $5); } + | LPAREN expr RPAREN { $$ = $2; } + | STRING { $$ = new_tag_expr($1); } + ; + +/* vim:et +*/ + diff --git a/src/mairix/dfasyn/scan.l b/src/mairix/dfasyn/scan.l @@ -0,0 +1,111 @@ +/********************************************************************** + Lexical analyser definition for input files defining an NFA + *********************************************************************/ + +/* + ********************************************************************** + * Copyright (C) Richard P. Curnow 2001-2003,2005,2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + ********************************************************************** + */ + +%{ +#include "dfasyn.h" +#include "parse.h" + +/* yyunput() not used - define this to avoid compiler warnings */ +#define YY_NO_UNPUT + +int lineno = 1; +%} + +%x PASSTHRU +%x STR +%x CHARCLASS + +%% + +STATE|State|state { return STATE; } +ABBREV|Abbrev|abbrev { return ABBREV; } +DEFINE|Define|define { return DEFINE; } +TOKENS|Tokens|tokens { return TOKENS; } +PREFIX|Prefix|prefix { return PREFIX; } +BLOCK|Block|block { return BLOCK; } +TYPE|Type|type { return TYPE; } +ENTRY|Entry|entry { return ENTRY; } +ENTRYSTRUCT { return ENTRYSTRUCT; } +EntryStruct { return ENTRYSTRUCT; } +Entrystruct { return ENTRYSTRUCT; } +entrystruct { return ENTRYSTRUCT; } +ATTR|Attr|attr { return ATTR; } +EARLY|Early|early { return EARLY; } +DEFATTR|DefAttr { return DEFATTR; } +Defattr|defattr { return DEFATTR; } +TAG|Tag|tag { return TAG; } +GROUP|Group|group { return GROUP; } +[A-Za-z0-9_.]+ { yylval.s = new_string(yytext); return STRING; } +\#.*$ { /* strip comments */ } +\-\> { return ARROW; } += { return EQUAL; } +\| { return PIPE; /* OR */ } +\& { return AND; } +\~ { return NOT; } +\! { return NOT; } +\^ { return XOR; } +\? { return QUERY; } +\: { return COLON; } +\; { return SEMICOLON; } +\( { return LPAREN; } +\) { return RPAREN; } +\{ { return LBRACE; } +\} { return RBRACE; } +\< { return LANGLE; } +\> { return RANGLE; } +\[ { BEGIN CHARCLASS; return LSQUARE; } +\[\^ { BEGIN CHARCLASS; return LSQUARE_CARET; } +\, { return COMMA; } +\n { lineno++; } +[ \t]+ { /* ignore */ } +^\%\{[ \t]*\n { BEGIN PASSTHRU; } +\" { BEGIN STR; } +. 
{ printf("Unmatched input <%s> at line %d\n", yytext, lineno); exit (1); } + +<PASSTHRU>^\%\}[ \t]*\n { BEGIN INITIAL; } +<PASSTHRU>\n { fputs(yytext, yyout); lineno++; } +<PASSTHRU>.+ { fputs(yytext, yyout); } + +<STR>\" { BEGIN INITIAL; } +<STR>[^"]* { yylval.s = new_string(yytext); return STRING; } + +<CHARCLASS>\] { BEGIN INITIAL; return RSQUARE; } +<CHARCLASS>\- { return HYPHEN; } +<CHARCLASS>\\- { yylval.c = '-'; return CHAR; } +<CHARCLASS>\\] { yylval.c = ']'; return CHAR; } +<CHARCLASS>\\^ { yylval.c = '^'; return CHAR; } +<CHARCLASS>\\n { yylval.c = '\n'; return CHAR; } +<CHARCLASS>\\r { yylval.c = '\r'; return CHAR; } +<CHARCLASS>\\f { yylval.c = '\f'; return CHAR; } +<CHARCLASS>\\t { yylval.c = '\t'; return CHAR; } +<CHARCLASS>\\\\ { yylval.c = '\\'; return CHAR; } +<CHARCLASS>\^[@A-Z] { yylval.c = yytext[1] - '@'; return CHAR; } +<CHARCLASS>\\x[0-9a-fA-F][0-9a-fA-F] { unsigned int foo; sscanf(yytext+2,"%x",&foo); yylval.c = (char) foo; return CHAR; } +<CHARCLASS>\\[0-7][0-7][0-7] { unsigned int foo; sscanf(yytext+1,"%o",&foo); yylval.c = (char) foo; return CHAR; } +<CHARCLASS>. { yylval.c = yytext[0]; return CHAR; } + +%{ +/* vim:et +*/ +%} diff --git a/src/mairix/dfasyn/states.c b/src/mairix/dfasyn/states.c @@ -0,0 +1,303 @@ +/*************************************** + Handle state-related stuff + ***************************************/ + +/* + ********************************************************************** + * Copyright (C) Richard P. Curnow 2000-2003,2005,2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +#include "dfasyn.h" + +static void maybe_grow_states(Block *b, int hash)/*{{{*/ +{ + Stateset *ss = b->state_hash + hash; + if (ss->nstates == ss->maxstates) { + ss->maxstates += 8; + ss->states = resize_array(State*, ss->states, ss->maxstates); + } + if (b->nstates == b->maxstates) { + b->maxstates += 32; + b->states = resize_array(State*, b->states, b->maxstates); + } + +} +/*}}}*/ +static unsigned long hashfn(const char *s)/*{{{*/ +{ + unsigned long y = 0UL, v, w, x, k; + const char *t = s; + while (1) { + k = (unsigned long) *(unsigned char *)(t++); + if (!k) break; + v = ~y; + w = y<<13; + x = v>>6; + y = w ^ x; + y += k; + } + y ^= (y>>13); + y &= HASH_MASK; + return y; +} +/*}}}*/ +static State * create_state(Block *b, char *name)/*{{{*/ +{ + State *result; + int hash; + Stateset *ss; + hash = hashfn(name); + maybe_grow_states(b, hash); + ss = b->state_hash + hash; + result = b->states[b->nstates++] = ss->states[ss->nstates++] = new(State); + result->name = new_string(name); + result->parent = b; + result->index = b->nstates - 1; + result->transitions = NULL; + result->tags = NULL; + result->entries = NULL; + result->ordered_trans = NULL; + result->n_transitions = 0; + result->removed = 0; + return result; +} +/*}}}*/ +State * lookup_state(Block *b, char *name, int create)/*{{{*/ +{ + State *found = NULL; + int i; + int hash; + Stateset *ss; + + hash = hashfn(name); + ss = b->state_hash + hash; + + for (i=0; i<ss->nstates; i++) { + if (!strcmp(ss->states[i]->name, name)) { + found = ss->states[i]; + break; + } + } + + switch (create) { + case USE_OLD_MUST_EXIST: + if (!found) { + fprintf(stderr, "Could not find a state '%s' in block '%s' to transition to\n", 
name, b->name); + exit(1); + } + break; + case CREATE_MUST_NOT_EXIST: + if (found) { + fprintf(stderr, "Warning : already have a state '%s' in block '%s'\n", name, b->name); + } else { + found = create_state(b, name); + } + break; + case CREATE_OR_USE_OLD: + if (!found) { + found = create_state(b, name); + } + break; + } + + return found; +} +/*}}}*/ +void add_entry_to_state(State *curstate, const char *entry_tag)/*{{{*/ +{ + struct Entrylist *new_entries = new(struct Entrylist); + new_entries->entry_name = new_string(entry_tag); + new_entries->state = curstate; + new_entries->next = entries; + entries = new_entries; + curstate->entries = add_string_to_list(curstate->entries, entry_tag); +} +/*}}}*/ +/* ================================================================= */ +static void add_transition(Block *curblock, State *curstate, Stimulus *stimulus, char *destination); +/* ================================================================= */ +Stringlist * add_string_to_list(Stringlist *existing, const char *token)/*{{{*/ +{ + Stringlist *result = new(Stringlist); + if (token) { + result->string = new_string(token); + } else { + result->string = NULL; + } + result->next = existing; + return result; +} +/*}}}*/ +static TransList *new_translist(struct TransList *existing, char *destination)/*{{{*/ +{ + TransList *result; + result = new(TransList); + result->next = existing; + result->ds_name = new_string(destination); + return result; +} +/*}}}*/ +static void add_epsilon_transition(State *curstate, char *destination)/*{{{*/ +{ + TransList *tl = new_translist(curstate->transitions, destination); + tl->type = TT_EPSILON; + curstate->transitions = tl; +} +/*}}}*/ +static void add_token_transition(State *curstate, int token, char *destination)/*{{{*/ +{ + TransList *tl = new_translist(curstate->transitions, destination); + tl->type = TT_TOKEN; + tl->x.token = token; + curstate->transitions = tl; +} +/*}}}*/ +static void add_abbrev_transition(Block *curblock, State 
*curstate, struct Abbrev *abbrev, char *destination)/*{{{*/ +{ + StimulusList *stimuli; + for (stimuli = abbrev->stimuli; stimuli; stimuli = stimuli->next) { + add_transition(curblock, curstate, stimuli->stimulus, destination); + } +} +/*}}}*/ +static void add_inline_block_transition(Block *curblock, State *curstate, InlineBlock *ib, char *destination)/*{{{*/ +{ + char block_name[1024]; + char input_name[1024]; + char output_name[1024]; + State *output_state; + + sprintf(block_name, "%s#%d", ib->type, curblock->subblockcount++); + instantiate_block(curblock, ib->type, block_name); + sprintf(input_name, "%s.%s", block_name, ib->in); + sprintf(output_name, "%s.%s", block_name, ib->out); + output_state = lookup_state(curblock, output_name, CREATE_OR_USE_OLD); + add_epsilon_transition(curstate, input_name); + add_epsilon_transition(output_state, destination); +} +/*}}}*/ +static void add_char_class_transition(State *curstate, CharClass *cc, char *destination)/*{{{*/ +{ + TransList *tl = new_translist(curstate->transitions, destination); + tl->type = TT_CHARCLASS; + tl->x.char_class = cc; + curstate->transitions = tl; +} +/*}}}*/ +static void add_transition(Block *curblock, State *curstate, Stimulus *stimulus, char *destination)/*{{{*/ +/* Add a single transition to the state. 
Allow definitions to be + recursive */ +{ + switch (stimulus->type) { + case T_EPSILON: + add_epsilon_transition(curstate, destination); + break; + case T_TOKEN: + add_token_transition(curstate, stimulus->x.token, destination); + break; + case T_ABBREV: + add_abbrev_transition(curblock, curstate, stimulus->x.abbrev, destination); + break; + case T_INLINEBLOCK: + add_inline_block_transition(curblock, curstate, stimulus->x.inline_block, destination); + break; + case T_CHARCLASS: + add_char_class_transition(curstate, stimulus->x.char_class, destination); + break; + } + +} +/*}}}*/ +void add_transitions(Block *curblock, State *curstate, StimulusList *stimuli, char *destination)/*{{{*/ +{ + StimulusList *sl; + for (sl=stimuli; sl; sl=sl->next) { + add_transition(curblock, curstate, sl->stimulus, destination); + } +} +/*}}}*/ +State * add_transitions_to_internal(Block *curblock, State *addtostate, StimulusList *stimuli)/*{{{*/ +{ + char buffer[1024]; + State *result; + sprintf(buffer, "#%d", curblock->subcount++); + result = lookup_state(curblock, buffer, CREATE_MUST_NOT_EXIST); + add_transitions(curblock, addtostate, stimuli, result->name); + return result; +} +/*}}}*/ +void add_tags(State *curstate, Stringlist *sl)/*{{{*/ +{ + if (curstate->tags) { + /* If we already have some, stick them on the end of the new list */ + Stringlist *xsl = sl; + while (xsl->next) xsl = xsl->next; + xsl->next = curstate->tags; + } + curstate->tags = sl; +} +/*}}}*/ +/* ================================================================= */ +void fixup_state_refs(Block *b)/*{{{*/ +{ + int i; + for (i=0; i<b->nstates; i++) { + State *s = b->states[i]; + TransList *tl; + for (tl=s->transitions; tl; tl=tl->next) { + tl->ds_ref = lookup_state(b, tl->ds_name, CREATE_OR_USE_OLD); + } + } +} +/*}}}*/ +/* ================================================================= */ +void expand_charclass_transitions(Block *b)/*{{{*/ +{ + int i; + for (i=0; i<b->nstates; i++) { + State *s = b->states[i]; + 
TransList *tl; + for (tl=s->transitions; tl; tl=tl->next) { + if (tl->type == TT_CHARCLASS) { + int i, first; + CharClass *cc = tl->x.char_class; + first = 1; + for (i=0; i<256; i++) { + /* Insert separate transitions for each subclass of the charclass */ + if (cc_test_bit(cc->group_bitmap, i)) { + if (first) { + tl->type = TT_TOKEN; + tl->x.token = ntokens + i; + } else { + TransList *ntl = new(TransList); + ntl->next = tl->next; + ntl->ds_name = new_string(tl->ds_name); + ntl->ds_ref = tl->ds_ref; + ntl->type = TT_TOKEN; + ntl->x.token = ntokens + i; + tl->next = ntl; + } + first = 0; + } + } + } + } + } +} +/*}}}*/ +/* ================================================================= */ diff --git a/src/mairix/dfasyn/stimulus.c b/src/mairix/dfasyn/stimulus.c @@ -0,0 +1,87 @@ +/*************************************** + Handle stimulus-related stuff + ***************************************/ + +/* + ********************************************************************** + * Copyright (C) Richard P. Curnow 2005,2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + ********************************************************************** + */ + +#include "dfasyn.h" + +Stimulus *stimulus_from_epsilon(void)/*{{{*/ +{ + Stimulus *result; + result = new(Stimulus); + result->type = T_EPSILON; + return result; +} +/*}}}*/ +Stimulus *stimulus_from_string(char *str)/*{{{*/ +{ + struct Abbrev *abbrev; + Stimulus *result; + + result = new(Stimulus); + + /* See if an abbrev exists with the name */ + abbrev = lookup_abbrev(str); + + if (abbrev) { + result->type = T_ABBREV; + result->x.abbrev = abbrev; + } else { + /* Token */ + int token; + token = lookup_token(str, USE_OLD_MUST_EXIST); + /* lookup_token will have bombed if it wasn't found. */ + result->type = T_TOKEN; + result->x.token = token; + } + + return result; + +} +/*}}}*/ +Stimulus *stimulus_from_inline_block(InlineBlock *block)/*{{{*/ +{ + Stimulus *result; + result = new(Stimulus); + result->type = T_INLINEBLOCK; + result->x.inline_block = block; + return result; +} +/*}}}*/ +Stimulus *stimulus_from_char_class(CharClass *char_class)/*{{{*/ +{ + Stimulus *result; + result = new(Stimulus); + result->type = T_CHARCLASS; + result->x.char_class = char_class; + return result; +} +/*}}}*/ +StimulusList *append_stimulus_to_list(StimulusList *existing, Stimulus *stim)/*{{{*/ +{ + StimulusList *result; + result = new(StimulusList); + result->next = existing; + result->stimulus = stim; + return result; +} +/*}}}*/ diff --git a/src/mairix/dfasyn/tabcompr.c b/src/mairix/dfasyn/tabcompr.c @@ -0,0 +1,181 @@ +/*************************************** + Routines to compress the DFA transition tables, by identifying where two DFA + states have a lot of transitions the same. + ***************************************/ + +/* + ********************************************************************** + * Copyright (C) Richard P. 
Curnow 2001-2003,2005,2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +#include "dfasyn.h" + +/* ================================================================= */ +/* Treat 'x' as a set of 16 bit pairs, with field (0..15) specifying + which. Increment the field'th bit pair as a gray code, in the + pattern 00->01->11->10->00 */ + +unsigned long increment(unsigned long x, int field) +{ + int f2 = field + field; + static unsigned char transxor[4] = {1, 2, 2, 1}; + unsigned long g = x >> f2; + unsigned long h = transxor[g&3]; + return x ^ (h<<f2); +} + +/* ================================================================= */ +/* Calculate the number of bits set in an unsigned long. */ + +unsigned long count_bits_set(unsigned long x) +{ + unsigned long y = x; + unsigned long c; + c = 0x55555555UL; + y = ((y>>1) & c) + (y & c); + c = 0x33333333UL; + y = ((y>>2) & c) + (y & c); + y = (y>>4) + y; + c = 0x0f0f0f0fUL; + y &= c; + y = (y>>8) + y; + y = (y>>16) + y; + return y & 0x1f; +} + +/* ================================================================= */ +/* Compute 'signatures' of the transitions out of a particular state. 
+ The signature is given by considering the destination state numbers mod 16, + and counting how many transitions there are in each resulting equivalence + class. The number is encoded using the gray code implied by the increment + fn. */ + +static void +compute_transition_sigs(struct DFA *dfa, int ntokens) +{ + int i, j; + for (i=0; i<dfa->n; i++) { + unsigned long ts = 0UL; /* transition signature */ + for (j=0; j<ntokens; j++) { + unsigned long dest = dfa->s[i]->map[j]; + dest &= 0xf; /* 16 bit pairs in 'ts' */ + ts = increment(ts, dest); + } + dfa->s[i]->transition_sig = ts; + } +} + + +/* ================================================================= */ + +#define REQUIRED_BENEFIT 2 + +static void +find_default_states(struct DFA *dfa, int ntokens) +{ + int i, j, t; + int best_index; + int best_diff; + int trans_count; /* Number of transitions in working state */ + unsigned long tsi; + + for (i=0; i<dfa->n; i++) { + trans_count = 0; + for (t=0; t<ntokens; t++) { + if (dfa->s[i]->map[t] >= 0) trans_count++; + } + + dfa->s[i]->defstate = -1; /* not defaulted */ + best_index = -1; + best_diff = ntokens + 1; /* Worse than any computed value */ + tsi = dfa->s[i]->transition_sig; + for (j=0; j<i; j++) { + unsigned long tsj; + unsigned long sigdiff; + int diffsize; + + if (dfa->s[j]->defstate >= 0) continue; /* Avoid chains of defstates */ + tsj = dfa->s[j]->transition_sig; + + /* This is the heart of the technique : if we xor two vectors of bit + pairs encoded with the gray code above, and count the number of bits + set in the result, we get the sum of absolute differences of the bit + pairs. The number of outgoing transitions that differ between the + states must be _at_least_ this value. It may in fact be much greater + (i.e. we may get 'false matches'). However, this algorithm is a quick + way of filtering most of the useless potential default states out. 
*/ + + sigdiff = tsi ^ tsj; + diffsize = count_bits_set(sigdiff); + if (diffsize >= best_diff) continue; + if (diffsize >= trans_count) continue; /* Else pointless! */ + + /* Otherwise, do an exact check (i.e. see how much false matching we + suffered). */ + diffsize = 0; + for (t=0; t<ntokens; t++) { + if (dfa->s[i]->map[t] != dfa->s[j]->map[t]) { + diffsize++; + } + } + + if (((best_index < 0) || (diffsize < best_diff)) + && + (diffsize < (trans_count - REQUIRED_BENEFIT))) { + best_index = j; + best_diff = diffsize; + } + } + + dfa->s[i]->defstate = best_index; + dfa->s[i]->best_diff = best_diff; + } +} + +/* ================================================================= */ + +void +compress_transition_table(struct DFA *dfa, int ntokens) +{ + compute_transition_sigs(dfa, ntokens); + find_default_states(dfa, ntokens); +} + +/* ================================================================= */ + +#ifdef TEST +int main () { + unsigned long x = 0; + unsigned long x1, x2, x3, x4; + x1 = increment(x, 2); + x2 = increment(x1, 2); + x3 = increment(x2, 2); + x4 = increment(x3, 2); + printf("%d %d %d %d %d\n", x, x1, x2, x3, x4); + + printf("1=%d\n", count_bits_set(0x00000001)); + printf("2=%d\n", count_bits_set(0x00000003)); + printf("3=%d\n", count_bits_set(0x00000007)); + printf("4=%d\n", count_bits_set(0x0000000f)); + printf("4=%d\n", count_bits_set(0xf0000000)); + + return 0; +} +#endif + + diff --git a/src/mairix/dfasyn/tokens.c b/src/mairix/dfasyn/tokens.c @@ -0,0 +1,85 @@ +/*************************************** + Handle token-related stuff + ***************************************/ + +/* + ********************************************************************** + * Copyright (C) Richard P. Curnow 2000-2003,2005,2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +#include "dfasyn.h" + +char **toktable=NULL; +int ntokens = 0; +static int maxtokens = 0; +/* ================================================================= */ +static void grow_tokens(void)/*{{{*/ +{ + maxtokens += 32; + toktable = resize_array(char *, toktable, maxtokens); +} +/*}}}*/ +static int create_token(char *name)/*{{{*/ +{ + int result; + if (ntokens == maxtokens) { + grow_tokens(); + } + result = ntokens++; + toktable[result] = new_string(name); + return result; +} +/*}}}*/ +int lookup_token(char *name, int create)/*{{{*/ +{ + int found = -1; + int i; + for (i=0; i<ntokens; i++) { + if (!strcmp(toktable[i], name)) { + found = i; + break; + } + } + + switch (create) { + case USE_OLD_MUST_EXIST: + if (found < 0) { + fprintf(stderr, "Token '%s' was never declared\n", name); + exit(1); + } + break; + case CREATE_MUST_NOT_EXIST: + if (found >= 0) { + fprintf(stderr, "Token '%s' already declared\n", name); + exit(1); + } else { + found = create_token(name); + } + break; + case CREATE_OR_USE_OLD: + if (found < 0) { + found = create_token(name); + } + break; + } + + return found; +} +/*}}}*/ + + diff --git a/src/mairix/dirscan.c b/src/mairix/dirscan.c @@ -0,0 +1,420 @@ +/* + mairix - message index builder and finder for maildir folders. + + ********************************************************************** + * Copyright (C) Richard P. 
Curnow 2002,2003,2004,2005,2006,2007 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +/* Traverse a directory tree and find maildirs, then list files in them. */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <ctype.h> +#include <unistd.h> +#include <dirent.h> +#include <assert.h> +#include "mairix.h" + +struct msgpath_array *new_msgpath_array(void)/*{{{*/ +{ + struct msgpath_array *result; + result = new(struct msgpath_array); + result->paths = NULL; + result->type = NULL; + result->n = 0; + result->max = 0; + return result; +} +/*}}}*/ +void free_msgpath_array(struct msgpath_array *x)/*{{{*/ +{ + int i; + if (x->paths) { + for (i=0; i<x->n; i++) { + switch (x->type[i]) { + case MTY_FILE: + free(x->paths[i].src.mpf.path); + break; + case MTY_MBOX: + break; + case MTY_DEAD: + break; + } + } + free(x->type); + free(x->paths); + } + free(x); +} +/*}}}*/ +static void add_file_to_list(char *x, struct msgpath_array *arr) {/*{{{*/ + char *y = new_string(x); + if (arr->n == arr->max) { + arr->max += 1024; + arr->paths = grow_array(struct msgpath, arr->max, arr->paths); + arr->type = grow_array(enum message_type, arr->max, arr->type); + } + arr->type[arr->n] = MTY_FILE; + arr->paths[arr->n].src.mpf.path = y; + ++arr->n; + return; +} +/*}}}*/ +static void 
get_maildir_message_paths(char *folder, struct msgpath_array *arr)/*{{{*/ +{ + char *subdir, *fname; + int i; + static char *subdirs[] = {"new", "cur"}; + DIR *d; + struct dirent *de; + int folder_len = strlen(folder); + + /* FIXME : just store mdir-rooted paths in array and have common prefix elsewhere. */ + + subdir = new_array(char, folder_len + 6); + fname = new_array(char, folder_len + 8 + NAME_MAX); + for (i=0; i<2; i++) { + strcpy(subdir, folder); + strcat(subdir, "/"); + strcat(subdir, subdirs[i]); + d = opendir(subdir); + if (d) { + while ((de = readdir(d))) { + /* TODO : Perhaps we ought to do some validation on the path here? + i.e. check that the filename looks valid for a maildir message. */ + if (!strcmp(de->d_name, ".") || + !strcmp(de->d_name, "..")) { + continue; + } + strcpy(fname, subdir); + strcat(fname, "/"); + strcat(fname, de->d_name); + add_file_to_list(fname, arr); + } + closedir(d); + } + } + free(subdir); + free(fname); + return; +} +/*}}}*/ +int valid_mh_filename_p(const char *x)/*{{{*/ +{ + const char *p; + + if (!*x) return 0; /* Must not be empty */ + p = x; + while (*p) { + if (!isdigit(*p)) { + /* Handle MH folders generated by Evolution, which have '.' on the ends + * of the numerical filenames for the messages. 
*/ + if ((p[0] == '.') && (p[1] == 0)) return 1; + else return 0; + } + p++; + } + return 1; +} +/*}}}*/ +static void get_mh_message_paths(char *folder, struct msgpath_array *arr)/*{{{*/ +{ + char *fname; + DIR *d; + struct dirent *de; + int folder_len = strlen(folder); + + fname = new_array(char, folder_len + 8 + NAME_MAX); + d = opendir(folder); + if (d) { + while ((de = readdir(d))) { + if (!strcmp(de->d_name, ".") || + !strcmp(de->d_name, "..")) { + continue; + } + strcpy(fname, folder); + strcat(fname, "/"); + strcat(fname, de->d_name); + if (valid_mh_filename_p(de->d_name)) { + add_file_to_list(fname, arr); + } + } + closedir(d); + } + free(fname); + return; +} +/*}}}*/ +static int child_stat(const char *base, const char *child, struct stat *sb)/*{{{*/ +{ + int result = 0; + char *scratch; + int len; + + len = strlen(base) + strlen(child) + 2; + scratch = new_array(char, len); + + strcpy(scratch, base); + strcat(scratch, "/"); + strcat(scratch, child); + + result = stat(scratch, sb); + free(scratch); + return result; +} +/*}}}*/ +static int has_child_file(const char *base, const char *child)/*{{{*/ +{ + int result = 0; + int status; + struct stat sb; + + status = child_stat(base, child, &sb); + if ((status >= 0) && S_ISREG(sb.st_mode)) { + result = 1; + } + + return result; +} +/*}}}*/ +static int has_child_dir(const char *base, const char *child)/*{{{*/ +{ + int result = 0; + int status; + struct stat sb; + + status = child_stat(base, child, &sb); + if ((status >= 0) && S_ISDIR(sb.st_mode)) { + result = 1; + } + + return result; +} +/*}}}*/ +static enum traverse_check scrutinize_maildir_entry(int parent_is_maildir, const char *de_name)/*{{{*/ +{ + if (parent_is_maildir) { + /* Process any subdirectory that's not part of this maildir itself. 
*/ + if (!strcmp(de_name, "new") || + !strcmp(de_name, "cur") || + !strcmp(de_name, "tmp")) { + return TRAV_IGNORE; + } else { + return TRAV_PROCESS; + } + } else { + return TRAV_PROCESS; + } +} +/*}}}*/ +static int filter_is_maildir(const char *path, const struct stat *sb)/*{{{*/ +{ + if (S_ISDIR(sb->st_mode)) { + if (has_child_dir(path, "new") && + has_child_dir(path, "tmp") && + has_child_dir(path, "cur")) { + return 1; + } + } + return 0; +} +/*}}}*/ +struct traverse_methods maildir_traverse_methods = {/*{{{*/ + .filter = filter_is_maildir, + .scrutinize = scrutinize_maildir_entry +}; +/*}}}*/ +static enum traverse_check scrutinize_mh_entry(int parent_is_mh, const char *de_name)/*{{{*/ +{ + /* Have to allow sub-folders within a folder until we think of a better + * solution. */ + if (valid_mh_filename_p(de_name)) { + return TRAV_IGNORE; + } else { + return TRAV_PROCESS; + } +} +/*}}}*/ +static int filter_is_mh(const char *path, const struct stat *sb)/*{{{*/ +{ + int result = 0; + if (S_ISDIR(sb->st_mode)) { + /* TODO : find a way of making this more scalable? e.g. if a folder of a + * particular subtype is found once, try that subtype first later, since + * the user presumably uses a consistent MH-subtype (i.e. a single MUA). 
*/ + if (has_child_file(path, ".xmhcache") || + has_child_file(path, ".mh_sequences") || + /* Sylpheed */ + has_child_file(path, ".sylpheed_cache") || + has_child_file(path, ".sylpheed_mark") || + /* claws-mail */ + has_child_file(path, ".claws_cache") || + has_child_file(path, ".claws_mark") || + /* NNML (Gnus) */ + has_child_file(path, ".marks") || + has_child_file(path, ".overview") || + /* Evolution */ + has_child_file(path, "cmeta") || + has_child_file(path, "summary") || + /* Mew */ + has_child_file(path, ".mew-summary") || + /* ezmlm/archive */ + has_child_file(path, "index") + ) { + result = 1; + } + } + return result; +} +/*}}}*/ +struct traverse_methods mh_traverse_methods = {/*{{{*/ + .filter = filter_is_mh, + .scrutinize = scrutinize_mh_entry +}; +/*}}}*/ +#if 0 +static void scan_directory(char *folder_base, char *this_folder, enum folder_type ft, struct msgpath_array *arr)/*{{{*/ +{ + DIR *d; + struct dirent *de; + struct stat sb; + char *fname, *sname; + char *name; + int folder_base_len = strlen(folder_base); + int this_folder_len = strlen(this_folder); + + name = new_array(char, folder_base_len + this_folder_len + 2); + strcpy(name, folder_base); + strcat(name, "/"); + strcat(name, this_folder); + + switch (ft) { + case FT_MAILDIR: + if (looks_like_maildir(folder_base, this_folder)) { + get_maildir_message_paths(folder_base, this_folder, arr); + } + break; + case FT_MH: + get_mh_message_paths(folder_base, this_folder, arr); + break; + default: + break; + } + + fname = new_array(char, strlen(name) + 2 + NAME_MAX); + sname = new_array(char, this_folder_len + 2 + NAME_MAX); + + d = opendir(name); + if (d) { + while ((de = readdir(d))) { + if (!strcmp(de->d_name, ".") || + !strcmp(de->d_name, "..")) { + continue; + } + + strcpy(fname, name); + strcat(fname, "/"); + strcat(fname, de->d_name); + + strcpy(sname, this_folder); + strcat(sname, "/"); + strcat(sname, de->d_name); + + if (stat(fname, &sb) >= 0) { + if (S_ISDIR(sb.st_mode)) { + 
scan_directory(folder_base, sname, ft, arr); + } + } + } + closedir(d); + } + + free(fname); + free(sname); + free(name); + return; +} +/*}}}*/ +#endif +static int message_compare(const void *a, const void *b)/*{{{*/ +{ + /* FIXME : Is this a sensible way to do this with mbox messages in the picture? */ + struct msgpath *aa = (struct msgpath *) a; + struct msgpath *bb = (struct msgpath *) b; + /* This should only get called on 'file' type messages - TBC! */ + return strcmp(aa->src.mpf.path, bb->src.mpf.path); +} +/*}}}*/ +static void sort_message_list(struct msgpath_array *arr)/*{{{*/ +{ + qsort(arr->paths, arr->n, sizeof(struct msgpath), message_compare); +} +/*}}}*/ +/*{{{ void build_message_list */ +void build_message_list(char *folder_base, char *folders, enum folder_type ft, + struct msgpath_array *msgs, + struct globber_array *omit_globs) +{ + char **raw_paths, **paths; + int n_raw_paths, n_paths, i; + + split_on_colons(folders, &n_raw_paths, &raw_paths); + switch (ft) { + case FT_MAILDIR: + glob_and_expand_paths(folder_base, raw_paths, n_raw_paths, &paths, &n_paths, &maildir_traverse_methods, omit_globs); + for (i=0; i<n_paths; i++) { + get_maildir_message_paths(paths[i], msgs); + } + break; + case FT_MH: + glob_and_expand_paths(folder_base, raw_paths, n_raw_paths, &paths, &n_paths, &mh_traverse_methods, omit_globs); + for (i=0; i<n_paths; i++) { + get_mh_message_paths(paths[i], msgs); + } + break; + default: + assert(0); + break; + } + + if (paths) free(paths); + + sort_message_list(msgs); + return; +} +/*}}}*/ + +#ifdef TEST +int main (int argc, char **argv) +{ + int i; + struct msgpath_array *arr; + + arr = build_message_list("."); + + for (i=0; i<arr->n; i++) { + printf("%08lx %s\n", arr->paths[i].mtime, arr->paths[i].path); + } + + free_msgpath_array(arr); + + return 0; +} +#endif + + diff --git a/src/mairix/dotlock.c b/src/mairix/dotlock.c @@ -0,0 +1,116 @@ +/* + mairix - message index builder and finder for maildir folders. 
+ + ********************************************************************** + * Copyright (C) Richard P. Curnow 2005 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +#include "mairix.h" +#include <sys/utsname.h> +#include <sys/types.h> +#include <pwd.h> +#include <unistd.h> + +static char *lock_file_name = NULL; + +/* This locking code was originally written for tdl */ + +void lock_database(char *path, int forced_unlock)/*{{{*/ +{ + struct utsname uu; + struct passwd *pw; + int pid; + int len; + char *tname; + struct stat sb; + FILE *out; + + if (uname(&uu) < 0) { + perror("uname"); + exit(1); + } + pw = getpwuid(getuid()); + if (!pw) { + perror("getpwuid"); + exit(1); + } + pid = getpid(); + len = 1 + strlen(path) + 5; + lock_file_name = new_array(char, len); + sprintf(lock_file_name, "%s.lock", path); + + if (forced_unlock) { + unlock_database(); + forced_unlock = 0; + } + + len += strlen(uu.nodename); + /* add on max width of pid field (allow up to 32 bit pid_t) + 2 '.' 
chars */ + len += (10 + 2); + tname = new_array(char, len); + sprintf(tname, "%s.%d.%s", lock_file_name, pid, uu.nodename); + out = fopen(tname, "w"); + if (!out) { + fprintf(stderr, "Cannot open lock file %s for writing\n", tname); + exit(1); + } + fprintf(out, "%d,%s,%s\n", pid, uu.nodename, pw->pw_name); + fclose(out); + + if (link(tname, lock_file_name) < 0) { + /* check if link count==2 */ + if (stat(tname, &sb) < 0) { + fprintf(stderr, "Could not stat the lock file\n"); + unlink(tname); + exit(1); + } else { + if (sb.st_nlink != 2) { + FILE *in; + in = fopen(lock_file_name, "r"); + if (in) { + char line[2048]; + fgets(line, sizeof(line), in); + line[strlen(line)-1] = 0; /* strip trailing newline */ + fprintf(stderr, "Database %s appears to be locked by (pid,node,user)=(%s)\n", path, line); + unlink(tname); + exit(1); + } + } else { + /* lock succeeded apparently */ + } + } + } else { + /* lock succeeded apparently */ + } + unlink(tname); + free(tname); + return; +} +/*}}}*/ +void unlock_database(void)/*{{{*/ +{ + if (lock_file_name) unlink(lock_file_name); + return; +} +/*}}}*/ +void unlock_and_exit(int code)/*{{{*/ +{ + unlock_database(); + exit(code); +} +/*}}}*/ diff --git a/src/mairix/dotmairixrc.eg b/src/mairix/dotmairixrc.eg @@ -0,0 +1,41 @@ +####################################################################### +# +# Example ~/.mairixrc file +# +# Any line starting with # is a comment. +# +####################################################################### +# Set this to the directory where your maildir folders live +base=/home/richard/mail + +####################################################################### +# You need to define at least one of maildir, mh and mbox. You probably don't +# need to define all three! You can use >1 line for any of these. + +# Set this to a list of maildir folders within 'base'. 3 dots at the end means +# there are sub-folders within this folder. +maildir=inbox:archive... +maildir=lists... 
+ +# Set this to a list of MH folders within 'base'. 3 dots at the end means +# there are sub-folders within this folder. +mh=mh_archive... + +# Set this to a list of mbox folders within 'base'. +mbox=mboxen/folder1:mboxen/folder2:mboxen/foobar + +####################################################################### +# Set this to the folder within 'base' where you want the search mode +# to write its output. +mfolder=mfolder + +# Set this if you want the format of mfolder to be mh or mbox (the default is +# maildir). +# +# mformat=mh +# mformat=mbox + +####################################################################### +# Set this to the path where the index database file will be kept +database=/home/richard/mail/mairix_database + diff --git a/src/mairix/dumper.c b/src/mairix/dumper.c @@ -0,0 +1,151 @@ +/* + mairix - message index builder and finder for maildir folders. + + ********************************************************************** + * Copyright (C) Richard P. Curnow 2004, 2005 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + ********************************************************************** + */ + +/* Database dumper */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <assert.h> +#include <sys/mman.h> + +#include "mairix.h" +#include "reader.h" +#include "memmac.h" + +static void dump_token_chain(struct read_db *db, unsigned int n, unsigned int *tok_offsets, unsigned int *enc_offsets) +{ + int i, j, incr; + int on_line; + unsigned char *foo; + printf("%d entries\n", n); + for (i=0; i<n; i++) { + printf("Word %d : <%s>\n", i, db->data + tok_offsets[i]); + foo = (unsigned char *) db->data + enc_offsets[i]; + j = 0; + on_line = 0; + printf(" "); + while (*foo != 0xff) { + if (on_line > 15) { + printf("\n"); + on_line = 0; + } + incr = read_increment(&foo); + j += incr; + printf("%d ", j); + on_line++; + } + printf("\n"); + } +} + +static void dump_toktable(struct read_db *db, struct toktable_db *tbl, const char *title) +{ + printf("Contents of <%s> table\n", title); + dump_token_chain( db, tbl->n, tbl->tok_offsets, tbl->enc_offsets); +} + +static void dump_toktable2(struct read_db *db, struct toktable2_db *tbl, const char *title) +{ + unsigned int n; + n = tbl->n; + printf("Contents of <%s> table\n", title); + printf("Chain 0\n"); + dump_token_chain( db, n, tbl->tok_offsets, tbl->enc0_offsets); + printf("Chain 1\n"); + dump_token_chain( db, n, tbl->tok_offsets, tbl->enc1_offsets); +} + +void dump_database(char *filename) +{ + struct read_db *db; + int i; + + db = open_db(filename); + + printf("Dump of %s\n", filename); + printf("%d messages\n", db->n_msgs); + for (i=0; i<db->n_msgs; i++) { + printf("%6d: ", i); + switch (rd_msg_type(db, i)) { + case DB_MSG_DEAD: + printf("DEAD"); + break; + case DB_MSG_FILE: + printf("FILE %s, size=%d, tid=%d", + db->data + db->path_offsets[i], db->size_table[i], db->tid_table[i]); + break; + case DB_MSG_MBOX: + { 
+ unsigned int mbix, msgix; + decode_mbox_indices(db->path_offsets[i], &mbix, &msgix); + + printf("MBOX %d, msg %d, offset=%d, size=%d, tid=%d", + mbix, msgix, db->mtime_table[i], db->size_table[i], db->tid_table[i]); + } + break; + } + if (db->msg_type_and_flags[i] & FLAG_SEEN) printf(" seen"); + if (db->msg_type_and_flags[i] & FLAG_REPLIED) printf(" replied"); + if (db->msg_type_and_flags[i] & FLAG_FLAGGED) printf(" flagged"); + printf("\n"); + } + printf("\n"); + if (db->n_mboxen > 0) { + printf("\nMBOX INFORMATION\n"); + printf("%d mboxen\n", db->n_mboxen); + for (i=0; i<db->n_mboxen; i++) { + if (db->mbox_paths_table[i]) { + printf("%4d: %d msgs in %s\n", i, db->mbox_entries_table[i], db->data + db->mbox_paths_table[i]); + } else { + printf("%4d: dead\n", i); + } + } + printf("\n"); + } + + printf("Hash key %08x\n\n", db->hash_key); + printf("--------------------------------\n"); + dump_toktable(db, &db->to, "To"); + printf("--------------------------------\n"); + dump_toktable(db, &db->cc, "Cc"); + printf("--------------------------------\n"); + dump_toktable(db, &db->from, "From"); + printf("--------------------------------\n"); + dump_toktable(db, &db->subject, "Subject"); + printf("--------------------------------\n"); + dump_toktable(db, &db->body, "Body"); + printf("--------------------------------\n"); + dump_toktable(db, &db->attachment_name, "Attachment names"); + printf("--------------------------------\n"); + dump_toktable2(db, &db->msg_ids, "Message Ids"); + printf("--------------------------------\n"); + + close_db(db); + return; +} + diff --git a/src/mairix/expandstr.c b/src/mairix/expandstr.c @@ -0,0 +1,196 @@ +/* + mairix - message index builder and finder for maildir folders. + + ********************************************************************** + * Copyright (C) Richard P. 
Curnow 2004 + * Copyright (C) Andreas Amann 2010 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +#include "mairix.h" +#include <stdlib.h> +#include <sys/types.h> +#include <pwd.h> +#include <ctype.h> +#include <unistd.h> + +static int isenv(unsigned char x)/*{{{*/ +{ + /* Return true if x is valid as part of an environment variable name. 
*/ + if (isalnum(x)) + return 1; + else if (x == '_') + return 1; + else + return 0; +} +/*}}}*/ +static int home_dir_len(void)/*{{{*/ +{ + struct passwd *foo; + char *lookup; + lookup = getenv("HOME"); + if (lookup) { + return strlen(lookup); + } + foo = getpwuid(getuid()); + return strlen(foo->pw_dir); +} +/*}}}*/ +static char *env_lookup(const char *p, const char *q)/*{{{*/ +{ + char *var; + char *lookup, *result; + char *s; + var = new_array(char, (q-p)+1); + for (s=var; p<q; p++, s++) { + *s = *p; + } + *s = 0; + lookup = getenv(var); + if (lookup) { + result = new_string(lookup); + } else { + result = NULL; + } + free(var); + return result; +} +/*}}}*/ +static int env_lookup_len(const char *p, const char *q) {/*{{{*/ + char *foo; + int len; + foo = env_lookup(p, q); + if (!foo) len = 0; + else { + len = strlen(foo); + free(foo); + } + return len; +} +/*}}}*/ +static int compute_length(const char *p)/*{{{*/ +{ + const char *q; + int first; + int len; + first = 1; + len = 0; + while (*p) { + if (first && (*p == '~') && (p[1] == '/')) { + /* Make no attempt to expand ~other_user form */ + len += home_dir_len(); + p++; + } else if ((*p == '$') && (p[1] == '{')) { + p += 2; + q = p; + while (*q && (*q != '}')) q++; + len += env_lookup_len(p, q); + p = *q ? 
(q + 1) : q; + } else if (*p == '$') { + p++; + q = p; + while (*q && isenv(*(unsigned char*)q)) q++; + len += env_lookup_len(p, q); + p = q; + } else { + len++; + p++; + } + first = 0; + } + return len; +} +/*}}}*/ +static char *append_home_dir(char *to)/*{{{*/ +{ + struct passwd *foo; + int len; + char *lookup; + lookup = getenv("HOME"); + if (lookup) { + len = strlen(lookup); + strcpy(to, lookup); + } else { + foo = getpwuid(getuid()); + len = strlen(foo->pw_dir); + strcpy(to, foo->pw_dir); + } + return to + len; +} +/*}}}*/ +static char *append_env(char *to, const char *p, const char *q)/*{{{*/ +{ + char *foo; + int len; + foo = env_lookup(p, q); + if (foo) { + len = strlen(foo); + strcpy(to, foo); + free(foo); + } else { + len = 0; + } + return (to + len); +} +/*}}}*/ +static void do_expand(const char *p, char *result)/*{{{*/ +{ + const char *q; + int first; + first = 1; + while (*p) { + if (first && (*p == '~') && (p[1] == '/')) { + result = append_home_dir(result); + p++; + } else if ((*p == '$') && (p[1] == '{')) { + p += 2; + q = p; + while (*q && (*q != '}')) q++; + result = append_env(result, p, q); + p = *q ? (q + 1) : q; + } else if (*p == '$') { + p++; + q = p; + while (*q && isenv(*(unsigned char*)q)) q++; + result = append_env(result, p, q); + p = q; + } else { + *result++ = *p++; + } + first = 0; + } + *result = 0; +} +/*}}}*/ +char *expand_string(const char *p)/*{{{*/ +{ + /* Return a copy of p, but with + + ~ expanded to the user's home directory + $env expanded to the value of that environment variable + */ + + int len; + char *result; + + len = compute_length(p); + result = new_array(char, len+1); + do_expand(p, result); + return result; +} +/*}}}*/ diff --git a/src/mairix/from.h b/src/mairix/from.h @@ -0,0 +1,32 @@ +/* + mairix - message index builder and finder for maildir folders. + + ********************************************************************** + * Copyright (C) Richard P. 
Curnow 2002-2004,2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +#ifndef _FROM_H +#define _FROM_H + +enum fromcheck_result { + FROMCHECK_PASS, + FROMCHECK_FAIL +}; + +#endif + diff --git a/src/mairix/fromcheck.nfa b/src/mairix/fromcheck.nfa @@ -0,0 +1,218 @@ +######################################################################### +# +# mairix - message index builder and finder for maildir folders. +# +# Copyright (C) Richard P. Curnow 2002-2004,2006 +# Copyright (C) Jonathan Kamens 2010 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +# ======================================================================= + +%{ +#include "from.h" +%} + + +# Define tokens +# CR : \n +# DIGIT : [0-9] +# AT : @ +# COLON : : +# WHITE : ' ', \t +# LOWER : [a-z] +# UPPER : [A-Z] +# PLUSMINUS : [+-] +# OTHER_EMAIL : other stuff valid in the LHS of an address +# DOMAIN : stuff valid in the RHS of an address + +Abbrev LF = [\n] +Abbrev CR = [\r] +Abbrev DIGIT = [0-9] +Abbrev PERIOD = [.] +Abbrev AT = [@] +Abbrev LOWER = [a-z] +Abbrev UPPER = [A-Z] +Abbrev COLON = [:] +Abbrev WHITE = [ \t] +Abbrev PLUSMINUS = [+\-] +# Explained clearly at +# http://en.wikipedia.org/wiki/E-mail_address#RFC_specification +Abbrev OTHER_EMAIL = [.!#$%&'*/=?^_`{|}~] +Abbrev LT = [<] +Abbrev GT = [>] +Abbrev EMAIL = LOWER | UPPER | DIGIT | PLUSMINUS | OTHER_EMAIL +Abbrev OTHER_DOMAIN = [\-_.] +Abbrev DOMAIN = LOWER | UPPER | DIGIT | OTHER_DOMAIN +Abbrev DQUOTE = ["] +Abbrev OTHER_QUOTED = [@:<>] +Abbrev LEFTSQUARE = [[] +Abbrev RIGHTSQUARE = [\]] + +BLOCK email { + STATE in + EMAIL -> in, before_at + DQUOTE -> quoted_before_at + AT -> domain_route + + STATE domain_route + DOMAIN -> domain_route + COLON -> in + + STATE quoted_before_at + EMAIL | WHITE | OTHER_QUOTED -> quoted_before_at + DQUOTE -> before_at + + STATE before_at + EMAIL -> before_at + DQUOTE -> quoted_before_at + # Local part only : >=1 characters will suffice, which we've already + # matched. 
+ -> out + AT -> start_of_domain + + STATE start_of_domain + LEFTSQUARE -> dotted_quad + DOMAIN -> after_at + + STATE dotted_quad + DIGIT | PERIOD -> dotted_quad + RIGHTSQUARE -> out + + STATE after_at + DOMAIN -> after_at, out + +} + +BLOCK angled_email { + STATE in + LT -> in_angles + + STATE in_angles + <email:in->out> -> before_gt + + STATE before_gt + GT -> out +} + +BLOCK zone { + # Make this pretty lenient + STATE in + UPPER -> zone2 + UPPER -> out + PLUSMINUS -> zone2 + + STATE zone2 + UPPER | LOWER -> zone2, out + DIGIT -> zone2, out +} + +BLOCK date { + STATE in + WHITE -> in, before_weekday + + STATE before_weekday + UPPER ; LOWER ; LOWER ; WHITE -> after_weekday + + STATE after_weekday + WHITE -> after_weekday + UPPER ; LOWER ; LOWER ; WHITE -> after_month + + STATE after_month + WHITE -> after_month + DIGIT ; WHITE -> after_day + DIGIT ; DIGIT ; WHITE -> after_day + + STATE after_day + WHITE -> after_day + # Accept HH:MM:SS + DIGIT ; DIGIT ; COLON ; DIGIT ; DIGIT ; COLON ; DIGIT ; DIGIT ; WHITE -> after_time + # Accept HH:MM + DIGIT ; DIGIT ; COLON ; DIGIT ; DIGIT ; WHITE -> after_time + + # Allow either 1 or 2 words of timezone + STATE after_time + WHITE -> after_time + -> after_timezone + <zone:in->out> ; WHITE -> after_timezone + <zone:in->out> ; WHITE -> after_timezone_1 + + # It appears that Pine puts the timezone after the year + DIGIT ; DIGIT ; DIGIT ; DIGIT -> after_year_before_zone + + STATE after_year_before_zone + WHITE -> after_year_before_zone + <zone:in->out> -> after_timezone_after_year + <zone:in->out> ; WHITE -> after_timezone_after_year_1 + + STATE after_timezone_after_year_1 + WHITE -> after_timezone_after_year_1 + <zone:in->out> -> after_timezone_after_year + + STATE after_timezone_after_year + WHITE -> after_timezone_after_year + -> out + + STATE after_timezone_1 + WHITE -> after_timezone_1 + <zone:in->out> ; WHITE -> after_timezone + + STATE after_timezone + WHITE -> after_timezone + DIGIT ; DIGIT ; DIGIT ; DIGIT -> after_year + + 
STATE after_year + WHITE -> after_year + -> out + +} + +# Assume the earlier code has identified the '\nFrom ' sequence, +# and the validator starts scanning from the character beyond the space + +BLOCK main { + + STATE in + # Real return address. + WHITE -> in + <email:in->out> -> before_date + <angled_email:in->out> -> before_date + + # Cope with Mozilla mbox folder format which just uses a '-' as + # the return address field. + PLUSMINUS -> before_date + + # Empty return address + -> before_date + + STATE before_date + <date:in->out> ; LF = FROMCHECK_PASS + + # Cope with mozilla mbox format + <date:in->out> ; CR ; LF = FROMCHECK_PASS + + # Mention this state last : the last mentioned state in the last defined + # block becomes the entry state of the scanner. + + STATE in + +} + +ATTR FROMCHECK_PASS +ATTR FROMCHECK_FAIL +DEFATTR FROMCHECK_FAIL +PREFIX fromcheck +TYPE "enum fromcheck_result" + +# vim:ft=txt:et:sw=4:sts=4:ht=4 diff --git a/src/mairix/fromcheck.report b/src/mairix/fromcheck.report @@ -0,0 +1,3222 @@ +NFA state 0 = in + [(epsilon)] -> before_date + 5:[+] -> before_date + 6:[\055] -> before_date + [(epsilon)] -> angled_email#2.in + [(epsilon)] -> email#1.in + 0:[\t ] -> in + Epsilon closure : + (self) + email#1.in + angled_email#2.in + before_date + date#3.in + date#4.in + +NFA state 1 = email#1.in + 3:[!#-'*/=?^`{-~] -> email#1.in + 16:[_] -> email#1.in + 7:[.] -> email#1.in + 5:[+] -> email#1.in + 6:[\055] -> email#1.in + 8:[0-9] -> email#1.in + 13:[A-Z] -> email#1.in + 17:[a-z] -> email#1.in + 3:[!#-'*/=?^`{-~] -> email#1.before_at + 16:[_] -> email#1.before_at + 7:[.] 
-> email#1.before_at + 5:[+] -> email#1.before_at + 6:[\055] -> email#1.before_at + 8:[0-9] -> email#1.before_at + 13:[A-Z] -> email#1.before_at + 17:[a-z] -> email#1.before_at + 4:["] -> email#1.quoted_before_at + 12:[@] -> email#1.domain_route + Epsilon closure : + (self) + +NFA state 2 = email#1.domain_route + 6:[\055] -> email#1.domain_route + 16:[_] -> email#1.domain_route + 7:[.] -> email#1.domain_route + 8:[0-9] -> email#1.domain_route + 13:[A-Z] -> email#1.domain_route + 17:[a-z] -> email#1.domain_route + 9:[:] -> email#1.in + Epsilon closure : + (self) + +NFA state 3 = email#1.quoted_before_at + 9:[:] -> email#1.quoted_before_at + 12:[@] -> email#1.quoted_before_at + 11:[>] -> email#1.quoted_before_at + 10:[<] -> email#1.quoted_before_at + 0:[\t ] -> email#1.quoted_before_at + 3:[!#-'*/=?^`{-~] -> email#1.quoted_before_at + 16:[_] -> email#1.quoted_before_at + 7:[.] -> email#1.quoted_before_at + 5:[+] -> email#1.quoted_before_at + 6:[\055] -> email#1.quoted_before_at + 8:[0-9] -> email#1.quoted_before_at + 13:[A-Z] -> email#1.quoted_before_at + 17:[a-z] -> email#1.quoted_before_at + 4:["] -> email#1.before_at + Epsilon closure : + (self) + +NFA state 4 = email#1.before_at + 3:[!#-'*/=?^`{-~] -> email#1.before_at + 16:[_] -> email#1.before_at + 7:[.] -> email#1.before_at + 5:[+] -> email#1.before_at + 6:[\055] -> email#1.before_at + 8:[0-9] -> email#1.before_at + 13:[A-Z] -> email#1.before_at + 17:[a-z] -> email#1.before_at + 4:["] -> email#1.quoted_before_at + [(epsilon)] -> email#1.out + 12:[@] -> email#1.start_of_domain + Epsilon closure : + (self) + email#1.out + before_date + date#3.in + date#4.in + +NFA state 5 = email#1.start_of_domain + 14:[[] -> email#1.dotted_quad + 6:[\055] -> email#1.after_at + 16:[_] -> email#1.after_at + 7:[.] -> email#1.after_at + 8:[0-9] -> email#1.after_at + 13:[A-Z] -> email#1.after_at + 17:[a-z] -> email#1.after_at + Epsilon closure : + (self) + +NFA state 6 = email#1.dotted_quad + 7:[.] 
-> email#1.dotted_quad + 8:[0-9] -> email#1.dotted_quad + 15:[]] -> email#1.out + Epsilon closure : + (self) + +NFA state 7 = email#1.after_at + 6:[\055] -> email#1.after_at + 16:[_] -> email#1.after_at + 7:[.] -> email#1.after_at + 8:[0-9] -> email#1.after_at + 13:[A-Z] -> email#1.after_at + 17:[a-z] -> email#1.after_at + 6:[\055] -> email#1.out + 16:[_] -> email#1.out + 7:[.] -> email#1.out + 8:[0-9] -> email#1.out + 13:[A-Z] -> email#1.out + 17:[a-z] -> email#1.out + Epsilon closure : + (self) + +NFA state 8 = email#1.out + [(epsilon)] -> before_date + Epsilon closure : + (self) + before_date + date#3.in + date#4.in + +NFA state 9 = angled_email#2.in + 10:[<] -> angled_email#2.in_angles + Epsilon closure : + (self) + +NFA state 10 = angled_email#2.in_angles + [(epsilon)] -> angled_email#2.email#1.in + Epsilon closure : + (self) + angled_email#2.email#1.in + +NFA state 11 = angled_email#2.email#1.in + 12:[@] -> angled_email#2.email#1.domain_route + 4:["] -> angled_email#2.email#1.quoted_before_at + 17:[a-z] -> angled_email#2.email#1.before_at + 13:[A-Z] -> angled_email#2.email#1.before_at + 8:[0-9] -> angled_email#2.email#1.before_at + 5:[+] -> angled_email#2.email#1.before_at + 6:[\055] -> angled_email#2.email#1.before_at + 3:[!#-'*/=?^`{-~] -> angled_email#2.email#1.before_at + 16:[_] -> angled_email#2.email#1.before_at + 7:[.] -> angled_email#2.email#1.before_at + 17:[a-z] -> angled_email#2.email#1.in + 13:[A-Z] -> angled_email#2.email#1.in + 8:[0-9] -> angled_email#2.email#1.in + 5:[+] -> angled_email#2.email#1.in + 6:[\055] -> angled_email#2.email#1.in + 3:[!#-'*/=?^`{-~] -> angled_email#2.email#1.in + 16:[_] -> angled_email#2.email#1.in + 7:[.] 
-> angled_email#2.email#1.in + Epsilon closure : + (self) + +NFA state 12 = angled_email#2.email#1.domain_route + 9:[:] -> angled_email#2.email#1.in + 17:[a-z] -> angled_email#2.email#1.domain_route + 13:[A-Z] -> angled_email#2.email#1.domain_route + 8:[0-9] -> angled_email#2.email#1.domain_route + 6:[\055] -> angled_email#2.email#1.domain_route + 16:[_] -> angled_email#2.email#1.domain_route + 7:[.] -> angled_email#2.email#1.domain_route + Epsilon closure : + (self) + +NFA state 13 = angled_email#2.email#1.quoted_before_at + 4:["] -> angled_email#2.email#1.before_at + 17:[a-z] -> angled_email#2.email#1.quoted_before_at + 13:[A-Z] -> angled_email#2.email#1.quoted_before_at + 8:[0-9] -> angled_email#2.email#1.quoted_before_at + 5:[+] -> angled_email#2.email#1.quoted_before_at + 6:[\055] -> angled_email#2.email#1.quoted_before_at + 3:[!#-'*/=?^`{-~] -> angled_email#2.email#1.quoted_before_at + 16:[_] -> angled_email#2.email#1.quoted_before_at + 7:[.] -> angled_email#2.email#1.quoted_before_at + 0:[\t ] -> angled_email#2.email#1.quoted_before_at + 9:[:] -> angled_email#2.email#1.quoted_before_at + 12:[@] -> angled_email#2.email#1.quoted_before_at + 11:[>] -> angled_email#2.email#1.quoted_before_at + 10:[<] -> angled_email#2.email#1.quoted_before_at + Epsilon closure : + (self) + +NFA state 14 = angled_email#2.email#1.before_at + 12:[@] -> angled_email#2.email#1.start_of_domain + [(epsilon)] -> angled_email#2.email#1.out + 4:["] -> angled_email#2.email#1.quoted_before_at + 17:[a-z] -> angled_email#2.email#1.before_at + 13:[A-Z] -> angled_email#2.email#1.before_at + 8:[0-9] -> angled_email#2.email#1.before_at + 5:[+] -> angled_email#2.email#1.before_at + 6:[\055] -> angled_email#2.email#1.before_at + 3:[!#-'*/=?^`{-~] -> angled_email#2.email#1.before_at + 16:[_] -> angled_email#2.email#1.before_at + 7:[.] 
-> angled_email#2.email#1.before_at + Epsilon closure : + (self) + angled_email#2.email#1.out + angled_email#2.before_gt + +NFA state 15 = angled_email#2.email#1.start_of_domain + 17:[a-z] -> angled_email#2.email#1.after_at + 13:[A-Z] -> angled_email#2.email#1.after_at + 8:[0-9] -> angled_email#2.email#1.after_at + 6:[\055] -> angled_email#2.email#1.after_at + 16:[_] -> angled_email#2.email#1.after_at + 7:[.] -> angled_email#2.email#1.after_at + 14:[[] -> angled_email#2.email#1.dotted_quad + Epsilon closure : + (self) + +NFA state 16 = angled_email#2.email#1.dotted_quad + 15:[]] -> angled_email#2.email#1.out + 8:[0-9] -> angled_email#2.email#1.dotted_quad + 7:[.] -> angled_email#2.email#1.dotted_quad + Epsilon closure : + (self) + +NFA state 17 = angled_email#2.email#1.after_at + 17:[a-z] -> angled_email#2.email#1.out + 13:[A-Z] -> angled_email#2.email#1.out + 8:[0-9] -> angled_email#2.email#1.out + 6:[\055] -> angled_email#2.email#1.out + 16:[_] -> angled_email#2.email#1.out + 7:[.] -> angled_email#2.email#1.out + 17:[a-z] -> angled_email#2.email#1.after_at + 13:[A-Z] -> angled_email#2.email#1.after_at + 8:[0-9] -> angled_email#2.email#1.after_at + 6:[\055] -> angled_email#2.email#1.after_at + 16:[_] -> angled_email#2.email#1.after_at + 7:[.] 
-> angled_email#2.email#1.after_at + Epsilon closure : + (self) + +NFA state 18 = angled_email#2.email#1.out + [(epsilon)] -> angled_email#2.before_gt + Epsilon closure : + (self) + angled_email#2.before_gt + +NFA state 19 = angled_email#2.before_gt + 11:[>] -> angled_email#2.out + Epsilon closure : + (self) + +NFA state 20 = angled_email#2.out + [(epsilon)] -> before_date + Epsilon closure : + (self) + before_date + date#3.in + date#4.in + +NFA state 21 = before_date + [(epsilon)] -> date#4.in + [(epsilon)] -> date#3.in + Epsilon closure : + (self) + date#3.in + date#4.in + +NFA state 22 = #1 + 1:[\n] -> #2 + Epsilon closure : + (self) + +NFA state 23 = date#3.in + 0:[\t ] -> date#3.in + 0:[\t ] -> date#3.before_weekday + Epsilon closure : + (self) + +NFA state 24 = date#3.before_weekday + 13:[A-Z] -> date#3.#1 + Epsilon closure : + (self) + +NFA state 25 = date#3.#1 + 17:[a-z] -> date#3.#2 + Epsilon closure : + (self) + +NFA state 26 = date#3.#2 + 17:[a-z] -> date#3.#3 + Epsilon closure : + (self) + +NFA state 27 = date#3.#3 + 0:[\t ] -> date#3.after_weekday + Epsilon closure : + (self) + +NFA state 28 = date#3.after_weekday + 0:[\t ] -> date#3.after_weekday + 13:[A-Z] -> date#3.#4 + Epsilon closure : + (self) + +NFA state 29 = date#3.#4 + 17:[a-z] -> date#3.#5 + Epsilon closure : + (self) + +NFA state 30 = date#3.#5 + 17:[a-z] -> date#3.#6 + Epsilon closure : + (self) + +NFA state 31 = date#3.#6 + 0:[\t ] -> date#3.after_month + Epsilon closure : + (self) + +NFA state 32 = date#3.after_month + 0:[\t ] -> date#3.after_month + 8:[0-9] -> date#3.#7 + 8:[0-9] -> date#3.#8 + Epsilon closure : + (self) + +NFA state 33 = date#3.#7 + 0:[\t ] -> date#3.after_day + Epsilon closure : + (self) + +NFA state 34 = date#3.#8 + 8:[0-9] -> date#3.#9 + Epsilon closure : + (self) + +NFA state 35 = date#3.#9 + 0:[\t ] -> date#3.after_day + Epsilon closure : + (self) + +NFA state 36 = date#3.after_day + 0:[\t ] -> date#3.after_day + 8:[0-9] -> date#3.#10 + 8:[0-9] -> date#3.#18 + 
Epsilon closure : + (self) + +NFA state 37 = date#3.#10 + 8:[0-9] -> date#3.#11 + Epsilon closure : + (self) + +NFA state 38 = date#3.#11 + 9:[:] -> date#3.#12 + Epsilon closure : + (self) + +NFA state 39 = date#3.#12 + 8:[0-9] -> date#3.#13 + Epsilon closure : + (self) + +NFA state 40 = date#3.#13 + 8:[0-9] -> date#3.#14 + Epsilon closure : + (self) + +NFA state 41 = date#3.#14 + 9:[:] -> date#3.#15 + Epsilon closure : + (self) + +NFA state 42 = date#3.#15 + 8:[0-9] -> date#3.#16 + Epsilon closure : + (self) + +NFA state 43 = date#3.#16 + 8:[0-9] -> date#3.#17 + Epsilon closure : + (self) + +NFA state 44 = date#3.#17 + 0:[\t ] -> date#3.after_time + Epsilon closure : + (self) + +NFA state 45 = date#3.#18 + 8:[0-9] -> date#3.#19 + Epsilon closure : + (self) + +NFA state 46 = date#3.#19 + 9:[:] -> date#3.#20 + Epsilon closure : + (self) + +NFA state 47 = date#3.#20 + 8:[0-9] -> date#3.#21 + Epsilon closure : + (self) + +NFA state 48 = date#3.#21 + 8:[0-9] -> date#3.#22 + Epsilon closure : + (self) + +NFA state 49 = date#3.#22 + 0:[\t ] -> date#3.after_time + Epsilon closure : + (self) + +NFA state 50 = date#3.after_time + 0:[\t ] -> date#3.after_time + [(epsilon)] -> date#3.after_timezone + [(epsilon)] -> date#3.zone#1.in + [(epsilon)] -> date#3.zone#2.in + 8:[0-9] -> date#3.#25 + Epsilon closure : + (self) + date#3.zone#1.in + date#3.zone#2.in + date#3.after_timezone + +NFA state 51 = date#3.#23 + 0:[\t ] -> date#3.after_timezone + Epsilon closure : + (self) + +NFA state 52 = date#3.zone#1.in + 5:[+] -> date#3.zone#1.zone2 + 6:[\055] -> date#3.zone#1.zone2 + 13:[A-Z] -> date#3.zone#1.out + 13:[A-Z] -> date#3.zone#1.zone2 + Epsilon closure : + (self) + +NFA state 53 = date#3.zone#1.zone2 + 8:[0-9] -> date#3.zone#1.out + 8:[0-9] -> date#3.zone#1.zone2 + 13:[A-Z] -> date#3.zone#1.out + 17:[a-z] -> date#3.zone#1.out + 13:[A-Z] -> date#3.zone#1.zone2 + 17:[a-z] -> date#3.zone#1.zone2 + Epsilon closure : + (self) + +NFA state 54 = date#3.zone#1.out + [(epsilon)] -> 
date#3.#23 + Epsilon closure : + (self) + date#3.#23 + +NFA state 55 = date#3.#24 + 0:[\t ] -> date#3.after_timezone_1 + Epsilon closure : + (self) + +NFA state 56 = date#3.zone#2.in + 5:[+] -> date#3.zone#2.zone2 + 6:[\055] -> date#3.zone#2.zone2 + 13:[A-Z] -> date#3.zone#2.out + 13:[A-Z] -> date#3.zone#2.zone2 + Epsilon closure : + (self) + +NFA state 57 = date#3.zone#2.zone2 + 8:[0-9] -> date#3.zone#2.out + 8:[0-9] -> date#3.zone#2.zone2 + 13:[A-Z] -> date#3.zone#2.out + 17:[a-z] -> date#3.zone#2.out + 13:[A-Z] -> date#3.zone#2.zone2 + 17:[a-z] -> date#3.zone#2.zone2 + Epsilon closure : + (self) + +NFA state 58 = date#3.zone#2.out + [(epsilon)] -> date#3.#24 + Epsilon closure : + (self) + date#3.#24 + +NFA state 59 = date#3.#25 + 8:[0-9] -> date#3.#26 + Epsilon closure : + (self) + +NFA state 60 = date#3.#26 + 8:[0-9] -> date#3.#27 + Epsilon closure : + (self) + +NFA state 61 = date#3.#27 + 8:[0-9] -> date#3.after_year_before_zone + Epsilon closure : + (self) + +NFA state 62 = date#3.after_year_before_zone + 0:[\t ] -> date#3.after_year_before_zone + [(epsilon)] -> date#3.zone#3.in + [(epsilon)] -> date#3.zone#4.in + Epsilon closure : + (self) + date#3.zone#3.in + date#3.zone#4.in + +NFA state 63 = date#3.zone#3.in + 5:[+] -> date#3.zone#3.zone2 + 6:[\055] -> date#3.zone#3.zone2 + 13:[A-Z] -> date#3.zone#3.out + 13:[A-Z] -> date#3.zone#3.zone2 + Epsilon closure : + (self) + +NFA state 64 = date#3.zone#3.zone2 + 8:[0-9] -> date#3.zone#3.out + 8:[0-9] -> date#3.zone#3.zone2 + 13:[A-Z] -> date#3.zone#3.out + 17:[a-z] -> date#3.zone#3.out + 13:[A-Z] -> date#3.zone#3.zone2 + 17:[a-z] -> date#3.zone#3.zone2 + Epsilon closure : + (self) + +NFA state 65 = date#3.zone#3.out + [(epsilon)] -> date#3.after_timezone_after_year + Epsilon closure : + (self) + #1 + date#3.after_timezone_after_year + date#3.out + +NFA state 66 = date#3.#28 + 0:[\t ] -> date#3.after_timezone_after_year_1 + Epsilon closure : + (self) + +NFA state 67 = date#3.zone#4.in + 5:[+] -> 
date#3.zone#4.zone2 + 6:[\055] -> date#3.zone#4.zone2 + 13:[A-Z] -> date#3.zone#4.out + 13:[A-Z] -> date#3.zone#4.zone2 + Epsilon closure : + (self) + +NFA state 68 = date#3.zone#4.zone2 + 8:[0-9] -> date#3.zone#4.out + 8:[0-9] -> date#3.zone#4.zone2 + 13:[A-Z] -> date#3.zone#4.out + 17:[a-z] -> date#3.zone#4.out + 13:[A-Z] -> date#3.zone#4.zone2 + 17:[a-z] -> date#3.zone#4.zone2 + Epsilon closure : + (self) + +NFA state 69 = date#3.zone#4.out + [(epsilon)] -> date#3.#28 + Epsilon closure : + (self) + date#3.#28 + +NFA state 70 = date#3.after_timezone_after_year_1 + 0:[\t ] -> date#3.after_timezone_after_year_1 + [(epsilon)] -> date#3.zone#5.in + Epsilon closure : + (self) + date#3.zone#5.in + +NFA state 71 = date#3.zone#5.in + 5:[+] -> date#3.zone#5.zone2 + 6:[\055] -> date#3.zone#5.zone2 + 13:[A-Z] -> date#3.zone#5.out + 13:[A-Z] -> date#3.zone#5.zone2 + Epsilon closure : + (self) + +NFA state 72 = date#3.zone#5.zone2 + 8:[0-9] -> date#3.zone#5.out + 8:[0-9] -> date#3.zone#5.zone2 + 13:[A-Z] -> date#3.zone#5.out + 17:[a-z] -> date#3.zone#5.out + 13:[A-Z] -> date#3.zone#5.zone2 + 17:[a-z] -> date#3.zone#5.zone2 + Epsilon closure : + (self) + +NFA state 73 = date#3.zone#5.out + [(epsilon)] -> date#3.after_timezone_after_year + Epsilon closure : + (self) + #1 + date#3.after_timezone_after_year + date#3.out + +NFA state 74 = date#3.after_timezone_after_year + 0:[\t ] -> date#3.after_timezone_after_year + [(epsilon)] -> date#3.out + Epsilon closure : + (self) + #1 + date#3.out + +NFA state 75 = date#3.after_timezone_1 + 0:[\t ] -> date#3.after_timezone_1 + [(epsilon)] -> date#3.zone#6.in + Epsilon closure : + (self) + date#3.zone#6.in + +NFA state 76 = date#3.#29 + 0:[\t ] -> date#3.after_timezone + Epsilon closure : + (self) + +NFA state 77 = date#3.zone#6.in + 5:[+] -> date#3.zone#6.zone2 + 6:[\055] -> date#3.zone#6.zone2 + 13:[A-Z] -> date#3.zone#6.out + 13:[A-Z] -> date#3.zone#6.zone2 + Epsilon closure : + (self) + +NFA state 78 = date#3.zone#6.zone2 + 8:[0-9] -> 
date#3.zone#6.out + 8:[0-9] -> date#3.zone#6.zone2 + 13:[A-Z] -> date#3.zone#6.out + 17:[a-z] -> date#3.zone#6.out + 13:[A-Z] -> date#3.zone#6.zone2 + 17:[a-z] -> date#3.zone#6.zone2 + Epsilon closure : + (self) + +NFA state 79 = date#3.zone#6.out + [(epsilon)] -> date#3.#29 + Epsilon closure : + (self) + date#3.#29 + +NFA state 80 = date#3.after_timezone + 0:[\t ] -> date#3.after_timezone + 8:[0-9] -> date#3.#30 + Epsilon closure : + (self) + +NFA state 81 = date#3.#30 + 8:[0-9] -> date#3.#31 + Epsilon closure : + (self) + +NFA state 82 = date#3.#31 + 8:[0-9] -> date#3.#32 + Epsilon closure : + (self) + +NFA state 83 = date#3.#32 + 8:[0-9] -> date#3.after_year + Epsilon closure : + (self) + +NFA state 84 = date#3.after_year + 0:[\t ] -> date#3.after_year + [(epsilon)] -> date#3.out + Epsilon closure : + (self) + #1 + date#3.out + +NFA state 85 = date#3.out + [(epsilon)] -> #1 + Epsilon closure : + (self) + #1 + +NFA state 86 = #2 + Tags : FROMCHECK_PASS + Epsilon closure : + (self) + +NFA state 87 = #3 + 2:[\r] -> #4 + Epsilon closure : + (self) + +NFA state 88 = date#4.in + 0:[\t ] -> date#4.in + 0:[\t ] -> date#4.before_weekday + Epsilon closure : + (self) + +NFA state 89 = date#4.before_weekday + 13:[A-Z] -> date#4.#1 + Epsilon closure : + (self) + +NFA state 90 = date#4.#1 + 17:[a-z] -> date#4.#2 + Epsilon closure : + (self) + +NFA state 91 = date#4.#2 + 17:[a-z] -> date#4.#3 + Epsilon closure : + (self) + +NFA state 92 = date#4.#3 + 0:[\t ] -> date#4.after_weekday + Epsilon closure : + (self) + +NFA state 93 = date#4.after_weekday + 0:[\t ] -> date#4.after_weekday + 13:[A-Z] -> date#4.#4 + Epsilon closure : + (self) + +NFA state 94 = date#4.#4 + 17:[a-z] -> date#4.#5 + Epsilon closure : + (self) + +NFA state 95 = date#4.#5 + 17:[a-z] -> date#4.#6 + Epsilon closure : + (self) + +NFA state 96 = date#4.#6 + 0:[\t ] -> date#4.after_month + Epsilon closure : + (self) + +NFA state 97 = date#4.after_month + 0:[\t ] -> date#4.after_month + 8:[0-9] -> date#4.#7 + 
8:[0-9] -> date#4.#8 + Epsilon closure : + (self) + +NFA state 98 = date#4.#7 + 0:[\t ] -> date#4.after_day + Epsilon closure : + (self) + +NFA state 99 = date#4.#8 + 8:[0-9] -> date#4.#9 + Epsilon closure : + (self) + +NFA state 100 = date#4.#9 + 0:[\t ] -> date#4.after_day + Epsilon closure : + (self) + +NFA state 101 = date#4.after_day + 0:[\t ] -> date#4.after_day + 8:[0-9] -> date#4.#10 + 8:[0-9] -> date#4.#18 + Epsilon closure : + (self) + +NFA state 102 = date#4.#10 + 8:[0-9] -> date#4.#11 + Epsilon closure : + (self) + +NFA state 103 = date#4.#11 + 9:[:] -> date#4.#12 + Epsilon closure : + (self) + +NFA state 104 = date#4.#12 + 8:[0-9] -> date#4.#13 + Epsilon closure : + (self) + +NFA state 105 = date#4.#13 + 8:[0-9] -> date#4.#14 + Epsilon closure : + (self) + +NFA state 106 = date#4.#14 + 9:[:] -> date#4.#15 + Epsilon closure : + (self) + +NFA state 107 = date#4.#15 + 8:[0-9] -> date#4.#16 + Epsilon closure : + (self) + +NFA state 108 = date#4.#16 + 8:[0-9] -> date#4.#17 + Epsilon closure : + (self) + +NFA state 109 = date#4.#17 + 0:[\t ] -> date#4.after_time + Epsilon closure : + (self) + +NFA state 110 = date#4.#18 + 8:[0-9] -> date#4.#19 + Epsilon closure : + (self) + +NFA state 111 = date#4.#19 + 9:[:] -> date#4.#20 + Epsilon closure : + (self) + +NFA state 112 = date#4.#20 + 8:[0-9] -> date#4.#21 + Epsilon closure : + (self) + +NFA state 113 = date#4.#21 + 8:[0-9] -> date#4.#22 + Epsilon closure : + (self) + +NFA state 114 = date#4.#22 + 0:[\t ] -> date#4.after_time + Epsilon closure : + (self) + +NFA state 115 = date#4.after_time + 0:[\t ] -> date#4.after_time + [(epsilon)] -> date#4.after_timezone + [(epsilon)] -> date#4.zone#1.in + [(epsilon)] -> date#4.zone#2.in + 8:[0-9] -> date#4.#25 + Epsilon closure : + (self) + date#4.zone#1.in + date#4.zone#2.in + date#4.after_timezone + +NFA state 116 = date#4.#23 + 0:[\t ] -> date#4.after_timezone + Epsilon closure : + (self) + +NFA state 117 = date#4.zone#1.in + 5:[+] -> date#4.zone#1.zone2 + 6:[\055] -> 
date#4.zone#1.zone2 + 13:[A-Z] -> date#4.zone#1.out + 13:[A-Z] -> date#4.zone#1.zone2 + Epsilon closure : + (self) + +NFA state 118 = date#4.zone#1.zone2 + 8:[0-9] -> date#4.zone#1.out + 8:[0-9] -> date#4.zone#1.zone2 + 13:[A-Z] -> date#4.zone#1.out + 17:[a-z] -> date#4.zone#1.out + 13:[A-Z] -> date#4.zone#1.zone2 + 17:[a-z] -> date#4.zone#1.zone2 + Epsilon closure : + (self) + +NFA state 119 = date#4.zone#1.out + [(epsilon)] -> date#4.#23 + Epsilon closure : + (self) + date#4.#23 + +NFA state 120 = date#4.#24 + 0:[\t ] -> date#4.after_timezone_1 + Epsilon closure : + (self) + +NFA state 121 = date#4.zone#2.in + 5:[+] -> date#4.zone#2.zone2 + 6:[\055] -> date#4.zone#2.zone2 + 13:[A-Z] -> date#4.zone#2.out + 13:[A-Z] -> date#4.zone#2.zone2 + Epsilon closure : + (self) + +NFA state 122 = date#4.zone#2.zone2 + 8:[0-9] -> date#4.zone#2.out + 8:[0-9] -> date#4.zone#2.zone2 + 13:[A-Z] -> date#4.zone#2.out + 17:[a-z] -> date#4.zone#2.out + 13:[A-Z] -> date#4.zone#2.zone2 + 17:[a-z] -> date#4.zone#2.zone2 + Epsilon closure : + (self) + +NFA state 123 = date#4.zone#2.out + [(epsilon)] -> date#4.#24 + Epsilon closure : + (self) + date#4.#24 + +NFA state 124 = date#4.#25 + 8:[0-9] -> date#4.#26 + Epsilon closure : + (self) + +NFA state 125 = date#4.#26 + 8:[0-9] -> date#4.#27 + Epsilon closure : + (self) + +NFA state 126 = date#4.#27 + 8:[0-9] -> date#4.after_year_before_zone + Epsilon closure : + (self) + +NFA state 127 = date#4.after_year_before_zone + 0:[\t ] -> date#4.after_year_before_zone + [(epsilon)] -> date#4.zone#3.in + [(epsilon)] -> date#4.zone#4.in + Epsilon closure : + (self) + date#4.zone#3.in + date#4.zone#4.in + +NFA state 128 = date#4.zone#3.in + 5:[+] -> date#4.zone#3.zone2 + 6:[\055] -> date#4.zone#3.zone2 + 13:[A-Z] -> date#4.zone#3.out + 13:[A-Z] -> date#4.zone#3.zone2 + Epsilon closure : + (self) + +NFA state 129 = date#4.zone#3.zone2 + 8:[0-9] -> date#4.zone#3.out + 8:[0-9] -> date#4.zone#3.zone2 + 13:[A-Z] -> date#4.zone#3.out + 17:[a-z] -> 
date#4.zone#3.out + 13:[A-Z] -> date#4.zone#3.zone2 + 17:[a-z] -> date#4.zone#3.zone2 + Epsilon closure : + (self) + +NFA state 130 = date#4.zone#3.out + [(epsilon)] -> date#4.after_timezone_after_year + Epsilon closure : + (self) + #3 + date#4.after_timezone_after_year + date#4.out + +NFA state 131 = date#4.#28 + 0:[\t ] -> date#4.after_timezone_after_year_1 + Epsilon closure : + (self) + +NFA state 132 = date#4.zone#4.in + 5:[+] -> date#4.zone#4.zone2 + 6:[\055] -> date#4.zone#4.zone2 + 13:[A-Z] -> date#4.zone#4.out + 13:[A-Z] -> date#4.zone#4.zone2 + Epsilon closure : + (self) + +NFA state 133 = date#4.zone#4.zone2 + 8:[0-9] -> date#4.zone#4.out + 8:[0-9] -> date#4.zone#4.zone2 + 13:[A-Z] -> date#4.zone#4.out + 17:[a-z] -> date#4.zone#4.out + 13:[A-Z] -> date#4.zone#4.zone2 + 17:[a-z] -> date#4.zone#4.zone2 + Epsilon closure : + (self) + +NFA state 134 = date#4.zone#4.out + [(epsilon)] -> date#4.#28 + Epsilon closure : + (self) + date#4.#28 + +NFA state 135 = date#4.after_timezone_after_year_1 + 0:[\t ] -> date#4.after_timezone_after_year_1 + [(epsilon)] -> date#4.zone#5.in + Epsilon closure : + (self) + date#4.zone#5.in + +NFA state 136 = date#4.zone#5.in + 5:[+] -> date#4.zone#5.zone2 + 6:[\055] -> date#4.zone#5.zone2 + 13:[A-Z] -> date#4.zone#5.out + 13:[A-Z] -> date#4.zone#5.zone2 + Epsilon closure : + (self) + +NFA state 137 = date#4.zone#5.zone2 + 8:[0-9] -> date#4.zone#5.out + 8:[0-9] -> date#4.zone#5.zone2 + 13:[A-Z] -> date#4.zone#5.out + 17:[a-z] -> date#4.zone#5.out + 13:[A-Z] -> date#4.zone#5.zone2 + 17:[a-z] -> date#4.zone#5.zone2 + Epsilon closure : + (self) + +NFA state 138 = date#4.zone#5.out + [(epsilon)] -> date#4.after_timezone_after_year + Epsilon closure : + (self) + #3 + date#4.after_timezone_after_year + date#4.out + +NFA state 139 = date#4.after_timezone_after_year + 0:[\t ] -> date#4.after_timezone_after_year + [(epsilon)] -> date#4.out + Epsilon closure : + (self) + #3 + date#4.out + +NFA state 140 = date#4.after_timezone_1 + 0:[\t ] -> 
date#4.after_timezone_1 + [(epsilon)] -> date#4.zone#6.in + Epsilon closure : + (self) + date#4.zone#6.in + +NFA state 141 = date#4.#29 + 0:[\t ] -> date#4.after_timezone + Epsilon closure : + (self) + +NFA state 142 = date#4.zone#6.in + 5:[+] -> date#4.zone#6.zone2 + 6:[\055] -> date#4.zone#6.zone2 + 13:[A-Z] -> date#4.zone#6.out + 13:[A-Z] -> date#4.zone#6.zone2 + Epsilon closure : + (self) + +NFA state 143 = date#4.zone#6.zone2 + 8:[0-9] -> date#4.zone#6.out + 8:[0-9] -> date#4.zone#6.zone2 + 13:[A-Z] -> date#4.zone#6.out + 17:[a-z] -> date#4.zone#6.out + 13:[A-Z] -> date#4.zone#6.zone2 + 17:[a-z] -> date#4.zone#6.zone2 + Epsilon closure : + (self) + +NFA state 144 = date#4.zone#6.out + [(epsilon)] -> date#4.#29 + Epsilon closure : + (self) + date#4.#29 + +NFA state 145 = date#4.after_timezone + 0:[\t ] -> date#4.after_timezone + 8:[0-9] -> date#4.#30 + Epsilon closure : + (self) + +NFA state 146 = date#4.#30 + 8:[0-9] -> date#4.#31 + Epsilon closure : + (self) + +NFA state 147 = date#4.#31 + 8:[0-9] -> date#4.#32 + Epsilon closure : + (self) + +NFA state 148 = date#4.#32 + 8:[0-9] -> date#4.after_year + Epsilon closure : + (self) + +NFA state 149 = date#4.after_year + 0:[\t ] -> date#4.after_year + [(epsilon)] -> date#4.out + Epsilon closure : + (self) + #3 + date#4.out + +NFA state 150 = date#4.out + [(epsilon)] -> #3 + Epsilon closure : + (self) + #3 + +NFA state 151 = #4 + 1:[\n] -> #5 + Epsilon closure : + (self) + +NFA state 152 = #5 + Tags : FROMCHECK_PASS + Epsilon closure : + (self) + +-------------------------------- +DFA structure before compression +-------------------------------- +DFA state 0 + NFA states : + in + email#1.in + angled_email#2.in + before_date + date#3.in + date#4.in + + Forward route : + (START)->(HERE) + Transitions : + 0:[\t ] -> 1 + 3:[!#-'*/=?^`{-~] -> 2 + 4:["] -> 3 + 5:[+] -> 2 + 6:[\055] -> 2 + 7:[.] 
-> 2 + 8:[0-9] -> 2 + 10:[<] -> 4 + 12:[@] -> 5 + 13:[A-Z] -> 2 + 16:[_] -> 2 + 17:[a-z] -> 2 + +DFA state 1 + NFA states : + in + email#1.in + angled_email#2.in + before_date + date#3.in + date#3.before_weekday + date#4.in + date#4.before_weekday + + Forward route : (from state 0) + (START)->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 1 + 3:[!#-'*/=?^`{-~] -> 2 + 4:["] -> 3 + 5:[+] -> 2 + 6:[\055] -> 2 + 7:[.] -> 2 + 8:[0-9] -> 2 + 10:[<] -> 4 + 12:[@] -> 5 + 13:[A-Z] -> 6 + 16:[_] -> 2 + 17:[a-z] -> 2 + +DFA state 2 + NFA states : + email#1.in + email#1.before_at + email#1.out + before_date + date#3.in + date#4.in + + Forward route : (from state 0) + (START)->3:[!#-'*/=?^`{-~]->(HERE) + Transitions : + 0:[\t ] -> 7 + 3:[!#-'*/=?^`{-~] -> 2 + 4:["] -> 3 + 5:[+] -> 2 + 6:[\055] -> 2 + 7:[.] -> 2 + 8:[0-9] -> 2 + 12:[@] -> 8 + 13:[A-Z] -> 2 + 16:[_] -> 2 + 17:[a-z] -> 2 + +DFA state 3 + NFA states : + email#1.quoted_before_at + + Forward route : (from state 0) + (START)->4:["]->(HERE) + Transitions : + 0:[\t ] -> 3 + 3:[!#-'*/=?^`{-~] -> 3 + 4:["] -> 9 + 5:[+] -> 3 + 6:[\055] -> 3 + 7:[.] -> 3 + 8:[0-9] -> 3 + 9:[:] -> 3 + 10:[<] -> 3 + 11:[>] -> 3 + 12:[@] -> 3 + 13:[A-Z] -> 3 + 16:[_] -> 3 + 17:[a-z] -> 3 + +DFA state 4 + NFA states : + angled_email#2.in_angles + angled_email#2.email#1.in + + Forward route : (from state 0) + (START)->10:[<]->(HERE) + Transitions : + 3:[!#-'*/=?^`{-~] -> 10 + 4:["] -> 11 + 5:[+] -> 10 + 6:[\055] -> 10 + 7:[.] -> 10 + 8:[0-9] -> 10 + 12:[@] -> 12 + 13:[A-Z] -> 10 + 16:[_] -> 10 + 17:[a-z] -> 10 + +DFA state 5 + NFA states : + email#1.domain_route + + Forward route : (from state 0) + (START)->12:[@]->(HERE) + Transitions : + 6:[\055] -> 5 + 7:[.] 
-> 5 + 8:[0-9] -> 5 + 9:[:] -> 13 + 13:[A-Z] -> 5 + 16:[_] -> 5 + 17:[a-z] -> 5 + +DFA state 6 + NFA states : + email#1.in + email#1.before_at + email#1.out + before_date + date#3.in + date#3.#1 + date#4.in + date#4.#1 + + Forward route : (from state 1) + (START)->0:[\t ]->13:[A-Z]->(HERE) + Transitions : + 0:[\t ] -> 7 + 3:[!#-'*/=?^`{-~] -> 2 + 4:["] -> 3 + 5:[+] -> 2 + 6:[\055] -> 2 + 7:[.] -> 2 + 8:[0-9] -> 2 + 12:[@] -> 8 + 13:[A-Z] -> 2 + 16:[_] -> 2 + 17:[a-z] -> 14 + +DFA state 7 + NFA states : + date#3.in + date#3.before_weekday + date#4.in + date#4.before_weekday + + Forward route : (from state 2) + (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 7 + 13:[A-Z] -> 15 + +DFA state 8 + NFA states : + email#1.domain_route + email#1.start_of_domain + + Forward route : (from state 2) + (START)->3:[!#-'*/=?^`{-~]->12:[@]->(HERE) + Transitions : + 6:[\055] -> 16 + 7:[.] -> 16 + 8:[0-9] -> 16 + 9:[:] -> 13 + 13:[A-Z] -> 16 + 14:[[] -> 17 + 16:[_] -> 16 + 17:[a-z] -> 16 + +DFA state 9 + NFA states : + email#1.before_at + email#1.out + before_date + date#3.in + date#4.in + + Forward route : (from state 3) + (START)->4:["]->4:["]->(HERE) + Transitions : + 0:[\t ] -> 7 + 3:[!#-'*/=?^`{-~] -> 9 + 4:["] -> 3 + 5:[+] -> 9 + 6:[\055] -> 9 + 7:[.] -> 9 + 8:[0-9] -> 9 + 12:[@] -> 18 + 13:[A-Z] -> 9 + 16:[_] -> 9 + 17:[a-z] -> 9 + +DFA state 10 + NFA states : + angled_email#2.email#1.in + angled_email#2.email#1.before_at + angled_email#2.email#1.out + angled_email#2.before_gt + + Forward route : (from state 4) + (START)->10:[<]->3:[!#-'*/=?^`{-~]->(HERE) + Transitions : + 3:[!#-'*/=?^`{-~] -> 10 + 4:["] -> 11 + 5:[+] -> 10 + 6:[\055] -> 10 + 7:[.] 
-> 10 + 8:[0-9] -> 10 + 11:[>] -> 19 + 12:[@] -> 20 + 13:[A-Z] -> 10 + 16:[_] -> 10 + 17:[a-z] -> 10 + +DFA state 11 + NFA states : + angled_email#2.email#1.quoted_before_at + + Forward route : (from state 4) + (START)->10:[<]->4:["]->(HERE) + Transitions : + 0:[\t ] -> 11 + 3:[!#-'*/=?^`{-~] -> 11 + 4:["] -> 21 + 5:[+] -> 11 + 6:[\055] -> 11 + 7:[.] -> 11 + 8:[0-9] -> 11 + 9:[:] -> 11 + 10:[<] -> 11 + 11:[>] -> 11 + 12:[@] -> 11 + 13:[A-Z] -> 11 + 16:[_] -> 11 + 17:[a-z] -> 11 + +DFA state 12 + NFA states : + angled_email#2.email#1.domain_route + + Forward route : (from state 4) + (START)->10:[<]->12:[@]->(HERE) + Transitions : + 6:[\055] -> 12 + 7:[.] -> 12 + 8:[0-9] -> 12 + 9:[:] -> 22 + 13:[A-Z] -> 12 + 16:[_] -> 12 + 17:[a-z] -> 12 + +DFA state 13 + NFA states : + email#1.in + + Forward route : (from state 5) + (START)->12:[@]->9:[:]->(HERE) + Transitions : + 3:[!#-'*/=?^`{-~] -> 2 + 4:["] -> 3 + 5:[+] -> 2 + 6:[\055] -> 2 + 7:[.] -> 2 + 8:[0-9] -> 2 + 12:[@] -> 5 + 13:[A-Z] -> 2 + 16:[_] -> 2 + 17:[a-z] -> 2 + +DFA state 14 + NFA states : + email#1.in + email#1.before_at + email#1.out + before_date + date#3.in + date#3.#2 + date#4.in + date#4.#2 + + Forward route : (from state 6) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->(HERE) + Transitions : + 0:[\t ] -> 7 + 3:[!#-'*/=?^`{-~] -> 2 + 4:["] -> 3 + 5:[+] -> 2 + 6:[\055] -> 2 + 7:[.] -> 2 + 8:[0-9] -> 2 + 12:[@] -> 8 + 13:[A-Z] -> 2 + 16:[_] -> 2 + 17:[a-z] -> 23 + +DFA state 15 + NFA states : + date#3.#1 + date#4.#1 + + Forward route : (from state 7) + (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->(HERE) + Transitions : + 17:[a-z] -> 24 + +DFA state 16 + NFA states : + email#1.domain_route + email#1.after_at + + Forward route : (from state 8) + (START)->3:[!#-'*/=?^`{-~]->12:[@]->6:[\055]->(HERE) + Transitions : + 6:[\055] -> 25 + 7:[.] 
-> 25 + 8:[0-9] -> 25 + 9:[:] -> 13 + 13:[A-Z] -> 25 + 16:[_] -> 25 + 17:[a-z] -> 25 + +DFA state 17 + NFA states : + email#1.dotted_quad + + Forward route : (from state 8) + (START)->3:[!#-'*/=?^`{-~]->12:[@]->14:[[]->(HERE) + Transitions : + 7:[.] -> 17 + 8:[0-9] -> 17 + 15:[]] -> 26 + +DFA state 18 + NFA states : + email#1.start_of_domain + + Forward route : (from state 9) + (START)->4:["]->4:["]->12:[@]->(HERE) + Transitions : + 6:[\055] -> 27 + 7:[.] -> 27 + 8:[0-9] -> 27 + 13:[A-Z] -> 27 + 14:[[] -> 17 + 16:[_] -> 27 + 17:[a-z] -> 27 + +DFA state 19 + NFA states : + angled_email#2.out + before_date + date#3.in + date#4.in + + Forward route : (from state 10) + (START)->10:[<]->3:[!#-'*/=?^`{-~]->11:[>]->(HERE) + Transitions : + 0:[\t ] -> 7 + +DFA state 20 + NFA states : + angled_email#2.email#1.domain_route + angled_email#2.email#1.start_of_domain + + Forward route : (from state 10) + (START)->10:[<]->3:[!#-'*/=?^`{-~]->12:[@]->(HERE) + Transitions : + 6:[\055] -> 28 + 7:[.] -> 28 + 8:[0-9] -> 28 + 9:[:] -> 22 + 13:[A-Z] -> 28 + 14:[[] -> 29 + 16:[_] -> 28 + 17:[a-z] -> 28 + +DFA state 21 + NFA states : + angled_email#2.email#1.before_at + angled_email#2.email#1.out + angled_email#2.before_gt + + Forward route : (from state 11) + (START)->10:[<]->4:["]->4:["]->(HERE) + Transitions : + 3:[!#-'*/=?^`{-~] -> 21 + 4:["] -> 11 + 5:[+] -> 21 + 6:[\055] -> 21 + 7:[.] -> 21 + 8:[0-9] -> 21 + 11:[>] -> 19 + 12:[@] -> 30 + 13:[A-Z] -> 21 + 16:[_] -> 21 + 17:[a-z] -> 21 + +DFA state 22 + NFA states : + angled_email#2.email#1.in + + Forward route : (from state 12) + (START)->10:[<]->12:[@]->9:[:]->(HERE) + Transitions : + 3:[!#-'*/=?^`{-~] -> 10 + 4:["] -> 11 + 5:[+] -> 10 + 6:[\055] -> 10 + 7:[.] 
-> 10 + 8:[0-9] -> 10 + 12:[@] -> 12 + 13:[A-Z] -> 10 + 16:[_] -> 10 + 17:[a-z] -> 10 + +DFA state 23 + NFA states : + email#1.in + email#1.before_at + email#1.out + before_date + date#3.in + date#3.#3 + date#4.in + date#4.#3 + + Forward route : (from state 14) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->(HERE) + Transitions : + 0:[\t ] -> 31 + 3:[!#-'*/=?^`{-~] -> 2 + 4:["] -> 3 + 5:[+] -> 2 + 6:[\055] -> 2 + 7:[.] -> 2 + 8:[0-9] -> 2 + 12:[@] -> 8 + 13:[A-Z] -> 2 + 16:[_] -> 2 + 17:[a-z] -> 2 + +DFA state 24 + NFA states : + date#3.#2 + date#4.#2 + + Forward route : (from state 15) + (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->(HERE) + Transitions : + 17:[a-z] -> 32 + +DFA state 25 + NFA states : + email#1.domain_route + email#1.after_at + email#1.out + before_date + date#3.in + date#4.in + + Forward route : (from state 16) + (START)->3:[!#-'*/=?^`{-~]->12:[@]->6:[\055]->6:[\055]->(HERE) + Transitions : + 0:[\t ] -> 7 + 6:[\055] -> 25 + 7:[.] -> 25 + 8:[0-9] -> 25 + 9:[:] -> 13 + 13:[A-Z] -> 25 + 16:[_] -> 25 + 17:[a-z] -> 25 + +DFA state 26 + NFA states : + email#1.out + before_date + date#3.in + date#4.in + + Forward route : (from state 17) + (START)->3:[!#-'*/=?^`{-~]->12:[@]->14:[[]->15:[]]->(HERE) + Transitions : + 0:[\t ] -> 7 + +DFA state 27 + NFA states : + email#1.after_at + + Forward route : (from state 18) + (START)->4:["]->4:["]->12:[@]->6:[\055]->(HERE) + Transitions : + 6:[\055] -> 33 + 7:[.] -> 33 + 8:[0-9] -> 33 + 13:[A-Z] -> 33 + 16:[_] -> 33 + 17:[a-z] -> 33 + +DFA state 28 + NFA states : + angled_email#2.email#1.domain_route + angled_email#2.email#1.after_at + + Forward route : (from state 20) + (START)->10:[<]->3:[!#-'*/=?^`{-~]->12:[@]->6:[\055]->(HERE) + Transitions : + 6:[\055] -> 34 + 7:[.] 
-> 34 + 8:[0-9] -> 34 + 9:[:] -> 22 + 13:[A-Z] -> 34 + 16:[_] -> 34 + 17:[a-z] -> 34 + +DFA state 29 + NFA states : + angled_email#2.email#1.dotted_quad + + Forward route : (from state 20) + (START)->10:[<]->3:[!#-'*/=?^`{-~]->12:[@]->14:[[]->(HERE) + Transitions : + 7:[.] -> 29 + 8:[0-9] -> 29 + 15:[]] -> 35 + +DFA state 30 + NFA states : + angled_email#2.email#1.start_of_domain + + Forward route : (from state 21) + (START)->10:[<]->4:["]->4:["]->12:[@]->(HERE) + Transitions : + 6:[\055] -> 36 + 7:[.] -> 36 + 8:[0-9] -> 36 + 13:[A-Z] -> 36 + 14:[[] -> 29 + 16:[_] -> 36 + 17:[a-z] -> 36 + +DFA state 31 + NFA states : + date#3.in + date#3.before_weekday + date#3.after_weekday + date#4.in + date#4.before_weekday + date#4.after_weekday + + Forward route : (from state 23) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 31 + 13:[A-Z] -> 37 + +DFA state 32 + NFA states : + date#3.#3 + date#4.#3 + + Forward route : (from state 24) + (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->(HERE) + Transitions : + 0:[\t ] -> 38 + +DFA state 33 + NFA states : + email#1.after_at + email#1.out + before_date + date#3.in + date#4.in + + Forward route : (from state 27) + (START)->4:["]->4:["]->12:[@]->6:[\055]->6:[\055]->(HERE) + Transitions : + 0:[\t ] -> 7 + 6:[\055] -> 33 + 7:[.] -> 33 + 8:[0-9] -> 33 + 13:[A-Z] -> 33 + 16:[_] -> 33 + 17:[a-z] -> 33 + +DFA state 34 + NFA states : + angled_email#2.email#1.domain_route + angled_email#2.email#1.after_at + angled_email#2.email#1.out + angled_email#2.before_gt + + Forward route : (from state 28) + (START)->10:[<]->3:[!#-'*/=?^`{-~]->12:[@]->6:[\055]->6:[\055]->(HERE) + Transitions : + 6:[\055] -> 34 + 7:[.] 
-> 34 + 8:[0-9] -> 34 + 9:[:] -> 22 + 11:[>] -> 19 + 13:[A-Z] -> 34 + 16:[_] -> 34 + 17:[a-z] -> 34 + +DFA state 35 + NFA states : + angled_email#2.email#1.out + angled_email#2.before_gt + + Forward route : (from state 29) + (START)->10:[<]->3:[!#-'*/=?^`{-~]->12:[@]->14:[[]->15:[]]->(HERE) + Transitions : + 11:[>] -> 19 + +DFA state 36 + NFA states : + angled_email#2.email#1.after_at + + Forward route : (from state 30) + (START)->10:[<]->4:["]->4:["]->12:[@]->6:[\055]->(HERE) + Transitions : + 6:[\055] -> 39 + 7:[.] -> 39 + 8:[0-9] -> 39 + 13:[A-Z] -> 39 + 16:[_] -> 39 + 17:[a-z] -> 39 + +DFA state 37 + NFA states : + date#3.#1 + date#3.#4 + date#4.#1 + date#4.#4 + + Forward route : (from state 31) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->(HERE) + Transitions : + 17:[a-z] -> 40 + +DFA state 38 + NFA states : + date#3.after_weekday + date#4.after_weekday + + Forward route : (from state 32) + (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 38 + 13:[A-Z] -> 41 + +DFA state 39 + NFA states : + angled_email#2.email#1.after_at + angled_email#2.email#1.out + angled_email#2.before_gt + + Forward route : (from state 36) + (START)->10:[<]->4:["]->4:["]->12:[@]->6:[\055]->6:[\055]->(HERE) + Transitions : + 6:[\055] -> 39 + 7:[.] 
-> 39 + 8:[0-9] -> 39 + 11:[>] -> 19 + 13:[A-Z] -> 39 + 16:[_] -> 39 + 17:[a-z] -> 39 + +DFA state 40 + NFA states : + date#3.#2 + date#3.#5 + date#4.#2 + date#4.#5 + + Forward route : (from state 37) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->(HERE) + Transitions : + 17:[a-z] -> 42 + +DFA state 41 + NFA states : + date#3.#4 + date#4.#4 + + Forward route : (from state 38) + (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->(HERE) + Transitions : + 17:[a-z] -> 43 + +DFA state 42 + NFA states : + date#3.#3 + date#3.#6 + date#4.#3 + date#4.#6 + + Forward route : (from state 40) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->(HERE) + Transitions : + 0:[\t ] -> 44 + +DFA state 43 + NFA states : + date#3.#5 + date#4.#5 + + Forward route : (from state 41) + (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->(HERE) + Transitions : + 17:[a-z] -> 45 + +DFA state 44 + NFA states : + date#3.after_weekday + date#3.after_month + date#4.after_weekday + date#4.after_month + + Forward route : (from state 42) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 44 + 8:[0-9] -> 46 + 13:[A-Z] -> 41 + +DFA state 45 + NFA states : + date#3.#6 + date#4.#6 + + Forward route : (from state 43) + (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->(HERE) + Transitions : + 0:[\t ] -> 47 + +DFA state 46 + NFA states : + date#3.#7 + date#3.#8 + date#4.#7 + date#4.#8 + + Forward route : (from state 44) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->(HERE) + Transitions : + 0:[\t ] -> 48 + 8:[0-9] -> 49 + +DFA state 47 + NFA states : + date#3.after_month + date#4.after_month + + Forward route : (from state 45) + (START)->3:[!#-'*/=?^`{-~]->0:[\t 
]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 47 + 8:[0-9] -> 46 + +DFA state 48 + NFA states : + date#3.after_day + date#4.after_day + + Forward route : (from state 46) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 48 + 8:[0-9] -> 50 + +DFA state 49 + NFA states : + date#3.#9 + date#4.#9 + + Forward route : (from state 46) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->8:[0-9]->(HERE) + Transitions : + 0:[\t ] -> 48 + +DFA state 50 + NFA states : + date#3.#10 + date#3.#18 + date#4.#10 + date#4.#18 + + Forward route : (from state 48) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->(HERE) + Transitions : + 8:[0-9] -> 51 + +DFA state 51 + NFA states : + date#3.#11 + date#3.#19 + date#4.#11 + date#4.#19 + + Forward route : (from state 50) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->(HERE) + Transitions : + 9:[:] -> 52 + +DFA state 52 + NFA states : + date#3.#12 + date#3.#20 + date#4.#12 + date#4.#20 + + Forward route : (from state 51) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->(HERE) + Transitions : + 8:[0-9] -> 53 + +DFA state 53 + NFA states : + date#3.#13 + date#3.#21 + date#4.#13 + date#4.#21 + + Forward route : (from state 52) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->(HERE) + Transitions : + 8:[0-9] -> 54 + +DFA state 54 + NFA states : + date#3.#14 + date#3.#22 + date#4.#14 + date#4.#22 + + Forward route : (from state 53) + (START)->0:[\t 
]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->(HERE) + Transitions : + 0:[\t ] -> 55 + 9:[:] -> 56 + +DFA state 55 + NFA states : + date#3.after_time + date#3.zone#1.in + date#3.zone#2.in + date#3.after_timezone + date#4.after_time + date#4.zone#1.in + date#4.zone#2.in + date#4.after_timezone + + Forward route : (from state 54) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 55 + 5:[+] -> 57 + 6:[\055] -> 57 + 8:[0-9] -> 58 + 13:[A-Z] -> 59 + +DFA state 56 + NFA states : + date#3.#15 + date#4.#15 + + Forward route : (from state 54) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->9:[:]->(HERE) + Transitions : + 8:[0-9] -> 60 + +DFA state 57 + NFA states : + date#3.zone#1.zone2 + date#3.zone#2.zone2 + date#4.zone#1.zone2 + date#4.zone#2.zone2 + + Forward route : (from state 55) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->5:[+]->(HERE) + Transitions : + 8:[0-9] -> 59 + 13:[A-Z] -> 59 + 17:[a-z] -> 59 + +DFA state 58 + NFA states : + date#3.#25 + date#3.#30 + date#4.#25 + date#4.#30 + + Forward route : (from state 55) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->(HERE) + Transitions : + 8:[0-9] -> 61 + +DFA state 59 + NFA states : + date#3.#23 + date#3.zone#1.zone2 + date#3.zone#1.out + date#3.#24 + date#3.zone#2.zone2 + date#3.zone#2.out + date#4.#23 + date#4.zone#1.zone2 + date#4.zone#1.out + date#4.#24 + date#4.zone#2.zone2 + date#4.zone#2.out + + Forward 
route : (from state 55) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->(HERE) + Transitions : + 0:[\t ] -> 62 + 8:[0-9] -> 59 + 13:[A-Z] -> 59 + 17:[a-z] -> 59 + +DFA state 60 + NFA states : + date#3.#16 + date#4.#16 + + Forward route : (from state 56) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->(HERE) + Transitions : + 8:[0-9] -> 63 + +DFA state 61 + NFA states : + date#3.#26 + date#3.#31 + date#4.#26 + date#4.#31 + + Forward route : (from state 58) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->(HERE) + Transitions : + 8:[0-9] -> 64 + +DFA state 62 + NFA states : + date#3.after_timezone_1 + date#3.zone#6.in + date#3.after_timezone + date#4.after_timezone_1 + date#4.zone#6.in + date#4.after_timezone + + Forward route : (from state 59) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 62 + 5:[+] -> 65 + 6:[\055] -> 65 + 8:[0-9] -> 66 + 13:[A-Z] -> 67 + +DFA state 63 + NFA states : + date#3.#17 + date#4.#17 + + Forward route : (from state 60) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->(HERE) + Transitions : + 0:[\t ] -> 55 + +DFA state 64 + NFA states : + date#3.#27 + date#3.#32 + date#4.#27 + date#4.#32 + + Forward route : (from state 61) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t 
]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->(HERE) + Transitions : + 8:[0-9] -> 68 + +DFA state 65 + NFA states : + date#3.zone#6.zone2 + date#4.zone#6.zone2 + + Forward route : (from state 62) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->5:[+]->(HERE) + Transitions : + 8:[0-9] -> 67 + 13:[A-Z] -> 67 + 17:[a-z] -> 67 + +DFA state 66 + NFA states : + date#3.#30 + date#4.#30 + + Forward route : (from state 62) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->8:[0-9]->(HERE) + Transitions : + 8:[0-9] -> 69 + +DFA state 67 + NFA states : + date#3.#29 + date#3.zone#6.zone2 + date#3.zone#6.out + date#4.#29 + date#4.zone#6.zone2 + date#4.zone#6.out + + Forward route : (from state 62) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->13:[A-Z]->(HERE) + Transitions : + 0:[\t ] -> 70 + 8:[0-9] -> 67 + 13:[A-Z] -> 67 + 17:[a-z] -> 67 + +DFA state 68 + NFA states : + #1 + date#3.after_year_before_zone + date#3.zone#3.in + date#3.zone#4.in + date#3.after_year + date#3.out + #3 + date#4.after_year_before_zone + date#4.zone#3.in + date#4.zone#4.in + date#4.after_year + date#4.out + + Forward route : (from state 64) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->(HERE) + Transitions : + 0:[\t ] -> 68 + 1:[\n] -> 71 + 2:[\r] -> 72 + 5:[+] -> 73 + 6:[\055] -> 73 + 13:[A-Z] -> 74 + +DFA state 69 + NFA states : + date#3.#31 + date#4.#31 + + Forward route : (from state 66) + 
(START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->8:[0-9]->8:[0-9]->(HERE) + Transitions : + 8:[0-9] -> 75 + +DFA state 70 + NFA states : + date#3.after_timezone + date#4.after_timezone + + Forward route : (from state 67) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->13:[A-Z]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 70 + 8:[0-9] -> 66 + +DFA state 71 + NFA states : + #2 + + Forward route : (from state 68) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->1:[\n]->(HERE) + Transitions : + NFA exit tags applying : + FROMCHECK_PASS + Attributes for <(DEFAULT)> : FROMCHECK_PASS + +DFA state 72 + NFA states : + #4 + + Forward route : (from state 68) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->2:[\r]->(HERE) + Transitions : + 1:[\n] -> 76 + +DFA state 73 + NFA states : + date#3.zone#3.zone2 + date#3.zone#4.zone2 + date#4.zone#3.zone2 + date#4.zone#4.zone2 + + Forward route : (from state 68) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->5:[+]->(HERE) + Transitions : + 8:[0-9] -> 74 + 13:[A-Z] -> 74 + 17:[a-z] -> 74 + +DFA state 74 + NFA states : + #1 + date#3.zone#3.zone2 + date#3.zone#3.out + date#3.#28 + date#3.zone#4.zone2 + date#3.zone#4.out + date#3.after_timezone_after_year + date#3.out + #3 + date#4.zone#3.zone2 + 
date#4.zone#3.out + date#4.#28 + date#4.zone#4.zone2 + date#4.zone#4.out + date#4.after_timezone_after_year + date#4.out + + Forward route : (from state 68) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->13:[A-Z]->(HERE) + Transitions : + 0:[\t ] -> 77 + 1:[\n] -> 71 + 2:[\r] -> 72 + 8:[0-9] -> 74 + 13:[A-Z] -> 74 + 17:[a-z] -> 74 + +DFA state 75 + NFA states : + date#3.#32 + date#4.#32 + + Forward route : (from state 69) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->(HERE) + Transitions : + 8:[0-9] -> 78 + +DFA state 76 + NFA states : + #5 + + Forward route : (from state 72) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->2:[\r]->1:[\n]->(HERE) + Transitions : + NFA exit tags applying : + FROMCHECK_PASS + Attributes for <(DEFAULT)> : FROMCHECK_PASS + +DFA state 77 + NFA states : + #1 + date#3.after_timezone_after_year_1 + date#3.zone#5.in + date#3.after_timezone_after_year + date#3.out + #3 + date#4.after_timezone_after_year_1 + date#4.zone#5.in + date#4.after_timezone_after_year + date#4.out + + Forward route : (from state 74) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->13:[A-Z]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 77 + 1:[\n] -> 71 + 2:[\r] -> 72 + 5:[+] -> 79 + 6:[\055] -> 79 + 13:[A-Z] -> 80 + +DFA state 78 + NFA states : + #1 + date#3.after_year + date#3.out + #3 + date#4.after_year + date#4.out + + Forward route : (from 
state 75) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->(HERE) + Transitions : + 0:[\t ] -> 78 + 1:[\n] -> 71 + 2:[\r] -> 72 + +DFA state 79 + NFA states : + date#3.zone#5.zone2 + date#4.zone#5.zone2 + + Forward route : (from state 77) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->13:[A-Z]->0:[\t ]->5:[+]->(HERE) + Transitions : + 8:[0-9] -> 80 + 13:[A-Z] -> 80 + 17:[a-z] -> 80 + +DFA state 80 + NFA states : + #1 + date#3.zone#5.zone2 + date#3.zone#5.out + date#3.after_timezone_after_year + date#3.out + #3 + date#4.zone#5.zone2 + date#4.zone#5.out + date#4.after_timezone_after_year + date#4.out + + Forward route : (from state 77) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->13:[A-Z]->0:[\t ]->13:[A-Z]->(HERE) + Transitions : + 0:[\t ] -> 81 + 1:[\n] -> 71 + 2:[\r] -> 72 + 8:[0-9] -> 80 + 13:[A-Z] -> 80 + 17:[a-z] -> 80 + +DFA state 81 + NFA states : + #1 + date#3.after_timezone_after_year + date#3.out + #3 + date#4.after_timezone_after_year + date#4.out + + Forward route : (from state 80) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->13:[A-Z]->0:[\t ]->13:[A-Z]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 81 + 1:[\n] -> 71 + 2:[\r] -> 72 + + +Entry states in DFA: +Entry <(ONLY ENTRY)> : 0 +Searching for dead states... 
+(no dead states found) + +----------------------------- +------ COMPRESSING DFA ------ +----------------------------- +Old DFA state 0 becomes 0 +Old DFA state 1 becomes 1 +Old DFA state 2 becomes 2 +Old DFA state 3 becomes 3 +Old DFA state 4 becomes 4 +Old DFA state 5 becomes 5 +Old DFA state 6 becomes 6 +Old DFA state 7 becomes 7 +Old DFA state 8 becomes 8 +Old DFA state 9 becomes 9 +Old DFA state 10 becomes 10 +Old DFA state 11 becomes 11 +Old DFA state 12 becomes 12 +Old DFA state 13 becomes 13 +Old DFA state 14 becomes 14 +Old DFA state 15 becomes 15 +Old DFA state 16 becomes 16 +Old DFA state 17 becomes 17 +Old DFA state 18 becomes 18 +Old DFA state 19 becomes 19 +Old DFA state 20 becomes 20 +Old DFA state 21 becomes 21 +Old DFA state 22 becomes 4 (formerly 4) +Old DFA state 23 becomes 22 +Old DFA state 24 becomes 23 +Old DFA state 25 becomes 24 +Old DFA state 26 becomes 19 (formerly 19) +Old DFA state 27 becomes 25 +Old DFA state 28 becomes 26 +Old DFA state 29 becomes 27 +Old DFA state 30 becomes 28 +Old DFA state 31 becomes 29 +Old DFA state 32 becomes 30 +Old DFA state 33 becomes 31 +Old DFA state 34 becomes 32 +Old DFA state 35 becomes 33 +Old DFA state 36 becomes 34 +Old DFA state 37 becomes 35 +Old DFA state 38 becomes 36 +Old DFA state 39 becomes 37 +Old DFA state 40 becomes 38 +Old DFA state 41 becomes 39 +Old DFA state 42 becomes 40 +Old DFA state 43 becomes 41 +Old DFA state 44 becomes 42 +Old DFA state 45 becomes 43 +Old DFA state 46 becomes 44 +Old DFA state 47 becomes 45 +Old DFA state 48 becomes 46 +Old DFA state 49 becomes 47 +Old DFA state 50 becomes 48 +Old DFA state 51 becomes 49 +Old DFA state 52 becomes 50 +Old DFA state 53 becomes 51 +Old DFA state 54 becomes 52 +Old DFA state 55 becomes 53 +Old DFA state 56 becomes 54 +Old DFA state 57 becomes 55 +Old DFA state 58 becomes 56 +Old DFA state 59 becomes 57 +Old DFA state 60 becomes 58 +Old DFA state 61 becomes 59 +Old DFA state 62 becomes 60 +Old DFA state 63 becomes 61 +Old DFA state 64 
becomes 62 +Old DFA state 65 becomes 63 +Old DFA state 66 becomes 64 +Old DFA state 67 becomes 65 +Old DFA state 68 becomes 66 +Old DFA state 69 becomes 67 +Old DFA state 70 becomes 68 +Old DFA state 71 becomes 69 +Old DFA state 72 becomes 70 +Old DFA state 73 becomes 71 +Old DFA state 74 becomes 72 +Old DFA state 75 becomes 73 +Old DFA state 76 becomes 69 (formerly 71) +Old DFA state 77 becomes 74 +Old DFA state 78 becomes 75 +Old DFA state 79 becomes 76 +Old DFA state 80 becomes 77 +Old DFA state 81 becomes 75 (formerly 78) +Entry <(ONLY ENTRY)>, formerly state 0, now state 0 +------------------------------- +DFA structure after compression +------------------------------- +DFA state 0 + Forward route : + (START)->(HERE) + Transitions : + 0:[\t ] -> 1 + 3:[!#-'*/=?^`{-~] -> 2 + 4:["] -> 3 + 5:[+] -> 2 + 6:[\055] -> 2 + 7:[.] -> 2 + 8:[0-9] -> 2 + 10:[<] -> 4 + 12:[@] -> 5 + 13:[A-Z] -> 2 + 16:[_] -> 2 + 17:[a-z] -> 2 + +DFA state 1 + Forward route : (from state 0) + (START)->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 1 + 3:[!#-'*/=?^`{-~] -> 2 + 4:["] -> 3 + 5:[+] -> 2 + 6:[\055] -> 2 + 7:[.] -> 2 + 8:[0-9] -> 2 + 10:[<] -> 4 + 12:[@] -> 5 + 13:[A-Z] -> 6 + 16:[_] -> 2 + 17:[a-z] -> 2 + Use state 0 as basis (1 fixups) + +DFA state 2 + Forward route : (from state 0) + (START)->3:[!#-'*/=?^`{-~]->(HERE) + Transitions : + 0:[\t ] -> 7 + 3:[!#-'*/=?^`{-~] -> 2 + 4:["] -> 3 + 5:[+] -> 2 + 6:[\055] -> 2 + 7:[.] -> 2 + 8:[0-9] -> 2 + 12:[@] -> 8 + 13:[A-Z] -> 2 + 16:[_] -> 2 + 17:[a-z] -> 2 + Use state 0 as basis (3 fixups) + +DFA state 3 + Forward route : (from state 0) + (START)->4:["]->(HERE) + Transitions : + 0:[\t ] -> 3 + 3:[!#-'*/=?^`{-~] -> 3 + 4:["] -> 9 + 5:[+] -> 3 + 6:[\055] -> 3 + 7:[.] 
-> 3 + 8:[0-9] -> 3 + 9:[:] -> 3 + 10:[<] -> 3 + 11:[>] -> 3 + 12:[@] -> 3 + 13:[A-Z] -> 3 + 16:[_] -> 3 + 17:[a-z] -> 3 + +DFA state 4 + Forward route : (from state 0) + (START)->10:[<]->(HERE) + Transitions : + 3:[!#-'*/=?^`{-~] -> 10 + 4:["] -> 11 + 5:[+] -> 10 + 6:[\055] -> 10 + 7:[.] -> 10 + 8:[0-9] -> 10 + 12:[@] -> 12 + 13:[A-Z] -> 10 + 16:[_] -> 10 + 17:[a-z] -> 10 + +DFA state 5 + Forward route : (from state 0) + (START)->12:[@]->(HERE) + Transitions : + 6:[\055] -> 5 + 7:[.] -> 5 + 8:[0-9] -> 5 + 9:[:] -> 13 + 13:[A-Z] -> 5 + 16:[_] -> 5 + 17:[a-z] -> 5 + +DFA state 6 + Forward route : (from state 1) + (START)->0:[\t ]->13:[A-Z]->(HERE) + Transitions : + 0:[\t ] -> 7 + 3:[!#-'*/=?^`{-~] -> 2 + 4:["] -> 3 + 5:[+] -> 2 + 6:[\055] -> 2 + 7:[.] -> 2 + 8:[0-9] -> 2 + 12:[@] -> 8 + 13:[A-Z] -> 2 + 16:[_] -> 2 + 17:[a-z] -> 14 + Use state 0 as basis (4 fixups) + +DFA state 7 + Forward route : (from state 2) + (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 7 + 13:[A-Z] -> 15 + +DFA state 8 + Forward route : (from state 2) + (START)->3:[!#-'*/=?^`{-~]->12:[@]->(HERE) + Transitions : + 6:[\055] -> 16 + 7:[.] -> 16 + 8:[0-9] -> 16 + 9:[:] -> 13 + 13:[A-Z] -> 16 + 14:[[] -> 17 + 16:[_] -> 16 + 17:[a-z] -> 16 + +DFA state 9 + Forward route : (from state 3) + (START)->4:["]->4:["]->(HERE) + Transitions : + 0:[\t ] -> 7 + 3:[!#-'*/=?^`{-~] -> 9 + 4:["] -> 3 + 5:[+] -> 9 + 6:[\055] -> 9 + 7:[.] -> 9 + 8:[0-9] -> 9 + 12:[@] -> 18 + 13:[A-Z] -> 9 + 16:[_] -> 9 + 17:[a-z] -> 9 + +DFA state 10 + Forward route : (from state 4) + (START)->10:[<]->3:[!#-'*/=?^`{-~]->(HERE) + Transitions : + 3:[!#-'*/=?^`{-~] -> 10 + 4:["] -> 11 + 5:[+] -> 10 + 6:[\055] -> 10 + 7:[.] 
-> 10 + 8:[0-9] -> 10 + 11:[>] -> 19 + 12:[@] -> 20 + 13:[A-Z] -> 10 + 16:[_] -> 10 + 17:[a-z] -> 10 + Use state 4 as basis (2 fixups) + +DFA state 11 + Forward route : (from state 4) + (START)->10:[<]->4:["]->(HERE) + Transitions : + 0:[\t ] -> 11 + 3:[!#-'*/=?^`{-~] -> 11 + 4:["] -> 21 + 5:[+] -> 11 + 6:[\055] -> 11 + 7:[.] -> 11 + 8:[0-9] -> 11 + 9:[:] -> 11 + 10:[<] -> 11 + 11:[>] -> 11 + 12:[@] -> 11 + 13:[A-Z] -> 11 + 16:[_] -> 11 + 17:[a-z] -> 11 + +DFA state 12 + Forward route : (from state 4) + (START)->10:[<]->12:[@]->(HERE) + Transitions : + 6:[\055] -> 12 + 7:[.] -> 12 + 8:[0-9] -> 12 + 9:[:] -> 4 + 13:[A-Z] -> 12 + 16:[_] -> 12 + 17:[a-z] -> 12 + +DFA state 13 + Forward route : (from state 5) + (START)->12:[@]->9:[:]->(HERE) + Transitions : + 3:[!#-'*/=?^`{-~] -> 2 + 4:["] -> 3 + 5:[+] -> 2 + 6:[\055] -> 2 + 7:[.] -> 2 + 8:[0-9] -> 2 + 12:[@] -> 5 + 13:[A-Z] -> 2 + 16:[_] -> 2 + 17:[a-z] -> 2 + Use state 0 as basis (2 fixups) + +DFA state 14 + Forward route : (from state 6) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->(HERE) + Transitions : + 0:[\t ] -> 7 + 3:[!#-'*/=?^`{-~] -> 2 + 4:["] -> 3 + 5:[+] -> 2 + 6:[\055] -> 2 + 7:[.] -> 2 + 8:[0-9] -> 2 + 12:[@] -> 8 + 13:[A-Z] -> 2 + 16:[_] -> 2 + 17:[a-z] -> 22 + Use state 0 as basis (4 fixups) + +DFA state 15 + Forward route : (from state 7) + (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->(HERE) + Transitions : + 17:[a-z] -> 23 + +DFA state 16 + Forward route : (from state 8) + (START)->3:[!#-'*/=?^`{-~]->12:[@]->6:[\055]->(HERE) + Transitions : + 6:[\055] -> 24 + 7:[.] -> 24 + 8:[0-9] -> 24 + 9:[:] -> 13 + 13:[A-Z] -> 24 + 16:[_] -> 24 + 17:[a-z] -> 24 + +DFA state 17 + Forward route : (from state 8) + (START)->3:[!#-'*/=?^`{-~]->12:[@]->14:[[]->(HERE) + Transitions : + 7:[.] -> 17 + 8:[0-9] -> 17 + 15:[]] -> 19 + +DFA state 18 + Forward route : (from state 9) + (START)->4:["]->4:["]->12:[@]->(HERE) + Transitions : + 6:[\055] -> 25 + 7:[.] 
-> 25 + 8:[0-9] -> 25 + 13:[A-Z] -> 25 + 14:[[] -> 17 + 16:[_] -> 25 + 17:[a-z] -> 25 + +DFA state 19 + Forward route : (from state 10) + (START)->10:[<]->3:[!#-'*/=?^`{-~]->11:[>]->(HERE) + Transitions : + 0:[\t ] -> 7 + +DFA state 20 + Forward route : (from state 10) + (START)->10:[<]->3:[!#-'*/=?^`{-~]->12:[@]->(HERE) + Transitions : + 6:[\055] -> 26 + 7:[.] -> 26 + 8:[0-9] -> 26 + 9:[:] -> 4 + 13:[A-Z] -> 26 + 14:[[] -> 27 + 16:[_] -> 26 + 17:[a-z] -> 26 + +DFA state 21 + Forward route : (from state 11) + (START)->10:[<]->4:["]->4:["]->(HERE) + Transitions : + 3:[!#-'*/=?^`{-~] -> 21 + 4:["] -> 11 + 5:[+] -> 21 + 6:[\055] -> 21 + 7:[.] -> 21 + 8:[0-9] -> 21 + 11:[>] -> 19 + 12:[@] -> 28 + 13:[A-Z] -> 21 + 16:[_] -> 21 + 17:[a-z] -> 21 + +DFA state 22 + Forward route : (from state 14) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->(HERE) + Transitions : + 0:[\t ] -> 29 + 3:[!#-'*/=?^`{-~] -> 2 + 4:["] -> 3 + 5:[+] -> 2 + 6:[\055] -> 2 + 7:[.] -> 2 + 8:[0-9] -> 2 + 12:[@] -> 8 + 13:[A-Z] -> 2 + 16:[_] -> 2 + 17:[a-z] -> 2 + Use state 0 as basis (3 fixups) + +DFA state 23 + Forward route : (from state 15) + (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->(HERE) + Transitions : + 17:[a-z] -> 30 + +DFA state 24 + Forward route : (from state 16) + (START)->3:[!#-'*/=?^`{-~]->12:[@]->6:[\055]->6:[\055]->(HERE) + Transitions : + 0:[\t ] -> 7 + 6:[\055] -> 24 + 7:[.] -> 24 + 8:[0-9] -> 24 + 9:[:] -> 13 + 13:[A-Z] -> 24 + 16:[_] -> 24 + 17:[a-z] -> 24 + Use state 16 as basis (1 fixups) + +DFA state 25 + Forward route : (from state 18) + (START)->4:["]->4:["]->12:[@]->6:[\055]->(HERE) + Transitions : + 6:[\055] -> 31 + 7:[.] -> 31 + 8:[0-9] -> 31 + 13:[A-Z] -> 31 + 16:[_] -> 31 + 17:[a-z] -> 31 + +DFA state 26 + Forward route : (from state 20) + (START)->10:[<]->3:[!#-'*/=?^`{-~]->12:[@]->6:[\055]->(HERE) + Transitions : + 6:[\055] -> 32 + 7:[.] 
-> 32 + 8:[0-9] -> 32 + 9:[:] -> 4 + 13:[A-Z] -> 32 + 16:[_] -> 32 + 17:[a-z] -> 32 + +DFA state 27 + Forward route : (from state 20) + (START)->10:[<]->3:[!#-'*/=?^`{-~]->12:[@]->14:[[]->(HERE) + Transitions : + 7:[.] -> 27 + 8:[0-9] -> 27 + 15:[]] -> 33 + +DFA state 28 + Forward route : (from state 21) + (START)->10:[<]->4:["]->4:["]->12:[@]->(HERE) + Transitions : + 6:[\055] -> 34 + 7:[.] -> 34 + 8:[0-9] -> 34 + 13:[A-Z] -> 34 + 14:[[] -> 27 + 16:[_] -> 34 + 17:[a-z] -> 34 + +DFA state 29 + Forward route : (from state 22) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 29 + 13:[A-Z] -> 35 + +DFA state 30 + Forward route : (from state 23) + (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->(HERE) + Transitions : + 0:[\t ] -> 36 + +DFA state 31 + Forward route : (from state 25) + (START)->4:["]->4:["]->12:[@]->6:[\055]->6:[\055]->(HERE) + Transitions : + 0:[\t ] -> 7 + 6:[\055] -> 31 + 7:[.] -> 31 + 8:[0-9] -> 31 + 13:[A-Z] -> 31 + 16:[_] -> 31 + 17:[a-z] -> 31 + Use state 25 as basis (1 fixups) + +DFA state 32 + Forward route : (from state 26) + (START)->10:[<]->3:[!#-'*/=?^`{-~]->12:[@]->6:[\055]->6:[\055]->(HERE) + Transitions : + 6:[\055] -> 32 + 7:[.] -> 32 + 8:[0-9] -> 32 + 9:[:] -> 4 + 11:[>] -> 19 + 13:[A-Z] -> 32 + 16:[_] -> 32 + 17:[a-z] -> 32 + Use state 26 as basis (1 fixups) + +DFA state 33 + Forward route : (from state 27) + (START)->10:[<]->3:[!#-'*/=?^`{-~]->12:[@]->14:[[]->15:[]]->(HERE) + Transitions : + 11:[>] -> 19 + +DFA state 34 + Forward route : (from state 28) + (START)->10:[<]->4:["]->4:["]->12:[@]->6:[\055]->(HERE) + Transitions : + 6:[\055] -> 37 + 7:[.] 
-> 37 + 8:[0-9] -> 37 + 13:[A-Z] -> 37 + 16:[_] -> 37 + 17:[a-z] -> 37 + +DFA state 35 + Forward route : (from state 29) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->(HERE) + Transitions : + 17:[a-z] -> 38 + +DFA state 36 + Forward route : (from state 30) + (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 36 + 13:[A-Z] -> 39 + +DFA state 37 + Forward route : (from state 34) + (START)->10:[<]->4:["]->4:["]->12:[@]->6:[\055]->6:[\055]->(HERE) + Transitions : + 6:[\055] -> 37 + 7:[.] -> 37 + 8:[0-9] -> 37 + 11:[>] -> 19 + 13:[A-Z] -> 37 + 16:[_] -> 37 + 17:[a-z] -> 37 + Use state 34 as basis (1 fixups) + +DFA state 38 + Forward route : (from state 35) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->(HERE) + Transitions : + 17:[a-z] -> 40 + +DFA state 39 + Forward route : (from state 36) + (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->(HERE) + Transitions : + 17:[a-z] -> 41 + +DFA state 40 + Forward route : (from state 38) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->(HERE) + Transitions : + 0:[\t ] -> 42 + +DFA state 41 + Forward route : (from state 39) + (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->(HERE) + Transitions : + 17:[a-z] -> 43 + +DFA state 42 + Forward route : (from state 40) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 42 + 8:[0-9] -> 44 + 13:[A-Z] -> 39 + +DFA state 43 + Forward route : (from state 41) + (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->(HERE) + Transitions : + 0:[\t ] -> 45 + +DFA state 44 + Forward route : (from state 42) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->(HERE) + 
Transitions : + 0:[\t ] -> 46 + 8:[0-9] -> 47 + +DFA state 45 + Forward route : (from state 43) + (START)->3:[!#-'*/=?^`{-~]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 45 + 8:[0-9] -> 44 + +DFA state 46 + Forward route : (from state 44) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 46 + 8:[0-9] -> 48 + +DFA state 47 + Forward route : (from state 44) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->8:[0-9]->(HERE) + Transitions : + 0:[\t ] -> 46 + +DFA state 48 + Forward route : (from state 46) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->(HERE) + Transitions : + 8:[0-9] -> 49 + +DFA state 49 + Forward route : (from state 48) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->(HERE) + Transitions : + 9:[:] -> 50 + +DFA state 50 + Forward route : (from state 49) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->(HERE) + Transitions : + 8:[0-9] -> 51 + +DFA state 51 + Forward route : (from state 50) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->(HERE) + Transitions : + 8:[0-9] -> 52 + +DFA state 52 + Forward route : (from state 51) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->(HERE) + Transitions : + 0:[\t ] -> 53 + 9:[:] -> 54 + +DFA state 53 + Forward route : (from state 52) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t 
]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 53 + 5:[+] -> 55 + 6:[\055] -> 55 + 8:[0-9] -> 56 + 13:[A-Z] -> 57 + +DFA state 54 + Forward route : (from state 52) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->9:[:]->(HERE) + Transitions : + 8:[0-9] -> 58 + +DFA state 55 + Forward route : (from state 53) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->5:[+]->(HERE) + Transitions : + 8:[0-9] -> 57 + 13:[A-Z] -> 57 + 17:[a-z] -> 57 + +DFA state 56 + Forward route : (from state 53) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->(HERE) + Transitions : + 8:[0-9] -> 59 + +DFA state 57 + Forward route : (from state 53) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->(HERE) + Transitions : + 0:[\t ] -> 60 + 8:[0-9] -> 57 + 13:[A-Z] -> 57 + 17:[a-z] -> 57 + Use state 55 as basis (1 fixups) + +DFA state 58 + Forward route : (from state 54) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->(HERE) + Transitions : + 8:[0-9] -> 61 + +DFA state 59 + Forward route : (from state 56) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->(HERE) + Transitions : + 8:[0-9] -> 62 + +DFA state 60 + Forward route : (from state 57) + (START)->0:[\t 
]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 60 + 5:[+] -> 63 + 6:[\055] -> 63 + 8:[0-9] -> 64 + 13:[A-Z] -> 65 + +DFA state 61 + Forward route : (from state 58) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->(HERE) + Transitions : + 0:[\t ] -> 53 + +DFA state 62 + Forward route : (from state 59) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->(HERE) + Transitions : + 8:[0-9] -> 66 + +DFA state 63 + Forward route : (from state 60) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->5:[+]->(HERE) + Transitions : + 8:[0-9] -> 65 + 13:[A-Z] -> 65 + 17:[a-z] -> 65 + +DFA state 64 + Forward route : (from state 60) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->8:[0-9]->(HERE) + Transitions : + 8:[0-9] -> 67 + +DFA state 65 + Forward route : (from state 60) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->13:[A-Z]->(HERE) + Transitions : + 0:[\t ] -> 68 + 8:[0-9] -> 65 + 13:[A-Z] -> 65 + 17:[a-z] -> 65 + Use state 63 as basis (1 fixups) + +DFA state 66 + Forward route : (from state 62) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t 
]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->(HERE) + Transitions : + 0:[\t ] -> 66 + 1:[\n] -> 69 + 2:[\r] -> 70 + 5:[+] -> 71 + 6:[\055] -> 71 + 13:[A-Z] -> 72 + +DFA state 67 + Forward route : (from state 64) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->8:[0-9]->8:[0-9]->(HERE) + Transitions : + 8:[0-9] -> 73 + +DFA state 68 + Forward route : (from state 65) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->13:[A-Z]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 68 + 8:[0-9] -> 64 + +DFA state 69 + Forward route : (from state 66) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->1:[\n]->(HERE) + Transitions : + NFA exit tags applying : + FROMCHECK_PASS + Attributes for <(DEFAULT)> : FROMCHECK_PASS + +DFA state 70 + Forward route : (from state 66) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->2:[\r]->(HERE) + Transitions : + 1:[\n] -> 69 + +DFA state 71 + Forward route : (from state 66) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->5:[+]->(HERE) + Transitions : + 8:[0-9] -> 72 + 13:[A-Z] -> 72 + 17:[a-z] -> 72 + +DFA state 72 + Forward route : (from state 66) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t 
]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->13:[A-Z]->(HERE) + Transitions : + 0:[\t ] -> 74 + 1:[\n] -> 69 + 2:[\r] -> 70 + 8:[0-9] -> 72 + 13:[A-Z] -> 72 + 17:[a-z] -> 72 + Use state 71 as basis (3 fixups) + +DFA state 73 + Forward route : (from state 67) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->(HERE) + Transitions : + 8:[0-9] -> 75 + +DFA state 74 + Forward route : (from state 72) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->13:[A-Z]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 74 + 1:[\n] -> 69 + 2:[\r] -> 70 + 5:[+] -> 76 + 6:[\055] -> 76 + 13:[A-Z] -> 77 + +DFA state 75 + Forward route : (from state 73) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->13:[A-Z]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->(HERE) + Transitions : + 0:[\t ] -> 75 + 1:[\n] -> 69 + 2:[\r] -> 70 + +DFA state 76 + Forward route : (from state 74) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->13:[A-Z]->0:[\t ]->5:[+]->(HERE) + Transitions : + 8:[0-9] -> 77 + 13:[A-Z] -> 77 + 17:[a-z] -> 77 + +DFA state 77 + Forward route : (from state 74) + (START)->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->13:[A-Z]->17:[a-z]->17:[a-z]->0:[\t ]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->9:[:]->8:[0-9]->8:[0-9]->0:[\t ]->8:[0-9]->8:[0-9]->8:[0-9]->8:[0-9]->13:[A-Z]->0:[\t ]->13:[A-Z]->(HERE) + Transitions : + 0:[\t ] -> 75 + 1:[\n] -> 69 + 2:[\r] -> 70 + 
8:[0-9] -> 77 + 13:[A-Z] -> 77 + 17:[a-z] -> 77 + Use state 75 as basis (3 fixups) + + +Entry states in DFA: +Entry <(ONLY ENTRY)> : 0 diff --git a/src/mairix/glob.c b/src/mairix/glob.c @@ -0,0 +1,393 @@ +/* + mairix - message index builder and finder for maildir folders. + + ********************************************************************** + * Copyright (C) Richard P. Curnow 2003,2004,2005 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + ********************************************************************** + */ + +#include <string.h> +#include <stdlib.h> +#include <assert.h> +#include <ctype.h> +#include "mairix.h" + + +struct globber { + unsigned int pat[256]; + unsigned int starpat; + unsigned int twostarpat; + unsigned int hit; +}; + +struct globber_array { + int n; + struct globber **globs; +}; + +static const char *parse_charclass(const char *in, struct globber *result, unsigned int mask)/*{{{*/ +{ + int first = 1; + int prev = -1; + in++; /* Advance over '[' */ + while (*in) { + if (*in == ']') { + if (first) { + result->pat[(int)']'] |= mask; + } else { + return in; + } + } else if (*in == '-') { + /* Maybe range */ + if ((prev < 0) || !in[1] || (in[1]==']')) { + /* - at either end of string (or right after an earlier range) means + * normal - */ + result->pat['-'] |= mask; + } else { + int next = in[1]; + int hi, lo; + int i; + /* Cope with range being inverted */ + if (prev < next) { + lo = prev, hi = next; + } else { + lo = next, hi = prev; + } + for (i=lo; i<=hi; i++) { + int index = 0xff & i; + result->pat[index] |= mask; + } + /* require 1 extra increment */ + in++; + prev = -1; /* Avoid junk like [a-e-z] */ + } + } else { + int index = 0xff & (int)*in; + result->pat[index] |= mask; + } + prev = *in; + first = 0; + in++; + } + return in; +} +/*}}}*/ + +struct globber *make_globber(const char *wildstring)/*{{{*/ +{ + struct globber *result; + int n, i; + const char *p; + char c; + int index; + unsigned int mask; + + result = new(struct globber); + memset(&result->pat, 0x00, 256*sizeof(unsigned int)); + memset(&result->starpat, 0x00, sizeof(unsigned int)); + memset(&result->twostarpat, 0x00, sizeof(unsigned int)); + mask = 0x1; + + n = 0; + for (p=wildstring; *p; p++) { + mask = 1<<n; + c = *p; + switch (c) { + case '*':/*{{{*/ + if (p[1] == '*') { + result->twostarpat |= mask; + p++; + } else { + /* Match zero or more of anything */ + result->starpat |= mask; + } + break; 
+/*}}}*/ + case '[':/*{{{*/ + p = parse_charclass(p, result, mask); + n++; + break; +/*}}}*/ + case '?':/*{{{*/ + for (i=0; i<256; i++) { + result->pat[i] |= mask; + } + n++; + break; +/*}}}*/ + default:/*{{{*/ + index = 0xff & (int)c; + result->pat[index] |= mask; + n++; + break; +/*}}}*/ + } + } + + result->hit = (1<<n); + return result; + +} +/*}}}*/ +void free_globber(struct globber *old)/*{{{*/ +{ + free(old); +} +/*}}}*/ + +#define DODEBUG 0 + +int is_glob_match(struct globber *g, const char *s)/*{{{*/ +{ + unsigned int reg; + unsigned int stars; + unsigned int twostars; + unsigned int stars2; + int index; + + reg = 0x1; + while (*s) { + index = 0xff & (int) *s; +#if DODEBUG + printf("*s=%c index=%02x old_reg=%08lx pat=%08lx //", + *s, index, reg, g->pat[index]); +#endif + stars = (reg & g->starpat); + twostars = (reg & g->twostarpat); + if (index != '/') { + stars2 = stars | twostars; + } else { + stars2 = twostars; + } + reg &= g->pat[index]; + reg <<= 1; + reg |= stars2; +#if DODEBUG + printf(" new_reg=%08lx ", reg); + printf("starpat=%08lx stars=%08lx stars2=%08lx\n", g->starpat, stars, stars2); +#endif + s++; + } + +#if DODEBUG + printf("reg=%08lx hit=%08lx\n", reg, g->hit); +#endif + reg &= g->hit; + if (reg) { + return 1; + } else { + return 0; + } +} +/*}}}*/ + +struct globber_array *colon_sep_string_to_globber_array(const char *in)/*{{{*/ +{ + char **strings; + int n_strings; + int i; + struct globber_array *result; + + split_on_colons(in, &n_strings, &strings); + result = new(struct globber_array); + result->n = n_strings; + result->globs = new_array(struct globber *, n_strings); + for (i=0; i<n_strings; i++) { + result->globs[i] = make_globber(strings[i]); + free(strings[i]); + } + free(strings); + return result; +} +/*}}}*/ +int is_globber_array_match(struct globber_array *ga, const char *s)/*{{{*/ +{ + int i; + if (!ga) return 0; + for (i=0; i<ga->n; i++) { + if (is_glob_match(ga->globs[i], s)) return 1; + } + return 0; +} +/*}}}*/ +void 
free_globber_array(struct globber_array *in)/*{{{*/ +{ + int i; + for (i=0; i<in->n; i++) { + free_globber(in->globs[i]); + } + free(in); +} +/*}}}*/ + +static char *copy_folder_name(const char *start, const char *end)/*{{{*/ +{ + /* 'start' points to start of string to copy. + Any '\:' sequence is replaced by ':' . + Otherwise \ is treated normally. + 'end' can be 1 beyond the end of the string to copy. Otherwise it can be + null, meaning treat 'start' as the start of a normal null-terminated + string. */ + char *p; + const char *q; + int len; + char *result; + if (end) { + len = end - start; + } else { + len = strlen(start); + } + result = new_array(char, len + 1); + for (p=result, q=start; + end ? (q < end) : *q; + q++) { + if ((q[0] == '\\') && (q[1] == ':')) { + /* Escaped colon : drop the backslash */ + } else { + *p++ = *q; + } + } + *p = '\0'; + return result; +} +/*}}}*/ +void string_list_to_array(struct string_list *list, int *n, char ***arr)/*{{{*/ +{ + int N, i; + struct string_list *a, *next_a; + char **result; + for (N=0, a=list->next; a!=list; a=a->next, N++) ; + + result = new_array(char *, N); + for (i=0, a=list->next; i<N; a=next_a, i++) { + result[i] = a->data; + next_a = a->next; + free(a); + } + + *n = N; + *arr = result; +} +/*}}}*/ +void split_on_colons(const char *str, int *n, char ***arr)/*{{{*/ +{ + struct string_list list, *new_cell; + const char *left_to_do; + + list.next = list.prev = &list; + left_to_do = str; + do { + char *colon; + char *xx; + + colon = strchr(left_to_do, ':'); + /* Allow backslash-escaped colons in filenames */ + if (colon && (colon > left_to_do) && (colon[-1]=='\\')) { + int is_escaped; + do { + colon = strchr(colon + 1, ':'); + is_escaped = (colon && (colon[-1] == '\\')); + } while (colon && is_escaped); + } + /* 'colon' now points to the first non-escaped colon or is null if there + were no more such colons in the rest of the line. 
*/ + + xx = copy_folder_name(left_to_do, colon); + if (colon) { + left_to_do = colon + 1; + } else { + while (*left_to_do) ++left_to_do; + } + + new_cell = new(struct string_list); + new_cell->data = xx; + new_cell->next = &list; + new_cell->prev = list.prev; + list.prev->next = new_cell; + list.prev = new_cell; + } while (*left_to_do); + + string_list_to_array(&list, n, arr); + +} +/*}}}*/ + +#if defined (TEST) +void run1(char *ref, char *s, int expected)/*{{{*/ +{ + struct globber *g; + int result; + g = make_globber(ref); + result = is_glob_match(g, s); + + printf("ref=%s, str=%s, %s %s\n", ref, s, result ? "MATCHED" : "not matched", (expected==result) ? "" : "??????"); + free_globber(g); +} +/*}}}*/ +int main (int argc, char **argv)/*{{{*/ +{ + + run1("ab?de", "abdde", 1); + run1("ab?de", "abcde", 1); + run1("ab?de", "Abcde", 0); + run1("ab?de", "abcd", 0); + run1("ab?de", "abc", 0); + run1("ab[cd]de", "abdde", 1); + run1("ab[cd]de", "abbde", 0); + run1("ab[cd]de", "abcde", 1); + run1("ab*de", "ade", 0); + run1("ab*de", "abde", 1); + run1("ab*de", "abcde", 1); + run1("ab*de", "abccde", 1); + run1("ab*de", "abccdfde", 1); + run1("ab*de", "abccdedf", 0); + run1("ab[b-d]de", "abade",0); + run1("ab[b-d]de", "abcDe",0); + run1("ab[b-d]de", "abcde",1); + run1("ab[b-d]de", "abdde",1); + run1("ab[b-d]de", "abEde", 0); + run1("[a-z][0-9A-F][]a-f-]", "yE]", 1); + run1("[a-z][0-9A-F][]a-f-]", "uE[", 0); + run1("[a-z][0-9A-F][]a-f-]", "vG-", 0); + run1("[a-z][0-9A-F][]a-f-]", "w8-", 1); + run1("*", "a", 1); + run1("*", "", 1); + run1("a*", "a", 1); + run1("a*", "aa", 1); + run1("a*", "aaA", 1); + run1("*a", "aaa", 1); + run1("*a", "a", 1); + run1("x*abc", "xabdxabc", 1); + run1("*", "", 1); + run1("a*", "", 0); + run1("*a", "", 0); + run1("a", "", 0); + + run1("*abc*", "x/abc/y", 0); + run1("**abc**", "x/abc/y", 1); + run1("x/*/abc**", "x/z/abc/y", 1); + run1("x/*/abc**", "x/z/w/abc/y", 0); + run1("x/*/abc**", "x/zz/w/abc/y", 0); + run1("x/*/abc**", "x/z/ww/abc/y", 0); + 
run1("x/**/abc**", "x/z/w/abc/y", 1); + run1("x/**/abc**", "x/zz/w/abc/y", 1); + + return 0; +} +/*}}}*/ +#endif + diff --git a/src/mairix/hash.c b/src/mairix/hash.c @@ -0,0 +1,143 @@ +/* Hash function */ + +#include "mairix.h" + +/* +-------------------------------------------------------------------- +lookup2.c, by Bob Jenkins, December 1996, Public Domain. +hash(), hash2(), hash3, and mix() are externally useful functions. +Routines to test the hash are included if SELF_TEST is defined. +You can use this free for any purpose. It has no warranty. +-------------------------------------------------------------------- +*/ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> + +#define hashsize(n) ((unsigned int)1<<(n)) +#define hashmask(n) (hashsize(n)-1) + +/* +-------------------------------------------------------------------- +mix -- mix 3 32-bit values reversibly. +For every delta with one or two bit set, and the deltas of all three + high bits or all three low bits, whether the original value of a,b,c + is almost all zero or is uniformly distributed, +* If mix() is run forward or backward, at least 32 bits in a,b,c + have at least 1/4 probability of changing. +* If mix() is run forward, every bit of c will change between 1/3 and + 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.) +mix() was built out of 36 single-cycle latency instructions in a + structure that could supported 2x parallelism, like so: + a -= b; + a -= c; x = (c>>13); + b -= c; a ^= x; + b -= a; x = (a<<8); + c -= a; b ^= x; + c -= b; x = (b>>13); + ... + Unfortunately, superscalar Pentiums and Sparcs can't take advantage + of that parallelism. They've also turned some of those single-cycle + latency instructions into multi-cycle latency instructions. Still, + this is the fastest good hash I could find. There were about 2^^68 + to choose from. I only looked at a billion or so. 
+-------------------------------------------------------------------- +*/ +#define mix(a,b,c) \ +{ \ + a -= b; a -= c; a ^= (c>>13); \ + b -= c; b -= a; b ^= (a<<8); \ + c -= a; c -= b; c ^= (b>>13); \ + a -= b; a -= c; a ^= (c>>12); \ + b -= c; b -= a; b ^= (a<<16); \ + c -= a; c -= b; c ^= (b>>5); \ + a -= b; a -= c; a ^= (c>>3); \ + b -= c; b -= a; b ^= (a<<10); \ + c -= a; c -= b; c ^= (b>>15); \ +} + +/* same, but slower, works on systems that might have 8 byte ub4's */ +#define mix2(a,b,c) \ +{ \ + a -= b; a -= c; a ^= (c>>13); \ + b -= c; b -= a; b ^= (a<< 8); \ + c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \ + a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \ + b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \ + c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \ + a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \ + b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \ + c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \ +} + +/* +-------------------------------------------------------------------- +hash() -- hash a variable-length key into a 32-bit value + k : the key (the unaligned variable-length array of bytes) + len : the length of the key, counting by bytes + level : can be any 4-byte value +Returns a 32-bit value. Every bit of the key affects every bit of +the return value. Every 1-bit and 2-bit delta achieves avalanche. +About 36+6len instructions. + +The best hash table sizes are powers of 2. There is no need to do +mod a prime (mod is sooo slow!). If you need less than 32 bits, +use a bitmask. For example, if you need only 10 bits, do + h = (h & hashmask(10)); +In which case, the hash table should have hashsize(10) elements. + +If you are hashing n strings (ub1 **)k, do it like this: + for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h); + +By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this +code any way you wish, private, educational, or commercial. It's free. 
+ +See http://burlteburtle.net/bob/hash/evahash.html +Use for hash table lookup, or anything where one collision in 2^32 is +acceptable. Do NOT use for cryptographic purposes. +-------------------------------------------------------------------- +*/ + +unsigned int hashfn( unsigned char *k, unsigned int length, unsigned int initval) +{ + register unsigned int a,b,c,len; + + /* Set up the internal state */ + len = length; + a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */ + c = initval; /* the previous hash value */ + + /*---------------------------------------- handle most of the key */ + while (len >= 12) + { + a += (k[0] +((unsigned int)k[1]<<8) +((unsigned int)k[2]<<16) +((unsigned int)k[3]<<24)); + b += (k[4] +((unsigned int)k[5]<<8) +((unsigned int)k[6]<<16) +((unsigned int)k[7]<<24)); + c += (k[8] +((unsigned int)k[9]<<8) +((unsigned int)k[10]<<16)+((unsigned int)k[11]<<24)); + mix(a,b,c); + k += 12; len -= 12; + } + + /*------------------------------------- handle the last 11 bytes */ + c += length; + switch(len) /* all the case statements fall through */ + { + case 11: c+=((unsigned int)k[10]<<24); + case 10: c+=((unsigned int)k[9]<<16); + case 9 : c+=((unsigned int)k[8]<<8); + /* the first byte of c is reserved for the length */ + case 8 : b+=((unsigned int)k[7]<<24); + case 7 : b+=((unsigned int)k[6]<<16); + case 6 : b+=((unsigned int)k[5]<<8); + case 5 : b+=k[4]; + case 4 : a+=((unsigned int)k[3]<<24); + case 3 : a+=((unsigned int)k[2]<<16); + case 2 : a+=((unsigned int)k[1]<<8); + case 1 : a+=k[0]; + /* case 0: nothing left to add */ + } + mix(a,b,c); + /*-------------------------------------------- report the result */ + return c; +} + + diff --git a/src/mairix/mairix.1 b/src/mairix/mairix.1 @@ -0,0 +1,673 @@ +.TH MAIRIX 1 "January 2006" +.de Sx +.PP +.ne \\$1 +.nf +.na +.RS 7 +.. +.de Ex +.RE +.fi +.ad +.PP +.. +.de Sy +.PP +.ne \\$1 +.nf +.na +.RS 12 +.. +.de Ey +.RE +.fi +.ad +.IP "" 7 +.. 
+.SH NAME +mairix \- index and search mail folders +.SH SYNOPSIS +.SS Indexing +.B mairix +[ +.BR \-v | \-\-verbose +] [ +.BR \-p | \-\-purge +] [ +.BR \-f | \-\-rcfile +.I mairixrc +] [ +.BR \-F | \-\-fast-index +] [ +.BR \-\-force-hash-key-new-database +.I hash +] + +.SS Searching +.B mairix +[ +.BR \-v | \-\-verbose +] [ +.BR \-f | \-\-rcfile +.I mairixrc +] [ +.BR \-r | \-\-raw-output +] [ +.BR \-x | \-\-excerpt-output +] [ +.BR \-H | \-\-force-hardlinks +] [ +.BR \-o | \-\-mfolder +.I mfolder +] [ +.BR \-a | \-\-augment +] [ +.BR \-t | \-\-threads +] +.I search-patterns + +.SS Other +.B mairix +[ +.BR \-h | \-\-help +] + +.B mairix +[ +.BR \-V | \-\-version +] + +.B mairix +[ +.BR \-d | \-\-dump +] + +.SH DESCRIPTION +.I mairix +indexes and searches a collection of email messages. The folders containing +the messages for indexing are defined in the configuration file. The indexing +stage produces a database file. The database file provides rapid access to +details of the indexed messages during searching operations. A search normally +produces a folder (so-called +.BR mfolder ) +containing the matched messages. However, a raw mode +.RB ( \-r ) +exists which just lists the matched messages instead. +.PP +It can operate with the following folder types +.IP * +maildir +.IP * +MH (compatible with the MH folder formats used by xmh, sylpheed, claws-mail, nnml (Gnus) and evolution) +.IP * +mbox (including mboxes that have been compressed with gzip or bzip2) +.PP +If maildir or MH source folders are used, and a search outputs its matches to +an mfolder in maildir or MH format, symbolic links are used to reference the +original messages inside the mfolder. However, if mbox folders are involved, +copies of messages are made instead. + +.SH OPTIONS + +.B mairix +decides whether indexing or searching is required by looking for the presence of any +.I search-patterns +on the command line. 
+ +.SS Special modes +.TP +.B -h, --help +.br +Show usage summary and exit + +.TP +.B -V, --version +Show program version and exit + +.TP +.B -d +.br +Dump the database's contents in human-readable form to stdout. + +.SS General options +.TP +.BI "-f " mairixrc +.br +.ns +.TP +.BI "--rcfile " mairixrc +.br +Specify an alternative configuration file to use. The default configuration file is +.IR ~/.mairixrc . + +.TP +.B -v, --verbose +.br +Make the output more verbose + +.TP +.B -Q, --no-integrity-checks +.br +Normally +.I mairix +will do some internal integrity tests on the database. The +.B -Q +option removes these checks, making +.I mairix +run faster, but it will be less likely to detect internal problems if any bugs creep in. + +The +.I nochecks +directive in the rc file has the same effect. + +.TP +.B \-\-unlock +.br +.I mairix +locks its database file during any indexing or searching operation to prevent +multiple indexing runs interfering with each other, or an indexing run +interfering with search runs. The +.B --unlock +option removes the lockfile before doing the requested indexing or searching +operation. This is a convenient way of cleaning up a stale lockfile if an +earlier run crashed for some reason or was aborted. + +.SS Indexing options + +.TP +.B -p, --purge +.br +Cause stale (dead) messages to be purged from the database during an indexing +run. (Normally, stale messages are left in the database because of the +additional cost of compacting away the storage that they take up.) + +.TP +.B -F, --fast-index +.br +When processing maildir and MH folders, +.I mairix +normally compares the mtime and size of each message against the values stored +in the database. If they have changed, the message will be rescanned. This +check requires each message file to be stat'ed. For large numbers of messages +in these folder types, this can be a sizeable overhead. 
+ +This option tells +.I mairix +to assume that when a message currently on-disc has a name matching one already +in the database, it should assume the message is unchanged. + +A later indexing run without using this option will fix up any rescans that +were missed due to its use. + +.TP +.BI "--force-hash-key-new-database " hash +.br +This option should only be used for debugging. +.br +If a new database is created, +.I hash +is used as hash key, instead of a random hash. + +.SS Search options +.TP +.B -a, --augment +.br +Append newly matched messages to the current mfolder instead of creating the +mfolder from scratch. + +.TP +.B -t, --threads +.br +As well as returning the matched messages, also return every message in the +same thread as one of the real matches. + +.TP +.B -r, --raw-output +.br +Instead of creating an mfolder containing the matched messages, just show their +paths on stdout. + +.TP +.B -x, --excerpt-output +.br +Instead of creating an mfolder containing the matched messages, display an +excerpt from their headers on stdout. The excerpt shows To, Cc, From, Subject +and Date. + +.TP +.B -H, --force-hardlinks +.br +Instead of creating symbolic links, force the use of hardlinks. This helps +mailers such as alpine to realize that there are new mails in the search +folder. + +.TP +.BI "-o " mfolder +.br +.ns +.TP +.BI "--mfolder " mfolder +.br +Specify a temporary alternative path for the mfolder to use, overriding the +.I mfolder +directive in the rc file. + +.B mairix +will refuse to output search results into any folder that appears to be amongst +those that are indexed. This is to prevent accidental deletion of emails. + +.SS Search patterns +.TP +.BI t: word +.br +Match +.I word +in the To: header. + +.TP +.BI c: word +.br +Match +.I word +in the Cc: header. + +.TP +.BI f: word +.br +Match +.I word +in the From: header. + +.TP +.BI s: word +.br +Match +.I word +in the Subject: header. 
+ +.TP +.BI m: word +.br +Match +.I word +in the Message-ID: header. + +.TP +.BI b: word +.br +Match +.I word +in the message body. + +.B Message body +is taken to mean any body part of type text/plain or text/html. For text/html, +text within meta tags is ignored. In particular, the URLs inside <A +HREF="..."> tags are not currently indexed. Non-text attachments are ignored. +If there's an attachment of type message/rfc822, this is parsed and the match +is performed on this sub-message too. If a hit occurs, the enclosing message +is treated as having a hit. + +.TP +.BI d: "[start-datespec]" - "[end-datespec]" +.br +Match messages with Date: headers lying in the specific range. + +.TP +.BI z: "[low-size]" - "[high-size]" +.br +Match messages whose size lies in the specified range. If the +.I low-size +argument is omitted it defaults to zero. If the +.I high-size +argument is omitted it defaults to infinite size. + +For example, to match messages between 10kilobytes and 20kilobytes in size, the +following search term can be used: +.Sy 1 +mairix z:10k-20k +.Ey + +The suffix 'k' on a number means multiply by 1024, and the suffix 'M' on a +number means multiply by 1024*1024. + +.TP +.BI n: word +.br +Match +.I word +occurring as the name of an attachment in the message. Since attachment names +are usually long, this option would usually be used in the substring form. So +.Sy 1 +mairix n:mairix= +.Ey + +would match all messages which have attachments whose names contain the +substring +.IR mairix . + +The attachment name is determined from the name=xxx or filename=xxx qualifiers +on the Content-Type: and Content-Disposition: headers respectively. + +.TP +.BI F: flags +.br +Match messages with particular flag settings. The available flags are 's' +meaning seen, 'r' meaning replied, and 'f' meaning flagged. The flags are +case-insensitive. A flag letter may be prefixed by a '-' to negate its sense. 
Thus + +.Sy 1 +mairix F:-s d:1w- +.Ey + +would match any unread message less than a week old, and + +.Sy 1 +mairix F:f-r d:-1m +.Ey + +would match any flagged message older than a month which you haven't replied to yet. + +Note that the flag characters and their meanings agree with those used as the +suffix letters on message filenames in maildir folders. + +.SS Searching for a match amongst more than one part of a message +.PP +Multiple body parts may be grouped together, if a match in any of them is +sought. Common examples follow. + +.TP +.BI tc: word +.br +Match +.I word +in either the To: or Cc: headers (or both). + +.TP +.BI bs: word +.br +Match +.I word +in either the Subject: header or the message body (or both). + +.PP +The +.B a: +search pattern is an abbreviation for +.BR tcf: ; +i.e. match the word in the To:, Cc: or From: headers. ("a" stands for +"address" in this case.) + +.SS Match words +The +.I word +argument to the search strings can take various forms. + +.TP +.I ~word +.br +Match messages +.B not +containing the word. + +.TP +.I word1,word2 +.br +This matches if both the words are matched in the specified message part. + +.TP +.I word1/word2 +.br +This matches if either of the words are matched in the specified message part. + +.TP +.I substring= +.br +Match any word containing +.I substring +as a substring + +.TP +.I substring=N +.br +Match any word containing +.IR substring , +allowing up to +.I N +errors in the match. For example, if +.I N +is 1, a single error is allowed, where an error can be +.IP * +a missing letter +.IP * +an extra letter +.IP * +a different letter. + +.TP +.I ^substring= +.br +Match any word containing +.I substring +as a substring, with the requirement that +.I substring +occurs at the beginning of the matched word. + +.SS Precedence matters + +The binding order of the constructions is: + +.IP "1." +Individual command line arguments define separate conditions which are AND-ed +together + +.IP "2." 
+Within a single argument, the letters before the colon define which message +parts the expression applies to. If there is no colon, the expression applies +to all the headers listed earlier and the body. + +.IP "3." +After the colon, commas delineate separate disjuncts, which are +OR-ed together. + +.IP "4." +Each disjunct may contain separate conjuncts, which are separated +by plus signs. These conditions are AND-ed together. + +.IP "5." +Each conjunct may start with a tilde to negate it, and may be +followed by a slash to indicate a substring match, optionally +followed by an integer to define the maximum number of errors +allowed. + +.SS Date specification +.PP +This section describes the syntax used for specifying dates when +searching using the `d:' option. + +Dates are specified as a range. The start and end of the range can both be +specified. Alternatively, if the start is omitted, it is treated as being the +beginning of time. If the end is omitted, it is treated as the current time. + +There are 4 basic formats: +.TP +.BI d: start-end +.br +Specify both start and end explicitly +.TP +.BI d: start- +Specify start, end is the current time +.TP +.BI d: -end +Specify end, start is 'a long time ago' (i.e. early enough to include any +message). +.TP +.BI d: period +Specify start and end implicitly, as the start and end of the +period given. + +.PP +The start and end can be specified either absolute or relative. A relative +endpoint is given as a number followed by a single letter defining the scaling: + +.TS +box tab(&); +lb | lb | lb | lb. +letter & short for & example & meaning += +.T& +l | l | l | l. +d & days & 3d & 3 days +w & weeks & 2w & 2 weeks (14 days) +m & months & 5m & 5 months (150 days) +y & years & 4y & 4 years (4*365 days) +.TE + +.PP +Months are always treated as 30 days, and years as 365 days, for +this purpose. + +Absolute times can be specified in many forms. 
Some forms have different +meanings when they define a start date from that when they define an end date. +Where a single expression specifies both the start and end (i.e. where the +argument to d: doesn't contain a `-'), it will usually have different +interpretations in the two cases. + +In the examples below, suppose the current date is Sunday May 18th, +2003 (when I started to write this material.) + +.TS +box tab(&); +l | l | l | l. +Example & Start date & End date & Notes += +d:20030301\-20030425 & March 1st, 2003 & 25th April, 2003 +d:030301\-030425 & March 1st, 2003 & April 25th, 2003 & century assumed +d:mar1\-apr25 & March 1st, 2003 & April 25th, 2003 +d:Mar1\-Apr25 & March 1st, 2003 & April 25th, 2003 & case insensitive +d:MAR1\-APR25 & March 1st, 2003 & April 25th, 2003 & case insensitive +d:1mar\-25apr & March 1st, 2003 & April 25th, 2003 & date and month in either order +d:2002 & January 1st, 2002 & December 31st, 2002 & whole year +d:mar & March 1st, 2003 & March 31st, 2003 & most recent March +d:oct & October 1st, 2002 & October 31st, 2002 & most recent October +d:21oct\-mar & October 21st, 2002 & March 31st, 2003 & start before end +d:21apr\-mar & April 21st, 2002 & March 31st, 2003 & start before end +d:21apr\- & April 21st, 2003 & May 18th, 2003 & end omitted +d:\-21apr & January 1st, 1900 & April 21st, 2003 & start omitted +d:6w\-2w & April 6th, 2003 & May 4th, 2003 & both dates relative +d:21apr\-1w & April 21st, 2003 & May 11th, 2003 & one date relative +d:21apr\-2y & April 21st, 2001 & May 11th, 2001 & start before end +d:99\-11 & January 1st, 1999 & May 11th, 2003 &T{ +2 digits are a day of the month if possible, otherwise a year +T} +d:99oct\-1oct & October 1st, 1999 & October 1st, 2002 &T{ +end before now, single digit is a day of the month +T} +d:99oct\-01oct & October 1st, 1999 & October 31st, 2001 &T{ +2 digits starting with zero treated as a year +T} +d:oct99\-oct1 & October 1st, 1999 & October 1st, 2002 &T{ +day and month in either 
order +T} +d:oct99\-oct01 & October 1st, 1999 & October 31st, 2001 &T{ +year and month in either order +T} +.TE + +.PP +The principles in the table work as follows. +.IP \(bu +When the expression defines a period of more than a day (i.e. if a month or +year is specified), the earliest day in the period is taken when the start date +is defined, and the last day in the period if the end of the range is being +defined. +.IP \(bu +The end date is always taken to be on or before the current date. +.IP \(bu +The start date is always taken to be on or before the end date. + +.SH "SETTING UP THE MATCH FOLDER" + +If the match folder does not exist when running in search mode, it is +automatically created. For 'mformat=maildir' (the default), this +should be all you need to do. If you use 'mformat=mh', you may have to +run some commands before your mailer will recognize the folder. e.g. +for mutt, you could do +.Sx 2 +mkdir -p /home/richard/Mail/mfolder +touch /home/richard/Mail/mfolder/.mh_sequences +.Ex +which seems to work. Alternatively, within mutt, you could set MBOX_TYPE to +'mh' and save a message to '+mfolder' to have mutt set up the structure for you +in advance. + +If you use Sylpheed, the best way seems to be to create the new folder from +within Sylpheed before letting mairix write into it. + +.SH EXAMPLES +.PP +Suppose my email address is <richard@doesnt.exist>. + +Either of the following will match all messages newer than 3 months from me +with the word 'chrony' in the subject line: +.Sx 2 +mairix d:3m- f:richard+doesnt+exist s:chrony +mairix d:3m- f:richard@doesnt.exist s:chrony +.Ex +Suppose I don't mind a few spurious matches on the address, I want a wider date +range, and I suspect that some messages I replied to might have had the subject +keyword spelt wrongly (let's allow up to 2 errors): +.Sx 1 +mairix d:6m- f:richard s:chrony=2 +.Ex + +.SH NOTES +.PP +.B mairix +works exclusively in terms of +.IR words . 
+The index that's built +in indexing mode contains a table of which words occur in which +messages. Hence, the search capability is based on finding messages +that contain particular words. +.B mairix +defines a word as any string of alphanumeric characters + underscore. Any +whitespace, punctuation, hyphens etc are treated as word boundaries. + +.B mairix +has special handling for the To:, Cc: and From: headers. +Besides the normal word scan, these headers are scanned a second time, +where the characters '@', '-' and '.' are also treated as word +characters. This allows most (if not all) email addresses to appear in +the database as single words. So if you have a mail from +wibble@foobar.zzz, it will match on both these searches + +.Sx 2 +mairix f:foobar +mairix f:wibble@foobar.zzz +.Ex +It should be clear by now that the searching cannot be used to find messages +matching general regular expressions. This has never been much of a +limitation. Most searches are for particular keywords that were in the +messages, or details of the recipients, or the approximate date. + +It's also worth pointing out that there is no 'locality' information +stored, so you can't search for messages that have one word 'close' to +some other word. For every message and every word, there is a simple +yes/no condition stored - whether the message contains the word in a +particular header or in the body. So far this has proved to be +adequate. +.B mairix +has a similar feel to using an Internet search engine. + +.SH FILES +.I ~/.mairixrc + +.SH AUTHOR +Copyright (C) 2002-2006 Richard P. Curnow <rc@rc0.org.uk> +.SH "SEE ALSO" +mairixrc(5) +.SH BUGS +.PP +We need a plugin scheme to allow more types of attachment to be scanned and indexed. + diff --git a/src/mairix/mairix.32 b/src/mairix/mairix.32 Binary files differ. diff --git a/src/mairix/mairix.64 b/src/mairix/mairix.64 Binary files differ. 
diff --git a/src/mairix/mairix.c b/src/mairix/mairix.c @@ -0,0 +1,774 @@ +/* + mairix - message index builder and finder for maildir folders. + + ********************************************************************** + * Copyright (C) Richard P. Curnow 2002,2003,2004,2005,2006,2007,2008 + * Copyright (C) Sanjoy Mahajan 2005 + * - mfolder validation code + * Copyright (C) James Cameron 2005 + * Copyright (C) Paul Fox 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + ********************************************************************** + */ + +#include "mairix.h" +#include "version.h" +#include <assert.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <pwd.h> +#include <unistd.h> +#include <ctype.h> +#include <locale.h> +#include <signal.h> + +#ifdef TEST_OOM +int total_bytes=0; +#endif + +int verbose = 0; +int do_hardlinks = 0; + +static char *folder_base = NULL; +static char *maildir_folders = NULL; +static char *mh_folders = NULL; +static char *mboxen = NULL; +static char *mfolder = NULL; +static char *omit = NULL; +static char *database_path = NULL; +static enum folder_type output_folder_type = FT_MAILDIR; +static int skip_integrity_checks = 0; + +enum filetype { + M_NONE, M_FILE, M_DIR, M_OTHER +}; + +static enum filetype classify_file(char *name)/*{{{*/ +{ + struct stat sb; + if (stat(name, &sb) < 0) { + return M_NONE; + } + if (S_ISREG(sb.st_mode)) { + return M_FILE; + } else if (S_ISDIR(sb.st_mode)) { + return M_DIR; + } else { + return M_OTHER; + } +} +/*}}}*/ +/*{{{ member of*/ +/* returns 1 iff COMPLETE_MFOLDER (i.e. the match folder with + folder_base prepended if needed) matches one of the FOLDERS after + expanding the wildcards and recursion. Used to make sure that the + match folder will not overwrite a valuable mail file or + directory. 
*/ +int member_of (const char *complete_mfolder, + const char *folder_base, + const char *folders, + enum folder_type ft, + struct globber_array *omit_globs) { + char **raw_paths, **paths; + int n_raw_paths, n_paths, i; + + if (!folders) + return 0; + split_on_colons(folders, &n_raw_paths, &raw_paths); + switch (ft) { + case FT_MAILDIR: + glob_and_expand_paths(folder_base, raw_paths, n_raw_paths, &paths, &n_paths, &maildir_traverse_methods, omit_globs); + break; + case FT_MH: + glob_and_expand_paths(folder_base, raw_paths, n_raw_paths, &paths, &n_paths, &mh_traverse_methods, omit_globs); + break; + case FT_MBOX: + glob_and_expand_paths(folder_base, raw_paths, n_raw_paths, &paths, &n_paths, &mbox_traverse_methods, omit_globs); + break; + case FT_RAW: /* cannot happen but to keep compiler happy */ + case FT_EXCERPT: + break; + } + for (i=0; i<n_paths; i++) { + struct stat mfolder_sb, src_folder_sb; /* for checking inode numbers */ + + /* if the complete path names are the same, definitely a match */ + if (strcmp (complete_mfolder, paths[i]) == 0) + return 1; + /* also a match if they point to the same file or directory but + via different routes (e.g. absolute path for one but path with + ../.. for the other), so check inode numbers */ + /* if cannot even get stat() info, probably not wrecking any mail + files or dirs, so continue, i.e. skip inode check. */ + if (stat (complete_mfolder, &mfolder_sb) != 0 || + stat (paths[i], &src_folder_sb) != 0) + continue; + if (mfolder_sb.st_ino == src_folder_sb.st_ino) + return 1; + } + return 0; +} +/*}}}*/ +static char *copy_value(char *text)/*{{{*/ +{ + char *p; + char *result; + for (p = text; *p && (*p != '='); p++) ; + if (!*p) return NULL; + p++; + result = expand_string(p); + return result; +} +/*}}}*/ +static void add_folders(char **folders, char *extra_folders)/*{{{*/ +{ + /* note : extra_pointers is stale after this routine exits. 
*/ + + if (!*folders) { + *folders = extra_folders; + } else { + char *old_folders = *folders; + char *new_folders; + int old_len, extra_len; + old_len = strlen(old_folders); + extra_len = strlen(extra_folders); + new_folders = new_array(char, old_len + extra_len + 2); + strcpy(new_folders, old_folders); + strcpy(new_folders + old_len, ":"); + strcpy(new_folders + old_len + 1, extra_folders); + *folders = new_folders; + free(old_folders); + } +} +/*}}}*/ +static void parse_output_folder(char *p)/*{{{*/ +{ + char *temp; + temp = copy_value(p); + if (!strncasecmp(temp, "mh", 2)) { + output_folder_type = FT_MH; + } else if (!strncasecmp(temp, "maildir", 7)) { + output_folder_type = FT_MAILDIR; + } else if (!strncasecmp(temp, "raw", 3)) { + output_folder_type = FT_RAW; + } else if (!strncasecmp(temp, "excerpt", 3)) { + output_folder_type = FT_EXCERPT; + } else if (!strncasecmp(temp, "mbox", 4)) { + output_folder_type = FT_MBOX; + } + else { + fprintf(stderr, "Unrecognized mformat <%s>\n", temp); + } + free(temp); +} +/*}}}*/ +static void parse_rc_file(char *name)/*{{{*/ +{ + FILE *in; + char line[4096], *p; + int len, lineno; + int all_blank; + int used_default_name = 0; + + if (!name) { + /* open default file */ + struct passwd *pw; + char *home; + home = getenv("HOME"); + if (!home) { + pw = getpwuid(getuid()); + if (!pw) { + fprintf(stderr, "Cannot determine home directory\n"); + exit(2); + } + home = pw->pw_dir; + } + name = new_array(char, strlen(home) + 12); + strcpy(name, home); + strcat(name, "/.mairixrc"); + used_default_name = 1; + } + + in = fopen(name, "r"); + if (!in) { + fprintf(stderr, "Cannot open %s, exiting\n", name); + exit(2); + } + + lineno = 0; + while(fgets(line, sizeof(line), in)) { + lineno++; + len = strlen(line); + if (len > sizeof(line) - 4) { + fprintf(stderr, "Line %d in %s too long, exiting\n", lineno, name); + exit(2); + } + + if (line[len-1] == '\n') { + line[len-1] = '\0'; + } + + /* Strip trailing comments. 
*/ + for (p=line; *p && !strchr("#!;%", *p); p++) ; + if (*p) *p = '\0'; + + /* Discard blank lines */ + all_blank = 1; + for (p=line; *p; p++) { + if (!isspace(*(unsigned char *)p)) { + all_blank = 0; + break; + } + } + + if (all_blank) continue; + + /* Now a real line to parse */ + if (!strncasecmp(p, "base", 4)) folder_base = copy_value(p); + else if (!strncasecmp(p, "folders", 7)) { + fprintf(stderr, "'folders=' option in rc file is depracated, use 'maildir='\n"); + add_folders(&maildir_folders, copy_value(p)); + } + else if (!strncasecmp(p, "maildir=", 8)) add_folders(&maildir_folders, copy_value(p)); + else if (!strncasecmp(p, "mh_folders=", 11)) { + fprintf(stderr, "'mh_folders=' option in rc file is depracated, use 'mh='\n"); + add_folders(&mh_folders, copy_value(p)); + } + else if (!strncasecmp(p, "mh=", 3)) add_folders(&mh_folders, copy_value(p)); + else if (!strncasecmp(p, "mbox=", 5)) add_folders(&mboxen, copy_value(p)); + else if (!strncasecmp(p, "omit=", 5)) add_folders(&omit, copy_value(p)); + + else if (!strncasecmp(p, "mformat=", 8)) { + parse_output_folder(p); + } + else if (!strncasecmp(p, "mfolder=", 8)) mfolder = copy_value(p); + else if (!strncasecmp(p, "database=", 9)) database_path = copy_value(p); + else if (!strncasecmp(p, "nochecks", 8)) skip_integrity_checks = 1; + else { + if (verbose) { + fprintf(stderr, "Unrecognized option at line %d in %s\n", lineno, name); + } + } + } + + fclose(in); + + if (used_default_name) free(name); +} +/*}}}*/ +static int compare_strings(const void *a, const void *b)/*{{{*/ +{ + const char **aa = (const char **) a; + const char **bb = (const char **) b; + return strcmp(*aa, *bb); +} +/*}}}*/ +static int check_message_list_for_duplicates(struct msgpath_array *msgs)/*{{{*/ +{ + /* Caveat : only examines the file-per-message case */ + char **sorted_paths; + int i, n, nn; + int result; + + n = msgs->n; + sorted_paths = new_array(char *, n); + for (i=0, nn=0; i<n; i++) { + switch (msgs->type[i]) { + case 
MTY_MBOX: + break; + case MTY_DEAD: + assert(0); + break; + case MTY_FILE: + sorted_paths[nn++] = msgs->paths[i].src.mpf.path; + break; + } + } + qsort(sorted_paths, nn, sizeof(char *), compare_strings); + + result = 0; + for (i=1; i<nn; i++) { + if (!strcmp(sorted_paths[i-1], sorted_paths[i])) { + result = 1; + break; + } + } + + free(sorted_paths); + return result; +} +/*}}}*/ + +static void emit_int(int x)/*{{{*/ +{ + char buf1[20], buf2[20]; + char *p, *q; + int neg=0; + p = buf1; + *p = '0'; /* In case x is zero */ + if (x < 0) { + neg = 1; + x = -x; + } + while (x) { + *p++ = '0' + (x % 10); + x /= 10; + } + p--; + q = buf2; + if (neg) *q++ = '-'; + while (p >= buf1) { + *q++ = *p--; + } + write(2, buf2, q-buf2); + return; +} +/*}}}*/ +void out_of_mem(char *file, int line, size_t size)/*{{{*/ +{ + /* Hairy coding ahead - can't use any [s]printf, itoa etc because + * those might try to use the heap! */ + + int filelen; + char *p; + + static char msg1[] = "Out of memory (at "; + static char msg2[] = " bytes)\n"; + /* Perhaps even strlen is unsafe in this situation? */ + p = file; + while (*p) p++; + filelen = p - file; + write(2, msg1, sizeof(msg1)); + write(2, file, filelen); + write(2, ":", 1); + emit_int(line); + write(2, ", ", 2); + emit_int(size); + write(2, msg2, sizeof(msg2)); + exit(2); +} +/*}}}*/ +void report_error(const char *str, const char *filename)/*{{{*/ +{ + if (filename) { + int len = strlen(str) + strlen(filename) + 4; + char *t; + t = new_array(char, len); + sprintf(t, "%s '%s'", str, filename); + perror(t); + free(t); + } else { + perror(str); + } +} +/*}}}*/ +static void print_copyright(void)/*{{{*/ +{ + fprintf(stderr, + "mairix %s, Copyright (C) 2002-2010 Richard P. 
Curnow\n" + "mairix comes with ABSOLUTELY NO WARRANTY.\n" + "This is free software, and you are welcome to redistribute it\n" + "under certain conditions; see the GNU General Public License for details.\n\n", + PROGRAM_VERSION); +} +/*}}}*/ +static void print_version(void)/*{{{*/ +{ + fprintf(stdout, + "mairix %s\n", + PROGRAM_VERSION); +} +/*}}}*/ +static void handlesig(int signo)/*{{{*/ +{ + unlock_and_exit(7); +} +/*}}}*/ +static void usage(void)/*{{{*/ +{ + print_copyright(); + + printf("mairix [-h] : Show help\n" + "mairix [-f <rcfile>] [-v] [-p] [-F] : Build index\n" + "mairix [-f <rcfile>] [-a] [-t] expr1 ... exprN : Run search\n" + "mairix [-f <rcfile>] -d : Dump database to stdout\n" + "-h : show this help\n" + "-f <rcfile> : use alternative rc file (default ~/.mairixrc)\n" + "-V : show version\n" + "-v : be verbose\n" + "-p : purge messages that no longer exist\n" + "-F : fast scan for maildir and MH folders (no mtime or size checks)\n" + "-a : add new matches to match folder (default : clear it first)\n" + "-x : display excerpt of message headers (default : use match folder)\n" + "-t : include all messages in same threads as matching messages\n" + "-o <mfolder> : override setting of mfolder from mairixrc file\n" + "-r : force raw output regardless of mformat setting in mairixrc file\n" + "-H : force hard links rather than symbolic ones\n" + "expr_i : search expression (all expr's AND'ed together):\n" + " word : match word in message body and major headers\n" + " t:word : match word in To: header\n" + " c:word : match word in Cc: header\n" + " f:word : match word in From: header\n" + " a:word : match word in To:, Cc: or From: headers (address)\n" + " s:word : match word in Subject: header\n" + " b:word : match word in message body\n" + " m:word : match word in Message-ID: header\n" + " n:word : match name of attachment within message\n" + " F:flags : match on message flags (s=seen,r=replied,f=flagged,-=negate)\n" + " p:substring : match substring of 
path\n" + " d:start-end : match date range\n" + " z:low-high : match messages in size range\n" + " bs:word : match word in Subject: header or body (or any other group of prefixes)\n" + " s:word1,word2 : match both words in Subject:\n" + " s:word1/word2 : match either word or both words in Subject:\n" + " s:~word : match messages not containing word in Subject:\n" + " s:substring= : match substring in any word in Subject:\n" + " s:^substring= : match left-anchored substring in any word in Subject:\n" + " s:substring=2 : match substring with <=2 errors in any word in Subject:\n" + "\n" + " (See documentation for more examples)\n" + ); +} + /*}}}*/ +/* Notes on folder management: {{{ + + Assumption is that the user wants to keep the 'mfolder' directories under a + common root with the real maildir folders. This allows a common value for + mutt's 'folder' variable => the '+' and '=' prefixes work better. This + means the indexer here can't just scan down all subdirectories of a single + ancestor, because it'll pick up its own mfolders. So, use environment + variables to tailor the folders. + + MAIRIX_FOLDER_BASE is the common parent directory of the folders (aka + mutt's 'folder' variable) + + MAIRIX_MAILDIR_FOLDERS, MAIRIX_MH_FOLDERS, MAIRIX_MBOXEN are + colon-separated lists of folders to index, with '...' after a + component meaning any maildir underneath it. + + MAIRIX_MFOLDER is the folder to put the match data. + + For example, if + MAIRIX_FOLDER_BASE = "/home/foobar/mail" + MAIRIX_FOLDERS = "inbox:lists...:action:archive..." + MAIRIX_MFOLDER = "mf" + + then /home/foobar/mail/mf/{new,cur,tmp} contain the output of the search. 
+ }}} */ + +int main (int argc, char **argv)/*{{{*/ +{ + struct msgpath_array *msgs; + struct database *db = NULL; + + char *arg_rc_file_path = NULL; + char *arg_mfolder = NULL; + char *e; + int do_augment = 0; + int do_threads = 0; + int do_search = 0; + int do_purge = 0; + int any_updates = 0; + int any_purges = 0; + int do_help = 0; + int do_raw_output = 0; + int do_excerpt_output = 0; + int do_dump = 0; + int do_integrity_checks = 1; + int do_forced_unlock = 0; + int do_fast_index = 0; + + unsigned int forced_hash_key = CREATE_RANDOM_DATABASE_HASH; + + struct globber_array *omit_globs; + + int result; + + setlocale(LC_CTYPE, ""); + + while (++argv, --argc) { + if (!*argv) { + break; + } else if (!strcmp(*argv, "-f") || !strcmp(*argv, "--rcfile")) { + ++argv, --argc; + if (!argc) { + fprintf(stderr, "No filename given after -f argument\n"); + exit(1); + } + arg_rc_file_path = *argv; + } else if (!strcmp(*argv, "-t") || !strcmp(*argv, "--threads")) { + do_search = 1; + do_threads = 1; + } else if (!strcmp(*argv, "-a") || !strcmp(*argv, "--augment")) { + do_search = 1; + do_augment = 1; + } else if (!strcmp(*argv, "-o") || !strcmp(*argv, "--mfolder")) { + ++argv, --argc; + if (!argc) { + fprintf(stderr, "No folder name given after -o argument\n"); + exit(1); + } + arg_mfolder = *argv; + } else if (!strcmp(*argv, "-p") || !strcmp(*argv, "--purge")) { + do_purge = 1; + } else if (!strcmp(*argv, "-d") || !strcmp(*argv, "--dump")) { + do_dump = 1; + } else if (!strcmp(*argv, "-r") || !strcmp(*argv, "--raw-output")) { + do_raw_output = 1; + } else if (!strcmp(*argv, "-x") || !strcmp(*argv, "--excerpt-output")) { + do_excerpt_output = 1; + } else if (!strcmp(*argv, "-H") || !strcmp(*argv, "--force-hardlinks")) { + do_hardlinks = 1; + } else if (!strcmp(*argv, "-Q") || !strcmp(*argv, "--no-integrity-checks")) { + do_integrity_checks = 0; + } else if (!strcmp(*argv, "--unlock")) { + do_forced_unlock = 1; + } else if (!strcmp(*argv, "-F") || + !strcmp(*argv, 
"--fast-index")) { + do_fast_index = 1; + } else if (!strcmp(*argv, "--force-hash-key-new-database")) { + ++argv, --argc; + if (!argc) { + fprintf(stderr, "No hash key given after --force-hash-key-new-database\n"); + exit(1); + } + if ( 1 != sscanf(*argv, "%u", &forced_hash_key) ) + { + fprintf(stderr, "Hash key given after --force-hash-key-new-database could not be parsed\n"); + exit(1); + } + } else if (!strcmp(*argv, "-v") || !strcmp(*argv, "--verbose")) { + verbose = 1; + } else if (!strcmp(*argv, "-V") || !strcmp(*argv, "--version")) { + print_version(); + exit(0); + } else if (!strcmp(*argv, "-h") || + !strcmp(*argv, "--help")) { + do_help = 1; + } else if ((*argv)[0] == '-') { + fprintf(stderr, "Unrecognized option %s\n", *argv); + } else if (!strcmp(*argv, "--")) { + /* End of args */ + break; + } else { + /* standard args start */ + break; + } + } + + if (do_help) { + usage(); + exit(0); + } + + if (verbose) { + print_copyright(); + } + + if (*argv) { + /* There are still args to process */ + do_search = 1; + } + + parse_rc_file(arg_rc_file_path); + + if (getenv("MAIRIX_FOLDER_BASE")) { + folder_base = getenv("MAIRIX_FOLDER_BASE"); + } + + if (getenv("MAIRIX_MAILDIR_FOLDERS")) { + maildir_folders = getenv("MAIRIX_MAIDIR_FOLDERS"); + } + + if (getenv("MAIRIX_MH_FOLDERS")) { + mh_folders = getenv("MAIRIX_MH_FOLDERS"); + } + + if ((e = getenv("MAIRIX_MBOXEN"))) { + mboxen = e; + } + + if (getenv("MAIRIX_MFOLDER")) { + mfolder = getenv("MAIRIX_MFOLDER"); + } + + if (getenv("MAIRIX_DATABASE")) { + database_path = getenv("MAIRIX_DATABASE"); + } + + if (arg_mfolder) { + mfolder = arg_mfolder; + } + + if (skip_integrity_checks) { + do_integrity_checks = 0; + } + + if (!folder_base) { + fprintf(stderr, "No folder_base/MAIRIX_FOLDER_BASE set\n"); + exit(2); + } + + if (!database_path) { + fprintf(stderr, "No database/MAIRIX_DATABASE set\n"); + exit(2); + } + + if (do_raw_output) { + output_folder_type = FT_RAW; + } else if (do_excerpt_output) { + output_folder_type 
= FT_EXCERPT; + } + + if (omit) { + omit_globs = colon_sep_string_to_globber_array(omit); + } else { + omit_globs = NULL; + } + + /* Lock database. + * Prevent concurrent updates due to parallel indexing (e.g. due to stuck + * cron jobs). + * Prevent concurrent searching and indexing. */ + + signal(SIGHUP, handlesig); + signal(SIGINT, handlesig); + signal(SIGQUIT, handlesig); + + lock_database(database_path, do_forced_unlock); + + if (do_dump) { + dump_database(database_path); + result = 0; + + } else if (do_search) { + int len; + char *complete_mfolder; + enum filetype ftype; + + if (!mfolder) { + switch (output_folder_type) { + case FT_RAW: + case FT_EXCERPT: + break; + default: + fprintf(stderr, "No mfolder/MAIRIX_MFOLDER set\n"); + unlock_and_exit(2); + } + mfolder = new_string(""); + } + + /* complete_mfolder is needed by search_top() and member_of() so + compute it once here rather than in search_top() as well */ + if ((mfolder[0] == '/') || + ((mfolder[0] == '.') && (mfolder[1] == '/'))) { + complete_mfolder = new_string(mfolder); + } else { + len = strlen(folder_base) + strlen(mfolder) + 2; + complete_mfolder = new_array(char, len); + strcpy(complete_mfolder, folder_base); + strcat(complete_mfolder, "/"); + strcat(complete_mfolder, mfolder); + } + /* check whether mfolder output would destroy a mail folder or mbox */ + switch (output_folder_type) { + case FT_RAW: + case FT_EXCERPT: + break; + default: + if ((member_of(complete_mfolder,folder_base, maildir_folders, FT_MAILDIR, omit_globs)|| + member_of (complete_mfolder, folder_base, mh_folders, FT_MH, omit_globs) || + member_of (complete_mfolder, folder_base, mboxen, FT_MBOX, omit_globs))) { + fprintf (stderr, + "You asked search results to go to the folder '%s'.\n" + "That folder appears to be one of the indexed mail folders!\n" + "For your own good, I refuse to output search results to an indexed mail folder.\n", + mfolder); + unlock_and_exit(3); + } + } + + ftype = classify_file(database_path); + if 
(ftype != M_FILE) { + fprintf(stderr, "No database file '%s' is present.\nYou need to do an indexing run first.\n", + database_path); + unlock_and_exit(3); + } + result = search_top(do_threads, do_augment, database_path, complete_mfolder, argv, output_folder_type, verbose); + + } else { + enum filetype ftype; + + if (!maildir_folders && !mh_folders && !mboxen) { + fprintf(stderr, "No [mh_]folders/mboxen/MAIRIX_[MH_]FOLDERS set\n"); + unlock_and_exit(2); + } + + if (verbose) printf("Finding all currently existing messages...\n"); + msgs = new_msgpath_array(); + if (maildir_folders) { + build_message_list(folder_base, maildir_folders, FT_MAILDIR, msgs, omit_globs); + } + if (mh_folders) { + build_message_list(folder_base, mh_folders, FT_MH, msgs, omit_globs); + } + + /* The next call sorts the msgs array as part of looking for duplicates. */ + if (check_message_list_for_duplicates(msgs)) { + fprintf(stderr, "Message list contains duplicates - check your 'folders' setting\n"); + unlock_and_exit(2); + } + + /* Try to open existing database */ + ftype = classify_file(database_path); + if (ftype == M_FILE) { + if (verbose) printf("Reading existing database...\n"); + db = new_database_from_file(database_path, do_integrity_checks); + if (verbose) printf("Loaded %d existing messages\n", db->n_msgs); + } else if (ftype == M_NONE) { + if (verbose) printf("Starting new database\n"); + db = new_database( forced_hash_key ); + } else { + fprintf(stderr, "database path %s is not a file; you can't put the database there\n", database_path); + unlock_and_exit(2); + } + + build_mbox_lists(db, folder_base, mboxen, omit_globs); + + any_updates = update_database(db, msgs->paths, msgs->n, do_fast_index); + if (do_purge) { + any_purges = cull_dead_messages(db, do_integrity_checks); + } + if (any_updates || any_purges) { + /* For now write it every time. This is obviously the most reliable method. 
*/ + write_database(db, database_path, do_integrity_checks); + } + +#if 0 + get_db_stats(db); +#endif + + free_database(db); + free_msgpath_array(msgs); + + result = 0; + } + + unlock_database(); + + return result; +} +/*}}}*/ diff --git a/src/mairix/mairix.h b/src/mairix/mairix.h @@ -0,0 +1,402 @@ +/* + mairix - message index builder and finder for maildir folders. + + ********************************************************************** + * Copyright (C) Richard P. Curnow 2002,2003,2004,2005,2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + + +#ifndef MAIRIX_H +#define MAIRIX_H + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include "memmac.h" + +struct msgpath {/*{{{*/ + /* The 'selector' for this union is the corresponding entry of type 'enum + * message_type' */ + union { + struct { + char *path; + size_t size; /* size of the message in bytes */ + time_t mtime; /* mtime of message file on disc */ + } mpf; /* message per file */ + struct { + int file_index; /* index into table of mbox files */ + int msg_index; /* index of message within the file */ + } mbox; /* for messages in mbox format folders */ + } src; + + /* Now fields that are common to both types of message. 
*/ + time_t date; /* representation of Date: header in message */ + int tid; /* thread-id */ + + /* Message flags. */ + unsigned int seen:1; + unsigned int replied:1; + unsigned int flagged:1; + + /* + other stuff eventually */ +}; +/*}}}*/ + +enum message_type {/*{{{*/ + MTY_DEAD, /* msg no longer exists, i.e. don't report in searches, + prune it on a '-p' run. */ + MTY_FILE, /* msg <-> file in 1-1 correspondence e.g. maildir, MH */ + MTY_MBOX /* multiple msgs per file : MBOX format file */ +}; +/*}}}*/ +struct msgpath_array {/*{{{*/ + enum message_type *type; + struct msgpath *paths; + int n; + int max; +}; +/*}}}*/ + +struct matches {/*{{{*/ + unsigned char *msginfo; + int n; /* bytes in use */ + int max; /* bytes allocated */ + unsigned long highest; +}; +/*}}}*/ +struct token {/*{{{*/ + char *text; + unsigned long hashval; + /* to store delta-compressed info of which msgpaths match the token */ + struct matches match0; +}; +/*}}}*/ +struct token2 {/*{{{*/ + char *text; + unsigned long hashval; + /* to store delta-compressed info of which msgpaths match the token */ + struct matches match0; + struct matches match1; +}; +/*}}}*/ +struct toktable {/*{{{*/ + struct token **tokens; + int n; /* # in use */ + int size; /* # allocated */ + unsigned int mask; /* for masking down hash values */ + int hwm; /* number to have before expanding */ +}; +/*}}}*/ +struct toktable2 {/*{{{*/ + struct token2 **tokens; + int n; /* # in use */ + int size; /* # allocated */ + unsigned int mask; /* for masking down hash values */ + int hwm; /* number to have before expanding */ +}; +/*}}}*/ + +enum content_type {/*{{{*/ + CT_TEXT_PLAIN, + CT_TEXT_HTML, + CT_TEXT_OTHER, + CT_MESSAGE_RFC822, + CT_OTHER +}; +/*}}}*/ +struct rfc822; +struct attachment {/*{{{*/ + struct attachment *next; + struct attachment *prev; + enum content_type ct; + char *filename; + union attachment_body { + struct normal_attachment_body { + int len; + char *bytes; + } normal; + struct rfc822 *rfc822; + } data; +}; 
+/*}}}*/ +struct headers {/*{{{*/ + char *to; + char *cc; + char *from; + char *subject; + + /* The following are needed to support threading */ + char *message_id; + char *in_reply_to; + char *references; + + struct { + unsigned int seen:1; + unsigned int replied:1; + unsigned int flagged:1; + } flags; + + time_t date; +}; +/*}}}*/ +struct rfc822 {/*{{{*/ + struct headers hdrs; + struct attachment atts; +}; +/*}}}*/ + +typedef char checksum_t[16]; + +struct message_list {/*{{{*/ + struct message_list *next; + off_t start; + size_t len; +}; +/*}}}*/ +struct mbox {/*{{{*/ + /* If path==NULL, this indicates that the mbox is dead, i.e. no longer + * exists. */ + char *path; + /* As read in from database (i.e. current last time mairix scan was run.) */ + time_t file_mtime; + size_t file_size; + /* As found in the filesystem now. */ + time_t current_mtime; + size_t current_size; + /* After reconciling a loaded database with what's on the disc, this entry + stores how many of the msgs that used to be there last time are still + present at the head of the file. Thus, all messages beyond that are + treated as dead, and scanning starts at that point to find 'new' messages + (whch may actually be old ones that have moved, but they're treated as + new.) */ + int n_old_msgs_valid; + + /* Hold list of new messages and their number. Number is temporary - + * eventually just list walking in case >=2 have to be reattached. */ + struct message_list *new_msgs; + int n_new_msgs; + + int n_so_far; /* Used during database load. */ + + int n_msgs; /* Number of entries in 'start' and 'len' */ + int max_msgs; /* Allocated size of 'start' and 'len' */ + /* File offset to the start of each message (first line of real header, not to mbox 'From ' line) */ + off_t *start; + /* Length of each message */ + size_t *len; + /* Checksums on whole messages. 
*/ + checksum_t *check_all; + +}; +/*}}}*/ +struct database {/*{{{*/ + /* Used to hold an entire mapping between an array of filenames, each + containing a single message, and the sets of tokens that occur in various + parts of those messages */ + + enum message_type *type; + struct msgpath *msgs; /* Paths to messages */ + int n_msgs; /* Number in use */ + int max_msgs; /* Space allocated */ + + struct mbox *mboxen; + int n_mboxen; /* number in use. */ + int max_mboxen; /* space allocated */ + + /* Seed for hashing in the token tables. Randomly created for + * each new database - avoid DoS attacks through carefully + * crafted messages. */ + unsigned int hash_key; + + /* Token tables */ + struct toktable *to; + struct toktable *cc; + struct toktable *from; + struct toktable *subject; + struct toktable *body; + struct toktable *attachment_name; + + /* Encoding chain 0 stores all msgids appearing in the following message headers: + * Message-Id, In-Reply-To, References. Used for thread reconciliation. + * Encoding chain 1 stores just the Message-Id. Used for search by message ID. + */ + struct toktable2 *msg_ids; +}; +/*}}}*/ + +enum folder_type {/*{{{*/ + FT_MAILDIR, + FT_MH, + FT_MBOX, + FT_RAW, + FT_EXCERPT +}; +/*}}}*/ + +struct string_list {/*{{{*/ + struct string_list *next; + struct string_list *prev; + char *data; +}; +/*}}}*/ + +struct msg_src { + enum {MS_FILE, MS_MBOX} type; + char *filename; + off_t start; + size_t len; +}; + +/* Outcomes of checking a filename/dirname to see whether to keep on looking + * at filenames within this dir. 
*/ +enum traverse_check { + TRAV_PROCESS, /* Continue looking at this entry */ + TRAV_IGNORE, /* Ignore just this dir entry */ + TRAV_FINISH /* Ignore this dir entry and don't bother looking at the rest of the directory */ +}; + +struct traverse_methods { + int (*filter)(const char *, const struct stat *); + enum traverse_check (*scrutinize)(int, const char *); +}; + +extern struct traverse_methods maildir_traverse_methods; +extern struct traverse_methods mh_traverse_methods; +extern struct traverse_methods mbox_traverse_methods; + +extern int verbose; /* cmd line -v switch */ +extern int do_hardlinks; /* cmd line -H switch */ + +/* Lame fix for systems where NAME_MAX isn't defined after including the above + * set of .h files (Solaris, FreeBSD so far). Probably grossly oversized but + * it'll do. */ + +#if !defined(NAME_MAX) +#define NAME_MAX 4096 +#endif + +/* In glob.c */ +struct globber; +struct globber_array; + +struct globber *make_globber(const char *wildstring); +void free_globber(struct globber *old); +int is_glob_match(struct globber *g, const char *s); +struct globber_array *colon_sep_string_to_globber_array(const char *in); +int is_globber_array_match(struct globber_array *ga, const char *s); +void free_globber_array(struct globber_array *in); + +/* In hash.c */ +unsigned int hashfn( unsigned char *k, unsigned int length, unsigned int initval); + +/* In dirscan.c */ +struct msgpath_array *new_msgpath_array(void); +int valid_mh_filename_p(const char *x); +void free_msgpath_array(struct msgpath_array *x); +void string_list_to_array(struct string_list *list, int *n, char ***arr); +void split_on_colons(const char *str, int *n, char ***arr); +void build_message_list(char *folder_base, char *folders, enum folder_type ft, + struct msgpath_array *msgs, struct globber_array *omit_globs); + +/* In rfc822.c */ +struct rfc822 *make_rfc822(char *filename); +void free_rfc822(struct rfc822 *msg); +enum data_to_rfc822_error { + DTR8_OK, + DTR8_MISSING_END, /* missing 
endpoint marker. */ + DTR8_MULTIPART_SANS_BOUNDARY, /* multipart with no boundary string defined */ + DTR8_BAD_HEADERS, /* corrupt headers */ + DTR8_BAD_ATTACHMENT /* corrupt attachment (e.g. no body part) */ +}; +struct rfc822 *data_to_rfc822(struct msg_src *src, char *data, int length, enum data_to_rfc822_error *error); +void create_ro_mapping(const char *filename, unsigned char **data, int *len); +void free_ro_mapping(unsigned char *data, int len); +char *format_msg_src(struct msg_src *src); + +/* In tok.c */ +struct toktable *new_toktable(void); +struct toktable2 *new_toktable2(void); +void free_token(struct token *x); +void free_token2(struct token2 *x); +void free_toktable(struct toktable *x); +void free_toktable2(struct toktable2 *x); +void add_token_in_file(int file_index, unsigned int hash_key, char *tok_text, struct toktable *table); +void check_and_enlarge_encoding(struct matches *m); +void insert_index_on_encoding(struct matches *m, int idx); +void add_token2_in_file(int file_index, unsigned int hash_key, char *tok_text, struct toktable2 *table, int add_to_chain1); + +/* In db.c */ +#define CREATE_RANDOM_DATABASE_HASH 0 +struct database *new_database(unsigned int hash_key); +struct database *new_database_from_file(char *db_filename, int do_integrity_checks); +void free_database(struct database *db); +void maybe_grow_message_arrays(struct database *db); +void tokenise_message(int file_index, struct database *db, struct rfc822 *msg); +int update_database(struct database *db, struct msgpath *sorted_paths, int n_paths, int do_fast_index); +void check_database_integrity(struct database *db); +int cull_dead_messages(struct database *db, int do_integrity_checks); + +/* In mbox.c */ +void build_mbox_lists(struct database *db, const char *folder_base, + const char *mboxen_paths, struct globber_array *omit_globs); +int add_mbox_messages(struct database *db); +void compute_checksum(const char *data, size_t len, checksum_t *csum); +void cull_dead_mboxen(struct 
database *db); +unsigned int encode_mbox_indices(unsigned int mb, unsigned int msg); +void decode_mbox_indices(unsigned int index, unsigned int *mb, unsigned int *msg); +int verify_mbox_size_constraints(struct database *db); +void glob_and_expand_paths(const char *folder_base, char **paths_in, int n_in, char ***paths_out, int *n_out, const struct traverse_methods *methods, struct globber_array *omit_globs); + +/* In glob.c */ +struct globber; + +struct globber *make_globber(const char *wildstring); +void free_globber(struct globber *old); +int is_glob_match(struct globber *g, const char *s); + +/* In writer.c */ +void write_database(struct database *db, char *filename, int do_integrity_checks); + +/* In search.c */ +int search_top(int do_threads, int do_augment, char *database_path, char *complete_mfolder, char **argv, enum folder_type ft, int verbose); + +/* In stats.c */ +void get_db_stats(struct database *db); + +/* In dates.c */ +int scan_date_string(char *in, time_t *start, int *has_start, time_t *end, int *has_end); + +/* In dumper.c */ +void dump_database(char *filename); + +/* In strexpand.c */ +char *expand_string(const char *p); + +/* In dotlock.c */ +void lock_database(char *path, int forced_unlock); +void unlock_database(void); +void unlock_and_exit(int code); + +/* In mairix.c */ +void report_error(const char *str, const char *filename); + +#endif /* MAIRIX_H */ diff --git a/src/mairix/mairix.spec b/src/mairix/mairix.spec @@ -0,0 +1,45 @@ +Name: mairix +Summary: A maildir indexer and searcher +Version: 0.23 +Release: 1 +Source: %{name}-%{version}.tar.gz +License: GPL +Group: Application/Internet +Packager: Richard P. Curnow +BuildRoot: %{_tmppath}/%{name}-%{version}-root-%(id -u -n) +Requires: info +URL: http://www.rc0.org.uk/mairix + +%description +mairix is a tool for indexing email messages stored in maildir format folders +and performing fast searches on the resulting index. 
The output is a new +maildir folder containing symbolic links to the matched messages. + +%prep +%setup -q + +%build +CFLAGS="$RPM_OPT_FLAGS" ./configure --prefix=%{_prefix} +make + +%install +rm -rf $RPM_BUILD_ROOT +cd $RPM_BUILD_DIR/mairix-%{version} +make install DESTDIR=$RPM_BUILD_ROOT mandir=$RPM_BUILD_ROOT/%{_mandir} +cp README dotmairixrc.eg .. + +%files +%{_bindir}/mairix +%doc README +%doc dotmairixrc.eg +%doc %{_mandir}/man1/mairix.1.gz +%doc %{_mandir}/man5/mairixrc.5.gz + +%changelog +* Fri Mar 24 2006 Andre Costa <blueser@gmail.com> - 0.18 +- Updated to version 0.18 +- Included URL on header +- removed references to 'mairix.txt', 'mairix.html' and 'mairix.info' +- .info files have been deprecated +- removed useless 'post' section +- makefile's "mandir" is pointing to /usr/man instead of /usr/share/man diff --git a/src/mairix/mairixrc.5 b/src/mairix/mairixrc.5 @@ -0,0 +1,405 @@ +.TH MAIRIXRC 5 "January 2006" +.de Sx +.PP +.ne \\$1 +.nf +.na +.RS 12 +.. +.de Ex +.RE +.fi +.ad +.IP "" 7 +.. +.SH NAME +mairixrc \- configuration file for mairix(1) +.SH SYNOPSIS +$HOME/.mairixrc +.SH DESCRIPTION +.PP +The +.I mairixrc +file tells +.B mairix +where your mail folders are located. It also tells +.B mairix +where the results of searches are to be written. + +.B mairix +searches for this file at +.I $HOME/.mairixrc +unless the +.B -f +option is used. + +The directives +.BR base , +.BR mfolder , +and +.B database +must always appear in the file. There must also be some folder definitions +(using the +.BR maildir , +.BR mh , +or +.BR mbox ) +directives. + +.SS Comments +Any line starting with a '#' character is treated as a comment. + +.SS Directives +.TP +.BI base= base-directory +.br +This defines the path to the common parent directory of all your +maildir folders. + +If the path is relative, it is treated as relative to the location of the +.I mairixrc +file. 
+ +.TP +.BI maildir= list-of-folder-specifications +This is a colon-separated list of the Maildir folders (relative to +`base') that you want indexed. Any entry that ends `...' is +recursively scanned to find any Maildir folders underneath it. + +More than one line starting with `maildir' can be included. In +this case, mairix joins the lines together with colons as though a +single list of folders had been given on a single very long line. + +Each colon-separated entry may be a wildcard. See the discussion +under mbox (below) for the wildcard syntax. For example +.Sx 1 +maildir=zzz/foo*... +.Ex +will match maildir folders like these (relative to the +.IR base-directory ) +.Sx 4 +zzz/foobar/xyz +zzz/fooquux +zzz/foo +zzz/fooabc/u/v/w +.Ex + +and +.Sx 1 +maildir=zzz/foo[abc]* +.Ex +will match maildir folders like these (relative to the folder_base) +.Sx 4 +zzz/fooa +zzz/fooaaaxyz +zzz/foobcd +zzz/fooccccccc +.Ex +If a folder name contains a colon, you can write this by using the +sequence '\\:' to escape the colon. Otherwise, the backslash +character is treated normally. (If the folder name actually +contains the sequence '\\:', you're out of luck.) + +.TP +.BI mh= list-of-folder-specifications +.br +This is a colon-separated list of the MH folders (relative to +`base') that you want indexed. Any entry that ends '...' is +recursively scanned to find any MH folders underneath it. + +More than one line starting with 'mh' can be included. In this +case, mairix joins the lines together with colons as though a +single list of folders had been given on a single very long line. + +Each colon-separated entry may be a wildcard, see the discussion +under maildir (above) and mbox (below) for the syntax and +semantics of specifying wildcards. 
+ +.B mairix +recognizes the types of MH folders created by the following email applications: +.RS 7 +.IP "*" +xmh +.IP "*" +sylpheed +.IP "*" +claws-mail +.IP "*" +evolution +.IP "*" +NNML +.IP "*" +Mew +.RE + +.TP +.BI mbox= list-of-folder-specifications +.br +This is a colon-separated list of the mbox folders (relative to +`base') that you want indexed. + +Each colon-separated item in the list can be suffixed by '...'. +If the item matches a regular file, that file is treated as a mbox +folder and the '...' suffix is ignored. If the item matches a +directory, a recursive scan of everything inside that directory is +made, and all regular files are initially considered as mbox +folders. (Any directories found in this scan are themselves +scanned, since the scan is recursive.) + +Each colon-separated item may contain wildcard operators, but only +in its final path component. The wildcard operators currently +supported are + +.TP +* +.br +Match zero or more characters (each character matched is +arbitrary) + +.TP +? +.br +Match exactly one arbitrary character + +.TP +[abcs-z] +.br +Character class : match a single character from the set a, b, +c, s, t, u, v, w, x, y and z. + +To include a literal ']' in the class, place it immediately +after the opening '['. To include a literal '-' in the +class, place it immediately before the closing ']'. + +If these metacharacters are included in non-final path components, +they have no special meaning. + +Here are some examples + +.TP +mbox=foo/bar* +.br +matches 'foo/bar', 'foo/bar1', 'foo/barrrr' etc + +.TP +mbox=foo*/bar* +.br +matches 'foo*/bar', 'foo*/bar1', 'foo*/barrrr' etc + +.TP +mbox=foo/* +.br +matches 'foo/bar', 'foo/bar1', 'foo/barrrr', 'foo/foo', +\'foo/x' etc + +.TP +mbox=foo... +.br +matches any regular file in the tree rooted at 'foo' + +.TP +mbox=foo/*... +.br +same as before + +.TP +mbox=foo/[a-z]*... 
+.br +matches 'foo/a', 'foo/aardvark/xxx', 'foo/zzz/foobar', +\'foo/w/x/y/zzz', but not 'foo/A/foobar' + +Regular files that are mbox folder candidates are examined +internally. Only files containing standard mbox 'From ' separator +lines will be scanned for messages. + +If a regular file has a name ending in '.gz', and gzip support is +compiled into the +.B mairix +binary, the file will be treated as a gzipped mbox. + +If a regular file has a name ending in '.bz2', and bzip support is +compiled into the +.B mairix +binary, the file will be treated as a bzip2'd mbox. + +More than one line starting with 'mbox' can be included. In this +case, +.B mairix +joins the lines together with colons as though a +single list of folders had been given on a single very long line. + +.B mairix +performs no locking of mbox folders when it is accessing +them. If a mail delivery program is modifying the mbox at the +same time, it is likely that one or more messages in the mbox will +never get indexed by +.B mairix +(until the database is removed and recreated from scratch, anyway.) The +assumption is that +.B mairix +will be used to index archive folders rather than incoming ones, so this is +unlikely to be much of a problem in reality. + +.B mairix +can support a maximum of 65536 separate mboxes, and a +maximum of 65536 messages within any one mbox. + +.TP +.BI omit= list-of-glob-patterns +This is a colon-separated list of glob patterns for folders to be omitted from +the indexing. This allows wide wildcards and recursive elements to be used +in the +.BR maildir , mh ", and" mbox +directives, with the +.B omit +option used to selectively remove unwanted folders from the folder +lists. + +Within the glob patterns, a single '*' matches any +sequence of characters other than '/'. However '**' matches any +sequence of characters including '/'. This allows glob patterns +to be constructed which have a wildcard for just one directory +component, or for any number of directory components. 
+ +The _omit_ option can be specified as many times as required so +that the list of patterns doesn't all have to fit on one line. + +As an example, +.Sx 2 +mbox=bulk... +omit=bulk/spam* +.Ex +will index all mbox folders at any level under the 'bulk' +subdirectory of the base folder, except for those folders whose +names start 'bulk/spam', e.g. 'bulk/spam', 'bulk/spam2005' etc. + +In contrast, +.Sx 2 +mbox=bulk... +omit=bulk/spam** +.Ex +will index all mbox folders at any level under the 'bulk' +subdirectory of the base folder, except for those folders whose +names start 'bulk/spam', e.g. 'bulk/spam', 'bulk/spam2005', +\'bulk/spam/2005', 'bulk/spam/2005/jan' etc. + +.TP +.B nochecks +This takes no arguments. If a line starting with +.B nochecks +is present, it is the equivalent of specifying the +.B -Q +flag to every indexing run. + +.TP +.BI mfolder= match-folder-name +This defines the name of the folder (within the directory +specified by +.BR base ) +into which the search mode writes its output. (If the +.B mformat +used is 'raw' or 'excerpt', then this setting is not used and may be omitted.) + +The +.B mfolder +setting may be over-ridden for a particular search by using the +.B -o +option to +.BR mairix . + +.B mairix +will refuse to output search results to a folder that appears to be amongst +those that are indexed. This is to prevent accidental deletion of emails. + +If the mfolder value starts with '/' or './', it is +taken as a pathname in its own right. This allows you to specify +absolute paths and paths relative to the current directory where +the mfolder should be written. Otherwise, the value of mfolder is +appended to the value of base, in the same way as for the source +folders. + +.TP +.BI mformat= format +This defines the type of folder used for the match folder where +the search results go. There are five valid settings for +.IR format , +namely 'maildir', 'mh', 'mbox', 'raw' or 'excerpt'. 
If the 'raw' setting is +used then +.B mairix +will just print out the path names of the files that match and no match folder +will be created. If the 'excerpt' setting is used, +.B mairix +will also print out the To:, Cc:, From:, Subject: and Date: headers of the +matching messages. 'maildir' is the default if this option is not +defined. The setting is case-insensitive. + +.TP +.BI database= path-to-database +.br +This defines the path where +.BR mairix 's +index database is kept. You can keep this file anywhere you like. + +Currently, +.B mairix +will place a single database file at the location indicated by +.IR path-to-database . +However, a future version of +.B mairix +may instead place a directory containing several files at this location. + +.I path-to-database +should be an absolute pathname (starting with '/'). If a relative pathname is +used, it will be interpreted relative to the current directory at the time +.B mairix +is run, +.RB ( not +relative to the location of the +.I mairixrc +file or anything like that.) + +.SS Expansions + +The part of each line in '.mairixrc' following the equals sign can +contain the following types of expansion: + +.TP +.B Home directory expansion +If the sequence '~/' appears at the start of the text after the +equals sign, it is expanded to the user's home directory. Example: +.Sx 1 +database=~/Mail/mairix_database +.Ex +.TP +.B Environment expansion +If a '$' is followed by a sequence of alpha-numeric characters (or +\'_'), the whole string is replaced by looking up the corresponding +environment variable. Similarly, if '$' is followed by an open +brace ('{'), everything up to the next close brace is looked up as +an environment variable and the result replaces the entire +sequence. 
+ +Suppose in the shell we do +.Sx 1 +export FOO=bar +.Ex +and the '.mairixrc' file contains +.Sx 2 +maildir=xxx/$FOO +mbox=yyy/a${FOO}b +.Ex +this is equivalent to +.Sx 2 +maildir=xxx/bar +mbox=yyy/abarb +.Ex +If the specified environment variable is not set, the replacement +is the empty string. + +.SH NOTES +.PP +An alternative path to the configuration file may be given with the +.B \-f +option to mairix(1). + + diff --git a/src/mairix/mbox.c b/src/mairix/mbox.c @@ -0,0 +1,1060 @@ +/* + mairix - message index builder and finder for maildir folders. + + ********************************************************************** + * Copyright (C) Richard P. Curnow 2003,2004,2005,2006,2007 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <assert.h> +#include <dirent.h> +#include <ctype.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include "mairix.h" +#include "from.h" +#include "fromcheck.h" +#include "md5.h" + +struct extant_mbox {/*{{{*/ + char *full_path; + time_t mtime; + size_t size; + int db_index; + /* + stuff to store positions etc of individual messages. 
*/ +}; +/*}}}*/ +static int compare_extant_mboxen(const void *a, const void *b)/*{{{*/ +{ + const struct extant_mbox *aa = (const struct extant_mbox *) a; + const struct extant_mbox *bb = (const struct extant_mbox *) b; + return strcmp(aa->full_path, bb->full_path); +} +/*}}}*/ +static int lookup_extant_mbox(struct extant_mbox *sorted_mboxen, int n_extant, char *key)/*{{{*/ +{ + /* Implement bisection search */ + int l, h, m, r; + l = 0, h = n_extant; + m = -1; + while (h > l) { + m = (h + l) >> 1; + /* Should only get called on 'file' type messages - TBC */ + r = strcmp(sorted_mboxen[m].full_path, key); + if (r == 0) break; + if (l == m) return -1; + if (r > 0) h = m; + else l = m; + } + return m; +} +/*}}}*/ +static void append_new_mboxen_to_db(struct database *db, struct extant_mbox *extant_mboxen, int n_extant)/*{{{*/ +{ + int N, n_reqd; + int i, j; + + for (i=N=0; i<n_extant; i++) { + if (extant_mboxen[i].db_index < 0) N++; + } + + n_reqd = db->n_mboxen + N; + if (n_reqd > db->max_mboxen) { + db->max_mboxen = n_reqd; + db->mboxen = grow_array(struct mbox, n_reqd, db->mboxen); + } + /* Init new entries. 
*/ + for (j=0, i=db->n_mboxen; j<n_extant; j++) { + if (extant_mboxen[j].db_index < 0) { + db->mboxen[i].path = new_string(extant_mboxen[j].full_path); + db->mboxen[i].current_mtime = extant_mboxen[j].mtime; + db->mboxen[i].current_size = extant_mboxen[j].size; + db->mboxen[i].file_mtime = 0; + db->mboxen[i].file_size = 0; + db->mboxen[i].n_msgs = 0; + db->mboxen[i].n_old_msgs_valid = 0; + db->mboxen[i].max_msgs = 0; + db->mboxen[i].start = NULL; + db->mboxen[i].len = NULL; + db->mboxen[i].check_all = NULL; + i++; + } + } + + db->n_mboxen = n_reqd; +} +/*}}}*/ +void compute_checksum(const char *data, size_t len, checksum_t *csum)/*{{{*/ +{ + MD5_CTX md5; + MD5Init(&md5); + MD5Update(&md5, (unsigned char *) data, len); + MD5Final(&md5); + memcpy(csum, md5.digest, sizeof(md5.digest)); + return; +} +/*}}}*/ +static int message_is_intact(struct mbox *mb, int idx, char *va, size_t len)/*{{{*/ +{ + /* TODO : later, look at whether to optimise this in some way, e.g. by doing + an initial check on just the first 1k of a message, this will detect + failures much faster at the cost of extra storage. */ + + if (mb->start[idx] + mb->len[idx] > len) { + /* Message overruns the end of the file - can't possibly be intact. */ + return 0; + } else { + checksum_t csum; + compute_checksum(va + mb->start[idx], mb->len[idx], &csum); + if (!memcmp(mb->check_all[idx], &csum, sizeof(checksum_t))) { + return 1; + } else { + return 0; + } + } + return 0; +} +/*}}}*/ +static int find_number_intact(struct mbox *mb, char *va, size_t len)/*{{{*/ +{ + /* Pick up the common obvious case first - where new messages have been appended to the + end of the mbox */ + if (mb->n_msgs == 0) { + return 0; + } else if (message_is_intact(mb, mb->n_msgs - 1, va, len)) { + return mb->n_msgs; /* The lot */ + } else if (!message_is_intact(mb, 0, va, len)) { + return 0; /* None of them. 
*/ + } else { + /* Looks like a deletion has occurred earlier in the file => binary chop + search to find the last message that's still valid. Assume that + everything below a valid message is still valid itself (possibly + dangerous assumption, time will tell.) */ + + int l, m, h; + l = 0; + h = mb->n_msgs; + /* Loop invariant : always, mesasage[l] is intact, message[h] isn't. */ + while (l < h) { + m = (h + l) >> 1; + if (m==l) break; + if (message_is_intact(mb, m, va, len)) { + l = m; + } else { + h = m; + } + } + /* By loop invariant, message[l] is the highest valid one. */ + return (l + 1); + } +} +/*}}}*/ + + +static int fromtab_inited = 0; +static signed char fromtab[256]; + +static void init_fromtab(void)/*{{{*/ +{ + memset(fromtab, 0xff, 256); + fromtab[(int)(unsigned char)'\n'] = ~(1<<0); + fromtab[(int)(unsigned char)'F'] = ~(1<<1); + fromtab[(int)(unsigned char)'r'] = ~(1<<2); + fromtab[(int)(unsigned char)'o'] = ~(1<<3); + fromtab[(int)(unsigned char)'m'] = ~(1<<4); + fromtab[(int)(unsigned char)' '] = ~(1<<5); +} +/*}}}*/ + +/* REAL CHECKING : need to see if the line looks like this: + * From [ <return-path> ] <weekday> <month> <day> <time> [ <timezone> ] <year> + (from the mutt sources). + * where timezone can be two words rather than one sometimes. */ + +#undef DEBUG_DFA + +static int looks_like_from_separator(off_t n, char *va, size_t len, int verbose)/*{{{*/ +{ + char p; + int current_state = 0; + int result = 0; + + n++; /* look beyond the space. 
*/ + + while (n < len) { + p = va[n++]; + if (verbose) { + printf("current_state=%d, p=%02x (%1c) ", current_state, (int)(unsigned char)p, ((p>=32)&&(p<=126))?p:'.'); + } + current_state = fromcheck_next_state(current_state, (int)fromcheck_char2tok[(int)(unsigned char)p]); + if (verbose) { + printf("next_state=%d\n", current_state); + } + if (current_state < 0) { + /* not matched */ + break; + } + if (fromcheck_attr[current_state] == FROMCHECK_PASS) { + result = 1; /* matched good separator */ + break; + } + } + + /* If we hit the end of the file, it doesn't look like a real 'From' line. */ +#ifdef DEBUG_DFA + unlock_and_exit(0); +#endif + return result; +} +/*}}}*/ + +static off_t find_next_from(off_t n, char *va, size_t len)/*{{{*/ +{ + unsigned char c; + unsigned long hit; + unsigned long reg; + unsigned long mask; + + if (!n) { + if ((len >= 5) && !strncmp(va, "From ", 5)) { + return 0; + } + } + +scan_again: + + reg = (unsigned long) -1; + hit = ~(1<<5); + while (n < len) { + c = va[n]; + mask = (unsigned long)(signed long) fromtab[(int)c]; + reg = (reg << 1) | mask; + if (~(reg|hit)) { + if (looks_like_from_separator(n, va, len, 0)) { + return (n-4); + } else { +#if 0 + int nn; + printf("Rejecting from line at %d\n", n); + nn = n; + printf(" >> "); + while (nn < len) { + unsigned char c = va[nn++]; + putchar(c); + if (c=='\n') break; + } + looks_like_from_separator(n, va, len, 1); +#endif + goto scan_again; + } + } + n++; + } + return -1; +} +/*}}}*/ +static off_t start_of_next_line(off_t n, char *va, size_t len)/*{{{*/ +{ + unsigned char c; + /* We are always starting from 'From ' so we can advance before testing */ + do { + c = va[n]; + n++; + } + while ((n < len) && (c != '\n')); + if (n == len) { + return -1; + } else { + return n; + } +} +/*}}}*/ + + +static struct message_list *build_new_message_list(struct mbox *mb, char *va, size_t len, int *n_messages)/*{{{*/ +{ + struct message_list *result, *here, *next; + off_t start_from, start_pos, end_from; + 
int old_percent = -100; + int N; + +#define PERCENT_GRAN 2 + + *n_messages = 0; + + result = here = NULL; + N = mb->n_old_msgs_valid; + if (N == 0) { + start_from = 0; + } else { + /* Must point to the \n at the end of the preceding message, otherwise the + 'From ' at the start of the first message in the section to be rescanned + won't get detected and that message won't get indexed. */ + start_from = mb->start[N - 1] + mb->len[N - 1] - 1; + } + + if (!fromtab_inited) { + init_fromtab(); + fromtab_inited = 1; + } + + /* Locate next 'From ' at the start of a line */ + start_from = find_next_from(start_from, va, len); + while (start_from != -1) { + start_pos = start_of_next_line(start_from, va, len); + if (start_pos == -1) { + /* Something is awry. */ + goto done; + } + if (verbose) { + int percent; + percent = (int)(0.5 + 100.0 * (double) start_pos / (double) len); + if (percent > (old_percent+PERCENT_GRAN)) { + printf("Scanning mbox %s : %3d%% done\r", mb->path, percent); + fflush(stdout); + old_percent = percent; + } + } + + end_from = find_next_from(start_pos, va, len); + next = new(struct message_list); + next->next = NULL; + next->start = start_pos; + if (end_from == -1) { + /* message runs through to end of file. */ + next->len = len - start_pos; + } else { + next->len = end_from - start_pos; + } + if (!result) { + result = here = next; + } else { + here->next = next; + here = next; + } + ++*n_messages; + start_from = end_from; + } + +done: + if (verbose) { + printf("Scanning mbox %s : 100%% done\n", mb->path); + fflush(stdout); + } + return result; + +} +/*}}}*/ +static void rescan_mbox(struct mbox *mb, char *va, size_t len)/*{{{*/ +{ + /* We get here if it's determined that + * 1. the mbox file still exists + * 2. the mtime or size has changed, i.e. it's been modified in some way + since the last mairix run. + */ + + /* Find the last message in the box that appears to be intact. 
*/ + mb->n_old_msgs_valid = find_number_intact(mb, va, len); + mb->new_msgs = build_new_message_list(mb, va, len, &mb->n_new_msgs); +} +/*}}}*/ +static void deaden_mbox(struct mbox *mb)/*{{{*/ +{ + mb->n_old_msgs_valid = 0; + mb->n_msgs = 0; + + free(mb->path); + mb->path = NULL; + + if (mb->max_msgs > 0) { + free(mb->start); + free(mb->len); + free(mb->check_all); + mb->max_msgs = 0; + } +} +/*}}}*/ +static void marry_up_mboxen(struct database *db, struct extant_mbox *extant_mboxen, int n_extant)/*{{{*/ +{ + int *old_to_new_idx; + int i; + + for (i=0; i<n_extant; i++) extant_mboxen[i].db_index = -1; + + old_to_new_idx = NULL; + if (db->n_mboxen > 0) { + old_to_new_idx = new_array(int, db->n_mboxen); + for (i=0; i<db->n_mboxen; i++) old_to_new_idx[i] = -1; + + for (i=0; i<db->n_mboxen; i++) { + if (db->mboxen[i].path) { + int idx; + idx = lookup_extant_mbox(extant_mboxen, n_extant, db->mboxen[i].path); + if (idx >= 0) { + struct mbox *mb = &db->mboxen[i]; + old_to_new_idx[i] = idx; + extant_mboxen[idx].db_index = i; + mb->current_mtime = extant_mboxen[idx].mtime; + mb->current_size = extant_mboxen[idx].size; + } + } + } + } + + for (i=0; i<db->n_mboxen; i++) { + if (old_to_new_idx[i] < 0) { + /* old mbox is no more. 
*/ + deaden_mbox(&db->mboxen[i]); + } + } + + /* Append entries for newly discovered mboxen */ + append_new_mboxen_to_db(db, extant_mboxen, n_extant); + + /* From here on, everything we need is in the db */ + if (old_to_new_idx) + free(old_to_new_idx); + +} +/*}}}*/ +static void check_duplicates(struct extant_mbox *extant_mboxen, int n_extant)/*{{{*/ +{ + /* Note, list is sorted at this point */ + int i; + int any_dupl = 0; + for (i=0; i<n_extant-1; i++) { + if (!strcmp(extant_mboxen[i].full_path, extant_mboxen[i+1].full_path)) { + printf("mbox %s is listed twice in the mairixrc file\n", extant_mboxen[i].full_path); + any_dupl = 1; + } + } + if (any_dupl) { + printf("Exiting, the mairixrc file needs fixing\n"); + unlock_and_exit(1); + } +} +/*}}}*/ +static char *find_last_slash(char *in)/*{{{*/ +{ + char *p = in; + char *result = NULL; + while (*p) { + if (*p == '/') result = p; + p++; + } + return result; +} +/*}}}*/ +static int append_shallow(char *path, int base_len, struct stat *sb, struct string_list *list, /*{{{*/ + const struct traverse_methods *methods, + struct globber_array *omit_globs) +{ + int result = 0; + if ((methods->filter)(path, sb)) { + if (!is_globber_array_match(omit_globs, path + base_len)) { + struct string_list *nn = new(struct string_list); + nn->data = new_string(path); + nn->next = list; + nn->prev = list->prev; + list->prev->next = nn; + list->prev = nn; + result = 1; + } + } + return result; +} +/*}}}*/ +static int append_deep(char *path, int base_len, struct stat *sb, struct string_list *list, /*{{{*/ + const struct traverse_methods *methods, + struct globber_array *omit_globs) +{ + /* path is dir : read its contents, call append_shallow or self accordingly. */ + /* path is file : call append_shallow. 
*/ + struct stat sb2; + char *xpath; + DIR *d; + struct dirent *de; + int appended_any = 0; + int this_file_matched; + + this_file_matched = append_shallow(path, base_len, sb, list, methods, omit_globs); + appended_any |= this_file_matched; + + if (S_ISDIR(sb->st_mode)) { + xpath = new_array(char, strlen(path) + 2 + NAME_MAX); + d = opendir(path); + if (d) { + while ((de = readdir(d))) { + enum traverse_check status; + if (!strcmp(de->d_name, ".")) continue; + if (!strcmp(de->d_name, "..")) continue; + strcpy(xpath, path); + strcat(xpath, "/"); + strcat(xpath, de->d_name); + if (!is_globber_array_match(omit_globs, xpath+base_len)) { + /* Filter out omissions at this point, e.g. to avoid wasting time on + * a recursive expansion of a tree that's going to get pruned in at + * the deepest level anyway. */ + status = (methods->scrutinize)(this_file_matched, de->d_name); +#if 0 + /* debugging */ + fprintf(stderr, "scrutinize for %s in %s returned %s\n", + de->d_name, + path, + (status == TRAV_FINISH) ? "FINISH" : + (status == TRAV_IGNORE) ? "IGNORE" : "PROCESS"); +#endif + switch (status) { + case TRAV_FINISH: + goto done_this_dir; + case TRAV_IGNORE: + goto next_path; + case TRAV_PROCESS: + if (stat(xpath, &sb2) >= 0) { + if (S_ISREG(sb2.st_mode)) { + appended_any |= append_shallow(xpath, base_len, &sb2, list, methods, omit_globs); + } else if (S_ISDIR(sb2.st_mode)) { + appended_any |= append_deep(xpath, base_len, &sb2, list, methods, omit_globs); + } + } + break; + } + } +next_path: + (void) 0; + } +done_this_dir: + closedir(d); + } + free(xpath); + } + return appended_any; +} +/*}}}*/ +static void handle_wild(char *path, int base_len, char *last_comp, struct string_list *list,/*{{{*/ + int (*append)(char *, int, struct stat *, struct string_list *, + const struct traverse_methods *, struct globber_array *), + const struct traverse_methods *methods, + struct globber_array *omit_globs) +{ + /* last_comp is the character within 'path' where the wildcard stuff starts. 
*/ + struct globber *gg; + char *temp_path, *xpath; + DIR *d; + struct dirent *de; + int had_matches; + + gg = make_globber(last_comp); + + /* Null-terminate parent directory, i.e. null the character where the trailing / is */ + if (last_comp > path) { + int len = last_comp - path; + temp_path = new_array(char, len); + memcpy(temp_path, path, len-1); + temp_path[len-1] = '\0'; + xpath = new_array(char, len + 2 + NAME_MAX); + } else { + temp_path = new_string("."); + xpath = new_array(char, 3 + NAME_MAX); + } + + d = opendir(temp_path); + had_matches = 0; + if (d) { + while ((de = readdir(d))) { + if (!strcmp(de->d_name, ".")) continue; + if (!strcmp(de->d_name, "..")) continue; + if (is_glob_match(gg, de->d_name)) { + struct stat xsb; + strcpy(xpath, temp_path); + strcat(xpath, "/"); + strcat(xpath, de->d_name); + if (!is_globber_array_match(omit_globs, xpath+base_len)) { + /* Filter out omissions at this point, e.g. to avoid wasting time on + * a recursive expansion of a tree that's going to get pruned in full + * later anyway. 
*/ + had_matches = 1; + if (stat(xpath, &xsb) >= 0) { + (*append)(xpath, base_len, &xsb, list, methods, omit_globs); + } + } + } + } + closedir(d); + if (!had_matches) { + fprintf(stderr, "WARNING: Wildcard \"%s\" matched nothing in %s\n", last_comp, temp_path); + } + } else { + fprintf(stderr, "WARNING: Folder path %s does not exist\n", temp_path); + } + + + free(temp_path); + free(xpath); + free(gg); +} +/*}}}*/ +static void handle_single(char *path, int base_len, struct string_list *list,/*{{{*/ + int (*append)(char *, int, struct stat *, struct string_list *, + const struct traverse_methods *, struct globber_array *), + const struct traverse_methods *methods, + struct globber_array *omit_globs) +{ + struct stat sb; + if (stat(path, &sb) >= 0) { + (*append)(path, base_len, &sb, list, methods, omit_globs); + } else { + fprintf(stderr, "WARNING: Folder path %s does not exist\n", path); + } +} +/*}}}*/ +static int filter_is_file(const char *x, const struct stat *sb)/*{{{*/ +{ + if (S_ISREG(sb->st_mode)) + return 1; + else + return 0; +} +/*}}}*/ +enum traverse_check scrutinize_mbox_entry(int parent_is_mbox, const char *de_name)/*{{{*/ +{ + /* We have to keep looking at everything in this case. */ + return TRAV_PROCESS; +} +/*}}}*/ +struct traverse_methods mbox_traverse_methods = {/*{{{*/ + .filter = filter_is_file, + .scrutinize = scrutinize_mbox_entry +}; +/*}}}*/ +static int is_wild(const char *x)/*{{{*/ +{ + const char *p; + p = x; + while (*p) { + switch (*p) { + case '[': + case '*': + case '?': + return 1; + } + p++; + } + return 0; +} +/*}}}*/ +/*{{{ handle_one_path() */ +static void handle_one_path(const char *folder_base, + const char *path, + struct string_list *list, + const struct traverse_methods *methods, + struct globber_array *omit_globs) +{ + /* Valid syntaxen ([.]=optional): + * [xxx/]foo : single path + * [xxx/]foo... 
: if foo is a file, as before; if a directory, every ordinary file under it + * [xxx/]wild : any single path matching the wildcard + * [xxx/]wild... : consider each match of the wildcard by the rule 2 lines above + + * <wild> contains any of these shell-like metacharacters + * * : any string of 1 or more arbitrary characters + * ? : any 1 arbitrary character + * [a-z] : character class + * [^a-z] : negated character class. + + */ + int folder_base_len = strlen(folder_base); + char *full_path; + int is_abs; + int len; + char *last_slash; + char *last_comp; + int base_len; + + is_abs = (path[0] == '/') ? 1 : 0; + if (is_abs) { + full_path = new_string(path); + base_len = 0; + } else { + full_path = new_array(char, folder_base_len + strlen(path) + 2); + strcpy(full_path, folder_base); + strcat(full_path, "/"); + strcat(full_path, path); + base_len = strlen(folder_base) + 1; + } + len = strlen(full_path); + last_slash = find_last_slash(full_path); + last_comp = last_slash ? (last_slash + 1) : full_path; + if ((len >= 4) && !strcmp(full_path + (len - 3), "...")) { + full_path[len - 3] = '\0'; + if (is_wild(last_comp)) { + handle_wild(full_path, base_len, last_comp, list, append_deep, methods, omit_globs); + } else { + handle_single(full_path, base_len, list, append_deep, methods, omit_globs); + } + } else { + if (is_wild(last_comp)) { + handle_wild(full_path, base_len, last_comp, list, append_shallow, methods, omit_globs); + } else { + handle_single(full_path, base_len, list, append_shallow, methods, omit_globs); + } + } + free(full_path); +} +/*}}}*/ +/*{{{ glob_and_expand_paths() */ +void glob_and_expand_paths(const char *folder_base, + char **paths_in, int n_in, + char ***paths_out, int *n_out, + const struct traverse_methods *methods, + struct globber_array *omit_globs) +{ + struct string_list list; + int i; + + /* Clear it. 
*/ + list.next = list.prev = &list; + + for (i=0; i<n_in; i++) { + char *path = paths_in[i]; + handle_one_path(folder_base, path, &list, methods, omit_globs); + } + + string_list_to_array(&list, n_out, paths_out); +} +/*}}}*/ + +void build_mbox_lists(struct database *db, const char *folder_base, /*{{{*/ + const char *mboxen_paths, struct globber_array *omit_globs) +{ + char **raw_paths, **paths; + int n_raw_paths, i; + int n_paths; + struct stat sb; + + int n_extant; + struct extant_mbox *extant_mboxen; + + n_extant = 0; + + if (mboxen_paths) { + split_on_colons(mboxen_paths, &n_raw_paths, &raw_paths); + glob_and_expand_paths(folder_base, raw_paths, n_raw_paths, &paths, &n_paths, &mbox_traverse_methods, omit_globs); + extant_mboxen = new_array(struct extant_mbox, n_paths); + } else { + n_paths = 0; + paths = NULL; + extant_mboxen = NULL; + } + + /* Assume maximal size array. TODO : new strategy when globbing is included. + * */ + + /* TODO TODO ::: Build a sorted list of the paths and check that there aren't + any duplicates!! */ + + for (i=0; i<n_paths; i++) { + char *path = paths[i]; + if (lstat(path, &sb) < 0) { + /* can't stat */ + } else { + if (S_ISLNK(sb.st_mode)) { + /* Skip mbox if symlink */ + if (verbose) { + printf("%s is a link - skipping\n", path); + } + } else { + extant_mboxen[n_extant].full_path = new_string(path); + extant_mboxen[n_extant].mtime = sb.st_mtime; + extant_mboxen[n_extant].size = sb.st_size; + n_extant++; + } + } + free(paths[i]); + } + if (paths) { + free(paths); + paths=NULL; + } + + /* Reconcile list against that in the db. : sort, match etc. */ + if (n_extant) { + qsort(extant_mboxen, n_extant, sizeof(struct extant_mbox), compare_extant_mboxen); + } + + check_duplicates(extant_mboxen, n_extant); + + marry_up_mboxen(db, extant_mboxen, n_extant); + + /* Now look for new/modified mboxen, find how many of the old messages are + * still valid and scan the remainder. 
*/ + + for (i=0; i<db->n_mboxen; i++) { + struct mbox *mb = &db->mboxen[i]; + mb->new_msgs = NULL; + if (mb->path) { + if ((mb->current_mtime == mb->file_mtime) && + (mb->current_size == mb->file_size)) { + mb->n_old_msgs_valid = mb->n_msgs; + } else { + unsigned char *va; + int len; + create_ro_mapping(mb->path, &va, &len); + if (va) { + rescan_mbox(mb, (char *) va, len); + free_ro_mapping(va, len); + } else if (!len) { + mb->n_old_msgs_valid = mb->n_msgs = 0; + } else { + /* Treat as dead mbox */ + deaden_mbox(mb); + } + } + } + } + + /* At the end of this, we want the db->mboxen table to contain up to date info about + * the mboxen, together with how much of the old info was still current. */ +} +/*}}}*/ + +static struct msg_src *setup_msg_src(char *filename, off_t start, size_t len)/*{{{*/ +{ + static struct msg_src result; + result.type = MS_MBOX; + result.filename = filename; + result.start = start; + result.len = len; + return &result; +} +/*}}}*/ +int add_mbox_messages(struct database *db)/*{{{*/ +{ + int i, j; + int any_new = 0; + int N; + unsigned char *va; + int valen; + enum data_to_rfc822_error error; + + for (i=0; i<db->n_mboxen; i++) { + struct mbox *mb = &db->mboxen[i]; + struct message_list *here, *next; + + if (mb->new_msgs) { + /* Upper bound : we may need to coalesce 2 or more messages if false + * matches on From lines have occurred inside MIME encoded body parts. 
*/ + N = mb->n_old_msgs_valid + mb->n_new_msgs; + if (N > mb->max_msgs) { + mb->max_msgs = N; + mb->start = grow_array(off_t, N, mb->start); + mb->len = grow_array(size_t, N, mb->len); + mb->check_all = grow_array(checksum_t, N, mb->check_all); + } + + va = NULL; /* lazy mmap */ + for (j=mb->n_old_msgs_valid, here=mb->new_msgs; here; j++, here=next) { + int n; + int trials = 0; + off_t start; + size_t len; + struct rfc822 *r8; + struct msg_src *msg_src; + struct message_list *last, *xx, *xn; + + next = here->next; + + if (!va) { + create_ro_mapping(mb->path, &va, &valen); + } + if (!va) { + fprintf(stderr, "Couldn't create mapping of file %s\n", mb->path); + unlock_and_exit(1); + } + + + /* Try to parse the next 'From' -to- 'From' chunk as an rfc822 message. + * If we get an unterminated MIME encoding, coalesce the next chunk + * onto the current one and try again. Keep going until it works, or + * we run out of chunks. If we run out, back up to just using the + * first chunk and assume it is broken. + * + * This is to deal with cases such as having a text/plain attachment + * that is actually an mbox file in its own right, i.e. will have + * embedded '^From ' lines in it. + * + * 'last' is the last chunk currently in the putative message. 
*/ + last = here; + do { + len = last->start + last->len - here->start; + msg_src = setup_msg_src(mb->path, here->start, len); + r8 = data_to_rfc822(msg_src, (char *) va + here->start, len, &error); + if (error == DTR8_MISSING_END) { + if (r8) free_rfc822(r8); + r8 = NULL; + last = last->next; /* Try with another chunk on the end */ + ++trials; + } else { + /* Treat as success */ + next = last->next; + break; + } + } while (last && trials < 100); + + if (last && trials < 100) { + start = mb->start[j] = here->start; + mb->len[j] = len; + compute_checksum((char *) va + here->start, len, &mb->check_all[j]); + } else { + /* Faulty message or last message in the file */ + start = mb->start[j] = here->start; + len = mb->len[j] = here->len; + compute_checksum((char *) va + here->start, len, &mb->check_all[j]); + msg_src = setup_msg_src(mb->path, start, len); + r8 = data_to_rfc822(msg_src, (char *) va + start, len, &error); + if (error == DTR8_MISSING_END) { + fprintf(stderr, "Can't find end boundary in multipart message %s\n", + format_msg_src(msg_src)); + } + } + + /* Release all the list entries in the range [here,next) (inclusive) */ + for (xx=here; xx!=next; xx=xn) { + xn = xx->next; + free(xx); + } + + /* Only do this once a valid rfc822 structure has been obtained. 
*/ + maybe_grow_message_arrays(db); + n = db->n_msgs; + db->type[n] = MTY_MBOX; + db->msgs[n].src.mbox.file_index = i; + db->msgs[n].src.mbox.msg_index = j; + + if (r8) { + if (verbose) { + printf("Scanning %s[%d] at [%d,%d)\n", mb->path, j, (int)start, (int)(start + len)); + } + db->msgs[n].date = r8->hdrs.date; + db->msgs[n].seen = r8->hdrs.flags.seen; + db->msgs[n].replied = r8->hdrs.flags.replied; + db->msgs[n].flagged = r8->hdrs.flags.flagged; + tokenise_message(n, db, r8); + free_rfc822(r8); + } else { + printf("Message in %s at [%d,%d) is misformatted\n", mb->path, (int)start, (int)(start + len)); + } + + ++db->n_msgs; + any_new = 1; + } + mb->n_msgs = j; + if (va) { + free_ro_mapping(va, valen); + } + } + } + return any_new; +} +/*}}}*/ + +/* OTHER */ +void cull_dead_mboxen(struct database *db)/*{{{*/ +{ + int n_alive, i, j, n; + int *old_to_new; + struct mbox *newtab; + + n = db->n_mboxen; + for (i=0, n_alive=0; i<n; i++) { + if (db->mboxen[i].path) n_alive++; + } + + /* Simple case - no dead mboxen */ + if (n_alive == n) return; + + newtab = new_array(struct mbox, n_alive); + old_to_new = new_array(int, n); + for (i=0, j=0; i<n; i++) { + if (db->mboxen[i].path) { + old_to_new[i] = j; + newtab[j] = db->mboxen[i]; + printf("Copying mbox[%d] to [%d], path=%s\n", i, j, db->mboxen[i].path); + j++; + } else { + printf("Pruning old mbox[%d], dead\n", i); + old_to_new[i] = -1; + } + } + + /* Renumber file indices in messages */ + n = db->n_msgs; + for (i=0; i<n; i++) { + if (db->type[i] == MTY_MBOX) { + int old_idx = db->msgs[i].src.mbox.file_index; + assert(old_to_new[old_idx] != -1); + db->msgs[i].src.mbox.file_index = old_to_new[old_idx]; + } + } + + /* Fix up pointers */ + db->n_mboxen = db->max_mboxen = n_alive; + free(db->mboxen); + db->mboxen = newtab; + free(old_to_new); + return; +} +/*}}}*/ + +unsigned int encode_mbox_indices(unsigned int mb, unsigned int msg)/*{{{*/ +{ + unsigned int result; + result = ((mb & 0xffff) << 16) | (msg & 0xffff); + return 
result; +} +/*}}}*/ +void decode_mbox_indices(unsigned int index, unsigned int *mb, unsigned int *msg)/*{{{*/ +{ + *mb = (index >> 16) & 0xffff; + *msg = (index & 0xffff); +} +/*}}}*/ +int verify_mbox_size_constraints(struct database *db)/*{{{*/ +{ + int i; + int fail; + if (db->n_mboxen > 65536) { + fprintf(stderr, "Too many mboxes (max 65536, you have %d)\n", db->n_mboxen); + return 0; + } + fail = 0; + for (i=0; i<db->n_mboxen; i++) { + if (db->mboxen[i].n_msgs > 65536) { + fprintf(stderr, "Too many messages in mbox %s (max 65536, you have %d)\n", + db->mboxen[i].path, db->mboxen[i].n_msgs); + fail = 1; + } + } + if (fail) return 0; + else return 1; +} +/*}}}*/ + diff --git a/src/mairix/md5.c b/src/mairix/md5.c @@ -0,0 +1,322 @@ +/* + *********************************************************************** + ** md5.c -- the source code for MD5 routines ** + ** RSA Data Security, Inc. MD5 Message-Digest Algorithm ** + ** Created: 2/17/90 RLR ** + ** Revised: 1/91 SRD,AJ,BSK,JT Reference C Version ** + ** Revised (for MD5): RLR 4/27/91 ** + ** -- G modified to have y&~z instead of y&z ** + ** -- FF, GG, HH modified to add in last register done ** + ** -- Access pattern: round 2 works mod 5, round 3 works mod 3 ** + ** -- distinct additive constant for each step ** + ** -- round 4 added, working mod 7 ** + *********************************************************************** + */ + +/* + *********************************************************************** + ** Copyright (C) 1990, RSA Data Security, Inc. All rights reserved. ** + ** ** + ** License to copy and use this software is granted provided that ** + ** it is identified as the "RSA Data Security, Inc. MD5 Message- ** + ** Digest Algorithm" in all material mentioning or referencing this ** + ** software or this function. ** + ** ** + ** License is also granted to make and use derivative works ** + ** provided that such works are identified as "derived from the RSA ** + ** Data Security, Inc. 
MD5 Message-Digest Algorithm" in all ** + ** material mentioning or referencing the derived work. ** + ** ** + ** RSA Data Security, Inc. makes no representations concerning ** + ** either the merchantability of this software or the suitability ** + ** of this software for any particular purpose. It is provided "as ** + ** is" without express or implied warranty of any kind. ** + ** ** + ** These notices must be retained in any copies of any part of this ** + ** documentation and/or software. ** + *********************************************************************** + */ + +#include "md5.h" + +/* + *********************************************************************** + ** Message-digest routines: ** + ** To form the message digest for a message M ** + ** (1) Initialize a context buffer mdContext using MD5Init ** + ** (2) Call MD5Update on mdContext and M ** + ** (3) Call MD5Final on mdContext ** + ** The message digest is now in mdContext->digest[0...15] ** + *********************************************************************** + */ + +/* forward declaration */ +static void Transform (UINT4 *, UINT4 *); + +#ifdef __STDC__ +static const +#else +static +#endif +unsigned char PADDING[64] = { + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* F, G, H and I are basic MD5 functions */ +#define F(x, y, z) (((x) & (y)) | ((~x) & (z))) +#define G(x, y, z) (((x) & (z)) | ((y) & (~z))) +#define H(x, y, z) ((x) ^ (y) ^ (z)) +#define I(x, y, z) ((y) ^ ((x) | (~z))) + +/* ROTATE_LEFT rotates x left n bits */ +#if defined(FAST_MD5) && defined(__GNUC__) && defined(mc68000) +/* + * If we're on a 68000 based CPU and using a 
GNU C compiler with + * inline assembly code, we can speed this up a bit. + */ +inline UINT4 ROTATE_LEFT(UINT4 x, int n) +{ + asm("roll %2,%0" : "=d" (x) : "0" (x), "Ir" (n)); + return x; +} +#else +#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n)))) +#endif + + +/* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4 */ +/* Rotation is separate from addition to prevent recomputation */ +#define FF(a, b, c, d, x, s, ac) \ + {(a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \ + (a) = ROTATE_LEFT ((a), (s)); \ + (a) += (b); \ + } +#define GG(a, b, c, d, x, s, ac) \ + {(a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \ + (a) = ROTATE_LEFT ((a), (s)); \ + (a) += (b); \ + } +#define HH(a, b, c, d, x, s, ac) \ + {(a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \ + (a) = ROTATE_LEFT ((a), (s)); \ + (a) += (b); \ + } +#define II(a, b, c, d, x, s, ac) \ + {(a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \ + (a) = ROTATE_LEFT ((a), (s)); \ + (a) += (b); \ + } + +/* The routine MD5Init initializes the message-digest context + mdContext. All fields are set to zero. + */ +void MD5Init (mdContext) +MD5_CTX *mdContext; +{ + mdContext->i[0] = mdContext->i[1] = (UINT4)0; + + /* Load magic initialization constants. + */ + mdContext->buf[0] = (UINT4)0x67452301; + mdContext->buf[1] = (UINT4)0xefcdab89; + mdContext->buf[2] = (UINT4)0x98badcfe; + mdContext->buf[3] = (UINT4)0x10325476; +} + +/* The routine MD5Update updates the message-digest context to + account for the presence of each of the characters inBuf[0..inLen-1] + in the message whose digest is being computed. 
+ */ +void MD5Update (mdContext, inBuf, inLen) +MD5_CTX *mdContext; +unsigned const char *inBuf; +unsigned int inLen; +{ + UINT4 in[16]; + int mdi; + unsigned int i, ii; + + /* compute number of bytes mod 64 */ + mdi = (int)((mdContext->i[0] >> 3) & 0x3F); + + /* update number of bits */ + if ((mdContext->i[0] + ((UINT4)inLen << 3)) < mdContext->i[0]) + mdContext->i[1]++; + mdContext->i[0] += ((UINT4)inLen << 3); + mdContext->i[1] += ((UINT4)inLen >> 29); + + while (inLen--) { + /* add new character to buffer, increment mdi */ + mdContext->in[mdi++] = *inBuf++; + + /* transform if necessary */ + if (mdi == 0x40) { + for (i = 0, ii = 0; i < 16; i++, ii += 4) + in[i] = (((UINT4)mdContext->in[ii+3]) << 24) | + (((UINT4)mdContext->in[ii+2]) << 16) | + (((UINT4)mdContext->in[ii+1]) << 8) | + ((UINT4)mdContext->in[ii]); + Transform (mdContext->buf, in); + mdi = 0; + } + } +} + +/* The routine MD5Final terminates the message-digest computation and + ends with the desired message digest in mdContext->digest[0...15]. + */ + +void MD5Final (mdContext) +MD5_CTX *mdContext; +{ + UINT4 in[16]; + int mdi; + unsigned int i, ii; + unsigned int padLen; + + /* save number of bits */ + in[14] = mdContext->i[0]; + in[15] = mdContext->i[1]; + + /* compute number of bytes mod 64 */ + mdi = (int)((mdContext->i[0] >> 3) & 0x3F); + + /* pad out to 56 mod 64 */ + padLen = (mdi < 56) ? 
(56 - mdi) : (120 - mdi); + MD5Update (mdContext, PADDING, padLen); + + /* append length in bits and transform: words 0..13 are packed from the padded input buffer, words 14 and 15 already hold the saved bit count */ + for (i = 0, ii = 0; i < 14; i++, ii += 4) + in[i] = (((UINT4)mdContext->in[ii+3]) << 24) | + (((UINT4)mdContext->in[ii+2]) << 16) | + (((UINT4)mdContext->in[ii+1]) << 8) | + ((UINT4)mdContext->in[ii]); + Transform (mdContext->buf, in); + + /* store buffer in digest, low-order byte of each word first */ + for (i = 0, ii = 0; i < 4; i++, ii += 4) { + mdContext->digest[ii] = (unsigned char)(mdContext->buf[i] & 0xFF); + mdContext->digest[ii+1] = + (unsigned char)((mdContext->buf[i] >> 8) & 0xFF); + mdContext->digest[ii+2] = + (unsigned char)((mdContext->buf[i] >> 16) & 0xFF); + mdContext->digest[ii+3] = + (unsigned char)((mdContext->buf[i] >> 24) & 0xFF); + } +} + +/* Basic MD5 step. Transforms buf based on in: runs the 64 numbered + steps (four rounds of 16) on local copies of buf[0..3] using the + sixteen words of in, then adds the results back into buf[0..3]. + */ +static void Transform (buf, in) +UINT4 *buf; +UINT4 *in; +{ + UINT4 a = buf[0], b = buf[1], c = buf[2], d = buf[3]; + + /* Round 1 */ +#define S11 7 +#define S12 12 +#define S13 17 +#define S14 22 + + FF ( a, b, c, d, in[ 0], S11, 0xd76aa478); /* 1 */ + FF ( d, a, b, c, in[ 1], S12, 0xe8c7b756); /* 2 */ + FF ( c, d, a, b, in[ 2], S13, 0x242070db); /* 3 */ + FF ( b, c, d, a, in[ 3], S14, 0xc1bdceee); /* 4 */ + FF ( a, b, c, d, in[ 4], S11, 0xf57c0faf); /* 5 */ + FF ( d, a, b, c, in[ 5], S12, 0x4787c62a); /* 6 */ + FF ( c, d, a, b, in[ 6], S13, 0xa8304613); /* 7 */ + FF ( b, c, d, a, in[ 7], S14, 0xfd469501); /* 8 */ + FF ( a, b, c, d, in[ 8], S11, 0x698098d8); /* 9 */ + FF ( d, a, b, c, in[ 9], S12, 0x8b44f7af); /* 10 */ + FF ( c, d, a, b, in[10], S13, 0xffff5bb1); /* 11 */ + FF ( b, c, d, a, in[11], S14, 0x895cd7be); /* 12 */ + FF ( a, b, c, d, in[12], S11, 0x6b901122); /* 13 */ + FF ( d, a, b, c, in[13], S12, 0xfd987193); /* 14 */ + FF ( c, d, a, b, in[14], S13, 0xa679438e); /* 15 */ + FF ( b, c, d, a, in[15], S14, 0x49b40821); /* 16 */ + + /* Round 2 */ +#define S21 5 +#define S22 9 +#define S23 14 +#define S24 20 + GG ( a, b, c, d, in[ 1], S21, 0xf61e2562); /* 17 */ + GG ( 
d, a, b, c, in[ 6], S22, 0xc040b340); /* 18 */ + GG ( c, d, a, b, in[11], S23, 0x265e5a51); /* 19 */ + GG ( b, c, d, a, in[ 0], S24, 0xe9b6c7aa); /* 20 */ + GG ( a, b, c, d, in[ 5], S21, 0xd62f105d); /* 21 */ + GG ( d, a, b, c, in[10], S22, 0x2441453); /* 22 */ + GG ( c, d, a, b, in[15], S23, 0xd8a1e681); /* 23 */ + GG ( b, c, d, a, in[ 4], S24, 0xe7d3fbc8); /* 24 */ + GG ( a, b, c, d, in[ 9], S21, 0x21e1cde6); /* 25 */ + GG ( d, a, b, c, in[14], S22, 0xc33707d6); /* 26 */ + GG ( c, d, a, b, in[ 3], S23, 0xf4d50d87); /* 27 */ + GG ( b, c, d, a, in[ 8], S24, 0x455a14ed); /* 28 */ + GG ( a, b, c, d, in[13], S21, 0xa9e3e905); /* 29 */ + GG ( d, a, b, c, in[ 2], S22, 0xfcefa3f8); /* 30 */ + GG ( c, d, a, b, in[ 7], S23, 0x676f02d9); /* 31 */ + GG ( b, c, d, a, in[12], S24, 0x8d2a4c8a); /* 32 */ + + /* Round 3 */ +#define S31 4 +#define S32 11 +#define S33 16 +#define S34 23 + HH ( a, b, c, d, in[ 5], S31, 0xfffa3942); /* 33 */ + HH ( d, a, b, c, in[ 8], S32, 0x8771f681); /* 34 */ + HH ( c, d, a, b, in[11], S33, 0x6d9d6122); /* 35 */ + HH ( b, c, d, a, in[14], S34, 0xfde5380c); /* 36 */ + HH ( a, b, c, d, in[ 1], S31, 0xa4beea44); /* 37 */ + HH ( d, a, b, c, in[ 4], S32, 0x4bdecfa9); /* 38 */ + HH ( c, d, a, b, in[ 7], S33, 0xf6bb4b60); /* 39 */ + HH ( b, c, d, a, in[10], S34, 0xbebfbc70); /* 40 */ + HH ( a, b, c, d, in[13], S31, 0x289b7ec6); /* 41 */ + HH ( d, a, b, c, in[ 0], S32, 0xeaa127fa); /* 42 */ + HH ( c, d, a, b, in[ 3], S33, 0xd4ef3085); /* 43 */ + HH ( b, c, d, a, in[ 6], S34, 0x4881d05); /* 44 */ + HH ( a, b, c, d, in[ 9], S31, 0xd9d4d039); /* 45 */ + HH ( d, a, b, c, in[12], S32, 0xe6db99e5); /* 46 */ + HH ( c, d, a, b, in[15], S33, 0x1fa27cf8); /* 47 */ + HH ( b, c, d, a, in[ 2], S34, 0xc4ac5665); /* 48 */ + + /* Round 4 */ +#define S41 6 +#define S42 10 +#define S43 15 +#define S44 21 + II ( a, b, c, d, in[ 0], S41, 0xf4292244); /* 49 */ + II ( d, a, b, c, in[ 7], S42, 0x432aff97); /* 50 */ + II ( c, d, a, b, in[14], S43, 0xab9423a7); /* 51 */ + II ( b, 
c, d, a, in[ 5], S44, 0xfc93a039); /* 52 */ + II ( a, b, c, d, in[12], S41, 0x655b59c3); /* 53 */ + II ( d, a, b, c, in[ 3], S42, 0x8f0ccc92); /* 54 */ + II ( c, d, a, b, in[10], S43, 0xffeff47d); /* 55 */ + II ( b, c, d, a, in[ 1], S44, 0x85845dd1); /* 56 */ + II ( a, b, c, d, in[ 8], S41, 0x6fa87e4f); /* 57 */ + II ( d, a, b, c, in[15], S42, 0xfe2ce6e0); /* 58 */ + II ( c, d, a, b, in[ 6], S43, 0xa3014314); /* 59 */ + II ( b, c, d, a, in[13], S44, 0x4e0811a1); /* 60 */ + II ( a, b, c, d, in[ 4], S41, 0xf7537e82); /* 61 */ + II ( d, a, b, c, in[11], S42, 0xbd3af235); /* 62 */ + II ( c, d, a, b, in[ 2], S43, 0x2ad7d2bb); /* 63 */ + II ( b, c, d, a, in[ 9], S44, 0xeb86d391); /* 64 */ + + buf[0] += a; + buf[1] += b; + buf[2] += c; + buf[3] += d; +} + +/* + *********************************************************************** + ** End of md5.c ** + ******************************** (cut) ******************************** + */ diff --git a/src/mairix/md5.h b/src/mairix/md5.h @@ -0,0 +1,62 @@ +/* + *********************************************************************** + ** md5.h -- header file for implementation of MD5 ** + ** RSA Data Security, Inc. MD5 Message-Digest Algorithm ** + ** Created: 2/17/90 RLR ** + ** Revised: 12/27/90 SRD,AJ,BSK,JT Reference C version ** + ** Revised (for MD5): RLR 4/27/91 ** + *********************************************************************** + */ + +/* + *********************************************************************** + ** Copyright (C) 1990, RSA Data Security, Inc. All rights reserved. ** + ** ** + ** License to copy and use this software is granted provided that ** + ** it is identified as the "RSA Data Security, Inc. MD5 Message- ** + ** Digest Algorithm" in all material mentioning or referencing this ** + ** software or this function. ** + ** ** + ** License is also granted to make and use derivative works ** + ** provided that such works are identified as "derived from the RSA ** + ** Data Security, Inc. 
MD5 Message-Digest Algorithm" in all ** + ** material mentioning or referencing the derived work. ** + ** ** + ** RSA Data Security, Inc. makes no representations concerning ** + ** either the merchantability of this software or the suitability ** + ** of this software for any particular purpose. It is provided "as ** + ** is" without express or implied warranty of any kind. ** + ** ** + ** These notices must be retained in any copies of any part of this ** + ** documentation and/or software. ** + *********************************************************************** + */ + +#ifdef HAS_STDINT_H +#include <stdint.h> +#elif defined(HAS_INTTYPES_H) +#include <inttypes.h> +#else +#error "No <stdint.h> or <inttypes.h>" +#endif + +/* typedef a 32-bit type */ +typedef uint32_t UINT4; + +/* Data structure for MD5 (Message-Digest) computation */ +typedef struct { + UINT4 i[2]; /* number of _bits_ handled mod 2^64 */ + UINT4 buf[4]; /* scratch buffer */ + unsigned char in[64]; /* input buffer */ + unsigned char digest[16]; /* actual digest after MD5Final call */ +} MD5_CTX; + +void MD5Init (MD5_CTX *mdContext); +void MD5Update (MD5_CTX *, unsigned const char *, unsigned int); +void MD5Final (MD5_CTX *); + +/* + *********************************************************************** + ** End of md5.h ** + ******************************** (cut) ******************************** + */ diff --git a/src/mairix/memmac.h b/src/mairix/memmac.h @@ -0,0 +1,72 @@ +/* + mairix - message index builder and finder for maildir folders. + + ********************************************************************** + * Copyright (C) Richard P. Curnow 2002-2004 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + + +#ifndef MEMMAC_H +#define MEMMAC_H + +/*{{{ Safe alloc helpers (GCC extensions): safe_malloc and safe_realloc call out_of_mem() with the caller's file and line when the allocator returns NULL, then return the pointer as it stands. Malloc/Realloc expand to these wrappers unless TEST is defined, in which case they are plain malloc/realloc. TEST_OOM is #undef'd just below, so the TEST_OOM-guarded lines are not compiled as the file stands. */ +extern void out_of_mem(char *file, int line, size_t size); + +#undef TEST_OOM + +#ifdef TEST_OOM +extern int total_bytes; +#endif + +static __inline__ void* safe_malloc(char *file, int line, size_t s)/*{{{*/ +{ + void *x = malloc(s); +#ifdef TEST_OOM + total_bytes += s; + if (total_bytes > 131072) x = NULL; +#endif + if (!x) out_of_mem(file, line, s); + return x; +} +/*}}}*/ +static __inline__ void* safe_realloc(char *file, int line, void *old_ptr, size_t s)/*{{{*/ +{ + void *x = realloc(old_ptr, s); + if (!x) out_of_mem(file, line, s); + return x; +} +/*}}}*/ +#ifndef TEST +#define Malloc(s) safe_malloc(__FILE__, __LINE__, s) +#define Realloc(xx,s) safe_realloc(__FILE__, __LINE__,xx,s) +#else +#define Malloc(s) malloc(s) +#define Realloc(xx,s) realloc(xx,s) +#endif +/*}}}*/ + +/*{{{ Memory macros. new_string and extend_string expand their arguments more than once (s twice in new_string; x and s twice each in extend_string), so an argument with side effects is evaluated repeatedly. */ +#define new_string(s) strcpy((char *) Malloc(1+strlen(s)), (s)) +#define extend_string(x,s) (strcat(Realloc(x, (strlen(x)+strlen(s)+1)), s)) +#define new(T) (T *) Malloc(sizeof(T)) +#define new_array(T, n) (T *) Malloc(sizeof(T) * (n)) +#define grow_array(T, n, oldX) (T *) ((oldX) ? 
Realloc(oldX, (sizeof(T) * (n))) : Malloc(sizeof(T) * (n))) +#define EMPTY(x) {&(x), &(x)} +/*}}}*/ + +#endif /* MEMMAC_H */ diff --git a/src/mairix/mkversion b/src/mairix/mkversion @@ -0,0 +1,15 @@ +#!/bin/sh + +rm -f version.h +echo "#ifndef VERSION_H" > version.h +echo "#define VERSION_H 1" >> version.h + +if [ -f version.txt ]; then + ver=`cat version.txt` + echo "#define PROGRAM_VERSION \"$ver\"" >> version.h +else + echo "#define PROGRAM_VERSION \"DEVELOPMENT\"" >> version.h +fi + +echo "#endif /* VERSION_H */" >> version.h + diff --git a/src/mairix/nvp.c b/src/mairix/nvp.c @@ -0,0 +1,416 @@ +/* + mairix - message index builder and finder for maildir folders. + + ********************************************************************** + * Copyright (C) Richard P. Curnow 2006,2007 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +#ifdef VERBOSE_TEST +#define TEST 1 +#endif + +/* Parse name/value pairs from mail headers into a lookup table. 
*/ +#include <stdio.h> +#include <ctype.h> +#include "mairix.h" +#include "nvptypes.h" +#include "nvpscan.h" +#include "nvp.h" + +enum nvp_type {/*{{{ kind of entry; determines which of lhs/rhs are in use */ + NVP_NAME, + NVP_MAJORMINOR, + NVP_NAMEVALUE +}; +/*}}}*/ +struct nvp_entry {/*{{{ one parsed item on a doubly linked list; rhs is only assigned for NVP_MAJORMINOR and NVP_NAMEVALUE entries */ + struct nvp_entry *next; + struct nvp_entry *prev; + enum nvp_type type; + char *lhs; + char *rhs; +}; +/*}}}*/ +struct nvp {/*{{{ list header: first and last entry, both NULL while the list is empty */ + struct nvp_entry *first, *last; +}; +/*}}}*/ +static void append(struct nvp *nvp, struct nvp_entry *ne)/*{{{ link ne at the tail of the list */ +{ + ne->next = NULL; + ne->prev = nvp->last; + if (nvp->last) nvp->last->next = ne; + else nvp->first = ne; + nvp->last = ne; +} +/*}}}*/ +static void append_name(struct nvp *nvp, char *name)/*{{{ add an NVP_NAME entry holding a heap copy of name; rhs is left unassigned */ +{ + struct nvp_entry *ne; + ne = new(struct nvp_entry); + ne->type = NVP_NAME; + ne->lhs = new_string(name); + append(nvp, ne); +} +/*}}}*/ +static void append_majorminor(struct nvp *nvp, char *major, char *minor)/*{{{ add an NVP_MAJORMINOR entry with heap copies of major (lhs) and minor (rhs) */ +{ + struct nvp_entry *ne; + ne = new(struct nvp_entry); + ne->type = NVP_MAJORMINOR; + ne->lhs = new_string(major); + ne->rhs = new_string(minor); + append(nvp, ne); + +} +/*}}}*/ +static void append_namevalue(struct nvp *nvp, char *name, char *value)/*{{{ add an NVP_NAMEVALUE entry with heap copies of name (lhs) and value (rhs) */ +{ + struct nvp_entry *ne; + ne = new(struct nvp_entry); + ne->type = NVP_NAMEVALUE; + ne->lhs = new_string(name); + ne->rhs = new_string(value); + append(nvp, ne); +} +/*}}}*/ +static void combine_namevalue(struct nvp *nvp, char *name, char *value)/*{{{ concatenate value onto the rhs of the first NVP_NAMEVALUE entry whose lhs equals name (strcmp); if there is none, add a new entry */ +{ + struct nvp_entry *n; + for (n=nvp->first; n; n=n->next) { + if (n->type == NVP_NAMEVALUE) { + if (!strcmp(n->lhs, name)) { + char *new_rhs; + new_rhs = new_array(char, strlen(n->rhs) + strlen(value) + 1); + strcpy(new_rhs, n->rhs); + strcat(new_rhs, value); + free(n->rhs); + n->rhs = new_rhs; + return; + } + } + } + /* No match : it's the first one */ + append_namevalue(nvp, name, value); +} +/*}}}*/ +static void release_nvp(struct nvp *nvp)/*{{{ free every entry's strings, the entries themselves, and the list header */ +{ + struct nvp_entry *e, *ne; + for (e=nvp->first; e; e=ne) { + ne = e->next; + switch (e->type) { + case NVP_NAME: 
+ free(e->lhs); + break; + case NVP_MAJORMINOR: + case NVP_NAMEVALUE: + free(e->lhs); + free(e->rhs); + break; + } + free(e); + } + free(nvp); +} +/*}}}*/ +struct nvp *make_nvp(struct msg_src *src, char *s, const char *pfx)/*{{{ Parse s into a newly allocated entry list. s must start with pfx (compared case-insensitively with strncasecmp), otherwise NULL is returned. The text after the prefix is fed one character at a time through nvp_next_state(); a negative state prints a diagnostic to stderr, releases the partial list and returns NULL. src is only used to name the message in that diagnostic. NOTE(review): name, minor and value are fixed 256-byte arrays and the COPY_TO_NAME / COPY_TO_MINOR / COPY_TO_VALUE cases store into them with no bounds check -- confirm that input length is limited elsewhere. NOTE(review): isspace() below is passed a plain char, while the ctype.h functions expect an unsigned char value or EOF -- confirm. */ +{ + int current_state; + unsigned int tok; + char *q; + unsigned char qq; + char name[256]; + char minor[256]; + char value[256]; + enum nvp_action last_action, current_action; + struct nvp *result; + size_t pfxlen; + char *nn, *mm, *vv; + + pfxlen = strlen(pfx); + if (strncasecmp(pfx, s, pfxlen)) + return NULL; + s += pfxlen; + + result = new(struct nvp); + result->first = result->last = NULL; + + current_state = nvp_in; + + q = s; + nn = name; + mm = minor; + vv = value; + last_action = GOT_NOTHING; + do { + qq = *(unsigned char *) q; + if (qq) { + tok = nvp_char2tok[qq]; + } else { + tok = nvp_EOS; + } + current_state = nvp_next_state(current_state, tok); +#ifdef VERBOSE_TEST + fprintf(stderr, "Char %02x (%c) tok=%d new_current_state=%d\n", + qq, ((qq>=32) && (qq<=126)) ? 
qq : '.', + tok, current_state); +#endif + + if (current_state < 0) { +#ifdef TEST + fprintf(stderr, "'%s' could not be parsed\n", s); +#else + fprintf(stderr, "Header '%s%s' in %s could not be parsed\n", + pfx, s, format_msg_src(src)); +#endif + release_nvp(result); + return NULL; + } + + switch (nvp_copier[current_state]) { + case COPY_TO_NAME: +#ifdef VERBOSE_TEST + fprintf(stderr, " COPY_TO_NAME\n"); +#endif + *nn++ = *q; + break; + case COPY_TO_MINOR: +#ifdef VERBOSE_TEST + fprintf(stderr, " COPY_TO_MINOR\n"); +#endif + *mm++ = *q; + break; + case COPY_TO_VALUE: +#ifdef VERBOSE_TEST + fprintf(stderr, " COPY_TO_VALUE\n"); +#endif + *vv++ = *q; + break; + case COPY_NOWHERE: + break; + } + + current_action = nvp_action[current_state]; + switch (current_action) { + case GOT_NAME: + case GOT_NAME_TRAILING_SPACE: + case GOT_MAJORMINOR: + case GOT_NAMEVALUE: + case GOT_NAMEVALUE_CONT: +#ifdef VERBOSE_TEST + fprintf(stderr, " Setting last action to %d\n", current_action); +#endif + last_action = current_action; + break; + case GOT_TERMINATOR: +#ifdef VERBOSE_TEST + fprintf(stderr, " Hit terminator; last_action=%d\n", last_action); +#endif + switch (last_action) { + case GOT_NAME: + *nn = 0; + append_name(result, name); + break; + case GOT_NAME_TRAILING_SPACE: + while (isspace(*--nn)) {} + *++nn = 0; + append_name(result, name); + break; + case GOT_MAJORMINOR: + *nn = 0; + *mm = 0; + append_majorminor(result, name, minor); + break; + case GOT_NAMEVALUE: + *nn = 0; + *vv = 0; + append_namevalue(result, name, value); + break; + case GOT_NAMEVALUE_CONT: + *nn = 0; + *vv = 0; + combine_namevalue(result, name, value); + break; + default: + break; + } + nn = name; + mm = minor; + vv = value; + break; + case GOT_NOTHING: + break; + } + + q++; + } while (tok != nvp_EOS); + + return result; +} +/*}}}*/ +void free_nvp(struct nvp *nvp)/*{{{ free every entry's strings, the entries themselves, and the list header (the same steps as the static release_nvp) */ +{ + struct nvp_entry *ne, *nne; + for (ne = nvp->first; ne; ne=nne) { + nne = ne->next; + switch (ne->type) { + case NVP_NAME: 
+ free(ne->lhs); + break; + case NVP_MAJORMINOR: + case NVP_NAMEVALUE: + free(ne->lhs); + free(ne->rhs); + break; + } + free(ne); + } + free(nvp); +} +/*}}}*/ +const char *nvp_lookup(struct nvp *nvp, const char *name)/*{{{ return the rhs of the first NVP_NAMEVALUE entry whose lhs equals name exactly (strcmp), or NULL if there is none; the pointer is the entry's own string, not a copy */ +{ + struct nvp_entry *ne; + for (ne = nvp->first; ne; ne=ne->next) { + if (ne->type == NVP_NAMEVALUE) { + if (!strcmp(ne->lhs, name)) { + return ne->rhs; + } + } + } + return NULL; +} +/*}}}*/ +const char *nvp_lookupcase(struct nvp *nvp, const char *name)/*{{{ same search as nvp_lookup, but the name comparison ignores case (strcasecmp) */ +{ + struct nvp_entry *ne; + for (ne = nvp->first; ne; ne=ne->next) { + if (ne->type == NVP_NAMEVALUE) { + if (!strcasecmp(ne->lhs, name)) { + return ne->rhs; + } + } + } + return NULL; +} +/*}}}*/ + +void nvp_dump(struct nvp *nvp, FILE *out)/*{{{ print a ---- separator line to out, then one line per entry labelled with its type */ +{ + struct nvp_entry *ne; + fprintf(out, "----\n"); + for (ne = nvp->first; ne; ne=ne->next) { + switch (ne->type) { + case NVP_NAME: + fprintf(out, "NAME: %s\n", ne->lhs); + break; + case NVP_MAJORMINOR: + fprintf(out, "MAJORMINOR: %s/%s\n", ne->lhs, ne->rhs); + break; + case NVP_NAMEVALUE: + fprintf(out, "NAMEVALUE: %s=%s\n", ne->lhs, ne->rhs); + break; + } + } +} +/*}}}*/ + +/* In these cases, we only look at the first entry */ +const char *nvp_major(struct nvp *nvp)/*{{{ lhs of the first entry if it is NVP_MAJORMINOR, otherwise NULL (also NULL for an empty list) */ +{ + struct nvp_entry *ne; + ne = nvp->first; + if (ne) { + if (ne->type == NVP_MAJORMINOR) { + return ne->lhs; + } else { + return NULL; + } + } else { + return NULL; + } +} +/*}}}*/ +const char *nvp_minor(struct nvp *nvp)/*{{{ rhs of the first entry if it is NVP_MAJORMINOR, otherwise NULL (also NULL for an empty list) */ +{ + struct nvp_entry *ne; + ne = nvp->first; + if (ne) { + if (ne->type == NVP_MAJORMINOR) { + return ne->rhs; + } else { + return NULL; + } + } else { + return NULL; + } +} +/*}}}*/ +const char *nvp_first(struct nvp *nvp)/*{{{ lhs of the first entry if it is NVP_NAME, otherwise NULL (also NULL for an empty list) */ +{ + struct nvp_entry *ne; + ne = nvp->first; + if (ne) { + if (ne->type == NVP_NAME) { + return ne->lhs; + } else { + return NULL; + } + } else { + return NULL; + } +} +/*}}}*/ + +#ifdef TEST + +static void do_test(char *s) +{ + struct nvp *n; + n = make_nvp(NULL, s, ""); + if (n) { + nvp_dump(n, stderr); + 
free_nvp(n); + } +} + + +int main (int argc, char **argv) { + struct nvp *n; +#if 0 + do_test("attachment; filename=\"foo.c\"; prot=ro"); + do_test("attachment; filename= \"foo bar.c\" ;prot=ro"); + do_test("attachment ; filename= \"foo bar.c\" ;prot= ro"); + do_test("attachment ; filename= \"foo bar.c\" ;prot= ro"); + do_test("attachment ; filename= \"foo ; bar.c\" ;prot= ro"); + do_test("attachment ; x*0=\"hi \"; x*1=\"there\""); +#endif + + do_test("application/vnd.ms-excel; name=\"thequiz.xls\""); +#if 0 + do_test("inline; filename*0=\"aaaa bbbb cccc dddd eeee ffff gggg hhhh iiii jjjj\t kkkkllll\""); + do_test(" text/plain ; name= \"foo bar.c\" ;prot= ro/rw; read/write; read= foo bar"); +#endif + return 0; +} +#endif + + + + diff --git a/src/mairix/nvp.h b/src/mairix/nvp.h @@ -0,0 +1,38 @@ +/* + mairix - message index builder and finder for maildir folders. + + ********************************************************************** + * Copyright (C) Richard P. Curnow 2006,2010 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + ********************************************************************** + */ + +#ifndef NVP_H +#define NVP_H + +struct nvp; +struct msg_src; +extern struct nvp *make_nvp(struct msg_src *, char *, const char *); +extern void free_nvp(struct nvp *); +extern void nvp_dump(struct nvp *nvp, FILE *out); +extern const char *nvp_major(struct nvp *n); +extern const char *nvp_minor(struct nvp *n); +extern const char *nvp_first(struct nvp *n); +extern const char *nvp_lookup(struct nvp *n, const char *name); +extern const char *nvp_lookupcase(struct nvp *n, const char *name); + +#endif + diff --git a/src/mairix/nvp.nfa b/src/mairix/nvp.nfa @@ -0,0 +1,197 @@ +######################################################################### +# +# mairix - message index builder and finder for maildir folders. +# +# Copyright (C) Richard P. Curnow 2006,2007 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# ======================================================================= + +Tokens EOS +Abbrev VALUE = [\041-~]~[\\";] +Abbrev QVALUE = VALUE | [\011\040;] | <escape:in->out> +Abbrev NAME1 = [0-9a-zA-Z_\-] +Abbrev MINOR = NAME1 | [\.\-+] +Abbrev OWS = <optwhite:in->out> + +%{ +#include "nvptypes.h" +%} + +Block escape { + State in + [\\] ; [\\] -> out + [\\] ; ["] -> out +} + +Block optwhite { + State in + -> out + # I have seen headers with ^M in them... 
+ [ \t\r] -> in +} + +Block name { + # This needs to cope with embedded spaces, e.g. for mailers that write '7 + # bit' instead of '7bit' + State in + NAME1 -> name1 + + State name1 + = COPY_TO_NAME + = GOT_NAME + NAME1 -> name1 + [ \t] -> name2 + -> out + + State name2 + = COPY_TO_NAME + = GOT_NAME_TRAILING_SPACE + [ \t] -> name2 + NAME1 -> name1 + -> out + + State out +} + +Block value { + State in + VALUE -> v1 + State v1 + = COPY_TO_VALUE + -> out + VALUE -> v1 +} + +Block qvalue { + State in + ["] -> qv0 + + State qv0 + QVALUE -> qv1 + + State qv1 + = COPY_TO_VALUE + QVALUE -> qv1 + -> qv2 + + State qv2 + ["] -> out +} + +Block digits { + State in + [0-9] -> out + [0-9] -> in +} + +Block namevalue { + State in + OWS ; <name:in->out> ; OWS ; [=] -> rhs_normal + OWS ; <name:in->out> ; [*] ; <digits:in->out> ; OWS ; [=] -> rhs_continue + + State rhs_normal + OWS ; <qvalue:in->out> ; OWS -> out_normal + OWS ; <value:in->out> ; OWS -> out_normal + OWS ; ; EOS -> out_normal + + State rhs_continue + OWS ; <qvalue:in->out> ; OWS -> out_continue + OWS ; <value:in->out> ; OWS -> out_continue + + State out_normal = GOT_NAMEVALUE + -> out + State out_continue = GOT_NAMEVALUE_CONT + -> out +} + +Block major { + State in + NAME1 -> name1 + + State name1 + NAME1 -> name1 + -> out +} + +Block minor { + State in + MINOR -> minor1 + + State minor1 + = COPY_TO_MINOR + MINOR -> minor1 + -> out +} + +Block majorminor { + State in + <major:in->out> -> foo + + State foo + [/] -> bar + + State bar + <minor:in->out> -> out + + State out = GOT_MAJORMINOR +} + +Block component { + State in + <namevalue:in->out> -> out + <name:in->out> -> out + <majorminor:in->out> -> out +} + +Block main { + State in Entry in + OWS ; <component:in->out> ; OWS ; EOS -> out2 + OWS ; <component:in->out> ; OWS ; [;] ; OWS ; EOS -> out2 + OWS ; <component:in->out> ; OWS ; [;] -> in2 + + State in2 + = GOT_TERMINATOR + -> in + + State out2 + = GOT_TERMINATOR + -> out +} + +Defattr 0 +Prefix nvp + +Group action 
{ + Attr GOT_NAMEVALUE + Attr GOT_NAMEVALUE_CONT + Attr GOT_NAME + Attr GOT_NAME_TRAILING_SPACE + Attr GOT_MAJORMINOR + Attr GOT_TERMINATOR + Defattr GOT_NOTHING + Type "enum nvp_action" +} + +Group copier { + Attr COPY_TO_NAME + Attr COPY_TO_MINOR + Attr COPY_TO_VALUE + Defattr COPY_NOWHERE + Type "enum nvp_copier" +} + +# vim:et:sts=4:sw=4:ht=8 + diff --git a/src/mairix/nvpscan.report b/src/mairix/nvpscan.report @@ -0,0 +1,6352 @@ +Processing 1 separate entry points +Entries in 1 blocks, total of 415 states +NFA state 0 = main.in [Entries: in] + [(epsilon)] -> optwhite#8.in + [(epsilon)] -> optwhite#4.in + [(epsilon)] -> optwhite#1.in + Epsilon closure : + (self) + main.#1 + main.optwhite#1.in + main.optwhite#1.out + main.component#2.in + main.component#2.namevalue#1.in + main.component#2.namevalue#1.#1 + main.component#2.namevalue#1.optwhite#1.in + main.component#2.namevalue#1.optwhite#1.out + main.component#2.namevalue#1.name#2.in + main.component#2.namevalue#1.#4 + main.component#2.namevalue#1.optwhite#4.in + main.component#2.namevalue#1.optwhite#4.out + main.component#2.namevalue#1.name#5.in + main.component#2.name#2.in + main.component#2.majorminor#3.in + main.component#2.majorminor#3.major#1.in + main.#4 + main.optwhite#4.in + main.optwhite#4.out + main.component#5.in + main.component#5.namevalue#1.in + main.component#5.namevalue#1.#1 + main.component#5.namevalue#1.optwhite#1.in + main.component#5.namevalue#1.optwhite#1.out + main.component#5.namevalue#1.name#2.in + main.component#5.namevalue#1.#4 + main.component#5.namevalue#1.optwhite#4.in + main.component#5.namevalue#1.optwhite#4.out + main.component#5.namevalue#1.name#5.in + main.component#5.name#2.in + main.component#5.majorminor#3.in + main.component#5.majorminor#3.major#1.in + main.#9 + main.optwhite#8.in + main.optwhite#8.out + main.component#9.in + main.component#9.namevalue#1.in + main.component#9.namevalue#1.#1 + main.component#9.namevalue#1.optwhite#1.in + 
main.component#9.namevalue#1.optwhite#1.out + main.component#9.namevalue#1.name#2.in + main.component#9.namevalue#1.#4 + main.component#9.namevalue#1.optwhite#4.in + main.component#9.namevalue#1.optwhite#4.out + main.component#9.namevalue#1.name#5.in + main.component#9.name#2.in + main.component#9.majorminor#3.in + main.component#9.majorminor#3.major#1.in + +NFA state 1 = main.#1 + [(epsilon)] -> component#2.in + Epsilon closure : + (self) + main.component#2.in + main.component#2.namevalue#1.in + main.component#2.namevalue#1.#1 + main.component#2.namevalue#1.optwhite#1.in + main.component#2.namevalue#1.optwhite#1.out + main.component#2.namevalue#1.name#2.in + main.component#2.namevalue#1.#4 + main.component#2.namevalue#1.optwhite#4.in + main.component#2.namevalue#1.optwhite#4.out + main.component#2.namevalue#1.name#5.in + main.component#2.name#2.in + main.component#2.majorminor#3.in + main.component#2.majorminor#3.major#1.in + +NFA state 2 = main.optwhite#1.in + [(epsilon)] -> optwhite#1.out + 0:[\t ] -> optwhite#1.in + 1:[\r] -> optwhite#1.in + Epsilon closure : + (self) + main.#1 + main.optwhite#1.out + main.component#2.in + main.component#2.namevalue#1.in + main.component#2.namevalue#1.#1 + main.component#2.namevalue#1.optwhite#1.in + main.component#2.namevalue#1.optwhite#1.out + main.component#2.namevalue#1.name#2.in + main.component#2.namevalue#1.#4 + main.component#2.namevalue#1.optwhite#4.in + main.component#2.namevalue#1.optwhite#4.out + main.component#2.namevalue#1.name#5.in + main.component#2.name#2.in + main.component#2.majorminor#3.in + main.component#2.majorminor#3.major#1.in + +NFA state 3 = main.optwhite#1.out + [(epsilon)] -> #1 + Epsilon closure : + (self) + main.#1 + main.component#2.in + main.component#2.namevalue#1.in + main.component#2.namevalue#1.#1 + main.component#2.namevalue#1.optwhite#1.in + main.component#2.namevalue#1.optwhite#1.out + main.component#2.namevalue#1.name#2.in + main.component#2.namevalue#1.#4 + 
main.component#2.namevalue#1.optwhite#4.in + main.component#2.namevalue#1.optwhite#4.out + main.component#2.namevalue#1.name#5.in + main.component#2.name#2.in + main.component#2.majorminor#3.in + main.component#2.majorminor#3.major#1.in + +NFA state 4 = main.#2 + [(epsilon)] -> optwhite#3.in + Epsilon closure : + (self) + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 5 = main.component#2.in + [(epsilon)] -> component#2.namevalue#1.in + [(epsilon)] -> component#2.name#2.in + [(epsilon)] -> component#2.majorminor#3.in + Epsilon closure : + (self) + main.component#2.namevalue#1.in + main.component#2.namevalue#1.#1 + main.component#2.namevalue#1.optwhite#1.in + main.component#2.namevalue#1.optwhite#1.out + main.component#2.namevalue#1.name#2.in + main.component#2.namevalue#1.#4 + main.component#2.namevalue#1.optwhite#4.in + main.component#2.namevalue#1.optwhite#4.out + main.component#2.namevalue#1.name#5.in + main.component#2.name#2.in + main.component#2.majorminor#3.in + main.component#2.majorminor#3.major#1.in + +NFA state 6 = main.component#2.namevalue#1.in + [(epsilon)] -> component#2.namevalue#1.optwhite#4.in + [(epsilon)] -> component#2.namevalue#1.optwhite#1.in + Epsilon closure : + (self) + main.component#2.namevalue#1.#1 + main.component#2.namevalue#1.optwhite#1.in + main.component#2.namevalue#1.optwhite#1.out + main.component#2.namevalue#1.name#2.in + main.component#2.namevalue#1.#4 + main.component#2.namevalue#1.optwhite#4.in + main.component#2.namevalue#1.optwhite#4.out + main.component#2.namevalue#1.name#5.in + +NFA state 7 = main.component#2.namevalue#1.#1 + [(epsilon)] -> component#2.namevalue#1.name#2.in + Epsilon closure : + (self) + main.component#2.namevalue#1.name#2.in + +NFA state 8 = main.component#2.namevalue#1.optwhite#1.in + [(epsilon)] -> component#2.namevalue#1.optwhite#1.out + 0:[\t ] -> component#2.namevalue#1.optwhite#1.in + 1:[\r] -> component#2.namevalue#1.optwhite#1.in + Epsilon closure : + (self) + 
main.component#2.namevalue#1.#1 + main.component#2.namevalue#1.optwhite#1.out + main.component#2.namevalue#1.name#2.in + +NFA state 9 = main.component#2.namevalue#1.optwhite#1.out + [(epsilon)] -> component#2.namevalue#1.#1 + Epsilon closure : + (self) + main.component#2.namevalue#1.#1 + main.component#2.namevalue#1.name#2.in + +NFA state 10 = main.component#2.namevalue#1.#2 + [(epsilon)] -> component#2.namevalue#1.optwhite#3.in + Epsilon closure : + (self) + main.component#2.namevalue#1.#3 + main.component#2.namevalue#1.optwhite#3.in + main.component#2.namevalue#1.optwhite#3.out + +NFA state 11 = main.component#2.namevalue#1.name#2.in + 6:[\055] -> component#2.namevalue#1.name#2.name1 + 11:[A-Z_a-z] -> component#2.namevalue#1.name#2.name1 + 8:[0-9] -> component#2.namevalue#1.name#2.name1 + Epsilon closure : + (self) + +NFA state 12 = main.component#2.namevalue#1.name#2.name1 + [(epsilon)] -> component#2.namevalue#1.name#2.#1 + [(epsilon)] -> component#2.namevalue#1.name#2.#2 + 6:[\055] -> component#2.namevalue#1.name#2.name1 + 11:[A-Z_a-z] -> component#2.namevalue#1.name#2.name1 + 8:[0-9] -> component#2.namevalue#1.name#2.name1 + 0:[\t ] -> component#2.namevalue#1.name#2.name2 + [(epsilon)] -> component#2.namevalue#1.name#2.out + Epsilon closure : + (self) + main.component#2.namevalue#1.#2 + main.component#2.namevalue#1.name#2.#1 + main.component#2.namevalue#1.name#2.#2 + main.component#2.namevalue#1.name#2.out + main.component#2.namevalue#1.#3 + main.component#2.namevalue#1.optwhite#3.in + main.component#2.namevalue#1.optwhite#3.out + +NFA state 13 = main.component#2.namevalue#1.name#2.#1 + Tags : COPY_TO_NAME + Epsilon closure : + (self) + +NFA state 14 = main.component#2.namevalue#1.name#2.#2 + Tags : GOT_NAME + Epsilon closure : + (self) + +NFA state 15 = main.component#2.namevalue#1.name#2.name2 + [(epsilon)] -> component#2.namevalue#1.name#2.#3 + [(epsilon)] -> component#2.namevalue#1.name#2.#4 + 0:[\t ] -> component#2.namevalue#1.name#2.name2 + 6:[\055] -> 
component#2.namevalue#1.name#2.name1 + 11:[A-Z_a-z] -> component#2.namevalue#1.name#2.name1 + 8:[0-9] -> component#2.namevalue#1.name#2.name1 + [(epsilon)] -> component#2.namevalue#1.name#2.out + Epsilon closure : + (self) + main.component#2.namevalue#1.#2 + main.component#2.namevalue#1.name#2.#3 + main.component#2.namevalue#1.name#2.#4 + main.component#2.namevalue#1.name#2.out + main.component#2.namevalue#1.#3 + main.component#2.namevalue#1.optwhite#3.in + main.component#2.namevalue#1.optwhite#3.out + +NFA state 16 = main.component#2.namevalue#1.name#2.#3 + Tags : COPY_TO_NAME + Epsilon closure : + (self) + +NFA state 17 = main.component#2.namevalue#1.name#2.#4 + Tags : GOT_NAME_TRAILING_SPACE + Epsilon closure : + (self) + +NFA state 18 = main.component#2.namevalue#1.name#2.out + [(epsilon)] -> component#2.namevalue#1.#2 + Epsilon closure : + (self) + main.component#2.namevalue#1.#2 + main.component#2.namevalue#1.#3 + main.component#2.namevalue#1.optwhite#3.in + main.component#2.namevalue#1.optwhite#3.out + +NFA state 19 = main.component#2.namevalue#1.#3 + 10:[=] -> component#2.namevalue#1.rhs_normal + Epsilon closure : + (self) + +NFA state 20 = main.component#2.namevalue#1.optwhite#3.in + [(epsilon)] -> component#2.namevalue#1.optwhite#3.out + 0:[\t ] -> component#2.namevalue#1.optwhite#3.in + 1:[\r] -> component#2.namevalue#1.optwhite#3.in + Epsilon closure : + (self) + main.component#2.namevalue#1.#3 + main.component#2.namevalue#1.optwhite#3.out + +NFA state 21 = main.component#2.namevalue#1.optwhite#3.out + [(epsilon)] -> component#2.namevalue#1.#3 + Epsilon closure : + (self) + main.component#2.namevalue#1.#3 + +NFA state 22 = main.component#2.namevalue#1.#4 + [(epsilon)] -> component#2.namevalue#1.name#5.in + Epsilon closure : + (self) + main.component#2.namevalue#1.name#5.in + +NFA state 23 = main.component#2.namevalue#1.optwhite#4.in + [(epsilon)] -> component#2.namevalue#1.optwhite#4.out + 0:[\t ] -> component#2.namevalue#1.optwhite#4.in + 1:[\r] -> 
component#2.namevalue#1.optwhite#4.in + Epsilon closure : + (self) + main.component#2.namevalue#1.#4 + main.component#2.namevalue#1.optwhite#4.out + main.component#2.namevalue#1.name#5.in + +NFA state 24 = main.component#2.namevalue#1.optwhite#4.out + [(epsilon)] -> component#2.namevalue#1.#4 + Epsilon closure : + (self) + main.component#2.namevalue#1.#4 + main.component#2.namevalue#1.name#5.in + +NFA state 25 = main.component#2.namevalue#1.#5 + 4:[*] -> component#2.namevalue#1.#6 + Epsilon closure : + (self) + +NFA state 26 = main.component#2.namevalue#1.name#5.in + 6:[\055] -> component#2.namevalue#1.name#5.name1 + 11:[A-Z_a-z] -> component#2.namevalue#1.name#5.name1 + 8:[0-9] -> component#2.namevalue#1.name#5.name1 + Epsilon closure : + (self) + +NFA state 27 = main.component#2.namevalue#1.name#5.name1 + [(epsilon)] -> component#2.namevalue#1.name#5.#1 + [(epsilon)] -> component#2.namevalue#1.name#5.#2 + 6:[\055] -> component#2.namevalue#1.name#5.name1 + 11:[A-Z_a-z] -> component#2.namevalue#1.name#5.name1 + 8:[0-9] -> component#2.namevalue#1.name#5.name1 + 0:[\t ] -> component#2.namevalue#1.name#5.name2 + [(epsilon)] -> component#2.namevalue#1.name#5.out + Epsilon closure : + (self) + main.component#2.namevalue#1.#5 + main.component#2.namevalue#1.name#5.#1 + main.component#2.namevalue#1.name#5.#2 + main.component#2.namevalue#1.name#5.out + +NFA state 28 = main.component#2.namevalue#1.name#5.#1 + Tags : COPY_TO_NAME + Epsilon closure : + (self) + +NFA state 29 = main.component#2.namevalue#1.name#5.#2 + Tags : GOT_NAME + Epsilon closure : + (self) + +NFA state 30 = main.component#2.namevalue#1.name#5.name2 + [(epsilon)] -> component#2.namevalue#1.name#5.#3 + [(epsilon)] -> component#2.namevalue#1.name#5.#4 + 0:[\t ] -> component#2.namevalue#1.name#5.name2 + 6:[\055] -> component#2.namevalue#1.name#5.name1 + 11:[A-Z_a-z] -> component#2.namevalue#1.name#5.name1 + 8:[0-9] -> component#2.namevalue#1.name#5.name1 + [(epsilon)] -> component#2.namevalue#1.name#5.out + 
Epsilon closure : + (self) + main.component#2.namevalue#1.#5 + main.component#2.namevalue#1.name#5.#3 + main.component#2.namevalue#1.name#5.#4 + main.component#2.namevalue#1.name#5.out + +NFA state 31 = main.component#2.namevalue#1.name#5.#3 + Tags : COPY_TO_NAME + Epsilon closure : + (self) + +NFA state 32 = main.component#2.namevalue#1.name#5.#4 + Tags : GOT_NAME_TRAILING_SPACE + Epsilon closure : + (self) + +NFA state 33 = main.component#2.namevalue#1.name#5.out + [(epsilon)] -> component#2.namevalue#1.#5 + Epsilon closure : + (self) + main.component#2.namevalue#1.#5 + +NFA state 34 = main.component#2.namevalue#1.#6 + [(epsilon)] -> component#2.namevalue#1.digits#6.in + Epsilon closure : + (self) + main.component#2.namevalue#1.digits#6.in + +NFA state 35 = main.component#2.namevalue#1.#7 + [(epsilon)] -> component#2.namevalue#1.optwhite#7.in + Epsilon closure : + (self) + main.component#2.namevalue#1.#8 + main.component#2.namevalue#1.optwhite#7.in + main.component#2.namevalue#1.optwhite#7.out + +NFA state 36 = main.component#2.namevalue#1.digits#6.in + 8:[0-9] -> component#2.namevalue#1.digits#6.out + 8:[0-9] -> component#2.namevalue#1.digits#6.in + Epsilon closure : + (self) + +NFA state 37 = main.component#2.namevalue#1.digits#6.out + [(epsilon)] -> component#2.namevalue#1.#7 + Epsilon closure : + (self) + main.component#2.namevalue#1.#7 + main.component#2.namevalue#1.#8 + main.component#2.namevalue#1.optwhite#7.in + main.component#2.namevalue#1.optwhite#7.out + +NFA state 38 = main.component#2.namevalue#1.#8 + 10:[=] -> component#2.namevalue#1.rhs_continue + Epsilon closure : + (self) + +NFA state 39 = main.component#2.namevalue#1.optwhite#7.in + [(epsilon)] -> component#2.namevalue#1.optwhite#7.out + 0:[\t ] -> component#2.namevalue#1.optwhite#7.in + 1:[\r] -> component#2.namevalue#1.optwhite#7.in + Epsilon closure : + (self) + main.component#2.namevalue#1.#8 + main.component#2.namevalue#1.optwhite#7.out + +NFA state 40 = 
main.component#2.namevalue#1.optwhite#7.out + [(epsilon)] -> component#2.namevalue#1.#8 + Epsilon closure : + (self) + main.component#2.namevalue#1.#8 + +NFA state 41 = main.component#2.namevalue#1.rhs_normal + [(epsilon)] -> component#2.namevalue#1.optwhite#14.in + [(epsilon)] -> component#2.namevalue#1.optwhite#11.in + [(epsilon)] -> component#2.namevalue#1.optwhite#8.in + Epsilon closure : + (self) + main.component#2.namevalue#1.#9 + main.component#2.namevalue#1.optwhite#8.in + main.component#2.namevalue#1.optwhite#8.out + main.component#2.namevalue#1.qvalue#9.in + main.component#2.namevalue#1.#11 + main.component#2.namevalue#1.optwhite#11.in + main.component#2.namevalue#1.optwhite#11.out + main.component#2.namevalue#1.value#12.in + main.component#2.namevalue#1.#13 + main.component#2.namevalue#1.optwhite#14.in + main.component#2.namevalue#1.optwhite#14.out + main.component#2.namevalue#1.#14 + +NFA state 42 = main.component#2.namevalue#1.#9 + [(epsilon)] -> component#2.namevalue#1.qvalue#9.in + Epsilon closure : + (self) + main.component#2.namevalue#1.qvalue#9.in + +NFA state 43 = main.component#2.namevalue#1.optwhite#8.in + [(epsilon)] -> component#2.namevalue#1.optwhite#8.out + 0:[\t ] -> component#2.namevalue#1.optwhite#8.in + 1:[\r] -> component#2.namevalue#1.optwhite#8.in + Epsilon closure : + (self) + main.component#2.namevalue#1.#9 + main.component#2.namevalue#1.optwhite#8.out + main.component#2.namevalue#1.qvalue#9.in + +NFA state 44 = main.component#2.namevalue#1.optwhite#8.out + [(epsilon)] -> component#2.namevalue#1.#9 + Epsilon closure : + (self) + main.component#2.namevalue#1.#9 + main.component#2.namevalue#1.qvalue#9.in + +NFA state 45 = main.component#2.namevalue#1.#10 + [(epsilon)] -> component#2.namevalue#1.optwhite#10.in + Epsilon closure : + (self) + main.#2 + main.component#2.namevalue#1.optwhite#10.in + main.component#2.namevalue#1.optwhite#10.out + main.component#2.namevalue#1.out_normal + main.component#2.namevalue#1.#19 + 
main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 46 = main.component#2.namevalue#1.qvalue#9.in + 3:["] -> component#2.namevalue#1.qvalue#9.qv0 + Epsilon closure : + (self) + +NFA state 47 = main.component#2.namevalue#1.qvalue#9.qv0 + [(epsilon)] -> component#2.namevalue#1.qvalue#9.escape#1.in + 0:[\t ] -> component#2.namevalue#1.qvalue#9.qv1 + 9:[;] -> component#2.namevalue#1.qvalue#9.qv1 + 2:[!#-),:<>-@[]^`{-~] -> component#2.namevalue#1.qvalue#9.qv1 + 11:[A-Z_a-z] -> component#2.namevalue#1.qvalue#9.qv1 + 10:[=] -> component#2.namevalue#1.qvalue#9.qv1 + 8:[0-9] -> component#2.namevalue#1.qvalue#9.qv1 + 7:[/] -> component#2.namevalue#1.qvalue#9.qv1 + 6:[\055] -> component#2.namevalue#1.qvalue#9.qv1 + 5:[+.] -> component#2.namevalue#1.qvalue#9.qv1 + 4:[*] -> component#2.namevalue#1.qvalue#9.qv1 + Epsilon closure : + (self) + main.component#2.namevalue#1.qvalue#9.escape#1.in + +NFA state 48 = main.component#2.namevalue#1.qvalue#9.escape#1.in + 12:[\\] -> component#2.namevalue#1.qvalue#9.escape#1.#2 + 12:[\\] -> component#2.namevalue#1.qvalue#9.escape#1.#1 + Epsilon closure : + (self) + +NFA state 49 = main.component#2.namevalue#1.qvalue#9.escape#1.#1 + 12:[\\] -> component#2.namevalue#1.qvalue#9.escape#1.out + Epsilon closure : + (self) + +NFA state 50 = main.component#2.namevalue#1.qvalue#9.escape#1.#2 + 3:["] -> component#2.namevalue#1.qvalue#9.escape#1.out + Epsilon closure : + (self) + +NFA state 51 = main.component#2.namevalue#1.qvalue#9.escape#1.out + [(epsilon)] -> component#2.namevalue#1.qvalue#9.qv1 + Epsilon closure : + (self) + main.component#2.namevalue#1.qvalue#9.qv1 + main.component#2.namevalue#1.qvalue#9.#1 + main.component#2.namevalue#1.qvalue#9.escape#2.in + main.component#2.namevalue#1.qvalue#9.qv2 + +NFA state 52 = main.component#2.namevalue#1.qvalue#9.qv1 + [(epsilon)] -> component#2.namevalue#1.qvalue#9.#1 + [(epsilon)] -> component#2.namevalue#1.qvalue#9.escape#2.in + 
0:[\t ] -> component#2.namevalue#1.qvalue#9.qv1 + 9:[;] -> component#2.namevalue#1.qvalue#9.qv1 + 2:[!#-),:<>-@[]^`{-~] -> component#2.namevalue#1.qvalue#9.qv1 + 11:[A-Z_a-z] -> component#2.namevalue#1.qvalue#9.qv1 + 10:[=] -> component#2.namevalue#1.qvalue#9.qv1 + 8:[0-9] -> component#2.namevalue#1.qvalue#9.qv1 + 7:[/] -> component#2.namevalue#1.qvalue#9.qv1 + 6:[\055] -> component#2.namevalue#1.qvalue#9.qv1 + 5:[+.] -> component#2.namevalue#1.qvalue#9.qv1 + 4:[*] -> component#2.namevalue#1.qvalue#9.qv1 + [(epsilon)] -> component#2.namevalue#1.qvalue#9.qv2 + Epsilon closure : + (self) + main.component#2.namevalue#1.qvalue#9.#1 + main.component#2.namevalue#1.qvalue#9.escape#2.in + main.component#2.namevalue#1.qvalue#9.qv2 + +NFA state 53 = main.component#2.namevalue#1.qvalue#9.#1 + Tags : COPY_TO_VALUE + Epsilon closure : + (self) + +NFA state 54 = main.component#2.namevalue#1.qvalue#9.escape#2.in + 12:[\\] -> component#2.namevalue#1.qvalue#9.escape#2.#2 + 12:[\\] -> component#2.namevalue#1.qvalue#9.escape#2.#1 + Epsilon closure : + (self) + +NFA state 55 = main.component#2.namevalue#1.qvalue#9.escape#2.#1 + 12:[\\] -> component#2.namevalue#1.qvalue#9.escape#2.out + Epsilon closure : + (self) + +NFA state 56 = main.component#2.namevalue#1.qvalue#9.escape#2.#2 + 3:["] -> component#2.namevalue#1.qvalue#9.escape#2.out + Epsilon closure : + (self) + +NFA state 57 = main.component#2.namevalue#1.qvalue#9.escape#2.out + [(epsilon)] -> component#2.namevalue#1.qvalue#9.qv1 + Epsilon closure : + (self) + main.component#2.namevalue#1.qvalue#9.qv1 + main.component#2.namevalue#1.qvalue#9.#1 + main.component#2.namevalue#1.qvalue#9.escape#2.in + main.component#2.namevalue#1.qvalue#9.qv2 + +NFA state 58 = main.component#2.namevalue#1.qvalue#9.qv2 + 3:["] -> component#2.namevalue#1.qvalue#9.out + Epsilon closure : + (self) + +NFA state 59 = main.component#2.namevalue#1.qvalue#9.out + [(epsilon)] -> component#2.namevalue#1.#10 + Epsilon closure : + (self) + main.#2 + 
main.component#2.namevalue#1.#10 + main.component#2.namevalue#1.optwhite#10.in + main.component#2.namevalue#1.optwhite#10.out + main.component#2.namevalue#1.out_normal + main.component#2.namevalue#1.#19 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 60 = main.component#2.namevalue#1.optwhite#10.in + [(epsilon)] -> component#2.namevalue#1.optwhite#10.out + 0:[\t ] -> component#2.namevalue#1.optwhite#10.in + 1:[\r] -> component#2.namevalue#1.optwhite#10.in + Epsilon closure : + (self) + main.#2 + main.component#2.namevalue#1.optwhite#10.out + main.component#2.namevalue#1.out_normal + main.component#2.namevalue#1.#19 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 61 = main.component#2.namevalue#1.optwhite#10.out + [(epsilon)] -> component#2.namevalue#1.out_normal + Epsilon closure : + (self) + main.#2 + main.component#2.namevalue#1.out_normal + main.component#2.namevalue#1.#19 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 62 = main.component#2.namevalue#1.#11 + [(epsilon)] -> component#2.namevalue#1.value#12.in + Epsilon closure : + (self) + main.component#2.namevalue#1.value#12.in + +NFA state 63 = main.component#2.namevalue#1.optwhite#11.in + [(epsilon)] -> component#2.namevalue#1.optwhite#11.out + 0:[\t ] -> component#2.namevalue#1.optwhite#11.in + 1:[\r] -> component#2.namevalue#1.optwhite#11.in + Epsilon closure : + (self) + main.component#2.namevalue#1.#11 + main.component#2.namevalue#1.optwhite#11.out + main.component#2.namevalue#1.value#12.in + +NFA state 64 = main.component#2.namevalue#1.optwhite#11.out + [(epsilon)] -> component#2.namevalue#1.#11 + Epsilon closure : + (self) + main.component#2.namevalue#1.#11 + main.component#2.namevalue#1.value#12.in + +NFA state 65 = main.component#2.namevalue#1.#12 + [(epsilon)] -> 
component#2.namevalue#1.optwhite#13.in + Epsilon closure : + (self) + main.#2 + main.component#2.namevalue#1.optwhite#13.in + main.component#2.namevalue#1.optwhite#13.out + main.component#2.namevalue#1.out_normal + main.component#2.namevalue#1.#19 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 66 = main.component#2.namevalue#1.value#12.in + 2:[!#-),:<>-@[]^`{-~] -> component#2.namevalue#1.value#12.v1 + 11:[A-Z_a-z] -> component#2.namevalue#1.value#12.v1 + 10:[=] -> component#2.namevalue#1.value#12.v1 + 8:[0-9] -> component#2.namevalue#1.value#12.v1 + 7:[/] -> component#2.namevalue#1.value#12.v1 + 6:[\055] -> component#2.namevalue#1.value#12.v1 + 5:[+.] -> component#2.namevalue#1.value#12.v1 + 4:[*] -> component#2.namevalue#1.value#12.v1 + Epsilon closure : + (self) + +NFA state 67 = main.component#2.namevalue#1.value#12.v1 + [(epsilon)] -> component#2.namevalue#1.value#12.#1 + [(epsilon)] -> component#2.namevalue#1.value#12.out + 2:[!#-),:<>-@[]^`{-~] -> component#2.namevalue#1.value#12.v1 + 11:[A-Z_a-z] -> component#2.namevalue#1.value#12.v1 + 10:[=] -> component#2.namevalue#1.value#12.v1 + 8:[0-9] -> component#2.namevalue#1.value#12.v1 + 7:[/] -> component#2.namevalue#1.value#12.v1 + 6:[\055] -> component#2.namevalue#1.value#12.v1 + 5:[+.] 
-> component#2.namevalue#1.value#12.v1 + 4:[*] -> component#2.namevalue#1.value#12.v1 + Epsilon closure : + (self) + main.#2 + main.component#2.namevalue#1.#12 + main.component#2.namevalue#1.value#12.#1 + main.component#2.namevalue#1.value#12.out + main.component#2.namevalue#1.optwhite#13.in + main.component#2.namevalue#1.optwhite#13.out + main.component#2.namevalue#1.out_normal + main.component#2.namevalue#1.#19 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 68 = main.component#2.namevalue#1.value#12.#1 + Tags : COPY_TO_VALUE + Epsilon closure : + (self) + +NFA state 69 = main.component#2.namevalue#1.value#12.out + [(epsilon)] -> component#2.namevalue#1.#12 + Epsilon closure : + (self) + main.#2 + main.component#2.namevalue#1.#12 + main.component#2.namevalue#1.optwhite#13.in + main.component#2.namevalue#1.optwhite#13.out + main.component#2.namevalue#1.out_normal + main.component#2.namevalue#1.#19 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 70 = main.component#2.namevalue#1.optwhite#13.in + [(epsilon)] -> component#2.namevalue#1.optwhite#13.out + 0:[\t ] -> component#2.namevalue#1.optwhite#13.in + 1:[\r] -> component#2.namevalue#1.optwhite#13.in + Epsilon closure : + (self) + main.#2 + main.component#2.namevalue#1.optwhite#13.out + main.component#2.namevalue#1.out_normal + main.component#2.namevalue#1.#19 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 71 = main.component#2.namevalue#1.optwhite#13.out + [(epsilon)] -> component#2.namevalue#1.out_normal + Epsilon closure : + (self) + main.#2 + main.component#2.namevalue#1.out_normal + main.component#2.namevalue#1.#19 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 72 = main.component#2.namevalue#1.#13 + 
[(epsilon)] -> component#2.namevalue#1.#14 + Epsilon closure : + (self) + main.component#2.namevalue#1.#14 + +NFA state 73 = main.component#2.namevalue#1.optwhite#14.in + [(epsilon)] -> component#2.namevalue#1.optwhite#14.out + 0:[\t ] -> component#2.namevalue#1.optwhite#14.in + 1:[\r] -> component#2.namevalue#1.optwhite#14.in + Epsilon closure : + (self) + main.component#2.namevalue#1.#13 + main.component#2.namevalue#1.optwhite#14.out + main.component#2.namevalue#1.#14 + +NFA state 74 = main.component#2.namevalue#1.optwhite#14.out + [(epsilon)] -> component#2.namevalue#1.#13 + Epsilon closure : + (self) + main.component#2.namevalue#1.#13 + main.component#2.namevalue#1.#14 + +NFA state 75 = main.component#2.namevalue#1.#14 + EOS -> component#2.namevalue#1.out_normal + Epsilon closure : + (self) + +NFA state 76 = main.component#2.namevalue#1.rhs_continue + [(epsilon)] -> component#2.namevalue#1.optwhite#18.in + [(epsilon)] -> component#2.namevalue#1.optwhite#15.in + Epsilon closure : + (self) + main.component#2.namevalue#1.#15 + main.component#2.namevalue#1.optwhite#15.in + main.component#2.namevalue#1.optwhite#15.out + main.component#2.namevalue#1.qvalue#16.in + main.component#2.namevalue#1.#17 + main.component#2.namevalue#1.optwhite#18.in + main.component#2.namevalue#1.optwhite#18.out + main.component#2.namevalue#1.value#19.in + +NFA state 77 = main.component#2.namevalue#1.#15 + [(epsilon)] -> component#2.namevalue#1.qvalue#16.in + Epsilon closure : + (self) + main.component#2.namevalue#1.qvalue#16.in + +NFA state 78 = main.component#2.namevalue#1.optwhite#15.in + [(epsilon)] -> component#2.namevalue#1.optwhite#15.out + 0:[\t ] -> component#2.namevalue#1.optwhite#15.in + 1:[\r] -> component#2.namevalue#1.optwhite#15.in + Epsilon closure : + (self) + main.component#2.namevalue#1.#15 + main.component#2.namevalue#1.optwhite#15.out + main.component#2.namevalue#1.qvalue#16.in + +NFA state 79 = main.component#2.namevalue#1.optwhite#15.out + [(epsilon)] -> 
component#2.namevalue#1.#15 + Epsilon closure : + (self) + main.component#2.namevalue#1.#15 + main.component#2.namevalue#1.qvalue#16.in + +NFA state 80 = main.component#2.namevalue#1.#16 + [(epsilon)] -> component#2.namevalue#1.optwhite#17.in + Epsilon closure : + (self) + main.#2 + main.component#2.namevalue#1.optwhite#17.in + main.component#2.namevalue#1.optwhite#17.out + main.component#2.namevalue#1.out_continue + main.component#2.namevalue#1.#20 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 81 = main.component#2.namevalue#1.qvalue#16.in + 3:["] -> component#2.namevalue#1.qvalue#16.qv0 + Epsilon closure : + (self) + +NFA state 82 = main.component#2.namevalue#1.qvalue#16.qv0 + [(epsilon)] -> component#2.namevalue#1.qvalue#16.escape#1.in + 0:[\t ] -> component#2.namevalue#1.qvalue#16.qv1 + 9:[;] -> component#2.namevalue#1.qvalue#16.qv1 + 2:[!#-),:<>-@[]^`{-~] -> component#2.namevalue#1.qvalue#16.qv1 + 11:[A-Z_a-z] -> component#2.namevalue#1.qvalue#16.qv1 + 10:[=] -> component#2.namevalue#1.qvalue#16.qv1 + 8:[0-9] -> component#2.namevalue#1.qvalue#16.qv1 + 7:[/] -> component#2.namevalue#1.qvalue#16.qv1 + 6:[\055] -> component#2.namevalue#1.qvalue#16.qv1 + 5:[+.] 
-> component#2.namevalue#1.qvalue#16.qv1 + 4:[*] -> component#2.namevalue#1.qvalue#16.qv1 + Epsilon closure : + (self) + main.component#2.namevalue#1.qvalue#16.escape#1.in + +NFA state 83 = main.component#2.namevalue#1.qvalue#16.escape#1.in + 12:[\\] -> component#2.namevalue#1.qvalue#16.escape#1.#2 + 12:[\\] -> component#2.namevalue#1.qvalue#16.escape#1.#1 + Epsilon closure : + (self) + +NFA state 84 = main.component#2.namevalue#1.qvalue#16.escape#1.#1 + 12:[\\] -> component#2.namevalue#1.qvalue#16.escape#1.out + Epsilon closure : + (self) + +NFA state 85 = main.component#2.namevalue#1.qvalue#16.escape#1.#2 + 3:["] -> component#2.namevalue#1.qvalue#16.escape#1.out + Epsilon closure : + (self) + +NFA state 86 = main.component#2.namevalue#1.qvalue#16.escape#1.out + [(epsilon)] -> component#2.namevalue#1.qvalue#16.qv1 + Epsilon closure : + (self) + main.component#2.namevalue#1.qvalue#16.qv1 + main.component#2.namevalue#1.qvalue#16.#1 + main.component#2.namevalue#1.qvalue#16.escape#2.in + main.component#2.namevalue#1.qvalue#16.qv2 + +NFA state 87 = main.component#2.namevalue#1.qvalue#16.qv1 + [(epsilon)] -> component#2.namevalue#1.qvalue#16.#1 + [(epsilon)] -> component#2.namevalue#1.qvalue#16.escape#2.in + 0:[\t ] -> component#2.namevalue#1.qvalue#16.qv1 + 9:[;] -> component#2.namevalue#1.qvalue#16.qv1 + 2:[!#-),:<>-@[]^`{-~] -> component#2.namevalue#1.qvalue#16.qv1 + 11:[A-Z_a-z] -> component#2.namevalue#1.qvalue#16.qv1 + 10:[=] -> component#2.namevalue#1.qvalue#16.qv1 + 8:[0-9] -> component#2.namevalue#1.qvalue#16.qv1 + 7:[/] -> component#2.namevalue#1.qvalue#16.qv1 + 6:[\055] -> component#2.namevalue#1.qvalue#16.qv1 + 5:[+.] 
-> component#2.namevalue#1.qvalue#16.qv1 + 4:[*] -> component#2.namevalue#1.qvalue#16.qv1 + [(epsilon)] -> component#2.namevalue#1.qvalue#16.qv2 + Epsilon closure : + (self) + main.component#2.namevalue#1.qvalue#16.#1 + main.component#2.namevalue#1.qvalue#16.escape#2.in + main.component#2.namevalue#1.qvalue#16.qv2 + +NFA state 88 = main.component#2.namevalue#1.qvalue#16.#1 + Tags : COPY_TO_VALUE + Epsilon closure : + (self) + +NFA state 89 = main.component#2.namevalue#1.qvalue#16.escape#2.in + 12:[\\] -> component#2.namevalue#1.qvalue#16.escape#2.#2 + 12:[\\] -> component#2.namevalue#1.qvalue#16.escape#2.#1 + Epsilon closure : + (self) + +NFA state 90 = main.component#2.namevalue#1.qvalue#16.escape#2.#1 + 12:[\\] -> component#2.namevalue#1.qvalue#16.escape#2.out + Epsilon closure : + (self) + +NFA state 91 = main.component#2.namevalue#1.qvalue#16.escape#2.#2 + 3:["] -> component#2.namevalue#1.qvalue#16.escape#2.out + Epsilon closure : + (self) + +NFA state 92 = main.component#2.namevalue#1.qvalue#16.escape#2.out + [(epsilon)] -> component#2.namevalue#1.qvalue#16.qv1 + Epsilon closure : + (self) + main.component#2.namevalue#1.qvalue#16.qv1 + main.component#2.namevalue#1.qvalue#16.#1 + main.component#2.namevalue#1.qvalue#16.escape#2.in + main.component#2.namevalue#1.qvalue#16.qv2 + +NFA state 93 = main.component#2.namevalue#1.qvalue#16.qv2 + 3:["] -> component#2.namevalue#1.qvalue#16.out + Epsilon closure : + (self) + +NFA state 94 = main.component#2.namevalue#1.qvalue#16.out + [(epsilon)] -> component#2.namevalue#1.#16 + Epsilon closure : + (self) + main.#2 + main.component#2.namevalue#1.#16 + main.component#2.namevalue#1.optwhite#17.in + main.component#2.namevalue#1.optwhite#17.out + main.component#2.namevalue#1.out_continue + main.component#2.namevalue#1.#20 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 95 = main.component#2.namevalue#1.optwhite#17.in + [(epsilon)] -> 
component#2.namevalue#1.optwhite#17.out + 0:[\t ] -> component#2.namevalue#1.optwhite#17.in + 1:[\r] -> component#2.namevalue#1.optwhite#17.in + Epsilon closure : + (self) + main.#2 + main.component#2.namevalue#1.optwhite#17.out + main.component#2.namevalue#1.out_continue + main.component#2.namevalue#1.#20 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 96 = main.component#2.namevalue#1.optwhite#17.out + [(epsilon)] -> component#2.namevalue#1.out_continue + Epsilon closure : + (self) + main.#2 + main.component#2.namevalue#1.out_continue + main.component#2.namevalue#1.#20 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 97 = main.component#2.namevalue#1.#17 + [(epsilon)] -> component#2.namevalue#1.value#19.in + Epsilon closure : + (self) + main.component#2.namevalue#1.value#19.in + +NFA state 98 = main.component#2.namevalue#1.optwhite#18.in + [(epsilon)] -> component#2.namevalue#1.optwhite#18.out + 0:[\t ] -> component#2.namevalue#1.optwhite#18.in + 1:[\r] -> component#2.namevalue#1.optwhite#18.in + Epsilon closure : + (self) + main.component#2.namevalue#1.#17 + main.component#2.namevalue#1.optwhite#18.out + main.component#2.namevalue#1.value#19.in + +NFA state 99 = main.component#2.namevalue#1.optwhite#18.out + [(epsilon)] -> component#2.namevalue#1.#17 + Epsilon closure : + (self) + main.component#2.namevalue#1.#17 + main.component#2.namevalue#1.value#19.in + +NFA state 100 = main.component#2.namevalue#1.#18 + [(epsilon)] -> component#2.namevalue#1.optwhite#20.in + Epsilon closure : + (self) + main.#2 + main.component#2.namevalue#1.optwhite#20.in + main.component#2.namevalue#1.optwhite#20.out + main.component#2.namevalue#1.out_continue + main.component#2.namevalue#1.#20 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 101 = 
main.component#2.namevalue#1.value#19.in + 2:[!#-),:<>-@[]^`{-~] -> component#2.namevalue#1.value#19.v1 + 11:[A-Z_a-z] -> component#2.namevalue#1.value#19.v1 + 10:[=] -> component#2.namevalue#1.value#19.v1 + 8:[0-9] -> component#2.namevalue#1.value#19.v1 + 7:[/] -> component#2.namevalue#1.value#19.v1 + 6:[\055] -> component#2.namevalue#1.value#19.v1 + 5:[+.] -> component#2.namevalue#1.value#19.v1 + 4:[*] -> component#2.namevalue#1.value#19.v1 + Epsilon closure : + (self) + +NFA state 102 = main.component#2.namevalue#1.value#19.v1 + [(epsilon)] -> component#2.namevalue#1.value#19.#1 + [(epsilon)] -> component#2.namevalue#1.value#19.out + 2:[!#-),:<>-@[]^`{-~] -> component#2.namevalue#1.value#19.v1 + 11:[A-Z_a-z] -> component#2.namevalue#1.value#19.v1 + 10:[=] -> component#2.namevalue#1.value#19.v1 + 8:[0-9] -> component#2.namevalue#1.value#19.v1 + 7:[/] -> component#2.namevalue#1.value#19.v1 + 6:[\055] -> component#2.namevalue#1.value#19.v1 + 5:[+.] -> component#2.namevalue#1.value#19.v1 + 4:[*] -> component#2.namevalue#1.value#19.v1 + Epsilon closure : + (self) + main.#2 + main.component#2.namevalue#1.#18 + main.component#2.namevalue#1.value#19.#1 + main.component#2.namevalue#1.value#19.out + main.component#2.namevalue#1.optwhite#20.in + main.component#2.namevalue#1.optwhite#20.out + main.component#2.namevalue#1.out_continue + main.component#2.namevalue#1.#20 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 103 = main.component#2.namevalue#1.value#19.#1 + Tags : COPY_TO_VALUE + Epsilon closure : + (self) + +NFA state 104 = main.component#2.namevalue#1.value#19.out + [(epsilon)] -> component#2.namevalue#1.#18 + Epsilon closure : + (self) + main.#2 + main.component#2.namevalue#1.#18 + main.component#2.namevalue#1.optwhite#20.in + main.component#2.namevalue#1.optwhite#20.out + main.component#2.namevalue#1.out_continue + main.component#2.namevalue#1.#20 + main.component#2.namevalue#1.out + 
main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 105 = main.component#2.namevalue#1.optwhite#20.in + [(epsilon)] -> component#2.namevalue#1.optwhite#20.out + 0:[\t ] -> component#2.namevalue#1.optwhite#20.in + 1:[\r] -> component#2.namevalue#1.optwhite#20.in + Epsilon closure : + (self) + main.#2 + main.component#2.namevalue#1.optwhite#20.out + main.component#2.namevalue#1.out_continue + main.component#2.namevalue#1.#20 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 106 = main.component#2.namevalue#1.optwhite#20.out + [(epsilon)] -> component#2.namevalue#1.out_continue + Epsilon closure : + (self) + main.#2 + main.component#2.namevalue#1.out_continue + main.component#2.namevalue#1.#20 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 107 = main.component#2.namevalue#1.out_normal + [(epsilon)] -> component#2.namevalue#1.out + [(epsilon)] -> component#2.namevalue#1.#19 + Epsilon closure : + (self) + main.#2 + main.component#2.namevalue#1.#19 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 108 = main.component#2.namevalue#1.#19 + Tags : GOT_NAMEVALUE + Epsilon closure : + (self) + +NFA state 109 = main.component#2.namevalue#1.out_continue + [(epsilon)] -> component#2.namevalue#1.out + [(epsilon)] -> component#2.namevalue#1.#20 + Epsilon closure : + (self) + main.#2 + main.component#2.namevalue#1.#20 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 110 = main.component#2.namevalue#1.#20 + Tags : GOT_NAMEVALUE_CONT + Epsilon closure : + (self) + +NFA state 111 = main.component#2.namevalue#1.out + [(epsilon)] -> component#2.out + Epsilon closure : + (self) + main.#2 + main.component#2.out + main.#3 + main.optwhite#3.in + 
main.optwhite#3.out + +NFA state 112 = main.component#2.name#2.in + 6:[\055] -> component#2.name#2.name1 + 11:[A-Z_a-z] -> component#2.name#2.name1 + 8:[0-9] -> component#2.name#2.name1 + Epsilon closure : + (self) + +NFA state 113 = main.component#2.name#2.name1 + [(epsilon)] -> component#2.name#2.out + 0:[\t ] -> component#2.name#2.name2 + 6:[\055] -> component#2.name#2.name1 + 11:[A-Z_a-z] -> component#2.name#2.name1 + 8:[0-9] -> component#2.name#2.name1 + [(epsilon)] -> component#2.name#2.#2 + [(epsilon)] -> component#2.name#2.#1 + Epsilon closure : + (self) + main.#2 + main.component#2.name#2.#1 + main.component#2.name#2.#2 + main.component#2.name#2.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 114 = main.component#2.name#2.#1 + Tags : COPY_TO_NAME + Epsilon closure : + (self) + +NFA state 115 = main.component#2.name#2.#2 + Tags : GOT_NAME + Epsilon closure : + (self) + +NFA state 116 = main.component#2.name#2.name2 + [(epsilon)] -> component#2.name#2.out + 6:[\055] -> component#2.name#2.name1 + 11:[A-Z_a-z] -> component#2.name#2.name1 + 8:[0-9] -> component#2.name#2.name1 + 0:[\t ] -> component#2.name#2.name2 + [(epsilon)] -> component#2.name#2.#4 + [(epsilon)] -> component#2.name#2.#3 + Epsilon closure : + (self) + main.#2 + main.component#2.name#2.#3 + main.component#2.name#2.#4 + main.component#2.name#2.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 117 = main.component#2.name#2.#3 + Tags : COPY_TO_NAME + Epsilon closure : + (self) + +NFA state 118 = main.component#2.name#2.#4 + Tags : GOT_NAME_TRAILING_SPACE + Epsilon closure : + (self) + +NFA state 119 = main.component#2.name#2.out + [(epsilon)] -> component#2.out + Epsilon closure : + (self) + main.#2 + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 120 = main.component#2.majorminor#3.in + [(epsilon)] -> component#2.majorminor#3.major#1.in + Epsilon closure : + (self) + 
main.component#2.majorminor#3.major#1.in + +NFA state 121 = main.component#2.majorminor#3.major#1.in + 6:[\055] -> component#2.majorminor#3.major#1.name1 + 11:[A-Z_a-z] -> component#2.majorminor#3.major#1.name1 + 8:[0-9] -> component#2.majorminor#3.major#1.name1 + Epsilon closure : + (self) + +NFA state 122 = main.component#2.majorminor#3.major#1.name1 + 6:[\055] -> component#2.majorminor#3.major#1.name1 + 11:[A-Z_a-z] -> component#2.majorminor#3.major#1.name1 + 8:[0-9] -> component#2.majorminor#3.major#1.name1 + [(epsilon)] -> component#2.majorminor#3.major#1.out + Epsilon closure : + (self) + main.component#2.majorminor#3.major#1.out + main.component#2.majorminor#3.foo + +NFA state 123 = main.component#2.majorminor#3.major#1.out + [(epsilon)] -> component#2.majorminor#3.foo + Epsilon closure : + (self) + main.component#2.majorminor#3.foo + +NFA state 124 = main.component#2.majorminor#3.foo + 7:[/] -> component#2.majorminor#3.bar + Epsilon closure : + (self) + +NFA state 125 = main.component#2.majorminor#3.bar + [(epsilon)] -> component#2.majorminor#3.minor#2.in + Epsilon closure : + (self) + main.component#2.majorminor#3.minor#2.in + +NFA state 126 = main.component#2.majorminor#3.minor#2.in + 5:[+.] -> component#2.majorminor#3.minor#2.minor1 + 12:[\\] -> component#2.majorminor#3.minor#2.minor1 + 6:[\055] -> component#2.majorminor#3.minor#2.minor1 + 6:[\055] -> component#2.majorminor#3.minor#2.minor1 + 11:[A-Z_a-z] -> component#2.majorminor#3.minor#2.minor1 + 8:[0-9] -> component#2.majorminor#3.minor#2.minor1 + Epsilon closure : + (self) + +NFA state 127 = main.component#2.majorminor#3.minor#2.minor1 + [(epsilon)] -> component#2.majorminor#3.minor#2.#1 + 5:[+.] 
-> component#2.majorminor#3.minor#2.minor1 + 12:[\\] -> component#2.majorminor#3.minor#2.minor1 + 6:[\055] -> component#2.majorminor#3.minor#2.minor1 + 6:[\055] -> component#2.majorminor#3.minor#2.minor1 + 11:[A-Z_a-z] -> component#2.majorminor#3.minor#2.minor1 + 8:[0-9] -> component#2.majorminor#3.minor#2.minor1 + [(epsilon)] -> component#2.majorminor#3.minor#2.out + Epsilon closure : + (self) + main.#2 + main.component#2.majorminor#3.minor#2.#1 + main.component#2.majorminor#3.minor#2.out + main.component#2.majorminor#3.out + main.component#2.majorminor#3.#1 + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 128 = main.component#2.majorminor#3.minor#2.#1 + Tags : COPY_TO_MINOR + Epsilon closure : + (self) + +NFA state 129 = main.component#2.majorminor#3.minor#2.out + [(epsilon)] -> component#2.majorminor#3.out + Epsilon closure : + (self) + main.#2 + main.component#2.majorminor#3.out + main.component#2.majorminor#3.#1 + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 130 = main.component#2.majorminor#3.out + [(epsilon)] -> component#2.majorminor#3.#1 + [(epsilon)] -> component#2.out + Epsilon closure : + (self) + main.#2 + main.component#2.majorminor#3.#1 + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 131 = main.component#2.majorminor#3.#1 + Tags : GOT_MAJORMINOR + Epsilon closure : + (self) + +NFA state 132 = main.component#2.out + [(epsilon)] -> #2 + Epsilon closure : + (self) + main.#2 + main.#3 + main.optwhite#3.in + main.optwhite#3.out + +NFA state 133 = main.#3 + EOS -> out2 + Epsilon closure : + (self) + +NFA state 134 = main.optwhite#3.in + [(epsilon)] -> optwhite#3.out + 0:[\t ] -> optwhite#3.in + 1:[\r] -> optwhite#3.in + Epsilon closure : + (self) + main.#3 + main.optwhite#3.out + +NFA state 135 = main.optwhite#3.out + [(epsilon)] -> #3 + Epsilon closure : + (self) + main.#3 + +NFA state 136 = main.#4 + [(epsilon)] -> component#5.in + 
Epsilon closure : + (self) + main.component#5.in + main.component#5.namevalue#1.in + main.component#5.namevalue#1.#1 + main.component#5.namevalue#1.optwhite#1.in + main.component#5.namevalue#1.optwhite#1.out + main.component#5.namevalue#1.name#2.in + main.component#5.namevalue#1.#4 + main.component#5.namevalue#1.optwhite#4.in + main.component#5.namevalue#1.optwhite#4.out + main.component#5.namevalue#1.name#5.in + main.component#5.name#2.in + main.component#5.majorminor#3.in + main.component#5.majorminor#3.major#1.in + +NFA state 137 = main.optwhite#4.in + [(epsilon)] -> optwhite#4.out + 0:[\t ] -> optwhite#4.in + 1:[\r] -> optwhite#4.in + Epsilon closure : + (self) + main.#4 + main.optwhite#4.out + main.component#5.in + main.component#5.namevalue#1.in + main.component#5.namevalue#1.#1 + main.component#5.namevalue#1.optwhite#1.in + main.component#5.namevalue#1.optwhite#1.out + main.component#5.namevalue#1.name#2.in + main.component#5.namevalue#1.#4 + main.component#5.namevalue#1.optwhite#4.in + main.component#5.namevalue#1.optwhite#4.out + main.component#5.namevalue#1.name#5.in + main.component#5.name#2.in + main.component#5.majorminor#3.in + main.component#5.majorminor#3.major#1.in + +NFA state 138 = main.optwhite#4.out + [(epsilon)] -> #4 + Epsilon closure : + (self) + main.#4 + main.component#5.in + main.component#5.namevalue#1.in + main.component#5.namevalue#1.#1 + main.component#5.namevalue#1.optwhite#1.in + main.component#5.namevalue#1.optwhite#1.out + main.component#5.namevalue#1.name#2.in + main.component#5.namevalue#1.#4 + main.component#5.namevalue#1.optwhite#4.in + main.component#5.namevalue#1.optwhite#4.out + main.component#5.namevalue#1.name#5.in + main.component#5.name#2.in + main.component#5.majorminor#3.in + main.component#5.majorminor#3.major#1.in + +NFA state 139 = main.#5 + [(epsilon)] -> optwhite#6.in + Epsilon closure : + (self) + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 140 = main.component#5.in + [(epsilon)] -> 
component#5.namevalue#1.in + [(epsilon)] -> component#5.name#2.in + [(epsilon)] -> component#5.majorminor#3.in + Epsilon closure : + (self) + main.component#5.namevalue#1.in + main.component#5.namevalue#1.#1 + main.component#5.namevalue#1.optwhite#1.in + main.component#5.namevalue#1.optwhite#1.out + main.component#5.namevalue#1.name#2.in + main.component#5.namevalue#1.#4 + main.component#5.namevalue#1.optwhite#4.in + main.component#5.namevalue#1.optwhite#4.out + main.component#5.namevalue#1.name#5.in + main.component#5.name#2.in + main.component#5.majorminor#3.in + main.component#5.majorminor#3.major#1.in + +NFA state 141 = main.component#5.namevalue#1.in + [(epsilon)] -> component#5.namevalue#1.optwhite#4.in + [(epsilon)] -> component#5.namevalue#1.optwhite#1.in + Epsilon closure : + (self) + main.component#5.namevalue#1.#1 + main.component#5.namevalue#1.optwhite#1.in + main.component#5.namevalue#1.optwhite#1.out + main.component#5.namevalue#1.name#2.in + main.component#5.namevalue#1.#4 + main.component#5.namevalue#1.optwhite#4.in + main.component#5.namevalue#1.optwhite#4.out + main.component#5.namevalue#1.name#5.in + +NFA state 142 = main.component#5.namevalue#1.#1 + [(epsilon)] -> component#5.namevalue#1.name#2.in + Epsilon closure : + (self) + main.component#5.namevalue#1.name#2.in + +NFA state 143 = main.component#5.namevalue#1.optwhite#1.in + [(epsilon)] -> component#5.namevalue#1.optwhite#1.out + 0:[\t ] -> component#5.namevalue#1.optwhite#1.in + 1:[\r] -> component#5.namevalue#1.optwhite#1.in + Epsilon closure : + (self) + main.component#5.namevalue#1.#1 + main.component#5.namevalue#1.optwhite#1.out + main.component#5.namevalue#1.name#2.in + +NFA state 144 = main.component#5.namevalue#1.optwhite#1.out + [(epsilon)] -> component#5.namevalue#1.#1 + Epsilon closure : + (self) + main.component#5.namevalue#1.#1 + main.component#5.namevalue#1.name#2.in + +NFA state 145 = main.component#5.namevalue#1.#2 + [(epsilon)] -> component#5.namevalue#1.optwhite#3.in + 
Epsilon closure : + (self) + main.component#5.namevalue#1.#3 + main.component#5.namevalue#1.optwhite#3.in + main.component#5.namevalue#1.optwhite#3.out + +NFA state 146 = main.component#5.namevalue#1.name#2.in + 6:[\055] -> component#5.namevalue#1.name#2.name1 + 11:[A-Z_a-z] -> component#5.namevalue#1.name#2.name1 + 8:[0-9] -> component#5.namevalue#1.name#2.name1 + Epsilon closure : + (self) + +NFA state 147 = main.component#5.namevalue#1.name#2.name1 + [(epsilon)] -> component#5.namevalue#1.name#2.#1 + [(epsilon)] -> component#5.namevalue#1.name#2.#2 + 6:[\055] -> component#5.namevalue#1.name#2.name1 + 11:[A-Z_a-z] -> component#5.namevalue#1.name#2.name1 + 8:[0-9] -> component#5.namevalue#1.name#2.name1 + 0:[\t ] -> component#5.namevalue#1.name#2.name2 + [(epsilon)] -> component#5.namevalue#1.name#2.out + Epsilon closure : + (self) + main.component#5.namevalue#1.#2 + main.component#5.namevalue#1.name#2.#1 + main.component#5.namevalue#1.name#2.#2 + main.component#5.namevalue#1.name#2.out + main.component#5.namevalue#1.#3 + main.component#5.namevalue#1.optwhite#3.in + main.component#5.namevalue#1.optwhite#3.out + +NFA state 148 = main.component#5.namevalue#1.name#2.#1 + Tags : COPY_TO_NAME + Epsilon closure : + (self) + +NFA state 149 = main.component#5.namevalue#1.name#2.#2 + Tags : GOT_NAME + Epsilon closure : + (self) + +NFA state 150 = main.component#5.namevalue#1.name#2.name2 + [(epsilon)] -> component#5.namevalue#1.name#2.#3 + [(epsilon)] -> component#5.namevalue#1.name#2.#4 + 0:[\t ] -> component#5.namevalue#1.name#2.name2 + 6:[\055] -> component#5.namevalue#1.name#2.name1 + 11:[A-Z_a-z] -> component#5.namevalue#1.name#2.name1 + 8:[0-9] -> component#5.namevalue#1.name#2.name1 + [(epsilon)] -> component#5.namevalue#1.name#2.out + Epsilon closure : + (self) + main.component#5.namevalue#1.#2 + main.component#5.namevalue#1.name#2.#3 + main.component#5.namevalue#1.name#2.#4 + main.component#5.namevalue#1.name#2.out + main.component#5.namevalue#1.#3 + 
main.component#5.namevalue#1.optwhite#3.in + main.component#5.namevalue#1.optwhite#3.out + +NFA state 151 = main.component#5.namevalue#1.name#2.#3 + Tags : COPY_TO_NAME + Epsilon closure : + (self) + +NFA state 152 = main.component#5.namevalue#1.name#2.#4 + Tags : GOT_NAME_TRAILING_SPACE + Epsilon closure : + (self) + +NFA state 153 = main.component#5.namevalue#1.name#2.out + [(epsilon)] -> component#5.namevalue#1.#2 + Epsilon closure : + (self) + main.component#5.namevalue#1.#2 + main.component#5.namevalue#1.#3 + main.component#5.namevalue#1.optwhite#3.in + main.component#5.namevalue#1.optwhite#3.out + +NFA state 154 = main.component#5.namevalue#1.#3 + 10:[=] -> component#5.namevalue#1.rhs_normal + Epsilon closure : + (self) + +NFA state 155 = main.component#5.namevalue#1.optwhite#3.in + [(epsilon)] -> component#5.namevalue#1.optwhite#3.out + 0:[\t ] -> component#5.namevalue#1.optwhite#3.in + 1:[\r] -> component#5.namevalue#1.optwhite#3.in + Epsilon closure : + (self) + main.component#5.namevalue#1.#3 + main.component#5.namevalue#1.optwhite#3.out + +NFA state 156 = main.component#5.namevalue#1.optwhite#3.out + [(epsilon)] -> component#5.namevalue#1.#3 + Epsilon closure : + (self) + main.component#5.namevalue#1.#3 + +NFA state 157 = main.component#5.namevalue#1.#4 + [(epsilon)] -> component#5.namevalue#1.name#5.in + Epsilon closure : + (self) + main.component#5.namevalue#1.name#5.in + +NFA state 158 = main.component#5.namevalue#1.optwhite#4.in + [(epsilon)] -> component#5.namevalue#1.optwhite#4.out + 0:[\t ] -> component#5.namevalue#1.optwhite#4.in + 1:[\r] -> component#5.namevalue#1.optwhite#4.in + Epsilon closure : + (self) + main.component#5.namevalue#1.#4 + main.component#5.namevalue#1.optwhite#4.out + main.component#5.namevalue#1.name#5.in + +NFA state 159 = main.component#5.namevalue#1.optwhite#4.out + [(epsilon)] -> component#5.namevalue#1.#4 + Epsilon closure : + (self) + main.component#5.namevalue#1.#4 + main.component#5.namevalue#1.name#5.in + +NFA state 
160 = main.component#5.namevalue#1.#5 + 4:[*] -> component#5.namevalue#1.#6 + Epsilon closure : + (self) + +NFA state 161 = main.component#5.namevalue#1.name#5.in + 6:[\055] -> component#5.namevalue#1.name#5.name1 + 11:[A-Z_a-z] -> component#5.namevalue#1.name#5.name1 + 8:[0-9] -> component#5.namevalue#1.name#5.name1 + Epsilon closure : + (self) + +NFA state 162 = main.component#5.namevalue#1.name#5.name1 + [(epsilon)] -> component#5.namevalue#1.name#5.#1 + [(epsilon)] -> component#5.namevalue#1.name#5.#2 + 6:[\055] -> component#5.namevalue#1.name#5.name1 + 11:[A-Z_a-z] -> component#5.namevalue#1.name#5.name1 + 8:[0-9] -> component#5.namevalue#1.name#5.name1 + 0:[\t ] -> component#5.namevalue#1.name#5.name2 + [(epsilon)] -> component#5.namevalue#1.name#5.out + Epsilon closure : + (self) + main.component#5.namevalue#1.#5 + main.component#5.namevalue#1.name#5.#1 + main.component#5.namevalue#1.name#5.#2 + main.component#5.namevalue#1.name#5.out + +NFA state 163 = main.component#5.namevalue#1.name#5.#1 + Tags : COPY_TO_NAME + Epsilon closure : + (self) + +NFA state 164 = main.component#5.namevalue#1.name#5.#2 + Tags : GOT_NAME + Epsilon closure : + (self) + +NFA state 165 = main.component#5.namevalue#1.name#5.name2 + [(epsilon)] -> component#5.namevalue#1.name#5.#3 + [(epsilon)] -> component#5.namevalue#1.name#5.#4 + 0:[\t ] -> component#5.namevalue#1.name#5.name2 + 6:[\055] -> component#5.namevalue#1.name#5.name1 + 11:[A-Z_a-z] -> component#5.namevalue#1.name#5.name1 + 8:[0-9] -> component#5.namevalue#1.name#5.name1 + [(epsilon)] -> component#5.namevalue#1.name#5.out + Epsilon closure : + (self) + main.component#5.namevalue#1.#5 + main.component#5.namevalue#1.name#5.#3 + main.component#5.namevalue#1.name#5.#4 + main.component#5.namevalue#1.name#5.out + +NFA state 166 = main.component#5.namevalue#1.name#5.#3 + Tags : COPY_TO_NAME + Epsilon closure : + (self) + +NFA state 167 = main.component#5.namevalue#1.name#5.#4 + Tags : GOT_NAME_TRAILING_SPACE + Epsilon closure : + 
(self) + +NFA state 168 = main.component#5.namevalue#1.name#5.out + [(epsilon)] -> component#5.namevalue#1.#5 + Epsilon closure : + (self) + main.component#5.namevalue#1.#5 + +NFA state 169 = main.component#5.namevalue#1.#6 + [(epsilon)] -> component#5.namevalue#1.digits#6.in + Epsilon closure : + (self) + main.component#5.namevalue#1.digits#6.in + +NFA state 170 = main.component#5.namevalue#1.#7 + [(epsilon)] -> component#5.namevalue#1.optwhite#7.in + Epsilon closure : + (self) + main.component#5.namevalue#1.#8 + main.component#5.namevalue#1.optwhite#7.in + main.component#5.namevalue#1.optwhite#7.out + +NFA state 171 = main.component#5.namevalue#1.digits#6.in + 8:[0-9] -> component#5.namevalue#1.digits#6.out + 8:[0-9] -> component#5.namevalue#1.digits#6.in + Epsilon closure : + (self) + +NFA state 172 = main.component#5.namevalue#1.digits#6.out + [(epsilon)] -> component#5.namevalue#1.#7 + Epsilon closure : + (self) + main.component#5.namevalue#1.#7 + main.component#5.namevalue#1.#8 + main.component#5.namevalue#1.optwhite#7.in + main.component#5.namevalue#1.optwhite#7.out + +NFA state 173 = main.component#5.namevalue#1.#8 + 10:[=] -> component#5.namevalue#1.rhs_continue + Epsilon closure : + (self) + +NFA state 174 = main.component#5.namevalue#1.optwhite#7.in + [(epsilon)] -> component#5.namevalue#1.optwhite#7.out + 0:[\t ] -> component#5.namevalue#1.optwhite#7.in + 1:[\r] -> component#5.namevalue#1.optwhite#7.in + Epsilon closure : + (self) + main.component#5.namevalue#1.#8 + main.component#5.namevalue#1.optwhite#7.out + +NFA state 175 = main.component#5.namevalue#1.optwhite#7.out + [(epsilon)] -> component#5.namevalue#1.#8 + Epsilon closure : + (self) + main.component#5.namevalue#1.#8 + +NFA state 176 = main.component#5.namevalue#1.rhs_normal + [(epsilon)] -> component#5.namevalue#1.optwhite#14.in + [(epsilon)] -> component#5.namevalue#1.optwhite#11.in + [(epsilon)] -> component#5.namevalue#1.optwhite#8.in + Epsilon closure : + (self) + 
main.component#5.namevalue#1.#9 + main.component#5.namevalue#1.optwhite#8.in + main.component#5.namevalue#1.optwhite#8.out + main.component#5.namevalue#1.qvalue#9.in + main.component#5.namevalue#1.#11 + main.component#5.namevalue#1.optwhite#11.in + main.component#5.namevalue#1.optwhite#11.out + main.component#5.namevalue#1.value#12.in + main.component#5.namevalue#1.#13 + main.component#5.namevalue#1.optwhite#14.in + main.component#5.namevalue#1.optwhite#14.out + main.component#5.namevalue#1.#14 + +NFA state 177 = main.component#5.namevalue#1.#9 + [(epsilon)] -> component#5.namevalue#1.qvalue#9.in + Epsilon closure : + (self) + main.component#5.namevalue#1.qvalue#9.in + +NFA state 178 = main.component#5.namevalue#1.optwhite#8.in + [(epsilon)] -> component#5.namevalue#1.optwhite#8.out + 0:[\t ] -> component#5.namevalue#1.optwhite#8.in + 1:[\r] -> component#5.namevalue#1.optwhite#8.in + Epsilon closure : + (self) + main.component#5.namevalue#1.#9 + main.component#5.namevalue#1.optwhite#8.out + main.component#5.namevalue#1.qvalue#9.in + +NFA state 179 = main.component#5.namevalue#1.optwhite#8.out + [(epsilon)] -> component#5.namevalue#1.#9 + Epsilon closure : + (self) + main.component#5.namevalue#1.#9 + main.component#5.namevalue#1.qvalue#9.in + +NFA state 180 = main.component#5.namevalue#1.#10 + [(epsilon)] -> component#5.namevalue#1.optwhite#10.in + Epsilon closure : + (self) + main.#5 + main.component#5.namevalue#1.optwhite#10.in + main.component#5.namevalue#1.optwhite#10.out + main.component#5.namevalue#1.out_normal + main.component#5.namevalue#1.#19 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 181 = main.component#5.namevalue#1.qvalue#9.in + 3:["] -> component#5.namevalue#1.qvalue#9.qv0 + Epsilon closure : + (self) + +NFA state 182 = main.component#5.namevalue#1.qvalue#9.qv0 + [(epsilon)] -> component#5.namevalue#1.qvalue#9.escape#1.in + 0:[\t ] -> component#5.namevalue#1.qvalue#9.qv1 + 
9:[;] -> component#5.namevalue#1.qvalue#9.qv1 + 2:[!#-),:<>-@[]^`{-~] -> component#5.namevalue#1.qvalue#9.qv1 + 11:[A-Z_a-z] -> component#5.namevalue#1.qvalue#9.qv1 + 10:[=] -> component#5.namevalue#1.qvalue#9.qv1 + 8:[0-9] -> component#5.namevalue#1.qvalue#9.qv1 + 7:[/] -> component#5.namevalue#1.qvalue#9.qv1 + 6:[\055] -> component#5.namevalue#1.qvalue#9.qv1 + 5:[+.] -> component#5.namevalue#1.qvalue#9.qv1 + 4:[*] -> component#5.namevalue#1.qvalue#9.qv1 + Epsilon closure : + (self) + main.component#5.namevalue#1.qvalue#9.escape#1.in + +NFA state 183 = main.component#5.namevalue#1.qvalue#9.escape#1.in + 12:[\\] -> component#5.namevalue#1.qvalue#9.escape#1.#2 + 12:[\\] -> component#5.namevalue#1.qvalue#9.escape#1.#1 + Epsilon closure : + (self) + +NFA state 184 = main.component#5.namevalue#1.qvalue#9.escape#1.#1 + 12:[\\] -> component#5.namevalue#1.qvalue#9.escape#1.out + Epsilon closure : + (self) + +NFA state 185 = main.component#5.namevalue#1.qvalue#9.escape#1.#2 + 3:["] -> component#5.namevalue#1.qvalue#9.escape#1.out + Epsilon closure : + (self) + +NFA state 186 = main.component#5.namevalue#1.qvalue#9.escape#1.out + [(epsilon)] -> component#5.namevalue#1.qvalue#9.qv1 + Epsilon closure : + (self) + main.component#5.namevalue#1.qvalue#9.qv1 + main.component#5.namevalue#1.qvalue#9.#1 + main.component#5.namevalue#1.qvalue#9.escape#2.in + main.component#5.namevalue#1.qvalue#9.qv2 + +NFA state 187 = main.component#5.namevalue#1.qvalue#9.qv1 + [(epsilon)] -> component#5.namevalue#1.qvalue#9.#1 + [(epsilon)] -> component#5.namevalue#1.qvalue#9.escape#2.in + 0:[\t ] -> component#5.namevalue#1.qvalue#9.qv1 + 9:[;] -> component#5.namevalue#1.qvalue#9.qv1 + 2:[!#-),:<>-@[]^`{-~] -> component#5.namevalue#1.qvalue#9.qv1 + 11:[A-Z_a-z] -> component#5.namevalue#1.qvalue#9.qv1 + 10:[=] -> component#5.namevalue#1.qvalue#9.qv1 + 8:[0-9] -> component#5.namevalue#1.qvalue#9.qv1 + 7:[/] -> component#5.namevalue#1.qvalue#9.qv1 + 6:[\055] -> component#5.namevalue#1.qvalue#9.qv1 + 
5:[+.] -> component#5.namevalue#1.qvalue#9.qv1 + 4:[*] -> component#5.namevalue#1.qvalue#9.qv1 + [(epsilon)] -> component#5.namevalue#1.qvalue#9.qv2 + Epsilon closure : + (self) + main.component#5.namevalue#1.qvalue#9.#1 + main.component#5.namevalue#1.qvalue#9.escape#2.in + main.component#5.namevalue#1.qvalue#9.qv2 + +NFA state 188 = main.component#5.namevalue#1.qvalue#9.#1 + Tags : COPY_TO_VALUE + Epsilon closure : + (self) + +NFA state 189 = main.component#5.namevalue#1.qvalue#9.escape#2.in + 12:[\\] -> component#5.namevalue#1.qvalue#9.escape#2.#2 + 12:[\\] -> component#5.namevalue#1.qvalue#9.escape#2.#1 + Epsilon closure : + (self) + +NFA state 190 = main.component#5.namevalue#1.qvalue#9.escape#2.#1 + 12:[\\] -> component#5.namevalue#1.qvalue#9.escape#2.out + Epsilon closure : + (self) + +NFA state 191 = main.component#5.namevalue#1.qvalue#9.escape#2.#2 + 3:["] -> component#5.namevalue#1.qvalue#9.escape#2.out + Epsilon closure : + (self) + +NFA state 192 = main.component#5.namevalue#1.qvalue#9.escape#2.out + [(epsilon)] -> component#5.namevalue#1.qvalue#9.qv1 + Epsilon closure : + (self) + main.component#5.namevalue#1.qvalue#9.qv1 + main.component#5.namevalue#1.qvalue#9.#1 + main.component#5.namevalue#1.qvalue#9.escape#2.in + main.component#5.namevalue#1.qvalue#9.qv2 + +NFA state 193 = main.component#5.namevalue#1.qvalue#9.qv2 + 3:["] -> component#5.namevalue#1.qvalue#9.out + Epsilon closure : + (self) + +NFA state 194 = main.component#5.namevalue#1.qvalue#9.out + [(epsilon)] -> component#5.namevalue#1.#10 + Epsilon closure : + (self) + main.#5 + main.component#5.namevalue#1.#10 + main.component#5.namevalue#1.optwhite#10.in + main.component#5.namevalue#1.optwhite#10.out + main.component#5.namevalue#1.out_normal + main.component#5.namevalue#1.#19 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 195 = main.component#5.namevalue#1.optwhite#10.in + [(epsilon)] -> 
component#5.namevalue#1.optwhite#10.out + 0:[\t ] -> component#5.namevalue#1.optwhite#10.in + 1:[\r] -> component#5.namevalue#1.optwhite#10.in + Epsilon closure : + (self) + main.#5 + main.component#5.namevalue#1.optwhite#10.out + main.component#5.namevalue#1.out_normal + main.component#5.namevalue#1.#19 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 196 = main.component#5.namevalue#1.optwhite#10.out + [(epsilon)] -> component#5.namevalue#1.out_normal + Epsilon closure : + (self) + main.#5 + main.component#5.namevalue#1.out_normal + main.component#5.namevalue#1.#19 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 197 = main.component#5.namevalue#1.#11 + [(epsilon)] -> component#5.namevalue#1.value#12.in + Epsilon closure : + (self) + main.component#5.namevalue#1.value#12.in + +NFA state 198 = main.component#5.namevalue#1.optwhite#11.in + [(epsilon)] -> component#5.namevalue#1.optwhite#11.out + 0:[\t ] -> component#5.namevalue#1.optwhite#11.in + 1:[\r] -> component#5.namevalue#1.optwhite#11.in + Epsilon closure : + (self) + main.component#5.namevalue#1.#11 + main.component#5.namevalue#1.optwhite#11.out + main.component#5.namevalue#1.value#12.in + +NFA state 199 = main.component#5.namevalue#1.optwhite#11.out + [(epsilon)] -> component#5.namevalue#1.#11 + Epsilon closure : + (self) + main.component#5.namevalue#1.#11 + main.component#5.namevalue#1.value#12.in + +NFA state 200 = main.component#5.namevalue#1.#12 + [(epsilon)] -> component#5.namevalue#1.optwhite#13.in + Epsilon closure : + (self) + main.#5 + main.component#5.namevalue#1.optwhite#13.in + main.component#5.namevalue#1.optwhite#13.out + main.component#5.namevalue#1.out_normal + main.component#5.namevalue#1.#19 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 201 = 
main.component#5.namevalue#1.value#12.in + 2:[!#-),:<>-@[]^`{-~] -> component#5.namevalue#1.value#12.v1 + 11:[A-Z_a-z] -> component#5.namevalue#1.value#12.v1 + 10:[=] -> component#5.namevalue#1.value#12.v1 + 8:[0-9] -> component#5.namevalue#1.value#12.v1 + 7:[/] -> component#5.namevalue#1.value#12.v1 + 6:[\055] -> component#5.namevalue#1.value#12.v1 + 5:[+.] -> component#5.namevalue#1.value#12.v1 + 4:[*] -> component#5.namevalue#1.value#12.v1 + Epsilon closure : + (self) + +NFA state 202 = main.component#5.namevalue#1.value#12.v1 + [(epsilon)] -> component#5.namevalue#1.value#12.#1 + [(epsilon)] -> component#5.namevalue#1.value#12.out + 2:[!#-),:<>-@[]^`{-~] -> component#5.namevalue#1.value#12.v1 + 11:[A-Z_a-z] -> component#5.namevalue#1.value#12.v1 + 10:[=] -> component#5.namevalue#1.value#12.v1 + 8:[0-9] -> component#5.namevalue#1.value#12.v1 + 7:[/] -> component#5.namevalue#1.value#12.v1 + 6:[\055] -> component#5.namevalue#1.value#12.v1 + 5:[+.] -> component#5.namevalue#1.value#12.v1 + 4:[*] -> component#5.namevalue#1.value#12.v1 + Epsilon closure : + (self) + main.#5 + main.component#5.namevalue#1.#12 + main.component#5.namevalue#1.value#12.#1 + main.component#5.namevalue#1.value#12.out + main.component#5.namevalue#1.optwhite#13.in + main.component#5.namevalue#1.optwhite#13.out + main.component#5.namevalue#1.out_normal + main.component#5.namevalue#1.#19 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 203 = main.component#5.namevalue#1.value#12.#1 + Tags : COPY_TO_VALUE + Epsilon closure : + (self) + +NFA state 204 = main.component#5.namevalue#1.value#12.out + [(epsilon)] -> component#5.namevalue#1.#12 + Epsilon closure : + (self) + main.#5 + main.component#5.namevalue#1.#12 + main.component#5.namevalue#1.optwhite#13.in + main.component#5.namevalue#1.optwhite#13.out + main.component#5.namevalue#1.out_normal + main.component#5.namevalue#1.#19 + main.component#5.namevalue#1.out + 
main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 205 = main.component#5.namevalue#1.optwhite#13.in + [(epsilon)] -> component#5.namevalue#1.optwhite#13.out + 0:[\t ] -> component#5.namevalue#1.optwhite#13.in + 1:[\r] -> component#5.namevalue#1.optwhite#13.in + Epsilon closure : + (self) + main.#5 + main.component#5.namevalue#1.optwhite#13.out + main.component#5.namevalue#1.out_normal + main.component#5.namevalue#1.#19 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 206 = main.component#5.namevalue#1.optwhite#13.out + [(epsilon)] -> component#5.namevalue#1.out_normal + Epsilon closure : + (self) + main.#5 + main.component#5.namevalue#1.out_normal + main.component#5.namevalue#1.#19 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 207 = main.component#5.namevalue#1.#13 + [(epsilon)] -> component#5.namevalue#1.#14 + Epsilon closure : + (self) + main.component#5.namevalue#1.#14 + +NFA state 208 = main.component#5.namevalue#1.optwhite#14.in + [(epsilon)] -> component#5.namevalue#1.optwhite#14.out + 0:[\t ] -> component#5.namevalue#1.optwhite#14.in + 1:[\r] -> component#5.namevalue#1.optwhite#14.in + Epsilon closure : + (self) + main.component#5.namevalue#1.#13 + main.component#5.namevalue#1.optwhite#14.out + main.component#5.namevalue#1.#14 + +NFA state 209 = main.component#5.namevalue#1.optwhite#14.out + [(epsilon)] -> component#5.namevalue#1.#13 + Epsilon closure : + (self) + main.component#5.namevalue#1.#13 + main.component#5.namevalue#1.#14 + +NFA state 210 = main.component#5.namevalue#1.#14 + EOS -> component#5.namevalue#1.out_normal + Epsilon closure : + (self) + +NFA state 211 = main.component#5.namevalue#1.rhs_continue + [(epsilon)] -> component#5.namevalue#1.optwhite#18.in + [(epsilon)] -> component#5.namevalue#1.optwhite#15.in + Epsilon closure : + (self) + 
main.component#5.namevalue#1.#15 + main.component#5.namevalue#1.optwhite#15.in + main.component#5.namevalue#1.optwhite#15.out + main.component#5.namevalue#1.qvalue#16.in + main.component#5.namevalue#1.#17 + main.component#5.namevalue#1.optwhite#18.in + main.component#5.namevalue#1.optwhite#18.out + main.component#5.namevalue#1.value#19.in + +NFA state 212 = main.component#5.namevalue#1.#15 + [(epsilon)] -> component#5.namevalue#1.qvalue#16.in + Epsilon closure : + (self) + main.component#5.namevalue#1.qvalue#16.in + +NFA state 213 = main.component#5.namevalue#1.optwhite#15.in + [(epsilon)] -> component#5.namevalue#1.optwhite#15.out + 0:[\t ] -> component#5.namevalue#1.optwhite#15.in + 1:[\r] -> component#5.namevalue#1.optwhite#15.in + Epsilon closure : + (self) + main.component#5.namevalue#1.#15 + main.component#5.namevalue#1.optwhite#15.out + main.component#5.namevalue#1.qvalue#16.in + +NFA state 214 = main.component#5.namevalue#1.optwhite#15.out + [(epsilon)] -> component#5.namevalue#1.#15 + Epsilon closure : + (self) + main.component#5.namevalue#1.#15 + main.component#5.namevalue#1.qvalue#16.in + +NFA state 215 = main.component#5.namevalue#1.#16 + [(epsilon)] -> component#5.namevalue#1.optwhite#17.in + Epsilon closure : + (self) + main.#5 + main.component#5.namevalue#1.optwhite#17.in + main.component#5.namevalue#1.optwhite#17.out + main.component#5.namevalue#1.out_continue + main.component#5.namevalue#1.#20 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 216 = main.component#5.namevalue#1.qvalue#16.in + 3:["] -> component#5.namevalue#1.qvalue#16.qv0 + Epsilon closure : + (self) + +NFA state 217 = main.component#5.namevalue#1.qvalue#16.qv0 + [(epsilon)] -> component#5.namevalue#1.qvalue#16.escape#1.in + 0:[\t ] -> component#5.namevalue#1.qvalue#16.qv1 + 9:[;] -> component#5.namevalue#1.qvalue#16.qv1 + 2:[!#-),:<>-@[]^`{-~] -> component#5.namevalue#1.qvalue#16.qv1 + 11:[A-Z_a-z] -> 
component#5.namevalue#1.qvalue#16.qv1 + 10:[=] -> component#5.namevalue#1.qvalue#16.qv1 + 8:[0-9] -> component#5.namevalue#1.qvalue#16.qv1 + 7:[/] -> component#5.namevalue#1.qvalue#16.qv1 + 6:[\055] -> component#5.namevalue#1.qvalue#16.qv1 + 5:[+.] -> component#5.namevalue#1.qvalue#16.qv1 + 4:[*] -> component#5.namevalue#1.qvalue#16.qv1 + Epsilon closure : + (self) + main.component#5.namevalue#1.qvalue#16.escape#1.in + +NFA state 218 = main.component#5.namevalue#1.qvalue#16.escape#1.in + 12:[\\] -> component#5.namevalue#1.qvalue#16.escape#1.#2 + 12:[\\] -> component#5.namevalue#1.qvalue#16.escape#1.#1 + Epsilon closure : + (self) + +NFA state 219 = main.component#5.namevalue#1.qvalue#16.escape#1.#1 + 12:[\\] -> component#5.namevalue#1.qvalue#16.escape#1.out + Epsilon closure : + (self) + +NFA state 220 = main.component#5.namevalue#1.qvalue#16.escape#1.#2 + 3:["] -> component#5.namevalue#1.qvalue#16.escape#1.out + Epsilon closure : + (self) + +NFA state 221 = main.component#5.namevalue#1.qvalue#16.escape#1.out + [(epsilon)] -> component#5.namevalue#1.qvalue#16.qv1 + Epsilon closure : + (self) + main.component#5.namevalue#1.qvalue#16.qv1 + main.component#5.namevalue#1.qvalue#16.#1 + main.component#5.namevalue#1.qvalue#16.escape#2.in + main.component#5.namevalue#1.qvalue#16.qv2 + +NFA state 222 = main.component#5.namevalue#1.qvalue#16.qv1 + [(epsilon)] -> component#5.namevalue#1.qvalue#16.#1 + [(epsilon)] -> component#5.namevalue#1.qvalue#16.escape#2.in + 0:[\t ] -> component#5.namevalue#1.qvalue#16.qv1 + 9:[;] -> component#5.namevalue#1.qvalue#16.qv1 + 2:[!#-),:<>-@[]^`{-~] -> component#5.namevalue#1.qvalue#16.qv1 + 11:[A-Z_a-z] -> component#5.namevalue#1.qvalue#16.qv1 + 10:[=] -> component#5.namevalue#1.qvalue#16.qv1 + 8:[0-9] -> component#5.namevalue#1.qvalue#16.qv1 + 7:[/] -> component#5.namevalue#1.qvalue#16.qv1 + 6:[\055] -> component#5.namevalue#1.qvalue#16.qv1 + 5:[+.] 
-> component#5.namevalue#1.qvalue#16.qv1 + 4:[*] -> component#5.namevalue#1.qvalue#16.qv1 + [(epsilon)] -> component#5.namevalue#1.qvalue#16.qv2 + Epsilon closure : + (self) + main.component#5.namevalue#1.qvalue#16.#1 + main.component#5.namevalue#1.qvalue#16.escape#2.in + main.component#5.namevalue#1.qvalue#16.qv2 + +NFA state 223 = main.component#5.namevalue#1.qvalue#16.#1 + Tags : COPY_TO_VALUE + Epsilon closure : + (self) + +NFA state 224 = main.component#5.namevalue#1.qvalue#16.escape#2.in + 12:[\\] -> component#5.namevalue#1.qvalue#16.escape#2.#2 + 12:[\\] -> component#5.namevalue#1.qvalue#16.escape#2.#1 + Epsilon closure : + (self) + +NFA state 225 = main.component#5.namevalue#1.qvalue#16.escape#2.#1 + 12:[\\] -> component#5.namevalue#1.qvalue#16.escape#2.out + Epsilon closure : + (self) + +NFA state 226 = main.component#5.namevalue#1.qvalue#16.escape#2.#2 + 3:["] -> component#5.namevalue#1.qvalue#16.escape#2.out + Epsilon closure : + (self) + +NFA state 227 = main.component#5.namevalue#1.qvalue#16.escape#2.out + [(epsilon)] -> component#5.namevalue#1.qvalue#16.qv1 + Epsilon closure : + (self) + main.component#5.namevalue#1.qvalue#16.qv1 + main.component#5.namevalue#1.qvalue#16.#1 + main.component#5.namevalue#1.qvalue#16.escape#2.in + main.component#5.namevalue#1.qvalue#16.qv2 + +NFA state 228 = main.component#5.namevalue#1.qvalue#16.qv2 + 3:["] -> component#5.namevalue#1.qvalue#16.out + Epsilon closure : + (self) + +NFA state 229 = main.component#5.namevalue#1.qvalue#16.out + [(epsilon)] -> component#5.namevalue#1.#16 + Epsilon closure : + (self) + main.#5 + main.component#5.namevalue#1.#16 + main.component#5.namevalue#1.optwhite#17.in + main.component#5.namevalue#1.optwhite#17.out + main.component#5.namevalue#1.out_continue + main.component#5.namevalue#1.#20 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 230 = main.component#5.namevalue#1.optwhite#17.in + [(epsilon)] -> 
component#5.namevalue#1.optwhite#17.out + 0:[\t ] -> component#5.namevalue#1.optwhite#17.in + 1:[\r] -> component#5.namevalue#1.optwhite#17.in + Epsilon closure : + (self) + main.#5 + main.component#5.namevalue#1.optwhite#17.out + main.component#5.namevalue#1.out_continue + main.component#5.namevalue#1.#20 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 231 = main.component#5.namevalue#1.optwhite#17.out + [(epsilon)] -> component#5.namevalue#1.out_continue + Epsilon closure : + (self) + main.#5 + main.component#5.namevalue#1.out_continue + main.component#5.namevalue#1.#20 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 232 = main.component#5.namevalue#1.#17 + [(epsilon)] -> component#5.namevalue#1.value#19.in + Epsilon closure : + (self) + main.component#5.namevalue#1.value#19.in + +NFA state 233 = main.component#5.namevalue#1.optwhite#18.in + [(epsilon)] -> component#5.namevalue#1.optwhite#18.out + 0:[\t ] -> component#5.namevalue#1.optwhite#18.in + 1:[\r] -> component#5.namevalue#1.optwhite#18.in + Epsilon closure : + (self) + main.component#5.namevalue#1.#17 + main.component#5.namevalue#1.optwhite#18.out + main.component#5.namevalue#1.value#19.in + +NFA state 234 = main.component#5.namevalue#1.optwhite#18.out + [(epsilon)] -> component#5.namevalue#1.#17 + Epsilon closure : + (self) + main.component#5.namevalue#1.#17 + main.component#5.namevalue#1.value#19.in + +NFA state 235 = main.component#5.namevalue#1.#18 + [(epsilon)] -> component#5.namevalue#1.optwhite#20.in + Epsilon closure : + (self) + main.#5 + main.component#5.namevalue#1.optwhite#20.in + main.component#5.namevalue#1.optwhite#20.out + main.component#5.namevalue#1.out_continue + main.component#5.namevalue#1.#20 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 236 = 
main.component#5.namevalue#1.value#19.in + 2:[!#-),:<>-@[]^`{-~] -> component#5.namevalue#1.value#19.v1 + 11:[A-Z_a-z] -> component#5.namevalue#1.value#19.v1 + 10:[=] -> component#5.namevalue#1.value#19.v1 + 8:[0-9] -> component#5.namevalue#1.value#19.v1 + 7:[/] -> component#5.namevalue#1.value#19.v1 + 6:[\055] -> component#5.namevalue#1.value#19.v1 + 5:[+.] -> component#5.namevalue#1.value#19.v1 + 4:[*] -> component#5.namevalue#1.value#19.v1 + Epsilon closure : + (self) + +NFA state 237 = main.component#5.namevalue#1.value#19.v1 + [(epsilon)] -> component#5.namevalue#1.value#19.#1 + [(epsilon)] -> component#5.namevalue#1.value#19.out + 2:[!#-),:<>-@[]^`{-~] -> component#5.namevalue#1.value#19.v1 + 11:[A-Z_a-z] -> component#5.namevalue#1.value#19.v1 + 10:[=] -> component#5.namevalue#1.value#19.v1 + 8:[0-9] -> component#5.namevalue#1.value#19.v1 + 7:[/] -> component#5.namevalue#1.value#19.v1 + 6:[\055] -> component#5.namevalue#1.value#19.v1 + 5:[+.] -> component#5.namevalue#1.value#19.v1 + 4:[*] -> component#5.namevalue#1.value#19.v1 + Epsilon closure : + (self) + main.#5 + main.component#5.namevalue#1.#18 + main.component#5.namevalue#1.value#19.#1 + main.component#5.namevalue#1.value#19.out + main.component#5.namevalue#1.optwhite#20.in + main.component#5.namevalue#1.optwhite#20.out + main.component#5.namevalue#1.out_continue + main.component#5.namevalue#1.#20 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 238 = main.component#5.namevalue#1.value#19.#1 + Tags : COPY_TO_VALUE + Epsilon closure : + (self) + +NFA state 239 = main.component#5.namevalue#1.value#19.out + [(epsilon)] -> component#5.namevalue#1.#18 + Epsilon closure : + (self) + main.#5 + main.component#5.namevalue#1.#18 + main.component#5.namevalue#1.optwhite#20.in + main.component#5.namevalue#1.optwhite#20.out + main.component#5.namevalue#1.out_continue + main.component#5.namevalue#1.#20 + main.component#5.namevalue#1.out + 
main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 240 = main.component#5.namevalue#1.optwhite#20.in + [(epsilon)] -> component#5.namevalue#1.optwhite#20.out + 0:[\t ] -> component#5.namevalue#1.optwhite#20.in + 1:[\r] -> component#5.namevalue#1.optwhite#20.in + Epsilon closure : + (self) + main.#5 + main.component#5.namevalue#1.optwhite#20.out + main.component#5.namevalue#1.out_continue + main.component#5.namevalue#1.#20 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 241 = main.component#5.namevalue#1.optwhite#20.out + [(epsilon)] -> component#5.namevalue#1.out_continue + Epsilon closure : + (self) + main.#5 + main.component#5.namevalue#1.out_continue + main.component#5.namevalue#1.#20 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 242 = main.component#5.namevalue#1.out_normal + [(epsilon)] -> component#5.namevalue#1.out + [(epsilon)] -> component#5.namevalue#1.#19 + Epsilon closure : + (self) + main.#5 + main.component#5.namevalue#1.#19 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 243 = main.component#5.namevalue#1.#19 + Tags : GOT_NAMEVALUE + Epsilon closure : + (self) + +NFA state 244 = main.component#5.namevalue#1.out_continue + [(epsilon)] -> component#5.namevalue#1.out + [(epsilon)] -> component#5.namevalue#1.#20 + Epsilon closure : + (self) + main.#5 + main.component#5.namevalue#1.#20 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 245 = main.component#5.namevalue#1.#20 + Tags : GOT_NAMEVALUE_CONT + Epsilon closure : + (self) + +NFA state 246 = main.component#5.namevalue#1.out + [(epsilon)] -> component#5.out + Epsilon closure : + (self) + main.#5 + main.component#5.out + main.#6 + main.optwhite#6.in + 
main.optwhite#6.out + +NFA state 247 = main.component#5.name#2.in + 6:[\055] -> component#5.name#2.name1 + 11:[A-Z_a-z] -> component#5.name#2.name1 + 8:[0-9] -> component#5.name#2.name1 + Epsilon closure : + (self) + +NFA state 248 = main.component#5.name#2.name1 + [(epsilon)] -> component#5.name#2.out + 0:[\t ] -> component#5.name#2.name2 + 6:[\055] -> component#5.name#2.name1 + 11:[A-Z_a-z] -> component#5.name#2.name1 + 8:[0-9] -> component#5.name#2.name1 + [(epsilon)] -> component#5.name#2.#2 + [(epsilon)] -> component#5.name#2.#1 + Epsilon closure : + (self) + main.#5 + main.component#5.name#2.#1 + main.component#5.name#2.#2 + main.component#5.name#2.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 249 = main.component#5.name#2.#1 + Tags : COPY_TO_NAME + Epsilon closure : + (self) + +NFA state 250 = main.component#5.name#2.#2 + Tags : GOT_NAME + Epsilon closure : + (self) + +NFA state 251 = main.component#5.name#2.name2 + [(epsilon)] -> component#5.name#2.out + 6:[\055] -> component#5.name#2.name1 + 11:[A-Z_a-z] -> component#5.name#2.name1 + 8:[0-9] -> component#5.name#2.name1 + 0:[\t ] -> component#5.name#2.name2 + [(epsilon)] -> component#5.name#2.#4 + [(epsilon)] -> component#5.name#2.#3 + Epsilon closure : + (self) + main.#5 + main.component#5.name#2.#3 + main.component#5.name#2.#4 + main.component#5.name#2.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 252 = main.component#5.name#2.#3 + Tags : COPY_TO_NAME + Epsilon closure : + (self) + +NFA state 253 = main.component#5.name#2.#4 + Tags : GOT_NAME_TRAILING_SPACE + Epsilon closure : + (self) + +NFA state 254 = main.component#5.name#2.out + [(epsilon)] -> component#5.out + Epsilon closure : + (self) + main.#5 + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 255 = main.component#5.majorminor#3.in + [(epsilon)] -> component#5.majorminor#3.major#1.in + Epsilon closure : + (self) + 
main.component#5.majorminor#3.major#1.in + +NFA state 256 = main.component#5.majorminor#3.major#1.in + 6:[\055] -> component#5.majorminor#3.major#1.name1 + 11:[A-Z_a-z] -> component#5.majorminor#3.major#1.name1 + 8:[0-9] -> component#5.majorminor#3.major#1.name1 + Epsilon closure : + (self) + +NFA state 257 = main.component#5.majorminor#3.major#1.name1 + 6:[\055] -> component#5.majorminor#3.major#1.name1 + 11:[A-Z_a-z] -> component#5.majorminor#3.major#1.name1 + 8:[0-9] -> component#5.majorminor#3.major#1.name1 + [(epsilon)] -> component#5.majorminor#3.major#1.out + Epsilon closure : + (self) + main.component#5.majorminor#3.major#1.out + main.component#5.majorminor#3.foo + +NFA state 258 = main.component#5.majorminor#3.major#1.out + [(epsilon)] -> component#5.majorminor#3.foo + Epsilon closure : + (self) + main.component#5.majorminor#3.foo + +NFA state 259 = main.component#5.majorminor#3.foo + 7:[/] -> component#5.majorminor#3.bar + Epsilon closure : + (self) + +NFA state 260 = main.component#5.majorminor#3.bar + [(epsilon)] -> component#5.majorminor#3.minor#2.in + Epsilon closure : + (self) + main.component#5.majorminor#3.minor#2.in + +NFA state 261 = main.component#5.majorminor#3.minor#2.in + 5:[+.] -> component#5.majorminor#3.minor#2.minor1 + 12:[\\] -> component#5.majorminor#3.minor#2.minor1 + 6:[\055] -> component#5.majorminor#3.minor#2.minor1 + 6:[\055] -> component#5.majorminor#3.minor#2.minor1 + 11:[A-Z_a-z] -> component#5.majorminor#3.minor#2.minor1 + 8:[0-9] -> component#5.majorminor#3.minor#2.minor1 + Epsilon closure : + (self) + +NFA state 262 = main.component#5.majorminor#3.minor#2.minor1 + [(epsilon)] -> component#5.majorminor#3.minor#2.#1 + 5:[+.] 
-> component#5.majorminor#3.minor#2.minor1 + 12:[\\] -> component#5.majorminor#3.minor#2.minor1 + 6:[\055] -> component#5.majorminor#3.minor#2.minor1 + 6:[\055] -> component#5.majorminor#3.minor#2.minor1 + 11:[A-Z_a-z] -> component#5.majorminor#3.minor#2.minor1 + 8:[0-9] -> component#5.majorminor#3.minor#2.minor1 + [(epsilon)] -> component#5.majorminor#3.minor#2.out + Epsilon closure : + (self) + main.#5 + main.component#5.majorminor#3.minor#2.#1 + main.component#5.majorminor#3.minor#2.out + main.component#5.majorminor#3.out + main.component#5.majorminor#3.#1 + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 263 = main.component#5.majorminor#3.minor#2.#1 + Tags : COPY_TO_MINOR + Epsilon closure : + (self) + +NFA state 264 = main.component#5.majorminor#3.minor#2.out + [(epsilon)] -> component#5.majorminor#3.out + Epsilon closure : + (self) + main.#5 + main.component#5.majorminor#3.out + main.component#5.majorminor#3.#1 + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 265 = main.component#5.majorminor#3.out + [(epsilon)] -> component#5.majorminor#3.#1 + [(epsilon)] -> component#5.out + Epsilon closure : + (self) + main.#5 + main.component#5.majorminor#3.#1 + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 266 = main.component#5.majorminor#3.#1 + Tags : GOT_MAJORMINOR + Epsilon closure : + (self) + +NFA state 267 = main.component#5.out + [(epsilon)] -> #5 + Epsilon closure : + (self) + main.#5 + main.#6 + main.optwhite#6.in + main.optwhite#6.out + +NFA state 268 = main.#6 + 9:[;] -> #7 + Epsilon closure : + (self) + +NFA state 269 = main.optwhite#6.in + [(epsilon)] -> optwhite#6.out + 0:[\t ] -> optwhite#6.in + 1:[\r] -> optwhite#6.in + Epsilon closure : + (self) + main.#6 + main.optwhite#6.out + +NFA state 270 = main.optwhite#6.out + [(epsilon)] -> #6 + Epsilon closure : + (self) + main.#6 + +NFA state 271 = main.#7 + [(epsilon)] -> optwhite#7.in + 
Epsilon closure : + (self) + main.#8 + main.optwhite#7.in + main.optwhite#7.out + +NFA state 272 = main.#8 + EOS -> out2 + Epsilon closure : + (self) + +NFA state 273 = main.optwhite#7.in + [(epsilon)] -> optwhite#7.out + 0:[\t ] -> optwhite#7.in + 1:[\r] -> optwhite#7.in + Epsilon closure : + (self) + main.#8 + main.optwhite#7.out + +NFA state 274 = main.optwhite#7.out + [(epsilon)] -> #8 + Epsilon closure : + (self) + main.#8 + +NFA state 275 = main.#9 + [(epsilon)] -> component#9.in + Epsilon closure : + (self) + main.component#9.in + main.component#9.namevalue#1.in + main.component#9.namevalue#1.#1 + main.component#9.namevalue#1.optwhite#1.in + main.component#9.namevalue#1.optwhite#1.out + main.component#9.namevalue#1.name#2.in + main.component#9.namevalue#1.#4 + main.component#9.namevalue#1.optwhite#4.in + main.component#9.namevalue#1.optwhite#4.out + main.component#9.namevalue#1.name#5.in + main.component#9.name#2.in + main.component#9.majorminor#3.in + main.component#9.majorminor#3.major#1.in + +NFA state 276 = main.optwhite#8.in + [(epsilon)] -> optwhite#8.out + 0:[\t ] -> optwhite#8.in + 1:[\r] -> optwhite#8.in + Epsilon closure : + (self) + main.#9 + main.optwhite#8.out + main.component#9.in + main.component#9.namevalue#1.in + main.component#9.namevalue#1.#1 + main.component#9.namevalue#1.optwhite#1.in + main.component#9.namevalue#1.optwhite#1.out + main.component#9.namevalue#1.name#2.in + main.component#9.namevalue#1.#4 + main.component#9.namevalue#1.optwhite#4.in + main.component#9.namevalue#1.optwhite#4.out + main.component#9.namevalue#1.name#5.in + main.component#9.name#2.in + main.component#9.majorminor#3.in + main.component#9.majorminor#3.major#1.in + +NFA state 277 = main.optwhite#8.out + [(epsilon)] -> #9 + Epsilon closure : + (self) + main.#9 + main.component#9.in + main.component#9.namevalue#1.in + main.component#9.namevalue#1.#1 + main.component#9.namevalue#1.optwhite#1.in + main.component#9.namevalue#1.optwhite#1.out + 
main.component#9.namevalue#1.name#2.in + main.component#9.namevalue#1.#4 + main.component#9.namevalue#1.optwhite#4.in + main.component#9.namevalue#1.optwhite#4.out + main.component#9.namevalue#1.name#5.in + main.component#9.name#2.in + main.component#9.majorminor#3.in + main.component#9.majorminor#3.major#1.in + +NFA state 278 = main.#10 + [(epsilon)] -> optwhite#10.in + Epsilon closure : + (self) + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 279 = main.component#9.in + [(epsilon)] -> component#9.namevalue#1.in + [(epsilon)] -> component#9.name#2.in + [(epsilon)] -> component#9.majorminor#3.in + Epsilon closure : + (self) + main.component#9.namevalue#1.in + main.component#9.namevalue#1.#1 + main.component#9.namevalue#1.optwhite#1.in + main.component#9.namevalue#1.optwhite#1.out + main.component#9.namevalue#1.name#2.in + main.component#9.namevalue#1.#4 + main.component#9.namevalue#1.optwhite#4.in + main.component#9.namevalue#1.optwhite#4.out + main.component#9.namevalue#1.name#5.in + main.component#9.name#2.in + main.component#9.majorminor#3.in + main.component#9.majorminor#3.major#1.in + +NFA state 280 = main.component#9.namevalue#1.in + [(epsilon)] -> component#9.namevalue#1.optwhite#4.in + [(epsilon)] -> component#9.namevalue#1.optwhite#1.in + Epsilon closure : + (self) + main.component#9.namevalue#1.#1 + main.component#9.namevalue#1.optwhite#1.in + main.component#9.namevalue#1.optwhite#1.out + main.component#9.namevalue#1.name#2.in + main.component#9.namevalue#1.#4 + main.component#9.namevalue#1.optwhite#4.in + main.component#9.namevalue#1.optwhite#4.out + main.component#9.namevalue#1.name#5.in + +NFA state 281 = main.component#9.namevalue#1.#1 + [(epsilon)] -> component#9.namevalue#1.name#2.in + Epsilon closure : + (self) + main.component#9.namevalue#1.name#2.in + +NFA state 282 = main.component#9.namevalue#1.optwhite#1.in + [(epsilon)] -> component#9.namevalue#1.optwhite#1.out + 0:[\t ] -> component#9.namevalue#1.optwhite#1.in + 1:[\r] 
-> component#9.namevalue#1.optwhite#1.in + Epsilon closure : + (self) + main.component#9.namevalue#1.#1 + main.component#9.namevalue#1.optwhite#1.out + main.component#9.namevalue#1.name#2.in + +NFA state 283 = main.component#9.namevalue#1.optwhite#1.out + [(epsilon)] -> component#9.namevalue#1.#1 + Epsilon closure : + (self) + main.component#9.namevalue#1.#1 + main.component#9.namevalue#1.name#2.in + +NFA state 284 = main.component#9.namevalue#1.#2 + [(epsilon)] -> component#9.namevalue#1.optwhite#3.in + Epsilon closure : + (self) + main.component#9.namevalue#1.#3 + main.component#9.namevalue#1.optwhite#3.in + main.component#9.namevalue#1.optwhite#3.out + +NFA state 285 = main.component#9.namevalue#1.name#2.in + 6:[\055] -> component#9.namevalue#1.name#2.name1 + 11:[A-Z_a-z] -> component#9.namevalue#1.name#2.name1 + 8:[0-9] -> component#9.namevalue#1.name#2.name1 + Epsilon closure : + (self) + +NFA state 286 = main.component#9.namevalue#1.name#2.name1 + [(epsilon)] -> component#9.namevalue#1.name#2.#1 + [(epsilon)] -> component#9.namevalue#1.name#2.#2 + 6:[\055] -> component#9.namevalue#1.name#2.name1 + 11:[A-Z_a-z] -> component#9.namevalue#1.name#2.name1 + 8:[0-9] -> component#9.namevalue#1.name#2.name1 + 0:[\t ] -> component#9.namevalue#1.name#2.name2 + [(epsilon)] -> component#9.namevalue#1.name#2.out + Epsilon closure : + (self) + main.component#9.namevalue#1.#2 + main.component#9.namevalue#1.name#2.#1 + main.component#9.namevalue#1.name#2.#2 + main.component#9.namevalue#1.name#2.out + main.component#9.namevalue#1.#3 + main.component#9.namevalue#1.optwhite#3.in + main.component#9.namevalue#1.optwhite#3.out + +NFA state 287 = main.component#9.namevalue#1.name#2.#1 + Tags : COPY_TO_NAME + Epsilon closure : + (self) + +NFA state 288 = main.component#9.namevalue#1.name#2.#2 + Tags : GOT_NAME + Epsilon closure : + (self) + +NFA state 289 = main.component#9.namevalue#1.name#2.name2 + [(epsilon)] -> component#9.namevalue#1.name#2.#3 + [(epsilon)] -> 
component#9.namevalue#1.name#2.#4 + 0:[\t ] -> component#9.namevalue#1.name#2.name2 + 6:[\055] -> component#9.namevalue#1.name#2.name1 + 11:[A-Z_a-z] -> component#9.namevalue#1.name#2.name1 + 8:[0-9] -> component#9.namevalue#1.name#2.name1 + [(epsilon)] -> component#9.namevalue#1.name#2.out + Epsilon closure : + (self) + main.component#9.namevalue#1.#2 + main.component#9.namevalue#1.name#2.#3 + main.component#9.namevalue#1.name#2.#4 + main.component#9.namevalue#1.name#2.out + main.component#9.namevalue#1.#3 + main.component#9.namevalue#1.optwhite#3.in + main.component#9.namevalue#1.optwhite#3.out + +NFA state 290 = main.component#9.namevalue#1.name#2.#3 + Tags : COPY_TO_NAME + Epsilon closure : + (self) + +NFA state 291 = main.component#9.namevalue#1.name#2.#4 + Tags : GOT_NAME_TRAILING_SPACE + Epsilon closure : + (self) + +NFA state 292 = main.component#9.namevalue#1.name#2.out + [(epsilon)] -> component#9.namevalue#1.#2 + Epsilon closure : + (self) + main.component#9.namevalue#1.#2 + main.component#9.namevalue#1.#3 + main.component#9.namevalue#1.optwhite#3.in + main.component#9.namevalue#1.optwhite#3.out + +NFA state 293 = main.component#9.namevalue#1.#3 + 10:[=] -> component#9.namevalue#1.rhs_normal + Epsilon closure : + (self) + +NFA state 294 = main.component#9.namevalue#1.optwhite#3.in + [(epsilon)] -> component#9.namevalue#1.optwhite#3.out + 0:[\t ] -> component#9.namevalue#1.optwhite#3.in + 1:[\r] -> component#9.namevalue#1.optwhite#3.in + Epsilon closure : + (self) + main.component#9.namevalue#1.#3 + main.component#9.namevalue#1.optwhite#3.out + +NFA state 295 = main.component#9.namevalue#1.optwhite#3.out + [(epsilon)] -> component#9.namevalue#1.#3 + Epsilon closure : + (self) + main.component#9.namevalue#1.#3 + +NFA state 296 = main.component#9.namevalue#1.#4 + [(epsilon)] -> component#9.namevalue#1.name#5.in + Epsilon closure : + (self) + main.component#9.namevalue#1.name#5.in + +NFA state 297 = main.component#9.namevalue#1.optwhite#4.in + [(epsilon)] -> 
component#9.namevalue#1.optwhite#4.out + 0:[\t ] -> component#9.namevalue#1.optwhite#4.in + 1:[\r] -> component#9.namevalue#1.optwhite#4.in + Epsilon closure : + (self) + main.component#9.namevalue#1.#4 + main.component#9.namevalue#1.optwhite#4.out + main.component#9.namevalue#1.name#5.in + +NFA state 298 = main.component#9.namevalue#1.optwhite#4.out + [(epsilon)] -> component#9.namevalue#1.#4 + Epsilon closure : + (self) + main.component#9.namevalue#1.#4 + main.component#9.namevalue#1.name#5.in + +NFA state 299 = main.component#9.namevalue#1.#5 + 4:[*] -> component#9.namevalue#1.#6 + Epsilon closure : + (self) + +NFA state 300 = main.component#9.namevalue#1.name#5.in + 6:[\055] -> component#9.namevalue#1.name#5.name1 + 11:[A-Z_a-z] -> component#9.namevalue#1.name#5.name1 + 8:[0-9] -> component#9.namevalue#1.name#5.name1 + Epsilon closure : + (self) + +NFA state 301 = main.component#9.namevalue#1.name#5.name1 + [(epsilon)] -> component#9.namevalue#1.name#5.#1 + [(epsilon)] -> component#9.namevalue#1.name#5.#2 + 6:[\055] -> component#9.namevalue#1.name#5.name1 + 11:[A-Z_a-z] -> component#9.namevalue#1.name#5.name1 + 8:[0-9] -> component#9.namevalue#1.name#5.name1 + 0:[\t ] -> component#9.namevalue#1.name#5.name2 + [(epsilon)] -> component#9.namevalue#1.name#5.out + Epsilon closure : + (self) + main.component#9.namevalue#1.#5 + main.component#9.namevalue#1.name#5.#1 + main.component#9.namevalue#1.name#5.#2 + main.component#9.namevalue#1.name#5.out + +NFA state 302 = main.component#9.namevalue#1.name#5.#1 + Tags : COPY_TO_NAME + Epsilon closure : + (self) + +NFA state 303 = main.component#9.namevalue#1.name#5.#2 + Tags : GOT_NAME + Epsilon closure : + (self) + +NFA state 304 = main.component#9.namevalue#1.name#5.name2 + [(epsilon)] -> component#9.namevalue#1.name#5.#3 + [(epsilon)] -> component#9.namevalue#1.name#5.#4 + 0:[\t ] -> component#9.namevalue#1.name#5.name2 + 6:[\055] -> component#9.namevalue#1.name#5.name1 + 11:[A-Z_a-z] -> 
component#9.namevalue#1.name#5.name1 + 8:[0-9] -> component#9.namevalue#1.name#5.name1 + [(epsilon)] -> component#9.namevalue#1.name#5.out + Epsilon closure : + (self) + main.component#9.namevalue#1.#5 + main.component#9.namevalue#1.name#5.#3 + main.component#9.namevalue#1.name#5.#4 + main.component#9.namevalue#1.name#5.out + +NFA state 305 = main.component#9.namevalue#1.name#5.#3 + Tags : COPY_TO_NAME + Epsilon closure : + (self) + +NFA state 306 = main.component#9.namevalue#1.name#5.#4 + Tags : GOT_NAME_TRAILING_SPACE + Epsilon closure : + (self) + +NFA state 307 = main.component#9.namevalue#1.name#5.out + [(epsilon)] -> component#9.namevalue#1.#5 + Epsilon closure : + (self) + main.component#9.namevalue#1.#5 + +NFA state 308 = main.component#9.namevalue#1.#6 + [(epsilon)] -> component#9.namevalue#1.digits#6.in + Epsilon closure : + (self) + main.component#9.namevalue#1.digits#6.in + +NFA state 309 = main.component#9.namevalue#1.#7 + [(epsilon)] -> component#9.namevalue#1.optwhite#7.in + Epsilon closure : + (self) + main.component#9.namevalue#1.#8 + main.component#9.namevalue#1.optwhite#7.in + main.component#9.namevalue#1.optwhite#7.out + +NFA state 310 = main.component#9.namevalue#1.digits#6.in + 8:[0-9] -> component#9.namevalue#1.digits#6.out + 8:[0-9] -> component#9.namevalue#1.digits#6.in + Epsilon closure : + (self) + +NFA state 311 = main.component#9.namevalue#1.digits#6.out + [(epsilon)] -> component#9.namevalue#1.#7 + Epsilon closure : + (self) + main.component#9.namevalue#1.#7 + main.component#9.namevalue#1.#8 + main.component#9.namevalue#1.optwhite#7.in + main.component#9.namevalue#1.optwhite#7.out + +NFA state 312 = main.component#9.namevalue#1.#8 + 10:[=] -> component#9.namevalue#1.rhs_continue + Epsilon closure : + (self) + +NFA state 313 = main.component#9.namevalue#1.optwhite#7.in + [(epsilon)] -> component#9.namevalue#1.optwhite#7.out + 0:[\t ] -> component#9.namevalue#1.optwhite#7.in + 1:[\r] -> component#9.namevalue#1.optwhite#7.in + Epsilon 
closure : + (self) + main.component#9.namevalue#1.#8 + main.component#9.namevalue#1.optwhite#7.out + +NFA state 314 = main.component#9.namevalue#1.optwhite#7.out + [(epsilon)] -> component#9.namevalue#1.#8 + Epsilon closure : + (self) + main.component#9.namevalue#1.#8 + +NFA state 315 = main.component#9.namevalue#1.rhs_normal + [(epsilon)] -> component#9.namevalue#1.optwhite#14.in + [(epsilon)] -> component#9.namevalue#1.optwhite#11.in + [(epsilon)] -> component#9.namevalue#1.optwhite#8.in + Epsilon closure : + (self) + main.component#9.namevalue#1.#9 + main.component#9.namevalue#1.optwhite#8.in + main.component#9.namevalue#1.optwhite#8.out + main.component#9.namevalue#1.qvalue#9.in + main.component#9.namevalue#1.#11 + main.component#9.namevalue#1.optwhite#11.in + main.component#9.namevalue#1.optwhite#11.out + main.component#9.namevalue#1.value#12.in + main.component#9.namevalue#1.#13 + main.component#9.namevalue#1.optwhite#14.in + main.component#9.namevalue#1.optwhite#14.out + main.component#9.namevalue#1.#14 + +NFA state 316 = main.component#9.namevalue#1.#9 + [(epsilon)] -> component#9.namevalue#1.qvalue#9.in + Epsilon closure : + (self) + main.component#9.namevalue#1.qvalue#9.in + +NFA state 317 = main.component#9.namevalue#1.optwhite#8.in + [(epsilon)] -> component#9.namevalue#1.optwhite#8.out + 0:[\t ] -> component#9.namevalue#1.optwhite#8.in + 1:[\r] -> component#9.namevalue#1.optwhite#8.in + Epsilon closure : + (self) + main.component#9.namevalue#1.#9 + main.component#9.namevalue#1.optwhite#8.out + main.component#9.namevalue#1.qvalue#9.in + +NFA state 318 = main.component#9.namevalue#1.optwhite#8.out + [(epsilon)] -> component#9.namevalue#1.#9 + Epsilon closure : + (self) + main.component#9.namevalue#1.#9 + main.component#9.namevalue#1.qvalue#9.in + +NFA state 319 = main.component#9.namevalue#1.#10 + [(epsilon)] -> component#9.namevalue#1.optwhite#10.in + Epsilon closure : + (self) + main.#10 + main.component#9.namevalue#1.optwhite#10.in + 
main.component#9.namevalue#1.optwhite#10.out + main.component#9.namevalue#1.out_normal + main.component#9.namevalue#1.#19 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 320 = main.component#9.namevalue#1.qvalue#9.in + 3:["] -> component#9.namevalue#1.qvalue#9.qv0 + Epsilon closure : + (self) + +NFA state 321 = main.component#9.namevalue#1.qvalue#9.qv0 + [(epsilon)] -> component#9.namevalue#1.qvalue#9.escape#1.in + 0:[\t ] -> component#9.namevalue#1.qvalue#9.qv1 + 9:[;] -> component#9.namevalue#1.qvalue#9.qv1 + 2:[!#-),:<>-@[]^`{-~] -> component#9.namevalue#1.qvalue#9.qv1 + 11:[A-Z_a-z] -> component#9.namevalue#1.qvalue#9.qv1 + 10:[=] -> component#9.namevalue#1.qvalue#9.qv1 + 8:[0-9] -> component#9.namevalue#1.qvalue#9.qv1 + 7:[/] -> component#9.namevalue#1.qvalue#9.qv1 + 6:[\055] -> component#9.namevalue#1.qvalue#9.qv1 + 5:[+.] -> component#9.namevalue#1.qvalue#9.qv1 + 4:[*] -> component#9.namevalue#1.qvalue#9.qv1 + Epsilon closure : + (self) + main.component#9.namevalue#1.qvalue#9.escape#1.in + +NFA state 322 = main.component#9.namevalue#1.qvalue#9.escape#1.in + 12:[\\] -> component#9.namevalue#1.qvalue#9.escape#1.#2 + 12:[\\] -> component#9.namevalue#1.qvalue#9.escape#1.#1 + Epsilon closure : + (self) + +NFA state 323 = main.component#9.namevalue#1.qvalue#9.escape#1.#1 + 12:[\\] -> component#9.namevalue#1.qvalue#9.escape#1.out + Epsilon closure : + (self) + +NFA state 324 = main.component#9.namevalue#1.qvalue#9.escape#1.#2 + 3:["] -> component#9.namevalue#1.qvalue#9.escape#1.out + Epsilon closure : + (self) + +NFA state 325 = main.component#9.namevalue#1.qvalue#9.escape#1.out + [(epsilon)] -> component#9.namevalue#1.qvalue#9.qv1 + Epsilon closure : + (self) + main.component#9.namevalue#1.qvalue#9.qv1 + main.component#9.namevalue#1.qvalue#9.#1 + main.component#9.namevalue#1.qvalue#9.escape#2.in + main.component#9.namevalue#1.qvalue#9.qv2 + +NFA state 326 = 
main.component#9.namevalue#1.qvalue#9.qv1 + [(epsilon)] -> component#9.namevalue#1.qvalue#9.#1 + [(epsilon)] -> component#9.namevalue#1.qvalue#9.escape#2.in + 0:[\t ] -> component#9.namevalue#1.qvalue#9.qv1 + 9:[;] -> component#9.namevalue#1.qvalue#9.qv1 + 2:[!#-),:<>-@[]^`{-~] -> component#9.namevalue#1.qvalue#9.qv1 + 11:[A-Z_a-z] -> component#9.namevalue#1.qvalue#9.qv1 + 10:[=] -> component#9.namevalue#1.qvalue#9.qv1 + 8:[0-9] -> component#9.namevalue#1.qvalue#9.qv1 + 7:[/] -> component#9.namevalue#1.qvalue#9.qv1 + 6:[\055] -> component#9.namevalue#1.qvalue#9.qv1 + 5:[+.] -> component#9.namevalue#1.qvalue#9.qv1 + 4:[*] -> component#9.namevalue#1.qvalue#9.qv1 + [(epsilon)] -> component#9.namevalue#1.qvalue#9.qv2 + Epsilon closure : + (self) + main.component#9.namevalue#1.qvalue#9.#1 + main.component#9.namevalue#1.qvalue#9.escape#2.in + main.component#9.namevalue#1.qvalue#9.qv2 + +NFA state 327 = main.component#9.namevalue#1.qvalue#9.#1 + Tags : COPY_TO_VALUE + Epsilon closure : + (self) + +NFA state 328 = main.component#9.namevalue#1.qvalue#9.escape#2.in + 12:[\\] -> component#9.namevalue#1.qvalue#9.escape#2.#2 + 12:[\\] -> component#9.namevalue#1.qvalue#9.escape#2.#1 + Epsilon closure : + (self) + +NFA state 329 = main.component#9.namevalue#1.qvalue#9.escape#2.#1 + 12:[\\] -> component#9.namevalue#1.qvalue#9.escape#2.out + Epsilon closure : + (self) + +NFA state 330 = main.component#9.namevalue#1.qvalue#9.escape#2.#2 + 3:["] -> component#9.namevalue#1.qvalue#9.escape#2.out + Epsilon closure : + (self) + +NFA state 331 = main.component#9.namevalue#1.qvalue#9.escape#2.out + [(epsilon)] -> component#9.namevalue#1.qvalue#9.qv1 + Epsilon closure : + (self) + main.component#9.namevalue#1.qvalue#9.qv1 + main.component#9.namevalue#1.qvalue#9.#1 + main.component#9.namevalue#1.qvalue#9.escape#2.in + main.component#9.namevalue#1.qvalue#9.qv2 + +NFA state 332 = main.component#9.namevalue#1.qvalue#9.qv2 + 3:["] -> component#9.namevalue#1.qvalue#9.out + Epsilon closure : + 
(self) + +NFA state 333 = main.component#9.namevalue#1.qvalue#9.out + [(epsilon)] -> component#9.namevalue#1.#10 + Epsilon closure : + (self) + main.#10 + main.component#9.namevalue#1.#10 + main.component#9.namevalue#1.optwhite#10.in + main.component#9.namevalue#1.optwhite#10.out + main.component#9.namevalue#1.out_normal + main.component#9.namevalue#1.#19 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 334 = main.component#9.namevalue#1.optwhite#10.in + [(epsilon)] -> component#9.namevalue#1.optwhite#10.out + 0:[\t ] -> component#9.namevalue#1.optwhite#10.in + 1:[\r] -> component#9.namevalue#1.optwhite#10.in + Epsilon closure : + (self) + main.#10 + main.component#9.namevalue#1.optwhite#10.out + main.component#9.namevalue#1.out_normal + main.component#9.namevalue#1.#19 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 335 = main.component#9.namevalue#1.optwhite#10.out + [(epsilon)] -> component#9.namevalue#1.out_normal + Epsilon closure : + (self) + main.#10 + main.component#9.namevalue#1.out_normal + main.component#9.namevalue#1.#19 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 336 = main.component#9.namevalue#1.#11 + [(epsilon)] -> component#9.namevalue#1.value#12.in + Epsilon closure : + (self) + main.component#9.namevalue#1.value#12.in + +NFA state 337 = main.component#9.namevalue#1.optwhite#11.in + [(epsilon)] -> component#9.namevalue#1.optwhite#11.out + 0:[\t ] -> component#9.namevalue#1.optwhite#11.in + 1:[\r] -> component#9.namevalue#1.optwhite#11.in + Epsilon closure : + (self) + main.component#9.namevalue#1.#11 + main.component#9.namevalue#1.optwhite#11.out + main.component#9.namevalue#1.value#12.in + +NFA state 338 = main.component#9.namevalue#1.optwhite#11.out + [(epsilon)] -> component#9.namevalue#1.#11 + Epsilon 
closure : + (self) + main.component#9.namevalue#1.#11 + main.component#9.namevalue#1.value#12.in + +NFA state 339 = main.component#9.namevalue#1.#12 + [(epsilon)] -> component#9.namevalue#1.optwhite#13.in + Epsilon closure : + (self) + main.#10 + main.component#9.namevalue#1.optwhite#13.in + main.component#9.namevalue#1.optwhite#13.out + main.component#9.namevalue#1.out_normal + main.component#9.namevalue#1.#19 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 340 = main.component#9.namevalue#1.value#12.in + 2:[!#-),:<>-@[]^`{-~] -> component#9.namevalue#1.value#12.v1 + 11:[A-Z_a-z] -> component#9.namevalue#1.value#12.v1 + 10:[=] -> component#9.namevalue#1.value#12.v1 + 8:[0-9] -> component#9.namevalue#1.value#12.v1 + 7:[/] -> component#9.namevalue#1.value#12.v1 + 6:[\055] -> component#9.namevalue#1.value#12.v1 + 5:[+.] -> component#9.namevalue#1.value#12.v1 + 4:[*] -> component#9.namevalue#1.value#12.v1 + Epsilon closure : + (self) + +NFA state 341 = main.component#9.namevalue#1.value#12.v1 + [(epsilon)] -> component#9.namevalue#1.value#12.#1 + [(epsilon)] -> component#9.namevalue#1.value#12.out + 2:[!#-),:<>-@[]^`{-~] -> component#9.namevalue#1.value#12.v1 + 11:[A-Z_a-z] -> component#9.namevalue#1.value#12.v1 + 10:[=] -> component#9.namevalue#1.value#12.v1 + 8:[0-9] -> component#9.namevalue#1.value#12.v1 + 7:[/] -> component#9.namevalue#1.value#12.v1 + 6:[\055] -> component#9.namevalue#1.value#12.v1 + 5:[+.] 
-> component#9.namevalue#1.value#12.v1 + 4:[*] -> component#9.namevalue#1.value#12.v1 + Epsilon closure : + (self) + main.#10 + main.component#9.namevalue#1.#12 + main.component#9.namevalue#1.value#12.#1 + main.component#9.namevalue#1.value#12.out + main.component#9.namevalue#1.optwhite#13.in + main.component#9.namevalue#1.optwhite#13.out + main.component#9.namevalue#1.out_normal + main.component#9.namevalue#1.#19 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 342 = main.component#9.namevalue#1.value#12.#1 + Tags : COPY_TO_VALUE + Epsilon closure : + (self) + +NFA state 343 = main.component#9.namevalue#1.value#12.out + [(epsilon)] -> component#9.namevalue#1.#12 + Epsilon closure : + (self) + main.#10 + main.component#9.namevalue#1.#12 + main.component#9.namevalue#1.optwhite#13.in + main.component#9.namevalue#1.optwhite#13.out + main.component#9.namevalue#1.out_normal + main.component#9.namevalue#1.#19 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 344 = main.component#9.namevalue#1.optwhite#13.in + [(epsilon)] -> component#9.namevalue#1.optwhite#13.out + 0:[\t ] -> component#9.namevalue#1.optwhite#13.in + 1:[\r] -> component#9.namevalue#1.optwhite#13.in + Epsilon closure : + (self) + main.#10 + main.component#9.namevalue#1.optwhite#13.out + main.component#9.namevalue#1.out_normal + main.component#9.namevalue#1.#19 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 345 = main.component#9.namevalue#1.optwhite#13.out + [(epsilon)] -> component#9.namevalue#1.out_normal + Epsilon closure : + (self) + main.#10 + main.component#9.namevalue#1.out_normal + main.component#9.namevalue#1.#19 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 346 = 
main.component#9.namevalue#1.#13 + [(epsilon)] -> component#9.namevalue#1.#14 + Epsilon closure : + (self) + main.component#9.namevalue#1.#14 + +NFA state 347 = main.component#9.namevalue#1.optwhite#14.in + [(epsilon)] -> component#9.namevalue#1.optwhite#14.out + 0:[\t ] -> component#9.namevalue#1.optwhite#14.in + 1:[\r] -> component#9.namevalue#1.optwhite#14.in + Epsilon closure : + (self) + main.component#9.namevalue#1.#13 + main.component#9.namevalue#1.optwhite#14.out + main.component#9.namevalue#1.#14 + +NFA state 348 = main.component#9.namevalue#1.optwhite#14.out + [(epsilon)] -> component#9.namevalue#1.#13 + Epsilon closure : + (self) + main.component#9.namevalue#1.#13 + main.component#9.namevalue#1.#14 + +NFA state 349 = main.component#9.namevalue#1.#14 + EOS -> component#9.namevalue#1.out_normal + Epsilon closure : + (self) + +NFA state 350 = main.component#9.namevalue#1.rhs_continue + [(epsilon)] -> component#9.namevalue#1.optwhite#18.in + [(epsilon)] -> component#9.namevalue#1.optwhite#15.in + Epsilon closure : + (self) + main.component#9.namevalue#1.#15 + main.component#9.namevalue#1.optwhite#15.in + main.component#9.namevalue#1.optwhite#15.out + main.component#9.namevalue#1.qvalue#16.in + main.component#9.namevalue#1.#17 + main.component#9.namevalue#1.optwhite#18.in + main.component#9.namevalue#1.optwhite#18.out + main.component#9.namevalue#1.value#19.in + +NFA state 351 = main.component#9.namevalue#1.#15 + [(epsilon)] -> component#9.namevalue#1.qvalue#16.in + Epsilon closure : + (self) + main.component#9.namevalue#1.qvalue#16.in + +NFA state 352 = main.component#9.namevalue#1.optwhite#15.in + [(epsilon)] -> component#9.namevalue#1.optwhite#15.out + 0:[\t ] -> component#9.namevalue#1.optwhite#15.in + 1:[\r] -> component#9.namevalue#1.optwhite#15.in + Epsilon closure : + (self) + main.component#9.namevalue#1.#15 + main.component#9.namevalue#1.optwhite#15.out + main.component#9.namevalue#1.qvalue#16.in + +NFA state 353 = 
main.component#9.namevalue#1.optwhite#15.out + [(epsilon)] -> component#9.namevalue#1.#15 + Epsilon closure : + (self) + main.component#9.namevalue#1.#15 + main.component#9.namevalue#1.qvalue#16.in + +NFA state 354 = main.component#9.namevalue#1.#16 + [(epsilon)] -> component#9.namevalue#1.optwhite#17.in + Epsilon closure : + (self) + main.#10 + main.component#9.namevalue#1.optwhite#17.in + main.component#9.namevalue#1.optwhite#17.out + main.component#9.namevalue#1.out_continue + main.component#9.namevalue#1.#20 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 355 = main.component#9.namevalue#1.qvalue#16.in + 3:["] -> component#9.namevalue#1.qvalue#16.qv0 + Epsilon closure : + (self) + +NFA state 356 = main.component#9.namevalue#1.qvalue#16.qv0 + [(epsilon)] -> component#9.namevalue#1.qvalue#16.escape#1.in + 0:[\t ] -> component#9.namevalue#1.qvalue#16.qv1 + 9:[;] -> component#9.namevalue#1.qvalue#16.qv1 + 2:[!#-),:<>-@[]^`{-~] -> component#9.namevalue#1.qvalue#16.qv1 + 11:[A-Z_a-z] -> component#9.namevalue#1.qvalue#16.qv1 + 10:[=] -> component#9.namevalue#1.qvalue#16.qv1 + 8:[0-9] -> component#9.namevalue#1.qvalue#16.qv1 + 7:[/] -> component#9.namevalue#1.qvalue#16.qv1 + 6:[\055] -> component#9.namevalue#1.qvalue#16.qv1 + 5:[+.] 
-> component#9.namevalue#1.qvalue#16.qv1 + 4:[*] -> component#9.namevalue#1.qvalue#16.qv1 + Epsilon closure : + (self) + main.component#9.namevalue#1.qvalue#16.escape#1.in + +NFA state 357 = main.component#9.namevalue#1.qvalue#16.escape#1.in + 12:[\\] -> component#9.namevalue#1.qvalue#16.escape#1.#2 + 12:[\\] -> component#9.namevalue#1.qvalue#16.escape#1.#1 + Epsilon closure : + (self) + +NFA state 358 = main.component#9.namevalue#1.qvalue#16.escape#1.#1 + 12:[\\] -> component#9.namevalue#1.qvalue#16.escape#1.out + Epsilon closure : + (self) + +NFA state 359 = main.component#9.namevalue#1.qvalue#16.escape#1.#2 + 3:["] -> component#9.namevalue#1.qvalue#16.escape#1.out + Epsilon closure : + (self) + +NFA state 360 = main.component#9.namevalue#1.qvalue#16.escape#1.out + [(epsilon)] -> component#9.namevalue#1.qvalue#16.qv1 + Epsilon closure : + (self) + main.component#9.namevalue#1.qvalue#16.qv1 + main.component#9.namevalue#1.qvalue#16.#1 + main.component#9.namevalue#1.qvalue#16.escape#2.in + main.component#9.namevalue#1.qvalue#16.qv2 + +NFA state 361 = main.component#9.namevalue#1.qvalue#16.qv1 + [(epsilon)] -> component#9.namevalue#1.qvalue#16.#1 + [(epsilon)] -> component#9.namevalue#1.qvalue#16.escape#2.in + 0:[\t ] -> component#9.namevalue#1.qvalue#16.qv1 + 9:[;] -> component#9.namevalue#1.qvalue#16.qv1 + 2:[!#-),:<>-@[]^`{-~] -> component#9.namevalue#1.qvalue#16.qv1 + 11:[A-Z_a-z] -> component#9.namevalue#1.qvalue#16.qv1 + 10:[=] -> component#9.namevalue#1.qvalue#16.qv1 + 8:[0-9] -> component#9.namevalue#1.qvalue#16.qv1 + 7:[/] -> component#9.namevalue#1.qvalue#16.qv1 + 6:[\055] -> component#9.namevalue#1.qvalue#16.qv1 + 5:[+.] 
-> component#9.namevalue#1.qvalue#16.qv1 + 4:[*] -> component#9.namevalue#1.qvalue#16.qv1 + [(epsilon)] -> component#9.namevalue#1.qvalue#16.qv2 + Epsilon closure : + (self) + main.component#9.namevalue#1.qvalue#16.#1 + main.component#9.namevalue#1.qvalue#16.escape#2.in + main.component#9.namevalue#1.qvalue#16.qv2 + +NFA state 362 = main.component#9.namevalue#1.qvalue#16.#1 + Tags : COPY_TO_VALUE + Epsilon closure : + (self) + +NFA state 363 = main.component#9.namevalue#1.qvalue#16.escape#2.in + 12:[\\] -> component#9.namevalue#1.qvalue#16.escape#2.#2 + 12:[\\] -> component#9.namevalue#1.qvalue#16.escape#2.#1 + Epsilon closure : + (self) + +NFA state 364 = main.component#9.namevalue#1.qvalue#16.escape#2.#1 + 12:[\\] -> component#9.namevalue#1.qvalue#16.escape#2.out + Epsilon closure : + (self) + +NFA state 365 = main.component#9.namevalue#1.qvalue#16.escape#2.#2 + 3:["] -> component#9.namevalue#1.qvalue#16.escape#2.out + Epsilon closure : + (self) + +NFA state 366 = main.component#9.namevalue#1.qvalue#16.escape#2.out + [(epsilon)] -> component#9.namevalue#1.qvalue#16.qv1 + Epsilon closure : + (self) + main.component#9.namevalue#1.qvalue#16.qv1 + main.component#9.namevalue#1.qvalue#16.#1 + main.component#9.namevalue#1.qvalue#16.escape#2.in + main.component#9.namevalue#1.qvalue#16.qv2 + +NFA state 367 = main.component#9.namevalue#1.qvalue#16.qv2 + 3:["] -> component#9.namevalue#1.qvalue#16.out + Epsilon closure : + (self) + +NFA state 368 = main.component#9.namevalue#1.qvalue#16.out + [(epsilon)] -> component#9.namevalue#1.#16 + Epsilon closure : + (self) + main.#10 + main.component#9.namevalue#1.#16 + main.component#9.namevalue#1.optwhite#17.in + main.component#9.namevalue#1.optwhite#17.out + main.component#9.namevalue#1.out_continue + main.component#9.namevalue#1.#20 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 369 = main.component#9.namevalue#1.optwhite#17.in + [(epsilon)] -> 
component#9.namevalue#1.optwhite#17.out + 0:[\t ] -> component#9.namevalue#1.optwhite#17.in + 1:[\r] -> component#9.namevalue#1.optwhite#17.in + Epsilon closure : + (self) + main.#10 + main.component#9.namevalue#1.optwhite#17.out + main.component#9.namevalue#1.out_continue + main.component#9.namevalue#1.#20 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 370 = main.component#9.namevalue#1.optwhite#17.out + [(epsilon)] -> component#9.namevalue#1.out_continue + Epsilon closure : + (self) + main.#10 + main.component#9.namevalue#1.out_continue + main.component#9.namevalue#1.#20 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 371 = main.component#9.namevalue#1.#17 + [(epsilon)] -> component#9.namevalue#1.value#19.in + Epsilon closure : + (self) + main.component#9.namevalue#1.value#19.in + +NFA state 372 = main.component#9.namevalue#1.optwhite#18.in + [(epsilon)] -> component#9.namevalue#1.optwhite#18.out + 0:[\t ] -> component#9.namevalue#1.optwhite#18.in + 1:[\r] -> component#9.namevalue#1.optwhite#18.in + Epsilon closure : + (self) + main.component#9.namevalue#1.#17 + main.component#9.namevalue#1.optwhite#18.out + main.component#9.namevalue#1.value#19.in + +NFA state 373 = main.component#9.namevalue#1.optwhite#18.out + [(epsilon)] -> component#9.namevalue#1.#17 + Epsilon closure : + (self) + main.component#9.namevalue#1.#17 + main.component#9.namevalue#1.value#19.in + +NFA state 374 = main.component#9.namevalue#1.#18 + [(epsilon)] -> component#9.namevalue#1.optwhite#20.in + Epsilon closure : + (self) + main.#10 + main.component#9.namevalue#1.optwhite#20.in + main.component#9.namevalue#1.optwhite#20.out + main.component#9.namevalue#1.out_continue + main.component#9.namevalue#1.#20 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 375 = 
main.component#9.namevalue#1.value#19.in + 2:[!#-),:<>-@[]^`{-~] -> component#9.namevalue#1.value#19.v1 + 11:[A-Z_a-z] -> component#9.namevalue#1.value#19.v1 + 10:[=] -> component#9.namevalue#1.value#19.v1 + 8:[0-9] -> component#9.namevalue#1.value#19.v1 + 7:[/] -> component#9.namevalue#1.value#19.v1 + 6:[\055] -> component#9.namevalue#1.value#19.v1 + 5:[+.] -> component#9.namevalue#1.value#19.v1 + 4:[*] -> component#9.namevalue#1.value#19.v1 + Epsilon closure : + (self) + +NFA state 376 = main.component#9.namevalue#1.value#19.v1 + [(epsilon)] -> component#9.namevalue#1.value#19.#1 + [(epsilon)] -> component#9.namevalue#1.value#19.out + 2:[!#-),:<>-@[]^`{-~] -> component#9.namevalue#1.value#19.v1 + 11:[A-Z_a-z] -> component#9.namevalue#1.value#19.v1 + 10:[=] -> component#9.namevalue#1.value#19.v1 + 8:[0-9] -> component#9.namevalue#1.value#19.v1 + 7:[/] -> component#9.namevalue#1.value#19.v1 + 6:[\055] -> component#9.namevalue#1.value#19.v1 + 5:[+.] -> component#9.namevalue#1.value#19.v1 + 4:[*] -> component#9.namevalue#1.value#19.v1 + Epsilon closure : + (self) + main.#10 + main.component#9.namevalue#1.#18 + main.component#9.namevalue#1.value#19.#1 + main.component#9.namevalue#1.value#19.out + main.component#9.namevalue#1.optwhite#20.in + main.component#9.namevalue#1.optwhite#20.out + main.component#9.namevalue#1.out_continue + main.component#9.namevalue#1.#20 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 377 = main.component#9.namevalue#1.value#19.#1 + Tags : COPY_TO_VALUE + Epsilon closure : + (self) + +NFA state 378 = main.component#9.namevalue#1.value#19.out + [(epsilon)] -> component#9.namevalue#1.#18 + Epsilon closure : + (self) + main.#10 + main.component#9.namevalue#1.#18 + main.component#9.namevalue#1.optwhite#20.in + main.component#9.namevalue#1.optwhite#20.out + main.component#9.namevalue#1.out_continue + main.component#9.namevalue#1.#20 + main.component#9.namevalue#1.out + 
main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 379 = main.component#9.namevalue#1.optwhite#20.in + [(epsilon)] -> component#9.namevalue#1.optwhite#20.out + 0:[\t ] -> component#9.namevalue#1.optwhite#20.in + 1:[\r] -> component#9.namevalue#1.optwhite#20.in + Epsilon closure : + (self) + main.#10 + main.component#9.namevalue#1.optwhite#20.out + main.component#9.namevalue#1.out_continue + main.component#9.namevalue#1.#20 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 380 = main.component#9.namevalue#1.optwhite#20.out + [(epsilon)] -> component#9.namevalue#1.out_continue + Epsilon closure : + (self) + main.#10 + main.component#9.namevalue#1.out_continue + main.component#9.namevalue#1.#20 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 381 = main.component#9.namevalue#1.out_normal + [(epsilon)] -> component#9.namevalue#1.out + [(epsilon)] -> component#9.namevalue#1.#19 + Epsilon closure : + (self) + main.#10 + main.component#9.namevalue#1.#19 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 382 = main.component#9.namevalue#1.#19 + Tags : GOT_NAMEVALUE + Epsilon closure : + (self) + +NFA state 383 = main.component#9.namevalue#1.out_continue + [(epsilon)] -> component#9.namevalue#1.out + [(epsilon)] -> component#9.namevalue#1.#20 + Epsilon closure : + (self) + main.#10 + main.component#9.namevalue#1.#20 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 384 = main.component#9.namevalue#1.#20 + Tags : GOT_NAMEVALUE_CONT + Epsilon closure : + (self) + +NFA state 385 = main.component#9.namevalue#1.out + [(epsilon)] -> component#9.out + Epsilon closure : + (self) + main.#10 + main.component#9.out + main.#11 + 
main.optwhite#10.in + main.optwhite#10.out + +NFA state 386 = main.component#9.name#2.in + 6:[\055] -> component#9.name#2.name1 + 11:[A-Z_a-z] -> component#9.name#2.name1 + 8:[0-9] -> component#9.name#2.name1 + Epsilon closure : + (self) + +NFA state 387 = main.component#9.name#2.name1 + [(epsilon)] -> component#9.name#2.out + 0:[\t ] -> component#9.name#2.name2 + 6:[\055] -> component#9.name#2.name1 + 11:[A-Z_a-z] -> component#9.name#2.name1 + 8:[0-9] -> component#9.name#2.name1 + [(epsilon)] -> component#9.name#2.#2 + [(epsilon)] -> component#9.name#2.#1 + Epsilon closure : + (self) + main.#10 + main.component#9.name#2.#1 + main.component#9.name#2.#2 + main.component#9.name#2.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 388 = main.component#9.name#2.#1 + Tags : COPY_TO_NAME + Epsilon closure : + (self) + +NFA state 389 = main.component#9.name#2.#2 + Tags : GOT_NAME + Epsilon closure : + (self) + +NFA state 390 = main.component#9.name#2.name2 + [(epsilon)] -> component#9.name#2.out + 6:[\055] -> component#9.name#2.name1 + 11:[A-Z_a-z] -> component#9.name#2.name1 + 8:[0-9] -> component#9.name#2.name1 + 0:[\t ] -> component#9.name#2.name2 + [(epsilon)] -> component#9.name#2.#4 + [(epsilon)] -> component#9.name#2.#3 + Epsilon closure : + (self) + main.#10 + main.component#9.name#2.#3 + main.component#9.name#2.#4 + main.component#9.name#2.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 391 = main.component#9.name#2.#3 + Tags : COPY_TO_NAME + Epsilon closure : + (self) + +NFA state 392 = main.component#9.name#2.#4 + Tags : GOT_NAME_TRAILING_SPACE + Epsilon closure : + (self) + +NFA state 393 = main.component#9.name#2.out + [(epsilon)] -> component#9.out + Epsilon closure : + (self) + main.#10 + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 394 = main.component#9.majorminor#3.in + [(epsilon)] -> 
component#9.majorminor#3.major#1.in + Epsilon closure : + (self) + main.component#9.majorminor#3.major#1.in + +NFA state 395 = main.component#9.majorminor#3.major#1.in + 6:[\055] -> component#9.majorminor#3.major#1.name1 + 11:[A-Z_a-z] -> component#9.majorminor#3.major#1.name1 + 8:[0-9] -> component#9.majorminor#3.major#1.name1 + Epsilon closure : + (self) + +NFA state 396 = main.component#9.majorminor#3.major#1.name1 + 6:[\055] -> component#9.majorminor#3.major#1.name1 + 11:[A-Z_a-z] -> component#9.majorminor#3.major#1.name1 + 8:[0-9] -> component#9.majorminor#3.major#1.name1 + [(epsilon)] -> component#9.majorminor#3.major#1.out + Epsilon closure : + (self) + main.component#9.majorminor#3.major#1.out + main.component#9.majorminor#3.foo + +NFA state 397 = main.component#9.majorminor#3.major#1.out + [(epsilon)] -> component#9.majorminor#3.foo + Epsilon closure : + (self) + main.component#9.majorminor#3.foo + +NFA state 398 = main.component#9.majorminor#3.foo + 7:[/] -> component#9.majorminor#3.bar + Epsilon closure : + (self) + +NFA state 399 = main.component#9.majorminor#3.bar + [(epsilon)] -> component#9.majorminor#3.minor#2.in + Epsilon closure : + (self) + main.component#9.majorminor#3.minor#2.in + +NFA state 400 = main.component#9.majorminor#3.minor#2.in + 5:[+.] -> component#9.majorminor#3.minor#2.minor1 + 12:[\\] -> component#9.majorminor#3.minor#2.minor1 + 6:[\055] -> component#9.majorminor#3.minor#2.minor1 + 6:[\055] -> component#9.majorminor#3.minor#2.minor1 + 11:[A-Z_a-z] -> component#9.majorminor#3.minor#2.minor1 + 8:[0-9] -> component#9.majorminor#3.minor#2.minor1 + Epsilon closure : + (self) + +NFA state 401 = main.component#9.majorminor#3.minor#2.minor1 + [(epsilon)] -> component#9.majorminor#3.minor#2.#1 + 5:[+.] 
-> component#9.majorminor#3.minor#2.minor1 + 12:[\\] -> component#9.majorminor#3.minor#2.minor1 + 6:[\055] -> component#9.majorminor#3.minor#2.minor1 + 6:[\055] -> component#9.majorminor#3.minor#2.minor1 + 11:[A-Z_a-z] -> component#9.majorminor#3.minor#2.minor1 + 8:[0-9] -> component#9.majorminor#3.minor#2.minor1 + [(epsilon)] -> component#9.majorminor#3.minor#2.out + Epsilon closure : + (self) + main.#10 + main.component#9.majorminor#3.minor#2.#1 + main.component#9.majorminor#3.minor#2.out + main.component#9.majorminor#3.out + main.component#9.majorminor#3.#1 + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 402 = main.component#9.majorminor#3.minor#2.#1 + Tags : COPY_TO_MINOR + Epsilon closure : + (self) + +NFA state 403 = main.component#9.majorminor#3.minor#2.out + [(epsilon)] -> component#9.majorminor#3.out + Epsilon closure : + (self) + main.#10 + main.component#9.majorminor#3.out + main.component#9.majorminor#3.#1 + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 404 = main.component#9.majorminor#3.out + [(epsilon)] -> component#9.majorminor#3.#1 + [(epsilon)] -> component#9.out + Epsilon closure : + (self) + main.#10 + main.component#9.majorminor#3.#1 + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 405 = main.component#9.majorminor#3.#1 + Tags : GOT_MAJORMINOR + Epsilon closure : + (self) + +NFA state 406 = main.component#9.out + [(epsilon)] -> #10 + Epsilon closure : + (self) + main.#10 + main.#11 + main.optwhite#10.in + main.optwhite#10.out + +NFA state 407 = main.#11 + 9:[;] -> in2 + Epsilon closure : + (self) + +NFA state 408 = main.optwhite#10.in + [(epsilon)] -> optwhite#10.out + 0:[\t ] -> optwhite#10.in + 1:[\r] -> optwhite#10.in + Epsilon closure : + (self) + main.#11 + main.optwhite#10.out + +NFA state 409 = main.optwhite#10.out + [(epsilon)] -> #11 + Epsilon closure : + (self) + main.#11 + +NFA state 410 = main.in2 + 
[(epsilon)] -> in + [(epsilon)] -> #12 + Epsilon closure : + (self) + main.in + main.#1 + main.optwhite#1.in + main.optwhite#1.out + main.component#2.in + main.component#2.namevalue#1.in + main.component#2.namevalue#1.#1 + main.component#2.namevalue#1.optwhite#1.in + main.component#2.namevalue#1.optwhite#1.out + main.component#2.namevalue#1.name#2.in + main.component#2.namevalue#1.#4 + main.component#2.namevalue#1.optwhite#4.in + main.component#2.namevalue#1.optwhite#4.out + main.component#2.namevalue#1.name#5.in + main.component#2.name#2.in + main.component#2.majorminor#3.in + main.component#2.majorminor#3.major#1.in + main.#4 + main.optwhite#4.in + main.optwhite#4.out + main.component#5.in + main.component#5.namevalue#1.in + main.component#5.namevalue#1.#1 + main.component#5.namevalue#1.optwhite#1.in + main.component#5.namevalue#1.optwhite#1.out + main.component#5.namevalue#1.name#2.in + main.component#5.namevalue#1.#4 + main.component#5.namevalue#1.optwhite#4.in + main.component#5.namevalue#1.optwhite#4.out + main.component#5.namevalue#1.name#5.in + main.component#5.name#2.in + main.component#5.majorminor#3.in + main.component#5.majorminor#3.major#1.in + main.#9 + main.optwhite#8.in + main.optwhite#8.out + main.component#9.in + main.component#9.namevalue#1.in + main.component#9.namevalue#1.#1 + main.component#9.namevalue#1.optwhite#1.in + main.component#9.namevalue#1.optwhite#1.out + main.component#9.namevalue#1.name#2.in + main.component#9.namevalue#1.#4 + main.component#9.namevalue#1.optwhite#4.in + main.component#9.namevalue#1.optwhite#4.out + main.component#9.namevalue#1.name#5.in + main.component#9.name#2.in + main.component#9.majorminor#3.in + main.component#9.majorminor#3.major#1.in + main.#12 + +NFA state 411 = main.#12 + Tags : GOT_TERMINATOR + Epsilon closure : + (self) + +NFA state 412 = main.out2 + [(epsilon)] -> out + [(epsilon)] -> #13 + Epsilon closure : + (self) + main.#13 + main.out + +NFA state 413 = main.#13 + Tags : GOT_TERMINATOR + Epsilon 
closure : + (self) + +NFA state 414 = main.out + Epsilon closure : + (self) + +-------------------------------- +DFA structure before compression +-------------------------------- +DFA state 0 + NFA states : + main.in + main.#1 + main.optwhite#1.in + main.optwhite#1.out + main.component#2.in + main.component#2.namevalue#1.in + main.component#2.namevalue#1.#1 + main.component#2.namevalue#1.optwhite#1.in + main.component#2.namevalue#1.optwhite#1.out + main.component#2.namevalue#1.name#2.in + main.component#2.namevalue#1.#4 + main.component#2.namevalue#1.optwhite#4.in + main.component#2.namevalue#1.optwhite#4.out + main.component#2.namevalue#1.name#5.in + main.component#2.name#2.in + main.component#2.majorminor#3.in + main.component#2.majorminor#3.major#1.in + main.#4 + main.optwhite#4.in + main.optwhite#4.out + main.component#5.in + main.component#5.namevalue#1.in + main.component#5.namevalue#1.#1 + main.component#5.namevalue#1.optwhite#1.in + main.component#5.namevalue#1.optwhite#1.out + main.component#5.namevalue#1.name#2.in + main.component#5.namevalue#1.#4 + main.component#5.namevalue#1.optwhite#4.in + main.component#5.namevalue#1.optwhite#4.out + main.component#5.namevalue#1.name#5.in + main.component#5.name#2.in + main.component#5.majorminor#3.in + main.component#5.majorminor#3.major#1.in + main.#9 + main.optwhite#8.in + main.optwhite#8.out + main.component#9.in + main.component#9.namevalue#1.in + main.component#9.namevalue#1.#1 + main.component#9.namevalue#1.optwhite#1.in + main.component#9.namevalue#1.optwhite#1.out + main.component#9.namevalue#1.name#2.in + main.component#9.namevalue#1.#4 + main.component#9.namevalue#1.optwhite#4.in + main.component#9.namevalue#1.optwhite#4.out + main.component#9.namevalue#1.name#5.in + main.component#9.name#2.in + main.component#9.majorminor#3.in + main.component#9.majorminor#3.major#1.in + + Forward route : + (START)->(HERE) + Transitions : + 0:[\t ] -> 1 + 1:[\r] -> 1 + 6:[\055] -> 2 + 8:[0-9] -> 2 + 11:[A-Z_a-z] -> 2 + 
+DFA state 1 + NFA states : + main.#1 + main.optwhite#1.in + main.optwhite#1.out + main.component#2.in + main.component#2.namevalue#1.in + main.component#2.namevalue#1.#1 + main.component#2.namevalue#1.optwhite#1.in + main.component#2.namevalue#1.optwhite#1.out + main.component#2.namevalue#1.name#2.in + main.component#2.namevalue#1.#4 + main.component#2.namevalue#1.optwhite#4.in + main.component#2.namevalue#1.optwhite#4.out + main.component#2.namevalue#1.name#5.in + main.component#2.name#2.in + main.component#2.majorminor#3.in + main.component#2.majorminor#3.major#1.in + main.#4 + main.optwhite#4.in + main.optwhite#4.out + main.component#5.in + main.component#5.namevalue#1.in + main.component#5.namevalue#1.#1 + main.component#5.namevalue#1.optwhite#1.in + main.component#5.namevalue#1.optwhite#1.out + main.component#5.namevalue#1.name#2.in + main.component#5.namevalue#1.#4 + main.component#5.namevalue#1.optwhite#4.in + main.component#5.namevalue#1.optwhite#4.out + main.component#5.namevalue#1.name#5.in + main.component#5.name#2.in + main.component#5.majorminor#3.in + main.component#5.majorminor#3.major#1.in + main.#9 + main.optwhite#8.in + main.optwhite#8.out + main.component#9.in + main.component#9.namevalue#1.in + main.component#9.namevalue#1.#1 + main.component#9.namevalue#1.optwhite#1.in + main.component#9.namevalue#1.optwhite#1.out + main.component#9.namevalue#1.name#2.in + main.component#9.namevalue#1.#4 + main.component#9.namevalue#1.optwhite#4.in + main.component#9.namevalue#1.optwhite#4.out + main.component#9.namevalue#1.name#5.in + main.component#9.name#2.in + main.component#9.majorminor#3.in + main.component#9.majorminor#3.major#1.in + + Forward route : (from state 0) + (START)->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 1 + 1:[\r] -> 1 + 6:[\055] -> 2 + 8:[0-9] -> 2 + 11:[A-Z_a-z] -> 2 + +DFA state 2 + NFA states : + main.#2 + main.component#2.namevalue#1.#2 + main.component#2.namevalue#1.name#2.name1 + main.component#2.namevalue#1.name#2.#1 + 
main.component#2.namevalue#1.name#2.#2 + main.component#2.namevalue#1.name#2.out + main.component#2.namevalue#1.#3 + main.component#2.namevalue#1.optwhite#3.in + main.component#2.namevalue#1.optwhite#3.out + main.component#2.namevalue#1.#5 + main.component#2.namevalue#1.name#5.name1 + main.component#2.namevalue#1.name#5.#1 + main.component#2.namevalue#1.name#5.#2 + main.component#2.namevalue#1.name#5.out + main.component#2.name#2.name1 + main.component#2.name#2.#1 + main.component#2.name#2.#2 + main.component#2.name#2.out + main.component#2.majorminor#3.major#1.name1 + main.component#2.majorminor#3.major#1.out + main.component#2.majorminor#3.foo + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + main.#5 + main.component#5.namevalue#1.#2 + main.component#5.namevalue#1.name#2.name1 + main.component#5.namevalue#1.name#2.#1 + main.component#5.namevalue#1.name#2.#2 + main.component#5.namevalue#1.name#2.out + main.component#5.namevalue#1.#3 + main.component#5.namevalue#1.optwhite#3.in + main.component#5.namevalue#1.optwhite#3.out + main.component#5.namevalue#1.#5 + main.component#5.namevalue#1.name#5.name1 + main.component#5.namevalue#1.name#5.#1 + main.component#5.namevalue#1.name#5.#2 + main.component#5.namevalue#1.name#5.out + main.component#5.name#2.name1 + main.component#5.name#2.#1 + main.component#5.name#2.#2 + main.component#5.name#2.out + main.component#5.majorminor#3.major#1.name1 + main.component#5.majorminor#3.major#1.out + main.component#5.majorminor#3.foo + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + main.#10 + main.component#9.namevalue#1.#2 + main.component#9.namevalue#1.name#2.name1 + main.component#9.namevalue#1.name#2.#1 + main.component#9.namevalue#1.name#2.#2 + main.component#9.namevalue#1.name#2.out + main.component#9.namevalue#1.#3 + main.component#9.namevalue#1.optwhite#3.in + main.component#9.namevalue#1.optwhite#3.out + main.component#9.namevalue#1.#5 + 
main.component#9.namevalue#1.name#5.name1 + main.component#9.namevalue#1.name#5.#1 + main.component#9.namevalue#1.name#5.#2 + main.component#9.namevalue#1.name#5.out + main.component#9.name#2.name1 + main.component#9.name#2.#1 + main.component#9.name#2.#2 + main.component#9.name#2.out + main.component#9.majorminor#3.major#1.name1 + main.component#9.majorminor#3.major#1.out + main.component#9.majorminor#3.foo + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + + Forward route : (from state 0) + (START)->6:[\055]->(HERE) + Transitions : + EOS -> 3 + 0:[\t ] -> 4 + 1:[\r] -> 5 + 4:[*] -> 6 + 6:[\055] -> 2 + 7:[/] -> 7 + 8:[0-9] -> 2 + 9:[;] -> 8 + 10:[=] -> 9 + 11:[A-Z_a-z] -> 2 + NFA exit tags applying : + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + Attributes for <copier> : COPY_TO_NAME + Attributes for <action> : GOT_NAME + +DFA state 3 + NFA states : + main.out2 + main.#13 + main.out + + Forward route : (from state 2) + (START)->6:[\055]->EOS->(HERE) + Transitions : + NFA exit tags applying : + GOT_TERMINATOR + Attributes for <action> : GOT_TERMINATOR + +DFA state 4 + NFA states : + main.#2 + main.component#2.namevalue#1.#2 + main.component#2.namevalue#1.name#2.name2 + main.component#2.namevalue#1.name#2.#3 + main.component#2.namevalue#1.name#2.#4 + main.component#2.namevalue#1.name#2.out + main.component#2.namevalue#1.#3 + main.component#2.namevalue#1.optwhite#3.in + main.component#2.namevalue#1.optwhite#3.out + main.component#2.namevalue#1.#5 + main.component#2.namevalue#1.name#5.name2 + main.component#2.namevalue#1.name#5.#3 + main.component#2.namevalue#1.name#5.#4 + main.component#2.namevalue#1.name#5.out + main.component#2.name#2.name2 + main.component#2.name#2.#3 + main.component#2.name#2.#4 + main.component#2.name#2.out + main.component#2.out 
+ main.#3 + main.optwhite#3.in + main.optwhite#3.out + main.#5 + main.component#5.namevalue#1.#2 + main.component#5.namevalue#1.name#2.name2 + main.component#5.namevalue#1.name#2.#3 + main.component#5.namevalue#1.name#2.#4 + main.component#5.namevalue#1.name#2.out + main.component#5.namevalue#1.#3 + main.component#5.namevalue#1.optwhite#3.in + main.component#5.namevalue#1.optwhite#3.out + main.component#5.namevalue#1.#5 + main.component#5.namevalue#1.name#5.name2 + main.component#5.namevalue#1.name#5.#3 + main.component#5.namevalue#1.name#5.#4 + main.component#5.namevalue#1.name#5.out + main.component#5.name#2.name2 + main.component#5.name#2.#3 + main.component#5.name#2.#4 + main.component#5.name#2.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + main.#10 + main.component#9.namevalue#1.#2 + main.component#9.namevalue#1.name#2.name2 + main.component#9.namevalue#1.name#2.#3 + main.component#9.namevalue#1.name#2.#4 + main.component#9.namevalue#1.name#2.out + main.component#9.namevalue#1.#3 + main.component#9.namevalue#1.optwhite#3.in + main.component#9.namevalue#1.optwhite#3.out + main.component#9.namevalue#1.#5 + main.component#9.namevalue#1.name#5.name2 + main.component#9.namevalue#1.name#5.#3 + main.component#9.namevalue#1.name#5.#4 + main.component#9.namevalue#1.name#5.out + main.component#9.name#2.name2 + main.component#9.name#2.#3 + main.component#9.name#2.#4 + main.component#9.name#2.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + + Forward route : (from state 2) + (START)->6:[\055]->0:[\t ]->(HERE) + Transitions : + EOS -> 3 + 0:[\t ] -> 4 + 1:[\r] -> 5 + 4:[*] -> 6 + 6:[\055] -> 10 + 8:[0-9] -> 10 + 9:[;] -> 8 + 10:[=] -> 9 + 11:[A-Z_a-z] -> 10 + NFA exit tags applying : + GOT_NAME_TRAILING_SPACE + COPY_TO_NAME + GOT_NAME_TRAILING_SPACE + COPY_TO_NAME + GOT_NAME_TRAILING_SPACE + COPY_TO_NAME + GOT_NAME_TRAILING_SPACE + COPY_TO_NAME + GOT_NAME_TRAILING_SPACE + COPY_TO_NAME + 
GOT_NAME_TRAILING_SPACE + COPY_TO_NAME + GOT_NAME_TRAILING_SPACE + COPY_TO_NAME + GOT_NAME_TRAILING_SPACE + COPY_TO_NAME + GOT_NAME_TRAILING_SPACE + COPY_TO_NAME + Attributes for <copier> : COPY_TO_NAME + Attributes for <action> : GOT_NAME_TRAILING_SPACE + +DFA state 5 + NFA states : + main.component#2.namevalue#1.#3 + main.component#2.namevalue#1.optwhite#3.in + main.component#2.namevalue#1.optwhite#3.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + main.component#5.namevalue#1.#3 + main.component#5.namevalue#1.optwhite#3.in + main.component#5.namevalue#1.optwhite#3.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + main.component#9.namevalue#1.#3 + main.component#9.namevalue#1.optwhite#3.in + main.component#9.namevalue#1.optwhite#3.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + + Forward route : (from state 2) + (START)->6:[\055]->1:[\r]->(HERE) + Transitions : + EOS -> 3 + 0:[\t ] -> 5 + 1:[\r] -> 5 + 9:[;] -> 8 + 10:[=] -> 9 + +DFA state 6 + NFA states : + main.component#2.namevalue#1.#6 + main.component#2.namevalue#1.digits#6.in + main.component#5.namevalue#1.#6 + main.component#5.namevalue#1.digits#6.in + main.component#9.namevalue#1.#6 + main.component#9.namevalue#1.digits#6.in + + Forward route : (from state 2) + (START)->6:[\055]->4:[*]->(HERE) + Transitions : + 8:[0-9] -> 11 + +DFA state 7 + NFA states : + main.component#2.majorminor#3.bar + main.component#2.majorminor#3.minor#2.in + main.component#5.majorminor#3.bar + main.component#5.majorminor#3.minor#2.in + main.component#9.majorminor#3.bar + main.component#9.majorminor#3.minor#2.in + + Forward route : (from state 2) + (START)->6:[\055]->7:[/]->(HERE) + Transitions : + 5:[+.] 
-> 12 + 6:[\055] -> 12 + 8:[0-9] -> 12 + 11:[A-Z_a-z] -> 12 + 12:[\\] -> 12 + +DFA state 8 + NFA states : + main.in + main.#1 + main.optwhite#1.in + main.optwhite#1.out + main.component#2.in + main.component#2.namevalue#1.in + main.component#2.namevalue#1.#1 + main.component#2.namevalue#1.optwhite#1.in + main.component#2.namevalue#1.optwhite#1.out + main.component#2.namevalue#1.name#2.in + main.component#2.namevalue#1.#4 + main.component#2.namevalue#1.optwhite#4.in + main.component#2.namevalue#1.optwhite#4.out + main.component#2.namevalue#1.name#5.in + main.component#2.name#2.in + main.component#2.majorminor#3.in + main.component#2.majorminor#3.major#1.in + main.#4 + main.optwhite#4.in + main.optwhite#4.out + main.component#5.in + main.component#5.namevalue#1.in + main.component#5.namevalue#1.#1 + main.component#5.namevalue#1.optwhite#1.in + main.component#5.namevalue#1.optwhite#1.out + main.component#5.namevalue#1.name#2.in + main.component#5.namevalue#1.#4 + main.component#5.namevalue#1.optwhite#4.in + main.component#5.namevalue#1.optwhite#4.out + main.component#5.namevalue#1.name#5.in + main.component#5.name#2.in + main.component#5.majorminor#3.in + main.component#5.majorminor#3.major#1.in + main.#7 + main.#8 + main.optwhite#7.in + main.optwhite#7.out + main.#9 + main.optwhite#8.in + main.optwhite#8.out + main.component#9.in + main.component#9.namevalue#1.in + main.component#9.namevalue#1.#1 + main.component#9.namevalue#1.optwhite#1.in + main.component#9.namevalue#1.optwhite#1.out + main.component#9.namevalue#1.name#2.in + main.component#9.namevalue#1.#4 + main.component#9.namevalue#1.optwhite#4.in + main.component#9.namevalue#1.optwhite#4.out + main.component#9.namevalue#1.name#5.in + main.component#9.name#2.in + main.component#9.majorminor#3.in + main.component#9.majorminor#3.major#1.in + main.in2 + main.#12 + + Forward route : (from state 2) + (START)->6:[\055]->9:[;]->(HERE) + Transitions : + EOS -> 3 + 0:[\t ] -> 13 + 1:[\r] -> 13 + 6:[\055] -> 2 + 8:[0-9] 
-> 2 + 11:[A-Z_a-z] -> 2 + NFA exit tags applying : + GOT_TERMINATOR + Attributes for <action> : GOT_TERMINATOR + +DFA state 9 + NFA states : + main.component#2.namevalue#1.rhs_normal + main.component#2.namevalue#1.#9 + main.component#2.namevalue#1.optwhite#8.in + main.component#2.namevalue#1.optwhite#8.out + main.component#2.namevalue#1.qvalue#9.in + main.component#2.namevalue#1.#11 + main.component#2.namevalue#1.optwhite#11.in + main.component#2.namevalue#1.optwhite#11.out + main.component#2.namevalue#1.value#12.in + main.component#2.namevalue#1.#13 + main.component#2.namevalue#1.optwhite#14.in + main.component#2.namevalue#1.optwhite#14.out + main.component#2.namevalue#1.#14 + main.component#5.namevalue#1.rhs_normal + main.component#5.namevalue#1.#9 + main.component#5.namevalue#1.optwhite#8.in + main.component#5.namevalue#1.optwhite#8.out + main.component#5.namevalue#1.qvalue#9.in + main.component#5.namevalue#1.#11 + main.component#5.namevalue#1.optwhite#11.in + main.component#5.namevalue#1.optwhite#11.out + main.component#5.namevalue#1.value#12.in + main.component#5.namevalue#1.#13 + main.component#5.namevalue#1.optwhite#14.in + main.component#5.namevalue#1.optwhite#14.out + main.component#5.namevalue#1.#14 + main.component#9.namevalue#1.rhs_normal + main.component#9.namevalue#1.#9 + main.component#9.namevalue#1.optwhite#8.in + main.component#9.namevalue#1.optwhite#8.out + main.component#9.namevalue#1.qvalue#9.in + main.component#9.namevalue#1.#11 + main.component#9.namevalue#1.optwhite#11.in + main.component#9.namevalue#1.optwhite#11.out + main.component#9.namevalue#1.value#12.in + main.component#9.namevalue#1.#13 + main.component#9.namevalue#1.optwhite#14.in + main.component#9.namevalue#1.optwhite#14.out + main.component#9.namevalue#1.#14 + + Forward route : (from state 2) + (START)->6:[\055]->10:[=]->(HERE) + Transitions : + EOS -> 14 + 0:[\t ] -> 15 + 1:[\r] -> 15 + 2:[!#-),:<>-@[]^`{-~] -> 16 + 3:["] -> 17 + 4:[*] -> 16 + 5:[+.] 
-> 16 + 6:[\055] -> 16 + 7:[/] -> 16 + 8:[0-9] -> 16 + 10:[=] -> 16 + 11:[A-Z_a-z] -> 16 + +DFA state 10 + NFA states : + main.#2 + main.component#2.namevalue#1.#2 + main.component#2.namevalue#1.name#2.name1 + main.component#2.namevalue#1.name#2.#1 + main.component#2.namevalue#1.name#2.#2 + main.component#2.namevalue#1.name#2.out + main.component#2.namevalue#1.#3 + main.component#2.namevalue#1.optwhite#3.in + main.component#2.namevalue#1.optwhite#3.out + main.component#2.namevalue#1.#5 + main.component#2.namevalue#1.name#5.name1 + main.component#2.namevalue#1.name#5.#1 + main.component#2.namevalue#1.name#5.#2 + main.component#2.namevalue#1.name#5.out + main.component#2.name#2.name1 + main.component#2.name#2.#1 + main.component#2.name#2.#2 + main.component#2.name#2.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + main.#5 + main.component#5.namevalue#1.#2 + main.component#5.namevalue#1.name#2.name1 + main.component#5.namevalue#1.name#2.#1 + main.component#5.namevalue#1.name#2.#2 + main.component#5.namevalue#1.name#2.out + main.component#5.namevalue#1.#3 + main.component#5.namevalue#1.optwhite#3.in + main.component#5.namevalue#1.optwhite#3.out + main.component#5.namevalue#1.#5 + main.component#5.namevalue#1.name#5.name1 + main.component#5.namevalue#1.name#5.#1 + main.component#5.namevalue#1.name#5.#2 + main.component#5.namevalue#1.name#5.out + main.component#5.name#2.name1 + main.component#5.name#2.#1 + main.component#5.name#2.#2 + main.component#5.name#2.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + main.#10 + main.component#9.namevalue#1.#2 + main.component#9.namevalue#1.name#2.name1 + main.component#9.namevalue#1.name#2.#1 + main.component#9.namevalue#1.name#2.#2 + main.component#9.namevalue#1.name#2.out + main.component#9.namevalue#1.#3 + main.component#9.namevalue#1.optwhite#3.in + main.component#9.namevalue#1.optwhite#3.out + main.component#9.namevalue#1.#5 + 
main.component#9.namevalue#1.name#5.name1 + main.component#9.namevalue#1.name#5.#1 + main.component#9.namevalue#1.name#5.#2 + main.component#9.namevalue#1.name#5.out + main.component#9.name#2.name1 + main.component#9.name#2.#1 + main.component#9.name#2.#2 + main.component#9.name#2.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + + Forward route : (from state 4) + (START)->6:[\055]->0:[\t ]->6:[\055]->(HERE) + Transitions : + EOS -> 3 + 0:[\t ] -> 4 + 1:[\r] -> 5 + 4:[*] -> 6 + 6:[\055] -> 10 + 8:[0-9] -> 10 + 9:[;] -> 8 + 10:[=] -> 9 + 11:[A-Z_a-z] -> 10 + NFA exit tags applying : + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + Attributes for <copier> : COPY_TO_NAME + Attributes for <action> : GOT_NAME + +DFA state 11 + NFA states : + main.component#2.namevalue#1.#7 + main.component#2.namevalue#1.digits#6.in + main.component#2.namevalue#1.digits#6.out + main.component#2.namevalue#1.#8 + main.component#2.namevalue#1.optwhite#7.in + main.component#2.namevalue#1.optwhite#7.out + main.component#5.namevalue#1.#7 + main.component#5.namevalue#1.digits#6.in + main.component#5.namevalue#1.digits#6.out + main.component#5.namevalue#1.#8 + main.component#5.namevalue#1.optwhite#7.in + main.component#5.namevalue#1.optwhite#7.out + main.component#9.namevalue#1.#7 + main.component#9.namevalue#1.digits#6.in + main.component#9.namevalue#1.digits#6.out + main.component#9.namevalue#1.#8 + main.component#9.namevalue#1.optwhite#7.in + main.component#9.namevalue#1.optwhite#7.out + + Forward route : (from state 6) + (START)->6:[\055]->4:[*]->8:[0-9]->(HERE) + Transitions : + 0:[\t ] -> 18 + 1:[\r] -> 18 + 8:[0-9] -> 11 + 10:[=] -> 19 + +DFA state 12 + NFA states : + main.#2 + main.component#2.majorminor#3.minor#2.minor1 + main.component#2.majorminor#3.minor#2.#1 + 
main.component#2.majorminor#3.minor#2.out + main.component#2.majorminor#3.out + main.component#2.majorminor#3.#1 + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + main.#5 + main.component#5.majorminor#3.minor#2.minor1 + main.component#5.majorminor#3.minor#2.#1 + main.component#5.majorminor#3.minor#2.out + main.component#5.majorminor#3.out + main.component#5.majorminor#3.#1 + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + main.#10 + main.component#9.majorminor#3.minor#2.minor1 + main.component#9.majorminor#3.minor#2.#1 + main.component#9.majorminor#3.minor#2.out + main.component#9.majorminor#3.out + main.component#9.majorminor#3.#1 + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + + Forward route : (from state 7) + (START)->6:[\055]->7:[/]->5:[+.]->(HERE) + Transitions : + EOS -> 3 + 0:[\t ] -> 20 + 1:[\r] -> 20 + 5:[+.] -> 12 + 6:[\055] -> 12 + 8:[0-9] -> 12 + 9:[;] -> 8 + 11:[A-Z_a-z] -> 12 + 12:[\\] -> 12 + NFA exit tags applying : + GOT_MAJORMINOR + COPY_TO_MINOR + GOT_MAJORMINOR + COPY_TO_MINOR + GOT_MAJORMINOR + COPY_TO_MINOR + Attributes for <copier> : COPY_TO_MINOR + Attributes for <action> : GOT_MAJORMINOR + +DFA state 13 + NFA states : + main.#1 + main.optwhite#1.in + main.optwhite#1.out + main.component#2.in + main.component#2.namevalue#1.in + main.component#2.namevalue#1.#1 + main.component#2.namevalue#1.optwhite#1.in + main.component#2.namevalue#1.optwhite#1.out + main.component#2.namevalue#1.name#2.in + main.component#2.namevalue#1.#4 + main.component#2.namevalue#1.optwhite#4.in + main.component#2.namevalue#1.optwhite#4.out + main.component#2.namevalue#1.name#5.in + main.component#2.name#2.in + main.component#2.majorminor#3.in + main.component#2.majorminor#3.major#1.in + main.#4 + main.optwhite#4.in + main.optwhite#4.out + main.component#5.in + main.component#5.namevalue#1.in + main.component#5.namevalue#1.#1 + main.component#5.namevalue#1.optwhite#1.in + 
main.component#5.namevalue#1.optwhite#1.out + main.component#5.namevalue#1.name#2.in + main.component#5.namevalue#1.#4 + main.component#5.namevalue#1.optwhite#4.in + main.component#5.namevalue#1.optwhite#4.out + main.component#5.namevalue#1.name#5.in + main.component#5.name#2.in + main.component#5.majorminor#3.in + main.component#5.majorminor#3.major#1.in + main.#8 + main.optwhite#7.in + main.optwhite#7.out + main.#9 + main.optwhite#8.in + main.optwhite#8.out + main.component#9.in + main.component#9.namevalue#1.in + main.component#9.namevalue#1.#1 + main.component#9.namevalue#1.optwhite#1.in + main.component#9.namevalue#1.optwhite#1.out + main.component#9.namevalue#1.name#2.in + main.component#9.namevalue#1.#4 + main.component#9.namevalue#1.optwhite#4.in + main.component#9.namevalue#1.optwhite#4.out + main.component#9.namevalue#1.name#5.in + main.component#9.name#2.in + main.component#9.majorminor#3.in + main.component#9.majorminor#3.major#1.in + + Forward route : (from state 8) + (START)->6:[\055]->9:[;]->0:[\t ]->(HERE) + Transitions : + EOS -> 3 + 0:[\t ] -> 13 + 1:[\r] -> 13 + 6:[\055] -> 2 + 8:[0-9] -> 2 + 11:[A-Z_a-z] -> 2 + +DFA state 14 + NFA states : + main.#2 + main.component#2.namevalue#1.out_normal + main.component#2.namevalue#1.#19 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + main.#5 + main.component#5.namevalue#1.out_normal + main.component#5.namevalue#1.#19 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + main.#10 + main.component#9.namevalue#1.out_normal + main.component#9.namevalue#1.#19 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + + Forward route : (from state 9) + (START)->6:[\055]->10:[=]->EOS->(HERE) + Transitions : + EOS -> 3 + 0:[\t ] -> 20 + 1:[\r] -> 20 + 9:[;] -> 8 + NFA exit tags applying : + GOT_NAMEVALUE + GOT_NAMEVALUE + GOT_NAMEVALUE + 
Attributes for <action> : GOT_NAMEVALUE + +DFA state 15 + NFA states : + main.component#2.namevalue#1.#9 + main.component#2.namevalue#1.optwhite#8.in + main.component#2.namevalue#1.optwhite#8.out + main.component#2.namevalue#1.qvalue#9.in + main.component#2.namevalue#1.#11 + main.component#2.namevalue#1.optwhite#11.in + main.component#2.namevalue#1.optwhite#11.out + main.component#2.namevalue#1.value#12.in + main.component#2.namevalue#1.#13 + main.component#2.namevalue#1.optwhite#14.in + main.component#2.namevalue#1.optwhite#14.out + main.component#2.namevalue#1.#14 + main.component#5.namevalue#1.#9 + main.component#5.namevalue#1.optwhite#8.in + main.component#5.namevalue#1.optwhite#8.out + main.component#5.namevalue#1.qvalue#9.in + main.component#5.namevalue#1.#11 + main.component#5.namevalue#1.optwhite#11.in + main.component#5.namevalue#1.optwhite#11.out + main.component#5.namevalue#1.value#12.in + main.component#5.namevalue#1.#13 + main.component#5.namevalue#1.optwhite#14.in + main.component#5.namevalue#1.optwhite#14.out + main.component#5.namevalue#1.#14 + main.component#9.namevalue#1.#9 + main.component#9.namevalue#1.optwhite#8.in + main.component#9.namevalue#1.optwhite#8.out + main.component#9.namevalue#1.qvalue#9.in + main.component#9.namevalue#1.#11 + main.component#9.namevalue#1.optwhite#11.in + main.component#9.namevalue#1.optwhite#11.out + main.component#9.namevalue#1.value#12.in + main.component#9.namevalue#1.#13 + main.component#9.namevalue#1.optwhite#14.in + main.component#9.namevalue#1.optwhite#14.out + main.component#9.namevalue#1.#14 + + Forward route : (from state 9) + (START)->6:[\055]->10:[=]->0:[\t ]->(HERE) + Transitions : + EOS -> 14 + 0:[\t ] -> 15 + 1:[\r] -> 15 + 2:[!#-),:<>-@[]^`{-~] -> 16 + 3:["] -> 17 + 4:[*] -> 16 + 5:[+.] 
-> 16 + 6:[\055] -> 16 + 7:[/] -> 16 + 8:[0-9] -> 16 + 10:[=] -> 16 + 11:[A-Z_a-z] -> 16 + +DFA state 16 + NFA states : + main.#2 + main.component#2.namevalue#1.#12 + main.component#2.namevalue#1.value#12.v1 + main.component#2.namevalue#1.value#12.#1 + main.component#2.namevalue#1.value#12.out + main.component#2.namevalue#1.optwhite#13.in + main.component#2.namevalue#1.optwhite#13.out + main.component#2.namevalue#1.out_normal + main.component#2.namevalue#1.#19 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + main.#5 + main.component#5.namevalue#1.#12 + main.component#5.namevalue#1.value#12.v1 + main.component#5.namevalue#1.value#12.#1 + main.component#5.namevalue#1.value#12.out + main.component#5.namevalue#1.optwhite#13.in + main.component#5.namevalue#1.optwhite#13.out + main.component#5.namevalue#1.out_normal + main.component#5.namevalue#1.#19 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + main.#10 + main.component#9.namevalue#1.#12 + main.component#9.namevalue#1.value#12.v1 + main.component#9.namevalue#1.value#12.#1 + main.component#9.namevalue#1.value#12.out + main.component#9.namevalue#1.optwhite#13.in + main.component#9.namevalue#1.optwhite#13.out + main.component#9.namevalue#1.out_normal + main.component#9.namevalue#1.#19 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + + Forward route : (from state 9) + (START)->6:[\055]->10:[=]->2:[!#-),:<>-@[]^`{-~]->(HERE) + Transitions : + EOS -> 3 + 0:[\t ] -> 21 + 1:[\r] -> 21 + 2:[!#-),:<>-@[]^`{-~] -> 16 + 4:[*] -> 16 + 5:[+.] 
-> 16 + 6:[\055] -> 16 + 7:[/] -> 16 + 8:[0-9] -> 16 + 9:[;] -> 8 + 10:[=] -> 16 + 11:[A-Z_a-z] -> 16 + NFA exit tags applying : + GOT_NAMEVALUE + COPY_TO_VALUE + GOT_NAMEVALUE + COPY_TO_VALUE + GOT_NAMEVALUE + COPY_TO_VALUE + Attributes for <copier> : COPY_TO_VALUE + Attributes for <action> : GOT_NAMEVALUE + +DFA state 17 + NFA states : + main.component#2.namevalue#1.qvalue#9.qv0 + main.component#2.namevalue#1.qvalue#9.escape#1.in + main.component#5.namevalue#1.qvalue#9.qv0 + main.component#5.namevalue#1.qvalue#9.escape#1.in + main.component#9.namevalue#1.qvalue#9.qv0 + main.component#9.namevalue#1.qvalue#9.escape#1.in + + Forward route : (from state 9) + (START)->6:[\055]->10:[=]->3:["]->(HERE) + Transitions : + 0:[\t ] -> 22 + 2:[!#-),:<>-@[]^`{-~] -> 22 + 4:[*] -> 22 + 5:[+.] -> 22 + 6:[\055] -> 22 + 7:[/] -> 22 + 8:[0-9] -> 22 + 9:[;] -> 22 + 10:[=] -> 22 + 11:[A-Z_a-z] -> 22 + 12:[\\] -> 23 + +DFA state 18 + NFA states : + main.component#2.namevalue#1.#8 + main.component#2.namevalue#1.optwhite#7.in + main.component#2.namevalue#1.optwhite#7.out + main.component#5.namevalue#1.#8 + main.component#5.namevalue#1.optwhite#7.in + main.component#5.namevalue#1.optwhite#7.out + main.component#9.namevalue#1.#8 + main.component#9.namevalue#1.optwhite#7.in + main.component#9.namevalue#1.optwhite#7.out + + Forward route : (from state 11) + (START)->6:[\055]->4:[*]->8:[0-9]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 18 + 1:[\r] -> 18 + 10:[=] -> 19 + +DFA state 19 + NFA states : + main.component#2.namevalue#1.rhs_continue + main.component#2.namevalue#1.#15 + main.component#2.namevalue#1.optwhite#15.in + main.component#2.namevalue#1.optwhite#15.out + main.component#2.namevalue#1.qvalue#16.in + main.component#2.namevalue#1.#17 + main.component#2.namevalue#1.optwhite#18.in + main.component#2.namevalue#1.optwhite#18.out + main.component#2.namevalue#1.value#19.in + main.component#5.namevalue#1.rhs_continue + main.component#5.namevalue#1.#15 + 
main.component#5.namevalue#1.optwhite#15.in + main.component#5.namevalue#1.optwhite#15.out + main.component#5.namevalue#1.qvalue#16.in + main.component#5.namevalue#1.#17 + main.component#5.namevalue#1.optwhite#18.in + main.component#5.namevalue#1.optwhite#18.out + main.component#5.namevalue#1.value#19.in + main.component#9.namevalue#1.rhs_continue + main.component#9.namevalue#1.#15 + main.component#9.namevalue#1.optwhite#15.in + main.component#9.namevalue#1.optwhite#15.out + main.component#9.namevalue#1.qvalue#16.in + main.component#9.namevalue#1.#17 + main.component#9.namevalue#1.optwhite#18.in + main.component#9.namevalue#1.optwhite#18.out + main.component#9.namevalue#1.value#19.in + + Forward route : (from state 11) + (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->(HERE) + Transitions : + 0:[\t ] -> 24 + 1:[\r] -> 24 + 2:[!#-),:<>-@[]^`{-~] -> 25 + 3:["] -> 26 + 4:[*] -> 25 + 5:[+.] -> 25 + 6:[\055] -> 25 + 7:[/] -> 25 + 8:[0-9] -> 25 + 10:[=] -> 25 + 11:[A-Z_a-z] -> 25 + +DFA state 20 + NFA states : + main.#3 + main.optwhite#3.in + main.optwhite#3.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + + Forward route : (from state 12) + (START)->6:[\055]->7:[/]->5:[+.]->0:[\t ]->(HERE) + Transitions : + EOS -> 3 + 0:[\t ] -> 20 + 1:[\r] -> 20 + 9:[;] -> 8 + +DFA state 21 + NFA states : + main.#2 + main.component#2.namevalue#1.optwhite#13.in + main.component#2.namevalue#1.optwhite#13.out + main.component#2.namevalue#1.out_normal + main.component#2.namevalue#1.#19 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + main.#5 + main.component#5.namevalue#1.optwhite#13.in + main.component#5.namevalue#1.optwhite#13.out + main.component#5.namevalue#1.out_normal + main.component#5.namevalue#1.#19 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + main.#10 + 
main.component#9.namevalue#1.optwhite#13.in + main.component#9.namevalue#1.optwhite#13.out + main.component#9.namevalue#1.out_normal + main.component#9.namevalue#1.#19 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + + Forward route : (from state 16) + (START)->6:[\055]->10:[=]->2:[!#-),:<>-@[]^`{-~]->0:[\t ]->(HERE) + Transitions : + EOS -> 3 + 0:[\t ] -> 21 + 1:[\r] -> 21 + 9:[;] -> 8 + NFA exit tags applying : + GOT_NAMEVALUE + GOT_NAMEVALUE + GOT_NAMEVALUE + Attributes for <action> : GOT_NAMEVALUE + +DFA state 22 + NFA states : + main.component#2.namevalue#1.qvalue#9.qv1 + main.component#2.namevalue#1.qvalue#9.#1 + main.component#2.namevalue#1.qvalue#9.escape#2.in + main.component#2.namevalue#1.qvalue#9.qv2 + main.component#5.namevalue#1.qvalue#9.qv1 + main.component#5.namevalue#1.qvalue#9.#1 + main.component#5.namevalue#1.qvalue#9.escape#2.in + main.component#5.namevalue#1.qvalue#9.qv2 + main.component#9.namevalue#1.qvalue#9.qv1 + main.component#9.namevalue#1.qvalue#9.#1 + main.component#9.namevalue#1.qvalue#9.escape#2.in + main.component#9.namevalue#1.qvalue#9.qv2 + + Forward route : (from state 17) + (START)->6:[\055]->10:[=]->3:["]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 22 + 2:[!#-),:<>-@[]^`{-~] -> 22 + 3:["] -> 27 + 4:[*] -> 22 + 5:[+.] 
-> 22 + 6:[\055] -> 22 + 7:[/] -> 22 + 8:[0-9] -> 22 + 9:[;] -> 22 + 10:[=] -> 22 + 11:[A-Z_a-z] -> 22 + 12:[\\] -> 28 + NFA exit tags applying : + COPY_TO_VALUE + COPY_TO_VALUE + COPY_TO_VALUE + Attributes for <copier> : COPY_TO_VALUE + +DFA state 23 + NFA states : + main.component#2.namevalue#1.qvalue#9.escape#1.#1 + main.component#2.namevalue#1.qvalue#9.escape#1.#2 + main.component#5.namevalue#1.qvalue#9.escape#1.#1 + main.component#5.namevalue#1.qvalue#9.escape#1.#2 + main.component#9.namevalue#1.qvalue#9.escape#1.#1 + main.component#9.namevalue#1.qvalue#9.escape#1.#2 + + Forward route : (from state 17) + (START)->6:[\055]->10:[=]->3:["]->12:[\\]->(HERE) + Transitions : + 3:["] -> 29 + 12:[\\] -> 29 + +DFA state 24 + NFA states : + main.component#2.namevalue#1.#15 + main.component#2.namevalue#1.optwhite#15.in + main.component#2.namevalue#1.optwhite#15.out + main.component#2.namevalue#1.qvalue#16.in + main.component#2.namevalue#1.#17 + main.component#2.namevalue#1.optwhite#18.in + main.component#2.namevalue#1.optwhite#18.out + main.component#2.namevalue#1.value#19.in + main.component#5.namevalue#1.#15 + main.component#5.namevalue#1.optwhite#15.in + main.component#5.namevalue#1.optwhite#15.out + main.component#5.namevalue#1.qvalue#16.in + main.component#5.namevalue#1.#17 + main.component#5.namevalue#1.optwhite#18.in + main.component#5.namevalue#1.optwhite#18.out + main.component#5.namevalue#1.value#19.in + main.component#9.namevalue#1.#15 + main.component#9.namevalue#1.optwhite#15.in + main.component#9.namevalue#1.optwhite#15.out + main.component#9.namevalue#1.qvalue#16.in + main.component#9.namevalue#1.#17 + main.component#9.namevalue#1.optwhite#18.in + main.component#9.namevalue#1.optwhite#18.out + main.component#9.namevalue#1.value#19.in + + Forward route : (from state 19) + (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 24 + 1:[\r] -> 24 + 2:[!#-),:<>-@[]^`{-~] -> 25 + 3:["] -> 26 + 4:[*] -> 25 + 5:[+.] 
-> 25 + 6:[\055] -> 25 + 7:[/] -> 25 + 8:[0-9] -> 25 + 10:[=] -> 25 + 11:[A-Z_a-z] -> 25 + +DFA state 25 + NFA states : + main.#2 + main.component#2.namevalue#1.#18 + main.component#2.namevalue#1.value#19.v1 + main.component#2.namevalue#1.value#19.#1 + main.component#2.namevalue#1.value#19.out + main.component#2.namevalue#1.optwhite#20.in + main.component#2.namevalue#1.optwhite#20.out + main.component#2.namevalue#1.out_continue + main.component#2.namevalue#1.#20 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + main.#5 + main.component#5.namevalue#1.#18 + main.component#5.namevalue#1.value#19.v1 + main.component#5.namevalue#1.value#19.#1 + main.component#5.namevalue#1.value#19.out + main.component#5.namevalue#1.optwhite#20.in + main.component#5.namevalue#1.optwhite#20.out + main.component#5.namevalue#1.out_continue + main.component#5.namevalue#1.#20 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + main.#10 + main.component#9.namevalue#1.#18 + main.component#9.namevalue#1.value#19.v1 + main.component#9.namevalue#1.value#19.#1 + main.component#9.namevalue#1.value#19.out + main.component#9.namevalue#1.optwhite#20.in + main.component#9.namevalue#1.optwhite#20.out + main.component#9.namevalue#1.out_continue + main.component#9.namevalue#1.#20 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + + Forward route : (from state 19) + (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->2:[!#-),:<>-@[]^`{-~]->(HERE) + Transitions : + EOS -> 3 + 0:[\t ] -> 30 + 1:[\r] -> 30 + 2:[!#-),:<>-@[]^`{-~] -> 25 + 4:[*] -> 25 + 5:[+.] 
-> 25 + 6:[\055] -> 25 + 7:[/] -> 25 + 8:[0-9] -> 25 + 9:[;] -> 8 + 10:[=] -> 25 + 11:[A-Z_a-z] -> 25 + NFA exit tags applying : + GOT_NAMEVALUE_CONT + COPY_TO_VALUE + GOT_NAMEVALUE_CONT + COPY_TO_VALUE + GOT_NAMEVALUE_CONT + COPY_TO_VALUE + Attributes for <copier> : COPY_TO_VALUE + Attributes for <action> : GOT_NAMEVALUE_CONT + +DFA state 26 + NFA states : + main.component#2.namevalue#1.qvalue#16.qv0 + main.component#2.namevalue#1.qvalue#16.escape#1.in + main.component#5.namevalue#1.qvalue#16.qv0 + main.component#5.namevalue#1.qvalue#16.escape#1.in + main.component#9.namevalue#1.qvalue#16.qv0 + main.component#9.namevalue#1.qvalue#16.escape#1.in + + Forward route : (from state 19) + (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->3:["]->(HERE) + Transitions : + 0:[\t ] -> 31 + 2:[!#-),:<>-@[]^`{-~] -> 31 + 4:[*] -> 31 + 5:[+.] -> 31 + 6:[\055] -> 31 + 7:[/] -> 31 + 8:[0-9] -> 31 + 9:[;] -> 31 + 10:[=] -> 31 + 11:[A-Z_a-z] -> 31 + 12:[\\] -> 32 + +DFA state 27 + NFA states : + main.#2 + main.component#2.namevalue#1.#10 + main.component#2.namevalue#1.qvalue#9.out + main.component#2.namevalue#1.optwhite#10.in + main.component#2.namevalue#1.optwhite#10.out + main.component#2.namevalue#1.out_normal + main.component#2.namevalue#1.#19 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + main.#5 + main.component#5.namevalue#1.#10 + main.component#5.namevalue#1.qvalue#9.out + main.component#5.namevalue#1.optwhite#10.in + main.component#5.namevalue#1.optwhite#10.out + main.component#5.namevalue#1.out_normal + main.component#5.namevalue#1.#19 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + main.#10 + main.component#9.namevalue#1.#10 + main.component#9.namevalue#1.qvalue#9.out + main.component#9.namevalue#1.optwhite#10.in + main.component#9.namevalue#1.optwhite#10.out + main.component#9.namevalue#1.out_normal + main.component#9.namevalue#1.#19 + 
main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + + Forward route : (from state 22) + (START)->6:[\055]->10:[=]->3:["]->0:[\t ]->3:["]->(HERE) + Transitions : + EOS -> 3 + 0:[\t ] -> 33 + 1:[\r] -> 33 + 9:[;] -> 8 + NFA exit tags applying : + GOT_NAMEVALUE + GOT_NAMEVALUE + GOT_NAMEVALUE + Attributes for <action> : GOT_NAMEVALUE + +DFA state 28 + NFA states : + main.component#2.namevalue#1.qvalue#9.escape#2.#1 + main.component#2.namevalue#1.qvalue#9.escape#2.#2 + main.component#5.namevalue#1.qvalue#9.escape#2.#1 + main.component#5.namevalue#1.qvalue#9.escape#2.#2 + main.component#9.namevalue#1.qvalue#9.escape#2.#1 + main.component#9.namevalue#1.qvalue#9.escape#2.#2 + + Forward route : (from state 22) + (START)->6:[\055]->10:[=]->3:["]->0:[\t ]->12:[\\]->(HERE) + Transitions : + 3:["] -> 34 + 12:[\\] -> 34 + +DFA state 29 + NFA states : + main.component#2.namevalue#1.qvalue#9.escape#1.out + main.component#2.namevalue#1.qvalue#9.qv1 + main.component#2.namevalue#1.qvalue#9.#1 + main.component#2.namevalue#1.qvalue#9.escape#2.in + main.component#2.namevalue#1.qvalue#9.qv2 + main.component#5.namevalue#1.qvalue#9.escape#1.out + main.component#5.namevalue#1.qvalue#9.qv1 + main.component#5.namevalue#1.qvalue#9.#1 + main.component#5.namevalue#1.qvalue#9.escape#2.in + main.component#5.namevalue#1.qvalue#9.qv2 + main.component#9.namevalue#1.qvalue#9.escape#1.out + main.component#9.namevalue#1.qvalue#9.qv1 + main.component#9.namevalue#1.qvalue#9.#1 + main.component#9.namevalue#1.qvalue#9.escape#2.in + main.component#9.namevalue#1.qvalue#9.qv2 + + Forward route : (from state 23) + (START)->6:[\055]->10:[=]->3:["]->12:[\\]->3:["]->(HERE) + Transitions : + 0:[\t ] -> 22 + 2:[!#-),:<>-@[]^`{-~] -> 22 + 3:["] -> 27 + 4:[*] -> 22 + 5:[+.] 
-> 22 + 6:[\055] -> 22 + 7:[/] -> 22 + 8:[0-9] -> 22 + 9:[;] -> 22 + 10:[=] -> 22 + 11:[A-Z_a-z] -> 22 + 12:[\\] -> 28 + NFA exit tags applying : + COPY_TO_VALUE + COPY_TO_VALUE + COPY_TO_VALUE + Attributes for <copier> : COPY_TO_VALUE + +DFA state 30 + NFA states : + main.#2 + main.component#2.namevalue#1.optwhite#20.in + main.component#2.namevalue#1.optwhite#20.out + main.component#2.namevalue#1.out_continue + main.component#2.namevalue#1.#20 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + main.#5 + main.component#5.namevalue#1.optwhite#20.in + main.component#5.namevalue#1.optwhite#20.out + main.component#5.namevalue#1.out_continue + main.component#5.namevalue#1.#20 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + main.#10 + main.component#9.namevalue#1.optwhite#20.in + main.component#9.namevalue#1.optwhite#20.out + main.component#9.namevalue#1.out_continue + main.component#9.namevalue#1.#20 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + + Forward route : (from state 25) + (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->2:[!#-),:<>-@[]^`{-~]->0:[\t ]->(HERE) + Transitions : + EOS -> 3 + 0:[\t ] -> 30 + 1:[\r] -> 30 + 9:[;] -> 8 + NFA exit tags applying : + GOT_NAMEVALUE_CONT + GOT_NAMEVALUE_CONT + GOT_NAMEVALUE_CONT + Attributes for <action> : GOT_NAMEVALUE_CONT + +DFA state 31 + NFA states : + main.component#2.namevalue#1.qvalue#16.qv1 + main.component#2.namevalue#1.qvalue#16.#1 + main.component#2.namevalue#1.qvalue#16.escape#2.in + main.component#2.namevalue#1.qvalue#16.qv2 + main.component#5.namevalue#1.qvalue#16.qv1 + main.component#5.namevalue#1.qvalue#16.#1 + main.component#5.namevalue#1.qvalue#16.escape#2.in + main.component#5.namevalue#1.qvalue#16.qv2 + main.component#9.namevalue#1.qvalue#16.qv1 + main.component#9.namevalue#1.qvalue#16.#1 + 
main.component#9.namevalue#1.qvalue#16.escape#2.in + main.component#9.namevalue#1.qvalue#16.qv2 + + Forward route : (from state 26) + (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->3:["]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 31 + 2:[!#-),:<>-@[]^`{-~] -> 31 + 3:["] -> 35 + 4:[*] -> 31 + 5:[+.] -> 31 + 6:[\055] -> 31 + 7:[/] -> 31 + 8:[0-9] -> 31 + 9:[;] -> 31 + 10:[=] -> 31 + 11:[A-Z_a-z] -> 31 + 12:[\\] -> 36 + NFA exit tags applying : + COPY_TO_VALUE + COPY_TO_VALUE + COPY_TO_VALUE + Attributes for <copier> : COPY_TO_VALUE + +DFA state 32 + NFA states : + main.component#2.namevalue#1.qvalue#16.escape#1.#1 + main.component#2.namevalue#1.qvalue#16.escape#1.#2 + main.component#5.namevalue#1.qvalue#16.escape#1.#1 + main.component#5.namevalue#1.qvalue#16.escape#1.#2 + main.component#9.namevalue#1.qvalue#16.escape#1.#1 + main.component#9.namevalue#1.qvalue#16.escape#1.#2 + + Forward route : (from state 26) + (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->3:["]->12:[\\]->(HERE) + Transitions : + 3:["] -> 37 + 12:[\\] -> 37 + +DFA state 33 + NFA states : + main.#2 + main.component#2.namevalue#1.optwhite#10.in + main.component#2.namevalue#1.optwhite#10.out + main.component#2.namevalue#1.out_normal + main.component#2.namevalue#1.#19 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + main.#5 + main.component#5.namevalue#1.optwhite#10.in + main.component#5.namevalue#1.optwhite#10.out + main.component#5.namevalue#1.out_normal + main.component#5.namevalue#1.#19 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + main.#10 + main.component#9.namevalue#1.optwhite#10.in + main.component#9.namevalue#1.optwhite#10.out + main.component#9.namevalue#1.out_normal + main.component#9.namevalue#1.#19 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + + Forward route : (from state 27) + 
(START)->6:[\055]->10:[=]->3:["]->0:[\t ]->3:["]->0:[\t ]->(HERE) + Transitions : + EOS -> 3 + 0:[\t ] -> 33 + 1:[\r] -> 33 + 9:[;] -> 8 + NFA exit tags applying : + GOT_NAMEVALUE + GOT_NAMEVALUE + GOT_NAMEVALUE + Attributes for <action> : GOT_NAMEVALUE + +DFA state 34 + NFA states : + main.component#2.namevalue#1.qvalue#9.qv1 + main.component#2.namevalue#1.qvalue#9.#1 + main.component#2.namevalue#1.qvalue#9.escape#2.in + main.component#2.namevalue#1.qvalue#9.escape#2.out + main.component#2.namevalue#1.qvalue#9.qv2 + main.component#5.namevalue#1.qvalue#9.qv1 + main.component#5.namevalue#1.qvalue#9.#1 + main.component#5.namevalue#1.qvalue#9.escape#2.in + main.component#5.namevalue#1.qvalue#9.escape#2.out + main.component#5.namevalue#1.qvalue#9.qv2 + main.component#9.namevalue#1.qvalue#9.qv1 + main.component#9.namevalue#1.qvalue#9.#1 + main.component#9.namevalue#1.qvalue#9.escape#2.in + main.component#9.namevalue#1.qvalue#9.escape#2.out + main.component#9.namevalue#1.qvalue#9.qv2 + + Forward route : (from state 28) + (START)->6:[\055]->10:[=]->3:["]->0:[\t ]->12:[\\]->3:["]->(HERE) + Transitions : + 0:[\t ] -> 22 + 2:[!#-),:<>-@[]^`{-~] -> 22 + 3:["] -> 27 + 4:[*] -> 22 + 5:[+.] 
-> 22 + 6:[\055] -> 22 + 7:[/] -> 22 + 8:[0-9] -> 22 + 9:[;] -> 22 + 10:[=] -> 22 + 11:[A-Z_a-z] -> 22 + 12:[\\] -> 28 + NFA exit tags applying : + COPY_TO_VALUE + COPY_TO_VALUE + COPY_TO_VALUE + Attributes for <copier> : COPY_TO_VALUE + +DFA state 35 + NFA states : + main.#2 + main.component#2.namevalue#1.#16 + main.component#2.namevalue#1.qvalue#16.out + main.component#2.namevalue#1.optwhite#17.in + main.component#2.namevalue#1.optwhite#17.out + main.component#2.namevalue#1.out_continue + main.component#2.namevalue#1.#20 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + main.#5 + main.component#5.namevalue#1.#16 + main.component#5.namevalue#1.qvalue#16.out + main.component#5.namevalue#1.optwhite#17.in + main.component#5.namevalue#1.optwhite#17.out + main.component#5.namevalue#1.out_continue + main.component#5.namevalue#1.#20 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + main.#10 + main.component#9.namevalue#1.#16 + main.component#9.namevalue#1.qvalue#16.out + main.component#9.namevalue#1.optwhite#17.in + main.component#9.namevalue#1.optwhite#17.out + main.component#9.namevalue#1.out_continue + main.component#9.namevalue#1.#20 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + + Forward route : (from state 31) + (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->3:["]->0:[\t ]->3:["]->(HERE) + Transitions : + EOS -> 3 + 0:[\t ] -> 38 + 1:[\r] -> 38 + 9:[;] -> 8 + NFA exit tags applying : + GOT_NAMEVALUE_CONT + GOT_NAMEVALUE_CONT + GOT_NAMEVALUE_CONT + Attributes for <action> : GOT_NAMEVALUE_CONT + +DFA state 36 + NFA states : + main.component#2.namevalue#1.qvalue#16.escape#2.#1 + main.component#2.namevalue#1.qvalue#16.escape#2.#2 + main.component#5.namevalue#1.qvalue#16.escape#2.#1 + main.component#5.namevalue#1.qvalue#16.escape#2.#2 + 
main.component#9.namevalue#1.qvalue#16.escape#2.#1 + main.component#9.namevalue#1.qvalue#16.escape#2.#2 + + Forward route : (from state 31) + (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->3:["]->0:[\t ]->12:[\\]->(HERE) + Transitions : + 3:["] -> 39 + 12:[\\] -> 39 + +DFA state 37 + NFA states : + main.component#2.namevalue#1.qvalue#16.escape#1.out + main.component#2.namevalue#1.qvalue#16.qv1 + main.component#2.namevalue#1.qvalue#16.#1 + main.component#2.namevalue#1.qvalue#16.escape#2.in + main.component#2.namevalue#1.qvalue#16.qv2 + main.component#5.namevalue#1.qvalue#16.escape#1.out + main.component#5.namevalue#1.qvalue#16.qv1 + main.component#5.namevalue#1.qvalue#16.#1 + main.component#5.namevalue#1.qvalue#16.escape#2.in + main.component#5.namevalue#1.qvalue#16.qv2 + main.component#9.namevalue#1.qvalue#16.escape#1.out + main.component#9.namevalue#1.qvalue#16.qv1 + main.component#9.namevalue#1.qvalue#16.#1 + main.component#9.namevalue#1.qvalue#16.escape#2.in + main.component#9.namevalue#1.qvalue#16.qv2 + + Forward route : (from state 32) + (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->3:["]->12:[\\]->3:["]->(HERE) + Transitions : + 0:[\t ] -> 31 + 2:[!#-),:<>-@[]^`{-~] -> 31 + 3:["] -> 35 + 4:[*] -> 31 + 5:[+.] 
-> 31 + 6:[\055] -> 31 + 7:[/] -> 31 + 8:[0-9] -> 31 + 9:[;] -> 31 + 10:[=] -> 31 + 11:[A-Z_a-z] -> 31 + 12:[\\] -> 36 + NFA exit tags applying : + COPY_TO_VALUE + COPY_TO_VALUE + COPY_TO_VALUE + Attributes for <copier> : COPY_TO_VALUE + +DFA state 38 + NFA states : + main.#2 + main.component#2.namevalue#1.optwhite#17.in + main.component#2.namevalue#1.optwhite#17.out + main.component#2.namevalue#1.out_continue + main.component#2.namevalue#1.#20 + main.component#2.namevalue#1.out + main.component#2.out + main.#3 + main.optwhite#3.in + main.optwhite#3.out + main.#5 + main.component#5.namevalue#1.optwhite#17.in + main.component#5.namevalue#1.optwhite#17.out + main.component#5.namevalue#1.out_continue + main.component#5.namevalue#1.#20 + main.component#5.namevalue#1.out + main.component#5.out + main.#6 + main.optwhite#6.in + main.optwhite#6.out + main.#10 + main.component#9.namevalue#1.optwhite#17.in + main.component#9.namevalue#1.optwhite#17.out + main.component#9.namevalue#1.out_continue + main.component#9.namevalue#1.#20 + main.component#9.namevalue#1.out + main.component#9.out + main.#11 + main.optwhite#10.in + main.optwhite#10.out + + Forward route : (from state 35) + (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->3:["]->0:[\t ]->3:["]->0:[\t ]->(HERE) + Transitions : + EOS -> 3 + 0:[\t ] -> 38 + 1:[\r] -> 38 + 9:[;] -> 8 + NFA exit tags applying : + GOT_NAMEVALUE_CONT + GOT_NAMEVALUE_CONT + GOT_NAMEVALUE_CONT + Attributes for <action> : GOT_NAMEVALUE_CONT + +DFA state 39 + NFA states : + main.component#2.namevalue#1.qvalue#16.qv1 + main.component#2.namevalue#1.qvalue#16.#1 + main.component#2.namevalue#1.qvalue#16.escape#2.in + main.component#2.namevalue#1.qvalue#16.escape#2.out + main.component#2.namevalue#1.qvalue#16.qv2 + main.component#5.namevalue#1.qvalue#16.qv1 + main.component#5.namevalue#1.qvalue#16.#1 + main.component#5.namevalue#1.qvalue#16.escape#2.in + main.component#5.namevalue#1.qvalue#16.escape#2.out + main.component#5.namevalue#1.qvalue#16.qv2 + 
main.component#9.namevalue#1.qvalue#16.qv1 + main.component#9.namevalue#1.qvalue#16.#1 + main.component#9.namevalue#1.qvalue#16.escape#2.in + main.component#9.namevalue#1.qvalue#16.escape#2.out + main.component#9.namevalue#1.qvalue#16.qv2 + + Forward route : (from state 36) + (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->3:["]->0:[\t ]->12:[\\]->3:["]->(HERE) + Transitions : + 0:[\t ] -> 31 + 2:[!#-),:<>-@[]^`{-~] -> 31 + 3:["] -> 35 + 4:[*] -> 31 + 5:[+.] -> 31 + 6:[\055] -> 31 + 7:[/] -> 31 + 8:[0-9] -> 31 + 9:[;] -> 31 + 10:[=] -> 31 + 11:[A-Z_a-z] -> 31 + 12:[\\] -> 36 + NFA exit tags applying : + COPY_TO_VALUE + COPY_TO_VALUE + COPY_TO_VALUE + Attributes for <copier> : COPY_TO_VALUE + + +Entry states in DFA: +Entry <in> : 0 +Searching for dead states... +(no dead states found) + +----------------------------- +------ COMPRESSING DFA ------ +----------------------------- +Old DFA state 0 becomes 0 +Old DFA state 1 becomes 0 (formerly 0) +Old DFA state 2 becomes 1 +Old DFA state 3 becomes 2 +Old DFA state 4 becomes 3 +Old DFA state 5 becomes 4 +Old DFA state 6 becomes 5 +Old DFA state 7 becomes 6 +Old DFA state 8 becomes 7 +Old DFA state 9 becomes 8 +Old DFA state 10 becomes 9 +Old DFA state 11 becomes 10 +Old DFA state 12 becomes 11 +Old DFA state 13 becomes 12 +Old DFA state 14 becomes 13 +Old DFA state 15 becomes 8 (formerly 9) +Old DFA state 16 becomes 14 +Old DFA state 17 becomes 15 +Old DFA state 18 becomes 16 +Old DFA state 19 becomes 17 +Old DFA state 20 becomes 18 +Old DFA state 21 becomes 19 +Old DFA state 22 becomes 20 +Old DFA state 23 becomes 21 +Old DFA state 24 becomes 17 (formerly 19) +Old DFA state 25 becomes 22 +Old DFA state 26 becomes 23 +Old DFA state 27 becomes 19 (formerly 21) +Old DFA state 28 becomes 21 (formerly 23) +Old DFA state 29 becomes 20 (formerly 22) +Old DFA state 30 becomes 24 +Old DFA state 31 becomes 25 +Old DFA state 32 becomes 26 +Old DFA state 33 becomes 19 (formerly 21) +Old DFA state 34 becomes 20 (formerly 22) +Old DFA 
state 35 becomes 24 (formerly 30) +Old DFA state 36 becomes 26 (formerly 32) +Old DFA state 37 becomes 25 (formerly 31) +Old DFA state 38 becomes 24 (formerly 30) +Old DFA state 39 becomes 25 (formerly 31) +Entry <in>, formerly state 0, now state 0 +------------------------------- +DFA structure after compression +------------------------------- +DFA state 0 + Forward route : + (START)->(HERE) + Transitions : + 0:[\t ] -> 0 + 1:[\r] -> 0 + 6:[\055] -> 1 + 8:[0-9] -> 1 + 11:[A-Z_a-z] -> 1 + +DFA state 1 + Forward route : (from state 0) + (START)->6:[\055]->(HERE) + Transitions : + EOS -> 2 + 0:[\t ] -> 3 + 1:[\r] -> 4 + 4:[*] -> 5 + 6:[\055] -> 1 + 7:[/] -> 6 + 8:[0-9] -> 1 + 9:[;] -> 7 + 10:[=] -> 8 + 11:[A-Z_a-z] -> 1 + NFA exit tags applying : + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + Attributes for <copier> : COPY_TO_NAME + Attributes for <action> : GOT_NAME + +DFA state 2 + Forward route : (from state 1) + (START)->6:[\055]->EOS->(HERE) + Transitions : + NFA exit tags applying : + GOT_TERMINATOR + Attributes for <action> : GOT_TERMINATOR + +DFA state 3 + Forward route : (from state 1) + (START)->6:[\055]->0:[\t ]->(HERE) + Transitions : + EOS -> 2 + 0:[\t ] -> 3 + 1:[\r] -> 4 + 4:[*] -> 5 + 6:[\055] -> 9 + 8:[0-9] -> 9 + 9:[;] -> 7 + 10:[=] -> 8 + 11:[A-Z_a-z] -> 9 + Use state 1 as basis (4 fixups) + NFA exit tags applying : + GOT_NAME_TRAILING_SPACE + COPY_TO_NAME + GOT_NAME_TRAILING_SPACE + COPY_TO_NAME + GOT_NAME_TRAILING_SPACE + COPY_TO_NAME + GOT_NAME_TRAILING_SPACE + COPY_TO_NAME + GOT_NAME_TRAILING_SPACE + COPY_TO_NAME + GOT_NAME_TRAILING_SPACE + COPY_TO_NAME + GOT_NAME_TRAILING_SPACE + COPY_TO_NAME + GOT_NAME_TRAILING_SPACE + COPY_TO_NAME + GOT_NAME_TRAILING_SPACE + COPY_TO_NAME + Attributes for <copier> : COPY_TO_NAME + Attributes for <action> : 
GOT_NAME_TRAILING_SPACE + +DFA state 4 + Forward route : (from state 1) + (START)->6:[\055]->1:[\r]->(HERE) + Transitions : + EOS -> 2 + 0:[\t ] -> 4 + 1:[\r] -> 4 + 9:[;] -> 7 + 10:[=] -> 8 + +DFA state 5 + Forward route : (from state 1) + (START)->6:[\055]->4:[*]->(HERE) + Transitions : + 8:[0-9] -> 10 + +DFA state 6 + Forward route : (from state 1) + (START)->6:[\055]->7:[/]->(HERE) + Transitions : + 5:[+.] -> 11 + 6:[\055] -> 11 + 8:[0-9] -> 11 + 11:[A-Z_a-z] -> 11 + 12:[\\] -> 11 + +DFA state 7 + Forward route : (from state 1) + (START)->6:[\055]->9:[;]->(HERE) + Transitions : + EOS -> 2 + 0:[\t ] -> 12 + 1:[\r] -> 12 + 6:[\055] -> 1 + 8:[0-9] -> 1 + 11:[A-Z_a-z] -> 1 + NFA exit tags applying : + GOT_TERMINATOR + Attributes for <action> : GOT_TERMINATOR + +DFA state 8 + Forward route : (from state 1) + (START)->6:[\055]->10:[=]->(HERE) + Transitions : + EOS -> 13 + 0:[\t ] -> 8 + 1:[\r] -> 8 + 2:[!#-),:<>-@[]^`{-~] -> 14 + 3:["] -> 15 + 4:[*] -> 14 + 5:[+.] -> 14 + 6:[\055] -> 14 + 7:[/] -> 14 + 8:[0-9] -> 14 + 10:[=] -> 14 + 11:[A-Z_a-z] -> 14 + +DFA state 9 + Forward route : (from state 3) + (START)->6:[\055]->0:[\t ]->6:[\055]->(HERE) + Transitions : + EOS -> 2 + 0:[\t ] -> 3 + 1:[\r] -> 4 + 4:[*] -> 5 + 6:[\055] -> 9 + 8:[0-9] -> 9 + 9:[;] -> 7 + 10:[=] -> 8 + 11:[A-Z_a-z] -> 9 + Use state 1 as basis (4 fixups) + NFA exit tags applying : + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + GOT_NAME + COPY_TO_NAME + Attributes for <copier> : COPY_TO_NAME + Attributes for <action> : GOT_NAME + +DFA state 10 + Forward route : (from state 5) + (START)->6:[\055]->4:[*]->8:[0-9]->(HERE) + Transitions : + 0:[\t ] -> 16 + 1:[\r] -> 16 + 8:[0-9] -> 10 + 10:[=] -> 17 + +DFA state 11 + Forward route : (from state 6) + (START)->6:[\055]->7:[/]->5:[+.]->(HERE) + Transitions : + EOS -> 2 + 0:[\t ] -> 18 + 1:[\r] -> 
18 + 5:[+.] -> 11 + 6:[\055] -> 11 + 8:[0-9] -> 11 + 9:[;] -> 7 + 11:[A-Z_a-z] -> 11 + 12:[\\] -> 11 + Use state 6 as basis (4 fixups) + NFA exit tags applying : + GOT_MAJORMINOR + COPY_TO_MINOR + GOT_MAJORMINOR + COPY_TO_MINOR + GOT_MAJORMINOR + COPY_TO_MINOR + Attributes for <copier> : COPY_TO_MINOR + Attributes for <action> : GOT_MAJORMINOR + +DFA state 12 + Forward route : (from state 7) + (START)->6:[\055]->9:[;]->0:[\t ]->(HERE) + Transitions : + EOS -> 2 + 0:[\t ] -> 12 + 1:[\r] -> 12 + 6:[\055] -> 1 + 8:[0-9] -> 1 + 11:[A-Z_a-z] -> 1 + Use state 7 as basis (0 fixups) + +DFA state 13 + Forward route : (from state 8) + (START)->6:[\055]->10:[=]->EOS->(HERE) + Transitions : + EOS -> 2 + 0:[\t ] -> 18 + 1:[\r] -> 18 + 9:[;] -> 7 + NFA exit tags applying : + GOT_NAMEVALUE + GOT_NAMEVALUE + GOT_NAMEVALUE + Attributes for <action> : GOT_NAMEVALUE + +DFA state 14 + Forward route : (from state 8) + (START)->6:[\055]->10:[=]->2:[!#-),:<>-@[]^`{-~]->(HERE) + Transitions : + EOS -> 2 + 0:[\t ] -> 19 + 1:[\r] -> 19 + 2:[!#-),:<>-@[]^`{-~] -> 14 + 4:[*] -> 14 + 5:[+.] -> 14 + 6:[\055] -> 14 + 7:[/] -> 14 + 8:[0-9] -> 14 + 9:[;] -> 7 + 10:[=] -> 14 + 11:[A-Z_a-z] -> 14 + Use state 8 as basis (5 fixups) + NFA exit tags applying : + GOT_NAMEVALUE + COPY_TO_VALUE + GOT_NAMEVALUE + COPY_TO_VALUE + GOT_NAMEVALUE + COPY_TO_VALUE + Attributes for <copier> : COPY_TO_VALUE + Attributes for <action> : GOT_NAMEVALUE + +DFA state 15 + Forward route : (from state 8) + (START)->6:[\055]->10:[=]->3:["]->(HERE) + Transitions : + 0:[\t ] -> 20 + 2:[!#-),:<>-@[]^`{-~] -> 20 + 4:[*] -> 20 + 5:[+.] 
-> 20 + 6:[\055] -> 20 + 7:[/] -> 20 + 8:[0-9] -> 20 + 9:[;] -> 20 + 10:[=] -> 20 + 11:[A-Z_a-z] -> 20 + 12:[\\] -> 21 + +DFA state 16 + Forward route : (from state 10) + (START)->6:[\055]->4:[*]->8:[0-9]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 16 + 1:[\r] -> 16 + 10:[=] -> 17 + +DFA state 17 + Forward route : (from state 10) + (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->(HERE) + Transitions : + 0:[\t ] -> 17 + 1:[\r] -> 17 + 2:[!#-),:<>-@[]^`{-~] -> 22 + 3:["] -> 23 + 4:[*] -> 22 + 5:[+.] -> 22 + 6:[\055] -> 22 + 7:[/] -> 22 + 8:[0-9] -> 22 + 10:[=] -> 22 + 11:[A-Z_a-z] -> 22 + +DFA state 18 + Forward route : (from state 11) + (START)->6:[\055]->7:[/]->5:[+.]->0:[\t ]->(HERE) + Transitions : + EOS -> 2 + 0:[\t ] -> 18 + 1:[\r] -> 18 + 9:[;] -> 7 + Use state 13 as basis (0 fixups) + +DFA state 19 + Forward route : (from state 14) + (START)->6:[\055]->10:[=]->2:[!#-),:<>-@[]^`{-~]->0:[\t ]->(HERE) + Transitions : + EOS -> 2 + 0:[\t ] -> 19 + 1:[\r] -> 19 + 9:[;] -> 7 + NFA exit tags applying : + GOT_NAMEVALUE + GOT_NAMEVALUE + GOT_NAMEVALUE + Attributes for <action> : GOT_NAMEVALUE + +DFA state 20 + Forward route : (from state 15) + (START)->6:[\055]->10:[=]->3:["]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 20 + 2:[!#-),:<>-@[]^`{-~] -> 20 + 3:["] -> 19 + 4:[*] -> 20 + 5:[+.] -> 20 + 6:[\055] -> 20 + 7:[/] -> 20 + 8:[0-9] -> 20 + 9:[;] -> 20 + 10:[=] -> 20 + 11:[A-Z_a-z] -> 20 + 12:[\\] -> 21 + Use state 15 as basis (1 fixups) + NFA exit tags applying : + COPY_TO_VALUE + COPY_TO_VALUE + COPY_TO_VALUE + Attributes for <copier> : COPY_TO_VALUE + +DFA state 21 + Forward route : (from state 15) + (START)->6:[\055]->10:[=]->3:["]->12:[\\]->(HERE) + Transitions : + 3:["] -> 20 + 12:[\\] -> 20 + +DFA state 22 + Forward route : (from state 17) + (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->2:[!#-),:<>-@[]^`{-~]->(HERE) + Transitions : + EOS -> 2 + 0:[\t ] -> 24 + 1:[\r] -> 24 + 2:[!#-),:<>-@[]^`{-~] -> 22 + 4:[*] -> 22 + 5:[+.] 
-> 22 + 6:[\055] -> 22 + 7:[/] -> 22 + 8:[0-9] -> 22 + 9:[;] -> 7 + 10:[=] -> 22 + 11:[A-Z_a-z] -> 22 + Use state 17 as basis (5 fixups) + NFA exit tags applying : + GOT_NAMEVALUE_CONT + COPY_TO_VALUE + GOT_NAMEVALUE_CONT + COPY_TO_VALUE + GOT_NAMEVALUE_CONT + COPY_TO_VALUE + Attributes for <copier> : COPY_TO_VALUE + Attributes for <action> : GOT_NAMEVALUE_CONT + +DFA state 23 + Forward route : (from state 17) + (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->3:["]->(HERE) + Transitions : + 0:[\t ] -> 25 + 2:[!#-),:<>-@[]^`{-~] -> 25 + 4:[*] -> 25 + 5:[+.] -> 25 + 6:[\055] -> 25 + 7:[/] -> 25 + 8:[0-9] -> 25 + 9:[;] -> 25 + 10:[=] -> 25 + 11:[A-Z_a-z] -> 25 + 12:[\\] -> 26 + +DFA state 24 + Forward route : (from state 22) + (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->2:[!#-),:<>-@[]^`{-~]->0:[\t ]->(HERE) + Transitions : + EOS -> 2 + 0:[\t ] -> 24 + 1:[\r] -> 24 + 9:[;] -> 7 + NFA exit tags applying : + GOT_NAMEVALUE_CONT + GOT_NAMEVALUE_CONT + GOT_NAMEVALUE_CONT + Attributes for <action> : GOT_NAMEVALUE_CONT + +DFA state 25 + Forward route : (from state 23) + (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->3:["]->0:[\t ]->(HERE) + Transitions : + 0:[\t ] -> 25 + 2:[!#-),:<>-@[]^`{-~] -> 25 + 3:["] -> 24 + 4:[*] -> 25 + 5:[+.] -> 25 + 6:[\055] -> 25 + 7:[/] -> 25 + 8:[0-9] -> 25 + 9:[;] -> 25 + 10:[=] -> 25 + 11:[A-Z_a-z] -> 25 + 12:[\\] -> 26 + Use state 23 as basis (1 fixups) + NFA exit tags applying : + COPY_TO_VALUE + COPY_TO_VALUE + COPY_TO_VALUE + Attributes for <copier> : COPY_TO_VALUE + +DFA state 26 + Forward route : (from state 23) + (START)->6:[\055]->4:[*]->8:[0-9]->10:[=]->3:["]->12:[\\]->(HERE) + Transitions : + 3:["] -> 25 + 12:[\\] -> 25 + + +Entry states in DFA: +Entry <in> : 0 diff --git a/src/mairix/nvptypes.h b/src/mairix/nvptypes.h @@ -0,0 +1,43 @@ +/* + mairix - message index builder and finder for maildir folders. + + ********************************************************************** + * Copyright (C) Richard P. 
Curnow 2006,2007 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +#ifndef NVPTYPES_H +#define NVPTYPES_H + +enum nvp_action { + GOT_NAMEVALUE, + GOT_NAMEVALUE_CONT, + GOT_NAME, + GOT_NAME_TRAILING_SPACE, + GOT_MAJORMINOR, + GOT_TERMINATOR, + GOT_NOTHING +}; + +enum nvp_copier { + COPY_TO_NAME, + COPY_TO_MINOR, + COPY_TO_VALUE, + COPY_NOWHERE +}; + +#endif diff --git a/src/mairix/old_docs/mairix.texi b/src/mairix/old_docs/mairix.texi @@ -0,0 +1,885 @@ +\input texinfo +@c {{{ Main header stuff +@afourwide +@paragraphindent 0 +@setfilename mairix.info +@settitle User guide for the mairix program +@c @setchapternewpage off + +@ifinfo +@dircategory Utilities +@direntry +* mairix: (mairix). Indexing/searching utility for maildir folders +@end direntry +@end ifinfo + +@titlepage +@sp 10 +@title The mairix program +@subtitle This manual describes how to use +@subtitle the mairix program for indexing and +@subtitle searching email messages stored in maildir folders. +@author Richard P. Curnow +@page +@vskip 0pt plus 1filll +Copyright @copyright{} 2002,2003,2004,2005 Richard P. 
Curnow +@end titlepage + +@contents +@c }}} + +@ifnottex +@node Top +@top +@menu +* Introduction:: +* Installation:: Compiling and installing the software +* Use:: Quickstart guide and examples of use +@end menu +@end ifnottex + +@node Introduction +@chapter Introduction +@menu +* Background:: How mairix came to be written. +@end menu + +@node Background +@section Background +The @emph{mairix} program arose from a need to index and search 100's or 1000's +of email messages in an efficient way. It began life supporting just Maildir +format folder, but now MH and mbox formats are also supported. + +I use the @emph{mutt} email client. @emph{mutt} has a feature called +@emph{limit}, where the display of messages in the current folder can be +filtered based on matching regular expressions in particular parts of the +messages. I find this really useful. But there is a snag - it only works on +the current folder. If you have messages spread across many folders, you're +out of luck with limit. OK - so why not keep all messages in a single folder? +The problem is that the performance drops badly. This is true regardless of +folder format - mbox, maildir etc, though probably worse for some formats than +others depending on the sizes of messages in the folders. + +So on the one hand, we want small folders to keep the performance high. But on +the other hand, we want useful searching. + +I use the maildir format for my incoming folders. This scheme has one file per +message. On my inboxes@footnote{of which I have many, because I (naturally) +use @emph{procmail} to split my incoming mail}, I like this for 2 reasons : + +@itemize @bullet +@item Fast deletion of messages I don't want to keep (spam, circulars, mailing +list threads I'm not interested in etc). (Compare mbox, where the whole file +would need to be rewritten.) +@item No locking issues whatever. 
Maybe I'm over cautious, but I don't really +trust all that locking stuff to protect a single mbox file in all cases, and a +single file seems just too vulnerable to corruption. Also, I sometimes read +the mail over NFS mounted filesystems, where locking tends to be a real +disaster area. +@end itemize + +Since I'm using maildir for inboxes, I've traditionally used it for all my +folders, for uniformity. + +So, I hear you ask, if you use a one-file-per-message format, why not just use +find + egrep to search for messages? I saw the following problems with this: + +@itemize @bullet +@item What if I want to find all messages to/cc me, from Homer Simpson, dated +between 1 and 2 months ago, with the word "wubble" in the body? This would +involve a pretty nasty set of regexps in a pipeline of separate egreps (and +bear in mind, headers could be split over line boundaries...) +@item What if the message body has quoted-printable (or worse, base64) transfer +encoding? The egrep for "wubble" could come very unstuck. +@item How would the matching messages be conveniently arranged into a new +folder to allow browsing with mutt? +@item What if I wanted to see all messages in the same threads as those +matching the above condition? +@item If I had 1000's of messages, this wasn't going to be quick, especially if +I wanted to keep tuning the search condition.@footnote{This may be a non-issue +for people with the latest technology under their desk, but at the time I +started writing mairix, I had a 1996 model 486 at home}. +@end itemize + +So find + egrep was a non-starter. I looked around for other technology. I +found @emph{grepmail}, but this only works for mbox format folders, and +involved scanning each message every time (so lost on the speed issue). + +I decided that this was going to be my next project, and mairix was born. By +the way, the name originally came from abbreviating @emph{MAildIR IndeX}, but +this is now an anachronism since MH and mbox are supported too. 
+ +@node Installation +@chapter Installation + +There is not much to this. In the simplest case you can just do + +@example +./configure +make +make install +@end example + +You need to be root to run the final step unless you're installing under your +own home directory somewhere. + +However, you might want to tune the options further. The @file{configure} +script shares its common options with the usual autoconf-generated scripts, +even though it's not autoconf-generated itself. For example, a fuller build could use + +@example +CC=gcc CFLAGS="-O2 -Wall" ./configure \ + --prefix=/opt/mairix \ + --infodir=/usr/share/info +make +make install +make docs +make install_docs +@end example + +The final step is to create a @file{~/.mairixrc} file. An example is included +in the file @file{dotmairixrc.eg}. Just copy that to @file{~/.mairixrc} and edit +it. + +@node Use +@chapter Use + +@menu +* use_intro:: Overview of use +* capabilities:: Indexing strategy and search capabilities +* mairixrc:: The @file{~/.mairixrc} file +* mfolder_setup:: Setting up the match folder +* command_line:: Command line options +* date_syntax:: Syntax used for date searches +@end menu + +@node use_intro +@section Overview of use + +@emph{mairix} has two modes of use : index building and searching. The +searching mode runs whenever the command line contains any expressions to +search for. Otherwise, the indexing mode is run. + +To begin with, an indexing run must be performed before searching will work at +all. Otherwise your search will be operating on an empty database and won't +produce any output. + +The output of the search mode is usually placed in a @emph{match folder}. You +can select the type of folder that is used. For Maildir, it is just a normal +maildir directory (i.e. containing @file{new}, @file{tmp} and @file{cur}) +subdirectories. If you select MH it is a directory containing entries with +numerical filenames, so you can open it as a normal MH folder in your mail +program. 
If you select mbox, it is a single file in mbox format. + +You configure the path for the match folder in your @file{~/.mairixrc} file. +When writing to a mfolder in maildir or MH format, mairix will populate it with +symbolic links pointing to the paths of the real messages that were matched by +the search expression.@footnote{Although symlinks use up more inodes than hard +links, I decided they were more useful because it makes it possible to see the +filenames of the original messages via @command{ls -l}.} If a message in a +mbox folder matches, mairix will copy the message contents to a single file in +the mfolder directory. + +If the mfolder is in mbox format, mairix will copy the message contents of each +matching message into the mfolder file. (There is no way of exploiting +symlinks to avoid the copying in this case.) + +If desired, mairix can produce just a list of files that match the search +expression and omit the building of the match folder (the so-called 'raw' +output mode). This mode of operation may be useful in communicating the +results of the search to other programs. + +@node capabilities +@section Indexing strategy and search capabilities + +@emph{mairix} works exclusively in terms of @emph{words}. The index that's +built in non-search mode contains a table of which words occur in which +messages. Hence, the search capability is based on finding messages that +contain particular words. @emph{mairix} defines a word as any string of +alphanumeric characters + underscore. Any whitespace, punctuation, hyphens etc +are treated as word boundaries. + +@emph{mairix} has special handling for the @t{To:}, @t{Cc:} and @t{From:} +headers. Besides the normal word scan, these headers are scanned a second +time, where the characters @samp{@@}, @samp{-} and @samp{.} are also treated as +word characters. This allows most (if not all) email addresses to appear in +the database as single words. 
So if you have a mail from +@t{wibble@@foobar.zzz}, it will match on both these searches + +@example +mairix f:foobar +mairix f:wibble@@foobar.zzz +@end example + +It should be clear by now that the searching cannot be used to find messages +matching general regular expressions. Personally, I don't find that much use +anyway for locating old messages - I'm far more likely to remember particular +keywords that were in the messages, or details of the recipients, or the +approximate date. + +It's also worth pointing out that there is no 'locality' information stored, so +you can't search for messages that have one word 'close' to some other word. +For every message and every word, there is a simple yes/no condition stored - +whether the message contains the word in a particular header or in the body. +So far this has proved to be adequate. mairix has a similar feel to using an +Internet search engine. + +There are three further searching criteria that are supported (besides word +searching): + +@itemize @bullet +@item Searching for messages whose @t{Date:} header is in a particular range +@item Searching for messages whose size is in a particular range. (I see this +being used mainly for finding 'huge' messages, as you're most likely to want to +cull these to recover disc space.) +@item Searching for messages with a particular substring in their paths. You +can use this feature to limit the search to particular folders in your mail +hierarchy, for example. +@end itemize + +@node mairixrc +@section The @file{~/.mairixrc} file + +@subsection Overview + +This file contains information about where you keep your mail folders, where +you want the index file to be stored and where you want the match folder to +be, into which the search mode places the symlinks. + +mairix searches for this file at @file{~/.mairixrc} unless you specify the +@samp{-f} command line option. + +If a # character appears in the file, the rest of that line is ignored. 
This +allows you to specify comments. + +There are 3 entries (@samp{base}, @samp{mfolder} and @samp{database}) that must +appear in the file. Also at least one of @samp{maildir}, @samp{mh} and +@samp{mbox} must appear. Optionally, the @samp{mformat} entry may +appear. An example illustrates: + +@example +base=/home/richard/mail +maildir=new-mail:new-chrony +maildir=recent...:ancient... +mh=an_mh_folder +mbox=archive1:archive2 +mfolder=mfolder +mformat=maildir +database=/home/richard/.mairix_database +@end example + +@subsection mairixrc file keys +The keys are as follows: + +@table @asis +@item base +This is the path to the common parent directory of all your maildir folders. +@item maildir +This is a colon-separated list of the Maildir folders (relative to @samp{base}) +that you want indexed. Any entry that ends @samp{...} is recursively scanned +to find any Maildir folders underneath it. + +More than one line starting with @samp{maildir} can be included. In this case, +mairix joins the lines together with colons as though a single list of folders had +been given on a single very long line. + +Each colon-separated entry may be a wildcard. See the discussion under mbox (below) for the +wildcard syntax. For example + +@example +maildir=zzz/foo*... +@end example + +will match maildir folders like these (relative to the folder_base) + +@example +zzz/foobar/xyz +zzz/fooquux +zzz/foo +zzz/fooabc/u/v/w +@end example + +and + +@example +maildir=zzz/foo[abc]* +@end example + +will match maildir folders like these (relative to the folder_base) + +@example +zzz/fooa +zzz/fooaaaxyz +zzz/foobcd +zzz/fooccccccc +@end example + +If a folder name contains a colon, you can write this by using the sequence +@samp{\:} to escape the colon. Otherwise, the backslash character is treated +normally. (If the folder name actually contains the sequence @samp{\:}, you're +out of luck.) 
+ +@item mh +This is a colon-separated list of the MH folders (relative to @samp{base}) that +you want indexed. Any entry that ends @samp{...} is recursively scanned to +find any MH folders underneath it. + +More than one line starting with @samp{mh} can be included. In this case, +mairix joins the lines together with colons as though a single list of folders had +been given on a single very long line. + +Each colon-separated entry may be a wildcard, see the discussion under maildir +(above) and mbox (below) for the syntax and semantics of specifying wildcards. + +@item mbox +This is a colon-separated list of the mbox folders (relative to @samp{base}) that +you want indexed. + +Each colon-separated item in the list can be suffixed by @samp{...}. If the +item matches a regular file, that file is treated as a mbox folder and the +@samp{...} suffix is ignored. If the item matches a directory, a recursive +scan of everything inside that directory is made, and all regular files are +initially considered as mbox folders. (Any directories found in this scan are +themselves scanned, since the scan is recursive.) + +Each colon-separated item may contain wildcard operators, but only in its final +path component. The wildcard operators currently supported are + +@table @asis +@item * +Match zero or more characters (each character matched is arbitrary) +@item ? +Match exactly one arbitrary character +@item [abcs-z] +Character class : match a single character from the set a, b, c, s, t, u, v, w, +x, y and z. + +To include a literal @samp{]} in the class, place it immediately after the opening @samp{[}. +To include a literal @samp{-} in the class, place it immediately before the closing @samp{]}. + +@end table + +If these metacharacters are included in non-final path components, they have no +special meaning. 
+ +Here are some examples + +@table @asis +@item mbox=foo/bar* +matches @file{foo/bar}, @file{foo/bar1}, @file{foo/barrrr} etc +@item mbox=foo*/bar* +matches @file{foo*/bar}, @file{foo*/bar1}, @file{foo*/barrrr} etc +@item mbox=foo/* +matches @file{foo/bar}, @file{foo/bar1}, @file{foo/barrrr}, @file{foo/foo}, @file{foo/x} etc +@item mbox=foo... +matches any regular file in the tree rooted at @file{foo} +@item mbox=foo/*... +same as before +@item mbox=foo/[a-z]*... +matches @file{foo/a}, @file{foo/aardvark/xxx}, @file{foo/zzz/foobar}, +@file{foo/w/x/y/zzz}, but @b{not} @file{foo/A/foobar} +@end table + +Regular files that are mbox folder candidates are examined internally. Only +files containing standard mbox @samp{From } separator lines will be scanned for +messages. + +If a regular file has a name ending in @file{.gz}, and gzip support is compiled +into the mairix binary, the file will be treated as a gzipped mbox. + +If a regular file has a name ending in @file{.bz2}, and bzip support is compiled +into the mairix binary, the file will be treated as a bzip2'd mbox. + +More than one line starting with @samp{mbox} can be included. In this case, +mairix joins the lines together with colons as though a single list of folders had +been given on a single very long line. + +mairix performs @b{no} locking of mbox folders when it is accessing them. If a +mail delivery program is modifying the mbox at the same time, it is likely that +one or more messages in the mbox will never get indexed by mairix (until the +database is removed and recreated from scratch, anyway.) The assumption is +that mairix will be used to index archive folders rather than incoming ones, so +this is unlikely to be much of a problem in reality. + +@emph{mairix} can support a maximum of 65536 separate mboxes, and a maximum of +65536 messages within any one mbox. + +@item omit +This is a colon-separated list of glob patterns for folders to be +omitted from the indexing. 
This allows wide wildcards to be used in the +@emph{maildir}, @emph{mh} and @emph{mbox} arguments, with the @emph{omit} +option used to selectively remove unwanted folders from the folder lists. +Within the glob patterns, a single @samp{*} matches any sequence of characters +other than @samp{/}. However @samp{**} matches any sequence of characters +including @samp{/}. This allows glob patterns to be constructed which have a +wildcard for just one directory component, or for any number of directory +components. + +The @emph{omit} option can be specified as many times as required so that the +list of patterns doesn't all have to fit on one line. + +As an example, + +@example +mbox=bulk... +omit=bulk/spam* +@end example + +will index all mbox folders at any level under the @file{bulk} subdirectory of +the base folder, except for those folders whose names start @file{bulk/spam}, +e.g. @file{bulk/spam}, @file{bulk/spam2005} etc. In contrast, + +@example +mbox=bulk... +omit=bulk/spam** +@end example + +will index all mbox folders at any level under the @file{bulk} subdirectory of +the base folder, except for those folders whose names start @file{bulk/spam}, +e.g. @file{bulk/spam}, @file{bulk/spam2005}, @file{bulk/spam/2005}, +@file{bulk/spam/2005/jan} etc. + +@item nochecks +This takes no arguments. If a line starting with @samp{nochecks} is present, +it is the equivalent of specifying the @samp{-Q} flag to every indexing run. + +@item mfolder +This defines the name of the @emph{match} folder (within the directory +specified by @samp{base}) into which the search mode writes its output. +(If the mformat used is @samp{raw}, then this setting is not +used and may be excluded.) + +If the first character of the @b{mfolder} value is @samp{/} or @samp{.}, it is +taken as a pathname in its own right. This allows you to specify absolute +paths and paths relative to the current directory where the mfolder should be +written. 
Otherwise, the value of @b{mfolder} is appended to the value of +@b{base}, in the same way as for the source folders. + +@item mformat +This defines the type of folder used for the @emph{match folder} where the +search results go. There are four valid settings for this @samp{mh}, +@samp{maildir}, @samp{mbox} or @samp{raw}. If the @samp{raw} setting is used then +mairix will just print out the path names of the files that match and +no match folder will be created. @samp{maildir} is the default if this +option is not defined. The setting is case-insensitive. + +@item database +This defines the path where mairix's index database is kept. You can keep this +file anywhere you like. +@end table + +It is illegal to have a folder listed twice. Once mairix has built a list of +all the messages currently in your folders, it will search for duplicates +before proceeding. If any duplicates are found (arising from the same folder +being specified twice), it will give an error message and exit. This is to +prevent corrupting the index database file. + +@subsection mairixrc expansions + +The part of each line in @file{.mairixrc} following the equals sign can contain +the following types of expansion: + +@table @asis +@item Home directory expansion +If the sequence @samp{~/} appears at the start of the text after the equals +sign, it is expanded to the user's home directory. Example: + +@example +database=~/Mail/mairix_database +@end example + +@item Environment expansion +If a @samp{$} is followed by a sequence of alpha-numeric characters (or +@samp{_}), the whole string is replaced by looking up the corresponding +environment variable. Similarly, if @samp{$} is followed by an open brace +(@samp{@{}), everything up to the next close brace is looked up as an +environment variable and the result replaces the entire sequence. 
+ +Suppose in the shell we do +@example +export FOO=bar +@end example + +and the @file{.mairixrc} file contains +@example +maildir=xxx/$FOO +mbox=yyy/a$@{FOO@}b +@end example + +this is equivalent to +@example +maildir=xxx/bar +mbox=yyy/abarb +@end example + +If the specified environment variable is not set, the replacement is the empty +string. + +@end table + +@node mfolder_setup +@section Setting up the match folder +If the match folder does not exist when running in search mode, it is +automatically created. For @samp{mformat=maildir} (the default), this +should be all you need to do. If you use @samp{mformat=mh}, you may +have to run some commands before your mailer will recognize the folder. e.g. +for mutt, you could do + +@example +mkdir -p /home/richard/Mail/mfolder +touch /home/richard/Mail/mfolder/.mh_sequences +@end example + +which seems to work. Alternatively, within mutt, you could set @var{mbox_type} +to @samp{mh} and save a message to @samp{+mfolder} to have mutt set up the +structure for you in advance. + +If you use Sylpheed, the best way seems to be to create the new folder from +within Sylpheed before letting mairix write into it. This seems to be all you +need to do. + +@node command_line +@section Command line options + +The command line syntax is + +For indexing mode: +@example +mairix [-f path] [-p] [-v] [-Q] +@end example +For search mode +@example +mairix [-f path] [-t] [-v] [-a] [-r] [-o mfolder] expr1 [expr2] ... [exprn] +@end example +For database dump mode +@example +mairix [-f path] -d +@end example + +The @samp{-f} or @samp{--rcfile} flag allows a different path to the +@file{mairixrc} file to be given, replacing the default of @file{~/.mairixrc}. + +The @samp{-p} or @samp{--purge} flag is used in indexing mode. Indexing works +incrementally. When new messages are found, they are scanned and information +about the words they contain is appended onto the existing information. 
When +messages are deleted, holes are normally left in the message sequence. These +holes take up space in the database file. This flag will compress the deleted +paths out of the database to save space. Additionally, where @samp{mbox} +folders are in use, information in the database about folders that no longer +exist, or which are no longer referenced in the rc-file, will be purged also. + +The @samp{-v} or @samp{--verbose} flag is used in indexing mode. It causes +more information to be shown during the indexing process. In search mode, it +causes debug information to be shown if there are problems creating the +symlinks. (Normally this would be an annoyance. If a message matches multiple +queries when using @samp{-a}, mairix will try to create the same symlink +multiple times. This prevents the same message being shown multiple times in +the match folder.) + +The @samp{-Q} or @samp{--no-integrity-checks} flag is used in indexing mode. +Normally, mairix will do various integrity checks on the database after loading +it in, and before writing the modified database out again. The checking helps +to detect mairix bugs much earlier, but it has a performance penalty. This +flag skips the checks, at the cost of some loss in robustness. See also the +@samp{nochecks} directive in @ref{mairixrc}. + +The @samp{--unlock} flag is used in any mode. mairix dot-locks the database +file to prevent corruption due to concurrent accesses. If the process holding +the lock exits prematurely for any reason, the lockfile will be left behind. +By using the @samp{--unlock} option, an unwanted lockfile can be conveniently +removed. + +The @samp{-t} or @samp{--threads} option applies to search mode. Normally, +only the messages matching all the specified expressions are included in the +@emph{match folder} that is built. With the @samp{-t} flag, any message in +the same thread as one of the matched messages will be included too. 
Note, the +threading is based on processing the @t{Message-ID}, @t{In-Reply-To} and +@t{References} headers in the messages. Some mailers don't generate these +headers in a co-operative way and will cause problems with this threading +support. (Outlook seems to be one culprit.) If you are plagued by this +problem, the 'edit threads' patch to mutt may be useful to you. + +The @samp{-d} or @samp{--dump} option causes mairix to dump the database +contents in human-readable form to stdout. It is mainly for use in debugging. +If this option is specified, neither indexing nor searching is performed. + +The @samp{-a} or @samp{--augment} option also applies to search mode. +Normally, the first action of the search mode is to clear any existing message +links from the match folder. With the @samp{-a} flag, this step is +suppressed. It allows the folder contents to be built up by matching with 2 or +more diverse sets of match expressions. If this mode is used, and a message +matches multiple queries, only a single symlink will be created for it. + +The @samp{-r} or @samp{--raw-output} option is used to force the raw output +mode for a particular search, in preference to the output format defined by the +@samp{mformat} line in the @file{mairixrc} file. This may be useful for +identifying which mbox contains a particular match, since there is no way to see +this when the matching messages are placed in the mfolder in this case. (Note +for matches in maildir and MH folders when @samp{mformat} is maildir or MH, the +symbolic links in the mfolder will show the path to the matching message.) + +The @samp{-o} or @samp{--mfolder} option is used in search mode to specify a +match folder different to the one specified in the @file{mairixrc} to be +used. The path given by the @samp{mfolder} argument after this flag is +relative to the folder base directory given in the @file{mairixrc} file, in the +same way as the directory in the mfolder specification in that file is. 
So if +your @file{mairixrc} file contains + +@example +base=/home/foobar/Mail +@end example + +and you run mairix like this + +@example +mairix -o mfolder2 make,money,fast +@end example + +mairix will find all of your saved junk emails containing these three words and +put the results into @file{/home/foobar/Mail/mfolder2}. + +The @samp{-o} argument obeys the same conventions regarding initial @samp{/} +and @samp{.} characters as the @b{mfolder} line in the @file{.mairixrc} file +does. + +@emph{Mairix} will refuse to output search results (whether specified +by the @samp{-o} or in the @file{.mairixrc} file) into one of the +folders that are indexed; it figures out that list by looking in the +@file{.mairixrc} file, or in the file you specify using the @samp{-f} +option. This sanity check prevents you inadvertently destroying one +of your important folders (but won't catch all such cases, sadly). + +The search mode runs when there is at least one search expression. Search +expressions can take forms such as (in increasing order of complexity): + +@itemize @bullet +@item A date expression. The format for specifying the date is described in section @ref{date_syntax}. + +@item A size expression. This matches all messages whose size in bytes is in a +particular range. For example, to match all messages bigger than 1 Megabyte +the following command can be used + +@example +mairix z:1m- +@end example + +To match all messages between 10kbytes and 20kbytes in size, the following +command can be used: + +@example +mairix z:10k-20k +@end example + +@item A word, e.g. @samp{pointer}. This matches any message with the word +@samp{pointer} in the @t{To}, @t{Cc}, @t{From} or @t{Subject} headers, or in +the message body.@footnote{Message body is taken to mean any body part of type +text/plain or text/html. For text/html, text within meta tags is ignored. In +particular, the URLs inside <A HREF="..."> tags are not currently indexed. +Non-text attachments are ignored. 
If there's an attachment of type +message/rfc822, this is parsed and the match is performed on this sub-message +too. If a hit occurs, the enclosing message is treated as having a hit.} + +@item A word in a particular part of the message, e.g. @samp{s:pointer}. This +matches any message with the word @samp{pointer} in the subject. The +qualifiers for this are : + +@table @asis +@item @t{t:pointer} +to match @samp{pointer} in the @t{To:} header, +@item @t{c:pointer} +to match @samp{pointer} in the @t{Cc:} header, +@item @t{a:pointer} +to match @samp{pointer} in the @t{To:}, @t{Cc:} or @t{From:} headers (@samp{a} meaning @samp{address}), +@item @t{f:pointer} +to match @samp{pointer} in the @t{From:} header, +@item @t{s:pointer} +to match @samp{pointer} in the @t{Subject:} header, +@item @t{b:pointer} +to match @samp{pointer} in the message body. +@item @t{m:pointer} +to match messages having a Message-ID header of @samp{pointer}. +@end table + +Multiple fields may be specified, e.g. @t{sb:pointer} to match in the +@t{Subject:} header or the body. + +@item A negated word, e.g. @samp{s:~pointer}. This matches all messages that +don't have the word @samp{pointer} in the subject line. + +@item A substring match, e.g. @samp{s:point=}. This matches all messages +containing a word in their subject line where the word has @samp{point} as a +substring, e.g. @samp{pointer}, @samp{disappoint}. + +@item An approximate match, e.g. @samp{s:point=1}. This matches all messages +containing a word in their subject line where the word has @samp{point} as a +substring with at most one error, e.g. @samp{jointed} contains @samp{joint} +which can be got from @samp{point} with one letter changed. An error can be a +single letter changed, inserted or deleted. + +@item A left-anchored substring match, e.g. @samp{s:^point=}. This matches all +messages containing a word in their subject line where the word begins with the +string @samp{point}. 
(This feature is intended to be useful for inflected +languages where the substring search is used to avoid the grammatical ending on +the word.) This left-anchored facility can be combined with the approximate +match facility, e.g. @samp{s:^point=1}. + +Note, if the @samp{^} prefix is used without the @samp{=} suffix, it is ignored. +For example, @samp{s:^point} means the same thing as @samp{s:point}. + +@item A disjunction, e.g. @samp{s:pointer/dereference}. This matches all +messages with one or both of the words @samp{pointer} and @samp{dereference} in +their subject lines. + +@item Each disjunction may be a conjunction, e.g. +@samp{s:null,pointer/dereference=2} matches all messages whose subject lines +either contain both the words @samp{null} and @samp{pointer}, or contain the +word @samp{dereference} with up to 2 errors (or both). + +@item A path expression. This matches all messages with a particular substring +in their path. The syntax is very similar to that for words within the message +(above), and all the rules for @samp{+}, @samp{,}, approximate matching etc are +the same. The word prefix used for a path expression is @samp{p:}. Examples: + +@example +mairix p:/archive/ +@end example + +matches all messages with @samp{/archive/} in their path, and + +@example +mairix p:wibble=1 s:wibble=1 +@end example + +matches all messages with @samp{wibble} in their path and in their subject +line, allowing up to 1 error in each case (the errors may be different for a +particular message.) + +Path expressions always use substring matches and never exact matches (it's +very unlikely you want to type in the whole of a message path as a search +expression!) The matches are always @b{case-sensitive}. (All matches on words +within messages are case-insensitive.) There is a limit of 32 characters on +the match expression. 
+ +@end itemize + +The binding order of the constructions is: + +@enumerate +@item Individual command line arguments define separate conditions which are +AND-ed together + +@item Within a single argument, the letters before the colon define which +message parts the expression applies to. If there is no colon, the expression +applies to all the headers listed earlier and the body. + +@item After the colon, slashes delineate separate disjuncts, which are OR-ed together. + +@item Each disjunct may contain separate conjuncts, which are separated by +commas. These conditions are AND-ed together. + +@item Each conjunct may start with a tilde to negate it, and may be followed by +an equals sign to indicate a substring match, optionally followed by an integer to +define the maximum number of errors allowed. + +@end enumerate + +Now some examples. Suppose my email address is @email{richard@@doesnt.exist}. + +The following will match all messages newer than 3 months from me with the word +@samp{chrony} in the subject line: + +@example +mairix d:3m- f:richard,doesnt,exist s:chrony +@end example + +Suppose I don't mind a few spurious matches on the address, I want a wider date +range, and I suspect that some messages I replied to might have had the subject +keyword spelt wrongly (let's allow up to 2 errors): + +@example +mairix d:6m- f:richard s:chrony=2 +@end example + +@node date_syntax +@section Syntax used for specifying dates +This section describes the syntax used for specifying dates when searching +using the @samp{d:} option. + +Dates are specified as a range. The start and end of the range can both be +specified. Alternatively, if the start is omitted, it is treated as being the +beginning of time. If the end is omitted, it is treated as the current time. + +There are 4 basic formats: +@table @samp +@item d:start-end +Specify both start and end explicitly +@item d:start- +Specify start, end is the current time +@item d:-end +Specify end, start is 'a long time ago' (i.e. 
early enough to include any message). +@item d:period +Specify start and end implicitly, as the start and end of the period given. +@end table + +The start and end can be specified either absolute or relative. A relative +endpoint is given as a number followed by a single letter defining the scaling: + +@multitable @columnfractions 0.15 0.2 0.2 0.45 +@item @b{letter} @tab @b{meaning} @tab @b{example} @tab @b{meaning} +@item d @tab days @tab 3d @tab 3 days +@item w @tab weeks @tab 2w @tab 2 weeks (14 days) +@item m @tab months @tab 5m @tab 5 months (150 days) +@item y @tab years @tab 4y @tab 4 years (4*365 days) +@end multitable + +Months are always treated as 30 days, and years as 365 days, for this purpose. + +Absolute times can be specified in a lot of forms. Some forms have different +meanings when they define a start date from that when they define an end date. +Where a single expression specifies both the start and end (i.e. where the +argument to d: doesn't contain a @samp{-}), it will usually have different +interpretations in the two cases. + +In the examples below, suppose the current date is Sunday May 18th, 2003 (when +I started to write this material.) 
+ +@multitable @columnfractions 0.24 0.24 0.24 0.28 +@item @b{Example} @tab @b{Start date} @tab @b{End date} @tab @b{Notes} +@item d:20030301@minus{}20030425 @tab March 1st, 2003 @tab 25th April, 2003 +@item d:030301@minus{}030425 @tab March 1st, 2003 @tab April 25th, 2003 @tab century assumed +@item d:mar1@minus{}apr25 @tab March 1st, 2003 @tab April 25th, 2003 +@item d:Mar1@minus{}Apr25 @tab March 1st, 2003 @tab April 25th, 2003 @tab case insensitive +@item d:MAR1@minus{}APR25 @tab March 1st, 2003 @tab April 25th, 2003 @tab case insensitive +@item d:1mar@minus{}25apr @tab March 1st, 2003 @tab April 25th, 2003 @tab date and month in either order +@item d:2002 @tab January 1st, 2002 @tab December 31st, 2002 @tab whole year +@item d:mar @tab March 1st, 2003 @tab March 31st, 2003 @tab most recent March +@item d:oct @tab October 1st, 2002 @tab October 31st, 2002 @tab most recent October +@item d:21oct@minus{}mar @tab October 21st, 2002 @tab March 31st, 2003 @tab start before end +@item d:21apr@minus{}mar @tab April 21st, 2002 @tab March 31st, 2003 @tab start before end +@item d:21apr@minus{} @tab April 21st, 2003 @tab May 18th, 2003 @tab end omitted +@item d:@minus{}21apr @tab January 1st, 1900 @tab April 21st, 2003 @tab start omitted +@item d:6w@minus{}2w @tab April 6th, 2003 @tab May 4th, 2003 @tab both dates relative +@item d:21apr@minus{}1w @tab April 21st, 2003 @tab May 11th, 2003 @tab one date relative +@item d:21apr@minus{}2y @tab April 21st, 2001 @tab May 11th, 2001 @tab start before end +@item d:99@minus{}11 @tab January 1st, 1999 @tab May 11th, 2003 @tab 2 digits are a day of the month if possible, otherwise a year +@item d:99oct@minus{}1oct @tab October 1st, 1999 @tab October 1st, 2002 @tab end before now, single digit is a day of the month +@item d:99oct@minus{}01oct @tab October 1st, 1999 @tab October 31st, 2001 @tab 2 digits starting with zero treated as a year +@item d:oct99@minus{}oct1 @tab October 1st, 1999 @tab October 1st, 2002 @tab day and month in 
either order +@item d:oct99@minus{}oct01 @tab October 1st, 1999 @tab October 31st, 2001 @tab year and month in either order +@end multitable + +The principles in the table work as follows. +@itemize @bullet +@item +When the expression defines a period of more than a day (i.e. if a month or +year is specified), the earliest day in the period is taken when the start date +is defined, and the last day in the period if the end of the range is being +defined. +@item +The end date is always taken to be on or before the current date. +@item +The start date is always taken to be on or before the end date. +@end itemize + +@bye +@c vim:cms=@c\ %s:fdm=marker:fdc=5:syntax=off diff --git a/src/mairix/reader.c b/src/mairix/reader.c @@ -0,0 +1,212 @@ +/* + mairix - message index builder and finder for maildir folders. + + ********************************************************************** + * Copyright (C) Richard P. Curnow 2002,2003,2004,2005 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + ********************************************************************** + */ + +/* Database reader */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <assert.h> +#include <sys/mman.h> + +#include "reader.h" +#include "memmac.h" +#include "mairix.h" + +int read_increment(unsigned char **encpos) {/*{{{*/ + unsigned char *j = *encpos; + int result; + unsigned char x0, x1, x2, x3; + + x0 = *j++; + if ((x0 & 0xc0) == 0xc0) { + /* 4 byte encoding */ + x1 = *j++; + x2 = *j++; + x3 = *j++; + result = ((x0 & 0x3f) << 24) + (x1 << 16) + (x2 << 8) + x3; + } else if (x0 & 0x80) { + /* 2 byte encoding */ + x1 = *j++; + result = ((x0 & 0x7f) << 8) + x1; + } else { + /* Single byte encoding */ + result = x0; + } + + *encpos = j; + return result; +} +/*}}}*/ +static void read_toktable_db(char *data, struct toktable_db *toktable, int start, unsigned int *uidata)/*{{{*/ +{ + int n; + n = toktable->n = uidata[start]; + toktable->tok_offsets = uidata + uidata[start+1]; + toktable->enc_offsets = uidata + uidata[start+2]; + return; +} +/*}}}*/ +static void read_toktable2_db(char *data, struct toktable2_db *toktable, int start, unsigned int *uidata)/*{{{*/ +{ + int n; + n = toktable->n = uidata[start]; + toktable->tok_offsets = uidata + uidata[start+1]; + toktable->enc0_offsets = uidata + uidata[start+2]; + toktable->enc1_offsets = uidata + uidata[start+3]; + return; +} +/*}}}*/ +struct read_db *open_db(char *filename)/*{{{*/ +{ + int fd, len; + char *data; + struct stat sb; + struct read_db *result; + unsigned int *uidata; + unsigned char *ucdata; + + fd = open(filename, O_RDONLY); + if (fd < 0) { + report_error("open", filename); + unlock_and_exit (2); + } + + if (fstat(fd, &sb) < 0) { + report_error("stat", filename); + unlock_and_exit(2); + } + + len = sb.st_size; + + data = (char *) mmap(0, len, PROT_READ, MAP_SHARED, fd, 0); + if (data == 
MAP_FAILED) { + report_error("reader:mmap", filename); + unlock_and_exit(2); + } + + if (!data) { + /* Empty file opened => database corrupt for sure */ + if (close(fd) < 0) { + report_error("close", filename); + unlock_and_exit(2); + } + return NULL; + } + + if (close(fd) < 0) { + report_error("close", filename); + unlock_and_exit(2); + } + + result = new(struct read_db); + uidata = (unsigned int *) data; /* alignment is assured */ + ucdata = (unsigned char *) data; + result->len = len; + result->data = data; + + /*{{{ Magic number check */ + /* All three signature bytes must match; the fourth byte is the format version. */ + if (ucdata[0] == HEADER_MAGIC0 && + ucdata[1] == HEADER_MAGIC1 && + ucdata[2] == HEADER_MAGIC2) { + if (ucdata[3] != HEADER_MAGIC3) { + fprintf(stderr, "Another version of this program produced the existing database! Please rebuild.\n"); + unlock_and_exit(2); + } + } else { + fprintf(stderr, "The existing database wasn't produced by this program! Please rebuild.\n"); + unlock_and_exit(2); + } + /*}}}*/ + /* {{{ Endianness check */ + if (uidata[UI_ENDIAN] == 0x11223344) { + fprintf(stderr, "The endianness of the database is reversed for this machine\n"); + unlock_and_exit(2); + } else if (uidata[UI_ENDIAN] != 0x44332211) { + fprintf(stderr, "The endianness of this machine is strange (or database is corrupt)\n"); + unlock_and_exit(2); + } + /* }}} */ + + /* Now build tables of where things are in the file */ + result->n_msgs = uidata[UI_N_MSGS]; + result->msg_type_and_flags = ucdata + uidata[UI_MSG_TYPE_AND_FLAGS]; + result->path_offsets = uidata + uidata[UI_MSG_CDATA]; + result->mtime_table = uidata + uidata[UI_MSG_MTIME]; + result->size_table = uidata + uidata[UI_MSG_SIZE]; + result->date_table = uidata + uidata[UI_MSG_DATE]; + result->tid_table = uidata + uidata[UI_MSG_TID]; + + result->n_mboxen = uidata[UI_MBOX_N]; + result->mbox_paths_table = uidata + uidata[UI_MBOX_PATHS]; + result->mbox_entries_table = uidata + uidata[UI_MBOX_ENTRIES]; + result->mbox_mtime_table = uidata + uidata[UI_MBOX_MTIME]; + result->mbox_size_table 
= uidata + uidata[UI_MBOX_SIZE]; + result->mbox_checksum_table = uidata + uidata[UI_MBOX_CKSUM]; + + result->hash_key = uidata[UI_HASH_KEY]; + + read_toktable_db(data, &result->to, UI_TO_BASE, uidata); + read_toktable_db(data, &result->cc, UI_CC_BASE, uidata); + read_toktable_db(data, &result->from, UI_FROM_BASE, uidata); + read_toktable_db(data, &result->subject, UI_SUBJECT_BASE, uidata); + read_toktable_db(data, &result->body, UI_BODY_BASE, uidata); + read_toktable_db(data, &result->attachment_name, UI_ATTACHMENT_NAME_BASE, uidata); + read_toktable2_db(data, &result->msg_ids, UI_MSGID_BASE, uidata); + + return result; +} +/*}}}*/ +static void free_toktable_db(struct toktable_db *x)/*{{{*/ +{ + /* Nothing to do */ +} +/*}}}*/ +static void free_toktable2_db(struct toktable2_db *x)/*{{{*/ +{ + /* Nothing to do */ +} +/*}}}*/ +void close_db(struct read_db *x)/*{{{*/ +{ + free_toktable_db(&x->to); + free_toktable_db(&x->cc); + free_toktable_db(&x->from); + free_toktable_db(&x->subject); + free_toktable_db(&x->body); + free_toktable_db(&x->attachment_name); + free_toktable2_db(&x->msg_ids); + + if (munmap(x->data, x->len) < 0) { + perror("munmap"); + unlock_and_exit(2); + } + free(x); + return; +} +/*}}}*/ + diff --git a/src/mairix/reader.h b/src/mairix/reader.h @@ -0,0 +1,182 @@ +/* + mairix - message index builder and finder for maildir folders. + + ********************************************************************** + * Copyright (C) Richard P. Curnow 2002-2004,2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +#ifndef READER_H +#define READER_H + +/* MX, then a high byte, then the version no. */ +#define HEADER_MAGIC0 'M' +#define HEADER_MAGIC1 'X' +#define HEADER_MAGIC2 0xA5 +#define HEADER_MAGIC3 0x03 + +/*{{{ Constants for file data positions */ +#define UI_ENDIAN 1 +#define UI_N_MSGS 2 + +/* Offset to byte-per-message table encoding message types */ +#define UI_MSG_TYPE_AND_FLAGS 3 + +/* Header positions containing offsets to the per-message tables. */ +/* Character data: + * for maildir/MH : the path of the box. + * for mbox : index of mbox containing the message */ + +#define UI_MSG_CDATA 4 +/* For maildir/MH : mtime of file containing message */ +#define UI_MSG_MTIME 5 +/* For mbox msgs : the offset into the file */ +#define UI_MSG_OFFSET 5 +/* For all formats : message size */ +#define UI_MSG_SIZE 6 +/* For mbox msgs : offset into file */ +#define UI_MSG_START 6 +/* These are common to Maildir,MH,mbox messages */ +#define UI_MSG_DATE 7 +#define UI_MSG_TID 8 + +/* Header positions for mbox (file-level) information */ +/* Number of mboxes */ +#define UI_MBOX_N 9 +#define UI_MBOX_PATHS 10 +#define UI_MBOX_ENTRIES 11 +/* mtime of mboxes */ +#define UI_MBOX_MTIME 12 +/* Size in bytes */ +#define UI_MBOX_SIZE 13 +/* Base of checksums for messages in each mbox */ +#define UI_MBOX_CKSUM 14 + +#define UI_HASH_KEY 15 + +/* Header positions for token tables */ +#define UI_TO_BASE 16 +#define UI_CC_BASE 19 +#define UI_FROM_BASE 22 +#define UI_SUBJECT_BASE 25 +#define UI_BODY_BASE 28 +#define UI_ATTACHMENT_NAME_BASE 31 +#define UI_MSGID_BASE 34 + +/* Larger than the last table offset. 
*/ +#define UI_HEADER_LEN 40 +#define UC_HEADER_LEN ((UI_HEADER_LEN) << 2) + +#define UI_N_OFFSET 0 +#define UI_TOK_OFFSET 1 +#define UI_ENC_OFFSET 2 + +#define UI_TO_N (UI_TO_BASE + UI_N_OFFSET) +#define UI_TO_TOK (UI_TO_BASE + UI_TOK_OFFSET) +#define UI_TO_ENC (UI_TO_BASE + UI_ENC_OFFSET) +#define UI_CC_N (UI_CC_BASE + UI_N_OFFSET) +#define UI_CC_TOK (UI_CC_BASE + UI_TOK_OFFSET) +#define UI_CC_ENC (UI_CC_BASE + UI_ENC_OFFSET) +#define UI_FROM_N (UI_FROM_BASE + UI_N_OFFSET) +#define UI_FROM_TOK (UI_FROM_BASE + UI_TOK_OFFSET) +#define UI_FROM_ENC (UI_FROM_BASE + UI_ENC_OFFSET) +#define UI_SUBJECT_N (UI_SUBJECT_BASE + UI_N_OFFSET) +#define UI_SUBJECT_TOK (UI_SUBJECT_BASE + UI_TOK_OFFSET) +#define UI_SUBJECT_ENC (UI_SUBJECT_BASE + UI_ENC_OFFSET) +#define UI_BODY_N (UI_BODY_BASE + UI_N_OFFSET) +#define UI_BODY_TOK (UI_BODY_BASE + UI_TOK_OFFSET) +#define UI_BODY_ENC (UI_BODY_BASE + UI_ENC_OFFSET) +#define UI_ATTACHMENT_NAME_N (UI_ATTACHMENT_NAME_BASE + UI_N_OFFSET) +#define UI_ATTACHMENT_NAME_TOK (UI_ATTACHMENT_NAME_BASE + UI_TOK_OFFSET) +#define UI_ATTACHMENT_NAME_ENC (UI_ATTACHMENT_NAME_BASE + UI_ENC_OFFSET) +#define UI_MSGID_N (UI_MSGID_BASE + UI_N_OFFSET) +#define UI_MSGID_TOK (UI_MSGID_BASE + UI_TOK_OFFSET) +#define UI_MSGID_ENC0 (UI_MSGID_BASE + UI_ENC_OFFSET) +#define UI_MSGID_ENC1 (UI_MSGID_ENC0 + 1) + +/*}}}*/ + +/*{{{ Literals used for encoding messages types in database file */ +#define DB_MSG_DEAD 0 +/* maildir/MH : one file per message */ +#define DB_MSG_FILE 1 +/* mbox : multiple messages per file */ +#define DB_MSG_MBOX 2 +/*}}}*/ + +#define FLAG_SEEN (1<<3) +#define FLAG_REPLIED (1<<4) +#define FLAG_FLAGGED (1<<5) + +struct toktable_db {/*{{{*/ + unsigned int n; /* number of entries in this table */ + unsigned int *tok_offsets; /* offset to table of token offsets */ + unsigned int *enc_offsets; /* offset to table of encoding offsets */ +}; +/*}}}*/ +struct toktable2_db {/*{{{*/ + unsigned int n; /* number of entries in this table */ + unsigned int 
*tok_offsets; /* offset to table of token offsets */ + unsigned int *enc0_offsets; /* offset to table of encoding offsets */ + unsigned int *enc1_offsets; /* offset to table of encoding offsets */ +}; +/*}}}*/ +struct read_db {/*{{{*/ + /* Raw file parameters, needed later for munmap */ + char *data; + int len; + + /* Pathname information */ + int n_msgs; + unsigned char *msg_type_and_flags; + unsigned int *path_offsets; /* or (mbox index, msg index) */ + unsigned int *mtime_table; /* or offset into mbox */ + unsigned int *size_table; /* either file size or span inside mbox */ + unsigned int *date_table; + unsigned int *tid_table; + + int n_mboxen; + unsigned int *mbox_paths_table; + unsigned int *mbox_entries_table; /* table of number of messages per mbox */ + unsigned int *mbox_mtime_table; + unsigned int *mbox_size_table; + unsigned int *mbox_checksum_table; + + unsigned int hash_key; + + struct toktable_db to; + struct toktable_db cc; + struct toktable_db from; + struct toktable_db subject; + struct toktable_db body; + struct toktable_db attachment_name; + struct toktable2_db msg_ids; + +}; +/*}}}*/ + +struct read_db *open_db(char *filename); +void close_db(struct read_db *x); + +static inline int rd_msg_type(struct read_db *db, int i) { + return db->msg_type_and_flags[i] & 0x7; +} + +/* Common to search and db reader. */ +int read_increment(unsigned char **encpos); + +#endif /* READER_H */ diff --git a/src/mairix/rfc822.c b/src/mairix/rfc822.c @@ -0,0 +1,1536 @@ +/* + mairix - message index builder and finder for maildir folders. + + ********************************************************************** + * Copyright (C) Richard P. 
Curnow 2002,2003,2004,2005,2006,2007,2010 + * rfc2047 decode: + * Copyright (C) Mikael Ylikoski 2002 + * gzip mbox support: + * Copyright (C) Ico Doornekamp 2005 + * Copyright (C) Felipe Gustavo de Almeida 2005 + * bzip2 mbox support: + * Copyright (C) Paramjit Oberoi 2005 + * caching uncompressed mbox data: + * Copyright (C) Chris Mason 2006 + * memory leak fixes: + * Copyright (C) Samuel Tardieu 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +#include "mairix.h" +#include "nvp.h" + +#include <assert.h> +#include <ctype.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/mman.h> +#ifdef USE_GZIP_MBOX +# include <zlib.h> +#endif +#ifdef USE_BZIP_MBOX +# include <bzlib.h> +#endif + +struct DLL {/*{{{*/ + struct DLL *next; + struct DLL *prev; +}; +/*}}}*/ +static void enqueue(void *head, void *x)/*{{{*/ +{ + /* Declare this way so it can be used with any kind of double linked list + * having next & prev pointers in its first two words. 
*/ + struct DLL *h = (struct DLL *) head; + struct DLL *xx = (struct DLL *) x; + xx->next = h; + xx->prev = h->prev; + h->prev->next = xx; + h->prev = xx; + return; +} +/*}}}*/ + +enum encoding_type {/*{{{*/ + ENC_UNKNOWN, + ENC_NONE, + ENC_BINARY, + ENC_7BIT, + ENC_8BIT, + ENC_QUOTED_PRINTABLE, + ENC_BASE64, + ENC_UUENCODE +}; +/*}}}*/ +struct content_type_header {/*{{{*/ + const char *major; /* e.g. text */ + const char *minor; /* e.g. plain */ + const char *boundary; /* for multipart */ + /* charset? */ +}; +/*}}}*/ +struct line {/*{{{*/ + struct line *next; + struct line *prev; + char *text; +}; +/*}}}*/ + +static void init_headers(struct headers *hdrs)/*{{{*/ +{ + hdrs->to = NULL; + hdrs->cc = NULL; + hdrs->from = NULL; + hdrs->subject = NULL; + hdrs->message_id = NULL; + hdrs->in_reply_to = NULL; + hdrs->references = NULL; + hdrs->date = 0; + hdrs->flags.seen = 0; + hdrs->flags.replied = 0; + hdrs->flags.flagged = 0; +}; +/*}}}*/ +static void splice_header_lines(struct line *header)/*{{{*/ +{ + /* Deal with newline then tab in header */ + struct line *x, *next; + for (x=header->next; x!=header; x=next) { +#if 0 + printf("next header, x->text=%08lx\n", x->text); + printf("header=<%s>\n", x->text); +#endif + next = x->next; + if (isspace(x->text[0] & 0xff)) { + /* Glue to previous line */ + char *p, *newbuf, *oldbuf; + struct line *y; + for (p=x->text; *p; p++) { + if (!isspace(*(unsigned char *)p)) break; + } + p--; /* point to final space */ + y = x->prev; +#if 0 + printf("y=%08lx p=%08lx\n", y->text, p); +#endif + newbuf = new_array(char, strlen(y->text) + strlen(p) + 1); + strcpy(newbuf, y->text); + strcat(newbuf, p); + oldbuf = y->text; + y->text = newbuf; + free(oldbuf); + y->next = x->next; + x->next->prev = y; + free(x->text); + free(x); + } + } + return; +} +/*}}}*/ +static int audit_header(struct line *header)/*{{{*/ +{ + /* Check for obvious broken-ness + * 1st line has no leading spaces, single word then colon + * following lines have leading spaces 
/* Case-insensitive prefix test.
 *
 * Returns non-zero when the first strlen(ref) characters of 'candidate'
 * equal 'ref' ignoring case.  Whatever follows the prefix in 'candidate' is
 * not examined. */
static int match_string(const char *ref, const char *candidate)/*{{{*/
{
  size_t len = strlen(ref);
  return !strncasecmp(ref, candidate, len);
}
/*}}}*/

/* equal_table[c] is 1 for '=' (0x3d), the base64 padding character, and 0
 * for every other byte value. */
static const char equal_table[] = {/*{{{*/
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00-0f */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10-1f */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 20-2f */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, /* 30-3f */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 40-4f */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 50-5f */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60-6f */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 70-7f */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80-8f */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90-9f */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* a0-af */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* b0-bf */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* c0-cf */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* d0-df */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* e0-ef */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  /* f0-ff */
};
/*}}}*/

/* base64_table[c] is the 6-bit value of base64 digit c, or -1 when c is not
 * part of the alphabet.  '=' maps to 0 so that padding still fills the
 * 4-character group; equal_table is used to count how much padding was seen. */
static const int base64_table[] = {/*{{{*/
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 00-0f */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 10-1f */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, /* 20-2f */
  52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1,  0, -1, -1, /* 30-3f */
  -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, /* 40-4f */
  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, /* 50-5f */
  -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, /* 60-6f */
  41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, /* 70-7f */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 80-8f */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 90-9f */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a0-af */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* b0-bf */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* c0-cf */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* d0-df */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* e0-ef */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1  /* f0-ff */
};
/*}}}*/

/* Value of one hexadecimal digit (either case).  Any other character
 * yields 0, so callers that care must validate the digit themselves. */
static int hex_to_val(char x) {/*{{{*/
  if (x >= '0' && x <= '9') return x - '0';
  if (x >= 'a' && x <= 'f') return 10 + (x - 'a');
  if (x >= 'A' && x <= 'F') return 10 + (x - 'A');
  return 0;
}
/*}}}*/

/* rfc2047 decode, originally written by Mikael Ylikoski.
 *
 * Rewrites 'text' in place, replacing every "=?charset?X?data?=" encoded-word
 * whose one-letter encoding X is Q/q or B/b with its decoded bytes.  The
 * charset is ignored; no character-set conversion is done.  Encoded-words
 * with any other encoding are left verbatim.  The decoded form is never
 * longer than the encoded form, which is what makes the in-place rewrite
 * safe: 'p' (write cursor) never overtakes 'q' (read cursor). */
static void decode_header_value(char *text){/*{{{*/
  char *s, *a, *b, *e, *p, *q;

  for (p = q = s = text; (s = strstr(s, "=?")); s = e + 2) {
    if (p == q)
      p = q = s;            /* nothing decoded yet: the prefix is already in place */
    else
      while (q != s)        /* slide the plain text between two encoded-words down */
        *p++ = *q++;
    s += 2;
    a = strchr(s, '?');     /* end of charset */
    if (!a) break;
    a++;
    b = strchr(a, '?');     /* end of encoding letter */
    if (!b) break;
    b++;
    e = strstr(b, "?=");    /* end of encoded text */
    if (!e) break;
    /* have found an encoded-word */
    if (b - a != 2)
      continue; /* unknown encoding */
    if (*a == 'q' || *a == 'Q') {
      q = b;
      while (q < e) {
        if (*q == '_') {
          *p++ = 0x20;
          q++;
        } else if (*q == '=' && (e - q) >= 3 &&
                   isxdigit(*(unsigned char *)(q + 1)) &&
                   isxdigit(*(unsigned char *)(q + 2))) {
          *p++ = (hex_to_val(q[1]) << 4) + hex_to_val(q[2]);
          q += 3;
        } else {
          /* Ordinary character, or a malformed/truncated "=XY" escape.  Copy
           * it literally rather than eating the closing "?=" as hex digits
           * (which used to store a NUL and truncate the header). */
          *p++ = *q++;
        }
      }
    } else if (*a == 'b' || *a == 'B') {
      int reg, nc, eq; /* register, #characters in reg, #equals */
      int dc; /* decoded character */
      eq = reg = nc = 0;
      for (q = b; q < e; q++) {
        unsigned char cq = *(unsigned char *)q;
        dc = base64_table[cq];
        eq += equal_table[cq];

        if (dc >= 0) {
          reg <<= 6;
          reg += dc;
          nc++;
          if (nc == 4) {
            *p++ = ((reg >> 16) & 0xff);
            if (eq < 2) *p++ = ((reg >> 8) & 0xff);
            if (eq < 1) *p++ = reg & 0xff;
            nc = reg = 0;
            if (eq) break;  /* padding seen: this was the final group */
          }
        }
      }
    } else {
      continue; /* unknown encoding */
    }
    q = e + 2;
  }
  if (p == q) return;       /* nothing was decoded, text is untouched */
  while (*q != '\0')
    *p++ = *q++;
  *p = '\0';
}
/*}}}*/
/* Return a freshly allocated, rfc2047-decoded copy of the value part of a
 * header line, i.e. everything after the first ':'.  Returns NULL when the
 * line contains no ':'.  The copy comes from new_string(); callers in this
 * file release it with free(). */
static char *copy_header_value(char *text){/*{{{*/
  char *colon = strchr(text, ':');
  char *value;
  if (!colon) return NULL;
  value = new_string(colon + 1);
  decode_header_value(value);
  return value;
}
/*}}}*/

/* Store the decoded value of header line 'text' in *previous.
 *
 * If *previous is NULL it simply takes ownership of the new value.
 * Otherwise the new value is appended to the existing one, separated by
 * ", ", so repeated To:/Cc: headers accumulate into one list.
 * A line without ':' has no value and leaves *previous untouched; before
 * this guard a NULL was handed to extend_string() after ", " had already
 * been appended. */
static void copy_or_concat_header_value(char **previous, char *text){/*{{{*/
  char *p = copy_header_value(text);
  if (!p)
    return;
  if (*previous)
  {
    *previous = extend_string(*previous, ", ");
    *previous = extend_string(*previous, p);
    free(p);
  }
  else
    *previous = p;
}
/*}}}*/
result++; + } while (1); + + /* Can't get here */ + assert(0); +} +/*}}}*/ + +static char *unencode_data(struct msg_src *src, char *input, int input_len, const char *enc, int *output_len)/*{{{*/ +{ + enum encoding_type encoding; + char *result, *end_result; + char *end_input; + + encoding = decode_encoding_type(enc); + end_input = input + input_len; + + /* All mime encodings result in expanded data, so this is guaranteed to + * safely oversize the output array */ + result = new_array(char, input_len + 1); + + /* Now decode */ + switch (encoding) { + case ENC_7BIT:/*{{{*/ + case ENC_8BIT: + case ENC_BINARY: + case ENC_NONE: + { + memcpy(result, input, input_len); + end_result = result + input_len; + } + break; +/*}}}*/ + case ENC_QUOTED_PRINTABLE:/*{{{*/ + { + char *p, *q; + p = result; + for (p=result, q=input; + q<end_input; ) { + + if (*q == '=') { + /* followed by optional whitespace then \n? discard them. */ + char *r; + int val; + q++; + r = looking_at_ws_then_newline(q); + if (r) { + q = r + 1; /* Point into next line */ + continue; + } + /* not that case. */ + val = hex_to_val(*q++) << 4; + val += hex_to_val(*q++); + *p++ = val; + + } else { + /* Normal character */ + *p++ = *q++; + } + } + end_result = p; + } + break; +/*}}}*/ + case ENC_BASE64:/*{{{*/ + { + char *p, *q; + int reg, nc, eq; /* register, #characters in reg, #equals */ + int dc; /* decoded character */ + eq = reg = nc = 0; + for (q=input, p=result; q<end_input; q++) { + unsigned char cq = * (unsigned char *)q; + /* Might want a 256 entry array instead of this sub-optimal mess + * eventually. 
*/ + dc = base64_table[cq]; + eq += equal_table[cq]; + + if (dc >= 0) { + reg <<= 6; + reg += dc; + nc++; + if (nc == 4) { + *p++ = ((reg >> 16) & 0xff); + if (eq < 2) *p++ = ((reg >> 8) & 0xff); + if (eq < 1) *p++ = reg & 0xff; + nc = reg = 0; + if (eq) goto done_base_64; + } + } + } + done_base_64: + end_result = p; + } + break; + /*}}}*/ + case ENC_UUENCODE:/*{{{*/ + { + char *p, *q; + /* Find 'begin ' */ + for (q = input; q < end_input - 6 && memcmp(q, "begin ", 6); q++) + ; + q += 6; + /* skip to EOL */ + while (q < end_input && *q != '\n') + q++; + p = result; + while (q < end_input) { /* process line */ +#define DEC(c) (((c) - ' ') & 077) + int len = DEC(*q++); + if (len == 0) + break; + for (; len > 0; q += 4, len -= 3) { + if (len >= 3) { + *p++ = DEC(q[0]) << 2 | DEC(q[1]) >> 4; + *p++ = DEC(q[1]) << 4 | DEC(q[2]) >> 2; + *p++ = DEC(q[2]) << 6 | DEC(q[3]); + } else { + if (len >= 1) + *p++ = DEC(q[0]) << 2 | DEC(q[1]) >> 4; + if (len >= 2) + *p++ = DEC(q[1]) << 4 | DEC(q[2]) >> 2; + } + } + while (q < end_input && *q != '\n') + q++; + } + end_result = p; + } + break; + /*}}}*/ + case ENC_UNKNOWN:/*{{{*/ + fprintf(stderr, "Unknown encoding type in %s\n", format_msg_src(src)); + /* fall through - ignore this data */ + /*}}}*/ + default:/*{{{*/ + end_result = result; + break; + /*}}}*/ + } + *output_len = end_result - result; + result[*output_len] = '\0'; /* for convenience with text/plain etc to make it printable */ + return result; +} +/*}}}*/ +char *format_msg_src(struct msg_src *src)/*{{{*/ +{ + static char *buffer = NULL; + static int buffer_len = 0; + char *result; + int len; + switch (src->type) { + case MS_FILE: + result = src->filename; + break; + case MS_MBOX: + len = strlen(src->filename); + len += 32; + if (!buffer || (len > buffer_len)) { + free(buffer); + buffer = new_array(char, len); + buffer_len = len; + } + sprintf(buffer, "%s[%d,%d)", src->filename, + (int) src->start, (int) (src->start + src->len)); + result = buffer; + break; + default: 
+ result = NULL; + break; + } + return result; +} +/*}}}*/ +static int split_and_splice_header(struct msg_src *src, char *data, struct line *header, char **body_start)/*{{{*/ +{ + char *sol, *eol; + int blank_line; + header->next = header->prev = header; + sol = data; + do { + if (!*sol) break; + blank_line = 1; /* until proven otherwise */ + eol = sol; + while (*eol && (*eol != '\n')) { + if (!isspace(*(unsigned char *) eol)) blank_line = 0; + eol++; + } + if (*eol == '\n') { + if (!blank_line) { + int line_length = eol - sol; + char *line_text = new_array(char, 1 + line_length); + struct line *new_header; + + strncpy(line_text, sol, line_length); + line_text[line_length] = '\0'; + new_header = new(struct line); + new_header->text = line_text; + enqueue(header, new_header); + } + sol = eol + 1; /* Start of next line */ + } else { /* must be null char */ + fprintf(stderr, "Got null character whilst processing header of %s\n", + format_msg_src(src)); + return -1; /* & leak memory */ + } + } while (!blank_line); + + *body_start = sol; + + if (audit_header(header)) { + splice_header_lines(header); + return 0; + } else { +#if 0 + /* Caller generates message */ + fprintf(stderr, "Message had bad rfc822 headers, ignoring\n"); +#endif + return -1; + } +} +/*}}}*/ + +/* Forward prototypes */ +static void do_multipart(struct msg_src *src, char *input, int input_len, + const char *boundary, struct attachment *atts, + enum data_to_rfc822_error *error); + +/*{{{ do_body() */ +static void do_body(struct msg_src *src, + char *body_start, int body_len, + struct nvp *ct_nvp, struct nvp *cte_nvp, + struct nvp *cd_nvp, + struct attachment *atts, + enum data_to_rfc822_error *error) +{ + char *decoded_body; + int decoded_body_len; + const char *content_transfer_encoding; + content_transfer_encoding = NULL; + if (cte_nvp) { + content_transfer_encoding = nvp_first(cte_nvp); + if (!content_transfer_encoding) { + fprintf(stderr, "Giving up on %s, content_transfer_encoding header not 
parseable\n", + format_msg_src(src)); + return; + } + } + + decoded_body = unencode_data(src, body_start, body_len, content_transfer_encoding, &decoded_body_len); + + if (ct_nvp) { + struct content_type_header ct; + parse_content_type(ct_nvp, &ct); + if (ct.major && !strcasecmp(ct.major, "multipart")) { + do_multipart(src, decoded_body, decoded_body_len, ct.boundary, atts, error); + /* Don't need decoded body any longer - copies have been taken if + * required when handling multipart attachments. */ + free(decoded_body); + if (error && (*error == DTR8_MISSING_END)) return; + } else { + /* unipart */ + struct attachment *new_att; + const char *disposition; + new_att = new(struct attachment); + disposition = cd_nvp ? nvp_first(cd_nvp) : NULL; + if (disposition && !strcasecmp(disposition, "attachment")) { + const char *lookup; + lookup = nvp_lookupcase(cd_nvp, "filename"); + if (lookup) { + new_att->filename = new_string(lookup); + } else { + /* Some messages have name=... in content-type: instead of + * filename=... in content-disposition. 
*/ + lookup = nvp_lookup(ct_nvp, "name"); + if (lookup) { + new_att->filename = new_string(lookup); + } else { + new_att->filename = NULL; + } + } + } else { + new_att->filename = NULL; + } + if (ct.major && !strcasecmp(ct.major, "text")) { + if (ct.minor && !strcasecmp(ct.minor, "plain")) { + new_att->ct = CT_TEXT_PLAIN; + } else if (ct.minor && !strcasecmp(ct.minor, "html")) { + new_att->ct = CT_TEXT_HTML; + } else { + new_att->ct = CT_TEXT_OTHER; + } + } else if (ct.major && !strcasecmp(ct.major, "message") && + ct.minor && !strcasecmp(ct.minor, "rfc822")) { + new_att->ct = CT_MESSAGE_RFC822; + } else { + new_att->ct = CT_OTHER; + } + + if (new_att->ct == CT_MESSAGE_RFC822) { + new_att->data.rfc822 = data_to_rfc822(src, decoded_body, decoded_body_len, error); + free(decoded_body); /* data no longer needed */ + } else { + new_att->data.normal.len = decoded_body_len; + new_att->data.normal.bytes = decoded_body; + } + enqueue(atts, new_att); + } + } else { + /* Treat as text/plain {{{*/ + struct attachment *new_att; + new_att = new(struct attachment); + new_att->filename = NULL; + new_att->ct = CT_TEXT_PLAIN; + new_att->data.normal.len = decoded_body_len; + /* Add null termination on the end */ + new_att->data.normal.bytes = new_array(char, decoded_body_len + 1); + memcpy(new_att->data.normal.bytes, decoded_body, decoded_body_len + 1); + free(decoded_body); + enqueue(atts, new_att);/*}}}*/ + } +} +/*}}}*/ +/*{{{ do_attachment() */ +static void do_attachment(struct msg_src *src, + char *start, char *after_end, + struct attachment *atts) +{ + /* decode attachment and add to attachment list */ + struct line header, *x, *nx; + char *body_start; + int body_len; + + struct nvp *ct_nvp, *cte_nvp, *cd_nvp, *nvp; + + if (split_and_splice_header(src, start, &header, &body_start) < 0) { + fprintf(stderr, "Giving up on attachment with bad header in %s\n", + format_msg_src(src)); + return; + } + + /* Extract key headers */ + ct_nvp = cte_nvp = cd_nvp = NULL; + for 
(x=header.next; x!=&header; x=x->next) { + if ((nvp = make_nvp(src, x->text, "content-type:"))) { + ct_nvp = nvp; + } else if ((nvp = make_nvp(src, x->text, "content-transfer-encoding:"))) { + cte_nvp = nvp; + } else if ((nvp = make_nvp(src, x->text, "content-disposition:"))) { + cd_nvp = nvp; + } + } + +#if 0 + if (ct_nvp) { + fprintf(stderr, "======\n"); + fprintf(stderr, "Dump of content-type hdr\n"); + nvp_dump(ct_nvp, stderr); + free(ct_nvp); + } + + if (cte_nvp) { + fprintf(stderr, "======\n"); + fprintf(stderr, "Dump of content-transfer-encoding hdr\n"); + nvp_dump(cte_nvp, stderr); + free(cte_nvp); + } +#endif + + if (body_start > after_end) { + /* This is a (maliciously?) b0rken attachment, e.g. maybe empty */ + if (verbose) { + fprintf(stderr, "Message %s contains an invalid attachment, length=%d bytes\n", + format_msg_src(src), (int)(after_end - start)); + } + } else { + body_len = after_end - body_start; + /* Ignore errors in nested body parts. */ + do_body(src, body_start, body_len, ct_nvp, cte_nvp, cd_nvp, atts, NULL); + } + + /* Free header memory */ + for (x=header.next; x!=&header; x=nx) { + nx = x->next; + free(x->text); + free(x); + } + + if (ct_nvp) free_nvp(ct_nvp); + if (cte_nvp) free_nvp(cte_nvp); + if (cd_nvp) free_nvp(cd_nvp); +} +/*}}}*/ +/*{{{ do_multipart() */ +static void do_multipart(struct msg_src *src, + char *input, int input_len, + const char *boundary, + struct attachment *atts, + enum data_to_rfc822_error *error) +{ + char *b0, *b1, *be, *bx; + char *line_after_b0, *start_b1_search_from; + int boundary_len; + int looking_at_end_boundary; + + if (!boundary) { + fprintf(stderr, "Can't process multipart message %s with no boundary string\n", + format_msg_src(src)); + if (error) *error = DTR8_MULTIPART_SANS_BOUNDARY; + return; + } + + boundary_len = strlen(boundary); + + b0 = NULL; + line_after_b0 = input; + be = input + input_len; + + do { + int boundary_ok; + start_b1_search_from = line_after_b0; + do { + /* reject boundaries that 
/* Parse the date part of an rfc822 Date: header value.
 *
 * Accepted format: [weekday ,] day-of-month month year hour:minute:second
 * timezone.  Only day, month and year are used; the time of day and the
 * timezone are deliberately ignored because searching by date is vague
 * enough that the nearest day is good enough.  Something must follow the
 * year, but it is not examined.  Two-digit years below 70 are taken as
 * 20xx, years of 1900 and above are taken literally, and anything in
 * between is used as an offset from 1900.
 *
 * Returns the mktime() value for midnight local time of that day, or
 * (time_t) -1 if 'date_string' is NULL or cannot be parsed.
 *
 * Some of the ideas, sanity checks etc taken from parse.c in the mutt
 * sources, credit to Michael R. Elkins et al. */
static time_t parse_rfc822_date(char *date_string)/*{{{*/
{
  static const char month_names[12][4] = {
    "jan", "feb", "mar", "apr", "may", "jun",
    "jul", "aug", "sep", "oct", "nov", "dec"
  };
  struct tm tm;
  char *s, *z, *end;
  long value;
  int month;

  if (!date_string) goto tough_cheese;

  /* mktime() gets a fully defined structure, whatever extra fields the
   * platform's struct tm has */
  memset(&tm, 0, sizeof(tm));

  s = date_string;
  z = strchr(s, ',');
  if (z) s = z + 1; /* skip optional weekday */
  while (isspace((unsigned char) *s)) s++;

  /* Should now be looking at day number */
  if (!isdigit((unsigned char) *s)) goto tough_cheese;
  value = strtol(s, &end, 10); /* saturates on overflow, caught below */
  if (value > 31) goto tough_cheese;
  tm.tm_mday = (int) value;
  s = end;

  while (isspace((unsigned char) *s)) s++;
  if (!*s) goto tough_cheese;
  for (month = 0; month < 12; month++) {
    if (!strncasecmp(s, month_names[month], 3)) break;
  }
  if (month == 12) goto tough_cheese;
  tm.tm_mon = month;

  /* Skip the rest of the month name; stop at the terminator too, so a
   * string that ends with the month is rejected instead of over-read. */
  while (*s && !isspace((unsigned char) *s)) s++;
  while (isspace((unsigned char) *s)) s++;
  if (!isdigit((unsigned char) *s)) goto tough_cheese;
  value = strtol(s, &end, 10);
  if (value > 9999) goto tough_cheese; /* not a plausible year; also guards the int conversion */
  tm.tm_year = (int) value;
  if (tm.tm_year < 70) {
    tm.tm_year += 100;
  } else if (tm.tm_year >= 1900) {
    tm.tm_year -= 1900;
  }
  s = end;

  while (isspace((unsigned char) *s)) s++;
  if (!*s) goto tough_cheese;

  /* Now looking at hms */
  /* For now, forget this.  The searching will be vague enough that nearest
   * day is good enough. */

  tm.tm_hour = 0;
  tm.tm_min = 0;
  tm.tm_sec = 0;
  tm.tm_isdst = 0;
  return mktime(&tm);

tough_cheese:
  return (time_t) -1; /* default value */
}
/*}}}*/
nvp; + else if (!result->hdrs.date && match_string("date", x->text)) { + char *date_string = copy_header_value(x->text); + result->hdrs.date = parse_rfc822_date(date_string); + free(date_string); + } else if (!result->hdrs.message_id && match_string("message-id", x->text)) + result->hdrs.message_id = copy_header_value(x->text); + else if (!result->hdrs.in_reply_to && match_string("in-reply-to", x->text)) + result->hdrs.in_reply_to = copy_header_value(x->text); + else if (!result->hdrs.references && match_string("references", x->text)) + result->hdrs.references = copy_header_value(x->text); + else if (match_string("status", x->text)) + scan_status_flags(x->text + sizeof("status:"), &result->hdrs); + else if (match_string("x-status", x->text)) + scan_status_flags(x->text + sizeof("x-status:"), &result->hdrs); + } +/*}}}*/ + + /* Process body */ + body_len = length - (body_start - data); + do_body(src, body_start, body_len, ct_nvp, cte_nvp, cd_nvp, &result->atts, error); + + /* Free header memory */ + for (x=header.next; x!=&header; x=nx) { + nx = x->next; + free(x->text); + free(x); + } + + if (ct_nvp) free_nvp(ct_nvp); + if (cte_nvp) free_nvp(cte_nvp); + if (cd_nvp) free_nvp(cd_nvp); + + return result; + +} +/*}}}*/ + +#define ALLOC_NONE 1 +#define ALLOC_MMAP 2 +#define ALLOC_MALLOC 3 + +int data_alloc_type; + +#if USE_GZIP_MBOX || USE_BZIP_MBOX + +#define SIZE_STEP (8 * 1024 * 1024) + +#define COMPRESSION_NONE 0 +#define COMPRESSION_GZIP 1 +#define COMPRESSION_BZIP 2 + +static int get_compression_type(const char *filename) {/*{{{*/ + size_t len = strlen(filename); + int ptr; + +#ifdef USE_GZIP_MBOX + ptr = len - 3; + if (len > 3 && strncasecmp(filename + ptr, ".gz", 3) == 0) { + return COMPRESSION_GZIP; + } +#endif + +#ifdef USE_BZIP_MBOX + ptr = len - 4; + if (len > 3 && strncasecmp(filename + ptr, ".bz2", 4) == 0) { + return COMPRESSION_BZIP; + } +#endif + + return COMPRESSION_NONE; +} +/*}}}*/ + +static int is_compressed(const char *filename) {/*{{{*/ + return 
(get_compression_type(filename) != COMPRESSION_NONE); +} +/*}}}*/ + +struct zFile {/*{{{*/ + union { + /* Both gzFile and BZFILE* are defined as void pointers + * in their respective header files. + */ +#ifdef USE_GZIP_MBOX + gzFile gzf; +#endif +#ifdef USE_BZIP_MBOX + BZFILE *bzf; +#endif + void *zptr; + } foo; + int type; +}; +/*}}}*/ + +static struct zFile * xx_zopen(const char *filename, const char *mode) {/*{{{*/ + struct zFile *zf = new(struct zFile); + + zf->type = get_compression_type(filename); + switch (zf->type) { +#ifdef USE_GZIP_MBOX + case COMPRESSION_GZIP: + zf->foo.gzf = gzopen(filename, "rb"); + break; +#endif +#ifdef USE_BZIP_MBOX + case COMPRESSION_BZIP: + zf->foo.bzf = BZ2_bzopen(filename, "rb"); + break; +#endif + default: + zf->foo.zptr = NULL; + break; + } + + if (!zf->foo.zptr) { + free(zf); + return 0; + } + + return zf; +} +/*}}}*/ +static void xx_zclose(struct zFile *zf) {/*{{{*/ + switch (zf->type) { +#ifdef USE_GZIP_MBOX + case COMPRESSION_GZIP: + gzclose(zf->foo.gzf); + break; +#endif +#ifdef USE_BZIP_MBOX + case COMPRESSION_BZIP: + BZ2_bzclose(zf->foo.bzf); + break; +#endif + default: + zf->foo.zptr = NULL; + break; + } + free(zf); +} +/*}}}*/ +static int xx_zread(struct zFile *zf, void *buf, int len) {/*{{{*/ + switch (zf->type) { +#ifdef USE_GZIP_MBOX + case COMPRESSION_GZIP: + return gzread(zf->foo.gzf, buf, len); + break; +#endif +#ifdef USE_BZIP_MBOX + case COMPRESSION_BZIP: + return BZ2_bzread(zf->foo.bzf, buf, len); + break; +#endif + default: + return 0; + break; + } +} +/*}}}*/ +#endif + +#if USE_GZIP_MBOX || USE_BZIP_MBOX +/* do we need ROCACHE_SIZE > 1? the code supports any number here */ +#define ROCACHE_SIZE 1 +struct ro_mapping { + char *filename; + unsigned char *map; + size_t len; +}; +static int ro_cache_init = 0; +static struct ro_mapping ro_mapping_cache[ROCACHE_SIZE]; + +/* find a temp file in the mapping cache. If nothing is found lasti is + * set to the next slot to use for insertion. 
You have to check that slot + * to see if it is currently in use + */ +static struct ro_mapping *find_ro_cache(const char *filename, int *lasti) +{ + int i = 0; + struct ro_mapping *ro = NULL; + if (lasti) + *lasti = 0; + if (!ro_cache_init) + return NULL; + for (i = 0 ; i < ROCACHE_SIZE ; i++) { + ro = ro_mapping_cache + i; + if (!ro->map) { + if (lasti) + *lasti = i; + return NULL; + } + if (strcmp(filename, ro->filename) == 0) + return ro; + } + /* if we're here, the map is full. They will reuse slot 0 */ + return NULL; +} + +/* + * put a new tempfile into the cache. It is mmaped as part of this function + * so you can safely close the file handle after calling this. + */ +static struct ro_mapping *add_ro_cache(const char *filename, int fd, size_t len) +{ + int i = 0; + struct ro_mapping *ro = NULL; + if (!ro_cache_init) { + memset(&ro_mapping_cache, 0, sizeof(ro_mapping_cache)); + ro_cache_init = 1; + } + ro = find_ro_cache(filename, &i); + if (ro) { + fprintf(stderr, "%s already in ro cache\n", filename); + return NULL; + } + ro = ro_mapping_cache + i; + if (ro->map) { + munmap(ro->map, ro->len); + ro->map = NULL; + free(ro->filename); + } + ro->map = (unsigned char *)mmap(0, len, PROT_READ, MAP_SHARED, fd, 0); + if (ro->map == MAP_FAILED) { + ro->map = NULL; + perror("rfc822:mmap"); + return NULL; + } + ro->len = len; + ro->filename = new_string(filename); + return ro; +} +#endif /* USE_GZIP_MBOX || USE_BZIP_MBOX */ + +void create_ro_mapping(const char *filename, unsigned char **data, int *len)/*{{{*/ +{ + struct stat sb; + int fd; + +#if USE_GZIP_MBOX || USE_BZIP_MBOX + struct zFile *zf; +#endif + + if (stat(filename, &sb) < 0) + { + report_error("stat", filename); + *data = NULL; + return; + } + +#if USE_GZIP_MBOX || USE_BZIP_MBOX + if(is_compressed(filename)) { + unsigned char *p; + size_t cur_read; + struct ro_mapping *ro; + FILE *tmpf; + + /* this branch never returns things that are freeable */ + data_alloc_type = ALLOC_NONE; + ro = 
find_ro_cache(filename, NULL); + if (ro) { + *data = ro->map; + *len = ro->len; + return; + } + + if(verbose) { + fprintf(stderr, "Decompressing %s...\n", filename); + } + + tmpf = tmpfile(); + if (!tmpf) { + perror("tmpfile"); + goto comp_error; + } + zf = xx_zopen(filename, "rb"); + if (!zf) { + fprintf(stderr, "Could not open %s\n", filename); + goto comp_error; + } + p = new_array(unsigned char, SIZE_STEP); + cur_read = xx_zread(zf, p, SIZE_STEP); + if (fwrite(p, cur_read, 1, tmpf) != 1) { + fprintf(stderr, "failed writing to temp file for %s\n", filename); + goto comp_error; + } + *len = cur_read; + if (cur_read >= SIZE_STEP) { + while(1) { + int ret; + cur_read = xx_zread(zf, p, SIZE_STEP); + if (cur_read <= 0) + break; + *len += cur_read; + ret = fwrite(p, cur_read, 1, tmpf); + if (ret != 1) { + fprintf(stderr, "failed writing to temp file for %s\n", filename); + goto comp_error; + } + } + } + free(p); + xx_zclose(zf); + + if(*len > 0) { + ro = add_ro_cache(filename, fileno(tmpf), *len); + if (!ro) + goto comp_error; + *data = ro->map; + *len = ro->len; + } else { + *data = NULL; + } + fclose(tmpf); + return; + +comp_error: + *data = NULL; + *len = 0; + if (tmpf) + fclose(tmpf); + return; + } +#endif /* USE_GZIP_MBOX || USE_BZIP_MBOX */ + + *len = sb.st_size; + if (*len == 0) { + *data = NULL; + return; + } + + if (!S_ISREG(sb.st_mode)) { + *data = NULL; + return; + } + + fd = open(filename, O_RDONLY); + if (fd < 0) + { + report_error("open", filename); + *data = NULL; + return; + } + + *data = (unsigned char *) mmap(0, *len, PROT_READ, MAP_SHARED, fd, 0); + if (close(fd) < 0) + report_error("close", filename); + if (*data == MAP_FAILED) { + report_error("rfc822:mmap", filename); + *data = NULL; + return; + } + data_alloc_type = ALLOC_MMAP; +} +/*}}}*/ +void free_ro_mapping(unsigned char *data, int len)/*{{{*/ +{ + int r; + + if(data_alloc_type == ALLOC_MALLOC) { + free(data); + } + + if(data_alloc_type == ALLOC_MMAP) { + r = munmap(data, len); + if(r < 0) { 
+ fprintf(stderr, "munmap() errord\n"); + exit(1); + } + } +} +/*}}}*/ + +static struct msg_src *setup_msg_src(char *filename)/*{{{*/ +{ + static struct msg_src result; + result.type = MS_FILE; + result.filename = filename; + return &result; +} +/*}}}*/ +struct rfc822 *make_rfc822(char *filename)/*{{{*/ +{ + int len; + unsigned char *data; + struct rfc822 *result; + + create_ro_mapping(filename, &data, &len); + + /* Don't process empty files */ + result = NULL; + + if (data) + { + struct msg_src *src; + /* Now process the data */ + src = setup_msg_src(filename); + /* For one message per file, ignore missing end boundary condition. */ + result = data_to_rfc822(src, (char *) data, len, NULL); + + free_ro_mapping(data, len); + } + + return result; +} +/*}}}*/ +void free_rfc822(struct rfc822 *msg)/*{{{*/ +{ + struct attachment *a, *na; + + if (!msg) return; + + if (msg->hdrs.to) free(msg->hdrs.to); + if (msg->hdrs.cc) free(msg->hdrs.cc); + if (msg->hdrs.from) free(msg->hdrs.from); + if (msg->hdrs.subject) free(msg->hdrs.subject); + if (msg->hdrs.message_id) free(msg->hdrs.message_id); + if (msg->hdrs.in_reply_to) free(msg->hdrs.in_reply_to); + if (msg->hdrs.references) free(msg->hdrs.references); + + for (a = msg->atts.next; a != &msg->atts; a = na) { + na = a->next; + if (a->filename) free(a->filename); + if (a->ct == CT_MESSAGE_RFC822) { + free_rfc822(a->data.rfc822); + } else { + free(a->data.normal.bytes); + } + free(a); + } + free(msg); +} +/*}}}*/ + +#ifdef TEST + +static void do_indent(int indent)/*{{{*/ +{ + int i; + for (i=indent; i>0; i--) { + putchar(' '); + } +} +/*}}}*/ +static void show_header(char *tag, char *x, int indent)/*{{{*/ +{ + if (x) { + do_indent(indent); + printf("%s: %s\n", tag, x); + } +} +/*}}}*/ +static void show_rfc822(struct rfc822 *msg, int indent)/*{{{*/ +{ + struct attachment *a; + show_header("From", msg->hdrs.from, indent); + show_header("To", msg->hdrs.to, indent); + show_header("Cc", msg->hdrs.cc, indent); + show_header("Date", 
/*
 * Stand-alone test driver (only built when TEST is defined).
 *
 * Parses the message file named by argv[1] and dumps its headers and
 * attachments with show_rfc822().
 *
 * Returns 0 on success and 1 when no message could be parsed; a missing
 * argument terminates through unlock_and_exit(2).
 */
int main (int argc, char **argv)/*{{{*/
{
  struct rfc822 *msg;

  if (argc < 2) {
    fprintf(stderr, "Need a path\n");
    unlock_and_exit(2);
  }

  msg = make_rfc822(argv[1]);
  if (!msg) {
    /* make_rfc822() returns NULL when create_ro_mapping() produced no data
     * (e.g. an empty file); show_rfc822() would dereference it. */
    fprintf(stderr, "No message data found in %s\n", argv[1]);
    return 1;
  }

  show_rfc822(msg, 0);
  free_rfc822(msg);

  return 0;
}
/*}}}*/
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <time.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <unistd.h> +#include <assert.h> +#include <dirent.h> +#include <errno.h> + +/* Lame fix for systems where NAME_MAX isn't defined after including the above + * set of .h files (Solaris, FreeBSD so far). Probably grossly oversized but + * it'll do. */ + +#if !defined(NAME_MAX) +#define NAME_MAX 4096 +#endif + +#include "mairix.h" +#include "reader.h" +#include "memmac.h" + +static void mark_hits_in_table(struct read_db *db, struct toktable_db *tt, int hit_tok, char *hits)/*{{{*/ +{ + /* mark files containing matched token */ + int idx; + unsigned char *j, *first_char; + idx = 0; + first_char = (unsigned char *) db->data + tt->enc_offsets[hit_tok]; + for (j = first_char; *j != 0xff; ) { + idx += read_increment(&j); + assert(idx < db->n_msgs); + hits[idx] = 1; + } +} +/*}}}*/ +static void mark_hits_in_table2(struct read_db *db, struct toktable2_db *tt, int hit_tok, char *hits)/*{{{*/ +{ + /* mark files containing matched token */ + int idx; + unsigned char *j, *first_char; + idx = 0; + first_char = (unsigned char *) db->data + tt->enc1_offsets[hit_tok]; + for (j = first_char; *j != 0xff; ) { + idx += read_increment(&j); + assert(idx < db->n_msgs); + hits[idx] = 1; + } +} +/*}}}*/ + +/* See "Fast text searching with errors, Sun Wu and Udi Manber, TR 91-11, + University of Arizona. I have been informed that this algorithm is NOT + patented. This implementation of it is entirely the work of Richard P. + Curnow - I haven't looked at any related source (webglimpse, agrep etc) in + writing this. 
/*
 * Exact (zero-error) bit-parallel substring test.
 *
 *   a           : 256-entry table from build_match_vector(); bit i of a[c] is
 *                 clear when pattern character i equals c
 *   hit         : mask with only the bit of the last pattern position clear
 *   left_anchor : non-zero to accept the pattern only at the start of token
 *   token       : NUL-terminated text to scan
 *
 * Returns 1 as soon as the pattern is found in token, 0 otherwise.
 */
static int substring_match_0(unsigned long *a, unsigned long hit, int left_anchor, char *token)/*{{{*/
{
  const unsigned char *c;
  unsigned long state = ~0;
  /* When anchored, bit 0 is forced on for every character after the first,
   * so a match can only begin at the first character. */
  unsigned long later_anchor = left_anchor ? 0x1 : 0x0;
  unsigned long cur_anchor = 0;

  for (c = (const unsigned char *) token; *c; c++) {
    state = (state << 1) | cur_anchor | a[*c];
    if (~(state | hit)) {
      return 1;
    }
    cur_anchor = later_anchor;
  }
  return 0;
}
/*}}}*/
/*
 * Bit-parallel substring test allowing up to three errors.
 *
 * Same arguments as the other substring_match_N() helpers: `a` is the
 * per-character table from build_match_vector(), `hit` has only the bit of
 * the last pattern position clear, `left_anchor` restricts matches to the
 * start of `token`.
 *
 * e0..e3 are the state bitmaps for 0..3 errors; each level is derived from
 * its own shifted state combined with the previous level's old and new
 * states.  Returns 1 on the first position where any level reports a match,
 * 0 if the token is exhausted without one.
 */
static int substring_match_3(unsigned long *a, unsigned long hit, int left_anchor, char *token)/*{{{*/
{
  const unsigned char *c;
  unsigned long e0, e1, e2, e3;
  unsigned long n0, n1, n2;
  unsigned long later_anchor = left_anchor ? 0x1 : 0x0;
  unsigned long cur_anchor = 0;

  e0 = ~0;
  e1 = e0 << 1;
  e2 = e1 << 1;
  e3 = e2 << 1;

  for (c = (const unsigned char *) token; *c; c++) {
    /* Term shared by all four levels for this character */
    unsigned long mask = cur_anchor | a[*c];

    n0 = (e0 << 1) | mask;
    n1 = ((e1 << 1) | mask) & ((e0 & n0) << 1) & e0;
    n2 = ((e2 << 1) | mask) & ((e1 & n1) << 1) & e1;
    e3 = ((e3 << 1) | mask) & ((e2 & n2) << 1) & e2;
    e0 = n0;
    e1 = n1;
    e2 = n2;

    if (~((e0 & e1 & e2 & e3) | hit)) {
      return 1;
    }
    cur_anchor = later_anchor;
  }
  return 0;
}
/*}}}*/
0x1 : 0x0; + for (j=1; j<=max_errors; j++) { + r[j] = r[j-1] << 1; + } + got_hit = 0; + for(p=token; *p; p++) { + int idx = (unsigned int) *(unsigned char *)p; + int d; + unsigned int compo; + + compo = nr[0] = ((r[0]<<1) | anchor | a[idx]); + for (d=1; d<=max_errors; d++) { + nr[d] = ((r[d]<<1) | anchor | a[idx]) + & ((r[d-1] & nr[d-1])<<1) + & r[d-1]; + compo &= nr[d]; + } + memcpy(r, nr, (1 + max_errors) * sizeof(unsigned long)); + if (~(compo | hit)) { + got_hit = 1; + break; + } + anchor = anchor1; + } + return got_hit; +} +/*}}}*/ + +static void match_substring_in_table(struct read_db *db, struct toktable_db *tt, char *substring, int max_errors, int left_anchor, char *hits)/*{{{*/ +{ + + int i, got_hit; + unsigned long a[256]; + unsigned long *r=NULL, *nr=NULL; + unsigned long hit; + char *token; + + build_match_vector(substring, a, &hit); + + got_hit = 0; + if (max_errors > 3) { + r = new_array(unsigned long, 1 + max_errors); + nr = new_array(unsigned long, 1 + max_errors); + } + for (i=0; i<tt->n; i++) { + token = db->data + tt->tok_offsets[i]; + switch (max_errors) { + /* Optimise common cases for few errors to allow optimizer to keep bitmaps + * in registers */ + case 0: + got_hit = substring_match_0(a, hit, left_anchor, token); + break; + case 1: + got_hit = substring_match_1(a, hit, left_anchor, token); + break; + case 2: + got_hit = substring_match_2(a, hit, left_anchor, token); + break; + case 3: + got_hit = substring_match_3(a, hit, left_anchor, token); + break; + default: + got_hit = substring_match_general(a, hit, left_anchor, token, max_errors, r, nr); + break; + } + if (got_hit) { + mark_hits_in_table(db, tt, i, hits); + } + } + if (r) free(r); + if (nr) free(nr); +} +/*}}}*/ +static void match_substring_in_table2(struct read_db *db, struct toktable2_db *tt, char *substring, int max_errors, int left_anchor, char *hits)/*{{{*/ +{ + + int i, got_hit; + unsigned long a[256]; + unsigned long *r=NULL, *nr=NULL; + unsigned long hit; + char *token; + + 
build_match_vector(substring, a, &hit); + + got_hit = 0; + if (max_errors > 3) { + r = new_array(unsigned long, 1 + max_errors); + nr = new_array(unsigned long, 1 + max_errors); + } + for (i=0; i<tt->n; i++) { + token = db->data + tt->tok_offsets[i]; + switch (max_errors) { + /* Optimise common cases for few errors to allow optimizer to keep bitmaps + * in registers */ + case 0: + got_hit = substring_match_0(a, hit, left_anchor, token); + break; + case 1: + got_hit = substring_match_1(a, hit, left_anchor, token); + break; + case 2: + got_hit = substring_match_2(a, hit, left_anchor, token); + break; + case 3: + got_hit = substring_match_3(a, hit, left_anchor, token); + break; + default: + got_hit = substring_match_general(a, hit, left_anchor, token, max_errors, r, nr); + break; + } + if (got_hit) { + mark_hits_in_table2(db, tt, i, hits); + } + } + if (r) free(r); + if (nr) free(nr); +} +/*}}}*/ +static void match_substring_in_paths(struct read_db *db, char *substring, int max_errors, int left_anchor, char *hits)/*{{{*/ +{ + + int i; + unsigned long a[256]; + unsigned long *r=NULL, *nr=NULL; + unsigned long hit; + + build_match_vector(substring, a, &hit); + + if (max_errors > 3) { + r = new_array(unsigned long, 1 + max_errors); + nr = new_array(unsigned long, 1 + max_errors); + } + for (i=0; i<db->n_msgs; i++) { + char *token = NULL; + unsigned int mbix, msgix; + switch (rd_msg_type(db, i)) { + case DB_MSG_FILE: + token = db->data + db->path_offsets[i]; + break; + case DB_MSG_MBOX: + decode_mbox_indices(db->path_offsets[i], &mbix, &msgix); + token = db->data + db->mbox_paths_table[mbix]; + break; + case DB_MSG_DEAD: + hits[i] = 0; /* never match on dead paths */ + goto next_message; + } + + assert(token); + + switch (max_errors) { + /* Optimise common cases for few errors to allow optimizer to keep bitmaps + * in registers */ + case 0: + hits[i] = substring_match_0(a, hit, left_anchor, token); + break; + case 1: + hits[i] = substring_match_1(a, hit, left_anchor, 
/*
 * Parse a message size such as "300", "10k" or "2M".
 *
 * A leading decimal integer is read; a 'k'/'K' directly after it scales the
 * value by 2^10, an 'm'/'M' by 2^20.  Any other trailing character is
 * ignored.
 *
 * Returns the size, or -1 (after printing a diagnostic to stderr) when x
 * does not start with an integer.
 */
static int parse_size_expr(char *x)/*{{{*/
{
  int value;
  int consumed;
  char suffix;

  if (sscanf(x, "%d%n", &value, &consumed) != 1) {
    fprintf(stderr, "Could not parse message size expression <%s>\n", x);
    return -1;
  }

  suffix = x[consumed];
  if (suffix == 'k' || suffix == 'K') {
    value <<= 10;
  } else if (suffix == 'm' || suffix == 'M') {
    value <<= 20;
  }

  return value;
}
/*}}}*/
+ for (p=e, q=dash+1; *q; ) *p++ = *q++; + *p = 0; + *end = parse_size_expr(e); + *has_end = 1; + free(e); + } + } else { + *has_start = 0; + *end = parse_size_expr(size_expr); + *has_end = 1; + } + return; +} +/*}}}*/ +static void find_size_matches_in_table(struct read_db *db, char *size_expr, char *hits)/*{{{*/ +{ + int start, end; + int has_start, has_end, start_cond, end_cond; + int i; + + start = end = -1; /* avoid compiler warning about uninitialised variables. */ + parse_size_range(size_expr, &has_start, &start, &has_end, &end); + if (has_start && has_end) { + /* Allow user to put the endpoints in backwards */ + if (start > end) { + int temp = start; + start = end; + end = temp; + } + } + + for (i=0; i<db->n_msgs; i++) { + start_cond = has_start ? (db->size_table[i] > start) : 1; + end_cond = has_end ? (db->size_table[i] < end ) : 1; + if (start_cond && end_cond) { + hits[i] = 1; + } + } +} +/*}}}*/ +static void find_date_matches_in_table(struct read_db *db, char *date_expr, char *hits)/*{{{*/ +{ + time_t start, end; + int has_start, has_end, start_cond, end_cond; + int i; + int status; + + status = scan_date_string(date_expr, &start, &has_start, &end, &has_end); + if (status) { + unlock_and_exit (2); + } + + if (has_start && has_end) { + /* Allow user to put the endpoints in backwards */ + if (start > end) { + time_t temp = start; + start = end; + end = temp; + } + } + + for (i=0; i<db->n_msgs; i++) { + start_cond = has_start ? (db->date_table[i] > start) : 1; + end_cond = has_end ? 
(db->date_table[i] < end ) : 1; + if (start_cond && end_cond) { + hits[i] = 1; + } + } +} +/*}}}*/ +static void find_flag_matches_in_table(struct read_db *db, char *flag_expr, char *hits)/*{{{*/ +{ + int pos_seen, neg_seen; + int pos_replied, neg_replied; + int pos_flagged, neg_flagged; + int negate; + char *p; + int i; + + negate = 0; + pos_seen = neg_seen = 0; + pos_replied = neg_replied = 0; + pos_flagged = neg_flagged = 0; + for (p=flag_expr; *p; p++) { + switch (*p) { + case '-': + negate = 1; + break; + case 's': + case 'S': + if (negate) neg_seen = 1; + else pos_seen = 1; + negate = 0; + break; + case 'r': + case 'R': + if (negate) neg_replied = 1; + else pos_replied = 1; + negate = 0; + break; + case 'f': + case 'F': + if (negate) neg_flagged = 1; + else pos_flagged = 1; + negate = 0; + break; + default: + fprintf(stderr, "Did not understand the character '%c' (0x%02x) in the flags argument F:%s\n", + isprint(*p) ? *p : '.', + (int) *(unsigned char *) p, + flag_expr); + break; + } + } + + for (i=0; i<db->n_msgs; i++) { + if ((!pos_seen || (db->msg_type_and_flags[i] & FLAG_SEEN)) && + (!neg_seen || !(db->msg_type_and_flags[i] & FLAG_SEEN)) && + (!pos_replied || (db->msg_type_and_flags[i] & FLAG_REPLIED)) && + (!neg_replied || !(db->msg_type_and_flags[i] & FLAG_REPLIED)) && + (!pos_flagged || (db->msg_type_and_flags[i] & FLAG_FLAGGED)) && + (!neg_flagged || !(db->msg_type_and_flags[i] & FLAG_FLAGGED))) { + hits[i] = 1; + } + } +} +/*}}}*/ + +static char *mk_maildir_path(int token, char *output_dir, int is_in_new, + int is_seen, int is_replied, int is_flagged)/*{{{*/ +{ + char *result; + char uniq_buf[48]; + int len; + + len = strlen(output_dir) + 64; /* oversize */ + result = new_array(char, len + 1 + sizeof(":2,FRS")); + strcpy(result, output_dir); + strcat(result, is_in_new ? 
"/new/" : "/cur/"); + sprintf(uniq_buf, "123456789.%d.mairix", token); + strcat(result, uniq_buf); + if (is_seen || is_replied || is_flagged) { + strcat(result, ":2,"); + } + if (is_flagged) strcat(result, "F"); + if (is_replied) strcat(result, "R"); + if (is_seen) strcat(result, "S"); + return result; +} +/*}}}*/ +static char *mk_mh_path(int token, char *output_dir)/*{{{*/ +{ + char *result; + char uniq_buf[8]; + int len; + + len = strlen(output_dir) + 10; /* oversize */ + result = new_array(char, len); + strcpy(result, output_dir); + strcat(result, "/"); + sprintf(uniq_buf, "%d", token+1); + strcat(result, uniq_buf); + return result; +} +/*}}}*/ +static int looks_like_maildir_new_p(const char *p)/*{{{*/ +{ + const char *s1, *s2; + s2 = p; + while (*s2) s2++; + while ((s2 > p) && (*s2 != '/')) s2--; + if (s2 <= p) return 0; + s1 = s2 - 1; + while ((s1 > p) && (*s1 != '/')) s1--; + if (s1 <= p) return 0; + if (!strncmp(s1, "/new/", 5)) { + return 1; + } else { + return 0; + } +} +/*}}}*/ +static void create_symlink(char *link_target, char *new_link)/*{{{*/ +{ + if ((!do_hardlinks && symlink(link_target, new_link) < 0) || link(link_target, new_link)) { + if (verbose) { + perror("symlink"); + fprintf(stderr, "Failed path <%s> -> <%s>\n", link_target, new_link); + } + } +} +/*}}}*/ +static void mbox_terminate(const unsigned char *data, int len, FILE *out)/*{{{*/ +{ + if (len == 0) + fputs("\n", out); + else if (len == 1) { + if (data[0] != '\n') + fputs("\n", out); + } + else if (data[len-1] != '\n') + fputs("\n\n", out); + else if (data[len-2] != '\n') + fputs("\n", out); +} +/*}}}*/ +static void append_file_to_mbox(const char *path, FILE *out)/*{{{*/ +{ + unsigned char *data; + int len; + create_ro_mapping(path, &data, &len); + if (data) { + fprintf(out, "From mairix@mairix Mon Jan 1 12:34:56 1970\n"); + fprintf(out, "X-source-folder: %s\n", path); + fwrite (data, sizeof(unsigned char), len, out); + mbox_terminate(data, len, out); + free_ro_mapping(data, len); + } + 
return; +} +/*}}}*/ + +static int had_failed_checksum; + +static void get_validated_mbox_msg(struct read_db *db, int msg_index,/*{{{*/ + int *mbox_index, + unsigned char **mbox_data, int *mbox_len, + unsigned char **msg_data, int *msg_len) +{ + /* msg_data==NULL if checksum mismatches */ + unsigned char *start; + checksum_t csum; + unsigned int mbi, msgi; + + *msg_data = NULL; + *msg_len = 0; + + decode_mbox_indices(db->path_offsets[msg_index], &mbi, &msgi); + *mbox_index = mbi; + + create_ro_mapping(db->data + db->mbox_paths_table[mbi], mbox_data, mbox_len); + if (!*mbox_data) return; + + start = *mbox_data + db->mtime_table[msg_index]; + + /* Ensure that we don't run off the end of the mmap'd file */ + if (db->mtime_table[msg_index] >= *mbox_len) + *msg_len = 0; + else if (db->mtime_table[msg_index] + db->size_table[msg_index] >= *mbox_len) + *msg_len = *mbox_len - db->mtime_table[msg_index]; + else + *msg_len = db->size_table[msg_index]; + + compute_checksum((char *)start, *msg_len, &csum); + if (!memcmp((db->data + db->mbox_checksum_table[mbi] + (msgi * sizeof(checksum_t))), &csum, sizeof(checksum_t))) { + *msg_data = start; + } else { + had_failed_checksum = 1; + } + return; +} +/*}}}*/ +static void append_mboxmsg_to_mbox(struct read_db *db, int msg_index, FILE *out)/*{{{*/ +{ + /* Need to common up code with try_copy_to_path */ + unsigned char *mbox_start, *msg_start; + int mbox_len, msg_len; + int mbox_index; + + get_validated_mbox_msg(db, msg_index, &mbox_index, &mbox_start, &mbox_len, &msg_start, &msg_len); + if (msg_start) { + /* Artificial from line, we don't have the envelope sender so this is + going to be artificial anyway. 
*/ + fprintf(out, "From mairix@mairix Mon Jan 1 12:34:56 1970\n"); + fprintf(out, "X-source-folder: %s\n", + db->data + db->mbox_paths_table[mbox_index]); + fwrite(msg_start, sizeof(unsigned char), msg_len, out); + mbox_terminate(msg_start, msg_len, out); + } + if (mbox_start) { + free_ro_mapping(mbox_start, mbox_len); + } +} +/*}}}*/ +static void try_copy_to_path(struct read_db *db, int msg_index, char *target_path)/*{{{*/ +{ + unsigned char *data; + int mbox_len, msg_len; + int mbi; + FILE *out; + unsigned char *start; + + get_validated_mbox_msg(db, msg_index, &mbi, &data, &mbox_len, &start, &msg_len); + + if (start) { + out = fopen(target_path, "wb"); + if (out) { + fprintf(out, "X-source-folder: %s\n", + db->data + db->mbox_paths_table[mbi]); + fwrite(start, sizeof(char), msg_len?msg_len-1:0, out); + fclose(out); + } + } + + if (data) { + free_ro_mapping(data, mbox_len); + } + return; +} +/*}}}*/ +static struct msg_src *setup_mbox_msg_src(char *filename, off_t start, size_t len)/*{{{*/ +{ + static struct msg_src result; + result.type = MS_MBOX; + result.filename = filename; + result.start = start; + result.len = len; + return &result; +} +/*}}}*/ + +static void get_flags_from_file(struct read_db *db, int idx, int *is_seen, int *is_replied, int *is_flagged) +{ + *is_seen = (db->msg_type_and_flags[idx] & FLAG_SEEN) ? 1 : 0; + *is_replied = (db->msg_type_and_flags[idx] & FLAG_REPLIED) ? 1 : 0; + *is_flagged = (db->msg_type_and_flags[idx] & FLAG_FLAGGED) ? 
/*
 * Convert the NUL-terminated string str to lower case in place.
 * Each byte is passed to tolower() as an unsigned char value.
 */
static void string_tolower(char *str)
{
  unsigned char *c = (unsigned char *) str;

  while (*c) {
    *c = tolower(*c);
    c++;
  }
}
fprintf(stderr, "Unknown key type <%c>\n", *p); break; + } + } + if (do_msgid && (p-key) > 1) { + fprintf(stderr, "Message-ID key <m> can't be used with other keys\n"); + unlock_and_exit(2); + } + start_words = 1 + colon; + } else { + do_body = do_subject = do_to = do_cc = do_from = 1; + start_words = key; + } + + if (do_date || do_size || do_flags) { + memset(hit0, 0, db->n_msgs); + if (do_date) { + find_date_matches_in_table(db, start_words, hit0); + } else if (do_size) { + find_size_matches_in_table(db, start_words, hit0); + } else if (do_flags) { + find_flag_matches_in_table(db, start_words, hit0); + } + + /* AND-combine match vectors */ + for (i=0; i<db->n_msgs; i++) { + hit1[i] &= hit0[i]; + } + } else if (do_msgid) { + char *lower_word = new_string(start_words); + string_tolower(lower_word); + memset(hit0, 0, db->n_msgs); + match_string_in_table2(db, &db->msg_ids, lower_word, hit0); + free(lower_word); + /* AND-combine match vectors */ + for (i=0; i<db->n_msgs; i++) { + hit1[i] &= hit0[i]; + } + } else { +/*{{{ Scan over separate words within this argument */ + + do { + /* / = 'or' separator + * , = 'and' separator */ + char *orsep; + char *andsep; + char *word, *orig_word, *lower_word; + char *equal; + char *p; + int negate; + int had_orsep; + int max_errors; + + orsep = strchr(start_words, '/'); + andsep = strchr(start_words, ','); + had_orsep = 0; + + if (andsep && (!orsep || (andsep < orsep))) { + char *p, *q; + word = new_array(char, 1 + (andsep - start_words)); /* maybe oversize */ + for (p=word, q=start_words; q < andsep; q++) { + if (!isspace(*(unsigned char *)q)) { + *p++ = *q; + } + } + *p = 0; + start_words = andsep + 1; + } else if (orsep) { /* comes before + if there's a + */ + char *p, *q; + word = new_array(char, 1 + (orsep - start_words)); /* maybe oversize */ + for (p=word, q=start_words; q < orsep; q++) { + if (!isspace(*(unsigned char *)q)) { + *p++ = *q; + } + } + *p = 0; + start_words = orsep + 1; + had_orsep = 1; + + } else { + word = 
new_string(start_words); + while (*start_words) ++start_words; + } + + orig_word = word; + + if (word[0] == '~') { + negate = 1; + word++; + } else { + negate = 0; + } + + if (word[0] == '^') { + left_anchor = 1; + word++; + } else { + left_anchor = 0; + } + + equal = strchr(word, '='); + if (equal) { + *equal = 0; + max_errors = atoi(equal + 1); + /* Extend this to do anchoring etc */ + } else { + max_errors = 0; /* keep GCC quiet */ + } + + /* Canonicalise search string to lowercase, since the database has all + * tokens handled that way. But not for path search! */ + lower_word = new_string(word); + string_tolower(lower_word); + + memset(hit0, 0, db->n_msgs); + if (equal) { + if (do_to) match_substring_in_table(db, &db->to, lower_word, max_errors, left_anchor, hit0); + if (do_cc) match_substring_in_table(db, &db->cc, lower_word, max_errors, left_anchor, hit0); + if (do_from) match_substring_in_table(db, &db->from, lower_word, max_errors, left_anchor, hit0); + if (do_subject) match_substring_in_table(db, &db->subject, lower_word, max_errors, left_anchor, hit0); + if (do_body) match_substring_in_table(db, &db->body, lower_word, max_errors, left_anchor, hit0); + if (do_att_name) match_substring_in_table(db, &db->attachment_name, lower_word, max_errors, left_anchor, hit0); + if (do_path) match_substring_in_paths(db, word, max_errors, left_anchor, hit0); + } else { + if (do_to) match_string_in_table(db, &db->to, lower_word, hit0); + if (do_cc) match_string_in_table(db, &db->cc, lower_word, hit0); + if (do_from) match_string_in_table(db, &db->from, lower_word, hit0); + if (do_subject) match_string_in_table(db, &db->subject, lower_word, hit0); + if (do_body) match_string_in_table(db, &db->body, lower_word, hit0); + if (do_att_name) match_string_in_table(db, &db->attachment_name, lower_word, hit0); + /* FIXME */ + if (do_path) match_substring_in_paths(db, word, 0, left_anchor, hit0); + } + + free(lower_word); + + /* AND-combine match vectors */ + for (i=0; i<db->n_msgs; 
i++) { + if (negate) { + hit1[i] &= !hit0[i]; + } else { + hit1[i] &= hit0[i]; + } + } + + if (had_orsep) { + /* OR-combine match vectors */ + for (i=0; i<db->n_msgs; i++) { + hit2[i] |= hit1[i]; + } + memset(hit1, 1, db->n_msgs); + } + + free(orig_word); + + } while (*start_words); +/*}}}*/ + } + + /* OR-combine match vectors */ + for (i=0; i<db->n_msgs; i++) { + hit2[i] |= hit1[i]; + } + + /* AND-combine match vectors */ + for (i=0; i<db->n_msgs; i++) { + hit3[i] &= hit2[i]; + } + } + + n_hits = 0; + + if (show_threads) {/*{{{*/ + char *tids; + tids = new_array(char, db->n_msgs); + memset(tids, 0, db->n_msgs); + for (i=0; i<db->n_msgs; i++) { + if (hit3[i]) { + tids[db->tid_table[i]] = 1; + } + } + for (i=0; i<db->n_msgs; i++) { + if (tids[db->tid_table[i]]) { + hit3[i] = 1; + } + } + free(tids); + } +/*}}}*/ + switch (ft) { + case FT_MAILDIR:/*{{{*/ + for (i=0; i<db->n_msgs; i++) { + if (hit3[i]) { + int is_seen, is_replied, is_flagged; + get_flags_from_file(db, i, &is_seen, &is_replied, &is_flagged); + switch (rd_msg_type(db, i)) { + case DB_MSG_FILE: + { + char *target_path; + char *message_path; + int is_in_new; + message_path = db->data + db->path_offsets[i]; + is_in_new = looks_like_maildir_new_p(message_path); + target_path = mk_maildir_path(i, output_path, is_in_new, is_seen, is_replied, is_flagged); + create_symlink(message_path, target_path); + free(target_path); + ++n_hits; + } + break; + case DB_MSG_MBOX: + { + char *target_path = mk_maildir_path(i, output_path, !is_seen, is_seen, is_replied, is_flagged); + try_copy_to_path(db, i, target_path); + free(target_path); + ++n_hits; + } + break; + case DB_MSG_DEAD: + break; + } + } + } + break; +/*}}}*/ + case FT_MH:/*{{{*/ + for (i=0; i<db->n_msgs; i++) { + if (hit3[i]) { + switch (rd_msg_type(db, i)) { + case DB_MSG_FILE: + { + char *target_path = mk_mh_path(i, output_path); + create_symlink(db->data + db->path_offsets[i], target_path); + free(target_path); + ++n_hits; + } + break; + case DB_MSG_MBOX: + { 
+ char *target_path = mk_mh_path(i, output_path); + try_copy_to_path(db, i, target_path); + free(target_path); + ++n_hits; + } + break; + case DB_MSG_DEAD: + break; + } + } + } + break; +/*}}}*/ + case FT_MBOX:/*{{{*/ + { + FILE *out; + out = fopen(output_path, "ab"); + if (!out) { + fprintf(stderr, "Cannot open output folder %s\n", output_path); + unlock_and_exit(1); + } + + for (i=0; i<db->n_msgs; i++) { + if (hit3[i]) { + switch (rd_msg_type(db, i)) { + case DB_MSG_FILE: + { + append_file_to_mbox(db->data + db->path_offsets[i], out); + ++n_hits; + } + break; + case DB_MSG_MBOX: + { + append_mboxmsg_to_mbox(db, i, out); + ++n_hits; + } + break; + case DB_MSG_DEAD: + break; + } + } + } + fclose(out); + } + + break; +/*}}}*/ + case FT_RAW:/*{{{*/ + for (i=0; i<db->n_msgs; i++) { + if (hit3[i]) { + switch (rd_msg_type(db, i)) { + case DB_MSG_FILE: + { + ++n_hits; + printf("%s\n", db->data + db->path_offsets[i]); + } + break; + case DB_MSG_MBOX: + { + unsigned int mbix, msgix; + int start, len, after_end; + start = db->mtime_table[i]; + len = db->size_table[i]; + after_end = start + len; + ++n_hits; + decode_mbox_indices(db->path_offsets[i], &mbix, &msgix); + printf("mbox:%s [%d,%d)\n", db->data + db->mbox_paths_table[mbix], start, after_end); + } + break; + case DB_MSG_DEAD: + break; + } + } + } + break; +/*}}}*/ + case FT_EXCERPT:/*{{{*/ + for (i=0; i<db->n_msgs; i++) { + if (hit3[i]) { + struct rfc822 *parsed = NULL; + switch (rd_msg_type(db, i)) { + case DB_MSG_FILE: + { + char *filename; + ++n_hits; + printf("---------------------------------\n"); + filename = db->data + db->path_offsets[i]; + printf("%s\n", filename); + parsed = make_rfc822(filename); + } + break; + case DB_MSG_MBOX: + { + unsigned int mbix, msgix; + int start, len, after_end; + unsigned char *mbox_start, *msg_start; + int mbox_len, msg_len; + int mbox_index; + + start = db->mtime_table[i]; + len = db->size_table[i]; + after_end = start + len; + ++n_hits; + 
/* Report whether NAME refers to an existing directory.
 *
 * Returns 1 when stat() succeeds and NAME is a directory.
 * Returns 0 when stat() fails, or when NAME exists but is not a directory;
 * in the latter case an unlink() of NAME is attempted first.  The result of
 * that unlink() is not checked. */
static int directory_exists_remove_other(char *name)/*{{{*/
{
  struct stat info;
  int is_dir = 0;

  if (stat(name, &info) >= 0) {
    is_dir = S_ISDIR(info.st_mode) ? 1 : 0;
    if (!is_dir) {
      /* Something other than a directory is in the way : try to remove it. */
      unlink(name);
    }
  }

  return is_dir;
}
/*}}}*/
*/ + if (unlink(fpath) < 0) { + fprintf(stderr, "Unlinking %s failed\n", fpath); + } + } + } + } + closedir(d); + } + + free(fpath); + free(sdir); +} +/*}}}*/ +static void clear_mh_folder(char *path)/*{{{*/ +{ + char *fpath; + int len; + DIR *d; + struct dirent *de; + struct stat sb; + + len = strlen(path); + + fpath = new_array(char, len + 3 + NAME_MAX); + + d = opendir(path); + if (d) { + while ((de = readdir(d))) { + if (valid_mh_filename_p(de->d_name)) { + strcpy(fpath, path); + strcat(fpath, "/"); + strcat(fpath, de->d_name); + if (lstat(fpath, &sb) >= 0) { + /* See under maildir above for explanation */ + if (S_ISLNK(sb.st_mode) || S_ISREG(sb.st_mode)) { + /* FIXME : Can you unlink from a directory while doing a readdir loop over it? */ + if (unlink(fpath) < 0) { + fprintf(stderr, "Unlinking %s failed\n", fpath); + } + } + } + } + } + closedir(d); + } + + free(fpath); +} +/*}}}*/ +static void clear_mbox_folder(char *path)/*{{{*/ +{ + unlink(path); +} +/*}}}*/ + +int search_top(int do_threads, int do_augment, char *database_path, char *complete_mfolder, char **argv, enum folder_type ft, int verbose)/*{{{*/ +{ + struct read_db *db; + int result; + + db = open_db(database_path); + + switch (ft) { + case FT_MAILDIR: + maybe_create_maildir(complete_mfolder); + break; + case FT_MH: + if (!directory_exists_remove_other(complete_mfolder)) { + create_dir(complete_mfolder); + } + break; + case FT_MBOX: + /* Nothing to do */ + break; + case FT_RAW: + case FT_EXCERPT: + break; + default: + assert(0); + } + + if (!do_augment) { + switch (ft) { + case FT_MAILDIR: + clear_maildir_subfolder(complete_mfolder, "new"); + clear_maildir_subfolder(complete_mfolder, "cur"); + break; + case FT_MH: + clear_mh_folder(complete_mfolder); + break; + case FT_MBOX: + clear_mbox_folder(complete_mfolder); + break; + case FT_RAW: + case FT_EXCERPT: + break; + default: + assert(0); + } + } + + result = do_search(db, argv, complete_mfolder, do_threads, ft, verbose); + free(complete_mfolder); + 
close_db(db); + return result; +} +/*}}}*/ + + diff --git a/src/mairix/stats.c b/src/mairix/stats.c @@ -0,0 +1,128 @@ +/* + mairix - message index builder and finder for maildir folders. + + ********************************************************************** + * Copyright (C) Richard P. Curnow 2002-2004 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +#include "mairix.h" +#include "memmac.h" +#include "reader.h" + +static void do_toktable(struct toktable *x, int *lc, int *elc, int *ec, int size, int *ml, int *mel, int *me) +{ + int i; + for (i=0; i<x->size; i++) { + struct token *tok = x->tokens[i]; + unsigned char *j, *last_char; + int incr; + + if (tok) { + int len = strlen(tok->text); + if (len > size) { + fprintf(stderr, "Token length %d exceeds size\n", len); + } else { + lc[len]++; + if (len > *ml) *ml = len; + } + + /* Deal with encoding length */ + if (tok->match0.n > size) { + fprintf(stderr, "Token encoding length %d exceeds size\n", tok->match0.n); + } else { + elc[tok->match0.n]++; + if (tok->match0.n > *mel) *mel = tok->match0.n; + } + + /* Deal with encoding */ + j = tok->match0.msginfo; + last_char = j + tok->match0.n; + while (j < last_char) { + incr = read_increment(&j); + if (incr > size) { + fprintf(stderr, "Encoding increment %d exceeds size\n", 
/* Print the histogram X[0..MAX] (inclusive) to stdout, one row per index.
 *
 * Each row shows: the index, the count at that index, the share of the
 * grand total contributed by this row, and the cumulative share up to and
 * including this row.  Both shares are scaled so that the whole table
 * corresponds to 256.
 *
 * When every count is zero the grand total is zero; the shares are then
 * printed as 0 instead of evaluating 0.0/0.0 and converting the resulting
 * NaN to int, which is undefined behaviour. */
void print_table(int *x, int max) {
  int total = 0;
  int sum = 0;
  int prev_scaled = 0;
  int i;

  for (i = 0; i <= max; i++) {
    total += x[i];
  }

  for (i = 0; i <= max; i++) {
    int scaled;

    sum += x[i];
    if (total != 0) {
      scaled = (int)((double)sum*256.0/(double)total);
    } else {
      /* Empty histogram : nothing to apportion. */
      scaled = 0;
    }
    printf("%5d : %5d %3d %3d\n", i, x[i], scaled - prev_scaled, scaled);
    prev_scaled = scaled;
  }
}
*/ + do_toktable(db->msg_ids, len_counts, enc_len_counts, enc_counts, size, &max_len, &max_enc_len, &max_enc); +#endif + + printf("Max token length : %d\n", max_len); + print_table(len_counts, max_len); + + printf("Max encoding vector length : %d\n", max_enc_len); + print_table(enc_len_counts, max_enc_len); + + printf("Max encoding increment : %d\n", max_enc); + print_table(enc_counts, max_enc); + + return; +} + diff --git a/src/mairix/tok.c b/src/mairix/tok.c @@ -0,0 +1,344 @@ +/* + mairix - message index builder and finder for maildir folders. + + ********************************************************************** + * Copyright (C) Richard P. Curnow 2002-2004, 2005 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + ********************************************************************** + */ + +/* Functions for handling tokens */ + +#include <assert.h> +#include <ctype.h> +#include "mairix.h" + +static void init_matches(struct matches *m) {/*{{{*/ + m->msginfo = NULL; + m->n = 0; + m->max = 0; + m->highest = 0; +} +/*}}}*/ +struct token *new_token(void)/*{{{*/ +{ + struct token *result = new(struct token); + result->text = NULL; + init_matches(&result->match0); + return result; +} +/*}}}*/ +struct token2 *new_token2(void)/*{{{*/ +{ + struct token2 *result = new(struct token2); + result->text = NULL; + init_matches(&result->match0); + init_matches(&result->match1); + return result; +} +/*}}}*/ +void free_token(struct token *x)/*{{{*/ +{ + if (x->text) free(x->text); + if (x->match0.msginfo) free(x->match0.msginfo); + free(x); +} +/*}}}*/ +void free_token2(struct token2 *x)/*{{{*/ +{ + if (x->text) free(x->text); + if (x->match0.msginfo) free(x->match0.msginfo); + if (x->match1.msginfo) free(x->match1.msginfo); + free(x); +} +/*}}}*/ +struct toktable *new_toktable(void)/*{{{*/ +{ + struct toktable *result = new(struct toktable); + result->tokens = NULL; + result->n = 0; + result->hwm = 0; + result->size = 0; + return result; +} +/*}}}*/ +struct toktable2 *new_toktable2(void)/*{{{*/ +{ + struct toktable2 *result = new(struct toktable2); + result->tokens = NULL; + result->n = 0; + result->hwm = 0; + result->size = 0; + return result; +} +/*}}}*/ +void free_toktable(struct toktable *x)/*{{{*/ +{ + if (x->tokens) { + int i; + for (i=0; i<x->size; i++) { + if (x->tokens[i]) { + free_token(x->tokens[i]); + } + } + free(x->tokens); + } + free(x); +} +/*}}}*/ +void free_toktable2(struct toktable2 *x)/*{{{*/ +{ + if (x->tokens) { + int i; + for (i=0; i<x->size; i++) { + if (x->tokens[i]) { + free_token2(x->tokens[i]); + } + } + free(x->tokens); + } + free(x); +} +/*}}}*/ +/* FIXME : This stuff really needs cleaning up. 
*/ +static void enlarge_toktable(struct toktable *table)/*{{{*/ +{ + if (table->size == 0) { + int i; + /* initial allocation */ + table->size = 1024; + table->mask = table->size - 1; + table->tokens = new_array(struct token *, table->size); + for (i=0; i<table->size; i++) { + table->tokens[i] = NULL; + } + } else { + struct token **old_tokens; + int old_size = table->size; + int i; + /* reallocate */ + old_tokens = table->tokens; + table->size <<= 1; + table->mask = table->size - 1; + table->tokens = new_array(struct token *, table->size); + for (i=0; i<table->size; i++) { + table->tokens[i] = NULL; + } + for (i=0; i<old_size; i++) { + unsigned long new_index; + if (old_tokens[i]) { + new_index = old_tokens[i]->hashval & table->mask; + while (table->tokens[new_index]) { + new_index++; + new_index &= table->mask; + } + table->tokens[new_index] = old_tokens[i]; + } + } + free(old_tokens); + } + table->hwm = (table->size >> 2) + (table->size >> 3); /* allow 3/8 of nodes to be used */ +} +/*}}}*/ +static void enlarge_toktable2(struct toktable2 *table)/*{{{*/ +{ + if (table->size == 0) { + int i; + /* initial allocation */ + table->size = 1024; + table->mask = table->size - 1; + table->tokens = new_array(struct token2 *, table->size); + for (i=0; i<table->size; i++) { + table->tokens[i] = NULL; + } + } else { + struct token2 **old_tokens; + int old_size = table->size; + int i; + /* reallocate */ + old_tokens = table->tokens; + table->size <<= 1; + table->mask = table->size - 1; + table->tokens = new_array(struct token2 *, table->size); + for (i=0; i<table->size; i++) { + table->tokens[i] = NULL; + } + for (i=0; i<old_size; i++) { + unsigned long new_index; + if (old_tokens[i]) { + new_index = old_tokens[i]->hashval & table->mask; + while (table->tokens[new_index]) { + new_index++; + new_index &= table->mask; + } + table->tokens[new_index] = old_tokens[i]; + } + } + free(old_tokens); + } + table->hwm = (table->size >> 2) + (table->size >> 3); /* allow 3/8 of nodes to be 
/* Store the non-negative integer VAL at X in a variable-length big-endian
 * form and return the number of bytes written.
 *
 *   0 .. 127      : 1 byte,  the value itself (top bit clear)
 *   128 .. 16383  : 2 bytes, top two bits of the first byte are 10
 *   above 16383   : 4 bytes, top two bits of the first byte are 11
 *
 * The 4-byte form asserts that the bits above bit 23 fit in six bits. */
static int insert_value(unsigned char *x, int val)/*{{{*/
{
  int top;

  assert(val >= 0);

  if (val > 16383) {
    top = (val >> 24);
    assert (top <= 63);
    x[0] = top | 0xc0;
    x[1] = ((val >> 16) & 0xff);
    x[2] = ((val >> 8) & 0xff);
    x[3] = (val & 0xff);
    return 4;
  }

  if (val > 127) {
    x[0] = (val >> 8) | 0x80;
    x[1] = (val & 0xff);
    return 2;
  }

  x[0] = val;
  return 1;
}
/*}}}*/
*/ + new_tok->text = (char *) lc_tok_text; + new_tok->hashval = hash; /* save full width for later */ + table->tokens[index] = new_tok; + ++table->n; + } else { + free(lc_tok_text); + } + + tok = table->tokens[index]; + + check_and_enlarge_encoding(&tok->match0); + insert_index_on_encoding(&tok->match0, file_index); +} +/*}}}*/ +void add_token2_in_file(int file_index, unsigned int hash_key, char *tok_text, struct toktable2 *table, int add_to_chain1)/*{{{*/ +{ + unsigned long hash; + int index; + struct token2 *tok; + char *lc_tok_text; + char *p; + + lc_tok_text = new_string(tok_text); + for (p = lc_tok_text; *p; p++) { + *p = tolower(*(unsigned char *) p); + } + /* 2nd arg is string length */ + hash = hashfn((unsigned char *) lc_tok_text, p - lc_tok_text, hash_key); + + if (table->n >= table->hwm) { + enlarge_toktable2(table); + } + + index = hash & table->mask; + while (table->tokens[index]) { + /* strcmp ok as text has been tolower'd earlier */ + if (!strcmp(lc_tok_text, table->tokens[index]->text)) + break; + index++; + index &= table->mask; + } + + if (!table->tokens[index]) { + /* Allocate new */ + struct token2 *new_tok = new_token2(); + /* New token takes ownership of lc_tok_text, no need to free that later. 
*/ + new_tok->text = lc_tok_text; + new_tok->hashval = hash; /* save full width for later */ + table->tokens[index] = new_tok; + ++table->n; + } else { + free(lc_tok_text); + } + + tok = table->tokens[index]; + + check_and_enlarge_encoding(&tok->match0); + insert_index_on_encoding(&tok->match0, file_index); + if (add_to_chain1) { + check_and_enlarge_encoding(&tok->match1); + insert_index_on_encoding(&tok->match1, file_index); + } +} +/*}}}*/ + + + + diff --git a/src/mairix/version.h b/src/mairix/version.h @@ -0,0 +1,4 @@ +#ifndef VERSION_H +#define VERSION_H 1 +#define PROGRAM_VERSION "0.23" +#endif /* VERSION_H */ diff --git a/src/mairix/version.txt b/src/mairix/version.txt @@ -0,0 +1 @@ +0.23 diff --git a/src/mairix/writer.c b/src/mairix/writer.c @@ -0,0 +1,614 @@ +/* + mairix - message index builder and finder for maildir folders. + + ********************************************************************** + * Copyright (C) Richard P. Curnow 2002,2003,2004,2005,2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + ********************************************************************** + */ + +/* Write the database to disc. 
*/ + +#include "mairix.h" +#include "reader.h" + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <assert.h> +#include <sys/mman.h> + +struct write_map_toktable {/*{{{*/ + + /* Table of character offsets to null-terminated token texts */ + int tok_offset; + + /* Table of character offsets to byte strings containing compressed + * delta-encoding of file indices matching the token */ + int enc_offset; +};/*}}}*/ +struct write_map_toktable2 {/*{{{*/ + + /* Table of character offsets to null-terminated token texts */ + int tok_offset; + + /* Table of character offsets to byte strings containing compressed + * delta-encoding of file indices matching the token */ + int enc0_offset; + int enc1_offset; +};/*}}}*/ + +struct write_map {/*{{{*/ +/* Contain offset information for the various tables. + UI stuff in 4 byte units rel to base addr. + Char stuff in byte units rel to base addr. */ + + /* Path information */ + int path_offset; + int mtime_offset; /* Message file mtimes (maildir/mh), mbox number (mbox) */ + int size_offset; /* Message sizes (maildir/mh), entry in respective mbox (mbox) */ + int date_offset; /* Message dates (all folder types) */ + int tid_offset; /* Thread group index table (all folder types) */ + + int mbox_paths_offset; + int mbox_entries_offset; + int mbox_mtime_offset; + int mbox_size_offset; + /* Character offset to checksum of first msg in the mbox. Positions of + * subsequent messages computed by indexing - no explicit table entries + * anywhere. 
*/ + int mbox_checksum_offset; + + struct write_map_toktable to; + struct write_map_toktable cc; + struct write_map_toktable from; + struct write_map_toktable subject; + struct write_map_toktable body; + struct write_map_toktable attachment_name; + struct write_map_toktable2 msg_ids; + + /* To get base address for character data */ + int beyond_last_ui_offset; +}; +/*}}}*/ + +static void create_rw_mapping(char *filename, size_t len, int *out_fd, char **out_data)/*{{{*/ +{ + int fd; + char *data; + struct stat sb; + + fd = open(filename, O_RDWR | O_CREAT, 0600); + if (fd < 0) { + report_error("open", filename); + unlock_and_exit(2); + } + + if (fstat(fd, &sb) < 0) { + report_error("stat", filename); + unlock_and_exit(2); + } + + if (sb.st_size < len) { + /* Extend */ + if (lseek(fd, len - 1, SEEK_SET) < 0) { + report_error("lseek", filename); + unlock_and_exit(2); + } + if (write(fd, "\000", 1) < 0) { + report_error("write", filename); + unlock_and_exit(2); + } + } else if (sb.st_size > len) { + /* Truncate */ + if (ftruncate(fd, len) < 0) { + report_error("ftruncate", filename); + unlock_and_exit(2); + } + } else { + /* Exactly the right length already - nothing to do! 
*/ + } + + data = mmap(0, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + if (data == MAP_FAILED) { + report_error("writer:mmap", filename); + unlock_and_exit(2); + } + + *out_data = data; + *out_fd = fd; +} +/*}}}*/ + +static int toktable_char_length(struct toktable *tab)/*{{{*/ +{ + int result = 0; + int i; + for (i=0; i<tab->size; i++) { + if (tab->tokens[i]) { + result += (1 + strlen(tab->tokens[i]->text)); + result += (1 + tab->tokens[i]->match0.n); + } + } + return result; +} +/*}}}*/ +static int toktable2_char_length(struct toktable2 *tab)/*{{{*/ +{ + int result = 0; + int i; + for (i=0; i<tab->size; i++) { + if (tab->tokens[i]) { + result += (1 + strlen(tab->tokens[i]->text)); + result += (1 + tab->tokens[i]->match0.n); + result += (1 + tab->tokens[i]->match1.n); + } + } + return result; +} +/*}}}*/ +static int char_length(struct database *db)/*{{{*/ +{ + /* Return total length of character data to be written. */ + int result; + int i; + + result = 0; + + /* For type table. */ + result += db->n_msgs; + + for (i=0; i<db->n_msgs; i++) { + switch (db->type[i]) { + case MTY_DEAD: + break; + case MTY_MBOX: + break; + case MTY_FILE: + assert(db->msgs[i].src.mpf.path); + result += (1 + strlen(db->msgs[i].src.mpf.path)); + break; + } + } + + for (i=0; i<db->n_mboxen; i++) { + struct mbox *mb = &db->mboxen[i]; + result += mb->n_msgs * sizeof(checksum_t); + if (mb->path) { + result += (1 + strlen(mb->path)); + } + } + + result += toktable_char_length(db->to); + result += toktable_char_length(db->cc); + result += toktable_char_length(db->from); + result += toktable_char_length(db->subject); + result += toktable_char_length(db->body); + result += toktable_char_length(db->attachment_name); + result += toktable2_char_length(db->msg_ids); + + return result; +} +/*}}}*/ + +static void compute_mapping(struct database *db, struct write_map *map)/*{{{*/ +{ + int total = UI_HEADER_LEN; + + map->path_offset = total, total += db->n_msgs; + map->mtime_offset = total, total += 
db->n_msgs; + map->date_offset = total, total += db->n_msgs; + map->size_offset = total, total += db->n_msgs; + map->tid_offset = total, total += db->n_msgs; + + map->mbox_paths_offset = total, total += db->n_mboxen; + map->mbox_entries_offset = total, total += db->n_mboxen; + map->mbox_mtime_offset = total, total += db->n_mboxen; + map->mbox_size_offset = total, total += db->n_mboxen; + map->mbox_checksum_offset = total, total += db->n_mboxen; + + map->to.tok_offset = total, total += db->to->n; + map->to.enc_offset = total, total += db->to->n; + + map->cc.tok_offset = total, total += db->cc->n; + map->cc.enc_offset = total, total += db->cc->n; + + map->from.tok_offset = total, total += db->from->n; + map->from.enc_offset = total, total += db->from->n; + + map->subject.tok_offset = total, total += db->subject->n; + map->subject.enc_offset = total, total += db->subject->n; + + map->body.tok_offset = total, total += db->body->n; + map->body.enc_offset = total, total += db->body->n; + + map->attachment_name.tok_offset = total, total += db->attachment_name->n; + map->attachment_name.enc_offset = total, total += db->attachment_name->n; + + map->msg_ids.tok_offset = total, total += db->msg_ids->n; + map->msg_ids.enc0_offset = total, total += db->msg_ids->n; + map->msg_ids.enc1_offset = total, total += db->msg_ids->n; + + map->beyond_last_ui_offset = total; +} +/*}}}*/ +static void write_header(char *data, unsigned int *uidata, struct database *db, struct write_map *map)/*{{{*/ +{ + /* Endianness-independent writes - at least the magic number will be + * recognized if the database is read by this program on a machine of + * opposite endianness. 
*/ + unsigned char *ucdata = (unsigned char *) data; + + ucdata[0] = HEADER_MAGIC0; + ucdata[1] = HEADER_MAGIC1; + ucdata[2] = HEADER_MAGIC2; + ucdata[3] = HEADER_MAGIC3; + + uidata[UI_ENDIAN] = 0x44332211; /* For checking reversed endianness on read */ + uidata[UI_N_MSGS] = db->n_msgs; + uidata[UI_MSG_CDATA] = map->path_offset; /* offset table of ptrs to filenames */ + uidata[UI_MSG_MTIME] = map->mtime_offset; /* offset of mtime table */ + uidata[UI_MSG_DATE] = map->date_offset; /* offset of table of message Date: header lines as time_t */ + uidata[UI_MSG_SIZE] = map->size_offset; /* offset of table of message sizes in bytes */ + uidata[UI_MSG_TID] = map->tid_offset; /* offset of table of thread group numbers */ + + uidata[UI_MBOX_N] = db->n_mboxen; + uidata[UI_MBOX_PATHS] = map->mbox_paths_offset; + uidata[UI_MBOX_ENTRIES] = map->mbox_entries_offset; + uidata[UI_MBOX_MTIME] = map->mbox_mtime_offset; + uidata[UI_MBOX_SIZE] = map->mbox_size_offset; + uidata[UI_MBOX_CKSUM] = map->mbox_checksum_offset; + + uidata[UI_HASH_KEY] = db->hash_key; + + uidata[UI_TO_N] = db->to->n; + uidata[UI_TO_TOK] = map->to.tok_offset; + uidata[UI_TO_ENC] = map->to.enc_offset; + + uidata[UI_CC_N] = db->cc->n; + uidata[UI_CC_TOK] = map->cc.tok_offset; + uidata[UI_CC_ENC] = map->cc.enc_offset; + + uidata[UI_FROM_N] = db->from->n; + uidata[UI_FROM_TOK] = map->from.tok_offset; + uidata[UI_FROM_ENC] = map->from.enc_offset; + + uidata[UI_SUBJECT_N] = db->subject->n; + uidata[UI_SUBJECT_TOK] = map->subject.tok_offset; + uidata[UI_SUBJECT_ENC] = map->subject.enc_offset; + + uidata[UI_BODY_N] = db->body->n; + uidata[UI_BODY_TOK] = map->body.tok_offset; + uidata[UI_BODY_ENC] = map->body.enc_offset; + + uidata[UI_ATTACHMENT_NAME_N] = db->attachment_name->n; + uidata[UI_ATTACHMENT_NAME_TOK] = map->attachment_name.tok_offset; + uidata[UI_ATTACHMENT_NAME_ENC] = map->attachment_name.enc_offset; + + uidata[UI_MSGID_N] = db->msg_ids->n; + uidata[UI_MSGID_TOK] = map->msg_ids.tok_offset; + 
uidata[UI_MSGID_ENC0] = map->msg_ids.enc0_offset; + uidata[UI_MSGID_ENC1] = map->msg_ids.enc1_offset; + + return; +} +/*}}}*/ +static char *write_type_and_flag_table(struct database *db, unsigned int *uidata, char *data, char *cdata)/*{{{*/ +{ + int i; + for (i=0; i<db->n_msgs; i++) { + struct msgpath *msgdata = db->msgs + i; + switch (db->type[i]) { + case MTY_FILE: + cdata[i] = DB_MSG_FILE; + break; + case MTY_MBOX: + cdata[i] = DB_MSG_MBOX; + break; + case MTY_DEAD: + cdata[i] = DB_MSG_DEAD; + break; + } + + if (msgdata->seen) cdata[i] |= FLAG_SEEN; + if (msgdata->replied) cdata[i] |= FLAG_REPLIED; + if (msgdata->flagged) cdata[i] |= FLAG_FLAGGED; + } + uidata[UI_MSG_TYPE_AND_FLAGS] = cdata - data; + return cdata + db->n_msgs; +} +/*}}}*/ +static char *write_messages(struct database *db, struct write_map *map, unsigned int *uidata, char *data, char *cdata)/*{{{*/ +{ + int i; + char *start_cdata = cdata; + + for (i=0; i<db->n_msgs; i++) { + int slen; + switch (db->type[i]) { + case MTY_FILE: + slen = strlen(db->msgs[i].src.mpf.path); + uidata[map->path_offset + i] = cdata - data; + uidata[map->mtime_offset + i] = db->msgs[i].src.mpf.mtime; + uidata[map->size_offset + i] = db->msgs[i].src.mpf.size; + uidata[map->date_offset + i] = db->msgs[i].date; + uidata[map->tid_offset + i] = db->msgs[i].tid; + memcpy(cdata, db->msgs[i].src.mpf.path, 1 + slen); /* include trailing null */ + cdata += (1 + slen); + break; + case MTY_MBOX: + { + int mbno = db->msgs[i].src.mbox.file_index; + int msgno = db->msgs[i].src.mbox.msg_index; + struct mbox *mb = &db->mboxen[mbno]; + uidata[map->path_offset + i] = encode_mbox_indices(mbno, msgno); + uidata[map->mtime_offset + i] = mb->start[msgno]; + uidata[map->size_offset + i] = mb->len[msgno]; + uidata[map->date_offset + i] = db->msgs[i].date; + uidata[map->tid_offset + i] = db->msgs[i].tid; + } + break; + case MTY_DEAD: + uidata[map->path_offset + i] = 0; /* Can't ever happen for real */ + uidata[map->mtime_offset + i] = 0; /* For 
cleanliness */ + uidata[map->size_offset + i] = 0; /* For cleanliness */ + /* The following line is necessary, otherwise 'random' tid + * information is written to the database, which can crash the search + * functions. */ + uidata[map->tid_offset + i] = db->msgs[i].tid; + break; + } + } + if (verbose) { + printf("Wrote %d messages (%d bytes of tables, %d bytes of text)\n", + db->n_msgs, 4*5*db->n_msgs, (int)(cdata - start_cdata)); + } + return cdata; /* new value */ +} +/*}}}*/ +#if 0 +/* compare_tokens: compiled out by the enclosing #if 0. Comparator taking pointers to struct token pointers and ordering them by strcmp() of the tokens' text; referenced only by the disabled qsort() calls in the toktable writers. */ static int compare_tokens(const void *a, const void *b)/*{{{*/ +{ + const struct token **aa = (const struct token **) a; + const struct token **bb = (const struct token **) b; + return strcmp((*aa)->text, (*bb)->text); +} +/*}}}*/ +#endif + /* write_mbox_headers: for each of the db->n_mboxen mboxes, store its message count, current mtime and current size in the uidata slots located through map, and append its path string to the character area at cdata, recording the path's offset from data (0 when the mbox has no path). Prints a summary when verbose is set. Returns the advanced cdata pointer. */ +static char *write_mbox_headers(struct database *db, struct write_map *map, unsigned int *uidata, char *data, char *cdata)/*{{{*/ +{ + int i, len; + char *start_cdata = cdata; /* remembered only for the verbose byte count */ + + for (i=0; i<db->n_mboxen; i++) { + struct mbox *mb = &db->mboxen[i]; + uidata[map->mbox_entries_offset + i] = mb->n_msgs; + uidata[map->mbox_mtime_offset + i] = mb->current_mtime; + uidata[map->mbox_size_offset + i] = mb->current_size; + if (mb->path) { + uidata[map->mbox_paths_offset + i] = cdata - data; /* path is addressed by its byte offset from the start of data */ + len = strlen(mb->path); + memcpy(cdata, mb->path, 1+len); /* 1+len: the terminating NUL is copied too */ + cdata += 1+len; + } else { + uidata[map->mbox_paths_offset + i] = 0; /* no path: offset 0 is stored instead */ + } + } + if (verbose) { + printf("Wrote %d mbox headers (%d bytes of tables, %d bytes of paths)\n", + db->n_mboxen, 4*4*db->n_mboxen, (int)(cdata - start_cdata)); /* 4*4: four tables are filled above (entries, mtime, size, paths); presumably 4 bytes per slot - confirm */ + } + return cdata; +} +/*}}}*/ +/* write_mbox_checksums: for each mbox, record in uidata the offset from data at which its checksums start, then append one checksum_t per message copied from mb->check_all[]. Prints the number of bytes written when verbose is set. Returns the advanced cdata pointer. */ static char * write_mbox_checksums(struct database *db, struct write_map *map, unsigned int *uidata, char *data, char *cdata)/*{{{*/ +{ + int i, j; + char *start_cdata = cdata; + + for (i=0; i<db->n_mboxen; i++) { + struct mbox *mb = &db->mboxen[i]; + uidata[map->mbox_checksum_offset + i] = cdata - data; /* recorded even when the mbox has no messages */ + for (j=0; j<mb->n_msgs; j++) { + memcpy(cdata, mb->check_all[j], sizeof(checksum_t)); + cdata += sizeof(checksum_t); + } + } + if (verbose) { 
+ printf("Wrote %d bytes of mbox message checksums\n", + (int)(cdata - start_cdata)); + } + return cdata; +} +/*}}}*/ + /* write_toktable: gather the non-NULL entries of tab->tokens[0..tab->size) into a temporary array, then (1) append each token's text including its NUL and record its offset from data in the tok table, and (2) append each token's match0.msginfo bytes followed by a 0xff terminator and record that offset in the enc table. header_name is used only in the verbose message. Frees the temporary array and returns the advanced cdata pointer. */ +static char *write_toktable(struct toktable *tab, struct write_map_toktable *map, unsigned int *uidata, char *data, char *cdata, char *header_name)/*{{{*/ +{ + int i, j, n, max; + char *start_cdata, *mid_cdata; + struct token **stok; + stok = new_array(struct token *, tab->n); /* presumably allocates tab->n pointer slots - confirm against the new_array macro */ + max = tab->size; + n = tab->n; + + for (i=0, j=0; i<max; i++) { + struct token *tok = tab->tokens[i]; + if (tok) { + stok[j++] = tok; + } + } + + assert(j == n); /* NOTE(review): this check runs only after the loop above has already stored j entries into stok */ + +#if 0 + /* The search functions don't rely on the tokens being sorted. So not + * sorting here will save time. */ + qsort(stok, n, sizeof(struct token *), compare_tokens); +#endif + + start_cdata = cdata; + + /* FIXME : Eventually, the tokens have to be sorted - need to feed them from + * a different data structure (array with no holes) */ + for (i=0; i<n; i++) { + int slen; + uidata[map->tok_offset + i] = cdata - data; + slen = strlen(stok[i]->text); + memcpy(cdata, stok[i]->text, 1 + slen); /* 1 + slen: include the terminating NUL */ + cdata += (1 + slen); + } + + mid_cdata = cdata; /* boundary between token text and hit encodings, used for the verbose statistics */ + + for (i=0; i<n; i++) { + int dlen; + dlen = stok[i]->match0.n; + uidata[map->enc_offset + i] = cdata - data; + memcpy(cdata, stok[i]->match0.msginfo, dlen); + cdata += dlen; + *cdata++ = 0xff; /* termination character */ + } + + if (verbose) { + printf("%s: Wrote %d tokens (%d bytes of tables, %d bytes of text, %d bytes of hit encoding)\n", + header_name, n, 2*4*n, (int)(mid_cdata - start_cdata), (int)(cdata - mid_cdata)); + } + + free(stok); + return cdata; +} +/*}}}*/ +/* write_toktable2: same layout as write_toktable but for struct token2 entries, which carry two hit encodings: token text is written first (offsets in the tok table), then every match0 encoding (enc0 table), then every match1 encoding (enc1 table), each encoding terminated by 0xff. Frees the temporary array and returns the advanced cdata pointer. */ static char *write_toktable2(struct toktable2 *tab, struct write_map_toktable2 *map, unsigned int *uidata, char *data, char *cdata, char *header_name)/*{{{*/ +{ + int i, j, n, max; + char *start_cdata, *mid_cdata; + struct token2 **stok; + stok = new_array(struct token2 *, tab->n); + max = tab->size; + n = tab->n; + + for (i=0, j=0; i<max; i++) { + struct token2 *tok = tab->tokens[i]; + if (tok) { + 
stok[j++] = tok; + } + } + + assert(j == n); + +#if 0 + /* The search functions don't rely on the tokens being sorted. So not + * sorting here will save time. */ + /* NOTE(review): this disabled call uses sizeof(struct token *) and compare_tokens although stok holds struct token2 pointers here - revisit if it is ever re-enabled. */ + qsort(stok, n, sizeof(struct token *), compare_tokens); +#endif + + start_cdata = cdata; + + /* FIXME : Eventually, the tokens have to be sorted - need to feed them from + * a different data structure (array with no holes) */ + for (i=0; i<n; i++) { + int slen; + uidata[map->tok_offset + i] = cdata - data; + slen = strlen(stok[i]->text); + memcpy(cdata, stok[i]->text, 1 + slen); + cdata += (1 + slen); + } + + mid_cdata = cdata; + + for (i=0; i<n; i++) { + int dlen; + dlen = stok[i]->match0.n; + uidata[map->enc0_offset + i] = cdata - data; + memcpy(cdata, stok[i]->match0.msginfo, dlen); + cdata += dlen; + *cdata++ = 0xff; /* termination character */ + } + + for (i=0; i<n; i++) { + int dlen; + dlen = stok[i]->match1.n; + uidata[map->enc1_offset + i] = cdata - data; + memcpy(cdata, stok[i]->match1.msginfo, dlen); + cdata += dlen; + *cdata++ = 0xff; /* termination character */ + } + + if (verbose) { + printf("%s: Wrote %d tokens (%d bytes of tables, %d bytes of text, %d bytes of hit encoding)\n", + header_name, n, 2*4*n, (int)(mid_cdata - start_cdata), (int)(cdata - mid_cdata)); /* the hit-encoding figure spans both the match0 and match1 passes. NOTE(review): three per-token tables (tok, enc0, enc1) are filled above but the table figure is 2*4*n - confirm whether 3*4*n was intended */ + } + + free(stok); + return cdata; +} +/*}}}*/ +/* write_database: write db to filename. Optionally runs check_database_integrity(), calls unlock_and_exit(1) if verify_mbox_size_constraints() fails, computes the table layout with compute_mapping(), obtains fd and data from create_rw_mapping() for the computed file length, fills in the header and every table through the write_* helpers in a fixed order, then calls munmap(), fsync() and close(); a failure of any of those three is reported with report_error() and ends in unlock_and_exit(2). */ void write_database(struct database *db, char *filename, int do_integrity_checks)/*{{{*/ +{ + int file_len; /* NOTE(review): held in an int; confirm that the size constraints checked below keep the total within range */ + int fd; + char *data, *cdata; + unsigned int *uidata; + struct write_map map; + + if (do_integrity_checks) { + check_database_integrity(db); + } + + if (!verify_mbox_size_constraints(db)) { + unlock_and_exit(1); + } + + /* Work out mappings */ + compute_mapping(db, &map); + + file_len = char_length(db) + (4 * map.beyond_last_ui_offset); + + create_rw_mapping(filename, file_len, &fd, &data); + uidata = (unsigned int *) data; /* align(int) < align(page)! 
*/ + cdata = data + (4 * map.beyond_last_ui_offset); /* character data starts 4 * beyond_last_ui_offset bytes into the buffer, presumably just past the last unsigned-int table slot */ + + write_header(data, uidata, db, &map); + cdata = write_type_and_flag_table(db, uidata, data, cdata); /* each writer returns the advanced cdata, which is fed to the next one */ + cdata = write_messages(db, &map, uidata, data, cdata); + cdata = write_mbox_headers(db, &map, uidata, data, cdata); + cdata = write_mbox_checksums(db, &map, uidata, data, cdata); + cdata = write_toktable(db->to, &map.to, uidata, data, cdata, "To"); + cdata = write_toktable(db->cc, &map.cc, uidata, data, cdata, "Cc"); + cdata = write_toktable(db->from, &map.from, uidata, data, cdata, "From"); + cdata = write_toktable(db->subject, &map.subject, uidata, data, cdata, "Subject"); + cdata = write_toktable(db->body, &map.body, uidata, data, cdata, "Body"); + cdata = write_toktable(db->attachment_name, &map.attachment_name, uidata, data, cdata, "Attachment Name"); + cdata = write_toktable2(db->msg_ids, &map.msg_ids, uidata, data, cdata, "(Threading)"); + + /* Write data */ + /* Unmap / close file */ + /* NOTE(review): the buffer is unmapped before fsync() is called on fd; whether an explicit msync() is wanted depends on how create_rw_mapping() sets up the mapping - confirm */ + if (munmap(data, file_len) < 0) { + report_error("munmap", filename); + unlock_and_exit(2); + } + if (fsync(fd) < 0) { + report_error("fsync", filename); + unlock_and_exit(2); + } + if (close(fd) < 0) { + report_error("close", filename); + unlock_and_exit(2); + } +} + /*}}}*/