menhir-20130116/0002755000175000017500000000000012075533613012272 5ustar stephstephmenhir-20130116/Makefile.arch0000644000175000017500000000045712075533602014650 0ustar stephsteph# If ocaml reports that Sys.os_type is Unix, we assume Unix, otherwise # we assume Windows. ifeq "$(shell rm -f ./o.ml && echo print_endline Sys.os_type > ./o.ml && ocaml ./o.ml && rm -f ./o.ml)" "Unix" MENHIREXE := menhir OBJ := o else MENHIREXE := menhir.exe OBJ := obj endif menhir-20130116/AUTHORS0000644000175000017500000000014212075533602013333 0ustar stephstephFranois Pottier Yann Rgis-Gianas menhir-20130116/menhir.10000644000175000017500000000612012075533603013632 0ustar stephsteph.\" Hey, EMACS: -*- nroff -*- .TH MENHIR 1 "April 19, 2006" .\" Please adjust this date whenever revising the manpage. .\" .\" Some roff macros, for reference: .\" .nh disable hyphenation .\" .hy enable hyphenation .\" .ad l left justify .\" .ad b justify to both left and right margins .\" .nf disable filling .\" .fi enable filling .\" .br insert line break .\" .sp insert n+1 empty lines .\" for manpage-specific macros, see man(7) .SH NAME menhir \- parser generator for OCaml .SH SYNOPSIS .B menhir .RI [ options ] " files" .SH DESCRIPTION .B menhir is a LR(1) parser generator for the Objective Caml programming language. That is, Menhir compiles LR(1) grammar specifications down to Objective Caml code. It is mostly compatible with .BR ocamlyacc (1). .SH OPTIONS .TP .B \-h, \-\-help Show summary of options. .TP .BI \-b,\ \-\-base\ basename Specifies a base name for the output file(s). .TP .B \-\-comment Include comments in the generated code. .TP .B \-\-depend Invoke ocamldep and display dependencies. .TP .B \-\-dump Describe the automaton in .IR basename .automaton. .TP .B \-\-error\-recovery Attempt recovery by discarding tokens after errors. .TP .B \-\-explain Explain conflicts in .IR basename .conflicts. .TP .BI \-\-external\-tokens\ module Import token type definition from .IR module . 
.TP .B \-\-graph Write grammar's dependency graph to .IR basename .dot. .TP .B \-\-infer Invoke ocamlc for ahead of time type inference. .TP .B \-\-interpret Interpret the sentences provided on stdin. .TP .B \-\-interpret\-show\-cst Show a concrete syntax tree upon acceptance. .TP .BI \-la,\ \-\-log\-automaton\ level Log information about the automaton. .TP .BI \-lc,\ \-\-log\-code\ level Log information about the generated code. .TP .BI \-lg,\ \-\-log\-grammar\ level Log information about the grammar. .TP .B \-\-no\-inline Ignore the %inline keyword. .TP .B \-\-no\-stdlib Do not load the standard library. .TP .BI \-\-ocamlc\ command Specifies how ocamlc should be invoked. .TP .BI \-\-ocamldep\ command Specifies how ocamldep should be invoked. .TP .B \-\-only\-preprocess Print a simplified grammar and exit. .TP .B \-\-only\-tokens Generate token type definition only, no code. .TP .B \-\-raw\-depend Invoke ocamldep and echo its raw output. .TP .BI \-\-stdlib\ directory Specify where the standard library lies. .TP .B \-\-suggest\-comp\-flags Suggest compilation flags for ocaml{c,opt}. .TP .B \-\-suggest\-link\-flags-byte Suggest link flags for ocamlc. .TP .B \-\-suggest\-link\-flags-opt Suggest link flags for ocamlopt. .TP .B \-t, \-\-table Use the table-based back-end. .TP .B \-\-timings Display internal timings. .TP .B \-\-trace Include tracing instructions in the generated code. .TP .B \-\-version Show version number and exit. .TP .B \-v Synonymous with .BR \-\-dump\ \-\-explain . .SH SEE ALSO .BR ocaml (1). .SH AUTHOR .B menhir was written by Franois Pottier and Yann Rgis-Gianas. .PP This manual page was written by Samuel Mimram , for the Debian project (but may be used by others). menhir-20130116/Makefile0000644000175000017500000000562212075533602013733 0ustar stephsteph# This is the main Makefile that is shipped as part of the source package. 
# Keep in mind that the hierarchy that is shipped is not identical to the # hierarchy within the svn repository: some sub-directories are not shipped; # the documentation is pre-built. # The hierarchy that is shipped includes: # demos # menhir.1 # manual.pdf # src # Makefile (this one) # ---------------------------------------------------------------------------- # By default, we attempt to use ocamlfind (if present in the PATH), but it it # is possible to prevent that externally by setting USE_OCAMLFIND to false. ifndef USE_OCAMLFIND USE_OCAMLFIND = ocamlfind ocamlc -v >/dev/null 2>&1 endif # ---------------------------------------------------------------------------- # A few settings differ on Windows versus Unix. include Makefile.arch # ---------------------------------------------------------------------------- # Installation paths. # TEMPORARY GODIVA and Linux do not agree on the standard paths... ifndef PREFIX $(error Please define PREFIX) endif bindir := ${PREFIX}/bin docdir := ${PREFIX}/share/doc/menhir libdir := ${PREFIX}/share/menhir mandir := ${PREFIX}/share/man/man1 MANS := menhir.1 DOCS := manual.pdf demos MLYLIB := src/standard.mly # ------------------------------------------------------------------------- # Building menhirLib. ifeq ($(TARGET),byte) MENHIRLIB := menhirLib.cmi menhirLib.cmo else MENHIRLIB := menhirLib.cmi menhirLib.cmo menhirLib.cmx menhirLib.$(OBJ) endif # ---------------------------------------------------------------------------- # Compilation. # Installation time settings are recorded within src/installation.ml. # This file is recreated every time so as to avoid becoming stale. 
.PHONY: all install uninstall all: rm -f src/installation.ml echo "let libdir = \"${libdir}\"" > src/installation.ml if $(USE_OCAMLFIND) ; then \ echo "let ocamlfind = true" >> src/installation.ml ; \ else \ echo "let ocamlfind = false" >> src/installation.ml ; \ fi $(MAKE) $(MFLAGS) -C src -f Makefile $(MAKE) $(MFLAGS) -C src -f Makefile $(MENHIRLIB) # ---------------------------------------------------------------------------- # Installation. install: all mkdir -p $(bindir) mkdir -p $(libdir) mkdir -p $(docdir) mkdir -p $(mandir) install src/$(MENHIREXE) $(bindir) install -m 644 $(MLYLIB) $(libdir) cp -r $(DOCS) $(docdir) cp -r $(MANS) $(mandir) @cd src && if $(USE_OCAMLFIND) ; then \ echo Installing MenhirLib via ocamlfind. ; \ ocamlfind install menhirLib META $(MENHIRLIB) ; \ else \ echo Installing MenhirLib manually. ; \ install -m 644 $(MENHIRLIB) $(libdir) ; \ fi uninstall: rm -rf $(bindir)/$(MENHIREXE) rm -rf $(libdir) rm -rf $(docdir) rm -rf $(mandir)/$(MANS) @if $(USE_OCAMLFIND) ; then \ echo Un-installing MenhirLib via ocamlfind. ; \ ocamlfind remove menhirLib ; \ fi menhir-20130116/LICENSE0000644000175000017500000007627512075533602013314 0ustar stephstephIn the following, "the Library" refers to the following files: src/standard.mly src/infiniteArray.{ml,mli} src/packedIntArray.{ml,mli} src/rowDisplacement.{ml,mli} src/engineTypes.ml src/engine.{ml,mli} src/tableFormat.ml src/tableInterpreter.{ml,mli} src/convert.{ml,mli} while "the Generator" refers to all other files in this archive. The Generator is distributed under the terms of the Q Public License version 1.0 with a change to choice of law (included below). The Library is distributed under the terms of the GNU Library General Public License version 2 (included below). 
As a special exception to the Q Public Licence, you may develop application programs, reusable components and other software items that link with the original or modified versions of the Generator and are not made available to the general public, without any of the additional requirements listed in clause 6c of the Q Public licence. As a special exception to the GNU Library General Public License, you may link, statically or dynamically, a "work that uses the Library" with a publicly distributed version of the Library to produce an executable file containing portions of the Library, and distribute that executable file under terms of your choice, without any of the additional requirements listed in clause 6 of the GNU Library General Public License. By "a publicly distributed version of the Library", we mean either the unmodified Library as distributed by INRIA, or a modified version of the Library that is distributed under the conditions defined in clause 3 of the GNU Library General Public License. This exception does not however invalidate any other reasons why the executable file might be covered by the GNU Library General Public License. ---------------------------------------------------------------------- THE Q PUBLIC LICENSE version 1.0 Copyright (C) 1999 Troll Tech AS, Norway. Everyone is permitted to copy and distribute this license document. The intent of this license is to establish freedom to share and change the software regulated by this license under the open source model. This license applies to any software containing a notice placed by the copyright holder saying that it may be distributed under the terms of the Q Public License version 1.0. Such software is herein referred to as the Software. This license covers modification and distribution of the Software, use of third-party application programs based on the Software, and development of free software which uses the Software. Granted Rights 1. 
You are granted the non-exclusive rights set forth in this license provided you agree to and comply with any and all conditions in this license. Whole or partial distribution of the Software, or software items that link with the Software, in any form signifies acceptance of this license. 2. You may copy and distribute the Software in unmodified form provided that the entire package, including - but not restricted to - copyright, trademark notices and disclaimers, as released by the initial developer of the Software, is distributed. 3. You may make modifications to the Software and distribute your modifications, in a form that is separate from the Software, such as patches. The following restrictions apply to modifications: a. Modifications must not alter or remove any copyright notices in the Software. b. When modifications to the Software are released under this license, a non-exclusive royalty-free right is granted to the initial developer of the Software to distribute your modification in future versions of the Software provided such versions remain available under these terms in addition to any other license(s) of the initial developer. 4. You may distribute machine-executable forms of the Software or machine-executable forms of modified versions of the Software, provided that you meet these restrictions: a. You must include this license document in the distribution. b. You must ensure that all recipients of the machine-executable forms are also able to receive the complete machine-readable source code to the distributed Software, including all modifications, without any charge beyond the costs of data transfer, and place prominent notices in the distribution explaining this. c. You must ensure that all modifications included in the machine-executable forms are available under the terms of this license. 5. You may use the original or modified versions of the Software to compile, link and run application programs legally developed by you or by others. 6. 
You may develop application programs, reusable components and other software items that link with the original or modified versions of the Software. These items, when distributed, are subject to the following requirements: a. You must ensure that all recipients of machine-executable forms of these items are also able to receive and use the complete machine-readable source code to the items without any charge beyond the costs of data transfer. b. You must explicitly license all recipients of your items to use and re-distribute original and modified versions of the items in both machine-executable and source code forms. The recipients must be able to do so without any charges whatsoever, and they must be able to re-distribute to anyone they choose. c. If the items are not available to the general public, and the initial developer of the Software requests a copy of the items, then you must supply one. Limitations of Liability In no event shall the initial developers or copyright holders be liable for any damages whatsoever, including - but not restricted to - lost revenue or profits or other direct, indirect, special, incidental or consequential damages, even if they have been advised of the possibility of such damages, except to the extent invariable law, if any, provides otherwise. No Warranty The Software and this license document are provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. Choice of Law This license is governed by the Laws of France. ---------------------------------------------------------------------- GNU LIBRARY GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1991 Free Software Foundation, Inc. 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. [This is the first released version of the library GPL. 
It is numbered 2 because it goes with version 2 of the ordinary GPL.] Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This license, the Library General Public License, applies to some specially designated Free Software Foundation software, and to any other libraries whose authors decide to use it. You can use it for your libraries, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library, or if you modify it. For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source code. If you link a program with the library, you must provide complete object files to the recipients so that they can relink them with the library, after making changes to the library and recompiling it. And you must show them these terms so they know their rights. Our method of protecting your rights has two steps: (1) copyright the library, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the library. 
Also, for each distributor's protection, we want to make certain that everyone understands that there is no warranty for this free library. If the library is modified by someone else and passed on, we want its recipients to know that what they have is not the original version, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that companies distributing free software will individually obtain patent licenses, thus in effect transforming the program into proprietary software. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. Most GNU software, including some libraries, is covered by the ordinary GNU General Public License, which was designed for utility programs. This license, the GNU Library General Public License, applies to certain designated libraries. This license is quite different from the ordinary one; be sure to read it in full, and don't assume that anything in it is the same as in the ordinary license. The reason we have a separate public license for some libraries is that they blur the distinction we usually make between modifying or adding to a program and simply using it. Linking a program with a library, without changing the library, is in some sense simply using the library, and is analogous to running a utility program or application program. However, in a textual and legal sense, the linked executable is a combined work, a derivative of the original library, and the ordinary General Public License treats it as such. Because of this blurred distinction, using the ordinary General Public License for libraries did not effectively promote software sharing, because most developers did not use the libraries. We concluded that weaker conditions might promote sharing better. 
However, unrestricted linking of non-free programs would deprive the users of those programs of all benefit from the free status of the libraries themselves. This Library General Public License is intended to permit developers of non-free programs to use free libraries, while preserving your freedom as a user of such programs to change the free libraries that are incorporated in them. (We have not seen how to achieve this as regards changes in header files, but we have achieved it as regards changes in the actual functions of the Library.) The hope is that this will lead to faster development of free libraries. The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a "work based on the library" and a "work that uses the library". The former contains code derived from the library, while the latter only works together with the library. Note that it is possible for a library to be covered by the ordinary General Public License rather than by this special one. GNU LIBRARY GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any software library which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Library General Public License (also called "this License"). Each licensee is addressed as "you". A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables. The "Library", below, refers to any such software library or work which has been distributed under these terms. 
A "work based on the Library" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term "modification".) "Source code" for a work means the preferred form of the work for making modifications to it. For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library. Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). Whether that is true depends on what the Library does and what the program that uses the Library does. 1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) The modified work must itself be a software library. 
b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change. c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License. d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that, in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful. (For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library. 
In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2, instead of to this License. (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) Do not make any other change in these notices. Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. This option is useful when you wish to copy part of the code of the Library into a program that is not a library. 4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange. If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code. 5. A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a "work that uses the Library". 
Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License. However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a "work that uses the library". The executable is therefore covered by this License. Section 6 states terms for distribution of such executables. When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. The threshold for this to be true is not precisely defined by law. If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.) Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself. 6. As an exception to the Sections above, you may also compile or link a "work that uses the Library" with the Library to produce a work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications. 
You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. If the work during execution displays copyright notices, you must include the copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. Also, you must do one of these things: a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked with the Library, with the complete machine-readable "work that uses the Library", as object code and/or source code, so that the user can modify the Library and then relink to produce a modified executable containing the modified Library. (It is understood that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.) b) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution. c) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place. d) Verify that the user has already received a copy of these materials or that you have already sent this user a copy. For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for reproducing the executable from it. 
However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute. 7. You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above. b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 9. You are not required to accept this License, since you have not signed it. 
However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it. 10. Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Library at all. For example, if a patent license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances. 
It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 13. The Free Software Foundation may publish revised and/or new versions of the Library General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation. 14. 
If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS Appendix: How to Apply These Terms to Your New Libraries If you develop a new library, and you want it to be of the greatest possible use to the public, we recommend making it free software that everyone can redistribute and change. 
You can do so by permitting redistribution under these terms (or, alternatively, under the terms of the ordinary General Public License). To apply these terms, attach the following notices to the library. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA Also add information on how to contact you by electronic and paper mail. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the library, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the library `Frob' (a library for tweaking knobs) written by James Random Hacker. , 1 April 1990 Ty Coon, President of Vice That's all there is to it! 
menhir-20130116/src/0002755000175000017500000000000012075533603013060 5ustar stephstephmenhir-20130116/src/item.mli0000644000175000017500000000551212075533603014522 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) open Grammar (* An LR(0) item encodes a pair of integers, namely the index of the production and the index of the bullet in the production's right-hand side. *) type t val import: Production.index * int -> t val export: t -> Production.index * int (* Comparison. *) val equal: t -> t -> bool (* [def item] looks up the production associated with this item in the grammar and returns [prod, nt, rhs, pos, length], where [prod] is the production's index, [nt] and [rhs] represent the production, [pos] is the position of the bullet in the item, and [length] is the length of the production's right-hand side. *) val def: t -> Production.index * Nonterminal.t * Symbol.t array * int * int (* If [item] is a start item, [startnt item] returns the start nonterminal that corresponds to [item]. *) val startnt: t -> Nonterminal.t (* Printing. *) val print: t -> string (* Classifying items as shift or reduce items. A shift item is one where the bullet can still advance. A reduce item is one where the bullet has reached the end of the right-hand side. *) type kind = | Shift of Symbol.t * t | Reduce of Production.index val classify: t -> kind (* Sets of items and maps over items. Hashing these data structures is specifically allowed. 
*) module Set : GSet.S with type element = t module Map : GMap.S with type key = t and type Domain.t = Set.t (* This functor performs precomputation that helps efficiently compute the closure of an LR(0) or LR(1) state. The precomputation requires time linear in the size of the grammar. The nature of the lookahead sets remains abstract. *) module Closure (L : Lookahead.S) : sig (* A state maps items to lookahead information. *) type state = L.t Map.t (* This takes the closure of a state through all epsilon transitions. *) val closure: state -> state end menhir-20130116/src/rowDisplacement.ml0000644000175000017500000002114212075533603016550 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (* This module compresses a two-dimensional table, where some values are considered insignificant, via row displacement. *) (* This idea reportedly appears in Aho and Ullman's ``Principles of Compiler Design'' (1977). It is evaluated in Tarjan and Yao's ``Storing a Sparse Table'' (1979) and in Dencker, Drre, and Heuft's ``Optimization of Parser Tables for Portable Compilers'' (1984). *) (* A compressed table is represented as a pair of arrays. The displacement array is an array of offsets into the data array. *) type 'a table = int array * (* displacement *) 'a array (* data *) (* In a natural version of this algorithm, displacements would be greater than (or equal to) [-n]. 
However, in the particular setting of Menhir, both arrays are intended to be compressed with [PackedIntArray], which does not efficiently support negative numbers. For this reason, we are careful not to produce negative displacements. *) (* In order to avoid producing negative displacements, we simply use the least significant bit as the sign bit. This is implemented by [encode] and [decode] below. *) (* One could also think, say, of adding [n] to every displacement, so as to ensure that all displacements are nonnegative. This would work, but would require [n] to be published, for use by the decoder. *) let encode (displacement : int) : int = if displacement >= 0 then displacement lsl 1 else (-displacement) lsl 1 + 1 let decode (displacement : int) : int = if displacement land 1 = 0 then displacement lsr 1 else -(displacement lsr 1) (* It is reasonable to assume that, as matrices grow large, their density becomes low, i.e., they have many insignificant entries. As a result, it is important to work with a sparse data structure for rows. We internally represent a row as a list of its significant entries, where each entry is a pair of a [j] index and an element. *) type 'a row = (int * 'a) list (* [compress equal insignificant dummy m n t] turns the two-dimensional table [t] into a compressed table. The parameter [equal] is equality of data values. The parameter [wildcard] tells which data values are insignificant, and can thus be overwritten with other values. The parameter [dummy] is used to fill holes in the data array. [m] and [n] are the integer dimensions of the table [t]. *) let compress (equal : 'a -> 'a -> bool) (insignificant : 'a -> bool) (dummy : 'a) (m : int) (n : int) (t : 'a array array) : 'a table = (* Be defensive. *) assert (Array.length t = m); assert begin for i = 0 to m - 1 do assert (Array.length t.(i) = n) done; true end; (* This turns a row-as-array into a row-as-sparse-list. 
The row is accompanied by its index [i] and by its rank (the number of its significant entries, that is, the length of the row-as-a-list. *) let sparse (i : int) (line : 'a array) : int * int * 'a row (* index, rank, row *) = let rec loop (j : int) (rank : int) (row : 'a row) = if j < 0 then i, rank, row else let x = line.(j) in if insignificant x then loop (j - 1) rank row else loop (j - 1) (1 + rank) ((j, x) :: row) in loop (n - 1) 0 [] in (* Construct an array of all rows, together with their index and rank. *) let rows : (int * int * 'a row) array = (* index, rank, row *) Array.mapi sparse t in (* Sort this array by decreasing rank. This does not have any impact on correctness, but reportedly improves compression. The intuitive idea is that rows with few significant elements are easy to fit, so they should be inserted last, after the problem has become quite constrained by fitting the heavier rows. This heuristic is attributed to Ziegler. *) Array.fast_sort (fun (_, rank1, _) (_, rank2, _) -> compare rank2 rank1 ) rows; (* Allocate a one-dimensional array of displacements. *) let displacement : int array = Array.make m 0 in (* Allocate a one-dimensional, infinite array of values. Indices into this array are written [k]. *) let data : 'a InfiniteArray.t = InfiniteArray.make dummy in (* Determine whether [row] fits at offset [k] within the current [data] array, up to extension of this array. *) (* Note that this check always succeeds when [k] equals the length of the [data] array. Indeed, the loop is then skipped. This property guarantees the termination of the recursive function [fit] below. *) let fits k (row : 'a row) : bool = let d = InfiniteArray.extent data in let rec loop = function | [] -> true | (j, x) :: row -> (* [x] is a significant element. *) (* By hypothesis, [k + j] is nonnegative. If it is greater than or equal to the current length of the data array, stop -- the row fits. 
*) assert (k + j >= 0); if k + j >= d then true (* We now know that [k + j] is within bounds of the data array. Check whether it is compatible with the element [y] found there. If it is, continue. If it isn't, stop -- the row does not fit. *) else let y = InfiniteArray.get data (k + j) in if insignificant y || equal x y then loop row else false in loop row in (* Find the leftmost position where a row fits. *) (* If the leftmost significant element in this row is at offset [j], then we can hope to fit as far left as [-j] -- so this element lands at offset [0] in the data array. *) (* Note that displacements may be negative. This means that, for insignificant elements, accesses to the data array could fail: they could be out of bounds, either towards the left or towards the right. This is not a problem, as long as [get] is invoked only at significant elements. *) let rec fit k row : int = if fits k row then k else fit (k + 1) row in let fit row = match row with | [] -> 0 (* irrelevant *) | (j, _) :: _ -> fit (-j) row in (* Write [row] at (compatible) offset [k]. *) let rec write k = function | [] -> () | (j, x) :: row -> InfiniteArray.set data (k + j) x; write k row in (* Iterate over the sorted array of rows. Fit and write each row at the leftmost compatible offset. Update the displacement table. *) Array.iter (fun (i, _, row) -> let k = fit row in (* if [row] has leading insignificant elements, then [k] can be negative *) write k row; displacement.(i) <- encode k ) rows; (* Return the compressed tables. *) displacement, InfiniteArray.domain data (* [get ct i j] returns the value found at indices [i] and [j] in the compressed table [ct]. This function call is permitted only if the value found at indices [i] and [j] in the original table is significant -- otherwise, it could fail abruptly. 
*) (* Together, [compress] and [get] have the property that, if the value found at indices [i] and [j] in an uncompressed table [t] is significant, then [get (compress t) i j] is equal to that value. *) let get (displacement, data) i j = assert (0 <= i && i < Array.length displacement); let k = decode displacement.(i) in assert (0 <= k + j && k + j < Array.length data); (* failure of this assertion indicates an attempt to access an insignificant element that happens to be mapped out of the bounds of the [data] array. *) data.(k + j) (* [getget] is a variant of [get] which only requires read access, via accessors, to the two components of the table. *) let getget get_displacement get_data (displacement, data) i j = let k = decode (get_displacement displacement i) in get_data data (k + j) menhir-20130116/src/mark.ml0000644000175000017500000000244412075533603014346 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (** This module implements a very simple notion of ``mark''. A mark is really a reference cell (without content). Creating a new mark requires allocating a new cell, and comparing marks requires comparing pointers. 
*) type t = unit ref let fresh = ref let same = (==) let none = fresh() menhir-20130116/src/rawPrinter.mli0000644000175000017500000000234212075533603015717 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* A debugging pretty-printer for [IL]. Newlines are used liberally, so as to facilitate diffs. *) module Make (X : sig (* This is the channel that is being written to. *) val f: out_channel end) : sig val expr: IL.expr -> unit end menhir-20130116/src/standard.mly0000644000175000017500000001164512075533603015410 0ustar stephsteph/**************************************************************************/ /* */ /* Menhir */ /* */ /* Franois Pottier, INRIA Rocquencourt */ /* Yann Rgis-Gianas, PPS, Universit Paris Diderot */ /* */ /* Copyright 2005-2008 Institut National de Recherche en Informatique */ /* et en Automatique. All rights reserved. This file is distributed */ /* under the terms of the GNU Library General Public License, with the */ /* special exception on linking described in file LICENSE. */ /* */ /**************************************************************************/ (* This is menhir's standard library. It offers a number of parameterized nonterminal definitions, such as options and lists, that should be useful in a number of circumstances. *) %% (* ------------------------------------------------------------------------- *) (* Options. *) (* [option(X)] recognizes either nothing or [X]. 
It produces a value of type ['a option] if [X] produces a value of type ['a]. *) %public option(X): /* nothing */ { None } | x = X { Some x } (* [ioption(X)] is identical to [option(X)], except its definition is inlined. This has the effect of duplicating the production that refers to it, possibly eliminating an LR(1) conflict. *) %public %inline ioption(X): /* nothing */ { None } | x = X { Some x } (* [boption(X)] recognizes either nothing or [X]. It produces a value of type [bool]. *) %public boption(X): /* nothing */ { false } | X { true } (* [loption(X)] recognizes either nothing or [X]. It produces a value of type ['a list] if [X] produces a value of type ['a list]. *) %public loption(X): /* nothing */ { [] } | x = X { x } (* ------------------------------------------------------------------------- *) (* Sequences. *) (* [pair(X, Y)] recognizes the sequence [X Y]. It produces a value of type ['a * 'b] if [X] and [Y] produce values of type ['a] and ['b], respectively. *) %public %inline pair(X, Y): x = X; y = Y { (x, y) } (* [separated_pair(X, sep, Y)] recognizes the sequence [X sep Y]. It produces a value of type ['a * 'b] if [X] and [Y] produce values of type ['a] and ['b], respectively. *) %public %inline separated_pair(X, sep, Y): x = X; sep; y = Y { (x, y) } (* [preceded(opening, X)] recognizes the sequence [opening X]. It passes on the value produced by [X], so that it produces a value of type ['a] if [X] produces a value of type ['a]. *) %public %inline preceded(opening, X): opening; x = X { x } (* [terminated(X, closing)] recognizes the sequence [X closing]. It passes on the value produced by [X], so that it produces a value of type ['a] if [X] produces a value of type ['a]. *) %public %inline terminated(X, closing): x = X; closing { x } (* [delimited(opening, X, closing)] recognizes the sequence [opening X closing]. It passes on the value produced by [X], so that it produces a value of type ['a] if [X] produces a value of type ['a]. 
*) %public %inline delimited(opening, X, closing): opening; x = X; closing { x } (* ------------------------------------------------------------------------- *) (* Lists. *) (* [list(X)] recognizes a possibly empty list of [X]'s. It produces a value of type ['a list] if [X] produces a value of type ['a]. The front element of the list is the first element that was parsed. *) %public list(X): /* nothing */ { [] } | x = X; xs = list(X) { x :: xs } (* [nonempty_list(X)] recognizes a nonempty list of [X]'s. It produces a value of type ['a list] if [X] produces a value of type ['a]. The front element of the list is the first element that was parsed. *) %public nonempty_list(X): x = X { [ x ] } | x = X; xs = nonempty_list(X) { x :: xs } (* [separated_list(separator, X)] recognizes a possibly empty list of [X]'s, separated with [separator]'s. It produces a value of type ['a list] if [X] produces a value of type ['a]. The front element of the list is the first element that was parsed. *) %public %inline separated_list(separator, X): xs = loption(separated_nonempty_list(separator, X)) { xs } (* [separated_nonempty_list(separator, X)] recognizes a nonempty list of [X]'s, separated with [separator]'s. It produces a value of type ['a list] if [X] produces a value of type ['a]. The front element of the list is the first element that was parsed. *) %public separated_nonempty_list(separator, X): x = X { [ x ] } | x = X; separator; xs = separated_nonempty_list(separator, X) { x :: xs } %% menhir-20130116/src/gMap.ml0000644000175000017500000001471312075533603014302 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. 
This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) module type S = sig (* Keys are assumed to have a natural total order. *) type key (* The type of maps whose data have type ['a]. *) type 'a t (* The empty map. *) val empty: 'a t (* [lookup k m] looks up the value associated to the key [k] in the map [m], and raises [Not_found] if no value is bound to [k]. *) val lookup: key -> 'a t -> 'a val find: key -> 'a t -> 'a (* [add k d m] returns a map whose bindings are all bindings in [m], plus a binding of the key [k] to the datum [d]. If a binding already exists for [k], it is overridden. *) val add: key -> 'a -> 'a t -> 'a t (* [strict_add k d m] returns a map whose bindings are all bindings in [m], plus a binding of the key [k] to the datum [d]. If a binding already exists for [k] then [Unchanged] is raised. *) exception Unchanged val strict_add: key -> 'a -> 'a t -> 'a t (* [fine_add decide k d m] returns a map whose bindings are all bindings in [m], plus a binding of the key [k] to the datum [d]. If a binding from [k] to [d0] already exists, then the resulting map contains a binding from [k] to [decide d0 d]. *) type 'a decision = 'a -> 'a -> 'a val fine_add: 'a decision -> key -> 'a -> 'a t -> 'a t (* [mem k m] tells whether the key [k] appears in the domain of the map [m]. *) val mem: key -> 'a t -> bool (* [singleton k d] returns a map whose only binding is from [k] to [d]. *) val singleton: key -> 'a -> 'a t (* [is_empty m] returns [true] if and only if the map [m] defines no bindings at all. *) val is_empty: 'a t -> bool (* [is_singleton s] returns [Some x] if [s] is a singleton containing [x] as its only element; otherwise, it returns [None]. 
*) val is_singleton: 'a t -> (key * 'a) option (* [cardinal m] returns [m]'s cardinal, that is, the number of keys it binds, or, in other words, the cardinal of its domain. *) val cardinal: 'a t -> int (* [choose m] returns an arbitrarily chosen binding in [m], if [m] is nonempty, and raises [Not_found] otherwise. *) val choose: 'a t -> key * 'a (* [lookup_and_remove k m] looks up the value [v] associated to the key [k] in the map [m], and raises [Not_found] if no value is bound to [k]. The call returns the value [v], together with the map [m] deprived from the binding from [k] to [v]. *) val lookup_and_remove: key -> 'a t -> 'a * 'a t val find_and_remove: key -> 'a t -> 'a * 'a t (* [remove k m] is the map [m] deprived from any binding for [k]. *) val remove: key -> 'a t -> 'a t (* [union m1 m2] returns the union of the maps [m1] and [m2]. Bindings in [m2] take precedence over those in [m1]. *) val union: 'a t -> 'a t -> 'a t (* [fine_union decide m1 m2] returns the union of the maps [m1] and [m2]. If a key [k] is bound to [x1] (resp. [x2]) within [m1] (resp. [m2]), then [decide] is called. It is passed [x1] and [x2], and must return the value that shall be bound to [k] in the final map. *) val fine_union: 'a decision -> 'a t -> 'a t -> 'a t (* [iter f m] invokes [f k x], in turn, for each binding from key [k] to element [x] in the map [m]. Keys are presented to [f] in increasing order. *) val iter: (key -> 'a -> unit) -> 'a t -> unit (* [fold f m seed] invokes [f k d accu], in turn, for each binding from key [k] to datum [d] in the map [m]. Keys are presented to [f] in increasing order. The initial value of [accu] is [seed]; then, at each new call, its value is the value returned by the previous invocation of [f]. The value returned by [fold] is the final value of [accu]. *) val fold: (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b (* [fold_rev] performs exactly the same job as [fold], but presents keys to [f] in the opposite order. 
*) val fold_rev: (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b (* It is valid to evaluate [iter2 f m1 m2] if and only if [m1] and [m2] have equal domains. Doing so invokes [f k x1 x2], in turn, for each key [k] bound to [x1] in [m1] and to [x2] in [m2]. Bindings are presented to [f] in increasing order. *) val iter2: (key -> 'a -> 'b -> unit) -> 'a t -> 'b t -> unit (* [map f m] returns the map obtained by composing the map [m] with the function [f]; that is, the map $k\mapsto f(m(k))$. *) val map: ('a -> 'b) -> 'a t -> 'b t (* [endo_map] is similar to [map], but attempts to physically share its result with its input. This saves memory when [f] is the identity function. *) val endo_map: ('a -> 'a) -> 'a t -> 'a t (* If [dcompare] is an ordering over data, then [compare dcompare] is an ordering over maps. *) val compare: ('a -> 'a -> int) -> 'a t -> 'a t -> int (* A map's domain is a set. Thus, to be able to perform operations on domains, we need set operations, provided by the [Domain] sub-module. The two-way connection between maps and their domains is given by two additional functions, [domain] and [lift]. [domain m] returns [m]'s domain. [lift f s] returns the map $k\mapsto f(k)$, where $k$ ranges over a set of keys [s]. *) module Domain : GSet.S with type element = key val domain: 'a t -> Domain.t val lift: (key -> 'a) -> Domain.t -> 'a t (* [corestrict m d] performs a co-restriction of the map [m] to the domain [d]. That is, it returns the map $k\mapsto m(k)$, where $k$ ranges over all keys bound in [m] but \emph{not} present in [d]. 
*) val corestrict: 'a t -> Domain.t -> 'a t end menhir-20130116/src/listMonad.mli0000644000175000017500000000256112075533603015517 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* $Id: listMonad.mli,v 1.2 2005/12/01 16:20:06 regisgia Exp $ *) (** Monad type which represents a list of results. *) type 'a m = 'a list (** [bind x f] applies [f] to a list of results, returning a list of results. *) val bind: 'a m -> ('a -> 'b m) -> 'b m val ( >>= ) : 'a m -> ('a -> 'b m) -> 'b m (** [return x] is the left and right unit of [bind]. *) val return: 'a -> 'a m menhir-20130116/src/interface.ml0000644000175000017500000000424512075533603015355 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) open UnparameterizedSyntax open IL open CodeBits open TokenType (* This is the [Error] exception. *) let excname = "Error" let excdef = { excname = excname; exceq = None; } let excredef = { excdef with exceq = Some excname } (* The type of the entry point for the start symbol [symbol]. 
*) let entrytypescheme symbol = let ocamltype = try StringMap.find symbol PreFront.grammar.types with Not_found -> (* Every start symbol should have a type. *) assert false in type2scheme (marrow [ arrow tlexbuf ttoken; tlexbuf ] (TypTextual ocamltype)) (* This is the interface of the generated parser. *) let interface = { paramdecls = PreFront.grammar.parameters; excdecls = [ excdef ]; typedecls = tokentypedef; valdecls = StringSet.fold (fun symbol decls -> (Misc.normalize symbol, entrytypescheme symbol) :: decls ) PreFront.grammar.start_symbols [] } (* Writing the interface to a file. *) let write () = let mli = open_out (Settings.base ^ ".mli") in let module P = Printer.Make (struct let f = mli let locate_stretches = None let raw_stretch_action = false end) in P.interface interface; close_out mli menhir-20130116/src/engine.ml0000644000175000017500000002651512075533603014666 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) open EngineTypes (* The LR parsing engine. *) (* This module is used: - at compile time, if so requested by the user, via the --interpret options; - at run time, in the table-based back-end. *) module Make (T : TABLE) = struct (* This propagates type and exception definitions. 
*) include T let _eRR : exn = Error (* --------------------------------------------------------------------------- *) (* [discard] takes a token off the input stream, queries the lexer for a new one, and stores it into [env.token], overwriting the previous token. If [env.shifted] has not yet reached its limit, it is incremented. *) let discard env = let lexbuf = env.lexbuf in let token = env.lexer lexbuf in env.token <- token; Log.lookahead_token lexbuf (T.token2terminal token); let shifted = env.shifted + 1 in if shifted >= 0 then env.shifted <- shifted (* --------------------------------------------------------------------------- *) (* The type [void] is empty. Many of the functions below have return type [void]. This guarantees that they never return a value. Instead, they must stop by raising an exception: either [Accept] or [Error]. *) type void (* --------------------------------------------------------------------------- *) (* In the code-based back-end, the [run] function is sometimes responsible for pushing a new cell on the stack. This is motivated by code sharing concerns. In this interpreter, there is no such concern; [run]'s caller is always responsible for updating the stack. *) (* In the code-based back-end, there is a [run] function for each state [s]. This function can behave in two slightly different ways, depending on when it is invoked, or (equivalently) depending on [s]. If [run] is invoked after shifting a terminal symbol (or, equivalently, if [s] has a terminal incoming symbol), then [run] discards a token, unless [s] has a default reduction on [#]. (Indeed, in that case, requesting the next token might drive the lexer off the end of the input stream.) If, on the other hand, [run] is invoked after performing a goto transition, or invoked directly by an entry point, then there is nothing to discard. These two cases are reflected in [CodeBackend.gettoken]. Here, the code is structured in a slightly different way. 
It is up to the caller of [run] to indicate whether to discard a token. *) let rec run env please_discard : void = (* Log the fact that we just entered this state. *) let s = env.current in Log.state s; (* If [please_discard] is set, discard a token and fetch the next one. *) (* This flag is set when [s] is being entered by shifting a terminal symbol and [s] does not have a default reduction on [#]. *) if please_discard then discard env; (* Examine what situation we are in. This case analysis is analogous to that performed in [CodeBackend.gettoken], in the sub-case where we do not have a terminal incoming symbol. *) T.default_reduction s reduce (* there is a default reduction; perform it *) continue (* there is none; continue below *) env and continue env : void = (* There is no default reduction. Consult the current lookahead token so as to determine which action should be taken. *) (* Peeking at the first input token, without taking it off the input stream, is normally done by reading [env.token]. However, we check [env.shifted] first: if it is -1, then the lookahead token is the [error] token. *) (* Note that, if we just called [discard] above, then the lookahead token cannot be [error]. *) if env.shifted = (-1) then begin Log.resuming_error_handling(); error env end else action env (* --------------------------------------------------------------------------- *) (* When [action] is invoked, we know that the current state does not have a default reduction. We also know that the current lookahead token is not [error]: it is a real token, stored in [env.token]. *) and action env : void = (* We consult the two-dimensional action table, indexed by the current state and the current lookahead token, in order to determine which action should be taken. 
*) let token = env.token in T.action env.current (* determines a row *) (T.token2terminal token) (* determines a column *) (T.token2value token) shift (* shift continuation *) reduce (* reduce continuation *) initiate (* failure continuation *) env (* --------------------------------------------------------------------------- *) (* This function takes care of shift transitions along a terminal symbol. (Goto transitions are taken care of within [reduce] below.) The symbol can be either an actual token or the [error] pseudo-token. *) and shift env (please_discard : bool) (terminal : terminal) (value : semantic_value) (s' : state) : void = (* Log the transition. *) Log.shift terminal s'; (* Push a new cell onto the stack, containing the identity of the state that we are leaving. *) let lexbuf = env.lexbuf in env.stack <- { state = env.current; semv = value; startp = lexbuf.Lexing.lex_start_p; endp = lexbuf.Lexing.lex_curr_p; next = env.stack; }; (* Switch to state [s']. *) env.current <- s'; run env please_discard (* --------------------------------------------------------------------------- *) (* This function takes care of reductions. *) and reduce env (prod : production) : void = (* Log a reduction event. *) Log.reduce_or_accept prod; (* Invoke the semantic action. The semantic action is responsible for truncating the stack, updating the current state, producing a cell that contains a new semantic value, and raising [Accept] or [Error] if appropriate. *) (* If the semantic action raises [Error], we catch it immediately and initiate error handling. *) (* The apparently weird idiom used here is an encoding for a [let/unless] construct, which does not exist in ocaml. *) if ( try T.semantic_action prod env; true with Error -> false ) then begin (* By our convention, the semantic action is responsible for updating the stack. The state now found in the top stack cell is the return state. *) (* Perform a goto transition. 
The target state is determined by consulting the goto table at the return state and at production [prod]. *) env.current <- T.goto env.stack.state prod; run env false end else errorbookkeeping env (* --------------------------------------------------------------------------- *) (* The following functions deal with errors. *) (* [initiate] and [errorbookkeeping] initiate error handling. See the functions by the same names in [CodeBackend]. *) and initiate env : void = assert (env.shifted >= 0); if T.recovery && env.shifted = 0 then begin Log.discarding_last_token (T.token2terminal env.token); discard env; env.shifted <- 0; action env end else errorbookkeeping env and errorbookkeeping env = Log.initiating_error_handling(); env.previouserror <- env.shifted; env.shifted <- (-1); error env (* [error] handles errors. *) and error env : void = (* Consult the column associated with the [error] pseudo-token in the action table. *) T.action env.current (* determines a row *) T.error_terminal (* determines a column *) T.error_value error_shift (* shift continuation *) error_reduce (* reduce continuation *) error_fail (* failure continuation *) env and error_shift env please_discard terminal value s' = (* Here, [terminal] is [T.error_terminal], and [value] is [T.error_value]. *) assert (terminal = T.error_terminal && value = T.error_value); (* This state is capable of shifting the [error] token. *) Log.handling_error env.current; shift env please_discard terminal value s' and error_reduce env prod = (* This state is capable of performing a reduction on [error]. *) Log.handling_error env.current; reduce env prod and error_fail env = (* This state is unable to handle errors. Attempt to pop a stack cell. *) let cell = env.stack in let next = cell.next in if next == cell then (* The stack is empty. Die. *) raise _eRR else begin (* The stack is nonempty. Pop a cell, updating the current state with that found in the popped cell, and try again. 
*) env.stack <- next; env.current <- cell.state; error env end (* --------------------------------------------------------------------------- *) let entry (s : state) (lexer : Lexing.lexbuf -> token) (lexbuf : Lexing.lexbuf) : semantic_value = (* Build an empty stack. This is a dummy cell, which is its own successor. Its fields other than [next] contain dummy values. *) let rec empty = { state = s; (* dummy *) semv = T.error_value; (* dummy *) startp = lexbuf.Lexing.lex_start_p; (* dummy *) endp = lexbuf.Lexing.lex_curr_p; (* dummy *) next = empty; } in (* Perform an initial call to the lexer. *) let token : token = lexer lexbuf in (* Log our first lookahead token. *) Log.lookahead_token lexbuf (T.token2terminal token); (* Build an initial environment. *) let env = { lexer = lexer; lexbuf = lexbuf; token = token; shifted = max_int; previouserror = max_int; stack = empty; current = s; } in (* Run. Catch [Accept], which represents normal termination. Let [Error] escape. *) try (* If ocaml offered a [match/with] construct with zero branches, this is what we would use here, since the type [void] has zero cases. *) let (_ : void) = run env false in assert false (* cannot fail *) with | Accept v -> v end menhir-20130116/src/coqBackend.ml0000644000175000017500000004755512075533603015462 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. 
*) (* *) (**************************************************************************) open Printf open Grammar module Run (T: sig end) = struct let print_term t = assert (not (Terminal.pseudo t)); sprintf "%s_t" (Terminal.print t) let print_nterm nt = sprintf "%s_nt" (Nonterminal.print true nt) let print_symbol = function | Symbol.N nt -> sprintf "NT %s" (print_nterm nt) | Symbol.T t -> sprintf "T %s" (print_term t) let print_type ty = if Settings.coq_no_actions then "unit" else match ty with | None -> raise Not_found | Some t -> match t with | Stretch.Declared s -> s.Stretch.stretch_content | Stretch.Inferred _ -> assert false (* We cannot infer coq types *) let is_final_state node = match Invariant.has_default_reduction node with | Some (prod, _) -> begin match Production.classify prod with | Some _ -> true | None -> false end | None -> false let lr1_iter_nonfinal f = Lr1.iter (fun node -> if not (is_final_state node) then f node) let lr1_iterx_nonfinal f = Lr1.iterx (fun node -> if not (is_final_state node) then f node) let lr1_foldx_nonfinal f = Lr1.foldx (fun accu node -> if not (is_final_state node) then f accu node else accu) let print_nis nis = sprintf "Nis_%d" (Lr1.number nis) let print_init init = sprintf "Init_%d" (Lr1.number init) let print_st st = match Lr1.incoming_symbol st with | Some _ -> sprintf "Ninit %s" (print_nis st) | None -> sprintf "Init %s" (print_init st) let (prod_ids, _) = Production.foldx (fun p (prod_ids, counters) -> let lhs = Production.nt p in let id = try SymbolMap.find (Symbol.N lhs) counters with Not_found -> 0 in (ProductionMap.add p id prod_ids, SymbolMap.add (Symbol.N lhs) (id+1) counters)) (ProductionMap.empty, SymbolMap.empty) let print_prod p = sprintf "Prod_%s_%d" (Nonterminal.print true (Production.nt p)) (ProductionMap.find p prod_ids) let () = if not Settings.coq_no_actions then begin Nonterminal.iterx (fun nonterminal -> match Nonterminal.ocamltype nonterminal with | None -> Error.error [] (sprintf "I don't know the 
type of non-terminal %s" (Nonterminal.print false nonterminal)) | Some _ -> ()); Production.iterx (fun prod -> let act = Production.action prod in if Action.has_previouserror act || Action.has_syntaxerror act || Action.has_leftstart act || Action.has_leftend act || Action.use_dollar act then Error.error [] ("$previouserror, $syntaxerror, $start, $end, $i are not "^ "supported by the coq back-end")) end; Production.iterx (fun prod -> Array.iter (fun symb -> match symb with | Symbol.T t -> if t = Terminal.error then Error.error [] "The coq back-end does not support error" | _ -> ()) (Production.rhs prod)); if Front.grammar.UnparameterizedSyntax.parameters <> [] then Error.error [] "The coq back-end does not support %parameter" (* Optimized because if we extract some constants to the right caml term, the ocaml inlining+constant unfolding replaces that by the actual constant *) let rec write_optimized_int31 f n = match n with | 0 -> fprintf f "Int31.On" | 1 -> fprintf f "Int31.In" | k when k land 1 = 0 -> fprintf f "(twice "; write_optimized_int31 f (n lsr 1); fprintf f ")" | _ -> fprintf f "(twice_plus_one "; write_optimized_int31 f (n lsr 1); fprintf f ")" let write_inductive_alphabet f name constrs = fprintf f "Inductive %s' : Set :=" name; List.iter (fprintf f "\n | %s") constrs; fprintf f ".\n"; fprintf f "Definition %s := %s'.\n\n" name name; if List.length constrs > 0 then begin let iteri f = ignore (List.fold_left (fun k x -> f k x; succ k) 0 constrs) in fprintf f "Program Instance %sNum : Numbered %s :=\n" name name; fprintf f " { inj := fun x => match x return _ with "; iteri (fun k constr -> fprintf f "| %s => " constr; write_optimized_int31 f k; fprintf f " "; ); fprintf f "end;\n"; fprintf f " surj := (fun n => match n return _ with "; iteri (fprintf f "| %d => %s "); fprintf f "| _ => %s end)%%int31;\n" (List.hd constrs); fprintf f " inj_bound := %d%%int31 }.\n" (List.length constrs); fprintf f "Solve Obligations using (intro x; case x; 
reflexivity).\n\n"; end else begin fprintf f "Program Instance %sAlph : Alphabet %s :=\n" name name; fprintf f " { AlphabetComparable := {| compare := fun x y =>\n"; fprintf f " match x, y return comparison with end |};\n"; fprintf f " AlphabetEnumerable := {| all_list := [] |} }."; fprintf f "Solve Obligations using (intro x; case x)." end let write_terminals f = write_inductive_alphabet f "terminal" ( Terminal.fold (fun t l -> if Terminal.pseudo t then l else print_term t::l) []); fprintf f "Instance TerminalAlph : Alphabet terminal := _.\n\n" let write_nonterminals f = write_inductive_alphabet f "nonterminal" ( Nonterminal.foldx (fun nt l -> (print_nterm nt)::l) []); fprintf f "Instance NonTerminalAlph : Alphabet nonterminal := _.\n\n" let write_symbol_semantic_type f = fprintf f "Definition terminal_semantic_type (t:terminal) : Type:=\n"; fprintf f " match t with\n"; Terminal.iter (fun terminal -> if not (Terminal.pseudo terminal) then fprintf f " | %s => %s%%type\n" (print_term terminal) (try print_type (Terminal.ocamltype terminal) with Not_found -> "unit") ); fprintf f " end.\n\n"; fprintf f "Definition nonterminal_semantic_type (nt:nonterminal) : Type:=\n"; fprintf f " match nt with\n"; Nonterminal.iterx (fun nonterminal -> fprintf f " | %s => %s%%type\n" (print_nterm nonterminal) (print_type (Nonterminal.ocamltype nonterminal))); fprintf f " end.\n\n"; fprintf f "Definition symbol_semantic_type (s:symbol) : Type:=\n"; fprintf f " match s with\n"; fprintf f " | T t => terminal_semantic_type t\n"; fprintf f " | NT nt => nonterminal_semantic_type nt\n"; fprintf f " end.\n\n" let write_productions f = write_inductive_alphabet f "production" ( Production.foldx (fun prod l -> (print_prod prod)::l) []); fprintf f "Instance ProductionAlph : Alphabet production := _.\n\n" let write_productions_contents f = fprintf f "Definition prod_contents (p:production) :\n"; fprintf f " { p:nonterminal * list symbol &\n"; fprintf f " arrows_left (map symbol_semantic_type (rev 
(snd p)))\n"; fprintf f " (symbol_semantic_type (NT (fst p))) }\n"; fprintf f " :=\n"; fprintf f " let box := existT (fun p =>\n"; fprintf f " arrows_left (map symbol_semantic_type (rev (snd p)))\n"; fprintf f " (symbol_semantic_type (NT (fst p))))\n"; fprintf f " in\n"; fprintf f " match p with\n"; Production.iterx (fun prod -> fprintf f " | %s => box\n" (print_prod prod); fprintf f " (%s, [%s])\n" (print_nterm (Production.nt prod)) (String.concat "; " (List.map print_symbol (List.rev (Array.to_list (Production.rhs prod))))); if Production.length prod = 0 then fprintf f " (\n" else fprintf f " (fun %s =>\n" (String.concat " " (List.rev (Array.to_list (Production.identifiers prod)))); if Settings.coq_no_actions then fprintf f "()" else Action.print f (Production.action prod); fprintf f "\n)\n"); fprintf f " end.\n\n"; fprintf f "Definition prod_lhs (p:production) :=\n"; fprintf f " fst (projT1 (prod_contents p)).\n"; fprintf f "Definition prod_rhs_rev (p:production) :=\n"; fprintf f " snd (projT1 (prod_contents p)).\n"; fprintf f "Definition prod_action (p:production) :=\n"; fprintf f " projT2 (prod_contents p).\n\n" let write_nullable_first f = fprintf f "Definition nullable_nterm (nt:nonterminal) : bool :=\n"; fprintf f " match nt with\n"; Nonterminal.iterx (fun nt -> fprintf f " | %s => %b\n" (print_nterm nt) (fst (Analysis.nullable_first_rhs (Array.of_list [Symbol.N nt]) 0))); fprintf f " end.\n\n"; fprintf f "Definition first_nterm (nt:nonterminal) : list terminal :=\n"; fprintf f " match nt with\n"; Nonterminal.iterx (fun nt -> let firstSet = snd (Analysis.nullable_first_rhs (Array.of_list [Symbol.N nt]) 0) in fprintf f " | %s => [" (print_nterm nt); let first = ref true in TerminalSet.iter (fun t -> if !first then first := false else fprintf f "; "; fprintf f "%s" (print_term t) ) firstSet; fprintf f "]\n"); fprintf f " end.\n\n" let write_grammar f = fprintf f "Module Import Gram <: Grammar.T.\n\n"; write_terminals f; write_nonterminals f; fprintf f 
"Include Grammar.Symbol.\n\n"; write_symbol_semantic_type f; write_productions f; write_productions_contents f; fprintf f "Include Grammar.Defs.\n\n"; fprintf f "End Gram.\n\n" let write_nis f = write_inductive_alphabet f "noninitstate" ( lr1_foldx_nonfinal (fun l node -> (print_nis node)::l) []); fprintf f "Instance NonInitStateAlph : Alphabet noninitstate := _.\n\n" let write_init f = write_inductive_alphabet f "initstate" ( ProductionMap.fold (fun prod node l -> (print_init node)::l) Lr1.entry []); fprintf f "Instance InitStateAlph : Alphabet initstate := _.\n\n" let write_start_nt f = fprintf f "Definition start_nt (init:initstate) : nonterminal :=\n"; fprintf f " match init with\n"; ProductionMap.iter (fun prod node -> match Production.rhs prod with | [| Symbol.N startnt |] -> fprintf f " | %s => %s\n" (print_init node) (print_nterm startnt) | _ -> assert false ) Lr1.entry; fprintf f " end.\n\n" let write_actions f = fprintf f "Definition action_table (state:state) : action :=\n"; fprintf f " match state with\n"; lr1_iter_nonfinal (fun node -> fprintf f " | %s => " (print_st node); match Invariant.has_default_reduction node with | Some (prod, _) -> fprintf f "Default_reduce_act %s\n" (print_prod prod) | None -> fprintf f "Lookahead_act (fun terminal:terminal =>\n"; fprintf f " match terminal return lookahead_action terminal with\n"; let has_fail = ref false in Terminal.iter (fun t -> if not (Terminal.pseudo t) then begin try let target = SymbolMap.find (Symbol.T t) (Lr1.transitions node) in fprintf f " | %s => Shift_act %s (eq_refl _)\n" (print_term t) (print_nis target) with Not_found -> try let prod = Misc.single (TerminalMap.find t (Lr1.reductions node)) in fprintf f " | %s => Reduce_act %s\n" (print_term t) (print_prod prod) with Not_found -> has_fail := true end); if !has_fail then fprintf f " | _ => Fail_act\n"; fprintf f " end)\n" ); fprintf f " end.\n\n" let write_gotos f = fprintf f "Definition goto_table (state:state) (nt:nonterminal) :=\n"; fprintf 
f " match state, nt return option { s:noninitstate | NT nt = last_symb_of_non_init_state s } with\n"; let has_none = ref false in lr1_iter_nonfinal (fun node -> Nonterminal.iterx (fun nt -> try let target = SymbolMap.find (Symbol.N nt) (Lr1.transitions node) in fprintf f " | %s, %s => " (print_st node) (print_nterm nt); if is_final_state target then fprintf f "None" else fprintf f "Some (exist _ %s (eq_refl _))\n" (print_nis target) with Not_found -> has_none := true)); if !has_none then fprintf f " | _, _ => None\n"; fprintf f " end.\n\n" let write_last_symb f = fprintf f "Definition last_symb_of_non_init_state (noninitstate:noninitstate) : symbol :=\n"; fprintf f " match noninitstate with\n"; lr1_iterx_nonfinal (fun node -> match Lr1.incoming_symbol node with | Some s -> fprintf f " | %s => %s\n" (print_nis node) (print_symbol s) | None -> assert false); fprintf f " end.\n\n" let write_past_symb f = fprintf f "Definition past_symb_of_non_init_state (noninitstate:noninitstate) : list symbol :=\n"; fprintf f " match noninitstate with\n"; lr1_iterx_nonfinal (fun node -> let s = String.concat "; " (List.tl (Invariant.fold (fun l _ symb _ -> print_symbol symb::l) [] (Invariant.stack node))) in fprintf f " | %s => [%s]\n" (print_nis node) s); fprintf f " end.\n"; fprintf f "Extract Constant past_symb_of_non_init_state => \"fun _ -> assert false\".\n\n" let write_past_states f = fprintf f "Definition past_state_of_non_init_state (s:noninitstate) : list (state -> bool) :=\n"; fprintf f " match s with\n"; lr1_iterx_nonfinal (fun node -> let s = String.concat ";\n " (Invariant.fold (fun accu _ _ states -> let b = Buffer.create 16 in bprintf b "fun s:state =>\n"; bprintf b " match s return bool with\n"; bprintf b " "; Lr1.NodeSet.iter (fun st -> bprintf b "| %s " (print_st st)) states; bprintf b "=> true\n"; bprintf b " | _ => false\n"; bprintf b " end"; Buffer.contents b::accu) [] (Invariant.stack node)) in fprintf f " | %s =>\n [ %s ]\n" (print_nis node) s); fprintf f " 
end.\n\n"; fprintf f "Extract Constant past_state_of_non_init_state => \"fun _ -> assert false\".\n\n" let write_items f = if not Settings.coq_no_complete then begin lr1_iter_nonfinal (fun node -> fprintf f "Definition items_of_state_%d : list item :=\n" (Lr1.number node); fprintf f " [ "; let first = ref true in Item.Map.iter (fun item lookaheads -> let prod, pos = Item.export item in match Production.classify prod with | None -> if !first then first := false else fprintf f ";\n "; fprintf f "{| prod_item := %s;\n" (print_prod prod); fprintf f " dot_pos_item := %d;\n" pos; fprintf f " lookaheads_item := ["; let first = ref true in let lookaheads = if TerminalSet.mem Terminal.sharp lookaheads then TerminalSet.universe else lookaheads in TerminalSet.iter (fun lookahead -> if !first then first := false else fprintf f "; "; fprintf f "%s" (print_term lookahead) ) lookaheads; fprintf f "] |}" | Some _ -> () ) (Lr0.closure (Lr0.export (Lr1.state node))); fprintf f " ].\n"; fprintf f "Extract Inlined Constant items_of_state_%d => \"assert false\".\n\n" (Lr1.number node) ); fprintf f "Definition items_of_state (s:state) : list item :=\n"; fprintf f " match s with\n"; lr1_iter_nonfinal (fun node -> fprintf f " | %s => items_of_state_%d\n" (print_st node) (Lr1.number node)); fprintf f " end.\n"; end else fprintf f "Definition items_of_state (s:state): list item := [].\n"; fprintf f "Extract Constant items_of_state => \"fun _ -> assert false\".\n\n" let write_automaton f = fprintf f "Module Aut <: Automaton.T.\n\n"; fprintf f "Module Gram := Gram.\n"; fprintf f "Module GramDefs := Gram.\n\n"; write_nullable_first f; write_nis f; write_last_symb f; write_init f; fprintf f "Include Automaton.Types.\n\n"; write_start_nt f; write_actions f; write_gotos f; write_past_symb f; write_past_states f; write_items f; fprintf f "End Aut.\n\n" let write_theorems f = fprintf f "Require Import Main.\n\n"; fprintf f "Module Parser := Main.Make Aut.\n"; fprintf f "Theorem safe:\n"; fprintf f 
" Parser.safe_validator () = true.\n"; fprintf f "Proof eq_refl true<:Parser.safe_validator () = true.\n\n"; if not Settings.coq_no_complete then begin fprintf f "Theorem complete:\n"; fprintf f " Parser.complete_validator () = true.\n"; fprintf f "Proof eq_refl true<:Parser.complete_validator () = true.\n\n"; end; ProductionMap.iter (fun prod node -> match Production.rhs prod with | [| Symbol.N startnt |] -> let funName = Nonterminal.print true startnt in fprintf f "Definition %s := Parser.parse safe Aut.%s.\n\n" funName (print_init node); fprintf f "Theorem %s_correct iterator buffer:\n" funName; fprintf f " match %s iterator buffer with\n" funName; fprintf f " | Parser.Inter.Parsed_pr sem buffer_new =>\n"; fprintf f " exists word,\n"; fprintf f " buffer = Parser.Inter.app_str word buffer_new /\\\n"; fprintf f " inhabited (Gram.parse_tree (%s) word sem)\n" (print_symbol (Symbol.N startnt)); fprintf f " | _ => True\n"; fprintf f " end.\n"; fprintf f "Proof. apply Parser.parse_correct. Qed.\n\n"; if not Settings.coq_no_complete then begin fprintf f "Theorem %s_complete (iterator:nat) word buffer_end (output:%s):\n" funName (print_type (Nonterminal.ocamltype startnt)); fprintf f " forall tree:Gram.parse_tree (%s) word output,\n" (print_symbol (Symbol.N startnt)); fprintf f " match %s iterator (Parser.Inter.app_str word buffer_end) with\n" funName; fprintf f " | Parser.Inter.Fail_pr => False\n"; fprintf f " | Parser.Inter.Parsed_pr output_res buffer_end_res =>\n"; fprintf f " output_res = output /\\ buffer_end_res = buffer_end /\\\n"; fprintf f " le (Gram.pt_size tree) iterator\n"; fprintf f " | Parser.Inter.Timeout_pr => lt iterator (Gram.pt_size tree)\n"; fprintf f " end.\n"; fprintf f "Proof. apply Parser.parse_complete with (init:=Aut.%s); exact complete. 
Qed.\n\n" (print_init node); end | _ -> assert false) Lr1.entry let write_all f = if not Settings.coq_no_actions then List.iter (fun s -> fprintf f "%s\n\n" s.Stretch.stretch_content) Front.grammar.UnparameterizedSyntax.preludes; fprintf f "Require Import List.\n"; fprintf f "Require Import Int31.\n"; fprintf f "Require Import Syntax.\n"; fprintf f "Require Import Tuples.\n"; fprintf f "Require Import Alphabet.\n"; fprintf f "Require Grammar.\n"; fprintf f "Require Automaton.\n\n"; write_grammar f; write_automaton f; write_theorems f; if not Settings.coq_no_actions then List.iter (fprintf f "\n\n%s") Front.grammar.UnparameterizedSyntax.postludes end menhir-20130116/src/coqBackend.mli0000644000175000017500000000212212075533603015610 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* The coq code generator. *) module Run (T: sig end) : sig val write_all: out_channel -> unit end menhir-20130116/src/dot.ml0000644000175000017500000000675512075533603014213 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. 
*) (* *) (**************************************************************************) open Printf (* ------------------------------------------------------------------------- *) (* Type definitions. *) type size = float * float (* in inches *) type orientation = | Portrait | Landscape type rankdir = | LeftToRight | TopToBottom type ratio = | Compress | Fill | Auto type style = (* Both nodes and edges. *) | Solid | Dashed | Dotted | Bold | Invisible (* Nodes only. *) | Filled | Diagonals | Rounded (* ------------------------------------------------------------------------- *) (* Basic printers. *) let print_style = function | None -> "" | Some style -> let style = match style with | Solid -> "solid" | Dashed -> "dashed" | Dotted -> "dotted" | Bold -> "bold" | Invisible -> "invis" | Filled -> "filled" | Diagonals -> "diagonals" | Rounded -> "rounded" in sprintf ", style = %s" style (* ------------------------------------------------------------------------- *) (* The graph printer. *) module Print (G : sig type vertex val name: vertex -> string val successors: (?style:style -> label:string -> vertex -> unit) -> vertex -> unit val iter: (?style:style -> label:string -> vertex -> unit) -> unit end) = struct let print ?(directed = true) ?size ?(orientation = Landscape) ?(rankdir = LeftToRight) ?(ratio = Compress) (f : out_channel) = fprintf f "%s G {\n" (if directed then "digraph" else "graph"); Option.iter (fun (hsize, vsize) -> fprintf f "size=\"%f, %f\";\n" hsize vsize ) size; begin match orientation with | Portrait -> fprintf f "orientation = portrait;\n" | Landscape -> fprintf f "orientation = landscape;\n" end; begin match rankdir with | LeftToRight -> fprintf f "rankdir = LR;\n" | TopToBottom -> fprintf f "rankdir = TB;\n" end; begin match ratio with | Compress -> fprintf f "ratio = compress;\n" | Fill -> fprintf f "ratio = fill;\n" | Auto -> fprintf f "ratio = auto;\n" end; G.iter (fun ?style ~label vertex -> fprintf f "%s [ label=\"%s\"%s ] ;\n" (G.name vertex) 
label (print_style style) ); G.iter (fun ?style ~label source -> G.successors (fun ?style ~label destination -> fprintf f "%s %s %s [ label=\"%s\"%s ] ;\n" (G.name source) (if directed then "->" else "--") (G.name destination) label (print_style style) ) source ); fprintf f "\n}\n" end menhir-20130116/src/invariant.mli0000644000175000017500000001272712075533602015564 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module discovers and publishes information about the automaton. It determines the shape of the stack when a state is about to be entered, when a production is about to be reduced, and when a goto transition is about to be taken. It also determines which states should be represented (that is, need to physically exist on the stack at runtime) and which symbols need to keep track of (start or end) positions. It also determines which automaton states could potentially perform error recovery, and which states could have to deal with an [error] token. *) open Grammar (* ------------------------------------------------------------------------- *) (* A representation of stack shapes. *) (* A word is a representation of a stack or stack suffix. *) type word (* [fold] folds over a word. At each cell, [f] is applied to the accumulator, to a Boolean flag that tells whether the cell holds a state, to the set of possible states of the cell, and to the symbol associated with the cell. The stack is visited from bottom to top. 
*) val fold: ('a -> bool -> Symbol.t -> Lr1.NodeSet.t -> 'a) -> 'a -> word -> 'a (* [fold_top f accu s] is analogous to [fold], but only folds over the top stack cell, if there is one, so that [f] is either not invoked at all or invoked just once. *) val fold_top: (bool -> Symbol.t -> 'a) -> 'a -> word -> 'a (* ------------------------------------------------------------------------- *) (* Information about the stack. *) (* [stack s] is the structure of the stack at state [s]. *) val stack: Lr1.node -> word (* [prodstack prod] is the structure of the stack when production [prod] is about to be reduced. This function should not be called if production [prod] is never reduced. *) val prodstack: Production.index -> word (* [gotostack nt] is the structure of the stack when a shift transition over nonterminal [nt] is about to be taken. It consists of just one cell. *) val gotostack: Nonterminal.t -> word (* [rewind s] explains how to rewind the stack when dealing with an error in state [s]. It produces an instruction to either die (because no state on the stack can handle errors) or pop a suffix of the stack. In the latter case, one reaches a state that is either represented (its identity is physically stored in the bottommost cell that is popped) or unrepresented (its identity is statically known). *) type instruction = | Die | DownTo of word * state and state = | Represented | UnRepresented of Lr1.node val rewind: Lr1.node -> instruction (* ------------------------------------------------------------------------- *) (* Information about which states and positions need to physically exist on the stack. *) (* [represented s] tells whether state [s] must have an explicit representation, that is, whether it is pushed onto the stack. *) val represented: Lr1.node -> bool (* [startp symbol] and [endp symbol] tell whether start or end positions must be recorded for symbol [symbol]. 
*) val startp: Symbol.t -> bool val endp: Symbol.t -> bool (* ------------------------------------------------------------------------- *) (* Information about error handling. *) (* [recoverer s] tells whether state [s] can potentially do error recovery. *) val recoverer: Lr1.node -> bool (* [errorpeeker s] tells whether state [s] can potentially peek at an error. This is the case if, in state [s], [env.shifted] may be -1, that is, if an error token may be on the stream. *) val errorpeeker: Lr1.node -> bool (* ------------------------------------------------------------------------- *) (* Information about which productions are reduced and where. *) (* [ever_reduced prod] tells whether production [prod] is ever reduced. *) val ever_reduced: Production.index -> bool (* [fold_reduced prod] folds over all states that can reduce production [prod]. *) val fold_reduced: (Lr1.node -> 'a -> 'a) -> Production.index -> 'a -> 'a (* ------------------------------------------------------------------------- *) (* Information about default reductions. *) (* [has_default_reduction s] tells whether state [s] has a default reduction, and, if so, upon which set of tokens. *) val has_default_reduction : Lr1.node -> (Production.index * TerminalSet.t) option (* ------------------------------------------------------------------------- *) (* Miscellaneous. *) (* [universal symbol] tells whether every represented state has an outgoing transition along [symbol]. *) val universal: Symbol.t -> bool menhir-20130116/src/misc.mli0000644000175000017500000001540112075533603014515 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. 
This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* TEMPORARY tidy up, comment, remove dead code *) (* Converting an option to a string, with [None] converted to the empty string. *) val o2s: 'a option -> ('a -> string) -> string (* Projection out of a singleton list. *) val single: 'a list -> 'a (* A variant of [List.map] where [f] returns a pair of elements, to be flattened into the new list. *) val mapd: ('a -> 'b * 'b) -> 'a list -> 'b list (* Tabulating a function using an internal array. [tabulate n f] returns a function that is extensionally equal to [f], but relies on an internal array. Arguments to [f] are of type [int] and are supposed to lie in the range [0..n). *) val tabulate: int -> (int -> 'a) -> (int -> 'a) (* Tabulating a function using an internal array. [tabulateb n f] returns a function that is extensionally equal to [f], but relies on an internal array. Arguments to [f] are of type [int] and are supposed to lie in the range [0..n). The result type of [f] is assumed to be of type [bool]. [tabulateb] also returns the number of points where [f] is [true]. *) val tabulateb: int -> (int -> bool) -> (int -> bool) * int (* [tabulatef number fold n dummy f] returns a function that is extensionally equal to [f], but relies on an internal array. Arguments to [f] are of type ['a] and are mapped by [number] into the range [0..n). [fold] allows folding over the domain of [f]. [dummy] is used to initialize the internal array. Its value has no impact if [fold] is surjective. *) val tabulatef: ('a -> int) -> ((unit -> 'a -> unit) -> unit -> unit) -> int -> 'b -> ('a -> 'b) -> ('a -> 'b) (* [tabulateo number fold n f] returns a function that is extensionally equal to [f], but relies on an internal array. Arguments to [f] are of type ['a] and are mapped by [number] into the range [0..n). 
[fold] allows folding over the domain of [f]. The result type of [f] is an option type, and [tabulateo] also returns the number of points where [f] is [Some _]. *) val tabulateo: ('a -> int) -> ((unit -> 'a -> unit) -> unit -> unit) -> int -> ('a -> 'b option) -> ('a -> 'b option) * int (* Truncature of a list. *) val truncate: int -> 'a list -> 'a list (* A list of repeated elements. *) val repeat: int -> 'a -> 'a list (* Reverse function application. *) val ( $$ ) : 'a -> ('a -> 'b) -> 'b (* Sets of strings and maps over strings. *) module IntSet : Set.S with type elt = int (* [separated_list_to_string printer sep l] converts [l] into a string representation built using [printer] on each element and [sep] as a separator. *) val separated_list_to_string: ('a -> string) -> string -> 'a list -> string (* [index_map f] returns a triple (indexed_f, domain_indexation, domain_array). [indexed_f] is a mapping from [0..n-1] to the elements of the map [f] ([n] being the size of the image of [f]). [domain_indexation] is a mapping from the domain of the map [f] to indexes. [domain_array] is a mapping from the indexes to the domain of [f]. The indexation implements [f] ie: - forall x in domain(m), indexed_f (domain_indexation x) = f (x). - forall x in domain(m), domain_array (domain_indexation x) = x. *) val index_map : 'a StringMap.t -> (int -> 'a) * (string -> int) * (int -> string) (* [support_assoc l x] returns the second component of the first couple in [l] whose first component is [x]. If it does not exist, it returns [x]. *) val support_assoc : ('a * 'a) list -> 'a -> 'a (* [index] indexes a list of (distinct) strings, that is, assigns an integer index to each string and builds mappings both ways between strings and indices. *) val index: string list -> int * string array * int StringMap.t (* Turning an implicit list, stored using pointers through a hash table, into an explicit list. The head of the implicit list is not included in the explicit list. 
*) val materialize: ('a, 'a option) Hashtbl.t -> 'a -> 'a list (* [iteri] implements a [for] loop over integers, from 0 to [n-1]. *) val iteri: int -> (int -> unit) -> unit (* [foldi] implements a [for] loop over integers, from 0 to [n-1], with an accumulator. [foldij] implements a [for] loop over integers, from [start] to [n-1], with an accumulator. *) val foldi: int -> (int -> 'a -> 'a) -> 'a -> 'a val foldij: int -> int -> (int -> 'a -> 'a) -> 'a -> 'a (* [mapi n f] produces the list [ f 0; ... f (n-1) ]. *) val mapi: int -> (int -> 'a) -> 'a list (* [qfold f accu q] repeatedly takes an element [x] off the queue [q] and applies [f] to the accumulator and to [x], until [q] becomes empty. Of course, [f] can add elements to [q] as a side-effect. *) val qfold: ('a -> 'b -> 'a) -> 'a -> 'b Queue.t -> 'a (* [qiter f q] repeatedly takes an element [x] off the queue [q] and applies [f] to [x], until [q] becomes empty. Of course, [f] can add elements to [q] as a side-effect. *) val qiter: ('b -> unit) -> 'b Queue.t -> unit (* [smap] has the same semantics as [List.map], but attempts to physically return the input list when [f] is the identity. *) val smap: ('a -> 'a) -> 'a list -> 'a list (* [smapa] is a variant of [smap] that maintains an accumulator. *) val smapa: ('b -> 'a -> 'b * 'a) -> 'b -> 'a list -> 'b * 'a list (* [normalize s] returns a copy of [s] where parentheses and commas are replaced with underscores. *) val normalize: string -> string (* [postincrement r] increments [r] and returns its original value. *) val postincrement: int ref -> int (* [gcp] returns the greatest common prefix of two strings. *) val gcp: string -> string -> string (* [gcps] returns the greatest common prefix of a nonempty list of strings. *) val gcps : string list -> string (* [array_forall p a] computes the conjunction of the predicate [p] over all elements of the array [a]. 
*) val array_forall: ('a -> bool) -> 'a array -> bool menhir-20130116/src/cst.ml0000644000175000017500000000656312075533603014213 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) open Grammar (* Concrete syntax trees. *) (* A concrete syntax tree is one of a leaf -- which corresponds to a terminal symbol; a node -- which corresponds to a non-terminal symbol, and whose immediate descendants form an expansion of that symbol; or an error leaf -- which corresponds to a point where the [error] pseudo-token was shifted. *) type cst = | CstTerminal of Terminal.t | CstNonTerminal of Production.index * cst array | CstError (* This is a (mostly) unambiguous printer for concrete syntax trees, in an sexp-like notation. *) let rec pcst b = function | CstTerminal tok -> (* A leaf is denoted by a terminal symbol. *) Printf.bprintf b "%s" (Terminal.print tok) | CstNonTerminal (prod, csts) -> (* A node is denoted by a bracketed, whitespace-separated list, whose head is a non-terminal symbol (followed with a colon) and whose tail consists of the node's descendants. *) (* There is in fact some ambiguity in this notation, since we only print the non-terminal symbol that forms the left-hand side of production [prod], instead of the production itself. This abuse makes things much more readable, and should be acceptable for the moment. The cases where ambiguity actually arises should be rare. 
*) Printf.bprintf b "[%s:%a]" (Nonterminal.print false (Production.nt prod)) pcsts csts | CstError -> (* An error leaf is denoted by [error]. *) Printf.bprintf b "error" and pcsts b (csts : cst array) = Array.iter (fun cst -> Printf.bprintf b " %a" pcst cst ) csts (* This is the public interface. *) let wrap print f x = let b = Buffer.create 32768 in print b x; Buffer.output_buffer f b let print = wrap pcst (* This is a pretty-printer for concrete syntax trees. The notation is the same as that used by the above printer; the only difference is that the [Pprint] library is used to manage indentation. *) open Pprint let rec build : cst -> document = function | CstTerminal tok -> text (Terminal.print tok) | CstNonTerminal (prod, csts) -> brackets ( group ( text (Nonterminal.print false (Production.nt prod)) ^^ colon ^^ group ( nest 2 ( Array.fold_left (fun doc cst -> doc ^^ break1 ^^ build cst ) empty csts ) ) ^^ break0 ) ) | CstError -> text "error" let show f cst = Channel.pretty 0.8 80 f (build cst) menhir-20130116/src/installation.mli0000644000175000017500000000250012075533603016257 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module defines a number of installation settings. Its source code is generated by the main [Makefile]. *) (* The directory where Menhir's standard library, [standard.mly], is installed. *) val libdir: string (* Whether MenhirLib was installed via [ocamlfind] or (manually) in the above directory. 
*) val ocamlfind: bool menhir-20130116/src/inliner.ml0000644000175000017500000002032312075533603015050 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) open IL open CodeBits (* In the following, we only inline global functions. In order to avoid unintended capture, as we traverse terms, we keep track of local identifiers that hide global ones. The following little class helps do that. (The pathological case where a local binding hides a global one probably does not arise very often. Fortunately, checking against it in this way is quite cheap, and lets me sleep safely.) *) class locals table = object(self) method pvar (locals : StringSet.t) (id : string) = if Hashtbl.mem table id then StringSet.add id locals else locals end (* Here is the inliner. *) let inline ({ valdefs = defs } as p : program) = (* Create a table of all global definitions. *) let before, table = Traverse.tabulate_defs defs in (* Prepare to count how many times each function is used, including inside its own definition. The public functions serve as starting points for this discovery phase. *) let queue : valdef Queue.t = Queue.create() and usage : int StringMap.t ref = ref StringMap.empty in (* [visit] is called at every identifier occurrence. *) let visit locals id = if StringSet.mem id locals then (* This is a local identifier. Do nothing. *) () else try let _, def = Hashtbl.find table id in (* This is a globally defined identifier. Increment its usage count. 
If it was never visited, enqueue its definition for exploration. *) let n = try StringMap.find id !usage with Not_found -> Queue.add def queue; 0 in usage := StringMap.add id (n + 1) !usage with Not_found -> (* This identifier is not global. It is either local or a reference to some external library, e.g. ocaml's standard library. *) () in (* Look for occurrences of identifiers inside expressions. *) let o = object inherit [ StringSet.t, unit ] Traverse.fold inherit locals table method evar locals () id = visit locals id end in (* Initialize the queue with all public definitions, and work from there. We assume that the left-hand side of every definition is a variable. *) List.iter (fun { valpublic = public; valpat = p } -> if public then visit StringSet.empty (pat2var p) ) defs; Misc.qfold (o#valdef StringSet.empty) () queue; let usage = !usage in (* Now, inline every function that is called at most once. At the same time, every function that is never called is dropped. The public functions again serve as starting points for the traversal. *) let queue : valdef Queue.t = Queue.create() and emitted = ref StringSet.empty in let enqueue def = let id = pat2var def.valpat in if not (StringSet.mem id !emitted) then begin emitted := StringSet.add id !emitted; Queue.add def queue end in (* A simple application is an application of a variable to a number of variables, constants, or record accesses out of variables. *) let rec is_simple_arg = function | EVar _ | EData (_, []) | ERecordAccess (EVar _, _) -> true | EMagic e -> is_simple_arg e | _ -> false in let is_simple_app = function | EApp (EVar _, actuals) -> List.for_all is_simple_arg actuals | _ -> false in (* Taking a fresh instance of a type scheme. Ugly. 
*) let instance = let count = ref 0 in let fresh tv = incr count; tv, Printf.sprintf "freshtv%d" !count in fun scheme -> let mapping = List.map fresh scheme.quantifiers in let rec sub typ = match typ with | TypTextual _ -> typ | TypVar v -> begin try TypVar (List.assoc v mapping) with Not_found -> typ end | TypApp (f, typs) -> TypApp (f, List.map sub typs) | TypTuple typs -> TypTuple (List.map sub typs) | TypArrow (typ1, typ2) -> TypArrow (sub typ1, sub typ2) in sub scheme.body in (* Destructuring a type annotation. *) let rec annotate formals body typ = match formals, typ with | [], _ -> [], EAnnot (body, type2scheme typ) | formal :: formals, TypArrow (targ, tres) -> let formals, body = annotate formals body tres in PAnnot (formal, targ) :: formals, body | _ :: _, _ -> (* Type annotation has insufficient arity. *) assert false in (* The heart of the inliner: rewriting a function call to a [let] expression. If there was a type annotation at the function definition site, it is dropped, provided [--infer] was enabled. Otherwise, it is kept, because, due to the presence of [EMagic] expressions in the code, dropping a type annotation could cause an ill-typed program to become apparently well-typed. Keeping a type annotation requires taking a fresh instance of the type scheme, because OCaml doesn't have support for locally and existentially bound type variables. Yuck. *) let inline formals actuals body oscheme = assert (List.length actuals = List.length formals); match oscheme with | Some scheme when not Settings.infer -> let formals, body = annotate formals body (instance scheme) in mlet formals actuals body | _ -> mlet formals actuals body in (* Look for occurrences of identifiers inside expressions, branches, etc. and replace them with their definitions if they have only one use site or if their definitions are sufficiently simple. 
*) let o = object (self) inherit [ StringSet.t ] Traverse.map as super inherit locals table method eapp locals e actuals = match e with | EVar id when (Hashtbl.mem table id) && (* a global identifier *) (not (StringSet.mem id locals)) (* not hidden by a local identifier *) -> let _, def = Hashtbl.find table id in (* cannot fail, thanks to the above check *) let formals, body, oscheme = match def with | { valval = EFun (formals, body) } -> formals, body, None | { valval = EAnnot (EFun (formals, body), scheme) } -> formals, body, Some scheme | { valval = _ } -> (* The definition is not a function definition. This should not happen in the kind of code that we generate. *) assert false in assert (StringMap.mem id usage); if StringMap.find id usage = 1 || is_simple_app body then (* The definition can be inlined, with beta reduction. *) inline formals (self#exprs locals actuals) (EComment (id, self#expr locals body)) oscheme else begin (* The definition cannot be inlined. *) enqueue def; super#eapp locals e actuals end | _ -> (* The thing in function position is not a reference to a global. *) super#eapp locals e actuals end in (* Initialize the queue with all public definitions, and work from there. *) List.iter (function { valpublic = public } as def -> if public then enqueue def ) defs; let valdefs = Misc.qfold (fun defs def -> o#valdef StringSet.empty def :: defs ) [] queue in Error.logC 1 (fun f -> Printf.fprintf f "%d functions before inlining, %d functions after inlining.\n" before (List.length valdefs)); Time.tick "Inlining"; { p with valdefs = valdefs } (* The external entry point. 
*) let inline p = if Settings.code_inlining then inline p else p menhir-20130116/src/nonTerminalDefinitionInlining.ml0000644000175000017500000002113512075533603021401 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* $Id: nonTerminalDefinitionInlining.ml,v 1.17 2006/06/26 09:41:33 regisgia Exp $ *) open UnparameterizedSyntax open ListMonad (* This exception will be raised when a branch does not need inlining. *) exception NoInlining (* Color are used to detect cycles. *) type 'a color = | BeingExpanded | Expanded of 'a (* Inline a grammar. The resulting grammar does not contain any definitions that can be inlined. *) let inline grammar = let names producers = List.fold_left (fun s -> function (_, Some x) -> StringSet.add x s | _ -> s) StringSet.empty producers in (* This function returns a fresh name beginning with [prefix] and that is not in the set of names [names]. *) let rec fresh ?(c=0) names prefix = let name = prefix^string_of_int c in if StringSet.mem name names then fresh ~c:(c+1) names prefix else name in let use_inline = ref false in (* This table associates a color to each non terminal that can be expanded. 
*) let expanded_non_terminals = Hashtbl.create 13 in let expanded_state k = Hashtbl.find expanded_non_terminals k in let mark_as_being_expanded k = Hashtbl.add expanded_non_terminals k BeingExpanded in let mark_as_expanded k r = Hashtbl.replace expanded_non_terminals k (Expanded r); r in (* This function traverses the producers of the branch [b] and find the first non terminal that can be inlined. If it finds one, it inlines its branches into [b], that's why this function can return several branches. If it does not find one non terminal to be inlined, it raises [NoInlining]. *) let rec find_inline_producer b = let prefix, nt, p, psym, suffix = let rec chop_inline i (prefix, suffix) = match suffix with | [] -> raise NoInlining | ((nt, id) as x) :: xs -> try let r = StringMap.find nt grammar.rules in let id = match id with | None -> "_"^string_of_int i | Some id -> id in if r.inline_flag then (* We have to inline the rule [r] into [b] between [prefix] and [xs]. *) List.rev prefix, nt, r, id, xs else chop_inline (i + 1) (x :: prefix, xs) with Not_found -> chop_inline (i + 1) (x :: prefix, xs) in chop_inline 1 ([], b.producers) in prefix, expand_rule nt p, nt, psym, suffix (* We have to rename producers' names of the inlined production if they clashes with the producers' names of the branch into which we do the inlining. *) and rename_if_necessary b producers = (* First we compute the set of names already in use. *) let producers_names = names (b.producers @ producers) in (* Compute a renaming and the new inlined producers' names. *) let phi, producers' = List.fold_left (fun (phi, producers) -> function (p, Some x) -> if StringSet.mem x producers_names then let x' = fresh producers_names x in ((x, x') :: phi, (p, Some x') :: producers) else (phi, (p, Some x) :: producers) | p -> phi, p :: producers) ([], []) producers in phi, List.rev producers' (* Inline the non terminals that can be inlined in [b]. We use the ListMonad to combine the results. 
*) and expand_branch (b : branch) : branch ListMonad.m = try let prefix, p, nt, psym, suffix = find_inline_producer b in use_inline := true; if Action.use_dollar b.action then Error.error [ b.branch_position ] (Printf.sprintf "You cannot use %s and the $i syntax in this branch since the \ definition of %s has to be inlined." nt nt) else (* Inline a branch of [nt] at position [prefix] ... [suffix] in the branch [b]. *) let inline_branch pb = (* Rename the producers of this branch is they conflict with the name of the host's producers. *) let phi, inlined_producers = rename_if_necessary b pb.producers in (* Define the renaming environment given the shape of the branch. *) let renaming_env, prefix', suffix' = let start_position, prefix' = match List.rev prefix with (* If the prefix is empty, the start position is the rule start position. *) | [] -> (Keyword.Left, Keyword.WhereStart), prefix (* If the last producer of prefix is unnamed, we cannot refer to its position. We give it a name. *) | (p, None) :: ps -> let x = fresh (names (inlined_producers @ prefix @ suffix)) (CodeBits.prefix "p") in (Keyword.RightNamed x, Keyword.WhereEnd), List.rev ((p, Some x) :: ps) (* The last producer of prefix is named [x], $startpos in the inlined rule will be changed to $endpos(x). *) | (_, Some x) :: _ -> (Keyword.RightNamed x, Keyword.WhereEnd), prefix in (* Same thing for the suffix. *) let end_position, suffix' = match suffix with | [] -> (Keyword.Left, Keyword.WhereEnd), suffix | (p, None) :: ps -> let x = fresh (names (inlined_producers @ prefix' @ suffix)) (CodeBits.prefix "p") in ((Keyword.RightNamed x, Keyword.WhereStart), (p, Some x) :: ps) | (_, Some x) :: _ -> (Keyword.RightNamed x, Keyword.WhereStart), suffix in (psym, start_position, end_position), prefix', suffix' in (* Rename the host semantic action. Each reference of the inlined non terminal [psym] must be taken into account. 
$startpos(psym) is changed to $startpos(x) where [x] is the first producer of the inlined branch if it is not empty or the preceding producer found in the prefix. *) let outer_action, (used1, used2) = Action.rename_inlined_psym renaming_env [] b.action in let action', (used1', used2') = Action.rename renaming_env phi pb.action in let prefix = if used1 || used1' then prefix' else prefix in let suffix = if used2 || used2' then suffix' else suffix in { b with producers = prefix @ inlined_producers @ suffix; action = Action.compose psym action' outer_action } in List.map inline_branch p.branches >>= expand_branch with NoInlining -> return b (* Expand a rule if necessary. *) and expand_rule k r = try (match expanded_state k with | BeingExpanded -> Error.error r.positions (Printf.sprintf "there is a cycle in the definition of %s." k) | Expanded r -> r) with Not_found -> mark_as_being_expanded k; mark_as_expanded k { r with branches = r.branches >>= expand_branch } in (* We check that the %inline rules do not use $i syntax since expansion of $i is impossible. *) let _ = StringMap.iter (fun _ r -> if r.inline_flag && List.exists (fun b -> Action.use_dollar b.action) r.branches then Error.error r.positions (Printf.sprintf "You cannot use $i syntax in this branch since its \ definition will be inlined.")) grammar.rules in (* If we are in Coq mode, %inline is forbidden. *) let _ = if Settings.coq then StringMap.iter (fun _ r -> if r.inline_flag then Error.error r.positions (Printf.sprintf "%%inline is not supported by the coq back-end")) grammar.rules in (* To expand a grammar, we expand all its rules and remove the %inline rules. 
*) let expanded_rules = StringMap.mapi expand_rule grammar.rules and useful_types = StringMap.filter (fun k _ -> try not (StringMap.find k grammar.rules).inline_flag with Not_found -> true) grammar.types in { grammar with rules = StringMap.filter (fun _ r -> not r.inline_flag) expanded_rules; types = useful_types }, !use_inline menhir-20130116/src/traverse.ml0000644000175000017500000003332112075533603015245 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* Code for traversing or transforming [IL] terms. *) open IL open CodeBits (* This turns a list of value definitions into a hash table. It also counts and numbers the definitions. We assume that the left-hand side of every definition is a variable. *) let tabulate_defs (defs : valdef list) : int * (string, int * valdef) Hashtbl.t = let count = ref 0 in let table = Hashtbl.create 1023 in List.iter (fun def -> let k = !count in count := k + 1; Hashtbl.add table (pat2var def.valpat) (k, def) ) defs; !count, table (* This mixin class, used by [map] and [fold] below, helps maintain environments, which can be used to keep track of local variable bindings. *) class virtual ['env] env = object(self) (* The virtual method [pvar] records a local variable binding in the environment. 
*) method virtual pvar: 'env -> string -> 'env method pat env = function | PWildcard | PUnit -> env | PVar id -> self#pvar env id | PTuple ps | POr ps | PData (_, ps) -> self#pats env ps | PAnnot (p, _) -> self#pat env p | PRecord fps -> self#fpats env fps method pats env ps = List.fold_left self#pat env ps method fpats env fps = List.fold_left self#fpat env fps method fpat env (_, p) = self#pat env p end (* A class that helps transform expressions. The environment [env] can be used to keep track of local variable bindings. *) exception NoChange class virtual ['env] map = object (self) inherit ['env] env method expr (env : 'env) e = try match e with | EVar x -> self#evar env x | EFun (ps, e) -> self#efun env ps e | EApp (e, es) -> self#eapp env e es | ELet (bs, e) -> self#elet env bs e | EMatch (e, bs) -> self#ematch env e bs | EIfThen (e, e1) -> self#eifthen env e e1 | EIfThenElse (e, e1, e2) -> self#eifthenelse env e e1 e2 | ERaise e -> self#eraise env e | ETry (e, bs) -> self#etry env e bs | EUnit -> self#eunit env | EIntConst k -> self#eintconst env k | EMaxInt -> self#emaxint env | EStringConst s -> self#estringconst env s | EData (d, es) -> self#edata env d es | ETuple es -> self#etuple env es | EAnnot (e, t) -> self#eannot env e t | EMagic e -> self#emagic env e | ERepr _ -> self#erepr env e | ERecord fs -> self#erecord env fs | ERecordAccess (e, f) -> self#erecordaccess env e f | ERecordWrite (e, f, e1) -> self#erecordwrite env e f e1 | ETextual action -> self#etextual env action | EComment (s, e) -> self#ecomment env s e | EPatComment (s, p, e) -> self#epatcomment env s p e | EArray es -> self#earray env es | EArrayAccess (e, i) -> self#earrayaccess env e i with NoChange -> e method evar env x = raise NoChange method efun env ps e = let e' = self#expr (self#pats env ps) e in if e == e' then raise NoChange else EFun (ps, e') method eapp env e es = let e' = self#expr env e and es' = self#exprs env es in if e == e' && es == es' then raise NoChange else EApp 
(e', es') method elet env bs e = let env, bs' = self#bindings env bs in let e' = self#expr env e in if bs == bs' && e == e' then raise NoChange else ELet (bs', e') method ematch env e bs = let e' = self#expr env e and bs' = self#branches env bs in if e == e' && bs == bs' then raise NoChange else EMatch (e', bs') method eifthen env e e1 = let e' = self#expr env e and e1' = self#expr env e1 in if e == e' && e1 == e1' then raise NoChange else EIfThen (e', e1') method eifthenelse env e e1 e2 = let e' = self#expr env e and e1' = self#expr env e1 and e2' = self#expr env e2 in if e == e' && e1 == e1' && e2 == e2' then raise NoChange else EIfThenElse (e', e1', e2') method eraise env e = let e' = self#expr env e in if e == e' then raise NoChange else ERaise e' method etry env e bs = let e' = self#expr env e and bs' = self#branches env bs in if e == e' && bs == bs' then raise NoChange else ETry (e', bs') method eunit env = raise NoChange method eintconst env k = raise NoChange method emaxint env = raise NoChange method estringconst env s = raise NoChange method edata env d es = let es' = self#exprs env es in if es == es' then raise NoChange else EData (d, es') method etuple env es = let es' = self#exprs env es in if es == es' then raise NoChange else ETuple es' method eannot env e t = let e' = self#expr env e in if e == e' then raise NoChange else EAnnot (e', t) method emagic env e = let e' = self#expr env e in if e == e' then raise NoChange else EMagic e' method erepr env e = let e' = self#expr env e in if e == e' then raise NoChange else ERepr e' method erecord env fs = let fs' = self#fields env fs in if fs == fs' then raise NoChange else ERecord fs' method erecordaccess env e f = let e' = self#expr env e in if e == e' then raise NoChange else ERecordAccess (e', f) method erecordwrite env e f e1 = let e' = self#expr env e and e1' = self#expr env e1 in if e == e' && e1 == e1' then raise NoChange else ERecordWrite (e', f, e1') method earray env es = let es' = self#exprs env 
es in if es == es' then raise NoChange else EArray es' method earrayaccess env e i = let e' = self#expr env e in if e == e' then raise NoChange else EArrayAccess (e', i) method etextual env action = raise NoChange method ecomment env s e = let e' = self#expr env e in if e == e' then raise NoChange else EComment (s, e') method epatcomment env s p e = let e' = self#expr env e in if e == e' then raise NoChange else EPatComment (s, p, e') method exprs env es = Misc.smap (self#expr env) es method fields env fs = Misc.smap (self#field env) fs method field env ((f, e) as field) = let e' = self#expr env e in if e == e' then field else (f, e') method branches env bs = Misc.smap (self#branch env) bs method branch env b = let e = b.branchbody in let e' = self#expr (self#pat env b.branchpat) e in if e == e' then b else { b with branchbody = e' } (* The method [binding] produces a pair of an updated environment and a transformed binding. *) method binding env ((p, e) as b) = let e' = self#expr env e in self#pat env p, if e == e' then b else (p, e') (* For nested non-recursive bindings, the environment produced by each binding is used to traverse the following bindings. The method [binding] produces a pair of an updated environment and a transformed list of bindings. *) method bindings env bs = Misc.smapa self#binding env bs method valdef env def = let e = def.valval in let e' = self#expr env e in if e == e' then def else { def with valval = e' } method valdefs env defs = Misc.smap (self#valdef env) defs end (* A class that helps iterate, or fold, over expressions. 
*) class virtual ['env, 'a] fold = object (self) inherit ['env] env method expr (env : 'env) (accu : 'a) e = match e with | EVar x -> self#evar env accu x | EFun (ps, e) -> self#efun env accu ps e | EApp (e, es) -> self#eapp env accu e es | ELet (bs, e) -> self#elet env accu bs e | EMatch (e, bs) -> self#ematch env accu e bs | EIfThen (e, e1) -> self#eifthen env accu e e1 | EIfThenElse (e, e1, e2) -> self#eifthenelse env accu e e1 e2 | ERaise e -> self#eraise env accu e | ETry (e, bs) -> self#etry env accu e bs | EUnit -> self#eunit env accu | EIntConst k -> self#eintconst env accu k | EMaxInt -> self#emaxint env accu | EStringConst s -> self#estringconst env accu s | EData (d, es) -> self#edata env accu d es | ETuple es -> self#etuple env accu es | EAnnot (e, t) -> self#eannot env accu e t | EMagic e -> self#emagic env accu e | ERepr _ -> self#erepr env accu e | ERecord fs -> self#erecord env accu fs | ERecordAccess (e, f) -> self#erecordaccess env accu e f | ERecordWrite (e, f, e1) -> self#erecordwrite env accu e f e1 | ETextual action -> self#etextual env accu action | EComment (s, e) -> self#ecomment env accu s e | EPatComment (s, p, e) -> self#epatcomment env accu s p e | EArray es -> self#earray env accu es | EArrayAccess (e, i) -> self#earrayaccess env accu e i method evar (env : 'env) (accu : 'a) x = accu method efun (env : 'env) (accu : 'a) ps e = let accu = self#expr (self#pats env ps) accu e in accu method eapp (env : 'env) (accu : 'a) e es = let accu = self#expr env accu e in let accu = self#exprs env accu es in accu method elet (env : 'env) (accu : 'a) bs e = let env, accu = self#bindings env accu bs in let accu = self#expr env accu e in accu method ematch (env : 'env) (accu : 'a) e bs = let accu = self#expr env accu e in let accu = self#branches env accu bs in accu method eifthen (env : 'env) (accu : 'a) e e1 = let accu = self#expr env accu e in let accu = self#expr env accu e1 in accu method eifthenelse (env : 'env) (accu : 'a) e e1 e2 = let accu = 
self#expr env accu e in let accu = self#expr env accu e1 in let accu = self#expr env accu e2 in accu method eraise (env : 'env) (accu : 'a) e = let accu = self#expr env accu e in accu method etry (env : 'env) (accu : 'a) e bs = let accu = self#expr env accu e in let accu = self#branches env accu bs in accu method eunit (env : 'env) (accu : 'a) = accu method eintconst (env : 'env) (accu : 'a) k = accu method emaxint (env : 'env) (accu : 'a) = accu method estringconst (env : 'env) (accu : 'a) s = accu method edata (env : 'env) (accu : 'a) d es = let accu = self#exprs env accu es in accu method etuple (env : 'env) (accu : 'a) es = let accu = self#exprs env accu es in accu method eannot (env : 'env) (accu : 'a) e t = let accu = self#expr env accu e in accu method emagic (env : 'env) (accu : 'a) e = let accu = self#expr env accu e in accu method erepr (env : 'env) (accu : 'a) e = let accu = self#expr env accu e in accu method erecord (env : 'env) (accu : 'a) fs = let accu = self#fields env accu fs in accu method erecordaccess (env : 'env) (accu : 'a) e f = let accu = self#expr env accu e in accu method erecordwrite (env : 'env) (accu : 'a) e f e1 = let accu = self#expr env accu e in let accu = self#expr env accu e1 in accu method earray (env : 'env) (accu : 'a) es = let accu = self#exprs env accu es in accu method earrayaccess (env : 'env) (accu : 'a) e i = let accu = self#expr env accu e in accu method etextual (env : 'env) (accu : 'a) action = accu method ecomment (env : 'env) (accu : 'a) s e = let accu = self#expr env accu e in accu method epatcomment (env : 'env) (accu : 'a) s p e = let accu = self#expr env accu e in accu method exprs (env : 'env) (accu : 'a) es = List.fold_left (self#expr env) accu es method fields (env : 'env) (accu : 'a) fs = List.fold_left (self#field env) accu fs method field (env : 'env) (accu : 'a) (f, e) = let accu = self#expr env accu e in accu method branches (env : 'env) (accu : 'a) bs = List.fold_left (self#branch env) accu bs method 
branch (env : 'env) (accu : 'a) b = let accu = self#expr (self#pat env b.branchpat) accu b.branchbody in accu method binding ((env, accu) : 'env * 'a) (p, e) = let accu = self#expr env accu e in self#pat env p, accu method bindings (env : 'env) (accu : 'a) bs = List.fold_left self#binding (env, accu) bs method valdef (env : 'env) (accu : 'a) def = let accu = self#expr env accu def.valval in accu method valdefs (env : 'env) (accu : 'a) defs = List.fold_left (self#valdef env) accu defs end menhir-20130116/src/action.mli0000644000175000017500000000645612075533603015051 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* $Id: action.mli,v 1.8 2006/06/26 09:41:33 regisgia Exp $ *) (** Semantic action's type. *) type t (** [compose x a1 a2] builds the action [let x = a1 in a2]. This feature is used during the processing of the %inline keyword. *) val compose : string -> t -> t -> t (** [rename renaming_env phi a] builds the action [let x1 = x1' and ... xn = xn' in a] if [phi] is [(x1, x1') ... (xn, xn')]. Moreover, [renaming_env] is used to correctly replace $startpos/$endpos present in the semantic action. *) val rename: string * (Keyword.subject * Keyword.where) * (Keyword.subject * Keyword.where) -> (string * string) list -> t -> t * (bool * bool) (** [rename_inlined_psym renaming_env phi a] updates the occurrences of the inlined non terminal in the action [a]. 
*) val rename_inlined_psym: string * (Keyword.subject * Keyword.where) * (Keyword.subject * Keyword.where) -> (string * string) list -> t -> t * (bool * bool) (** Semantic actions are translated into [IL] code using the [IL.ETextual] and [IL.ELet] constructors. *) val to_il_expr: t -> IL.expr (** A semantic action might be the inlining of several others. The filenames of the different parts are given by [filenames a]. This can be used, for instance, to check whether all parts come from the standard library. *) val filenames: t -> string list (** [pkeywords a] returns a list of all keyword occurrences in [a]. *) val pkeywords: t -> Keyword.keyword Positions.located list (** [keywords a] is the set of keywords used in the semantic action [a]. *) val keywords: t -> Keyword.KeywordSet.t (** [print f a] prints [a] to channel [f]. *) val print: out_channel -> t -> unit (** [from_stretch s] builds an action out of a textual piece of code. *) val from_stretch: Stretch.t -> t (** Check whether the keyword $previouserror is used in the action. *) val has_previouserror: t -> bool (** Check whether the keyword $syntaxerror is used in the action. *) val has_syntaxerror: t -> bool (** Check whether the keyword $start is used in the action. *) val has_leftstart: t -> bool (** Check whether the keyword $end is used in the action. *) val has_leftend: t -> bool (** Check whether a particular $i keyword is used in the action. *) val has_dollar: int -> t -> bool (** Check whether any $i keyword is used in the action. *) val use_dollar: t -> bool menhir-20130116/src/back.ml0000644000175000017500000000510512075533603014311 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. 
This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* Driver for the back-end. *) open UnparameterizedSyntax module I = Interpret (* artificial dependency; ensures that [Interpret] runs first *) (* Define an .ml file writer . *) let write program = let module P = Printer.Make (struct let filename = Settings.base ^ ".ml" let f = open_out filename let locate_stretches = if Settings.infer then (* Typechecking should not fail at this stage. Omit #line directives. *) None else (* 2011/10/19: do not use [Filename.basename]. The [#] annotations that we insert in the [.ml] file must retain their full path. This does mean that the [#] annotations depend on how menhir is invoked -- e.g. [menhir foo/bar.mly] and [cd foo && menhir bar.mly] will produce different files. Nevertheless, this seems useful/reasonable. *) Some filename let raw_stretch_action = false end) in P.program program (* Construct the code, using either the table-based or the code-based back-end, and pass it on to the printer. (This continuation-passing style is imposed by the fact that there is no conditional in ocaml's module language.) *) let () = if Settings.coq then let module B = CoqBackend.Run (struct end) in let filename = Settings.base ^ ".v" in let f = open_out filename in B.write_all f; exit 0 else if Settings.table then let module B = TableBackend.Run (struct end) in write B.program else let module B = CodeBackend.Run (struct end) in write (Inliner.inline B.program) (* Write the interface file. 
*) let () = Interface.write() let () = Time.tick "Printing" menhir-20130116/src/pprint.ml0000644000175000017500000006746112075533603014742 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This is an adaptation of Daan Leijen's [PPrint] library, which itself is based on the ideas developed by Philip Wadler in ``A Prettier Printer''. For more information, see: http://www.cs.uu.nl/~daan/pprint.html http://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf *) (* ------------------------------------------------------------------------- *) (* A uniform interface for output channels. *) module type OUTPUT = sig type channel val char: channel -> char -> unit val substring: channel -> string -> int (* offset *) -> int (* length *) -> unit end (* ------------------------------------------------------------------------- *) (* Two implementations of the above interface, respectively based on output channels and memory buffers. This compensates for the fact that ocaml's standard library does not allow creating an output channel out of a memory buffer (a regrettable omission). 
*) module ChannelOutput : OUTPUT with type channel = out_channel = struct type channel = out_channel let char = output_char let substring = output end module BufferOutput : OUTPUT with type channel = Buffer.t = struct type channel = Buffer.t let char = Buffer.add_char let substring = Buffer.add_substring end (* ------------------------------------------------------------------------- *) (* Here is the algebraic data type of documents. It is analogous to Daan Leijen's version, but the binary constructor [Union] is replaced with the unary constructor [Group], and the constant [Line] is replaced with more general constructions, namely [IfFlat], which provides alternative forms depending on the current flattening mode, and [HardLine], which represents a newline character, and is invalid in flattening mode. *) type document = (* [Empty] is the empty document. *) | Empty (* [Char c] is a document that consists of the single character [c]. We enforce the invariant that [c] is not a newline character. *) | Char of char (* [String (s, ofs, len)] is a document that consists of the portion of the string [s] delimited by the offset [ofs] and the length [len]. We assume, but do not check, that this portion does not contain a newline character. *) | String of string * int * int (* [Blank n] is a document that consists of [n] blank characters. *) | Blank of int (* When in flattening mode, [IfFlat (d1, d2)] turns into the document [d1]. When not in flattening mode, it turns into the document [d2]. *) | IfFlat of document * document (* When in flattening mode, [HardLine] is illegal. When not in flattening mode, it represents a newline character, followed with an appropriate number of indentation. A safe way of using [HardLine] is to only use it directly within the right branch of an [IfFlat] construct. *) | HardLine (* [Cat doc1 doc2] is the concatenation of the documents [doc1] and [doc2]. 
*) | Cat of document * document (* [Nest (j, doc)] is the document [doc], in which the indentation level has been increased by [j], that is, in which [j] blanks have been inserted after every newline character. *) | Nest of int * document (* [Group doc] represents an alternative: it is either a flattened form of [doc], in which occurrences of [Group] disappear and occurrences of [IfFlat] resolve to their left branch, or [doc] itself. *) | Group of document (* [Column f] is the document obtained by applying [f] to the current column number. *) | Column of (int -> document) (* [Nesting f] is the document obtained by applying [f] to the current indentation level, that is, the number of blanks that were printed at the beginning of the current line. *) | Nesting of (int -> document) (* ------------------------------------------------------------------------- *) (* A signature for document renderers. *) module type RENDERER = sig (* Output channels. *) type channel (* [pretty rfrac width channel document] pretty-prints the document [document] to the output channel [channel]. The parameter [width] is the maximum number of characters per line. The parameter [rfrac] is the ribbon width, a fraction relative to [width]. The ribbon width is the maximum number of non-indentation characters per line. *) val pretty: float -> int -> channel -> document -> unit (* [compact channel document] prints the document [document] to the output channel [channel]. No indentation is used. All newline instructions are respected, that is, no groups are flattened. *) val compact: channel -> document -> unit end (* ------------------------------------------------------------------------- *) (* The pretty rendering algorithm: preliminary declarations. *) (* The renderer is supposed to behave exactly like Daan Leijen's, although its implementation is quite radically different. 
Instead of relying on Haskell's lazy evaluation mechanism, we implement an abstract machine with mutable current state, forking, backtracking (via an explicit stack of choice points), and cut (disposal of earlier choice points). *) (* The renderer's input consists of an ordered sequence of documents. Each document carries an extra indentation level, akin to an implicit [Nest] constructor, and a ``flattening'' flag, which, if set, means that this document should be printed in flattening mode. *) (* An alternative coding style would be to avoid decorating each input document with an indentation level and a flattening mode, and allow the input sequence to contain instructions that set the current nesting level or reset the flattening mode. That would perhaps be slightly more readable, and slightly less efficient. *) type input = | INil | ICons of int * bool * document * input (* When possible (that is, when the stack is empty), the renderer writes directly to the output channel. Otherwise, output is buffered until either a failure point is reached (then, the buffered output is discarded) or a cut is reached (then, all buffered output is committed to the output channel). At all times, the length of the buffered output is at most one line. *) (* The buffered output consists of a list of characters and strings. It is stored in reverse order (the head of the list should be printed last). *) type output = | OEmpty | OChar of char * output | OString of string * int * int * output | OBlank of int * output (* The renderer maintains the following state record. For efficiency, the record is mutable; it is copied when the renderer forks, that is, at choice points. *) type 'channel state = { (* The line width and ribbon width. *) width: int; ribbon: int; (* The output channel. *) channel: 'channel; (* The current indentation level. This is the number of blanks that were printed at the beginning of the current line. *) mutable indentation: int; (* The current column. 
*) mutable column: int; (* The renderer's input. For efficiency, the input is assumed to never be empty, and the leading [ICons] constructor is inlined within the state record. In other words, the fields [nest1], [flatten1], and [input1] concern the first input document, and the field [input] contains the rest of the input sequence. *) mutable indent1: int; mutable flatten1: bool; mutable input1: document; mutable input: input; (* The renderer's buffer output. *) mutable output: output; } (* The renderer maintains a stack of resumptions, that is, states in which execution should be resumed if the current thread of execution fails by lack of space on the current line. *) (* It is not difficult to prove that the stack is empty if and only if flattening mode is off. Furthermore, when flattening mode is on, all groups are ignored, so no new choice points are pushed onto the stack. As a result, the stack has height one at most at all times, so that the stack height is zero when flattening mode is off and one when flattening mode is on. *) type 'channel stack = 'channel state list (* ------------------------------------------------------------------------- *) (* The pretty rendering algorithm: code. *) (* The renderer is parameterized over an implementation of output channels. *) module Renderer (Output : OUTPUT) = struct type channel = Output.channel (* Printing blank space (indentation characters). *) let blank_length = 80 let blank_buffer = String.make blank_length ' ' let rec blanks channel n = if n <= 0 then () else if n <= blank_length then Output.substring channel blank_buffer 0 n else begin Output.substring channel blank_buffer 0 blank_length; blanks channel (n - blank_length) end (* Committing buffered output to the output channel. The list is printed in reverse order. The code is not tail recursive, but there is no risk of stack overflow, since the length of the buffered output cannot exceed one line. 
*) let rec commit channel = function | OEmpty -> () | OChar (c, output) -> commit channel output; Output.char channel c | OString (s, ofs, len, output) -> commit channel output; Output.substring channel s ofs len | OBlank (n, output) -> commit channel output; blanks channel n (* The renderer's abstract machine. *) (* The procedures [run], [shift], [emit_char], [emit_string], and [emit_blanks] are mutually recursive, and are tail recursive. They maintain a stack and a current state. The states in the stack, and the current state, are pairwise distinct, so that the current state can be mutated without affecting the contents of the stack. *) (* An invariant is: the buffered output is nonempty only when the stack is nonempty. The contrapositive is: if the stack is empty, then the buffered output is empty. Indeed, the fact that the stack is empty means that no choices were made, so we are not in a speculative mode of execution: as a result, all output can be sent directly to the output channel. On the contrary, when the stack is nonempty, there is a possibility that we might backtrack in the future, so all output should be held in a buffer. *) (* [run] is allowed to call itself recursively only when no material is printed. In that case, the check for failure is skipped -- indeed, this test is performed only within [shift]. *) let rec run (stack : channel stack) (state : channel state) : unit = (* Examine the first piece of input, as well as (in some cases) the current flattening mode. *) match state.input1, state.flatten1 with (* The first piece of input is an empty document. Discard it and continue. *) | Empty, _ -> shift stack state (* The first piece of input is a character. Emit it and continue. *) | Char c, _ -> emit_char stack state c (* The first piece of input is a string. Emit it and continue. *) | String (s, ofs, len), _ -> emit_string stack state s ofs len | Blank n, _ -> emit_blanks stack state n (* The first piece of input is a hard newline instruction. 
Such an instruction is valid only when flattening mode is off. *) (* We emit a newline character, followed by the prescribed amount of indentation. We update the current state to record how many indentation characters were printed and to to reflect the new column number. Then, we discard the current piece of input and continue. *) | HardLine, flattening -> assert (not flattening); (* flattening mode must be off. *) assert (stack = []); (* since flattening mode is off, the stack must be empty. *) Output.char state.channel '\n'; let i = state.indent1 in blanks state.channel i; state.column <- i; state.indentation <- i; shift stack state (* The first piece of input is an [IfFlat] conditional instruction. *) | IfFlat (doc, _), true | IfFlat (_, doc), false -> state.input1 <- doc; run stack state (* The first piece of input is a concatenation operator. We take it apart and queue both documents in the input sequence. *) | Cat (doc1, doc2), _ -> state.input1 <- doc1; state.input <- ICons (state.indent1, state.flatten1, doc2, state.input); run stack state (* The first piece of input is a [Nest] operator. We increase the amount of indentation to be applied to the first input document. *) | Nest (j, doc), _ -> state.indent1 <- state.indent1 + j; state.input1 <- doc; run stack state (* The first piece of input is a [Group] operator, and flattening mode is currently off. This introduces a choice point: either we flatten this whole group, or we don't. We try the former possibility first: this is done by enabling flattening mode. Should this avenue fail, we push the current state, in which flattening mode is disabled, onto the stack. *) (* Note that the current state is copied before continuing, so that the state that is pushed on the stack is not affected by future modifications. This is a fork. *) | Group doc, false -> state.input1 <- doc; run (state :: stack) { state with flatten1 = true } (* The first piece of input is a [Group] operator, and flattening mode is currently on. 
The operator is ignored. *) | Group doc, true -> state.input1 <- doc; run stack state (* The first piece of input is a [Column] operator. The current column is fed into it, so as to produce a document, with which we continue. *) | Column f, _ -> state.input1 <- f state.column; run stack state (* The first piece of input is a [Column] operator. The current indentation level is fed into it, so as to produce a document, with which we continue. *) | Nesting f, _ -> state.input1 <- f state.indentation; run stack state (* [shift] discards the first document in the input sequence, so that the second input document, if there is one, becomes first. The renderer stops if there is none. *) and shift stack state = assert (state.output = OEmpty || stack <> []); assert (state.flatten1 = (stack <> [])); (* If the stack is nonempty and we have exceeded either the width or the ribbon width parameters, then fail. Backtracking is implemented by discarding the current state, popping a state off the stack, and making it the current state. *) match stack with | resumption :: stack when state.column > state.width || state.column - state.indentation > state.ribbon -> run stack resumption | _ -> match state.input with | INil -> (* End of input. Commit any buffered output and stop. *) commit state.channel state.output | ICons (indent, flatten, head, tail) -> (* There is an input document. Move it one slot ahead and check if we are leaving flattening mode. *) state.indent1 <- indent; state.input1 <- head; state.input <- tail; if state.flatten1 && not flatten then begin (* Leaving flattening mode means success: we have flattened a certain group, and fitted it all on a line, without reaching a failure point. We would now like to commit our decision to flatten this group. This is a Prolog cut. We discard the stack of choice points, replacing it with an empty stack, and commit all buffered output. 
*) state.flatten1 <- flatten; (* false *) commit state.channel state.output; state.output <- OEmpty; run [] state end else run stack state (* [emit_char] prints a character (either to the output channel or to the output buffer), increments the current column, discards the first piece of input, and continues. *) and emit_char stack state c = begin match stack with | [] -> Output.char state.channel c | _ -> state.output <- OChar (c, state.output) end; state.column <- state.column + 1; shift stack state (* [emit_string] prints a string (either to the output channel or to the output buffer), updates the current column, discards the first piece of input, and continues. *) and emit_string stack state s ofs len = begin match stack with | [] -> Output.substring state.channel s ofs len | _ -> state.output <- OString (s, ofs, len, state.output) end; state.column <- state.column + len; shift stack state (* [emit_blanks] prints a blank string (either to the output channel or to the output buffer), updates the current column, discards the first piece of input, and continues. *) and emit_blanks stack state n = begin match stack with | [] -> blanks state.channel n | _ -> state.output <- OBlank (n, state.output) end; state.column <- state.column + n; shift stack state (* This is the renderer's main entry point. *) let pretty rfrac width channel document = run [] { width = width; ribbon = max 0 (min width (truncate (float_of_int width *. rfrac))); channel = channel; indentation = 0; column = 0; indent1 = 0; flatten1 = false; input1 = document; input = INil; output = OEmpty; } (* ------------------------------------------------------------------------- *) (* The compact rendering algorithm. 
*) let compact channel document = let column = ref 0 in let rec scan = function | Empty -> () | Char c -> Output.char channel c; column := !column + 1 | String (s, ofs, len) -> Output.substring channel s ofs len; column := !column + len | Blank n -> blanks channel n; column := !column + n | HardLine -> Output.char channel '\n'; column := 0 | Cat (doc1, doc2) -> scan doc1; scan doc2 | IfFlat (doc, _) | Nest (_, doc) | Group doc -> scan doc | Column f -> scan (f !column) | Nesting f -> scan (f 0) in scan document end (* ------------------------------------------------------------------------- *) (* Instantiating the renderers for the two kinds of output channels. *) module Channel = Renderer(ChannelOutput) module Buffer = Renderer(BufferOutput) (* ------------------------------------------------------------------------- *) (* Constructors. *) let empty = Empty let (^^) x y = match x, y with | Empty, x | x, Empty -> x | _, _ -> Cat (x, y) let ifflat doc1 doc2 = IfFlat (doc1, doc2) let hardline = HardLine let char c = assert (c <> '\n'); Char c let substring s ofs len = if len = 0 then Empty else String (s, ofs, len) let text s = substring s 0 (String.length s) let blank n = if n = 0 then Empty else Blank n let nest i x = assert (i >= 0); Nest (i, x) let column f = Column f let nesting f = Nesting f let group x = Group x (* ------------------------------------------------------------------------- *) (* Low-level combinators for alignment and indentation. *) let align d = column (fun k -> nesting (fun i -> nest (k - i) d ) ) let hang i d = align (nest i d) let indent i d = hang i (blank i ^^ d) (* ------------------------------------------------------------------------- *) (* High-level combinators. 
*) let lparen = char '(' let rparen = char ')' let langle = char '<' let rangle = char '>' let lbrace = char '{' let rbrace = char '}' let lbracket = char '[' let rbracket = char ']' let squote = char '\'' let dquote = char '"' let bquote = char '`' let semi = char ';' let colon = char ':' let comma = char ',' let space = char ' ' let dot = char '.' let sharp = char '#' let backslash = char '\\' let equals = char '=' let qmark = char '?' let tilde = char '~' let at = char '@' let percent = char '%' let dollar = char '$' let caret = char '^' let ampersand = char '&' let star = char '*' let plus = char '+' let minus = char '-' let underscore = char '_' let bang = char '!' let bar = char '|' let break i = ifflat (text (String.make i ' ')) hardline let break0 = ifflat empty hardline let break1 = ifflat space hardline let string s = let n = String.length s in let rec chop i = try let j = String.index_from s i '\n' in substring s i (j - i) ^^ break1 ^^ chop (j + 1) with Not_found -> substring s i (n - i) in chop 0 let group_break1 = group break1 let words s = let n = String.length s in let rec blank accu i = (* we have skipped over at least one blank character *) if i = n then accu ^^ group_break1 else match s.[i] with | ' ' | '\t' | '\n' | '\r' -> blank accu (i + 1) | _ -> word break1 accu i (i + 1) and word prefix accu i j = (* we have skipped over at least one non-blank character *) if j = n then accu ^^ group (prefix ^^ substring s i (j - i)) else match s.[j] with | ' ' | '\t' | '\n' | '\r' -> blank (accu ^^ group (prefix ^^ substring s i (j - i))) (j + 1) | _ -> word prefix accu i (j + 1) in if n = 0 then empty else match s.[0] with | ' ' | '\t' | '\n' | '\r' -> blank empty 1 | _ -> word empty empty 0 1 let enclose l r x = l ^^ x ^^ r let squotes = enclose squote squote let dquotes = enclose dquote dquote let bquotes = enclose bquote bquote let braces = enclose lbrace rbrace let parens = enclose lparen rparen let angles = enclose langle rangle let brackets = enclose 
lbracket rbracket let fold f docs = List.fold_right f docs empty let rec fold1 f docs = match docs with | [] -> empty | [ doc ] -> doc | doc :: docs -> f doc (fold1 f docs) let rec fold1map f g docs = match docs with | [] -> empty | [ doc ] -> g doc | doc :: docs -> let doc = g doc in (* force left-to-right evaluation *) f doc (fold1map f g docs) let sepmap sep g docs = fold1map (fun x y -> x ^^ sep ^^ y) g docs let optional f = function | None -> empty | Some x -> f x let group1 d = group (nest 1 d) let group2 d = group (nest 2 d) module Operators = struct let ( !^ ) = text let ( ^^ ) = ( ^^ ) let ( ^/^ ) x y = x ^^ break1 ^^ y let ( ^//^ ) x y = group (x ^^ nest 2 (break1 ^^ y)) let ( ^@^ ) x y = group (x ^^ break1 ^^ y) let ( ^@@^ ) x y = group2 (x ^^ break1 ^^ y) end open Operators let prefix op x = !^op ^//^ x let infix op x y = (x ^^ space ^^ !^op) ^//^ y let infix_dot op x y = group2 ((x ^^ !^op) ^^ break0 ^^ y) let infix_com op x y = x ^^ !^op ^^ group_break1 ^^ y let surround n sep open_doc contents close_doc = group (open_doc ^^ nest n (sep ^^ contents) ^^ sep ^^ close_doc) let surround1 open_txt contents close_txt = surround 1 break0 !^open_txt contents !^close_txt let surround2 open_txt contents close_txt = surround 2 break1 !^open_txt contents !^close_txt let soft_surround n sep open_doc contents close_doc = group (open_doc ^^ nest n (group sep ^^ contents) ^^ group (sep ^^ close_doc)) let seq indent break empty_seq open_seq sep_seq close_seq = function | [] -> empty_seq | xs -> surround indent break open_seq (fold1 (fun x xs -> x ^^ sep_seq ^^ xs) xs) close_seq let seq1 open_txt sep_txt close_txt = seq 1 break0 !^(open_txt ^ close_txt) !^open_txt (!^sep_txt ^^ break1) !^close_txt let seq2 open_txt sep_txt close_txt = seq 2 break1 !^(open_txt ^ close_txt) !^open_txt (!^sep_txt ^^ break1) !^close_txt let sprintf fmt = Printf.ksprintf string fmt (* A signature for value representations. 
This is compatible with the associated Camlp4 generator: SwitchValueRepresentation *) module type VALUE_REPRESENTATION = sig (* The type of value representation *) type t (* [variant type_name data_constructor_name tag arguments] Given information about the variant and its arguments, this function produces a new value representation. *) val variant : string -> string -> int -> t list -> t (* [record type_name fields] Given a type name and a list of record fields, this function produces the value representation of a record. *) val record : string -> (string * t) list -> t (* [tuple arguments] Given a list of value representation this function produces a new value representation. *) val tuple : t list -> t (* ------------------------------------------------------------------------- *) (* Value representation for primitive types. *) val string : string -> t val int : int -> t val int32 : int32 -> t val int64 : int64 -> t val nativeint : nativeint -> t val float : float -> t val char : char -> t val bool : bool -> t val option : ('a -> t) -> 'a option -> t val list : ('a -> t) -> 'a list -> t val array : ('a -> t) -> 'a array -> t val ref : ('a -> t) -> 'a ref -> t (* Value representation for any other value. *) val unknown : string -> 'a -> t end module type DOCUMENT_VALUE_REPRESENTATION = VALUE_REPRESENTATION with type t = document (* please remove as soon as this will be available in ocaml *) module MissingFloatRepr = struct let valid_float_lexeme s = let l = String.length s in let rec loop i = if i >= l then s ^ "." else match s.[i] with | '0' .. 
'9' | '-' -> loop (i+1) | _ -> s in loop 0 let float_repres f = match classify_float f with FP_nan -> "nan" | FP_infinite -> if f < 0.0 then "neg_infinity" else "infinity" | _ -> let s1 = Printf.sprintf "%.12g" f in if f = float_of_string s1 then valid_float_lexeme s1 else let s2 = Printf.sprintf "%.15g" f in if f = float_of_string s2 then valid_float_lexeme s2 else Printf.sprintf "%.18g" f end module ML = struct type t = document let tuple = seq1 "(" "," ")" let variant _ cons _ args = if args = [] then !^cons else !^cons ^^ tuple args let record _ fields = seq2 "{" ";" "}" (List.map (fun (k, v) -> infix ":" !^k v) fields) let option f = function | Some x -> !^"Some" ^^ tuple [f x] | None -> !^"None" let list f xs = seq2 "[" ";" "]" (List.map f xs) let array f xs = seq2 "[|" ";" "|]" (Array.to_list (Array.map f xs)) let ref f x = record "ref" ["contents", f !x] let float f = string (MissingFloatRepr.float_repres f) let int = sprintf "%d" let int32 = sprintf "%ld" let int64 = sprintf "%Ld" let nativeint = sprintf "%nd" let char = sprintf "%C" let bool = sprintf "%B" let string = sprintf "%S" let unknown tyname _ = sprintf "" tyname end (* Deprecated *) let line = ifflat space hardline let linebreak = ifflat empty hardline let softline = group line let softbreak = group linebreak menhir-20130116/src/parameterizedGrammar.mli0000644000175000017500000000334412075533603017730 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. 
*) (* *) (**************************************************************************) (* $Id: parameterizedGrammar.mli,v 1.6 2005/12/01 16:20:06 regisgia Exp $ *) (* This turns a grammar where nonterminal symbols can be parameterized into a grammar where nonterminal symbols are not parameterized. The transformation is a textual expansion process, whose termination is guaranteed by a simple type system. Expansion creates new nonterminal symbols whose names contain parentheses and commas. These names can be printed directly in informational messages (error messages, conflict reports, descriptions of the automaton, etc.). However, they must be sanitized via [Misc.normalize] when printed in a context where a valid identifier is expected. *) val expand : InternalSyntax.grammar -> UnparameterizedSyntax.grammar menhir-20130116/src/parameters.ml0000644000175000017500000000445412075533603015562 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. 
*) (* *) (**************************************************************************) (* $Id: parameters.ml,v 1.12 2005/12/02 16:16:22 regisgia Exp $ *) (* TEMPORARY clean up and write an .mli file *) open Syntax open Misc open Positions let app p ps = match ps with | [] -> ParameterVar p | _ -> ParameterApp (p, ps) let oapp1 o p = match o with | None -> p | Some var -> ParameterApp (var, [ p ]) let unapp = function | ParameterVar x -> (x, []) | ParameterApp (p, ps) -> (p, ps) let rec map f = function | ParameterVar x -> ParameterVar (f x) | ParameterApp (p, ps) -> ParameterApp (f p, List.map (map f) ps) let rec fold f init = function | ParameterVar x -> f init x | ParameterApp (p, ps) -> f (List.fold_left (fold f) init ps) p let identifiers m p = fold (fun acu x -> StringMap.add x.value x.position acu) m p type t = parameter let rec equal x y = match x, y with | ParameterVar x, ParameterVar y when x.value = y.value -> true | ParameterApp (p1, p2), ParameterApp (p1', p2') -> p1.value = p1'.value && List.for_all2 equal p2 p2' | _ -> false let hash = function | ParameterVar x | ParameterApp (x, _) -> Hashtbl.hash (Positions.value x) let position = function | ParameterVar x | ParameterApp (x, _) -> Positions.position x let with_pos p = Positions.with_pos (position p) p menhir-20130116/src/reachability.mli0000644000175000017500000000232112075533603016217 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. 
*) (* *) (**************************************************************************) (* This extremely simple analysis restricts a grammar to the set of nonterminals that are reachable, via productions, from the start nonterminals. *) val trim: UnparameterizedSyntax.grammar -> UnparameterizedSyntax.grammar menhir-20130116/src/lexdep.mll0000644000175000017500000000410512075533603015045 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This code analyzes the output of [ocamldep] and returns the list of [.cmi] files that the [.cmo] file depends on. *) { open Lexing exception Error of string let fail lexbuf = raise (Error (Printf.sprintf "failed to make sense of ocamldep's output (character %d).\n" lexbuf.lex_curr_p.pos_cnum) ) } let newline = ('\n' | '\r' | "\r\n") let whitespace = ( ' ' | '\t' | ('\\' newline) ) let entrychar = [^ '\n' '\r' '\t' ' ' '\\' ':' ] let entry = ((entrychar+ as basename) ".cm" ('i' | 'o' | 'x') as filename) (* [main] recognizes a sequence of lines, where a line consists of an entry, followed by a colon, followed by a list of entries. *) rule main = parse | eof { [] } | entry whitespace* ":" { let bfs = collect [] lexbuf in ((basename, filename), bfs) :: main lexbuf } | _ { fail lexbuf } (* [collect] recognizes a list of entries, separated with spaces and ending in a newline. 
*) and collect bfs = parse | whitespace+ entry { collect ((basename, filename) :: bfs) lexbuf } | whitespace* newline { bfs } | _ | eof { fail lexbuf } menhir-20130116/src/packedIntArray.mli0000644000175000017500000000463712075533603016474 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (* A packed integer array is represented as a pair of an integer [k] and a string [s]. The integer [k] is the number of bits per integer that we use. The string [s] is just an array of bits, which is read in 8-bit chunks. *) (* The ocaml programming language treats string literals and array literals in slightly different ways: the former are statically allocated, while the latter are dynamically allocated. (This is rather arbitrary.) In the context of Menhir's table-based back-end, where compact, immutable integer arrays are needed, ocaml strings are preferable to ocaml arrays. *) type t = int * string (* [pack a] turns an array of integers into a packed integer array. *) (* Because the sign bit is the most significant bit, the magnitude of any negative number is the word size. In other words, [pack] does not achieve any space savings as soon as [a] contains any negative numbers, even if they are ``small''. *) val pack: int array -> t (* [get t i] returns the integer stored in the packed array [t] at index [i]. *) (* Together, [pack] and [get] satisfy the following property: if the index [i] is within bounds, then [get (pack a) i] equals [a.(i)]. 
*) val get: t -> int -> int (* [get1 t i] returns the integer stored in the packed array [t] at index [i]. It assumes (and does not check) that the array's bit width is [1]. The parameter [t] is just a string. *) val get1: string -> int -> int menhir-20130116/src/unparameterizedSyntax.mli0000644000175000017500000000531712075533603020175 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* A parameterized branch may instantiate parameterized non terminals. If the parameterized branch contributes to the definition of a parameterized terminal, then the instantiation of parameterized non terminals that are defined simultaneously must only be done with formal parameters. Furthermore, all the parameterized non terminals that are in a common mutual recursive definition must have the same arity. These conditions are sufficient to ensure termination of expansion. For example: C[x] : ... // This definition does not involve A or B. A[x,y] : B[x,y] C[Y] // This mutual recursive definition is ok. B[x,y] : A[x,y] D[x] : E[D[x]] // This one is incorrect. E[y] : D[y] *) open Syntax type branch = { branch_position : Positions.t; producers : (symbol * identifier option) list; (* TEMPORARY convention renverse par rapport syntax.mli; faire un type record au lieu d'une paire? 
*) action : action; branch_shift_precedence : branch_shift_precedence; branch_reduce_precedence : branch_reduce_precedence } type rule = { branches : branch list; positions : Positions.t list; (* This flag is not relevant after the NonTerminalInlining.inline pass. *) inline_flag : bool; } type grammar = { preludes : Stretch.t list; postludes : Syntax.trailer list; parameters : Stretch.t list; start_symbols : StringSet.t; types : Stretch.ocamltype StringMap.t; tokens : Syntax.token_properties StringMap.t; rules : rule StringMap.t; } menhir-20130116/src/interpret.ml0000644000175000017500000001255512075533603015434 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module is in charge of handling the [--interpret] option, if it is present. *) open Grammar module I = Invariant (* artificial dependency; ensures that [Invariant] runs first *) (* --------------------------------------------------------------------------- *) (* A sentence is a pair of an optional non-terminal start symbol and a list of terminal symbols. *) type sentence = Nonterminal.t option * Terminal.t list (* --------------------------------------------------------------------------- *) (* [stream] turns a finite list of terminals into a stream of terminals. *) exception EndOfStream let stream (toks : Terminal.t list) : unit -> Terminal.t * Lexing.position * Lexing.position = let toks = ref toks in fun () -> let tok = match !toks with | tok :: more -> (* Take a token off the list, and return it. 
*) toks := more; tok | [] -> (* The finite list has been exhausted. Here, two plausible behaviors come to mind. The first behavior consists in raising an exception. In that case, we are creating a finite stream, and it is up to the parser to not read past its end. The second behavior consists in returning a designated token. In that case, we are creating an infinite, eventually constant, stream. The choice between these two behaviors is somewhat arbitrary; furthermore, in the second case, the choice of the designated token is arbitrary as well. Here, we adopt the second behavior if and only if the grammar has an EOF token, and we use EOF as the designated token. Again, this is arbitrary, and could be changed in the future. *) match Terminal.eof with | Some eof -> eof | None -> raise EndOfStream in (* For now, return dummy positions. *) tok, Lexing.dummy_pos, Lexing.dummy_pos (* --------------------------------------------------------------------------- *) (* [interpret] interprets a sentence. *) let interpret ((nto, toks) : sentence) : unit = (* Check whether a start symbol was provided. If not, use the grammar's unique start symbol, if there is one. *) (* The code that finds the unique start symbol is not very pretty. *) let nt = match nto, ProductionMap.is_singleton Lr1.entry with | Some nt, _ -> nt | None, Some (prod, _) -> begin match Production.classify prod with | Some nt -> nt | None -> assert false end | None, None -> Error.error [] "Because the grammar has multiple start symbols, each of the\n\ sentences provided on the standard input channel must be of the\n\ form: : *" in (* Run the reference interpreter. This can produce a concrete syntax tree ([Some cst]), fail with a parser error ([None]), or fail with a lexer error ([EndOfStream]). *) (* In either case, we produce just one line of output, so it should be clear to the user which outcomes correspond to which sentences (should multiple sentences be supplied). 
*) begin try match MenhirLib.Convert.Simplified.traditional2revised (ReferenceInterpreter.interpret Settings.trace nt) (stream toks) with | Some cst -> (* Success. *) Printf.printf "ACCEPT"; if Settings.interpret_show_cst then begin print_newline(); Cst.show stdout cst end | None -> (* Parser failure. *) Printf.printf "REJECT" with EndOfStream -> (* Lexer failure. *) Printf.printf "OVERSHOOT" end; print_newline() (* --------------------------------------------------------------------------- *) (* If [--interpret] is set, interpret the sentences found on the standard input channel, then stop, without generating a parser. *) open Lexing let () = if Settings.interpret then begin (* Read a series of sentences from the standard input channel. *) (* For more comfortable interaction, we interpret each sentence as soon as it is read. *) let lexbuf = from_channel stdin in lexbuf.lex_curr_p <- { lexbuf.lex_curr_p with pos_fname = "(stdin)" }; let read () = try SentenceParser.sentence SentenceLexer.lex lexbuf with Parsing.Parse_error -> Error.error (Positions.lexbuf lexbuf) "Ill-formed input sentence." in let rec loop () = match read() with | None -> exit 0 | Some sentence -> interpret sentence; loop() in loop() end menhir-20130116/src/preFront.ml0000644000175000017500000000720212075533603015210 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) open Printf open Syntax let read_whole_file filename = (* Open the file in text mode, so that (under Windows) CRLF is converted to LF. 
This guarantees that one byte is one character and seems to be required in order to report accurate positions. *) let channel = open_in filename in (* The standard library functions [pos_in] and [seek_in] do not work correctly when CRLF conversion is being performed, so we abandon their use. (They were used to go and extract the text of semantic actions.) Instead we load the entire file into memory up front, and work with a string. *) (* The standard library function [in_channel_length] does not work correctly when CRLF conversion is being performed, so we do not use it to read the whole file. And the standard library function [Buffer.add_channel] uses [really_input] internally, so we cannot use it either. Bummer. *) let block_size = 16384 in let b = Buffer.create block_size in let s = String.create block_size in let rec loop () = let read = input channel s 0 block_size in if read > 0 then begin Buffer.add_substring b s 0 read; loop() end in loop(); close_in channel; Buffer.contents b let load_partial_grammar filename = let validExt = if Settings.coq then ".vy" else ".mly" in if Filename.check_suffix filename validExt then Error.set_filename filename else Error.error [] (sprintf "argument file names should end in %s. \"%s\" is not accepted." validExt filename); try let contents = read_whole_file filename in Error.file_contents := Some contents; let lexbuf = Lexing.from_string contents in lexbuf.Lexing.lex_curr_p <- { Lexing.pos_fname = filename; Lexing.pos_lnum = 1; Lexing.pos_bol = 0; Lexing.pos_cnum = 0 }; let grammar = { (Parser.grammar Lexer.main lexbuf) with ConcreteSyntax.pg_filename = filename } in Error.file_contents := None; (* If there were errors during parsing, stop. This has to be done explicitly here because the parser performs error recovery and does not die at the first error. One could even go further and attempt to work with the grammar in spite of the parse errors, but we choose not to. 
*) if Error.errors () then exit 1 else grammar with Sys_error msg -> Error.error [] msg let partial_grammars = List.map load_partial_grammar Settings.filenames let () = Time.tick "Lexing and parsing" let parameterized_grammar = PartialGrammar.join_partial_grammars partial_grammars let grammar = ParameterizedGrammar.expand parameterized_grammar let () = Time.tick "Joining and expanding" menhir-20130116/src/internalSyntax.mli0000644000175000017500000000264012075533603016606 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) type grammar = { p_preludes : Stretch.t list; p_postludes : Syntax.trailer list; p_parameters : Stretch.t list; p_start_symbols : Positions.t StringMap.t; p_types : (Syntax.parameter * Stretch.ocamltype Positions.located) list; p_tokens : Syntax.token_properties StringMap.t; p_rules : Syntax.parameterized_rule StringMap.t; } menhir-20130116/src/tokenType.mli0000644000175000017500000000327712075533603015554 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. 
*) (* *) (**************************************************************************) (* This module deals with a few details regarding the definition of the [token] type. In particular, if [--only-tokens] was specified, it emits the type definition and exits. *) open Grammar (* This is the conventional name of the [token] type, for use by the code generator. *) val tctoken: string val ttoken: IL.typ (* This is the type of lexers. It refers to the [token] type, which is why it is defined here. *) val tlexer: IL.typ (* This is the definition of the type of tokens, for use by the code generator. *) val tokentypedef: IL.typedef list (* This function prefixes the name of a token with an appropriate Objective Caml module name, if necessary. *) val tokenprefix: string -> string menhir-20130116/src/slr.mli0000644000175000017500000000254512075533603014367 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module extends the LR(0) automaton with lookahead information in order to construct an SLR(1) automaton. The lookahead information is obtained by considering the FOLLOW sets. *) (* This construction is not used by Menhir, but can be used to check whether the grammar is in the class SLR(1). This check is performed when the log level [lg] is at least 1. 
*) menhir-20130116/src/back.mli0000644000175000017500000000210112075533603014453 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module drives the back-end. No functionality is offered by this module. *) menhir-20130116/src/stringSet.mli0000644000175000017500000000205412075533603015544 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) include Set.S with type elt = string val of_list: elt list -> t menhir-20130116/src/lr1.ml0000644000175000017500000010255612075533603014117 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. 
*) (* *) (**************************************************************************) open Grammar open Slr (* artificial dependency; ensures that [Slr] runs first *) (* This module constructs an LR(1) automaton by following Pager's method, that is, by merging states on the fly when they are weakly compatible. *) (* ------------------------------------------------------------------------ *) (* Nodes. *) type node = { (* A node number, assigned during construction. *) raw_number: int; (* A node number, assigned after conflict resolution has taken place and after inacessible nodes have been removed. This yields sequential numbers, from the client's point of view. *) mutable number: int; (* Each node is associated with a state. This state can change during construction as nodes are merged. *) mutable state: Lr0.lr1state; (* Each node carries information about its outgoing transitions and about its reductions. *) mutable transitions: node SymbolMap.t; mutable reductions: Production.index list TerminalMap.t; (* Tokens for which there are several possible behaviors are conflict tokens. *) mutable conflict_tokens: TerminalSet.t; (* Transitions are also stored in reverse, so as to allow reverse traversals of the automaton. *) mutable predecessors: node list; (* If a node has any incoming transitions, then they all carry the same symbol. This is it. *) mutable incoming_symbol: Symbol.t option; (* Transient marks are used during construction and traversal. *) mutable mark: Mark.t; (* (New as of 2012/01/23.) This flag records whether a shift/reduce conflict in this node was solved in favor of neither (%nonassoc). This is later used to forbid a default reduction at this node. 
*) mutable forbid_default_reduction: bool; } module Node = struct type t = node let compare node1 node2 = node1.number - node2.number end module NodeSet = Set.Make (Node) module NodeMap = Map.Make (Node) module ImperativeNodeMap = struct type key = NodeMap.key type 'data t = 'data NodeMap.t ref let create () = ref NodeMap.empty let clear t = t := NodeMap.empty let add k d t = t := NodeMap.add k d !t let find k t = NodeMap.find k !t let iter f t = NodeMap.iter f !t end (* ------------------------------------------------------------------------ *) (* Output debugging information if [--follow-construction] is enabled. *) let follow_transition (again : bool) (source : node) (symbol : Symbol.t) (state : Lr0.lr1state) = if Settings.follow then Printf.fprintf stderr "%s transition out of state r%d along symbol %s.\nProposed target state:\n%s" (if again then "Re-examining" else "Examining") source.raw_number (Symbol.print symbol) (Lr0.print_closure state) let follow_state (msg : string) (node : node) (print : bool) = if Settings.follow then Printf.fprintf stderr "%s: r%d.\n%s\n" msg node.raw_number (if print then Lr0.print_closure node.state else "") (* ------------------------------------------------------------------------ *) (* The following two mutually recursive functions are invoked when the state associated with an existing node grows. The node's descendants are examined and grown into a fixpoint is reached. This work is performed in an eager manner: we do not attempt to build any new transitions until all existing nodes have been suitably grown. Indeed, building new transitions requires making merging decisions, and such decisions cannot be made on a sound basis unless all existing nodes have been suitably grown. Otherwise, one could run into a dead end where two successive, incompatible merging decisions are made, because the consequences of the first decision (growing descendant nodes) were not made explicit before the second decision was taken. 
This was a bug in versions of Menhir ante 20070520. Although I wrote this code independently, I later found out that it seems quite similar to the code in Karl Schimpf's Ph.D. thesis (1981), page 35. It is necessary that all existing transitions be explicit before the [grow] functions are called. In other words, if it has been decided that there will be a transition from [node1] to [node2], then [node1.transitions] must be updated before [grow] is invoked. *) (* [grow node state] grows the existing node [node], if necessary, so that its associated state subsumes [state]. If this represents an actual (strict) growth, then [node]'s descendants are grown as well. *) let rec grow node state = if Lr0.subsume state node.state then follow_state "Target state is unaffected" node false else begin (* In versions of Menhir prior to June 2008, I wrote this: If I know what I am doing, then the new state that is being merged into the existing state should be compatible, in Pager's sense, with the existing node. In other words, compatibility should be preserved through transitions. and the code contained this assertion: assert (Lr0.compatible state node.state); assert (Lr0.eos_compatible state node.state); However, this was wrong. See, for instance, the sample grammars cocci.mly and boris-mini.mly. The problem is particularly clearly apparent in boris-mini.mly, where it only involves inclusion of states -- the definition of Pager's weak compatibility does not enter the picture. Here is, roughly, what is going on. Assume we have built some state A, which, along some symbol S, has a transition to itself. This means, in fact, that computing the successor of A along S yields a *subset* of A, that is, succ(A, S) <= A. Then, we wish to build a new state A', which turns out to be a superset of A, so we decide to grow A. (The fact that A is a subset of A' implies that A and A' are Pager-compatible.) As per the code below, we immediately update the state A in place, to become A'. 
Then, we inspect the transition along symbol S. We find that the state succ(A', S) must be merged into A'. In this situation, the assertions above require succ(A', S) to be compatible with A'. However, this is not necessarily the case. By monotonicity of succ, we do have succ(A, S) <= succ(A', S). But nothing says that succ(A', S) are related with respect to inclusion, or even Pager-compatible. The grammar in boris-mini.mly shows that they are not. *) (* Grow [node]. *) node.state <- Lr0.union state node.state; follow_state "Growing existing state" node true; (* Grow [node]'s successors. *) grow_successors node end (* [grow_successors node] grows [node]'s successors. *) (* Note that, if there is a cycle in the graph, [grow_successors] can be invoked several times at a single node [node], with [node.state] taking on a new value every time. In such a case, this code should be correct, although probably not very efficient. *) and grow_successors node = SymbolMap.iter (fun symbol (successor_node : node) -> let successor_state = Lr0.transition symbol node.state in follow_transition true node symbol successor_state; grow successor_node successor_state ) node.transitions (* ------------------------------------------------------------------------ *) (* Data structures maintained during the construction of the automaton. *) (* A queue of pending nodes, whose outgoing transitions have not yet been built. *) let queue : node Queue.t = Queue.create() (* A mapping of LR(0) node numbers to lists of nodes. This allows us to efficiently find all existing nodes that are core-compatible with a newly found state. *) let map : node list array = Array.create Lr0.n [] (* A counter that allows assigning raw numbers to nodes. *) let num = ref 0 (* ------------------------------------------------------------------------ *) (* [create state] creates a new node that stands for the state [state]. It is expected that [state] does not subsume, and is not subsumed by, any existing state. 
*)

(* [create state] allocates a fresh node for the LR(1) state [state],
   registers it in the per-LR(0)-core table [map], and schedules it for
   exploration by enqueueing it into [queue]. The counter [num] supplies
   raw (discovery-order) numbers; definitive numbers are assigned later,
   during the numbering traversal. *)

let create (state : Lr0.lr1state) : node =

  (* Allocate a new node. *)

  let node = {
    state = state;
    transitions = SymbolMap.empty;
    reductions = TerminalMap.empty;
    conflict_tokens = TerminalSet.empty;
    raw_number = Misc.postincrement num;
    number = 0; (* temporary placeholder; assigned by the numbering traversal *)
    mark = Mark.none;
    predecessors = [];
    incoming_symbol = None;
    forbid_default_reduction = false;
  } in

  (* Update the mapping of LR(0) cores to lists of nodes. *)

  let k = Lr0.core state in
  assert (k < Lr0.n);
  map.(k) <- node :: map.(k);

  (* Enqueue this node for further examination. *)

  Queue.add node queue;

  (* Debugging output. *)

  follow_state "Creating a new state" node false;

  (* Return the freshly created node. *)

  node

(* ------------------------------------------------------------------------ *)

(* Materializing a transition turns its target state into a (fresh or
   existing) node. There are three scenarios: the proposed new state can be
   subsumed by an existing state, compatible with an existing state, or
   neither. *)

exception Subsumed of node

exception Compatible of node

(* [materialize source symbol target] installs, in [source], an outgoing
   transition along [symbol], whose endpoint is either a freshly created
   node for [target] or a suitable existing node (which may be grown in
   place, in Pager mode). *)

let materialize (source : node) (symbol : Symbol.t) (target : Lr0.lr1state) : unit =
  try

    (* Debugging output. *)

    follow_transition false source symbol target;

    (* Find all existing core-compatible states. *)

    let k = Lr0.core target in
    assert (k < Lr0.n);
    let similar = map.(k) in

    (* Check whether we need to create a new node or can reuse an existing
       state. *)

    (* 20120525: the manner in which this check is performed depends on
       [Settings.construction_mode]. There are now three modes. *)

    begin match Settings.construction_mode with
    | Settings.ModeCanonical ->

        (* In a canonical automaton, two states can be merged only if they
           are identical, that is, if each subsumes the other. *)

        List.iter (fun node ->
          if Lr0.subsume target node.state &&
             Lr0.subsume node.state target then
            raise (Subsumed node)
        ) similar

    | Settings.ModeInclusionOnly
    | Settings.ModePager ->

        (* A more aggressive approach is to take subsumption into account:
           if the new candidate state is a subset of an existing state,
           then no new node needs to be created. Furthermore, the existing
           state does not need to be enlarged. *)

        (* 20110124: require error compatibility in addition to
           subsumption. *)

        List.iter (fun node ->
          if Lr0.subsume target node.state &&
             Lr0.error_compatible target node.state then
            raise (Subsumed node)
        ) similar

    end;

    begin match Settings.construction_mode with
    | Settings.ModeCanonical
    | Settings.ModeInclusionOnly ->
        ()

    | Settings.ModePager ->

        (* One can be even more aggressive and check whether the existing
           state is compatible, in Pager's sense, with the new state. If
           so, there is no need to create a new state: just merge the new
           state into the existing one. The result is a state that may be
           larger than each of the two states that have been merged. *)

        (* 20110124: require error compatibility in addition to the
           existing compatibility criteria. *)

        (* Note: this branch is reached only when the construction mode is
           [ModePager], so no further test of the mode is needed here.
           (A redundant re-test of [Settings.construction_mode] was
           removed.) *)

        List.iter (fun node ->
          if Lr0.compatible target node.state &&
             Lr0.eos_compatible target node.state &&
             Lr0.error_compatible target node.state then
            raise (Compatible node)
        ) similar

    end;

    (* The above checks have failed. Create a new node. Two states that are
       in the subsumption relation are also compatible. This implies that
       the newly created node does not subsume any existing states. *)

    source.transitions <- SymbolMap.add symbol (create target) source.transitions

  with

  | Subsumed node ->

      (* Join an existing target node. *)

      follow_state "Joining existing state" node false;
      source.transitions <- SymbolMap.add symbol node source.transitions

  | Compatible node ->

      (* Join and grow an existing target node. It seems important that the
         new transition is created before [grow_successors] is invoked, so
         that all transition decisions made so far are explicit. *)

      node.state <- Lr0.union target node.state;
      follow_state "Joining and growing existing state (Pager says, fine)" node true;
      source.transitions <- SymbolMap.add symbol node source.transitions;
      grow_successors node

(* ------------------------------------------------------------------------ *)

(* The actual construction process. *)

(* Populate the queue with the start nodes and store them in an array. *)

let entry : node ProductionMap.t =
  ProductionMap.map (fun (k : Lr0.node) ->
    create (Lr0.start k)
  ) Lr0.entry

(* Pick a node in the queue, that is, a node whose transitions have not yet
   been built. Build these transitions, and continue. *)

(* Note that building a transition can cause existing nodes to grow, so
   [node.state] is not necessarily invariant throughout the inner loop. *)

let () =
  Misc.qiter (fun node ->
    List.iter (fun symbol ->
      materialize node symbol (Lr0.transition symbol node.state)
    ) (Lr0.outgoing_symbols (Lr0.core node.state))
  ) queue

(* Record how many nodes were constructed. *)

let n =
  !num

let () =
  Error.logA 1 (fun f ->
    Printf.fprintf f "Built an LR(1) automaton with %d states.\n" !num)

(* ------------------------------------------------------------------------ *)

(* We now perform one depth-first traversal of the automaton, recording
   predecessor edges, numbering nodes, sorting nodes according to their
   incoming symbol, building reduction tables, and finding out which nodes
   have conflicts. *)

(* A count of all nodes. The counter is reset here; definitive node numbers
   are assigned by the traversal below. *)

let () =
  num := 0

(* A list of all nodes. *)

let nodes : node list ref =
  ref []

(* A list of nodes with conflicts. *)

let conflict_nodes : node list ref =
  ref []

(* Counts of nodes with shift/reduce and reduce/reduce conflicts. *)

let shift_reduce =
  ref 0

let reduce_reduce =
  ref 0

(* Count of the shift/reduce conflicts that could be silently resolved.
*)

let silently_solved =
  ref 0

(* A mapping of symbols to lists of nodes that admit this incoming
   symbol. *)

let incoming : node list SymbolMap.t ref =
  ref SymbolMap.empty

(* Go ahead. This single depth-first traversal, starting at the entry
   nodes, numbers every reachable node, records predecessor edges, builds
   each node's reduction table, resolves what shift/reduce conflicts it can
   via precedence declarations, and records the remaining conflicts. *)

let () =

  let marked = Mark.fresh() in

  let rec visit node =
    if not (Mark.same node.mark marked) then begin
      node.mark <- marked;
      nodes := node :: !nodes;

      (* Number this node. *)

      let number = !num in
      num := number + 1;
      node.number <- number;

      (* Insertion of a new reduce action into the table of reductions. *)

      let addl prod tok reductions =
        let prods =
          try
            TerminalMap.lookup tok reductions
          with Not_found ->
            []
        in
        TerminalMap.add tok (prod :: prods) reductions
      in

      (* Build the reduction table. Here, we gather all potential
         reductions, without attempting to solve shift/reduce conflicts on
         the fly, because that would potentially hide shift/reduce/reduce
         conflicts, which we want to be aware of. *)

      let reductions =
        List.fold_left (fun reductions (toks, prod) ->
          TerminalSet.fold (addl prod) toks reductions
        ) TerminalMap.empty (Lr0.reductions node.state)
      in

      (* Detect conflicts. Attempt to solve shift/reduce conflicts when
         unambiguously allowed by priorities. *)

      let has_shift_reduce = ref false
      and has_reduce_reduce = ref false in

      node.reductions <-
        TerminalMap.fold (fun tok prods reductions ->
          if SymbolMap.mem (Symbol.T tok) node.transitions then begin

            (* There is a transition in addition to the reduction(s). We
               have (at least) a shift/reduce conflict. *)

            assert (not (Terminal.equal tok Terminal.sharp));
            match prods with
            | [] ->
                assert false
            | [ prod ] ->
                begin

                  (* This is a single shift/reduce conflict. If priorities
                     tell us how to solve it, we follow that and modify the
                     automaton. *)

                  match Precedence.shift_reduce tok prod with

                  | Precedence.ChooseShift ->

                      (* Suppress the reduce action. *)

                      incr silently_solved;
                      reductions

                  | Precedence.ChooseReduce ->

                      (* Record the reduce action and suppress the shift
                         transition. The automaton is modified in place.
                         This can have the subtle effect of making some
                         nodes unreachable. Any conflicts in these nodes
                         will then be ignored (as they should be). *)

                      incr silently_solved;
                      node.transitions <-
                        SymbolMap.remove (Symbol.T tok) node.transitions;
                      TerminalMap.add tok prods reductions

                  | Precedence.ChooseNeither ->

                      (* Suppress the reduce action and the shift
                         transition. *)

                      incr silently_solved;
                      node.transitions <-
                        SymbolMap.remove (Symbol.T tok) node.transitions;
                      node.forbid_default_reduction <- true;
                      reductions

                  | Precedence.DontKnow ->

                      (* Priorities don't allow concluding. Record the
                         existence of a shift/reduce conflict. *)

                      node.conflict_tokens <-
                        Grammar.TerminalSet.add tok node.conflict_tokens;
                      has_shift_reduce := true;
                      TerminalMap.add tok prods reductions

                end

            | prod1 :: prod2 :: _ ->

                (* This is a shift/reduce/reduce conflict. If the
                   priorities are such that each individual shift/reduce
                   conflict is solved in favor of shifting or in favor of
                   neither, then solve the entire composite conflict in the
                   same way. Otherwise, report the conflict. *)

                let choices =
                  List.map (Precedence.shift_reduce tok) prods
                in
                if List.for_all (fun choice ->
                  match choice with
                  | Precedence.ChooseShift -> true
                  | _ -> false
                ) choices then begin

                  (* Suppress the reduce action. *)

                  silently_solved := !silently_solved + List.length prods;
                  reductions

                end
                else if List.for_all (fun choice ->
                  match choice with
                  | Precedence.ChooseNeither -> true
                  | _ -> false
                ) choices then begin

                  (* Suppress the reduce action and the shift
                     transition. *)

                  silently_solved := !silently_solved + List.length prods;
                  node.transitions <-
                    SymbolMap.remove (Symbol.T tok) node.transitions;
                  reductions

                end
                else begin

                  (* Record a shift/reduce/reduce conflict. Keep all
                     reductions. *)

                  node.conflict_tokens <-
                    Grammar.TerminalSet.add tok node.conflict_tokens;
                  has_shift_reduce := true;
                  has_reduce_reduce := true;
                  TerminalMap.add tok prods reductions

                end

          end
          else
            let () =
              match prods with
              | []
              | [ _ ] ->
                  ()
              | prod1 :: prod2 :: _ ->

                  (* There is no transition in addition to the
                     reduction(s). We have a pure reduce/reduce conflict.
                     Do nothing about it at this point. *)

                  node.conflict_tokens <-
                    Grammar.TerminalSet.add tok node.conflict_tokens;
                  has_reduce_reduce := true

            in
            TerminalMap.add tok prods reductions

        ) reductions TerminalMap.empty;

      (* Record statistics about conflicts. *)

      if not (TerminalSet.is_empty node.conflict_tokens) then begin
        conflict_nodes := node :: !conflict_nodes;
        if !has_shift_reduce then
          incr shift_reduce;
        if !has_reduce_reduce then
          incr reduce_reduce
      end;

      (* Continue the depth-first traversal. Record predecessors edges as
         we go. No ancestor appears twice in a list of predecessors,
         because two nodes cannot be related by two edges that carry
         distinct symbols. *)

      SymbolMap.iter (fun symbol son ->
        begin
          match son.incoming_symbol with
          | None ->
              son.incoming_symbol <- Some symbol;
              let others =
                try
                  SymbolMap.find symbol !incoming
                with Not_found ->
                  []
              in
              incoming := SymbolMap.add symbol (son :: others) !incoming
          | Some symbol' ->
              (* A node's incoming symbol is unique. *)
              assert (Symbol.equal symbol symbol')
        end;
        son.predecessors <- node :: son.predecessors;
        visit son
      ) node.transitions

    end
  in

  ProductionMap.iter (fun _ node ->
    visit node
  ) entry

(* The node list was accumulated in reverse discovery order; reversing it
   yields a list sorted by increasing node numbers. *)

let nodes =
  List.rev !nodes

let conflict_nodes =
  !conflict_nodes

let incoming =
  !incoming

let () =
  if !silently_solved = 1 then
    Error.logA 1 (fun f ->
      Printf.fprintf f "One shift/reduce conflict was silently solved.\n")
  else if !silently_solved > 1 then
    Error.logA 1 (fun f ->
      Printf.fprintf f "%d shift/reduce conflicts were silently solved.\n"
        !silently_solved);
  if !num < n then
    Error.logA 1 (fun f ->
      Printf.fprintf f "Only %d states remain after resolving shift/reduce conflicts.\n"
        !num)

let () =
  Grammar.diagnostics()

(* [n] is rebound: it now counts only the nodes that remain reachable after
   the conflict resolution performed above. *)

let n =
  !num

let forbid_default_reduction node =
  node.forbid_default_reduction

(* ------------------------------------------------------------------------ *)

(* Breadth-first iteration over all nodes. *)

let bfs =
  let module B = Breadth.Make (struct
    type vertex = node
    type label = Symbol.t
    let set_mark node m = node.mark <- m
    let get_mark node = node.mark
    let entry f = ProductionMap.iter (fun _ node -> f node) entry
    let successors f node = SymbolMap.iter f node.transitions
  end) in
  B.search

(* ------------------------------------------------------------------------ *)

(* Iteration over all nodes.
*)

let fold f accu =
  List.fold_left f accu nodes

let iter f =
  fold (fun () node -> f node) ()

let map f =
  List.map f nodes

(* [foldx] and [iterx] are variants of [fold] and [iter] that skip the
   nodes that have no incoming symbol, i.e. the start nodes. *)

let foldx f =
  fold (fun accu node ->
    match node.incoming_symbol with
    | None -> accu
    | Some _ -> f accu node)

let iterx f =
  iter (fun node ->
    match node.incoming_symbol with
    | None -> ()
    | Some _ -> f node)

(* -------------------------------------------------------------------------- *)

(* Our output channel. It is opened lazily, so the [.automaton] file is
   created only if something is actually written to it. *)

let out =
  lazy (open_out (Settings.base ^ ".automaton"))

(* ------------------------------------------------------------------------ *)

(* If requested, dump a verbose description of the automaton. *)

let () =
  Time.tick "Construction of the LR(1) automaton";
  if Settings.dump then begin
    fold (fun () node ->
      let out = Lazy.force out in
      Printf.fprintf out "State %d%s:\n%s"
        node.number
        (if Settings.follow then Printf.sprintf " (r%d)" node.raw_number else "")
        (Lr0.print node.state);
      SymbolMap.iter (fun symbol node ->
        Printf.fprintf out "-- On %s shift to state %d\n"
          (Symbol.print symbol) node.number
      ) node.transitions;
      TerminalMap.iter (fun tok prods ->
        List.iter (fun prod ->
          (* TEMPORARY: factor out the symbols that lead to reducing one
             same production *)
          Printf.fprintf out "-- On %s " (Terminal.print tok);
          match Production.classify prod with
          | Some nt ->
              Printf.fprintf out "accept %s\n" (Nonterminal.print false nt)
          | None ->
              Printf.fprintf out "reduce production %s\n" (Production.print prod)
        ) prods
      ) node.reductions;
      if not (TerminalSet.is_empty node.conflict_tokens) then
        Printf.fprintf out "** Conflict on %s\n"
          (TerminalSet.print node.conflict_tokens);
      Printf.fprintf out "\n%!"
    ) ();
    Time.tick "Dumping the LR(1) automaton"
  end

(* ------------------------------------------------------------------------ *)

(* [reverse_dfs goal] performs a reverse depth-first search through the
   automaton, starting at node [goal], and marking the nodes traversed.
It returns a function that tells whether a node is marked, that is, whether a path leads from that node to the goal node. *) let reverse_dfs goal = let mark = Mark.fresh() in let marked node = Mark.same node.mark mark in let rec visit node = if not (marked node) then begin node.mark <- mark; List.iter visit node.predecessors end in visit goal; marked (* ------------------------------------------------------------------------ *) (* Iterating over all nodes that are targets of edges carrying a certain symbol. The sources of the corresponding edges are also provided. *) let targets f accu symbol = let targets = try SymbolMap.find symbol incoming with Not_found -> (* There are no incoming transitions on the start symbols. *) [] in List.fold_left (fun accu target -> f accu target.predecessors target ) accu targets (* ------------------------------------------------------------------------ *) (* Converting a start node into the single item that it contains. *) let start2item node = let state : Lr0.lr1state = node.state in let core : Lr0.node = Lr0.core state in let items : Item.Set.t = Lr0.items core in assert (Item.Set.cardinal items = 1); Item.Set.choose items (* ------------------------------------------------------------------------ *) (* Accessors. *) let number node = node.number let state node = node.state let transitions node = node.transitions let reductions node = node.reductions let conflicts f = List.iter (fun node -> f node.conflict_tokens node ) conflict_nodes let incoming_symbol node = node.incoming_symbol let predecessors node = node.predecessors (* ------------------------------------------------------------------------ *) (* This inverts a mapping of tokens to productions into a mapping of productions to sets of tokens. *) (* This is needed, in [CodeBackend], to avoid producing two (or more) separate branches that call the same [reduce] function. Instead, we generate just one branch, guarded by a [POr] pattern. 
*)

let invert reductions : TerminalSet.t ProductionMap.t =

  (* For each token, fold its (unique) production into the inverse map,
     accumulating the set of tokens that select that production.
     [Misc.single] enforces that each token maps to a single production. *)

  let record tok prods inverse =
    let prod = Misc.single prods in
    let previously_known =
      try
        ProductionMap.lookup prod inverse
      with Not_found ->
        TerminalSet.empty
    in
    ProductionMap.add prod (TerminalSet.add tok previously_known) inverse
  in
  TerminalMap.fold record reductions ProductionMap.empty

(* ------------------------------------------------------------------------ *)

(* Computing which terminal symbols a state is willing to act upon.

   This function is currently unused, but could be used as part of an error
   reporting system. One must keep in mind that, due to the merging of
   states, a state might be willing to perform a reduction on a certain
   token, yet the reduction can take us to another state where this token
   causes an error. In other words, the set of terminal symbols computed
   here is really an over-approximation of the set of symbols that will not
   cause an error. And there seems to be no way of performing an exact
   computation, as we would need to know not only the current state, but
   the contents of the stack as well. *)

let acceptable_tokens (s : node) =

  (* If this state is willing to act on the error token, ignore it -- we do
     not wish to report that an error would be accepted in this state :-) *)

  let transitions =
    SymbolMap.remove (Symbol.T Terminal.error) (transitions s)
  and reductions =
    TerminalMap.remove Terminal.error (reductions s)
  in

  (* Accumulate the tokens carried by outgoing transitions. *)

  let shiftable =
    SymbolMap.fold (fun symbol _ accu ->
      match symbol with
      | Symbol.T tok ->
          TerminalSet.add tok accu
      | Symbol.N _ ->
          accu
    ) transitions TerminalSet.empty
  in

  (* Add the tokens that permit reduction, and we are done. *)

  ProductionMap.fold (fun _ toks accu ->
    TerminalSet.union toks accu
  ) (invert reductions) shiftable

(* ------------------------------------------------------------------------ *)

(* Report statistics. *)

(* Produce the reports.
*)

let () =
  if !shift_reduce = 1 then
    Error.grammar_warning [] "one state has shift/reduce conflicts."
  else if !shift_reduce > 1 then
    Error.grammar_warning []
      (Printf.sprintf "%d states have shift/reduce conflicts." !shift_reduce);
  if !reduce_reduce = 1 then
    Error.grammar_warning [] "one state has reduce/reduce conflicts."
  else if !reduce_reduce > 1 then
    Error.grammar_warning []
      (Printf.sprintf "%d states have reduce/reduce conflicts." !reduce_reduce)

(* There is a global check for errors at the end of [Invariant], so we do
   not need to check & stop here. *)

(* ------------------------------------------------------------------------ *)

(* When requested by the code generator, apply default conflict resolution
   to ensure that the automaton is deterministic. *)

(* [best prod prods] chooses which production should be reduced among the
   list [prod :: prods]. It fails if no best choice exists. *)

let rec best choice = function
  | [] ->
      choice
  | prod :: prods ->
      match Precedence.reduce_reduce choice prod with
      | Some choice ->
          best choice prods
      | None ->
          (* No best choice exists; signal an error, then return an
             arbitrary choice so that processing can continue. *)
          Error.signal (Production.positions choice @ Production.positions prod)
            (Printf.sprintf
               "will not resolve reduce/reduce conflict between\n\
                productions that originate in distinct source files:\n%s\n%s"
               (Production.print choice)
               (Production.print prod));
          choice (* dummy *)

(* Go ahead. *)

let default_conflict_resolution () =

  (* These local counters shadow the module-level statistics; they count
     only the conflicts that are resolved arbitrarily here. *)

  let shift_reduce =
    ref 0
  and reduce_reduce =
    ref 0
  in

  List.iter (fun node ->
    node.reductions <-
      TerminalMap.fold (fun tok prods reductions ->
        try
          let (_ : node) =
            SymbolMap.find (Symbol.T tok) node.transitions
          in

          (* There is a transition at this symbol, so this is a (possibly
             multiway) shift/reduce conflict. Resolve in favor of shifting
             by suppressing all reductions. *)

          shift_reduce := List.length prods + !shift_reduce;
          reductions

        with Not_found ->

          (* There is no transition at this symbol. Check whether we have
             multiple reductions. *)

          match prods with
          | [] ->
              assert false
          | [ _ ] ->
              TerminalMap.add tok prods reductions
          | prod :: ((_ :: _) as prods) ->

              (* We have a reduce/reduce conflict. Resolve, if possible, in
                 favor of a single reduction. This reduction must be
                 preferrable to each of the others. *)

              reduce_reduce := List.length prods + !reduce_reduce;
              TerminalMap.add tok [ best prod prods ] reductions

      ) node.reductions TerminalMap.empty
  ) conflict_nodes;

  if !shift_reduce = 1 then
    Error.warning [] "one shift/reduce conflict was arbitrarily resolved."
  else if !shift_reduce > 1 then
    Error.warning []
      (Printf.sprintf "%d shift/reduce conflicts were arbitrarily resolved."
         !shift_reduce);
  if !reduce_reduce = 1 then
    Error.warning [] "one reduce/reduce conflict was arbitrarily resolved."
  else if !reduce_reduce > 1 then
    Error.warning []
      (Printf.sprintf "%d reduce/reduce conflicts were arbitrarily resolved."
         !reduce_reduce);

  (* Now, ensure that states that have a reduce action at the pseudo-token
     "#" have no other action. *)

  let ambiguities =
    ref 0
  in
  fold (fun () node ->
    try

      let prods, reductions =
        TerminalMap.lookup_and_remove Terminal.sharp node.reductions
      in
      let prod =
        Misc.single prods
      in

      (* This node has a reduce action at "#". Determine whether there
         exist other actions. If there exist any other actions, suppress
         this reduce action, and signal an ambiguity.

         We signal an ambiguity even in the case where all actions at this
         node call for reducing a single production. Indeed, in that case,
         even though we know that this production must be reduced, we do
         not know whether we should first discard the current token (and
         call the lexer).
*) let has_ambiguity = ref false in let toks = ref TerminalSet.empty in TerminalMap.iter (fun tok prods -> node.reductions <- reductions; has_ambiguity := true; toks := TerminalSet.add tok !toks ) reductions; SymbolMap.iter (fun symbol _ -> match symbol with | Symbol.N _ -> () | Symbol.T tok -> node.reductions <- reductions; has_ambiguity := true; toks := TerminalSet.add tok !toks ) node.transitions; if !has_ambiguity then begin incr ambiguities; if Settings.dump then begin Printf.fprintf (Lazy.force out) "State %d has an end-of-stream conflict. There is a tension between\n\ (1) %s\n\ without even requesting a lookahead token, and\n\ (2) checking whether the lookahead token is %s%s,\n\ which would require some other action.\n\n" (number node) (match Production.classify prod with | Some nt -> Printf.sprintf "accepting %s" (Nonterminal.print false nt) | None -> Printf.sprintf "reducing production %s" (Production.print prod)) (if TerminalSet.cardinal !toks > 1 then "one of " else "") (TerminalSet.print !toks) end end with Not_found -> () ) (); if !ambiguities = 1 then Error.grammar_warning [] "one state has an end-of-stream conflict." else if !ambiguities > 1 then Error.grammar_warning [] (Printf.sprintf "%d states have an end-of-stream conflict." !ambiguities) menhir-20130116/src/lookahead.mli0000644000175000017500000000274712075533603015522 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. 
*) (* *) (**************************************************************************) (* These are the operations required of lookahead sets during a closure computation. This signature is exploited by the functor [Item.Closure]. *) module type S = sig (* The type of lookahead sets. *) type t (* The empty lookahead set. Redundant with the following, but convenient. *) val empty: t (* A concrete, constant set of terminal symbols. *) val constant: Grammar.TerminalSet.t -> t (* [union s1 s2] returns the union of [s1] and [s2]. *) val union: t -> t -> t end menhir-20130116/src/item.ml0000644000175000017500000002672012075533603014355 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) open Grammar (* ------------------------------------------------------------------------ *) (* Items. *) (* An LR(0) item encodes a pair of integers, namely the index of the production and the index of the bullet in the production's right-hand side. *) (* Both integers are packed into a single integer, using 7 bits for the bullet position and the rest (usually 24 bits) for the production index. These widths could be adjusted. *) type t = int let import (prod, pos) = assert (pos < 128); (Production.p2i prod) lsl 7 + pos let export t = (Production.i2p (t lsr 7), t mod 128) (* Comparison. *) let equal (item1 : t) (item2: t) = item1 = item2 (* Position. 
*)

let positions (item : t) =
  let prod, _ = export item in
  Production.positions prod

(* [def item] looks up the production associated with this item in the
   grammar and returns [prod, nt, rhs, pos, length], where [prod] is the
   production's index, [nt] and [rhs] represent the production, [pos] is
   the position of the bullet in the item, and [length] is the length of
   the production's right-hand side. *)

let def t =
  let prod, pos = export t in
  let nt, rhs = Production.def prod in
  let length = Array.length rhs in
  assert ((pos >= 0) && (pos <= length));
  prod, nt, rhs, pos, length

(* [nt item] is the nonterminal symbol on the item's left-hand side. *)

let nt t =
  let _, nt, _, _, _ = def t in
  nt

(* [startnt item] assumes that [item] is an initial item whose right-hand
   side consists of a single nonterminal symbol, and returns that
   symbol. *)

let startnt t =
  let _, _, rhs, pos, length = def t in
  assert (pos = 0 && length = 1);
  match rhs.(0) with
  | Symbol.N nt ->
      nt
  | Symbol.T _ ->
      assert false

(* Printing. *)

let print item =
  let _, nt, rhs, pos, length = def item in
  Printf.sprintf "%s -> %s"
    (Nonterminal.print false nt)
    (Symbol.printaod 0 pos rhs)

(* Classifying items: an item is a shift item when the bullet has not yet
   reached the end of the right-hand side, and a reduce item otherwise. *)

type kind =
  | Shift of Symbol.t * t
  | Reduce of Production.index

let classify item =
  let prod, _, rhs, pos, length = def item in
  if pos = length then
    Reduce prod
  else
    Shift (rhs.(pos), import (prod, pos + 1))

(* Sets of items and maps over items. Hashing these data structures is
   specifically allowed, so balanced trees (for instance) would not be
   applicable here. *)

module Map = Patricia.Big
module Set = Map.Domain

(* ------------------------------------------------------------------------ *)

(* This functor performs precomputation that helps efficiently compute the
   closure of an LR(0) or LR(1) state. The precomputation requires time
   linear in the size of the grammar. The nature of the lookahead sets
   remains abstract. *)

(* The precomputation consists in building the LR(0) nondeterministic
   automaton. This is a graph whose nodes are items and whose edges are
   epsilon transitions. (We do not care about shift transitions here.)
   Lookahead information can be attached to nodes and is propagated through
   the graph during closure computations.
*)

module Closure (L : Lookahead.S) = struct

  (* A state maps items to lookahead information. *)

  type state = L.t Map.t

  (* A node in the graph of items. *)

  type node = {

      (* Nodes are sequentially numbered so as to allow applying Tarjan's
         algorithm (below). *)

      num: int;

      (* Each node is associated with an item. *)

      item: t;

      (* All of the epsilon transitions that leave a node have the same
         behavior with respect to lookahead information. *)

      (* The lookahead set transmitted along an epsilon transition is
         either a constant, or the union of a constant and the lookahead
         set at the source node. The former case corresponds to a source
         item whose trailer is not nullable, the latter to a source item
         whose trailer is nullable. *)

      epsilon_constant: L.t;
      epsilon_transmits: bool;

      (* Each node carries pointers to its successors through epsilon
         transitions. This field is never modified once initialization is
         over. *)

      mutable epsilon_transitions: node list;

      (* The following fields are transient, that is, only used temporarily
         during graph traversals. Marks are used to recognize which nodes
         have been traversed already. Lists of predecessors are used to
         record which edges have been traversed. Lookahead information is
         attached with each node. *)

      mutable mark: Mark.t;
      mutable predecessors: node list;
      mutable lookahead: L.t;

    }

  (* Allocate one graph node per item and build a mapping of items to
     nodes. *)

  let count =
    ref 0

  (* [mapping] is indexed first by production, then by bullet position.
     NOTE(review): [Array.create] was later deprecated in favor of
     [Array.make]; kept as is here. *)

  let mapping : node array array =
    Array.create Production.n [||]

  let item2node item =
    let prod, pos = export item in
    mapping.(Production.p2i prod).(pos)

  let () =
    Production.iter (fun prod ->
      let nt, rhs = Production.def prod in
      let length = Array.length rhs in
      mapping.(Production.p2i prod) <- Array.init (length+1) (fun pos ->

        let item = import (prod, pos) in
        let num = !count in
        count := num + 1;

        (* The lookahead set transmitted through an epsilon transition is
           the FIRST set of the remainder of the source item, plus, if that
           is nullable, the lookahead set of the source item. *)

        let constant, transmits =
          if pos < length then
            let nullable, first = Analysis.nullable_first_rhs rhs (pos + 1) in
            L.constant first, nullable
          else
            (* No epsilon transitions leave this item. *)
            L.empty, false
        in

        {
          num = num;
          item = item;
          epsilon_constant = constant;
          epsilon_transmits = transmits;
          epsilon_transitions = []; (* temporary placeholder *)
          mark = Mark.none;
          predecessors = [];
          lookahead = L.empty;
        }

      )
    )

  (* At each node, compute transitions. An epsilon transition leaves every
     item whose bullet is in front of a nonterminal symbol, and reaches the
     initial item of every production of that nonterminal. *)

  let () =
    Production.iter (fun prod ->
      let nt, rhs = Production.def prod in
      let length = Array.length rhs in
      Array.iteri (fun pos node ->
        node.epsilon_transitions <-
          if pos < length then
            match rhs.(pos) with
            | Symbol.N nt ->
                Production.foldnt nt [] (fun prod nodes ->
                  (item2node (import (prod, 0))) :: nodes
                )
            | Symbol.T _ ->
                []
          else
            []
      ) mapping.(Production.p2i prod)
    )

  (* Detect and reject cycles of transitions that transmit a lookahead set.

     We need to ensure that there are no such cycles in order to be able to
     traverse these transitions in topological order.

     Each such cycle corresponds to a set of productions of the form
     A1 -> A2, A2 -> A3, ..., An -> A1 (modulo nullable trailers).

     Such cycles are unlikely to occur in realistic grammars, so our
     current approach is to reject the grammar if such a cycle exists.
     Actually, according to DeRemer and Pennello (1982), such a cycle is
     exactly an includes cycle, and implies that the grammar is not LR(k)
     for any k, unless A1, ..., An are in fact uninhabited. In other words,
     this is a pathological case. *)

  (* Yes, indeed, this is called a cycle in Aho & Ullman's book, and a loop
     in Grune & Jacobs' book. It is not difficult to see that (provided all
     symbols are inhabited) the grammar is infinitely ambiguous if and only
     if there is a loop. *)

  module P = struct

    type foo = node
    type node = foo

    let n =
      !count

    let index node =
      node.num

    let iter f =
      Array.iter (fun nodes ->
        Array.iter f nodes
      ) mapping

    let successors f node =
      if node.epsilon_transmits then
        List.iter f node.epsilon_transitions

  end

  module T = Tarjan.Run (P)

  (* [cycle scc] reports the items of the strongly connected component
     [scc] as participating in an epsilon-cycle, and aborts. *)

  let cycle scc =
    let items = List.map (fun node -> node.item) scc in
    let positions = List.flatten (List.map positions items) in
    let names = String.concat "\n" (List.map print items) in
    Error.error positions
      (Printf.sprintf
         "the grammar is ambiguous.\n\
          The following items participate in an epsilon-cycle:\n\
          %s" names)

  let () =
    P.iter (fun node ->
      let scc = T.scc node in
      match scc with
      | [] ->
          ()
      | [ node ] ->

          (* This is a strongly connected component of one node. Check
             whether it carries a self-loop. Forbidding self-loops is not
             strictly required by the code that follows, but is consistent
             with the fact that we forbid cycles of length greater than
             1. *)

          P.successors (fun successor ->
            if successor.num = node.num then
              cycle scc
          ) node
      | _ ->

          (* This is a strongly connected component of at least two
             elements. *)

          cycle scc
    )

  (* Closure computation. *)

  let closure (items : state) : state =

    (* Explore the graph forwards, starting from these items. Marks are
       used to tell which nodes have been visited. Build a list of all
       visited nodes; this is in fact the list of all items in the
       closure.

       At initial nodes and when reaching a node through a transition,
       record a lookahead set. When we reach a node through a transition
       that transmits the lookahead set found at its source, record its
       source, so as to allow re-traversing this transition backwards
       (below). *)

    let this = Mark.fresh() in
    let nodes = ref [] in

    let rec visit father transmits toks node =
      if Mark.same node.mark this then begin

        (* Node has been visited already. *)

        node.lookahead <- L.union toks node.lookahead;
        if transmits then
          node.predecessors <- father :: node.predecessors

      end
      else begin

        (* Node is new.
*) node.predecessors <- if transmits then [ father ] else []; node.lookahead <- toks; follow node end and follow node = node.mark <- this; nodes := node :: !nodes; List.iter (visit node node.epsilon_transmits node.epsilon_constant) node.epsilon_transitions in Map.iter (fun item toks -> let node = item2node item in visit node (* dummy! *) false toks node ) items; let nodes = !nodes in (* Explore the graph of transmitting transitions backwards. By hypothesis, it is acyclic, so this is a topological walk. Lookahead sets are inherited through transitions. *) let this = Mark.fresh() in let rec walk node = if not (Mark.same node.mark this) then begin (* Node is new. *) node.mark <- this; (* Explore all predecessors and merge their lookahead sets into the current node's own lookahead set. *) List.iter (fun predecessor -> walk predecessor; node.lookahead <- L.union predecessor.lookahead node.lookahead ) node.predecessors end in List.iter walk nodes; (* Done. Produce a mapping of items to lookahead sets. Clear all transient fields so as to reduce pressure on the GC -- this does not make much difference. *) List.fold_left (fun closure node -> node.predecessors <- []; let closure = Map.add node.item node.lookahead closure in node.lookahead <- L.empty; closure ) Map.empty nodes (* End of closure computation *) end menhir-20130116/src/META0000644000175000017500000000024212075533603013525 0ustar stephstephrequires = "" description = "Runtime support for code generated by Menhir" archive(byte) = "menhirLib.cmo" archive(native) = "menhirLib.cmx" version = "20130116" menhir-20130116/src/codePieces.mli0000644000175000017500000001136612075533603015633 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. 
This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module defines many internal naming conventions for use by the two code generators, [CodeBackend] and [TableBackend]. It also offers a few code generation facilities. *) open IL open Grammar (* ------------------------------------------------------------------------ *) (* Naming conventions. *) (* The type variable associated with a nonterminal [nt]. *) val ntvar : Nonterminal.t -> string (* The variable that holds the environment. This is a parameter to all functions. We do not make it a global variable because we wish to preserve re-entrancy. *) val env : string (* A variable used to hold a semantic value. *) val semv : string (* A variable used to hold a stack. *) val stack: string (* A variable used to hold a state. *) val state: string (* A variable used to hold a token. *) val token: string (* Variables used to hold start and end positions. *) val startp: string val endp: string (* ------------------------------------------------------------------------ *) (* Types for semantic values. *) (* [semvtypent nt] is the type of the semantic value associated with nonterminal [nt]. *) val semvtypent : Nonterminal.t -> typ (* [semvtypetok tok] is the type of the semantic value associated with token [tok]. There is no such type if the token does not have a semantic value. *) val semvtypetok : Terminal.t -> typ list (* [semvtype symbol] is the type of the semantic value associated with [symbol]. *) val semvtype : Symbol.t -> typ list (* [symvalt] returns the empty list if the symbol at hand carries no semantic value and the singleton list [[f t]] if it carries a semantic value of type [t]. 
*) val symvalt : Symbol.t -> (typ -> 'a) -> 'a list (* [symval symbol x] returns either the empty list or the singleton list [[x]], depending on whether [symbol] carries a semantic value. *) val symval : Symbol.t -> 'a -> 'a list (* [tokval] is a version of [symval], specialized for terminal symbols. *) val tokval : Terminal.t -> 'a -> 'a list (* ------------------------------------------------------------------------ *) (* Patterns for tokens. *) (* [tokpat tok] is a pattern that matches the token [tok], without binding its semantic value. *) val tokpat: Terminal.t -> pattern (* [tokpatv tok] is a pattern that matches the token [tok], and binds its semantic value, if it has one, to the variable [semv]. *) val tokpatv: Terminal.t -> pattern (* [tokspat toks] is a pattern that matches any token in the set [toks], without binding its semantic value. *) val tokspat: TerminalSet.t -> pattern (* [destructuretokendef name codomain bindsemv branch] generates the definition of a function that destructure tokens. [name] is the name of the function that is generated. [codomain] is its return type. [bindsemv] tells whether the variable [semv] should be bound. [branch] is applied to each (non-pseudo) terminal and must produce code for each branch. *) val destructuretokendef: string -> typ -> bool -> (Terminal.t -> expr) -> valdef (* ------------------------------------------------------------------------ *) (* Bindings for exotic keywords. *) (* This provides definitions for the [$startofs], [$endofs], and [$previouserror] keywords, if required by a semantic action. The [ofs] keyword family is defined in terms of the [pos] family by accessing the [pos_cnum] field. The [$previouserror] keyword simply provides access to the current value of [env.previouserror]. *) val extrabindings: string -> Action.t -> (pattern * expr) list (* ------------------------------------------------------------------------ *) (* A global variable holds the exception [Error]. 
*) (* The definition of this global variable. *) val excvaldef: valdef (* A reference to this global variable. *) val errorval: expr menhir-20130116/src/settings.mli0000644000175000017500000001212212075533602015416 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module parses the command line. *) (* The list of file names that appear on the command line. *) val filenames: string list (* How to deal with the type of tokens. *) type token_type_mode = | TokenTypeAndCode (* produce the definition of the [token] type and code for the parser *) | TokenTypeOnly (* produce the type definition only *) | CodeOnly of string (* produce the code only, by relying on an external token type *) val token_type_mode: token_type_mode (* How to construct the automaton. *) type construction_mode = | ModeCanonical (* --canonical: canonical Knuth LR(1) automaton *) | ModeInclusionOnly (* --no-pager : states are merged when there is an inclusion relationship, default reductions are used *) | ModePager (* normal mode: states are merged as per Pager's criterion, default reductions are used *) val construction_mode: construction_mode (* Whether conflicts should be explained. *) val explain: bool (* Whether the automaton should be dumped. *) val dump: bool (* Whether the automaton's construction should be explained (very verbose). *) val follow: bool (* Whether the grammar's dependence graph should be dumped. *) val graph: bool (* Whether tracing instructions should be generated. 
*) val trace: bool (* Whether error recovery should be attempted. This consists in discarding tokens, after the [error] token has been shifted, until a token that can be accepted is found. *) val recovery: bool (* Whether one should stop and print the grammar after joining and expanding the grammar. *) type print_mode = | PrintNormal | PrintUnitActions | PrintUnitActionsUnitTokens type preprocess_mode = | PMNormal (* preprocess and continue *) | PMOnlyPreprocess of print_mode (* preprocess, print grammar, stop *) val preprocess_mode: preprocess_mode (* Whether one should invoke ocamlc in order to infer types for all nonterminals. *) val infer: bool (* Whether one should inline the non terminal definitions marked with the %inline keyword. *) val inline: bool (* Whether and how one should invoke ocamldep in order to compute and display dependencies. *) type ocamldep_mode = | OMNone (* do not invoke ocamldep *) | OMRaw (* invoke ocamldep and echo its raw output *) | OMPostprocess (* invoke ocamldep and postprocess its output *) val depend: ocamldep_mode (* Whether comments should be printed or discarded. *) val comment: bool (* This undocumented flag suppresses prefixing of identifiers with an unlikely prefix in the generated code. This increases the code's readability, but can cause identifiers in semantic actions to be captured. *) val noprefix: bool (* This undocumented flag causes the code to be transformed by [Inline]. It is on by default. *) val code_inlining: bool (* How [ocamlc] and [ocamldep] should be invoked. *) val ocamlc: string val ocamldep: string (* How verbose we should be. *) val logG: int (* diagnostics on the grammar *) val logA: int (* diagnostics on the automaton *) val logC: int (* diagnostics on the generated code *) (* Whether tasks should be timed. *) val timings: bool (* The base name that should be used for the files that we create. This name can contain a path. *) val base: string (* The filename of the standard library. 
*) val stdlib_filename : string (* Whether Menhir should behave as an interpreter. *) val interpret : bool (* Whether the interpreter should build and display concrete syntax trees. *) val interpret_show_cst : bool (* Whether to use the table-based back-end ([true]) or the code-based back-end ([false]). *) val table : bool (* Whether to generate a coq description of the grammar and automaton. *) val coq : bool (* Whether the coq description must contain completeness proofs. *) val coq_no_complete : bool (* Whether the coq backend should ignore types and semantic actions. *) val coq_no_actions : bool (* Whether unresolved LR(1) conflicts, useless precedence declarations, productions that are never reduced, etc. should be treated as errors. *) val strict: bool menhir-20130116/src/unparameterizedPrinter.mli0000644000175000017500000000306412075533602020326 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This is a pretty-printer for grammars. *) (* If the [mode] parameter requests ``unit actions'', then semantic actions are dropped: that is, they are replaced with trivial semantic actions that return unit. Accordingly, all [%type] declarations are changed to unit. The prologue and epilogue are dropped. All bindings for semantic values are suppressed. If, furthermore, the [mode] parameter requests ``unit tokens'', then the types carried by tokens are changed to unit. 
*) val print: Settings.print_mode -> out_channel -> UnparameterizedSyntax.grammar -> unit menhir-20130116/src/slr.ml0000644000175000017500000001327712075533603014222 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module extends the LR(0) automaton with lookahead information in order to construct an SLR(1) automaton. The lookahead information is obtained by considering the FOLLOW sets. *) (* This construction is not used by Menhir, but can be used to check whether the grammar is in the class SLR(1). This check is performed when the log level [lg] is at least 1. *) open Grammar (* This flag, which is reserved for internal use, causes more information about SLR(1) conflict states to be printed. *) let tell_me_everything = false (* The following function turns an LR(0) state into an SLR(1) state. *) let make_slr_state (s : Lr0.node) : Lr0.concretelr1state = (* Obtain the set of LR(0) items associated with the state [s]. *) let items = Lr0.items s in (* Unfortunately, this set is not closed. We do not have a function that computes the closure of a set of LR(0) items -- we could build one using [Item.Closure], but that would be overkill. So, we first convert this set to a set of LR(1) items, then compute the closure at this level, and finally we turn this LR(1) state into an SLR(1) state by letting the lookahead sets be the FOLLOW sets. This is somewhat ugly and naïve, but seems to work. *) (* Convert this set to a set of LR(1) items. 
Here, we can use any set of tokens as the lookahead set. We use the empty set. *) let s = Item.Map.lift (fun item -> TerminalSet.empty) items in (* Compute the LR(1) closure. *) let s = Lr0.closure s in (* We now have an LR(1) state that has the correct set of LR(0) items but phony lookahead information. We convert it into an SLR(1) state by deciding that, for each item, the lookahead set is the FOLLOW set of the symbol that appears on the left-hand side of the item. *) Item.Map.fold (fun item toks accu -> let _, nt, _, _, _ = Item.def item in let follow_nt = Analysis.follow nt in assert (TerminalSet.subset toks follow_nt); (* sanity check *) Item.Map.add item follow_nt accu ) s Item.Map.empty (* Insertion of a new reduce action into the table of reductions. Copied from [Lr1] (boo, hiss). *) let addl prod tok reductions = let prods = try TerminalMap.lookup tok reductions with Not_found -> [] in TerminalMap.add tok (prod :: prods) reductions (* Same thing, for a set of tokens. *) let addl prod toks reductions = TerminalSet.fold (addl prod) toks reductions (* The following function turns a closed LR(1) state into a map of terminal symbols to reduction actions. Copied from a related function in [Lr0]. *) let reductions (s : Lr0.concretelr1state) : Production.index list TerminalMap.t = Item.Map.fold (fun item toks reductions -> match Item.classify item with | Item.Reduce prod -> addl prod toks reductions | Item.Shift _ -> reductions ) s TerminalMap.empty (* The following function turns a closed LR(1) state into a set of shift actions. *) let transitions (s : Lr0.concretelr1state) : TerminalSet.t = Item.Map.fold (fun item _ transitions -> match Item.classify item with | Item.Shift (Symbol.T tok, _) -> TerminalSet.add tok transitions | Item.Shift (Symbol.N _, _) | Item.Reduce _ -> transitions ) s TerminalSet.empty (* This function computes the domain of a terminal map, producing a terminal set. 
*) let domain (m : 'a TerminalMap.t) : TerminalSet.t = TerminalMap.fold (fun tok _ accu -> TerminalSet.add tok accu ) m TerminalSet.empty (* The following function checks whether a closed LR(1) state is free of conflicts. *) let state_is_ok (s : Lr0.concretelr1state) : bool = let reductions = reductions s and transitions = transitions s in (* Check for shift/reduce conflicts. *) TerminalSet.disjoint transitions (domain reductions) && (* Check for reduce/reduce conflicts. *) TerminalMap.fold (fun _ prods ok -> ok && match prods with | [] | [ _ ] -> true | _ :: _ :: _ -> false ) reductions true (* The following function counts the number of states in the SLR(1) automaton that have a conflict. *) let count_slr_violations () : int = let count = ref 0 in for s = 0 to Lr0.n - 1 do let s = make_slr_state s in if not (state_is_ok s) then begin incr count; if tell_me_everything then Printf.fprintf stderr "The following SLR(1) state has a conflict:\n%s" (Lr0.print_concrete s) end done; !count (* At log level 1, indicate whether the grammar is SLR(1). *) let () = Error.logG 1 (fun f -> let count = count_slr_violations() in if count = 0 then Printf.fprintf f "The grammar is SLR(1).\n" else Printf.fprintf f "The grammar is not SLR(1) -- %d states have a conflict.\n" count ) menhir-20130116/src/conflict.ml0000644000175000017500000003725312075533603015223 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. 
*) (* *) (**************************************************************************) open Grammar (* -------------------------------------------------------------------------- *) (* Our output channel. *) let out = lazy (open_out (Settings.base ^ ".conflicts")) (* -------------------------------------------------------------------------- *) (* Explaining shift actions. *) (* The existence of a shift action stems from the existence of a shift item in the LR(0) core that underlies the LR(1) state of interest. That is, lookahead sets are not relevant. The existence of a shift item in the LR(0) core is explained by finding a path from a start item to the shift item in the LR(0) nondeterministic automaton, such that the symbols read along this path form the (previously fixed) symbol string that leads to the conflict state in the LR(1) automaton. There may be several such paths: a shortest one is chosen. There may also be several shift items in the conflict state: an arbitrary one is chosen. I believe it would not be interesting to display traces for several shift items: they would be identical except in their last line (where the desired shift item actually appears). *) (* Symbolic execution of the nondeterministic LR(0) automaton. *) (* Configurations are pairs of an LR(0) item and an offset into the input string, which indicates how much has been read so far. *) type configuration0 = Item.t * int (* This function builds a derivation out of a (nonempty, reversed) sequence of configurations. The derivation is constructed from bottom to top, that is, beginning at the last configuration and moving back towards to the start configuration. *) let rec follow derivation offset' = function | [] -> assert (offset' = 0); derivation | (item, offset) :: configs -> let _, _, rhs, pos, _ = Item.def item in let derivation = if offset = offset' then (* This is an epsilon transition. Put a new root node on top of the existing derivation. 
*) Derivation.build pos rhs derivation None else (* This was a shift transition. Tack symbol in front of the forest. *) Derivation.prepend rhs.(pos) derivation in follow derivation offset configs (* Symbolic execution begins with a start item (corresponding to one of the automaton's entry nodes), a fixed string of input symbols, to be fully consumed, and a goal item. The objective is to find a path through the automaton that leads from the start configuration [(stop, 0)] to the goal configuration [(stop, n)], where [n] is the length of the input string. The automaton is explored via breadth-first search. A hash table is used to record which configurations have been visited and to build a spanning tree of shortest paths. *) exception Done let explain_shift_item (start : Item.t) (input : Symbol.t array) (stop : Item.t) : Derivation.t = let n = Array.length input in let table : (configuration0, configuration0 option) Hashtbl.t = Hashtbl.create 1023 in let queue : configuration0 Queue.t = Queue.create() in let enqueue ancestor config = try let _ = Hashtbl.find table config in () with Not_found -> Hashtbl.add table config ancestor; Queue.add config queue in enqueue None (start, 0); try Misc.qiter (function (item, offset) as config -> (* If the item we're looking at is the goal item and if we have read all of the input symbols, stop. *) if (Item.equal item stop) && (offset = n) then raise Done; (* Otherwise, explore the transitions out of this item. *) let prod, _, rhs, pos, length = Item.def item in (* Shift transition, followed only if the symbol matches the symbol found in the input string. *) if (pos < length) && (offset < n) && (Symbol.equal rhs.(pos) input.(offset)) then begin let config' = (Item.import (prod, pos+1), offset+1) in enqueue (Some config) config' end; (* Epsilon transitions. 
*) if pos < length then match rhs.(pos) with | Symbol.N nt -> Production.iternt nt (fun prod -> let config' = (Item.import (prod, 0), offset) in enqueue (Some config) config' ) | Symbol.T _ -> () ) queue; assert false with Done -> (* We have found a (shortest) path from the start configuration to the goal configuration. Turn it into an explicit derivation. *) let configs = Misc.materialize table (stop, n) in let _, _, rhs, pos, _ = Item.def stop in let derivation = Derivation.tail pos rhs in let derivation = follow derivation n configs in derivation (* -------------------------------------------------------------------------- *) (* Explaining reduce actions. *) (* The existence of a reduce action stems from the existence of a reduce item, whose lookahead set contains the token of interest, in the state of interest. Here, lookahead sets are relevant only insofar as they contain or do not contain the token of interest -- in other words, lookahead sets can be abstracted by Boolean values. The existence of the reduce item is explained by finding a path from a start item to the reduce item in the LR(1) nondeterministic automaton, such that the symbols read along this path form the (previously fixed) symbol string that leads to the conflict state in the LR(1) automaton. There may be several such paths: a shortest one is chosen. *) (* Symbolic execution of the nondeterministic LR(1) automaton. *) (* Configurations are pairs of an LR(1) item and an offset into the input string, which indicates how much has been read so far. An LR(1) item is itself represented as the combination of an LR(0) item and a Boolean flag, telling whether the token of interest appears or does not appear in the lookahead set. *) type configuration1 = Item.t * bool * int (* This function builds a derivation out of a sequence of configurations. The end of the sequence is dealt with specially -- we want to explain how the lookahead symbol appears and is inherited. 
Once that is done, the rest (that is, the beginning) of the derivation is dealt with as above. *) let config1toconfig0 (item, _, offset) = (item, offset) let rec follow1 tok derivation offset' = function | [] -> assert (Terminal.equal tok Terminal.sharp); (* One could emit a comment saying that the lookahead token is initially [#]. That comment would have to be displayed above the derivation, though, and there is no support for that at the moment, so let's skip it. *) derivation | (item, _, offset) :: configs -> let _, _, rhs, pos, length = Item.def item in if offset = offset' then (* This is an epsilon transition. Attack a new line and add a comment that explains why the lookahead symbol is produced or inherited. *) let nullable, first = Analysis.nullable_first_rhs rhs (pos + 1) in if TerminalSet.mem tok first then (* The lookahead symbol is produced (and perhaps also inherited, but let's ignore that). *) let e = Analysis.explain_first_rhs tok rhs (pos + 1) in let comment = "lookahead token appears" ^ (if e = "" then "" else " because " ^ e) in let derivation = Derivation.build pos rhs derivation (Some comment) in (* Print the rest of the derivation without paying attention to the lookahead symbols. *) follow derivation offset (List.map config1toconfig0 configs) else begin (* The lookahead symbol is not produced, so it is definitely inherited. *) assert nullable; let comment = "lookahead token is inherited" ^ (if pos + 1 < length then Printf.sprintf " because %scan vanish" (Symbol.printao (pos + 1) rhs) else "") in let derivation = Derivation.build pos rhs derivation (Some comment) in follow1 tok derivation offset configs end else (* This is a shift transition. Tack symbol in front of forest. *) let derivation = Derivation.prepend rhs.(pos) derivation in follow1 tok derivation offset configs (* Symbolic execution is performed in the same manner as above. 
*) let explain_reduce_item (tok : Terminal.t) (start : Item.t) (input : Symbol.t array) (stop : Item.t) : Derivation.t = let n = Array.length input in let table : (configuration1, configuration1 option) Hashtbl.t = Hashtbl.create 1023 in let queue : configuration1 Queue.t = Queue.create() in let enqueue ancestor config = try let _ = Hashtbl.find table config in () with Not_found -> Hashtbl.add table config ancestor; Queue.add config queue in (* If the lookahead token is #, then it initially appear in the lookahead set, otherwise it doesn't. *) enqueue None (start, Terminal.equal tok Terminal.sharp, 0); try Misc.qiter (function (item, lookahead, offset) as config -> (* If the item we're looking at is the goal item and if we have read all of the input symbols, stop. *) if (Item.equal item stop) && lookahead && (offset = n) then raise Done; (* Otherwise, explore the transitions out of this item. *) let prod, nt, rhs, pos, length = Item.def item in (* Shift transition, followed only if the symbol matches the symbol found in the input string. *) if (pos < length) && (offset < n) && (Symbol.equal rhs.(pos) input.(offset)) then begin let config' = (Item.import (prod, pos+1), lookahead, offset+1) in enqueue (Some config) config' end; (* Epsilon transitions. *) if pos < length then match rhs.(pos) with | Symbol.N nt -> let nullable, first = Analysis.nullable_first_rhs rhs (pos + 1) in let first : bool = TerminalSet.mem tok first in let lookahead' = if nullable then first || lookahead else first in Production.iternt nt (fun prod -> let config' = (Item.import (prod, 0), lookahead', offset) in enqueue (Some config) config' ) | Symbol.T _ -> () ) queue; assert false with Done -> (* We have found a (shortest) path from the start configuration to the goal configuration. Turn it into an explicit derivation. 
*) let configs = Misc.materialize table (stop, true, n) in let derivation = Derivation.empty in let derivation = follow1 tok derivation n configs in derivation (* -------------------------------------------------------------------------- *) (* Putting it all together. *) let () = if Settings.explain then begin Lr1.conflicts (fun toks node -> (* Construct a partial LR(1) automaton, looking for a conflict in a state that corresponds to this node. Because Pager's algorithm can merge two states as soon as one of them has a conflict, we can't be too specific about the conflict that we expect to find in the canonical automaton. So, we must supply a set of conflict tokens and accept any kind of conflict that involves one of them. *) (* TEMPORARY with the new compatibility criterion, we can be sure that every conflict token is indeed involved in a conflict. Exploit that? Avoid focusing on a single token? *) let module P = Lr1partial.Run (struct let tokens = toks let goal = node end) in let closure = Lr0.closure P.goal in (* Determine what kind of conflict was found. *) let shift, reduce = Item.Map.fold (fun item toks (shift, reduce) -> match Item.classify item with | Item.Shift (Symbol.T tok, _) when Terminal.equal tok P.token -> shift + 1, reduce | Item.Reduce prod when TerminalSet.mem P.token toks -> shift, reduce + 1 | _ -> shift, reduce ) closure (0, 0) in let kind = if (shift > 0) && (reduce > 1) then "shift/reduce/reduce" else if (shift > 0) then "shift/reduce" else "reduce/reduce" in (* Explain how the conflict state is reached. 
*) let out = Lazy.force out in Printf.fprintf out "\n\ ** Conflict (%s) in state %d.\n\ ** Token%s involved: %s\n%s\ ** This state is reached from %s after reading:\n\n%s\n" kind (Lr1.number node) (if TerminalSet.cardinal toks > 1 then "s" else "") (TerminalSet.print toks) (if TerminalSet.cardinal toks > 1 then Printf.sprintf "** The following explanations concentrate on token %s.\n" (Terminal.print P.token) else "") (Nonterminal.print false (Item.startnt P.source)) (Symbol.printa P.path); (* Examine the items in that state, focusing on one particular token. Out of the shift items, we explain just one -- this seems enough. We explain each of the reduce items. *) (* First, build a mapping of items to derivations. *) let (_ : bool), derivations = Item.Map.fold (fun item toks (still_looking_for_shift_item, derivations) -> match Item.classify item with | Item.Shift (Symbol.T tok, _) when still_looking_for_shift_item && (Terminal.equal tok P.token) -> false, let derivation = explain_shift_item P.source P.path item in Item.Map.add item derivation derivations | Item.Reduce prod when TerminalSet.mem P.token toks -> still_looking_for_shift_item, let derivation = explain_reduce_item P.token P.source P.path item in Item.Map.add item derivation derivations | _ -> still_looking_for_shift_item, derivations ) closure (true, Item.Map.empty) in (* Factor out the common context among all derivations, so as to avoid repeating it. This helps prevent derivation trees from drifting too far away towards the right. It also helps produce sub-derivations that are quite compact. *) let context, derivations = Derivation.factor derivations in (* Display the common context. *) Printf.fprintf out "\n** The derivations that appear below have the following common factor:\ \n** (The question mark symbol (?) represents the spot where the derivations begin to differ.)\n\n"; Derivation.printc out context; (* Then, display the sub-derivations. 
*) Item.Map.iter (fun item derivation -> Printf.fprintf out "\n** In state %d, looking ahead at %s, " (Lr1.number node) (Terminal.print P.token); begin match Item.classify item with | Item.Shift _ -> Printf.fprintf out "shifting is permitted\n** because of the following sub-derivation:\n\n" | Item.Reduce prod -> Printf.fprintf out "reducing production\n** %s\n** is permitted because of the following sub-derivation:\n\n" (Production.print prod) end; Derivation.print out derivation ) derivations; flush out ); Time.tick "Explaining conflicts" end (* ------------------------------------------------------------------------ *) (* Resolve the conflicts that remain in the automaton. *) let () = Lr1.default_conflict_resolution(); Time.tick "Resolving remaining conflicts" menhir-20130116/src/unionFind.mli0000644000175000017500000000501612075533603015514 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* $Id: unionFind.mli,v 1.5 2005/12/01 16:20:07 regisgia Exp $ *) (** This module implements a simple and efficient union/find algorithm. See Robert E. Tarjan, ``Efficiency of a Good But Not Linear Set Union Algorithm'', JACM 22(2), 1975. *) (** The abstraction defined by this module is a set of points, partitioned into equivalence classes. With each equivalence class, a piece of information, of abstract type ['a], is associated; we call it a descriptor. *) type 'a point (** [fresh desc] creates a fresh point and returns it. 
It forms an equivalence class of its own, whose descriptor is [desc]. *) val fresh: 'a -> 'a point (** [find point] returns the descriptor associated with [point]'s equivalence class. *) val find: 'a point -> 'a (** [union point1 point2] merges the equivalence classes associated with [point1] and [point2] (which must be distinct) into a single class whose descriptor is that originally associated with [point2]. *) val union: 'a point -> 'a point -> unit (** [equivalent point1 point2] tells whether [point1] and [point2] belong to the same equivalence class. *) val equivalent: 'a point -> 'a point -> bool (** [eunion point1 point2] is identical to [union], except it does nothing if [point1] and [point2] are already equivalent. *) val eunion: 'a point -> 'a point -> unit (** [redundant] maps all members of an equivalence class, but one, to [true]. *) val redundant: 'a point -> bool (** [change p d] updates the descriptor of [p] to [d]. *) val change: 'a point -> 'a -> unit menhir-20130116/src/codeBackend.ml0000644000175000017500000016751212075533603015606 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* The code generator. *) (* TEMPORARY env.startp seems to be always equal to env.lexbuf.lex_start_p, and similarly for env.endp. Is there a point to copying these positions to the env record? Maybe just making these positions accessible via a single indirection, instead of two? I forget. 
*) module Run (T : sig end) = struct open Grammar open IL open CodeBits open CodePieces open TokenType open Interface (* ------------------------------------------------------------------------ *) (* Here is a description of our code generation mechanism. Every internal function that we produce is parameterized by the parser environment [env], which contains (pointers to) the lexer, the lexing buffer, the last token read, etc. No global variables are exploited, so our parsers are reentrant. The functions that we export do not expect an environment as a parameter; they create a fresh one when invoked. Every state [s] is translated to a [run] function and an [action] function. To a first approximation, the only parameter of the [run] function, besides [env], is the stack. However, in some cases (consult the predicate [runpushes]), the top stack cell is not yet allocated when [run s] is called. The cell's contents are passed as extra parameters, and it is [run]'s responsibility to allocate that cell. (When [run] is the target of a shift transition, the position parameters [startp] and [endp] are redundant with the [env] parameter, because they are always equal to [env.startp] and [env.endp]. However, this does not appear to make a great difference in terms of code size, and makes our life easier, so we do not attempt to eliminate this redundancy.) The first thing in [run] is to discard a token, if the state was entered through a shift transition, and to peek at the lookahead token. When the current token is to be discarded, the [discard] function is invoked. It discards the current token, invokes the lexer to obtain a new token, and returns the latter. When we only wish to peek at the current token, without discarding it, we simply read [env.token]. (We have to be careful in cases where the current lookahead token might be [error], since, in those cases, [env.token] is meaningless; see below.) Once the lookahead token is obtained, [run] calls [action]. 
The parameters of [action] are the stack and the lookahead token. [action] performs a case analysis of the lookahead token. Each branch performs one of the following. In shift branches, control is dispatched to another [run] function, with appropriate parameters, typically the current stack plus the information that should go into the new top stack cell (a state, a semantic value, locations). In reduce branches, a [reduce] function is invoked. In the default branch, error handling is initiated (see below). The [reduce] function associated with production [prod] pops as many stack cells as necessary, retrieving semantic values and the state [s] that initiated the reduction. It then evaluates the semantic action, which yields a new semantic value. (This is the only place where semantic actions are evaluated, so that semantic actions are never duplicated.) It then passes control on to the [goto] function associated with the nonterminal [nt], where [nt] is the left-hand side of the production [prod]. The [goto] function associated with nonterminal [nt] expects just one parameter besides the environment -- namely, the stack. However, in some cases (consult the predicate [gotopushes]), the top stack cell is not allocated yet, so its contents are passed as extra parameters. In that case, [goto] first allocates that cell. Then, it examines the state found in that cell and performs a goto transition, that is, a shift transition on the nonterminal symbol [nt]. This simply consists in passing control to the [run] function associated with the transition's target state. If this case analysis only has one branch, because all transitions for [nt] lead to the same target state, then no case analysis is required. In principle, a stack cell contains a state, a semantic value, and start and end positions. However, the state can be omitted if it is never consulted by a [goto] function. 
The semantic value can be omitted if it is associated with a token that was declared not to carry a semantic value. (One could also omit semantic values for nonterminals whose type was declared to be [unit], but that does not seem very useful.) The start or end position can be omitted if they are associated with a symbol that does not require keeping track of positions. When all components of a stack cell are omitted, the entire cell disappears, so that no memory allocation is required. For each start symbol [nt], an entry point function, named after [nt], is generated. Its parameters are a lexer and a lexing buffer. The function allocates and initializes a parser environment and transfers control to the appropriate [run] function. Our functions are grouped into one huge [let rec] definition. The inliner, implemented as a separate module, will inline functions that are called at most once, remove dead code (although there should be none or next to none), and possibly perform other transformations. I note that, if a state can be entered only through (nondefault) reductions, then, in that state, the lookahead token must be a member of the set of tokens that allow these reductions, and by construction, there must exist an action on that token in that state. Thus, the default branch (which signals an error when the lookahead token is not a member of the expected set) is in fact dead. It would be nice (but difficult) to exploit types to prove that. However, one could at least replace the code of that branch with a simple [assert false]. TEMPORARY do it *) (* ------------------------------------------------------------------------ *) (* Here is a description of our error recovery mechanism. With every state [s], we associate an [error] function. If [s] is willing to act when the lookahead token is [error], then this function tells how. This includes *both* shift *and* reduce actions. (For some reason, yacc/ocamlyacc/mule/bison can only shift on [error].) 
If [s] is unable to act when the lookahead token is [error], then this function pops a stack cell, extracts a state [s'] out of it, and transfers control, via a global [errorcase] dispatch function, to the [error] function associated with [s']. (Because some stack cells do not physically hold a state, this description is somewhat simpler than the truth, but that's the idea.) When an error is detected in state [s], one of two things happens (see [initiate]). a. If [s] can do error recovery and if no token was successfully shifted since the last [error] token was shifted, then the current token is discarded and the current state remains unchanged, that is, the [action] function associated with [s] is re-entered. b. Otherwise, the [error] function associated with [s] is invoked. In case (b), immediately before invoking the [error] function, the counter [env.shifted] is reset to -1. By convention, this means that the current token is discarded and replaced with an [error] token. The [error] token transparently inherits the positions associated with the underlying concrete token. Whenever we attempt to consult the current token, we check whether [env.shifted] is -1 and, if that is the case, resume error handling by calling the [error] function associated with the current state. This allows a series of reductions to correctly take place when the lookahead token is [error]. In many states, though, it is possible to statically prove that [env.shifted] cannot be -1. In that case, we produce a lookup of [env.token] without checking [env.shifted]. The counter [env.shifted] is incremented when a token is shifted. In particular, immediately after the [error] token is shifted, [env.shifted] is zero. The increment is conditional, so as to avoid overflow. It is performed inside [discard]. States with default reductions perform a reduction regardless of the current lookahead token, which can be either [error] or a regular token. 
A question that bothered me for a while was, when unwinding the stack, do we stop at a state that has a default reduction? Should it be considered able to handle the error token? I now believe that the answer is, this cannot happen. Indeed, if a state has a default reduction, then, whenever it is entered, reduction is performed and that state is exited, which means that it is never pushed onto the stack. So, it is fine to consider that a state with a default reduction is unable to handle errors. I note that a state that can handle [error] and has a default reduction must in fact have a reduction action on [error]. A state that can perform error recovery (that is, a state whose incoming symbol is [error]) never performs a default reduction. The reason why this is so is given in [Invariant]. A consequence of this decision is that reduction is not performed until error recovery is successful. This behavior could be surprising if it were the default behavior; however, recall that error recovery is disabled unless [--error-recovery] was specified. When an error is detected and an error production is reduced, the user might like to know how recent the previous error was, so as (for instance) to suppress diagnostic messages if it was too recent. (yacc and ocamlyacc have their own, hard-wired, idiosyncratic mechanism for that.) We provide access to this information as follows. When a new error is detected and [env.shifted] is set to -1, the previous value of [env.shifted] is saved to [env.previouserror]. Thus, the number of tokens that were shifted between the two errors is recorded. This information is then made available to the user via the $previouserror keyword. I note that error recovery, case (a) above, can cause the parser to enter an infinite loop. Indeed, the token stream is in principle infinite -- for instance, many lexers will return an EOF token forever after some finite supply of tokens has been exhausted. 
If we hit EOF while in error recovery mode, and if EOF is not accepted at the current state, we will keep discarding EOF and asking for a new token. The way out of this situation is to design the grammar in such a way that it cannot happen. We provide a warning to help with this task. *) (* The type of environments. *) let tcenv = env let tenv = TypApp (tcenv, []) (* The [assertfalse] function. We have just one of these, in order to save code size. It should become unnecessary when we add GADTs. *) let assertfalse = prefix "fail" (* The [discard] function. *) let discard = prefix "discard" (* The [initenv] function. *) let initenv = prefix "init" (* The [run] function associated with a state [s]. *) let run s = prefix (Printf.sprintf "run%d" (Lr1.number s)) (* The [action] function associated with a state [s]. *) let action s = prefix (Printf.sprintf "action%d" (Lr1.number s)) (* The [goto] function associated with a nonterminal [nt]. *) let goto nt = prefix (Printf.sprintf "goto_%s" (Nonterminal.print true nt)) (* The [reduce] function associated with a production [prod]. *) let reduce prod = prefix (Printf.sprintf "reduce%d" (Production.p2i prod)) (* The [errorcase] function. *) let errorcase = prefix "errorcase" (* The [error] function associated with a state [s]. *) let error s = prefix (Printf.sprintf "error%d" (Lr1.number s)) (* The constant associated with a state [s]. *) let statecon s = dataprefix (Printf.sprintf "State%d" (Lr1.number s)) let estatecon s = EData (statecon s, []) let rec begins_with s1 s2 i1 i2 n1 n2 = if i1 = n1 then true else if i2 = n2 then false else if String.unsafe_get s1 i1 = String.unsafe_get s2 i2 then begins_with s1 s2 (i1 + 1) (i2 + 1) n1 n2 else false let begins_with s1 s2 = begins_with s1 s2 0 0 (String.length s1) (String.length s2) (* This predicate tells whether a data constructor represents a state. It is based on the name, which is inelegant and inefficient. 
TEMPORARY *) let is_statecon : string -> bool = begins_with (dataprefix "State") let pstatecon s = PData (statecon s, []) let pstatescon ss = POr (List.map pstatecon ss) (* The type of states. *) let tcstate = prefix "state" let tstate = TypApp (tcstate, []) (* The [print_token] function. This automatically generated function is used in [--trace] mode. *) let print_token = prefix "print_token" (* Fields in the environment record. *) let flexer = prefix "lexer" let flexbuf = prefix "lexbuf" let ftoken = prefix "token" let fshifted = prefix "shifted" let fstartp = prefix "startp" let fendp = prefix "endp" let fpreviouserror = prefix "previouserror" (* The type variable that represents the stack tail. *) let tvtail = tvprefix "tail" let ttail = TypVar tvtail (* The result type for every function. TEMPORARY *) let tvresult = tvprefix "return" let tresult = TypVar tvresult (* ------------------------------------------------------------------------ *) (* Helpers for code production. *) let concatif condition xs = if condition then xs else [] let insertif condition x = if condition then [ x ] else [] let var x : expr = EVar x let vars xs = List.map var xs let pvar x : pattern = PVar x let magic e : expr = EMagic e let nomagic e = e (* [env.shifted] is either [-1], which means that we have an [error] token at the head of the token stream, or a nonnegative number. (The code in [discard], which increments [env.shifted], takes care to avoid overflow.) The following assertion checks that [env.shifted] is not [-1], that is, it is greater than or equal to [0]. Prior to 2011/01/24, two forms of this test co-existed, but it seems more uniform to have just one form. 
*) let assertshifted : pattern * expr = PUnit, EApp (EVar "assert", [ EApp (EVar "Pervasives.(<>)", [ ERecordAccess (EVar env, fshifted); EIntConst (-1) ]) ]) let etuple = function | [] -> assert false | [ e ] -> e | es -> ETuple es let ptuple = function | [] -> assert false | [ p ] -> p | ps -> PTuple ps let trace (format : string) (args : expr list) : (pattern * expr) list = if Settings.trace then [ PUnit, EApp (EVar "Printf.fprintf", (EVar "Pervasives.stderr") :: (EStringConst (format ^"\n%!")) :: args) ] else [] let tracecomment (comment : string) (body : expr) : expr = if Settings.trace then blet (trace comment [], body) else EComment (comment, body) let auto2scheme t = scheme [ tvtail; tvresult ] t (* ------------------------------------------------------------------------ *) (* Determine whether at least one semantic action mentions $previouserror. *) let previouserror_required : bool = Production.foldx (fun prod accu -> accu || Action.has_previouserror (Production.action prod) ) false (* ------------------------------------------------------------------------ *) (* Determine whether the [goto] function for nonterminal [nt] will push a new cell onto the stack. If it doesn't, then that job is delegated to the [run] functions called by [goto]. One could decide that [gotopushes] always returns true, and produce decent code. As a refinement, we decide to drive the [push] operation inside the [run] functions if all of them are able to eliminate this operation via shiftreduce optimization. This will be the case if all of these [run] functions implement a default reduction of a non-epsilon production. If that is not the case, then [gotopushes] returns true. In general, it is good to place the [push] operation inside [goto], because multiple [reduce] functions transfer control to [goto], and [goto] in turn transfers control to multiple [run] functions. Hence, this is where code sharing is maximal. 
All of the [run] functions that [goto] can transfer control to expect a stack cell of the same shape (indeed, the symbol [nt] is the same in every case, and the state is always represented), which makes this decision possible. *) let gotopushes : Nonterminal.t -> bool = Nonterminal.tabulate (fun nt -> not ( Lr1.targets (fun accu _ target -> accu && match Invariant.has_default_reduction target with | Some (prod, _) -> Production.length prod > 0 | None -> false ) true (Symbol.N nt) ) ) (* ------------------------------------------------------------------------ *) (* Determine whether the [run] function for state [s] will push a new cell onto the stack. Our convention is this. If this [run] function is entered via a shift transition, then it is in charge of pushing a new stack cell. If it is entered via a goto transition, then it is in charge of pushing a new cell if and only if the [goto] function that invoked it did not do so. Last, if this [run] function is invoked directly by an entry point, then it does not push a stack cell. *) let runpushes s = match Lr1.incoming_symbol s with | Some (Symbol.T _) -> true | Some (Symbol.N nt) -> not (gotopushes nt) | None -> false (* ------------------------------------------------------------------------ *) (* In some situations, we are able to fuse a shift (or goto) transition with a reduce transition, which means that we save the cost (in speed and in code size) of pushing and popping the top stack cell. This involves creating a modified version of the [reduce] function associated with a production [prod], where the contents of the top stack cell are passed as extra parameters. Because we wish to avoid code duplication, we perform this change only if all call sites for [reduce] agree on this modified calling convention. At the call site, the optimization is possible only if a stack cell allocation exists and is immediately followed by a call to [reduce]. 
This is the case inside the [run] function for state [s] when [run] pushes a stack cell and performs a default reduction. This optimization amounts to coalescing the push operation inside [run] with the pop operation that follows inside [reduce]. Unit production elimination, on the other hand, would coalesce the pop operation inside [reduce] with the push operation that follows inside [goto]. For this reason, the two are contradictory. As a result, we do not attempt to perform unit production elimination. In fact, we did implement it at one point and found that it was seldom applicable, because preference was given to the shiftreduce optimization. There are cases where shiftreduce optimization does not make any difference, for instance, if production [prod] is never reduced, or if the top stack cell is in fact nonexistent. *) let (shiftreduce : Production.index -> bool), shiftreducecount = Production.tabulateb (fun prod -> (* Check that this production pops at least one stack cell. *) Production.length prod > 0 && (* Check that all call sites push a stack cell and have a default reduction. *) Invariant.fold_reduced (fun s accu -> accu && (match Invariant.has_default_reduction s with None -> false | Some _ -> true) && (runpushes s) ) prod true ) let () = Error.logC 1 (fun f -> Printf.fprintf f "%d out of %d productions exploit shiftreduce optimization.\n" shiftreducecount Production.n) (* Check that, as predicted above, [gotopushes nt] returns [false] only when all of the [run] functions that follow it perform shiftreduce optimization. This can be proved as follows. If [gotopushes nt] returns [false], then every successor state [s] has a default reduction for some non-epsilon production [prod]. Furthermore, all states that can reduce [prod] must be successors of that same [goto] function: indeed, because the right-hand side of the production ends with symbol [nt], every state that can reduce [prod] must be entered through [nt]. 
So, at all such states, [runpushes] is true, which guarantees that [shiftreduce prod] is true as well. *) let () = assert ( Nonterminal.fold (fun nt accu -> accu && if gotopushes nt then true else Lr1.targets (fun accu _ target -> accu && match Invariant.has_default_reduction target with | Some (prod, _) -> shiftreduce prod | None -> false ) true (Symbol.N nt) ) true ) (* ------------------------------------------------------------------------ *) (* Type production. *) (* This is the type of states. Only states that are represented are declared. *) let statetypedef = { typename = tcstate; typeparams = []; typerhs = TDefSum ( Lr1.fold (fun defs s -> if Invariant.represented s then { dataname = statecon s; datavalparams = []; datatypeparams = None } :: defs else defs ) [] ); typeconstraint = None } (* This is the type of parser environments. *) let field modifiable name t = { modifiable = modifiable; fieldname = name; fieldtype = type2scheme t } let envtypedef = { typename = tcenv; typeparams = []; typerhs = TDefRecord ([ (* The lexer itself. *) field false flexer tlexer; (* The lexing buffer. *) field false flexbuf tlexbuf; (* The last token that was read from the lexer. This is the head of the token stream, unless [env.shifted] is [-1]. *) field true ftoken ttoken; (* The start position of the above token. *) field true fstartp tposition; (* The end position of the above token. *) field true fendp tposition; (* How many tokens were successfully shifted since the last [error] token was shifted. When this counter is -1, the head of the token stream is the [error] token, and the contents of the [token] field is irrelevant. The token following [error] is obtained by invoking the lexer again. *) field true fshifted tint; ] @ (* If at least one semantic action mentions $previouserror, then we keep track of this information. 
*) insertif previouserror_required (field true fpreviouserror tint) ); typeconstraint = None } (* [curry] curries the top stack cell in a type [t] of the form [(stack type) arrow (result type)]. [t] remains unchanged if the stack type does not make at least one cell explicit. *) let curry = function | TypArrow (TypTuple (tstack :: tcell), tresult) -> TypArrow (tstack, marrow tcell tresult) | TypArrow _ as t -> t | _ -> assert false (* [curryif true] is [curry], [curryif false] is the identity. *) let curryif flag t = if flag then curry t else t (* Types for stack cells. [celltype tailtype holds_state symbol] returns the type of a stack cell. The parameter [tailtype] is the type of the tail of the stack. The flag [holds_state] tells whether the cell holds a state. The parameter [symbol] is used to determine whether the cell holds a semantic value and what its type is. A subtlety here and in [curry] above is that singleton stack cells give rise to singleton tuple types, which the type printer eliminates, but which do exist internally. As a result, [curry] always correctly removes the top stack cell, even if it is a singleton tuple cell. *) let celltype tailtype holds_state symbol _ = TypTuple ( tailtype :: insertif holds_state tstate @ semvtype symbol @ insertif (Invariant.startp symbol) tposition @ insertif (Invariant.endp symbol) tposition ) (* Types for stacks. [stacktype s] is the type of the stack at state [s]. [reducestacktype prod] is the type of the stack when about to reduce production [prod]. [gotostacktype nt] is the type of the stack when the [goto] function associated with [nt] is called. In all cases, the tail (that is, the unknown part) of the stack is represented by [ttail], currently a type variable. These stack types are obtained by folding [celltype] over a description of the stack provided by module [Invariant]. 
*) let stacktype s = Invariant.fold celltype ttail (Invariant.stack s) let reducestacktype prod = Invariant.fold celltype ttail (Invariant.prodstack prod) let gotostacktype nt = Invariant.fold celltype ttail (Invariant.gotostack nt) (* The type of the [run] function. As announced earlier, if [s] is the target of shift transitions, the type of the stack is curried, that is, the top stack cell is not yet allocated, so its contents are passed as extra parameters. If [s] is the target of goto transitions, the top stack cell is allocated. If [s] is a start state, this issue makes no difference. *) let runtypescheme s = auto2scheme ( arrow tenv ( curryif (runpushes s) ( arrow (stacktype s) tresult ) ) ) (* The type of the [action] function. The top stack cell is not curried. There is an additional parameter of type [token]. *) let actiontypescheme s = auto2scheme (marrow [ tenv; stacktype s; ttoken ] tresult) (* The type of the [goto] function. The top stack cell is curried. *) let gototypescheme nt = auto2scheme (arrow tenv (curry (arrow (gotostacktype nt) tresult))) (* If [prod] is an epsilon production and if the [goto] function associated with it expects a state parameter, then the [reduce] function associated with [prod] also requires a state parameter. *) let reduce_expects_state_param prod = let nt = Production.nt prod in Production.length prod = 0 && Invariant.fold (fun _ holds_state _ _ -> holds_state) false (Invariant.gotostack nt) (* The type of the [reduce] function. If shiftreduce optimization is performed for this production, then the top stack cell is not explicitly allocated. *) let reducetypescheme prod = auto2scheme ( arrow tenv ( curryif (shiftreduce prod) ( arrow (reducestacktype prod) ( arrowif (reduce_expects_state_param prod) tstate tresult ) ) ) ) (* The type of the [errorcase] function. The shape of the stack is unknown, and is determined by examining the state parameter. 
*) let errorcasetypescheme = auto2scheme (marrow [ tenv; ttail; tstate ] tresult) (* The type of the [error] function. The shape of the stack is the one associated with state [s]. *) let errortypescheme s = auto2scheme ( marrow [ tenv; stacktype s ] tresult) (* ------------------------------------------------------------------------ *) (* Code production preliminaries. *) (* This flag will be set to [true] if we ever raise the [Error] exception. This happens when we unwind the entire stack without finding a state that can handle errors. *) let can_die = ref false (* A code pattern for an exception handling construct where both alternatives are in tail position. Concrete syntax for this would be [let x = e in e1 unless Error -> e2]. Since Objective Caml does not support this construct, we emulate it using a combination of [try/with], [match/with], and an [option] value. *) let letunless e x e1 e2 = EMatch ( ETry ( EData ("Some", [ e ]), [ { branchpat = PData (excname, []); branchbody = EData ("None", []) } ] ), [ { branchpat = PData ("Some", [ PVar x ]); branchbody = e1 }; { branchpat = PData ("None", []); branchbody = e2 } ] ) (* ------------------------------------------------------------------------ *) (* Calling conventions. *) (* The contents of a stack cell, exposed as individual parameters. The choice of identifiers is suitable for use in the definition of [run]. *) let runcellparams var holds_state symbol = insertif holds_state (var state) @ symval symbol (var semv) @ insertif (Invariant.startp symbol) (var startp) @ insertif (Invariant.endp symbol) (var endp) (* The contents of a stack cell, exposed as individual parameters, again. The choice of identifiers is suitable for use in the definition of a [reduce] function. [prod] is the production's index. The integer [i] tells which symbol on the right-hand side we are focusing on, that is, which symbol this stack cell is associated with. 
*) let reducecellparams prod i holds_state symbol = let ids = Production.identifiers prod and used = Production.used prod in (* If the semantic value is used in the semantic action, then it is bound to the variable [ids.(i)]. If the semantic value is not used in the semantic action, then it is dropped using a wildcard pattern. *) let semvpat t = if used.(i) then PVar ids.(i) else PWildcard in insertif holds_state (if i = 0 then PVar state else PWildcard) @ symvalt symbol semvpat @ insertif (Invariant.startp symbol) (PVar (Printf.sprintf "_startpos_%s_" ids.(i))) @ insertif (Invariant.endp symbol) (PVar (Printf.sprintf "_endpos_%s_" ids.(i))) (* The contents of a stack cell, exposed as individual parameters, again. The choice of identifiers is suitable for use in the definition of [error]. *) let errorcellparams (i, pat) holds_state symbol _ = i + 1, ptuple ( pat :: insertif holds_state (if i = 0 then PVar state else PWildcard) @ symval symbol PWildcard @ insertif (Invariant.startp symbol) PWildcard @ insertif (Invariant.endp symbol) PWildcard ) (* Calls to [run]. *) let runparams magic var s = var env :: magic (var stack) :: concatif (runpushes s) (Invariant.fold_top (runcellparams var) [] (Invariant.stack s)) let call_run s actuals = EApp (EVar (run s), actuals) (* Calls to [action]. *) let actionparams var = [ var env; var stack; var token ] let call_action s = EApp (EVar (action s), actionparams var) (* The parameters to [reduce]. When shiftreduce optimization is in effect, the top stack cell is not allocated, so extra parameters are required. Note that [shiftreduce prod] and [reduce_expects_state_param prod] are mutually exclusive conditions, so the [state] parameter is never bound twice. *) let reduceparams prod = PVar env :: PVar stack :: concatif (shiftreduce prod) ( Invariant.fold_top (reducecellparams prod (Production.length prod - 1)) [] (Invariant.prodstack prod) ) @ insertif (reduce_expects_state_param prod) (PVar state) (* Calls to [reduce]. 
One must specify the production [prod] as well as the current state [s]. *) let call_reduce prod s = let actuals = (EVar env) :: (EMagic (EVar stack)) :: concatif (shiftreduce prod) (Invariant.fold_top (runcellparams var) [] (Invariant.stack s)) (* compare with [runpushcell s] *) @ insertif (reduce_expects_state_param prod) (estatecon s) in EApp (EVar (reduce prod), actuals) (* Calls to [goto]. *) let gotoparams var nt = var env :: var stack :: Invariant.fold_top (runcellparams var) [] (Invariant.gotostack nt) let call_goto nt = EApp (EVar (goto nt), gotoparams var nt) (* Calls to [errorcase]. *) let errorcaseparams magic var = [ var env; magic (var stack); var state ] let call_errorcase = EApp (EVar errorcase, errorcaseparams magic var) (* Calls to [error]. *) let errorparams magic var = [ var env; magic (var stack) ] let call_error magic s = EApp (EVar (error s), errorparams magic var) let call_error_via_errorcase magic s = (* TEMPORARY document *) if Invariant.represented s then EApp (EVar errorcase, [ var env; magic (var stack); estatecon s ]) else call_error magic s (* Calls to [assertfalse]. *) let call_assertfalse = EApp (EVar assertfalse, [ EVar "()" ]) (* ------------------------------------------------------------------------ *) (* Emit a warning when a state can do error recovery but does not accept EOF. This can lead to non-termination if the end of file is reached while attempting to recover from an error. *) let check_recoverer covered s = match Terminal.eof with | None -> (* We do not know which token represents the end of file, so we say nothing. *) () | Some eof -> if not (TerminalSet.mem eof covered) then (* This state has no (shift or reduce) action at EOF. *) Error.warning [] (Printf.sprintf "state %d can perform error recovery, but does not accept EOF.\n\ ** Hitting the end of file during error recovery will cause non-termination." 
(Lr1.number s)) (* ------------------------------------------------------------------------ *) (* Code production for the automaton functions. *) (* Count how many states actually perform error recovery. This figure is, in general, inferior or equal to the number of states at which [Invariant.recoverer] is true. Indeed, some of these states have a default reduction, while some will accept every token; in either case, error recovery is not performed. *) let recoverers = ref 0 (* Count how many states actually can peek at an error recovery. This figure is, in general, inferior or equal to the number of states at which [Invariant.errorpeeker] is true, because some of these states have a default reduction and will not consult the lookahead token. *) let errorpeekers = ref 0 (* Code for calling the reduction function for token [prod] upon finding a token within [toks]. This produces a branch, to be inserted in an [action] function for state [s]. *) let reducebranch toks prod s = { branchpat = tokspat toks; branchbody = call_reduce prod s } (* Code for shifting from state [s] to state [s'] via the token [tok]. This produces a branch, to be inserted in an [action] function for state [s]. The callee, [run s'], is responsible for taking the current token off the input stream. (There is actually a case where the token is *not* taken off the stream: when [s'] has a default reduction on [#].) It is also responsible for pushing a new stack cell. The rationale behind this decision is that there may be multiple shift transitions into [s'], so we actually share that code by placing it inside [run s'] rather than inside every transition. *) let shiftbranchbody s tok s' = (* Construct the actual parameters for [run s']. 
*) let actuals = (EVar env) :: (EMagic (EVar stack)) :: Invariant.fold_top (fun holds_state symbol -> assert (Symbol.equal (Symbol.T tok) symbol); insertif holds_state (estatecon s) @ tokval tok (EVar semv) @ insertif (Invariant.startp symbol) (ERecordAccess (EVar env, fstartp)) @ insertif (Invariant.endp symbol) (ERecordAccess (EVar env, fendp)) ) [] (Invariant.stack s') in (* Call [run s']. *) tracecomment (Printf.sprintf "Shifting (%s) to state %d" (Terminal.print tok) (Lr1.number s')) (call_run s' actuals) let shiftbranch s tok s' = assert (not (Terminal.pseudo tok)); { branchpat = PData (tokenprefix (Terminal.print tok), tokval tok (PVar semv)); branchbody = shiftbranchbody s tok s' } (* This generates code for pushing a new stack cell upon entering the [run] function for state [s]. *) let runpushcell s e = if runpushes s then let contents = var stack :: Invariant.fold_top (runcellparams var) [] (Invariant.stack s) in mlet [ pvar stack ] [ etuple contents ] e else e let runpushcellunless shiftreduce s e = if shiftreduce then EComment ("Not allocating top stack cell", e) else runpushcell s e (* This generates code for dealing with the lookahead token upon entering the [run] function for state [s]. If [s] is the target of a shift transition, then we must take the current token (which was consumed in the shift transition) off the input stream. Whether [s] was entered through a shift or a goto transition, we want to peek at the next token, unless we are performing a default reduction. The parameter [defred] tells which default reduction, if any, we are about to perform. *) let gettoken s defred e = match Lr1.incoming_symbol s, defred with | Some (Symbol.T _), Some (_, toks) when TerminalSet.mem Terminal.sharp toks -> assert (TerminalSet.cardinal toks = 1); (* There is a default reduction on token [#]. We cannot request the next token, since that might drive the lexer off the end of the input stream, so we cannot call [discard]. Do nothing. 
*) e | Some (Symbol.T _), Some _ -> (* There is some other default reduction. Discard the first input token. *) blet ([ PWildcard, EApp (EVar discard, [ EVar env ]) ], e) | Some (Symbol.T _), None -> (* There is no default reduction. Discard the first input token and peek at the next one. *) blet ([ PVar token, EApp (EVar discard, [ EVar env ]) ], e) | (Some (Symbol.N _) | None), Some _ -> (* There is some default reduction. Do not peek at the input token. *) e | (Some (Symbol.N _) | None), None -> (* There is no default reduction. Peek at the first input token, without taking it off the input stream. This is normally done by reading [env.token], unless the token might be [error]: then, we check [env.shifted] first. *) if Invariant.errorpeeker s then begin incr errorpeekers; EIfThenElse ( EApp (EVar "Pervasives.(=)", [ ERecordAccess (EVar env, fshifted); EIntConst (-1) ]), tracecomment "Resuming error handling" (call_error_via_errorcase magic s), blet ([ PVar token, ERecordAccess (EVar env, ftoken) ], e) ) end else blet ([ assertshifted; PVar token, ERecordAccess (EVar env, ftoken) ], e) (* This produces the definition of a [run] function. *) let rundef s body = let body = tracecomment (Printf.sprintf "State %d:" (Lr1.number s)) body in { valpublic = false; valpat = PVar (run s); valval = EAnnot (EFun (runparams nomagic pvar s, body), runtypescheme s) } (* This produces the definition of an [action] function. *) let actiondef s body = { valpublic = false; valpat = PVar (action s); valval = EAnnot (EFun (actionparams pvar, body), actiontypescheme s) } (* This produces the comment attached with a default reduction. *) let defaultreductioncomment toks e = EPatComment ( "Reducing without looking ahead at ", tokspat toks, e ) (* This produces some bookkeeping code that is used when initiating error handling. First, we copy [env.shifted] to [env.previouserror]. Of course, this is done only if at least one semantic action uses the [$previouserror] keyword. 
Then, we reset the count of tokens shifted since the last error to -1, so that it becomes zero *after* the error token itself is shifted. By convention, when [shifted] is -1, the field [env.token] becomes meaningless and one considers that the first token on the input stream is [error]. As a result, the next peek at the lookahead token will cause error handling to be resumed. The next call to [discard] will take the [error] token off the input stream and increment [env.shifted] to zero. *) let errorbookkeeping e = tracecomment "Initiating error handling" (blet ( concatif previouserror_required [ PUnit, ERecordWrite (EVar env, fpreviouserror, ERecordAccess (EVar env, fshifted)) ] @ [ PUnit, ERecordWrite (EVar env, fshifted, EIntConst (-1)) ], e )) (* This code is used to indicate that a new error has been detected in state [s]. [covered] is the set of tokens that [s] knows how to handle. If I am correct, the count of shifted tokens is never -1 here. Indeed, that would mean that we first found an error, and then signaled another error before being able to shift the first error token. My understanding is that this cannot happen: when the first error is signaled, we end up at a state that is willing to handle the error token, by a series of reductions followed by a shift. In the simplest case, the state [s] cannot do error recovery. In that case, we initiate error handling, which is done by first performing the standard bookkeeping described above, then transferring control to the [error] function associated with [s]. If, on the other hand, [s] can do error recovery, then we check whether any tokens at all were shifted since the last error occurred. If none were, then we discard the current token and transfer control back to the [action] function associated with [s]. The token is discarded via a call to [discard], followed by resetting [env.shifted] to zero, to counter-act the effect of [discard], which increments that counter. 
*) let initiate covered s = blet ( [ assertshifted ], if Invariant.recoverer s then begin incr recoverers; check_recoverer covered s; EIfThenElse ( EApp (EVar "Pervasives.(=)", [ ERecordAccess (EVar env, fshifted); EIntConst 0 ]), blet ( trace "Discarding last token read (%s)" [ EApp (EVar print_token, [ ERecordAccess (EVar env, ftoken) ]) ] @ [ PVar token, EApp (EVar discard, [ EVar env ]); PUnit, ERecordWrite (EVar env, fshifted, EIntConst 0) ], call_action s ), errorbookkeeping (call_error_via_errorcase magic s) ) end else errorbookkeeping (call_error_via_errorcase magic s) ) (* This produces the definitions of the [run] and [action] functions associated with state [s]. The [action] function implements the internal case analysis. It receives the lookahead token as a parameter. It does not affect the input stream. It does not set up exception handlers for dealing with errors. The existence of this internal function is made necessary by the error recovery mechanism (which discards tokens when attempting to resynchronize after an error). In many states, recovery can in fact not be performed, so no self-call to [action] will be generated and [action] will be inlined into [run]. *) let rec runactiondef s : valdef list = match Invariant.has_default_reduction s with | Some (prod, toks) as defred -> (* Perform reduction without looking ahead. In this case, no separate [action] function is required. If shiftreduce optimization is being performed, then no stack cell is allocated. The contents of the top stack cell are passed do [reduce] as extra parameters. *) [ rundef s ( runpushcellunless (shiftreduce prod) s ( gettoken s defred ( defaultreductioncomment toks ( call_reduce prod s ) ) ) ) ] | None -> (* If this state is willing to act on the error token, ignore that -- this is taken care of elsewhere. 
*) let transitions = SymbolMap.remove (Symbol.T Terminal.error) (Lr1.transitions s) and reductions = TerminalMap.remove Terminal.error (Lr1.reductions s) in (* Construct the main case analysis that determines what action should be taken next. A default branch, where an error is detected, is added if the analysis is not exhaustive. In the default branch, we initiate error handling. *) let covered, branches = ProductionMap.fold (fun prod toks (covered, branches) -> (* There is a reduction for these tokens. *) TerminalSet.union toks covered, reducebranch toks prod s :: branches ) (Lr1.invert reductions) (TerminalSet.empty, []) in let covered, branches = SymbolMap.fold (fun symbol s' (covered, branches) -> match symbol with | Symbol.T tok -> (* There is a shift transition for this token. *) TerminalSet.add tok covered, shiftbranch s tok s' :: branches | Symbol.N _ -> covered, branches ) transitions (covered, branches) in let branches = if TerminalSet.subset TerminalSet.universe covered then branches else branches @ [ { branchpat = PWildcard; branchbody = initiate covered s } ] in (* Finally, construct the code for [run] and [action]. The former pushes things onto the stack, obtains the lookahead token, and calls the [action] function. The latter performs the main case analysis on the lookahead token. *) [ rundef s ( runpushcell s ( gettoken s None ( call_action s ) ) ); actiondef s ( EMatch ( EVar token, branches ) ) ] (* This is the body of the [reduce] function associated with production [prod]. *) let reducebody prod = (* Find out about the left-hand side of this production and about the identifiers that have been bound to the symbols in the right-hand side. These represent variables that we should bind to semantic values before invoking the semantic action. *) let nt, rhs = Production.def prod and ids = Production.identifiers prod and used = Production.used prod and length = Production.length prod in (* Build a pattern that represents the shape of the stack. 
Out of the stack, we extract a state (except when the production is an epsilon production) and a number of semantic values. If shiftreduce optimization is being performed, then the top stack cell is not explicitly allocated, so we do not include it in the pattern that is built. *) let (_ : int), pat = Invariant.fold (fun (i, pat) holds_state symbol _ -> i + 1, if i = length - 1 && shiftreduce prod then pat else ptuple (pat :: reducecellparams prod i holds_state symbol) ) (0, PVar stack) (Invariant.prodstack prod) in (* If any identifiers refer to terminal symbols without a semantic value, then bind these identifiers to the unit value. This provides the illusion that every symbol, terminal or nonterminal, has a semantic value. This is more regular and allows applying operators such as ? to terminal symbols without a semantic value. *) let unitbindings = Misc.foldi length (fun i unitbindings -> if used.(i) then match semvtype rhs.(i) with | [] -> (PVar ids.(i), EUnit) :: unitbindings | _ -> unitbindings else unitbindings ) [] in (* If necessary, determine start and end positions for the left-hand side of the production. If the right-hand side is nonempty, this is done by extracting position information out of the first and last symbols of the right-hand side. If it is empty, then both positions are taken to be the current lookahead token's start position. Note that [Keyword.has_leftstart keywords] does not imply [Invariant.startp symbol], and similarly for end positions. 
*) let symbol = Symbol.N nt in let posbindings action = let bind_startp = Action.has_leftstart action || Invariant.startp symbol and bind_endp = Action.has_leftend action || Invariant.endp symbol in insertif bind_startp ( PVar startp, if length > 0 then EVar (Printf.sprintf "_startpos_%s_" ids.(0)) else ERecordAccess (EVar env, fstartp) ) @ insertif bind_endp ( PVar endp, if length > 0 then EVar (Printf.sprintf "_endpos_%s_" ids.(length - 1)) else if bind_startp then EVar startp else ERecordAccess (EVar env, fstartp) ) in (* If this production is one of the start productions, then reducing it means accepting the input. In that case, we return a final semantic value and stop. Otherwise, we transfer control to the [goto] function, unless the semantic action raises [Error], in which case we transfer control to [errorcase]. *) match Production.classify prod with | Some nt -> tracecomment "Accepting" (blet ( [ pat, EVar stack ], EMagic (EVar ids.(0)) )) | None -> let action = Production.action prod in let act = EAnnot (Action.to_il_expr action, type2scheme (semvtypent nt)) in tracecomment (Printf.sprintf "Reducing production %s" (Production.print prod)) (blet ( (pat, EVar stack) :: unitbindings @ posbindings action @ extrabindings fpreviouserror action, (* If the semantic action is susceptible of raising [Error], use a [let/unless] construct, otherwise use [let]. *) if Action.has_syntaxerror action then letunless act semv (call_goto nt) (errorbookkeeping call_errorcase) else blet ([ PVar semv, act ], call_goto nt) )) (* This is the definition of the [reduce] function associated with production [prod]. *) let reducedef prod = { valpublic = false; valpat = PVar (reduce prod); valval = EAnnot ( EFun ( reduceparams prod, reducebody prod ), reducetypescheme prod ) } (* This generates code for pushing a new stack cell inside [goto]. 
*) let gotopushcell nt e = if gotopushes nt then let contents = var stack :: Invariant.fold_top (runcellparams var) [] (Invariant.gotostack nt) in mlet [ pvar stack ] [ etuple contents ] e else e (* This is the heart of the [goto] function associated with nonterminal [nt]. *) let gotobody nt = (* Examine the current state to determine where to go next. *) let branches = Lr1.targets (fun branches sources target -> { branchpat = pstatescon sources; branchbody = call_run target (runparams magic var target) } :: branches ) [] (Symbol.N nt) in match branches with | [] -> (* If there are no branches, then this [goto] function is never invoked. The inliner will drop it, so whatever we generate here is unimportant. *) call_assertfalse | [ branch ] -> (* If there is only one branch, no case analysis is required. This optimization is not strictly necessary if GADTs are used by the compiler to prove that the case analysis is exhaustive. It does improve readability, though, and is also useful if the compiler does not have GADTs. *) EPatComment ( "State should be ", branch.branchpat, branch.branchbody ) | _ -> (* In the general case, we keep the branches computed above and, unless [nt] is universal, add a default branch, which is theoretically useless but helps avoid warnings if the compiler does not have GADTs. *) let default = { branchpat = PWildcard; branchbody = call_assertfalse } in EMatch ( EVar state, branches @ (if Invariant.universal (Symbol.N nt) then [] else [ default ]) ) (* This the [goto] function associated with nonterminal [nt]. *) let gotodef nt = { valpublic = false; valpat = PVar (goto nt); valval = EAnnot (EFun (gotoparams pvar nt, gotopushcell nt (gotobody nt)), gototypescheme nt) } (* ------------------------------------------------------------------------ *) (* Code production for the error handling functions. *) (* This is the body of the [error] function associated with state [s]. 
*) let handle s e = tracecomment (Printf.sprintf "Handling error in state %d" (Lr1.number s)) e let errorbody s = try let s' = SymbolMap.find (Symbol.T Terminal.error) (Lr1.transitions s) in (* There is a shift transition on error. *) handle s ( shiftbranchbody s Terminal.error s' ) with Not_found -> try let prods = TerminalMap.lookup Terminal.error (Lr1.reductions s) in let prod = Misc.single prods in (* There is a reduce transition on error. If shiftreduce optimization is enabled for this production, then we must pop an extra cell for [reduce]'s calling convention to be met. *) let extrapop e = if shiftreduce prod then let pat = ptuple (PVar stack :: Invariant.fold_top (runcellparams pvar) [] (Invariant.stack s)) in blet ([ pat, EVar stack ], e) else e in handle s ( extrapop ( call_reduce prod s ) ) with Not_found -> (* This state is unable to handle errors. Pop the stack to find a state that does handle errors, a state that can further pop the stack, or die. *) match Invariant.rewind s with | Invariant.Die -> can_die := true; ERaise errorval | Invariant.DownTo (w, st) -> let _, pat = Invariant.fold errorcellparams (0, PVar stack) w in blet ( [ pat, EVar stack ], match st with | Invariant.Represented -> call_errorcase | Invariant.UnRepresented s -> call_error magic s ) (* This is the [error] function associated with state [s]. *) let errordef s = { valpublic = false; valpat = PVar (error s); valval = EAnnot ( EFun ( errorparams nomagic pvar, errorbody s ), errortypescheme s ) } (* This is the [errorcase] function. It examines its state parameter and dispatches control to an appropriate [error] function. 
*) let errorcasedef = let branches = Lr1.fold (fun branches s -> if Invariant.represented s then { branchpat = pstatecon s; branchbody = EApp (EVar (error s), [ EVar env; EMagic (EVar stack) ]) } :: branches else branches ) [] in { valpublic = false; valpat = PVar errorcase; valval = EAnnot ( EFun ( errorcaseparams nomagic pvar, EMatch ( EVar state, branches ) ), errorcasetypescheme ) } (* ------------------------------------------------------------------------ *) (* Code production for the entry points. *) (* This is the entry point associated with a start state [s]. By convention, it is named after the nonterminal [nt] that corresponds to this state. This is a public definition. The code initializes a parser environment, an empty stack, and invokes [run]. *) let entrydef s = let nt = Item.startnt (Lr1.start2item s) in let lexer = "lexer" and lexbuf = "lexbuf" in { valpublic = true; valpat = PVar (Nonterminal.print true nt); valval = EAnnot ( EFun ( [ PVar lexer; PVar lexbuf ], blet ( [ PVar env, EApp (EVar initenv, [ EVar lexer; EVar lexbuf ]) ], EMagic (EApp (EVar (run s), [ EVar env; EUnit ])) ) ), entrytypescheme (Nonterminal.print true nt) ) } (* ------------------------------------------------------------------------ *) (* Code production for auxiliary functions. *) (* This is [assertfalse], used when internal failure is detected. This should never happen if our tool is correct. *) let assertfalsedef = { valpublic = false; valpat = PVar assertfalse; valval = EAnnot ( EFun ([ PUnit ], blet ([ PUnit, EApp (EVar "Printf.fprintf", [ EVar "Pervasives.stderr"; EStringConst "Internal failure -- please contact the parser generator's developers.\n%!" ]); ], EApp (EVar "assert", [ efalse ]) ) ), scheme [ "a" ] (arrow tunit (tvar "a")) ) } (* This is [print_token], used to print tokens in [--trace] mode. 
*) let printtokendef = destructuretokendef print_token tstring false (fun tok -> EStringConst (Terminal.print tok)) (* This is [discard], used to take a token off the input stream and query the lexer for a new one. The code queries the lexer for a new token and stores it into [env.token], overwriting the previous token. It also stores the start and positions of the new token. Last, if [env.shifted] has not yet reached its limit, then it is incremented. We use the lexer's [lex_start_p] and [lex_curr_p] fields to extract the start and end positions of the token that we just read. In practice, it seems that [lex_start_p] can be inaccurate (that is the case when the lexer calls itself recursively, instead of simply recognizing an atomic pattern and returning immediately). However, we are 100% compatible with ocamlyacc here, and there is no better solution anyway. *) let discarddef = { valpublic = false; valpat = PVar discard; valval = let lexbuf = "lexbuf" and shifted = "shifted" in EAnnot ( EFun ( [ PVar env ], blet ([ PVar lexbuf, ERecordAccess (EVar env, flexbuf); PVar token, EApp (ERecordAccess (EVar env, flexer), [ EVar lexbuf ]); PUnit, ERecordWrite (EVar env, ftoken, EVar token); PUnit, ERecordWrite (EVar env, fstartp, ERecordAccess (EVar lexbuf, "Lexing.lex_start_p")); PUnit, ERecordWrite (EVar env, fendp, ERecordAccess (EVar lexbuf, "Lexing.lex_curr_p")) ] @ trace "Lookahead token is now %s (%d-%d)" [ EApp (EVar print_token, [ EVar token ]); ERecordAccess (ERecordAccess (EVar env, fstartp), "Lexing.pos_cnum"); ERecordAccess (ERecordAccess (EVar env, fendp), "Lexing.pos_cnum") ] @ [ PVar shifted, EApp (EVar "Pervasives.(+)", [ ERecordAccess (EVar env, fshifted); EIntConst 1 ]); PUnit, EIfThen ( EApp (EVar "Pervasives.(>=)", [ EVar shifted; EIntConst 0 ]), ERecordWrite (EVar env, fshifted, EVar shifted) ) ], EVar token ) ), type2scheme (arrow tenv ttoken) ) } (* This is [initenv], used to allocate a fresh parser environment. 
It performs the very first call to the lexer, and fills in all fields in a straightforward way. *) let initenvdef = let lexer = "lexer" and lexbuf = "lexbuf" in { valpublic = false; valpat = PVar initenv; valval = EAnnot ( EFun ( [ PVar lexer; PVar lexbuf ], blet ( [ PVar token, EApp (EVar lexer, [ EVar lexbuf ]) ] @ trace "Lookahead token is now %s (%d-%d)" [ EApp (EVar print_token, [ EVar token ]); ERecordAccess (ERecordAccess (EVar lexbuf, "Lexing.lex_start_p"), "Lexing.pos_cnum"); ERecordAccess (ERecordAccess (EVar lexbuf, "Lexing.lex_curr_p"), "Lexing.pos_cnum") ], ERecord ([ (flexer, EVar lexer); (flexbuf, EVar lexbuf); (ftoken, EVar token); (fstartp, ERecordAccess (EVar lexbuf, "Lexing.lex_start_p")); (fendp, ERecordAccess (EVar lexbuf, "Lexing.lex_curr_p")); (fshifted, EMaxInt) ] @ insertif previouserror_required (fpreviouserror, EMaxInt) ) ) ), type2scheme (marrow [ tlexer; tlexbuf ] tenv) ) } (* ------------------------------------------------------------------------ *) (* Here is complete code for the parser. *) let program = { paramdefs = Front.grammar.UnparameterizedSyntax.parameters; prologue = Front.grammar.UnparameterizedSyntax.preludes; excdefs = [ excdef ]; typedefs = tokentypedef @ [ envtypedef; statetypedef ]; nonrecvaldefs = [ excvaldef ]; valdefs = ProductionMap.fold (fun _ s defs -> entrydef s :: defs ) Lr1.entry ( Lr1.fold (fun defs s -> runactiondef s @ errordef s :: defs ) ( Nonterminal.foldx (fun nt defs -> gotodef nt :: defs ) (Production.fold (fun prod defs -> if Invariant.ever_reduced prod then reducedef prod :: defs else defs ) [ discarddef; initenvdef; printtokendef; assertfalsedef; errorcasedef ]))); moduledefs = []; postlogue = Front.grammar.UnparameterizedSyntax.postludes } (* ------------------------------------------------------------------------ *) (* We are done! 
*) let () = Error.logC 1 (fun f -> Printf.fprintf f "%d out of %d states can peek at an error.\n\ %d out of %d states can do error recovery.\n" !errorpeekers Lr1.n !recoverers Lr1.n) let () = if not !can_die then Error.logC 1 (fun f -> Printf.fprintf f "The generated parser cannot raise Error.\n") let () = Time.tick "Producing abstract syntax" end menhir-20130116/src/lr0.ml0000644000175000017500000004450412075533603014114 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) open Grammar module InfiniteArray = MenhirLib.InfiniteArray (* ------------------------------------------------------------------------ *) (* Symbolic lookahead information. *) (* A symbolic lookahead set consists of an actual concrete set of terminal symbols and of a number of set variables. Set variables as encoded as integers. *) module SymbolicLookahead = struct type t = TerminalSet.t * CompressedBitSet.t let constant toks = (toks, CompressedBitSet.empty) let empty = constant TerminalSet.empty let union (toks1, vars1) ((toks2, vars2) as s2) = let toks = TerminalSet.union toks1 toks2 and vars = CompressedBitSet.union vars1 vars2 in if toks2 == toks && vars2 == vars then s2 else (toks, vars) let variable (var : int) : t = (TerminalSet.empty, CompressedBitSet.singleton var) let project (toks, vars) = assert (CompressedBitSet.is_empty vars); toks end (* We will perform closure operations over symbolic lookahead sets. 
This allows us to later represent LR(1) states as pairs of an LR(0) node number and an array of concrete lookahead sets. *) module SymbolicClosure = Item.Closure(SymbolicLookahead) (* Closure operations over concrete lookahead sets are also used (when explaining conflicts). One could take another instance of the functor. The approach below is somewhat less elegant and makes each call to [closure] somewhat slower, but saves the cost of instantiating the functor again -- which is linear in the size of the grammar. *) type concretelr1state = TerminalSet.t Item.Map.t let closure (state : concretelr1state) : concretelr1state = Item.Map.map SymbolicLookahead.project (SymbolicClosure.closure (Item.Map.map SymbolicLookahead.constant state)) (* ------------------------------------------------------------------------ *) (* Finding which non-epsilon transitions leave a set of items. This code is parametric in the nature of lookahead sets. *) let transitions (state : 'a Item.Map.t) : 'a Item.Map.t SymbolMap.t = Item.Map.fold (fun item toks transitions -> match Item.classify item with | Item.Shift (symbol, item') -> let items : 'a Item.Map.t = try SymbolMap.find symbol transitions with Not_found -> Item.Map.empty in SymbolMap.add symbol (Item.Map.add item' toks items) transitions | Item.Reduce _ -> transitions ) state SymbolMap.empty (* ------------------------------------------------------------------------ *) (* Determining the reduction opportunities at a (closed) state. They are represented as a list of pairs of a lookahead set and a production index. This code is again parametric in the nature of lookahead sets. *) let reductions (state : 'a Item.Map.t) : ('a * Production.index) list = Item.Map.fold (fun item toks accu -> match Item.classify item with | Item.Reduce prod -> (toks, prod) :: accu | Item.Shift _ -> accu ) state [] (* ------------------------------------------------------------------------ *) (* Construction of the the LR(0) automaton. 
*) (* Nodes are numbered sequentially. *) type node = int (* A symbolic transition is a pair of the target state number and an array of symbolic lookahead sets. The variables in these sets are numbered in [0,g) where g is the number of items in the source LR(0) state. Items are numbered in the order of presentation by [Item.Set.fold]. *) type symbolic_transition_target = node * SymbolicLookahead.t array (* The automaton is represented by (growing) arrays of states (sets of items), symbolic transition information, and symbolic reduction information, indexed by node numbers. Conversely, a hash table maps states (sets of items) to node numbers. *) let n = ref 0 let states : Item.Set.t InfiniteArray.t = InfiniteArray.make Item.Set.empty let _transitions : symbolic_transition_target SymbolMap.t InfiniteArray.t = InfiniteArray.make SymbolMap.empty let _reductions : (SymbolicLookahead.t * Production.index) list InfiniteArray.t = InfiniteArray.make [] let map : (Item.Set.t, node) Hashtbl.t = Hashtbl.create 50021 (* The automaton is built depth-first. *) let rec explore (state : Item.Set.t) : node = (* Find out whether this state was already explored. *) try Hashtbl.find map state with Not_found -> (* If not, create a new node. *) let k = !n in n := k + 1; InfiniteArray.set states k state; Hashtbl.add map state k; (* Build a symbolic version of the current state, where each item is associated with a distinct lookahead set variable, numbered consecutively. *) let (_ : int), (symbolic_state : SymbolicClosure.state) = Item.Set.fold (fun item (i, symbolic_state) -> i+1, Item.Map.add item (SymbolicLookahead.variable i) symbolic_state ) state (0, Item.Map.empty) in (* Compute the symbolic closure. *) let closure = SymbolicClosure.closure symbolic_state in (* Compute symbolic information about reductions. 
*) InfiniteArray.set _reductions k (reductions closure); (* Compute symbolic information about the transitions, and, by dropping the symbolic lookahead information, explore the transitions to further LR(0) states. *) InfiniteArray.set _transitions k (SymbolMap.map (fun symbolic_state -> let (k : node) = explore (Item.Map.domain symbolic_state) in let lookahead : SymbolicLookahead.t array = Array.create (Item.Map.cardinal symbolic_state) SymbolicLookahead.empty in let (_ : int) = Item.Map.fold (fun _ s i -> lookahead.(i) <- s; i+1 ) symbolic_state 0 in ((k, lookahead) : symbolic_transition_target) ) (transitions closure)); k (* Creating a start state out of a start production. It contains a single item, consisting of the start production, at position 0. *) let start prod : Item.Set.t = Item.Set.singleton (Item.import (prod, 0)) (* This starts the construction of the automaton and records the entry nodes in an array. *) let entry : node ProductionMap.t = ProductionMap.start (fun prod -> explore (start prod) ) let () = Hashtbl.clear map let n = !n let () = Error.logA 1 (fun f -> Printf.fprintf f "Built an LR(0) automaton with %d states.\n" n); Time.tick "Construction of the LR(0) automaton" (* ------------------------------------------------------------------------ *) (* Accessors. *) let items node : Item.Set.t = InfiniteArray.get states node (* ------------------------------------------------------------------------ *) (* Help for building the LR(1) automaton. *) (* An LR(1) state is represented as a pair of an LR(0) state number and an array of concrete lookahead sets (whose length depends on the LR(0) state). *) type lr1state = node * TerminalSet.t array (* An encoded LR(1) state can be turned into a concrete representation, that is, a mapping of items to concrete lookahead sets. 
*) let export (k, toksr) = let (_ : int), items = Item.Set.fold (fun item (i, items) -> i+1, Item.Map.add item toksr.(i) items ) (InfiniteArray.get states k) (0, Item.Map.empty) in items (* Displaying a concrete state. *) let print_concrete (state : concretelr1state) = let buffer = Buffer.create 1024 in Item.Map.iter (fun item toks -> Printf.bprintf buffer "%s[ %s ]\n" (Item.print item) (TerminalSet.print toks) ) state; Buffer.contents buffer (* Displaying a state. By default, only the kernel is displayed, not the closure. *) let print state = print_concrete (export state) let print_closure state = print_concrete (closure (export state)) (* The core of an LR(1) state is the underlying LR(0) state. *) let core (k, _) = k (* A sanity check. *) let well_formed (k, toksr) = Array.length toksr = Item.Set.cardinal (InfiniteArray.get states k) (* An LR(1) start state is the combination of an LR(0) start state (which consists of a single item) with a singleton lookahead set that consists of the end-of-file pseudo-token. *) let start k = let state = (k, [| TerminalSet.singleton Terminal.sharp |]) in assert (well_formed state); state (* Interpreting a symbolic lookahead set with respect to a source state. The variables in the symbolic lookahead set (which are integers) are interpreted as indices into the state's array of concrete lookahead sets. The result is a concrete lookahead set. *) let interpret ((_, toksr) as state : lr1state) ((toks, vars) : SymbolicLookahead.t) : TerminalSet.t = assert (well_formed state); CompressedBitSet.fold (fun var toks -> assert (var >= 0 && var < Array.length toksr); TerminalSet.union toksr.(var) toks ) vars toks (* Out of an LR(1) state, one produces information about reductions and transitions. This is done in an efficient way by interpreting the precomputed symbolic information with respect to that state. 
*) let reductions ((k, _) as state : lr1state) : (TerminalSet.t * Production.index) list = List.map (fun (s, prod) -> interpret state s, prod ) (InfiniteArray.get _reductions k) let transitions ((k, _) as state : lr1state) : lr1state SymbolMap.t = SymbolMap.map (fun ((k, sr) : symbolic_transition_target) -> ((k, Array.map (interpret state) sr) : lr1state) ) (InfiniteArray.get _transitions k) let outgoing_symbols (k : node) : Symbol.t list = SymbolMap.domain (InfiniteArray.get _transitions k) let transition symbol ((k, _) as state : lr1state) : lr1state = let ((k, sr) : symbolic_transition_target) = try SymbolMap.find symbol (InfiniteArray.get _transitions k) with Not_found -> assert false (* no transition along this symbol *) in (k, Array.map (interpret state) sr) (* Equality of states. *) let equal ((k1, toksr1) as state1) ((k2, toksr2) as state2) = assert (k1 = k2 && well_formed state1 && well_formed state2); let rec loop i = if i = 0 then true else let i = i - 1 in (TerminalSet.equal toksr1.(i) toksr2.(i)) && (loop i) in loop (Array.length toksr1) (* Subsumption between states. *) let subsume ((k1, toksr1) as state1) ((k2, toksr2) as state2) = assert (k1 = k2 && well_formed state1 && well_formed state2); let rec loop i = if i = 0 then true else let i = i - 1 in (TerminalSet.subset toksr1.(i) toksr2.(i)) && (loop i) in loop (Array.length toksr1) (* This function determines whether two (core-equivalent) states are compatible, according to a criterion that is close to Pager's weak compatibility criterion. Pager's criterion guarantees that if a merged state has a potential conflict at [(i, j)] -- that is, some token [t] appears within the lookahead sets of both item [i] and item [j] -- then there exists a state in the canonical automaton that also has a potential conflict at [(i, j)] -- that is, some token [u] appears within the lookahead sets of both item [i] and item [j]. Note that [t] and [u] can be distinct. 
Pager has shown that his weak compatibility criterion is stable, that is, preserved by transitions and closure. This means that, if two states can be merged, then so can their successors. This is important, because merging two states means committing to merging their successors, even though we have not even built these successors yet. The criterion used here is a slightly more restrictive version of Pager's criterion, which guarantees equality of the tokens [t] and [u]. This is done essentially by applying Pager's original criterion on a token-wise basis. Pager's original criterion states that two states can be merged if the new state has no conflict or one of the original states has a conflict. Our more restrictive criterion states that two states can be merged if, for every token [t], the new state has no conflict at [t] or one of the original states has a conflict at [t]. This modified criterion is also stable. My experiments show that it is almost as effective in practice: out of more than a hundred real-world sample grammars, only one automaton was affected, and only one extra state appeared as a result of using the modified criterion. Its advantage is to potentially make conflict explanations easier: if there appears to be a conflict at [t], then some conflict at [t] can be explained. This was not true when using Pager's original criterion. *) let compatible (k1, toksr1) (k2, toksr2) = assert (k1 = k2); let n = Array.length toksr1 in (* Two states are compatible if and only if they are compatible at every pair (i, j), where i and j are distinct. *) let rec loopi i = if i = n then true else let toksr1i = toksr1.(i) and toksr2i = toksr2.(i) in let rec loopj j = if j = i then true else let toksr1j = toksr1.(j) and toksr2j = toksr2.(j) in (* The two states are compatible at (i, j) if every conflict token in the merged state already was a conflict token in one of the two original states. 
This could be written as follows: TerminalSet.subset (TerminalSet.inter (TerminalSet.union toksr1i toksr2i) (TerminalSet.union toksr1j toksr2j)) (TerminalSet.union (TerminalSet.inter toksr1i toksr1j) (TerminalSet.inter toksr2i toksr2j)) but is easily seen (on paper) to be equivalent to: *) TerminalSet.subset (TerminalSet.inter toksr2i toksr1j) (TerminalSet.union toksr1i toksr2j) && TerminalSet.subset (TerminalSet.inter toksr1i toksr2j) (TerminalSet.union toksr2i toksr1j) && loopj (j+1) in loopj 0 && loopi (i+1) in loopi 0 (* This function determines whether two (core-equivalent) states can be merged without creating an end-of-stream conflict, now or in the future. The rule is, if an item appears in one state with the singleton "#" as its lookahead set, then its lookahead set in the other state must contain "#". So, either the second lookahead set is also the singleton "#", and no end-of-stream conflict exists, or it is larger, and the second state already contains an end-of-stream conflict. Put another way, we do not want to merge two lookahead sets when one contains "#" alone and the other does not contain "#". I invented this rule to complement Pager's criterion. I believe, but I am not 100% sure, that it does indeed prevent end-of-stream conflicts and that it is stable. Thanks to Sbastien Hinderer for reporting the bug caused by the absence of this extra criterion. *) let eos_compatible (k1, toksr1) (k2, toksr2) = assert (k1 = k2); let n = Array.length toksr1 in let rec loop i = if i = n then true else let toks1 = toksr1.(i) and toks2 = toksr2.(i) in begin if TerminalSet.mem Terminal.sharp toks1 && TerminalSet.cardinal toks1 = 1 then (* "#" is alone in one set: it must be a member of the other set. *) TerminalSet.mem Terminal.sharp toks2 else if TerminalSet.mem Terminal.sharp toks2 && TerminalSet.cardinal toks2 = 1 then (* Symmetric condition. 
*) TerminalSet.mem Terminal.sharp toks1 else true end && loop (i+1) in loop 0 (* This function determines whether two (core-equivalent) states can be merged without creating spurious reductions on the [error] token. The rule is, we merge two states only if they agree on which reductions are permitted on the [error] token. Without this restriction, we might end up in a situation where we decide to introduce an [error] token into the input stream and perform a reduction, whereas a canonical LR(1) automaton, confronted with the same input string, would fail normally -- that is, it would introduce an [error] token into the input stream, but it would not be able to perform a reduction right away: the current state would be discarded. In the interest of more accurate (or sane, or predictable) error handling, I decided to introduce this restriction as of 20110124. This will cause an increase in the size of automata for grammars that use the [error] token. It might actually make the [error] token somewhat easier to use. Note that two sets can be in the subsumption relation and still be error-incompatible. Error-compatibility requires equality of the lookahead sets, restricted to [error]. Thanks to Didier Rmy for reporting a bug caused by the absence of this extra criterion. *) let error_compatible (k1, toksr1) (k2, toksr2) = assert (k1 = k2); let n = Array.length toksr1 in let rec loop i = if i = n then true else let toks1 = toksr1.(i) and toks2 = toksr2.(i) in begin if TerminalSet.mem Terminal.error toks1 then (* [error] is a member of one set: it must be a member of the other set. *) TerminalSet.mem Terminal.error toks2 else if TerminalSet.mem Terminal.error toks2 then (* Symmetric condition. *) TerminalSet.mem Terminal.error toks1 else true end && loop (i+1) in loop 0 (* Union of two states. The two states must have the same core. The new state is obtained by pointwise union of the lookahead sets. 
*) let union (k1, toksr1) (k2, toksr2) = assert (k1 = k2); k1, Array.init (Array.length toksr1) (fun i -> TerminalSet.union toksr1.(i) toksr2.(i) ) (* Restriction of a state to a set of tokens of interest. Every lookahead set is intersected with that set. *) let restrict toks (k, toksr) = k, Array.map (fun toksri -> TerminalSet.inter toksri toks ) toksr menhir-20130116/src/interface.mli0000644000175000017500000000257512075533603015532 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module defines the interface of the generated parser. *) (* This is the [Error] exception. *) val excname: string val excdef: IL.excdef val excredef: IL.excdef (* The type of the entry point for the nonterminal start symbol [symbol]. *) val entrytypescheme: string -> IL.typescheme (* This writes the interface of the generated parser to the [.mli] file. *) val write: unit -> unit menhir-20130116/src/keyword.mli0000644000175000017500000000437512075533603015256 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. 
*) (* *) (**************************************************************************) (* This module provides some type and function definitions that help deal with the keywords that we recognize within semantic actions. *) (* The user can request position information either at type [int] (a simple offset) or at type [Lexing.position]. *) type flavor = | FlavorOffset | FlavorPosition (* The user can request position information about the start or end of a symbol. *) type where = | WhereStart | WhereEnd (* The user can request position information about a production's left-hand side or about one of the symbols in its right-hand side, which he can refer to by position or by name. *) type subject = | Left | RightDollar of int | RightNamed of string (* Keywords inside semantic actions. They allow access to semantic values or to position information. *) type keyword = | Dollar of int | Position of subject * where * flavor | PreviousError | SyntaxError (* This maps a [Position] keyword to the name of the variable that the keyword is replaced with. *) val posvar: subject -> where -> flavor -> string (* Sets of keywords. *) module KeywordSet : sig include Set.S (* This converts a list of keywords with positions into a set of keywords. *) val from_list: elt list -> t end with type elt = keyword menhir-20130116/src/tableFormat.ml0000644000175000017500000001333312075533603015653 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. 
*) (* *) (**************************************************************************) (* This signature defines the format of the parse tables. It is used as an argument to [TableInterpreter]. *) module type TABLES = sig (* This is the parser's type of tokens. *) type token (* This maps a token to its internal (generation-time) integer code. *) val token2terminal: token -> int (* This is the integer code for the error pseudo-token. *) val error_terminal: int (* This maps a token to its semantic value. *) val token2value: token -> Obj.t (* Traditionally, an LR automaton is described by two tables, namely, an action table and a goto table. See, for instance, the Dragon book. The action table is a two-dimensional matrix that maps a state and a lookahead token to an action. An action is one of: shift to a certain state, reduce a certain production, accept, or fail. The goto table is a two-dimensional matrix that maps a state and a non-terminal symbol to either a state or undefined. By construction, this table is sparse: its undefined entries are never looked up. A compression technique is free to overlap them with other entries. In Menhir, things are slightly different. If a state has a default reduction on token [#], then that reduction must be performed without consulting the lookahead token. As a result, we must first determine whether that is the case, before we can obtain a lookahead token and use it as an index in the action table. Thus, Menhir's tables are as follows. A one-dimensional default reduction table maps a state to either ``no default reduction'' (encoded as: 0) or ``by default, reduce prod'' (encoded as: 1 + prod). The action table is looked up only when there is no default reduction. 
*) val default_reduction: PackedIntArray.t (* Menhir follows Dencker, Drre and Heuft, who point out that, although the action table is not sparse by nature (i.e., the error entries are significant), it can be made sparse by first factoring out a binary error matrix, then replacing the error entries in the action table with undefined entries. Thus: A two-dimensional error bitmap maps a state and a terminal to either ``fail'' (encoded as: 0) or ``do not fail'' (encoded as: 1). The action table, which is now sparse, is looked up only in the latter case. *) (* The error bitmap is flattened into a one-dimensional table; its width is recorded so as to allow indexing. The table is then compressed via [PackedIntArray]. The bit width of the resulting packed array must be [1], so it is not explicitly recorded. *) (* The error bitmap does not contain a column for the [#] pseudo-terminal. Thus, its width is [Terminal.n - 1]. We exploit the fact that the integer code assigned to [#] is greatest: the fact that the right-most column in the bitmap is missing does not affect the code for accessing it. *) val error: int (* width of the bitmap *) * string (* second component of [PackedIntArray.t] *) (* A two-dimensional action table maps a state and a terminal to one of ``shift to state s and discard the current token'' (encoded as: s | 10), ``shift to state s without discarding the current token'' (encoded as: s | 11), or ``reduce prod'' (encoded as: prod | 01). *) (* The action table is first compressed via [RowDisplacement], then packed via [PackedIntArray]. *) (* Like the error bitmap, the action table does not contain a column for the [#] pseudo-terminal. *) val action: PackedIntArray.t * PackedIntArray.t (* A one-dimensional lhs table maps a production to its left-hand side (a non-terminal symbol). *) val lhs: PackedIntArray.t (* A two-dimensional goto table maps a state and a non-terminal symbol to either undefined (encoded as: 0) or a new state s (encoded as: 1 + s). 
*) (* The goto table is first compressed via [RowDisplacement], then packed via [PackedIntArray]. *) val goto: PackedIntArray.t * PackedIntArray.t (* A one-dimensional semantic action table maps productions to semantic actions. The calling convention for semantic actions is described in [EngineTypes]. *) val semantic_action: ((int, Obj.t, token) EngineTypes.env -> unit) array (* The parser defines its own [Error] exception. This exception can be raised by semantic actions and caught by the engine, and raised by the engine towards the final user. *) exception Error (* The parser indicates whether to perform error recovery. *) val recovery: bool (* The parser indicates whether to generate a trace. Generating a trace requires two extra tables, which respectively map a terminal symbol and a production to a string. *) val trace: (string array * string array) option end menhir-20130116/src/tarjan.mli0000644000175000017500000000444612075533603015050 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module provides an implementation of Tarjan's algorithm for finding the strongly connected components of a graph. The algorithm runs when the functor is applied. Its complexity is $O(V+E)$, where $V$ is the number of vertices in the graph $G$, and $E$ is the number of edges. *) module Run (G : sig type node (* We assume each node has a unique index. Indices must range from $0$ to $n-1$, where $n$ is the number of nodes in the graph. 
*) val n: int val index: node -> int (* Iterating over a node's immediate successors. *) val successors: (node -> unit) -> node -> unit (* Iterating over all nodes. *) val iter: (node -> unit) -> unit end) : sig open G (* This function maps each node to a representative element of its strongly connected component. *) val representative: node -> node (* This function maps each representative element to a list of all members of its strongly connected component. Non-representative elements are mapped to an empty list. *) val scc: node -> node list (* [iter action] allows iterating over all strongly connected components. For each component, the [action] function is applied to the representative element and to a (non-empty) list of all elements. *) val iter: (node -> node list -> unit) -> unit end menhir-20130116/src/lr1partial.mli0000644000175000017500000000340512075533603015636 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) open Grammar module Run (X : sig (* A restricted set of tokens of interest. *) val tokens: TerminalSet.t (* A state of the (merged) LR(1) automaton that we're trying to simulate. 
*) val goal: Lr1.node end) : sig (* What we are after is a path, in the canonical LR(1) automaton, that leads from some entry node to a node [N] such that (i) [N] has a conflict involving one of the tokens of interest and (ii) [N] corresponds to the goal node, that is, the path that leads to [N] in the canonical LR(1) automaton leads to the goal node in the merged LR(1) automaton. *) val source: Item.t val path: Symbol.t array val goal: Lr0.concretelr1state (* An (arbitrarily chosen) conflict token in the goal state. *) val token: Terminal.t end menhir-20130116/src/tableBackend.ml0000644000175000017500000004514112075533603015754 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) open CodeBits open Grammar open IL open Interface open PreFront open Printf open TokenType open CodePieces module Run (T : sig end) = struct (* ------------------------------------------------------------------------ *) (* Conventional names for modules, exceptions, record fields, functions. *) let menhirlib = "MenhirLib" let tableInterpreter = menhirlib ^ ".TableInterpreter" let make = tableInterpreter ^ ".Make" let accept = tableInterpreter ^ ".Accept" let engineTypes = menhirlib ^ ".EngineTypes" let field x = engineTypes ^ "." 
^ x let fstate = field "state" let fsemv = field "semv" let fstartp = field "startp" let fendp = field "endp" let fnext = field "next" let fstack = field "stack" let fcurrent = field "current" let flexbuf = field "lexbuf" let fpreviouserror = field "previouserror" let flex_start_p = "Lexing.lex_start_p" let interpreter = "MenhirInterpreter" let entry = interpreter ^ ".entry" (* ------------------------------------------------------------------------ *) (* Code generation for semantic actions. *) (* The functions [reducecellparams] and [reducebody] are adpated from [CodeBackend]. *) (* Things are slightly more regular here than in the code-based back-end, since there is no optimization: every stack cell has the same structure and holds a state, a semantic value, and a pair of positions. Because every semantic value is represented, we do not have a separate [unitbindings]. *) (* [reducecellparams] constructs a pattern that describes the contents of a stack cell. If this is the bottom cell, the variable [state] is bound to the state found in the cell. If [ids.(i)] is used in the semantic action, then it is bound to the semantic value. The position variables are always bound. *) let reducecellparams prod i symbol (next : pattern) : pattern = let ids = Production.identifiers prod and used = Production.used prod in PRecord [ fstate, (if i = 0 then PVar state else PWildcard); fsemv, (if used.(i) then PVar ids.(i) else PWildcard); fstartp, PVar (Printf.sprintf "_startpos_%s_" ids.(i)); fendp, PVar (Printf.sprintf "_endpos_%s_" ids.(i)); fnext, next; ] (* The semantic values bound in [reducecellparams] have type [Obj.t]. They should now be cast to their real type. If we had [PMagic] in the syntax of patterns, we could do that in one swoop; since we don't, we have to issue a series of casts a posteriori. 
*) let reducecellcasts prod i symbol casts = let ids = Production.identifiers prod and used = Production.used prod in if used.(i) then let id = ids.(i) in let t : typ = match semvtype symbol with | [] -> tunit | [ t ] -> t | _ -> assert false in (* Cast: [let id = ((Obj.magic id) : t) in ...]. *) ( PVar id, EAnnot (EMagic (EVar id), type2scheme t) ) :: casts else casts (* This is the body of the [reduce] function associated with production [prod]. It assumes that the variables [env] and [stack] have been bound. *) let reducebody prod = let nt, rhs = Production.def prod and ids = Production.identifiers prod and length = Production.length prod in (* Build a pattern that represents the shape of the stack. Out of the stack, we extract a state (except when the production is an epsilon production) and a number of semantic values. *) (* At the same time, build a series of casts. *) let (_ : int), pat, casts = Invariant.fold (fun (i, pat, casts) (_ : bool) symbol _ -> i + 1, reducecellparams prod i symbol pat, reducecellcasts prod i symbol casts ) (0, PVar stack, []) (Invariant.prodstack prod) in (* Determine start and end positions for the left-hand side of the production. *) let posbindings = ( PVar startp, if length > 0 then EVar (Printf.sprintf "_startpos_%s_" ids.(0)) else ERecordAccess(ERecordAccess (EVar env, flexbuf), flex_start_p) ) :: ( PVar endp, if length > 0 then EVar (Printf.sprintf "_endpos_%s_" ids.(length - 1)) else EVar startp ) :: [] in (* Is this is one of the start productions? *) match Production.classify prod with | Some nt -> (* This is a start production. Raise [Accept]. *) EComment ( sprintf "Accepting %s" (Nonterminal.print false nt), blet ( [ pat, EVar stack ], ERaise (EData (accept, [ EVar ids.(0) ])) ) ) | None -> (* This is a regular production. Perform a reduction. 
*) let action = Production.action prod in let act = EAnnot (Action.to_il_expr action, type2scheme (semvtypent nt)) in EComment ( Production.print prod, blet ( (pat, EVar stack) :: (* destructure the stack *) casts @ (* perform type casts *) posbindings @ (* bind [startp] and [endp] *) extrabindings fpreviouserror action @ (* add bindings for the weird keywords *) [ PVar semv, act ], (* run the user's code and bind [semv] *) ERecordWrite ( EVar env, fstack, (* update the stack with ... *) ERecord [ (* ... a new stack cell *) fstate, EVar state; (* the current state after popping; it will be updated by [goto] *) fsemv, ERepr (EVar semv); (* the newly computed semantic value *) fstartp, EVar startp; (* the newly computed start and end positions *) fendp, EVar endp; fnext, EVar stack; (* this is the stack after popping *) ] ) ) ) let semantic_action prod = EFun ( [ PVar env ], if Invariant.ever_reduced prod then (* Access the stack and current state via the environment. *) (* In fact, the current state needs be bound here only if this is an epsilon production. Otherwise, the variable [state] will be bound by the pattern produced by [reducecellparams] above. *) ELet ( [ PVar stack, ERecordAccess (EVar env, fstack) ] @ (if Production.length prod = 0 then [ PVar state, ERecordAccess (EVar env, fcurrent) ] else []), (* Then, *) reducebody prod ) else (* For productions that are never reduced, generate no code. *) (* We do this mainly because [Invariant.prodstack] does not support productions that are never reduced. *) EComment ( "a production never reduced", EApp (EVar "assert", [ EData ("false", []) ]) ) ) (* ------------------------------------------------------------------------ *) (* Table encodings. *) (* Encodings of entries in the default reduction table. *) let encode_DefRed prod = (* 1 + prod *) 1 + Production.p2i prod let encode_NoDefRed = (* 0 *) 0 (* Encodings of entries in the action table. 
*) let encode_Reduce prod = (* prod | 01 *) (Production.p2i prod lsl 2) lor 1 let encode_ShiftDiscard s = (* s | 10 *) ((Lr1.number s) lsl 2) lor 0b10 let encode_ShiftNoDiscard s = (* s | 11 *) ((Lr1.number s) lsl 2) lor 0b11 let encode_Fail = (* 00 *) 0 (* Encodings of entries in the goto table. *) let encode_Goto node = (* 1 + node *) 1 + Lr1.number node let encode_NoGoto = (* 0 *) 0 (* Encodings of the hole in the action and goto tables. *) let hole = assert (encode_Fail = 0); assert (encode_NoGoto = 0); 0 (* Encodings of entries in the error bitmap. *) let encode_Error = (* 0 *) 0 let encode_NoError = (* 1 *) 1 (* ------------------------------------------------------------------------ *) (* Statistics. *) (* Integer division, rounded up. *) let div a b = if a mod b = 0 then a / b else a / b + 1 (* [size] provides a rough measure of the size of its argument, in words. The [unboxed] parameter is true if we have already counted 1 for the pointer to the object. *) let rec size unboxed = function | EIntConst _ | EMaxInt | ETuple [] | EData (_, []) -> if unboxed then 0 else 1 | EStringConst s -> 1 + div (String.length s * 8) Sys.word_size | ETuple es | EData (_, es) | EArray es -> 1 + List.length es + List.fold_left (fun s e -> s + size true e) 0 es | _ -> assert false (* not implemented *) let size = size false (* Optionally, print a measure of each of the tables that we are defining. *) let define (name, expr) = { valpublic = true; valpat = PVar name; valval = expr } let define_and_measure (x, e) = Error.logC 1 (fun f -> fprintf f "The %s table occupies roughly %d bytes.\n" x (size e * (Sys.word_size / 8)) ); define (x, e) (* ------------------------------------------------------------------------ *) (* Table compression. *) (* Our sparse, two-dimensional tables are turned into one-dimensional tables via [RowDisplacement]. *) (* The error bitmap, which is two-dimensional but not sparse, is made one-dimensional by simple flattening. 
*) (* Every one-dimensional table is then packed via [PackedIntArray]. *) (* Optionally, we print some information about the compression ratio. *) (* [population] counts the number of significant entries in a two-dimensional matrix. *) let population (matrix : int array array) = Array.fold_left (fun population row -> Array.fold_left (fun population entry -> if entry = hole then population else population + 1 ) population row ) 0 matrix (* [marshal1] marshals a one-dimensional array. *) let marshal1 (table : int array) = let (bits : int), (text : string) = MenhirLib.PackedIntArray.pack table in ETuple [ EIntConst bits; EStringConst text ] (* [marshal11] marshals a one-dimensional array whose bit width is statically known to be [1]. *) let marshal11 (table : int array) = let (bits : int), (text : string) = MenhirLib.PackedIntArray.pack table in assert (bits = 1); EStringConst text (* [marshal2] marshals a two-dimensional table. *) let marshal2 name m n (matrix : int list list) = let matrix : int array array = Array.of_list (List.map Array.of_list matrix) in let (displacement : int array), (data : int array) = MenhirLib.RowDisplacement.compress (=) (fun x -> x = hole) hole m n matrix in Error.logC 1 (fun f -> fprintf f "The %s table is %d entries; %d non-zero; %d compressed.\n" name (m * n) (population matrix) (Array.length displacement + Array.length data) ); ETuple [ marshal1 displacement; marshal1 data; ] let marshal1 (table : int list) = marshal1 (Array.of_list table) let marshal11 (table : int list) = marshal11 (Array.of_list table) (* ------------------------------------------------------------------------ *) (* Table generation. *) (* The action table. *) let action node t = match Invariant.has_default_reduction node with | Some _ -> (* [node] has a default reduction; in that case, the action table is never looked up. *) hole | None -> try let target = SymbolMap.find (Symbol.T t) (Lr1.transitions node) in (* [node] has a transition to [target]. 
If [target] has a default reduction on [#], use [ShiftNoDiscard], otherwise [ShiftDiscard]. *) match Invariant.has_default_reduction target with | Some (_, toks) when TerminalSet.mem Terminal.sharp toks -> assert (TerminalSet.cardinal toks = 1); encode_ShiftNoDiscard target | _ -> encode_ShiftDiscard target with Not_found -> try (* [node] has a reduction. *) let prod = Misc.single (TerminalMap.find t (Lr1.reductions node)) in encode_Reduce prod with Not_found -> (* [node] has no action. *) encode_Fail (* In the error bitmap and in the action table, the row that corresponds to the [#] pseudo-terminal is never accessed. Thus, we do not create this row. This does not create a gap in the table, because this is the right-most row. For sanity, we check this fact here. *) let () = assert (Terminal.t2i Terminal.sharp = Terminal.n - 1) (* The goto table. *) let goto node nt = try let target = SymbolMap.find (Symbol.N nt) (Lr1.transitions node) in encode_Goto target with Not_found -> encode_NoGoto (* The error bitmap reflects which entries in the action table are [Fail]. Like the action table, it is not accessed when [node] has a default reduction. *) let error node t = if action node t = encode_Fail then encode_Error else encode_NoError (* The default reductions table. *) let default_reduction node = match Invariant.has_default_reduction node with | Some (prod, _) -> encode_DefRed prod | None -> encode_NoDefRed (* Generate the table definitions. 
*) let action = define_and_measure ( "action", marshal2 "action" Lr1.n (Terminal.n - 1) ( Lr1.map (fun node -> Terminal.mapx (fun t -> action node t ) ) ) ) let goto = define_and_measure ( "goto", marshal2 "goto" Lr1.n Nonterminal.n ( Lr1.map (fun node -> Nonterminal.map (fun nt -> goto node nt ) ) ) ) let error = define_and_measure ( "error", ETuple [ EIntConst (Terminal.n - 1); marshal11 ( List.flatten ( Lr1.map (fun node -> Terminal.mapx (fun t -> error node t ) ) ) ) ] ) let default_reduction = define_and_measure ( "default_reduction", marshal1 ( Lr1.map (fun node -> default_reduction node ) ) ) let lhs = define_and_measure ( "lhs", marshal1 ( Production.map (fun prod -> Nonterminal.n2i (Production.nt prod) ) ) ) let semantic_action = define ( "semantic_action", EArray (Production.map semantic_action) ) (* ------------------------------------------------------------------------ *) (* When [--trace] is enabled, we need tables that map terminals and productions to strings. *) let stringwrap f x = EStringConst (f x) let reduce_or_accept prod = match Production.classify prod with | Some _ -> "Accepting" | None -> "Reducing production " ^ (Production.print prod) let trace = define_and_measure ( "trace", if Settings.trace then EData ("Some", [ ETuple [ EArray (Terminal.map (stringwrap Terminal.print)); EArray (Production.map (stringwrap reduce_or_accept)); ] ]) else EData ("None", []) ) (* ------------------------------------------------------------------------ *) (* Generate the two functions that map a token to its integer code and to its semantic value, respectively. 
*) let token2terminal = destructuretokendef "token2terminal" tint false (fun tok -> EIntConst (Terminal.t2i tok)) let token2value = destructuretokendef "token2value" tobj true (fun tok -> ERepr ( match Terminal.ocamltype tok with | None -> EUnit | Some _ -> EVar semv ) ) (* ------------------------------------------------------------------------ *) (* We are now ready to apply the functor [TableInterpreter.Make]. *) (* The type [token], which was defined at toplevel, must be defined again in the functor argument. We would like to write [type token = token], but that is not valid ocaml. The usual workaround involves a dummy type. *) let jeton = prefix "jeton" let tokendef1 = { typename = jeton; typeparams = []; typerhs = TAbbrev ttoken; typeconstraint = None; } let tokendef2 = { typename = "token"; (* not [TokenType.tctoken], as it might carry an undesired prefix *) typeparams = []; typerhs = TAbbrev (TypApp (jeton, [])); typeconstraint = None; } (* Here is the application of [TableInterpreter.Make]. Note that the exception [Error], which is defined at toplevel, is re-defined within the functor argument: [exception Error = Error]. *) let application = { modulename = interpreter; modulerhs = MApp ( MVar make, MStruct { struct_excdefs = [ excredef; ]; struct_typedefs = [ tokendef2; ]; struct_nonrecvaldefs = [ token2terminal; define ("error_terminal", EIntConst (Terminal.t2i Terminal.error)); token2value; default_reduction; error; action; lhs; goto; semantic_action; define ("recovery", eboolconst Settings.recovery); trace; ]; } ); } (* ------------------------------------------------------------------------ *) (* The client API invokes the interpreter with an appropriate start state. 
*) let api : IL.valdef list = let lexer = "lexer" and lexbuf = "lexbuf" in ProductionMap.fold (fun prod state api -> let nt : Nonterminal.t = match Production.classify prod with | Some nt -> nt | None -> assert false (* this is a start production *) in let t : typ = match Nonterminal.ocamltype nt with | Some t -> TypTextual t | None -> assert false (* every start symbol should carry a type *) in define ( Nonterminal.print true nt, EFun ( [ PVar lexer; PVar lexbuf ], EAnnot ( EMagic ( EApp ( EVar entry, [ EIntConst (Lr1.number state); EVar lexer; EVar lexbuf ] ) ), type2scheme t ) ) ) :: api ) Lr1.entry [] (* ------------------------------------------------------------------------ *) (* Let's put everything together. *) let program = { paramdefs = Front.grammar.UnparameterizedSyntax.parameters; prologue = Front.grammar.UnparameterizedSyntax.preludes; excdefs = [ excdef ]; typedefs = tokentypedef @ [ tokendef1 ]; nonrecvaldefs = [ excvaldef ]; moduledefs = [ application ]; valdefs = api; postlogue = Front.grammar.UnparameterizedSyntax.postludes } let () = Time.tick "Producing abstract syntax" end menhir-20130116/src/nonpositiveCycles.ml0000644000175000017500000000753512075533603017142 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module uses Floyd and Warshall's algorithm to detect whether a graph with integer-weighted edges contains a simple cycle of negative weight. *) (* The algorithm runs in cubic time in the number of vertices. 
It may be worthwhile to first use Tarjan's algorithm to obtain the graph's strongly connected components, and use Floyd and Warshall's algorithm only on each component. *) module Run (G : sig type node (* We assume each node has a unique index. Indices must range from $0$ to $n-1$, where $n$ is the number of nodes in the graph. *) val n: int val index: node -> int (* Iterating over a node's immediate successors. Edges are weighted. *) val successors: (int -> node -> unit) -> node -> unit (* Iterating over all nodes. *) val iter: (node -> unit) -> unit end) = struct open G (* Integers with infinity. *) type distance = | Infinity | Finite of int let add d1 d2 = match d1, d2 with | Infinity, _ | _, Infinity -> Infinity | Finite i1, Finite i2 -> Finite (i1 + i2) let min d1 d2 = match d1, d2 with | Infinity, d | d, Infinity -> d | Finite i1, Finite i2 -> Finite (min i1 i2) let le d1 d2 = match d1, d2 with | Infinity, Infinity -> true | Infinity, Finite _ -> false | Finite _, Infinity -> true | Finite i1, Finite i2 -> i1 <= i2 (* Allocate and initialize a distance matrix. At allocation time, every entry is initialized to infinity. Then, we iterate over all edges, and copy them into the distance matrix. *) (* Note that, by default, [d.(i).(i)] is not initialized to zero: it is initialized to infinity. This is because we are looking for paths of non-zero length. In other words, we are computing a transitive closure, not a reflexive, transitive closure. *) let d = Array.init n (fun i -> Array.init n (fun j -> Infinity ) ) let () = iter (fun source -> successors (fun weight target -> (* We use a min operation, so the graph may be a multi-graph, that is, multiple edges between two nodes are permitted. *) let i = index source and j = index target in d.(i).(j) <- min (Finite weight) d.(i).(j) ) source ) (* The algorithm. 
*) (* Stefan Hougardy notes that, in the presence of negative cycles, distances can grow exponentially fast (towards minus infinity), so there is a risk of overflow. To avoid this, one must check for negative cycles during the computation -- as opposed to waiting until the end. *) exception Detection let graph_has_nonpositive_simple_cycle : bool = try for k = 0 to n-1 do for i = 0 to n-1 do for j = 0 to n-1 do d.(i).(j) <- min d.(i).(j) (add d.(i).(k) d.(k).(j)); if i = j && le d.(i).(j) (Finite 0) then raise Detection done done done; false with Detection -> true end menhir-20130116/src/IO.mli0000644000175000017500000000323112075533603014067 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* Input-output utilities. *) (* [exhaust channel] reads all of the data that's available on [channel]. *) val exhaust: in_channel -> string (* [invoke command] invokes an external command (which expects no input) and returns its output, if the command succeeds. It returns [None] if the command fails. *) val invoke: string -> string option (* [winvoke writers command cleaners] invokes each of the [writer] functions, invokes the command [command], and runs each of the [cleaner] functions. Then, it either returns the command's output, if the command succeeded, or exits, otherwise. 
*) val winvoke: (unit -> unit) list -> string -> (unit -> unit) list -> string menhir-20130116/src/kmp.ml0000644000175000017500000001723712075533603014211 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* An implementation of Knuth, Morris, and Pratt's algorithm for searching for a pattern in a string. *) (* The pattern [p] is considered a finite array of length [m]. *) (* The string [s] is considered a finite or infinite array. It is represented as an accessor function, which accepts an integer index and either returns a character or raises the exception [OutOfBounds]. *) exception OutOfBounds (* The characters in the pattern and the string are integers. *) (* TEMPORARY This relation need not be equality. In particular, a certain character in the pattern may match multiple characters in the string, and a certain character in the string may match multiple characters in the pattern. In other words, both the pattern and the string may contain wildcards. *) (* If the search succeeds, it returns the offset in the string where the pattern was found. 
*) module MutableBitSetAsInt = struct type t = int let create n = if n < Sys.word_size then 0 else raise (Invalid_argument "MutableBitSet.create") let init n f = let rec loop i mask s = (* [mask] is two to the [i] *) if i = n then s else loop (i+1) (mask lsl 1) (if f i then mask lor s else s) in loop 0 1 (create n) let conjunction s1 s2 = s1 land s2 let shift s = (s lsl 1) lor 1 (* excess bits on the left are ok *) let get i s = s land (1 lsl i) <> 0 end module MutableBitSetAsIntArray = struct type t = int array let w = Sys.word_size - 1 let msb_mask = 1 lsl (w - 1) let create n = Array.create (if n mod w = 0 then n / w else n / w + 1) 0 let init n f = let s = create n in (* In the following loop, [i] is the logical index that we are considering; [q] is [i / w]; [r] is [i mod w], or possibly [w]; [mask] is two to the [r]; and [current] is the integer bit field that we are preparing in order to eventually write to [s.(q)]. *) let rec loop i q r mask current = if i = n then begin s.(q) <- current; s end else if r = w then begin s.(q) <- current; let q = q + 1 in loop i q 0 1 0 end else loop (i+1) q (r+1) (mask lsl 1) (if f i then mask lor current else current) in loop 0 0 0 1 0 let conjunction s1 s2 = let width = Array.length s1 in for q = 0 to width-1 do s1.(q) <- s1.(q) land s2.(q) done; s1 let shift s = let width = Array.length s in let carry = ref 1 in for q = 0 to width-1 do let current = s.(q) in let next_carry = if current land msb_mask = 0 then 0 else 1 in let current = current lsl 1 in (* excess bits on the left are ok *) let current = current lor !carry in s.(q) <- current; carry := next_carry done; s let get i s = let q = i / w and r = i mod w in s.(q) land (1 lsl r) <> 0 end let rec nodup1 x ys = match ys with | [] -> [] | y :: ys -> if x = y then nodup1 x ys else y :: nodup1 y ys let nodup xs = match xs with | [] -> [] | x :: xs -> x :: nodup1 x xs let rec search compare key data = let rec loop i j = if i = j then raise Not_found else let m = i + (j - i) 
/ 2 in let key', datum = data.(m) in let c = compare key key' in if c = 0 then datum else if c < 0 then loop i m else loop (m+1) j in loop 0 (Array.length data) module Make (MutableBitSet : sig type t val create: int -> t (* initialized at 0 everywhere *) val init: int -> (int -> bool) -> t (* initialized as the user desires *) val conjunction: t -> t -> t (* in-place conjunction on left argument *) val shift: t -> t (* in-place left shift by 1; also sets the lsb *) val get: int -> t -> bool (* read a bit *) end) = struct type state = MutableBitSet.t open MutableBitSet let kmp (p : int array) (s : int -> int) : int option = let m = Array.length p in (* The transition mask associated with a nonzero symbol [a]. *) let transition_mask a = MutableBitSet.init m (fun i -> p.(i) = 0 || p.(i) = a ) in (* The transition mask associated with a symbol [a] that is not among the [p.(i)]'s. *) let default_transition_mask = MutableBitSet.init m (fun i -> p.(i) = 0 ) in (* Precompute a map of the nonzero characters [a] that appear in the pattern to their transition mask. *) let nonzero_chars = Array.fold_left (fun accu a -> if a = 0 then accu else a :: accu ) [] p in let nonzero_chars = Array.of_list (nodup (List.fast_sort Pervasives.compare nonzero_chars)) (* TEMPORARY could allocate less memory by using a version of heapsort that (filters out zero elements and?) removes duplicates on the fly? *) in let transition_masks = Array.map (fun a -> (a, transition_mask a) ) nonzero_chars in let transition_mask a : MutableBitSet.t = try search Pervasives.compare a transition_masks with Not_found -> default_transition_mask in (* Define the automaton's transition function. *) (* The integer index [i] represents the current state of the automaton. It is comprised between [0] (inclusive) and [m] (exclusive). The symbol [a] is the character that we are consuming. *) let transition (is : state) (a : int) : state = (* Apply the transition mask associated with the symbol [a]. 
This kills all states [i] that do not have an outgoing transition along [a]. If [a] is the wildcard [0], nothing is killed. *) let is = if a = 0 then is else conjunction is (transition_mask a) in (* Shift the states that remain. This reflects the transitions that are succesfully taken, as well as the loop from state 0 to itself. *) shift is in (* Define the automaton's initial state. *) let is = MutableBitSet.init m (fun i -> i = 0) in (* Search. *) (* The variable [i] is the automaton's current state. *) (* The variable [j] is the offset in the string [s] of the symbol that the automaton is about to consume. *) let rec loop is j = if get m is then (* [m] is the accepting state *) Some (j - m) (* success; return match offset *) else loop (transition is (s j)) (j+1) in try loop is 0 with OutOfBounds -> None end module K = Make(MutableBitSetAsIntArray) let wrap s = let n = Array.length s in fun i -> if i < n then s.(i) else raise OutOfBounds let wrap_infinite s = let n = Array.length s in fun i -> if i < n then s.(i) else 0 let test = K.kmp [| 0; 1; 2 |] (wrap [| 3; 1; 0; 1; 2; 3; 0 |]) menhir-20130116/src/rawPrinter.ml0000644000175000017500000001351112075533603015546 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* A debugging pretty-printer for [IL]. Newlines are used liberally, so as to facilitate diffs. *) open IL open Printf module Make (X : sig (* This is the channel that is being written to. 
*) val f: out_channel end) = struct (* ------------------------------------------------------------------------- *) (* XML-style trees. *) type tree = | Node of string * tree list let node label ts = Node (label, ts) (* ------------------------------------------------------------------------- *) (* Dealing with newlines and indentation. *) let maxindent = 120 let whitespace = String.make maxindent ' ' let indentation = ref 0 let line = ref 1 (* [rawnl] is, in principle, the only place where writing a newline character to the output channel is permitted. This ensures that the line counter remains correct. But see also [stretch] and [typ0]. *) let rawnl f = incr line; output_char f '\n' let nl f = rawnl f; output f whitespace 0 !indentation let indent ofs producer f x = let old_indentation = !indentation in let new_indentation = old_indentation + ofs in if new_indentation <= maxindent then indentation := new_indentation; nl f; producer f x; indentation := old_indentation (* ------------------------------------------------------------------------- *) (* Tree printers. *) let rec print_tree f = function | Node (label, []) -> output_char f '<'; output_string f label; output_char f '/'; output_char f '>'; nl f | Node (label, ts) -> output_char f '<'; output_string f label; output_char f '>'; indent 2 print_trees f ts; output_char f '<'; output_char f '/'; output_string f label; output_char f '>'; nl f and print_trees f = function | [] -> () | t :: ts -> print_tree f t; print_trees f ts (* ------------------------------------------------------------------------- *) (* Expression-to-tree converter. 
*) let rec expr e = match e with | EComment (c, e) -> node "comment" [ string c; expr e ] | EPatComment (s, p, e) -> node "patcomment" [ string s; pat p; expr e ] | ELet (pes, e2) -> node "let" ( patexprs pes @ [ expr e2 ]) | ERecordWrite (e1, field, e2) -> node "recordwrite" [ expr e1; string field; expr e2 ] | EMatch (e, brs) -> node "match" ( expr e :: branches brs ) | ETry (e, brs) -> node "try" ( expr e :: branches brs ) | EIfThen (e1, e2) -> node "ifthen" [ expr e1; expr e2 ] | EIfThenElse (e0, e1, e2) -> node "ifthenelse" [ expr e0; expr e1; expr e2 ] | EFun (ps, e) -> node "fun" ( pats ps @ [ expr e ]) | EApp (e, args) -> node "app" ( expr e :: exprs args ) | ERaise e -> node "raise" [ expr e ] | EMagic e -> node "magic" [ expr e ] | ERepr e -> node "repr" [ expr e ] | EData (d, args) -> node "data" ( string d :: exprs args ) | EVar v -> node "var" [ string v ] | ETextual action -> node "text" [ stretch action ] | EUnit -> node "unit" [] | EIntConst k -> node "int" [ int k ] | EMaxInt -> node "max_int" [] | EStringConst s -> node "string" [ string s ] | ETuple es -> node "tuple" ( exprs es ) | EAnnot (e, s) -> node "annot" [ expr e; scheme s ] | ERecordAccess (e, field) -> node "recordaccess" [ expr e; string field ] | ERecord fs -> node "record" (fields fs) | EArray fs -> node "array" (exprs fs) | EArrayAccess (e1, e2) -> node "arrayaccess" [ expr e1; expr e2 ] and exprs es = List.map expr es and stretch stretch = string stretch.Stretch.stretch_content and branches brs = List.map branch brs and branch br = node "branch" [ pat br.branchpat; expr br.branchbody ] and fields fs = List.map field fs and field (label, e) = node "field" [ string label; expr e ] and pats ps = List.map pat ps and pat = function | PUnit -> node "punit" [] | PWildcard -> node "pwildcard" [] | PVar x -> node "pvar" [ string x ] | PTuple ps -> node "ptuple" (pats ps) | PAnnot (p, t) -> node "pannot" [ pat p; typ t ] | PData (d, args) -> node "pdata" (string d :: pats args) | PRecord fps 
-> node "precord" (fpats fps) | POr ps -> node "por" (pats ps) and fpats fps = List.map fpat fps and fpat (_, p) = pat p and patexprs pes = List.map patexpr pes and patexpr (p, e) = node "patexpr" [ pat p; expr e ] and string s = node s [] and int k = node (string_of_int k) [] and bool b = node (if b then "true" else "false") [] and scheme s = string "omitted" (* TEMPORARY to be completed, someday *) and typ t = string "omitted" (* TEMPORARY to be completed, someday *) (* ------------------------------------------------------------------------- *) (* Convert to a tree, then print the tree. *) let expr e = print_tree X.f (expr e) end menhir-20130116/src/compressedBitSet.mli0000644000175000017500000000202112075533603017033 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) include GSet.S with type element = int menhir-20130116/src/Makefile0000644000175000017500000002210512075533602014515 0ustar stephsteph.PHONY: clean COLD += clean # ---------------------------------------------------------------------------- # A few settings differ on Windows versus Unix. include ../Makefile.arch # ---------------------------------------------------------------------------- # Locating the ocaml compilers. # If ocamlfind is available, then it is used for that purpose. 
CAMLTOP := ocaml CAMLC := $(shell if ocamlfind ocamlc -v >/dev/null 2>&1 ; \ then echo ocamlfind ocamlc ; \ elif ocamlc.opt -v >/dev/null 2>&1 ; \ then echo ocamlc.opt ; \ else echo ocamlc ; fi) CAMLOPT := $(shell if ocamlfind ocamlopt -v >/dev/null 2>&1 ; \ then echo ocamlfind ocamlopt ; \ elif ocamlopt.opt -v >/dev/null 2>&1 ; \ then echo ocamlopt.opt ; \ else echo ocamlopt ; fi) CAMLDEP := $(shell if ocamlfind ocamldep -version >/dev/null 2>&1 ; \ then echo ocamlfind ocamldep ; \ elif ocamldep.opt -version >/dev/null 2>&1 ; \ then echo ocamldep.opt ; \ else echo ocamldep ; fi) CAMLDEPWRAPPER := ../demos/ocamldep.wrapper CAMLLEX := ocamllex CAMLYACC := ocamlyacc -v # ------------------------------------------------------------------------- # Compilation flags. BFLAGS := -g OFLAGS := -inline 1000 LNKBFLAGS := -g LNKOFLAGS := BLIBS := unix.cma OLIBS := unix.cmxa PGFLAGS := -v -lg 1 -la 1 -lc 1 --comment --infer --error-recovery --stdlib . --strict # ------------------------------------------------------------------------- # A list of the source files that must be generated prior to dependency # analysis. GENERATED := installation.ml lexmli.ml lexer.ml parser.mli parser.ml \ lineCount.ml lexdep.ml sentenceParser.mli sentenceParser.ml sentenceLexer.ml # ------------------------------------------------------------------------- # A list of the modules that must be linked into the MenhirLib library. # This library is used both at compile time (i.e., within Menhir itself) # and at run time (i.e., it is made available to Menhir users, who need # to link it with their own executables if they have used the --table # option). # If you change this list, please also update the files LICENSE and # GNUmakefile in the toplevel directory. 
LIBMODULES := infiniteArray packedIntArray rowDisplacement engineTypes \ engine tableFormat tableInterpreter convert # ------------------------------------------------------------------------- # A list of the modules that must be linked into the Menhir executable. # Some of these modules have side effects and must be executed in the # following order: # Settings parses the command line # PreFront reads the grammar description files # TokenType deals with --only-tokens and exits # Front deals with --depend, --infer, --only-preprocess, and exits # Grammar performs a number of analyses of the grammar # Lr0 constructs the LR(0) automaton # Slr determines whether the grammar is SLR # Lr1 constructs the LR(1) automaton # Conflict performs default conflict resolution and explains conflicts # Invariant performs a number of analyses of the automaton # Interpret deals with --interpret and exits # Back produces the output and exits MODULES := menhirLib Fix stringSet stringMap mark compressedBitSet \ unionFind tarjan patricia misc option \ breadth listMonad dot installation version settings time \ positions error parameters keyword lineCount printer \ action parserAux parser lexer partialGrammar \ parameterizedGrammar reachability unparameterizedPrinter \ preFront codeBits tokenType interface IO lexmli lexdep \ infer nonTerminalDefinitionInlining front grammar item lr0 \ slr lr1 lr1partial derivation conflict invariant codePieces \ sentenceParser sentenceLexer pprint cst \ referenceInterpreter interpret tableBackend codeBackend \ coqBackend traverse inliner back # ------------------------------------------------------------------------- # How to bootstrap. # Set TARGET to byte or opt depending on the desired architecture. ifndef TARGET TARGET := opt endif # The variable GOAL is the name of the executable file. GOAL := menhir.$(TARGET) # We create a symbolic link of GOAL to MENHIREXE. $(MENHIREXE): .versioncheck # Build a stage one executable using ocamlyacc. 
$(MAKE) -s PGEN="$(CAMLYACC)" PARSER=parser $(GOAL) # Remove the ocamlyacc-built parser. @/bin/rm -f parser.ml parser.mli # Build a stage two executable using the stage one executable (which is overwritten). $(MAKE) -s PGEN="./$(GOAL) $(PGFLAGS)" PARSER=fancy-parser $(GOAL) # Create a stage three parser and make sure that it is identical. @./$(GOAL) $(PGFLAGS) -b reference fancy-parser.mly 2>/dev/null @if diff parser.mli reference.mli 2>&1 >/dev/null ; then \ if diff parser.ml reference.ml 2>&1 >/dev/null ; then \ echo "Bootstrap successful." ; \ else \ echo "Bootstrap FAILED: the implementation files differ." && false ; \ fi ; \ else \ echo "Bootstrap FAILED: the interface files differ." && false ; \ fi @rm -f reference.ml reference.mli # Rename the stage two executable to the desired name. # Use a symbolic link, so that further development builds implicitly update # menhir. @ln -sf $(GOAL) $@ # ------------------------------------------------------------------------- # Linking. menhirLib.cmo menhirLib.cmi: $(LIBMODULES:=.cmo) $(CAMLC) $(BFLAGS) -pack -o menhirLib.cmo $^ menhirLib.cmx menhirLib.o: $(LIBMODULES:=.cmx) $(CAMLOPT) -pack -o menhirLib.cmx $^ menhir.byte: $(MODULES:=.cmo) $(CAMLC) -o $@ $(LNKBFLAGS) $(BLIBS) $^ menhir.opt: $(MODULES:=.cmx) $(CAMLOPT) -o $@ $(LNKOFLAGS) $(OLIBS) $^ # ------------------------------------------------------------------------- # Computing dependencies. This can be done in a simple way, even though # we exploit --infer, because we compile in two stages. Not a good example # of how to do it yourself -- have a look at demos/Makefile.shared. # For completeness, we must force ocamldep to understand that MenhirLib # is a module name. We do this by creating phantom source files for it. 
.depend: $(wildcard *.ml *.mli) $(GENERATED) @/bin/rm -f .depend for i in *.ml *.mli; do \ $(CAMLDEPWRAPPER) menhirLib.ml menhirLib.mli - $(CAMLDEP) $$i \ >> $@; \ done ifeq ($(findstring $(MAKECMDGOALS),$(COLD)),) -include .depend endif # ------------------------------------------------------------------------- # Cleaning up. clean:: /bin/rm -f menhir.byte menhir.opt $(MENHIREXE) /bin/rm -f *.cmi *.cmx *.cmo *.$(OBJ) *~ .*~ /bin/rm -f reference.ml reference.mli $(GENERATED) /bin/rm -f .depend *.conflicts *.automaton *.annot *.output # ------------------------------------------------------------------------- # Compiling. The parser source is found in $(PARSER).mly and is # processed using $(PGEN). # These two default definitions really shouldn't be necessary, but # there are corner cases where they are needed (e.g. when make is # invoked without a target and the .depend file is regenerated). ifndef PGEN PGEN := $(CAMLYACC) endif ifndef PARSER PARSER := parser endif %.cmi: %.mli $(CAMLC) $(BFLAGS) -c $< %.cmo: %.ml $(CAMLC) $(BFLAGS) -c $< # If the module that is being compiled is part of MenhirLib, add the # -for-pack option to the command line. This is required only when # compiling to native code (the bytecode compiler accepts but ignores # this option). PACK = $(shell if echo $(LIBMODULES) | grep $* >/dev/null ; then echo -for-pack MenhirLib ; else echo ; fi) %.cmx %.o: %.ml $(CAMLOPT) $(OFLAGS) $(PACK) -c $< # The source file for this parser varies. It is either parser.mly or # fancy-parser.mly. # parser.ml parser.mli: $(PARSER).mly @/bin/rm -f parser.ml parser.mli $(PGEN) -b parser $< # This parser must be built with ocamlyacc, because its client # watches for Parsing.Parse_error! # # Using ocamlyacc or Menhir interchangeably would be possible, # via an ocamlyacc wrapper that adds the definition "exception # Error = Parsing.Parse_error" at the end of the generated .ml # and .mli files. 
# sentenceParser.ml sentenceParser.mli : sentenceParser.mly @/bin/rm -f sentenceParser.ml sentenceParser.mli $(CAMLYACC) -b sentenceParser $< %.ml: %.mll @/bin/rm -f $@ $(CAMLLEX) $< # ---------------------------------------------------------------------------- # Checking the version of the ocaml compiler. # # We check the bytecode compiler only, because some architectures don't have # the native code compiler. We assume that both compilers, if present, are in # sync. # We build a bytecode executable (rather than use the toplevel loop) because # we need to load str.cma and some ocaml ports do not support dynamic loading # (e.g. ocaml 3.09, MacOS/Intel). .versioncheck: @ echo Checking that Objective Caml is recent enough... @ $(CAMLC) -o check-ocaml-version str.cma checkOCamlVersion.ml @ ./check-ocaml-version --verbose --gt "3.09" @ rm check-ocaml-version @ touch .versioncheck clean:: rm -f .versioncheck menhir-20130116/src/grammar.ml0000644000175000017500000007572712075533603015060 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) open UnparameterizedSyntax open Syntax open Stretch open Positions (* ------------------------------------------------------------------------ *) (* Precedence levels for tokens or pseudo-tokens alike. *) module TokPrecedence = struct (* This set records, on a token by token basis, whether the token's precedence level is ever useful. This allows emitting warnings about useless precedence declarations. 
*) let ever_useful : StringSet.t ref = ref StringSet.empty let use id = ever_useful := StringSet.add id !ever_useful (* This function is invoked when someone wants to consult a token's precedence level. This does not yet mean that this level is useful, though. Indeed, if it is subsequently compared against [UndefinedPrecedence], it will not allow solving a conflict. So, in addition to the desired precedence level, we return a delayed computation which, when evaluated, records that this precedence level was useful. *) let levelip id properties = lazy (use id), properties.tk_priority let leveli id = let properties = try StringMap.find id Front.grammar.tokens with Not_found -> assert false (* well-formedness check has been performed earlier *) in levelip id properties (* This function is invoked after the automaton has been constructed. It warns about unused precedence levels. *) let diagnostics () = StringMap.iter (fun id properties -> if not (StringSet.mem id !ever_useful) then match properties.tk_priority with | UndefinedPrecedence -> () | PrecedenceLevel (_, _, pos1, pos2) -> Error.grammar_warning (Positions.two pos1 pos2) (Printf.sprintf "the precedence level assigned to %s is never useful." id) ) Front.grammar.tokens end (* ------------------------------------------------------------------------ *) (* Nonterminals. *) module Nonterminal = struct type t = int let n2i i = i let compare = (-) (* Determine how many nonterminals we have and build mappings both ways between names and indices. A new nonterminal is created for every start symbol. 
*) let new_start_nonterminals = StringSet.fold (fun symbol ss -> (symbol ^ "'") :: ss) Front.grammar.start_symbols [] let original_nonterminals = StringMap.fold (fun nt _ rules -> nt :: rules) Front.grammar.rules [] let start = List.length new_start_nonterminals let (n : int), (name : string array), (map : int StringMap.t) = Misc.index (new_start_nonterminals @ original_nonterminals) let () = Error.logG 1 (fun f -> Printf.fprintf f "Grammar has %d nonterminal symbols, among which %d start symbols.\n" (n - start) start ) let is_start nt = nt < start let print normalize nt = if normalize then Misc.normalize name.(nt) else name.(nt) let lookup name = StringMap.find name map let positions nt = (StringMap.find (print false nt) Front.grammar.rules).positions let iter f = Misc.iteri n f let fold f accu = Misc.foldi n f accu let map f = Misc.mapi n f let iterx f = for nt = start to n - 1 do f nt done let foldx f accu = Misc.foldij start n f accu let ocamltype nt = assert (not (is_start nt)); try Some (StringMap.find (print false nt) Front.grammar.types) with Not_found -> None let tabulate f = Array.get (Array.init n f) end (* Sets and maps over nonterminals, used only below. *) module NonterminalMap = Patricia.Big module NonterminalSet = Patricia.Big.Domain (* ------------------------------------------------------------------------ *) (* Terminals. *) module Terminal = struct type t = int let t2i i = i let compare = (-) let equal (tok1 : t) (tok2 : t) = tok1 = tok2 (* Determine how many terminals we have and build mappings both ways between names and indices. A new terminal "#" is created. A new terminal "error" is created. The fact that the integer code assigned to the "error" pseudo-terminal is the last one is exploited in the table-based back-end. Pseudo-tokens (used in %prec declarations, but never declared using %token) are filtered out. 
*) let (n : int), (name : string array), (map : int StringMap.t) = let tokens = StringMap.fold (fun token properties tokens -> if properties.tk_is_declared then token :: tokens else tokens ) Front.grammar.tokens [] in match tokens with | [] -> Error.error [] "no tokens have been declared." | _ -> Misc.index ("error" :: tokens @ [ "#" ]) let print tok = name.(tok) let lookup name = StringMap.find name map let sharp = lookup "#" let error = lookup "error" let pseudo tok = (tok = sharp) || (tok = error) let token_properties = let not_so_dummy_properties = (* applicable to [error] and [#] *) { tk_filename = "__primitives__"; tk_priority = UndefinedPrecedence; tk_associativity = UndefinedAssoc; tk_ocamltype = None; tk_is_declared = true; tk_position = Positions.dummy; } in Array.init n (fun tok -> try StringMap.find name.(tok) Front.grammar.tokens with Not_found -> assert (tok = sharp || tok = error); not_so_dummy_properties ) let () = Error.logG 1 (fun f -> Printf.fprintf f "Grammar has %d terminal symbols.\n" (n - 2) ) let precedence_level tok = TokPrecedence.levelip (print tok) token_properties.(tok) let associativity tok = token_properties.(tok).tk_associativity let ocamltype tok = token_properties.(tok).tk_ocamltype let iter f = Misc.iteri n f let fold f accu = Misc.foldi n f accu let map f = Misc.mapi n f let mapx f = assert (sharp = n - 1); Misc.mapi (n-1) f (* If a token named [EOF] exists, then it is assumed to represent ocamllex's [eof] pattern, which means that the lexer may eventually produce an infinite stream of [EOF] tokens. This, combined with our error recovery mechanism, may lead to non-termination. We provide a warning against this somewhat obscure situation. Relying on the token's name is somewhat fragile, but this saves introducing an extra keyword for declaring which token represents [eof], and should not introduce much confusion. 
*) let eof = try Some (lookup "EOF") with Not_found -> None end (* Sets of terminals are used intensively in the LR(1) construction, so it is important that they be as efficient as possible. *) module TerminalSet = struct include CompressedBitSet let print toks = let _, accu = fold (fun tok (first, accu) -> false, if first then accu ^ (Terminal.print tok) else accu ^ " " ^ (Terminal.print tok) ) toks (true, "") in accu let universe = remove Terminal.sharp ( remove Terminal.error ( Terminal.fold add empty ) ) end (* Maps over terminals. *) module TerminalMap = Patricia.Big (* ------------------------------------------------------------------------ *) (* Symbols. *) module Symbol = struct type t = | N of Nonterminal.t | T of Terminal.t let compare sym1 sym2 = match sym1, sym2 with | N nt1, N nt2 -> Nonterminal.compare nt1 nt2 | T tok1, T tok2 -> Terminal.compare tok1 tok2 | N _, T _ -> 1 | T _, N _ -> -1 let equal sym1 sym2 = compare sym1 sym2 = 0 let rec lequal syms1 syms2 = match syms1, syms2 with | [], [] -> true | sym1 :: syms1, sym2 :: syms2 -> equal sym1 sym2 && lequal syms1 syms2 | _ :: _, [] | [], _ :: _ -> false let print = function | N nt -> Nonterminal.print false nt | T tok -> Terminal.print tok let nonterminal = function | T _ -> false | N _ -> true (* Printing an array of symbols. [offset] is the start offset -- we print everything to its right. [dot] is the dot offset -- we print a dot at this offset, if we find it. *) let printaod offset dot symbols = let buffer = Buffer.create 512 in let length = Array.length symbols in for i = offset to length do if i = dot then Buffer.add_string buffer ". 
"; if i < length then begin Buffer.add_string buffer (print symbols.(i)); Buffer.add_char buffer ' ' end done; Buffer.contents buffer let printao offset symbols = printaod offset (-1) symbols let printa symbols = printao 0 symbols let printl symbols = printa (Array.of_list symbols) let lookup name = try T (Terminal.lookup name) with Not_found -> try N (Nonterminal.lookup name) with Not_found -> assert false (* well-formedness check has been performed earlier *) end (* Sets of symbols. *) module SymbolSet = Set.Make(Symbol) (* Maps over symbols. *) module SymbolMap = struct include Map.Make(Symbol) let domain m = fold (fun symbol _ accu -> symbol :: accu ) m [] let purelynonterminal m = fold (fun symbol _ accu -> accu && Symbol.nonterminal symbol ) m true end (* ------------------------------------------------------------------------ *) (* Productions. *) module Production = struct type index = int (* Create an array of productions. Record which productions are associated with every nonterminal. A new production S' -> S is created for every start symbol S. It is known as a start production. 
*) let n : int = let n = StringMap.fold (fun _ { branches = branches } n -> n + List.length branches ) Front.grammar.rules 0 in Error.logG 1 (fun f -> Printf.fprintf f "Grammar has %d productions.\n" n); n + StringSet.cardinal Front.grammar.start_symbols let p2i prod = prod let i2p prod = assert (prod >= 0 && prod < n); prod let table : (Nonterminal.t * Symbol.t array) array = Array.make n (-1, [||]) let identifiers : identifier array array = Array.make n [||] let used : bool array array = Array.make n [||] let actions : action option array = Array.make n None let ntprods : (int * int) array = Array.make Nonterminal.n (-1, -1) let positions : Positions.t list array = Array.make n [] let (start : int), (startprods : index NonterminalMap.t) = StringSet.fold (fun nonterminal (k, startprods) -> let nt = Nonterminal.lookup nonterminal and nt' = Nonterminal.lookup (nonterminal ^ "'") in table.(k) <- (nt', [| Symbol.N nt |]); identifiers.(k) <- [| "_1" |]; used.(k) <- [| true |]; ntprods.(nt') <- (k, k+1); positions.(k) <- Nonterminal.positions nt; k+1, NonterminalMap.add nt k startprods ) Front.grammar.start_symbols (0, NonterminalMap.empty) let prec_decl : symbol located option array = Array.make n None let reduce_precedence : precedence_level array = Array.make n UndefinedPrecedence let (_ : int) = StringMap.fold (fun nonterminal { branches = branches } k -> let nt = Nonterminal.lookup nonterminal in let k' = List.fold_left (fun k branch -> let action = branch.action and sprec = branch.branch_shift_precedence and rprec = branch.branch_reduce_precedence in let symbols = Array.of_list branch.producers in table.(k) <- (nt, Array.map (fun (v, _) -> Symbol.lookup v) symbols); identifiers.(k) <- Array.mapi (fun i (_, ido) -> match ido with | None -> (* Symbols for which no name was chosen will be represented by variables named _1, _2, etc. 
*) Printf.sprintf "_%d" (i + 1) | Some id -> (* Symbols for which a name was explicitly chosen will be known by that name in semantic actions. *) id ) symbols; used.(k) <- Array.mapi (fun i (_, ido) -> match ido with | None -> (* A symbol referred to as [$i] is used if and only if the [$i] keyword appears in the semantic action. *) Action.has_dollar (i + 1) action | Some _ -> (* A symbol referred to via a name is considered used. This is a conservative approximation. *) true ) symbols; actions.(k) <- Some action; reduce_precedence.(k) <- rprec; prec_decl.(k) <- sprec; positions.(k) <- [ branch.branch_position ]; k+1 ) k branches in ntprods.(nt) <- (k, k'); k' ) Front.grammar.rules start (* Iteration over the productions associated with a specific nonterminal. *) let iternt nt f = let k, k' = ntprods.(nt) in for prod = k to k' - 1 do f prod done let foldnt (nt : Nonterminal.t) (accu : 'a) (f : index -> 'a -> 'a) : 'a = let k, k' = ntprods.(nt) in let rec loop accu prod = if prod < k' then loop (f prod accu) (prod + 1) else accu in loop accu k (* Accessors. *) let def prod = table.(prod) let nt prod = let nt, _ = table.(prod) in nt let rhs prod = let _, rhs = table.(prod) in rhs let length prod = Array.length (rhs prod) let identifiers prod = identifiers.(prod) let used prod = used.(prod) let is_start prod = prod < start let classify prod = if is_start prod then match (rhs prod).(0) with | Symbol.N nt -> Some nt | Symbol.T _ -> assert false else None let action prod = match actions.(prod) with | Some action -> action | None -> (* Start productions have no action. *) assert (is_start prod); assert false let positions prod = positions.(prod) let startsymbol2startprod nt = try NonterminalMap.find nt startprods with Not_found -> assert false (* [nt] is not a start symbol *) (* Iteration. 
*) let iter f = Misc.iteri n f let fold f accu = Misc.foldi n f accu let map f = Misc.mapi n f let iterx f = for prod = start to n - 1 do f prod done let foldx f accu = Misc.foldij start n f accu (* Printing a production. *) let print prod = assert (not (is_start prod)); let nt, rhs = table.(prod) in Printf.sprintf "%s -> %s" (Nonterminal.print false nt) (Symbol.printao 0 rhs) (* Tabulation. *) let tabulate f = Misc.tabulate n f let tabulateb f = Misc.tabulateb n f (* This array allows recording, on a production by production basis, whether the production's shift precedence is ever useful. This allows emitting warnings about useless %prec declarations. *) let prec_decl_ever_useful = Array.make n false let consult_prec_decl prod = lazy (prec_decl_ever_useful.(prod) <- true), prec_decl.(prod) let diagnostics () = iterx (fun prod -> if not prec_decl_ever_useful.(prod) then match prec_decl.(prod) with | None -> () | Some id -> Error.grammar_warning [Positions.position id] "this %prec declaration is never useful." ) (* Determining the precedence level of a production. If no %prec declaration was explicitly supplied, it is the precedence level of the rightmost terminal symbol in the production's right-hand side. 
*) type production_level = | PNone | PRightmostToken of Terminal.t | PPrecDecl of symbol let rightmost_terminal prod = Array.fold_left (fun accu symbol -> match symbol with | Symbol.T tok -> PRightmostToken tok | Symbol.N _ -> accu ) PNone (rhs prod) let combine e1 e2 = lazy (Lazy.force e1; Lazy.force e2) let shift_precedence prod = let fact1, prec_decl = consult_prec_decl prod in let oterminal = match prec_decl with | None -> rightmost_terminal prod | Some { value = terminal } -> PPrecDecl terminal in match oterminal with | PNone -> fact1, UndefinedPrecedence | PRightmostToken tok -> let fact2, level = Terminal.precedence_level tok in combine fact1 fact2, level | PPrecDecl id -> let fact2, level = TokPrecedence.leveli id in combine fact1 fact2, level end (* ------------------------------------------------------------------------ *) (* Maps over productions. *) module ProductionMap = struct include Patricia.Big (* Iteration over the start productions only. *) let start f = Misc.foldi Production.start (fun prod m -> add prod (f prod) m ) empty end (* ------------------------------------------------------------------------ *) (* Build the grammar's forward and backward reference graphs. In the backward reference graph, edges relate each nonterminal [nt] to each of the nonterminals whose definition mentions [nt]. The reverse reference graph is used in the computation of the nullable, nonempty, and FIRST sets. The forward reference graph is unused but can be printed on demand. 
*)

let forward : NonterminalSet.t array =
  (* [Array.create] is a deprecated alias for [Array.make]; use the
     latter, consistently with the rest of this file. *)
  Array.make Nonterminal.n NonterminalSet.empty

let backward : NonterminalSet.t array =
  Array.make Nonterminal.n NonterminalSet.empty

let () =
  (* For each production [nt1 -> rhs], record a forward edge from [nt1]
     to every nonterminal [nt2] that appears in [rhs], and a backward
     edge in the opposite direction. *)
  Array.iter (fun (nt1, rhs) ->
    Array.iter (function
      | Symbol.T _ ->
          ()
      | Symbol.N nt2 ->
          forward.(nt1) <- NonterminalSet.add nt2 forward.(nt1);
          backward.(nt2) <- NonterminalSet.add nt1 backward.(nt2)
    ) rhs
  ) Production.table

(* ------------------------------------------------------------------------ *)
(* If requested, dump the forward reference graph. *)

let () =
  if Settings.graph then
    let module P = Dot.Print (struct
      type vertex = Nonterminal.t
      let name nt =
        Printf.sprintf "nt%d" nt
      let successors (f : ?style:Dot.style -> label:string -> vertex -> unit) nt =
        NonterminalSet.iter (fun successor ->
          f ~label:"" successor
        ) forward.(nt)
      let iter (f : ?style:Dot.style -> label:string -> vertex -> unit) =
        Nonterminal.iter (fun nt ->
          f ~label:(Nonterminal.print false nt) nt
        )
    end) in
    let f = open_out (Settings.base ^ ".dot") in
    P.print f;
    close_out f

(* ------------------------------------------------------------------------ *)
(* Generic support for fixpoint computations.

   A fixpoint computation associates a property with every nonterminal.
   A monotone function tells how properties are computed. [compute nt]
   updates the property associated with nonterminal [nt] and returns a
   flag that tells whether the property actually needed an update. The
   state of the computation is maintained entirely inside [compute] and
   is invisible here.

   Whenever a property of [nt] is updated, the properties of the
   terminals whose definitions depend on [nt] are updated. The
   dependency graph must be explicitly supplied. *)

(* [fixpoint dependencies compute] runs a worklist algorithm: every
   nonterminal starts out on the queue; whenever [compute nt] reports a
   change, the nonterminals that depend on [nt] are re-enqueued, unless
   already pending. Termination relies on [compute] being monotone. *)

let fixpoint (dependencies : NonterminalSet.t array) (compute : Nonterminal.t -> bool) : unit =
  let queue : Nonterminal.t Queue.t = Queue.create () in
  let onqueue : bool array = Array.make Nonterminal.n true in
  for i = 0 to Nonterminal.n - 1 do
    Queue.add i queue
  done;
  Misc.qiter (fun nt ->
    onqueue.(nt) <- false;
    let changed = compute nt in
    if changed then
      NonterminalSet.iter (fun nt ->
        if not onqueue.(nt) then begin
          Queue.add nt queue;
          onqueue.(nt) <- true
        end
      ) dependencies.(nt)
  ) queue

(* ------------------------------------------------------------------------ *)
(* Compute which nonterminals are nonempty, that is, recognize a
   nonempty language. Also, compute which nonterminals are nullable.
   The two computations are almost identical. The only difference is in
   the base case: a single terminal symbol is not nullable, but is
   nonempty. *)

let compute (basecase : bool) : (bool array) * (Symbol.t -> bool) =
  let property : bool array =
    Array.make Nonterminal.n false
  in
  let symbol_has_property = function
    | Symbol.T _ ->
        basecase
    | Symbol.N nt ->
        property.(nt)
  in
  fixpoint backward (fun nt ->
    if property.(nt) then
      false (* the property is monotone: once true, no further change *)
    else
      (* disjunction over all productions for this nonterminal *)
      let updated =
        Production.foldnt nt false (fun prod accu ->
          accu ||
          let rhs = Production.rhs prod in
          (* conjunction over all symbols in the right-hand side *)
          Array.fold_left (fun accu symbol ->
            accu && symbol_has_property symbol
          ) true rhs
        )
      in
      property.(nt) <- updated;
      updated
  );
  property, symbol_has_property

let () =
  (* Warn about user-defined (non-start) nonterminals that generate the
     empty language. *)
  let nonempty, _ = compute true in
  for nt = Nonterminal.start to Nonterminal.n - 1 do
    if not nonempty.(nt) then
      Error.grammar_warning
        (Nonterminal.positions nt)
        (Printf.sprintf "%s generates the empty language." (Nonterminal.print false nt))
  done

let (nullable : bool array), (nullable_symbol : Symbol.t -> bool) =
  compute false

(* ------------------------------------------------------------------------ *)

(* Compute FIRST sets.
*)

let first =
  Array.make Nonterminal.n TerminalSet.empty

let first_symbol = function
  | Symbol.T tok ->
      TerminalSet.singleton tok
  | Symbol.N nt ->
      first.(nt)

(* [nullable_first_rhs rhs i] considers the suffix of [rhs] that starts
   at offset [i]. It returns whether that suffix is nullable, paired
   with its FIRST set. *)

let nullable_first_rhs (rhs : Symbol.t array) (i : int) : bool * TerminalSet.t =
  let length = Array.length rhs in
  assert (i <= length);
  let rec loop i toks =
    if i = length then
      true, toks
    else
      let symbol = rhs.(i) in
      let toks =
        TerminalSet.union (first_symbol symbol) toks
      in
      if nullable_symbol symbol then
        loop (i+1) toks
      else
        false, toks
  in
  loop i TerminalSet.empty

let () =
  fixpoint backward (fun nt ->
    let original = first.(nt) in
    (* union over all productions for this nonterminal *)
    let updated =
      Production.foldnt nt TerminalSet.empty (fun prod accu ->
        let rhs = Production.rhs prod in
        let _, toks = nullable_first_rhs rhs 0 in
        TerminalSet.union toks accu
      )
    in
    first.(nt) <- updated;
    TerminalSet.compare original updated <> 0
  )

(* ------------------------------------------------------------------------ *)
(* Dump the analysis results. *)

let () =
  Error.logG 2 (fun f ->
    for nt = 0 to Nonterminal.n - 1 do
      Printf.fprintf f "nullable(%s) = %b\n"
        (Nonterminal.print false nt)
        nullable.(nt)
    done;
    for nt = 0 to Nonterminal.n - 1 do
      Printf.fprintf f "first(%s) = %s\n"
        (Nonterminal.print false nt)
        (TerminalSet.print first.(nt))
    done
  )

let () =
  Time.tick "Analysis of the grammar"

(* ------------------------------------------------------------------------ *)
(* Compute FOLLOW sets. Unnecessary for us, but requested by a user.
   Also, this is useful for the SLR(1) test. Thus, we perform this
   analysis only on demand. *)

let follow : TerminalSet.t array Lazy.t =
  lazy (

    let follow =
      Array.make Nonterminal.n TerminalSet.empty
    and forward : NonterminalSet.t array =
      (* [Array.create] is a deprecated alias for [Array.make]; use the
         latter, consistently with the rest of this file. *)
      Array.make Nonterminal.n NonterminalSet.empty
    and backward : NonterminalSet.t array =
      Array.make Nonterminal.n NonterminalSet.empty
    in

    (* Iterate over all start symbols. *)

    for nt = 0 to Nonterminal.start - 1 do
      assert (Nonterminal.is_start nt);
      (* Add # to FOLLOW(nt). *)
      follow.(nt) <- TerminalSet.singleton Terminal.sharp
    done;

    (* We need to do this explicitly because our start productions are
       of the form S' -> S, not S' -> S #, so # will not automatically
       appear into FOLLOW(S) when the start productions are examined. *)

    (* Iterate over all productions. *)
    Array.iter (fun (nt1, rhs) ->
      (* Iterate over all nonterminal symbols [nt2] in the right-hand side. *)
      Array.iteri (fun i symbol ->
        match symbol with
        | Symbol.T _ ->
            ()
        | Symbol.N nt2 ->
            let nullable, first = nullable_first_rhs rhs (i+1) in
            (* The FIRST set of the remainder of the right-hand side
               contributes to the FOLLOW set of [nt2]. *)
            follow.(nt2) <- TerminalSet.union first follow.(nt2);
            (* If the remainder of the right-hand side is nullable,
               FOLLOW(nt1) contributes to FOLLOW(nt2). *)
            if nullable then begin
              forward.(nt1) <- NonterminalSet.add nt2 forward.(nt1);
              backward.(nt2) <- NonterminalSet.add nt1 backward.(nt2)
            end
      ) rhs
    ) Production.table;

    (* The fixpoint computation used here is not the most efficient
       algorithm -- one could do better by first collapsing the
       strongly connected components, then walking the graph in
       topological order. But this will do. *)

    fixpoint forward (fun nt ->
      let original = follow.(nt) in
      (* union over all contributors *)
      let updated =
        NonterminalSet.fold (fun nt' accu ->
          TerminalSet.union follow.(nt') accu
        ) backward.(nt) original
      in
      follow.(nt) <- updated;
      TerminalSet.compare original updated <> 0
    );

    follow

  )

(* Define an accessor that triggers the computation of the FOLLOW sets
   if it has not been performed already. *)

let follow nt =
  (Lazy.force follow).(nt)

(* At log level 2, display the FOLLOW sets. *)

let () =
  Error.logG 2 (fun f ->
    for nt = 0 to Nonterminal.n - 1 do
      Printf.fprintf f "follow(%s) = %s\n"
        (Nonterminal.print false nt)
        (TerminalSet.print (follow nt))
    done
  )

(* Compute FOLLOW sets for the terminal symbols as well.
Again, unnecessary for us, but requested by a user. This is done in a single pass over the grammar -- no new fixpoint computation is required. *) let tfollow : TerminalSet.t array Lazy.t = lazy ( let tfollow = Array.make Terminal.n TerminalSet.empty in (* Iterate over all productions. *) Array.iter (fun (nt1, rhs) -> (* Iterate over all terminal symbols [t2] in the right-hand side. *) Array.iteri (fun i symbol -> match symbol with | Symbol.N _ -> () | Symbol.T t2 -> let nullable, first = nullable_first_rhs rhs (i+1) in (* The FIRST set of the remainder of the right-hand side contributes to the FOLLOW set of [t2]. *) tfollow.(t2) <- TerminalSet.union first tfollow.(t2); (* If the remainder of the right-hand side is nullable, FOLLOW(nt1) contributes to FOLLOW(t2). *) if nullable then tfollow.(t2) <- TerminalSet.union (follow nt1) tfollow.(t2) ) rhs ) Production.table; tfollow ) (* Define another accessor. *) let tfollow t = (Lazy.force tfollow).(t) (* At log level 3, display the FOLLOW sets for terminal symbols. *) let () = Error.logG 3 (fun f -> for t = 0 to Terminal.n - 1 do Printf.fprintf f "follow(%s) = %s\n" (Terminal.print t) (TerminalSet.print (tfollow t)) done ) (* ------------------------------------------------------------------------ *) (* Provide explanations about FIRST sets. *) (* The idea is to explain why a certain token appears in the FIRST set for a certain sequence of symbols. Such an explanation involves basic assertions of the form (i) symbol N is nullable and (ii) the token appears in the FIRST set for symbol N. We choose to take these basic facts for granted, instead of recursively explaining them, so as to keep explanations short. *) (* We first produce an explanation in abstract syntax, then convert it to a human-readable string. 
*)

(* An explanation, in abstract syntax. *)

type explanation =
  | EObvious
      (* sequence begins with desired token *)
  | EFirst of Terminal.t * Nonterminal.t
      (* sequence begins with a nonterminal that produces desired token *)
  | ENullable of Symbol.t list * explanation
      (* sequence begins with a list of nullable symbols and ... *)

(* [explain tok rhs i] explains why the token [tok] appears in the FIRST
   set of the suffix of [rhs] that begins at offset [i]. The assertions
   below encode the precondition that [tok] really does belong to that
   FIRST set. *)

let explain (tok : Terminal.t) (rhs : Symbol.t array) (i : int) =
  let length = Array.length rhs in
  let rec loop i =
    assert (i < length);
    let symbol = rhs.(i) in
    match symbol with
    | Symbol.T tok' ->
        (* The sequence begins with the desired token itself. *)
        assert (Terminal.equal tok tok');
        EObvious
    | Symbol.N nt ->
        if TerminalSet.mem tok first.(nt) then
          (* The leading nonterminal can itself produce [tok]. *)
          EFirst (tok, nt)
        else begin
          (* Otherwise [nt] must be nullable and the explanation lies
             further to the right; prepend [nt] to the nullable prefix. *)
          assert nullable.(nt);
          match loop (i + 1) with
          | ENullable (symbols, e) ->
              ENullable (symbol :: symbols, e)
          | e ->
              ENullable ([ symbol ], e)
        end
  in
  loop i

(* [convert] turns an abstract explanation into a human-readable string. *)

let rec convert = function
  | EObvious ->
      ""
  | EFirst (tok, nt) ->
      Printf.sprintf "%s can begin with %s"
        (Nonterminal.print false nt)
        (Terminal.print tok)
  | ENullable (symbols, e) ->
      let e = convert e in
      Printf.sprintf "%scan vanish%s%s"
        (Symbol.printl symbols)
        (if e = "" then "" else " and ")
        e

(* ------------------------------------------------------------------------ *)
(* Package the analysis results. *)

module Analysis = struct

  let nullable_first_rhs = nullable_first_rhs

  let explain_first_rhs (tok : Terminal.t) (rhs : Symbol.t array) (i : int) =
    convert (explain tok rhs i)

  let follow = follow

end

(* ------------------------------------------------------------------------ *)
(* Conflict resolution via precedences.
*) module Precedence = struct type choice = | ChooseShift | ChooseReduce | ChooseNeither | DontKnow type order = Lt | Gt | Eq | Ic let precedence_order p1 p2 = match p1, p2 with | UndefinedPrecedence, _ | _, UndefinedPrecedence -> Ic | PrecedenceLevel (m1, l1, _, _), PrecedenceLevel (m2, l2, _, _) -> if not (Mark.same m1 m2) then Ic else if l1 > l2 then Gt else if l1 < l2 then Lt else Eq let shift_reduce tok prod = let fact1, tokp = Terminal.precedence_level tok and fact2, prodp = Production.shift_precedence prod in match precedence_order tokp prodp with (* Our information is inconclusive. Drop [fact1] and [fact2], that is, do not record that this information was useful. *) | Ic -> DontKnow (* Our information is useful. Record that fact by evaluating [fact1] and [fact2]. *) | (Eq | Lt | Gt) as c -> Lazy.force fact1; Lazy.force fact2; match c with | Ic -> assert false (* already dispatched *) | Eq -> begin match Terminal.associativity tok with | LeftAssoc -> ChooseReduce | RightAssoc -> ChooseShift | NonAssoc -> ChooseNeither | _ -> assert false (* If [tok]'s precedence level is defined, then its associativity must be defined as well. *) end | Lt -> ChooseReduce | Gt -> ChooseShift let reduce_reduce prod1 prod2 = let rp1 = Production.reduce_precedence.(prod1) and rp2 = Production.reduce_precedence.(prod2) in match precedence_order rp1 rp2 with | Lt -> Some prod1 | Gt -> Some prod2 | Eq -> (* the order is strict except in presence of inlining: two branches can have the same precedence level when they come from an inlined one. 
*) None | Ic -> None end let diagnostics () = TokPrecedence.diagnostics(); Production.diagnostics() menhir-20130116/src/derivation.ml0000644000175000017500000002276012075533603015563 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) open Grammar (* -------------------------------------------------------------------------- *) (* This is a data structure for linear derivation trees. These are derivation trees that are list-like (that is, they do not branch), because a single path is of interest. A tree is either empty or formed of a non-terminal symbol at the root and a forest below the root. A forest is an ordered list of elements. However, its elements are not trees, as one would perhaps expect. Because we are interested in *linear* derivation trees, only one element of the forest receives focus and is a tree. All other elements remain un-expanded, so they are just symbols. In other words, a linear derivation tree is roughly just a list of levels, where each forest corresponds to one level. *) type 'focus level = { prefix: Symbol.t list; focus: 'focus; suffix: Symbol.t list; comment: string } type tree = | TEmpty | TRooted of Symbol.t * forest and forest = tree level (* We make use of contexts with a forest-shaped hole. We have tree contexts and forest contexts. Tree contexts do not have a case for holes, since we work with forest-shaped holes only. Forest contexts have one. 
*) type ctree = | CRooted of Symbol.t * cforest and cforest = | CHole | CCons of ctree level (* Make a few types visible to clients. *) type t = forest type context = cforest (* -------------------------------------------------------------------------- *) (* Construction. *) let rec array_to_list a i j = if i = j then [] else a.(i) :: array_to_list a (i + 1) j let empty = { prefix = []; focus = TEmpty; suffix = []; comment = "" } let tail pos rhs = let length = Array.length rhs in assert (pos < length); { prefix = []; focus = TEmpty; suffix = array_to_list rhs pos length; comment = "" } let build pos rhs forest comment = let length = Array.length rhs in assert (pos < length); match rhs.(pos) with | Symbol.T _ -> assert false | Symbol.N _ as symbol -> { prefix = []; focus = TRooted (symbol, forest); suffix = array_to_list rhs (pos + 1) length; comment = (match comment with None -> "" | Some comment -> comment) } let prepend symbol forest = { forest with prefix = symbol :: forest.prefix } (* -------------------------------------------------------------------------- *) (* Display. *) let buffer = Buffer.create 32768 let rec print_blank k = if k > 0 then begin Buffer.add_char buffer ' '; print_blank (k - 1) end let print_symbol symbol = let word = Symbol.print symbol in Buffer.add_string buffer word; Buffer.add_char buffer ' '; String.length word + 1 let print_symbols symbols = List.fold_left (fun offset symbol -> offset + print_symbol symbol ) 0 symbols let print_level print_focus_root print_focus_remainder offset forest = print_blank offset; let offset = offset + print_symbols forest.prefix in print_focus_root forest.focus; let (_ : int) = print_symbols forest.suffix in if String.length forest.comment > 0 then begin Buffer.add_string buffer "// "; Buffer.add_string buffer forest.comment end; Buffer.add_char buffer '\n'; print_focus_remainder offset forest.focus let print_tree_root = function | TEmpty -> Buffer.add_string buffer ". 
" | TRooted (symbol, _) -> let (_ : int) = print_symbol symbol in () let rec print_forest offset forest = print_level print_tree_root print_tree_remainder offset forest and print_tree_remainder offset = function | TEmpty -> () | TRooted (_, forest) -> print_forest offset forest let print_ctree_root = function | CRooted (symbol, _) -> let (_ : int) = print_symbol symbol in () let rec print_cforest offset cforest = match cforest with | CHole -> print_blank offset; Buffer.add_string buffer "(?)\n" | CCons forest -> print_level print_ctree_root print_ctree_remainder offset forest and print_ctree_remainder offset = function | CRooted (_, cforest) -> print_cforest offset cforest let wrap print channel x = Buffer.clear buffer; print 0 x; Buffer.output_buffer channel buffer let print = wrap print_forest let printc = wrap print_cforest (* -------------------------------------------------------------------------- *) (* [punch] turns a (tree or forest) into a pair of a (tree or forest) context and a residual forest, where the context is chosen maximal. In other words, the residual forest consists of a single level -- its focus is [TEmpty]. *) let rec punch_tree tree : (ctree * forest) option = match tree with | TEmpty -> None | TRooted (symbol, forest) -> let forest1, forest2 = punch_forest forest in Some (CRooted (symbol, forest1), forest2) and punch_forest forest : cforest * forest = match punch_tree forest.focus with | None -> CHole, forest | Some (ctree1, forest2) -> CCons { prefix = forest.prefix; focus = ctree1; suffix = forest.suffix; comment = forest.comment }, forest2 (* [fill] fills a (tree or forest) context with a forest so as to produce a new (tree or forest). 
*) let rec fill_tree ctree1 forest2 : tree = match ctree1 with | CRooted (symbol1, cforest1) -> TRooted (symbol1, fill_forest cforest1 forest2) and fill_forest cforest1 forest2 : forest = match cforest1 with | CHole -> forest2 | CCons level1 -> { prefix = level1.prefix; focus = fill_tree level1.focus forest2; suffix = level1.suffix; comment = level1.comment } (* [common] factors the maximal common (tree or forest) context out of a pair of a (tree or forest) context and a (tree or forest). It returns the (tree or forest) context as well as the residuals of the two parameters. *) let rec common_tree ctree1 tree2 : (ctree * cforest * forest) option = match ctree1, tree2 with | CRooted _, TEmpty -> None | CRooted (symbol1, cforest1), TRooted (symbol2, forest2) -> if Symbol.equal symbol1 symbol2 then let cforest, cforest1, forest2 = common_forest cforest1 forest2 in Some (CRooted (symbol1, cforest), cforest1, forest2) else None and common_forest cforest1 forest2 : cforest * cforest * forest = match cforest1 with | CHole -> CHole, cforest1, forest2 | CCons forest1 -> if Symbol.lequal forest1.prefix forest2.prefix && Symbol.lequal forest1.suffix forest2.suffix && forest1.comment = forest2.comment then begin match common_tree forest1.focus forest2.focus with | None -> CHole, cforest1, forest2 | Some (ctree, csubforest1, subforest2) -> let cforest = { prefix = forest1.prefix; focus = ctree; suffix = forest1.suffix; comment = forest1.comment } in CCons cforest, csubforest1, subforest2 end else CHole, cforest1, forest2 (* [factor] factors the maximal common forest context out of a nonempty family of forests. We assume that the family is represented as a map indexed by items, because this is convenient for the application that we have in mind, but this assumption is really irrelevant. *) let rec factor forests = match Item.Map.fold (fun item forest accu -> match accu with | None -> (* First time through the loop, so [forest] is the first forest that we examine. 
Punch it, so as to produce a maximal forest context and a residual forest. *) let context, residual = punch_forest forest in Some (context, Item.Map.singleton item residual) | Some (context, residuals) -> (* Another iteration through the loop. [context] and [residuals] are the maximal common context and the residuals of the forests examined so far. *) (* Combine the common context obtained so far with the forest at hand. This yields a new, smaller common context, as well as residuals for the previous common context and for the forest at hand. *) let context, contextr, forestr = common_forest context forest in (* The residual forests are now: (i) the residual forest [forestr]; and (ii) the previous residual forests [residuals], each of which must be placed with the residual context [contextr]. *) let residuals = Item.Map.add item forestr (Item.Map.map (fill_forest contextr) residuals) in Some (context, residuals) ) forests None with | None -> assert false (* parameter [forests] was an empty map *) | Some (context, residuals) -> context, residuals menhir-20130116/src/unionFind.ml0000644000175000017500000001277612075533603015356 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* $Id: unionFind.ml,v 1.5 2005/12/01 16:20:07 regisgia Exp $ *) (** This module implements a simple and efficient union/find algorithm. See Robert E. Tarjan, ``Efficiency of a Good But Not Linear Set Union Algorithm'', JACM 22(2), 1975. 
*)

(** The abstraction defined by this module is a set of points,
    partitioned into equivalence classes. With each equivalence class,
    a piece of information, of abstract type ['a], is associated; we
    call it a descriptor.

    A point is implemented as a cell, whose (mutable) contents consist
    of a single link to either information about the equivalence class,
    or another point. Thus, points form a graph, which must be acyclic,
    and whose connected components are the equivalence classes. In
    every equivalence class, exactly one point has no outgoing edge,
    and carries information about the class instead. It is the class's
    representative element.

    Information about a class consists of an integer weight (the number
    of elements in the class) and of the class's descriptor. *)

type 'a point = {
    mutable link: 'a link
  }

and 'a link =
  | Info of 'a info
  | Link of 'a point

and 'a info = {
    mutable weight: int;
    mutable descriptor: 'a
  }

(** [fresh desc] creates a fresh point and returns it. It forms an
    equivalence class of its own, whose descriptor is [desc]. *)

let fresh desc =
  { link = Info { weight = 1; descriptor = desc } }

(** [repr point] returns the representative element of [point]'s
    equivalence class, found by chasing links. Paths are compressed
    along the way; this function performs no memory allocation. *)

let rec repr point =
  match point.link with
  | Info _ ->
      (* [point] carries the class information: it is the representative. *)
      point
  | Link next ->
      let root = repr next in
      (* After the recursive call, if [next] is not itself the
         representative, then [next.link] is [Link root]; copying it
         into [point.link] compresses the path for future queries. *)
      if root != next then
        point.link <- next.link;
      root

(** [find point] returns the descriptor associated with [point]'s
    equivalence class. *)

let rec find point =
  (* Handle paths of length 0 or 1 without calling [repr], optimizing
     the common cases at the expense of the general one. *)
  match point.link with
  | Info info
  | Link { link = Info info } ->
      info.descriptor
  | Link { link = Link _ } ->
      find (repr point)

(** [change point v] replaces the descriptor of [point]'s equivalence
    class with [v]. *)

let rec change point v =
  match point.link with
  | Info info
  | Link { link = Info info } ->
      info.descriptor <- v
  | Link { link = Link _ } ->
      change (repr point) v

(** [union point1 point2] merges the equivalence classes of [point1]
    and [point2] (which must be distinct) into a single class whose
    descriptor is that originally associated with [point2].

    The representative of the lighter class is made to point to that of
    the heavier class, which keeps link paths logarithmic in length,
    even before accounting for path compression. Merging two distinct
    classes cannot create a cycle in the graph. *)

let union point1 point2 =
  let root1 = repr point1
  and root2 = repr point2 in
  assert (root1 != root2);
  match root1.link, root2.link with
  | Info info1, Info info2 ->
      let total = info1.weight + info2.weight in
      if info1.weight >= info2.weight then begin
        (* [root1] absorbs [root2]'s class, but adopts its descriptor. *)
        root2.link <- Link root1;
        info1.weight <- total;
        info1.descriptor <- info2.descriptor
      end
      else begin
        (* [root2] absorbs [root1]'s class and keeps its own descriptor. *)
        root1.link <- Link root2;
        info2.weight <- total
      end
  | _, _ ->
      assert false (* [repr] guarantees that a representative's [link] matches [Info _]. *)

(** [equivalent point1 point2] tells whether [point1] and [point2]
    belong to the same equivalence class. *)

let equivalent point1 point2 =
  repr point1 == repr point2

(** [eunion point1 point2] is identical to [union], except it does
    nothing if [point1] and [point2] are already equivalent.
*) let eunion point1 point2 = if not (equivalent point1 point2) then union point1 point2 (** [redundant] maps all members of an equivalence class, but one, to [true]. *) let redundant = function | { link = Link _ } -> true | { link = Info _ } -> false menhir-20130116/src/positions.ml0000644000175000017500000000762312075533603015447 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* $Id: positions.ml,v 1.9 2005/12/01 16:20:07 regisgia Exp $ *) (* TEMPORARY vrifier que ces fonctions sont utilises partout et de faon cohrente; interaction avec [Error]? 
*) open Lexing type t = { start_p : Lexing.position; end_p : Lexing.position } type 'a located = { value : 'a; position : t; } let value { value = v } = v let position { position = p } = p let with_pos p v = { value = v; position = p; } let with_poss p1 p2 v = with_pos { start_p = p1; end_p = p2 } v let map f v = { value = f v.value; position = v.position; } let iter f { value = v } = f v let mapd f v = let w1, w2 = f v.value in let pos = v.position in { value = w1; position = pos }, { value = w2; position = pos } let dummy = { start_p = Lexing.dummy_pos; end_p = Lexing.dummy_pos } let unknown_pos v = { value = v; position = dummy } let start_of_position p = p.start_p let end_of_position p = p.end_p let filename_of_position p = p.start_p.Lexing.pos_fname let line p = p.pos_lnum let column p = p.pos_cnum - p.pos_bol let characters p1 p2 = (column p1, p2.pos_cnum - p1.pos_bol) (* intentionally [p1.pos_bol] *) let join x1 x2 = { start_p = if x1 = dummy then x2.start_p else x1.start_p; end_p = if x2 = dummy then x1.end_p else x2.end_p } let lex_join x1 x2 = { start_p = x1; end_p = x2 } let join_located l1 l2 f = { value = f l1.value l2.value; position = join l1.position l2.position; } let string_of_lex_pos p = let c = p.pos_cnum - p.pos_bol in (string_of_int p.pos_lnum)^":"^(string_of_int c) let string_of_pos p = let filename = filename_of_position p in assert (filename <> ""); let l = line p.start_p in let c1, c2 = characters p.start_p p.end_p in Printf.sprintf "File \"%s\", line %d, characters %d-%d" filename l c1 c2 let pos_or_undef = function | None -> dummy | Some x -> x let cpos lexbuf = { start_p = Lexing.lexeme_start_p lexbuf; end_p = Lexing.lexeme_end_p lexbuf; } let with_cpos lexbuf v = with_pos (cpos lexbuf) v let string_of_cpos lexbuf = string_of_pos (cpos lexbuf) let joinf f t1 t2 = join (f t1) (f t2) let ljoinf f = List.fold_left (fun p t -> join p (f t)) dummy let join_located_list ls f = { value = f (List.map (fun l -> l.value) ls); position = ljoinf 
(fun x -> x.position) ls } (* The functions that print error messages and warnings require a list of positions. The following auxiliary functions help build such lists. *) type positions = t list let one (pos : Lexing.position) : positions = [ { start_p = pos; end_p = pos } ] (* or: lex_join pos pos *) let two (pos1 : Lexing.position) (pos2 : Lexing.position) : positions = [ lex_join pos1 pos2 ] let lexbuf (lexbuf : Lexing.lexbuf) : positions = [ lex_join lexbuf.Lexing.lex_start_p lexbuf.Lexing.lex_curr_p ] menhir-20130116/src/tarjan.ml0000644000175000017500000001433312075533603014673 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module provides an implementation of Tarjan's algorithm for finding the strongly connected components of a graph. The algorithm runs when the functor is applied. Its complexity is $O(V+E)$, where $V$ is the number of vertices in the graph $G$, and $E$ is the number of edges. *) module Run (G : sig type node (* We assume each node has a unique index. Indices must range from $0$ to $n-1$, where $n$ is the number of nodes in the graph. *) val n: int val index: node -> int (* Iterating over a node's immediate successors. *) val successors: (node -> unit) -> node -> unit (* Iterating over all nodes. *) val iter: (node -> unit) -> unit end) = struct (* Define the internal data structure associated with each node. *) type data = { (* Each node carries a flag which tells whether it appears within the SCC stack (which is defined below). 
*) mutable stacked: bool; (* Each node carries a number. Numbers represent the order in which nodes were discovered. *) mutable number: int; (* Each node [x] records the lowest number associated to a node already detected within [x]'s SCC. *) mutable low: int; (* Each node carries a pointer to a representative element of its SCC. This field is used by the algorithm to store its results. *) mutable representative: G.node; (* Each representative node carries a list of the nodes in its SCC. This field is used by the algorithm to store its results. *) mutable scc: G.node list } (* Define a mapping from external nodes to internal ones. Here, we simply use each node's index as an entry into a global array. *) let table = (* Create the array. We initially fill it with [None], of type [data option], because we have no meaningful initial value of type [data] at hand. *) let table = Array.create G.n None in (* Initialize the array. *) G.iter (fun x -> table.(G.index x) <- Some { stacked = false; number = 0; low = 0; representative = x; scc = [] } ); (* Define a function which gives easy access to the array. It maps each node to its associated piece of internal data. *) function x -> match table.(G.index x) with | Some dx -> dx | None -> assert false (* Indices do not cover the range $0\ldots n$, as expected. *) (* Create an empty stack, used to record all nodes which belong to the current SCC. *) let scc_stack = Stack.create() (* Initialize a function which allocates numbers for (internal) nodes. A new number is assigned to each node the first time it is visited. Numbers returned by this function start at 1 and increase. Initially, all nodes have number 0, so they are considered unvisited. *) let mark = let counter = ref 0 in fun dx -> incr counter; dx.number <- !counter; dx.low <- !counter (* This reference will hold a list of all representative nodes. *) let representatives = ref [] (* Look at all nodes of the graph, one after the other. 
Any unvisited nodes become roots of the search forest. *) let () = G.iter (fun root -> let droot = table root in if droot.number = 0 then begin (* This node hasn't been visited yet. Start a depth-first walk from it. *) mark droot; droot.stacked <- true; Stack.push droot scc_stack; let rec walk x = let dx = table x in G.successors (fun y -> let dy = table y in if dy.number = 0 then begin (* $y$ hasn't been visited yet, so $(x,y)$ is a regular edge, part of the search forest. *) mark dy; dy.stacked <- true; Stack.push dy scc_stack; (* Continue walking, depth-first. *) walk y; if dy.low < dx.low then dx.low <- dy.low end else if (dy.low < dx.low) & dy.stacked then begin (* The first condition above indicates that $y$ has been visited before $x$, so $(x, y)$ is a backwards or transverse edge. The second condition indicates that $y$ is inside the same SCC as $x$; indeed, if it belongs to another SCC, then the latter has already been identified and moved out of [scc_stack]. *) if dy.number < dx.low then dx.low <- dy.number end ) x; (* We are done visiting $x$'s neighbors. *) if dx.low = dx.number then begin (* $x$ is the entry point of a SCC. The whole SCC is now available; move it out of the stack. We pop elements out of the SCC stack until $x$ itself is found. *) let rec loop () = let element = Stack.pop scc_stack in element.stacked <- false; dx.scc <- element.representative :: dx.scc; element.representative <- x; if element != dx then loop() in loop(); representatives := x :: !representatives end in walk root end ) (* There only remains to make our results accessible to the outside. 
*) let representative x = (table x).representative let scc x = (table x).scc let iter action = List.iter (fun x -> let data = table x in assert (data.representative == x); (* a sanity check *) assert (data.scc <> []); (* a sanity check *) action x data.scc ) !representatives end menhir-20130116/src/parameterizedGrammar.ml0000644000175000017500000004712212075533603017561 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) open Positions open Syntax open UnparameterizedSyntax open InternalSyntax open Misc (* Inference for non terminals. *) (* Unification variables convey [variable_info] to describe the multi-equation they take part of. *) type variable_info = { mutable structure : nt_type option; mutable name : string option; mutable mark : Mark.t } (* [UnionFind] is used to improve the union and the equality test between multi-equations. *) and variable = variable_info UnionFind.point (* Types are simple types. [star] denotes the type of ground symbol (non terminal or terminal). [Arrow] describes the type of a parameterized non terminal. *) and nt_type = Arrow of variable list let star = Arrow [] (* [var_name] is a name generator for unification variables. 
*) let var_name = let name_counter = ref (-1) in let next_name () = incr name_counter; String.make 1 (char_of_int (97 + !name_counter mod 26)) ^ let d = !name_counter / 26 in if d = 0 then "" else string_of_int d in fun v -> let repr = UnionFind.find v in match repr.name with None -> let name = next_name () in repr.name <- Some name; name | Some x -> x (* [string_of_nt_type] is a simple pretty printer for types (they can be recursive). *) (* 2011/04/05: types can no longer be recursive, but I won't touch the printer -fpottier *) let string_of paren_fun ?paren ?colors t : string = let colors = match colors with None -> (Mark.fresh (), Mark.fresh ()) | Some cs -> cs in let s, p = paren_fun colors t in if paren <> None && p = true then "("^ s ^")" else s let rec paren_nt_type ((white, black) as colors) = function Arrow [] -> "*", false | Arrow ins -> let args = separated_list_to_string (string_of paren_var ~paren:true ~colors) ", " ins in let args = if List.length ins > 1 then "("^ args ^ ")" else args in args^" -> *", true and paren_var (white, black) x = let descr = UnionFind.find x in if Mark.same descr.mark white then begin descr.mark <- black; var_name x, false end else begin descr.mark <- white; let s, p = match descr.structure with None -> var_name x, false | Some t -> paren_nt_type (white, black) t in if Mark.same descr.mark black then (var_name x ^ " = " ^ s, true) else (s, p) end let string_of_nt_type ?paren ?colors t = string_of ?colors paren_nt_type t let string_of_var ?paren ?colors v = string_of ?colors paren_var v (* [print_env env] returns a string description of the typing environment. *) let print_env = List.iter (fun (k, (_, v)) -> Printf.eprintf "%s: %s\n" k (string_of_var v)) (* [occurs_check x y] checks that [x] does not occur within [y]. 
*) let dfs action x = let black = Mark.fresh () in let rec visit_var x = let descr = UnionFind.find x in if not (Mark.same descr.mark black) then begin descr.mark <- black; action x; match descr.structure with | None -> () | Some t -> visit_term t end and visit_term (Arrow ins) = List.iter visit_var ins in visit_var x exception OccursError of variable * variable let occurs_check x y = dfs (fun z -> if UnionFind.equivalent x z then raise (OccursError (x, y))) y (* First order unification. *) (* 2011/04/05: perform an eager occurs check and prevent the construction of any cycles. *) let fresh_flexible_variable () = UnionFind.fresh { structure = None; name = None; mark = Mark.none } let fresh_structured_variable t = UnionFind.fresh { structure = Some t; name = None; mark = Mark.none } let star_variable = fresh_structured_variable star exception UnificationError of nt_type * nt_type exception BadArityError of int * int let rec unify_var toplevel x y = if not (UnionFind.equivalent x y) then let reprx, repry = UnionFind.find x, UnionFind.find y in match reprx.structure, repry.structure with None, Some t -> occurs_check x y; UnionFind.union x y | Some t, None -> occurs_check y x; UnionFind.union y x | None, None -> UnionFind.union x y | Some t, Some t' -> unify toplevel t t'; UnionFind.union x y and unify toplevel t1 t2 = match t1, t2 with | Arrow ins, Arrow ins' -> let n1, n2 = List.length ins, List.length ins' in if n1 <> n2 then if n1 = 0 || n2 = 0 || not toplevel then raise (UnificationError (t1, t2)) else (* the flag [toplevel] is used only here and influences which exception is raised; BadArityError is raised only at toplevel *) raise (BadArityError (n1, n2)); List.iter2 (unify_var false) ins ins' let unify_var x y = unify_var true x y (* Typing environment. *) type environment = (string * (Positions.t list * variable)) list (* [lookup x env] returns the type related to [x] in the typing environment [env]. 
By convention, identifiers that are not in [env] are terminals. They are given the type [Star]. *) let lookup x (env: environment) = try snd (List.assoc x env) with Not_found -> star_variable (* This function checks that the symbol [k] has the type [expected_type]. *) let check positions env k expected_type = let inference_var = lookup k env in let checking_var = fresh_structured_variable expected_type in try unify_var inference_var checking_var with UnificationError (t1, t2) -> Error.error positions (Printf.sprintf "How is this symbol parameterized?\n\ It is used at sorts %s and %s.\n\ The sort %s is not compatible with the sort %s." (string_of_var inference_var) (string_of_var checking_var) (string_of_nt_type t1) (string_of_nt_type t2)) | BadArityError (n1, n2) -> Error.error positions (Printf.sprintf "does this symbol expect %d or %d arguments?" (min n1 n2) (max n1 n2)) | OccursError (x, y) -> Error.error positions (Printf.sprintf "How is this symbol parameterized?\n\ It is used at sorts %s and %s.\n\ The sort %s cannot be unified with the sort %s." (string_of_var inference_var) (string_of_var checking_var) (string_of_var x) (string_of_var y)) (* An identifier can be used either in a total application or as a higher-order non terminal (no partial application is allowed). *) let rec parameter_type env = function | ParameterVar x -> lookup x.value env | ParameterApp (x, args) -> assert (args <> []); let expected_type = (* [x] is applied, it must be to the exact number of arguments. *) Arrow (List.map (parameter_type env) args) in (* Check the well-formedness of the application. *) check [x.position] env x.value expected_type; (* Similarly, if it was a total application the result is [Star] otherwise it is the flexible variable. *) star_variable let check_grammar p_grammar = (* [n] is the grammar size. *) let n = StringMap.cardinal p_grammar.p_rules in (* The successors of the non terminal [N] are its producers. 
It induce a graph over the non terminals and its successor function is implemented by [successors]. Non terminals are indexed using [nt]. *) let nt, conv, iconv = index_map p_grammar.p_rules in let parameters, name, branches, positions = (fun n -> (nt n).pr_parameters), (fun n -> (nt n).pr_nt), (fun n -> (nt n).pr_branches), (fun n -> (nt n).pr_positions) in (* The successors function is implemented as an array using the indexing previously created. *) let successors = Array.init n (fun node -> (* We only are interested by parameterized non terminals. *) if parameters node <> [] then List.fold_left (fun succs { pr_producers = symbols } -> List.fold_left (fun succs -> function (_, p) -> let symbol, _ = Parameters.unapp p in try let symbol_node = conv symbol.value in (* [symbol] is a parameterized non terminal, we add it to the successors set. *) if parameters symbol_node <> [] then IntSet.add symbol_node succs else succs with Not_found -> (* [symbol] is a token, it is not interesting for type inference purpose. *) succs ) succs symbols ) IntSet.empty (branches node) else Misc.IntSet.empty ) in (* The successors function and the indexing induce the following graph module. *) let module RulesGraph = struct type node = int let n = n let index node = node let successors f node = IntSet.iter f successors.(node) let iter f = for i = 0 to n - 1 do f i done end in let module ConnectedComponents = Tarjan.Run (RulesGraph) in (* We check that: - all the parameterized definitions of a particular component have the same number of parameters. - every parameterized non terminal definition always uses parameterized definitions of the same component with its formal parameters. Components are marked during the traversal: -1 means unvisited n with n > 0 is the number of parameters of the clique. 
*) let unseen = -1 in let marked_components = Array.create n unseen in let flexible_arrow args = let ty = Arrow (List.map (fun _ -> fresh_flexible_variable ()) args) in fresh_structured_variable ty in (* [nt_type i] is the type of the i-th non terminal. *) let nt_type i = match parameters i with | [] -> star_variable | x -> flexible_arrow x in (* [actual_parameters_as_formal] is the well-formedness checker for parameterized non terminal application. *) let actual_parameters_as_formal actual_parameters formal_parameters = List.for_all2 (fun y -> (function ParameterVar x -> x.value = y | _ -> false)) formal_parameters actual_parameters in (* The environment is initialized. *) let env : environment = StringMap.fold (fun k r acu -> (k, (r.pr_positions, nt_type (conv k))) :: acu) p_grammar.p_rules [] in (* We traverse the graph checking each parameterized non terminal definition is well-formed. *) RulesGraph.iter (fun i -> let params = parameters i and iname = name i and repr = ConnectedComponents.representative i and positions = positions i in (* The environment is augmented with the parameters whose types are unknown. *) let env' = List.map (fun k -> (k, (positions, fresh_flexible_variable ()))) params in let env = env' @ env in (* The type of the parameterized non terminal is constrained to be [expected_ty]. *) let check_type () = check positions env iname (Arrow (List.map (fun (_, (_, t)) -> t) env')) in (* We check the number of parameters. *) let check_parameters () = let parameters_len = List.length params in (* The component is visited for the first time. *) if marked_components.(repr) = unseen then marked_components.(repr) <- parameters_len else (* Otherwise, we check that the arity is homogeneous in the component. *) if marked_components.(repr) <> parameters_len then Error.error positions (Printf.sprintf "Mutually recursive definitions must have the same parameters.\n\ This is not the case for %s and %s." 
(name repr) iname) in (* In each production rule, the parameterized non terminal of the same component must be instantiated with the same formal arguments. *) let check_producers () = List.iter (fun { pr_producers = symbols } -> List.iter (function (_, p) -> let symbol, actuals = Parameters.unapp p in (* We take the use of each symbol into account. *) check [ symbol.position ] env symbol.value (if actuals = [] then star else Arrow (List.map (parameter_type env) actuals)); (* If it is in the same component, check in addition that the arguments are the formal arguments. *) try let idx = conv symbol.value in if ConnectedComponents.representative idx = repr then if not (actual_parameters_as_formal actuals params) then Error.error [ symbol.position ] (Printf.sprintf "Mutually recursive definitions must have the same \ parameters.\n\ This is not the case for %s." (let name1, name2 = (name idx), (name i) in if name1 <> name2 then name1 ^ " and "^ name2 else name1)) with _ -> ()) symbols) (branches i) in check_type (); check_parameters (); check_producers ()) let rec subst_parameter subst = function | ParameterVar x -> (try List.assoc x.value subst with Not_found -> ParameterVar x) | ParameterApp (x, ps) -> (try match List.assoc x.value subst with | ParameterVar y -> ParameterApp (y, List.map (subst_parameter subst) ps) | ParameterApp _ -> (* Type-checking ensures that we cannot do partial application. Consequently, if an higher-order non terminal is an actual argument, it cannot be the result of a partial application. *) assert false with Not_found -> ParameterApp (x, List.map (subst_parameter subst) ps)) let subst_parameters subst = List.map (subst_parameter subst) let names_of_p_grammar p_grammar = StringMap.fold (fun tok _ acu -> StringSet.add tok acu) p_grammar.p_tokens StringSet.empty $$ (StringMap.fold (fun nt _ acu -> StringSet.add nt acu) p_grammar.p_rules) let expand p_grammar = (* Check that it is safe to expand this parameterized grammar. 
*) check_grammar p_grammar; (* Set up a mechanism that ensures that names are unique -- and, in fact, ensures the stronger condition that normalized names are unique. *) let names = ref (StringSet.empty) in let ensure_fresh name = let normalized_name = Misc.normalize name in if StringSet.mem normalized_name !names then Error.error [] (Printf.sprintf "internal name clash over %s" normalized_name); names := StringSet.add normalized_name !names; name in let expanded_rules = Hashtbl.create 13 in let module InstanceTable = Hashtbl.Make (Parameters) in let rule_names = InstanceTable.create 13 in (* [mangle p] chooses a name for the new nonterminal symbol that corresponds to the parameter [p]. *) let rec mangle = function | ParameterVar x | ParameterApp (x, []) -> Positions.value x | ParameterApp (x, ps) -> (* We include parentheses and commas in the names that we assign to expanded nonterminals, because that is more readable and acceptable in many situations. We replace them with underscores in situations where these characters are not valid. *) Printf.sprintf "%s(%s)" (Positions.value x) (separated_list_to_string mangle "," ps) in let name_of symbol parameters = let param = ParameterApp (symbol, parameters) in try InstanceTable.find rule_names param with Not_found -> let name = ensure_fresh (mangle param) in InstanceTable.add rule_names param name; name in (* Given the substitution [subst] from parameters to non terminal, we instantiate the parameterized branch. *) let rec expand_branch subst pbranch = let new_producers = List.map (function (ido, p) -> let sym, actual_parameters = Parameters.unapp p in let sym, actual_parameters = try match List.assoc sym.value subst with | ParameterVar x -> x, subst_parameters subst actual_parameters | ParameterApp (x, ps) -> assert (actual_parameters = []); x, ps with Not_found -> sym, subst_parameters subst actual_parameters in (* Instantiate the definition of the producer. 
*) (expand_branches subst sym actual_parameters, Option.map Positions.value ido)) pbranch.pr_producers in { branch_position = pbranch.pr_branch_position; producers = new_producers; action = pbranch.pr_action; branch_shift_precedence = pbranch.pr_branch_shift_precedence; branch_reduce_precedence = pbranch.pr_branch_reduce_precedence; } (* Instantiate the branches of sym for a particular set of actual parameters. *) and expand_branches subst sym actual_parameters = let nsym = name_of sym actual_parameters in try if not (Hashtbl.mem expanded_rules nsym) then begin let prule = StringMap.find (Positions.value sym) p_grammar.p_rules in let subst = (* Type checking ensures that parameterized non terminal instantiations are well defined. *) assert (List.length prule.pr_parameters = List.length actual_parameters); List.combine prule.pr_parameters actual_parameters @ subst in Hashtbl.add expanded_rules nsym { branches = []; positions = []; inline_flag = false }; let rules = List.map (expand_branch subst) prule.pr_branches in Hashtbl.replace expanded_rules nsym { branches = rules; positions = prule.pr_positions; inline_flag = prule.pr_inline_flag; } end; nsym (* If [sym] is a terminal, then it is not in [p_grammar.p_rules]. Expansion is not needed. *) with Not_found -> Positions.value sym in let rec types_from_list = function | [] -> StringMap.empty | (nt, ty)::q -> let accu = types_from_list q in let mangled = mangle nt in if StringMap.mem mangled accu then Error.error [Positions.position (Parameters.with_pos nt)] (Printf.sprintf "There are multiple %%type definitions for nonterminal %s." 
mangled); StringMap.add mangled (Positions.value ty) accu in let start_symbols = StringMap.domain (p_grammar.p_start_symbols) in { preludes = p_grammar.p_preludes; postludes = p_grammar.p_postludes; parameters = p_grammar.p_parameters; start_symbols = start_symbols; types = types_from_list p_grammar.p_types; tokens = p_grammar.p_tokens; rules = let closed_rules = StringMap.fold (fun k prule rules -> (* If [k] is a start symbol then it cannot be parameterized. *) if prule.pr_parameters <> [] && StringSet.mem k start_symbols then Error.error [] (Printf.sprintf "The start symbol `%s' cannot be parameterized." k); (* Entry points are the closed non terminals. *) if prule.pr_parameters = [] then StringMap.add k { branches = List.map (expand_branch []) prule.pr_branches; positions = prule.pr_positions; inline_flag = prule.pr_inline_flag; } rules else rules) p_grammar.p_rules StringMap.empty in Hashtbl.fold StringMap.add expanded_rules closed_rules } menhir-20130116/src/version.ml0000644000175000017500000000003112075533603015067 0ustar stephstephlet version = "20130116" menhir-20130116/src/derivation.mli0000644000175000017500000000546412075533603015736 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) open Grammar (* -------------------------------------------------------------------------- *) (* This is the type of derivations. Derivations are forests: see inside. *) type t (* This is the type of derivations contexts, or derivations with a derivation-shaped hole. 
*) type context (* -------------------------------------------------------------------------- *) (* Construction. *) (* [empty] is the forest that consists of a single empty tree. *) val empty: t (* [tail pos rhs] is the forest: (i) whose first element is the empty tree, and (ii) whose remaining elements are the symbols found at positions greater than or equal to [pos] in the array [rhs]. *) val tail: int -> Symbol.t array -> t (* [build pos rhs forest comment] is the forest: (i) whose first element is the tree that has the non-terminal symbol [rhs.(pos)] at its root and the forest [forest] below its root, and (ii) whose remaining elements are the symbols found at positions greater than [pos] in the array [rhs]. *) val build: int -> Symbol.t array -> t -> string option -> t (* [prepend symbol forest] is the forest: (i) whose first element is the symbol [symbol], and (ii) whose remaining elements form the forest [forest]. *) val prepend: Symbol.t -> t -> t (* -------------------------------------------------------------------------- *) (* Factoring. *) (* [factor] factors the maximal common derivation context out of a nonempty family of derivations. It produces a pair of the context and of the residual derivations. *) val factor: t Item.Map.t -> context * t Item.Map.t (* -------------------------------------------------------------------------- *) (* Display. *) (* [print] prints a derivation. *) val print: out_channel -> t -> unit (* [printc] prints a derivation context. *) val printc: out_channel -> context -> unit menhir-20130116/src/Fix.mli0000644000175000017500000001021012075533603014301 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. 
This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This code is described in the paper ``Lazy Least Fixed Points in ML''. *) (* -------------------------------------------------------------------------- *) (* Maps. *) (* We require imperative maps, that is, maps that can be updated in place. An implementation of persistent maps, such as the one offered by ocaml's standard library, can easily be turned into an implementation of imperative maps, so this is a weak requirement. *) module type IMPERATIVE_MAPS = sig type key type 'data t val create: unit -> 'data t val clear: 'data t -> unit val add: key -> 'data -> 'data t -> unit val find: key -> 'data t -> 'data val iter: (key -> 'data -> unit) -> 'data t -> unit end (* -------------------------------------------------------------------------- *) (* Properties. *) (* Properties must form a partial order, equipped with a least element, and must satisfy the ascending chain condition: every monotone sequence eventually stabilizes. *) (* [is_maximal] determines whether a property [p] is maximal with respect to the partial order. Only a conservative check is required: in any event, it is permitted for [is_maximal p] to return [false]. If [is_maximal p] returns [true], then [p] must have no upper bound other than itself. In particular, if properties form a lattice, then [p] must be the top element. This feature, not described in the paper, enables a couple of minor optimizations. *) module type PROPERTY = sig type property val bottom: property val equal: property -> property -> bool val is_maximal: property -> bool end (* -------------------------------------------------------------------------- *) (* The code is parametric in an implementation of maps over variables and in an implementation of properties. 
*) module Make (M : IMPERATIVE_MAPS) (P : PROPERTY) : sig type variable = M.key type property = P.property (* A valuation is a mapping of variables to properties. *) type valuation = variable -> property (* A right-hand side, when supplied with a valuation that gives meaning to its free variables, evaluates to a property. More precisely, a right-hand side is a monotone function of valuations to properties. *) type rhs = valuation -> property (* A system of equations is a mapping of variables to right-hand sides. *) type equations = variable -> rhs (* [lfp eqs] produces the least solution of the system of monotone equations [eqs]. *) (* It is guaranteed that, for each variable [v], the application [eqs v] is performed at most once (whereas the right-hand side produced by this application is, in general, evaluated multiple times). This guarantee can be used to perform costly pre-computation, or memory allocation, when [eqs] is applied to its first argument. *) (* When [lfp] is applied to a system of equations [eqs], it performs no actual computation. It produces a valuation, [get], which represents the least solution of the system of equations. The actual fixed point computation takes place, on demand, when [get] is applied. *) val lfp: equations -> valuation end menhir-20130116/src/packedIntArray.ml0000644000175000017500000001315112075533603016312 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. 
*) (* *) (**************************************************************************) (* A packed integer array is represented as a pair of an integer [k] and a string [s]. The integer [k] is the number of bits per integer that we use. The string [s] is just an array of bits, which is read in 8-bit chunks. *) (* The ocaml programming language treats string literals and array literals in slightly different ways: the former are statically allocated, while the latter are dynamically allocated. (This is rather arbitrary.) In the context of Menhir's table-based back-end, where compact, immutable integer arrays are needed, ocaml strings are preferable to ocaml arrays. *) type t = int * string (* The magnitude [k] of an integer [v] is the number of bits required to represent [v]. It is rounded up to the nearest power of two, so that [k] divides [Sys.word_size]. *) let magnitude (v : int) = if v < 0 then Sys.word_size else let rec check k max = (* [max] equals [2^k] *) if (max <= 0) || (v < max) then k (* if [max] just overflew, then [v] requires a full ocaml integer, and [k] is the number of bits in an ocaml integer plus one, that is, [Sys.word_size]. *) else check (2 * k) (max * max) in check 1 2 (* [pack a] turns an array of integers into a packed integer array. *) (* Because the sign bit is the most significant bit, the magnitude of any negative number is the word size. In other words, [pack] does not achieve any space savings as soon as [a] contains any negative numbers, even if they are ``small''. *) let pack (a : int array) : t = let m = Array.length a in (* Compute the maximum magnitude of the array elements. This tells us how many bits per element we are going to use. *) let k = Array.fold_left (fun k v -> max k (magnitude v) ) 1 a in (* Because access to ocaml strings is performed on an 8-bit basis, two cases arise. If [k] is less than 8, then we can pack multiple array entries into a single character. 
If [k] is greater than 8, then we must use multiple characters to represent a single array entry. *) if k <= 8 then begin (* [w] is the number of array entries that we pack in a character. *) assert (8 mod k = 0); let w = 8 / k in (* [n] is the length of the string that we allocate. *) let n = if m mod w = 0 then m / w else m / w + 1 in let s = String.create n in (* Define a reader for the source array. The reader might run off the end if [w] does not divide [m]. *) let i = ref 0 in let next () = let ii = !i in if ii = m then 0 (* ran off the end, pad with zeroes *) else let v = a.(ii) in i := ii + 1; v in (* Fill up the string. *) for j = 0 to n - 1 do let c = ref 0 in for x = 1 to w do c := (!c lsl k) lor next() done; s.[j] <- Char.chr !c done; (* Done. *) k, s end else begin (* k > 8 *) (* [w] is the number of characters that we use to encode an array entry. *) assert (k mod 8 = 0); let w = k / 8 in (* [n] is the length of the string that we allocate. *) let n = m * w in let s = String.create n in (* Fill up the string. *) for i = 0 to m - 1 do let v = ref a.(i) in for x = 1 to w do s.[(i + 1) * w - x] <- Char.chr (!v land 255); v := !v lsr 8 done done; (* Done. *) k, s end (* Access to a string. *) let read (s : string) (i : int) : int = Char.code (String.unsafe_get s i) (* [get1 t i] returns the integer stored in the packed array [t] at index [i]. It assumes (and does not check) that the array's bit width is [1]. The parameter [t] is just a string. *) let get1 (s : string) (i : int) : int = let c = read s (i lsr 3) in let c = c lsr ((lnot i) land 0b111) in let c = c land 0b1 in c (* [get t i] returns the integer stored in the packed array [t] at index [i]. *) (* Together, [pack] and [get] satisfy the following property: if the index [i] is within bounds, then [get (pack a) i] equals [a.(i)]. 
*) let get ((k, s) : t) (i : int) : int = match k with | 1 -> get1 s i | 2 -> let c = read s (i lsr 2) in let c = c lsr (2 * ((lnot i) land 0b11)) in let c = c land 0b11 in c | 4 -> let c = read s (i lsr 1) in let c = c lsr (4 * ((lnot i) land 0b1)) in let c = c land 0b1111 in c | 8 -> read s i | 16 -> let j = 2 * i in (read s j) lsl 8 + read s (j + 1) | _ -> assert (k = 32); (* 64 bits unlikely, not supported *) let j = 4 * i in (((read s j lsl 8) + read s (j + 1)) lsl 8 + read s (j + 2)) lsl 8 + read s (j + 3) menhir-20130116/src/action.ml0000644000175000017500000001600612075533603014670 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. 
*) (* *) (**************************************************************************) (* $Id: action.ml,v 1.10 2006/06/26 09:41:33 regisgia Exp $ *) open Keyword type t = { expr : IL.expr; keywords : Keyword.KeywordSet.t; filenames : string list; pkeywords : Keyword.keyword Positions.located list } let from_stretch s = { expr = IL.ETextual s; filenames = [ s.Stretch.stretch_filename ]; keywords = Keyword.KeywordSet.from_list (List.map Positions.value s.Stretch.stretch_keywords); pkeywords = s.Stretch.stretch_keywords; } let parenthesize s = if String.length s < 2 || s.[0] <> '(' || s.[String.length s - 1] <> ')' then "(" ^ s ^ ")" else s let rec parenthesize_stretch = function | IL.ETextual s -> IL.ETextual { s with Stretch.stretch_raw_content = parenthesize s.Stretch.stretch_raw_content } | IL.ELet (es, e) -> IL.ELet (List.map (fun (p, e) -> (p, parenthesize_stretch e)) es, parenthesize_stretch e) | x -> x let compose x a1 a2 = { expr = IL.ELet ([ IL.PVar x, parenthesize_stretch a1.expr ], a2.expr); keywords = Keyword.KeywordSet.union a1.keywords a2.keywords; filenames = a1.filenames @ a2.filenames; pkeywords = a1.pkeywords @ a2.pkeywords; } let rename_inlined_psym (psym, first_prod, last_prod) phi l = List.fold_left (fun (l, phi, (used1, used2)) pk -> match pk.Positions.value with | Position (subject, where, flavor) -> let (subject', where'), (used1, used2) = match subject, where with | RightNamed s, w -> (* In the host rule, $startpos(x) is changed to $startpos(first_prod) (same thing for $endpos). *) if s = psym then match w with | WhereStart -> first_prod, (true, used2) | WhereEnd -> last_prod, (used1, true) else (* Otherwise, we just that the renaming into account. 
*) let s' = try List.assoc s phi with Not_found -> s in (RightNamed s', w), (used1, used2) | _ -> (subject, where), (used1, used2) in let from_pos = Keyword.posvar subject where flavor and to_pos = Keyword.posvar subject' where' flavor in (Positions.with_pos pk.Positions.position (Position (subject', where', flavor)) :: l, (if from_pos <> to_pos && not (List.mem_assoc from_pos phi) then (from_pos, to_pos) :: phi else phi), (used1, used2)) | _ -> pk :: l, phi, (used1, used2) ) ([], phi, (false, false)) l (* Rename the keywords related to position to handle the composition of semantic actions during non terminal inlining. The first argument describes the context: - [first_prod] is the first producer that starts the action's rule. - [last_prod] is the last one. For instance, if %inline rule r is A -> B C and rule r' is D -> E A F, then [first_prod] is B and [last_prod] is C. If r is A -> and r' is unchanged. [first_prod] is E and [last_prod] is F. - [psym] is the producer that is being inlined. *) let rename_pkeywords (psym, first_prod, last_prod) phi l = List.fold_left (fun (l, phi, (used1, used2)) pk -> match pk.Positions.value with | Position (subject, where, flavor) -> let (subject', where'), (used1, used2) = match subject, where with (* $startpos is changed to $startpos(first_prod) in the inlined rule. *) | Left, WhereStart -> first_prod, (true, used2) (* Similarly for $endpos. *) | Left, WhereEnd -> last_prod, (used1, true) (* $i cannot be combined with inlining. *) | RightDollar i, w -> assert false | RightNamed s, w -> (* In the host rule, $startpos(x) is changed to to $startpos(first_prod) (same thing for $endpos). *) if s = psym then match w with | WhereStart -> first_prod, (true, used2) | WhereEnd -> last_prod, (used1, true) else (* Otherwise, we just that the renaming into account. 
*) let s' = try List.assoc s phi with Not_found -> s in (RightNamed s', w), (used1, used2) in let from_pos = Keyword.posvar subject where flavor and to_pos = Keyword.posvar subject' where' flavor in (Positions.with_pos pk.Positions.position (Position (subject', where', flavor)) :: l, (if from_pos <> to_pos && not (List.mem_assoc from_pos phi) then (from_pos, to_pos) :: phi else phi), (used1, used2)) | x -> pk :: l, phi, (used1, used2)) ([], phi, (false, false)) l let rename renaming_fun renaming_env phi a = let pkeywords, phi, used_fg = renaming_fun renaming_env phi a.pkeywords in { a with (* We use the let construct to rename without modification of the semantic action code. *) expr = IL.ELet (List.map (fun (x, x') -> (IL.PVar x, IL.EVar x')) phi, a.expr); (* Keywords related to positions are updated too. *) keywords = List.fold_left (fun acu pk -> Keyword.KeywordSet.add pk.Positions.value acu) Keyword.KeywordSet.empty pkeywords; pkeywords = pkeywords }, used_fg let rename_inlined_psym = rename rename_inlined_psym let rename = rename rename_pkeywords let to_il_expr action = action.expr let filenames action = action.filenames let keywords action = action.keywords let pkeywords action = action.pkeywords let rec print f action = let module P = Printer.Make (struct let f = f let locate_stretches = None let raw_stretch_action = true end) in P.expr action.expr let has_previouserror action = KeywordSet.mem PreviousError (keywords action) let has_syntaxerror action = KeywordSet.mem SyntaxError (keywords action) let has_leftstart action = KeywordSet.exists (function | Position (Left, WhereStart, _) -> true | _ -> false ) (keywords action) let has_leftend action = KeywordSet.exists (function | Position (Left, WhereEnd, _) -> true | _ -> false ) (keywords action) let has_dollar i action = KeywordSet.exists (function | Dollar j when i = j -> true | _ -> false ) (keywords action) let use_dollar action = KeywordSet.exists (function | Dollar _ -> true | _ -> false ) (keywords 
action) menhir-20130116/src/codePieces.ml0000644000175000017500000001673212075533603015464 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module defines many internal naming conventions for use by the two code generators, [CodeBackend] and [TableBackend]. It also offers a few code generation facilities. *) open IL open CodeBits open Grammar (* ------------------------------------------------------------------------ *) (* Naming conventions. *) (* The type variable associated with a nonterminal [nt]. *) let ntvar nt = Infer.ntvar (Nonterminal.print true nt) (* The variable that holds the environment. This is a parameter to all functions. We do not make it a global variable because we wish to preserve re-entrancy. *) let env = prefix "env" (* A variable used to hold a semantic value. *) let semv = "_v" (* A variable used to hold a stack. *) let stack = prefix "stack" (* A variable used to hold a state. *) let state = prefix "s" (* A variable used to hold a token. *) let token = "_tok" (* Variables used to hold start and end positions. Do not change these names! They are chosen to coincide with the $startpos and $endpos keywords, which the lexer rewrites to _startpos and _endpos, so that binding these variables before executing a semantic action is meaningful. *) let startp = "_startpos" let endp = "_endpos" (* ------------------------------------------------------------------------ *) (* Types for semantic values. 
*) (* [semvtypent nt] is the type of the semantic value associated with nonterminal [nt]. *) let semvtypent nt = match Nonterminal.ocamltype nt with | None -> (* [nt] has unknown type. If we we have run [Infer], then this can't happen. However, running type inference is only an option, so we still have to deal with that case. *) TypVar (ntvar nt) | Some ocamltype -> (* [nt] has known type. *) TypTextual ocamltype (* [semvtypetok tok] is the type of the semantic value associated with token [tok]. There is no such type if the token does not have a semantic value. *) let semvtypetok tok = match Terminal.ocamltype tok with | None -> (* Token has unit type and is omitted in stack cell. *) [] | Some ocamltype -> (* Token has known type. *) [ TypTextual ocamltype ] (* [semvtype symbol] is the type of the semantic value associated with [symbol]. *) let semvtype = function | Symbol.T tok -> semvtypetok tok | Symbol.N nt -> [ semvtypent nt ] (* [symvalt] returns the empty list if the symbol at hand carries no semantic value and the singleton list [[f t]] if it carries a semantic value of type [t]. *) let symvalt symbol f = match semvtype symbol with | [] -> [] | [ t ] -> [ f t ] | _ -> assert false (* [symval symbol x] returns either the empty list or the singleton list [[x]], depending on whether [symbol] carries a semantic value. *) let symval symbol x = match semvtype symbol with | [] -> [] | [ t ] -> [ x ] | _ -> assert false (* [tokval] is a version of [symval], specialized for terminal symbols. *) let tokval tok x = symval (Symbol.T tok) x (* ------------------------------------------------------------------------ *) (* Patterns for tokens. *) (* [tokpat tok] is a pattern that matches the token [tok], without binding its semantic value. *) let tokpat tok = PData (TokenType.tokenprefix (Terminal.print tok), tokval tok PWildcard) (* [tokpatv tok] is a pattern that matches the token [tok], and binds its semantic value, if it has one, to the variable [semv]. 
*) let tokpatv tok = PData (TokenType.tokenprefix (Terminal.print tok), tokval tok (PVar semv)) (* [tokspat toks] is a pattern that matches any token in the set [toks], without binding its semantic value. *) let tokspat toks = POr ( TerminalSet.fold (fun tok pats -> tokpat tok :: pats ) toks [] ) (* [destructuretokendef name codomain bindsemv branch] generates the definition of a function that destructures tokens. [name] is the name of the function that is generated. [codomain] is its return type. [bindsemv] tells whether the variable [semv] should be bound. [branch] is applied to each (non-pseudo) terminal and must produce code for each branch. *) let destructuretokendef name codomain bindsemv branch = { valpublic = false; valpat = PVar name; valval = EAnnot ( EFun ([ PVar token ], EMatch (EVar token, Terminal.fold (fun tok branches -> if Terminal.pseudo tok then branches else { branchpat = (if bindsemv then tokpatv else tokpat) tok; branchbody = branch tok } :: branches ) [] ) ), type2scheme (arrow TokenType.ttoken codomain) ) } (* ------------------------------------------------------------------------ *) (* Bindings for exotic keywords. *) (* [extrabindings fpreviouserror action] provides definitions for the [$startofs], [$endofs], and [$previouserror] keywords, if required by a semantic action. The parameter [fpreviouserror] is the name of the [previouserror] field in the environment -- the table-based and code-based back-ends use different names. The parameter [action] is the semantic action within which these keywords might be used. *) (* The [ofs] keyword family is defined in terms of the [pos] family by accessing the [pos_cnum] field. The [$previouserror] keyword simply provides access to the current value of [env.previouserror]. 
*) let extrabindings fpreviouserror action = Keyword.KeywordSet.fold (fun keyword bindings -> match keyword with | Keyword.Dollar _ | Keyword.Position (_, _, Keyword.FlavorPosition) | Keyword.SyntaxError -> bindings | Keyword.Position (s, w, (Keyword.FlavorOffset as f)) -> (PVar (Keyword.posvar s w f), ERecordAccess (EVar (Keyword.posvar s w Keyword.FlavorPosition), "Lexing.pos_cnum")) :: bindings | Keyword.PreviousError -> (PVar "_previouserror", ERecordAccess (EVar env, fpreviouserror)) :: bindings ) (Action.keywords action) [] (* ------------------------------------------------------------------------ *) (* A global variable holds the exception [Error]. *) (* We preallocate the [Error] exception and store it into a global variable. This allows saving code at the sites where the exception is raised. Don't change the conventional name [_eRR], it is shared with the lexer, which replaces occurrences of the [$syntaxerror] keyword with [(raise _eRR)]. *) let parse_error = "_eRR" let errorval = EVar parse_error let excvaldef = { valpublic = false; valpat = PVar parse_error; valval = EData (Interface.excname, []) } menhir-20130116/src/concreteSyntax.mli0000644000175000017500000000245512075533603016600 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. 
*) (* *) (**************************************************************************) (* $Id: concreteSyntax.mli,v 1.3 2005/12/01 16:20:06 regisgia Exp $ *) type grammar = { pg_filename : Syntax.filename; pg_declarations : (Syntax.declaration Positions.located) list; pg_rules : Syntax.parameterized_rule list; pg_trailer : Syntax.trailer option; } menhir-20130116/src/misc.ml0000644000175000017500000001527112075533603014351 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) let ( $$ ) x f = f x let unSome = function None -> assert false | Some x -> x let o2s o f = match o with | None -> "" | Some x -> f x let single = function | [ x ] -> x | _ -> assert false let rec mapd f = function | [] -> [] | x :: xs -> let y1, y2 = f x in y1 :: y2 :: mapd f xs let tabulate n f = let a = Array.init n f in Array.get a let tabulateb n f = let a = Array.init n f in Array.get a, Array.fold_left (fun count element -> if element then count + 1 else count ) 0 a let tabulatef number fold n dummy f = let a = Array.create n dummy in let () = fold (fun () element -> a.(number element) <- f element ) () in let get element = a.(number element) in get let tabulateo number fold n f = let c = ref 0 in let get = tabulatef number fold n None (fun element -> let image = f element in begin match image with | Some _ -> incr c | None -> () end; image ) in get, !c let rec truncate k xs = match k, xs with | 0, _ -> [] | _, [] -> assert false | _, x :: xs -> x :: truncate (k-1) xs let truncate k xs = if 
List.length xs <= k then xs else truncate k xs let repeat k x = let rec loop k x accu = if k = 0 then accu else loop (k - 1) x (x :: accu) in loop k x [] module IntSet = Set.Make (struct type t = int let compare = ( - ) end) let separated_list_to_string printer separator list = let rec loop x = function | [] -> printer x | y :: xs -> printer x ^ separator ^ loop y xs in match list with | [] -> "" | x :: xs -> loop x xs let index_map string_map = let n = StringMap.cardinal string_map in let a = Array.create n None in let conv, _ = StringMap.fold (fun k v (conv, idx) -> a.(idx) <- Some (k, v); StringMap.add k idx conv, idx + 1) string_map (StringMap.empty, 0) in ((fun n -> snd (unSome a.(n))), (fun k -> StringMap.find k conv), (fun n -> fst (unSome a.(n)))) let support_assoc l x = try List.assoc x l with Not_found -> x let index (strings : string list) : int * string array * int StringMap.t = let name = Array.of_list strings and n, map = List.fold_left (fun (n, map) s -> n+1, StringMap.add s n map ) (0, StringMap.empty) strings in n, name, map (* Turning an implicit list, stored using pointers through a hash table, into an explicit list. The head of the implicit list is not included in the explicit list. *) let materialize (table : ('a, 'a option) Hashtbl.t) (x : 'a) : 'a list = let rec loop x = match Hashtbl.find table x with | None -> [] | Some x -> x :: loop x in loop x (* [iteri] implements a [for] loop over integers, from 0 to [n-1]. *) let iteri n f = for i = 0 to n - 1 do f i done (* [foldi] implements a [for] loop over integers, from 0 to [n-1], with an accumulator. [foldij] implements a [for] loop over integers, from [start] to [n-1], with an accumulator. *) let foldij start n f accu = let rec loop i accu = if i = n then accu else loop (i+1) (f i accu) in loop start accu let foldi n f accu = foldij 0 n f accu (* [mapi n f] produces the list [ f 0; ... f (n-1) ]. 
*) let mapi n f = List.rev ( foldi n (fun i accu -> f i :: accu ) [] ) (* [qfold f accu q] repeatedly takes an element [x] off the queue [q] and applies [f] to the accumulator and to [x], until [q] becomes empty. Of course, [f] can add elements to [q] as a side-effect. We allocate an option to ensure that [qfold] is tail-recursive. *) let rec qfold f accu q = match try Some (Queue.take q) with Queue.Empty -> None with | Some x -> qfold f (f accu x) q | None -> accu (* [qiter f q] repeatedly takes an element [x] off the queue [q] and applies [f] to [x], until [q] becomes empty. Of course, [f] can add elements to [q] as a side-effect. *) let qiter f q = try while true do f (Queue.take q) done with Queue.Empty -> () let rec smap f = function | [] -> [] | (x :: xs) as l -> let x' = f x and xs' = smap f xs in if x == x' && xs == xs' then l else x' :: xs' let rec smapa f accu = function | [] -> accu, [] | (x :: xs) as l -> let accu, x' = f accu x in let accu, xs' = smapa f accu xs in accu, if x == x' && xs == xs' then l else x' :: xs' let normalize s = let s = String.copy s in let n = String.length s in for i = 0 to n - 1 do match s.[i] with | '(' | ')' | ',' -> s.[i] <- '_' | _ -> () done; s (* [postincrement r] increments [r] and returns its original value. *) let postincrement r = let x = !r in r := x + 1; x (* [gcp] returns the greatest common prefix of two strings. *) let gcp s1 s2 = let n1 = String.length s1 and n2 = String.length s2 in let rec loop i = if (i < n1) && (i < n2) && (s1.[i] = s2.[i]) then loop (i+1) else String.sub s1 0 i in loop 0 (* [gcps] returns the greatest common prefix of a nonempty list of strings. *) let rec gcps = function | [] -> assert false | s :: ss -> let rec loop accu = function | [] -> accu | s :: ss -> loop (gcp s accu) ss in loop s ss (* [array_forall p a] computes the conjunction of the predicate [p] over all elements of the array [a]. 
*) exception ArrayForall let _ArrayForall = ArrayForall let array_forall (p : 'a -> bool) (a : 'a array) : bool = try for i = 0 to Array.length a - 1 do let x = Array.get a i in if not (p x) then raise _ArrayForall done; true with ArrayForall -> false menhir-20130116/src/referenceInterpreter.mli0000644000175000017500000000314612075533603017747 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) open Grammar open Cst (* This reference interpreter animates the LR automaton. It uses the grammar and automaton descriptions, as provided by [Grammar] and [Lr1], as well as the generic LR engine in [MenhirLib.Engine]. *) (* The first parameter to the interpreter is a Boolean flag that tells whether a trace should be produced on the standard error channel. *) (* The interpreter requires a start symbol, a lexer, and a lexing buffer. It either succeeds and produces a concrete syntax tree, or fails. *) val interpret: bool -> Nonterminal.t -> (Lexing.lexbuf -> Terminal.t) -> Lexing.lexbuf -> cst option menhir-20130116/src/infiniteArray.ml0000644000175000017500000000372512075533603016223 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. 
This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (* $Id: infiniteArray.ml,v 1.6 2007/09/10 21:09:37 fpottier Exp $ *) (** This module implements infinite arrays, that is, arrays that grow transparently upon demand. *) type 'a t = { default: 'a; mutable table: 'a array; mutable extent: int; (* the index of the greatest [set] ever, plus one *) } let default_size = 16384 (* must be non-zero *) let make x = { default = x; table = Array.make default_size x; extent = 0; } let rec new_length length i = if i < length then length else new_length (2 * length) i let ensure a i = let table = a.table in let length = Array.length table in if i >= length then begin let table' = Array.make (new_length (2 * length) i) a.default in Array.blit table 0 table' 0 length; a.table <- table' end let get a i = ensure a i; a.table.(i) let set a i x = ensure a i; a.table.(i) <- x; a.extent <- max (i + 1) a.extent let extent a = a.extent let domain a = Array.sub a.table 0 a.extent menhir-20130116/src/checkOCamlVersion.ml0000644000175000017500000001014512075533602016747 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* $Id $*) (* This module parses ocaml version and confronts it with a user-provided version. 
*) (* According to OCaml's manual, the Sys.ocaml_version value follows the regexp [version_regexp]. *) let mnum = "\\([0-9]+\\)" (* version = major.minor[.patchlevel][+additional-info]. *) let version_regexp = Str.regexp (Printf.sprintf "%s\\.%s\\(\\.%s\\)?\\(\\+\\(.+\\)\\)?" mnum mnum mnum) let must field = function | None -> failwith (Printf.sprintf "\"%s\" field is undefined." field) | Some s -> s let as_int s = try int_of_string s with Failure _ -> Printf.eprintf "Invalid number '%s'\n" s; exit 1 let parse_version version = let get i = try Some (Str.matched_group i version) with Not_found -> None in if Str.string_match version_regexp version 0 then ( as_int (must "major" (get 1)), as_int (must "minor" (get 2)), get 4, get 6 ) else begin Printf.eprintf "Failed to retrieve ocaml version.\n"; exit 1 end (* The user can compare its version with three different orderings: - eq means major and minor numbers are equal ; - eq-strict means that even the patchlevel and the additional information are equal ; - lt means that ocaml version is older that the user-provided version ; - gt means that ocaml version is newer that the user-provided version. *) let eq, eq_strict, gt, lt = ref false, ref false, ref false, ref false let verbose = ref false let options = Arg.align [ "--eq", Arg.Set eq, " Is the version equal to ?"; "--eq-strict", Arg.Set eq_strict, " Is the version strictly equal to ? \ (taking into account patchlevel and additional information)"; "--gt", Arg.Set gt, " Is the version newer than ? (default)"; "--lt", Arg.Set lt, " Is the version older than ?"; "--verbose", Arg.Set verbose, " Show version." 
] let usage = "check-ocaml-version [options] \n" let version = ref None let set_version s = version := Some s let _ = Arg.parse options set_version usage let compare, compare_str, strict = match !eq, !gt, !lt with | true, false, false -> ( = ) , "", !eq_strict | false, true, false -> ( >= ), "or greater ", false | false, false, true -> ( <= ), "or lesser ", false | false, false, false -> (Printf.printf "%s\n%!" Sys.ocaml_version; exit 1) | _ -> failwith "(eq|gt|lt) flags must be used independently" let compare_version (major, minor, p, a) (major', minor', p', a') = if major = major' then if minor = minor' then if strict then (p = p') && (a = a') else true else compare minor minor' else compare major major' let _ = match !version with | None -> Printf.printf "%s\n%!" Sys.ocaml_version | Some version -> let ov = parse_version Sys.ocaml_version and uv = parse_version version in if compare_version ov uv then begin if !verbose then Printf.printf "Version %s is OK.\n%!" Sys.ocaml_version; exit 0 end else begin if !verbose then Printf.printf "%s is NOT OK: version %s %swas required.%!\n" Sys.ocaml_version version compare_str; exit 1 end menhir-20130116/src/referenceInterpreter.ml0000644000175000017500000001570312075533603017600 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) open Grammar open Cst (* Set up all of the information required by the LR engine. Everything is read directly from [Grammar] and [Lr1]. 
*) module T = struct type state = Lr1.node type token = Terminal.t type terminal = Terminal.t type semantic_value = cst let token2terminal (token : token) : terminal = token let token2value (token : token) : semantic_value = CstTerminal token let error_terminal = Terminal.error let error_value = CstError type production = Production.index let default_reduction (s : state) defred nodefred env = match Invariant.has_default_reduction s with | Some (prod, _) -> defred env prod | None -> nodefred env let action (s : state) (tok : terminal) value shift reduce fail env = (* Check whether [s] has an outgoing shift transition along [tok]. *) try let s' : state = SymbolMap.find (Symbol.T tok) (Lr1.transitions s) in (* There is such a transition. Return either [ShiftDiscard] or [ShiftNoDiscard], depending on the existence of a default reduction on [#] at [s']. *) match Invariant.has_default_reduction s' with | Some (_, toks) when TerminalSet.mem Terminal.sharp toks -> shift env false tok value s' | _ -> shift env true tok value s' (* There is no such transition. Look for a reduction. *) with Not_found -> try let prod = Misc.single (TerminalMap.find tok (Lr1.reductions s)) in reduce env prod (* There is no reduction either. Fail. *) with Not_found -> fail env let goto (s : state) (prod : production) : state = try SymbolMap.find (Symbol.N (Production.nt prod)) (Lr1.transitions s) with Not_found -> assert false open MenhirLib.EngineTypes exception Accept of semantic_value exception Error type semantic_action = (state, semantic_value, token) env -> unit let semantic_action (prod : production) : semantic_action = fun env -> (* Check whether [prod] is a start production. *) match Production.classify prod with (* If it is one, accept. Start productions are of the form S' -> S, where S is a non-terminal symbol, so the desired semantic value is found within the top cell of the stack. *) | Some _ -> raise (Accept env.stack.semv) (* If it is not, reduce. 
Pop a suffix of the stack, and use it to construct a new concrete syntax tree node. *) | None -> let n = Production.length prod in let values : semantic_value array = Array.make n CstError (* dummy *) and startp : Lexing.position ref = ref Lexing.dummy_pos and endp : Lexing.position ref = ref Lexing.dummy_pos in (* The auxiliary function [pop k stack] pops [k] stack cells and returns a truncated stack. It also updates the automaton's current state, and fills in [values], [startp], and [endp]. *) let rec pop k stack = if k = 0 then (* There are no more stack cells to pop. *) stack else begin (* Fetch a semantic value. *) values.(k - 1) <- stack.semv; (* Pop one cell. The stack must be non-empty. As we pop a cell, change the automaton's current state to the one stored within the cell. (It is sufficient to do this only when [k] is 1.) If this is the first (last) cell that we pop, update [endp] ([startp]). *) let next = stack.next in assert (stack != next); if k = n then begin endp := stack.endp end; if k = 1 then begin env.current <- stack.state; startp := stack.startp end; pop (k - 1) next end in let stack = pop n env.stack in (* Construct and push a new stack cell. The associated semantic value is a new concrete syntax tree. *) env.stack <- { state = env.current; semv = CstNonTerminal (prod, values); startp = !startp; endp = !endp; next = stack } (* The reference interpreter performs error recovery if and only if this is requested via [--recovery]. *) let recovery = Settings.recovery module Log = struct open Printf (* I use a reference as a quick and dirty form of parameter passing. 
*) let log = ref false let maybe action = if !log then begin action(); prerr_newline() end let state s = maybe (fun () -> fprintf stderr "State %d:" (Lr1.number s) ) let shift tok s' = maybe (fun () -> fprintf stderr "Shifting (%s) to state %d" (Terminal.print tok) (Lr1.number s') ) let reduce_or_accept prod = maybe (fun () -> match Production.classify prod with | Some _ -> fprintf stderr "Accepting" | None -> fprintf stderr "Reducing production %s" (Production.print prod) ) let lookahead_token lexbuf tok = maybe (fun () -> fprintf stderr "Lookahead token is now %s (%d-%d)" (Terminal.print tok) lexbuf.Lexing.lex_start_p.Lexing.pos_cnum lexbuf.Lexing.lex_curr_p.Lexing.pos_cnum ) let initiating_error_handling () = maybe (fun () -> fprintf stderr "Initiating error handling" ) let resuming_error_handling () = maybe (fun () -> fprintf stderr "Resuming error handling" ) let handling_error s = maybe (fun () -> fprintf stderr "Handling error in state %d" (Lr1.number s) ) let discarding_last_token tok = maybe (fun () -> fprintf stderr "Discarding last token read (%s)" (Terminal.print tok) ) end end (* Instantiate the LR engine with this information. *) module E = MenhirLib.Engine.Make (T) (* Define a palatable user entry point. *) let interpret log nt lexer lexbuf = (* Find the start state that corresponds to [nt] in the automaton. *) let s : Lr1.node = try ProductionMap.find (Production.startsymbol2startprod nt) Lr1.entry with Not_found -> assert false in (* Run the engine. *) try T.Log.log := log; Some (E.entry s lexer lexbuf) with T.Error -> None menhir-20130116/src/Fix.ml0000644000175000017500000004430412075533603014143 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. 
This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* -------------------------------------------------------------------------- *) (* Maps. *) (* We require imperative maps, that is, maps that can be updated in place. An implementation of persistent maps, such as the one offered by ocaml's standard library, can easily be turned into an implementation of imperative maps, so this is a weak requirement. *) module type IMPERATIVE_MAPS = sig type key type 'data t val create: unit -> 'data t val clear: 'data t -> unit val add: key -> 'data -> 'data t -> unit val find: key -> 'data t -> 'data val iter: (key -> 'data -> unit) -> 'data t -> unit end (* -------------------------------------------------------------------------- *) (* Properties. *) (* Properties must form a partial order, equipped with a least element, and must satisfy the ascending chain condition: every monotone sequence eventually stabilizes. *) (* [is_maximal] determines whether a property [p] is maximal with respect to the partial order. Only a conservative check is required: in any event, it is permitted for [is_maximal p] to return [false]. If [is_maximal p] returns [true], then [p] must have no upper bound other than itself. In particular, if properties form a lattice, then [p] must be the top element. This feature, not described in the paper, enables a couple of minor optimizations. *) module type PROPERTY = sig type property val bottom: property val equal: property -> property -> bool val is_maximal: property -> bool end (* -------------------------------------------------------------------------- *) (* The dynamic dependency graph. *) (* An edge from [node1] to [node2] means that [node1] depends on [node2], or (equivalently) that [node1] observes [node2]. 
Then, an update of the current property at [node2] causes a signal to be sent to [node1]. A node can observe itself. *) (* This module could be placed in a separate file, but is included here in order to make [Fix] self-contained. *) module Graph : sig (* This module provides a data structure for maintaining and modifying a directed graph. Each node is allowed to carry a piece of client data. There are functions for creating a new node, looking up a node's data, looking up a node's predecessors, and setting or clearing a node's successors (all at once). *) type 'data node (* [create data] creates a new node, with no incident edges, with client information [data]. Time complexity: constant. *) val create: 'data -> 'data node (* [data node] returns the client information associated with the node [node]. Time complexity: constant. *) val data: 'data node -> 'data (* [predecessors node] returns a list of [node]'s predecessors. Amortized time complexity: linear in the length of the output list. *) val predecessors: 'data node -> 'data node list (* [set_successors src dsts] creates an edge from the node [src] to each of the nodes in the list [dsts]. Duplicate elements in the list [dsts] are removed, so that no duplicate edges are created. It is assumed that [src] initially has no successors. Time complexity: linear in the length of the input list. *) val set_successors: 'data node -> 'data node list -> unit (* [clear_successors node] removes all of [node]'s outgoing edges. Time complexity: linear in the number of edges that are removed. *) val clear_successors: 'data node -> unit (* That's it. *) end = struct (* Using doubly-linked adjacency lists, one could implement [predecessors] in worst-case linear time with respect to the length of the output list, [set_successors] in worst-case linear time with respect to the length of the input list, and [clear_successors] in worst-case linear time with respect to the number of edges that are removed. 
We use a simpler implementation, based on singly-linked adjacency lists, with deferred removal of edges. It achieves the same complexity bounds, except [predecessors] only offers an amortized complexity bound. This is good enough for our purposes, and, in practice, is more efficient by a constant factor. This simplification was suggested by Arthur Charguraud. *) type 'data node = { (* The client information associated with this node. *) data: 'data; (* This node's incoming and outgoing edges. *) mutable outgoing: 'data edge list; mutable incoming: 'data edge list; (* A transient mark, always set to [false], except when checking against duplicate elements in a successor list. *) mutable marked: bool; } and 'data edge = { (* This edge's nodes. Edges are symmetric: source and destination are not distinguished. Thus, an edge appears both in the outgoing edge list of its source node and in the incoming edge list of its destination node. This allows edges to be easily marked as destroyed. *) node1: 'data node; node2: 'data node; (* Edges that are destroyed are marked as such, but are not immediately removed from the adjacency lists. *) mutable destroyed: bool; } let create (data : 'data) : 'data node = { data = data; outgoing = []; incoming = []; marked = false; } let data (node : 'data node) : 'data = node.data (* [follow src edge] returns the node that is connected to [src] by [edge]. Time complexity: constant. *) let follow src edge = if edge.node1 == src then edge.node2 else begin assert (edge.node2 == src); edge.node1 end (* The [predecessors] function removes edges that have been marked destroyed. The cost of removing these has already been paid for, so the amortized time complexity of [predecessors] is linear in the length of the output list. 
*) let predecessors (node : 'data node) : 'data node list = let predecessors = List.filter (fun edge -> not edge.destroyed) node.incoming in node.incoming <- predecessors; List.map (follow node) predecessors (* [link src dst] creates a new edge from [src] to [dst], together with its reverse edge. Time complexity: constant. *) let link (src : 'data node) (dst : 'data node) : unit = let edge = { node1 = src; node2 = dst; destroyed = false; } in src.outgoing <- edge :: src.outgoing; dst.incoming <- edge :: dst.incoming let set_successors (src : 'data node) (dsts : 'data node list) : unit = assert (src.outgoing = []); let rec loop = function | [] -> () | dst :: dsts -> if dst.marked then loop dsts (* skip duplicate elements *) else begin dst.marked <- true; link src dst; loop dsts; dst.marked <- false end in loop dsts let clear_successors (node : 'data node) : unit = List.iter (fun edge -> assert (not edge.destroyed); edge.destroyed <- true; ) node.outgoing; node.outgoing <- [] end (* -------------------------------------------------------------------------- *) (* The code is parametric in an implementation of maps over variables and in an implementation of properties. *) module Make (M : IMPERATIVE_MAPS) (P : PROPERTY) = struct type variable = M.key type property = P.property type valuation = variable -> property type rhs = valuation -> property type equations = variable -> rhs (* -------------------------------------------------------------------------- *) (* Data. *) (* Each node in the dependency graph carries information about a fixed variable [v]. *) type node = data Graph.node and data = { (* This is the result of the application of [rhs] to the variable [v]. It must be stored in order to guarantee that this application is performed at most once. *) rhs: rhs; (* This is the current property at [v]. It evolves monotonically with time. *) mutable property: property; (* That's it! *) } (* [property node] returns the current property at [node]. 
*) let property node = (Graph.data node).property (* -------------------------------------------------------------------------- *) (* Many definitions must be made within the body of the function [lfp]. For greater syntactic convenience, we place them in a local module. *) let lfp (eqs : equations) : valuation = let module LFP = struct (* -------------------------------------------------------------------------- *) (* The workset. *) (* When the algorithm is inactive, the workset is empty. *) (* Our workset is based on a Queue, but it could just as well be based on a Stack. A textual replacement is possible. It could also be based on a priority queue, provided a sensible way of assigning priorities could be found. *) module Workset : sig (* [insert node] inserts [node] into the workset. [node] must have no successors. *) val insert: node -> unit (* [repeat f] repeatedly applies [f] to a node extracted out of the workset, until the workset becomes empty. [f] is allowed to use [insert]. *) val repeat: (node -> unit) -> unit (* That's it! *) end = struct (* Initialize the workset. *) let workset = Queue.create() let insert node = Queue.push node workset let repeat f = while not (Queue.is_empty workset) do f (Queue.pop workset) done end (* -------------------------------------------------------------------------- *) (* Signals. *) (* A node in the workset has no successors. (It can have predecessors.) In other words, a predecessor (an observer) of some node is never in the workset. Furthermore, a node never appears twice in the workset. *) (* When a variable broadcasts a signal, all of its predecessors (observers) receive the signal. Any variable that receives the signal loses all of its successors (that is, it ceases to observe anything) and is inserted into the workset. This preserves the above invariant. 
*) let signal subject = List.iter (fun observer -> Graph.clear_successors observer; Workset.insert observer ) (Graph.predecessors subject) (* At this point, [subject] has no predecessors. This plays no role in the correctness proof, though. *) (* -------------------------------------------------------------------------- *) (* Tables. *) (* The permanent table maps variables that have reached a fixed point to properties. It persists forever. *) let permanent : property M.t = M.create() (* The transient table maps variables that have not yet reached a fixed point to nodes. (A node contains not only a property, but also a memoized right-hand side, and carries edges.) At the beginning of a run, it is empty. It fills up during a run. At the end of a run, it is copied into the permanent table and cleared. *) let transient : node M.t = M.create() (* [freeze()] copies the transient table into the permanent table, and empties the transient table. This allows all nodes to be reclaimed by the garbage collector. *) let freeze () = M.iter (fun v node -> M.add v (property node) permanent ) transient; M.clear transient (* -------------------------------------------------------------------------- *) (* Workset processing. *) (* [solve node] re-evaluates the right-hand side at [node]. If this leads to a change, then the current property is updated, and [node] emits a signal towards its observers. *) (* When [solve node] is invoked, [node] has no subjects. Indeed, when [solve] is invoked by [node_for], [node] is newly created; when [solve] is invoked by [Workset.repeat], [node] has just been extracted out of the workset, and a node in the workset has no subjects. *) (* [node] must not be in the workset. *) (* In short, when [solve node] is invoked, [node] is neither awake nor asleep. When [solve node] finishes, [node] is either awake or asleep again. (Chances are, it is asleep, unless it is its own observer; then, it is awakened by the final call to [signal node].) 
*) let rec solve (node : node) : unit = (* Retrieve the data record carried by this node. *) let data = Graph.data node in (* Prepare to compute an updated value at this node. This is done by invoking the client's right-hand side function. *) (* The flag [alive] is used to prevent the client from invoking [request] after this interaction phase is over. In theory, this dynamic check seems required in order to argue that [request] behaves like a pure function. In practice, this check is not very useful: only a bizarre client would store a [request] function and invoke it after it has become stale. *) let alive = ref true and subjects = ref [] in (* We supply the client with [request], a function that provides access to the current valuation, and dynamically records dependencies. This yields a set of dependencies that is correct by construction. *) let request (v : variable) : property = assert !alive; try M.find v permanent with Not_found -> let subject = node_for v in let p = property subject in if not (P.is_maximal p) then subjects := subject :: !subjects; p in (* Give control to the client. *) let new_property = data.rhs request in (* From now on, prevent any invocation of this instance of [request] the client. *) alive := false; (* At this point, [node] has no subjects, as noted above. Thus, the precondition of [set_successors] is met. We can install [data.subjects] as the new set of subjects for this node. *) (* If we have gathered no subjects in the list [data.subjects], then this node must have stabilized. If [new_property] is maximal, then this node must have stabilized. *) (* If this node has stabilized, then it need not observe any more, so the call to [set_successors] is skipped. In practice, this seems to be a minor optimization. In the particular case where every node stabilizes at the very first call to [rhs], this means that no edges are ever built. 
This particular case is unlikely, as it means that we are just doing memoization, not a true fixed point computation. *) (* One could go further and note that, if this node has stabilized, then it could immediately be taken out of the transient table and copied into the permanent table. This would have the beneficial effect of allowing the detection of further nodes that have stabilized. Furthermore, it would enforce the property that no node in the transient table has a maximal value, hence the call to [is_maximal] above would become useless. *) if not (!subjects = [] || P.is_maximal new_property) then Graph.set_successors node !subjects; (* If the updated value differs from the previous value, record the updated value and send a signal to all observers of [node]. *) if not (P.equal data.property new_property) then begin data.property <- new_property; signal node end (* Note that equality of the two values does not imply that this node has stabilized forever. *) (* -------------------------------------------------------------------------- *) (* [node_for v] returns the graph node associated with the variable [v]. It is assumed that [v] does not appear in the permanent table. If [v] appears in the transient table, the associated node is returned. Otherwise, [v] is a newly discovered variable: a new node is created on the fly, and the transient table is grown. The new node can either be inserted into the workset (it is then awake) or handled immediately via a recursive call to [solve] (it is then asleep, unless it observes itself). *) (* The recursive call to [solve node] can be replaced, if desired, by a call to [Workset.insert node]. Using a recursive call to [solve] permits eager top-down discovery of new nodes. This can save a constant factor, because it allows new nodes to move directly from [bottom] to a good first approximation, without sending any signals, since [node] has no observers when [solve node] is invoked. 
In fact, if the dependency graph is acyclic, the algorithm discovers nodes top-down, performs computation on the way back up, and runs without ever inserting a node into the workset! Unfortunately, this causes the stack to grow as deep as the longest path in the dependency graph, which can blow up the stack. *) and node_for (v : variable) : node = try M.find v transient with Not_found -> let node = Graph.create { rhs = eqs v; property = P.bottom } in (* Adding this node to the transient table prior to calling [solve] recursively is mandatory, otherwise [solve] might loop, creating an infinite number of nodes for the same variable. *) M.add v node transient; solve node; (* or: Workset.insert node *) node (* -------------------------------------------------------------------------- *) (* Invocations of [get] trigger the fixed point computation. *) (* The flag [inactive] prevents reentrant calls by the client. *) let inactive = ref true let get (v : variable) : property = try M.find v permanent with Not_found -> assert !inactive; inactive := false; let node = node_for v in Workset.repeat solve; freeze(); inactive := true; property node (* -------------------------------------------------------------------------- *) (* Close the local module [LFP]. *) end in LFP.get end menhir-20130116/src/nonpositiveCycles.mli0000644000175000017500000000353412075533603017306 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. 
*) (* *) (**************************************************************************) (* This module uses Floyd and Warshall's algorithm to detect whether a graph with integer-weighted edges contains a simple cycle of negative weight. *) (* The algorithm runs in cubic time in the number of vertices. It may be worthwhile to first use Tarjan's algorithm to obtain the graph's strongly connected components, and use Floyd and Warshall's algorithm only on each component. *) module Run (G : sig type node (* We assume each node has a unique index. Indices must range from $0$ to $n-1$, where $n$ is the number of nodes in the graph. *) val n: int val index: node -> int (* Iterating over a node's immediate successors. Edges are weighted. *) val successors: (int -> node -> unit) -> node -> unit (* Iterating over all nodes. *) val iter: (node -> unit) -> unit end) : sig val graph_has_nonpositive_simple_cycle : bool end menhir-20130116/src/fancy-parser.mly0000644000175000017500000003125712075533603016203 0ustar stephsteph/**************************************************************************/ /* */ /* Menhir */ /* */ /* Franois Pottier, INRIA Rocquencourt */ /* Yann Rgis-Gianas, PPS, Universit Paris Diderot */ /* */ /* Copyright 2005-2008 Institut National de Recherche en Informatique */ /* et en Automatique. All rights reserved. This file is distributed */ /* under the terms of the Q Public License version 1.0, with the change */ /* described in file LICENSE. */ /* */ /**************************************************************************/ /* This is the fancy version of the parser, to be processed by menhir. It is kept in sync with [Parser], but exercises menhir's features. */ /* ------------------------------------------------------------------------- */ /* Imports. */ %{ open Keyword open ConcreteSyntax open Syntax open Positions %} /* ------------------------------------------------------------------------- */ /* Tokens. 
*/ %token TOKEN TYPE LEFT RIGHT NONASSOC START PREC PUBLIC COLON BAR EOF EQUAL %token INLINE LPAREN RPAREN COMMA QUESTION STAR PLUS PARAMETER %token LID UID %token HEADER %token OCAMLTYPE %token PERCENTPERCENT %token ACTION /* ------------------------------------------------------------------------- */ /* Start symbol. */ %start grammar /* ------------------------------------------------------------------------- */ /* Priorities. */ /* These declarations solve a shift-reduce conflict in favor of shifting: when the declaration of a non-terminal symbol begins with a leading bar, it is understood as an (insignificant) leading optional bar, *not* as an empty right-hand side followed by a bar. This ambiguity arises due to the existence of a new notation for letting several productions share a single semantic action. */ %nonassoc no_optional_bar %nonassoc BAR /* These declarations encourage the [error] token to be shifted if found at the end of what seems like a legal declaration. */ %nonassoc decl %nonassoc error %% /* ------------------------------------------------------------------------- */ /* A grammar consists of declarations and rules, followed by an optional trailer, which we do not parse. */ grammar: ds = declaration* PERCENTPERCENT rs = rule* t = trailer { { pg_filename = ""; (* filled in by the caller *) pg_declarations = List.flatten ds; pg_rules = List.flatten rs; pg_trailer = t } } /* ------------------------------------------------------------------------- */ /* A declaration is an %{ Objective Caml header %}, or a %token, %start, %type, %left, %right, or %nonassoc declaration. */ declaration: | h = HEADER /* lexically delimited by %{ ... %} */ { [ with_poss $startpos $endpos (DCode h) ] } | TOKEN t = OCAMLTYPE? ts = clist(terminal) %prec decl { List.map (Positions.map (fun terminal -> DToken (t, terminal))) ts } | TOKEN OCAMLTYPE? clist(terminal) error | TOKEN OCAMLTYPE? 
error { Error.signal (Positions.two $startpos $endpos) "\ Syntax error in a %token declaration. Here are sample valid declarations: %token DOT SEMICOLON %token LID UID"; [] } | START t = OCAMLTYPE? nts = clist(nonterminal) %prec decl /* %start foo is syntactic sugar for %start foo %type foo */ { match t with | None -> List.map (Positions.map (fun nonterminal -> DStart nonterminal)) nts | Some t -> Misc.mapd (fun ntloc -> Positions.mapd (fun nt -> DStart nt, DType (t, ParameterVar ntloc)) ntloc) nts } | START OCAMLTYPE? clist(nonterminal) error | START OCAMLTYPE? error { Error.signal (Positions.two $startpos $endpos) "\ Syntax error in a %start declaration. Here are sample valid declarations: %start expression phrase %start date time"; [] } | TYPE t = OCAMLTYPE ss = clist(actual_parameter) %prec decl { List.map (Positions.map (fun nt -> DType (t, nt))) (List.map Parameters.with_pos ss) } | TYPE OCAMLTYPE clist(actual_parameter) error | TYPE OCAMLTYPE error | TYPE error { Error.signal (Positions.two $startpos $endpos) "\ Syntax error in a %type declaration. Here are sample valid declarations: %type expression %type date time"; [] } | k = priority_keyword ss = clist(symbol) %prec decl { let prec = ParserAux.current_token_precedence $startpos(k) $endpos(k) in List.map (Positions.map (fun symbol -> DTokenProperties (symbol, k, prec))) ss } | priority_keyword clist(symbol) error | priority_keyword error { Error.signal (Positions.two $startpos $endpos) "\ Syntax error in a precedence declaration. Here are sample valid declarations: %left PLUS TIMES %nonassoc unary_minus %right CONCAT"; [] } | PARAMETER t = OCAMLTYPE { [ with_poss $startpos $endpos (DParameter t) ] } | PARAMETER error { Error.signal (Positions.two $startpos $endpos) "\ Syntax error in a %parameter declaration. Here is a sample valid declaration: %parameter "; [] } /* This error production should lead to resynchronization on the next %something. 
The use of $previouserror prevents reporting errors that are too close to one another -- presumably, the second error only means that we failed to properly recover after the first error. */ | error { if $previouserror >= 3 then Error.signal (Positions.two $startpos $endpos) "Syntax error inside a declaration."; [] } /* This production recognizes tokens that are valid in the rules section, but not in the declarations section. This is a hint that a %% was forgotten. */ | rule_specific_token { if $previouserror >= 3 then Error.signal (Positions.two $startpos $endpos) "Syntax error inside a declaration.\n\ Did you perhaps forget the %% that separates declarations and rules?"; (* Do not attempt to perform error recovery. There is no way of forcing the automaton into a state where rules are expected. *) exit 1 } priority_keyword: LEFT { LeftAssoc } | RIGHT { RightAssoc } | NONASSOC { NonAssoc } rule_specific_token: | PUBLIC | INLINE | COLON | EOF { () } /* ------------------------------------------------------------------------- */ /* Our lists of symbols are separated with optional commas. Order is irrelevant. */ %inline clist(X): xs = separated_nonempty_list(COMMA?, X) { xs } /* ------------------------------------------------------------------------- */ /* A symbol is a terminal or nonterminal symbol. One would like to require nonterminal symbols to begin with a lowercase letter, so as to lexically distinguish them from terminal symbols, which must begin with an uppercase letter. However, for compatibility with ocamlyacc, this is impossible. It can be required only for nonterminal symbols that are also start symbols. */ symbol: id = LID | id = UID { id } /* ------------------------------------------------------------------------- */ /* Terminals must begin with an uppercase letter. Nonterminals that are declared to be start symbols must begin with a lowercase letter. 
*/ %inline terminal: id = UID { id } %inline nonterminal: id = LID { id } /* ------------------------------------------------------------------------- */ /* A rule defines a symbol. It is optionally declared %public, and optionally carries a number of formal parameters. The right-hand side of the definition consists of a list of productions. */ rule: flags = flags /* flags */ symbol = symbol /* the symbol that is being defined */ params = plist(symbol) /* formal parameters */ COLON optional_bar prods = separated_nonempty_list(BAR, production_group) /* productions */ { let public, inline = flags in [ { pr_public_flag = public; pr_inline_flag = inline; pr_nt = Positions.value symbol; pr_positions = [ Positions.position symbol ]; pr_parameters = List.map Positions.value params; pr_branches = List.flatten prods } ] } | error /* This error production should lead to resynchronization on the next well-formed rule. */ { if $previouserror >= 3 then Error.signal (Positions.two $startpos $endpos) "Syntax error inside the definition of a nonterminal symbol."; [] } flags: /* epsilon */ { false, false } | PUBLIC { true, false } | INLINE { false, true } | PUBLIC INLINE | INLINE PUBLIC { true, true } optional_bar: /* epsilon */ %prec no_optional_bar | BAR { () } /* ------------------------------------------------------------------------- */ /* A production group consists of a list of productions, followed by a semantic action and an optional precedence specification. */ production_group: productions = separated_nonempty_list(BAR, production) action = ACTION oprec2 = precedence? { ParserAux.check_production_group productions $startpos(action) $endpos(action) action; List.map (fun (producers, oprec1, rprec, pos) -> { pr_producers = producers; pr_action = action; pr_branch_shift_precedence = ParserAux.override pos oprec1 oprec2; pr_branch_reduce_precedence = rprec; pr_branch_position = pos }) productions } | error ACTION precedence? 
| error EOF /* This error production should lead to resynchronization on the next semantic action, unless the end of file is reached before a semantic action is found. */ { if $previouserror >= 3 then Error.signal (Positions.two $startpos($1) $endpos($1)) "Syntax error inside a production."; [] } %inline precedence: PREC symbol = symbol { symbol } /* ------------------------------------------------------------------------- */ /* A production is a list of producers, optionally followed by a precedence declaration. */ production: producers = producer* oprec = precedence? { producers, oprec, ParserAux.current_reduce_precedence(), Positions.lex_join $startpos $endpos } /* ------------------------------------------------------------------------- */ /* A producer is an actual parameter, possibly preceded by a binding. Because both [ioption] and [terminated] are defined as inlined by the standard library, this definition expands to two productions, one of which begins with id = LID, the other of which begins with p = actual_parameter. The token LID is in FIRST(actual_parameter), but the LR(1) formalism can deal with that. If [option] was used instead of [ioption], an LR(1) conflict would arise -- looking ahead at LID would not allow determining whether to reduce an empty [option] or to shift. */ producer: | id = ioption(terminated(LID, EQUAL)) p = actual_parameter { id, p } /* ------------------------------------------------------------------------- */ /* The syntax of actual parameters allows applications, whereas the syntax of formal parameters does not. It also allows use of the "?", "+", and "*" shortcuts. */ actual_parameter: symbol = symbol actuals = plist(actual_parameter) modifier = modifier? { Parameters.oapp1 modifier (Parameters.app symbol actuals) } /* ------------------------------------------------------------------------- */ /* Formal or actual parameter lists are delimited with parentheses and separated with commas. They are optional. 
*/ %inline plist(X): params = loption(delimited(LPAREN, separated_nonempty_list(COMMA, X), RPAREN)) { params } /* ------------------------------------------------------------------------- */ /* The "?", "+", and "*" modifiers are short-hands for applications of certain parameterized nonterminals, defined in the standard library. */ modifier: QUESTION { with_poss $startpos $endpos "option" } | PLUS { with_poss $startpos $endpos "nonempty_list" } | STAR { with_poss $startpos $endpos "list" } /* ------------------------------------------------------------------------- */ /* A trailer is announced by %%, but is optional. */ trailer: EOF { None } | p = PERCENTPERCENT /* followed by actual trailer */ { Some (Lazy.force p) } %% menhir-20130116/src/IO.ml0000644000175000017500000000522512075533603013723 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* Input-output utilities. *) (* ------------------------------------------------------------------------- *) (* [exhaust channel] reads all of the data that's available on [channel]. 
*) let chunk_size = 2048 let exhaust channel = let buffer = Buffer.create chunk_size in let chunk = String.create chunk_size in let rec loop () = let length = input channel chunk 0 chunk_size in if length = 0 then Buffer.contents buffer else begin Buffer.add_substring buffer chunk 0 length; loop() end in loop() (* ------------------------------------------------------------------------- *) (* [invoke command] invokes an external command (which expects no input) and returns its output, if the command succeeds. It returns [None] if the command fails. *) let invoke command = let ic = Unix.open_process_in command in let result = exhaust ic in match Unix.close_process_in ic with | Unix.WEXITED 0 -> Some result | _ -> None (* ------------------------------------------------------------------------- *) (* [winvoke writers command cleaners] invokes each of the [writer] functions, invokes the command [command], and runs each of the [cleaner] functions. Then, it either returns the command's output, if the command succeeded, or exits, otherwise. *) let winvoke writers command cleaners = let call action = action () in List.iter call writers; let output = invoke command in List.iter call cleaners; (* Stop if the command failed. Otherwise, return its output. *) match output with | None -> (* Presumably, the command printed an error message for us. *) exit 1 | Some output -> output menhir-20130116/src/partialGrammar.mli0000644000175000017500000000220712075533603016525 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. 
*) (* *) (**************************************************************************) (* $Id: partialGrammar.mli,v 1.4 2005/12/01 16:20:07 regisgia Exp $ *) val join_partial_grammars : ConcreteSyntax.grammar list -> InternalSyntax.grammar menhir-20130116/src/lr1partial.ml0000644000175000017500000001705412075533603015472 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) open Grammar module Run (X : sig (* A restricted set of tokens of interest. *) val tokens: TerminalSet.t (* A state of the (merged) LR(1) automaton that we're trying to simulate. *) val goal: Lr1.node end) = struct (* First, let's restrict our interest to the nodes of the merged LR(1) automaton that can reach the goal node. Some experiments show that this can involve one tenth to one half of all nodes. This optimization seems minor, but is easy to implement. *) let relevant = Lr1.reverse_dfs X.goal (* Second, all of the states that we shall consider are restricted to the set of tokens of interest. This is an important idea: by abstracting away some information, we make the construction much faster. *) let restrict = Lr0.restrict X.tokens (* Constructing the automaton. The automaton is represented as a graph. States are never merged -- this is a canonical LR(1) construction! As we go, we record the correspondence between nodes in this automaton and nodes in the merged LR(1) automaton. This allows us to tell when we have reached the desired place. 
This also allows us not to follow transitions that have already been eliminated, in the merged automaton, via resolution of shift/reduce conflicts. Whenever we follow a transition in the canonical LR(1) automaton, we check that the corresponding transition is legal in the merged LR(1) automaton. The automaton is explored breadth-first and shortest paths from every node to one of the start nodes are recorded. *) type node = { state: Lr0.lr1state; ancestor: (Symbol.t * node) option; shadow: Lr1.node; } (* A queue of pending nodes, whose successors should be explored. *) let queue : node Queue.t = Queue.create() (* Mapping of LR(0) state numbers to lists of nodes. *) let map : node list array = Array.create Lr0.n [] (* Exploring a state. This creates a new node, if necessary, and enqueues it for further exploration. *) exception Goal of node * Terminal.t let explore ancestor shadow (state : Lr0.lr1state) : unit = (* Find all existing nodes that share the same LR(0) core. *) let k = Lr0.core state in assert (k < Lr0.n); let similar = map.(k) in (* Check whether one of these nodes coincides with the candidate new node. If so, stop. This check requires comparing not only the states of the partial, canonical automaton, but also their shadows in the full, merged automaton. This is because a single state of the canonical automaton may be reached along several different paths, leading to distinct shadows in the merged automaton, and we must explore all of these paths in order to ensure that we eventually find a goal node. *) if not (List.exists (fun node -> Lr0.equal state node.state && shadow == node.shadow ) similar) then begin (* Otherwise, create a new node. *) let node = { state = state; ancestor = ancestor; shadow = shadow; } in map.(k) <- node :: similar; Queue.add node queue; (* Check whether this is a goal node. 
A node [N] is a goal node if (i) [N] has a conflict involving one of the tokens of interest and (ii) [N] corresponds to the goal node, that is, the path that leads to [N] in the canonical LR(1) automaton leads to the goal node in the merged LR(1) automaton. Note that these conditions do not uniquely define [N]. *) if shadow == X.goal then let can_reduce = ref TerminalSet.empty in let reductions1 : Production.index list TerminalMap.t = Lr1.reductions shadow in List.iter (fun (toks, prod) -> TerminalSet.iter (fun tok -> (* We are looking at a [(tok, prod)] pair -- a reduction in the canonical automaton state. *) (* Check that this reduction, which exists in the canonical automaton state, also exists in the merged automaton -- that is, it wasn't suppressed by conflict resolution. *) if List.mem prod (TerminalMap.lookup tok reductions1) then try let (_ : Lr1.node) = SymbolMap.find (Symbol.T tok) (Lr1.transitions shadow) in (* Shift/reduce conflict. *) raise (Goal (node, tok)) with Not_found -> let toks = !can_reduce in (* We rely on the property that [TerminalSet.add tok toks] preserves physical equality when [tok] is a member of [toks]. *) let toks' = TerminalSet.add tok toks in if toks == toks' then (* Reduce/reduce conflict. *) raise (Goal (node, tok)) else (* No conflict so far. *) can_reduce := toks' ) toks ) (Lr0.reductions state) end (* Populate the queue with the start nodes. Until we find a goal node, take a node out the queue, construct the nodes that correspond to its successors, and enqueue them. 
*) let goal, token = try ProductionMap.iter (fun (prod : Production.index) (k : Lr0.node) -> let shadow = try ProductionMap.find prod Lr1.entry with Not_found -> assert false in if relevant shadow then explore None shadow (restrict (Lr0.start k)) ) Lr0.entry; Misc.qiter (fun node -> SymbolMap.iter (fun symbol state -> try let shadow = SymbolMap.find symbol (Lr1.transitions node.shadow) in if relevant shadow then explore (Some (symbol, node)) shadow (restrict state) with Not_found -> (* No shadow. This can happen if a shift/reduce conflict was resolved in favor in reduce. Ignore that transition. *) () ) (Lr0.transitions node.state) ) queue; (* We didn't find a goal node. This shouldn't happen! If the goal node in the merged LR(1) automaton has a conflict, then there should exist a node with a conflict in the canonical automaton as well. Otherwise, Pager's construction is incorrect. *) begin Printf.fprintf stderr "** Internal failure (Pager's theorem).\n"; Printf.fprintf stderr "** Tokens of interest: %s\n" (TerminalSet.print X.tokens); Printf.fprintf stderr "** Goal state: %d\n" (Lr1.number X.goal); Printf.fprintf stderr "** Please send your grammar to Menhir's developers.\n%!"; exit 1 end with Goal (node, tok) -> node, tok (* Query the goal node that was found about the shortest path from it to one of the entry nodes. *) let source, path = let rec follow path node = match node.ancestor with | None -> Lr1.start2item node.shadow, Array.of_list path | Some (symbol, node) -> follow (symbol :: path) node in follow [] goal let goal = Lr0.export goal.state end menhir-20130116/src/parser.mly0000644000175000017500000002256312075533603015105 0ustar stephsteph/**************************************************************************/ /* */ /* Menhir */ /* */ /* Franois Pottier, INRIA Rocquencourt */ /* Yann Rgis-Gianas, PPS, Universit Paris Diderot */ /* */ /* Copyright 2005-2008 Institut National de Recherche en Informatique */ /* et en Automatique. All rights reserved. 
This file is distributed */ /* under the terms of the Q Public License version 1.0, with the change */ /* described in file LICENSE. */ /* */ /**************************************************************************/ /* This is the crude version of the parser. It is meant to be processed by ocamlyacc. Its existence is necessary for bootstrapping. It is kept in sync with [fancy-parser]. The two parsers accept the same language, but [fancy-parser] performs more refined error recovery. */ %{ open Keyword open ConcreteSyntax open Syntax open Positions %} %token TOKEN TYPE LEFT RIGHT NONASSOC START PREC PUBLIC COLON BAR EOF EQUAL %token INLINE LPAREN RPAREN COMMA QUESTION STAR PLUS PARAMETER %token LID UID %token HEADER %token OCAMLTYPE %token PERCENTPERCENT %token ACTION %start grammar %type grammar /* These declarations solve a shift-reduce conflict in favor of shifting: when the declaration of a non-terminal symbol begins with a leading bar, it is understood as an (insignificant) leading optional bar, *not* as an empty right-hand side followed by a bar. This ambiguity arises due to the existence of a new notation for letting several productions share a single semantic action. */ %nonassoc no_optional_bar %nonassoc BAR %% /* ------------------------------------------------------------------------- */ /* A grammar consists of declarations and rules, followed by an optional trailer, which we do not parse. */ grammar: declarations PERCENTPERCENT rules trailer { { pg_filename = ""; (* filled in by the caller *) pg_declarations = List.rev $1; pg_rules = $3; pg_trailer = $4 } } trailer: EOF { None } | PERCENTPERCENT /* followed by actual trailer */ { Some (Lazy.force $1) } /* ------------------------------------------------------------------------- */ /* A declaration is an %{ Objective Caml header %}, or a %token, %start, %type, %left, %right, or %nonassoc declaration. 
*/ declarations: /* epsilon */ { [] } | declarations declaration { $2 @ $1 } declaration: | HEADER /* lexically delimited by %{ ... %} */ { [ unknown_pos (DCode $1) ] } | TOKEN optional_ocamltype terminals { List.map (Positions.map (fun terminal -> DToken ($2, terminal))) $3 } | START nonterminals { List.map (Positions.map (fun nonterminal -> DStart nonterminal)) $2 } | TYPE OCAMLTYPE actual_parameters { List.map (Positions.map (fun nt -> DType ($2, nt))) (List.map Parameters.with_pos $3) } | START OCAMLTYPE nonterminals /* %start foo is syntactic sugar for %start foo %type foo */ { Misc.mapd (fun ntloc -> Positions.mapd (fun nt -> DStart nt, DType ($2, ParameterVar ntloc)) ntloc) $3 } | priority_keyword symbols { let prec = ParserAux.current_token_precedence (rhs_start_pos 1) (rhs_end_pos 1) in List.map (Positions.map (fun symbol -> DTokenProperties (symbol, $1, prec))) $2 } | PARAMETER OCAMLTYPE { [ unknown_pos (DParameter $2) ] } optional_ocamltype: /* epsilon */ { None } | OCAMLTYPE /* lexically delimited by angle brackets */ { Some $1 } priority_keyword: LEFT { LeftAssoc } | RIGHT { RightAssoc } | NONASSOC { NonAssoc } /* ------------------------------------------------------------------------- */ /* A symbol is a terminal or nonterminal symbol. One would like to require nonterminal symbols to begin with a lowercase letter, so as to lexically distinguish them from terminal symbols, which must begin with an uppercase letter. However, for compatibility with ocamlyacc, this is impossible. It can be required only for nonterminal symbols that are also start symbols. */ symbols: /* epsilon */ { [] } | symbols optional_comma symbol { $3 :: $1 } symbol: LID { $1 } | UID { $1 } optional_comma: /* epsilon */ { () } | COMMA { () } /* ------------------------------------------------------------------------- */ /* Terminals must begin with an uppercase letter. Nonterminals that are declared to be start symbols must begin with a lowercase letter. 
*/ terminals: /* epsilon */ { [] } | terminals optional_comma UID { $3 :: $1 } nonterminals: /* epsilon */ { [] } | nonterminals LID { $2 :: $1 } /* ------------------------------------------------------------------------- */ /* A rule defines a symbol. It is optionally declared %public, and optionally carries a number of formal parameters. The right-hand side of the definition consists of a list of production groups. */ rules: /* epsilon */ { [] } | rules rule { $2 :: $1 } rule: flags symbol optional_formal_parameters COLON optional_bar production_group production_groups { let public, inline = $1 in { pr_public_flag = public; pr_inline_flag = inline; pr_nt = Positions.value $2; pr_positions = [ Positions.position $2 ]; pr_parameters = $3; pr_branches = List.flatten ($6 :: List.rev $7) } } flags: /* epsilon */ { false, false } | PUBLIC { true, false } | INLINE { false, true } | PUBLIC INLINE { true, true } | INLINE PUBLIC { true, true } /* ------------------------------------------------------------------------- */ /* Parameters are surroundered with parentheses and delimited by commas. The syntax of actual parameters allows applications, whereas the syntax of formal parameters does not. It also allows use of the "?", "+", and "*" shortcuts. 
*/ optional_formal_parameters: /* epsilon */ { [] } | LPAREN formal_parameters RPAREN { $2 } formal_parameters: symbol { [ Positions.value $1 ] } | symbol COMMA formal_parameters { Positions.value $1 :: $3 } optional_actual_parameters: /* epsilon */ { [] } | LPAREN actual_parameters_comma RPAREN { $2 } actual_parameters_comma: actual_parameter { [ $1 ] } | actual_parameter COMMA actual_parameters_comma { $1 :: $3 } actual_parameter: symbol optional_actual_parameters optional_modifier { Parameters.oapp1 $3 (Parameters.app $1 $2) } actual_parameters: /* epsilon */ { [] } | actual_parameters optional_comma actual_parameter { $3::$1 } optional_bar: /* epsilon */ %prec no_optional_bar { () } | BAR { () } /* ------------------------------------------------------------------------- */ /* The "?", "+", and "*" modifiers are short-hands for applications of certain parameterized nonterminals, defined in the standard library. */ optional_modifier: /* epsilon */ { None } | modifier { Some $1 } modifier: QUESTION { unknown_pos "option" } | PLUS { unknown_pos "nonempty_list" } | STAR { unknown_pos "list" } /* ------------------------------------------------------------------------- */ /* A production group consists of a list of productions, followed by a semantic action and an optional precedence specification. 
*/ production_groups: /* epsilon */ { [] } | production_groups BAR production_group { $3 :: $1 } production_group: productions ACTION /* action is lexically delimited by braces */ optional_precedence { let productions, action, oprec2 = $1, $2, $3 in ParserAux.check_production_group productions (rhs_start_pos 2) (rhs_end_pos 2) action; List.map (fun (producers, oprec1, rprec, pos) -> { pr_producers = producers; pr_action = action; pr_branch_shift_precedence = ParserAux.override pos oprec1 oprec2; pr_branch_reduce_precedence = rprec; pr_branch_position = pos }) productions } optional_precedence: /* epsilon */ { None } | PREC symbol { Some $2 } /* ------------------------------------------------------------------------- */ /* A production is a list of producers, optionally followed by a precedence declaration. Lists of productions are nonempty and separated with bars. */ productions: production { [ $1 ] } | production bar_productions { $1 :: $2 } bar_productions: BAR production { [ $2 ] } | BAR production bar_productions { $2 :: $3 } production: producers optional_precedence { List.rev $1, $2, ParserAux.current_reduce_precedence(), Positions.lex_join (symbol_start_pos()) (symbol_end_pos()) } producers: /* epsilon */ { [] } | producers producer { $2 :: $1 } /* ------------------------------------------------------------------------- */ /* A producer is an actual parameter, possibly preceded by a binding. */ producer: | actual_parameter { None, $1 } | LID EQUAL actual_parameter { Some $1, $3 } %% menhir-20130116/src/parserAux.ml0000644000175000017500000000536312075533603015371 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. 
This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) open Positions open Syntax let current_token_precedence = let c = ref 0 in fun pos1 pos2 -> incr c; PrecedenceLevel (Error.get_filemark (), !c, pos1, pos2) let current_reduce_precedence = let c = ref 0 in fun () -> incr c; PrecedenceLevel (Error.get_filemark (), !c, Lexing.dummy_pos, Lexing.dummy_pos) module IdSet = Set.Make (struct type t = identifier located let compare id1 id2 = compare (value id1) (value id2) end) let defined_identifiers ((ido, _) : producer) accu = Option.fold IdSet.add ido accu let defined_identifiers (producers : producer list) = List.fold_right defined_identifiers producers IdSet.empty let check_production_group right_hand_sides pos1 pos2 action = begin match right_hand_sides with | [] -> assert false | ((producers : producer list), _, _, _) :: right_hand_sides -> let ids = defined_identifiers producers in List.iter (fun (producers, _, _, _) -> let ids' = defined_identifiers producers in try let id = IdSet.choose (IdSet.union (IdSet.diff ids ids') (IdSet.diff ids' ids)) in Error.error [Positions.position id] "Two productions that share a semantic action must define\n\ exactly the same identifiers." with Not_found -> () ) right_hand_sides end; begin if List.length right_hand_sides > 1 && Action.use_dollar action then Error.signal (Positions.two pos1 pos2) "A semantic action that is shared between several productions must\n\ not use the $i notation -- semantic values must be named instead." 
end let override pos o1 o2 = match o1, o2 with | Some _, Some _ -> Error.signal [ pos ] "This production carries two %prec declarations."; o2 | None, Some _ -> o2 | _, None -> o1 menhir-20130116/src/codeBits.mli0000644000175000017500000000414312075533603015317 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module provides a number of tiny functions that help produce [IL] code. *) open IL (* Standard types. *) val tunit: typ val tint: typ val tstring: typ val texn: typ val tposition: typ val tlexbuf: typ val tobj : typ (* Building a type variable. *) val tvar: string -> typ (* Building a type scheme. *) val scheme: string list -> typ -> typescheme val type2scheme: typ -> typescheme (* Projecting out of a [PVar] pattern. *) val pat2var: pattern -> string (* Building a [let] construct, with on-the-fly simplification. *) val blet: (pattern * expr) list * expr -> expr val mlet: pattern list -> expr list -> expr -> expr (* [bottom] is an expression that has every type. Its semantics is irrelevant. *) val bottom: expr (* Boolean constants. *) val etrue: expr val efalse: expr val eboolconst: bool -> expr (* These help build function types. *) val arrow: typ -> typ -> typ val arrowif: bool -> typ -> typ -> typ val marrow: typ list -> typ -> typ (* These functions are used to generate names in menhir's namespace. 
*) val prefix: string -> string val dataprefix: string -> string val tvprefix: string -> string menhir-20130116/src/dot.mli0000644000175000017500000000417012075533603014351 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module displays graphs in graphviz dot format. It is much more basic than the one bundled with the ocamlgraph library, but offers the advantage of being stand-alone. *) (* ------------------------------------------------------------------------- *) (* Type definitions. *) type size = float * float (* in inches *) type orientation = | Portrait | Landscape type rankdir = | LeftToRight | TopToBottom type ratio = | Compress | Fill | Auto type style = (* Both nodes and edges. *) | Solid | Dashed | Dotted | Bold | Invisible (* Nodes only. *) | Filled | Diagonals | Rounded (* ------------------------------------------------------------------------- *) (* The graph printer. 
*) module Print (G : sig type vertex val name: vertex -> string val successors: (?style:style -> label:string -> vertex -> unit) -> vertex -> unit val iter: (?style:style -> label:string -> vertex -> unit) -> unit end) : sig val print: ?directed: bool -> ?size: size -> ?orientation: orientation -> ?rankdir: rankdir -> ?ratio: ratio -> out_channel -> unit end menhir-20130116/src/syntax.mli0000644000175000017500000000730412075533603015113 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* Terminals and nonterminal symbols are strings. Identifiers (which are used to refer to a symbol's semantic value) are strings. A file name is a string. *) type terminal = string type nonterminal = string type symbol = string type identifier = string type filename = string (* A trailer is also a source file fragment. It is represented simply as a string. *) type trailer = string (* Objective Caml semantic actions are represented as stretches. *) type action = Action.t type token_associativity = LeftAssoc | RightAssoc | NonAssoc | UndefinedAssoc type precedence_level = UndefinedPrecedence (* Items are incomparable when they originate in different files. A brand of type [Mark.t] is used to record an item's origin. The positions allow locating certain warnings. 
*) | PrecedenceLevel of Mark.t * int * Lexing.position * Lexing.position type token_properties = { tk_filename : filename; tk_ocamltype : Stretch.ocamltype option; tk_position : Positions.t; mutable tk_associativity : token_associativity; mutable tk_priority : precedence_level; (* TEMPORARY terminologie toujours pas coherente *) mutable tk_is_declared : bool; } type parameter = | ParameterVar of symbol Positions.located | ParameterApp of symbol Positions.located * parameters and parameters = parameter list type declaration = (* Raw Objective Caml code. *) | DCode of Stretch.t (* Raw Objective Caml functor parameter. *) | DParameter of Stretch.ocamltype (* really a stretch *) (* Terminal symbol (token) declaration. *) | DToken of Stretch.ocamltype option * terminal (* Start symbol declaration. *) | DStart of nonterminal (* Priority and associativity declaration. *) | DTokenProperties of terminal * token_associativity * precedence_level (* Type declaration. *) | DType of Stretch.ocamltype * parameter type branch_shift_precedence = symbol Positions.located option type branch_reduce_precedence = precedence_level type producer = identifier Positions.located option * parameter type parameterized_branch = { pr_branch_position : Positions.t; pr_producers : producer list; pr_action : action; pr_branch_shift_precedence : branch_shift_precedence; pr_branch_reduce_precedence : branch_reduce_precedence } type parameterized_rule = { pr_public_flag : bool; pr_inline_flag : bool; pr_nt : nonterminal; pr_positions : Positions.t list; pr_parameters : symbol list; pr_branches : parameterized_branch list; } menhir-20130116/src/interpret.mli0000644000175000017500000000214112075533603015573 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en 
Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module is in charge of handling the [--interpret] option, if it is present. It offers no functionality. *) menhir-20130116/src/engineTypes.ml0000644000175000017500000002630612075533603015711 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (* This file defines several types and module types that are used in the specification of module [Engine]. *) (* --------------------------------------------------------------------------- *) (* It would be nice if we could keep the structure of stacks and environments hidden. However, stacks and environments must be accessible to semantic actions, so the following data structure definitions must be public. *) (* --------------------------------------------------------------------------- *) (* A stack is a linked list of cells. A sentinel cell -- which is its own successor -- is used to mark the bottom of the stack. The sentinel cell itself is not significant -- it contains dummy values. *) type ('state, 'semantic_value) stack = { (* The state that we should go back to if we pop this stack cell. *) (* This convention means that the state contained in the top stack cell is not the current state [env.current]. 
It also means that the state found within the sentinel is a dummy -- it is never consulted. This convention is the same as that adopted by the code-based back-end. *) state: 'state; (* The semantic value associated with the chunk of input that this cell represents. *) semv: 'semantic_value; (* The start and end positions of the chunk of input that this cell represents. *) startp: Lexing.position; endp: Lexing.position; (* The next cell down in the stack. If this is a self-pointer, then this cell is the sentinel, and the stack is conceptually empty. *) next: ('state, 'semantic_value) stack; } (* --------------------------------------------------------------------------- *) (* A parsing environment contains basically all of the automaton's state. *) type ('state, 'semantic_value, 'token) env = { (* The lexer. *) lexer: Lexing.lexbuf -> 'token; (* The lexing buffer. It is used as an argument to the lexer, and also accessed directly when extracting positions. *) lexbuf: Lexing.lexbuf; (* The last token that was obtained from the lexer. *) mutable token: 'token; (* A count of how many tokens were shifted since the beginning, or since the last [error] token was encountered. By convention, if [shifted] is (-1), then the current lookahead token is [error]. *) mutable shifted: int; (* A copy of the value of [shifted] just before the most recent error was detected. This value is not used by the automaton itself, but is made accessible to semantic actions. *) mutable previouserror: int; (* The stack. In [CodeBackend], it is passed around on its own, whereas, here, it is accessed via the environment. *) mutable stack: ('state, 'semantic_value) stack; (* The current state. In [CodeBackend], it is passed around on its own, whereas, here, it is accessed via the environment. *) mutable current: 'state; } (* --------------------------------------------------------------------------- *) (* This signature describes the parameters that must be supplied to the LR engine. 
*) module type TABLE = sig (* The type of automaton states. *) type state (* The type of tokens. These can be thought of as real tokens, that is, tokens returned by the lexer. They carry a semantic value. This type does not include the [error] pseudo-token. *) type token (* The type of terminal symbols. These can be thought of as integer codes. They do not carry a semantic value. This type does include the [error] pseudo-token. *) type terminal (* The type of semantic values. *) type semantic_value (* A token is conceptually a pair of a (non-[error]) terminal symbol and a semantic value. The following two functions are the pair projections. *) val token2terminal: token -> terminal val token2value: token -> semantic_value (* Even though the [error] pseudo-token is not a real token, it is a terminal symbol. Furthermore, for regularity, it must have a semantic value. *) val error_terminal: terminal val error_value: semantic_value (* The type of productions. *) type production (* If a state [s] has a default reduction on production [prod], then, upon entering [s], the automaton should reduce [prod] without consulting the lookahead token. The following function allows determining which states have default reductions. *) (* Instead of returning a value of a sum type -- either [DefRed prod], or [NoDefRed] -- it accepts two continuations, and invokes just one of them. This mechanism allows avoiding a memory allocation. *) val default_reduction: state -> ('env -> production -> 'answer) -> ('env -> 'answer) -> 'env -> 'answer (* An LR automaton can normally take three kinds of actions: shift, reduce, or fail. (Acceptance is a particular case of reduction: it consists in reducing a start production.) *) (* There are two variants of the shift action. [shift/discard s] instructs the automaton to discard the current token, request a new one from the lexer, and move to state [s]. [shift/nodiscard s] instructs it to move to state [s] without requesting a new token. 
This instruction should be used when [s] has a default reduction on [#]. See [CodeBackend.gettoken] for details. *) (* This is the automaton's action table. It maps a pair of a state and a terminal symbol to an action. *) (* Instead of returning a value of a sum type -- one of shift/discard, shift/nodiscard, reduce, or fail -- this function accepts three continuations, and invokes just one them. This mechanism allows avoiding a memory allocation. *) (* In summary, the parameters to [action] are as follows: - the first two parameters, a state and a terminal symbol, are used to look up the action table; - the next parameter is the semantic value associated with the above terminal symbol; it is not used, only passed along to the shift continuation, as explained below; - the shift continuation expects an environment; a flag that tells whether to discard the current token; the terminal symbol that is being shifted; its semantic value; and the target state of the transition; - the reduce continuation expects an environment and a production; - the fail continuation expects an environment; - the last parameter is the environment; it is not used, only passed along to the selected continuation. *) val action: state -> terminal -> semantic_value -> ('env -> bool -> terminal -> semantic_value -> state -> 'answer) -> ('env -> production -> 'answer) -> ('env -> 'answer) -> 'env -> 'answer (* This is the automaton's goto table. It maps a pair of a state and a production to a new state. This convention is slightly different from the textbook approach. The goto table is usually indexed by a state and a non-terminal symbol. *) val goto: state -> production -> state (* By convention, a semantic action is responsible for: 1. fetching whatever semantic values and positions it needs off the stack; 2. popping an appropriate number of cells off the stack, as dictated by the length of the right-hand side of the production; this involves updating [env.stack]; 3. 
computing a new semantic value, as well as new start and end positions; 4. pushing a new stack cell, which contains the three values computed in step 3; this again involves updating [env.stack] (only one update is necessary). Point 1 is essentially forced upon us: if semantic values were fetched off the stack by this interpreter, then the calling convention for semantic actions would be variadic: not all semantic actions would have the same number of arguments. The rest follows rather naturally. *) (* If production [prod] is an accepting production, then the semantic action is responsible for raising exception [Accept], instead of returning normally. This convention allows us to not distinguish between regular productions and accepting productions. All we have to do is catch that exception at top level. *) (* Semantic actions are allowed to raise [Error]. *) exception Accept of semantic_value exception Error type semantic_action = (state, semantic_value, token) env -> unit val semantic_action: production -> semantic_action (* The LR engine can attempt error recovery. This consists in discarding tokens, just after an error has been successfully handled, until a token that can be successfully handled is found. This mechanism is optional. The following flag enables it. *) val recovery: bool (* The LR engine requires a number of hooks, which are used for logging. *) (* The comments below indicate the conventional messages that correspond to these hooks in the code-based back-end; see [CodeBackend]. *) module Log : sig (* State %d: *) val state: state -> unit (* Shifting () to state *) val shift: terminal -> state -> unit (* Reducing a production should be logged either as a reduction event (for regular productions) or as an acceptance event (for start productions). 
*) (* Reducing production / Accepting *) val reduce_or_accept: production -> unit (* Lookahead token is now (-) *) val lookahead_token: Lexing.lexbuf -> terminal -> unit (* Initiating error handling *) val initiating_error_handling: unit -> unit (* Resuming error handling *) val resuming_error_handling: unit -> unit (* Handling error in state *) val handling_error: state -> unit (* Discarding last token read () *) val discarding_last_token: terminal -> unit end end (* --------------------------------------------------------------------------- *) (* This signature describes the LR engine. *) module type ENGINE = sig type state type token type semantic_value (* An entry point to the engine requires a start state, a lexer, and a lexing buffer. It either succeeds and produces a semantic value, or fails and raises [Error]. *) exception Error val entry: state -> (Lexing.lexbuf -> token) -> Lexing.lexbuf -> semantic_value end menhir-20130116/src/convert.mli0000644000175000017500000000662212075533603015247 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (* An ocamlyacc-style, or Menhir-style, parser requires access to the lexer, which must be parameterized with a lexing buffer, and to the lexing buffer itself, where it reads position information. *) (* This traditional API is convenient when used with ocamllex, but inelegant when used with other lexer generators. 
*) type ('token, 'semantic_value) traditional = (Lexing.lexbuf -> 'token) -> Lexing.lexbuf -> 'semantic_value (* This revised API is independent of any lexer generator. Here, the parser only requires access to the lexer, and the lexer takes no parameters. The tokens returned by the lexer may contain position information. *) type ('token, 'semantic_value) revised = (unit -> 'token) -> 'semantic_value (* --------------------------------------------------------------------------- *) (* Converting a traditional parser, produced by ocamlyacc or Menhir, into a revised parser. *) (* A token of the revised lexer is essentially a triple of a token of the traditional lexer (or raw token), a start position, and and end position. The three [get] functions are accessors. *) (* We do not require the type ['token] to actually be a triple type. This enables complex applications where it is a record type with more than three fields. It also enables simple applications where positions are of no interest, so ['token] is just ['raw_token] and [get_startp] and [get_endp] return dummy positions. *) val traditional2revised: ('token -> 'raw_token) -> ('token -> Lexing.position) -> ('token -> Lexing.position) -> ('raw_token, 'semantic_value) traditional -> ('token, 'semantic_value) revised (* --------------------------------------------------------------------------- *) (* Converting a revised parser back to a traditional parser. *) val revised2traditional: ('raw_token -> Lexing.position -> Lexing.position -> 'token) -> ('token, 'semantic_value) revised -> ('raw_token, 'semantic_value) traditional (* --------------------------------------------------------------------------- *) (* Simplified versions of the above, where concrete triples are used. 
*) module Simplified : sig val traditional2revised: ('token, 'semantic_value) traditional -> ('token * Lexing.position * Lexing.position, 'semantic_value) revised val revised2traditional: ('token * Lexing.position * Lexing.position, 'semantic_value) revised -> ('token, 'semantic_value) traditional end menhir-20130116/src/preFront.mli0000644000175000017500000000315212075533603015361 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module drives the first half of the front-end. It opens and parses the input files, which yields a number of partial grammars. It joins these grammars, expands them to get rid of parameterized nonterminals, and performs reachability analysis. This yields a single unified grammar. More transformations over this grammar are performed in the second half of the front-end, which is driven by [Front]. The modules [PreFront] and [Front] are separated because it is convenient to insert auxiliary modules, such as [TokenType] and [Infer], in between the two. *) val grammar: UnparameterizedSyntax.grammar menhir-20130116/src/error.mli0000644000175000017500000000611712075533603014717 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. 
This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module helps report errors and maintains some information about the source file that is being read. *) (* ---------------------------------------------------------------------------- *) (* Call [set_filename] before lexing and parsing in order to inform the module [Error] about the name of the file that is being examined. *) (* TEMPORARY limiter ou supprimer ou commenter cette interface stateful *) val set_filename: string -> unit val get_filename: unit -> string val get_filemark: unit -> Mark.t val file_contents: string option ref val get_file_contents: unit -> string (* ---------------------------------------------------------------------------- *) (* Logging and log levels. *) val logG: int -> (out_channel -> unit) -> unit val logA: int -> (out_channel -> unit) -> unit val logC: int -> (out_channel -> unit) -> unit (* ---------------------------------------------------------------------------- *) (* Errors and warnings. *) (* [error ps msg] displays the error message [msg], referring to the positions [ps], and exits. *) val error: Positions.positions -> string -> 'a (* [errorp v msg] displays the error message [msg], referring to the position range carried by [v], and exits. *) val errorp: 'a Positions.located -> string -> 'b (* [warning ps msg] displays the warning message [msg], referring to the positions [ps]. *) val warning: Positions.positions -> string -> unit (* [signal ps msg] displays the error message [msg], referring to the positions [ps], and does not exit immediately. *) val signal: Positions.positions -> string -> unit (* [errors] returns [true] if [signal] was previously called. Together [signal] and [errors] allow reporting multiple errors before aborting. 
*) val errors: unit -> bool (* Certain warnings about the grammar can optionally be treated as errors. The following function emits a warning or error message, via [warning] or [signal]. It does not stop the program; the client must at some point call [errors] and stop the program if any errors have been reported. *) val grammar_warning: Positions.positions -> string -> unit menhir-20130116/src/positions.mli0000644000175000017500000001060312075533603015610 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* $Id: positions.mli,v 1.10 2005/12/01 16:20:07 regisgia Exp $ *) (* TEMPORARY faire un peu le mnage dans cette interface plthorique? *) (** Extension of standard library's positions. *) (** {2 Extended lexing positions} *) (** Abstract type for pairs of positions in the lexing stream. *) type t (** Decoration of a value with a position. *) type 'a located = { value : 'a; position : t; } (** [value dv] returns the raw value that underlies the decorated value [dv]. *) val value: 'a located -> 'a (** [position dv] returns the position that decorates the decorated value [dv]. *) val position: 'a located -> t (** [with_pos p v] decorates [v] with a position [p]. *) val with_pos : t -> 'a -> 'a located val with_cpos: Lexing.lexbuf -> 'a -> 'a located val with_poss : Lexing.position -> Lexing.position -> 'a -> 'a located val unknown_pos : 'a -> 'a located (** [map f v] extends the decoration from [v] to [f v]. 
*) val map: ('a -> 'b) -> 'a located -> 'b located (** [iter f dv] applies [f] to the value inside [dv]. *) val iter: ('a -> unit) -> 'a located -> unit (** [mapd f v] extends the decoration from [v] to both members of the pair [f v]. *) val mapd: ('a -> 'b1 * 'b2) -> 'a located -> 'b1 located * 'b2 located (** This value is used when an object does not from a particular input location. *) val dummy: t (** {2 Accessors} *) (** [column p] returns the number of characters from the beginning of the line of the Lexing.position [p]. *) val column : Lexing.position -> int (** [column p] returns the line number of to the Lexing.position [p]. *) val line : Lexing.position -> int (** [characters p1 p2] returns the character interval between [p1] and [p2] assuming they are located in the same line. *) val characters : Lexing.position -> Lexing.position -> int * int val start_of_position: t -> Lexing.position val end_of_position: t -> Lexing.position val filename_of_position: t -> string (** {2 Position handling} *) (** [join p1 p2] returns a position that starts where [p1] starts and stops where [p2] stops. *) val join : t -> t -> t val lex_join : Lexing.position -> Lexing.position -> t val ljoinf : ('a -> t) -> 'a list -> t val joinf : ('a -> t) -> 'a -> 'a -> t val join_located : 'a located -> 'b located -> ('a -> 'b -> 'c) -> 'c located val join_located_list : ('a located) list -> ('a list -> 'b list) -> ('b list) located (** [string_of_lex_pos p] returns a string representation for the lexing position [p]. *) val string_of_lex_pos : Lexing.position -> string (** [string_of_pos p] returns the standard (Emacs-like) representation of the position [p]. *) val string_of_pos : t -> string (** [pos_or_undef po] is the identity function except if po = None, in that case, it returns [undefined_position]. *) val pos_or_undef : t option -> t (** {2 Interaction with the lexer runtime} *) (** [cpos lexbuf] returns the current position of the lexer. 
*) val cpos : Lexing.lexbuf -> t (** [string_of_cpos p] returns a string representation of the lexer's current position. *) val string_of_cpos : Lexing.lexbuf -> string (* The functions that print error messages and warnings require a list of positions. The following auxiliary functions help build such lists. *) type positions = t list val one: Lexing.position -> positions val two: Lexing.position -> Lexing.position -> positions val lexbuf: Lexing.lexbuf -> positions menhir-20130116/src/stringMap.ml0000644000175000017500000000253512075533603015361 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) include Map.Make (String) let cardinal s = fold (fun _ _ x -> x + 1) s 0 let filter pred map = fold (fun key value map -> if pred key value then add key value map else map) map empty let restrict domain map = filter (fun k _ -> StringSet.mem k domain) map let domain map = fold (fun key _ acu -> StringSet.add key acu) map StringSet.empty menhir-20130116/src/stretch.mli0000644000175000017500000000375312075533603015245 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. 
*) (* *) (**************************************************************************) (* $Id: stretch.mli,v 1.4 2005/12/01 16:20:07 regisgia Exp $ *) (* A stretch is a fragment of a source file. It holds the file name, the line number, and the line count (that is, the length) of the fragment. These are used for generating #line directives when the fragment is copied to an output file. It also holds the textual content of the fragment, as a string. The [raw_content] field holds the text that was found in the source file, while the [content] field holds the same text after transformation by the lexer (which substitutes keywords, inserts padding, etc.). *) type t = { stretch_filename : string; stretch_linenum : int; stretch_linecount : int; stretch_raw_content : string; stretch_content : string; stretch_keywords : Keyword.keyword Positions.located list } (* An Objective Caml type is either a stretch (if it was found in some source file) or a string (if it was inferred via [Infer]). *) type ocamltype = | Declared of t | Inferred of string menhir-20130116/src/stringSet.ml0000644000175000017500000000206612075533603015376 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. 
*) (* *) (**************************************************************************) include Set.Make (String) let of_list xs = List.fold_right add xs empty menhir-20130116/src/grammar.mli0000644000175000017500000003406412075533603015216 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module transforms [Front.grammar], an abstract syntax tree for the grammar, into an internal representation of the grammar that is more usable. *) (* ------------------------------------------------------------------------ *) (* Nonterminals. *) module Nonterminal : sig (* The type of nonterminals. *) type t (* The number of nonterminals. This includes the extra nonterminals that are internally generated for the grammar's entry points. *) val n: int (* [lookup] maps an identifier to a nonterminal, or raises [Not_found]. *) val lookup : string -> t (* Nonterminals can be converted to integers. This feature is exploited in the table-based back-end. *) val n2i: t -> int (* This produces a string representation of a nonterminal. It should in principle never be applied to one of the internally generated nonterminals, as we do not wish users to become aware of the existence of these extra nonterminals. However, we do sometimes violate this rule when it is difficult to do otherwise. The Boolean parameter tells whether the string representation should be normalized, that is, whether parentheses and commas should be eliminated. 
This is necessary if the string is intended for use as a valid nonterminal name or as a valid Objective Caml identifier. *) val print: bool -> t -> string (* This is the Objective Caml type associated with a nonterminal symbol. It is known only if a %type declaration was provided. This function is not applicable to the internally generated nonterminals. *) val ocamltype: t -> Stretch.ocamltype option (* Iteration over nonterminals. The order in which elements are examined, and the order of [map]'s output list, correspond to the numeric indices produced by [n2i] above. *) val iter: (t -> unit) -> unit val fold: (t -> 'a -> 'a) -> 'a -> 'a val map: (t -> 'a) -> 'a list (* Iteration over all nonterminals, except the start nonterminals. *) val iterx: (t -> unit) -> unit val foldx: (t -> 'a -> 'a) -> 'a -> 'a (* Tabulation of a function over nonterminals. *) val tabulate: (t -> 'a) -> (t -> 'a) (* [positions nt] is a list of the positions associated with the definition of [nt]. There can be more than one position because definitions can be split over multiple files. *) val positions: t -> Positions.t list (* This tells whether a non-terminal symbol is one of the start symbols. *) val is_start: t -> bool end (* ------------------------------------------------------------------------ *) (* Terminals. *) module Terminal : sig (* The type of terminals. *) type t (* The number of terminals. This includes the two pseudo-tokens [#] and [error]. *) val n: int (* Comparison. *) val equal: t -> t -> bool (* [lookup] maps an identifier to a terminal, or raises [Not_found]. *) val lookup : string -> t (* Terminals can be converted to integers. This feature is exploited in the table-based back-end. *) val t2i: t -> int (* This produces a string representation of a terminal. *) val print: t -> string (* This is the Objective Caml type associated with a terminal symbol. It is known only if the %token declaration was accompanied with a type. 
*) val ocamltype: t -> Stretch.ocamltype option (* These are the two pseudo-tokens [#] and [error]. The former is used to denote the end of the token stream. The latter is accessible to the user and is used for handling errors. *) val sharp: t val error: t (* This is the programmer-defined [EOF] token, if there is one. It is recognized based solely on its name, which is fragile, but this behavior is documented. This token is assumed to represent [ocamllex]'s [eof] pattern. It is used only in emitting warnings in [--error-recovery] mode. *) val eof: t option (* This returns [true] if and only if the token at hand is one of [#] or [error]. *) val pseudo: t -> bool (* Iteration over terminals. The order in which elements are examined, and the order of [map]'s output list, correspond to the numeric indices produced by [t2i] above. [mapx] offers iteration over all terminals except [#]. *) val iter: (t -> unit) -> unit val fold: (t -> 'a -> 'a) -> 'a -> 'a val map: (t -> 'a) -> 'a list val mapx: (t -> 'a) -> 'a list end (* ------------------------------------------------------------------------ *) (* Sets and maps over terminals. *) module TerminalSet : sig (* All of the operations documented in [GSet] are available. *) include GSet.S with type element = Terminal.t (* This offers a string representation of a set of terminals. The symbols are simply listed one after the other and separated with spaces. *) val print: t -> string (* This is the set of all terminal symbols except the pseudo-tokens [#] and [error]. *) val universe: t end (* All of the operations documented in [GMap] are available. *) module TerminalMap : GMap.S with type key = Terminal.t (* ------------------------------------------------------------------------ *) (* Symbols. *) module Symbol : sig (* A symbol is either a nonterminal or a terminal. *) type t = | N of Nonterminal.t | T of Terminal.t (* Comparison. 
*) val equal: t -> t -> bool val lequal: t list -> t list -> bool (* These produce a string representation of a symbol, of a list of symbols, or of an array of symbols. The symbols are simply listed one after the other and separated with spaces. [printao] prints an array of symbols, starting at a particular offset. [printaod] is analogous, but can also print a single dot at a particular position between two symbols. *) val print: t -> string val printl: t list -> string val printa: t array -> string val printao: int -> t array -> string val printaod: int -> int -> t array -> string end (* ------------------------------------------------------------------------ *) (* Sets and maps over symbols. *) (* All of the operations documented in [Set] are available. *) module SymbolSet : Set.S with type elt = Symbol.t module SymbolMap : sig (* All of the operations documented in [Map] are available. *) include Map.S with type key = Symbol.t val domain: 'a t -> key list (* This returns [true] if and only if all of the symbols in the domain of the map at hand are nonterminals. *) val purelynonterminal: 'a t -> bool end (* ------------------------------------------------------------------------ *) (* Productions. *) module Production : sig (* This is the type of productions. This includes user-defined productions as well as the internally generated productions associated with the start symbols. *) type index (* Productions can be converted to integers and back. This is unsafe and should be avoided as much as possible. This feature is exploited, for efficiency, in the encoding of items. *) val p2i: index -> int val i2p: int -> index (* The number of productions. *) val n: int (* These map a production index to the production's definition, that is, a nonterminal (the left-hand side) and an array of symbols (the right-hand side). 
*) val def: index -> Nonterminal.t * Symbol.t array val nt: index -> Nonterminal.t val rhs: index -> Symbol.t array val length: index -> int (* This maps a production index to an array of the identifiers that should be used for naming the semantic values of the symbols in the right-hand side. *) val identifiers: index -> Syntax.identifier array (* This maps a production index to an array of Boolean flag. Each flag tells whether the semantic value of the corresponding symbol is used in the semantic action. This is a conservative approximation: [true] means maybe, while [false] means certainly not. *) val used: index -> bool array (* This maps a production index to the production's semantic action. This function is not applicable to a start production. *) val action: index -> Syntax.action (* [positions prod] is a list of the positions associated with production [prod]. This is usually a singleton list, but there can be more than one position for start productions when the definition of the corresponding start symbol is split over multiple files. *) val positions: index -> Positions.t list (* Iteration over all productions. The order in which elements are examined, and the order of [map]'s output list, correspond to the numeric indices produced by [p2i] above. *) val iter: (index -> unit) -> unit val fold: (index -> 'a -> 'a) -> 'a -> 'a val map: (index -> 'a) -> 'a list (* Iteration over all productions, except the start productions. *) val iterx: (index -> unit) -> unit val foldx: (index -> 'a -> 'a) -> 'a -> 'a (* This maps a (user) non-terminal start symbol to the corresponding start production. *) val startsymbol2startprod: Nonterminal.t -> index (* Iteration over the productions associated with a specific nonterminal. *) val iternt: Nonterminal.t -> (index -> unit) -> unit val foldnt: Nonterminal.t -> 'a -> (index -> 'a -> 'a) -> 'a (* This allows determining whether a production is a start production. 
If it is a start production, the start symbol that it is associated with is returned. If it is a regular production, nothing is returned. *) val classify: index -> Nonterminal.t option (* This produces a string representation of a production. It should never be applied to a start production, as we do not wish users to become aware of the existence of these extra productions. *) val print: index -> string (* Tabulation of a Boolean function over productions. [tabulateb f] returns a tabulated version of [f] as well as the number of productions where [f] is true. *) val tabulate: (index -> 'a) -> (index -> 'a) val tabulateb: (index -> bool) -> (index -> bool) * int end (* ------------------------------------------------------------------------ *) (* Maps over productions. *) module ProductionMap : sig include GMap.S with type key = Production.index (* Iteration over the start productions only. *) val start: (Production.index -> 'a) -> 'a t end (* ------------------------------------------------------------------------ *) (* Analysis of the grammar. *) module Analysis : sig (* [nullable_first_rhs rhs i] considers the string of symbols found at offset [i] in the array [rhs]. It returns its NULLABLE flag as well as its FIRST set. The offset [i] must be contained between [0] and [n], where [n] is the length of [rhs], inclusive. *) val nullable_first_rhs: Symbol.t array -> int -> bool * TerminalSet.t (* [explain_first_rhs tok rhs i] explains why the token [tok] appears in the FIRST set for the string of symbols found at offset [i] in the array [rhs]. *) val explain_first_rhs: Terminal.t -> Symbol.t array -> int -> string (* [follow nt] is the FOLLOW set of the non-terminal symbol [nt], that is, the set of terminal symbols that could follow an expansion of [nt] in a valid sentence. *) val follow: Nonterminal.t -> TerminalSet.t end (* ------------------------------------------------------------------------ *) (* Conflict resolution via precedences. 
*) module Precedence : sig (* Shift/reduce conflicts require making a choice between shifting a token and reducing a production. How these choices are made is of no concern to the back-end, but here is a rough explanation. Shifting is preferred when the token has higher precedence than the production, or they have same precedence and the token is right-associative. Reducing is preferred when the token has lower precedence than the production, or they have same precedence and the token is left-associative. Neither is allowed when the token and the production have same precedence and the token is non-associative. No preference is explicitly specified when the token or the production has undefined precedence. In that case, the default choice is to prefer shifting, but a conflict will be reported. *) type choice = | ChooseShift | ChooseReduce | ChooseNeither | DontKnow val shift_reduce: Terminal.t -> Production.index -> choice (* Reduce/reduce conflicts require making a choice between reducing two distinct productions. This is done by exploiting a partial order on productions. For compatibility with ocamlyacc, this order should be total and should correspond to textual order when the two productions originate in the same source file. When they originate in different source files, the two productions should be incomparable. *) val reduce_reduce: Production.index -> Production.index -> Production.index option end (* ------------------------------------------------------------------------ *) (* Diagnostics. *) (* This function prints diagnostics about precedence declarations that are never consulted. It is called after the automaton is constructed. 
*) val diagnostics: unit -> unit menhir-20130116/src/front.mli0000644000175000017500000000241012075533603014706 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module drives the second half of the front-end. It starts where [PreFront] left off, and performs type inference. This yields the grammar that the back-end works with (through the interface provided by module [Grammar]). *) val grammar: UnparameterizedSyntax.grammar menhir-20130116/src/printer.mli0000644000175000017500000000353112075533603015246 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* A pretty-printer for [IL]. *) module Make (X : sig (* This is the channel that is being written to. *) val f: out_channel (* If [raw_stretch_action] is set, then we print the semantic actions as they are found into the original source code. *) val raw_stretch_action: bool (* This controls the way we print Objective Caml stretches (types and semantic actions). 
We either surround them with #line directives (for better error reports if the generated code is ill-typed) or don't (for better readability). The value is either [None] -- do not provide #line directives -- or [Some filename] -- do provide them. [filename] is the name of the file that is being written. *) val locate_stretches: string option end) : sig val program: IL.program -> unit val expr: IL.expr -> unit val interface: IL.interface -> unit end menhir-20130116/src/inliner.mli0000644000175000017500000000245012075533603015222 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This transformer inlines every function that is called at most once. It also inlines some functions whose body consists of a single function call. At the same time, every function that is never called is dropped. Public functions are never inlined or dropped. *) val inline: IL.program -> IL.program menhir-20130116/src/reductionGraphs.ml0000644000175000017500000002463612075533603016564 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. 
*) (* *) (**************************************************************************) (* This file contains experimental code that has been used in an attempt to explore the validation of the termination property for LR(1) automata. *) (* This code is currently unused. It could be plugged in again (at the end of the module [Invariant]. *) (* ------------------------------------------------------------------------ *) (* Build a graph of all reductions. The vertices are the states of the automaton; there is an edge from [s1] to [s2] if a reduction (including the goto step) can take us from [s1] to [s2]. Every edge is labeled with its effect on the size of the stack. *) (* This graph is built with respect to a fixed lookahead token [tok]. We consider only the reductions that are permitted with [tok] is the next token on the stream. *) exception NoDanger let clique_count = ref 0 let make_reduction_graph tok = (* Build the reduction graph for this token. The main part of the work is to define the edges; the rest is immediate. *) let module ReductionGraph = struct type node = Lr1.node let n = Lr1.n let index = Lr1.number (* This auxiliary function pops [n] cells off an abstract stack. The parameter [states] is a set of states that we might be in before popping. The function returns a set of states that we might be in after popping. *) let rec pop_word n states w = if n = 0 then states else match w with | [] -> (* the invariant is too weak to ensure that reduction is possible! *) assert false | (_, states) :: w -> pop_word (n-1) states w let pop_stack n states (w, _) = pop_word n states w (* The following function allows listing the successors of a node. Each edge is labeled with an integer value that represents the decrease in the size of the stack. *) let successors (action : int -> node -> unit) node : unit = (* Find which reductions are permitted at this node. 
*) let prods = match has_default_reduction node with | Some (prod, _) -> [ prod ] | None -> try TerminalMap.find tok (Lr1.reductions node) with Not_found -> [] in (* Get a description of the stack at this node. *) let stack = lfp node in (* For each production [prod], ... *) List.iter (fun prod -> (* If this is a start production, ignore it. We are not interested in accept actions, only in true reduce actions. *) match Production.classify prod with | Some _ -> () | None -> (* Find out how many cells are popped. *) let decrease = Production.length prod in (* Find out what states we might be in after popping. *) let states = pop_stack decrease (Lr1.NodeSet.singleton node) stack in (* Now, the goto step pushes one cell... *) let increase = 1 in let net = decrease - increase in (* Find out which states we might be in after the goto step. *) let symbol = Symbol.N (Production.nt prod) in let goto (state : Lr1.node) : Lr1.node = try SymbolMap.find symbol (Lr1.transitions state) with Not_found -> (* the invariant is too weak to ensure that goto is possible! *) assert false in (* There is a transition, labelled [decrease - increase], from [node] to every state in the image through [goto] of the set [states]. *) Lr1.NodeSet.iter (fun state -> action net (goto state) ) states ) prods let iter = Lr1.iter (* The [successors] function describes a multi-graph: there might be multiple edges with the same source and target nodes. In that case, we would like to keep only one, the one with minimum weight, as this is the most dangerous one. Do so (naively). *) let adjacency : (int * node) list Lr1.NodeMap.t ref = ref Lr1.NodeMap.empty let () = iter (fun source -> (* Compute a list of outgoing edges. *) let edges = ref [] in successors (fun weight target -> edges := (weight, target) :: !edges; ) source; let edges = List.sort (fun (weight1, _) (weight2, _) -> weight1 - weight2) !edges in (* Define a predicate that accepts an edge only the first time its target node is seen. 
*) let seen = ref Lr1.NodeSet.empty in let acceptable (_, node) = if Lr1.NodeSet.mem node !seen then false else begin seen := Lr1.NodeSet.add node !seen; true end in (* Filter the list of edges. This relies on [filter] evaluating the predicate left-to-right. *) let edges = List.filter acceptable edges in (* Augment the table. *) adjacency := Lr1.NodeMap.add source edges !adjacency ) let successors (action : int -> node -> unit) source : unit = let edges = try Lr1.NodeMap.find source !adjacency with Not_found -> assert false in List.iter (fun (weight, target) -> action weight target ) edges end in (* We are interested in determining whether the reduction graph contains simple cycles of nonpositive weight. In order to answer this question, it is sufficient (and more tractable) to consider each strongly connected component separately. *) (* Compute the strongly connected components. *) let module SCC = Tarjan.Run (struct include ReductionGraph (* Forget the edge labels. *) let successors action node = successors (fun _ target -> action target) node end) in (* Examine the components, one at a time. *) SCC.iter (fun representative elements -> match elements with | [] -> assert false | [ _ ] -> () | _ -> try (* We have a non-trivial component. [representative] is its representative, and [elements] is the list of its elements. *) (* This auxiliary function tests whether a node is a member of this component. *) let member node = Lr1.number (SCC.representative node) = Lr1.number representative in (* Build a description of this component. *) let module Clique = struct type node = Lr1.node (* Re-index the nodes. *) let n, node_to_new_index = List.fold_left (fun (n, map) node -> n + 1, Lr1.NodeMap.add node n map ) (0, Lr1.NodeMap.empty) elements let index node = try Lr1.NodeMap.find node node_to_new_index with Not_found -> assert false (* Restrict the edges to only those that remain within this component. 
*) let successors (action : int -> node -> unit) node : unit = ReductionGraph.successors (fun label successor -> if member successor then action label successor ) node (* Restrict the vertices to only the elements of this component. *) let iter (action : node -> unit) : unit = List.iter action elements end in (* In the following, we perform several tests, of increasing strength and cost, to determine whether there is a dangerous cycle in the clique. *) (* Check whether at least one edge has nonpositive weight. If that is not the case, then there is clearly no dangerous cycle. *) let danger = ref false in Clique.iter (fun node -> Clique.successors (fun weight _ -> if weight <= 0 then danger := true ) node ); if not !danger then raise NoDanger; (* Check whether there is at least one edge of negative weight. If not, look for a non-trivial strongly connected component among the edges of zero weight. *) let negative = ref false in Clique.iter (fun node -> Clique.successors (fun weight _ -> if weight < 0 then negative := true ) node ); if not !negative then begin let module ZeroWeight = struct include Clique let successors action source = successors (fun weight target -> if weight = 0 then action target ) source end in let module ZeroWeightSCC = Tarjan.Run (ZeroWeight) in danger := false; ZeroWeightSCC.iter (fun _ elements -> if List.length elements > 1 then danger := true ) end; if not !danger then raise NoDanger; (* Use Floyd and Warshall's algorithm to determine if there is a dangerous cycle. *) let module NC = NonpositiveCycles.Run(Clique) in if not NC.graph_has_nonpositive_simple_cycle then raise NoDanger; (* If there might be danger, then print this clique for manual examination. 
*) let module PrintableClique = struct include Clique type vertex = node let name node = Printf.sprintf "s%d" (Lr1.number node) let successors (action: ?style:Dot.style -> label:string -> vertex -> unit) node : unit = successors (fun label successor -> action ~label:(string_of_int label) successor ) node let iter (action: ?style:Dot.style -> label:string -> vertex -> unit) : unit = iter (fun node -> action ~label:(name node) node ) end in let filename = Printf.sprintf "%s.%d.dot" (Terminal.print tok) (Misc.postincrement clique_count) in let c = open_out filename in let module P = Dot.Print(PrintableClique) in P.print ~orientation:Dot.Portrait ~size:(8.,5.) c; close_out c with NoDanger -> () ) let () = (* The graphs are built, and printed to .dot files, only if requested by the user via [--make-reduction-graphs]. The reduction graphs are not used by Menhir itself. *) if Settings.make_reduction_graphs then begin Terminal.iter make_reduction_graph; Printf.fprintf stderr "Constructed %d potentially interesting reduction cliques.\n" !clique_count end menhir-20130116/src/lr1.mli0000644000175000017500000001320712075533603014262 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) open Grammar (* This module constructs an LR(1) automaton by following Pager's method, that is, by merging states on the fly when they are found to be (weakly) compatible. *) (* Shift/reduce conflicts are silently resolved when (and only when) that is allowed in a clean way by user-specified priorities. 
This includes shift/reduce/reduce conflicts when (and only when) there is agreement that the shift action should be preferred. Conflicts that cannot be silently resolved in this phase will be reported, explained, and arbitrarily resolved immediately before code generation. *) (* ------------------------------------------------------------------------- *) (* Accessors. *) (* This is the type of the automaton's nodes. *) type node module Node : Set.OrderedType with type t = node module NodeSet : Set.S with type elt = node module NodeMap : Map.S with type key = node module ImperativeNodeMap : Fix.IMPERATIVE_MAPS with type key = node (* These are the automaton's entry states, indexed by the start productions. *) val entry: node ProductionMap.t (* Nodes are numbered sequentially from [0] to [n-1]. *) val n: int val number: node -> int (* This provides access to the LR(1) state that a node stands for. *) val state: node -> Lr0.lr1state (* This converts a start node into the single item that it contains. *) val start2item: node -> Item.t (* This maps a node to its incoming symbol, that is, the symbol carried by all of the edges that enter this node. A node has zero incoming edges (and, thus, no incoming symbol) if and only if it is a start node. *) val incoming_symbol: node -> Symbol.t option (* This maps a node to its predecessors. *) val predecessors: node -> node list (* This provides access to a node's transitions and reductions. *) val transitions: node -> node SymbolMap.t val reductions: node -> Production.index list TerminalMap.t (* (New as of 2012/01/23.) This tells whether a shift/reduce conflict in this node was solved in favor of neither (%nonassoc). This implies that one must forbid a default reduction at this node. *) val forbid_default_reduction: node -> bool (* This inverts a mapping of tokens to productions into a mapping of productions to sets of tokens. 
*) val invert : ProductionMap.key list TerminalMap.t -> TerminalSet.t ProductionMap.t (* Computing which terminal symbols a state is willing to act upon. This function is currently unused, but could be used as part of an error reporting system. *) val acceptable_tokens: node -> TerminalSet.t (* Iteration over all nodes. The order in which elements are examined, and the order of [map]'s output list, correspond to the numeric indices produced by [number] above. *) val fold: ('a -> node -> 'a) -> 'a -> 'a val iter: (node -> unit) -> unit val map: (node -> 'a) -> 'a list (* Iteration over non-start nodes *) val foldx: ('a -> node -> 'a) -> 'a -> 'a val iterx: (node -> unit) -> unit (* Breadth-first iteration over all edges. See [Breadth]. *) val bfs: (bool -> node -> Symbol.t -> node -> unit) -> unit (* Iteration over all edges that carry a certain symbol. Edges are grouped in families, where all edges in a single family have the same target node. [targets f accu symbol] invokes [f accu sources target] once for every family, where [sources] are the sources of the edges in the family and [target] is their common target. *) val targets: ('a -> node list -> node -> 'a) -> 'a -> Symbol.t -> 'a (* Iteration over all nodes with conflicts. [conflicts f] invokes [f toks node] once for every node [node] with a conflict, where [toks] are the tokens involved in the conflicts at that node. *) val conflicts: (TerminalSet.t -> node -> unit) -> unit (* [reverse_dfs goal] performs a reverse depth-first search through the automaton, starting at node [goal], and marking the nodes traversed. It returns a function that tells whether a node is marked, that is, whether a path leads from that node to the goal node. *) val reverse_dfs: node -> (node -> bool) (* ------------------------------------------------------------------------- *) (* Modifications of the automaton. *) (* This function performs default conflict resolution. 
First, it resolves standard (shift/reduce and reduce/reduce) conflicts (thus ensuring that the automaton is deterministic) by removing some reduction actions. Second, it resolves end-of-stream conflicts by ensuring that states that have a reduce action at the pseudo-token "#" have no other action. It is called after conflicts have been explained and before code generation takes place. The automaton is modified in place. *) val default_conflict_resolution: unit -> unit menhir-20130116/src/nonTerminalDefinitionInlining.mli0000644000175000017500000000257412075533603021560 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* $Id: nonTerminalDefinitionInlining.mli,v 1.5 2005/12/01 16:20:06 regisgia Exp $ *) (** [inline g] traverses the rules of [g] and inlines the non terminal definitions that are marked with [%inline]. It returns a pair of the transformed grammar and a flag that tells whether any inlining was actually done. *) val inline: UnparameterizedSyntax.grammar -> UnparameterizedSyntax.grammar * bool menhir-20130116/src/partialGrammar.ml0000644000175000017500000006006212075533603016357 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. 
This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* $Id: partialGrammar.ml,v 1.63 2006/10/22 14:07:57 fpottier Exp $ *) open Misc open Syntax open Stretch open ConcreteSyntax open InternalSyntax open Positions open Keyword (* ------------------------------------------------------------------------- *) (* This adds one declaration [decl], as found in file [filename], to the grammar [grammar]. *) let join_declaration filename (grammar : grammar) decl = match decl.value with (* Preludes are stored in an arbitrary order. The order of preludes within a single source file is preserved. Same treatment for functor parameters. *) | DCode code -> { grammar with p_preludes = grammar.p_preludes @ [ code ] } | DParameter (Stretch.Declared stretch) -> { grammar with p_parameters = grammar.p_parameters @ [ stretch ] } | DParameter (Stretch.Inferred _) -> assert false (* Token declarations are recorded. Things are made somewhat difficult by the fact that %token and %left-%right-%nonassoc declarations are independent. *) | DToken (ocamltype, terminal) -> let token_property = try (* Retrieve any previous definition for this token. *) let token_property = StringMap.find terminal grammar.p_tokens in (* If the previous definition was actually a %token declaration (as opposed to a %left, %right, or %nonassoc specification), signal an error. *) if token_property.tk_is_declared then Error.errorp decl (Printf.sprintf "the token %s has multiple definitions." terminal) (* Otherwise, update the previous definition. *) else { token_property with tk_is_declared = true; tk_ocamltype = ocamltype; tk_filename = filename; tk_position = decl.position; } with Not_found -> (* If no previous definition exists, create one. 
*) { tk_filename = filename; tk_ocamltype = ocamltype; tk_associativity = UndefinedAssoc; tk_priority = UndefinedPrecedence; tk_position = decl.position; tk_is_declared = true } in { grammar with p_tokens = StringMap.add terminal token_property grammar.p_tokens } (* Start symbols. *) | DStart nonterminal -> { grammar with p_start_symbols = StringMap.add nonterminal decl.position grammar.p_start_symbols } (* Type declarations for nonterminals. *) | DType (ocamltype, nonterminal) -> { grammar with p_types = (nonterminal, with_pos (position decl) ocamltype)::grammar.p_types } (* Token associativity and precedence. *) | DTokenProperties (terminal, assoc, prec) -> (* Retrieve the property record for this token, creating one if none existed (but without deeming the token to have been declared). *) let token_properties, grammar = try StringMap.find terminal grammar.p_tokens, grammar with Not_found -> let p = { tk_filename = filename; tk_ocamltype = None; tk_associativity = UndefinedAssoc; tk_priority = prec; tk_is_declared = false; (* Will be updated later. *) tk_position = decl.position; } in p, { grammar with p_tokens = StringMap.add terminal p grammar.p_tokens } in (* Reject duplicate precedence declarations. *) if token_properties.tk_associativity <> UndefinedAssoc then Error.error [ decl.position; token_properties.tk_position ] (Printf.sprintf "there are multiple precedence declarations for token %s." terminal); (* Record the new declaration. *) token_properties.tk_priority <- prec; token_properties.tk_associativity <- assoc; grammar (* ------------------------------------------------------------------------- *) (* This stores an optional trailer into a grammar. Trailers are stored in an arbitrary order. 
*) let join_trailer trailer grammar = match trailer with | None -> grammar | Some trailer -> { grammar with p_postludes = trailer :: grammar.p_postludes } (* ------------------------------------------------------------------------- *) (* We rewrite definitions when nonterminals are renamed. The renaming [phi] is an association list of names to names. *) type renaming = (nonterminal * nonterminal) list let identity_renaming = [] let rewrite_nonterminal (phi : renaming) nonterminal = Misc.support_assoc phi nonterminal let rewrite_parameter phi parameter = Parameters.map (Positions.map (Misc.support_assoc phi)) parameter let rewrite_element phi (ido, parameter) = ido, rewrite_parameter phi parameter let rewrite_branch phi ({ pr_producers = producers } as branch) = { branch with pr_producers = List.map (rewrite_element phi) producers } let rewrite_branches phi branches = match phi with | [] -> branches | _ -> List.map (rewrite_branch phi) branches let fresh_counter = ref 0 let names = ref StringSet.empty let use_name name = names := StringSet.add name !names let used_name name = StringSet.mem name !names let rec fresh ?(hint = "v") () = let name = incr fresh_counter; hint ^ string_of_int !fresh_counter in if used_name name then fresh ~hint () else ( use_name name; name ) (* Alpha conversion of [prule]. We rename bound parameters using fresh names. *) let alphaconvert_rule parameters prule = let phi = List.combine parameters (List.map (fun x -> fresh ~hint:x ()) parameters) in { prule with pr_parameters = List.map (Misc.support_assoc phi) prule.pr_parameters; pr_branches = rewrite_branches phi prule.pr_branches } (* Rewrite a rule taking bounded names into account. We rename parameters to avoid capture. 
*) let rewrite_rule phi prule = let ids = List.fold_left (fun acu (f, d) -> StringSet.add f (StringSet.add d acu)) StringSet.empty phi in let captured_parameters = List.filter (fun p -> StringSet.mem p ids) prule.pr_parameters in let prule = alphaconvert_rule captured_parameters prule in { prule with pr_nt = rewrite_nonterminal phi prule.pr_nt; pr_branches = rewrite_branches phi prule.pr_branches } let rewrite_rules phi rules = List.map (rewrite_rule phi) rules let rewrite_grammar phi grammar = (* We assume that [phi] affects only private symbols, so it does not affect the start symbols. *) if phi = identity_renaming then grammar else { grammar with pg_rules = rewrite_rules phi grammar.pg_rules } (* ------------------------------------------------------------------------- *) (* To rename (internalize) a nonterminal, we prefix it with its filename. This guarantees that names are unique. *) let is_valid_nonterminal_character = function | 'A' .. 'Z' | 'a' .. 'z' | '_' | '\192' .. '\214' | '\216' .. '\246' | '\248' .. '\255' | '0' .. '9' -> true | _ -> false let restrict filename = let m = String.copy (Filename.chop_suffix filename (if Settings.coq then ".vy" else ".mly")) in for i = 0 to String.length m - 1 do if not (is_valid_nonterminal_character m.[i]) then m.[i] <- '_' done; m let rename nonterminal filename = let name = restrict filename ^ "_" ^ nonterminal in if used_name name then fresh ~hint:name () else (use_name name; name) (* ------------------------------------------------------------------------- *) (* A nonterminal is considered public if it is declared using %public or %start. *) let is_public grammar prule = prule.pr_public_flag || StringMap.mem prule.pr_nt grammar.p_start_symbols (* ------------------------------------------------------------------------- *) type symbol_kind = (* The nonterminal is declared public at a particular position. *) | PublicNonTerminal of Positions.t (* The nonterminal is not declared public at a particular position. 
*) | PrivateNonTerminal of Positions.t (* The symbol is a token. *) | Token of token_properties (* We do not know yet what does the symbol means. This is defined in the sequel or it is free in the partial grammar. *) | DontKnow of Positions.t type symbol_table = (symbol, symbol_kind) Hashtbl.t let find_symbol symbols symbol = Hashtbl.find symbols symbol let add_in_symbol_table symbols symbol kind = use_name symbol; Hashtbl.add symbols symbol kind; symbols let replace_in_symbol_table symbols symbol kind = Hashtbl.replace symbols symbol kind; symbols let empty_symbol_table () = Hashtbl.create 13 let store_symbol symbols symbol kind = try let sym_info = find_symbol symbols symbol in match sym_info, kind with (* There are two definitions of the same symbol in one particular unit. This is forbidden. *) | (PublicNonTerminal p | PrivateNonTerminal p), (PublicNonTerminal p' | PrivateNonTerminal p') -> Error.error [ p; p'] (Printf.sprintf "the nonterminal symbol %s is multiply defined." symbol) (* The symbol is known to be a token but declared as a non terminal.*) | (Token tkp, (PrivateNonTerminal p | PublicNonTerminal p)) | ((PrivateNonTerminal p | PublicNonTerminal p), Token tkp) -> Error.error [ p; tkp.tk_position ] (Printf.sprintf "The identifier %s is a reference to a token." symbol) (* We do not gain any piece of information. *) | _, DontKnow _ | Token _, Token _ -> symbols (* We learn that the symbol is a non terminal or a token. *) | DontKnow _, _ -> replace_in_symbol_table symbols symbol kind with Not_found -> add_in_symbol_table symbols symbol kind let store_used_symbol position tokens symbols symbol = try store_symbol symbols symbol (Token (StringMap.find symbol tokens)) with Not_found -> store_symbol symbols symbol (DontKnow position) let non_terminal_is_not_reserved symbol positions = if symbol = "error" then Error.error positions (Printf.sprintf "%s is reserved and thus cannot be used \ as a non-terminal symbol." 
symbol) let non_terminal_is_not_a_token tokens symbol positions = try let tkp = StringMap.find symbol tokens in Error.error (positions @ [ tkp.tk_position ]) (Printf.sprintf "The identifier %s is a reference to a token." symbol) with Not_found -> () let store_public_nonterminal tokens symbols symbol positions = non_terminal_is_not_reserved symbol positions; non_terminal_is_not_a_token tokens symbol positions; store_symbol symbols symbol (PublicNonTerminal (List.hd positions)) let store_private_nonterminal tokens symbols symbol positions = non_terminal_is_not_reserved symbol positions; non_terminal_is_not_a_token tokens symbol positions; store_symbol symbols symbol (PrivateNonTerminal (List.hd positions)) let string_of_kind = function | PublicNonTerminal p -> Printf.sprintf "public (%s)" (Positions.string_of_pos p) | PrivateNonTerminal p -> Printf.sprintf "private (%s)" (Positions.string_of_pos p) | Token tk -> Printf.sprintf "token (%s)" tk.tk_filename | DontKnow p -> Printf.sprintf "only used at (%s)" (Positions.string_of_pos p) let string_of_symbol_table t = let b = Buffer.create 13 in let m = 1 + Hashtbl.fold (fun k v acu -> max (String.length k) acu) t 0 in let fill_blank s = let s' = String.make m ' ' in String.blit s 0 s' 0 (String.length s); s' in Hashtbl.iter (fun k v -> Buffer.add_string b (Printf.sprintf "%s: %s\n" (fill_blank k) (string_of_kind v))) t; Buffer.contents b let is_private_symbol t x = try match Hashtbl.find t x with | PrivateNonTerminal _ -> true | _ -> false with Not_found -> false let is_public_symbol t x = try match Hashtbl.find t x with | PublicNonTerminal _ -> true | _ -> false with Not_found -> false let fold_on_private_symbols f init t = Hashtbl.fold (fun k -> function PrivateNonTerminal _ -> (fun acu -> f acu k) | _ -> (fun acu -> acu)) t init let fold_on_public_symbols f init t = Hashtbl.fold (fun k -> function PublicNonTerminal _ -> (fun acu -> f acu k) | _ -> (fun acu -> acu)) t init let iter_on_only_used_symbols f t = 
Hashtbl.iter (fun k -> function DontKnow pos -> f k pos | _ -> ()) t let symbols_of grammar (pgrammar : ConcreteSyntax.grammar) = let tokens = grammar.p_tokens in let symbols_of_rule symbols prule = let rec store_except_rule_parameters = fun symbols (symbol, parameters) -> (* Rule parameters are bound locally, so they are not taken into account. *) if List.mem symbol.value prule.pr_parameters then symbols else (* Otherwise, mark this symbol as being used and analyse its parameters. *) List.fold_left (fun symbols -> function | ParameterApp (symbol, parameters) -> store_except_rule_parameters symbols (symbol, parameters) | ParameterVar symbol -> store_except_rule_parameters symbols (symbol, []) ) (store_used_symbol symbol.position tokens symbols symbol.value) parameters in (* Analyse each branch. *) let symbols = List.fold_left (fun symbols branch -> List.fold_left (fun symbols (_, p) -> let symbol, parameters = Parameters.unapp p in store_except_rule_parameters symbols (symbol, parameters) ) symbols branch.pr_producers ) symbols prule.pr_branches in (* Store the symbol declaration. *) if prule.pr_public_flag || StringMap.mem prule.pr_nt grammar.p_start_symbols then store_public_nonterminal tokens symbols prule.pr_nt prule.pr_positions else store_private_nonterminal tokens symbols prule.pr_nt prule.pr_positions in List.fold_left symbols_of_rule (empty_symbol_table ()) pgrammar.pg_rules let merge_rules tokens symbols pgs = (* Retrieve all the public symbols. *) let public_symbols = List.fold_left (fold_on_public_symbols (fun s k -> StringSet.add k s)) (StringSet.singleton "error") symbols in (* We check the references in each grammar can be bound to a public symbol. *) let _ = List.iter (iter_on_only_used_symbols (fun k pos -> if not (StringSet.mem k public_symbols) then Error.error [ pos ] (Printf.sprintf "%s is undefined." k))) symbols in (* Detect private symbol clashes and rename them if necessary. 
*) let detect_private_symbol_clashes = fold_on_private_symbols (fun (defined, clashes) symbol -> if StringSet.mem symbol defined || StringSet.mem symbol public_symbols then (defined, StringSet.add symbol clashes) else (StringSet.add symbol defined, clashes)) in let private_symbols, clashes = List.fold_left detect_private_symbol_clashes (StringSet.empty, StringSet.empty) symbols in let rpgs = List.map (fun (symbol_table, pg) -> let renaming = StringSet.fold (fun x phi -> if is_private_symbol symbol_table x then begin let x' = rename x pg.pg_filename in Printf.fprintf stderr "Note: the nonterminal symbol %s (from %s) is renamed %s.\n" x pg.pg_filename x'; (x, x') :: phi end else phi) clashes [] in rewrite_grammar renaming pg) pgs in (* Merge public nonterminal definitions and copy private nonterminal definitions. Since the clash between private symbols have already been resolved, these copies are safe. *) List.fold_left (fun rules rpg -> List.fold_left (fun rules r -> let r = try let r' = StringMap.find r.pr_nt rules in let positions = r.pr_positions @ r'.pr_positions in let ra, ra' = List.length r.pr_parameters, List.length r'.pr_parameters in (* The arity of the parameterized symbols must be constant.*) if ra <> ra' then Error.error positions (Printf.sprintf "symbol %s is defined with arities %d and %d." r.pr_nt ra ra') else if r.pr_inline_flag <> r'.pr_inline_flag then Error.error positions (Printf.sprintf "not all definitions of %s are marked %%inline." r.pr_nt) else (* We combine the different branches. The parameters could have different names, we rename them with the fresh names assigned earlier (see the next comment). *) let phi = List.combine r.pr_parameters r'.pr_parameters in let rbr = rewrite_branches phi r.pr_branches in { r' with pr_positions = positions; pr_branches = rbr @ r'.pr_branches } with Not_found -> (* We alphaconvert the rule in order to avoid the capture of private symbols coming from another unit. 
*) alphaconvert_rule r.pr_parameters r in StringMap.add r.pr_nt r rules) rules rpg.pg_rules) StringMap.empty rpgs let empty_grammar = { p_preludes = []; p_postludes = []; p_parameters = []; p_start_symbols = StringMap.empty; p_types = []; p_tokens = StringMap.empty; p_rules = StringMap.empty } let join grammar pgrammar = let filename = pgrammar.pg_filename in List.fold_left (join_declaration filename) grammar pgrammar.pg_declarations $$ join_trailer pgrammar.pg_trailer (* Check that the $i's are consistent, that is, that they are within bounds and that they are not used when symbols are explicitly named. Check also that no two symbols carry the same name. *) let check_keywords grammar producers action = let length = List.length producers in List.iter (function keyword -> match Positions.value keyword with | Dollar i | Position (RightDollar i, _, _) -> if i < 1 || i > length then Error.errorp keyword (Printf.sprintf "$%d refers to a nonexistent symbol." i); let ido, param = List.nth producers (i - 1) in begin match ido with | Some { value = id } -> Error.errorp keyword (Printf.sprintf "please do not say: $%d. Instead, say: %s." i id) | None -> () end | Position (RightNamed id, _, _) -> let found = ref false in List.iter (fun (ido, param) -> match ido with | Some { value = id' } when id = id' -> found := true | _ -> () ) producers; if not !found then Error.errorp keyword (Printf.sprintf "%s refers to a nonexistent symbol." id) | Position (Left, _, _) | PreviousError | SyntaxError -> () ) (Action.pkeywords action) let check_parameterized_grammar_is_well_defined grammar = (* Every start symbol is defined and has a %type declaration. *) StringMap.iter (fun nonterminal p -> if not (StringMap.mem nonterminal grammar.p_rules) then Error.error [p] (Printf.sprintf "the start symbol %s is undefined." 
nonterminal); if not (List.exists (function | ParameterVar { value = id }, _ -> id = nonterminal | _ -> false) grammar.p_types) then Error.error [p] (Printf.sprintf "the type of the start symbol %s is unspecified." nonterminal); ) grammar.p_start_symbols; let rec parameter_head_symb = function | ParameterVar id -> id | ParameterApp (id, _) -> id in List.iter (fun (symbol, _) -> let head_symb = parameter_head_symb symbol in if not (StringMap.mem (value head_symb) grammar.p_rules) then Error.errorp (Parameters.with_pos symbol) (Printf.sprintf "this is a terminal symbol.\n\ %%type declarations are applicable only to nonterminal symbols.")) grammar.p_types; (* Every reference to a symbol is well defined. *) let reserved = [ "error" ] in let used_tokens = ref StringSet.empty in let mark_token_as_used token = used_tokens := StringSet.add token !used_tokens in let check_identifier_reference grammar prule s p = (* Mark the symbol as a used token if this is a token. *) if StringMap.mem s grammar.p_tokens then mark_token_as_used s; if not (StringMap.mem s grammar.p_rules || StringMap.mem s grammar.p_tokens || List.mem s prule.pr_parameters || List.mem s reserved) then Error.error [ p ] (Printf.sprintf "%s is undefined." s) in StringMap.iter (fun k prule -> List.iter (* Check each branch. *) (fun { pr_producers = producers; pr_branch_shift_precedence = sprec; pr_action = action } -> ignore (List.fold_left (* Check the producers. *) (fun already_seen (id, p) -> let symbol, parameters = Parameters.unapp p in let s = symbol.value and p = symbol.position in let already_seen = match id with None -> already_seen | Some id -> (* Check the producer id is unique. *) if StringSet.mem id.value already_seen then Error.error [ id.position ] (Printf.sprintf "there are multiple producers named %s in this sequence." id.value); StringSet.add id.value already_seen in (* Check that the producer is defined somewhere. 
*) check_identifier_reference grammar prule s p; StringMap.iter (check_identifier_reference grammar prule) (List.fold_left Parameters.identifiers StringMap.empty parameters); (* Check the %prec is a valid reference to a token. *) (try if not ((StringMap.find s grammar.p_tokens).tk_is_declared || List.mem s reserved) then Error.errorp symbol (Printf.sprintf "%s has not been declared as a token." s) with Not_found -> ()); already_seen ) StringSet.empty producers); check_keywords grammar producers action; match sprec with | None -> () | Some terminal -> check_identifier_reference grammar prule terminal.value terminal.position; (* It is forbidden to use the %prec directive with %inline. *) if prule.pr_inline_flag then Error.errorp terminal "use of %prec is forbidden in an %inlined nonterminal definition."; (* Furthermore, the symbol following %prec must be a valid token identifier. *) if not (StringMap.mem terminal.value grammar.p_tokens) then Error.errorp terminal (Printf.sprintf "%s is undefined." terminal.value)) prule.pr_branches; (* It is forbidden to use %inline on a %start symbol. *) if (prule.pr_inline_flag && StringMap.mem k grammar.p_start_symbols) then Error.error prule.pr_positions (Printf.sprintf "%s cannot be both a start symbol and inlined." k); ) grammar.p_rules; (* Check that every token is used. *) begin match Settings.token_type_mode with | Settings.TokenTypeOnly -> () | Settings.TokenTypeAndCode | Settings.CodeOnly _ -> StringMap.iter (fun token { tk_position = p } -> if not (StringSet.mem token !used_tokens) then Error.warning [p] (Printf.sprintf "the token %s is unused." 
token) ) grammar.p_tokens end; grammar let join_partial_grammars pgs = let grammar = List.fold_left join empty_grammar pgs in let symbols = List.map (symbols_of grammar) pgs in let tpgs = List.combine symbols pgs in let rules = merge_rules grammar.p_tokens symbols tpgs in check_parameterized_grammar_is_well_defined { grammar with p_rules = rules } menhir-20130116/src/breadth.mli0000644000175000017500000000377412075533603015205 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module implements generic breadth-first search over a graph with labeled edges. *) module Make (G : sig (* This is the type of graph vertices. *) type vertex (* This is the type of graph labels. *) type label (* These allow marking a vertex and checking whether it is marked. *) val set_mark: vertex -> Mark.t -> unit val get_mark: vertex -> Mark.t (* This is an iterator over the graph's entry vertices. *) val entry: (vertex -> unit) -> unit (* This provides access to a vertex' successors. *) val successors: (label -> vertex -> unit) -> vertex -> unit end) : sig (* [search f] invokes [f discovery v label v'] once for every edge from vertex [v] to vertex [v'] carrying label [label]. Vertices [v'] are presented breadth-first. The flag [discovery] tells whether the edge is a discovery edge, that is, whether it belongs to the spanning forest of shortest paths that is being built. 
*) val search: (bool -> G.vertex -> G.label -> G.vertex -> unit) -> unit end menhir-20130116/src/error.ml0000644000175000017500000000724412075533603014550 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) open Printf open Lexing (* TEMPORARY Vrifier que les messages d'erreur sont standardiss au maximum, localiss au maximum. Supprimer autant de fonctions que possible dans ce module. *) (* TEMPORARY reprendre compl`etement implementation et interface de ce module *) (* ---------------------------------------------------------------------------- *) (* Global state. *) let get_initialized_ref ref = match !ref with | None -> assert false | Some contents -> contents let filename = ref (None : string option) let filemark = ref Mark.none (* 2011/10/19: do not use [Filename.basename]. The [#] annotations that we insert in the [.ml] file must retain their full path. This does mean that the [#] annotations depend on how menhir is invoked -- e.g. [menhir foo/bar.mly] and [cd foo && menhir bar.mly] will produce different files. Nevertheless, this seems useful/reasonable. *) (* This also influences the type error messages produced by [--infer]. 
*) let set_filename name = filename := Some name; filemark := Mark.fresh() let get_filename () = get_initialized_ref filename let get_filemark () = !filemark let file_contents = ref (None : string option) let get_file_contents () = get_initialized_ref file_contents (* ---------------------------------------------------------------------------- *) (* Logging and log levels. *) let log kind verbosity msg = if kind >= verbosity then Printf.fprintf stderr "%t%!" msg let logG = log Settings.logG let logA = log Settings.logA let logC = log Settings.logC (* ---------------------------------------------------------------------------- *) (* Errors and warnings. *) let errors = ref false let printN positions message = List.iter (fun position -> fprintf stderr "%s:\n" (Positions.string_of_pos position) ) positions; fprintf stderr "%s\n%!" message let error_message message = "Error: " ^ message let error positions message = printN positions (error_message message); exit 1 let errorp v message = error [ Positions.position v ] message let signal positions message = printN positions message; errors := true let warning positions message = printN positions (Printf.sprintf "Warning: %s" message) let errors () = !errors (* Certain warnings about the grammar can optionally be treated as errors. The following function emits a warning or error message, via [warning] or [signal]. It does not stop the program; the client must at some point call [errors] and stop the program if any errors have been reported. 
*) let grammar_warning positions message = if Settings.strict then signal positions (error_message message) else warning positions message menhir-20130116/src/unparameterizedPrinter.ml0000644000175000017500000001420112075533603020151 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) open Positions open Misc open Syntax open Stretch open UnparameterizedSyntax open Settings let print_preludes f g = List.iter (fun prelude -> Printf.fprintf f "%%{%s%%}\n" prelude.stretch_raw_content ) g.preludes let print_start_symbols b g = StringSet.iter (fun symbol -> Printf.fprintf b "%%start %s\n" (Misc.normalize symbol) ) g.start_symbols let rec insert_in_partitions item m = function | [] -> [ (m, [ item ]) ] | (m', items) :: partitions when Mark.same m m' -> (m', item :: items) :: partitions | t :: partitions -> t :: (insert_in_partitions item m partitions) let insert (undefined, partitions) = function | (item, UndefinedPrecedence) -> ((item, 0) :: undefined, partitions) | (item, PrecedenceLevel (m, v, _, _)) -> (undefined, insert_in_partitions (item, v) m partitions) let print_ocamltype ocamltype = Printf.sprintf " <%s>" ( match ocamltype with | Declared stretch -> stretch.stretch_raw_content | Inferred t -> t ) let print_assoc = function | LeftAssoc -> Printf.sprintf "%%left" | RightAssoc -> Printf.sprintf "%%right" | NonAssoc -> Printf.sprintf "%%nonassoc" | UndefinedAssoc -> "" let print_tokens mode b g = (* Sort tokens wrt precedence. 
*) let undefined, partition_tokens = StringMap.fold (fun token prop acu -> insert acu (token, prop.tk_priority) ) g.tokens ([], []) in let ordered_tokens = List.fold_left (fun acu (_, ms) -> acu @ List.sort (fun (_, v) (_, v') -> compare v v') ms ) undefined partition_tokens in List.iter (fun (token, _) -> let prop = StringMap.find token g.tokens in if prop.tk_is_declared then Printf.fprintf b "%%token%s %s\n" begin match mode with | PrintNormal | PrintUnitActions -> Misc.o2s prop.tk_ocamltype print_ocamltype | PrintUnitActionsUnitTokens -> "" (* omitted ocamltype after %token means *) end token ) ordered_tokens; ignore (List.fold_left (fun last_prop (token, v) -> let prop = StringMap.find token g.tokens in match last_prop with | None -> if prop.tk_associativity = UndefinedAssoc then None else ( Printf.fprintf b "%s %s " (print_assoc prop.tk_associativity) token; Some v) | Some v' when v <> v' -> if prop.tk_associativity = UndefinedAssoc then None else ( Printf.fprintf b "\n%s %s " (print_assoc prop.tk_associativity) token; Some v) | Some v' -> Printf.fprintf b "%s " token; last_prop ) None ordered_tokens); Printf.fprintf b "\n" let print_types mode b g = StringMap.iter (fun symbol ty -> Printf.fprintf b "%%type%s %s\n" begin match mode with | PrintNormal -> print_ocamltype ty | PrintUnitActions | PrintUnitActionsUnitTokens -> " " end (Misc.normalize symbol) ) g.types let binding mode id = match mode with | PrintNormal -> id ^ " = " | PrintUnitActions | PrintUnitActionsUnitTokens -> "" let string_of_producer mode (symbol, ido) = Misc.o2s ido (binding mode) ^ (Misc.normalize symbol) let print_branch mode f branch = Printf.fprintf f "%s%s\n {" (String.concat " " (List.map (string_of_producer mode) branch.producers)) (Misc.o2s branch.branch_shift_precedence (fun x -> " %prec "^x.value)); begin match mode with | PrintNormal -> Action.print f branch.action | PrintUnitActions | PrintUnitActionsUnitTokens -> Printf.fprintf f "()" end; Printf.fprintf f "}\n" let 
print_trailers b g = List.iter (Printf.fprintf b "%s\n") g.postludes let branches_order r r' = let branch_order b b' = match b.branch_reduce_precedence, b'.branch_reduce_precedence with | UndefinedPrecedence, _ | _, UndefinedPrecedence -> 0 | PrecedenceLevel (m, l, _, _), PrecedenceLevel (m', l', _, _) -> if Mark.same m m' then if l < l' then -1 else if l > l' then 1 else 0 else 0 in let rec lexical_order bs bs' = match bs, bs' with | [], [] -> 0 | [], _ -> -1 | _, [] -> 1 | b :: bs, b' :: bs' -> match branch_order b b' with | 0 -> lexical_order bs bs' | x -> x in lexical_order r.branches r'.branches let print_rules mode b g = let rules_as_list = StringMap.fold (fun nt r acu -> (nt, r) :: acu) g.rules [] in let ordered_rules = List.sort (fun (nt, r) (nt', r') -> branches_order r r') rules_as_list in List.iter (fun (nt, r) -> Printf.fprintf b "\n%s:\n" (Misc.normalize nt); List.iter (fun br -> Printf.fprintf b "| "; print_branch mode b br ) r.branches ) ordered_rules let print mode f g = begin match mode with | PrintNormal -> print_preludes f g | PrintUnitActions | PrintUnitActionsUnitTokens -> () end; print_start_symbols f g; print_tokens mode f g; print_types mode f g; Printf.fprintf f "%%%%\n"; print_rules mode f g; Printf.fprintf f "\n%%%%\n"; begin match mode with | PrintNormal -> print_trailers f g | PrintUnitActions | PrintUnitActionsUnitTokens -> () end menhir-20130116/src/option.mli0000644000175000017500000000217012075533603015071 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. 
*) (* *) (**************************************************************************) val map: ('a -> 'b) -> 'a option -> 'b option val iter: ('a -> unit) -> 'a option -> unit val fold: ('a -> 'b -> 'b) -> 'a option -> 'b -> 'b menhir-20130116/src/printer.ml0000644000175000017500000004026112075533603015076 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* A pretty-printer for [IL]. *) open IL open Printf module Make (X : sig (* This is the channel that is being written to. *) val f: out_channel (* If [raw_stretch_action] is set, then we print the semantic actions as they are found into the original source code. *) val raw_stretch_action: bool (* This controls the way we print Objective Caml stretches (types and semantic actions). We either surround them with #line directives (for better error reports if the generated code is ill - typed) or don't (for better readability). The value is either [None] -- do not provide #line directives -- or [Some filename] -- do provide them. [filename] is the name of the file that is being written to. *) val locate_stretches: string option end) = struct (* ------------------------------------------------------------------------- *) (* Dealing with newlines and indentation. *) let maxindent = 120 let whitespace = String.make maxindent ' ' let indentation = ref 0 let line = ref 1 (* [rawnl] is, in principle, the only place where writing a newline character to the output channel is permitted. 
This ensures that the line counter remains correct. But see also [stretch] and [typ0]. *) let rawnl f = incr line; output_char f '\n' let nl f = rawnl f; output f whitespace 0 !indentation let indent ofs producer f x = let old_indentation = !indentation in let new_indentation = old_indentation + ofs in if new_indentation <= maxindent then indentation := new_indentation; nl f; producer f x; indentation := old_indentation (* This produces a #line directive. *) let sharp f line file = fprintf f "%t# %d \"%s\"%t" rawnl line file rawnl (* ------------------------------------------------------------------------- *) (* Printers of atomic elements. *) let nothing f = () let space f = output_char f ' ' let comma f = output_string f ", " let seminl f = output_char f ';'; nl f let times f = output_string f " * " let letrec f = output_string f "let rec " let letnonrec f = output_string f "let " let keytyp f = output_string f "type " let exc f = output_string f "exception " let et f = output_string f "and " let var f x = output_string f x let bar f = output_string f " | " (* ------------------------------------------------------------------------- *) (* List printers. *) let rec list elem sep f = function | [] -> () | e :: es -> fprintf f "%t%a%a" sep elem e (list elem sep) es let rec typeparams p0 p1 f = function | [] -> () | [ param ] -> fprintf f "%a " p0 param | param :: params -> fprintf f "(%a%a) " p1 param (list p1 comma) params (* ------------------------------------------------------------------------- *) (* Expression printer. *) (* We use symbolic constants that stand for subsets of the expression constructors. We do not use numeric levels to stand for subsets, because our subsets do not form a linear inclusion chain. 
*) type subset = | All | AllButSeq | AllButFunTryMatch | AllButFunTryMatchSeq | AllButLetFunTryMatch | AllButLetFunTryMatchSeq | AllButIfThen | AllButIfThenSeq | OnlyAppOrAtom | OnlyAtom (* This computes the intersection of a subset with the constraint "should not be a sequence". *) let andNotSeq = function | All | AllButSeq -> AllButSeq | AllButFunTryMatch | AllButFunTryMatchSeq -> AllButFunTryMatchSeq | AllButLetFunTryMatch | AllButLetFunTryMatchSeq -> AllButLetFunTryMatchSeq | AllButIfThen | AllButIfThenSeq -> AllButIfThenSeq | OnlyAppOrAtom -> OnlyAppOrAtom | OnlyAtom -> OnlyAtom (* This defines the semantics of subsets by relating expressions with subsets. *) let rec member e k = match e with | EComment _ | EPatComment _ -> true | EFun _ | ETry _ | EMatch _ -> begin match k with | AllButFunTryMatch | AllButFunTryMatchSeq | AllButLetFunTryMatch | AllButLetFunTryMatchSeq | OnlyAppOrAtom | OnlyAtom -> false | _ -> true end | ELet ([], e) -> member e k | ELet ((PUnit, _) :: _, _) -> begin match k with | AllButSeq | AllButFunTryMatchSeq | AllButLetFunTryMatchSeq | AllButIfThenSeq | OnlyAppOrAtom | OnlyAtom -> false | _ -> true end | ELet (_ :: _, _) -> begin match k with | AllButLetFunTryMatch | AllButLetFunTryMatchSeq | OnlyAppOrAtom | OnlyAtom -> false | _ -> true end | EIfThen _ -> begin match k with | AllButIfThen | AllButIfThenSeq | OnlyAppOrAtom | OnlyAtom -> false | _ -> true end | EApp (_, _ :: _) | EData (_, _ :: _) | EMagic _ | ERepr _ | ERaise _ -> begin match k with | OnlyAtom -> false | _ -> true end | ERecordWrite _ | EIfThenElse _ -> begin match k with | OnlyAppOrAtom | OnlyAtom -> false | _ -> true end | EVar _ | ETextual _ | EApp (_, []) | EData (_, []) | ETuple _ | EAnnot _ | ERecord _ | ERecordAccess (_, _) | EIntConst _ | EMaxInt | EStringConst _ | EUnit | EArray _ | EArrayAccess (_, _) -> true let rec exprlet k pes f e2 = match pes with | [] -> exprk k f e2 | (PUnit, e1) :: pes -> fprintf f "%a%t%a" (exprk AllButLetFunTryMatch) e1 seminl 
(exprlet k pes) e2 | (PVar id1, EAnnot (e1, ts1)) :: pes -> (* TEMPORARY current ocaml does not support type schemes here; drop quantifiers, if any *) fprintf f "let %s : %a = %a in%t%a" id1 typ ts1.body (* scheme ts1 *) expr e1 nl (exprlet k pes) e2 | (PVar id1, EFun (ps1, e1)) :: pes -> fprintf f "let %s%a = %a in%t%t%a" id1 (list pat0 space) ps1 (indent 2 expr) e1 nl nl (exprlet k pes) e2 | (p1, (ELet _ as e1)) :: pes -> fprintf f "let %a =%a%tin%t%a" pat p1 (indent 2 expr) e1 nl nl (exprlet k pes) e2 | (p1, e1) :: pes -> fprintf f "let %a = %a in%t%a" pat p1 expr e1 nl (exprlet k pes) e2 and atom f e = exprk OnlyAtom f e and app f e = exprk OnlyAppOrAtom f e and expr f e = exprk All f e and exprk k f e = if member e k then match e with | EComment (c, e) -> if Settings.comment then fprintf f "(* %s *)%t%a" c nl (exprk k) e else exprk k f e | EPatComment (s, p, e) -> if Settings.comment then fprintf f "(* %s%a *)%t%a" s pat p nl (exprk k) e else exprk k f e | ELet (pes, e2) -> exprlet k pes f e2 | ERecordWrite (e1, field, e2) -> fprintf f "%a.%s <- %a" atom e1 field (exprk (andNotSeq k)) e2 | EMatch (e, []) -> assert false | EMatch (e, brs) -> fprintf f "match %a with%a" expr e (branches k) brs | ETry (_, []) -> assert false | ETry (e, brs) -> fprintf f "try%a%twith%a" (indent 2 expr) e nl (branches k) brs | EIfThen (e1, e2) -> fprintf f "if %a then%a" expr e1 (indent 2 (exprk (andNotSeq k))) e2 | EIfThenElse (e0, e1, e2) -> fprintf f "if %a then%a%telse%a" expr e0 (indent 2 (exprk AllButIfThenSeq)) e1 nl (indent 2 (exprk (andNotSeq k))) e2 | EFun (ps, e) -> fprintf f "fun%a ->%a" (list pat0 space) ps (indent 2 (exprk k)) e | EApp (EVar op, [ e1; e2 ]) when op.[0] = '(' && op.[String.length op - 1] = ')' -> let op = String.sub op 1 (String.length op - 2) in fprintf f "%a %s %a" app e1 op app e2 | EApp (e, args) -> fprintf f "%a%a" app e (list atom space) args | ERaise e -> fprintf f "raise %a" atom e | EMagic e -> fprintf f "Obj.magic %a" atom e | ERepr e -> 
fprintf f "Obj.repr %a" atom e | EData (d, []) -> var f d | EData (d, [ arg ]) -> fprintf f "%s %a" d atom arg | EData (d, arg :: args) -> fprintf f "%s (%a%a)" d app arg (list app comma) args | EVar v -> var f v | ETextual action -> stretch (X.raw_stretch_action) f action | EUnit -> fprintf f "()" | EIntConst k -> if k >= 0 then fprintf f "%d" k else fprintf f "(%d)" k | EMaxInt -> fprintf f "max_int" | EStringConst s -> fprintf f "\"%s\"" (String.escaped s) | ETuple [] -> assert false | ETuple [ e ] -> atom f e | ETuple (e :: es) -> fprintf f "(%a%a)" app e (list app comma) es | EAnnot (e, s) -> (* TEMPORARY current ocaml does not support type schemes here; drop quantifiers, if any *) fprintf f "(%a : %a)" app e typ s.body (* should be scheme s *) | ERecordAccess (e, field) -> fprintf f "%a.%s" atom e field | ERecord fs -> fprintf f "{%a}" (indent 2 (list field nothing)) fs | EArray fs -> fprintf f "[|%a|]" (indent 2 (list array_field nothing)) fs | EArrayAccess (e, i) -> fprintf f "%a.(%a)" atom e expr i else fprintf f "(%a)" expr e and stretch raw f stretch = let content = stretch.Stretch.stretch_content and raw_content = stretch.Stretch.stretch_raw_content in match X.locate_stretches with | Some basename -> sharp f stretch.Stretch.stretch_linenum stretch.Stretch.stretch_filename; output_string f content; line := !line + stretch.Stretch.stretch_linecount; sharp f (!line + 2) basename; output f whitespace 0 !indentation | None -> output_string f (if raw then raw_content else content) and branches k f = function | [] -> () | [ br ] -> fprintf f "%t| %a" nl (branch k) br | br :: brs -> fprintf f "%t| %a%a" nl (branch AllButFunTryMatch) br (branches k) brs and branch k f br = fprintf f "%a ->%a" pat br.branchpat (indent 4 (exprk k)) br.branchbody and field f (label, e) = fprintf f "%s = %a%t" label app e seminl and fpats f fps = list fpat nothing f fps and fpat f = function | (_, PWildcard) -> () (* in a record pattern, fields can be omitted *) | (label, p) -> 
fprintf f "%s = %a%t" label pat p seminl and array_field f e = fprintf f "%a%t" app e seminl and pat0 f = function | PUnit -> fprintf f "()" | PWildcard -> fprintf f "_" | PVar x -> var f x | PData (d, []) -> var f d | PTuple [] -> assert false | PTuple [ p ] -> pat0 f p | PTuple (p :: ps) -> fprintf f "(%a%a)" pat1 p (list pat1 comma) ps | PAnnot (p, t) -> fprintf f "(%a : %a)" pat p typ t | PRecord fps -> fprintf f "{%a}" (indent 2 fpats) fps | p -> fprintf f "(%a)" pat p and pat1 f = function | PData (d, [ arg ]) -> fprintf f "%s %a" d pat0 arg | PData (d, arg :: args) -> fprintf f "%s (%a%a)" d pat1 arg (list pat1 comma) args | PTuple [ p ] -> pat1 f p | p -> pat0 f p and pat2 f = function | POr [] -> assert false | POr (p :: ps) -> fprintf f "%a%a" pat2 p (list pat2 bar) ps | PTuple [ p ] -> pat2 f p | p -> pat1 f p and pat f p = pat2 f p and typevar f v = fprintf f "'%s" v and typ0 f = function | TypTextual (Stretch.Declared ocamltype) -> (* Parentheses are necessary to avoid confusion between 1 - ary data constructor with n arguments and n - ary data constructor. *) fprintf f "(%a)" (stretch true) ocamltype | TypTextual (Stretch.Inferred t) -> line := !line + LineCount.count 0 (Lexing.from_string t); fprintf f "(%s)" t | TypVar v -> typevar f v | TypApp (t, params) -> fprintf f "%a%s" (typeparams typ0 typ) params t | t -> fprintf f "(%a)" typ t and typ1 f = function | TypTuple [] -> assert false | TypTuple [ t ] -> typ1 f t | TypTuple (t :: ts) -> fprintf f "%a%a" typ0 t (list typ0 times) ts | t -> typ0 f t and typ2 f = function | TypArrow (t1, t2) -> fprintf f "%a -> %a" typ1 t1 typ2 t2 | t -> typ1 f t and typ f = typ2 f and scheme f scheme = match scheme.quantifiers with | [] -> typ f scheme.body | qs -> fprintf f "%a. %a" (list typevar space) qs typ scheme.body (* ------------------------------------------------------------------------- *) (* Toplevel definition printer. 
*) let datavalparams f = function | [] -> () | valparam :: valparams -> fprintf f " of %a%a" typ valparam (list typ times) valparams let datatypeparams f = function | None -> () | Some typs -> fprintf f "(* %a*)" (list typ space) typs (* TEMPORARY not great *) let datadef f def = fprintf f " | %s%a%a" def.dataname datavalparams def.datavalparams datatypeparams def.datatypeparams let fielddef f def = fprintf f " %s%s: %a" (if def.modifiable then "mutable " else "") def.fieldname scheme def.fieldtype let typerhs f = function | TDefRecord [] -> assert false | TDefRecord (field :: fields) -> fprintf f " = {%t%a%a%t}" nl fielddef field (list fielddef seminl) fields nl | TDefSum [] -> () | TDefSum defs -> fprintf f " = %a" (list datadef nl) defs | TAbbrev t -> fprintf f " = %a" typ t let typeconstraint f = function | None -> () | Some (t1, t2) -> fprintf f "%tconstraint %a = %a" nl typ t1 typ t2 let typedef f def = fprintf f "%a%s%a%a%t%t" (typeparams typevar typevar) def.typeparams def.typename typerhs def.typerhs typeconstraint def.typeconstraint nl nl let rec pdefs pdef sep1 sep2 f = function | [] -> () | def :: defs -> fprintf f "%t%a%a" sep1 pdef def (pdefs pdef sep2 sep2) defs let valdef f = function | { valpat = PVar id; valval = EAnnot (e, ts) } -> (* TEMPORARY current ocaml does not support type schemes here; drop quantifiers, if any *) fprintf f "%s : %a =%a%t%t" id typ ts.body (* scheme ts *) (indent 2 expr) e nl nl | { valpat = p; valval = e } -> fprintf f "%a =%a%t%t" pat p (indent 2 expr) e nl nl let valdefs = pdefs valdef letrec et let nonrecvaldefs = pdefs valdef letnonrec letnonrec let typedefs = pdefs typedef keytyp et let directive f = function | DirOpen s -> fprintf f "open %s%t%t" s nl nl | DirInclude s -> fprintf f "include %s%t%t" s nl nl let directives = pdefs directive nothing nothing let excdef f def = match def.exceq with | None -> fprintf f "%s%t%t" def.excname nl nl | Some s -> fprintf f "%s = %s%t%t" def.excname s nl nl let excdefs = pdefs 
excdef exc exc let functorparams intf body b f params = match params with | [] -> fprintf f "%a%!" body b | _ -> fprintf f "module Make%a%t%s%t%a%t%tend%t%!" (list (stretch false) nl) params nl (if intf then ": sig" else "= struct") nl (indent 2 body) b nl nl nl let structure f p = fprintf f "struct%aend" ( indent 2 (fun f p -> fprintf f "%t%a%a%a" nl excdefs p.struct_excdefs typedefs p.struct_typedefs nonrecvaldefs p.struct_nonrecvaldefs ) ) p let rec modexpr f = function | MVar x -> fprintf f "%s" x | MStruct s -> structure f s | MApp (e1, e2) -> fprintf f "%a (%a)" modexpr e1 modexpr e2 let moduledef f d = fprintf f "module %s = %a%t%t" d.modulename modexpr d.modulerhs nl nl let program f p = fprintf f "%a%a" excdefs p.excdefs typedefs p.typedefs; List.iter (stretch false f) p.prologue; fprintf f "%a%a%a" nonrecvaldefs p.nonrecvaldefs (list moduledef nothing) p.moduledefs valdefs p.valdefs; List.iter (output_string f) p.postlogue let valdecl f (x, ts) = fprintf f "val %s: %a" x typ ts.body let interface f i = fprintf f "%a%a%a%!" excdefs i.excdecls typedefs i.typedecls (list valdecl nl) i.valdecls let program p = functorparams false program p X.f p.paramdefs let interface i = functorparams true interface i X.f i.paramdecls let expr e = expr X.f e end menhir-20130116/src/convert.ml0000644000175000017500000001104412075533603015070 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. 
*) (* *) (**************************************************************************) (* An ocamlyacc-style, or Menhir-style, parser requires access to the lexer, which must be parameterized with a lexing buffer, and to the lexing buffer itself, where it reads position information. *) (* This traditional API is convenient when used with ocamllex, but inelegant when used with other lexer generators. *) type ('token, 'semantic_value) traditional = (Lexing.lexbuf -> 'token) -> Lexing.lexbuf -> 'semantic_value (* This revised API is independent of any lexer generator. Here, the parser only requires access to the lexer, and the lexer takes no parameters. The tokens returned by the lexer may contain position information. *) type ('token, 'semantic_value) revised = (unit -> 'token) -> 'semantic_value (* --------------------------------------------------------------------------- *) (* Converting a traditional parser, produced by ocamlyacc or Menhir, into a revised parser. *) (* A token of the revised lexer is essentially a triple of a token of the traditional lexer (or raw token), a start position, and and end position. The three [get] functions are accessors. *) (* We do not require the type ['token] to actually be a triple type. This enables complex applications where it is a record type with more than three fields. It also enables simple applications where positions are of no interest, so ['token] is just ['raw_token] and [get_startp] and [get_endp] return dummy positions. *) let traditional2revised (get_raw_token : 'token -> 'raw_token) (get_startp : 'token -> Lexing.position) (get_endp : 'token -> Lexing.position) (parser : ('raw_token, 'semantic_value) traditional) : ('token, 'semantic_value) revised = (* Accept a revised lexer. *) fun (lexer : unit -> 'token) -> (* Create a dummy lexing buffer. *) let lexbuf : Lexing.lexbuf = Lexing.from_string "" in (* Wrap the revised lexer as a traditional lexer. 
A traditional lexer returns a raw token and updates the fields of the lexing buffer with new positions, which will be read by the parser. *) let lexer (lexbuf : Lexing.lexbuf) : 'raw_token = let token : 'token = lexer() in lexbuf.Lexing.lex_start_p <- get_startp token; lexbuf.Lexing.lex_curr_p <- get_endp token; get_raw_token token in (* Invoke the traditional parser. *) parser lexer lexbuf (* --------------------------------------------------------------------------- *) (* Converting a revised parser back to a traditional parser. *) let revised2traditional (make_token : 'raw_token -> Lexing.position -> Lexing.position -> 'token) (parser : ('token, 'semantic_value) revised) : ('raw_token, 'semantic_value) traditional = (* Accept a traditional lexer and a lexing buffer. *) fun (lexer : Lexing.lexbuf -> 'raw_token) (lexbuf : Lexing.lexbuf) -> (* Wrap the traditional lexer as a revised lexer. *) let lexer () : 'token = let token : 'raw_token = lexer lexbuf in make_token token lexbuf.Lexing.lex_start_p lexbuf.Lexing.lex_curr_p in (* Invoke the revised parser. *) parser lexer (* --------------------------------------------------------------------------- *) (* Simplified versions of the above, where concrete triples are used. *) module Simplified = struct let traditional2revised parser = traditional2revised (fun (token, _, _) -> token) (fun (_, startp, _) -> startp) (fun (_, _, endp) -> endp) parser let revised2traditional parser = revised2traditional (fun token startp endp -> (token, startp, endp)) parser end menhir-20130116/src/lineCount.mll0000644000175000017500000000237612075533603015534 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. 
This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This simple function counts the number of newline characters in a string. *) let newline = ('\010' | '\013' | "\013\010") let ordinary = [^ '\010' '\013']+ rule count n = parse | eof { n } | newline { count (n + 1) lexbuf } | ordinary { count n lexbuf } menhir-20130116/src/infiniteArray.mli0000644000175000017500000000357212075533603016374 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (* $Id: infiniteArray.mli,v 1.5 2007/09/10 21:09:37 fpottier Exp $ *) (** This module implements infinite arrays. **) type 'a t (** [make x] creates an infinite array, where every slot contains [x]. **) val make: 'a -> 'a t (** [get a i] returns the element contained at offset [i] in the array [a]. Slots are numbered 0 and up. **) val get: 'a t -> int -> 'a (** [set a i x] sets the element contained at offset [i] in the array [a] to [x]. Slots are numbered 0 and up. **) val set: 'a t -> int -> 'a -> unit (** [extent a] is the length of an initial segment of the array [a] that is sufficiently large to contain all [set] operations ever performed. In other words, all elements beyond that segment have the default value. *) val extent: 'a t -> int (** [domain a] is a fresh copy of an initial segment of the array [a] whose length is [extent a]. 
*) val domain: 'a t -> 'a array menhir-20130116/src/sentenceLexer.mll0000644000175000017500000000505312075533603016373 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This lexer is used to read the sentences provided on the standard input channel when [--interpret] is enabled. *) { open Lexing open SentenceParser open Grammar (* Updates the line counter, which is used in some error messages. *) let update_loc lexbuf = let pos = lexbuf.lex_curr_p in lexbuf.lex_curr_p <- { pos with pos_lnum = pos.pos_lnum + 1; pos_bol = pos.pos_cnum; } (* A short-hand. *) let error1 lexbuf msg = Error.error (Positions.one (lexeme_start_p lexbuf)) msg } let newline = ('\010' | '\013' | "\013\010") let whitespace = [ ' ' '\t' ';' ] let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_'] let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222'] let identchar = ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '0'-'9'] (* '\'' forbidden *) rule lex = parse | (lowercase identchar *) as lid { try let nt = Nonterminal.lookup lid in if StringSet.mem lid Front.grammar.UnparameterizedSyntax.start_symbols then NONTERMINAL nt else error1 lexbuf (Printf.sprintf "\"%s\" is not a start symbol." lid) with Not_found -> error1 lexbuf (Printf.sprintf "\"%s\" is not a known non-terminal symbol." lid) } | (uppercase identchar *) as uid { try TERMINAL (Terminal.lookup uid) with Not_found -> error1 lexbuf (Printf.sprintf "\"%s\" is not a known terminal symbol." 
uid) } | whitespace { lex lexbuf } | newline { update_loc lexbuf; EOL } | eof { EOF } | ':' { COLON } | _ { error1 lexbuf "unexpected character(s)." } menhir-20130116/src/IL.mli0000644000175000017500000001526212075533602014072 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* Abstract syntax of the language used for code production. *) type program = { (* The structure of programs is pretty ad hoc: the following components must be printed in this order -- so there is no way for a module definition to follow a recursive value definition, for instance. This is tolerable for the time being, but may have to change in the future. *) (* Functor parameters. *) paramdefs: Stretch.t list; (* Raw Objective Caml prologue. *) prologue: Stretch.t list; (* Exception definitions. *) excdefs: excdef list; (* Algebraic data type definitions (mutually recursive). *) typedefs: typedef list; (* Value definitions (not mutually recursive). *) nonrecvaldefs: valdef list; (* Module definitions. *) moduledefs: moduledef list; (* Function definitions (mutually recursive). *) valdefs: valdef list; (* Raw Objective Caml postlogue. *) postlogue: string list; } and interface = { (* Functor parameters. *) paramdecls: Stretch.t list; (* Exception definitions. *) excdecls: excdef list; (* Algebraic data type declarations (mutually recursive). *) typedecls: typedef list; (* Value declarations. *) valdecls: (string * typescheme) list } and excdef = { (* Name of the exception. 
*) excname: string; (* Optional equality. *) exceq: string option; } and typedef = { (* Name of the algebraic data type. *) typename: string; (* Type parameters. This is a list of type variable names, without the leading quote, which will be added by the pretty-printer. *) typeparams: string list; (* Data constructors. *) typerhs: typedefrhs; (* Constraint. *) typeconstraint: (typ * typ) option } and typedefrhs = | TDefRecord of fielddef list | TDefSum of datadef list | TAbbrev of typ and fielddef = { (* Whether the field is mutable. *) modifiable: bool; (* Name of the field. *) fieldname: string; (* Type of the field. *) fieldtype: typescheme } and datadef = { (* Name of the data constructor. *) dataname: string; (* Types of the value parameters. *) datavalparams: typ list; (* Instantiated type parameters, if this is a GADT -- [None] if this is an ordinary ADT. *) datatypeparams: typ list option; } and typ = (* Textual Objective Caml type. *) | TypTextual of Stretch.ocamltype (* Type variable, without its leading quote. *) | TypVar of string (* Application of an algebraic data type constructor. *) | TypApp of string * typ list (* Anonymous tuple. *) | TypTuple of typ list (* Arrow type. *) | TypArrow of typ * typ and typescheme = { (* Universal quantifiers, without leading quotes. *) quantifiers: string list; (* Body. *) body: typ; } and valdef = { (* Whether the value is public. Public values cannot be suppressed by the inliner. They serve as seeds for the dead code analysis. *) valpublic: bool; (* Definition's left-hand side. *) valpat: pattern; (* Value to which it is bound. *) valval: expr } and expr = (* Variable. *) | EVar of string (* Function. *) | EFun of pattern list * expr (* Function call. *) | EApp of expr * expr list (* Local definitions. This is a nested sequence of [let] definitions. *) | ELet of (pattern * expr) list * expr (* Case analysis. 
*) | EMatch of expr * branch list | EIfThen of expr * expr | EIfThenElse of expr * expr * expr (* Raising exceptions. *) | ERaise of expr (* Exception analysis. *) | ETry of expr * branch list (* Data construction. Tuples of length 1 are considered nonexistent, that is, [ETuple [e]] is considered the same expression as [e]. *) | EUnit | EIntConst of int | EMaxInt | EStringConst of string | EData of string * expr list | ETuple of expr list (* Type annotation. *) | EAnnot of expr * typescheme (* Cheating on the typechecker. *) | EMagic of expr (* Obj.magic *) | ERepr of expr (* Obj.repr *) (* Records. *) | ERecord of (string * expr) list | ERecordAccess of expr * string | ERecordWrite of expr * string * expr (* Textual Objective Caml code. *) | ETextual of Stretch.t (* Comments. *) | EComment of string * expr | EPatComment of string * pattern * expr (* Arrays. *) | EArray of expr list | EArrayAccess of expr * expr and branch = { (* Branch pattern. *) branchpat: pattern; (* Branch body. *) branchbody: expr; } and pattern = (* Wildcard. *) | PWildcard (* Variable. *) | PVar of string (* Data deconstruction. Tuples of length 1 are considered nonexistent, that is, [PTuple [p]] is considered the same pattern as [p]. *) | PUnit | PData of string * pattern list | PTuple of pattern list | PRecord of (string * pattern) list (* Disjunction. *) | POr of pattern list (* Type annotation. *) | PAnnot of pattern * typ and directive = | DirOpen of string | DirInclude of string (* Module definitions. *) and moduledef = { (* The name of the module that is being defined. *) modulename: string; (* The module expression to which it is being bound. *) modulerhs: modexpr; } (* Module expressions. *) and modexpr = | MVar of string | MStruct of structure | MApp of modexpr * modexpr (* Structures. This is somewhat redundant with the structure of programs, but will do for now. *) and structure = { (* Exception definitions. 
*) struct_excdefs: excdef list; (* Algebraic data type definitions (mutually recursive). *) struct_typedefs: typedef list; (* Value definitions (not mutually recursive). *) struct_nonrecvaldefs: valdef list; } menhir-20130116/src/option.ml0000644000175000017500000000233212075533603014720 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) let map f = function | None -> None | Some x -> Some (f x) let iter f o = match o with | None -> () | Some x -> f x let fold f o accu = match o with | None -> accu | Some x -> f x accu menhir-20130116/src/infer.mli0000644000175000017500000000311612075533603014665 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* [ntvar symbol] is the name of the type variable associated with a nonterminal symbol. *) val ntvar: string -> string (* [infer grammar] analyzes the grammar [grammar] and returns a new grammar, augmented with a [%type] declaration for every nonterminal symbol. The [ocamlc] compiler is used to infer types. 
*) val infer: UnparameterizedSyntax.grammar -> UnparameterizedSyntax.grammar (* [depend grammar] prints (on the standard output channel) the Objective Caml dependencies induced by the semantic actions. Then, it exits the program. *) val depend: UnparameterizedSyntax.grammar -> 'a menhir-20130116/src/breadth.ml0000644000175000017500000000412412075533603015022 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module implements generic breadth-first search over a graph with labeled edges. *) module Make (G : sig (* This is the type of graph vertices. *) type vertex (* This is the type of graph labels. *) type label (* These allow marking a vertex and checking whether it is marked. *) val set_mark: vertex -> Mark.t -> unit val get_mark: vertex -> Mark.t (* This is an iterator over the graph's entry vertices. *) val entry: (vertex -> unit) -> unit (* This provides access to a vertex' successors. 
*) val successors: (label -> vertex -> unit) -> vertex -> unit end) = struct let search f = let queue : G.vertex Queue.t = Queue.create () and mark = Mark.fresh() in let visited vertex = Mark.same mark (G.get_mark vertex) and visit vertex = G.set_mark vertex mark; Queue.add vertex queue in G.entry visit; Misc.qiter (fun vertex -> G.successors (fun label son -> if not (visited son) then begin visit son; f true vertex label son end else f false vertex label son ) vertex ) queue end menhir-20130116/src/patricia.mli0000644000175000017500000000246212075533603015361 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This is an implementation of Patricia trees, following Chris Okasaki's paper at the 1998 ML Workshop in Baltimore. Both big-endian and little-endian trees are provided. Both sets and maps are implemented on top of Patricia trees. *) module Little : GMap.S with type key = int module Big : GMap.S with type key = int menhir-20130116/src/compressedBitSet.ml0000644000175000017500000001153612075533603016675 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. 
*) (* *) (**************************************************************************) (* A compressed (or should we say sparse?) bit set is a list of pairs of integers. The first component of every pair is an index, while the second component is a bit field. The list is sorted by order of increasing indices. *) type t = | N | C of int * int * t type element = int let word_size = Sys.word_size - 1 let empty = N let is_empty = function | N -> true | C _ -> false let add i s = let ioffset = i mod word_size in let iaddr = i - ioffset and imask = 1 lsl ioffset in let rec add = function | N -> (* Insert at end. *) C (iaddr, imask, N) | C (addr, ss, qs) as s -> if iaddr < addr then (* Insert in front. *) C (iaddr, imask, s) else if iaddr = addr then (* Found appropriate cell, update bit field. *) let ss' = ss lor imask in if ss' = ss then s else C (addr, ss', qs) else (* Not there yet, continue. *) let qs' = add qs in if qs == qs' then s else C (addr, ss, qs') in add s let singleton i = add i N let remove i s = let ioffset = i mod word_size in let iaddr = i - ioffset and imask = 1 lsl ioffset in let rec remove = function | N -> N | C (addr, ss, qs) as s -> if iaddr < addr then s else if iaddr = addr then (* Found appropriate cell, update bit field. *) let ss' = ss land (lnot imask) in if ss' = 0 then qs else if ss' = ss then s else C (addr, ss', qs) else (* Not there yet, continue. 
*) let qs' = remove qs in if qs == qs' then s else C (addr, ss, qs') in remove s let rec fold f s accu = match s with | N -> accu | C (base, ss, qs) -> let limit = base + word_size in let rec loop i ss accu = if i = limit then accu else loop (i + 1) (ss lsr 1) (if ss land 1 = 1 then f i accu else accu) in fold f qs (loop base ss accu) let iter f s = fold (fun x () -> f x) s () let cardinal s = fold (fun _ m -> m + 1) s 0 let elements s = fold (fun tl hd -> tl :: hd) s [] let rec subset s1 s2 = match s1, s2 with | N, _ -> true | _, N -> false | C (addr1, ss1, qs1), C (addr2, ss2, qs2) -> if addr1 < addr2 then false else if addr1 = addr2 then if (ss1 land ss2) <> ss1 then false else subset qs1 qs2 else subset s1 qs2 let mem i s = subset (singleton i) s let rec union s1 s2 = match s1, s2 with | N, s | s, N -> s | C (addr1, ss1, qs1), C (addr2, ss2, qs2) -> if addr1 < addr2 then C (addr1, ss1, union qs1 s2) else if addr1 > addr2 then let s = union s1 qs2 in if s == qs2 then s2 else C (addr2, ss2, s) else let ss = ss1 lor ss2 in let s = union qs1 qs2 in if ss == ss2 && s == qs2 then s2 else C (addr1, ss, s) let rec inter s1 s2 = match s1, s2 with | N, _ | _, N -> N | C (addr1, ss1, qs1), C (addr2, ss2, qs2) -> if addr1 < addr2 then inter qs1 s2 else if addr1 > addr2 then inter s1 qs2 else let ss = ss1 land ss2 in let s = inter qs1 qs2 in if ss = 0 then s else if (ss = ss1) && (s == qs1) then s1 else C (addr1, ss, s) exception Found of int let choose s = try iter (fun x -> raise (Found x) ) s; raise Not_found with Found x -> x let rec compare s1 s2 = match s1, s2 with N, N -> 0 | _, N -> 1 | N, _ -> -1 | C (addr1, ss1, qs1), C (addr2, ss2, qs2) -> if addr1 < addr2 then -1 else if addr1 > addr2 then 1 else if ss1 < ss2 then -1 else if ss1 > ss2 then 1 else compare qs1 qs2 let rec equal s1 s2 = compare s1 s2 = 0 let rec disjoint s1 s2 = match s1, s2 with | N, _ | _, N -> true | C (addr1, ss1, qs1), C (addr2, ss2, qs2) -> if addr1 = addr2 then if (ss1 land ss2) = 0 then 
disjoint qs1 qs2 else false else if addr1 < addr2 then disjoint qs1 s2 else disjoint s1 qs2 menhir-20130116/src/lr0.mli0000644000175000017500000001044212075533603014257 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) open Grammar (* This module builds the LR(0) automaton associated with the grammar, then provides access to it. It also provides facilities for efficiently performing LR(1) constructions. *) (* ------------------------------------------------------------------------ *) (* The LR(0) automaton. *) (* The nodes of the LR(0) automaton are numbered. *) type node = int (* This is the number of nodes in the LR(0) automaton. *) val n: int (* These are the automaton's entry states, indexed by the start productions. *) val entry: node ProductionMap.t (* A node can be converted to the underlying LR(0) set of items. This set is not closed. *) val items: node -> Item.Set.t (* ------------------------------------------------------------------------ *) (* Help for building the LR(1) automaton. *) (* An LR(1) state is internally represented as a pair of an LR(0) state number and an array of concrete lookahead sets (whose length depends on the LR(0) state). *) type lr1state (* An encoded LR(1) state can be turned into a concrete representation, that is, a mapping of items to concrete lookahead sets. *) type concretelr1state = TerminalSet.t Item.Map.t val export: lr1state -> concretelr1state (* One can take the closure of a concrete LR(1) state. 
*) val closure: concretelr1state -> concretelr1state (* The core of an LR(1) state is the underlying LR(0) state. *) val core: lr1state -> node (* One can create an LR(1) start state out of an LR(0) start node. *) val start: node -> lr1state (* Information about the transitions and reductions at a state. *) val transitions: lr1state -> lr1state SymbolMap.t val outgoing_symbols: node -> Symbol.t list val transition: Symbol.t -> lr1state -> lr1state val reductions: lr1state -> (TerminalSet.t * Production.index) list (* Equality of states. The two states must have the same core. Then, they are equal if and only if their lookahead sets are pointwise equal. *) val equal: lr1state -> lr1state -> bool (* Subsumption between states. The two states must have the same core. Then, one subsumes the other if and only if their lookahead sets are (pointwise) in the subset relation. *) val subsume: lr1state -> lr1state -> bool (* A slightly modified version of Pager's weak compatibility criterion. The two states must have the same core. *) val compatible: lr1state -> lr1state -> bool (* This function determines whether two (core-equivalent) states can be merged without creating an end-of-stream conflict. *) val eos_compatible: lr1state -> lr1state -> bool (* This function determines whether two (core-equivalent) states can be merged without creating spurious reductions on the [error] token. *) val error_compatible: lr1state -> lr1state -> bool (* Union of two states. The two states must have the same core. The new state is obtained by pointwise union of the lookahead sets. *) val union: lr1state -> lr1state -> lr1state (* Restriction of a state to a set of tokens of interest. Every lookahead set is intersected with that set. *) val restrict: TerminalSet.t -> lr1state -> lr1state (* Displaying a concrete state. *) val print_concrete: concretelr1state -> string (* Displaying a state. By default, only the kernel is displayed, not the closure. 
*) val print: lr1state -> string val print_closure: lr1state -> string menhir-20130116/src/rowDisplacement.mli0000644000175000017500000000507712075533603016732 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (* This module compresses a two-dimensional table, where some values are considered insignificant, via row displacement. *) (* A compressed table is represented as a pair of arrays. The displacement array is an array of offsets into the data array. *) type 'a table = int array * (* displacement *) 'a array (* data *) (* [compress equal insignificant dummy m n t] turns the two-dimensional table [t] into a compressed table. The parameter [equal] is equality of data values. The parameter [wildcard] tells which data values are insignificant, and can thus be overwritten with other values. The parameter [dummy] is used to fill holes in the data array. [m] and [n] are the integer dimensions of the table [t]. *) val compress: ('a -> 'a -> bool) -> ('a -> bool) -> 'a -> int -> int -> 'a array array -> 'a table (* [get ct i j] returns the value found at indices [i] and [j] in the compressed table [ct]. This function call is permitted only if the value found at indices [i] and [j] in the original table is significant -- otherwise, it could fail abruptly. 
*) (* Together, [compress] and [get] have the property that, if the value found at indices [i] and [j] in an uncompressed table [t] is significant, then [get (compress t) i j] is equal to that value. *) val get: 'a table -> int -> int -> 'a (* [getget] is a variant of [get] which only requires read access, via accessors, to the two components of the table. *) val getget: ('displacement -> int -> int) -> ('data -> int -> 'a) -> 'displacement * 'data -> int -> int -> 'a menhir-20130116/src/stringMap.mli0000644000175000017500000000266112075533603015532 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) include Map.S with type key = string val cardinal : 'a t -> int (* [restrict s m] restricts the domain of the map [m] to (its intersection with) the set [s]. *) val restrict: StringSet.t -> 'a t -> 'a t (* [filter pred m] restricts the domain of the map [m] to (key, value) couples that verify [pred]. *) val filter: (string -> 'a -> bool) -> 'a t -> 'a t (* [domain m] returns the domain of the map [m]. *) val domain: 'a t -> StringSet.t menhir-20130116/src/settings.ml0000644000175000017500000002523712075533602015260 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. 
This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) open Printf (* ------------------------------------------------------------------------- *) (* Prepare for parsing the command line. *) type token_type_mode = | TokenTypeAndCode (* produce the definition of the [token] type and code for the parser *) | TokenTypeOnly (* produce the type definition only *) | CodeOnly of string (* produce the code only; import token type from specified module *) let token_type_mode = ref TokenTypeAndCode let tokentypeonly () = token_type_mode := TokenTypeOnly let codeonly m = if String.capitalize m <> m then begin (* Not using module [Error] to avoid a circular dependency. *) fprintf stderr "Error: %s is not a valid Objective Caml module name.\n" m; exit 1 end; token_type_mode := CodeOnly m let version = ref false type construction_mode = | ModeCanonical (* --canonical: canonical Knuth LR(1) automaton *) | ModeInclusionOnly (* --no-pager : states are merged when there is an inclusion relationship, default reductions are used *) | ModePager (* normal mode: states are merged as per Pager's criterion, default reductions are used *) (* Note that --canonical overrides --no-pager. If both are specified, the result is a canonical automaton. 
*) let construction_mode = ref ModePager let explain = ref false let base = ref "" let dump = ref false let follow = ref false let graph = ref false let trace = ref false let noprefix = ref false type print_mode = | PrintNormal | PrintUnitActions | PrintUnitActionsUnitTokens type preprocess_mode = | PMNormal (* preprocess and continue *) | PMOnlyPreprocess of print_mode (* preprocess, print grammar, stop *) let preprocess_mode = ref PMNormal let recovery = ref false let v () = dump := true; explain := true let infer = ref false let inline = ref true type ocamldep_mode = | OMNone (* do not invoke ocamldep *) | OMRaw (* invoke ocamldep and echo its raw output *) | OMPostprocess (* invoke ocamldep and postprocess its output *) let depend = ref OMNone let code_inlining = ref true let comment = ref false let ocamlc = ref "ocamlc" let ocamldep = ref "ocamldep" let logG, logA, logC = ref 0, ref 0, ref 0 let timings = ref false let filenames = ref StringSet.empty let no_stdlib = ref false let stdlib_path = ref Installation.libdir let insert name = filenames := StringSet.add name !filenames let interpret = ref false let interpret_show_cst = ref false let table = ref false let coq = ref false let coq_no_complete = ref false let coq_no_actions = ref false let strict = ref false type suggestion = | SuggestNothing | SuggestCompFlags | SuggestLinkFlags of string (* "cmo" or "cmx" *) let suggestion = ref SuggestNothing let options = Arg.align [ "--base", Arg.Set_string base, " Specifies a base name for the output file(s)"; "--canonical", Arg.Unit (fun () -> construction_mode := ModeCanonical), " Construct a canonical Knuth LR(1) automaton"; "--comment", Arg.Set comment, " Include comments in the generated code"; "--coq", Arg.Set coq, " (undocumented)"; "--coq-no-complete", Arg.Set coq_no_complete, " (undocumented)"; "--coq-no-actions", Arg.Set coq_no_actions, " (undocumented)"; "--depend", Arg.Unit (fun () -> depend := OMPostprocess), " Invoke ocamldep and display dependencies"; 
"--dump", Arg.Set dump, " Describe the automaton in .automaton"; "--error-recovery", Arg.Set recovery, " Attempt recovery by discarding tokens after errors"; "--explain", Arg.Set explain, " Explain conflicts in .conflicts"; "--external-tokens", Arg.String codeonly, " Import token type definition from "; "--follow-construction", Arg.Set follow, " (undocumented)"; "--graph", Arg.Set graph, " Write grammar's dependency graph to .dot"; "--infer", Arg.Set infer, " Invoke ocamlc for ahead of time type inference"; "--interpret", Arg.Set interpret, " Interpret the sentences provided on stdin"; "--interpret-show-cst", Arg.Set interpret_show_cst, " Show a concrete syntax tree upon acceptance"; "--log-automaton", Arg.Set_int logA, " Log information about the automaton"; "--log-code", Arg.Set_int logC, " Log information about the generated code"; "--log-grammar", Arg.Set_int logG, " Log information about the grammar"; "--no-code-inlining", Arg.Clear code_inlining, " (undocumented)"; "--no-inline", Arg.Clear inline, " Ignore the %inline keyword."; "--no-pager", Arg.Unit (fun () -> if !construction_mode = ModePager then construction_mode := ModeInclusionOnly), " (undocumented)"; "--no-prefix", Arg.Set noprefix, " (undocumented)"; "--no-stdlib", Arg.Set no_stdlib, " Do not load the standard library"; "--ocamlc", Arg.Set_string ocamlc, " Specifies how ocamlc should be invoked"; "--ocamldep", Arg.Set_string ocamldep, " Specifies how ocamldep should be invoked"; "--only-preprocess", Arg.Unit (fun () -> preprocess_mode := PMOnlyPreprocess PrintNormal), " Print grammar and exit"; "--only-preprocess-u", Arg.Unit (fun () -> preprocess_mode := PMOnlyPreprocess PrintUnitActions), " Print grammar with unit actions and exit"; "--only-preprocess-uu", Arg.Unit (fun () -> preprocess_mode := PMOnlyPreprocess PrintUnitActionsUnitTokens), " Print grammar with unit actions & tokens and exit"; "--only-tokens", Arg.Unit tokentypeonly, " Generate token type definition only, no code"; "--raw-depend", 
Arg.Unit (fun () -> depend := OMRaw), " Invoke ocamldep and echo its raw output"; "--stdlib", Arg.Set_string stdlib_path, " Specify where the standard library lies"; "--strict", Arg.Set strict, " Warnings about the grammar are errors"; "--suggest-comp-flags", Arg.Unit (fun () -> suggestion := SuggestCompFlags), " Suggest compilation flags for ocaml{c,opt}"; "--suggest-link-flags-byte", Arg.Unit (fun () -> suggestion := SuggestLinkFlags "cmo"), " Suggest link flags for ocamlc"; "--suggest-link-flags-opt", Arg.Unit (fun () -> suggestion := SuggestLinkFlags "cmx"), " Suggest link flags for ocamlopt"; "--table", Arg.Set table, " Use the table-based back-end"; "--timings", Arg.Set timings, " Display internal timings"; "--trace", Arg.Set trace, " Include tracing instructions in the generated code"; "--version", Arg.Set version, " Show version number and exit"; "-b", Arg.Set_string base, " Synonymous with --base "; "-lg", Arg.Set_int logG, " Synonymous with --log-grammar"; "-la", Arg.Set_int logA, " Synonymous with --log-automaton"; "-lc", Arg.Set_int logC, " Synonymous with --log-code"; "-t", Arg.Set table, " Synonymous with --table"; "-v", Arg.Unit v, " Synonymous with --dump --explain"; ] let usage = sprintf "Usage: %s " Sys.argv.(0) (* ------------------------------------------------------------------------- *) (* Parse the command line. *) let () = Arg.parse options insert usage (* ------------------------------------------------------------------------- *) (* If required, print a version number and stop. *) let () = if !version then begin printf "menhir, version %s\n" Version.version; exit 0 end (* ------------------------------------------------------------------------- *) (* Menhir is able to suggest compile and link flags to be passed to the Objective Caml compilers. If required, do so and stop. *) (* If [--table] is not passed, no flags are necessary. If [--table] is passed, then [MenhirLib] needs to be visible (at compile time) and linked in (at link time). 
This is done either via [ocamlfind], if it was available at installation time, or manually. *) (* The compilation flags are in fact meant to be used both at compile- and link-time. *) let () = match !suggestion with | SuggestNothing -> () | SuggestCompFlags -> if !table then if Installation.ocamlfind then printf "-package menhirLib\n%!" else printf "-I %s\n%!" Installation.libdir; exit 0 | SuggestLinkFlags extension -> if !table then if Installation.ocamlfind then printf "-linkpkg\n%!" else printf "menhirLib.%s\n%!" extension; exit 0 (* ------------------------------------------------------------------------- *) (* Export the settings. *) let stdlib_filename = !stdlib_path ^ "/standard.mly" let filenames = StringSet.elements !filenames let base = if !base = "" then match filenames with | [] -> fprintf stderr "%s\n" usage; exit 1 | [ filename ] -> Filename.chop_suffix filename (if !coq then ".vy" else ".mly") | _ -> fprintf stderr "Error: you must specify --base when providing multiple input files.\n"; exit 1 else !base let filenames = if !no_stdlib || !coq then filenames else stdlib_filename :: filenames let token_type_mode = !token_type_mode let construction_mode = !construction_mode let explain = !explain let dump = !dump let follow = !follow let graph = !graph let trace = !trace let recovery = !recovery let noprefix = !noprefix let infer = !infer let code_inlining = !code_inlining let depend = !depend let inline = !inline let comment = !comment let preprocess_mode = !preprocess_mode let ocamlc = !ocamlc let ocamldep = !ocamldep let logG, logA, logC = !logG, !logA, !logC let timings = !timings let interpret = !interpret let interpret_show_cst = !interpret_show_cst let table = !table let coq = !coq let coq_no_complete = !coq_no_complete let coq_no_actions = !coq_no_actions let strict = !strict menhir-20130116/src/keyword.ml0000644000175000017500000000571012075533602015076 0ustar stephsteph(**************************************************************************) 
(* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module provides some type and function definitions that help deal with the keywords that we recognize within semantic actions. *) (* ------------------------------------------------------------------------- *) (* Types. *) (* The user can request position information either at type [int] (a simple offset) or at type [Lexing.position]. *) type flavor = | FlavorOffset | FlavorPosition (* The user can request position information about the start or end of a symbol. *) type where = | WhereStart | WhereEnd (* The user can request position information about a production's left-hand side or about one of the symbols in its right-hand side, which he can refer to by position or by name. *) type subject = | Left | RightDollar of int | RightNamed of string (* Keywords inside semantic actions. They allow access to semantic values or to position information. *) type keyword = | Dollar of int | Position of subject * where * flavor | PreviousError | SyntaxError (* ------------------------------------------------------------------------- *) (* These auxiliary functions help map a [Position] keyword to the name of the variable that the keyword is replaced with. 
*) let where = function | WhereStart -> "start" | WhereEnd -> "end" let subject = function | Left -> "" | RightDollar i -> Printf.sprintf "__%d_" i | RightNamed id -> Printf.sprintf "_%s_" id let flavor = function | FlavorPosition -> "pos" | FlavorOffset -> "ofs" let posvar s w f = Printf.sprintf "_%s%s%s" (where w) (flavor f) (subject s) (* ------------------------------------------------------------------------- *) (* Sets of keywords. *) module KeywordSet = struct include Set.Make (struct type t = keyword let compare = compare end) (* This converts a list of keywords with positions into a set of keywords. *) let from_list keywords = List.fold_right add keywords empty end menhir-20130116/src/front.ml0000644000175000017500000000505512075533602014544 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* Start where [PreFront] left off. *) let grammar = PreFront.grammar (* Perform reachability analysis. *) let grammar = Reachability.trim grammar let () = Time.tick "Trimming" (* If [--depend] was specified on the command line, perform dependency analysis and stop. *) let () = match Settings.depend with | Settings.OMRaw | Settings.OMPostprocess -> Infer.depend grammar (* never returns *) | Settings.OMNone -> () (* If [--infer] was specified on the command line, perform type inference and stop. 
*) let grammar = if Settings.infer then let grammar = Infer.infer grammar in Time.tick "Inferring types for nonterminals"; grammar else grammar (* If [--no-inline] was specified on the command line, skip the inlining of non terminal definitions marked with %inline. *) let grammar = if Settings.inline then begin let grammar, inlined = NonTerminalDefinitionInlining.inline grammar in if not Settings.infer && inlined && not Settings.coq then Error.warning [] "you are using the standard library and/or the %inline keyword. We\n\ recommend switching on --infer in order to avoid obscure type error messages."; Time.tick "Inlining"; grammar end else grammar (* If [--only-preprocess] or [--only-preprocess-drop] was specified on the command line, print the grammar and stop. Otherwise, continue. *) let () = match Settings.preprocess_mode with | Settings.PMOnlyPreprocess mode -> UnparameterizedPrinter.print mode stdout grammar; exit 0 | Settings.PMNormal -> () menhir-20130116/src/lexer.mll0000644000175000017500000003451312075533603014711 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) { open Lexing open Parser open Positions (* This wrapper saves the current lexeme start, invokes its argument, and restores it. This allows transmitting better positions to the parser. *) let savestart lexbuf f = let startp = lexbuf.lex_start_p in let token = f lexbuf in lexbuf.lex_start_p <- startp; token (* Updates the line counter, which is used in some error messages. 
*) let update_loc lexbuf = let pos = lexbuf.lex_curr_p in lexbuf.lex_curr_p <- { pos with pos_lnum = pos.pos_lnum + 1; pos_bol = pos.pos_cnum; } (* Extracts a chunk out of the source file. *) let chunk ofs1 ofs2 = let contents = Error.get_file_contents() in let len = ofs2 - ofs1 in String.sub contents ofs1 len (* Extracts a chunk out of the source file, delimited by one position and extending to the end of the file. *) let echunk ofs1 = let contents = Error.get_file_contents() in let len = String.length contents - ofs1 in String.sub contents ofs1 len (* Overwrites an old character with a new one at a specified offset in a string. *) let overwrite content offset c1 c2 = assert (content.[offset] = c1); content.[offset] <- c2 (* Creates a stretch. *) let mk_stretch parenthesize pos1 pos2 pkeywords = let ofs1 = pos1.pos_cnum and ofs2 = pos2.pos_cnum in let raw_content = chunk ofs1 ofs2 in let content = String.copy raw_content in (* Turn our keywords into valid Objective Caml identifiers by replacing '$', '(', and ')' with '_'. Bloody. 
*) List.iter (function { value = keyword; position = pos } -> let pos = start_of_position pos in let ofs = pos.pos_cnum - ofs1 in overwrite content ofs '$' '_'; match keyword with | Keyword.Dollar _ | Keyword.Position (Keyword.Left, _, _) | Keyword.PreviousError -> () | Keyword.SyntaxError -> (* $syntaxerror is replaced with (raise _eRR) *) let source = "(raise _eRR)" in String.blit source 0 content ofs (String.length source) | Keyword.Position (subject, where, _) -> let ofslpar = match where with | Keyword.WhereStart -> ofs + 9 | Keyword.WhereEnd -> ofs + 7 in overwrite content ofslpar '(' '_'; match subject with | Keyword.Left -> assert false | Keyword.RightDollar i -> overwrite content (ofslpar + 1) '$' '_'; overwrite content (ofslpar + 2 + String.length (string_of_int i)) ')' '_' | Keyword.RightNamed id -> overwrite content (ofslpar + 1 + String.length id) ')' '_' ) pkeywords; (* Add whitespace so that the column numbers match those of the source file. If requested, add parentheses so that the semantic action can be inserted into other code without ambiguity. *) let content = if parenthesize then (String.make (pos1.pos_cnum - pos1.pos_bol - 1) ' ') ^ "(" ^ content ^ ")" else (String.make (pos1.pos_cnum - pos1.pos_bol) ' ') ^ content in { Stretch.stretch_filename = Error.get_filename(); Stretch.stretch_linenum = pos1.pos_lnum; Stretch.stretch_linecount = pos2.pos_lnum - pos1.pos_lnum; Stretch.stretch_content = content; Stretch.stretch_raw_content = raw_content; Stretch.stretch_keywords = pkeywords } (* Translates the family of position-related keywords to abstract syntax. 
*) let mk_keyword lexbuf w f n id = let where = match w with | Some _ -> Keyword.WhereStart | None -> Keyword.WhereEnd and flavor = match f with | Some _ -> Keyword.FlavorPosition | None -> Keyword.FlavorOffset and subject = match n, id with | Some n, None -> Keyword.RightDollar (int_of_string n) | None, Some id -> Keyword.RightNamed id | None, None -> Keyword.Left | Some _, Some _ -> assert false in let keyword = Keyword.Position (subject, where, flavor) in with_cpos lexbuf keyword (* Objective Caml's reserved words. *) let reserved = let table = Hashtbl.create 149 in List.iter (fun word -> Hashtbl.add table word ()) [ "and"; "as"; "assert"; "begin"; "class"; "constraint"; "do"; "done"; "downto"; "else"; "end"; "exception"; "external"; "false"; "for"; "fun"; "function"; "functor"; "if"; "in"; "include"; "inherit"; "initializer"; "lazy"; "let"; "match"; "method"; "module"; "mutable"; "new"; "object"; "of"; "open"; "or"; "parser"; "private"; "rec"; "sig"; "struct"; "then"; "to"; "true"; "try"; "type"; "val"; "virtual"; "when"; "while"; "with"; "mod"; "land"; "lor"; "lxor"; "lsl"; "lsr"; "asr"; ]; table (* A short-hand. *) let error1 pos msg = Error.error (Positions.one pos) msg } let newline = ('\010' | '\013' | "\013\010") let whitespace = [ ' ' '\t' ';' ] let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_'] let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222'] let identchar = ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '0'-'9'] (* '\'' forbidden *) let poskeyword = '$' (("start" as w) | "end") (("pos" as f) | "ofs") ( '(' ( '$' (['0'-'9']+ as n) | ((lowercase identchar*) as id)) ')')? 
let previouserror = "$previouserror" let syntaxerror = "$syntaxerror" rule main = parse | "%token" { TOKEN } | "%type" { TYPE } | "%left" { LEFT } | "%right" { RIGHT } | "%nonassoc" { NONASSOC } | "%start" { START } | "%prec" { PREC } | "%public" { PUBLIC } | "%parameter" { PARAMETER } | "%inline" { INLINE } | "%%" { let ofs = lexeme_end lexbuf in PERCENTPERCENT (lazy (echunk ofs)) } | ":" { COLON } | "," { COMMA } | "=" { EQUAL } | "(" { LPAREN } | ")" { RPAREN } | "|" { BAR } | "?" { QUESTION } | "*" { STAR } | "+" { PLUS } | (lowercase identchar *) as id { if Hashtbl.mem reserved id then Error.errorp (Positions.with_poss (lexeme_start_p lexbuf) (lexeme_end_p lexbuf) ()) "this is an Objective Caml reserved word." else LID (with_pos (cpos lexbuf) id) } | (uppercase identchar *) as id { UID (with_pos (cpos lexbuf) id) } | "//" [^ '\010' '\013']* newline (* skip C++ style comment *) | newline { update_loc lexbuf; main lexbuf } | whitespace+ { main lexbuf } | "/*" { comment (lexeme_start_p lexbuf) lexbuf; main lexbuf } | "(*" { ocamlcomment (lexeme_start_p lexbuf) lexbuf; main lexbuf } | "<" { savestart lexbuf (ocamltype (lexeme_end_p lexbuf)) } | "%{" { savestart lexbuf (fun lexbuf -> let openingpos = lexeme_end_p lexbuf in let closingpos, _ = action true openingpos [] lexbuf in (* TEMPORARY if keyword list nonempty, issue an error *) HEADER (mk_stretch false openingpos closingpos []) ) } | "{" { savestart lexbuf (fun lexbuf -> let openingpos = lexeme_end_p lexbuf in let closingpos, pkeywords = action false openingpos [] lexbuf in let stretch = mk_stretch true openingpos closingpos pkeywords in ACTION (Action.from_stretch stretch) ) } (* TEMPORARY comprendre si la diffrence entre header et action est bien justifie et si certains choix comme le parenthsage et le traitement des keywords ne pourraient pas tre effectus plus loin. *) | eof { EOF } | _ { error1 (lexeme_start_p lexbuf) "unexpected character(s)." } (* Skip C style comments. 
*) and comment openingpos = parse | newline { update_loc lexbuf; comment openingpos lexbuf } | "*/" { () } | eof { error1 openingpos "unterminated comment." } | _ { comment openingpos lexbuf } (* Collect an O'Caml type delimited by angle brackets. Angle brackets can appear as part of O'Caml function types. They might also appear as part of O'Caml variant types, but we ignore that possibility for the moment. *) and ocamltype openingpos = parse | "->" { ocamltype openingpos lexbuf } | '>' { OCAMLTYPE (Stretch.Declared (mk_stretch true openingpos (lexeme_start_p lexbuf) [])) } | "(*" { ocamlcomment (lexeme_start_p lexbuf) lexbuf; ocamltype openingpos lexbuf } | newline { update_loc lexbuf; ocamltype openingpos lexbuf } | eof { error1 openingpos "unterminated Objective Caml type." } | _ { ocamltype openingpos lexbuf } (* Collect O'Caml code delimited by curly brackets. Any occurrences of the special ``$i'' identifiers are recorded in the accumulating parameter [pkeywords]. Nested curly brackets must be properly counted. Nested parentheses are also kept track of, so as to better report errors when they are not balanced. *) and action percent openingpos pkeywords = parse | '{' { let _, pkeywords = action false (lexeme_end_p lexbuf) pkeywords lexbuf in action percent openingpos pkeywords lexbuf } | ("}" | "%}") as delimiter { match percent, delimiter with | true, "%}" | false, "}" -> (* This is the delimiter we were instructed to look for. *) lexeme_start_p lexbuf, pkeywords | _, _ -> (* This is not it. *) error1 openingpos "unbalanced opening brace." 
} | '(' { let _, pkeywords = parentheses (lexeme_end_p lexbuf) pkeywords lexbuf in action percent openingpos pkeywords lexbuf } | '$' (['0'-'9']+ as n) { let pkeyword = with_cpos lexbuf (Keyword.Dollar (int_of_string n)) in action percent openingpos (pkeyword :: pkeywords) lexbuf } | poskeyword { let pkeyword = mk_keyword lexbuf w f n id in action percent openingpos (pkeyword :: pkeywords) lexbuf } | previouserror { let pkeyword = with_cpos lexbuf Keyword.PreviousError in action percent openingpos (pkeyword :: pkeywords) lexbuf } | syntaxerror { let pkeyword = with_cpos lexbuf Keyword.SyntaxError in action percent openingpos (pkeyword :: pkeywords) lexbuf } | '"' { string (lexeme_start_p lexbuf) lexbuf; action percent openingpos pkeywords lexbuf } | "'" { char lexbuf; action percent openingpos pkeywords lexbuf } | "(*" { ocamlcomment (lexeme_start_p lexbuf) lexbuf; action percent openingpos pkeywords lexbuf } | newline { update_loc lexbuf; action percent openingpos pkeywords lexbuf } | ')' | eof { error1 openingpos "unbalanced opening brace." 
} | _ { action percent openingpos pkeywords lexbuf } and parentheses openingpos pkeywords = parse | '(' { let _, pkeywords = parentheses (lexeme_end_p lexbuf) pkeywords lexbuf in parentheses openingpos pkeywords lexbuf } | ')' { lexeme_start_p lexbuf, pkeywords } | '{' { let _, pkeywords = action false (lexeme_end_p lexbuf) pkeywords lexbuf in parentheses openingpos pkeywords lexbuf } | '$' (['0'-'9']+ as n) { let pkeyword = with_cpos lexbuf (Keyword.Dollar (int_of_string n)) in parentheses openingpos (pkeyword :: pkeywords) lexbuf } | poskeyword { let pkeyword = mk_keyword lexbuf w f n id in parentheses openingpos (pkeyword :: pkeywords) lexbuf } | previouserror { let pkeyword = with_cpos lexbuf Keyword.PreviousError in parentheses openingpos (pkeyword :: pkeywords) lexbuf } | syntaxerror { let pkeyword = with_cpos lexbuf Keyword.SyntaxError in parentheses openingpos (pkeyword :: pkeywords) lexbuf } | '"' { string (lexeme_start_p lexbuf) lexbuf; parentheses openingpos pkeywords lexbuf } | "'" { char lexbuf; parentheses openingpos pkeywords lexbuf } | "(*" { ocamlcomment (lexeme_start_p lexbuf) lexbuf; parentheses openingpos pkeywords lexbuf } | newline { update_loc lexbuf; parentheses openingpos pkeywords lexbuf } | '}' | eof { error1 openingpos "unbalanced opening parenthesis." } | _ { parentheses openingpos pkeywords lexbuf } (* Skip O'Caml comments. Comments can be nested and can contain strings or characters, which must be correctly analyzed. (A string could contain begin-of-comment or end-of-comment sequences, which must be ignored; a character could contain a begin-of-string sequence.) 
*) and ocamlcomment openingpos = parse | "*)" { () } | "(*" { ocamlcomment (lexeme_start_p lexbuf) lexbuf; ocamlcomment openingpos lexbuf } | '"' { string (lexeme_start_p lexbuf) lexbuf; ocamlcomment openingpos lexbuf } | "'" { char lexbuf; ocamlcomment openingpos lexbuf } | newline { update_loc lexbuf; ocamlcomment openingpos lexbuf } | eof { error1 openingpos "unterminated Objective Caml comment." } | _ { ocamlcomment openingpos lexbuf } (* Skip O'Caml strings. *) and string openingpos = parse | '"' { () } | '\\' newline | newline { update_loc lexbuf; string openingpos lexbuf } | '\\' _ (* Upon finding a backslash, skip the character that follows, unless it is a newline. Pretty crude, but should work. *) { string openingpos lexbuf } | eof { error1 openingpos "unterminated Objective Caml string." } | _ { string openingpos lexbuf } (* Skip O'Caml characters. A lone quote character is legal inside a comment, so if we don't recognize the matching closing quote, we simply abandon. *) and char = parse | '\\'? newline "'" { update_loc lexbuf } | [^ '\\' '\''] "'" | '\\' _ "'" | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'" | '\\' 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] "'" | "" { () } menhir-20130116/src/invariant.ml0000644000175000017500000007015712075533603015415 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module discovers information about the shape and content of the stack in each of the automaton's states. 
*) open Grammar open Conflict (* artificial dependency; ensures that [Conflict] runs first *) (* ------------------------------------------------------------------------ *) (* Compute a lower bound on the height of the stack at every state. At the same time, compute which symbols are held in this stack prefix. *) (* In order to compute (a lower bound on) the height of the stack at a state [s], we examine the LR(0) items that compose [s]. For each item, if the bullet is at position [pos], then we can be assured that the height of the stack is at least [pos]. Thus, we compute the maximum of [pos] over all items (of which there is at least one). *) (* The set of items that we use is not closed, but this does not matter; the items that would be added by the closure would not add any information regarding the height of the stack, since the bullet is at position 0 in these items. *) (* Instead of computing just the stack height, we compute, in the same manner, which symbols are on the stack at a state [s]. This is an array of symbols whose length is the height of the stack at [s]. By convention, the top of the stack is the end of the array. *) (* We first compute and tabulate this information at the level of the LR(0) automaton. *) let stack_symbols : Lr0.node -> Symbol.t array = let dummy = Array.create 0 (Symbol.T Terminal.sharp) in Misc.tabulate Lr0.n (fun node -> Item.Set.fold (fun item accu -> let prod, nt, rhs, pos, length = Item.def item in if pos > Array.length accu then Array.sub rhs 0 pos else accu ) (Lr0.items node) dummy ) (* Then, it is easy to extend it to the LR(1) automaton. *) let stack_symbols (node : Lr1.node) : Symbol.t array = stack_symbols (Lr0.core (Lr1.state node)) let stack_height (node : Lr1.node) : int = Array.length (stack_symbols node) (* ------------------------------------------------------------------------ *) (* Above, we have computed a prefix of the stack at every state. 
We have computed the length of this prefix and the symbols that are held in this prefix of the stack. Now, compute which states may be held in this prefix. *) (* In order to compute this information, we perform an analysis of the automaton, via a least fixed fixed point computation. *) (* It is worth noting that it would be possible to use an analysis based on a least fixed point computation to discover at the same time the length of the stack prefix, the symbols that it contains, and the states that it may contain. This alternate approach, which was used until 2012/08/25, would lead us to discovering a richer invariant, that is, potentially longer prefixes. This extra information, however, was useless; computing it was a waste of time. Hence, as of 2012/08/25, the height of the stack prefix and the symbols that it contains are predicted (see above), and the least fixed computation is used only to populate these prefixes of predictable length with state information. *) (* By the way, this least fixed point analysis remains the most costly computation throughout this module. *) (* Vectors of sets of states. *) module StateVector = struct type property = Lr1.NodeSet.t list let empty = [] let rec equal v1 v2 = match v1, v2 with | [], [] -> true | states1 :: v1, states2 :: v2 -> Lr1.NodeSet.equal states1 states2 && equal v1 v2 | _, _ -> (* Because all heights are known ahead of time, we are able to (and careful to) compare only vectors of equal length. *) assert false let rec join v1 v2 = match v1, v2 with | [], [] -> [] | states1 :: v1, states2 :: v2 -> Lr1.NodeSet.union states1 states2 :: join v1 v2 | _, _ -> (* Because all heights are known ahead of time, we are able to (and careful to) compare only vectors of equal length. *) assert false let push v x = x :: v let truncate h v = Misc.truncate h v end (* In order to perform the fixed point computation, we must extend our type of vectors with a bottom element. 
This element will not appear in the least fixed point, provided every state of the automaton is reachable. *) module StateLattice = struct type property = | Bottom | NonBottom of StateVector.property let bottom = Bottom let empty = NonBottom StateVector.empty let equal v1 v2 = match v1, v2 with | Bottom, Bottom -> true | NonBottom v1, NonBottom v2 -> StateVector.equal v1 v2 | _, _ -> false let join v1 v2 = match v1, v2 with | Bottom, v | v, Bottom -> v | NonBottom v1, NonBottom v2 -> NonBottom (StateVector.join v1 v2) let push v x = match v with | Bottom -> Bottom | NonBottom v -> NonBottom (StateVector.push v x) let truncate h v = match v with | Bottom -> Bottom | NonBottom v -> NonBottom (StateVector.truncate h v) let is_maximal _ = false end open StateLattice (* Define the fixed point. *) let stack_states : Lr1.node -> property = let module F = Fix.Make(Lr1.ImperativeNodeMap)(StateLattice) in F.lfp (fun node (get : Lr1.node -> property) -> (* We use the fact that a state has incoming transitions if and only if it is not a start state. *) match Lr1.incoming_symbol node with | None -> assert (Lr1.predecessors node = []); assert (stack_height node = 0); (* If [node] is a start state, then the stack at [node] may be (in fact, must be) the empty stack. *) empty | Some symbol -> (* If [node] is not a start state, then include the contribution of every incoming transition. We compute a join over all predecessors. The contribution of one predecessor is the abstract value found at this predecessor, extended with a new cell for this transition, and truncated to the stack height at [node], so as to avoid obtaining a vector that is longer than expected/necessary. 
*) let height = stack_height node in List.fold_left (fun v predecessor -> join v (truncate height (push (get predecessor) (Lr1.NodeSet.singleton predecessor)) ) ) bottom (Lr1.predecessors node) ) (* If every state is reachable, then the least fixed point must be non-bottom everywhere, so we may view it as a function that produces a vector of sets of states. *) let stack_states (node : Lr1.node) : StateVector.property = match stack_states node with | Bottom -> (* apparently this node is unreachable *) assert false | NonBottom v -> v (* ------------------------------------------------------------------------ *) (* For each production, compute where (that is, in which states) this production can be reduced. *) let production_where : Lr1.NodeSet.t ProductionMap.t = Lr1.fold (fun accu node -> TerminalMap.fold (fun _ prods accu -> let prod = Misc.single prods in let nodes = try ProductionMap.lookup prod accu with Not_found -> Lr1.NodeSet.empty in ProductionMap.add prod (Lr1.NodeSet.add node nodes) accu ) (Lr1.reductions node) accu ) ProductionMap.empty let production_where (prod : Production.index) : Lr1.NodeSet.t = try (* Production [prod] may be reduced at [nodes]. *) let nodes = ProductionMap.lookup prod production_where in assert (not (Lr1.NodeSet.is_empty nodes)); nodes with Not_found -> (* The production [prod] is never reduced. *) Lr1.NodeSet.empty let ever_reduced prod = not (Lr1.NodeSet.is_empty (production_where prod)) let fold_reduced f prod accu = Lr1.NodeSet.fold f (production_where prod) accu (* ------------------------------------------------------------------------ *) (* Warn about productions that are never reduced. *) let () = let count = ref 0 in Production.iter (fun prod -> if Lr1.NodeSet.is_empty (production_where prod) then match Production.classify prod with | Some nt -> incr count; Error.grammar_warning (Nonterminal.positions nt) (Printf.sprintf "symbol %s is never accepted." 
(Nonterminal.print false nt)) | None -> incr count; Error.grammar_warning (Production.positions prod) (Printf.sprintf "production %sis never reduced." (Production.print prod)) ); if !count > 0 then Error.grammar_warning [] (Printf.sprintf "in total, %d productions are never reduced." !count) (* ------------------------------------------------------------------------ *) (* From the above information, deduce, for each production, the states that may appear in the stack when this production is reduced. *) (* We are careful to produce a vector of states whose length is exactly that of the production [prod]. *) let production_states : Production.index -> StateLattice.property = Production.tabulate (fun prod -> let nodes = production_where prod in let height = Production.length prod in Lr1.NodeSet.fold (fun node accu -> join accu (truncate height (NonBottom (stack_states node)) ) ) nodes bottom ) (* ------------------------------------------------------------------------ *) (* We now determine which states must be represented, that is, explicitly pushed onto the stack. For simplicity, a state is either always represented or never represented. More fine-grained strategies, where a single state is sometimes pushed onto the stack and sometimes not pushed, depending on which outgoing transition is being taken, are conceivable, but quite tricky, and probably not worth the trouble. (1) If two states are liable to appear within a single stack cell, then one is represented if and only if the other is represented. This ensures that the structure of stacks is known everywhere and that we can propose types for stacks. (2) If a state [s] has an outgoing transition along nonterminal symbol [nt], and if the [goto] table for symbol [nt] has more than one target, then state [s] is represented. (3) If a stack cell contains more than one state and if at least one of these states is able to handle the [error] token, then these states are represented. 
(4) If the semantic action associated with a production mentions the [$syntaxerror] keyword, then the state that is being reduced to (that is, the state that initiated the recognition of this production) is represented. (Indeed, it will be passed as an argument to [errorcase].) *) (* Data. *) let rep : bool UnionFind.point array = Array.init Lr1.n (fun _ -> UnionFind.fresh false) (* Getter. *) let represented state = rep.(Lr1.number state) (* Setters. *) let represent state = UnionFind.change (represented state) true let represents states = represent (Lr1.NodeSet.choose states) (* Enforce condition (1) above. *) let share (v : StateVector.property) = List.iter (fun states -> let dummy = UnionFind.fresh false in Lr1.NodeSet.iter (fun state -> UnionFind.eunion dummy (represented state) ) states ) v let () = Lr1.iter (fun node -> share (stack_states node) ); Production.iter (fun prod -> match production_states prod with | Bottom -> () | NonBottom v -> share v ) (* Enforce condition (2) above. *) let () = Nonterminal.iter (fun nt -> let count = Lr1.targets (fun count _ _ -> count + 1 ) 0 (Symbol.N nt) in if count > 1 then Lr1.targets (fun () sources _ -> List.iter represent sources ) () (Symbol.N nt) ) (* Enforce condition (3) above. *) let handler state = try let _ = SymbolMap.find (Symbol.T Terminal.error) (Lr1.transitions state) in true with Not_found -> try let _ = TerminalMap.lookup Terminal.error (Lr1.reductions state) in true with Not_found -> false let handlers states = Lr1.NodeSet.exists handler states let () = Lr1.iter (fun node -> let v = stack_states node in List.iter (fun states -> if Lr1.NodeSet.cardinal states >= 2 && handlers states then represents states ) v ) (* Enforce condition (4) above. 
*) let () = Production.iterx (fun prod -> if Action.has_syntaxerror (Production.action prod) then match production_states prod with | Bottom -> () | NonBottom v -> let sites = production_where prod in let length = Production.length prod in if length = 0 then Lr1.NodeSet.iter represent sites else let states = List.nth v (length - 1) in represents states ) (* Define accessors. *) let represented state = UnionFind.find (represented state) let representeds states = if Lr1.NodeSet.is_empty states then assert false else represented (Lr1.NodeSet.choose states) (* Statistics. *) let () = Error.logC 1 (fun f -> let count = Lr1.fold (fun count node -> if represented node then count + 1 else count ) 0 in Printf.fprintf f "%d out of %d states are represented.\n" count Lr1.n ) (* ------------------------------------------------------------------------ *) (* Accessors for information about the stack. *) (* We describe a stack prefix as a list of cells, where each cell is a pair of a symbol and a set of states. The top of the stack is the head of the list. *) type cell = Symbol.t * Lr1.NodeSet.t type word = cell list (* This auxiliary function converts a stack-as-an-array (top of stack at the right end) to a stack-as-a-list (top of stack at list head). *) let convert a = let n = Array.length a in let rec loop i accu = if i = n then accu else loop (i + 1) (a.(i) :: accu) in loop 0 [] (* [stack s] describes the stack when the automaton is in state [s]. *) let stack node : word = List.combine (convert (stack_symbols node)) (stack_states node) (* [prodstack prod] describes the stack when production [prod] is about to be reduced. *) let prodstack prod : word = match production_states prod with | Bottom -> (* This production is never reduced. *) assert false | NonBottom v -> List.combine (convert (Production.rhs prod)) v (* [gotostack nt] is the structure of the stack when a shift transition over nonterminal [nt] is about to be taken. It consists of just one cell. 
*) let gotostack : Nonterminal.t -> word = Nonterminal.tabulate (fun nt -> let sources = Lr1.targets (fun accu sources _ -> List.fold_right Lr1.NodeSet.add sources accu ) Lr1.NodeSet.empty (Symbol.N nt) in [ Symbol.N nt, sources ] ) let fold f accu w = List.fold_right (fun (symbol, states) accu -> f accu (representeds states) symbol states ) w accu let fold_top f accu w = match w with | [] -> accu | (symbol, states) :: _ -> f (representeds states) symbol (* ------------------------------------------------------------------------ *) (* Explain how the stack should be deconstructed when an error is found. We sometimes have a choice as too how many stack cells should be popped. Indeed, several cells in the known suffix of the stack may physically hold a state. If neither of these states handles errors, then we could jump to either. (Indeed, if we jump to one that's nearer, it will in turn pop further stack cells and jump to one that's farther.) In the interests of code size, we should pop as few stack cells as possible. So, we jump to the topmost represented state in the known suffix. *) type state = | Represented | UnRepresented of Lr1.node type instruction = | Die | DownTo of word * state let rewind node : instruction = let w = stack node in let rec rewind w = match w with | [] -> (* I believe that every stack description either is definite (that is, ends with [TailEmpty]) or contains at least one represented state. Thus, if we find an empty [w], this means that the stack is definitely empty. *) Die | ((_, states) as cell) :: w -> if representeds states then (* Here is a represented state. We will pop this cell and no more. *) DownTo ([ cell ], Represented) else if handlers states then begin (* Here is an unrepresented state that can handle errors. The cell must hold a singleton set of states, so we know which state to jump to, even though it isn't represented. 
*) assert (Lr1.NodeSet.cardinal states = 1); let state = Lr1.NodeSet.choose states in DownTo ([ cell ], UnRepresented state) end else (* Here is an unrepresented state that does not handle errors. Pop this cell and look further. *) match rewind w with | Die -> Die | DownTo (w, st) -> DownTo (cell :: w, st) in rewind w (* ------------------------------------------------------------------------ *) (* We now determine which positions must be kept track of. For simplicity, we do this on a per symbol basis. That is, for each symbol, either we never keep track of position information, or we always do. In fact, we do distinguish start and end positions. This leads to computing two sets of symbols -- those that keep track of their start position and those that keep track of their end position. A symbol on the right-hand side of a production must keep track of its (start or end) position if that position is explicitly requested by a semantic action. Furthermore, if the left-hand symbol of a production must keep track of its start (resp. end) position, then the first (resp. last) symbol of its right-hand side (if there is one) must do so as well. That is, unless the right-hand side is empty. 
*) open Keyword let startp = ref SymbolSet.empty let endp = ref SymbolSet.empty let rec require where symbol = let wherep = match where with | WhereStart -> startp | WhereEnd -> endp in if not (SymbolSet.mem symbol !wherep) then begin wherep := SymbolSet.add symbol !wherep; match symbol with | Symbol.T _ -> () | Symbol.N nt -> Production.iternt nt (require_aux where) end and require_aux where prod = let nt, rhs = Production.def prod in let length = Array.length rhs in if length > 0 then match where with | WhereStart -> require where rhs.(0) | WhereEnd -> require where rhs.(length - 1) let () = Production.iterx (fun prod -> let rhs = Production.rhs prod and ids = Production.identifiers prod and action = Production.action prod in KeywordSet.iter (function | Dollar _ | PreviousError | SyntaxError -> () | Position (Left, where, _) -> require_aux where prod | Position (RightDollar i, where, _) -> require where rhs.(i - 1) | Position (RightNamed id, where, _) -> Array.iteri (fun i id' -> if id = id' then require where rhs.(i) ) ids ) (Action.keywords action) ) let startp = !startp let endp = !endp let () = Error.logC 1 (fun f -> Printf.fprintf f "%d out of %d symbols keep track of their start position.\n\ %d out of %d symbols keep track of their end position.\n" (SymbolSet.cardinal startp) (Terminal.n + Nonterminal.n) (SymbolSet.cardinal endp) (Terminal.n + Nonterminal.n)) let startp symbol = SymbolSet.mem symbol startp let endp symbol = SymbolSet.mem symbol endp (* ------------------------------------------------------------------------- *) (* Miscellaneous. *) let universal symbol = Lr1.fold (fun universal s -> universal && (if represented s then SymbolMap.mem symbol (Lr1.transitions s) else true) ) true (* ------------------------------------------------------------------------ *) (* Discover which states potentially can do error recovery. They are the states whose incoming symbol is [error]. 
At these states, [env.shifted] is zero, that is, no tokens have been successfully shifted since the last error token was shifted. We do not include in this definition the states where [env.shifted] *may be* zero. That would involve adding in all states reachable from the above states via reductions. However, error recovery will never be performed in these states. Indeed, imagine we shift an error token and enter a state that can do error recovery, according to the above definition. If, at this point, we consult the lookahead token [tok] and perform a reduction, then the new state that we reach is, by construction, able to act upon [tok], so no error recovery will be performed at that state, even though [env.shifted] is still zero. However, we must not perform default reductions at states that can do error recovery, otherwise we break this reasoning. If the option [--error-recovery] was not provided on the command line, then no states will perform error recovery. This makes things simpler (and saves some code) in the common case where people are not interested in error recovery. This also disables the warning about states that can do error recovery but do not accept the EOF token. *) let recoverers = if Settings.recovery then Lr1.fold (fun recoverers node -> match Lr1.incoming_symbol node with | Some (Symbol.T tok) when Terminal.equal tok Terminal.error -> Lr1.NodeSet.add node recoverers | _ -> recoverers ) Lr1.NodeSet.empty else Lr1.NodeSet.empty let recoverer node = Lr1.NodeSet.mem node recoverers (* ------------------------------------------------------------------------ *) (* Discover which states can peek at an error. These are the states where [env.shifted] may be -1, that is, where an error token may be on the stream. These are the states that are targets of a reduce action on [error]. *) (* 2012/08/25 I am optimizing this code, whose original version I found had quadratic complexity. The problem is as follows. 
We can easily iterate over all states to find which states [s] have a reduce action on error. What we must find out, then, is into which state [t] this reduce action takes us. This is not easy to predict, as it depends on the contents of the stack. The original code used an overapproximation, as follows: if the reduction concerns a production whose head symbol is [nt], then all of the states that have an incoming transition labeled [nt] are potential targets. The new version of the code below relies on the same approximation, but uses two successive loops instead of two nested loops. *) let errorpeekers = (* First compute a set of symbols [nt]... *) let nts : SymbolSet.t = Lr1.fold (fun nts node -> try let prods = TerminalMap.lookup Terminal.error (Lr1.reductions node) in let prod = Misc.single prods in let nt = Production.nt prod in SymbolSet.add (Symbol.N nt) nts with Not_found -> nts ) SymbolSet.empty in (* ... then compute the set of all target states of all transitions labeled by some symbol in the set [nt]. *) SymbolSet.fold (fun nt errorpeekers -> Lr1.targets (fun errorpeekers _ target -> Lr1.NodeSet.add target errorpeekers ) errorpeekers nt ) nts Lr1.NodeSet.empty let errorpeeker node = Lr1.NodeSet.mem node errorpeekers (* ------------------------------------------------------------------------ *) (* Here is how we check whether state [s] should have a default reduction. We check whether [s] has no outgoing shift transitions and only has one possible reduction action. In that case, we produce a default reduction action, that is, we perform reduction without consulting the lookahead token. This saves code, but can alter the parser's behavior in the presence of errors. A state that can perform error recovery (that is, a state whose incoming symbol is [error]) never performs a default reduction. This is explained above. 
Actually, we allow one exception: if the state has a single (reduction) action on "#", as explained in the next paragraph, then we perform this default reduction and do not allow error recovery to take place. Error recovery would not make much sense, since we believe we are at the end of file. The check for default actions subsumes the check for the case where [s] admits a reduce action with lookahead symbol "#". In that case, it must be the only possible action -- see [Lr1.default_conflict_resolution]. That is, we have reached a point where we have recognized a well-formed input and are now expecting an end-of-stream. In that case, performing reduction without looking at the next token is the right thing to do, since there should in fact be none. The state that we reduce to will also have the same property, and so on, so we will in fact end up rewinding the entire stack and accepting the input when the stack becomes empty. (New as of 2012/01/23.) A state where a shift/reduce conflict was solved in favor of neither (due to a use of the %nonassoc directive) must not perform a default reduction. Indeed, this would effectively mean that the failure that was requested by the user is forgotten and replaced with a reduction. This surprising behavior is present in ocamlyacc and was present in earlier versions of Menhir. See e.g. http://caml.inria.fr/mantis/view.php?id=5462 There is a chance that we might run into trouble if the ideas described in the above two paragraphs collide, that is, if we forbid a default reduction (due to a shift/reduce conflict solved by %nonassoc) in a node where we would like to have default reduction on "#". This situation seems unlikely to arise, so I will not do anything about it for the moment. (Furthermore, someone who uses precedence declarations is looking for trouble anyway.) 
20120525: if [--canonical] has been specified, then we disallow default reductions on a normal token, because we do not want to introduce any spurious actions into the automaton. We do still allow default reductions on "#", since they are needed for the automaton to terminate properly. *) let (has_default_reduction : Lr1.node -> (Production.index * TerminalSet.t) option), hdrcount = Misc.tabulateo Lr1.number Lr1.fold Lr1.n (fun s -> if Lr1.forbid_default_reduction s then None else match ProductionMap.is_singleton (Lr1.invert (Lr1.reductions s)) with | Some (_, toks) as reduction when SymbolMap.purelynonterminal (Lr1.transitions s) -> if TerminalSet.mem Terminal.sharp toks then (* Perform default reduction on "#". *) reduction else if recoverer s then (* Do not perform default reduction. Allow error recovery. *) None else begin (* Perform default reduction, unless [--canonical] has been specified. *) match Settings.construction_mode with | Settings.ModeCanonical -> None | Settings.ModeInclusionOnly | Settings.ModePager -> reduction end | Some _ | None -> None ) let () = Error.logC 1 (fun f -> Printf.fprintf f "%d out of %d states have a default reduction.\n" hdrcount Lr1.n) (* ------------------------------------------------------------------------ *) let () = Time.tick "Constructing the invariant" (* ------------------------------------------------------------------------ *) (* If any fatal error was signaled up to this point, stop now. This may include errors signaled in the modules [lr1] and [invariant] by calling the function [Error.grammar_warning]. 
*) let () = if Error.errors() then exit 1 menhir-20130116/src/codeBits.ml0000644000175000017500000000747012075533603015154 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module provides a number of tiny functions that help produce [IL] code. *) open IL (* The unit type. *) let tunit = TypApp ("unit", []) (* The integer type. *) let tint = TypApp ("int", []) (* The string type. *) let tstring = TypApp ("string", []) (* The exception type. *) let texn = TypApp ("exn", []) (* The type of lexer positions. *) let tposition = TypApp ("Lexing.position", []) (* The type of lexer buffers. *) let tlexbuf = TypApp ("Lexing.lexbuf", []) (* The type of untyped semantic values. *) let tobj = TypApp ("Obj.t", []) (* Building a type variable. *) let tvar x : typ = TypVar x (* Building a type scheme. *) let scheme qs t = { quantifiers = qs; body = t } (* Building a type scheme with no quantifiers out of a type. *) let type2scheme t = scheme [] t let pat2var = function | PVar x -> x | _ -> assert false (* [simplify] removes bindings of the form [let v = v in ...] and [let _ = v in ...]. *) let rec simplify = function | [] -> [] | (PVar v1, EVar v2) :: bindings when v1 = v2 -> (* Avoid a useless let binding. *) simplify bindings | (PWildcard, EVar _) :: bindings -> (* Avoid a useless let binding. *) simplify bindings | binding :: bindings -> binding :: simplify bindings (* Building a [let] construct, with on-the-fly simplification. 
*) let rec blet (bindings, body) = match simplify bindings with | [] -> body | bindings -> ELet (bindings, body) let mlet formals actuals body = blet (List.combine formals actuals, body) (* [bottom] is an expression that has every type. Its semantics is irrelevant. *) let bottom = ERaise (EData ("Not_found", [])) (* Boolean constants. *) let efalse : expr = EData ("false", []) let etrue : expr = EData ("true", []) let eboolconst b = if b then etrue else efalse (* These help build function types. *) let arrow typ body : typ = TypArrow (typ, body) let arrowif flag typ body : typ = if flag then arrow typ body else body let marrow typs body : typ = List.fold_right arrow typs body (* ------------------------------------------------------------------------ *) (* Here is a bunch of naming conventions. Our names are chosen to minimize the likelihood that a name in a semantic action is captured. In other words, all global definitions as well as the parameters to [reduce] are given far-fetched names, unless [--no-prefix] was specified. Note that the prefix must begin with '_'. This allows avoiding warnings about unused variables with ocaml 3.09 and later. *) let prefix name = if Settings.noprefix then name else "_menhir_" ^ name let dataprefix name = if Settings.noprefix then name else "Menhir" ^ name let tvprefix name = if Settings.noprefix then name else "ttv_" ^ name menhir-20130116/src/time.ml0000644000175000017500000000332012075533603014344 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. 
*) (* *) (**************************************************************************) let channel = stderr open Unix open Printf let clock = ref (times()) let tick msg = if Settings.timings then let times1 = !clock in let times2 = times() in fprintf channel "%s: %.02fs\n%!" msg (times2.tms_utime -. times1.tms_utime); clock := times() type chrono = float ref let fresh () = ref 0. let chrono (chrono : float ref) (task : unit -> 'a) : 'a = if Settings.timings then begin let times1 = times() in let result = task() in let times2 = times() in chrono := !chrono +. times2.tms_utime -. times1.tms_utime; result end else task() let display (chrono : float ref) msg = if Settings.timings then fprintf channel "%s: %.02fs\n" msg !chrono menhir-20130116/src/engine.mli0000644000175000017500000000230112075533603015022 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) open EngineTypes (* The LR parsing engine. *) module Make (T : TABLE) : ENGINE with type state = T.state and type token = T.token and type semantic_value = T.semantic_value menhir-20130116/src/tableInterpreter.ml0000644000175000017500000001175212075533603016731 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. 
This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (* This module instantiates the generic [Engine] with a thin decoding layer for the generated tables. Like [Engine], it is part of [MenhirLib]. *) (* The exception [Accept] is pre-declared here: this obviates the need for generating its definition. The exception [Error] is declared within the generated parser. This is preferable to pre-declaring it here, as it ensures that each parser gets its own, distinct [Error] exception. This is consistent with the code-based back-end. *) exception Accept of Obj.t (* This functor is invoked by the generated parser. *) module Make (T : TableFormat.TABLES) = Engine.Make (struct type state = int type token = T.token type terminal = int type semantic_value = Obj.t let token2terminal = T.token2terminal let token2value = T.token2value let error_terminal = T.error_terminal let error_value = Obj.repr () type production = int let default_reduction state defred nodefred env = let code = PackedIntArray.get T.default_reduction state in if code = 0 then nodefred env else defred env (code - 1) (* This auxiliary function helps access a compressed, two-dimensional matrix, like the action and goto tables. *) let unmarshal2 table i j = RowDisplacement.getget PackedIntArray.get PackedIntArray.get table i j (* This auxiliary function helps access a flattened, two-dimensional matrix, like the error bitmap. 
*) let unflatten (n, data) i j = PackedIntArray.get1 data (n * i + j) let action state terminal value shift reduce fail env = match unflatten T.error state terminal with | 1 -> let action = unmarshal2 T.action state terminal in let opcode = action land 0b11 and param = action lsr 2 in if opcode >= 0b10 then (* 0b10 : shift/discard *) (* 0b11 : shift/nodiscard *) let please_discard = (opcode = 0b10) in shift env please_discard terminal value param else (* 0b01 : reduce *) (* 0b00 : cannot happen *) reduce env param | c -> assert (c = 0); fail env let goto state prod = let code = unmarshal2 T.goto state (PackedIntArray.get T.lhs prod) in (* code = 1 + state *) code - 1 exception Accept = Accept exception Error = T.Error type semantic_action = (state, semantic_value, token) EngineTypes.env -> unit let semantic_action prod = T.semantic_action.(prod) let recovery = T.recovery module Log = struct open Printf let state state = match T.trace with | Some _ -> fprintf stderr "State %d:\n%!" state | None -> () let shift terminal state = match T.trace with | Some (terminals, _) -> fprintf stderr "Shifting (%s) to state %d\n%!" terminals.(terminal) state | None -> () let reduce_or_accept prod = match T.trace with | Some (_, productions) -> fprintf stderr "%s\n%!" productions.(prod) | None -> () let lookahead_token lexbuf token = match T.trace with | Some (terminals, _) -> fprintf stderr "Lookahead token is now %s (%d-%d)\n%!" terminals.(token) lexbuf.Lexing.lex_start_p.Lexing.pos_cnum lexbuf.Lexing.lex_curr_p.Lexing.pos_cnum | None -> () let initiating_error_handling () = match T.trace with | Some _ -> fprintf stderr "Initiating error handling\n%!" | None -> () let resuming_error_handling () = match T.trace with | Some _ -> fprintf stderr "Resuming error handling\n%!" | None -> () let handling_error state = match T.trace with | Some _ -> fprintf stderr "Handling error in state %d\n%!" 
state | None -> () let discarding_last_token token = match T.trace with | Some (terminals, _) -> fprintf stderr "Discarding last token read (%s)\n%!" terminals.(token) | None -> () end end) menhir-20130116/src/reachability.ml0000644000175000017500000000431312075533603016051 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) open UnparameterizedSyntax open Syntax let rec visit grammar visited symbol = try let rule = StringMap.find symbol grammar.rules in if not (StringSet.mem symbol visited) then let visited = StringSet.add symbol visited in List.fold_left (visitb grammar) visited rule.branches else visited with Not_found -> (* This is a terminal symbol. *) assert (symbol = "error" || StringMap.mem symbol grammar.tokens); visited and visitb grammar visited { producers = symbols } = List.fold_left (visits grammar) visited symbols and visits grammar visited (symbol, _) = visit grammar visited symbol let trim grammar = if StringSet.cardinal grammar.start_symbols = 0 then Error.error [] "no start symbol has been declared." else let reachable = StringSet.fold (fun symbol visited -> visit grammar visited symbol ) grammar.start_symbols StringSet.empty in StringMap.iter (fun symbol rule -> if not (StringSet.mem symbol reachable) then Error.grammar_warning rule.positions (Printf.sprintf "symbol %s is unreachable from any of the start symbol(s)." 
symbol) ) grammar.rules; { grammar with rules = StringMap.restrict reachable grammar.rules } menhir-20130116/src/tableInterpreter.mli0000644000175000017500000000333412075533603017077 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (* This module instantiates the generic [Engine] with a thin decoding layer for the generated tables. Like [Engine], it is part of [MenhirLib]. *) (* The exception [Accept] is pre-declared here: this obviates the need for generating its definition. The exception [Error] is declared within the generated parser. This is preferable to pre-declaring it here, as it ensures that each parser gets its own, distinct [Error] exception. This is consistent with the code-based back-end. *) exception Accept of Obj.t (* This functor is invoked by the generated parser. *) module Make (T : TableFormat.TABLES) : EngineTypes.ENGINE with type state = int and type token = T.token and type semantic_value = Obj.t menhir-20130116/src/codeBackend.mli0000644000175000017500000000212212075533602015737 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. 
This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* The (code-based) code generator. *) module Run (T : sig end) : sig val program: IL.program end menhir-20130116/src/time.mli0000644000175000017500000000271712075533603014526 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* Call [tick msg] to stop timing a task and start timing the next task. A message is displayed. The message includes [msg] as well as timing information. The very first task is deemed to begin when this module is initialized. *) val tick: string -> unit (* Another timing method, with separate chronometers; useful for more precise profiling. *) type chrono val fresh: unit -> chrono val chrono: chrono -> (unit -> 'a) -> 'a val display: chrono -> string -> unit menhir-20130116/src/patricia.ml0000644000175000017500000010024512075533603015206 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. 
*) (* *) (**************************************************************************) (* This is an implementation of Patricia trees, following Chris Okasaki's paper at the 1998 ML Workshop in Baltimore. Both big-endian and little-endian trees are provided. Both sets and maps are implemented on top of Patricia trees. *) (*i --------------------------------------------------------------------------------------------------------------- i*) (*s \mysection{Little-endian vs big-endian trees} *) (* A tree is little-endian if it expects the key's least significant bits to be tested first during a search. It is big-endian if it expects the key's most significant bits to be tested first. Most of the code is independent of this design choice, so it is written as a functor, parameterized by a small structure which defines endianness. Here is the interface which must be adhered to by such a structure. *) module Endianness = struct module type S = sig (* A mask is an integer with a single one bit (i.e. a power of 2). *) type mask = int (* [branching_bit] accepts two distinct integers and returns a mask which identifies the first bit where they differ. The meaning of ``first'' varies according to the endianness being implemented. *) val branching_bit: int -> int -> mask (* [mask i m] returns an integer [i'], where all bits which [m] says are relevant are identical to those in [i], and all others are set to some unspecified, but fixed value. Which bits are ``relevant'' according to a given mask varies according to the endianness being implemented. *) val mask: int -> mask -> int (* [shorter m1 m2] returns [true] if and only if [m1] describes a shorter prefix than [m2], i.e. if it makes fewer bits relevant. Which bits are ``relevant'' according to a given mask varies according to the endianness being implemented. *) val shorter: mask -> mask -> bool end (* Now, let us define [Little] and [Big], two possible [Endiannness] choices. 
*) module Little = struct type mask = int let lowest_bit x = x land (-x) (* Performing a logical ``xor'' of [i0] and [i1] yields a bit field where all differences between [i0] and [i1] show up as one bits. (There must be at least one, since [i0] and [i1] are distinct.) The ``first'' one is the lowest bit in this bit field, since we are checking least significant bits first. *) let branching_bit i0 i1 = lowest_bit (i0 lxor i1) (* The ``relevant'' bits in an integer [i] are those which are found (strictly) to the right of the single one bit in the mask [m]. We keep these bits, and set all others to 0. *) let mask i m = i land (m-1) (* The smaller [m] is, the fewer bits are relevant. *) let shorter = (<) end module Big = struct type mask = int let lowest_bit x = x land (-x) let rec highest_bit x = let m = lowest_bit x in if x = m then m else highest_bit (x - m) (* Performing a logical ``xor'' of [i0] and [i1] yields a bit field where all differences between [i0] and [i1] show up as one bits. (There must be at least one, since [i0] and [i1] are distinct.) The ``first'' one is the highest bit in this bit field, since we are checking most significant bits first. In Okasaki's paper, this loop is sped up by computing a conservative initial guess. Indeed, the bit at which the two prefixes disagree must be somewhere within the shorter prefix, so we can begin searching at the least-significant valid bit in the shorter prefix. Unfortunately, to allow computing the initial guess, the main code has to pass in additional parameters, e.g. a mask which describes the length of each prefix. This ``pollutes'' the endianness-independent code. For this reason, this optimization isn't implemented here. *) let branching_bit i0 i1 = highest_bit (i0 lxor i1) (* The ``relevant'' bits in an integer [i] are those which are found (strictly) to the left of the single one bit in the mask [m]. We keep these bits, and set all others to 0. 
Okasaki uses a different convention, which allows big-endian Patricia trees to masquerade as binary search trees. This feature does not seem to be useful here. *) let mask i m = i land (lnot (2*m-1)) (* The smaller [m] is, the more bits are relevant. *) let shorter = (>) end end (*i --------------------------------------------------------------------------------------------------------------- i*) (*s \mysection{Patricia-tree-based maps} *) module Make (X : Endianness.S) = struct (* Patricia trees are maps whose keys are integers. *) type key = int (* A tree is either empty, or a leaf node, containing both the integer key and a piece of data, or a binary node. Each binary node carries two integers. The first one is the longest common prefix of all keys in this sub-tree. The second integer is the branching bit. It is an integer with a single one bit (i.e. a power of 2), which describes the bit being tested at this node. *) type 'a t = | Empty | Leaf of int * 'a | Branch of int * X.mask * 'a t * 'a t (* The empty map. *) let empty = Empty (* [choose m] returns an arbitrarily chosen binding in [m], if [m] is nonempty, and raises [Not_found] otherwise. *) let rec choose = function | Empty -> raise Not_found | Leaf (key, data) -> key, data | Branch (_, _, tree0, _) -> choose tree0 (* [lookup k m] looks up the value associated to the key [k] in the map [m], and raises [Not_found] if no value is bound to [k]. This implementation takes branches \emph{without} checking whether the key matches the prefix found at the current node. This means that a query for a non-existent key shall be detected only when finally reaching a leaf, rather than higher up in the tree. This strategy is better when (most) queries are expected to be successful. 
*) let rec lookup key = function | Empty -> raise Not_found | Leaf (key', data) -> if key = key' then data else raise Not_found | Branch (_, mask, tree0, tree1) -> lookup key (if (key land mask) = 0 then tree0 else tree1) let find = lookup (* [mem k m] tells whether the key [k] appears in the domain of the map [m]. *) let mem k m = try let _ = lookup k m in true with Not_found -> false (* The auxiliary function [join] merges two trees in the simple case where their prefixes disagree. Assume $t_0$ and $t_1$ are non-empty trees, with longest common prefixes $p_0$ and $p_1$, respectively. Further, suppose that $p_0$ and $p_1$ disagree, that is, neither prefix is contained in the other. Then, no matter how large $t_0$ and $t_1$ are, we can merge them simply by creating a new [Branch] node that has $t_0$ and $t_1$ as children! *) let join p0 t0 p1 t1 = let m = X.branching_bit p0 p1 in let p = X.mask p0 (* for instance *) m in if (p0 land m) = 0 then Branch(p, m, t0, t1) else Branch(p, m, t1, t0) (* The auxiliary function [match_prefix] tells whether a given key has a given prefix. More specifically, [match_prefix k p m] returns [true] if and only if the key [k] has prefix [p] up to bit [m]. Throughout our implementation of Patricia trees, prefixes are assumed to be in normal form, i.e. their irrelevant bits are set to some predictable value. Formally, we assume [X.mask p m] equals [p] whenever [p] is a prefix with [m] relevant bits. This allows implementing [match_prefix] using only one call to [X.mask]. On the other hand, this requires normalizing prefixes, as done e.g. in [join] above, where [X.mask p0 m] has to be used instead of [p0]. *) let match_prefix k p m = X.mask k m = p (* [fine_add decide k d m] returns a map whose bindings are all bindings in [m], plus a binding of the key [k] to the datum [d]. If a binding from [k] to [d0] already exists, then the resulting map contains a binding from [k] to [decide d0 d]. 
*) type 'a decision = 'a -> 'a -> 'a exception Unchanged let basic_add decide k d m = let rec add t = match t with | Empty -> Leaf (k, d) | Leaf (k0, d0) -> if k = k0 then let d' = decide d0 d in if d' == d0 then raise Unchanged else Leaf (k, d') else join k (Leaf (k, d)) k0 t | Branch (p, m, t0, t1) -> if match_prefix k p m then if (k land m) = 0 then Branch (p, m, add t0, t1) else Branch (p, m, t0, add t1) else join k (Leaf (k, d)) p t in add m let strict_add k d m = basic_add (fun _ _ -> raise Unchanged) k d m let fine_add decide k d m = try basic_add decide k d m with Unchanged -> m (* [add k d m] returns a map whose bindings are all bindings in [m], plus a binding of the key [k] to the datum [d]. If a binding already exists for [k], it is overridden. *) let add k d m = fine_add (fun old_binding new_binding -> new_binding) k d m (* [singleton k d] returns a map whose only binding is from [k] to [d]. *) let singleton k d = Leaf (k, d) (* [is_singleton m] returns [Some (k, d)] if [m] is a singleton map that maps [k] to [d]. Otherwise, it returns [None]. *) let is_singleton = function | Leaf (k, d) -> Some (k, d) | Empty | Branch _ -> None (* [is_empty m] returns [true] if and only if the map [m] defines no bindings at all. *) let is_empty = function | Empty -> true | Leaf _ | Branch _ -> false (* [cardinal m] returns [m]'s cardinal, that is, the number of keys it binds, or, in other words, its domain's cardinal. *) let rec cardinal = function | Empty -> 0 | Leaf _ -> 1 | Branch (_, _, t0, t1) -> cardinal t0 + cardinal t1 (* [remove k m] returns the map [m] deprived from any binding involving [k]. 
*) let remove key m = let rec remove = function | Empty -> raise Not_found | Leaf (key', _) -> if key = key' then Empty else raise Not_found | Branch (prefix, mask, tree0, tree1) -> if (key land mask) = 0 then match remove tree0 with | Empty -> tree1 | tree0 -> Branch (prefix, mask, tree0, tree1) else match remove tree1 with | Empty -> tree0 | tree1 -> Branch (prefix, mask, tree0, tree1) in try remove m with Not_found -> m (* [lookup_and_remove k m] looks up the value [v] associated to the key [k] in the map [m], and raises [Not_found] if no value is bound to [k]. The call returns the value [v], together with the map [m] deprived from the binding from [k] to [v]. *) let rec lookup_and_remove key = function | Empty -> raise Not_found | Leaf (key', data) -> if key = key' then data, Empty else raise Not_found | Branch (prefix, mask, tree0, tree1) -> if (key land mask) = 0 then match lookup_and_remove key tree0 with | data, Empty -> data, tree1 | data, tree0 -> data, Branch (prefix, mask, tree0, tree1) else match lookup_and_remove key tree1 with | data, Empty -> data, tree0 | data, tree1 -> data, Branch (prefix, mask, tree0, tree1) let find_and_remove = lookup_and_remove (* [fine_union decide m1 m2] returns the union of the maps [m1] and [m2]. If a key [k] is bound to [x1] (resp. [x2]) within [m1] (resp. [m2]), then [decide] is called. It is passed [x1] and [x2], and must return the value which shall be bound to [k] in the final map. The operation returns [m2] itself (as opposed to a copy of it) when its result is equal to [m2]. *) let reverse decision elem1 elem2 = decision elem2 elem1 let fine_union decide m1 m2 = let rec union s t = match s, t with | Empty, _ -> t | (Leaf _ | Branch _), Empty -> s | Leaf(key, value), _ -> fine_add (reverse decide) key value t | Branch _, Leaf(key, value) -> fine_add decide key value s | Branch(p, m, s0, s1), Branch(q, n, t0, t1) -> if (p = q) & (m = n) then (* The trees have the same prefix. Merge their sub-trees. 
*) let u0 = union s0 t0 and u1 = union s1 t1 in if t0 == u0 && t1 == u1 then t else Branch(p, m, u0, u1) else if (X.shorter m n) & (match_prefix q p m) then (* [q] contains [p]. Merge [t] with a sub-tree of [s]. *) if (q land m) = 0 then Branch(p, m, union s0 t, s1) else Branch(p, m, s0, union s1 t) else if (X.shorter n m) & (match_prefix p q n) then (* [p] contains [q]. Merge [s] with a sub-tree of [t]. *) if (p land n) = 0 then let u0 = union s t0 in if t0 == u0 then t else Branch(q, n, u0, t1) else let u1 = union s t1 in if t1 == u1 then t else Branch(q, n, t0, u1) else (* The prefixes disagree. *) join p s q t in union m1 m2 (* [union m1 m2] returns the union of the maps [m1] and [m2]. Bindings in [m2] take precedence over those in [m1]. *) let union m1 m2 = fine_union (fun d d' -> d') m1 m2 (* [iter f m] invokes [f k x], in turn, for each binding from key [k] to element [x] in the map [m]. Keys are presented to [f] according to some unspecified, but fixed, order. *) let rec iter f = function | Empty -> () | Leaf (key, data) -> f key data | Branch (_, _, tree0, tree1) -> iter f tree0; iter f tree1 (* [fold f m seed] invokes [f k d accu], in turn, for each binding from key [k] to datum [d] in the map [m]. Keys are presented to [f] in increasing order according to the map's ordering. The initial value of [accu] is [seed]; then, at each new call, its value is the value returned by the previous invocation of [f]. The value returned by [fold] is the final value of [accu]. *) let rec fold f m accu = match m with | Empty -> accu | Leaf (key, data) -> f key data accu | Branch (_, _, tree0, tree1) -> fold f tree1 (fold f tree0 accu) (* [fold_rev] performs exactly the same job as [fold], but presents keys to [f] in the opposite order. 
*) let rec fold_rev f m accu = match m with | Empty -> accu | Leaf (key, data) -> f key data accu | Branch (_, _, tree0, tree1) -> fold_rev f tree0 (fold_rev f tree1 accu) (* It is valid to evaluate [iter2 f m1 m2] if and only if [m1] and [m2] have the same domain. Doing so invokes [f k x1 x2], in turn, for each key [k] bound to [x1] in [m1] and to [x2] in [m2]. Bindings are presented to [f] according to some unspecified, but fixed, order. *) let rec iter2 f t1 t2 = match t1, t2 with | Empty, Empty -> () | Leaf (key1, data1), Leaf (key2, data2) -> assert (key1 = key2); f key1 (* for instance *) data1 data2 | Branch (p1, m1, left1, right1), Branch (p2, m2, left2, right2) -> assert (p1 = p2); assert (m1 = m2); iter2 f left1 left2; iter2 f right1 right2 | _, _ -> assert false (* [map f m] returns the map obtained by composing the map [m] with the function [f]; that is, the map $k\mapsto f(m(k))$. *) let rec map f = function | Empty -> Empty | Leaf (key, data) -> Leaf(key, f data) | Branch (p, m, tree0, tree1) -> Branch (p, m, map f tree0, map f tree1) (* [endo_map] is similar to [map], but attempts to physically share its result with its input. This saves memory when [f] is the identity function. *) let rec endo_map f tree = match tree with | Empty -> tree | Leaf (key, data) -> let data' = f data in if data == data' then tree else Leaf(key, data') | Branch (p, m, tree0, tree1) -> let tree0' = endo_map f tree0 in let tree1' = endo_map f tree1 in if (tree0' == tree0) & (tree1' == tree1) then tree else Branch (p, m, tree0', tree1') (* [iterator m] returns a stateful iterator over the map [m]. 
*) (* TEMPORARY performance could be improved, see JCF's paper *) let iterator m = let remainder = ref [ m ] in let rec next () = match !remainder with | [] -> None | Empty :: parent -> remainder := parent; next() | (Leaf (key, data)) :: parent -> remainder := parent; Some (key, data) | (Branch(_, _, s0, s1)) :: parent -> remainder := s0 :: s1 :: parent; next () in next (* If [dcompare] is an ordering over data, then [compare dcompare] is an ordering over maps. *) exception Got of int let compare dcompare m1 m2 = let iterator2 = iterator m2 in try iter (fun key1 data1 -> match iterator2() with | None -> raise (Got 1) | Some (key2, data2) -> let c = Pervasives.compare key1 key2 in if c <> 0 then raise (Got c) else let c = dcompare data1 data2 in if c <> 0 then raise (Got c) ) m1; match iterator2() with | None -> 0 | Some _ -> -1 with Got c -> c (*i --------------------------------------------------------------------------------------------------------------- i*) (*s \mysection{Patricia-tree-based sets} *) (* To enhance code sharing, it would be possible to implement maps as sets of pairs, or (vice-versa) to implement sets as maps to the unit element. However, both possibilities introduce some space and time inefficiency. To avoid it, we define each structure separately. *) module Domain = struct type element = int type t = | Empty | Leaf of int | Branch of int * X.mask * t * t (* The empty set. *) let empty = Empty (* [is_empty s] returns [true] if and only if the set [s] is empty. *) let is_empty = function | Empty -> true | Leaf _ | Branch _ -> false (* [singleton x] returns a set whose only element is [x]. *) let singleton x = Leaf x (* [is_singleton s] returns [Some x] if [s] is a singleton containing [x] as its only element; otherwise, it returns [None]. *) let is_singleton = function | Leaf x -> Some x | Empty | Branch _ -> None (* [choose s] returns an arbitrarily chosen element of [s], if [s] is nonempty, and raises [Not_found] otherwise. 
*) let rec choose = function | Empty -> raise Not_found | Leaf x -> x | Branch (_, _, tree0, _) -> choose tree0 (* [cardinal s] returns [s]'s cardinal. *) let rec cardinal = function | Empty -> 0 | Leaf _ -> 1 | Branch (_, _, t0, t1) -> cardinal t0 + cardinal t1 (* [mem x s] returns [true] if and only if [x] appears in the set [s]. *) let rec mem x = function | Empty -> false | Leaf x' -> x = x' | Branch (_, mask, tree0, tree1) -> mem x (if (x land mask) = 0 then tree0 else tree1) (* The auxiliary function [join] merges two trees in the simple case where their prefixes disagree. *) let join p0 t0 p1 t1 = let m = X.branching_bit p0 p1 in let p = X.mask p0 (* for instance *) m in if (p0 land m) = 0 then Branch(p, m, t0, t1) else Branch(p, m, t1, t0) (* [add x s] returns a set whose elements are all elements of [s], plus [x]. *) exception Unchanged let rec strict_add x t = match t with | Empty -> Leaf x | Leaf x0 -> if x = x0 then raise Unchanged else join x (Leaf x) x0 t | Branch (p, m, t0, t1) -> if match_prefix x p m then if (x land m) = 0 then Branch (p, m, strict_add x t0, t1) else Branch (p, m, t0, strict_add x t1) else join x (Leaf x) p t let add x s = try strict_add x s with Unchanged -> s (* [make2 x y] creates a set whose elements are [x] and [y]. [x] and [y] need not be distinct. *) let make2 x y = add x (Leaf y) (* [fine_add] does not make much sense for sets of integers. Better warn the user. *) type decision = int -> int -> int let fine_add decision x s = assert false (* [remove x s] returns a set whose elements are all elements of [s], except [x]. 
*) let remove x s = let rec strict_remove = function | Empty -> raise Not_found | Leaf x' -> if x = x' then Empty else raise Not_found | Branch (prefix, mask, tree0, tree1) -> if (x land mask) = 0 then match strict_remove tree0 with | Empty -> tree1 | tree0 -> Branch (prefix, mask, tree0, tree1) else match strict_remove tree1 with | Empty -> tree0 | tree1 -> Branch (prefix, mask, tree0, tree1) in try strict_remove s with Not_found -> s (* [union s1 s2] returns the union of the sets [s1] and [s2]. *) let rec union s t = match s, t with | Empty, _ -> t | _, Empty -> s | Leaf x, _ -> add x t | _, Leaf x -> add x s | Branch(p, m, s0, s1), Branch(q, n, t0, t1) -> if (p = q) & (m = n) then (* The trees have the same prefix. Merge their sub-trees. *) let u0 = union s0 t0 and u1 = union s1 t1 in if t0 == u0 && t1 == u1 then t else Branch(p, m, u0, u1) else if (X.shorter m n) & (match_prefix q p m) then (* [q] contains [p]. Merge [t] with a sub-tree of [s]. *) if (q land m) = 0 then Branch(p, m, union s0 t, s1) else Branch(p, m, s0, union s1 t) else if (X.shorter n m) & (match_prefix p q n) then (* [p] contains [q]. Merge [s] with a sub-tree of [t]. *) if (p land n) = 0 then let u0 = union s t0 in if t0 == u0 then t else Branch(q, n, u0, t1) else let u1 = union s t1 in if t1 == u1 then t else Branch(q, n, t0, u1) else (* The prefixes disagree. *) join p s q t (* [fine_union] does not make much sense for sets of integers. Better warn the user. *) let fine_union decision s1 s2 = assert false (* [build] is a ``smart constructor''. It builds a [Branch] node with the specified arguments, but ensures that the newly created node does not have an [Empty] child. *) let build p m t0 t1 = match t0, t1 with | Empty, Empty -> Empty | Empty, _ -> t1 | _, Empty -> t0 | _, _ -> Branch(p, m, t0, t1) (* [diff s t] returns the set difference of [s] and [t], that is, $s\setminus t$. 
*) let rec diff s t = match s, t with | Empty, _ | _, Empty -> s | Leaf x, _ -> if mem x t then Empty else s | _, Leaf x -> remove x s | Branch(p, m, s0, s1), Branch(q, n, t0, t1) -> if (p = q) & (m = n) then (* The trees have the same prefix. Compute the differences of their sub-trees. *) build p m (diff s0 t0) (diff s1 t1) else if (X.shorter m n) & (match_prefix q p m) then (* [q] contains [p]. Subtract [t] off a sub-tree of [s]. *) if (q land m) = 0 then build p m (diff s0 t) s1 else build p m s0 (diff s1 t) else if (X.shorter n m) & (match_prefix p q n) then (* [p] contains [q]. Subtract a sub-tree of [t] off [s]. *) diff s (if (p land n) = 0 then t0 else t1) else (* The prefixes disagree. *) s (* [inter s t] returns the set intersection of [s] and [t], that is, $s\cap t$. *) let rec inter s t = match s, t with | Empty, _ | _, Empty -> Empty | (Leaf x as s), t | t, (Leaf x as s) -> if mem x t then s else Empty | Branch(p, m, s0, s1), Branch(q, n, t0, t1) -> if (p = q) & (m = n) then (* The trees have the same prefix. Compute the intersections of their sub-trees. *) build p m (inter s0 t0) (inter s1 t1) else if (X.shorter m n) & (match_prefix q p m) then (* [q] contains [p]. Intersect [t] with a sub-tree of [s]. *) inter (if (q land m) = 0 then s0 else s1) t else if (X.shorter n m) & (match_prefix p q n) then (* [p] contains [q]. Intersect [s] with a sub-tree of [t]. *) inter s (if (p land n) = 0 then t0 else t1) else (* The prefixes disagree. *) Empty (* [disjoint s1 s2] returns [true] if and only if the sets [s1] and [s2] are disjoint, i.e. iff their intersection is empty. It is a specialized version of [inter], which uses less space. 
*) exception NotDisjoint let disjoint s t = let rec inter s t = match s, t with | Empty, _ | _, Empty -> () | Leaf x, _ -> if mem x t then raise NotDisjoint | _, Leaf x -> if mem x s then raise NotDisjoint | Branch(p, m, s0, s1), Branch(q, n, t0, t1) -> if (p = q) & (m = n) then begin inter s0 t0; inter s1 t1 end else if (X.shorter m n) & (match_prefix q p m) then inter (if (q land m) = 0 then s0 else s1) t else if (X.shorter n m) & (match_prefix p q n) then inter s (if (p land n) = 0 then t0 else t1) else () in try inter s t; true with NotDisjoint -> false (* [iter f s] invokes [f x], in turn, for each element [x] of the set [s]. Elements are presented to [f] according to some unspecified, but fixed, order. *) let rec iter f = function | Empty -> () | Leaf x -> f x | Branch (_, _, tree0, tree1) -> iter f tree0; iter f tree1 (* [fold f s seed] invokes [f x accu], in turn, for each element [x] of the set [s]. Elements are presented to [f] according to some unspecified, but fixed, order. The initial value of [accu] is [seed]; then, at each new call, its value is the value returned by the previous invocation of [f]. The value returned by [fold] is the final value of [accu]. In other words, if $s = \{ x_1, x_2, \ldots, x_n \}$, where $x_1 < x_2 < \ldots < x_n$, then [fold f s seed] computes $([f]\,x_n\,\ldots\,([f]\,x_2\,([f]\,x_1\,[seed]))\ldots)$. *) let rec fold f s accu = match s with | Empty -> accu | Leaf x -> f x accu | Branch (_, _, s0, s1) -> fold f s1 (fold f s0 accu) (* [elements s] is a list of all elements in the set [s]. *) let elements s = fold (fun tl hd -> tl :: hd) s [] (* [fold_rev] performs exactly the same job as [fold], but presents elements to [f] in the opposite order. *) let rec fold_rev f s accu = match s with | Empty -> accu | Leaf x -> f x accu | Branch (_, _, s0, s1) -> fold_rev f s0 (fold_rev f s1 accu) (* [iter2] does not make much sense for sets of integers. Better warn the user. 
*) let rec iter2 f t1 t2 = assert false (* [iterator s] returns a stateful iterator over the set [s]. That is, if $s = \{ x_1, x_2, \ldots, x_n \}$, where $x_1 < x_2 < \ldots < x_n$, then [iterator s] is a function which, when invoked for the $k^{\text{th}}$ time, returns [Some]$x_k$, if $k\leq n$, and [None] otherwise. Such a function can be useful when one wishes to iterate over a set's elements, without being restricted by the call stack's discipline. For more comments about this algorithm, please see module [Baltree], which defines a similar one. *) let iterator s = let remainder = ref [ s ] in let rec next () = match !remainder with | [] -> None | Empty :: parent -> remainder := parent; next() | (Leaf x) :: parent -> remainder := parent; Some x | (Branch(_, _, s0, s1)) :: parent -> remainder := s0 :: s1 :: parent; next () in next (* [exists p s] returns [true] if and only if some element of [s] matches the predicate [p]. *) exception Exists let exists p s = try iter (fun x -> if p x then raise Exists ) s; false with Exists -> true (* [compare] is an ordering over sets. *) exception Got of int let compare s1 s2 = let iterator2 = iterator s2 in try iter (fun x1 -> match iterator2() with | None -> raise (Got 1) | Some x2 -> let c = Pervasives.compare x1 x2 in if c <> 0 then raise (Got c) ) s1; match iterator2() with | None -> 0 | Some _ -> -1 with Got c -> c (* [equal] implements equality over sets. *) let equal s1 s2 = compare s1 s2 = 0 (* [subset] implements the subset predicate over sets. In other words, [subset s t] returns [true] if and only if $s\subseteq t$. It is a specialized version of [diff]. 
*) exception NotSubset let subset s t = let rec diff s t = match s, t with | Empty, _ -> () | _, Empty | Branch _, Leaf _ -> raise NotSubset | Leaf x, _ -> if not (mem x t) then raise NotSubset | Branch(p, m, s0, s1), Branch(q, n, t0, t1) -> if (p = q) & (m = n) then begin diff s0 t0; diff s1 t1 end else if (X.shorter n m) & (match_prefix p q n) then diff s (if (p land n) = 0 then t0 else t1) else (* Either [q] contains [p], which means at least one of [s]'s sub-trees is not contained within [t], or the prefixes disagree. In either case, the subset relationship cannot possibly hold. *) raise NotSubset in try diff s t; true with NotSubset -> false (* [filter p s] returns the subset of [s] formed by all elements which satisfy the predicate [p]. *) let filter predicate s = let modified = ref false in let subset = fold (fun element subset -> if predicate element then add element subset else begin modified := true; subset end ) s Empty in if !modified then subset else s (* [map f s] computes the image of [s] through [f]. *) let map f s = fold (fun element accu -> add (f element) accu ) s Empty (* [monotone_map] and [endo_map] do not make much sense for sets of integers. Better warn the user. *) let monotone_map f s = assert false let endo_map f s = assert false end (*i --------------------------------------------------------------------------------------------------------------- i*) (*s \mysection{Relating sets and maps} *) (* Back to the world of maps. Let us now describe the relationship which exists between maps and their domains. *) (* [domain m] returns [m]'s domain. *) let rec domain = function | Empty -> Domain.Empty | Leaf (k, _) -> Domain.Leaf k | Branch (p, m, t0, t1) -> Domain.Branch (p, m, domain t0, domain t1) (* [lift f s] returns the map $k\mapsto f(k)$, where $k$ ranges over a set of keys [s]. 
*) let rec lift f = function | Domain.Empty -> Empty | Domain.Leaf k -> Leaf (k, f k) | Domain.Branch (p, m, t0, t1) -> Branch(p, m, lift f t0, lift f t1) (* [build] is a ``smart constructor''. It builds a [Branch] node with the specified arguments, but ensures that the newly created node does not have an [Empty] child. *) let build p m t0 t1 = match t0, t1 with | Empty, Empty -> Empty | Empty, _ -> t1 | _, Empty -> t0 | _, _ -> Branch(p, m, t0, t1) (* [corestrict m d] performs a co-restriction of the map [m] to the domain [d]. That is, it returns the map $k\mapsto m(k)$, where $k$ ranges over all keys bound in [m] but \emph{not} present in [d]. Its code resembles [diff]'s. *) let rec corestrict s t = match s, t with | Empty, _ | _, Domain.Empty -> s | Leaf (k, _), _ -> if Domain.mem k t then Empty else s | _, Domain.Leaf k -> remove k s | Branch(p, m, s0, s1), Domain.Branch(q, n, t0, t1) -> if (p = q) & (m = n) then build p m (corestrict s0 t0) (corestrict s1 t1) else if (X.shorter m n) & (match_prefix q p m) then if (q land m) = 0 then build p m (corestrict s0 t) s1 else build p m s0 (corestrict s1 t) else if (X.shorter n m) & (match_prefix p q n) then corestrict s (if (p land n) = 0 then t0 else t1) else s end (*i --------------------------------------------------------------------------------------------------------------- i*) (*s \mysection{Instantiating the functor} *) module Little = Make(Endianness.Little) module Big = Make(Endianness.Big) menhir-20130116/src/tableBackend.mli0000644000175000017500000000212312075533603016116 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. 
This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* The (table-based) code generator. *) module Run (T : sig end) : sig val program: IL.program end menhir-20130116/src/pprint.mli0000644000175000017500000002206712075533603015104 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* ------------------------------------------------------------------------- *) (* Basic combinators for building documents. *) type document val empty: document val hardline: document val char: char -> document val substring: string -> int -> int -> document val text: string -> document val blank: int -> document val (^^): document -> document -> document val nest: int -> document -> document val column: (int -> document) -> document val nesting: (int -> document) -> document val group: document -> document val ifflat: document -> document -> document (* ------------------------------------------------------------------------- *) (* Low-level combinators for alignment and indentation. *) val align: document -> document val hang: int -> document -> document val indent: int -> document -> document (* ------------------------------------------------------------------------- *) (* High-level combinators for building documents. *) (* [break n] Puts [n] spaces in flat mode and a new line otherwise. 
Equivalent to: [ifflat (String.make n ' ') hardline] *) val break: int -> document (* [break0] equivalent to [break 0] *) val break0: document (* [break1] equivalent to [break 1] *) val break1: document val string: string -> document val words: string -> document val lparen: document val rparen: document val langle: document val rangle: document val lbrace: document val rbrace: document val lbracket: document val rbracket: document val squote: document val dquote: document val bquote: document val semi: document val colon: document val comma: document val space: document val dot: document val sharp: document val backslash: document val equals: document val qmark: document val tilde: document val at: document val percent: document val dollar: document val caret: document val ampersand: document val star: document val plus: document val minus: document val underscore: document val bang: document val bar: document val squotes: document -> document val dquotes: document -> document val bquotes: document -> document val braces: document -> document val parens: document -> document val angles: document -> document val brackets: document -> document val fold: (document -> document -> document) -> document list -> document val fold1: (document -> document -> document) -> document list -> document val fold1map: (document -> document -> document) -> ('a -> document) -> 'a list -> document val sepmap: document -> ('a -> document) -> 'a list -> document val optional: ('a -> document) -> 'a option -> document (* [prefix left right] Flat layout: [left] [right] Otherwise: [left] [right] *) val prefix: string -> document -> document (* [infix middle left right] Flat layout: [left] [middle] [right] Otherwise: [left] [middle] [right] *) val infix: string -> document -> document -> document (* [infix_com middle left right] Flat layout: [left][middle] [right] Otherwise: [left][middle] [right] *) val infix_com: string -> document -> document -> document (* [infix_dot middle left right] 
Flat layout: [left][middle][right] Otherwise: [left][middle] [right] *) val infix_dot: string -> document -> document -> document (* [surround nesting break open_doc contents close_doc] *) val surround: int -> document -> document -> document -> document -> document (* [surround1 open_txt contents close_txt] Flat: [open_txt][contents][close_txt] Otherwise: [open_txt] [contents] [close_txt] *) val surround1: string -> document -> string -> document (* [surround2 open_txt contents close_txt] Flat: [open_txt] [contents] [close_txt] Otherwise: [open_txt] [contents] [close_txt] *) val surround2: string -> document -> string -> document (* [soft_surround nesting break open_doc contents close_doc] *) val soft_surround: int -> document -> document -> document -> document -> document (* [seq indent break empty_seq open_seq sep_seq close_seq contents] *) val seq: int -> document -> document -> document -> document -> document -> document list -> document (* [seq1 open_seq sep_seq close_seq contents] Flat layout: [open_seq][contents][sep_seq]...[sep_seq][contents][close_seq] Otherwise: [open_seq] [contents][sep_seq]...[sep_seq][contents] [close_seq] *) val seq1: string -> string -> string -> document list -> document (* [seq2 open_seq sep_seq close_seq contents] Flat layout: [open_seq] [contents][sep_seq]...[sep_seq][contents] [close_seq] Otherwise: [open_seq] [contents][sep_seq]...[sep_seq][contents] [close_seq] *) val seq2: string -> string -> string -> document list -> document (* [group1 d] equivalent to [group (nest 1 d)] *) val group1: document -> document (* [group2 d] equivalent to [group (nest 2 d)] *) val group2: document -> document module Operators : sig val ( ^^ ) : document -> document -> document val ( !^ ) : string -> document val ( ^/^ ) : document -> document -> document val ( ^//^ ) : document -> document -> document val ( ^@^ ) : document -> document -> document val ( ^@@^ ) : document -> document -> document end (* 
------------------------------------------------------------------------- *) (* A signature for document renderers. *) module type RENDERER = sig (* Output channels. *) type channel (* [pretty rfrac width channel document] pretty-prints the document [document] to the output channel [channel]. The parameter [width] is the maximum number of characters per line. The parameter [rfrac] is the ribbon width, a fraction relative to [width]. The ribbon width is the maximum number of non-indentation characters per line. *) val pretty: float -> int -> channel -> document -> unit (* [compact channel document] prints the document [document] to the output channel [channel]. No indentation is used. All newline instructions are respected, that is, no groups are flattened. *) val compact: channel -> document -> unit end (* ------------------------------------------------------------------------- *) (* Renderers to output channels and to memory buffers. *) module Channel : RENDERER with type channel = out_channel module Buffer : RENDERER with type channel = Buffer.t (* ------------------------------------------------------------------------- *) (* A signature for value representations. This is compatible with the associated Camlp4 generator: SwitchValueRepresentation *) module type VALUE_REPRESENTATION = sig (* The type of value representation *) type t (* [variant type_name data_constructor_name tag arguments] Given information about the variant and its arguments, this function produces a new value representation. *) val variant : string -> string -> int -> t list -> t (* [record type_name fields] Given a type name and a list of record fields, this function produces the value representation of a record. *) val record : string -> (string * t) list -> t (* [tuple arguments] Given a list of value representation this function produces a new value representation. 
*) val tuple : t list -> t (* ------------------------------------------------------------------------- *) (* Value representation for primitive types. *) val string : string -> t val int : int -> t val int32 : int32 -> t val int64 : int64 -> t val nativeint : nativeint -> t val float : float -> t val char : char -> t val bool : bool -> t val option : ('a -> t) -> 'a option -> t val list : ('a -> t) -> 'a list -> t val array : ('a -> t) -> 'a array -> t val ref : ('a -> t) -> 'a ref -> t (* Value representation for any other value. *) val unknown : string -> 'a -> t end (* A signature for source printers. *) module type DOCUMENT_VALUE_REPRESENTATION = VALUE_REPRESENTATION with type t = document module ML : DOCUMENT_VALUE_REPRESENTATION (* Deprecated *) val line: document val linebreak: document val softline: document val softbreak: document menhir-20130116/src/mark.mli0000644000175000017500000000273512075533603014522 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (** This module implements a very simple notion of ``mark''. *) (** The type of marks. *) type t (** [fresh()] generates a fresh mark, that is, a mark that is guaranteed to be distinct from all existing marks. *) val fresh: unit -> t (** [same mark1 mark2] tells whether [mark1] and [mark2] are the same mark, that is, were created by the same call to [fresh]. *) val same: t -> t -> bool (** [none] is a distinguished mark, created via an initial call to [fresh()]. 
*) val none: t menhir-20130116/src/tokenType.ml0000644000175000017500000000731112075533602015373 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module deals with a few details regarding the definition of the [token] type. In particular, if [--only-tokens] was specified, it emits the type definition and exits. *) open Syntax open UnparameterizedSyntax open IL open CodeBits (* This is the conventional name of the [token] type, with no prefix. A prefix is possibly appended to it below, where [tctoken] is redefined before being exported. *) let tctoken = "token" (* This is the definition of the type of tokens. *) let tokentypedef = let datadefs = StringMap.fold (fun token properties defs -> (* Pseudo-tokens (used in %prec declarations, but never declared using %token) are filtered out. *) if properties.tk_is_declared then let params = match properties.tk_ocamltype with | None -> [] | Some t -> [ TypTextual t ] in { dataname = token; datavalparams = params; datatypeparams = None } :: defs else defs ) PreFront.grammar.tokens [] in { typename = tctoken; typeparams = []; typerhs = TDefSum datadefs; typeconstraint = None } (* Consult the command line options to determine what to do. If we were asked to only produce a type definition, then do so and stop. Otherwise, tell the code generator whether it should produce a type definition as part of the code. 
*) let tokentypedef, tokenprefix = match Settings.token_type_mode with | Settings.TokenTypeOnly -> (* Create both an .mli file and an .ml file. This is made necessary by the fact that the two can be different when there are functor parameters. *) let module P = Printer.Make (struct let f = open_out (Settings.base ^ ".mli") let raw_stretch_action = false let locate_stretches = None let parenthesize_let_lhs = false end) in P.interface { paramdecls = PreFront.grammar.parameters; excdecls = []; typedecls = [ tokentypedef ]; valdecls = [] }; let module P = Printer.Make (struct let f = open_out (Settings.base ^ ".ml") let raw_stretch_action = false let locate_stretches = None end) in P.program { paramdefs = PreFront.grammar.parameters; prologue = []; excdefs = []; typedefs = [ tokentypedef ]; nonrecvaldefs = []; valdefs = []; moduledefs = []; postlogue = []; }; exit 0 | Settings.CodeOnly m -> [], (fun id -> m ^ "." ^ id) | Settings.TokenTypeAndCode -> [ tokentypedef ], (fun id -> id) (* Redefine the name of the [token] type to take a possible prefix into account. *) let tctoken = tokenprefix tctoken let ttoken = TypApp (tctoken, []) (* The type of lexers. *) let tlexer = TypArrow (tlexbuf, ttoken) menhir-20130116/src/listMonad.ml0000644000175000017500000000351312075533603015344 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) type 'a m = 'a list let return x = [ x ] let bind l f = List.flatten (List.map f l) let ( >>= ) l f = bind l f (* 1. 
(return x) >>= f == f x bind [ x ] f = List.flatten (List.map f [ x ]) = f x 2. m >>= return == m bind l return = List.flatten (List.map (fun x -> [ x ]) (x1::x2::..::xn)) = List.flatten ([x1]::...::[xn]) = x1::...::xn = l 3. (m >>= f) >>= g == m >>= (\x -> f x >>= g) bind (bind l f) g = List.flatten (List.map g (List.flatten (List.map f (x1::...::xn)))) = List.flatten (List.map g (f x1 :: f x2 :: ... :: f xn)) = List.flatten (List.map g ([fx1_1; fx1_2 ... ] :: [fx2_1; ... ] :: ...)) = List.flatten ([ g fx1_1; g fx_1_2 ... ] :: [ g fx_2_1; ... ] ...) = List.flatten (List.map (fun x -> List.flatten (List.map g (f x))) l) = bind l (fun x -> bind (f x) g) *) menhir-20130116/src/lexmli.mll0000644000175000017500000000507512075533602015064 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This code analyzes the output of [ocamlc -i] and returns a list of identifiers together with their types. Types are represented by offsets in the source string. *) { let fail () = Error.error [] "failed to make sense of ocamlc's output." } let whitespace = [ ' ' '\t' '\n' '\r' ] let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_'] let identchar = ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '0'-'9'] (* '\'' forbidden *) (* Read a list of bindings. We start immediately after a [val] keyword, so we expect either an end marker, or an identifier, followed by a colon, followed by a type, followed by another list of bindings. 
In the latter case, we recognize the identifier and the colon, record where the type begins, and pass control to [type_then_bindings]. *) rule bindings env = parse | "menhir_end_marker : int" { env } | whitespace* ((lowercase identchar*) as id) whitespace* ':' whitespace* { type_then_bindings env id (Lexing.lexeme_end lexbuf) lexbuf } | _ | eof { fail() } (* Read a type followed by a list of bindings. *) and type_then_bindings env id openingofs = parse | whitespace+ "val" whitespace { let closingofs = Lexing.lexeme_start lexbuf in bindings ((id, openingofs, closingofs) :: env) lexbuf } | _ { type_then_bindings env id openingofs lexbuf } | eof { fail() } (* Skip up to the first [val] keyword that follows the begin marker, and start from there. *) and main = parse | _* "val menhir_begin_marker : int" whitespace+ "val" whitespace+ { bindings [] lexbuf } | _ | eof { fail() } menhir-20130116/src/gSet.ml0000644000175000017500000000703712075533603014321 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This is a stripped down version of [GSet] that describes both [Patricia] and [CompressedBitSet]. The full version of [GSet] is in [AlphaLib]. *) module type S = sig (* Elements are assumed to have a natural total order. *) type element (* Sets. *) type t (* The empty set. *) val empty: t (* [is_empty s] tells whether [s] is the empty set. *) val is_empty: t -> bool (* [singleton x] returns a singleton set containing [x] as its only element. 
*) val singleton: element -> t (* [cardinal s] returns the cardinal of [s]. *) val cardinal: t -> int (* [choose s] returns an arbitrarily chosen element of [s], if [s] is nonempty, and raises [Not_found] otherwise. *) val choose: t -> element (* [mem x s] returns [true] if and only if [x] appears in the set [s]. *) val mem: element -> t -> bool (* [add x s] returns a set whose elements are all elements of [s], plus [x]. *) val add: element -> t -> t (* [remove x s] returns a set whose elements are all elements of [s], except [x]. *) val remove: element -> t -> t (* [union s1 s2] returns the union of the sets [s1] and [s2]. *) val union: t -> t -> t (* [inter s t] returns the set intersection of [s] and [t], that is, $s\cap t$. *) val inter: t -> t -> t (* [disjoint s1 s2] returns [true] if and only if the sets [s1] and [s2] are disjoint, i.e. iff their intersection is empty. *) val disjoint: t -> t -> bool (* [iter f s] invokes [f x], in turn, for each element [x] of the set [s]. Elements are presented to [f] in increasing order. *) val iter: (element -> unit) -> t -> unit (* [fold f s seed] invokes [f x accu], in turn, for each element [x] of the set [s]. Elements are presented to [f] in increasing order. The initial value of [accu] is [seed]; then, at each new call, its value is the value returned by the previous invocation of [f]. The value returned by [fold] is the final value of [accu]. In other words, if $s = \{ x_1, x_2, \ldots, x_n \}$, where $x_1 < x_2 < \ldots < x_n$, then [fold f s seed] computes $([f]\,x_n\,\ldots\,([f]\,x_2\,([f]\,x_1\,[seed]))\ldots)$. *) val fold: (element -> 'b -> 'b) -> t -> 'b -> 'b (* [elements s] is a list of all elements in the set [s]. *) val elements: t -> element list (* [compare] is an ordering over sets. *) val compare: t -> t -> int (* [equal] implements equality over sets. *) val equal: t -> t -> bool (* [subset] implements the subset predicate over sets. 
*) val subset: (t -> t -> bool) end menhir-20130116/src/conflict.mli0000644000175000017500000000216412075533603015365 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module explains conflicts. Explanations are written to the .conflicts file. No functionality is offered by this module. *) menhir-20130116/src/sentenceParser.mly0000644000175000017500000000316512075533603016567 0ustar stephsteph/**************************************************************************/ /* */ /* Menhir */ /* */ /* Franois Pottier, INRIA Rocquencourt */ /* Yann Rgis-Gianas, PPS, Universit Paris Diderot */ /* */ /* Copyright 2005-2008 Institut National de Recherche en Informatique */ /* et en Automatique. All rights reserved. This file is distributed */ /* under the terms of the Q Public License version 1.0, with the change */ /* described in file LICENSE. */ /* */ /**************************************************************************/ /* This parser is used to read the sentences provided on the standard input channel when [--interpret] is enabled. */ /* A sentence is a pair of an optional non-terminal start symbol and a list of terminal symbols. 
*/ %{ open Grammar %} %token COLON EOF EOL %token TERMINAL %token NONTERMINAL %type <(Grammar.Nonterminal.t option * Grammar.Terminal.t list) option> sentence %start sentence %% sentence: | EOF { None } | NONTERMINAL COLON terminals EOL { Some (Some $1, $3) } | terminals EOL { Some (None, $1) } terminals: | { [] } | TERMINAL terminals { $1 :: $2 } menhir-20130116/src/infer.ml0000644000175000017500000002574612075533603014531 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) open Syntax open Stretch open UnparameterizedSyntax open IL open CodeBits open TokenType (* ------------------------------------------------------------------------- *) (* Naming conventions. *) (* The type variable associated with a nonterminal symbol. Its name begins with a prefix which ensures that it cannot clash with Objective Caml keywords. *) let ntvar symbol = Printf.sprintf "tv_%s" (Misc.normalize symbol) (* The name of the temporary file. *) let base = Settings.base let mlname = base ^ ".ml" let mliname = base ^ ".mli" (* ------------------------------------------------------------------------- *) (* Code production. *) (* [nttype nt] is the type of the nonterminal [nt], as currently known. *) let nttype grammar nt = try TypTextual (StringMap.find nt grammar.types) with Not_found -> TypVar (ntvar nt) (* [is_standard] determines whether a branch derives from a standard library definition. The method, based on a file name, is somewhat fragile. 
*) let is_standard branch = List.for_all (fun x -> x = Settings.stdlib_filename) (Action.filenames branch.action) (* [actiondef] turns a branch into a function definition. *) let actiondef grammar symbol branch = (* Construct a list of the semantic action's formal parameters that depend on the production's right-hand side. *) let _, formals = List.fold_left (fun (i, formals) (symbol, ido) -> let id, startp, endp, starto, endo = match ido with | None -> (* Symbols for which no name was chosen will be represented by variables named _1, _2, etc. *) Printf.sprintf "_%d" (i + 1), Printf.sprintf "_startpos__%d_" (i + 1), Printf.sprintf "_endpos__%d_" (i + 1), Printf.sprintf "_startofs__%d_" (i + 1), Printf.sprintf "_endofs__%d_" (i + 1) | Some id -> (* Symbols for which a name was explicitly chosen will be known by that name in semantic actions. *) id, Printf.sprintf "_startpos_%s_" id, Printf.sprintf "_endpos_%s_" id, Printf.sprintf "_startofs_%s_" id, Printf.sprintf "_endofs_%s_" id in let t = try let props = StringMap.find symbol grammar.tokens in (* Symbol is a terminal. *) match props.tk_ocamltype with | None -> tunit | Some ocamltype -> TypTextual ocamltype with Not_found -> (* Symbol is a nonterminal. *) nttype grammar symbol in i + 1, PAnnot (PVar id, t) :: PAnnot (PVar startp, tposition) :: PAnnot (PVar endp, tposition) :: PAnnot (PVar starto, tint) :: PAnnot (PVar endo, tint) :: formals ) (0, []) branch.producers in (* Extend the list with parameters that do not depend on the right-hand side. *) let formals = PAnnot (PVar "_previouserror", tint) :: PAnnot (PVar "_eRR", texn) :: PAnnot (PVar "_startpos", tposition) :: PAnnot (PVar "_endpos", tposition) :: PAnnot (PVar "_startofs", tint) :: PAnnot (PVar "_endofs", tint) :: formals in (* Construct a function definition out of the above bindings and the semantic action. 
*) let body = EAnnot ( Action.to_il_expr branch.action, type2scheme (nttype grammar symbol) ) in match formals with | [] -> body | _ -> EFun (formals, body) (* [program] turns an entire grammar into a test program. *) let program grammar = (* Turn the grammar into a bunch of function definitions. Grammar productions that derive from the standard library are reflected first, so that type errors are not reported in them. *) let bindings1, bindings2 = StringMap.fold (fun symbol rule (bindings1, bindings2) -> List.fold_left (fun (bindings1, bindings2) branch -> if is_standard branch then (PWildcard, actiondef grammar symbol branch) :: bindings1, bindings2 else bindings1, (PWildcard, actiondef grammar symbol branch) :: bindings2 ) (bindings1, bindings2) rule.branches ) grammar.rules ([], []) in (* Create entry points whose types are the unknowns that we are looking for. *) let ps, ts = StringMap.fold (fun symbol _ (ps, ts) -> PVar (Misc.normalize symbol) :: ps, nttype grammar symbol :: ts ) grammar.rules ([], []) in let def = { valpublic = true; valpat = PTuple ps; valval = ELet (bindings1 @ bindings2, EAnnot (bottom, type2scheme (TypTuple ts))) } in (* Insert markers to delimit the part of the file that we are interested in. These markers are recognized by [Lexmli]. This helps skip the values, types, exceptions, etc. that might be defined by the prologue or postlogue. *) let begindef = { valpublic = true; valpat = PVar "menhir_begin_marker"; valval = EIntConst 0 } and enddef = { valpublic = true; valpat = PVar "menhir_end_marker"; valval = EIntConst 0 } in (* Issue the test program. We include the definition of the type of tokens, because, in principle, the semantic actions may refer to it or to its data constructors. 
*) { paramdefs = PreFront.grammar.parameters; prologue = PreFront.grammar.preludes; excdefs = []; typedefs = tokentypedef; nonrecvaldefs = [ begindef; def; enddef ]; moduledefs = []; valdefs = []; postlogue = PreFront.grammar.postludes } (* ------------------------------------------------------------------------- *) (* Writing the program associated with a grammar to a file. *) let write grammar () = let ml = open_out mlname in let module P = Printer.Make (struct let f = ml let locate_stretches = Some mlname let raw_stretch_action = false end) in P.program (program grammar); close_out ml let remove filename () = Sys.remove filename (* ------------------------------------------------------------------------- *) (* Moving away and restoring a file. *) let mover filename = if Sys.file_exists filename then let newname = filename ^ ".moved_by_menhir" in let moveaway () = Sys.rename filename newname and restore () = Sys.rename newname filename in moveaway, restore else let nothing () = () in nothing, nothing (* ------------------------------------------------------------------------- *) (* Running ocamldep on the program. *) type entry = string (* basename *) * string (* filename *) type line = entry (* target *) * entry list (* dependencies *) let depend grammar = (* Create an [.ml] file and an [.mli] file, then invoke ocamldep to compute dependencies for us. *) (* If an old [.ml] or [.mli] file exists, we are careful to preserve it. We temporarily move it out of the way and restore it when we are done. There is no reason why dependency analysis should destroy existing files. *) let moveml, restoreml = mover mlname and movemli, restoremli = mover mliname in let output = IO.winvoke [ moveml; movemli; write grammar; Interface.write ] (Printf.sprintf "%s %s %s" Settings.ocamldep (Filename.quote mlname) (Filename.quote mliname)) [ remove mlname; remove mliname; restoreml; restoremli ] in (* Echo ocamldep's output. 
*) print_string output; (* If [--raw-depend] was specified on the command line, stop here. This option is used by omake, which performs its own postprocessing of [ocamldep]'s output. For normal [make] users, who use [--depend], some postprocessing is required, which is performed below. *) begin match Settings.depend with | Settings.OMNone -> assert false (* we wouldn't be here in the first place *) | Settings.OMRaw -> () | Settings.OMPostprocess -> (* Make sense out of ocamldep's output. *) let lexbuf = Lexing.from_string output in let lines : line list = try Lexdep.main lexbuf with Lexdep.Error msg -> (* Echo the error message, followed with ocamldep's output. *) Error.error [] (msg ^ output) in (* Look for the line that concerns the [.cmo] target, and echo a modified version of this line, where the [.cmo] target is replaced with [.ml] and [.mli] targets, and where the dependency over the [.cmi] file is dropped. In doing so, we assume that the user's [Makefile] supports bytecode compilation, so that it makes sense to request [bar.cmo] to be built, as opposed to [bar.cmx]. This is not optimal, but will do. [camldep] exhibits the same behavior. *) (* TEMPORARY allow ocamldep to be called with flag -native. *) List.iter (fun ((_, target_filename), dependencies) -> if Filename.check_suffix target_filename ".cmo" then let dependencies = List.filter (fun (basename, _) -> basename <> base ) dependencies in if List.length dependencies > 0 then begin Printf.printf "%s.ml %s.mli:" base base; List.iter (fun (basename, filename) -> Printf.printf " %s" filename ) dependencies; Printf.printf "\n%!" end ) lines end; (* Stop. *) exit 0 (* ------------------------------------------------------------------------- *) (* Inferring types for a grammar's nonterminals. *) let infer grammar = (* Invoke ocamlc to do type inference for us. 
*) let output = IO.winvoke [ write grammar ] (Printf.sprintf "%s -c -i %s" Settings.ocamlc (Filename.quote mlname)) [ remove mlname ] in (* Make sense out of ocamlc's output. *) let env : (string * int * int) list = Lexmli.main (Lexing.from_string output) in let env : (string * ocamltype) list = List.map (fun (id, openingofs, closingofs) -> id, Inferred (String.sub output openingofs (closingofs - openingofs)) ) env in (* Augment the grammar with new %type declarations. *) let types = StringMap.fold (fun symbol _ types -> let ocamltype = try List.assoc (Misc.normalize symbol) env with Not_found -> assert false in if StringMap.mem symbol grammar.types then (* If there was a declared type, keep it. *) types else (* Otherwise, insert the inferred type. *) StringMap.add symbol ocamltype types ) grammar.rules grammar.types in { grammar with types = types } menhir-20130116/src/cst.mli0000644000175000017500000000347312075533603014361 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) open Grammar (* Concrete syntax trees. *) (* A concrete syntax tree is one of a leaf -- which corresponds to a terminal symbol; a node -- which corresponds to a non-terminal symbol, and whose immediate descendants form an expansion of that symbol; or an error leaf -- which corresponds to a point where the [error] pseudo-token was shifted. 
*) type cst = | CstTerminal of Terminal.t | CstNonTerminal of Production.index * cst array | CstError (* This is a (mostly) unambiguous printer for concrete syntax trees, in an sexp-like notation. *) val print: out_channel -> cst -> unit (* This is a pretty-printer for concrete syntax trees. The notation is the same as that used by the above printer; the only difference is that the [Pprint] library is used to manage indentation. *) val show: out_channel -> cst -> unit menhir-20130116/src/parserAux.mli0000644000175000017500000000353312075533603015537 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* This module provides utilities that are shared by the two versions of the parser. *) open Syntax (* TEMPORARY document *) val current_token_precedence: Lexing.position -> Lexing.position -> precedence_level val current_reduce_precedence: unit -> precedence_level (* [check_disjunctive_production] accepts production group and checks that they all productions in the group define the same set of identifiers. It also checks that the semantic action does not use any [$i] keyword. *) val check_production_group: (producer list * 'a * 'b * 'c) list -> Lexing.position -> Lexing.position -> Action.t -> unit (* [override pos oprec1 oprec2] decides which of the two optional %prec declarations [oprec1] and [oprec2] applies to a production. It signals an error if the two are present. 
*) val override: Positions.t -> 'a option -> 'a option -> 'a option menhir-20130116/CHANGES0000644000175000017500000001574412075533602013274 0ustar stephsteph2013/01/16: "menhir --depend" was broken since someone added new whitespace in the output of ocamldep. Fixed. 2012/12/19: Fixed a compilation problem that would arise when a file produced by Menhir on a 64-bit platform was compiled by ocaml on a 32-bit platform. 2012/08/25: Performance improvements in the computation of various information about the automaton (module [Invariant]). The improvements will be noticeable only for very large automata. 2012/06/07: The option --log-grammar 3 (and above) now causes the FOLLOW sets for *terminal* symbols to be computed and displayed. 2012/05/25: Added the flag --canonical, which causes Menhir to produce a canonical LR(1) automaton in the style of Knuth. This means that no merging of states takes place during the construction of the automaton, and that no default reductions are allowed. 2012/01/23: Fixed a bug whereby a %nonassoc declaration was not respected. This declaration requests that a shift/reduce conflict be reduced in favor of neither shifting nor reducing, that is, a syntax error must occur. However, due to an unforeseen interaction with the "default reduction" mechanism, this declaration was sometimes ignored and reduction would take place. 2012/01/09: Changes in the (undocumented) Coq back-end so as to match the ESOP 2012 paper. 2011/10/19: The Makefile now tests whether Unix or Windows is used (the test is performed by evaluating Sys.os_type under ocaml) and changes a couple settings accordingly: - the executable file name is either menhir or menhir.exe - the object file suffix is either .o or .obj 2011/10/19: Added --strict, which causes many warnings about the grammar and about the automaton to be considered errors. 2011/10/19: The # annotations that are inserted in the generated .ml file now retain their full path. 
(That is, we no longer use [Filename.basename].) This implies that the # annotations depend on how menhir is invoked -- e.g. "menhir foo/bar.mly" and "cd foo && menhir bar.mly" will produce different results. Nevertheless, this seems reasonable and useful (e.g. in conjunction with ocamlbuild and a hierarchy of files). Thanks to Daniel Weil. 2011/10/06: With the -lg 1 switch, Menhir now indicates whether the grammar is SLR(1). 2011/05/24: Removed the lock in ocamldep.wrapper. It is the responsibility of the user to avoid interferences with other processes (or other instances of the script) that create and/or remove files. 2011/04/28: The (internal) computation of the automaton's invariant was broken and has been fixed. Surprisingly, this does not seem to affect the generated code, (which was correct,) so no observable bug is fixed. Hopefully no bug is introduced! 2011/04/07: The grammar description files (.mly) are now read in up front and stored in memory while they are parsed. This allows us to avoid the use of pos_in and seek_in, which do not work correctly when CRLF conversion is being performed. 2011/04/05: Fixed a bug in the type inference module (for parameterized non-terminals) which would cause an infinite loop. 2011/01/24: Fixed a bug that would cause an assertion failure in the generated parser in some situations where the input stream was incorrect and the grammar involved the error token. The fix might cause grammars that use the error token to behave differently (hopefully more accurately) as of now. 2009/06/18: Makefile changes: build and install only the bytecode version of menhirLib when TARGET=byte is set. 2009/02/06: Fixed ocamldep.wrapper to avoid quoting the name of the ocaml command. This is hoped to fix a compilation problem under MinGW. 2009/02/04: A Makefile fix to avoid a problem under Windows/Cygwin. Renamed the ocaml-check-version script so as to avoid a warning. 
2008/09/05: Ocaml summer project: added --interpret, --table, and --suggest-*. 2008/08/06: Fixed a problem that would cause the code inliner to abort when a semantic value and a non-terminal symbol happened to have the same name. 2008/08/06: Removed code sharing. 2008/06/20: Removed an incorrect assertion that caused failures (lr1.ml, line 134). 2007/12/05: Disabled code sharing by default, as it is currently broken. (See Yann's message; assertion failure at runtime.) 2007/12/01: Added an optimization to share code among states that have identical outgoing transition tables. 2007/08/30: Small Makefile change: create an executable file for check-ocaml-version in order to work around the absence of dynamic loading on some platforms. 2007/05/20: Made a fundamental change in the construction of the LR(1) automaton in order to eliminate a bug that could lead to spurious conflicts -- thanks to Ketti for submitting a bug report. 2007/05/18: Added --follow-construction to help understand the construction of the LR(1) automaton (very verbose). 2007/05/11: Code generation: more explicit qualifications with Pervasives so as to avoid capture when the user redefines some of the built-in operators, such as (+). Added a new demo (calc-param) that shows how to use %parameter. 2007/03/22: Makefile improvements (check for PREFIX; bootstrap in bytecode now also available). Slight changes to OMakefile.shared. 2007/02/15: Portability fix in Makefile and Makefile.shared (avoided "which"). 2006/12/15: Portability fix in Makefile.shared (replaced "&>" with "2>&1 >"). 2006/06/23: Made a slight restriction to Pager's criterion so as to never introduce fake conflict tokens (see Lr0.compatible). This might help make conflict explanations more accurate in the future. 2006/06/16: Fixed bug that would cause positions to become invalid after %inlining. 2006/06/15: Fixed --depend to be more lenient when analyzing ocamldep's output. 
Added --raw-depend which transmits ocamldep's output unchanged (for use in conjunction with omake). 2006/06/12: Fixed bug that would cause --only-preprocess to print %token declarations also for pseudo-tokens. Fixed bug that caused some precedence declarations to be incorrectly reported as useless. Improved things so that useless pseudo-tokens now also cause warnings. Fixed bug that would cause %type directives for terminal symbols to be incorrectly accepted. Fixed bug that would occur when a semantic action containing $i keywords was %inlined. 2006/05/05: Fixed problem that caused some end-of-stream conflicts not to be reported. Fixed Pager's compatibility criterion to avoid creating end-of-stream conflicts. 2006/04/21: Fixed problem that allowed generating incorrect but apparently well-typed Objective Caml code when a semantic action was ill-typed and --infer was omitted. 2006/03/29: Improved conflict reports by factoring out maximal common derivation contexts. 2006/03/28: Fixed bug that could arise when explaining a conflict in a non-LALR(1) grammar. 2006/03/27: Changed count of reduce/reduce conflicts to allow a comparison with ocamlyacc's diagnostics. When refusing to resolve a conflict, report all diagnostics before dying. 2006/03/18: Added display of FOLLOW sets when using --log-grammar 2. Added --graph option. Fixed behavior of --depend option. 2006/01/06: Removed reversed lists from the standard library. menhir-20130116/INSTALLATION0000644000175000017500000000200212075533602014104 0ustar stephstephREQUIREMENTS You need Objective Caml 3.09 or later and GNU make. HOW TO INSTALL If you wish to install via ocamlfind, make sure that ocamlfind is in your PATH. (Remember that prefixing a command with sudo affects its PATH.) 
Run the following command: make PREFIX=/usr/local install If your machine does not have the native code Objective Caml compiler (ocamlopt), but does have the bytecode compiler (ocamlc), then instead of the above command, use: make PREFIX=/usr/local TARGET=byte all install The value of the PREFIX variable can be changed to control where the software, the standard library, and the documentation should be stored. These files are copied to the following places: $PREFIX/bin/ $PREFIX/share/menhir/ $PREFIX/doc/menhir/ The support library, MenhirLib, is either installed via ocamlfind, if available, or placed within $PREFIX/share/menhir. Menhir's --suggest options help determine where and how it was installed. The documentation includes a reference manual and a number of demos. menhir-20130116/demos/0002755000175000017500000000000012075533602013377 5ustar stephstephmenhir-20130116/demos/Makefile.auto0000644000175000017500000000055112075533602016005 0ustar stephsteph# This auxiliary Makefile is meant to be included by a client Makefile in # addition to Makefile.shared. It is optional. It implements the common case # where every .mly file in the current directory is to be viewed as a # mono-module grammar specification. $(foreach module,$(wildcard *.mly),$(eval $(call menhir_monomodule,$(patsubst %.mly,%,$(module)),))) menhir-20130116/demos/calc/0002755000175000017500000000000012075533602014301 5ustar stephstephmenhir-20130116/demos/calc/.omakedb.lock0000644000175000017500000000007012075533602016626 0ustar stephsteph*** omake: the project was last locked by madiran:5845. menhir-20130116/demos/calc/Makefile0000644000175000017500000000045112075533602015737 0ustar stephsteph# Add --table on the next line to use Menhir's table-based back-end. 
PGFLAGS := --infer GENERATED := parser.ml parser.mli lexer.ml MODULES := parser lexer calc EXECUTABLE := calc OCAMLDEPWRAPPER := ../ocamldep.wrapper include ../Makefile.shared include ../Makefile.auto menhir-20130116/demos/calc/stratified.ml0000644000175000017500000007457612075533602017012 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) exception Error type token = | TIMES | RPAREN | PLUS | MINUS | LPAREN | INT of ( # 1 "stratified.mly" (int) # 13 "stratified.ml" ) | EOL | DIV and _menhir_env = { _menhir_lexer: Lexing.lexbuf -> token; _menhir_lexbuf: Lexing.lexbuf; mutable _menhir_token: token; mutable _menhir_startp: Lexing.position; mutable _menhir_endp: Lexing.position; mutable _menhir_shifted: int } and _menhir_state = | MenhirState13 | MenhirState12 | MenhirState11 | MenhirState10 | MenhirState9 | MenhirState7 | MenhirState6 | MenhirState5 | MenhirState4 | MenhirState3 | MenhirState1 | MenhirState0 let _eRR = Error let rec _menhir_goto_expr_aux : _menhir_env -> 'ttv_tail -> _menhir_state -> 'tv_expr_aux -> 'ttv_return = fun _menhir_env _menhir_stack _menhir_s _v -> match _menhir_s with | MenhirState7 -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : (('freshtv81 * _menhir_state * 'tv_term) * _menhir_state) * _menhir_state * 'tv_term) = Obj.magic _menhir_stack in let (_menhir_s : _menhir_state) = _menhir_s in let (_v : 'tv_expr_aux) = _v in ((let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : ('freshtv79 * _menhir_state) * 
_menhir_state * 'tv_term) = Obj.magic _menhir_stack in let (_ : _menhir_state) = _menhir_s in let (_ : 'tv_expr_aux) = _v in ((let ((_menhir_stack, _menhir_s), _, _) = _menhir_stack in let _v : 'tv_expr_aux = # 40 "stratified.mly" ( ) # 60 "stratified.ml" in _menhir_goto_expr_aux _menhir_env _menhir_stack _menhir_s _v) : 'freshtv80)) : 'freshtv82) | MenhirState5 -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : (('freshtv85 * _menhir_state * 'tv_term) * _menhir_state) * _menhir_state * 'tv_term) = Obj.magic _menhir_stack in let (_menhir_s : _menhir_state) = _menhir_s in let (_v : 'tv_expr_aux) = _v in ((let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : ('freshtv83 * _menhir_state) * _menhir_state * 'tv_term) = Obj.magic _menhir_stack in let (_ : _menhir_state) = _menhir_s in let (_ : 'tv_expr_aux) = _v in ((let ((_menhir_stack, _menhir_s), _, _) = _menhir_stack in let _v : 'tv_expr_aux = # 38 "stratified.mly" ( ) # 76 "stratified.ml" in _menhir_goto_expr_aux _menhir_env _menhir_stack _menhir_s _v) : 'freshtv84)) : 'freshtv86) | MenhirState3 -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv117 * _menhir_state * 'tv_term) = Obj.magic _menhir_stack in let (_menhir_s : _menhir_state) = _menhir_s in let (_v : 'tv_expr_aux) = _v in ((let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv115 * _menhir_state * 'tv_term) = Obj.magic _menhir_stack in let (_ : _menhir_state) = _menhir_s in let (_ : 'tv_expr_aux) = _v in ((let (_menhir_stack, _menhir_s, _) = _menhir_stack in let _v : 'tv_expr = # 34 "stratified.mly" ( ) # 92 "stratified.ml" in let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv113) = _menhir_stack in let (_menhir_s : _menhir_state) = _menhir_s in let (_v : 'tv_expr) = _v in ((let _menhir_stack = (_menhir_stack, _menhir_s, _v) in match _menhir_s with | MenhirState1 -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 
('freshtv95 * _menhir_state) * _menhir_state * 'tv_expr) = Obj.magic _menhir_stack in ((assert (Pervasives.(<>) _menhir_env._menhir_shifted (-1)); let _tok = _menhir_env._menhir_token in let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : ('freshtv93 * _menhir_state) * _menhir_state * 'tv_expr) = _menhir_stack in let (_tok : token) = _tok in ((match _tok with | RPAREN -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : ('freshtv89 * _menhir_state) * _menhir_state * 'tv_expr) = Obj.magic _menhir_stack in ((let _ = _menhir_discard _menhir_env in let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : ('freshtv87 * _menhir_state) * _menhir_state * 'tv_expr) = Obj.magic _menhir_stack in ((let ((_menhir_stack, _menhir_s), _, _) = _menhir_stack in let _v : 'tv_factor = # 18 "stratified.mly" ( ) # 119 "stratified.ml" in _menhir_goto_factor _menhir_env _menhir_stack _menhir_s _v) : 'freshtv88)) : 'freshtv90) | _ -> assert (Pervasives.(<>) _menhir_env._menhir_shifted (-1)); _menhir_env._menhir_shifted <- (-1); let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : ('freshtv91 * _menhir_state) * _menhir_state * 'tv_expr) = Obj.magic _menhir_stack in ((let (_menhir_stack, _menhir_s, _) = _menhir_stack in _menhir_errorcase _menhir_env (Obj.magic _menhir_stack) _menhir_s) : 'freshtv92)) : 'freshtv94)) : 'freshtv96) | MenhirState0 -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv111 * _menhir_state * 'tv_expr) = Obj.magic _menhir_stack in ((assert (Pervasives.(<>) _menhir_env._menhir_shifted (-1)); let _tok = _menhir_env._menhir_token in let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv109 * _menhir_state * 'tv_expr) = _menhir_stack in let (_tok : token) = _tok in ((match _tok with | EOL -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv105 * _menhir_state * 'tv_expr) = Obj.magic _menhir_stack in ((let (_menhir_env : 
_menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv103 * _menhir_state * 'tv_expr) = Obj.magic _menhir_stack in ((let (_menhir_stack, _menhir_s, e) = _menhir_stack in let _v : ( # 6 "stratified.mly" (unit) # 147 "stratified.ml" ) = # 12 "stratified.mly" ( ) # 151 "stratified.ml" in let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv101) = _menhir_stack in let (_menhir_s : _menhir_state) = _menhir_s in let (_v : ( # 6 "stratified.mly" (unit) # 159 "stratified.ml" )) = _v in ((let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv99) = Obj.magic _menhir_stack in let (_menhir_s : _menhir_state) = _menhir_s in let (_v : ( # 6 "stratified.mly" (unit) # 167 "stratified.ml" )) = _v in ((let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv97) = Obj.magic _menhir_stack in let (_menhir_s : _menhir_state) = _menhir_s in let (_1 : ( # 6 "stratified.mly" (unit) # 175 "stratified.ml" )) = _v in (Obj.magic _1 : 'freshtv98)) : 'freshtv100)) : 'freshtv102)) : 'freshtv104)) : 'freshtv106) | _ -> assert (Pervasives.(<>) _menhir_env._menhir_shifted (-1)); _menhir_env._menhir_shifted <- (-1); let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv107 * _menhir_state * 'tv_expr) = Obj.magic _menhir_stack in ((let (_menhir_stack, _menhir_s, _) = _menhir_stack in _menhir_errorcase _menhir_env (Obj.magic _menhir_stack) _menhir_s) : 'freshtv108)) : 'freshtv110)) : 'freshtv112) | _ -> _menhir_fail ()) : 'freshtv114)) : 'freshtv116)) : 'freshtv118) | _ -> _menhir_fail () and _menhir_reduce4 : _menhir_env -> 'ttv_tail -> _menhir_state -> 'ttv_return = fun _menhir_env _menhir_stack _menhir_s -> let _v : 'tv_expr_aux = # 42 "stratified.mly" ( ) # 195 "stratified.ml" in _menhir_goto_expr_aux _menhir_env _menhir_stack _menhir_s _v and _menhir_run4 : _menhir_env -> 'ttv_tail * _menhir_state * 'tv_term -> _menhir_state -> 'ttv_return = fun _menhir_env _menhir_stack _menhir_s -> let 
_menhir_stack = (_menhir_stack, _menhir_s) in let _tok = _menhir_discard _menhir_env in let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : ('freshtv77 * _menhir_state * 'tv_term) * _menhir_state) = _menhir_stack in let (_tok : token) = _tok in ((match _tok with | INT _v -> _menhir_run2 _menhir_env (Obj.magic _menhir_stack) MenhirState4 _v | LPAREN -> _menhir_run1 _menhir_env (Obj.magic _menhir_stack) MenhirState4 | _ -> assert (Pervasives.(<>) _menhir_env._menhir_shifted (-1)); _menhir_env._menhir_shifted <- (-1); _menhir_errorcase _menhir_env (Obj.magic _menhir_stack) MenhirState4) : 'freshtv78) and _menhir_run6 : _menhir_env -> 'ttv_tail * _menhir_state * 'tv_term -> _menhir_state -> 'ttv_return = fun _menhir_env _menhir_stack _menhir_s -> let _menhir_stack = (_menhir_stack, _menhir_s) in let _tok = _menhir_discard _menhir_env in let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : ('freshtv75 * _menhir_state * 'tv_term) * _menhir_state) = _menhir_stack in let (_tok : token) = _tok in ((match _tok with | INT _v -> _menhir_run2 _menhir_env (Obj.magic _menhir_stack) MenhirState6 _v | LPAREN -> _menhir_run1 _menhir_env (Obj.magic _menhir_stack) MenhirState6 | _ -> assert (Pervasives.(<>) _menhir_env._menhir_shifted (-1)); _menhir_env._menhir_shifted <- (-1); _menhir_errorcase _menhir_env (Obj.magic _menhir_stack) MenhirState6) : 'freshtv76) and _menhir_goto_term_aux : _menhir_env -> 'ttv_tail -> _menhir_state -> 'tv_term_aux -> 'ttv_return = fun _menhir_env _menhir_stack _menhir_s _v -> match _menhir_s with | MenhirState13 -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : (('freshtv51 * _menhir_state * 'tv_factor) * _menhir_state) * _menhir_state * 'tv_factor) = Obj.magic _menhir_stack in let (_menhir_s : _menhir_state) = _menhir_s in let (_v : 'tv_term_aux) = _v in ((let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : ('freshtv49 * _menhir_state) * _menhir_state * 'tv_factor) = Obj.magic 
_menhir_stack in let (_ : _menhir_state) = _menhir_s in let (_ : 'tv_term_aux) = _v in ((let ((_menhir_stack, _menhir_s), _, _) = _menhir_stack in let _v : 'tv_term_aux = # 28 "stratified.mly" ( ) # 249 "stratified.ml" in _menhir_goto_term_aux _menhir_env _menhir_stack _menhir_s _v) : 'freshtv50)) : 'freshtv52) | MenhirState11 -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : (('freshtv55 * _menhir_state * 'tv_factor) * _menhir_state) * _menhir_state * 'tv_factor) = Obj.magic _menhir_stack in let (_menhir_s : _menhir_state) = _menhir_s in let (_v : 'tv_term_aux) = _v in ((let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : ('freshtv53 * _menhir_state) * _menhir_state * 'tv_factor) = Obj.magic _menhir_stack in let (_ : _menhir_state) = _menhir_s in let (_ : 'tv_term_aux) = _v in ((let ((_menhir_stack, _menhir_s), _, _) = _menhir_stack in let _v : 'tv_term_aux = # 26 "stratified.mly" ( ) # 265 "stratified.ml" in _menhir_goto_term_aux _menhir_env _menhir_stack _menhir_s _v) : 'freshtv54)) : 'freshtv56) | MenhirState9 -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv73 * _menhir_state * 'tv_factor) = Obj.magic _menhir_stack in let (_menhir_s : _menhir_state) = _menhir_s in let (_v : 'tv_term_aux) = _v in ((let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv71 * _menhir_state * 'tv_factor) = Obj.magic _menhir_stack in let (_ : _menhir_state) = _menhir_s in let (_ : 'tv_term_aux) = _v in ((let (_menhir_stack, _menhir_s, _) = _menhir_stack in let _v : 'tv_term = # 22 "stratified.mly" ( ) # 281 "stratified.ml" in let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv69) = _menhir_stack in let (_menhir_s : _menhir_state) = _menhir_s in let (_v : 'tv_term) = _v in ((let _menhir_stack = (_menhir_stack, _menhir_s, _v) in match _menhir_s with | MenhirState0 | MenhirState1 -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv59 * 
_menhir_state * 'tv_term) = Obj.magic _menhir_stack in ((assert (Pervasives.(<>) _menhir_env._menhir_shifted (-1)); let _tok = _menhir_env._menhir_token in let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv57 * _menhir_state * 'tv_term) = _menhir_stack in let (_tok : token) = _tok in ((match _tok with | MINUS -> _menhir_run6 _menhir_env (Obj.magic _menhir_stack) MenhirState3 | PLUS -> _menhir_run4 _menhir_env (Obj.magic _menhir_stack) MenhirState3 | EOL | RPAREN -> _menhir_reduce4 _menhir_env (Obj.magic _menhir_stack) MenhirState3 | _ -> assert (Pervasives.(<>) _menhir_env._menhir_shifted (-1)); _menhir_env._menhir_shifted <- (-1); _menhir_errorcase _menhir_env (Obj.magic _menhir_stack) MenhirState3) : 'freshtv58)) : 'freshtv60) | MenhirState4 -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : (('freshtv63 * _menhir_state * 'tv_term) * _menhir_state) * _menhir_state * 'tv_term) = Obj.magic _menhir_stack in ((assert (Pervasives.(<>) _menhir_env._menhir_shifted (-1)); let _tok = _menhir_env._menhir_token in let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : (('freshtv61 * _menhir_state * 'tv_term) * _menhir_state) * _menhir_state * 'tv_term) = _menhir_stack in let (_tok : token) = _tok in ((match _tok with | MINUS -> _menhir_run6 _menhir_env (Obj.magic _menhir_stack) MenhirState5 | PLUS -> _menhir_run4 _menhir_env (Obj.magic _menhir_stack) MenhirState5 | EOL | RPAREN -> _menhir_reduce4 _menhir_env (Obj.magic _menhir_stack) MenhirState5 | _ -> assert (Pervasives.(<>) _menhir_env._menhir_shifted (-1)); _menhir_env._menhir_shifted <- (-1); _menhir_errorcase _menhir_env (Obj.magic _menhir_stack) MenhirState5) : 'freshtv62)) : 'freshtv64) | MenhirState6 -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : (('freshtv67 * _menhir_state * 'tv_term) * _menhir_state) * _menhir_state * 'tv_term) = Obj.magic _menhir_stack in ((assert (Pervasives.(<>) _menhir_env._menhir_shifted (-1)); let 
_tok = _menhir_env._menhir_token in let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : (('freshtv65 * _menhir_state * 'tv_term) * _menhir_state) * _menhir_state * 'tv_term) = _menhir_stack in let (_tok : token) = _tok in ((match _tok with | MINUS -> _menhir_run6 _menhir_env (Obj.magic _menhir_stack) MenhirState7 | PLUS -> _menhir_run4 _menhir_env (Obj.magic _menhir_stack) MenhirState7 | EOL | RPAREN -> _menhir_reduce4 _menhir_env (Obj.magic _menhir_stack) MenhirState7 | _ -> assert (Pervasives.(<>) _menhir_env._menhir_shifted (-1)); _menhir_env._menhir_shifted <- (-1); _menhir_errorcase _menhir_env (Obj.magic _menhir_stack) MenhirState7) : 'freshtv66)) : 'freshtv68) | _ -> _menhir_fail ()) : 'freshtv70)) : 'freshtv72)) : 'freshtv74) | _ -> _menhir_fail () and _menhir_fail : unit -> 'a = fun () -> Printf.fprintf Pervasives.stderr "Internal failure -- please contact the parser generator's developers.\n%!"; assert false and _menhir_reduce11 : _menhir_env -> 'ttv_tail -> _menhir_state -> 'ttv_return = fun _menhir_env _menhir_stack _menhir_s -> let _v : 'tv_term_aux = # 30 "stratified.mly" ( ) # 361 "stratified.ml" in _menhir_goto_term_aux _menhir_env _menhir_stack _menhir_s _v and _menhir_run10 : _menhir_env -> 'ttv_tail * _menhir_state * 'tv_factor -> _menhir_state -> 'ttv_return = fun _menhir_env _menhir_stack _menhir_s -> let _menhir_stack = (_menhir_stack, _menhir_s) in let _tok = _menhir_discard _menhir_env in let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : ('freshtv47 * _menhir_state * 'tv_factor) * _menhir_state) = _menhir_stack in let (_tok : token) = _tok in ((match _tok with | INT _v -> _menhir_run2 _menhir_env (Obj.magic _menhir_stack) MenhirState10 _v | LPAREN -> _menhir_run1 _menhir_env (Obj.magic _menhir_stack) MenhirState10 | _ -> assert (Pervasives.(<>) _menhir_env._menhir_shifted (-1)); _menhir_env._menhir_shifted <- (-1); _menhir_errorcase _menhir_env (Obj.magic _menhir_stack) MenhirState10) : 'freshtv48) and 
_menhir_run12 : _menhir_env -> 'ttv_tail * _menhir_state * 'tv_factor -> _menhir_state -> 'ttv_return = fun _menhir_env _menhir_stack _menhir_s -> let _menhir_stack = (_menhir_stack, _menhir_s) in let _tok = _menhir_discard _menhir_env in let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : ('freshtv45 * _menhir_state * 'tv_factor) * _menhir_state) = _menhir_stack in let (_tok : token) = _tok in ((match _tok with | INT _v -> _menhir_run2 _menhir_env (Obj.magic _menhir_stack) MenhirState12 _v | LPAREN -> _menhir_run1 _menhir_env (Obj.magic _menhir_stack) MenhirState12 | _ -> assert (Pervasives.(<>) _menhir_env._menhir_shifted (-1)); _menhir_env._menhir_shifted <- (-1); _menhir_errorcase _menhir_env (Obj.magic _menhir_stack) MenhirState12) : 'freshtv46) and _menhir_goto_factor : _menhir_env -> 'ttv_tail -> _menhir_state -> 'tv_factor -> 'ttv_return = fun _menhir_env _menhir_stack _menhir_s _v -> let _menhir_stack = (_menhir_stack, _menhir_s, _v) in match _menhir_s with | MenhirState0 | MenhirState1 | MenhirState4 | MenhirState6 -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv35 * _menhir_state * 'tv_factor) = Obj.magic _menhir_stack in ((assert (Pervasives.(<>) _menhir_env._menhir_shifted (-1)); let _tok = _menhir_env._menhir_token in let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv33 * _menhir_state * 'tv_factor) = _menhir_stack in let (_tok : token) = _tok in ((match _tok with | DIV -> _menhir_run12 _menhir_env (Obj.magic _menhir_stack) MenhirState9 | TIMES -> _menhir_run10 _menhir_env (Obj.magic _menhir_stack) MenhirState9 | EOL | MINUS | PLUS | RPAREN -> _menhir_reduce11 _menhir_env (Obj.magic _menhir_stack) MenhirState9 | _ -> assert (Pervasives.(<>) _menhir_env._menhir_shifted (-1)); _menhir_env._menhir_shifted <- (-1); _menhir_errorcase _menhir_env (Obj.magic _menhir_stack) MenhirState9) : 'freshtv34)) : 'freshtv36) | MenhirState10 -> let (_menhir_env : _menhir_env) = _menhir_env 
in let (_menhir_stack : (('freshtv39 * _menhir_state * 'tv_factor) * _menhir_state) * _menhir_state * 'tv_factor) = Obj.magic _menhir_stack in ((assert (Pervasives.(<>) _menhir_env._menhir_shifted (-1)); let _tok = _menhir_env._menhir_token in let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : (('freshtv37 * _menhir_state * 'tv_factor) * _menhir_state) * _menhir_state * 'tv_factor) = _menhir_stack in let (_tok : token) = _tok in ((match _tok with | DIV -> _menhir_run12 _menhir_env (Obj.magic _menhir_stack) MenhirState11 | TIMES -> _menhir_run10 _menhir_env (Obj.magic _menhir_stack) MenhirState11 | EOL | MINUS | PLUS | RPAREN -> _menhir_reduce11 _menhir_env (Obj.magic _menhir_stack) MenhirState11 | _ -> assert (Pervasives.(<>) _menhir_env._menhir_shifted (-1)); _menhir_env._menhir_shifted <- (-1); _menhir_errorcase _menhir_env (Obj.magic _menhir_stack) MenhirState11) : 'freshtv38)) : 'freshtv40) | MenhirState12 -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : (('freshtv43 * _menhir_state * 'tv_factor) * _menhir_state) * _menhir_state * 'tv_factor) = Obj.magic _menhir_stack in ((assert (Pervasives.(<>) _menhir_env._menhir_shifted (-1)); let _tok = _menhir_env._menhir_token in let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : (('freshtv41 * _menhir_state * 'tv_factor) * _menhir_state) * _menhir_state * 'tv_factor) = _menhir_stack in let (_tok : token) = _tok in ((match _tok with | DIV -> _menhir_run12 _menhir_env (Obj.magic _menhir_stack) MenhirState13 | TIMES -> _menhir_run10 _menhir_env (Obj.magic _menhir_stack) MenhirState13 | EOL | MINUS | PLUS | RPAREN -> _menhir_reduce11 _menhir_env (Obj.magic _menhir_stack) MenhirState13 | _ -> assert (Pervasives.(<>) _menhir_env._menhir_shifted (-1)); _menhir_env._menhir_shifted <- (-1); _menhir_errorcase _menhir_env (Obj.magic _menhir_stack) MenhirState13) : 'freshtv42)) : 'freshtv44) | _ -> _menhir_fail () and _menhir_discard : _menhir_env -> token = fun 
_menhir_env -> let lexbuf = _menhir_env._menhir_lexbuf in let _tok = _menhir_env._menhir_lexer lexbuf in _menhir_env._menhir_token <- _tok; _menhir_env._menhir_startp <- lexbuf.Lexing.lex_start_p; _menhir_env._menhir_endp <- lexbuf.Lexing.lex_curr_p; let shifted = Pervasives.(+) _menhir_env._menhir_shifted 1 in if Pervasives.(>=) shifted 0 then _menhir_env._menhir_shifted <- shifted; _tok and _menhir_errorcase : _menhir_env -> 'ttv_tail -> _menhir_state -> 'ttv_return = fun _menhir_env _menhir_stack _menhir_s -> match _menhir_s with | MenhirState13 -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : (('freshtv9 * _menhir_state * 'tv_factor) * _menhir_state) * _menhir_state * 'tv_factor) = Obj.magic _menhir_stack in ((let (_menhir_stack, _menhir_s, _) = _menhir_stack in _menhir_errorcase _menhir_env (Obj.magic _menhir_stack) _menhir_s) : 'freshtv10) | MenhirState12 -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : ('freshtv11 * _menhir_state * 'tv_factor) * _menhir_state) = Obj.magic _menhir_stack in ((let (_menhir_stack, _menhir_s) = _menhir_stack in _menhir_errorcase _menhir_env (Obj.magic _menhir_stack) _menhir_s) : 'freshtv12) | MenhirState11 -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : (('freshtv13 * _menhir_state * 'tv_factor) * _menhir_state) * _menhir_state * 'tv_factor) = Obj.magic _menhir_stack in ((let (_menhir_stack, _menhir_s, _) = _menhir_stack in _menhir_errorcase _menhir_env (Obj.magic _menhir_stack) _menhir_s) : 'freshtv14) | MenhirState10 -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : ('freshtv15 * _menhir_state * 'tv_factor) * _menhir_state) = Obj.magic _menhir_stack in ((let (_menhir_stack, _menhir_s) = _menhir_stack in _menhir_errorcase _menhir_env (Obj.magic _menhir_stack) _menhir_s) : 'freshtv16) | MenhirState9 -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv17 * _menhir_state * 'tv_factor) = Obj.magic 
_menhir_stack in ((let (_menhir_stack, _menhir_s, _) = _menhir_stack in _menhir_errorcase _menhir_env (Obj.magic _menhir_stack) _menhir_s) : 'freshtv18) | MenhirState7 -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : (('freshtv19 * _menhir_state * 'tv_term) * _menhir_state) * _menhir_state * 'tv_term) = Obj.magic _menhir_stack in ((let (_menhir_stack, _menhir_s, _) = _menhir_stack in _menhir_errorcase _menhir_env (Obj.magic _menhir_stack) _menhir_s) : 'freshtv20) | MenhirState6 -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : ('freshtv21 * _menhir_state * 'tv_term) * _menhir_state) = Obj.magic _menhir_stack in ((let (_menhir_stack, _menhir_s) = _menhir_stack in _menhir_errorcase _menhir_env (Obj.magic _menhir_stack) _menhir_s) : 'freshtv22) | MenhirState5 -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : (('freshtv23 * _menhir_state * 'tv_term) * _menhir_state) * _menhir_state * 'tv_term) = Obj.magic _menhir_stack in ((let (_menhir_stack, _menhir_s, _) = _menhir_stack in _menhir_errorcase _menhir_env (Obj.magic _menhir_stack) _menhir_s) : 'freshtv24) | MenhirState4 -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : ('freshtv25 * _menhir_state * 'tv_term) * _menhir_state) = Obj.magic _menhir_stack in ((let (_menhir_stack, _menhir_s) = _menhir_stack in _menhir_errorcase _menhir_env (Obj.magic _menhir_stack) _menhir_s) : 'freshtv26) | MenhirState3 -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv27 * _menhir_state * 'tv_term) = Obj.magic _menhir_stack in ((let (_menhir_stack, _menhir_s, _) = _menhir_stack in _menhir_errorcase _menhir_env (Obj.magic _menhir_stack) _menhir_s) : 'freshtv28) | MenhirState1 -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv29 * _menhir_state) = Obj.magic _menhir_stack in ((let (_menhir_stack, _menhir_s) = _menhir_stack in _menhir_errorcase _menhir_env (Obj.magic _menhir_stack) 
_menhir_s) : 'freshtv30) | MenhirState0 -> let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv31) = Obj.magic _menhir_stack in (raise _eRR : 'freshtv32) and _menhir_run1 : _menhir_env -> 'ttv_tail -> _menhir_state -> 'ttv_return = fun _menhir_env _menhir_stack _menhir_s -> let _menhir_stack = (_menhir_stack, _menhir_s) in let _tok = _menhir_discard _menhir_env in let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv7 * _menhir_state) = _menhir_stack in let (_tok : token) = _tok in ((match _tok with | INT _v -> _menhir_run2 _menhir_env (Obj.magic _menhir_stack) MenhirState1 _v | LPAREN -> _menhir_run1 _menhir_env (Obj.magic _menhir_stack) MenhirState1 | _ -> assert (Pervasives.(<>) _menhir_env._menhir_shifted (-1)); _menhir_env._menhir_shifted <- (-1); _menhir_errorcase _menhir_env (Obj.magic _menhir_stack) MenhirState1) : 'freshtv8) and _menhir_run2 : _menhir_env -> 'ttv_tail -> _menhir_state -> ( # 1 "stratified.mly" (int) # 558 "stratified.ml" ) -> 'ttv_return = fun _menhir_env _menhir_stack _menhir_s _v -> let _ = _menhir_discard _menhir_env in let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv5) = Obj.magic _menhir_stack in let (_menhir_s : _menhir_state) = _menhir_s in let (_ : ( # 1 "stratified.mly" (int) # 568 "stratified.ml" )) = _v in ((let _v : 'tv_factor = # 16 "stratified.mly" ( ) # 573 "stratified.ml" in _menhir_goto_factor _menhir_env _menhir_stack _menhir_s _v) : 'freshtv6) and main : (Lexing.lexbuf -> token) -> Lexing.lexbuf -> ( # 6 "stratified.mly" (unit) # 580 "stratified.ml" ) = fun lexer lexbuf -> let _menhir_env = let (lexer : Lexing.lexbuf -> token) = lexer in let (lexbuf : Lexing.lexbuf) = lexbuf in ((let _tok = lexer lexbuf in { _menhir_lexer = lexer; _menhir_lexbuf = lexbuf; _menhir_token = _tok; _menhir_startp = lexbuf.Lexing.lex_start_p; _menhir_endp = lexbuf.Lexing.lex_curr_p; _menhir_shifted = 4611686018427387903; }) : _menhir_env) in Obj.magic (let 
(_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv3) = () in ((assert (Pervasives.(<>) _menhir_env._menhir_shifted (-1)); let _tok = _menhir_env._menhir_token in let (_menhir_env : _menhir_env) = _menhir_env in let (_menhir_stack : 'freshtv1) = _menhir_stack in let (_tok : token) = _tok in ((match _tok with | INT _v -> _menhir_run2 _menhir_env (Obj.magic _menhir_stack) MenhirState0 _v | LPAREN -> _menhir_run1 _menhir_env (Obj.magic _menhir_stack) MenhirState0 | _ -> assert (Pervasives.(<>) _menhir_env._menhir_shifted (-1)); _menhir_env._menhir_shifted <- (-1); _menhir_errorcase _menhir_env (Obj.magic _menhir_stack) MenhirState0) : 'freshtv2)) : 'freshtv4)) menhir-20130116/demos/calc/OMakefile0000644000175000017500000000033412075533602016056 0ustar stephstephGENERATED = parser.ml parser.mli lexer.ml MODULES = parser lexer calc EXECUTABLE = calc OCAMLDEPWRAPPER = ../ocamldep.wrapper include ../OMakefile.shared MenhirAuto() .DEFAULT: $(EXECUTABLE)$(OSUFFIX) menhir-20130116/demos/calc/stratified.mli0000644000175000017500000000223412075533602017141 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) exception Error type token = | TIMES | RPAREN | PLUS | MINUS | LPAREN | INT of (int) | EOL | DIV val main: (Lexing.lexbuf -> token) -> Lexing.lexbuf -> (unit)menhir-20130116/demos/calc/README0000644000175000017500000000041012075533602015152 0ustar stephstephThis tiny program reads arithmetic expressions from the standard input channel. 
Each expression is expected to be complete when the current line ends. Its value is then displayed on the standard output channel. This code is adapted from ocamlyacc's documentation. menhir-20130116/demos/calc/parser.mly0000644000175000017500000000302612075533602016317 0ustar stephsteph/**************************************************************************/ /* */ /* Menhir */ /* */ /* Franois Pottier, INRIA Rocquencourt */ /* Yann Rgis-Gianas, PPS, Universit Paris Diderot */ /* */ /* Copyright 2005-2008 Institut National de Recherche en Informatique */ /* et en Automatique. All rights reserved. This file is distributed */ /* under the terms of the Q Public License version 1.0, with the change */ /* described in file LICENSE. */ /* */ /**************************************************************************/ %token INT %token PLUS MINUS TIMES DIV %token LPAREN RPAREN %token EOL %left PLUS MINUS /* lowest precedence */ %left TIMES DIV /* medium precedence */ %nonassoc UMINUS /* highest precedence */ %start main %% main: | e = expr EOL { e } expr: | i = INT { i } | LPAREN e = expr RPAREN { e } | e1 = expr PLUS e2 = expr { e1 + e2 } | e1 = expr MINUS e2 = expr { e1 - e2 } | e1 = expr TIMES e2 = expr { e1 * e2 } | e1 = expr DIV e2 = expr { e1 / e2 } | MINUS e = expr %prec UMINUS { - e } menhir-20130116/demos/calc/OMakeroot.omc0000644000175000017500000002223212075533602016700 0ustar stephstephmadiran.omcȍȫG>ljR9w1$K0yݰ@Ѱ@@#CWD%LCѰ@,removesuffix-Cа @)addsuffix %CѰ @)OMAKEPATHϞFCа@)addprefixCѰ@&randomaCа@&mkfifo4CѰ@!%% CѰ!@5filter-proper-targets fC@@AѰ%@!** VCа)@!&& ZC@@Aа-@!++ 
C@@ACEѰ1@$HOMErECа5@!@@CѰ9@!>>BCа=@!<<C@@A@BѰA@-BUILD_SUMMARYn{CаE@!^^"C@@A@BEѰI@%mkdiriC@@AGMѰM@&setvarUkCѰQ@(homenamecCаU@$HOSTC@@A@BѰY@+addsuffixesOTC@@ADRѰ]@%closeZTCѰa@%splitNCаe@$lxor~5CѰi@/replacesuffixesBaC@@AѰm@,SCANNER_MODECаq@+AUTO_REHASHC@@A@BDѰu@$PATHĥCаy@.CREATE_SUBDIRSV1C@@Aа}@0RuntimeException8)C@@ACHѰ@)eprintvlnCа@(map-find"!CѰ@(PositionC@@AѰ@$RuleXC@а@@%MKDIRC@@ABDѰ@"CPJLCа@%flush5EC@@Aа@(map-keystC@@ACHQdѰ@"lekCа@"eqCѰ@,uncapitalizelyCа@$exitSCѰ@"NF}Cа%@"MV}3C@@Aа@"OS1C@@ACѰ@-object-lengthdxCа1@"RMd]C@@A@BFаƠ@"bgCѰʠ@&PasswdeCаΠ@*OutChannel^C@@AаҠ@$read~C@@ACѰ֠@"cpwCаڠ@"cdC@@A@BFMѰޠ@"ge vCѰ@0dependencies-allCа@"fgGC@@Aа@'dirnamey0C@@ACа@"if3CѰ@)InChannel,W&Cа@"gt'C@@Aа@*object-map,`2C@@ACѰ@"in;C@@AEIWѰ@"or#Cа@"ltzCѰ @"lsyICа@*object-memur)C@@A@BѰ@"nlCа@"mvC@@Aа@$waitSC@@ACFа@(subrange CѰ"@"rm/Cа&@+EMPTY_ARRAYC@@A@BѰ*@)empty-map H:@Cа.@'SYSNAME 0%C@@Aа2@%Float C@@ACFMeJѰ6@)mapprefixCа:@0html-pre-escapedr@%equal lCѰB@+random-init ۖCаF@%match ~ CѰJ@*parse-rule X_C@@AѰN@.filter-targets <[CаR@*OS_VERSION 4WC@@AаV@,parse-engine C@@ACEаZ@)file-sort L/ CѰ^@%Lexer BCаb@$file # C@@Aаf@$find HC@@ACѰj@:remove-project-directories M&C@@AEKаn@,sequence-mapMCѰr@'subdirs >Cаv@*create-map CѰz@&suffix %gCа@$ROOT לC@@A@BѰ@%fopen XC@@ADѰ@+replace-nth[Cа@$BodyC@@A@BGѰ@#addCа@$Node\NCѰ@'symlinkw3Cа@$BoolaC@@Aа@:EXIT_ON_UNCAUGHT_EXCEPTIONC@@ACѰ@/sequence-lengthfC@@AEѰ@/file-check-sort$C@а@,builtin-grepC@@ABHP\Ѱ@&rehashCѰ@,html-escaped%CѰ@#andziC@@AѰ@#asrC@@A@BFMQnа@)c-escapedO27CѰ@#diviCѰ@'STDROOTKgCа@&concatn@,sequence-revC@@ACаB@.history-length WCѰF@+add-wrapperC@@AѰJ@/target-optionalnxC@аN@,GLOB_OPTIONS(C@@ABDHNWѰR@#funCѰV@&getenvG CѰZ@*quote-argv Cа^@*obj-length{PXC@@A@Bаb@$pipeCѰf@%breakC@@AѰj@(getpwnamᱟCаn@,xterm-escapeַC@@A@BDGѰr@(basenameDCѰv@&stderr/kCаz@%chownkC@@Aа~@+parse-start0T4C@@ACа@7digest-in-path-optionalOFwCѰ@'tgetstrR Cа@%where 
C@@Aа@%CHMODC@@ACѰ@,get-registryhCа@,gettimeofdayh>C@@A@BFJRjYѰ@#mem!ƟCа@&nth-tlMCѰ@&nth-hdѿ9CѰ@)mapsuffix$)CѰ@%lseekJLCа@'foreachi C@@Aа@&shellawC@@ACѰ@&vmountFUCа@'sprintfC@а@%EMPTYF7C@@ABEMа@#lsl!hCѰ@#lex jp}Cа"@0xterm-escape-end CѰ&@3ALLOW_EMPTY_SUBDIRS C@@AѰ*@(nonempty 5d@#lor!"C@аB@&Select!aC@@ABEJѰF@#max!CѰJ@#lsr!nyC@@AѰN@*shell-code!C@аR@'absname!¡C@@ABDO]nѰV@%shell#CаZ@*capitalize" CѰ^@$land!CѰb@+file-exists!mcC@аf@$getc!g'C@@ABѰj@'tmpfile!#Cаn@$gets!kC@@A@BEѰr@*filter-out"6CѰv@#min"ْC@аz@$USER"1C@@ABа~@#mod"}*CѰ@$last"UC@@AѰ@#mul"0Cа@#BIN"OC@@A@BDGMа@#nth$Jd^CѰ@#not#OCа@(getpwuid#RBCѰ@#neg#,C@а@%stdin#D$C@@ABѰ@'MACHINE#MC@@ADѰ@1set-nonblock-mode$9Cа@&String$ C@@A@BGѰ@(printvln$#qCѰ@-history-index$pSCа@2builtin-test-brack$^0C@@A@BѰ@*OMakeFlags%C@@ADLZIѰ@-ocaml-escaped.؎CѰ @&setenv*CѰƠ@%fgets(Cаʠ@*intersects'tCѰΠ@&unlink&_CѰҠ@,OMakeVersion&\Cа֠@.digest-in-path% C@@Aаڠ@'obj-add&C@@ACѰޠ@)lowercase&C@а@&stdout'C@@ABFѰ@6prompt-invisible-begin(.LCѰ@4prompt-invisible-end(ZCа@'VERBOSE'1.C@@A@Bа@+parse-build(WCѰ@#Dir(:C@@AѰ@$echo(PgCа@&$class(C@@A@BDGNа@*id-escaped)CѰ@#rev( CѰ @(getgrnam(9/C@@AѰ@&export))XCа@3project-directories)vC@@Aа@%Group),C@@ACEѰ@#set*GbCа@%rmdir)!>CѰ"@&eprint) Cа&@0prompt-invisible)DC@@A@BѰ*@,builtin-find*35qC@@ADѰ.@(fullname*kC@@AFL[Ѱ2@(unsetenv+{0CѰ6@&Parser+K'Cа:@'fprintv+ 5CѰ>@*map-values*CаB@'defined*fC@@AаF@'fprintf*EC@@ACѰJ@'map-add+ARCаN@+object-find+C@@A@BFаR@#sub+f#CѰV@5find-in-path-optional+\XC@@AѰZ@$glob+fhC@@ACJа^@$link-86CѰb@&switch+$hCаf@&accept+XCѰj@%write+iaC@@AѰn@,cmp-versions+؞C@аr@#Fun+C@@ABDаv@#try,t=CѰz@.parse-nonassoc,iqC@@AѰ~@&Object,ւICа@%array,C@@Aа@%lockf,SC@@ACEJѰ@1set-close-on-exec-qCѰ@*encode-uri-BtC@@Aа@(obj-find.@CѰ@,out-contents.mCа @%RMDIR.W^C@@Aа@*map-remove.~C@@ACѰ@(eprintln.C@@AEGR]yѰ@&STDLIB4nCѰ@.exists-in-path1@xCѰ@+html-string0qCа@%lstat/nhCѰ@&getvar.C@а@&rewind/#9C@@ABѰ@'Channel/KCа 
@#Int/t1C@@AаƠ@%raise/C@@ACFаʠ@&OSTYPE0CѰΠ@-shell-success0C@@AѰҠ@'history1=HCаE@#LIB0gC@@A@BDKаڠ@4find-targets-in-path2TCѰޠ@&rename2hhvCа@(fprintln1?CѰ@ #find-ocaml-targets-in-path-optional1kCа@%Shell1H(C@@A@BѰ@$File1C@а@(NODENAME2$C@@ABEѰ@(Location2PCа@'connect2|fC@@Aа@%Token2SC@@ACIѰ@&system3nCѰ@&applya3 uC@а @$grep3."C@@ABа@#Map4TCѰ@*lex-search3¥NC@а@)Exception4SSQC@@ABѰ@$lnot4ckMC@@ADGQ]а@&printf@'obj-map7CѰB@#PID6)ACаF@&CDPATH6C@@AаJ@*object-add7iA1C@@ACѰN@$stat7fC@@AEJѰR@$stop8CѰV@'extends8CаZ@(Sequence8T.C@@A@BѰ^@,InOutChannel:lCаb@&Number93HC@@Aаf@'println:sC@@ACFQѰj@%float;rhCаn@*stat-reset:7CѰr@)getpwents:#C@@AѰv@&fsubst;uCаz@%Array; 1C@@Aа~@4UnbuildableException;AC@@ACEа@-OMAKE_VERSION;ZCѰ@&fprint;@C@а@#TAB;fC@@ABѰ@'map-map<qC@@ADJ\Ѱ@.string-escaped<~CѰ@&printv<[$CѰ@+parse-rightR.CѰ@&digest=6CѰ@*lex-engine<Cа@-filter-existsCаΠ@$Pipe>lJC@@A@BѰҠ@$jobs?1W2C@@ADѰ֠@0target-is-proper?k%Cаڠ@$join?vC@@A@BGNSpNwаߠ@,Omake_ir_ast)or@@@@а@)OMakeroot0jDBDS頰젱%build%omake#lib$godi%local#usr@@@ݪݪа  vZvZ KJKJ $Zf$Zf >Ψp>Ψp  )Common.om%语^语^@.JBJUC@1OBOMAB%false8OBOJ;OBOJ(.SUBDIRS@?OLOM!.AB@@@P@menhir-20130116/demos/calc/OMakeroot0000644000175000017500000000035512075533602016125 0ustar stephsteph# # Load the standard configuration. # open build/Common # # The command-line variables are defined *after* the # standard configuration has been loaded. # DefineCommandVars() # # Include the OMakefile in this directory. # .SUBDIRS: . menhir-20130116/demos/calc/lexer.mll0000644000175000017500000000273612075533602016134 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. 
*) (* *) (**************************************************************************) { open Parser exception Error of string } rule line = parse | ([^'\n']* '\n') as line { line } | eof { exit 0 } and token = parse | [' ' '\t'] { token lexbuf } | '\n' { EOL } | ['0'-'9']+ as i { INT (int_of_string i) } | '+' { PLUS } | '-' { MINUS } | '*' { TIMES } | '/' { DIV } | '(' { LPAREN } | ')' { RPAREN } | eof { exit 0 } | _ { raise (Error (Printf.sprintf "At offset %d: unexpected character.\n" (Lexing.lexeme_start lexbuf))) } menhir-20130116/demos/calc/OMakefile.omc0000644000175000017500000002277012075533602016643 0ustar stephstephmadiran.omcȍȫG>l{n\Ry,1n鶄%k@Ѱ@@%chmod5Cа@(subrange CѰ@@"MV}3CѰ@,SCANNER_MODECѰ@$HOSTCѰ@-BUILD_SUMMARYn{CѰ@&mkfifo4Cа@!%% CѰ"@5filter-proper-targets fC@@AѰ&@!** VCа*@!&& ZC@@Aа.@!++ C@@ACEа2@!@@CѰ6@!>>BCа:@!<<C@@A@BѰ>@!^^"C@@ADJѰB@%mkdiriCѰF@$HOMErEC@@AѰJ@&randomaC@@ACNѰN@)addprefixCѰR@&setvarUkCѰV@(homenamecC@@AѰZ@+addsuffixesOTC@@ACѰ^@$lxor~5CѰb@/replacesuffixesBaC@@AѰf@+AUTO_REHASHC@@ACGVѰj@$RuleXCѰn@0RuntimeException8)CѰr@.CREATE_SUBDIRSV1CѰv@%splitNC@@AѰz@$PATHĥCа~@/OCAMLDEPWRAPPERKC@@A@BDѰ@(PositionCѰ@%closeZTC@@AѰ@(map-find"!C@@ACHѰ@"CPJLCѰ@)eprintvlnCѰ@%MKDIRC@@AѰ@%flush5EC@@ACѰ@)OMAKEPATHϞFCѰ@(map-keystC@@AѰ@"LNvC@@ACGPgѰ@"ge vCѰ@$read~CѰ@-object-lengthdxCѰ@$exitSCѰ@"OS1Cа@"NF}C@@A@BѰ@"RMd]C@@ADѰƠ@*OutChannel^CѰʠ@,uncapitalizelyC@@AѰΠ@&PasswdeC@@ACHѰҠ@"eqCѰ֠@"cdCѰڠ@"bgC@@AѰޠ@"cpwC@@ACѰ@0dependencies-allCѰ@"fgGC@@AѰ@'dirnamey0C@@ACGPѰ@"lsyICѰ@"if3CѰ@)InChannel,W&CѰ@"gt'C@@AѰ@*object-map,`2C@@ACѰ@"lekCѰ@"in;C@@AѰ @*object-memur)C@@ACGѰ@$waitSCѰ@"mvCѰ@"ltzC@@AѰ@"nlC@@ACѰ@+EMPTY_ARRAYCѰ"@"or#C@@AѰ&@"rm/C@@ACGO`HѰ*@#addCѰ.@:remove-project-directories M&CѰ2@*OS_VERSION 4WCѰ6@%Float CѰ:@)empty-map H:@CѰ>@'SYSNAME 0%C@@AѰ9@&DIRSEP \^C@@ACѰF@*parse-rule X_CѰJ@)addsuffix %C@@AѰN@%match ~ C@@ACGѰR@$file # CѰV@,parse-engine CѰZ@.filter-targets <[C@@AѰ^@+random-init ۖC@@ACѰb@$find HCѰf@%Lexer BC@@AѰj@)file-sort L/ C@@ACGOѰn@$BodyCѰi@'EXT_LIB 
pCѰm@$ROOT לCѰz@%equal lC@@AѰ~@&suffix %gCа@)GENERATED C@@A@BDѰ@%fopen XCѰ@*create-map C@@AѰ@'subdirs >C@@ACHѰ@'symlinkw3CѰ@,sequence-mapMCѰ@+replace-nth[C@@AѰ@$BoolaC@@ACѰ@$Node\NCѰ@:EXIT_ON_UNCAUGHT_EXCEPTIONC@@AѰ@/sequence-lengthfC@@ACGP`Ѱ@*parse-leftv*KCѰ@&rehashCѰ@0html-pre-escapedrCѰ@%quoteH0CѰ@&target/C@@AѰ@&selectJTC@@ACѰ@)mapprefixCѰ@,find-in-pathc{C@@AѰ @)uppercasejrC@@ACGѰ@,intersectionCCѰ@&concatn@*decode-uriɅ[CѰB@,dependenciesC@@AѰF@&stringkC@@ACѰJ@2xterm-escape-beginR6CѰNB#dupGC@@AѰR@/create-lazy-mapwC@@ACGѰV@/target-optionalnxCѰZ@+add-wrapperCѰ^@(lex-rule.C@@AѰb@.history-length WC@@ACѰf@)c-escapedO27CѰj@,GLOB_OPTIONS(C@@AѰn@*obj-length{PXC@@ACGOѰr@&stderr/kCѰv@,xterm-escapeַCѰz@%breakCѰ~@&getenvG C@@AѰ@$pipeC@@ACѰ@#funCѰ@(getpwnamᱟC@@AѰ@%chownkC@@ACGѰ@'tgetstrR CѰ@(basenameDCѰ@+parse-start0T4C@@AѰ@%where C@@ACѰ@7digest-in-path-optionalOFwCѰ@%CHMODC@@AѰ@,gettimeofdayh>C@@ACGO_Ѱ@2shell-success-null^CѰ@,builtin-testsCѰ"@$scan~C@@AѰ&@%EMPTYF7C@@ACGO_Ѱ*@&export))XCа.@#neg#,CѰ2@'absname!¡CѰ6@*OS_RELEASE CѰ1@)NOSCANNER Ig_CѰ>@0xterm-escape-end CѰB@3ALLOW_EMPTY_SUBDIRS C@@AѰF@(nonempty 5d@%Group),C@@AѰB@0prompt-invisible)DC@@ACѰF@,builtin-find*35qCѰJ@%rmdir)!>C@@AѰN@#set*GbC@@ACGѰR@'fprintf*ECѰV@'defined*fCѰZ@&setenv*C@@AѰ^@*map-values*C@@ACѰb@+object-find+CѰf@'fprintv+ 5C@@AѰj@'map-add+ARC@@ACGOѰn@#Fun+CѰr@(unsetenv+{0CѰv@#sub+f#CѰz@5find-in-path-optional+\XC@@AѰ~@$glob+fhC@@ACѰ@&accept+XCѰ@%write+iaC@@AѰ@,cmp-versions+؞C@@ACGѰ@%array,CѰ@.parse-nonassoc,iqCѰ@&switch+$hC@@AѰ@#try,t=C@@ACѰ@%lockf,SCѰ@&Object,ւIC@@AѰ@$link-86C@@ACGO_Ѱ@$grep3."Cа@-shell-success0CѰ@&getvar.CѰ@*map-remove.~CѰ@%RMDIR.W^CѰ@1set-close-on-exec-qC@@AѰ @,out-contents.mC@@ACѰƠ@(eprintln.CѰʠ@(obj-find.@C@@AѰΠ@-ocaml-escaped.؎C@@ACGѰҠ@'Channel/KCѰ֠@%lstat/nhCѰڠ@&rewind/#9C@@AѰޠ@#Int/t1C@@ACѰ@+html-string0qCѰ@%raise/C@@AѰ@'PATHSEP0Z GC@@ACGOѰ@$File1CѰ@.exists-in-path1@xCѰ@#LIB0gCѰ@&OSTYPE0C@@AѰ@'history1=HC@@ACѰ@ #find-ocaml-targets-in-path-optional1kCѰ@%Shell1H(C@@AѰ 
@(fprintln1?C@@ACGѰ@(Location2PCѰ@&rename2hhvCѰ@(NODENAME2$C@@AѰ@'connect2|fC@@ACѰ@4find-targets-in-path2TCѰ"@%Token2SC@@AѰ&@&applya3 uC@@ACGO_а*@$stat7fCѰ.@,history-file5CѰ2@#Map4TCѰ6@*lex-search3¥NCѰ:@&system3nC@@AѰ>@)Exception4SSQC@@ACѰB@&STDLIB4nCѰF@$lnot4ckMC@@AѰJ@%dirof5ѴC@@ACGѰN@&CDPATH6CѰR@7add-project-directories5%CѰV@%apply51hC@@AѰZ@=find-targets-in-path-optional55C@@ACѰ^@*object-add7iA1CѰb@#PID6)AC@аf@'MODULES7*C@@ABѰj@'obj-map7C@@ADHPѰn@#TAB;fCаr@&filter:e,CѰv@$stop8CѰz@(Sequence8T.CѰ~@'obj-mem7C@@AѰ@'extends8C@@ACѰ@,InOutChannel:lCѰ@&Number93HC@а@*EXECUTABLE9@~C@@ABѰ@'println:sC@@ADHѰ@&fsubst;uCѰ@*stat-reset:7CѰ@)getpwents:#C@@AѰ@%Array; 1C@@ACѰ@%float;rhCѰ@4UnbuildableException;AC@@AѰ@&fprint;@C@@ACGPа@.string-escaped<~CѰ@'map-memlJCѰ@$tell=gCѰ@6ABORT_ON_COMMAND_ERROR=V C@@AѰ@'TARGETS>R.C@@ACѰ@'EXT_ASM>CѰ@$test>C@@Aа@$jobs?1W2CѰ@)fprintvln?C@@AѰ@$join?vCа@2find-build-targets?]C@@Aа @0target-is-proper?k%C@@ACEGKOWhyYyа@,Omake_ir_ast)or@@@@а@)OMakefile&,$A@AoC@ARAoARAo=parser.ml parser.mli lexer.ml@ B@BcC@BRBcBRBc1parser lexer calc@C@CVC@CRCV$calcD@DeC@DRDe DRDe3../ocamldep.wrapper@$E@E[&EHE[)EHE[3../OMakefile.shared@@-F@FLCB@*MenhirAuto.3B@3G@GaAB?%false:G@GH=G@GH(.DEFAULT@AG`GaDGJGaGGJGWBC@JGWGaBC_@'OSUFFIXj@@AB*@ @@[@menhir-20130116/demos/calc/.omakedb0000644000175000017500000001037312075533602015706 0ustar stephsteph.odb]Lom{]Q # pѰ@%build%omake #lib$godi%local#usr@@@ ݪݪа! 
vZvZ"KJKJ#$Zf$Zf$>Ψp>Ψp%  )Common.om(&语^语^0Ma@$A@@@_j ܒ/A #A #A0fQ'ҁҕа36%demos:&menhir>#devB(fpottierF$home.J::KL NNM,b.,b.NZVZV0OMakefile.sharedPۃۃ02 @AJR@_j4 &/A #/A #/A0L٥$`کf+Ѱ\_$calc-c}a }a *parser.mlie5V5V02@AJR@_j &/A &/A &/A0Wݻ5ݣѰq&calc.ouE.E.02@AJR@_j &/A &/A &/A0@PEΌWX>D @@AѰ$)OMakefile҄N҄N树02@AJR@_j ܒ/A ZJA ZJA0{n\Ry,1n@@ACѰ3*parser.cmi602@AJR@_j &/A &/A &/A0Ez=52DJI4ѰC'lexer.oᬧᬧ02@AJR@_j &/A &/A &/A008aNP&3@@AѰR(parser.o  02@AJR@_j &/A &/A &/A0yObk@&1]@@ACGаa)OMakerootd+]+]02@AJR@_j ܒ/A ZJA ZJA0jR9w1Ѱ͠q!.Ѱt02 ACJR@_j &/A /A /A&squashѰݠ-Pervasives.omOO0MW@$A@@@_j' ܒ/A #A #A0 W9J]/Q^@@AѰ젰(calc.cmxjj02@AJR@_j &/A &/A &/A0y Pnr 8,@@ACѰ*parser.mly䠑02@AJR@_j, /A ZJA ZJA0B0|а $calcսս02@AJR@_j &/A &/A &/A00~8F8F02@AJR@_j /A ZJA ZJA0=`x dvѰJ)parser.mlN.`O .`O 02@AJR@_j/ &/A &/A &/A0c%g@@@AѰY)lexer.cmx]9902@AJR@_j &/A &/A &/A0le]i6@аh (lexer.mll;U;U02@AJR@_j /A /A /A0϶a2? 'y@@ABDHLTѰxC{ |+-3+-36@Ѱ+-3@@@AѰ7.`O ,@@A0]ʁ{Ѱ**parser.cmow%w%@@A@аb @@A@@ABѰC443@@@A@@A0G@ 'Ѱw%@@A@s@@A@@ABаC2E.xx, @@@Ana@@A0$X˼\ +wѰP(calc.cmo,6%,6%#W)lexer.cmoTُTُ@@A@B@b@@AB@@A@B@@AаC02Q2Q*K@@@Ag0Wݻ5ݣ@@A04юcG8CU&@@@ACѰɠCs9+`+`Α@@@Ai]@@A0H]GF,/7"G@@A@~@@A@@AB@@AE@@@A@@@@@@A@@@0bs@a@@ABѰS4@а堤C@@@A@@AC0PZaߍ Printf.fprintf stderr "%s%!" msg | Parser.Error -> Printf.fprintf stderr "At offset %d: syntax error.\n%!" (Lexing.lexeme_start linebuf) done menhir-20130116/demos/Makefile0000644000175000017500000000032012075533602015030 0ustar stephstephDEMOS := calc calc-two calc-param .PHONY: all clean all: @for i in $(DEMOS) ; do \ $(MAKE) -C $$i ; \ done clean: /bin/rm -f *~ .*~ *.omc @for i in $(DEMOS) ; do \ $(MAKE) -C $$i clean ; \ done menhir-20130116/demos/OMakefile.shared0000644000175000017500000001353012075533602016423 0ustar stephsteph# This OMakefile is designed for projects that use Objective Caml, # ocamllex, menhir, and alphaCaml. 
# This OMakefile is meant to be included within a host OMakefile # that defines the following variables: # # GENERATED : a list of the source (.ml and .mli) files # that are generated (by invoking ocamllex # or menhir) # # MODULES : a list of the modules (without extension) # that should be linked into the executable # program. Order is significant. # # EXECUTABLE : the base name of the executables that should # be produced. Suffixes $(BSUFFIX) and $(OSUFFIX) # will be added to distinguish the bytecode and # native code versions. # ------------------------------------------------------------------------- # The host OMakefile can override the following default settings. # Menhir. if $(not $(defined MENHIR)) MENHIR = menhir export # Parser generation flags. if $(not $(defined MENHIR_FLAGS)) MENHIR_FLAGS = --infer -v export # Include directives for compilation and for linking. if $(not $(defined PREFIXED_OCAMLINCLUDES)) PREFIXED_OCAMLINCLUDES = export # Bytecode compilation flags. if $(not $(defined BFLAGS)) BFLAGS = -dtypes -g export # Native code compilation flags. if $(not $(defined OFLAGS)) OFLAGS = -dtypes export # Menhir-suggested compilation flags. if $(not $(defined SUGG_FLAGS)) SUGG_FLAGS = $(shell $(MENHIR) $(MENHIR_FLAGS) --suggest-comp-flags 2>/dev/null) export # Bytecode link-time flags. if $(not $(defined BLNKFLAGS)) BLNKFLAGS = -g export # Menhir-suggested bytecode link-time flags. if $(not $(defined SUGG_BLNKFLAGS)) SUGG_BLNKFLAGS = $(shell $(MENHIR) $(MENHIR_FLAGS) --suggest-link-flags-byte 2>/dev/null) export # Native code link-time flags. if $(not $(defined OLNKFLAGS)) OLNKFLAGS = export # Menhir-suggested native code link-time flags. if $(not $(defined SUGG_OLNKFLAGS)) SUGG_OLNKFLAGS = $(shell $(MENHIR) $(MENHIR_FLAGS) --suggest-link-flags-opt 2>/dev/null) export # Lexer generation flags. if $(not $(defined LGFLAGS)) LGFLAGS = export # Suffix appended to the name of the bytecode executable. 
if $(not $(defined BSUFFIX)) BSUFFIX = .byte export # Suffix appended to the name of the native code executable. if $(not $(defined OSUFFIX)) OSUFFIX = export # Access paths for the tools. if $(not $(defined OCAML)) OCAML = ocaml export if $(not $(defined OCAMLC)) if $(which ocamlfind) OCAMLC = ocamlfind ocamlc export elseif $(which ocamlc.opt) OCAMLC = ocamlc.opt export else OCAMLC = ocamlc export export if $(not $(defined OCAMLOPT)) if $(which ocamlfind) OCAMLOPT = ocamlfind ocamlopt export elseif $(which ocamlopt.opt) OCAMLOPT = ocamlopt.opt export else OCAMLOPT = ocamlopt export export if $(not $(defined OCAMLDEP)) if $(which ocamlfind) OCAMLDEP = ocamlfind ocamldep export elseif $(which ocamldep.opt) OCAMLDEP = ocamldep.opt export else OCAMLDEP = ocamldep export export if $(not $(defined OCAMLDEPWRAPPER)) OCAMLDEPWRAPPER = ./ocamldep.wrapper export if $(not $(defined OCAMLLEX)) OCAMLLEX = ocamllex export if $(not $(defined ALPHACAML)) ALPHACAML = alphaCaml export # ---------------------------------------------------------------- # Define an ocamldep wrapper that creates fake generated files so that # ocamldep can see that these files exist (or are supposed to exist). # This is required to work around ocamldep's brokenness. WrapScanner(command) = $(OCAML) $(OCAMLDEPWRAPPER) $(GENERATED) - $(command) # ---------------------------------------------------------------- # Dependencies. .SCANNER: %.cmi: %.mli WrapScanner($(OCAMLDEP) $<) .SCANNER: %.cmx %.cmo %.o: %.ml WrapScanner($(OCAMLDEP) $<) # ---------------------------------------------------------------- # Compilation. %.cmi: %.mli $(OCAMLC) $(PREFIXED_OCAMLINCLUDES) $(BFLAGS) $(SUGG_FLAGS) -c $< %.cmo: %.ml $(OCAMLC) $(PREFIXED_OCAMLINCLUDES) $(BFLAGS) $(SUGG_FLAGS) -c $< %.cmx %.o: %.ml $(OCAMLOPT) $(PREFIXED_OCAMLINCLUDES) $(OFLAGS) $(SUGG_FLAGS) -c $< %.ml: %.mll $(OCAMLLEX) $(LGFLAGS) $< %.ml %.mli: %.mla $(ALPHACAML) $< # ---------------------------------------------------------------- # Linking. 
$(EXECUTABLE)$(OSUFFIX): $(addsuffix .cmx, $(MODULES)) $(OCAMLOPT) -o $@ $(PREFIXED_OCAMLINCLUDES) $(OLNKFLAGS) $(SUGG_FLAGS) $(SUGG_OLNKFLAGS) $+ $(EXECUTABLE)$(BSUFFIX): $(addsuffix .cmo, $(MODULES)) $(OCAMLC) -o $@ $(PREFIXED_OCAMLINCLUDES) $(BLNKFLAGS) $(SUGG_FLAGS) $(SUGG_BLNKFLAGS) $+ # ---------------------------------------------------------------- # Menhir: multiple file projects. MenhirMulti(target, sources, options) = TARGETS = $(file $(target).ml $(target).mli) SOURCES = $(file $(sources)) $(TARGETS): $(SOURCES) $(MENHIR) --ocamlc "$(OCAMLC) $(PREFIXED_OCAMLINCLUDES) $(BFLAGS)" $(MENHIR_FLAGS) --base $(target) $(options) $(SOURCES) .SCANNER: $(TARGETS): $(SOURCES) WrapScanner($(MENHIR) --ocamldep "$(OCAMLDEP)" --depend --base $(target) $(options) $(SOURCES)) # Menhir: single file projects. MenhirMono(target, options) = MenhirMulti($(target), $(target).mly, $(options)) # Menhir: automatic single file projects. # This causes every .mly file to be viewed as a single file project. MenhirAuto() = foreach (target, $(glob *.mly)) MenhirMono($(removesuffix $(target)), $(EMPTY)) # ---------------------------------------------------------------- .PHONY: clean clean: /bin/rm -f $(EXECUTABLE)$(BSUFFIX) $(EXECUTABLE)$(OSUFFIX) $(GENERATED) /bin/rm -f *.cmi *.cmx *.cmo *.o *~ .*~ *.automaton *.conflicts *.annot menhir-20130116/demos/ocamldep.wrapper0000755000175000017500000000574312075533602016577 0ustar stephsteph#!/usr/bin/env ocaml (* ocamldep.wrapper ... - runs the in an environment where all of the listed appear to exist. The files are created, if required, before the command is run, and destroyed afterwards. *) (* An earlier version of this script acquired a lock, so as to prevent multiple instances of this script from interfering with one another. However, this did not prevent interference between this script and some other process (e.g., the ocaml compiler) which creates files. So, the lock has been removed. 
My suggestion is to never use this script in a concurrent setting. If you wish to use parallel make, then you might be able to use a two-level Makefile approach: first, compute all dependencies in a sequential manner; then, construct all targets in a parallel manner. *) #load "unix.cma" open Printf (* Parse the command line. The arguments that precede "-" are understood as file names and stored in the list [xs]. The arguments that follow "-" are understood as a command and stored in [command]. *) let xs = ref [] let command = ref "" let verbose = ref false let rec loop accumulating i = if i = Array.length Sys.argv then () else if accumulating then (* [accumulating] is [true] as long as we have not found the "-" marker *) match Sys.argv.(i) with | "-v" -> verbose := true; loop true (i+1) | "-" -> (* We have found the marker. The next parameter should be the name of the raw [ocamldep] command. Copy it to the command (unquoted -- apparently some shells do not permit quoting a command name). *) let i = i + 1 in assert (i < Array.length Sys.argv); command := Sys.argv.(i); (* Stop accumulating file names. Copy the remaining arguments into the command. *) loop false (i+1) | _ -> (* Continue accumulating file names in [xs]. *) xs := Sys.argv.(i) :: !xs; loop true (i+1) else begin (* After we have found the "-" marker, the remaining arguments are copied (quoted) into the command. *) command := sprintf "%s %s" !command (Filename.quote Sys.argv.(i)); loop false (i+1) end let () = loop true 1 (* Create the required files if they don't exist, run the command, then destroy any files that we have created. 
*) let rec loop = function | [] -> if !verbose then fprintf stderr "ocamldep.wrapper: running %s\n" !command; Sys.command !command | x :: xs -> if Sys.file_exists x then loop xs else begin if !verbose then fprintf stderr "ocamldep.wrapper: creating fake %s\n" x; let c = open_out x in close_out c; let exitcode = loop xs in if Sys.file_exists x then begin try if !verbose then fprintf stderr "ocamldep.wrapper: removing fake %s..." x; Sys.remove x; if !verbose then fprintf stderr " ok\n" with Sys_error _ -> if !verbose then fprintf stderr " failed\n" end; exitcode end let () = exit (loop !xs) menhir-20130116/demos/Makefile.shared0000644000175000017500000002125312075533602016305 0ustar stephsteph# This Makefile is shared between all demos. It is our suggestion # of a canonical Makefile for projects that use Objective Caml, # ocamllex, and menhir. It requires a recent version of GNU make # (older versions do not correctly implement $(eval)). # ---------------------------------------------------------------- # This Makefile is meant to be included within a host Makefile # that defines the following variables: # # GENERATED : a list of the source (.ml and .mli) files # that should be generated by invoking ocamllex # or menhir # # MODULES : a list of the modules (without extension) # that should be linked into the executable # program. Order is significant. # # EXECUTABLE : the base name of the executables that should # be produced. Suffixes $(BSUFFIX) and $(OSUFFIX) # will be added to distinguish the bytecode and # native code versions. # ---------------------------------------------------------------- # The host Makefile can also override the following settings: # Menhir. ifndef MENHIR MENHIR := menhir endif # Parser generation flags. ifndef PGFLAGS PGFLAGS := --infer -v endif # Include directives for compilation and for linking. ifndef INCLUDE INCLUDE := endif # Bytecode compilation flags. ifndef BFLAGS BFLAGS := endif # Native code compilation flags. 
ifndef OFLAGS OFLAGS := endif # Menhir-suggested compilation flags. ifndef SUGG_FLAGS SUGG_FLAGS := $(shell $(MENHIR) $(PGFLAGS) --suggest-comp-flags 2>/dev/null) endif # Bytecode link-time flags. ifndef BLNKFLAGS BLNKFLAGS := endif # Menhir-suggested bytecode link-time flags. ifndef SUGG_BLNKFLAGS SUGG_BLNKFLAGS := $(shell $(MENHIR) $(PGFLAGS) --suggest-link-flags-byte 2>/dev/null) endif # Native code link-time flags. ifndef OLNKFLAGS OLNKFLAGS := endif # Menhir-suggested native code link-time flags. ifndef SUGG_OLNKFLAGS SUGG_OLNKFLAGS := $(shell $(MENHIR) $(PGFLAGS) --suggest-link-flags-opt 2>/dev/null) endif # Suffix appended to the name of the bytecode executable. ifndef BSUFFIX BSUFFIX := .byte endif # Suffix appended to the name of the native code executable. ifndef OSUFFIX OSUFFIX := endif # Access paths. ifndef OCAML OCAML := ocaml endif ifndef OCAMLC OCAMLC := $(shell if ocamlfind ocamlc -v >/dev/null 2>&1 ; \ then echo ocamlfind ocamlc ; \ elif ocamlc.opt -v >/dev/null 2>&1 ; \ then echo ocamlc.opt ; \ else echo ocamlc ; fi) endif ifndef OCAMLOPT OCAMLOPT := $(shell if ocamlfind ocamlopt -v >/dev/null 2>&1 ; \ then echo ocamlfind ocamlopt ; \ elif ocamlopt.opt -v >/dev/null 2>&1 ; \ then echo ocamlopt.opt ; \ else echo ocamlopt ; fi) endif ifndef OCAMLDEP OCAMLDEP := $(shell if ocamlfind ocamldep -version >/dev/null 2>&1 ; \ then echo ocamlfind ocamldep ; \ elif ocamldep.opt -version >/dev/null 2>&1 ; \ then echo ocamldep.opt ; \ else echo ocamldep ; fi) endif ifndef OCAMLDEPWRAPPER OCAMLDEPWRAPPER:= ./ocamldep.wrapper endif ifndef OCAMLLEX OCAMLLEX := ocamllex endif # A list of targets that do not require dependency analysis. # This variable should be set by the host before including # this Makefile. COLD += clean # ---------------------------------------------------------------- # Do not destroy the generated source files. .SECONDARY: $(GENERATED) # ---------------------------------------------------------------- # Linking. 
$(EXECUTABLE)$(OSUFFIX): $(MODULES:=.cmx) $(OCAMLOPT) -o $@ $(INCLUDE) $(OLNKFLAGS) $(SUGG_FLAGS) $(SUGG_OLNKFLAGS) $^ $(EXECUTABLE)$(BSUFFIX): $(MODULES:=.cmo) $(OCAMLC) -o $@ $(INCLUDE) $(BLNKFLAGS) $(SUGG_FLAGS) $(SUGG_BLNKFLAGS) $^ # ---------------------------------------------------------------- # Compiling. # We make the .ml and .mli files generated by ocamllex and menhir # unwritable, so as to prevent them from being edited by mistake. %.cmi: %.mli %.mli.d $(OCAMLC) $(INCLUDE) $(BFLAGS) $(SUGG_FLAGS) -c $< %.cmo: %.ml %.ml.d $(OCAMLC) $(INCLUDE) $(BFLAGS) $(SUGG_FLAGS) -c $< %.cmx %.o: %.ml %.ml.d $(OCAMLOPT) $(INCLUDE) $(OFLAGS) $(SUGG_FLAGS) -c $< %.ml: %.mll @if [ -f $@ ] ; then /bin/chmod +w $@ ; fi $(OCAMLLEX) $< @/bin/chmod -w $@ # ---------------------------------------------------------------- # Computing dependencies. # We associate a tiny Makefile, whose name ends in .d, with every # source file; it contains dependency information only. For an .ml or # .mli file, we create an .ml.d or .mli.d file by invoking ocamldep. # For an .mll file, we create an .ml.d file by invoking ocamllex first # (this is implicit), then ocamldep. # When it finds a reference to module M, ocamldep checks whether the # files m.ml and m.mli exist before deciding which dependency to # report. If m.ml and m.mli are generated from m.mll or m.mly, then # there is a potential problem: because ocamldep is invoked before # these files are created, it cannot see them. The standard solution # until now was to invoke ocamllex and ocamlyacc first to create all # generated files, and run ocamldep next. This approach does not work # with menhir when the --infer switch is on: menhir cannot be invoked # first because it needs type information found in .cmi (or .cmo or # .cmx) files. Our solution is to wrap ocamldep in a script that # creates fake generated files m.ml and m.mli to let ocamldep know that # these files are supposed to exist. This is somewhat tricky, but appears # to work. 
%.ml.d: %.ml $(OCAML) $(OCAMLDEPWRAPPER) $(GENERATED) - $(OCAMLDEP) $< > $@ %.mli.d: %.mli $(OCAML) $(OCAMLDEPWRAPPER) $(GENERATED) - $(OCAMLDEP) $< > $@ # All .d files are included within the present Makefile, so it they # do not exist, they are created first, and the dependencies that # they contain are then taken into account. # A .SECONDARY directive is used to ensure that the auxiliary # Makefiles are never removed. Otherwise, make could create # one, remove one, create one, remove one, ... (We have observed # this.) ifeq ($(findstring $(MAKECMDGOALS),$(COLD)),) ifneq ($(strip $(wildcard *.mli)),) .SECONDARY: $(patsubst %.mli,%.mli.d,$(wildcard *.mli)) -include $(patsubst %.mli,%.mli.d,$(wildcard *.mli)) endif ifneq ($(strip $(wildcard *.ml)),) .SECONDARY: $(patsubst %.ml,%.ml.d,$(wildcard *.ml)) -include $(patsubst %.ml,%.ml.d,$(wildcard *.ml)) endif ifneq ($(strip $(wildcard *.mll)),) .SECONDARY: $(patsubst %.mll,%.ml.d,$(wildcard *.mll)) -include $(patsubst %.mll,%.ml.d,$(wildcard *.mll)) endif endif # ---------------------------------------------------------------- # Support for menhir projects. # The macro menhir_multimodule defines support for multi-module grammar # specifications, that is, for producing parsers out of multiple # source files. The parameter $(1) is the name of the parser that # should be produced; the parameter $(2) is the list of .mly source # files; the parameter $(3) contains extra options to be passed to # menhir. # The dependency file is named $(1).d and created by invoking menhir # --depend. 
define menhir_multimodule $(1).ml $(1).mli: $(2) $(1).d @if [ -f $(1).ml ] ; then /bin/chmod +w $(1).ml ; fi @if [ -f $(1).mli ] ; then /bin/chmod +w $(1).mli ; fi $(MENHIR) --ocamlc "$(OCAMLC)" $(PGFLAGS) --base $(1) $(3) $(2) @/bin/chmod -w $(1).ml $(1).mli $(1).d: $(2) @if [ -f $(1).ml ] ; then /bin/chmod +w $(1).ml ; fi @if [ -f $(1).mli ] ; then /bin/chmod +w $(1).mli ; fi $(OCAML) $(OCAMLDEPWRAPPER) $(GENERATED) - \ $(MENHIR) --ocamldep "$(OCAMLDEP)" --depend --base $(1) $(3) $(2) > $$@ ifeq ($$(findstring $$(MAKECMDGOALS),$$(COLD)),) .SECONDARY: $(1).d -include $(1).d endif endef # The macro menhir_monomodule defines support for a mono-module grammar # specification. The parameter $(1) is the name of the parser that # should be produced; the source file is $(1).mly. The parameter $(2) # contains extra options to be passed to menhir. define menhir_monomodule $(eval $(call menhir_multimodule,$(1),$(1).mly,$(2))) endef # Neither of the two macros above is invoked by default, as it is not # known here which is appropriate. It is up to the client to invoke # these macros with suitable parameters. The auxiliary Makefile.auto # implements the common case where every .mly file is a mono-module # grammar. # ---------------------------------------------------------------- .PHONY: clean clean:: /bin/rm -f $(EXECUTABLE)$(BSUFFIX) $(EXECUTABLE)$(OSUFFIX) $(GENERATED) /bin/rm -f *.cmi *.cmx *.cmo *.o *~ .*~ *.automaton *.conflicts *.annot /bin/rm -f *.d menhir-20130116/demos/calc-two/0002755000175000017500000000000012075533602015110 5ustar stephstephmenhir-20130116/demos/calc-two/common.mly0000644000175000017500000000225112075533602017121 0ustar stephsteph/**************************************************************************/ /* */ /* Menhir */ /* */ /* Franois Pottier, INRIA Rocquencourt */ /* Yann Rgis-Gianas, PPS, Universit Paris Diderot */ /* */ /* Copyright 2005-2008 Institut National de Recherche en Informatique */ /* et en Automatique. All rights reserved. 
This file is distributed */ /* under the terms of the Q Public License version 1.0, with the change */ /* described in file LICENSE. */ /* */ /**************************************************************************/ (* This partial grammar specification defines the grammar's entry point to be an expression, followed with an end-of-line token. *) %start main %% main: | e = expr EOL { e } menhir-20130116/demos/calc-two/Makefile0000644000175000017500000000112412075533602016544 0ustar stephsteph# Add --table on the next line to use Menhir's table-based back-end. PGFLAGS := --infer GENERATED := tokens.ml tokens.mli algebraic.ml algebraic.mli reverse.ml reverse.mli lexer.ml MODULES := algebraic reverse lexer calc EXECUTABLE := calc OCAMLDEPWRAPPER := ../ocamldep.wrapper include ../Makefile.shared $(eval $(call menhir_monomodule,tokens,--only-tokens)) $(eval $(call menhir_multimodule,algebraic,tokens.mly algebraic.mly common.mly,--external-tokens Tokens)) $(eval $(call menhir_multimodule,reverse,tokens.mly reverse.mly common.mly,--external-tokens Tokens)) menhir-20130116/demos/calc-two/OMakefile0000644000175000017500000000067312075533602016673 0ustar stephstephGENERATED = tokens.ml tokens.mli algebraic.ml algebraic.mli reverse.ml reverse.mli lexer.ml MODULES = algebraic reverse lexer calc EXECUTABLE = calc OCAMLDEPWRAPPER = ../ocamldep.wrapper include ../OMakefile.shared MenhirMono(tokens,--only-tokens) MenhirMulti(algebraic,tokens.mly algebraic.mly,--external-tokens Tokens) MenhirMulti(reverse,tokens.mly reverse.mly,--external-tokens Tokens) .DEFAULT: $(EXECUTABLE)$(OSUFFIX) menhir-20130116/demos/calc-two/README0000644000175000017500000000116712075533602015773 0ustar stephstephThis tiny program reads arithmetic expressions from the standard input channel. Each expression is expected to be complete when the current line ends. Its value is then displayed on the standard output channel. 
In this version, there is a single lexer, but there are two parsers, one for expressions in algebraic (that is, infix) notation, one for expressions in reverse Polish (that is, postfix) notation. One of the two parsers is selected at runtime via a command line switch. This demo illustrates how to build two parsers that share a single set of tokens (see tokens.mly) and that share some productions (see common.mly). menhir-20130116/demos/calc-two/reverse.mly0000644000175000017500000000267612075533602017317 0ustar stephsteph/**************************************************************************/ /* */ /* Menhir */ /* */ /* Franois Pottier, INRIA Rocquencourt */ /* Yann Rgis-Gianas, PPS, Universit Paris Diderot */ /* */ /* Copyright 2005-2008 Institut National de Recherche en Informatique */ /* et en Automatique. All rights reserved. This file is distributed */ /* under the terms of the Q Public License version 1.0, with the change */ /* described in file LICENSE. */ /* */ /**************************************************************************/ (* This partial grammar specification defines the syntax of expressions in reverse Polish notation. Parentheses are meaningless, and unary minus is not supported (some other symbol than MINUS would be required in order to avoid an ambiguity). *) %% %public expr: | i = INT { i } | e1 = expr e2 = expr PLUS { e1 + e2 } | e1 = expr e2 = expr MINUS { e1 - e2 } | e1 = expr e2 = expr TIMES { e1 * e2 } | e1 = expr e2 = expr DIV { e1 / e2 } menhir-20130116/demos/calc-two/algebraic.mly0000644000175000017500000000300312075533602017536 0ustar stephsteph/**************************************************************************/ /* */ /* Menhir */ /* */ /* Franois Pottier, INRIA Rocquencourt */ /* Yann Rgis-Gianas, PPS, Universit Paris Diderot */ /* */ /* Copyright 2005-2008 Institut National de Recherche en Informatique */ /* et en Automatique. All rights reserved. 
This file is distributed */ /* under the terms of the Q Public License version 1.0, with the change */ /* described in file LICENSE. */ /* */ /**************************************************************************/ (* This partial grammar specification defines the syntax of expressions in algebraic notation. *) %left PLUS MINUS /* lowest precedence */ %left TIMES DIV /* medium precedence */ %nonassoc UMINUS /* highest precedence */ %% %public expr: | i = INT { i } | LPAREN e = expr RPAREN { e } | e1 = expr PLUS e2 = expr { e1 + e2 } | e1 = expr MINUS e2 = expr { e1 - e2 } | e1 = expr TIMES e2 = expr { e1 * e2 } | e1 = expr DIV e2 = expr { e1 / e2 } | MINUS e = expr %prec UMINUS { - e } menhir-20130116/demos/calc-two/OMakeroot0000644000175000017500000000035512075533602016734 0ustar stephsteph# # Load the standard configuration. # open build/Common # # The command-line variables are defined *after* the # standard configuration has been loaded. # DefineCommandVars() # # Include the OMakefile in this directory. # .SUBDIRS: . menhir-20130116/demos/calc-two/lexer.mll0000644000175000017500000000273612075533602016743 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. 
*) (* *) (**************************************************************************) { open Tokens exception Error of string } rule line = parse | ([^'\n']* '\n') as line { line } | eof { exit 0 } and token = parse | [' ' '\t'] { token lexbuf } | '\n' { EOL } | ['0'-'9']+ as i { INT (int_of_string i) } | '+' { PLUS } | '-' { MINUS } | '*' { TIMES } | '/' { DIV } | '(' { LPAREN } | ')' { RPAREN } | eof { exit 0 } | _ { raise (Error (Printf.sprintf "At offset %d: unexpected character.\n" (Lexing.lexeme_start lexbuf))) } menhir-20130116/demos/calc-two/calc.ml0000644000175000017500000000350212075533602016342 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) let algebraic = ref true let () = Arg.parse [ "--algebraic", Arg.Set algebraic, " Use algebraic (that is, infix) notation"; "--reverse", Arg.Clear algebraic, " Use reverse Polish (that is, postfix) notation"; ] (fun _ -> ()) (Printf.sprintf "Usage: %s " Sys.argv.(0)) let main = if !algebraic then Algebraic.main else Reverse.main let () = let stdinbuf = Lexing.from_channel stdin in while true do (* Read line by line. *) let linebuf = Lexing.from_string (Lexer.line stdinbuf) in try (* Run the parser on a single line of input. *) Printf.printf "%d\n%!" (main Lexer.token linebuf) with | Lexer.Error msg -> Printf.fprintf stderr "%s%!" msg | Algebraic.Error | Reverse.Error -> Printf.fprintf stderr "At offset %d: syntax error.\n%!" 
(Lexing.lexeme_start linebuf) done menhir-20130116/demos/calc-two/tokens.mly0000644000175000017500000000220112075533602017127 0ustar stephsteph/**************************************************************************/ /* */ /* Menhir */ /* */ /* Franois Pottier, INRIA Rocquencourt */ /* Yann Rgis-Gianas, PPS, Universit Paris Diderot */ /* */ /* Copyright 2005-2008 Institut National de Recherche en Informatique */ /* et en Automatique. All rights reserved. This file is distributed */ /* under the terms of the Q Public License version 1.0, with the change */ /* described in file LICENSE. */ /* */ /**************************************************************************/ (* This partial grammar specification defines the set of tokens. *) %token INT %token PLUS MINUS TIMES DIV %token LPAREN RPAREN %token EOL %% menhir-20130116/demos/calc-param/0002755000175000017500000000000012075533602015377 5ustar stephstephmenhir-20130116/demos/calc-param/Makefile0000644000175000017500000000066512075533602017044 0ustar stephsteph# Add --table on the next line to use Menhir's table-based back-end. 
PGFLAGS := --infer GENERATED := parser.ml parser.mli lexer.ml tokens.ml tokens.mli MODULES := parser lexer calc EXECUTABLE := calc OCAMLDEPWRAPPER := ../ocamldep.wrapper include ../Makefile.shared $(eval $(call menhir_monomodule,tokens,--only-tokens)) $(eval $(call menhir_multimodule,parser,tokens.mly parser.mly,--external-tokens Tokens)) menhir-20130116/demos/calc-param/OMakefile0000644000175000017500000000033412075533602017154 0ustar stephstephGENERATED = parser.ml parser.mli lexer.ml MODULES = parser lexer calc EXECUTABLE = calc OCAMLDEPWRAPPER = ../ocamldep.wrapper include ../OMakefile.shared MenhirAuto() .DEFAULT: $(EXECUTABLE)$(OSUFFIX) menhir-20130116/demos/calc-param/README0000644000175000017500000000054512075533602016261 0ustar stephstephIn this variant of the calc demo, the parser's semantic actions are parameterized over a structure, called [Semantics], which defines how numbers should be interpreted. The parser is later instantiated with floating-point numbers, so the calculator actually performs floating-point evaluation -- but the grammar specification is independent of this detail. menhir-20130116/demos/calc-param/parser.mly0000644000175000017500000000362412075533602017421 0ustar stephsteph/**************************************************************************/ /* */ /* Menhir */ /* */ /* Franois Pottier, INRIA Rocquencourt */ /* Yann Rgis-Gianas, PPS, Universit Paris Diderot */ /* */ /* Copyright 2005-2008 Institut National de Recherche en Informatique */ /* et en Automatique. All rights reserved. This file is distributed */ /* under the terms of the Q Public License version 1.0, with the change */ /* described in file LICENSE. */ /* */ /**************************************************************************/ (* These are the functions that we need in order to write our semantic actions. 
*) %parameter number val ( + ): number -> number -> number val ( - ): number -> number -> number val ( * ): number -> number -> number val ( / ): number -> number -> number val ( ~-): number -> number end> (* The parser no longer returns an integer; instead, it returns an abstract number. *) %start main (* Let us open the [Semantics] module, so as to make all of its operations available in the semantic actions. *) %{ open Semantics %} %% main: | e = expr EOL { e } expr: | i = INT { inject i } | LPAREN e = expr RPAREN { e } | e1 = expr PLUS e2 = expr { e1 + e2 } | e1 = expr MINUS e2 = expr { e1 - e2 } | e1 = expr TIMES e2 = expr { e1 * e2 } | e1 = expr DIV e2 = expr { e1 / e2 } | MINUS e = expr %prec UMINUS { - e } menhir-20130116/demos/calc-param/OMakeroot0000644000175000017500000000035512075533602017223 0ustar stephsteph# # Load the standard configuration. # open build/Common # # The command-line variables are defined *after* the # standard configuration has been loaded. # DefineCommandVars() # # Include the OMakefile in this directory. # .SUBDIRS: . menhir-20130116/demos/calc-param/lexer.mll0000644000175000017500000000273612075533602017232 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. 
*) (* *) (**************************************************************************) { open Tokens exception Error of string } rule line = parse | ([^'\n']* '\n') as line { line } | eof { exit 0 } and token = parse | [' ' '\t'] { token lexbuf } | '\n' { EOL } | ['0'-'9']+ as i { INT (int_of_string i) } | '+' { PLUS } | '-' { MINUS } | '*' { TIMES } | '/' { DIV } | '(' { LPAREN } | ')' { RPAREN } | eof { exit 0 } | _ { raise (Error (Printf.sprintf "At offset %d: unexpected character.\n" (Lexing.lexeme_start lexbuf))) } menhir-20130116/demos/calc-param/calc.ml0000644000175000017500000000353312075533602016635 0ustar stephsteph(**************************************************************************) (* *) (* Menhir *) (* *) (* Franois Pottier, INRIA Rocquencourt *) (* Yann Rgis-Gianas, PPS, Universit Paris Diderot *) (* *) (* Copyright 2005-2008 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the change *) (* described in file LICENSE. *) (* *) (**************************************************************************) (* Let's do floating-point evaluation, for a change. *) module FloatSemantics = struct type number = float let inject = float_of_int let ( + ) = ( +. ) let ( - ) = ( -. ) let ( * ) = ( *. ) let ( / ) = ( /. ) let (~- ) = (~-. ) end (* Let us now specialize our parameterized parser. *) module FloatParser = Parser.Make(FloatSemantics) (* The rest is as usual. *) let () = let stdinbuf = Lexing.from_channel stdin in while true do (* Read line by line. *) let linebuf = Lexing.from_string (Lexer.line stdinbuf) in try (* Run the parser on a single line of input. *) Printf.printf "%.1f\n%!" (FloatParser.main Lexer.token linebuf) with | Lexer.Error msg -> Printf.fprintf stderr "%s%!" msg | FloatParser.Error -> Printf.fprintf stderr "At offset %d: syntax error.\n%!" 
(Lexing.lexeme_start linebuf) done menhir-20130116/demos/calc-param/tokens.mly0000644000175000017500000000255612075533602017433 0ustar stephsteph/**************************************************************************/ /* */ /* Menhir */ /* */ /* Franois Pottier, INRIA Rocquencourt */ /* Yann Rgis-Gianas, PPS, Universit Paris Diderot */ /* */ /* Copyright 2005-2008 Institut National de Recherche en Informatique */ /* et en Automatique. All rights reserved. This file is distributed */ /* under the terms of the Q Public License version 1.0, with the change */ /* described in file LICENSE. */ /* */ /**************************************************************************/ (* We want the tokens to be independent of the [Semantics] parameter, so we declare them here, in a separate file, as opposed to within [parser.mly]. *) %token INT %token PLUS MINUS TIMES DIV %token LPAREN RPAREN %token EOL %left PLUS MINUS /* lowest precedence */ %left TIMES DIV /* medium precedence */ %nonassoc UMINUS /* highest precedence */ %%