menhir-20151112/0000755000175000017500000000000012621170077012173 5ustar mehdimehdimenhir-20151112/Makefile0000644000175000017500000001506612621170073013637 0ustar mehdimehdi# This is the main Makefile that is shipped as part of the source package. # Keep in mind that the hierarchy that is shipped is not identical to the # hierarchy within the git repository. Some sub-directories are not shipped. # The documentation (manual.pdf, menhir.1) is pre-built and stored at the root. # This Makefile can also be used directly in the repository. In that case, # the documentation and demos are not installed. # The hierarchy that is shipped includes: # demos # menhir.1 # manual.pdf # src # Makefile (this one) # ---------------------------------------------------------------------------- # The following variables must/can be configured. ifndef PREFIX $(error Please define PREFIX) endif ifndef TARGET TARGET := native endif # ---------------------------------------------------------------------------- # By default, we attempt to use ocamlfind (if present in the PATH), but it # is possible to prevent that externally by setting USE_OCAMLFIND to false. ifndef USE_OCAMLFIND USE_OCAMLFIND = ocamlfind ocamlc -v >/dev/null 2>&1 endif # ---------------------------------------------------------------------------- # Installation paths. # These may be overridden from outside; e.g., our opam package description # provides its own values of docdir, libdir, and mandir. bindir := $(PREFIX)/bin docdir := $(PREFIX)/share/doc/menhir libdir := $(PREFIX)/share/menhir mandir := $(PREFIX)/share/man/man1 MANS := menhir.1 DOCS := manual.pdf demos MLYLIB := src/standard.mly # ---------------------------------------------------------------------------- # A few settings differ on Windows versus Unix. # If the compiler is MSVC, then the name of the executable file ends in .exe, # and object file names end in .obj instead of .o. ifneq (,$(shell ocamlc -config | grep -E "ccomp_type: msvc")) MENHIREXE := menhir.exe OBJ := obj # LIBSUFFIX := lib else MENHIREXE := menhir OBJ := o # LIBSUFFIX := a endif # The path $(installation_libdir), which is recorded in src/installation.ml (see # below), must sometimes be translated using cygpath. # This one is tricky. To summarize, if I understood correctly, we can assume # that Cygwin always exists when Menhir is compiled and installed (because # executing a Makefile, like this one, requires Cygwin), but we cannot assume # that Menhir will be executed under Cygwin. If the OCaml compiler is # configured to produce a Cygwin executable, then, yes, Cygwin is there at # execution time, so path translation is not necessary (and should not be # performed). On the other hand, if the OCaml compiler is configured to # produce a native Windows executable, then Cygwin is not there at execution # time and path translation is required. In summary, path translation must be # performed if "os_type" is "Win32" or "Win64", and must not be performed if # "os_type" is "Cygwin" or "Unix". ifneq (,$(shell ocamlc -config | grep -E "os_type: (Win32|Win64)")) installation_libdir := $(shell cygpath -m $(libdir)) else installation_libdir := $(libdir) endif # ------------------------------------------------------------------------- # The names of the modules in MenhirLib are obtained by reading the # non-comment lines in menhirLib.mlpack. 
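# For example, if menhirLib.mlpack consisted of the two lines "InfiniteArray"
# and "PackedIntArray" (a hypothetical excerpt; the real file lists more
# modules), then MENHIRLIB_MODULES would be "InfiniteArray PackedIntArray".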
MENHIRLIB_MODULES := $(shell grep -ve "^[ \t\n\r]*\#" src/menhirLib.mlpack) # ---------------------------------------------------------------------------- # The directory where things are built. BUILDDIR := src/_stage2 # ---------------------------------------------------------------------------- # Compilation. .PHONY: all install uninstall all: # Installation time settings are recorded within src/installation.ml. # This file is recreated every time so as to avoid becoming stale. @ rm -f src/installation.ml @ echo "let libdir = \"$(installation_libdir)\"" > src/installation.ml @ if $(USE_OCAMLFIND) ; then \ echo "let ocamlfind = true" >> src/installation.ml ; \ else \ echo "let ocamlfind = false" >> src/installation.ml ; \ fi # Compile the library modules and the Menhir executable. @ $(MAKE) -C src library bootstrap # The source file menhirLib.ml is created by concatenating all of the source # files that make up MenhirLib. This file is not needed to compile Menhir or # MenhirLib. It is installed at the same time as MenhirLib and is copied by # Menhir when the user requests a self-contained parser (one that is not # dependent on MenhirLib). @ echo "Creating menhirLib.ml" @ rm -f $(BUILDDIR)/menhirLib.ml @ for m in $(MENHIRLIB_MODULES) ; do \ echo "module $$m = struct" >> $(BUILDDIR)/menhirLib.ml ; \ cat src/$$m.ml >> $(BUILDDIR)/menhirLib.ml ; \ echo "end" >> $(BUILDDIR)/menhirLib.ml ; \ done # The source file menhirLib.mli is created in the same way. If a module # does not have an .mli file, then we assume that its .ml file contains # type (and module type) definitions only, so we copy it instead of the # (non-existent) .mli file. @ echo "Creating menhirLib.mli" @ rm -f $(BUILDDIR)/menhirLib.mli @ for m in $(MENHIRLIB_MODULES) ; do \ echo "module $$m : sig" >> $(BUILDDIR)/menhirLib.mli ; \ if [ -f src/$$m.mli ] ; then \ cat src/$$m.mli >> $(BUILDDIR)/menhirLib.mli ; \ else \ cat src/$$m.ml >> $(BUILDDIR)/menhirLib.mli ; \ fi ; \ echo "end" >> $(BUILDDIR)/menhirLib.mli ; \ done # ------------------------------------------------------------------------- # The files that should be installed as part of menhirLib. MENHIRLIB := menhirLib.mli menhirLib.ml menhirLib.cmi menhirLib.cmo ifneq ($(TARGET),byte) MENHIRLIB := $(MENHIRLIB) menhirLib.cmx menhirLib.$(OBJ) endif # ---------------------------------------------------------------------------- # Installation. install: # Install the executable. mkdir -p $(bindir) install $(BUILDDIR)/menhir.$(TARGET) $(bindir)/$(MENHIREXE) # Install the library. mkdir -p $(libdir) install -m 644 $(MLYLIB) $(libdir) @if $(USE_OCAMLFIND) ; then \ echo Installing MenhirLib via ocamlfind. ; \ ocamlfind install menhirLib src/META $(patsubst %,$(BUILDDIR)/%,$(MENHIRLIB)) ; \ else \ echo Installing MenhirLib manually. ; \ install -m 644 $(patsubst %,$(BUILDDIR)/%,$(MENHIRLIB)) $(libdir) ; \ fi # Install the documentation, if it has been built. if [ -f manual.pdf ] ; then \ mkdir -p $(docdir) ; \ mkdir -p $(mandir) ; \ cp -r $(DOCS) $(docdir) ; \ cp -r $(MANS) $(mandir) ; \ fi uninstall: rm -rf $(bindir)/$(MENHIREXE) rm -rf $(libdir) @if $(USE_OCAMLFIND) ; then \ echo Un-installing MenhirLib via ocamlfind. ; \ ocamlfind remove menhirLib ; \ fi rm -rf $(docdir) rm -rf $(mandir)/$(MANS) menhir-20151112/src/0000755000175000017500000000000012621170074012757 5ustar mehdimehdimenhir-20151112/src/grammar.ml0000644000175000017500000000026512621170073014741 0ustar mehdimehdi(* This module runs the grammar functor on the grammar produced by the front-end. 
*) include GrammarFunctor.Make(struct let grammar = Front.grammar let verbose = true end) menhir-20151112/src/listMonad.ml0000644000175000017500000000154112621170073015243 0ustar mehdimehditype 'a m = 'a list let return x = [ x ] let bind l f = List.flatten (List.map f l) let ( >>= ) l f = bind l f (* 1. (return x) >>= f == f x bind [ x ] f = List.flatten (List.map f [ x ]) = f x 2. m >>= return == m bind l return = List.flatten (List.map (fun x -> [ x ]) (x1::x2::..::xn)) = List.flatten ([x1]::...::[xn]) = x1::...::xn = l 3. (m >>= f) >>= g == m >>= (\x -> f x >>= g) bind (bind l f) g = List.flatten (List.map g (List.flatten (List.map f (x1::...::xn)))) = List.flatten (List.map g (f x1 :: f x2 :: ... :: f xn)) = List.flatten (List.map g ([fx1_1; fx1_2 ... ] :: [fx2_1; ... ] :: ...)) = List.flatten ([ g fx1_1; g fx_1_2 ... ] :: [ g fx_2_1; ... ] ...) = List.flatten (List.map (fun x -> List.flatten (List.map g (f x))) l) = bind l (fun x -> bind (f x) g) *) menhir-20151112/src/interface.mli0000644000175000017500000000150012621170073015415 0ustar mehdimehdi(* This module defines the interface of the generated parser. *) (* This is the [Error] exception. *) val excname: string val excdef: IL.excdef (* The type of the entry point for the start symbol [nt]. *) val entrytypescheme: UnparameterizedSyntax.grammar -> string -> IL.typescheme (* The name of the interpreter sub-module, when the table back-end is used. *) val interpreter: string (* The type ['a checkpoint], defined in the interpreter sub-module. *) val checkpoint: IL.typ -> IL.typ (* The name of the sub-module that contains the incremental entry points. *) val incremental: string (* The name of the sub-module that contains the inspection API. *) val inspection: string (* This writes the interface of the generated parser to the [.mli] file. *) val write: UnparameterizedSyntax.grammar -> unit -> unit menhir-20151112/src/lexdep.mll0000644000175000017500000000213312621170073014744 0ustar mehdimehdi(* This code analyzes the output of [ocamldep] and returns the list of [.cmi] files that the [.cmo] file depends on. *) { open Lexing exception Error of string let fail lexbuf = raise (Error (Printf.sprintf "failed to make sense of ocamldep's output (character %d).\n" lexbuf.lex_curr_p.pos_cnum) ) } let newline = ('\n' | '\r' | "\r\n") let whitespace = ( ' ' | '\t' | ('\\' newline) ) let entrychar = [^ '\n' '\r' '\t' ' ' '\\' ':' ] let entry = ((entrychar+ as basename) ".cm" ('i' | 'o' | 'x') as filename) (* [main] recognizes a sequence of lines, where a line consists of an entry, followed by a colon, followed by a list of entries. *) rule main = parse | eof { [] } | entry whitespace* ":" { let bfs = collect [] lexbuf in ((basename, filename), bfs) :: main lexbuf } | _ { fail lexbuf } (* [collect] recognizes a list of entries, separated with spaces and ending in a newline. *) and collect bfs = parse | whitespace+ entry { collect ((basename, filename) :: bfs) lexbuf } | whitespace* newline { bfs } | _ | eof { fail lexbuf } menhir-20151112/src/yaccDriver.ml0000644000175000017500000000106312621170073015403 0ustar mehdimehdi(* The module [Driver] serves to offer a unified API to the parser, which could be produced by either ocamlyacc or Menhir. *) (* This is the ocamlyacc-specific driver. There is nothing special to do. We handle syntax errors in a minimalistic manner. This error handling code will be exercised only if there is a syntax error in [fancy-parser.mly], during stage 2 of the bootstrap process. 
*) let grammar lexer lexbuf = try Parser.grammar lexer lexbuf with Parsing.Parse_error -> Error.error (Positions.lexbuf lexbuf) "syntax error." menhir-20151112/src/sentenceLexer.mll0000644000175000017500000000406112621170073016271 0ustar mehdimehdi(* This lexer is used to read the sentences provided on the standard input channel when [--interpret] is enabled. *) { open Lexing open SentenceParser open Grammar (* A short-hand. *) let error2 lexbuf = Error.error (Positions.two lexbuf.lex_start_p lexbuf.lex_curr_p) } let newline = ('\010' | '\013' | "\013\010") let whitespace = [ ' ' '\t' ';' ] let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_'] let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222'] let identchar = ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '0'-'9'] (* '\'' forbidden *) let autocomment = "##" [^'\010''\013']* newline let comment = "#" [^'\010''\013']* newline let skip = newline whitespace* newline rule lex = parse (* An identifier that begins with an lowercase letter is considered a non-terminal symbol. It should be a start symbol. *) | (lowercase identchar *) as lid { try let nt = Nonterminal.lookup lid in if StringSet.mem lid Front.grammar.UnparameterizedSyntax.start_symbols then NONTERMINAL (nt, lexbuf.lex_start_p, lexbuf.lex_curr_p) else error2 lexbuf "\"%s\" is not a start symbol." lid with Not_found -> error2 lexbuf "\"%s\" is not a known non-terminal symbol." lid } (* An identifier that begins with an uppercase letter is considered a terminal symbol. *) | (uppercase identchar *) as uid { try TERMINAL (Terminal.lookup uid, lexbuf.lex_start_p, lexbuf.lex_curr_p) with Not_found -> error2 lexbuf "\"%s\" is not a known terminal symbol." uid } (* Whitespace is ignored. *) | whitespace { lex lexbuf } (* The end of a line is translated to [EOL]. *) | newline { new_line lexbuf; EOL } (* An auto-generated comment is ignored. *) | autocomment { new_line lexbuf; lex lexbuf } (* A manually-written comment is preserved. *) | comment as c { new_line lexbuf; COMMENT c } (* The end of file is translated to [EOF]. *) | eof { EOF } (* A colon. *) | ':' { COLON } | _ { error2 lexbuf "unexpected character." } menhir-20151112/src/FixSolver.mli0000644000175000017500000000126012621170073015401 0ustar mehdimehdimodule Make (M : Fix.IMPERATIVE_MAPS) (P : sig include Fix.PROPERTY val union: property -> property -> property end) : sig (* Variables and constraints. A constraint is an inequality between a constant or a variable, on the left-hand side, and a variable, on the right-hand side. *) type variable = M.key type property = P.property (* An imperative interface, where we create a new constraint system, and are given three functions to add constraints and (once we are done adding) to solve the system. *) val create: unit -> (property -> variable -> unit) * (variable -> variable -> unit) * (unit -> (variable -> property)) end menhir-20151112/src/infer.mli0000644000175000017500000000114412621170073014564 0ustar mehdimehdi(* [ntvar symbol] is the name of the type variable associated with a nonterminal symbol. *) val ntvar: string -> string (* [infer grammar] analyzes the grammar [grammar] and returns a new grammar, augmented with a [%type] declaration for every nonterminal symbol. The [ocamlc] compiler is used to infer types. *) val infer: UnparameterizedSyntax.grammar -> UnparameterizedSyntax.grammar (* [depend grammar] prints (on the standard output channel) the Objective Caml dependencies induced by the semantic actions. Then, it exits the program. 
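(This explains the universally quantified result type ['a] below: since [depend] never returns normally, it can claim any result type.)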
*) val depend: UnparameterizedSyntax.grammar -> 'a menhir-20151112/src/parserMessages.messages0000644000175000017500000002321212621170073017473 0ustar mehdimehdi# ---------------------------------------------------------------------------- grammar: UID Either a declaration or %% is expected at this point. # ---------------------------------------------------------------------------- grammar: HEADER UID Either another declaration or %% is expected at this point. # ---------------------------------------------------------------------------- grammar: TYPE UID grammar: TYPE OCAMLTYPE TYPE grammar: TYPE OCAMLTYPE UID PREC grammar: TYPE OCAMLTYPE UID LPAREN TYPE grammar: TYPE OCAMLTYPE UID COMMA TYPE grammar: TYPE OCAMLTYPE UID LPAREN UID UID grammar: TYPE OCAMLTYPE UID LPAREN UID COMMA TYPE grammar: TYPE OCAMLTYPE UID PLUS RPAREN grammar: ON_ERROR_REDUCE TYPE # %type and %on_error_reduce are both followed with clist(strict_actual), # so they are not distinguished in the automaton. Ill-formed declaration. Examples of well-formed declarations: %type expression %type date time %type option(date) %on_error_reduce expression %on_error_reduce date time %on_error_reduce option(date) # ---------------------------------------------------------------------------- grammar: TOKEN TYPE grammar: TOKEN OCAMLTYPE TYPE grammar: TOKEN UID STAR grammar: TOKEN UID COMMA TYPE Ill-formed %token declaration. Examples of well-formed declarations: %token FOO %token DOT SEMICOLON %token LID UID # ---------------------------------------------------------------------------- grammar: START UID grammar: START OCAMLTYPE LEFT grammar: START LID UID grammar: START LID COMMA UID Ill-formed %start declaration. A start symbol must begin with a lowercase letter. Examples of well-formed declarations: %start program %start expression phrase %start date time # ---------------------------------------------------------------------------- grammar: RIGHT TYPE grammar: RIGHT UID STAR grammar: RIGHT UID COMMA TYPE Ill-formed precedence declaration. Examples of well-formed declarations: %left PLUS %left PLUS MINUS %nonassoc unary_minus %right CONCAT # ---------------------------------------------------------------------------- grammar: PARAMETER UID Ill-formed %parameter declaration. Examples of well-formed declarations: %parameter # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT TYPE Either a rule or %% is expected at this point. # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT UID COLON ACTION TYPE Either another rule or %% is expected at this point. # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT PUBLIC TYPE grammar: PERCENTPERCENT INLINE TYPE grammar: PERCENTPERCENT PUBLIC INLINE TYPE Ill-formed rule. %inline, %public, or a non-terminal symbol is expected at this point. Examples of well-formed rules: %public option(X): { None } | x = X { Some x } %inline clist(X): xs = separated_nonempty_list(COMMA?, X) { xs } %public %inline pair(X, Y): x = X; y = Y { (x, y) } # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT UID UID Ill-formed rule. Either a parenthesized, comma-delimited list of formal parameters or a colon is expected at this point. 
Examples of well-formed rules: main: e = expr EOL { e } expr: i = INT { i } | e1 = expr PLUS e2 = expr { e1 + e2 } option(X): { None } | x = X { Some x } # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT UID LPAREN TYPE grammar: PERCENTPERCENT UID LPAREN UID UID grammar: PERCENTPERCENT UID LPAREN UID COMMA TYPE Ill-formed rule. A comma-delimited list of formal parameters is expected at this point. Examples of well-formed rules: option(X): { None } | x = X { Some x } pair(X, Y): x = X; y = Y { (x, y) } # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT UID COLON TYPE grammar: PERCENTPERCENT UID COLON BAR TYPE grammar: PERCENTPERCENT UID COLON ACTION BAR TYPE grammar: PERCENTPERCENT UID COLON UID BAR TYPE Ill-formed rule. A list of productions is expected at this point. Examples of well-formed rules: main: e = expr EOL { e } expr: i = INT { i } | e1 = expr PLUS e2 = expr { e1 + e2 } symbol: s = LID | s = UID { s } # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT UID COLON UID TYPE grammar: PERCENTPERCENT UID COLON LID TYPE grammar: PERCENTPERCENT UID COLON LID EQUAL TYPE grammar: PERCENTPERCENT UID COLON UID PLUS TYPE grammar: PERCENTPERCENT UID COLON LID EQUAL UID PLUS TYPE Ill-formed production. A production is a sequence of producers, followed with a semantic action. Examples of well-formed producers: expr option(COMMA) separated_list(COMMA, expr) e = expr ds = declaration* es = list(terminated(expr, SEMI)) es = list(e = expr SEMI { e }) xs = list(x = var { Some x } | WILDCARD { None }) # The following sentences are similar, but have an open parenthesis. # Suggesting that a parenthesis could be closed seems a safe bet. These # sentences are otherwise tricky. In front of us could be many things (comma, # closing parenthesis, identifier, modifier, %prec keyword, etc.). We expect # to reduce to actual or lax_actual. Let's just back up to a safe level of # abstraction and say this is an ill-formed production. grammar: PERCENTPERCENT UID COLON UID LPAREN UID TYPE grammar: PERCENTPERCENT UID COLON UID LPAREN UID STAR TYPE grammar: PERCENTPERCENT UID COLON UID LPAREN UID LPAREN LID RPAREN TYPE grammar: PERCENTPERCENT UID COLON UID LPAREN LID TYPE grammar: PERCENTPERCENT UID COLON UID LPAREN ACTION BAR TYPE Ill-formed production. Maybe you meant to close a parenthesis at this point? A production is a sequence of producers, followed with a semantic action. Examples of well-formed producers: expr option(COMMA) separated_list(COMMA, expr) e = expr ds = declaration* es = list(terminated(expr, SEMI)) es = list(e = expr SEMI { e }) xs = list(x = var { Some x } | WILDCARD { None }) # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT UID COLON UID LPAREN ACTION UID # In the non-canonical automaton, this is a tricky case where, looking at the # description of the state, it seems that only COMMA and RPAREN can follow # here. But in fact, other tokens are possible, such as BAR, simply because # they will NOT take us into this state. In the canonical automaton, the list # of possibilities is explicit in the lookahead sets. grammar: PERCENTPERCENT UID COLON UID LPAREN ACTION PREC UID UID # In the first case above, we may expect a %prec annotation, whereas in the # second case above, we have just seen it. 
In the error message, we merge # these two situations and do not mention the possibility of a %prec # annotation. Either another production | ... or a comma or a closing parenthesis is expected at this point. # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT UID COLON PREC TYPE grammar: PERCENTPERCENT UID COLON UID LPAREN ACTION PREC TYPE grammar: PERCENTPERCENT UID COLON ACTION PREC TYPE Ill-formed %prec annotation. A symbol is expected at this point. Examples of well-formed annotations: expr: MINUS e = expr %prec UMINUS { -e } # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT UID COLON UID LPAREN TYPE grammar: PERCENTPERCENT UID COLON UID LPAREN UID COMMA TYPE Ill-formed rule. A comma-delimited list of actual parameters is expected at this point. Examples of well-formed rules: call: f = callee LPAREN args = separated_list(COMMA, expr) RPAREN { f, args } list(X): { [] } | x = X; xs = list(X) { x :: xs } # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT UID COLON PREC LID UID Ill-formed rule. Either a semantic action { ... } or another production | ... is expected here. Examples of well-formed rules: expr: MINUS e = expr %prec UMINUS { -e } # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT UID LPAREN UID RPAREN BAR Ill-formed rule. A colon is expected at this point. Examples of well-formed rules: option(X): { None } | x = X { Some x } # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT UID COLON ACTION PREC UID TYPE Either another rule or another production | ... is expected at this point. Examples of well-formed rules: option(X): { None } | x = X { Some x } # ---------------------------------------------------------------------------- grammar: TYPE OCAMLTYPE UID LPAREN UID LPAREN TYPE grammar: PERCENTPERCENT UID COLON UID LPAREN UID LPAREN TYPE Ill-formed list of actual parameters. A comma-delimited list of actual parameters is expected at this point. Examples of well-formed actual parameters: expr expr+ option(expr) separated_list(COMMA, expr) # Omitting the fact that an anonymous rule is a valid actual parameter... # Also omitting the subtle distinctions between lax_actual, actual, etc. # ---------------------------------------------------------------------------- grammar: TYPE OCAMLTYPE UID LPAREN UID PLUS UID Ill-formed list of actual parameters. A modifier, a closing parenthesis, or a comma is expected at this point. A modifier is * or + or ?. Examples of well-formed actual parameters: expr expr+ option(expr) separated_list(COMMA, expr) # ------------------------------------------------------------------------------ # Local Variables: # mode: shell-script # End: menhir-20151112/src/stretch.mli0000644000175000017500000000200512621170073015132 0ustar mehdimehdi(* A stretch is a fragment of a source file. It holds the file name, the line number, and the line count (that is, the length) of the fragment. These are used for generating #line directives when the fragment is copied to an output file. It also holds the textual content of the fragment, as a string. The [raw_content] field holds the text that was found in the source file, while the [content] field holds the same text after transformation by the lexer (which may substitute keywords, insert padding, insert parentheses, etc.). 
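For example, one would expect a keyword such as $startpos, occurring in the raw text, to appear in substituted form in the [stretch_content] field.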
See [Lexer.mk_stretch] and its various call sites in [Lexer]. *) type t = { stretch_filename : string; stretch_linenum : int; stretch_linecount : int; stretch_raw_content : string; stretch_content : string; stretch_keywords : Keyword.keyword list } (* An Objective Caml type is either a stretch (if it was found in some source file) or a string (if it was inferred via [Infer]). *) type ocamltype = | Declared of t | Inferred of string menhir-20151112/src/coqBackend.mli0000644000175000017500000000015012621170073015507 0ustar mehdimehdi(* The coq code generator. *) module Run (T: sig end) : sig val write_all: out_channel -> unit end menhir-20151112/src/gMap.ml0000644000175000017500000001321212621170073014173 0ustar mehdimehdimodule type S = sig (* Keys are assumed to have a natural total order. *) type key (* The type of maps whose data have type ['a]. *) type 'a t (* The empty map. *) val empty: 'a t (* [lookup k m] looks up the value associated to the key [k] in the map [m], and raises [Not_found] if no value is bound to [k]. *) val lookup: key -> 'a t -> 'a val find: key -> 'a t -> 'a (* [add k d m] returns a map whose bindings are all bindings in [m], plus a binding of the key [k] to the datum [d]. If a binding already exists for [k], it is overridden. *) val add: key -> 'a -> 'a t -> 'a t (* [strict_add k d m] returns a map whose bindings are all bindings in [m], plus a binding of the key [k] to the datum [d]. If a binding already exists for [k] then [Unchanged] is raised. *) exception Unchanged val strict_add: key -> 'a -> 'a t -> 'a t (* [fine_add decide k d m] returns a map whose bindings are all bindings in [m], plus a binding of the key [k] to the datum [d]. If a binding from [k] to [d0] already exists, then the resulting map contains a binding from [k] to [decide d0 d]. *) type 'a decision = 'a -> 'a -> 'a val fine_add: 'a decision -> key -> 'a -> 'a t -> 'a t (* [mem k m] tells whether the key [k] appears in the domain of the map [m]. *) val mem: key -> 'a t -> bool (* [singleton k d] returns a map whose only binding is from [k] to [d]. *) val singleton: key -> 'a -> 'a t (* [is_empty m] returns [true] if and only if the map [m] defines no bindings at all. *) val is_empty: 'a t -> bool (* [is_singleton s] returns [Some x] if [s] is a singleton containing [x] as its only element; otherwise, it returns [None]. *) val is_singleton: 'a t -> (key * 'a) option (* [cardinal m] returns [m]'s cardinal, that is, the number of keys it binds, or, in other words, the cardinal of its domain. *) val cardinal: 'a t -> int (* [choose m] returns an arbitrarily chosen binding in [m], if [m] is nonempty, and raises [Not_found] otherwise. *) val choose: 'a t -> key * 'a (* [lookup_and_remove k m] looks up the value [v] associated to the key [k] in the map [m], and raises [Not_found] if no value is bound to [k]. The call returns the value [v], together with the map [m] deprived from the binding from [k] to [v]. *) val lookup_and_remove: key -> 'a t -> 'a * 'a t val find_and_remove: key -> 'a t -> 'a * 'a t (* [remove k m] is the map [m] deprived from any binding for [k]. *) val remove: key -> 'a t -> 'a t (* [union m1 m2] returns the union of the maps [m1] and [m2]. Bindings in [m2] take precedence over those in [m1]. *) val union: 'a t -> 'a t -> 'a t (* [fine_union decide m1 m2] returns the union of the maps [m1] and [m2]. If a key [k] is bound to [x1] (resp. [x2]) within [m1] (resp. [m2]), then [decide] is called. 
It is passed [x1] and [x2], and must return the value that shall be bound to [k] in the final map. *) val fine_union: 'a decision -> 'a t -> 'a t -> 'a t (* [iter f m] invokes [f k x], in turn, for each binding from key [k] to element [x] in the map [m]. Keys are presented to [f] in increasing order. *) val iter: (key -> 'a -> unit) -> 'a t -> unit (* [fold f m seed] invokes [f k d accu], in turn, for each binding from key [k] to datum [d] in the map [m]. Keys are presented to [f] in increasing order. The initial value of [accu] is [seed]; then, at each new call, its value is the value returned by the previous invocation of [f]. The value returned by [fold] is the final value of [accu]. *) val fold: (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b (* [fold_rev] performs exactly the same job as [fold], but presents keys to [f] in the opposite order. *) val fold_rev: (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b (* [filter f m] returns a copy of the map [m] where only the bindings that satisfy [f] have been retained. *) val filter: (key -> 'a -> bool) -> 'a t -> 'a t (* It is valid to evaluate [iter2 f m1 m2] if and only if [m1] and [m2] have equal domains. Doing so invokes [f k x1 x2], in turn, for each key [k] bound to [x1] in [m1] and to [x2] in [m2]. Bindings are presented to [f] in increasing order. *) val iter2: (key -> 'a -> 'b -> unit) -> 'a t -> 'b t -> unit (* [map f m] returns the map obtained by composing the map [m] with the function [f]; that is, the map $k\mapsto f(m(k))$. *) val map: ('a -> 'b) -> 'a t -> 'b t (* [endo_map] is similar to [map], but attempts to physically share its result with its input. This saves memory when [f] is the identity function. *) val endo_map: ('a -> 'a) -> 'a t -> 'a t (* If [dcompare] is an ordering over data, then [compare dcompare] is an ordering over maps. *) val compare: ('a -> 'a -> int) -> 'a t -> 'a t -> int (* A map's domain is a set. Thus, to be able to perform operations on domains, we need set operations, provided by the [Domain] sub-module. The two-way connection between maps and their domains is given by two additional functions, [domain] and [lift]. [domain m] returns [m]'s domain. [lift f s] returns the map $k\mapsto f(k)$, where $k$ ranges over a set of keys [s]. *) module Domain : GSet.S with type element = key val domain: 'a t -> Domain.t val lift: (key -> 'a) -> Domain.t -> 'a t (* [corestrict m d] performs a co-restriction of the map [m] to the domain [d]. That is, it returns the map $k\mapsto m(k)$, where $k$ ranges over all keys bound in [m] but \emph{not} present in [d]. *) val corestrict: 'a t -> Domain.t -> 'a t end menhir-20151112/src/front.mli0000644000175000017500000000074012621170073014612 0ustar mehdimehdi(* This module drives the front-end. It opens and parses the input files, which yields a number of partial grammars. It joins these grammars, expands them to get rid of parameterized nonterminals, and performs reachability analysis. This yields a single unified grammar. It then performs type inference. This yields the grammar that the back-end works with (often through the interface provided by module [Grammar]). *) val grammar: UnparameterizedSyntax.grammar menhir-20151112/src/Maps.mli0000644000175000017500000000442512621170073014366 0ustar mehdimehdi(* This module defines three signatures for association maps, together with a number of conversion functors. *) (* Following the convention of the ocaml standard library, the [find] functions raise [Not_found] when the key is not a member of the domain of the map. 
By contrast, [get] returns an option. *) (* BEGIN PERSISTENT_MAPS *) module type PERSISTENT_MAPS = sig type key type 'data t val empty: 'data t val add: key -> 'data -> 'data t -> 'data t val find: key -> 'data t -> 'data val iter: (key -> 'data -> unit) -> 'data t -> unit end (* END PERSISTENT_MAPS *) (* BEGIN IMPERATIVE_MAPS *) module type IMPERATIVE_MAPS = sig type key type 'data t val create: unit -> 'data t val clear: 'data t -> unit val add: key -> 'data -> 'data t -> unit val find: key -> 'data t -> 'data val iter: (key -> 'data -> unit) -> 'data t -> unit end (* END IMPERATIVE_MAPS *) (* BEGIN IMPERATIVE_MAP *) module type IMPERATIVE_MAP = sig type key type data val set: key -> data -> unit val get: key -> data option end (* END IMPERATIVE_MAP *) (* An implementation of persistent maps can be made to satisfy the interface of imperative maps. An imperative map is represented as a persistent map, wrapped within a reference cell. *) module PersistentMapsToImperativeMaps (M : PERSISTENT_MAPS) : IMPERATIVE_MAPS with type key = M.key and type 'data t = 'data M.t ref (* An implementation of imperative maps can be made to satisfy the interface of a single imperative map. This map is obtained via a single call to [create]. *) module ImperativeMapsToImperativeMap (M : IMPERATIVE_MAPS) (D : sig type data end) : IMPERATIVE_MAP with type key = M.key and type data = D.data (* An implementation of imperative maps as arrays is possible if keys are consecutive integers. *) module ArrayAsImperativeMaps (K : sig val n: int end) : IMPERATIVE_MAPS with type key = int and type 'data t = 'data option array (* An implementation of imperative maps as a hash table. *) module HashTableAsImperativeMaps (H : Hashtbl.HashedType) : IMPERATIVE_MAPS with type key = H.t (* A trivial implementation of equality and hashing. *) module TrivialHashedType (T : sig type t end) : Hashtbl.HashedType with type t = T.t menhir-20151112/src/settings.ml0000644000175000017500000003152612621170073015157 0ustar mehdimehdiopen Printf (* ------------------------------------------------------------------------- *) (* Prepare for parsing the command line. *) type token_type_mode = | TokenTypeAndCode (* produce the definition of the [token] type and code for the parser *) | TokenTypeOnly (* produce the type definition only *) | CodeOnly of string (* produce the code only; import token type from specified module *) let token_type_mode = ref TokenTypeAndCode let tokentypeonly () = token_type_mode := TokenTypeOnly let codeonly m = if String.capitalize m <> m then begin (* Not using module [Error] to avoid a circular dependency. *) fprintf stderr "Error: %s is not a valid Objective Caml module name.\n" m; exit 1 end; token_type_mode := CodeOnly m let version = ref false type construction_mode = | ModeCanonical (* --canonical: canonical Knuth LR(1) automaton *) | ModeInclusionOnly (* --no-pager : states are merged when there is an inclusion relationship *) | ModePager (* normal mode: states are merged as per Pager's criterion *) | ModeLALR (* --lalr : states are merged as in an LALR generator, i.e. as soon as they have the same LR(0) core *) (* Note that --canonical overrides --no-pager. If both are specified, the result is a canonical automaton. 
*) let construction_mode = ref ModePager let explain = ref false let base = ref "" let dump = ref false let follow = ref false let graph = ref false let trace = ref false let noprefix = ref false type print_mode = | PrintNormal | PrintUnitActions | PrintUnitActionsUnitTokens type preprocess_mode = | PMNormal (* preprocess and continue *) | PMOnlyPreprocess of print_mode (* preprocess, print grammar, stop *) let preprocess_mode = ref PMNormal let recovery = ref false let v () = dump := true; explain := true let infer = ref false let inline = ref true type ocamldep_mode = | OMNone (* do not invoke ocamldep *) | OMRaw (* invoke ocamldep and echo its raw output *) | OMPostprocess (* invoke ocamldep and postprocess its output *) let depend = ref OMNone let code_inlining = ref true let comment = ref false let ocamlc = ref "ocamlc" let ocamldep = ref "ocamldep" let logG, logA, logC = ref 0, ref 0, ref 0 let timings = ref false let filenames = ref StringSet.empty let no_stdlib = ref false let stdlib_path = ref Installation.libdir let insert name = filenames := StringSet.add name !filenames let interpret = ref false let interpret_show_cst = ref false let interpret_error = ref false let table = ref false let inspection = ref false let coq = ref false let coq_no_complete = ref false let coq_no_actions = ref false let strict = ref false let fixedexc = ref false type suggestion = | SuggestNothing | SuggestCompFlags | SuggestLinkFlags of string (* "cmo" or "cmx" *) | SuggestWhereIsMenhirLibSource let suggestion = ref SuggestNothing let ignored_unused_tokens = ref StringSet.empty let ignore_unused_token t = ignored_unused_tokens := StringSet.add t !ignored_unused_tokens let ignore_all_unused_tokens = ref false let list_errors = ref false let compile_errors = ref None let set_compile_errors filename = compile_errors := Some filename let compare_errors = ref [] let add_compare_errors filename = compare_errors := filename :: !compare_errors let update_errors = ref None let set_update_errors filename = update_errors := Some filename let echo_errors = ref None let set_echo_errors filename = echo_errors := Some filename let options = Arg.align [ "--base", Arg.Set_string base, "<basename> Specifies a base name for the output file(s)"; "--canonical", Arg.Unit (fun () -> construction_mode := ModeCanonical), " Construct a canonical Knuth LR(1) automaton"; "--comment", Arg.Set comment, " Include comments in the generated code"; "--compare-errors", Arg.String add_compare_errors, "<filename> (used twice) Compare two .messages files."; "--compile-errors", Arg.String set_compile_errors, "<filename> Compile a .messages file to OCaml code."; "--coq", Arg.Set coq, " Generate a formally verified parser, in Coq"; "--coq-no-complete", Arg.Set coq_no_complete, " Do not generate a proof of completeness"; "--coq-no-actions", Arg.Set coq_no_actions, " Ignore semantic actions in the Coq output"; "--depend", Arg.Unit (fun () -> depend := OMPostprocess), " Invoke ocamldep and display dependencies"; "--dump", Arg.Set dump, " Describe the automaton in <basename>.automaton"; "--echo-errors", Arg.String set_echo_errors, "<filename> Echo the sentences in a .messages file"; "--error-recovery", Arg.Set recovery, " (no longer supported)"; "--explain", Arg.Set explain, " Explain conflicts in <basename>.conflicts"; "--external-tokens", Arg.String codeonly, "<module> Import token type definition from <module>"; "--fixed-exception", Arg.Set fixedexc, " Declares Error = Parsing.Parse_error"; "--follow-construction", Arg.Set follow, " (undocumented)"; "--graph", Arg.Set graph, " Write grammar's dependency graph to
<basename>.dot"; "--infer", Arg.Set infer, " Invoke ocamlc for ahead-of-time type inference"; "--inspection", Arg.Set inspection, " Generate the inspection API (requires --table)"; "--interpret", Arg.Set interpret, " Interpret the sentences provided on stdin"; "--interpret-show-cst", Arg.Set interpret_show_cst, " Show a concrete syntax tree upon acceptance"; "--interpret-error", Arg.Set interpret_error, " Interpret one sentence that should end in an error"; "--lalr", Arg.Unit (fun () -> construction_mode := ModeLALR), " Construct an LALR(1) automaton"; "--list-errors", Arg.Set list_errors, " Produce a list of erroneous inputs"; "--log-automaton", Arg.Set_int logA, "<level> Log information about the automaton"; "--log-code", Arg.Set_int logC, "<level> Log information about the generated code"; "--log-grammar", Arg.Set_int logG, "<level> Log information about the grammar"; "--no-code-inlining", Arg.Clear code_inlining, " (undocumented)"; "--no-inline", Arg.Clear inline, " Ignore the %inline keyword."; "--no-pager", Arg.Unit (fun () -> if !construction_mode = ModePager then construction_mode := ModeInclusionOnly), " (undocumented)"; "--no-prefix", Arg.Set noprefix, " (undocumented)"; "--no-stdlib", Arg.Set no_stdlib, " Do not load the standard library"; "--ocamlc", Arg.Set_string ocamlc, "<command> Specifies how ocamlc should be invoked"; "--ocamldep", Arg.Set_string ocamldep, "<command> Specifies how ocamldep should be invoked"; "--only-preprocess", Arg.Unit (fun () -> preprocess_mode := PMOnlyPreprocess PrintNormal), " Print grammar and exit"; "--only-preprocess-u", Arg.Unit (fun () -> preprocess_mode := PMOnlyPreprocess PrintUnitActions), " Print grammar with unit actions and exit"; "--only-preprocess-uu", Arg.Unit (fun () -> preprocess_mode := PMOnlyPreprocess PrintUnitActionsUnitTokens), " Print grammar with unit actions & tokens and exit"; "--only-tokens", Arg.Unit tokentypeonly, " Generate token type definition only, no code"; "--raw-depend", Arg.Unit (fun () -> depend := OMRaw), " Invoke ocamldep and echo its raw output"; "--stdlib", Arg.Set_string stdlib_path, "<directory> Specify where the standard library lies"; "--strict", Arg.Set strict, " Warnings about the grammar are errors"; "--suggest-comp-flags", Arg.Unit (fun () -> suggestion := SuggestCompFlags), " Suggest compilation flags for ocaml{c,opt}"; "--suggest-link-flags-byte", Arg.Unit (fun () -> suggestion := SuggestLinkFlags "cmo"), " Suggest link flags for ocamlc"; "--suggest-link-flags-opt", Arg.Unit (fun () -> suggestion := SuggestLinkFlags "cmx"), " Suggest link flags for ocamlopt"; "--suggest-menhirLib", Arg.Unit (fun () -> suggestion := SuggestWhereIsMenhirLibSource), " Suggest where MenhirLib is"; "--table", Arg.Set table, " Use the table-based back-end"; "--timings", Arg.Set timings, " Display internal timings"; "--trace", Arg.Set trace, " Include tracing instructions in the generated code"; "--unused-token", Arg.String ignore_unused_token, "<token> Do not warn that <token> is unused"; "--unused-tokens", Arg.Set ignore_all_unused_tokens, " Do not warn about any unused token"; "--update-errors", Arg.String set_update_errors, "<filename> Update auto-comments in a .messages file"; "--version", Arg.Set version, " Show version number and exit"; "-b", Arg.Set_string base, "<basename> Synonymous with --base <basename>"; "-lg", Arg.Set_int logG, "<level> Synonymous with --log-grammar"; "-la", Arg.Set_int logA, "<level> Synonymous with --log-automaton"; "-lc", Arg.Set_int logC, "<level> Synonymous with --log-code"; "-t", Arg.Set table, " Synonymous with --table"; "-v", Arg.Unit v, " Synonymous with --dump --explain"; ] let usage = sprintf "Usage: %s <options> <filenames>" Sys.argv.(0)
" Sys.argv.(0) (* ------------------------------------------------------------------------- *) (* Parse the command line. *) let () = Arg.parse options insert usage (* ------------------------------------------------------------------------- *) (* If required, print a version number and stop. *) let () = if !version then begin printf "menhir, version %s\n" Version.version; exit 0 end (* ------------------------------------------------------------------------- *) (* Menhir is able to suggest compile and link flags to be passed to the Objective Caml compilers. If required, do so and stop. *) (* If [--table] is not passed, no flags are necessary. If [--table] is passed, then [MenhirLib] needs to be visible (at compile time) and linked in (at link time). This is done either via [ocamlfind], if it was available at installation time, or manually. *) (* The compilation flags are in fact meant to be used both at compile- and link-time. *) let () = match !suggestion with | SuggestNothing -> () | SuggestCompFlags -> if !table then if Installation.ocamlfind then printf "-package menhirLib\n%!" else printf "-I %s\n%!" Installation.libdir; exit 0 | SuggestLinkFlags extension -> if !table then if Installation.ocamlfind then printf "-linkpkg\n%!" else printf "menhirLib.%s\n%!" extension; exit 0 | SuggestWhereIsMenhirLibSource -> if Installation.ocamlfind then let _ = Sys.command "ocamlfind query menhirLib" in () else printf "%s\n%!" Installation.libdir; exit 0 (* ------------------------------------------------------------------------- *) (* Export the settings. *) let stdlib_filename = !stdlib_path ^ "/standard.mly" let filenames = StringSet.elements !filenames let base = if !base = "" then match filenames with | [] -> fprintf stderr "%s\n" usage; exit 1 | [ filename ] -> Filename.chop_suffix filename (if !coq then ".vy" else ".mly") | _ -> fprintf stderr "Error: you must specify --base when providing multiple input files.\n"; exit 1 else !base let filenames = if !no_stdlib || !coq then filenames else stdlib_filename :: filenames let token_type_mode = !token_type_mode let construction_mode = !construction_mode let explain = !explain let dump = !dump let follow = !follow let graph = !graph let trace = !trace let () = if !recovery then begin fprintf stderr "Error: --error-recovery mode is no longer supported.\n"; exit 1 end let noprefix = !noprefix let infer = !infer let code_inlining = !code_inlining let depend = !depend let inline = !inline let comment = !comment let preprocess_mode = !preprocess_mode let ocamlc = !ocamlc let ocamldep = !ocamldep let logG, logA, logC = !logG, !logA, !logC let timings = !timings let interpret = !interpret let interpret_show_cst = !interpret_show_cst let interpret_error = !interpret_error let table = !table let inspection = !inspection let () = if inspection && not table then begin fprintf stderr "Error: --inspection requires --table.\n"; exit 1 end let coq = !coq let coq_no_complete = !coq_no_complete let coq_no_actions = !coq_no_actions let strict = !strict let fixedexc = !fixedexc let ignored_unused_tokens = !ignored_unused_tokens let ignore_all_unused_tokens = !ignore_all_unused_tokens let list_errors = !list_errors let compile_errors = !compile_errors let compare_errors = match !compare_errors with | [] -> None | [ filename2; filename1 ] -> (* LIFO *) Some (filename1, filename2) | _ -> eprintf "To compare two .messages files, please use:\n\ --compare-errors --compare-errors .\n"; exit 1 let update_errors = !update_errors let echo_errors = !echo_errors 
menhir-20151112/src/segment.mll0000644000175000017500000000741212621170073015132 0ustar mehdimehdi(* This lexer is used to cut an input into segments, delimited by a blank line. (More precisely, by a run of at least one blank line and zero or more comment lines.) It produces a list of segments, where each segment is represented as a pair of positions. It is stand-alone and cannot fail. *) (* The whitespace in between two segments can contain comments, and the user may wish to preserve them. For this reason, we view a run of whitespace as a segment, too, and we accompany each segment with a tag which is either [Segment] or [Whitespace]. The two kinds of segments must alternate in the list that we produce. *) { type tag = | Segment | Whitespace open Lexing } let newline = ('\010' | '\013' | "\013\010") let whitespace = [ ' ' '\t' ';' ] let comment = '#' [^'\010''\013']* newline (* In the idle state, we skip whitespace, newlines and comments (while updating the liner counter). If we reach the end of file, we return the list of all segments found so far. If we reach a non-blank non-comment character, we record its position and switch to the busy state. *) rule idle opening segments = parse | whitespace { idle opening segments lexbuf } | newline { new_line lexbuf; idle opening segments lexbuf } | comment { new_line lexbuf; idle opening segments lexbuf } | eof { let closing = lexbuf.lex_start_p in let segment = Whitespace, opening, closing in let segments = segment :: segments in List.rev segments } | _ { let closing = lexbuf.lex_start_p in let segment = Whitespace, opening, closing in let segments = segment :: segments in let opening = closing in busy segments opening false lexbuf } (* In the busy state, we skip everything, maintaining one bit [just_saw_a_newline], until [just_saw_a_newline] is true and we find a second newline. This marks the end of a segment, and we revert back to the idle state. If we reach the end of file, we consider that this is also the end of a segment. *) and busy segments opening just_saw_a_newline = parse | whitespace { busy segments opening just_saw_a_newline lexbuf } | newline { new_line lexbuf; (* The newline that we just saw is already included in the segment. This one is not included. *) let closing = lexbuf.lex_start_p in if just_saw_a_newline then let segment = Segment, opening, closing in let segments = segment :: segments in let opening = closing in idle opening segments lexbuf else busy segments opening true lexbuf } | eof { let closing = lexbuf.lex_start_p in let segment = Segment, opening, closing in let segments = segment :: segments in List.rev segments } | _ { busy segments opening false lexbuf } { (* This wrapper function reads a file, cuts it into segments, and creates a fresh lexbuf for each segment, taking care to adjust its start position. *) let segment filename : (tag * string * lexbuf) list = let content = IO.read_whole_file filename in let lexbuf = from_string content in lexbuf.lex_curr_p <- { lexbuf.lex_curr_p with pos_fname = filename }; let segments : (tag * position * position) list = idle lexbuf.lex_curr_p [] lexbuf in List.map (fun (tag, startp, endp) -> let start = startp.pos_cnum in let length = endp.pos_cnum - start in let content = String.sub content start length in let lexbuf = from_string content in lexbuf.lex_start_p <- startp; lexbuf.lex_curr_p <- startp; lexbuf.lex_abs_pos <- startp.pos_cnum; (* That was tricky to find out. See [Lexing.engine]. [pos_cnum] is updated based on [buf.lex_abs_pos + buf.lex_curr_pos]. 
*) tag, content, lexbuf ) segments } menhir-20151112/src/stringSet.mli0000644000175000017500000000010212621170073015434 0ustar mehdimehdiinclude Set.S with type elt = string val of_list: elt list -> t menhir-20151112/src/IL.mli0000644000175000017500000001254412621170073013773 0ustar mehdimehdi(* Abstract syntax of the language used for code production. *) type interface = interface_item list and interface_item = (* Functor. Called [Make]. No functor if no parameters. Very ad hoc! *) | IIFunctor of Stretch.t list * interface (* Exception declarations. *) | IIExcDecls of excdef list (* Algebraic data type declarations (mutually recursive). *) | IITypeDecls of typedef list (* Value declarations. *) | IIValDecls of (string * typescheme) list (* Include directive. *) | IIInclude of module_type (* Submodule. *) | IIModule of string * module_type (* Comment. *) | IIComment of string and module_type = | MTNamedModuleType of string | MTWithType of module_type * string list * string * with_kind * typ | MTSigEnd of interface and with_kind = | WKNonDestructive (* = *) | WKDestructive (* := *) and excdef = { (* Name of the exception. *) excname: string; (* Optional equality. *) exceq: string option; } and typedef = { (* Name of the algebraic data type. *) typename: string; (* Type parameters. This is a list of type variable names, without the leading quote, which will be added by the pretty-printer. Can also be "_". *) typeparams: string list; (* Data constructors. *) typerhs: typedefrhs; (* Constraint. *) typeconstraint: (typ * typ) option } and typedefrhs = | TDefRecord of fielddef list | TDefSum of datadef list | TAbbrev of typ and fielddef = { (* Whether the field is mutable. *) modifiable: bool; (* Name of the field. *) fieldname: string; (* Type of the field. *) fieldtype: typescheme } and datadef = { (* Name of the data constructor. *) dataname: string; (* Types of the value parameters. *) datavalparams: typ list; (* Instantiated type parameters, if this is a GADT -- [None] if this is an ordinary ADT. *) datatypeparams: typ list option; } and typ = (* Textual Objective Caml type. *) | TypTextual of Stretch.ocamltype (* Type variable, without its leading quote. Can also be "_". *) | TypVar of string (* Application of an algebraic data type constructor. *) | TypApp of string * typ list (* Anonymous tuple. *) | TypTuple of typ list (* Arrow type. *) | TypArrow of typ * typ and typescheme = { (* Universal quantifiers, without leading quotes. *) quantifiers: string list; (* Body. *) body: typ; } and valdef = { (* Whether the value is public. Public values cannot be suppressed by the inliner. They serve as seeds for the dead code analysis. *) valpublic: bool; (* Definition's left-hand side. *) valpat: pattern; (* Value to which it is bound. *) valval: expr } and expr = (* Variable. *) | EVar of string (* Function. *) | EFun of pattern list * expr (* Function call. *) | EApp of expr * expr list (* Local definitions. This is a nested sequence of [let] definitions. *) | ELet of (pattern * expr) list * expr (* Case analysis. *) | EMatch of expr * branch list | EIfThen of expr * expr | EIfThenElse of expr * expr * expr (* Raising exceptions. *) | ERaise of expr (* Exception analysis. *) | ETry of expr * branch list (* Data construction. Tuples of length 1 are considered nonexistent, that is, [ETuple [e]] is considered the same expression as [e]. *) | EUnit | EIntConst of int | EStringConst of string | EData of string * expr list | ETuple of expr list (* Type annotation. 
*) | EAnnot of expr * typescheme (* Cheating on the typechecker. *) | EMagic of expr (* Obj.magic *) | ERepr of expr (* Obj.repr *) (* Records. *) | ERecord of (string * expr) list | ERecordAccess of expr * string | ERecordWrite of expr * string * expr (* Textual Objective Caml code. *) | ETextual of Stretch.t (* Comments. *) | EComment of string * expr | EPatComment of string * pattern * expr (* Arrays. *) | EArray of expr list | EArrayAccess of expr * expr and branch = { (* Branch pattern. *) branchpat: pattern; (* Branch body. *) branchbody: expr; } and pattern = (* Wildcard. *) | PWildcard (* Variable. *) | PVar of string (* Data deconstruction. Tuples of length 1 are considered nonexistent, that is, [PTuple [p]] is considered the same pattern as [p]. *) | PUnit | PData of string * pattern list | PTuple of pattern list | PRecord of (string * pattern) list (* Disjunction. *) | POr of pattern list (* Type annotation. *) | PAnnot of pattern * typ (* Module expressions. *) and modexpr = | MVar of string | MStruct of structure | MApp of modexpr * modexpr (* Structures. *) and program = structure and structure = structure_item list and structure_item = (* Functor. Called [Make]. No functor if no parameters. Very ad hoc! *) | SIFunctor of Stretch.t list * structure (* Exception definitions. *) | SIExcDefs of excdef list (* Algebraic data type definitions (mutually recursive). *) | SITypeDefs of typedef list (* Value definitions (mutually recursive or not, as per the flag). *) | SIValDefs of bool * valdef list (* Raw OCaml code. *) | SIStretch of Stretch.t list (* Sub-module definition. *) | SIModuleDef of string * modexpr (* Module inclusion. *) | SIInclude of modexpr (* Comment. *) | SIComment of string menhir-20151112/src/PackedIntArray.mli0000644000175000017500000000537712621170074016337 0ustar mehdimehdi(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, INRIA Paris-Rocquencourt *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright 2005-2015 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (* A packed integer array is represented as a pair of an integer [k] and a string [s]. The integer [k] is the number of bits per integer that we use. The string [s] is just an array of bits, which is read in 8-bit chunks. *) (* The ocaml programming language treats string literals and array literals in slightly different ways: the former are statically allocated, while the latter are dynamically allocated. (This is rather arbitrary.) In the context of Menhir's table-based back-end, where compact, immutable integer arrays are needed, ocaml strings are preferable to ocaml arrays. *) type t = int * string (* [pack a] turns an array of integers into a packed integer array. *) (* Because the sign bit is the most significant bit, the magnitude of any negative number is the word size. In other words, [pack] does not achieve any space savings as soon as [a] contains any negative numbers, even if they are ``small''. *) val pack: int array -> t (* [get t i] returns the integer stored in the packed array [t] at index [i]. 
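(When the bit width is 1 -- as one would expect, for instance, when every element of the original array is 0 or 1 -- the specialized reader [get1] below can be used instead.)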
*) (* Together, [pack] and [get] satisfy the following property: if the index [i] is within bounds, then [get (pack a) i] equals [a.(i)]. *) val get: t -> int -> int (* [get1 t i] returns the integer stored in the packed array [t] at index [i]. It assumes (and does not check) that the array's bit width is [1]. The parameter [t] is just a string. *) val get1: string -> int -> int (* [unflatten1 (n, data) i j] accesses the two-dimensional bitmap represented by [(n, data)] at indices [i] and [j]. The integer [n] is the width of the bitmap; the string [data] is the second component of the packed array obtained by encoding the table as a one-dimensional array. *) val unflatten1: int * string -> int -> int -> int menhir-20151112/src/unionFind.mli0000644000175000017500000000274112621170073015416 0ustar mehdimehdi(** This module implements a simple and efficient union/find algorithm. See Robert E. Tarjan, ``Efficiency of a Good But Not Linear Set Union Algorithm'', JACM 22(2), 1975. *) (** The abstraction defined by this module is a set of points, partitioned into equivalence classes. With each equivalence class, a piece of information, of abstract type ['a], is associated; we call it a descriptor. *) type 'a point (** [fresh desc] creates a fresh point and returns it. It forms an equivalence class of its own, whose descriptor is [desc]. *) val fresh: 'a -> 'a point (** [find point] returns the descriptor associated with [point]'s equivalence class. *) val find: 'a point -> 'a (** [union point1 point2] merges the equivalence classes associated with [point1] and [point2] (which must be distinct) into a single class whose descriptor is that originally associated with [point2]. *) val union: 'a point -> 'a point -> unit (** [equivalent point1 point2] tells whether [point1] and [point2] belong to the same equivalence class. *) val equivalent: 'a point -> 'a point -> bool (** [eunion point1 point2] is identical to [union], except it does nothing if [point1] and [point2] are already equivalent. *) val eunion: 'a point -> 'a point -> unit (** [redundant] maps all members of an equivalence class, but one, to [true]. *) val redundant: 'a point -> bool (** [change p d] updates the descriptor of [p] to [d]. *) val change: 'a point -> 'a -> unit menhir-20151112/src/interface.ml0000644000175000017500000001163512621170073015256 0ustar mehdimehdiopen UnparameterizedSyntax open IL open CodeBits (* -------------------------------------------------------------------------- *) (* The [Error] exception. *) let excname = "Error" let excdef = { excname = excname; exceq = (if Settings.fixedexc then Some "Parsing.Parse_error" else None); } (* -------------------------------------------------------------------------- *) (* The type of the monolithic entry point for the start symbol [symbol]. *) let entrytypescheme grammar symbol = let typ = TypTextual (ocamltype_of_start_symbol grammar symbol) in type2scheme (marrow [ arrow tlexbuf TokenType.ttoken; tlexbuf ] typ) (* -------------------------------------------------------------------------- *) (* When the table back-end is active, the generated parser contains, as a sub-module, an application of [Engine.Make]. This sub-module is named as follows. *) let interpreter = "MenhirInterpreter" let checkpoint t = TypApp (interpreter ^ ".checkpoint", [ t ]) let lr1state = "lr1state" let tlr1state a : typ = TypApp (lr1state, [a]) (* -------------------------------------------------------------------------- *) (* The name of the sub-module that contains the incremental entry points. 
*) let incremental = "Incremental" (* The type of the incremental entry point for the start symbol [symbol]. *) let entrytypescheme_incremental grammar symbol = let t = TypTextual (ocamltype_of_start_symbol grammar symbol) in type2scheme (marrow [ tposition ] (checkpoint t)) (* -------------------------------------------------------------------------- *) (* The name of the sub-module that contains the inspection API. *) let inspection = "Inspection" (* -------------------------------------------------------------------------- *) (* The monolithic (traditional) API: the type [token], the exception [Error], and the parser's entry points. *) let monolithic_api grammar = TokenType.tokentypedef grammar @ IIComment "This exception is raised by the monolithic API functions." :: IIExcDecls [ excdef ] :: IIComment "The monolithic API." :: IIValDecls ( StringSet.fold (fun symbol decls -> (Misc.normalize symbol, entrytypescheme grammar symbol) :: decls ) grammar.start_symbols [] ) :: [] (* -------------------------------------------------------------------------- *) (* The inspection API. *) let inspection_api grammar () = let a = "a" in (* Define the types [terminal] and [nonterminal]. *) TokenType.tokengadtdef grammar @ NonterminalType.nonterminalgadtdef grammar @ (* Include the signature that lists the inspection functions, with appropriate type instantiations. *) IIComment "The inspection API." :: IIInclude ( with_types WKDestructive "MenhirLib.IncrementalEngine.INSPECTION" [ [ a ], "lr1state", tlr1state (TypVar a); [], "production", TypApp ("production", []); [ a ], TokenType.tctokengadt, TokenType.ttokengadt (TypVar a); [ a ], NonterminalType.tcnonterminalgadt, NonterminalType.tnonterminalgadt (TypVar a) ] ) :: [] (* -------------------------------------------------------------------------- *) (* The incremental API. *) let incremental_engine () : module_type = with_types WKNonDestructive "MenhirLib.IncrementalEngine.INCREMENTAL_ENGINE" [ [], "token", (* NOT [tctoken], which is qualified if [--external-tokens] is used *) TokenType.ttoken ] let incremental_entry_points grammar : interface = IIComment "The entry point(s) to the incremental API." :: IIModule (incremental, MTSigEnd [ IIValDecls ( StringSet.fold (fun symbol decls -> (symbol, entrytypescheme_incremental grammar symbol) :: decls ) grammar.start_symbols [] ) ]) :: [] let incremental_api grammar () : interface = IIModule ( interpreter, MTSigEnd ( IIComment "The incremental API." :: IIInclude (incremental_engine()) :: listiflazy Settings.inspection (inspection_api grammar) ) ) :: (* The entry points must come after the incremental API, because their type refers to the type [checkpoint]. *) incremental_entry_points grammar (* -------------------------------------------------------------------------- *) (* The complete interface of the generated parser. *) let interface grammar = [ IIFunctor (grammar.parameters, monolithic_api grammar @ listiflazy Settings.table (incremental_api grammar) ) ] (* -------------------------------------------------------------------------- *) (* Writing the interface to a file. *) let write grammar () = (* We have a dependency on [TokenType], which takes care of the case where [token_type_mode] is [TokenTypeOnly]. 
*) assert (Settings.token_type_mode <> Settings.TokenTypeOnly); let mli = open_out (Settings.base ^ ".mli") in let module P = Printer.Make (struct let f = mli let locate_stretches = None end) in P.interface (interface grammar); close_out mli menhir-20151112/src/parameterizedGrammar.ml0000644000175000017500000004647012621170073017466 0ustar mehdimehdiopen Positions open Syntax open UnparameterizedSyntax open InternalSyntax open Misc (* Inference for non terminals. *) (* Unification variables convey [variable_info] to describe the multi-equation they take part of. *) type variable_info = { mutable structure : nt_type option; mutable name : string option; mutable mark : Mark.t } (* [UnionFind] is used to improve the union and the equality test between multi-equations. *) and variable = variable_info UnionFind.point (* Types are simple types. [star] denotes the type of ground symbol (non terminal or terminal). [Arrow] describes the type of a parameterized non terminal. *) and nt_type = Arrow of variable list let star = Arrow [] (* [var_name] is a name generator for unification variables. *) let var_name = let name_counter = ref (-1) in let next_name () = incr name_counter; String.make 1 (char_of_int (97 + !name_counter mod 26)) ^ let d = !name_counter / 26 in if d = 0 then "" else string_of_int d in fun v -> let repr = UnionFind.find v in match repr.name with None -> let name = next_name () in repr.name <- Some name; name | Some x -> x (* [string_of_nt_type] is a simple pretty printer for types (they can be recursive). *) (* 2011/04/05: types can no longer be recursive, but I won't touch the printer -fpottier *) let string_of paren_fun ?paren ?colors t : string = let colors = match colors with None -> (Mark.fresh (), Mark.fresh ()) | Some cs -> cs in let s, p = paren_fun colors t in if paren <> None && p = true then "("^ s ^")" else s let rec paren_nt_type colors = function (* [colors] is a pair [white, black] *) Arrow [] -> "*", false | Arrow ins -> let args = separated_list_to_string (string_of paren_var ~paren:true ~colors) ", " ins in let args = if List.length ins > 1 then "("^ args ^ ")" else args in args^" -> *", true and paren_var (white, black) x = let descr = UnionFind.find x in if Mark.same descr.mark white then begin descr.mark <- black; var_name x, false end else begin descr.mark <- white; let s, p = match descr.structure with None -> var_name x, false | Some t -> paren_nt_type (white, black) t in if Mark.same descr.mark black then (var_name x ^ " = " ^ s, true) else (s, p) end let string_of_nt_type ?colors t = (* TEMPORARY note: always called without a [colors] argument! *) string_of ?colors paren_nt_type t let string_of_var ?colors v = (* TEMPORARY note: always called without a [colors] argument! *) string_of ?colors paren_var v (* for debugging: (* [print_env env] returns a string description of the typing environment. *) let print_env = List.iter (fun (k, (_, v)) -> Printf.eprintf "%s: %s\n" k (string_of_var v)) *) (* [occurs_check x y] checks that [x] does not occur within [y]. *) let dfs action x = let black = Mark.fresh () in let rec visit_var x = let descr = UnionFind.find x in if not (Mark.same descr.mark black) then begin descr.mark <- black; action x; match descr.structure with | None -> () | Some t -> visit_term t end and visit_term (Arrow ins) = List.iter visit_var ins in visit_var x exception OccursError of variable * variable let occurs_check x y = dfs (fun z -> if UnionFind.equivalent x z then raise (OccursError (x, y))) y (* First order unification. 
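As an added illustration (not part of the original source), using the functions defined below, unifying two arrow sorts with distinct nonzero arities at toplevel raises [BadArityError] rather than [UnificationError]:

     let f = fresh_structured_variable (Arrow [ fresh_flexible_variable () ]) in
     let g = fresh_structured_variable (Arrow [ fresh_flexible_variable ();
                                                fresh_flexible_variable () ]) in
     unify_var f g (* raises [BadArityError (1, 2)] *)
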
*) (* 2011/04/05: perform an eager occurs check and prevent the construction of any cycles. *) let fresh_flexible_variable () = UnionFind.fresh { structure = None; name = None; mark = Mark.none } let fresh_structured_variable t = UnionFind.fresh { structure = Some t; name = None; mark = Mark.none } let star_variable = fresh_structured_variable star exception UnificationError of nt_type * nt_type exception BadArityError of int * int let rec unify_var toplevel x y = if not (UnionFind.equivalent x y) then let reprx, repry = UnionFind.find x, UnionFind.find y in match reprx.structure, repry.structure with None, Some _ -> occurs_check x y; UnionFind.union x y | Some _, None -> occurs_check y x; UnionFind.union y x | None, None -> UnionFind.union x y | Some t, Some t' -> unify toplevel t t'; UnionFind.union x y and unify toplevel t1 t2 = match t1, t2 with | Arrow ins, Arrow ins' -> let n1, n2 = List.length ins, List.length ins' in if n1 <> n2 then if n1 = 0 || n2 = 0 || not toplevel then raise (UnificationError (t1, t2)) else (* the flag [toplevel] is used only here and influences which exception is raised; BadArityError is raised only at toplevel *) raise (BadArityError (n1, n2)); List.iter2 (unify_var false) ins ins' let unify_var x y = unify_var true x y (* Typing environment. *) type environment = (string * (Positions.t list * variable)) list (* [lookup x env] returns the type associated with [x] in the typing environment [env]. By convention, identifiers that are not in [env] are terminals. They are given the type [Star]. *) let lookup x (env: environment) = try snd (List.assoc x env) with Not_found -> star_variable (* This function checks that the symbol [k] has the type [expected_type]. *) let check positions env k expected_type = let inference_var = lookup k env in let checking_var = fresh_structured_variable expected_type in try unify_var inference_var checking_var with UnificationError (t1, t2) -> Error.error positions "how is this symbol parameterized?\n\ It is used at sorts %s and %s.\n\ The sort %s is not compatible with the sort %s." (string_of_var inference_var) (string_of_var checking_var) (string_of_nt_type t1) (string_of_nt_type t2) | BadArityError (n1, n2) -> Error.error positions "does this symbol expect %d or %d arguments?" (min n1 n2) (max n1 n2) | OccursError (x, y) -> Error.error positions "how is this symbol parameterized?\n\ It is used at sorts %s and %s.\n\ The sort %s cannot be unified with the sort %s." (string_of_var inference_var) (string_of_var checking_var) (string_of_var x) (string_of_var y) (* An identifier can be used either in a total application or as a higher-order non terminal (no partial application is allowed). *) let rec parameter_type env = function | ParameterVar x -> lookup x.value env | ParameterApp (x, args) -> assert (args <> []); let expected_type = (* [x] is applied; it must be applied to the exact number of arguments. *) Arrow (List.map (parameter_type env) args) in (* Check the well-formedness of the application. *) check [x.position] env x.value expected_type; (* Similarly, if it is a total application, the result is [Star]; otherwise it is the flexible variable. *) star_variable let check_grammar p_grammar = (* [n] is the grammar size. *) let n = StringMap.cardinal p_grammar.p_rules in (* The successors of the non terminal [N] are its producers. This induces a graph over the non terminals, whose successor function is implemented by [successors]. Non terminals are indexed using [nt]. 
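For example (an added sketch), given the parameterized rules

     list(X): { [] } | x = X; xs = list(X) { x :: xs }
     args: l = list(expr) { l }

the node [list] has itself as a successor, via the producer [list(X)] in its second branch, whereas [args] is not parameterized and therefore contributes no edges.
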
*) let nt, conv, _iconv = index_map p_grammar.p_rules in let parameters, name, branches, positions = (fun n -> (nt n).pr_parameters), (fun n -> (nt n).pr_nt), (fun n -> (nt n).pr_branches), (fun n -> (nt n).pr_positions) in (* The successors function is implemented as an array using the indexing previously created. *) let successors = Array.init n (fun node -> (* We are only interested in parameterized non terminals. *) if parameters node <> [] then List.fold_left (fun succs { pr_producers = symbols } -> List.fold_left (fun succs -> function (_, p) -> let symbol, _ = Parameters.unapp p in try let symbol_node = conv symbol.value in (* [symbol] is a parameterized non terminal; we add it to the successors set. *) if parameters symbol_node <> [] then IntSet.add symbol_node succs else succs with Not_found -> (* [symbol] is a token; it is not interesting for type inference purposes. *) succs ) succs symbols ) IntSet.empty (branches node) else Misc.IntSet.empty ) in (* The successors function and the indexing induce the following graph module. *) let module RulesGraph = struct type node = int let n = n let index node = node let successors f node = IntSet.iter f successors.(node) let iter f = for i = 0 to n - 1 do f i done end in let module ConnectedComponents = Tarjan.Run (RulesGraph) in (* We check that: - all the parameterized definitions of a particular component have the same number of parameters; - every parameterized non terminal definition always uses parameterized definitions of the same component with its formal parameters. Components are marked during the traversal: -1 means unvisited; n, with n > 0, is the number of parameters of the clique. *) let unseen = -1 in let marked_components = Array.make n unseen in let flexible_arrow args = let ty = Arrow (List.map (fun _ -> fresh_flexible_variable ()) args) in fresh_structured_variable ty in (* [nt_type i] is the type of the i-th non terminal. *) let nt_type i = match parameters i with | [] -> star_variable | x -> flexible_arrow x in (* [actual_parameters_as_formal] is the well-formedness checker for parameterized non terminal application. *) let actual_parameters_as_formal actual_parameters formal_parameters = List.for_all2 (fun y -> (function ParameterVar x -> x.value = y | _ -> false)) formal_parameters actual_parameters in (* The environment is initialized. *) let env : environment = StringMap.fold (fun k r acu -> (k, (r.pr_positions, nt_type (conv k))) :: acu) p_grammar.p_rules [] in (* We traverse the graph, checking that each parameterized non terminal definition is well-formed. *) RulesGraph.iter (fun i -> let params = parameters i and iname = name i and repr = ConnectedComponents.representative i and positions = positions i in (* The environment is augmented with the parameters whose types are unknown. *) let env' = List.map (fun k -> (k, (positions, fresh_flexible_variable ()))) params in let env = env' @ env in (* The type of the parameterized non terminal is constrained to be [expected_ty]. *) let check_type () = check positions env iname (Arrow (List.map (fun (_, (_, t)) -> t) env')) in (* We check the number of parameters. *) let check_parameters () = let parameters_len = List.length params in (* The component is visited for the first time. *) if marked_components.(repr) = unseen then marked_components.(repr) <- parameters_len else (* Otherwise, we check that the arity is homogeneous in the component. 
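For example (an added sketch), the definitions

     a(X): b = b(X, X) { b }
     b(X, Y): a = a(X) { a }

lie in the same strongly connected component but have arities 1 and 2, so they are rejected with the message below.
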
*) if marked_components.(repr) <> parameters_len then Error.error positions "mutually recursive definitions must have the same parameters.\n\ This is not the case for %s and %s." (name repr) iname in (* In each production rule, the parameterized non terminals of the same component must be instantiated with the same formal arguments. *) let check_producers () = List.iter (fun { pr_producers = symbols } -> List.iter (function (_, p) -> let symbol, actuals = Parameters.unapp p in (* We take the use of each symbol into account. *) check [ symbol.position ] env symbol.value (if actuals = [] then star else Arrow (List.map (parameter_type env) actuals)); (* If it is in the same component, check in addition that the arguments are the formal arguments. *) try let idx = conv symbol.value in if ConnectedComponents.representative idx = repr then if not (actual_parameters_as_formal actuals params) then Error.error [ symbol.position ] "mutually recursive definitions must have the same \ parameters.\n\ This is not the case for %s." (let name1, name2 = (name idx), (name i) in if name1 <> name2 then name1 ^ " and "^ name2 else name1) with _ -> ()) symbols) (branches i) in check_type (); check_parameters (); check_producers ()) let rec subst_parameter subst = function | ParameterVar x -> (try List.assoc x.value subst with Not_found -> ParameterVar x) | ParameterApp (x, ps) -> (try match List.assoc x.value subst with | ParameterVar y -> ParameterApp (y, List.map (subst_parameter subst) ps) | ParameterApp _ -> (* Type-checking ensures that we cannot do partial application. Consequently, if a higher-order non terminal is an actual argument, it cannot be the result of a partial application. *) assert false with Not_found -> ParameterApp (x, List.map (subst_parameter subst) ps)) let subst_parameters subst = List.map (subst_parameter subst) (* TEMPORARY why unused? let names_of_p_grammar p_grammar = StringMap.fold (fun tok _ acu -> StringSet.add tok acu) p_grammar.p_tokens StringSet.empty $$ (StringMap.fold (fun nt _ acu -> StringSet.add nt acu) p_grammar.p_rules) *) let expand p_grammar = (* Check that it is safe to expand this parameterized grammar. *) check_grammar p_grammar; (* Set up a mechanism that ensures that names are unique -- and, in fact, ensures the stronger condition that normalized names are unique. *) let names = ref (StringSet.empty) in let ensure_fresh name = let normalized_name = Misc.normalize name in if StringSet.mem normalized_name !names then Error.error [] "internal name clash over %s" normalized_name; names := StringSet.add normalized_name !names; name in let expanded_rules = Hashtbl.create 13 in let module InstanceTable = Hashtbl.Make (Parameters) in let rule_names = InstanceTable.create 13 in (* [mangle p] chooses a name for the new nonterminal symbol that corresponds to the parameter [p]. *) let rec mangle = function | ParameterVar x | ParameterApp (x, []) -> Positions.value x | ParameterApp (x, ps) -> (* We include parentheses and commas in the names that we assign to expanded nonterminals, because that is more readable and acceptable in many situations. We replace them with underscores in situations where these characters are not valid. 
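For instance (an added example), the application [option(pair(A,B))] is mangled to the literal name "option(pair(A,B))"; where such characters are unacceptable, e.g. in generated OCaml identifiers, [Misc.normalize] turns them into underscores.
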
*) Printf.sprintf "%s(%s)" (Positions.value x) (separated_list_to_string mangle "," ps) in let name_of symbol parameters = let param = ParameterApp (symbol, parameters) in try InstanceTable.find rule_names param with Not_found -> let name = ensure_fresh (mangle param) in InstanceTable.add rule_names param name; name in (* Given the substitution [subst] from parameters to non terminal, we instantiate the parameterized branch. *) let rec expand_branch subst pbranch = let new_producers = List.map (function (ido, p) -> let sym, actual_parameters = Parameters.unapp p in let sym, actual_parameters = try match List.assoc sym.value subst with | ParameterVar x -> x, subst_parameters subst actual_parameters | ParameterApp (x, ps) -> assert (actual_parameters = []); x, ps with Not_found -> sym, subst_parameters subst actual_parameters in (* Instantiate the definition of the producer. *) (expand_branches subst sym actual_parameters, Positions.value ido)) pbranch.pr_producers in { branch_position = pbranch.pr_branch_position; producers = new_producers; action = pbranch.pr_action; branch_prec_annotation = pbranch.pr_branch_prec_annotation; branch_production_level = pbranch.pr_branch_production_level; } (* Instantiate the branches of sym for a particular set of actual parameters. *) and expand_branches subst sym actual_parameters = let nsym = name_of sym actual_parameters in try if not (Hashtbl.mem expanded_rules nsym) then begin let prule = StringMap.find (Positions.value sym) p_grammar.p_rules in let subst = (* Type checking ensures that parameterized non terminal instantiations are well defined. *) assert (List.length prule.pr_parameters = List.length actual_parameters); List.combine prule.pr_parameters actual_parameters @ subst in Hashtbl.add expanded_rules nsym { branches = []; positions = []; inline_flag = false }; let rules = List.map (expand_branch subst) prule.pr_branches in Hashtbl.replace expanded_rules nsym { branches = rules; positions = prule.pr_positions; inline_flag = prule.pr_inline_flag; } end; nsym (* If [sym] is a terminal, then it is not in [p_grammar.p_rules]. Expansion is not needed. *) with Not_found -> Positions.value sym in (* Process %type declarations. *) let rec types_from_list (ps : (Syntax.parameter * 'a Positions.located) list) : 'a StringMap.t = match ps with | [] -> StringMap.empty | (nt, ty)::q -> let accu = types_from_list q in let mangled = mangle nt in if StringMap.mem mangled accu then Error.error [Parameters.position nt] "there are multiple %%type declarations for nonterminal %s." mangled; StringMap.add mangled (Positions.value ty) accu in (* Process %on_error_reduce declarations. *) let rec on_error_reduce_from_list (ps : Syntax.parameter list) : StringSet.t = match ps with | [] -> StringSet.empty | nt :: ps -> let accu = on_error_reduce_from_list ps in let mangled = mangle nt in if StringSet.mem mangled accu then Error.error [Parameters.position nt] "there are multiple %%on_error_reduce declarations for nonterminal %s." mangled; StringSet.add mangled accu in let start_symbols = StringMap.domain (p_grammar.p_start_symbols) in { preludes = p_grammar.p_preludes; postludes = p_grammar.p_postludes; parameters = p_grammar.p_parameters; start_symbols = start_symbols; types = types_from_list p_grammar.p_types; on_error_reduce = on_error_reduce_from_list p_grammar.p_on_error_reduce; tokens = p_grammar.p_tokens; rules = let closed_rules = StringMap.fold (fun k prule rules -> (* If [k] is a start symbol then it cannot be parameterized. 
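For instance (an added sketch), declaring [%start p] while defining [p(X): x = X { x }] is rejected with the message "the start symbol p cannot be parameterized.".
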
*) if prule.pr_parameters <> [] && StringSet.mem k start_symbols then Error.error [] "the start symbol %s cannot be parameterized." k; (* Entry points are the closed non terminals. *) if prule.pr_parameters = [] then StringMap.add k { branches = List.map (expand_branch []) prule.pr_branches; positions = prule.pr_positions; inline_flag = prule.pr_inline_flag; } rules else rules) p_grammar.p_rules StringMap.empty in Hashtbl.fold StringMap.add expanded_rules closed_rules } menhir-20151112/src/nonTerminalDefinitionInlining.ml0000644000175000017500000002300412621170073021276 0ustar mehdimehdiopen Keyword open UnparameterizedSyntax open ListMonad (* This exception will be raised when a branch does not need inlining. *) exception NoInlining (* Colors are used to detect cycles. *) type 'a color = | BeingExpanded | Expanded of 'a (* [index2id] converts a 0-based index (into a list of producers) to an identifier (the name of the producer). *) let index2id producers i = try let (_, x) = List.nth producers i in x with Failure _ -> assert false (* should not happen *) (* [rename_sw_outer] transforms the keywords in the outer production (the caller) during inlining. It replaces [$startpos(x)] and [$endpos(x)], where [x] is the name of the callee, with [startpx] and [endpx], respectively. *) let rename_sw_outer (x, startpx, endpx) (subject, where) : (subject * where) option = match subject, where with | Before, _ -> None | RightNamed x', _ -> if x' = x then match where with | WhereStart -> Some startpx | WhereEnd -> Some endpx | WhereSymbolStart -> assert false (* has been expanded away *) else None | Left, _ -> (* [$startpos], [$endpos], and [$symbolstartpos] have been expanded away earlier; see [KeywordExpansion]. *) assert false (* [rename_sw_inner] transforms the keywords in the inner production (the callee) during inlining. It replaces [$endpos($0)] with [beforeendp]. *) let rename_sw_inner beforeendp (subject, where) : (subject * where) option = match subject, where with | Before, _ -> assert (where = WhereEnd); Some beforeendp | RightNamed _, _ -> None | Left, _ -> (* [$startpos] and [$endpos] have been expanded away earlier; see [KeywordExpansion]. *) assert false (* Inline a grammar. The resulting grammar does not contain any definitions that can be inlined. *) let inline grammar = let names producers = List.fold_left (fun s (_, x) -> StringSet.add x s) StringSet.empty producers in (* This function returns a fresh name beginning with [prefix] and that is not in the set of names [names]. *) let rec fresh ?(c=0) names prefix = let name = prefix^string_of_int c in if StringSet.mem name names then fresh ~c:(c+1) names prefix else name in let use_inline = ref false in (* This table associates a color to each non terminal that can be expanded. *) let expanded_non_terminals = Hashtbl.create 13 in let expanded_state k = Hashtbl.find expanded_non_terminals k in let mark_as_being_expanded k = Hashtbl.add expanded_non_terminals k BeingExpanded in let mark_as_expanded k r = Hashtbl.replace expanded_non_terminals k (Expanded r); r in (* This function traverses the producers of the branch [b] and finds the first non terminal that can be inlined. If it finds one, it inlines its branches into [b], which is why this function can return several branches. If it does not find a non terminal to inline, it raises [NoInlining]. 
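For example (an added sketch, with hypothetical AST constructors), given

     %inline op: PLUS { Add } | TIMES { Mul }
     expr: e1 = expr; o = op; e2 = expr { Bin (o, e1, e2) }

the single branch of [expr] is expanded into two branches, one per branch of [op]; in effect, one obtains [expr PLUS expr] with action [Bin (Add, e1, e2)] and [expr TIMES expr] with action [Bin (Mul, e1, e2)].
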
*) let rec find_inline_producer b = let prefix, nt, p, psym, suffix = let rec chop_inline i (prefix, suffix) = match suffix with | [] -> raise NoInlining | ((nt, id) as x) :: xs -> try let r = StringMap.find nt grammar.rules in if r.inline_flag then (* We have to inline the rule [r] into [b] between [prefix] and [xs]. *) List.rev prefix, nt, r, id, xs else chop_inline (i + 1) (x :: prefix, xs) with Not_found -> chop_inline (i + 1) (x :: prefix, xs) in chop_inline 1 ([], b.producers) in prefix, expand_rule nt p, nt, psym, suffix (* We have to rename the producers of the inlined production if their names clash with the names of the producers of the branch into which we do the inlining. *) and rename_if_necessary b producers = (* First we compute the set of names already in use. *) let producers_names = names (b.producers @ producers) in (* Compute a renaming and the new inlined producers' names. *) let phi, producers' = List.fold_left (fun (phi, producers) (p, x) -> if StringSet.mem x producers_names then let x' = fresh producers_names x in ((x, x') :: phi, (p, x') :: producers) else (phi, (p, x) :: producers) ) ([], []) producers in phi, List.rev producers' (* Inline the non terminals that can be inlined in [b]. We use the ListMonad to combine the results. *) and expand_branch (b : branch) : branch ListMonad.m = try (* [c] is the identifier under which the callee is known. *) let prefix, p, _nt, c, suffix = find_inline_producer b in use_inline := true; (* Inline a branch of [nt] at position [prefix] ... [suffix] in the branch [b]. *) let inline_branch pb = (* Rename the producers of this branch if they conflict with the names of the host's producers. *) let phi, inlined_producers = rename_if_necessary b pb.producers in (* After inlining, the producers are as follows. *) let producers = prefix @ inlined_producers @ suffix in let index2id = index2id producers in let prefix = List.length prefix and inlined_producers = List.length inlined_producers in (* Define how the start and end positions of the inner production should be computed once it is inlined into the outer production. These definitions of [startp] and [endp] are then used to transform [$startpos] and [$endpos] in the inner production and to transform [$startpos(x)] and [$endpos(x)] in the outer production. *) (* 2015/11/04. We ensure that positions are computed in the same manner, regardless of whether inlining is performed. *) let startp = if inlined_producers > 0 then (* If the inner production is non-epsilon, things are easy. The start position of the inner production is the start position of its first element. *) RightNamed (index2id prefix), WhereStart else if prefix > 0 then (* If the inner production is epsilon, we are supposed to compute the end position of whatever comes in front of it. If the prefix is nonempty, then this is the end position of the last symbol in the prefix. *) RightNamed (index2id (prefix - 1)), WhereEnd else (* If the inner production is epsilon and the prefix is empty, then we need to look up the end position stored in the top stack cell. This is the reason why we need the keyword [$endpos($0)]. It is required in this case to preserve the semantics of $startpos and $endpos. *) Before, WhereEnd (* Note that, contrary to intuition perhaps, it is NOT the case that, if the prefix is empty, then the start position of the inner production is the start position of the outer production. This is true only if the inner production is non-epsilon. 
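Concretely (an added example), if [nothing] is an %inline symbol whose single production is epsilon, then in [rule: a = A; n = nothing; b = B { ... }] the keywords [$startpos(n)] and [$endpos(n)] both denote the end position of [a] after inlining -- exactly what they would denote if [nothing] were not inlined.
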
*) in let endp = if inlined_producers > 0 then (* If the inner production is non-epsilon, things are easy: its end position is the end position of its last element. *) RightNamed (index2id (prefix + inlined_producers - 1)), WhereEnd else (* If the inner production is epsilon, then its end position is equal to its start position. *) startp in (* We must also transform [$endpos($0)] if it is used by the inner production. It refers to the end position of the stack cell that comes before the inner production. So, if the prefix is non-empty, then it translates to the end position of the last element of the prefix. Otherwise, it translates to [$endpos($0)]. *) let beforeendp = if prefix > 0 then RightNamed (index2id (prefix - 1)), WhereEnd else Before, WhereEnd in (* Rename the outer and inner semantic actions. *) let outer_action = Action.rename (rename_sw_outer (c, startp, endp)) [] b.action and action' = Action.rename (rename_sw_inner beforeendp) phi pb.action in { b with producers = producers; action = Action.compose c action' outer_action } in List.map inline_branch p.branches >>= expand_branch with NoInlining -> return b (* Expand a rule if necessary. *) and expand_rule k r = try (match expanded_state k with | BeingExpanded -> Error.error r.positions "there is a cycle in the definition of %s." k | Expanded r -> r) with Not_found -> mark_as_being_expanded k; mark_as_expanded k { r with branches = r.branches >>= expand_branch } in (* If we are in Coq mode, %inline is forbidden. *) let _ = if Settings.coq then StringMap.iter (fun _ r -> if r.inline_flag then Error.error r.positions "%%inline is not supported by the Coq back-end.") grammar.rules in (* To expand a grammar, we expand all its rules and remove the %inline rules. *) let expanded_rules = StringMap.mapi expand_rule grammar.rules and useful_types = StringMap.filter (fun k _ -> try not (StringMap.find k grammar.rules).inline_flag with Not_found -> true) grammar.types in { grammar with rules = StringMap.filter (fun _ r -> not r.inline_flag) expanded_rules; types = useful_types }, !use_inline menhir-20151112/src/EngineTypes.ml0000644000175000017500000003106512621170074015550 0ustar mehdimehdi(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, INRIA Paris-Rocquencourt *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright 2005-2015 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (* This file defines several types and module types that are used in the specification of module [Engine]. *) (* --------------------------------------------------------------------------- *) (* It would be nice if we could keep the structure of stacks and environments hidden. However, stacks and environments must be accessible to semantic actions, so the following data structure definitions must be public. *) (* --------------------------------------------------------------------------- *) (* A stack is a linked list of cells. A sentinel cell -- which is its own successor -- is used to mark the bottom of the stack. The sentinel cell itself is not significant -- it contains dummy values. 
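For illustration (an added sketch; [dummy_state] and [dummy_value] are hypothetical), such a sentinel can be built with a recursive record definition:

     let rec sentinel = {
       state = dummy_state;       (* a dummy; never consulted *)
       semv = dummy_value;
       startp = Lexing.dummy_pos;
       endp = Lexing.dummy_pos;
       next = sentinel;           (* the self-pointer marks the bottom of the stack *)
     }
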
*) type ('state, 'semantic_value) stack = { (* The state that we should go back to if we pop this stack cell. *) (* This convention means that the state contained in the top stack cell is not the current state [env.current]. It also means that the state found within the sentinel is a dummy -- it is never consulted. This convention is the same as that adopted by the code-based back-end. *) state: 'state; (* The semantic value associated with the chunk of input that this cell represents. *) semv: 'semantic_value; (* The start and end positions of the chunk of input that this cell represents. *) startp: Lexing.position; endp: Lexing.position; (* The next cell down in the stack. If this is a self-pointer, then this cell is the sentinel, and the stack is conceptually empty. *) next: ('state, 'semantic_value) stack; } (* --------------------------------------------------------------------------- *) (* A parsing environment contains all of the parser's state (except for the current program point). *) type ('state, 'semantic_value, 'token) env = { (* If this flag is true, then the first component of [env.triple] should be ignored, as it has been logically overwritten with the [error] pseudo-token. *) error: bool; (* The last token that was obtained from the lexer, together with its start and end positions. Warning: before the first call to the lexer has taken place, a dummy (and possibly invalid) token is stored here. *) triple: 'token * Lexing.position * Lexing.position; (* The stack. In [CodeBackend], it is passed around on its own, whereas, here, it is accessed via the environment. *) stack: ('state, 'semantic_value) stack; (* The current state. In [CodeBackend], it is passed around on its own, whereas, here, it is accessed via the environment. *) current: 'state; } (* --------------------------------------------------------------------------- *) (* This signature describes the parameters that must be supplied to the LR engine. *) module type TABLE = sig (* The type of automaton states. *) type state (* States are numbered. *) val number: state -> int (* The type of tokens. These can be thought of as real tokens, that is, tokens returned by the lexer. They carry a semantic value. This type does not include the [error] pseudo-token. *) type token (* The type of terminal symbols. These can be thought of as integer codes. They do not carry a semantic value. This type does include the [error] pseudo-token. *) type terminal (* The type of semantic values. *) type semantic_value (* A token is conceptually a pair of a (non-[error]) terminal symbol and a semantic value. The following two functions are the pair projections. *) val token2terminal: token -> terminal val token2value: token -> semantic_value (* Even though the [error] pseudo-token is not a real token, it is a terminal symbol. Furthermore, for regularity, it must have a semantic value. *) val error_terminal: terminal val error_value: semantic_value (* The type of productions. *) type production (* If a state [s] has a default reduction on production [prod], then, upon entering [s], the automaton should reduce [prod] without consulting the lookahead token. The following function allows determining which states have default reductions. *) (* Instead of returning a value of a sum type -- either [DefRed prod], or [NoDefRed] -- it accepts two continuations, and invokes just one of them. This mechanism allows avoiding a memory allocation. 
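For instance (an added sketch; [reduce] and [consult_lookahead] are hypothetical), instead of allocating and matching a sum:

     match default_reduction s with
     | DefRed prod -> reduce env prod
     | NoDefRed -> consult_lookahead env

one writes, without any allocation:

     default_reduction s
       (fun env prod -> reduce env prod)
       (fun env -> consult_lookahead env)
       env
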
*) val default_reduction: state -> ('env -> production -> 'answer) -> ('env -> 'answer) -> 'env -> 'answer (* An LR automaton can normally take three kinds of actions: shift, reduce, or fail. (Acceptance is a particular case of reduction: it consists in reducing a start production.) *) (* There are two variants of the shift action. [shift/discard s] instructs the automaton to discard the current token, request a new one from the lexer, and move to state [s]. [shift/nodiscard s] instructs it to move to state [s] without requesting a new token. This instruction should be used when [s] has a default reduction on [#]. See [CodeBackend.gettoken] for details. *) (* This is the automaton's action table. It maps a pair of a state and a terminal symbol to an action. *) (* Instead of returning a value of a sum type -- one of shift/discard, shift/nodiscard, reduce, or fail -- this function accepts three continuations, and invokes just one of them. This mechanism allows avoiding a memory allocation. *) (* In summary, the parameters to [action] are as follows: - the first two parameters, a state and a terminal symbol, are used to look up the action table; - the next parameter is the semantic value associated with the above terminal symbol; it is not used, only passed along to the shift continuation, as explained below; - the shift continuation expects an environment; a flag that tells whether to discard the current token; the terminal symbol that is being shifted; its semantic value; and the target state of the transition; - the reduce continuation expects an environment and a production; - the fail continuation expects an environment; - the last parameter is the environment; it is not used, only passed along to the selected continuation. *) val action: state -> terminal -> semantic_value -> ('env -> bool -> terminal -> semantic_value -> state -> 'answer) -> ('env -> production -> 'answer) -> ('env -> 'answer) -> 'env -> 'answer (* This is the automaton's goto table. It maps a pair of a state and a production to a new state. This convention is slightly different from the textbook approach. The goto table is usually indexed by a state and a non-terminal symbol. *) val goto: state -> production -> state (* [is_start prod] tells whether the production [prod] is a start production. *) val is_start: production -> bool (* By convention, a semantic action is responsible for: 1. fetching whatever semantic values and positions it needs off the stack; 2. popping an appropriate number of cells off the stack, as dictated by the length of the right-hand side of the production; 3. computing a new semantic value, as well as new start and end positions; 4. pushing a new stack cell, which contains the three values computed in step 3; 5. returning the new stack computed in steps 2 and 4. Point 1 is essentially forced upon us: if semantic values were fetched off the stack by this interpreter, then the calling convention for semantic actions would be variadic: not all semantic actions would have the same number of arguments. The rest follows rather naturally. *) (* Semantic actions are allowed to raise [Error]. *) exception Error type semantic_action = (state, semantic_value, token) env -> (state, semantic_value) stack val semantic_action: production -> semantic_action (* The LR engine requires a number of hooks, which are used for logging. *) (* The comments below indicate the conventional messages that correspond to these hooks in the code-based back-end; see [CodeBackend]. 
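As an added illustration, an implementation of these hooks could echo the conventional messages to [stderr] (here, [print_terminal] is a hypothetical printer):

     let log = true
     module Log = struct
       let state s =
         Printf.eprintf "State %d:\n" (number s)
       let shift tok s' =
         Printf.eprintf "Shifting (%s) to state %d\n" (print_terminal tok) (number s')
       (* ... and similarly for the other hooks. *)
     end
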
*) (* If the flag [log] is false, then the logging functions are not called. If it is [true], then they are called. *) val log : bool module Log : sig (* State %d: *) val state: state -> unit (* Shifting (<terminal>) to state <state> *) val shift: terminal -> state -> unit (* Reducing a production should be logged either as a reduction event (for regular productions) or as an acceptance event (for start productions). *) (* Reducing production <production> / Accepting *) val reduce_or_accept: production -> unit (* Lookahead token is now <terminal> (<startpos>-<endpos>) *) val lookahead_token: terminal -> Lexing.position -> Lexing.position -> unit (* Initiating error handling *) val initiating_error_handling: unit -> unit (* Resuming error handling *) val resuming_error_handling: unit -> unit (* Handling error in state <state> *) val handling_error: state -> unit end end (* --------------------------------------------------------------------------- *) (* This signature describes the monolithic (traditional) LR engine. *) (* In this interface, the parser controls the lexer. *) module type MONOLITHIC_ENGINE = sig type state type token type semantic_value (* An entry point to the engine requires a start state, a lexer, and a lexing buffer. It either succeeds and produces a semantic value, or fails and raises [Error]. *) exception Error val entry: state -> (Lexing.lexbuf -> token) -> Lexing.lexbuf -> semantic_value end (* --------------------------------------------------------------------------- *) (* The following signatures describe the incremental LR engine. *) (* First, see [INCREMENTAL_ENGINE] in the file [IncrementalEngine.ml]. *) (* The [start] function is set apart because we do not wish to publish it as part of the generated [parser.mli] file. Instead, the table back-end will publish specialized versions of it, with a suitable type cast. *) module type INCREMENTAL_ENGINE_START = sig (* [start] is an entry point. It requires a start state and a start position and begins the parsing process. If the lexer is based on an OCaml lexing buffer, the start position should be [lexbuf.lex_curr_p]. [start] produces a checkpoint, which usually will be an [InputNeeded] checkpoint. (It could be [Accepted] if this starting state accepts only the empty word. It could be [Rejected] if this starting state accepts no word at all.) It does not raise any exception. *) (* [start s pos] should really produce a checkpoint of type ['a checkpoint], for a fixed ['a] that depends on the state [s]. We cannot express this, so we use [semantic_value checkpoint], which is safe. The table back-end uses [Obj.magic] to produce safe specialized versions of [start]. *) type state type semantic_value type 'a checkpoint val start: state -> Lexing.position -> semantic_value checkpoint end (* --------------------------------------------------------------------------- *) (* This signature describes the LR engine, which combines the monolithic and incremental interfaces. *) module type ENGINE = sig include MONOLITHIC_ENGINE include IncrementalEngine.INCREMENTAL_ENGINE with type token := token and type 'a lr1state = state (* useful for us; hidden from the end user *) include INCREMENTAL_ENGINE_START with type state := state and type semantic_value := semantic_value and type 'a checkpoint := 'a checkpoint end menhir-20151112/src/slr.ml0000644000175000017500000001133112621170073014110 0ustar mehdimehdi(* This module extends the LR(0) automaton with lookahead information in order to construct an SLR(1) automaton. The lookahead information is obtained by considering the FOLLOW sets. 
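For example (added), with the grammar [ E -> E PLUS T | T ] and [ T -> ID ], where E is the start symbol, the LR(0) state that contains the item [T -> ID .] becomes an SLR(1) state in which the reduction of [T -> ID] is permitted only on the tokens in FOLLOW(T), that is, PLUS and the end-of-stream marker #.
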
*) (* This construction is not used by Menhir, but can be used to check whether the grammar is in the class SLR(1). This check is performed when the log level [lg] is at least 1. *) open Grammar (* This flag, which is reserved for internal use, causes more information about SLR(1) conflict states to be printed. *) let tell_me_everything = false (* The following function turns an LR(0) state into an SLR(1) state. *) let make_slr_state (s : Lr0.node) : Lr0.concretelr1state = (* Obtain the set of LR(0) items associated with the state [s]. *) let items = Lr0.items s in (* Unfortunately, this set is not closed. We do not have a function that computes the closure of a set of LR(0) items -- we could build one using [Item.Closure], but that would be overkill. So, we first convert this set to a set of LR(1) items, then compute the closure at this level, and finally we turn this LR(1) state into an SLR(1) state by letting the lookahead sets be the FOLLOW sets. This is somewhat ugly and naïve, but seems to work. *) (* Convert this set to a set of LR(1) items. Here, we can use any set of tokens as the lookahead set. We use the empty set. *) let s = Item.Map.lift (fun _item -> TerminalSet.empty) items in (* Compute the LR(1) closure. *) let s = Lr0.closure s in (* We now have an LR(1) state that has the correct set of LR(0) items but phony lookahead information. We convert it into an SLR(1) state by deciding that, for each item, the lookahead set is the FOLLOW set of the symbol that appears on the left-hand side of the item. *) Item.Map.fold (fun item toks accu -> let _, nt, _, _, _ = Item.def item in let follow_nt = Analysis.follow nt in assert (TerminalSet.subset toks follow_nt); (* sanity check *) Item.Map.add item follow_nt accu ) s Item.Map.empty (* Insertion of a new reduce action into the table of reductions. Copied from [Lr1] (boo, hiss). *) let addl prod tok reductions = let prods = try TerminalMap.lookup tok reductions with Not_found -> [] in TerminalMap.add tok (prod :: prods) reductions (* Same thing, for a set of tokens. *) let addl prod toks reductions = TerminalSet.fold (addl prod) toks reductions (* The following function turns a closed LR(1) state into a map of terminal symbols to reduction actions. Copied from a related function in [Lr0]. *) let reductions (s : Lr0.concretelr1state) : Production.index list TerminalMap.t = Item.Map.fold (fun item toks reductions -> match Item.classify item with | Item.Reduce prod -> addl prod toks reductions | Item.Shift _ -> reductions ) s TerminalMap.empty (* The following function turns a closed LR(1) state into a set of shift actions. *) let transitions (s : Lr0.concretelr1state) : TerminalSet.t = Item.Map.fold (fun item _ transitions -> match Item.classify item with | Item.Shift (Symbol.T tok, _) -> TerminalSet.add tok transitions | Item.Shift (Symbol.N _, _) | Item.Reduce _ -> transitions ) s TerminalSet.empty (* This function computes the domain of a terminal map, producing a terminal set. *) let domain (m : 'a TerminalMap.t) : TerminalSet.t = TerminalMap.fold (fun tok _ accu -> TerminalSet.add tok accu ) m TerminalSet.empty (* The following function checks whether a closed LR(1) state is free of conflicts. *) let state_is_ok (s : Lr0.concretelr1state) : bool = let reductions = reductions s and transitions = transitions s in (* Check for shift/reduce conflicts. *) TerminalSet.disjoint transitions (domain reductions) && (* Check for reduce/reduce conflicts. 
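For instance (an added sketch), with

     s: a ID { ... } | b ID { ... }
     a: X { ... }
     b: X { ... }

the state reached after shifting X contains the items [a -> X .] and [b -> X .]; since ID belongs to both FOLLOW(a) and FOLLOW(b), the [reductions] map sends ID to a two-element list, and the fold below returns [false].
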
*) TerminalMap.fold (fun _ prods ok -> ok && match prods with | [] | [ _ ] -> true | _ :: _ :: _ -> false ) reductions true (* The following function counts the number of states in the SLR(1) automaton that have a conflict. *) let count_slr_violations () : int = let count = ref 0 in for s = 0 to Lr0.n - 1 do let s = make_slr_state s in if not (state_is_ok s) then begin incr count; if tell_me_everything then Printf.fprintf stderr "The following SLR(1) state has a conflict:\n%s" (Lr0.print_concrete "" s) end done; !count (* At log level 1, indicate whether the grammar is SLR(1). *) let () = Error.logG 1 (fun f -> let count = count_slr_violations() in if count = 0 then Printf.fprintf f "The grammar is SLR(1).\n" else Printf.fprintf f "The grammar is not SLR(1) -- %d states have a conflict.\n" count ) menhir-20151112/src/pprint.ml0000644000175000017500000006552112621170073014635 0ustar mehdimehdi(* This is an adaptation of Daan Leijen's [PPrint] library, which itself is based on the ideas developed by Philip Wadler in ``A Prettier Printer''. For more information, see: http://www.cs.uu.nl/~daan/pprint.html http://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf *) (* ------------------------------------------------------------------------- *) (* A uniform interface for output channels. *) module type OUTPUT = sig type channel val char: channel -> char -> unit val substring: channel -> string -> int (* offset *) -> int (* length *) -> unit end (* ------------------------------------------------------------------------- *) (* Two implementations of the above interface, respectively based on output channels and memory buffers. This compensates for the fact that ocaml's standard library does not allow creating an output channel out of a memory buffer (a regrettable omission). *) module ChannelOutput : OUTPUT with type channel = out_channel = struct type channel = out_channel let char = output_char let substring = output_substring end module BufferOutput : OUTPUT with type channel = Buffer.t = struct type channel = Buffer.t let char = Buffer.add_char let substring = Buffer.add_substring end (* ------------------------------------------------------------------------- *) (* Here is the algebraic data type of documents. It is analogous to Daan Leijen's version, but the binary constructor [Union] is replaced with the unary constructor [Group], and the constant [Line] is replaced with more general constructions, namely [IfFlat], which provides alternative forms depending on the current flattening mode, and [HardLine], which represents a newline character, and is invalid in flattening mode. *) type document = (* [Empty] is the empty document. *) | Empty (* [Char c] is a document that consists of the single character [c]. We enforce the invariant that [c] is not a newline character. *) | Char of char (* [String (s, ofs, len)] is a document that consists of the portion of the string [s] delimited by the offset [ofs] and the length [len]. We assume, but do not check, that this portion does not contain a newline character. *) | String of string * int * int (* [Blank n] is a document that consists of [n] blank characters. *) | Blank of int (* When in flattening mode, [IfFlat (d1, d2)] turns into the document [d1]. When not in flattening mode, it turns into the document [d2]. *) | IfFlat of document * document (* When in flattening mode, [HardLine] is illegal. When not in flattening mode, it represents a newline character, followed with an appropriate number of indentation. 
A safe way of using [HardLine] is to only use it directly within the right branch of an [IfFlat] construct. *) | HardLine (* [Cat doc1 doc2] is the concatenation of the documents [doc1] and [doc2]. *) | Cat of document * document (* [Nest (j, doc)] is the document [doc], in which the indentation level has been increased by [j], that is, in which [j] blanks have been inserted after every newline character. *) | Nest of int * document (* [Group doc] represents an alternative: it is either a flattened form of [doc], in which occurrences of [Group] disappear and occurrences of [IfFlat] resolve to their left branch, or [doc] itself. *) | Group of document (* [Column f] is the document obtained by applying [f] to the current column number. *) | Column of (int -> document) (* [Nesting f] is the document obtained by applying [f] to the current indentation level, that is, the number of blanks that were printed at the beginning of the current line. *) | Nesting of (int -> document) (* ------------------------------------------------------------------------- *) (* A signature for document renderers. *) module type RENDERER = sig (* Output channels. *) type channel (* [pretty rfrac width channel document] pretty-prints the document [document] to the output channel [channel]. The parameter [width] is the maximum number of characters per line. The parameter [rfrac] is the ribbon width, a fraction relative to [width]. The ribbon width is the maximum number of non-indentation characters per line. *) val pretty: float -> int -> channel -> document -> unit (* [compact channel document] prints the document [document] to the output channel [channel]. No indentation is used. All newline instructions are respected, that is, no groups are flattened. *) val compact: channel -> document -> unit end (* ------------------------------------------------------------------------- *) (* The pretty rendering algorithm: preliminary declarations. *) (* The renderer is supposed to behave exactly like Daan Leijen's, although its implementation is quite radically different. Instead of relying on Haskell's lazy evaluation mechanism, we implement an abstract machine with mutable current state, forking, backtracking (via an explicit stack of choice points), and cut (disposal of earlier choice points). *) (* The renderer's input consists of an ordered sequence of documents. Each document carries an extra indentation level, akin to an implicit [Nest] constructor, and a ``flattening'' flag, which, if set, means that this document should be printed in flattening mode. *) (* An alternative coding style would be to avoid decorating each input document with an indentation level and a flattening mode, and allow the input sequence to contain instructions that set the current nesting level or reset the flattening mode. That would perhaps be slightly more readable, and slightly less efficient. *) type input = | INil | ICons of int * bool * document * input (* When possible (that is, when the stack is empty), the renderer writes directly to the output channel. Otherwise, output is buffered until either a failure point is reached (then, the buffered output is discarded) or a cut is reached (then, all buffered output is committed to the output channel). At all times, the length of the buffered output is at most one line. *) (* The buffered output consists of a list of characters and strings. It is stored in reverse order (the head of the list should be printed last). 
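For example (added), emitting the character 'b' and then the character 'a' yields the value [OChar ('a', OChar ('b', OEmpty))], which [commit] prints as "ba".
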
*) type output = | OEmpty | OChar of char * output | OString of string * int * int * output | OBlank of int * output (* The renderer maintains the following state record. For efficiency, the record is mutable; it is copied when the renderer forks, that is, at choice points. *) type 'channel state = { (* The line width and ribbon width. *) width: int; ribbon: int; (* The output channel. *) channel: 'channel; (* The current indentation level. This is the number of blanks that were printed at the beginning of the current line. *) mutable indentation: int; (* The current column. *) mutable column: int; (* The renderer's input. For efficiency, the input is assumed to never be empty, and the leading [ICons] constructor is inlined within the state record. In other words, the fields [nest1], [flatten1], and [input1] concern the first input document, and the field [input] contains the rest of the input sequence. *) mutable indent1: int; mutable flatten1: bool; mutable input1: document; mutable input: input; (* The renderer's buffer output. *) mutable output: output; } (* The renderer maintains a stack of resumptions, that is, states in which execution should be resumed if the current thread of execution fails by lack of space on the current line. *) (* It is not difficult to prove that the stack is empty if and only if flattening mode is off. Furthermore, when flattening mode is on, all groups are ignored, so no new choice points are pushed onto the stack. As a result, the stack has height one at most at all times, so that the stack height is zero when flattening mode is off and one when flattening mode is on. *) type 'channel stack = 'channel state list (* ------------------------------------------------------------------------- *) (* The pretty rendering algorithm: code. *) (* The renderer is parameterized over an implementation of output channels. *) module Renderer (Output : OUTPUT) = struct type channel = Output.channel (* Printing blank space (indentation characters). *) let blank_length = 80 let blank_buffer = String.make blank_length ' ' let rec blanks channel n = if n <= 0 then () else if n <= blank_length then Output.substring channel blank_buffer 0 n else begin Output.substring channel blank_buffer 0 blank_length; blanks channel (n - blank_length) end (* Committing buffered output to the output channel. The list is printed in reverse order. The code is not tail recursive, but there is no risk of stack overflow, since the length of the buffered output cannot exceed one line. *) let rec commit channel = function | OEmpty -> () | OChar (c, output) -> commit channel output; Output.char channel c | OString (s, ofs, len, output) -> commit channel output; Output.substring channel s ofs len | OBlank (n, output) -> commit channel output; blanks channel n (* The renderer's abstract machine. *) (* The procedures [run], [shift], [emit_char], [emit_string], and [emit_blanks] are mutually recursive, and are tail recursive. They maintain a stack and a current state. The states in the stack, and the current state, are pairwise distinct, so that the current state can be mutated without affecting the contents of the stack. *) (* An invariant is: the buffered output is nonempty only when the stack is nonempty. The contrapositive is: if the stack is empty, then the buffered output is empty. Indeed, the fact that the stack is empty means that no choices were made, so we are not in a speculative mode of execution: as a result, all output can be sent directly to the output channel. 
On the contrary, when the stack is nonempty, there is a possibility that we might backtrack in the future, so all output should be held in a buffer. *) (* [run] is allowed to call itself recursively only when no material is printed. In that case, the check for failure is skipped -- indeed, this test is performed only within [shift]. *) let rec run (stack : channel stack) (state : channel state) : unit = (* Examine the first piece of input, as well as (in some cases) the current flattening mode. *) match state.input1, state.flatten1 with (* The first piece of input is an empty document. Discard it and continue. *) | Empty, _ -> shift stack state (* The first piece of input is a character. Emit it and continue. *) | Char c, _ -> emit_char stack state c (* The first piece of input is a string. Emit it and continue. *) | String (s, ofs, len), _ -> emit_string stack state s ofs len | Blank n, _ -> emit_blanks stack state n (* The first piece of input is a hard newline instruction. Such an instruction is valid only when flattening mode is off. *) (* We emit a newline character, followed by the prescribed amount of indentation. We update the current state to record how many indentation characters were printed and to reflect the new column number. Then, we discard the current piece of input and continue. *) | HardLine, flattening -> assert (not flattening); (* flattening mode must be off. *) assert (stack = []); (* since flattening mode is off, the stack must be empty. *) Output.char state.channel '\n'; let i = state.indent1 in blanks state.channel i; state.column <- i; state.indentation <- i; shift stack state (* The first piece of input is an [IfFlat] conditional instruction. *) | IfFlat (doc, _), true | IfFlat (_, doc), false -> state.input1 <- doc; run stack state (* The first piece of input is a concatenation operator. We take it apart and queue both documents in the input sequence. *) | Cat (doc1, doc2), _ -> state.input1 <- doc1; state.input <- ICons (state.indent1, state.flatten1, doc2, state.input); run stack state (* The first piece of input is a [Nest] operator. We increase the amount of indentation to be applied to the first input document. *) | Nest (j, doc), _ -> state.indent1 <- state.indent1 + j; state.input1 <- doc; run stack state (* The first piece of input is a [Group] operator, and flattening mode is currently off. This introduces a choice point: either we flatten this whole group, or we don't. We try the former possibility first: this is done by enabling flattening mode. Should this avenue fail, we push the current state, in which flattening mode is disabled, onto the stack. *) (* Note that the current state is copied before continuing, so that the state that is pushed on the stack is not affected by future modifications. This is a fork. *) | Group doc, false -> state.input1 <- doc; run (state :: stack) { state with flatten1 = true } (* The first piece of input is a [Group] operator, and flattening mode is currently on. The operator is ignored. *) | Group doc, true -> state.input1 <- doc; run stack state (* The first piece of input is a [Column] operator. The current column is fed into it, so as to produce a document, with which we continue. *) | Column f, _ -> state.input1 <- f state.column; run stack state (* The first piece of input is a [Nesting] operator. The current indentation level is fed into it, so as to produce a document, with which we continue. 
*) | Nesting f, _ -> state.input1 <- f state.indentation; run stack state (* [shift] discards the first document in the input sequence, so that the second input document, if there is one, becomes first. The renderer stops if there is none. *) and shift stack state = assert (state.output = OEmpty || stack <> []); assert (state.flatten1 = (stack <> [])); (* If the stack is nonempty and we have exceeded either the width or the ribbon width parameters, then fail. Backtracking is implemented by discarding the current state, popping a state off the stack, and making it the current state. *) match stack with | resumption :: stack when state.column > state.width || state.column - state.indentation > state.ribbon -> run stack resumption | _ -> match state.input with | INil -> (* End of input. Commit any buffered output and stop. *) commit state.channel state.output | ICons (indent, flatten, head, tail) -> (* There is an input document. Move it one slot ahead and check if we are leaving flattening mode. *) state.indent1 <- indent; state.input1 <- head; state.input <- tail; if state.flatten1 && not flatten then begin (* Leaving flattening mode means success: we have flattened a certain group, and fitted it all on a line, without reaching a failure point. We would now like to commit our decision to flatten this group. This is a Prolog cut. We discard the stack of choice points, replacing it with an empty stack, and commit all buffered output. *) state.flatten1 <- flatten; (* false *) commit state.channel state.output; state.output <- OEmpty; run [] state end else run stack state (* [emit_char] prints a character (either to the output channel or to the output buffer), increments the current column, discards the first piece of input, and continues. *) and emit_char stack state c = begin match stack with | [] -> Output.char state.channel c | _ -> state.output <- OChar (c, state.output) end; state.column <- state.column + 1; shift stack state (* [emit_string] prints a string (either to the output channel or to the output buffer), updates the current column, discards the first piece of input, and continues. *) and emit_string stack state s ofs len = begin match stack with | [] -> Output.substring state.channel s ofs len | _ -> state.output <- OString (s, ofs, len, state.output) end; state.column <- state.column + len; shift stack state (* [emit_blanks] prints a blank string (either to the output channel or to the output buffer), updates the current column, discards the first piece of input, and continues. *) and emit_blanks stack state n = begin match stack with | [] -> blanks state.channel n | _ -> state.output <- OBlank (n, state.output) end; state.column <- state.column + n; shift stack state (* This is the renderer's main entry point. *) let pretty rfrac width channel document = run [] { width = width; ribbon = max 0 (min width (truncate (float_of_int width *. rfrac))); channel = channel; indentation = 0; column = 0; indent1 = 0; flatten1 = false; input1 = document; input = INil; output = OEmpty; } (* ------------------------------------------------------------------------- *) (* The compact rendering algorithm. 
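Unlike [pretty] above, the compact renderer ignores groups entirely and always takes the flat branch of an [IfFlat] instruction; only [HardLine] ever produces a newline. As an illustrative sketch (not part of the original file, and using the combinators defined further below), once this functor has been instantiated -- see [Channel] below -- the call

  Channel.compact stdout (group (char 'a' ^^ break1 ^^ char 'b'))

prints the two characters separated by a single space, because [break1] is [ifflat space hardline] and its flat branch is chosen.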
*) let compact channel document = let column = ref 0 in let rec scan = function | Empty -> () | Char c -> Output.char channel c; column := !column + 1 | String (s, ofs, len) -> Output.substring channel s ofs len; column := !column + len | Blank n -> blanks channel n; column := !column + n | HardLine -> Output.char channel '\n'; column := 0 | Cat (doc1, doc2) -> scan doc1; scan doc2 | IfFlat (doc, _) | Nest (_, doc) | Group doc -> scan doc | Column f -> scan (f !column) | Nesting f -> scan (f 0) in scan document end (* ------------------------------------------------------------------------- *) (* Instantiating the renderers for the two kinds of output channels. *) module Channel = Renderer(ChannelOutput) module Buffer = Renderer(BufferOutput) (* ------------------------------------------------------------------------- *) (* Constructors. *) let empty = Empty let (^^) x y = match x, y with | Empty, x | x, Empty -> x | _, _ -> Cat (x, y) let ifflat doc1 doc2 = IfFlat (doc1, doc2) let hardline = HardLine let char c = assert (c <> '\n'); Char c let substring s ofs len = if len = 0 then Empty else String (s, ofs, len) let text s = substring s 0 (String.length s) let blank n = if n = 0 then Empty else Blank n let nest i x = assert (i >= 0); Nest (i, x) let column f = Column f let nesting f = Nesting f let group x = Group x (* ------------------------------------------------------------------------- *) (* Low-level combinators for alignment and indentation. *) let align d = column (fun k -> nesting (fun i -> nest (k - i) d ) ) let hang i d = align (nest i d) let indent i d = hang i (blank i ^^ d) (* ------------------------------------------------------------------------- *) (* High-level combinators. *) let lparen = char '(' let rparen = char ')' let langle = char '<' let rangle = char '>' let lbrace = char '{' let rbrace = char '}' let lbracket = char '[' let rbracket = char ']' let squote = char '\'' let dquote = char '"' let bquote = char '`' let semi = char ';' let colon = char ':' let comma = char ',' let space = char ' ' let dot = char '.' let sharp = char '#' let backslash = char '\\' let equals = char '=' let qmark = char '?' let tilde = char '~' let at = char '@' let percent = char '%' let dollar = char '$' let caret = char '^' let ampersand = char '&' let star = char '*' let plus = char '+' let minus = char '-' let underscore = char '_' let bang = char '!' 
let bar = char '|' let break i = ifflat (text (String.make i ' ')) hardline let break0 = ifflat empty hardline let break1 = ifflat space hardline let string s = let n = String.length s in let rec chop i = try let j = String.index_from s i '\n' in substring s i (j - i) ^^ break1 ^^ chop (j + 1) with Not_found -> substring s i (n - i) in chop 0 let group_break1 = group break1 let words s = let n = String.length s in let rec blank accu i = (* we have skipped over at least one blank character *) if i = n then accu ^^ group_break1 else match s.[i] with | ' ' | '\t' | '\n' | '\r' -> blank accu (i + 1) | _ -> word break1 accu i (i + 1) and word prefix accu i j = (* we have skipped over at least one non-blank character *) if j = n then accu ^^ group (prefix ^^ substring s i (j - i)) else match s.[j] with | ' ' | '\t' | '\n' | '\r' -> blank (accu ^^ group (prefix ^^ substring s i (j - i))) (j + 1) | _ -> word prefix accu i (j + 1) in if n = 0 then empty else match s.[0] with | ' ' | '\t' | '\n' | '\r' -> blank empty 1 | _ -> word empty empty 0 1 let enclose l r x = l ^^ x ^^ r let squotes = enclose squote squote let dquotes = enclose dquote dquote let bquotes = enclose bquote bquote let braces = enclose lbrace rbrace let parens = enclose lparen rparen let angles = enclose langle rangle let brackets = enclose lbracket rbracket let fold f docs = List.fold_right f docs empty let rec fold1 f docs = match docs with | [] -> empty | [ doc ] -> doc | doc :: docs -> f doc (fold1 f docs) let rec fold1map f g docs = match docs with | [] -> empty | [ doc ] -> g doc | doc :: docs -> let doc = g doc in (* force left-to-right evaluation *) f doc (fold1map f g docs) let sepmap sep g docs = fold1map (fun x y -> x ^^ sep ^^ y) g docs let optional f = function | None -> empty | Some x -> f x let group1 d = group (nest 1 d) let group2 d = group (nest 2 d) module Operators = struct let ( !^ ) = text let ( ^^ ) = ( ^^ ) let ( ^/^ ) x y = x ^^ break1 ^^ y let ( ^//^ ) x y = group (x ^^ nest 2 (break1 ^^ y)) let ( ^@^ ) x y = group (x ^^ break1 ^^ y) let ( ^@@^ ) x y = group2 (x ^^ break1 ^^ y) end open Operators let prefix op x = !^op ^//^ x let infix op x y = (x ^^ space ^^ !^op) ^//^ y let infix_dot op x y = group2 ((x ^^ !^op) ^^ break0 ^^ y) let infix_com op x y = x ^^ !^op ^^ group_break1 ^^ y let surround n sep open_doc contents close_doc = group (open_doc ^^ nest n (sep ^^ contents) ^^ sep ^^ close_doc) let surround1 open_txt contents close_txt = surround 1 break0 !^open_txt contents !^close_txt let surround2 open_txt contents close_txt = surround 2 break1 !^open_txt contents !^close_txt let soft_surround n sep open_doc contents close_doc = group (open_doc ^^ nest n (group sep ^^ contents) ^^ group (sep ^^ close_doc)) let seq indent break empty_seq open_seq sep_seq close_seq = function | [] -> empty_seq | xs -> surround indent break open_seq (fold1 (fun x xs -> x ^^ sep_seq ^^ xs) xs) close_seq let seq1 open_txt sep_txt close_txt = seq 1 break0 !^(open_txt ^ close_txt) !^open_txt (!^sep_txt ^^ break1) !^close_txt let seq2 open_txt sep_txt close_txt = seq 2 break1 !^(open_txt ^ close_txt) !^open_txt (!^sep_txt ^^ break1) !^close_txt let sprintf fmt = Printf.ksprintf string fmt (* A signature for value representations. 
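An implementation, [ML], appears further below. As an illustrative example (not part of the original file), [ML.tuple [ML.int 1; ML.bool true]] builds a document that renders, on a single line, as [(1, true)].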
This is compatible with the associated Camlp4 generator: SwitchValueRepresentation *)
module type VALUE_REPRESENTATION = sig
(* The type of value representation *)
type t
(* [variant type_name data_constructor_name tag arguments] Given information about the variant and its arguments, this function produces a new value representation. *)
val variant : string -> string -> int -> t list -> t
(* [record type_name fields] Given a type name and a list of record fields, this function produces the value representation of a record. *)
val record : string -> (string * t) list -> t
(* [tuple arguments] Given a list of value representations, this function produces a new value representation. *)
val tuple : t list -> t
(* ------------------------------------------------------------------------- *)
(* Value representation for primitive types. *)
val string : string -> t
val int : int -> t
val int32 : int32 -> t
val int64 : int64 -> t
val nativeint : nativeint -> t
val float : float -> t
val char : char -> t
val bool : bool -> t
val option : ('a -> t) -> 'a option -> t
val list : ('a -> t) -> 'a list -> t
val array : ('a -> t) -> 'a array -> t
val ref : ('a -> t) -> 'a ref -> t
(* Value representation for any other value. *)
val unknown : string -> 'a -> t
end
module type DOCUMENT_VALUE_REPRESENTATION = VALUE_REPRESENTATION with type t = document
(* please remove as soon as this is available in ocaml *)
module MissingFloatRepr = struct
let valid_float_lexeme s = let l = String.length s in let rec loop i = if i >= l then s ^ "." else match s.[i] with | '0' .. '9' | '-' -> loop (i+1) | _ -> s in loop 0
let float_repres f = match classify_float f with FP_nan -> "nan" | FP_infinite -> if f < 0.0 then "neg_infinity" else "infinity" | _ -> let s1 = Printf.sprintf "%.12g" f in if f = float_of_string s1 then valid_float_lexeme s1 else let s2 = Printf.sprintf "%.15g" f in if f = float_of_string s2 then valid_float_lexeme s2 else Printf.sprintf "%.18g" f
end
module ML = struct
type t = document
let tuple = seq1 "(" "," ")"
let variant _ cons _ args = if args = [] then !^cons else !^cons ^^ tuple args
let record _ fields = seq2 "{" ";" "}" (List.map (fun (k, v) -> infix ":" !^k v) fields)
let option f = function | Some x -> !^"Some" ^^ tuple [f x] | None -> !^"None"
let list f xs = seq2 "[" ";" "]" (List.map f xs)
let array f xs = seq2 "[|" ";" "|]" (Array.to_list (Array.map f xs))
let ref f x = record "ref" ["contents", f !x]
let float f = string (MissingFloatRepr.float_repres f)
let int = sprintf "%d"
let int32 = sprintf "%ld"
let int64 = sprintf "%Ld"
let nativeint = sprintf "%nd"
let char = sprintf "%C"
let bool = sprintf "%B"
let string = sprintf "%S"
let unknown tyname _ = sprintf "<%s>" tyname
end
(* Deprecated *)
let line = ifflat space hardline
let linebreak = ifflat empty hardline
let softline = group line
let softbreak = group linebreak
menhir-20151112/src/resizableArray.mli0000644000175000017500000000432612621170073016445 0ustar mehdimehdi(* This module implements resizable arrays, that is, arrays that can grow upon explicit request. *) type 'a t (* [make capacity default init] creates a resizable array of logical length 0, whose physical length is initially [capacity], and whose default element is [default]. The default element is used to fill empty slots in the physical array; it is otherwise irrelevant. The [init] function is used to initialize new logical slots when the logical size of the array grows, so, unlike [default], it is semantically meaningful. 
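As an illustrative example (not part of the original file, and assuming the [init] semantics just described):

  let a = make 16 (-1) (fun i -> 2 * i) in
  resize a 3;
  get a 2

yields [4], since growing the array to logical length 3 initializes slot 2 with [init 2].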
*) val make: int -> 'a -> (int -> 'a) -> 'a t (* [make_] is a simplified variant of [make] where the [init] function always returns [default], i.e., where new logical slots are initialized with [default] when the array is grown. *) val make_: int -> 'a -> 'a t (* [length a] returns the current logical length of the array [a]. *) val length: 'a t -> int (* [resize a n] changes the logical length of the array [a] to [n]. If the length decreases, any excess elements are lost. The capacity of the underlying physical array remains the same. If the length increases, the new positions are initialized using the [init] function that was supplied to [make]. The capacity of the underlying physical array grows by at least a factor of two. *) val resize: 'a t -> int -> unit (* [get a i] returns the element contained at offset [i] in the array [a]. Slots are numbered 0 and up. [i] must be strictly less than the array's current logical length. *) val get: 'a t -> int -> 'a (* [set a i x] sets the element contained at offset [i] in the array [a] to [x]. Slots are numbered 0 and up. [i] must be strictly less than the array's current logical length. *) val set: 'a t -> int -> 'a -> unit (* [push a x] appends the element [x] at the end of the array [a], whose length increases by one. *) val push: 'a t -> 'a -> unit (* [pop a] removes the element [x] found at the end of the array [a], whose length decreases by one. The array must have nonzero length. *) val pop: 'a t -> 'a (* [default a] returns the default value that was used when the array [a] was created. This should seldom be useful, but can be convenient. *) val default: 'a t -> 'a menhir-20151112/src/Fix.ml0000644000175000017500000004416712621170073014052 0ustar mehdimehdi(**************************************************************************) (* *) (* Fix *) (* *) (* Author: François Pottier, INRIA Paris-Rocquencourt *) (* Version: 20101206 *) (* *) (* The copyright to this code is held by Institut National de Recherche *) (* en Informatique et en Automatique (INRIA). All rights reserved. This *) (* file is distributed under the license CeCILL-C (see file LICENSE). *) (* *) (**************************************************************************) (* -------------------------------------------------------------------------- *) (* Maps. *) (* We require imperative maps, that is, maps that can be updated in place. An implementation of persistent maps, such as the one offered by ocaml's standard library, can easily be turned into an implementation of imperative maps, so this is a weak requirement. *) module type IMPERATIVE_MAPS = sig type key type 'data t val create: unit -> 'data t val clear: 'data t -> unit val add: key -> 'data -> 'data t -> unit val find: key -> 'data t -> 'data val iter: (key -> 'data -> unit) -> 'data t -> unit end (* -------------------------------------------------------------------------- *) (* Properties. *) (* Properties must form a partial order, equipped with a least element, and must satisfy the ascending chain condition: every monotone sequence eventually stabilizes. *) (* [is_maximal] determines whether a property [p] is maximal with respect to the partial order. Only a conservative check is required: in any event, it is permitted for [is_maximal p] to return [false]. If [is_maximal p] returns [true], then [p] must have no upper bound other than itself. In particular, if properties form a lattice, then [p] must be the top element. 
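As an illustrative instance (not part of the original file), the two-point lattice of Booleans, ordered by implication, satisfies the signature below:

  module BoolProperty = struct
    type property = bool
    let bottom = false
    let equal (p : bool) (q : bool) = (p = q)
    let is_maximal p = p
  end

Here, [is_maximal] answers [true] exactly on the top element [true].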
This feature, not described in the paper, enables a couple of minor optimizations. *) module type PROPERTY = sig type property val bottom: property val equal: property -> property -> bool val is_maximal: property -> bool end (* -------------------------------------------------------------------------- *) (* The dynamic dependency graph. *) (* An edge from [node1] to [node2] means that [node1] depends on [node2], or (equivalently) that [node1] observes [node2]. Then, an update of the current property at [node2] causes a signal to be sent to [node1]. A node can observe itself. *) (* This module could be placed in a separate file, but is included here in order to make [Fix] self-contained. *) module Graph : sig (* This module provides a data structure for maintaining and modifying a directed graph. Each node is allowed to carry a piece of client data. There are functions for creating a new node, looking up a node's data, looking up a node's predecessors, and setting or clearing a node's successors (all at once). *) type 'data node (* [create data] creates a new node, with no incident edges, with client information [data]. Time complexity: constant. *) val create: 'data -> 'data node (* [data node] returns the client information associated with the node [node]. Time complexity: constant. *) val data: 'data node -> 'data (* [predecessors node] returns a list of [node]'s predecessors. Amortized time complexity: linear in the length of the output list. *) val predecessors: 'data node -> 'data node list (* [set_successors src dsts] creates an edge from the node [src] to each of the nodes in the list [dsts]. Duplicate elements in the list [dsts] are removed, so that no duplicate edges are created. It is assumed that [src] initially has no successors. Time complexity: linear in the length of the input list. *) val set_successors: 'data node -> 'data node list -> unit (* [clear_successors node] removes all of [node]'s outgoing edges. Time complexity: linear in the number of edges that are removed. *) val clear_successors: 'data node -> unit (* That's it. *) end = struct (* Using doubly-linked adjacency lists, one could implement [predecessors] in worst-case linear time with respect to the length of the output list, [set_successors] in worst-case linear time with respect to the length of the input list, and [clear_successors] in worst-case linear time with respect to the number of edges that are removed. We use a simpler implementation, based on singly-linked adjacency lists, with deferred removal of edges. It achieves the same complexity bounds, except [predecessors] only offers an amortized complexity bound. This is good enough for our purposes, and, in practice, is more efficient by a constant factor. This simplification was suggested by Arthur Charguéraud. *) type 'data node = { (* The client information associated with this node. *) data: 'data; (* This node's incoming and outgoing edges. *) mutable outgoing: 'data edge list; mutable incoming: 'data edge list; (* A transient mark, always set to [false], except when checking against duplicate elements in a successor list. *) mutable marked: bool; } and 'data edge = { (* This edge's nodes. Edges are symmetric: source and destination are not distinguished. Thus, an edge appears both in the outgoing edge list of its source node and in the incoming edge list of its destination node. This allows edges to be easily marked as destroyed. 
*) node1: 'data node; node2: 'data node; (* Edges that are destroyed are marked as such, but are not immediately removed from the adjacency lists. *) mutable destroyed: bool; } let create (data : 'data) : 'data node = { data = data; outgoing = []; incoming = []; marked = false; } let data (node : 'data node) : 'data = node.data (* [follow src edge] returns the node that is connected to [src] by [edge]. Time complexity: constant. *) let follow src edge = if edge.node1 == src then edge.node2 else begin assert (edge.node2 == src); edge.node1 end (* The [predecessors] function removes edges that have been marked destroyed. The cost of removing these has already been paid for, so the amortized time complexity of [predecessors] is linear in the length of the output list. *) let predecessors (node : 'data node) : 'data node list = let predecessors = List.filter (fun edge -> not edge.destroyed) node.incoming in node.incoming <- predecessors; List.map (follow node) predecessors (* [link src dst] creates a new edge from [src] to [dst], together with its reverse edge. Time complexity: constant. *) let link (src : 'data node) (dst : 'data node) : unit = let edge = { node1 = src; node2 = dst; destroyed = false; } in src.outgoing <- edge :: src.outgoing; dst.incoming <- edge :: dst.incoming let set_successors (src : 'data node) (dsts : 'data node list) : unit = assert (src.outgoing = []); let rec loop = function | [] -> () | dst :: dsts -> if dst.marked then loop dsts (* skip duplicate elements *) else begin dst.marked <- true; link src dst; loop dsts; dst.marked <- false end in loop dsts let clear_successors (node : 'data node) : unit = List.iter (fun edge -> assert (not edge.destroyed); edge.destroyed <- true; ) node.outgoing; node.outgoing <- [] end (* -------------------------------------------------------------------------- *) (* The code is parametric in an implementation of maps over variables and in an implementation of properties. *) module Make (M : IMPERATIVE_MAPS) (P : PROPERTY) = struct type variable = M.key type property = P.property type valuation = variable -> property type rhs = valuation -> property type equations = variable -> rhs (* -------------------------------------------------------------------------- *) (* Data. *) (* Each node in the dependency graph carries information about a fixed variable [v]. *) type node = data Graph.node and data = { (* This is the result of the application of [rhs] to the variable [v]. It must be stored in order to guarantee that this application is performed at most once. *) rhs: rhs; (* This is the current property at [v]. It evolves monotonically with time. *) mutable property: property; (* That's it! *) } (* [property node] returns the current property at [node]. *) let property node = (Graph.data node).property (* -------------------------------------------------------------------------- *) (* Many definitions must be made within the body of the function [lfp]. For greater syntactic convenience, we place them in a local module. *) let lfp (eqs : equations) : valuation = let module LFP = struct (* -------------------------------------------------------------------------- *) (* The workset. *) (* When the algorithm is inactive, the workset is empty. *) (* Our workset is based on a Queue, but it could just as well be based on a Stack. A textual replacement is possible. It could also be based on a priority queue, provided a sensible way of assigning priorities could be found. *) module Workset : sig (* [insert node] inserts [node] into the workset. 
[node] must have no successors. *) val insert: node -> unit (* [repeat f] repeatedly applies [f] to a node extracted out of the workset, until the workset becomes empty. [f] is allowed to use [insert]. *) val repeat: (node -> unit) -> unit (* That's it! *) end = struct (* Initialize the workset. *) let workset = Queue.create() let insert node = Queue.push node workset let repeat f = while not (Queue.is_empty workset) do f (Queue.pop workset) done end (* -------------------------------------------------------------------------- *) (* Signals. *) (* A node in the workset has no successors. (It can have predecessors.) In other words, a predecessor (an observer) of some node is never in the workset. Furthermore, a node never appears twice in the workset. *) (* When a variable broadcasts a signal, all of its predecessors (observers) receive the signal. Any variable that receives the signal loses all of its successors (that is, it ceases to observe anything) and is inserted into the workset. This preserves the above invariant. *) let signal subject = List.iter (fun observer -> Graph.clear_successors observer; Workset.insert observer ) (Graph.predecessors subject) (* At this point, [subject] has no predecessors. This plays no role in the correctness proof, though. *) (* -------------------------------------------------------------------------- *) (* Tables. *) (* The permanent table maps variables that have reached a fixed point to properties. It persists forever. *) let permanent : property M.t = M.create() (* The transient table maps variables that have not yet reached a fixed point to nodes. (A node contains not only a property, but also a memoized right-hand side, and carries edges.) At the beginning of a run, it is empty. It fills up during a run. At the end of a run, it is copied into the permanent table and cleared. *) let transient : node M.t = M.create() (* [freeze()] copies the transient table into the permanent table, and empties the transient table. This allows all nodes to be reclaimed by the garbage collector. *) let freeze () = M.iter (fun v node -> M.add v (property node) permanent ) transient; M.clear transient (* -------------------------------------------------------------------------- *) (* Workset processing. *) (* [solve node] re-evaluates the right-hand side at [node]. If this leads to a change, then the current property is updated, and [node] emits a signal towards its observers. *) (* When [solve node] is invoked, [node] has no subjects. Indeed, when [solve] is invoked by [node_for], [node] is newly created; when [solve] is invoked by [Workset.repeat], [node] has just been extracted out of the workset, and a node in the workset has no subjects. *) (* [node] must not be in the workset. *) (* In short, when [solve node] is invoked, [node] is neither awake nor asleep. When [solve node] finishes, [node] is either awake or asleep again. (Chances are, it is asleep, unless it is its own observer; then, it is awakened by the final call to [signal node].) *) let rec solve (node : node) : unit = (* Retrieve the data record carried by this node. *) let data = Graph.data node in (* Prepare to compute an updated value at this node. This is done by invoking the client's right-hand side function. *) (* The flag [alive] is used to prevent the client from invoking [request] after this interaction phase is over. In theory, this dynamic check seems required in order to argue that [request] behaves like a pure function. 
In practice, this check is not very useful: only a bizarre client would store a [request] function and invoke it after it has become stale. *)
let alive = ref true and subjects = ref [] in
(* We supply the client with [request], a function that provides access to the current valuation, and dynamically records dependencies. This yields a set of dependencies that is correct by construction. *)
let request (v : variable) : property = assert !alive; try M.find v permanent with Not_found -> let subject = node_for v in let p = property subject in if not (P.is_maximal p) then subjects := subject :: !subjects; p in
(* Give control to the client. *)
let new_property = data.rhs request in
(* From now on, prevent any invocation of this instance of [request] by the client. *)
alive := false;
(* At this point, [node] has no subjects, as noted above. Thus, the precondition of [set_successors] is met. We can install [!subjects] as the new set of subjects for this node. *)
(* If we have gathered no subjects in the list [!subjects], then this node must have stabilized. If [new_property] is maximal, then this node must have stabilized. *)
(* If this node has stabilized, then it need not observe any more, so the call to [set_successors] is skipped. In practice, this seems to be a minor optimization. In the particular case where every node stabilizes at the very first call to [rhs], this means that no edges are ever built. This particular case is unlikely, as it means that we are just doing memoization, not a true fixed point computation. *)
(* One could go further and note that, if this node has stabilized, then it could immediately be taken out of the transient table and copied into the permanent table. This would have the beneficial effect of allowing the detection of further nodes that have stabilized. Furthermore, it would enforce the property that no node in the transient table has a maximal value, hence the call to [is_maximal] above would become useless. *)
if not (!subjects = [] || P.is_maximal new_property) then Graph.set_successors node !subjects;
(* If the updated value differs from the previous value, record the updated value and send a signal to all observers of [node]. *)
if not (P.equal data.property new_property) then begin data.property <- new_property; signal node end
(* Note that equality of the two values does not imply that this node has stabilized forever. *)
(* -------------------------------------------------------------------------- *)
(* [node_for v] returns the graph node associated with the variable [v]. It is assumed that [v] does not appear in the permanent table. If [v] appears in the transient table, the associated node is returned. Otherwise, [v] is a newly discovered variable: a new node is created on the fly, and the transient table is grown. The new node can either be inserted into the workset (it is then awake) or handled immediately via a recursive call to [solve] (it is then asleep, unless it observes itself). *)
(* The recursive call to [solve node] can be replaced, if desired, by a call to [Workset.insert node]. Using a recursive call to [solve] permits eager top-down discovery of new nodes. This can save a constant factor, because it allows new nodes to move directly from [bottom] to a good first approximation, without sending any signals, since [node] has no observers when [solve node] is invoked. 
In fact, if the dependency graph is acyclic, the algorithm discovers nodes top-down, performs computation on the way back up, and runs without ever inserting a node into the workset! Unfortunately, this causes the stack to grow as deep as the longest path in the dependency graph, which can blow up the stack. *) and node_for (v : variable) : node = try M.find v transient with Not_found -> let node = Graph.create { rhs = eqs v; property = P.bottom } in (* Adding this node to the transient table prior to calling [solve] recursively is mandatory, otherwise [solve] might loop, creating an infinite number of nodes for the same variable. *) M.add v node transient; solve node; (* or: Workset.insert node *) node (* -------------------------------------------------------------------------- *) (* Invocations of [get] trigger the fixed point computation. *) (* The flag [inactive] prevents reentrant calls by the client. *) let inactive = ref true let get (v : variable) : property = try M.find v permanent with Not_found -> assert !inactive; inactive := false; let node = node_for v in Workset.repeat solve; freeze(); inactive := true; property node (* -------------------------------------------------------------------------- *) (* Close the local module [LFP]. *) end in LFP.get end menhir-20151112/src/dot.mli0000644000175000017500000000241412621170073014250 0ustar mehdimehdi(* This module displays graphs in graphviz dot format. It is much more basic than the one bundled with the ocamlgraph library, but offers the advantage of being stand-alone. *) (* ------------------------------------------------------------------------- *) (* Type definitions. *) type size = float * float (* in inches *) type orientation = | Portrait | Landscape type rankdir = | LeftToRight | TopToBottom type ratio = | Compress | Fill | Auto type style = (* Both nodes and edges. *) | Solid | Dashed | Dotted | Bold | Invisible (* Nodes only. *) | Filled | Diagonals | Rounded type shape = | Box | Oval | Circle | DoubleCircle (* there are many others, let's stop here *) (* ------------------------------------------------------------------------- *) (* The graph printer. *) module Print (G : sig type vertex val name: vertex -> string val successors: (?style:style -> label:string -> vertex -> unit) -> vertex -> unit val iter: (?shape:shape -> ?style:style -> label:string -> vertex -> unit) -> unit end) : sig val print: ?directed: bool -> ?size: size -> ?orientation: orientation -> ?rankdir: rankdir -> ?ratio: ratio -> out_channel -> unit end menhir-20151112/src/stringSet.ml0000644000175000017500000000011412621170073015266 0ustar mehdimehdiinclude Set.Make (String) let of_list xs = List.fold_right add xs empty menhir-20151112/src/lr1partial.mli0000644000175000017500000000221512621170073015534 0ustar mehdimehdiopen Grammar (* This exception is raised by [Run] if we fail to reach the goal state. This is known to happen in a few pathological cases (e.g., when a shift/reduce conflict is solved in favor of reduction, the only path towards the goal state may disappear). So we report this situation gracefully in the .conflicts file instead of failing abruptly. *) exception Oops module Run (X : sig (* A restricted set of tokens of interest. *) val tokens: TerminalSet.t (* A state of the (merged) LR(1) automaton that we're trying to simulate. 
*) val goal: Lr1.node end) : sig (* What we are after is a path, in the canonical LR(1) automaton, that leads from some entry node to a node [N] such that (i) [N] has a conflict involving one of the tokens of interest and (ii) [N] corresponds to the goal node, that is, the path that leads to [N] in the canonical LR(1) automaton leads to the goal node in the merged LR(1) automaton. *) val source: Item.t val path: Symbol.t array val goal: Lr0.concretelr1state (* An (arbitrarily chosen) conflict token in the goal state. *) val token: Terminal.t end menhir-20151112/src/coqBackend.ml0000644000175000017500000004477012621170073015356 0ustar mehdimehdiopen Printf open Grammar module Run (T: sig end) = struct let print_term t = assert (not (Terminal.pseudo t)); sprintf "%s't" (Terminal.print t) let print_nterm nt = sprintf "%s'nt" (Nonterminal.print true nt) let print_symbol = function | Symbol.N nt -> sprintf "NT %s" (print_nterm nt) | Symbol.T t -> sprintf "T %s" (print_term t) let print_type ty = if Settings.coq_no_actions then "unit" else match ty with | None -> raise Not_found (* fpottier: argh! *) | Some t -> match t with | Stretch.Declared s -> s.Stretch.stretch_content | Stretch.Inferred _ -> assert false (* We cannot infer coq types *) let is_final_state node = match Invariant.has_default_reduction node with | Some (prod, _) -> Production.is_start prod | None -> false let lr1_iter_nonfinal f = Lr1.iter (fun node -> if not (is_final_state node) then f node) let lr1_iterx_nonfinal f = Lr1.iterx (fun node -> if not (is_final_state node) then f node) let lr1_foldx_nonfinal f = Lr1.foldx (fun accu node -> if not (is_final_state node) then f accu node else accu) let print_nis nis = sprintf "Nis'%d" (Lr1.number nis) let print_init init = sprintf "Init'%d" (Lr1.number init) let print_st st = match Lr1.incoming_symbol st with | Some _ -> sprintf "Ninit %s" (print_nis st) | None -> sprintf "Init %s" (print_init st) let (prod_ids, _) = Production.foldx (fun p (prod_ids, counters) -> let lhs = Production.nt p in let id = try SymbolMap.find (Symbol.N lhs) counters with Not_found -> 0 in (ProductionMap.add p id prod_ids, SymbolMap.add (Symbol.N lhs) (id+1) counters)) (ProductionMap.empty, SymbolMap.empty) let print_prod p = sprintf "Prod'%s'%d" (Nonterminal.print true (Production.nt p)) (ProductionMap.find p prod_ids) let () = if not Settings.coq_no_actions then begin Nonterminal.iterx (fun nonterminal -> match Nonterminal.ocamltype nonterminal with | None -> Error.error [] "I don't know the type of the nonterminal symbol %s." (Nonterminal.print false nonterminal) | Some _ -> ()); Production.iterx (fun prod -> if not (Keyword.KeywordSet.is_empty (Action.keywords (Production.action prod))) then Error.error [] "The Coq back-end supports none of the $ keywords." ) end; Production.iterx (fun prod -> Array.iter (fun symb -> match symb with | Symbol.T t -> if t = Terminal.error then Error.error [] "the Coq back-end does not support the error token." | _ -> ()) (Production.rhs prod)); if Front.grammar.UnparameterizedSyntax.parameters <> [] then Error.error [] "the Coq back-end does not support %%parameter." 
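(* The next function prints a Coq [int31] constant in binary form, built out of [Int31.On], [Int31.In], [twice], and [twice_plus_one]. As an illustrative example (not part of the original file), the integer 5 is printed as [(twice_plus_one (twice Int31.In))], that is, 2 * (2 * 1) + 1. *)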
(* Optimized because if we extract some constants to the right caml term, the ocaml inlining+constant unfolding replaces that by the actual constant *) let rec write_optimized_int31 f n = match n with | 0 -> fprintf f "Int31.On" | 1 -> fprintf f "Int31.In" | k when k land 1 = 0 -> fprintf f "(twice "; write_optimized_int31 f (n lsr 1); fprintf f ")" | _ -> fprintf f "(twice_plus_one "; write_optimized_int31 f (n lsr 1); fprintf f ")" let write_inductive_alphabet f name constrs = fprintf f "Inductive %s' : Set :=" name; List.iter (fprintf f "\n | %s") constrs; fprintf f ".\n"; fprintf f "Definition %s := %s'.\n\n" name name; if List.length constrs > 0 then begin let iteri f = ignore (List.fold_left (fun k x -> f k x; succ k) 0 constrs) in fprintf f "Program Instance %sNum : Numbered %s :=\n" name name; fprintf f " { inj := fun x => match x return _ with "; iteri (fun k constr -> fprintf f "| %s => " constr; write_optimized_int31 f k; fprintf f " "; ); fprintf f "end;\n"; fprintf f " surj := (fun n => match n return _ with "; iteri (fprintf f "| %d => %s "); fprintf f "| _ => %s end)%%int31;\n" (List.hd constrs); fprintf f " inj_bound := %d%%int31 }.\n" (List.length constrs); end else begin fprintf f "Program Instance %sAlph : Alphabet %s :=\n" name name; fprintf f " { AlphabetComparable := {| compare := fun x y =>\n"; fprintf f " match x, y return comparison with end |};\n"; fprintf f " AlphabetEnumerable := {| all_list := [] |} }."; end let write_terminals f = write_inductive_alphabet f "terminal" ( Terminal.fold (fun t l -> if Terminal.pseudo t then l else print_term t::l) []); fprintf f "Instance TerminalAlph : Alphabet terminal := _.\n\n" let write_nonterminals f = write_inductive_alphabet f "nonterminal" ( Nonterminal.foldx (fun nt l -> (print_nterm nt)::l) []); fprintf f "Instance NonTerminalAlph : Alphabet nonterminal := _.\n\n" let write_symbol_semantic_type f = fprintf f "Definition terminal_semantic_type (t:terminal) : Type:=\n"; fprintf f " match t with\n"; Terminal.iter (fun terminal -> if not (Terminal.pseudo terminal) then fprintf f " | %s => %s%%type\n" (print_term terminal) (try print_type (Terminal.ocamltype terminal) with Not_found -> "unit") ); fprintf f " end.\n\n"; fprintf f "Definition nonterminal_semantic_type (nt:nonterminal) : Type:=\n"; fprintf f " match nt with\n"; Nonterminal.iterx (fun nonterminal -> fprintf f " | %s => %s%%type\n" (print_nterm nonterminal) (print_type (Nonterminal.ocamltype nonterminal))); fprintf f " end.\n\n"; fprintf f "Definition symbol_semantic_type (s:symbol) : Type:=\n"; fprintf f " match s with\n"; fprintf f " | T t => terminal_semantic_type t\n"; fprintf f " | NT nt => nonterminal_semantic_type nt\n"; fprintf f " end.\n\n" let write_productions f = write_inductive_alphabet f "production" ( Production.foldx (fun prod l -> (print_prod prod)::l) []); fprintf f "Instance ProductionAlph : Alphabet production := _.\n\n" let write_productions_contents f = fprintf f "Definition prod_contents (p:production) :\n"; fprintf f " { p:nonterminal * list symbol &\n"; fprintf f " arrows_left (map symbol_semantic_type (rev (snd p)))\n"; fprintf f " (symbol_semantic_type (NT (fst p))) }\n"; fprintf f " :=\n"; fprintf f " let box := existT (fun p =>\n"; fprintf f " arrows_left (map symbol_semantic_type (rev (snd p)))\n"; fprintf f " (symbol_semantic_type (NT (fst p))))\n"; fprintf f " in\n"; fprintf f " match p with\n"; Production.iterx (fun prod -> fprintf f " | %s => box\n" (print_prod prod); fprintf f " (%s, [%s])\n" (print_nterm (Production.nt prod)) 
(String.concat "; " (List.map print_symbol (List.rev (Array.to_list (Production.rhs prod))))); if Production.length prod = 0 then fprintf f " (\n" else fprintf f " (fun %s =>\n" (String.concat " " (List.rev (Array.to_list (Production.identifiers prod)))); if Settings.coq_no_actions then fprintf f "()" else Action.print f (Production.action prod); fprintf f "\n)\n"); fprintf f " end.\n\n"; fprintf f "Definition prod_lhs (p:production) :=\n"; fprintf f " fst (projT1 (prod_contents p)).\n"; fprintf f "Definition prod_rhs_rev (p:production) :=\n"; fprintf f " snd (projT1 (prod_contents p)).\n"; fprintf f "Definition prod_action (p:production) :=\n"; fprintf f " projT2 (prod_contents p).\n\n" let write_nullable_first f = fprintf f "Definition nullable_nterm (nt:nonterminal) : bool :=\n"; fprintf f " match nt with\n"; Nonterminal.iterx (fun nt -> fprintf f " | %s => %b\n" (print_nterm nt) (Analysis.nullable nt)); fprintf f " end.\n\n"; fprintf f "Definition first_nterm (nt:nonterminal) : list terminal :=\n"; fprintf f " match nt with\n"; Nonterminal.iterx (fun nt -> let firstSet = Analysis.first nt in fprintf f " | %s => [" (print_nterm nt); let first = ref true in TerminalSet.iter (fun t -> if !first then first := false else fprintf f "; "; fprintf f "%s" (print_term t) ) firstSet; fprintf f "]\n"); fprintf f " end.\n\n" let write_grammar f = fprintf f "Module Import Gram <: Grammar.T.\n\n"; fprintf f "Local Obligation Tactic := intro x; case x; reflexivity.\n\n"; write_terminals f; write_nonterminals f; fprintf f "Include Grammar.Symbol.\n\n"; write_symbol_semantic_type f; write_productions f; write_productions_contents f; fprintf f "Include Grammar.Defs.\n\n"; fprintf f "End Gram.\n\n" let write_nis f = write_inductive_alphabet f "noninitstate" ( lr1_foldx_nonfinal (fun l node -> (print_nis node)::l) []); fprintf f "Instance NonInitStateAlph : Alphabet noninitstate := _.\n\n" let write_init f = write_inductive_alphabet f "initstate" ( ProductionMap.fold (fun _prod node l -> (print_init node)::l) Lr1.entry []); fprintf f "Instance InitStateAlph : Alphabet initstate := _.\n\n" let write_start_nt f = fprintf f "Definition start_nt (init:initstate) : nonterminal :=\n"; fprintf f " match init with\n"; Lr1.fold_entry (fun _prod node startnt _t () -> fprintf f " | %s => %s\n" (print_init node) (print_nterm startnt) ) (); fprintf f " end.\n\n" let write_actions f = fprintf f "Definition action_table (state:state) : action :=\n"; fprintf f " match state with\n"; lr1_iter_nonfinal (fun node -> fprintf f " | %s => " (print_st node); match Invariant.has_default_reduction node with | Some (prod, _) -> fprintf f "Default_reduce_act %s\n" (print_prod prod) | None -> fprintf f "Lookahead_act (fun terminal:terminal =>\n"; fprintf f " match terminal return lookahead_action terminal with\n"; let has_fail = ref false in Terminal.iter (fun t -> if not (Terminal.pseudo t) then begin try let target = SymbolMap.find (Symbol.T t) (Lr1.transitions node) in fprintf f " | %s => Shift_act %s (eq_refl _)\n" (print_term t) (print_nis target) with Not_found -> try let prod = Misc.single (TerminalMap.find t (Lr1.reductions node)) in fprintf f " | %s => Reduce_act %s\n" (print_term t) (print_prod prod) with Not_found -> has_fail := true end); if !has_fail then fprintf f " | _ => Fail_act\n"; fprintf f " end)\n" ); fprintf f " end.\n\n" let write_gotos f = fprintf f "Definition goto_table (state:state) (nt:nonterminal) :=\n"; fprintf f " match state, nt return option { s:noninitstate | NT nt = last_symb_of_non_init_state s } 
with\n"; let has_none = ref false in lr1_iter_nonfinal (fun node -> Nonterminal.iterx (fun nt -> try let target = SymbolMap.find (Symbol.N nt) (Lr1.transitions node) in fprintf f " | %s, %s => " (print_st node) (print_nterm nt); if is_final_state target then fprintf f "None" else fprintf f "Some (exist _ %s (eq_refl _))\n" (print_nis target) with Not_found -> has_none := true)); if !has_none then fprintf f " | _, _ => None\n"; fprintf f " end.\n\n" let write_last_symb f = fprintf f "Definition last_symb_of_non_init_state (noninitstate:noninitstate) : symbol :=\n"; fprintf f " match noninitstate with\n"; lr1_iterx_nonfinal (fun node -> match Lr1.incoming_symbol node with | Some s -> fprintf f " | %s => %s\n" (print_nis node) (print_symbol s) | None -> assert false); fprintf f " end.\n\n" let write_past_symb f = fprintf f "Definition past_symb_of_non_init_state (noninitstate:noninitstate) : list symbol :=\n"; fprintf f " match noninitstate with\n"; lr1_iterx_nonfinal (fun node -> let s = String.concat "; " (List.tl (Invariant.fold (fun l _ symb _ -> print_symbol symb::l) [] (Invariant.stack node))) in fprintf f " | %s => [%s]\n" (print_nis node) s); fprintf f " end.\n"; fprintf f "Extract Constant past_symb_of_non_init_state => \"fun _ -> assert false\".\n\n" let write_past_states f = fprintf f "Definition past_state_of_non_init_state (s:noninitstate) : list (state -> bool) :=\n"; fprintf f " match s with\n"; lr1_iterx_nonfinal (fun node -> let s = String.concat ";\n " (Invariant.fold (fun accu _ _ states -> let b = Buffer.create 16 in bprintf b "fun s:state =>\n"; bprintf b " match s return bool with\n"; bprintf b " "; Lr1.NodeSet.iter (fun st -> bprintf b "| %s " (print_st st)) states; bprintf b "=> true\n"; bprintf b " | _ => false\n"; bprintf b " end"; Buffer.contents b::accu) [] (Invariant.stack node)) in fprintf f " | %s =>\n [ %s ]\n" (print_nis node) s); fprintf f " end.\n\n"; fprintf f "Extract Constant past_state_of_non_init_state => \"fun _ -> assert false\".\n\n" let write_items f = if not Settings.coq_no_complete then begin lr1_iter_nonfinal (fun node -> fprintf f "Definition items_of_state_%d : list item :=\n" (Lr1.number node); fprintf f " [ "; let first = ref true in Item.Map.iter (fun item lookaheads -> let prod, pos = Item.export item in if not (Production.is_start prod) then begin if !first then first := false else fprintf f ";\n "; fprintf f "{| prod_item := %s;\n" (print_prod prod); fprintf f " dot_pos_item := %d;\n" pos; fprintf f " lookaheads_item := ["; let first = ref true in let lookaheads = if TerminalSet.mem Terminal.sharp lookaheads then TerminalSet.universe else lookaheads in TerminalSet.iter (fun lookahead -> if !first then first := false else fprintf f "; "; fprintf f "%s" (print_term lookahead) ) lookaheads; fprintf f "] |}" end ) (Lr0.closure (Lr0.export (Lr1.state node))); fprintf f " ].\n"; fprintf f "Extract Inlined Constant items_of_state_%d => \"assert false\".\n\n" (Lr1.number node) ); fprintf f "Definition items_of_state (s:state) : list item :=\n"; fprintf f " match s with\n"; lr1_iter_nonfinal (fun node -> fprintf f " | %s => items_of_state_%d\n" (print_st node) (Lr1.number node)); fprintf f " end.\n"; end else fprintf f "Definition items_of_state (s:state): list item := [].\n"; fprintf f "Extract Constant items_of_state => \"fun _ -> assert false\".\n\n" let write_automaton f = fprintf f "Module Aut <: Automaton.T.\n\n"; fprintf f "Local Obligation Tactic := intro x; case x; reflexivity.\n\n"; fprintf f "Module Gram := Gram.\n"; fprintf f "Module 
GramDefs := Gram.\n\n"; write_nullable_first f; write_nis f; write_last_symb f; write_init f; fprintf f "Include Automaton.Types.\n\n"; write_start_nt f; write_actions f; write_gotos f; write_past_symb f; write_past_states f; write_items f; fprintf f "End Aut.\n\n" let write_theorems f = fprintf f "Require Import Main.\n\n"; fprintf f "Module Parser := Main.Make Aut.\n"; fprintf f "Theorem safe:\n"; fprintf f " Parser.safe_validator () = true.\n"; fprintf f "Proof eq_refl true<:Parser.safe_validator () = true.\n\n"; if not Settings.coq_no_complete then begin fprintf f "Theorem complete:\n"; fprintf f " Parser.complete_validator () = true.\n"; fprintf f "Proof eq_refl true<:Parser.complete_validator () = true.\n\n"; end; Lr1.fold_entry (fun _prod node startnt _t () -> let funName = Nonterminal.print true startnt in fprintf f "Definition %s := Parser.parse safe Aut.%s.\n\n" funName (print_init node); fprintf f "Theorem %s_correct iterator buffer:\n" funName; fprintf f " match %s iterator buffer with\n" funName; fprintf f " | Parser.Inter.Parsed_pr sem buffer_new =>\n"; fprintf f " exists word,\n"; fprintf f " buffer = Parser.Inter.app_str word buffer_new /\\\n"; fprintf f " inhabited (Gram.parse_tree (%s) word sem)\n" (print_symbol (Symbol.N startnt)); fprintf f " | _ => True\n"; fprintf f " end.\n"; fprintf f "Proof. apply Parser.parse_correct. Qed.\n\n"; if not Settings.coq_no_complete then begin fprintf f "Theorem %s_complete (iterator:nat) word buffer_end (output:%s):\n" funName (print_type (Nonterminal.ocamltype startnt)); fprintf f " forall tree:Gram.parse_tree (%s) word output,\n" (print_symbol (Symbol.N startnt)); fprintf f " match %s iterator (Parser.Inter.app_str word buffer_end) with\n" funName; fprintf f " | Parser.Inter.Fail_pr => False\n"; fprintf f " | Parser.Inter.Parsed_pr output_res buffer_end_res =>\n"; fprintf f " output_res = output /\\ buffer_end_res = buffer_end /\\\n"; fprintf f " le (Gram.pt_size tree) iterator\n"; fprintf f " | Parser.Inter.Timeout_pr => lt iterator (Gram.pt_size tree)\n"; fprintf f " end.\n"; fprintf f "Proof. apply Parser.parse_complete with (init:=Aut.%s); exact complete. Qed.\n\n" (print_init node); end ) () let write_all f = if not Settings.coq_no_actions then List.iter (fun s -> fprintf f "%s\n\n" s.Stretch.stretch_content) Front.grammar.UnparameterizedSyntax.preludes; fprintf f "Require Import List.\n"; fprintf f "Require Import Int31.\n"; fprintf f "Require Import Syntax.\n"; fprintf f "Require Import Tuples.\n"; fprintf f "Require Import Alphabet.\n"; fprintf f "Require Grammar.\n"; fprintf f "Require Automaton.\n\n"; fprintf f "Unset Elimination Schemes.\n\n"; write_grammar f; write_automaton f; write_theorems f; if not Settings.coq_no_actions then List.iter (fun stretch -> fprintf f "\n\n%s" stretch.Stretch.stretch_raw_content) Front.grammar.UnparameterizedSyntax.postludes end menhir-20151112/src/LinearizedArray.ml0000644000175000017500000000534312621170074016403 0ustar mehdimehdi(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, INRIA Paris-Rocquencourt *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright 2005-2015 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. 
*) (* *) (**************************************************************************) (* The [entry] array contains offsets into the [data] array. It has [n+1] elements if the original (unencoded) array has [n] elements. The value of [entry.(n)] is the length of the [data] array. This convention is natural and allows avoiding a special case. *) type 'a t = (* data: *) 'a array * (* entry: *) int array let make (a : 'a array array) : 'a t = let n = Array.length a in (* Build the entry array. *) let size = ref 0 in let entry = Array.init (n + 1) (fun i -> let s = !size in if i < n then size := s + Array.length a.(i); s ) in assert (entry.(n) = !size); (* Build the data array. *) let i = ref 0 and j = ref 0 in let data = Array.init !size (fun _ -> while !j = Array.length a.(!i) do i := !i + 1; j := 0; done; let x = a.(!i).(!j) in j := !j + 1; x ) in data, entry let length ((_, entry) : 'a t) : int = Array.length entry let row_length ((_, entry) : 'a t) i : int = entry.(i + 1) - entry.(i) let row_length_via get_entry i = get_entry (i + 1) - get_entry i let read ((data, entry) as la : 'a t) i j : 'a = assert (0 <= j && j < row_length la i); data.(entry.(i) + j) let read_via get_data get_entry i j = assert (0 <= j && j < row_length_via get_entry i); get_data (get_entry i + j) let write ((data, entry) as la : 'a t) i j (v : 'a) : unit = assert (0 <= j && j < row_length la i); data.(entry.(i) + j) <- v let rec read_interval_via get_data i j = if i = j then [] else get_data i :: read_interval_via get_data (i + 1) j let read_row_via get_data get_entry i = read_interval_via get_data (get_entry i) (get_entry (i + 1)) let read_row ((data, entry) : 'a t) i : 'a list = read_row_via (Array.get data) (Array.get entry) i menhir-20151112/src/StaticVersion.ml0000644000175000017500000000003212621170074016101 0ustar mehdimehdilet require_20151112 = () menhir-20151112/src/invariant.mli0000644000175000017500000001062312621170073015456 0ustar mehdimehdi(* This module discovers and publishes information about the automaton. It determines the shape of the stack when a state is about to be entered, when a production is about to be reduced, and when a goto transition is about to be taken. It also determines which states should be represented (that is, need to physically exist on the stack at runtime) and which symbols need to keep track of (start or end) positions. It also determines which automaton states could have to deal with an [error] token. *) open Grammar (* ------------------------------------------------------------------------- *) (* A representation of stack shapes. *) (* A word is a representation of a stack or stack suffix. *) type word (* [fold] folds over a word. At each cell, [f] is applied to the accumulator, to a Boolean flag that tells whether the cell holds a state, to the set of possible states of the cell, and to the symbol associated with the cell. The stack is visited from bottom to top. *) val fold: ('a -> bool -> Symbol.t -> Lr1.NodeSet.t -> 'a) -> 'a -> word -> 'a (* [fold_top f accu s] is analogous to [fold], but only folds over the top stack cell, if there is one, so that [f] is either not invoked at all or invoked just once. *) val fold_top: (bool -> Symbol.t -> 'a) -> 'a -> word -> 'a (* [print w] produces a string representation of the word [w]. Only the symbols are shown. *) val print: word -> string (* ------------------------------------------------------------------------- *) (* Information about the stack. *) (* [stack s] is the structure of the stack at state [s]. 
*) val stack: Lr1.node -> word (* [prodstack prod] is the structure of the stack when production [prod] is about to be reduced. This function should not be called if production [prod] is never reduced. *) val prodstack: Production.index -> word (* [gotostack nt] is the structure of the stack when a shift transition over nonterminal [nt] is about to be taken. It consists of just one cell. *) val gotostack: Nonterminal.t -> word (* [rewind s] explains how to rewind the stack when dealing with an error in state [s]. It produces an instruction to either die (because no state on the stack can handle errors) or pop a suffix of the stack. In the latter case, one reaches a state that is either represented (its identity is physically stored in the bottommost cell that is popped) or unrepresented (its identity is statically known). *) type instruction = | Die | DownTo of word * state and state = | Represented | UnRepresented of Lr1.node val rewind: Lr1.node -> instruction (* ------------------------------------------------------------------------- *) (* Information about which states and positions need to physically exist on the stack. *) (* [represented s] tells whether state [s] must have an explicit representation, that is, whether it is pushed onto the stack. *) val represented: Lr1.node -> bool (* [startp symbol] and [endp symbol] tell whether start or end positions must be recorded for symbol [symbol]. *) val startp: Symbol.t -> bool val endp: Symbol.t -> bool (* ------------------------------------------------------------------------- *) (* Information about error handling. *) (* [errorpeeker s] tells whether state [s] can potentially peek at an error. This is the case if, in state [s], an error token may be on the stream. *) val errorpeeker: Lr1.node -> bool (* ------------------------------------------------------------------------- *) (* Information about which productions are reduced and where. *) (* [ever_reduced prod] tells whether production [prod] is ever reduced. *) val ever_reduced: Production.index -> bool (* [fold_reduced prod] folds over all states that can reduce production [prod]. *) val fold_reduced: (Lr1.node -> 'a -> 'a) -> Production.index -> 'a -> 'a (* ------------------------------------------------------------------------- *) (* Information about default reductions. *) (* [has_default_reduction s] tells whether state [s] has a default reduction, and, if so, upon which set of tokens. *) val has_default_reduction : Lr1.node -> (Production.index * TerminalSet.t) option (* ------------------------------------------------------------------------- *) (* Miscellaneous. *) (* [universal symbol] tells whether every represented state has an outgoing transition along [symbol]. *) val universal: Symbol.t -> bool menhir-20151112/src/tarjan.ml0000644000175000017500000001236012621170073014571 0ustar mehdimehdi(* This module provides an implementation of Tarjan's algorithm for finding the strongly connected components of a graph. The algorithm runs when the functor is applied. Its complexity is $O(V+E)$, where $V$ is the number of vertices in the graph $G$, and $E$ is the number of edges. *) module Run (G : sig type node (* We assume each node has a unique index. Indices must range from $0$ to $n-1$, where $n$ is the number of nodes in the graph. *) val n: int val index: node -> int (* Iterating over a node's immediate successors. *) val successors: (node -> unit) -> node -> unit (* Iterating over all nodes. 
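As an illustrative sketch (not part of the original file), a complete argument module for a three-node cycle could read:

  module G = struct
    type node = int
    let n = 3
    let index x = x
    let successors f x = List.iter f [| [1]; [2]; [0] |].(x)
    let iter f = for x = 0 to n - 1 do f x done
  end

Applying [Run (G)] then finds a single strongly connected component that contains all three nodes.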
*) val iter: (node -> unit) -> unit end) = struct (* Define the internal data structure associated with each node. *) type data = { (* Each node carries a flag which tells whether it appears within the SCC stack (which is defined below). *) mutable stacked: bool; (* Each node carries a number. Numbers represent the order in which nodes were discovered. *) mutable number: int; (* Each node [x] records the lowest number associated to a node already detected within [x]'s SCC. *) mutable low: int; (* Each node carries a pointer to a representative element of its SCC. This field is used by the algorithm to store its results. *) mutable representative: G.node; (* Each representative node carries a list of the nodes in its SCC. This field is used by the algorithm to store its results. *) mutable scc: G.node list } (* Define a mapping from external nodes to internal ones. Here, we simply use each node's index as an entry into a global array. *) let table = (* Create the array. We initially fill it with [None], of type [data option], because we have no meaningful initial value of type [data] at hand. *) let table = Array.make G.n None in (* Initialize the array. *) G.iter (fun x -> table.(G.index x) <- Some { stacked = false; number = 0; low = 0; representative = x; scc = [] } ); (* Define a function which gives easy access to the array. It maps each node to its associated piece of internal data. *) function x -> match table.(G.index x) with | Some dx -> dx | None -> assert false (* Indices do not cover the range $0\ldots n$, as expected. *) (* Create an empty stack, used to record all nodes which belong to the current SCC. *) let scc_stack = Stack.create() (* Initialize a function which allocates numbers for (internal) nodes. A new number is assigned to each node the first time it is visited. Numbers returned by this function start at 1 and increase. Initially, all nodes have number 0, so they are considered unvisited. *) let mark = let counter = ref 0 in fun dx -> incr counter; dx.number <- !counter; dx.low <- !counter (* This reference will hold a list of all representative nodes. *) let representatives = ref [] (* Look at all nodes of the graph, one after the other. Any unvisited nodes become roots of the search forest. *) let () = G.iter (fun root -> let droot = table root in if droot.number = 0 then begin (* This node hasn't been visited yet. Start a depth-first walk from it. *) mark droot; droot.stacked <- true; Stack.push droot scc_stack; let rec walk x = let dx = table x in G.successors (fun y -> let dy = table y in if dy.number = 0 then begin (* $y$ hasn't been visited yet, so $(x,y)$ is a regular edge, part of the search forest. *) mark dy; dy.stacked <- true; Stack.push dy scc_stack; (* Continue walking, depth-first. *) walk y; if dy.low < dx.low then dx.low <- dy.low end else if (dy.number < dx.number) && dy.stacked then begin (* The first condition above indicates that $y$ has been visited before $x$, so $(x, y)$ is a backwards or transverse edge. The second condition indicates that $y$ is inside the same SCC as $x$; indeed, if it belongs to another SCC, then the latter has already been identified and moved out of [scc_stack]. *) if dy.number < dx.low then dx.low <- dy.number end ) x; (* We are done visiting $x$'s neighbors. *) if dx.low = dx.number then begin (* $x$ is the entry point of a SCC. The whole SCC is now available; move it out of the stack. We pop elements out of the SCC stack until $x$ itself is found. 
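(An added worked example: on the two-node cycle sketched near the functor's signature, the walk that starts at node 0 reaches this point with nodes 0 and 1 on the stack; both are popped, and node 0 becomes the representative of their common SCC.)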
*) let rec loop () = let element = Stack.pop scc_stack in element.stacked <- false; dx.scc <- element.representative :: dx.scc; element.representative <- x; if element != dx then loop() in loop(); representatives := x :: !representatives end in walk root end ) (* There only remains to make our results accessible to the outside. *) let representative x = (table x).representative let scc x = (table x).scc let iter action = List.iter (fun x -> let data = table x in assert (data.representative == x); (* a sanity check *) assert (data.scc <> []); (* a sanity check *) action x data.scc ) !representatives end menhir-20151112/src/StaticVersion.mli0000644000175000017500000000003412621170074016254 0ustar mehdimehdival require_20151112 : unit menhir-20151112/src/PackedIntArray.ml0000644000175000017500000001401412621170074016152 0ustar mehdimehdi(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, INRIA Paris-Rocquencourt *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright 2005-2015 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (* A packed integer array is represented as a pair of an integer [k] and a string [s]. The integer [k] is the number of bits per integer that we use. The string [s] is just an array of bits, which is read in 8-bit chunks. *) (* The ocaml programming language treats string literals and array literals in slightly different ways: the former are statically allocated, while the latter are dynamically allocated. (This is rather arbitrary.) In the context of Menhir's table-based back-end, where compact, immutable integer arrays are needed, ocaml strings are preferable to ocaml arrays. *) type t = int * string (* The magnitude [k] of an integer [v] is the number of bits required to represent [v]. It is rounded up to the nearest power of two, so that [k] divides [Sys.word_size]. *) let magnitude (v : int) = if v < 0 then Sys.word_size else let rec check k max = (* [max] equals [2^k] *) if (max <= 0) || (v < max) then k (* if [max] just overflew, then [v] requires a full ocaml integer, and [k] is the number of bits in an ocaml integer plus one, that is, [Sys.word_size]. *) else check (2 * k) (max * max) in check 1 2 (* [pack a] turns an array of integers into a packed integer array. *) (* Because the sign bit is the most significant bit, the magnitude of any negative number is the word size. In other words, [pack] does not achieve any space savings as soon as [a] contains any negative numbers, even if they are ``small''. *) let pack (a : int array) : t = let m = Array.length a in (* Compute the maximum magnitude of the array elements. This tells us how many bits per element we are going to use. *) let k = Array.fold_left (fun k v -> max k (magnitude v) ) 1 a in (* Because access to ocaml strings is performed on an 8-bit basis, two cases arise. If [k] is less than 8, then we can pack multiple array entries into a single character. If [k] is greater than 8, then we must use multiple characters to represent a single array entry. *) if k <= 8 then begin (* [w] is the number of array entries that we pack in a character. 
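For instance (an added worked example): with [k = 2], we have [w = 4], so the four entries of the array [|0; 1; 2; 3|] are packed, most significant first, into the single byte 0b00011011, that is, 27.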
*) assert (8 mod k = 0); let w = 8 / k in (* [n] is the length of the string that we allocate. *) let n = if m mod w = 0 then m / w else m / w + 1 in let s = Bytes.create n in (* Define a reader for the source array. The reader might run off the end if [w] does not divide [m]. *) let i = ref 0 in let next () = let ii = !i in if ii = m then 0 (* ran off the end, pad with zeroes *) else let v = a.(ii) in i := ii + 1; v in (* Fill up the string. *) for j = 0 to n - 1 do let c = ref 0 in for _x = 1 to w do c := (!c lsl k) lor next() done; Bytes.set s j (Char.chr !c) done; (* Done. *) k, Bytes.unsafe_to_string s end else begin (* k > 8 *) (* [w] is the number of characters that we use to encode an array entry. *) assert (k mod 8 = 0); let w = k / 8 in (* [n] is the length of the string that we allocate. *) let n = m * w in let s = Bytes.create n in (* Fill up the string. *) for i = 0 to m - 1 do let v = ref a.(i) in for x = 1 to w do Bytes.set s ((i + 1) * w - x) (Char.chr (!v land 255)); v := !v lsr 8 done done; (* Done. *) k, Bytes.unsafe_to_string s end (* Access to a string. *) let read (s : string) (i : int) : int = Char.code (String.unsafe_get s i) (* [get1 t i] returns the integer stored in the packed array [t] at index [i]. It assumes (and does not check) that the array's bit width is [1]. The parameter [t] is just a string. *) let get1 (s : string) (i : int) : int = let c = read s (i lsr 3) in let c = c lsr ((lnot i) land 0b111) in let c = c land 0b1 in c (* [get t i] returns the integer stored in the packed array [t] at index [i]. *) (* Together, [pack] and [get] satisfy the following property: if the index [i] is within bounds, then [get (pack a) i] equals [a.(i)]. *) let get ((k, s) : t) (i : int) : int = match k with | 1 -> get1 s i | 2 -> let c = read s (i lsr 2) in let c = c lsr (2 * ((lnot i) land 0b11)) in let c = c land 0b11 in c | 4 -> let c = read s (i lsr 1) in let c = c lsr (4 * ((lnot i) land 0b1)) in let c = c land 0b1111 in c | 8 -> read s i | 16 -> let j = 2 * i in (read s j) lsl 8 + read s (j + 1) | _ -> assert (k = 32); (* 64 bits unlikely, not supported *) let j = 4 * i in (((read s j lsl 8) + read s (j + 1)) lsl 8 + read s (j + 2)) lsl 8 + read s (j + 3) (* [unflatten1 (n, data) i j] accesses the two-dimensional bitmap represented by [(n, data)] at indices [i] and [j]. The integer [n] is the width of the bitmap; the string [data] is the second component of the packed array obtained by encoding the table as a one-dimensional array. *) let unflatten1 (n, data) i j = get1 data (n * i + j) menhir-20151112/src/action.ml0000644000175000017500000001036512621170073014572 0ustar mehdimehdiopen Keyword type t = { (* The code for this semantic action. *) expr: IL.expr; (* The files where this semantic action originates. Via inlining, several semantic actions can be combined into one, so there can be several files. *) filenames: string list; (* The set of keywords that appear in this semantic action. They can be thought of as free variables that refer to positions. They must be renamed during inlining. *) keywords : KeywordSet.t; } (* Creation. *) let from_stretch s = { expr = IL.ETextual s; filenames = [ s.Stretch.stretch_filename ]; keywords = KeywordSet.of_list s.Stretch.stretch_keywords } (* Defining a keyword in terms of other keywords. 
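(An added note: concretely, [define keyword keywords f action], defined just below, rewrites the semantic action with [f] and replaces [keyword] with the set [keywords] in the action's keyword set; the resulting set is [KeywordSet.union keywords (KeywordSet.remove keyword action.keywords)].)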
*) let define keyword keywords f action = assert (KeywordSet.mem keyword action.keywords); { action with expr = f action.expr; keywords = KeywordSet.union keywords (KeywordSet.remove keyword action.keywords) } (* Composition, used during inlining. *) let compose x a1 a2 = (* 2015/07/20: there used to be a call to [parenthesize_stretch] here, which would insert parentheses around every stretch in [a1]. This is not necessary, as far as I can see, since every stretch that represents a semantic action is already parenthesized by the lexer. *) { expr = IL.ELet ([ IL.PVar x, a1.expr ], a2.expr); keywords = KeywordSet.union a1.keywords a2.keywords; filenames = a1.filenames @ a2.filenames; } (* Substitutions, represented as association lists. In principle, no name appears twice in the domain. *) type subst = (string * string) list let apply (phi : subst) (s : string) : string = try List.assoc s phi with Not_found -> s let apply_subject (phi : subst) (subject : subject) : subject = match subject with | Before | Left -> subject | RightNamed s -> RightNamed (apply phi s) let extend x y (phi : subst ref) = assert (not (List.mem_assoc x !phi)); if x <> y then phi := (x, y) :: !phi (* Renaming of keywords, used during inlining. *) type sw = Keyword.subject * Keyword.where (* [rename_keyword f phi keyword] applies the function [f] to possibly change the keyword [keyword]. If [f] decides to change this keyword (by returning [Some _]) then this decision is obeyed. Otherwise, the keyword is renamed by the substitution [phi]. In either case, [phi] is extended with a renaming decision. *) let rename_keyword (f : sw -> sw option) (phi : subst ref) keyword : keyword = match keyword with | SyntaxError -> SyntaxError | Position (subject, where, flavor) -> let subject', where' = match f (subject, where) with | Some (subject', where') -> subject', where' | None -> apply_subject !phi subject, where in extend (Keyword.posvar subject where flavor) (Keyword.posvar subject' where' flavor) phi; Position (subject', where', flavor) (* [rename f phi a] applies to the semantic action [a] the renaming [phi] as well as the transformations decided by the function [f]. The function [f] is applied to each (not-yet-renamed) keyword and may decide to transform it, by returning [Some _], or to not transform it, by returning [None]. (In the latter case, [phi] still applies to the keyword.) *) let rename f phi a = (* Rename all keywords, growing [phi] as we go. *) let keywords = a.keywords in let phi = ref phi in let keywords = KeywordSet.map (rename_keyword f phi) keywords in let phi = !phi in (* Construct a new semantic action, where [phi] is translated into a series of [let] bindings. *) let phi = List.map (fun (x, y) -> IL.PVar x, IL.EVar y) phi in let expr = IL.ELet (phi, a.expr) in { expr = expr; filenames = a.filenames; keywords = keywords; } let to_il_expr action = action.expr let filenames action = action.filenames let keywords action = action.keywords let print f action = let module P = Printer.Make (struct let f = f let locate_stretches = None end) in P.expr action.expr let has_syntaxerror action = KeywordSet.mem SyntaxError (keywords action) let has_beforeend action = KeywordSet.mem (Position (Before, WhereEnd, FlavorPosition)) action.keywords menhir-20151112/src/unparameterizedPrinter.mli0000644000175000017500000000111212621170073020217 0ustar mehdimehdi(* This is a pretty-printer for grammars. 
*) (* If the [mode] parameter requests ``unit actions'', then semantic actions are dropped: that is, they are replaced with trivial semantic actions that return unit. Accordingly, all [%type] declarations are changed to unit. The prologue and epilogue are dropped. All bindings for semantic values are suppressed. If, furthermore, the [mode] parameter requests ``unit tokens'', then the types carried by tokens are changed to unit. *) val print: Settings.print_mode -> out_channel -> UnparameterizedSyntax.grammar -> unit menhir-20151112/src/back.ml0000644000175000017500000000377712621170073014226 0ustar mehdimehdi(* Driver for the back-end. *) module I = Interpret (* artificial dependency; ensures that [Interpret] runs first *) (* If [--list-errors] is set, produce a list of erroneous input sentences, then stop. *) let () = if Settings.list_errors then begin let module L = LRijkstra.Run(struct (* Undocumented: if [--log-automaton 2] is set, be verbose. *) let verbose = Settings.logA >= 2 (* For my own purposes, LRijkstra can print one line of statistics to a .csv file. *) let statistics = if false then Some "lr.csv" else None end) in exit 0 end (* Define an .ml file writer . *) let write program = let module P = Printer.Make (struct let filename = Settings.base ^ ".ml" let f = open_out filename let locate_stretches = if Settings.infer then (* Typechecking should not fail at this stage. Omit #line directives. *) None else (* 2011/10/19: do not use [Filename.basename]. The [#] annotations that we insert in the [.ml] file must retain their full path. This does mean that the [#] annotations depend on how menhir is invoked -- e.g. [menhir foo/bar.mly] and [cd foo && menhir bar.mly] will produce different files. Nevertheless, this seems useful/reasonable. *) Some filename end) in P.program program (* Construct the code, using either the table-based or the code-based back-end, and pass it on to the printer. (This continuation-passing style is imposed by the fact that there is no conditional in ocaml's module language.) *) let () = if Settings.coq then let module B = CoqBackend.Run (struct end) in let filename = Settings.base ^ ".v" in let f = open_out filename in B.write_all f; exit 0 else if Settings.table then let module B = TableBackend.Run (struct end) in write B.program else let module B = CodeBackend.Run (struct end) in write (Inliner.inline B.program) (* Write the interface file. *) let () = Interface.write Front.grammar () let () = Time.tick "Printing" menhir-20151112/src/installation.mli0000644000175000017500000000052612621170073016165 0ustar mehdimehdi(* This module defines a number of installation settings. Its source code is generated by the main [Makefile]. *) (* The directory where Menhir's standard library, [standard.mly], is installed. *) val libdir: string (* Whether MenhirLib was installed via [ocamlfind] or (manually) in the above directory. *) val ocamlfind: bool menhir-20151112/src/interpret.ml0000644000175000017500000006305312621170073015333 0ustar mehdimehdimodule I = Invariant (* artificial dependency; ensures that [Invariant] runs first *) (* --------------------------------------------------------------------------- *) open Grammar open SentenceParserAux (* A delimiter. *) type delimiter = string (* An error message. *) type message = string (* A run is a series of sentences or comments, followed with a delimiter (at least one blank line; comments), followed with an error message. 
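For instance (an added illustration; the start symbol [expr] and the tokens [INT] and [PLUS] are made up), a run in a [.messages] file could look like this:

     expr: INT PLUS

     # a hand-written comment

     Ill-formed arithmetic expression.

   The first line is a sentence; the blank lines and the comment form the delimiter; the last line is the message.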
*) type run = located_sentence or_comment list * delimiter * message (* A targeted sentence is a located sentence together with the target into which it leads. A target tells us which state a sentence leads to, as well as which spurious reductions are performed at the end. *) type target = ReferenceInterpreter.target let target2state (s, _spurious) = s type maybe_targeted_sentence = located_sentence * target option type targeted_sentence = located_sentence * target (* A targeted run is a series of targeted sentences or comments together with an error message. *) type maybe_targeted_run = maybe_targeted_sentence or_comment list * delimiter * message type targeted_run = targeted_sentence or_comment list * delimiter * message (* A filtered targeted run is a series of targeted sentences together with an error message. (The comments have been filtered out.) *) type filtered_targeted_run = targeted_sentence list * message (* --------------------------------------------------------------------------- *) (* Display and debugging. *) let print_sentence (nto, terminals) : string = let b = Buffer.create 128 in Option.iter (fun nt -> Printf.bprintf b "%s: " (Nonterminal.print false nt) ) nto; List.iter (fun t -> Printf.bprintf b "%s " (Terminal.print t) ) terminals; Printf.bprintf b "\n"; Buffer.contents b (* --------------------------------------------------------------------------- *) (* [stream] turns a finite list of terminals into a stream of terminals. *) exception EndOfStream let stream (toks : Terminal.t list) : unit -> Terminal.t * Lexing.position * Lexing.position = let toks = ref toks in fun () -> let tok = match !toks with | tok :: more -> (* Take a token off the list, and return it. *) toks := more; tok | [] -> (* The finite list has been exhausted. Here, two plausible behaviors come to mind. The first behavior consists in raising an exception. In that case, we are creating a finite stream, and it is up to the parser to not read past its end. The second behavior consists in returning a designated token. In that case, we are creating an infinite, eventually constant, stream. The choice between these two behaviors is somewhat arbitrary; furthermore, in the second case, the choice of the designated token is arbitrary as well. Here, we adopt the second behavior if and only if the grammar has an EOF token, and we use EOF as the designated token. Again, this is arbitrary, and could be changed in the future. *) match Terminal.eof with | Some eof -> eof | None -> raise EndOfStream in (* For now, return dummy positions. *) tok, Lexing.dummy_pos, Lexing.dummy_pos (* --------------------------------------------------------------------------- *) (* [start sentence] returns the start symbol that we should use to interpret the sentence [sentence]. *) (* If a start symbol was explicitly provided as part of the sentence, we use it. Otherwise, we use the grammar's unique start symbol, if there is one. *) let start poss ((nto, _) : sentence) : Nonterminal.t = match nto with | Some nt -> nt | None -> match ProductionMap.is_singleton Lr1.entry with | None -> Error.error poss "because the grammar has multiple start symbols, each of the\n\ sentences provided on the standard input channel must be of the\n\ form: <start symbol>: <token>*" | Some (prod, _) -> match Production.classify prod with | Some nt -> nt | None -> assert false (* --------------------------------------------------------------------------- *) (* [interpret] interprets a sentence. 
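(An added note: for instance, on a grammar with start symbol [expr], the sentence [expr: INT PLUS INT] might be answered with ACCEPT, the sentence [expr: INT PLUS] with REJECT, and a sentence so short that the parser requests yet another token with OVERSHOOT; see the code below.)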
*) let interpret ((_, toks) as sentence) : unit = let nt = start [] sentence in (* Run the reference interpreter. This can produce a concrete syntax tree ([Some cst]), fail with a parser error ([None]), or fail with a lexer error ([EndOfStream]). *) (* In either case, we produce just one line of output, so it should be clear to the user which outcomes correspond to which sentences (should multiple sentences be supplied). *) begin try match MenhirLib.Convert.Simplified.traditional2revised (ReferenceInterpreter.interpret Settings.trace nt) (stream toks) with | Some cst -> (* Success. *) Printf.printf "ACCEPT"; if Settings.interpret_show_cst then begin print_newline(); Cst.show stdout cst end | None -> (* Parser failure. *) Printf.printf "REJECT" with EndOfStream -> (* Lexer failure. *) Printf.printf "OVERSHOOT" end; print_newline() (* --------------------------------------------------------------------------- *) (* [interpret_error_aux] interprets a sentence, expecting it to end in an error. Failure or success is reported via two continuations. *) let interpret_error_aux log poss ((_, terminals) as sentence) fail succeed = let nt = start poss sentence in let open ReferenceInterpreter in match check_error_path log nt terminals with | OInputReadPastEnd -> fail "no syntax error occurs." | OInputNotFullyConsumed -> fail "a syntax error occurs before the last token is reached." | OUnexpectedAccept -> fail "no syntax error occurs; in fact, this input is accepted." | OK target -> succeed nt terminals target (* --------------------------------------------------------------------------- *) (* This default error message is produced by [--list-errors] when it creates a [.messages] file, and is recognized by [--compare-errors] when it compares two such files. *) let default_message = "\n" (* [print_messages_auto] displays just the sentence and the auto-generated comments. [otarget] may be [None], in which case the auto-generated comment is just a warning that this sentence does not end in an error. *) let print_messages_auto (nt, sentence, otarget) : unit = (* Print the sentence, followed with auto-generated comments. *) print_string (print_sentence (Some nt, sentence)); match (otarget : target option) with | None -> Printf.printf "##\n\ ## WARNING: This sentence does NOT end with a syntax error, as it should.\n\ ##\n" | Some (s', spurious) -> Printf.printf "##\n\ ## Ends in an error in state: %d.\n\ ##\n\ %s##\n" (Lr1.number s') (* [Lr0.print] or [Lr0.print_closure] could be used here. The latter could sometimes be helpful, but is usually intolerably verbose. *) (Lr0.print "## " (Lr1.state s')) ; Printf.printf "## The known suffix of the stack is as follows:\n\ ## %s\n\ ##\n" (Invariant.print (Invariant.stack s')) ; if spurious <> [] then begin Printf.printf "## WARNING: This example involves spurious reductions.\n\ ## This implies that, although the LR(1) items shown above provide an\n\ ## accurate view of the past (what has been recognized so far), they\n\ ## may provide an INCOMPLETE view of the future (what was expected next).\n" ; List.iter (fun (s, prod) -> Printf.printf "## In state %d, spurious reduction of production %s\n" (Lr1.number s) (Production.print prod) ) spurious; Printf.printf "##\n" end (* [print_messages_item] displays one data item. The item is of the form [nt, sentence, target], which means that beginning at the start symbol [nt], the sentence [sentence] ends in an error in the target state given by [target]. 
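For instance (an added illustration; the sentence and the state number are made up), the auto-generated part of an item looks like this:

     expr: INT PLUS
     ##
     ## Ends in an error in state: 42.
     ##

   followed by a description of the state and of the known suffix of the stack.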
[target] also contains information about which spurious reductions are performed at the end. The display obeys the [.messages] file format. *) let print_messages_item (nt, sentence, target) : unit = (* Print the sentence, followed with auto-generated comments. *) print_messages_auto (nt, sentence, Some target); (* Then, print a proposed error message, between two blank lines. *) Printf.printf "\n%s\n" default_message (* --------------------------------------------------------------------------- *) (* [write_run run] writes a run into a new [.messages] file. Manually-written comments are preserved. New auto-generated comments are produced. *) let write_run : maybe_targeted_run or_comment -> unit = function | Thing (sentences_or_comments, delimiter, message) -> (* First, print every sentence and human comment. *) List.iter (fun sentence_or_comment -> match sentence_or_comment with | Thing ((poss, ((_, toks) as sentence)), target) -> let nt = start poss sentence in (* Every sentence is followed with newly generated auto-comments. *) print_messages_auto (nt, toks, target) | Comment c -> print_string c ) sentences_or_comments; (* Then, print the delimiter, which must begin with a blank line and may include comments. *) print_string delimiter; (* Then, print the error message. *) print_string message (* No need for another blank line. It will be printed as part of a separate [Comment]. *) | Comment comments -> (* Must begin with a blank line. *) print_string comments (* --------------------------------------------------------------------------- *) (* [interpret_error] interprets a sentence, expecting it to end in an error. Failure or success is reported on the standard output channel. This is used by [--interpret-error]. *) let fail msg = Error.error [] "%s" msg let succeed nt terminals target = print_messages_item (nt, terminals, target); exit 0 let interpret_error sentence = interpret_error_aux Settings.trace [] sentence fail succeed (* --------------------------------------------------------------------------- *) (* [target_sentence] interprets a (located) sentence, expecting it to end in an error, computes the state in which the error is obtained, and constructs a targeted sentence. *) let target_sentence (signal : Positions.positions -> ('a, out_channel, unit, unit) format4 -> 'a) : located_sentence -> maybe_targeted_sentence = fun (poss, sentence) -> (poss, sentence), interpret_error_aux false poss sentence (* failure: *) (fun msg -> signal poss "this sentence does not end with a syntax error, as it should.\n%s" msg ; None ) (* success: *) (fun _nt _terminals target -> Some target) let target_run_1 signal : run -> maybe_targeted_run = fun (sentences, delimiter, message) -> List.map (or_comment_map (target_sentence signal)) sentences, delimiter, message let target_run_2 : maybe_targeted_run -> targeted_run = fun (sentences, delimiter, message) -> let aux (x, y) = (x, Misc.unSome y) in List.map (or_comment_map aux) sentences, delimiter, message let target_runs : run list -> targeted_run list = fun runs -> (* Interpret all sentences, possibly displaying multiple errors. *) let runs = List.map (target_run_1 Error.signal) runs in (* Abort if an error occurred. *) if Error.errors() then exit 1; (* Remove the options introduced by the first phase above. *) let runs = List.map target_run_2 runs in runs (* --------------------------------------------------------------------------- *) (* [filter_things] filters out the comments in a list of things or comments. 
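For instance (an added example), [filter_things [Thing 1; Comment "# c"; Thing 2]] is [[1; 2]].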
*) let filter_things : 'a or_comment list -> 'a list = fun things -> List.flatten (List.map unThing things) (* [filter_run] filters out the comments within a run. *) let filter_run : targeted_run -> filtered_targeted_run = fun (sentences, _, message) -> filter_things sentences, message (* --------------------------------------------------------------------------- *) (* [setup()] returns a function [read] which reads one sentence from the standard input channel. *) let setup () : unit -> sentence option = let open Lexing in let lexbuf = from_channel stdin in lexbuf.lex_curr_p <- { lexbuf.lex_curr_p with pos_fname = "(stdin)" }; let read () = try SentenceParser.optional_sentence SentenceLexer.lex lexbuf with Parsing.Parse_error -> Error.error (Positions.lexbuf lexbuf) "ill-formed input sentence." in read (* --------------------------------------------------------------------------- *) (* If [--interpret] is set, interpret the sentences found on the standard input channel, then stop, without generating a parser. *) (* We read a series of sentences from the standard input channel. To allow interactive use, we interpret each sentence as soon as it is read. *) let () = if Settings.interpret then let read = setup() in while true do match read() with | None -> exit 0 | Some sentence -> interpret sentence done (* --------------------------------------------------------------------------- *) (* If [--interpret-error] is set, interpret one sentence found on the standard input channel, then stop, without generating a parser. *) (* We read just one sentence, confirm that this sentence ends in an error, and (if that is the case) display the number of the state that is reached. *) let () = if Settings.interpret_error then let read = setup() in match read() with | None -> exit 1 (* abnormal: no input *) | Some sentence -> interpret_error sentence (* never returns *) (* --------------------------------------------------------------------------- *) (* Display an informational message about the contents of a [.messages] file. *) let stats (runs : run or_comment list) = (* [s] counts the sample input sentences. [m] counts the error messages. *) let s = ref 0 and m = ref 0 in List.iter (function | Thing (sentences, _, _) -> incr m; List.iter (function | Thing _ -> incr s | Comment _ -> () ) sentences | Comment _ -> () ) runs; Printf.eprintf "Read %d sample input sentences and %d error messages.\n%!" !s !m; runs (* --------------------------------------------------------------------------- *) (* Reading a [.messages] file. *) (* Our life is slightly complicated by the fact that the whitespace between two runs can contain comments, which we wish to preserve when performing [--update-errors]. *) let read_messages filename : run or_comment list = let open Segment in (* Read and segment the file. *) let segments : (tag * string * Lexing.lexbuf) list = segment filename in (* Process the segments, two by two. We expect one segment to contain a non-empty series of sentences, and the next segment to contain free-form text. *) let rec loop accu segments = match segments with | [] -> List.rev accu | (Whitespace, comments, _) :: segments -> loop (Comment comments :: accu) segments | (Segment, _, lexbuf) :: segments -> (* Read a series of located sentences. *) match SentenceParser.entry SentenceLexer.lex lexbuf with | exception Parsing.Parse_error -> Error.error [Positions.cpos lexbuf] "ill-formed sentence." | sentences -> (* In principle, we should now find a segment of whitespace followed with a segment of text. 
By construction, the two kinds of segments alternate. *) match segments with | (Whitespace, comments, _) :: (Segment, message, _) :: segments -> let run : run = sentences, comments, message in loop (Thing run :: accu) segments | [] | [ _ ] -> Error.error (Positions.one (Lexing.lexeme_end_p lexbuf)) "missing a final message. I may be desynchronized." | (Segment, _, _) :: _ | (Whitespace, _, _) :: (Whitespace, _, _) :: _ -> (* Should not happen, thanks to the alternation between the two kinds of segments. *) assert false in stats (loop [] segments) (* --------------------------------------------------------------------------- *) (* [message_table] converts a list of targeted runs to a table (a mapping) of states to located sentences and messages. Optionally, it can detect that two sentences lead to the same state, and report an error. *) let message_table (detect_redundancy : bool) (runs : filtered_targeted_run list) : (located_sentence * message) Lr1.NodeMap.t = let table = List.fold_left (fun table (sentences_and_states, message) -> List.fold_left (fun table (sentence2, target) -> let s = target2state target in match Lr1.NodeMap.find s table with | sentence1, _ -> if detect_redundancy then Error.signal (fst sentence1 @ fst sentence2) "these sentences both cause an error in state %d." (Lr1.number s); table | exception Not_found -> Lr1.NodeMap.add s (sentence2, message) table ) table sentences_and_states ) Lr1.NodeMap.empty runs in if Error.errors() then exit 1; table (* --------------------------------------------------------------------------- *) (* [compile_runs] converts a list of targeted runs to OCaml code that encodes a mapping of state numbers to error messages. The code is sent to the standard output channel. *) let compile_runs filename (runs : filtered_targeted_run list) : unit = (* We wish to produce a function that maps a state number to a message. By convention, we call this function [message]. *) let name = "message" in let open IL in let open CodeBits in let default = { branchpat = PWildcard; branchbody = eraisenotfound (* The default branch raises an exception, which can be caught by the user, who can then produce a generic error message. *) } in let branches = List.fold_left (fun branches (sentences_and_states, message) -> (* Create an or-pattern for these states. *) let states = List.map (fun (_, target) -> let s = target2state target in pint (Lr1.number s) ) sentences_and_states in (* Map all these states to this message. *) { branchpat = POr states; branchbody = EStringConst message } :: branches ) [ default ] runs in let messagedef = { valpublic = true; valpat = PVar name; valval = EFun ([ PVar "s" ], EMatch (EVar "s", branches)) } in let program = [ SIComment (Printf.sprintf "This file was auto-generated based on \"%s\"." filename); SIComment (Printf.sprintf "Please note that the function [%s] can raise [Not_found]." name); SIValDefs (false, [ messagedef ]); ] in (* Write this program to the standard output channel. *) let module P = Printer.Make (struct let f = stdout let locate_stretches = None end) in P.program program (* --------------------------------------------------------------------------- *) (* If [--compile-errors <filename>] is set, compile the error message descriptions found in file [filename] down to OCaml code, then stop. *) let () = Settings.compile_errors |> Option.iter (fun filename -> (* Read the file. *) let runs : run or_comment list = read_messages filename in (* Drop the comments in between two runs. 
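For instance (an added note), [Comment c; Thing run1; Thing run2] becomes [run1; run2]; the hand-written comments matter only to [--update-errors], which rewrites the file and preserves them.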
*) let runs : run list = filter_things runs in (* Convert every sentence to a state number. We signal an error if a sentence does not end in an error, as expected. *) let runs : targeted_run list = target_runs runs in (* Remove comments within the runs. *) let runs : filtered_targeted_run list = List.map filter_run runs in (* Build a mapping of states to located sentences. This allows us to detect if two sentences lead to the same state. *) let _ = message_table true runs in (* In principle, we would like to check whether this set of sentences is complete (i.e., covers all states where an error can arise), but this may be costly -- it requires running [LRijkstra]. Instead, we offer a separate facility for comparing two [.messages] files, one of which can be produced via [--list-errors]. This can be used to ensure completeness. *) (* Now, compile this information down to OCaml code. We wish to produce a function that maps a state number to a message. By convention, we call this function [message]. *) compile_runs filename runs; exit 0 ) (* --------------------------------------------------------------------------- *) (* If two [--compare-errors <filename>] directives are provided, compare the two message descriptions files, and stop. We wish to make sure that every state that appears on the left-hand side appears on the right-hand side as well. *) let () = Settings.compare_errors |> Option.iter (fun (filename1, filename2) -> (* Read and convert both files, as above. *) let runs1 = read_messages filename1 and runs2 = read_messages filename2 in let runs1 = filter_things runs1 and runs2 = filter_things runs2 in let runs1 = target_runs runs1 and runs2 = target_runs runs2 in (* here, it would be OK to ignore errors *) let runs1 = List.map filter_run runs1 and runs2 = List.map filter_run runs2 in let table1 = message_table false runs1 and table2 = message_table false runs2 in (* Check that the domain of [table1] is a subset of the domain of [table2]. *) table1 |> Lr1.NodeMap.iter (fun s ((poss1, _), _) -> if not (Lr1.NodeMap.mem s table2) then Error.signal poss1 "this sentence leads to an error in state %d.\n\ No sentence that leads to this state exists in \"%s\"." (Lr1.number s) filename2 ); (* Check that [table1] is a subset of [table2], that is, for every state [s] in the domain of [table1], [s] is mapped by [table1] and [table2] to the same error message. As an exception, if the message found in [table1] is the default message, then no comparison takes place. This allows using [--list-errors] and [--compare-errors] in conjunction to ensure that a [.messages] file is complete, without seeing warnings about different messages. *) table1 |> Lr1.NodeMap.iter (fun s ((poss1, _), message1) -> if message1 <> default_message then try let (poss2, _), message2 = Lr1.NodeMap.find s table2 in if message1 <> message2 then Error.warning (poss1 @ poss2) "these sentences lead to an error in state %d.\n\ The corresponding messages in \"%s\" and \"%s\" differ." (Lr1.number s) filename1 filename2 with Not_found -> () ); if Error.errors() then exit 1; exit 0 ) (* --------------------------------------------------------------------------- *) (* If [--update-errors <filename>] is set, update the error message descriptions found in file [filename]. The idea is to re-generate the auto-comments, which are marked with ##, while leaving the rest untouched. *) let () = Settings.update_errors |> Option.iter (fun filename -> (* Read the file. 
*) let runs : run or_comment list = read_messages filename in (* Convert every sentence to a state number. Warn, but do not fail, if a sentence does not end in an error, as it should. *) let runs : maybe_targeted_run or_comment list = List.map (or_comment_map (target_run_1 Error.warning)) runs in (* We might wish to detect if two sentences lead to the same state. We might also wish to detect if this set of sentences is incomplete, and complete it automatically. However, the first task is carried out by [--compile-errors] already, and the second task is carried out by [--list-errors] and [--compare-errors] together. For now, let's try and keep things as simple as possible. The task of [--update-errors] should be to update the auto-generated comments, without failing, and without adding or removing sentences. *) (* Now, write a new [.messages] to the standard output channel, with new auto-generated comments. *) List.iter write_run runs; exit 0 ) (* --------------------------------------------------------------------------- *) (* If [--echo-errors <filename>] is set, echo the error sentences found in file [filename]. Do not echo the error messages or the comments. *) (* In principle, we should be able to run this command without even giving an .mly file name on the command line, and without building the automaton. This is not possible at the moment, because our code is organized in too rigid a manner. *) let () = Settings.echo_errors |> Option.iter (fun filename -> (* Read the file. *) let runs : run or_comment list = read_messages filename in (* Echo. *) List.iter (or_comment_iter (fun run -> let (sentences : located_sentence or_comment list), _, _ = run in List.iter (or_comment_iter (fun (_, sentence) -> print_string (print_sentence sentence) )) sentences )) runs; exit 0 ) menhir-20151112/src/error.ml0000644000175000017500000000414312621170073014443 0ustar mehdimehdiopen Printf (* ---------------------------------------------------------------------------- *) (* Global state. *) let get_initialized_ref ref = match !ref with | None -> assert false | Some contents -> contents let filename = ref (None : string option) let filemark = ref Mark.none (* 2011/10/19: do not use [Filename.basename]. The [#] annotations that we insert in the [.ml] file must retain their full path. This does mean that the [#] annotations depend on how menhir is invoked -- e.g. [menhir foo/bar.mly] and [cd foo && menhir bar.mly] will produce different files. Nevertheless, this seems useful/reasonable. *) (* This also influences the type error messages produced by [--infer]. *) let set_filename name = filename := Some name; filemark := Mark.fresh() let get_filename () = get_initialized_ref filename let get_filemark () = !filemark let file_contents = ref (None : string option) let get_file_contents () = get_initialized_ref file_contents (* ---------------------------------------------------------------------------- *) (* Logging and log levels. *) let log kind verbosity msg = if kind >= verbosity then Printf.fprintf stderr "%t%!" msg let logG = log Settings.logG let logA = log Settings.logA let logC = log Settings.logC (* ---------------------------------------------------------------------------- *) (* Errors and warnings. 
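As an added usage sketch (not part of the original sources; the positions and messages are made up), a client typically signals several errors, then checks whether any occurred:

     Error.signal poss1 "first problem.";
     Error.signal poss2 "second problem.";
     if Error.errors () then exit 1

   whereas [error] prints a single message and exits immediately.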
*) let errors = ref false let display continuation header positions format = List.iter (fun position -> fprintf stderr "%s:\n" (Positions.string_of_pos position) ) positions; Printf.kfprintf continuation stderr (header ^^ format ^^ "\n%!") let error positions format = display (fun _ -> exit 1) "Error: " positions format let signal positions format = display (fun _ -> errors := true) "Error: " positions format let warning positions format = display (fun _ -> ()) "Warning: " positions format let errors () = !errors let errorp v = error [ Positions.position v ] let grammar_warning = if Settings.strict then signal else warning menhir-20151112/src/sentenceParserAux.ml0000644000175000017500000000076612621170073016750 0ustar mehdimehdiopen Grammar type terminals = Terminal.t list type sentence = Nonterminal.t option * terminals type located_sentence = Positions.positions * sentence type comment = string type 'a or_comment = | Thing of 'a | Comment of comment let or_comment_iter f = function | Thing s -> f s | Comment _ -> () let or_comment_map f = function | Thing s -> Thing (f s) | Comment c -> Comment c let unThing = function | Thing x -> [ x ] | Comment _ -> [] menhir-20151112/src/sentenceParser.mly0000644000175000017500000000563412621170073016470 0ustar mehdimehdi/* This is two parsers in one. */ /* This parser is used to read the sentences provided on the standard input channel when [--interpret] is set. The entry point is [optional_sentence]. */ /* It is used also to read a [.messages] file. The entry point is [entry]. */ /* This parser must be compatible with both ocamlyacc and menhir, so we use $ notation, do not use Menhir's standard library, and collect positions manually. */ /* ------------------------------------------------------------------------ */ /* Tokens. */ %token COLON EOF EOL %token<Grammar.Terminal.t * Lexing.position * Lexing.position> TERMINAL %token<Grammar.Nonterminal.t * Lexing.position * Lexing.position> NONTERMINAL %token<string> COMMENT /* only manually-written comments, beginning with a single # */ /* ------------------------------------------------------------------------ */ /* Types. */ %{ open SentenceParserAux (* Removing the position information in a terminal or non-terminal symbol. *) let strip_symbol (x, _, _) = x (* Removing the position information in a sentence. *) let strip_sentence (nto, terminals) = Option.map strip_symbol nto, List.map strip_symbol terminals (* Computing the start and end positions of a sentence. *) let locate_sentence (nto, terminals) = let opening = match nto, terminals with | Some (_, opening, _), _ | None, (_, opening, _) :: _ -> opening | None, [] -> Lexing.dummy_pos (* cannot happen *) and closing = match nto, List.rev terminals with | _, (_, _, closing) :: _ | Some (_, _, closing), _ -> closing | None, [] -> Lexing.dummy_pos (* cannot happen *) in Positions.two opening closing, strip_sentence (nto, terminals) %} %type <SentenceParserAux.located_sentence> located_sentence %type <SentenceParserAux.sentence option> optional_sentence %start optional_sentence %type <SentenceParserAux.located_sentence SentenceParserAux.or_comment list> entry %start entry %% /* ------------------------------------------------------------------------ */ /* An entry is a list of located sentences or comments. */ entry: located_sentences_or_comments EOF { $1 } /* A list of located sentences or comments. */ located_sentences_or_comments: { [] } | located_sentence located_sentences_or_comments { Thing $1 :: $2 } | COMMENT located_sentences_or_comments { Comment $1 :: $2 } /* A located sentence. */ located_sentence: sentence { locate_sentence $1 } /* An optional sentence. 
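For instance (an added example; the symbol [expr] and the tokens are made up), the input line "expr: INT PLUS INT" yields [Some (Some expr, [INT; PLUS; INT])] once positions have been stripped, while an immediate end of file yields [None].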
*/ optional_sentence: | EOF { None } | sentence { Some (strip_sentence $1) } /* A sentence is a pair of an optional non-terminal start symbol and a list of terminal symbols. It is terminated by a newline. */ sentence: | NONTERMINAL COLON terminals EOL { Some $1, $3 } | terminals EOL { None, $1 } /* A list of terminal symbols. */ terminals: | { [] } | TERMINAL terminals { $1 :: $2 } menhir-20151112/src/compressedBitSet.mli0000644000175000017500000000004712621170073016741 0ustar mehdimehdiinclude GSet.S with type element = int menhir-20151112/src/parserAux.ml0000644000175000017500000000636012621170073015267 0ustar mehdimehdiopen Positions open Syntax let new_precedence_level = let c = ref 0 in fun pos1 pos2 -> incr c; PrecedenceLevel (Error.get_filemark (), !c, pos1, pos2) let new_production_level = let c = ref 0 in fun () -> incr c; ProductionLevel (Error.get_filemark (), !c) module IdSet = Set.Make (struct type t = identifier located let compare id1 id2 = compare (value id1) (value id2) end) let defined_identifiers (_, ido, _) accu = Option.fold IdSet.add ido accu let defined_identifiers producers = List.fold_right defined_identifiers producers IdSet.empty let check_production_group right_hand_sides = begin match right_hand_sides with | [] -> assert false | (producers, _, _, _) :: right_hand_sides -> let ids = defined_identifiers producers in List.iter (fun (producers, _, _, _) -> let ids' = defined_identifiers producers in try let id = IdSet.choose (IdSet.union (IdSet.diff ids ids') (IdSet.diff ids' ids)) in Error.error [Positions.position id] "two productions that share a semantic action must define\n\ exactly the same identifiers." with Not_found -> () ) right_hand_sides end (* [normalize_producer i p] assigns a name of the form [_i] to the unnamed producer [p]. *) let normalize_producer i (pos, opt_identifier, parameter) = let id = match opt_identifier with | Some id -> id | None -> Positions.with_pos pos ("_" ^ string_of_int (i + 1)) in (id, parameter) let normalize_producers producers = List.mapi normalize_producer producers let override pos o1 o2 = match o1, o2 with | Some _, Some _ -> Error.signal [ pos ] "this production carries two %%prec declarations."; o2 | None, Some _ -> o2 | _, None -> o1 (* Support for on-the-fly expansion of anonymous rules. Whenever such a rule is encountered, we create a fresh non-terminal symbol, add a definition of this symbol to a global variable, and return a reference to this symbol. Quick and dirty. So, in the end, clean. *) let fresh : unit -> string = let next = ref 0 in fun () -> Printf.sprintf "__anonymous_%d" (Misc.postincrement next) let rules = ref [] let anonymous pos branches = (* Generate a fresh non-terminal symbol. *) let symbol = fresh() in (* Construct its definition. Note that it is implicitly marked %inline. *) let rule = { pr_public_flag = false; pr_inline_flag = true; pr_nt = symbol; pr_positions = [ pos ]; (* this list is not allowed to be empty *) pr_parameters = []; pr_branches = branches } in (* Record this definition. *) rules := rule :: !rules; (* Return the symbol that stands for it. *) symbol let rules () = let result = !rules in (* Reset the global state, in case we need to read several .mly files. *) rules := []; result (* Only unnamed producers can be referred to using positional identifiers. Besides, such positions must be taken in the interval [1 .. List.length producers]. The output array [p] is such that [p.(idx) = Some x] if [idx] must be referred to using [x], not [$(idx + 1)]. 
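For instance (an added example), for a production such as [e = expr PLUS t = term], the result is [[| Some "e"; None; Some "t" |]]: the unnamed producer in position 2 may be referred to as [$2], whereas the producers named [e] and [t] must be referred to by name.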
*) let producer_names producers = producers |> List.map (fun (_, oid, _) -> Option.map Positions.value oid) |> Array.of_list menhir-20151112/src/Convert.ml0000644000175000017500000001104712621170074014734 0ustar mehdimehdi(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, INRIA Paris-Rocquencourt *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright 2005-2015 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (* An ocamlyacc-style, or Menhir-style, parser requires access to the lexer, which must be parameterized with a lexing buffer, and to the lexing buffer itself, where it reads position information. *) (* This traditional API is convenient when used with ocamllex, but inelegant when used with other lexer generators. *) type ('token, 'semantic_value) traditional = (Lexing.lexbuf -> 'token) -> Lexing.lexbuf -> 'semantic_value (* This revised API is independent of any lexer generator. Here, the parser only requires access to the lexer, and the lexer takes no parameters. The tokens returned by the lexer may contain position information. *) type ('token, 'semantic_value) revised = (unit -> 'token) -> 'semantic_value (* --------------------------------------------------------------------------- *) (* Converting a traditional parser, produced by ocamlyacc or Menhir, into a revised parser. *) (* A token of the revised lexer is essentially a triple of a token of the traditional lexer (or raw token), a start position, and an end position. The three [get] functions are accessors. *) (* We do not require the type ['token] to actually be a triple type. This enables complex applications where it is a record type with more than three fields. It also enables simple applications where positions are of no interest, so ['token] is just ['raw_token] and [get_startp] and [get_endp] return dummy positions. *) let traditional2revised (get_raw_token : 'token -> 'raw_token) (get_startp : 'token -> Lexing.position) (get_endp : 'token -> Lexing.position) (parser : ('raw_token, 'semantic_value) traditional) : ('token, 'semantic_value) revised = (* Accept a revised lexer. *) fun (lexer : unit -> 'token) -> (* Create a dummy lexing buffer. *) let lexbuf : Lexing.lexbuf = Lexing.from_string "" in (* Wrap the revised lexer as a traditional lexer. A traditional lexer returns a raw token and updates the fields of the lexing buffer with new positions, which will be read by the parser. *) let lexer (lexbuf : Lexing.lexbuf) : 'raw_token = let token : 'token = lexer() in lexbuf.Lexing.lex_start_p <- get_startp token; lexbuf.Lexing.lex_curr_p <- get_endp token; get_raw_token token in (* Invoke the traditional parser. *) parser lexer lexbuf (* --------------------------------------------------------------------------- *) (* Converting a revised parser back to a traditional parser. *) let revised2traditional (make_token : 'raw_token -> Lexing.position -> Lexing.position -> 'token) (parser : ('token, 'semantic_value) revised) : ('raw_token, 'semantic_value) traditional = (* Accept a traditional lexer and a lexing buffer. *) fun (lexer : Lexing.lexbuf -> 'raw_token) (lexbuf : Lexing.lexbuf) -> (* Wrap the traditional lexer as a revised lexer. 
*) let lexer () : 'token = let token : 'raw_token = lexer lexbuf in make_token token lexbuf.Lexing.lex_start_p lexbuf.Lexing.lex_curr_p in (* Invoke the revised parser. *) parser lexer (* --------------------------------------------------------------------------- *) (* Simplified versions of the above, where concrete triples are used. *) module Simplified = struct let traditional2revised parser = traditional2revised (fun (token, _, _) -> token) (fun (_, startp, _) -> startp) (fun (_, _, endp) -> endp) parser let revised2traditional parser = revised2traditional (fun token startp endp -> (token, startp, endp)) parser end menhir-20151112/src/installation.ml0000644000175000017500000000010412621170073016004 0ustar mehdimehdilet libdir = "/Users/fpottier/dev/menhir/src/" let ocamlfind = true menhir-20151112/src/codePieces.ml0000644000175000017500000001246712621170073015365 0ustar mehdimehdi(* This module defines many internal naming conventions for use by the two code generators, [CodeBackend] and [TableBackend]. It also offers a few code generation facilities. *) open IL open CodeBits open Grammar (* ------------------------------------------------------------------------ *) (* Naming conventions. *) (* The type variable associated with a nonterminal [nt]. *) let ntvar nt = Infer.ntvar (Nonterminal.print true nt) (* The variable that holds the environment. This is a parameter to all functions. We do not make it a global variable because we wish to preserve re-entrancy. *) let env = prefix "env" (* A variable used to hold a semantic value. *) let semv = "_v" (* A variable used to hold a stack. *) let stack = prefix "stack" (* A variable used to hold a state. *) let state = prefix "s" (* A variable used to hold a token. *) let token = "_tok" (* Variables used to hold start and end positions. Do not change these names! They are chosen to coincide with the $startpos and $endpos keywords, which the lexer rewrites to _startpos and _endpos, so that binding these variables before executing a semantic action is meaningful. *) (* These names should agree with the printing function [Keyword.posvar]. *) let beforeendp = "_endpos__0_" let startp = "_startpos" let endp = "_endpos" (* ------------------------------------------------------------------------ *) (* Types for semantic values. *) (* [semvtypent nt] is the type of the semantic value associated with nonterminal [nt]. *) let semvtypent nt = match Nonterminal.ocamltype nt with | None -> (* [nt] has unknown type. If we we have run [Infer], then this can't happen. However, running type inference is only an option, so we still have to deal with that case. *) TypVar (ntvar nt) | Some ocamltype -> (* [nt] has known type. *) TypTextual ocamltype (* [semvtypetok tok] is the type of the semantic value associated with token [tok]. There is no such type if the token does not have a semantic value. *) let semvtypetok tok = match Terminal.ocamltype tok with | None -> (* Token has unit type and is omitted in stack cell. *) [] | Some ocamltype -> (* Token has known type. *) [ TypTextual ocamltype ] (* [semvtype symbol] is the type of the semantic value associated with [symbol]. *) let semvtype = function | Symbol.T tok -> semvtypetok tok | Symbol.N nt -> [ semvtypent nt ] (* [symvalt] returns the empty list if the symbol at hand carries no semantic value and the singleton list [[f t]] if it carries a semantic value of type [t]. 
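For instance (an added example), if the token [INT] is declared with an OCaml type, then [semvtype (Symbol.T INT)] is a one-element list, so [symvalt (Symbol.T INT) f] applies [f] to that type, whereas for a token that carries no semantic value both results are empty.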
*) let symvalt symbol f = match semvtype symbol with | [] -> [] | [ t ] -> [ f t ] | _ -> assert false (* [symval symbol x] returns either the empty list or the singleton list [[x]], depending on whether [symbol] carries a semantic value. *) let symval symbol x = match semvtype symbol with | [] -> [] | [ _t ] -> [ x ] | _ -> assert false (* [tokval] is a version of [symval], specialized for terminal symbols. *) let tokval tok x = symval (Symbol.T tok) x (* ------------------------------------------------------------------------ *) (* Patterns for tokens. *) (* [tokpat tok] is a pattern that matches the token [tok], without binding its semantic value. *) let tokpat tok = PData (TokenType.tokendata (Terminal.print tok), tokval tok PWildcard) (* [tokpatv tok] is a pattern that matches the token [tok], and binds its semantic value, if it has one, to the variable [semv]. *) let tokpatv tok = PData (TokenType.tokendata (Terminal.print tok), tokval tok (PVar semv)) (* [tokspat toks] is a pattern that matches any token in the set [toks], without binding its semantic value. *) let tokspat toks = POr ( TerminalSet.fold (fun tok pats -> tokpat tok :: pats ) toks [] ) (* [destructuretokendef name codomain bindsemv branch] generates the definition of a function that destructures tokens. [name] is the name of the function that is generated. [codomain] is its return type. [bindsemv] tells whether the variable [semv] should be bound. [branch] is applied to each (non-pseudo) terminal and must produce code for each branch. *) let destructuretokendef name codomain bindsemv branch = { valpublic = false; valpat = PVar name; valval = EAnnot ( EFun ([ PVar token ], EMatch (EVar token, Terminal.fold (fun tok branches -> if Terminal.pseudo tok then branches else { branchpat = (if bindsemv then tokpatv else tokpat) tok; branchbody = branch tok } :: branches ) [] ) ), type2scheme (arrow TokenType.ttoken codomain) ) } (* ------------------------------------------------------------------------ *) (* A global variable holds the exception [Error]. *) (* We preallocate the [Error] exception and store it into a global variable. This allows saving code at the sites where the exception is raised. Don't change the conventional name [_eRR], it is shared with the lexer, which replaces occurrences of the [$syntaxerror] keyword with [(raise _eRR)]. *) let parse_error = "_eRR" let errorval = EVar parse_error let excvaldef = { valpublic = false; valpat = PVar parse_error; valval = EData (Interface.excname, []) } menhir-20151112/src/_tags0000644000175000017500000000053412621170073014000 0ustar mehdimehdi # Menhir needs the Unix library. : use_unix # checkOCamlVersion needs the Str library. : use_str # Build and link with -g. <**/*.{cmo,cmx}>:debug <**/*.byte>:debug <**/*.native>:debug # Turn off assertions in some modules, where they are useful when debugging, but costly. : noassert menhir-20151112/src/Makefile0000644000175000017500000000616612621170073014427 0ustar mehdimehdi.PHONY: everyday library bootstrap stage1 stage2 stage3 clean # ---------------------------------------------------------------------------- # Choose a target. ifndef TARGET TARGET := native endif # ---------------------------------------------------------------------------- # Define the files that form the library. ifeq ($(TARGET),byte) LIBFILES := menhirLib.cmo else LIBFILES := menhirLib.cmo menhirLib.cmx endif # ---------------------------------------------------------------------------- # Ocamlbuild tool and settings. 
OCAMLBUILD := ocamlbuild -classic-display -j 0 -cflags "-safe-string -bin-annot" # ---------------------------------------------------------------------------- # For everyday development. # Typing "make" will perform just stage 1. This is enough to ensure that # the source code is correct. everyday: installation.ml stage1 # ---------------------------------------------------------------------------- # Building MenhirLib. library: $(OCAMLBUILD) $(LIBFILES) # ---------------------------------------------------------------------------- # Building Menhir from scratch (a.k.a. bootstrapping). bootstrap: .versioncheck stage1 stage2 stage3 # ---------------------------------------------------------------------------- # Checking the version of the OCaml compiler. .versioncheck: @ echo Checking that Objective Caml is recent enough... @$(OCAMLBUILD) -build-dir _stage1 checkOCamlVersion.byte @ _stage1/checkOCamlVersion.byte --verbose --gt "4.02" @ touch $@ # ---------------------------------------------------------------------------- # Stage 1. # Build Menhir using ocamlyacc. stage1: @$(OCAMLBUILD) -build-dir _stage1 menhir.$(TARGET) # ---------------------------------------------------------------------------- # Stage 2. # Build Menhir using Menhir (from stage 1). # Do not use . to refer to the current directory, because ocamlbuild # descends into another directory when executing commands. # Do not use $(shell pwd) either, because this assumes we are running # on a Unix platform, and can fail on Windows. # So, use .., which works fine if ocamlbuild has effectively descended # into a subdirectory. SRC := .. FLAGS := -v -lg 1 -la 1 -lc 1 --table --infer --stdlib $(SRC) --strict --fixed-exception --canonical stage2: @$(OCAMLBUILD) -build-dir _stage2 -tag fancy_parser \ -use-menhir -menhir "$(SRC)/_stage1/menhir.$(TARGET) $(FLAGS)" \ menhir.$(TARGET) # ---------------------------------------------------------------------------- # Stage 3 (optional). # Re-generate Menhir's parser using Menhir (from stage 2) and check that it # is identical to the stage 2 parser. stage3: @$(OCAMLBUILD) -build-dir _stage3 -tag fancy_parser \ -use-menhir -menhir "$(SRC)/_stage2/menhir.$(TARGET) $(FLAGS)" \ parser.ml parser.mli @for i in parser.ml parser.mli ; do \ if ! diff _stage2/$$i _stage3/$$i 2>&1 >/dev/null ; then \ echo "Bootstrap FAILED: $$i did not reach a fixed point."; exit 1 ; \ fi ; \ done; \ echo "Bootstrap successful." # ---------------------------------------------------------------------------- # Cleaning up. clean:: rm -rf .versioncheck _build _stage1 _stage2 _stage3 menhir-20151112/src/lr1.ml0000644000175000017500000011237312621170073014015 0ustar mehdimehdiopen Grammar module S = Slr (* artificial dependency; ensures that [Slr] runs first *) (* This module constructs an LR(1) automaton by following Pager's method, that is, by merging states on the fly when they are weakly compatible. *) (* ------------------------------------------------------------------------ *) (* Nodes. *) type node = { (* A node number, assigned during construction. *) raw_number: int; (* A node number, assigned after conflict resolution has taken place and after inaccessible nodes have been removed. This yields sequential numbers, from the client's point of view. *) mutable number: int; (* Each node is associated with a state. This state can change during construction as nodes are merged. *) mutable state: Lr0.lr1state; (* Each node carries information about its outgoing transitions and about its reductions.
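For example, in a state where some token [tok] allows both shifting and reducing, [transitions] contains an entry for [Symbol.T tok] and [reductions] maps [tok] to the candidate productions; unless the conflict is resolved via priorities, [tok] then also appears in [conflict_tokens] (see below).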
*) mutable transitions: node SymbolMap.t; mutable reductions: Production.index list TerminalMap.t; (* Tokens for which there are several possible behaviors are conflict tokens. *) mutable conflict_tokens: TerminalSet.t; (* Transitions are also stored in reverse, so as to allow reverse traversals of the automaton. *) mutable predecessors: node list; (* Transient marks are used during construction and traversal. *) mutable mark: Mark.t; (* (New as of 2012/01/23.) This flag records whether a shift/reduce conflict in this node was solved in favor of neither (%nonassoc). This is later used to forbid a default reduction at this node. *) mutable forbid_default_reduction: bool; } module Node = struct type t = node let compare node1 node2 = node1.number - node2.number end module NodeSet = Set.Make (Node) module NodeMap = Map.Make (Node) (* ------------------------------------------------------------------------ *) (* Output debugging information if [--follow-construction] is enabled. *) let follow_transition (again : bool) (source : node) (symbol : Symbol.t) (state : Lr0.lr1state) = if Settings.follow then Printf.fprintf stderr "%s transition out of state r%d along symbol %s.\nProposed target state:\n%s" (if again then "Re-examining" else "Examining") source.raw_number (Symbol.print symbol) (Lr0.print_closure "" state) let follow_state (msg : string) (node : node) (print : bool) = if Settings.follow then Printf.fprintf stderr "%s: r%d.\n%s\n" msg node.raw_number (if print then Lr0.print_closure "" node.state else "") (* ------------------------------------------------------------------------ *) (* The following two mutually recursive functions are invoked when the state associated with an existing node grows. The node's descendants are examined and grown until a fixpoint is reached. This work is performed in an eager manner: we do not attempt to build any new transitions until all existing nodes have been suitably grown. Indeed, building new transitions requires making merging decisions, and such decisions cannot be made on a sound basis unless all existing nodes have been suitably grown. Otherwise, one could run into a dead end where two successive, incompatible merging decisions are made, because the consequences of the first decision (growing descendant nodes) were not made explicit before the second decision was taken. This was a bug in versions of Menhir ante 20070520. Although I wrote this code independently, I later found out that it seems quite similar to the code in Karl Schimpf's Ph.D. thesis (1981), page 35. It is necessary that all existing transitions be explicit before the [grow] functions are called. In other words, if it has been decided that there will be a transition from [node1] to [node2], then [node1.transitions] must be updated before [grow] is invoked. *) (* [grow node state] grows the existing node [node], if necessary, so that its associated state subsumes [state]. If this represents an actual (strict) growth, then [node]'s descendants are grown as well. *) let rec grow node state = if Lr0.subsume state node.state then follow_state "Target state is unaffected" node false else begin (* In versions of Menhir prior to June 2008, I wrote this: If I know what I am doing, then the new state that is being merged into the existing state should be compatible, in Pager's sense, with the existing node. In other words, compatibility should be preserved through transitions.
and the code contained this assertion: assert (Lr0.compatible state node.state); assert (Lr0.eos_compatible state node.state); However, this was wrong. See, for instance, the sample grammars cocci.mly and boris-mini.mly. The problem is particularly clearly apparent in boris-mini.mly, where it only involves inclusion of states -- the definition of Pager's weak compatibility does not enter the picture. Here is, roughly, what is going on. Assume we have built some state A, which, along some symbol S, has a transition to itself. This means, in fact, that computing the successor of A along S yields a *subset* of A, that is, succ(A, S) <= A. Then, we wish to build a new state A', which turns out to be a superset of A, so we decide to grow A. (The fact that A is a subset of A' implies that A and A' are Pager-compatible.) As per the code below, we immediately update the state A in place, to become A'. Then, we inspect the transition along symbol S. We find that the state succ(A', S) must be merged into A'. In this situation, the assertions above require succ(A', S) to be compatible with A'. However, this is not necessarily the case. By monotonicity of succ, we do have succ(A, S) <= succ(A', S). But nothing says that succ(A', S) and A' are related with respect to inclusion, or even Pager-compatible. The grammar in boris-mini.mly shows that they are not. *) (* Grow [node]. *) node.state <- Lr0.union state node.state; follow_state "Growing existing state" node true; (* Grow [node]'s successors. *) grow_successors node end (* [grow_successors node] grows [node]'s successors. *) (* Note that, if there is a cycle in the graph, [grow_successors] can be invoked several times at a single node [node], with [node.state] taking on a new value every time. In such a case, this code should be correct, although probably not very efficient. *) and grow_successors node = SymbolMap.iter (fun symbol (successor_node : node) -> let successor_state = Lr0.transition symbol node.state in follow_transition true node symbol successor_state; grow successor_node successor_state ) node.transitions (* ------------------------------------------------------------------------ *) (* Data structures maintained during the construction of the automaton. *) (* A queue of pending nodes, whose outgoing transitions have not yet been built. *) let queue : node Queue.t = Queue.create() (* A mapping of LR(0) node numbers to lists of nodes. This allows us to efficiently find all existing nodes that are core-compatible with a newly found state. *) let map : node list array = Array.make Lr0.n [] (* A counter that allows assigning raw numbers to nodes. *) let num = ref 0 (* ------------------------------------------------------------------------ *) (* [create state] creates a new node that stands for the state [state]. It is expected that [state] does not subsume, and is not subsumed by, any existing state. *) let create (state : Lr0.lr1state) : node = (* Allocate a new node. *) let node = { state = state; transitions = SymbolMap.empty; reductions = TerminalMap.empty; conflict_tokens = TerminalSet.empty; raw_number = Misc.postincrement num; number = 0; (* temporary placeholder *) mark = Mark.none; predecessors = []; forbid_default_reduction = false; } in (* Update the mapping of LR(0) cores to lists of nodes. *) let k = Lr0.core state in assert (k < Lr0.n); map.(k) <- node :: map.(k); (* Enqueue this node for further examination. *) Queue.add node queue; (* Debugging output.
*) follow_state "Creating a new state" node false; (* Return the freshly created node. *) node (* ------------------------------------------------------------------------ *) (* Materializing a transition turns its target state into a (fresh or existing) node. There are three scenarios: the proposed new state can be subsumed by an existing state, compatible with an existing state, or neither. *) exception Subsumed of node exception Compatible of node let materialize (source : node) (symbol : Symbol.t) (target : Lr0.lr1state) : unit = try (* Debugging output. *) follow_transition false source symbol target; (* Find all existing core-compatible states. *) let k = Lr0.core target in assert (k < Lr0.n); let similar = map.(k) in (* Check whether we need to create a new node or can reuse an existing state. *) (* 20120525: the manner in which this check is performed depends on [Settings.construction_mode]. There are now three modes. *) (* 20150204: there are now four modes. *) begin match Settings.construction_mode with | Settings.ModeCanonical -> (* In a canonical automaton, two states can be merged only if they are identical. *) List.iter (fun node -> if Lr0.subsume target node.state && Lr0.subsume node.state target then raise (Subsumed node) ) similar | Settings.ModeInclusionOnly | Settings.ModePager -> (* A more aggressive approach is to take subsumption into account: if the new candidate state is a subset of an existing state, then no new node needs to be created. Furthermore, the existing state does not need to be enlarged. *) (* 20110124: require error compatibility in addition to subsumption. *) List.iter (fun node -> if Lr0.subsume target node.state && Lr0.error_compatible target node.state then raise (Subsumed node) ) similar | Settings.ModeLALR -> () end; begin match Settings.construction_mode with | Settings.ModeCanonical | Settings.ModeInclusionOnly -> () | Settings.ModePager -> (* One can be even more aggressive and check whether the existing state is compatible, in Pager's sense, with the new state. If so, there is no need to create a new state: just merge the new state into the existing one. The result is a state that may be larger than each of the two states that have been merged. *) (* 20110124: require error compatibility in addition to the existing compatibility criteria. *) List.iter (fun node -> if Lr0.compatible target node.state && Lr0.eos_compatible target node.state && Lr0.error_compatible target node.state then raise (Compatible node) ) similar | Settings.ModeLALR -> (* In LALR mode, as soon as there is one similar state -- i.e. one state that shares the same LR(0) core -- we merge the new state into the existing one. *) List.iter (fun node -> raise (Compatible node) ) similar end; (* The above checks have failed. Create a new node. Two states that are in the subsumption relation are also compatible. This implies that the newly created node does not subsume any existing states. *) source.transitions <- SymbolMap.add symbol (create target) source.transitions with | Subsumed node -> (* Join an existing target node. *) follow_state "Joining existing state" node false; source.transitions <- SymbolMap.add symbol node source.transitions | Compatible node -> (* Join and grow an existing target node. It seems important that the new transition is created before [grow_successors] is invoked, so that all transition decisions made so far are explicit.
*) node.state <- Lr0.union target node.state; follow_state "Joining and growing existing state" node true; source.transitions <- SymbolMap.add symbol node source.transitions; grow_successors node (* ------------------------------------------------------------------------ *) (* The actual construction process. *) (* Populate the queue with the start nodes and store them in an array. *) let entry : node ProductionMap.t = ProductionMap.map (fun (k : Lr0.node) -> create (Lr0.start k) ) Lr0.entry (* Pick a node in the queue, that is, a node whose transitions have not yet been built. Build these transitions, and continue. *) (* Note that building a transition can cause existing nodes to grow, so [node.state] is not necessarily invariant throughout the inner loop. *) let () = Misc.qiter (fun node -> List.iter (fun symbol -> materialize node symbol (Lr0.transition symbol node.state) ) (Lr0.outgoing_symbols (Lr0.core node.state)) ) queue (* Record how many nodes were constructed. *) let n = !num let () = Error.logA 1 (fun f -> Printf.fprintf f "Built an LR(1) automaton with %d states.\n" !num) (* ------------------------------------------------------------------------ *) (* A mapping of symbols to lists of nodes that admit this incoming symbol. This mapping is constructed by [visit] below. *) let incoming : node list SymbolMap.t ref = ref SymbolMap.empty let lookup_incoming symbol = try SymbolMap.find symbol !incoming with Not_found -> [] let record_incoming osymbol target = Option.iter (fun symbol -> let targets = lookup_incoming symbol in incoming := SymbolMap.add symbol (target :: targets) !incoming ) osymbol (* ------------------------------------------------------------------------ *) (* We now perform one depth-first traversal of the automaton, recording predecessor edges, numbering nodes, sorting nodes according to their incoming symbol, building reduction tables, and finding out which nodes have conflicts. *) (* A count of all nodes. *) let () = num := 0 (* A list of all nodes. *) let nodes : node list ref = ref [] (* A list of nodes with conflicts. *) let conflict_nodes : node list ref = ref [] (* Counts of nodes with shift/reduce and reduce/reduce conflicts. *) let shift_reduce = ref 0 let reduce_reduce = ref 0 (* Count of the shift/reduce conflicts that could be silently resolved. *) let silently_solved = ref 0 (* Go ahead. *) let () = let marked = Mark.fresh() in let rec visit osymbol node = if not (Mark.same node.mark marked) then begin node.mark <- marked; nodes := node :: !nodes; record_incoming osymbol node; (* Number this node. *) let number = !num in num := number + 1; node.number <- number; (* Insertion of a new reduce action into the table of reductions. *) let addl prod tok reductions = let prods = try TerminalMap.lookup tok reductions with Not_found -> [] in TerminalMap.add tok (prod :: prods) reductions in (* Build the reduction table. Here, we gather all potential reductions, without attempting to solve shift/reduce conflicts on the fly, because that would potentially hide shift/reduce/reduce conflicts, which we want to be aware of. *) let reductions = List.fold_left (fun reductions (toks, prod) -> TerminalSet.fold (addl prod) toks reductions ) TerminalMap.empty (Lr0.reductions node.state) in (* Detect conflicts. Attempt to solve shift/reduce conflicts when unambiguously allowed by priorities. 
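For example -- this classic arithmetic grammar serves only as an illustration -- under the declarations [%left PLUS] followed by [%left TIMES], a conflict between shifting [TIMES] and reducing [expr -> expr PLUS expr] is silently solved in favor of shifting, since [TIMES] has the higher priority; on the lookahead [PLUS], the same production is reduced instead, since the priorities are equal and the token is declared left-associative.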
*) let has_shift_reduce = ref false and has_reduce_reduce = ref false in node.reductions <- TerminalMap.fold (fun tok prods reductions -> if SymbolMap.mem (Symbol.T tok) node.transitions then begin (* There is a transition in addition to the reduction(s). We have (at least) a shift/reduce conflict. *) assert (not (Terminal.equal tok Terminal.sharp)); match prods with | [] -> assert false | [ prod ] -> begin (* This is a single shift/reduce conflict. If priorities tell us how to solve it, we follow that and modify the automaton. *) match Precedence.shift_reduce tok prod with | Precedence.ChooseShift -> (* Suppress the reduce action. *) incr silently_solved; reductions | Precedence.ChooseReduce -> (* Record the reduce action and suppress the shift transition. The automaton is modified in place. This can have the subtle effect of making some nodes unreachable. Any conflicts in these nodes will then be ignored (as they should be). *) incr silently_solved; node.transitions <- SymbolMap.remove (Symbol.T tok) node.transitions; TerminalMap.add tok prods reductions | Precedence.ChooseNeither -> (* Suppress the reduce action and the shift transition. *) incr silently_solved; node.transitions <- SymbolMap.remove (Symbol.T tok) node.transitions; node.forbid_default_reduction <- true; reductions | Precedence.DontKnow -> (* Priorities don't allow concluding. Record the existence of a shift/reduce conflict. *) node.conflict_tokens <- Grammar.TerminalSet.add tok node.conflict_tokens; has_shift_reduce := true; TerminalMap.add tok prods reductions end | _prod1 :: _prod2 :: _ -> (* This is a shift/reduce/reduce conflict. If the priorities are such that each individual shift/reduce conflict is solved in favor of shifting or in favor of neither, then solve the entire composite conflict in the same way. Otherwise, report the conflict. *) let choices = List.map (Precedence.shift_reduce tok) prods in if List.for_all (fun choice -> match choice with | Precedence.ChooseShift -> true | _ -> false ) choices then begin (* Suppress the reduce action. *) silently_solved := !silently_solved + List.length prods; reductions end else if List.for_all (fun choice -> match choice with | Precedence.ChooseNeither -> true | _ -> false ) choices then begin (* Suppress the reduce action and the shift transition. *) silently_solved := !silently_solved + List.length prods; node.transitions <- SymbolMap.remove (Symbol.T tok) node.transitions; reductions end else begin (* Record a shift/reduce/reduce conflict. Keep all reductions. *) node.conflict_tokens <- Grammar.TerminalSet.add tok node.conflict_tokens; has_shift_reduce := true; has_reduce_reduce := true; TerminalMap.add tok prods reductions end end else let () = match prods with | [] | [ _ ] -> () | _prod1 :: _prod2 :: _ -> (* There is no transition in addition to the reduction(s). We have a pure reduce/reduce conflict. Do nothing about it at this point. *) node.conflict_tokens <- Grammar.TerminalSet.add tok node.conflict_tokens; has_reduce_reduce := true in TerminalMap.add tok prods reductions ) reductions TerminalMap.empty; (* Record statistics about conflicts. *) if not (TerminalSet.is_empty node.conflict_tokens) then begin conflict_nodes := node :: !conflict_nodes; if !has_shift_reduce then incr shift_reduce; if !has_reduce_reduce then incr reduce_reduce end; (* Continue the depth-first traversal. Record predecessor edges as we go. No ancestor appears twice in a list of predecessors, because two nodes cannot be related by two edges that carry distinct symbols.
*) SymbolMap.iter (fun symbol son -> son.predecessors <- node :: son.predecessors; visit (Some symbol) son ) node.transitions end in ProductionMap.iter (fun _ node -> visit None node) entry let nodes = List.rev !nodes (* list is now sorted by increasing node numbers *) let conflict_nodes = !conflict_nodes let () = if !silently_solved = 1 then Error.logA 1 (fun f -> Printf.fprintf f "One shift/reduce conflict was silently solved.\n") else if !silently_solved > 1 then Error.logA 1 (fun f -> Printf.fprintf f "%d shift/reduce conflicts were silently solved.\n" !silently_solved); if !num < n then Error.logA 1 (fun f -> Printf.fprintf f "Only %d states remain after resolving shift/reduce conflicts.\n" !num) let () = Grammar.diagnostics() let n = !num let forbid_default_reduction node = node.forbid_default_reduction (* ------------------------------------------------------------------------ *) (* The incoming symbol of a node can be computed by going through its LR(0) core. For this reason, we do not need to explicitly record it here. *) let incoming_symbol node = Lr0.incoming_symbol (Lr0.core node.state) (* ------------------------------------------------------------------------ *) (* Iteration over all nodes. *) let fold f accu = List.fold_left f accu nodes let iter f = fold (fun () node -> f node) () let map f = List.map f nodes let foldx f = fold (fun accu node -> match incoming_symbol node with | None -> accu | Some _ -> f accu node) let iterx f = iter (fun node -> match incoming_symbol node with | None -> () | Some _ -> f node) (* -------------------------------------------------------------------------- *) (* Our output channel. *) let out = lazy (open_out (Settings.base ^ ".automaton")) (* ------------------------------------------------------------------------ *) (* If requested, dump a verbose description of the automaton. *) let describe out node = Printf.fprintf out "State %d%s:\n%s" node.number (if Settings.follow then Printf.sprintf " (r%d)" node.raw_number else "") (Lr0.print "" node.state); SymbolMap.iter (fun symbol node -> Printf.fprintf out "-- On %s shift to state %d\n" (Symbol.print symbol) node.number ) node.transitions; TerminalMap.iter (fun tok prods -> List.iter (fun prod -> (* TEMPORARY: factor out the symbols that lead to reducing the same production *) Printf.fprintf out "-- On %s " (Terminal.print tok); match Production.classify prod with | Some nt -> Printf.fprintf out "accept %s\n" (Nonterminal.print false nt) | None -> Printf.fprintf out "reduce production %s\n" (Production.print prod) ) prods ) node.reductions; if not (TerminalSet.is_empty node.conflict_tokens) then Printf.fprintf out "** Conflict on %s\n" (TerminalSet.print node.conflict_tokens); Printf.fprintf out "\n%!" let () = Time.tick "Construction of the LR(1) automaton"; if Settings.dump then begin iter (describe (Lazy.force out)); Time.tick "Dumping the LR(1) automaton" end (* ------------------------------------------------------------------------ *) (* [reverse_dfs goal] performs a reverse depth-first search through the automaton, starting at node [goal], and marking the nodes traversed. It returns a function that tells whether a node is marked, that is, whether a path leads from that node to the goal node.
*) let reverse_dfs goal = let mark = Mark.fresh() in let marked node = Mark.same node.mark mark in let rec visit node = if not (marked node) then begin node.mark <- mark; List.iter visit node.predecessors end in visit goal; marked (* ------------------------------------------------------------------------ *) (* Iterating over all nodes that are targets of edges carrying a certain symbol. The sources of the corresponding edges are also provided. *) let targets f accu symbol = (* There are no incoming transitions on the start symbols. *) let targets = lookup_incoming symbol in List.fold_left (fun accu target -> f accu target.predecessors target ) accu targets (* ------------------------------------------------------------------------ *) (* Converting a start node into the single item that it contains. *) let start2item node = let state : Lr0.lr1state = node.state in let core : Lr0.node = Lr0.core state in let items : Item.Set.t = Lr0.items core in assert (Item.Set.cardinal items = 1); Item.Set.choose items (* ------------------------------------------------------------------------ *) (* Accessors. *) let number node = node.number let state node = node.state let transitions node = node.transitions let reductions node = node.reductions let conflicts f = List.iter (fun node -> f node.conflict_tokens node ) conflict_nodes let predecessors node = node.predecessors (* ------------------------------------------------------------------------ *) (* This inverts a mapping of tokens to productions into a mapping of productions to sets of tokens. *) (* This is needed, in [CodeBackend], to avoid producing two (or more) separate branches that call the same [reduce] function. Instead, we generate just one branch, guarded by a [POr] pattern. *) let invert reductions : TerminalSet.t ProductionMap.t = TerminalMap.fold (fun tok prods inverse -> let prod = Misc.single prods in let toks = try ProductionMap.lookup prod inverse with Not_found -> TerminalSet.empty in ProductionMap.add prod (TerminalSet.add tok toks) inverse ) reductions ProductionMap.empty (* ------------------------------------------------------------------------ *) (* [has_beforeend s] tests whether the state [s] can reduce a production whose semantic action uses [$endpos($0)]. Note that [$startpos] and [$endpos] have been expanded away already, so we need not worry about the fact that (in an epsilon production) they expand to [$endpos($0)]. *) let has_beforeend node = TerminalMap.fold (fun _ prods accu -> accu || let prod = Misc.single prods in not (Production.is_start prod) && let action = Production.action prod in Action.has_beforeend action ) (reductions node) false (* ------------------------------------------------------------------------ *) (* Computing which terminal symbols a state is willing to act upon. One must keep in mind that, due to the merging of states, a state might be willing to perform a reduction on a certain token, yet the reduction can take us to another state where this token causes an error. In other words, the set of terminal symbols that is computed here is really an over-approximation of the set of symbols that will not cause an error. And there seems to be no way of performing an exact computation, as we would need to know not only the current state, but the contents of the stack as well. 
*) let acceptable_tokens (s : node) = (* If this state is willing to act on the error token, ignore it -- we do not wish to report that an error would be accepted in this state :-) *) let transitions = SymbolMap.remove (Symbol.T Terminal.error) (transitions s) and reductions = TerminalMap.remove Terminal.error (reductions s) in (* Accumulate the tokens carried by outgoing transitions. *) let covered = SymbolMap.fold (fun symbol _ covered -> match symbol with | Symbol.T tok -> TerminalSet.add tok covered | Symbol.N _ -> covered ) transitions TerminalSet.empty in (* Accumulate the tokens that permit reduction. *) let covered = ProductionMap.fold (fun _ toks covered -> TerminalSet.union toks covered ) (invert reductions) covered in (* That's it. *) covered (* ------------------------------------------------------------------------ *) (* Report statistics. *) (* Produce the reports. *) let () = if !shift_reduce = 1 then Error.grammar_warning [] "one state has shift/reduce conflicts." else if !shift_reduce > 1 then Error.grammar_warning [] "%d states have shift/reduce conflicts." !shift_reduce; if !reduce_reduce = 1 then Error.grammar_warning [] "one state has reduce/reduce conflicts." else if !reduce_reduce > 1 then Error.grammar_warning [] "%d states have reduce/reduce conflicts." !reduce_reduce (* There is a global check for errors at the end of [Invariant], so we do not need to check & stop here. *) (* ------------------------------------------------------------------------ *) (* When requested by the code generator, apply default conflict resolution to ensure that the automaton is deterministic. *) (* [best prod prods] chooses which production should be reduced among the list [prod :: prods]. It fails if no best choice exists. *) let rec best choice = function | [] -> choice | prod :: prods -> match Precedence.reduce_reduce choice prod with | Some choice -> best choice prods | None -> (* The cause for not knowing which production is best could be: 1- the productions originate in different source files; 2- they are derived, via inlining, from the same production. *) Error.signal (Production.positions choice @ Production.positions prod) "do not know how to resolve a reduce/reduce conflict\n\ between the following two productions:\n%s\n%s" (Production.print choice) (Production.print prod); choice (* dummy *) (* Go ahead. *) let default_conflict_resolution () = let shift_reduce = ref 0 and reduce_reduce = ref 0 in List.iter (fun node -> node.reductions <- TerminalMap.fold (fun tok prods reductions -> try let (_ : node) = SymbolMap.find (Symbol.T tok) node.transitions in (* There is a transition at this symbol, so this is a (possibly multiway) shift/reduce conflict. Resolve in favor of shifting by suppressing all reductions. *) shift_reduce := List.length prods + !shift_reduce; reductions with Not_found -> (* There is no transition at this symbol. Check whether we have multiple reductions. *) match prods with | [] -> assert false | [ _ ] -> TerminalMap.add tok prods reductions | prod :: ((_ :: _) as prods) -> (* We have a reduce/reduce conflict. Resolve, if possible, in favor of a single reduction. This reduction must be preferable to each of the others. *) reduce_reduce := List.length prods + !reduce_reduce; TerminalMap.add tok [ best prod prods ] reductions ) node.reductions TerminalMap.empty ) conflict_nodes; if !shift_reduce = 1 then Error.warning [] "one shift/reduce conflict was arbitrarily resolved."
else if !shift_reduce > 1 then Error.warning [] "%d shift/reduce conflicts were arbitrarily resolved." !shift_reduce; if !reduce_reduce = 1 then Error.warning [] "one reduce/reduce conflict was arbitrarily resolved." else if !reduce_reduce > 1 then Error.warning [] "%d reduce/reduce conflicts were arbitrarily resolved." !reduce_reduce; (* Now, ensure that states that have a reduce action at the pseudo-token "#" have no other action. *) let ambiguities = ref 0 in fold (fun () node -> try let prods, reductions = TerminalMap.lookup_and_remove Terminal.sharp node.reductions in let prod = Misc.single prods in (* This node has a reduce action at "#". Determine whether there exist other actions. If there exist any other actions, suppress this reduce action, and signal an ambiguity. We signal an ambiguity even in the case where all actions at this node call for reducing a single production. Indeed, in that case, even though we know that this production must be reduced, we do not know whether we should first discard the current token (and call the lexer). *) let has_ambiguity = ref false in let toks = ref TerminalSet.empty in TerminalMap.iter (fun tok _prods -> node.reductions <- reductions; has_ambiguity := true; toks := TerminalSet.add tok !toks ) reductions; SymbolMap.iter (fun symbol _ -> match symbol with | Symbol.N _ -> () | Symbol.T tok -> node.reductions <- reductions; has_ambiguity := true; toks := TerminalSet.add tok !toks ) node.transitions; if !has_ambiguity then begin incr ambiguities; if Settings.dump then begin Printf.fprintf (Lazy.force out) "State %d has an end-of-stream conflict. There is a tension between\n\ (1) %s\n\ without even requesting a lookahead token, and\n\ (2) checking whether the lookahead token is %s%s,\n\ which would require some other action.\n\n" (number node) (match Production.classify prod with | Some nt -> Printf.sprintf "accepting %s" (Nonterminal.print false nt) | None -> Printf.sprintf "reducing production %s" (Production.print prod)) (if TerminalSet.cardinal !toks > 1 then "one of " else "") (TerminalSet.print !toks) end end with Not_found -> () ) (); if !ambiguities = 1 then Error.grammar_warning [] "one state has an end-of-stream conflict." else if !ambiguities > 1 then Error.grammar_warning [] "%d states have an end-of-stream conflict." !ambiguities (* ------------------------------------------------------------------------ *) (* Extra reductions. 2015/10/19 *) (* If a state can reduce one production whose left-hand symbol has been marked [%on_error_reduce], and only one such production, then every error action in this state is replaced with a reduction action. This is done even though this state may have outgoing shift transitions: thus, we are forcing one interpretation of the past, among several possible interpretations. *) (* The above is the lax interpretation of the criterion. In a stricter interpretation, one could require the state to be able to reduce only one production, and furthermore require this production to be marked. In practice, the lax interpretation makes [%on_error_reduce] more powerful, and this extra power seems useful. *) (* The code below looks like the decision on a default reduction in [Invariant], except we do not impose the absence of outgoing terminal transitions. Also, we actually modify the automaton, so the back-ends, the reference interpreter, etc., need not be aware of this feature, whereas they are aware of default reductions. 
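For example -- the symbol [expr] is hypothetical -- if the grammar contains [%on_error_reduce expr] and some state can reduce [expr -> expr PLUS expr] but no other production whose left-hand symbol is so marked, then every error action of that state becomes a reduction of this production, and the error, if any, is detected later, after the reduction has taken place.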
*) (* This code can run before we decide on the default reductions; this does not affect which default reductions will be permitted. *) (* A count of how many states receive extra reductions through this mechanism. *) let extra = ref 0 (* The set of nonterminal symbols in the left-hand side of an extra reduction. *) let extra_nts = ref StringSet.empty let lhs prod : string = Nonterminal.print false (Production.nt prod) let extra_reductions () = iter (fun node -> (* Just like a default reduction, an extra reduction should be forbidden (it seems) if [forbid_default_reduction] is set. *) if not node.forbid_default_reduction then begin (* Compute the productions which this node can reduce. *) let productions = invert (reductions node) in (* Keep only those whose left-hand symbol is marked [%on_error_reduce]. *) let productions = ProductionMap.filter (fun prod _ -> StringSet.mem (lhs prod) OnErrorReduce.declarations ) productions in (* Check whether only one such production remains. *) match ProductionMap.is_singleton productions with | None -> () | Some (prod, _) -> let acceptable = acceptable_tokens node in (* An extra reduction is possible. Replace every error action with a reduction of [prod]. If we replace at least one error action with a reduction, update [extra] and [extra_nts]. *) let triggered = lazy ( incr extra; extra_nts := StringSet.add (lhs prod) !extra_nts ) in Terminal.iter_real (fun tok -> if not (TerminalSet.mem tok acceptable) then begin node.reductions <- TerminalMap.add tok [ prod ] node.reductions; Lazy.force triggered end ) end ); (* Info message. *) if !extra > 0 then Error.logA 1 (fun f -> Printf.fprintf f "Extra reductions on error were added in %d states.\n" !extra ); (* Warn about useless %on_error_reduce declarations. *) StringSet.iter (fun nt -> if not (StringSet.mem nt !extra_nts) then Error.grammar_warning [] "the declaration %%on_error_reduce %s is never useful." nt ) OnErrorReduce.declarations (* ------------------------------------------------------------------------ *) (* Define [fold_entry], which in some cases facilitates the use of [entry]. *) let fold_entry f accu = ProductionMap.fold (fun prod state accu -> let nt : Nonterminal.t = match Production.classify prod with | Some nt -> nt | None -> assert false (* this is a start production *) in let t : Stretch.ocamltype = Nonterminal.ocamltype_of_start_symbol nt in f prod state nt t accu ) entry accu let entry_of_nt nt = (* Find the entry state that corresponds to [nt]. *) try ProductionMap.find (Production.startsymbol2startprod nt) entry with Not_found -> assert false exception Found of Nonterminal.t let nt_of_entry s = (* [s] should be an initial state. *) assert (incoming_symbol s = None); try ProductionMap.iter (fun prod entry -> if Node.compare s entry = 0 then match Production.classify prod with | None -> assert false | Some nt -> raise (Found nt) ) entry; (* This should not happen if [s] is indeed an initial state. *) assert false with Found nt -> nt menhir-20151112/src/back.mli0000644000175000017500000000012712621170073014361 0ustar mehdimehdi(* This module drives the back-end. No functionality is offered by this module.
*) menhir-20151112/src/InfiniteArray.mli0000644000175000017500000000346612621170074016237 0ustar mehdimehdi(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, INRIA Paris-Rocquencourt *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright 2005-2015 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (** This module implements infinite arrays. **) type 'a t (** [make x] creates an infinite array, where every slot contains [x]. **) val make: 'a -> 'a t (** [get a i] returns the element contained at offset [i] in the array [a]. Slots are numbered 0 and up. **) val get: 'a t -> int -> 'a (** [set a i x] sets the element contained at offset [i] in the array [a] to [x]. Slots are numbered 0 and up. **) val set: 'a t -> int -> 'a -> unit (** [extent a] is the length of an initial segment of the array [a] that is sufficiently large to contain all [set] operations ever performed. In other words, all elements beyond that segment have the default value. *) val extent: 'a t -> int (** [domain a] is a fresh copy of an initial segment of the array [a] whose length is [extent a]. *) val domain: 'a t -> 'a array menhir-20151112/src/DependencyGraph.ml0000644000175000017500000000231312621170073016347 0ustar mehdimehdiopen Grammar let print_dependency_graph() = (* Allocate. *) let forward : NonterminalSet.t NonterminalMap.t ref = ref NonterminalMap.empty in let successors nt = try NonterminalMap.find nt !forward with Not_found -> NonterminalSet.empty in (* Populate. *) Production.iter (fun prod -> let nt1 = Production.nt prod and rhs = Production.rhs prod in Array.iter (function | Symbol.T _ -> () | Symbol.N nt2 -> forward := NonterminalMap.add nt1 (NonterminalSet.add nt2 (successors nt1)) !forward ) rhs ); (* Print. 
*) let module P = Dot.Print (struct type vertex = Nonterminal.t let name nt = Printf.sprintf "nt%d" (Nonterminal.n2i nt) let successors (f : ?style:Dot.style -> label:string -> vertex -> unit) nt = NonterminalSet.iter (fun successor -> f ~label:"" successor ) (successors nt) let iter (f : ?shape:Dot.shape -> ?style:Dot.style -> label:string -> vertex -> unit) = Nonterminal.iter (fun nt -> f ~label:(Nonterminal.print false nt) nt ) end) in let f = open_out (Settings.base ^ ".dot") in P.print f; close_out f menhir-20151112/src/misc.ml0000644000175000017500000001615612621170073014254 0ustar mehdimehdi let ( $$ ) x f = f x let unSome = function None -> assert false | Some x -> x let o2s o f = match o with | None -> "" | Some x -> f x let single = function | [ x ] -> x | _ -> assert false let rec mapd f = function | [] -> [] | x :: xs -> let y1, y2 = f x in y1 :: y2 :: mapd f xs let tabulate n f = let a = Array.init n f in Array.get a let tabulateb n f = let a = Array.init n f in Array.get a, Array.fold_left (fun count element -> if element then count + 1 else count ) 0 a let tabulatef number fold n dummy f = let a = Array.make n dummy in let () = fold (fun () element -> a.(number element) <- f element ) () in let get element = a.(number element) in get let tabulateo number fold n f = let c = ref 0 in let get = tabulatef number fold n None (fun element -> let image = f element in begin match image with | Some _ -> incr c | None -> () end; image ) in get, !c module IntSet = Set.Make (struct type t = int let compare = ( - ) end) type 'a iter = ('a -> unit) -> unit let separated_iter_to_string printer separator iter = let b = Buffer.create 32 in let first = ref true in iter (fun x -> if !first then begin Buffer.add_string b (printer x); first := false end else begin Buffer.add_string b separator; Buffer.add_string b (printer x) end ); Buffer.contents b let separated_list_to_string printer separator xs = separated_iter_to_string printer separator (fun f -> List.iter f xs) let terminated_iter_to_string printer terminator iter = let b = Buffer.create 32 in iter (fun x -> Buffer.add_string b (printer x); Buffer.add_string b terminator ); Buffer.contents b let terminated_list_to_string printer terminator xs = terminated_iter_to_string printer terminator (fun f -> List.iter f xs) let index_map string_map = let n = StringMap.cardinal string_map in let a = Array.make n None in let conv, _ = StringMap.fold (fun k v (conv, idx) -> a.(idx) <- Some (k, v); StringMap.add k idx conv, idx + 1) string_map (StringMap.empty, 0) in ((fun n -> snd (unSome a.(n))), (fun k -> StringMap.find k conv), (fun n -> fst (unSome a.(n)))) let support_assoc l x = try List.assoc x l with Not_found -> x let index (strings : string list) : int * string array * int StringMap.t = let name = Array.of_list strings and n, map = List.fold_left (fun (n, map) s -> n+1, StringMap.add s n map ) (0, StringMap.empty) strings in n, name, map (* Turning an implicit list, stored using pointers through a hash table, into an explicit list. The head of the implicit list is not included in the explicit list. *) let materialize (table : ('a, 'a option) Hashtbl.t) (x : 'a) : 'a list = let rec loop x = match Hashtbl.find table x with | None -> [] | Some x -> x :: loop x in loop x (* [iteri] implements a [for] loop over integers, from 0 to [n-1]. *) let iteri n f = for i = 0 to n - 1 do f i done (* [foldi] implements a [for] loop over integers, from 0 to [n-1], with an accumulator. 
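For instance, [foldi 3 f accu] computes [f 2 (f 1 (f 0 accu))].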
[foldij] implements a [for] loop over integers, from [start] to [n-1], with an accumulator. *) let foldij start n f accu = let rec loop i accu = if i = n then accu else loop (i+1) (f i accu) in loop start accu let foldi n f accu = foldij 0 n f accu (* [mapij start n f] produces the list [ f start; ... f (n-1) ]. *) let mapij start n f = List.rev ( foldij start n (fun i accu -> f i :: accu ) [] ) (* [mapi n f] produces the list [ f 0; ... f (n-1) ]. *) let mapi n f = mapij 0 n f (* [qfold f accu q] repeatedly takes an element [x] off the queue [q] and applies [f] to the accumulator and to [x], until [q] becomes empty. Of course, [f] can add elements to [q] as a side-effect. We allocate an option to ensure that [qfold] is tail-recursive. *) let rec qfold f accu q = match try Some (Queue.take q) with Queue.Empty -> None with | Some x -> qfold f (f accu x) q | None -> accu (* [qiter f q] repeatedly takes an element [x] off the queue [q] and applies [f] to [x], until [q] becomes empty. Of course, [f] can add elements to [q] as a side-effect. *) let qiter f q = try while true do f (Queue.take q) done with Queue.Empty -> () let rec smap f = function | [] -> [] | (x :: xs) as l -> let x' = f x and xs' = smap f xs in if x == x' && xs == xs' then l else x' :: xs' let rec smapa f accu = function | [] -> accu, [] | (x :: xs) as l -> let accu, x' = f accu x in let accu, xs' = smapa f accu xs in accu, if x == x' && xs == xs' then l else x' :: xs' let normalize s = let s = Bytes.of_string s in let n = Bytes.length s in for i = 0 to n - 1 do match Bytes.get s i with | '(' | ')' | ',' -> Bytes.set s i '_' | _ -> () done; Bytes.unsafe_to_string s (* [postincrement r] increments [r] and returns its original value. *) let postincrement r = let x = !r in r := x + 1; x (* [map_opt f l] returns the list of [y]s such that [f x = Some y] where [x] is in [l], preserving the order of elements of [l]. *) let map_opt f l = List.(rev (fold_left (fun ys x -> match f x with | None -> ys | Some y -> y :: ys ) [] l)) let new_intern capacity = (* Set up a hash table, mapping strings to unique strings. *) let module H = Hashtbl.Make(struct type t = string let equal = (=) let hash = Hashtbl.hash end) in let table = H.create capacity in (* This counts the calls to [intern]. *) let c = ref 0 in (* A string is mapped to a unique string, as follows. *) let intern s = c := !c + 1; try H.find table s with Not_found -> H.add table s s; s and verbose () = Printf.fprintf stderr "%d calls to intern; %d unique strings.\n%!" !c (H.length table) in intern, verbose let new_encode_decode capacity = (* Set up a hash table, mapping strings to unique integers. *) let module H = Hashtbl.Make(struct type t = string let equal = (=) let hash = Hashtbl.hash end) in let table = H.create capacity in (* Set up a resizable array, mapping integers to strings. *) let text = MenhirLib.InfiniteArray.make "" in (* This counts the calls to [encode]. *) let c = ref 0 in (* A string is mapped to a unique integer, as follows. *) let encode (s : string) : int = c := !c + 1; try H.find table s with Not_found -> (* The number of elements in the hash table is the next available unique integer code. *) let i = H.length table in H.add table s i; MenhirLib.InfiniteArray.set text i s; i (* An integer code can be mapped back to a string, as follows. *) and decode (i : int) : string = MenhirLib.InfiniteArray.get text i and verbose () = Printf.fprintf stderr "%d calls to encode; %d unique strings.\n%!"
!c (H.length table) in encode, decode, verbose menhir-20151112/src/mark.mli0000644000175000017500000000076312621170073014421 0ustar mehdimehdi(** This module implements a very simple notion of ``mark''. *) (** The type of marks. *) type t (** [fresh()] generates a fresh mark, that is, a mark that is guaranteed to be distinct from all existing marks. *) val fresh: unit -> t (** [same mark1 mark2] tells whether [mark1] and [mark2] are the same mark, that is, were created by the same call to [fresh]. *) val same: t -> t -> bool (** [none] is a distinguished mark, created via an initial call to [fresh()]. *) val none: t menhir-20151112/src/front.ml0000644000175000017500000001245612621170073014450 0ustar mehdimehdi(* The front-end. This module performs a series of toplevel side effects. *) (* ------------------------------------------------------------------------- *) (* Reading a grammar from a file. *) let load_partial_grammar filename = let validExt = if Settings.coq then ".vy" else ".mly" in if not (Filename.check_suffix filename validExt) then Error.error [] "argument file names should end in %s. \"%s\" is not accepted." validExt filename; Error.set_filename filename; try let contents = IO.read_whole_file filename in Error.file_contents := Some contents; let open Lexing in let lexbuf = Lexing.from_string contents in lexbuf.lex_curr_p <- { lexbuf.lex_curr_p with pos_fname = filename }; let grammar = { (Driver.grammar Lexer.main lexbuf) with ConcreteSyntax.pg_filename = filename } in Error.file_contents := None; grammar with Sys_error msg -> Error.error [] "%s" msg (* ------------------------------------------------------------------------- *) (* Read all of the grammar files that are named on the command line. *) let partial_grammars = List.map load_partial_grammar Settings.filenames let () = Time.tick "Lexing and parsing" (* ------------------------------------------------------------------------- *) (* If several grammar files were specified, merge them. *) let parameterized_grammar = PartialGrammar.join_partial_grammars partial_grammars (* ------------------------------------------------------------------------- *) (* Expand away all applications of parameterized nonterminal symbols, so as to obtain a grammar without parameterized nonterminal symbols. *) let grammar = ParameterizedGrammar.expand parameterized_grammar let () = Time.tick "Joining and expanding" (* ------------------------------------------------------------------------- *) (* If [--only-tokens] was specified on the command line, produce the definition of the [token] type and stop. *) let () = TokenType.produce_tokentypes grammar (* ------------------------------------------------------------------------- *) (* Perform reachability analysis. *) let grammar = Reachability.trim grammar let () = Time.tick "Trimming" (* ------------------------------------------------------------------------- *) (* If [--depend] or [--raw-depend] was specified on the command line, perform dependency analysis and stop. *) let () = match Settings.depend with | Settings.OMRaw | Settings.OMPostprocess -> Infer.depend grammar (* never returns *) | Settings.OMNone -> () (* The purpose of [--depend] and [--raw-depend] is to support [--infer]. Indeed, [--infer] is implemented by producing a mock [.ml] file (which contains just the semantic actions) and invoking [ocamlc]. This requires certain [.cmi] files to exist. So, [--(raw-)depend] is a way for us to announce which [.cmi] files we need. 
It is implemented by producing the mock [.ml] file and running [ocamldep] on it. We also produce a mock [.mli] file, even though in principle it should be unnecessary -- see comment in [nonterminalType.mli]. *) (* ------------------------------------------------------------------------- *) (* If some flags imply that we will NOT produce an OCaml parser, then there is no need to perform type inference, so we act as if --infer was absent. This saves time and dependency nightmares. *) let skipping_parser_generation = Settings.coq || Settings.compile_errors <> None || Settings.interpret_error || Settings.list_errors || Settings.compare_errors <> None || Settings.update_errors <> None || Settings.echo_errors <> None || false (* maybe also: [preprocess_mode <> PMNormal] *) (* ------------------------------------------------------------------------- *) (* If [--infer] was specified on the command line, perform type inference. The OCaml type of every nonterminal is then known. *) let grammar = if Settings.infer && not skipping_parser_generation then let grammar = Infer.infer grammar in Time.tick "Inferring types for nonterminals"; grammar else grammar (* ------------------------------------------------------------------------- *) (* Expand away some of the position keywords. *) let grammar = KeywordExpansion.expand_grammar grammar (* ------------------------------------------------------------------------- *) (* If [--no-inline] was specified on the command line, skip the inlining of non terminal definitions marked with %inline. *) let grammar = if Settings.inline then begin let grammar, inlined = NonTerminalDefinitionInlining.inline grammar in if not Settings.infer && inlined && not skipping_parser_generation then Error.warning [] "you are using the standard library and/or the %%inline keyword. We\n\ recommend switching on --infer in order to avoid obscure type error messages."; Time.tick "Inlining"; grammar end else grammar (* ------------------------------------------------------------------------- *) (* If [--only-preprocess] or [--only-preprocess-drop] was specified on the command line, print the grammar and stop. Otherwise, continue. *) let () = match Settings.preprocess_mode with | Settings.PMOnlyPreprocess mode -> UnparameterizedPrinter.print mode stdout grammar; exit 0 | Settings.PMNormal -> () menhir-20151112/src/settings.mli0000644000175000017500000001400612621170073015322 0ustar mehdimehdi(* This module parses the command line. *) (* The list of file names that appear on the command line. *) val filenames: string list (* How to deal with the type of tokens. *) type token_type_mode = | TokenTypeAndCode (* produce the definition of the [token] type and code for the parser *) | TokenTypeOnly (* produce the type definition only *) | CodeOnly of string (* produce the code only, by relying on an external token type *) val token_type_mode: token_type_mode (* How to construct the automaton. *) type construction_mode = | ModeCanonical (* --canonical: canonical Knuth LR(1) automaton *) | ModeInclusionOnly (* --no-pager : states are merged when there is an inclusion relationship, default reductions are used *) | ModePager (* normal mode: states are merged as per Pager's criterion, default reductions are used *) | ModeLALR (* --lalr : states are merged as in an LALR generator, i.e. as soon as they have the same LR(0) core *) val construction_mode: construction_mode (* Whether conflicts should be explained. *) val explain: bool (* Whether the automaton should be dumped. 
*) val dump: bool (* Whether the automaton's construction should be explained (very verbose). *) val follow: bool (* Whether the grammar's dependence graph should be dumped. *) val graph: bool (* Whether tracing instructions should be generated. *) val trace: bool (* Whether one should stop and print the grammar after joining and expanding the grammar. *) type print_mode = | PrintNormal | PrintUnitActions | PrintUnitActionsUnitTokens type preprocess_mode = | PMNormal (* preprocess and continue *) | PMOnlyPreprocess of print_mode (* preprocess, print grammar, stop *) val preprocess_mode: preprocess_mode (* Whether one should invoke ocamlc in order to infer types for all nonterminals. *) val infer: bool (* Whether one should inline the non terminal definitions marked with the %inline keyword. *) val inline: bool (* Whether and how one should invoke ocamldep in order to compute and display dependencies. *) type ocamldep_mode = | OMNone (* do not invoke ocamldep *) | OMRaw (* invoke ocamldep and echo its raw output *) | OMPostprocess (* invoke ocamldep and postprocess its output *) val depend: ocamldep_mode (* Whether comments should be printed or discarded. *) val comment: bool (* This undocumented flag suppresses prefixing of identifiers with an unlikely prefix in the generated code. This increases the code's readability, but can cause identifiers in semantic actions to be captured. *) val noprefix: bool (* This undocumented flag causes the code to be transformed by [Inline]. It is on by default. *) val code_inlining: bool (* How [ocamlc] and [ocamldep] should be invoked. *) val ocamlc: string val ocamldep: string (* How verbose we should be. *) val logG: int (* diagnostics on the grammar *) val logA: int (* diagnostics on the automaton *) val logC: int (* diagnostics on the generated code *) (* Whether tasks should be timed. *) val timings: bool (* The base name that should be used for the files that we create. This name can contain a path. *) val base: string (* The filename of the standard library. *) val stdlib_filename : string (* Whether Menhir should behave as an interpreter. *) val interpret : bool (* Whether the interpreter should build and display concrete syntax trees. *) val interpret_show_cst : bool (* Whether Menhir should behave as an interpreter, in a special mode where it checks one input sentence, expecting it to trigger an error at the last token, and displays which state was reached. *) val interpret_error : bool (* Whether to use the table-based back-end ([true]) or the code-based back-end ([false]). *) val table : bool (* Whether to generate the inspection API (which requires GADTs, and requires producing more tables). *) val inspection : bool (* Whether to generate a coq description of the grammar and automaton. *) val coq : bool (* Whether the coq description must contain completeness proofs. *) val coq_no_complete : bool (* Whether the coq backend should ignore types and semantic actions. *) val coq_no_actions : bool (* Whether unresolved LR(1) conflicts, useless precedence declarations, productions that are never reduced, etc. should be treated as errors. *) val strict: bool (* This flag causes the exception [Error] to be declared equal to [Parsing.Parse_error]. This is useful when full compatibility with ocamlyacc is desired. In particular, this is used when building Menhir itself, since Menhir is compiled first using ocamlyacc, then using Menhir. *) val fixedexc: bool (* This is a set of tokens which may be unused and about which we should not emit a warning.
*) val ignored_unused_tokens: StringSet.t (* This flag supersedes the set [ignored_unused_tokens]. If it is set, then we should not emit a warning about any unused tokens. *) val ignore_all_unused_tokens: bool (* This flag causes Menhir to produce a list of erroneous input sentences. Enough sentences are computed to produce exactly one error in every state where an error can occur. *) val list_errors: bool (* This flag causes Menhir to read the error message descriptions stored in [filename] and compile them to OCaml code. *) val compile_errors: string option (* If present, this is a pair of .messages files whose contents should be compared. *) val compare_errors: (string * string) option (* This flag causes Menhir to read the error message descriptions stored in [filename] and re-generate the auto-generated comments, which begin with [##]. This allows bringing these comments up to date when the grammar evolves. *) val update_errors: string option (* This flag causes Menhir to read the error message descriptions stored in [filename] and echo the error sentences (and nothing else; no messages, no comments). *) val echo_errors: string option menhir-20151112/src/partialGrammar.ml0000644000175000017500000005440512621170073016263 0ustar mehdimehdiopen Misc open Syntax open ConcreteSyntax open InternalSyntax open Positions (* ------------------------------------------------------------------------- *) (* This adds one declaration [decl], as found in file [filename], to the grammar [grammar]. *) let join_declaration filename (grammar : grammar) decl = match decl.value with (* Preludes are stored in an arbitrary order. The order of preludes within a single source file is preserved. Same treatment for functor parameters. *) | DCode code -> { grammar with p_preludes = grammar.p_preludes @ [ code ] } | DParameter (Stretch.Declared stretch) -> { grammar with p_parameters = grammar.p_parameters @ [ stretch ] } | DParameter (Stretch.Inferred _) -> assert false (* Token declarations are recorded. Things are made somewhat difficult by the fact that %token and %left-%right-%nonassoc declarations are independent. *) | DToken (ocamltype, terminal) -> let token_property = try (* Retrieve any previous definition for this token. *) let token_property = StringMap.find terminal grammar.p_tokens in (* If the previous definition was actually a %token declaration (as opposed to a %left, %right, or %nonassoc specification), signal an error. *) if token_property.tk_is_declared then Error.errorp decl "the token %s has multiple definitions." terminal (* Otherwise, update the previous definition. *) else { token_property with tk_is_declared = true; tk_ocamltype = ocamltype; tk_filename = filename; tk_position = decl.position; } with Not_found -> (* If no previous definition exists, create one. *) { tk_filename = filename; tk_ocamltype = ocamltype; tk_associativity = UndefinedAssoc; tk_precedence = UndefinedPrecedence; tk_position = decl.position; tk_is_declared = true } in { grammar with p_tokens = StringMap.add terminal token_property grammar.p_tokens } (* Start symbols. *) | DStart nonterminal -> { grammar with p_start_symbols = StringMap.add nonterminal decl.position grammar.p_start_symbols } (* Type declarations for nonterminals. *) | DType (ocamltype, nonterminal) -> { grammar with p_types = (nonterminal, with_pos (position decl) ocamltype)::grammar.p_types } (* Reductions on error for nonterminals. 
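For instance, a declaration such as [%on_error_reduce expr] causes [expr] to be added to the list [p_on_error_reduce], as the case below shows.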
*) | DOnErrorReduce (nonterminal) -> { grammar with p_on_error_reduce = nonterminal :: grammar.p_on_error_reduce } (* Token associativity and precedence. *) | DTokenProperties (terminal, assoc, prec) -> (* Retrieve the property record for this token, creating one if none existed (but without deeming the token to have been declared). *) let token_properties, grammar = try StringMap.find terminal grammar.p_tokens, grammar with Not_found -> let p = { tk_filename = filename; tk_ocamltype = None; tk_associativity = UndefinedAssoc; tk_precedence = prec; tk_is_declared = false; (* Will be updated later. *) tk_position = decl.position; } in p, { grammar with p_tokens = StringMap.add terminal p grammar.p_tokens } in (* Reject duplicate precedence declarations. *) if token_properties.tk_associativity <> UndefinedAssoc then Error.error [ decl.position; token_properties.tk_position ] "there are multiple precedence declarations for token %s." terminal; (* Record the new declaration. *) token_properties.tk_precedence <- prec; token_properties.tk_associativity <- assoc; grammar (* ------------------------------------------------------------------------- *) (* This stores an optional trailer into a grammar. Trailers are stored in an arbitrary order. *) let join_trailer trailer grammar = match trailer with | None -> grammar | Some trailer -> { grammar with p_postludes = trailer :: grammar.p_postludes } (* ------------------------------------------------------------------------- *) (* We rewrite definitions when nonterminals are renamed. The renaming [phi] is an association list of names to names. *) type renaming = (nonterminal * nonterminal) list let identity_renaming = [] let rewrite_nonterminal (phi : renaming) nonterminal = Misc.support_assoc phi nonterminal let rewrite_parameter phi parameter = Parameters.map (Positions.map (Misc.support_assoc phi)) parameter let rewrite_element phi (ido, parameter) = ido, rewrite_parameter phi parameter let rewrite_branch phi ({ pr_producers = producers } as branch) = { branch with pr_producers = List.map (rewrite_element phi) producers } let rewrite_branches phi branches = match phi with | [] -> branches | _ -> List.map (rewrite_branch phi) branches let fresh_counter = ref 0 let names = ref StringSet.empty let use_name name = names := StringSet.add name !names let used_name name = StringSet.mem name !names let rec fresh ?(hint = "v") () = let name = incr fresh_counter; hint ^ string_of_int !fresh_counter in if used_name name then fresh ~hint () else ( use_name name; name ) (* Alpha conversion of [prule]. We rename bound parameters using fresh names. *) let alphaconvert_rule parameters prule = let phi = List.combine parameters (List.map (fun x -> fresh ~hint:x ()) parameters) in { prule with pr_parameters = List.map (Misc.support_assoc phi) prule.pr_parameters; pr_branches = rewrite_branches phi prule.pr_branches } (* Rewrite a rule taking bound names into account. We rename parameters to avoid capture.
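For example, suppose [phi] renames [elt] to [item], while the rule at hand carries a formal parameter that happens to be named [item]: rewriting the branches naively would capture the occurrences of this parameter. We therefore first alpha-convert the rule, renaming [item] to a fresh name (say [item1]), and only then apply [phi].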
*) let rewrite_rule phi prule = let ids = List.fold_left (fun acu (f, d) -> StringSet.add f (StringSet.add d acu)) StringSet.empty phi in let captured_parameters = List.filter (fun p -> StringSet.mem p ids) prule.pr_parameters in let prule = alphaconvert_rule captured_parameters prule in { prule with pr_nt = rewrite_nonterminal phi prule.pr_nt; pr_branches = rewrite_branches phi prule.pr_branches } let rewrite_rules phi rules = List.map (rewrite_rule phi) rules let rewrite_grammar phi grammar = (* We assume that [phi] affects only private symbols, so it does not affect the start symbols. *) if phi = identity_renaming then grammar else { grammar with pg_rules = rewrite_rules phi grammar.pg_rules } (* ------------------------------------------------------------------------- *) (* To rename (internalize) a nonterminal, we prefix it with its filename. This guarantees that names are unique. *) let is_valid_nonterminal_character = function | 'A' .. 'Z' | 'a' .. 'z' | '_' | '\192' .. '\214' | '\216' .. '\246' | '\248' .. '\255' | '0' .. '9' -> true | _ -> false let restrict filename = let m = Bytes.of_string (Filename.chop_suffix filename (if Settings.coq then ".vy" else ".mly")) in for i = 0 to Bytes.length m - 1 do if not (is_valid_nonterminal_character (Bytes.get m i)) then Bytes.set m i '_' done; Bytes.unsafe_to_string m let rename nonterminal filename = let name = restrict filename ^ "_" ^ nonterminal in if used_name name then fresh ~hint:name () else (use_name name; name) (* ------------------------------------------------------------------------- *) (* A nonterminal is considered public if it is declared using %public or %start. *) (* TEMPORARY why unused? let is_public grammar prule = prule.pr_public_flag || StringMap.mem prule.pr_nt grammar.p_start_symbols *) (* ------------------------------------------------------------------------- *) type symbol_kind = (* The nonterminal is declared public at a particular position. *) | PublicNonTerminal of Positions.t (* The nonterminal is not declared public at a particular position. *) | PrivateNonTerminal of Positions.t (* The symbol is a token. *) | Token of token_properties (* We do not know yet what the symbol means. Either it is defined later, or it is free in this partial grammar. *) | DontKnow of Positions.t type symbol_table = (symbol, symbol_kind) Hashtbl.t let find_symbol (symbols : symbol_table) symbol = Hashtbl.find symbols symbol let add_in_symbol_table (symbols : symbol_table) symbol kind = use_name symbol; Hashtbl.add symbols symbol kind; symbols let replace_in_symbol_table (symbols : symbol_table) symbol kind = Hashtbl.replace symbols symbol kind; symbols let empty_symbol_table () : symbol_table = Hashtbl.create 13 let store_symbol (symbols : symbol_table) symbol kind = try let sym_info = find_symbol symbols symbol in match sym_info, kind with (* There are two definitions of the same symbol in one particular unit. This is forbidden. *) | (PublicNonTerminal p | PrivateNonTerminal p), (PublicNonTerminal p' | PrivateNonTerminal p') -> Error.error [ p; p'] "the nonterminal symbol %s is multiply defined." symbol (* The symbol is known to be a token but declared as a nonterminal. *) | (Token tkp, (PrivateNonTerminal p | PublicNonTerminal p)) | ((PrivateNonTerminal p | PublicNonTerminal p), Token tkp) -> Error.error [ p; tkp.tk_position ] "the identifier %s is a reference to a token." symbol (* We gain no new information.
*) | _, DontKnow _ | Token _, Token _ -> symbols (* We learn that the symbol is a nonterminal or a token. *) | DontKnow _, _ -> replace_in_symbol_table symbols symbol kind with Not_found -> add_in_symbol_table symbols symbol kind let store_used_symbol position tokens symbols symbol = try store_symbol symbols symbol (Token (StringMap.find symbol tokens)) with Not_found -> store_symbol symbols symbol (DontKnow position) let non_terminal_is_not_reserved symbol positions = if symbol = "error" then Error.error positions "%s is reserved and thus cannot be used \ as a non-terminal symbol." symbol let non_terminal_is_not_a_token tokens symbol positions = try let tkp = StringMap.find symbol tokens in Error.error (positions @ [ tkp.tk_position ]) "the identifier %s is a reference to a token." symbol with Not_found -> () let store_public_nonterminal tokens symbols symbol positions = non_terminal_is_not_reserved symbol positions; non_terminal_is_not_a_token tokens symbol positions; store_symbol symbols symbol (PublicNonTerminal (List.hd positions)) let store_private_nonterminal tokens symbols symbol positions = non_terminal_is_not_reserved symbol positions; non_terminal_is_not_a_token tokens symbol positions; store_symbol symbols symbol (PrivateNonTerminal (List.hd positions)) (* for debugging, presumably: let string_of_kind = function | PublicNonTerminal p -> Printf.sprintf "public (%s)" (Positions.string_of_pos p) | PrivateNonTerminal p -> Printf.sprintf "private (%s)" (Positions.string_of_pos p) | Token tk -> Printf.sprintf "token (%s)" tk.tk_filename | DontKnow p -> Printf.sprintf "only used at (%s)" (Positions.string_of_pos p) let string_of_symbol_table t = let b = Buffer.create 13 in let m = 1 + Hashtbl.fold (fun k v acu -> max (String.length k) acu) t 0 in let fill_blank s = let s' = String.make m ' ' in String.blit s 0 s' 0 (String.length s); s' in Hashtbl.iter (fun k v -> Buffer.add_string b (Printf.sprintf "%s: %s\n" (fill_blank k) (string_of_kind v))) t; Buffer.contents b *) let is_private_symbol t x = try match Hashtbl.find t x with | PrivateNonTerminal _ -> true | _ -> false with Not_found -> false (* TEMPORARY why unused? let is_public_symbol t x = try match Hashtbl.find t x with | PublicNonTerminal _ -> true | _ -> false with Not_found -> false *) let fold_on_private_symbols f init t = Hashtbl.fold (fun k -> function PrivateNonTerminal _ -> (fun acu -> f acu k) | _ -> (fun acu -> acu)) t init let fold_on_public_symbols f init t = Hashtbl.fold (fun k -> function PublicNonTerminal _ -> (fun acu -> f acu k) | _ -> (fun acu -> acu)) t init let iter_on_only_used_symbols f t = Hashtbl.iter (fun k -> function DontKnow pos -> f k pos | _ -> ()) t let symbols_of grammar (pgrammar : ConcreteSyntax.grammar) = let tokens = grammar.p_tokens in let symbols_of_rule symbols prule = let rec store_except_rule_parameters = fun symbols (symbol, parameters) -> (* Rule parameters are bound locally, so they are not taken into account. *) if List.mem symbol.value prule.pr_parameters then symbols else (* Otherwise, mark this symbol as being used and analyse its parameters. *) List.fold_left (fun symbols -> function | ParameterApp (symbol, parameters) -> store_except_rule_parameters symbols (symbol, parameters) | ParameterVar symbol -> store_except_rule_parameters symbols (symbol, []) ) (store_used_symbol symbol.position tokens symbols symbol.value) parameters in (* Analyse each branch.
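For instance, in a branch such as [e = expr PLUS t = term], the symbols [expr], [PLUS], and [term] are recorded as used, except those that are formal parameters of the current rule, which are bound locally and therefore skipped.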
*) let symbols = List.fold_left (fun symbols branch -> List.fold_left (fun symbols (_, p) -> let symbol, parameters = Parameters.unapp p in store_except_rule_parameters symbols (symbol, parameters) ) symbols branch.pr_producers ) symbols prule.pr_branches in (* Store the symbol declaration. *) if prule.pr_public_flag || StringMap.mem prule.pr_nt grammar.p_start_symbols then store_public_nonterminal tokens symbols prule.pr_nt prule.pr_positions else store_private_nonterminal tokens symbols prule.pr_nt prule.pr_positions in List.fold_left symbols_of_rule (empty_symbol_table ()) pgrammar.pg_rules let merge_rules symbols pgs = (* Retrieve all the public symbols. *) let public_symbols = List.fold_left (fold_on_public_symbols (fun s k -> StringSet.add k s)) (StringSet.singleton "error") symbols in (* We check that the references in each grammar can be bound to a public symbol. *) let _ = List.iter (iter_on_only_used_symbols (fun k pos -> if not (StringSet.mem k public_symbols) then Error.error [ pos ] "%s is undefined." k)) symbols in (* Detect private symbol clashes and rename the clashing symbols if necessary. *) let detect_private_symbol_clashes = fold_on_private_symbols (fun (defined, clashes) symbol -> if StringSet.mem symbol defined || StringSet.mem symbol public_symbols then (defined, StringSet.add symbol clashes) else (StringSet.add symbol defined, clashes)) in let _private_symbols, clashes = List.fold_left detect_private_symbol_clashes (StringSet.empty, StringSet.empty) symbols in let rpgs = List.map (fun (symbol_table, pg) -> let renaming = StringSet.fold (fun x phi -> if is_private_symbol symbol_table x then begin let x' = rename x pg.pg_filename in Printf.fprintf stderr "Note: the nonterminal symbol %s (from %s) is renamed %s.\n" x pg.pg_filename x'; (x, x') :: phi end else phi) clashes [] in rewrite_grammar renaming pg) pgs in (* Merge public nonterminal definitions and copy private nonterminal definitions. Since the clashes between private symbols have already been resolved, these copies are safe. *) List.fold_left (fun rules rpg -> List.fold_left (fun rules r -> let r = try let r' = StringMap.find r.pr_nt rules in let positions = r.pr_positions @ r'.pr_positions in let ra, ra' = List.length r.pr_parameters, List.length r'.pr_parameters in (* The arity of the parameterized symbols must be constant. *) if ra <> ra' then Error.error positions "the symbol %s is defined with arities %d and %d." r.pr_nt ra ra' else if r.pr_inline_flag <> r'.pr_inline_flag then Error.error positions "not all definitions of %s are marked %%inline." r.pr_nt else (* We combine the different branches. The parameters could have different names, so we rename them with the fresh names assigned earlier (see the next comment). *) let phi = List.combine r.pr_parameters r'.pr_parameters in let rbr = rewrite_branches phi r.pr_branches in { r' with pr_positions = positions; pr_branches = rbr @ r'.pr_branches } with Not_found -> (* We alphaconvert the rule in order to avoid the capture of private symbols coming from another unit.
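For example, if this rule is [pair(elt)] and some other unit privately defines a symbol named [elt], then copying the rule verbatim into the merged grammar would let its occurrences of [elt] be captured by that private symbol; renaming the parameter to a fresh name, say [elt1], avoids this.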
*) alphaconvert_rule r.pr_parameters r in StringMap.add r.pr_nt r rules) rules rpg.pg_rules) StringMap.empty rpgs let empty_grammar = { p_preludes = []; p_postludes = []; p_parameters = []; p_start_symbols = StringMap.empty; p_types = []; p_tokens = StringMap.empty; p_rules = StringMap.empty; p_on_error_reduce = []; } let join grammar pgrammar = let filename = pgrammar.pg_filename in List.fold_left (join_declaration filename) grammar pgrammar.pg_declarations $$ join_trailer pgrammar.pg_trailer let check_parameterized_grammar_is_well_defined grammar = (* Every start symbol is defined and has a %type declaration. *) StringMap.iter (fun nonterminal p -> if not (StringMap.mem nonterminal grammar.p_rules) then Error.error [p] "the start symbol %s is undefined." nonterminal; if not (List.exists (function | ParameterVar { value = id }, _ -> id = nonterminal | _ -> false) grammar.p_types) then Error.error [p] "the type of the start symbol %s is unspecified." nonterminal; ) grammar.p_start_symbols; let parameter_head_symb = function | ParameterVar id -> id | ParameterApp (id, _) -> id in (* Every %type definition has, at its head, a nonterminal symbol. *) (* Same check for %on_error_reduce definitions. *) (* Apparently we do not check the parameters at this point. Maybe this is done later, or not at all. *) let check (kind : string) (ps : Syntax.parameter list) = List.iter (fun p -> let head_symb = parameter_head_symb p in if not (StringMap.mem (value head_symb) grammar.p_rules) then Error.error [Parameters.position p] "this should be a nonterminal symbol.\n\ %s declarations are applicable only to nonterminal symbols." kind ) ps in check "%type" (List.map fst grammar.p_types); check "%on_error_reduce" grammar.p_on_error_reduce; (* Every reference to a symbol is well defined. *) let reserved = [ "error" ] in let used_tokens = ref StringSet.empty in let mark_token_as_used token = used_tokens := StringSet.add token !used_tokens in let check_identifier_reference grammar prule s p = (* Mark the symbol as a used token if this is a token. *) if StringMap.mem s grammar.p_tokens then mark_token_as_used s; if not (StringMap.mem s grammar.p_rules || StringMap.mem s grammar.p_tokens || List.mem s prule.pr_parameters || List.mem s reserved) then Error.error [ p ] "%s is undefined." s in StringMap.iter (fun k prule -> List.iter (* Check each branch. *) (fun { pr_producers = producers; pr_branch_prec_annotation; } -> ignore (List.fold_left (* Check the producers. *) (fun already_seen (id, p) -> let symbol, parameters = Parameters.unapp p in let s = symbol.value and p = symbol.position in let already_seen = (* Check the producer id is unique. *) if StringSet.mem id.value already_seen then Error.error [ id.position ] "there are multiple producers named %s in this sequence." id.value; StringSet.add id.value already_seen in (* Check that the producer is defined somewhere. *) check_identifier_reference grammar prule s p; StringMap.iter (check_identifier_reference grammar prule) (List.fold_left Parameters.identifiers StringMap.empty parameters); (* If this producer seems to be a reference to a token, make sure it is a real token, as opposed to a pseudo-token introduced in a priority declaration. *) (try if not ((StringMap.find s grammar.p_tokens).tk_is_declared || List.mem s reserved) then Error.errorp symbol "%s has not been declared as a token." 
s with Not_found -> ()); already_seen ) StringSet.empty producers); match pr_branch_prec_annotation with | None -> () | Some terminal -> check_identifier_reference grammar prule terminal.value terminal.position; (* It is forbidden to use the %prec directive with %inline. *) if prule.pr_inline_flag then Error.errorp terminal "use of %%prec is forbidden in an %%inlined nonterminal definition."; (* Furthermore, the symbol following %prec must be a valid token identifier. *) if not (StringMap.mem terminal.value grammar.p_tokens) then Error.errorp terminal "%s is undefined." terminal.value) prule.pr_branches; (* It is forbidden to use %inline on a %start symbol. *) if (prule.pr_inline_flag && StringMap.mem k grammar.p_start_symbols) then Error.error prule.pr_positions "%s cannot be both a start symbol and inlined." k; ) grammar.p_rules; (* Check that every token is used. *) if not Settings.ignore_all_unused_tokens then begin match Settings.token_type_mode with | Settings.TokenTypeOnly -> () | Settings.TokenTypeAndCode | Settings.CodeOnly _ -> StringMap.iter (fun token { tk_position = p } -> if not (StringSet.mem token !used_tokens || StringSet.mem token Settings.ignored_unused_tokens) then Error.warning [p] "the token %s is unused." token ) grammar.p_tokens end; grammar let join_partial_grammars pgs = let grammar = List.fold_left join empty_grammar pgs in let symbols = List.map (symbols_of grammar) pgs in let tpgs = List.combine symbols pgs in let rules = merge_rules symbols tpgs in check_parameterized_grammar_is_well_defined { grammar with p_rules = rules } menhir-20151112/src/nonterminalType.ml0000644000175000017500000000514712621170073016507 0ustar mehdimehdiopen UnparameterizedSyntax open IL (* This is the conventional name of the nonterminal GADT, which describes the nonterminal symbols. *) let tcnonterminalgadt = "nonterminal" let tnonterminalgadt a = TypApp (tcnonterminalgadt, [ a ]) (* This is the conventional name of the data constructors of the nonterminal GADT. *) let tnonterminalgadtdata nt = "N_" ^ Misc.normalize nt (* This is the definition of the nonterminal GADT. Here, the data constructors have no value argument, but have a type index. *) exception MissingOCamlType of string let nonterminalgadtdef grammar = assert Settings.inspection; let comment, datadefs = try (* The ordering of this list matters. We want the data constructors to respect the internal ordering (as determined by [nonterminals] in [UnparameterizedSyntax]) of the nonterminal symbols. This may be exploited in the table back-end to allow an unsafe conversion of a data constructor to an integer code. See [n2i] in [InspectionTableInterpreter]. *) "The indexed type of nonterminal symbols.", List.map (fun nt -> let index = match ocamltype_of_symbol grammar nt with | Some t -> TypTextual t | None -> raise (MissingOCamlType nt) in { dataname = tnonterminalgadtdata nt; datavalparams = []; datatypeparams = Some [ index ] } ) (nonterminals grammar) with MissingOCamlType nt -> (* If the type of some nonterminal symbol is unknown, give up and define ['a nonterminal] as an abstract type. This is useful when we are in [--(raw)-depend] mode and we do not wish to fail. Instead, we produce a mock [.mli] file that is an approximation of the real [.mli] file. When we are not in [--(raw)-depend] mode, though, this is a problem. We display an error message and stop. 
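(When every nonterminal type is known, the generated definition has roughly the following shape, assuming a nonterminal symbol [expr] whose semantic value has type [int]: [type _ nonterminal = N_expr : int nonterminal | ...]; this is a sketch, not the exact output.)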
*) match Settings.depend with | Settings.OMRaw | Settings.OMPostprocess -> "The indexed type of nonterminal symbols (mock!).", [] | Settings.OMNone -> Error.error [] "\ the type of the nonterminal symbol %s is unknown.\n\ When --inspection is set, the type of every nonterminal symbol must be known.\n\ Please use --infer or specify the type of every symbol via %%type declarations." nt in [ IIComment comment; IITypeDecls [{ typename = tcnonterminalgadt; typeparams = [ "_" ]; typerhs = TDefSum datadefs; typeconstraint = None }] ] menhir-20151112/src/General.mli0000644000175000017500000000444212621170074015043 0ustar mehdimehdi(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, INRIA Paris-Rocquencourt *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright 2005-2015 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (* This module offers general-purpose functions on lists and streams. *) (* --------------------------------------------------------------------------- *) (* Lists. *) (* [take n xs] returns the [n] first elements of the list [xs]. It is acceptable for the list [xs] to have length less than [n], in which case [xs] itself is returned. *) val take: int -> 'a list -> 'a list (* [drop n xs] returns the list [xs], deprived of its [n] first elements. It is acceptable for the list [xs] to have length less than [n], in which case an empty list is returned. *) val drop: int -> 'a list -> 'a list (* [uniq cmp xs] assumes that the list [xs] is sorted according to the ordering [cmp] and returns the list [xs] deprived of any duplicate elements. *) val uniq: ('a -> 'a -> int) -> 'a list -> 'a list (* [weed cmp xs] returns the list [xs] deprived of any duplicate elements. *) val weed: ('a -> 'a -> int) -> 'a list -> 'a list (* --------------------------------------------------------------------------- *) (* A stream is a list whose elements are produced on demand. *) type 'a stream = 'a head Lazy.t and 'a head = | Nil | Cons of 'a * 'a stream (* The length of a stream. *) val length: 'a stream -> int (* Folding over a stream. *) val foldr: ('a -> 'b -> 'b) -> 'a stream -> 'b -> 'b menhir-20151112/src/lineCount.mll0000644000175000017500000000042412621170073015424 0ustar mehdimehdi(* This simple function counts the number of newline characters in a string. *) let newline = ('\010' | '\013' | "\013\010") let ordinary = [^ '\010' '\013']+ rule count n = parse | eof { n } | newline { count (n + 1) lexbuf } | ordinary { count n lexbuf } menhir-20151112/src/parameterizedGrammar.mli0000644000175000017500000000125412621170073017626 0ustar mehdimehdi(* This turns a grammar where nonterminal symbols can be parameterized into a grammar where nonterminal symbols are not parameterized. The transformation is a textual expansion process, whose termination is guaranteed by a simple type system. Expansion creates new nonterminal symbols whose names contain parentheses and commas. These names can be printed directly in informational messages (error messages, conflict reports, descriptions of the automaton, etc.). However, they must be sanitized via [Misc.normalize] when printed in a context where a valid identifier is expected. 
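For instance, expanding [list(expr)] creates a nonterminal symbol whose internal name is [list(expr)]; this name is printed as such in conflict reports, whereas [Misc.normalize] turns it into a valid identifier along the lines of [list_expr_].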
*) val expand : InternalSyntax.grammar -> UnparameterizedSyntax.grammar menhir-20151112/src/option.mli0000644000175000017500000000033412621170073014771 0ustar mehdimehdival map: ('a -> 'b) -> 'a option -> 'b option val iter: ('a -> unit) -> 'a option -> unit val fold: ('a -> 'b -> 'b) -> 'a option -> 'b -> 'b val project: 'a option -> 'a (* careful: calls [exit 1] in case of failure *) menhir-20151112/src/stringMap.ml0000644000175000017500000000056312621170073015260 0ustar mehdimehdiinclude Map.Make (String) let cardinal s = fold (fun _ _ x -> x + 1) s 0 let filter pred map = fold (fun key value map -> if pred key value then add key value map else map) map empty let restrict domain map = filter (fun k _ -> StringSet.mem k domain) map let domain map = fold (fun key _ acu -> StringSet.add key acu) map StringSet.empty menhir-20151112/src/cst.mli0000644000175000017500000000152112621170073014251 0ustar mehdimehdiopen Grammar (* Concrete syntax trees. *) (* A concrete syntax tree is one of a leaf -- which corresponds to a terminal symbol; a node -- which corresponds to a non-terminal symbol, and whose immediate descendants form an expansion of that symbol; or an error leaf -- which corresponds to a point where the [error] pseudo-token was shifted. *) type cst = | CstTerminal of Terminal.t | CstNonTerminal of Production.index * cst array | CstError (* This is a (mostly) unambiguous printer for concrete syntax trees, in an sexp-like notation. *) val print: out_channel -> cst -> unit (* This is a pretty-printer for concrete syntax trees. The notation is the same as that used by the above printer; the only difference is that the [Pprint] library is used to manage indentation. *) val show: out_channel -> cst -> unit menhir-20151112/src/lexer.mll0000644000175000017500000004437512621170073014620 0ustar mehdimehdi{ open Lexing open Parser open Positions open Keyword (* ------------------------------------------------------------------------ *) (* Short-hands. *) let error1 pos = Error.error (Positions.one pos) let error2 lexbuf = Error.error (Positions.two lexbuf.lex_start_p lexbuf.lex_curr_p) (* ------------------------------------------------------------------------ *) (* This wrapper saves the current lexeme start, invokes its argument, and restores it. This allows transmitting better positions to the parser. *) let savestart lexbuf f = let startp = lexbuf.lex_start_p in let token = f lexbuf in lexbuf.lex_start_p <- startp; token (* ------------------------------------------------------------------------ *) (* Extracts a chunk out of the source file. *) let chunk ofs1 ofs2 = let contents = Error.get_file_contents() in let len = ofs2 - ofs1 in String.sub contents ofs1 len (* ------------------------------------------------------------------------ *) (* Overwrites an old character with a new one at a specified offset in a [bytes] buffer. *) let overwrite content offset c1 c2 = assert (Bytes.get content offset = c1); Bytes.set content offset c2 (* ------------------------------------------------------------------------ *) (* Keyword recognition and construction. *) (* A monster is a spot where we have identified a keyword in concrete syntax. We describe a monster as an object with the following methods: *) type monster = { (* The position of the monster. *) pos: Positions.t; (* This method is passed an array of (optional) names for the producers, that is, the elements of the production's right-hand side. It may perform some checks and is allowed to fail. 
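For instance, for a monster such as [$startpos(x)], the check verifies that the production indeed contains a producer named [x], and fails with an error otherwise.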
*) check: string option array -> unit; (* This method transforms the keyword (in place) into a conventional OCaml identifier. This is done by replacing '$', '(', and ')' with '_'. Bloody. The arguments are [ofs1] and [content]. [ofs1] is the offset where [content] begins in the source file. *) transform: int -> bytes -> unit; (* This is the keyword, in abstract syntax. *) keyword: keyword option; } (* ------------------------------------------------------------------------ *) (* The [$syntaxerror] monster. *) let syntaxerror pos : monster = let check _ = () and transform ofs1 content = (* [$syntaxerror] is replaced with [(raise _eRR)]. Same length. *) let pos = start_of_position pos in let ofs = pos.pos_cnum - ofs1 in let source = "(raise _eRR)" in Bytes.blit_string source 0 content ofs (String.length source) and keyword = Some SyntaxError in { pos; check; transform; keyword } (* ------------------------------------------------------------------------ *) (* We check that every [$i] is within range. Also, we forbid using [$i] when a producer has been given a name; this is bad style and may be a mistake. (Plus, this simplifies our life, as we rewrite [$i] to [_i], and we would have to rewrite it to a different identifier otherwise.) *) let check_dollar pos i producers = if not (0 <= i - 1 && i - 1 < Array.length producers) then Error.error [pos] "$%d refers to a nonexistent symbol." i else producers.(i - 1) |> Option.iter (fun x -> Error.error [pos] "please do not say: $%d. Instead, say: %s." i x ) (* We check that every reference to a producer [x] in a position keyword, such as [$startpos(x)], exists. *) let check_producer pos x producers = if not (List.mem (Some x) (Array.to_list producers)) then Error.error [pos] "%s refers to a nonexistent symbol." x (* ------------------------------------------------------------------------ *) (* The [$i] monster. *) let dollar pos i : monster = let check = check_dollar pos i and transform ofs1 content = (* [$i] is replaced with [_i]. Thus, it is no longer a keyword. *) let pos = start_of_position pos in let ofs = pos.pos_cnum - ofs1 in overwrite content ofs '$' '_' and keyword = None in { pos; check; transform; keyword } (* ------------------------------------------------------------------------ *) (* The position-keyword monster. The most horrible of all. *) let position pos (where : string) (flavor : string) (i : string option) (x : string option) = let none _ = () in let where, ofslpar (* offset of the opening parenthesis, if there is one *) = match where with | "symbolstart" -> WhereSymbolStart, 15 | "start" -> WhereStart, 9 | "end" -> WhereEnd, 7 | _ -> assert false in let () = match where, i, x with | WhereSymbolStart, Some _, _ | WhereSymbolStart, _, Some _ -> Error.error [pos] "$symbolstart%s does not take a parameter." flavor | _, _, _ -> () in let flavor = match flavor with | "pos" -> FlavorPosition | "ofs" -> FlavorOffset | _ -> assert false in let subject, check = match i, x with | Some i, None -> let ii = int_of_string i in (* cannot fail *) if ii = 0 && where = WhereEnd then (* [$endpos($0)] *) Before, none else (* [$startpos($i)] is rewritten to [$startpos(_i)].
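More generally, the [transform] method below overwrites characters in place, so that the length of the semantic action is preserved: for example, [$startpos(x)] becomes [_startpos_x_] and [$startpos($2)] becomes [_startpos__2_].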
*) RightNamed ("_" ^ i), check_dollar pos ii | None, Some x -> (* [$startpos(x)] *) RightNamed x, check_producer pos x | None, None -> (* [$startpos] *) Left, none | Some _, Some _ -> assert false in let transform ofs1 content = let pos = start_of_position pos in let ofs = pos.pos_cnum - ofs1 in overwrite content ofs '$' '_'; let ofslpar = ofs + ofslpar in match i, x with | None, Some x -> overwrite content ofslpar '(' '_'; overwrite content (ofslpar + 1 + String.length x) ')' '_' | Some i, None -> overwrite content ofslpar '(' '_'; overwrite content (ofslpar + 1) '$' '_'; overwrite content (ofslpar + 2 + String.length i) ')' '_' | _, _ -> () in let keyword = Some (Position (subject, where, flavor)) in { pos; check; transform; keyword } (* ------------------------------------------------------------------------ *) (* In an OCaml header, there should be no monsters. This is just a sanity check. *) let no_monsters monsters = match monsters with | [] -> () | monster :: _ -> Error.error [monster.pos] "a Menhir keyword cannot be used in an OCaml header." (* ------------------------------------------------------------------------ *) (* Creates a stretch. *) let mk_stretch pos1 pos2 parenthesize monsters = (* Read the specified chunk of the file. *) let ofs1 = pos1.pos_cnum and ofs2 = pos2.pos_cnum in let raw_content : string = chunk ofs1 ofs2 in (* Transform the monsters, if there are any. (This explicit test allows saving one string copy and keeping just one live copy.) *) let content : string = match monsters with | [] -> raw_content | _ :: _ -> let content : bytes = Bytes.of_string raw_content in List.iter (fun monster -> monster.transform ofs1 content) monsters; Bytes.unsafe_to_string content in (* Add whitespace so that the column numbers match those of the source file. If requested, add parentheses so that the semantic action can be inserted into other code without ambiguity. *) let content = if parenthesize then (String.make (pos1.pos_cnum - pos1.pos_bol - 1) ' ') ^ "(" ^ content ^ ")" else (String.make (pos1.pos_cnum - pos1.pos_bol) ' ') ^ content in Stretch.({ stretch_filename = Error.get_filename(); stretch_linenum = pos1.pos_lnum; stretch_linecount = pos2.pos_lnum - pos1.pos_lnum; stretch_content = content; stretch_raw_content = raw_content; stretch_keywords = Misc.map_opt (fun monster -> monster.keyword) monsters }) (* ------------------------------------------------------------------------ *) (* Objective Caml's reserved words. *) let reserved = let table = Hashtbl.create 149 in List.iter (fun word -> Hashtbl.add table word ()) [ "and"; "as"; "assert"; "begin"; "class"; "constraint"; "do"; "done"; "downto"; "else"; "end"; "exception"; "external"; "false"; "for"; "fun"; "function"; "functor"; "if"; "in"; "include"; "inherit"; "initializer"; "lazy"; "let"; "match"; "method"; "module"; "mutable"; "new"; "object"; "of"; "open"; "or"; "parser"; "private"; "rec"; "sig"; "struct"; "then"; "to"; "true"; "try"; "type"; "val"; "virtual"; "when"; "while"; "with"; "mod"; "land"; "lor"; "lxor"; "lsl"; "lsr"; "asr"; ]; table } (* ------------------------------------------------------------------------ *) (* Patterns. 
*) let newline = ('\010' | '\013' | "\013\010") let whitespace = [ ' ' '\t' ';' ] let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_'] let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222'] let identchar = ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '0'-'9'] (* '\'' forbidden *) let poskeyword = '$' (("symbolstart" | "start" | "end") as where) (("pos" | "ofs") as flavor) ( '(' ( '$' (['0'-'9']+ as i) | ((lowercase identchar*) as x)) ')')? let previouserror = "$previouserror" let syntaxerror = "$syntaxerror" (* ------------------------------------------------------------------------ *) (* The lexer. *) rule main = parse | "%token" { TOKEN } | "%type" { TYPE } | "%left" { LEFT } | "%right" { RIGHT } | "%nonassoc" { NONASSOC } | "%start" { START } | "%prec" { PREC } | "%public" { PUBLIC } | "%parameter" { PARAMETER } | "%inline" { INLINE } | "%on_error_reduce" { ON_ERROR_REDUCE } | "%%" { (* The token [PERCENTPERCENT] carries a stretch that contains everything that follows %% in the input file. This string must be created lazily. The parser decides (based on the context) whether this stretch is needed. If it is indeed needed, then constructing this stretch drives the lexer to the end of the file. *) PERCENTPERCENT (lazy ( let openingpos = lexeme_end_p lexbuf in let closingpos = finish lexbuf in mk_stretch openingpos closingpos false [] )) } | ":" { COLON } | "," { COMMA } | "=" { EQUAL } | "(" { LPAREN } | ")" { RPAREN } | "|" { BAR } | "?" { QUESTION } | "*" { STAR } | "+" { PLUS } | (lowercase identchar *) as id { if Hashtbl.mem reserved id then error2 lexbuf "this is an Objective Caml reserved word." else LID (with_pos (cpos lexbuf) id) } | (uppercase identchar *) as id { UID (with_pos (cpos lexbuf) id) } | "//" [^ '\010' '\013']* newline (* skip C++ style comment *) | newline { new_line lexbuf; main lexbuf } | whitespace+ { main lexbuf } | "/*" { comment (lexeme_start_p lexbuf) lexbuf; main lexbuf } | "(*" { ocamlcomment (lexeme_start_p lexbuf) lexbuf; main lexbuf } | "<" { savestart lexbuf (ocamltype (lexeme_end_p lexbuf)) } | "%{" { savestart lexbuf (fun lexbuf -> let openingpos = lexeme_end_p lexbuf in let closingpos, monsters = action true openingpos [] lexbuf in no_monsters monsters; HEADER (mk_stretch openingpos closingpos false []) ) } | "{" { savestart lexbuf (fun lexbuf -> let openingpos = lexeme_end_p lexbuf in let closingpos, monsters = action false openingpos [] lexbuf in ACTION ( fun (producers : string option array) -> List.iter (fun monster -> monster.check producers) monsters; let stretch = mk_stretch openingpos closingpos true monsters in Action.from_stretch stretch ) ) } | eof { EOF } | _ { error2 lexbuf "unexpected character(s)." } (* ------------------------------------------------------------------------ *) (* Skip C style comments. *) and comment openingpos = parse | newline { new_line lexbuf; comment openingpos lexbuf } | "*/" { () } | eof { error1 openingpos "unterminated comment." } | _ { comment openingpos lexbuf } (* ------------------------------------------------------------------------ *) (* Collect an O'Caml type delimited by angle brackets. Angle brackets can appear as part of O'Caml function types and variant types, so we must recognize them and *not* treat them as a closing bracket. 
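For instance, in a declaration such as [%type <int -> int> f], the arrow [->] must not be mistaken for the closing bracket of the type; this is why the rule below explicitly recognizes [->] and [[>] and skips over them.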
*) and ocamltype openingpos = parse | "->" | "[>" { ocamltype openingpos lexbuf } | '>' { OCAMLTYPE (Stretch.Declared (mk_stretch openingpos (lexeme_start_p lexbuf) true [])) } | "(*" { ocamlcomment (lexeme_start_p lexbuf) lexbuf; ocamltype openingpos lexbuf } | newline { new_line lexbuf; ocamltype openingpos lexbuf } | eof { error1 openingpos "unterminated Objective Caml type." } | _ { ocamltype openingpos lexbuf } (* ------------------------------------------------------------------------ *) (* Collect O'Caml code delimited by curly brackets. The monsters that are encountered along the way are accumulated in the list [monsters]. Nested curly brackets must be properly counted. Nested parentheses are also kept track of, so as to better report errors when they are not balanced. *) and action percent openingpos monsters = parse | '{' { let _, monsters = action false (lexeme_end_p lexbuf) monsters lexbuf in action percent openingpos monsters lexbuf } | ("}" | "%}") as delimiter { match percent, delimiter with | true, "%}" | false, "}" -> (* This is the delimiter we were instructed to look for. *) lexeme_start_p lexbuf, monsters | _, _ -> (* This is not it. *) error1 openingpos "unbalanced opening brace." } | '(' { let _, monsters = parentheses (lexeme_end_p lexbuf) monsters lexbuf in action percent openingpos monsters lexbuf } | '$' (['0'-'9']+ as i) { let monster = dollar (cpos lexbuf) (int_of_string i) in action percent openingpos (monster :: monsters) lexbuf } | poskeyword { let monster = position (cpos lexbuf) where flavor i x in action percent openingpos (monster :: monsters) lexbuf } | previouserror { error2 lexbuf "$previouserror is no longer supported." } | syntaxerror { let monster = syntaxerror (cpos lexbuf) in action percent openingpos (monster :: monsters) lexbuf } | '"' { string (lexeme_start_p lexbuf) lexbuf; action percent openingpos monsters lexbuf } | "'" { char lexbuf; action percent openingpos monsters lexbuf } | "(*" { ocamlcomment (lexeme_start_p lexbuf) lexbuf; action percent openingpos monsters lexbuf } | newline { new_line lexbuf; action percent openingpos monsters lexbuf } | ')' | eof { error1 openingpos "unbalanced opening brace." } | _ { action percent openingpos monsters lexbuf } (* ------------------------------------------------------------------------ *) and parentheses openingpos monsters = parse | '(' { let _, monsters = parentheses (lexeme_end_p lexbuf) monsters lexbuf in parentheses openingpos monsters lexbuf } | ')' { lexeme_start_p lexbuf, monsters } | '{' { let _, monsters = action false (lexeme_end_p lexbuf) monsters lexbuf in parentheses openingpos monsters lexbuf } | '$' (['0'-'9']+ as i) { let monster = dollar (cpos lexbuf) (int_of_string i) in parentheses openingpos (monster :: monsters) lexbuf } | poskeyword { let monster = position (cpos lexbuf) where flavor i x in parentheses openingpos (monster :: monsters) lexbuf } | previouserror { error2 lexbuf "$previouserror is no longer supported." } | syntaxerror { let monster = syntaxerror (cpos lexbuf) in parentheses openingpos (monster :: monsters) lexbuf } | '"' { string (lexeme_start_p lexbuf) lexbuf; parentheses openingpos monsters lexbuf } | "'" { char lexbuf; parentheses openingpos monsters lexbuf } | "(*" { ocamlcomment (lexeme_start_p lexbuf) lexbuf; parentheses openingpos monsters lexbuf } | newline { new_line lexbuf; parentheses openingpos monsters lexbuf } | '}' | eof { error1 openingpos "unbalanced opening parenthesis." 
} | _ { parentheses openingpos monsters lexbuf } (* ------------------------------------------------------------------------ *) (* Skip O'Caml comments. Comments can be nested and can contain strings or characters, which must be correctly analyzed. (A string could contain begin-of-comment or end-of-comment sequences, which must be ignored; a character could contain a begin-of-string sequence.) *) and ocamlcomment openingpos = parse | "*)" { () } | "(*" { ocamlcomment (lexeme_start_p lexbuf) lexbuf; ocamlcomment openingpos lexbuf } | '"' { string (lexeme_start_p lexbuf) lexbuf; ocamlcomment openingpos lexbuf } | "'" { char lexbuf; ocamlcomment openingpos lexbuf } | newline { new_line lexbuf; ocamlcomment openingpos lexbuf } | eof { error1 openingpos "unterminated Objective Caml comment." } | _ { ocamlcomment openingpos lexbuf } (* ------------------------------------------------------------------------ *) (* Skip O'Caml strings. *) and string openingpos = parse | '"' { () } | '\\' newline | newline { new_line lexbuf; string openingpos lexbuf } | '\\' _ (* Upon finding a backslash, skip the character that follows, unless it is a newline. Pretty crude, but should work. *) { string openingpos lexbuf } | eof { error1 openingpos "unterminated Objective Caml string." } | _ { string openingpos lexbuf } (* ------------------------------------------------------------------------ *) (* Skip O'Caml characters. A lone quote character is legal inside a comment, so if we don't recognize the matching closing quote, we simply give up. *) and char = parse | '\\'? newline "'" { new_line lexbuf } | [^ '\\' '\''] "'" | '\\' _ "'" | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'" | '\\' 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] "'" | "" { () } (* ------------------------------------------------------------------------ *) (* Read until the end of the file. This is used after finding a %% that marks the end of the grammar specification. We update the current position as we go. This allows us to build a stretch for the trailer. *) and finish = parse | newline { new_line lexbuf; finish lexbuf } | eof { lexeme_start_p lexbuf } | _ { finish lexbuf } menhir-20151112/src/MySet.ml0000644000175000017500000000563112621170073014356 0ustar mehdimehdimodule Make (Ord: Map.OrderedType) = struct type elt = Ord.t type t = Empty | Node of t * elt * t * int (* Sets are represented by balanced binary trees (the heights of the children differ by at most 2) *) let height = function Empty -> 0 | Node(_, _, _, h) -> h (* Creates a new node with left son l, value v and right son r. We must have all elements of l < v < all elements of r. l and r must be balanced and | height l - height r | <= 2. Inline expansion of height for better speed. *) let create l v r = let hl = match l with Empty -> 0 | Node(_,_,_,h) -> h in let hr = match r with Empty -> 0 | Node(_,_,_,h) -> h in Node(l, v, r, (if hl >= hr then hl + 1 else hr + 1)) (* Same as create, but performs one step of rebalancing if necessary. Assumes l and r balanced and | height l - height r | <= 3. Inline expansion of create for better speed in the most frequent case where no rebalancing is required.
*) let bal l v r = let hl = match l with Empty -> 0 | Node(_,_,_,h) -> h in let hr = match r with Empty -> 0 | Node(_,_,_,h) -> h in if hl > hr + 2 then begin match l with Empty -> invalid_arg "Set.bal" | Node(ll, lv, lr, _) -> if height ll >= height lr then create ll lv (create lr v r) else begin match lr with Empty -> invalid_arg "Set.bal" | Node(lrl, lrv, lrr, _)-> create (create ll lv lrl) lrv (create lrr v r) end end else if hr > hl + 2 then begin match r with Empty -> invalid_arg "Set.bal" | Node(rl, rv, rr, _) -> if height rr >= height rl then create (create l v rl) rv rr else begin match rl with Empty -> invalid_arg "Set.bal" | Node(rll, rlv, rlr, _) -> create (create l v rll) rlv (create rlr rv rr) end end else Node(l, v, r, (if hl >= hr then hl + 1 else hr + 1)) (* [add x t] guarantees that it returns [t] (physically unchanged) if [x] is already a member of [t]. *) let rec add x = function Empty -> Node(Empty, x, Empty, 1) | Node(l, v, r, _) as t -> let c = Ord.compare x v in if c = 0 then t else if c < 0 then let l' = add x l in if l == l' then t else bal l' v r else let r' = add x r in if r == r' then t else bal l v r' let empty = Empty let rec find x = function Empty -> raise Not_found | Node(l, v, r, _) -> let c = Ord.compare x v in if c = 0 then v else find x (if c < 0 then l else r) let rec iter f = function Empty -> () | Node(l, v, r, _) -> iter f l; f v; iter f r end menhir-20151112/src/stringMap.mli0000644000175000017500000000070712621170073015431 0ustar mehdimehdiinclude Map.S with type key = string val cardinal : 'a t -> int (* [restrict s m] restricts the domain of the map [m] to (its intersection with) the set [s]. *) val restrict: StringSet.t -> 'a t -> 'a t (* [filter pred m] restricts the domain of the map [m] to (key, value) pairs that satisfy [pred]. *) val filter: (string -> 'a -> bool) -> 'a t -> 'a t (* [domain m] returns the domain of the map [m]. *) val domain: 'a t -> StringSet.t menhir-20151112/src/tokenType.mli0000644000175000017500000000404012621170073015441 0ustar mehdimehdi(* This module deals with the definitions of the type(s) that describe the tokens and the terminal symbols. *) (* By default, following [ocamlyacc], we produce just one type, [token], which describes the tokens. A token contains a tag (a terminal symbol) and possibly a semantic value. *) (* In addition to that, in [--inspection] mode only, we produce a GADT which describes the terminal symbols. A terminal symbol is just a tag; it does not carry a semantic value. *) (* In this module, we also deal with [--only-tokens] and [--external-tokens]. If [--only-tokens] is specified on the command line, [produce_tokentypes] emits the type definition(s) and exits. If [--external-tokens M] is set, then the token type and the token GADT are defined as abbreviations for [M.token] and ['a M.terminal]. *) (* The conventional name of the [token] type, for use by the code generators. *) val ttoken: IL.typ (* [tokendata] maps the name of a token to a data constructor of the [token] type. (If [--external-tokens] is set, then it prefixes its argument with an appropriate OCaml module name. Otherwise, it is the identity.) *) val tokendata: string -> string (* The conventional name of the [terminal] type, a.k.a. the token GADT. This is an indexed type (i.e., it has one type parameter). Its data constructors carry zero value arguments. *) val tctokengadt: string val ttokengadt: IL.typ -> IL.typ (* [tokengadtdata] maps the name of a token to a data constructor of the token GADT.
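Presumably, a token [INT] carrying a semantic value of type [int] is mapped to a constructor named [T_INT], so that the GADT has roughly the shape [type _ terminal = T_INT : int terminal | ...].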
*) val tokengadtdata: string -> string (* The definitions of the token type and of the token GADT, for use by the code generators. Each of these lists defines zero or one type. *) val tokentypedef: UnparameterizedSyntax.grammar -> IL.interface val tokengadtdef: UnparameterizedSyntax.grammar -> IL.interface (* If [--only-tokens] is set, then [produce_tokentypes] writes the type definitions to the [.ml] and [.mli] files and stops Menhir. Otherwise, it does nothing. *) val produce_tokentypes: UnparameterizedSyntax.grammar -> unit menhir-20151112/src/checkOCamlVersion.ml0000644000175000017500000000616012621170073016652 0ustar mehdimehdi(* This module parses the OCaml version and compares it against a user-provided version. *) (* According to OCaml's manual, the Sys.ocaml_version value follows the regexp [version_regexp]. *) let mnum = "\\([0-9]+\\)" (* version = major.minor[.patchlevel][+additional-info]. *) let version_regexp = Str.regexp (Printf.sprintf "%s\\.%s\\(\\.%s\\)?\\(\\+\\(.+\\)\\)?" mnum mnum mnum) let must field = function | None -> failwith (Printf.sprintf "\"%s\" field is undefined." field) | Some s -> s let as_int s = try int_of_string s with Failure _ -> Printf.eprintf "Invalid number '%s'\n" s; exit 1 let parse_version version = let get i = try Some (Str.matched_group i version) with Not_found -> None in if Str.string_match version_regexp version 0 then ( as_int (must "major" (get 1)), as_int (must "minor" (get 2)), get 4, get 6 ) else begin Printf.eprintf "Failed to retrieve ocaml version.\n"; exit 1 end (* The user can compare the OCaml version against a user-provided version, using four different orderings: - eq means that the major and minor numbers are equal; - eq-strict means that even the patchlevel and the additional information are equal; - lt means that the OCaml version is older than the user-provided version; - gt means that the OCaml version is newer than the user-provided version. *) let eq, eq_strict, gt, lt = ref false, ref false, ref false, ref false let verbose = ref false let options = Arg.align [ "--eq", Arg.Set eq, " Is the version equal to <version>?"; "--eq-strict", Arg.Set eq_strict, " Is the version strictly equal to <version>? \ (taking into account patchlevel and additional information)"; "--gt", Arg.Set gt, " Is the version newer than <version>? (default)"; "--lt", Arg.Set lt, " Is the version older than <version>?"; "--verbose", Arg.Set verbose, " Show version." ] let usage = "check-ocaml-version [options] <version>\n" let version = ref None let set_version s = version := Some s let _ = Arg.parse options set_version usage let compare, compare_str, strict = match !eq, !gt, !lt with | true, false, false -> ( = ) , "", !eq_strict | false, true, false -> ( >= ), "or greater ", false | false, false, true -> ( <= ), "or lesser ", false | false, false, false -> (Printf.printf "%s\n%!" Sys.ocaml_version; exit 1) | _ -> failwith "(eq|gt|lt) flags must be used independently" let compare_version (major, minor, p, a) (major', minor', p', a') = if major = major' then if minor = minor' then if strict then (p = p') && (a = a') else true else compare minor minor' else compare major major' let _ = match !version with | None -> Printf.printf "%s\n%!" Sys.ocaml_version | Some version -> let ov = parse_version Sys.ocaml_version and uv = parse_version version in if compare_version ov uv then begin if !verbose then Printf.printf "Version %s is OK.\n%!"
Sys.ocaml_version; exit 0 end else begin if !verbose then Printf.printf "%s is NOT OK: version %s %swas required.%!\n" Sys.ocaml_version version compare_str; exit 1 end menhir-20151112/src/LinearizedArray.mli0000644000175000017500000000533512621170074016555 0ustar mehdimehdi(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, INRIA Paris-Rocquencourt *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright 2005-2015 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (* An array of arrays (of possibly different lengths!) can be ``linearized'', i.e., encoded as a data array (by concatenating all of the little arrays) and an entry array (which contains offsets into the data array). *) type 'a t = (* data: *) 'a array * (* entry: *) int array (* [make a] turns the array of arrays [a] into a linearized array. *) val make: 'a array array -> 'a t (* [read la i j] reads the linearized array [la] at indices [i] and [j]. Thus, [read (make a) i j] is equivalent to [a.(i).(j)]. *) val read: 'a t -> int -> int -> 'a (* [write la i j v] writes the value [v] into the linearized array [la] at indices [i] and [j]. *) val write: 'a t -> int -> int -> 'a -> unit (* [length la] is the number of rows of the array [la]. Thus, [length (make a)] is equivalent to [Array.length a]. *) val length: 'a t -> int (* [row_length la i] is the length of the row at index [i] in the linearized array [la]. Thus, [row_length (make a) i] is equivalent to [Array.length a.(i)]. *) val row_length: 'a t -> int -> int (* [read_row la i] reads the row at index [i], producing a list. Thus, [read_row (make a) i] is equivalent to [Array.to_list a.(i)]. *) val read_row: 'a t -> int -> 'a list (* The following variants read the linearized array via accessors [get_data : int -> 'a] and [get_entry : int -> int]. *) val row_length_via: (* get_entry: *) (int -> int) -> (* i: *) int -> int val read_via: (* get_data: *) (int -> 'a) -> (* get_entry: *) (int -> int) -> (* i: *) int -> (* j: *) int -> 'a val read_row_via: (* get_data: *) (int -> 'a) -> (* get_entry: *) (int -> int) -> (* i: *) int -> 'a list menhir-20151112/src/tarjan.mli0000644000175000017500000000247412621170073014747 0ustar mehdimehdi(* This module provides an implementation of Tarjan's algorithm for finding the strongly connected components of a graph. The algorithm runs when the functor is applied. Its complexity is $O(V+E)$, where $V$ is the number of vertices in the graph $G$, and $E$ is the number of edges. *) module Run (G : sig type node (* We assume each node has a unique index. Indices must range from $0$ to $n-1$, where $n$ is the number of nodes in the graph. *) val n: int val index: node -> int (* Iterating over a node's immediate successors. *) val successors: (node -> unit) -> node -> unit (* Iterating over all nodes. *) val iter: (node -> unit) -> unit end) : sig open G (* This function maps each node to a representative element of its strongly connected component. *) val representative: node -> node (* This function maps each representative element to a list of all members of its strongly connected component. Non-representative elements are mapped to an empty list. 
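For instance, in a graph with edges [0 -> 1], [1 -> 0], and [1 -> 2], the nodes [0] and [1] belong to one component, with (say) [0] as their representative; then [scc 0] contains exactly [0] and [1], [scc 1] is empty, and [scc 2] is the singleton [2].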
*) val scc: node -> node list (* [iter action] allows iterating over all strongly connected components. For each component, the [action] function is applied to the representative element and to a (non-empty) list of all elements. *) val iter: (node -> node list -> unit) -> unit end menhir-20151112/src/syntax.mli0000644000175000017500000000574412621170073015021 0ustar mehdimehdi(* Terminals and nonterminal symbols are strings. Identifiers (which are used to refer to a symbol's semantic value) are strings. A file name is a string. *) type terminal = string type nonterminal = string type symbol = string type identifier = string type filename = string (* A trailer is a source file fragment. *) type trailer = Stretch.t (* Objective Caml semantic actions are represented as stretches. *) type action = Action.t type token_associativity = LeftAssoc | RightAssoc | NonAssoc | UndefinedAssoc type precedence_level = UndefinedPrecedence (* Items are incomparable when they originate in different files. A brand of type [Mark.t] is used to record an item's origin. The positions allow locating certain warnings. *) | PrecedenceLevel of Mark.t * int * Lexing.position * Lexing.position type token_properties = { tk_filename : filename; tk_ocamltype : Stretch.ocamltype option; tk_position : Positions.t; mutable tk_associativity : token_associativity; mutable tk_precedence : precedence_level; mutable tk_is_declared : bool; } type parameter = | ParameterVar of symbol Positions.located | ParameterApp of symbol Positions.located * parameters and parameters = parameter list type declaration = (* Raw Objective Caml code. *) | DCode of Stretch.t (* Raw Objective Caml functor parameter. *) | DParameter of Stretch.ocamltype (* really a stretch *) (* Terminal symbol (token) declaration. *) | DToken of Stretch.ocamltype option * terminal (* Start symbol declaration. *) | DStart of nonterminal (* Priority and associativity declaration. *) | DTokenProperties of terminal * token_associativity * precedence_level (* Type declaration. *) | DType of Stretch.ocamltype * parameter (* On-error-reduce declaration. *) | DOnErrorReduce of parameter (* A [%prec] annotation is optional. A production can carry at most one. If there is one, it is a symbol name. See [ParserAux]. *) type branch_prec_annotation = symbol Positions.located option (* A "production level" is used to solve reduce/reduce conflicts. It reflects which production appears first in the grammar. See [ParserAux]. *) type branch_production_level = | ProductionLevel of Mark.t * int type producer = identifier Positions.located * parameter type parameterized_branch = { pr_branch_position : Positions.t; pr_producers : producer list; pr_action : action; pr_branch_prec_annotation : branch_prec_annotation; pr_branch_production_level : branch_production_level } type parameterized_rule = { pr_public_flag : bool; pr_inline_flag : bool; pr_nt : nonterminal; pr_positions : Positions.t list; pr_parameters : symbol list; pr_branches : parameterized_branch list; } menhir-20151112/src/mark.ml0000644000175000017500000000047212621170073014245 0ustar mehdimehdi(** This module implements a very simple notion of ``mark''. A mark is really a reference cell (without content). Creating a new mark requires allocating a new cell, and comparing marks requires comparing pointers. 
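For instance, [let m = fresh() in same m m] yields [true], whereas [same (fresh()) (fresh())] yields [false], since two calls to [fresh] allocate two distinct cells.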
*) type t = unit ref let fresh = ref let same = (==) let none = fresh() menhir-20151112/src/InfiniteArray.ml0000644000175000017500000000371712621170074016065 0ustar mehdimehdi(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, INRIA Paris-Rocquencourt *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright 2005-2015 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (** This module implements infinite arrays, that is, arrays that grow transparently upon demand. *) type 'a t = { default: 'a; mutable table: 'a array; mutable extent: int; (* the index of the greatest [set] ever, plus one *) } let default_size = 16384 (* must be non-zero *) let make x = { default = x; table = Array.make default_size x; extent = 0; } let rec new_length length i = if i < length then length else new_length (2 * length) i let ensure a i = assert (0 <= i); let table = a.table in let length = Array.length table in if i >= length then begin let table' = Array.make (new_length (2 * length) i) a.default in Array.blit table 0 table' 0 length; a.table <- table' end let get a i = ensure a i; Array.unsafe_get a.table (i) let set a i x = ensure a i; Array.unsafe_set a.table (i) x; if a.extent <= i then a.extent <- i + 1 let extent a = a.extent let domain a = Array.sub a.table 0 a.extent menhir-20151112/src/codeBackend.mli0000644000175000017500000000015012621170073015637 0ustar mehdimehdi(* The (code-based) code generator. *) module Run (T : sig end) : sig val program: IL.program end menhir-20151112/src/Engine.mli0000644000175000017500000000241112621170074014665 0ustar mehdimehdi(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, INRIA Paris-Rocquencourt *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright 2005-2015 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) open EngineTypes (* The LR parsing engine. 
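   The functor [Make], below, turns a parse table [T] into an engine
   whose types [state], [token], [semantic_value], and [production]
   are those supplied by the table.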
*) module Make (T : TABLE) : ENGINE with type state = T.state and type token = T.token and type semantic_value = T.semantic_value and type production = T.production menhir-20151112/src/unparameterizedPrinter.ml0000644000175000017500000001247712621170073020066 0ustar mehdimehdiopen Positions open Syntax open Stretch open UnparameterizedSyntax open Settings let print_preludes f g = List.iter (fun prelude -> Printf.fprintf f "%%{%s%%}\n" prelude.stretch_raw_content ) g.preludes let print_start_symbols b g = StringSet.iter (fun symbol -> Printf.fprintf b "%%start %s\n" (Misc.normalize symbol) ) g.start_symbols let rec insert_in_partitions item m = function | [] -> [ (m, [ item ]) ] | (m', items) :: partitions when Mark.same m m' -> (m', item :: items) :: partitions | t :: partitions -> t :: (insert_in_partitions item m partitions) let insert (undefined, partitions) = function | (item, UndefinedPrecedence) -> ((item, 0) :: undefined, partitions) | (item, PrecedenceLevel (m, v, _, _)) -> (undefined, insert_in_partitions (item, v) m partitions) let print_ocamltype ocamltype = Printf.sprintf " <%s>" ( match ocamltype with | Declared stretch -> stretch.stretch_raw_content | Inferred t -> t ) let print_assoc = function | LeftAssoc -> Printf.sprintf "%%left" | RightAssoc -> Printf.sprintf "%%right" | NonAssoc -> Printf.sprintf "%%nonassoc" | UndefinedAssoc -> "" let print_tokens mode b g = (* Sort tokens wrt precedence. *) let undefined, partition_tokens = StringMap.fold (fun token prop acu -> insert acu (token, prop.tk_precedence) ) g.tokens ([], []) in let ordered_tokens = List.fold_left (fun acu (_, ms) -> acu @ List.sort (fun (_, v) (_, v') -> compare v v') ms ) undefined partition_tokens in List.iter (fun (token, _) -> let prop = StringMap.find token g.tokens in if prop.tk_is_declared then Printf.fprintf b "%%token%s %s\n" begin match mode with | PrintNormal | PrintUnitActions -> Misc.o2s prop.tk_ocamltype print_ocamltype | PrintUnitActionsUnitTokens -> "" (* omitted ocamltype after %token means *) end token ) ordered_tokens; ignore (List.fold_left (fun last_prop (token, v) -> let prop = StringMap.find token g.tokens in match last_prop with | None -> if prop.tk_associativity = UndefinedAssoc then None else ( Printf.fprintf b "%s %s " (print_assoc prop.tk_associativity) token; Some v) | Some v' when v <> v' -> if prop.tk_associativity = UndefinedAssoc then None else ( Printf.fprintf b "\n%s %s " (print_assoc prop.tk_associativity) token; Some v) | Some _ -> Printf.fprintf b "%s " token; last_prop ) None ordered_tokens); Printf.fprintf b "\n" let print_types mode b g = StringMap.iter (fun symbol ty -> Printf.fprintf b "%%type%s %s\n" begin match mode with | PrintNormal -> print_ocamltype ty | PrintUnitActions | PrintUnitActionsUnitTokens -> " " end (Misc.normalize symbol) ) g.types let binding mode id = match mode with | PrintNormal -> id ^ " = " | PrintUnitActions | PrintUnitActionsUnitTokens -> "" let string_of_producer mode (symbol, ido) = binding mode ido ^ (Misc.normalize symbol) let print_branch mode f branch = Printf.fprintf f "%s%s\n {" (String.concat " " (List.map (string_of_producer mode) branch.producers)) (Misc.o2s branch.branch_prec_annotation (fun x -> " %prec "^x.value)); begin match mode with | PrintNormal -> Action.print f branch.action | PrintUnitActions | PrintUnitActionsUnitTokens -> () (* Printing a pair of empty braces is fine. 
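In these modes, the content of the semantic action is deliberately erased, so an empty action is the natural replacement.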
*) end; Printf.fprintf f "}\n" let print_trailers b g = List.iter (fun stretch -> Printf.fprintf b "%s\n" stretch.stretch_raw_content) g.postludes (* Because the resolution of reduce/reduce conflicts is implicitly dictated by the order in which productions appear in the grammar, the printer should be careful to preserve this order. *) let branches_order r r' = let branch_order b b' = match b.branch_production_level, b'.branch_production_level with | ProductionLevel (m, l), ProductionLevel (m', l') -> if Mark.same m m' then if l < l' then -1 else if l > l' then 1 else 0 else 0 in let rec lexical_order bs bs' = match bs, bs' with | [], [] -> 0 | [], _ -> -1 | _, [] -> 1 | b :: bs, b' :: bs' -> match branch_order b b' with | 0 -> lexical_order bs bs' | x -> x in lexical_order r.branches r'.branches let print_rules mode b g = let rules_as_list = StringMap.fold (fun nt r acu -> (nt, r) :: acu) g.rules [] in let ordered_rules = List.sort (fun (_nt, r) (_nt', r') -> branches_order r r') rules_as_list in List.iter (fun (nt, r) -> Printf.fprintf b "\n%s:\n" (Misc.normalize nt); List.iter (fun br -> Printf.fprintf b "| "; print_branch mode b br ) r.branches ) ordered_rules let print mode f g = begin match mode with | PrintNormal -> print_preludes f g | PrintUnitActions | PrintUnitActionsUnitTokens -> () end; print_start_symbols f g; print_tokens mode f g; print_types mode f g; Printf.fprintf f "%%%%\n"; print_rules mode f g; Printf.fprintf f "\n%%%%\n"; begin match mode with | PrintNormal -> print_trailers f g | PrintUnitActions | PrintUnitActionsUnitTokens -> () end menhir-20151112/src/General.ml0000644000175000017500000000424312621170074014671 0ustar mehdimehdi(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, INRIA Paris-Rocquencourt *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright 2005-2015 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (* --------------------------------------------------------------------------- *) (* Lists. *) let rec take n xs = match n, xs with | 0, _ | _, [] -> [] | _, (x :: xs as input) -> let xs' = take (n - 1) xs in if xs == xs' then input else x :: xs' let rec drop n xs = match n, xs with | 0, _ -> xs | _, [] -> [] | _, _ :: xs -> drop (n - 1) xs let rec uniq1 cmp x ys = match ys with | [] -> [] | y :: ys -> if cmp x y = 0 then uniq1 compare x ys else y :: uniq1 cmp y ys let uniq cmp xs = match xs with | [] -> [] | x :: xs -> x :: uniq1 cmp x xs let weed cmp xs = uniq cmp (List.sort cmp xs) (* --------------------------------------------------------------------------- *) (* Streams. *) type 'a stream = 'a head Lazy.t and 'a head = | Nil | Cons of 'a * 'a stream (* The length of a stream. *) let rec length xs = match Lazy.force xs with | Nil -> 0 | Cons (_, xs) -> 1 + length xs (* Folding over a stream. *) let rec foldr f xs accu = match Lazy.force xs with | Nil -> accu | Cons (x, xs) -> f x (foldr f xs accu) menhir-20151112/src/item.ml0000644000175000017500000002506312621170073014254 0ustar mehdimehdiopen Grammar (* ------------------------------------------------------------------------ *) (* Items. 
*) (* An LR(0) item encodes a pair of integers, namely the index of the production and the index of the bullet in the production's right-hand side. *) (* Both integers are packed into a single integer, using 7 bits for the bullet position and the rest (usually 24 bits) for the production index. These widths could be adjusted. *) (* The function [export] is duplicated in [TableInterpreter]. Do not modify it; or modify it here and there in a consistent manner. *) type t = int let import (prod, pos) = assert (pos < 128); (Production.p2i prod) lsl 7 + pos let export t = (Production.i2p (t lsr 7), t mod 128) let marshal (item : t) : int = item (* Comparison. *) let equal (item1 : t) (item2: t) = item1 = item2 (* Position. *) let positions (item : t) = let prod, _ = export item in Production.positions prod (* [def item] looks up the production associated with this item in the grammar and returns [prod, nt, rhs, pos, length], where [prod] is the production's index, [nt] and [rhs] represent the production, [pos] is the position of the bullet in the item, and [length] is the length of the production's right-hand side. *) let def t = let prod, pos = export t in let nt, rhs = Production.def prod in let length = Array.length rhs in assert ((pos >= 0) && (pos <= length)); prod, nt, rhs, pos, length let startnt t = let _, _, rhs, pos, length = def t in assert (pos = 0 && length = 1); match rhs.(0) with | Symbol.N nt -> nt | Symbol.T _ -> assert false (* Printing. *) let print item = let _, nt, rhs, pos, _ = def item in Printf.sprintf "%s -> %s" (Nonterminal.print false nt) (Symbol.printaod 0 pos rhs) (* Classifying items. *) type kind = | Shift of Symbol.t * t | Reduce of Production.index let classify item = let prod, _, rhs, pos, length = def item in if pos = length then Reduce prod else Shift (rhs.(pos), import (prod, pos + 1)) (* Sets of items and maps over items. Hashing these data structures is specifically allowed, so balanced trees (for instance) would not be applicable here. *) module Map = Patricia.Big module Set = Map.Domain (* This functor performs precomputation that helps efficiently compute the closure of an LR(0) or LR(1) state. The precomputation requires time linear in the size of the grammar. The nature of the lookahead sets remains abstract. *) (* The precomputation consists in building the LR(0) nondeterministic automaton. This is a graph whose nodes are items and whose edges are epsilon transitions. (We do not care about shift transitions here.) Lookahead information can be attached to nodes and is propagated through the graph during closure computations. *) module Closure (L : Lookahead.S) = struct type state = L.t Map.t type node = { (* Nodes are sequentially numbered so as to allow applying Tarjan's algorithm (below). *) num: int; (* Each node is associated with an item. *) item: t; (* All of the epsilon transitions that leave a node have the same behavior with respect to lookahead information. *) (* The lookahead set transmitted along an epsilon transition is either a constant, or the union of a constant and the lookahead set at the source node. The former case corresponds to a source item whose trailer is not nullable, the latter to a source item whose trailer is nullable. *) epsilon_constant: L.t; epsilon_transmits: bool; (* Each node carries pointers to its successors through epsilon transitions. This field is never modified once initialization is over. 
*) mutable epsilon_transitions: node list; (* The following fields are transient, that is, only used temporarily during graph traversals. Marks are used to recognize which nodes have been traversed already. Lists of predecessors are used to record which edges have been traversed. Lookahead information is attached with each node. *) mutable mark: Mark.t; mutable predecessors: node list; mutable lookahead: L.t; } (* Allocate one graph node per item and build a mapping of items to nodes. *) let count = ref 0 let mapping : node array array = Array.make Production.n [||] let item2node item = let prod, pos = export item in mapping.(Production.p2i prod).(pos) let () = Production.iter (fun prod -> let _nt, rhs = Production.def prod in let length = Array.length rhs in mapping.(Production.p2i prod) <- Array.init (length+1) (fun pos -> let item = import (prod, pos) in let num = !count in count := num + 1; (* The lookahead set transmitted through an epsilon transition is the FIRST set of the remainder of the source item, plus, if that is nullable, the lookahead set of the source item. *) let constant, transmits = if pos < length then let nullable, first = Analysis.nullable_first_prod prod (pos + 1) in L.constant first, nullable else (* No epsilon transitions leave this item. *) L.empty, false in { num = num; item = item; epsilon_constant = constant; epsilon_transmits = transmits; epsilon_transitions = []; (* temporary placeholder *) mark = Mark.none; predecessors = []; lookahead = L.empty; } ) ) (* At each node, compute transitions. *) let () = Production.iter (fun prod -> let _nt, rhs = Production.def prod in let length = Array.length rhs in Array.iteri (fun pos node -> node.epsilon_transitions <- if pos < length then match rhs.(pos) with | Symbol.N nt -> Production.foldnt nt [] (fun prod nodes -> (item2node (import (prod, 0))) :: nodes ) | Symbol.T _ -> [] else [] ) mapping.(Production.p2i prod) ) (* Detect and reject cycles of transitions that transmit a lookahead set. We need to ensure that there are no such cycles in order to be able to traverse these transitions in topological order. Each such cycle corresponds to a set of productions of the form A1 -> A2, A2 -> A3, ..., An -> A1 (modulo nullable trailers). Such cycles are unlikely to occur in realistic grammars, so our current approach is to reject the grammar if such a cycle exists. Actually, according to DeRemer and Pennello (1982), such a cycle is exactly an includes cycle, and implies that the grammar is not LR(k) for any k, unless A1, ..., An are in fact uninhabited. In other words, this is a pathological case. *) (* Yes, indeed, this is called a cycle in Aho & Ullman's book, and a loop in Grune & Jacobs' book. It is not difficult to see that (provided all symbols are inhabited) the grammar is infinitely ambiguous if and only if there is a loop. 
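     As a concrete (hypothetical) illustration, the productions A -> B
     and B -> A form such a cycle. If A and B are otherwise inhabited,
     say via an extra production A -> t, then the word that consists of
     the single terminal t admits infinitely many derivations, one per
     trip around the loop; the code below then rejects the grammar with
     an ``epsilon-cycle'' error.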
*) module P = struct type foo = node type node = foo let n = !count let index node = node.num let iter f = Array.iter (fun nodes -> Array.iter f nodes ) mapping let successors f node = if node.epsilon_transmits then List.iter f node.epsilon_transitions end module T = Tarjan.Run (P) let cycle scc = let items = List.map (fun node -> node.item) scc in let positions = List.flatten (List.map positions items) in let names = String.concat "\n" (List.map print items) in Error.error positions "the grammar is ambiguous.\n\ The following items participate in an epsilon-cycle:\n\ %s" names let () = P.iter (fun node -> let scc = T.scc node in match scc with | [] -> () | [ node ] -> (* This is a strongly connected component of one node. Check whether it carries a self-loop. Forbidding self-loops is not strictly required by the code that follows, but is consistent with the fact that we forbid cycles of length greater than 1. *) P.successors (fun successor -> if successor.num = node.num then cycle scc ) node | _ -> (* This is a strongly connected component of at least two elements. *) cycle scc ) (* Closure computation. *) let closure (items : state) : state = (* Explore the graph forwards, starting from these items. Marks are used to tell which nodes have been visited. Build a list of all visited nodes; this is in fact the list of all items in the closure. At initial nodes and when reaching a node through a transition, record a lookahead set. When we reach a node through a transition that transmits the lookahead set found at its source, record its source, so as to allow re-traversing this transition backwards (below). *) let this = Mark.fresh() in let nodes = ref [] in let rec visit father transmits toks node = if Mark.same node.mark this then begin (* Node has been visited already. *) node.lookahead <- L.union toks node.lookahead; if transmits then node.predecessors <- father :: node.predecessors end else begin (* Node is new. *) node.predecessors <- if transmits then [ father ] else []; node.lookahead <- toks; follow node end and follow node = node.mark <- this; nodes := node :: !nodes; List.iter (visit node node.epsilon_transmits node.epsilon_constant) node.epsilon_transitions in Map.iter (fun item toks -> let node = item2node item in visit node (* dummy! *) false toks node ) items; let nodes = !nodes in (* Explore the graph of transmitting transitions backwards. By hypothesis, it is acyclic, so this is a topological walk. Lookahead sets are inherited through transitions. *) let this = Mark.fresh() in let rec walk node = if not (Mark.same node.mark this) then begin (* Node is new. *) node.mark <- this; (* Explore all predecessors and merge their lookahead sets into the current node's own lookahead set. *) List.iter (fun predecessor -> walk predecessor; node.lookahead <- L.union predecessor.lookahead node.lookahead ) node.predecessors end in List.iter walk nodes; (* Done. Produce a mapping of items to lookahead sets. Clear all transient fields so as to reduce pressure on the GC -- this does not make much difference. *) List.fold_left (fun closure node -> node.predecessors <- []; let closure = Map.add node.item node.lookahead closure in node.lookahead <- L.empty; closure ) Map.empty nodes (* End of closure computation *) end menhir-20151112/src/derivation.mli0000644000175000017500000000351212621170073015626 0ustar mehdimehdiopen Grammar (* -------------------------------------------------------------------------- *) (* This is the type of derivations. Derivations are forests: see inside. 
*) type t (* This is the type of derivations contexts, or derivations with a derivation-shaped hole. *) type context (* -------------------------------------------------------------------------- *) (* Construction. *) (* [empty] is the forest that consists of a single empty tree. *) val empty: t (* [tail pos rhs] is the forest: (i) whose first element is the empty tree, and (ii) whose remaining elements are the symbols found at positions greater than or equal to [pos] in the array [rhs]. *) val tail: int -> Symbol.t array -> t (* [build pos rhs forest comment] is the forest: (i) whose first element is the tree that has the non-terminal symbol [rhs.(pos)] at its root and the forest [forest] below its root, and (ii) whose remaining elements are the symbols found at positions greater than [pos] in the array [rhs]. *) val build: int -> Symbol.t array -> t -> string option -> t (* [prepend symbol forest] is the forest: (i) whose first element is the symbol [symbol], and (ii) whose remaining elements form the forest [forest]. *) val prepend: Symbol.t -> t -> t (* -------------------------------------------------------------------------- *) (* Factoring. *) (* [factor] factors the maximal common derivation context out of a nonempty family of derivations. It produces a pair of the context and of the residual derivations. *) val factor: t Item.Map.t -> context * t Item.Map.t (* -------------------------------------------------------------------------- *) (* Display. *) (* [print] prints a derivation. *) val print: out_channel -> t -> unit (* [printc] prints a derivation context. *) val printc: out_channel -> context -> unit menhir-20151112/src/action.mli0000644000175000017500000000417312621170073014743 0ustar mehdimehdiopen Keyword (** Semantic action's type. *) type t (** [compose x a1 a2] builds the action [let x = a1 in a2]. This feature is used during the processing of the %inline keyword. *) val compose : string -> t -> t -> t (* [define keyword keywords f action] defines away the keyword [keyword]. It is removed from the set of keywords of this semantic action; the set [keywords] is added in its place. The body of the semantic action is transformed by the function [f], which typically wraps it in some new [let] bindings. *) val define: keyword -> KeywordSet.t -> (IL.expr -> IL.expr) -> t -> t (* Variable-to-variable substitutions, used by [rename], below. *) type subst = (string * string) list (* [Subject/where] pairs, as defined in [Keyword], encode a position keyword. *) type sw = subject * where (** [rename f phi a] applies to the semantic action [a] the renaming [phi] as well as the transformations decided by the function [f]. The function [f] is applied to each (not-yet-renamed) keyword and may decide to transform it, by returning [Some _], or to not transform it, by returning [None]. (In the latter case, [phi] still applies to the keyword.) *) val rename: (sw -> sw option) -> subst -> t -> t (** Semantic actions are translated into [IL] code using the [IL.ETextual] and [IL.ELet] constructors. *) val to_il_expr: t -> IL.expr (** A semantic action might be the inlining of several others. The filenames of the different parts are given by [filenames a]. This can be used, for instance, to check whether all parts come from the standard library. *) val filenames: t -> string list (** [keywords a] is the set of keywords used in the semantic action [a]. *) val keywords: t -> KeywordSet.t (** [print f a] prints [a] to channel [f]. 
*) val print: out_channel -> t -> unit (** [from_stretch s] builds an action out of a textual piece of code. *) val from_stretch: Stretch.t -> t (** Test whether the keyword [$syntaxerror] is used in the action. *) val has_syntaxerror: t -> bool (** Test whether the keyword [$endpos($0)] is used in the action. *) val has_beforeend: t -> bool menhir-20151112/src/lr0.ml0000644000175000017500000004327712621170073014022 0ustar mehdimehdiopen Grammar module InfiniteArray = MenhirLib.InfiniteArray (* ------------------------------------------------------------------------ *) (* Symbolic lookahead information. *) (* A symbolic lookahead set consists of an actual concrete set of terminal symbols and of a number of set variables. Set variables as encoded as integers. *) module SymbolicLookahead = struct type t = TerminalSet.t * CompressedBitSet.t let constant toks = (toks, CompressedBitSet.empty) let empty = constant TerminalSet.empty let union (toks1, vars1) ((toks2, vars2) as s2) = let toks = TerminalSet.union toks1 toks2 and vars = CompressedBitSet.union vars1 vars2 in if toks2 == toks && vars2 == vars then s2 else (toks, vars) let variable (var : int) : t = (TerminalSet.empty, CompressedBitSet.singleton var) let project (toks, vars) = assert (CompressedBitSet.is_empty vars); toks end (* We will perform closure operations over symbolic lookahead sets. This allows us to later represent LR(1) states as pairs of an LR(0) node number and an array of concrete lookahead sets. *) module SymbolicClosure = Item.Closure(SymbolicLookahead) (* Closure operations over concrete lookahead sets are also used (when explaining conflicts). One could take another instance of the functor. The approach below is somewhat less elegant and makes each call to [closure] somewhat slower, but saves the cost of instantiating the functor again -- which is linear in the size of the grammar. *) type concretelr1state = TerminalSet.t Item.Map.t let closure (state : concretelr1state) : concretelr1state = Item.Map.map SymbolicLookahead.project (SymbolicClosure.closure (Item.Map.map SymbolicLookahead.constant state)) (* ------------------------------------------------------------------------ *) (* Finding which non-epsilon transitions leave a set of items. This code is parametric in the nature of lookahead sets. *) let transitions (state : 'a Item.Map.t) : 'a Item.Map.t SymbolMap.t = Item.Map.fold (fun item toks transitions -> match Item.classify item with | Item.Shift (symbol, item') -> let items : 'a Item.Map.t = try SymbolMap.find symbol transitions with Not_found -> Item.Map.empty in SymbolMap.add symbol (Item.Map.add item' toks items) transitions | Item.Reduce _ -> transitions ) state SymbolMap.empty (* ------------------------------------------------------------------------ *) (* Determining the reduction opportunities at a (closed) state. They are represented as a list of pairs of a lookahead set and a production index. This code is again parametric in the nature of lookahead sets. *) let reductions (state : 'a Item.Map.t) : ('a * Production.index) list = Item.Map.fold (fun item toks accu -> match Item.classify item with | Item.Reduce prod -> (toks, prod) :: accu | Item.Shift _ -> accu ) state [] (* ------------------------------------------------------------------------ *) (* Construction of the the LR(0) automaton. *) (* Nodes are numbered sequentially. *) type node = int (* A symbolic transition is a pair of the target state number and an array of symbolic lookahead sets. 
The variables in these sets are numbered in [0,g) where g is the number of items in the source LR(0) state. Items are numbered in the order of presentation by [Item.Set.fold]. *) type symbolic_transition_target = node * SymbolicLookahead.t array (* The automaton is represented by (growing) arrays of states (sets of items), symbolic transition information, and symbolic reduction information, indexed by node numbers. Conversely, a hash table maps states (sets of items) to node numbers. *) let n = ref 0 let states : Item.Set.t InfiniteArray.t = InfiniteArray.make Item.Set.empty let _transitions : symbolic_transition_target SymbolMap.t InfiniteArray.t = InfiniteArray.make SymbolMap.empty let _reductions : (SymbolicLookahead.t * Production.index) list InfiniteArray.t = InfiniteArray.make [] let map : (Item.Set.t, node) Hashtbl.t = Hashtbl.create 50021 let incoming : Symbol.t option InfiniteArray.t = InfiniteArray.make None (* The automaton is built depth-first. *) let rec explore (symbol : Symbol.t option) (state : Item.Set.t) : node = (* Find out whether this state was already explored. *) try Hashtbl.find map state with Not_found -> (* If not, create a new node. *) let k = !n in n := k + 1; InfiniteArray.set states k state; Hashtbl.add map state k; (* Record its incoming symbol. *) InfiniteArray.set incoming k symbol; (* Build a symbolic version of the current state, where each item is associated with a distinct lookahead set variable, numbered consecutively. *) let (_ : int), (symbolic_state : SymbolicClosure.state) = Item.Set.fold (fun item (i, symbolic_state) -> i+1, Item.Map.add item (SymbolicLookahead.variable i) symbolic_state ) state (0, Item.Map.empty) in (* Compute the symbolic closure. *) let closure = SymbolicClosure.closure symbolic_state in (* Compute symbolic information about reductions. *) InfiniteArray.set _reductions k (reductions closure); (* Compute symbolic information about the transitions, and, by dropping the symbolic lookahead information, explore the transitions to further LR(0) states. *) InfiniteArray.set _transitions k (SymbolMap.mapi (fun symbol symbolic_state -> let (k : node) = explore (Some symbol) (Item.Map.domain symbolic_state) in let lookahead : SymbolicLookahead.t array = Array.make (Item.Map.cardinal symbolic_state) SymbolicLookahead.empty in let (_ : int) = Item.Map.fold (fun _ s i -> lookahead.(i) <- s; i+1 ) symbolic_state 0 in ((k, lookahead) : symbolic_transition_target) ) (transitions closure)); k (* Creating a start state out of a start production. It contains a single item, consisting of the start production, at position 0. *) let start prod : Item.Set.t = Item.Set.singleton (Item.import (prod, 0)) (* This starts the construction of the automaton and records the entry nodes in an array. *) let entry : node ProductionMap.t = ProductionMap.start (fun prod -> explore None (start prod) ) let () = Hashtbl.clear map let n = !n let () = Error.logA 1 (fun f -> Printf.fprintf f "Built an LR(0) automaton with %d states.\n" n); Time.tick "Construction of the LR(0) automaton" (* ------------------------------------------------------------------------ *) (* Accessors. *) let items node : Item.Set.t = InfiniteArray.get states node let incoming_symbol node : Symbol.t option = InfiniteArray.get incoming node (* ------------------------------------------------------------------------ *) (* Help for building the LR(1) automaton. 
*) (* An LR(1) state is represented as a pair of an LR(0) state number and an array of concrete lookahead sets (whose length depends on the LR(0) state). *) type lr1state = node * TerminalSet.t array (* An encoded LR(1) state can be turned into a concrete representation, that is, a mapping of items to concrete lookahead sets. *) let export (k, toksr) = let (_ : int), items = Item.Set.fold (fun item (i, items) -> i+1, Item.Map.add item toksr.(i) items ) (InfiniteArray.get states k) (0, Item.Map.empty) in items (* Displaying a concrete state. *) let print_concrete leading (state : concretelr1state) = let buffer = Buffer.create 1024 in Item.Map.iter (fun item toks -> Printf.bprintf buffer "%s%s[ %s ]\n" leading (Item.print item) (TerminalSet.print toks) ) state; Buffer.contents buffer (* Displaying a state. By default, only the kernel is displayed, not the closure. *) let print leading state = print_concrete leading (export state) let print_closure leading state = print_concrete leading (closure (export state)) (* The core of an LR(1) state is the underlying LR(0) state. *) let core (k, _) = k (* A sanity check. *) let well_formed (k, toksr) = Array.length toksr = Item.Set.cardinal (InfiniteArray.get states k) (* An LR(1) start state is the combination of an LR(0) start state (which consists of a single item) with a singleton lookahead set that consists of the end-of-file pseudo-token. *) let start k = let state = (k, [| TerminalSet.singleton Terminal.sharp |]) in assert (well_formed state); state (* Interpreting a symbolic lookahead set with respect to a source state. The variables in the symbolic lookahead set (which are integers) are interpreted as indices into the state's array of concrete lookahead sets. The result is a concrete lookahead set. *) let interpret ((_, toksr) as state : lr1state) ((toks, vars) : SymbolicLookahead.t) : TerminalSet.t = assert (well_formed state); CompressedBitSet.fold (fun var toks -> assert (var >= 0 && var < Array.length toksr); TerminalSet.union toksr.(var) toks ) vars toks (* Out of an LR(1) state, one produces information about reductions and transitions. This is done in an efficient way by interpreting the precomputed symbolic information with respect to that state. *) let reductions ((k, _) as state : lr1state) : (TerminalSet.t * Production.index) list = List.map (fun (s, prod) -> interpret state s, prod ) (InfiniteArray.get _reductions k) let transitions ((k, _) as state : lr1state) : lr1state SymbolMap.t = SymbolMap.map (fun ((k, sr) : symbolic_transition_target) -> ((k, Array.map (interpret state) sr) : lr1state) ) (InfiniteArray.get _transitions k) let outgoing_symbols (k : node) : Symbol.t list = SymbolMap.domain (InfiniteArray.get _transitions k) let transition symbol ((k, _) as state : lr1state) : lr1state = let ((k, sr) : symbolic_transition_target) = try SymbolMap.find symbol (InfiniteArray.get _transitions k) with Not_found -> assert false (* no transition along this symbol *) in (k, Array.map (interpret state) sr) (* Equality of states. *) let equal ((k1, toksr1) as state1) ((k2, toksr2) as state2) = assert (k1 = k2 && well_formed state1 && well_formed state2); let rec loop i = if i = 0 then true else let i = i - 1 in (TerminalSet.equal toksr1.(i) toksr2.(i)) && (loop i) in loop (Array.length toksr1) (* Subsumption between states. 
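   That is, [subsume state1 state2] holds if and only if, item per
   item, the lookahead set found in [state1] is a subset of the one
   found in [state2].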
*) let subsume ((k1, toksr1) as state1) ((k2, toksr2) as state2) = assert (k1 = k2 && well_formed state1 && well_formed state2); let rec loop i = if i = 0 then true else let i = i - 1 in (TerminalSet.subset toksr1.(i) toksr2.(i)) && (loop i) in loop (Array.length toksr1) (* This function determines whether two (core-equivalent) states are compatible, according to a criterion that is close to Pager's weak compatibility criterion. Pager's criterion guarantees that if a merged state has a potential conflict at [(i, j)] -- that is, some token [t] appears within the lookahead sets of both item [i] and item [j] -- then there exists a state in the canonical automaton that also has a potential conflict at [(i, j)] -- that is, some token [u] appears within the lookahead sets of both item [i] and item [j]. Note that [t] and [u] can be distinct. Pager has shown that his weak compatibility criterion is stable, that is, preserved by transitions and closure. This means that, if two states can be merged, then so can their successors. This is important, because merging two states means committing to merging their successors, even though we have not even built these successors yet. The criterion used here is a slightly more restrictive version of Pager's criterion, which guarantees equality of the tokens [t] and [u]. This is done essentially by applying Pager's original criterion on a token-wise basis. Pager's original criterion states that two states can be merged if the new state has no conflict or one of the original states has a conflict. Our more restrictive criterion states that two states can be merged if, for every token [t], the new state has no conflict at [t] or one of the original states has a conflict at [t]. This modified criterion is also stable. My experiments show that it is almost as effective in practice: out of more than a hundred real-world sample grammars, only one automaton was affected, and only one extra state appeared as a result of using the modified criterion. Its advantage is to potentially make conflict explanations easier: if there appears to be a conflict at [t], then some conflict at [t] can be explained. This was not true when using Pager's original criterion. *) let compatible (k1, toksr1) (k2, toksr2) = assert (k1 = k2); let n = Array.length toksr1 in (* Two states are compatible if and only if they are compatible at every pair (i, j), where i and j are distinct. *) let rec loopi i = if i = n then true else let toksr1i = toksr1.(i) and toksr2i = toksr2.(i) in let rec loopj j = if j = i then true else let toksr1j = toksr1.(j) and toksr2j = toksr2.(j) in (* The two states are compatible at (i, j) if every conflict token in the merged state already was a conflict token in one of the two original states. This could be written as follows: TerminalSet.subset (TerminalSet.inter (TerminalSet.union toksr1i toksr2i) (TerminalSet.union toksr1j toksr2j)) (TerminalSet.union (TerminalSet.inter toksr1i toksr1j) (TerminalSet.inter toksr2i toksr2j)) but is easily seen (on paper) to be equivalent to: *) TerminalSet.subset (TerminalSet.inter toksr2i toksr1j) (TerminalSet.union toksr1i toksr2j) && TerminalSet.subset (TerminalSet.inter toksr1i toksr2j) (TerminalSet.union toksr2i toksr1j) && loopj (j+1) in loopj 0 && loopi (i+1) in loopi 0 (* This function determines whether two (core-equivalent) states can be merged without creating an end-of-stream conflict, now or in the future. 
The rule is, if an item appears in one state with the singleton "#" as its lookahead set, then its lookahead set in the other state must contain "#". So, either the second lookahead set is also the singleton "#", and no end-of-stream conflict exists, or it is larger, and the second state already contains an end-of-stream conflict. Put another way, we do not want to merge two lookahead sets when one contains "#" alone and the other does not contain "#". I invented this rule to complement Pager's criterion. I believe, but I am not 100% sure, that it does indeed prevent end-of-stream conflicts and that it is stable. Thanks to Sébastien Hinderer for reporting the bug caused by the absence of this extra criterion. *) let eos_compatible (k1, toksr1) (k2, toksr2) = assert (k1 = k2); let n = Array.length toksr1 in let rec loop i = if i = n then true else let toks1 = toksr1.(i) and toks2 = toksr2.(i) in begin if TerminalSet.mem Terminal.sharp toks1 && TerminalSet.is_singleton toks1 then (* "#" is alone in one set: it must be a member of the other set. *) TerminalSet.mem Terminal.sharp toks2 else if TerminalSet.mem Terminal.sharp toks2 && TerminalSet.is_singleton toks2 then (* Symmetric condition. *) TerminalSet.mem Terminal.sharp toks1 else true end && loop (i+1) in loop 0 (* This function determines whether two (core-equivalent) states can be merged without creating spurious reductions on the [error] token. The rule is, we merge two states only if they agree on which reductions are permitted on the [error] token. Without this restriction, we might end up in a situation where we decide to introduce an [error] token into the input stream and perform a reduction, whereas a canonical LR(1) automaton, confronted with the same input string, would fail normally -- that is, it would introduce an [error] token into the input stream, but it would not be able to perform a reduction right away: the current state would be discarded. In the interest of more accurate (or sane, or predictable) error handling, I decided to introduce this restriction as of 20110124. This will cause an increase in the size of automata for grammars that use the [error] token. It might actually make the [error] token somewhat easier to use. Note that two sets can be in the subsumption relation and still be error-incompatible. Error-compatibility requires equality of the lookahead sets, restricted to [error]. Thanks to Didier Rémy for reporting a bug caused by the absence of this extra criterion. *) let error_compatible (k1, toksr1) (k2, toksr2) = assert (k1 = k2); let n = Array.length toksr1 in let rec loop i = if i = n then true else let toks1 = toksr1.(i) and toks2 = toksr2.(i) in begin if TerminalSet.mem Terminal.error toks1 then (* [error] is a member of one set: it must be a member of the other set. *) TerminalSet.mem Terminal.error toks2 else if TerminalSet.mem Terminal.error toks2 then (* Symmetric condition. *) TerminalSet.mem Terminal.error toks1 else true end && loop (i+1) in loop 0 (* Union of two states. The two states must have the same core. The new state is obtained by pointwise union of the lookahead sets. *) let union (k1, toksr1) (k2, toksr2) = assert (k1 = k2); k1, Array.init (Array.length toksr1) (fun i -> TerminalSet.union toksr1.(i) toksr2.(i) ) (* Restriction of a state to a set of tokens of interest. Every lookahead set is intersected with that set. 
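   For instance, if the set of tokens of interest is {a} and an item
   carries the lookahead set {a, b}, then, in the restricted state,
   this item carries the lookahead set {a}.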
*) let restrict toks (k, toksr) = k, Array.map (fun toksri -> TerminalSet.inter toksri toks ) toksr menhir-20151112/src/lr1.mli0000644000175000017500000001367112621170073014167 0ustar mehdimehdiopen Grammar (* This module constructs an LR(1) automaton by following Pager's method, that is, by merging states on the fly when they are found to be (weakly) compatible. *) (* Shift/reduce conflicts are silently resolved when (and only when) that is allowed in a clean way by user-specified priorities. This includes shift/reduce/reduce conflicts when (and only when) there is agreement that the shift action should be preferred. Conflicts that cannot be silently resolved in this phase will be reported, explained, and arbitrarily resolved immediately before code generation. *) (* ------------------------------------------------------------------------- *) (* Accessors. *) (* This is the type of the automaton's nodes. *) type node module Node : Set.OrderedType with type t = node module NodeSet : Set.S with type elt = node module NodeMap : Map.S with type key = node (* These are the automaton's entry states, indexed by the start productions. *) val entry: node ProductionMap.t (* [fold_entry] folds over [entry]. For convenience, it gives access not only to the start production and start state, but also to the nonterminal symbol and to the OCaml type associated with this production. *) val fold_entry: (Production.index -> node -> Nonterminal.t -> Stretch.ocamltype -> 'a -> 'a) -> 'a -> 'a (* [entry_of_nt] maps a (user) non-terminal start symbol to the corresponding start state. [nt_of_entry] does the reverse. *) val entry_of_nt: Nonterminal.t -> node val nt_of_entry: node -> Nonterminal.t (* Nodes are numbered sequentially from [0] to [n-1]. *) val n: int val number: node -> int (* This provides access to the LR(1) state that a node stands for. *) val state: node -> Lr0.lr1state (* This converts a start node into the single item that it contains. *) val start2item: node -> Item.t (* This maps a node to its incoming symbol, that is, the symbol carried by all of the edges that enter this node. A node has zero incoming edges (and, thus, no incoming symbol) if and only if it is a start node. *) val incoming_symbol: node -> Symbol.t option (* This maps a node to its predecessors. *) val predecessors: node -> node list (* This provides access to a node's transitions and reductions. *) val transitions: node -> node SymbolMap.t val reductions: node -> Production.index list TerminalMap.t (* (New as of 2012/01/23.) This tells whether a shift/reduce conflict in this node was solved in favor of neither (%nonassoc). This implies that one must forbid a default reduction at this node. *) val forbid_default_reduction: node -> bool (* This inverts a mapping of tokens to productions into a mapping of productions to sets of tokens. *) val invert : ProductionMap.key list TerminalMap.t -> TerminalSet.t ProductionMap.t (* [has_beforeend s] tests whether the state [s] can reduce a production whose semantic action uses [$endpos($0)]. Note that [$startpos] and [$endpos] have been expanded away already, so we need not worry about the fact that (in an epsilon production) they expand to [$endpos($0)]. *) val has_beforeend: node -> bool (* Computing which terminal symbols a state is willing to act upon. This function is currently unused, but could be used as part of an error reporting system. *) val acceptable_tokens: node -> TerminalSet.t (* Iteration over all nodes. 
The order in which elements are examined, and the order of [map]'s output list, correspond to the numeric indices produced by [number] above. *) val fold: ('a -> node -> 'a) -> 'a -> 'a val iter: (node -> unit) -> unit val map: (node -> 'a) -> 'a list (* Iteration over non-start nodes *) val foldx: ('a -> node -> 'a) -> 'a -> 'a val iterx: (node -> unit) -> unit (* Iteration over all edges that carry a certain symbol. Edges are grouped in families, where all edges in a single family have the same target node. [targets f accu symbol] invokes [f accu sources target] once for every family, where [sources] are the sources of the edges in the family and [target] is their common target. *) val targets: ('a -> node list -> node -> 'a) -> 'a -> Symbol.t -> 'a (* Iteration over all nodes with conflicts. [conflicts f] invokes [f toks node] once for every node [node] with a conflict, where [toks] are the tokens involved in the conflicts at that node. *) val conflicts: (TerminalSet.t -> node -> unit) -> unit (* [reverse_dfs goal] performs a reverse depth-first search through the automaton, starting at node [goal], and marking the nodes traversed. It returns a function that tells whether a node is marked, that is, whether a path leads from that node to the goal node. *) val reverse_dfs: node -> (node -> bool) (* ------------------------------------------------------------------------- *) (* Modifications of the automaton. *) (* This function performs default conflict resolution. First, it resolves standard (shift/reduce and reduce/reduce) conflicts (thus ensuring that the automaton is deterministic) by removing some reduction actions. Second, it resolves end-of-stream conflicts by ensuring that states that have a reduce action at the pseudo-token "#" have no other action. It is called after conflicts have been explained and before code generation takes place. The automaton is modified in place. *) val default_conflict_resolution: unit -> unit (* This function adds extra reduction actions in the face of an error, if requested by the user via [%on_error_reduce]. *) (* It must be called after conflict resolution has taken place. The automaton is modified in place. *) (* If a state can reduce only one production, whose left-hand symbol has been declared [%on_error_reduce], then every error action in this state is replaced with a reduction action. This is done even though this state may have outgoing shift transitions: thus, we are forcing one interpretation of the past, among several possible interpretations. *) val extra_reductions: unit -> unit menhir-20151112/src/derivation.ml0000644000175000017500000002100212621170073015447 0ustar mehdimehdiopen Grammar (* -------------------------------------------------------------------------- *) (* This is a data structure for linear derivation trees. These are derivation trees that are list-like (that is, they do not branch), because a single path is of interest. A tree is either empty or formed of a non-terminal symbol at the root and a forest below the root. A forest is an ordered list of elements. However, its elements are not trees, as one would perhaps expect. Because we are interested in *linear* derivation trees, only one element of the forest receives focus and is a tree. All other elements remain un-expanded, so they are just symbols. In other words, a linear derivation tree is roughly just a list of levels, where each forest corresponds to one level. 
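   As a sketch (the symbols a, B, c, d, e are hypothetical), the
   sentential form a B c, where B is in focus and has been expanded
   into d . e, is represented by the forest:

     { prefix = [ a ];
       focus = TRooted (B, { prefix = [ d ]; focus = TEmpty;
                             suffix = [ e ]; comment = "" });
       suffix = [ c ];
       comment = "" }

   The outer level displays a B c; the inner level, below B, displays
   d . e, where the dot stands for the empty focus.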
*) type 'focus level = { prefix: Symbol.t list; focus: 'focus; suffix: Symbol.t list; comment: string } type tree = | TEmpty | TRooted of Symbol.t * forest and forest = tree level (* We make use of contexts with a forest-shaped hole. We have tree contexts and forest contexts. Tree contexts do not have a case for holes, since we work with forest-shaped holes only. Forest contexts have one. *) type ctree = | CRooted of Symbol.t * cforest and cforest = | CHole | CCons of ctree level (* Make a few types visible to clients. *) type t = forest type context = cforest (* -------------------------------------------------------------------------- *) (* Construction. *) let rec array_to_list a i j = if i = j then [] else a.(i) :: array_to_list a (i + 1) j let empty = { prefix = []; focus = TEmpty; suffix = []; comment = "" } let tail pos rhs = let length = Array.length rhs in assert (pos < length); { prefix = []; focus = TEmpty; suffix = array_to_list rhs pos length; comment = "" } let build pos rhs forest comment = let length = Array.length rhs in assert (pos < length); match rhs.(pos) with | Symbol.T _ -> assert false | Symbol.N _ as symbol -> { prefix = []; focus = TRooted (symbol, forest); suffix = array_to_list rhs (pos + 1) length; comment = (match comment with None -> "" | Some comment -> comment) } let prepend symbol forest = { forest with prefix = symbol :: forest.prefix } (* -------------------------------------------------------------------------- *) (* Display. *) let buffer = Buffer.create 32768 let rec print_blank k = if k > 0 then begin Buffer.add_char buffer ' '; print_blank (k - 1) end let print_symbol symbol = let word = Symbol.print symbol in Buffer.add_string buffer word; Buffer.add_char buffer ' '; String.length word + 1 let print_symbols symbols = List.fold_left (fun offset symbol -> offset + print_symbol symbol ) 0 symbols let print_level print_focus_root print_focus_remainder offset forest = print_blank offset; let offset = offset + print_symbols forest.prefix in print_focus_root forest.focus; let (_ : int) = print_symbols forest.suffix in if String.length forest.comment > 0 then begin Buffer.add_string buffer "// "; Buffer.add_string buffer forest.comment end; Buffer.add_char buffer '\n'; print_focus_remainder offset forest.focus let print_tree_root = function | TEmpty -> Buffer.add_string buffer ". " | TRooted (symbol, _) -> let (_ : int) = print_symbol symbol in () let rec print_forest offset forest = print_level print_tree_root print_tree_remainder offset forest and print_tree_remainder offset = function | TEmpty -> () | TRooted (_, forest) -> print_forest offset forest let print_ctree_root = function | CRooted (symbol, _) -> let (_ : int) = print_symbol symbol in () let rec print_cforest offset cforest = match cforest with | CHole -> print_blank offset; Buffer.add_string buffer "(?)\n" | CCons forest -> print_level print_ctree_root print_ctree_remainder offset forest and print_ctree_remainder offset = function | CRooted (_, cforest) -> print_cforest offset cforest let wrap print channel x = Buffer.clear buffer; print 0 x; Buffer.output_buffer channel buffer let print = wrap print_forest let printc = wrap print_cforest (* -------------------------------------------------------------------------- *) (* [punch] turns a (tree or forest) into a pair of a (tree or forest) context and a residual forest, where the context is chosen maximal. In other words, the residual forest consists of a single level -- its focus is [TEmpty]. 
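   With the example forest shown earlier (a B c above d . e), punching
   produces the residual level d . e together with the maximal context

     CCons { prefix = [ a ]; focus = CRooted (B, CHole);
             suffix = [ c ]; comment = "" }

   so that filling this context with the residual yields the original
   forest back.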
*) let rec punch_tree tree : (ctree * forest) option = match tree with | TEmpty -> None | TRooted (symbol, forest) -> let forest1, forest2 = punch_forest forest in Some (CRooted (symbol, forest1), forest2) and punch_forest forest : cforest * forest = match punch_tree forest.focus with | None -> CHole, forest | Some (ctree1, forest2) -> CCons { prefix = forest.prefix; focus = ctree1; suffix = forest.suffix; comment = forest.comment }, forest2 (* [fill] fills a (tree or forest) context with a forest so as to produce a new (tree or forest). *) let rec fill_tree ctree1 forest2 : tree = match ctree1 with | CRooted (symbol1, cforest1) -> TRooted (symbol1, fill_forest cforest1 forest2) and fill_forest cforest1 forest2 : forest = match cforest1 with | CHole -> forest2 | CCons level1 -> { prefix = level1.prefix; focus = fill_tree level1.focus forest2; suffix = level1.suffix; comment = level1.comment } (* [common] factors the maximal common (tree or forest) context out of a pair of a (tree or forest) context and a (tree or forest). It returns the (tree or forest) context as well as the residuals of the two parameters. *) let rec common_tree ctree1 tree2 : (ctree * cforest * forest) option = match ctree1, tree2 with | CRooted _, TEmpty -> None | CRooted (symbol1, cforest1), TRooted (symbol2, forest2) -> if Symbol.equal symbol1 symbol2 then let cforest, cforest1, forest2 = common_forest cforest1 forest2 in Some (CRooted (symbol1, cforest), cforest1, forest2) else None and common_forest cforest1 forest2 : cforest * cforest * forest = match cforest1 with | CHole -> CHole, cforest1, forest2 | CCons forest1 -> if Symbol.lequal forest1.prefix forest2.prefix && Symbol.lequal forest1.suffix forest2.suffix && forest1.comment = forest2.comment then begin match common_tree forest1.focus forest2.focus with | None -> CHole, cforest1, forest2 | Some (ctree, csubforest1, subforest2) -> let cforest = { prefix = forest1.prefix; focus = ctree; suffix = forest1.suffix; comment = forest1.comment } in CCons cforest, csubforest1, subforest2 end else CHole, cforest1, forest2 (* [factor] factors the maximal common forest context out of a nonempty family of forests. We assume that the family is represented as a map indexed by items, because this is convenient for the application that we have in mind, but this assumption is really irrelevant. *) let factor forests = match Item.Map.fold (fun item forest accu -> match accu with | None -> (* First time through the loop, so [forest] is the first forest that we examine. Punch it, so as to produce a maximal forest context and a residual forest. *) let context, residual = punch_forest forest in Some (context, Item.Map.singleton item residual) | Some (context, residuals) -> (* Another iteration through the loop. [context] and [residuals] are the maximal common context and the residuals of the forests examined so far. *) (* Combine the common context obtained so far with the forest at hand. This yields a new, smaller common context, as well as residuals for the previous common context and for the forest at hand. *) let context, contextr, forestr = common_forest context forest in (* The residual forests are now: (i) the residual forest [forestr]; and (ii) the previous residual forests [residuals], each of which must be placed with the residual context [contextr]. 
*) let residuals = Item.Map.add item forestr (Item.Map.map (fill_forest contextr) residuals) in Some (context, residuals) ) forests None with | None -> assert false (* parameter [forests] was an empty map *) | Some (context, residuals) -> context, residuals menhir-20151112/src/menhirLib.mlpack0000644000175000017500000000060312621170073016057 0ustar mehdimehdi# This is the list of modules that must go into MenhirLib. # Thy must be listed in dependency order, as this list is # used to construct menhirLib.ml at installation time. General Convert IncrementalEngine EngineTypes Engine Printers InfiniteArray PackedIntArray RowDisplacement LinearizedArray TableFormat InspectionTableFormat InspectionTableInterpreter TableInterpreter StaticVersion menhir-20151112/src/parserAux.mli0000644000175000017500000000557712621170073015451 0ustar mehdimehdi(* This module provides utilities that are shared by the two versions of the parser. *) open Syntax (* [new_precedence_level pos1 pos2] creates a new precendence level, which is stronger than any levels previously created by this function. It should be called every time a [%left], [%right], or [%nonassoc] declaration is found. The positions are the positions of this declaration in the source code. The precedence levels created by this function are attached to tokens and (via %prec) to productions. They are used in solving shift/reduce and shift/reduce/reduce conflicts. *) val new_precedence_level: Lexing.position -> Lexing.position -> precedence_level (* [new_production_level()] creates a new production level, which is stronger than any levels previously created by this function. It should be called every time a new production is found. The production levels created by this function are attached to productions. They are used in solving reduce/reduce conflicts: following ocamlyacc and bison, the production that appears first in the grammar receives preference. It may seem very strange that %prec annotations do not influence this process, but that's how it is, at least for the moment. *) val new_production_level: unit -> branch_production_level (* [check_production_group] accepts a production group and checks that all productions in the group define the same set of identifiers. *) val check_production_group: ((Positions.t * identifier Positions.located option * parameter) list * 'a * 'b * 'c) list -> unit (* [normalize_producers] accepts a list of producers where identifiers are optional and returns a list of producers where identifiers are mandatory. A missing identifier in the [i]-th position receives the conventional name [_i]. *) val normalize_producers: (Positions.t * identifier Positions.located option * parameter) list -> producer list (* [override pos oprec1 oprec2] decides which of the two optional %prec declarations [oprec1] and [oprec2] applies to a production. It signals an error if the two are present. *) val override: Positions.t -> 'a option -> 'a option -> 'a option (* Support for on-the-fly expansion of anonymous rules. When such a rule is encountered, invoke [anonymous], which creates a fresh non-terminal symbol, records the definition of this symbol to a global variable, and returns this symbol. In the end, invoke [rules], so as to obtain a list of all recorded definitions. 
*)

val anonymous: Positions.t -> parameterized_branch list -> string

val rules: unit -> parameterized_rule list

(* [producer_names producers] returns an array [names] such that
   [names.(idx) = None] if the (idx + 1)-th producer is unnamed and
   [names.(idx) = Some id] if it is called [id]. *)

val producer_names :
  (_ * Syntax.identifier Positions.located option * _) list ->
  Syntax.identifier option array
menhir-20151112/src/positions.ml0000644000175000017500000000610012621170073015336 0ustar mehdimehdi(* TEMPORARY check that these functions are used everywhere and in a
   consistent manner; interaction with [Error]? *)

open Lexing

type t = {
  start_p : Lexing.position;
  end_p   : Lexing.position
}

type 'a located = {
  value    : 'a;
  position : t;
}

let value { value = v } = v

let position { position = p } = p

let with_pos p v = { value = v; position = p; }

let with_poss p1 p2 v = with_pos { start_p = p1; end_p = p2 } v

let map f v = { value = f v.value; position = v.position; }

let iter f { value = v } = f v

let mapd f v =
  let w1, w2 = f v.value in
  let pos = v.position in
  { value = w1; position = pos }, { value = w2; position = pos }

let dummy = { start_p = Lexing.dummy_pos; end_p = Lexing.dummy_pos }

let unknown_pos v = { value = v; position = dummy }

let start_of_position p = p.start_p

let end_of_position p = p.end_p

let filename_of_position p = p.start_p.Lexing.pos_fname

let line p = p.pos_lnum

let column p = p.pos_cnum - p.pos_bol

let characters p1 p2 =
  (column p1, p2.pos_cnum - p1.pos_bol) (* intentionally [p1.pos_bol] *)

let join x1 x2 = {
  start_p = if x1 = dummy then x2.start_p else x1.start_p;
  end_p   = if x2 = dummy then x1.end_p else x2.end_p
}

let lex_join x1 x2 = { start_p = x1; end_p = x2 }

let join_located l1 l2 f = {
  value    = f l1.value l2.value;
  position = join l1.position l2.position;
}

let string_of_lex_pos p =
  let c = p.pos_cnum - p.pos_bol in
  (string_of_int p.pos_lnum)^":"^(string_of_int c)

let string_of_pos p =
  let filename = filename_of_position p in
  (* [filename] is hopefully not "". *)
  let l = line p.start_p in
  let c1, c2 = characters p.start_p p.end_p in
  Printf.sprintf "File \"%s\", line %d, characters %d-%d" filename l c1 c2

let pos_or_undef = function
  | None -> dummy
  | Some x -> x

let cpos lexbuf = {
  start_p = Lexing.lexeme_start_p lexbuf;
  end_p   = Lexing.lexeme_end_p lexbuf;
}

let with_cpos lexbuf v = with_pos (cpos lexbuf) v

let string_of_cpos lexbuf = string_of_pos (cpos lexbuf)

let joinf f t1 t2 = join (f t1) (f t2)

let ljoinf f = List.fold_left (fun p t -> join p (f t)) dummy

let join_located_list ls f = {
  value    = f (List.map (fun l -> l.value) ls);
  position = ljoinf (fun x -> x.position) ls
}

(* The functions that print error messages and warnings require a list of
   positions. The following auxiliary functions help build such lists. *)

type positions = t list

let one (pos : Lexing.position) : positions =
  [ { start_p = pos; end_p = pos } ] (* or: lex_join pos pos *)

let two (pos1 : Lexing.position) (pos2 : Lexing.position) : positions =
  [ lex_join pos1 pos2 ]

let lexbuf (lexbuf : Lexing.lexbuf) : positions =
  [ lex_join lexbuf.Lexing.lex_start_p lexbuf.Lexing.lex_curr_p ]

let print (pos : Lexing.position) =
  Printf.printf
    "{ pos_fname = \"%s\"; pos_lnum = %d; pos_bol = %d; pos_cnum = %d }\n"
    pos.pos_fname pos.pos_lnum pos.pos_bol pos.pos_cnum
menhir-20151112/src/lexmli.mll0000644000175000017500000000312312621170073014755 0ustar mehdimehdi(* This code analyzes the output of [ocamlc -i] and returns a list of
   identifiers together with their types.
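   For instance (an illustrative input, not taken from an actual run), on
   text of the form:

     val menhir_begin_marker : int
     val xs : int list
     val f : int -> int
     val menhir_end_marker : int

   the lexer below yields the bindings [xs] and [f], each paired with the
   extent of its type.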
Types are represented by offsets in the source string. *) { let fail () = Error.error [] "failed to make sense of ocamlc's output." } let whitespace = [ ' ' '\t' '\n' '\r' ] let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_'] let identchar = ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '0'-'9'] (* '\'' forbidden *) (* Read a list of bindings. We start immediately after a [val] keyword, so we expect either an end marker, or an identifier, followed by a colon, followed by a type, followed by another list of bindings. In the latter case, we recognize the identifier and the colon, record where the type begins, and pass control to [type_then_bindings]. *) rule bindings env = parse | "menhir_end_marker : int" { env } | whitespace* ((lowercase identchar*) as id) whitespace* ':' whitespace* { type_then_bindings env id (Lexing.lexeme_end lexbuf) lexbuf } | _ | eof { fail() } (* Read a type followed by a list of bindings. *) and type_then_bindings env id openingofs = parse | whitespace+ "val" whitespace { let closingofs = Lexing.lexeme_start lexbuf in bindings ((id, openingofs, closingofs) :: env) lexbuf } | _ { type_then_bindings env id openingofs lexbuf } | eof { fail() } (* Skip up to the first [val] keyword that follows the begin marker, and start from there. *) and main = parse | _* "val menhir_begin_marker : int" whitespace+ "val" whitespace+ { bindings [] lexbuf } | _ | eof { fail() } menhir-20151112/src/LRijkstra.ml0000644000175000017500000014572312621170073015231 0ustar mehdimehdi(* The purpose of this algorithm is to find, for each pair of a state [s] and a terminal symbol [z] such that looking at [z] in state [s] causes an error, a minimal path (starting in some initial state) that actually triggers this error. *) (* This is potentially useful for grammar designers who wish to better understand the properties of their grammar, or who wish to produce a list of all possible syntax errors (or, at least, one syntax error in each automaton state where an error may occur). *) (* The problem seems rather tricky. One might think that it suffices to compute shortest paths in the automaton, and to use [Analysis.minimal] to replace each non-terminal symbol in a path with a minimal word that this symbol generates. One can indeed do so, but this yields only a lower bound on the actual shortest path to the error at [s, z]. Indeed, several difficulties arise, including the fact that reductions are subject to a lookahead hypothesis; the fact that some states have a default reduction, hence will never trigger an error; the fact that conflict resolution removes some (shift or reduce) actions, hence may suppress the shortest path. *) (* We explicitly choose to ignore the [error] token. Thus, we disregard any reductions or transitions that take place when the lookahead symbol is [error]. As a result, any state whose incoming symbol is [error] is found unreachable. It would be too complicated to have to create a first error in order to be able to take certain transitions or drop certain parts of the input. *) (* We never work with the terminal symbol [#] either. This symbol never appears in the maps returned by [Lr1.transitions] and [Lr1.reductions]. Thus, in principle, we work with ``real'' terminal symbols only. However, we encode [any] as [#] -- see below. *) (* NOTE: THIS FILE IS COMPILED WITH -noassert BY DEFAULT. If you would like the assertions to be tested at runtime, change that in the file _tags. The performance impact of the assertions is about 10%. 
*)

(* ------------------------------------------------------------------------ *)

(* To delay the side effects performed by this module, we wrap everything
   in a big functor. The functor also serves to pass verbosity parameters. *)

module Run (X : sig

  (* If [verbose] is set, produce various messages on [stderr]. *)
  val verbose: bool

  (* If [statistics] is defined, it is interpreted as the name of a file to
     which one line of statistics is appended. *)
  val statistics: string option

end) = struct

open Grammar

(* ------------------------------------------------------------------------ *)

(* Record our start time. *)

let now () =
  match X.statistics with
  | Some _ -> Unix.((times()).tms_utime)
  | None -> 0.0

let start = now()

(* ------------------------------------------------------------------------ *)

(* Because of our encoding of terminal symbols as 8-bit characters, this
   algorithm supports at most 256 terminal symbols. *)

let () =
  if Terminal.n > 256 then
    Error.error []
      "--list-errors supports at most 256 terminal symbols.\n\
       The grammar has %d terminal symbols." Terminal.n

(* ------------------------------------------------------------------------ *)

(* Build a module that represents words as (hash-consed) strings. Note:
   this functor application has a side effect (it allocates memory, and
   more importantly, it may fail). *)

module W = Terminal.Word(struct end)

(* ------------------------------------------------------------------------ *)

(* The [error] token may appear in the maps returned by [Lr1.transitions]
   and [Lr1.reductions], so we sometimes need to explicitly check for it. *)

let non_error z =
  not (Terminal.equal z Terminal.error)

(* We introduce a pseudo-terminal symbol [any]. It is used in several places
   later on, in particular in the [lookahead] field of a fact, to encode the
   absence of a lookahead hypothesis -- i.e., any terminal symbol will do. *)

(* We choose to encode [any] as [#]. There is no risk of confusion, since we
   do not use [#] anywhere. Thus, the assertion [Terminal.real z] implies
   [z <> any]. *)

let any =
  Terminal.sharp

(* ------------------------------------------------------------------------ *)

(* We begin with a number of auxiliary functions that provide information
   about the LR(1) automaton. These functions could perhaps be moved to a
   separate module. We keep them here, for the moment, because they are not
   used anywhere else. *)

(* [reductions_on s z] is the list of reductions permitted in state [s] when
   the lookahead symbol is [z]. This is a list of zero or one elements. This
   does not take default reductions into account. *)

let reductions_on s z : Production.index list =
  assert (Terminal.real z);
  try
    TerminalMap.find z (Lr1.reductions s)
  with Not_found ->
    []

(* [has_reduction s z] tells whether state [s] is willing to reduce some
   production (and if so, which one) when the lookahead symbol is [z]. It
   takes a possible default reduction into account. *)

let has_reduction s z : Production.index option =
  assert (Terminal.real z);
  match Invariant.has_default_reduction s with
  | Some (prod, _) ->
      Some prod
  | None ->
      match reductions_on s z with
      | prod :: prods ->
          assert (prods = []);
          Some prod
      | [] ->
          None

(* [can_reduce s prod] indicates whether state [s] is able to reduce
   production [prod] (either as a default reduction, or as a normal
   reduction). *)

let can_reduce s prod =
  match Invariant.has_default_reduction s with
  | Some (prod', _) when prod = prod' ->
      true
  | _ ->
      TerminalMap.fold (fun z prods accu ->
        (* A reduction on [#] is always a default reduction.
           (See [lr1.ml].) *)
        assert (not (Terminal.equal z Terminal.sharp));
        accu || non_error z && List.mem prod prods
      ) (Lr1.reductions s) false

(* [causes_an_error s z] tells whether state [s] will initiate an error on
   the lookahead symbol [z]. *)

let causes_an_error s z : bool =
  assert (Terminal.real z);
  match Invariant.has_default_reduction s with
  | Some _ ->
      false
  | None ->
      reductions_on s z = [] &&
      not (SymbolMap.mem (Symbol.T z) (Lr1.transitions s))

(* [foreach_terminal f] applies the function [f] to every terminal symbol
   in turn, except [error] and [#]. *)

let foreach_terminal =
  Terminal.iter_real

(* [foreach_terminal_not_causing_an_error s f] applies the function [f] to
   every terminal symbol [z] such that [causes_an_error s z] is false. This
   could be implemented in a naive manner using [foreach_terminal] and
   [causes_an_error]. This implementation is significantly more efficient. *)

let foreach_terminal_not_causing_an_error s f =
  match Invariant.has_default_reduction s with
  | Some _ ->
      (* There is a default reduction. No symbol causes an error. *)
      foreach_terminal f
  | None ->
      (* Enumerate every terminal symbol [z] for which there is a
         reduction. *)
      TerminalMap.iter (fun z _ ->
        (* A reduction on [#] is always a default reduction. (See [lr1.ml].) *)
        assert (not (Terminal.equal z Terminal.sharp));
        if non_error z then
          f z
      ) (Lr1.reductions s);
      (* Enumerate every terminal symbol [z] for which there is a
         transition. *)
      SymbolMap.iter (fun sym _ ->
        match sym with
        | Symbol.T z ->
            assert (not (Terminal.equal z Terminal.sharp));
            if non_error z then
              f z
        | Symbol.N _ ->
            ()
      ) (Lr1.transitions s)

(* Let us say a state [s] is solid if its incoming symbol is a terminal
   symbol (or if it has no incoming symbol at all, i.e., it is an initial
   state). It is fragile if its incoming symbol is a non-terminal symbol. *)

let is_solid s =
  match Lr1.incoming_symbol s with
  | None
  | Some (Symbol.T _) ->
      true
  | Some (Symbol.N _) ->
      false

(* [reduction_path_exists s w prod] tests whether the path determined by the
   sequence of symbols [w] out of the state [s] exists in the automaton and
   leads to a state where [prod] can be reduced. It further requires [w] not
   to contain the [error] token. Finally, if it sees the [error] token, it
   sets the flag [grammar_uses_error]. *)

let grammar_uses_error =
  ref false

let rec reduction_path_exists s (w : Symbol.t list) prod : bool =
  match w with
  | [] ->
      can_reduce s prod
  | (Symbol.T t) :: _ when Terminal.equal t Terminal.error ->
      grammar_uses_error := true;
      false
  | a :: w ->
      match SymbolMap.find a (Lr1.transitions s) with
      | s ->
          reduction_path_exists s w prod
      | exception Not_found ->
          false

(* ------------------------------------------------------------------------ *)

(* Suppose [s] is a state that carries an outgoing edge labeled with a
   non-terminal symbol [nt]. We are interested in finding out how this edge
   can be taken. In order to do that, we must determine how, by starting in
   [s], one can follow a path that corresponds to (the right-hand side of) a
   production [prod] associated with [nt]. There are in general several such
   productions. The paths that they determine in the automaton form a
   "star". We represent the star rooted at [s] as a trie. For every state
   [s], the star rooted at [s] is constructed in advance, before the
   algorithm runs. While the algorithm runs, a point in the trie (that is, a
   sub-trie) tells us where we come from, where we are, and which
   production(s) we are hoping to reduce in the future.
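   For instance, suppose the state [s] carries an outgoing edge labeled
   [nt], and [nt] has two productions, say [nt -> A B] and [nt -> A C].
   Then the star rooted at [s] contains two branches, which share their
   first step along [A]. The sub-trie reached via [A] has two children, one
   along [B] and one along [C], and each of these records the production
   that it completes. Thus, a point reached via [A] then [B] tells us that
   we started in [s], that we have consumed [A B], and that we hope to
   reduce [nt -> A B] in the future.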
*) module Trie : sig type trie (* [star s] creates a (new) trie whose source is [s], populated with its branches. (There is one branch for every production [prod] associated with every non-terminal symbol [nt] for which [s] carries an outgoing edge.) If the star turns out to be trivial then [None] is returned. *) val star: Lr1.node -> trie option (* After [star s] has been called, [size (Lr1.number s)] reports the size of the trie that has been constructed for state [s]. *) val size: int -> int (* After [star] has been called a number of times, [total_size()] reports the total size of the tries that have been constructed. *) val total_size: unit -> int (* Every (sub-)trie has a unique identity. (One can think of it as its address.) [compare] compares the identity of two tries. This can be used, e.g., to set up a map whose keys are tries. *) val compare: trie -> trie -> int (* [source t] returns the source state of the (sub-)trie [t]. This is the root of the star of which [t] is a sub-trie. In other words, this tells us "where we come from". *) val source: trie -> Lr1.node (* [current t] returns the current state of the (sub-)trie [t]. This is the root of the sub-trie [t]. In other words, this tells us "where we are". *) val current: trie -> Lr1.node (* [accepts prod t] tells whether the current state of the trie [t] is the end of a branch associated with production [prod]. If so, this means that we have successfully followed a path that corresponds to the right-hand side of production [prod]. *) val accepts: Production.index -> trie -> bool (* [step sym t] is the immediate sub-trie of [t] along the symbol [sym]. This function raises [Not_found] if [t] has no child labeled [sym]. *) val step: Symbol.t -> trie -> trie (* [verbose()] outputs debugging & performance information. *) val verbose: unit -> unit (* Since every (sub-)trie has a unique identity, its identity can serve as a unique integer code for this (sub-)trie. We allow this conversion, both ways. This mechanism is used only as a way of saving space in the encoding of facts. *) val encode: trie -> int val decode: int -> trie end = struct (* A trie has the following structure. *) type trie = { (* A unique identity, used by [compare]. The trie construction code ensures that these numbers are indeed unique: see [fresh], [insert], [star]. *) identity: int; (* The root state of this star: "where we come from". *) source: Lr1.node; (* The current state, i.e., the root of this sub-trie: "where we are". *) current: Lr1.node; (* The productions that we can reduce in the current state. In other words, if this list is nonempty, then the current state is the end of one (or several) branches. It can nonetheless have children. *) mutable productions: Production.index list; (* The children, or sub-tries. *) mutable transitions: trie SymbolMap.t (* The two fields above are written only during the construction of a trie. Once every trie has been constructed, they are frozen. *) } (* This counter is used by [mktrie] to produce unique identities. *) let c = ref 0 (* We keep a mapping of integer identities to tries. Whenever a new identity is assigned, this mapping must be updated. *) let tries = let s : Lr1.node = Obj.magic () in (* yes, this hurts *) let dummy = { identity = -1; source = s; current = s; productions = []; transitions = SymbolMap.empty } in MenhirLib.InfiniteArray.make dummy (* This smart constructor creates a new trie with a unique identity. 
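     (More precisely, the [identity] field receives the current value of
     the counter [c], which is incremented as a side effect, so no two
     tries ever share an identity; and the new trie is recorded in the
     [tries] array, so that it can later be recovered from its identity
     alone, via [decode].)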
*)

  let mktrie source current productions transitions =
    let identity = Misc.postincrement c in
    let t = { identity; source; current; productions; transitions } in
    MenhirLib.InfiniteArray.set tries identity t;
    t

  (* [insert t w prod] updates the trie (in place) by adding a new branch,
     corresponding to the sequence of symbols [w], and ending with a
     reduction of production [prod]. We assume that
     [reduction_path_exists t.current w prod] holds, so we need not worry
     about this being a dead branch, and we can use destructive updates
     without having to set up an undo mechanism. *)

  let rec insert (t : trie) (w : Symbol.t list) prod : unit =
    match w with
    | [] ->
        assert (can_reduce t.current prod);
        t.productions <- prod :: t.productions
    | a :: w ->
        match SymbolMap.find a (Lr1.transitions t.current) with
        | exception Not_found ->
            assert false
        | successor ->
            (* Find our child at [a], or create it. *)
            let t' =
              try
                SymbolMap.find a t.transitions
              with Not_found ->
                let t' = mktrie t.source successor [] SymbolMap.empty in
                t.transitions <- SymbolMap.add a t' t.transitions;
                t'
            in
            (* Update our child. *)
            insert t' w prod

  (* [insert t prod] inserts a new branch, corresponding to production
     [prod], into the trie [t], which is updated in place. *)

  let insert t prod : unit =
    let w = Array.to_list (Production.rhs prod) in
    (* Check whether the path [w] leads to a state where [prod] can be
       reduced. If not, then some transition or reduction action must have
       been suppressed by conflict resolution; or the path [w] involves the
       [error] token. In that case, the branch is dead, and is not added.
       This test is superfluous (i.e., it would be OK to add a dead branch)
       but allows us to build a slightly smaller star in some cases. *)
    if reduction_path_exists t.current w prod then
      insert t w prod

  (* [fresh s] creates a new empty trie whose source is [s]. *)

  let fresh source =
    mktrie source source [] SymbolMap.empty

  (* The star at [s] is obtained by starting with a fresh empty trie and
     inserting into it every production [prod] whose left-hand side [nt] is
     the label of an outgoing edge at [s]. *)

  let star s =
    let t = fresh s in
    SymbolMap.iter (fun sym _ ->
      match sym with
      | Symbol.T _ ->
          ()
      | Symbol.N nt ->
          Production.iternt nt (insert t)
    ) (Lr1.transitions s);
    t

  (* A trie [t] is nontrivial if it has at least one branch, i.e., contains
     at least one sub-trie whose [productions] field is nonempty. Trivia: a
     trie of size greater than 1 is necessarily nontrivial, but the converse
     is not true: a nontrivial trie can have size 1. (This occurs if all
     productions have zero length.) *)

  let trivial t =
    t.productions = [] && SymbolMap.is_empty t.transitions

  (* Redefine [star] to include a nontriviality test and to record the size
     of the newly built trie. *)

  let size =
    Array.make Lr1.n (-1)

  let star s =
    let initial = !c in
    let t = star s in
    let final = !c in
    size.(Lr1.number s) <- final - initial;
    if trivial t then None else Some t

  let size s =
    assert (size.(s) >= 0);
    size.(s)

  let total_size () =
    !c

  let compare t1 t2 =
    Pervasives.compare t1.identity t2.identity

  let source t =
    t.source

  let current t =
    t.current

  let accepts prod t =
    List.mem prod t.productions

  let step a t =
    SymbolMap.find a t.transitions (* careful: may raise [Not_found] *)

  let verbose () =
    Printf.eprintf "Total star size: %d\n%!"
(total_size()) let decode i = let t = MenhirLib.InfiniteArray.get tries i in assert (t.identity = i); (* ensure we do not get the [dummy] trie *) t let encode t = assert (decode t.identity == t); (* round-trip property *) t.identity end (* ------------------------------------------------------------------------ *) (* The main algorithm, [LRijkstra], accumulates facts. A fact is a triple of a [position] (that is, a sub-trie), a [word], and a [lookahead] assumption. Such a fact means that this [position] can be reached, from the source state [Trie.source position], by consuming [word], under the assumption that the next input symbol is [lookahead]. *) (* We allow [lookahead] to be [any] so as to indicate that this fact does not have a lookahead assumption. *) (* type fact = { position: Trie.trie; word: W.word; lookahead: Terminal.t (* may be [any] *) } *) (* To save memory (and therefore time), we encode a fact in a single OCaml integer value. This is made possible by the fact that tries, words, and terminal symbols are represented as (or can be encoded as) integers. This admittedly horrible hack allows us to save roughly a factor of 2 in space, and to gain 10% in time. *) type fact = int let dummy : fact = -1 (* should never be accessed! *) (* Encoding and decoding facts. *) (* We encode [position|word|lookahead] in a single word of memory. *) (* The lookahead symbol fits in 8 bits. *) (* In the largest grammars that we have seen, the number of unique words is about 3.10^5, so a word should fit in about 19 bits (2^19 = 524288). In the largest grammars that we have seen, the total star size is about 64000, so a trie should fit in about 17 bits (2^17 = 131072). *) (* On a 64-bit machine, we have ample space in a 63-bit word! We allocate 30 bits for [word] and the rest (i.e., 25 bits) for [position]. *) (* On a 32-bit machine, we are a bit more cramped! In Menhir's own fancy-parser, the number of terminal symbols is 27, the number of unique words is 566, and the total star size is 546. We allocate 12 bits for [word] and 11 bits for [position]. This is better than refusing to work altogether, but still not great. A more satisfactory approach might be to revert to heap allocation of facts when in 32-bit mode, but that would make the code somewhat ugly. *) let w_lookahead = 8 let w_word = if Sys.word_size < 64 then 12 else 30 let w_position = Sys.word_size - 1 - (w_word + w_lookahead) (* 25, on a 64-bit machine *) let identity (fact : fact) : int = assert (fact <> dummy); fact lsr (w_word + w_lookahead) let position (fact : fact) : Trie.trie = assert (fact <> dummy); Trie.decode (identity fact) let word (fact : fact) : W.word = assert (fact <> dummy); (fact lsr w_lookahead) land (1 lsl w_word - 1) let lookahead (fact : fact) : Terminal.t = Terminal.i2t (fact land (1 lsl w_lookahead - 1)) let mkfact position (word : W.word) lookahead = let position : int = Trie.encode position and word : int = word and lookahead : int = Terminal.t2i lookahead in assert (0 <= position && 0 <= word && 0 <= lookahead); assert (lookahead < 1 lsl w_lookahead); if position < 1 lsl w_position && word < 1 lsl w_word then (* [lsl] binds tighter than [lor] *) (position lsl w_word lor word) lsl w_lookahead lor lookahead else let advice = if Sys.word_size < 64 then "Please use a 64-bit machine." else "Please report this error to Menhir's developers." in Error.error [] "an internal limit was exceeded.\n\ Sys.word_size = %d. Position = %d. Word = %d.\n\ %s%!" 
      Sys.word_size position word advice

let mkfact p w l =
  let fact = mkfact p w l in
  assert (word fact == w);       (* round-trip property *)
  assert (lookahead fact == l);  (* round-trip property *)
  assert (position fact == p);   (* round-trip property *)
  fact

(* Two invariants reduce the number of facts that we consider:

   1. If [lookahead] is a real terminal symbol [z] (i.e., not [any]), then
      [z] does not cause an error in the [current] state. It would be
      useless to consider a fact that violates this property; this cannot
      possibly lead to a successful reduction. In practice, this refinement
      allows reducing the number of facts that go through the queue by a
      factor of two.

   2. [lookahead] is [any] iff the [current] state is solid. This sounds
      rather reasonable (when a state is entered by shifting, it is entered
      regardless of which symbol follows) and simplifies the implementation
      of the sub-module [F]. *)

let invariant1 position _word lookahead =
  let current = Trie.current position in
  lookahead = any || not (causes_an_error current lookahead)

let invariant2 position _word lookahead =
  let current = Trie.current position in
  (lookahead = any) = is_solid current

(* [compatible z a] checks whether the terminal symbol [a] satisfies the
   lookahead assumption [z] -- which can be [any]. *)

let compatible z a =
  assert (non_error z);
  assert (Terminal.real a);
  z = any || z = a

(* ------------------------------------------------------------------------ *)

(* As in Dijkstra's algorithm, a priority queue contains the facts that
   await examination. The length of [word fact] serves as the priority of a
   fact. This guarantees that we discover shortest paths. (We never insert
   into the queue a fact whose priority is less than the priority of the
   last fact extracted out of the queue.) *)

(* [LowIntegerPriorityQueue] offers very efficient operations (essentially
   constant time, for a small constant). It exploits the fact that
   priorities are low nonnegative integers. *)

module Q = LowIntegerPriorityQueue

let q =
  Q.create dummy

(* In principle, there is no need to insert the fact into the queue if [F]
   already stores a comparable fact. We could perform this test in
   [enqueue]. However, a few experiments suggest that this is not
   worthwhile. The run time increases (because membership in [F] is tested
   twice, upon inserting and upon extracting) and the memory consumption
   does not seem to go down significantly. *)

let enqueue position word lookahead =
  (* [lookahead] can be [any], but cannot be [error] *)
  assert (non_error lookahead);
  assert (invariant1 position word lookahead);
  assert (invariant2 position word lookahead);
  (* The length of [word] serves as the priority of this fact. *)
  let priority = W.length word in
  (* Encode and enqueue this fact. *)
  Q.add q (mkfact position word lookahead) priority

(* ------------------------------------------------------------------------ *)

(* Construct the [star] of every state [s]. Initialize the priority queue. *)

let () =
  (* For every state [s]... *)
  Lr1.iter (fun s ->
    (* If the trie rooted at [s] is nontrivial... *)
    match Trie.star s with
    | None ->
        ()
    | Some position ->
        (* ...then insert an initial fact into the priority queue. *)
        (* In order to respect invariants 1 and 2, we must distinguish two
           cases. If [s] is solid, then we insert a single fact, whose
           lookahead assumption is [any]. Otherwise, we must insert one
           initial fact for every terminal symbol [z] that does not cause
           an error in state [s].
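           For instance, if [s] is solid, the code below enqueues the
           single fact [star(s), epsilon, any]; if [s] is fragile and only
           the symbols [a] and [b] do not cause an error in [s], it
           enqueues the two facts [star(s), epsilon, a] and
           [star(s), epsilon, b]. In either case, the priority is 0, since
           the word is empty.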
*)
        let word = W.epsilon in
        if is_solid s then
          enqueue position word any
        else
          foreach_terminal_not_causing_an_error s (fun z ->
            enqueue position word z
          )
  );
  if X.verbose then
    Trie.verbose()

(* Produce a warning if the grammar uses the [error] pseudo-token. *)

let () =
  if !grammar_uses_error then
    Error.warning []
      "--list-errors ignores all productions that involve the error token."

(* ------------------------------------------------------------------------ *)

(* The module [F] maintains a set of known facts. *)

(* Three aspects of a fact are of particular interest:
   - its position [position], given by [position fact];
   - its first symbol [a], given by [W.first (word fact) (lookahead fact)];
   - its lookahead assumption [z], given by [lookahead fact].

   For every triple of [position], [a], and [z], we store at most one fact
   (whose word has minimal length). Indeed, we are not interested in
   keeping track of several words that produce the same effect. Only the
   shortest such word is of interest.

   Thus, the total number of facts accumulated by the algorithm is at most
   [T.n^2], where [T] is the total size of the tries that we have
   constructed, and [n] is the number of terminal symbols. (This number can
   be quite large. [T] can be in the tens of thousands, and [n] can be over
   one hundred. These figures lead to a theoretical upper bound of 100M. In
   practice, for T=25K and n=108, we observe that the algorithm gathers
   about 7M facts.) *)

module F : sig

  (* [register fact] registers the fact [fact]. It returns [true] if this
     fact is new, i.e., no fact concerning the same triple of [position],
     [a], and [z] was previously known. *)
  val register: fact -> bool

  (* [query current z f] enumerates all known facts whose current state is
     [current] and whose lookahead assumption is compatible with [z]. The
     symbol [z] must be a real terminal symbol, i.e., cannot be [any]. *)
  val query: Lr1.node -> Terminal.t -> (fact -> unit) -> unit

  (* [size()] returns the number of facts currently stored in the set. *)
  val size: unit -> int

  (* [verbose()] outputs debugging & performance information. *)
  val verbose: unit -> unit

end = struct

  (* We need to query the set of facts in two ways. In [register], we must
     test whether a proposed triple of [position], [a], [z] already appears
     in the set. In [query], we must find all facts that match a pair
     [current, z], where [current] is a state. (Note that [position]
     determines [current], but the converse is not true: a position
     contains more information besides the current state.)

     To address these needs, we use a two-level table. The first level is a
     matrix indexed by [current] and [z]. At the second level, we find sets
     of facts, where two facts are considered equal if they have the same
     triple of [position], [a], and [z]. In fact, we know at this level
     that all facts have the same [z] component, so only [position] and [a]
     are compared.

     Because our facts satisfy invariant 2, [z] is [any] if and only if the
     state [current] is solid. This means that we are wasting quite a lot
     of space in the matrix (for a solid state, the whole line is empty,
     except for the [any] column). *)

  (* The level-2 sets. *)

  module M =
    MySet.Make(struct
      type t = fact
      let compare fact1 fact2 =
        assert (lookahead fact1 = lookahead fact2);
        (* Compare the two positions first. This can be done without going
           through [Trie.decode], by directly comparing the two integer
           identities.
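           (Recall that [Trie.compare] itself compares identities, so the
           two orderings coincide; the assertion below checks this
           correspondence, when assertions are enabled.)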
*) let c = Pervasives.compare (identity fact1) (identity fact2) in assert (c = Trie.compare (position fact1) (position fact2)); if c <> 0 then c else let z = lookahead fact1 in let a1 = W.first (word fact1) z and a2 = W.first (word fact2) z in (* note: [a1] and [a2] can be [any] here *) Terminal.compare a1 a2 end) (* The level-1 matrix. *) let table = Array.make (Lr1.n * Terminal.n) M.empty let index current z = Terminal.n * (Lr1.number current) + Terminal.t2i z let count = ref 0 let register fact = let current = Trie.current (position fact) in let z = lookahead fact in let i = index current z in let m = table.(i) in (* We crucially rely on the fact that [M.add] guarantees not to change the set if an ``equal'' fact already exists. Thus, a later, longer path is ignored in favor of an earlier, shorter path. *) let m' = M.add fact m in m != m' && begin incr count; table.(i) <- m'; true end let query current z f = assert (not (Terminal.equal z any)); (* If the state [current] is solid then the facts that concern it are stored in the column [any], and all of them are compatible with [z]. Otherwise, they are stored in all columns except [any], and only those stored in the column [z] are compatible with [z]. *) let i = index current (if is_solid current then any else z) in let m = table.(i) in M.iter f m let size () = !count let verbose () = Printf.eprintf "F stores %d facts.\n%!" (size()) end (* ------------------------------------------------------------------------ *) (* The module [E] is in charge of recording the non-terminal edges that we have discovered, or more precisely, the conditions under which these edges can be taken. It maintains a set of quadruples [s, nt, w, z], where such a quadruple means that in the state [s], the outgoing edge labeled [nt] can be taken by consuming the word [w], under the assumption that the next symbol is [z]. Again, the terminal symbol [a], given by [W.first w z], plays a role. For each quadruple [s, nt, a, z], we store at most one quadruple [s, nt, w, z]. Thus, internally, we maintain a mapping of [s, nt, a, z] to [w]. For greater simplicity, we do not allow [z] to be [any] in [register] or [query]. Allowing it would complicate things significantly, it seems. *) module E : sig (* [register s nt w z] records that, in state [s], the outgoing edge labeled [nt] can be taken by consuming the word [w], if the next symbol is [z]. It returns [true] if this information is new, i.e., if the underlying quadruple [s, nt, a, z] is new. The symbol [z] cannot be [any]. *) val register: Lr1.node -> Nonterminal.t -> W.word -> Terminal.t -> bool (* [query s nt a z] enumerates all words [w] such that, in state [s], the outgoing edge labeled [nt] can be taken by consuming the word [w], under the assumption that the next symbol is [z], and the first symbol of the word [w.z] is [a]. The symbol [a] can be [any]. The symbol [z] cannot be [any]. *) val query: Lr1.node -> Nonterminal.t -> Terminal.t -> Terminal.t -> (W.word -> unit) -> unit (* [size()] returns the number of edges currently stored in the set. *) val size: unit -> int (* [verbose()] outputs debugging & performance information. *) val verbose: unit -> unit end = struct (* At a high level, we must implement a mapping of [s, nt, a, z] to [w]. In practice, we can implement this specification using any combination of arrays, hash tables, balanced binary trees, and perfect hashing (i.e., packing several of [s], [nt], [a], [z] in one word.) 
     Here, we choose to use an array, indexed by [s], of hash tables,
     indexed by a key that packs [nt], [a], and [z] in one word. According
     to a quick experiment, the final population of the hash table
     [table.(index s)] seems to be roughly [Terminal.n * Trie.size s]. We
     note that using an initial capacity of 0 and relying on the hash
     table's resizing mechanism has a significant cost, which is why we try
     to guess a good initial capacity. *)

  module H = Hashtbl

  let table =
    Array.init Lr1.n (fun i ->
      let size = Trie.size i in
      H.create (if size = 1 then 0 else Terminal.n * size)
    )

  let index s =
    Lr1.number s

  let pack nt a z : int =
    (* We rely on the fact that we have at most 256 terminal symbols. *)
    (Nonterminal.n2i nt lsl 16) lor
    (Terminal.t2i a lsl 8) lor
    (Terminal.t2i z)

  let count =
    ref 0

  let register s nt w z =
    assert (Terminal.real z);
    let i = index s in
    let m = table.(i) in
    let a = W.first w z in
    (* Note that looking at [a] in state [s] cannot cause an error. *)
    assert (not (causes_an_error s a));
    let key = pack nt a z in
    if H.mem m key then
      false
    else begin
      incr count;
      H.add m key w;
      true
    end

  let rec query s nt a z f =
    assert (Terminal.real z);
    if Terminal.equal a any then begin
      (* If [a] is [any], we query the table for every real symbol [a]. We
         can limit ourselves to symbols that do not cause an error in state
         [s]. Those that do certainly do not have an entry; see the
         assertion in [register] above. *)
      foreach_terminal_not_causing_an_error s (fun a ->
        query s nt a z f
      )
    end else begin
      let i = index s in
      let m = table.(i) in
      let key = pack nt a z in
      match H.find m key with
      | w ->
          f w
      | exception Not_found ->
          ()
    end

  let size () =
    !count

  let verbose () =
    Printf.eprintf "E stores %d edges.\n%!" (size())

end

(* ------------------------------------------------------------------------ *)

(* [new_edge s nt w z] is invoked when we discover that in the state [s],
   the outgoing edge labeled [nt] can be taken by consuming the word [w],
   under the assumption that the next symbol is [z]. We check whether this
   quadruple already exists in the set [E]. If not, then we add it, and we
   compute its consequences, in the form of new facts, which we insert into
   the priority queue for later examination. *)

let new_edge s nt w z =
  assert (Terminal.real z);
  if E.register s nt w z then
    let sym = Symbol.N nt in
    (* Query [F] for existing facts which could be extended by following
       this newly discovered edge. They must be facts whose current state
       is [s] and whose lookahead assumption is compatible with [a]. For
       each such fact, ... *)
    F.query s (W.first w z) (fun fact ->
      assert (compatible (lookahead fact) (W.first w z));
      (* ... try to take one step in the trie along an edge labeled [nt]. *)
      match Trie.step sym (position fact) with
      | position ->
          (* This takes us to a new state whose incoming symbol is [nt].
             Hence, this state is not solid. In order to satisfy invariant
             2, we must create a fact whose lookahead assumption is not
             [any]. That's fine, since our lookahead assumption is [z]. In
             order to satisfy invariant 1, we must check that [z] does not
             cause an error in this state. *)
          assert (not (is_solid (Trie.current position)));
          if not (causes_an_error (Trie.current position) z) then
            let word = W.append (word fact) w in
            enqueue position word z
      | exception Not_found ->
          (* Could not take a step in the trie. This means this branch
             leads nowhere of interest, and was pruned when the trie was
             constructed.
*) () ) (* ------------------------------------------------------------------------ *) (* [new_fact fact] is invoked when we discover a new fact (i.e., one that was not previously known). It studies the consequences of this fact. These consequences are of two kinds: - As in Dijkstra's algorithm, the new fact can be viewed as a newly discovered vertex. We study its (currently known) outgoing edges, and enqueue new facts in the priority queue. - Sometimes, a fact can also be viewed as a newly discovered edge. This is the case when the word that took us from [source] to [current] represents a production of the grammar and [current] is willing to reduce this production. We record the existence of this edge, and re-inspect any previously discovered vertices which are interested in this outgoing edge. *) let new_fact fact = (* Throughout this rather long function, there is just one [fact]. Let's name its components right now, so as to avoid accessing them several times. (That could be costly, as it requires decoding the fact.) *) let position = position fact and lookahead = lookahead fact and word = word fact in let source = Trie.source position and current = Trie.current position in (* 1. View [fact] as a vertex. Examine the transitions out of [current]. For every transition labeled by a symbol [sym] and into a state [target], ... *) Lr1.transitions current |> SymbolMap.iter (fun sym target -> (* ... try to follow this transition in the trie [position], down to a child which we call [child]. *) match Trie.step sym position, sym with | exception Not_found -> (* Could not take a step in the trie. This means this transition leads nowhere of interest. *) () | child, Symbol.T t -> (* 1a. The transition exists in the trie, and [sym] is in fact a terminal symbol [t]. We note that [t] cannot be the [error] token, because the trie does not have any edges labeled [error]. *) assert (Lr1.Node.compare (Trie.current child) target = 0); assert (is_solid target); assert (non_error t); (* If the lookahead assumption [lookahead] is compatible with [t], then we derive a new fact, where one more edge has been taken, and enqueue this new fact for later examination. *) (* The state [target] is solid, i.e., its incoming symbol is terminal. This state is always entered without consideration for the next lookahead symbol. Thus, we can use [any] as the lookahead assumption in the new fact that we produce. If we did not have [any], we would have to produce one fact for every possible lookahead symbol. *) if compatible lookahead t then let word = W.append word (W.singleton t) in enqueue child word any | child, Symbol.N nt -> (* 1b. The transition exists in the trie, and [sym] is in fact a nonterminal symbol [nt]. *) assert (Lr1.Node.compare (Trie.current child) target = 0); assert (not (is_solid target)); (* We need to know how this nonterminal edge can be taken. We query [E] for a word [w] that allows us to take this edge. In general, the answer depends on the terminal symbol [z] that comes *after* this word: we try all such symbols. We must make sure that the first symbol of the word [w.z] satisfies the lookahead assumption [lookahead]; this is ensured by passing this information to [E.query]. *) (* It could be the case that, due to a default reduction, the answer to our query does not depend on [z], and we are wasting work. However, allowing [z] to be [any] in [E.query], and taking advantage of this to increase performance, seems difficult. 
*) (* Remark by Jacques-Henri Jourdan: we could remove the outer loop on [z], remove the parameter [z] to [E.query], and let [E.query] itself enumerate all values of [z]. Potentially this could allow a more efficient implementation of the data structure [E]. *) foreach_terminal_not_causing_an_error target (fun z -> E.query current nt lookahead z (fun w -> assert (compatible lookahead (W.first w z)); let word = W.append word w in enqueue child word z ) ) ); (* 2. View [fact] as a possible edge. This is possible if the path from [source] to the [current] state represents a production [prod] and [current] is willing to reduce this production. Then, reducing [prod] takes us all the way back to [source]. Thus, this production gives rise to an edge labeled [nt] -- the left-hand side of [prod] -- out of [source]. *) let z = lookahead in if not (Terminal.equal z any) then begin (* 2a. The lookahead assumption [z] is a real terminal symbol. We check whether [current] is willing to reduce some production [prod] on [z], and whether the sub-trie [position] accepts [prod], which means that this reduction takes us back to the root of the trie. If so, we have discovered a new edge. *) match has_reduction current z with | Some prod when Trie.accepts prod position -> new_edge source (Production.nt prod) word z | _ -> () end else begin (* 2b. The lookahead assumption is [any]. We must consider every pair [prod, z] such that the [current] state can reduce [prod] on [z] and [position] accepts [prod]. *) match Invariant.has_default_reduction current with | Some (prod, _) -> if Trie.accepts prod position then (* [new_edge] does not accept [any] as its 4th parameter, so we must iterate over all terminal symbols. *) foreach_terminal (fun z -> new_edge source (Production.nt prod) word z ) | None -> TerminalMap.iter (fun z prods -> if non_error z then let prod = Misc.single prods in if Trie.accepts prod position then new_edge source (Production.nt prod) word z ) (Lr1.reductions current) end (* ------------------------------------------------------------------------ *) (* The main loop of the algorithm. *) (* [level] is the length of [word fact] for the facts that we are examining at the moment. [extracted] counts how many facts we have extracted out of the priority queue. [considered] counts how many of these were found to be new, and subsequently passed to [new_fact]. *) let level, extracted, considered = ref 0, ref 0, ref 0 let done_with_level () = Printf.eprintf "Done with level %d.\n" !level; W.verbose(); F.verbose(); E.verbose(); Printf.eprintf "Q stores %d facts.\n" (Q.cardinal q); Printf.eprintf "%d facts extracted out of Q, of which %d considered.\n%!" !extracted !considered let () = Q.repeat q (fun fact -> incr extracted; if F.register fact then begin if X.verbose && W.length (word fact) > !level then begin done_with_level(); level := W.length (word fact); end; incr considered; new_fact fact end ); if X.verbose then done_with_level(); Time.tick "Running LRijkstra" (* ------------------------------------------------------------------------ *) (* The following code validates the fact that an error can be triggered in state [s'] by beginning at the start symbol [nt] and reading the sequence of terminal symbols [w]. We use this for debugging purposes. Furthermore, this gives us a list of spurious reductions, which we use to produce a comment. *) let fail msg = Printf.eprintf "LRijkstra: internal error: %s.\n%!" 
msg; exit 1 let validate nt s' w : ReferenceInterpreter.target = let open ReferenceInterpreter in match check_error_path false nt (W.elements w) with | OInputReadPastEnd -> fail "input was read past its end" | OInputNotFullyConsumed -> fail "input was not fully consumed" | OUnexpectedAccept -> fail "input was unexpectedly accepted" | OK ((state, _) as target) -> if Lr1.Node.compare state s' <> 0 then fail ( Printf.sprintf "error occurred in state %d instead of %d" (Lr1.number state) (Lr1.number s') ) else target (* ------------------------------------------------------------------------ *) (* We now wish to determine, given a state [s'] and a terminal symbol [z], a minimal path that takes us from some entry state to state [s'] with [z] as the next (unconsumed) symbol. *) (* This can be formulated as a search for a shortest path in a graph. The graph is not just the automaton, though. It is a (much) larger graph whose vertices are pairs [s, z] and whose edges are obtained by querying the module [E] above. For this purpose, we use Dijkstra's algorithm, unmodified. Experiments show that the running time of this phase is typically 10x shorter than the running time of the main loop above. *) module A = Astar.Make(struct (* A vertex is a pair [s, z], where [z] is a real terminal symbol. *) type node = Lr1.node * Terminal.t let equal (s'1, z1) (s'2, z2) = Lr1.Node.compare s'1 s'2 = 0 && Terminal.compare z1 z2 = 0 let hash (s, z) = Hashtbl.hash (Lr1.number s, z) (* An edge is labeled with a word. *) type label = W.word (* We search forward from every [s, z], where [s] is an initial state. *) let sources f = foreach_terminal (fun z -> ProductionMap.iter (fun _ s -> f (s, z) ) Lr1.entry ) (* The successors of [s, z] are defined as follows. *) let successors (s, z) edge = assert (Terminal.real z); (* For every transition out of [s], labeled [sym], leading to [s']... *) Lr1.transitions s |> SymbolMap.iter (fun sym s' -> match sym with | Symbol.T t -> if Terminal.equal z t then (* If [sym] is the terminal symbol [z], then this transition matches our lookahead assumption, so we can take it. For every [z'], we have an edge to [s', z'], labeled with the singleton word [z]. *) let w = W.singleton z in foreach_terminal (fun z' -> edge w 1 (s', z') ) | Symbol.N nt -> (* If [sym] is a nonterminal symbol [nt], then we query [E] in order to find out which (minimal) words [w] allow us to take this transition. We must again try every [z'], and must respect the constraint that the first symbol of the word [w.z'] is [z]. For every [z'] and [w] that fulfill these requirements, we have an edge to [s', z'], labeled with the word [w]. *) foreach_terminal (fun z' -> E.query s nt z z' (fun w -> edge w (W.length w) (s', z') ) ) ) (* Algorithm A*, used with a zero estimate, is Dijkstra's algorithm. We have experimented with a non-zero estimate, but the performance increase was minimal. *) let estimate _ = 0 end) (* ------------------------------------------------------------------------ *) (* [explored] counts how many graph nodes we have discovered during the search. *) let explored = ref 0 (* We wish to store a set of triples [nt, w, (s', spurious)], meaning that an error can be triggered in state [s'] by beginning in the initial state that corresponds to [nt] and by reading the sequence of terminal symbols [w]. We wish to store at most one such triple for every state [s'], so we organize the data as a set [domain] of states [s'] and a list [data] of triples [nt, w, (s', spurious)]. 
The list [spurious] documents the spurious reductions that are performed by the parser at the end. *) (* We could print this data as we go, which would naturally result in sorting the output by increasing word sizes. However, it seems preferable to sort the sentences lexicographically, so that similar sentences end up close to one another. (We could also sort them by state number. The result would be roughly similar.) This is why we store a list of triples and sort it before printing it out. *) let domain = ref Lr1.NodeSet.empty let data : (Nonterminal.t * W.word * ReferenceInterpreter.target) list ref = ref [] (* The set [reachable] stores every reachable state (regardless of whether an error can be triggered in that state). *) let reachable = ref Lr1.NodeSet.empty (* Perform the forward search. *) let _, _ = A.search (fun ((s', z), path) -> incr explored; reachable := Lr1.NodeSet.add s' !reachable; (* If [z] causes an error in state [s'] and this is the first time we are able to trigger an error in this state, ... *) if causes_an_error s' z && not (Lr1.NodeSet.mem s' !domain) then begin (* Reconstruct the initial state [s] and the word [w] that lead to this error. *) let (s, _), ws = A.reverse path in let w = List.fold_right W.append ws (W.singleton z) in (* Check that the reference interpreter confirms our finding. At the same time, compute a list of spurious reductions. *) let nt = Lr1.nt_of_entry s in let target = validate nt s' w in (* Store this new data. *) domain := Lr1.NodeSet.add s' !domain; data := (nt, w, target) :: !data end ) (* Sort and output the data. *) let () = !data |> List.fast_sort (fun (nt1, w1, _) (nt2, w2, _) -> let c = Nonterminal.compare nt1 nt2 in if c <> 0 then c else W.compare w2 w1 ) |> List.map (fun (nt, w, target) -> (nt, W.elements w, target)) |> List.iter Interpret.print_messages_item (* ------------------------------------------------------------------------ *) (* Verbosity. *) let max_heap_size = if X.verbose || X.statistics <> None then let stat = Gc.quick_stat() in (stat.Gc.top_heap_words * (Sys.word_size / 8) / 1024 / 1024) else 0 (* dummy *) let () = Time.tick "Forward search"; if X.verbose then begin Printf.eprintf "%d graph nodes explored by forward search.\n\ %d out of %d states are reachable.\n\ Found %d states where an error can occur.\n\ Maximum size reached by the major heap: %dM\n%!" !explored (Lr1.NodeSet.cardinal !reachable) Lr1.n (Lr1.NodeSet.cardinal !domain) max_heap_size end (* ------------------------------------------------------------------------ *) (* If requested by the client, write one line of statistics to a .csv file. *) let stop = now() let () = X.statistics |> Option.iter (fun filename -> let c = open_out_gen [ Open_creat; Open_append; Open_text ] 0o644 filename in Printf.fprintf c "%s,%d,%d,%d,%d,%d,%d,%d,%.2f,%d\n%!" (* Grammar name. *) Settings.base (* Number of terminal symbols. *) Terminal.n (* Number of nonterminal symbols. *) Nonterminal.n (* Grammar size (not counting the error productions). *) begin Production.foldx (fun prod accu -> let rhs = Production.rhs prod in if List.mem (Symbol.T Terminal.error) (Array.to_list rhs) then accu else accu + Array.length rhs ) 0 end (* Automaton size (i.e., number of states). *) Lr1.n (* Total trie size. *) (Trie.total_size()) (* Size of [F]. *) (F.size()) (* Size of [E]. *) (E.size()) (* Elapsed user time, in seconds. *) (stop -. start) (* Max heap size, in megabytes. 
*) max_heap_size ; close_out c ) (* ------------------------------------------------------------------------ *) end menhir-20151112/src/InspectionTableInterpreter.ml0000644000175000017500000001630212621170074020622 0ustar mehdimehdi(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, INRIA Paris-Rocquencourt *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright 2005-2015 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (* -------------------------------------------------------------------------- *) (* The type functor. *) module Symbols (T : sig type 'a terminal type 'a nonterminal end) = struct open T (* This should be the only place in the whole library (and generator!) where these types are defined. *) type 'a symbol = | T : 'a terminal -> 'a symbol | N : 'a nonterminal -> 'a symbol type xsymbol = | X : 'a symbol -> xsymbol end (* -------------------------------------------------------------------------- *) (* The code functor. *) module Make (B : TableFormat.TABLES) (T : InspectionTableFormat.TABLES with type 'a lr1state = int) = struct (* Including [T] is an easy way of inheriting the definitions of the types [symbol] and [xsymbol]. *) include T (* This auxiliary function decodes a packed linearized array, as created by [TableBackend.linearize_and_marshal1]. Here, we read a row all at once. *) let read_packed_linearized ((data, entry) : PackedIntArray.t * PackedIntArray.t) (i : int) : int list = LinearizedArray.read_row_via (PackedIntArray.get data) (PackedIntArray.get entry) i (* This auxiliary function decodes a symbol. The encoding was done by [encode_symbol] or [encode_symbol_option] in the table back-end. *) let decode_symbol (symbol : int) : T.xsymbol = (* If [symbol] is 0, then we have no symbol. This could mean e.g. that the function [incoming_symbol] has been applied to an initial state. In principle, this cannot happen. *) assert (symbol > 0); (* The low-order bit distinguishes terminal and nonterminal symbols. *) let kind = symbol land 1 in let symbol = symbol lsr 1 in if kind = 0 then T.terminal (symbol - 1) else T.nonterminal symbol (* These auxiliary functions convert a symbol to its integer code. For speed and for convenience, we use an unsafe type cast. This relies on the fact that the data constructors of the [terminal] and [nonterminal] GADTs are declared in an order that reflects their internal code. In the case of nonterminal symbols, we add [start] to account for the presence of the start symbols. *) let n2i (nt : 'a T.nonterminal) : int = let answer = B.start + Obj.magic nt in assert (T.nonterminal answer = X (N nt)); (* TEMPORARY roundtrip *) answer let t2i (t : 'a T.terminal) : int = let answer = Obj.magic t in assert (T.terminal answer = X (T t)); (* TEMPORARY roundtrip *) answer (* Ordering functions. *) let compare_terminals t1 t2 = (* Subtraction is safe because overflow is impossible. *) t2i t1 - t2i t2 let compare_nonterminals nt1 nt2 = (* Subtraction is safe because overflow is impossible. 
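     (Indeed, both operands are small nonnegative integers: symbol codes
     are bounded by the number of symbols in the grammar, so their
     difference fits comfortably in a native [int].)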
*) n2i nt1 - n2i nt2 let compare_symbols symbol1 symbol2 = match symbol1, symbol2 with | X (T _), X (N _) -> -1 | X (N _), X (T _) -> 1 | X (T t1), X (T t2) -> compare_terminals t1 t2 | X (N nt1), X (N nt2) -> compare_nonterminals nt1 nt2 let compare_productions prod1 prod2 = (* Subtraction is safe because overflow is impossible. *) prod1 - prod2 let compare_items (prod1, index1) (prod2, index2) = let c = compare_productions prod1 prod2 in (* Subtraction is safe because overflow is impossible. *) if c <> 0 then c else index1 - index2 (* The function [incoming_symbol] goes through the tables [T.lr0_core] and [T.lr0_incoming]. This yields a representation of type [xsymbol], out of which we strip the [X] quantifier, so as to get a naked symbol. This last step is ill-typed and potentially dangerous. It is safe only because this function is used at type ['a lr1state -> 'a symbol], which forces an appropriate choice of ['a]. *) let incoming_symbol (s : 'a T.lr1state) : 'a T.symbol = let core = PackedIntArray.get T.lr0_core s in let symbol = decode_symbol (PackedIntArray.get T.lr0_incoming core) in match symbol with | T.X symbol -> Obj.magic symbol (* The function [lhs] reads the table [B.lhs] and uses [T.nonterminal] to decode the symbol. *) let lhs prod = T.nonterminal (PackedIntArray.get B.lhs prod) (* The function [rhs] reads the table [T.rhs] and uses [decode_symbol] to decode the symbol. *) let rhs prod = List.map decode_symbol (read_packed_linearized T.rhs prod) (* The function [items] maps the LR(1) state [s] to its LR(0) core, then uses [core] as an index into the table [T.lr0_items]. The items are then decoded by the function [export] below, which is essentially a copy of [Item.export]. *) type item = int * int let export t : item = (t lsr 7, t mod 128) let items s = (* Map [s] to its LR(0) core. *) let core = PackedIntArray.get T.lr0_core s in (* Now use [core] to look up the table [T.lr0_items]. *) List.map export (read_packed_linearized T.lr0_items core) (* The function [nullable] maps the nonterminal symbol [nt] to its integer code, which it uses to look up the array [T.nullable]. This yields 0 or 1, which we map back to a Boolean result. *) let decode_bool i = assert (i = 0 || i = 1); i = 1 let nullable nt = decode_bool (PackedIntArray.get1 T.nullable (n2i nt)) (* The function [first] maps the symbols [nt] and [t] to their integer codes, which it uses to look up the matrix [T.first]. *) let first nt t = decode_bool (PackedIntArray.unflatten1 T.first (n2i nt) (t2i t)) let xfirst symbol t = match symbol with | X (T t') -> compare_terminals t t' = 0 | X (N nt) -> first nt t (* The function [foreach_terminal] exploits the fact that the first component of [B.error] is [Terminal.n - 1], i.e., the number of terminal symbols, including [error] but not [#]. *) let rec foldij i j f accu = if i = j then accu else foldij (i + 1) j f (f i accu) let foreach_terminal f accu = let n, _ = B.error in foldij 0 n (fun i accu -> f (T.terminal i) accu ) accu let foreach_terminal_but_error f accu = let n, _ = B.error in foldij 0 n (fun i accu -> if i = B.error_terminal then accu else f (T.terminal i) accu ) accu end menhir-20151112/src/patricia.mli0000644000175000017500000000051012621170073015251 0ustar mehdimehdi(* This is an implementation of Patricia trees, following Chris Okasaki's paper at the 1998 ML Workshop in Baltimore. Both big-endian and little-endian trees are provided. Both sets and maps are implemented on top of Patricia trees. 
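   A hypothetical usage sketch (assuming that the signature [GMap.S]
   provides the usual [empty], [add], and [find] operations over integer
   keys):

     let m = Little.add 7 v Little.empty in
     ... Little.find 7 m ...

   The two variants differ in the order in which they inspect the bits of
   the key: little-endian trees branch on low-order bits first, while
   big-endian trees branch on high-order bits first.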
*) module Little : GMap.S with type key = int module Big : GMap.S with type key = int menhir-20151112/src/myocamlbuild.ml0000644000175000017500000002005012621170073015766 0ustar mehdimehdiopen Ocamlbuild_plugin open Command (* ---------------------------------------------------------------------------- *) (* The following rules can be copied into other projects. *) (* ---------------------------------------------------------------------------- *) (* The auxiliary function [lines] reads a file, line by line. *) let lines filename : string list = let c = open_in filename in let lines = ref [] in try while true do lines := input_line c :: !lines done; assert false with End_of_file -> close_in c; List.rev !lines (* The auxiliary function [noncomment] recognizes a non-blank non-comment line. *) let rec noncomment s i n = i < n && match s.[i] with | ' ' | '\t' | '\r' | '\n' -> noncomment s (i + 1) n | '#' -> false | _ -> true let noncomment s = noncomment s 0 (String.length s) (* ---------------------------------------------------------------------------- *) (* If [m] is the name of a module, [cmx m] is the name of its [.cmx] file. There are two candidate names, because of OCaml's convention where the first letter of the file name is capitalized to obtain the module name. We decide between the two by testing whether an [.ml] file exists. *) let cmx (m : string) : string = let candidate = m ^ ".cmx" in if Sys.file_exists (m ^ ".ml") then candidate else String.uncapitalize candidate (* ---------------------------------------------------------------------------- *) (* If there is a file [foo.mlpack], then the modules that are listed in this file are meant to be part of the library [Foo], and should receive the tag [for-pack(Foo)]. ocamlbuild doesn't do this automatically, so we program it. *) (* The argument [basename] should be the basename of the [.mlpack] file. *) let for_pack (basename : string) = let filename = basename ^ ".mlpack" in let modules = List.filter noncomment (lines filename) in let library = String.capitalize basename in let tags = [ Printf.sprintf "for-pack(%s)" library ] in List.iter (fun m -> tag_file (cmx m) tags ) modules (* ---------------------------------------------------------------------------- *) (* The following rules can be copied into other projects. *) (* ---------------------------------------------------------------------------- *) (* This rule generates an .ml file [target] from an .mly file [grammar] and a .messages file [messages]. *) (* If the name of a witness file is passed, it is made an additional dependency. This triggers a separate rule (see below) which performs a completeness check, that is, which checks that the .messages file lists every possible syntax error. *) let compile_errors grammar messages (witness : string list) target = rule "menhir/compile_errors" ~prod:target ~deps:([ grammar; messages ] @ witness) (fun env _ -> let grammar = env grammar in let tags = tags_of_pathname grammar ++ "ocaml" ++ "menhir" in Cmd(S[ !Options.ocamlyacc; (* menhir *) T tags; P grammar; A "--compile-errors"; P (env messages); Sh ">"; Px (env target); ])) (* A generic version of the above rule, with uniform naming. 
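For instance, when this rule is instantiated for the grammar [parser.mly], the sources below become [parser.mly] and [parserMessages.messages], the optional witness becomes [parserMessages.witness], and the target becomes [parserMessages.ml].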
*) let generic_compile_errors (check_completeness : bool) = compile_errors (* sources: *) "%.mly" "%Messages.messages" (* if present, this dependency forces a completeness check: *) (if check_completeness then [ "%Messages.witness" ] else []) (* target: *) "%Messages.ml" (* ---------------------------------------------------------------------------- *) (* This rule generates a .messages file [messages] from an .mly file [grammar]. *) let list_errors grammar messages = rule "produce a list of messages" ~prod:messages ~dep:grammar (fun env _ -> let grammar = env grammar in let tags = tags_of_pathname grammar ++ "ocaml" ++ "menhir" in Cmd(S[ !Options.ocamlyacc; (* menhir *) T tags; P grammar; A "--list-errors"; Sh ">"; Px (env messages); ])) (* ---------------------------------------------------------------------------- *) (* This rule compares the .messages files [messages1] and [messages2]. This is used to ensure complete coverage, i.e., check that every possible error is covered. The file [witness] is used as a witness that the comparison has been carried out. *) let compare_errors grammar messages1 messages2 witness = rule "compare two lists of messages" ~stamp:witness ~deps:[ grammar; messages1; messages2 ] (fun env _ -> let grammar = env grammar in let tags = tags_of_pathname grammar ++ "ocaml" ++ "menhir" in Cmd(S[ !Options.ocamlyacc; (* menhir *) T tags; P grammar; A "--compare-errors"; P (env messages1); A "--compare-errors"; P (env messages2); ])) (* ---------------------------------------------------------------------------- *) (* This rule combines the above two rules and makes sure that the [messages] file is complete, i.e., covers all possible errors. This rule creates a witness file. *) let completeness_check grammar messages witness = (* We need a name for a temporary [.messages] file, which we produce, and which lists all possible errors. *) let complete_messages = grammar ^ ".auto.messages" in (* Use the above two rules. *) list_errors grammar complete_messages; compare_errors grammar complete_messages messages witness (* A generic version of the above rule, with uniform naming. *) let generic_completeness_check () = completeness_check (* sources: *) "%.mly" "%Messages.messages" (* target: *) "%Messages.witness" (* ---------------------------------------------------------------------------- *) (* The following rules and settings are specific to the compilation of Menhir. *) (* ---------------------------------------------------------------------------- *) (* Dealing with the two parsers. *) (* Just for fun, Menhir comes with two parsers for its own input files. One is called [yacc-parser.mly] and is built using [ocamlyacc]. The other is called [fancy-parser.mly] and is built using Menhir. It depends on [standard.mly]. The choice between the two parsers is determined by the presence of the tag [fancy_parser]. *) let fancy () : bool = mark_tag_used "fancy_parser"; Tags.mem "fancy_parser" (tags_of_pathname "") let parser_configuration () = (* Create [parser.mly] by copying the appropriate source file. *) copy_rule "create parser.mly" (* source: *) (if fancy() then "fancy-parser.mly" else "yacc-parser.mly") (* target: *) "parser.mly" ; (* Create [Driver.ml] by copying the appropriate source file. *) copy_rule "create Driver.ml" (* source: *) (if fancy() then "fancyDriver.ml" else "yaccDriver.ml") (* target: *) "Driver.ml" ; (* In the fancy case, use Menhir to generate [parserMessages.ml] based on [parserMessages.messages], which is maintained by hand. 
Also, check that [parserMessages.messages] covers all possible syntax errors. *) if fancy() then begin generic_compile_errors true; (* We might wish to perform the completeness check only if [Sys.word_size] is at least 64. Indeed, on a 32-bit machine, [menhir --list-errors] is restricted to small grammars. For the moment, this works, because our grammar is small enough. *) generic_completeness_check() end (* ---------------------------------------------------------------------------- *) (* Compilation flags for Menhir. *) let flags () = (* -inline 1000 *) flag ["ocaml"; "compile"; "native"] (S [A "-inline"; A "1000"]); (* -noassert *) flag ["ocaml"; "compile"; "noassert"] (S [A "-noassert"]); (* nazi warnings *) flag ["ocaml"; "compile"; "my_warnings"] (S[A "-w"; A "@1..49-4-9-41-44"]) (* ---------------------------------------------------------------------------- *) (* Define custom compilation rules. *) let () = dispatch (function After_rules -> (* Add our rules after the standard ones. *) parser_configuration(); flags(); for_pack "menhirLib"; | _ -> () ) menhir-20151112/src/rawPrinter.mli0000644000175000017500000000037012621170073015616 0ustar mehdimehdi(* A debugging pretty-printer for [IL]. Newlines are used liberally, so as to facilitate diffs. *) module Make (X : sig (* This is the channel that is being written to. *) val f: out_channel end) : sig val expr: IL.expr -> unit end menhir-20151112/src/traverse.ml0000644000175000017500000003111312621170073015142 0ustar mehdimehdi(* Code for traversing or transforming [IL] terms. *) open IL open CodeBits (* This turns a list of value definitions into a hash table. It also counts and numbers the definitions. We assume that the left-hand side of every definition is a variable. *) let tabulate_defs (defs : valdef list) : int * (string, int * valdef) Hashtbl.t = let count = ref 0 in let table = Hashtbl.create 1023 in List.iter (fun def -> let k = !count in count := k + 1; Hashtbl.add table (pat2var def.valpat) (k, def) ) defs; !count, table (* This mixin class, used by [map] and [fold] below, helps maintain environments, which can be used to keep track of local variable bindings. *) class virtual ['env] env = object(self) (* The virtual method [pvar] records a local variable binding in the environment. *) method virtual pvar: 'env -> string -> 'env method pat env = function | PWildcard | PUnit -> env | PVar id -> self#pvar env id | PTuple ps | POr ps | PData (_, ps) -> self#pats env ps | PAnnot (p, _) -> self#pat env p | PRecord fps -> self#fpats env fps method pats env ps = List.fold_left self#pat env ps method fpats env fps = List.fold_left self#fpat env fps method fpat env (_, p) = self#pat env p end (* A class that helps transform expressions. The environment [env] can be used to keep track of local variable bindings. 
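The intended usage pattern (a hypothetical client, not code found in this file) is to inherit from [map] and override just the methods of interest. For instance, a transformation that renames every variable occurrence via a function [phi] could be written as:

  class rename (phi : string -> string) = object
    inherit [unit] map
    method pvar () _x = ()
    method evar _env x = EVar (phi x)
  end

  let rename_expr phi e = (new rename phi)#expr () e

Here the environment type is instantiated to [unit], so [pvar] records nothing; all other constructs are handled by the inherited methods, which rebuild a term only where a change actually occurred.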
*) exception NoChange class virtual ['env] map = object (self) inherit ['env] env method expr (env : 'env) e = try match e with | EVar x -> self#evar env x | EFun (ps, e) -> self#efun env ps e | EApp (e, es) -> self#eapp env e es | ELet (bs, e) -> self#elet env bs e | EMatch (e, bs) -> self#ematch env e bs | EIfThen (e, e1) -> self#eifthen env e e1 | EIfThenElse (e, e1, e2) -> self#eifthenelse env e e1 e2 | ERaise e -> self#eraise env e | ETry (e, bs) -> self#etry env e bs | EUnit -> self#eunit env | EIntConst k -> self#eintconst env k | EStringConst s -> self#estringconst env s | EData (d, es) -> self#edata env d es | ETuple es -> self#etuple env es | EAnnot (e, t) -> self#eannot env e t | EMagic e -> self#emagic env e | ERepr _ -> self#erepr env e | ERecord fs -> self#erecord env fs | ERecordAccess (e, f) -> self#erecordaccess env e f | ERecordWrite (e, f, e1) -> self#erecordwrite env e f e1 | ETextual action -> self#etextual env action | EComment (s, e) -> self#ecomment env s e | EPatComment (s, p, e) -> self#epatcomment env s p e | EArray es -> self#earray env es | EArrayAccess (e, i) -> self#earrayaccess env e i with NoChange -> e method evar _env _x = raise NoChange method efun env ps e = let e' = self#expr (self#pats env ps) e in if e == e' then raise NoChange else EFun (ps, e') method eapp env e es = let e' = self#expr env e and es' = self#exprs env es in if e == e' && es == es' then raise NoChange else EApp (e', es') method elet env bs e = let env, bs' = self#bindings env bs in let e' = self#expr env e in if bs == bs' && e == e' then raise NoChange else ELet (bs', e') method ematch env e bs = let e' = self#expr env e and bs' = self#branches env bs in if e == e' && bs == bs' then raise NoChange else EMatch (e', bs') method eifthen env e e1 = let e' = self#expr env e and e1' = self#expr env e1 in if e == e' && e1 == e1' then raise NoChange else EIfThen (e', e1') method eifthenelse env e e1 e2 = let e' = self#expr env e and e1' = self#expr env e1 and e2' = self#expr env e2 in if e == e' && e1 == e1' && e2 == e2' then raise NoChange else EIfThenElse (e', e1', e2') method eraise env e = let e' = self#expr env e in if e == e' then raise NoChange else ERaise e' method etry env e bs = let e' = self#expr env e and bs' = self#branches env bs in if e == e' && bs == bs' then raise NoChange else ETry (e', bs') method eunit _env = raise NoChange method eintconst _env _k = raise NoChange method estringconst _env _s = raise NoChange method edata env d es = let es' = self#exprs env es in if es == es' then raise NoChange else EData (d, es') method etuple env es = let es' = self#exprs env es in if es == es' then raise NoChange else ETuple es' method eannot env e t = let e' = self#expr env e in if e == e' then raise NoChange else EAnnot (e', t) method emagic env e = let e' = self#expr env e in if e == e' then raise NoChange else EMagic e' method erepr env e = let e' = self#expr env e in if e == e' then raise NoChange else ERepr e' method erecord env fs = let fs' = self#fields env fs in if fs == fs' then raise NoChange else ERecord fs' method erecordaccess env e f = let e' = self#expr env e in if e == e' then raise NoChange else ERecordAccess (e', f) method erecordwrite env e f e1 = let e' = self#expr env e and e1' = self#expr env e1 in if e == e' && e1 == e1' then raise NoChange else ERecordWrite (e', f, e1') method earray env es = let es' = self#exprs env es in if es == es' then raise NoChange else EArray es' method earrayaccess env e i = let e' = self#expr env e in if e == e' then raise NoChange 
else EArrayAccess (e', i) method etextual _env _action = raise NoChange method ecomment env s e = let e' = self#expr env e in if e == e' then raise NoChange else EComment (s, e') method epatcomment env s p e = let e' = self#expr env e in if e == e' then raise NoChange else EPatComment (s, p, e') method exprs env es = Misc.smap (self#expr env) es method fields env fs = Misc.smap (self#field env) fs method field env ((f, e) as field) = let e' = self#expr env e in if e == e' then field else (f, e') method branches env bs = Misc.smap (self#branch env) bs method branch env b = let e = b.branchbody in let e' = self#expr (self#pat env b.branchpat) e in if e == e' then b else { b with branchbody = e' } (* The method [binding] produces a pair of an updated environment and a transformed binding. *) method binding env ((p, e) as b) = let e' = self#expr env e in self#pat env p, if e == e' then b else (p, e') (* For nested non-recursive bindings, the environment produced by each binding is used to traverse the following bindings. The method [binding] produces a pair of an updated environment and a transformed list of bindings. *) method bindings env bs = Misc.smapa self#binding env bs method valdef env def = let e = def.valval in let e' = self#expr env e in if e == e' then def else { def with valval = e' } method valdefs env defs = Misc.smap (self#valdef env) defs end (* A class that helps iterate, or fold, over expressions. *) class virtual ['env, 'a] fold = object (self) inherit ['env] env method expr (env : 'env) (accu : 'a) e = match e with | EVar x -> self#evar env accu x | EFun (ps, e) -> self#efun env accu ps e | EApp (e, es) -> self#eapp env accu e es | ELet (bs, e) -> self#elet env accu bs e | EMatch (e, bs) -> self#ematch env accu e bs | EIfThen (e, e1) -> self#eifthen env accu e e1 | EIfThenElse (e, e1, e2) -> self#eifthenelse env accu e e1 e2 | ERaise e -> self#eraise env accu e | ETry (e, bs) -> self#etry env accu e bs | EUnit -> self#eunit env accu | EIntConst k -> self#eintconst env accu k | EStringConst s -> self#estringconst env accu s | EData (d, es) -> self#edata env accu d es | ETuple es -> self#etuple env accu es | EAnnot (e, t) -> self#eannot env accu e t | EMagic e -> self#emagic env accu e | ERepr _ -> self#erepr env accu e | ERecord fs -> self#erecord env accu fs | ERecordAccess (e, f) -> self#erecordaccess env accu e f | ERecordWrite (e, f, e1) -> self#erecordwrite env accu e f e1 | ETextual action -> self#etextual env accu action | EComment (s, e) -> self#ecomment env accu s e | EPatComment (s, p, e) -> self#epatcomment env accu s p e | EArray es -> self#earray env accu es | EArrayAccess (e, i) -> self#earrayaccess env accu e i method evar (_env : 'env) (accu : 'a) _x = accu method efun (env : 'env) (accu : 'a) ps e = let accu = self#expr (self#pats env ps) accu e in accu method eapp (env : 'env) (accu : 'a) e es = let accu = self#expr env accu e in let accu = self#exprs env accu es in accu method elet (env : 'env) (accu : 'a) bs e = let env, accu = self#bindings env accu bs in let accu = self#expr env accu e in accu method ematch (env : 'env) (accu : 'a) e bs = let accu = self#expr env accu e in let accu = self#branches env accu bs in accu method eifthen (env : 'env) (accu : 'a) e e1 = let accu = self#expr env accu e in let accu = self#expr env accu e1 in accu method eifthenelse (env : 'env) (accu : 'a) e e1 e2 = let accu = self#expr env accu e in let accu = self#expr env accu e1 in let accu = self#expr env accu e2 in accu method eraise (env : 'env) (accu : 'a) e = let accu 
= self#expr env accu e in accu method etry (env : 'env) (accu : 'a) e bs = let accu = self#expr env accu e in let accu = self#branches env accu bs in accu method eunit (_env : 'env) (accu : 'a) = accu method eintconst (_env : 'env) (accu : 'a) _k = accu method estringconst (_env : 'env) (accu : 'a) _s = accu method edata (env : 'env) (accu : 'a) _d es = let accu = self#exprs env accu es in accu method etuple (env : 'env) (accu : 'a) es = let accu = self#exprs env accu es in accu method eannot (env : 'env) (accu : 'a) e _t = let accu = self#expr env accu e in accu method emagic (env : 'env) (accu : 'a) e = let accu = self#expr env accu e in accu method erepr (env : 'env) (accu : 'a) e = let accu = self#expr env accu e in accu method erecord (env : 'env) (accu : 'a) fs = let accu = self#fields env accu fs in accu method erecordaccess (env : 'env) (accu : 'a) e _f = let accu = self#expr env accu e in accu method erecordwrite (env : 'env) (accu : 'a) e _f e1 = let accu = self#expr env accu e in let accu = self#expr env accu e1 in accu method earray (env : 'env) (accu : 'a) es = let accu = self#exprs env accu es in accu method earrayaccess (env : 'env) (accu : 'a) e _i = let accu = self#expr env accu e in accu method etextual (_env : 'env) (accu : 'a) _action = accu method ecomment (env : 'env) (accu : 'a) _s e = let accu = self#expr env accu e in accu method epatcomment (env : 'env) (accu : 'a) _s _p e = let accu = self#expr env accu e in accu method exprs (env : 'env) (accu : 'a) es = List.fold_left (self#expr env) accu es method fields (env : 'env) (accu : 'a) fs = List.fold_left (self#field env) accu fs method field (env : 'env) (accu : 'a) (_f, e) = let accu = self#expr env accu e in accu method branches (env : 'env) (accu : 'a) bs = List.fold_left (self#branch env) accu bs method branch (env : 'env) (accu : 'a) b = let accu = self#expr (self#pat env b.branchpat) accu b.branchbody in accu method binding ((env, accu) : 'env * 'a) (p, e) = let accu = self#expr env accu e in self#pat env p, accu method bindings (env : 'env) (accu : 'a) bs = List.fold_left self#binding (env, accu) bs method valdef (env : 'env) (accu : 'a) def = let accu = self#expr env accu def.valval in accu method valdefs (env : 'env) (accu : 'a) defs = List.fold_left (self#valdef env) accu defs end menhir-20151112/src/reachability.ml0000644000175000017500000000230112621170073015744 0ustar mehdimehdiopen UnparameterizedSyntax let rec visit grammar visited symbol = try let rule = StringMap.find symbol grammar.rules in if not (StringSet.mem symbol visited) then let visited = StringSet.add symbol visited in List.fold_left (visitb grammar) visited rule.branches else visited with Not_found -> (* This is a terminal symbol. *) assert (symbol = "error" || StringMap.mem symbol grammar.tokens); visited and visitb grammar visited { producers = symbols } = List.fold_left (visits grammar) visited symbols and visits grammar visited (symbol, _) = visit grammar visited symbol let trim grammar = if StringSet.cardinal grammar.start_symbols = 0 then Error.error [] "no start symbol has been declared." else let reachable = StringSet.fold (fun symbol visited -> visit grammar visited symbol ) grammar.start_symbols StringSet.empty in StringMap.iter (fun symbol rule -> if not (StringSet.mem symbol reachable) then Error.grammar_warning rule.positions "symbol %s is unreachable from any of the start symbol(s)." 
symbol ) grammar.rules; { grammar with rules = StringMap.restrict reachable grammar.rules } menhir-20151112/src/keywordExpansion.ml0000644000175000017500000001763612621170073016672 0ustar mehdimehdiopen UnparameterizedSyntax open Keyword open IL open CodeBits (* [posvar_ keyword] constructs the conventional name of the variable that stands for the position keyword [keyword]. *) let posvar_ = function | Position (subject, where, flavor) -> posvar subject where flavor | _ -> assert false (* [posvar_] should be applied to a position keyword *) (* [symbolstartpos producers i n] constructs an expression which, beginning at index [i], looks for the first non-empty producer and returns its start position. If none is found, this expression returns the end position of the right-hand side. This computation is modeled after the function [Parsing.symbol_start_pos] in OCaml's standard library. *) (* This cascade of [if] constructs could be quite big, and this could be a problem in terms of code size. Fortunately, we can optimize this code by computing, ahead of time, the outcome of certain comparisons. We assume that the lexer never produces a token whose start and end positions are the same. It follows that a non-nullable symbol cannot have the same start and end positions. Conversely, a symbol that generates (a subset of) the language {epsilon} must have the same start and end positions. *) (* Although this code is modeled after [Parsing.symbol_start_pos], we compare positions using physical equality, whereas they use structural equality. If for some reason a symbol has start and end positions that are structurally equal but physically different, then a difference will be observable. However, this is very unlikely. It would mean that a token has the same start and end positions (and furthermore, this position has been re-allocated). *) (* The reason why we expand [$symbolstartpos] away prior to inlining is that we want its meaning to be preserved by inlining. If we tried to preserve this keyword through the inlining phase, then (I suppose) we would have to introduce a family of keywords [$symbolstartpos(i, j)], computing over the interval from [i] to [j], and the preservation would not be exact -- because a nonempty symbol, once inlined, can be seen to be a sequence of empty and nonempty symbols. *) let rec symbolstartpos ((nullable, epsilon) as analysis) producers i n : IL.expr * KeywordSet.t = if i = n then (* Return [$endpos]. *) let keyword = Position (Left, WhereEnd, FlavorPosition) in EVar (posvar_ keyword), KeywordSet.singleton keyword else (* [symbol] is the symbol that appears in the right-hand side at position i. [x] is the identifier that is bound to it. We generate code that compares [$startpos($i)] and [$endpos($i)]. If they differ, we return [$startpos($i)]. Otherwise, we continue. Furthermore, as noted above, if [symbol] is not nullable, then we know that the start and end positions must differ, so we optimize this case. *) let symbol, x = List.nth producers i in let startp = Position (RightNamed x, WhereStart, FlavorPosition) and endp = Position (RightNamed x, WhereEnd, FlavorPosition) in if not (nullable symbol) then (* The start and end positions must differ. *) EVar (posvar_ startp), KeywordSet.singleton startp else let continue, keywords = symbolstartpos analysis producers (i + 1) n in if epsilon symbol then (* The start and end positions must be the same. *) continue, keywords else (* In the general case, a runtime test is required. 
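Concretely, the test compares the start and end positions of this producer using physical disequality, so the generated code reads, roughly: if _startpos_x_ != _endpos_x_ then _startpos_x_ else <the expression built for the remaining producers>.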
*) EIfThenElse ( EApp (EVar "Pervasives.(!=)", [ EVar (posvar_ startp); EVar (posvar_ endp) ]), EVar (posvar_ startp), continue ), KeywordSet.add startp (KeywordSet.add endp keywords) (* [define keyword1 f keyword2] macro-expands [keyword1] as [f(keyword2)], where [f] is a function of expressions to expressions. *) let define keyword1 f keyword2 = Action.define keyword1 (KeywordSet.singleton keyword2) (mlet [ PVar (posvar_ keyword1) ] [ f (EVar (posvar_ keyword2)) ]) (* An [ofs] keyword is expanded away. It is defined in terms of the corresponding [pos] keyword. *) let expand_ofs keyword action = match keyword with | Position (subject, where, FlavorOffset) -> define keyword (fun e -> ERecordAccess (e, "Lexing.pos_cnum")) (Position (subject, where, FlavorPosition)) action | _ -> action (* [$symbolstartpos] is expanded into a cascade of [if] constructs, modeled after [Parsing.symbol_start_pos]. *) let expand_symbolstartpos analysis producers n keyword action = match keyword with | Position (Left, WhereSymbolStart, FlavorPosition) -> let expansion, keywords = symbolstartpos analysis producers 0 n in Action.define keyword keywords (mlet [ PVar (posvar_ keyword) ] [ expansion ]) action | Position (RightNamed _, WhereSymbolStart, FlavorPosition) -> (* [$symbolstartpos(x)] does not exist. *) assert false | _ -> action (* [$startpos] and [$endpos] are expanded away. *) let expand_startend producers n keyword action = match keyword with | Position (Left, WhereStart, flavor) -> (* [$startpos] is defined as [$startpos($1)] if this production has nonzero length and [$endpos($0)] otherwise. *) define keyword (fun e -> e) ( if n > 0 then let _, x = List.hd producers in Position (RightNamed x, WhereStart, flavor) else Position (Before, WhereEnd, flavor) ) action | Position (Left, WhereEnd, flavor) -> (* [$endpos] is defined as [$endpos($n)] if this production has nonzero length and [$endpos($0)] otherwise. *) define keyword (fun e -> e) ( if n > 0 then let _, x = List.hd (List.rev producers) in Position (RightNamed x, WhereEnd, flavor) else Position (Before, WhereEnd, flavor) ) action | _ -> action (* [expand_round] performs one round of expansion on [action], using [f] as a rewriting rule. *) let expand_round f action = KeywordSet.fold f (Action.keywords action) action (* [expand_action] performs macro-expansion in [action]. We do this in several rounds: first, expand the [ofs] keywords away; then, expand [symbolstart] away; then, expand the rest. We do this in this order because each round can cause new keywords to appear, which must be eliminated by the following rounds. *) let expand_action analysis producers action = let n = List.length producers in (* The [ofs] keyword family is defined in terms of the [pos] family by accessing the [pos_cnum] field. Expand these keywords away first. *) let action = expand_round expand_ofs action in (* Expand [$symbolstartpos] away. *) let action = expand_round (expand_symbolstartpos analysis producers n) action in (* Then, expand away the non-[ofs] keywords. *) let action = expand_round (expand_startend producers n) action in action (* Silently analyze the grammar so as to find out which symbols are nullable and which symbols generate a subset of {epsilon}. This is used to optimize the expansion of $symbolstartpos. 
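For example (an illustration, not code found here): in a production [x = a b] where [a] is known not to be nullable, [$symbolstartpos] expands to just [$startpos(a)]; if [a] is nullable (but does not generate exactly the language {epsilon}) while [b] is not, the expansion is, roughly, [if $startpos(a) != $endpos(a) then $startpos(a) else $startpos(b)].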
*) let analysis grammar = let module G = GrammarFunctor.Make(struct let grammar = grammar let verbose = false end) in let lookup (nt : Syntax.symbol) : G.Symbol.t = try G.Symbol.lookup nt with Not_found -> assert false in let nullable nt : bool = G.Analysis.nullable_symbol (lookup nt) and epsilon nt : bool = G.TerminalSet.is_empty (G.Analysis.first_symbol (lookup nt)) in nullable, epsilon (* Put everything together. *) let expand_branch analysis branch = { branch with action = expand_action analysis branch.producers branch.action } let expand_rule analysis rule = { rule with branches = List.map (expand_branch analysis) rule.branches } let expand_grammar grammar = let analysis = analysis grammar in { grammar with rules = StringMap.map (expand_rule analysis) grammar.rules } menhir-20151112/src/unionFind.ml0000644000175000017500000001072212621170073015243 0ustar mehdimehdi(** This module implements a simple and efficient union/find algorithm. See Robert E. Tarjan, ``Efficiency of a Good But Not Linear Set Union Algorithm'', JACM 22(2), 1975. *) (** The abstraction defined by this module is a set of points, partitioned into equivalence classes. With each equivalence class, a piece of information, of abstract type ['a], is associated; we call it a descriptor. A point is implemented as a cell, whose (mutable) contents consist of a single link to either information about the equivalence class, or another point. Thus, points form a graph, which must be acyclic, and whose connected components are the equivalence classes. In every equivalence class, exactly one point has no outgoing edge, and carries information about the class instead. It is the class's representative element. Information about a class consists of an integer weight (the number of elements in the class) and of the class's descriptor. *) type 'a point = { mutable link: 'a link } and 'a link = | Info of 'a info | Link of 'a point and 'a info = { mutable weight: int; mutable descriptor: 'a } (** [fresh desc] creates a fresh point and returns it. It forms an equivalence class of its own, whose descriptor is [desc]. *) let fresh desc = { link = Info { weight = 1; descriptor = desc } } (** [repr point] returns the representative element of [point]'s equivalence class. It is found by starting at [point] and following the links. For efficiency, the function performs path compression at the same time. *) let rec repr point = match point.link with | Link point' -> let point'' = repr point' in if point'' != point' then (* [point''] is [point']'s representative element. Because we just invoked [repr point'], [point'.link] must be [Link point'']. We write this value into [point.link], thus performing path compression. Note that this function never performs memory allocation. *) point.link <- point'.link; point'' | Info _ -> point (** [find point] returns the descriptor associated with [point]'s equivalence class. *) let rec find point = (* By not calling [repr] immediately, we optimize the common cases where the path starting at [point] has length 0 or 1, at the expense of the general case. 
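In other words, the match below reads at most two links directly and falls back on [repr], which also compresses the path, only when the path is longer. As an aside, a typical use of this API (a hypothetical client) is: [let p1 = fresh 1 and p2 = fresh 2 in union p1 p2; assert (find p1 = find p2)] -- after the union, both points carry the descriptor of [p2]'s class, namely [2].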
*) match point.link with | Info info | Link { link = Info info } -> info.descriptor | Link { link = Link _ } -> find (repr point) let rec change point v = match point.link with | Info info | Link { link = Info info } -> info.descriptor <- v | Link { link = Link _ } -> change (repr point) v (** [union point1 point2] merges the equivalence classes associated with [point1] and [point2] (which must be distinct) into a single class whose descriptor is that originally associated with [point2]. The fact that [point1] and [point2] do not originally belong to the same class guarantees that we do not create a cycle in the graph. The weights are used to determine whether [point1] should be made to point to [point2], or vice-versa. By making the representative of the smaller class point to that of the larger class, we guarantee that paths remain of logarithmic length (not accounting for path compression, which makes them yet smaller). *) let union point1 point2 = let point1 = repr point1 and point2 = repr point2 in assert (point1 != point2); match point1.link, point2.link with | Info info1, Info info2 -> let weight1 = info1.weight and weight2 = info2.weight in if weight1 >= weight2 then begin point2.link <- Link point1; info1.weight <- weight1 + weight2; info1.descriptor <- info2.descriptor end else begin point1.link <- Link point2; info2.weight <- weight1 + weight2 end | _, _ -> assert false (* [repr] guarantees that [link] matches [Info _]. *) (** [equivalent point1 point2] tells whether [point1] and [point2] belong to the same equivalence class. *) let equivalent point1 point2 = repr point1 == repr point2 (** [eunion point1 point2] is identical to [union], except it does nothing if [point1] and [point2] are already equivalent. *) let eunion point1 point2 = if not (equivalent point1 point2) then union point1 point2 (** [redundant] maps all members of an equivalence class, but one, to [true]. *) let redundant = function | { link = Link _ } -> true | { link = Info _ } -> false menhir-20151112/src/pprint.mli0000644000175000017500000002011512621170073014774 0ustar mehdimehdi(* ------------------------------------------------------------------------- *) (* Basic combinators for building documents. *) type document val empty: document val hardline: document val char: char -> document val substring: string -> int -> int -> document val text: string -> document val blank: int -> document val (^^): document -> document -> document val nest: int -> document -> document val column: (int -> document) -> document val nesting: (int -> document) -> document val group: document -> document val ifflat: document -> document -> document (* ------------------------------------------------------------------------- *) (* Low-level combinators for alignment and indentation. *) val align: document -> document val hang: int -> document -> document val indent: int -> document -> document (* ------------------------------------------------------------------------- *) (* High-level combinators for building documents. *) (* [break n] Puts [n] spaces in flat mode and a new line otherwise. 
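For example, [group (string "a" ^^ break 1 ^^ string "b")] renders as [a b] when the group fits on the current line, and as two lines otherwise.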
Equivalent to: [ifflat (String.make n ' ') hardline] *) val break: int -> document (* [break0] equivalent to [break 0] *) val break0: document (* [break1] equivalent to [break 1] *) val break1: document val string: string -> document val words: string -> document val lparen: document val rparen: document val langle: document val rangle: document val lbrace: document val rbrace: document val lbracket: document val rbracket: document val squote: document val dquote: document val bquote: document val semi: document val colon: document val comma: document val space: document val dot: document val sharp: document val backslash: document val equals: document val qmark: document val tilde: document val at: document val percent: document val dollar: document val caret: document val ampersand: document val star: document val plus: document val minus: document val underscore: document val bang: document val bar: document val squotes: document -> document val dquotes: document -> document val bquotes: document -> document val braces: document -> document val parens: document -> document val angles: document -> document val brackets: document -> document val fold: (document -> document -> document) -> document list -> document val fold1: (document -> document -> document) -> document list -> document val fold1map: (document -> document -> document) -> ('a -> document) -> 'a list -> document val sepmap: document -> ('a -> document) -> 'a list -> document val optional: ('a -> document) -> 'a option -> document (* [prefix left right] Flat layout: [left] [right] Otherwise: [left] [right] *) val prefix: string -> document -> document (* [infix middle left right] Flat layout: [left] [middle] [right] Otherwise: [left] [middle] [right] *) val infix: string -> document -> document -> document (* [infix_com middle left right] Flat layout: [left][middle] [right] Otherwise: [left][middle] [right] *) val infix_com: string -> document -> document -> document (* [infix_dot middle left right] Flat layout: [left][middle][right] Otherwise: [left][middle] [right] *) val infix_dot: string -> document -> document -> document (* [surround nesting break open_doc contents close_doc] *) val surround: int -> document -> document -> document -> document -> document (* [surround1 open_txt contents close_txt] Flat: [open_txt][contents][close_txt] Otherwise: [open_txt] [contents] [close_txt] *) val surround1: string -> document -> string -> document (* [surround2 open_txt contents close_txt] Flat: [open_txt] [contents] [close_txt] Otherwise: [open_txt] [contents] [close_txt] *) val surround2: string -> document -> string -> document (* [soft_surround nesting break open_doc contents close_doc] *) val soft_surround: int -> document -> document -> document -> document -> document (* [seq indent break empty_seq open_seq sep_seq close_seq contents] *) val seq: int -> document -> document -> document -> document -> document -> document list -> document (* [seq1 open_seq sep_seq close_seq contents] Flat layout: [open_seq][contents][sep_seq]...[sep_seq][contents][close_seq] Otherwise: [open_seq] [contents][sep_seq]...[sep_seq][contents] [close_seq] *) val seq1: string -> string -> string -> document list -> document (* [seq2 open_seq sep_seq close_seq contents] Flat layout: [open_seq] [contents][sep_seq]...[sep_seq][contents] [close_seq] Otherwise: [open_seq] [contents][sep_seq]...[sep_seq][contents] [close_seq] *) val seq2: string -> string -> string -> document list -> document (* [group1 d] equivalent to [group (nest 1 d)] *) val 
group1: document -> document (* [group2 d] equivalent to [group (nest 2 d)] *) val group2: document -> document module Operators : sig val ( ^^ ) : document -> document -> document val ( !^ ) : string -> document val ( ^/^ ) : document -> document -> document val ( ^//^ ) : document -> document -> document val ( ^@^ ) : document -> document -> document val ( ^@@^ ) : document -> document -> document end (* ------------------------------------------------------------------------- *) (* A signature for document renderers. *) module type RENDERER = sig (* Output channels. *) type channel (* [pretty rfrac width channel document] pretty-prints the document [document] to the output channel [channel]. The parameter [width] is the maximum number of characters per line. The parameter [rfrac] is the ribbon width, a fraction relative to [width]. The ribbon width is the maximum number of non-indentation characters per line. *) val pretty: float -> int -> channel -> document -> unit (* [compact channel document] prints the document [document] to the output channel [channel]. No indentation is used. All newline instructions are respected, that is, no groups are flattened. *) val compact: channel -> document -> unit end (* ------------------------------------------------------------------------- *) (* Renderers to output channels and to memory buffers. *) module Channel : RENDERER with type channel = out_channel module Buffer : RENDERER with type channel = Buffer.t (* ------------------------------------------------------------------------- *) (* A signature for value representations. This is compatible with the associated Camlp4 generator: SwitchValueRepresentation *) module type VALUE_REPRESENTATION = sig (* The type of value representation *) type t (* [variant type_name data_constructor_name tag arguments] Given information about the variant and its arguments, this function produces a new value representation. *) val variant : string -> string -> int -> t list -> t (* [record type_name fields] Given a type name and a list of record fields, this function produces the value representation of a record. *) val record : string -> (string * t) list -> t (* [tuple arguments] Given a list of value representation this function produces a new value representation. *) val tuple : t list -> t (* ------------------------------------------------------------------------- *) (* Value representation for primitive types. *) val string : string -> t val int : int -> t val int32 : int32 -> t val int64 : int64 -> t val nativeint : nativeint -> t val float : float -> t val char : char -> t val bool : bool -> t val option : ('a -> t) -> 'a option -> t val list : ('a -> t) -> 'a list -> t val array : ('a -> t) -> 'a array -> t val ref : ('a -> t) -> 'a ref -> t (* Value representation for any other value. *) val unknown : string -> 'a -> t end (* A signature for source printers. *) module type DOCUMENT_VALUE_REPRESENTATION = VALUE_REPRESENTATION with type t = document module ML : DOCUMENT_VALUE_REPRESENTATION (* Deprecated *) val line: document val linebreak: document val softline: document val softbreak: document menhir-20151112/src/dot.ml0000644000175000017500000000610712621170073014102 0ustar mehdimehdiopen Printf (* ------------------------------------------------------------------------- *) (* Type definitions. *) type size = float * float (* in inches *) type orientation = | Portrait | Landscape type rankdir = | LeftToRight | TopToBottom type ratio = | Compress | Fill | Auto type style = (* Both nodes and edges. 
*) | Solid | Dashed | Dotted | Bold | Invisible (* Nodes only. *) | Filled | Diagonals | Rounded type shape = | Box | Oval | Circle | DoubleCircle (* there are many others, let's stop here *) (* ------------------------------------------------------------------------- *) (* Basic printers. *) let print_style = function | None -> "" | Some style -> let style = match style with | Solid -> "solid" | Dashed -> "dashed" | Dotted -> "dotted" | Bold -> "bold" | Invisible -> "invis" | Filled -> "filled" | Diagonals -> "diagonals" | Rounded -> "rounded" in sprintf ", style = %s" style let print_shape = function | None -> "" | Some shape -> let shape = match shape with | Box -> "box" | Oval -> "oval" | Circle -> "circle" | DoubleCircle -> "doublecircle" in sprintf ", shape = %s" shape (* ------------------------------------------------------------------------- *) (* The graph printer. *) module Print (G : sig type vertex val name: vertex -> string val successors: (?style:style -> label:string -> vertex -> unit) -> vertex -> unit val iter: (?shape:shape -> ?style:style -> label:string -> vertex -> unit) -> unit end) = struct let print ?(directed = true) ?size ?(orientation = Landscape) ?(rankdir = LeftToRight) ?(ratio = Compress) (f : out_channel) = fprintf f "%s G {\n" (if directed then "digraph" else "graph"); Option.iter (fun (hsize, vsize) -> fprintf f "size=\"%f, %f\";\n" hsize vsize ) size; begin match orientation with | Portrait -> fprintf f "orientation = portrait;\n" | Landscape -> fprintf f "orientation = landscape;\n" end; begin match rankdir with | LeftToRight -> fprintf f "rankdir = LR;\n" | TopToBottom -> fprintf f "rankdir = TB;\n" end; begin match ratio with | Compress -> fprintf f "ratio = compress;\n" | Fill -> fprintf f "ratio = fill;\n" | Auto -> fprintf f "ratio = auto;\n" end; G.iter (fun ?shape ?style ~label vertex -> fprintf f "%s [ label=\"%s\"%s%s ] ;\n" (G.name vertex) label (print_style style) (print_shape shape) ); G.iter (fun ?shape ?style ~label source -> ignore shape; (* avoid unused variable warnings *) ignore style; ignore label; G.successors (fun ?style ~label destination -> fprintf f "%s %s %s [ label=\"%s\"%s ] ;\n" (G.name source) (if directed then "->" else "--") (G.name destination) label (print_style style) ) source ); fprintf f "\n}\n" end menhir-20151112/src/InspectionTableFormat.ml0000644000175000017500000000612212621170074017546 0ustar mehdimehdi(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, INRIA Paris-Rocquencourt *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright 2005-2015 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (* This signature defines the format of the tables that are produced (in addition to the tables described in [TableFormat]) when the command line switch [--inspection] is enabled. It is used as an argument to [InspectionTableInterpreter.Make]. *) module type TABLES = sig (* The types of symbols. *) include IncrementalEngine.SYMBOLS (* The type ['a lr1state] describes an LR(1) state. The generated parser defines it internally as [int]. *) type 'a lr1state (* Some of the tables that follow use encodings of (terminal and nonterminal) symbols as integers. 
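(Concretely, in [TableBackend], a terminal symbol [t] is encoded as the even integer [(t + 1) lsl 1], and a nonterminal symbol [nt] as the odd integer [(nt lsl 1) lor 1]; the low-order bit distinguishes the two kinds, and 0 encodes the absence of a symbol.)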
So, we need functions that map the integer encoding of a symbol to its algebraic encoding. *) val terminal: int -> xsymbol val nonterminal: int -> xsymbol (* The left-hand side of every production already appears in the signature [TableFormat.TABLES], so we need not repeat it here. *) (* The right-hand side of every production. This is a linearized array of arrays of integers, whose [data] and [entry] components have been packed. The encoding of symbols as integers is described in [TableBackend]. *) val rhs: PackedIntArray.t * PackedIntArray.t (* A mapping of every (non-initial) state to its LR(0) core. *) val lr0_core: PackedIntArray.t (* A mapping of every LR(0) state to its set of LR(0) items. Each item is represented in its packed form (see [Item]) as an integer. Thus the mapping is an array of arrays of integers, which is linearized and packed, like [rhs]. *) val lr0_items: PackedIntArray.t * PackedIntArray.t (* A mapping of every LR(0) state to its incoming symbol, if it has one. *) val lr0_incoming: PackedIntArray.t (* A table that tells which non-terminal symbols are nullable. *) val nullable: string (* This is a packed int array of bit width 1. It can be read using [PackedIntArray.get1]. *) (* A two-dimensional table, indexed by a nonterminal symbol and by a terminal symbol (other than [#]), encodes the FIRST sets. *) val first: int (* width of the bitmap *) * string (* second component of [PackedIntArray.t] *) end menhir-20151112/src/tableBackend.ml0000644000175000017500000007074712621170073015662 0ustar mehdimehdiopen CodeBits open Grammar open IL open Interface open Printf open TokenType open NonterminalType open CodePieces module Run (T : sig end) = struct (* ------------------------------------------------------------------------ *) (* Conventional names for modules, exceptions, record fields, functions. *) let menhirlib = "MenhirLib" let make_engine = menhirlib ^ ".TableInterpreter.Make" let make_symbol = menhirlib ^ ".InspectionTableInterpreter.Symbols" let make_inspection = menhirlib ^ ".InspectionTableInterpreter.Make" let engineTypes = menhirlib ^ ".EngineTypes" let field x = engineTypes ^ "." ^ x let fstate = field "state" let fsemv = field "semv" let fstartp = field "startp" let fendp = field "endp" let fnext = field "next" let fstack = field "stack" let fcurrent = field "current" let entry = interpreter ^ ".entry" let start = interpreter ^ ".start" let staticVersion = menhirlib ^ ".StaticVersion" (* The following are names of internal sub-modules. *) let basics = "Basics" let tables = "Tables" let symbols = "Symbols" let ti = "TI" (* ------------------------------------------------------------------------ *) (* Statistics. *) (* Integer division, rounded up. *) let div a b = if a mod b = 0 then a / b else a / b + 1 (* [size] provides a rough measure of the size of its argument, in words. The [unboxed] parameter is true if we have already counted 1 for the pointer to the object. *) let rec size unboxed = function | EIntConst _ | ETuple [] | EData (_, []) -> if unboxed then 0 else 1 | EStringConst s -> 1 + div (String.length s * 8) Sys.word_size | ETuple es | EData (_, es) | EArray es -> 1 + List.length es + List.fold_left (fun s e -> s + size true e) 0 es | _ -> assert false (* not implemented *) let size = size false (* Optionally, print a measure of each of the tables that we are defining. 
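The measure relies on the rough word counts computed by [size] above; for instance, on a 64-bit machine, a table stored as [EStringConst s] with [String.length s = 100] is counted as 1 + div (100 * 8) 64 = 14 words, that is, 112 bytes.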
*) let define (name, expr) = { valpublic = true; valpat = PVar name; valval = expr } let define_and_measure (x, e) = Error.logC 1 (fun f -> fprintf f "The %s table occupies roughly %d bytes.\n" x (size e * (Sys.word_size / 8)) ); define (x, e) (* ------------------------------------------------------------------------ *) (* Code generation for semantic actions. *) (* The functions [reducecellparams] and [reducebody] are adapted from [CodeBackend]. *) (* Things are slightly more regular here than in the code-based back-end, since there is no optimization: every stack cell has the same structure and holds a state, a semantic value, and a pair of positions. Because every semantic value is represented, we do not have a separate [unitbindings]. *) (* [reducecellparams] constructs a pattern that describes the contents of a stack cell. If this is the bottom cell, the variable [state] is bound to the state found in the cell. If [ids.(i)] is used in the semantic action, then it is bound to the semantic value. The position variables are always bound. *) let reducecellparams prod i _symbol (next : pattern) : pattern = let ids = Production.identifiers prod in PRecord [ fstate, (if i = 0 then PVar state else PWildcard); fsemv, PVar ids.(i); fstartp, PVar (Printf.sprintf "_startpos_%s_" ids.(i)); fendp, PVar (Printf.sprintf "_endpos_%s_" ids.(i)); fnext, next; ] (* The semantic values bound in [reducecellparams] have type [Obj.t]. They should now be cast to their real type. If we had [PMagic] in the syntax of patterns, we could do that in one swoop; since we don't, we have to issue a series of casts a posteriori. *) let reducecellcasts prod i symbol casts = let ids = Production.identifiers prod in let id = ids.(i) in let t : typ = match semvtype symbol with | [] -> tunit | [ t ] -> t | _ -> assert false in (* Cast: [let id = ((Obj.magic id) : t) in ...]. *) ( PVar id, EAnnot (EMagic (EVar id), type2scheme t) ) :: casts (* 2015/11/04. The start and end positions of an epsilon production are obtained by taking the end position stored in the top stack cell (whatever it is). *) let endpos_of_top_stack_cell = ERecordAccess(EVar stack, fendp) (* This is the body of the [reduce] function associated with production [prod]. It assumes that the variables [env] and [stack] have been bound. *) let reducebody prod = let nt, _rhs = Production.def prod and ids = Production.identifiers prod and length = Production.length prod in (* Build a pattern that represents the shape of the stack. Out of the stack, we extract a state (except when the production is an epsilon production) and a number of semantic values. *) (* At the same time, build a series of casts. *) let (_ : int), pat, casts = Invariant.fold (fun (i, pat, casts) (_ : bool) symbol _ -> i + 1, reducecellparams prod i symbol pat, reducecellcasts prod i symbol casts ) (0, PVar stack, []) (Invariant.prodstack prod) in (* Determine beforeend/start/end positions for the left-hand side of the production, and bind them to the conventional variables [beforeendp], [startp], and [endp]. These variables may be unused by the semantic action, in which case these bindings are dead code and can be ignored by the OCaml compiler. *) let posbindings = ( PVar beforeendp, endpos_of_top_stack_cell ) :: ( PVar startp, if length > 0 then EVar (Printf.sprintf "_startpos_%s_" ids.(0)) else endpos_of_top_stack_cell ) :: ( PVar endp, if length > 0 then EVar (Printf.sprintf "_endpos_%s_" ids.(length - 1)) else EVar startp ) :: [] in (* This cannot be one of the start productions. 
*) assert (not (Production.is_start prod)); (* This is a regular production. Perform a reduction. *) let action = Production.action prod in let act = EAnnot (Action.to_il_expr action, type2scheme (semvtypent nt)) in EComment ( Production.print prod, blet ( (pat, EVar stack) :: (* destructure the stack *) casts @ (* perform type casts *) posbindings @ (* bind [startp] and [endp] *) [ PVar semv, act ], (* run the user's code and bind [semv] *) (* Return a new stack, onto which we have pushed a new stack cell. *) ERecord [ (* the new stack cell *) fstate, EVar state; (* the current state after popping; it will be updated by [goto] *) fsemv, ERepr (EVar semv); (* the newly computed semantic value *) fstartp, EVar startp; (* the newly computed start and end positions *) fendp, EVar endp; fnext, EVar stack; (* this is the stack after popping *) ] ) ) let semantic_action prod = EFun ( [ PVar env ], if Invariant.ever_reduced prod then (* Access the stack and current state via the environment. *) (* In fact, the current state needs to be bound here only if this is an epsilon production. Otherwise, the variable [state] will be bound by the pattern produced by [reducecellparams] above. *) ELet ( [ PVar stack, ERecordAccess (EVar env, fstack) ] @ (if Production.length prod = 0 then [ PVar state, ERecordAccess (EVar env, fcurrent) ] else []), (* Then, *) reducebody prod ) else (* For productions that are never reduced, generate no code. *) (* We do this mainly because [Invariant.prodstack] does not support productions that are never reduced. *) EComment ( "a production never reduced", EApp (EVar "assert", [ EData ("false", []) ]) ) ) (* Export the number of start productions. *) let start_def = define ( "start", EIntConst Production.start ) (* ------------------------------------------------------------------------ *) (* Table encodings. *) (* Encodings of entries in the default reduction table. *) let encode_DefRed prod = (* 1 + prod *) 1 + Production.p2i prod let encode_NoDefRed = (* 0 *) 0 (* Encodings of entries in the action table. *) let encode_Reduce prod = (* prod | 01 *) (Production.p2i prod lsl 2) lor 1 let encode_ShiftDiscard s = (* s | 10 *) ((Lr1.number s) lsl 2) lor 0b10 let encode_ShiftNoDiscard s = (* s | 11 *) ((Lr1.number s) lsl 2) lor 0b11 let encode_Fail = (* 00 *) 0 (* Encodings of entries in the goto table. *) let encode_Goto node = (* 1 + node *) 1 + Lr1.number node let encode_NoGoto = (* 0 *) 0 (* Encodings of the hole in the action and goto tables. *) let hole = assert (encode_Fail = 0); assert (encode_NoGoto = 0); 0 (* Encodings of entries in the error bitmap. *) let encode_Error = (* 0 *) 0 let encode_NoError = (* 1 *) 1 (* Encodings of terminal and nonterminal symbols in the production table. *) let encode_no_symbol = 0 (* 0 | 0 *) let encode_terminal tok = (Terminal.t2i tok + 1) lsl 1 (* t + 1 | 0 *) let encode_nonterminal nt = ((Nonterminal.n2i nt) lsl 1) lor 1 (* nt | 1 *) let encode_symbol = function | Symbol.T tok -> encode_terminal tok | Symbol.N nt -> encode_nonterminal nt let encode_symbol_option = function | None -> encode_no_symbol | Some symbol -> encode_symbol symbol (* Encoding a Boolean as an integer value. *) let encode_bool b = if b then 1 else 0 (* ------------------------------------------------------------------------ *) (* Table compression. *) (* Our sparse, two-dimensional tables are turned into one-dimensional tables via [RowDisplacement]. *) (* The error bitmap, which is two-dimensional but not sparse, is made one-dimensional by simple flattening. 
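That is, rows are concatenated in order, so the entry for state [s] and terminal [t] ends up at index [s * (Terminal.n - 1) + t] in the flattened bitmap; the width [Terminal.n - 1] is stored alongside the packed data so that readers can recompute this index.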
*) (* Every one-dimensional table is then packed via [PackedIntArray]. *) (* Optionally, we print some information about the compression ratio. *) (* [population] counts the number of significant entries in a two-dimensional matrix. *) let population (matrix : int array array) = Array.fold_left (fun population row -> Array.fold_left (fun population entry -> if entry = hole then population else population + 1 ) population row ) 0 matrix (* [marshal1] marshals a one-dimensional array. *) let marshal1 (table : int array) = let (bits : int), (text : string) = MenhirLib.PackedIntArray.pack table in ETuple [ EIntConst bits; EStringConst text ] (* [marshal11] marshals a one-dimensional array whose bit width is statically known to be [1]. *) let marshal11 (table : int array) = let (bits : int), (text : string) = MenhirLib.PackedIntArray.pack table in assert (bits = 1); EStringConst text (* List-based versions of the above functions. *) let marshal1_list (table : int list) = marshal1 (Array.of_list table) let marshal11_list (table : int list) = marshal11 (Array.of_list table) (* [linearize_and_marshal1] marshals an array of integer arrays (of possibly different lengths). *) let linearize_and_marshal1 (table : int array array) = let data, entry = MenhirLib.LinearizedArray.make table in ETuple [ marshal1 data; marshal1 entry ] (* [flatten_and_marshal11_list] marshals a two-dimensional bitmap, whose width (for now) is assumed to be [Terminal.n - 1]. *) let flatten_and_marshal11_list (table : int list list) = ETuple [ (* Store the table width. *) EIntConst (Terminal.n - 1); (* View the table as a one-dimensional array, and marshal it. *) marshal11_list (List.flatten table) ] (* [marshal2] marshals a two-dimensional table, with row displacement. *) let marshal2 name m n (matrix : int list list) = let matrix : int array array = Array.of_list (List.map Array.of_list matrix) in let (displacement : int array), (data : int array) = MenhirLib.RowDisplacement.compress (=) (fun x -> x = hole) hole m n matrix in Error.logC 1 (fun f -> fprintf f "The %s table is %d entries; %d non-zero; %d compressed.\n" name (m * n) (population matrix) (Array.length displacement + Array.length data) ); ETuple [ marshal1 displacement; marshal1 data; ] (* ------------------------------------------------------------------------ *) (* Table generation. *) (* The action table. *) let action node t = match Invariant.has_default_reduction node with | Some _ -> (* [node] has a default reduction; in that case, the action table is never looked up. *) hole | None -> try let target = SymbolMap.find (Symbol.T t) (Lr1.transitions node) in (* [node] has a transition to [target]. If [target] has a default reduction on [#], use [ShiftNoDiscard], otherwise [ShiftDiscard]. *) match Invariant.has_default_reduction target with | Some (_, toks) when TerminalSet.mem Terminal.sharp toks -> assert (TerminalSet.cardinal toks = 1); encode_ShiftNoDiscard target | _ -> encode_ShiftDiscard target with Not_found -> try (* [node] has a reduction. *) let prod = Misc.single (TerminalMap.find t (Lr1.reductions node)) in encode_Reduce prod with Not_found -> (* [node] has no action. *) encode_Fail (* In the error bitmap and in the action table, the row that corresponds to the [#] pseudo-terminal is never accessed. Thus, we do not create this row. This does not create a gap in the table, because this is the right-most row. For sanity, we check this fact here. *) let () = assert (Terminal.t2i Terminal.sharp = Terminal.n - 1) (* The goto table. 
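Like the action table, it is sparse: the absence of a transition is encoded by [encode_NoGoto], that is, 0, which coincides with [hole], so such entries cost nothing after compression by [RowDisplacement].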
*) let goto node nt = try let target = SymbolMap.find (Symbol.N nt) (Lr1.transitions node) in encode_Goto target with Not_found -> encode_NoGoto (* The error bitmap reflects which entries in the action table are [Fail]. Like the action table, it is not accessed when [node] has a default reduction. *) let error node t = if action node t = encode_Fail then encode_Error else encode_NoError (* The default reductions table. *) let default_reduction node = match Invariant.has_default_reduction node with | Some (prod, _) -> encode_DefRed prod | None -> encode_NoDefRed (* Generate the table definitions. *) let action = define_and_measure ( "action", marshal2 "action" Lr1.n (Terminal.n - 1) ( Lr1.map (fun node -> Terminal.mapx (fun t -> action node t ) ) ) ) let goto = define_and_measure ( "goto", marshal2 "goto" Lr1.n Nonterminal.n ( Lr1.map (fun node -> Nonterminal.map (fun nt -> goto node nt ) ) ) ) let error = define_and_measure ( "error", flatten_and_marshal11_list ( Lr1.map (fun node -> Terminal.mapx (fun t -> error node t ) ) ) ) let default_reduction = define_and_measure ( "default_reduction", marshal1_list ( Lr1.map (fun node -> default_reduction node ) ) ) let lhs = define_and_measure ( "lhs", marshal1 ( Production.amap (fun prod -> Nonterminal.n2i (Production.nt prod) ) ) ) let semantic_action = define ( "semantic_action", (* Non-start productions only. *) EArray (Production.mapx semantic_action) ) (* ------------------------------------------------------------------------ *) (* When [--trace] is enabled, we need tables that map terminals and productions to strings. *) let stringwrap f x = EStringConst (f x) let reduce_or_accept prod = match Production.classify prod with | Some _ -> "Accepting" | None -> "Reducing production " ^ (Production.print prod) let trace = define_and_measure ( "trace", if Settings.trace then EData ("Some", [ ETuple [ EArray (Terminal.map (stringwrap Terminal.print)); EArray (Production.map (stringwrap reduce_or_accept)); ] ]) else EData ("None", []) ) (* ------------------------------------------------------------------------ *) (* Generate the two functions that map a token to its integer code and to its semantic value, respectively. *) let token2terminal = destructuretokendef "token2terminal" tint false (fun tok -> EIntConst (Terminal.t2i tok)) let token2value = destructuretokendef "token2value" tobj true (fun tok -> ERepr ( match Terminal.ocamltype tok with | None -> EUnit | Some _ -> EVar semv ) ) (* ------------------------------------------------------------------------ *) (* The client APIs invoke the interpreter with an appropriate start state. The monolithic API calls [entry] (see [Engine]), while the incremental API calls [start]. *) (* An entry point to the monolithic API. *) let monolithic_entry_point state nt t = define ( Nonterminal.print true nt, let lexer = "lexer" and lexbuf = "lexbuf" in EFun ( [ PVar lexer; PVar lexbuf ], EAnnot ( EMagic ( EApp ( EVar entry, [ EIntConst (Lr1.number state); EVar lexer; EVar lexbuf ] ) ), type2scheme (TypTextual t) ) ) ) (* The whole monolithic API. *) let monolithic_api : IL.valdef list = Lr1.fold_entry (fun _prod state nt t api -> monolithic_entry_point state nt t :: api ) [] (* An entry point to the incremental API. *) let incremental_entry_point state nt t = let initial = "initial_position" in define ( Nonterminal.print true nt, (* In principle the eta-expansion [fun initial_position -> start s initial_position] should not be necessary, since [start] is a pure function. 
However, when [--trace] is enabled, [start] will log messages to the standard error channel. *) EFun ( [ PVar initial ], EAnnot ( EMagic ( EApp ( EVar start, [ EIntConst (Lr1.number state); EVar initial; ] ) ), type2scheme (checkpoint (TypTextual t)) ) ) ) (* The whole incremental API. *) let incremental_api : IL.valdef list = Lr1.fold_entry (fun _prod state nt t api -> incremental_entry_point state nt t :: api ) [] (* ------------------------------------------------------------------------ *) (* Constructing representations of symbols. *) (* [eterminal t] is a value of type ['a terminal] (for some ['a]) that encodes the terminal symbol [t]. It is just a data constructor of the terminal GADT. *) let eterminal (t : Terminal.t) : expr = EData (tokengadtdata (Terminal.print t), []) (* [enonterminal nt] is a value of type ['a nonterminal] (for some ['a]) that encodes the nonterminal symbol [nt]. It is just a data constructor of the nonterminal GADT. *) let enonterminal (nt : Nonterminal.t) : expr = EData (tnonterminalgadtdata (Nonterminal.print false nt), []) (* [esymbol symbol] is a value of type ['a symbol] (for some ['a]) that encodes the symbol [symbol]. It is built by applying the injection [T] or [N] to the terminal or nonterminal encoding. *) let dataT = "T" let dataN = "N" let esymbol (symbol : Symbol.t) : expr = match symbol with | Symbol.T t -> EData (dataT, [ eterminal t ]) | Symbol.N nt -> EData (dataN, [ enonterminal nt ]) (* [xsymbol symbol] is a value of type [xsymbol] that encodes the symbol [symbol]. It is built by applying the injection [X] (an existential quantifier) to [esymbol symbol]. *) let dataX = "X" let xsymbol (symbol : Symbol.t) : expr = EData (dataX, [ esymbol symbol ]) (* ------------------------------------------------------------------------ *) (* Produce a function that maps a terminal symbol (represented as an integer code) to its representation as an [xsymbol]. Include [error] but not [#], i.e., include all of the symbols which can appear in a production. *) (* Note that, instead of generating a function, we could (a) use an array or (b) use an unsafe conversion of an integer to a data constructor, then wrap it using [X] and [T/N]. Approach (b) is unsafe and causes memory allocation (due to the wrapping) at each call. *) let terminal () = assert Settings.inspection; let t = "t" in define ( "terminal", EFun ([ PVar t ], EMatch (EVar t, Terminal.mapx (fun tok -> { branchpat = pint (Terminal.t2i tok); branchbody = xsymbol (Symbol.T tok) } ) @ [ { branchpat = PWildcard; branchbody = EComment ("This terminal symbol does not exist.", EApp (EVar "assert", [ efalse ]) ) } ] ) ) ) (* ------------------------------------------------------------------------ *) (* Produce a function that maps a (non-start) nonterminal symbol (represented as an integer code) to its representation as an [xsymbol]. *) let nonterminal () = assert Settings.inspection; let nt = "nt" in define ( "nonterminal", EFun ([ PVar nt ], EMatch (EVar nt, Nonterminal.foldx (fun nt branches -> { branchpat = pint (Nonterminal.n2i nt); branchbody = xsymbol (Symbol.N nt) } :: branches ) [ { branchpat = PWildcard; branchbody = EComment ("This nonterminal symbol does not exist.", EApp (EVar "assert", [ efalse ]) ) } ] ) ) ) (* ------------------------------------------------------------------------ *) (* Produce a mapping of every LR(0) state to its incoming symbol (encoded as an integer value). (Note that the initial states do not have one.) 
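*)

(* As a sanity check on this encoding (a self-contained sketch; the
   constructor names are ours, and the real decoding is performed on the
   MenhirLib side): zero denotes the absence of an incoming symbol, an odd
   entry denotes a nonterminal, and a nonzero even entry denotes a
   terminal. *)

type decoded_symbol =
  | DT of int (* a terminal number *)
  | DN of int (* a nonterminal number *)

let decode_symbol_option (e : int) : decoded_symbol option =
  if e = 0 then None                            (* [encode_no_symbol] *)
  else if e land 1 = 1 then Some (DN (e lsr 1)) (* [(nt lsl 1) lor 1] *)
  else Some (DT ((e lsr 1) - 1))                (* [(t + 1) lsl 1] *)

let () =
  assert (decode_symbol_option 0 = None);
  assert (decode_symbol_option ((5 + 1) lsl 1) = Some (DT 5));
  assert (decode_symbol_option ((3 lsl 1) lor 1) = Some (DN 3))

(*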
*) let lr0_incoming () = assert Settings.inspection; define_and_measure ( "lr0_incoming", marshal1 (Array.init Lr0.n (fun node -> encode_symbol_option (Lr0.incoming_symbol node) )) ) (* ------------------------------------------------------------------------ *) (* A table that maps a production (i.e., an integer index) to the production's right-hand side. In principle, we use this table for ordinary productions only, as opposed to the start productions, whose existence is not exposed to the user. However, it is simpler (and not really costly) to include all productions in this table. *) let rhs () = assert Settings.inspection; let productions : int array array = Production.amap (fun prod -> Array.map encode_symbol (Production.rhs prod) ) in define_and_measure ( "rhs", linearize_and_marshal1 productions ) (* ------------------------------------------------------------------------ *) (* A table that maps an LR(1) state to its LR(0) core. *) let lr0_core () = assert Settings.inspection; define_and_measure ( "lr0_core", marshal1_list (Lr1.map (fun (node : Lr1.node) -> Lr0.core (Lr1.state node) )) ) (* A table that maps an LR(0) state to a set of LR(0) items. *) let lr0_items () = assert Settings.inspection; let items : int array array = Array.init Lr0.n (fun node -> Array.map Item.marshal (Array.of_list (Item.Set.elements (Lr0.items node))) ) in define_and_measure ( "lr0_items", linearize_and_marshal1 items ) (* ------------------------------------------------------------------------ *) (* A table that tells which nonterminal symbols are nullable. (For simplicity, this table includes the start symbols.) *) let nullable () = assert Settings.inspection; define_and_measure ( "nullable", marshal11_list ( Nonterminal.map (fun nt -> encode_bool (Analysis.nullable nt) ) ) ) (* ------------------------------------------------------------------------ *) (* A two-dimensional bitmap, indexed first by nonterminal symbols, then by terminal symbols, encodes the FIRST sets. *) let first () = assert Settings.inspection; define_and_measure ( "first", flatten_and_marshal11_list ( Nonterminal.map (fun nt -> Terminal.mapx (fun t -> encode_bool (TerminalSet.mem t (Analysis.first nt)) ) ) ) ) (* ------------------------------------------------------------------------ *) (* A reference to [MenhirLib.StaticVersion.require_XXXXXXXX], where [XXXXXXXX] is our 8-digit version number. This ensures that the generated code can be linked only with an appropriate version of MenhirLib. This is important because we use unsafe casts, and a version mismatch could cause a crash. *) let versiondef = { valpublic = true; valpat = PUnit; valval = EVar (staticVersion ^ ".require_" ^ Version.version); } (* ------------------------------------------------------------------------ *) (* Let's put everything together. *) open UnparameterizedSyntax let grammar = Front.grammar let program = [ SIFunctor (grammar.parameters, (* Make a reference to [MenhirLib.StaticVersion.require_XXXXXXXX], where [XXXXXXXX] is our 8-digit version number. This ensures that the generated code can be linked only with an appropriate version of MenhirLib. This is important because we use unsafe casts, and a version mismatch could cause a crash. *) SIComment "This generated code requires the following version of MenhirLib:" :: SIValDefs (false, [ versiondef ]) :: (* Define the internal sub-module [basics], which contains the definitions of the exception [Error] and of the type [token]. Then, include this sub-module. 
This sub-module is used again below, as part of the application of the functor [TableInterpreter.Make]. *) SIModuleDef (basics, MStruct ( SIExcDefs [ excdef ] :: interface_to_structure ( tokentypedef grammar ) )) :: SIInclude (MVar basics) :: SIValDefs (false, [ excvaldef ]) :: (* In order to avoid hiding user-defined identifiers, only the exception [Error] and the type [token] should be defined (at top level, with non-mangled names) above this line. We also define the value [_eRR] above this line so that we do not have a problem if a user prelude hides the name [Error]. *) SIStretch grammar.preludes :: (* Define the tables. *) SIModuleDef (tables, MStruct [ (* The internal sub-module [basics] contains the definitions of the exception [Error] and of the type [token]. *) SIInclude (MVar basics); (* This is a non-recursive definition, so none of the names defined here are visible in the semantic actions. *) SIValDefs (false, [ token2terminal; define ("error_terminal", EIntConst (Terminal.t2i Terminal.error)); token2value; default_reduction; error; start_def; action; lhs; goto; semantic_action; trace; ]) ] ) :: SIModuleDef (interpreter, MStruct ( (* Apply the functor [TableInterpreter.Make] to the tables. *) SIModuleDef (ti, MApp (MVar make_engine, MVar tables)) :: SIInclude (MVar ti) :: listiflazy Settings.inspection (fun () -> (* Define the internal sub-module [symbols], which contains type definitions. Then, include this sub-module. This sub-module is used again below, as part of the application of the functor [TableInterpreter.MakeInspection]. *) SIModuleDef (symbols, MStruct ( interface_to_structure ( tokengadtdef grammar @ nonterminalgadtdef grammar ) )) :: SIInclude (MVar symbols) :: SIInclude (MApp (MApp (MVar make_inspection, MVar tables), MStruct ( (* This module must satisfy [InspectionTableFormat.TABLES]. *) (* [lr1state] *) SIInclude (MVar ti) :: (* [terminal], [nonterminal]. *) SIInclude (MVar symbols) :: (* This functor application builds the types [symbol] and [xsymbol] in terms of the types [terminal] and [nonterminal]. This saves us the trouble of generating these definitions. *) SIInclude (MApp (MVar make_symbol, MVar symbols)) :: SIValDefs (false, terminal() :: nonterminal() :: lr0_incoming() :: rhs() :: lr0_core() :: lr0_items() :: nullable() :: first() :: [] ) :: [] ))) :: [] ) )) :: SIValDefs (false, monolithic_api) :: SIModuleDef (incremental, MStruct [ SIValDefs (false, incremental_api) ]) :: SIStretch grammar.postludes :: [])] let () = Time.tick "Producing abstract syntax" end menhir-20151112/src/inliner.mli0000644000175000017500000000047612621170073015130 0ustar mehdimehdi(* This transformer inlines every function that is called at most once. It also inlines some functions whose body consists of a single function call. At the same time, every function that is never called is dropped. Public functions are never inlined or dropped. *) val inline: IL.program -> IL.program menhir-20151112/src/Fix.mli0000644000175000017500000001007312621170073014210 0ustar mehdimehdi(**************************************************************************) (* *) (* Fix *) (* *) (* Author: François Pottier, INRIA Paris-Rocquencourt *) (* Version: 20101206 *) (* *) (* The copyright to this code is held by Institut National de Recherche *) (* en Informatique et en Automatique (INRIA). All rights reserved. This *) (* file is distributed under the license CeCILL-C (see file LICENSE). 
*) (* *) (**************************************************************************) (* This code is described in the paper ``Lazy Least Fixed Points in ML''. *) (* -------------------------------------------------------------------------- *) (* Maps. *) (* We require imperative maps, that is, maps that can be updated in place. An implementation of persistent maps, such as the one offered by ocaml's standard library, can easily be turned into an implementation of imperative maps, so this is a weak requirement. *) module type IMPERATIVE_MAPS = sig type key type 'data t val create: unit -> 'data t val clear: 'data t -> unit val add: key -> 'data -> 'data t -> unit val find: key -> 'data t -> 'data val iter: (key -> 'data -> unit) -> 'data t -> unit end (* -------------------------------------------------------------------------- *) (* Properties. *) (* Properties must form a partial order, equipped with a least element, and must satisfy the ascending chain condition: every monotone sequence eventually stabilizes. *) (* [is_maximal] determines whether a property [p] is maximal with respect to the partial order. Only a conservative check is required: in any event, it is permitted for [is_maximal p] to return [false]. If [is_maximal p] returns [true], then [p] must have no upper bound other than itself. In particular, if properties form a lattice, then [p] must be the top element. This feature, not described in the paper, enables a couple of minor optimizations. *) module type PROPERTY = sig type property val bottom: property val equal: property -> property -> bool val is_maximal: property -> bool end (* -------------------------------------------------------------------------- *) (* The code is parametric in an implementation of maps over variables and in an implementation of properties. *) module Make (M : IMPERATIVE_MAPS) (P : PROPERTY) : sig type variable = M.key type property = P.property (* A valuation is a mapping of variables to properties. *) type valuation = variable -> property (* A right-hand side, when supplied with a valuation that gives meaning to its free variables, evaluates to a property. More precisely, a right-hand side is a monotone function of valuations to properties. *) type rhs = valuation -> property (* A system of equations is a mapping of variables to right-hand sides. *) type equations = variable -> rhs (* [lfp eqs] produces the least solution of the system of monotone equations [eqs]. *) (* It is guaranteed that, for each variable [v], the application [eqs v] is performed at most once (whereas the right-hand side produced by this application is, in general, evaluated multiple times). This guarantee can be used to perform costly pre-computation, or memory allocation, when [eqs] is applied to its first argument. *) (* When [lfp] is applied to a system of equations [eqs], it performs no actual computation. It produces a valuation, [get], which represents the least solution of the system of equations. The actual fixed point computation takes place, on demand, when [get] is applied. *) val lfp: equations -> valuation end menhir-20151112/src/item.mli0000644000175000017500000000403712621170073014423 0ustar mehdimehdiopen Grammar (* An LR(0) item encodes a pair of integers, namely the index of the production and the index of the bullet in the production's right-hand side. *) type t val import: Production.index * int -> t val export: t -> Production.index * int (* An item can be encoded as an integer. This is used in the table back-end only. 
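*)

(* To illustrate how such an integer encoding can work, here is a hedged
   sketch -- not the actual definition, whose bit-level layout is private to
   this module; in particular, the width 7 below is an assumption made up
   for the example. A production index and a bullet position can be packed
   into one integer by reserving a fixed number of low-order bits for the
   position:

     let pack (prod : int) (pos : int) : int =
       assert (0 <= pos && pos < 128);
       (prod lsl 7) lor pos

     let unpack (item : int) : int * int =
       (item lsr 7, item land 127)

   Packing followed by unpacking is the identity, e.g.
   [unpack (pack 12 3)] is [(12, 3)]. *)

(*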
The decoding function (really a copy of [export]) is in [TableInterpreter]. *) val marshal: t -> int (* Comparison. *) val equal: t -> t -> bool (* [def item] looks up the production associated with this item in the grammar and returns [prod, nt, rhs, pos, length], where [prod] is the production's index, [nt] and [rhs] represent the production, [pos] is the position of the bullet in the item, and [length] is the length of the production's right-hand side. *) val def: t -> Production.index * Nonterminal.t * Symbol.t array * int * int (* If [item] is a start item, [startnt item] returns the start nonterminal that corresponds to [item]. *) val startnt: t -> Nonterminal.t (* Printing. *) val print: t -> string (* Classifying items as shift or reduce items. A shift item is one where the bullet can still advance. A reduce item is one where the bullet has reached the end of the right-hand side. *) type kind = | Shift of Symbol.t * t | Reduce of Production.index val classify: t -> kind (* Sets of items and maps over items. Hashing these data structures is specifically allowed. *) module Set : GSet.S with type element = t module Map : GMap.S with type key = t and type Domain.t = Set.t (* This functor performs precomputation that helps efficiently compute the closure of an LR(0) or LR(1) state. The precomputation requires time linear in the size of the grammar. The nature of the lookahead sets remains abstract. *) module Closure (L : Lookahead.S) : sig (* A state maps items to lookahead information. *) type state = L.t Map.t (* This takes the closure of a state through all epsilon transitions. *) val closure: state -> state end menhir-20151112/src/RowDisplacement.mli0000644000175000017500000000510212621170074016560 0ustar mehdimehdi(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, INRIA Paris-Rocquencourt *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright 2005-2015 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (* This module compresses a two-dimensional table, where some values are considered insignificant, via row displacement. *) (* A compressed table is represented as a pair of arrays. The displacement array is an array of offsets into the data array. *) type 'a table = int array * (* displacement *) 'a array (* data *) (* [compress equal insignificant dummy m n t] turns the two-dimensional table [t] into a compressed table. The parameter [equal] is equality of data values. The parameter [wildcard] tells which data values are insignificant, and can thus be overwritten with other values. The parameter [dummy] is used to fill holes in the data array. [m] and [n] are the integer dimensions of the table [t]. *) val compress: ('a -> 'a -> bool) -> ('a -> bool) -> 'a -> int -> int -> 'a array array -> 'a table (* [get ct i j] returns the value found at indices [i] and [j] in the compressed table [ct]. This function call is permitted only if the value found at indices [i] and [j] in the original table is significant -- otherwise, it could fail abruptly. 
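*)

(* For instance, here is a sketch of the intended usage, written against the
   signatures in this interface; this code would live in client code, not
   here:

     let t = [| [| 1; 0; 0 |];
                [| 0; 2; 0 |] |]
     (* entries equal to 0 are insignificant; 0 also serves as the dummy *)
     let ct = RowDisplacement.compress (=) (fun x -> x = 0) 0 2 3 t
     let () = assert (RowDisplacement.get ct 0 0 = 1)
     let () = assert (RowDisplacement.get ct 1 1 = 2)

   Querying an insignificant entry, say [get ct 0 1], is not permitted: it
   may return an arbitrary value. *)

(*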
*) (* Together, [compress] and [get] have the property that, if the value found at indices [i] and [j] in an uncompressed table [t] is significant, then [get (compress t) i j] is equal to that value. *) val get: 'a table -> int -> int -> 'a (* [getget] is a variant of [get] which only requires read access, via accessors, to the two components of the table. *) val getget: ('displacement -> int -> int) -> ('data -> int -> 'a) -> 'displacement * 'data -> int -> int -> 'a menhir-20151112/src/conflict.mli0000644000175000017500000000021212621170073015255 0ustar mehdimehdi(* This module explains conflicts. Explanations are written to the .conflicts file. No functionality is offered by this module. *) menhir-20151112/src/codeBits.ml0000644000175000017500000001047312621170073015051 0ustar mehdimehdi(* This module provides a number of tiny functions that help produce [IL] code. *) open IL (* A list subject to a condition. *) let listif condition xs = if condition then xs else [] let elementif condition x = if condition then [ x ] else [] let listiflazy condition xs = if condition then xs() else [] (* The unit type. *) let tunit = TypApp ("unit", []) (* The Boolean type. *) let tbool = TypApp ("bool", []) (* The integer type. *) let tint = TypApp ("int", []) (* The string type. *) let tstring = TypApp ("string", []) (* The exception type. *) let texn = TypApp ("exn", []) (* The type of lexer positions. *) let tposition = TypApp ("Lexing.position", []) (* The type of lexer buffers. *) let tlexbuf = TypApp ("Lexing.lexbuf", []) (* The type of untyped semantic values. *) let tobj = TypApp ("Obj.t", []) (* Building a type variable. *) let tvar x : typ = TypVar x (* Building a type scheme. *) let scheme qs t = { quantifiers = qs; body = t } (* Building a type scheme with no quantifiers out of a type. *) let type2scheme t = scheme [] t let pat2var = function | PVar x -> x | _ -> assert false (* [simplify] removes bindings of the form [let v = v in ...] and [let _ = v in ...]. *) let rec simplify = function | [] -> [] | (PVar v1, EVar v2) :: bindings when v1 = v2 -> (* Avoid a useless let binding. *) simplify bindings | (PWildcard, EVar _) :: bindings -> (* Avoid a useless let binding. *) simplify bindings | binding :: bindings -> binding :: simplify bindings (* Building a [let] construct, with on-the-fly simplification. *) let blet (bindings, body) = match simplify bindings with | [] -> body | bindings -> ELet (bindings, body) let mlet formals actuals body = blet (List.combine formals actuals, body) (* [eraisenotfound] is an expression that raises [Not_found]. *) let eraisenotfound = ERaise (EData ("Not_found", [])) (* [bottom] is an expression that has every type. Its semantics is irrelevant. *) let bottom = eraisenotfound (* Boolean constants. *) let efalse : expr = EData ("false", []) let etrue : expr = EData ("true", []) let eboolconst b = if b then etrue else efalse (* Option constructors. *) let enone = EData ("None", []) let esome e = EData ("Some", [e]) (* List constructors. *) let rec elist xs = match xs with | [] -> EData ("[]", []) | x :: xs -> EData ("::", [ x; elist xs ]) (* Integer constants as patterns. *) let pint k : pattern = PData (string_of_int k, []) (* These help build function types. *) let arrow typ body : typ = TypArrow (typ, body) let arrowif flag typ body : typ = if flag then arrow typ body else body let marrow typs body : typ = List.fold_right arrow typs body (* ------------------------------------------------------------------------ *) (* Here is a bunch of naming conventions. 
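*)

(* A quick illustration of the smart constructor [blet] defined above (a
   sketch: the [IL] constructors are the real ones, while the variable names
   are made up). Trivial bindings are simplified away on the fly: *)

let _blet_demo : expr =
  (* [let x = x in let _ = y in body] collapses to just [body] *)
  blet (
    [ PVar "x", EVar "x";
      PWildcard, EVar "y" ],
    EVar "body"
  )

(* Indeed, [_blet_demo] is [EVar "body"]: no [ELet] node is built at all. *)

(*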
Our names are chosen to minimize the likelihood that a name in a semantic action is captured. In other words, all global definitions as well as the parameters to [reduce] are given far-fetched names, unless [--no-prefix] was specified. Note that the prefix must begin with '_'. This allows avoiding warnings about unused variables with ocaml 3.09 and later. *) let prefix name = if Settings.noprefix then name else "_menhir_" ^ name let dataprefix name = if Settings.noprefix then name else "Menhir" ^ name let tvprefix name = if Settings.noprefix then name else "ttv_" ^ name (* ------------------------------------------------------------------------ *) (* Converting an interface to a structure. Only exception and type definitions go through. *) let interface_item_to_structure_item = function | IIExcDecls defs -> [ SIExcDefs defs ] | IITypeDecls defs -> [ SITypeDefs defs ] | IIFunctor (_, _) | IIValDecls _ | IIInclude _ | IIModule (_, _) | IIComment _ -> [] let interface_to_structure i = List.flatten (List.map interface_item_to_structure_item i) (* Constructing a named module type together with a list of "with type" constraints. *) let with_types wk name tys = List.fold_left (fun mt (params, name, ty) -> MTWithType (mt, params, name, wk, ty) ) (MTNamedModuleType name) tys menhir-20151112/src/fancyDriver.ml0000644000175000017500000000406012621170073015564 0ustar mehdimehdi(* The module [Driver] serves to offer a unified API to the parser, which could be produced by either ocamlyacc or Menhir. *) (* This is the Menhir-specific driver. We wish to handle syntax errors in a more ambitious manner, so as to help our end users understand their mistakes. *) open MenhirLib.General (* streams: Nil, Cons *) open Parser.MenhirInterpreter (* incremental API to our parser *) (* [fail] is invoked if a syntax error is encountered. *) let fail lexbuf checkpoint = match checkpoint with | HandlingError env -> (* The parser has suspended itself because of a syntax error. Stop. Find out which state the parser is currently in. *) let stack = stack env in let s : int = match Lazy.force stack with | Nil -> (* Hmm... The parser is in its initial state. Its number is usually 0. This is a BIG HACK. TEMPORARY *) 0 | Cons (Element (s, _, _, _), _) -> number s in (* Display a nice error message. In principle, the table found in [ParserMessages] should be complete, so we should obtain a nice message. If [Not_found] is raised, we produce a generic message, which is better than nothing. Note that the OCaml code in [ParserMessages] is auto-generated based on the table in [ParserMessages.messages]. *) let message = try ParserMessages.message s with Not_found -> Printf.sprintf "Unknown syntax error (in state %d).\n" s in (* Hack: remove the final newline, because [Error.error] adds one. *) let message = String.sub message 0 (String.length message - 1) in (* Display our message and die. *) Error.error (Positions.lexbuf lexbuf) "syntax error.\n%s" message | _ -> (* This cannot happen. *) assert false (* The entry point. *) let grammar lexer lexbuf = loop_handle (fun v -> v) (fail lexbuf) (lexer_lexbuf_to_supplier lexer lexbuf) (Parser.Incremental.grammar lexbuf.Lexing.lex_curr_p) menhir-20151112/src/conflict.ml0000644000175000017500000004001712621170073015113 0ustar mehdimehdiopen Grammar let () = if Settings.graph then DependencyGraph.print_dependency_graph() (* artificial dependency *) (* -------------------------------------------------------------------------- *) (* Our output channel. 
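*)

(* The explanations constructed in this file are obtained via breadth-first
   searches that record back-pointers in a hash table, then walk them back to
   materialize a shortest path. Here is a minimal self-contained sketch of
   this pattern (the names are ours; the real searches below work over item
   configurations and rely on [Misc.qiter] and [Misc.materialize]): *)

let _bfs_sketch (successors : int -> int list) (source : int) (goal : int)
  : int list =
  let parent : (int, int option) Hashtbl.t = Hashtbl.create 1023 in
  let queue : int Queue.t = Queue.create() in
  let enqueue ancestor node =
    if not (Hashtbl.mem parent node) then begin
      Hashtbl.add parent node ancestor;
      Queue.add node queue
    end
  in
  enqueue None source;
  (* walk the back-pointers, producing the path from [source] to [goal] *)
  let rec rebuild node acc =
    match Hashtbl.find parent node with
    | None -> node :: acc
    | Some ancestor -> rebuild ancestor (node :: acc)
  in
  let rec loop () =
    let node = Queue.pop queue in (* [Queue.Empty] if [goal] is unreachable *)
    if node = goal then rebuild goal []
    else begin
      List.iter (enqueue (Some node)) (successors node);
      loop ()
    end
  in
  loop ()

(*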
*) let out = lazy (open_out (Settings.base ^ ".conflicts")) (* -------------------------------------------------------------------------- *) (* Explaining shift actions. *) (* The existence of a shift action stems from the existence of a shift item in the LR(0) core that underlies the LR(1) state of interest. That is, lookahead sets are not relevant. The existence of a shift item in the LR(0) core is explained by finding a path from a start item to the shift item in the LR(0) nondeterministic automaton, such that the symbols read along this path form the (previously fixed) symbol string that leads to the conflict state in the LR(1) automaton. There may be several such paths: a shortest one is chosen. There may also be several shift items in the conflict state: an arbitrary one is chosen. I believe it would not be interesting to display traces for several shift items: they would be identical except in their last line (where the desired shift item actually appears). *) (* Symbolic execution of the nondeterministic LR(0) automaton. *) (* Configurations are pairs of an LR(0) item and an offset into the input string, which indicates how much has been read so far. *) type configuration0 = Item.t * int (* This function builds a derivation out of a (nonempty, reversed) sequence of configurations. The derivation is constructed from bottom to top, that is, beginning at the last configuration and moving back towards to the start configuration. *) let rec follow derivation offset' = function | [] -> assert (offset' = 0); derivation | (item, offset) :: configs -> let _, _, rhs, pos, _ = Item.def item in let derivation = if offset = offset' then (* This is an epsilon transition. Put a new root node on top of the existing derivation. *) Derivation.build pos rhs derivation None else (* This was a shift transition. Tack symbol in front of the forest. *) Derivation.prepend rhs.(pos) derivation in follow derivation offset configs (* Symbolic execution begins with a start item (corresponding to one of the automaton's entry nodes), a fixed string of input symbols, to be fully consumed, and a goal item. The objective is to find a path through the automaton that leads from the start configuration [(start, 0)] to the goal configuration [(stop, n)], where [n] is the length of the input string. The automaton is explored via breadth-first search. A hash table is used to record which configurations have been visited and to build a spanning tree of shortest paths. *) exception Done let explain_shift_item (start : Item.t) (input : Symbol.t array) (stop : Item.t) : Derivation.t = let n = Array.length input in let table : (configuration0, configuration0 option) Hashtbl.t = Hashtbl.create 1023 in let queue : configuration0 Queue.t = Queue.create() in let enqueue ancestor config = try let _ = Hashtbl.find table config in () with Not_found -> Hashtbl.add table config ancestor; Queue.add config queue in enqueue None (start, 0); try Misc.qiter (function (item, offset) as config -> (* If the item we're looking at is the goal item and if we have read all of the input symbols, stop. *) if (Item.equal item stop) && (offset = n) then raise Done; (* Otherwise, explore the transitions out of this item. *) let prod, _, rhs, pos, length = Item.def item in (* Shift transition, followed only if the symbol matches the symbol found in the input string. 
*) if (pos < length) && (offset < n) && (Symbol.equal rhs.(pos) input.(offset)) then begin let config' = (Item.import (prod, pos+1), offset+1) in enqueue (Some config) config' end; (* Epsilon transitions. *) if pos < length then match rhs.(pos) with | Symbol.N nt -> Production.iternt nt (fun prod -> let config' = (Item.import (prod, 0), offset) in enqueue (Some config) config' ) | Symbol.T _ -> () ) queue; assert false with Done -> (* We have found a (shortest) path from the start configuration to the goal configuration. Turn it into an explicit derivation. *) let configs = Misc.materialize table (stop, n) in let _, _, rhs, pos, _ = Item.def stop in let derivation = Derivation.tail pos rhs in let derivation = follow derivation n configs in derivation (* -------------------------------------------------------------------------- *) (* Explaining reduce actions. *) (* The existence of a reduce action stems from the existence of a reduce item, whose lookahead set contains the token of interest, in the state of interest. Here, lookahead sets are relevant only insofar as they contain or do not contain the token of interest -- in other words, lookahead sets can be abstracted by Boolean values. The existence of the reduce item is explained by finding a path from a start item to the reduce item in the LR(1) nondeterministic automaton, such that the symbols read along this path form the (previously fixed) symbol string that leads to the conflict state in the LR(1) automaton. There may be several such paths: a shortest one is chosen. *) (* Symbolic execution of the nondeterministic LR(1) automaton. *) (* Configurations are pairs of an LR(1) item and an offset into the input string, which indicates how much has been read so far. An LR(1) item is itself represented as the combination of an LR(0) item and a Boolean flag, telling whether the token of interest appears or does not appear in the lookahead set. *) type configuration1 = Item.t * bool * int (* This function builds a derivation out of a sequence of configurations. The end of the sequence is dealt with specially -- we want to explain how the lookahead symbol appears and is inherited. Once that is done, the rest (that is, the beginning) of the derivation is dealt with as above. *) let config1toconfig0 (item, _, offset) = (item, offset) let rec follow1 tok derivation offset' = function | [] -> assert (Terminal.equal tok Terminal.sharp); (* One could emit a comment saying that the lookahead token is initially [#]. That comment would have to be displayed above the derivation, though, and there is no support for that at the moment, so let's skip it. *) derivation | (item, _, offset) :: configs -> let prod, _, rhs, pos, length = Item.def item in if offset = offset' then (* This is an epsilon transition. Attack a new line and add a comment that explains why the lookahead symbol is produced or inherited. *) let nullable, first = Analysis.nullable_first_prod prod (pos + 1) in if TerminalSet.mem tok first then (* The lookahead symbol is produced (and perhaps also inherited, but let's ignore that). *) let e = Analysis.explain_first_rhs tok rhs (pos + 1) in let comment = "lookahead token appears" ^ (if e = "" then "" else " because " ^ e) in let derivation = Derivation.build pos rhs derivation (Some comment) in (* Print the rest of the derivation without paying attention to the lookahead symbols. *) follow derivation offset (List.map config1toconfig0 configs) else begin (* The lookahead symbol is not produced, so it is definitely inherited. 
*) assert nullable; let comment = "lookahead token is inherited" ^ (if pos + 1 < length then Printf.sprintf " because %scan vanish" (Symbol.printao (pos + 1) rhs) else "") in let derivation = Derivation.build pos rhs derivation (Some comment) in follow1 tok derivation offset configs end else (* This is a shift transition. Tack symbol in front of forest. *) let derivation = Derivation.prepend rhs.(pos) derivation in follow1 tok derivation offset configs (* Symbolic execution is performed in the same manner as above. *) let explain_reduce_item (tok : Terminal.t) (start : Item.t) (input : Symbol.t array) (stop : Item.t) : Derivation.t = let n = Array.length input in let table : (configuration1, configuration1 option) Hashtbl.t = Hashtbl.create 1023 in let queue : configuration1 Queue.t = Queue.create() in let enqueue ancestor config = try let _ = Hashtbl.find table config in () with Not_found -> Hashtbl.add table config ancestor; Queue.add config queue in (* If the lookahead token is #, then it initially appear in the lookahead set, otherwise it doesn't. *) enqueue None (start, Terminal.equal tok Terminal.sharp, 0); try Misc.qiter (function (item, lookahead, offset) as config -> (* If the item we're looking at is the goal item and if we have read all of the input symbols, stop. *) if (Item.equal item stop) && lookahead && (offset = n) then raise Done; (* Otherwise, explore the transitions out of this item. *) let prod, _nt, rhs, pos, length = Item.def item in (* Shift transition, followed only if the symbol matches the symbol found in the input string. *) if (pos < length) && (offset < n) && (Symbol.equal rhs.(pos) input.(offset)) then begin let config' = (Item.import (prod, pos+1), lookahead, offset+1) in enqueue (Some config) config' end; (* Epsilon transitions. *) if pos < length then match rhs.(pos) with | Symbol.N nt -> let nullable, first = Analysis.nullable_first_prod prod (pos + 1) in let first : bool = TerminalSet.mem tok first in let lookahead' = if nullable then first || lookahead else first in Production.iternt nt (fun prod -> let config' = (Item.import (prod, 0), lookahead', offset) in enqueue (Some config) config' ) | Symbol.T _ -> () ) queue; assert false with Done -> (* We have found a (shortest) path from the start configuration to the goal configuration. Turn it into an explicit derivation. *) let configs = Misc.materialize table (stop, true, n) in let derivation = Derivation.empty in let derivation = follow1 tok derivation n configs in derivation (* -------------------------------------------------------------------------- *) (* Putting it all together. *) let () = if Settings.explain then begin Lr1.conflicts (fun toks node -> try (* Construct a partial LR(1) automaton, looking for a conflict in a state that corresponds to this node. Because Pager's algorithm can merge two states as soon as one of them has a conflict, we can't be too specific about the conflict that we expect to find in the canonical automaton. So, we must supply a set of conflict tokens and accept any kind of conflict that involves one of them. *) (* TEMPORARY with the new compatibility criterion, we can be sure that every conflict token is indeed involved in a conflict. Exploit that? Avoid focusing on a single token? *) let module P = Lr1partial.Run (struct let tokens = toks let goal = node end) in let closure = Lr0.closure P.goal in (* Determine what kind of conflict was found. 
*) let shift, reduce = Item.Map.fold (fun item toks (shift, reduce) -> match Item.classify item with | Item.Shift (Symbol.T tok, _) when Terminal.equal tok P.token -> shift + 1, reduce | Item.Reduce _ when TerminalSet.mem P.token toks -> shift, reduce + 1 | _ -> shift, reduce ) closure (0, 0) in let kind = if (shift > 0) && (reduce > 1) then "shift/reduce/reduce" else if (shift > 0) then "shift/reduce" else "reduce/reduce" in (* Explain how the conflict state is reached. *) let out = Lazy.force out in Printf.fprintf out "\n\ ** Conflict (%s) in state %d.\n\ ** Token%s involved: %s\n%s\ ** This state is reached from %s after reading:\n\n%s\n" kind (Lr1.number node) (if TerminalSet.cardinal toks > 1 then "s" else "") (TerminalSet.print toks) (if TerminalSet.cardinal toks > 1 then Printf.sprintf "** The following explanations concentrate on token %s.\n" (Terminal.print P.token) else "") (Nonterminal.print false (Item.startnt P.source)) (Symbol.printa P.path); (* Examine the items in that state, focusing on one particular token. Out of the shift items, we explain just one -- this seems enough. We explain each of the reduce items. *) (* First, build a mapping of items to derivations. *) let (_ : bool), derivations = Item.Map.fold (fun item toks (still_looking_for_shift_item, derivations) -> match Item.classify item with | Item.Shift (Symbol.T tok, _) when still_looking_for_shift_item && (Terminal.equal tok P.token) -> false, let derivation = explain_shift_item P.source P.path item in Item.Map.add item derivation derivations | Item.Reduce _ when TerminalSet.mem P.token toks -> still_looking_for_shift_item, let derivation = explain_reduce_item P.token P.source P.path item in Item.Map.add item derivation derivations | _ -> still_looking_for_shift_item, derivations ) closure (true, Item.Map.empty) in (* Factor out the common context among all derivations, so as to avoid repeating it. This helps prevent derivation trees from drifting too far away towards the right. It also helps produce sub-derivations that are quite compact. *) let context, derivations = Derivation.factor derivations in (* Display the common context. *) Printf.fprintf out "\n** The derivations that appear below have the following common factor:\ \n** (The question mark symbol (?) represents the spot where the derivations begin to differ.)\n\n"; Derivation.printc out context; (* Then, display the sub-derivations. *) Item.Map.iter (fun item derivation -> Printf.fprintf out "\n** In state %d, looking ahead at %s, " (Lr1.number node) (Terminal.print P.token); begin match Item.classify item with | Item.Shift _ -> Printf.fprintf out "shifting is permitted\n** because of the following sub-derivation:\n\n" | Item.Reduce prod -> Printf.fprintf out "reducing production\n** %s\n** is permitted because of the following sub-derivation:\n\n" (Production.print prod) end; Derivation.print out derivation ) derivations; flush out with Lr1partial.Oops -> (* Ha ha! We were unable to explain this conflict. This could happen because the automaton was butchered by conflict resolution directives, or because [--lalr] was enabled and we have unexplainable LALR conflicts. Anyway, send the error message to the .conflicts file and continue. *) let out = Lazy.force out in Printf.fprintf out "\n\ ** Conflict (unexplainable) in state %d.\n\ ** Token%s involved: %s\n\ ** %s.\n%!" 
(Lr1.number node) (if TerminalSet.cardinal toks > 1 then "s" else "") (TerminalSet.print toks) (match Settings.construction_mode with | Settings.ModeLALR -> "This may be an artificial conflict caused by your use of --lalr" | Settings.ModeCanonical | Settings.ModeInclusionOnly | Settings.ModePager -> "Please send your grammar to Menhir's developers" ) ); Time.tick "Explaining conflicts" end (* ------------------------------------------------------------------------ *) (* Resolve the conflicts that remain in the automaton. *) let () = Lr1.default_conflict_resolution(); Time.tick "Resolving remaining conflicts" (* ------------------------------------------------------------------------ *) (* Now is as good a time as any to add extra reductions, if requested by the user. This must be done after conflicts have been resolved. *) let () = Lr1.extra_reductions(); Time.tick "Adding extra reductions" menhir-20151112/src/fancy-parser.mly0000644000175000017500000002630412621170073016100 0ustar mehdimehdi/* This is the fancy version of the parser, to be processed by menhir. It is kept in sync with [Parser], but exercises menhir's features. */ /* As of 2014/12/02, the $previouserror keyword and the --error-recovery mode no longer exists. Thus, we replace all calls to [Error.signal] with calls to [Error.error], and report just one error. */ /* ------------------------------------------------------------------------- */ /* Imports. */ %{ open ConcreteSyntax open Syntax open Positions %} /* ------------------------------------------------------------------------- */ /* Tokens. */ %token TOKEN TYPE LEFT RIGHT NONASSOC START PREC PUBLIC COLON BAR EOF EQUAL %token INLINE LPAREN RPAREN COMMA QUESTION STAR PLUS PARAMETER ON_ERROR_REDUCE %token LID UID %token HEADER %token OCAMLTYPE %token PERCENTPERCENT %token Action.t> ACTION /* ------------------------------------------------------------------------- */ /* Start symbol. */ %start grammar /* ------------------------------------------------------------------------- */ /* Priorities. */ /* These declarations solve a shift-reduce conflict in favor of shifting: when the declaration of a non-terminal symbol begins with a leading bar, it is understood as an (insignificant) leading optional bar, *not* as an empty right-hand side followed by a bar. This ambiguity arises due to the existence of a new notation for letting several productions share a single semantic action. */ %nonassoc no_optional_bar %nonassoc BAR %% /* ------------------------------------------------------------------------- */ /* A grammar consists of declarations and rules, followed by an optional trailer, which we do not parse. */ grammar: ds = declaration* PERCENTPERCENT rs = rule* t = trailer { { pg_filename = ""; (* filled in by the caller *) pg_declarations = List.flatten ds; pg_rules = rs @ ParserAux.rules(); pg_trailer = t } } /* ------------------------------------------------------------------------- */ /* A declaration is an %{ Objective Caml header %}, or a %token, %start, %type, %left, %right, or %nonassoc declaration. */ declaration: | h = HEADER /* lexically delimited by %{ ... %} */ { [ with_poss $startpos $endpos (DCode h) ] } | TOKEN t = OCAMLTYPE? ts = clist(terminal) { List.map (Positions.map (fun terminal -> DToken (t, terminal))) ts } | START t = OCAMLTYPE? 
nts = clist(nonterminal) /* %start foo is syntactic sugar for %start foo %type foo */ { match t with | None -> List.map (Positions.map (fun nonterminal -> DStart nonterminal)) nts | Some t -> Misc.mapd (fun ntloc -> Positions.mapd (fun nt -> DStart nt, DType (t, ParameterVar ntloc)) ntloc) nts } | TYPE t = OCAMLTYPE ss = clist(strict_actual) { List.map (Positions.map (fun nt -> DType (t, nt))) (List.map Parameters.with_pos ss) } | k = priority_keyword ss = clist(symbol) { let prec = ParserAux.new_precedence_level $startpos(k) $endpos(k) in List.map (Positions.map (fun symbol -> DTokenProperties (symbol, k, prec))) ss } | PARAMETER t = OCAMLTYPE { [ with_poss $startpos $endpos (DParameter t) ] } | ON_ERROR_REDUCE ss = clist(strict_actual) { List.map (Positions.map (fun nt -> DOnErrorReduce nt)) (List.map Parameters.with_pos ss) } /* This production recognizes tokens that are valid in the rules section, but not in the declarations section. This is a hint that a %% was forgotten. */ | rule_specific_token { Error.error (Positions.two $startpos $endpos) "syntax error inside a declaration.\n\ Did you perhaps forget the %%%% that separates declarations and rules?" } priority_keyword: LEFT { LeftAssoc } | RIGHT { RightAssoc } | NONASSOC { NonAssoc } %inline rule_specific_token: | PUBLIC | INLINE | COLON | EOF { () } /* ------------------------------------------------------------------------- */ /* Our lists of symbols are separated with optional commas. Order is irrelevant. */ %inline clist(X): xs = separated_nonempty_list(COMMA?, X) { xs } /* ------------------------------------------------------------------------- */ /* A symbol is a terminal or nonterminal symbol. One would like to require nonterminal symbols to begin with a lowercase letter, so as to lexically distinguish them from terminal symbols, which must begin with an uppercase letter. However, for compatibility with ocamlyacc, this is impossible. It can be required only for nonterminal symbols that are also start symbols. */ symbol: id = LID | id = UID { id } /* ------------------------------------------------------------------------- */ /* Terminals must begin with an uppercase letter. Nonterminals that are declared to be start symbols must begin with a lowercase letter. */ %inline terminal: id = UID { id } %inline nonterminal: id = LID { id } /* ------------------------------------------------------------------------- */ /* A rule defines a symbol. It is optionally declared %public, and optionally carries a number of formal parameters. The right-hand side of the definition consists of a list of productions. */ rule: flags = flags /* flags */ symbol = symbol /* the symbol that is being defined */ params = plist(symbol) /* formal parameters */ COLON optional_bar branches = branches { let public, inline = flags in { pr_public_flag = public; pr_inline_flag = inline; pr_nt = Positions.value symbol; pr_positions = [ Positions.position symbol ]; pr_parameters = List.map Positions.value params; pr_branches = branches } } %inline branches: prods = separated_nonempty_list(BAR, production_group) { List.flatten prods } flags: /* epsilon */ { false, false } | PUBLIC { true, false } | INLINE { false, true } | PUBLIC INLINE | INLINE PUBLIC { true, true } optional_bar: /* epsilon */ %prec no_optional_bar | BAR { () } /* ------------------------------------------------------------------------- */ /* A production group consists of a list of productions, followed by a semantic action and an optional precedence specification. 
*/ production_group: productions = separated_nonempty_list(BAR, production) action = ACTION oprec2 = ioption(precedence) { (* If multiple productions share a single semantic action, check that all of them bind the same names. *) ParserAux.check_production_group productions; (* Then, *) List.map (fun (producers, oprec1, level, pos) -> (* Replace [$i] with [_i]. *) let pr_producers = ParserAux.normalize_producers producers in (* Distribute the semantic action. Also, check that every [$i] is within bounds. *) let action : Syntax.identifier option array -> Action.t = action in let pr_action = action (ParserAux.producer_names producers) in { pr_producers; pr_action; pr_branch_prec_annotation = ParserAux.override pos oprec1 oprec2; pr_branch_production_level = level; pr_branch_position = pos }) productions } %inline precedence: PREC symbol = symbol { symbol } /* ------------------------------------------------------------------------- */ /* A production is a list of producers, optionally followed by a precedence declaration. */ production: producers = producer* oprec = ioption(precedence) { producers, oprec, ParserAux.new_production_level(), Positions.lex_join $startpos $endpos } /* ------------------------------------------------------------------------- */ /* A producer is an actual parameter, possibly preceded by a binding. Because both [ioption] and [terminated] are defined as inlined by the standard library, this definition expands to two productions, one of which begins with id = LID, the other of which begins with p = actual. The token LID is in FIRST(actual), but the LR(1) formalism can deal with that. If [option] was used instead of [ioption], an LR(1) conflict would arise -- looking ahead at LID would not allow determining whether to reduce an empty [option] or to shift. */ producer: | id = ioption(terminated(LID, EQUAL)) p = actual { position (with_poss $startpos $endpos ()), id, p } /* ------------------------------------------------------------------------- */ /* The ideal syntax of actual parameters includes: 1. a symbol, optionally applied to a list of actual parameters; 2. an actual parameter followed with a modifier; 3. an anonymous rule. (Not delimited by parentheses! Otherwise one would often end up writing two pairs of parentheses.) */ /* In order to avoid a few ambiguities, we restrict this ideal syntax as follows: a. Within a %type declaration, we use [strict_actual], which allows 1- and 2- (this is undocumented; the documentation says we require a symbol) but not 3-, which would not make semantic sense anyway. b. Within a producer, we use [actual], which allows 1- and 2- but not 3-. Case 3- is allowed by switching to [lax_actual] within the actual arguments of an application, which are clearly delimited by parentheses and commas. c. In front of a modifier, we can never allow [lax_actual], as this would create an ambiguity: basically, [A | B?] could be interpreted either as [(A | B)?] or as [A | (B?)]. 
*/ %inline generic_actual(A, B): (* 1- *) symbol = symbol actuals = plist(A) { Parameters.app symbol actuals } (* 2- *) | p = B m = modifier { ParameterApp (m, [ p ]) } strict_actual: p = generic_actual(strict_actual, strict_actual) { p } actual: p = generic_actual(lax_actual, actual) { p } lax_actual: p = generic_actual(lax_actual, /* cannot be lax_ */ actual) { p } (* 3- *) | /* leading bar disallowed */ branches = branches { let position = position (with_poss $startpos $endpos ()) in let symbol = ParserAux.anonymous position branches in ParameterVar (with_pos position symbol) } /* ------------------------------------------------------------------------- */ /* Formal or actual parameter lists are delimited with parentheses and separated with commas. They are optional. */ %inline plist(X): params = loption(delimited(LPAREN, separated_nonempty_list(COMMA, X), RPAREN)) { params } /* ------------------------------------------------------------------------- */ /* The "?", "+", and "*" modifiers are short-hands for applications of certain parameterized nonterminals, defined in the standard library. */ modifier: QUESTION { with_poss $startpos $endpos "option" } | PLUS { with_poss $startpos $endpos "nonempty_list" } | STAR { with_poss $startpos $endpos "list" } /* ------------------------------------------------------------------------- */ /* A trailer is announced by %%, but is optional. */ trailer: EOF { None } | p = PERCENTPERCENT /* followed by actual trailer */ { Some (Lazy.force p) } %% menhir-20151112/src/Printers.mli0000644000175000017500000000523012621170074015270 0ustar mehdimehdi(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, INRIA Paris-Rocquencourt *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright 2005-2015 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (* This module is part of MenhirLib. *) module Make (I : IncrementalEngine.EVERYTHING) (User : sig (* [print s] is supposed to send the string [s] to some output channel. *) val print: string -> unit (* [print_symbol s] is supposed to print a representation of the symbol [s]. *) val print_symbol: I.xsymbol -> unit (* [print_element e] is supposed to print a representation of the element [e]. This function is optional; if it is not provided, [print_element_as_symbol] (defined below) is used instead. *) val print_element: (I.element -> unit) option end) : sig open I (* Printing a list of symbols. *) val print_symbols: xsymbol list -> unit (* Printing an element as a symbol. This prints just the symbol that this element represents; nothing more. *) val print_element_as_symbol: element -> unit (* Printing a stack as a list of elements. This function needs an element printer. It uses [print_element] if provided by the user; otherwise it uses [print_element_as_symbol]. (Ending with a newline.) *) val print_stack: stack -> unit (* Printing an item. (Ending with a newline.) *) val print_item: item -> unit (* Printing a production. (Ending with a newline.) *) val print_production: production -> unit (* Printing the current LR(1) state. The current state is first displayed as a number; then the list of its LR(0) items is printed. (Ending with a newline.) 
*) val print_current_state: env -> unit (* Printing a summary of the stack and current state. This function just calls [print_stack] and [print_current_state] in succession. *) val print_env: env -> unit end menhir-20151112/src/patricia.ml0000644000175000017500000007137412621170073015120 0ustar mehdimehdi(* This is an implementation of Patricia trees, following Chris Okasaki's paper at the 1998 ML Workshop in Baltimore. Both big-endian and little-endian trees are provided. Both sets and maps are implemented on top of Patricia trees. *) (*i --------------------------------------------------------------------------------------------------------------- i*) (*s \mysection{Little-endian vs big-endian trees} *) (* A tree is little-endian if it expects the key's least significant bits to be tested first during a search. It is big-endian if it expects the key's most significant bits to be tested first. Most of the code is independent of this design choice, so it is written as a functor, parameterized by a small structure which defines endianness. Here is the interface which must be adhered to by such a structure. *) module Endianness = struct module type S = sig (* A mask is an integer with a single one bit (i.e. a power of 2). *) type mask = int (* [branching_bit] accepts two distinct integers and returns a mask which identifies the first bit where they differ. The meaning of ``first'' varies according to the endianness being implemented. *) val branching_bit: int -> int -> mask (* [mask i m] returns an integer [i'], where all bits which [m] says are relevant are identical to those in [i], and all others are set to some unspecified, but fixed value. Which bits are ``relevant'' according to a given mask varies according to the endianness being implemented. *) val mask: int -> mask -> int (* [shorter m1 m2] returns [true] if and only if [m1] describes a shorter prefix than [m2], i.e. if it makes fewer bits relevant. Which bits are ``relevant'' according to a given mask varies according to the endianness being implemented. *) val shorter: mask -> mask -> bool end (* Now, let us define [Little] and [Big], two possible [Endiannness] choices. *) module Little = struct type mask = int let lowest_bit x = x land (-x) (* Performing a logical ``xor'' of [i0] and [i1] yields a bit field where all differences between [i0] and [i1] show up as one bits. (There must be at least one, since [i0] and [i1] are distinct.) The ``first'' one is the lowest bit in this bit field, since we are checking least significant bits first. *) let branching_bit i0 i1 = lowest_bit (i0 lxor i1) (* The ``relevant'' bits in an integer [i] are those which are found (strictly) to the right of the single one bit in the mask [m]. We keep these bits, and set all others to 0. *) let mask i m = i land (m-1) (* The smaller [m] is, the fewer bits are relevant. *) let shorter = (<) end module Big = struct type mask = int let lowest_bit x = x land (-x) let rec highest_bit x = let m = lowest_bit x in if x = m then m else highest_bit (x - m) (* Performing a logical ``xor'' of [i0] and [i1] yields a bit field where all differences between [i0] and [i1] show up as one bits. (There must be at least one, since [i0] and [i1] are distinct.) The ``first'' one is the highest bit in this bit field, since we are checking most significant bits first. In Okasaki's paper, this loop is sped up by computing a conservative initial guess. 
Indeed, the bit at which the two prefixes disagree must be somewhere within the shorter prefix, so we can begin searching at the least-significant valid bit in the shorter prefix. Unfortunately, to allow computing the initial guess, the main code has to pass in additional parameters, e.g. a mask which describes the length of each prefix. This ``pollutes'' the endianness-independent code. For this reason, this optimization isn't implemented here. *) let branching_bit i0 i1 = highest_bit (i0 lxor i1) (* The ``relevant'' bits in an integer [i] are those which are found (strictly) to the left of the single one bit in the mask [m]. We keep these bits, and set all others to 0. Okasaki uses a different convention, which allows big-endian Patricia trees to masquerade as binary search trees. This feature does not seem to be useful here. *) let mask i m = i land (lnot (2*m-1)) (* The smaller [m] is, the more bits are relevant. *) let shorter = (>) end end (*i --------------------------------------------------------------------------------------------------------------- i*) (*s \mysection{Patricia-tree-based maps} *) module Make (X : Endianness.S) = struct (* Patricia trees are maps whose keys are integers. *) type key = int (* A tree is either empty, or a leaf node, containing both the integer key and a piece of data, or a binary node. Each binary node carries two integers. The first one is the longest common prefix of all keys in this sub-tree. The second integer is the branching bit. It is an integer with a single one bit (i.e. a power of 2), which describes the bit being tested at this node. *) type 'a t = | Empty | Leaf of int * 'a | Branch of int * X.mask * 'a t * 'a t (* The empty map. *) let empty = Empty (* [choose m] returns an arbitrarily chosen binding in [m], if [m] is nonempty, and raises [Not_found] otherwise. *) let rec choose = function | Empty -> raise Not_found | Leaf (key, data) -> key, data | Branch (_, _, tree0, _) -> choose tree0 (* [lookup k m] looks up the value associated to the key [k] in the map [m], and raises [Not_found] if no value is bound to [k]. This implementation takes branches \emph{without} checking whether the key matches the prefix found at the current node. This means that a query for a non-existent key shall be detected only when finally reaching a leaf, rather than higher up in the tree. This strategy is better when (most) queries are expected to be successful. *) let rec lookup key = function | Empty -> raise Not_found | Leaf (key', data) -> if key = key' then data else raise Not_found | Branch (_, mask, tree0, tree1) -> lookup key (if (key land mask) = 0 then tree0 else tree1) let find = lookup (* [mem k m] tells whether the key [k] appears in the domain of the map [m]. *) let mem k m = try let _ = lookup k m in true with Not_found -> false (* The auxiliary function [join] merges two trees in the simple case where their prefixes disagree. Assume $t_0$ and $t_1$ are non-empty trees, with longest common prefixes $p_0$ and $p_1$, respectively. Further, suppose that $p_0$ and $p_1$ disagree, that is, neither prefix is contained in the other. Then, no matter how large $t_0$ and $t_1$ are, we can merge them simply by creating a new [Branch] node that has $t_0$ and $t_1$ as children! *) let join p0 t0 p1 t1 = let m = X.branching_bit p0 p1 in let p = X.mask p0 (* for instance *) m in if (p0 land m) = 0 then Branch(p, m, t0, t1) else Branch(p, m, t1, t0) (* The auxiliary function [match_prefix] tells whether a given key has a given prefix. 
More specifically, [match_prefix k p m] returns [true] if and only if the key [k] has prefix [p] up to bit [m]. Throughout our implementation of Patricia trees, prefixes are assumed to be in normal form, i.e. their irrelevant bits are set to some predictable value. Formally, we assume [X.mask p m] equals [p] whenever [p] is a prefix with [m] relevant bits. This allows implementing [match_prefix] using only one call to [X.mask]. On the other hand, this requires normalizing prefixes, as done e.g. in [join] above, where [X.mask p0 m] has to be used instead of [p0]. *) let match_prefix k p m = X.mask k m = p (* [fine_add decide k d m] returns a map whose bindings are all bindings in [m], plus a binding of the key [k] to the datum [d]. If a binding from [k] to [d0] already exists, then the resulting map contains a binding from [k] to [decide d0 d]. *) type 'a decision = 'a -> 'a -> 'a exception Unchanged let basic_add decide k d m = let rec add t = match t with | Empty -> Leaf (k, d) | Leaf (k0, d0) -> if k = k0 then let d' = decide d0 d in if d' == d0 then raise Unchanged else Leaf (k, d') else join k (Leaf (k, d)) k0 t | Branch (p, m, t0, t1) -> if match_prefix k p m then if (k land m) = 0 then Branch (p, m, add t0, t1) else Branch (p, m, t0, add t1) else join k (Leaf (k, d)) p t in add m let strict_add k d m = basic_add (fun _ _ -> raise Unchanged) k d m let fine_add decide k d m = try basic_add decide k d m with Unchanged -> m (* [add k d m] returns a map whose bindings are all bindings in [m], plus a binding of the key [k] to the datum [d]. If a binding already exists for [k], it is overridden. *) let add k d m = fine_add (fun _old_binding new_binding -> new_binding) k d m (* [singleton k d] returns a map whose only binding is from [k] to [d]. *) let singleton k d = Leaf (k, d) (* [is_singleton m] returns [Some (k, d)] if [m] is a singleton map that maps [k] to [d]. Otherwise, it returns [None]. *) let is_singleton = function | Leaf (k, d) -> Some (k, d) | Empty | Branch _ -> None (* [is_empty m] returns [true] if and only if the map [m] defines no bindings at all. *) let is_empty = function | Empty -> true | Leaf _ | Branch _ -> false (* [cardinal m] returns [m]'s cardinal, that is, the number of keys it binds, or, in other words, its domain's cardinal. *) let rec cardinal = function | Empty -> 0 | Leaf _ -> 1 | Branch (_, _, t0, t1) -> cardinal t0 + cardinal t1 (* [remove k m] returns the map [m] deprived from any binding involving [k]. *) let remove key m = let rec remove = function | Empty -> raise Not_found | Leaf (key', _) -> if key = key' then Empty else raise Not_found | Branch (prefix, mask, tree0, tree1) -> if (key land mask) = 0 then match remove tree0 with | Empty -> tree1 | tree0 -> Branch (prefix, mask, tree0, tree1) else match remove tree1 with | Empty -> tree0 | tree1 -> Branch (prefix, mask, tree0, tree1) in try remove m with Not_found -> m (* [lookup_and_remove k m] looks up the value [v] associated to the key [k] in the map [m], and raises [Not_found] if no value is bound to [k]. The call returns the value [v], together with the map [m] deprived from the binding from [k] to [v]. 
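For instance, if [m] binds [1] to ["one"] and [2] to ["two"], then
   [lookup_and_remove 1 m] returns the pair [("one", m')], where [m'] binds
   [2] to ["two"] only.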
*) let rec lookup_and_remove key = function | Empty -> raise Not_found | Leaf (key', data) -> if key = key' then data, Empty else raise Not_found | Branch (prefix, mask, tree0, tree1) -> if (key land mask) = 0 then match lookup_and_remove key tree0 with | data, Empty -> data, tree1 | data, tree0 -> data, Branch (prefix, mask, tree0, tree1) else match lookup_and_remove key tree1 with | data, Empty -> data, tree0 | data, tree1 -> data, Branch (prefix, mask, tree0, tree1) let find_and_remove = lookup_and_remove (* [fine_union decide m1 m2] returns the union of the maps [m1] and [m2]. If a key [k] is bound to [x1] (resp. [x2]) within [m1] (resp. [m2]), then [decide] is called. It is passed [x1] and [x2], and must return the value which shall be bound to [k] in the final map. The operation returns [m2] itself (as opposed to a copy of it) when its result is equal to [m2]. *) let reverse decision elem1 elem2 = decision elem2 elem1 let fine_union decide m1 m2 = let rec union s t = match s, t with | Empty, _ -> t | (Leaf _ | Branch _), Empty -> s | Leaf(key, value), _ -> fine_add (reverse decide) key value t | Branch _, Leaf(key, value) -> fine_add decide key value s | Branch(p, m, s0, s1), Branch(q, n, t0, t1) -> if (p = q) && (m = n) then (* The trees have the same prefix. Merge their sub-trees. *) let u0 = union s0 t0 and u1 = union s1 t1 in if t0 == u0 && t1 == u1 then t else Branch(p, m, u0, u1) else if (X.shorter m n) && (match_prefix q p m) then (* [q] contains [p]. Merge [t] with a sub-tree of [s]. *) if (q land m) = 0 then Branch(p, m, union s0 t, s1) else Branch(p, m, s0, union s1 t) else if (X.shorter n m) && (match_prefix p q n) then (* [p] contains [q]. Merge [s] with a sub-tree of [t]. *) if (p land n) = 0 then let u0 = union s t0 in if t0 == u0 then t else Branch(q, n, u0, t1) else let u1 = union s t1 in if t1 == u1 then t else Branch(q, n, t0, u1) else (* The prefixes disagree. *) join p s q t in union m1 m2 (* [union m1 m2] returns the union of the maps [m1] and [m2]. Bindings in [m2] take precedence over those in [m1]. *) let union m1 m2 = fine_union (fun _d d' -> d') m1 m2 (* [iter f m] invokes [f k x], in turn, for each binding from key [k] to element [x] in the map [m]. Keys are presented to [f] according to some unspecified, but fixed, order. *) let rec iter f = function | Empty -> () | Leaf (key, data) -> f key data | Branch (_, _, tree0, tree1) -> iter f tree0; iter f tree1 (* [fold f m seed] invokes [f k d accu], in turn, for each binding from key [k] to datum [d] in the map [m]. Keys are presented to [f] in increasing order according to the map's ordering. The initial value of [accu] is [seed]; then, at each new call, its value is the value returned by the previous invocation of [f]. The value returned by [fold] is the final value of [accu]. *) let rec fold f m accu = match m with | Empty -> accu | Leaf (key, data) -> f key data accu | Branch (_, _, tree0, tree1) -> fold f tree1 (fold f tree0 accu) (* [fold_rev] performs exactly the same job as [fold], but presents keys to [f] in the opposite order. *) let rec fold_rev f m accu = match m with | Empty -> accu | Leaf (key, data) -> f key data accu | Branch (_, _, tree0, tree1) -> fold_rev f tree0 (fold_rev f tree1 accu) (* It is valid to evaluate [iter2 f m1 m2] if and only if [m1] and [m2] have the same domain. Doing so invokes [f k x1 x2], in turn, for each key [k] bound to [x1] in [m1] and to [x2] in [m2]. Bindings are presented to [f] according to some unspecified, but fixed, order. 
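For instance, assuming the data admits structural equality, one can test
   whether two maps with a common domain agree pointwise. This is a sketch
   only; [agree] is not part of this module:

     let agree m1 m2 =
       try iter2 (fun _ x1 x2 -> if x1 <> x2 then raise Exit) m1 m2; true
       with Exit -> false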
*) let rec iter2 f t1 t2 = match t1, t2 with | Empty, Empty -> () | Leaf (key1, data1), Leaf (key2, data2) -> assert (key1 = key2); f key1 (* for instance *) data1 data2 | Branch (p1, m1, left1, right1), Branch (p2, m2, left2, right2) -> assert (p1 = p2); assert (m1 = m2); iter2 f left1 left2; iter2 f right1 right2 | _, _ -> assert false (* [map f m] returns the map obtained by composing the map [m] with the function [f]; that is, the map $k\mapsto f(m(k))$. *) let rec map f = function | Empty -> Empty | Leaf (key, data) -> Leaf(key, f data) | Branch (p, m, tree0, tree1) -> Branch (p, m, map f tree0, map f tree1) (* [endo_map] is similar to [map], but attempts to physically share its result with its input. This saves memory when [f] is the identity function. *) let rec endo_map f tree = match tree with | Empty -> tree | Leaf (key, data) -> let data' = f data in if data == data' then tree else Leaf(key, data') | Branch (p, m, tree0, tree1) -> let tree0' = endo_map f tree0 in let tree1' = endo_map f tree1 in if (tree0' == tree0) && (tree1' == tree1) then tree else Branch (p, m, tree0', tree1') (* [filter f m] returns a copy of the map [m] where only the bindings that satisfy [f] have been retained. *) let filter f m = fold (fun key data accu -> if f key data then add key data accu else accu ) m empty (* [iterator m] returns a stateful iterator over the map [m]. *) (* TEMPORARY performance could be improved, see JCF's paper *) let iterator m = let remainder = ref [ m ] in let rec next () = match !remainder with | [] -> None | Empty :: parent -> remainder := parent; next() | (Leaf (key, data)) :: parent -> remainder := parent; Some (key, data) | (Branch(_, _, s0, s1)) :: parent -> remainder := s0 :: s1 :: parent; next () in next (* If [dcompare] is an ordering over data, then [compare dcompare] is an ordering over maps. *) exception Got of int let compare dcompare m1 m2 = let iterator2 = iterator m2 in try iter (fun key1 data1 -> match iterator2() with | None -> raise (Got 1) | Some (key2, data2) -> let c = Pervasives.compare key1 key2 in if c <> 0 then raise (Got c) else let c = dcompare data1 data2 in if c <> 0 then raise (Got c) ) m1; match iterator2() with | None -> 0 | Some _ -> -1 with Got c -> c (*i --------------------------------------------------------------------------------------------------------------- i*) (*s \mysection{Patricia-tree-based sets} *) (* To enhance code sharing, it would be possible to implement maps as sets of pairs, or (vice-versa) to implement sets as maps to the unit element. However, both possibilities introduce some space and time inefficiency. To avoid it, we define each structure separately. *) module Domain = struct type element = int type t = | Empty | Leaf of int | Branch of int * X.mask * t * t (* The empty set. *) let empty = Empty (* [is_empty s] returns [true] if and only if the set [s] is empty. *) let is_empty = function | Empty -> true | Leaf _ | Branch _ -> false (* [singleton x] returns a set whose only element is [x]. *) let singleton x = Leaf x (* [is_singleton s] tests whether [s] is a singleton set. *) let is_singleton = function | Leaf _ -> true | Empty | Branch _ -> false (* [choose s] returns an arbitrarily chosen element of [s], if [s] is nonempty, and raises [Not_found] otherwise. *) let rec choose = function | Empty -> raise Not_found | Leaf x -> x | Branch (_, _, tree0, _) -> choose tree0 (* [cardinal s] returns [s]'s cardinal. 
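For instance, [cardinal (add 1 (add 2 empty))] is [2]; since a set
   contains no duplicates, [cardinal (add 1 (add 1 empty))] is [1].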
*) let rec cardinal = function | Empty -> 0 | Leaf _ -> 1 | Branch (_, _, t0, t1) -> cardinal t0 + cardinal t1 (* [mem x s] returns [true] if and only if [x] appears in the set [s]. *) let rec mem x = function | Empty -> false | Leaf x' -> x = x' | Branch (_, mask, tree0, tree1) -> mem x (if (x land mask) = 0 then tree0 else tree1) (* The auxiliary function [join] merges two trees in the simple case where their prefixes disagree. *) let join p0 t0 p1 t1 = let m = X.branching_bit p0 p1 in let p = X.mask p0 (* for instance *) m in if (p0 land m) = 0 then Branch(p, m, t0, t1) else Branch(p, m, t1, t0) (* [add x s] returns a set whose elements are all elements of [s], plus [x]. *) exception Unchanged let rec strict_add x t = match t with | Empty -> Leaf x | Leaf x0 -> if x = x0 then raise Unchanged else join x (Leaf x) x0 t | Branch (p, m, t0, t1) -> if match_prefix x p m then if (x land m) = 0 then Branch (p, m, strict_add x t0, t1) else Branch (p, m, t0, strict_add x t1) else join x (Leaf x) p t let add x s = try strict_add x s with Unchanged -> s (* [remove x s] returns a set whose elements are all elements of [s], except [x]. *) let remove x s = let rec strict_remove = function | Empty -> raise Not_found | Leaf x' -> if x = x' then Empty else raise Not_found | Branch (prefix, mask, tree0, tree1) -> if (x land mask) = 0 then match strict_remove tree0 with | Empty -> tree1 | tree0 -> Branch (prefix, mask, tree0, tree1) else match strict_remove tree1 with | Empty -> tree0 | tree1 -> Branch (prefix, mask, tree0, tree1) in try strict_remove s with Not_found -> s (* [union s1 s2] returns the union of the sets [s1] and [s2]. *) let rec union s t = match s, t with | Empty, _ -> t | _, Empty -> s | Leaf x, _ -> add x t | _, Leaf x -> add x s | Branch(p, m, s0, s1), Branch(q, n, t0, t1) -> if (p = q) && (m = n) then (* The trees have the same prefix. Merge their sub-trees. *) let u0 = union s0 t0 and u1 = union s1 t1 in if t0 == u0 && t1 == u1 then t else Branch(p, m, u0, u1) else if (X.shorter m n) && (match_prefix q p m) then (* [q] contains [p]. Merge [t] with a sub-tree of [s]. *) if (q land m) = 0 then Branch(p, m, union s0 t, s1) else Branch(p, m, s0, union s1 t) else if (X.shorter n m) && (match_prefix p q n) then (* [p] contains [q]. Merge [s] with a sub-tree of [t]. *) if (p land n) = 0 then let u0 = union s t0 in if t0 == u0 then t else Branch(q, n, u0, t1) else let u1 = union s t1 in if t1 == u1 then t else Branch(q, n, t0, u1) else (* The prefixes disagree. *) join p s q t (* [build] is a ``smart constructor''. It builds a [Branch] node with the specified arguments, but ensures that the newly created node does not have an [Empty] child. *) let build p m t0 t1 = match t0, t1 with | Empty, Empty -> Empty | Empty, _ -> t1 | _, Empty -> t0 | _, _ -> Branch(p, m, t0, t1) (* [inter s t] returns the set intersection of [s] and [t], that is, $s\cap t$. *) let rec inter s t = match s, t with | Empty, _ | _, Empty -> Empty | (Leaf x as s), t | t, (Leaf x as s) -> if mem x t then s else Empty | Branch(p, m, s0, s1), Branch(q, n, t0, t1) -> if (p = q) && (m = n) then (* The trees have the same prefix. Compute the intersections of their sub-trees. *) build p m (inter s0 t0) (inter s1 t1) else if (X.shorter m n) && (match_prefix q p m) then (* [q] contains [p]. Intersect [t] with a sub-tree of [s]. *) inter (if (q land m) = 0 then s0 else s1) t else if (X.shorter n m) && (match_prefix p q n) then (* [p] contains [q]. Intersect [s] with a sub-tree of [t]. 
*) inter s (if (p land n) = 0 then t0 else t1) else (* The prefixes disagree. *) Empty (* [disjoint s1 s2] returns [true] if and only if the sets [s1] and [s2] are disjoint, i.e. iff their intersection is empty. It is a specialized version of [inter], which uses less space. *) exception NotDisjoint let disjoint s t = let rec inter s t = match s, t with | Empty, _ | _, Empty -> () | Leaf x, _ -> if mem x t then raise NotDisjoint | _, Leaf x -> if mem x s then raise NotDisjoint | Branch(p, m, s0, s1), Branch(q, n, t0, t1) -> if (p = q) && (m = n) then begin inter s0 t0; inter s1 t1 end else if (X.shorter m n) && (match_prefix q p m) then inter (if (q land m) = 0 then s0 else s1) t else if (X.shorter n m) && (match_prefix p q n) then inter s (if (p land n) = 0 then t0 else t1) else () in try inter s t; true with NotDisjoint -> false (* [iter f s] invokes [f x], in turn, for each element [x] of the set [s]. Elements are presented to [f] according to some unspecified, but fixed, order. *) let rec iter f = function | Empty -> () | Leaf x -> f x | Branch (_, _, tree0, tree1) -> iter f tree0; iter f tree1 (* [fold f s seed] invokes [f x accu], in turn, for each element [x] of the set [s]. Elements are presented to [f] according to some unspecified, but fixed, order. The initial value of [accu] is [seed]; then, at each new call, its value is the value returned by the previous invocation of [f]. The value returned by [fold] is the final value of [accu]. In other words, if $s = \{ x_1, x_2, \ldots, x_n \}$, where $x_1 < x_2 < \ldots < x_n$, then [fold f s seed] computes $([f]\,x_n\,\ldots\,([f]\,x_2\,([f]\,x_1\,[seed]))\ldots)$. *) let rec fold f s accu = match s with | Empty -> accu | Leaf x -> f x accu | Branch (_, _, s0, s1) -> fold f s1 (fold f s0 accu) (* [elements s] is a list of all elements in the set [s]. *) let elements s = fold (fun tl hd -> tl :: hd) s [] (* [iterator s] returns a stateful iterator over the set [s]. That is, if $s = \{ x_1, x_2, \ldots, x_n \}$, where $x_1 < x_2 < \ldots < x_n$, then [iterator s] is a function which, when invoked for the $k^{\text{th}}$ time, returns [Some]$x_k$, if $k\leq n$, and [None] otherwise. Such a function can be useful when one wishes to iterate over a set's elements, without being restricted by the call stack's discipline. For more comments about this algorithm, please see module [Baltree], which defines a similar one. *) let iterator s = let remainder = ref [ s ] in let rec next () = match !remainder with | [] -> None | Empty :: parent -> remainder := parent; next() | (Leaf x) :: parent -> remainder := parent; Some x | (Branch(_, _, s0, s1)) :: parent -> remainder := s0 :: s1 :: parent; next () in next (* [compare] is an ordering over sets. *) exception Got of int let compare s1 s2 = let iterator2 = iterator s2 in try iter (fun x1 -> match iterator2() with | None -> raise (Got 1) | Some x2 -> let c = Pervasives.compare x1 x2 in if c <> 0 then raise (Got c) ) s1; match iterator2() with | None -> 0 | Some _ -> -1 with Got c -> c (* [equal] implements equality over sets. *) let equal s1 s2 = compare s1 s2 = 0 (* [subset] implements the subset predicate over sets. In other words, [subset s t] returns [true] if and only if $s\subseteq t$. It is a specialized version of [diff]. 
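For instance, [subset (singleton 1) (add 1 (add 2 empty))] is [true],
   whereas [subset (add 3 (singleton 1)) (add 1 (add 2 empty))] is [false].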
*) exception NotSubset let subset s t = let rec diff s t = match s, t with | Empty, _ -> () | _, Empty | Branch _, Leaf _ -> raise NotSubset | Leaf x, _ -> if not (mem x t) then raise NotSubset | Branch(p, m, s0, s1), Branch(q, n, t0, t1) -> if (p = q) && (m = n) then begin diff s0 t0; diff s1 t1 end else if (X.shorter n m) && (match_prefix p q n) then diff s (if (p land n) = 0 then t0 else t1) else (* Either [q] contains [p], which means at least one of [s]'s sub-trees is not contained within [t], or the prefixes disagree. In either case, the subset relationship cannot possibly hold. *) raise NotSubset in try diff s t; true with NotSubset -> false end (*i --------------------------------------------------------------------------------------------------------------- i*) (*s \mysection{Relating sets and maps} *) (* Back to the world of maps. Let us now describe the relationship which exists between maps and their domains. *) (* [domain m] returns [m]'s domain. *) let rec domain = function | Empty -> Domain.Empty | Leaf (k, _) -> Domain.Leaf k | Branch (p, m, t0, t1) -> Domain.Branch (p, m, domain t0, domain t1) (* [lift f s] returns the map $k\mapsto f(k)$, where $k$ ranges over a set of keys [s]. *) let rec lift f = function | Domain.Empty -> Empty | Domain.Leaf k -> Leaf (k, f k) | Domain.Branch (p, m, t0, t1) -> Branch(p, m, lift f t0, lift f t1) (* [build] is a ``smart constructor''. It builds a [Branch] node with the specified arguments, but ensures that the newly created node does not have an [Empty] child. *) let build p m t0 t1 = match t0, t1 with | Empty, Empty -> Empty | Empty, _ -> t1 | _, Empty -> t0 | _, _ -> Branch(p, m, t0, t1) (* [corestrict m d] performs a co-restriction of the map [m] to the domain [d]. That is, it returns the map $k\mapsto m(k)$, where $k$ ranges over all keys bound in [m] but \emph{not} present in [d]. Its code resembles [diff]'s. *) let rec corestrict s t = match s, t with | Empty, _ | _, Domain.Empty -> s | Leaf (k, _), _ -> if Domain.mem k t then Empty else s | _, Domain.Leaf k -> remove k s | Branch(p, m, s0, s1), Domain.Branch(q, n, t0, t1) -> if (p = q) && (m = n) then build p m (corestrict s0 t0) (corestrict s1 t1) else if (X.shorter m n) && (match_prefix q p m) then if (q land m) = 0 then build p m (corestrict s0 t) s1 else build p m s0 (corestrict s1 t) else if (X.shorter n m) && (match_prefix p q n) then corestrict s (if (p land n) = 0 then t0 else t1) else s end (*i --------------------------------------------------------------------------------------------------------------- i*) (*s \mysection{Instantiating the functor} *) module Little = Make(Endianness.Little) module Big = Make(Endianness.Big) menhir-20151112/src/TableInterpreter.mli0000644000175000017500000000322312621170074016735 0ustar mehdimehdi(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, INRIA Paris-Rocquencourt *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright 2005-2015 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (* This module instantiates the generic [Engine] with a thin decoding layer for the generated tables. Like [Engine], it is part of [MenhirLib]. 
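Concretely, a parser generated in [--table] mode packages its tables as a
   module, say [Tables : TableFormat.TABLES], and instantiates this functor
   roughly as follows (a sketch; the code actually emitted by Menhir may
   differ in its details):

     module MenhirInterpreter = TableInterpreter.Make (Tables)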
*) (* The exception [Error] is declared within the generated parser. This is preferable to pre-declaring it here, as it ensures that each parser gets its own, distinct [Error] exception. This is consistent with the code-based back-end. *) (* This functor is invoked by the generated parser. *) module Make (T : TableFormat.TABLES) : EngineTypes.ENGINE with type state = int and type token = T.token and type semantic_value = Obj.t and type production = int menhir-20151112/src/option.ml0000644000175000017500000000057112621170073014623 0ustar mehdimehdilet map f = function | None -> None | Some x -> Some (f x) let iter f o = match o with | None -> () | Some x -> f x let fold f o accu = match o with | None -> accu | Some x -> f x accu let project = function | Some x -> x | None -> (* Presumably, an error message has already been printed. *) exit 1 menhir-20151112/src/interpret.mli0000644000175000017500000000165612621170073015505 0ustar mehdimehdi(* This module is in charge of handling several command line options, namely [--interpret], [--interpret-error], [--compile-errors], [--compare-errors]. If any of these options is present, the execution of Menhir stops here. *) (* This default error message is produced by [--list-errors] when it creates a [.messages] file, and is recognized by [--compare-errors] when it compares two such files. *) val default_message: string (* [print_messages_item] displays one data item. The item is of the form [nt, sentence, target], which means that beginning at the start symbol [nt], the sentence [sentence] ends in an error in the target state given by [target]. [target] also contains information about which spurious reductions are performed at the end. The display obeys the [.messages] file format. *) open Grammar val print_messages_item: Nonterminal.t * Terminal.t list * ReferenceInterpreter.target -> unit menhir-20151112/src/IO.ml0000644000175000017500000000673512621170073013632 0ustar mehdimehdi(* Input-output utilities. *) (* ------------------------------------------------------------------------- *) (* [try/finally] has the same semantics as in Java. *) let try_finally action handler = let result = try action() with e -> handler(); raise e in handler(); result (* ------------------------------------------------------------------------- *) (* [moving_away filename action] moves the file [filename] away (if it exists), performs [action], then moves the file back into place (if it was moved away). *) let moving_away filename action = if Sys.file_exists filename then let newname = filename ^ ".moved_by_menhir" in Sys.rename filename newname; try_finally action (fun () -> Sys.rename newname filename ) else action() (* ------------------------------------------------------------------------- *) (* [with_file filename creation action] creates the file [filename] by running [creation], then runs [action], and ensures that the file is removed in the end. *) let with_file filename creation action = creation(); try_finally action (fun () -> Sys.remove filename) (* ------------------------------------------------------------------------- *) (* [exhaust channel] reads all of the data that's available on [channel]. It does not assume that the length of the data is known ahead of time. It does not close the channel. 
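For instance, [exhaust ic] reads a sub-process's complete output through
   the channel [ic]; this is how [invoke], below, captures the output of an
   external command.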
*) let chunk_size = 16384 let exhaust channel = let buffer = Buffer.create chunk_size in let chunk = Bytes.create chunk_size in let rec loop () = let length = input channel chunk 0 chunk_size in if length = 0 then Buffer.contents buffer else begin Buffer.add_subbytes buffer chunk 0 length; loop() end in loop() (* ------------------------------------------------------------------------- *) (* [invoke command] invokes an external command (which expects no input) and returns its output, if the command succeeds. It returns [None] if the command fails. *) let invoke command = let ic = Unix.open_process_in command in (* 20130911 Be careful to read in text mode, so as to avoid newline translation problems (which would manifest themselves on Windows). *) set_binary_mode_in ic false; let result = exhaust ic in match Unix.close_process_in ic with | Unix.WEXITED 0 -> Some result | _ -> None (* ------------------------------------------------------------------------- *) (* [read_whole_file filename] reads the file [filename] in text mode and returns its contents as a string. *) let read_whole_file filename = (* Open the file in text mode, so that (under Windows) CRLF is converted to LF. This guarantees that one byte is one character and seems to be required in order to report accurate positions. *) let channel = open_in filename in (* The standard library functions [pos_in] and [seek_in] do not work correctly when CRLF conversion is being performed, so we abandon their use. (They were used to go and extract the text of semantic actions.) Instead we load the entire file into memory up front, and work with a string. *) (* The standard library function [in_channel_length] does not work correctly when CRLF conversion is being performed, so we do not use it to read the whole file. And the standard library function [Buffer.add_channel] uses [really_input] internally, so we cannot use it either. Bummer. *) let s = exhaust channel in close_in channel; s menhir-20151112/src/nonterminalType.mli0000644000175000017500000000415412621170073016655 0ustar mehdimehdi(* This module deals with the definition of the type that describes the nonterminal symbols. *) (* This is the conventional name of the [nonterminal] GADT. This is an indexed type (i.e., it has one type parameter). Its data constructors carry zero value arguments. *) val tcnonterminalgadt: string val tnonterminalgadt: IL.typ -> IL.typ (* [tnonterminalgadtdata nt] is the conventional name of the data constructor associated with the non-terminal symbol [nt]. *) val tnonterminalgadtdata: string -> string (* This is the definition of the [nonterminal] GADT, for use by the code generators. This definition can be constructed only if the type of every nonterminal symbol is known, either because the user has provided this information, or because [--infer] has been set and inference has been performed already. This definition is produced only in [--inspection] mode. *) val nonterminalgadtdef: UnparameterizedSyntax.grammar -> IL.interface (* When in [--(raw-)depend] mode, we are asked to produce a mock [.mli] file before [--infer] has run, which means that we are usually not able to construct the definition of the [nonterminal] GADT. This implies that the mock [.mli] file is a subset of the final [.mli] file. I believe that, when working with [ocamlbuild], this is not a problem. 
In fact, the mock [.mli] file could just as well be empty or absent, and things would still work: in principle, it is enough for us to publish which files we need in order to be able to type-check the real [.ml] file used by [--infer]. However, when working with [make], which is unable to mix the production of targets and the computation of dependencies, we additionally need to predict which files one will need in order to compile the real [.mli] and [.ml] files. Here, the fact that the mock [.mli] file is incomplete could in theory be a problem, leading to incomplete dependencies. The problem does not lie in the line [parser.ml parser.mli: ...] that we add; it lies in the lines produced by [ocamldep] itself, where the line [parser.cmi: ...] is missing some dependencies. *) menhir-20151112/src/listMonad.mli0000644000175000017500000000050412621170073015412 0ustar mehdimehdi(** Monad type which represents a list of results. *) type 'a m = 'a list (** [bind x f] applies [f] to a list of results, returning a list of results. *) val bind: 'a m -> ('a -> 'b m) -> 'b m val ( >>= ) : 'a m -> ('a -> 'b m) -> 'b m (** [return x] is the left and right unit of [bind]. *) val return: 'a -> 'a m menhir-20151112/src/time.mli0000644000175000017500000000074512621170073014425 0ustar mehdimehdi(* Call [tick msg] to stop timing a task and start timing the next task. A message is displayed. The message includes [msg] as well as timing information. The very first task is deemed to begin when this module is initialized. *) val tick: string -> unit (* Another timing method, with separate chronometers; useful for more precise profiling. *) type chrono val fresh: unit -> chrono val chrono: chrono -> (unit -> 'a) -> 'a val display: chrono -> string -> unit menhir-20151112/src/referenceInterpreter.mli0000644000175000017500000000371612621170073017652 0ustar mehdimehdiopen Grammar open Cst (* This reference interpreter animates the LR automaton. It uses the grammar and automaton descriptions, as provided by [Grammar] and [Lr1], as well as the generic LR engine in [MenhirLib.Engine]. *) (* The first parameter to the interpreter is a Boolean flag that tells whether a trace should be produced on the standard error channel. *) (* The interpreter requires a start symbol, a lexer, and a lexing buffer. It either succeeds and produces a concrete syntax tree, or fails. *) val interpret: bool -> Nonterminal.t -> (Lexing.lexbuf -> Terminal.t) -> Lexing.lexbuf -> cst option (* This variant of the reference interpreter is used internally by us. We use it to debug [LRijkstra]. It checks that a sentence leads to a syntax error in the expected state. It is also used by several of the command line options [--interpret-error], [--compile-errors], etc. *) type spurious_reduction = Lr1.node * Production.index type target = Lr1.node * spurious_reduction list type check_error_path_outcome = (* Bad: the input was read past its end. *) | OInputReadPastEnd (* Bad: a syntax error occurred before all of the input was read. *) | OInputNotFullyConsumed (* Bad: the parser unexpectedly accepted (part of) this input. *) | OUnexpectedAccept (* Good: a syntax error occurred after reading the last input token. We report in which state the error took place, as well as a list of spurious reductions. A non-default reduction that takes place after looking at the last input token (i.e., the erroneous token) is spurious. Furthermore, any reduction that takes place after a spurious reduction is itself spurious. 
We note that a spurious reduction can take place only in a non-canonical LR automaton. *) | OK of target val check_error_path: bool -> (* --trace *) Nonterminal.t -> (* initial non-terminal symbol *) Terminal.t list -> (* input *) check_error_path_outcome menhir-20151112/src/unparameterizedSyntax.ml0000644000175000017500000000635012621170073017722 0ustar mehdimehdi(* A parameterized branch may instantiate parameterized non terminals. If the parameterized branch contributes to the definition of a parameterized terminal, then the instantiation of parameterized non terminals that are defined simultaneously must only be done with formal parameters. Furthermore, all the parameterized non terminals that are in a common mutual recursive definition must have the same arity. These conditions are sufficient to ensure termination of expansion. For example: C[x] : ... // This definition does not involve A or B. A[x,y] : B[x,y] C[Y] // This mutual recursive definition is ok. B[x,y] : A[x,y] D[x] : E[D[x]] // This one is incorrect. E[y] : D[y] *) open Syntax type branch = { branch_position : Positions.t; producers : (symbol * identifier) list; (* TEMPORARY convention renversée par rapport à syntax.mli; faire un type record au lieu d'une paire? *) action : action; branch_prec_annotation : branch_prec_annotation; branch_production_level : branch_production_level } type rule = { branches : branch list; positions : Positions.t list; (* This flag is not relevant after the NonTerminalInlining.inline pass. *) inline_flag : bool; } type grammar = { preludes : Stretch.t list; postludes : Syntax.trailer list; parameters : Stretch.t list; start_symbols : StringSet.t; types : Stretch.ocamltype StringMap.t; on_error_reduce : StringSet.t; tokens : Syntax.token_properties StringMap.t; rules : rule StringMap.t; } (* [tokens grammar] is a list of all (real) tokens in the grammar [grammar]. The special tokens "#" and "error" are not included. Pseudo-tokens (used in %prec declarations, but never declared using %token) are filtered out. *) let tokens grammar = StringMap.fold (fun token properties tokens -> if properties.tk_is_declared then token :: tokens else tokens ) grammar.tokens [] (* [typed_tokens grammar] is analogous, but includes the OCaml type of each token. *) let typed_tokens grammar = StringMap.fold (fun token properties tokens -> if properties.tk_is_declared then (token, properties.tk_ocamltype) :: tokens else tokens ) grammar.tokens [] (* [nonterminals grammar] is a list of all nonterminal symbols in the grammar [grammar]. *) let nonterminals grammar : nonterminal list = StringMap.fold (fun nt _ rules -> nt :: rules) grammar.rules [] (* [ocamltype_of_symbol grammar symbol] produces the OCaml type of the symbol [symbol] in the grammar [grammar], if it is known. *) let ocamltype_of_symbol grammar symbol : Stretch.ocamltype option = try Some (StringMap.find symbol grammar.types) with Not_found -> None (* [ocamltype_of_start_symbol grammar symbol] produces the OCaml type of the start symbol [symbol] in the grammar [grammar]. *) let ocamltype_of_start_symbol grammar symbol : Stretch.ocamltype = try StringMap.find symbol grammar.types with Not_found -> (* Every start symbol should have a type. *) assert false menhir-20151112/src/IO.mli0000644000175000017500000000222712621170073013773 0ustar mehdimehdi(* Input-output utilities. *) (* [try/finally] has the same semantics as in Java. 
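For instance, [try_finally (fun () -> parse ch) (fun () -> close_in ch)]
   guarantees that the channel [ch] is closed, whether [parse] returns
   normally or raises an exception.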
*) val try_finally : (unit -> 'a) -> (unit -> unit) -> 'a (* [moving_away filename action] moves the file [filename] away (if it exists), performs [action], then moves the file back into place (if it was moved away). *) val moving_away: string -> (unit -> 'a) -> 'a (* [with_file filename creation action] creates the file [filename] by running [creation], then runs [action], and ensures that the file is removed in the end. *) val with_file: string -> (unit -> unit) -> (unit -> 'a) -> 'a (* [exhaust channel] reads all of the data that's available on [channel]. It does not assume that the length of the data is known ahead of time. It does not close the channel. *) val exhaust: in_channel -> string (* [invoke command] invokes an external command (which expects no input) and returns its output, if the command succeeds. It returns [None] if the command fails. *) val invoke: string -> string option (* [read_whole_file filename] reads the file [filename] in text mode and returns its contents as a string. *) val read_whole_file: string -> string menhir-20151112/src/keywordExpansion.mli0000644000175000017500000000042012621170073017026 0ustar mehdimehdiopen UnparameterizedSyntax (* [expand_grammar] expands away the keywords [$startpos] and [$endpos], as well the entire [ofs] family of keywords. Doing this early simplifies some aspects later on, in particular %inlining. *) val expand_grammar: grammar -> grammar menhir-20151112/src/misc.mli0000644000175000017500000001556712621170073014432 0ustar mehdimehdi(* Projecting out of an option. May fail abruptly! *) val unSome: 'a option -> 'a (* Converting an option to a string, with [None] converted to the empty string. *) val o2s: 'a option -> ('a -> string) -> string (* Projection out of a singleton list. *) val single: 'a list -> 'a (* A variant of [List.map] where [f] returns a pair of elements, to be flattened into the new list. *) val mapd: ('a -> 'b * 'b) -> 'a list -> 'b list (* Tabulating a function using an internal array. [tabulate n f] returns a function that is extensionally equal to [f], but relies on an internal array. Arguments to [f] are of type [int] and are supposed to lie in the range [0..n). *) val tabulate: int -> (int -> 'a) -> (int -> 'a) (* Tabulating a function using an internal array. [tabulateb n f] returns a function that is extensionally equal to [f], but relies on an internal array. Arguments to [f] are of type [int] and are supposed to lie in the range [0..n). The result type of [f] is assumed to be of type [bool]. [tabulateb] also returns the number of points where [f] is [true]. *) val tabulateb: int -> (int -> bool) -> (int -> bool) * int (* [tabulatef number fold n dummy f] returns a function that is extensionally equal to [f], but relies on an internal array. Arguments to [f] are of type ['a] and are mapped by [number] into the range [0..n). [fold] allows folding over the domain of [f]. [dummy] is used to initialize the internal array. Its value has no impact if [fold] is surjective. *) val tabulatef: ('a -> int) -> ((unit -> 'a -> unit) -> unit -> unit) -> int -> 'b -> ('a -> 'b) -> ('a -> 'b) (* [tabulateo number fold n f] returns a function that is extensionally equal to [f], but relies on an internal array. Arguments to [f] are of type ['a] and are mapped by [number] into the range [0..n). [fold] allows folding over the domain of [f]. The result type of [f] is an option type, and [tabulateo] also returns the number of points where [f] is [Some _]. 
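As with the rest of the [tabulate] family, after, say,
   [let fo = tabulateo number fold n f], the underlying function [f] is
   evaluated at most once per point of the domain, however often the
   tabulated function is applied.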
*) val tabulateo: ('a -> int) -> ((unit -> 'a -> unit) -> unit -> unit) -> int -> ('a -> 'b option) -> ('a -> 'b option) * int (* Reverse function application. *) val ( $$ ) : 'a -> ('a -> 'b) -> 'b (* Sets of strings and maps over strings. *) module IntSet : Set.S with type elt = int (* [separated_list_to_string printer sep l] converts [l] into a string representation built by using [printer] on each element and [sep] as a separator. *) type 'a iter = ('a -> unit) -> unit val separated_iter_to_string: ('a -> string) -> string -> 'a iter -> string val separated_list_to_string: ('a -> string) -> string -> 'a list -> string (* [terminated_list_to_string printer term l] converts [l] into a string representation built by using [printer] on each element and [term] as a terminator. *) val terminated_list_to_string: ('a -> string) -> string -> 'a list -> string val terminated_iter_to_string: ('a -> string) -> string -> 'a iter -> string (* [index_map f] returns a triple (indexed_f, domain_indexation, domain_array). [indexed_f] is a mapping from [0..n-1] to the elements of the map [f] ([n] being the size of the image of [f]). [domain_indexation] is a mapping from the domain of the map [f] to indexes. [domain_array] is a mapping from the indexes to the domain of [f]. The indexation implements [f] ie: - forall x in domain(m), indexed_f (domain_indexation x) = f (x). - forall x in domain(m), domain_array (domain_indexation x) = x. *) val index_map : 'a StringMap.t -> (int -> 'a) * (string -> int) * (int -> string) (* [support_assoc l x] returns the second component of the first couple in [l] whose first component is [x]. If it does not exist, it returns [x]. *) val support_assoc : ('a * 'a) list -> 'a -> 'a (* [index] indexes a list of (distinct) strings, that is, assigns an integer index to each string and builds mappings both ways between strings and indices. *) val index: string list -> int * string array * int StringMap.t (* Turning an implicit list, stored using pointers through a hash table, into an explicit list. The head of the implicit list is not included in the explicit list. *) val materialize: ('a, 'a option) Hashtbl.t -> 'a -> 'a list (* [iteri] implements a [for] loop over integers, from 0 to [n-1]. *) val iteri: int -> (int -> unit) -> unit (* [foldi] implements a [for] loop over integers, from 0 to [n-1], with an accumulator. [foldij] implements a [for] loop over integers, from [start] to [n-1], with an accumulator. *) val foldi: int -> (int -> 'a -> 'a) -> 'a -> 'a val foldij: int -> int -> (int -> 'a -> 'a) -> 'a -> 'a (* [mapij start n f] produces the list [ f start; ... f (n-1) ]. *) val mapij: int -> int -> (int -> 'a) -> 'a list (* [mapi n f] produces the list [ f 0; ... f (n-1) ]. *) val mapi: int -> (int -> 'a) -> 'a list (* [qfold f accu q] repeatedly takes an element [x] off the queue [q] and applies [f] to the accumulator and to [x], until [q] becomes empty. Of course, [f] can add elements to [q] as a side-effect. *) val qfold: ('a -> 'b -> 'a) -> 'a -> 'b Queue.t -> 'a (* [qiter f q] repeatedly takes an element [x] off the queue [q] and applies [f] to [x], until [q] becomes empty. Of course, [f] can add elements to [q] as a side-effect. *) val qiter: ('b -> unit) -> 'b Queue.t -> unit (* [smap] has the same semantics as [List.map], but attempts to physically return the input list when [f] is the identity. *) val smap: ('a -> 'a) -> 'a list -> 'a list (* [smapa] is a variant of [smap] that maintains an accumulator. 
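For instance, [smap (fun x -> x) xs] is intended to return [xs] itself,
   not an equal copy.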
*) val smapa: ('b -> 'a -> 'b * 'a) -> 'b -> 'a list -> 'b * 'a list (* [normalize s] returns a copy of [s] where parentheses and commas are replaced with underscores. *) val normalize: string -> string (* [postincrement r] increments [r] and returns its original value. *) val postincrement: int ref -> int (* [map_opt f l] returns the list of [y]s such that [f x = Some y] where [x] is in [l], preserving the order of elements of [l]. *) val map_opt : ('a -> 'b option) -> 'a list -> 'b list (* [new_intern capacity] creates a new service for interning (hash-consing) strings. [capacity] is the initial capacity of the internal hash table. [new_intern] returns a pair [intern, verbose] where [intern] is the hash-consing service and [verbose] prints statistics about the use of the service so far. *) val new_intern: int -> (string -> string) * (unit -> unit) (* [new_encode_decode capacity] creates a new service for assigning unique integer codes to strings. [capacity] is the initial capacity of the internal hash table. [new_encode_decode] returns a triple [encode, decode, verbose], where [encode] and [decode] translate between strings and unique integer codes and [verbose] prints statistics about the use of the service so far. *) val new_encode_decode: int -> (string -> int) * (int -> string) * (unit -> unit) menhir-20151112/src/RowDisplacement.ml0000644000175000017500000002114512621170074016414 0ustar mehdimehdi(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, INRIA Paris-Rocquencourt *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright 2005-2015 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (* This module compresses a two-dimensional table, where some values are considered insignificant, via row displacement. *) (* This idea reportedly appears in Aho and Ullman's ``Principles of Compiler Design'' (1977). It is evaluated in Tarjan and Yao's ``Storing a Sparse Table'' (1979) and in Dencker, Dürre, and Heuft's ``Optimization of Parser Tables for Portable Compilers'' (1984). *) (* A compressed table is represented as a pair of arrays. The displacement array is an array of offsets into the data array. *) type 'a table = int array * (* displacement *) 'a array (* data *) (* In a natural version of this algorithm, displacements would be greater than (or equal to) [-n]. However, in the particular setting of Menhir, both arrays are intended to be compressed with [PackedIntArray], which does not efficiently support negative numbers. For this reason, we are careful not to produce negative displacements. *) (* In order to avoid producing negative displacements, we simply use the least significant bit as the sign bit. This is implemented by [encode] and [decode] below. *) (* One could also think, say, of adding [n] to every displacement, so as to ensure that all displacements are nonnegative. This would work, but would require [n] to be published, for use by the decoder. 
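For instance, [encode 3] is [6] and [encode (-3)] is [7]; [decode] inverts
   this mapping, so [decode 6] is [3] and [decode 7] is [-3].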
*) let encode (displacement : int) : int = if displacement >= 0 then displacement lsl 1 else (-displacement) lsl 1 + 1 let decode (displacement : int) : int = if displacement land 1 = 0 then displacement lsr 1 else -(displacement lsr 1) (* It is reasonable to assume that, as matrices grow large, their density becomes low, i.e., they have many insignificant entries. As a result, it is important to work with a sparse data structure for rows. We internally represent a row as a list of its significant entries, where each entry is a pair of a [j] index and an element. *) type 'a row = (int * 'a) list (* [compress equal insignificant dummy m n t] turns the two-dimensional table [t] into a compressed table. The parameter [equal] is equality of data values. The parameter [wildcard] tells which data values are insignificant, and can thus be overwritten with other values. The parameter [dummy] is used to fill holes in the data array. [m] and [n] are the integer dimensions of the table [t]. *) let compress (equal : 'a -> 'a -> bool) (insignificant : 'a -> bool) (dummy : 'a) (m : int) (n : int) (t : 'a array array) : 'a table = (* Be defensive. *) assert (Array.length t = m); assert begin for i = 0 to m - 1 do assert (Array.length t.(i) = n) done; true end; (* This turns a row-as-array into a row-as-sparse-list. The row is accompanied by its index [i] and by its rank (the number of its significant entries, that is, the length of the row-as-a-list. *) let sparse (i : int) (line : 'a array) : int * int * 'a row (* index, rank, row *) = let rec loop (j : int) (rank : int) (row : 'a row) = if j < 0 then i, rank, row else let x = line.(j) in if insignificant x then loop (j - 1) rank row else loop (j - 1) (1 + rank) ((j, x) :: row) in loop (n - 1) 0 [] in (* Construct an array of all rows, together with their index and rank. *) let rows : (int * int * 'a row) array = (* index, rank, row *) Array.mapi sparse t in (* Sort this array by decreasing rank. This does not have any impact on correctness, but reportedly improves compression. The intuitive idea is that rows with few significant elements are easy to fit, so they should be inserted last, after the problem has become quite constrained by fitting the heavier rows. This heuristic is attributed to Ziegler. *) Array.fast_sort (fun (_, rank1, _) (_, rank2, _) -> compare rank2 rank1 ) rows; (* Allocate a one-dimensional array of displacements. *) let displacement : int array = Array.make m 0 in (* Allocate a one-dimensional, infinite array of values. Indices into this array are written [k]. *) let data : 'a InfiniteArray.t = InfiniteArray.make dummy in (* Determine whether [row] fits at offset [k] within the current [data] array, up to extension of this array. *) (* Note that this check always succeeds when [k] equals the length of the [data] array. Indeed, the loop is then skipped. This property guarantees the termination of the recursive function [fit] below. *) let fits k (row : 'a row) : bool = let d = InfiniteArray.extent data in let rec loop = function | [] -> true | (j, x) :: row -> (* [x] is a significant element. *) (* By hypothesis, [k + j] is nonnegative. If it is greater than or equal to the current length of the data array, stop -- the row fits. *) assert (k + j >= 0); if k + j >= d then true (* We now know that [k + j] is within bounds of the data array. Check whether it is compatible with the element [y] found there. If it is, continue. If it isn't, stop -- the row does not fit. 
*) else let y = InfiniteArray.get data (k + j) in if insignificant y || equal x y then loop row else false in loop row in (* Find the leftmost position where a row fits. *) (* If the leftmost significant element in this row is at offset [j], then we can hope to fit as far left as [-j] -- so this element lands at offset [0] in the data array. *) (* Note that displacements may be negative. This means that, for insignificant elements, accesses to the data array could fail: they could be out of bounds, either towards the left or towards the right. This is not a problem, as long as [get] is invoked only at significant elements. *) let rec fit k row : int = if fits k row then k else fit (k + 1) row in let fit row = match row with | [] -> 0 (* irrelevant *) | (j, _) :: _ -> fit (-j) row in (* Write [row] at (compatible) offset [k]. *) let rec write k = function | [] -> () | (j, x) :: row -> InfiniteArray.set data (k + j) x; write k row in (* Iterate over the sorted array of rows. Fit and write each row at the leftmost compatible offset. Update the displacement table. *) Array.iter (fun (i, _, row) -> let k = fit row in (* if [row] has leading insignificant elements, then [k] can be negative *) write k row; displacement.(i) <- encode k ) rows; (* Return the compressed tables. *) displacement, InfiniteArray.domain data (* [get ct i j] returns the value found at indices [i] and [j] in the compressed table [ct]. This function call is permitted only if the value found at indices [i] and [j] in the original table is significant -- otherwise, it could fail abruptly. *) (* Together, [compress] and [get] have the property that, if the value found at indices [i] and [j] in an uncompressed table [t] is significant, then [get (compress t) i j] is equal to that value. *) let get (displacement, data) i j = assert (0 <= i && i < Array.length displacement); let k = decode displacement.(i) in assert (0 <= k + j && k + j < Array.length data); (* failure of this assertion indicates an attempt to access an insignificant element that happens to be mapped out of the bounds of the [data] array. *) data.(k + j) (* [getget] is a variant of [get] which only requires read access, via accessors, to the two components of the table. *) let getget get_displacement get_data (displacement, data) i j = let k = decode (get_displacement displacement i) in get_data data (k + j) menhir-20151112/src/internalSyntax.mli0000644000175000017500000000075212621170073016510 0ustar mehdimehditype grammar = { p_preludes : Stretch.t list; p_postludes : Syntax.trailer list; p_parameters : Stretch.t list; p_start_symbols : Positions.t StringMap.t; p_types : (Syntax.parameter * Stretch.ocamltype Positions.located) list; p_tokens : Syntax.token_properties StringMap.t; p_rules : Syntax.parameterized_rule StringMap.t; p_on_error_reduce : Syntax.parameter list; } menhir-20151112/src/error.mli0000644000175000017500000000456612621170073014625 0ustar mehdimehdi(* This module helps report errors and maintains some information about the source file that is being read. *) (* ---------------------------------------------------------------------------- *) (* Call [set_filename] before lexing and parsing in order to inform the module [Error] about the name of the file that is being examined. 
*) (* TEMPORARY limiter ou supprimer ou commenter cette interface stateful *) val set_filename: string -> unit val get_filename: unit -> string val get_filemark: unit -> Mark.t val file_contents: string option ref val get_file_contents: unit -> string (* ---------------------------------------------------------------------------- *) (* Logging and log levels. *) val logG: int -> (out_channel -> unit) -> unit val logA: int -> (out_channel -> unit) -> unit val logC: int -> (out_channel -> unit) -> unit (* ---------------------------------------------------------------------------- *) (* Errors and warnings. *) (* [error ps format ...] displays the list of positions [ps], followed with the error message [format ...], and exits. The strings "Error: " and "\n" are automatically added at the beginning and end of the error message. The message should begin with a lowercase letter and end with a dot. *) val error: Positions.positions -> ('a, out_channel, unit, 'b) format4 -> 'a (* [errorp] is like [error], but uses the position range carried by [v]. *) val errorp: _ Positions.located -> ('a, out_channel, unit, 'b) format4 -> 'a (* [signal] is like [error], except it does not exit immediately. It sets a flag which can be tested using [errors]. *) val signal: Positions.positions -> ('a, out_channel, unit, unit) format4 -> 'a (* [errors] returns [true] if [signal] was previously called. Together [signal] and [errors] allow reporting multiple errors before aborting. *) val errors: unit -> bool (* [warning] is like [signal], except it does not set a flag. *) val warning: Positions.positions -> ('a, out_channel, unit, unit) format4 -> 'a (* Certain warnings about the grammar can optionally be treated as errors. The following function emits a warning or error message, via [warning] or [signal]. It does not stop the program; the client must at some point call [errors] and stop the program if any errors have been reported. *) val grammar_warning: Positions.positions -> ('a, out_channel, unit, unit) format4 -> 'a menhir-20151112/src/LowIntegerPriorityQueue.ml0000644000175000017500000000616212621170073020143 0ustar mehdimehdi(* This module implements a simple-minded priority queue, under the assumption that priorities are low nonnegative integers. *) module MyArray = ResizableArray module MyStack = ResizableArray type 'a t = { (* A priority queue is represented as a resizable array, indexed by priorities, of stacks (implemented as resizable arrays). There is no a priori bound on the size of the main array -- its size is increased if needed. It is up to the user to use priorities of reasonable magnitude. *) a: 'a MyStack.t MyArray.t; (* Index of lowest nonempty stack, if there is one; or lower (sub-optimal, but safe). If the queue is empty, [best] is arbitrary. *) mutable best: int; (* Current number of elements in the queue. Used in [remove] to stop the search for a nonempty bucket. *) mutable cardinal: int; } let create default = (* Set up the main array so that it initially has 16 priority levels and, whenever new levels are added, each of them is initialized with a fresh empty stack. The dummy stack is never accessed; it is used to fill empty physical slots in the main array. *) let dummy = MyStack.make_ 0 default in let a = MyArray.make 16 dummy (fun _ -> MyStack.make_ 1024 default) in { a; best = 0; cardinal = 0 } let add q x priority = assert (0 <= priority); q.cardinal <- q.cardinal + 1; (* Grow the main array if necessary. 
*) if MyArray.length q.a <= priority then MyArray.resize q.a (priority + 1); (* Find out which stack we should push into. *) let xs = MyArray.get q.a priority in (* assert (xs != MyArray.default q.a); *) (* Push. *) MyStack.push xs x; (* Decrease [q.best], if necessary, so as not to miss the new element. In the special case of Dijkstra's algorithm or A*, this never happens. *) if priority < q.best then q.best <- priority let is_empty q = q.cardinal = 0 let cardinal q = q.cardinal let rec remove_nonempty q = (* Look for the next nonempty bucket. We know there is one. This may seem inefficient, because it is a linear search. However, in applications where [q.best] never decreases, the cumulated cost of this loop is the maximum priority ever used, which is good. *) let xs = MyArray.get q.a q.best in if MyStack.length xs = 0 then begin (* As noted below, [MyStack.pop] does not physically shrink the stack. When we find that a priority level has become empty, we physically empty it, so as to free the (possibly large) space that it takes up. This strategy is good when the client is Dijkstra's algorithm or A*. *) let dummy = MyArray.default q.a in MyArray.set q.a q.best dummy; q.best <- q.best + 1; remove_nonempty q end else begin q.cardinal <- q.cardinal - 1; Some (MyStack.pop xs) (* Note: [MyStack.pop] does not shrink the physical array underlying the stack. This is good, because we are likely to push new elements into this stack. *) end let remove q = if q.cardinal = 0 then None else remove_nonempty q let rec repeat q f = match remove q with | None -> () | Some x -> f x; repeat q f menhir-20151112/src/InspectionTableInterpreter.mli0000644000175000017500000000356212621170074020777 0ustar mehdimehdi(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, INRIA Paris-Rocquencourt *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright 2005-2015 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (* This functor is invoked inside the generated parser, in [--table] mode. It produces no code! It simply constructs the types [symbol] and [xsymbol] on top of the generated types [terminal] and [nonterminal]. *) module Symbols (T : sig type 'a terminal type 'a nonterminal end) : IncrementalEngine.SYMBOLS with type 'a terminal := 'a T.terminal and type 'a nonterminal := 'a T.nonterminal (* This functor is invoked inside the generated parser, in [--table] mode. It constructs the inspection API on top of the inspection tables described in [InspectionTableFormat]. *) module Make (B : TableFormat.TABLES) (T : InspectionTableFormat.TABLES with type 'a lr1state = int) : IncrementalEngine.INSPECTION with type 'a terminal := 'a T.terminal and type 'a nonterminal := 'a T.nonterminal and type 'a lr1state := 'a T.lr1state and type production := int menhir-20151112/src/astar.mli0000644000175000017500000000345612621170073014603 0ustar mehdimehdi(* This signature defines an implicit representation for graphs where edges have integer costs, there is a distinguished start node, and there is a set of distinguished goal nodes. It is also assumed that some geometric knowledge of the graph allows safely estimating the cost of shortest paths to goal nodes. 
If no such knowledge is available, [estimate] should be the constant zero function. *) module Make (G : sig (* Graph nodes. *) type node include Hashtbl.HashedType with type t := node (* Edge labels. *) type label (* The source node(s). *) val sources: (node -> unit) -> unit (* [successors n f] presents each of [n]'s successors, in an arbitrary order, to [f], together with the cost of the edge that was followed. *) val successors: node -> (label -> int -> node -> unit) -> unit (* An estimate of the cost of the shortest path from the supplied node to some goal node. This estimate must be a correct under-approximation of the actual cost. *) val estimate: node -> int end) : sig (* A path (from a target node back to some source node) is described by a series of labels and ends in a source node. *) type path = | Edge of G.label * path | Source of G.node (* A path can also be presented as a pair of a source node and a list of labels, which describe the edges from the source node to a target node. *) val reverse: path -> G.node * G.label list (* Search. Newly discovered nodes are presented to the user, in order of increasing distance from the source nodes, by invoking the user-supplied function [f]. At the end, a mapping of nodes to distances to the source nodes and a mapping of nodes to shortest paths are returned. *) val search: (G.node * path -> unit) -> (G.node -> int) * (G.node -> path) end menhir-20151112/src/grammarFunctor.mli0000644000175000017500000004162412621170073016457 0ustar mehdimehdi(* The functor [Make] transforms an abstract syntax tree for the grammar into a rich internal representation of the grammar. *) (* The reason why this is now a functor, and the reason why its verbosity can be controlled, is that we may wish to invoke it several times, e.g. on the grammar before %inlining, and on the grammar after %inlining. 2015/11/10 *) module Make (G : sig (* An abstract syntax tree for the grammar. *) val grammar: UnparameterizedSyntax.grammar (* This flag indicates whether it is OK to produce warnings, verbose information, etc., when this functor is invoked. If it is set to [false], then only serious errors can be signaled. *) val verbose: bool end) : sig (* ------------------------------------------------------------------------ *) (* Nonterminals. *) module Nonterminal : sig (* The type of nonterminals. *) type t (* Comparison. *) val compare: t -> t -> int (* The number of nonterminals. This includes the extra nonterminals that are internally generated for the grammar's entry points. *) val n: int (* [lookup] maps an identifier to a nonterminal, or raises [Not_found]. *) val lookup : string -> t (* Nonterminals can be converted to integers. This feature is exploited in the table-based back-end. *) val n2i: t -> int (* This produces a string representation of a nonterminal. It should in principle never be applied to one of the internally generated nonterminals, as we do not wish users to become aware of the existence of these extra nonterminals. However, we do sometimes violate this rule when it is difficult to do otherwise. The Boolean parameter tells whether the string representation should be normalized, that is, whether parentheses and commas should be eliminated. This is necessary if the string is intended for use as a valid nonterminal name or as a valid Objective Caml identifier. *) val print: bool -> t -> string (* This is the Objective Caml type associated with a nonterminal symbol. It is known only if a %type declaration was provided. 
This function is not applicable to the internally generated nonterminals. *) val ocamltype: t -> Stretch.ocamltype option (* A start symbol always has a type. This allows us to define a simplified version of [ocamltype] for start symbols. *) val ocamltype_of_start_symbol: t -> Stretch.ocamltype (* Iteration over nonterminals. The order in which elements are examined, and the order of [map]'s output list, correspond to the numeric indices produced by [n2i] above. *) val iter: (t -> unit) -> unit val fold: (t -> 'a -> 'a) -> 'a -> 'a val map: (t -> 'a) -> 'a list (* Iteration over all nonterminals, except the start nonterminals. *) val iterx: (t -> unit) -> unit val foldx: (t -> 'a -> 'a) -> 'a -> 'a (* Tabulation of a function over nonterminals. *) val tabulate: (t -> 'a) -> (t -> 'a) (* [positions nt] is a list of the positions associated with the definition of [nt]. There can be more than one position because definitions can be split over multiple files. *) val positions: t -> Positions.t list (* This tells whether a non-terminal symbol is one of the start symbols. *) val is_start: t -> bool end (* ------------------------------------------------------------------------ *) (* Sets of nonterminals. *) module NonterminalMap : GMap.S with type key = Nonterminal.t module NonterminalSet = NonterminalMap.Domain (* ------------------------------------------------------------------------ *) (* Terminals. *) module Terminal : sig (* The type of terminals. *) type t (* The number of terminals. This includes the two pseudo-tokens [#] and [error]. *) val n: int (* Comparison. *) val equal: t -> t -> bool val compare: t -> t -> int (* [lookup] maps an identifier to a terminal, or raises [Not_found]. *) val lookup : string -> t (* Terminals can be converted to integers. This feature is exploited in the table-based back-end and in [LRijkstra]. The reverse conversion, [i2t], is unsafe and should not be used. [LRijkstra] uses it :-) *) val t2i: t -> int val i2t: int -> t (* unsafe! *) (* This produces a string representation of a terminal. *) val print: t -> string (* This is the Objective Caml type associated with a terminal symbol. It is known only if the %token declaration was accompanied with a type. *) val ocamltype: t -> Stretch.ocamltype option (* These are the two pseudo-tokens [#] and [error]. The former is used to denote the end of the token stream. The latter is accessible to the user and is used for handling errors. *) val sharp: t val error: t (* This is the programmer-defined [EOF] token, if there is one. It is recognized based solely on its name, which is fragile, but this behavior is documented. This token is assumed to represent [ocamllex]'s [eof] pattern. It is used only by the reference interpreter, and in a rather non-essential way. *) val eof: t option (* A terminal symbol is pseudo if it is [#] or [error]. It is real otherwise. *) val pseudo: t -> bool val real: t -> bool (* Iteration over terminals. The order in which elements are examined, and the order of [map]'s output list, correspond to the numeric indices produced by [t2i] above. *) val iter: (t -> unit) -> unit val fold: (t -> 'a -> 'a) -> 'a -> 'a val map: (t -> 'a) -> 'a list (* [mapx] offers iteration over all terminals except [#]. *) val mapx: (t -> 'a) -> 'a list (* [iter_real] offers iteration over all real terminals. *) val iter_real: (t -> unit) -> unit (* The sub-module [Word] offers an implementation of words (that is, sequences) of terminal symbols. It is used by [LRijkstra]. 
We make it a functor, because it has internal state (a hash table) and a side effect (failure if there are more than 256 terminal symbols). *) (* The type [word] should be treated, as much as possible, as an abstract type. In fact, for efficiency reasons, we represent a word as a unique integer code, and we allocate these codes sequentially, from 0 upwards. The conversion from [int] to [word] is of course unsafe and should be used wisely. *) module Word (X : sig end) : sig type word = int val epsilon: word val singleton: t -> word val append: word -> word -> word val length: word -> int (* [first w z] returns the first symbol of the word [w.z]. *) val first: word -> t -> t val elements: word -> t list val print: word -> string (* [verbose()] prints statistics about the use of the internal hash-consing table so far. *) val verbose: unit -> unit (* Lexicographic ordering. *) val compare: word -> word -> int end end (* ------------------------------------------------------------------------ *) (* Sets and maps over terminals. *) module TerminalSet : sig (* All of the operations documented in [GSet] are available. *) include GSet.S with type element = Terminal.t (* This offers a string representation of a set of terminals. The symbols are simply listed one after the other and separated with spaces. *) val print: t -> string (* This is the set of all terminal symbols except the pseudo-tokens [#] and [error]. *) val universe: t end (* All of the operations documented in [GMap] are available. *) module TerminalMap : GMap.S with type key = Terminal.t (* ------------------------------------------------------------------------ *) (* Symbols. *) module Symbol : sig (* A symbol is either a nonterminal or a terminal. *) type t = | N of Nonterminal.t | T of Terminal.t (* [lookup] maps an identifier to a symbol, or raises [Not_found]. *) val lookup : string -> t (* Comparison. *) val equal: t -> t -> bool val lequal: t list -> t list -> bool (* These produce a string representation of a symbol, of a list of symbols, or of an array of symbols. The symbols are simply listed one after the other and separated with spaces. [printao] prints an array of symbols, starting at a particular offset. [printaod] is analogous, but can also print a single dot at a particular position between two symbols. *) val print: t -> string val printl: t list -> string val printa: t array -> string val printao: int -> t array -> string val printaod: int -> int -> t array -> string end (* ------------------------------------------------------------------------ *) (* Sets and maps over symbols. *) (* All of the operations documented in [Set] are available. *) module SymbolSet : Set.S with type elt = Symbol.t module SymbolMap : sig (* All of the operations documented in [Map] are available. *) include Map.S with type key = Symbol.t val domain: 'a t -> key list (* This returns [true] if and only if all of the symbols in the domain of the map at hand are nonterminals. *) val purelynonterminal: 'a t -> bool end (* ------------------------------------------------------------------------ *) (* Productions. *) module Production : sig (* This is the type of productions. This includes user-defined productions as well as the internally generated productions associated with the start symbols. *) type index (* Comparison. *) val compare: index -> index -> int (* Productions can be converted to integers and back. This is unsafe and should be avoided as much as possible.
This feature is exploited, for efficiency, in the encoding of items. *) val p2i: index -> int val i2p: int -> index (* The number of productions. *) val n: int (* These map a production index to the production's definition, that is, a nonterminal (the left-hand side) and an array of symbols (the right-hand side). *) val def: index -> Nonterminal.t * Symbol.t array val nt: index -> Nonterminal.t val rhs: index -> Symbol.t array val length: index -> int (* This maps a production index to an array of the identifiers that should be used for naming the semantic values of the symbols in the right-hand side. *) val identifiers: index -> Syntax.identifier array (* This maps a production index to the production's semantic action. This function is not applicable to a start production. *) val action: index -> Syntax.action (* [positions prod] is a list of the positions associated with production [prod]. This is usually a singleton list, but there can be more than one position for start productions when the definition of the corresponding start symbol is split over multiple files. *) val positions: index -> Positions.t list (* Iteration over all productions. The order in which elements are examined, and the order of [map]'s output list, correspond to the numeric indices produced by [p2i] above. *) val iter: (index -> unit) -> unit val fold: (index -> 'a -> 'a) -> 'a -> 'a val map: (index -> 'a) -> 'a list val amap: (index -> 'a) -> 'a array (* Iteration over all productions, except the start productions. *) val iterx: (index -> unit) -> unit val foldx: (index -> 'a -> 'a) -> 'a -> 'a val mapx: (index -> 'a) -> 'a list (* This maps a (user) non-terminal start symbol to the corresponding start production. *) val startsymbol2startprod: Nonterminal.t -> index (* Iteration over the productions associated with a specific nonterminal. *) val iternt: Nonterminal.t -> (index -> unit) -> unit val foldnt: Nonterminal.t -> 'a -> (index -> 'a -> 'a) -> 'a (* This allows determining whether a production is a start production. If it is a start production, the start symbol that it is associated with is returned. If it is a regular production, nothing is returned. *) val classify: index -> Nonterminal.t option (* [is_start] is easier to use than [classify] when the start symbol is not needed. *) val is_start: index -> bool (* The integer [start] is published so as to allow the table back-end to produce code for [is_start]. It should not be used otherwise. *) val start: int (* This produces a string representation of a production. It should never be applied to a start production, as we do not wish users to become aware of the existence of these extra productions. *) val print: index -> string (* Tabulation of a Boolean function over productions. [tabulateb f] returns a tabulated version of [f] as well as the number of productions where [f] is true. *) val tabulate: (index -> 'a) -> (index -> 'a) val tabulateb: (index -> bool) -> (index -> bool) * int end (* ------------------------------------------------------------------------ *) (* Maps over productions. *) module ProductionMap : sig include GMap.S with type key = Production.index (* Iteration over the start productions only. *) val start: (Production.index -> 'a) -> 'a t end (* ------------------------------------------------------------------------ *) (* Analysis of the grammar. *) module Analysis : sig (* [nullable nt] is the NULLABLE flag of the non-terminal symbol [nt]. That is, it is true if and only if this symbol produces the empty word [epsilon]. 
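For instance, in a hypothetical grammar containing the rules

      xs: | x xs          (* xs derives epsilon, hence is nullable *)
      pair: xs xs         (* hence pair is nullable too *)

   both [xs] and [pair] are nullable, whereas a terminal symbol never is.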
*) val nullable: Nonterminal.t -> bool val nullable_symbol: Symbol.t -> bool (* [first nt] is the FIRST set of the non-terminal symbol [nt]. *) val first: Nonterminal.t -> TerminalSet.t val first_symbol: Symbol.t -> TerminalSet.t (* [nullable_first_prod prod i] considers the suffix of the production [prod] defined by offset [i]. It returns its NULLABLE flag as well as its FIRST set. The offset [i] must lie between [0] and [n], inclusive, where [n] is the length of production [prod]. *) val nullable_first_prod: Production.index -> int -> bool * TerminalSet.t (* [first_prod_lookahead prod i t] computes [FIRST(alpha.t)], where [alpha] is the suffix of the production defined by offset [i], and [t] is a terminal symbol. The offset [i] must lie between [0] and [n], inclusive, where [n] is the length of production [prod]. *) val first_prod_lookahead: Production.index -> int -> Terminal.t -> TerminalSet.t (* [explain_first_rhs tok rhs i] explains why the token [tok] appears in the FIRST set for the string of symbols found at offset [i] in the array [rhs]. *) val explain_first_rhs: Terminal.t -> Symbol.t array -> int -> string (* [follow nt] is the FOLLOW set of the non-terminal symbol [nt], that is, the set of terminal symbols that could follow an expansion of [nt] in a valid sentence. *) val follow: Nonterminal.t -> TerminalSet.t end (* ------------------------------------------------------------------------ *) (* Conflict resolution via precedences. *) module Precedence : sig (* Shift/reduce conflicts require making a choice between shifting a token and reducing a production. How these choices are made is of no concern to the back-end, but here is a rough explanation. Shifting is preferred when the token has higher precedence than the production, or they have the same precedence and the token is right-associative. Reducing is preferred when the token has lower precedence than the production, or they have the same precedence and the token is left-associative. Neither is allowed when the token and the production have the same precedence and the token is non-associative. No preference is explicitly specified when the token or the production has undefined precedence. In that case, the default choice is to prefer shifting, but a conflict will be reported. *) type choice = | ChooseShift | ChooseReduce | ChooseNeither | DontKnow val shift_reduce: Terminal.t -> Production.index -> choice (* Reduce/reduce conflicts require making a choice between reducing two distinct productions. This is done by exploiting a partial order on productions. For compatibility with ocamlyacc, this order should be total and should correspond to textual order when the two productions originate in the same source file. When they originate in different source files, the two productions should be incomparable. *) val reduce_reduce: Production.index -> Production.index -> Production.index option end (* ------------------------------------------------------------------------ *) (* %on_error_reduce declarations. *) module OnErrorReduce : sig (* This is the set of %on_error_reduce declarations. *) val declarations: StringSet.t end (* ------------------------------------------------------------------------ *) (* Diagnostics. *) (* This function prints warnings about useless precedence declarations for terminal symbols (%left, %right, %nonassoc) and productions (%prec). It should be invoked only after the automaton has been constructed.
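For instance, if a grammar declares [%left PLUS] but that declaration never
   serves to resolve a shift/reduce conflict, the declaration is useless, and a
   warning is emitted for it (a made-up example, for illustration).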
*) val diagnostics: unit -> unit (* ------------------------------------------------------------------------ *) end (* module Make *) menhir-20151112/src/Boolean.mli0000644000175000017500000000013512621170073015037 0ustar mehdimehdiinclude Fix.PROPERTY with type property = bool val union: property -> property -> property menhir-20151112/src/codeBackend.ml0000644000175000017500000015075512621170073015507 0ustar mehdimehdi(* The code generator. *) module Run (T : sig end) = struct open Grammar open IL open CodeBits open CodePieces open TokenType open Interface (* ------------------------------------------------------------------------ *) (* Here is a description of our code generation mechanism. Every internal function that we produce is parameterized by the parser environment [env], which contains (pointers to) the lexer, the lexing buffer, the last token read, etc. No global variables are exploited, so our parsers are reentrant. The functions that we export do not expect an environment as a parameter; they create a fresh one when invoked. Every state [s] is translated to a [run] function. To a first approximation, the only parameter of the [run] function, besides [env], is the stack. However, in some cases (consult the predicate [runpushes]), the top stack cell is not yet allocated when [run s] is called. The cell's contents are passed as extra parameters, and it is [run]'s responsibility to allocate that cell. (When [run] is the target of a shift transition, the position parameters [startp] and [endp] are redundant with the [env] parameter, because they are always equal to [env.startp] and [env.endp]. However, this does not appear to make a great difference in terms of code size, and makes our life easier, so we do not attempt to eliminate this redundancy.) The first thing in [run] is to discard a token, if the state was entered through a shift transition, and to peek at the lookahead token. When the current token is to be discarded, the [discard] function is invoked. It discards the current token, invokes the lexer to obtain a new token, and returns an updated environment. When we only wish to peek at the current token, without discarding it, we simply read [env.token]. (We have to be careful in cases where the current lookahead token might be [error], since, in those cases, [env.token] is meaningless; see below.) Once the lookahead token is obtained, [run] performs a case analysis of the lookahead token. Each branch performs one of the following. In shift branches, control is dispatched to another [run] function, with appropriate parameters, typically the current stack plus the information that should go into the new top stack cell (a state, a semantic value, locations). In reduce branches, a [reduce] function is invoked. In the default branch, error handling is initiated (see below). The [reduce] function associated with production [prod] pops as many stack cells as necessary, retrieving semantic values and the state [s] that initiated the reduction. It then evaluates the semantic action, which yields a new semantic value. (This is the only place where semantic actions are evaluated, so that semantic actions are never duplicated.) It then passes control on to the [goto] function associated with the nonterminal [nt], where [nt] is the left-hand side of the production [prod]. The [goto] function associated with nonterminal [nt] expects just one parameter besides the environment -- namely, the stack. 
However, in some cases (consult the predicate [gotopushes]), the top stack cell is not allocated yet, so its contents are passed as extra parameters. In that case, [goto] first allocates that cell. Then, it examines the state found in that cell and performs a goto transition, that is, a shift transition on the nonterminal symbol [nt]. This simply consists in passing control to the [run] function associated with the transition's target state. If this case analysis only has one branch, because all transitions for [nt] lead to the same target state, then no case analysis is required. In principle, a stack cell contains a state, a semantic value, and start and end positions. However, the state can be omitted if it is never consulted by a [goto] function. The semantic value can be omitted if it is associated with a token that was declared not to carry a semantic value. (One could also omit semantic values for nonterminals whose type was declared to be [unit], but that does not seem very useful.) The start or end position can be omitted if they are associated with a symbol that does not require keeping track of positions. When all components of a stack cell are omitted, the entire cell disappears, so that no memory allocation is required. For each start symbol [nt], an entry point function, named after [nt], is generated. Its parameters are a lexer and a lexing buffer. The function allocates and initializes a parser environment and transfers control to the appropriate [run] function. Our functions are grouped into one huge [let rec] definition. The inliner, implemented as a separate module, will inline functions that are called at most once, remove dead code (although there should be none or next to none), and possibly perform other transformations. I note that, if a state can be entered only through (nondefault) reductions, then, in that state, the lookahead token must be a member of the set of tokens that allow these reductions, and by construction, there must exist an action on that token in that state. Thus, the default branch (which signals an error when the lookahead token is not a member of the expected set) is in fact dead. It would be nice (but difficult) to exploit types to prove that. However, one could at least replace the code of that branch with a simple [assert false]. TEMPORARY do it *) (* ------------------------------------------------------------------------ *) (* Here is a description of our error handling mechanism. With every state [s], we associate an [error] function. If [s] is willing to act when the lookahead token is [error], then this function tells how. This includes *both* shift *and* reduce actions. (For some reason, yacc/ocamlyacc/mule/bison can only shift on [error].) If [s] is unable to act when the lookahead token is [error], then this function pops a stack cell, extracts a state [s'] out of it, and transfers control, via a global [errorcase] dispatch function, to the [error] function associated with [s']. (Because some stack cells do not physically hold a state, this description is somewhat simpler than the truth, but that's the idea.) When an error is detected in state [s], then (see [initiate]) the [error] function associated with [s] is invoked. Immediately before invoking the [error] function, the flag [env.error] is set. By convention, this means that the current token is discarded and replaced with an [error] token. The [error] token transparently inherits the positions associated with the underlying concrete token. 
Whenever we attempt to consult the current token, we check whether [env.error] is set and, if that is the case, resume error handling by calling the [error] function associated with the current state. This allows a series of reductions to correctly take place when the lookahead token is [error]. In many states, though, it is possible to statically prove that [env.error] cannot be set. In that case, we produce a lookup of [env.token] without checking [env.error]. The flag [env.error] is cleared when a token is shifted. States with default reductions perform a reduction regardless of the current lookahead token, which can be either [error] or a regular token. A question that bothered me for a while was, when unwinding the stack, do we stop at a state that has a default reduction? Should it be considered able to handle the error token? I now believe that the answer is, this cannot happen. Indeed, if a state has a default reduction, then, whenever it is entered, reduction is performed and that state is exited, which means that it is never pushed onto the stack. So, it is fine to consider that a state with a default reduction is unable to handle errors. I note that a state that can handle [error] and has a default reduction must in fact have a reduction action on [error]. *) (* The type of environments. *) let tcenv = env let tenv = TypApp (tcenv, []) (* The [assertfalse] function. We have just one of these, in order to save code size. It should become unnecessary when we add GADTs. *) let assertfalse = prefix "fail" (* The [discard] function. *) let discard = prefix "discard" (* The [initenv] function. *) let initenv = prefix "init" (* The [run] function associated with a state [s]. *) let run s = prefix (Printf.sprintf "run%d" (Lr1.number s)) (* The [goto] function associated with a nonterminal [nt]. *) let goto nt = prefix (Printf.sprintf "goto_%s" (Nonterminal.print true nt)) (* The [reduce] function associated with a production [prod]. *) let reduce prod = prefix (Printf.sprintf "reduce%d" (Production.p2i prod)) (* The [errorcase] function. *) let errorcase = prefix "errorcase" (* The [error] function associated with a state [s]. *) let error s = prefix (Printf.sprintf "error%d" (Lr1.number s)) (* The constant associated with a state [s]. *) let statecon s = dataprefix (Printf.sprintf "State%d" (Lr1.number s)) let estatecon s = EData (statecon s, []) let pstatecon s = PData (statecon s, []) let pstatescon ss = POr (List.map pstatecon ss) (* The type of states. *) let tcstate = prefix "state" let tstate = TypApp (tcstate, []) (* The [print_token] function. This automatically generated function is used in [--trace] mode. *) let print_token = prefix "print_token" (* Fields in the environment record. *) let flexer = prefix "lexer" let flexbuf = prefix "lexbuf" let ftoken = prefix "token" let ferror = prefix "error" (* The type variable that represents the stack tail. *) let tvtail = tvprefix "tail" let ttail = TypVar tvtail (* The result type for every function. TEMPORARY *) let tvresult = tvprefix "return" let tresult = TypVar tvresult (* ------------------------------------------------------------------------ *) (* Helpers for code production. *) let var x : expr = EVar x let pvar x : pattern = PVar x let magic e : expr = EMagic e let nomagic e = e (* The following assertion checks that [env.error] is [false]. 
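In the generated code, it materializes roughly as

      let () = assert (not _menhir_env._menhir_error) in ...

   where the [_menhir_] prefixes merely illustrate the prefixing scheme.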
*) let assertnoerror : pattern * expr = PUnit, EApp (EVar "assert", [ EApp (EVar "not", [ ERecordAccess (EVar env, ferror) ]) ]) let etuple = function | [] -> assert false | [ e ] -> e | es -> ETuple es let ptuple = function | [] -> assert false | [ p ] -> p | ps -> PTuple ps let trace (format : string) (args : expr list) : (pattern * expr) list = if Settings.trace then [ PUnit, EApp (EVar "Printf.fprintf", (EVar "Pervasives.stderr") :: (EStringConst (format ^"\n%!")) :: args) ] else [] let tracecomment (comment : string) (body : expr) : expr = if Settings.trace then blet (trace comment [], body) else EComment (comment, body) let auto2scheme t = scheme [ tvtail; tvresult ] t (* ------------------------------------------------------------------------ *) (* Accessing the positions of the current token. *) (* There are two ways we can go about this. We can read the positions from the lexbuf immediately after we request a new token, or we can wait until we need the positions and read them at that point. As of 2014/12/12, we switch to the latter approach. The speed difference in a micro-benchmark is not measurable, but this allows us to save two fields in the [env] record, which should be a good thing, as it implies less frequent minor collections. *) let getstartp = ERecordAccess (ERecordAccess (EVar env, flexbuf), "Lexing.lex_start_p") let getendp = ERecordAccess (ERecordAccess (EVar env, flexbuf), "Lexing.lex_curr_p") (* ------------------------------------------------------------------------ *) (* Determine whether the [goto] function for nonterminal [nt] will push a new cell onto the stack. If it doesn't, then that job is delegated to the [run] functions called by [goto]. One could decide that [gotopushes] always returns true, and produce decent code. As a refinement, we decide to drive the [push] operation inside the [run] functions if all of them are able to eliminate this operation via shiftreduce optimization. This will be the case if all of these [run] functions implement a default reduction of a non-epsilon production. If that is not the case, then [gotopushes] returns true. In general, it is good to place the [push] operation inside [goto], because multiple [reduce] functions transfer control to [goto], and [goto] in turn transfers control to multiple [run] functions. Hence, this is where code sharing is maximal. All of the [run] functions that [goto] can transfer control to expect a stack cell of the same shape (indeed, the symbol [nt] is the same in every case, and the state is always represented), which makes this decision possible. *) let gotopushes : Nonterminal.t -> bool = Nonterminal.tabulate (fun nt -> not ( Lr1.targets (fun accu _ target -> accu && match Invariant.has_default_reduction target with | Some (prod, _) -> Production.length prod > 0 | None -> false ) true (Symbol.N nt) ) ) (* ------------------------------------------------------------------------ *) (* Determine whether the [run] function for state [s] will push a new cell onto the stack. Our convention is this. If this [run] function is entered via a shift transition, then it is in charge of pushing a new stack cell. If it is entered via a goto transition, then it is in charge of pushing a new cell if and only if the [goto] function that invoked it did not do so. Last, if this [run] function is invoked directly by an entry point, then it does not push a stack cell. 
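To summarize the convention:

      incoming symbol of [s]     who allocates the new cell
      a terminal                 [run s] itself
      a nonterminal [nt]         [goto nt] if [gotopushes nt] holds, otherwise [run s]
      none (entry point)         nobody
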
*) let runpushes s = match Lr1.incoming_symbol s with | Some (Symbol.T _) -> true | Some (Symbol.N nt) -> not (gotopushes nt) | None -> false (* ------------------------------------------------------------------------ *) (* In some situations, we are able to fuse a shift (or goto) transition with a reduce transition, which means that we save the cost (in speed and in code size) of pushing and popping the top stack cell. This involves creating a modified version of the [reduce] function associated with a production [prod], where the contents of the top stack cell are passed as extra parameters. Because we wish to avoid code duplication, we perform this change only if all call sites for [reduce] agree on this modified calling convention. At the call site, the optimization is possible only if a stack cell allocation exists and is immediately followed by a call to [reduce]. This is the case inside the [run] function for state [s] when [run] pushes a stack cell and performs a default reduction. This optimization amounts to coalescing the push operation inside [run] with the pop operation that follows inside [reduce]. Unit production elimination, on the other hand, would coalesce the pop operation inside [reduce] with the push operation that follows inside [goto]. For this reason, the two are contradictory. As a result, we do not attempt to perform unit production elimination. In fact, we did implement it at one point and found that it was seldom applicable, because preference was given to the shiftreduce optimization. There are cases where shiftreduce optimization does not make any difference, for instance, if production [prod] is never reduced, or if the top stack cell is in fact nonexistent. *) let (shiftreduce : Production.index -> bool), shiftreducecount = Production.tabulateb (fun prod -> (* Check that this production pops at least one stack cell. *) Production.length prod > 0 && (* Check that all call sites push a stack cell and have a default reduction. *) Invariant.fold_reduced (fun s accu -> accu && (match Invariant.has_default_reduction s with None -> false | Some _ -> true) && (runpushes s) ) prod true ) let () = Error.logC 1 (fun f -> Printf.fprintf f "%d out of %d productions exploit shiftreduce optimization.\n" shiftreducecount Production.n) (* Check that, as predicted above, [gotopushes nt] returns [false] only when all of the [run] functions that follow it perform shiftreduce optimization. This can be proved as follows. If [gotopushes nt] returns [false], then every successor state [s] has a default reduction for some non-epsilon production [prod]. Furthermore, all states that can reduce [prod] must be successors of that same [goto] function: indeed, because the right-hand side of the production ends with symbol [nt], every state that can reduce [prod] must be entered through [nt]. So, at all such states, [runpushes] is true, which guarantees that [shiftreduce prod] is true as well. *) let () = assert ( Nonterminal.fold (fun nt accu -> accu && if gotopushes nt then true else Lr1.targets (fun accu _ target -> accu && match Invariant.has_default_reduction target with | Some (prod, _) -> shiftreduce prod | None -> false ) true (Symbol.N nt) ) true ) (* ------------------------------------------------------------------------ *) (* Type production. *) (* This is the type of states. Only states that are represented are declared. 
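Thus, a generated parser might contain a declaration of the form

      type _menhir_state = MenhirState0 | MenhirState4 | MenhirState13

   listing only the represented states (constructor names and numbers are
   illustrative).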
*) let statetypedef = { typename = tcstate; typeparams = []; typerhs = TDefSum ( Lr1.fold (fun defs s -> if Invariant.represented s then { dataname = statecon s; datavalparams = []; datatypeparams = None } :: defs else defs ) [] ); typeconstraint = None } (* The type of lexers. *) let tlexer = TypArrow (tlexbuf, ttoken) (* This is the type of parser environments. *) let field modifiable name t = { modifiable = modifiable; fieldname = name; fieldtype = type2scheme t } let envtypedef = { typename = tcenv; typeparams = []; typerhs = TDefRecord [ (* The lexer itself. *) field false flexer tlexer; (* The lexing buffer. *) field false flexbuf tlexbuf; (* The last token that was read from the lexer. This is the head of the token stream, unless [env.error] is set. *) field false ftoken ttoken; (* A flag which tells whether we currently have an [error] token at the head of the stream. When this flag is set, the head of the token stream is the [error] token, and the contents of the [token] field is irrelevant. The token following [error] is obtained by invoking the lexer again. *) field true ferror tbool; ]; typeconstraint = None } (* [curry] curries the top stack cell in a type [t] of the form [(stack type) arrow (result type)]. [t] remains unchanged if the stack type does not make at least one cell explicit. *) let curry = function | TypArrow (TypTuple (tstack :: tcell), tresult) -> TypArrow (tstack, marrow tcell tresult) | TypArrow _ as t -> t | _ -> assert false (* [curryif true] is [curry], [curryif false] is the identity. *) let curryif flag t = if flag then curry t else t (* Types for stack cells. [celltype tailtype holds_state symbol] returns the type of a stack cell. The parameter [tailtype] is the type of the tail of the stack. The flag [holds_state] tells whether the cell holds a state. The parameter [symbol] is used to determine whether the cell holds a semantic value and what its type is. A subtlety here and in [curry] above is that singleton stack cells give rise to singleton tuple types, which the type printer eliminates, but which do exist internally. As a result, [curry] always correctly removes the top stack cell, even if it is a singleton tuple cell. *) let celltype tailtype holds_state symbol _ = TypTuple ( tailtype :: elementif (Invariant.endp symbol) tposition @ elementif holds_state tstate @ semvtype symbol @ elementif (Invariant.startp symbol) tposition ) (* Types for stacks. [stacktype s] is the type of the stack at state [s]. [reducestacktype prod] is the type of the stack when about to reduce production [prod]. [gotostacktype nt] is the type of the stack when the [goto] function associated with [nt] is called. In all cases, the tail (that is, the unknown part) of the stack is represented by [ttail], currently a type variable. These stack types are obtained by folding [celltype] over a description of the stack provided by module [Invariant]. *) let stacktype s = Invariant.fold celltype ttail (Invariant.stack s) let reducestacktype prod = Invariant.fold celltype ttail (Invariant.prodstack prod) let gotostacktype nt = Invariant.fold celltype ttail (Invariant.gotostack nt) (* The type of the [run] function. As announced earlier, if [s] is the target of shift transitions, the type of the stack is curried, that is, the top stack cell is not yet allocated, so its contents are passed as extra parameters. If [s] is the target of goto transitions, the top stack cell is allocated. If [s] is a start state, this issue makes no difference. 
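As an illustration, if the stack at [s] has type ['tail * _menhir_state * semv],
   then the uncurried and curried forms of the [run] function's type over the stack
   are, respectively,

      'tail * _menhir_state * semv -> result      (entered via goto)
      'tail -> _menhir_state -> semv -> result    (entered via shift)

   (a sketch; the exact cell layout is dictated by [Invariant]).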
*) let runtypescheme s = auto2scheme ( arrow tenv ( curryif (runpushes s) ( arrow (stacktype s) tresult ) ) ) (* The type of the [goto] function. The top stack cell is curried. *) let gototypescheme nt = auto2scheme (arrow tenv (curry (arrow (gotostacktype nt) tresult))) (* If [prod] is an epsilon production and if the [goto] function associated with it expects a state parameter, then the [reduce] function associated with [prod] also requires a state parameter. *) let reduce_expects_state_param prod = let nt = Production.nt prod in Production.length prod = 0 && Invariant.fold (fun _ holds_state _ _ -> holds_state) false (Invariant.gotostack nt) (* The type of the [reduce] function. If shiftreduce optimization is performed for this production, then the top stack cell is not explicitly allocated. *) let reducetypescheme prod = auto2scheme ( arrow tenv ( curryif (shiftreduce prod) ( arrow (reducestacktype prod) ( arrowif (reduce_expects_state_param prod) tstate tresult ) ) ) ) (* The type of the [errorcase] function. The shape of the stack is unknown, and is determined by examining the state parameter. *) let errorcasetypescheme = auto2scheme (marrow [ tenv; ttail; tstate ] tresult) (* The type of the [error] function. The shape of the stack is the one associated with state [s]. *) let errortypescheme s = auto2scheme ( marrow [ tenv; stacktype s ] tresult) (* ------------------------------------------------------------------------ *) (* Code production preliminaries. *) (* This flag will be set to [true] if we ever raise the [Error] exception. This happens when we unwind the entire stack without finding a state that can handle errors. *) let can_die = ref false (* A code pattern for an exception handling construct where both alternatives are in tail position. Concrete syntax in OCaml 4.02 is [match e with x -> e1 | exception Error -> e2]. Earlier versions of OCaml do not support this construct. We continue to emulate it using a combination of [try/with], [match/with], and an [option] value. It is used only in a very rare case anyway. *) let letunless e x e1 e2 = EMatch ( ETry ( EData ("Some", [ e ]), [ { branchpat = PData (excdef.excname, []); branchbody = EData ("None", []) } ] ), [ { branchpat = PData ("Some", [ PVar x ]); branchbody = e1 }; { branchpat = PData ("None", []); branchbody = e2 } ] ) (* ------------------------------------------------------------------------ *) (* Calling conventions. *) (* The layout of a stack cell is determined here. The first field in a stack cell is always a pointer to the rest of the stack; it is followed by the fields listed below, each of which may or may not appear. [runpushcell] and [gotopushcell] are the two places where stack cells are allocated. *) (* 2015/11/04. We make [endp] the first element in the list of optional fields, so we are able to access it at a fixed offset, provided we know that it exists. This is exploited when reducing an epsilon production. *) (* The contents of a stack cell, exposed as individual parameters. The choice of identifiers is suitable for use in the definition of [run]. *) let runcellparams var holds_state symbol = elementif (Invariant.endp symbol) (var endp) @ elementif holds_state (var state) @ symval symbol (var semv) @ elementif (Invariant.startp symbol) (var startp) (* The contents of a stack cell, exposed as individual parameters, again. The choice of identifiers is suitable for use in the definition of a [reduce] function. [prod] is the production's index. 
The integer [i] tells which symbol on the right-hand side we are focusing on, that is, which symbol this stack cell is associated with. *) let reducecellparams prod i holds_state symbol = let ids = Production.identifiers prod in (* The semantic value is bound to the variable [ids.(i)]. *) let semvpat _t = PVar ids.(i) in elementif (Invariant.endp symbol) (PVar (Printf.sprintf "_endpos_%s_" ids.(i))) @ elementif holds_state (if i = 0 then PVar state else PWildcard) @ symvalt symbol semvpat @ elementif (Invariant.startp symbol) (PVar (Printf.sprintf "_startpos_%s_" ids.(i))) (* The contents of a stack cell, exposed as individual parameters, again. The choice of identifiers is suitable for use in the definition of [error]. *) let errorcellparams (i, pat) holds_state symbol _ = i + 1, ptuple ( pat :: elementif (Invariant.endp symbol) PWildcard @ elementif holds_state (if i = 0 then PVar state else PWildcard) @ symval symbol PWildcard @ elementif (Invariant.startp symbol) PWildcard ) (* Calls to [run]. *) let runparams magic var s = var env :: magic (var stack) :: listif (runpushes s) (Invariant.fold_top (runcellparams var) [] (Invariant.stack s)) let call_run s actuals = EApp (EVar (run s), actuals) (* The parameters to [reduce]. When shiftreduce optimization is in effect, the top stack cell is not allocated, so extra parameters are required. Note that [shiftreduce prod] and [reduce_expects_state_param prod] are mutually exclusive conditions, so the [state] parameter is never bound twice. *) let reduceparams prod = PVar env :: PVar stack :: listif (shiftreduce prod) ( Invariant.fold_top (reducecellparams prod (Production.length prod - 1)) [] (Invariant.prodstack prod) ) @ elementif (reduce_expects_state_param prod) (PVar state) (* Calls to [reduce]. One must specify the production [prod] as well as the current state [s]. *) let call_reduce prod s = let actuals = (EVar env) :: (EMagic (EVar stack)) :: listif (shiftreduce prod) (Invariant.fold_top (runcellparams var) [] (Invariant.stack s)) (* compare with [runpushcell s] *) @ elementif (reduce_expects_state_param prod) (estatecon s) in EApp (EVar (reduce prod), actuals) (* Calls to [goto]. *) let gotoparams var nt = var env :: var stack :: Invariant.fold_top (runcellparams var) [] (Invariant.gotostack nt) let call_goto nt = EApp (EVar (goto nt), gotoparams var nt) (* Calls to [errorcase]. *) let errorcaseparams magic var = [ var env; magic (var stack); var state ] let call_errorcase = EApp (EVar errorcase, errorcaseparams magic var) (* Calls to [error]. *) let errorparams magic var = [ var env; magic (var stack) ] let call_error magic s = EApp (EVar (error s), errorparams magic var) let call_error_via_errorcase magic s = (* TEMPORARY document *) if Invariant.represented s then EApp (EVar errorcase, [ var env; magic (var stack); estatecon s ]) else call_error magic s (* Calls to [assertfalse]. *) let call_assertfalse = EApp (EVar assertfalse, [ EVar "()" ]) (* ------------------------------------------------------------------------ *) (* Code production for the automaton functions. *) (* Count how many states can actually peek at an error token. This figure is, in general, less than or equal to the number of states at which [Invariant.errorpeeker] is true, because some of these states have a default reduction and will not consult the lookahead token. *) let errorpeekers = ref 0 (* Code for calling the reduction function for production [prod] upon finding a token within [toks]. This produces a branch, to be inserted in a [run] function for state [s].
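In concrete syntax, such a branch might read

      | PLUS | MINUS -> _menhir_reduce12 _menhir_env (Obj.magic _menhir_stack)

   where the token and function names are purely illustrative.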
*) let reducebranch toks prod s = { branchpat = tokspat toks; branchbody = call_reduce prod s } (* Code for shifting from state [s] to state [s'] via the token [tok]. This produces a branch, to be inserted in a [run] function for state [s]. The callee, [run s'], is responsible for taking the current token off the input stream. (There is actually a case where the token is *not* taken off the stream: when [s'] has a default reduction on [#].) It is also responsible for pushing a new stack cell. The rationale behind this decision is that there may be multiple shift transitions into [s'], so we actually share that code by placing it inside [run s'] rather than inside every transition. *) let shiftbranchbody s tok s' = (* Construct the actual parameters for [run s']. *) let actuals = (EVar env) :: (EMagic (EVar stack)) :: Invariant.fold_top (fun holds_state symbol -> assert (Symbol.equal (Symbol.T tok) symbol); elementif (Invariant.endp symbol) getendp @ elementif holds_state (estatecon s) @ tokval tok (EVar semv) @ elementif (Invariant.startp symbol) getstartp ) [] (Invariant.stack s') in (* Call [run s']. *) tracecomment (Printf.sprintf "Shifting (%s) to state %d" (Terminal.print tok) (Lr1.number s')) (call_run s' actuals) let shiftbranch s tok s' = assert (not (Terminal.pseudo tok)); { branchpat = PData (tokendata (Terminal.print tok), tokval tok (PVar semv)); branchbody = shiftbranchbody s tok s' } (* This generates code for pushing a new stack cell upon entering the [run] function for state [s]. *) let runpushcell s e = if runpushes s then let contents = var stack :: Invariant.fold_top (runcellparams var) [] (Invariant.stack s) in mlet [ pvar stack ] [ etuple contents ] e else e let runpushcellunless shiftreduce s e = if shiftreduce then EComment ("Not allocating top stack cell", e) else runpushcell s e (* This generates code for dealing with the lookahead token upon entering the [run] function for state [s]. If [s] is the target of a shift transition, then we must take the current token (which was consumed in the shift transition) off the input stream. Whether [s] was entered through a shift or a goto transition, we want to peek at the next token, unless we are performing a default reduction. The parameter [defred] tells which default reduction, if any, we are about to perform. *) (* 2014/12/06 New convention regarding initial states (i.e., states which have no incoming symbol). The function [initenv] does not invoke the lexer, so the [run] function for an initial state must do it. (Except in the very special case where the initial state has a default reduction on [#] -- this means the grammar recognizes only the empty word. We have ruled out this case.) *) let gettoken s defred e = match Lr1.incoming_symbol s, defred with | (Some (Symbol.T _) | None), Some (_, toks) when TerminalSet.mem Terminal.sharp toks -> assert (TerminalSet.cardinal toks = 1); (* There is a default reduction on token [#]. We cannot request the next token, since that might drive the lexer off the end of the input stream, so we cannot call [discard]. Do nothing. *) e | (Some (Symbol.T _) | None), Some _ -> (* There is some other default reduction. Discard the first input token. *) blet ([ PVar env, EApp (EVar discard, [ EVar env ]) (* Note that we do not read [env.token]. *) ], e) | (Some (Symbol.T _) | None), None -> (* There is no default reduction. Discard the first input token and peek at the next one. 
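In concrete syntax, this is roughly

      let _menhir_env = _menhir_discard _menhir_env in
      let _tok = _menhir_env._menhir_token in
      ...

   with illustrative [_menhir_] names.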
*) blet ([ PVar env, EApp (EVar discard, [ EVar env ]); PVar token, ERecordAccess (EVar env, ftoken) ], e) | Some (Symbol.N _), Some _ -> (* There is some default reduction. Do not peek at the input token. *) e | Some (Symbol.N _), None -> (* There is no default reduction. Peek at the first input token, without taking it off the input stream. This is normally done by reading [env.token], unless the token might be [error]: then, we check [env.error] first. *) if Invariant.errorpeeker s then begin incr errorpeekers; EIfThenElse ( ERecordAccess (EVar env, ferror), tracecomment "Resuming error handling" (call_error_via_errorcase magic s), blet ([ PVar token, ERecordAccess (EVar env, ftoken) ], e) ) end else blet ([ assertnoerror; PVar token, ERecordAccess (EVar env, ftoken) ], e) (* This produces the header of a [run] function. *) let runheader s body = let body = tracecomment (Printf.sprintf "State %d:" (Lr1.number s)) body in { valpublic = false; valpat = PVar (run s); valval = EAnnot (EFun (runparams nomagic pvar s, body), runtypescheme s) } (* This produces the comment attached to a default reduction. *) let defaultreductioncomment toks e = EPatComment ( "Reducing without looking ahead at ", tokspat toks, e ) (* This produces some bookkeeping code that is used when initiating error handling. We set the flag [env.error]. By convention, the field [env.token] becomes meaningless and one considers that the first token on the input stream is [error]. As a result, the next peek at the lookahead token will cause error handling to be resumed. The next call to [discard] will take the [error] token off the input stream and clear [env.error]. *) (* It seems convenient for [env.error] to be a mutable field, as this allows us to generate compact code. Re-allocating the whole record would produce less compact code. And speed is not an issue in this error-handling code. *) let errorbookkeeping e = tracecomment "Initiating error handling" (blet ( [ PUnit, ERecordWrite (EVar env, ferror, etrue) ], e )) (* This code is used to indicate that a new error has been detected in state [s]. If I am correct, [env.error] is never set here. Indeed, that would mean that we first found an error, and then signaled another error before being able to shift the first error token. My understanding is that this cannot happen: when the first error is signaled, we end up at a state that is willing to handle the error token, by a series of reductions followed by a shift. We initiate error handling by first performing the standard bookkeeping described above, then transferring control to the [error] function associated with [s]. *) let initiate s = blet ( [ assertnoerror ], errorbookkeeping (call_error_via_errorcase magic s) ) (* This produces the body of the [run] function for state [s]. *) let rundef s : valdef = match Invariant.has_default_reduction s with | Some (prod, toks) as defred -> (* Perform reduction without looking ahead. If shiftreduce optimization is being performed, then no stack cell is allocated. The contents of the top stack cell are passed to [reduce] as extra parameters. *) runheader s ( runpushcellunless (shiftreduce prod) s ( gettoken s defred ( defaultreductioncomment toks ( call_reduce prod s ) ) ) ) | None -> (* If this state is willing to act on the error token, ignore that -- this is taken care of elsewhere.
*) let transitions = SymbolMap.remove (Symbol.T Terminal.error) (Lr1.transitions s) and reductions = TerminalMap.remove Terminal.error (Lr1.reductions s) in (* Construct the main case analysis that determines what action should be taken next. A default branch, where an error is detected, is added if the analysis is not exhaustive. In the default branch, we initiate error handling. *) let covered, branches = ProductionMap.fold (fun prod toks (covered, branches) -> (* There is a reduction for these tokens. *) TerminalSet.union toks covered, reducebranch toks prod s :: branches ) (Lr1.invert reductions) (TerminalSet.empty, []) in let covered, branches = SymbolMap.fold (fun symbol s' (covered, branches) -> match symbol with | Symbol.T tok -> (* There is a shift transition for this token. *) TerminalSet.add tok covered, shiftbranch s tok s' :: branches | Symbol.N _ -> covered, branches ) transitions (covered, branches) in let branches = if TerminalSet.subset TerminalSet.universe covered then branches else branches @ [ { branchpat = PWildcard; branchbody = initiate s } ] in (* Finally, construct the code for [run]. The former pushes things onto the stack, obtains the lookahead token, then performs the main case analysis on the lookahead token. *) runheader s ( runpushcell s ( gettoken s None ( EMatch ( EVar token, branches ) ) ) ) (* This is the body of the [reduce] function associated with production [prod]. *) let reducebody prod = (* Find out about the left-hand side of this production and about the identifiers that have been bound to the symbols in the right-hand side. These represent variables that we should bind to semantic values before invoking the semantic action. *) let nt, rhs = Production.def prod and ids = Production.identifiers prod and length = Production.length prod in (* Build a pattern that represents the shape of the stack. Out of the stack, we extract a state (except when the production is an epsilon production) and a number of semantic values. If shiftreduce optimization is being performed, then the top stack cell is not explicitly allocated, so we do not include it in the pattern that is built. *) let (_ : int), pat = Invariant.fold (fun (i, pat) holds_state symbol _ -> i + 1, if i = length - 1 && shiftreduce prod then pat else ptuple (pat :: reducecellparams prod i holds_state symbol) ) (0, PVar stack) (Invariant.prodstack prod) in (* If any identifiers refer to terminal symbols without a semantic value, then bind these identifiers to the unit value. This provides the illusion that every symbol, terminal or nonterminal, has a semantic value. This is more regular and allows applying operators such as ? to terminal symbols without a semantic value. *) let unitbindings = Misc.foldi length (fun i unitbindings -> match semvtype rhs.(i) with | [] -> (PVar ids.(i), EUnit) :: unitbindings | _ -> unitbindings ) [] in (* If necessary, determine start and end positions for the left-hand side of the production. If the right-hand side is nonempty, this is done by extracting position information out of the first and last symbols of the right-hand side. If it is empty, then (as of 2015/11/04) this is done by taking the end position stored in the top stack cell (whatever it is). The constraints imposed by the module [Invariant], the layout of cells, and our creation of a sentinel cell (see [entrydef] further on), ensure that this cell exists and has an [endp] field at offset 1. Yes, we live dangerously. You only live once. 
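Concretely, [extract x] below produces a binding of the shape

      let (_, x) = Obj.magic _menhir_stack in

   which reads the field at offset 1 in the top stack cell, whatever the cell's
   actual arity (a sketch; [Obj.magic] corresponds to the [EMagic] node).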
*) let extract x = (* Extract the end position (i.e., the field at offset 1) in the top stack cell and bind it to the variable [x]. *) PTuple [ PWildcard; PVar x ], EMagic (EVar stack) in let symbol = Symbol.N nt in let posbindings action = let bind_startp = Invariant.startp symbol in elementif (Action.has_beforeend action) ( extract beforeendp ) @ elementif bind_startp ( if length > 0 then PVar startp, EVar (Printf.sprintf "_startpos_%s_" ids.(0)) else extract startp ) @ elementif (Invariant.endp symbol) ( if length > 0 then PVar endp, EVar (Printf.sprintf "_endpos_%s_" ids.(length - 1)) else if bind_startp then PVar endp, EVar startp else extract endp ) in (* If this production is one of the start productions, then reducing it means accepting the input. In that case, we return a final semantic value and stop. Otherwise, we transfer control to the [goto] function, unless the semantic action raises [Error], in which case we transfer control to [errorcase]. *) if Production.is_start prod then tracecomment "Accepting" (blet ( [ pat, EVar stack ], EMagic (EVar ids.(0)) )) else let action = Production.action prod in let act = EAnnot (Action.to_il_expr action, type2scheme (semvtypent nt)) in tracecomment (Printf.sprintf "Reducing production %s" (Production.print prod)) (blet ( (pat, EVar stack) :: unitbindings @ posbindings action, (* If the semantic action is susceptible of raising [Error], use a [let/unless] construct, otherwise use [let]. *) if Action.has_syntaxerror action then letunless act semv (call_goto nt) (errorbookkeeping call_errorcase) else blet ([ PVar semv, act ], call_goto nt) )) (* This is the definition of the [reduce] function associated with production [prod]. *) let reducedef prod = { valpublic = false; valpat = PVar (reduce prod); valval = EAnnot ( EFun ( reduceparams prod, reducebody prod ), reducetypescheme prod ) } (* This generates code for pushing a new stack cell inside [goto]. *) let gotopushcell nt e = if gotopushes nt then let contents = var stack :: Invariant.fold_top (runcellparams var) [] (Invariant.gotostack nt) in mlet [ pvar stack ] [ etuple contents ] e else e (* This is the heart of the [goto] function associated with nonterminal [nt]. *) let gotobody nt = (* Examine the current state to determine where to go next. *) let branches = Lr1.targets (fun branches sources target -> { branchpat = pstatescon sources; branchbody = call_run target (runparams magic var target) } :: branches ) [] (Symbol.N nt) in match branches with | [] -> (* If there are no branches, then this [goto] function is never invoked. The inliner will drop it, so whatever we generate here is unimportant. *) call_assertfalse | [ branch ] -> (* If there is only one branch, no case analysis is required. This optimization is not strictly necessary if GADTs are used by the compiler to prove that the case analysis is exhaustive. It does improve readability, though, and is also useful if the compiler does not have GADTs. *) EPatComment ( "State should be ", branch.branchpat, branch.branchbody ) | _ -> (* In the general case, we keep the branches computed above and, unless [nt] is universal, add a default branch, which is theoretically useless but helps avoid warnings if the compiler does not have GADTs. *) let default = { branchpat = PWildcard; branchbody = call_assertfalse } in EMatch ( EVar state, branches @ (if Invariant.universal (Symbol.N nt) then [] else [ default ]) ) (* This the [goto] function associated with nonterminal [nt]. 
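In concrete syntax, a generated [goto] function looks roughly like this (a sketch; the state and function names are hypothetical):

      let goto_expr env stack state =
        match state with
        | State12 -> run13 env stack ...
        | State27 -> run31 env stack ...
        | _ -> assertfalse ()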
*) let gotodef nt = { valpublic = false; valpat = PVar (goto nt); valval = EAnnot (EFun (gotoparams pvar nt, gotopushcell nt (gotobody nt)), gototypescheme nt) } (* ------------------------------------------------------------------------ *) (* Code production for the error handling functions. *) (* This is the body of the [error] function associated with state [s]. *) let handle s e = tracecomment (Printf.sprintf "Handling error in state %d" (Lr1.number s)) e let errorbody s = try let s' = SymbolMap.find (Symbol.T Terminal.error) (Lr1.transitions s) in (* There is a shift transition on error. *) handle s ( shiftbranchbody s Terminal.error s' ) with Not_found -> try let prods = TerminalMap.lookup Terminal.error (Lr1.reductions s) in let prod = Misc.single prods in (* There is a reduce transition on error. If shiftreduce optimization is enabled for this production, then we must pop an extra cell for [reduce]'s calling convention to be met. *) let extrapop e = if shiftreduce prod then let pat = ptuple (PVar stack :: Invariant.fold_top (runcellparams pvar) [] (Invariant.stack s)) in blet ([ pat, EVar stack ], e) else e in handle s ( extrapop ( call_reduce prod s ) ) with Not_found -> (* This state is unable to handle errors. Pop the stack to find a state that does handle errors, a state that can further pop the stack, or die. *) match Invariant.rewind s with | Invariant.Die -> can_die := true; ERaise errorval | Invariant.DownTo (w, st) -> let _, pat = Invariant.fold errorcellparams (0, PVar stack) w in blet ( [ pat, EVar stack ], match st with | Invariant.Represented -> call_errorcase | Invariant.UnRepresented s -> call_error magic s ) (* This is the [error] function associated with state [s]. *) let errordef s = { valpublic = false; valpat = PVar (error s); valval = EAnnot ( EFun ( errorparams nomagic pvar, errorbody s ), errortypescheme s ) } (* This is the [errorcase] function. It examines its state parameter and dispatches control to an appropriate [error] function. *) let errorcasedef = let branches = Lr1.fold (fun branches s -> if Invariant.represented s then { branchpat = pstatecon s; branchbody = EApp (EVar (error s), [ EVar env; EMagic (EVar stack) ]) } :: branches else branches ) [] in { valpublic = false; valpat = PVar errorcase; valval = EAnnot ( EFun ( errorcaseparams nomagic pvar, EMatch ( EVar state, branches ) ), errorcasetypescheme ) } (* ------------------------------------------------------------------------ *) (* Code production for the entry points. *) (* This is the entry point associated with a start state [s]. By convention, it is named after the nonterminal [nt] that corresponds to this state. This is a public definition. The code initializes a parser environment, an empty stack, and invokes [run]. 2015/11/11. If the state [s] can reduce an epsilon production whose left-hand symbol keeps track of its start or end position, or if [s] can reduce any production that mentions [$endpos($0)], then the initial stack should contain a sentinel cell with a valid [endp] field at offset 1. For simplicity, we always create a sentinel cell. 
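In concrete syntax, the generated entry point has roughly this shape (a sketch, where [main] stands for the start symbol's name and [run0] for the [run] function of the start state):

      let main lexer lexbuf =
        let env = initenv lexer lexbuf in
        Obj.magic (run0 env ((), initial_position))

The pair [((), initial_position)] is the sentinel cell: its field at offset 1 holds a valid end position.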
*) let entrydef s = let nt = Item.startnt (Lr1.start2item s) in let lexer = "lexer" and lexbuf = "lexbuf" in let initial_stack = let initial_position = getendp in etuple [ EUnit; initial_position ] in { valpublic = true; valpat = PVar (Nonterminal.print true nt); valval = EAnnot ( EFun ( [ PVar lexer; PVar lexbuf ], blet ( [ PVar env, EApp (EVar initenv, [ EVar lexer; EVar lexbuf ]) ], EMagic (EApp (EVar (run s), [ EVar env; initial_stack ])) ) ), entrytypescheme Front.grammar (Nonterminal.print true nt) ) } (* ------------------------------------------------------------------------ *) (* Code production for auxiliary functions. *) (* This is [assertfalse], used when internal failure is detected. This should never happen if our tool is correct. *) let assertfalsedef = { valpublic = false; valpat = PVar assertfalse; valval = EAnnot ( EFun ([ PUnit ], blet ([ PUnit, EApp (EVar "Printf.fprintf", [ EVar "Pervasives.stderr"; EStringConst "Internal failure -- please contact the parser generator's developers.\n%!" ]); ], EApp (EVar "assert", [ efalse ]) ) ), scheme [ "a" ] (arrow tunit (tvar "a")) ) } (* This is [print_token], used to print tokens in [--trace] mode. *) let printtokendef = destructuretokendef print_token tstring false (fun tok -> EStringConst (Terminal.print tok)) (* This is [discard], used to take a token off the input stream and query the lexer for a new one. The code queries the lexer for a new token and stores it into [env.token], overwriting the previous token. It also stores the start and positions of the new token. Last, [env.error] is cleared. We use the lexer's [lex_start_p] and [lex_curr_p] fields to extract the start and end positions of the token that we just read. In practice, it seems that [lex_start_p] can be inaccurate (that is the case when the lexer calls itself recursively, instead of simply recognizing an atomic pattern and returning immediately). However, we are 100% compatible with ocamlyacc here, and there is no better solution anyway. As of 2014/12/12, we re-allocate the environment record instead of updating it. Perhaps surprisingly, this makes the code TWICE FASTER overall. The write barrier is really costly! *) let discardbody = let lexer = "lexer" and lexbuf = "lexbuf" in EFun ( [ PVar env ], blet ([ PVar lexer, ERecordAccess (EVar env, flexer); PVar lexbuf, ERecordAccess (EVar env, flexbuf); PVar token, EApp (EVar lexer, [ EVar lexbuf ]); ] @ trace "Lookahead token is now %s (%d-%d)" [ EApp (EVar print_token, [ EVar token ]); ERecordAccess (ERecordAccess (EVar lexbuf, "Lexing.lex_start_p"), "Lexing.pos_cnum"); ERecordAccess (ERecordAccess (EVar lexbuf, "Lexing.lex_curr_p"), "Lexing.pos_cnum") ], ERecord [ flexer, EVar lexer; flexbuf, EVar lexbuf; ftoken, EVar token; ferror, efalse ] ) ) let discarddef = { valpublic = false; valpat = PVar discard; valval = EAnnot ( discardbody, type2scheme (arrow tenv tenv) ) } (* This is [initenv], used to allocate a fresh parser environment. It fills in all fields in a straightforward way. The [token] field receives a dummy value. It will be overwritten by the first call to [run], which will invoke [discard]. This allows us to invoke the lexer in just one place. *) let initenvdef = let lexer = "lexer" and lexbuf = "lexbuf" in { valpublic = false; valpat = PVar initenv; valval = EAnnot ( EFun ( [ PVar lexer; PVar lexbuf ], blet ( (* We do not have a dummy token at hand, so we forge one. *) (* It will be overwritten by the first call to the lexer. 
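The forged value is [Obj.magic ()]. This is safe because the [token] field is never read before [discard] overwrites it with a genuine token obtained from the lexer.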
*) [ PVar token, EMagic EUnit ], ERecord ([ (flexer, EVar lexer); (flexbuf, EVar lexbuf); (ftoken, EVar token); (ferror, efalse) ] ) ) ), type2scheme (marrow [ tlexer; tlexbuf ] tenv) ) } (* ------------------------------------------------------------------------ *) (* Here is complete code for the parser. *) open UnparameterizedSyntax let grammar = Front.grammar let program = [ SIFunctor (grammar.parameters, SIExcDefs [ excdef ] :: SIValDefs (false, [ excvaldef ]) :: interface_to_structure ( tokentypedef grammar ) @ SITypeDefs [ envtypedef; statetypedef ] :: SIStretch grammar.preludes :: SIValDefs (true, ProductionMap.fold (fun _ s defs -> entrydef s :: defs ) Lr1.entry ( Lr1.fold (fun defs s -> rundef s :: errordef s :: defs ) ( Nonterminal.foldx (fun nt defs -> gotodef nt :: defs ) (Production.fold (fun prod defs -> if Invariant.ever_reduced prod then reducedef prod :: defs else defs ) [ discarddef; initenvdef; printtokendef; assertfalsedef; errorcasedef ]))) ) :: SIStretch grammar.postludes :: [])] (* ------------------------------------------------------------------------ *) (* We are done! *) let () = Error.logC 1 (fun f -> Printf.fprintf f "%d out of %d states can peek at an error.\n" !errorpeekers Lr1.n) let () = if not !can_die then Error.logC 1 (fun f -> Printf.fprintf f "The generated parser cannot raise Error.\n") let () = Time.tick "Producing abstract syntax" end menhir-20151112/src/infer.ml0000644000175000017500000002362512621170073014423 0ustar mehdimehdiopen Syntax open Stretch open UnparameterizedSyntax open IL open CodeBits open TokenType (* ------------------------------------------------------------------------- *) (* Naming conventions. *) (* The type variable associated with a nonterminal symbol. Its name begins with a prefix which ensures that it begins with a lowercase letter and cannot clash with Objective Caml keywords. *) let ntvar symbol = Printf.sprintf "tv_%s" (Misc.normalize symbol) (* The term variable associated with a nonterminal symbol. Its name begins with a prefix which ensures that it begins with a lowercase letter and cannot clash with Objective Caml keywords. *) let encode symbol = Printf.sprintf "xv_%s" (Misc.normalize symbol) let decode s = let n = String.length s in assert (n >= 3 && String.sub s 0 3 = "xv_"); String.sub s 3 (n - 3) (* The name of the temporary file. *) let base = Settings.base let mlname = base ^ ".ml" let mliname = base ^ ".mli" (* ------------------------------------------------------------------------- *) (* Code production. *) (* [nttype nt] is the type of the nonterminal [nt], as currently known. *) let nttype grammar nt = try TypTextual (StringMap.find nt grammar.types) with Not_found -> TypVar (ntvar nt) (* [is_standard] determines whether a branch derives from a standard library definition. The method, based on a file name, is somewhat fragile. *) let is_standard branch = List.for_all (fun x -> x = Settings.stdlib_filename) (Action.filenames branch.action) (* [actiondef] turns a branch into a function definition. *) let actiondef grammar symbol branch = (* Construct a list of the semantic action's formal parameters that depend on the production's right-hand side. *) let formals = List.fold_left (fun formals (symbol, id) -> let id, startp, endp, starto, endo = id, Printf.sprintf "_startpos_%s_" id, Printf.sprintf "_endpos_%s_" id, Printf.sprintf "_startofs_%s_" id, Printf.sprintf "_endofs_%s_" id in let t = try let props = StringMap.find symbol grammar.tokens in (* Symbol is a terminal. 
*) match props.tk_ocamltype with | None -> tunit | Some ocamltype -> TypTextual ocamltype with Not_found -> (* Symbol is a nonterminal. *) nttype grammar symbol in PAnnot (PVar id, t) :: PAnnot (PVar startp, tposition) :: PAnnot (PVar endp, tposition) :: PAnnot (PVar starto, tint) :: PAnnot (PVar endo, tint) :: formals ) [] branch.producers in (* Extend the list with parameters that do not depend on the right-hand side. *) let formals = PAnnot (PVar "_eRR", texn) :: PAnnot (PVar "_startpos", tposition) :: PAnnot (PVar "_endpos", tposition) :: PAnnot (PVar "_endpos__0_", tposition) :: PAnnot (PVar "_symbolstartpos", tposition) :: PAnnot (PVar "_startofs", tint) :: PAnnot (PVar "_endofs", tint) :: PAnnot (PVar "_endofs__0_", tint) :: PAnnot (PVar "_symbolstartofs", tint) :: formals in (* Construct a function definition out of the above bindings and the semantic action. *) let body = EAnnot ( Action.to_il_expr branch.action, type2scheme (nttype grammar symbol) ) in match formals with | [] -> body | _ -> EFun (formals, body) (* [program] turns an entire grammar into a test program. *) let program grammar = (* Turn the grammar into a bunch of function definitions. Grammar productions that derive from the standard library are reflected first, so that type errors are not reported in them. *) let bindings1, bindings2 = StringMap.fold (fun symbol rule (bindings1, bindings2) -> List.fold_left (fun (bindings1, bindings2) branch -> if is_standard branch then (PWildcard, actiondef grammar symbol branch) :: bindings1, bindings2 else bindings1, (PWildcard, actiondef grammar symbol branch) :: bindings2 ) (bindings1, bindings2) rule.branches ) grammar.rules ([], []) in (* Create entry points whose types are the unknowns that we are looking for. *) let ps, ts = StringMap.fold (fun symbol _ (ps, ts) -> PVar (encode (Misc.normalize symbol)) :: ps, nttype grammar symbol :: ts ) grammar.rules ([], []) in let def = { valpublic = true; valpat = PTuple ps; valval = ELet (bindings1 @ bindings2, EAnnot (bottom, type2scheme (TypTuple ts))) } in (* Insert markers to delimit the part of the file that we are interested in. These markers are recognized by [Lexmli]. This helps skip the values, types, exceptions, etc. that might be defined by the prologue or postlogue. *) let begindef = { valpublic = true; valpat = PVar "menhir_begin_marker"; valval = EIntConst 0 } and enddef = { valpublic = true; valpat = PVar "menhir_end_marker"; valval = EIntConst 0 } in (* Issue the test program. We include the definition of the type of tokens, because, in principle, the semantic actions may refer to it or to its data constructors. *) [ SIFunctor (grammar.parameters, interface_to_structure (tokentypedef grammar) @ SIStretch grammar.preludes :: SIValDefs (false, [ begindef; def; enddef ]) :: SIStretch grammar.postludes :: [])] (* ------------------------------------------------------------------------- *) (* Writing the program associated with a grammar to a file. *) let write grammar () = let ml = open_out mlname in let module P = Printer.Make (struct let f = ml let locate_stretches = Some mlname end) in P.program (program grammar); close_out ml (* ------------------------------------------------------------------------- *) (* Running ocamldep on the program. *) type entry = string (* basename *) * string (* filename *) type line = entry (* target *) * entry list (* dependencies *) let depend grammar = (* Create an [.ml] file and an [.mli] file, then invoke ocamldep to compute dependencies for us. 
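For instance, if the base name is [parser], the command that is eventually invoked has the form [ocamldep parser.ml parser.mli], modulo quoting and modulo the actual value of [Settings.ocamldep].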
*) (* If an old [.ml] or [.mli] file exists, we are careful to preserve it. We temporarily move it out of the way and restore it when we are done. There is no reason why dependency analysis should destroy existing files. *) let ocamldep_command = Printf.sprintf "%s %s %s" Settings.ocamldep (Filename.quote mlname) (Filename.quote mliname) in let output : string = Option.project ( IO.moving_away mlname (fun () -> IO.moving_away mliname (fun () -> IO.with_file mlname (write grammar) (fun () -> IO.with_file mliname (Interface.write grammar) (fun () -> IO.invoke ocamldep_command ))))) in (* Echo ocamldep's output. *) print_string output; (* If [--raw-depend] was specified on the command line, stop here. This option is used by omake and by ocamlbuild, which performs their own postprocessing of [ocamldep]'s output. For normal [make] users, who use [--depend], some postprocessing is required, which is performed below. *) begin match Settings.depend with | Settings.OMNone -> assert false (* we wouldn't be here in the first place *) | Settings.OMRaw -> () | Settings.OMPostprocess -> (* Make sense out of ocamldep's output. *) let lexbuf = Lexing.from_string output in let lines : line list = try Lexdep.main lexbuf with Lexdep.Error msg -> (* Echo the error message, followed with ocamldep's output. *) Error.error [] "%s" (msg ^ output) in (* Look for the line that concerns the [.cmo] target, and echo a modified version of this line, where the [.cmo] target is replaced with [.ml] and [.mli] targets, and where the dependency over the [.cmi] file is dropped. In doing so, we assume that the user's [Makefile] supports bytecode compilation, so that it makes sense to request [bar.cmo] to be built, as opposed to [bar.cmx]. This is not optimal, but will do. [camldep] exhibits the same behavior. *) (* TEMPORARY allow ocamldep to be called with flag -native. *) List.iter (fun ((_, target_filename), dependencies) -> if Filename.check_suffix target_filename ".cmo" then let dependencies = List.filter (fun (basename, _) -> basename <> base ) dependencies in if List.length dependencies > 0 then begin Printf.printf "%s.ml %s.mli:" base base; List.iter (fun (_basename, filename) -> Printf.printf " %s" filename ) dependencies; Printf.printf "\n%!" end ) lines end; (* Stop. *) exit 0 (* ------------------------------------------------------------------------- *) (* Inferring types for a grammar's nonterminals. *) let infer grammar = (* Invoke ocamlc to do type inference for us. *) let ocamlc_command = Printf.sprintf "%s -c -i %s" Settings.ocamlc (Filename.quote mlname) in let output = write grammar (); match IO.invoke ocamlc_command with | Some result -> Sys.remove mlname; result | None -> (* 2015/10/05: intentionally do not remove the [.ml] file if [ocamlc] fails. (Or if an exception is thrown.) We cannot understand why [ocaml] complains if we can't see the [.ml] file. *) exit 1 in (* Make sense out of ocamlc's output. *) let env : (string * int * int) list = Lexmli.main (Lexing.from_string output) in let env : (string * ocamltype) list = List.map (fun (id, openingofs, closingofs) -> decode id, Inferred (String.sub output openingofs (closingofs - openingofs)) ) env in (* Augment the grammar with new %type declarations. *) let types = StringMap.fold (fun symbol _ types -> let ocamltype = try List.assoc (Misc.normalize symbol) env with Not_found -> assert false in if StringMap.mem symbol grammar.types then (* If there was a declared type, keep it. *) types else (* Otherwise, insert the inferred type. 
*) StringMap.add symbol ocamltype types ) grammar.rules grammar.types in { grammar with types = types } menhir-20151112/src/tokenType.ml0000644000175000017500000001305212621170073015273 0ustar mehdimehdi(* This module deals with a few details regarding the definition of the [token] type. In particular, if [--only-tokens] was specified, it emits the type definition and exits. *) open UnparameterizedSyntax open IL open CodeBits (* This is the conventional name of the [token] type, with no prefix. A prefix is possibly appended to it below, where [tctoken] is redefined before being exported. *) let tctoken = "token" let ttoken = TypApp (tctoken, []) (* This is the conventional name of the token GADT, which describes the tokens. Same setup as above. *) let tctokengadt = "terminal" let ttokengadt a = TypApp (tctokengadt, [ a ]) (* This is the conventional name of the data constructors of the token GADT. *) let ttokengadtdata token = "T_" ^ token (* This is the definition of the type of tokens. It is defined as an algebraic data type, unless [--external-tokens M] is set, in which case it is defined as an abbreviation for the type [M.token]. *) let tokentypedef grammar = let typerhs = match Settings.token_type_mode with | Settings.TokenTypeOnly | Settings.TokenTypeAndCode -> (* Algebraic data type. *) TDefSum ( List.map (fun (tok, typo) -> { dataname = tok; datavalparams = (match typo with None -> [] | Some t -> [ TypTextual t ]); datatypeparams = None }) (typed_tokens grammar) ) | Settings.CodeOnly m -> (* Type abbreviation. *) TAbbrev (TypApp (m ^ "." ^ tctoken, [])) in [ IIComment "The type of tokens."; IITypeDecls [{ typename = tctoken; typeparams = []; typerhs; typeconstraint = None }] ] (* This is the definition of the token GADT. Here, the data constructors have no value argument, but have a type index. *) (* The token GADT is produced only when [Settings.inspection] is true. Thus, when [Settings.inspection] is false, we remain compatible with old versions of OCaml, without GADTs. *) (* Although the [token] type does not include the [error] token (because this token is never produced by the lexer), the token GADT must include the [error] token (because this GADT must describe all of the tokens that are allowed to appear in a production). *) (* It is defined as a generalized algebraic data type, unless [--external-tokens M] is set, in which case it is defined as an abbreviation for the type ['a M.tokengadt]. *) let tokengadtdef grammar = assert Settings.inspection; let param, typerhs = match Settings.token_type_mode with | Settings.TokenTypeOnly | Settings.TokenTypeAndCode -> (* Generalized algebraic data type. *) let param = "_" in param, TDefSum ( (* The ordering of this list matters. We want the data constructors to respect the internal ordering (as determined by [typed_tokens] in [UnparameterizedSyntax]) of the terminal symbols. This may be exploited in the table back-end to allow an unsafe conversion of a data constructor to an integer code. See [t2i] in [InspectionTableInterpreter]. *) { dataname = ttokengadtdata "error"; datavalparams = []; datatypeparams = Some [ tunit ] (* the [error] token has a semantic value of type [unit] *) } :: List.map (fun (token, typo) -> { dataname = ttokengadtdata token; datavalparams = []; datatypeparams = Some [ match typo with None -> tunit | Some t -> TypTextual t ] }) (typed_tokens grammar) ) | Settings.CodeOnly m -> (* Type abbreviation. *) let param = "a" in param, TAbbrev (TypApp (m ^ "." 
^ tctokengadt, [ TypVar param ])) in [ IIComment "The indexed type of terminal symbols."; IITypeDecls [{ typename = tctokengadt; typeparams = [ param ]; typerhs; typeconstraint = None }] ] (* If we were asked to only produce a type definition, then do so and stop. *) let produce_tokentypes grammar = match Settings.token_type_mode with | Settings.TokenTypeOnly -> (* Create both an .mli file and an .ml file. This is made necessary by the fact that the two can be different when there are functor parameters. *) let i = tokentypedef grammar @ listiflazy Settings.inspection (fun () -> tokengadtdef grammar ) in let module P = Printer.Make (struct let f = open_out (Settings.base ^ ".mli") let locate_stretches = None end) in P.interface [ IIFunctor (grammar.parameters, i) ]; let module P = Printer.Make (struct let f = open_out (Settings.base ^ ".ml") let locate_stretches = None end) in P.program [ SIFunctor (grammar.parameters, interface_to_structure i ) ]; exit 0 | Settings.CodeOnly _ | Settings.TokenTypeAndCode -> () (* The token type and the token GADTs can be referred to via a short (unqualified) name, regardless of how they have been defined (either directly or as an abbreviation). However, their data constructors must be qualified if [--external-tokens] is set. *) let tokenprefix id = match Settings.token_type_mode with | Settings.CodeOnly m -> m ^ "." ^ id | Settings.TokenTypeAndCode -> id | Settings.TokenTypeOnly -> id (* irrelevant, really *) let tokendata = tokenprefix let tokengadtdata token = tokenprefix (ttokengadtdata token) menhir-20151112/src/menhir.ml0000644000175000017500000000014612621170073014573 0ustar mehdimehdi(* The main program. *) (* Everything is in [Back]. *) module B = Back (* artificial dependency *) menhir-20151112/src/TableInterpreter.ml0000644000175000017500000001073512621170074016572 0ustar mehdimehdi(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, INRIA Paris-Rocquencourt *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright 2005-2015 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) module Make (T : TableFormat.TABLES) = Engine.Make (struct type state = int let number s = s type token = T.token type terminal = int type semantic_value = Obj.t let token2terminal = T.token2terminal let token2value = T.token2value let error_terminal = T.error_terminal let error_value = Obj.repr () type production = int let default_reduction state defred nodefred env = let code = PackedIntArray.get T.default_reduction state in if code = 0 then nodefred env else defred env (code - 1) let is_start prod = prod < T.start (* This auxiliary function helps access a compressed, two-dimensional matrix, like the action and goto tables. *) let unmarshal2 table i j = RowDisplacement.getget PackedIntArray.get PackedIntArray.get table i j (* This auxiliary function helps access a flattened, two-dimensional matrix, like the error bitmap. 
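The function [action], below, reads both: the error bitmap tells whether the (state, terminal) pair is legal, and, if it is, the action table supplies a packed action whose two least significant bits are an opcode and whose remaining bits are a parameter. For instance, the packed value 22, that is, 0b10110, decodes to the opcode 0b10 and the parameter 0b101 = 5, which reads: shift to state 5 and discard the current token, i.e., request a new one.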
*) let action state terminal value shift reduce fail env = match PackedIntArray.unflatten1 T.error state terminal with | 1 -> let action = unmarshal2 T.action state terminal in let opcode = action land 0b11 and param = action lsr 2 in if opcode >= 0b10 then (* 0b10 : shift/discard *) (* 0b11 : shift/nodiscard *) let please_discard = (opcode = 0b10) in shift env please_discard terminal value param else (* 0b01 : reduce *) (* 0b00 : cannot happen *) reduce env param | c -> assert (c = 0); fail env let goto state prod = let code = unmarshal2 T.goto state (PackedIntArray.get T.lhs prod) in (* code = 1 + state *) code - 1 exception Error = T.Error type semantic_action = (state, semantic_value, token) EngineTypes.env -> (state, semantic_value) EngineTypes.stack let semantic_action prod = (* Indexing into the array [T.semantic_action] is off by [T.start], because the start productions do not have entries in this array. *) T.semantic_action.(prod - T.start) (* If [T.trace] is [None], then the logging functions do nothing. *) let log = match T.trace with Some _ -> true | None -> false module Log = struct open Printf let state state = match T.trace with | Some _ -> fprintf stderr "State %d:\n%!" state | None -> () let shift terminal state = match T.trace with | Some (terminals, _) -> fprintf stderr "Shifting (%s) to state %d\n%!" terminals.(terminal) state | None -> () let reduce_or_accept prod = match T.trace with | Some (_, productions) -> fprintf stderr "%s\n%!" productions.(prod) | None -> () let lookahead_token token startp endp = match T.trace with | Some (terminals, _) -> fprintf stderr "Lookahead token is now %s (%d-%d)\n%!" terminals.(token) startp.Lexing.pos_cnum endp.Lexing.pos_cnum | None -> () let initiating_error_handling () = match T.trace with | Some _ -> fprintf stderr "Initiating error handling\n%!" | None -> () let resuming_error_handling () = match T.trace with | Some _ -> fprintf stderr "Resuming error handling\n%!" | None -> () let handling_error state = match T.trace with | Some _ -> fprintf stderr "Handling error in state %d\n%!" state | None -> () end end) menhir-20151112/src/IncrementalEngine.ml0000644000175000017500000003333412621170074016706 0ustar mehdimehdi(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, INRIA Paris-Rocquencourt *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright 2005-2015 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) open General (* This signature describes the incremental LR engine. *) (* In this mode, the user controls the lexer, and the parser suspends itself when it needs to read a new token. *) module type INCREMENTAL_ENGINE = sig type token (* The type ['a checkpoint] represents an intermediate or final state of the parser. An intermediate checkpoint is a suspension: it records the parser's current state, and allows parsing to be resumed. The parameter ['a] is the type of the semantic value that will eventually be produced if the parser succeeds. *) (* [Accepted] and [Rejected] are final checkpoints. [Accepted] carries a semantic value. *) (* [InputNeeded] is an intermediate checkpoint. It means that the parser wishes to read one token before continuing. 
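A checkpoint of this form is answered via [offer], which appears below; the other intermediate checkpoints are answered via [resume]. For instance, a minimal driver can be sketched as follows (assuming a lexer [lex : Lexing.lexbuf -> token] and a buffer [lexbuf]; this is essentially what the function [loop], below, does):

      let rec drive checkpoint =
        match checkpoint with
        | InputNeeded _ ->
            let triple = lex lexbuf, lexbuf.Lexing.lex_start_p, lexbuf.Lexing.lex_curr_p in
            drive (offer checkpoint triple)
        | Shifting _ | AboutToReduce _ | HandlingError _ ->
            drive (resume checkpoint)
        | Accepted v -> v
        | Rejected -> failwith "syntax error"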
*) (* [Shifting] is an intermediate checkpoint. It means that the parser is taking a shift transition. It exposes the state of the parser before and after the transition. The Boolean parameter tells whether the parser intends to request a new token after this transition. (It always does, except when it is about to accept.) *) (* [AboutToReduce] is an intermediate checkpoint. It means that the parser is about to perform a reduction step. It exposes the parser's current state as well as the production that is about to be reduced. *) (* [HandlingError] is an intermediate checkpoint. It means that the parser has detected an error and is currently handling it, in several steps. *) type env type production type 'a checkpoint = private | InputNeeded of env | Shifting of env * env * bool | AboutToReduce of env * production | HandlingError of env | Accepted of 'a | Rejected (* [offer] allows the user to resume the parser after it has suspended itself with a checkpoint of the form [InputNeeded env]. [offer] expects the old checkpoint as well as a new token and produces a new checkpoint. It does not raise any exception. *) val offer: 'a checkpoint -> token * Lexing.position * Lexing.position -> 'a checkpoint (* [resume] allows the user to resume the parser after it has suspended itself with a checkpoint of the form [AboutToReduce (env, prod)] or [HandlingError env]. [resume] expects the old checkpoint and produces a new checkpoint. It does not raise any exception. *) val resume: 'a checkpoint -> 'a checkpoint (* A token supplier is a function of no arguments which delivers a new token (together with its start and end positions) every time it is called. *) type supplier = unit -> token * Lexing.position * Lexing.position (* A pair of a lexer and a lexing buffer can be easily turned into a supplier. *) val lexer_lexbuf_to_supplier: (Lexing.lexbuf -> token) -> Lexing.lexbuf -> supplier (* The functions [offer] and [resume] are sufficient to write a parser loop. One can imagine many variations (which is why we expose these functions in the first place!). Here, we expose a few variations of the main loop, ready for use. *) (* [loop supplier checkpoint] begins parsing from [checkpoint], reading tokens from [supplier]. It continues parsing until it reaches a checkpoint of the form [Accepted v] or [Rejected]. In the former case, it returns [v]. In the latter case, it raises the exception [Error]. *) val loop: supplier -> 'a checkpoint -> 'a (* [loop_handle succeed fail supplier checkpoint] begins parsing from [checkpoint], reading tokens from [supplier]. It continues parsing until it reaches a checkpoint of the form [Accepted v] or [HandlingError env] (or [Rejected], but that should not happen, as [HandlingError _] will be observed first). In the former case, it calls [succeed v]. In the latter case, it calls [fail] with this checkpoint. It cannot raise [Error]. This means that Menhir's traditional error-handling procedure (which pops the stack until a state that can act on the [error] token is found) does not get a chance to run. Instead, the user can implement her own error handling code, in the [fail] continuation. *) val loop_handle: ('a -> 'answer) -> ('a checkpoint -> 'answer) -> supplier -> 'a checkpoint -> 'answer (* [loop_handle_undo] is analogous to [loop_handle], except it passes a pair of checkpoints to the failure continuation. The first (and oldest) checkpoint is the last [InputNeeded] checkpoint that was encountered before the error was detected. 
The second (and newest) checkpoint is where the error was detected, as in [loop_handle]. Going back to the first checkpoint can be thought of as undoing any reductions that were performed after seeing the problematic token. (These reductions must be default reductions or spurious reductions.) [loop_handle_undo] must initially be applied to an [InputNeeded] checkpoint. The parser's initial checkpoints satisfy this constraint. *) val loop_handle_undo: ('a -> 'answer) -> ('a checkpoint -> 'a checkpoint -> 'answer) -> supplier -> 'a checkpoint -> 'answer (* [loop_test f checkpoint accu] assumes that [checkpoint] has been obtained by submitting a token to the parser. It runs the parser from [checkpoint], through an arbitrary number of reductions, until the parser either accepts this token (i.e., shifts) or rejects it (i.e., signals an error). If the parser decides to shift, then the accumulator is updated by applying the user function [f] to the [env] just before shifting and to the old [accu]. Otherwise, the accumulator is not updated, i.e., [accu] is returned. *) (* It is desirable that the semantic actions be side-effect free, or that their side-effects be harmless (replayable). *) val loop_test: (env -> 'accu -> 'accu) -> 'a checkpoint -> 'accu -> 'accu (* The function [loop_test] can be used, after an error has been detected, to dynamically test which tokens would have been accepted at this point. We provide this test, ready for use. *) (* For completeness, one must undo any spurious reductions before carrying out this test -- that is, one must apply [acceptable] to the FIRST checkpoint that is passed by [loop_handle_undo] to its failure continuation. *) (* This test causes some semantic actions to be run! The semantic actions should be side-effect free, or their side-effects should be harmless. *) (* The position [pos] is used as the start and end positions of the hypothetical token, and may be picked up by the semantic actions. We suggest using the position where the error was detected. *) val acceptable: 'a checkpoint -> token -> Lexing.position -> bool (* The abstract type ['a lr1state] describes the non-initial states of the LR(1) automaton. The index ['a] represents the type of the semantic value associated with this state's incoming symbol. *) type 'a lr1state (* The states of the LR(1) automaton are numbered (from 0 and up). *) val number: _ lr1state -> int (* An element is a pair of a non-initial state [s] and a semantic value [v] associated with the incoming symbol of this state. The idea is, the value [v] was pushed onto the stack just before the state [s] was entered. Thus, for some type ['a], the state [s] has type ['a lr1state] and the value [v] has type ['a]. In other words, the type [element] is an existential type. *) type element = | Element: 'a lr1state * 'a * Lexing.position * Lexing.position -> element (* The parser's stack is (or, more precisely, can be viewed as) a stream of elements. The type [stream] is defined by the module [General]. *) type stack = element stream (* This is the parser's stack, a stream of elements. This stream is empty if the parser is in an initial state; otherwise, it is non-empty. The LR(1) automaton's current state is the one found in the top element of the stack. *) val stack: env -> stack (* These are the start and end positions of the current lookahead token. If invoked in an initial state, this function returns a pair of twice the initial position. 
*) val positions: env -> Lexing.position * Lexing.position (* This tells whether the parser is about to perform a default reduction. In particular, when applied to an environment taken from a result of the form [AboutToReduce (env, prod)], this tells whether the reduction that is about to take place is a default reduction. *) val has_default_reduction: env -> bool end (* This signature is a fragment of the inspection API that is made available to the user when [--inspection] is used. This fragment contains type definitions for symbols. *) module type SYMBOLS = sig (* The type ['a terminal] represents a terminal symbol. The type ['a nonterminal] represents a nonterminal symbol. In both cases, the index ['a] represents the type of the semantic values associated with this symbol. The concrete definitions of these types are generated. *) type 'a terminal type 'a nonterminal (* The type ['a symbol] represents a terminal or nonterminal symbol. It is the disjoint union of the types ['a terminal] and ['a nonterminal]. *) type 'a symbol = | T : 'a terminal -> 'a symbol | N : 'a nonterminal -> 'a symbol (* The type [xsymbol] is an existentially quantified version of the type ['a symbol]. This type is useful in situations where the index ['a] is not statically known. *) type xsymbol = | X : 'a symbol -> xsymbol end (* This signature describes the inspection API that is made available to the user when [--inspection] is used. *) module type INSPECTION = sig (* The types of symbols are described above. *) include SYMBOLS (* The type ['a lr1state] is meant to be the same as in [INCREMENTAL_ENGINE]. *) type 'a lr1state (* The type [production] is meant to be the same as in [INCREMENTAL_ENGINE]. It represents a production of the grammar. A production can be examined via the functions [lhs] and [rhs] below. *) type production (* An LR(0) item is a pair of a production [prod] and a valid index [i] into this production. That is, if the length of [rhs prod] is [n], then [i] is comprised between 0 and [n], inclusive. *) type item = production * int (* Ordering functions. *) val compare_terminals: _ terminal -> _ terminal -> int val compare_nonterminals: _ nonterminal -> _ nonterminal -> int val compare_symbols: xsymbol -> xsymbol -> int val compare_productions: production -> production -> int val compare_items: item -> item -> int (* [incoming_symbol s] is the incoming symbol of the state [s], that is, the symbol that the parser must recognize before (has recognized when) it enters the state [s]. This function gives access to the semantic value [v] stored in a stack element [Element (s, v, _, _)]. Indeed, by case analysis on the symbol [incoming_symbol s], one discovers the type ['a] of the value [v]. *) val incoming_symbol: 'a lr1state -> 'a symbol (* [items s] is the set of the LR(0) items in the LR(0) core of the LR(1) state [s]. This set is not epsilon-closed. This set is presented as a list, in an arbitrary order. *) val items: _ lr1state -> item list (* [lhs prod] is the left-hand side of the production [prod]. This is always a non-terminal symbol. *) val lhs: production -> xsymbol (* [rhs prod] is the right-hand side of the production [prod]. This is a (possibly empty) sequence of (terminal or nonterminal) symbols. *) val rhs: production -> xsymbol list (* [nullable nt] tells whether the non-terminal symbol [nt] is nullable. That is, it is true if and only if this symbol produces the empty word [epsilon]. 
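For instance, with the definitions of the standard library (standard.mly), [list(X)] is nullable, whereas [nonempty_list(X)] is not.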
*) val nullable: _ nonterminal -> bool (* [first nt t] tells whether the FIRST set of the nonterminal symbol [nt] contains the terminal symbol [t]. That is, it is true if and only if [nt] produces a word that begins with [t]. *) val first: _ nonterminal -> _ terminal -> bool (* [xfirst] is analogous to [first], but expects a first argument of type [xsymbol] instead of [_ terminal]. *) val xfirst: xsymbol -> _ terminal -> bool (* [foreach_terminal] enumerates the terminal symbols, including [error]. [foreach_terminal_but_error] enumerates the terminal symbols, excluding [error]. *) val foreach_terminal: (xsymbol -> 'a -> 'a) -> 'a -> 'a val foreach_terminal_but_error: (xsymbol -> 'a -> 'a) -> 'a -> 'a end (* This signature combines the incremental API and the inspection API. *) module type EVERYTHING = sig include INCREMENTAL_ENGINE include INSPECTION with type 'a lr1state := 'a lr1state with type production := production end menhir-20151112/src/Maps.ml0000644000175000017500000000507112621170073014213 0ustar mehdimehdi(* BEGIN PERSISTENT_MAPS *) module type PERSISTENT_MAPS = sig type key type 'data t val empty: 'data t val add: key -> 'data -> 'data t -> 'data t val find: key -> 'data t -> 'data val iter: (key -> 'data -> unit) -> 'data t -> unit end (* END PERSISTENT_MAPS *) (* BEGIN IMPERATIVE_MAPS *) module type IMPERATIVE_MAPS = sig type key type 'data t val create: unit -> 'data t val clear: 'data t -> unit val add: key -> 'data -> 'data t -> unit val find: key -> 'data t -> 'data val iter: (key -> 'data -> unit) -> 'data t -> unit end (* END IMPERATIVE_MAPS *) (* BEGIN IMPERATIVE_MAP *) module type IMPERATIVE_MAP = sig type key type data val set: key -> data -> unit val get: key -> data option end (* END IMPERATIVE_MAP *) module PersistentMapsToImperativeMaps (M : PERSISTENT_MAPS) : IMPERATIVE_MAPS with type key = M.key and type 'data t = 'data M.t ref = struct type key = M.key type 'data t = 'data M.t ref let create () = ref M.empty let clear t = t := M.empty let add k d t = t := M.add k d !t let find k t = M.find k !t let iter f t = M.iter f !t end module ImperativeMapsToImperativeMap (M : IMPERATIVE_MAPS) (D : sig type data end) : IMPERATIVE_MAP with type key = M.key and type data = D.data = struct type key = M.key type data = D.data let m = M.create() let set k d = M.add k d m let get k = try Some (M.find k m) with Not_found -> None end module ArrayAsImperativeMaps (K : sig val n: int end) : IMPERATIVE_MAPS with type key = int and type 'data t = 'data option array = struct open K type key = int type 'data t = 'data option array let create () = Array.make n None let clear m = Array.fill m 0 n None let add key data m = m.(key) <- Some data let find key m = match m.(key) with | None -> raise Not_found | Some data -> data let iter f m = Array.iteri (fun key data -> match data with | None -> () | Some data -> f key data ) m end module HashTableAsImperativeMaps (H : Hashtbl.HashedType) : IMPERATIVE_MAPS with type key = H.t = struct include Hashtbl.Make(H) let create () = create 1023 let add key data table = add table key data let find table key = find key table end module TrivialHashedType (T : sig type t end) : Hashtbl.HashedType with type t = T.t = struct include T let equal = (=) let hash = Hashtbl.hash end menhir-20151112/src/parameters.ml0000644000175000017500000000236412621170073015460 0ustar mehdimehdi(* TEMPORARY clean up and write an .mli file *) open Syntax open Positions let app p ps = match ps with | [] -> ParameterVar p | _ -> ParameterApp (p, ps) let oapp1 o p = match o 
with | None -> p | Some var -> ParameterApp (var, [ p ]) let unapp = function | ParameterVar x -> (x, []) | ParameterApp (p, ps) -> (p, ps) let rec map f = function | ParameterVar x -> ParameterVar (f x) | ParameterApp (p, ps) -> ParameterApp (f p, List.map (map f) ps) let rec fold f init = function | ParameterVar x -> f init x | ParameterApp (p, ps) -> f (List.fold_left (fold f) init ps) p let identifiers m p = fold (fun acu x -> StringMap.add x.value x.position acu) m p type t = parameter let rec equal x y = match x, y with | ParameterVar x, ParameterVar y when x.value = y.value -> true | ParameterApp (p1, p2), ParameterApp (p1', p2') -> p1.value = p1'.value && List.for_all2 equal p2 p2' | _ -> false let hash = function | ParameterVar x | ParameterApp (x, _) -> Hashtbl.hash (Positions.value x) let position = function | ParameterVar x | ParameterApp (x, _) -> Positions.position x let with_pos p = Positions.with_pos (position p) p menhir-20151112/src/standard.mly0000644000175000017500000001463112621170074015307 0ustar mehdimehdi/**************************************************************************/ /* */ /* Menhir */ /* */ /* François Pottier, INRIA Paris-Rocquencourt */ /* Yann Régis-Gianas, PPS, Université Paris Diderot */ /* */ /* Copyright 2005-2015 Institut National de Recherche en Informatique */ /* et en Automatique. All rights reserved. This file is distributed */ /* under the terms of the GNU Library General Public License, with the */ /* special exception on linking described in file LICENSE. */ /* */ /**************************************************************************/ (* This is menhir's standard library. It offers a number of parameterized nonterminal definitions, such as options and lists, that should be useful in a number of circumstances. *) %% (* ------------------------------------------------------------------------- *) (* The identity. *) (* [anonymous(X)] is the same as [X]. *) (* This allows placing an anonymous sub-rule in the middle of a rule, as in: foo anonymous(baz { action1 }) bar { action2 } Because anonymous is marked %inline, everything is expanded away. So, this is equivalent to: foo baz bar { action1; action2 } Note that [action1] moves to the end of the rule. The anonymous sub-rule can even have several branches, as in: foo anonymous(baz { action1a } | quux { action1b }) bar { action2 } This is expanded to: foo baz bar { action1a; action2 } | foo quux bar { action1b; action2 } *) %public %inline anonymous(X): x = X { x } (* [embedded(X)] is the same as [X]. *) (* This allows placing an anonymous sub-rule in the middle of a rule, as in: foo embedded(baz { action1 }) bar { action2 } Because [embedded] is not marked %inline, this is equivalent to: foo xxx bar { action2 } where the fresh non-terminal symbol [xxx] is separately defined by: xxx: baz { action1 } In particular, if there is no [baz], what we get is a semantic action embedded in the middle of a rule. For instance, foo embedded({ action1 }) bar { action2 } is equivalent to: foo xxx bar { action2 } where [xxx] is separately defined by the rule: xxx: { action1 } *) %public embedded(X): x = X { x } (* ------------------------------------------------------------------------- *) (* Options. *) (* [option(X)] recognizes either nothing or [X]. It produces a value of type ['a option] if [X] produces a value of type ['a]. *) %public option(X): /* nothing */ { None } | x = X { Some x } (* [ioption(X)] is identical to [option(X)], except its definition is inlined. 
This has the effect of duplicating the production that refers to it, possibly eliminating an LR(1) conflict. *) %public %inline ioption(X): /* nothing */ { None } | x = X { Some x } (* [boption(X)] recognizes either nothing or [X]. It produces a value of type [bool]. *) %public boption(X): /* nothing */ { false } | X { true } (* [loption(X)] recognizes either nothing or [X]. It produces a value of type ['a list] if [X] produces a value of type ['a list]. *) %public loption(X): /* nothing */ { [] } | x = X { x } (* ------------------------------------------------------------------------- *) (* Sequences. *) (* [pair(X, Y)] recognizes the sequence [X Y]. It produces a value of type ['a * 'b] if [X] and [Y] produce values of type ['a] and ['b], respectively. *) %public %inline pair(X, Y): x = X; y = Y { (x, y) } (* [separated_pair(X, sep, Y)] recognizes the sequence [X sep Y]. It produces a value of type ['a * 'b] if [X] and [Y] produce values of type ['a] and ['b], respectively. *) %public %inline separated_pair(X, sep, Y): x = X; sep; y = Y { (x, y) } (* [preceded(opening, X)] recognizes the sequence [opening X]. It passes on the value produced by [X], so that it produces a value of type ['a] if [X] produces a value of type ['a]. *) %public %inline preceded(opening, X): opening; x = X { x } (* [terminated(X, closing)] recognizes the sequence [X closing]. It passes on the value produced by [X], so that it produces a value of type ['a] if [X] produces a value of type ['a]. *) %public %inline terminated(X, closing): x = X; closing { x } (* [delimited(opening, X, closing)] recognizes the sequence [opening X closing]. It passes on the value produced by [X], so that it produces a value of type ['a] if [X] produces a value of type ['a]. *) %public %inline delimited(opening, X, closing): opening; x = X; closing { x } (* ------------------------------------------------------------------------- *) (* Lists. *) (* [list(X)] recognizes a possibly empty list of [X]'s. It produces a value of type ['a list] if [X] produces a value of type ['a]. The front element of the list is the first element that was parsed. *) %public list(X): /* nothing */ { [] } | x = X; xs = list(X) { x :: xs } (* [nonempty_list(X)] recognizes a nonempty list of [X]'s. It produces a value of type ['a list] if [X] produces a value of type ['a]. The front element of the list is the first element that was parsed. *) %public nonempty_list(X): x = X { [ x ] } | x = X; xs = nonempty_list(X) { x :: xs } (* [separated_list(separator, X)] recognizes a possibly empty list of [X]'s, separated with [separator]'s. It produces a value of type ['a list] if [X] produces a value of type ['a]. The front element of the list is the first element that was parsed. *) %public %inline separated_list(separator, X): xs = loption(separated_nonempty_list(separator, X)) { xs } (* [separated_nonempty_list(separator, X)] recognizes a nonempty list of [X]'s, separated with [separator]'s. It produces a value of type ['a list] if [X] produces a value of type ['a]. The front element of the list is the first element that was parsed. *) %public separated_nonempty_list(separator, X): x = X { [ x ] } | x = X; separator; xs = separated_nonempty_list(separator, X) { x :: xs } %% menhir-20151112/src/keyword.ml0000644000175000017500000000400712621170073014775 0ustar mehdimehdi(* This module provides some type and function definitions that help deal with the keywords that we recognize within semantic actions. 
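For instance, the keyword [$startpos(e)] is represented as [Position (RightNamed "e", WhereStart, FlavorPosition)]; the function [posvar], below, maps it to the OCaml variable name [_startpos_e_], the variable that this keyword is replaced with inside the semantic action.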
*) (* ------------------------------------------------------------------------- *) (* Types. *) (* The user can request position information either at type [int] (a simple offset) or at type [Lexing.position]. *) type flavor = | FlavorOffset | FlavorPosition (* The user can request position information about the $start or $end of a symbol. Also, $symbolstart requests the computation of the start position of the first nonempty element in a production. *) type where = | WhereSymbolStart | WhereStart | WhereEnd (* The user can request position information about a production's left-hand side or about one of the symbols in its right-hand side, which he can refer to by position or by name. *) type subject = | Before | Left | RightNamed of string (* Keywords inside semantic actions. They allow access to semantic values or to position information. *) type keyword = | Position of subject * where * flavor | SyntaxError (* ------------------------------------------------------------------------- *) (* These auxiliary functions help map a [Position] keyword to the name of the variable that the keyword is replaced with. *) let where = function | WhereSymbolStart -> "symbolstart" | WhereStart -> "start" | WhereEnd -> "end" let subject = function | Before -> "__0_" | Left -> "" | RightNamed id -> Printf.sprintf "_%s_" id let flavor = function | FlavorPosition -> "pos" | FlavorOffset -> "ofs" let posvar s w f = Printf.sprintf "_%s%s%s" (where w) (flavor f) (subject s) (* ------------------------------------------------------------------------- *) (* Sets of keywords. *) module KeywordSet = struct include Set.Make (struct type t = keyword let compare = compare end) let map f keywords = fold (fun keyword accu -> add (f keyword) accu ) keywords empty end menhir-20151112/src/slr.mli0000644000175000017500000000057312621170073014266 0ustar mehdimehdi(* This module extends the LR(0) automaton with lookahead information in order to construct an SLR(1) automaton. The lookahead information is obtained by considering the FOLLOW sets. *) (* This construction is not used by Menhir, but can be used to check whether the grammar is in the class SLR(1). This check is performed when the log level [lg] is at least 1. *) menhir-20151112/src/LowIntegerPriorityQueue.mli0000644000175000017500000000173512621170073020315 0ustar mehdimehdi(** This module implements a simple-minded priority queue, under the assumption that priorities are low nonnegative integers. *) (** The type of priority queues. *) type 'a t (** [create default] creates an empty priority queue. The [default] value is used to fill empty physical slots, but is otherwise irrelevant. *) val create: 'a -> 'a t (** [add q x p] inserts the element [x], with priority [p], into the queue [q]. *) val add: 'a t -> 'a -> int -> unit (** [remove q] extracts out of [q] and returns an element with minimum priority. *) val remove: 'a t -> 'a option (** [is_empty q] tests whether the queue [q] is empty. *) val is_empty: 'a t -> bool (** [cardinal q] returns the number of elements in the queue [q]. *) val cardinal: 'a t -> int (** [repeat q f] repeatedly extracts an element with minimum priority out of [q] and passes it to [f] (which may insert new elements into [q]), until [q] is exhausted. *) val repeat: 'a t -> ('a -> unit) -> unit menhir-20151112/src/gSet.ml0000644000175000017500000000522412621170073014215 0ustar mehdimehdi(* This is a stripped down version of [GSet] that describes both [Patricia] and [CompressedBitSet]. The full version of [GSet] is in [AlphaLib]. 
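As an illustration, for any module [S] that implements this signature with [element = int], the expression [S.fold (+) (S.add 1 (S.add 2 S.empty)) 0] evaluates to 3.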
*) module type S = sig (* Elements are assumed to have a natural total order. *) type element (* Sets. *) type t (* The empty set. *) val empty: t (* [is_empty s] tells whether [s] is the empty set. *) val is_empty: t -> bool (* [singleton x] returns a singleton set containing [x] as its only element. *) val singleton: element -> t (* [is_singleton s] tests whether [s] is a singleton set. *) val is_singleton: t -> bool (* [cardinal s] returns the cardinal of [s]. *) val cardinal: t -> int (* [choose s] returns an arbitrarily chosen element of [s], if [s] is nonempty, and raises [Not_found] otherwise. *) val choose: t -> element (* [mem x s] returns [true] if and only if [x] appears in the set [s]. *) val mem: element -> t -> bool (* [add x s] returns a set whose elements are all elements of [s], plus [x]. *) val add: element -> t -> t (* [remove x s] returns a set whose elements are all elements of [s], except [x]. *) val remove: element -> t -> t (* [union s1 s2] returns the union of the sets [s1] and [s2]. *) val union: t -> t -> t (* [inter s t] returns the set intersection of [s] and [t], that is, $s\cap t$. *) val inter: t -> t -> t (* [disjoint s1 s2] returns [true] if and only if the sets [s1] and [s2] are disjoint, i.e. iff their intersection is empty. *) val disjoint: t -> t -> bool (* [iter f s] invokes [f x], in turn, for each element [x] of the set [s]. Elements are presented to [f] in increasing order. *) val iter: (element -> unit) -> t -> unit (* [fold f s seed] invokes [f x accu], in turn, for each element [x] of the set [s]. Elements are presented to [f] in increasing order. The initial value of [accu] is [seed]; then, at each new call, its value is the value returned by the previous invocation of [f]. The value returned by [fold] is the final value of [accu]. In other words, if $s = \{ x_1, x_2, \ldots, x_n \}$, where $x_1 < x_2 < \ldots < x_n$, then [fold f s seed] computes $([f]\,x_n\,\ldots\,([f]\,x_2\,([f]\,x_1\,[seed]))\ldots)$. *) val fold: (element -> 'b -> 'b) -> t -> 'b -> 'b (* [elements s] is a list of all elements in the set [s]. *) val elements: t -> element list (* [compare] is an ordering over sets. *) val compare: t -> t -> int (* [equal] implements equality over sets. *) val equal: t -> t -> bool (* [subset] implements the subset predicate over sets. *) val subset: (t -> t -> bool) end menhir-20151112/src/lr1partial.ml0000644000175000017500000001443112621170073015366 0ustar mehdimehdiopen Grammar exception Oops module Run (X : sig (* A restricted set of tokens of interest. *) val tokens: TerminalSet.t (* A state of the (merged) LR(1) automaton that we're trying to simulate. *) val goal: Lr1.node end) = struct (* First, let's restrict our interest to the nodes of the merged LR(1) automaton that can reach the goal node. Some experiments show that this can involve one tenth to one half of all nodes. This optimization seems minor, but is easy to implement. *) let relevant = Lr1.reverse_dfs X.goal (* Second, all of the states that we shall consider are restricted to the set of tokens of interest. This is an important idea: by abstracting away some information, we make the construction much faster. *) let restrict = Lr0.restrict X.tokens (* Constructing the automaton. The automaton is represented as a graph. States are never merged -- this is a canonical LR(1) construction! As we go, we record the correspondence between nodes in this automaton and nodes in the merged LR(1) automaton. This allows us to tell when we have reached the desired place. 
This also allows us not to follow transitions that have already been eliminated, in the merged automaton, via resolution of shift/reduce conflicts. Whenever we follow a transition in the canonical LR(1) automaton, we check that the corresponding transition is legal in the merged LR(1) automaton. The automaton is explored breadth-first, and shortest paths from every node to one of the start nodes are recorded. *) type node = { state: Lr0.lr1state; ancestor: (Symbol.t * node) option; shadow: Lr1.node; } (* A queue of pending nodes, whose successors should be explored. *) let queue : node Queue.t = Queue.create() (* Mapping of LR(0) state numbers to lists of nodes. *) let map : node list array = Array.make Lr0.n [] (* Exploring a state. This creates a new node, if necessary, and enqueues it for further exploration. *) exception Goal of node * Terminal.t let explore ancestor shadow (state : Lr0.lr1state) : unit = (* Find all existing nodes that share the same LR(0) core. *) let k = Lr0.core state in assert (k < Lr0.n); let similar = map.(k) in (* Check whether one of these nodes coincides with the candidate new node. If so, stop. This check requires comparing not only the states of the partial, canonical automaton, but also their shadows in the full, merged automaton. This is because a single state of the canonical automaton may be reached along several different paths, leading to distinct shadows in the merged automaton, and we must explore all of these paths in order to ensure that we eventually find a goal node. *) if not (List.exists (fun node -> Lr0.equal state node.state && shadow == node.shadow ) similar) then begin (* Otherwise, create a new node. *) let node = { state = state; ancestor = ancestor; shadow = shadow; } in map.(k) <- node :: similar; Queue.add node queue; (* Check whether this is a goal node. A node [N] is a goal node if (i) [N] has a conflict involving one of the tokens of interest and (ii) [N] corresponds to the goal node, that is, the path that leads to [N] in the canonical LR(1) automaton leads to the goal node in the merged LR(1) automaton. Note that these conditions do not uniquely define [N]. *) if shadow == X.goal then let can_reduce = ref TerminalSet.empty in let reductions1 : Production.index list TerminalMap.t = Lr1.reductions shadow in List.iter (fun (toks, prod) -> TerminalSet.iter (fun tok -> (* We are looking at a [(tok, prod)] pair -- a reduction in the canonical automaton state. *) (* Check that this reduction, which exists in the canonical automaton state, also exists in the merged automaton -- that is, it wasn't suppressed by conflict resolution. *) if List.mem prod (TerminalMap.lookup tok reductions1) then try let (_ : Lr1.node) = SymbolMap.find (Symbol.T tok) (Lr1.transitions shadow) in (* Shift/reduce conflict. *) raise (Goal (node, tok)) with Not_found -> let toks = !can_reduce in (* We rely on the property that [TerminalSet.add tok toks] preserves physical equality when [tok] is a member of [toks]. *) let toks' = TerminalSet.add tok toks in if toks == toks' then (* Reduce/reduce conflict. *) raise (Goal (node, tok)) else (* No conflict so far. *) can_reduce := toks' ) toks ) (Lr0.reductions state) end (* Populate the queue with the start nodes. Until we find a goal node, take a node out of the queue, construct the nodes that correspond to its successors, and enqueue them.
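The draining of the queue relies on [Misc.qiter], which -- roughly, and glossing over its actual definition -- can be thought of as [let rec qiter f q = if not (Queue.is_empty q) then (f (Queue.pop q); qiter f q)], where [f] is free to enqueue new nodes.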
*) let goal, token = try ProductionMap.iter (fun (prod : Production.index) (k : Lr0.node) -> let shadow = try ProductionMap.find prod Lr1.entry with Not_found -> assert false in if relevant shadow then explore None shadow (restrict (Lr0.start k)) ) Lr0.entry; Misc.qiter (fun node -> SymbolMap.iter (fun symbol state -> try let shadow = SymbolMap.find symbol (Lr1.transitions node.shadow) in if relevant shadow then explore (Some (symbol, node)) shadow (restrict state) with Not_found -> (* No shadow. This can happen if a shift/reduce conflict was resolved in favor of reduce. Ignore that transition. *) () ) (Lr0.transitions node.state) ) queue; (* We didn't find a goal node. This shouldn't happen! If the goal node in the merged LR(1) automaton has a conflict, then there should exist a node with a conflict in the canonical automaton as well. Otherwise, Pager's construction is incorrect. *) raise Oops with Goal (node, tok) -> node, tok (* Query the goal node that was found about the shortest path from it to one of the entry nodes. *) let source, path = let rec follow path node = match node.ancestor with | None -> Lr1.start2item node.shadow, Array.of_list path | Some (symbol, node) -> follow (symbol :: path) node in follow [] goal let goal = Lr0.export goal.state end menhir-20151112/src/version.ml0000644000175000017500000000003112621170074014770 0ustar mehdimehdilet version = "20151112" menhir-20151112/src/lookahead.mli0000644000175000017500000000077512621170073015415 0ustar mehdimehdi(* These are the operations required of lookahead sets during a closure computation. This signature is exploited by the functor [Item.Closure]. *) module type S = sig (* The type of lookahead sets. *) type t (* The empty lookahead set. Redundant with the following, but convenient. *) val empty: t (* A concrete, constant set of terminal symbols. *) val constant: Grammar.TerminalSet.t -> t (* [union s1 s2] returns the union of [s1] and [s2]. *) val union: t -> t -> t end menhir-20151112/src/Driver.mli0000644000175000017500000000033712621170073014717 0ustar mehdimehdi(* The module [Driver] serves to offer a unified API to the parser, which could be produced by either ocamlyacc or Menhir. *) val grammar : (Lexing.lexbuf -> Parser.token) -> Lexing.lexbuf -> ConcreteSyntax.grammar menhir-20151112/src/tableBackend.mli0000644000175000017500000000015112621170073016015 0ustar mehdimehdi(* The (table-based) code generator. *) module Run (T : sig end) : sig val program: IL.program end menhir-20151112/src/Printers.ml0000644000175000017500000000656012621170074015126 0ustar mehdimehdi(**************************************************************************) (*                                                                        *) (*                                   Menhir                               *) (*                                                                        *) (*                       François Pottier, INRIA Paris-Rocquencourt       *) (*              Yann Régis-Gianas, PPS, Université Paris Diderot          *) (*                                                                        *) (*  Copyright 2005-2015 Institut National de Recherche en Informatique    *) (*  et en Automatique. All rights reserved. This file is distributed      *) (*  under the terms of the GNU Library General Public License, with the   *) (*  special exception on linking described in file LICENSE.               *) (*                                                                        *) (**************************************************************************) module Make (I : IncrementalEngine.EVERYTHING) (User : sig val print: string -> unit val print_symbol: I.xsymbol -> unit val print_element: (I.element -> unit) option end) = struct let arrow = " -> " let dot = "." let space = " " let newline = "\n" open User open I (* Printing a list of symbols.
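For instance, with a dot offset of [1], the three symbols [E PLUS E] would be rendered as [E . PLUS E]. (This rendering is hypothetical, since the actual printing of each symbol is delegated to the user-supplied [print_symbol].)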
An optional dot is printed at offset [i] into the list [symbols], if this offset lies between [0] and the length of the list (inclusive). *) let rec print_symbols i symbols = if i = 0 then begin print dot; print space; print_symbols (-1) symbols end else begin match symbols with | [] -> () | symbol :: symbols -> print_symbol symbol; print space; print_symbols (i - 1) symbols end (* Printing an element as a symbol. *) let print_element_as_symbol element = match element with | Element (s, _, _, _) -> print_symbol (X (incoming_symbol s)) (* Some of the functions that follow need an element printer. They use [print_element] if provided by the user; otherwise they use [print_element_as_symbol]. *) let print_element = match print_element with | Some print_element -> print_element | None -> print_element_as_symbol (* Printing a stack as a list of symbols. *) let print_stack stack = General.foldr (fun element () -> print_element element; print space ) stack (); print newline (* Printing an item. *) let print_item (prod, i) = print_symbol (lhs prod); print arrow; print_symbols i (rhs prod); print newline (* Printing a list of symbols (public version). *) let print_symbols symbols = print_symbols (-1) symbols (* Printing a production (without a dot). *) let print_production prod = print_item (prod, -1) (* Printing the current LR(1) state. *) let print_current_state env = print "Current LR(1) state: "; match Lazy.force (stack env) with | General.Nil -> print "<some initial state>"; print newline | General.Cons (Element (current, _, _, _), _) -> print (string_of_int (Obj.magic current)); (* TEMPORARY safe conversion needed *) print newline; List.iter print_item (items current) let print_env env = print_stack (stack env); print_current_state env; print newline end menhir-20151112/src/astar.ml0000755000175000017500000002235412621170073014433 0ustar mehdimehdi(* This module implements A* search, following Hart, Nilsson, and Raphael (1968). To each visited graph node, the algorithm associates an internal record, carrying various information. For this reason, the algorithm's space complexity is, in the worst case, linear in the size of the graph. The mapping of nodes to internal records is implemented via a hash table, while the converse mapping is direct (via a record field). Nodes that remain to be examined are kept in a priority queue, where the priority of a node is the cost of the shortest known path from the start node to it plus the estimated cost of a path from this node to a goal node. (Lower priority nodes are considered first). It is the use of the second summand that makes A* more efficient than Dijkstra's standard algorithm for finding shortest paths in an arbitrary graph. In fact, when [G.estimate] is the constant zero function, A* coincides with Dijkstra's algorithm. One should note that A* is faster than Dijkstra's algorithm only when a path to some goal node exists. Otherwise, both algorithms explore the entire graph, and have similar time requirements. The priority queue is implemented as an array of doubly linked lists. *) module Make (G : sig (* Graph nodes. *) type node include Hashtbl.HashedType with type t := node (* Edge labels. *) type label (* The source node(s). *) val sources: (node -> unit) -> unit (* [successors n f] presents each of [n]'s successors, in an arbitrary order, to [f], together with the cost of the edge that was followed. *) val successors: node -> (label -> int -> node -> unit) -> unit (* An estimate of the cost of the shortest path from the supplied node to some goal node.
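As a purely hypothetical instance: if nodes were grid points [(x, y)] with unit-cost moves and the single goal were [(gx, gy)], then [fun (x, y) -> abs (gx - x) + abs (gy - y)], the Manhattan distance, would be a suitable estimate.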
For algorithms such as A* and IDA* to find shortest paths, this estimate must be a correct under-approximation of the actual cost. *) val estimate: node -> int end) = struct type cost = int (* Nodes with low priorities are dealt with first. *) type priority = cost (* Paths back to a source (visible by the user). *) type path = | Edge of G.label * path | Source of G.node let rec follow labels path = match path with | Source node -> node, labels | Edge (label, path) -> follow (label :: labels) path let reverse path = follow [] path type inode = { (* Graph node associated with this internal record. *) this: G.node; (* Cost of the best known path from a source node to this node. (ghat) *) mutable cost: cost; (* Estimated cost of the best path from this node to a goal node. (hhat) *) estimate: cost; (* Best known path from a source node to this node. *) mutable path: path; (* Previous node on doubly linked priority list *) mutable prev: inode; (* Next node on doubly linked priority list *) mutable next: inode; (* The node's priority, if the node is in the queue; -1 otherwise *) mutable priority: priority; } (* This auxiliary module maintains a mapping of graph nodes to internal records. *) module M : sig (* Adds a binding to the mapping. *) val add: G.node -> inode -> unit (* Retrieves the internal record for this node. Raises [Not_found] if no such record exists. *) val get: G.node -> inode end = struct module H = Hashtbl.Make(struct include G type t = node end) let t = H.create 100003 let add node inode = H.add t node inode let get node = H.find t node end (* This auxiliary module maintains a priority queue of internal records. *) module P : sig (* Adds this node to the queue. *) val add: inode -> priority -> unit (* Adds this node to the queue, or changes its priority, if it already was in the queue. It is assumed, in the second case, that the priority can only decrease. *) val add_or_decrease: inode -> priority -> unit (* Retrieve a node with lowest priority out of the queue. *) val get: unit -> inode option end = struct module InfiniteArray = MenhirLib.InfiniteArray (* Array of pointers to the doubly linked lists, indexed by priorities. There is no a priori bound on the size of this array -- its size is increased if needed. It is up to the user to use a graph where paths have reasonable lengths. *) let a = InfiniteArray.make None (* Index of lowest nonempty list, if there is one; or lower (sub-optimal, but safe). If the queue is empty, [best] is arbitrary. *) let best = ref 0 (* Current number of elements in the queue. Used in [get] to stop the search for a nonempty bucket. *) let cardinal = ref 0 (* Adjust node's priority and insert into doubly linked list. *) let add inode priority = assert (0 <= priority); cardinal := !cardinal + 1; inode.priority <- priority; match InfiniteArray.get a priority with | None -> InfiniteArray.set a priority (Some inode); (* Decrease [best], if necessary, so as not to miss the new element. In the special case of A*, this never happens. *) assert (!best <= priority); (* if priority < !best then best := priority *) | Some inode' -> inode.next <- inode'; inode.prev <- inode'.prev; inode'.prev.next <- inode; inode'.prev <- inode (* Takes a node off its doubly linked list. Does not adjust [best], as this is not necessary in order to preserve the invariant.
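Indeed, [best] is only required to be a lower bound on the lowest nonempty priority, and a removal can only raise that priority; [get_nonempty], below, simply skips any empty buckets.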
*) let remove inode = cardinal := !cardinal - 1; if inode.next == inode then InfiniteArray.set a inode.priority None else begin InfiniteArray.set a inode.priority (Some inode.next); inode.next.prev <- inode.prev; inode.prev.next <- inode.next; inode.next <- inode; inode.prev <- inode end; inode.priority <- -1 let rec get () = if !cardinal = 0 then None else get_nonempty() and get_nonempty () = (* Look for next nonempty bucket. We know there is one. This may seem inefficient, because it is a linear search. However, in A*, [best] never decreases, so the total cost of this loop is the maximum priority ever used. *) match InfiniteArray.get a !best with | None -> best := !best + 1; get_nonempty() | Some inode as result -> remove inode; result let add_or_decrease inode priority = if inode.priority >= 0 then remove inode; add inode priority end (* Initialization. *) let estimate node = let e = G.estimate node in assert (0 <= e); (* failure means user error *) e let () = G.sources (fun node -> let rec inode = { this = node; cost = 0; estimate = estimate node; path = Source node; prev = inode; next = inode; priority = -1 } in M.add node inode; P.add inode inode.estimate ) (* Access to the search results (after the search is over). *) let distance node = try (M.get node).cost with Not_found -> max_int let path node = (M.get node).path (* let [Not_found] escape if no path was found *) (* Search. *) let rec search f = (* Pick the open node that currently has lowest fhat, that is, lowest estimated total cost of a path from a source node through it to a goal node. *) match P.get() with | None -> (* Finished. *) distance, path | Some inode -> let node = inode.this in (* Let the user know about this newly discovered node. *) f (node, inode.path); (* Otherwise, examine its successors. *) G.successors node (fun label edge_cost son -> assert (0 <= edge_cost); (* failure means user error *) (* Determine the cost of the best known path from the start node, through this node, to this son. *) let new_cost = inode.cost + edge_cost in assert (0 <= new_cost); (* failure means overflow *) try let ison = M.get son in if new_cost < ison.cost then begin (* This son has been visited before, but this new path to it is shorter. If it was already open and waiting in the priority queue, decrease its priority; otherwise, mark it as open and insert it into the queue. *) let new_fhat = new_cost + ison.estimate in assert (0 <= new_fhat); (* failure means overflow *) P.add_or_decrease ison new_fhat; ison.cost <- new_cost; ison.path <- Edge (label, inode.path) end with Not_found -> (* This son was never visited before. Allocate a new status record for it and mark it as open. *) let rec ison = { this = son; cost = new_cost; estimate = estimate son; path = Edge (label, inode.path); prev = ison; next = ison; priority = -1 } in M.add son ison; let fhat = new_cost + ison.estimate in assert (0 <= fhat); (* failure means overflow *) P.add ison fhat ); search f end menhir-20151112/src/printer.mli0000644000175000017500000000131512621170073015144 0ustar mehdimehdi(* A pretty-printer for [IL]. *) module Make (X : sig (* This is the channel that is being written to. *) val f: out_channel (* This controls the way we print Objective Caml stretches (types and semantic actions). We either surround them with #line directives (for better error reports if the generated code is ill-typed) or don't (for better readability). The value is either [None] -- do not provide #line directives -- or [Some filename] -- do provide them. [filename] is the name of the file that is being written.
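(For the record, an OCaml line directive takes the form [# 42 "parser.mly"]; the line number and file name shown here are made up.)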
*) val locate_stretches: string option end) : sig val program: IL.program -> unit val expr: IL.expr -> unit val interface: IL.interface -> unit end menhir-20151112/src/Engine.ml0000644000175000017500000006710112621170074014523 0ustar mehdimehdi(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, INRIA Paris-Rocquencourt *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright 2005-2015 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) open EngineTypes (* The LR parsing engine. *) (* This module is used: - at compile time, if so requested by the user, via the --interpret options; - at run time, in the table-based back-end. *) module Make (T : TABLE) = struct (* This propagates type and exception definitions. *) include T type env = (state, semantic_value, token) EngineTypes.env (* --------------------------------------------------------------------------- *) (* The type [checkpoint] represents an intermediate or final result of the parser. See [EngineTypes]. *) (* The type [checkpoint] is presented to the user as a private type (see [IncrementalEngine]). This prevents the user from manufacturing checkpoints (i.e., continuations) that do not make sense. (Such continuations could potentially violate the LR invariant and lead to crashes.) *) type 'a checkpoint = | InputNeeded of env | Shifting of env * env * bool | AboutToReduce of env * production | HandlingError of env | Accepted of 'a | Rejected (* --------------------------------------------------------------------------- *) (* In the code-based back-end, the [run] function is sometimes responsible for pushing a new cell on the stack. This is motivated by code sharing concerns. In this interpreter, there is no such concern; [run]'s caller is always responsible for updating the stack. *) (* In the code-based back-end, there is a [run] function for each state [s]. This function can behave in two slightly different ways, depending on when it is invoked, or (equivalently) depending on [s]. If [run] is invoked after shifting a terminal symbol (or, equivalently, if [s] has a terminal incoming symbol), then [run] discards a token, unless [s] has a default reduction on [#]. (Indeed, in that case, requesting the next token might drive the lexer off the end of the input stream.) If, on the other hand, [run] is invoked after performing a goto transition, or invoked directly by an entry point, then there is nothing to discard. These two cases are reflected in [CodeBackend.gettoken]. Here, the code is structured in a slightly different way. It is up to the caller of [run] to indicate whether to discard a token, via the parameter [please_discard]. This flag is set when [s] is being entered by shifting a terminal symbol and [s] does not have a default reduction on [#]. *) (* The following recursive group of functions are tail recursive, produce a checkpoint of type [semantic_value checkpoint], and cannot raise an exception. A semantic action can raise [Error], but this exception is immediately caught within [reduce]. *) let rec run env please_discard : semantic_value checkpoint = (* Log the fact that we just entered this state. 
*) if log then Log.state env.current; (* If [please_discard] is set, we discard the current lookahead token and fetch the next one. In order to request a token from the user, we return an [InputNeeded] continuation, which, when invoked by the user, will take us to [discard]. If [please_discard] is not set, we skip this step and jump directly to [check_for_default_reduction]. *) if please_discard then InputNeeded env else check_for_default_reduction env (* [discard env triple] stores [triple] into [env], overwriting the previous token. It is invoked by [offer], which itself is invoked by the user in response to an [InputNeeded] checkpoint. *) and discard env triple = if log then begin let (token, startp, endp) = triple in Log.lookahead_token (T.token2terminal token) startp endp end; let env = { env with error = false; triple } in check_for_default_reduction env and check_for_default_reduction env = (* Examine what situation we are in. This case analysis is analogous to that performed in [CodeBackend.gettoken], in the sub-case where we do not have a terminal incoming symbol. *) T.default_reduction env.current announce_reduce (* there is a default reduction; perform it *) check_for_error_token (* there is none; continue below *) env and check_for_error_token env = (* There is no default reduction. Consult the current lookahead token so as to determine which action should be taken. *) (* Peeking at the first input token, without taking it off the input stream, is done by reading [env.triple]. We are careful to first check [env.error]. *) (* Note that, if [please_discard] was true, then we have just called [discard], so the lookahead token cannot be [error]. *) (* Returning [HandlingError env] is equivalent to calling [error env] directly, except it allows the user to regain control. *) if env.error then begin if log then Log.resuming_error_handling(); HandlingError env end else let (token, _, _) = env.triple in (* We consult the two-dimensional action table, indexed by the current state and the current lookahead token, in order to determine which action should be taken. *) T.action env.current (* determines a row *) (T.token2terminal token) (* determines a column *) (T.token2value token) shift (* shift continuation *) announce_reduce (* reduce continuation *) initiate (* failure continuation *) env (* --------------------------------------------------------------------------- *) (* This function takes care of shift transitions along a terminal symbol. (Goto transitions are taken care of within [reduce] below.) The symbol can be either an actual token or the [error] pseudo-token. *) (* Here, the lookahead token CAN be [error]. *) and shift env (please_discard : bool) (terminal : terminal) (value : semantic_value) (s' : state) = (* Log the transition. *) if log then Log.shift terminal s'; (* Push a new cell onto the stack, containing the identity of the state that we are leaving. *) let (_, startp, endp) = env.triple in let stack = { state = env.current; semv = value; startp; endp; next = env.stack; } in (* Switch to state [s']. *) let new_env = { env with stack; current = s' } in (* Expose the transition to the user. (In principle, we have a choice between exposing the transition before we take it, after we take it, or at some point in between. This affects the number and type of the parameters carried by [Shifting]. Here, we choose to expose the transition after we take it; this allows [Shifting] to carry only three parameters, whose meaning is simple.) 
*) Shifting (env, new_env, please_discard) (* --------------------------------------------------------------------------- *) (* The function [announce_reduce] stops the parser and returns a checkpoint which allows the parser to be resumed by calling [reduce]. *) (* Only ordinary productions are exposed to the user. Start productions are not exposed to the user. Reducing a start production simply leads to the successful termination of the parser. *) and announce_reduce env (prod : production) = if T.is_start prod then accept env prod else AboutToReduce (env, prod) (* The function [reduce] takes care of reductions. It is invoked by [resume] after an [AboutToReduce] event has been produced. *) (* Here, the lookahead token CAN be [error]. *) (* The production [prod] CANNOT be a start production. *) and reduce env (prod : production) = (* Log a reduction event. *) if log then Log.reduce_or_accept prod; (* Invoke the semantic action. The semantic action is responsible for truncating the stack and pushing a new cell onto the stack, which contains a new semantic value. It can raise [Error]. *) (* If the semantic action terminates normally, it returns a new stack, which becomes the current stack. *) (* If the semantic action raises [Error], we catch it and initiate error handling. *) (* This [match/with/exception] construct requires OCaml 4.02. *) match T.semantic_action prod env with | stack -> (* By our convention, the semantic action has produced an updated stack. The state now found in the top stack cell is the return state. *) (* Perform a goto transition. The target state is determined by consulting the goto table at the return state and at production [prod]. *) let current = T.goto stack.state prod in let env = { env with stack; current } in run env false | exception Error -> initiate env and accept env prod = (* Log an accept event. *) if log then Log.reduce_or_accept prod; (* Extract the semantic value out of the stack. *) let v = env.stack.semv in (* Finish. *) Accepted v (* --------------------------------------------------------------------------- *) (* The following functions deal with errors. *) (* [initiate] initiates or resumes error handling. *) (* Here, the lookahead token CAN be [error]. *) and initiate env = if log then Log.initiating_error_handling(); let env = { env with error = true } in HandlingError env (* [error] handles errors. *) and error env = assert env.error; (* Consult the column associated with the [error] pseudo-token in the action table. *) T.action env.current (* determines a row *) T.error_terminal (* determines a column *) T.error_value error_shift (* shift continuation *) error_reduce (* reduce continuation *) error_fail (* failure continuation *) env and error_shift env please_discard terminal value s' = (* Here, [terminal] is [T.error_terminal], and [value] is [T.error_value]. *) assert (terminal = T.error_terminal && value = T.error_value); (* This state is capable of shifting the [error] token. *) if log then Log.handling_error env.current; shift env please_discard terminal value s' and error_reduce env prod = (* This state is capable of performing a reduction on [error]. *) if log then Log.handling_error env.current; reduce env prod (* Intentionally calling [reduce] instead of [announce_reduce]. It does not seem very useful, and it could be confusing, to expose the reduction steps taken during error handling. *) and error_fail env = (* This state is unable to handle errors. Attempt to pop a stack cell. 
*) let cell = env.stack in let next = cell.next in if next == cell then (* The stack is empty. Die. *) Rejected else begin (* The stack is nonempty. Pop a cell, updating the current state with that found in the popped cell, and try again. *) let env = { env with stack = next; current = cell.state } in HandlingError env end (* End of the nest of tail recursive functions. *) (* --------------------------------------------------------------------------- *) (* --------------------------------------------------------------------------- *) (* The incremental interface. See [EngineTypes]. *) (* [start s] begins the parsing process. *) let start (s : state) (initial : Lexing.position) : semantic_value checkpoint = (* Build an empty stack. This is a dummy cell, which is its own successor. Its [next] field WILL be accessed by [error_fail] if an error occurs and is propagated all the way until the stack is empty. Its [endp] field WILL be accessed (by a semantic action) if an epsilon production is reduced when the stack is empty. *) let rec empty = { state = s; (* dummy *) semv = T.error_value; (* dummy *) startp = initial; (* dummy *) endp = initial; next = empty; } in (* Build an initial environment. *) (* Unfortunately, there is no type-safe way of constructing a dummy token. Tokens carry semantic values, which in general we cannot manufacture. This instance of [Obj.magic] could be avoided by adopting a different representation (e.g., no [env.error] field, and an option in the first component of [env.triple]), but I like this representation better. *) let dummy_token = Obj.magic () in let env = { error = false; triple = (dummy_token, initial, initial); (* dummy *) stack = empty; current = s; } in (* Begin parsing. *) (* The parameter [please_discard] here is [true], which means we know that we must read at least one token. This claim relies on the fact that we have ruled out the two special cases where a start symbol recognizes the empty language or the singleton language {epsilon}. *) run env true (* [offer checkpoint triple] is invoked by the user in response to a checkpoint of the form [InputNeeded env]. It checks that [checkpoint] is indeed of this form, and invokes [discard]. *) (* [resume checkpoint] is invoked by the user in response to a checkpoint of the form [AboutToReduce (env, prod)] or [HandlingError env]. It checks that [checkpoint] is indeed of this form, and invokes [reduce] or [error], as appropriate. *) (* In reality, [offer] and [resume] accept an argument of type [semantic_value checkpoint] and produce a checkpoint of the same type. The choice of [semantic_value] is forced by the fact that this is the parameter of the checkpoint [Accepted]. *) (* We change this as follows. *) (* We change the argument and result type of [offer] and [resume] from [semantic_value checkpoint] to ['a checkpoint]. This is safe, in this case, because we give the user access to values of type [t checkpoint] only if [t] is indeed the type of the eventual semantic value for this run. (More precisely, by examining the signatures [INCREMENTAL_ENGINE] and [INCREMENTAL_ENGINE_START], one finds that the user can build a value of type ['a checkpoint] only if ['a] is [semantic_value]. The table back-end goes further than this and produces versions of [start] composed with a suitable cast, which give the user access to a value of type [t checkpoint] where [t] is the type of the start symbol.) *) let offer : 'a . 
'a checkpoint -> token * Lexing.position * Lexing.position -> 'a checkpoint = function | InputNeeded env -> Obj.magic discard env | _ -> raise (Invalid_argument "offer expects InputNeeded") let resume : 'a . 'a checkpoint -> 'a checkpoint = function | HandlingError env -> Obj.magic error env | Shifting (_, env, please_discard) -> Obj.magic run env please_discard | AboutToReduce (env, prod) -> Obj.magic reduce env prod | _ -> raise (Invalid_argument "resume expects HandlingError | Shifting | AboutToReduce") (* --------------------------------------------------------------------------- *) (* --------------------------------------------------------------------------- *) (* The traditional interface. See [EngineTypes]. *) (* --------------------------------------------------------------------------- *) (* Wrapping a lexer and lexbuf as a token supplier. *) type supplier = unit -> token * Lexing.position * Lexing.position let lexer_lexbuf_to_supplier (lexer : Lexing.lexbuf -> token) (lexbuf : Lexing.lexbuf) : supplier = fun () -> let token = lexer lexbuf in let startp = lexbuf.Lexing.lex_start_p and endp = lexbuf.Lexing.lex_curr_p in token, startp, endp (* --------------------------------------------------------------------------- *) (* The main loop repeatedly handles intermediate checkpoints, until a final checkpoint is obtained. This allows implementing the monolithic interface ([entry]) in terms of the incremental interface ([start], [offer], [resume]). *) (* By convention, acceptance is reported by returning a semantic value, whereas rejection is reported by raising [Error]. *) (* [loop] is polymorphic in ['a]. No cheating is involved in achieving this. All of the cheating resides in the types assigned to [offer] and [resume] above. *) let rec loop : 'a . supplier -> 'a checkpoint -> 'a = fun read checkpoint -> match checkpoint with | InputNeeded _ -> (* The parser needs a token. Request one from the lexer, and offer it to the parser, which will produce a new checkpoint. Then, repeat. *) let triple = read() in let checkpoint = offer checkpoint triple in loop read checkpoint | Shifting _ | AboutToReduce _ | HandlingError _ -> (* The parser has suspended itself, but does not need new input. Just resume the parser. Then, repeat. *) let checkpoint = resume checkpoint in loop read checkpoint | Accepted v -> (* The parser has succeeded and produced a semantic value. Return this semantic value to the user. *) v | Rejected -> (* The parser rejects this input. Raise an exception. *) raise Error let entry (s : state) lexer lexbuf : semantic_value = let initial = lexbuf.Lexing.lex_curr_p in loop (lexer_lexbuf_to_supplier lexer lexbuf) (start s initial) (* --------------------------------------------------------------------------- *) (* [loop_handle] stops if it encounters an error, and at this point, invokes its failure continuation, without letting Menhir do its own traditional error-handling (which involves popping the stack, etc.). *) let rec loop_handle succeed fail read checkpoint = match checkpoint with | InputNeeded _ -> let triple = read() in let checkpoint = offer checkpoint triple in loop_handle succeed fail read checkpoint | Shifting _ | AboutToReduce _ -> let checkpoint = resume checkpoint in loop_handle succeed fail read checkpoint | HandlingError _ | Rejected -> (* The parser has detected an error. Invoke the failure continuation. *) fail checkpoint | Accepted v -> (* The parser has succeeded and produced a semantic value. Invoke the success continuation.
*) succeed v (* --------------------------------------------------------------------------- *) (* [loop_handle_undo] is analogous to [loop_handle], except it passes a pair of checkpoints to the failure continuation. The first (and oldest) checkpoint is the last [InputNeeded] checkpoint that was encountered before the error was detected. The second (and newest) checkpoint is where the error was detected, as in [loop_handle]. Going back to the first checkpoint can be thought of as undoing any reductions that were performed after seeing the problematic token. (These reductions must be default reductions or spurious reductions.) *) let rec loop_handle_undo succeed fail read (inputneeded, checkpoint) = match checkpoint with | InputNeeded _ -> (* Update the last recorded [InputNeeded] checkpoint. *) let inputneeded = checkpoint in let triple = read() in let checkpoint = offer checkpoint triple in loop_handle_undo succeed fail read (inputneeded, checkpoint) | Shifting _ | AboutToReduce _ -> let checkpoint = resume checkpoint in loop_handle_undo succeed fail read (inputneeded, checkpoint) | HandlingError _ | Rejected -> fail inputneeded checkpoint | Accepted v -> succeed v (* For simplicity, we publish a version of [loop_handle_undo] that takes a single checkpoint as an argument, instead of a pair of checkpoints. We check that the argument is [InputNeeded _], and duplicate it. *) (* The parser cannot accept or reject before it asks for the very first character of input. (Indeed, we statically reject a symbol that generates the empty language or the singleton language {epsilon}.) So, the [start] checkpoint must match [InputNeeded _]. Hence, it is permitted to call [loop_handle_undo] with a [start] checkpoint. *) let loop_handle_undo succeed fail read checkpoint = assert (match checkpoint with InputNeeded _ -> true | _ -> false); loop_handle_undo succeed fail read (checkpoint, checkpoint) (* ------------------------------------------------------------------------ *) (* [loop_test f checkpoint accu] assumes that [checkpoint] has been obtained by submitting a token to the parser. It runs the parser from [checkpoint], through an arbitrary number of reductions, until the parser either accepts this token (i.e., shifts) or rejects it (i.e., signals an error). If the parser decides to shift, then the accumulator is updated by applying the user function [f] to the [env] just before shifting and to the old [accu]. Otherwise, the accumulator is not updated, i.e., [accu] is returned. *) (* This test causes some semantic actions to be run! The semantic actions should be side-effect free, or their side-effects should be harmless. *) let rec loop_test f checkpoint accu = match checkpoint with | Shifting (env, _, _) -> (* The parser is about to shift, which means it is willing to consume the terminal symbol that we have fed it. Update the accumulator with the state just before this transition. *) f env accu | AboutToReduce _ -> (* The parser wishes to reduce. Just follow. *) loop_test f (resume checkpoint) accu | HandlingError _ -> (* The parser fails, which means it rejects the terminal symbol that we have fed it. Do not update the accumulator. *) accu | InputNeeded _ | Accepted _ | Rejected -> (* None of these cases can arise. Indeed, after a token is submitted to it, the parser must shift, reduce, or signal an error, before it can request another token or terminate. 
*) assert false (* --------------------------------------------------------------------------- *) (* The function [loop_test] can be used, after an error has been detected, to dynamically test which tokens would have been accepted at this point. We provide this test, ready for use. *) (* For completeness, one must undo any spurious reductions before carrying out this test -- that is, one must apply [acceptable] to the FIRST checkpoint that is passed by [loop_handle_undo] to its failure continuation. *) (* This test causes some semantic actions to be run! The semantic actions should be side-effect free, or their side-effects should be harmless. *) (* The position [pos] is used as the start and end positions of the hypothetical token, and may be picked up by the semantic actions. We suggest using the position where the error was detected. *) let acceptable checkpoint token pos = let triple = (token, pos, pos) in let checkpoint = offer checkpoint triple in loop_test (fun _env _accu -> true) checkpoint false (* --------------------------------------------------------------------------- *) (* The type ['a lr1state] describes the (non-initial) states of the LR(1) automaton. The index ['a] represents the type of the semantic value associated with the state's incoming symbol. *) (* The type ['a lr1state] is defined as an alias for [state], which itself is usually defined as [int] (see [TableInterpreter]). So, ['a lr1state] is technically a phantom type, but should really be thought of as a GADT whose data constructors happen to be represented as integers. It is presented to the user as an abstract type (see [IncrementalEngine]). *) type 'a lr1state = state (* --------------------------------------------------------------------------- *) (* Stack inspection. *) (* We offer a read-only view of the parser's state as a stream of elements. Each element contains a pair of a (non-initial) state and a semantic value associated with (the incoming symbol of) this state. Note that the type [element] is an existential type. *) type element = | Element: 'a lr1state * 'a * Lexing.position * Lexing.position -> element open General type stack = element stream (* If [current] is the current state and [cell] is the top stack cell, then [stack cell current] is a view of the parser's state as a stream of elements. *) let rec stack cell current : element stream = lazy ( (* The stack is empty iff the top stack cell is its own successor. In that case, the current state [current] should be an initial state (which has no incoming symbol). We do not allow the user to inspect this state. *) let next = cell.next in if next == cell then Nil else (* Construct an element containing the current state [current] as well as the semantic value contained in the top stack cell. This semantic value is associated with the incoming symbol of this state, so it makes sense to pair them together. The state has type ['a state] and the semantic value has type ['a], for some type ['a]. Here, the OCaml type-checker thinks ['a] is [semantic_value] and considers this code well-typed. Outside, we will use magic to provide the user with a way of inspecting states and recovering the value of ['a]. *) let element = Element ( current, cell.semv, cell.startp, cell.endp ) in Cons (element, stack next cell.state) ) let stack env : element stream = stack env.stack env.current (* --------------------------------------------------------------------------- *) (* Access to the position of the lookahead token. 
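These are the start and end positions of the token most recently offered to the parser. As a hypothetical use, a client could report the location of a syntax error by writing [let (startp, _) = positions env in Printf.eprintf "Syntax error at line %d.\n" startp.Lexing.pos_lnum].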
*) let positions { triple = (_, startp, endp); _ } = startp, endp (* --------------------------------------------------------------------------- *) (* Access to information about default reductions. *) (* We can make this a function of states, or a function of environments. For now, the latter appears simpler. *) let has_default_reduction env : bool = T.default_reduction env.current (fun _env _prod -> true) (fun _env -> false) env end menhir-20151112/src/FixSolver.ml0000644000175000017500000000300312621170073015225 0ustar mehdimehdimodule Make (M : Fix.IMPERATIVE_MAPS) (P : sig include Fix.PROPERTY val union: property -> property -> property end) = struct type variable = M.key type property = P.property (* A constraint is represented as a mapping of each variable to an expression, which represents its lower bound. We could represent an expression as a list of constants and variables; we can also represent it as a binary tree, as follows. *) type expression = | EBottom | ECon of property | EVar of variable | EJoin of expression * expression type constraint_ = expression M.t (* Looking up a variable's lower bound. *) let consult (m : constraint_) (x : variable) : expression = try M.find x m with Not_found -> EBottom (* Evaluation of an expression in an environment. *) let rec evaluate get e = match e with | EBottom -> P.bottom | ECon p -> p | EVar x -> get x | EJoin (e1, e2) -> P.union (evaluate get e1) (evaluate get e2) (* Solving a constraint. *) let solve (m : constraint_) : variable -> property = let module F = Fix.Make(M)(P) in F.lfp (fun x get -> evaluate get (consult m x) ) (* The imperative interface. *) let create () = let m = M.create() in let record_ConVar p y = M.add y (EJoin (ECon p, consult m y)) m and record_VarVar x y = M.add y (EJoin (EVar x, consult m y)) m in record_ConVar, record_VarVar, fun () -> solve m end menhir-20151112/src/grammarFunctor.ml0000644000175000017500000011645312621170073016311 0ustar mehdimehdiopen UnparameterizedSyntax open Syntax open Positions module Make (G : sig (* An abstract syntax tree for the grammar. *) val grammar: UnparameterizedSyntax.grammar (* This flag indicates whether it is OK to produce warnings, verbose information, etc., when this functor is invoked. If it is set to [false], then only serious errors can be signaled. *) val verbose: bool end) = struct open G (* ------------------------------------------------------------------------ *) (* Precedence levels for tokens or pseudo-tokens alike. *) module TokPrecedence = struct (* This set records, on a token by token basis, whether the token's precedence level is ever useful. This allows emitting warnings about useless precedence declarations. *) let ever_useful : StringSet.t ref = ref StringSet.empty let use id = ever_useful := StringSet.add id !ever_useful (* This function is invoked when someone wants to consult a token's precedence level. This does not yet mean that this level is useful, though. Indeed, if it is subsequently compared against [UndefinedPrecedence], it will not allow solving a conflict. So, in addition to the desired precedence level, we return a delayed computation which, when evaluated, records that this precedence level was useful. 
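Thus, a typical client is expected to write something along the lines of [let fact, level = leveli id in ...], evaluating [Lazy.force fact] only at the point where [level] actually serves to solve a conflict. (This is a sketch of the intended usage protocol, not a quotation of actual client code.)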
*) let levelip id properties = lazy (use id), properties.tk_precedence let leveli id = let properties = try StringMap.find id grammar.tokens with Not_found -> assert false (* well-formedness check has been performed earlier *) in levelip id properties (* This function prints warnings about useless precedence declarations for terminal symbols (%left, %right, %nonassoc). It should be invoked only after the automaton has been constructed. *) let diagnostics () = StringMap.iter (fun id properties -> if not (StringSet.mem id !ever_useful) then match properties.tk_precedence with | UndefinedPrecedence -> () | PrecedenceLevel (_, _, pos1, pos2) -> Error.grammar_warning (Positions.two pos1 pos2) "the precedence level assigned to %s is never useful." id ) grammar.tokens end (* ------------------------------------------------------------------------ *) (* Nonterminals. *) module Nonterminal = struct type t = int let n2i i = i let compare = (-) (* Determine how many nonterminals we have and build mappings both ways between names and indices. A new nonterminal is created for every start symbol. *) let new_start_nonterminals = StringSet.fold (fun symbol ss -> (symbol ^ "'") :: ss) grammar.start_symbols [] let original_nonterminals = nonterminals grammar let start = List.length new_start_nonterminals let (n : int), (name : string array), (map : int StringMap.t) = Misc.index (new_start_nonterminals @ original_nonterminals) let () = if verbose then Error.logG 1 (fun f -> Printf.fprintf f "Grammar has %d nonterminal symbols, among which %d start symbols.\n" (n - start) start ) let is_start nt = nt < start let print normalize nt = if normalize then Misc.normalize name.(nt) else name.(nt) let lookup name = StringMap.find name map let positions nt = (StringMap.find (print false nt) grammar.rules).positions let iter f = Misc.iteri n f let fold f accu = Misc.foldi n f accu let map f = Misc.mapi n f let iterx f = for nt = start to n - 1 do f nt done let foldx f accu = Misc.foldij start n f accu let ocamltype nt = assert (not (is_start nt)); try Some (StringMap.find (print false nt) grammar.types) with Not_found -> None let ocamltype_of_start_symbol nt = match ocamltype nt with | Some typ -> typ | None -> (* Every start symbol has a type. *) assert false let tabulate f = Array.get (Array.init n f) end (* Sets and maps over nonterminals. *) module NonterminalMap = Patricia.Big module NonterminalSet = Patricia.Big.Domain (* ------------------------------------------------------------------------ *) (* Terminals. *) module Terminal = struct type t = int let t2i i = i let i2t i = i let compare = (-) let equal (tok1 : t) (tok2 : t) = tok1 = tok2 (* Determine how many terminals we have and build mappings both ways between names and indices. A new terminal "#" is created. A new terminal "error" is created. The fact that the integer code assigned to the "#" pseudo-terminal is the last one is exploited in the table-based back-end. (The right-most row of the action table is not created.) Pseudo-tokens (used in %prec declarations, but never declared using %token) are filtered out. *) (* In principle, the number of the [error] token is irrelevant. It is currently 0, but we do not rely on that. *) let (n : int), (name : string array), (map : int StringMap.t) = let tokens = tokens grammar in match tokens with | [] when verbose -> Error.error [] "no tokens have been declared."
| _ -> Misc.index ("error" :: tokens @ [ "#" ]) let print tok = name.(tok) let lookup name = StringMap.find name map let sharp = lookup "#" let error = lookup "error" let pseudo tok = (tok = sharp) || (tok = error) let real t = error <> t && t <> sharp let token_properties = let not_so_dummy_properties = (* applicable to [error] and [#] *) { tk_filename = "__primitives__"; tk_precedence = UndefinedPrecedence; tk_associativity = UndefinedAssoc; tk_ocamltype = None; tk_is_declared = true; tk_position = Positions.dummy; } in Array.init n (fun tok -> try StringMap.find name.(tok) grammar.tokens with Not_found -> assert (tok = sharp || tok = error); not_so_dummy_properties ) let () = if verbose then Error.logG 1 (fun f -> Printf.fprintf f "Grammar has %d terminal symbols.\n" (n - 2) ) let precedence_level tok = TokPrecedence.levelip (print tok) token_properties.(tok) let associativity tok = token_properties.(tok).tk_associativity let ocamltype tok = token_properties.(tok).tk_ocamltype let iter f = Misc.iteri n f let fold f accu = Misc.foldi n f accu let map f = Misc.mapi n f let () = assert (sharp = n - 1) let mapx f = Misc.mapi sharp f let () = assert (error = 0) let iter_real f = for i = 1 to n-2 do f i done (* If a token named [EOF] exists, then it is assumed to represent ocamllex's [eof] pattern. *) let eof = try Some (lookup "EOF") with Not_found -> None (* The sub-module [Word] offers an implementation of words (that is, sequences) of terminal symbols. It is used by [LRijkstra]. We make it a functor, because it has internal state (a hash table) and a side effect (failure if there are more than 256 terminal symbols). *) module Word (X : sig end) = struct (* We could use lists, or perhaps the sequences offered by the module [Seq], which support constant time concatenation. However, we need a much more compact representation: [LRijkstra] stores tens of millions of such words. We use strings, because they are very compact (8 bits per symbol), and on top of that, we use a hash-consing facility. In practice, hash-consing allows us to save 1000x in space. *) (* A drawback of this approach is that it works only if the number of terminal symbols is at most 256. For the moment, this is good enough. [LRijkstra] already has difficulty at 100 terminal symbols or so. *) let () = assert (n <= 256) let (encode : string -> int), (decode : int -> string), verbose = Misc.new_encode_decode 1024 type word = int let epsilon = encode "" let singleton t = encode (String.make 1 (Char.chr t)) let append i1 i2 = let w1 = decode i1 and w2 = decode i2 in if String.length w1 = 0 then i2 else if String.length w2 = 0 then i1 else encode (w1 ^ w2) let length i = String.length (decode i) let first i z = let w = decode i in if String.length w > 0 then Char.code w.[0] else z let rec elements i n w = if i = n then [] else Char.code w.[i] :: elements (i + 1) n w let elements i = let w = decode i in elements 0 (String.length w) w let print i = let w = decode i in Misc.separated_iter_to_string (fun c -> print (Char.code c)) " " (fun f -> String.iter f w) (* [Pervasives.compare] implements a lexicographic ordering on strings. *) let compare i1 i2 = Pervasives.compare (decode i1) (decode i2) end end (* Sets of terminals are used intensively in the LR(1) construction, so it is important that they be as efficient as possible. 
*) module TerminalSet = struct include CompressedBitSet let print toks = Misc.separated_iter_to_string Terminal.print " " (fun f -> iter f toks) let universe = remove Terminal.sharp ( remove Terminal.error ( Terminal.fold add empty ) ) (* The following definitions are used in the computation of FIRST sets below. They are not exported outside of this file. *) type property = t let bottom = empty let is_maximal _ = false end (* Maps over terminals. *) module TerminalMap = Patricia.Big (* ------------------------------------------------------------------------ *) (* Symbols. *) module Symbol = struct type t = | N of Nonterminal.t | T of Terminal.t let compare sym1 sym2 = match sym1, sym2 with | N nt1, N nt2 -> Nonterminal.compare nt1 nt2 | T tok1, T tok2 -> Terminal.compare tok1 tok2 | N _, T _ -> 1 | T _, N _ -> -1 let equal sym1 sym2 = compare sym1 sym2 = 0 let rec lequal syms1 syms2 = match syms1, syms2 with | [], [] -> true | sym1 :: syms1, sym2 :: syms2 -> equal sym1 sym2 && lequal syms1 syms2 | _ :: _, [] | [], _ :: _ -> false let print = function | N nt -> Nonterminal.print false nt | T tok -> Terminal.print tok let nonterminal = function | T _ -> false | N _ -> true (* Printing an array of symbols. [offset] is the start offset -- we print everything to its right. [dot] is the dot offset -- we print a dot at this offset, if we find it. *) let printaod offset dot symbols = let buffer = Buffer.create 512 in let length = Array.length symbols in for i = offset to length do if i = dot then Buffer.add_string buffer ". "; if i < length then begin Buffer.add_string buffer (print symbols.(i)); Buffer.add_char buffer ' ' end done; Buffer.contents buffer let printao offset symbols = printaod offset (-1) symbols let printa symbols = printao 0 symbols let printl symbols = printa (Array.of_list symbols) let lookup name = try T (Terminal.lookup name) with Not_found -> try N (Nonterminal.lookup name) with Not_found -> assert false (* well-formedness check has been performed earlier *) end (* Sets of symbols. *) module SymbolSet = struct include Set.Make(Symbol) let print symbols = Symbol.printl (elements symbols) (* The following definitions are used in the computation of symbolic FOLLOW sets below. They are not exported outside of this file. *) type property = t let bottom = empty let is_maximal _ = false end (* Maps over symbols. *) module SymbolMap = struct include Map.Make(Symbol) let domain m = fold (fun symbol _ accu -> symbol :: accu ) m [] let purelynonterminal m = fold (fun symbol _ accu -> accu && Symbol.nonterminal symbol ) m true end (* ------------------------------------------------------------------------ *) (* Productions. *) module Production = struct type index = int let compare = (-) (* Create an array of productions. Record which productions are associated with every nonterminal. A new production S' -> S is created for every start symbol S. It is known as a start production. 
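(Numbering: as the code below shows, the start productions occupy the indices [0] to [start - 1], while the ordinary productions occupy the indices [start] to [n - 1]. Thus, for two hypothetical start symbols [S] and [T], productions [0] and [1] would be [S' -> S] and [T' -> T].)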
*) let n : int = let n = StringMap.fold (fun _ { branches = branches } n -> n + List.length branches ) grammar.rules 0 in if verbose then Error.logG 1 (fun f -> Printf.fprintf f "Grammar has %d productions.\n" n); n + StringSet.cardinal grammar.start_symbols let p2i prod = prod let i2p prod = assert (prod >= 0 && prod < n); prod let table : (Nonterminal.t * Symbol.t array) array = Array.make n (-1, [||]) let identifiers : identifier array array = Array.make n [||] let actions : action option array = Array.make n None let ntprods : (int * int) array = Array.make Nonterminal.n (-1, -1) let positions : Positions.t list array = Array.make n [] let (start : int), (startprods : index NonterminalMap.t) = StringSet.fold (fun nonterminal (k, startprods) -> let nt = Nonterminal.lookup nonterminal and nt' = Nonterminal.lookup (nonterminal ^ "'") in table.(k) <- (nt', [| Symbol.N nt |]); identifiers.(k) <- [| "_1" |]; ntprods.(nt') <- (k, k+1); positions.(k) <- Nonterminal.positions nt; k+1, NonterminalMap.add nt k startprods ) grammar.start_symbols (0, NonterminalMap.empty) let prec_decl : symbol located option array = Array.make n None let production_level : branch_production_level array = (* The start productions should receive this dummy level, I suppose. We use a fresh mark, so a reduce/reduce conflict that involves a start production will not be solved. *) let dummy = ProductionLevel (Mark.fresh(), 0) in Array.make n dummy let (_ : int) = StringMap.fold (fun nonterminal { branches = branches } k -> let nt = Nonterminal.lookup nonterminal in let k' = List.fold_left (fun k branch -> let symbols = Array.of_list branch.producers in table.(k) <- (nt, Array.map (fun (v, _) -> Symbol.lookup v) symbols); identifiers.(k) <- Array.map snd symbols; actions.(k) <- Some branch.action; production_level.(k) <- branch.branch_production_level; prec_decl.(k) <- branch.branch_prec_annotation; positions.(k) <- [ branch.branch_position ]; k+1 ) k branches in ntprods.(nt) <- (k, k'); k' ) grammar.rules start (* Iteration over the productions associated with a specific nonterminal. *) let iternt nt f = let k, k' = ntprods.(nt) in for prod = k to k' - 1 do f prod done let foldnt (nt : Nonterminal.t) (accu : 'a) (f : index -> 'a -> 'a) : 'a = let k, k' = ntprods.(nt) in let rec loop accu prod = if prod < k' then loop (f prod accu) (prod + 1) else accu in loop accu k (* This funny variant is lazy. If at some point [f] does not demand its second argument, then iteration stops. *) let foldnt_lazy (nt : Nonterminal.t) (f : index -> (unit -> 'a) -> 'a) (seed : 'a) : 'a = let k, k' = ntprods.(nt) in let rec loop prod seed = if prod < k' then f prod (fun () -> loop (prod + 1) seed) else seed in loop k seed (* Accessors. *) let def prod = table.(prod) let nt prod = let nt, _ = table.(prod) in nt let rhs prod = let _, rhs = table.(prod) in rhs let length prod = Array.length (rhs prod) let identifiers prod = identifiers.(prod) let is_start prod = prod < start let classify prod = if is_start prod then match (rhs prod).(0) with | Symbol.N nt -> Some nt | Symbol.T _ -> assert false else None let action prod = match actions.(prod) with | Some action -> action | None -> (* Start productions have no action. *) assert (is_start prod); assert false let positions prod = positions.(prod) let startsymbol2startprod nt = try NonterminalMap.find nt startprods with Not_found -> assert false (* [nt] is not a start symbol *) (* Iteration. 
*) let iter f = Misc.iteri n f let fold f accu = Misc.foldi n f accu let map f = Misc.mapi n f let amap f = Array.init n f let iterx f = for prod = start to n - 1 do f prod done let foldx f accu = Misc.foldij start n f accu let mapx f = Misc.mapij start n f (* Printing a production. *) let print prod = assert (not (is_start prod)); let nt, rhs = table.(prod) in Printf.sprintf "%s -> %s" (Nonterminal.print false nt) (Symbol.printao 0 rhs) (* Tabulation. *) let tabulate f = Misc.tabulate n f let tabulateb f = Misc.tabulateb n f (* This array allows recording, for each %prec declaration, whether it is ever useful. This allows us to emit a warning about useless %prec declarations. *) (* 2015/10/06: We take into account the fact that a %prec declaration can be duplicated by inlining or by the expansion of parameterized non-terminal symbols. Our table is not indexed by productions, but by positions (of %prec declarations in the source). Thus, if a %prec declaration is duplicated, at least one of its copies should be found useful for the warning to be suppressed. *) let ever_useful : (Positions.t, unit) Hashtbl.t = (* assuming that generic hashing and equality on positions are OK *) Hashtbl.create 16 let consult_prec_decl prod = let osym = prec_decl.(prod) in lazy ( Option.iter (fun sym -> (* Mark this %prec declaration as useful. *) let pos = Positions.position sym in Hashtbl.add ever_useful pos () ) osym ), osym (* This function prints warnings about useless precedence declarations for productions (%prec). It should be invoked only after the automaton has been constructed. *) let diagnostics () = iterx (fun prod -> let osym = prec_decl.(prod) in Option.iter (fun sym -> (* Check whether this %prec declaration was useless. *) let pos = Positions.position sym in if not (Hashtbl.mem ever_useful pos) then begin Error.grammar_warning [pos] "this %%prec declaration is never useful."; Hashtbl.add ever_useful pos () (* hack: avoid two warnings at the same position *) end ) osym ) (* Determining the precedence level of a production. If no %prec declaration was explicitly supplied, it is the precedence level of the rightmost terminal symbol in the production's right-hand side. *) type production_level = | PNone | PRightmostToken of Terminal.t | PPrecDecl of symbol let rightmost_terminal prod = Array.fold_left (fun accu symbol -> match symbol with | Symbol.T tok -> PRightmostToken tok | Symbol.N _ -> accu ) PNone (rhs prod) let combine e1 e2 = lazy (Lazy.force e1; Lazy.force e2) let precedence prod = let fact1, prec_decl = consult_prec_decl prod in let oterminal = match prec_decl with | None -> rightmost_terminal prod | Some { value = terminal } -> PPrecDecl terminal in match oterminal with | PNone -> fact1, UndefinedPrecedence | PRightmostToken tok -> let fact2, level = Terminal.precedence_level tok in combine fact1 fact2, level | PPrecDecl id -> let fact2, level = TokPrecedence.leveli id in combine fact1 fact2, level end (* ------------------------------------------------------------------------ *) (* Maps over productions. *) module ProductionMap = struct include Patricia.Big (* Iteration over the start productions only. *) let start f = Misc.foldi Production.start (fun prod m -> add prod (f prod) m ) empty end (* ------------------------------------------------------------------------ *) (* Support for analyses of the grammar, expressed as fixed point computations. We exploit the generic fixed point algorithm in [Fix]. *) (* We perform memoization only at nonterminal symbols.
We assume that the analysis of a symbol is the analysis of its definition (as opposed to, say, a computation that depends on the occurrences of this symbol in the grammar). *) module GenericAnalysis (P : Fix.PROPERTY) (S : sig open P (* An analysis is specified by the following functions. *) (* [terminal] maps a terminal symbol to a property. *) val terminal: Terminal.t -> property (* [disjunction] abstracts a binary alternative. That is, when we analyze an alternative between several productions, we compute a property for each of them independently, then we combine these properties using [disjunction]. *) val disjunction: property -> (unit -> property) -> property (* [P.bottom] should be a neutral element for [disjunction]. We use it in the analysis of an alternative with zero branches. *) (* [conjunction] abstracts a binary sequence. That is, when we analyze a sequence, we compute a property for each member independently, then we combine these properties using [conjunction]. In general, conjunction needs access to the first member of the sequence (a symbol), not just to its analysis (a property). *) val conjunction: Symbol.t -> property -> (unit -> property) -> property (* [epsilon] abstracts the empty sequence. It should be a neutral element for [conjunction]. *) val epsilon: property end) : sig open P (* The results of the analysis take the following form. *) (* To every nonterminal symbol, we associate a property. *) val nonterminal: Nonterminal.t -> property (* To every symbol, we associate a property. *) val symbol: Symbol.t -> property (* To every suffix of every production, we associate a property. The offset [i], which determines the beginning of the suffix, must be contained between [0] and [n], inclusive, where [n] is the length of the production. *) val production: Production.index -> int -> property end = struct open P (* The following analysis functions are parameterized over [get], which allows making a recursive call to the analysis at a nonterminal symbol. [get] maps a nonterminal symbol to a property. *) (* Analysis of a symbol. *) let symbol sym get : property = match sym with | Symbol.T tok -> S.terminal tok | Symbol.N nt -> (* Recursive call to the analysis, via [get]. *) get nt (* Analysis of (a suffix of) a production [prod], starting at index [i]. *) let production prod i get : property = let rhs = Production.rhs prod in let n = Array.length rhs in (* Conjunction over all symbols in the right-hand side. This can be viewed as a version of [Array.fold_right], which does not necessarily begin at index [0]. Note that, because [conjunction] is lazy, it is possible to stop early. *) let rec loop i = if i = n then S.epsilon else let sym = rhs.(i) in S.conjunction sym (symbol sym get) (fun () -> loop (i+1)) in loop i (* The analysis is the least fixed point of the following function, which analyzes a nonterminal symbol by looking up and analyzing its definition as a disjunction of conjunctions of symbols. *) let nonterminal nt get : property = (* Disjunction over all productions for this nonterminal symbol. *) Production.foldnt_lazy nt (fun prod rest -> S.disjunction (production prod 0 get) rest ) P.bottom (* The least fixed point is taken as follows. Note that it is computed on demand, as [lfp] is called by the user. *) module F = Fix.Make (Maps.ArrayAsImperativeMaps(Nonterminal)) (P) let nonterminal = F.lfp nonterminal (* The auxiliary functions can be published too. 
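For instance, once this functor is instantiated with the FIRST analysis (see below), [production prod i] denotes the FIRST set of the suffix of [prod] that begins at offset [i]: for a hypothetical production A -> B C D, [production prod 1] is FIRST(C D), while [production prod 3] is [S.epsilon], the neutral element (that is, the empty set, in the case of FIRST).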
*) let symbol sym = symbol sym nonterminal let production prod i = production prod i nonterminal end (* ------------------------------------------------------------------------ *) (* Compute which nonterminals are nonempty, that is, recognize a nonempty language. Also, compute which nonterminals are nullable. The two computations are almost identical. The only difference is in the base case: a single terminal symbol is not nullable, but is nonempty. *) module NONEMPTY = GenericAnalysis (Boolean) (struct (* A terminal symbol is nonempty. *) let terminal _ = true (* An alternative is nonempty if at least one branch is nonempty. *) let disjunction p q = p || q() (* A sequence is nonempty if both members are nonempty. *) let conjunction _ p q = p && q() (* The sequence epsilon is nonempty. It generates the singleton language {epsilon}. *) let epsilon = true end) module NULLABLE = GenericAnalysis (Boolean) (struct (* A terminal symbol is not nullable. *) let terminal _ = false (* An alternative is nullable if at least one branch is nullable. *) let disjunction p q = p || q() (* A sequence is nullable if both members are nullable. *) let conjunction _ p q = p && q() (* The sequence epsilon is nullable. *) let epsilon = true end) (* ------------------------------------------------------------------------ *) (* Compute FIRST sets. *) module FIRST = GenericAnalysis (TerminalSet) (struct (* A terminal symbol has a singleton FIRST set. *) let terminal = TerminalSet.singleton (* The FIRST set of an alternative is the union of the FIRST sets. *) let disjunction p q = TerminalSet.union p (q()) (* The FIRST set of a sequence is the union of: the FIRST set of the first member, and the FIRST set of the second member, if the first member is nullable. *) let conjunction symbol p q = if NULLABLE.symbol symbol then TerminalSet.union p (q()) else p (* The FIRST set of the empty sequence is empty. *) let epsilon = TerminalSet.empty end) (* ------------------------------------------------------------------------ *) let () = if verbose then begin (* If a start symbol generates the empty language or generates the language {epsilon}, report an error. In principle, this could be just a warning. However, in [Engine], in the function [start], it is convenient to assume that neither of these situations can arise. This means that at least one token must be read. *) StringSet.iter (fun symbol -> let nt = Nonterminal.lookup symbol in if not (NONEMPTY.nonterminal nt) then Error.error (Nonterminal.positions nt) "%s generates the empty language." (Nonterminal.print false nt); if TerminalSet.is_empty (FIRST.nonterminal nt) then Error.error (Nonterminal.positions nt) "%s generates the language {epsilon}." (Nonterminal.print false nt) ) grammar.start_symbols; (* If a nonterminal symbol generates the empty language, issue a warning. *) for nt = Nonterminal.start to Nonterminal.n - 1 do if not (NONEMPTY.nonterminal nt) then Error.grammar_warning (Nonterminal.positions nt) "%s generates the empty language." (Nonterminal.print false nt); done end (* ------------------------------------------------------------------------ *) (* Dump the analysis results. 
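For instance, assuming a nonterminal symbol [expr] whose productions are expr -> INT and expr -> expr PLUS expr, the log, at level 2, would contain the two lines

     nullable(expr) = false
     first(expr) = INT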
*) let () = if verbose then Error.logG 2 (fun f -> for nt = Nonterminal.start to Nonterminal.n - 1 do Printf.fprintf f "nullable(%s) = %b\n" (Nonterminal.print false nt) (NULLABLE.nonterminal nt) done; for nt = Nonterminal.start to Nonterminal.n - 1 do Printf.fprintf f "first(%s) = %s\n" (Nonterminal.print false nt) (TerminalSet.print (FIRST.nonterminal nt)) done ) let () = if verbose then Time.tick "Analysis of the grammar" (* ------------------------------------------------------------------------ *) (* Compute FOLLOW sets. Unnecessary for us, but requested by a user. Also, this is useful for the SLR(1) test. Thus, we perform this analysis only on demand. *) (* The computation of the symbolic FOLLOW sets follows exactly the same pattern as that of the traditional FOLLOW sets. We share code and parameterize this computation over a module [P]. The type [P.property] intuitively represents a set of symbols. *) module FOLLOW (P : sig include Fix.PROPERTY val union: property -> property -> property val terminal: Terminal.t -> property val first: Production.index -> int -> property end) = struct module S = FixSolver.Make (Maps.ArrayAsImperativeMaps(Nonterminal)) (P) (* Build a system of constraints. *) let record_ConVar, record_VarVar, solve = S.create() (* Iterate over all start symbols. *) let () = let sharp = P.terminal Terminal.sharp in for nt = 0 to Nonterminal.start - 1 do assert (Nonterminal.is_start nt); (* Add # to FOLLOW(nt). *) record_ConVar sharp nt done (* We need to do this explicitly because our start productions are of the form S' -> S, not S' -> S #, so # will not automatically appear in FOLLOW(S) when the start productions are examined. *) (* Iterate over all productions. *) let () = Array.iteri (fun prod (nt1, rhs) -> (* Iterate over all nonterminal symbols [nt2] in the right-hand side. *) Array.iteri (fun i symbol -> match symbol with | Symbol.T _ -> () | Symbol.N nt2 -> let nullable = NULLABLE.production prod (i+1) and first = P.first prod (i+1) in (* The FIRST set of the remainder of the right-hand side contributes to the FOLLOW set of [nt2]. *) record_ConVar first nt2; (* If the remainder of the right-hand side is nullable, FOLLOW(nt1) contributes to FOLLOW(nt2). *) if nullable then record_VarVar nt1 nt2 ) rhs ) Production.table (* Second pass. Solve the equations (on demand). *) let follow : Nonterminal.t -> P.property = solve() end (* Use the above functor to obtain the standard (concrete) FOLLOW sets. *) let follow : Nonterminal.t -> TerminalSet.t = let module F = FOLLOW(struct include TerminalSet let terminal = singleton let first = FIRST.production end) in F.follow (* At log level 2, display the FOLLOW sets. *) let () = if verbose then Error.logG 2 (fun f -> for nt = Nonterminal.start to Nonterminal.n - 1 do Printf.fprintf f "follow(%s) = %s\n" (Nonterminal.print false nt) (TerminalSet.print (follow nt)) done ) (* Compute FOLLOW sets for the terminal symbols as well. Again, unnecessary for us, but requested by a user. This is done in a single pass over the grammar -- no new fixpoint computation is required. *) let tfollow : TerminalSet.t array Lazy.t = lazy ( let tfollow = Array.make Terminal.n TerminalSet.empty in (* Iterate over all productions. *) Array.iteri (fun prod (nt1, rhs) -> (* Iterate over all terminal symbols [t2] in the right-hand side.
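This mirrors the loop over nonterminal symbols above. For instance, in a hypothetical production S -> A t B, the set FIRST(B) contributes to follow(t) and, if B is nullable, then follow(S) contributes to follow(t) as well.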
*) Array.iteri (fun i symbol -> match symbol with | Symbol.N _ -> () | Symbol.T t2 -> let nullable = NULLABLE.production prod (i+1) and first = FIRST.production prod (i+1) in (* The FIRST set of the remainder of the right-hand side contributes to the FOLLOW set of [t2]. *) tfollow.(t2) <- TerminalSet.union first tfollow.(t2); (* If the remainder of the right-hand side is nullable, FOLLOW(nt1) contributes to FOLLOW(t2). *) if nullable then tfollow.(t2) <- TerminalSet.union (follow nt1) tfollow.(t2) ) rhs ) Production.table; tfollow ) (* Define another accessor. *) let tfollow t = (Lazy.force tfollow).(t) (* At log level 3, display the FOLLOW sets for terminal symbols. *) let () = if verbose then Error.logG 3 (fun f -> for t = 0 to Terminal.n - 1 do Printf.fprintf f "follow(%s) = %s\n" (Terminal.print t) (TerminalSet.print (tfollow t)) done ) (* ------------------------------------------------------------------------ *) (* Compute symbolic FIRST and FOLLOW sets. *) (* The symbolic FIRST set of the word determined by [prod/i] is defined (and computed) as follows. *) let sfirst prod i = let rhs = Production.rhs prod in let n = Array.length rhs in let rec loop i = if i = n then (* If the word [prod/i] is empty, the set is empty. *) SymbolSet.empty else let sym = rhs.(i) in (* If the word [prod/i] begins with a symbol [sym], then [sym] itself is part of the symbolic FIRST set, unconditionally. *) SymbolSet.union (SymbolSet.singleton sym) (* Furthermore, if [sym] is nullable, then the symbolic FIRST set of the sub-word [prod/i+1] contributes, too. *) (if NULLABLE.symbol sym then loop (i + 1) else SymbolSet.empty) in loop i (* The symbolic FOLLOW sets are computed just like the FOLLOW sets, except we use a symbolic FIRST set instead of a standard FIRST set. *) let sfollow : Nonterminal.t -> SymbolSet.t = let module F = FOLLOW(struct include SymbolSet let terminal t = SymbolSet.singleton (Symbol.T t) let first = sfirst end) in F.follow (* At log level 3, display the symbolic FOLLOW sets. *) let () = if verbose then Error.logG 3 (fun f -> for nt = Nonterminal.start to Nonterminal.n - 1 do Printf.fprintf f "sfollow(%s) = %s\n" (Nonterminal.print false nt) (SymbolSet.print (sfollow nt)) done ) (* ------------------------------------------------------------------------ *) (* Provide explanations about FIRST sets. *) (* The idea is to explain why a certain token appears in the FIRST set for a certain sequence of symbols. Such an explanation involves basic assertions of the form (i) symbol N is nullable and (ii) the token appears in the FIRST set for symbol N. We choose to take these basic facts for granted, instead of recursively explaining them, so as to keep explanations short. *) (* We first produce an explanation in abstract syntax, then convert it to a human-readable string. *) type explanation = | EObvious (* sequence begins with desired token *) | EFirst of Terminal.t * Nonterminal.t (* sequence begins with a nonterminal that produces desired token *) | ENullable of Symbol.t list * explanation (* sequence begins with a list of nullable symbols and ... 
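For instance, assuming [A] and [B] are nullable and the token [c] appears in neither FIRST(A) nor FIRST(B), the fact that [c] appears in the FIRST set of the sequence A B c is explained by ENullable ([A; B], EObvious), which [convert], below, renders as "A B can vanish".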
*) let explain (tok : Terminal.t) (rhs : Symbol.t array) (i : int) = let length = Array.length rhs in let rec loop i = assert (i < length); let symbol = rhs.(i) in match symbol with | Symbol.T tok' -> assert (Terminal.equal tok tok'); EObvious | Symbol.N nt -> if TerminalSet.mem tok (FIRST.nonterminal nt) then EFirst (tok, nt) else begin assert (NULLABLE.nonterminal nt); match loop (i + 1) with | ENullable (symbols, e) -> ENullable (symbol :: symbols, e) | e -> ENullable ([ symbol ], e) end in loop i let rec convert = function | EObvious -> "" | EFirst (tok, nt) -> Printf.sprintf "%s can begin with %s" (Nonterminal.print false nt) (Terminal.print tok) | ENullable (symbols, e) -> let e = convert e in Printf.sprintf "%scan vanish%s%s" (Symbol.printl symbols) (if e = "" then "" else " and ") e (* ------------------------------------------------------------------------ *) (* Package the analysis results. *) module Analysis = struct let nullable = NULLABLE.nonterminal let nullable_symbol = NULLABLE.symbol let first = FIRST.nonterminal let first_symbol = FIRST.symbol (* An initial definition of [nullable_first_prod]. *) let nullable_first_prod prod i = NULLABLE.production prod i, FIRST.production prod i (* A memoised version, so as to avoid recomputing along a production's right-hand side. *) let nullable_first_prod = Misc.tabulate Production.n (fun prod -> Misc.tabulate (Production.length prod + 1) (fun i -> nullable_first_prod prod i ) ) let first_prod_lookahead prod i z = let nullable, first = nullable_first_prod prod i in if nullable then TerminalSet.add z first else first let explain_first_rhs (tok : Terminal.t) (rhs : Symbol.t array) (i : int) = convert (explain tok rhs i) let follow = follow end (* ------------------------------------------------------------------------ *) (* Conflict resolution via precedences. *) module Precedence = struct type choice = | ChooseShift | ChooseReduce | ChooseNeither | DontKnow type order = Lt | Gt | Eq | Ic let precedence_order p1 p2 = match p1, p2 with | UndefinedPrecedence, _ | _, UndefinedPrecedence -> Ic | PrecedenceLevel (m1, l1, _, _), PrecedenceLevel (m2, l2, _, _) -> if not (Mark.same m1 m2) then Ic else if l1 > l2 then Gt else if l1 < l2 then Lt else Eq let production_order p1 p2 = match p1, p2 with | ProductionLevel (m1, l1), ProductionLevel (m2, l2) -> if not (Mark.same m1 m2) then Ic else if l1 > l2 then Gt else if l1 < l2 then Lt else Eq let shift_reduce tok prod = let fact1, tokp = Terminal.precedence_level tok and fact2, prodp = Production.precedence prod in match precedence_order tokp prodp with (* Our information is inconclusive. Drop [fact1] and [fact2], that is, do not record that this information was useful. *) | Ic -> DontKnow (* Our information is useful. Record that fact by evaluating [fact1] and [fact2]. *) | (Eq | Lt | Gt) as c -> Lazy.force fact1; Lazy.force fact2; match c with | Ic -> assert false (* already dispatched *) | Eq -> begin match Terminal.associativity tok with | LeftAssoc -> ChooseReduce | RightAssoc -> ChooseShift | NonAssoc -> ChooseNeither | _ -> assert false (* If [tok]'s precedence level is defined, then its associativity must be defined as well. *) end | Lt -> ChooseReduce | Gt -> ChooseShift let reduce_reduce prod1 prod2 = let pl1 = Production.production_level.(prod1) and pl2 = Production.production_level.(prod2) in match production_order pl1 pl2 with | Lt -> Some prod1 | Gt -> Some prod2 | Eq -> (* The order is strict except in the presence of parameterized non-terminals and/or inlining. 
Two productions can have the same precedence level if they originate, via macro-expansion or via inlining, from a single production in the source grammar. *) None | Ic -> None end (* This function prints warnings about useless precedence declarations for terminal symbols (%left, %right, %nonassoc) and productions (%prec). It should be invoked only after the automaton has been constructed. *) let diagnostics () = TokPrecedence.diagnostics(); Production.diagnostics() (* ------------------------------------------------------------------------ *) (* %on_error_reduce declarations. *) module OnErrorReduce = struct let declarations = grammar.on_error_reduce end (* ------------------------------------------------------------------------ *) end (* module Make *) menhir-20151112/src/invariant.ml0000644000175000017500000007201312621170073015306 0ustar mehdimehdi (* This module discovers information about the shape and content of the stack in each of the automaton's states. *) open Grammar module C = Conflict (* artificial dependency; ensures that [Conflict] runs first *) (* ------------------------------------------------------------------------ *) (* Compute a lower bound on the height of the stack at every state. At the same time, compute which symbols are held in this stack prefix. *) (* In order to compute (a lower bound on) the height of the stack at a state [s], we examine the LR(0) items that compose [s]. For each item, if the bullet is at position [pos], then we can be assured that the height of the stack is at least [pos]. Thus, we compute the maximum of [pos] over all items (of which there is at least one). *) (* The set of items that we use is not closed, but this does not matter; the items that would be added by the closure would not add any information regarding the height of the stack, since the bullet is at position 0 in these items. *) (* Instead of computing just the stack height, we compute, in the same manner, which symbols are on the stack at a state [s]. This is an array of symbols whose length is the height of the stack at [s]. By convention, the top of the stack is the end of the array. *) (* We first compute and tabulate this information at the level of the LR(0) automaton. *) let stack_symbols : Lr0.node -> Symbol.t array = let dummy = Array.make 0 (Symbol.T Terminal.sharp) in Misc.tabulate Lr0.n (fun node -> Item.Set.fold (fun item accu -> let _prod, _nt, rhs, pos, _length = Item.def item in if pos > Array.length accu then Array.sub rhs 0 pos else accu ) (Lr0.items node) dummy ) (* Then, it is easy to extend it to the LR(1) automaton. *) let stack_symbols (node : Lr1.node) : Symbol.t array = stack_symbols (Lr0.core (Lr1.state node)) let stack_height (node : Lr1.node) : int = Array.length (stack_symbols node) (* ------------------------------------------------------------------------ *) (* Above, we have computed a prefix of the stack at every state. We have computed the length of this prefix and the symbols that are held in this prefix of the stack. Now, compute which states may be held in this prefix. *) (* In order to compute this information, we perform an analysis of the automaton, via a least fixed point computation. *) (* It is worth noting that it would be possible to use an analysis based on a least fixed point computation to discover at the same time the length of the stack prefix, the symbols that it contains, and the states that it may contain.
This alternate approach, which was used until 2012/08/25, would lead us to discovering a richer invariant, that is, potentially longer prefixes. This extra information, however, was useless; computing it was a waste of time. Hence, as of 2012/08/25, the height of the stack prefix and the symbols that it contains are predicted (see above), and the least fixed computation is used only to populate these prefixes of predictable length with state information. *) (* By the way, this least fixed point analysis remains the most costly computation throughout this module. *) (* Vectors of sets of states. *) module StateVector = struct type property = Lr1.NodeSet.t list let empty = [] let rec equal v1 v2 = match v1, v2 with | [], [] -> true | states1 :: v1, states2 :: v2 -> Lr1.NodeSet.equal states1 states2 && equal v1 v2 | _, _ -> (* Because all heights are known ahead of time, we are able to (and careful to) compare only vectors of equal length. *) assert false let rec join v1 v2 = match v1, v2 with | [], [] -> [] | states1 :: v1, states2 :: v2 -> Lr1.NodeSet.union states1 states2 :: join v1 v2 | _, _ -> (* Because all heights are known ahead of time, we are able to (and careful to) compare only vectors of equal length. *) assert false let push v x = x :: v let truncate = MenhirLib.General.take end (* In order to perform the fixed point computation, we must extend our type of vectors with a bottom element. This element will not appear in the least fixed point, provided every state of the automaton is reachable. *) module StateLattice = struct type property = | Bottom | NonBottom of StateVector.property let bottom = Bottom let empty = NonBottom StateVector.empty let equal v1 v2 = match v1, v2 with | Bottom, Bottom -> true | NonBottom v1, NonBottom v2 -> StateVector.equal v1 v2 | _, _ -> false let join v1 v2 = match v1, v2 with | Bottom, v | v, Bottom -> v | NonBottom v1, NonBottom v2 -> NonBottom (StateVector.join v1 v2) let push v x = match v with | Bottom -> Bottom | NonBottom v -> NonBottom (StateVector.push v x) let truncate h v = match v with | Bottom -> Bottom | NonBottom v -> NonBottom (StateVector.truncate h v) let is_maximal _ = false end open StateLattice (* Define the fixed point. *) let stack_states : Lr1.node -> property = let module F = Fix.Make (Maps.PersistentMapsToImperativeMaps(Lr1.NodeMap)) (StateLattice) in F.lfp (fun node (get : Lr1.node -> property) -> (* We use the fact that a state has incoming transitions if and only if it is not a start state. *) match Lr1.incoming_symbol node with | None -> assert (Lr1.predecessors node = []); assert (stack_height node = 0); (* If [node] is a start state, then the stack at [node] may be (in fact, must be) the empty stack. *) empty | Some _symbol -> (* If [node] is not a start state, then include the contribution of every incoming transition. We compute a join over all predecessors. The contribution of one predecessor is the abstract value found at this predecessor, extended with a new cell for this transition, and truncated to the stack height at [node], so as to avoid obtaining a vector that is longer than expected/necessary. *) let height = stack_height node in List.fold_left (fun v predecessor -> join v (truncate height (push (get predecessor) (Lr1.NodeSet.singleton predecessor)) ) ) bottom (Lr1.predecessors node) ) (* If every state is reachable, then the least fixed point must be non-bottom everywhere, so we may view it as a function that produces a vector of sets of states. 
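For instance, assuming a state [s] whose known stack height is 2 and whose incoming transitions originate in the states [p1] and [p2] only, the vector computed at [s] has the form [ {p1, p2}; U ], where the head cell describes the topmost stack cell and U is the join of the topmost cells of the vectors previously computed at [p1] and [p2].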
*) let stack_states (node : Lr1.node) : StateVector.property = match stack_states node with | Bottom -> (* apparently this node is unreachable *) assert false | NonBottom v -> v (* ------------------------------------------------------------------------ *) (* For each production, compute where (that is, in which states) this production can be reduced. *) let production_where : Lr1.NodeSet.t ProductionMap.t = Lr1.fold (fun accu node -> TerminalMap.fold (fun _ prods accu -> let prod = Misc.single prods in let nodes = try ProductionMap.lookup prod accu with Not_found -> Lr1.NodeSet.empty in ProductionMap.add prod (Lr1.NodeSet.add node nodes) accu ) (Lr1.reductions node) accu ) ProductionMap.empty let production_where (prod : Production.index) : Lr1.NodeSet.t = try (* Production [prod] may be reduced at [nodes]. *) let nodes = ProductionMap.lookup prod production_where in assert (not (Lr1.NodeSet.is_empty nodes)); nodes with Not_found -> (* The production [prod] is never reduced. *) Lr1.NodeSet.empty let ever_reduced prod = not (Lr1.NodeSet.is_empty (production_where prod)) let fold_reduced f prod accu = Lr1.NodeSet.fold f (production_where prod) accu (* ------------------------------------------------------------------------ *) (* Warn about productions that are never reduced. *) let () = let count = ref 0 in Production.iter (fun prod -> if Lr1.NodeSet.is_empty (production_where prod) then match Production.classify prod with | Some nt -> incr count; Error.grammar_warning (Nonterminal.positions nt) "symbol %s is never accepted." (Nonterminal.print false nt) | None -> incr count; Error.grammar_warning (Production.positions prod) "production %sis never reduced." (Production.print prod) ); if !count > 0 then Error.grammar_warning [] "in total, %d productions are never reduced." !count (* ------------------------------------------------------------------------ *) (* From the above information, deduce, for each production, the states that may appear in the stack when this production is reduced. *) (* We are careful to produce a vector of states whose length is exactly that of the production [prod]. *) let production_states : Production.index -> StateLattice.property = Production.tabulate (fun prod -> let nodes = production_where prod in let height = Production.length prod in Lr1.NodeSet.fold (fun node accu -> join accu (truncate height (NonBottom (stack_states node)) ) ) nodes bottom ) (* ------------------------------------------------------------------------ *) (* We now determine which states must be represented, that is, explicitly pushed onto the stack. For simplicity, a state is either always represented or never represented. More fine-grained strategies, where a single state is sometimes pushed onto the stack and sometimes not pushed, depending on which outgoing transition is being taken, are conceivable, but quite tricky, and probably not worth the trouble. (1) If two states are liable to appear within a single stack cell, then one is represented if and only if the other is represented. This ensures that the structure of stacks is known everywhere and that we can propose types for stacks. (2) If a state [s] has an outgoing transition along nonterminal symbol [nt], and if the [goto] table for symbol [nt] has more than one target, then state [s] is represented. (3) If a stack cell contains more than one state and if at least one of these states is able to handle the [error] token, then these states are represented. 
(4) If the semantic action associated with a production mentions the [$syntaxerror] keyword, then the state that is being reduced to (that is, the state that initiated the recognition of this production) is represented. (Indeed, it will be passed as an argument to [errorcase].) *) (* Data. *) let rep : bool UnionFind.point array = Array.init Lr1.n (fun _ -> UnionFind.fresh false) (* Getter. *) let represented state = rep.(Lr1.number state) (* Setters. *) let represent state = UnionFind.change (represented state) true let represents states = represent (Lr1.NodeSet.choose states) (* Enforce condition (1) above. *) let share (v : StateVector.property) = List.iter (fun states -> let dummy = UnionFind.fresh false in Lr1.NodeSet.iter (fun state -> UnionFind.eunion dummy (represented state) ) states ) v let () = Lr1.iter (fun node -> share (stack_states node) ); Production.iter (fun prod -> match production_states prod with | Bottom -> () | NonBottom v -> share v ) (* Enforce condition (2) above. *) let () = Nonterminal.iter (fun nt -> let count = Lr1.targets (fun count _ _ -> count + 1 ) 0 (Symbol.N nt) in if count > 1 then Lr1.targets (fun () sources _ -> List.iter represent sources ) () (Symbol.N nt) ) (* Enforce condition (3) above. *) let handler state = try let _ = SymbolMap.find (Symbol.T Terminal.error) (Lr1.transitions state) in true with Not_found -> try let _ = TerminalMap.lookup Terminal.error (Lr1.reductions state) in true with Not_found -> false let handlers states = Lr1.NodeSet.exists handler states let () = Lr1.iter (fun node -> let v = stack_states node in List.iter (fun states -> if Lr1.NodeSet.cardinal states >= 2 && handlers states then represents states ) v ) (* Enforce condition (4) above. *) let () = Production.iterx (fun prod -> if Action.has_syntaxerror (Production.action prod) then match production_states prod with | Bottom -> () | NonBottom v -> let sites = production_where prod in let length = Production.length prod in if length = 0 then Lr1.NodeSet.iter represent sites else let states = List.nth v (length - 1) in represents states ) (* Define accessors. *) let represented state = UnionFind.find (represented state) let representeds states = if Lr1.NodeSet.is_empty states then assert false else represented (Lr1.NodeSet.choose states) (* Statistics. *) let () = Error.logC 1 (fun f -> let count = Lr1.fold (fun count node -> if represented node then count + 1 else count ) 0 in Printf.fprintf f "%d out of %d states are represented.\n" count Lr1.n ) (* ------------------------------------------------------------------------ *) (* Accessors for information about the stack. *) (* We describe a stack prefix as a list of cells, where each cell is a pair of a symbol and a set of states. The top of the stack is the head of the list. *) type cell = Symbol.t * Lr1.NodeSet.t type word = cell list (* This auxiliary function converts a stack-as-an-array (top of stack at the right end) to a stack-as-a-list (top of stack at list head). *) let convert a = let n = Array.length a in let rec loop i accu = if i = n then accu else loop (i + 1) (a.(i) :: accu) in loop 0 [] (* [stack s] describes the stack when the automaton is in state [s]. *) let stack node : word = List.combine (convert (stack_symbols node)) (stack_states node) (* [prodstack prod] describes the stack when production [prod] is about to be reduced. *) let prodstack prod : word = match production_states prod with | Bottom -> (* This production is never reduced. 
*) assert false | NonBottom v -> List.combine (convert (Production.rhs prod)) v (* [gotostack nt] is the structure of the stack when a shift transition over nonterminal [nt] is about to be taken. It consists of just one cell. *) let gotostack : Nonterminal.t -> word = Nonterminal.tabulate (fun nt -> let sources = Lr1.targets (fun accu sources _ -> List.fold_right Lr1.NodeSet.add sources accu ) Lr1.NodeSet.empty (Symbol.N nt) in [ Symbol.N nt, sources ] ) let fold f accu w = List.fold_right (fun (symbol, states) accu -> f accu (representeds states) symbol states ) w accu let fold_top f accu w = match w with | [] -> accu | (symbol, states) :: _ -> f (representeds states) symbol let print (w : word) = let b = Buffer.create 64 in fold (fun () _represented symbol _states -> Buffer.add_string b (Symbol.print symbol); Buffer.add_char b ' ' ) () w; Buffer.contents b (* ------------------------------------------------------------------------ *) (* Explain how the stack should be deconstructed when an error is found. We sometimes have a choice as to how many stack cells should be popped. Indeed, several cells in the known suffix of the stack may physically hold a state. If none of these states handles errors, then we could jump to any of them. (Indeed, if we jump to one that's nearer, it will in turn pop further stack cells and jump to one that's farther.) In the interests of code size, we should pop as few stack cells as possible. So, we jump to the topmost represented state in the known suffix. *) type state = | Represented | UnRepresented of Lr1.node type instruction = | Die | DownTo of word * state let rewind node : instruction = let w = stack node in let rec rewind w = match w with | [] -> (* I believe that every stack description either is definite (that is, ends with [TailEmpty]) or contains at least one represented state. Thus, if we find an empty [w], this means that the stack is definitely empty. *) Die | ((_, states) as cell) :: w -> if representeds states then (* Here is a represented state. We will pop this cell and no more. *) DownTo ([ cell ], Represented) else if handlers states then begin (* Here is an unrepresented state that can handle errors. The cell must hold a singleton set of states, so we know which state to jump to, even though it isn't represented. *) assert (Lr1.NodeSet.cardinal states = 1); let state = Lr1.NodeSet.choose states in DownTo ([ cell ], UnRepresented state) end else (* Here is an unrepresented state that does not handle errors. Pop this cell and look further. *) match rewind w with | Die -> Die | DownTo (w, st) -> DownTo (cell :: w, st) in rewind w (* ------------------------------------------------------------------------ *) (* Machinery for the computation of which symbols must keep track of their start or end positions.
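A variable, in the sense of the solver below, is a pair of a symbol and a [where] flag: for instance, the variable (N expr, WhereStart), for a hypothetical nonterminal symbol [expr], stands for the assertion that [expr] keeps track of its start position. The constraints that we gather relate such Boolean variables; e.g., the constraint

     record_VarVar (Symbol.N nt, WhereStart) (rhs.(0), WhereStart)

emitted below, states that if [nt] tracks its start position, then the first symbol of this right-hand side must do so as well.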
*) open Keyword type variable = Symbol.t * where (* WhereStart or WhereEnd *) module M : Fix.IMPERATIVE_MAPS with type key = variable = struct type key = variable type 'data t = { mutable startp: 'data SymbolMap.t; mutable endp: 'data SymbolMap.t; } open SymbolMap let create() = { startp = empty; endp = empty } let clear m = m.startp <- empty; m.endp <- empty let add (sym, where) data m = match where with | WhereStart -> m.startp <- add sym data m.startp | WhereEnd -> m.endp <- add sym data m.endp | WhereSymbolStart -> assert false let find (sym, where) m = match where with | WhereStart -> find sym m.startp | WhereEnd -> find sym m.endp | WhereSymbolStart -> assert false let iter f m = iter (fun sym -> f (sym, WhereStart)) m.startp; iter (fun sym -> f (sym, WhereEnd)) m.endp end (* ------------------------------------------------------------------------ *) (* We now determine which positions must be kept track of. For simplicity, we do this on a per-symbol basis. That is, for each symbol, either we never keep track of position information, or we always do. In fact, we do distinguish start and end positions. This leads to computing two sets of symbols -- those that keep track of their start position and those that keep track of their end position. A symbol on the right-hand side of a production must keep track of its (start or end) position if that position is explicitly requested by a semantic action. Furthermore, if the left-hand symbol of a production must keep track of its start (resp. end) position, then the first (resp. last) symbol of its right-hand side (if there is one) must do so as well. That is, unless the right-hand side is empty. *) (* 2015/11/11. When a production [prod] is reduced, the top stack cell may be consulted for its end position. This implies that this cell must exist and must store an end position! Now, when does this happen? 1- This happens if [prod] is an epsilon production and the left-hand symbol of the production, [nt prod], keeps track of its start or end position. 2- This happens if the semantic action explicitly mentions the keyword [$endpos($0)]. Now, if this happens, what should we do? a- If this happens in a state [s] whose incoming symbol is [sym], then [sym] must keep track of its end position. b- If this happens in an initial state, where the stack may be empty, then the sentinel cell at the bottom of the stack must contain an end position. Point (b) doesn't concern us here, but point (a) does. We must implement the constraint (1) \/ (2) -> (a). Point (b) is taken care of in the code back-end, where, for simplicity, we always create a sentinel cell. *) (* I will say that this is a lot more sophisticated than I would like. The code back-end has been known for its efficiency and I am trying to maintain this property -- in particular, I would like to keep track of no positions at all, if the user doesn't use any position keyword. But I am suffering. *) module S = FixSolver.Make(M)(Boolean) let record_ConVar, record_VarVar, solve = S.create() let () = (* We gather the constraints explained above in two loops. The first loop looks at every (non-start) production [prod]. The second loop looks at every (non-initial) state [s]. *) Production.iterx (fun prod -> let nt, rhs = Production.def prod and ids = Production.identifiers prod and action = Production.action prod in let length = Array.length rhs in if length > 0 then begin (* If [nt] keeps track of its start position, then the first symbol in the right-hand side must do so as well. 
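For instance, in a hypothetical production expr -> LPAREN expr RPAREN, if [expr] must keep track of its start position, then the token LPAREN must do so as well.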
*) record_VarVar (Symbol.N nt, WhereStart) (rhs.(0), WhereStart); (* If [nt] keeps track of its end position, then the last symbol in the right-hand side must do so as well. *) record_VarVar (Symbol.N nt, WhereEnd) (rhs.(length - 1), WhereEnd) end; KeywordSet.iter (function | SyntaxError -> () | Position (Before, _, _) -> (* Doing nothing here because [$endpos($0)] is dealt with in the second loop. *) () | Position (Left, _, _) -> (* [$startpos] and [$endpos] have been expanded away. *) assert false | Position (RightNamed _, WhereSymbolStart, _) -> (* [$symbolstartpos(x)] does not exist. *) assert false | Position (RightNamed id, where, _) -> (* If the semantic action mentions [$startpos($i)], then the [i]-th symbol in the right-hand side must keep track of its start position. Similarly for end positions. *) Array.iteri (fun i id' -> if id = id' then record_ConVar true (rhs.(i), where) ) ids ) (Action.keywords action) ); (* end of loop on productions *) Lr1.iterx (fun s -> (* Let [sym] be the incoming symbol of state [s]. *) let sym = Misc.unSome (Lr1.incoming_symbol s) in (* Condition (1) in the long comment above (2015/11/11). If an epsilon production [prod] can be reduced in state [s], if its left-hand side [nt] keeps track of its start or end position, then [sym] must keep track of its end position. *) TerminalMap.iter (fun _ prods -> let prod = Misc.single prods in let nt, rhs = Production.def prod in let length = Array.length rhs in if length = 0 then begin record_VarVar (Symbol.N nt, WhereStart) (sym, WhereEnd); record_VarVar (Symbol.N nt, WhereEnd) (sym, WhereEnd) end ) (Lr1.reductions s); (* Condition (2) in the long comment above (2015/11/11). If a production can be reduced in state [s] and mentions [$endpos($0)], then [sym] must keep track of its end position. *) if Lr1.has_beforeend s then record_ConVar true (sym, WhereEnd) ) let track : variable -> bool = solve() let startp symbol = track (symbol, WhereStart) let endp symbol = track (symbol, WhereEnd) let for_every_symbol (f : Symbol.t -> unit) : unit = Terminal.iter (fun t -> f (Symbol.T t)); Nonterminal.iter (fun nt -> f (Symbol.N nt)) let sum_over_every_symbol (f : Symbol.t -> bool) : int = let c = ref 0 in for_every_symbol (fun sym -> if f sym then c := !c + 1); !c let () = Error.logC 1 (fun f -> Printf.fprintf f "%d out of %d symbols keep track of their start position.\n\ %d out of %d symbols keep track of their end position.\n" (sum_over_every_symbol startp) (Terminal.n + Nonterminal.n) (sum_over_every_symbol endp) (Terminal.n + Nonterminal.n)) (* ------------------------------------------------------------------------- *) (* Miscellaneous. *) let universal symbol = Lr1.fold (fun universal s -> universal && (if represented s then SymbolMap.mem symbol (Lr1.transitions s) else true) ) true (* ------------------------------------------------------------------------ *) (* Discover which states can peek at an error. These are the states where an error token may be on the stream. These are the states that are targets of a reduce action on [error]. *) (* 2012/08/25 I am optimizing this code, whose original version I found had quadratic complexity. The problem is as follows. We can easily iterate over all states to find which states [s] have a reduce action on error. What we must find out, then, is into which state [t] this reduce action takes us. This is not easy to predict, as it depends on the contents of the stack. 
The original code used an overapproximation, as follows: if the reduction concerns a production whose head symbol is [nt], then all of the states that have an incoming transition labeled [nt] are potential targets. The new version of the code below relies on the same approximation, but uses two successive loops instead of two nested loops. *) let errorpeekers = (* First compute a set of symbols [nt]... *) let nts : SymbolSet.t = Lr1.fold (fun nts node -> try let prods = TerminalMap.lookup Terminal.error (Lr1.reductions node) in let prod = Misc.single prods in let nt = Production.nt prod in SymbolSet.add (Symbol.N nt) nts with Not_found -> nts ) SymbolSet.empty in (* ... then compute the set of all target states of all transitions labeled by some symbol in the set [nt]. *) SymbolSet.fold (fun nt errorpeekers -> Lr1.targets (fun errorpeekers _ target -> Lr1.NodeSet.add target errorpeekers ) errorpeekers nt ) nts Lr1.NodeSet.empty let errorpeeker node = Lr1.NodeSet.mem node errorpeekers (* ------------------------------------------------------------------------ *) (* Here is how we check whether state [s] should have a default reduction. We check whether [s] has no outgoing shift transitions and only has one possible reduction action. In that case, we produce a default reduction action, that is, we perform reduction without consulting the lookahead token. This saves code, but can alter the parser's behavior in the presence of errors. The check for default actions subsumes the check for the case where [s] admits a reduce action with lookahead symbol "#". In that case, it must be the only possible action -- see [Lr1.default_conflict_resolution]. That is, we have reached a point where we have recognized a well-formed input and are now expecting an end-of-stream. In that case, performing reduction without looking at the next token is the right thing to do, since there should in fact be none. The state that we reduce to will also have the same property, and so on, so we will in fact end up rewinding the entire stack and accepting the input when the stack becomes empty. (New as of 2012/01/23.) A state where a shift/reduce conflict was solved in favor of neither (due to a use of the %nonassoc directive) must not perform a default reduction. Indeed, this would effectively mean that the failure that was requested by the user is forgotten and replaced with a reduction. This surprising behavior is present in ocamlyacc and was present in earlier versions of Menhir. See e.g. http://caml.inria.fr/mantis/view.php?id=5462 There is a chance that we might run into trouble if the ideas described in the above two paragraphs collide, that is, if we forbid a default reduction (due to a shift/reduce conflict solved by %nonassoc) in a node where we would like to have default reduction on "#". This situation seems unlikely to arise, so I will not do anything about it for the moment. (Furthermore, someone who uses precedence declarations is looking for trouble anyway.) Between 2012/05/25 and 2015/09/25, if [--canonical] has been specified, then we disallow default reductions on a normal token, because we do not want to introduce any spurious actions into the automaton. We do still allow default reductions on "#", since they are needed for the automaton to terminate properly. From 2015/09/25 on, we again always allow default reductions, as they seem to be beneficial when explaining syntax errors. 
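In summary, the code below assigns a default reduction to a state [s] if and only if default reductions are not forbidden at [s], the reduction table of [s] maps every lookahead token to one single production [prod], and every outgoing transition of [s] is labeled with a nonterminal symbol. Thus, for instance, a state whose only action is to reduce [prod], regardless of the lookahead token, receives the default reduction (prod, toks), where [toks] collects those lookahead tokens.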
*) let (has_default_reduction : Lr1.node -> (Production.index * TerminalSet.t) option), hdrcount = Misc.tabulateo Lr1.number Lr1.fold Lr1.n (fun s -> if Lr1.forbid_default_reduction s then None else let reduction = ProductionMap.is_singleton (Lr1.invert (Lr1.reductions s)) in match reduction with | Some _ -> if SymbolMap.purelynonterminal (Lr1.transitions s) then reduction else None | None -> reduction ) let () = Error.logC 1 (fun f -> Printf.fprintf f "%d out of %d states have a default reduction.\n" hdrcount Lr1.n) (* ------------------------------------------------------------------------ *) let () = Time.tick "Constructing the invariant" (* ------------------------------------------------------------------------ *) (* If any fatal error was signaled up to this point, stop now. This may include errors signaled in the modules [lr1] and [invariant] by calling the function [Error.grammar_warning]. *) let () = if Error.errors() then exit 1 menhir-20151112/src/nonTerminalDefinitionInlining.mli0000644000175000017500000000047312621170073021454 0ustar mehdimehdi(** [inline g] traverses the rules of [g] and inlines the non terminal definitions that are marked with [%inline]. It returns a pair of the transformed grammar and a flag that tells whether any inlining was actually done. *) val inline: UnparameterizedSyntax.grammar -> UnparameterizedSyntax.grammar * bool menhir-20151112/src/concreteSyntax.mli0000644000175000017500000000037412621170073016476 0ustar mehdimehditype grammar = { pg_filename : Syntax.filename; pg_declarations : (Syntax.declaration Positions.located) list; pg_rules : Syntax.parameterized_rule list; pg_trailer : Syntax.trailer option; } menhir-20151112/src/LRijkstra.mli0000644000175000017500000000122612621170073015367 0ustar mehdimehdi(* The purpose of this algorithm is to find, for each pair of a state [s] and a terminal symbol [z] such that looking at [z] in state [s] causes an error, a minimal path (starting in some initial state) that actually triggers this error. *) (* The result of this analysis is a [.messages] file. It is written to the standard output channel. No result is returned. *) module Run (X : sig (* If [verbose] is set, produce various messages on [stderr]. *) val verbose: bool (* If [statistics] is defined, it is interpreted as the name of a file to which one line of statistics is appended. *) val statistics: string option end) : sig end menhir-20151112/src/DependencyGraph.mli0000644000175000017500000000035412621170073016523 0ustar mehdimehdi(* Build and print the forward reference graph of the grammar. There is an edge of a nonterminal symbol [nt1] to every nonterminal symbol [nt2] that occurs in the definition of [nt1]. *) val print_dependency_graph: unit -> unit menhir-20151112/src/keyword.mli0000644000175000017500000000315112621170073015145 0ustar mehdimehdi(* This module provides some type and function definitions that help deal with the keywords that we recognize within semantic actions. *) (* The user can request position information either at type [int] (a simple offset) or at type [Lexing.position]. *) type flavor = | FlavorOffset | FlavorPosition (* The user can request position information about the $start or $end of a symbol. Also, $symbolstart requests the computation of the start position of the first nonempty element in a production. 
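For instance, in a hypothetical production exp -> ws exp', where [ws] can derive the empty string, $symbolstartpos denotes the start position of [exp'] whenever [ws] matches nothing, whereas $startpos denotes the (possibly earlier) position where [ws] begins.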
*) type where = | WhereSymbolStart | WhereStart | WhereEnd (* The user can request position information about a production's left-hand side or about one of the symbols in its right-hand side, which he must refer to by name. (Referring to its symbol by its position, using [$i], is permitted in the concrete syntax, but the lexer eliminates this form.) We add a new subject, [Before], which corresponds to [$endpos($0)] in concrete syntax. We adopt the (slightly awkward) convention that when the subject is [Before], the [where] component must be [WhereEnd]. *) type subject = | Before | Left | RightNamed of string (* Keywords inside semantic actions. They allow access to semantic values or to position information. *) type keyword = | Position of subject * where * flavor | SyntaxError (* This maps a [Position] keyword to the name of the variable that the keyword is replaced with. *) val posvar: subject -> where -> flavor -> string (* Sets of keywords. *) module KeywordSet : sig include Set.S with type elt = keyword val map: (keyword -> keyword) -> t -> t end menhir-20151112/src/partialGrammar.mli0000644000175000017500000000012512621170073016422 0ustar mehdimehdival join_partial_grammars : ConcreteSyntax.grammar list -> InternalSyntax.grammar menhir-20151112/src/codePieces.mli0000644000175000017500000000643312621170073015532 0ustar mehdimehdi(* This module defines many internal naming conventions for use by the two code generators, [CodeBackend] and [TableBackend]. It also offers a few code generation facilities. *) open IL open Grammar (* ------------------------------------------------------------------------ *) (* Naming conventions. *) (* The type variable associated with a nonterminal [nt]. *) val ntvar : Nonterminal.t -> string (* The variable that holds the environment. This is a parameter to all functions. We do not make it a global variable because we wish to preserve re-entrancy. *) val env : string (* A variable used to hold a semantic value. *) val semv : string (* A variable used to hold a stack. *) val stack: string (* A variable used to hold a state. *) val state: string (* A variable used to hold a token. *) val token: string (* Variables used to hold start and end positions. *) val beforeendp: string val startp: string val endp: string (* ------------------------------------------------------------------------ *) (* Types for semantic values. *) (* [semvtypent nt] is the type of the semantic value associated with nonterminal [nt]. *) val semvtypent : Nonterminal.t -> typ (* [semvtypetok tok] is the type of the semantic value associated with token [tok]. There is no such type if the token does not have a semantic value. *) val semvtypetok : Terminal.t -> typ list (* [semvtype symbol] is the type of the semantic value associated with [symbol]. *) val semvtype : Symbol.t -> typ list (* [symvalt] returns the empty list if the symbol at hand carries no semantic value and the singleton list [[f t]] if it carries a semantic value of type [t]. *) val symvalt : Symbol.t -> (typ -> 'a) -> 'a list (* [symval symbol x] returns either the empty list or the singleton list [[x]], depending on whether [symbol] carries a semantic value. *) val symval : Symbol.t -> 'a -> 'a list (* [tokval] is a version of [symval], specialized for terminal symbols. *) val tokval : Terminal.t -> 'a -> 'a list (* ------------------------------------------------------------------------ *) (* Patterns for tokens. *) (* [tokpat tok] is a pattern that matches the token [tok], without binding its semantic value. 
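For instance, assuming a token INT that carries a semantic value, [tokpat INT] stands (roughly) for the pattern INT _, whereas [tokpatv INT], described below, stands for INT semv, where [semv] is the variable declared above.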
*) val tokpat: Terminal.t -> pattern (* [tokpatv tok] is a pattern that matches the token [tok], and binds its semantic value, if it has one, to the variable [semv]. *) val tokpatv: Terminal.t -> pattern (* [tokspat toks] is a pattern that matches any token in the set [toks], without binding its semantic value. *) val tokspat: TerminalSet.t -> pattern (* [destructuretokendef name codomain bindsemv branch] generates the definition of a function that destructure tokens. [name] is the name of the function that is generated. [codomain] is its return type. [bindsemv] tells whether the variable [semv] should be bound. [branch] is applied to each (non-pseudo) terminal and must produce code for each branch. *) val destructuretokendef: string -> typ -> bool -> (Terminal.t -> expr) -> valdef (* ------------------------------------------------------------------------ *) (* A global variable holds the exception [Error]. *) (* The definition of this global variable. *) val excvaldef: valdef (* A reference to this global variable. *) val errorval: expr menhir-20151112/src/reachability.mli0000644000175000017500000000034712621170073016125 0ustar mehdimehdi(* This extremely simple analysis restricts a grammar to the set of nonterminals that are reachable, via productions, from the start nonterminals. *) val trim: UnparameterizedSyntax.grammar -> UnparameterizedSyntax.grammar menhir-20151112/src/referenceInterpreter.ml0000644000175000017500000002324312621170073017476 0ustar mehdimehdiopen Grammar open Cst (* ------------------------------------------------------------------------ *) (* Set up all of the information required by the LR engine. Everything is read directly from [Grammar] and [Lr1]. *) module T = struct type state = Lr1.node let number = Lr1.number type token = Terminal.t type terminal = Terminal.t type semantic_value = cst let token2terminal (token : token) : terminal = token let token2value (token : token) : semantic_value = CstTerminal token let error_terminal = Terminal.error let error_value = CstError type production = Production.index let default_reduction (s : state) defred nodefred env = match Invariant.has_default_reduction s with | Some (prod, _) -> defred env prod | None -> nodefred env let action (s : state) (tok : terminal) value shift reduce fail env = (* Check whether [s] has an outgoing shift transition along [tok]. *) try let s' : state = SymbolMap.find (Symbol.T tok) (Lr1.transitions s) in (* There is such a transition. Return either [ShiftDiscard] or [ShiftNoDiscard], depending on the existence of a default reduction on [#] at [s']. *) match Invariant.has_default_reduction s' with | Some (_, toks) when TerminalSet.mem Terminal.sharp toks -> shift env false tok value s' | _ -> shift env true tok value s' (* There is no such transition. Look for a reduction. *) with Not_found -> try let prod = Misc.single (TerminalMap.find tok (Lr1.reductions s)) in reduce env prod (* There is no reduction either. Fail. *) with Not_found -> fail env let goto (s : state) (prod : production) : state = try SymbolMap.find (Symbol.N (Production.nt prod)) (Lr1.transitions s) with Not_found -> assert false open MenhirLib.EngineTypes exception Error (* By convention, a semantic action returns a new stack. It does not affect [env]. *) let is_start = Production.is_start type semantic_action = (state, semantic_value, token) env -> (state, semantic_value) stack let semantic_action (prod : production) : semantic_action = fun env -> assert (not (Production.is_start prod)); (* Reduce. 
Pop a suffix of the stack, and use it to construct a new concrete syntax tree node. *) let n = Production.length prod in let values : semantic_value array = Array.make n CstError (* dummy *) and startp = ref Lexing.dummy_pos and endp= ref Lexing.dummy_pos and current = ref env.current and stack = ref env.stack in (* We now enter a loop to pop [k] stack cells and (after that) push a new cell onto the stack. *) (* This loop does not update [env.current]. Instead, the state in the newly pushed stack cell will be used (by our caller) as a basis for a goto transition, and [env.current] will be updated (if necessary) then. *) for k = n downto 1 do (* Fetch a semantic value. *) values.(k - 1) <- !stack.semv; (* Pop one cell. The stack must be non-empty. As we pop a cell, change the automaton's current state to the one stored within the cell. (It is sufficient to do this only when [k] is 1, since the last write overwrites any and all previous writes.) If this is the first (last) cell that we pop, update [endp] ([startp]). *) let next = !stack.next in assert (!stack != next); if k = n then begin endp := !stack.endp end; if k = 1 then begin current := !stack.state; startp := !stack.startp end; stack := next done; (* Done popping. *) (* Construct and push a new stack cell. The associated semantic value is a new concrete syntax tree. *) { state = !current; semv = CstNonTerminal (prod, values); startp = !startp; endp = !endp; next = !stack } (* The logging functions that follow are called only if [log] is [true]. *) module Log = struct open Printf let state s = fprintf stderr "State %d:" (Lr1.number s); prerr_newline() let shift tok s' = fprintf stderr "Shifting (%s) to state %d" (Terminal.print tok) (Lr1.number s'); prerr_newline() let reduce_or_accept prod = match Production.classify prod with | Some _ -> fprintf stderr "Accepting"; prerr_newline() | None -> fprintf stderr "Reducing production %s" (Production.print prod); prerr_newline() let lookahead_token tok startp endp = fprintf stderr "Lookahead token is now %s (%d-%d)" (Terminal.print tok) startp.Lexing.pos_cnum endp.Lexing.pos_cnum; prerr_newline() let initiating_error_handling () = fprintf stderr "Initiating error handling"; prerr_newline() let resuming_error_handling () = fprintf stderr "Resuming error handling"; prerr_newline() let handling_error s = fprintf stderr "Handling error in state %d" (Lr1.number s); prerr_newline() end end (* ------------------------------------------------------------------------ *) (* Define a palatable user entry point. *) let interpret log nt lexer lexbuf = (* Instantiate the LR engine. *) let module E = MenhirLib.Engine.Make (struct include T let log = log end) in (* Run it. *) try Some (E.entry (Lr1.entry_of_nt nt) lexer lexbuf) with T.Error -> None (* ------------------------------------------------------------------------ *) (* Another entry point, used internally by [LRijkstra] to check that the sentences that [LRijkstra] produces do lead to an error in the expected state. *) open MenhirLib.General (* streams *) type spurious_reduction = Lr1.node * Production.index type target = Lr1.node * spurious_reduction list type check_error_path_outcome = (* Bad: the input was read past its end. *) | OInputReadPastEnd (* Bad: a syntax error occurred before all of the input was read. *) | OInputNotFullyConsumed (* Bad: the parser unexpectedly accepted (part of) this input. *) | OUnexpectedAccept (* Good: a syntax error occurred after reading the last input token. 
We report in which state the error took place, as well as a list of spurious reductions. A non-default reduction that takes place after looking at the last input token (i.e., the erroneous token) is spurious. Furthermore, any reduction that takes place after a spurious reduction is itself spurious. We note that a spurious reduction can take place only in a non-canonical LR automaton. *) | OK of target let check_error_path log nt input = (* Instantiate the LR engine. *) let module E = MenhirLib.Engine.Make (struct include T let log = log end) in (* Determine the initial state. *) let entry = Lr1.entry_of_nt nt in (* This function helps extract the current parser state out of [env]. It may become unnecessary if the [Engine] API offers it. *) let current env = (* Peek at the stack. If empty, then we must be in the initial state. *) match Lazy.force (E.stack env) with | Nil -> entry | Cons (E.Element (s, _, _, _), _) -> s in (* Set up a function that delivers tokens one by one. *) let input = ref input in let next () = match !input with | [] -> None | t :: ts -> input := ts; Some t in let looking_at_last_token () : bool = !input = [] in (* Run. We wish to stop at the first error (without handling the error in any way) and report in which state the error occurred. A clean way of doing this is to use the incremental API, as follows. The main loop resembles the [loop] function in [Engine]. *) (* Another reason why we write our own loop is that we wish to detect spurious reductions. We accumulate these reductions in [spurious], a (reversed) list of productions. *) let rec loop (checkpoint : cst E.checkpoint) (spurious : spurious_reduction list) = match checkpoint with | E.InputNeeded _ -> begin match next() with | None -> OInputReadPastEnd | Some t -> loop (E.offer checkpoint (t, Lexing.dummy_pos, Lexing.dummy_pos)) spurious end | E.Shifting _ -> loop (E.resume checkpoint) spurious | E.AboutToReduce (env, prod) -> (* If we have requested the last input token and if this is not a default reduction, then this is a spurious reduction. Furthermore, if a spurious reduction has taken place already, then this is also a spurious reduction. *) let spurious = if looking_at_last_token() && not (E.has_default_reduction env) || spurious <> [] then (current env, prod) :: spurious else spurious in loop (E.resume checkpoint) spurious | E.HandlingError env -> (* Check that all of the input has been read. Otherwise, the error has occurred sooner than expected. *) if !input = [] then (* Return the current state and the list of spurious reductions. *) OK (current env, List.rev spurious) else OInputNotFullyConsumed | E.Accepted _ -> (* The parser has succeeded. This is unexpected. *) OUnexpectedAccept | E.Rejected -> (* The parser rejects this input. This should not happen; we should observe [HandlingError _] first. *) assert false in loop (E.start entry Lexing.dummy_pos) [] menhir-20151112/src/compressedBitSet.ml0000644000175000017500000001044412621170073016572 0ustar mehdimehdi(* A compressed (or should we say sparse?) bit set is a list of pairs of integers. The first component of every pair is an index, while the second component is a bit field. The list is sorted by order of increasing indices. *) type t = | N | C of int * int * t type element = int let word_size = Sys.word_size - 1 let empty = N let is_empty = function | N -> true | C _ -> false let add i s = let ioffset = i mod word_size in let iaddr = i - ioffset and imask = 1 lsl ioffset in let rec add = function | N -> (* Insert at end. 
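As a concrete illustration, on a 64-bit machine, where [word_size] is 63: [add 5 N] computes [ioffset = 5], hence [iaddr = 0] and [imask = 1 lsl 5 = 32], and therefore builds the cell [C (0, 32, N)].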
*) C (iaddr, imask, N) | C (addr, ss, qs) as s -> if iaddr < addr then (* Insert in front. *) C (iaddr, imask, s) else if iaddr = addr then (* Found appropriate cell, update bit field. *) let ss' = ss lor imask in if ss' = ss then s else C (addr, ss', qs) else (* Not there yet, continue. *) let qs' = add qs in if qs == qs' then s else C (addr, ss, qs') in add s let singleton i = add i N let remove i s = let ioffset = i mod word_size in let iaddr = i - ioffset and imask = 1 lsl ioffset in let rec remove = function | N -> N | C (addr, ss, qs) as s -> if iaddr < addr then s else if iaddr = addr then (* Found appropriate cell, update bit field. *) let ss' = ss land (lnot imask) in if ss' = 0 then qs else if ss' = ss then s else C (addr, ss', qs) else (* Not there yet, continue. *) let qs' = remove qs in if qs == qs' then s else C (addr, ss, qs') in remove s let rec fold f s accu = match s with | N -> accu | C (base, ss, qs) -> loop f qs base ss accu and loop f qs i ss accu = if ss = 0 then fold f qs accu else (* One could in principle check whether [ss land 0x3] is zero and if so move to [i + 2] and [ss lsr 2], and similarly for various sizes. In practice, this does not seem to make a measurable difference. *) loop f qs (i + 1) (ss lsr 1) (if ss land 1 = 1 then f i accu else accu) let iter f s = fold (fun x () -> f x) s () let is_singleton s = match s with | C (_, ss, N) -> (* Test whether only one bit is set in [ss]. We do this by turning off the rightmost bit, then comparing to zero. *) ss land (ss - 1) = 0 | C (_, _, C _) | N -> false let cardinal s = fold (fun _ m -> m + 1) s 0 let elements s = fold (fun tl hd -> tl :: hd) s [] let rec subset s1 s2 = match s1, s2 with | N, _ -> true | _, N -> false | C (addr1, ss1, qs1), C (addr2, ss2, qs2) -> if addr1 < addr2 then false else if addr1 = addr2 then if (ss1 land ss2) <> ss1 then false else subset qs1 qs2 else subset s1 qs2 let mem i s = subset (singleton i) s let rec union s1 s2 = match s1, s2 with | N, s | s, N -> s | C (addr1, ss1, qs1), C (addr2, ss2, qs2) -> if addr1 < addr2 then C (addr1, ss1, union qs1 s2) else if addr1 > addr2 then let s = union s1 qs2 in if s == qs2 then s2 else C (addr2, ss2, s) else let ss = ss1 lor ss2 in let s = union qs1 qs2 in if ss == ss2 && s == qs2 then s2 else C (addr1, ss, s) let rec inter s1 s2 = match s1, s2 with | N, _ | _, N -> N | C (addr1, ss1, qs1), C (addr2, ss2, qs2) -> if addr1 < addr2 then inter qs1 s2 else if addr1 > addr2 then inter s1 qs2 else let ss = ss1 land ss2 in let s = inter qs1 qs2 in if ss = 0 then s else if (ss = ss1) && (s == qs1) then s1 else C (addr1, ss, s) exception Found of int let choose s = try iter (fun x -> raise (Found x) ) s; raise Not_found with Found x -> x let rec compare s1 s2 = match s1, s2 with N, N -> 0 | _, N -> 1 | N, _ -> -1 | C (addr1, ss1, qs1), C (addr2, ss2, qs2) -> if addr1 < addr2 then -1 else if addr1 > addr2 then 1 else if ss1 < ss2 then -1 else if ss1 > ss2 then 1 else compare qs1 qs2 let equal s1 s2 = compare s1 s2 = 0 let rec disjoint s1 s2 = match s1, s2 with | N, _ | _, N -> true | C (addr1, ss1, qs1), C (addr2, ss2, qs2) -> if addr1 = addr2 then if (ss1 land ss2) = 0 then disjoint qs1 qs2 else false else if addr1 < addr2 then disjoint qs1 s2 else disjoint s1 qs2 menhir-20151112/src/Boolean.ml0000644000175000017500000000030312621170073014663 0ustar mehdimehdi(* The Boolean lattice. 
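This is the two-point lattice over [false] and [true]: [bottom] is [false], [union] is disjunction, and [true] is the unique maximal element. Consequently, [union bottom b] is [b], and a fixed point computation may stop refining a property as soon as [is_maximal] holds, since no further [union] can change it.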
*) type property = bool let bottom = false let equal (b1 : bool) (b2 : bool) = b1 = b2 let is_maximal b = b let union (b1 : bool) (b2 : bool) = b1 || b2 menhir-20151112/src/resizableArray.ml0000644000175000017500000000566012621170073016276 0ustar mehdimehdi(* This module implements resizable arrays, that is, arrays that can grow upon explicit request. *) type 'a t = { (* The default element is used to fill empty slots when growing or shrinking the physical array. *) default: 'a; (* The init function is used to initialize newly allocated slots when growing the logical array. *) init: int -> 'a; (* The logical size of this array. *) mutable size: int; (* The physical array, whose length is at least [size]. *) mutable table: 'a array } let make capacity default init = (* [capacity] must be nonzero, so that doubling it actually enlarges the array. *) assert (capacity >= 0); let capacity = if capacity = 0 then 1 else capacity in let table = Array.make capacity default in { default; init; size = 0; table } let make_ capacity default = make capacity default (fun _ -> default) let length a = a.size let get a i = assert (0 <= i && i < a.size); Array.unsafe_get a.table (i) let set a i x = assert (0 <= i && i < a.size); Array.unsafe_set a.table (i) x let shrink a s = (* This is [resize a s], assuming [0 <= s < a.size]. *) Array.fill a.table s (a.size - s) a.default; a.size <- s let grow a s = (* This is [resize a s], assuming [0 <= s && a.size < s]. *) let n = Array.length a.table in if s > n then begin (* The physical size of the array must increase. The new size is at least double of the previous size, and larger if requested. *) let table = Array.make (max (2 * n) s) a.default in Array.blit a.table 0 table 0 n; a.table <- table end; (* From [a.size] to [s], we have new logical slots. Initialize them. *) let init = a.init and table = a.table in for i = a.size to s - 1 do Array.unsafe_set table i (init i) done; (* Update the array's logical size. *) a.size <- s let resize a s = assert (0 <= s); if s < a.size then shrink a s else if s > a.size then grow a s let push a x = let s = a.size in (* equivalent to: [length a] *) begin (* equivalent to: [resize a (s + 1)] *) let s = s + 1 in let n = Array.length a.table in if s > n then begin (* assert (s = n + 1); *) (* assert (max (2 * n) s = 2 * n); *) let table = Array.make (2 * n) a.default in Array.blit a.table 0 table 0 n; a.table <- table end; (* No need to call [init], since there is just one new logical slot and we are about to write it anyway. *) a.size <- s end; Array.unsafe_set a.table (s) x (* equivalent to: [set a s x] *) let pop a = let s = a.size in (* equivalent to: [length a] *) assert (s > 0); let s = s - 1 in a.size <- s; let table = a.table in let x = Array.unsafe_get table (s) in (* equivalent to: [get a s] *) Array.unsafe_set table (s) a.default; (* equivalent to: [resize a s] *) x let default a = a.default menhir-20151112/src/inliner.ml0000644000175000017500000001777612621170073014772 0ustar mehdimehdiopen IL open CodeBits (* In the following, we only inline global functions. In order to avoid unintended capture, as we traverse terms, we keep track of local identifiers that hide global ones. The following little class helps do that. (The pathological case where a local binding hides a global one probably does not arise very often. Fortunately, checking against it in this way is quite cheap, and lets me sleep safely.) 
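As a hypothetical illustration of the pathological case: if a global function [f] is a candidate for inlining, then, in the expression [let f = g in f x], the occurrence of [f] refers to the local binding; it must be counted neither as a use of the global [f] nor as an inlining opportunity.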
*) class locals table = object method pvar (locals : StringSet.t) (id : string) = if Hashtbl.mem table id then StringSet.add id locals else locals end (* Here is the inliner. *) let inline_valdefs (defs : valdef list) : valdef list = (* Create a table of all global definitions. *) let before, table = Traverse.tabulate_defs defs in (* Prepare to count how many times each function is used, including inside its own definition. The public functions serve as starting points for this discovery phase. *) let queue : valdef Queue.t = Queue.create() and usage : int StringMap.t ref = ref StringMap.empty in (* [visit] is called at every identifier occurrence. *) let visit locals id = if StringSet.mem id locals then (* This is a local identifier. Do nothing. *) () else try let _, def = Hashtbl.find table id in (* This is a globally defined identifier. Increment its usage count. If it was never visited, enqueue its definition for exploration. *) let n = try StringMap.find id !usage with Not_found -> Queue.add def queue; 0 in usage := StringMap.add id (n + 1) !usage with Not_found -> (* This identifier is not global. It is either local or a reference to some external library, e.g. ocaml's standard library. *) () in (* Look for occurrences of identifiers inside expressions. *) let o = object inherit [ StringSet.t, unit ] Traverse.fold inherit locals table method! evar locals () id = visit locals id end in (* Initialize the queue with all public definitions, and work from there. We assume that the left-hand side of every definition is a variable. *) List.iter (fun { valpublic = public; valpat = p } -> if public then visit StringSet.empty (pat2var p) ) defs; Misc.qfold (o#valdef StringSet.empty) () queue; let usage = !usage in (* Now, inline every function that is called at most once. At the same time, every function that is never called is dropped. The public functions again serve as starting points for the traversal. *) let queue : valdef Queue.t = Queue.create() and emitted = ref StringSet.empty in let enqueue def = let id = pat2var def.valpat in if not (StringSet.mem id !emitted) then begin emitted := StringSet.add id !emitted; Queue.add def queue end in (* A simple application is an application of a variable to a number of variables, constants, or record accesses out of variables. *) let rec is_simple_arg = function | EVar _ | EData (_, []) | ERecordAccess (EVar _, _) -> true | EMagic e -> is_simple_arg e | _ -> false in let is_simple_app = function | EApp (EVar _, actuals) -> List.for_all is_simple_arg actuals | _ -> false in (* Taking a fresh instance of a type scheme. Ugly. *) let instance = let count = ref 0 in let fresh tv = incr count; tv, Printf.sprintf "freshtv%d" !count in fun scheme -> let mapping = List.map fresh scheme.quantifiers in let rec sub typ = match typ with | TypTextual _ -> typ | TypVar v -> begin try TypVar (List.assoc v mapping) with Not_found -> typ end | TypApp (f, typs) -> TypApp (f, List.map sub typs) | TypTuple typs -> TypTuple (List.map sub typs) | TypArrow (typ1, typ2) -> TypArrow (sub typ1, sub typ2) in sub scheme.body in (* Destructuring a type annotation. *) let rec annotate formals body typ = match formals, typ with | [], _ -> [], EAnnot (body, type2scheme typ) | formal :: formals, TypArrow (targ, tres) -> let formals, body = annotate formals body tres in PAnnot (formal, targ) :: formals, body | _ :: _, _ -> (* Type annotation has insufficient arity. *) assert false in (* The heart of the inliner: rewriting a function call to a [let] expression. 
If there was a type annotation at the function definition site, it is dropped, provided [--infer] was enabled. Otherwise, it is kept, because, due to the presence of [EMagic] expressions in the code, dropping a type annotation could cause an ill-typed program to become apparently well-typed. Keeping a type annotation requires taking a fresh instance of the type scheme, because OCaml doesn't have support for locally and existentially bound type variables. Yuck. *) let inline formals actuals body oscheme = assert (List.length actuals = List.length formals); match oscheme with | Some scheme when not Settings.infer -> let formals, body = annotate formals body (instance scheme) in mlet formals actuals body | _ -> mlet formals actuals body in (* Look for occurrences of identifiers inside expressions, branches, etc. and replace them with their definitions if they have only one use site or if their definitions are sufficiently simple. *) let o = object (self) inherit [ StringSet.t ] Traverse.map as super inherit locals table method! eapp locals e actuals = match e with | EVar id when (Hashtbl.mem table id) && (* a global identifier *) (not (StringSet.mem id locals)) (* not hidden by a local identifier *) -> let _, def = Hashtbl.find table id in (* cannot fail, thanks to the above check *) let formals, body, oscheme = match def with | { valval = EFun (formals, body) } -> formals, body, None | { valval = EAnnot (EFun (formals, body), scheme) } -> formals, body, Some scheme | { valval = _ } -> (* The definition is not a function definition. This should not happen in the kind of code that we generate. *) assert false in assert (StringMap.mem id usage); if StringMap.find id usage = 1 || is_simple_app body then (* The definition can be inlined, with beta reduction. *) inline formals (self#exprs locals actuals) (EComment (id, self#expr locals body)) oscheme else begin (* The definition cannot be inlined. *) enqueue def; super#eapp locals e actuals end | _ -> (* The thing in function position is not a reference to a global. *) super#eapp locals e actuals end in (* Initialize the queue with all public definitions, and work from there. *) List.iter (function { valpublic = public } as def -> if public then enqueue def ) defs; let valdefs = Misc.qfold (fun defs def -> o#valdef StringSet.empty def :: defs ) [] queue in Error.logC 1 (fun f -> Printf.fprintf f "%d functions before inlining, %d functions after inlining.\n" before (List.length valdefs)); Time.tick "Inlining"; valdefs (* Dumb recursive traversal. *) let rec inline_structure_item item = match item with | SIValDefs (true, defs) -> (* A nest of recursive definitions. Act on it. *) SIValDefs (true, inline_valdefs defs) | SIFunctor (params, s) -> SIFunctor (params, inline_structure s) | SIModuleDef (name, e) -> SIModuleDef (name, inline_modexpr e) | SIInclude e -> SIInclude (inline_modexpr e) | SIExcDefs _ | SITypeDefs _ | SIValDefs (false, _) | SIStretch _ | SIComment _ -> item and inline_structure s = List.map inline_structure_item s and inline_modexpr = function | MVar x -> MVar x | MStruct s -> MStruct (inline_structure s) | MApp (e1, e2) -> MApp (inline_modexpr e1, inline_modexpr e2) (* The external entry point. *) let inline (p : program) : program = if Settings.code_inlining then inline_structure p else p menhir-20151112/src/positions.mli0000644000175000017500000000665212621170073015521 0ustar mehdimehdi(* TEMPORARY faire un peu le ménage dans cette interface pléthorique? *) (** Extension of standard library's positions. 
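For instance (a minimal sketch, assuming an identifier was read at position [p : t]):

      let dv = with_pos p "x" in
      assert (value dv = "x" && position dv = p)
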
*) (** {2 Extended lexing positions} *) (** Abstract type for pairs of positions in the lexing stream. *) type t (** Decoration of a value with a position. *) type 'a located = { value : 'a; position : t; } (** [value dv] returns the raw value that underlies the decorated value [dv]. *) val value: 'a located -> 'a (** [position dv] returns the position that decorates the decorated value [dv]. *) val position: 'a located -> t (** [with_pos p v] decorates [v] with a position [p]. *) val with_pos : t -> 'a -> 'a located val with_cpos: Lexing.lexbuf -> 'a -> 'a located val with_poss : Lexing.position -> Lexing.position -> 'a -> 'a located val unknown_pos : 'a -> 'a located (** [map f v] extends the decoration from [v] to [f v]. *) val map: ('a -> 'b) -> 'a located -> 'b located (** [iter f dv] applies [f] to the value inside [dv]. *) val iter: ('a -> unit) -> 'a located -> unit (** [mapd f v] extends the decoration from [v] to both members of the pair [f v]. *) val mapd: ('a -> 'b1 * 'b2) -> 'a located -> 'b1 located * 'b2 located (** This value is used when an object does not from a particular input location. *) val dummy: t (** {2 Accessors} *) (** [column p] returns the number of characters from the beginning of the line of the Lexing.position [p]. *) val column : Lexing.position -> int (** [column p] returns the line number of to the Lexing.position [p]. *) val line : Lexing.position -> int (** [characters p1 p2] returns the character interval between [p1] and [p2] assuming they are located in the same line. *) val characters : Lexing.position -> Lexing.position -> int * int val start_of_position: t -> Lexing.position val end_of_position: t -> Lexing.position val filename_of_position: t -> string (** {2 Position handling} *) (** [join p1 p2] returns a position that starts where [p1] starts and stops where [p2] stops. *) val join : t -> t -> t val lex_join : Lexing.position -> Lexing.position -> t val ljoinf : ('a -> t) -> 'a list -> t val joinf : ('a -> t) -> 'a -> 'a -> t val join_located : 'a located -> 'b located -> ('a -> 'b -> 'c) -> 'c located val join_located_list : ('a located) list -> ('a list -> 'b list) -> ('b list) located (** [string_of_lex_pos p] returns a string representation for the lexing position [p]. *) val string_of_lex_pos : Lexing.position -> string (** [string_of_pos p] returns the standard (Emacs-like) representation of the position [p]. *) val string_of_pos : t -> string (** [pos_or_undef po] is the identity function except if po = None, in that case, it returns [undefined_position]. *) val pos_or_undef : t option -> t (** {2 Interaction with the lexer runtime} *) (** [cpos lexbuf] returns the current position of the lexer. *) val cpos : Lexing.lexbuf -> t (** [string_of_cpos p] returns a string representation of the lexer's current position. *) val string_of_cpos : Lexing.lexbuf -> string (* The functions that print error messages and warnings require a list of positions. The following auxiliary functions help build such lists. *) type positions = t list val one: Lexing.position -> positions val two: Lexing.position -> Lexing.position -> positions val lexbuf: Lexing.lexbuf -> positions (* Low-level printing function, for debugging. *) val print: Lexing.position -> unit menhir-20151112/src/printer.ml0000644000175000017500000004227512621170073015005 0ustar mehdimehdi(* A pretty-printer for [IL]. *) open IL open Printf module Make (X : sig (* This is the channel that is being written to. 
*) val f: out_channel (* This controls the way we print Objective Caml stretches (types and semantic actions). We either surround them with #line directives (for better error reports if the generated code is ill - typed) or don't (for better readability). The value is either [None] -- do not provide #line directives -- or [Some filename] -- do provide them. [filename] is the name of the file that is being written to. *) val locate_stretches: string option end) = struct (* ------------------------------------------------------------------------- *) (* Dealing with newlines and indentation. *) let maxindent = 120 let whitespace = String.make maxindent ' ' let indentation = ref 0 let line = ref 1 (* [rawnl] is, in principle, the only place where writing a newline character to the output channel is permitted. This ensures that the line counter remains correct. But see also [stretch] and [typ0]. *) let rawnl f = incr line; output_char f '\n' let nl f = rawnl f; output_substring f whitespace 0 !indentation let indent ofs producer f x = let old_indentation = !indentation in let new_indentation = old_indentation + ofs in if new_indentation <= maxindent then indentation := new_indentation; nl f; producer f x; indentation := old_indentation (* This produces a #line directive. *) let sharp f line file = fprintf f "%t# %d \"%s\"%t" rawnl line file rawnl (* ------------------------------------------------------------------------- *) (* Printers of atomic elements. *) let nothing _ = () let space f = output_char f ' ' let comma f = output_string f ", " let semi f = output_char f ';' let seminl f = semi f; nl f let times f = output_string f " * " let letrec f = output_string f "let rec " let letnonrec f = output_string f "let " let keytyp f = output_string f "type " let exc f = output_string f "exception " let et f = output_string f "and " let var f x = output_string f x let bar f = output_string f " | " (* ------------------------------------------------------------------------- *) (* List printers. *) (* A list with a separator in front of every element. *) let rec list elem sep f = function | [] -> () | e :: es -> fprintf f "%t%a%a" sep elem e (list elem sep) es (* A list with a separator between elements. *) let seplist elem sep f = function | [] -> () | e :: es -> fprintf f "%a%a" elem e (list elem sep) es (* OCaml type parameters. *) let typeparams p0 p1 f = function | [] -> () | [ param ] -> fprintf f "%a " p0 param | _ :: _ as params -> fprintf f "(%a) " (seplist p1 comma) params (* ------------------------------------------------------------------------- *) (* Expression printer. *) (* We use symbolic constants that stand for subsets of the expression constructors. We do not use numeric levels to stand for subsets, because our subsets do not form a linear inclusion chain. *) type subset = | All | AllButSeq | AllButFunTryMatch | AllButFunTryMatchSeq | AllButLetFunTryMatch | AllButLetFunTryMatchSeq | AllButIfThenSeq | OnlyAppOrAtom | OnlyAtom (* This computes the intersection of a subset with the constraint "should not be a sequence". *) let andNotSeq = function | All | AllButSeq -> AllButSeq | AllButFunTryMatch | AllButFunTryMatchSeq -> AllButFunTryMatchSeq | AllButLetFunTryMatch | AllButLetFunTryMatchSeq -> AllButLetFunTryMatchSeq | AllButIfThenSeq -> AllButIfThenSeq | OnlyAppOrAtom -> OnlyAppOrAtom | OnlyAtom -> OnlyAtom (* This defines the semantics of subsets by relating expressions with subsets. 
*) let rec member e k = match e with | EComment _ | EPatComment _ -> true | EFun _ | ETry _ | EMatch _ -> begin match k with | AllButFunTryMatch | AllButFunTryMatchSeq | AllButLetFunTryMatch | AllButLetFunTryMatchSeq | OnlyAppOrAtom | OnlyAtom -> false | _ -> true end | ELet ([], e) -> member e k | ELet ((PUnit, _) :: _, _) -> begin match k with | AllButSeq | AllButFunTryMatchSeq | AllButLetFunTryMatchSeq | AllButIfThenSeq | OnlyAppOrAtom | OnlyAtom -> false | _ -> true end | ELet (_ :: _, _) -> begin match k with | AllButLetFunTryMatch | AllButLetFunTryMatchSeq | OnlyAppOrAtom | OnlyAtom -> false | _ -> true end | EIfThen _ -> begin match k with | AllButIfThenSeq | OnlyAppOrAtom | OnlyAtom -> false | _ -> true end | EApp (_, _ :: _) | EData (_, _ :: _) | EMagic _ | ERepr _ | ERaise _ -> begin match k with | OnlyAtom -> false | _ -> true end | ERecordWrite _ | EIfThenElse _ -> begin match k with | OnlyAppOrAtom | OnlyAtom -> false | _ -> true end | EVar _ | ETextual _ | EApp (_, []) | EData (_, []) | ETuple _ | EAnnot _ | ERecord _ | ERecordAccess (_, _) | EIntConst _ | EStringConst _ | EUnit | EArray _ | EArrayAccess (_, _) -> true let rec exprlet k pes f e2 = match pes with | [] -> exprk k f e2 | (PUnit, e1) :: pes -> fprintf f "%a%t%a" (exprk AllButLetFunTryMatch) e1 seminl (exprlet k pes) e2 | (PVar id1, EAnnot (e1, ts1)) :: pes -> (* TEMPORARY current ocaml does not support type schemes here; drop quantifiers, if any *) fprintf f "let %s : %a = %a in%t%a" id1 typ ts1.body (* scheme ts1 *) expr e1 nl (exprlet k pes) e2 | (PVar id1, EFun (ps1, e1)) :: pes -> fprintf f "let %s%a = %a in%t%t%a" id1 (list pat0 space) ps1 (indent 2 expr) e1 nl nl (exprlet k pes) e2 | (p1, (ELet _ as e1)) :: pes -> fprintf f "let %a =%a%tin%t%a" pat p1 (indent 2 expr) e1 nl nl (exprlet k pes) e2 | (p1, e1) :: pes -> fprintf f "let %a = %a in%t%a" pat p1 expr e1 nl (exprlet k pes) e2 and atom f e = exprk OnlyAtom f e and app f e = exprk OnlyAppOrAtom f e and expr f e = exprk All f e and exprk k f e = if member e k then match e with | EComment (c, e) -> if Settings.comment then fprintf f "(* %s *)%t%a" c nl (exprk k) e else exprk k f e | EPatComment (s, p, e) -> if Settings.comment then fprintf f "(* %s%a *)%t%a" s pat p nl (exprk k) e else exprk k f e | ELet (pes, e2) -> exprlet k pes f e2 | ERecordWrite (e1, field, e2) -> fprintf f "%a.%s <- %a" atom e1 field (exprk (andNotSeq k)) e2 | EMatch (_, []) -> assert false | EMatch (e, brs) -> fprintf f "match %a with%a" expr e (branches k) brs | ETry (_, []) -> assert false | ETry (e, brs) -> fprintf f "try%a%twith%a" (indent 2 expr) e nl (branches k) brs | EIfThen (e1, e2) -> fprintf f "if %a then%a" expr e1 (indent 2 (exprk (andNotSeq k))) e2 | EIfThenElse (e0, e1, e2) -> fprintf f "if %a then%a%telse%a" expr e0 (indent 2 (exprk AllButIfThenSeq)) e1 nl (indent 2 (exprk (andNotSeq k))) e2 | EFun (ps, e) -> fprintf f "fun%a ->%a" (list pat0 space) ps (indent 2 (exprk k)) e | EApp (EVar op, [ e1; e2 ]) when op.[0] = '(' && op.[String.length op - 1] = ')' -> let op = String.sub op 1 (String.length op - 2) in fprintf f "%a %s %a" app e1 op app e2 | EApp (e, args) -> fprintf f "%a%a" app e (list atom space) args | ERaise e -> fprintf f "raise %a" atom e | EMagic e -> fprintf f "Obj.magic %a" atom e | ERepr e -> fprintf f "Obj.repr %a" atom e | EData (d, []) -> var f d | EData (d, [ arg ]) -> fprintf f "%s %a" d atom arg | EData ("::", [ arg1; arg2 ]) -> (* Special case for infix cons. 
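For example, [EData ("::", [ arg1; arg2 ])] is printed as [arg1 :: arg2] rather than as a prefix application of the constructor.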
*) fprintf f "%a :: %a" atom arg1 atom arg2 | EData (d, (_ :: _ :: _ as args)) -> fprintf f "%s (%a)" d (seplist app comma) args | EVar v -> var f v | ETextual action -> stretch false f action | EUnit -> fprintf f "()" | EIntConst k -> if k >= 0 then fprintf f "%d" k else fprintf f "(%d)" k | EStringConst s -> fprintf f "\"%s\"" (String.escaped s) | ETuple [] -> assert false | ETuple [ e ] -> atom f e | ETuple (_ :: _ :: _ as es) -> fprintf f "(%a)" (seplist app comma) es | EAnnot (e, s) -> (* TEMPORARY current ocaml does not support type schemes here; drop quantifiers, if any *) fprintf f "(%a : %a)" app e typ s.body (* should be scheme s *) | ERecordAccess (e, field) -> fprintf f "%a.%s" atom e field | ERecord fs -> fprintf f "{%a%t}" (indent 2 (seplist field nl)) fs nl | EArray fs -> fprintf f "[|%a%t|]" (indent 2 (seplist array_field nl)) fs nl | EArrayAccess (e, i) -> fprintf f "%a.(%a)" atom e expr i else fprintf f "(%a)" expr e and stretch raw f stretch = let content = stretch.Stretch.stretch_content and raw_content = stretch.Stretch.stretch_raw_content in match X.locate_stretches with | Some basename -> sharp f stretch.Stretch.stretch_linenum stretch.Stretch.stretch_filename; output_string f content; line := !line + stretch.Stretch.stretch_linecount; sharp f (!line + 2) basename; output_substring f whitespace 0 !indentation | None -> output_string f (if raw then raw_content else content) and branches k f = function | [] -> () | [ br ] -> fprintf f "%t| %a" nl (branch k) br | br :: brs -> fprintf f "%t| %a%a" nl (branch AllButFunTryMatch) br (branches k) brs and branch k f br = fprintf f "%a ->%a" pat br.branchpat (indent 4 (exprk k)) br.branchbody and field f (label, e) = fprintf f "%s = %a%t" label app e semi and fpat f (label, p) = fprintf f "%s = %a%t" label pat p semi and array_field f e = fprintf f "%a%t" app e semi and pat0 f = function | PUnit -> fprintf f "()" | PWildcard -> fprintf f "_" | PVar x -> var f x | PData (d, []) -> var f d | PTuple [] -> assert false | PTuple [ p ] -> pat0 f p | PTuple (_ :: _ :: _ as ps) -> fprintf f "(%a)" (seplist pat1 comma) ps | PAnnot (p, t) -> fprintf f "(%a : %a)" pat p typ t | PRecord fps -> (* In a record pattern, fields can be omitted. *) let fps = List.filter (function (_, PWildcard) -> false | _ -> true) fps in fprintf f "{%a%t}" (indent 2 (seplist fpat nl)) fps nl | p -> fprintf f "(%a)" pat p and pat1 f = function | PData (d, [ arg ]) -> fprintf f "%s %a" d pat0 arg | PData (d, (_ :: _ :: _ as args)) -> fprintf f "%s (%a)" d (seplist pat1 comma) args | PTuple [ p ] -> pat1 f p | p -> pat0 f p and pat2 f = function | POr [] -> assert false | POr (_ :: _ as ps) -> seplist pat2 bar f ps | PTuple [ p ] -> pat2 f p | p -> pat1 f p and pat f p = pat2 f p and typevar f = function | "_" -> fprintf f "_" | v -> fprintf f "'%s" v and typ0 f = function | TypTextual (Stretch.Declared ocamltype) -> (* Parentheses are necessary to avoid confusion between 1 - ary data constructor with n arguments and n - ary data constructor. 
*) fprintf f "(%a)" (stretch true) ocamltype | TypTextual (Stretch.Inferred t) -> line := !line + LineCount.count 0 (Lexing.from_string t); fprintf f "(%s)" t | TypVar v -> typevar f v | TypApp (t, params) -> fprintf f "%a%s" (typeparams typ0 typ) params t | t -> fprintf f "(%a)" typ t and typ1 f = function | TypTuple [] -> assert false | TypTuple (_ :: _ as ts) -> seplist typ0 times f ts | t -> typ0 f t and typ2 f = function | TypArrow (t1, t2) -> fprintf f "%a -> %a" typ1 t1 typ2 t2 | t -> typ1 f t and typ f = typ2 f and scheme f scheme = match scheme.quantifiers with | [] -> typ f scheme.body | qs -> fprintf f "%a. %a" (list typevar space) qs typ scheme.body (* ------------------------------------------------------------------------- *) (* Toplevel definition printer. *) (* The tuple of the arguments of a data constructor. *) let datavalparams f params = (* [typ1] because [type t = A of int -> int ] is not allowed by OCaml *) (* [type t = A of (int -> int)] is allowed *) seplist typ1 times f params (* A data constructor definition. *) let datadef typename f def = fprintf f " | %s" def.dataname; match def.datavalparams, def.datatypeparams with | [], None -> (* | A *) () | _ :: _, None -> (* | A of t * u *) fprintf f " of %a" datavalparams def.datavalparams | [], Some indices -> (* | A : (v, w) ty *) fprintf f " : %a%s" (typeparams typ0 typ) indices typename | _ :: _, Some indices -> (* | A : t * u -> (v, w) ty *) fprintf f " : %a -> %a%s" datavalparams def.datavalparams (typeparams typ0 typ) indices typename let fielddef f def = fprintf f " %s%s: %a" (if def.modifiable then "mutable " else "") def.fieldname scheme def.fieldtype let typerhs typename f = function | TDefRecord [] -> assert false | TDefRecord (_ :: _ as fields) -> fprintf f " = {%t%a%t}" nl (seplist fielddef seminl) fields nl | TDefSum [] -> () | TDefSum defs -> fprintf f " = %a" (list (datadef typename) nl) defs | TAbbrev t -> fprintf f " = %a" typ t let typeconstraint f = function | None -> () | Some (t1, t2) -> fprintf f "%tconstraint %a = %a" nl typ t1 typ t2 let typedef f def = fprintf f "%a%s%a%a" (typeparams typevar typevar) def.typeparams def.typename (typerhs def.typename) def.typerhs typeconstraint def.typeconstraint let rec pdefs pdef sep1 sep2 f = function | [] -> () | [ def ] -> fprintf f "%t%a" sep1 pdef def | def :: defs -> fprintf f "%t%a%t%t%a" sep1 pdef def (* Separate two successive items with two newlines. *) nl nl (pdefs pdef sep2 sep2) defs let valdef f = function | { valpat = PVar id; valval = EAnnot (e, ts) } -> (* TEMPORARY current ocaml does not support type schemes here; drop quantifiers, if any *) fprintf f "%s : %a =%a" id typ ts.body (* scheme ts *) (indent 2 expr) e | { valpat = p; valval = e } -> fprintf f "%a =%a" pat p (indent 2 expr) e let valdefs recursive = pdefs valdef (if recursive then letrec else letnonrec) et let typedefs = pdefs typedef keytyp et let excdef in_intf f def = match in_intf, def.exceq with | _, None | true, Some _ -> fprintf f "%s" def.excname | false, Some s -> fprintf f "%s = %s" def.excname s let excdefs in_intf = pdefs (excdef in_intf) exc exc let block format body f b = fprintf f format (fun f b -> indent 2 body f b; nl f ) b (* Convention: each structure (or interface) item prints a newline before and after itself. 
*) let rec structure_item f item = match item with | SIFunctor ([], s) -> structure f s | SIStretch stretches -> List.iter (stretch false f) stretches | _ -> nl f; begin match item with | SIFunctor (params, s) -> fprintf f "module Make%a%t= %a" (list (stretch false) nl) params nl structend s | SIExcDefs defs -> excdefs false f defs | SITypeDefs defs -> typedefs f defs | SIValDefs (recursive, defs) -> valdefs recursive f defs | SIStretch _ -> assert false (* already handled above *) | SIModuleDef (name, rhs) -> fprintf f "module %s = %a" name modexpr rhs | SIInclude e -> fprintf f "include %a" modexpr e | SIComment comment -> fprintf f "(* %s *)" comment end; nl f and structend f s = block "struct%aend" structure f s and structure f s = list structure_item nothing f s and modexpr f = function | MVar x -> fprintf f "%s" x | MStruct s -> structend f s | MApp (e1, e2) -> fprintf f "%a (%a)" modexpr e1 modexpr e2 let valdecl f (x, ts) = fprintf f "val %s: %a" x typ ts.body let with_kind f = function | WKNonDestructive -> output_string f "=" | WKDestructive -> output_string f ":=" let rec module_type f = function | MTNamedModuleType s -> output_string f s | MTWithType (mt, params, name, wk, t) -> fprintf f "%a%a" module_type mt (indent 2 with_type) (params, name, wk, t) | MTSigEnd i -> sigend f i and with_type f (params, name, wk, t) = fprintf f "with type %a %a %a" typ (TypApp (name, List.map (fun v -> TypVar v) params)) with_kind wk typ t and interface_item f item = match item with | IIFunctor ([], i) -> interface f i | _ -> nl f; begin match item with | IIFunctor (params, i) -> fprintf f "module Make%a%t: %a" (list (stretch false) nl) params nl sigend i | IIExcDecls defs -> excdefs true f defs | IITypeDecls defs -> typedefs f defs | IIValDecls decls -> pdefs valdecl nothing nothing f decls | IIInclude mt -> fprintf f "include %a" module_type mt | IIModule (name, mt) -> fprintf f "module %s : %a" name module_type mt | IIComment comment -> fprintf f "(* %s *)" comment end; nl f and sigend f i = block "sig%aend" interface f i and interface f i = list interface_item nothing f i let program s = structure X.f s; flush X.f let interface i = interface X.f i; flush X.f let expr e = expr X.f e; flush X.f end menhir-20151112/src/codeBits.mli0000644000175000017500000000413312621170073015216 0ustar mehdimehdi(* This module provides a number of tiny functions that help produce [IL] code. *) open IL (* A list subject to a condition. (Be careful, though: the list is of course constructed even if the condition is false.) *) val listif: bool -> 'a list -> 'a list val elementif: bool -> 'a -> 'a list (* A lazy version of [listif], where the list is constructed only if the condition is true. *) val listiflazy: bool -> (unit -> 'a list) -> 'a list (* Standard types. *) val tunit: typ val tbool: typ val tint: typ val tstring: typ val texn: typ val tposition: typ val tlexbuf: typ val tobj : typ (* Building a type variable. *) val tvar: string -> typ (* Building a type scheme. *) val scheme: string list -> typ -> typescheme val type2scheme: typ -> typescheme (* Projecting out of a [PVar] pattern. *) val pat2var: pattern -> string (* Building a [let] construct, with on-the-fly simplification. *) val blet: (pattern * expr) list * expr -> expr val mlet: pattern list -> expr list -> expr -> expr (* [eraisenotfound] is an expression that raises [Not_found]. *) val eraisenotfound: expr (* [bottom] is an expression that has every type. Its semantics is irrelevant. *) val bottom: expr (* Boolean constants. 
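[etrue] and [efalse] stand for the constant expressions [true] and [false]; presumably, [eboolconst b] maps the OCaml Boolean [b] to the corresponding constant, so that [eboolconst true] is [etrue].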
*) val etrue: expr val efalse: expr val eboolconst: bool -> expr (* Option constructors. *) val enone: expr val esome: expr -> expr (* List constructors. *) val elist: expr list -> expr (* Integer constants as patterns. *) val pint: int -> pattern (* These help build function types. *) val arrow: typ -> typ -> typ val arrowif: bool -> typ -> typ -> typ val marrow: typ list -> typ -> typ (* These functions are used to generate names in menhir's namespace. *) val prefix: string -> string val dataprefix: string -> string val tvprefix: string -> string (* Converting an interface to a structure. Only exception and type definitions go through. *) val interface_to_structure: interface -> structure (* Constructing a named module type together with a list of "with type" constraints. *) val with_types: IL.with_kind -> string -> (string list * string * IL.typ) list -> IL.module_type menhir-20151112/src/MySet.mli0000644000175000017500000000107512621170073014525 0ustar mehdimehdi(* This is a stripped-down copy of the [Set] module from OCaml's standard library. The only difference is that [add x t] guarantees that it returns [t] (physically unchanged) if [x] is already a member of [t]. This yields fewer memory allocations and an easy way of testing whether the element was already present in the set before it was added. *) module Make (Ord: Map.OrderedType) : sig type elt = Ord.t type t val empty: t val add: elt -> t -> t val find: elt -> t -> elt (* may raise [Not_found] *) val iter: (elt -> unit) -> t -> unit end menhir-20151112/src/lr0.mli0000644000175000017500000000733112621170073014162 0ustar mehdimehdiopen Grammar (* This module builds the LR(0) automaton associated with the grammar, then provides access to it. It also provides facilities for efficiently performing LR(1) constructions. *) (* ------------------------------------------------------------------------ *) (* The LR(0) automaton. *) (* The nodes of the LR(0) automaton are numbered. *) type node = int (* This is the number of nodes in the LR(0) automaton. *) val n: int (* These are the automaton's entry states, indexed by the start productions. *) val entry: node ProductionMap.t (* A node can be converted to the underlying LR(0) set of items. This set is not closed. *) val items: node -> Item.Set.t (* The incoming symbol of an LR(0) node is the symbol carried by all of the edges that enter this node. A node has zero incoming edges (and, thus, no incoming symbol) if and only if it is a start node.. *) val incoming_symbol: node -> Symbol.t option (* ------------------------------------------------------------------------ *) (* Help for building the LR(1) automaton. *) (* An LR(1) state is internally represented as a pair of an LR(0) state number and an array of concrete lookahead sets (whose length depends on the LR(0) state). *) type lr1state (* An encoded LR(1) state can be turned into a concrete representation, that is, a mapping of items to concrete lookahead sets. *) type concretelr1state = TerminalSet.t Item.Map.t val export: lr1state -> concretelr1state (* One can take the closure of a concrete LR(1) state. *) val closure: concretelr1state -> concretelr1state (* The core of an LR(1) state is the underlying LR(0) state. *) val core: lr1state -> node (* One can create an LR(1) start state out of an LR(0) start node. *) val start: node -> lr1state (* Information about the transitions and reductions at a state. 
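As a hypothetical illustration: if the state [s] contains an item [E -> E . PLUS E], then [transitions s], below, has an entry for the terminal [PLUS], and [transition (Symbol.T plus) s] is the state reached by shifting [PLUS]; [reductions s] pairs every production that can be reduced in [s] with the lookahead set that permits this reduction.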
*) val transitions: lr1state -> lr1state SymbolMap.t val outgoing_symbols: node -> Symbol.t list val transition: Symbol.t -> lr1state -> lr1state val reductions: lr1state -> (TerminalSet.t * Production.index) list (* Equality of states. The two states must have the same core. Then, they are equal if and only if their lookahead sets are pointwise equal. *) val equal: lr1state -> lr1state -> bool (* Subsumption between states. The two states must have the same core. Then, one subsumes the other if and only if their lookahead sets are (pointwise) in the subset relation. *) val subsume: lr1state -> lr1state -> bool (* A slightly modified version of Pager's weak compatibility criterion. The two states must have the same core. *) val compatible: lr1state -> lr1state -> bool (* This function determines whether two (core-equivalent) states can be merged without creating an end-of-stream conflict. *) val eos_compatible: lr1state -> lr1state -> bool (* This function determines whether two (core-equivalent) states can be merged without creating spurious reductions on the [error] token. *) val error_compatible: lr1state -> lr1state -> bool (* Union of two states. The two states must have the same core. The new state is obtained by pointwise union of the lookahead sets. *) val union: lr1state -> lr1state -> lr1state (* Restriction of a state to a set of tokens of interest. Every lookahead set is intersected with that set. *) val restrict: TerminalSet.t -> lr1state -> lr1state (* The following functions display: 1- a concrete state; 2- a state (only the kernel, not the closure); 3- the closure of a state. The first parameter is a fixed string that is added at the beginning of every line. *) val print_concrete: string -> concretelr1state -> string val print: string -> lr1state -> string val print_closure: string -> lr1state -> string menhir-20151112/src/rawPrinter.ml0000644000175000017500000001147012621170073015450 0ustar mehdimehdi(* A debugging pretty-printer for [IL]. Newlines are used liberally, so as to facilitate diffs. *) open IL open Printf module Make (X : sig (* This is the channel that is being written to. *) val f: out_channel end) = struct (* ------------------------------------------------------------------------- *) (* XML-style trees. *) type tree = | Node of string * tree list let node label ts = Node (label, ts) (* ------------------------------------------------------------------------- *) (* Dealing with newlines and indentation. *) let maxindent = 120 let whitespace = String.make maxindent ' ' let indentation = ref 0 let line = ref 1 (* [rawnl] is, in principle, the only place where writing a newline character to the output channel is permitted. This ensures that the line counter remains correct. But see also [stretch] and [typ0]. *) let rawnl f = incr line; output_char f '\n' let nl f = rawnl f; output f whitespace 0 !indentation let indent ofs producer f x = let old_indentation = !indentation in let new_indentation = old_indentation + ofs in if new_indentation <= maxindent then indentation := new_indentation; nl f; producer f x; indentation := old_indentation (* ------------------------------------------------------------------------- *) (* Tree printers. 
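For instance, the leaf [Node ("f", [])] is printed as [<f/>], while [Node ("app", [ t1; t2 ])] is printed as [<app>], followed by the renderings of [t1] and [t2] on indented lines, followed by [</app>]; as noted above, newlines are used liberally so that two dumps can be compared with a textual diff.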
*) let rec print_tree f = function | Node (label, []) -> output_char f '<'; output_string f label; output_char f '/'; output_char f '>'; nl f | Node (label, ts) -> output_char f '<'; output_string f label; output_char f '>'; indent 2 print_trees f ts; output_char f '<'; output_char f '/'; output_string f label; output_char f '>'; nl f and print_trees f = function | [] -> () | t :: ts -> print_tree f t; print_trees f ts (* ------------------------------------------------------------------------- *) (* Expression-to-tree converter. *) let rec expr e = match e with | EComment (c, e) -> node "comment" [ string c; expr e ] | EPatComment (s, p, e) -> node "patcomment" [ string s; pat p; expr e ] | ELet (pes, e2) -> node "let" ( patexprs pes @ [ expr e2 ]) | ERecordWrite (e1, field, e2) -> node "recordwrite" [ expr e1; string field; expr e2 ] | EMatch (e, brs) -> node "match" ( expr e :: branches brs ) | ETry (e, brs) -> node "try" ( expr e :: branches brs ) | EIfThen (e1, e2) -> node "ifthen" [ expr e1; expr e2 ] | EIfThenElse (e0, e1, e2) -> node "ifthenelse" [ expr e0; expr e1; expr e2 ] | EFun (ps, e) -> node "fun" ( pats ps @ [ expr e ]) | EApp (e, args) -> node "app" ( expr e :: exprs args ) | ERaise e -> node "raise" [ expr e ] | EMagic e -> node "magic" [ expr e ] | ERepr e -> node "repr" [ expr e ] | EData (d, args) -> node "data" ( string d :: exprs args ) | EVar v -> node "var" [ string v ] | ETextual action -> node "text" [ stretch action ] | EUnit -> node "unit" [] | EIntConst k -> node "int" [ int k ] | EStringConst s -> node "string" [ string s ] | ETuple es -> node "tuple" ( exprs es ) | EAnnot (e, s) -> node "annot" [ expr e; scheme s ] | ERecordAccess (e, field) -> node "recordaccess" [ expr e; string field ] | ERecord fs -> node "record" (fields fs) | EArray fs -> node "array" (exprs fs) | EArrayAccess (e1, e2) -> node "arrayaccess" [ expr e1; expr e2 ] and exprs es = List.map expr es and stretch stretch = string stretch.Stretch.stretch_content and branches brs = List.map branch brs and branch br = node "branch" [ pat br.branchpat; expr br.branchbody ] and fields fs = List.map field fs and field (label, e) = node "field" [ string label; expr e ] and pats ps = List.map pat ps and pat = function | PUnit -> node "punit" [] | PWildcard -> node "pwildcard" [] | PVar x -> node "pvar" [ string x ] | PTuple ps -> node "ptuple" (pats ps) | PAnnot (p, t) -> node "pannot" [ pat p; typ t ] | PData (d, args) -> node "pdata" (string d :: pats args) | PRecord fps -> node "precord" (fpats fps) | POr ps -> node "por" (pats ps) and fpats fps = List.map fpat fps and fpat (_, p) = pat p and patexprs pes = List.map patexpr pes and patexpr (p, e) = node "patexpr" [ pat p; expr e ] and string s = node s [] and int k = node (string_of_int k) [] and bool b = node (if b then "true" else "false") [] and scheme s = string "omitted" (* TEMPORARY to be completed, someday *) and typ t = string "omitted" (* TEMPORARY to be completed, someday *) (* ------------------------------------------------------------------------- *) (* Convert to a tree, then print the tree. *) let expr e = print_tree X.f (expr e) end menhir-20151112/src/TableFormat.ml0000644000175000017500000001407412621170074015517 0ustar mehdimehdi(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, INRIA Paris-Rocquencourt *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright 2005-2015 Institut National de Recherche en Informatique *) (* et en Automatique. 
All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (* This signature defines the format of the parse tables. It is used as an argument to [TableInterpreter.Make]. *) module type TABLES = sig (* This is the parser's type of tokens. *) type token (* This maps a token to its internal (generation-time) integer code. *) val token2terminal: token -> int (* This is the integer code for the error pseudo-token. *) val error_terminal: int (* This maps a token to its semantic value. *) val token2value: token -> Obj.t (* Traditionally, an LR automaton is described by two tables, namely, an action table and a goto table. See, for instance, the Dragon book. The action table is a two-dimensional matrix that maps a state and a lookahead token to an action. An action is one of: shift to a certain state, reduce a certain production, accept, or fail. The goto table is a two-dimensional matrix that maps a state and a non-terminal symbol to either a state or undefined. By construction, this table is sparse: its undefined entries are never looked up. A compression technique is free to overlap them with other entries. In Menhir, things are slightly different. If a state has a default reduction on token [#], then that reduction must be performed without consulting the lookahead token. As a result, we must first determine whether that is the case, before we can obtain a lookahead token and use it as an index in the action table. Thus, Menhir's tables are as follows. A one-dimensional default reduction table maps a state to either ``no default reduction'' (encoded as: 0) or ``by default, reduce prod'' (encoded as: 1 + prod). The action table is looked up only when there is no default reduction. *) val default_reduction: PackedIntArray.t (* Menhir follows Dencker, Dürre and Heuft, who point out that, although the action table is not sparse by nature (i.e., the error entries are significant), it can be made sparse by first factoring out a binary error matrix, then replacing the error entries in the action table with undefined entries. Thus: A two-dimensional error bitmap maps a state and a terminal to either ``fail'' (encoded as: 0) or ``do not fail'' (encoded as: 1). The action table, which is now sparse, is looked up only in the latter case. *) (* The error bitmap is flattened into a one-dimensional table; its width is recorded so as to allow indexing. The table is then compressed via [PackedIntArray]. The bit width of the resulting packed array must be [1], so it is not explicitly recorded. *) (* The error bitmap does not contain a column for the [#] pseudo-terminal. Thus, its width is [Terminal.n - 1]. We exploit the fact that the integer code assigned to [#] is greatest: the fact that the right-most column in the bitmap is missing does not affect the code for accessing it. *) val error: int (* width of the bitmap *) * string (* second component of [PackedIntArray.t] *) (* A two-dimensional action table maps a state and a terminal to one of ``shift to state s and discard the current token'' (encoded as: s | 10), ``shift to state s without discarding the current token'' (encoded as: s | 11), or ``reduce prod'' (encoded as: prod | 01). *) (* The action table is first compressed via [RowDisplacement], then packed via [PackedIntArray]. 
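In the encodings above, [s | 10] and [prod | 01] are binary notations: the two low-order bits of an entry form a tag, while the state or production number occupies the remaining bits. Under this reading, an entry [e] such that [e land 0b11 = 0b01] presumably denotes the reduction of production [e lsr 2], while an entry whose low bits are [0b10] or [0b11] denotes one of the two kinds of shift towards state [e lsr 2].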
*) (* Like the error bitmap, the action table does not contain a column for the [#] pseudo-terminal. *) val action: PackedIntArray.t * PackedIntArray.t (* A one-dimensional lhs table maps a production to its left-hand side (a non-terminal symbol). *) val lhs: PackedIntArray.t (* A two-dimensional goto table maps a state and a non-terminal symbol to either undefined (encoded as: 0) or a new state s (encoded as: 1 + s). *) (* The goto table is first compressed via [RowDisplacement], then packed via [PackedIntArray]. *) val goto: PackedIntArray.t * PackedIntArray.t (* The number of start productions. A production [prod] is a start production if and only if [prod < start] holds. This is also the number of start symbols. A nonterminal symbol [nt] is a start symbol if and only if [nt < start] holds. *) val start: int (* A one-dimensional semantic action table maps productions to semantic actions. The calling convention for semantic actions is described in [EngineTypes]. This table contains ONLY NON-START PRODUCTIONS, so the indexing is off by [start]. Be careful. *) val semantic_action: ((int, Obj.t, token) EngineTypes.env -> (int, Obj.t) EngineTypes.stack) array (* The parser defines its own [Error] exception. This exception can be raised by semantic actions and caught by the engine, and raised by the engine towards the final user. *) exception Error (* The parser indicates whether to generate a trace. Generating a trace requires two extra tables, which respectively map a terminal symbol and a production to a string. *) val trace: (string array * string array) option end menhir-20151112/src/Convert.mli0000644000175000017500000000662512621170074015113 0ustar mehdimehdi(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, INRIA Paris-Rocquencourt *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright 2005-2015 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with the *) (* special exception on linking described in file LICENSE. *) (* *) (**************************************************************************) (* An ocamlyacc-style, or Menhir-style, parser requires access to the lexer, which must be parameterized with a lexing buffer, and to the lexing buffer itself, where it reads position information. *) (* This traditional API is convenient when used with ocamllex, but inelegant when used with other lexer generators. *) type ('token, 'semantic_value) traditional = (Lexing.lexbuf -> 'token) -> Lexing.lexbuf -> 'semantic_value (* This revised API is independent of any lexer generator. Here, the parser only requires access to the lexer, and the lexer takes no parameters. The tokens returned by the lexer may contain position information. *) type ('token, 'semantic_value) revised = (unit -> 'token) -> 'semantic_value (* --------------------------------------------------------------------------- *) (* Converting a traditional parser, produced by ocamlyacc or Menhir, into a revised parser. *) (* A token of the revised lexer is essentially a triple of a token of the traditional lexer (or raw token), a start position, and and end position. The three [get] functions are accessors. *) (* We do not require the type ['token] to actually be a triple type. This enables complex applications where it is a record type with more than three fields. 
It also enables simple applications where positions are of no interest, so ['token] is just ['raw_token] and [get_startp] and [get_endp] return dummy positions. *) val traditional2revised: ('token -> 'raw_token) -> ('token -> Lexing.position) -> ('token -> Lexing.position) -> ('raw_token, 'semantic_value) traditional -> ('token, 'semantic_value) revised (* --------------------------------------------------------------------------- *) (* Converting a revised parser back to a traditional parser. *) val revised2traditional: ('raw_token -> Lexing.position -> Lexing.position -> 'token) -> ('token, 'semantic_value) revised -> ('raw_token, 'semantic_value) traditional (* --------------------------------------------------------------------------- *) (* Simplified versions of the above, where concrete triples are used. *) module Simplified : sig val traditional2revised: ('token, 'semantic_value) traditional -> ('token * Lexing.position * Lexing.position, 'semantic_value) revised val revised2traditional: ('token * Lexing.position * Lexing.position, 'semantic_value) revised -> ('token, 'semantic_value) traditional end menhir-20151112/src/META0000644000175000017500000000024212621170074013426 0ustar mehdimehdirequires = "" description = "Runtime support for code generated by Menhir" archive(byte) = "menhirLib.cmo" archive(native) = "menhirLib.cmx" version = "20151112" menhir-20151112/src/time.ml0000644000175000017500000000134612621170073014252 0ustar mehdimehdilet channel = stderr open Unix open Printf let clock = ref (times()) let tick msg = if Settings.timings then let times1 = !clock in let times2 = times() in fprintf channel "%s: %.02fs\n%!" msg (times2.tms_utime -. times1.tms_utime); clock := times() type chrono = float ref let fresh () = ref 0. let chrono (chrono : float ref) (task : unit -> 'a) : 'a = if Settings.timings then begin let times1 = times() in let result = task() in let times2 = times() in chrono := !chrono +. times2.tms_utime -. times1.tms_utime; result end else task() let display (chrono : float ref) msg = if Settings.timings then fprintf channel "%s: %.02fs\n" msg !chrono menhir-20151112/src/cst.ml0000644000175000017500000000461112621170073014103 0ustar mehdimehdiopen Grammar (* Concrete syntax trees. *) (* A concrete syntax tree is one of a leaf -- which corresponds to a terminal symbol; a node -- which corresponds to a non-terminal symbol, and whose immediate descendants form an expansion of that symbol; or an error leaf -- which corresponds to a point where the [error] pseudo-token was shifted. *) type cst = | CstTerminal of Terminal.t | CstNonTerminal of Production.index * cst array | CstError (* This is a (mostly) unambiguous printer for concrete syntax trees, in an sexp-like notation. *) let rec pcst b = function | CstTerminal tok -> (* A leaf is denoted by a terminal symbol. *) Printf.bprintf b "%s" (Terminal.print tok) | CstNonTerminal (prod, csts) -> (* A node is denoted by a bracketed, whitespace-separated list, whose head is a non-terminal symbol (followed with a colon) and whose tail consists of the node's descendants. *) (* There is in fact some ambiguity in this notation, since we only print the non-terminal symbol that forms the left-hand side of production [prod], instead of the production itself. This abuse makes things much more readable, and should be acceptable for the moment. The cases where ambiguity actually arises should be rare. 
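For instance, under this notation, a concrete syntax tree for the phrase 1 + 2, with a grammar in the style of the calc demo, would be printed as [expr: [expr: INT] PLUS [expr: INT]].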
*) Printf.bprintf b "[%s:%a]" (Nonterminal.print false (Production.nt prod)) pcsts csts | CstError -> (* An error leaf is denoted by [error]. *) Printf.bprintf b "error" and pcsts b (csts : cst array) = Array.iter (fun cst -> Printf.bprintf b " %a" pcst cst ) csts (* This is the public interface. *) let wrap print f x = let b = Buffer.create 32768 in print b x; Buffer.output_buffer f b let print = wrap pcst (* This is a pretty-printer for concrete syntax trees. The notation is the same as that used by the above printer; the only difference is that the [Pprint] library is used to manage indentation. *) open Pprint let rec build : cst -> document = function | CstTerminal tok -> text (Terminal.print tok) | CstNonTerminal (prod, csts) -> brackets ( group ( text (Nonterminal.print false (Production.nt prod)) ^^ colon ^^ group ( nest 2 ( Array.fold_left (fun doc cst -> doc ^^ break1 ^^ build cst ) empty csts ) ) ^^ break0 ) ) | CstError -> text "error" let show f cst = Channel.pretty 0.8 80 f (build cst) menhir-20151112/src/yacc-parser.mly0000644000175000017500000002160212621170073015713 0ustar mehdimehdi/* This is the crude version of the parser. It is meant to be processed by ocamlyacc. Its existence is necessary for bootstrapping. It is kept in sync with [fancy-parser], with a few differences: 1. [fancy-parser] exploits many features of Menhir; 2. [fancy-parser] performs slightly more refined error handling; 3. [fancy-parser] supports anonymous rules. */ %{ open ConcreteSyntax open Syntax open Positions %} %token TOKEN TYPE LEFT RIGHT NONASSOC START PREC PUBLIC COLON BAR EOF EQUAL %token INLINE LPAREN RPAREN COMMA QUESTION STAR PLUS PARAMETER ON_ERROR_REDUCE %token <string Positions.located> LID UID %token <Stretch.t> HEADER %token <Stretch.ocamltype> OCAMLTYPE %token <Stretch.t Lazy.t> PERCENTPERCENT %token <Syntax.identifier option array -> Syntax.action> ACTION %start grammar %type <ConcreteSyntax.grammar> grammar /* These declarations solve a shift-reduce conflict in favor of shifting: when the declaration of a non-terminal symbol begins with a leading bar, it is understood as an (insignificant) leading optional bar, *not* as an empty right-hand side followed by a bar. This ambiguity arises due to the existence of a new notation for letting several productions share a single semantic action. */ %nonassoc no_optional_bar %nonassoc BAR %% /* ------------------------------------------------------------------------- */ /* A grammar consists of declarations and rules, followed by an optional trailer, which we do not parse. */ grammar: declarations PERCENTPERCENT rules trailer { { pg_filename = ""; (* filled in by the caller *) pg_declarations = List.rev $1; pg_rules = $3; pg_trailer = $4 } } trailer: EOF { None } | PERCENTPERCENT /* followed by actual trailer */ { Some (Lazy.force $1) } /* ------------------------------------------------------------------------- */ /* A declaration is an %{ Objective Caml header %}, or a %token, %start, %type, %left, %right, or %nonassoc declaration. */ declarations: /* epsilon */ { [] } | declarations declaration { $2 @ $1 } declaration: | HEADER /* lexically delimited by %{ ...
%} */ { [ unknown_pos (DCode $1) ] } | TOKEN optional_ocamltype terminals { List.map (Positions.map (fun terminal -> DToken ($2, terminal))) $3 } | START nonterminals { List.map (Positions.map (fun nonterminal -> DStart nonterminal)) $2 } | TYPE OCAMLTYPE actuals { List.map (Positions.map (fun nt -> DType ($2, nt))) (List.map Parameters.with_pos $3) } | START OCAMLTYPE nonterminals /* %start <ocamltype> foo is syntactic sugar for %start foo %type <ocamltype> foo */ { Misc.mapd (fun ntloc -> Positions.mapd (fun nt -> DStart nt, DType ($2, ParameterVar ntloc)) ntloc) $3 } | priority_keyword symbols { let prec = ParserAux.new_precedence_level (rhs_start_pos 1) (rhs_end_pos 1) in List.map (Positions.map (fun symbol -> DTokenProperties (symbol, $1, prec))) $2 } | PARAMETER OCAMLTYPE { [ unknown_pos (DParameter $2) ] } | ON_ERROR_REDUCE actuals { List.map (Positions.map (fun nt -> DOnErrorReduce nt)) (List.map Parameters.with_pos $2) } optional_ocamltype: /* epsilon */ { None } | OCAMLTYPE /* lexically delimited by angle brackets */ { Some $1 } priority_keyword: LEFT { LeftAssoc } | RIGHT { RightAssoc } | NONASSOC { NonAssoc } /* ------------------------------------------------------------------------- */ /* A symbol is a terminal or nonterminal symbol. One would like to require nonterminal symbols to begin with a lowercase letter, so as to lexically distinguish them from terminal symbols, which must begin with an uppercase letter. However, for compatibility with ocamlyacc, this is impossible. It can be required only for nonterminal symbols that are also start symbols. */ symbols: /* epsilon */ { [] } | symbols optional_comma symbol { $3 :: $1 } symbol: LID { $1 } | UID { $1 } optional_comma: /* epsilon */ { () } | COMMA { () } /* ------------------------------------------------------------------------- */ /* Terminals must begin with an uppercase letter. Nonterminals that are declared to be start symbols must begin with a lowercase letter. */ terminals: /* epsilon */ { [] } | terminals optional_comma UID { $3 :: $1 } nonterminals: /* epsilon */ { [] } | nonterminals LID { $2 :: $1 } /* ------------------------------------------------------------------------- */ /* A rule defines a symbol. It is optionally declared %public, and optionally carries a number of formal parameters. The right-hand side of the definition consists of a list of production groups. */ rules: /* epsilon */ { [] } | rules rule { $2 :: $1 } rule: flags symbol optional_formal_parameters COLON optional_bar production_group production_groups { let public, inline = $1 in { pr_public_flag = public; pr_inline_flag = inline; pr_nt = Positions.value $2; pr_positions = [ Positions.position $2 ]; pr_parameters = $3; pr_branches = List.flatten ($6 :: List.rev $7) } } flags: /* epsilon */ { false, false } | PUBLIC { true, false } | INLINE { false, true } | PUBLIC INLINE { true, true } | INLINE PUBLIC { true, true } /* ------------------------------------------------------------------------- */ /* Parameters are surrounded with parentheses and delimited by commas. The syntax of actual parameters allows applications, whereas the syntax of formal parameters does not. It also allows use of the "?", "+", and "*" shortcuts.
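For example, an actual parameter can be an application, such as pair(X, Y), or carry a shortcut, as in X*, which stands for list(X) (see the modifier rule below); a formal parameter, by contrast, must be a plain symbol.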
*/ optional_formal_parameters: /* epsilon */ { [] } | LPAREN formal_parameters RPAREN { $2 } formal_parameters: symbol { [ Positions.value $1 ] } | symbol COMMA formal_parameters { Positions.value $1 :: $3 } optional_actuals: /* epsilon */ { [] } | LPAREN actuals_comma RPAREN { $2 } actuals_comma: actual { [ $1 ] } | actual COMMA actuals_comma { $1 :: $3 } actual: symbol optional_actuals { Parameters.app $1 $2 } | actual modifier { ParameterApp ($2, [ $1 ]) } actuals: /* epsilon */ { [] } | actuals optional_comma actual { $3::$1 } optional_bar: /* epsilon */ %prec no_optional_bar { () } | BAR { () } /* ------------------------------------------------------------------------- */ /* The "?", "+", and "*" modifiers are short-hands for applications of certain parameterized nonterminals, defined in the standard library. */ modifier: QUESTION { unknown_pos "option" } | PLUS { unknown_pos "nonempty_list" } | STAR { unknown_pos "list" } /* ------------------------------------------------------------------------- */ /* A production group consists of a list of productions, followed by a semantic action and an optional precedence specification. */ production_groups: /* epsilon */ { [] } | production_groups BAR production_group { $3 :: $1 } production_group: productions ACTION /* action is lexically delimited by braces */ optional_precedence { let productions, action, oprec2 = $1, $2, $3 in (* If multiple productions share a single semantic action, check that all of them bind the same names. *) ParserAux.check_production_group productions; (* Then, *) List.map (fun (producers, oprec1, level, pos) -> (* Replace [$i] with [_i]. *) let pr_producers = ParserAux.normalize_producers producers in (* Distribute the semantic action. Also, check that every [$i] is within bounds. *) let pr_action = action (ParserAux.producer_names producers) in { pr_producers; pr_action; pr_branch_prec_annotation = ParserAux.override pos oprec1 oprec2; pr_branch_production_level = level; pr_branch_position = pos }) productions } optional_precedence: /* epsilon */ { None } | PREC symbol { Some $2 } /* ------------------------------------------------------------------------- */ /* A production is a list of producers, optionally followed by a precedence declaration. Lists of productions are nonempty and separated with bars. */ productions: production { [ $1 ] } | production bar_productions { $1 :: $2 } bar_productions: BAR production { [ $2 ] } | BAR production bar_productions { $2 :: $3 } production: producers optional_precedence { List.rev $1, $2, ParserAux.new_production_level(), Positions.lex_join (symbol_start_pos()) (symbol_end_pos()) } producers: /* epsilon */ { [] } | producers producer { $2 :: $1 } /* ------------------------------------------------------------------------- */ /* A producer is an actual parameter, possibly preceded by a binding. */ producer: | actual { Positions.lex_join (symbol_start_pos()) (symbol_end_pos()), None, $1 } | LID EQUAL actual { Positions.lex_join (symbol_start_pos()) (symbol_end_pos()), Some $1, $3 } %% menhir-20151112/menhir.10000644000175000017500000000612012621170074013533 0ustar mehdimehdi.\" Hey, EMACS: -*- nroff -*- .TH MENHIR 1 "April 19, 2006" .\" Please adjust this date whenever revising the manpage. 
.\" .\" Some roff macros, for reference: .\" .nh disable hyphenation .\" .hy enable hyphenation .\" .ad l left justify .\" .ad b justify to both left and right margins .\" .nf disable filling .\" .fi enable filling .\" .br insert line break .\" .sp insert n+1 empty lines .\" for manpage-specific macros, see man(7) .SH NAME menhir \- parser generator for OCaml .SH SYNOPSIS .B menhir .RI [ options ] " files" .SH DESCRIPTION .B menhir is a LR(1) parser generator for the Objective Caml programming language. That is, Menhir compiles LR(1) grammar specifications down to Objective Caml code. It is mostly compatible with .BR ocamlyacc (1). .SH OPTIONS .TP .B \-h, \-\-help Show summary of options. .TP .BI \-b,\ \-\-base\ basename Specifies a base name for the output file(s). .TP .B \-\-comment Include comments in the generated code. .TP .B \-\-depend Invoke ocamldep and display dependencies. .TP .B \-\-dump Describe the automaton in .IR basename .automaton. .TP .B \-\-error\-recovery Attempt recovery by discarding tokens after errors. .TP .B \-\-explain Explain conflicts in .IR basename .conflicts. .TP .BI \-\-external\-tokens\ module Import token type definition from .IR module . .TP .B \-\-graph Write grammar's dependency graph to .IR basename .dot. .TP .B \-\-infer Invoke ocamlc for ahead of time type inference. .TP .B \-\-interpret Interpret the sentences provided on stdin. .TP .B \-\-interpret\-show\-cst Show a concrete syntax tree upon acceptance. .TP .BI \-la,\ \-\-log\-automaton\ level Log information about the automaton. .TP .BI \-lc,\ \-\-log\-code\ level Log information about the generated code. .TP .BI \-lg,\ \-\-log\-grammar\ level Log information about the grammar. .TP .B \-\-no\-inline Ignore the %inline keyword. .TP .B \-\-no\-stdlib Do not load the standard library. .TP .BI \-\-ocamlc\ command Specifies how ocamlc should be invoked. .TP .BI \-\-ocamldep\ command Specifies how ocamldep should be invoked. .TP .B \-\-only\-preprocess Print a simplified grammar and exit. .TP .B \-\-only\-tokens Generate token type definition only, no code. .TP .B \-\-raw\-depend Invoke ocamldep and echo its raw output. .TP .BI \-\-stdlib\ directory Specify where the standard library lies. .TP .B \-\-suggest\-comp\-flags Suggest compilation flags for ocaml{c,opt}. .TP .B \-\-suggest\-link\-flags-byte Suggest link flags for ocamlc. .TP .B \-\-suggest\-link\-flags-opt Suggest link flags for ocamlopt. .TP .B \-t, \-\-table Use the table-based back-end. .TP .B \-\-timings Display internal timings. .TP .B \-\-trace Include tracing instructions in the generated code. .TP .B \-\-version Show version number and exit. .TP .B \-v Synonymous with .BR \-\-dump\ \-\-explain . .SH SEE ALSO .BR ocaml (1). .SH AUTHOR .B menhir was written by François Pottier and Yann Régis-Gianas. .PP This manual page was written by Samuel Mimram , for the Debian project (but may be used by others). menhir-20151112/demos/0000755000175000017500000000000012646770154013313 5ustar mehdimehdimenhir-20151112/demos/calc-two/0000755000175000017500000000000012646770154015024 5ustar mehdimehdimenhir-20151112/demos/calc-two/algebraic.mlypack0000644000175000017500000000003012621170073020273 0ustar mehdimehdiTokens Algebraic Common menhir-20151112/demos/calc-two/reverse.mly0000644000175000017500000000072412621170073017210 0ustar mehdimehdi(* This partial grammar specification defines the syntax of expressions in reverse Polish notation. 
Parentheses are meaningless, and unary minus is not supported (some other symbol than MINUS would be required in order to avoid an ambiguity). *) %% %public expr: | i = INT { i } | e1 = expr e2 = expr PLUS { e1 + e2 } | e1 = expr e2 = expr MINUS { e1 - e2 } | e1 = expr e2 = expr TIMES { e1 * e2 } | e1 = expr e2 = expr DIV { e1 / e2 } menhir-20151112/demos/calc-two/common.mly0000644000175000017500000000027712621170073017030 0ustar mehdimehdi(* This partial grammar specification defines the grammar's entry point to be an expression, followed with an end-of-line token. *) %start <int> main %% main: | e = expr EOL { e } menhir-20151112/demos/calc-two/README0000644000175000017500000000116712621170073015674 0ustar mehdimehdiThis tiny program reads arithmetic expressions from the standard input channel. Each expression is expected to be complete when the current line ends. Its value is then displayed on the standard output channel. In this version, there is a single lexer, but there are two parsers, one for expressions in algebraic (that is, infix) notation, one for expressions in reverse Polish (that is, postfix) notation. One of the two parsers is selected at runtime via a command line switch. This demo illustrates how to build two parsers that share a single set of tokens (see tokens.mly) and that share some productions (see common.mly). menhir-20151112/demos/calc-two/_tags0000644000175000017500000000027612621170073016034 0ustar mehdimehdi<tokens.*>: only_tokens <algebraic.*>: external_tokens(Tokens) <reverse.*>: external_tokens(Tokens) <reverse.*>: unused_token(LPAREN) <reverse.*>: unused_token(RPAREN) menhir-20151112/demos/calc-two/Makefile0000644000175000017500000000103412621170073016445 0ustar mehdimehdi.PHONY: all clean test # Find Menhir. ifndef MENHIR MENHIR := $(shell ../find-menhir.sh) endif MENHIRFLAGS := --infer OCAMLBUILD := ocamlbuild -use-ocamlfind -use-menhir -menhir "$(MENHIR) $(MENHIRFLAGS)" MAIN := calc all: $(OCAMLBUILD) $(MAIN).native clean: rm -f *~ .*~ $(OCAMLBUILD) -clean test: all @echo "The following command should print 42:" echo "(1 + 2 * 10) * 2" | ./$(MAIN).native --algebraic @echo "The following command should print 42:" echo " 1 2 10 * + 2 *" | ./$(MAIN).native --reverse menhir-20151112/demos/calc-two/lexer.mll0000644000175000017500000000200612621170073016632 0ustar mehdimehdi{ open Tokens exception Error of string } (* This rule looks for a single line, terminated with '\n' or eof. It returns a pair of an optional string (the line that was found) and a Boolean flag (false if eof was reached). *) rule line = parse | ([^'\n']* '\n') as line (* Normal case: one line, no eof. *) { Some line, true } | eof (* Normal case: no data, eof. *) { None, false } | ([^'\n']+ as line) eof (* Special case: some data but missing '\n', then eof. Consider this as the last line, and add the missing '\n'. *) { Some (line ^ "\n"), false } (* This rule analyzes a single line and turns it into a stream of tokens. *) and token = parse | [' ' '\t'] { token lexbuf } | '\n' { EOL } | ['0'-'9']+ as i { INT (int_of_string i) } | '+' { PLUS } | '-' { MINUS } | '*' { TIMES } | '/' { DIV } | '(' { LPAREN } | ')' { RPAREN } | _ { raise (Error (Printf.sprintf "At offset %d: unexpected character.\n" (Lexing.lexeme_start lexbuf))) } menhir-20151112/demos/calc-two/algebraic.mly0000644000175000017500000000103112621170073017446 0ustar mehdimehdi(* This partial grammar specification defines the syntax of expressions in algebraic notation.
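(For example, "(1 + 2 * 10) * 2" evaluates to 42; the precedence and associativity declarations below resolve the usual ambiguities of infix notation.)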
*) %left PLUS MINUS /* lowest precedence */ %left TIMES DIV /* medium precedence */ %nonassoc UMINUS /* highest precedence */ %% %public expr: | i = INT { i } | LPAREN e = expr RPAREN { e } | e1 = expr PLUS e2 = expr { e1 + e2 } | e1 = expr MINUS e2 = expr { e1 - e2 } | e1 = expr TIMES e2 = expr { e1 * e2 } | e1 = expr DIV e2 = expr { e1 / e2 } | MINUS e = expr %prec UMINUS { - e } menhir-20151112/demos/calc-two/reverse.mlypack0000644000175000017500000000002612621170073020042 0ustar mehdimehdiTokens Reverse Common menhir-20151112/demos/calc-two/myocamlbuild.ml0000644000175000017500000000143012621170073020020 0ustar mehdimehdiopen Ocamlbuild_plugin open Command (* Define ocamlbuild flags [only_tokens] and [external_tokens(Foo)] and [unused_token(Bar)] which correspond to menhir's [--only-tokens] and [--external-tokens Foo] and [--unused-token Bar]. When they are used, these flags should be passed both to [menhir] and to [menhir --raw-depend]. *) let menhir_flags() = List.iter (fun mode -> flag [ mode; "only_tokens" ] (S[A "--only-tokens"]); pflag [ mode ] "external_tokens" (fun name -> S[A "--external-tokens"; A name] ); pflag [ mode ] "unused_token" (fun name -> S[A "--unused-token"; A name] ) ) [ "menhir"; "menhir_ocamldep" ] let () = dispatch (fun event -> match event with | After_rules -> menhir_flags() | _ -> () ) menhir-20151112/demos/calc-two/tokens.mlypack0000644000175000017500000000000712621170073017671 0ustar mehdimehdiTokens menhir-20151112/demos/calc-two/calc.ml0000644000175000017500000000212612621170073016244 0ustar mehdimehdilet algebraic = ref true let () = Arg.parse [ "--algebraic", Arg.Set algebraic, " Use algebraic (that is, infix) notation"; "--reverse", Arg.Clear algebraic, " Use reverse Polish (that is, postfix) notation"; ] (fun _ -> ()) (Printf.sprintf "Usage: %s <options>" Sys.argv.(0)) let main = if !algebraic then Algebraic.main else Reverse.main let process (line : string) = let linebuf = Lexing.from_string line in try (* Run the parser on this line of input. *) Printf.printf "%d\n%!" (main Lexer.token linebuf) with | Lexer.Error msg -> Printf.fprintf stderr "%s%!" msg | Algebraic.Error | Reverse.Error -> Printf.fprintf stderr "At offset %d: syntax error.\n%!" (Lexing.lexeme_start linebuf) let process (optional_line : string option) = match optional_line with | None -> () | Some line -> process line let rec repeat channel = (* Attempt to read one line. *) let optional_line, continue = Lexer.line channel in process optional_line; if continue then repeat channel let () = repeat (Lexing.from_channel stdin) menhir-20151112/demos/calc-two/tokens.mly0000644000175000017500000000022712621170073017036 0ustar mehdimehdi(* This partial grammar specification defines the set of tokens. *) %token <int> INT %token PLUS MINUS TIMES DIV %token LPAREN RPAREN %token EOL %% menhir-20151112/demos/obsolete/0000755000175000017500000000000012621170073015112 5ustar mehdimehdimenhir-20151112/demos/obsolete/Makefile.auto0000644000175000017500000000055112621170073017522 0ustar mehdimehdi# This auxiliary Makefile is meant to be included by a client Makefile in # addition to Makefile.shared. It is optional. It implements the common case # where every .mly file in the current directory is to be viewed as a # mono-module grammar specification.
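# For instance (a hypothetical expansion), if the directory contains parser.mly, # the line below behaves as if one had written: # $(eval $(call menhir_monomodule,parser,))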
$(foreach module,$(wildcard *.mly),$(eval $(call menhir_monomodule,$(patsubst %.mly,%,$(module)),))) menhir-20151112/demos/obsolete/Makefile.calc-two0000644000175000017500000000146112621170073020264 0ustar mehdimehdi# This is the old version of calc-two/Makefile, based on Makefile.shared. # It has been superseded by the calc-two/Makefile, based on ocamlbuild. # Find Menhir. ifndef MENHIR MENHIR := $(shell ../find-menhir.sh) endif # Add --table on the next line to use Menhir's table-based back-end. PGFLAGS := --infer GENERATED := tokens.ml tokens.mli algebraic.ml algebraic.mli reverse.ml reverse.mli lexer.ml MODULES := algebraic reverse lexer calc EXECUTABLE := calc OCAMLDEPWRAPPER := ../ocamldep.wrapper include ../Makefile.shared $(eval $(call menhir_monomodule,tokens,--only-tokens)) $(eval $(call menhir_multimodule,algebraic,tokens.mly algebraic.mly common.mly,--external-tokens Tokens)) $(eval $(call menhir_multimodule,reverse,tokens.mly reverse.mly common.mly,--external-tokens Tokens)) menhir-20151112/demos/obsolete/ocamldep.wrapper0000755000175000017500000000574312621170073020310 0ustar mehdimehdi#!/usr/bin/env ocaml (* ocamldep.wrapper <filename> ... <filename> - <command> runs the <command> in an environment where all of the listed <filename>s appear to exist. The files are created, if required, before the command is run, and destroyed afterwards. *) (* An earlier version of this script acquired a lock, so as to prevent multiple instances of this script from interfering with one another. However, this did not prevent interference between this script and some other process (e.g., the ocaml compiler) which creates files. So, the lock has been removed. My suggestion is to never use this script in a concurrent setting. If you wish to use parallel make, then you might be able to use a two-level Makefile approach: first, compute all dependencies in a sequential manner; then, construct all targets in a parallel manner. *) #load "unix.cma" open Printf (* Parse the command line. The arguments that precede "-" are understood as file names and stored in the list [xs]. The arguments that follow "-" are understood as a command and stored in [command]. *) let xs = ref [] let command = ref "" let verbose = ref false let rec loop accumulating i = if i = Array.length Sys.argv then () else if accumulating then (* [accumulating] is [true] as long as we have not found the "-" marker *) match Sys.argv.(i) with | "-v" -> verbose := true; loop true (i+1) | "-" -> (* We have found the marker. The next parameter should be the name of the raw [ocamldep] command. Copy it to the command (unquoted -- apparently some shells do not permit quoting a command name). *) let i = i + 1 in assert (i < Array.length Sys.argv); command := Sys.argv.(i); (* Stop accumulating file names. Copy the remaining arguments into the command. *) loop false (i+1) | _ -> (* Continue accumulating file names in [xs]. *) xs := Sys.argv.(i) :: !xs; loop true (i+1) else begin (* After we have found the "-" marker, the remaining arguments are copied (quoted) into the command. *) command := sprintf "%s %s" !command (Filename.quote Sys.argv.(i)); loop false (i+1) end let () = loop true 1 (* Create the required files if they don't exist, run the command, then destroy any files that we have created.
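A typical invocation, in the style of Makefile.shared in this directory, is (a hypothetical command line): ocaml ./ocamldep.wrapper parser.ml parser.mli - ocamldep lexer.ml, where parser.ml and parser.mli are the generated files that must appear to exist while lexer.ml is analyzed.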
*) let rec loop = function | [] -> if !verbose then fprintf stderr "ocamldep.wrapper: running %s\n" !command; Sys.command !command | x :: xs -> if Sys.file_exists x then loop xs else begin if !verbose then fprintf stderr "ocamldep.wrapper: creating fake %s\n" x; let c = open_out x in close_out c; let exitcode = loop xs in if Sys.file_exists x then begin try if !verbose then fprintf stderr "ocamldep.wrapper: removing fake %s..." x; Sys.remove x; if !verbose then fprintf stderr " ok\n" with Sys_error _ -> if !verbose then fprintf stderr " failed\n" end; exitcode end let () = exit (loop !xs) menhir-20151112/demos/obsolete/Makefile.shared0000644000175000017500000002125312621170073020022 0ustar mehdimehdi# This Makefile is shared between all demos. It is our suggestion # of a canonical Makefile for projects that use Objective Caml, # ocamllex, and menhir. It requires a recent version of GNU make # (older versions do not correctly implement $(eval)). # ---------------------------------------------------------------- # This Makefile is meant to be included within a host Makefile # that defines the following variables: # # GENERATED : a list of the source (.ml and .mli) files # that should be generated by invoking ocamllex # or menhir # # MODULES : a list of the modules (without extension) # that should be linked into the executable # program. Order is significant. # # EXECUTABLE : the base name of the executables that should # be produced. Suffixes $(BSUFFIX) and $(OSUFFIX) # will be added to distinguish the bytecode and # native code versions. # ---------------------------------------------------------------- # The host Makefile can also override the following settings: # Menhir. ifndef MENHIR MENHIR := menhir endif # Parser generation flags. ifndef PGFLAGS PGFLAGS := --infer -v endif # Include directives for compilation and for linking. ifndef INCLUDE INCLUDE := endif # Bytecode compilation flags. ifndef BFLAGS BFLAGS := endif # Native code compilation flags. ifndef OFLAGS OFLAGS := endif # Menhir-suggested compilation flags. ifndef SUGG_FLAGS SUGG_FLAGS := $(shell $(MENHIR) $(PGFLAGS) --suggest-comp-flags 2>/dev/null) endif # Bytecode link-time flags. ifndef BLNKFLAGS BLNKFLAGS := endif # Menhir-suggested bytecode link-time flags. ifndef SUGG_BLNKFLAGS SUGG_BLNKFLAGS := $(shell $(MENHIR) $(PGFLAGS) --suggest-link-flags-byte 2>/dev/null) endif # Native code link-time flags. ifndef OLNKFLAGS OLNKFLAGS := endif # Menhir-suggested native code link-time flags. ifndef SUGG_OLNKFLAGS SUGG_OLNKFLAGS := $(shell $(MENHIR) $(PGFLAGS) --suggest-link-flags-opt 2>/dev/null) endif # Suffix appended to the name of the bytecode executable. ifndef BSUFFIX BSUFFIX := .byte endif # Suffix appended to the name of the native code executable. ifndef OSUFFIX OSUFFIX := endif # Access paths. 
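# Most of the tools below are auto-detected, preferring ocamlfind, then the # .opt version, then the plain version; every variable can also be overridden # from the command line, as in (a hypothetical invocation): make OCAMLC=ocamlc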
ifndef OCAML OCAML := ocaml endif ifndef OCAMLC OCAMLC := $(shell if ocamlfind ocamlc -v >/dev/null 2>&1 ; \ then echo ocamlfind ocamlc ; \ elif ocamlc.opt -v >/dev/null 2>&1 ; \ then echo ocamlc.opt ; \ else echo ocamlc ; fi) endif ifndef OCAMLOPT OCAMLOPT := $(shell if ocamlfind ocamlopt -v >/dev/null 2>&1 ; \ then echo ocamlfind ocamlopt ; \ elif ocamlopt.opt -v >/dev/null 2>&1 ; \ then echo ocamlopt.opt ; \ else echo ocamlopt ; fi) endif ifndef OCAMLDEP OCAMLDEP := $(shell if ocamlfind ocamldep -version >/dev/null 2>&1 ; \ then echo ocamlfind ocamldep ; \ elif ocamldep.opt -version >/dev/null 2>&1 ; \ then echo ocamldep.opt ; \ else echo ocamldep ; fi) endif ifndef OCAMLDEPWRAPPER OCAMLDEPWRAPPER:= ./ocamldep.wrapper endif ifndef OCAMLLEX OCAMLLEX := ocamllex endif # A list of targets that do not require dependency analysis. # This variable should be set by the host before including # this Makefile. COLD += clean # ---------------------------------------------------------------- # Do not destroy the generated source files. .SECONDARY: $(GENERATED) # ---------------------------------------------------------------- # Linking. $(EXECUTABLE)$(OSUFFIX): $(MODULES:=.cmx) $(OCAMLOPT) -o $@ $(INCLUDE) $(OLNKFLAGS) $(SUGG_FLAGS) $(SUGG_OLNKFLAGS) $^ $(EXECUTABLE)$(BSUFFIX): $(MODULES:=.cmo) $(OCAMLC) -o $@ $(INCLUDE) $(BLNKFLAGS) $(SUGG_FLAGS) $(SUGG_BLNKFLAGS) $^ # ---------------------------------------------------------------- # Compiling. # We make the .ml and .mli files generated by ocamllex and menhir # unwritable, so as to prevent them from being edited by mistake. %.cmi: %.mli %.mli.d $(OCAMLC) $(INCLUDE) $(BFLAGS) $(SUGG_FLAGS) -c $< %.cmo: %.ml %.ml.d $(OCAMLC) $(INCLUDE) $(BFLAGS) $(SUGG_FLAGS) -c $< %.cmx %.o: %.ml %.ml.d $(OCAMLOPT) $(INCLUDE) $(OFLAGS) $(SUGG_FLAGS) -c $< %.ml: %.mll @if [ -f $@ ] ; then /bin/chmod +w $@ ; fi $(OCAMLLEX) $< @/bin/chmod -w $@ # ---------------------------------------------------------------- # Computing dependencies. # We associate a tiny Makefile, whose name ends in .d, with every # source file; it contains dependency information only. For an .ml or # .mli file, we create an .ml.d or .mli.d file by invoking ocamldep. # For an .mll file, we create an .ml.d file by invoking ocamllex first # (this is implicit), then ocamldep. # When it finds a reference to module M, ocamldep checks whether the # files m.ml and m.mli exist before deciding which dependency to # report. If m.ml and m.mli are generated from m.mll or m.mly, then # there is a potential problem: because ocamldep is invoked before # these files are created, it cannot see them. The standard solution # until now was to invoke ocamllex and ocamlyacc first to create all # generated files, and run ocamldep next. This approach does not work # with menhir when the --infer switch is on: menhir cannot be invoked # first because it needs type information found in .cmi (or .cmo or # .cmx) files. Our solution is to wrap ocamldep in a script that # creates fake generated files m.ml and m.mli to let ocamldep know that # these files are supposed to exist. This is somewhat tricky, but appears # to work. %.ml.d: %.ml $(OCAML) $(OCAMLDEPWRAPPER) $(GENERATED) - $(OCAMLDEP) $< > $@ %.mli.d: %.mli $(OCAML) $(OCAMLDEPWRAPPER) $(GENERATED) - $(OCAMLDEP) $< > $@ # All .d files are included within the present Makefile, so if they # do not exist, they are created first, and the dependencies that # they contain are then taken into account.
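# For example (hypothetical output), lexer.ml.d could contain lines such as: # lexer.cmo : parser.cmi # lexer.cmx : parser.cmx # recording that compiling lexer.ml requires parser's compiled interface.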
# A .SECONDARY directive is used to ensure that the auxiliary # Makefiles are never removed. Otherwise, make could create # one, remove one, create one, remove one, ... (We have observed # this.) ifeq ($(findstring $(MAKECMDGOALS),$(COLD)),) ifneq ($(strip $(wildcard *.mli)),) .SECONDARY: $(patsubst %.mli,%.mli.d,$(wildcard *.mli)) -include $(patsubst %.mli,%.mli.d,$(wildcard *.mli)) endif ifneq ($(strip $(wildcard *.ml)),) .SECONDARY: $(patsubst %.ml,%.ml.d,$(wildcard *.ml)) -include $(patsubst %.ml,%.ml.d,$(wildcard *.ml)) endif ifneq ($(strip $(wildcard *.mll)),) .SECONDARY: $(patsubst %.mll,%.ml.d,$(wildcard *.mll)) -include $(patsubst %.mll,%.ml.d,$(wildcard *.mll)) endif endif # ---------------------------------------------------------------- # Support for menhir projects. # The macro menhir_multimodule defines support for multi-module grammar # specifications, that is, for producing parsers out of multiple # source files. The parameter $(1) is the name of the parser that # should be produced; the parameter $(2) is the list of .mly source # files; the parameter $(3) contains extra options to be passed to # menhir. # The dependency file is named $(1).d and created by invoking menhir # --depend. define menhir_multimodule $(1).ml $(1).mli: $(2) $(1).d @if [ -f $(1).ml ] ; then /bin/chmod +w $(1).ml ; fi @if [ -f $(1).mli ] ; then /bin/chmod +w $(1).mli ; fi $(MENHIR) --ocamlc "$(OCAMLC)" $(PGFLAGS) --base $(1) $(3) $(2) @/bin/chmod -w $(1).ml $(1).mli $(1).d: $(2) @if [ -f $(1).ml ] ; then /bin/chmod +w $(1).ml ; fi @if [ -f $(1).mli ] ; then /bin/chmod +w $(1).mli ; fi $(OCAML) $(OCAMLDEPWRAPPER) $(GENERATED) - \ $(MENHIR) --ocamldep "$(OCAMLDEP)" --depend --base $(1) $(3) $(2) > $$@ ifeq ($$(findstring $$(MAKECMDGOALS),$$(COLD)),) .SECONDARY: $(1).d -include $(1).d endif endef # The macro menhir_monomodule defines support for a mono-module grammar # specification. The parameter $(1) is the name of the parser that # should be produced; the source file is $(1).mly. The parameter $(2) # contains extra options to be passed to menhir. define menhir_monomodule $(eval $(call menhir_multimodule,$(1),$(1).mly,$(2))) endef # Neither of the two macros above is invoked by default, as it is not # known here which is appropriate. It is up to the client to invoke # these macros with suitable parameters. The auxiliary Makefile.auto # implements the common case where every .mly file is a mono-module # grammar. # ---------------------------------------------------------------- .PHONY: clean clean:: /bin/rm -f $(EXECUTABLE)$(BSUFFIX) $(EXECUTABLE)$(OSUFFIX) $(GENERATED) /bin/rm -f *.cmi *.cmx *.cmo *.o *~ .*~ *.automaton *.conflicts *.annot /bin/rm -f *.d menhir-20151112/demos/find-menhir.sh0000755000175000017500000000152212621170073016035 0ustar mehdimehdi#!/bin/bash # This script tries to find the Menhir executable. # This is useful because we would like the demos # to work regardless of whether Menhir has been # installed already. # A normal user does not need this script. One can # assume that Menhir has been installed. # First attempt: find Menhir in the src directory # of the Menhir distribution. # This loop assumes that we are somewhere within # the Menhir distribution, so by going up, we will # end up at the root of the distribution. attempts=2 while [ $attempts -gt 0 ] && ! [ -d src ] ; do let attempts=attempts-1 cd .. done LOCAL=src/_stage1/menhir.native if ls $LOCAL >/dev/null 2>/dev/null ; then echo `pwd`/$LOCAL exit 0 fi # Second attempt: find Menhir in the PATH. 
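# (The demo Makefiles invoke this script as: MENHIR := $(shell ../find-menhir.sh).)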
if which menhir >/dev/null ; then echo menhir exit 0 fi echo Error: could not find Menhir. exit 1 menhir-20151112/demos/Makefile0000644000175000017500000000035312621170073014737 0ustar mehdimehdiDEMOS := calc calc-two calc-param calc-incremental calc-inspection .PHONY: all clean all: @for i in $(DEMOS) ; do \ $(MAKE) -C $$i ; \ done clean: /bin/rm -f *~ .*~ @for i in $(DEMOS) ; do \ $(MAKE) -C $$i clean ; \ done menhir-20151112/demos/calc-param/0000755000175000017500000000000012646770154015303 5ustar mehdimehdimenhir-20151112/demos/calc-param/parser.mlypack0000644000175000017500000000001612621170073020151 0ustar mehdimehdiTokens Parser menhir-20151112/demos/calc-param/README0000644000175000017500000000054512621170073016162 0ustar mehdimehdiIn this variant of the calc demo, the parser's semantic actions are parameterized over a structure, called [Semantics], which defines how numbers should be interpreted. The parser is later instantiated with floating-point numbers, so the calculator actually performs floating-point evaluation -- but the grammar specification is independent of this detail. menhir-20151112/demos/calc-param/_tags0000644000175000017500000000011012621170073016310 0ustar mehdimehdi<tokens.*>: only_tokens <parser.*>: external_tokens(Tokens) menhir-20151112/demos/calc-param/Makefile0000644000175000017500000000065612621170073016745 0ustar mehdimehdi.PHONY: all clean test # Find Menhir. ifndef MENHIR MENHIR := $(shell ../find-menhir.sh) endif MENHIRFLAGS := --infer OCAMLBUILD := ocamlbuild -use-ocamlfind -use-menhir -menhir "$(MENHIR) $(MENHIRFLAGS)" MAIN := calc all: $(OCAMLBUILD) $(MAIN).native clean: rm -f *~ .*~ $(OCAMLBUILD) -clean test: all @echo "The following command should print 42.0:" echo "(1 + 2 * 10) * 2" | ./$(MAIN).native menhir-20151112/demos/calc-param/lexer.mll0000644000175000017500000000200612621170073017121 0ustar mehdimehdi{ open Tokens exception Error of string } (* This rule looks for a single line, terminated with '\n' or eof. It returns a pair of an optional string (the line that was found) and a Boolean flag (false if eof was reached). *) rule line = parse | ([^'\n']* '\n') as line (* Normal case: one line, no eof. *) { Some line, true } | eof (* Normal case: no data, eof. *) { None, false } | ([^'\n']+ as line) eof (* Special case: some data but missing '\n', then eof. Consider this as the last line, and add the missing '\n'. *) { Some (line ^ "\n"), false } (* This rule analyzes a single line and turns it into a stream of tokens. *) and token = parse | [' ' '\t'] { token lexbuf } | '\n' { EOL } | ['0'-'9']+ as i { INT (int_of_string i) } | '+' { PLUS } | '-' { MINUS } | '*' { TIMES } | '/' { DIV } | '(' { LPAREN } | ')' { RPAREN } | _ { raise (Error (Printf.sprintf "At offset %d: unexpected character.\n" (Lexing.lexeme_start lexbuf))) } menhir-20151112/demos/calc-param/parser.mly0000644000175000017500000000165212621170073017321 0ustar mehdimehdi(* These are the functions that we need in order to write our semantic actions. *) %parameter<Semantics : sig type number val inject: int -> number val ( + ): number -> number -> number val ( - ): number -> number -> number val ( * ): number -> number -> number val ( / ): number -> number -> number val ( ~-): number -> number end> (* The parser no longer returns an integer; instead, it returns an abstract number. *) %start <Semantics.number> main (* Let us open the [Semantics] module, so as to make all of its operations available in the semantic actions.
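For example, calc.ml in this demo instantiates the parser with floating-point semantics: module FloatParser = Parser.Make(FloatSemantics).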
*) %{ open Semantics %} %% main: | e = expr EOL { e } expr: | i = INT { inject i } | LPAREN e = expr RPAREN { e } | e1 = expr PLUS e2 = expr { e1 + e2 } | e1 = expr MINUS e2 = expr { e1 - e2 } | e1 = expr TIMES e2 = expr { e1 * e2 } | e1 = expr DIV e2 = expr { e1 / e2 } | MINUS e = expr %prec UMINUS { - e } menhir-20151112/demos/calc-param/myocamlbuild.ml0000644000175000017500000000122412621170073020310 0ustar mehdimehdiopen Ocamlbuild_plugin open Command let menhir_flags() = (* Define two ocamlbuild flags [only_tokens] and [external_tokens(Foo)] which correspond to menhir's [--only-tokens] and [--external-tokens Foo]. When they are used, these flags should be passed both to [menhir] and to [menhir --raw-depend]. *) List.iter (fun mode -> flag [ mode; "only_tokens" ] (S[A "--only-tokens"]); pflag [ mode ] "external_tokens" (fun name -> S[A "--external-tokens"; A name] ) ) [ "menhir"; "menhir_ocamldep" ] let () = dispatch (fun event -> match event with | After_rules -> menhir_flags() | _ -> () ) menhir-20151112/demos/calc-param/tokens.mlypack0000644000175000017500000000000712621170073020160 0ustar mehdimehdiTokens menhir-20151112/demos/calc-param/calc.ml0000644000175000017500000000215712621170073016537 0ustar mehdimehdi(* Let's do floating-point evaluation, for a change. *) module FloatSemantics = struct type number = float let inject = float_of_int let ( + ) = ( +. ) let ( - ) = ( -. ) let ( * ) = ( *. ) let ( / ) = ( /. ) let (~- ) = (~-. ) end (* Let us now specialize our parameterized parser. *) module FloatParser = Parser.Make(FloatSemantics) (* The rest is as usual. *) let process (line : string) = let linebuf = Lexing.from_string line in try (* Run the parser on this line of input. *) Printf.printf "%f\n%!" (FloatParser.main Lexer.token linebuf) with | Lexer.Error msg -> Printf.fprintf stderr "%s%!" msg | FloatParser.Error -> Printf.fprintf stderr "At offset %d: syntax error.\n%!" (Lexing.lexeme_start linebuf) let process (optional_line : string option) = match optional_line with | None -> () | Some line -> process line let rec repeat channel = (* Attempt to read one line. *) let optional_line, continue = Lexer.line channel in process optional_line; if continue then repeat channel let () = repeat (Lexing.from_channel stdin) menhir-20151112/demos/calc-param/tokens.mly0000644000175000017500000000060412621170073017324 0ustar mehdimehdi(* We want the tokens to be independent of the [Semantics] parameter, so we declare them here, in a separate file, as opposed to within [parser.mly]. *) %token <int> INT %token PLUS MINUS TIMES DIV %token LPAREN RPAREN %token EOL %left PLUS MINUS /* lowest precedence */ %left TIMES DIV /* medium precedence */ %nonassoc UMINUS /* highest precedence */ %% menhir-20151112/demos/calc-incremental/0000755000175000017500000000000012646770154016514 5ustar mehdimehdimenhir-20151112/demos/calc-incremental/README0000644000175000017500000000020512621170073017354 0ustar mehdimehdiThis variant of the calc demo uses Menhir with the --table option. It also demonstrates how to use the incremental parser interface. menhir-20151112/demos/calc-incremental/Makefile0000644000175000017500000000120712621170073020137 0ustar mehdimehdi.PHONY: all clean test # Find Menhir. ifndef MENHIR MENHIR := $(shell ../find-menhir.sh) endif # We use the table back-end, and link against menhirLib. # We assume that menhirLib has been installed in such a # way that ocamlfind knows about it. MENHIRFLAGS := --table # --infer is automatically added by ocamlbuild.
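# (The incremental API used in calc.ml below -- Parser.MenhirInterpreter, offer, # resume -- is generated only in --table mode.)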
OCAMLBUILD := ocamlbuild -use-ocamlfind -use-menhir -menhir "$(MENHIR) $(MENHIRFLAGS)" -package menhirLib MAIN := calc all: $(OCAMLBUILD) $(MAIN).native clean: rm -f *~ .*~ $(OCAMLBUILD) -clean test: all @echo "The following command should print 42:" echo "(1 + 2 * 10) * 2" | ./$(MAIN).native menhir-20151112/demos/calc-incremental/lexer.mll0000644000175000017500000000200612621170073020322 0ustar mehdimehdi{ open Parser exception Error of string } (* This rule looks for a single line, terminated with '\n' or eof. It returns a pair of an optional string (the line that was found) and a Boolean flag (false if eof was reached). *) rule line = parse | ([^'\n']* '\n') as line (* Normal case: one line, no eof. *) { Some line, true } | eof (* Normal case: no data, eof. *) { None, false } | ([^'\n']+ as line) eof (* Special case: some data but missing '\n', then eof. Consider this as the last line, and add the missing '\n'. *) { Some (line ^ "\n"), false } (* This rule analyzes a single line and turns it into a stream of tokens. *) and token = parse | [' ' '\t'] { token lexbuf } | '\n' { EOL } | ['0'-'9']+ as i { INT (int_of_string i) } | '+' { PLUS } | '-' { MINUS } | '*' { TIMES } | '/' { DIV } | '(' { LPAREN } | ')' { RPAREN } | _ { raise (Error (Printf.sprintf "At offset %d: unexpected character.\n" (Lexing.lexeme_start lexbuf))) } menhir-20151112/demos/calc-incremental/parser.mly0000644000175000017500000000105412621170073020516 0ustar mehdimehdi%token <int> INT %token PLUS MINUS TIMES DIV %token LPAREN RPAREN %token EOL %left PLUS MINUS /* lowest precedence */ %left TIMES DIV /* medium precedence */ %nonassoc UMINUS /* highest precedence */ %start <int> main %% main: | e = expr EOL { e } expr: | i = INT { i } | LPAREN e = expr RPAREN { e } | e1 = expr PLUS e2 = expr { e1 + e2 } | e1 = expr MINUS e2 = expr { e1 - e2 } | e1 = expr TIMES e2 = expr { e1 * e2 } | e1 = expr DIV e2 = expr { e1 / e2 } | MINUS e = expr %prec UMINUS { - e } menhir-20151112/demos/calc-incremental/calc.ml0000644000175000017500000000611012621170073017731 0ustar mehdimehdiopen Lexing (* A short name for the incremental parser API. *) module I = Parser.MenhirInterpreter (* -------------------------------------------------------------------------- *) (* The loop which drives the parser. At each iteration, we analyze a checkpoint produced by the parser, and act in an appropriate manner. [lexbuf] is the lexing buffer. [checkpoint] is the last checkpoint produced by the parser. *) let rec loop lexbuf (checkpoint : int I.checkpoint) = match checkpoint with | I.InputNeeded env -> (* The parser needs a token. Request one from the lexer, and offer it to the parser, which will produce a new checkpoint. Then, repeat. *) let token = Lexer.token lexbuf in let startp = lexbuf.lex_start_p and endp = lexbuf.lex_curr_p in let checkpoint = I.offer checkpoint (token, startp, endp) in loop lexbuf checkpoint | I.Shifting _ | I.AboutToReduce _ -> let checkpoint = I.resume checkpoint in loop lexbuf checkpoint | I.HandlingError env -> (* The parser has suspended itself because of a syntax error. Stop. *) Printf.fprintf stderr "At offset %d: syntax error.\n%!" (lexeme_start lexbuf) | I.Accepted v -> (* The parser has succeeded and produced a semantic value. Print it. *) Printf.printf "%d\n%!" v | I.Rejected -> (* The parser rejects this input. This cannot happen, here, because we stop as soon as the parser reports [HandlingError].
*) assert false (* -------------------------------------------------------------------------- *) (* The above loop is shown for explanatory purposes, but can in fact be replaced with the following code, which exploits the functions [lexer_lexbuf_to_supplier] and [loop_handle] offered by Menhir. *) let succeed (v : int) = (* The parser has succeeded and produced a semantic value. Print it. *) Printf.printf "%d\n%!" v let fail lexbuf (_ : int I.checkpoint) = (* The parser has suspended itself because of a syntax error. Stop. *) Printf.fprintf stderr "At offset %d: syntax error.\n%!" (lexeme_start lexbuf) let loop lexbuf result = let supplier = I.lexer_lexbuf_to_supplier Lexer.token lexbuf in I.loop_handle succeed (fail lexbuf) supplier result (* -------------------------------------------------------------------------- *) (* Initialize the lexer, and catch any exception raised by the lexer. *) let process (line : string) = let lexbuf = from_string line in try loop lexbuf (Parser.Incremental.main lexbuf.lex_curr_p) with | Lexer.Error msg -> Printf.fprintf stderr "%s%!" msg (* -------------------------------------------------------------------------- *) (* The rest of the code is as in the [calc] demo. *) let process (optional_line : string option) = match optional_line with | None -> () | Some line -> process line let rec repeat channel = (* Attempt to read one line. *) let optional_line, continue = Lexer.line channel in process optional_line; if continue then repeat channel let () = repeat (from_channel stdin) menhir-20151112/demos/calc/0000755000175000017500000000000012646770154014215 5ustar mehdimehdimenhir-20151112/demos/calc/README0000644000175000017500000000063612621170073015065 0ustar mehdimehdiThis tiny program reads arithmetic expressions from the standard input channel. Each expression is expected to be complete when the current line ends. Its value is then displayed on the standard output channel. This code is adapted from ocamlyacc's documentation. We compile the parser using Menhir's code back-end. For an example of using Menhir's table back-end, see the calc-incremental/ and calc-inspection/ demos. menhir-20151112/demos/calc/Makefile0000644000175000017500000000065412621170073015645 0ustar mehdimehdi.PHONY: all clean test # Find Menhir. ifndef MENHIR MENHIR := $(shell ../find-menhir.sh) endif MENHIRFLAGS := --infer OCAMLBUILD := ocamlbuild -use-ocamlfind -use-menhir -menhir "$(MENHIR) $(MENHIRFLAGS)" MAIN := calc all: $(OCAMLBUILD) $(MAIN).native clean: rm -f *~ .*~ $(OCAMLBUILD) -clean test: all @echo "The following command should print 42:" echo "(1 + 2 * 10) * 2" | ./$(MAIN).native menhir-20151112/demos/calc/lexer.mll0000644000175000017500000000200612621170073016023 0ustar mehdimehdi{ open Parser exception Error of string } (* This rule looks for a single line, terminated with '\n' or eof. It returns a pair of an optional string (the line that was found) and a Boolean flag (false if eof was reached). *) rule line = parse | ([^'\n']* '\n') as line (* Normal case: one line, no eof. *) { Some line, true } | eof (* Normal case: no data, eof. *) { None, false } | ([^'\n']+ as line) eof (* Special case: some data but missing '\n', then eof. Consider this as the last line, and add the missing '\n'. *) { Some (line ^ "\n"), false } (* This rule analyzes a single line and turns it into a stream of tokens.
*) and token = parse | [' ' '\t'] { token lexbuf } | '\n' { EOL } | ['0'-'9']+ as i { INT (int_of_string i) } | '+' { PLUS } | '-' { MINUS } | '*' { TIMES } | '/' { DIV } | '(' { LPAREN } | ')' { RPAREN } | _ { raise (Error (Printf.sprintf "At offset %d: unexpected character.\n" (Lexing.lexeme_start lexbuf))) } menhir-20151112/demos/calc/parser.mly0000644000175000017500000000105412621170073016217 0ustar mehdimehdi%token <int> INT %token PLUS MINUS TIMES DIV %token LPAREN RPAREN %token EOL %left PLUS MINUS /* lowest precedence */ %left TIMES DIV /* medium precedence */ %nonassoc UMINUS /* highest precedence */ %start <int> main %% main: | e = expr EOL { e } expr: | i = INT { i } | LPAREN e = expr RPAREN { e } | e1 = expr PLUS e2 = expr { e1 + e2 } | e1 = expr MINUS e2 = expr { e1 - e2 } | e1 = expr TIMES e2 = expr { e1 * e2 } | e1 = expr DIV e2 = expr { e1 / e2 } | MINUS e = expr %prec UMINUS { - e } menhir-20151112/demos/calc/calc.ml0000644000175000017500000000132712621170073015437 0ustar mehdimehdilet process (line : string) = let linebuf = Lexing.from_string line in try (* Run the parser on this line of input. *) Printf.printf "%d\n%!" (Parser.main Lexer.token linebuf) with | Lexer.Error msg -> Printf.fprintf stderr "%s%!" msg | Parser.Error -> Printf.fprintf stderr "At offset %d: syntax error.\n%!" (Lexing.lexeme_start linebuf) let process (optional_line : string option) = match optional_line with | None -> () | Some line -> process line let rec repeat channel = (* Attempt to read one line. *) let optional_line, continue = Lexer.line channel in process optional_line; if continue then repeat channel let () = repeat (Lexing.from_channel stdin) menhir-20151112/demos/calc-inspection/0000755000175000017500000000000012646770154016366 5ustar mehdimehdimenhir-20151112/demos/calc-inspection/CalcErrorReporting.ml0000644000175000017500000000120012621170073022444 0ustar mehdimehdiopen Parser open Parser.MenhirInterpreter (* In order to submit artificial tokens to the parser, we need a function that converts a terminal symbol to a (dummy) token. Unfortunately, we cannot (in general) auto-generate this code, because it requires making up semantic values of arbitrary OCaml types. *) let terminal2token (type a) (symbol : a terminal) : token = match symbol with | T_TIMES -> TIMES | T_RPAREN -> RPAREN | T_PLUS -> PLUS | T_MINUS -> MINUS | T_LPAREN -> LPAREN | T_INT -> INT 0 | T_EOL -> EOL | T_DIV -> DIV | T_error -> assert false menhir-20151112/demos/calc-inspection/README0000644000175000017500000000024312621170073017230 0ustar mehdimehdiThis variant of the calc demo uses Menhir with the --table and --inspection options. It illustrates how to inspect the intermediate states produced by the parser. menhir-20151112/demos/calc-inspection/Makefile0000644000175000017500000000155512621170073020017 0ustar mehdimehdi.PHONY: all clean test # Find Menhir. ifndef MENHIR MENHIR := $(shell ../find-menhir.sh) endif # We use the table back-end, and link against menhirLib. # We assume that menhirLib has been installed in such a # way that ocamlfind knows about it. MENHIRFLAGS := --table --inspection -v -la 2 # --infer is automatically added by ocamlbuild.
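# (--inspection additionally generates the symbol and terminal GADTs that # CalcPrinters.ml and CalcErrorReporting.ml match on.)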
OCAMLBUILD := ocamlbuild -use-ocamlfind -use-menhir -menhir "$(MENHIR) $(MENHIRFLAGS)" -package menhirLib MAIN := calc all: $(OCAMLBUILD) $(MAIN).native clean: rm -f *~ .*~ $(OCAMLBUILD) -clean test: all @echo "The following command should print 42:" echo "(1 + 2 * 10) * 2" | ./$(MAIN).native @echo "The following command should print an error message:" echo "(1 + 2 * 10) * )" | ./$(MAIN).native @echo "The following command should print an error message:" echo "(1 + 2 * 10))" | ./$(MAIN).native menhir-20151112/demos/calc-inspection/CalcPrinters.mli0000644000175000017500000000035212621170073021445 0ustar mehdimehdiopen Parser.MenhirInterpreter (* This module offers the functionality required by the functor [MenhirLib.Printers.Make]. *) val print: string -> unit val print_symbol: xsymbol -> unit val print_element: (element -> unit) option menhir-20151112/demos/calc-inspection/lexer.mll0000644000175000017500000000200612621170073020174 0ustar mehdimehdi{ open Parser exception Error of string } (* This rule looks for a single line, terminated with '\n' or eof. It returns a pair of an optional string (the line that was found) and a Boolean flag (false if eof was reached). *) rule line = parse | ([^'\n']* '\n') as line (* Normal case: one line, no eof. *) { Some line, true } | eof (* Normal case: no data, eof. *) { None, false } | ([^'\n']+ as line) eof (* Special case: some data but missing '\n', then eof. Consider this as the last line, and add the missing '\n'. *) { Some (line ^ "\n"), false } (* This rule analyzes a single line and turns it into a stream of tokens. *) and token = parse | [' ' '\t'] { token lexbuf } | '\n' { EOL } | ['0'-'9']+ as i { INT (int_of_string i) } | '+' { PLUS } | '-' { MINUS } | '*' { TIMES } | '/' { DIV } | '(' { LPAREN } | ')' { RPAREN } | _ { raise (Error (Printf.sprintf "At offset %d: unexpected character.\n" (Lexing.lexeme_start lexbuf))) } menhir-20151112/demos/calc-inspection/parser.mly0000644000175000017500000000105412621170073020370 0ustar mehdimehdi%token <int> INT %token PLUS MINUS TIMES DIV %token LPAREN RPAREN %token EOL %left PLUS MINUS /* lowest precedence */ %left TIMES DIV /* medium precedence */ %nonassoc UMINUS /* highest precedence */ %start <int> main %% main: | e = expr EOL { e } expr: | i = INT { i } | LPAREN e = expr RPAREN { e } | e1 = expr PLUS e2 = expr { e1 + e2 } | e1 = expr MINUS e2 = expr { e1 - e2 } | e1 = expr TIMES e2 = expr { e1 * e2 } | e1 = expr DIV e2 = expr { e1 / e2 } | MINUS e = expr %prec UMINUS { - e } menhir-20151112/demos/calc-inspection/ErrorReporting.mli0000644000175000017500000000574512621170073022050 0ustar mehdimehdi(* This module needs cleaning up. It is supposed to automatically produce a syntax error message, based on the current state and stack. *) module Make (I : MenhirLib.IncrementalEngine.EVERYTHING) (User : sig (* In order to submit artificial tokens to the parser, we need a function that converts a terminal symbol to a token. Unfortunately, we cannot (in general) auto-generate this code, because it requires making up semantic values of arbitrary OCaml types. *) val terminal2token: _ I.terminal -> I.token end) : sig open I (* An explanation is a description of what the parser has recognized in the recent past and what it expects next. More precisely, an explanation is an LR(0) item, enriched with positions. Indeed, the past (the first half of the item's right-hand side, up to the bullet) corresponds to a part of the input that has been read already, so it can be annotated with positions.
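For instance, given the item main -> expr . EOL of the calc grammar used in this demo, the past would be [expr], together with the positions of the expression that has been read, while the future (see below) would be [EOL] and the goal would be main.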
*) type explanation (* The LR(0) item. *) val item: explanation -> item (* The past. This is a non-empty sequence of (terminal and non-terminal) symbols, each of which corresponds to a range of the input file. These symbols correspond to the first half (up to the bullet) of the item's right-hand side. In short, they represent what (we think) we have recognized in the recent past. *) (* It is worth noting that, when an error occurs, we produce multiple explanations, which may have different pasts. Indeed, not only may these pasts have different lengths (one may be a suffix of another), but two pasts can in fact be incomparable. Indeed, different choices of the lookahead token may cause different reductions, hence different interpretations of what has been read in the past. *) val past: explanation -> (xsymbol * Lexing.position * Lexing.position) list (* The future. This is a non-empty sequence of (terminal and non-terminal) symbols. These symbols correspond to the second half (after the bullet) of the item's right-hand side. In short, they represent what we expect to recognize in the future, if this item is a good prediction. *) (* This information can be computed from [item]. This function is provided only for convenience. *) val future: explanation -> xsymbol list (* A goal. This is a non-terminal symbol. It is the item's left-hand side. In short, it represents the reduction that we will be able to perform if we successfully recognize this future. *) (* This information can be computed from [item]. This function is provided only for convenience. *) val goal: explanation -> xsymbol (* TEMPORARY *) (* We build lists of explanations. These explanations may originate in distinct LR(1) states. They may have different pasts, because *) exception Error of (Lexing.position * Lexing.position) * explanation list (* TEMPORARY *) val entry: 'a I.checkpoint -> (Lexing.lexbuf -> token) -> Lexing.lexbuf -> 'a end menhir-20151112/demos/calc-inspection/ErrorReporting.ml0000644000175000017500000001515712621170073021677 0ustar mehdimehdimodule Make (I : MenhirLib.IncrementalEngine.EVERYTHING) (User : sig (* In order to submit artificial tokens to the parser, we need a function that converts a terminal symbol to a token. Unfortunately, we cannot (in general) auto-generate this code, because it requires making up semantic values of arbitrary OCaml types. *) val terminal2token: _ I.terminal -> I.token end) = struct open MenhirLib.General open I open User (* ------------------------------------------------------------------------ *) (* Explanations. *) type explanation = { item: item; past: (xsymbol * Lexing.position * Lexing.position) list } let item explanation = explanation.item let past explanation = explanation.past let future explanation = let prod, index = explanation.item in let rhs = rhs prod in drop index rhs let goal explanation = let prod, _ = explanation.item in lhs prod (* ------------------------------------------------------------------------ *) (* [items_current env] assumes that [env] is not an initial state (which implies that the stack is non-empty). Under this assumption, it extracts the automaton's current state, i.e., the LR(1) state found in the top stack cell. It then goes through [items] so as to obtain the LR(0) items associated with this state. *) let items_current env : item list = (* Get the current state. *) match Lazy.force (stack env) with | Nil -> (* If we get here, then the stack is empty, which means the parser is in an initial state. This should not happen. 
*) invalid_arg "items_current" (* TEMPORARY it DOES happen! *) | Cons (Element (current, _, _, _), _) -> (* Extract the current state out of the top stack element, and convert it to a set of LR(0) items. Returning a set of items instead of an ['a lr1state] is convenient; returning [current] would require wrapping it in an existential type. *) items current (* [is_shift_item t item] determines whether [item] justifies a shift transition along the terminal symbol [t]. *) let is_shift_item (t : _ terminal) (prod, index) : bool = let rhs = rhs prod in let length = List.length rhs in assert (0 < index && index <= length); (* We test that there is at least one symbol after the bullet and that this symbol is [t] or can generate a word that begins with [t]. (Note that we don't need to worry about the case where this symbol is nullable and [t] is generated by the following symbol. In that situation, we would have to reduce before we can shift [t].) *) index < length && xfirst (List.nth rhs index) t let compare_explanations x1 x2 = let c = compare_items x1.item x2.item in (* TEMPORARY checking that if [c] is 0 then the positions are the same *) assert ( c <> 0 || List.for_all2 (fun (_, start1, end1) (_, start2, end2) -> start1.Lexing.pos_cnum = start2.Lexing.pos_cnum && end1.Lexing.pos_cnum = end2.Lexing.pos_cnum ) x1.past x2.past ); c (* [marry past stack] pairs each symbol in the list [past] (a prefix of an item's right-hand side, reversed, so that the most recent symbol comes first) with the corresponding cell of the parser's [stack], and annotates it with the start and end positions recorded in that cell. *) let rec marry past stack = match past, stack with | [], _ -> [] | symbol :: past, lazy (Cons (Element (s, _, startp, endp), stack)) -> assert (compare_symbols symbol (X (incoming_symbol s)) = 0); (symbol, startp, endp) :: marry past stack | _ :: _, lazy Nil -> assert false (* [accumulate t env explanations] is called if the parser decides to shift the test token [t]. The parameter [env] describes the parser configuration before it shifts this token. (Some reductions have taken place.) We use the shift items found in [env] to produce new explanations. *) let accumulate (t : _ terminal) env explanations = (* The parser is about to shift, which means it is willing to consume the terminal symbol [t]. In the state before the transition, look at the items that justify shifting [t]. We view these items as explanations: they explain what we have read and what we expect to read. *) let stack = stack env in List.fold_left (fun explanations item -> if is_shift_item t item then let prod, index = item in let rhs = rhs prod in { item = item; past = List.rev (marry (List.rev (take index rhs)) stack) } :: explanations else explanations ) explanations (items_current env) (* TEMPORARY [env] may be an initial state! violating [items_current]'s precondition *) (* [investigate pos checkpoint] assumes that [checkpoint] is of the form [InputNeeded _]. For every terminal symbol [t], it investigates how the parser reacts when fed the symbol [t], and returns a list of explanations. The position [pos] is where a syntax error was detected; it is used when manufacturing dummy tokens. This is important because the position of the dummy token may end up in the explanations that we produce. *) let investigate pos (checkpoint : _ checkpoint) : explanation list = weed compare_explanations ( foreach_terminal_but_error (fun symbol explanations -> match symbol with | X (N _) -> assert false | X (T t) -> (* Build a dummy token for the terminal symbol [t]. *) let token = (terminal2token t, pos, pos) in (* Submit it to the parser. Accumulate explanations.
*) let checkpoint = offer checkpoint token in I.loop_test (accumulate t) checkpoint explanations ) [] ) (* We drive the parser in the usual way, but record the last [InputNeeded] checkpoint. If a syntax error is detected, we go back to this checkpoint and analyze it in order to produce a meaningful diagnostic. *) exception Error of (Lexing.position * Lexing.position) * explanation list let entry (start : 'a I.checkpoint) lexer lexbuf = let fail (inputneeded : 'a I.checkpoint) (checkpoint : 'a I.checkpoint) = (* The parser signals a syntax error. Note the position of the problematic token, which is reported as part of the error. Then, go back to the last [InputNeeded] checkpoint and investigate. *) match checkpoint with | HandlingError env -> let (startp, _) as positions = positions env in raise (Error (positions, investigate startp inputneeded)) | _ -> assert false in I.loop_handle_undo (fun v -> v) fail (lexer_lexbuf_to_supplier lexer lexbuf) start (* TEMPORARY could also publish a list of the terminal symbols that do not cause an error *) end menhir-20151112/demos/calc-inspection/CalcPrinters.ml0000644000175000017500000000257612621170073021306 0ustar mehdimehdiopen Parser.MenhirInterpreter (* In order to print syntax error messages and/or debugging information, we need a symbol printer. *) let print_symbol symbol : string = match symbol with | X (T T_TIMES) -> "*" | X (T T_RPAREN) -> ")" | X (T T_PLUS) -> "+" | X (T T_MINUS) -> "-" | X (T T_LPAREN) -> "(" | X (T T_INT) -> "INT" | X (N N_expr) -> "expr" | X (N N_main) -> "main" | X (T T_EOL) -> "EOL" | X (T T_DIV) -> "/" | X (T T_error) -> "error" (* In order to print a view of the stack that includes semantic values, we need an element printer. (If we don't need this feature, then [print_symbol] above suffices.) *) let print_element e : string = match e with | Element (s, v, _, _) -> match incoming_symbol s with | T T_TIMES -> "*" | T T_RPAREN -> ")" | T T_PLUS -> "+" | T T_MINUS -> "-" | T T_LPAREN -> "(" | T T_INT -> string_of_int v | N N_expr -> string_of_int v | N N_main -> string_of_int v | T T_EOL -> "" | T T_DIV -> "/" | T T_error -> "error" (* The public functions. *) let print = prerr_string let print_symbol s = print (print_symbol s) let print_element = Some (fun s -> print (print_element s)) menhir-20151112/demos/calc-inspection/calc.ml0000644000175000017500000000335512621170073017613 0ustar mehdimehdiopen Lexing open MenhirLib.General open Parser.MenhirInterpreter (* Instantiate [MenhirLib.Printers] for our parser. This requires providing a few printing functions -- see [CalcPrinters]. *) module P = MenhirLib.Printers.Make (Parser.MenhirInterpreter) (CalcPrinters) (* Instantiate [ErrorReporting] for our parser. This requires providing a few functions -- see [CalcErrorReporting]. *) module E = ErrorReporting.Make (Parser.MenhirInterpreter) (CalcErrorReporting) (* Define a printer for explanations. We treat an explanation as if it were just an item: that is, we ignore the position information that is provided in the explanation. Indeed, this information is hard to show in text mode. *) let print_explanation explanation = P.print_item (E.item explanation) let print_explanations startp explanations = Printf.fprintf stderr "At line %d, column %d: syntax error.\n" startp.pos_lnum startp.pos_cnum; List.iter print_explanation explanations; flush stderr (* The rest of the code is as in the [calc] demo.
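For instance -- mirroring the tests in this demo's Makefile -- feeding the line "(1 + 2 * 10) * 2" to [process] prints 42, whereas "(1 + 2 * 10))" causes [E.Error] to be raised and the resulting explanations to be printed by [print_explanations].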
*) let process (line : string) = let lexbuf = from_string line in try let v = E.entry (Parser.Incremental.main lexbuf.lex_curr_p) Lexer.token lexbuf in Printf.printf "%d\n%!" v with | Lexer.Error msg -> Printf.fprintf stderr "%s%!" msg | E.Error ((startp, _), explanations) -> print_explanations startp explanations let process (optional_line : string option) = match optional_line with | None -> () | Some line -> process line let rec repeat channel = (* Attempt to read one line. *) let optional_line, continue = Lexer.line channel in process optional_line; if continue then repeat channel let () = repeat (from_channel stdin) menhir-20151112/demos/calc-inspection/CalcErrorReporting.mli0000644000175000017500000000025712621170073022626 0ustar mehdimehdiopen Parser.MenhirInterpreter (* This module offers the functionality required by the functor [ErrorReporting.Make]. *) val terminal2token: _ terminal -> token menhir-20151112/LICENSE0000644000175000017500000007615412621170073013205 0ustar mehdimehdiIn the following, "the Library" refers to the following files: 1- the file src/standard.mly; 2- the OCaml source files whose basename appears in src/menhirLib.mlpack and whose extension is ".ml" or ".mli". "The Generator" refers to all other files in this archive. The Generator is distributed under the terms of the Q Public License version 1.0 with a change to choice of law (included below). The Library is distributed under the terms of the GNU Library General Public License version 2 (included below). As a special exception to the Q Public Licence, you may develop application programs, reusable components and other software items that link with the original or modified versions of the Generator and are not made available to the general public, without any of the additional requirements listed in clause 6c of the Q Public licence. As a special exception to the GNU Library General Public License, you may link, statically or dynamically, a "work that uses the Library" with a publicly distributed version of the Library to produce an executable file containing portions of the Library, and distribute that executable file under terms of your choice, without any of the additional requirements listed in clause 6 of the GNU Library General Public License. By "a publicly distributed version of the Library", we mean either the unmodified Library as distributed by INRIA, or a modified version of the Library that is distributed under the conditions defined in clause 3 of the GNU Library General Public License. This exception does not however invalidate any other reasons why the executable file might be covered by the GNU Library General Public License. ---------------------------------------------------------------------- THE Q PUBLIC LICENSE version 1.0 Copyright (C) 1999 Troll Tech AS, Norway. Everyone is permitted to copy and distribute this license document. The intent of this license is to establish freedom to share and change the software regulated by this license under the open source model. This license applies to any software containing a notice placed by the copyright holder saying that it may be distributed under the terms of the Q Public License version 1.0. Such software is herein referred to as the Software. This license covers modification and distribution of the Software, use of third-party application programs based on the Software, and development of free software which uses the Software. Granted Rights 1.
You are granted the non-exclusive rights set forth in this license provided you agree to and comply with any and all conditions in this license. Whole or partial distribution of the Software, or software items that link with the Software, in any form signifies acceptance of this license. 2. You may copy and distribute the Software in unmodified form provided that the entire package, including - but not restricted to - copyright, trademark notices and disclaimers, as released by the initial developer of the Software, is distributed. 3. You may make modifications to the Software and distribute your modifications, in a form that is separate from the Software, such as patches. The following restrictions apply to modifications: a. Modifications must not alter or remove any copyright notices in the Software. b. When modifications to the Software are released under this license, a non-exclusive royalty-free right is granted to the initial developer of the Software to distribute your modification in future versions of the Software provided such versions remain available under these terms in addition to any other license(s) of the initial developer. 4. You may distribute machine-executable forms of the Software or machine-executable forms of modified versions of the Software, provided that you meet these restrictions: a. You must include this license document in the distribution. b. You must ensure that all recipients of the machine-executable forms are also able to receive the complete machine-readable source code to the distributed Software, including all modifications, without any charge beyond the costs of data transfer, and place prominent notices in the distribution explaining this. c. You must ensure that all modifications included in the machine-executable forms are available under the terms of this license. 5. You may use the original or modified versions of the Software to compile, link and run application programs legally developed by you or by others. 6. You may develop application programs, reusable components and other software items that link with the original or modified versions of the Software. These items, when distributed, are subject to the following requirements: a. You must ensure that all recipients of machine-executable forms of these items are also able to receive and use the complete machine-readable source code to the items without any charge beyond the costs of data transfer. b. You must explicitly license all recipients of your items to use and re-distribute original and modified versions of the items in both machine-executable and source code forms. The recipients must be able to do so without any charges whatsoever, and they must be able to re-distribute to anyone they choose. c. If the items are not available to the general public, and the initial developer of the Software requests a copy of the items, then you must supply one. Limitations of Liability In no event shall the initial developers or copyright holders be liable for any damages whatsoever, including - but not restricted to - lost revenue or profits or other direct, indirect, special, incidental or consequential damages, even if they have been advised of the possibility of such damages, except to the extent invariable law, if any, provides otherwise. No Warranty The Software and this license document are provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. Choice of Law This license is governed by the Laws of France. 
---------------------------------------------------------------------- GNU LIBRARY GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1991 Free Software Foundation, Inc. 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. [This is the first released version of the library GPL. It is numbered 2 because it goes with version 2 of the ordinary GPL.] Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This license, the Library General Public License, applies to some specially designated Free Software Foundation software, and to any other libraries whose authors decide to use it. You can use it for your libraries, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library, or if you modify it. For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source code. If you link a program with the library, you must provide complete object files to the recipients so that they can relink them with the library, after making changes to the library and recompiling it. And you must show them these terms so they know their rights. Our method of protecting your rights has two steps: (1) copyright the library, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the library. Also, for each distributor's protection, we want to make certain that everyone understands that there is no warranty for this free library. If the library is modified by someone else and passed on, we want its recipients to know that what they have is not the original version, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that companies distributing free software will individually obtain patent licenses, thus in effect transforming the program into proprietary software. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. Most GNU software, including some libraries, is covered by the ordinary GNU General Public License, which was designed for utility programs. This license, the GNU Library General Public License, applies to certain designated libraries. This license is quite different from the ordinary one; be sure to read it in full, and don't assume that anything in it is the same as in the ordinary license. 
The reason we have a separate public license for some libraries is that they blur the distinction we usually make between modifying or adding to a program and simply using it. Linking a program with a library, without changing the library, is in some sense simply using the library, and is analogous to running a utility program or application program. However, in a textual and legal sense, the linked executable is a combined work, a derivative of the original library, and the ordinary General Public License treats it as such. Because of this blurred distinction, using the ordinary General Public License for libraries did not effectively promote software sharing, because most developers did not use the libraries. We concluded that weaker conditions might promote sharing better. However, unrestricted linking of non-free programs would deprive the users of those programs of all benefit from the free status of the libraries themselves. This Library General Public License is intended to permit developers of non-free programs to use free libraries, while preserving your freedom as a user of such programs to change the free libraries that are incorporated in them. (We have not seen how to achieve this as regards changes in header files, but we have achieved it as regards changes in the actual functions of the Library.) The hope is that this will lead to faster development of free libraries. The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a "work based on the library" and a "work that uses the library". The former contains code derived from the library, while the latter only works together with the library. Note that it is possible for a library to be covered by the ordinary General Public License rather than by this special one. GNU LIBRARY GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any software library which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Library General Public License (also called "this License"). Each licensee is addressed as "you". A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables. The "Library", below, refers to any such software library or work which has been distributed under these terms. A "work based on the Library" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term "modification".) "Source code" for a work means the preferred form of the work for making modifications to it. For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library. Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). 
Whether that is true depends on what the Library does and what the program that uses the Library does. 1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) The modified work must itself be a software library. b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change. c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License. d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that, in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful. (For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library. In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2, instead of to this License. (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) 
Do not make any other change in these notices. Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. This option is useful when you wish to copy part of the code of the Library into a program that is not a library. 4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange. If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code. 5. A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a "work that uses the Library". Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License. However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a "work that uses the library". The executable is therefore covered by this License. Section 6 states terms for distribution of such executables. When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. The threshold for this to be true is not precisely defined by law. If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.) Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself. 6. As an exception to the Sections above, you may also compile or link a "work that uses the Library" with the Library to produce a work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications. You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. If the work during execution displays copyright notices, you must include the copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. 
Also, you must do one of these things: a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked with the Library, with the complete machine-readable "work that uses the Library", as object code and/or source code, so that the user can modify the Library and then relink to produce a modified executable containing the modified Library. (It is understood that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.) b) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution. c) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place. d) Verify that the user has already received a copy of these materials or that you have already sent this user a copy. For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for reproducing the executable from it. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute. 7. You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above. b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 9. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are prohibited by law if you do not accept this License. 
Therefore, by modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it. 10. Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Library at all. For example, if a patent license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 13. The Free Software Foundation may publish revised and/or new versions of the Library General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. 
If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation. 14. If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS Appendix: How to Apply These Terms to Your New Libraries If you develop a new library, and you want it to be of the greatest possible use to the public, we recommend making it free software that everyone can redistribute and change. You can do so by permitting redistribution under these terms (or, alternatively, under the terms of the ordinary General Public License). To apply these terms, attach the following notices to the library. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA Also add information on how to contact you by electronic and paper mail. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the library, if necessary. 
Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the library `Frob' (a library for tweaking knobs) written by James Random Hacker. , 1 April 1990 Ty Coon, President of Vice That's all there is to it! menhir-20151112/INSTALLATION0000644000175000017500000000212212621170073014010 0ustar mehdimehdiREQUIREMENTS You need Objective Caml 4.02 or later, ocamlbuild, and GNU make. HOW TO INSTALL If you wish to install via ocamlfind, make sure that ocamlfind is in your PATH. (Remember that prefixing a command with sudo affects its PATH.) Run the following commands: make PREFIX=/usr/local all make PREFIX=/usr/local install If your machine does not have the native code Objective Caml compiler (ocamlopt), but does have the bytecode compiler (ocamlc), then instead of the above commands, use: make PREFIX=/usr/local TARGET=byte all make PREFIX=/usr/local TARGET=byte install The value of the PREFIX variable can be changed to control where the software, the standard library, and the documentation should be stored. These files are copied to the following places: $PREFIX/bin/ $PREFIX/share/menhir/ $PREFIX/share/doc/menhir/ The support library, MenhirLib, is either installed via ocamlfind, if available, or placed within $PREFIX/share/menhir. Menhir's --suggest options help determine where and how it was installed. The documentation includes a reference manual and a number of demos. menhir-20151112/AUTHORS0000644000175000017500000000020712621170073013236 0ustar mehdimehdiFrançois Pottier Yann Régis-Gianas Jacques-Henri Jourdan (Coq back-end) menhir-20151112/CHANGES0000644000175000017500000003753112621170073013173 0ustar mehdimehdi2015/11/11: Fixed a severe bug in Menhir 20151110 which (when using the code back-end) could cause a generated parser to crash. Thanks to ygrek for reporting the bug. 2015/11/11: The code produced by version XXXXXXXX of menhir --table can now be linked only against a matching version of MenhirLib. If an incorrect version of MenhirLib is installed, the OCaml compiler should complain that [MenhirLib.StaticVersion.require_XXXXXXXX] is undefined. 2015/11/10: Optimized the computation of $symbolstartpos, based on a couple of assumptions about the lexer. (See the manual.) 2015/11/04: Modified the treatment of %inline so that the positions that are computed are the same, regardless of whether %inline is used. This property did not hold until now. It now does. Of course, this means that the positions computed by the new Menhir are not the same as those computed by older versions of Menhir. 2015/11/04: Fixed a bug in the treatment of %inline that would lead to an incorrect position being computed when the caller and callee had a variable by the same name. 2015/11/04: Modified Menhir so as to compute the start and end positions in the exact same way as ocamlyacc. (There used to be a difference in the treatment of epsilon productions.) Of course, this means that the positions computed by the new Menhir are not the same as those computed by older versions of Menhir. Added the keyword $symbolstartpos so as to simulate Parsing.symbol_start_pos() in the ocamlyacc world. The keyword $startpos sometimes produces a position that is too far off to the left; $symbolstartpos produces a more accurate position. 2015/11/04: Incompatible change of the incremental API: instead of a unit argument, the entry points (which are named after the start symbols) now require an initial position, which typically should be [lexbuf.lex_curr_p].
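For instance -- a sketch, assuming a generated module [Parser] with a start symbol [main] and a [lexbuf : Lexing.lexbuf] in scope, as in demos/calc-inspection -- a parse that used to start with [Parser.Incremental.main ()] now starts with [Parser.Incremental.main lexbuf.Lexing.lex_curr_p].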
2015/11/03: Fix-fix-and-re-fix the Makefile in an attempt to allow installation under opam/Windows. Thanks to Daniel Weil for patient explanations and testing. 2015/10/29: MenhirLib is now installed in both binary and source forms. "menhir --suggest-menhirLib" reports where MenhirLib is installed. This can be used to retrieve a snapshot of MenhirLib in source form and include it in your project (if you wish to use --table mode, yet do not wish to have a dependency on MenhirLib). 2015/10/26: Allow --list-errors to work on 32-bit machines (with low hard limits). This should fix a problem whereby the 2015/10/23 release could not bootstrap on a 32-bit machine. 2015/10/23: New declaration "%on_error_reduce foo", where foo is a nonterminal symbol. This modifies the automaton as follows. In every state where a production of the form "foo -> ..." is ready to be reduced, every error action is replaced with a reduction of this production. (If there is a conflict between several productions that could be reduced in this manner, nothing is done.) This does not affect the language that is accepted by the automaton, but delays the detection of an error: more reductions take place before the error is detected. 2015/10/23: Fixed a bug whereby Menhir would warn about a useless %prec declaration, even though it was useful. This would happen when the declaration was duplicated (by inlining or by macro-expansion) and some but not all of the copies were useful. 2015/10/23: Added [has_default_reduction] to the incremental API. 2015/10/23: Modified the meaning of --canonical to allow default reductions to take place. This implies no loss of precision in terms of lookahead sets, and should allow gaining more contextual information when a syntax error is encountered. (It should also lead to a smaller automaton.) 2015/10/23: A brand new set of tools to work on syntax errors. New command --list-errors, which produces a list of input sentences which are representative of all possible syntax errors. (Costly.) New command --interpret-error, which confirms that one particular input sentence ends in a syntax error, and prints the number of the state in which this error occurs. New command --compile-errors, which compiles a list of erroneous sentences (together with error messages) to OCaml code. New command --compare-errors, which compares two lists of erroneous sentences to check if they cover the same error states. New command --update-errors, which updates the auto-generated comments in a list of erroneous sentences. New command --echo-errors, which removes all comments and messages from a list of erroneous sentences, and echoes just the sentences. 2015/10/16: Additions to the incremental API. A [supplier] is a function that produces tokens on demand. [lexer_lexbuf_to_supplier] turns a lexer and a lexbuf into a supplier. [loop] is a ready-made made main parsing loop. [loop_handle] is a variant that lets the user do her own error handling. [loop_handle_undo] is a variant that additionally allows undoing the last few "spurious" reductions. [number] maps a state of the LR(1) automaton to its number. 2015/10/16: Incompatible change of the incremental API: renamed the type ['a result] to ['a checkpoint]. This is a better name anyway, and should help avoid confusion with the type ['a result] introduced in OCaml 4.03. 2015/10/12: Avoid using $(shell pwd) in Makefile, for better Windows compatibility. 2015/10/05: Fixed a bug where inconsistent OCaml code was generated when --table and --external-tokens were used together. 
(Reported by Darin Morrison.) 2015/10/05: In --infer mode, leave the .ml file around (instead of removing it) if ocamlc fails, so we have a chance to understand what's wrong. 2015/09/21: Re-established some error messages concerning the mis-use of $i which had disappeared on 2015/06/29. 2015/09/11: Fixed the mysterious message that would appear when a nonterminal symbol begins with an uppercase letter and --infer is turned on. Clarified the documentation to indicate that a (non-start) nonterminal symbol can begin with an uppercase letter, but this is not recommended. 2015/08/27: New option --inspection (added last January, documented only now). This generates an inspection API which allows inspecting the automaton's stack, among other things. This API can in principle be used to write custom code for error reporting, error recovery, etc. It is not yet mature and may change in the future. 2015/07/20: Added the command line options --unused-token and --unused-tokens. 2015/06/29: Changed the treatment of the positional keywords $i. They are now rewritten into variables of the form '_i' where 'i' is an integer. Users are advised not to use variables of this form inside semantic actions. 2015/02/11: Added support for anonymous rules. This allows writing, e.g., list(e = expression SEMI { e }) whereas previously one should have written list(terminated(e, SEMI)). 2015/02/09: Moved all of the demos to ocamlbuild (instead of make). 2015/01/18: Incompatible change of the incremental API. The incremental API now exposes shift events too. 2015/01/16: Fixed a couple bugs in Makefile and src/Makefile which would cause compilation and installation to fail with "TARGET=byte". (Reported by Jérémie Courrèges-Anglas and Daniel Dickman.) 2015/01/01: Incompatible change of the incremental API. The entry point main_incremental is now named Incremental.main. 2014/12/29: Incompatible change of the incremental API. The API now exposes reduction events. The type 'a result is now private. The type env is no longer parameterized. [handle] is renamed to [resume]. [offer] and [resume] now expect a result, not an environment. 2014/12/22: Documented the Coq back-end (designed and implemented by Jacques-Henri Jourdan). 2014/12/15: New incremental API (in --table mode only), inspired by Frédéric Bour. 2014/12/11: Menhir now reports an error if one of the start symbols produces either the empty language or the singleton language {epsilon}. Although some people out there actually define a start symbol that recognizes {epsilon} (and use it as a way of initializing or re-initializing some global state), this is considered bad style. Furthermore, by ruling out this case, we are able to simplify the table back-end a little bit. 2014/12/12: A speed improvement in the code back-end. 2014/12/08: Menhir now requires OCaml 4.02 (instead of 3.09). 2014/12/02: Removed support for the $previouserror keyword. Removed support for --error-recovery mode. 2014/02/18: In the Coq backend, use ' instead of _ as separator in identifiers. Also, correct a serious bug that was inadvertently introduced on 2013/03/01 (r319). 2014/02/14: Lexer fix so as to support an open variant type [> ...] within a %type<...> declaration. 2013/12/16: Updated the Makefile so that install no longer depends on all. Updated the demos so that the lexer does not invoke "exit 0" when encountering eof. (This should be more intuitive.) 2013/09/11: Fixed a newline conversion problem that would prevent Menhir from building on Windows when using ocaml 4.01.
2013/03/02: Switched to ocamlbuild. Many thanks to Daniel Weil for offering very useful guidance. 2013/01/16: "menhir --depend" was broken since someone added new whitespace in the output of ocamldep. Fixed. 2012/12/19: Fixed a compilation problem that would arise when a file produced by Menhir on a 64-bit platform was compiled by ocaml on a 32-bit platform. 2012/08/25: Performance improvements in the computation of various information about the automaton (module [Invariant]). The improvements will be noticeable only for very large automata. 2012/06/07: The option --log-grammar 3 (and above) now causes the FOLLOW sets for *terminal* symbols to be computed and displayed. 2012/05/25: Added the flag --canonical, which causes Menhir to produce a canonical LR(1) automaton in the style of Knuth. This means that no merging of states takes place during the construction of the automaton, and that no default reductions are allowed. 2012/01/23: Fixed a bug whereby a %nonassoc declaration was not respected. This declaration requests that a shift/reduce conflict be reduced in favor of neither shifting nor reducing, that is, a syntax error must occur. However, due to an unforeseen interaction with the "default reduction" mechanism, this declaration was sometimes ignored and reduction would take place. 2012/01/09: Changes in the (undocumented) Coq back-end so as to match the ESOP 2012 paper. 2011/10/19: The Makefile now tests whether Unix or Windows is used (the test is performed by evaluating Sys.os_type under ocaml) and changes a couple settings accordingly: - the executable file name is either menhir or menhir.exe - the object file suffix is either .o or .obj 2011/10/19: Added --strict, which causes many warnings about the grammar and about the automaton to be considered errors. 2011/10/19: The # annotations that are inserted in the generated .ml file now retain their full path. (That is, we no longer use [Filename.basename].) This implies that the # annotations depend on how menhir is invoked -- e.g. "menhir foo/bar.mly" and "cd foo && menhir bar.mly" will produce different results. Nevertheless, this seems reasonable and useful (e.g. in conjunction with ocamlbuild and a hierarchy of files). Thanks to Daniel Weil. 2011/10/06: With the -lg 1 switch, Menhir now indicates whether the grammar is SLR(1). 2011/05/24: Removed the lock in ocamldep.wrapper. It is the responsibility of the user to avoid interferences with other processes (or other instances of the script) that create and/or remove files. 2011/04/28: The (internal) computation of the automaton's invariant was broken and has been fixed. Surprisingly, this does not seem to affect the generated code, (which was correct,) so no observable bug is fixed. Hopefully no bug is introduced! 2011/04/07: The grammar description files (.mly) are now read in up front and stored in memory while they are parsed. This allows us to avoid the use of pos_in and seek_in, which do not work correctly when CRLF conversion is being performed. 2011/04/05: Fixed a bug in the type inference module (for parameterized non-terminals) which would cause an infinite loop. 2011/01/24: Fixed a bug that would cause an assertion failure in the generated parser in some situations where the input stream was incorrect and the grammar involved the error token. The fix might cause grammars that use the error token to behave differently (hopefully more accurately) as of now. 2009/06/18: Makefile changes: build and install only the bytecode version of menhirLib when TARGET=byte is set. 
2009/02/06: Fixed ocamldep.wrapper to avoid quoting the name of the ocaml command. This is hoped to fix a compilation problem under MinGW. 2009/02/04: A Makefile fix to avoid a problem under Windows/Cygwin. Renamed the ocaml-check-version script so as to avoid a warning. 2008/09/05: Ocaml summer project: added --interpret, --table, and --suggest-*. 2008/08/06: Fixed a problem that would cause the code inliner to abort when a semantic value and a non-terminal symbol happened to have the same name. 2008/08/06: Removed code sharing. 2008/06/20: Removed an incorrect assertion that caused failures (lr1.ml, line 134). 2007/12/05: Disabled code sharing by default, as it is currently broken. (See Yann's message; assertion failure at runtime.) 2007/12/01: Added an optimization to share code among states that have identical outgoing transition tables. 2007/08/30: Small Makefile change: create an executable file for check-ocaml-version in order to work around the absence of dynamic loading on some platforms. 2007/05/20: Made a fundamental change in the construction of the LR(1) automaton in order to eliminate a bug that could lead to spurious conflicts -- thanks to Ketti for submitting a bug report. 2007/05/18: Added --follow-construction to help understand the construction of the LR(1) automaton (very verbose). 2007/05/11: Code generation: more explicit qualifications with Pervasives so as to avoid capture when the user redefines some of the built-in operators, such as (+). Added a new demo (calc-param) that shows how to use %parameter. 2007/03/22: Makefile improvements (check for PREFIX; bootstrap in bytecode now also available). Slight changes to OMakefile.shared. 2007/02/15: Portability fix in Makefile and Makefile.shared (avoided "which"). 2006/12/15: Portability fix in Makefile.shared (replaced "&>" with "2>&1 >"). 2006/06/23: Made a slight restriction to Pager's criterion so as to never introduce fake conflict tokens (see Lr0.compatible). This might help make conflict explanations more accurate in the future. 2006/06/16: Fixed bug that would cause positions to become invalid after %inlining. 2006/06/15: Fixed --depend to be more lenient when analyzing ocamldep's output. Added --raw-depend which transmits ocamldep's output unchanged (for use in conjunction with omake). 2006/06/12: Fixed bug that would cause --only-preprocess to print %token declarations also for pseudo-tokens. Fixed bug that caused some precedence declarations to be incorrectly reported as useless. Improved things so that useless pseudo-tokens now also cause warnings. Fixed bug that would cause %type directives for terminal symbols to be incorrectly accepted. Fixed bug that would occur when a semantic action containing $i keywords was %inlined. 2006/05/05: Fixed problem that caused some end-of-stream conflicts not to be reported. Fixed Pager's compatibility criterion to avoid creating end-of-stream conflicts. 2006/04/21: Fixed problem that allowed generating incorrect but apparently well-typed Objective Caml code when a semantic action was ill-typed and --infer was omitted. 2006/03/29: Improved conflict reports by factoring out maximal common derivation contexts. 2006/03/28: Fixed bug that could arise when explaining a conflict in a non-LALR(1) grammar. 2006/03/27: Changed count of reduce/reduce conflicts to allow a comparison with ocamlyacc's diagnostics. When refusing to resolve a conflict, report all diagnostics before dying. 2006/03/18: Added display of FOLLOW sets when using --log-grammar 2. Added --graph option. 
Fixed behavior of --depend option. 2006/01/06: Removed reversed lists from the standard library.