==> menhir-20171222/demos/calc-two/common.mly <==

(* This partial grammar specification defines the grammar's entry
   point to be an expression, followed with an end-of-line token. *)

%start <int> main

%%

main:
| e = expr EOL
    { e }

==> menhir-20171222/demos/calc-two/.merlin <==

B _build

==> menhir-20171222/demos/calc-two/_tags <==

<tokens.*>: only_tokens
<algebraic.*>: external_tokens(Tokens)
<reverse.*>: external_tokens(Tokens)
<reverse.*>: unused_token(LPAREN)
<reverse.*>: unused_token(RPAREN)

==> menhir-20171222/demos/calc-two/README <==

This tiny program reads arithmetic expressions from the standard input
channel. Each expression is expected to be complete when the current
line ends. Its value is then displayed on the standard output channel.

In this version, there is a single lexer, but there are two parsers,
one for expressions in algebraic (that is, infix) notation, one for
expressions in reverse Polish (that is, postfix) notation. One of the
two parsers is selected at runtime via a command line switch.

This demo illustrates how to build two parsers that share a single set
of tokens (see tokens.mly) and that share some productions (see
common.mly).

==> menhir-20171222/demos/calc-two/Makefile <==

.PHONY: all clean test

# Find Menhir.
ifndef MENHIR
  MENHIR := $(shell ../find-menhir.sh)
endif

MENHIRFLAGS := --infer

OCAMLBUILD := ocamlbuild -use-ocamlfind -use-menhir -menhir "$(MENHIR) $(MENHIRFLAGS)"

MAIN := calc

all:
	$(OCAMLBUILD) $(MAIN).native

clean:
	rm -f *~ .*~
	$(OCAMLBUILD) -clean

test: all
	@echo "The following command should print 42:"
	echo "(1 + 2 * 10) * 2" | ./$(MAIN).native --algebraic
	@echo "The following command should print 42:"
	echo " 1 2 10 * + 2 *" | ./$(MAIN).native --reverse

==> menhir-20171222/demos/calc-two/reverse.mlypack <==

Tokens Reverse Common

==> menhir-20171222/demos/calc-two/lexer.mll <==

{
  open Tokens

  exception Error of string
}

(* This rule looks for a single line, terminated with '\n' or eof.
   It returns a pair of an optional string (the line that was found)
   and a Boolean flag (false if eof was reached). *)
rule line = parse
| ([^'\n']* '\n') as line
    (* Normal case: one line, no eof. *)
    { Some line, true }
| eof
    (* Normal case: no data, eof. *)
    { None, false }
| ([^'\n']+ as line) eof
    (* Special case: some data but missing '\n', then eof.
       Consider this as the last line, and add the missing '\n'. *)
    { Some (line ^ "\n"), false }

(* This rule analyzes a single line and turns it into a stream of
   tokens.
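   Whitespace is skipped by re-invoking [token] on the same buffer,
   and a '\n' character is turned into the EOL token that terminates
   each expression.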
   *)
and token = parse
| [' ' '\t']
    { token lexbuf }
| '\n'
    { EOL }
| ['0'-'9']+ as i
    { INT (int_of_string i) }
| '+'
    { PLUS }
| '-'
    { MINUS }
| '*'
    { TIMES }
| '/'
    { DIV }
| '('
    { LPAREN }
| ')'
    { RPAREN }
| _
    { raise (Error (Printf.sprintf "At offset %d: unexpected character.\n" (Lexing.lexeme_start lexbuf))) }

==> menhir-20171222/demos/calc-two/reverse.mly <==

(* This partial grammar specification defines the syntax of expressions
   in reverse Polish notation. Parentheses are meaningless, and unary
   minus is not supported (some other symbol than MINUS would be
   required in order to avoid an ambiguity). *)

%%

%public expr:
| i = INT
    { i }
| e1 = expr e2 = expr PLUS
    { e1 + e2 }
| e1 = expr e2 = expr MINUS
    { e1 - e2 }
| e1 = expr e2 = expr TIMES
    { e1 * e2 }
| e1 = expr e2 = expr DIV
    { e1 / e2 }

==> menhir-20171222/demos/calc-two/tokens.mlypack <==

Tokens

==> menhir-20171222/demos/calc-two/tokens.mly <==

(* This partial grammar specification defines the set of tokens. *)

%token <int> INT
%token PLUS MINUS TIMES DIV
%token LPAREN RPAREN
%token EOL

%%

==> menhir-20171222/demos/calc-two/myocamlbuild.ml <==

open Ocamlbuild_plugin
open Command

(* Define ocamlbuild flags [only_tokens] and [external_tokens(Foo)]
   and [unused_token(Bar)] which correspond to menhir's [--only-tokens]
   and [--external-tokens Foo] and [--unused-token Bar]. When they are
   used, these flags should be passed both to [menhir] and to [menhir
   --raw-depend]. *)

let menhir_flags() =
  List.iter (fun mode ->
    flag [ mode; "only_tokens" ] (S[A "--only-tokens"]);
    pflag [ mode ] "external_tokens" (fun name ->
      S[A "--external-tokens"; A name]
    );
    pflag [ mode ] "unused_token" (fun name ->
      S[A "--unused-token"; A name]
    )
  )
  [ "menhir"; "menhir_ocamldep" ]

let () =
  dispatch (fun event ->
    match event with
    | After_rules ->
        menhir_flags()
    | _ ->
        ()
  )

==> menhir-20171222/demos/calc-two/calc.ml <==

let algebraic =
  ref true

let () =
  Arg.parse [
    "--algebraic", Arg.Set algebraic,
    " Use algebraic (that is, infix) notation";
    "--reverse", Arg.Clear algebraic,
    " Use reverse Polish (that is, postfix) notation";
  ] (fun _ -> ()) (Printf.sprintf "Usage: %s <options>" Sys.argv.(0))

let main =
  if !algebraic then
    Algebraic.main
  else
    Reverse.main

let process (line : string) =
  let linebuf = Lexing.from_string line in
  try
    (* Run the parser on this line of input. *)
    Printf.printf "%d\n%!" (main Lexer.token linebuf)
  with
  | Lexer.Error msg ->
      Printf.fprintf stderr "%s%!" msg
  | Algebraic.Error
  | Reverse.Error ->
      Printf.fprintf stderr "At offset %d: syntax error.\n%!" (Lexing.lexeme_start linebuf)

let process (optional_line : string option) =
  match optional_line with
  | None ->
      ()
  | Some line ->
      process line

let rec repeat channel =
  (* Attempt to read one line. *)
  let optional_line, continue = Lexer.line channel in
  process optional_line;
  if continue then
    repeat channel

let () =
  repeat (Lexing.from_channel stdin)

==> menhir-20171222/demos/calc-two/algebraic.mly <==

(* This partial grammar specification defines the syntax of expressions
   in algebraic notation.
*) %left PLUS MINUS /* lowest precedence */ %left TIMES DIV /* medium precedence */ %nonassoc UMINUS /* highest precedence */ %% %public expr: | i = INT { i } | LPAREN e = expr RPAREN { e } | e1 = expr PLUS e2 = expr { e1 + e2 } | e1 = expr MINUS e2 = expr { e1 - e2 } | e1 = expr TIMES e2 = expr { e1 * e2 } | e1 = expr DIV e2 = expr { e1 / e2 } | MINUS e = expr %prec UMINUS { - e } menhir-20171222/demos/calc-two/algebraic.mlypack0000664000175000017500000000003013217215727021625 0ustar fpottierfpottierTokens Algebraic Common menhir-20171222/demos/obsolete/0000775000175000017500000000000013217215727016444 5ustar fpottierfpottiermenhir-20171222/demos/obsolete/Makefile.shared0000664000175000017500000002125313217215727021354 0ustar fpottierfpottier# This Makefile is shared between all demos. It is our suggestion # of a canonical Makefile for projects that use Objective Caml, # ocamllex, and menhir. It requires a recent version of GNU make # (older versions do not correctly implement $(eval)). # ---------------------------------------------------------------- # This Makefile is meant to be included within a host Makefile # that defines the following variables: # # GENERATED : a list of the source (.ml and .mli) files # that should be generated by invoking ocamllex # or menhir # # MODULES : a list of the modules (without extension) # that should be linked into the executable # program. Order is significant. # # EXECUTABLE : the base name of the executables that should # be produced. Suffixes $(BSUFFIX) and $(OSUFFIX) # will be added to distinguish the bytecode and # native code versions. # ---------------------------------------------------------------- # The host Makefile can also override the following settings: # Menhir. ifndef MENHIR MENHIR := menhir endif # Parser generation flags. ifndef PGFLAGS PGFLAGS := --infer -v endif # Include directives for compilation and for linking. ifndef INCLUDE INCLUDE := endif # Bytecode compilation flags. ifndef BFLAGS BFLAGS := endif # Native code compilation flags. ifndef OFLAGS OFLAGS := endif # Menhir-suggested compilation flags. ifndef SUGG_FLAGS SUGG_FLAGS := $(shell $(MENHIR) $(PGFLAGS) --suggest-comp-flags 2>/dev/null) endif # Bytecode link-time flags. ifndef BLNKFLAGS BLNKFLAGS := endif # Menhir-suggested bytecode link-time flags. ifndef SUGG_BLNKFLAGS SUGG_BLNKFLAGS := $(shell $(MENHIR) $(PGFLAGS) --suggest-link-flags-byte 2>/dev/null) endif # Native code link-time flags. ifndef OLNKFLAGS OLNKFLAGS := endif # Menhir-suggested native code link-time flags. ifndef SUGG_OLNKFLAGS SUGG_OLNKFLAGS := $(shell $(MENHIR) $(PGFLAGS) --suggest-link-flags-opt 2>/dev/null) endif # Suffix appended to the name of the bytecode executable. ifndef BSUFFIX BSUFFIX := .byte endif # Suffix appended to the name of the native code executable. ifndef OSUFFIX OSUFFIX := endif # Access paths. 
ifndef OCAML
  OCAML := ocaml
endif

ifndef OCAMLC
  OCAMLC := $(shell if ocamlfind ocamlc -v >/dev/null 2>&1 ; \
                    then echo ocamlfind ocamlc ; \
                    elif ocamlc.opt -v >/dev/null 2>&1 ; \
                    then echo ocamlc.opt ; \
                    else echo ocamlc ; fi)
endif

ifndef OCAMLOPT
  OCAMLOPT := $(shell if ocamlfind ocamlopt -v >/dev/null 2>&1 ; \
                      then echo ocamlfind ocamlopt ; \
                      elif ocamlopt.opt -v >/dev/null 2>&1 ; \
                      then echo ocamlopt.opt ; \
                      else echo ocamlopt ; fi)
endif

ifndef OCAMLDEP
  OCAMLDEP := $(shell if ocamlfind ocamldep -version >/dev/null 2>&1 ; \
                      then echo ocamlfind ocamldep ; \
                      elif ocamldep.opt -version >/dev/null 2>&1 ; \
                      then echo ocamldep.opt ; \
                      else echo ocamldep ; fi)
endif

ifndef OCAMLDEPWRAPPER
  OCAMLDEPWRAPPER:= ./ocamldep.wrapper
endif

ifndef OCAMLLEX
  OCAMLLEX := ocamllex
endif

# A list of targets that do not require dependency analysis.
# This variable should be set by the host before including
# this Makefile.

COLD += clean

# ----------------------------------------------------------------

# Do not destroy the generated source files.

.SECONDARY: $(GENERATED)

# ----------------------------------------------------------------

# Linking.

$(EXECUTABLE)$(OSUFFIX): $(MODULES:=.cmx)
	$(OCAMLOPT) -o $@ $(INCLUDE) $(OLNKFLAGS) $(SUGG_FLAGS) $(SUGG_OLNKFLAGS) $^

$(EXECUTABLE)$(BSUFFIX): $(MODULES:=.cmo)
	$(OCAMLC) -o $@ $(INCLUDE) $(BLNKFLAGS) $(SUGG_FLAGS) $(SUGG_BLNKFLAGS) $^

# ----------------------------------------------------------------

# Compiling.

# We make the .ml and .mli files generated by ocamllex and menhir
# unwritable, so as to prevent them from being edited by mistake.

%.cmi: %.mli %.mli.d
	$(OCAMLC) $(INCLUDE) $(BFLAGS) $(SUGG_FLAGS) -c $<

%.cmo: %.ml %.ml.d
	$(OCAMLC) $(INCLUDE) $(BFLAGS) $(SUGG_FLAGS) -c $<

%.cmx %.o: %.ml %.ml.d
	$(OCAMLOPT) $(INCLUDE) $(OFLAGS) $(SUGG_FLAGS) -c $<

%.ml: %.mll
	@if [ -f $@ ] ; then /bin/chmod +w $@ ; fi
	$(OCAMLLEX) $<
	@/bin/chmod -w $@

# ----------------------------------------------------------------

# Computing dependencies.

# We associate a tiny Makefile, whose name ends in .d, with every
# source file; it contains dependency information only. For an .ml or
# .mli file, we create an .ml.d or .mli.d file by invoking ocamldep.
# For an .mll file, we create an .ml.d file by invoking ocamllex first
# (this is implicit), then ocamldep.

# When it finds a reference to module M, ocamldep checks whether the
# files m.ml and m.mli exist before deciding which dependency to
# report. If m.ml and m.mli are generated from m.mll or m.mly, then
# there is a potential problem: because ocamldep is invoked before
# these files are created, it cannot see them. The standard solution
# until now was to invoke ocamllex and ocamlyacc first to create all
# generated files, and run ocamldep next. This approach does not work
# with menhir when the --infer switch is on: menhir cannot be invoked
# first because it needs type information found in .cmi (or .cmo or
# .cmx) files. Our solution is to wrap ocamldep in a script that
# creates fake generated files m.ml and m.mli to let ocamldep know that
# these files are supposed to exist. This is somewhat tricky, but
# appears to work.

%.ml.d: %.ml
	$(OCAML) $(OCAMLDEPWRAPPER) $(GENERATED) - $(OCAMLDEP) $< > $@

%.mli.d: %.mli
	$(OCAML) $(OCAMLDEPWRAPPER) $(GENERATED) - $(OCAMLDEP) $< > $@

# All .d files are included within the present Makefile, so if they
# do not exist, they are created first, and the dependencies that
# they contain are then taken into account.
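# For example, a generated lexer.ml.d might contain a line such as
#   lexer.cmo : tokens.cmi
# (a hypothetical dependency), which make then honors when scheduling
# compilation.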
# A .SECONDARY directive is used to ensure that the auxiliary # Makefiles are never removed. Otherwise, make could create # one, remove one, create one, remove one, ... (We have observed # this.) ifeq ($(findstring $(MAKECMDGOALS),$(COLD)),) ifneq ($(strip $(wildcard *.mli)),) .SECONDARY: $(patsubst %.mli,%.mli.d,$(wildcard *.mli)) -include $(patsubst %.mli,%.mli.d,$(wildcard *.mli)) endif ifneq ($(strip $(wildcard *.ml)),) .SECONDARY: $(patsubst %.ml,%.ml.d,$(wildcard *.ml)) -include $(patsubst %.ml,%.ml.d,$(wildcard *.ml)) endif ifneq ($(strip $(wildcard *.mll)),) .SECONDARY: $(patsubst %.mll,%.ml.d,$(wildcard *.mll)) -include $(patsubst %.mll,%.ml.d,$(wildcard *.mll)) endif endif # ---------------------------------------------------------------- # Support for menhir projects. # The macro menhir_multimodule defines support for multi-module grammar # specifications, that is, for producing parsers out of multiple # source files. The parameter $(1) is the name of the parser that # should be produced; the parameter $(2) is the list of .mly source # files; the parameter $(3) contains extra options to be passed to # menhir. # The dependency file is named $(1).d and created by invoking menhir # --depend. define menhir_multimodule $(1).ml $(1).mli: $(2) $(1).d @if [ -f $(1).ml ] ; then /bin/chmod +w $(1).ml ; fi @if [ -f $(1).mli ] ; then /bin/chmod +w $(1).mli ; fi $(MENHIR) --ocamlc "$(OCAMLC)" $(PGFLAGS) --base $(1) $(3) $(2) @/bin/chmod -w $(1).ml $(1).mli $(1).d: $(2) @if [ -f $(1).ml ] ; then /bin/chmod +w $(1).ml ; fi @if [ -f $(1).mli ] ; then /bin/chmod +w $(1).mli ; fi $(OCAML) $(OCAMLDEPWRAPPER) $(GENERATED) - \ $(MENHIR) --ocamldep "$(OCAMLDEP)" --depend --base $(1) $(3) $(2) > $$@ ifeq ($$(findstring $$(MAKECMDGOALS),$$(COLD)),) .SECONDARY: $(1).d -include $(1).d endif endef # The macro menhir_monomodule defines support for a mono-module grammar # specification. The parameter $(1) is the name of the parser that # should be produced; the source file is $(1).mly. The parameter $(2) # contains extra options to be passed to menhir. define menhir_monomodule $(eval $(call menhir_multimodule,$(1),$(1).mly,$(2))) endef # Neither of the two macros above is invoked by default, as it is not # known here which is appropriate. It is up to the client to invoke # these macros with suitable parameters. The auxiliary Makefile.auto # implements the common case where every .mly file is a mono-module # grammar. # ---------------------------------------------------------------- .PHONY: clean clean:: /bin/rm -f $(EXECUTABLE)$(BSUFFIX) $(EXECUTABLE)$(OSUFFIX) $(GENERATED) /bin/rm -f *.cmi *.cmx *.cmo *.o *~ .*~ *.automaton *.conflicts *.annot /bin/rm -f *.d menhir-20171222/demos/obsolete/Makefile.calc-two0000664000175000017500000000146113217215727021616 0ustar fpottierfpottier# This is the old version of calc-two/Makefile, based on Makefile.shared. # It has been superseded by the calc-two/Makefile, based on ocamlbuild. # Find Menhir. ifndef MENHIR MENHIR := $(shell ../find-menhir.sh) endif # Add --table on the next line to use Menhir's table-based back-end. 
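# For example:
#   PGFLAGS := --table --infer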
PGFLAGS := --infer

GENERATED := tokens.ml tokens.mli algebraic.ml algebraic.mli reverse.ml reverse.mli lexer.ml
MODULES := algebraic reverse lexer calc
EXECUTABLE := calc
OCAMLDEPWRAPPER := ../ocamldep.wrapper

include ../Makefile.shared

$(eval $(call menhir_monomodule,tokens,--only-tokens))
$(eval $(call menhir_multimodule,algebraic,tokens.mly algebraic.mly common.mly,--external-tokens Tokens))
$(eval $(call menhir_multimodule,reverse,tokens.mly reverse.mly common.mly,--external-tokens Tokens))

==> menhir-20171222/demos/obsolete/ocamldep.wrapper <==

#!/usr/bin/env ocaml

(* ocamldep.wrapper <filename> ... <filename> - <command> <arguments>
   runs the <command> in an environment where all of the listed
   <filenames> appear to exist. The files are created, if required,
   before the command is run, and destroyed afterwards. *)

(* An earlier version of this script acquired a lock, so as to prevent
   multiple instances of this script from interfering with one another.
   However, this did not prevent interference between this script and
   some other process (e.g., the ocaml compiler) which creates files.
   So, the lock has been removed. My suggestion is to never use this
   script in a concurrent setting. If you wish to use parallel make,
   then you might be able to use a two-level Makefile approach: first,
   compute all dependencies in a sequential manner; then, construct
   all targets in a parallel manner. *)

#load "unix.cma"
open Printf

(* Parse the command line. The arguments that precede "-" are
   understood as file names and stored in the list [xs]. The arguments
   that follow "-" are understood as a command and stored in
   [command]. *)

let xs =
  ref []

let command =
  ref ""

let verbose =
  ref false

let rec loop accumulating i =
  if i = Array.length Sys.argv then
    ()
  else if accumulating then
    (* [accumulating] is [true] as long as we have not found the "-"
       marker *)
    match Sys.argv.(i) with
    | "-v" ->
        verbose := true;
        loop true (i+1)
    | "-" ->
        (* We have found the marker. The next parameter should be the
           name of the raw [ocamldep] command. Copy it to the command
           (unquoted -- apparently some shells do not permit quoting a
           command name). *)
        let i = i + 1 in
        assert (i < Array.length Sys.argv);
        command := Sys.argv.(i);
        (* Stop accumulating file names. Copy the remaining arguments
           into the command. *)
        loop false (i+1)
    | _ ->
        (* Continue accumulating file names in [xs]. *)
        xs := Sys.argv.(i) :: !xs;
        loop true (i+1)
  else begin
    (* After we have found the "-" marker, the remaining arguments are
       copied (quoted) into the command. *)
    command := sprintf "%s %s" !command (Filename.quote Sys.argv.(i));
    loop false (i+1)
  end

let () =
  loop true 1

(* Create the required files if they don't exist, run the command,
   then destroy any files that we have created. *)

let rec loop = function
  | [] ->
      if !verbose then
        fprintf stderr "ocamldep.wrapper: running %s\n" !command;
      Sys.command !command
  | x :: xs ->
      if Sys.file_exists x then
        loop xs
      else begin
        if !verbose then
          fprintf stderr "ocamldep.wrapper: creating fake %s\n" x;
        let c = open_out x in
        close_out c;
        let exitcode = loop xs in
        if Sys.file_exists x then begin
          try
            if !verbose then
              fprintf stderr "ocamldep.wrapper: removing fake %s..."
                x;
            Sys.remove x;
            if !verbose then
              fprintf stderr " ok\n"
          with Sys_error _ ->
            if !verbose then
              fprintf stderr " failed\n"
        end;
        exitcode
      end

let () =
  exit (loop !xs)

==> menhir-20171222/demos/obsolete/Makefile.auto <==

# This auxiliary Makefile is meant to be included by a client Makefile
# in addition to Makefile.shared. It is optional. It implements the
# common case where every .mly file in the current directory is to be
# viewed as a mono-module grammar specification.

$(foreach module,$(wildcard *.mly),$(eval $(call menhir_monomodule,$(patsubst %.mly,%,$(module)),)))

==> menhir-20171222/demos/calc-param/.merlin <==

B _build

==> menhir-20171222/demos/calc-param/_tags <==

<tokens.*>: only_tokens
<parser.*>: external_tokens(Tokens)

==> menhir-20171222/demos/calc-param/parser.mlypack <==

Tokens Parser

==> menhir-20171222/demos/calc-param/parser.mly <==

(* These are the functions that we need in order to write our semantic
   actions. *)

%parameter <Semantics : sig
  type number
  val inject: int -> number
  val ( + ): number -> number -> number
  val ( - ): number -> number -> number
  val ( * ): number -> number -> number
  val ( / ): number -> number -> number
  val ( ~-): number -> number
end>

(* The parser no longer returns an integer; instead, it returns an
   abstract number. *)

%start <Semantics.number> main

(* Let us open the [Semantics] module, so as to make all of its
   operations available in the semantic actions. *)

%{

  open Semantics

%}

%%

main:
| e = expr EOL
    { e }

expr:
| i = INT
    { inject i }
| LPAREN e = expr RPAREN
    { e }
| e1 = expr PLUS e2 = expr
    { e1 + e2 }
| e1 = expr MINUS e2 = expr
    { e1 - e2 }
| e1 = expr TIMES e2 = expr
    { e1 * e2 }
| e1 = expr DIV e2 = expr
    { e1 / e2 }
| MINUS e = expr %prec UMINUS
    { - e }

==> menhir-20171222/demos/calc-param/README <==

In this variant of the calc demo, the parser's semantic actions are
parameterized over a structure, called [Semantics], which defines how
numbers should be interpreted. The parser is later instantiated with
floating-point numbers, so the calculator actually performs
floating-point evaluation -- but the grammar specification is
independent of this detail.

==> menhir-20171222/demos/calc-param/Makefile <==

.PHONY: all clean test

# Find Menhir.
ifndef MENHIR
  MENHIR := $(shell ../find-menhir.sh)
endif

MENHIRFLAGS := --infer

OCAMLBUILD := ocamlbuild -use-ocamlfind -use-menhir -menhir "$(MENHIR) $(MENHIRFLAGS)"

MAIN := calc

all:
	$(OCAMLBUILD) $(MAIN).native

clean:
	rm -f *~ .*~
	$(OCAMLBUILD) -clean

test: all
	@echo "The following command should print 42.0:"
	echo "(1 + 2 * 10) * 2" | ./$(MAIN).native

==> menhir-20171222/demos/calc-param/lexer.mll <==

{
  open Tokens

  exception Error of string
}

(* This rule looks for a single line, terminated with '\n' or eof.
   It returns a pair of an optional string (the line that was found)
   and a Boolean flag (false if eof was reached). *)
rule line = parse
| ([^'\n']* '\n') as line
    (* Normal case: one line, no eof. *)
    { Some line, true }
| eof
    (* Normal case: no data, eof.
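       (Returning [false] here tells the driver loop in calc.ml to
       stop reading.)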
       *)
    { None, false }
| ([^'\n']+ as line) eof
    (* Special case: some data but missing '\n', then eof.
       Consider this as the last line, and add the missing '\n'. *)
    { Some (line ^ "\n"), false }

(* This rule analyzes a single line and turns it into a stream of
   tokens. *)
and token = parse
| [' ' '\t']
    { token lexbuf }
| '\n'
    { EOL }
| ['0'-'9']+ as i
    { INT (int_of_string i) }
| '+'
    { PLUS }
| '-'
    { MINUS }
| '*'
    { TIMES }
| '/'
    { DIV }
| '('
    { LPAREN }
| ')'
    { RPAREN }
| _
    { raise (Error (Printf.sprintf "At offset %d: unexpected character.\n" (Lexing.lexeme_start lexbuf))) }

==> menhir-20171222/demos/calc-param/tokens.mlypack <==

Tokens

==> menhir-20171222/demos/calc-param/tokens.mly <==

(* We want the tokens to be independent of the [Semantics] parameter,
   so we declare them here, in a separate file, as opposed to within
   [parser.mly]. *)

%token <int> INT
%token PLUS MINUS TIMES DIV
%token LPAREN RPAREN
%token EOL

%left PLUS MINUS       /* lowest precedence */
%left TIMES DIV        /* medium precedence */
%nonassoc UMINUS       /* highest precedence */

%%

==> menhir-20171222/demos/calc-param/myocamlbuild.ml <==

open Ocamlbuild_plugin
open Command

let menhir_flags() =
  (* Define two ocamlbuild flags [only_tokens] and
     [external_tokens(Foo)] which correspond to menhir's
     [--only-tokens] and [--external-tokens Foo]. When they are used,
     these flags should be passed both to [menhir] and to [menhir
     --raw-depend]. *)
  List.iter (fun mode ->
    flag [ mode; "only_tokens" ] (S[A "--only-tokens"]);
    pflag [ mode ] "external_tokens" (fun name ->
      S[A "--external-tokens"; A name]
    )
  )
  [ "menhir"; "menhir_ocamldep" ]

let () =
  dispatch (fun event ->
    match event with
    | After_rules ->
        menhir_flags()
    | _ ->
        ()
  )

==> menhir-20171222/demos/calc-param/calc.ml <==

(* Let's do floating-point evaluation, for a change. *)

module FloatSemantics = struct
  type number = float
  let inject = float_of_int
  let ( + ) = ( +. )
  let ( - ) = ( -. )
  let ( * ) = ( *. )
  let ( / ) = ( /. )
  let (~- ) = (~-. )
end

(* Let us now specialize our parameterized parser. *)

module FloatParser = Parser.Make(FloatSemantics)

(* The rest is as usual. *)

let process (line : string) =
  let linebuf = Lexing.from_string line in
  try
    (* Run the parser on this line of input. *)
    Printf.printf "%f\n%!" (FloatParser.main Lexer.token linebuf)
  with
  | Lexer.Error msg ->
      Printf.fprintf stderr "%s%!" msg
  | FloatParser.Error ->
      Printf.fprintf stderr "At offset %d: syntax error.\n%!" (Lexing.lexeme_start linebuf)

let process (optional_line : string option) =
  match optional_line with
  | None ->
      ()
  | Some line ->
      process line

let rec repeat channel =
  (* Attempt to read one line.
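     [Lexer.line] returns the line (if any), together with a Boolean
     flag that indicates whether more input may follow.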
     *)
  let optional_line, continue = Lexer.line channel in
  process optional_line;
  if continue then
    repeat channel

let () =
  repeat (Lexing.from_channel stdin)

==> menhir-20171222/demos/generate-printers/.merlin <==

PKG menhirSdk

==> menhir-20171222/demos/generate-printers/generate.ml <==

open Printf
open MenhirSdk

(* ------------------------------------------------------------------------ *)

(* We expect one command line argument: the name of a .cmly file. *)

let filename =
  if Array.length Sys.argv = 2 && Filename.check_suffix Sys.argv.(1) ".cmly" then
    Sys.argv.(1)
  else begin
    eprintf "Usage: %s <file.cmly>\n" Sys.argv.(0);
    exit 1
  end

(* ------------------------------------------------------------------------ *)

(* Read this file. This gives rise to a module whose signature is
   [Cmly_api.GRAMMAR]. We include it, so we can use it without even
   naming it. *)

include Cmly_read.Read (struct
  let filename = filename
end)

(* ------------------------------------------------------------------------ *)

(* All names which refer to Menhir's inspection API are qualified with
   this module name. We do not [open] this module because that might
   hide some names exploited by the user within attributes. *)

let menhir =
  "MenhirInterpreter"

(* ------------------------------------------------------------------------ *)

(* The header consists of an [open] directive, followed with content
   taken from [@header] attributes. *)

let module_name =
  filename
  |> Filename.basename
  |> Filename.chop_extension
  |> String.capitalize_ascii

let header () =
  printf "open %s\n\n" module_name;
  List.iter (fun attr ->
    if Attribute.has_label "header" attr then
      printf "%s\n" (Attribute.payload attr)
  ) Grammar.attributes

(* ------------------------------------------------------------------------ *)

(* [name default attrs] returns the payload of an [@name] attribute
   found in [attrs], if there is one, and the literal string [default]
   otherwise. *)

let name default attrs =
  try
    let attr = List.find (Attribute.has_label "name") attrs in
    Attribute.payload attr
  with Not_found ->
    sprintf "%S" default

(* [print_symbol()] generates code for a [print_symbol] function,
   which converts a symbol to a string. The type of a symbol is
   [xsymbol]; see the documentation of Menhir's inspection API. *)

let print_symbol () =
  printf "let print_symbol = function\n";
  Terminal.iter (fun t ->
    match Terminal.kind t with
    | `REGULAR | `ERROR ->
        printf " | %s.X (%s.T %s.T_%s) -> %s\n"
          menhir menhir menhir (Terminal.name t)
          (name (Terminal.name t) (Terminal.attributes t))
    | `PSEUDO | `EOF ->
        ()
  );
  Nonterminal.iter (fun n ->
    match Nonterminal.kind n with
    | `REGULAR ->
        printf " | %s.X (%s.N %s.N_%s) -> %s\n"
          menhir menhir menhir (Nonterminal.name n)
          (name (Nonterminal.name n) (Nonterminal.attributes n))
    | `START ->
        ()
  );
  printf "\n"

(* ------------------------------------------------------------------------ *)

(* [printer default attrs] returns the payload of a [@printer]
   attribute found in [attrs], within parentheses, if there is one.
   Otherwise, it returns a function that ignores its argument and
   always returns the literal string [name default attrs].
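   For instance, under a [@printer string_of_int] attribute, the
   generated branch applies (string_of_int) to the semantic value,
   whereas, in the absence of such an attribute, the value is ignored
   and the symbol's name is printed instead.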
*) let printer default attrs = try let attr = List.find (Attribute.has_label "printer") attrs in sprintf "(%s)" (Attribute.payload attr) with Not_found -> sprintf "(fun _ -> %s)" (name default attrs) (* [print_value()] generates code for a [print_value] function, which converts a pair of a symbol and its semantic value to a string. The type of the symbol is ['a symbol], and the type of the value is ['a]. See the documentation of Menhir's inspection API. *) let print_value () = printf "let print_value (type a) : a %s.symbol -> a -> string = function\n" menhir; Terminal.iter (fun t -> match Terminal.kind t with | `REGULAR | `ERROR -> printf " | %s.T %s.T_%s -> %s\n" menhir menhir (Terminal.name t) (printer (Terminal.name t) (Terminal.attributes t)) | `PSEUDO | `EOF -> () ); Nonterminal.iter (fun n -> match Nonterminal.kind n with | `REGULAR -> printf " | %s.N %s.N_%s -> %s\n" menhir menhir (Nonterminal.name n) (printer (Nonterminal.name n) (Nonterminal.attributes n)) | `START -> () ); printf "\n" (* [print_token()] generates code for a [print_token] function, which converts a token to a string. The type of the token is [token]. This is done by converting the token to a pair of a symbol and a value and invoking [print_value]. *) let print_token () = printf "let print_token = function\n"; Terminal.iter (fun t -> match Terminal.kind t with | `REGULAR -> (* Deal with the case where the token carries no semantic value. *) let pattern, value = match Terminal.typ t with | None -> "", "()" | Some _typ -> " v", "v" in printf " | %s%s -> print_value (%s.T %s.T_%s) %s\n" (Terminal.name t) pattern menhir menhir (Terminal.name t) value | `ERROR | `PSEUDO | `EOF -> () ); printf "\n" (* ------------------------------------------------------------------------ *) (* The main program. *) let () = header(); print_symbol(); print_value(); print_token() menhir-20171222/demos/generate-printers/_tags0000664000175000017500000000011613217215727021304 0ustar fpottierfpottiertrue: \ package(unix), \ package(menhirSdk), \ safe_string, \ warn(A) menhir-20171222/demos/generate-printers/Makefile0000664000175000017500000000115213217215727021725 0ustar fpottierfpottier.PHONY: all test clean # The Menhir executable in the PATH and the library MenhirSdk # should agree on their version number, or this test will fail # complaining that magic strings do not match. MENHIR := menhir OCAMLBUILD := ocamlbuild -use-ocamlfind MAIN := generate EXECUTABLE := menhir-generate-printers all: $(OCAMLBUILD) $(MAIN).native rm -f $(MAIN).native cp -f _build/$(MAIN).native $(EXECUTABLE) TEST := ../../test/good/parser_raw test: all rm -f $(TEST).cmly $(MENHIR) --cmly $(TEST).mly ./$(EXECUTABLE) $(TEST).cmly clean: rm -f *~ .*~ $(OCAMLBUILD) -clean rm -f $(EXECUTABLE) menhir-20171222/demos/generate-printers/README.md0000664000175000017500000000162413217215727021550 0ustar fpottierfpottierThis tool, `menhir-generate-printers`, reads a `.cmly` file and produces code for three functions, namely `print_symbol`, `print_value`, and `print_token`. ``` val print_symbol: MenhirInterpreter.xsymbol -> string ``` By default, `print_symbol` prints the internal name of a (terminal or nonterminal) symbol. This can however be changed by attaching a `[@name]` attribute with this symbol. The attribute payload should be an OCaml expression of type `string`. 
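For instance, in a hypothetical grammar fragment, the declaration
`%token PLUS [@name "+"]` causes `print_symbol` to print `+` instead
of `PLUS`.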
``` val print_value: 'a MenhirInterpreter.symbol -> 'a -> string val print_token: token -> string ``` By default, `print_value` and `print_token` ignore the semantic value and print just the name of the symbol, like `print_symbol`. This can however be changed by attaching a `[@printer]` attribute with this symbol. The attribute payload should be an OCaml expression of type `_ -> string`, where `_` stands for an appropriate type of semantic values. menhir-20171222/demos/find-menhir.sh0000775000175000017500000000152213217215727017367 0ustar fpottierfpottier#!/bin/bash # This script tries to find the Menhir executable. # This is useful because we would like the demos # to work regardless of whether Menhir has been # installed already. # A normal user does not need this script. One can # assume that Menhir has been installed. # First attempt: find Menhir in the src directory # of the Menhir distribution. # This loop assumes that we are somewhere within # the Menhir distribution, so by going up, we will # end up at the root of the distribution. attempts=2 while [ $attempts -gt 0 ] && ! [ -d src ] ; do let attempts=attempts-1 cd .. done LOCAL=src/_stage1/menhir.native if ls $LOCAL >/dev/null 2>/dev/null ; then echo `pwd`/$LOCAL exit 0 fi # Second attempt: find Menhir in the PATH. if which menhir >/dev/null ; then echo menhir exit 0 fi echo Error: could not find Menhir. exit 1 menhir-20171222/demos/Makefile0000664000175000017500000000056713217215727016300 0ustar fpottierfpottier# The following demos require menhirLib: # calc-incremental # calc-inspection # The following demos require menhirSdk: # generate-printers DEMOS := \ calc \ calc-two \ calc-param \ calc-incremental \ calc-inspection \ generate-printers \ .PHONY: all clean all clean:: @for i in $(DEMOS) ; do \ $(MAKE) -C $$i $@ ; \ done clean:: /bin/rm -f *~ .*~ menhir-20171222/demos/calc-inspection/0000775000175000017500000000000013217215730017675 5ustar fpottierfpottiermenhir-20171222/demos/calc-inspection/.merlin0000664000175000017500000000002713217215727021171 0ustar fpottierfpottierB _build PKG menhirLib menhir-20171222/demos/calc-inspection/CalcPrinters.ml0000664000175000017500000000257613217215727022640 0ustar fpottierfpottieropen Parser.MenhirInterpreter (* In order to print syntax error messages and/or debugging information, we need a symbol printer. *) let print_symbol symbol : string = match symbol with | X (T T_TIMES) -> "*" | X (T T_RPAREN) -> ")" | X (T T_PLUS) -> "+" | X (T T_MINUS) -> "-" | X (T T_LPAREN) -> "(" | X (T T_INT) -> "INT" | X (N N_expr) -> "expr" | X (N N_main) -> "main" | X (T T_EOL) -> "EOL" | X (T T_DIV) -> "/" | X (T T_error) -> "error" (* In order to print a view of the stack that includes semantic values, we need an element printer. (If we don't need this feature, then [print_symbol] above suffices.) *) let print_element e : string = match e with | Element (s, v, _, _) -> match incoming_symbol s with | T T_TIMES -> "*" | T T_RPAREN -> ")" | T T_PLUS -> "+" | T T_MINUS -> "-" | T T_LPAREN -> "(" | T T_INT -> string_of_int v | N N_expr -> string_of_int v | N N_main -> string_of_int v | T T_EOL -> "" | T T_DIV -> "/" | T T_error -> "error" (* The public functions. 
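   These definitions match the signature expected by the functor
   [MenhirLib.Printers.Make]; see CalcPrinters.mli and calc.ml.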
 *)

let print =
  prerr_string

let print_symbol s =
  print (print_symbol s)

let print_element =
  Some (fun s -> print (print_element s))

==> menhir-20171222/demos/calc-inspection/CalcPrinters.mli <==

open Parser.MenhirInterpreter

(* This module offers the functionality required by the functor
   [MenhirLib.Printers.Make]. *)

val print: string -> unit
val print_symbol: xsymbol -> unit
val print_element: (element -> unit) option

==> menhir-20171222/demos/calc-inspection/CalcErrorReporting.ml <==

open Parser
open Parser.MenhirInterpreter

(* In order to submit artificial tokens to the parser, we need a
   function that converts a terminal symbol to a (dummy) token.
   Unfortunately, we cannot (in general) auto-generate this code,
   because it requires making up semantic values of arbitrary OCaml
   types. *)

let terminal2token (type a) (symbol : a terminal) : token =
  match symbol with
  | T_TIMES ->
      TIMES
  | T_RPAREN ->
      RPAREN
  | T_PLUS ->
      PLUS
  | T_MINUS ->
      MINUS
  | T_LPAREN ->
      LPAREN
  | T_INT ->
      INT 0
  | T_EOL ->
      EOL
  | T_error ->
      assert false

==> menhir-20171222/demos/calc-inspection/CalcErrorReporting.mli <==

open Parser.MenhirInterpreter

(* This module offers the functionality required by the functor
   [ErrorReporting.Printers.Make]. *)

val terminal2token: _ terminal -> token

==> menhir-20171222/demos/calc-inspection/parser.mly <==

%token <int> INT
%token PLUS MINUS TIMES DIV
%token LPAREN RPAREN
%token EOL

%left PLUS MINUS       /* lowest precedence */
%left TIMES DIV        /* medium precedence */
%nonassoc UMINUS       /* highest precedence */

%start <int> main

%%

main:
| e = expr EOL
    { e }

expr:
| i = INT
    { i }
| LPAREN e = expr RPAREN
    { e }
| e1 = expr PLUS e2 = expr
    { e1 + e2 }
| e1 = expr MINUS e2 = expr
    { e1 - e2 }
| e1 = expr TIMES e2 = expr
    { e1 * e2 }
| e1 = expr DIV e2 = expr
    { e1 / e2 }
| MINUS e = expr %prec UMINUS
    { - e }

==> menhir-20171222/demos/calc-inspection/README <==

This variant of the calc demo uses Menhir with the --table and
--inspection options. It illustrates how to inspect the intermediate
states produced by the parser.

==> menhir-20171222/demos/calc-inspection/ErrorReporting.mli <==

(* This module needs cleaning up. It is supposed to automatically
   produce a syntax error message, based on the current state and
   stack. *)

module Make
  (I : MenhirLib.IncrementalEngine.EVERYTHING)
  (User : sig

    (* In order to submit artificial tokens to the parser, we need a
       function that converts a terminal symbol to a token.
       Unfortunately, we cannot (in general) auto-generate this code,
       because it requires making up semantic values of arbitrary
       OCaml types. *)

    val terminal2token: _ I.terminal -> I.token

  end)
: sig

  open I

  (* An explanation is a description of what the parser has recognized
     in the recent past and what it expects next. More precisely, an
     explanation is an LR(0) item, enriched with positions. Indeed,
     the past (the first half of the item's right-hand side, up to the
     bullet) corresponds to a part of the input that has been read
     already, so it can be annotated with positions. *)

  type explanation

  (* The LR(0) item. *)

  val item: explanation -> item

  (* The past.
This is a non-empty sequence of (terminal and non-terminal) symbols, each of which corresponds to a range of the input file. These symbols correspond to the first half (up to the bullet) of the item's right-hand side. In short, they represent what (we think) we have recognized in the recent past. *) (* It is worth noting that, when an error occurs, we produce multiple explanations, which may have different pasts. Indeed, not only may these pasts have different lengths (one may be a suffix of another), but two pasts can in fact be incomparable. Indeed, different choices of the lookahead token may cause different reductions, hence different interpretations of what has been read in the past. *) val past: explanation -> (xsymbol * Lexing.position * Lexing.position) list (* The future. This is a non-empty sequence of (terminal and non-terminal) symbols. These symbols correspond to the second half (after the bullet) of the item's right-hand side. In short, they represent what we expect to recognize in the future, if this item is a good prediction. *) (* This information can be computed from [item]. This function is provided only for convenience. *) val future: explanation -> xsymbol list (* A goal. This is a non-terminal symbol. It is the item's left-hand side. In short, it represents the reduction that we will be able to perform if we successfully recognize this future. *) (* This information can be computed from [item]. This function is provided only for convenience. *) val goal: explanation -> xsymbol (* TEMPORARY *) (* We build lists of explanations. These explanations may originate in distinct LR(1) states. They may have different pasts, because *) exception Error of (Lexing.position * Lexing.position) * explanation list (* TEMPORARY *) val entry: 'a I.checkpoint -> (Lexing.lexbuf -> token) -> Lexing.lexbuf -> 'a end menhir-20171222/demos/calc-inspection/Makefile0000664000175000017500000000155513217215727021351 0ustar fpottierfpottier.PHONY: all clean test # Find Menhir. ifndef MENHIR MENHIR := $(shell ../find-menhir.sh) endif # We use the table back-end, and link against menhirLib. # We assume that menhirLib has been installed in such a # way that ocamlfind knows about it. MENHIRFLAGS := --table --inspection -v -la 2 # -- infer is automatically added by ocamlbuild. OCAMLBUILD := ocamlbuild -use-ocamlfind -use-menhir -menhir "$(MENHIR) $(MENHIRFLAGS)" -package menhirLib MAIN := calc all: $(OCAMLBUILD) $(MAIN).native clean: rm -f *~ .*~ $(OCAMLBUILD) -clean test: all @echo "The following command should print 42:" echo "(1 + 2 * 10) * 2" | ./$(MAIN).native @echo "The following command should print an error message:" echo "(1 + 2 * 10) * )" | ./$(MAIN).native @echo "The following command should print an error message:" echo "(1 + 2 * 10))" | ./$(MAIN).native menhir-20171222/demos/calc-inspection/lexer.mll0000664000175000017500000000200613217215727021526 0ustar fpottierfpottier{ open Parser exception Error of string } (* This rule looks for a single line, terminated with '\n' or eof. It returns a pair of an optional string (the line that was found) and a Boolean flag (false if eof was reached). *) rule line = parse | ([^'\n']* '\n') as line (* Normal case: one line, no eof. *) { Some line, true } | eof (* Normal case: no data, eof. *) { None, false } | ([^'\n']+ as line) eof (* Special case: some data but missing '\n', then eof. Consider this as the last line, and add the missing '\n'. *) { Some (line ^ "\n"), false } (* This rule analyzes a single line and turns it into a stream of tokens. 
*) and token = parse | [' ' '\t'] { token lexbuf } | '\n' { EOL } | ['0'-'9']+ as i { INT (int_of_string i) } | '+' { PLUS } | '-' { MINUS } | '*' { TIMES } | '/' { DIV } | '(' { LPAREN } | ')' { RPAREN } | _ { raise (Error (Printf.sprintf "At offset %d: unexpected character.\n" (Lexing.lexeme_start lexbuf))) } menhir-20171222/demos/calc-inspection/calc.ml0000664000175000017500000000335313217215727021143 0ustar fpottierfpottieropen Lexing open MenhirLib.General open Parser.MenhirInterpreter (* Instantiate [MenhirLib.Printers] for our parser. This requires providing a few printing functions -- see [CalcPrinters]. *) module P = MenhirLib.Printers.Make (Parser.MenhirInterpreter) (CalcPrinters) (* Instantiate [ErrorReporting] for our parser. This requires providing a few functions -- see [CalcErrorReporting]. *) module E = ErrorReporting.Make (Parser.MenhirInterpreter) (CalcErrorReporting) (* Define a printer for explanations. We treat an explanation as if it were just an item: that is, we ignore the position information that is provided in the explanation. Indeed, this information is hard to show in text mode. *) let print_explanation explanation = P.print_item (E.item explanation) let print_explanations startp explanations = Printf.fprintf stderr "At line %d, column %d: syntax error.\n" startp.pos_lnum startp.pos_cnum; List.iter print_explanation explanations; flush stderr (* The rest of the code is as in the [calc] demo. *) let process (line : string) = let lexbuf = from_string line in try let v = E.entry (Parser.Incremental.main lexbuf.lex_curr_p) Lexer.token lexbuf in Printf.printf "%d\n%!" v with | Lexer.Error msg -> Printf.fprintf stderr "%s%!" msg | E.Error ((startp, _), explanations) -> print_explanations startp explanations let process (optional_line : string option) = match optional_line with | None -> () | Some line -> process line let rec repeat channel = (* Attempt to read one line. *) let optional_line, continue = Lexer.line channel in process optional_line; if continue then repeat channel let () = repeat (from_channel stdin) menhir-20171222/demos/calc-inspection/ErrorReporting.ml0000664000175000017500000001525313217215727023226 0ustar fpottierfpottiermodule Make (I : MenhirLib.IncrementalEngine.EVERYTHING) (User : sig (* In order to submit artificial tokens to the parser, we need a function that converts a terminal symbol to a token. Unfortunately, we cannot (in general) auto-generate this code, because it requires making up semantic values of arbitrary OCaml types. *) val terminal2token: _ I.terminal -> I.token end) = struct open MenhirLib.General open I open User (* ------------------------------------------------------------------------ *) (* Explanations. *) type explanation = { item: item; past: (xsymbol * Lexing.position * Lexing.position) list } let item explanation = explanation.item let past explanation = explanation.past let future explanation = let prod, index = explanation.item in let rhs = rhs prod in drop index rhs let goal explanation = let prod, _ = explanation.item in lhs prod (* ------------------------------------------------------------------------ *) (* [items_current env] assumes that [env] is not an initial state (which implies that the stack is non-empty). Under this assumption, it extracts the automaton's current state, i.e., the LR(1) state found in the top stack cell. It then goes through [items] so as to obtain the LR(0) items associated with this state. *) let items_current env : item list = (* Get the current state. 
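      The stack is exposed as a lazy stream, so it must be forced
      before its top cell can be examined.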
*) match Lazy.force (stack env) with | Nil -> (* If we get here, then the stack is empty, which means the parser is in an initial state. This should not happen. *) invalid_arg "items_current" (* TEMPORARY it DOES happen! *) | Cons (Element (current, _, _, _), _) -> (* Extract the current state out of the top stack element, and convert it to a set of LR(0) items. Returning a set of items instead of an ['a lr1state] is convenient; returning [current] would require wrapping it in an existential type. *) items current (* [is_shift_item t item] determines whether [item] justifies a shift transition along the terminal symbol [t]. *) let is_shift_item (t : _ terminal) (prod, index) : bool = let rhs = rhs prod in let length = List.length rhs in assert (0 < index && index <= length); (* We test that there is one symbol after the bullet and this symbol is [t] or can generate a word that begins with [t]. (Note that we don't need to worry about the case where this symbol is nullable and [t] is generated by the following symbol. In that situation, we would have to reduce before we can shift [t].) *) index < length && xfirst (List.nth rhs index) t let compare_explanations x1 x2 = let c = compare_items x1.item x2.item in (* TEMPORARY checking that if [c] is 0 then the positions are the same *) assert ( c <> 0 || List.for_all2 (fun (_, start1, end1) (_, start2, end2) -> start1.Lexing.pos_cnum = start2.Lexing.pos_cnum && end1.Lexing.pos_cnum = end2.Lexing.pos_cnum ) x1.past x2.past ); c (* [marry past stack] TEMPORARY comment *) let rec marry past stack = match past, stack with | [], _ -> [] | symbol :: past, lazy (Cons (Element (s, _, startp, endp), stack)) -> assert (compare_symbols symbol (X (incoming_symbol s)) = 0); (symbol, startp, endp) :: marry past stack | _ :: _, lazy Nil -> assert false (* [accumulate t env explanations] is called if the parser decides to shift the test token [t]. The parameter [env] describes the parser configuration before it shifts this token. (Some reductions have taken place.) We use the shift items found in [env] to produce new explanations. *) let accumulate (t : _ terminal) env explanations = (* The parser is about to shift, which means it is willing to consume the terminal symbol [t]. In the state before the transition, look at the items that justify shifting [t]. We view these items as explanations: they explain what we have read and what we expect to read. *) let stack = stack env in List.fold_left (fun explanations item -> if is_shift_item t item then let prod, index = item in let rhs = rhs prod in { item = item; past = List.rev (marry (List.rev (take index rhs)) stack) } :: explanations else explanations ) explanations (items_current env) (* TEMPORARY [env] may be an initial state! violating [item_current]'s precondition *) (* [investigate pos checkpoint] assumes that [checkpoint] is of the form [InputNeeded _]. For every terminal symbol [t], it investigates how the parser reacts when fed the symbol [t], and returns a list of explanations. The position [pos] is where a syntax error was detected; it is used when manufacturing dummy tokens. This is important because the position of the dummy token may end up in the explanations that we produce. *) let investigate pos (checkpoint : _ checkpoint) : explanation list = weed compare_explanations ( foreach_terminal_but_error (fun symbol explanations -> match symbol with | X (N _) -> assert false | X (T t) -> (* Build a dummy token for the terminal symbol [t]. 
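              Both its start and end positions are [pos], the position
              where the syntax error was detected, so this is the
              location that any resulting explanations will mention.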
            *)
           let token = (terminal2token t, pos, pos) in
           (* Submit it to the parser. Accumulate explanations. *)
           match shifts (offer checkpoint token) with
           | None ->
               explanations
           | Some env ->
               accumulate t env explanations
      ) []
    )

  (* We drive the parser in the usual way, but record the last
     [InputNeeded] checkpoint. If a syntax error is detected, we go
     back to this checkpoint and analyze it in order to produce a
     meaningful diagnostic. *)

  exception Error of (Lexing.position * Lexing.position) * explanation list

  let entry (start : 'a I.checkpoint) lexer lexbuf =
    let fail (inputneeded : 'a I.checkpoint) (checkpoint : 'a I.checkpoint) =
      (* The parser signals a syntax error. Note the position of the
         problematic token, which is useful. Then, go back to the last
         [InputNeeded] checkpoint and investigate. *)
      match checkpoint with
      | HandlingError env ->
          let (startp, _) as positions = positions env in
          raise (Error (positions, investigate startp inputneeded))
      | _ ->
          assert false
    in
    I.loop_handle_undo
      (fun v -> v)
      fail
      (lexer_lexbuf_to_supplier lexer lexbuf)
      start

  (* TEMPORARY could also publish a list of the terminal symbols that
     do not cause an error *)

end

==> menhir-20171222/demos/calc/.merlin <==

B _build

==> menhir-20171222/demos/calc/parser.mly <==

%token <int> INT
%token PLUS MINUS TIMES DIV
%token LPAREN RPAREN
%token EOL

%left PLUS MINUS       /* lowest precedence */
%left TIMES DIV        /* medium precedence */
%nonassoc UMINUS       /* highest precedence */

%start <int> main

%%

main:
| e = expr EOL
    { e }

expr:
| i = INT
    { i }
| LPAREN e = expr RPAREN
    { e }
| e1 = expr PLUS e2 = expr
    { e1 + e2 }
| e1 = expr MINUS e2 = expr
    { e1 - e2 }
| e1 = expr TIMES e2 = expr
    { e1 * e2 }
| e1 = expr DIV e2 = expr
    { e1 / e2 }
| MINUS e = expr %prec UMINUS
    { - e }

==> menhir-20171222/demos/calc/README <==

This tiny program reads arithmetic expressions from the standard input
channel. Each expression is expected to be complete when the current
line ends. Its value is then displayed on the standard output channel.
This code is adapted from ocamlyacc's documentation.

We compile the parser using Menhir's code back-end. For an example of
using Menhir's table back-end, see calc-incremental/ and
calc-inspection/.

==> menhir-20171222/demos/calc/Makefile <==

.PHONY: all clean test

# Find Menhir.
ifndef MENHIR
  MENHIR := $(shell ../find-menhir.sh)
endif

MENHIRFLAGS := --infer

OCAMLBUILD := ocamlbuild -use-ocamlfind -use-menhir -menhir "$(MENHIR) $(MENHIRFLAGS)"

MAIN := calc

all:
	$(OCAMLBUILD) $(MAIN).native

clean:
	rm -f *~ .*~
	$(OCAMLBUILD) -clean

test: all
	@echo "The following command should print 42:"
	echo "(1 + 2 * 10) * 2" | ./$(MAIN).native

==> menhir-20171222/demos/calc/lexer.mll <==

{
  open Parser

  exception Error of string
}

(* This rule looks for a single line, terminated with '\n' or eof.
   It returns a pair of an optional string (the line that was found)
   and a Boolean flag (false if eof was reached). *)
rule line = parse
| ([^'\n']* '\n') as line
    (* Normal case: one line, no eof. *)
    { Some line, true }
| eof
    (* Normal case: no data, eof. *)
    { None, false }
| ([^'\n']+ as line) eof
    (* Special case: some data but missing '\n', then eof.
       Consider this as the last line, and add the missing '\n'. *)
    { Some (line ^ "\n"), false }

(* This rule analyzes a single line and turns it into a stream of
   tokens. *)
and token = parse
| [' ' '\t']
    { token lexbuf }
| '\n'
    { EOL }
| ['0'-'9']+ as i
    { INT (int_of_string i) }
| '+'
    { PLUS }
| '-'
    { MINUS }
| '*'
    { TIMES }
| '/'
    { DIV }
| '('
    { LPAREN }
| ')'
    { RPAREN }
| _
    { raise (Error (Printf.sprintf "At offset %d: unexpected character.\n" (Lexing.lexeme_start lexbuf))) }

==> menhir-20171222/demos/calc/calc.ml <==

let process (line : string) =
  let linebuf = Lexing.from_string line in
  try
    (* Run the parser on this line of input. *)
    Printf.printf "%d\n%!" (Parser.main Lexer.token linebuf)
  with
  | Lexer.Error msg ->
      Printf.fprintf stderr "%s%!" msg
  | Parser.Error ->
      Printf.fprintf stderr "At offset %d: syntax error.\n%!" (Lexing.lexeme_start linebuf)

let process (optional_line : string option) =
  match optional_line with
  | None ->
      ()
  | Some line ->
      process line

let rec repeat channel =
  (* Attempt to read one line. *)
  let optional_line, continue = Lexer.line channel in
  process optional_line;
  if continue then
    repeat channel

let () =
  repeat (Lexing.from_channel stdin)

==> menhir-20171222/demos/calc-incremental/.merlin <==

B _build
PKG menhirLib

==> menhir-20171222/demos/calc-incremental/parser.mly <==

%token <int> INT
%token PLUS MINUS TIMES DIV
%token LPAREN RPAREN
%token EOL

%left PLUS MINUS       /* lowest precedence */
%left TIMES DIV        /* medium precedence */
%nonassoc UMINUS       /* highest precedence */

%start <int> main

%%

main:
| e = expr EOL
    { e }

expr:
| i = INT
    { i }
| LPAREN e = expr RPAREN
    { e }
| e1 = expr PLUS e2 = expr
    { e1 + e2 }
| e1 = expr MINUS e2 = expr
    { e1 - e2 }
| e1 = expr TIMES e2 = expr
    { e1 * e2 }
| e1 = expr DIV e2 = expr
    { e1 / e2 }
| MINUS e = expr %prec UMINUS
    { - e }

==> menhir-20171222/demos/calc-incremental/README <==

This variant of the calc demo uses Menhir with the --table option. It
also demonstrates how to use the incremental parser interface.

==> menhir-20171222/demos/calc-incremental/Makefile <==

.PHONY: all clean test

# Find Menhir.
ifndef MENHIR
  MENHIR := $(shell ../find-menhir.sh)
endif

# We use the table back-end, and link against menhirLib.
# We assume that menhirLib has been installed in such a
# way that ocamlfind knows about it.

MENHIRFLAGS := --table
# -- infer is automatically added by ocamlbuild.

OCAMLBUILD := ocamlbuild -use-ocamlfind -use-menhir -menhir "$(MENHIR) $(MENHIRFLAGS)" -package menhirLib

MAIN := calc

all:
	$(OCAMLBUILD) $(MAIN).native

clean:
	rm -f *~ .*~
	$(OCAMLBUILD) -clean

test: all
	@echo "The following command should print 42:"
	echo "(1 + 2 * 10) * 2" | ./$(MAIN).native

==> menhir-20171222/demos/calc-incremental/lexer.mll <==

{
  open Parser

  exception Error of string
}

(* This rule looks for a single line, terminated with '\n' or eof.
   It returns a pair of an optional string (the line that was found)
   and a Boolean flag (false if eof was reached). *)
rule line = parse
| ([^'\n']* '\n') as line
    (* Normal case: one line, no eof.
*) { Some line, true } | eof (* Normal case: no data, eof. *) { None, false } | ([^'\n']+ as line) eof (* Special case: some data but missing '\n', then eof. Consider this as the last line, and add the missing '\n'. *) { Some (line ^ "\n"), false } (* This rule analyzes a single line and turns it into a stream of tokens. *) and token = parse | [' ' '\t'] { token lexbuf } | '\n' { EOL } | ['0'-'9']+ as i { INT (int_of_string i) } | '+' { PLUS } | '-' { MINUS } | '*' { TIMES } | '/' { DIV } | '(' { LPAREN } | ')' { RPAREN } | _ { raise (Error (Printf.sprintf "At offset %d: unexpected character.\n" (Lexing.lexeme_start lexbuf))) } menhir-20171222/demos/calc-incremental/calc.ml0000664000175000017500000000611013217215727021263 0ustar fpottierfpottieropen Lexing (* A short name for the incremental parser API. *) module I = Parser.MenhirInterpreter (* -------------------------------------------------------------------------- *) (* The loop which drives the parser. At each iteration, we analyze a checkpoint produced by the parser, and act in an appropriate manner. [lexbuf] is the lexing buffer. [checkpoint] is the last checkpoint produced by the parser. *) let rec loop lexbuf (checkpoint : int I.checkpoint) = match checkpoint with | I.InputNeeded env -> (* The parser needs a token. Request one from the lexer, and offer it to the parser, which will produce a new checkpoint. Then, repeat. *) let token = Lexer.token lexbuf in let startp = lexbuf.lex_start_p and endp = lexbuf.lex_curr_p in let checkpoint = I.offer checkpoint (token, startp, endp) in loop lexbuf checkpoint | I.Shifting _ | I.AboutToReduce _ -> let checkpoint = I.resume checkpoint in loop lexbuf checkpoint | I.HandlingError env -> (* The parser has suspended itself because of a syntax error. Stop. *) Printf.fprintf stderr "At offset %d: syntax error.\n%!" (lexeme_start lexbuf) | I.Accepted v -> (* The parser has succeeded and produced a semantic value. Print it. *) Printf.printf "%d\n%!" v | I.Rejected -> (* The parser rejects this input. This cannot happen, here, because we stop as soon as the parser reports [HandlingError]. *) assert false (* -------------------------------------------------------------------------- *) (* The above loop is shown for explanatory purposes, but can in fact be replaced with the following code, which exploits the functions [lexer_lexbuf_to_supplier] and [loop_handle] offered by Menhir. *) let succeed (v : int) = (* The parser has succeeded and produced a semantic value. Print it. *) Printf.printf "%d\n%!" v let fail lexbuf (_ : int I.checkpoint) = (* The parser has suspended itself because of a syntax error. Stop. *) Printf.fprintf stderr "At offset %d: syntax error.\n%!" (lexeme_start lexbuf) let loop lexbuf result = let supplier = I.lexer_lexbuf_to_supplier Lexer.token lexbuf in I.loop_handle succeed (fail lexbuf) supplier result (* -------------------------------------------------------------------------- *) (* Initialize the lexer, and catch any exception raised by the lexer. *) let process (line : string) = let lexbuf = from_string line in try loop lexbuf (Parser.Incremental.main lexbuf.lex_curr_p) with | Lexer.Error msg -> Printf.fprintf stderr "%s%!" msg (* -------------------------------------------------------------------------- *) (* The rest of the code is as in the [calc] demo. *) let process (optional_line : string option) = match optional_line with | None -> () | Some line -> process line let rec repeat channel = (* Attempt to read one line. 
*) let optional_line, continue = Lexer.line channel in process optional_line; if continue then repeat channel let () = repeat (from_channel stdin) menhir-20171222/doc/0000775000175000017500000000000013217215732014262 5ustar fpottierfpottiermenhir-20171222/doc/mymacros.sty0000664000175000017500000000144613217215730016660 0ustar fpottierfpottier%; whizzy -macros main.tex % References to sections, lemmas, theorems, etc. \newcommand{\sref}[1]{\S\ref{#1}} \newcommand{\tref}[1]{Theorem~\ref{#1}} \newcommand{\lemref}[1]{Lemma~\ref{#1}} \newcommand{\dref}[1]{Definition~\ref{#1}} \newcommand{\eref}[1]{Example~\ref{#1}} \newcommand{\fref}[1]{Figure~\ref{#1}} \newcommand{\aref}[1]{Appendix~\ref{#1}} % Abbreviations. \def\etal.{\emph{et al.}} % Define \citeyear in addition to \cite, if not already defined. \@ifundefined{citeyear}{ \@ifundefined{shortcite}{ \let\citeyear\cite }{ \let\citeyear\shortcite } }{} % Lambda-calculus syntax. \newcommand{\ekw}[1]{\mathsf{#1}} \newcommand{\expr}{e} \newcommand{\evar}{x} \newcommand{\eabs}[2]{\lambda#1.#2} \newcommand{\eapp}[2]{#1\;#2} \newcommand{\elet}[3]{\ekw{let}\;#1=#2\;\ekw{in}\;#3} menhir-20171222/doc/declarations-onerrorreduce.mly0000664000175000017500000000040513217215730022326 0ustar fpottierfpottier%token ID ARROW LPAREN RPAREN COLON SEMICOLON %start program %on_error_reduce typ1 %% typ0: ID | LPAREN typ1 RPAREN {} typ1: typ0 | typ0 ARROW typ1 {} declaration: ID COLON typ1 {} program: | LPAREN declaration RPAREN | declaration SEMICOLON {} menhir-20171222/doc/main.tex0000664000175000017500000055040413217215730015736 0ustar fpottierfpottier\def\true{true} \let\fpacm\true \documentclass[onecolumn,11pt,nocopyrightspace,preprint]{sigplanconf} \usepackage{amstext} \usepackage[T1]{fontenc} \usepackage[utf8]{inputenc} \usepackage{moreverb} \usepackage{tikz} \usepackage{xspace} \usepackage{mymacros} \def\fppdf{true} \usepackage{fppdf} \input{macros} \input{version} % Let Menhir's version number appear at the bottom right of every page. \makeatletter \def\@formatyear{\menhirversion} \makeatother % ------------------------------------------------------------------------------ % Headings. \title{\menhir Reference Manual\\\normalsize (version \menhirversion)} \begin{document} \authorinfo{François Pottier\and Yann Régis-Gianas} {INRIA} {\{Francois.Pottier, Yann.Regis-Gianas\}@inria.fr} \maketitle % ------------------------------------------------------------------------------ \clearpage \tableofcontents \clearpage % ------------------------------------------------------------------------------ \section{Foreword} \menhir is a parser generator. It turns high-level grammar specifications, decorated with semantic actions expressed in the \ocaml programming language~\cite{ocaml}, into parsers, again expressed in \ocaml. It is based on Knuth's LR(1) parser construction technique~\cite{knuth-lr-65}. It is strongly inspired by its precursors: \yacc~\cite{johnson-yacc-79}, \texttt{ML-Yacc}~\cite{tarditi-appel-00}, and \ocamlyacc~\cite{ocaml}, but offers a large number of minor and major improvements that make it a more modern tool. This brief reference manual explains how to use \menhir. It does not attempt to explain context-free grammars, parsing, or the LR technique. Readers who have never used a parser generator are encouraged to read about these ideas first~\cite{aho-86,appel-tiger-98,hopcroft-motwani-ullman-00}. They are also invited to have a look at the \distrib{demos} directory in \menhir's distribution. 
Potential users of \menhir should be warned that \menhir's feature set is not completely stable. There is a tension between preserving a measure of compatibility with \ocamlyacc, on the one hand, and introducing new ideas, on the other hand. Some aspects of the tool, such as the error handling mechanism, are still potentially subject to incompatible changes: for instance, in the future, the current error handling mechanism (which is based on the \error token, see \sref{sec:errors}) could be removed and replaced with an entirely different mechanism. There is room for improvement in the tool and in this reference manual. Bug reports and suggestions are welcome! % ------------------------------------------------------------------------------ \section{Usage} \menhir is invoked as follows: \begin{quote} \cmenhir \nt{option} \ldots \nt{option} \nt{filename} \ldots \nt{filename} \end{quote} Each of the file names must end with \mly (unless \ocoq is used, in which case it must end with \vy) and denotes a partial grammar specification. These partial grammar specifications are joined (\sref{sec:split}) to form a single, self-contained grammar specification, which is then processed. The following optional command line switches allow controlling many aspects of the process. \docswitch{\obase \nt{basename}} This switch controls the base name of the \ml and \mli files that are produced. That is, the tool will produce files named \nt{basename}\texttt{.ml} and \nt{basename}\texttt{.mli}. Note that \nt{basename} can contain occurrences of the \texttt{/} character, so it really specifies a path and a base name. When only one \nt{filename} is provided on the command line, the default \nt{basename} is obtained by depriving \nt{filename} of its final \mly suffix. When multiple file names are provided on the command line, no default base name exists, so that the \obase switch \emph{must} be used. \docswitch{\ocmly} This switch causes Menhir to produce a \cmly file in addition to its normal operation. This file contains a (binary-form) representation of the grammar and automaton (see \sref{sec:sdk}). \docswitch{\ocomment} This switch causes a few comments to be inserted into the \ocaml code that is written to the \ml file. \docswitch{\ocompareerrors \nt{filename1} \ocompareerrors \nt{filename2}} Two such switches must always be used in conjunction so as to specify the names of two \messages files, \nt{filename1} and \nt{filename2}. Each file is read and internally translated to a mapping of states to messages. \menhir then checks that the left-hand mapping is a subset of the right-hand mapping. This feature is typically used in conjunction with \olisterrors to check that \nt{filename2} is complete (that is, covers all states where an error can occur). For more information, see \sref{sec:errors:new}. \docswitch{\ocompileerrors \nt{filename}} This switch causes \menhir to read the file \nt{filename}, which must obey the \messages file format, and to compile it to an OCaml function that maps a state number to a message. The OCaml code is sent to the standard output channel. At the same time, \menhir checks that the collection of input sentences in the file \nt{filename} is correct and irredundant. For more information, see \sref{sec:errors:new}. \docswitch{\ocoq} This switch causes \menhir to produce Coq code. See \sref{sec:coq}. \docswitch{\ocoqnoactions} (Used in conjunction with \ocoq.) 
This switch causes the semantic actions present in the \vy file to be ignored and replaced with \verb+tt+, the unique inhabitant of Coq's \verb+unit+ type. This feature can be used to test the Coq back-end with a standard grammar, that is, a grammar that contains \ocaml semantic actions. Just rename the file from \mly to \vy and set this switch. \docswitch{\ocoqnocomplete} (Used in conjunction with \ocoq.) This switch disables the generation of the proof of completeness of the parser (\sref{sec:coq}). This can be necessary because the proof of completeness is possible only if the grammar has no conflict (not even a benign one, in the sense of \sref{sec:conflicts:benign}). This can be desirable also because, for a complex grammar, completeness may require a heavy certificate and its validation by Coq may take time. \docswitch{\odepend} This switch causes \menhir to generate dependency information for use in conjunction with \make. When invoked in this mode, \menhir does not generate a parser. Instead, it examines the grammar specification and prints a list of prerequisites for the targets \nt{basename}\texttt{.cm[iox]}, \nt{basename}\texttt{.ml}, and \nt{basename}\texttt{.mli}. This list is intended to be textually included within a \Makefile. It is important to note that \nt{basename}\texttt{.ml} and \nt{basename}\texttt{.mli} can have \texttt{.cm[iox]} prerequisites. This is because, when the \oinfer switch is used, \menhir infers types by invoking \ocamlc, and \ocamlc itself requires the \ocaml modules that the grammar specification depends upon to have been compiled first. % The file \distrib{demos/obsolete/Makefile.shared} exploits the \odepend switch. An end user who uses \ocamlbuild does not need this switch. When in \odepend mode, \menhir computes dependencies by invoking \ocamldep. The command that is used to run \ocamldep is controlled by the \oocamldep switch. \docswitch{\odump} This switch causes a description of the automaton to be written to the file \nt{basename}\automaton. \docswitch{\oechoerrors \nt{filename}} This switch causes \menhir to read the \messages file \nt{filename} and to produce on the standard output channel just the input sentences. (That is, all messages, blank lines, and comments are filtered out.) For more information, see \sref{sec:errors:new}. \docswitch{\oexplain} This switch causes conflict explanations to be written to the file \nt{basename}\conflicts. See also \sref{sec:conflicts}. \docswitch{\oexternaltokens \nt{T}} This switch causes the definition of the \token type to be omitted in \nt{basename}\texttt{.ml} and \nt{basename}\texttt{.mli}. Instead, the generated parser relies on the type $T$\texttt{.}\token, where $T$ is an \ocaml module name. It is up to the user to define module $T$ and to make sure that it exports a suitable \token type. Module $T$ can be hand-written. It can also be automatically generated out of a grammar specification using the \oonlytokens switch. \docswitch{\ofixedexc} This switch causes the exception \texttt{Error} to be internally defined as a synonym for \texttt{Parsing.Parse\_error}. This means that an exception handler that catches \texttt{Parsing.Parse\_error} will also catch the generated parser's \texttt{Error}. This helps increase \menhir's compatibility with \ocamlyacc. There is otherwise no reason to use this switch. \docswitch{\ograph} This switch causes a description of the grammar's dependency graph to be written to the file \nt{basename}\dott. The graph's vertices are the grammar's nonterminal symbols. 
There is a directed edge from vertex $A$ to vertex $B$ if the definition of $A$ refers to $B$. The file is in a format that is suitable for processing by the \emph{graphviz} toolkit. \docswitch{\oinfer} This switch causes the semantic actions to be checked for type consistency \emph{before} the parser is generated. This is done by invoking the \ocaml compiler. Use of \oinfer is \textbf{strongly recommended}, because it helps obtain consistent, well-located type error messages, especially when advanced features such as \menhir's standard library or \dinline keyword are exploited. One downside of \oinfer is that the \ocaml compiler usually needs to consult a few \texttt{.cm[iox]} files. This means that these files must have been created first, requiring \Makefile changes and use of the \odepend switch. The file \distrib{demos/obsolete/Makefile.shared} suggests how to deal with this difficulty. A better option is to avoid \make altogether and use \ocamlbuild, which has built-in knowledge of \menhir. Using \ocamlbuild is \textbf{strongly recommended}! % There is a slight catch with \oinfer. The types inferred by \ocamlc are valid % in the toplevel context, but can change meaning when inserted into a local % context. \docswitch{\oinspection} This switch requires \otable. It causes \menhir to generate not only the monolithic and incremental APIs (\sref{sec:monolithic}, \sref{sec:incremental}), but also the inspection API (\sref{sec:inspection}). Activating this switch causes a few more tables to be produced, resulting in somewhat larger code size. \docswitch{\ointerpret} This switch causes \menhir to act as an interpreter, rather than as a compiler. No \ocaml code is generated. Instead, \menhir reads sentences off the standard input channel, parses them, and displays outcomes. This switch can be usefully combined with \otrace. For more information, see \sref{sec:interpret}. \docswitch{\ointerpreterror} This switch is analogous to \ointerpret, except \menhir expects every sentence to cause an error on its last token, and displays information about the state in which the error is detected, in the \messages file format. For more information, see \sref{sec:errors:new}. \docswitch{\ointerpretshowcst} This switch, used in conjunction with \ointerpret, causes \menhir to display a concrete syntax tree when a sentence is successfully parsed. For more information, see \sref{sec:interpret}. \docswitch{\olisterrors} This switch causes \menhir to produce (on the standard output channel) a complete list of input sentences that cause an error, in the \messages file format. For more information, see \sref{sec:errors:new}. \docswitch{\ologautomaton \nt{level}} When \nt{level} is nonzero, this switch causes some information about the automaton to be logged to the standard error channel. \docswitch{\ologcode \nt{level}} When \nt{level} is nonzero, this switch causes some information about the generated \ocaml code to be logged to the standard error channel. \docswitch{\ologgrammar \nt{level}} When \nt{level} is nonzero, this switch causes some information about the grammar to be logged to the standard error channel. When \nt{level} is 2, the \emph{nullable}, \emph{FIRST}, and \emph{FOLLOW} tables are displayed. \docswitch{\onoinline} This switch causes all \dinline keywords in the grammar specification to be ignored. This is especially useful in order to understand whether these keywords help solve any conflicts. 
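For instance, one might compare the conflict explanations produced by the two following invocations, where \texttt{parser.mly} stands for the grammar under study:
\begin{quote}
\cmenhir \oexplain \texttt{parser.mly} \\
\cmenhir \onoinline \oexplain \texttt{parser.mly}
\end{quote}
If the second invocation reports conflicts that the first one does not, then the \dinline keywords do play a role in avoiding these conflicts.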
\docswitch{\onostdlib} This switch instructs Menhir to \emph{not} use its standard library (\sref{sec:library}). \docswitch{\oocamlc \nt{command}} This switch controls how \ocamlc is invoked (when \oinfer is used). It allows setting both the name of the executable and the command line options that are passed to it. \docswitch{\oocamldep \nt{command}} This switch controls how \ocamldep is invoked (when \odepend is used). It allows setting both the name of the executable and the command line options that are passed to it. \docswitch{\oonlypreprocess} This switch causes the grammar specifications to be transformed up to the point where the automaton's construction can begin. The grammar specifications whose names are provided on the command line are joined (\sref{sec:split}); all parameterized nonterminal symbols are expanded away (\sref{sec:templates}); type inference is performed, if \oinfer is enabled; all nonterminal symbols marked \dinline are expanded away (\sref{sec:inline}). This yields a single, monolithic grammar specification, which is printed on the standard output channel. \docswitch{\oonlytokens} This switch causes the \dtoken declarations in the grammar specification to be translated into a definition of the \token type, which is written to the files \nt{basename}\texttt{.ml} and \nt{basename}\texttt{.mli}. No code is generated. This is useful when a single set of tokens is to be shared between several parsers. The directory \distrib{demos/calc-two} contains a demo that illustrates the use of this switch. \docswitch{\orawdepend} This switch is analogous to \odepend, except that \ocamldep's output is not postprocessed by \menhir; it is echoed without change. This switch is not suitable for direct use with \make; it is intended for use with \omake or \ocamlbuild, which perform their own postprocessing. An end user who uses \ocamlbuild does not need to mention this switch: \ocamlbuild uses it automatically. \docswitch{\ostdlib \nt{directory}} This switch controls the directory where the standard library (\sref{sec:library}) is found. It takes precedence over both the installation-time directory and the directory that may be specified via the environment variable \verb+$MENHIR_STDLIB+. \docswitch{\ostrict} This switch causes several warnings about the grammar and about the automaton to be considered errors. This includes warnings about useless precedence declarations, non-terminal symbols that produce the empty language, unreachable non-terminal symbols, productions that are never reduced, conflicts that are not resolved by precedence declarations, and end-of-stream conflicts. \docswitch{\osuggestcomp} This switch causes \menhir to print a set of suggested compilation flags, and exit. These flags are intended to be passed to the \ocaml compilers (\ocamlc or \ocamlopt) when compiling and linking the parser generated by \menhir. What are these flags? In the absence of the \otable switch, they are empty. When \otable is set, these flags ensure that \menhirlib is visible to the \ocaml compiler. If the support library \menhirlib was installed via \ocamlfind, a \texttt{-package} directive is issued; otherwise, a \texttt{-I} directive is used. % The file \distrib{demos/obsolete/Makefile.shared} shows how to exploit % the \texttt{--suggest-*} switches. \docswitch{\osuggestlinkb} This switch causes \menhir to print a set of suggested link flags, and exit. These flags are intended to be passed to \texttt{ocamlc} when producing a bytecode executable. What are these flags? 
In the absence of the \otable switch, they are empty. When \otable is set, these flags ensure that \menhirlib is linked in. If the support library \menhirlib was installed via \ocamlfind, a \texttt{-linkpkg} directive is issued; otherwise, the object file \texttt{menhirLib.cmo} is named. % The file \distrib{demos/obsolete/Makefile.shared} shows how to exploit % the \texttt{--suggest-*} switches. \docswitch{\osuggestlinko} This switch causes \menhir to print a set of suggested link flags, and exit. These flags are intended to be passed to \texttt{ocamlopt} when producing a native code executable. What are these flags? In the absence of the \otable switch, they are empty. When \otable is set, these flags ensure that \menhirlib is linked in. If the support library \menhirlib was installed via \ocamlfind, a \texttt{-linkpkg} directive is issued; otherwise, the object file \texttt{menhirLib.cmx} is named. % The file \distrib{demos/obsolete/Makefile.shared} shows how to exploit % the \texttt{--suggest-*} switches. \docswitch{\osuggestmenhirlib} This switch causes \menhir to print (the absolute path of) the directory where \menhirlib was installed. If \menhirlib was installed via \ocamlfind, this is equivalent to calling \texttt{ocamlfind query menhirLib}. \docswitch{\osuggestocamlfind} This switch causes \menhir to print a Boolean value (i.e., either \texttt{true} or \texttt{false}), which indicates whether \menhirlib was installed via \ocamlfind. \docswitch{\otable} This switch causes \menhir to use its table-based back-end, as opposed to its (default) code-based back-end. When \otable is used, \menhir produces significantly more compact and somewhat slower parsers. See \sref{sec:qa} for a speed comparison. The table-based back-end produces rather compact tables, which are analogous to those produced by \yacc, \bison, or \ocamlyacc. These tables are not quite stand-alone: they are exploited by an interpreter, which is shipped as part of the support library \menhirlib. For this reason, when \otable is used, \menhirlib must be made visible to the \ocaml compilers, and must be linked into your executable program. The \texttt{--suggest-*} switches, described above, help do this. The code-based back-end compiles the LR automaton directly into a nest of mutually recursive \ocaml functions. In that case, \menhirlib is not required. The incremental API (\sref{sec:incremental}) and the inspection API (\sref{sec:inspection}) are made available only by the table-based back-end. \docswitch{\otimings} This switch causes internal timing information to be sent to the standard error channel. \docswitch{\otrace} This switch causes tracing code to be inserted into the generated parser, so that, when the parser is run, its actions are logged to the standard error channel. This is analogous to \texttt{ocamlrun}'s \texttt{p=1} parameter, except this switch must be enabled at compile time: one cannot selectively enable or disable tracing at runtime. \docswitch{\oignoreprec} This switch suppresses all warnings about useless \dleft, \dright, \dnonassoc and \dprec declarations. \docswitch{\oignoreone \nt{symbol}} This switch suppresses the warning that is normally emitted when \menhir finds that the terminal symbol \nt{symbol} is unused. \docswitch{\oignoreall} This switch suppresses all of the warnings that are normally emitted when \menhir finds that some terminal symbols are unused. 
\docswitch{\oupdateerrors \nt{filename}} This switch causes \menhir to read the \messages file \nt{filename} and to produce on the standard output channel a new \messages file that is identical, except the auto-generated comments have been re-generated. For more information, see \sref{sec:errors:new}. \docswitch{\oversion} This switch causes \menhir to print its own version number and exit. % ------------------------------------------------------------------------------ \section{Lexical conventions} The semicolon character (\kw{;}) is treated as insignificant, just like white space. Thus, rules and producers (for instance) can be separated with semicolons if it is thought that this improves readability. Semicolons can be omitted otherwise. Identifiers (\nt{id}) coincide with \ocaml identifiers, except they are not allowed to contain the quote (\kw{'}) character. Following \ocaml, identifiers that begin with a lowercase letter (\nt{lid}) or with an uppercase letter (\nt{uid}) are distinguished. Comments are C-style (surrounded with \kw{/*} and \kw{*/}, cannot be nested), C++-style (announced by \kw{/$\!$/} and extending until the end of the line), or \ocaml-style (surrounded with \kw{(*} and \kw{*)}, can be nested). Of course, inside \ocaml code, only \ocaml-style comments are allowed. \ocaml type expressions are surrounded with \kangle{and}. Within such expressions, all references to type constructors (other than the built-in \textit{list}, \textit{option}, etc.) must be fully qualified. % ------------------------------------------------------------------------------ \section{Syntax of grammar specifications} \begin{figure} \begin{center} \begin{tabular}{r@{}c@{}l} \nt{specification} \is \sepspacelist{\nt{declaration}} \percentpercent \sepspacelist{\nt{rule}} \optional{\percentpercent \textit{OCaml code}} \\ \nt{declaration} \is \dheader{\textit{OCaml code}} \\ && \dparameter \ocamlparam \\ && \dtoken \optional{\ocamltype} \sepspacelist{\nt{uid}} \\ && \dnonassoc \sepspacelist{\nt{uid}} \\ && \dleft \sepspacelist{\nt{uid}} \\ && \dright \sepspacelist{\nt{uid}} \\ && \dtype \ocamltype \sepspacelist{\nt{lid}} \\ && \dstart \optional{\ocamltype} \sepspacelist{\nt{lid}} \\ && \donerrorreduce \sepspacelist{\nt{lid}} \\ \nt{rule} \is \optional{\dpublic} \optional{\dinline} \nt{lid} \optional{\dlpar\sepcommalist{\nt{id}}\drpar} \deuxpoints \optional{\barre} \seplist{\ \barre}{\nt{group}} \\ \nt{group} \is \seplist{\ \barre}{\nt{production}} \daction \optional {\dprec \nt{id}} \\ \nt{production} \is \sepspacelist{\nt{producer}} \optional {\dprec \nt{id}} \\ \nt{producer} \is \optional{\nt{lid} \dequal} \nt{actual} \\ \nt{actual} \is \nt{id} \optional{\dlpar\sepcommalist{\nt{actual}}\drpar} \\ && \nt{actual} \optional{\dquestion \barre \dplus \barre \dstar} \\ && \seplist{\ \barre}{\nt{group}} % not really allowed everywhere \end{tabular} \end{center} \caption{Syntax of grammar specifications} \label{fig:syntax} \end{figure} The syntax of grammar specifications appears in \fref{fig:syntax}. (For compatibility with \ocamlyacc, some specifications that do not fully adhere to this syntax are also accepted.) Attributes are not documented in \fref{fig:syntax}: see \sref{sec:attributes}. \subsection{Declarations} \label{sec:decls} A specification file begins with a sequence of declarations, ended by a mandatory \percentpercent keyword. \subsubsection{Headers} \label{sec:decls:headers} A header is a piece of \ocaml code, surrounded with \dheader{and}. 
It is copied verbatim at the beginning of the \ml file. It typically contains \ocaml \kw{open} directives and function definitions for use by the semantic actions. If a single grammar specification file contains multiple headers, their order is preserved. However, when two headers originate in distinct grammar specification files, the order in which they are copied to the \ml file is unspecified. \subsubsection{Parameters} \label{sec:parameter} A declaration of the form: \begin{quote} \dparameter \ocamlparam \end{quote} causes the entire parser to become parameterized over the \ocaml module \nt{uid}, that is, to become an \ocaml functor. The directory \distrib{demos/calc-param} contains a demo that illustrates the use of this switch. If a single specification file contains multiple \dparameter declarations, their order is preserved, so that the module name \nt{uid} introduced by one declaration is effectively in scope in the declarations that follow. When two \dparameter declarations originate in distinct grammar specification files, the order in which they are processed is unspecified. Last, \dparameter declarations take effect before \dheader{$\ldots$}, \dtoken, \dtype, or \dstart declarations are considered, so that the module name \nt{uid} introduced by a \dparameter declaration is effectively in scope in \emph{all} \dheader{$\ldots$}, \dtoken, \dtype, or \dstart declarations, regardless of whether they precede or follow the \dparameter declaration. This means, in particular, that the side effects of an \ocaml header are observed only when the functor is applied, not when it is defined. \subsubsection{Tokens} A declaration of the form: \begin{quote} \dtoken \optional{\ocamltype} $\nt{uid}_1, \ldots, \nt{uid}_n$ \end{quote} defines the identifiers $\nt{uid}_1, \ldots, \nt{uid}_n$ as tokens, that is, as terminal symbols in the grammar specification and as data constructors in the \textit{token} type. If an \ocaml type $t$ is present, then these tokens are considered to carry a semantic value of type $t$, otherwise they are considered to carry no semantic value. \subsubsection{Priority and associativity} \label{sec:assoc} A declaration of one of the following forms: \begin{quote} \dnonassoc $\nt{uid}_1 \ldots \nt{uid}_n$ \\ \dleft $\nt{uid}_1 \ldots \nt{uid}_n$ \\ \dright $\nt{uid}_1 \ldots \nt{uid}_n$ \end{quote} assigns both a \emph{priority level} and an \emph{associativity status} to the symbols $\nt{uid}_1, \ldots, \nt{uid}_n$. The priority level assigned to $\nt{uid}_1, \ldots, \nt{uid}_n$ is not defined explicitly: instead, it is defined to be higher than the priority level assigned by the previous \dnonassoc, \dleft, or \dright declaration, and lower than that assigned by the next \dnonassoc, \dleft, or \dright declaration. The symbols $\nt{uid}_1, \ldots, \nt{uid}_n$ can be tokens (defined elsewhere by a \dtoken declaration) or dummies (not defined anywhere). Both can be referred to as part of \dprec annotations. Associativity status and priority levels allow shift/reduce conflicts to be silently resolved (\sref{sec:conflicts}). \subsubsection{Types} \label{sec:type} A declaration of the form: \begin{quote} \dtype \ocamltype $\nt{lid}_1 \ldots \nt{lid}_n$ \end{quote} assigns an \ocaml type to each of the nonterminal symbols $\nt{lid}_1, \ldots, \nt{lid}_n$. For start symbols, providing an \ocaml type is mandatory, but is usually done as part of the \dstart declaration. For other symbols, it is optional. 
Providing type information can improve the quality of \ocaml's type error messages. A \dtype declaration may concern not only a nonterminal symbol, such as, say, \texttt{expression}, but also a fully applied parameterized nonterminal symbol, such as \texttt{list(expression)} or \texttt{separated\_list(COMMA, option(expression))}. The types provided as part of \dtype declarations are copied verbatim to the \ml and \mli files. In contrast, headers (\sref{sec:decls:headers}) are copied to the \ml file only. For this reason, the types provided as part of \dtype declarations must make sense both in the presence and in the absence of these headers. They should typically be fully qualified types. % TEMPORARY type information can be mandatory in --coq mode; document? \subsubsection{Start symbols} \label{sec:start} A declaration of the form: \begin{quote} \dstart \optional{\ocamltype} $\nt{lid}_1 \ldots \nt{lid}_n$ \end{quote} declares the nonterminal symbols $\nt{lid}_1, \ldots, \nt{lid}_n$ to be start symbols. Each such symbol must be assigned an \ocaml type either as part of the \dstart declaration or via separate \dtype declarations. Each of $\nt{lid}_1, \ldots, \nt{lid}_n$ becomes the name of a function whose signature is published in the \mli file and that can be used to invoke the parser. \subsubsection{Extra reductions on error} \label{sec:onerrorreduce} A declaration of the form: \begin{quote} \donerrorreduce $\nt{lid}_1 \ldots \nt{lid}_n$ \end{quote} marks the nonterminal symbols $\nt{lid}_1, \ldots, \nt{lid}_n$ as potentially eligible for reduction when an invalid token is found. This may cause one or more extra reduction steps to be performed before the error is detected. More precisely, this declaration affects the automaton as follows. Let us say that a production $\nt{lid} \rightarrow \ldots$ is ``reducible on error'' if its left-hand symbol~\nt{lid} appears in a \donerrorreduce declaration. After the automaton has been constructed and after any conflicts have been resolved, in every state~$s$, the following algorithm is applied: \begin{enumerate} \item Construct the set of all productions that are ready to be reduced in state~$s$ and are reducible on error; \item Test if one of them, say $p$, has higher ``on-error-reduce-priority'' than every other production in this set; \item If so, in state~$s$, replace every error action with a reduction of the production~$p$. (In other words, for every terminal symbol~$t$, if the action table says: ``in state~$s$, when the next input symbol is~$t$, fail'', then this entry is replaced with: ``in state~$s$, when the next input symbol is~$t$, reduce production~$p$''.) \end{enumerate} If step 3 above is executed in state~$s$, then an error can never be detected in state~$s$, since all error actions in state~$s$ are replaced with reduce actions. Error detection is deferred: at least one reduction takes place before the error is detected. It is a ``spurious'' reduction: in a canonical LR(1) automaton, it would not take place. An \donerrorreduce declaration does not affect the language that is accepted by the automaton. It does not affect the location where an error is detected. It is used to control in which state an error is detected. If used wisely, it can make errors easier to report, because they are detected in a state for which it is easier to write an accurate diagnostic message (\sref{sec:errors:diagnostics}). % This may make the tables bigger (but I have no statistics). % This makes LRijkstra significantly slower. 
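As an illustration, consider the following fragment (a sketch, modeled after the file \texttt{declarations-onerrorreduce.mly} found in the \texttt{doc} directory of the distribution), where a type is a sequence of atomic types separated with \basic{ARROW} tokens:
\begin{quote}
\begin{tabular}{l}
\donerrorreduce \nt{typ1} \\
\\
\percentpercent \\
\\
\nt{typ0}: \newprod \basic{ID} \dpaction{$\ldots$} \newprod \basic{LPAREN} \nt{typ1} \basic{RPAREN} \dpaction{$\ldots$} \\
\nt{typ1}: \newprod \nt{typ0} \dpaction{$\ldots$} \newprod \nt{typ0} \basic{ARROW} \nt{typ1} \dpaction{$\ldots$} \\
\nt{declaration}: \newprod \basic{ID} \basic{COLON} \nt{typ1} \dpaction{$\ldots$}
\end{tabular}
\end{quote}
In a state where the production $\nt{typ1} \rightarrow \nt{typ0}$ is ready to be reduced, an invalid token normally gives rise to an error; because of the \donerrorreduce declaration, this reduction is performed first, so the error is detected in a state where a complete \nt{typ1} has been recognized. One accurate diagnostic message can then cover several situations that would otherwise be spread over distinct error states.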
Like a \dtype declaration, an \donerrorreduce declaration may concern not only a nonterminal symbol, such as, say, \texttt{expression}, but also a fully applied parameterized nonterminal symbol, such as \texttt{list(expression)} or \texttt{separated\_list(COMMA, option(expression))}. The ``on-error-reduce-priority'' of a production is that of its left-hand symbol. The ``on-error-reduce-priority'' of a nonterminal symbol is determined implicitly by the order of \donerrorreduce declarations. In the declaration $\donerrorreduce\;\nt{lid}_1 \ldots \nt{lid}_n$, the symbols $\nt{lid}_1, \ldots, \nt{lid}_n$ have the same ``on-error-reduce-priority''. They have higher ``on-error-reduce-priority'' than the symbols listed in previous \donerrorreduce declarations, and lower ``on-error-reduce-priority'' than those listed in later \donerrorreduce declarations. \subsection{Rules} Following the mandatory \percentpercent keyword, a sequence of rules is expected. Each rule defines a nonterminal symbol~\nt{id}. % (It is recommended that the name of a nonterminal symbol begin with a lowercase letter, so it falls in the category \nt{lid}. This is in fact mandatory for the start symbols.) In its simplest form, a rule begins with the nonterminal symbol \nt{id}, followed by a colon character (\deuxpoints), and continues with a sequence of production groups (\sref{sec:productiongroups}). Each production group is preceded with a vertical bar character (\barre); the very first bar is optional. The meaning of the bar is choice: the nonterminal symbol \nt{id} develops to either of the production groups. We defer explanations of the keyword \dpublic (\sref{sec:split}), of the keyword \dinline (\sref{sec:inline}), and of the optional formal parameters $\dlpar\sepcommalist{\nt{id}}\drpar$ (\sref{sec:templates}). \subsubsection{Production groups} \label{sec:productiongroups} In its simplest form, a production group consists of a single production (\sref{sec:productions}), followed by an \ocaml semantic action (\sref{sec:actions}) and an optional \dprec annotation (\sref{sec:prec}). A production specifies a sequence of terminal and nonterminal symbols that should be recognized, and optionally binds identifiers to their semantic values. \paragraph{Semantic actions} \label{sec:actions} A semantic action is a piece of \ocaml code that is executed in order to assign a semantic value to the nonterminal symbol with which this production group is associated. A semantic action can refer to the (already computed) semantic values of the terminal or nonterminal symbols that appear in the production via the semantic value identifiers bound by the production. For compatibility with \ocamlyacc, semantic actions can also refer to unnamed semantic values via positional keywords of the form \kw{\$1}, \kw{\$2}, etc.\ This style is discouraged. Furthermore, as a positional keyword of the form \kw{\$i} is internally rewritten as \nt{\_i}, the user should not use identifiers of the form \nt{\_i}. \paragraph{\dprec annotations} \label{sec:prec} An annotation of the form \dprec \nt{id} indicates that the precedence level of the production group is the level assigned to the symbol \nt{id} via a previous \dnonassoc, \dleft, or \dright declaration (\sref{sec:assoc}). In the absence of a \dprec annotation, the precedence level assigned to each production is the level assigned to the rightmost terminal symbol that appears in it. 
It is undefined if the rightmost terminal symbol has an undefined precedence level or if the production mentions no terminal symbols at all. The precedence level assigned to a production is used when resolving shift/reduce conflicts (\sref{sec:conflicts}).

\paragraph{Multiple productions in a group}

If multiple productions are present in a single group, then the semantic action and precedence annotation are shared between them. This short-hand effectively allows several productions to share a semantic action and precedence annotation without requiring textual duplication. It is legal only when every production binds exactly the same set of semantic value identifiers and when no positional semantic value keywords (\kw{\$1}, etc.) are used.

\subsubsection{Productions}
\label{sec:productions}

A production is a sequence of producers (\sref{sec:producers}), optionally followed by a \dprec annotation (\sref{sec:prec}). If a precedence annotation is present, it applies to this production alone, not to other productions in the production group. It is illegal for a production and its production group to both carry \dprec annotations.

\subsubsection{Producers}
\label{sec:producers}

A producer is an actual (\sref{sec:actual}), optionally preceded with a binding of a semantic value identifier, of the form \nt{lid} \dequal. The actual specifies which construction should be recognized and how a semantic value should be computed for that construction. The identifier \nt{lid}, if present, becomes bound to that semantic value in the semantic action that follows. Otherwise, the semantic value can be referred to via a positional keyword (\kw{\$1}, etc.).

\subsubsection{Actuals}
\label{sec:actual}

In its simplest form, an actual is just a terminal or nonterminal symbol $\nt{id}$. If it is a parameterized non-terminal symbol (see \sref{sec:templates}), then it should be applied: $\nt{id}\dlpar\sepcommalist{\nt{actual}}\drpar$.

An actual may be followed with a modifier (\dquestion, \dplus, or \dstar). This is explained further on (see \sref{sec:templates} and \fref{fig:sugar}).

An actual may also be an ``anonymous rule''. In that case, one writes just the rule's right-hand side, which takes the form $\seplist{\ \barre\ }{\nt{group}}$. (This form is allowed only as an argument in an application.) This form is expanded on the fly to a definition of a fresh non-terminal symbol, which is declared \dinline. For instance, providing an anonymous rule as an argument to \nt{list}:
\begin{quote}
\begin{tabular}{l}
\nt{list} \dlpar \basic{e} = \nt{expression}; \basic{SEMICOLON} \dpaction{\basic{e}} \drpar
\end{tabular}
\end{quote}
is equivalent to writing this:
\begin{quote}
\begin{tabular}{l}
\nt{list} \dlpar \nt{expression\_SEMICOLON} \drpar
\end{tabular}
\end{quote}
where the non-terminal symbol \nt{expression\_SEMICOLON} is chosen fresh and is defined as follows:
\begin{quote}
\begin{tabular}{l}
\dinline \nt{expression\_SEMICOLON}: \newprod \basic{e} = \nt{expression}; \basic{SEMICOLON} \dpaction{\basic{e}}
\end{tabular}
\end{quote}

\section{Advanced features}

\subsection{Splitting specifications over multiple files}
\label{sec:split}

\paragraph{Modules}

Grammar specifications can be split over multiple files. When \menhir is invoked with multiple argument file names, it considers each of these files as a \emph{partial} grammar specification, and \emph{joins} these partial specifications in order to obtain a single, complete specification. This feature is intended to promote a form of modularity.
It is hoped that, by splitting large grammar specifications into several ``modules'', they can be made more manageable. It is also hoped that this mechanism, in conjunction with parameterization (\sref{sec:templates}), will promote sharing and reuse. It should be noted, however, that this is only a weak form of modularity. Indeed, partial specifications cannot be independently processed (say, checked for conflicts). It is necessary to first join them, so as to form a complete grammar specification, before any kind of grammar analysis can be done. This mechanism is, in fact, how \menhir's standard library (\sref{sec:library}) is made available: even though its name does not appear on the command line, it is automatically joined with the user's explicitly-provided grammar specifications, making the standard library's definitions globally visible. A partial grammar specification, or module, contains declarations and rules, just like a complete one: there is no visible difference. Of course, it can consist of only declarations, or only rules, if the user so chooses. (Don't forget the mandatory \percentpercent keyword that separates declarations and rules. It must be present, even if one of the two sections is empty.) \paragraph{Private and public nonterminal symbols} It should be noted that joining is \emph{not} a purely textual process. If two modules happen to define a nonterminal symbol by the same name, then it is considered, by default, that this is an accidental name clash. In that case, each of the two nonterminal symbols is silently renamed so as to avoid the clash. In other words, by default, a nonterminal symbol defined in module $A$ is considered \emph{private}, and cannot be defined again, or referred to, in module $B$. Naturally, it is sometimes desirable to define a nonterminal symbol $N$ in module $A$ and to refer to it in module $B$. This is permitted if $N$ is public, that is, if either its definition carries the keyword \dpublic or $N$ is declared to be a start symbol. A public nonterminal symbol is never renamed, so it can be referred to by modules other than its defining module. In fact, it is permitted to split the definition of a \emph{public} nonterminal symbol, over multiple modules and/or within a single module. That is, a public nonterminal symbol $N$ can have multiple definitions, within one module and/or in distinct modules. All of these definitions are joined using the choice (\barre) operator. This feature allows splitting a grammar specification in a manner that is independent of the grammar's structure. For instance, in the grammar of a programming language, the definition of the nonterminal symbol \nt{expression} could be split into multiple modules, where one module groups the expression forms that have to do with arithmetic, one module groups those that concern function definitions and function calls, one module groups those that concern object definitions and method calls, and so on. \paragraph{Tokens aside} Another use of modularity consists in placing all \dtoken declarations in one module, and the actual grammar specification in another module. The module that contains the token definitions can then be shared, making it easier to define multiple parsers that accept the same type of tokens. (On this topic, see \distrib{demos/calc-two}.) \subsection{Parameterizing rules} \label{sec:templates} A rule (that is, the definition of a nonterminal symbol) can be parameterized over an arbitrary number of symbols, which are referred to as formal parameters. 
\paragraph{Example} For instance, here is the definition of the parameterized nonterminal symbol \nt{option}, taken from the standard library (\sref{sec:library}): % \begin{quote} \begin{tabular}{l} \dpublic \basic{option}(\basic{X}): \newprod \dpaction{\basic{None}} \newprod \basic{x} = \basic{X} \dpaction{\basic{Some} \basic{x}} \end{tabular} \end{quote} % This definition states that \nt{option}(\basic{X}) expands to either the empty string, producing the semantic value \basic{None}, or to the string \basic{X}, producing the semantic value {\basic{Some}~\basic{x}}, where \basic{x} is the semantic value of \basic{X}. In this definition, the symbol \basic{X} is abstract: it stands for an arbitrary terminal or nonterminal symbol. The definition is made public, so \nt{option} can be referred to within client modules. A client that wishes to use \nt{option} simply refers to it, together with an actual parameter -- a symbol that is intended to replace \basic{X}. For instance, here is how one might define a sequence of declarations, preceded with optional commas: % \begin{quote} \begin{tabular}{l} \nt{declarations}: \newprod \dpaction{[]} \newprod \basic{ds} = \nt{declarations}; \nt{option}(\basic{COMMA}); \basic{d} = \nt{declaration} \dpaction{ \basic{d} :: \basic{ds} } \end{tabular} \end{quote} % This definition states that \nt{declarations} expands either to the empty string or to \nt{declarations} followed by an optional comma followed by \nt{declaration}. (Here, \basic{COMMA} is presumably a terminal symbol.) When this rule is encountered, the definition of \nt{option} is instantiated: that is, a copy of the definition, where \basic{COMMA} replaces \basic{X}, is produced. Things behave exactly as if one had written: \begin{quote} \begin{tabular}{l} \basic{optional\_comma}: \newprod \dpaction{\basic{None}} \newprod \basic{x} = \basic{COMMA} \dpaction{\basic{Some} \basic{x}} \\ \nt{declarations}: \newprod \dpaction{[]} \newprod \basic{ds} = \nt{declarations}; \nt{optional\_comma}; \basic{d} = \nt{declaration} \dpaction{ \basic{d} :: \basic{ds} } \end{tabular} \end{quote} % Note that, even though \basic{COMMA} presumably has been declared as a token with no semantic value, writing \basic{x}~=~\basic{COMMA} is legal, and binds \basic{x} to the unit value. This design choice ensures that the definition of \nt{option} makes sense regardless of the nature of \basic{X}: that is, \basic{X} can be instantiated with a terminal symbol, with or without a semantic value, or with a nonterminal symbol. \paragraph{Parameterization in general} In general, the definition of a nonterminal symbol $N$ can be parameterized with an arbitrary number of formal parameters. When $N$ is referred to within a production, it must be applied to the same number of actuals. In general, an actual is: % \begin{itemize} \item either a single symbol, which can be a terminal symbol, a nonterminal symbol, or a formal parameter; \item or an application of such a symbol to a number of actuals. \end{itemize} For instance, here is a rule whose single production consists of a single producer, which contains several, nested actuals. (This example is discussed again in \sref{sec:library}.) 
% \begin{quote} \begin{tabular}{l} \nt{plist}(\nt{X}): \newprod \basic{xs} = \nt{loption}(% \nt{delimited}(% \basic{LPAREN}, \nt{separated\_nonempty\_list}(\basic{COMMA}, \basic{X}), \basic{RPAREN}% )% ) \dpaction{\basic{xs}} \end{tabular} \end{quote} \begin{figure} \begin{center} \begin{tabular}{r@{\hskip 2mm}c@{\hskip 2mm}l} \nt{actual}\dquestion & is syntactic sugar for & \nt{option}(\nt{actual}) \\ \nt{actual}\dplus & is syntactic sugar for & \nt{nonempty\_list}(\nt{actual}) \\ \nt{actual}\dstar & is syntactic sugar for & \nt{list}(\nt{actual}) \end{tabular} \end{center} \caption{Syntactic sugar for simulating regular expressions} \label{fig:sugar} \end{figure} % Applications of the parameterized nonterminal symbols \nt{option}, \nt{nonempty\_list}, and \nt{list}, which are defined in the standard library (\sref{sec:library}), can be written using a familiar, regular-expression like syntax (\fref{fig:sugar}). \paragraph{Higher-order parameters} A formal parameter can itself expect parameters. For instance, here is a rule that defines the syntax of procedures in an imaginary programming language: % \begin{quote} \begin{tabular}{l} \nt{procedure}(\nt{list}): \newprod \basic{PROCEDURE} \basic{ID} \nt{list}(\nt{formal}) \nt{SEMICOLON} \nt{block} \nt{SEMICOLON} \dpaction{$\ldots$} \end{tabular} \end{quote} % This rule states that the token \basic{ID}, which represents the name of the procedure, should be followed with a list of formal parameters. (The definitions of the nonterminal symbols \nt{formal} and \nt{block} are not shown.) However, because \nt{list} is a formal parameter, as opposed to a concrete nonterminal symbol defined elsewhere, this definition does not specify how the list is laid out: which token, if any, is used to separate, or terminate, list elements? is the list allowed to be empty? and so on. A more concrete notion of procedure is obtained by instantiating the formal parameter \nt{list}: for instance, \nt{procedure}(\nt{plist}), where \nt{plist} is the parameterized nonterminal symbol defined earlier, is a valid application. \paragraph{Consistency} Definitions and uses of parameterized nonterminal symbols are checked for consistency before they are expanded away. In short, it is checked that, wherever a nonterminal symbol is used, it is supplied with actual arguments in appropriate number and of appropriate nature. This guarantees that expansion of parameterized definitions terminates and produces a well-formed grammar as its outcome. \subsection{Inlining} \label{sec:inline} It is well-known that the following grammar of arithmetic expressions does not work as expected: that is, in spite of the priority declarations, it has shift/reduce conflicts. 
% \begin{quote} \begin{tabular}{l} \dtoken \kangle{\basic{int}} \basic{INT} \\ \dtoken \basic{PLUS} \basic{TIMES} \\ \dleft \basic{PLUS} \\ \dleft \basic{TIMES} \\ \\ \percentpercent \\ \\ \nt{expression}: \newprod \basic{i} = \basic{INT} \dpaction{\basic{i}} \newprod \basic{e} = \nt{expression}; \basic{o} = \nt{op}; \basic{f} = \nt{expression} \dpaction{\basic{o} \basic{e} \basic{f}} \\ \nt{op}: \newprod \basic{PLUS} \dpaction{( + )} \newprod \basic{TIMES} \dpaction{( * )} \end{tabular} \end{quote} % The trouble is, the precedence level of the production \nt{expression} $\rightarrow$ \nt{expression} \nt{op} \nt{expression} is undefined, and there is no sensible way of defining it via a \dprec declaration, since the desired level really depends upon the symbol that was recognized by \nt{op}: was it \basic{PLUS} or \basic{TIMES}? The standard workaround is to abandon the definition of \nt{op} as a separate nonterminal symbol, and to inline its definition into the definition of \nt{expression}, like this: % \begin{quote} \begin{tabular}{l} \nt{expression}: \newprod \basic{i} = \basic{INT} \dpaction{\basic{i}} \newprod \basic{e} = \nt{expression}; \basic{PLUS}; \basic{f} = \nt{expression} \dpaction{\basic{e} + \basic{f}} \newprod \basic{e} = \nt{expression}; \basic{TIMES}; \basic{f} = \nt{expression} \dpaction{\basic{e} * \basic{f}} \end{tabular} \end{quote} % This avoids the shift/reduce conflict, but gives up some of the original specification's structure, which, in realistic situations, can be damageable. Fortunately, \menhir offers a way of avoiding the conflict without manually transforming the grammar, by declaring that the nonterminal symbol \nt{op} should be inlined: % \begin{quote} \begin{tabular}{l} \nt{expression}: \newprod \basic{i} = \basic{INT} \dpaction{\basic{i}} \newprod \basic{e} = \nt{expression}; \basic{o} = \nt{op}; \basic{f} = \nt{expression} \dpaction{\basic{o} \basic{e} \basic{f}} \\ \dinline \nt{op}: \newprod \basic{PLUS} \dpaction{( + )} \newprod \basic{TIMES} \dpaction{( * )} \end{tabular} \end{quote} % The \dinline keyword causes all references to \nt{op} to be replaced with its definition. In this example, the definition of \nt{op} involves two productions, one that develops to \basic{PLUS} and one that expands to \basic{TIMES}, so every production that refers to \nt{op} is effectively turned into two productions, one that refers to \basic{PLUS} and one that refers to \basic{TIMES}. After inlining, \nt{op} disappears and \nt{expression} has three productions: that is, the result of inlining is exactly the manual workaround shown above. In some situations, inlining can also help recover a slight efficiency margin. For instance, the definition: % \begin{quote} \begin{tabular}{l} \dinline \nt{plist}(\nt{X}): \newprod \basic{xs} = \nt{loption}(% \nt{delimited}(% \basic{LPAREN}, \nt{separated\_nonempty\_list}(\basic{COMMA}, \basic{X}), \basic{RPAREN}% )% ) \dpaction{\basic{xs}} \end{tabular} \end{quote} % effectively makes \nt{plist}(\nt{X}) an alias for the right-hand side \nt{loption}($\ldots$). Without the \dinline keyword, the language recognized by the grammar would be the same, but the LR automaton would probably have one more state and would perform one more reduction at run time. The \dinline keyword does not affect the computation of positions (\sref{sec:positions}). The same positions are computed, regardless of where \dinline keywords are placed. 
If the semantic actions have side effects, the \dinline keyword \emph{can} affect the order in which these side effects take place. In the example of \nt{op} and \nt{expression} above, if for some reason the semantic action associated with \nt{op} has a side effect (such as updating a global variable, or printing a message), then, by inlining \nt{op}, we delay this side effect, which takes place \emph{after} the second operand has been recognized, whereas in the absence of inlining it takes place as soon as the operator has been recognized. % Du coup, ça change l'ordre des effets, dans cet exemple, de infixe % à postfixe. \subsection{The standard library} \label{sec:library} \begin{figure} \begin{center} \begin{tabular}{lp{51mm}l@{}l} Name & Recognizes & Produces & Comment \\ \hline\\ \nt{option}(\nt{X}) & $\epsilon$ \barre \nt{X} & $\alpha$ \basic{option}, if \nt{X} : $\alpha$ \\ \nt{ioption}(\nt{X}) & $\epsilon$ \barre \nt{X} & $\alpha$ \basic{option}, if \nt{X} : $\alpha$ & (inlined) \\ \nt{boption}(\nt{X}) & $\epsilon$ \barre \nt{X} & \basic{bool} \\ \nt{loption}(\nt{X}) & $\epsilon$ \barre \nt{X} & $\alpha$ \basic{list}, if \nt{X} : $\alpha$ \nt{list} \\ \\ \nt{pair}(\nt{X}, \nt{Y}) & \nt{X} \nt{Y} & $\alpha\times\beta$, if \nt{X} : $\alpha$ and \nt{Y} : $\beta$ \\ \nt{separated\_pair}(\nt{X}, \nt{sep}, \nt{Y}) & \nt{X} \nt{sep} \nt{Y} & $\alpha\times\beta$, if \nt{X} : $\alpha$ and \nt{Y} : $\beta$ \\ \nt{preceded}(\nt{opening}, \nt{X}) & \nt{opening} \nt{X} & $\alpha$, if \nt{X} : $\alpha$ \\ \nt{terminated}(\nt{X}, \nt{closing}) & \nt{X} \nt{closing} & $\alpha$, if \nt{X} : $\alpha$ \\ \nt{delimited}(\nt{opening}, \nt{X}, \nt{closing}) & \nt{opening} \nt{X} \nt{closing} & $\alpha$, if \nt{X} : $\alpha$ \\ \\ \nt{list}(\nt{X}) & a possibly empty sequence of \nt{X}'s & $\alpha$ \basic{list}, if \nt{X} : $\alpha$ \\ \nt{nonempty\_list}(\nt{X}) & a nonempty sequence of \nt{X}'s & $\alpha$ \basic{list}, if \nt{X} : $\alpha$ \\ \nt{separated\_list}(\nt{sep}, \nt{X}) & a possibly empty sequence of \nt{X}'s separated with \nt{sep}'s & $\alpha$ \basic{list}, if \nt{X} : $\alpha$ \\ \nt{separated\_nonempty\_list}(\nt{sep}, \nt{X}) & a nonempty sequence of \nt{X}'s separated with \nt{sep}'s & $\alpha$ \basic{list}, if \nt{X} : $\alpha$ \\ \end{tabular} \end{center} \caption{Summary of the standard library} \label{fig:standard} \end{figure} Once equipped with a rudimentary module system (\sref{sec:split}), parameterization (\sref{sec:templates}), and inlining (\sref{sec:inline}), it is straightforward to propose a collection of commonly used definitions, such as options, sequences, lists, and so on. This \emph{standard library} is joined, by default, with every grammar specification. A summary of the nonterminal symbols offered by the standard library appears in \fref{fig:standard}. See also the short-hands documented in \fref{fig:sugar}. By relying on the standard library, a client module can concisely define more elaborate notions. For instance, the following rule: % \begin{quote} \begin{tabular}{l} \dinline \nt{plist}(\nt{X}): \newprod \basic{xs} = \nt{loption}(% \nt{delimited}(% \basic{LPAREN}, \nt{separated\_nonempty\_list}(\basic{COMMA}, \basic{X}), \basic{RPAREN}% )% ) \dpaction{\basic{xs}} \end{tabular} \end{quote} % causes \nt{plist}(\nt{X}) to recognize a list of \nt{X}'s, where the empty list is represented by the empty string, and a non-empty list is delimited with parentheses and comma-separated. 
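For instance, assuming that \basic{ID} is a token and that \nt{expression} is defined elsewhere, a function call whose arguments form such a list might be described as follows (a sketch, where \basic{ECall} is a hypothetical data constructor):
\begin{quote}
\begin{tabular}{l}
\nt{call}: \newprod \basic{f} = \basic{ID}; \basic{args} = \nt{plist}(\nt{expression}) \dpaction{\basic{ECall} (\basic{f}, \basic{args})}
\end{tabular}
\end{quote}
Because \nt{plist} is declared \dinline, the reference to it is expanded away, so things behave exactly as if \nt{loption}($\ldots$) had been written by hand.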
The standard library is stored in a file named \texttt{standard.mly}, which is installed at the same time as Menhir. By default, Menhir attempts to find this file in the directory where it was installed. This can be overridden by setting the environment variable \verb+$MENHIR_STDLIB+. If defined, this variable should contain the path of the directory where \texttt{standard.mly} is stored. (This path may end with a \texttt{/} character.) This can also be overridden via the command line switch \ostdlib.
%
The command line switch \onostdlib instructs Menhir to \emph{not} load the standard library.

% ------------------------------------------------------------------------------

\section{Conflicts}
\label{sec:conflicts}

When a shift/reduce or reduce/reduce conflict is detected, it is classified as either benign, if it can be resolved by consulting user-supplied precedence declarations, or severe, if it cannot. Benign conflicts are not reported. Severe conflicts are reported and, if the \oexplain switch is on, explained.

\subsection{When is a conflict benign?}
\label{sec:conflicts:benign}

A shift/reduce conflict involves a single token (the one that one might wish to shift) and one or more productions (those that one might wish to reduce). When such a conflict is detected, the precedence levels (\sref{sec:assoc}, \sref{sec:prec}) of these entities are looked up and compared as follows:
\begin{enumerate}
\item if only one production is involved, and if it has higher priority than the token, then the conflict is resolved in favor of reduction.
\item if only one production is involved, and if it has the same priority as the token, then the associativity status of the token is looked up:
\begin{enumerate}
\item if the token was declared nonassociative, then the conflict is resolved in favor of neither action, that is, a syntax error will be signaled if this token shows up when this production is about to be reduced;
\item if the token was declared left-associative, then the conflict is resolved in favor of reduction;
\item if the token was declared right-associative, then the conflict is resolved in favor of shifting.
\end{enumerate}
\item \label{multiway} if multiple productions are involved, and if, considered one by one, they all cause the conflict to be resolved in the same way (that is, either in favor of shifting, or in favor of neither), then the conflict is resolved in that way.
\end{enumerate}
In either of these cases, the conflict is considered benign. Otherwise, it is considered severe. Note that a reduce/reduce conflict is always considered severe, unless it happens to be subsumed by a benign multi-way shift/reduce conflict (item~\ref{multiway} above).

\subsection{How are severe conflicts explained?}

When the \odump switch is on, a description of the automaton is written to the \automaton file. Severe conflicts are shown as part of this description. Fortunately, there is also a way of understanding conflicts in terms of the grammar, rather than in terms of the automaton. When the \oexplain switch is on, a textual explanation is written to the \conflicts file.

\emph{Not all conflicts are explained} in this file: instead, \emph{only one conflict per automaton state is explained}. This is done partly in the interest of brevity, but also because Pager's algorithm can create artificial conflicts in a state that already contains a true LR(1) conflict; thus, one cannot hope in general to explain all of the conflicts that appear in the automaton.
As a result of this policy, once all conflicts explained in the \conflicts file have been fixed, one might need to run \menhir again to produce yet more conflict explanations. \begin{figure} \begin{quote} \begin{tabular}{l} \dtoken \basic{IF THEN ELSE} \\ \dstart \kangle{\basic{expression}} \nt{expression} \\ \\ \percentpercent \\ \\ \nt{expression}: \newprod $\ldots$ \newprod \basic{IF b} = \nt{expression} \basic{THEN e} = \nt{expression} \dpaction{$\ldots$} \newprod \basic{IF b} = \nt{expression} \basic{THEN e} = \nt{expression} \basic{ELSE f} = \nt{expression} \dpaction{$\ldots$} \newprod $\ldots$ \end{tabular} \end{quote} \caption{Basic example of a shift/reduce conflict} \label{fig:basicshiftreduce} \end{figure} \paragraph{How the conflict state is reached} \fref{fig:basicshiftreduce} shows a grammar specification with a typical shift/reduce conflict. % When this specification is analyzed, the conflict is detected, and an explanation is written to the \conflicts file. The explanation first indicates in which state the conflict lies by showing how that state is reached. Here, it is reached after recognizing the following string of terminal and nonterminal symbols---the \emph{conflict string}: % \begin{quote} \basic{IF expression THEN IF expression THEN expression} \end{quote} Allowing the conflict string to contain both nonterminal and terminal symbols usually makes it shorter and more readable. If desired, a conflict string composed purely of terminal symbols could be obtained by replacing each occurrence of a nonterminal symbol $N$ with an arbitrary $N$-sentence. The conflict string can be thought of as a path that leads from one of the automaton's start states to the conflict state. When multiple such paths exist, the one that is displayed is chosen shortest. Nevertheless, it may sometimes be quite long. In that case, artificially (and temporarily) declaring some existing nonterminal symbols to be start symbols has the effect of adding new start states to the automaton and can help produce shorter conflict strings. Here, \nt{expression} was declared to be a start symbol, which is why the conflict string is quite short. In addition to the conflict string, the \conflicts file also states that the \emph{conflict token} is \basic{ELSE}. That is, when the automaton has recognized the conflict string and when the lookahead token (the next token on the input stream) is \basic{ELSE}, a conflict arises. A conflict corresponds to a choice: the automaton is faced with several possible actions, and does not know which one should be taken. This indicates that the grammar is not LR(1). The grammar may or may not be inherently ambiguous. In our example, the conflict string and the conflict token are enough to understand why there is a conflict: when two \basic{IF} constructs are nested, it is ambiguous which of the two constructs the \basic{ELSE} branch should be associated with. Nevertheless, the \conflicts file provides further information: it explicitly shows that there exists a conflict, by proving that two distinct actions are possible. Here, one of these actions consists in \emph{shifting}, while the other consists in \emph{reducing}: this is a \emph{shift/reduce} conflict. A \emph{proof} takes the form of a \emph{partial derivation tree} whose \emph{fringe} begins with the conflict string, followed by the conflict token. A derivation tree is a tree whose nodes are labeled with symbols. The root node carries a start symbol. 
A node that carries a terminal symbol is considered a leaf, and has no children. A node that carries a nonterminal symbol $N$ either is considered a leaf, and has no children; or is not considered a leaf, and has $n$ children, where $n\geq 0$, labeled $\nt{x}_1,\ldots,\nt{x}_n$, where $N \rightarrow \nt{x}_1,\ldots,\nt{x}_n$ is a production. The fringe of a partial derivation tree is the string of terminal and nonterminal symbols carried by the tree's leaves. A string of terminal and nonterminal symbols that is the fringe of some partial derivation tree is a \emph{sentential form}. \paragraph{Why shifting is legal} \begin{figure} \mycommonbaseline \begin{center} \begin{tikzpicture}[level distance=12mm] \node { \nt{expression} } child { node {\basic{IF}} } child { node {\nt{expression}} } child { node {\basic{THEN}} } child { node {\nt{expression}} child { node {\basic{IF}} } child { node {\nt{expression}} } child { node {\basic{THEN}} } child { node {\nt{expression}} } child { node {\basic{ELSE}} } child { node {\nt{expression}} } } ; \end{tikzpicture} \end{center} \caption{A partial derivation tree that justifies shifting} \label{fig:shifting:tree} \end{figure} \begin{figure} \begin{center} \begin{tabbing} \= \nt{expression} \\ \> \basic{IF} \nt{expression} \basic{THEN} \= \nt{expression} \\ \> \> \basic{IF} \nt{expression} \basic{THEN} \basic{expression} . \basic{ELSE} \nt{expression} \end{tabbing} \end{center} \caption{A textual version of the tree in \fref{fig:shifting:tree}} \label{fig:shifting:text} \end{figure} In our example, the proof that shifting is possible is the derivation tree shown in Figures~\ref{fig:shifting:tree} and~\ref{fig:shifting:text}. At the root of the tree is the grammar's start symbol, \nt{expression}. This symbol develops into the string \nt{IF expression THEN expression}, which forms the tree's second level. The second occurrence of \nt{expression} in that string develops into \nt{IF expression THEN expression ELSE expression}, which forms the tree's last level. The tree's fringe, a sentential form, is the string \nt{IF expression THEN IF expression THEN expression ELSE expression}. As announced earlier, it begins with the conflict string \nt{IF expression THEN IF expression THEN expression}, followed with the conflict token \nt{ELSE}. In \fref{fig:shifting:text}, the end of the conflict string is materialized with a dot. Note that this dot does not occupy the rightmost position in the tree's last level. In other words, the conflict token (\basic{ELSE}) itself occurs on the tree's last level. In practical terms, this means that, after the automaton has recognized the conflict string and peeked at the conflict token, it makes sense for it to \emph{shift} that token. 
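
In practice, shifting is also the resolution that one usually wants here: it
attaches each \basic{ELSE} branch to the nearest \basic{IF}. As a hedged
sketch (this is one possible approach, not the only one), the rules of
\sref{sec:conflicts:benign} suggest a way of making this particular conflict
benign: place \basic{THEN} and \basic{ELSE} on a single precedence level and
declare them right-associative.
%
\begin{verbatim}
%token IF THEN ELSE
(* The production IF expression THEN expression carries the priority
   of THEN, its rightmost terminal. The conflict token ELSE has the
   same priority and is right-associative, so the conflict is
   resolved in favor of shifting. *)
%right THEN ELSE
\end{verbatim}
%
Whether such precedence declarations are preferable to an explicit
reformulation of the grammar is a matter of taste.
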
\paragraph{Why reducing is legal}

\begin{figure}
\mycommonbaseline
\begin{center}
\begin{tikzpicture}[level distance=12mm]
\node { \nt{expression} }
  child { node {\basic{IF}} }
  child { node {\nt{expression}} }
  child { node {\basic{THEN}} }
  child { node {\nt{expression}}
    child { node {\basic{IF}} }
    child { node {\nt{expression}} }
    child { node {\basic{THEN}} }
    child { node {\nt{expression}} }
  }
  child { node {\basic{ELSE}} }
  child { node {\nt{expression}} }
;
\end{tikzpicture}
\end{center}
\caption{A partial derivation tree that justifies reducing}
\label{fig:reducing:tree}
\end{figure}

\begin{figure}
\begin{center}
\begin{tabbing}
\= \nt{expression} \\
\> \basic{IF} \nt{expression} \basic{THEN} \= \nt{expression} \basic{ELSE} \nt{expression} \sidecomment{lookahead token appears} \\
\> \> \basic{IF} \nt{expression} \basic{THEN} \basic{expression} .
\end{tabbing}
\end{center}
\caption{A textual version of the tree in \fref{fig:reducing:tree}}
\label{fig:reducing:text}
\end{figure}

In our example, the proof that reducing is possible is the derivation tree
shown in Figures~\ref{fig:reducing:tree} and~\ref{fig:reducing:text}. Again,
the sentential form found at the fringe of the tree begins with the conflict
string, followed with the conflict token. Again, in
\fref{fig:reducing:text}, the end of the conflict string is materialized
with a dot. Note that, this time, the dot occupies the rightmost position in
the tree's last level. In other words, the conflict token (\basic{ELSE})
appeared on an earlier level (here, on the second level). This fact is
emphasized by the comment \inlinesidecomment{lookahead token appears} found
at the second level. In practical terms, this means that, after the
automaton has recognized the conflict string and peeked at the conflict
token, it makes sense for it to \emph{reduce} the production that
corresponds to the tree's last level---here, the production is
\nt{expression} $\rightarrow$ \basic{IF} \nt{expression} \basic{THEN}
\basic{expression}.

\paragraph{An example of a more complex derivation tree}

Figures~\ref{fig:xreducing:tree} and~\ref{fig:xreducing:text} show a partial
derivation tree that justifies reduction in a more complex situation. (This
derivation tree is relative to a grammar that is not shown.) Here, the
conflict string is \basic{DATA UIDENT EQUALS UIDENT}; the conflict token is
\basic{LIDENT}. It is quite clear that the fringe of the tree begins with
the conflict string. However, in this case, the fringe does not explicitly
exhibit the conflict token. Let us examine the tree more closely and answer
the question: following \basic{UIDENT}, what's the next terminal symbol on
the fringe?
\begin{figure}
\mycommonbaseline
\begin{center}
\begin{tikzpicture}[level distance=12mm,level 1/.style={sibling distance=18mm},
                    level 2/.style={sibling distance=18mm},
                    level 4/.style={sibling distance=24mm}]
\node { \nt{decls} }
  child { node {\nt{decl}}
    child { node {\basic{DATA}} }
    child { node {\basic{UIDENT}} }
    child { node {\basic{EQUALS}} }
    child { node {\nt{tycon\_expr}}
      child { node {\nt{tycon\_item}}
        child { node {\basic{UIDENT}} }
        child { node {\nt{opt\_type\_exprs}}
          child { node {} edge from parent [dashed] }
        }
      }
    }
  }
  child { node {\nt{opt\_semi}} }
  child { node {\nt{decls}} }
;
\end{tikzpicture}
\end{center}
\caption{A partial derivation tree that justifies reducing}
\label{fig:xreducing:tree}
\end{figure}

\begin{figure}
\begin{center}
\begin{tabbing}
\= \nt{decls} \\
\> \nt{decl} \nt{opt\_semi} \nt{decls} \sidecomment{lookahead token appears because \nt{opt\_semi} can vanish and \nt{decls} can begin with \basic{LIDENT}} \\
\> \basic{DATA UIDENT} \basic{EQUALS} \= \nt{tycon\_expr} \sidecomment{lookahead token is inherited} \\
\> \> \nt{tycon\_item} \sidecomment{lookahead token is inherited} \\
\> \> \basic{UIDENT} \= \nt{opt\_type\_exprs} \sidecomment{lookahead token is inherited} \\
\> \> \> .
\end{tabbing}
\end{center}
\caption{A textual version of the tree in \fref{fig:xreducing:tree}}
\label{fig:xreducing:text}
\end{figure}

First, note that \nt{opt\_type\_exprs} is \emph{not} a leaf node, even
though it has no children. The grammar contains the production
$\nt{opt\_type\_exprs} \rightarrow \epsilon$: the nonterminal symbol
\nt{opt\_type\_exprs} develops to the empty string. (This is made clear in
\fref{fig:xreducing:text}, where a single dot appears immediately below
\nt{opt\_type\_exprs}.) Thus, \nt{opt\_type\_exprs} is not part of the
fringe.

Next, note that \nt{opt\_type\_exprs} is the rightmost symbol within its
level. Thus, in order to find the next symbol on the fringe, we have to look
up one level. This is the meaning of the comment
\inlinesidecomment{lookahead token is inherited}. Similarly,
\nt{tycon\_item} and \nt{tycon\_expr} appear rightmost within their level,
so we again have to look further up. This brings us back to the tree's
second level. There, \nt{decl} is \emph{not} the rightmost symbol: next to
it, we find \nt{opt\_semi} and \nt{decls}.

Does this mean that \nt{opt\_semi} is the next symbol on the fringe? Yes and
no. \nt{opt\_semi} is a \emph{nonterminal} symbol, but we are really
interested in finding out what the next \emph{terminal} symbol on the fringe
could be. The partial derivation tree shown in
Figures~\ref{fig:xreducing:tree} and~\ref{fig:xreducing:text} does not
explicitly answer this question. In order to answer it, we need to know more
about \nt{opt\_semi} and \nt{decls}.

Here, \nt{opt\_semi} stands (as one might have guessed) for an optional
semicolon, so the grammar contains a production $\nt{opt\_semi} \rightarrow
\epsilon$. This is indicated by the comment
\inlinesidecomment{\nt{opt\_semi} can vanish}. (Nonterminal symbols that
generate $\epsilon$ are also said to be \emph{nullable}.) Thus, one could
choose to turn this partial derivation tree into a larger one by developing
\nt{opt\_semi} into $\epsilon$, making it a non-leaf node. That would yield
a new partial derivation tree where the next symbol on the fringe, following
\basic{UIDENT}, is \nt{decls}.

Now, what about \nt{decls}? Again, it is a \emph{nonterminal} symbol, and we
are really interested in finding out what the next \emph{terminal} symbol on
the fringe could be.
Again, we need to imagine how this partial derivation tree could be turned
into a larger one by developing \nt{decls}. Here, the grammar happens to
contain a production of the form $\nt{decls} \rightarrow \basic{LIDENT}
\ldots$ This is indicated by the comment \inlinesidecomment{\nt{decls} can
begin with \basic{LIDENT}}. Thus, by developing \nt{decls}, it is possible
to construct a partial derivation tree where the next symbol on the fringe,
following \basic{UIDENT}, is \basic{LIDENT}. This is precisely the conflict
token.

To sum up, there exists a partial derivation tree whose fringe begins with
the conflict string, followed with the conflict token. Furthermore, in that
derivation tree, the dot occupies the rightmost position in the last level.
As in our previous example, this means that, after the automaton has
recognized the conflict string and peeked at the conflict token, it makes
sense for it to \emph{reduce} the production that corresponds to the tree's
last level---here, the production is $\nt{opt\_type\_exprs} \rightarrow
\epsilon$.

\paragraph{Greatest common factor among derivation trees}

Understanding conflicts requires comparing two (or more) derivation trees.
It is frequent for these trees to exhibit a common factor, that is, to
exhibit identical structure near the top of the tree, and to differ only
below a specific node. Manual identification of that node can be tedious, so
\menhir performs this work automatically. When explaining an $n$-way
conflict, it first displays the greatest common factor of the $n$ derivation
trees. A question mark symbol $\basic{(?)}$ is used to identify the node
where the trees begin to differ. Then, \menhir displays each of the $n$
derivation trees, \emph{without their common factor}---that is, it displays
$n$ sub-trees that actually begin to differ at the root. This should make
visual comparisons significantly easier.

\subsection{How are severe conflicts resolved in the end?}

It is unspecified how severe conflicts are resolved. \menhir attempts to
mimic \ocamlyacc's specification, that is, to resolve shift/reduce conflicts
in favor of shifting, and to resolve reduce/reduce conflicts in favor of the
production that textually appears earliest in the grammar specification.
However, this specification is inconsistent in case of three-way conflicts,
that is, conflicts that simultaneously involve a shift action and several
reduction actions. Furthermore, textual precedence can be undefined when the
grammar specification is split over multiple modules. In short, \menhir's
philosophy is that
\begin{center}
severe conflicts should not be tolerated,
\end{center}
so you should not care how they are resolved.

% If a shift/reduce conflict is resolved in favor of reduction, then there can
% exist words of terminal symbols that are accepted by the canonical LR(1)
% automaton without traversing any conflict state and which are rejected by our
% automaton (constructed by Pager's method followed by conflict
% resolution). Same problem when a shift/reduce conflict is resolved in favor of
% neither action (via \dnonassoc) or when a reduce/reduce conflict is resolved
% arbitrarily.

\subsection{End-of-stream conflicts}
\label{sec:eos}

\menhir's treatment of the end of the token stream is (believed to be) fully
compatible with \ocamlyacc's. Yet, \menhir attempts to be more user-friendly
by warning about a class of so-called ``end-of-stream conflicts''.
% TEMPORARY it should be noted that \menhir does not conform to ocamlyacc in
% the presence of end-of-stream conflicts; apparently it runs into a wall,
% always demanding the next token, whereas ocamlyacc is able to stop (how?);
% cf. the problem reported by S. Hinderer (April 2015).

\paragraph{How the end of stream is handled}

In many textbooks on parsing, it is assumed that the lexical analyzer, which
produces the token stream, produces a special token, written \eos, to signal
that the end of the token stream has been reached. A parser generator can
take advantage of this by transforming the grammar: for each start symbol
$\nt{S}$ in the original grammar, a new start symbol $\nt{S'}$ is defined,
together with the production $S'\rightarrow S\eos$. The symbol $S$ is no
longer a start symbol in the new grammar. This means that the parser will
accept a sentence derived from $S$ only if it is immediately followed by the
end of the token stream.

This approach has the advantage of simplicity. However, \ocamlyacc and
\menhir do not follow it, for several reasons. Perhaps the most convincing
one is that it is not flexible enough: sometimes, it is desirable to
recognize a sentence derived from $S$, \emph{without} requiring that it be
followed by the end of the token stream: this is the case, for instance,
when reading commands, one by one, on the standard input channel. In that
case, there is no end of stream: the token stream is conceptually infinite.
Furthermore, after a command has been recognized, we do \emph{not} wish to
examine the next token, because doing so might cause the program to block,
waiting for more input.

In short, \ocamlyacc and \menhir's approach is to recognize a sentence
derived from $S$ and to \emph{not look}, if possible, at what follows.
However, this is possible only if the definition of $S$ is such that the end
of an $S$-sentence is identifiable without knowledge of the lookahead token.
When the definition of $S$ does not satisfy this criterion, an
\emph{end-of-stream conflict} arises: after a potential $S$-sentence has
been read, there can be a tension between consulting the next token, in
order to determine whether the sentence is continued, and \emph{not}
consulting the next token, because the sentence might be over and whatever
follows should not be read. \menhir warns about end-of-stream conflicts,
whereas \ocamlyacc does not.

\paragraph{A definition of end-of-stream conflicts}

Technically, \menhir proceeds as follows. A \eos symbol is introduced. It
is, however, only a \emph{pseudo-}token: it is never produced by the lexical
analyzer. For each start symbol $\nt{S}$ in the original grammar, a new
start symbol $\nt{S'}$ is defined, together with the production
$S'\rightarrow S$. The corresponding start state of the LR(1) automaton is
composed of the LR(1) item $S' \rightarrow . \;S\; [\eos]$. That is, the
pseudo-token \eos initially appears in the lookahead set, indicating that we
expect to be done after recognizing an $S$-sentence. During the construction
of the LR(1) automaton, this lookahead set is inherited by other items, with
the effect that, in the end, the automaton has:
\begin{itemize}
\item \emph{shift} actions only on physical tokens; and
\item \emph{reduce} actions either on physical tokens or on the pseudo-token
\eos.
\end{itemize}
A state of the automaton has a reduce action on \eos if, in that state, an
$S$-sentence has been read, so that the job is potentially finished.
A state has a shift or reduce action on a physical token if, in that state, more tokens potentially need to be read before an $S$-sentence is recognized. If a state has a reduce action on \eos, then that action should be taken \emph{without} requesting the next token from the lexical analyzer. On the other hand, if a state has a shift or reduce action on a physical token, then the lookahead token \emph{must} be consulted in order to determine if that action should be taken. \begin{figure}[p] \begin{quote} \begin{tabular}{l} \dtoken \kangle{\basic{int}} \basic{INT} \\ \dtoken \basic{PLUS TIMES} \\ \dleft PLUS \\ \dleft TIMES \\ \dstart \kangle{\basic{int}} \nt{expr} \\ \percentpercent \\ \nt{expr}: \newprod \basic{i} = \basic{INT} \dpaction{\basic{i}} \newprod \basic{e1} = \nt{expr} \basic{PLUS} \basic{e2} = \nt{expr} \dpaction{\basic{e1 + e2}} \newprod \basic{e1} = \nt{expr} \basic{TIMES} \basic{e2} = \nt{expr} \dpaction{\basic{e1 * e2}} \end{tabular} \end{quote} \caption{Basic example of an end-of-stream conflict} \label{fig:basiceos} \end{figure} \begin{figure}[p] \begin{verbatim} State 6: expr -> expr . PLUS expr [ # TIMES PLUS ] expr -> expr PLUS expr . [ # TIMES PLUS ] expr -> expr . TIMES expr [ # TIMES PLUS ] -- On TIMES shift to state 3 -- On # PLUS reduce production expr -> expr PLUS expr State 4: expr -> expr . PLUS expr [ # TIMES PLUS ] expr -> expr . TIMES expr [ # TIMES PLUS ] expr -> expr TIMES expr . [ # TIMES PLUS ] -- On # TIMES PLUS reduce production expr -> expr TIMES expr State 2: expr' -> expr . [ # ] expr -> expr . PLUS expr [ # TIMES PLUS ] expr -> expr . TIMES expr [ # TIMES PLUS ] -- On TIMES shift to state 3 -- On PLUS shift to state 5 -- On # accept expr \end{verbatim} \caption{Part of an LR automaton for the grammar in \fref{fig:basiceos}} \label{fig:basiceosdump} \end{figure} \begin{figure}[p] \begin{quote} \begin{tabular}{l} \ldots \\ \dtoken \basic{END} \\ \dstart \kangle{\basic{int}} \nt{main} \hskip 1cm \textit{// instead of \nt{expr}} \\ \percentpercent \\ \nt{main}: \newprod \basic{e} = \nt{expr} \basic{END} \dpaction{\basic{e}} \\ \nt{expr}: \newprod \ldots \end{tabular} \end{quote} \caption{Fixing the grammar specification in \fref{fig:basiceos}} \label{fig:basiceos:sol} \end{figure} An end-of-stream conflict arises when a state has distinct actions on \eos and on at least one physical token. In short, this means that the end of an $S$-sentence cannot be unambiguously identified without examining one extra token. \menhir's default behavior, in that case, is to suppress the action on \eos, so that more input is \emph{always} requested. \paragraph{Example} \fref{fig:basiceos} shows a grammar that has end-of-stream conflicts. When this grammar is processed, \menhir warns about these conflicts, and further warns that \nt{expr} is never accepted. Let us explain. Part of the corresponding automaton, as described in the \automaton file, is shown in \fref{fig:basiceosdump}. Explanations at the end of the \automaton file (not shown) point out that states 6 and 2 have an end-of-stream conflict. Indeed, both states have distinct actions on \eos and on the physical token \basic{TIMES}. % It is interesting to note that, even though state 4 has actions on \eos and on physical tokens, it does not have an end-of-stream conflict. This is because the action taken in state 4 is always to reduce the production $\nt{expr} \rightarrow \nt{expr}$ \basic{TIMES} \nt{expr}, regardless of the lookahead token. 
By default, \menhir produces a parser where end-of-stream conflicts are
resolved in favor of looking ahead: that is, the problematic reduce actions
on \eos are suppressed. This means, in particular, that the \emph{accept}
action in state 2, which corresponds to reducing the production
$\nt{expr'} \rightarrow \nt{expr}$, is suppressed. This explains why the
symbol \nt{expr} is never accepted: because expressions do not have an
unambiguous end marker, the parser will always request one more token and
will never stop.

In order to avoid this end-of-stream conflict, the standard solution is to
introduce a new token, say \basic{END}, and to use it as an end marker for
expressions. The \basic{END} token could be generated by the lexical
analyzer when it encounters the actual end of stream, or it could correspond
to a piece of concrete syntax, say, a line feed character, a semicolon, or
an \texttt{end} keyword. The solution is shown in \fref{fig:basiceos:sol}.

% ------------------------------------------------------------------------------
\section{Positions}
\label{sec:positions}

When an \ocamllex-generated lexical analyzer produces a token, it updates
two fields, named \verb+lex_start_p+ and \verb+lex_curr_p+, in its
environment record, whose type is \verb+Lexing.lexbuf+. Each of these fields
holds a value of type \verb+Lexing.position+. Together, they represent the
token's start and end positions within the text that is being scanned. These
fields are read by Menhir after calling the lexical analyzer, so \textbf{it
is the lexical analyzer's responsibility} to correctly set these fields.

A position consists mainly of an offset (the position's \verb+pos_cnum+
field), but also holds information about the current file name, the current
line number, and the current offset within the current line. (Not all
\ocamllex-generated analyzers keep this extra information up to date. This
must be explicitly programmed by the author of the lexical analyzer.)

\begin{figure}
\begin{center}
\begin{tabular}{@{}l@{\hskip 7.0mm}l@{}}
\verb+$startpos+ & start position of the first symbol in the production's right-hand side, if there is one;
\\& end position of the most recently parsed symbol, otherwise \\
\verb+$endpos+ & end position of the last symbol in the production's right-hand side, if there is one;
\\& end position of the most recently parsed symbol, otherwise \\
\verb+$startpos(+ \verb+$+\nt{i} \barre \nt{id} \verb+)+ & start position of the symbol named \verb+$+\nt{i} or \nt{id} \\
\verb+$endpos(+ \verb+$+\nt{i} \barre \nt{id} \verb+)+ & end position of the symbol named \verb+$+\nt{i} or \nt{id} \\
\ksymbolstartpos & start position of the leftmost symbol \nt{id} such that \verb+$startpos(+\nt{id}\verb+)+ \verb+!=+\, \verb+$endpos(+\nt{id}\verb+)+;
\\& if there is no such symbol, \verb+$endpos+ \\[2mm]
\verb+$startofs+ \\
\verb+$endofs+ \\
\verb+$startofs(+ \verb+$+\nt{i} \barre \nt{id} \verb+)+ & same as above, but produce an integer offset instead of a position \\
\verb+$endofs(+ \verb+$+\nt{i} \barre \nt{id} \verb+)+ \\
\verb+$symbolstartofs+ \\
\end{tabular}
\end{center}
\caption{Position-related keywords}
\label{fig:pos}
\end{figure}

% We could document $endpos($0). Not sure whether that would be a good thing.

\begin{figure}
\begin{tabular}{@{}ll@{\hskip2cm}l}
% Positions.
\verb+symbol_start_pos()+ & \ksymbolstartpos \\
\verb+symbol_end_pos()+ & \verb+$endpos+ \\
\verb+rhs_start_pos i+ & \verb+$startpos($i)+ & ($1 \leq i \leq n$) \\
\verb+rhs_end_pos i+ & \verb+$endpos($i)+ & ($1 \leq i \leq n$) \\
% i = 0 permitted, really
% Offsets.
\verb+symbol_start()+ & \verb+$symbolstartofs+ \\
\verb+symbol_end()+ & \verb+$endofs+ \\
\verb+rhs_start i+ & \verb+$startofs($i)+ & ($1 \leq i \leq n$) \\
\verb+rhs_end i+ & \verb+$endofs($i)+ & ($1 \leq i \leq n$) \\
% i = 0 permitted, really
\end{tabular}
\caption{Translating position-related incantations from \ocamlyacc to \menhir}
\label{fig:pos:mapping}
\end{figure}

This mechanism allows associating pairs of positions with terminal symbols.
If desired, \menhir automatically extends it to nonterminal symbols as well.
That is, it offers a mechanism for associating pairs of positions with
terminal or nonterminal symbols. This is done by making a set of keywords
available to semantic actions (\fref{fig:pos}). Note that these keywords are
\emph{not} available outside of a semantic action: in particular, they
cannot be used within an \ocaml header. Note also that \ocaml's standard
library module \texttt{Parsing} is deprecated. The functions that it offers
\emph{can} be called, but will return dummy positions.

We remark that, if the current production has an empty right-hand side, then
\verb+$startpos+ and \verb+$endpos+ are equal, and (by convention) are the
end position of the most recently parsed symbol (that is, the symbol that
happens to be on top of the automaton's stack when this production is
reduced). If the current production has a nonempty right-hand side, then
\verb+$startpos+ is the same as \verb+$startpos($1)+ and \verb+$endpos+ is
the same as \verb+$endpos($+\nt{n}\verb+)+, where \nt{n} is the length of
the right-hand side. More generally, if the current production has matched a
sentence of length zero, then \verb+$startpos+ and \verb+$endpos+ will be
equal, and conversely.
% (provided the lexer is reasonable and never produces a token whose start and
% end positions are equal).

The position \verb+$startpos+ is sometimes ``further towards the left'' than
one would like. For example, in the following production:
\begin{verbatim}
declaration: modifier? variable { $startpos }
\end{verbatim}
the keyword \verb+$startpos+ represents the start position of the optional
modifier \verb+modifier?+. If this modifier turns out to be absent, then its
start position is (by definition) the end position of the most recently
parsed symbol. This may not be what is desired: perhaps the user would
prefer in this case to use the start position of the symbol
\verb+variable+. This is achieved by using \ksymbolstartpos instead of
\verb+$startpos+. By definition, \ksymbolstartpos is the start position of
the leftmost symbol whose start and end positions differ. In this example,
the computation of \ksymbolstartpos skips the absent \verb+modifier+, whose
start and end positions coincide, and returns the start position of the
symbol \verb+variable+ (assuming this symbol has distinct start and end
positions).
% One could point out that $symbolstartpos returns the $startpos of the
% first nonempty symbol, and not the $symbolstartpos of the first nonempty
% symbol. So this can remain somewhat counter-intuitive, and may not
% correspond exactly to what one expects.
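
To illustrate, here is a hypothetical variant of the above production, where
the semantic action builds a record that carries a location. (The record
type and the symbols \verb+modifier+ and \verb+variable+ are assumptions,
defined elsewhere.)
%
\begin{verbatim}
declaration:
  modifier? v = variable
    { { name = v; loc = ($symbolstartpos, $endpos) } }
\end{verbatim}
%
When the optional modifier is absent, \verb+loc+ begins at the start
position of \verb+variable+; with \verb+$startpos+ instead of
\ksymbolstartpos, it would begin at the end position of the symbol parsed
just before this declaration.
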
% Moreover, the computation of $symbolstartpos is preserved by %inline (one
% obtains this very easily by eliminating $symbolstartpos before inlining),
% but it does not correspond to what $symbolstartpos would yield after a
% manual inlining. Fundamentally, this notion of $symbolstartpos does not
% quite work right.

There is no keyword \verb+$symbolendpos+. Indeed, the problem with
\verb+$startpos+ is due to the asymmetry in the definition of
\verb+$startpos+ and \verb+$endpos+ in the case of an empty right-hand side,
and does not affect \verb+$endpos+.

\newcommand{\fineprint}{\footnote{%
The computation of \ksymbolstartpos is optimized by \menhir under two
assumptions about the lexer. First, \menhir assumes that the lexer never
produces a token whose start and end positions are equal. Second, \menhir
assumes that two positions produced by the lexer are equal if and only if
they are physically equal. If the lexer violates either of these
assumptions, the computation of \ksymbolstartpos could produce a result that
differs from \texttt{Parsing.symbol\_start\_pos()}.
}}

The positions computed by Menhir are exactly the same as those computed by
\verb+ocamlyacc+\fineprint. More precisely, \fref{fig:pos:mapping} sums up
how to translate a call to the \texttt{Parsing} module, as used in an
\ocamlyacc grammar, to a \menhir keyword. We note that \menhir's
\verb+$startpos+ does not appear in the right-hand column in
\fref{fig:pos:mapping}. In other words, \menhir's \verb+$startpos+ does not
correspond exactly to any of the \ocamlyacc function calls. An exact
\ocamlyacc equivalent of \verb+$startpos+ is \verb+rhs_start_pos 1+ if the
current production has a nonempty right-hand side and
\verb+symbol_start_pos()+ if it has an empty right-hand side.

Finally, we remark that \menhir's \dinline keyword (\sref{sec:inline}) does
not affect the computation of positions. The same positions are computed,
regardless of where \dinline keywords are placed.

% ------------------------------------------------------------------------------
\section{Using \menhir as an interpreter}
\label{sec:interpret}

When \ointerpret is set, \menhir no longer behaves as a compiler. Instead,
it acts as an interpreter. That is, it repeatedly:
\begin{itemize}
\item reads a sentence off the standard input channel;
\item parses this sentence, according to the grammar;
\item displays an outcome.
\end{itemize}
This process stops when the end of the input channel is reached.

\subsection{Sentences}
\label{sec:sentences}

The syntax of sentences is as follows:
\begin{center}
\begin{tabular}{r@{}c@{}l}
\nt{sentence} \is \optional{\nt{lid}\,\deuxpoints} \sepspacelist{\nt{uid}} \,\dnewline
\end{tabular}
\end{center}

Less formally, a sentence is a sequence of zero or more terminal symbols
(\nt{uid}'s), separated with whitespace, terminated with a newline
character, and optionally preceded with a non-terminal start symbol
(\nt{lid}). This non-terminal symbol can be omitted if, and only if, the
grammar only has one start symbol.

For instance, here are four valid sentences for the grammar of arithmetic
expressions found in the directory \distrib{demos/calc}:
%
\begin{verbatim}
main: INT PLUS INT EOL
INT PLUS INT
INT PLUS PLUS INT EOL
INT PLUS PLUS
\end{verbatim}
%
In the first sentence, the start symbol \texttt{main} was explicitly
specified. In the other sentences, it was omitted, which is permitted,
because this grammar has no start symbol other than \texttt{main}.
The first sentence is a stream of four terminal symbols, namely \texttt{INT}, \texttt{PLUS}, \texttt{INT}, and \texttt{EOL}. These terminal symbols must be provided under their symbolic names. Writing, say, ``\texttt{12+32\textbackslash n}'' instead of \texttt{INT PLUS INT EOL} is not permitted. \menhir would not be able to make sense of such a concrete notation, since it does not have a lexer for it. % On pourrait documenter le fait qu'une phrase finie est transformée par \menhir % en un flot de tokens potentiellement infinie, avec un suffixe infini EOF ... % Mais c'est un hack, qui pourrait changer à l'avenir. \subsection{Outcomes} \label{sec:outcomes} As soon as \menhir is able to read a complete sentence off the standard input channel (that is, as soon as it finds the newline character that ends the sentence), it parses the sentence according to whichever grammar was specified on the command line, and displays an outcome. An outcome is one of the following: \begin{itemize} \item \texttt{ACCEPT}: a prefix of the sentence was successfully parsed; a parser generated by \menhir would successfully stop and produce a semantic value; \item \texttt{OVERSHOOT}: the end of the sentence was reached before it could be accepted; a parser generated by \menhir would request a non-existent ``next token'' from the lexer, causing it to fail or block; \item \texttt{REJECT}: the sentence was not accepted; a parser generated by \menhir would raise the exception \texttt{Error}. \end{itemize} When \ointerpretshowcst is set, each \texttt{ACCEPT} outcome is followed with a concrete syntax tree. A concrete syntax tree is either a leaf or a node. A leaf is either a terminal symbol or \error. A node is annotated with a non-terminal symbol, and carries a sequence of immediate descendants that correspond to a valid expansion of this non-terminal symbol. \menhir's notation for concrete syntax trees is as follows: \begin{center} \begin{tabular}{r@{}c@{}l} \nt{cst} \is \nt{uid} \\ && \error \\ && \texttt{[} \nt{lid}\,\deuxpoints \sepspacelist{\nt{cst}} \texttt{]} \end{tabular} \end{center} % This notation is not quite unambiguous (it is ambiguous if several % productions are identical). For instance, if one wished to parse the example sentences of \sref{sec:sentences} using the grammar of arithmetic expressions in \distrib{demos/calc}, one could invoke \menhir as follows: \begin{verbatim} $ menhir --interpret --interpret-show-cst demos/calc/parser.mly main: INT PLUS INT EOL ACCEPT [main: [expr: [expr: INT] PLUS [expr: INT]] EOL] INT PLUS INT OVERSHOOT INT PLUS PLUS INT EOL REJECT INT PLUS PLUS REJECT \end{verbatim} (Here, \menhir's input---the sentences provided by the user on the standard input channel--- is shown intermixed with \menhir's output---the outcomes printed by \menhir on the standard output channel.) The first sentence is valid, and accepted; a concrete syntax tree is displayed. The second sentence is incomplete, because the grammar specifies that a valid expansion of \texttt{main} ends with the terminal symbol \texttt{EOL}; hence, the outcome is \texttt{OVERSHOOT}. The third sentence is invalid, because of the repeated occurrence of the terminal symbol \texttt{PLUS}; the outcome is \texttt{REJECT}. The fourth sentence, a prefix of the third one, is rejected for the same reason. \subsection{Remarks} Using \menhir as an interpreter offers an easy way of debugging your grammar. 
For instance, if one wished to check that addition is considered
left-associative, as requested by the \dleft directive found in the file
\distrib{demos/calc/parser.mly}, one could submit the following sentence:
\begin{verbatim}
$ ./menhir --interpret --interpret-show-cst ../demos/calc/parser.mly
INT PLUS INT PLUS INT EOL
ACCEPT
[main: [expr: [expr: [expr: INT] PLUS [expr: INT]] PLUS [expr: INT]] EOL
]
\end{verbatim}
%$
The concrete syntax tree displayed by \menhir is skewed towards the left, as
desired.

The switches \ointerpret and \otrace can be used in conjunction. When
\otrace is set, the interpreter logs its actions to the standard error
channel.

% ------------------------------------------------------------------------------
\section{Generated API}

When \menhir processes a grammar specification, say \texttt{parser.mly}, it
produces one \ocaml module, \texttt{Parser}, whose code resides in the file
\texttt{parser.ml} and whose signature resides in the file
\texttt{parser.mli}. We now review this signature. For simplicity, we assume
that the grammar specification has just one start symbol \verb+main+, whose
\ocaml type is \verb+thing+.

% ------------------------------------------------------------------------------
\subsection{Monolithic API}
\label{sec:monolithic}

The monolithic API defines the type \verb+token+, the exception
\verb+Error+, and the parsing function \verb+main+, named after the start
symbol of the grammar.

%% type token

The type \verb+token+ is an algebraic data type. A value of type
\verb+token+ represents a terminal symbol and its semantic value. For
instance, if the grammar contains the declarations \verb+%token A+ and
\verb+%token<int> B+, then the generated file \texttt{parser.mli} contains
the following definition:
\begin{verbatim}
type token =
| A
| B of int
\end{verbatim}
%
If \oonlytokens is specified on the command line, the type \verb+token+ is
generated, and the rest is omitted. On the contrary, if \oexternaltokens is
used, the type \verb+token+ is omitted, but the rest (described below) is
generated.

%% exception Error

The exception \verb+Error+ carries no argument. It is raised by the parsing
function \verb+main+ (described below) when a syntax error is detected.
%
\begin{verbatim}
exception Error
\end{verbatim}

%% val main

Next comes one parsing function for each start symbol of the grammar. Here,
we have assumed that there is one start symbol, named \verb+main+, so the
generated file \texttt{parser.mli} contains the following declaration:
\begin{verbatim}
val main: (Lexing.lexbuf -> token) -> Lexing.lexbuf -> thing
\end{verbatim}
% We do not show the definition of the exception Error.
This function expects two arguments, namely: a lexer, which typically is
produced by \ocamllex and has type \verb+Lexing.lexbuf -> token+; and a
lexing buffer, which has type \verb+Lexing.lexbuf+. This API is compatible
with \ocamlyacc. (For information on using \menhir without \ocamllex, please
consult \sref{sec:qa}.)
%
This API is ``monolithic'' in the sense that there is just one function,
which does everything: it pulls tokens from the lexer, parses, and
eventually returns a semantic value (or fails by throwing the exception
\texttt{Error}).

% ------------------------------------------------------------------------------
\subsection{Incremental API}
\label{sec:incremental}

If \otable is set, \menhir offers an incremental API in addition to the
monolithic API. In this API, control is inverted. The parser does not have
access to the lexer.
Instead, when the parser needs the next token, it stops and returns its current state to the user. The user is then responsible for obtaining this token (typically by invoking the lexer) and resuming the parser from that state. % The directory \distrib{demos/calc-incremental} contains a demo that illustrates the use of the incremental API. This API is ``incremental'' in the sense that the user has access to a sequence of the intermediate states of the parser. Assuming that semantic values are immutable, a parser state is a persistent data structure: it can be stored and used multiple times, if desired. This enables applications such as ``live parsing'', where a buffer is continuously parsed while it is being edited. The parser can be re-started in the middle of the buffer whenever the user edits a character. Because two successive parser states share most of their data in memory, a list of $n$ successive parser states occupies only $O(n)$ space in memory. % One could point out that semantic actions should be side-effect free. % But that is an absolute requirement. Semantic actions can have side % effects, if the user knows what they are doing. % TEMPORARY actually, live parsing also requires a way of performing % error recovery, up to a complete parse... as in Merlin. % ------------------------------------------------------------------------------ \subsubsection{Starting the parser} In this API, the parser is started by invoking \verb+Incremental.main+. (Recall that we assume that \verb+main+ is the name of the start symbol.) The generated file \texttt{parser.mli} contains the following declaration: \begin{verbatim} module Incremental : sig val main: position -> thing MenhirInterpreter.checkpoint end \end{verbatim} The argument is the initial position. If the lexer is based on an OCaml lexing buffer, this argument should be \verb+lexbuf.lex_curr_p+. In \sref{sec:incremental} and \sref{sec:inspection}, the type \verb+position+ is a synonym for \verb+Lexing.position+. We emphasize that the function \verb+Incremental.main+ does not parse anything. It constructs a checkpoint which serves as a \emph{starting} point. The functions \verb+offer+ and \verb+resume+, described below, are used to drive the parser. % ------------------------------------------------------------------------------ \subsubsection{Driving the parser} \label{sec:incremental:driving} The sub-module \menhirinterpreter is also part of the incremental API. Its declaration, which appears in the generated file \texttt{parser.mli}, is as follows: \begin{verbatim} module MenhirInterpreter : MenhirLib.IncrementalEngine.INCREMENTAL_ENGINE with type token = token \end{verbatim} The signature \verb+INCREMENTAL_ENGINE+, defined in the module \menhirlibincrementalengine, contains many types and functions, which are described in the rest of this section (\sref{sec:incremental:driving}) and in the following sections (\sref{sec:incremental:inspecting}, \sref{sec:incremental:updating}). Please keep in mind that, from the outside, these types and functions should be referred to with an appropriate prefix. For instance, the type \verb+checkpoint+ should be referred to as \verb+MenhirInterpreter.checkpoint+, or \verb+Parser.MenhirInterpreter.checkpoint+, depending on which modules the user chooses to open. %% type token % Passons-le sous silence. %% type 'a env \begin{verbatim} type 'a env \end{verbatim} The abstract type \verb+'a env+ represents the current state of the parser. 
(That is, it contains the current state and stack of the LR automaton.) Assuming that semantic values are immutable, it is a persistent data structure: it can be stored and used multiple times, if desired. The parameter \verb+'a+ is the type of the semantic value that will eventually be produced if the parser succeeds. %% type production \begin{verbatim} type production \end{verbatim} The abstract type \verb+production+ represents a production of the grammar. % The ``start productions'' (which do not exist in an \mly file, but are constructed by Menhir internally) are \emph{not} part of this type. %% type 'a checkpoint \begin{verbatim} type 'a checkpoint = private | InputNeeded of 'a env | Shifting of 'a env * 'a env * bool | AboutToReduce of 'a env * production | HandlingError of 'a env | Accepted of 'a | Rejected \end{verbatim} The type \verb+'a checkpoint+ represents an intermediate or final state of the parser. An intermediate checkpoint is a suspension: it records the parser's current state, and allows parsing to be resumed. The parameter \verb+'a+ is the type of the semantic value that will eventually be produced if the parser succeeds. \verb+Accepted+ and \verb+Rejected+ are final checkpoints. \verb+Accepted+ carries a semantic value. \verb+InputNeeded+ is an intermediate checkpoint. It means that the parser wishes to read one token before continuing. \verb+Shifting+ is an intermediate checkpoint. It means that the parser is taking a shift transition. It exposes the state of the parser before and after the transition. The Boolean parameter tells whether the parser intends to request a new token after this transition. (It always does, except when it is about to accept.) \verb+AboutToReduce+ is an intermediate checkpoint: it means that the parser is about to perform a reduction step. \verb+HandlingError+ is also an intermediate checkpoint: it means that the parser has detected an error and is about to handle it. (Error handling is typically performed in several steps, so the next checkpoint is likely to be \verb+HandlingError+ again.) In these two cases, the parser does not need more input. The parser suspends itself at this point only in order to give the user an opportunity to observe the parser's transitions and possibly handle errors in a different manner, if desired. %% val offer \begin{verbatim} val offer: 'a checkpoint -> token * position * position -> 'a checkpoint \end{verbatim} The function \verb+offer+ allows the user to resume the parser after the parser has suspended itself with a checkpoint of the form \verb+InputNeeded env+. This function expects the previous checkpoint \verb+checkpoint+ as well as a new token (together with the start and end positions of this token). It produces a new checkpoint, which again can be an intermediate checkpoint or a final checkpoint. It does not raise any exception. (The exception \texttt{Error} is used only in the monolithic API.) %% val resume \begin{verbatim} val resume: 'a checkpoint -> 'a checkpoint \end{verbatim} The function \verb+resume+ allows the user to resume the parser after the parser has suspended itself with a checkpoint of the form \verb+AboutToReduce (env, prod)+ or \verb+HandlingError env+. This function expects just the previous checkpoint \verb+checkpoint+. It produces a new checkpoint. It does not raise any exception. The incremental API subsumes the monolithic API. 
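
By way of illustration, here is a hedged sketch of a complete driver,
written directly on top of \verb+offer+ and \verb+resume+. (The lexer
\verb+Lexer.token+ is an assumption, not part of the generated API; note
also that, mirroring the implementation of the function \verb+loop+
described further on, \verb+resume+ is applied to \verb+Shifting+
checkpoints as well.)
%
\begin{verbatim}
let rec drive lexbuf checkpoint =
  let open Parser.MenhirInterpreter in
  match checkpoint with
  | InputNeeded _ ->
      (* The parser needs a token: fetch one from the lexer,
         together with its start and end positions. *)
      let token = Lexer.token lexbuf in
      let startp = lexbuf.Lexing.lex_start_p
      and endp = lexbuf.Lexing.lex_curr_p in
      drive lexbuf (offer checkpoint (token, startp, endp))
  | Shifting _ | AboutToReduce _ | HandlingError _ ->
      (* No input is needed: let the parser take one step. *)
      drive lexbuf (resume checkpoint)
  | Accepted v ->
      v
  | Rejected ->
      raise Parser.Error

let parse lexbuf =
  drive lexbuf (Parser.Incremental.main lexbuf.Lexing.lex_curr_p)
\end{verbatim}
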
Indeed, \verb+main+ can be (and is in fact) implemented by first using \verb+Incremental.main+, then calling \verb+offer+ and \verb+resume+ in a loop, until a final checkpoint is obtained. %% type supplier \begin{verbatim} type supplier = unit -> token * position * position \end{verbatim} A token supplier is a function of no arguments which delivers a new token (together with its start and end positions) every time it is called. The function \verb+loop+ and its variants, described below, expect a supplier as an argument. %% val lexer_lexbuf_to_supplier \begin{verbatim} val lexer_lexbuf_to_supplier: (Lexing.lexbuf -> token) -> Lexing.lexbuf -> supplier \end{verbatim} The function \verb+lexer_lexbuf_to_supplier+, applied to a lexer and to a lexing buffer, produces a fresh supplier. %% (remark about the loop* functions) The functions \verb+offer+ and \verb+resume+, documented above, are sufficient to write a parser loop. One can imagine many variations of such a loop, which is why we expose \verb+offer+ and \verb+resume+ in the first place. Nevertheless, some variations are so common that it is worth providing them, ready for use. The following functions are implemented on top of \verb+offer+ and \verb+resume+. %% val loop \begin{verbatim} val loop: supplier -> 'a checkpoint -> 'a \end{verbatim} \verb+loop supplier checkpoint+ begins parsing from \verb+checkpoint+, reading tokens from \verb+supplier+. It continues parsing until it reaches a checkpoint of the form \verb+Accepted v+ or \verb+Rejected+. In the former case, it returns \verb+v+. In the latter case, it raises the exception \verb+Error+. (By the way, this is how we implement the monolithic API on top of the incremental API.) \begin{verbatim} val loop_handle: ('a -> 'answer) -> ('a checkpoint -> 'answer) -> supplier -> 'a checkpoint -> 'answer \end{verbatim} \verb+loop_handle succeed fail supplier checkpoint+ begins parsing from \verb+checkpoint+, reading tokens from \verb+supplier+. It continues until it reaches a checkpoint of the form \verb+Accepted v+ or \verb+HandlingError _+ (or~\verb+Rejected+, but that should not happen, as \verb+HandlingError _+ will be observed first). In the former case, it calls \verb+succeed v+. In the latter case, it calls \verb+fail+ with this checkpoint. It cannot raise \verb+Error+. This means that Menhir's traditional error-handling procedure (which pops the stack until a state that can act on the \error token is found) does not get a chance to run. Instead, the user can implement her own error handling code, in the \verb+fail+ continuation. %% val loop_handle_undo \begin{verbatim} val loop_handle_undo: ('a -> 'answer) -> ('a checkpoint -> 'a checkpoint -> 'answer) -> supplier -> 'a checkpoint -> 'answer \end{verbatim} \verb+loop_handle_undo+ is analogous to \verb+loop_handle+, but passes a pair of checkpoints (instead of a single checkpoint) to the failure continuation. % The first (and oldest) checkpoint that is passed to the failure continuation is the last \verb+InputNeeded+ checkpoint that was encountered before the error was detected. The second (and newest) checkpoint is where the error was detected. (This is the same checkpoint that \verb+loop_handle+ would pass to its failure continuation.) Going back to the first checkpoint can be thought of as undoing any reductions that were performed after seeing the problematic token. (These reductions must be default reductions or spurious reductions.) 
This can be useful to someone who wishes to implement an error explanation or error recovery mechanism. \verb+loop_handle_undo+ must be applied to an \verb+InputNeeded+ checkpoint. The initial checkpoint produced by \verb+Incremental.main+ is of this form. %% val shifts \begin{verbatim} val shifts: 'a checkpoint -> 'a env option \end{verbatim} \verb+shifts checkpoint+ assumes that \verb+checkpoint+ has been obtained by submitting a token to the parser. It runs the parser from \verb+checkpoint+, through an arbitrary number of reductions, until the parser either accepts this token (i.e., shifts) or rejects it (i.e., signals an error). If the parser decides to shift, then \verb+Some env+ is returned, where \verb+env+ is the parser's state just before shifting. Otherwise, \verb+None+ is returned. This can be used to test whether the parser is willing to accept a certain token. This function should be used with caution, though, as it causes semantic actions to be executed. It is desirable that all semantic actions be side-effect-free, or that their side-effects be harmless. %% val acceptable \begin{verbatim} val acceptable: 'a checkpoint -> token -> position -> bool \end{verbatim} \verb+acceptable checkpoint token pos+ requires \verb+checkpoint+ to be an \verb+InputNeeded+ checkpoint. It returns \verb+true+ iff the parser is willing to shift this token. % This can be used to test, after an error has been detected, which tokens would have been accepted at this point. To do this, one would typically use \verb+loop_handle_undo+ to get access to the last \verb+InputNeeded+ checkpoint that was encountered before the error was detected, and apply \verb+acceptable+ to that checkpoint. \verb+acceptable+ is implemented using \verb+shifts+, so, like \verb+shifts+, it causes certain semantic actions to be executed. It is desirable that all semantic actions be side-effect-free, or that their side-effects be harmless. % ------------------------------------------------------------------------------ \subsubsection{Inspecting the parser's state} \label{sec:incremental:inspecting} Although the type \verb+env+ is opaque, a parser state can be inspected via a few accessor functions, which are described in this section. The following types and functions are contained in the \verb+MenhirInterpreter+ sub-module. %% type 'a lr1state \begin{verbatim} type 'a lr1state \end{verbatim} The abstract type \verb+'a lr1state+ describes a (non-initial) state of the LR(1) automaton. % If \verb+s+ is such a state, then \verb+s+ should have at least one incoming transition, and all of its incoming transitions carry the same (terminal or non-terminal) symbol, say $A$. We say that $A$ is the \emph{incoming symbol} of the state~\verb+s+. % The index \verb+'a+ is the type of the semantic values associated with $A$. The role played by \verb+'a+ is clarified in the definition of the type \verb+element+, which appears further on. %% val number \begin{verbatim} val number: _ lr1state -> int \end{verbatim} The states of the LR(1) automaton are numbered (from 0 and up). The function \verb+number+ maps a state to its number. %% val production_index %% val find_production \begin{verbatim} val production_index: production -> int val find_production: int -> production \end{verbatim} Productions are numbered. (The set of indices of all productions forms an interval, which does \emph{not} necessarily begin at 0.) 
% The function \verb+production_index+ converts a production to an integer number, whereas the function \verb+find_production+ carries out the reverse conversion. It is an error to apply \verb+find_production+ to an invalid index. %% type element \begin{verbatim} type element = | Element: 'a lr1state * 'a * position * position -> element \end{verbatim} The type \verb+element+ describes one entry in the stack of the LR(1) automaton. In a stack element of the form \verb+Element (s, v, startp, endp)+, \verb+s+ is a (non-initial) state and \verb+v+ is a semantic value. The value~\verb+v+ is associated with the incoming symbol~$A$ of the state~\verb+s+. In other words, the value \verb+v+ was pushed onto the stack just before the state \verb+s+ was entered. Thus, for some type \verb+'a+, the state~\verb+s+ has type \verb+'a lr1state+ and the value~\verb+v+ has type~\verb+'a+. The positions \verb+startp+ and \verb+endp+ delimit the fragment of the input text that was reduced to the symbol $A$. In order to do anything useful with the value \verb+v+, one must gain information about the type \verb+'a+, by inspection of the state~\verb+s+. So far, the type \verb+'a lr1state+ is abstract, so there is no way of inspecting~\verb+s+. The inspection API (\sref{sec:inspection}) offers further tools for this purpose. %% val top \begin{verbatim} val top: 'a env -> element option \end{verbatim} \verb+top env+ returns the parser's top stack element. The state contained in this stack element is the current state of the automaton. If the stack is empty, \verb+None+ is returned. In that case, the current state of the automaton must be an initial state. %% val pop_many \begin{verbatim} val pop_many: int -> 'a env -> 'a env option \end{verbatim} \verb+pop_many i env+ pops \verb+i+ elements off the automaton's stack. This is done via \verb+i+ successive invocations of \verb+pop+. Thus, \verb+pop_many 1+ is \verb+pop+. The index \verb+i+ must be nonnegative. The time complexity is $O(i)$. %% val get \begin{verbatim} val get: int -> 'a env -> element option \end{verbatim} \verb+get i env+ returns the parser's \verb+i+-th stack element. The index \verb+i+ is 0-based: thus, \verb+get 0+ is \verb+top+. If \verb+i+ is greater than or equal to the number of elements in the stack, \verb+None+ is returned. \verb+get+ is implemented using \verb+pop_many+ and \verb+top+: its time complexity is $O(i)$. %% val current_state_number \begin{verbatim} val current_state_number: 'a env -> int \end{verbatim} \verb+current_state_number env+ is the integer number of the automaton's current state. Although this number might conceivably be obtained via the functions~\verb+top+ and \verb+number+, using \verb+current_state_number+ is preferable, because this method works even when the automaton's stack is empty (in which case the current state is an initial state, and \verb+top+ returns \verb+None+). This number can be passed as an argument to a \verb+message+ function generated by \verb+menhir --compile-errors+. %% val equal \begin{verbatim} val equal: 'a env -> 'a env -> bool \end{verbatim} \verb+equal env1 env2+ tells whether the parser configurations \verb+env1+ and \verb+env2+ are equal in the sense that the automaton's current state is the same in \verb+env1+ and \verb+env2+ and the stack is \emph{physically} the same in \verb+env1+ and \verb+env2+. If \verb+equal env1 env2+ is \verb+true+, then the sequence of the stack elements, as observed via \verb+pop+ and \verb+top+, must be the same in \verb+env1+ and \verb+env2+. 
Also, if \verb+equal env1 env2+ holds, then the checkpoints \verb+input_needed env1+ and \verb+input_needed env2+ must be equivalent. (The function \verb+input_needed+ is documented in \sref{sec:incremental:updating}.) The function \verb+equal+ has time complexity $O(1)$. %% val positions \begin{verbatim} val positions: 'a env -> position * position \end{verbatim} The function \verb+positions+ returns the start and end positions of the current lookahead token. If invoked in an initial state, this function returns a pair of twice the initial position that was passed as an argument to \verb+main+. %% val env_has_default_reduction %% val state_has_default_reduction \begin{verbatim} val env_has_default_reduction: 'a env -> bool val state_has_default_reduction: _ lr1state -> bool \end{verbatim} When applied to an environment \verb+env+ taken from a checkpoint of the form \verb+AboutToReduce (env, prod)+, the function \verb+env_has_default_reduction+ tells whether the reduction that is about to take place is a default reduction. \verb+state_has_default_reduction s+ tells whether the state \verb+s+ has a default reduction. This includes the case where \verb+s+ is an accepting state. % ------------------------------------------------------------------------------ \subsubsection{Updating the parser's state} \label{sec:incremental:updating} The functions presented in the previous section (\sref{sec:incremental:inspecting}) allow inspecting parser states of type \verb+'a checkpoint+ and \verb+'a env+. However, so far, there are no functions for manufacturing new parser states, except \verb+offer+ and \verb+resume+, which create new checkpoints by feeding tokens, one by one, to the parser. In this section, a small number of functions are provided for manufacturing new parser states of type \verb+'a env+ and \verb+'a checkpoint+. These functions allow going far back into the past and jumping ahead into the future, so to speak. In other words, they allow driving the parser in other ways than by feeding tokens into it. The functions \verb+pop+, \verb+force_reduction+ and \verb+feed+ (part of the inspection API; see \sref{sec:inspection}) construct values of type \verb+'a env+. The function \verb+input_needed+ constructs values of type \verb+'a checkpoint+ and thereby allows resuming parsing in normal mode (via \verb+offer+). Together, these functions can be used to implement error handling and error recovery strategies. %% val pop \begin{verbatim} val pop: 'a env -> 'a env option \end{verbatim} \verb+pop env+ returns a new environment, where the parser's top stack cell has been popped off. (If the stack is empty, \verb+None+ is returned.) This amounts to pretending that the (terminal or nonterminal) symbol that corresponds to this stack cell has not been read. %% val force_reduction \begin{verbatim} val force_reduction: production -> 'a env -> 'a env \end{verbatim} \verb+force_reduction prod env+ can be called only if, in the state \verb+env+, the parser is capable of reducing the production \verb+prod+. If this condition is satisfied, then this production is reduced, which means that its semantic action is executed (this can have side effects!) and the automaton makes a goto (nonterminal) transition. If this condition is not satisfied, an \verb+Invalid_argument+ exception is raised. %% val input_needed \begin{verbatim} val input_needed: 'a env -> 'a checkpoint \end{verbatim} \verb+input_needed env+ returns \verb+InputNeeded env+.
Thus, out of a parser state that might have been obtained via a series of calls to the functions \verb+pop+, \verb+force_reduction+, \verb+feed+, and so on, it produces a checkpoint, which can be used to resume normal parsing, by supplying this checkpoint as an argument to \verb+offer+. This function should be used with some care. It could ``mess up the lookahead'' in the sense that it allows parsing to resume in an arbitrary state \verb+s+ with an arbitrary lookahead symbol \verb+t+, even though Menhir's reachability analysis (which is carried out via the \olisterrors switch) might well think that it is impossible to reach this particular configuration. If one is using Menhir's new error reporting facility (\sref{sec:errors:new}), this could cause the parser to reach an error state for which no error message has been prepared. % ------------------------------------------------------------------------------ \subsection{Inspection API} \label{sec:inspection} If \oinspection is set, \menhir offers an inspection API in addition to the monolithic and incremental APIs. (The reason why this is not done by default is that this requires more tables to be generated, thus making the generated parser larger.) Like the incremental API, the inspection API is found in the sub-module \menhirinterpreter. It offers the following types and functions. %% type _ terminal The type \verb+'a terminal+ is a generalized algebraic data type (GADT). A value of type \verb+'a terminal+ represents a terminal symbol (without a semantic value). The index \verb+'a+ is the type of the semantic values associated with this symbol. For instance, if the grammar contains the declarations \verb+%token A+ and \verb+%token <int> B+, then the generated module \menhirinterpreter contains the following definition: % \begin{verbatim} type _ terminal = | T_A : unit terminal | T_B : int terminal \end{verbatim} % The data constructors are named after the terminal symbols, prefixed with ``\verb+T_+''. %% type _ nonterminal The type \verb+'a nonterminal+ is also a GADT. A value of type \verb+'a nonterminal+ represents a nonterminal symbol (without a semantic value). The index \verb+'a+ is the type of the semantic values associated with this symbol. For instance, if \verb+main+ is the only nonterminal symbol, then the generated module \menhirinterpreter contains the following definition: % \begin{verbatim} type _ nonterminal = | N_main : thing nonterminal \end{verbatim} % The data constructors are named after the nonterminal symbols, prefixed with ``\verb+N_+''. %% type 'a symbol The type \verb+'a symbol+ % (an algebraic data type) is the disjoint union of the types \verb+'a terminal+ and \verb+'a nonterminal+. In other words, a value of type \verb+'a symbol+ represents a terminal or nonterminal symbol (without a semantic value). This type is (always) defined as follows: % \begin{verbatim} type 'a symbol = | T : 'a terminal -> 'a symbol | N : 'a nonterminal -> 'a symbol \end{verbatim} %% type xsymbol The type \verb+xsymbol+ is an existentially quantified version of the type \verb+'a symbol+. It is useful in situations where the index \verb+'a+ is not statically known. It is (always) defined as follows: % \begin{verbatim} type xsymbol = | X : 'a symbol -> xsymbol \end{verbatim} %% type item The type \verb+item+ describes an LR(0) item, that is, a pair of a production \verb+prod+ and an index \verb+i+ into the right-hand side of this production.
If the length of the right-hand side is \verb+n+, then \verb+i+ is comprised between 0 and \verb+n+, inclusive. \begin{verbatim} type item = production * int \end{verbatim} %% Comparison functions. The following functions implement total orderings on the types \verb+_ terminal+, \verb+_ nonterminal+, \verb+xsymbol+, \verb+production+, and \verb+item+. \begin{verbatim} val compare_terminals: _ terminal -> _ terminal -> int val compare_nonterminals: _ nonterminal -> _ nonterminal -> int val compare_symbols: xsymbol -> xsymbol -> int val compare_productions: production -> production -> int val compare_items: item -> item -> int \end{verbatim} %% val incoming_symbol The function \verb+incoming_symbol+ maps a (non-initial) LR(1) state~\verb+s+ to its incoming symbol, that is, the symbol that the parser must recognize before it enters the state \verb+s+. % \begin{verbatim} val incoming_symbol: 'a lr1state -> 'a symbol \end{verbatim} % This function can be used to gain access to the semantic value \verb+v+ in a stack element \verb+Element (s, v, _, _)+. Indeed, by case analysis on the symbol \verb+incoming_symbol s+, one gains information about the type \verb+'a+, hence one obtains the ability to do something useful with the value~\verb+v+. %% val items The function \verb+items+ maps a (non-initial) LR(1) state~\verb+s+ to its LR(0) \emph{core}, that is, to the underlying set of LR(0) items. This set is represented as a list, whose elements appear in an arbitrary order. This set is \emph{not} closed under $\epsilon$-transitions. % \begin{verbatim} val items: _ lr1state -> item list \end{verbatim} %% val lhs %% val rhs The functions \verb+lhs+ and \verb+rhs+ map a production \verb+prod+ to its left-hand side and right-hand side, respectively. The left-hand side is always a nonterminal symbol, hence always of the form \verb+N _+. The right-hand side is a (possibly empty) sequence of (terminal or nonterminal) symbols. % \begin{verbatim} val lhs: production -> xsymbol val rhs: production -> xsymbol list \end{verbatim} % %% val nullable The function \verb+nullable+, applied to a non-terminal symbol, tells whether this symbol is nullable. A nonterminal symbol is nullable if and only if it produces the empty word $\epsilon$. % \begin{verbatim} val nullable: _ nonterminal -> bool \end{verbatim} %% val first %% val xfirst The function call \verb+first nt t+ tells whether the \emph{FIRST} set of the nonterminal symbol \verb+nt+ contains the terminal symbol \verb+t+. That is, it returns \verb+true+ if and only if \verb+nt+ produces a word that begins with \verb+t+. The function \verb+xfirst+ is identical to \verb+first+, except it expects a first argument of type \verb+xsymbol+ instead of \verb+_ terminal+. % \begin{verbatim} val first: _ nonterminal -> _ terminal -> bool val xfirst: xsymbol -> _ terminal -> bool \end{verbatim} %% val foreach_terminal %% val foreach_terminal_but_error The function \verb+foreach_terminal+ enumerates the terminal symbols, including the special symbol \error. The function \verb+foreach_terminal_but_error+ enumerates the terminal symbols, excluding \error. \begin{verbatim} val foreach_terminal: (xsymbol -> 'a -> 'a) -> 'a -> 'a val foreach_terminal_but_error: (xsymbol -> 'a -> 'a) -> 'a -> 'a \end{verbatim} %% val feed \verb+feed symbol startp semv endp env+ causes the parser to consume the (terminal or nonterminal) symbol \verb+symbol+, accompanied with the semantic value \verb+semv+ and with the start and end positions \verb+startp+ and \verb+endp+. 
Thus, the automaton makes a transition, and reaches a new state. The stack grows by one cell. This operation is permitted only if the current state (as determined by \verb+env+) has an outgoing transition labeled with \verb+symbol+. Otherwise, an \verb+Invalid_argument+ exception is raised. \begin{verbatim} val feed: 'a symbol -> position -> 'a -> position -> 'b env -> 'b env \end{verbatim} % TEMPORARY % document the modules that use the inspection API: Printers % document MenhirLib.General? % The directory \distrib{demos/calc-inspection} contains a demo that illustrates the use of the inspection API. % review it / clean it up! % ------------------------------------------------------------------------------ \section{Error handling: the traditional way} \label{sec:errors} \menhir's traditional error handling mechanism is considered deprecated: although it is still supported for the time being, it might be removed in the future. We recommend setting up an error handling mechanism using the new tools offered by \menhir (\sref{sec:errors:new}). \paragraph{Error handling} \menhir's traditional error handling mechanism is inspired by that of \yacc and \ocamlyacc, but is not identical. A special \error token is made available for use within productions. The LR automaton is constructed exactly as if \error was a regular terminal symbol. However, \error is never produced by the lexical analyzer. Instead, when an error is detected, the current lookahead token is discarded and replaced with the \error token, which becomes the current lookahead token. At this point, the parser enters \emph{error handling} mode. In error handling mode, automaton states are popped off the automaton's stack until a state that can \emph{act} on \error is found. This includes \emph{both} shift \emph{and} reduce actions. (\yacc and \ocamlyacc do not trigger reduce actions on \error. It is somewhat unclear why this is so.) When a state that can reduce on \error is found, reduction is performed. Since the lookahead token is still \error, the automaton remains in error handling mode. When a state that can shift on \error is found, the \error token is shifted. At this point, the parser returns to normal mode. When no state that can act on \error is found on the automaton's stack, the parser stops and raises the exception \texttt{Error}. This exception carries no information. The position of the error can be obtained by reading the lexical analyzer's environment record. \paragraph{Error recovery} \ocamlyacc offers an error recovery mode, which is entered immediately after an \error token was successfully shifted. In this mode, tokens are repeatedly taken off the input stream and discarded until an acceptable token is found. This feature is no longer offered by \menhir. \paragraph{Error-related keywords} The following keyword is made available to semantic actions. When the \verb+$syntaxerror+ keyword is evaluated, evaluation of the semantic action is aborted, so that the current reduction is abandoned; the current lookahead token is discarded and replaced with the \error token; and error handling mode is entered. Note that there is no mechanism for inserting an \error token \emph{in front of} the current lookahead token, even though this might also be desirable. It is unclear whether this keyword is useful; it might be suppressed in the future.
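For the record, here is a hedged sketch of what a production that mentions the \error token might look like. The symbols \verb+stmt+, \verb+expr+, \verb+SEMICOLON+ and the semantic actions are hypothetical, not taken from any grammar shown in this document; the point is only that \error may appear in a right-hand side like any terminal symbol, yielding the classic \yacc idiom of re-synchronizing on a semicolon:
\begin{verbatim}
(* A hypothetical rule. When an error is detected while an expression
   is being parsed, states are popped until the error token can be
   shifted; because each error detection discards one input token,
   this production, in effect, skips input until a SEMICOLON is
   found (or until the stack can no longer act on error). *)
stmt:
| e = expr SEMICOLON { Some e }
| error SEMICOLON    { None }
\end{verbatim}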
% ------------------------------------------------------------------------------ \section{Error handling: the new way} \label{sec:errors:new} \menhir's incremental API (\sref{sec:incremental}) allows taking control when an error is detected. Indeed, as soon as an invalid token is detected, the parser produces a checkpoint of the form \verb+HandlingError _+. At this point, if one decides to let the parser proceed, by just calling \verb+resume+, then \menhir enters its traditional error handling mode (\sref{sec:errors}). Instead, however, one can decide to take control and perform error handling or error recovery in any way one pleases. One can, for instance, build and display a diagnostic message, based on the automaton's current stack and/or state. Or, one could modify the input stream, by inserting or deleting tokens, so as to suppress the error, and resume normal parsing. In principle, the possibilities are endless. An apparently simple-minded approach to error reporting, proposed by Jeffery~\citeyear{jeffery-03} and further explored by Pottier~\citeyear{pottier-reachability-cc-2016}, consists in selecting a diagnostic message (or a template for a diagnostic message) based purely on the current state of the automaton. In this approach, one determines, ahead of time, which are the ``error states'' (that is, the states in which an error can be detected), and one prepares, for each error state, a diagnostic message. Because state numbers are fragile (they change when the grammar evolves), an error state is identified not by its number, but by an input sentence that leads to it: more precisely, by an input sentence which causes an error to be detected in this state. Thus, one maintains a set of pairs of an erroneous input sentence and a diagnostic message. \menhir defines a file format, the \messages file format, for representing this information (\sref{sec:messages:format}), and offers a set of tools for creating, maintaining, and exploiting \messages files (\sref{sec:messages:tools}). Once one understands these tools, there remains to write a collection of diagnostic messages, a more subtle task than one might think (\sref{sec:errors:diagnostics}), and to glue everything together (\sref{sec:errors:example}). In this approach to error handling, as in any other approach, one must understand exactly when (that is, in which states) errors are detected. This in turn requires understanding how the automaton is constructed. \menhir's construction technique is not Knuth's canonical LR(1) technique~\cite{knuth-lr-65}, which is usually too expensive to be practical. Instead, \menhir \emph{merges} states~\cite{pager-77} and introduces so-called \emph{default reductions}. These techniques \emph{defer} error detection by allowing extra reductions to take place before an error is detected. % Furthermore, \menhir supports \donerrorreduce declarations, % which also introduce extra reductions. The impact of these alterations must be taken into account when writing diagnostic messages (\sref{sec:errors:diagnostics}). In this approach to error handling, the special \error token is not used. It should not appear in the grammar. Similarly, the \verb+$syntaxerror+ keyword should not be used. % ------------------------------------------------------------------------------ \subsection{The \messages file format} \label{sec:messages:format} A \messages file is a text file. Comment lines, which begin with a \verb+#+ character, are ignored everywhere. 
As is evident in the following description, blank lines are significant: they are used as separators between entries and within an entry. A~\messages file is composed of a list of entries. Two entries are separated by one or more blank lines. Each entry consists of one or more input sentences, followed with one or more blank lines, followed with a message. The syntax of an input sentence is described in \sref{sec:sentences}. A message is arbitrary text, but cannot contain a blank line. We stress that there cannot be a blank line between two sentences (if there is one, \menhir becomes confused and may complain about some word not being ``a known non-terminal symbol''). \begin{figure} \begin{verbatim} grammar: TYPE UID grammar: TYPE OCAMLTYPE UID PREC # A (handwritten) comment. Ill-formed declaration. Examples of well-formed declarations: %type <Syntax.expression> expression %type <int> date time \end{verbatim} \caption{An entry in a \messages file} \label{fig:messages:entry} \end{figure} \begin{figure} \begin{verbatim} grammar: TYPE UID ## ## Ends in an error in state: 1. ## ## declaration -> TYPE . OCAMLTYPE separated_nonempty_list(option(COMMA), ## strict_actual) [ TYPE TOKEN START RIGHT PUBLIC PERCENTPERCENT PARAMETER ## ON_ERROR_REDUCE NONASSOC LEFT INLINE HEADER EOF COLON ] ## ## The known suffix of the stack is as follows: ## TYPE ## grammar: TYPE OCAMLTYPE UID PREC ## ## Ends in an error in state: 5. ## ## strict_actual -> symbol . loption(delimited(LPAREN,separated_nonempty_list ## (COMMA,strict_actual),RPAREN)) [ UID TYPE TOKEN START STAR RIGHT QUESTION ## PUBLIC PLUS PERCENTPERCENT PARAMETER ON_ERROR_REDUCE NONASSOC LID LEFT ## INLINE HEADER EOF COMMA COLON ] ## ## The known suffix of the stack is as follows: ## symbol ## # A (handwritten) comment. Ill-formed declaration. Examples of well-formed declarations: %type <Syntax.expression> expression %type <int> date time \end{verbatim} \caption{An entry in a \messages file, decorated with auto-generated comments} \label{fig:messages:entry:decorated} \end{figure} As an example, \fref{fig:messages:entry} shows a valid entry, taken from \menhir's own \messages file. This entry contains two input sentences, which lead to errors in two distinct states. A single message is associated with these two error states. Several commands, described next (\sref{sec:messages:tools}), produce \messages files where each input sentence is followed with an auto-generated comment, marked with \verb+##+. This special comment indicates in which state the error is detected, and is supposed to help the reader understand what it means to be in this state: What has been read so far? What is expected next? As an example, the previous entry, decorated with auto-generated comments, is shown in \fref{fig:messages:entry:decorated}. (We have manually wrapped the lines that did not fit in this document.) An auto-generated comment begins with the number of the error state that is reached via this input sentence. Then, the auto-generated comment shows the LR(1) items that compose this state, in the same format as in an \automaton file. These items offer a description of the past (that is, what has been read so far) and the future (that is, which terminal symbols are allowed next). Finally, the auto-generated comment shows what is known about the stack when the automaton is in this state. (This can be deduced from the LR(1) items, but is more readable if shown separately.) % Plus, there might be cases where the known suffix is longer than what % the LR(1) items suggest. But I have never seen this yet.
In a canonical LR(1) automaton, the LR(1) items offer an exact description of the past and future. However, in a noncanonical automaton, which is by default what \menhir produces, the situation is more subtle. The lookahead sets can be over-approximated, so the automaton can perform one or more ``spurious reductions'' before an error is detected. As a result, the LR(1) items in the error state offer a description of the future that may be both incorrect (that is, a terminal symbol that appears in a lookahead set is not necessarily a valid continuation) and incomplete (that is, a terminal symbol that does not appear in any lookahead set may nevertheless be a valid continuation). More details appear further on (\sref{sec:errors:diagnostics}). In order to attract the user's attention to this issue, if an input sentence causes one or more spurious reductions, then the auto-generated comment contains a warning about this fact. This mechanism is not completely foolproof, though, as it may be the case that one particular sentence does not cause any spurious reductions (hence, no warning appears), yet leads to an error state that can be reached via other sentences that do involve spurious reductions. % Not sure what to conclude about this issue... % ------------------------------------------------------------------------------ \subsection{Maintaining \messages files} \label{sec:messages:tools} Ideally, the set of input sentences in a \messages file should be correct (that is, every sentence causes an error on its last token), irredundant (that is, no two sentences lead to the same error state), and complete (that is, every error state is reached by some sentence). Correctness and irredundancy are checked by the command \ocompileerrors \nt{filename}, where \nt{filename} is the name of a \messages file. This command fails if a sentence does not cause an error at all, or causes an error too early. It also fails if two sentences lead to the same error state. % If the file is correct and irredundant, then (as its name suggests) this command compiles the \messages file down to an OCaml function, whose code is printed on the standard output channel. This function, named \verb+message+, has type \verb+int -> string+, and maps a state number to a message. It raises the exception \verb+Not_found+ if its argument is not the number of a state for which a message has been defined. Completeness is checked via the commands \olisterrors and \ocompareerrors. The former produces, from scratch, a complete set of input sentences, that is, a set of input sentences that reaches all error states. The latter compares two sets of sentences (more precisely, the two underlying sets of error states) for inclusion. The command \olisterrors first computes all possible ways of causing an error. From this information, it deduces a list of all error states, that is, all states where an error can be detected. For each of these states, it computes a (minimal) input sentence that causes an error in this state. Finally, it prints these sentences, in the \messages file format, on the standard output channel. Each sentence is followed with an auto-generated comment and with a dummy diagnostic message. The user should be warned that this algorithm may require large amounts of time (typically in the tens of seconds, possibly more) and memory (typically in the gigabytes, possibly more). It requires a 64-bit machine. (On a 32-bit machine, it works, but quickly hits a built-in size limit.) 
At the verbosity level \ologautomaton~\texttt{2}, it displays some progress information and internal statistics on the standard error channel. The command \ocompareerrors \nt{filename1} \ocompareerrors \nt{filename2} compares the \messages files \nt{filename1} and \nt{filename2}. Each file is read and internally translated to a mapping of states to messages. \menhir then checks that the left-hand mapping is a subset of the right-hand mapping. That is, if a state~$s$ is reached by some sentence in \nt{filename1}, then it should also be reached by some sentence in \nt{filename2}. Furthermore, if the message associated with $s$ in \nt{filename1} is not a dummy message, then the same message should be associated with $s$ in \nt{filename2}. To check that the sentences in \nt{filename2} cover all error states, it suffices to (1)~use \olisterrors to produce a complete set of sentences, which one stores in \nt{filename1}, then (2)~use \ocompareerrors to compare \nt{filename1} and \nt{filename2}. The command \oupdateerrors \nt{filename} is used to update the auto-generated comments in the \messages file \nt{filename}. It is typically used after a change in the grammar (or in the command line options that affect the construction of the automaton). A new \messages file is produced on the standard output channel. It is identical to \nt{filename}, except the auto-generated comments, identified by \verb+##+, have been removed and re-generated. The command \oechoerrors \nt{filename} is used to filter out all comments, blank lines, and messages from the \messages file \nt{filename}. The input sentences, and nothing else, are echoed on the standard output channel. As an example application, one could then translate the sentences to concrete syntax and create a collection of source files that trigger every possible syntax error. The command \ointerpreterror is analogous to \ointerpret. It causes \menhir to act as an interpreter. \menhir reads sentences off the standard input channel, parses them, and displays the outcome. This switch can be usefully combined with \otrace. The main difference between \ointerpret and \ointerpreterror is that, when the latter command is used, \menhir expects the input sentence to cause an error on its last token, and displays information about the state in which the error is detected, in the form of a \messages file entry. This can be used to quickly find out exactly what error is caused by one particular input sentence. % ------------------------------------------------------------------------------ \subsection{Writing accurate diagnostic messages} \label{sec:errors:diagnostics} One might think that writing a diagnostic message for each error state is a straightforward (if lengthy) task. In reality, it is not so simple. % Here are a few guidelines. % The reader is referred to Pottier's % paper~\citeyear{pottier-reachability-cc-2016} for more details. \paragraph{A state, not a sentence} The first thing to keep in mind is that a diagnostic message is associated with a \emph{state}~$s$, as opposed to a sentence. An entry in a \messages file contains a sentence~$w$ that leads to an error in state~$s$. This sentence is just one way of causing an error in state~$s$; there may exist many other sentences that also cause an error in this state. The diagnostic message should not be specific to the sentence~$w$: it should make sense regardless of how the state~$s$ is reached.
As a rule of thumb, when writing a diagnostic message, one should (as much as possible) ignore the example sentence~$w$ altogether, and concentrate on the description of the state~$s$, which appears as part of the auto-generated comment. The LR(1) items that compose the state~$s$ offer a description of the past (that is, what has been read so far) and the future (that is, which terminal symbols are allowed next). A diagnostic message should be designed based on this description. \begin{figure} \verbatiminput{declarations.mly} \caption{A grammar where one error state is difficult to explain} \label{fig:declarations} \end{figure} \begin{figure} \begin{verbatim} program: ID COLON ID LPAREN ## ## Ends in an error in state: 8. ## ## typ1 -> typ0 . [ SEMICOLON RPAREN ] ## typ1 -> typ0 . ARROW typ1 [ SEMICOLON RPAREN ] ## ## The known suffix of the stack is as follows: ## typ0 ## \end{verbatim} \caption{A problematic error state in the grammar of \fref{fig:declarations}, due to over-approximation} \label{fig:declarations:over} \end{figure} \paragraph{The problem of over-approximated lookahead sets} As pointed out earlier (\sref{sec:messages:format}), in a noncanonical automaton, the lookahead sets in the LR(1) items can be both over- and under-approximated. One must be aware of this phenomenon, otherwise one runs the risk of writing a diagnostic message that proposes too many or too few continuations. As an example, let us consider the grammar in \fref{fig:declarations}. According to this grammar, a ``program'' is either a declaration between parentheses or a declaration followed with a semicolon. A ``declaration'' is an identifier, followed with a colon, followed with a type. A ``type'' is an identifier, a type between parentheses, or a function type in the style of OCaml. The (noncanonical) automaton produced by \menhir for this grammar has 17~states. Using \olisterrors, we find that an error can be detected in 10 of these 17~states. By manual inspection of the auto-generated comments, we find that for 9 out of these 10~states, writing an accurate diagnostic message is easy. However, one problematic state remains, namely state~8, shown in \fref{fig:declarations:over}. In this state, a (level-0) type has just been read. One valid continuation, which corresponds to the second LR(1) item in \fref{fig:declarations:over}, is to continue this type: the terminal symbol \verb+ARROW+, followed with a (level-1) type, is a valid continuation. Now, the question is, what other valid continuations are there? By examining the first LR(1) item in \fref{fig:declarations:over}, it may look as if both \verb+SEMICOLON+ and \verb+RPAREN+ are valid continuations. However, this cannot be the case. A moment's thought reveals that \emph{either} we have seen an opening parenthesis \verb+LPAREN+ at the very beginning of the program, in which case we definitely expect a closing parenthesis \verb+RPAREN+; \emph{or} we have not seen one, in which case we definitely expect a semicolon \verb+SEMICOLON+. It is \emph{never} the case that \emph{both} \verb+SEMICOLON+ and \verb+RPAREN+ are valid continuations! In fact, the lookahead set in the first LR(1) item in \fref{fig:declarations:over} is over-approximated. State~8 in the noncanonical automaton results from merging two states in the canonical automaton. In such a situation, one cannot write an accurate diagnostic message. % by lack of ``static context''. Knowing that the automaton is in state~8 does not give us a precise view of the valid continuations. 
Some valuable information (that is, whether we have seen an opening parenthesis \verb+LPAREN+ at the very beginning of the program) is buried in the automaton's stack. \begin{figure} \verbatiminput{declarations-phantom.mly} \caption{Splitting the problematic state of \fref{fig:declarations:over} via selective duplication} \label{fig:declarations:phantom} \end{figure} \begin{figure} \verbatiminput{declarations-onerrorreduce.mly} \caption{Avoiding the problematic state of \fref{fig:declarations:over} via reductions on error} \label{fig:declarations:onerrorreduce} \end{figure} \begin{figure} \begin{verbatim} program: ID COLON ID LPAREN ## ## Ends in an error in state: 15. ## ## program -> declaration . SEMICOLON [ # ] ## ## The known suffix of the stack is as follows: ## declaration ## ## WARNING: This example involves spurious reductions. ## This implies that, although the LR(1) items shown above provide an ## accurate view of the past (what has been recognized so far), they ## may provide an INCOMPLETE view of the future (what was expected next). ## In state 8, spurious reduction of production typ1 -> typ0 ## In state 11, spurious reduction of production declaration -> ID COLON typ1 ## \end{verbatim} \caption{A problematic error state in the grammar of \fref{fig:declarations:onerrorreduce}, due to under-approximation} \label{fig:declarations:under} \end{figure} How can one work around this problem? Let us suggest three options. \paragraph{Blind duplication of states} One option would be to build a canonical automaton by using the % (undocumented!) \ocanonical switch. In this example, one would obtain a 27-state automaton, where the problem has disappeared. However, this option is rarely viable, as it duplicates many states without good reason. \paragraph{Selective duplication of states} A second option is to manually cause just enough duplication to remove the problematic over-approximation. In our example, we wish to distinguish two kinds of types and declarations, namely those that must be followed with a closing parenthesis, and those that must be followed with a semicolon. We create such a distinction by parameterizing \verb+typ1+ and \verb+declaration+ with a phantom parameter. The modified grammar is shown in \fref{fig:declarations:phantom}. The phantom parameter does not affect the language that is accepted: for instance, the nonterminal symbols \texttt{declaration(SEMICOLON)} and \texttt{declaration(RPAREN)} generate the same language as \texttt{declaration} in the grammar of \fref{fig:declarations}. Yet, by giving distinct names to these two symbols, we force the construction of an automaton where more states are distinguished. In this example, \menhir produces a 23-state automaton. Using \olisterrors, we find that an error can be detected in 11 of these 23~states, and by manual inspection of the auto-generated comments, we find that for each of these 11~states, writing an accurate diagnostic message is easy. In summary, we have selectively duplicated just enough states so as to split the problematic error state into two non-problematic error states. % I wonder whether there is a connection with the translation of LR(k+1) to LR(k)... % One can see that the FOLLOW set is integrated into the nonterminal symbol. \paragraph{Reductions on error} A third and last option is to introduce an \donerrorreduce declaration (\sref{sec:onerrorreduce}) so as to prevent the detection of an error in the problematic state~8.
We see in \fref{fig:declarations:over} that, in state~8, the production $\texttt{typ1} \rightarrow \texttt{typ0}$ is ready to be reduced. If we could force this reduction to take place, then the automaton would move to some other state where it would be clear which of \verb+SEMICOLON+ and \verb+RPAREN+ is expected. We achieve this by marking \verb+typ1+ as ``reducible on error''. The modified grammar is shown in \fref{fig:declarations:onerrorreduce}. For this grammar, \menhir produces a 17-state automaton. (This is the exact same automaton as for the grammar of \fref{fig:declarations}, except 2 of the 17 states have received extra reduction actions.) Using \olisterrors, we find that an error can be detected in 9 of these~17 states. The problematic state, namely state~8, is no longer an error state! The problem has vanished. \paragraph{The problem of under-approximated lookahead sets} The third option seems by far the simplest of all, and is recommended in many situations. However, it comes with a caveat. There may now exist states whose lookahead sets are under-approximated, in a certain sense. Because of this, there is a danger of writing an incomplete diagnostic message, one that does not list all valid continuations. To see this, let us look again at the sentence \texttt{ID COLON ID LPAREN}. In the grammar and automaton of \fref{fig:declarations}, this sentence takes us to the problematic state~8, shown in \fref{fig:declarations:over}. In the grammar and automaton of \fref{fig:declarations:onerrorreduce}, because more reduction actions are carried out before the error is detected, this sentence takes us to state~15, shown in \fref{fig:declarations:under}. When writing a diagnostic message for state~15, one might be tempted to write: ``Up to this point, a declaration has been recognized. At this point, a semicolon is expected''. Indeed, by examining the sole LR(1) item in state~15, it looks as if \verb+SEMICOLON+ is the only permitted continuation. However, this is not the case. Another valid continuation is \verb+ARROW+: indeed, the sentence \texttt{ID COLON ID ARROW ID SEMICOLON} forms a valid program. In fact, if the first token following \texttt{ID COLON ID} is \texttt{ARROW}, then in state~8 this token is shifted, so the two reductions that take us from state~8 through state~11 to state~15 never take place. This is why, even though \texttt{ARROW} does not appear in state~15 as a valid continuation, it nevertheless is a valid continuation of \texttt{ID COLON ID}. The warning produced by \menhir, shown in \fref{fig:declarations:under}, is supposed to attract attention to this issue. Another way to explain this issue is to point out that, by declaring \verb+%on_error_reduce typ1+, we make a choice. When the parser reads a type and finds an invalid token, it decides that this type is finished, even though, in reality, this type could be continued with \verb+ARROW+ \ldots. This in turn causes the parser to perform another reduction and consider the current declaration finished, even though, in reality, this declaration could be continued with \verb+ARROW+ \ldots. In summary, when writing a diagnostic message for state~15, one should take into account the fact that this state can be reached via spurious reductions and (therefore) \verb+SEMICOLON+ may not be the only permitted continuation. One way of doing this, without explicitly listing all permitted continuations, is to write: ``Up to this point, a declaration has been recognized. 
If this declaration is complete, then at this point, a semicolon is expected''. % ------------------------------------------------------------------------------ \subsection{A working example} \label{sec:errors:example} The CompCert verified compiler offers a real-world example of this approach to error handling. The ``pre-parser'' is where syntax errors are detected: see \compcertgithubfile{cparser/pre\_parser.mly}. % (The pre-parser is also in charge of distinguishing type names versus variable % names, but that is an independent issue.) A database of erroneous input sentences and (templates for) diagnostic messages is stored in \compcertgithubfile{cparser/handcrafted.messages}. It is compiled, using \ocompileerrors, to an OCaml file named \texttt{cparser/pre\_parser\_messages.ml}. The function \verb+Pre_parser_messages.message+, which maps a state number to (a template for) a diagnostic message, is called from \compcertgithubfile{cparser/ErrorReports.ml}, where we construct and display a full-fledged diagnostic message. In CompCert, we allow a template for a diagnostic message to contain the special form \verb+$i+, where \verb+i+ is an integer constant, understood as an index into the parser's stack. The code in \compcertgithubfile{cparser/ErrorReports.ml} automatically replaces this special form with the fragment of the source text that corresponds to this stack entry. This mechanism is not built into \menhir; it is implemented in CompCert using \menhir's incremental API. % ------------------------------------------------------------------------------ \section{Coq back-end} \label{sec:coq} \menhir is able to generate a parser whose correctness can be formally verified using the Coq proof assistant~\cite{jourdan-leroy-pottier-12}. This feature is used to construct the parser of the CompCert verified compiler~\cite{compcert}. Setting the \ocoq switch on the command line enables the Coq back-end. When this switch is set, \menhir expects an input file whose name ends in \vy and generates a Coq file whose name ends in \texttt{.v}. Like a \mly file, a \vy file is a grammar specification, with embedded semantic actions. The only difference is that the semantic actions in a \vy file are expressed in Coq instead of \ocaml. A \vy file otherwise uses the same syntax as a \mly file. CompCert's \compcertgithubfile{cparser/Parser.vy} serves as an example. Several restrictions are imposed when \menhir is used in \ocoq mode: % \begin{itemize} \item The error handling mechanism (\sref{sec:errors}) is absent. The \verb+$syntaxerror+ keyword and the \error token are not supported. \item Location information is not propagated. The \verb+$start*+ and \verb+$end*+ keywords (\fref{fig:pos}) are not supported. \item \dparameter (\sref{sec:parameter}) is not supported. \item \dinline (\sref{sec:inline}) is not supported. \item The standard library (\sref{sec:library}) is not supported, of course, because its semantic actions are expressed in \ocaml. If desired, the user can define an analogous library, whose semantic actions are expressed in Coq. \item Because Coq's type inference algorithm is rather unpredictable, the Coq type of every nonterminal symbol must be provided via a \dtype or \dstart declaration (\sref{sec:type}, \sref{sec:start}). \item Unless the proof of completeness has been deactivated using \ocoqnocomplete, the grammar must not have a conflict (not even a benign one, in the sense of \sref{sec:conflicts:benign}). That is, the grammar must be LR(1).
Conflict resolution via priority and associativity declarations (\sref{sec:assoc}) is not supported. The reason is that there is no simple formal specification of how conflict resolution should work. \end{itemize} The generated file contains several modules: \begin{itemize} \item The module \verb+Gram+ defines the terminal and non-terminal symbols, the grammar, and the semantic actions. \item The module \verb+Aut+ contains the automaton generated by \menhir, together with a certificate that is checked by Coq while establishing the soundness and completeness of the parser. \end{itemize} The type~\verb+terminal+ of the terminal symbols is an inductive type, with one constructor for each terminal symbol. A terminal symbol named \verb+Foo+ in the \verb+.vy+ file is named \verb+Foo't+ in Coq. A~terminal symbol per se does not carry a semantic value. We also define the type \verb+token+ of tokens, that is, dependent pairs of a terminal symbol and a semantic value of an appropriate type for this symbol. We model the lexer as an object of type \verb+Streams.Stream token+, that is, an infinite stream of tokens. % TEMPORARY document the fact that, as a consequence, after extraction, the only % way for an OCaml lexer to produce tokens is to use Obj.magic % cf. the function compute_token_stream in CompCert's Lexer.mll: % Cons (Coq_existT (t, Obj.magic v), Lazy.from_fun compute_token_stream) The type~\verb+nonterminal+ of the non-terminal symbols is an inductive type, with one constructor for each non-terminal symbol. A non-terminal symbol named \verb+Bar+ in the \verb+.vy+ file is named \verb+Bar'nt+ in Coq. The proof of termination of an LR(1) parser in the case of invalid input seems far from obvious. We did not find such a proof in the literature. In an application such as CompCert~\cite{compcert}, this question is not considered crucial. For this reason, we did not formally establish the termination of the parser. Instead, we use the ``fuel'' technique. The parser takes an additional parameter of type \verb+nat+ that indicates the maximum number of steps the parser is allowed to perform. In practice, after extracting the code to \ocaml, one can use the standard trick of passing an infinite amount of fuel, defined in \ocaml by \verb+let rec inf = S inf+. Parsing can have three different outcomes, represented by the type \verb+parse_result+. % (This definition is implicitly parameterized over the initial state~\verb+init+. We omit the details here.) % \begin{verbatim} Inductive parse_result := | Fail_pr: parse_result | Timeout_pr: parse_result | Parsed_pr: symbol_semantic_type (NT (start_nt init)) -> Stream token -> parse_result. \end{verbatim} The outcome \verb+Fail_pr+ means that parsing has failed because of a syntax error. (If the completeness of the parser with respect to the grammar has been proved, this implies that the input is invalid.) The outcome \verb+Timeout_pr+ means that the fuel has been exhausted. Of course, this cannot happen if the parser was given an infinite amount of fuel, as suggested above. The outcome \verb+Parsed_pr+ means that the parser has succeeded in parsing a prefix of the input stream. It carries the semantic value that has been constructed for this prefix, as well as the remainder of the input stream. For each entry point \verb+entry+ of the grammar, \menhir generates a parsing function \verb+entry+, whose type is \verb+nat -> Stream token -> parse_result+.
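As a concrete illustration, here is a hedged \ocaml sketch of how client code might invoke such a parsing function after extraction. All of the names below (the module \verb+Parser+, the constructors \verb+S+, \verb+Fail_pr+, \verb+Timeout_pr+, \verb+Parsed_pr+, and the entry point \verb+entry+) are assumptions: the actual names depend on the grammar and on the extraction setup.
\begin{verbatim}
(* A sketch; all names are assumptions about the extracted code. *)

(* An infinite amount of fuel, as suggested above. *)
let rec inf = Parser.S inf

(* Drive the extracted entry point on a stream of tokens. *)
let parse toks =
  match Parser.entry inf toks with
  | Parser.Fail_pr ->
      None                (* a syntax error was detected *)
  | Parser.Timeout_pr ->
      assert false        (* impossible: the fuel is infinite *)
  | Parser.Parsed_pr (semv, _rest) ->
      Some semv           (* keep the semantic value; drop the rest *)
\end{verbatim}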
% jh: I am a bit bothered, because init actually has type initstate, % but I do not want to discuss this in the documentation. All that % matters is that the first parameter of Parsed_pr has a type that is % compatible with the type supplied by the user. Two theorems are provided, named \verb+entry_point_correct+ and \verb+entry_point_complete+. The correctness theorem states that, if a word (a prefix of the input stream) is accepted, then this word is valid (with respect to the grammar) and the semantic value that is constructed by the parser is valid as well (with respect to the grammar). The completeness theorem states that if a word (a prefix of the input stream) is valid (with respect to the grammar), then (given sufficient fuel) it is accepted by the parser. These results imply that the grammar is unambiguous: for every input, there is at most one valid interpretation. This is proved by another generated theorem, named \verb+Parser.unambiguous+. % jh: There is no need to prove termination in order to obtain unambiguity, % since the cases of nontermination concern invalid inputs only. % fp: good point! % fp: it would be interesting to have a certificate stating that the grammar % really is LR(1), but perhaps we do not care. It is good to know that it % is unambiguous. % jh: I do not know what a certificate stating that the grammar is LR(1) % would be, in practice... % fp: It would be a proof of a theorem, expressed purely in terms of the % grammar, stating that the grammar is LR(1). There is a definition of % this property in Aho and Ullman's textbook, if I remember correctly. % But never mind. % fp: One might also wish for a theorem stating that the parser does not % read the stream too far ahead... % jh: in order to really prove this, one would have to invert the control. % Otherwise, as a slightly weaker result, in the current version, we % return the remaining stream, and we prove that it does correspond to % the tail of the Stream. The parsers produced by \menhir's Coq back-end must be linked with a Coq library, which can be found in the CompCert tree~\cite{compcert,compcert-github}, in the \compcertgithubfile{cparser/validator} subdirectory. CompCert can be used as an example if one wishes to use \menhir to generate a formally verified parser as part of some other project. % fp: it might be good to document the directives that are needed in order % to extract efficient code. According to Xavier, this fragment of % extraction.v is relevant: \begin{comment} (* Int31 *) Extract Inductive Int31.digits => "bool" [ "false" "true" ]. Extract Inductive Int31.int31 => "int" [ "Camlcoq.Int31.constr" ] "Camlcoq.Int31.destr". Extract Constant Int31.twice => "Camlcoq.Int31.twice". Extract Constant Int31.twice_plus_one => "Camlcoq.Int31.twice_plus_one". Extract Constant Int31.compare31 => "Camlcoq.Int31.compare". Extract Constant Int31.On => "0". Extract Constant Int31.In => "1". \end{comment} % Perhaps also turn it into a library file? % ------------------------------------------------------------------------------ \section{Building grammarware on top of Menhir} \label{sec:grammarware} It is possible to build a variety of grammar-processing tools, also known as ``grammarware''~\cite{klint-laemmel-verhoef-05}, on top of Menhir's front-end. Indeed, Menhir offers a facility for dumping a \cmly file, which contains a (binary-form) representation of the grammar and automaton, as well as a library, \menhirsdk, for (programmatically) reading and exploiting a \cmly file.
These facilities are described in \sref{sec:sdk}. % Furthermore, Menhir allows decorating a grammar with ``attributes'', which are ignored by Menhir's back-ends, yet are written to the \cmly file, thus can be exploited by other tools, via \menhirsdk. % Attributes are described in \sref{sec:attributes}. \subsection{Menhir's SDK} \label{sec:sdk} The command line option \ocmly causes Menhir to produce a \cmly file in addition to its normal operation. This file contains a (binary-form) representation of the grammar and automaton. This is the grammar that is obtained after the following steps have been carried out: \begin{itemize} \item joining multiple \mly files, if necessary; % in fact, always (due to standard.mly) \item eliminating anonymous rules; \item expanding away parameterized nonterminal symbols; \item removing unreachable nonterminal symbols; \item performing OCaml type inference, if the \oinfer switch is used; \item inlining away nonterminal symbols that are decorated with \dinline. \end{itemize} The library \menhirsdk offers an API for reading a \cmly file. The functor \repo{src/cmly_read.mli}{\texttt{MenhirSdk.Cmly\_read.Read}} reads such a file and produces a module whose signature is \repo{src/cmly_api.ml}{\texttt{MenhirSdk.Cmly\_api.GRAMMAR}}. This API is not explained in this document; for details, the reader is expected to follow the above links. % TEMPORARY mention the demo generate-printers % as an example of both the SDK and attributes % (possibly make it an independent package) \subsection{Attributes} \label{sec:attributes} Attributes are decorations that can be placed in \mly files. They are ignored by Menhir's back-ends, but are written to \cmly files, thus can be exploited by other tools, via \menhirsdk. An attribute consists of a name and a payload. An attribute name is an OCaml identifier, such as \texttt{cost}, or a list of OCaml identifiers, separated with dots, such as \texttt{my.name}. An attribute payload is an OCaml expression of arbitrary type, such as \texttt{1} or \verb+"&&"+ or \verb+print_int+. Following the syntax of OCaml's attributes, an attribute's name and payload are separated with one or more spaces, and are delimited by \verb+[@+ and \verb+]+. Thus, \verb+[@cost 1]+ and \verb+[@printer print_int]+ are examples of attributes. An attribute can be attached at one of four levels: % grammar-level attributes, %[@foo ...] % terminal attribute, %token BAR [@foo ...] % nonterminal attribute, bar [@foo ...]: ... % producer attribute, e = expr [@foo ...] \begin{enumerate} \item An attribute can be attached with the grammar. Such an attribute must be preceded with a \verb+%+ sign and must appear in the declarations section (\sref{sec:decls}). For example, the following is a valid declaration: \begin{verbatim} %[@trace true] \end{verbatim} \item An attribute can be attached with a terminal symbol. Such an attribute must follow the declaration of this symbol. For example, the following is a valid declaration of the terminal symbol \verb+INT+: \begin{verbatim} %token <int> INT [@cost 0] [@printer print_int] \end{verbatim} \item An attribute can be attached with a nonterminal symbol. Such an attribute must appear inside the rule that defines this symbol, immediately after the name of this symbol.
For instance, the following is a valid definition of the nonterminal symbol \verb+expr+: \begin{verbatim} expr [@default EConst 0]: i = INT { EConst i } | e1 = expr PLUS e2 = expr { EAdd (e1, e2) } \end{verbatim} An attribute can be attached with a parameterized nonterminal symbol: \begin{verbatim} option [@default None] (X): { None } | x = X { Some x } \end{verbatim} An attribute cannot be attached with a nonterminal symbol that is decorated with the \dinline keyword. \item An attribute can be attached with a producer (\sref{sec:producers}), that is, with an occurrence of a terminal or nonterminal symbol in the right-hand side of a production. Such an attribute must appear immediately after the producer. For instance, in the following rule, an attribute is attached with the producer \verb+expr*+: \begin{verbatim} exprs: LPAREN es = expr* [@list true] RPAREN { es } \end{verbatim} \end{enumerate} % %attribute declarations: As a convenience, it is possible to attach many attributes with many (terminal and nonterminal) symbols in one go, via an \dattribute declaration, which must be placed in the declarations section (\sref{sec:decls}). For instance, the following declaration attaches both of the attributes \verb+[@cost 0]+ and \verb+[@precious false]+ with each of the symbols \verb+INT+ and \verb+id+: \begin{verbatim} %attribute INT id [@cost 0] [@precious false] \end{verbatim} An \dattribute declaration can be considered syntactic sugar: it is desugared away in terms of the four forms of attributes presented earlier. (The command line switch \oonlypreprocess can be used to see how it is desugared.) % Interaction of %attribute declarations and parameterized nonterminals: If an attribute is attached with a parameterized nonterminal symbol, then, when this symbol is expanded away, the attribute is transmitted to every instance. For instance, in an earlier example, the attribute \verb+[@default None]+ was attached with the parameterized symbol \verb+option+. Then, every instance of \verb+option+, such as \verb+option(expr)+, \verb+option(COMMA)+, and so on, inherits this attribute. To attach an attribute with one specific instance only, one can use an \dattribute declaration. For instance, the declaration \verb+%attribute option(expr) [@cost 10]+ attaches an attribute with the nonterminal symbol \verb+option(expr)+, but not with the symbol \verb+option(COMMA)+. % ------------------------------------------------------------------------------ \section{Comparison with \ocamlyacc} % TEMPORARY idéalement, il faudrait documenter la différence de comportement % sur les réductions par défaut (sur des symboles autres que #). Here is an incomplete list of the differences between \ocamlyacc and \menhir. The list is roughly sorted by decreasing order of importance. \begin{itemize} \item \menhir allows the definition of a nonterminal symbol to be parameterized by other (terminal or nonterminal) symbols (\sref{sec:templates}). Furthermore, it offers a library of standard parameterized definitions (\sref{sec:library}), including options, sequences, and lists. It offers some support for EBNF syntax, via the \dquestion, \dplus, and \dstar modifiers. \item \ocamlyacc only accepts LALR(1) grammars. \menhir accepts LR(1) grammars, thus avoiding certain artificial conflicts. \item \menhir's \dinline keyword (\sref{sec:inline}) helps avoid or resolve some LR(1) conflicts without artificial modification of the grammar. 
\item \menhir explains conflicts (\sref{sec:conflicts}) in terms of the grammar, not just in terms of the automaton. \menhir's explanations are believed to be understandable by mere humans. \item \menhir offers an incremental API (in \otable mode only) (\sref{sec:incremental}). This means that the state of the parser can be saved at any point (at no cost) and that parsing can later be resumed from a saved state. \item In \ocoq mode, \menhir produces a parser whose correctness and completeness with respect to the grammar can be checked by Coq (\sref{sec:coq}). \item \menhir offers an interpreter (\sref{sec:interpret}) that helps debug grammars interactively. \item \menhir allows grammar specifications to be split over multiple files (\sref{sec:split}). It also allows several grammars to share a single set of tokens. \item \menhir produces reentrant parsers. \item \menhir is able to produce parsers that are parameterized by \ocaml modules. \item \ocamlyacc requires semantic values to be referred to via keywords: \verb+$1+, \verb+$2+, and so on. \menhir allows semantic values to be explicitly named. \item \menhir warns about end-of-stream conflicts (\sref{sec:eos}), whereas \ocamlyacc does not. \menhir warns about productions that are never reduced, whereas, at least in some cases, \ocamlyacc does not. \item \menhir offers an option to typecheck semantic actions \emph{before} a parser is generated: see \oinfer. \item \ocamlyacc produces tables that are interpreted by a piece of C code, requiring semantic actions to be encapsulated as \ocaml closures and invoked by C code. \menhir offers a choice between producing tables and producing code. In either case, no C code is involved. \item \menhir makes \ocaml's standard library module \texttt{Parsing} entirely obsolete. Access to locations is now via keywords (\sref{sec:positions}). Uses of \verb+raise Parse_error+ within semantic actions are deprecated. The function \verb+parse_error+ is deprecated. They are replaced with keywords (\sref{sec:errors}). \item \menhir's error handling mechanism (\sref{sec:errors}) is inspired by \ocamlyacc's, but is not guaranteed to be fully compatible. Error recovery, also known as re-synchronization, is not supported by \menhir. \item The way in which severe conflicts (\sref{sec:conflicts}) are resolved is not guaranteed to be fully compatible with \ocamlyacc. \item \menhir warns about unused \dtoken, \dnonassoc, \dleft, and \dright declarations. It also warns about \dprec annotations that do not help resolve a conflict. \item \menhir accepts \ocaml-style comments. \item \menhir allows \dstart and \dtype declarations to be condensed. \item \menhir allows two (or more) productions to share a single semantic action. \item \menhir produces better error messages when a semantic action contains ill-balanced parentheses. % \item \ocamlyacc allows nonterminal start symbols to start with an uppercase % letter, and produces invalid \ocaml code in that case. \menhir disallows this. \item \ocamlyacc ignores semicolons and commas everywhere. \menhir also ignores semicolons everywhere, but treats commas as significant. Commas are optional within \dtoken declarations. % \item \ocamlyacc ignores multiple definitions of a token, even when two of them are at % different types. \menhir rejects this. \item \ocamlyacc allows \dtype declarations to refer to terminal or non-terminal symbols, whereas \menhir requires them to refer to non-terminal symbols. Types can be assigned to terminal symbols with a \dtoken declaration. 
\end{itemize}
% ------------------------------------------------------------------------------
\section{Questions and Answers}
\label{sec:qa}
$\mathstrut$ % Ensure correct indentation of the first question. Ugly.
\vspace{-\baselineskip}
\question{Is \menhir faster than \ocamlyacc? What is the speed difference
between \texttt{menhir} and \texttt{menhir -{}-table}?} A (not quite
scientific) benchmark suggests that the parsers produced by \ocamlyacc and
\texttt{menhir -{}-table} have comparable speed, whereas those produced by
\texttt{menhir} are between 2 and 5 times faster. This benchmark excludes the
time spent in the lexer and in the semantic actions.
\question{How do I write \Makefile rules for \menhir?} This can be quite
difficult, especially when \oinfer is used. Look at
\distrib{demos/obsolete/Makefile.shared} or (better) use \ocamlbuild, which
has built-in compilation rules for \ocaml and \menhir (for a single-module
parser, something like \verb+ocamlbuild -use-menhir main.native+, where
\verb+main+ is the name of your main module, may suffice).
% TEMPORARY document the use of \ocamlbuild (and point to the demos)
% basic scenario: ocamlbuild -use-menhir
% what to add for the table back-end? (--table and -package menhirLib)
% or use the --suggest options?
% advanced scenario: multi-module, use .mlypack
% advanced scenario: --only-tokens and -external-tokens, use .mlypack + _tags + myocamlbuild.ml
\question{\menhir reports \emph{more} shift/reduce conflicts than
\ocamlyacc! How come?} \ocamlyacc sometimes merges two states of the
automaton that \menhir considers distinct. This happens when the grammar is
not LALR(1). If these two states happen to contain a shift/reduce conflict,
then \menhir reports two conflicts, while \ocamlyacc only reports one. Of
course, the two conflicts are very similar, so fixing one will usually fix
the other as well.
\question{I do not use \ocamllex. Is there an API that does not involve
lexing buffers?} Like \ocamlyacc, \menhir produces parsers whose monolithic
API (\sref{sec:monolithic}) is intended for use with \ocamllex. However, it
is possible to convert them, after the fact, to a simpler, revised API. In
the revised API, there are no lexing buffers, and a lexer is just a function
from unit to tokens. Converters are provided by the library module
\menhirlibconvert. This can be useful, for instance, for users of
\texttt{ulex}, the Unicode lexer generator. Also, please note that \menhir's
incremental API (\sref{sec:incremental}) does not mention the type
\verb+Lexing.lexbuf+. In this API, the parser expects to be supplied with
triples of a token and start/end positions of type \verb+Lexing.position+.
\question{I need both \dinline and non-\dinline versions of a nonterminal
symbol. Is this possible?} Define an \dinline version first, then use it to
define a non-\dinline version, like this:
\begin{verbatim}
%inline ioption(X):
  (* nothing *) { None }
| x = X         { Some x }

option(X):
  o = ioption(X) { o }
\end{verbatim}
This can work even in the presence of recursion, as illustrated by the
following definition of (reversed, left-recursive, possibly empty) lists:
\begin{verbatim}
%inline irevlist(X):
  (* nothing *)         { [] }
| xs = revlist(X) x = X { x :: xs }

revlist(X):
  xs = irevlist(X) { xs }
\end{verbatim}
The definition of \verb+irevlist+ is expanded into the definition of
\verb+revlist+, so in the end, \verb+revlist+ receives its normal, recursive
definition. One can then view \verb+irevlist+ as a variant of \verb+revlist+
that is inlined one level deep.
% Intentionally do not call this "list", because people may copy-paste this
% definition, and will end up unintentionally redefining the meaning of *.
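As a usage note, here is a hypothetical fragment that relies on the above
definitions (the token \verb+ID+ and the semantic action are made up for the
sake of this example). Since \verb+revlist+ builds its list in reverse,
\verb+List.rev+ restores left-to-right order:
\begin{verbatim}
idents:
  xs = revlist(ID) { List.rev xs }
\end{verbatim}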
\question{Can I ship a generated parser while avoiding a dependency on
\menhirlib?} Yes. One option is to use the code-based back-end (that is, not
to use \otable). In this case, the generated parser is self-contained.
Another option is to use the table-based back-end (that is, to use \otable)
and include a copy of the files \verb+menhirLib.{ml,mli}+ together with the
generated parser. The command \texttt{menhir \osuggestmenhirlib} will tell
you where to find these source files.
\question{Why is \texttt{\$startpos} off towards the left? It seems to
include some leading whitespace.} Indeed, as of 2015/11/04, the computation
of positions has changed so as to match \ocamlyacc's behavior. As a result,
\texttt{\$startpos} can now appear to be too far off to the left. This is
explained in \sref{sec:positions}. In short, the solution is to use
\verb+$symbolstartpos+ instead.
\question{Can I pretty-print a grammar in ASCII, HTML, or \LaTeX{} format?}
Yes. Have a look at \texttt{obelisk} \cite{obelisk}.
% ------------------------------------------------------------------------------
\section{Technical background}
After experimenting with Knuth's canonical LR(1) technique~\cite{knuth-lr-65},
we found that it \emph{really} is not practical, even on today's computers.
For this reason, \menhir implements a slightly modified version of Pager's
algorithm~\cite{pager-77}, which merges states on the fly if it can be proved
that no reduce/reduce conflicts will arise as a consequence of this decision.
This is how \menhir avoids the so-called \emph{mysterious} conflicts created
by LALR(1) parser generators~\cite[section 5.7]{bison}.
\menhir's algorithm for explaining conflicts is inspired by DeRemer and
Pennello's~\cite{deremer-pennello-82} and adapted for use with Pager's
construction technique.
By default, \menhir produces code, as opposed to tables. This approach has
been explored before~\cite{bhamidipaty-proebsting-98,horspool-faster-90}.
\menhir performs some static analysis of the automaton in order to produce
more compact code.
When asked to produce tables, \menhir performs compression via first-fit row
displacement, as described by Tarjan and Yao~\cite{tarjan-yao-79}. Double
displacement is not used. The action table is made sparse by factoring out an
error matrix, as suggested by Dencker, Dürre, and Heuft~\cite{dencker-84}.
The type-theoretic tricks that triggered our interest in LR
parsers~\cite{pottier-regis-gianas-typed-lr} are not implemented in \menhir.
In the beginning, we did not implement them because, at the time, the \ocaml
compiler did not offer generalized algebraic data types (GADTs). Today,
\ocaml has GADTs, but, as the saying goes, ``if it ain't broke, don't fix
it''.
The main ideas behind the Coq back-end are described in a paper by Jourdan,
Pottier, and Leroy~\cite{jourdan-leroy-pottier-12}.
The approach to error reports presented in \sref{sec:errors:new} was proposed
by Jeffery~\citeyear{jeffery-03} and further explored by
Pottier~\citeyear{pottier-reachability-cc-2016}.
% ------------------------------------------------------------------------------
\section{Acknowledgements}
\menhir's interpreter (\ointerpret) and table-based back-end (\otable) were
implemented by Guillaume Bau, Raja Boujbel, and François Pottier. The project
was generously funded by Jane Street Capital, LLC through the ``OCaml Summer
Project'' initiative.
Frédéric Bour provided motivation and an initial implementation for the
incremental API, for the inspection API, for attributes, and for \menhirsdk.
\href{https://github.com/ocaml/merlin}{Merlin}, an editor service for
\ocaml, contains an impressive incremental, syntax-error-tolerant \ocaml
parser, which is based on \menhir and has been a driving force for \menhir's
APIs.
Jacques-Henri Jourdan designed and implemented the Coq back-end and did the
Coq proofs for it.
Gabriel Scherer provided motivation for investigating Jeffery's technique.
% ------------------------------------------------------------------------------
% Bibliography.
\bibliographystyle{plain}
\bibliography{local}
\end{document}
% LocalWords: Yann Régis Gianas Regis inria Menhir filename mly basename Coq
% LocalWords: coq vy tt Coq's iox Menhir's nonterminal graphviz nullable calc
% LocalWords: inline postprocessed postprocessing ocamlc bytecode linkpkg cmo
% LocalWords: menhirLib ocamlopt cmx qa ocamlrun runtime uid productiongroups
% LocalWords: prec Actuals parameterization Parameterizing ds actuals plist xs
% LocalWords: loption LPAREN RPAREN Inlining inlined inlining lp ioption bool
% LocalWords: boption sep nonassociative multi basicshiftreduce lookahead decl
% LocalWords: UIDENT LIDENT decls tycon expr exprs basiceos basiceosdump lex
% LocalWords: curr Lexing lexbuf pos cnum startpos endpos startofs endofs LALR
% LocalWords: syntaxerror whitespace EOL cst API lexing MenhirInterpreter pc
% LocalWords: InputNeeded HandlingError env CompCert Aut se nat init cparser
% LocalWords: validator subdirectory EBNF reentrant eos typecheck menhir ulex
% LocalWords: DeRemer Pennello's Tarjan Yao Dencker Dürre Heuft Bau Raja LLC
% LocalWords: Acknowledgements Boujbel Frédéric Bour
menhir-20171222/doc/macros.tex0000664000175000017500000001657313217215730016300 0ustar fpottierfpottier% EBNF syntax.
\let\nt\textit % Nonterminal.
\newcommand{\is}{& ${} ::= {}$ &}
\newcommand{\optional}[1]{$[\,\text{#1}\,]$} % Option.
\newcommand{\seplist}[2]{#2#1${}\ldots{}$#1#2}
\newcommand{\sepspacelist}[1]{\seplist{\ }{#1}}
\newcommand{\sepcommalist}[1]{\seplist{,\ }{#1}}
\newcommand{\newprod}{\\\hskip 1cm\barre\hskip2mm}
\newcommand{\phaprod}{\\\hskip 1cm\phantom\barre\hskip2mm}
% Concrete syntax.
\newcommand{\percentpercent}{\kw{\%\%}\xspace}
\newcommand{\deuxpoints}{\kw{:}\xspace}
\newcommand{\barre}{\kw{\textbar}\xspace}
\newcommand{\kangle}[1]{\kw{\textless} #1 \kw{\textgreater}}
\newcommand{\ocamltype}{\kangle{\textit{\ocaml type}}\xspace}
\newcommand{\ocamlparam}{\kangle{\nt{uid} \deuxpoints \textit{\ocaml module type}}\xspace}
\newcommand{\dheader}[1]{\kw{\%\{} #1 \kw{\%\}}}
\newcommand{\dtoken}{\kw{\%token}\xspace}
\newcommand{\dstart}{\kw{\%start}\xspace}
\newcommand{\dtype}{\kw{\%type}\xspace}
\newcommand{\dnonassoc}{\kw{\%nonassoc}\xspace}
\newcommand{\dleft}{\kw{\%left}\xspace}
\newcommand{\dright}{\kw{\%right}\xspace}
\newcommand{\dparameter}{\kw{\%parameter}\xspace}
\newcommand{\dpublic}{\kw{\%public}\xspace}
\newcommand{\dinline}{\kw{\%inline}\xspace}
\newcommand{\donerrorreduce}{\kw{\%on\_error\_reduce}\xspace}
\newcommand{\dattribute}{\kw{\%attribute}\xspace}
\newcommand{\dpaction}[1]{\kw{\{} #1 \kw{\}}\xspace}
\newcommand{\daction}{\dpaction{\textit{\ocaml code}}\xspace}
\newcommand{\dprec}{\kw{\%prec}\xspace}
\newcommand{\dequal}{\kw{=}\xspace}
\newcommand{\dquestion}{\kw{?}\xspace}
\newcommand{\dplus}{\kw{+}\xspace}
\newcommand{\dstar}{\kw{*}\xspace}
\newcommand{\dlpar}{\kw{(}\,\xspace}
\newcommand{\drpar}{\,\kw{)}\xspace}
\newcommand{\eos}{\kw{\#}\xspace}
\newcommand{\dnewline}{\kw{\textbackslash n}\xspace}
% Stylistic conventions.
\newcommand{\kw}[1]{\text{\upshape\sf\bfseries #1}} \newcommand{\inlinesidecomment}[1]{\textit{\textbf{\footnotesize // #1}}} \newcommand{\sidecomment}[1]{\hskip 2cm\inlinesidecomment{#1}} \newcommand{\docswitch}[1]{\vspace{1mm plus 1mm}#1.\hskip 3mm} \newcommand{\error}{\kw{error}\xspace} % Links to Menhir's repository. \newcommand{\repo}[2]{\href{https://gitlab.inria.fr/fpottier/menhir/blob/master/#1}{#2}} \newcommand{\menhirlibconvert}{\repo{src/Convert.mli}{\texttt{MenhirLib.Convert}}\xspace} \newcommand{\menhirlibincrementalengine}{\repo{src/IncrementalEngine.ml}{\texttt{MenhirLib.IncrementalEngine}}\xspace} % Links to CompCert's repository. \newcommand{\compcertgithub}{https://github.com/AbsInt/CompCert/tree/master} \newcommand{\compcertgithubfile}[1]{\href{\compcertgithub/#1}{\texttt{#1}}} % Abbreviations. \newcommand{\menhir}{Menhir\xspace} \newcommand{\menhirlib}{\texttt{MenhirLib}\xspace} \newcommand{\menhirsdk}{\texttt{MenhirSdk}\xspace} \newcommand{\menhirinterpreter}{\texttt{MenhirInterpreter}\xspace} \newcommand{\cmenhir}{\texttt{menhir}\xspace} \newcommand{\ml}{\texttt{.ml}\xspace} \newcommand{\mli}{\texttt{.mli}\xspace} \newcommand{\mly}{\texttt{.mly}\xspace} \newcommand{\cmly}{\texttt{.cmly}\xspace} \newcommand{\vy}{\texttt{.vy}\xspace} \newcommand{\ocaml}{OCaml\xspace} \newcommand{\ocamlc}{\texttt{ocamlc}\xspace} \newcommand{\ocamlopt}{\texttt{ocamlopt}\xspace} \newcommand{\ocamldep}{\texttt{ocamldep}\xspace} \newcommand{\ocamlfind}{\texttt{ocamlfind}\xspace} \newcommand{\make}{\texttt{make}\xspace} \newcommand{\omake}{\texttt{omake}\xspace} \newcommand{\ocamlbuild}{\texttt{ocamlbuild}\xspace} \newcommand{\Makefile}{\texttt{Makefile}\xspace} \newcommand{\yacc}{\texttt{yacc}\xspace} \newcommand{\bison}{\texttt{bison}\xspace} \newcommand{\ocamlyacc}{\texttt{ocamlyacc}\xspace} \newcommand{\ocamllex}{\texttt{ocamllex}\xspace} \newcommand{\token}{\texttt{token}\xspace} \newcommand{\automaton}{\texttt{.automaton}\xspace} \newcommand{\conflicts}{\texttt{.conflicts}\xspace} \newcommand{\dott}{\texttt{.dot}\xspace} % Files in the distribution. \newcommand{\distrib}[1]{\texttt{#1}} % Environments. \newcommand{\question}[1]{\vspace{3mm}$\diamond$ \textbf{#1}} % Ocamlweb settings. \newcommand{\basic}[1]{\textit{#1}} \let\ocwkw\kw \let\ocwbt\basic \let\ocwupperid\basic \let\ocwlowerid\basic \let\ocwtv\basic \newcommand{\ocwbar}{\vskip 2mm plus 2mm \hrule \vskip 2mm plus 2mm} \newcommand{\tcup}{${}\cup{}$} \newcommand{\tcap}{${}\cap{}$} \newcommand{\tminus}{${}\setminus{}$} % Command line options. \newcommand{\obase}{\texttt{-{}-base}\xspace} \newcommand{\ocanonical}{\texttt{-{}-canonical}\xspace} % undocumented! 
\newcommand{\ocomment}{\texttt{-{}-comment}\xspace} \newcommand{\ocmly}{\texttt{-{}-cmly}\xspace} \newcommand{\odepend}{\texttt{-{}-depend}\xspace} \newcommand{\orawdepend}{\texttt{-{}-raw-depend}\xspace} \newcommand{\odump}{\texttt{-{}-dump}\xspace} \newcommand{\oerrorrecovery}{\texttt{-{}-error-recovery}\xspace} \newcommand{\oexplain}{\texttt{-{}-explain}\xspace} \newcommand{\oexternaltokens}{\texttt{-{}-external-tokens}\xspace} \newcommand{\ofixedexc}{\texttt{-{}-fixed-exception}\xspace} \newcommand{\ograph}{\texttt{-{}-graph}\xspace} \newcommand{\oignoreone}{\texttt{-{}-unused-token}\xspace} \newcommand{\oignoreall}{\texttt{-{}-unused-tokens}\xspace} \newcommand{\oignoreprec}{\texttt{-{}-unused-precedence-levels}\xspace} \newcommand{\oinfer}{\texttt{-{}-infer}\xspace} \newcommand{\oinspection}{\texttt{-{}-inspection}\xspace} \newcommand{\ointerpret}{\texttt{-{}-interpret}\xspace} \newcommand{\ointerpretshowcst}{\texttt{-{}-interpret-show-cst}\xspace} \newcommand{\ologautomaton}{\texttt{-{}-log-automaton}\xspace} \newcommand{\ologcode}{\texttt{-{}-log-code}\xspace} \newcommand{\ologgrammar}{\texttt{-{}-log-grammar}\xspace} \newcommand{\onoinline}{\texttt{-{}-no-inline}\xspace} \newcommand{\onostdlib}{\texttt{-{}-no-stdlib}\xspace} \newcommand{\oocamlc}{\texttt{-{}-ocamlc}\xspace} \newcommand{\oocamldep}{\texttt{-{}-ocamldep}\xspace} \newcommand{\oonlypreprocess}{\texttt{-{}-only-preprocess}\xspace} \newcommand{\oonlytokens}{\texttt{-{}-only-tokens}\xspace} \newcommand{\ostrict}{\texttt{-{}-strict}\xspace} \newcommand{\osuggestcomp}{\texttt{-{}-suggest-comp-flags}\xspace} \newcommand{\osuggestlinkb}{\texttt{-{}-suggest-link-flags-byte}\xspace} \newcommand{\osuggestlinko}{\texttt{-{}-suggest-link-flags-opt}\xspace} \newcommand{\osuggestmenhirlib}{\texttt{-{}-suggest-menhirLib}\xspace} \newcommand{\osuggestocamlfind}{\texttt{-{}-suggest-ocamlfind}\xspace} \newcommand{\otable}{\texttt{-{}-table}\xspace} \newcommand{\otimings}{\texttt{-{}-timings}\xspace} \newcommand{\otrace}{\texttt{-{}-trace}\xspace} \newcommand{\ostdlib}{\texttt{-{}-stdlib}\xspace} \newcommand{\oversion}{\texttt{-{}-version}\xspace} \newcommand{\ocoq}{\texttt{-{}-coq}\xspace} \newcommand{\ocoqnocomplete}{\texttt{-{}-coq-no-complete}\xspace} \newcommand{\ocoqnoactions}{\texttt{-{}-coq-no-actions}\xspace} \newcommand{\olisterrors}{\texttt{-{}-list-errors}\xspace} \newcommand{\ointerpreterror}{\texttt{-{}-interpret-error}\xspace} \newcommand{\ocompileerrors}{\texttt{-{}-compile-errors}\xspace} \newcommand{\ocompareerrors}{\texttt{-{}-compare-errors}\xspace} \newcommand{\oupdateerrors}{\texttt{-{}-update-errors}\xspace} \newcommand{\oechoerrors}{\texttt{-{}-echo-errors}\xspace} % The .messages file format. \newcommand{\messages}{\texttt{.messages}\xspace} % Adding mathstruts to ensure a common baseline. \newcommand{\mycommonbaseline}{ \let\oldnt\nt \renewcommand{\nt}[1]{$\mathstrut$\oldnt{##1}} \let\oldbasic\basic \renewcommand{\basic}[1]{$\mathstrut$\oldbasic{##1}} } % Position keywords. 
\newcommand{\ksymbolstartpos}{\texttt{\$symbolstartpos}\xspace} menhir-20171222/doc/declarations-phantom.mly0000664000175000017500000000047313217215730021123 0ustar fpottierfpottier%token ID ARROW LPAREN RPAREN COLON SEMICOLON %start program %% typ0: ID | LPAREN typ1(RPAREN) RPAREN {} typ1(phantom): typ0 | typ0 ARROW typ1(phantom) {} declaration(phantom): ID COLON typ1(phantom) {} program: | LPAREN declaration(RPAREN) RPAREN | declaration(SEMICOLON) SEMICOLON {} menhir-20171222/doc/whizzy.sh0000664000175000017500000000046013217215730016160 0ustar fpottierfpottier# Include TEXINPUTS setting from Makefile.local. # Do not include all of Makefile.local, because both whizzytex and Makefile # rely on NAME (for different purposes). if [ -f Makefile.local ] then echo "Extracting TEXINPUTS setting from Makefile.local..." `grep TEXINPUTS Makefile.local` fi menhir-20171222/doc/sigplanconf.cls0000664000175000017500000007120713217215730017275 0ustar fpottierfpottier%----------------------------------------------------------------------------- % % LaTeX Class/Style File % % Name: sigplanconf.cls % Purpose: A LaTeX 2e class file for SIGPLAN conference proceedings. % This class file supercedes acm_proc_article-sp, % sig-alternate, and sigplan-proc. % % Author: Paul C. Anagnostopoulos % Windfall Software % 978 371-2316 % paul@windfall.com % % Created: 12 September 2004 % % Revisions: See end of file. % %----------------------------------------------------------------------------- \NeedsTeXFormat{LaTeX2e}[1995/12/01] \ProvidesClass{sigplanconf}[2005/03/07 v0.93 ACM SIGPLAN Proceedings] % The following few pages contain LaTeX programming extensions adapted % from the ZzTeX macro package. % Token Hackery % ----- ------- \def \@expandaftertwice {\expandafter\expandafter\expandafter} \def \@expandafterthrice {\expandafter\expandafter\expandafter\expandafter \expandafter\expandafter\expandafter} % This macro discards the next token. \def \@discardtok #1{}% token % This macro removes the `pt' following a dimension. {\catcode `\p = 12 \catcode `\t = 12 \gdef \@remover #1pt{#1} } % \catcode % This macro extracts the contents of a macro and returns it as plain text. % Usage: \expandafter\@defof \meaning\macro\@mark \def \@defof #1:->#2\@mark{#2} % Control Sequence Names % ------- -------- ----- \def \@name #1{% {\tokens} \csname \expandafter\@discardtok \string#1\endcsname} \def \@withname #1#2{% {\command}{\tokens} \expandafter#1\csname \expandafter\@discardtok \string#2\endcsname} % Flags (Booleans) % ----- ---------- % The boolean literals \@true and \@false are appropriate for use with % the \if command, which tests the codes of the next two characters. \def \@true {TT} \def \@false {FL} \def \@setflag #1=#2{\edef #1{#2}}% \flag = boolean % IF and Predicates % -- --- ---------- % A "predicate" is a macro that returns \@true or \@false as its value. % Such values are suitable for use with the \if conditional. For example: % % \if \oddp{\x} \else \fi % A predicate can be used with \@setflag as follows: % % \@setflag \flag = {} % Here are the predicates for TeX's repertoire of conditional % commands. These might be more appropriately interspersed with % other definitions in this module, but what the heck. % Some additional "obvious" predicates are defined. 
\def \eqlp #1#2{\ifnum #1 = #2\@true \else \@false \fi} \def \neqlp #1#2{\ifnum #1 = #2\@false \else \@true \fi} \def \lssp #1#2{\ifnum #1 < #2\@true \else \@false \fi} \def \gtrp #1#2{\ifnum #1 > #2\@true \else \@false \fi} \def \zerop #1{\ifnum #1 = 0\@true \else \@false \fi} \def \onep #1{\ifnum #1 = 1\@true \else \@false \fi} \def \posp #1{\ifnum #1 > 0\@true \else \@false \fi} \def \negp #1{\ifnum #1 < 0\@true \else \@false \fi} \def \oddp #1{\ifodd #1\@true \else \@false \fi} \def \evenp #1{\ifodd #1\@false \else \@true \fi} \def \rangep #1#2#3{\if \orp{\lssp{#1}{#2}}{\gtrp{#1}{#3}}\@false \else \@true \fi} \def \tensp #1{\rangep{#1}{10}{19}} \def \dimeqlp #1#2{\ifdim #1 = #2\@true \else \@false \fi} \def \dimneqlp #1#2{\ifdim #1 = #2\@false \else \@true \fi} \def \dimlssp #1#2{\ifdim #1 < #2\@true \else \@false \fi} \def \dimgtrp #1#2{\ifdim #1 > #2\@true \else \@false \fi} \def \dimzerop #1{\ifdim #1 = 0pt\@true \else \@false \fi} \def \dimposp #1{\ifdim #1 > 0pt\@true \else \@false \fi} \def \dimnegp #1{\ifdim #1 < 0pt\@true \else \@false \fi} \def \vmodep {\ifvmode \@true \else \@false \fi} \def \hmodep {\ifhmode \@true \else \@false \fi} \def \mathmodep {\ifmmode \@true \else \@false \fi} \def \textmodep {\ifmmode \@false \else \@true \fi} \def \innermodep {\ifinner \@true \else \@false \fi} \long\def \codeeqlp #1#2{\if #1#2\@true \else \@false \fi} \long\def \cateqlp #1#2{\ifcat #1#2\@true \else \@false \fi} \long\def \tokeqlp #1#2{\ifx #1#2\@true \else \@false \fi} \long\def \xtokeqlp #1#2{\expandafter\ifx #1#2\@true \else \@false \fi} \long\def \definedp #1{% \expandafter\ifx \csname \expandafter\@discardtok \string#1\endcsname \relax \@false \else \@true \fi} \long\def \undefinedp #1{% \expandafter\ifx \csname \expandafter\@discardtok \string#1\endcsname \relax \@true \else \@false \fi} \def \emptydefp #1{\ifx #1\@empty \@true \else \@false \fi}% {\name} \let \emptylistp = \emptydefp \long\def \emptyargp #1{% {#n} \@empargp #1\@empargq\@mark} \long\def \@empargp #1#2\@mark{% \ifx #1\@empargq \@true \else \@false \fi} \def \@empargq {\@empargq} \def \emptytoksp #1{% {\tokenreg} \expandafter\@emptoksp \the#1\@mark} \long\def \@emptoksp #1\@mark{\emptyargp{#1}} \def \voidboxp #1{\ifvoid #1\@true \else \@false \fi} \def \hboxp #1{\ifhbox #1\@true \else \@false \fi} \def \vboxp #1{\ifvbox #1\@true \else \@false \fi} \def \eofp #1{\ifeof #1\@true \else \@false \fi} % Flags can also be used as predicates, as in: % % \if \flaga \else \fi % Now here we have predicates for the common logical operators. \def \notp #1{\if #1\@false \else \@true \fi} \def \andp #1#2{\if #1% \if #2\@true \else \@false \fi \else \@false \fi} \def \orp #1#2{\if #1% \@true \else \if #2\@true \else \@false \fi \fi} \def \xorp #1#2{\if #1% \if #2\@false \else \@true \fi \else \if #2\@true \else \@false \fi \fi} % Arithmetic % ---------- \def \@increment #1{\advance #1 by 1\relax}% {\count} \def \@decrement #1{\advance #1 by -1\relax}% {\count} % Options % ------- \@setflag \@blockstyle = \@false \@setflag \@copyrightwanted = \@true \@setflag \@explicitsize = \@false \@setflag \@mathtime = \@false \@setflag \@ninepoint = \@true \@setflag \@onecolumn = \@false \@setflag \@preprint = \@false \newcount{\@numheaddepth} \@numheaddepth = 3 \@setflag \@times = \@false % Note that all the dangerous article class options are trapped. 
\DeclareOption{9pt}{\@setflag \@ninepoint = \@true \@setflag \@explicitsize = \@true} \DeclareOption{10pt}{\PassOptionsToClass{10pt}{article}% \@setflag \@ninepoint = \@false \@setflag \@explicitsize = \@true} \DeclareOption{11pt}{\PassOptionsToClass{11pt}{article}% \@setflag \@ninepoint = \@false \@setflag \@explicitsize = \@true} \DeclareOption{12pt}{\@unsupportedoption{12pt}} \DeclareOption{a4paper}{\@unsupportedoption{a4paper}} \DeclareOption{a5paper}{\@unsupportedoption{a5paper}} \DeclareOption{b5paper}{\@unsupportedoption{b5paper}} \DeclareOption{blockstyle}{\@setflag \@blockstyle = \@true} \DeclareOption{cm}{\@setflag \@times = \@false} \DeclareOption{computermodern}{\@setflag \@times = \@false} \DeclareOption{executivepaper}{\@unsupportedoption{executivepaper}} \DeclareOption{indentedstyle}{\@setflag \@blockstyle = \@false} \DeclareOption{landscape}{\@unsupportedoption{landscape}} \DeclareOption{legalpaper}{\@unsupportedoption{legalpaper}} \DeclareOption{letterpaper}{\@unsupportedoption{letterpaper}} \DeclareOption{mathtime}{\@setflag \@mathtime = \@true} \DeclareOption{nocopyrightspace}{\@setflag \@copyrightwanted = \@false} \DeclareOption{notitlepage}{\@unsupportedoption{notitlepage}} \DeclareOption{numberedpars}{\@numheaddepth = 4} \DeclareOption{onecolumn}{\@setflag \@onecolumn = \@true} \DeclareOption{preprint}{\@setflag \@preprint = \@true} \DeclareOption{times}{\@setflag \@times = \@true} \DeclareOption{titlepage}{\@unsupportedoption{titlepage}} \DeclareOption{twocolumn}{\@setflag \@onecolumn = \@false} \DeclareOption*{\PassOptionsToClass{\CurrentOption}{article}} \ExecuteOptions{9pt,indentedstyle,times} \@setflag \@explicitsize = \@false \ProcessOptions \if \@onecolumn \if \notp{\@explicitsize}% \@setflag \@ninepoint = \@false \PassOptionsToClass{11pt}{article}% \fi \PassOptionsToClass{twoside,onecolumn}{article} \else \PassOptionsToClass{twoside,twocolumn}{article} \fi \LoadClass{article} \def \@unsupportedoption #1{% \ClassError{proc}{The standard '#1' option is not supported.}} % Utilities % --------- \newcommand{\setvspace}[2]{% #1 = #2 \advance #1 by -1\parskip} % Document Parameters % -------- ---------- % Page: \setlength{\hoffset}{-1in} \setlength{\voffset}{-1in} \setlength{\topmargin}{1in} \setlength{\headheight}{0pt} \setlength{\headsep}{0pt} \if \@onecolumn \setlength{\evensidemargin}{.75in} \setlength{\oddsidemargin}{.75in} \else \setlength{\evensidemargin}{.75in} \setlength{\oddsidemargin}{.75in} \fi % Text area: \newdimen{\standardtextwidth} \setlength{\standardtextwidth}{42pc} \if \@onecolumn \setlength{\textwidth}{40.5pc} \else \setlength{\textwidth}{\standardtextwidth} \fi \setlength{\topskip}{8pt} \setlength{\columnsep}{2pc} \setlength{\textheight}{54.5pc} % Running foot: \setlength{\footskip}{30pt} % Paragraphs: \if \@blockstyle \setlength{\parskip}{5pt plus .1pt minus .5pt} \setlength{\parindent}{0pt} \else \setlength{\parskip}{0pt} \setlength{\parindent}{12pt} \fi \setlength{\lineskip}{.5pt} \setlength{\lineskiplimit}{\lineskip} \frenchspacing \pretolerance = 400 \tolerance = \pretolerance \setlength{\emergencystretch}{5pt} \clubpenalty = 10000 \widowpenalty = 10000 \setlength{\hfuzz}{.5pt} % Standard vertical spaces: \newskip{\standardvspace} \setvspace{\standardvspace}{5pt plus 1pt minus .5pt} % Margin paragraphs: \setlength{\marginparwidth}{0pt} \setlength{\marginparsep}{0pt} \setlength{\marginparpush}{0pt} \setlength{\skip\footins}{8pt plus 3pt minus 1pt} \setlength{\footnotesep}{9pt} \renewcommand{\footnoterule}{% \hrule width .5\columnwidth height 
.33pt depth 0pt} \renewcommand{\@makefntext}[1]{% \noindent \@makefnmark \hspace{1pt}#1} % Floats: \setcounter{topnumber}{4} \setcounter{bottomnumber}{1} \setcounter{totalnumber}{4} \renewcommand{\fps@figure}{tp} \renewcommand{\fps@table}{tp} \renewcommand{\topfraction}{0.90} \renewcommand{\bottomfraction}{0.30} \renewcommand{\textfraction}{0.10} \renewcommand{\floatpagefraction}{0.75} \setcounter{dbltopnumber}{4} \renewcommand{\dbltopfraction}{\topfraction} \renewcommand{\dblfloatpagefraction}{\floatpagefraction} \setlength{\floatsep}{18pt plus 4pt minus 2pt} \setlength{\textfloatsep}{18pt plus 4pt minus 3pt} \setlength{\intextsep}{10pt plus 4pt minus 3pt} \setlength{\dblfloatsep}{18pt plus 4pt minus 2pt} \setlength{\dbltextfloatsep}{20pt plus 4pt minus 3pt} % Miscellaneous: \errorcontextlines = 5 % Fonts % ----- \if \@times \renewcommand{\rmdefault}{ptm}% \if \@mathtime \usepackage[mtbold,noTS1]{mathtime}% \else %%% \usepackage{mathptm}% \fi \else \relax \fi \if \@ninepoint \renewcommand{\normalsize}{% \@setfontsize{\normalsize}{9pt}{10pt}% \setlength{\abovedisplayskip}{5pt plus 1pt minus .5pt}% \setlength{\belowdisplayskip}{\abovedisplayskip}% \setlength{\abovedisplayshortskip}{3pt plus 1pt minus 2pt}% \setlength{\belowdisplayshortskip}{\abovedisplayshortskip}} \renewcommand{\tiny}{\@setfontsize{\tiny}{5pt}{6pt}} \renewcommand{\scriptsize}{\@setfontsize{\scriptsize}{7pt}{8pt}} \renewcommand{\small}{% \@setfontsize{\small}{8pt}{9pt}% \setlength{\abovedisplayskip}{4pt plus 1pt minus 1pt}% \setlength{\belowdisplayskip}{\abovedisplayskip}% \setlength{\abovedisplayshortskip}{2pt plus 1pt}% \setlength{\belowdisplayshortskip}{\abovedisplayshortskip}} \renewcommand{\footnotesize}{% \@setfontsize{\footnotesize}{8pt}{9pt}% \setlength{\abovedisplayskip}{4pt plus 1pt minus .5pt}% \setlength{\belowdisplayskip}{\abovedisplayskip}% \setlength{\abovedisplayshortskip}{2pt plus 1pt}% \setlength{\belowdisplayshortskip}{\abovedisplayshortskip}} \renewcommand{\large}{\@setfontsize{\large}{11pt}{13pt}} \renewcommand{\Large}{\@setfontsize{\Large}{14pt}{18pt}} \renewcommand{\LARGE}{\@setfontsize{\LARGE}{18pt}{20pt}} \renewcommand{\huge}{\@setfontsize{\huge}{20pt}{25pt}} \renewcommand{\Huge}{\@setfontsize{\Huge}{25pt}{30pt}} \fi % Abstract % -------- \renewenvironment{abstract}{% \section*{Abstract}% \normalsize}{% } % Bibliography % ------------ \renewenvironment{thebibliography}[1] {\section*{\refname \@mkboth{\MakeUppercase\refname}{\MakeUppercase\refname}}% \list{\@biblabel{\@arabic\c@enumiv}}% {\settowidth\labelwidth{\@biblabel{#1}}% \leftmargin\labelwidth \advance\leftmargin\labelsep \@openbib@code \usecounter{enumiv}% \let\p@enumiv\@empty \renewcommand\theenumiv{\@arabic\c@enumiv}}% \small \softraggedright%%%\sloppy \clubpenalty4000 \@clubpenalty \clubpenalty \widowpenalty4000% \sfcode`\.\@m} {\def\@noitemerr {\@latex@warning{Empty `thebibliography' environment}}% \endlist} % Categories % ---------- \@setflag \@firstcategory = \@true \newcommand{\category}[3]{% \if \@firstcategory \paragraph*{Categories and Subject Descriptors}% \@setflag \@firstcategory = \@false \else \unskip ;\hspace{.75em}% \fi \@ifnextchar [{\@category{#1}{#2}{#3}}{\@category{#1}{#2}{#3}[]}} \def \@category #1#2#3[#4]{% {\let \and = \relax #1 [\textit{#2}]% \if \emptyargp{#4}% \if \notp{\emptyargp{#3}}: #3\fi \else :\space \if \notp{\emptyargp{#3}}#3---\fi \textrm{#4}% \fi}} % Copyright Notice % --------- ------ \def \ftype@copyrightbox {8} \def \@toappear {} \def \@permission {} \def \@copyrightspace {% \@float{copyrightbox}[b]% 
\vbox to 1.25in{% \vfill \begin{center}% \@toappear \end{center}}% \end@float} \long\def \toappear #1{% \def \@toappear {\parbox[b]{20pc}{\scriptsize #1}}} %%%\def \toappearbox #1{% %%% \def \@toappear {\raisebox{5pt}{\framebox[20pc]{\parbox[b]{19pc}{#1}}}}} \toappear{% \noindent \@permission \par \vspace{2pt} \noindent \textsl{\@conferencename}\quad \@conferenceinfo \par \@copyrightinfo} \newcommand{\permission}[1]{% \gdef \@permission {#1}} \permission{% Permission to make digital or hard copies of all or part of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. To copy otherwise, to republish, to post on servers or to redistribute to lists, requires prior specific permission and/or a fee.} \def \@copyrightinfo {% \if \notp{\emptydefp{\copyrightinfo}}% Copyright \copyright\ \@copyrightyear\ ACM \@copyrightdata\dots \$5.00. \fi} % Enunciations % ------------ \def \@begintheorem #1#2{% {name}{number} \trivlist \item[\hskip \labelsep \textsc{#1 #2.}]% \itshape\selectfont \ignorespaces} \def \@opargbegintheorem #1#2#3{% {name}{number}{title} \trivlist \item[% \hskip\labelsep \textsc{#1\ #2}% \if \notp{\emptyargp{#3}}\nut (#3).\fi]% \itshape\selectfont \ignorespaces} \@setflag \@qeddone = \@false \newenvironment{proof}{% \global\@setflag \@qeddone = \@false \@ifnextchar[{\@titledproof}{\@titledproof[]}}{% \if \notp{\@qeddone}\qed \fi \endtrivlist} \def \@titledproof [#1]{% \trivlist \item[\hskip \labelsep \textsc{Proof% \if \notp{\emptyargp{#1}}\space #1\fi .}]% \ignorespaces} \newcommand{\qed}{% \unskip \kern 6pt {\linethickness{.5pt}\framebox(4,4){}}% \global\@setflag \@qeddone = \@true} \newcommand{\newdef}[2]{% {type}{name} \@withname\@ifdefinable {#1}{% \@definecounter{#1}% \@withname\xdef {the#1}{\@thmcounter{#1}}% \global\@namedef{#1}{\@begindef{#1}{#2}}% \global\@namedef{end#1}{\@endtheorem}}} \def \@begindef #1#2{% {type}{name} \refstepcounter{#1}% \@ifnextchar[{\@titleddef{#1}{#2}}{\@titleddef{#1}{#2}[]}} \def \@titleddef #1#2[#3]{% {type}{name}[title] \trivlist \item[\hskip \labelsep \itshape{#2% \if \notp{\emptyargp{#3}}\space #3\fi .}]% \ignorespaces} % Figures % ------- \@setflag \@caprule = \@true \long\def \@makecaption #1#2{% \addvspace{4pt} \if \@caprule \hrule width \hsize height .33pt \vspace{4pt} \fi \setbox \@tempboxa = \hbox{\@setfigurenumber{#1.}\nut #2}% \if \dimgtrp{\wd\@tempboxa}{\hsize}% \noindent \@setfigurenumber{#1.}\nut #2\par \else \centerline{\box\@tempboxa}% \fi} \newcommand{\nocaptionrule}{% \@setflag \@caprule = \@false} \def \@setfigurenumber #1{% {\rmfamily \bfseries \selectfont #1}} % Hierarchy % --------- \setcounter{secnumdepth}{\@numheaddepth} \newskip{\@sectionaboveskip} \setvspace{\@sectionaboveskip}{10pt plus 3pt minus 2pt} \newskip{\@sectionbelowskip} \if \@blockstyle \setlength{\@sectionbelowskip}{0.1pt}% \else \setlength{\@sectionbelowskip}{4pt}% \fi \renewcommand{\section}{% \@startsection {section}% {1}% {0pt}% {-\@sectionaboveskip}% {\@sectionbelowskip}% {\large \bfseries \raggedright}} \newskip{\@subsectionaboveskip} \setvspace{\@subsectionaboveskip}{8pt plus 2pt minus 2pt} \newskip{\@subsectionbelowskip} \if \@blockstyle \setlength{\@subsectionbelowskip}{0.1pt}% \else \setlength{\@subsectionbelowskip}{4pt}% \fi \renewcommand{\subsection}{% \@startsection% {subsection}% {2}% {0pt}% {-\@subsectionaboveskip}% {\@subsectionbelowskip}% {\normalsize \bfseries \raggedright}} 
\renewcommand{\subsubsection}{% \@startsection% {subsubsection}% {3}% {0pt}% {-\@subsectionaboveskip} {\@subsectionbelowskip}% {\normalsize \bfseries \raggedright}} \newskip{\@paragraphaboveskip} \setvspace{\@paragraphaboveskip}{6pt plus 2pt minus 2pt} \renewcommand{\paragraph}{% \@startsection% {paragraph}% {4}% {0pt}% {\@paragraphaboveskip} {-1em}% {\normalsize \bfseries \if \@times \itshape \fi}} % Standard headings: \newcommand{\acks}{\section*{Acknowledgments}} \newcommand{\keywords}{\paragraph*{Keywords}} \newcommand{\terms}{\paragraph*{General Terms}} % Identification % -------------- \def \@conferencename {} \def \@conferenceinfo {} \def \@copyrightyear {} \def \@copyrightdata {[to be supplied]} \newcommand{\conferenceinfo}[2]{% \gdef \@conferencename {#1}% \gdef \@conferenceinfo {#2}} \newcommand{\copyrightyear}[1]{% \gdef \@copyrightyear {#1}} \let \CopyrightYear = \copyrightyear \newcommand{\copyrightdata}[1]{% \gdef \@copyrightdata {#1}} \let \crdata = \copyrightdata % Lists % ----- \setlength{\leftmargini}{13pt} \setlength\leftmarginii{13pt} \setlength\leftmarginiii{13pt} \setlength\leftmarginiv{13pt} \setlength{\labelsep}{3.5pt} \setlength{\topsep}{\standardvspace} \if \@blockstyle \setlength{\itemsep}{0pt} \setlength{\parsep}{4pt} \else \setlength{\itemsep}{2pt} \setlength{\parsep}{0pt} \fi \renewcommand{\labelitemi}{{\small \centeroncapheight{\textbullet}}} \renewcommand{\labelitemii}{\centeroncapheight{\rule{2.5pt}{2.5pt}}} \renewcommand{\labelitemiii}{$-$} \renewcommand{\labelitemiv}{{\Large \textperiodcentered}} \renewcommand{\@listi}{% \leftmargin = \leftmargini \listparindent = \parindent} \let \@listI = \@listi \renewcommand{\@listii}{% \leftmargin = \leftmarginii \labelwidth = \leftmarginii \advance \labelwidth by -\labelsep \listparindent = \parindent} \renewcommand{\@listiii}{% \leftmargin = \leftmarginiii \labelwidth = \leftmarginiii \advance \labelwidth by -\labelsep \listparindent = \parindent} \renewcommand{\@listiv}{% \leftmargin = \leftmarginiv \labelwidth = \leftmarginiv \advance \labelwidth by -\labelsep \listparindent = \parindent} % Mathematics % ----------- \def \theequation {\arabic{equation}} % Miscellaneous % ------------- \newcommand{\balancecolumns}{% \vfill\eject \global\@colht = \textheight \global\ht\@cclv = \textheight} \newcommand{\nut}{\hspace{.5em}} \newcommand{\softraggedright}{% \let \\ = \@centercr \leftskip = 0pt \rightskip = 0pt plus 10pt} % Program Code % ------- ---- \newcommand{\mono}[1]{% {\@tempdima = \fontdimen2\font \texttt{\spaceskip = 1.1\@tempdima #1}}} % Running Heads and Feet % ------- ----- --- ---- \if \@preprint \def \ps@plain {% \let \@mkboth = \@gobbletwo \let \@evenhead = \@empty \def \@evenfoot {% \reset@font \@conferencename \hfil \thepage \hfil \@formatyear}% \let \@oddhead = \@empty \let \@oddfoot = \@evenfoot} \else \let \ps@plain = \ps@empty \let \ps@headings = \ps@empty \let \ps@myheadings = \ps@empty \fi \def \@formatyear {% \number\year/\number\month/\number\day} % Title Page % ----- ---- \@setflag \@addauthorsdone = \@false \def \@titletext {\@latex@error{No title was provided}{}} \def \@subtitletext {} \newcount{\@authorcount} \newcount{\@titlenotecount} \newtoks{\@titlenotetext} \renewcommand{\title}[1]{% \gdef \@titletext {#1}} \newcommand{\subtitle}[1]{% \gdef \@subtitletext {#1}} \newcommand{\authorinfo}[3]{% {names}{affiliation}{email/URL} \global\@increment \@authorcount \@withname\gdef {\@authorname\romannumeral\@authorcount}{#1}% \@withname\gdef {\@authoraffil\romannumeral\@authorcount}{#2}% 
\@withname\gdef {\@authoremail\romannumeral\@authorcount}{#3}} \renewcommand{\author}[1]{% \@latex@error{The \string\author\space command is obsolete; use \string\authorinfo}{}} \renewcommand{\maketitle}{% \pagestyle{plain}% \if \@onecolumn {\hsize = \standardtextwidth \@maketitle}% \else \twocolumn[\@maketitle]% \fi \@placetitlenotes \if \@copyrightwanted \@copyrightspace \fi} \def \@maketitle {% \begin{center} \let \thanks = \titlenote \noindent \LARGE \bfseries \@titletext \par \vskip 6pt \noindent \Large \@subtitletext \par \vskip 12pt \ifcase \@authorcount \@latex@error{No authors were specified for this paper}{}\or \@titleauthors{i}{}{}\or \@titleauthors{i}{ii}{}\or \@titleauthors{i}{ii}{iii}\or \@titleauthors{i}{ii}{iii}\@titleauthors{iv}{}{}\or \@titleauthors{i}{ii}{iii}\@titleauthors{iv}{v}{}\or \@titleauthors{i}{ii}{iii}\@titleauthors{iv}{v}{vi}\or \@titleauthors{i}{ii}{iii}\@titleauthors{iv}{v}{vi}% \@titleauthors{vii}{}{}\or \@titleauthors{i}{ii}{iii}\@titleauthors{iv}{v}{vi}% \@titleauthors{vii}{viii}{}\or \@titleauthors{i}{ii}{iii}\@titleauthors{iv}{v}{vi}% \@titleauthors{vii}{viii}{ix}\or \@titleauthors{i}{ii}{iii}\@titleauthors{iv}{v}{vi}% \@titleauthors{vii}{viii}{ix}\@titleauthors{x}{}{}% \@titleauthors{i}{ii}{iii}\@titleauthors{iv}{v}{vi}% \@titleauthors{vii}{viii}{ix}\@titleauthors{x}{xi}{}% \@titleauthors{i}{ii}{iii}\@titleauthors{iv}{v}{vi}% \@titleauthors{vii}{viii}{ix}\@titleauthors{x}{xi}{xii}% \else \@latex@error{Cannot handle more than 12 authors}{}% \fi \vspace{1.75pc} \end{center}} \def \@titleauthors #1#2#3{% \if \andp{\emptyargp{#2}}{\emptyargp{#3}}% \noindent \@setauthor{40pc}{#1}{\@false}\par \else\if \emptyargp{#3}% \noindent \@setauthor{17pc}{#1}{\@false}\hspace{3pc}% \@setauthor{17pc}{#2}{\@false}\par \else \noindent \@setauthor{12.5pc}{#1}{\@false}\hspace{2pc}% \@setauthor{12.5pc}{#2}{\@false}\hspace{2pc}% \@setauthor{12.5pc}{#3}{\@true}\par \relax \fi\fi \vspace{20pt}} \def \@setauthor #1#2#3{% \vtop{% \def \and {% \hspace{16pt}} \hsize = #1 \normalfont \centering \large \@name{\@authorname#2}\par \vspace{5pt} \normalsize \@name{\@authoraffil#2}\par \vspace{2pt} \textsf{\@name{\@authoremail#2}}\par}} \def \@maybetitlenote #1{% \if \andp{#1}{\gtrp{\@authorcount}{3}}% \titlenote{See page~\pageref{@addauthors} for additional authors.}% \fi} \newtoks{\@fnmark} \newcommand{\titlenote}[1]{% \global\@increment \@titlenotecount \ifcase \@titlenotecount \relax \or \@fnmark = {\ast}\or \@fnmark = {\dagger}\or \@fnmark = {\ddagger}\or \@fnmark = {\S}\or \@fnmark = {\P}\or \@fnmark = {\ast\ast}% \fi \,$^{\the\@fnmark}$% \edef \reserved@a {\noexpand\@appendtotext{% \noexpand\@titlefootnote{\the\@fnmark}}}% \reserved@a{#1}} \def \@appendtotext #1#2{% \global\@titlenotetext = \expandafter{\the\@titlenotetext #1{#2}}} \newcount{\@authori} \iffalse \def \additionalauthors {% \if \gtrp{\@authorcount}{3}% \section{Additional Authors}% \label{@addauthors}% \noindent \@authori = 4 {\let \\ = ,% \loop \textbf{\@name{\@authorname\romannumeral\@authori}}, \@name{\@authoraffil\romannumeral\@authori}, email: \@name{\@authoremail\romannumeral\@authori}.% \@increment \@authori \if \notp{\gtrp{\@authori}{\@authorcount}} \repeat}% \par \fi \global\@setflag \@addauthorsdone = \@true} \fi \let \addauthorsection = \additionalauthors \def \@placetitlenotes { \the\@titlenotetext} % Utilities % --------- \newcommand{\centeroncapheight}[1]{% {\setbox\@tempboxa = \hbox{#1}% \@measurecapheight{\@tempdima}% % Calculate ht(CAP) - ht(text) \advance \@tempdima by -\ht\@tempboxa % 
------------------ \divide \@tempdima by 2 % 2 \raise \@tempdima \box\@tempboxa}} \newbox{\@measbox} \def \@measurecapheight #1{% {\dimen} \setbox\@measbox = \hbox{ABCDEFGHIJKLMNOPQRSTUVWXYZ}% #1 = \ht\@measbox} \long\def \@titlefootnote #1#2{% \insert\footins{% \reset@font\footnotesize \interlinepenalty\interfootnotelinepenalty \splittopskip\footnotesep \splitmaxdepth \dp\strutbox \floatingpenalty \@MM \hsize\columnwidth \@parboxrestore %%% \protected@edef\@currentlabel{% %%% \csname p@footnote\endcsname\@thefnmark}% \color@begingroup \def \@makefnmark {$^{#1}$}% \@makefntext{% \rule\z@\footnotesep\ignorespaces#2\@finalstrut\strutbox}% \color@endgroup}} % LaTeX Modifications % ----- ------------- \def \@seccntformat #1{% \@name{\the#1}% \@expandaftertwice\@seccntformata \csname the#1\endcsname.\@mark \quad} \def \@seccntformata #1.#2\@mark{% \if \emptyargp{#2}.\fi} % Revision History % -------- ------- % Date Person Ver. Change % ---- ------ ---- ------ % 2004.09.12 PCA 0.1--5 Preliminary development. % 2004.11.18 PCA 0.5 Start beta testing. % 2004.11.19 PCA 0.6 Obsolete \author and replace with % \authorinfo. % Add 'nocopyrightspace' option. % Compress article opener spacing. % Add 'mathtime' option. % Increase text height by 6 points. % 2004.11.28 PCA 0.7 Add 'cm/computermodern' options. % Change default to Times text. % 2004.12.14 PCA 0.8 Remove use of mathptm.sty; it cannot % coexist with latexym or amssymb. % 2005.01.20 PCA 0.9 Rename class file to sigplanconf.cls. % 2005.03.05 PCA 0.91 Change default copyright data. % 2005.03.06 PCA 0.92 Add at-signs to some macro names. % 2005.03.07 PCA 0.93 The 'onecolumn' option defaults to '11pt', % and it uses the full type width. menhir-20171222/doc/Makefile0000664000175000017500000000040213217215730015714 0ustar fpottierfpottier.PHONY: all loop clean export TEXINPUTS=.: all: main.pdf %.pdf: %.tex $(wildcard *.tex) $(wildcard *.bib) $(wildcard *.sty) $(wildcard *.mly) pdflatex $* bibtex $* pdflatex $* pdflatex $* loop: latexmk -pdf -pvc main clean: rm -f `cat .gitignore` menhir-20171222/doc/plain.bst0000664000175000017500000004606313217215730016106 0ustar fpottierfpottier% BibTeX standard bibliography style `plain' % version 0.99a for BibTeX versions 0.99a or later, LaTeX version 2.09. % Copyright (C) 1985, all rights reserved. % Copying of this file is authorized only if either % (1) you make absolutely no changes to your copy, including name, or % (2) if you do make changes, you name it something other than % btxbst.doc, plain.bst, unsrt.bst, alpha.bst, and abbrv.bst. % This restriction helps ensure that all standard styles are identical. % The file btxbst.doc has the documentation for this style. % Modified by Francois.Pottier@inria.fr with support for url field. 
ENTRY { address author booktitle chapter edition editor howpublished institution journal key month note number organization pages publisher school series title type url volume year } {} { label } INTEGERS { output.state before.all mid.sentence after.sentence after.block } FUNCTION {init.state.consts} { #0 'before.all := #1 'mid.sentence := #2 'after.sentence := #3 'after.block := } STRINGS { s t } FUNCTION {output.nonnull} { 's := output.state mid.sentence = { ", " * write$ } { output.state after.block = { add.period$ write$ newline$ "\newblock " write$ } { output.state before.all = 'write$ { add.period$ " " * write$ } if$ } if$ mid.sentence 'output.state := } if$ s } FUNCTION {output} { duplicate$ empty$ 'pop$ 'output.nonnull if$ } FUNCTION {output.check} { 't := duplicate$ empty$ { pop$ "empty " t * " in " * cite$ * warning$ } 'output.nonnull if$ } FUNCTION {output.bibitem} { newline$ "\bibitem{" write$ cite$ write$ "}" write$ newline$ "" before.all 'output.state := } FUNCTION {fin.entry} { add.period$ write$ newline$ } FUNCTION {new.block} { output.state before.all = 'skip$ { after.block 'output.state := } if$ } FUNCTION {new.sentence} { output.state after.block = 'skip$ { output.state before.all = 'skip$ { after.sentence 'output.state := } if$ } if$ } FUNCTION {not} { { #0 } { #1 } if$ } FUNCTION {and} { 'skip$ { pop$ #0 } if$ } FUNCTION {or} { { pop$ #1 } 'skip$ if$ } FUNCTION {new.block.checka} { empty$ 'skip$ 'new.block if$ } FUNCTION {new.block.checkb} { empty$ swap$ empty$ and 'skip$ 'new.block if$ } FUNCTION {new.sentence.checka} { empty$ 'skip$ 'new.sentence if$ } FUNCTION {new.sentence.checkb} { empty$ swap$ empty$ and 'skip$ 'new.sentence if$ } FUNCTION {field.or.null} { duplicate$ empty$ { pop$ "" } 'skip$ if$ } FUNCTION {emphasize} { duplicate$ empty$ { pop$ "" } { "{\em " swap$ * "}" * } if$ } INTEGERS { nameptr namesleft numnames } FUNCTION {format.names} { 's := #1 'nameptr := s num.names$ 'numnames := numnames 'namesleft := { namesleft #0 > } { s nameptr "{ff~}{vv~}{ll}{, jj}" format.name$ 't := nameptr #1 > { namesleft #1 > { ", " * t * } { numnames #2 > { "," * } 'skip$ if$ t "others" = { " et~al." 
* } { " and " * t * } if$ } if$ } 't if$ nameptr #1 + 'nameptr := namesleft #1 - 'namesleft := } while$ } FUNCTION {format.authors} { author empty$ { "" } { author format.names } if$ } FUNCTION {format.editors} { editor empty$ { "" } { editor format.names editor num.names$ #1 > { ", editors" * } { ", editor" * } if$ } if$ } FUNCTION {format.title} { title empty$ { "" } { url empty$ { title "t" change.case$ } { "\href{" url "}{" title "t" change.case$ "}" * * * * } if$ } if$ } FUNCTION {n.dashify} { 't := "" { t empty$ not } { t #1 #1 substring$ "-" = { t #1 #2 substring$ "--" = not { "--" * t #2 global.max$ substring$ 't := } { { t #1 #1 substring$ "-" = } { "-" * t #2 global.max$ substring$ 't := } while$ } if$ } { t #1 #1 substring$ * t #2 global.max$ substring$ 't := } if$ } while$ } FUNCTION {format.date} { year empty$ { month empty$ { "" } { "there's a month but no year in " cite$ * warning$ month } if$ } { month empty$ 'year { month " " * year * } if$ } if$ } FUNCTION {format.btitle} { url empty$ { title emphasize } { "\href{" url "}{" title emphasize "}" * * * * } if$ } FUNCTION {tie.or.space.connect} { duplicate$ text.length$ #3 < { "~" } { " " } if$ swap$ * * } FUNCTION {either.or.check} { empty$ 'pop$ { "can't use both " swap$ * " fields in " * cite$ * warning$ } if$ } FUNCTION {format.bvolume} { volume empty$ { "" } { "volume" volume tie.or.space.connect series empty$ 'skip$ { " of " * series emphasize * } if$ "volume and number" number either.or.check } if$ } FUNCTION {format.number.series} { volume empty$ { number empty$ { series field.or.null } { output.state mid.sentence = { "number" } { "Number" } if$ number tie.or.space.connect series empty$ { "there's a number but no series in " cite$ * warning$ } { " in " * series * } if$ } if$ } { "" } if$ } FUNCTION {format.edition} { edition empty$ { "" } { output.state mid.sentence = { edition "l" change.case$ " edition" * } { edition "t" change.case$ " edition" * } if$ } if$ } INTEGERS { multiresult } FUNCTION {multi.page.check} { 't := #0 'multiresult := { multiresult not t empty$ not and } { t #1 #1 substring$ duplicate$ "-" = swap$ duplicate$ "," = swap$ "+" = or or { #1 'multiresult := } { t #2 global.max$ substring$ 't := } if$ } while$ multiresult } FUNCTION {format.pages} { pages empty$ { "" } { pages multi.page.check { "pages" pages n.dashify tie.or.space.connect } { "page" pages tie.or.space.connect } if$ } if$ } FUNCTION {format.vol.num.pages} { volume field.or.null number empty$ 'skip$ { "(" number * ")" * * volume empty$ { "there's a number but no volume in " cite$ * warning$ } 'skip$ if$ } if$ pages empty$ 'skip$ { duplicate$ empty$ { pop$ format.pages } { ":" * pages n.dashify * } if$ } if$ } FUNCTION {format.chapter.pages} { chapter empty$ 'format.pages { type empty$ { "chapter" } { type "l" change.case$ } if$ chapter tie.or.space.connect pages empty$ 'skip$ { ", " * format.pages * } if$ } if$ } FUNCTION {format.in.ed.booktitle} { booktitle empty$ { "" } { editor empty$ { "In " booktitle emphasize * } { "In " format.editors * ", " * booktitle emphasize * } if$ } if$ } FUNCTION {empty.misc.check} { author empty$ title empty$ howpublished empty$ month empty$ year empty$ note empty$ and and and and and key empty$ not and { "all relevant fields are empty in " cite$ * warning$ } 'skip$ if$ } FUNCTION {format.thesis.type} { type empty$ 'skip$ { pop$ type "t" change.case$ } if$ } FUNCTION {format.tr.number} { type empty$ { "Technical Report" } 'type if$ number empty$ { "t" change.case$ } { number tie.or.space.connect } if$ } 
FUNCTION {format.article.crossref} { key empty$ { journal empty$ { "need key or journal for " cite$ * " to crossref " * crossref * warning$ "" } { "In {\em " journal * "\/}" * } if$ } { "In " key * } if$ " \cite{" * crossref * "}" * } FUNCTION {format.crossref.editor} { editor #1 "{vv~}{ll}" format.name$ editor num.names$ duplicate$ #2 > { pop$ " et~al." * } { #2 < 'skip$ { editor #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" = { " et~al." * } { " and " * editor #2 "{vv~}{ll}" format.name$ * } if$ } if$ } if$ } FUNCTION {format.book.crossref} { volume empty$ { "empty volume in " cite$ * "'s crossref of " * crossref * warning$ "In " } { "Volume" volume tie.or.space.connect " of " * } if$ editor empty$ editor field.or.null author field.or.null = or { key empty$ { series empty$ { "need editor, key, or series for " cite$ * " to crossref " * crossref * warning$ "" * } { "{\em " * series * "\/}" * } if$ } { key * } if$ } { format.crossref.editor * } if$ " \cite{" * crossref * "}" * } FUNCTION {format.incoll.inproc.crossref} { editor empty$ editor field.or.null author field.or.null = or { key empty$ { booktitle empty$ { "need editor, key, or booktitle for " cite$ * " to crossref " * crossref * warning$ "" } { "In {\em " booktitle * "\/}" * } if$ } { "In " key * } if$ } { "In " format.crossref.editor * } if$ " \cite{" * crossref * "}" * } FUNCTION {article} { output.bibitem format.authors "author" output.check new.block format.title "title" output.check new.block crossref missing$ { journal emphasize "journal" output.check format.vol.num.pages output format.date "year" output.check } { format.article.crossref output.nonnull format.pages output } if$ new.block note output fin.entry } FUNCTION {book} { output.bibitem author empty$ { format.editors "author and editor" output.check } { format.authors output.nonnull crossref missing$ { "author and editor" editor either.or.check } 'skip$ if$ } if$ new.block format.btitle "title" output.check crossref missing$ { format.bvolume output new.block format.number.series output new.sentence publisher "publisher" output.check address output } { new.block format.book.crossref output.nonnull } if$ format.edition output format.date "year" output.check new.block note output fin.entry } FUNCTION {booklet} { output.bibitem format.authors output new.block format.title "title" output.check howpublished address new.block.checkb howpublished output address output format.date output new.block note output fin.entry } FUNCTION {inbook} { output.bibitem author empty$ { format.editors "author and editor" output.check } { format.authors output.nonnull crossref missing$ { "author and editor" editor either.or.check } 'skip$ if$ } if$ new.block format.btitle "title" output.check crossref missing$ { format.bvolume output format.chapter.pages "chapter and pages" output.check new.block format.number.series output new.sentence publisher "publisher" output.check address output } { format.chapter.pages "chapter and pages" output.check new.block format.book.crossref output.nonnull } if$ format.edition output format.date "year" output.check new.block note output fin.entry } FUNCTION {incollection} { output.bibitem format.authors "author" output.check new.block format.title "title" output.check new.block crossref missing$ { format.in.ed.booktitle "booktitle" output.check format.bvolume output format.number.series output format.chapter.pages output new.sentence publisher "publisher" output.check address output format.edition output format.date "year" output.check } { 
format.incoll.inproc.crossref output.nonnull format.chapter.pages output } if$ new.block note output fin.entry } FUNCTION {inproceedings} { output.bibitem format.authors "author" output.check new.block format.title "title" output.check new.block crossref missing$ { format.in.ed.booktitle "booktitle" output.check format.bvolume output format.number.series output format.pages output address empty$ { organization publisher new.sentence.checkb organization output publisher output format.date "year" output.check } { address output.nonnull format.date "year" output.check new.sentence organization output publisher output } if$ } { format.incoll.inproc.crossref output.nonnull format.pages output } if$ new.block note output fin.entry } FUNCTION {conference} { inproceedings } FUNCTION {manual} { output.bibitem author empty$ { organization empty$ 'skip$ { organization output.nonnull address output } if$ } { format.authors output.nonnull } if$ new.block format.btitle "title" output.check author empty$ { organization empty$ { address new.block.checka address output } 'skip$ if$ } { organization address new.block.checkb organization output address output } if$ format.edition output format.date output new.block note output fin.entry } FUNCTION {mastersthesis} { output.bibitem format.authors "author" output.check new.block format.title "title" output.check new.block "Master's thesis" format.thesis.type output.nonnull school "school" output.check address output format.date "year" output.check new.block note output fin.entry } FUNCTION {misc} { output.bibitem format.authors output title howpublished new.block.checkb format.title output howpublished new.block.checka howpublished output format.date output new.block note output fin.entry empty.misc.check } FUNCTION {phdthesis} { output.bibitem format.authors "author" output.check new.block format.btitle "title" output.check new.block "PhD thesis" format.thesis.type output.nonnull school "school" output.check address output format.date "year" output.check new.block note output fin.entry } FUNCTION {proceedings} { output.bibitem editor empty$ { organization output } { format.editors output.nonnull } if$ new.block format.btitle "title" output.check format.bvolume output format.number.series output address empty$ { editor empty$ { publisher new.sentence.checka } { organization publisher new.sentence.checkb organization output } if$ publisher output format.date "year" output.check } { address output.nonnull format.date "year" output.check new.sentence editor empty$ 'skip$ { organization output } if$ publisher output } if$ new.block note output fin.entry } FUNCTION {techreport} { output.bibitem format.authors "author" output.check new.block format.title "title" output.check new.block format.tr.number output.nonnull institution "institution" output.check address output format.date "year" output.check new.block note output fin.entry } FUNCTION {unpublished} { output.bibitem format.authors "author" output.check new.block format.title "title" output.check new.block note "note" output.check format.date output fin.entry } FUNCTION {default.type} { misc } MACRO {jan} {"January"} MACRO {feb} {"February"} MACRO {mar} {"March"} MACRO {apr} {"April"} MACRO {may} {"May"} MACRO {jun} {"June"} MACRO {jul} {"July"} MACRO {aug} {"August"} MACRO {sep} {"September"} MACRO {oct} {"October"} MACRO {nov} {"November"} MACRO {dec} {"December"} MACRO {acmcs} {"ACM Computing Surveys"} MACRO {acta} {"Acta Informatica"} MACRO {cacm} {"Communications of the ACM"} MACRO {ibmjrd} {"IBM Journal 
of Research and Development"} MACRO {ibmsj} {"IBM Systems Journal"} MACRO {ieeese} {"IEEE Transactions on Software Engineering"} MACRO {ieeetc} {"IEEE Transactions on Computers"} MACRO {ieeetcad} {"IEEE Transactions on Computer-Aided Design of Integrated Circuits"} MACRO {ipl} {"Information Processing Letters"} MACRO {jacm} {"Journal of the ACM"} MACRO {jcss} {"Journal of Computer and System Sciences"} MACRO {scp} {"Science of Computer Programming"} MACRO {sicomp} {"SIAM Journal on Computing"} MACRO {tocs} {"ACM Transactions on Computer Systems"} MACRO {tods} {"ACM Transactions on Database Systems"} MACRO {tog} {"ACM Transactions on Graphics"} MACRO {toms} {"ACM Transactions on Mathematical Software"} MACRO {toois} {"ACM Transactions on Office Information Systems"} MACRO {toplas} {"ACM Transactions on Programming Languages and Systems"} MACRO {tcs} {"Theoretical Computer Science"} READ FUNCTION {sortify} { purify$ "l" change.case$ } INTEGERS { len } FUNCTION {chop.word} { 's := 'len := s #1 len substring$ = { s len #1 + global.max$ substring$ } 's if$ } FUNCTION {sort.format.names} { 's := #1 'nameptr := "" s num.names$ 'numnames := numnames 'namesleft := { namesleft #0 > } { nameptr #1 > { " " * } 'skip$ if$ s nameptr "{vv{ } }{ll{ }}{ ff{ }}{ jj{ }}" format.name$ 't := nameptr numnames = t "others" = and { "et al" * } { t sortify * } if$ nameptr #1 + 'nameptr := namesleft #1 - 'namesleft := } while$ } FUNCTION {sort.format.title} { 't := "A " #2 "An " #3 "The " #4 t chop.word chop.word chop.word sortify #1 global.max$ substring$ } FUNCTION {author.sort} { author empty$ { key empty$ { "to sort, need author or key in " cite$ * warning$ "" } { key sortify } if$ } { author sort.format.names } if$ } FUNCTION {author.editor.sort} { author empty$ { editor empty$ { key empty$ { "to sort, need author, editor, or key in " cite$ * warning$ "" } { key sortify } if$ } { editor sort.format.names } if$ } { author sort.format.names } if$ } FUNCTION {author.organization.sort} { author empty$ { organization empty$ { key empty$ { "to sort, need author, organization, or key in " cite$ * warning$ "" } { key sortify } if$ } { "The " #4 organization chop.word sortify } if$ } { author sort.format.names } if$ } FUNCTION {editor.organization.sort} { editor empty$ { organization empty$ { key empty$ { "to sort, need editor, organization, or key in " cite$ * warning$ "" } { key sortify } if$ } { "The " #4 organization chop.word sortify } if$ } { editor sort.format.names } if$ } FUNCTION {presort} { type$ "book" = type$ "inbook" = or 'author.editor.sort { type$ "proceedings" = 'editor.organization.sort { type$ "manual" = 'author.organization.sort 'author.sort if$ } if$ } if$ " " * year field.or.null sortify * " " * title field.or.null sort.format.title * #1 entry.max$ substring$ 'sort.key$ := } ITERATE {presort} SORT STRINGS { longest.label } INTEGERS { number.label longest.label.width } FUNCTION {initialize.longest.label} { "" 'longest.label := #1 'number.label := #0 'longest.label.width := } FUNCTION {longest.label.pass} { number.label int.to.str$ 'label := number.label #1 + 'number.label := label width$ longest.label.width > { label 'longest.label := label width$ 'longest.label.width := } 'skip$ if$ } EXECUTE {initialize.longest.label} ITERATE {longest.label.pass} FUNCTION {begin.bib} { preamble$ empty$ 'skip$ { preamble$ write$ newline$ } if$ "\begin{thebibliography}{" longest.label * "}" * write$ newline$ } EXECUTE {begin.bib} EXECUTE {init.state.consts} ITERATE {call.type$} FUNCTION {end.bib} { newline$ 
"\end{thebibliography}" write$ newline$ } EXECUTE {end.bib} menhir-20171222/doc/whizzy.sty0000664000175000017500000000013713217215730016366 0ustar fpottierfpottier% Use small pages when whizzytex'ing. \makeatletter \setlength\textheight{340\p@} \makeatother menhir-20171222/doc/whizzy.el0000664000175000017500000000035413217215730016150 0ustar fpottierfpottier(whizzy-add-configuration ".*\.\\(tex\\|sty\\)" '((whizzy-master . "main.tex")) ) (whizzy-add-configuration "main\.tex" '((whizzy . "section -advi \"advi -geometry 1270x1024 -fullwidth -html Start-Document\" -dvicopy dvicopy" )) ) menhir-20171222/doc/declarations.mly0000664000175000017500000000035713217215730017460 0ustar fpottierfpottier%token ID ARROW LPAREN RPAREN COLON SEMICOLON %start program %% typ0: ID | LPAREN typ1 RPAREN {} typ1: typ0 | typ0 ARROW typ1 {} declaration: ID COLON typ1 {} program: | LPAREN declaration RPAREN | declaration SEMICOLON {} menhir-20171222/doc/version.tex0000664000175000017500000000003613217215730016466 0ustar fpottierfpottier\gdef\menhirversion{20171222} menhir-20171222/doc/fppdf.sty0000664000175000017500000000244013217215730016120 0ustar fpottierfpottier% This tiny package invokes ``hyperref'' with appropriate options. % Three modes are provided: % if \fppdf is defined, we configure ``hyperref'' for PDF output. % otherwise, if WhizzyTeX is active, we do configure ``softref'' for producing DVI output % containing ``advi''-style hyperlinks. % otherwise, we configure nothing. \ProvidesPackage{fppdf} \@ifundefined{fppdf}{ \newcommand{\texorpdfstring}[2]{#1} \newcommand{\href}[2]{#2} \@ifundefined{WhizzyTeX}{ % PostScript output. \typeout{No hyperlinks.} }{ % WhizzyTeX output. \typeout{Hyperlinks in advi style.} % % Dfinissons les commandes \softlink et \softtarget, employes par locallabel, % de faon ce que les labels de preuves deviennent des hyperliens. % \edef\hyper@quote{\string"} \edef\hyper@sharp{\string#} \def \softlink #1#2{\special {html:}#2\special {html:}} \def \softtarget #1#2{\special {html:}#2\special {html:}} } }{ % PDF output. \typeout{Hyperlinks in pdflatex style.} \usepackage[bookmarks=true,bookmarksopen=true,colorlinks=true,linkcolor=blue,citecolor=blue,urlcolor=blue]{hyperref} \let\softlink\hyperlink \let\softtarget\hypertarget } menhir-20171222/doc/local.bib0000664000175000017500000001562113217215730016035 0ustar fpottierfpottier@String{acta = "Acta Informatica"} @String{aw = "Addison-Wesley"} @String{cacm = "Communications of the {ACM}"} @String{cc = "Compiler Construction (CC)"} @String{cup = "Cambridge University Press"} @String{entcs = "Electronic Notes in Theoretical Computer Science"} @String{spe = "Software: Practice and Experience"} @String{toplas = "ACM Transactions on Programming Languages and Systems"} @Misc{compcert-github, author = "Xavier Leroy", title = "The {CompCert C} verified compiler", year = "2014", howpublished = "\url{https://github.com/AbsInt/CompCert}", } @Misc{obelisk, author = {L\'elio Brun}, title = {Obelisk}, howpublished = {\url{https://github.com/Lelio-Brun/Obelisk}}, year = {2017}, } @Book{aho-86, author = "Alfred V. Aho and Ravi Sethi and Jeffrey D. Ullman", title = "Compilers: Principles, Techniques, and Tools", publisher = aw, year = "1986", } @Book{appel-tiger-98, author = "Andrew Appel", title = "Modern Compiler Implementation in {ML}", publisher = cup, year = "1998", URL = "http://www.cs.princeton.edu/~appel/modern/ml/", } @Article{bhamidipaty-proebsting-98, author = "Achyutram Bhamidipaty and Todd A. 
Proebsting", title = "Very Fast {YACC}-Compatible Parsers (For Very Little Effort)", journal = spe, year = "1998", volume = "28", number = "2", pages = "181--190", URL = "http://www.cs.arizona.edu/people/todd/papers/TR95-09.ps", } @Article{dencker-84, author = "Peter Dencker and Karl Dürre and Johannes Heuft", title = "Optimization of parser tables for portable compilers", journal = toplas, volume = "6", number = "4", year = "1984", pages = "546--572", URL = "http://doi.acm.org/10.1145/1780.1802", } @Article{deremer-pennello-82, author = "Frank DeRemer and Thomas Pennello", title = "Efficient Computation of ${LALR}(1)$ Look-Ahead Sets", journal = toplas, volume = "4", number = "4", year = "1982", pages = "615--649", URL = "http://doi.acm.org/10.1145/69622.357187", } @Manual{bison, title = "Bison", author = "Charles Donnelly and Richard Stallman", year = "2015", URL = "http://www.gnu.org/software/bison/manual/", } @Book{hopcroft-motwani-ullman-00, author = "John E. Hopcroft and Rajeev Motwani and Jeffrey D. Ullman", title = "Introduction to Automata Theory, Languages, and Computation", publisher = aw, year = "2000", URL = "http://www-db.stanford.edu/~ullman/ialc.html", } @Article{horspool-faster-90, author = "R. Nigel Horspool and Michael Whitney", title = "Even Faster {LR} Parsing", journal = spe, year = "1990", volume = "20", number = "6", pages = "515--535", URL = "http://www.cs.uvic.ca/~nigelh/Publications/fastparse.pdf", } @Article{jeffery-03, author = "Clinton L. Jeffery", title = "Generating {LR} syntax error messages from examples", journal = toplas, volume = "25", number = "5", year = "2003", pages = "631--640", URL = "http://doi.acm.org/10.1145/937563.937566", } @InCollection{johnson-yacc-79, author = "Steven C. Johnson", title = "{Yacc}: Yet Another Compiler Compiler", booktitle = "{UNIX} Programmer's Manual", volume = "2", publisher = "Holt, Rinehart, and Winston", pages = "353--387", year = "1979", URL = "http://dinosaur.compilertools.net/", } @InProceedings{jourdan-leroy-pottier-12, author = "Jacques-Henri Jourdan and François Pottier and Xavier Leroy", title = "Validating ${LR}(1)$ Parsers", year = "2012", booktitle = esop, publisher = springer, series = lncs, volume = "7211", pages = "397--416", URL = "http://gallium.inria.fr/~fpottier/publis/jourdan-leroy-pottier-validating-parsers.pdf", } @Article{klint-laemmel-verhoef-05, author = "Paul Klint and Ralf L{\"a}mmel and Chris Verhoef", title = "Toward an engineering discipline for grammarware", journal = tosem, volume = "14", number = "3", year = "2005", pages = "331--380", URL = "http://www.few.vu.nl/~x/gw/gw.pdf", } @Article{knuth-lr-65, author = "Donald E. 
Knuth", title = "On the translation of languages from left to right", journal = "Information \& Control", year = "1965", volume = "8", number = "6", pages = "607--639", URL = "http://www.sciencedirect.com/science/article/pii/S0019995865904262", } @Misc{compcert, author = "Xavier Leroy", title = "The {CompCert C} compiler", year = "2015", howpublished = "\url{http://compcert.inria.fr/}", } @Misc{ocaml, author = "Xavier Leroy and Damien Doligez and Alain Frisch and Jacques Garrigue and Didier Rémy and Jérôme Vouillon", title = "The {OCaml} system: documentation and user's manual", year = "2016", URL = "http://caml.inria.fr/", } @Article{pager-77, author = "David Pager", title = "A Practical General Method for Constructing ${LR}(k)$ Parsers", journal = acta, year = "1977", volume = "7", pages = "249--268", URL = "http://dx.doi.org/10.1007/BF00290336", } @InProceedings{pottier-reachability-cc-2016, author = "François Pottier", title = "Reachability and error diagnosis in {LR}(1) parsers", booktitle = cc, year = "2016", pages = "88--98", URL = "http://gallium.inria.fr/~fpottier/publis/fpottier-reachability-cc2016.pdf", } @Article{pottier-regis-gianas-typed-lr, author = "François Pottier and Yann {Régis-Gianas}", title = "Towards efficient, typed {LR} parsers", URL = "http://gallium.inria.fr/~fpottier/publis/fpottier-regis-gianas-typed-lr.pdf", year = "2006", pages = "155--180", journal = entcs, volume = "148", number = "2", } @Manual{tarditi-appel-00, title = "{ML-Yacc} User's Manual", author = "David R. Tarditi and Andrew W. Appel", year = "2000", URL = "http://www.smlnj.org/doc/ML-Yacc/", } @Article{tarjan-yao-79, author = "Robert Endre Tarjan and Andrew Chi-Chih Yao", title = "Storing a sparse table", journal = cacm, volume = "22", number = "11", year = "1979", pages = "606--611", URL = "http://doi.acm.org/10.1145/359168.359175", } menhir-20171222/src/0000775000175000017500000000000013217215730014302 5ustar fpottierfpottiermenhir-20171222/src/settings.ml0000664000175000017500000003610113217215730016475 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Printf (* ------------------------------------------------------------------------- *) (* Prepare for parsing the command line. *) type token_type_mode = | TokenTypeAndCode (* produce the definition of the [token] type and code for the parser *) | TokenTypeOnly (* produce the type definition only *) | CodeOnly of string (* produce the code only; import token type from specified module *) let token_type_mode = ref TokenTypeAndCode let tokentypeonly () = token_type_mode := TokenTypeOnly let is_uppercase_ascii c = c >= 'A' && c <= 'Z' let is_capitalized_ascii s = String.length s > 0 && is_uppercase_ascii s.[0] let codeonly m = if not (is_capitalized_ascii m) then begin (* Not using module [Error] to avoid a circular dependency. 
*) fprintf stderr "Error: %s is not a valid OCaml module name.\n" m; exit 1 end; token_type_mode := CodeOnly m let version = ref false type construction_mode = | ModeCanonical (* --canonical: canonical Knuth LR(1) automaton *) | ModeInclusionOnly (* --no-pager : states are merged when there is an inclusion relationship *) | ModePager (* normal mode: states are merged as per Pager's criterion *) | ModeLALR (* --lalr : states are merged as in an LALR generator, i.e. as soon as they have the same LR(0) core *) (* Note that --canonical overrides --no-pager. If both are specified, the result is a canonical automaton. *) let construction_mode = ref ModePager let explain = ref false let base = ref "" let dump = ref false let follow = ref false let graph = ref false let trace = ref false let noprefix = ref false type print_mode = | PrintNormal | PrintForOCamlyacc | PrintUnitActions of bool (* if true, declare unit tokens *) type preprocess_mode = | PMNormal (* preprocess and continue *) | PMOnlyPreprocess of print_mode (* preprocess, print grammar, stop *) let preprocess_mode = ref PMNormal let recovery = ref false let v () = dump := true; explain := true let infer = ref false let inline = ref true type ocamldep_mode = | OMNone (* do not invoke ocamldep *) | OMRaw (* invoke ocamldep and echo its raw output *) | OMPostprocess (* invoke ocamldep and postprocess its output *) let depend = ref OMNone let code_inlining = ref true let comment = ref false let ocamlc = ref "ocamlc" let ocamldep = ref "ocamldep" let logG, logA, logC = ref 0, ref 0, ref 0 let timings = ref false let filenames = ref StringSet.empty let no_stdlib = ref false (* By default, [stdlib_path] is [Installation.libdir], that is, the directory that was specified when Menhir was compiled. This is overridden by the environment variable $MENHIR_STDLIB, if it is defined, and by the --stdlib command line option, if present. 
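In other words, the precedence is: --stdlib, then $MENHIR_STDLIB, then the
compile-time default. A standalone sketch of this resolution pattern
(hypothetical names; [default_libdir] stands for [Installation.libdir]):

  let default_libdir = "/usr/local/share/menhir" (* hypothetical *)
  let stdlib_path : string ref =
    ref (try Sys.getenv "MENHIR_STDLIB" with Not_found -> default_libdir)
  (* A later [--stdlib <directory>] option overwrites this reference,
     via [Arg.Set_string stdlib_path]. *)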
*) let stdlib_path = ref Installation.libdir let () = try stdlib_path := Sys.getenv "MENHIR_STDLIB" with Not_found -> () let insert name = filenames := StringSet.add name !filenames let interpret = ref false let interpret_show_cst = ref false let interpret_error = ref false let table = ref false let inspection = ref false let coq = ref false let coq_no_complete = ref false let coq_no_actions = ref false let strict = ref false let fixedexc = ref false type suggestion = | SuggestNothing | SuggestCompFlags | SuggestLinkFlags of string (* "cmo" or "cmx" *) | SuggestWhereIsMenhirLibSource | SuggestUseOcamlfind let suggestion = ref SuggestNothing let ignored_unused_tokens = ref StringSet.empty let ignore_unused_token t = ignored_unused_tokens := StringSet.add t !ignored_unused_tokens let ignore_all_unused_tokens = ref false let ignore_all_unused_precedence_levels = ref false let list_errors = ref false let compile_errors = ref None let set_compile_errors filename = compile_errors := Some filename let compare_errors = ref [] let add_compare_errors filename = compare_errors := filename :: !compare_errors let update_errors = ref None let set_update_errors filename = update_errors := Some filename let echo_errors = ref None let set_echo_errors filename = echo_errors := Some filename let cmly = ref false let options = Arg.align [ "--base", Arg.Set_string base, "<basename> Specifies a base name for the output file(s)"; "--canonical", Arg.Unit (fun () -> construction_mode := ModeCanonical), " Construct a canonical Knuth LR(1) automaton"; "--cmly", Arg.Set cmly, " Write a .cmly file"; "--comment", Arg.Set comment, " Include comments in the generated code"; "--compare-errors", Arg.String add_compare_errors, "<filename> (used twice) Compare two .messages files."; "--compile-errors", Arg.String set_compile_errors, "<filename> Compile a .messages file to OCaml code."; "--coq", Arg.Set coq, " Generate a formally verified parser, in Coq"; "--coq-no-complete", Arg.Set coq_no_complete, " Do not generate a proof of completeness"; "--coq-no-actions", Arg.Set coq_no_actions, " Ignore semantic actions in the Coq output"; "--depend", Arg.Unit (fun () -> depend := OMPostprocess), " Invoke ocamldep and display dependencies"; "--dump", Arg.Set dump, " Write an .automaton file"; "--echo-errors", Arg.String set_echo_errors, "<filename> Echo the sentences in a .messages file"; "--error-recovery", Arg.Set recovery, " (no longer supported)"; "--explain", Arg.Set explain, " Explain conflicts in .conflicts"; "--external-tokens", Arg.String codeonly, "<module> Import token type definition from <module>"; "--fixed-exception", Arg.Set fixedexc, " Declares Error = Parsing.Parse_error"; "--follow-construction", Arg.Set follow, " (undocumented)"; "--graph", Arg.Set graph, " Write a dependency graph to a .dot file"; "--infer", Arg.Set infer, " Invoke ocamlc to do type inference"; "--inspection", Arg.Set inspection, " Generate the inspection API"; "--interpret", Arg.Set interpret, " Interpret the sentences provided on stdin"; "--interpret-show-cst", Arg.Set interpret_show_cst, " Show a concrete syntax tree upon acceptance"; "--interpret-error", Arg.Set interpret_error, " Interpret an error sentence"; "--lalr", Arg.Unit (fun () -> construction_mode := ModeLALR), " Construct an LALR(1) automaton"; "--list-errors", Arg.Set list_errors, " Produce a list of erroneous inputs"; "--log-automaton", Arg.Set_int logA, "<level> Log information about the automaton"; "--log-code", Arg.Set_int logC, "<level> Log information about the generated code"; "--log-grammar", Arg.Set_int logG, "<level> Log information about the 
grammar"; "--no-code-inlining", Arg.Clear code_inlining, " (undocumented)"; "--no-inline", Arg.Clear inline, " Ignore the %inline keyword."; "--no-pager", Arg.Unit (fun () -> if !construction_mode = ModePager then construction_mode := ModeInclusionOnly), " (undocumented)"; "--no-prefix", Arg.Set noprefix, " (undocumented)"; "--no-stdlib", Arg.Set no_stdlib, " Do not load the standard library"; "--ocamlc", Arg.Set_string ocamlc, " Specifies how ocamlc should be invoked"; "--ocamldep", Arg.Set_string ocamldep, " Specifies how ocamldep should be invoked"; "--only-preprocess", Arg.Unit (fun () -> preprocess_mode := PMOnlyPreprocess PrintNormal), " Print grammar and exit"; "--only-preprocess-for-ocamlyacc", Arg.Unit (fun () -> preprocess_mode := PMOnlyPreprocess PrintForOCamlyacc), " Print grammar in ocamlyacc format and exit"; "--only-preprocess-u", Arg.Unit (fun () -> preprocess_mode := PMOnlyPreprocess (PrintUnitActions false)), " Print grammar with unit actions and exit"; "--only-preprocess-uu", Arg.Unit (fun () -> preprocess_mode := PMOnlyPreprocess (PrintUnitActions true)), " Print grammar with unit actions & tokens"; "--only-tokens", Arg.Unit tokentypeonly, " Generate token type definition only, no code"; "--raw-depend", Arg.Unit (fun () -> depend := OMRaw), " Invoke ocamldep and echo its raw output"; "--stdlib", Arg.Set_string stdlib_path, " Specify where the standard library lies"; "--strict", Arg.Set strict, " Warnings about the grammar are errors"; "--suggest-comp-flags", Arg.Unit (fun () -> suggestion := SuggestCompFlags), " Suggest compilation flags for ocaml{c,opt}"; "--suggest-link-flags-byte", Arg.Unit (fun () -> suggestion := SuggestLinkFlags "cmo"), " Suggest link flags for ocamlc"; "--suggest-link-flags-opt", Arg.Unit (fun () -> suggestion := SuggestLinkFlags "cmx"), " Suggest link flags for ocamlopt"; "--suggest-menhirLib", Arg.Unit (fun () -> suggestion := SuggestWhereIsMenhirLibSource), " Suggest where is MenhirLib"; "--suggest-ocamlfind", Arg.Unit (fun () -> suggestion := SuggestUseOcamlfind), " Show if Menhir was installed using ocamlfind"; "--table", Arg.Set table, " Use the table-based back-end"; "--timings", Arg.Set timings, " Display internal timings"; "--trace", Arg.Set trace, " Generate tracing instructions"; "--unused-precedence-levels", Arg.Set ignore_all_unused_precedence_levels, " Do not warn about unused precedence levels"; "--unused-token", Arg.String ignore_unused_token, " Do not warn that is unused"; "--unused-tokens", Arg.Set ignore_all_unused_tokens, " Do not warn about any unused token"; "--update-errors", Arg.String set_update_errors, " Update auto-comments in a .messages file"; "--version", Arg.Set version, " Show version number and exit"; "-b", Arg.Set_string base, " Synonymous with --base "; "-lg", Arg.Set_int logG, " Synonymous with --log-grammar"; "-la", Arg.Set_int logA, " Synonymous with --log-automaton"; "-lc", Arg.Set_int logC, " Synonymous with --log-code"; "-t", Arg.Set table, " Synonymous with --table"; "-v", Arg.Unit v, " Synonymous with --dump --explain"; ] let usage = sprintf "Usage: %s " Sys.argv.(0) (* ------------------------------------------------------------------------- *) (* Parse the command line. *) let () = Arg.parse options insert usage (* ------------------------------------------------------------------------- *) (* If required, print a version number and stop. 
*) let () = if !version then begin printf "menhir, version %s\n" Version.version; exit 0 end (* ------------------------------------------------------------------------- *) (* Menhir is able to suggest compile and link flags to be passed to the OCaml compilers. If required, do so and stop. *) (* If [--table] is not passed, no flags are necessary. If [--table] is passed, then [MenhirLib] needs to be visible (at compile time) and linked in (at link time). This is done either via [ocamlfind], if it was available at installation time, or manually. *) (* The compilation flags are in fact meant to be used both at compile- and link-time. *) let () = match !suggestion with | SuggestNothing -> () | SuggestCompFlags -> if !table then if Installation.ocamlfind then printf "-package menhirLib\n%!" else printf "-I %s\n%!" Installation.libdir; exit 0 | SuggestLinkFlags extension -> if !table then if Installation.ocamlfind then printf "-linkpkg\n%!" else printf "menhirLib.%s\n%!" extension; exit 0 | SuggestWhereIsMenhirLibSource -> if Installation.ocamlfind then let _ = Sys.command "ocamlfind query menhirLib" in () else printf "%s\n%!" Installation.libdir; exit 0 | SuggestUseOcamlfind -> printf "%b\n" Installation.ocamlfind; exit 0 (* ------------------------------------------------------------------------- *) (* Export the settings. *) let stdlib_filename = !stdlib_path ^ "/standard.mly" let filenames = StringSet.elements !filenames let base = if !base = "" then match filenames with | [] -> fprintf stderr "%s\n" usage; exit 1 | [ filename ] -> Filename.chop_suffix filename (if !coq then ".vy" else ".mly") | _ -> fprintf stderr "Error: you must specify --base when providing multiple input files.\n"; exit 1 else !base let filenames = if !no_stdlib || !coq then filenames else stdlib_filename :: filenames let token_type_mode = !token_type_mode let construction_mode = !construction_mode let explain = !explain let dump = !dump let follow = !follow let graph = !graph let trace = !trace let () = if !recovery then begin fprintf stderr "Error: --error-recovery mode is no longer supported.\n"; exit 1 end let noprefix = !noprefix let infer = !infer let code_inlining = !code_inlining let depend = !depend let inline = !inline let comment = !comment let preprocess_mode = !preprocess_mode let ocamlc = !ocamlc let ocamldep = !ocamldep let logG, logA, logC = !logG, !logA, !logC let timings = !timings let interpret = !interpret let interpret_show_cst = !interpret_show_cst let interpret_error = !interpret_error let table = !table let inspection = !inspection let () = if inspection && not table then begin fprintf stderr "Error: --inspection requires --table.\n"; exit 1 end let coq = !coq let coq_no_complete = !coq_no_complete let coq_no_actions = !coq_no_actions let strict = !strict let fixedexc = !fixedexc let ignored_unused_tokens = !ignored_unused_tokens let ignore_all_unused_tokens = !ignore_all_unused_tokens let ignore_all_unused_precedence_levels = !ignore_all_unused_precedence_levels let list_errors = !list_errors let compile_errors = !compile_errors let compare_errors = match !compare_errors with | [] -> None | [ filename2; filename1 ] -> (* LIFO *) Some (filename1, filename2) | _ -> eprintf "To compare two .messages files, please use:\n\ --compare-errors <filename1> --compare-errors <filename2>.\n"; exit 1 let update_errors = !update_errors let echo_errors = !echo_errors let cmly = !cmly menhir-20171222/src/IO.mli0000664000175000017500000000414713217215730015320 0ustar 
fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* Input-output utilities. *) (* [try/finally] has the same semantics as in Java. *) val try_finally : (unit -> 'a) -> (unit -> unit) -> 'a (* [moving_away filename action] moves the file [filename] away (if it exists), performs [action], then moves the file back into place (if it was moved away). *) val moving_away: string -> (unit -> 'a) -> 'a (* [with_file filename creation action] creates the file [filename] by running [creation], then runs [action], and ensures that the file is removed in the end. *) val with_file: string -> (unit -> unit) -> (unit -> 'a) -> 'a (* [exhaust channel] reads all of the data that's available on [channel]. It does not assume that the length of the data is known ahead of time. It does not close the channel. *) val exhaust: in_channel -> string (* [invoke command] invokes an external command (which expects no input) and returns its output, if the command succeeds. It returns [None] if the command fails. *) val invoke: string -> string option (* [read_whole_file filename] reads the file [filename] in text mode and returns its contents as a string. *) val read_whole_file: string -> string menhir-20171222/src/lr0.ml0000664000175000017500000004576213217215730015347 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Grammar module InfiniteArray = MenhirLib.InfiniteArray (* ------------------------------------------------------------------------ *) (* Symbolic lookahead information. *) (* A symbolic lookahead set consists of an actual concrete set of terminal symbols and of a number of set variables. Set variables as encoded as integers. *) module SymbolicLookahead = struct type t = TerminalSet.t * CompressedBitSet.t let constant toks = (toks, CompressedBitSet.empty) let empty = constant TerminalSet.empty let union (toks1, vars1) ((toks2, vars2) as s2) = let toks = TerminalSet.union toks1 toks2 and vars = CompressedBitSet.union vars1 vars2 in if toks2 == toks && vars2 == vars then s2 else (toks, vars) let variable (var : int) : t = (TerminalSet.empty, CompressedBitSet.singleton var) let project (toks, vars) = assert (CompressedBitSet.is_empty vars); toks end (* We will perform closure operations over symbolic lookahead sets. This allows us to later represent LR(1) states as pairs of an LR(0) node number and an array of concrete lookahead sets. *) module SymbolicClosure = Item.Closure(SymbolicLookahead) (* Closure operations over concrete lookahead sets are also used (when explaining conflicts). One could take another instance of the functor. 
The approach below is somewhat less elegant and makes each call to [closure] somewhat slower, but saves the cost of instantiating the functor again -- which is linear in the size of the grammar. *) type concretelr1state = TerminalSet.t Item.Map.t let closure (state : concretelr1state) : concretelr1state = Item.Map.map SymbolicLookahead.project (SymbolicClosure.closure (Item.Map.map SymbolicLookahead.constant state)) (* ------------------------------------------------------------------------ *) (* Finding which non-epsilon transitions leave a set of items. This code is parametric in the nature of lookahead sets. *) let transitions (state : 'a Item.Map.t) : 'a Item.Map.t SymbolMap.t = Item.Map.fold (fun item toks transitions -> match Item.classify item with | Item.Shift (symbol, item') -> let items : 'a Item.Map.t = try SymbolMap.find symbol transitions with Not_found -> Item.Map.empty in SymbolMap.add symbol (Item.Map.add item' toks items) transitions | Item.Reduce _ -> transitions ) state SymbolMap.empty (* ------------------------------------------------------------------------ *) (* Determining the reduction opportunities at a (closed) state. They are represented as a list of pairs of a lookahead set and a production index. This code is again parametric in the nature of lookahead sets. *) let reductions (state : 'a Item.Map.t) : ('a * Production.index) list = Item.Map.fold (fun item toks accu -> match Item.classify item with | Item.Reduce prod -> (toks, prod) :: accu | Item.Shift _ -> accu ) state [] (* ------------------------------------------------------------------------ *) (* Construction of the LR(0) automaton. *) (* Nodes are numbered sequentially. *) type node = int (* A symbolic transition is a pair of the target state number and an array of symbolic lookahead sets. The variables in these sets are numbered in [0,g) where g is the number of items in the source LR(0) state. Items are numbered in the order of presentation by [Item.Set.fold]. *) type symbolic_transition_target = node * SymbolicLookahead.t array (* The automaton is represented by (growing) arrays of states (sets of items), symbolic transition information, and symbolic reduction information, indexed by node numbers. Conversely, a hash table maps states (sets of items) to node numbers. *) let n = ref 0 let states : Item.Set.t InfiniteArray.t = InfiniteArray.make Item.Set.empty let _transitions : symbolic_transition_target SymbolMap.t InfiniteArray.t = InfiniteArray.make SymbolMap.empty let _reductions : (SymbolicLookahead.t * Production.index) list InfiniteArray.t = InfiniteArray.make [] let map : (Item.Set.t, node) Hashtbl.t = Hashtbl.create 50021 let incoming : Symbol.t option InfiniteArray.t = InfiniteArray.make None (* The automaton is built depth-first. *) let rec explore (symbol : Symbol.t option) (state : Item.Set.t) : node = (* Find out whether this state was already explored. *) try Hashtbl.find map state with Not_found -> (* If not, create a new node. *) let k = !n in n := k + 1; InfiniteArray.set states k state; Hashtbl.add map state k; (* Record its incoming symbol. *) InfiniteArray.set incoming k symbol; (* Build a symbolic version of the current state, where each item is associated with a distinct lookahead set variable, numbered consecutively. *) let (_ : int), (symbolic_state : SymbolicClosure.state) = Item.Set.fold (fun item (i, symbolic_state) -> i+1, Item.Map.add item (SymbolicLookahead.variable i) symbolic_state ) state (0, Item.Map.empty) in (* Compute the symbolic closure. 
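For instance (a worked example, independent of any particular grammar): if the kernel consists of the single item [A -> B . C D], carrying the lookahead variable v0, then the closure adds an item [C -> . gamma] for every production [C -> gamma], with symbolic lookahead FIRST(D), to which v0 is joined when D is nullable.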
*) let closure = SymbolicClosure.closure symbolic_state in (* Compute symbolic information about reductions. *) InfiniteArray.set _reductions k (reductions closure); (* Compute symbolic information about the transitions, and, by dropping the symbolic lookahead information, explore the transitions to further LR(0) states. *) InfiniteArray.set _transitions k (SymbolMap.mapi (fun symbol symbolic_state -> let (k : node) = explore (Some symbol) (Item.Map.domain symbolic_state) in let lookahead : SymbolicLookahead.t array = Array.make (Item.Map.cardinal symbolic_state) SymbolicLookahead.empty in let (_ : int) = Item.Map.fold (fun _ s i -> lookahead.(i) <- s; i+1 ) symbolic_state 0 in ((k, lookahead) : symbolic_transition_target) ) (transitions closure)); k (* Creating a start state out of a start production. It contains a single item, consisting of the start production, at position 0. *) let start prod : Item.Set.t = Item.Set.singleton (Item.import (prod, 0)) (* This starts the construction of the automaton and records the entry nodes in an array. *) let entry : node ProductionMap.t = ProductionMap.start (fun prod -> explore None (start prod) ) let () = Hashtbl.clear map let n = !n let () = Error.logA 1 (fun f -> Printf.fprintf f "Built an LR(0) automaton with %d states.\n" n); Time.tick "Construction of the LR(0) automaton" (* ------------------------------------------------------------------------ *) (* Accessors. *) let items node : Item.Set.t = InfiniteArray.get states node let incoming_symbol node : Symbol.t option = InfiniteArray.get incoming node (* ------------------------------------------------------------------------ *) (* Help for building the LR(1) automaton. *) (* An LR(1) state is represented as a pair of an LR(0) state number and an array of concrete lookahead sets (whose length depends on the LR(0) state). *) type lr1state = node * TerminalSet.t array (* An encoded LR(1) state can be turned into a concrete representation, that is, a mapping of items to concrete lookahead sets. *) let export (k, toksr) = let (_ : int), items = Item.Set.fold (fun item (i, items) -> i+1, Item.Map.add item toksr.(i) items ) (InfiniteArray.get states k) (0, Item.Map.empty) in items (* Displaying a concrete state. *) let print_concrete leading (state : concretelr1state) = let buffer = Buffer.create 1024 in Item.Map.iter (fun item toks -> Printf.bprintf buffer "%s%s[ %s ]\n" leading (Item.print item) (TerminalSet.print toks) ) state; Buffer.contents buffer (* Displaying a state. By default, only the kernel is displayed, not the closure. *) let print leading state = print_concrete leading (export state) let print_closure leading state = print_concrete leading (closure (export state)) (* The core of an LR(1) state is the underlying LR(0) state. *) let core (k, _) = k (* A sanity check. *) let well_formed (k, toksr) = Array.length toksr = Item.Set.cardinal (InfiniteArray.get states k) (* An LR(1) start state is the combination of an LR(0) start state (which consists of a single item) with a singleton lookahead set that consists of the end-of-file pseudo-token. *) let start k = let state = (k, [| TerminalSet.singleton Terminal.sharp |]) in assert (well_formed state); state (* Interpreting a symbolic lookahead set with respect to a source state. The variables in the symbolic lookahead set (which are integers) are interpreted as indices into the state's array of concrete lookahead sets. The result is a concrete lookahead set. 
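As a simplified model of this operation (a standalone sketch, where lists
of strings stand in for [TerminalSet.t] and a list of integers stands in
for [CompressedBitSet.t]):

  type symbolic = { toks : string list; vars : int list }

  let interpret (toksr : string list array) (s : symbolic) : string list =
    (* Start from the concrete tokens; each variable is an index into
       the array of concrete lookahead sets. *)
    List.fold_left (fun toks var -> toks @ toksr.(var)) s.toks s.vars

  (* For example, [interpret [| ["A"]; ["B"; "C"] |] { toks = ["EOF"]; vars = [1] }]
     yields [["EOF"; "B"; "C"]]. *)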
*) let interpret ((_, toksr) as state : lr1state) ((toks, vars) : SymbolicLookahead.t) : TerminalSet.t = assert (well_formed state); CompressedBitSet.fold (fun var toks -> assert (var >= 0 && var < Array.length toksr); TerminalSet.union toksr.(var) toks ) vars toks (* Out of an LR(1) state, one produces information about reductions and transitions. This is done in an efficient way by interpreting the precomputed symbolic information with respect to that state. *) let reductions ((k, _) as state : lr1state) : (TerminalSet.t * Production.index) list = List.map (fun (s, prod) -> interpret state s, prod ) (InfiniteArray.get _reductions k) let transitions ((k, _) as state : lr1state) : lr1state SymbolMap.t = SymbolMap.map (fun ((k, sr) : symbolic_transition_target) -> ((k, Array.map (interpret state) sr) : lr1state) ) (InfiniteArray.get _transitions k) let outgoing_symbols (k : node) : Symbol.t list = SymbolMap.domain (InfiniteArray.get _transitions k) let transition symbol ((k, _) as state : lr1state) : lr1state = let ((k, sr) : symbolic_transition_target) = try SymbolMap.find symbol (InfiniteArray.get _transitions k) with Not_found -> assert false (* no transition along this symbol *) in (k, Array.map (interpret state) sr) (* Equality of states. *) let equal ((k1, toksr1) as state1) ((k2, toksr2) as state2) = assert (k1 = k2 && well_formed state1 && well_formed state2); let rec loop i = if i = 0 then true else let i = i - 1 in (TerminalSet.equal toksr1.(i) toksr2.(i)) && (loop i) in loop (Array.length toksr1) (* Subsumption between states. *) let subsume ((k1, toksr1) as state1) ((k2, toksr2) as state2) = assert (k1 = k2 && well_formed state1 && well_formed state2); let rec loop i = if i = 0 then true else let i = i - 1 in (TerminalSet.subset toksr1.(i) toksr2.(i)) && (loop i) in loop (Array.length toksr1) (* This function determines whether two (core-equivalent) states are compatible, according to a criterion that is close to Pager's weak compatibility criterion. Pager's criterion guarantees that if a merged state has a potential conflict at [(i, j)] -- that is, some token [t] appears within the lookahead sets of both item [i] and item [j] -- then there exists a state in the canonical automaton that also has a potential conflict at [(i, j)] -- that is, some token [u] appears within the lookahead sets of both item [i] and item [j]. Note that [t] and [u] can be distinct. Pager has shown that his weak compatibility criterion is stable, that is, preserved by transitions and closure. This means that, if two states can be merged, then so can their successors. This is important, because merging two states means committing to merging their successors, even though we have not even built these successors yet. The criterion used here is a slightly more restrictive version of Pager's criterion, which guarantees equality of the tokens [t] and [u]. This is done essentially by applying Pager's original criterion on a token-wise basis. Pager's original criterion states that two states can be merged if the new state has no conflict or one of the original states has a conflict. Our more restrictive criterion states that two states can be merged if, for every token [t], the new state has no conflict at [t] or one of the original states has a conflict at [t]. This modified criterion is also stable. 
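(As an aside, the set-theoretic rewriting used in [compatible] below -- two
symmetric subset conditions in place of the merged-conflict formulation --
can be machine-checked by brute force over a small universe. A standalone
sketch, with integer lists as sets:

  let subset xs ys = List.for_all (fun x -> List.mem x ys) xs
  let inter xs ys = List.filter (fun x -> List.mem x ys) xs
  let union xs ys = xs @ ys
  (* All subsets of the universe {0, 1, 2}. *)
  let sets =
    List.fold_left (fun acc x -> acc @ List.map (fun s -> x :: s) acc) [[]] [0; 1; 2]
  let merged a b c d =
    subset (inter (union a c) (union b d)) (union (inter a b) (inter c d))
  let pairwise a b c d =
    subset (inter c b) (union a d) && subset (inter a d) (union c b)
  let () =
    (* Check that the two formulations agree on every quadruple of sets. *)
    List.iter (fun a -> List.iter (fun b -> List.iter (fun c -> List.iter (fun d ->
      assert (merged a b c d = pairwise a b c d)) sets) sets) sets) sets
)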
My experiments show that it is almost as effective in practice: out of more than a hundred real-world sample grammars, only one automaton was affected, and only one extra state appeared as a result of using the modified criterion. Its advantage is to potentially make conflict explanations easier: if there appears to be a conflict at [t], then some conflict at [t] can be explained. This was not true when using Pager's original criterion. *) let compatible (k1, toksr1) (k2, toksr2) = assert (k1 = k2); let n = Array.length toksr1 in (* Two states are compatible if and only if they are compatible at every pair (i, j), where i and j are distinct. *) let rec loopi i = if i = n then true else let toksr1i = toksr1.(i) and toksr2i = toksr2.(i) in let rec loopj j = if j = i then true else let toksr1j = toksr1.(j) and toksr2j = toksr2.(j) in (* The two states are compatible at (i, j) if every conflict token in the merged state already was a conflict token in one of the two original states. This could be written as follows: TerminalSet.subset (TerminalSet.inter (TerminalSet.union toksr1i toksr2i) (TerminalSet.union toksr1j toksr2j)) (TerminalSet.union (TerminalSet.inter toksr1i toksr1j) (TerminalSet.inter toksr2i toksr2j)) but is easily seen (on paper) to be equivalent to: *) TerminalSet.subset (TerminalSet.inter toksr2i toksr1j) (TerminalSet.union toksr1i toksr2j) && TerminalSet.subset (TerminalSet.inter toksr1i toksr2j) (TerminalSet.union toksr2i toksr1j) && loopj (j+1) in loopj 0 && loopi (i+1) in loopi 0 (* This function determines whether two (core-equivalent) states can be merged without creating an end-of-stream conflict, now or in the future. The rule is, if an item appears in one state with the singleton "#" as its lookahead set, then its lookahead set in the other state must contain "#". So, either the second lookahead set is also the singleton "#", and no end-of-stream conflict exists, or it is larger, and the second state already contains an end-of-stream conflict. Put another way, we do not want to merge two lookahead sets when one contains "#" alone and the other does not contain "#". I invented this rule to complement Pager's criterion. I believe, but I am not 100% sure, that it does indeed prevent end-of-stream conflicts and that it is stable. Thanks to Sébastien Hinderer for reporting the bug caused by the absence of this extra criterion. *) let eos_compatible (k1, toksr1) (k2, toksr2) = assert (k1 = k2); let n = Array.length toksr1 in let rec loop i = if i = n then true else let toks1 = toksr1.(i) and toks2 = toksr2.(i) in begin if TerminalSet.mem Terminal.sharp toks1 && TerminalSet.is_singleton toks1 then (* "#" is alone in one set: it must be a member of the other set. *) TerminalSet.mem Terminal.sharp toks2 else if TerminalSet.mem Terminal.sharp toks2 && TerminalSet.is_singleton toks2 then (* Symmetric condition. *) TerminalSet.mem Terminal.sharp toks1 else true end && loop (i+1) in loop 0 (* This function determines whether two (core-equivalent) states can be merged without creating spurious reductions on the [error] token. The rule is, we merge two states only if they agree on which reductions are permitted on the [error] token. 
Without this restriction, we might end up in a situation where we decide to introduce an [error] token into the input stream and perform a reduction, whereas a canonical LR(1) automaton, confronted with the same input string, would fail normally -- that is, it would introduce an [error] token into the input stream, but it would not be able to perform a reduction right away: the current state would be discarded. In the interest of more accurate (or sane, or predictable) error handling, I decided to introduce this restriction as of 20110124. This will cause an increase in the size of automata for grammars that use the [error] token. It might actually make the [error] token somewhat easier to use. Note that two sets can be in the subsumption relation and still be error-incompatible. Error-compatibility requires equality of the lookahead sets, restricted to [error]. Thanks to Didier Rémy for reporting a bug caused by the absence of this extra criterion. *) let error_compatible (k1, toksr1) (k2, toksr2) = assert (k1 = k2); let n = Array.length toksr1 in let rec loop i = if i = n then true else let toks1 = toksr1.(i) and toks2 = toksr2.(i) in begin if TerminalSet.mem Terminal.error toks1 then (* [error] is a member of one set: it must be a member of the other set. *) TerminalSet.mem Terminal.error toks2 else if TerminalSet.mem Terminal.error toks2 then (* Symmetric condition. *) TerminalSet.mem Terminal.error toks1 else true end && loop (i+1) in loop 0 (* Union of two states. The two states must have the same core. The new state is obtained by pointwise union of the lookahead sets. *) let union (k1, toksr1) (k2, toksr2) = assert (k1 = k2); k1, Array.init (Array.length toksr1) (fun i -> TerminalSet.union toksr1.(i) toksr2.(i) ) (* Restriction of a state to a set of tokens of interest. Every lookahead set is intersected with that set. *) let restrict toks (k, toksr) = k, Array.map (fun toksri -> TerminalSet.inter toksri toks ) toksr menhir-20171222/src/keywordExpansion.mli0000664000175000017500000000234013217215730020355 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open UnparameterizedSyntax (* [expand_grammar] expands away the keywords [$startpos] and [$endpos], as well the entire [ofs] family of keywords. Doing this early simplifies some aspects later on, in particular %inlining. *) val expand_grammar: grammar -> grammar menhir-20171222/src/Drop.ml0000664000175000017500000001333313217215730015543 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) let value = Positions.value (* The source. *) module S = Syntax (* The target. 
*) module T = UnparameterizedSyntax (* -------------------------------------------------------------------------- *) (* Most of the translation is straightforward. *) let drop_parameter (param : S.parameter) : S.symbol = match param with | S.ParameterVar sym -> value sym | S.ParameterApp _ -> (* The grammar should not have any parameterized symbols. *) assert false | S.ParameterAnonymous _ -> assert false let drop_producer ((id, param, attrs) : S.producer) : T.producer = { T.producer_identifier = value id; T.producer_symbol = drop_parameter param; T.producer_attributes = attrs } let drop_branch (branch : S.parameterized_branch) : T.branch = { T.branch_position = branch.S.pr_branch_position; T.producers = List.map drop_producer branch.S.pr_producers; T.action = branch.S.pr_action; T.branch_prec_annotation = branch.S.pr_branch_prec_annotation; T.branch_production_level = branch.S.pr_branch_production_level } let drop_rule (rule : S.parameterized_rule) : T.rule = (* The grammar should not have any parameterized symbols. *) assert (rule.S.pr_parameters = []); (* The [%public] flag is dropped. *) { T.branches = List.map drop_branch rule.S.pr_branches; T.positions = rule.S.pr_positions; T.inline_flag = rule.S.pr_inline_flag; T.attributes = rule.S.pr_attributes; } (* -------------------------------------------------------------------------- *) (* We must store [%type] declarations and [%on_error_reduce] declarations in StringMaps, whereas so far they were represented as lists. *) let drop_declarations (kind : string) (f : 'info1 -> 'info2) (decls : (S.parameter * 'info1) list) : 'info2 StringMap.t = (* Now is as good a time as any to check against multiple declarations concerning a single nonterminal symbol. Indeed, if we did not rule out this situation, then we would have to keep only one (arbitrarily chosen) declaration. To do this, we first build a map of symbols to info *and* position... *) List.fold_left (fun accu (param, info) -> let symbol = drop_parameter param in begin match StringMap.find symbol accu with | exception Not_found -> () | (_, position) -> Error.error [position; Parameters.position param] "there are multiple %s declarations for the symbol %s." kind symbol end; StringMap.add symbol (f info, Parameters.position param) accu ) StringMap.empty decls (* ... then drop the positions. *) |> StringMap.map (fun (info, _) -> info) let drop_type_declarations = drop_declarations "%type" value let drop_on_error_reduce_declarations = drop_declarations "%on_error_reduce" (fun x -> x) (* -------------------------------------------------------------------------- *) (* We must eliminate (that is, desugar) [%attribute] declarations. We examine them one by one and attach these attributes with terminal or nonterminal symbols, as appropriate. This is entirely straightforward. *) let add_attribute (g : T.grammar) param attr : T.grammar = let symbol = drop_parameter param in match StringMap.find symbol g.T.tokens with | props -> (* This is a terminal symbol. *) let props = { props with S.tk_attributes = attr :: props.S.tk_attributes } in { g with T.tokens = StringMap.add symbol props g.T.tokens } | exception Not_found -> match StringMap.find symbol g.T.rules with | rule -> (* This is a nonterminal symbol. *) let rule = { rule with T.attributes = attr :: rule.T.attributes } in { g with T.rules = StringMap.add symbol rule g.T.rules } | exception Not_found -> (* This is an unknown symbol. This should not happen. 
*) assert false let add_attributes g (params, attrs) = List.fold_left (fun g param -> List.fold_left (fun g attr -> add_attribute g param attr ) g attrs ) g params let add_attributes (decls : (S.parameter list * S.attributes) list) g = List.fold_left add_attributes g decls (* -------------------------------------------------------------------------- *) (* Putting it all together. *) let drop (g : S.grammar) : T.grammar = { T.preludes = g.S.p_preludes; T.postludes = g.S.p_postludes; T.parameters = g.S.p_parameters; T.start_symbols = StringMap.domain g.S.p_start_symbols; T.types = drop_type_declarations g.S.p_types; T.tokens = g.S.p_tokens; T.on_error_reduce = drop_on_error_reduce_declarations g.S.p_on_error_reduce; T.gr_attributes = g.S.p_grammar_attributes; T.rules = StringMap.map drop_rule g.S.p_rules } |> add_attributes g.S.p_symbol_attributes menhir-20171222/src/Unifier.ml0000664000175000017500000001446713217215730016251 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module provides a simple-minded implementation of first-order unification over an arbitrary signature. *) (* -------------------------------------------------------------------------- *) (* The signature must be described by the client, as follows. *) module type STRUCTURE = sig (* The type ['a structure] should be understood as a type of shallow terms whose leaves have type ['a]. *) type 'a structure val map: ('a -> 'b) -> 'a structure -> 'b structure val iter: ('a -> unit) -> 'a structure -> unit (* [iter2] fails if the head constructors differ. *) exception Iter2 val iter2: ('a -> 'b -> unit) -> 'a structure -> 'b structure -> unit end (* -------------------------------------------------------------------------- *) (* The unifier. *) module Make (S : STRUCTURE) = struct type 'a structure = 'a S.structure (* The data structure maintained by the unifier is as follows. *) (* A unifier variable is a point of the union-find algorithm. *) type variable = descriptor UnionFind.point and descriptor = { (* Every equivalence class carries a globally unique identifier. When a new equivalence class is created, a fresh identifier is chosen, and when two classes are merged, one of the two identifiers is kept. This identifier can be used as a key in a hash table. One should be aware, though, that identifiers are stable only as long as no unions are performed. *) id : int; (* Every equivalence class carries a structure, which is either [None], which means that the variable is just that, a variable; or [Some t], which means that the variable represents (has been equated with) the term [t]. *) structure : variable structure option; (* Every equivalence class carries a mutable mark, which is used only by the occurs check. We could also remove this field altogether and use a separate hash table, where [id]s serve as keys, but this should be faster. The occurs check is performed eagerly, so this could matter. *) mutable mark : Mark.t; } (* -------------------------------------------------------------------------- *) (* Accessors. 
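Before defining them, here is a sketch of how a client might use this
module (hypothetical code, not part of this file; the functor is applied
to a structure of pairs, so [iter2] never raises [Iter2] and unification
cannot fail on a constructor clash):

  module P = struct
    type 'a structure = 'a * 'a
    let map f (x, y) = (f x, f y)
    let iter f (x, y) = f x; f y
    exception Iter2
    let iter2 f (x1, y1) (x2, y2) = f x1 x2; f y1 y2
  end
  module U = Unifier.Make(P)
  let () =
    let y = U.fresh None in (* a fresh variable *)
    let x = U.fresh None in
    U.unify x (U.fresh (Some (y, y)))
    (* Now [U.structure x] is [Some (y, y)]. *)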
*) let id v = (UnionFind.get v).id let structure v = (UnionFind.get v).structure (* -------------------------------------------------------------------------- *) (* [fresh] creates a fresh variable with specified structure. *) let fresh = let c = ref 0 in fun structure -> let id = Misc.postincrement c in let mark = Mark.none in UnionFind.fresh { id; structure; mark } (* -------------------------------------------------------------------------- *) (* [occurs_check x y] checks that [x] does not occur within [y]. *) exception Occurs of variable * variable let occurs_check x y = (* Generate a fresh color for this particular traversal. *) let black = Mark.fresh () in (* The traversal code -- a depth-first search. *) let rec visit z = let desc = UnionFind.get z in if not (Mark.same desc.mark black) then begin desc.mark <- black; (* We are looking for [x]. *) if UnionFind.equivalent x z then raise (Occurs (x, y)) else Option.iter (S.iter visit) desc.structure end in (* The root is [y]. *) visit y (* -------------------------------------------------------------------------- *) (* The internal function [unify v1 v2] equates the variables [v1] and [v2] and propagates the consequences of this equation until a cycle is detected, an inconsistency is found, or a solved form is reached. The exceptions that can be raised are [Occurs] and [S.Iter2]. *) let rec unify (v1 : variable) (v2 : variable) : unit = if not (UnionFind.equivalent v1 v2) then begin let desc1 = UnionFind.get v1 and desc2 = UnionFind.get v2 in (* Unify the two descriptors. *) let desc = match desc1.structure, desc2.structure with | None, None -> (* variable/variable *) desc1 | None, Some _ -> (* variable/term *) occurs_check v1 v2; desc2 | Some _, None -> (* term/variable *) occurs_check v2 v1; desc1 | Some s1, Some s2 -> (* term/term *) S.iter2 unify s1 s2; { desc1 with structure = Some s1 } in (* Merge the equivalence classes. Do this last, so we get more meaningful output if the recursive call (above) fails and we have to print the two terms. *) UnionFind.union v1 v2; UnionFind.set v1 desc end (* -------------------------------------------------------------------------- *) (* The public version of [unify]. *) exception Unify of variable * variable let unify v1 v2 = try unify v1 v2 with S.Iter2 -> raise (Unify (v1, v2)) (* -------------------------------------------------------------------------- *) (* Decoding an acyclic graph as a deep term. *) (* This is a simple-minded version of the code, where sharing is lost. Its cost could be exponential if there is a lot of sharing. In practice, its use is usually appropriate, especially in the scenario where the term is meant to be printed as a tree. *) type term = | TVar of int | TNode of term structure let rec decode (v : variable) : term = match structure v with | None -> TVar (id v) | Some t -> TNode (S.map decode t) (* -------------------------------------------------------------------------- *) end menhir-20171222/src/printer.ml0000664000175000017500000005154413217215730016330 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* A pretty-printer for [IL]. 
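It is functorized over an abstract notion of output channel (the signature
[X] below), so that the same printing code can target an [out_channel] or a
[Buffer.t]. To fix intuitions, a hypothetical instantiation for standard
output might read:

  module StdoutX = struct
    type channel = out_channel
    let fprintf = Printf.fprintf
    let output_substring = output_substring
    let f = stdout
    let locate_stretches = None (* no line number directives *)
  end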
*) open IL module PreliminaryMake (X : sig (* We assume that the following types and functions are given. This allows us to work both with buffers of type [Buffer.t] and with output channels of type [out_channel]. *) type channel val fprintf: channel -> ('a, channel, unit) format -> 'a val output_substring: channel -> string -> int -> int -> unit (* This is the channel that is being written to. *) val f: channel (* [locate_stretches] controls the way we print OCaml stretches (types and semantic actions). If it is [Some dstfilename], where [dstfilename] is the name of the file that is being written, then we surround stretches with OCaml line number directives of the form # . If it is [None], then we don't. *) (* Providing line number directives allows the OCaml typechecker to report type errors in the .mly file, instead of in the generated .ml / .mli files. Line number directives also affect the dynamic semantics of any [assert] statements contained in semantic actions: when they are provided, the [Assert_failure] exception carries a location in the .mly file. As a general rule of thumb, line number directives should always be provided, except perhaps where we think that they decrease readability (e.g., in a generated .mli file). *) val locate_stretches: string option end) = struct open X let output_char f c = fprintf f "%c" c let output_string f s = fprintf f "%s" s let flush f = fprintf f "%!" (* ------------------------------------------------------------------------- *) (* Dealing with newlines and indentation. *) let maxindent = 120 let whitespace = String.make maxindent ' ' let indentation = ref 0 let line = ref 1 (* [rawnl] is, in principle, the only place where writing a newline character to the output channel is permitted. This ensures that the line counter remains correct. But see also [stretch] and [typ0]. *) let rawnl f = incr line; output_char f '\n' let nl f = rawnl f; output_substring f whitespace 0 !indentation let indent ofs producer f x = let old_indentation = !indentation in let new_indentation = old_indentation + ofs in if new_indentation <= maxindent then indentation := new_indentation; nl f; producer f x; indentation := old_indentation (* This produces a line number directive. *) let sharp f line file = fprintf f "%t# %d \"%s\"%t" rawnl line file rawnl (* ------------------------------------------------------------------------- *) (* Printers of atomic elements. *) let nothing _ = () let space f = output_char f ' ' let comma f = output_string f ", " let semi f = output_char f ';' let seminl f = semi f; nl f let times f = output_string f " * " let letrec f = output_string f "let rec " let letnonrec f = output_string f "let " let keytyp f = output_string f "type " let exc f = output_string f "exception " let et f = output_string f "and " let var f x = output_string f x let bar f = output_string f " | " (* ------------------------------------------------------------------------- *) (* List printers. *) (* A list with a separator in front of every element. *) let rec list elem sep f = function | [] -> () | e :: es -> fprintf f "%t%a%a" sep elem e (list elem sep) es (* A list with a separator between elements. *) let seplist elem sep f = function | [] -> () | e :: es -> fprintf f "%a%a" elem e (list elem sep) es (* OCaml type parameters. 
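For instance, with a type-variable printer for both [p0] and [p1], the parameter lists [[]], [["a"]], and [["a"; "b"]] are printed as nothing, as "'a ", and as "('a, 'b) ", respectively -- as in [type ('a, 'b) t].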
*) let typeparams p0 p1 f = function | [] -> () | [ param ] -> fprintf f "%a " p0 param | _ :: _ as params -> fprintf f "(%a) " (seplist p1 comma) params (* ------------------------------------------------------------------------- *) (* Expression printer. *) (* We use symbolic constants that stand for subsets of the expression constructors. We do not use numeric levels to stand for subsets, because our subsets do not form a linear inclusion chain. *) type subset = | All | AllButSeq | AllButFunTryMatch | AllButFunTryMatchSeq | AllButLetFunTryMatch | AllButLetFunTryMatchSeq | AllButIfThenSeq | OnlyAppOrAtom | OnlyAtom (* This computes the intersection of a subset with the constraint "should not be a sequence". *) let andNotSeq = function | All | AllButSeq -> AllButSeq | AllButFunTryMatch | AllButFunTryMatchSeq -> AllButFunTryMatchSeq | AllButLetFunTryMatch | AllButLetFunTryMatchSeq -> AllButLetFunTryMatchSeq | AllButIfThenSeq -> AllButIfThenSeq | OnlyAppOrAtom -> OnlyAppOrAtom | OnlyAtom -> OnlyAtom (* This defines the semantics of subsets by relating expressions with subsets. *) let rec member e k = match e with | EComment _ | EPatComment _ -> true | EFun _ | ETry _ | EMatch _ -> begin match k with | AllButFunTryMatch | AllButFunTryMatchSeq | AllButLetFunTryMatch | AllButLetFunTryMatchSeq | OnlyAppOrAtom | OnlyAtom -> false | _ -> true end | ELet ([], e) -> member e k | ELet ((PUnit, _) :: _, _) -> begin match k with | AllButSeq | AllButFunTryMatchSeq | AllButLetFunTryMatchSeq | AllButIfThenSeq | OnlyAppOrAtom | OnlyAtom -> false | _ -> true end | ELet (_ :: _, _) -> begin match k with | AllButLetFunTryMatch | AllButLetFunTryMatchSeq | OnlyAppOrAtom | OnlyAtom -> false | _ -> true end | EIfThen _ -> begin match k with | AllButIfThenSeq | OnlyAppOrAtom | OnlyAtom -> false | _ -> true end | EApp (_, _ :: _) | EData (_, _ :: _) | EMagic _ | ERepr _ | ERaise _ -> begin match k with | OnlyAtom -> false | _ -> true end | ERecordWrite _ | EIfThenElse _ -> begin match k with | OnlyAppOrAtom | OnlyAtom -> false | _ -> true end | EVar _ | ETextual _ | EApp (_, []) | EData (_, []) | ETuple _ | EAnnot _ | ERecord _ | ERecordAccess (_, _) | EIntConst _ | EStringConst _ | EUnit | EArray _ | EArrayAccess (_, _) -> true let rec exprlet k pes f e2 = match pes with | [] -> exprk k f e2 | (PUnit, e1) :: pes -> fprintf f "%a%t%a" (exprk AllButLetFunTryMatch) e1 seminl (exprlet k pes) e2 | (PVar id1, EAnnot (e1, ts1)) :: pes -> (* TEMPORARY current ocaml does not support type schemes here; drop quantifiers, if any *) fprintf f "let %s : %a = %a in%t%a" id1 typ ts1.body (* scheme ts1 *) expr e1 nl (exprlet k pes) e2 | (PVar id1, EFun (ps1, e1)) :: pes -> fprintf f "let %s%a = %a in%t%t%a" id1 (list pat0 space) ps1 (indent 2 expr) e1 nl nl (exprlet k pes) e2 | (p1, (ELet _ as e1)) :: pes -> fprintf f "let %a =%a%tin%t%a" pat p1 (indent 2 expr) e1 nl nl (exprlet k pes) e2 | (p1, e1) :: pes -> fprintf f "let %a = %a in%t%a" pat p1 expr e1 nl (exprlet k pes) e2 and atom f e = exprk OnlyAtom f e and app f e = exprk OnlyAppOrAtom f e and expr f e = exprk All f e and exprk k f e = if member e k then match e with | EComment (c, e) -> if Settings.comment then fprintf f "(* %s *)%t%a" c nl (exprk k) e else exprk k f e | EPatComment (s, p, e) -> if Settings.comment then fprintf f "(* %s%a *)%t%a" s pat p nl (exprk k) e else exprk k f e | ELet (pes, e2) -> exprlet k pes f e2 | ERecordWrite (e1, field, e2) -> fprintf f "%a.%s <- %a" atom e1 field (exprk (andNotSeq k)) e2 | EMatch (_, []) -> assert false | EMatch (e, brs) -> 
fprintf f "match %a with%a" expr e (branches k) brs | ETry (_, []) -> assert false | ETry (e, brs) -> fprintf f "try%a%twith%a" (indent 2 expr) e nl (branches k) brs | EIfThen (e1, e2) -> fprintf f "if %a then%a" expr e1 (indent 2 (exprk (andNotSeq k))) e2 | EIfThenElse (e0, e1, e2) -> fprintf f "if %a then%a%telse%a" expr e0 (indent 2 (exprk AllButIfThenSeq)) e1 nl (indent 2 (exprk (andNotSeq k))) e2 | EFun (ps, e) -> fprintf f "fun%a ->%a" (list pat0 space) ps (indent 2 (exprk k)) e | EApp (EVar op, [ e1; e2 ]) when op.[0] = '(' && op.[String.length op - 1] = ')' -> let op = String.sub op 1 (String.length op - 2) in fprintf f "%a %s %a" app e1 op app e2 | EApp (e, args) -> fprintf f "%a%a" app e (list atom space) args | ERaise e -> fprintf f "raise %a" atom e | EMagic e -> fprintf f "Obj.magic %a" atom e | ERepr e -> fprintf f "Obj.repr %a" atom e | EData (d, []) -> var f d | EData (d, [ arg ]) -> fprintf f "%s %a" d atom arg | EData ("::", [ arg1; arg2 ]) -> (* Special case for infix cons. *) fprintf f "%a :: %a" atom arg1 atom arg2 | EData (d, (_ :: _ :: _ as args)) -> fprintf f "%s (%a)" d (seplist app comma) args | EVar v -> var f v | ETextual action -> stretch false f action | EUnit -> fprintf f "()" | EIntConst k -> if k >= 0 then fprintf f "%d" k else fprintf f "(%d)" k | EStringConst s -> fprintf f "\"%s\"" (String.escaped s) | ETuple [] -> assert false | ETuple [ e ] -> atom f e | ETuple (_ :: _ :: _ as es) -> fprintf f "(%a)" (seplist app comma) es | EAnnot (e, s) -> (* TEMPORARY current ocaml does not support type schemes here; drop quantifiers, if any *) fprintf f "(%a : %a)" app e typ s.body (* should be scheme s *) | ERecordAccess (e, field) -> fprintf f "%a.%s" atom e field | ERecord fs -> fprintf f "{%a%t}" (indent 2 (seplist field nl)) fs nl | EArray fs -> fprintf f "[|%a%t|]" (indent 2 (seplist array_field nl)) fs nl | EArrayAccess (e, i) -> fprintf f "%a.(%a)" atom e expr i else fprintf f "(%a)" expr e and stretch raw f stretch = let content = stretch.Stretch.stretch_content and raw_content = stretch.Stretch.stretch_raw_content in match X.locate_stretches with | Some basename -> sharp f stretch.Stretch.stretch_linenum stretch.Stretch.stretch_filename; output_string f content; line := !line + stretch.Stretch.stretch_linecount; sharp f (!line + 2) basename; output_substring f whitespace 0 !indentation | None -> output_string f (if raw then raw_content else content) and branches k f = function | [] -> () | [ br ] -> fprintf f "%t| %a" nl (branch k) br | br :: brs -> fprintf f "%t| %a%a" nl (branch AllButFunTryMatch) br (branches k) brs and branch k f br = fprintf f "%a ->%a" pat br.branchpat (indent 4 (exprk k)) br.branchbody and field f (label, e) = fprintf f "%s = %a%t" label app e semi and fpat f (label, p) = fprintf f "%s = %a%t" label pat p semi and array_field f e = fprintf f "%a%t" app e semi and pat0 f = function | PUnit -> fprintf f "()" | PWildcard -> fprintf f "_" | PVar x -> var f x | PData (d, []) -> var f d | PTuple [] -> assert false | PTuple [ p ] -> pat0 f p | PTuple (_ :: _ :: _ as ps) -> fprintf f "(%a)" (seplist pat1 comma) ps | PAnnot (p, t) -> fprintf f "(%a : %a)" pat p typ t | PRecord fps -> (* In a record pattern, fields can be omitted. 
*) let fps = List.filter (function (_, PWildcard) -> false | _ -> true) fps in fprintf f "{%a%t}" (indent 2 (seplist fpat nl)) fps nl | p -> fprintf f "(%a)" pat p and pat1 f = function | PData (d, [ arg ]) -> fprintf f "%s %a" d pat0 arg | PData (d, (_ :: _ :: _ as args)) -> fprintf f "%s (%a)" d (seplist pat1 comma) args | PTuple [ p ] -> pat1 f p | p -> pat0 f p and pat2 f = function | POr [] -> assert false | POr (_ :: _ as ps) -> seplist pat2 bar f ps | PTuple [ p ] -> pat2 f p | p -> pat1 f p and pat f p = pat2 f p and typevar f = function | "_" -> fprintf f "_" | v -> fprintf f "'%s" v and typ0 f = function | TypTextual (Stretch.Declared ocamltype) -> (* Parentheses are necessary to avoid confusion between 1 - ary data constructor with n arguments and n - ary data constructor. *) fprintf f "(%a)" (stretch true) ocamltype | TypTextual (Stretch.Inferred t) -> line := !line + LineCount.count 0 (Lexing.from_string t); fprintf f "(%s)" t | TypVar v -> typevar f v | TypApp (t, params) -> fprintf f "%a%s" (typeparams typ0 typ) params t | t -> fprintf f "(%a)" typ t and typ1 f = function | TypTuple [] -> assert false | TypTuple (_ :: _ as ts) -> seplist typ0 times f ts | t -> typ0 f t and typ2 f = function | TypArrow (t1, t2) -> fprintf f "%a -> %a" typ1 t1 typ2 t2 | t -> typ1 f t and typ f = typ2 f and scheme f scheme = match scheme.quantifiers with | [] -> typ f scheme.body | qs -> fprintf f "%a. %a" (list typevar space) qs typ scheme.body (* ------------------------------------------------------------------------- *) (* Toplevel definition printer. *) (* The tuple of the arguments of a data constructor. *) let datavalparams f params = (* [typ1] because [type t = A of int -> int ] is not allowed by OCaml *) (* [type t = A of (int -> int)] is allowed *) seplist typ1 times f params (* A data constructor definition. *) let datadef typename f def = fprintf f " | %s" def.dataname; match def.datavalparams, def.datatypeparams with | [], None -> (* | A *) () | _ :: _, None -> (* | A of t * u *) fprintf f " of %a" datavalparams def.datavalparams | [], Some indices -> (* | A : (v, w) ty *) fprintf f " : %a%s" (typeparams typ0 typ) indices typename | _ :: _, Some indices -> (* | A : t * u -> (v, w) ty *) fprintf f " : %a -> %a%s" datavalparams def.datavalparams (typeparams typ0 typ) indices typename let fielddef f def = fprintf f " %s%s: %a" (if def.modifiable then "mutable " else "") def.fieldname scheme def.fieldtype let typerhs typename f = function | TDefRecord [] -> assert false | TDefRecord (_ :: _ as fields) -> fprintf f " = {%t%a%t}" nl (seplist fielddef seminl) fields nl | TDefSum [] -> () | TDefSum defs -> fprintf f " = %a" (list (datadef typename) nl) defs | TAbbrev t -> fprintf f " = %a" typ t let typeconstraint f = function | None -> () | Some (t1, t2) -> fprintf f "%tconstraint %a = %a" nl typ t1 typ t2 let typedef f def = fprintf f "%a%s%a%a" (typeparams typevar typevar) def.typeparams def.typename (typerhs def.typename) def.typerhs typeconstraint def.typeconstraint let rec pdefs pdef sep1 sep2 f = function | [] -> () | [ def ] -> fprintf f "%t%a" sep1 pdef def | def :: defs -> fprintf f "%t%a%t%t%a" sep1 pdef def (* Separate two successive items with two newlines. 
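The printers above are written in a combinator style: a printer is a function of type channel -> 'a -> unit, and the [%a] directive composes printers. A simplified reconstruction of the [seplist] combinator used throughout this file (the real [seplist], [comma], [times], [nl], etc. are defined earlier in this file; the version below is only a sketch of the idea):

let comma f () = Printf.fprintf f ", "
let rec seplist elem sep f = function
  | [] -> ()
  | [ x ] -> elem f x
  | x :: xs -> Printf.fprintf f "%a%a%a" elem x sep () (seplist elem sep) xs
let int f (i : int) = Printf.fprintf f "%d" i
let () = Printf.printf "(%a)\n" (seplist int comma) [ 1; 2; 3 ]
(* prints "(1, 2, 3)" *)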
*) nl nl (pdefs pdef sep2 sep2) defs let valdef f = function | { valpat = PVar id; valval = EAnnot (e, ts) } -> (* TEMPORARY current ocaml does not support type schemes here; drop quantifiers, if any *) fprintf f "%s : %a =%a" id typ ts.body (* scheme ts *) (indent 2 expr) e | { valpat = p; valval = e } -> fprintf f "%a =%a" pat p (indent 2 expr) e let valdefs recursive = pdefs valdef (if recursive then letrec else letnonrec) et let typedefs = pdefs typedef keytyp et let excdef in_intf f def = match in_intf, def.exceq with | _, None | true, Some _ -> fprintf f "%s" def.excname | false, Some s -> fprintf f "%s = %s" def.excname s let excdefs in_intf = pdefs (excdef in_intf) exc exc let block format body f b = fprintf f format (fun f b -> indent 2 body f b; nl f ) b (* Convention: each structure (or interface) item prints a newline before and after itself. *) let rec structure_item f item = match item with | SIFunctor ([], s) -> structure f s | SIStretch stretches -> List.iter (stretch false f) stretches | _ -> nl f; begin match item with | SIFunctor (params, s) -> fprintf f "module Make%a%t= %a" (list (stretch false) nl) params nl structend s | SIExcDefs defs -> excdefs false f defs | SITypeDefs defs -> typedefs f defs | SIValDefs (recursive, defs) -> valdefs recursive f defs | SIStretch _ -> assert false (* already handled above *) | SIModuleDef (name, rhs) -> fprintf f "module %s = %a" name modexpr rhs | SIInclude e -> fprintf f "include %a" modexpr e | SIComment comment -> fprintf f "(* %s *)" comment end; nl f and structend f s = block "struct%aend" structure f s and structure f s = list structure_item nothing f s and modexpr f = function | MVar x -> fprintf f "%s" x | MStruct s -> structend f s | MApp (e1, e2) -> fprintf f "%a (%a)" modexpr e1 modexpr e2 let valdecl f (x, ts) = fprintf f "val %s: %a" x typ ts.body let with_kind f = function | WKNonDestructive -> output_string f "=" | WKDestructive -> output_string f ":=" let rec module_type f = function | MTNamedModuleType s -> output_string f s | MTWithType (mt, params, name, wk, t) -> fprintf f "%a%a" module_type mt (indent 2 with_type) (params, name, wk, t) | MTSigEnd i -> sigend f i and with_type f (params, name, wk, t) = fprintf f "with type %a %a %a" typ (TypApp (name, List.map (fun v -> TypVar v) params)) with_kind wk typ t and interface_item f item = match item with | IIFunctor ([], i) -> interface f i | _ -> nl f; begin match item with | IIFunctor (params, i) -> fprintf f "module Make%a%t: %a" (list (stretch false) nl) params nl sigend i | IIExcDecls defs -> excdefs true f defs | IITypeDecls defs -> typedefs f defs | IIValDecls decls -> pdefs valdecl nothing nothing f decls | IIInclude mt -> fprintf f "include %a" module_type mt | IIModule (name, mt) -> fprintf f "module %s : %a" name module_type mt | IIComment comment -> fprintf f "(* %s *)" comment end; nl f and sigend f i = block "sig%aend" interface f i and interface f i = list interface_item nothing f i let program s = structure X.f s; flush X.f let interface i = interface X.f i; flush X.f let expr e = expr X.f e; flush X.f end (* ------------------------------------------------------------------------- *) (* Instantiation with output channels. 
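The instantiations below rely on writing the printer once, against an abstract notion of output channel, and instantiating it twice. A self-contained sketch of this technique (the SINK signature and the names in it are invented for this example; the actual functor argument used here additionally carries [locate_stretches] and [output_substring]):

module type SINK = sig
  type channel
  val f: channel
  val fprintf: channel -> ('a, channel, unit) format -> 'a
end
module MakeGreeter (X : SINK) = struct
  let greet name = X.fprintf X.f "Hello, %s!\n" name
end
(* Instantiated with out_channel / Printf.fprintf below; a Buffer.t /
   Printf.bprintf instance works just as well, which is the point. *)
module OnStdout = MakeGreeter (struct
  type channel = out_channel
  let f = stdout
  let fprintf = Printf.fprintf
end)
let () = OnStdout.greet "world"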
*) module Make (X : sig val f: out_channel val locate_stretches: string option end) = struct include PreliminaryMake(struct type channel = out_channel include X let fprintf = Printf.fprintf let output_substring = output_substring end) end (* ------------------------------------------------------------------------- *) (* Instantiation with buffers. *) module MakeBuffered (X : sig val f: Buffer.t val locate_stretches: string option end) = struct include PreliminaryMake(struct type channel = Buffer.t include X let fprintf = Printf.bprintf let output_substring = Buffer.add_substring end) end (* ------------------------------------------------------------------------- *) (* Common instantiations. *) let print_expr f e = let module P = Make (struct let f = f let locate_stretches = None end) in P.expr e let string_of_expr e = let b = Buffer.create 512 in let module P = MakeBuffered (struct let f = b let locate_stretches = None end) in P.expr e; Buffer.contents b menhir-20171222/src/parameters.ml0000664000175000017500000000730513217215730017004 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* TEMPORARY clean up and write an .mli file *) open Syntax open Positions let app p ps = match ps with | [] -> ParameterVar p | _ -> ParameterApp (p, ps) let unapp = function | ParameterVar x -> (x, []) | ParameterApp (p, ps) -> (p, ps) | ParameterAnonymous _ -> (* Anonymous rules are eliminated early on. *) assert false let unvar = function | ParameterVar x -> x | ParameterApp _ | ParameterAnonymous _ -> assert false let rec map f = function | ParameterVar x -> ParameterVar (f x) | ParameterApp (p, ps) -> ParameterApp (f p, List.map (map f) ps) | ParameterAnonymous _ -> (* Anonymous rules are eliminated early on. *) assert false let rec fold f init = function | ParameterVar x -> f init x | ParameterApp (p, ps) -> f (List.fold_left (fold f) init ps) p | ParameterAnonymous _ -> (* Anonymous rules are eliminated early on. *) assert false let identifiers m p = fold (fun accu x -> StringMap.add x.value x.position accu) m p let rec occurs (x : symbol) (p : parameter) = match p with | ParameterVar y -> x = y.value | ParameterApp (y, ps) -> x = y.value || List.exists (occurs x) ps | ParameterAnonymous _ -> assert false let occurs_shallow (x : symbol) (p : parameter) = match p with | ParameterVar y -> x = y.value | ParameterApp (y, _) -> assert (x <> y.value); false | ParameterAnonymous _ -> assert false let occurs_deep (x : symbol) (p : parameter) = match p with | ParameterVar _ -> false | ParameterApp (_, ps) -> List.exists (occurs x) ps | ParameterAnonymous _ -> assert false type t = parameter let rec equal x y = match x, y with | ParameterVar x, ParameterVar y -> x.value = y.value | ParameterApp (p1, p2), ParameterApp (p1', p2') -> p1.value = p1'.value && List.for_all2 equal p2 p2' | _ -> (* Anonymous rules are eliminated early on. *) false let hash = function | ParameterVar x | ParameterApp (x, _) -> Hashtbl.hash (Positions.value x) | ParameterAnonymous _ -> (* Anonymous rules are eliminated early on. 
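To make the behavior of [fold] above concrete, here is a miniature version, a sketch that strips away the [Positions.t] decorations (the type and names are invented for this example). The accumulator visits every symbol that occurs in a parameter, including the head symbol of each application:

type 'a param = Var of 'a | App of 'a * 'a param list
let rec fold f accu = function
  | Var x -> f accu x
  | App (x, ps) -> f (List.fold_left (fold f) accu ps) x
(* Counting the symbols of list(pair(A, B)) yields 4. *)
let () =
  let count p = fold (fun n _ -> n + 1) 0 p in
  assert (count (App ("list", [ App ("pair", [ Var "A"; Var "B" ]) ])) = 4)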
*) assert false let position = function | ParameterVar x | ParameterApp (x, _) -> Positions.position x | ParameterAnonymous bs -> Positions.position bs let with_pos p = Positions.with_pos (position p) p let rec print with_spaces = function | ParameterVar x | ParameterApp (x, []) -> x.value | ParameterApp (x, ps) -> let separator = if with_spaces then ", " else "," in Printf.sprintf "%s(%s)" x.value (Misc.separated_list_to_string (print with_spaces) separator ps) | ParameterAnonymous _ -> assert false menhir-20171222/src/invariant.ml0000664000175000017500000006252513217215730016641 0ustar fpottierfpottier(******************************************************************************) (*                                                                            *) (*                                   Menhir                                   *) (*                                                                            *) (*                       François Pottier, Inria Paris                        *) (*              Yann Régis-Gianas, PPS, Université Paris Diderot              *) (*                                                                            *) (*  Copyright Inria. All rights reserved. This file is distributed under the  *) (*  terms of the GNU General Public License version 2, as described in the    *) (*  file LICENSE.                                                             *) (*                                                                            *) (******************************************************************************) (* This module discovers information about the shape and content of the stack in each of the automaton's states. *) open Grammar module C = Conflict (* artificial dependency; ensures that [Conflict] runs first *) (* ------------------------------------------------------------------------ *) (* Compute a lower bound on the height of the stack at every state. At the same time, compute which symbols are held in this stack prefix. *) (* In order to compute (a lower bound on) the height of the stack at a state [s], we examine the LR(0) items that compose [s]. For each item, if the bullet is at position [pos], then we can be assured that the height of the stack is at least [pos]. Thus, we compute the maximum of [pos] over all items (of which there is at least one). *) (* The set of items that we use is not closed, but this does not matter; the items that would be added by the closure would not add any information regarding the height of the stack, since the bullet is at position 0 in these items. *) (* Instead of computing just the stack height, we compute, in the same manner, which symbols are on the stack at a state [s]. This is an array of symbols whose length is the height of the stack at [s]. By convention, the top of the stack is the end of the array. *) (* We first compute and tabulate this information at the level of the LR(0) automaton. *) let stack_symbols : Lr0.node -> Symbol.t array = let dummy = Array.make 0 (Symbol.T Terminal.sharp) in Misc.tabulate Lr0.n (fun node -> Item.Set.fold (fun item accu -> let _prod, _nt, rhs, pos, _length = Item.def item in if pos > Array.length accu then Array.sub rhs 0 pos else accu ) (Lr0.items node) dummy ) (* Then, it is easy to extend it to the LR(1) automaton. *) let stack_symbols (node : Lr1.node) : Symbol.t array = stack_symbols (Lr0.core (Lr1.state node)) let stack_height (node : Lr1.node) : int = Array.length (stack_symbols node) (* ------------------------------------------------------------------------ *) (* Above, we have computed a prefix of the stack at every state. We have computed the length of this prefix and the symbols that are held in this prefix of the stack. Now, compute which states may be held in this prefix. *) (* In order to compute this information, we perform an analysis of the automaton, via a least fixed point computation.
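For instance (a hypothetical state, for illustration): if a state contains the two items E -> E . + E (bullet at position 1) and E -> E + . E (bullet at position 2), the longest bullet prefix wins, and the known stack prefix at this state is the two-symbol array [| E; + |], top of stack at the right end. The core of the computation above, extracted and made self-contained:

let stack_prefix (items : ('sym array * int) list) : 'sym array =
  (* Each item is (right-hand side, bullet position); keep the longest prefix. *)
  List.fold_left (fun accu (rhs, pos) ->
    if pos > Array.length accu then Array.sub rhs 0 pos else accu
  ) [||] items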
*) (* It is worth noting that it would be possible to use an analysis based on a least fixed point computation to discover at the same time the length of the stack prefix, the symbols that it contains, and the states that it may contain. This alternate approach, which was used until 2012/08/25, would lead us to discovering a richer invariant, that is, potentially longer prefixes. This extra information, however, was useless; computing it was a waste of time. Hence, as of 2012/08/25, the height of the stack prefix and the symbols that it contains are predicted (see above), and the least fixed point computation is used only to populate these prefixes of predictable length with state information. *) (* By the way, this least fixed point analysis remains the most costly computation throughout this module. *) (* Vectors of sets of states. *) module StateVector = struct type property = Lr1.NodeSet.t list let empty = [] let rec equal v1 v2 = match v1, v2 with | [], [] -> true | states1 :: v1, states2 :: v2 -> Lr1.NodeSet.equal states1 states2 && equal v1 v2 | _, _ -> (* Because all heights are known ahead of time, we are able to (and careful to) compare only vectors of equal length. *) assert false let rec join v1 v2 = match v1, v2 with | [], [] -> [] | states1 :: v1, states2 :: v2 -> Lr1.NodeSet.union states1 states2 :: join v1 v2 | _, _ -> (* Because all heights are known ahead of time, we are able to (and careful to) compare only vectors of equal length. *) assert false let push v x = x :: v let truncate = MenhirLib.General.take end (* In order to perform the fixed point computation, we must extend our type of vectors with a bottom element. This element will not appear in the least fixed point, provided every state of the automaton is reachable. *) module StateLattice = struct type property = | Bottom | NonBottom of StateVector.property let bottom = Bottom let empty = NonBottom StateVector.empty let equal v1 v2 = match v1, v2 with | Bottom, Bottom -> true | NonBottom v1, NonBottom v2 -> StateVector.equal v1 v2 | _, _ -> false let join v1 v2 = match v1, v2 with | Bottom, v | v, Bottom -> v | NonBottom v1, NonBottom v2 -> NonBottom (StateVector.join v1 v2) let push v x = match v with | Bottom -> Bottom | NonBottom v -> NonBottom (StateVector.push v x) let truncate h v = match v with | Bottom -> Bottom | NonBottom v -> NonBottom (StateVector.truncate h v) let is_maximal _ = false end open StateLattice (* Define the fixed point. *) let stack_states : Lr1.node -> property = let module F = Fix.Make (Maps.PersistentMapsToImperativeMaps(Lr1.NodeMap)) (StateLattice) in F.lfp (fun node (get : Lr1.node -> property) -> (* We use the fact that a state has incoming transitions if and only if it is not a start state. *) match Lr1.incoming_symbol node with | None -> assert (Lr1.predecessors node = []); assert (stack_height node = 0); (* If [node] is a start state, then the stack at [node] may be (in fact, must be) the empty stack. *) empty | Some _symbol -> (* If [node] is not a start state, then include the contribution of every incoming transition. We compute a join over all predecessors. The contribution of one predecessor is the abstract value found at this predecessor, extended with a new cell for this transition, and truncated to the stack height at [node], so as to avoid obtaining a vector that is longer than expected/necessary.
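The shape of this fixed point computation can be demonstrated on a toy problem, a sketch that assumes the same [Fix] and [Maps] modules used above (the graph and all names below are invented). Here the property is a Boolean, "reachable from node 0", and [is_maximal] lets the solver stop propagating once a node is known reachable:

module B = struct
  type property = bool
  let bottom = false
  let equal (a : bool) b = a = b
  let is_maximal b = b
end
module IntMap = Map.Make (struct type t = int let compare = compare end)
module F = Fix.Make (Maps.PersistentMapsToImperativeMaps (IntMap)) (B)
let predecessors = function 1 -> [ 0 ] | 2 -> [ 1 ] | _ -> []
let reachable : int -> bool =
  F.lfp (fun node (get : int -> bool) ->
    node = 0 || List.exists get (predecessors node))
let () = assert (reachable 2 && not (reachable 3))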
*) let height = stack_height node in List.fold_left (fun v predecessor -> join v (truncate height (push (get predecessor) (Lr1.NodeSet.singleton predecessor)) ) ) bottom (Lr1.predecessors node) ) (* If every state is reachable, then the least fixed point must be non-bottom everywhere, so we may view it as a function that produces a vector of sets of states. *) let stack_states (node : Lr1.node) : StateVector.property = match stack_states node with | Bottom -> (* apparently this node is unreachable *) assert false | NonBottom v -> v (* ------------------------------------------------------------------------ *) (* From the above information, deduce, for each production, the states that may appear in the stack when this production is reduced. *) (* We are careful to produce a vector of states whose length is exactly that of the production [prod]. *) let production_states : Production.index -> StateLattice.property = Production.tabulate (fun prod -> let nodes = Lr1.production_where prod in let height = Production.length prod in Lr1.NodeSet.fold (fun node accu -> join accu (truncate height (NonBottom (stack_states node)) ) ) nodes bottom ) (* ------------------------------------------------------------------------ *) (* We now determine which states must be represented, that is, explicitly pushed onto the stack. For simplicity, a state is either always represented or never represented. More fine-grained strategies, where a single state is sometimes pushed onto the stack and sometimes not pushed, depending on which outgoing transition is being taken, are conceivable, but quite tricky, and probably not worth the trouble. (1) If two states are liable to appear within a single stack cell, then one is represented if and only if the other is represented. This ensures that the structure of stacks is known everywhere and that we can propose types for stacks. (2) If a state [s] has an outgoing transition along nonterminal symbol [nt], and if the [goto] table for symbol [nt] has more than one target, then state [s] is represented. (3) If a stack cell contains more than one state and if at least one of these states is able to handle the [error] token, then these states are represented. (4) If the semantic action associated with a production mentions the [$syntaxerror] keyword, then the state that is being reduced to (that is, the state that initiated the recognition of this production) is represented. (Indeed, it will be passed as an argument to [errorcase].) *) (* Data. *) let rep : bool UnionFind.point array = Array.init Lr1.n (fun _ -> UnionFind.fresh false) (* Getter. *) let represented state = rep.(Lr1.number state) (* Setters. *) let represent state = UnionFind.set (represented state) true let represents states = represent (Lr1.NodeSet.choose states) (* Enforce condition (1) above. *) let share (v : StateVector.property) = List.iter (fun states -> let dummy = UnionFind.fresh false in Lr1.NodeSet.iter (fun state -> UnionFind.union dummy (represented state) ) states ) v let () = Lr1.iter (fun node -> share (stack_states node) ); Production.iter (fun prod -> match production_states prod with | Bottom -> () | NonBottom v -> share v ) (* Enforce condition (2) above. *) let () = Nonterminal.iter (fun nt -> let count = Lr1.targets (fun count _ _ -> count + 1 ) 0 (Symbol.N nt) in if count > 1 then Lr1.targets (fun () sources _ -> List.iter represent sources ) () (Symbol.N nt) ) (* Enforce condition (3) above. 
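The conditions above are enforced through a union-find structure over Boolean points. A minimal demonstration of the discipline (assuming, as the surrounding code does, that [set] and [get] act on a point's equivalence class): merging two points first, then marking one, marks both.

let p1 = UnionFind.fresh false
let p2 = UnionFind.fresh false
let () = UnionFind.union p1 p2      (* condition (1): p1 and p2 must agree *)
let () = UnionFind.set p1 true      (* decide to represent p1... *)
let () = assert (UnionFind.get p2)  (* ...and p2 is represented as well *)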
*) let handler state = try let _ = SymbolMap.find (Symbol.T Terminal.error) (Lr1.transitions state) in true with Not_found -> try let _ = TerminalMap.lookup Terminal.error (Lr1.reductions state) in true with Not_found -> false let handlers states = Lr1.NodeSet.exists handler states let () = Lr1.iter (fun node -> let v = stack_states node in List.iter (fun states -> if Lr1.NodeSet.cardinal states >= 2 && handlers states then represents states ) v ) (* Enforce condition (4) above. *) let () = Production.iterx (fun prod -> if Action.has_syntaxerror (Production.action prod) then match production_states prod with | Bottom -> () | NonBottom v -> let sites = Lr1.production_where prod in let length = Production.length prod in if length = 0 then Lr1.NodeSet.iter represent sites else let states = List.nth v (length - 1) in represents states ) (* Define accessors. *) let represented state = UnionFind.get (represented state) let representeds states = if Lr1.NodeSet.is_empty states then assert false else represented (Lr1.NodeSet.choose states) (* Statistics. *) let () = Error.logC 1 (fun f -> let count = Lr1.fold (fun count node -> if represented node then count + 1 else count ) 0 in Printf.fprintf f "%d out of %d states are represented.\n" count Lr1.n ) (* ------------------------------------------------------------------------ *) (* Accessors for information about the stack. *) (* We describe a stack prefix as a list of cells, where each cell is a pair of a symbol and a set of states. The top of the stack is the head of the list. *) type cell = Symbol.t * Lr1.NodeSet.t type word = cell list (* This auxiliary function converts a stack-as-an-array (top of stack at the right end) to a stack-as-a-list (top of stack at list head). *) let convert a = let n = Array.length a in let rec loop i accu = if i = n then accu else loop (i + 1) (a.(i) :: accu) in loop 0 [] (* [stack s] describes the stack when the automaton is in state [s]. *) let stack node : word = List.combine (convert (stack_symbols node)) (stack_states node) (* [prodstack prod] describes the stack when production [prod] is about to be reduced. *) let prodstack prod : word = match production_states prod with | Bottom -> (* This production is never reduced. *) assert false | NonBottom v -> List.combine (convert (Production.rhs prod)) v (* [gotostack nt] is the structure of the stack when a shift transition over nonterminal [nt] is about to be taken. It consists of just one cell. *) let gotostack : Nonterminal.t -> word = Nonterminal.tabulate (fun nt -> let sources = Lr1.targets (fun accu sources _ -> List.fold_right Lr1.NodeSet.add sources accu ) Lr1.NodeSet.empty (Symbol.N nt) in [ Symbol.N nt, sources ] ) let fold f accu w = List.fold_right (fun (symbol, states) accu -> f accu (representeds states) symbol states ) w accu let fold_top f accu w = match w with | [] -> accu | (symbol, states) :: _ -> f (representeds states) symbol let print (w : word) = let b = Buffer.create 64 in fold (fun () _represented symbol _states -> Buffer.add_string b (Symbol.print symbol); Buffer.add_char b ' ' ) () w; Buffer.contents b (* ------------------------------------------------------------------------ *) (* Explain how the stack should be deconstructed when an error is found. We sometimes have a choice as to how many stack cells should be popped. Indeed, several cells in the known suffix of the stack may physically hold a state. If none of these states handles errors, then we could jump to any of them.
(Indeed, if we jump to one that's nearer, it will in turn pop further stack cells and jump to one that's farther.) In the interests of code size, we should pop as few stack cells as possible. So, we jump to the topmost represented state in the known suffix. *) type state = | Represented | UnRepresented of Lr1.node type instruction = | Die | DownTo of word * state let rewind node : instruction = let w = stack node in let rec rewind w = match w with | [] -> (* I believe that every stack description either is definite (that is, ends with [TailEmpty]) or contains at least one represented state. Thus, if we find an empty [w], this means that the stack is definitely empty. *) Die | ((_, states) as cell) :: w -> if representeds states then (* Here is a represented state. We will pop this cell and no more. *) DownTo ([ cell ], Represented) else if handlers states then begin (* Here is an unrepresented state that can handle errors. The cell must hold a singleton set of states, so we know which state to jump to, even though it isn't represented. *) assert (Lr1.NodeSet.cardinal states = 1); let state = Lr1.NodeSet.choose states in DownTo ([ cell ], UnRepresented state) end else (* Here is an unrepresented state that does not handle errors. Pop this cell and look further. *) match rewind w with | Die -> Die | DownTo (w, st) -> DownTo (cell :: w, st) in rewind w (* ------------------------------------------------------------------------ *) (* Machinery for the computation of which symbols must keep track of their start or end positions. *) open Keyword type variable = Symbol.t * where (* WhereStart or WhereEnd *) module M : Fix.IMPERATIVE_MAPS with type key = variable = struct type key = variable type 'data t = { mutable startp: 'data SymbolMap.t; mutable endp: 'data SymbolMap.t; } open SymbolMap let create() = { startp = empty; endp = empty } let clear m = m.startp <- empty; m.endp <- empty let add (sym, where) data m = match where with | WhereStart -> m.startp <- add sym data m.startp | WhereEnd -> m.endp <- add sym data m.endp | WhereSymbolStart -> assert false let find (sym, where) m = match where with | WhereStart -> find sym m.startp | WhereEnd -> find sym m.endp | WhereSymbolStart -> assert false let iter f m = iter (fun sym -> f (sym, WhereStart)) m.startp; iter (fun sym -> f (sym, WhereEnd)) m.endp end (* ------------------------------------------------------------------------ *) (* We now determine which positions must be kept track of. For simplicity, we do this on a per-symbol basis. That is, for each symbol, either we never keep track of position information, or we always do. In fact, we do distinguish start and end positions. This leads to computing two sets of symbols -- those that keep track of their start position and those that keep track of their end position. A symbol on the right-hand side of a production must keep track of its (start or end) position if that position is explicitly requested by a semantic action. Furthermore, if the left-hand symbol of a production must keep track of its start (resp. end) position, then the first (resp. last) symbol of its right-hand side (if there is one) must do so as well. That is, unless the right-hand side is empty. *) (* 2015/11/11. When a production [prod] is reduced, the top stack cell may be consulted for its end position. This implies that this cell must exist and must store an end position! Now, when does this happen? 
1- This happens if [prod] is an epsilon production and the left-hand symbol of the production, [nt prod], keeps track of its start or end position. 2- This happens if the semantic action explicitly mentions the keyword [$endpos($0)]. Now, if this happens, what should we do? a- If this happens in a state [s] whose incoming symbol is [sym], then [sym] must keep track of its end position. b- If this happens in an initial state, where the stack may be empty, then the sentinel cell at the bottom of the stack must contain an end position. Point (b) doesn't concern us here, but point (a) does. We must implement the constraint (1) \/ (2) -> (a). Point (b) is taken care of in the code back-end, where, for simplicity, we always create a sentinel cell. *) (* I will say that this is a lot more sophisticated than I would like. The code back-end has been known for its efficiency and I am trying to maintain this property -- in particular, I would like to keep track of no positions at all, if the user doesn't use any position keyword. But I am suffering. *) module S = FixSolver.Make(M)(Boolean) let record_ConVar, record_VarVar, solve = S.create() let () = (* We gather the constraints explained above in two loops. The first loop looks at every (non-start) production [prod]. The second loop looks at every (non-initial) state [s]. *) Production.iterx (fun prod -> let nt, rhs = Production.def prod and ids = Production.identifiers prod and action = Production.action prod in let length = Array.length rhs in if length > 0 then begin (* If [nt] keeps track of its start position, then the first symbol in the right-hand side must do so as well. *) record_VarVar (Symbol.N nt, WhereStart) (rhs.(0), WhereStart); (* If [nt] keeps track of its end position, then the last symbol in the right-hand side must do so as well. *) record_VarVar (Symbol.N nt, WhereEnd) (rhs.(length - 1), WhereEnd) end; KeywordSet.iter (function | SyntaxError -> () | Position (Before, _, _) -> (* Doing nothing here because [$endpos($0)] is dealt with in the second loop. *) () | Position (Left, _, _) -> (* [$startpos] and [$endpos] have been expanded away. *) assert false | Position (RightNamed _, WhereSymbolStart, _) -> (* [$symbolstartpos(x)] does not exist. *) assert false | Position (RightNamed id, where, _) -> (* If the semantic action mentions [$startpos($i)], then the [i]-th symbol in the right-hand side must keep track of its start position. Similarly for end positions. *) Array.iteri (fun i id' -> if id = id' then record_ConVar true (rhs.(i), where) ) ids ) (Action.keywords action) ); (* end of loop on productions *) Lr1.iterx (fun s -> (* Let [sym] be the incoming symbol of state [s]. *) let sym = Misc.unSome (Lr1.incoming_symbol s) in (* Condition (1) in the long comment above (2015/11/11). If an epsilon production [prod] can be reduced in state [s], if its left-hand side [nt] keeps track of its start or end position, then [sym] must keep track of its end position. *) TerminalMap.iter (fun _ prods -> let prod = Misc.single prods in let nt, rhs = Production.def prod in let length = Array.length rhs in if length = 0 then begin record_VarVar (Symbol.N nt, WhereStart) (sym, WhereEnd); record_VarVar (Symbol.N nt, WhereEnd) (sym, WhereEnd) end ) (Lr1.reductions s); (* Condition (2) in the long comment above (2015/11/11). If a production can be reduced in state [s] and mentions [$endpos($0)], then [sym] must keep track of its end position. 
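Operationally, the constraints recorded below form facts ([record_ConVar true v], meaning "v must hold") and implications ([record_VarVar u v], meaning "if u holds then v holds"), which [solve] then saturates. A stand-in illustration over strings (hypothetical, not Menhir's [FixSolver]; quadratic, but enough to show the intended semantics):

let solve (facts : string list) (implications : (string * string) list) =
  let rec saturate known =
    let known' =
      List.fold_left (fun k (u, v) ->
        if List.mem u k && not (List.mem v k) then v :: k else k
      ) known implications
    in
    if List.length known' = List.length known then known else saturate known'
  in
  let known = saturate facts in
  fun v -> List.mem v known
let track = solve [ "expr.start" ] [ ("expr.start", "term.start") ]
let () = assert (track "term.start" && not (track "term.end"))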
*) if Lr1.has_beforeend s then record_ConVar true (sym, WhereEnd) ) let track : variable -> bool = solve() let startp symbol = track (symbol, WhereStart) let endp symbol = track (symbol, WhereEnd) let for_every_symbol (f : Symbol.t -> unit) : unit = Terminal.iter (fun t -> f (Symbol.T t)); Nonterminal.iter (fun nt -> f (Symbol.N nt)) let sum_over_every_symbol (f : Symbol.t -> bool) : int = let c = ref 0 in for_every_symbol (fun sym -> if f sym then c := !c + 1); !c let () = Error.logC 1 (fun f -> Printf.fprintf f "%d out of %d symbols keep track of their start position.\n\ %d out of %d symbols keep track of their end position.\n" (sum_over_every_symbol startp) (Terminal.n + Nonterminal.n) (sum_over_every_symbol endp) (Terminal.n + Nonterminal.n)) (* ------------------------------------------------------------------------- *) (* Miscellaneous. *) let universal symbol = Lr1.fold (fun universal s -> universal && (if represented s then SymbolMap.mem symbol (Lr1.transitions s) else true) ) true (* ------------------------------------------------------------------------ *) (* Discover which states can peek at an error. These are the states where an error token may be on the stream. These are the states that are targets of a reduce action on [error]. *) (* 2012/08/25 I am optimizing this code, whose original version I found had quadratic complexity. The problem is as follows. We can easily iterate over all states to find which states [s] have a reduce action on error. What we must find out, then, is into which state [t] this reduce action takes us. This is not easy to predict, as it depends on the contents of the stack. The original code used an overapproximation, as follows: if the reduction concerns a production whose head symbol is [nt], then all of the states that have an incoming transition labeled [nt] are potential targets. The new version of the code below relies on the same approximation, but uses two successive loops instead of two nested loops. *) let errorpeekers = (* First compute a set of symbols [nt]... *) let nts : SymbolSet.t = Lr1.fold (fun nts node -> try let prods = TerminalMap.lookup Terminal.error (Lr1.reductions node) in let prod = Misc.single prods in let nt = Production.nt prod in SymbolSet.add (Symbol.N nt) nts with Not_found -> nts ) SymbolSet.empty in (* ... then compute the set of all target states of all transitions labeled by some symbol in the set [nt]. *) SymbolSet.fold (fun nt errorpeekers -> Lr1.targets (fun errorpeekers _ target -> Lr1.NodeSet.add target errorpeekers ) errorpeekers nt ) nts Lr1.NodeSet.empty let errorpeeker node = Lr1.NodeSet.mem node errorpeekers (* ------------------------------------------------------------------------ *) let () = Time.tick "Constructing the invariant" menhir-20171222/src/menhirSdk.mlpack0000664000175000017500000000015213217215730017415 0ustar fpottierfpottier# This is the list of modules that must go into MenhirSdk. Keyword Version Cmly_format Cmly_api Cmly_read menhir-20171222/src/rawPrinter.mli0000664000175000017500000000231013217215730017136 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. 
*) (* *) (******************************************************************************) (* A debugging pretty-printer for [IL]. Newlines are used liberally, so as to facilitate diffs. *) module Make (X : sig (* This is the channel that is being written to. *) val f: out_channel end) : sig val expr: IL.expr -> unit end menhir-20171222/src/infer.ml0000664000175000017500000002637513217215730015754 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Syntax open Stretch open UnparameterizedSyntax open IL open CodeBits open TokenType (* ------------------------------------------------------------------------- *) (* Naming conventions. *) (* The type variable associated with a nonterminal symbol. Its name begins with a prefix which ensures that it begins with a lowercase letter and cannot clash with OCaml keywords. *) let ntvar symbol = Printf.sprintf "tv_%s" (Misc.normalize symbol) (* The term variable associated with a nonterminal symbol. Its name begins with a prefix which ensures that it begins with a lowercase letter and cannot clash with OCaml keywords. *) let encode symbol = Printf.sprintf "xv_%s" (Misc.normalize symbol) let decode s = let n = String.length s in assert (n >= 3 && String.sub s 0 3 = "xv_"); String.sub s 3 (n - 3) (* The name of the temporary file. *) let base = Settings.base let mlname = base ^ ".ml" let mliname = base ^ ".mli" (* ------------------------------------------------------------------------- *) (* Code production. *) (* [nttype nt] is the type of the nonterminal [nt], as currently known. *) let nttype grammar nt = try TypTextual (StringMap.find nt grammar.types) with Not_found -> TypVar (ntvar nt) (* [is_standard] determines whether a branch derives from a standard library definition. The method, based on a file name, is somewhat fragile. *) let is_standard branch = List.for_all (fun x -> x = Settings.stdlib_filename) (Action.filenames branch.action) (* [actiondef] turns a branch into a function definition. *) let actiondef grammar symbol branch = (* Construct a list of the semantic action's formal parameters that depend on the production's right-hand side. *) let formals = List.fold_left (fun formals producer -> let symbol = producer_symbol producer and id = producer_identifier producer in let startp, endp, starto, endo = Printf.sprintf "_startpos_%s_" id, Printf.sprintf "_endpos_%s_" id, Printf.sprintf "_startofs_%s_" id, Printf.sprintf "_endofs_%s_" id in let t = try let props = StringMap.find symbol grammar.tokens in (* Symbol is a terminal. *) match props.tk_ocamltype with | None -> tunit | Some ocamltype -> TypTextual ocamltype with Not_found -> (* Symbol is a nonterminal. *) nttype grammar symbol in PAnnot (PVar id, t) :: PAnnot (PVar startp, tposition) :: PAnnot (PVar endp, tposition) :: PAnnot (PVar starto, tint) :: PAnnot (PVar endo, tint) :: formals ) [] branch.producers in (* Extend the list with parameters that do not depend on the right-hand side. 
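For example (assuming [Misc.normalize] leaves an ordinary lowercase name unchanged), the naming conventions defined at the top of this file give, for a nonterminal [expr], the type variable "tv_expr" and the term variable "xv_expr", and [decode] inverts [encode]:

let () =
  assert (encode "expr" = "xv_expr");
  assert (decode (encode "expr") = "expr")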
*) let formals = PAnnot (PVar "_eRR", texn) :: PAnnot (PVar "_startpos", tposition) :: PAnnot (PVar "_endpos", tposition) :: PAnnot (PVar "_endpos__0_", tposition) :: PAnnot (PVar "_symbolstartpos", tposition) :: PAnnot (PVar "_startofs", tint) :: PAnnot (PVar "_endofs", tint) :: PAnnot (PVar "_endofs__0_", tint) :: PAnnot (PVar "_symbolstartofs", tint) :: formals in (* Construct a function definition out of the above bindings and the semantic action. *) let body = EAnnot ( Action.to_il_expr branch.action, type2scheme (nttype grammar symbol) ) in match formals with | [] -> body | _ -> EFun (formals, body) (* [program] turns an entire grammar into a test program. *) let program grammar = (* Turn the grammar into a bunch of function definitions. Grammar productions that derive from the standard library are reflected first, so that type errors are not reported in them. *) let bindings1, bindings2 = StringMap.fold (fun symbol rule (bindings1, bindings2) -> List.fold_left (fun (bindings1, bindings2) branch -> if is_standard branch then (PWildcard, actiondef grammar symbol branch) :: bindings1, bindings2 else bindings1, (PWildcard, actiondef grammar symbol branch) :: bindings2 ) (bindings1, bindings2) rule.branches ) grammar.rules ([], []) in (* Create entry points whose types are the unknowns that we are looking for. *) let ps, ts = StringMap.fold (fun symbol _ (ps, ts) -> PVar (encode (Misc.normalize symbol)) :: ps, nttype grammar symbol :: ts ) grammar.rules ([], []) in let def = { valpublic = true; valpat = PTuple ps; valval = ELet (bindings1 @ bindings2, EAnnot (bottom, type2scheme (TypTuple ts))) } in (* Insert markers to delimit the part of the file that we are interested in. These markers are recognized by [Lexmli]. This helps skip the values, types, exceptions, etc. that might be defined by the prologue or postlogue. *) let begindef = { valpublic = true; valpat = PVar "menhir_begin_marker"; valval = EIntConst 0 } and enddef = { valpublic = true; valpat = PVar "menhir_end_marker"; valval = EIntConst 0 } in (* Issue the test program. We include the definition of the type of tokens, because, in principle, the semantic actions may refer to it or to its data constructors. *) [ SIFunctor (grammar.parameters, interface_to_structure (tokentypedef grammar) @ SIStretch grammar.preludes :: SIValDefs (false, [ begindef; def; enddef ]) :: SIStretch grammar.postludes :: [])] (* ------------------------------------------------------------------------- *) (* Writing the program associated with a grammar to a file. *) let write grammar () = let ml = open_out mlname in let module P = Printer.Make (struct let f = ml let locate_stretches = Some mlname end) in P.program (program grammar); close_out ml (* ------------------------------------------------------------------------- *) (* Running ocamldep on the program. *) type entry = string (* basename *) * string (* filename *) type line = entry (* target *) * entry list (* dependencies *) let depend grammar = (* Create an [.ml] file and an [.mli] file, then invoke ocamldep to compute dependencies for us. *) (* If an old [.ml] or [.mli] file exists, we are careful to preserve it. We temporarily move it out of the way and restore it when we are done. There is no reason why dependency analysis should destroy existing files. 
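A plausible implementation of such a file-preserving bracket, for illustration only (Menhir's actual [IO.moving_away], used just below, may differ in its details): the file is renamed out of the way, and restored whether the action returns normally or raises.

let moving_away filename action =
  if Sys.file_exists filename then begin
    let backup = filename ^ ".moved-away" in  (* hypothetical naming scheme *)
    Sys.rename filename backup;
    let restore () = Sys.rename backup filename in
    match action () with
    | result -> restore (); result
    | exception e -> restore (); raise e
  end else
    action ()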
*) let ocamldep_command = Printf.sprintf "%s %s %s" Settings.ocamldep (Filename.quote mlname) (Filename.quote mliname) in let output : string = Option.project ( IO.moving_away mlname (fun () -> IO.moving_away mliname (fun () -> IO.with_file mlname (write grammar) (fun () -> IO.with_file mliname (Interface.write grammar) (fun () -> IO.invoke ocamldep_command ))))) in (* Echo ocamldep's output. *) print_string output; (* If [--raw-depend] was specified on the command line, stop here. This option is used by omake and by ocamlbuild, which performs their own postprocessing of [ocamldep]'s output. For normal [make] users, who use [--depend], some postprocessing is required, which is performed below. *) begin match Settings.depend with | Settings.OMNone -> assert false (* we wouldn't be here in the first place *) | Settings.OMRaw -> () | Settings.OMPostprocess -> (* Make sense out of ocamldep's output. *) let lexbuf = Lexing.from_string output in let lines : line list = try Lexdep.main lexbuf with Lexdep.Error msg -> (* Echo the error message, followed with ocamldep's output. *) Error.error [] "%s" (msg ^ output) in (* Look for the line that concerns the [.cmo] target, and echo a modified version of this line, where the [.cmo] target is replaced with [.ml] and [.mli] targets, and where the dependency over the [.cmi] file is dropped. In doing so, we assume that the user's [Makefile] supports bytecode compilation, so that it makes sense to request [bar.cmo] to be built, as opposed to [bar.cmx]. This is not optimal, but will do. [camldep] exhibits the same behavior. *) (* TEMPORARY allow ocamldep to be called with flag -native. *) List.iter (fun ((_, target_filename), dependencies) -> if Filename.check_suffix target_filename ".cmo" then let dependencies = List.filter (fun (basename, _) -> basename <> base ) dependencies in if List.length dependencies > 0 then begin Printf.printf "%s.ml %s.mli:" base base; List.iter (fun (_basename, filename) -> Printf.printf " %s" filename ) dependencies; Printf.printf "\n%!" end ) lines end; (* Stop. *) exit 0 (* ------------------------------------------------------------------------- *) (* Inferring types for a grammar's nonterminals. *) let infer grammar = (* Invoke ocamlc to do type inference for us. *) let ocamlc_command = Printf.sprintf "%s -c -i %s" Settings.ocamlc (Filename.quote mlname) in let output = write grammar (); match IO.invoke ocamlc_command with | Some result -> Sys.remove mlname; result | None -> (* 2015/10/05: intentionally do not remove the [.ml] file if [ocamlc] fails. (Or if an exception is thrown.) We cannot understand why [ocaml] complains if we can't see the [.ml] file. *) exit 1 in (* Make sense out of ocamlc's output. *) let env : (string * int * int) list = Lexmli.main (Lexing.from_string output) in let env : (string * ocamltype) list = List.map (fun (id, openingofs, closingofs) -> decode id, Inferred (String.sub output openingofs (closingofs - openingofs)) ) env in (* Augment the grammar with new %type declarations. *) let types = StringMap.fold (fun symbol _ types -> let ocamltype = try List.assoc (Misc.normalize symbol) env with Not_found -> assert false in if StringMap.mem symbol grammar.types then (* If there was a declared type, keep it. *) types else (* Otherwise, insert the inferred type. 
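In isolation, the policy implemented just below is: declared %type information always wins, and inferred types only fill the gaps. A reconstruction over plain string maps (a sketch; the real code works on [StringMap] and [ocamltype] values):

module M = Map.Make (String)
let augment (declared : string M.t) (inferred : string M.t) : string M.t =
  M.fold (fun symbol t types ->
    if M.mem symbol types then types  (* a declared type exists: keep it *)
    else M.add symbol t types         (* otherwise, use the inferred type *)
  ) inferred declared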
*) StringMap.add symbol ocamltype types ) grammar.rules grammar.types in { grammar with types = types } menhir-20171222/src/keyword.ml0000664000175000017500000000572713217215730016333 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module provides some type and function definitions that help deal with the keywords that we recognize within semantic actions. *) (* ------------------------------------------------------------------------- *) (* Types. *) (* The user can request position information either at type [int] (a simple offset) or at type [Lexing.position]. *) type flavor = | FlavorOffset | FlavorPosition (* The user can request position information about the $start or $end of a symbol. Also, $symbolstart requests the computation of the start position of the first nonempty element in a production. *) type where = | WhereSymbolStart | WhereStart | WhereEnd (* The user can request position information about a production's left-hand side or about one of the symbols in its right-hand side, which he can refer to by position or by name. *) type subject = | Before | Left | RightNamed of string (* Keywords inside semantic actions. They allow access to semantic values or to position information. *) type keyword = | Position of subject * where * flavor | SyntaxError (* ------------------------------------------------------------------------- *) (* These auxiliary functions help map a [Position] keyword to the name of the variable that the keyword is replaced with. *) let where = function | WhereSymbolStart -> "symbolstart" | WhereStart -> "start" | WhereEnd -> "end" let subject = function | Before -> "__0_" | Left -> "" | RightNamed id -> Printf.sprintf "_%s_" id let flavor = function | FlavorPosition -> "pos" | FlavorOffset -> "ofs" let posvar s w f = Printf.sprintf "_%s%s%s" (where w) (flavor f) (subject s) (* ------------------------------------------------------------------------- *) (* Sets of keywords. *) module KeywordSet = struct include Set.Make (struct type t = keyword let compare = compare end) let map f keywords = fold (fun keyword accu -> add (f keyword) accu ) keywords empty end menhir-20171222/src/standard.mly0000664000175000017500000001457113217215730016635 0ustar fpottierfpottier/******************************************************************************/ /* */ /* Menhir */ /* */ /* François Pottier, Inria Paris */ /* Yann Régis-Gianas, PPS, Université Paris Diderot */ /* */ /* Copyright Inria. All rights reserved. This file is distributed under the */ /* terms of the GNU Library General Public License version 2, with a */ /* special exception on linking, as described in the file LICENSE. */ /* */ /******************************************************************************/ (* This is menhir's standard library. It offers a number of parameterized nonterminal definitions, such as options and lists, that should be useful in a number of circumstances. *) %% (* ------------------------------------------------------------------------- *) (* The identity. *) (* [anonymous(X)] is the same as [X]. 
*) (* This allows placing an anonymous sub-rule in the middle of a rule, as in: foo anonymous(baz { action1 }) bar { action2 } Because anonymous is marked %inline, everything is expanded away. So, this is equivalent to: foo baz bar { action1; action2 } Note that [action1] moves to the end of the rule. The anonymous sub-rule can even have several branches, as in: foo anonymous(baz { action1a } | quux { action1b }) bar { action2 } This is expanded to: foo baz bar { action1a; action2 } | foo quux bar { action1b; action2 } *) %public %inline anonymous(X): x = X { x } (* [embedded(X)] is the same as [X]. *) (* This allows placing an anonymous sub-rule in the middle of a rule, as in: foo embedded(baz { action1 }) bar { action2 } Because [embedded] is not marked %inline, this is equivalent to: foo xxx bar { action2 } where the fresh non-terminal symbol [xxx] is separately defined by: xxx: baz { action1 } In particular, if there is no [baz], what we get is a semantic action embedded in the middle of a rule. For instance, foo embedded({ action1 }) bar { action2 } is equivalent to: foo xxx bar { action2 } where [xxx] is separately defined by the rule: xxx: { action1 } *) %public embedded(X): x = X { x } (* ------------------------------------------------------------------------- *) (* Options. *) (* [option(X)] recognizes either nothing or [X]. It produces a value of type ['a option] if [X] produces a value of type ['a]. *) %public option(X): /* nothing */ { None } | x = X { Some x } (* [ioption(X)] is identical to [option(X)], except its definition is inlined. This has the effect of duplicating the production that refers to it, possibly eliminating an LR(1) conflict. *) %public %inline ioption(X): /* nothing */ { None } | x = X { Some x } (* [boption(X)] recognizes either nothing or [X]. It produces a value of type [bool]. *) %public boption(X): /* nothing */ { false } | X { true } (* [loption(X)] recognizes either nothing or [X]. It produces a value of type ['a list] if [X] produces a value of type ['a list]. *) %public loption(X): /* nothing */ { [] } | x = X { x } (* ------------------------------------------------------------------------- *) (* Sequences. *) (* [pair(X, Y)] recognizes the sequence [X Y]. It produces a value of type ['a * 'b] if [X] and [Y] produce values of type ['a] and ['b], respectively. *) %public %inline pair(X, Y): x = X; y = Y { (x, y) } (* [separated_pair(X, sep, Y)] recognizes the sequence [X sep Y]. It produces a value of type ['a * 'b] if [X] and [Y] produce values of type ['a] and ['b], respectively. *) %public %inline separated_pair(X, sep, Y): x = X; sep; y = Y { (x, y) } (* [preceded(opening, X)] recognizes the sequence [opening X]. It passes on the value produced by [X], so that it produces a value of type ['a] if [X] produces a value of type ['a]. *) %public %inline preceded(opening, X): opening; x = X { x } (* [terminated(X, closing)] recognizes the sequence [X closing]. It passes on the value produced by [X], so that it produces a value of type ['a] if [X] produces a value of type ['a]. *) %public %inline terminated(X, closing): x = X; closing { x } (* [delimited(opening, X, closing)] recognizes the sequence [opening X closing]. It passes on the value produced by [X], so that it produces a value of type ['a] if [X] produces a value of type ['a]. *) %public %inline delimited(opening, X, closing): opening; x = X; closing { x } (* ------------------------------------------------------------------------- *) (* Lists. 
*) (* [list(X)] recognizes a possibly empty list of [X]'s. It produces a value of type ['a list] if [X] produces a value of type ['a]. The front element of the list is the first element that was parsed. *) %public list(X): /* nothing */ { [] } | x = X; xs = list(X) { x :: xs } (* [nonempty_list(X)] recognizes a nonempty list of [X]'s. It produces a value of type ['a list] if [X] produces a value of type ['a]. The front element of the list is the first element that was parsed. *) %public nonempty_list(X): x = X { [ x ] } | x = X; xs = nonempty_list(X) { x :: xs } (* [separated_list(separator, X)] recognizes a possibly empty list of [X]'s, separated with [separator]'s. It produces a value of type ['a list] if [X] produces a value of type ['a]. The front element of the list is the first element that was parsed. *) %public %inline separated_list(separator, X): xs = loption(separated_nonempty_list(separator, X)) { xs } (* [separated_nonempty_list(separator, X)] recognizes a nonempty list of [X]'s, separated with [separator]'s. It produces a value of type ['a list] if [X] produces a value of type ['a]. The front element of the list is the first element that was parsed. *) %public separated_nonempty_list(separator, X): x = X { [ x ] } | x = X; separator; xs = separated_nonempty_list(separator, X) { x :: xs } %% menhir-20171222/src/nonterminalType.mli0000664000175000017500000000607413217215730020204 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module deals with the definition of the type that describes the nonterminal symbols. *) (* This is the conventional name of the [nonterminal] GADT. This is an indexed type (i.e., it has one type parameter). Its data constructors carry zero value arguments. *) val tcnonterminalgadt: string val tnonterminalgadt: IL.typ -> IL.typ (* [tnonterminalgadtdata nt] is the conventional name of the data constructor associated with the non-terminal symbol [nt]. *) val tnonterminalgadtdata: string -> string (* This is the definition of the [nonterminal] GADT, for use by the code generators. This definition can be constructed only if the type of every nonterminal symbol is known, either because the user has provided this information, or because [--infer] has been set and inference has been performed already. This definition is produced only in [--inspection] mode. *) val nonterminalgadtdef: UnparameterizedSyntax.grammar -> IL.interface (* When in [--(raw-)depend] mode, we are asked to produce a mock [.mli] file before [--infer] has run, which means that we are usually not able to construct the definition of the [nonterminal] GADT. This implies that the mock [.mli] file is a subset of the final [.mli] file. I believe that, when working with [ocamlbuild], this is not a problem. In fact, the mock [.mli] file could just as well be empty or absent, and things would still work: in principle, it is enough for us to publish which files we need in order to be able to type-check the real [.ml] file used by [--infer]. 
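For concreteness, here is a hand-written miniature of the kind of definition being discussed (a hypothetical grammar with nonterminals [expr] and [stmt], whose semantic values have types [int] and [unit]; the generated constructor names conventionally carry an "N_" prefix, per [tnonterminalgadtdata]):

type _ nonterminal =
  | N_expr : int nonterminal
  | N_stmt : unit nonterminal
(* The type parameter ties each nonterminal to the type of its semantic value: *)
let describe : type a. a nonterminal -> string = function
  | N_expr -> "expr"
  | N_stmt -> "stmt"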
However, when working with [make], which is unable to mix the production of targets and the computation of dependencies, we additionally need to predict which files one will need in order to compile the real [.mli] and [.ml] files. Here, the fact that the mock [.mli] file is incomplete could in theory be a problem, leading to incomplete dependencies. The problem does not lie in the line [parser.ml parser.mli: ...] that we add; it lies in the lines produced by [ocamldep] itself, where the line [parser.cmi: ...] is missing some dependencies. *) menhir-20171222/src/tarjan.ml0000664000175000017500000001506213217215730016117 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module provides an implementation of Tarjan's algorithm for finding the strongly connected components of a graph. The algorithm runs when the functor is applied. Its complexity is $O(V+E)$, where $V$ is the number of vertices in the graph $G$, and $E$ is the number of edges. *) module Run (G : sig type node (* We assume each node has a unique index. Indices must range from $0$ to $n-1$, where $n$ is the number of nodes in the graph. *) val n: int val index: node -> int (* Iterating over a node's immediate successors. *) val successors: (node -> unit) -> node -> unit (* Iterating over all nodes. *) val iter: (node -> unit) -> unit end) = struct (* Define the internal data structure associated with each node. *) type data = { (* Each node carries a flag which tells whether it appears within the SCC stack (which is defined below). *) mutable stacked: bool; (* Each node carries a number. Numbers represent the order in which nodes were discovered. *) mutable number: int; (* Each node [x] records the lowest number associated to a node already detected within [x]'s SCC. *) mutable low: int; (* Each node carries a pointer to a representative element of its SCC. This field is used by the algorithm to store its results. *) mutable representative: G.node; (* Each representative node carries a list of the nodes in its SCC. This field is used by the algorithm to store its results. *) mutable scc: G.node list } (* Define a mapping from external nodes to internal ones. Here, we simply use each node's index as an entry into a global array. *) let table = (* Create the array. We initially fill it with [None], of type [data option], because we have no meaningful initial value of type [data] at hand. *) let table = Array.make G.n None in (* Initialize the array. *) G.iter (fun x -> table.(G.index x) <- Some { stacked = false; number = 0; low = 0; representative = x; scc = [] } ); (* Define a function which gives easy access to the array. It maps each node to its associated piece of internal data. *) function x -> match table.(G.index x) with | Some dx -> dx | None -> assert false (* Indices do not cover the range $0\ldots n$, as expected. *) (* Create an empty stack, used to record all nodes which belong to the current SCC. *) let scc_stack = Stack.create() (* Initialize a function which allocates numbers for (internal) nodes. A new number is assigned to each node the first time it is visited. 
Numbers returned by this function start at 1 and increase. Initially, all nodes have number 0, so they are considered unvisited. *) let mark = let counter = ref 0 in fun dx -> incr counter; dx.number <- !counter; dx.low <- !counter (* This reference will hold a list of all representative nodes. *) let representatives = ref [] (* Look at all nodes of the graph, one after the other. Any unvisited nodes become roots of the search forest. *) let () = G.iter (fun root -> let droot = table root in if droot.number = 0 then begin (* This node hasn't been visited yet. Start a depth-first walk from it. *) mark droot; droot.stacked <- true; Stack.push droot scc_stack; let rec walk x = let dx = table x in G.successors (fun y -> let dy = table y in if dy.number = 0 then begin (* $y$ hasn't been visited yet, so $(x,y)$ is a regular edge, part of the search forest. *) mark dy; dy.stacked <- true; Stack.push dy scc_stack; (* Continue walking, depth-first. *) walk y; if dy.low < dx.low then dx.low <- dy.low end else if (dy.low < dx.low) && dy.stacked then begin (* The first condition above indicates that $y$ has been visited before $x$, so $(x, y)$ is a backwards or transverse edge. The second condition indicates that $y$ is inside the same SCC as $x$; indeed, if it belongs to another SCC, then the latter has already been identified and moved out of [scc_stack]. *) if dy.number < dx.low then dx.low <- dy.number end ) x; (* We are done visiting $x$'s neighbors. *) if dx.low = dx.number then begin (* $x$ is the entry point of a SCC. The whole SCC is now available; move it out of the stack. We pop elements out of the SCC stack until $x$ itself is found. *) let rec loop () = let element = Stack.pop scc_stack in element.stacked <- false; dx.scc <- element.representative :: dx.scc; element.representative <- x; if element != dx then loop() in loop(); representatives := x :: !representatives end in walk root end ) (* There only remains to make our results accessible to the outside. *) let representative x = (table x).representative let scc x = (table x).scc let iter action = List.iter (fun x -> let data = table x in assert (data.representative == x); (* a sanity check *) assert (data.scc <> []); (* a sanity check *) action x data.scc ) !representatives end menhir-20171222/src/cmly_api.ml0000664000175000017500000001155413217215730016437 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* The following signatures describe the API offered by the functor [Cmly_read.Read]. This functor reads in a .cmly file and gives access to the description of the grammar and automaton contained in this file. *) (* This API is currently entirely self-contained, except for a reference to the module [Keyword], which is also part of [MenhirSdk]. *) (* The module type [INDEXED] describes a type [t] whose elements are in a bijection with an integer interval of the form [0..count). 
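For instance (a hypothetical illustration), if [count] is 3, then [of_int] maps the integers 0, 1 and 2 to the three elements of [t] and [to_int] is its inverse, so [iter f] amounts to [f (of_int 0); f (of_int 1); f (of_int 2)]. Presumably, [tabulate f] evaluates [f] once at every element and returns a memoized version of [f].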
*) module type INDEXED = sig type t val count : int val of_int : int -> t val to_int : t -> int val iter : (t -> unit) -> unit val fold : (t -> 'a -> 'a) -> 'a -> 'a val tabulate : (t -> 'a) -> t -> 'a end (* The module type [GRAMMAR] describes the grammar and automaton. *) module type GRAMMAR = sig type terminal = private int type nonterminal = private int type production = private int type lr0 = private int type lr1 = private int type item = production * int type ocamltype = string type ocamlexpr = string module Range : sig type t val startp: t -> Lexing.position val endp: t -> Lexing.position end module Attribute : sig type t val label : t -> string val has_label : string -> t -> bool val payload : t -> string val position : t -> Range.t end module Grammar : sig val basename : string val preludes : string list val postludes : string list val parameters : string list val entry_points : (nonterminal * production * lr1) list val attributes : Attribute.t list end module Terminal : sig include INDEXED with type t = terminal val name : t -> string val kind : t -> [`REGULAR | `ERROR | `EOF | `PSEUDO] val typ : t -> ocamltype option val attributes : t -> Attribute.t list end module Nonterminal : sig include INDEXED with type t = nonterminal val name : t -> string val mangled_name : t -> string val kind : t -> [`REGULAR | `START] val typ : t -> ocamltype option val positions : t -> Range.t list val nullable : t -> bool val first : t -> terminal list val attributes : t -> Attribute.t list end type symbol = | T of terminal | N of nonterminal val symbol_name : ?mangled:bool -> symbol -> string type identifier = string module Action : sig type t val expr : t -> ocamlexpr val keywords : t -> Keyword.keyword list end module Production : sig include INDEXED with type t = production val kind : t -> [`REGULAR | `START] val lhs : t -> nonterminal val rhs : t -> (symbol * identifier * Attribute.t list) array val positions : t -> Range.t list val action : t -> Action.t option val attributes : t -> Attribute.t list end module Lr0 : sig include INDEXED with type t = lr0 val incoming : t -> symbol option val items : t -> item list end module Lr1 : sig include INDEXED with type t = lr1 val lr0 : t -> lr0 val transitions : t -> (symbol * t) list val reductions : t -> (terminal * production list) list end module Print : sig open Format val terminal : formatter -> terminal -> unit val nonterminal : formatter -> nonterminal -> unit val symbol : formatter -> symbol -> unit val mangled_nonterminal : formatter -> nonterminal -> unit val mangled_symbol : formatter -> symbol -> unit val production : formatter -> production -> unit val item : formatter -> item -> unit val itemset : formatter -> item list -> unit val annot_item : string list -> formatter -> item -> unit val annot_itemset : string list list -> formatter -> item list -> unit end end menhir-20171222/src/lr1partial.ml0000664000175000017500000001740213217215730016713 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Grammar exception Oops module Run (X : sig (* A restricted set of tokens of interest. 
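For instance, when explaining a shift/reduce conflict on a token PLUS (a hypothetical example), [tokens] would be the singleton set that contains just PLUS. Restricting attention to these tokens keeps the partial construction below small.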
*) val tokens: TerminalSet.t (* A state of the (merged) LR(1) automaton that we're trying to simulate. *) val goal: Lr1.node end) = struct (* First, let's restrict our interest to the nodes of the merged LR(1) automaton that can reach the goal node. Some experiments show that this can involve one tenth to one half of all nodes. This optimization seems minor, but is easy to implement. *) let relevant = Lr1.reverse_dfs X.goal (* Second, all of the states that we shall consider are restricted to the set of tokens of interest. This is an important idea: by abstracting away some information, we make the construction much faster. *) let restrict = Lr0.restrict X.tokens (* Constructing the automaton. The automaton is represented as a graph. States are never merged -- this is a canonical LR(1) construction! As we go, we record the correspondence between nodes in this automaton and nodes in the merged LR(1) automaton. This allows us to tell when we have reached the desired place. This also allows us not to follow transitions that have already been eliminated, in the merged automaton, via resolution of shift/reduce conflicts. Whenever we follow a transition in the canonical LR(1) automaton, we check that the corresponding transition is legal in the merged LR(1) automaton. The automaton is explored breadth-first and shortest paths from every node to one of the start nodes are recorded. *) type node = { state: Lr0.lr1state; ancestor: (Symbol.t * node) option; shadow: Lr1.node; } (* A queue of pending nodes, whose successors should be explored. *) let queue : node Queue.t = Queue.create() (* Mapping of LR(0) state numbers to lists of nodes. *) let map : node list array = Array.make Lr0.n [] (* Exploring a state. This creates a new node, if necessary, and enqueues it for further exploration. *) exception Goal of node * Terminal.t let explore ancestor shadow (state : Lr0.lr1state) : unit = (* Find all existing nodes that share the same LR(0) core. *) let k = Lr0.core state in assert (k < Lr0.n); let similar = map.(k) in (* Check whether one of these nodes coincides with the candidate new node. If so, stop. This check requires comparing not only the states of the partial, canonical automaton, but also their shadows in the full, merged automaton. This is because a single state of the canonical automaton may be reached along several different paths, leading to distinct shadows in the merged automaton, and we must explore all of these paths in order to ensure that we eventually find a goal node. *) if not (List.exists (fun node -> Lr0.equal state node.state && shadow == node.shadow ) similar) then begin (* Otherwise, create a new node. *) let node = { state = state; ancestor = ancestor; shadow = shadow; } in map.(k) <- node :: similar; Queue.add node queue; (* Check whether this is a goal node. A node [N] is a goal node if (i) [N] has a conflict involving one of the tokens of interest and (ii) [N] corresponds to the goal node, that is, the path that leads to [N] in the canonical LR(1) automaton leads to the goal node in the merged LR(1) automaton. Note that these conditions do not uniquely define [N]. *) if shadow == X.goal then let can_reduce = ref TerminalSet.empty in let reductions1 : Production.index list TerminalMap.t = Lr1.reductions shadow in List.iter (fun (toks, prod) -> TerminalSet.iter (fun tok -> (* We are looking at a [(tok, prod)] pair -- a reduction in the canonical automaton state. 
*) (* Check that this reduction, which exists in the canonical automaton state, also exists in the merged automaton -- that is, it wasn't suppressed by conflict resolution. *) if List.mem prod (TerminalMap.lookup tok reductions1) then try let (_ : Lr1.node) = SymbolMap.find (Symbol.T tok) (Lr1.transitions shadow) in (* Shift/reduce conflict. *) raise (Goal (node, tok)) with Not_found -> let toks = !can_reduce in (* We rely on the property that [TerminalSet.add tok toks] preserves physical equality when [tok] is a member of [toks]. *) let toks' = TerminalSet.add tok toks in if toks == toks' then (* Reduce/reduce conflict. *) raise (Goal (node, tok)) else (* No conflict so far. *) can_reduce := toks' ) toks ) (Lr0.reductions state) end (* Populate the queue with the start nodes. Until we find a goal node, take a node out of the queue, construct the nodes that correspond to its successors, and enqueue them. *) let goal, token = try ProductionMap.iter (fun (prod : Production.index) (k : Lr0.node) -> let shadow = try ProductionMap.find prod Lr1.entry with Not_found -> assert false in if relevant shadow then explore None shadow (restrict (Lr0.start k)) ) Lr0.entry; Misc.qiter (fun node -> SymbolMap.iter (fun symbol state -> try let shadow = SymbolMap.find symbol (Lr1.transitions node.shadow) in if relevant shadow then explore (Some (symbol, node)) shadow (restrict state) with Not_found -> (* No shadow. This can happen if a shift/reduce conflict was resolved in favor of reduce. Ignore that transition. *) () ) (Lr0.transitions node.state) ) queue; (* We didn't find a goal node. This shouldn't happen! If the goal node in the merged LR(1) automaton has a conflict, then there should exist a node with a conflict in the canonical automaton as well. Otherwise, Pager's construction is incorrect. *) raise Oops with Goal (node, tok) -> node, tok (* Query the goal node that was found about the shortest path from it to one of the entry nodes. *) let source, path = let rec follow path node = match node.ancestor with | None -> Lr1.start2item node.shadow, Array.of_list path | Some (symbol, node) -> follow (symbol :: path) node in follow [] goal let goal = Lr0.export goal.state end menhir-20171222/src/rawPrinter.ml0000664000175000017500000001340713217215730016776 0ustar fpottierfpottier(* A debugging pretty-printer for [IL]. Newlines are used liberally, so as to facilitate diffs. *) open IL open Printf module Make (X : sig (* This is the channel that is being written to. *) val f: out_channel end) = struct (* ------------------------------------------------------------------------- *) (* XML-style trees. *) type tree = | Node of string * tree list let node label ts = Node (label, ts) (* ------------------------------------------------------------------------- *) (* Dealing with newlines and indentation. *) let maxindent = 120 let whitespace = String.make maxindent ' ' let indentation = ref 0 let line = ref 1 (* [rawnl] is, in principle, the only place where writing a newline character to the output channel is permitted. 
This ensures that the line counter remains correct. But see also [stretch] and [typ0]. *) let rawnl f = incr line; output_char f '\n' let nl f = rawnl f; output f whitespace 0 !indentation let indent ofs producer f x = let old_indentation = !indentation in let new_indentation = old_indentation + ofs in if new_indentation <= maxindent then indentation := new_indentation; nl f; producer f x; indentation := old_indentation (* ------------------------------------------------------------------------- *) (* Tree printers. *) let rec print_tree f = function | Node (label, []) -> output_char f '<'; output_string f label; output_char f '/'; output_char f '>'; nl f | Node (label, ts) -> output_char f '<'; output_string f label; output_char f '>'; indent 2 print_trees f ts; output_char f '<'; output_char f '/'; output_string f label; output_char f '>'; nl f and print_trees f = function | [] -> () | t :: ts -> print_tree f t; print_trees f ts (* ------------------------------------------------------------------------- *) (* Expression-to-tree converter. *) let rec expr e = match e with | EComment (c, e) -> node "comment" [ string c; expr e ] | EPatComment (s, p, e) -> node "patcomment" [ string s; pat p; expr e ] | ELet (pes, e2) -> node "let" ( patexprs pes @ [ expr e2 ]) | ERecordWrite (e1, field, e2) -> node "recordwrite" [ expr e1; string field; expr e2 ] | EMatch (e, brs) -> node "match" ( expr e :: branches brs ) | ETry (e, brs) -> node "try" ( expr e :: branches brs ) | EIfThen (e1, e2) -> node "ifthen" [ expr e1; expr e2 ] | EIfThenElse (e0, e1, e2) -> node "ifthenelse" [ expr e0; expr e1; expr e2 ] | EFun (ps, e) -> node "fun" ( pats ps @ [ expr e ]) | EApp (e, args) -> node "app" ( expr e :: exprs args ) | ERaise e -> node "raise" [ expr e ] | EMagic e -> node "magic" [ expr e ] | ERepr e -> node "repr" [ expr e ] | EData (d, args) -> node "data" ( string d :: exprs args ) | EVar v -> node "var" [ string v ] | ETextual action -> node "text" [ stretch action ] | EUnit -> node "unit" [] | EIntConst k -> node "int" [ int k ] | EStringConst s -> node "string" [ string s ] | ETuple es -> node "tuple" ( exprs es ) | EAnnot (e, s) -> node "annot" [ expr e; scheme s ] | ERecordAccess (e, field) -> node "recordaccess" [ expr e; string field ] | ERecord fs -> node "record" (fields fs) | EArray fs -> node "array" (exprs fs) | EArrayAccess (e1, e2) -> node "arrayaccess" [ expr e1; expr e2 ] and exprs es = List.map expr es and stretch stretch = string stretch.Stretch.stretch_content and branches brs = List.map branch brs and branch br = node "branch" [ pat br.branchpat; expr br.branchbody ] and fields fs = List.map field fs and field (label, e) = node "field" [ string label; expr e ] and pats ps = List.map pat ps and pat = function | PUnit -> node "punit" [] | PWildcard -> node "pwildcard" [] | PVar x -> node "pvar" [ string x ] | PTuple ps -> node "ptuple" (pats ps) | PAnnot (p, t) -> node "pannot" [ pat p; typ t ] | PData (d, args) -> node "pdata" (string d :: pats args) | PRecord fps -> node "precord" (fpats fps) | POr ps -> node "por" (pats ps) and fpats fps = List.map fpat fps and fpat (_, p) = pat p and patexprs pes = List.map patexpr pes and patexpr (p, e) = node "patexpr" [ pat p; expr e ] and string s = node s [] and int k = node (string_of_int k) [] and bool b = node (if b then "true" else "false") [] and scheme s = string "omitted" (* TEMPORARY to be completed, someday *) and typ t = string "omitted" (* TEMPORARY to be completed, someday *) (* 
------------------------------------------------------------------------- *) (* Convert to a tree, then print the tree. *) let expr e = print_tree X.f (expr e) end menhir-20171222/src/inliner.ml0000664000175000017500000002300213217215730016271 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open IL open CodeBits (* In the following, we only inline global functions. In order to avoid unintended capture, as we traverse terms, we keep track of local identifiers that hide global ones. The following little class helps do that. (The pathological case where a local binding hides a global one probably does not arise very often. Fortunately, checking against it in this way is quite cheap, and lets me sleep safely.) *) class locals table = object method pvar (locals : StringSet.t) (id : string) = if Hashtbl.mem table id then StringSet.add id locals else locals end (* Here is the inliner. *) let inline_valdefs (defs : valdef list) : valdef list = (* Create a table of all global definitions. *) let before, table = Traverse.tabulate_defs defs in (* Prepare to count how many times each function is used, including inside its own definition. The public functions serve as starting points for this discovery phase. *) let queue : valdef Queue.t = Queue.create() and usage : int StringMap.t ref = ref StringMap.empty in (* [visit] is called at every identifier occurrence. *) let visit locals id = if StringSet.mem id locals then (* This is a local identifier. Do nothing. *) () else try let _, def = Hashtbl.find table id in (* This is a globally defined identifier. Increment its usage count. If it was never visited, enqueue its definition for exploration. *) let n = try StringMap.find id !usage with Not_found -> Queue.add def queue; 0 in usage := StringMap.add id (n + 1) !usage with Not_found -> (* This identifier is not global. It is either local or a reference to some external library, e.g. ocaml's standard library. *) () in (* Look for occurrences of identifiers inside expressions. *) let o = object inherit [ StringSet.t, unit ] Traverse.fold inherit locals table method! evar locals () id = visit locals id end in (* Initialize the queue with all public definitions, and work from there. We assume that the left-hand side of every definition is a variable. *) List.iter (fun { valpublic = public; valpat = p } -> if public then visit StringSet.empty (pat2var p) ) defs; Misc.qfold (o#valdef StringSet.empty) () queue; let usage = !usage in (* Now, inline every function that is called at most once. At the same time, every function that is never called is dropped. The public functions again serve as starting points for the traversal. *) let queue : valdef Queue.t = Queue.create() and emitted = ref StringSet.empty in let enqueue def = let id = pat2var def.valpat in if not (StringSet.mem id !emitted) then begin emitted := StringSet.add id !emitted; Queue.add def queue end in (* A simple application is an application of a variable to a number of variables, constants, or record accesses out of variables. 
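For instance, [f x y.field] is a simple application, whereas [f (g x)] is not, since its argument is itself an application. Inlining a simple application duplicates only cheap computations, which is why it is permitted even when the function has several call sites.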
*) let rec is_simple_arg = function | EVar _ | EData (_, []) | ERecordAccess (EVar _, _) -> true | EMagic e -> is_simple_arg e | _ -> false in let is_simple_app = function | EApp (EVar _, actuals) -> List.for_all is_simple_arg actuals | _ -> false in (* Taking a fresh instance of a type scheme. Ugly. *) let instance = let count = ref 0 in let fresh tv = incr count; tv, Printf.sprintf "freshtv%d" !count in fun scheme -> let mapping = List.map fresh scheme.quantifiers in let rec sub typ = match typ with | TypTextual _ -> typ | TypVar v -> begin try TypVar (List.assoc v mapping) with Not_found -> typ end | TypApp (f, typs) -> TypApp (f, List.map sub typs) | TypTuple typs -> TypTuple (List.map sub typs) | TypArrow (typ1, typ2) -> TypArrow (sub typ1, sub typ2) in sub scheme.body in (* Destructuring a type annotation. *) let rec annotate formals body typ = match formals, typ with | [], _ -> [], EAnnot (body, type2scheme typ) | formal :: formals, TypArrow (targ, tres) -> let formals, body = annotate formals body tres in PAnnot (formal, targ) :: formals, body | _ :: _, _ -> (* Type annotation has insufficient arity. *) assert false in (* The heart of the inliner: rewriting a function call to a [let] expression. If there was a type annotation at the function definition site, it is dropped, provided [--infer] was enabled. Otherwise, it is kept, because, due to the presence of [EMagic] expressions in the code, dropping a type annotation could cause an ill-typed program to become apparently well-typed. Keeping a type annotation requires taking a fresh instance of the type scheme, because OCaml doesn't have support for locally and existentially bound type variables. Yuck. *) let inline formals actuals body oscheme = assert (List.length actuals = List.length formals); match oscheme with | Some scheme when not Settings.infer -> let formals, body = annotate formals body (instance scheme) in mlet formals actuals body | _ -> mlet formals actuals body in (* Look for occurrences of identifiers inside expressions, branches, etc. and replace them with their definitions if they have only one use site or if their definitions are sufficiently simple. *) let o = object (self) inherit [ StringSet.t ] Traverse.map as super inherit locals table method! eapp locals e actuals = match e with | EVar id when (Hashtbl.mem table id) && (* a global identifier *) (not (StringSet.mem id locals)) (* not hidden by a local identifier *) -> let _, def = Hashtbl.find table id in (* cannot fail, thanks to the above check *) let formals, body, oscheme = match def with | { valval = EFun (formals, body) } -> formals, body, None | { valval = EAnnot (EFun (formals, body), scheme) } -> formals, body, Some scheme | { valval = _ } -> (* The definition is not a function definition. This should not happen in the kind of code that we generate. *) assert false in assert (StringMap.mem id usage); if StringMap.find id usage = 1 || is_simple_app body then (* The definition can be inlined, with beta reduction. *) inline formals (self#exprs locals actuals) (EComment (id, self#expr locals body)) oscheme else begin (* The definition cannot be inlined. *) enqueue def; super#eapp locals e actuals end | _ -> (* The thing in function position is not a reference to a global. *) super#eapp locals e actuals end in (* Initialize the queue with all public definitions, and work from there. 
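A private definition that is never reached from a public one is dead code: it is never enqueued, hence never emitted.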
*) List.iter (function { valpublic = public } as def -> if public then enqueue def ) defs; let valdefs = Misc.qfold (fun defs def -> o#valdef StringSet.empty def :: defs ) [] queue in Error.logC 1 (fun f -> Printf.fprintf f "%d functions before inlining, %d functions after inlining.\n" before (List.length valdefs)); Time.tick "Inlining"; valdefs (* Dumb recursive traversal. *) let rec inline_structure_item item = match item with | SIValDefs (true, defs) -> (* A nest of recursive definitions. Act on it. *) SIValDefs (true, inline_valdefs defs) | SIFunctor (params, s) -> SIFunctor (params, inline_structure s) | SIModuleDef (name, e) -> SIModuleDef (name, inline_modexpr e) | SIInclude e -> SIInclude (inline_modexpr e) | SIExcDefs _ | SITypeDefs _ | SIValDefs (false, _) | SIStretch _ | SIComment _ -> item and inline_structure s = List.map inline_structure_item s and inline_modexpr = function | MVar x -> MVar x | MStruct s -> MStruct (inline_structure s) | MApp (e1, e2) -> MApp (inline_modexpr e1, inline_modexpr e2) (* The external entry point. *) let inline (p : program) : program = if Settings.code_inlining then inline_structure p else p menhir-20171222/src/unparameterizedSyntax.ml0000664000175000017500000001307213217215730021245 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Syntax (* This is the abstract syntax for an unparameterized grammar, that is, a grammar that does not have any parameterized nonterminal symbols. Such a grammar is obtained as the result of an expansion phase, which is implemented in [ParameterizedGrammar]. *) (* In an unparameterized grammar, %attribute declarations can be desugared away. This is also done during the above-mentioned expansion phase. Thus, in an unparameterized grammar, attributes can be attached in the following places: - with the grammar: field [gr_attributes] of [grammar] - with a terminal symbol: field [tk_attributes] of [token_properties] - with a nonterminal symbol: field [attributes] of [rule] - with a producer: field [producer_attributes] of [producer] *) (* ------------------------------------------------------------------------ *) (* A producer is a pair of an identifier and a symbol. In concrete syntax, it could be [e = expr], for instance. It carries a number of attributes. *) type producer = { producer_identifier : identifier; producer_symbol : symbol; producer_attributes : attributes; } type producers = producer list (* ------------------------------------------------------------------------ *) (* A branch contains a series of producers and a semantic action. It is the same as in the surface syntax; see [Syntax]. *) type branch = { branch_position : Positions.t; producers : producers; action : action; branch_prec_annotation : branch_prec_annotation; branch_production_level : branch_production_level } (* ------------------------------------------------------------------------ *) (* A rule consists mainly of several branches. In contrast with the surface syntax, it has no parameters. *) (* The [%inline] flag is no longer relevant after [NonTerminalInlining]. 
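As a concrete illustration (hypothetical, for exposition only), a nonterminal symbol with two productions is represented by a value of the form [{ branches = [ b1; b2 ]; positions = [ p ]; inline_flag = false; attributes = [] }], where [b1] and [b2] are the two branches.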
*) type rule = { branches : branch list; positions : Positions.t list; inline_flag : bool; attributes : attributes; } (* ------------------------------------------------------------------------ *) (* A grammar is essentially the same as in the surface syntax; see [Syntax]. The main difference is that [%attribute] declarations, represented by the field [p_symbol_attributes] in the surface syntax, have disappeared. *) type grammar = { preludes : Stretch.t list; postludes : Syntax.postlude list; parameters : Stretch.t list; start_symbols : StringSet.t; types : Stretch.ocamltype StringMap.t; tokens : Syntax.token_properties StringMap.t; on_error_reduce : on_error_reduce_level StringMap.t; gr_attributes : attributes; rules : rule StringMap.t; } (* -------------------------------------------------------------------------- *) (* Accessors for the type [producer]. *) let producer_identifier { producer_identifier } = producer_identifier let producer_symbol { producer_symbol } = producer_symbol let producer_attributes { producer_attributes } = producer_attributes (* -------------------------------------------------------------------------- *) (* [tokens grammar] is a list of all (real) tokens in the grammar [grammar]. The special tokens "#" and "error" are not included. Pseudo-tokens (used in %prec declarations, but never declared using %token) are filtered out. *) let tokens grammar = StringMap.fold (fun token properties tokens -> if properties.tk_is_declared then token :: tokens else tokens ) grammar.tokens [] (* [typed_tokens grammar] is analogous, but includes the OCaml type of each token. *) let typed_tokens grammar = StringMap.fold (fun token properties tokens -> if properties.tk_is_declared then (token, properties.tk_ocamltype) :: tokens else tokens ) grammar.tokens [] (* [nonterminals grammar] is a list of all nonterminal symbols in the grammar [grammar]. *) let nonterminals grammar : nonterminal list = StringMap.fold (fun nt _ rules -> nt :: rules) grammar.rules [] (* [ocamltype_of_symbol grammar symbol] produces the OCaml type of the symbol [symbol] in the grammar [grammar], if it is known. *) let ocamltype_of_symbol grammar symbol : Stretch.ocamltype option = try Some (StringMap.find symbol grammar.types) with Not_found -> None (* [ocamltype_of_start_symbol grammar symbol] produces the OCaml type of the start symbol [symbol] in the grammar [grammar]. *) let ocamltype_of_start_symbol grammar symbol : Stretch.ocamltype = try StringMap.find symbol grammar.types with Not_found -> (* Every start symbol should have a type. *) assert false menhir-20171222/src/listMonad.ml0000664000175000017500000000344413217215730016573 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) type 'a m = 'a list let return x = [ x ] let bind l f = List.flatten (List.map f l) let ( >>= ) l f = bind l f (* 1. (return x) >>= f == f x bind [ x ] f = List.flatten (List.map f [ x ]) = f x 2. m >>= return == m bind l return = List.flatten (List.map (fun x -> [ x ]) (x1::x2::..::xn)) = List.flatten ([x1]::...::[xn]) = x1::...::xn = l 3. 
(m >>= f) >>= g == m >>= (\x -> f x >>= g) bind (bind l f) g = List.flatten (List.map g (List.flatten (List.map f (x1::...::xn)))) = List.flatten (List.map g (f x1 :: f x2 :: ... :: f xn)) = List.flatten (List.map g ([fx1_1; fx1_2 ... ] :: [fx2_1; ... ] :: ...)) = List.flatten ([ g fx1_1; g fx_1_2 ... ] :: [ g fx_2_1; ... ] ...) = List.flatten (List.map (fun x -> List.flatten (List.map g (f x))) l) = bind l (fun x -> bind (f x) g) *) menhir-20171222/src/conflict.ml0000664000175000017500000004365313217215730016450 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Grammar let () = if Settings.graph then DependencyGraph.print_dependency_graph() (* artificial dependency *) (* -------------------------------------------------------------------------- *) (* Our output channel. *) let out = lazy (open_out (Settings.base ^ ".conflicts")) (* -------------------------------------------------------------------------- *) (* Explaining shift actions. *) (* The existence of a shift action stems from the existence of a shift item in the LR(0) core that underlies the LR(1) state of interest. That is, lookahead sets are not relevant. The existence of a shift item in the LR(0) core is explained by finding a path from a start item to the shift item in the LR(0) nondeterministic automaton, such that the symbols read along this path form the (previously fixed) symbol string that leads to the conflict state in the LR(1) automaton. There may be several such paths: a shortest one is chosen. There may also be several shift items in the conflict state: an arbitrary one is chosen. I believe it would not be interesting to display traces for several shift items: they would be identical except in their last line (where the desired shift item actually appears). *) (* Symbolic execution of the nondeterministic LR(0) automaton. *) (* Configurations are pairs of an LR(0) item and an offset into the input string, which indicates how much has been read so far. *) type configuration0 = Item.t * int (* This function builds a derivation out of a (nonempty, reversed) sequence of configurations. The derivation is constructed from bottom to top, that is, beginning at the last configuration and moving back towards to the start configuration. *) let rec follow derivation offset' = function | [] -> assert (offset' = 0); derivation | (item, offset) :: configs -> let _, _, rhs, pos, _ = Item.def item in let derivation = if offset = offset' then (* This is an epsilon transition. Put a new root node on top of the existing derivation. *) Derivation.build pos rhs derivation None else (* This was a shift transition. Tack symbol in front of the forest. *) Derivation.prepend rhs.(pos) derivation in follow derivation offset configs (* Symbolic execution begins with a start item (corresponding to one of the automaton's entry nodes), a fixed string of input symbols, to be fully consumed, and a goal item. The objective is to find a path through the automaton that leads from the start configuration [(start, 0)] to the goal configuration [(stop, n)], where [n] is the length of the input string. 
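For instance (a hypothetical input), if the conflict state is reached after reading [expr PLUS expr], then [input] is this three-symbol string, and the path that we look for must spell it out exactly.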
The automaton is explored via breadth-first search. A hash table is used to record which configurations have been visited and to build a spanning tree of shortest paths. *) exception Done let explain_shift_item (start : Item.t) (input : Symbol.t array) (stop : Item.t) : Derivation.t = let n = Array.length input in let table : (configuration0, configuration0 option) Hashtbl.t = Hashtbl.create 1023 in let queue : configuration0 Queue.t = Queue.create() in let enqueue ancestor config = try let _ = Hashtbl.find table config in () with Not_found -> Hashtbl.add table config ancestor; Queue.add config queue in enqueue None (start, 0); try Misc.qiter (function (item, offset) as config -> (* If the item we're looking at is the goal item and if we have read all of the input symbols, stop. *) if (Item.equal item stop) && (offset = n) then raise Done; (* Otherwise, explore the transitions out of this item. *) let prod, _, rhs, pos, length = Item.def item in (* Shift transition, followed only if the symbol matches the symbol found in the input string. *) if (pos < length) && (offset < n) && (Symbol.equal rhs.(pos) input.(offset)) then begin let config' = (Item.import (prod, pos+1), offset+1) in enqueue (Some config) config' end; (* Epsilon transitions. *) if pos < length then match rhs.(pos) with | Symbol.N nt -> Production.iternt nt (fun prod -> let config' = (Item.import (prod, 0), offset) in enqueue (Some config) config' ) | Symbol.T _ -> () ) queue; assert false with Done -> (* We have found a (shortest) path from the start configuration to the goal configuration. Turn it into an explicit derivation. *) let configs = Misc.materialize table (stop, n) in let _, _, rhs, pos, _ = Item.def stop in let derivation = Derivation.tail pos rhs in let derivation = follow derivation n configs in derivation (* -------------------------------------------------------------------------- *) (* Explaining reduce actions. *) (* The existence of a reduce action stems from the existence of a reduce item, whose lookahead set contains the token of interest, in the state of interest. Here, lookahead sets are relevant only insofar as they contain or do not contain the token of interest -- in other words, lookahead sets can be abstracted by Boolean values. The existence of the reduce item is explained by finding a path from a start item to the reduce item in the LR(1) nondeterministic automaton, such that the symbols read along this path form the (previously fixed) symbol string that leads to the conflict state in the LR(1) automaton. There may be several such paths: a shortest one is chosen. *) (* Symbolic execution of the nondeterministic LR(1) automaton. *) (* Configurations are pairs of an LR(1) item and an offset into the input string, which indicates how much has been read so far. An LR(1) item is itself represented as the combination of an LR(0) item and a Boolean flag, telling whether the token of interest appears or does not appear in the lookahead set. *) type configuration1 = Item.t * bool * int (* This function builds a derivation out of a sequence of configurations. The end of the sequence is dealt with specially -- we want to explain how the lookahead symbol appears and is inherited. Once that is done, the rest (that is, the beginning) of the derivation is dealt with as above. *) let config1toconfig0 (item, _, offset) = (item, offset) let rec follow1 tok derivation offset' = function | [] -> assert (Terminal.equal tok Terminal.sharp); (* One could emit a comment saying that the lookahead token is initially [#]. 
That comment would have to be displayed above the derivation, though, and there is no support for that at the moment, so let's skip it. *) derivation | (item, _, offset) :: configs -> let prod, _, rhs, pos, length = Item.def item in if offset = offset' then (* This is an epsilon transition. Attach a new line and add a comment that explains why the lookahead symbol is produced or inherited. *) let nullable, first = Analysis.nullable_first_prod prod (pos + 1) in if TerminalSet.mem tok first then (* The lookahead symbol is produced (and perhaps also inherited, but let's ignore that). *) let e = Analysis.explain_first_rhs tok rhs (pos + 1) in let comment = "lookahead token appears" ^ (if e = "" then "" else " because " ^ e) in let derivation = Derivation.build pos rhs derivation (Some comment) in (* Print the rest of the derivation without paying attention to the lookahead symbols. *) follow derivation offset (List.map config1toconfig0 configs) else begin (* The lookahead symbol is not produced, so it is definitely inherited. *) assert nullable; let comment = "lookahead token is inherited" ^ (if pos + 1 < length then Printf.sprintf " because %scan vanish" (Symbol.printao (pos + 1) rhs) else "") in let derivation = Derivation.build pos rhs derivation (Some comment) in follow1 tok derivation offset configs end else (* This is a shift transition. Tack symbol in front of the forest. *) let derivation = Derivation.prepend rhs.(pos) derivation in follow1 tok derivation offset configs (* Symbolic execution is performed in the same manner as above. *) let explain_reduce_item (tok : Terminal.t) (start : Item.t) (input : Symbol.t array) (stop : Item.t) : Derivation.t = let n = Array.length input in let table : (configuration1, configuration1 option) Hashtbl.t = Hashtbl.create 1023 in let queue : configuration1 Queue.t = Queue.create() in let enqueue ancestor config = try let _ = Hashtbl.find table config in () with Not_found -> Hashtbl.add table config ancestor; Queue.add config queue in (* If the lookahead token is #, then it initially appears in the lookahead set, otherwise it doesn't. *) enqueue None (start, Terminal.equal tok Terminal.sharp, 0); try Misc.qiter (function (item, lookahead, offset) as config -> (* If the item we're looking at is the goal item and if we have read all of the input symbols, stop. *) if (Item.equal item stop) && lookahead && (offset = n) then raise Done; (* Otherwise, explore the transitions out of this item. *) let prod, _nt, rhs, pos, length = Item.def item in (* Shift transition, followed only if the symbol matches the symbol found in the input string. *) if (pos < length) && (offset < n) && (Symbol.equal rhs.(pos) input.(offset)) then begin let config' = (Item.import (prod, pos+1), lookahead, offset+1) in enqueue (Some config) config' end; (* Epsilon transitions. *) if pos < length then match rhs.(pos) with | Symbol.N nt -> let nullable, first = Analysis.nullable_first_prod prod (pos + 1) in let first : bool = TerminalSet.mem tok first in let lookahead' = if nullable then first || lookahead else first in Production.iternt nt (fun prod -> let config' = (Item.import (prod, 0), lookahead', offset) in enqueue (Some config) config' ) | Symbol.T _ -> () ) queue; assert false with Done -> (* We have found a (shortest) path from the start configuration to the goal configuration. Turn it into an explicit derivation. 
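The ancestor links recorded in [table] form a spanning tree of shortest paths: [Misc.materialize] walks these links back to the root, and [follow1] converts the resulting sequence of configurations into a derivation tree, decorated with comments that explain where the lookahead token comes from.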
*) let configs = Misc.materialize table (stop, true, n) in let derivation = Derivation.empty in let derivation = follow1 tok derivation n configs in derivation (* -------------------------------------------------------------------------- *) (* Putting it all together. *) let () = if Settings.explain then begin Lr1.conflicts (fun toks node -> try (* Construct a partial LR(1) automaton, looking for a conflict in a state that corresponds to this node. Because Pager's algorithm can merge two states as soon as one of them has a conflict, we can't be too specific about the conflict that we expect to find in the canonical automaton. So, we must supply a set of conflict tokens and accept any kind of conflict that involves one of them. *) (* TEMPORARY with the new compatibility criterion, we can be sure that every conflict token is indeed involved in a conflict. Exploit that? Avoid focusing on a single token? *) let module P = Lr1partial.Run (struct let tokens = toks let goal = node end) in let closure = Lr0.closure P.goal in (* Determine what kind of conflict was found. *) let shift, reduce = Item.Map.fold (fun item toks (shift, reduce) -> match Item.classify item with | Item.Shift (Symbol.T tok, _) when Terminal.equal tok P.token -> shift + 1, reduce | Item.Reduce _ when TerminalSet.mem P.token toks -> shift, reduce + 1 | _ -> shift, reduce ) closure (0, 0) in let kind = if (shift > 0) && (reduce > 1) then "shift/reduce/reduce" else if (shift > 0) then "shift/reduce" else "reduce/reduce" in (* Explain how the conflict state is reached. *) let out = Lazy.force out in Printf.fprintf out "\n\ ** Conflict (%s) in state %d.\n\ ** Token%s involved: %s\n%s\ ** This state is reached from %s after reading:\n\n%s\n" kind (Lr1.number node) (if TerminalSet.cardinal toks > 1 then "s" else "") (TerminalSet.print toks) (if TerminalSet.cardinal toks > 1 then Printf.sprintf "** The following explanations concentrate on token %s.\n" (Terminal.print P.token) else "") (Nonterminal.print false (Item.startnt P.source)) (Symbol.printa P.path); (* Examine the items in that state, focusing on one particular token. Out of the shift items, we explain just one -- this seems enough. We explain each of the reduce items. *) (* First, build a mapping of items to derivations. *) let (_ : bool), derivations = Item.Map.fold (fun item toks (still_looking_for_shift_item, derivations) -> match Item.classify item with | Item.Shift (Symbol.T tok, _) when still_looking_for_shift_item && (Terminal.equal tok P.token) -> false, let derivation = explain_shift_item P.source P.path item in Item.Map.add item derivation derivations | Item.Reduce _ when TerminalSet.mem P.token toks -> still_looking_for_shift_item, let derivation = explain_reduce_item P.token P.source P.path item in Item.Map.add item derivation derivations | _ -> still_looking_for_shift_item, derivations ) closure (true, Item.Map.empty) in (* Factor out the common context among all derivations, so as to avoid repeating it. This helps prevent derivation trees from drifting too far away towards the right. It also helps produce sub-derivations that are quite compact. *) let context, derivations = Derivation.factor derivations in (* Display the common context. *) Printf.fprintf out "\n** The derivations that appear below have the following common factor:\ \n** (The question mark symbol (?) represents the spot where the derivations begin to differ.)\n\n"; Derivation.printc out context; (* Then, display the sub-derivations. 
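Each sub-derivation explains why a particular action (shifting, or reducing a certain production) is permitted in the conflict state.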
*) Item.Map.iter (fun item derivation -> Printf.fprintf out "\n** In state %d, looking ahead at %s, " (Lr1.number node) (Terminal.print P.token); begin match Item.classify item with | Item.Shift _ -> Printf.fprintf out "shifting is permitted\n** because of the following sub-derivation:\n\n" | Item.Reduce prod -> Printf.fprintf out "reducing production\n** %s\n** is permitted because of the following sub-derivation:\n\n" (Production.print prod) end; Derivation.print out derivation ) derivations; flush out with Lr1partial.Oops -> (* Ha ha! We were unable to explain this conflict. This could happen because the automaton was butchered by conflict resolution directives, or because [--lalr] was enabled and we have unexplainable LALR conflicts. Anyway, send the error message to the .conflicts file and continue. *) let out = Lazy.force out in Printf.fprintf out "\n\ ** Conflict (unexplainable) in state %d.\n\ ** Token%s involved: %s\n\ ** %s.\n%!" (Lr1.number node) (if TerminalSet.cardinal toks > 1 then "s" else "") (TerminalSet.print toks) (match Settings.construction_mode with | Settings.ModeLALR -> "This may be an artificial conflict caused by your use of --lalr" | Settings.ModeCanonical | Settings.ModeInclusionOnly | Settings.ModePager -> "Please send your grammar to Menhir's developers" ) ); Time.tick "Explaining conflicts" end (* ------------------------------------------------------------------------ *) (* Resolve the conflicts that remain in the automaton. *) let () = Lr1.default_conflict_resolution(); Time.tick "Resolving remaining conflicts" (* ------------------------------------------------------------------------ *) (* Now is as good a time as any to add extra reductions, if requested by the user. This must be done after conflicts have been resolved. *) let () = Lr1.extra_reductions(); Time.tick "Adding extra reductions" menhir-20171222/src/parserAux.ml0000664000175000017500000000774013217215730016616 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. 
*) (* *) (******************************************************************************) open Positions open Syntax type early_producer = Positions.t * identifier located option * parameter * attributes type early_producers = early_producer list type early_production = early_producers * string located option * (* optional precedence *) branch_production_level * Positions.t type early_productions = early_production list let new_precedence_level = let c = ref 0 in fun pos1 pos2 -> incr c; PrecedenceLevel (InputFile.get_input_file (), !c, pos1, pos2) let new_production_level = let c = ref 0 in fun () -> incr c; ProductionLevel (InputFile.get_input_file (), !c) let new_on_error_reduce_level = new_production_level (* the counter is shared with [new_production_level], but this is irrelevant *) module IdSet = Set.Make (struct type t = identifier located let compare id1 id2 = compare (value id1) (value id2) end) let defined_identifiers (_, ido, _, _) accu = Option.fold IdSet.add ido accu let defined_identifiers (producers : early_producers) = List.fold_right defined_identifiers producers IdSet.empty let check_production_group (right_hand_sides : early_productions) = match right_hand_sides with | [] -> (* A production group cannot be empty. *) assert false | (producers, _, _, _) :: right_hand_sides -> let ids = defined_identifiers producers in List.iter (fun (producers, _, _, _) -> let ids' = defined_identifiers producers in try let id = IdSet.choose (IdSet.union (IdSet.diff ids ids') (IdSet.diff ids' ids)) in Error.error [Positions.position id] "two productions that share a semantic action must define exactly\n\ the same identifiers. Here, \"%s\" is defined\n\ in one production, but not in all of them." (Positions.value id) with Not_found -> () ) right_hand_sides (* [normalize_producer i p] assigns a name of the form [_i] to the unnamed producer [p]. *) let normalize_producer i (pos, opt_identifier, parameter, attrs) = let id = match opt_identifier with | Some id -> id | None -> Positions.with_pos pos ("_" ^ string_of_int (i + 1)) in (id, parameter, attrs) let normalize_producers (producers : early_producers) : producer list = List.mapi normalize_producer producers let override pos o1 o2 = match o1, o2 with | Some _, Some _ -> Error.signal [ pos ] "this production carries two %%prec declarations."; o2 | None, Some _ -> o2 | _, None -> o1 (* Only unnamed producers can be referred to using positional identifiers. Besides, such positions must be taken in the interval [1 .. List.length producers]. The output array [p] is such that [p.(idx) = Some x] if [idx] must be referred to using [x], not [$(idx + 1)]. *) let producer_names (producers : early_producers) = producers |> List.map (fun (_, oid, _, _) -> Option.map Positions.value oid) |> Array.of_list menhir-20171222/src/lr1.ml0000664000175000017500000012675713217215730015354 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. 
*) (* *) (******************************************************************************) open Grammar module S = Slr (* artificial dependency; ensures that [Slr] runs first *) (* This module constructs an LR(1) automaton by following Pager's method, that is, by merging states on the fly when they are weakly compatible. *) (* ------------------------------------------------------------------------ *) (* Nodes. *) type node = { (* A node number, assigned during construction. *) raw_number: int; (* A node number, assigned after conflict resolution has taken place and after inaccessible nodes have been removed. This yields sequential numbers, from the client's point of view. *) mutable number: int; (* Each node is associated with a state. This state can change during construction as nodes are merged. *) mutable state: Lr0.lr1state; (* Each node carries information about its outgoing transitions and about its reductions. *) mutable transitions: node SymbolMap.t; mutable reductions: Production.index list TerminalMap.t; (* Tokens for which there are several possible behaviors are conflict tokens. *) mutable conflict_tokens: TerminalSet.t; (* Transitions are also stored in reverse, so as to allow reverse traversals of the automaton. *) mutable predecessors: node list; (* Transient marks are used during construction and traversal. *) mutable mark: Mark.t; (* (New as of 2012/01/23.) This flag records whether a shift/reduce conflict in this node was solved in favor of neither (%nonassoc). This is later used to forbid a default reduction at this node. *) mutable forbid_default_reduction: bool; } module Node = struct type t = node let compare node1 node2 = node1.number - node2.number end module NodeSet = Set.Make (Node) module NodeMap = Map.Make (Node) (* ------------------------------------------------------------------------ *) (* Output debugging information if [--follow-construction] is enabled. *) let follow_transition (again : bool) (source : node) (symbol : Symbol.t) (state : Lr0.lr1state) = if Settings.follow then Printf.fprintf stderr "%s transition out of state r%d along symbol %s.\nProposed target state:\n%s" (if again then "Re-examining" else "Examining") source.raw_number (Symbol.print symbol) (Lr0.print_closure "" state) let follow_state (msg : string) (node : node) (print : bool) = if Settings.follow then Printf.fprintf stderr "%s: r%d.\n%s\n" msg node.raw_number (if print then Lr0.print_closure "" node.state else "") (* ------------------------------------------------------------------------ *) (* The following two mutually recursive functions are invoked when the state associated with an existing node grows. The node's descendants are examined and grown until a fixpoint is reached. This work is performed in an eager manner: we do not attempt to build any new transitions until all existing nodes have been suitably grown. Indeed, building new transitions requires making merging decisions, and such decisions cannot be made on a sound basis unless all existing nodes have been suitably grown. Otherwise, one could run into a dead end where two successive, incompatible merging decisions are made, because the consequences of the first decision (growing descendant nodes) were not made explicit before the second decision was taken. This was a bug in versions of Menhir ante 20070520. Although I wrote this code independently, I later found out that it seems quite similar to the code in Karl Schimpf's Ph.D. thesis (1981), page 35. 
It is necessary that all existing transitions be explicit before the [grow] functions are called. In other words, if it has been decided that there will be a transition from [node1] to [node2], then [node1.transitions] must be updated before [grow] is invoked. *) (* [grow node state] grows the existing node [node], if necessary, so that its associated state subsumes [state]. If this represents an actual (strict) growth, then [node]'s descendants are grown as well. *) let rec grow node state = if Lr0.subsume state node.state then follow_state "Target state is unaffected" node false else begin (* In versions of Menhir prior to June 2008, I wrote this: If I know what I am doing, then the new state that is being merged into the existing state should be compatible, in Pager's sense, with the existing node. In other words, compatibility should be preserved through transitions. and the code contained this assertion: assert (Lr0.compatible state node.state); assert (Lr0.eos_compatible state node.state); However, this was wrong. See, for instance, the sample grammars cocci.mly and boris-mini.mly. The problem is particularly clearly apparent in boris-mini.mly, where it only involves inclusion of states -- the definition of Pager's weak compatibility does not enter the picture. Here is, roughly, what is going on. Assume we have built some state A, which, along some symbol S, has a transition to itself. This means, in fact, that computing the successor of A along S yields a *subset* of A, that is, succ(A, S) <= A. Then, we wish to build a new state A', which turns out to be a superset of A, so we decide to grow A. (The fact that A is a subset of A' implies that A and A' are Pager-compatible.) As per the code below, we immediately update the state A in place, to become A'. Then, we inspect the transition along symbol S. We find that the state succ(A', S) must be merged into A'. In this situation, the assertions above require succ(A', S) to be compatible with A'. However, this is not necessarily the case. By monotonicity of succ, we do have succ(A, S) <= succ(A', S). But nothing says that succ(A', S) and A' are related with respect to inclusion, or even Pager-compatible. The grammar in boris-mini.mly shows that they are not. *) (* Grow [node]. *) node.state <- Lr0.union state node.state; follow_state "Growing existing state" node true; (* Grow [node]'s successors. *) grow_successors node end (* [grow_successors node] grows [node]'s successors. *) (* Note that, if there is a cycle in the graph, [grow_successors] can be invoked several times at a single node [node], with [node.state] taking on a new value every time. In such a case, this code should be correct, although probably not very efficient. *) and grow_successors node = SymbolMap.iter (fun symbol (successor_node : node) -> let successor_state = Lr0.transition symbol node.state in follow_transition true node symbol successor_state; grow successor_node successor_state ) node.transitions (* ------------------------------------------------------------------------ *) (* Data structures maintained during the construction of the automaton. *) (* A queue of pending nodes, whose outgoing transitions have not yet been built. *) let queue : node Queue.t = Queue.create() (* A mapping of LR(0) node numbers to lists of nodes. This allows us to efficiently find all existing nodes that are core-compatible with a newly found state. *) let map : node list array = Array.make Lr0.n [] (* A counter that allows assigning raw numbers to nodes.
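Raw numbers are assigned via [Misc.postincrement] in [create] below. For the record, here is a minimal sketch of that idiom, consistent with its specification in misc.mli (a sketch, not necessarily Menhir's actual implementation):

     let postincrement (r : int ref) : int =
       let v = !r in
       r := v + 1;
       v

   Thus, successive calls [postincrement num] yield 0, 1, 2, and so on.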
*) let num = ref 0 (* ------------------------------------------------------------------------ *) (* [create state] creates a new node that stands for the state [state]. It is expected that [state] does not subsume, and is not subsumed by, any existing state. *) let create (state : Lr0.lr1state) : node = (* Allocate a new node. *) let node = { state = state; transitions = SymbolMap.empty; reductions = TerminalMap.empty; conflict_tokens = TerminalSet.empty; raw_number = Misc.postincrement num; number = 0; (* temporary placeholder *) mark = Mark.none; predecessors = []; forbid_default_reduction = false; } in (* Update the mapping of LR(0) cores to lists of nodes. *) let k = Lr0.core state in assert (k < Lr0.n); map.(k) <- node :: map.(k); (* Enqueue this node for further examination. *) Queue.add node queue; (* Debugging output. *) follow_state "Creating a new state" node false; (* Return the freshly created node. *) node (* ------------------------------------------------------------------------ *) (* Materializing a transition turns its target state into a (fresh or existing) node. There are three scenarios: the proposed new state can be subsumed by an existing state, compatible with an existing state, or neither. *) exception Subsumed of node exception Compatible of node let materialize (source : node) (symbol : Symbol.t) (target : Lr0.lr1state) : unit = try (* Debugging output. *) follow_transition false source symbol target; (* Find all existing core-compatible states. *) let k = Lr0.core target in assert (k < Lr0.n); let similar = map.(k) in (* Check whether we need to create a new node or can reuse an existing state. *) (* 20120525: the manner in which this check is performed depends on [Settings.construction_mode]. There are now three modes. *) (* 20150204: there are now four modes. *) begin match Settings.construction_mode with | Settings.ModeCanonical -> (* In a canonical automaton, two states can be merged only if they are identical. *) List.iter (fun node -> if Lr0.subsume target node.state && Lr0.subsume node.state target then raise (Subsumed node) ) similar | Settings.ModeInclusionOnly | Settings.ModePager -> (* A more aggressive approach is to take subsumption into account: if the new candidate state is a subset of an existing state, then no new node needs to be created. Furthermore, the existing state does not need to be enlarged. *) (* 20110124: require error compatibility in addition to subsumption. *) List.iter (fun node -> if Lr0.subsume target node.state && Lr0.error_compatible target node.state then raise (Subsumed node) ) similar | Settings.ModeLALR -> () end; begin match Settings.construction_mode with | Settings.ModeCanonical | Settings.ModeInclusionOnly -> () | Settings.ModePager -> (* One can be even more aggressive and check whether the existing state is compatible, in Pager's sense, with the new state. If so, there is no need to create a new state: just merge the new state into the existing one. The result is a state that may be larger than each of the two states that have been merged. *) (* 20110124: require error compatibility in addition to the existing compatibility criteria. *) List.iter (fun node -> if Lr0.compatible target node.state && Lr0.eos_compatible target node.state && Lr0.error_compatible target node.state then raise (Compatible node) ) similar | Settings.ModeLALR -> (* In LALR mode, as soon as there is one similar state -- i.e. one state that shares the same LR(0) core -- we merge the new state into the existing one.
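In summary, for each core-compatible candidate [node], the decision implemented by the two [match] constructs above can be sketched as follows -- a rough summary only, which ignores the error- and eos-compatibility refinements mentioned in the comments:

     let decision mode target node =
       match mode with
       | ModeCanonical ->
           if subsume target node && subsume node target then `Join else `New
       | ModeInclusionOnly ->
           if subsume target node then `Join else `New
       | ModePager ->
           if subsume target node then `Join
           else if pager_compatible target node then `Merge
           else `New
       | ModeLALR ->
           `Merge (* a shared LR(0) core suffices *)

   where `Join means re-using [node] as is, `Merge means growing [node] via [Lr0.union], and a fresh node is created only if every candidate yields `New.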
*) List.iter (fun node -> raise (Compatible node) ) similar end; (* The above checks have failed. Create a new node. Two states that are in the subsumption relation are also compatible. This implies that the newly created node does not subsume any existing states. *) source.transitions <- SymbolMap.add symbol (create target) source.transitions with | Subsumed node -> (* Join an existing target node. *) follow_state "Joining existing state" node false; source.transitions <- SymbolMap.add symbol node source.transitions | Compatible node -> (* Join and grow an existing target node. It seems important that the new transition is created before [grow_successors] is invoked, so that all transition decisions made so far are explicit. *) node.state <- Lr0.union target node.state; follow_state "Joining and growing existing state" node true; source.transitions <- SymbolMap.add symbol node source.transitions; grow_successors node (* ------------------------------------------------------------------------ *) (* The actual construction process. *) (* Populate the queue with the start nodes and store them in an array. *) let entry : node ProductionMap.t = ProductionMap.map (fun (k : Lr0.node) -> create (Lr0.start k) ) Lr0.entry (* Pick a node in the queue, that is, a node whose transitions have not yet been built. Build these transitions, and continue. *) (* Note that building a transition can cause existing nodes to grow, so [node.state] is not necessarily invariant throughout the inner loop. *) let () = Misc.qiter (fun node -> List.iter (fun symbol -> materialize node symbol (Lr0.transition symbol node.state) ) (Lr0.outgoing_symbols (Lr0.core node.state)) ) queue (* Record how many nodes were constructed. *) let n = !num let () = Error.logA 1 (fun f -> Printf.fprintf f "Built an LR(1) automaton with %d states.\n" !num) (* ------------------------------------------------------------------------ *) (* A mapping of symbols to lists of nodes that admit this incoming symbol. This mapping is constructed by [visit] below. *) let incoming : node list SymbolMap.t ref = ref SymbolMap.empty let lookup_incoming symbol = try SymbolMap.find symbol !incoming with Not_found -> [] let record_incoming osymbol target = Option.iter (fun symbol -> let targets = lookup_incoming symbol in incoming := SymbolMap.add symbol (target :: targets) !incoming ) osymbol (* ------------------------------------------------------------------------ *) (* We now perform one depth-first traversal of the automaton, recording predecessor edges, numbering nodes, sorting nodes according to their incoming symbol, building reduction tables, and finding out which nodes have conflicts. *) (* A count of all nodes. *) let () = num := 0 (* A list of all nodes. *) let nodes : node list ref = ref [] (* A list of nodes with conflicts. *) let conflict_nodes : node list ref = ref [] (* Counts of nodes with shift/reduce and reduce/reduce conflicts. *) let shift_reduce = ref 0 let reduce_reduce = ref 0 (* Count of the shift/reduce conflicts that could be silently resolved. *) let silently_solved = ref 0 (* Go ahead. *) let () = let marked = Mark.fresh() in let rec visit osymbol node = if not (Mark.same node.mark marked) then begin node.mark <- marked; nodes := node :: !nodes; record_incoming osymbol node; (* Number this node. *) let number = !num in num := number + 1; node.number <- number; (* Insertion of a new reduce action into the table of reductions. 
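For instance, with hypothetical productions [p0] and [p1] and a hypothetical token [tok], one has, in terms of the function [addl] defined below:

     let m = addl p0 tok TerminalMap.empty in
     (* [m] now maps [tok] to the list [p0]. *)
     let m = addl p1 tok m in
     (* [m] now maps [tok] to the list [p1; p0]. *)

   That is, each insertion conses the new production onto the front of the list associated with [tok].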
*) let addl prod tok reductions = let prods = try TerminalMap.lookup tok reductions with Not_found -> [] in TerminalMap.add tok (prod :: prods) reductions in (* Build the reduction table. Here, we gather all potential reductions, without attempting to solve shift/reduce conflicts on the fly, because that would potentially hide shift/reduce/reduce conflicts, which we want to be aware of. *) let reductions = List.fold_left (fun reductions (toks, prod) -> TerminalSet.fold (addl prod) toks reductions ) TerminalMap.empty (Lr0.reductions node.state) in (* Detect conflicts. Attempt to solve shift/reduce conflicts when unambiguously allowed by priorities. *) let has_shift_reduce = ref false and has_reduce_reduce = ref false in node.reductions <- TerminalMap.fold (fun tok prods reductions -> if SymbolMap.mem (Symbol.T tok) node.transitions then begin (* There is a transition in addition to the reduction(s). We have (at least) a shift/reduce conflict. *) assert (not (Terminal.equal tok Terminal.sharp)); match prods with | [] -> assert false | [ prod ] -> begin (* This is a single shift/reduce conflict. If priorities tell us how to solve it, we follow that and modify the automaton. *) match Precedence.shift_reduce tok prod with | Precedence.ChooseShift -> (* Suppress the reduce action. *) incr silently_solved; reductions | Precedence.ChooseReduce -> (* Record the reduce action and suppress the shift transition. The automaton is modified in place. This can have the subtle effect of making some nodes unreachable. Any conflicts in these nodes will then be ignored (as they should be). *) incr silently_solved; node.transitions <- SymbolMap.remove (Symbol.T tok) node.transitions; TerminalMap.add tok prods reductions | Precedence.ChooseNeither -> (* Suppress the reduce action and the shift transition. *) incr silently_solved; node.transitions <- SymbolMap.remove (Symbol.T tok) node.transitions; node.forbid_default_reduction <- true; reductions | Precedence.DontKnow -> (* Priorities don't allow concluding. Record the existence of a shift/reduce conflict. *) node.conflict_tokens <- Grammar.TerminalSet.add tok node.conflict_tokens; has_shift_reduce := true; TerminalMap.add tok prods reductions end | _prod1 :: _prod2 :: _ -> (* This is a shift/reduce/reduce conflict. If the priorities are such that each individual shift/reduce conflict is solved in favor of shifting or in favor of neither, then solve the entire composite conflict in the same way. Otherwise, report the conflict. *) let choices = List.map (Precedence.shift_reduce tok) prods in if List.for_all (fun choice -> match choice with | Precedence.ChooseShift -> true | _ -> false ) choices then begin (* Suppress the reduce action. *) silently_solved := !silently_solved + List.length prods; reductions end else if List.for_all (fun choice -> match choice with | Precedence.ChooseNeither -> true | _ -> false ) choices then begin (* Suppress the reduce action and the shift transition. *) silently_solved := !silently_solved + List.length prods; node.transitions <- SymbolMap.remove (Symbol.T tok) node.transitions; reductions end else begin (* Record a shift/reduce/reduce conflict. Keep all reductions. *) node.conflict_tokens <- Grammar.TerminalSet.add tok node.conflict_tokens; has_shift_reduce := true; has_reduce_reduce := true; TerminalMap.add tok prods reductions end end else let () = match prods with | [] | [ _ ] -> () | _prod1 :: _prod2 :: _ -> (* There is no transition in addition to the reduction(s). We have a pure reduce/reduce conflict. 
Do nothing about it at this point. *) node.conflict_tokens <- Grammar.TerminalSet.add tok node.conflict_tokens; has_reduce_reduce := true in TerminalMap.add tok prods reductions ) reductions TerminalMap.empty; (* Record statistics about conflicts. *) if not (TerminalSet.is_empty node.conflict_tokens) then begin conflict_nodes := node :: !conflict_nodes; if !has_shift_reduce then incr shift_reduce; if !has_reduce_reduce then incr reduce_reduce end; (* Continue the depth-first traversal. Record predecessor edges as we go. No ancestor appears twice in a list of predecessors, because two nodes cannot be related by two edges that carry distinct symbols. *) SymbolMap.iter (fun symbol son -> son.predecessors <- node :: son.predecessors; visit (Some symbol) son ) node.transitions end in ProductionMap.iter (fun _ node -> visit None node) entry let nodes = List.rev !nodes (* list is now sorted by increasing node numbers *) let conflict_nodes = !conflict_nodes let () = if !silently_solved = 1 then Error.logA 1 (fun f -> Printf.fprintf f "One shift/reduce conflict was silently solved.\n") else if !silently_solved > 1 then Error.logA 1 (fun f -> Printf.fprintf f "%d shift/reduce conflicts were silently solved.\n" !silently_solved); if !num < n then Error.logA 1 (fun f -> Printf.fprintf f "Only %d states remain after resolving shift/reduce conflicts.\n" !num) let () = Grammar.diagnostics() let n = !num let forbid_default_reduction node = node.forbid_default_reduction (* ------------------------------------------------------------------------ *) (* The incoming symbol of a node can be computed by going through its LR(0) core. For this reason, we do not need to explicitly record it here. *) let incoming_symbol node = Lr0.incoming_symbol (Lr0.core node.state) (* ------------------------------------------------------------------------ *) (* Iteration over all nodes. *) let fold f accu = List.fold_left f accu nodes let iter f = fold (fun () node -> f node) () let map f = List.map f nodes let foldx f = fold (fun accu node -> match incoming_symbol node with | None -> accu | Some _ -> f accu node) let iterx f = iter (fun node -> match incoming_symbol node with | None -> () | Some _ -> f node) (* -------------------------------------------------------------------------- *) (* Our output channel. *) let out = lazy (open_out (Settings.base ^ ".automaton")) (* ------------------------------------------------------------------------ *) (* If requested, dump a verbose description of the automaton. *) let describe out node = Printf.fprintf out "State %d%s:\n%s" node.number (if Settings.follow then Printf.sprintf " (r%d)" node.raw_number else "") (Lr0.print "" node.state); SymbolMap.iter (fun symbol node -> Printf.fprintf out "-- On %s shift to state %d\n" (Symbol.print symbol) node.number ) node.transitions; TerminalMap.iter (fun tok prods -> List.iter (fun prod -> (* TEMPORARY factor out the symbols that lead to reducing the same production *) Printf.fprintf out "-- On %s " (Terminal.print tok); match Production.classify prod with | Some nt -> Printf.fprintf out "accept %s\n" (Nonterminal.print false nt) | None -> Printf.fprintf out "reduce production %s\n" (Production.print prod) ) prods ) node.reductions; if not (TerminalSet.is_empty node.conflict_tokens) then Printf.fprintf out "** Conflict on %s\n" (TerminalSet.print node.conflict_tokens); Printf.fprintf out "\n%!"
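(* For the record, the description produced by [describe] above has roughly the following shape -- an illustrative sketch only: the state numbers, items, and symbols shown here are hypothetical and depend on the grammar at hand.

     State 12:
     expr -> expr . PLUS expr [ EOL PLUS ]
     -- On PLUS shift to state 7
     -- On EOL reduce production expr -> expr PLUS expr

   In addition, a line of the form "** Conflict on ..." appears in the description of every state that carries an unresolved conflict. *)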
let () = Time.tick "Construction of the LR(1) automaton"; if Settings.dump then begin iter (describe (Lazy.force out)); Time.tick "Dumping the LR(1) automaton" end (* ------------------------------------------------------------------------ *) (* [reverse_dfs goal] performs a reverse depth-first search through the automaton, starting at node [goal], and marking the nodes traversed. It returns a function that tells whether a node is marked, that is, whether a path leads from that node to the goal node. *) let reverse_dfs goal = let mark = Mark.fresh() in let marked node = Mark.same node.mark mark in let rec visit node = if not (marked node) then begin node.mark <- mark; List.iter visit node.predecessors end in visit goal; marked (* ------------------------------------------------------------------------ *) (* Iterating over all nodes that are targets of edges carrying a certain symbol. The sources of the corresponding edges are also provided. *) let targets f accu symbol = (* There are no incoming transitions on the start symbols. *) let targets = lookup_incoming symbol in List.fold_left (fun accu target -> f accu target.predecessors target ) accu targets (* ------------------------------------------------------------------------ *) (* Converting a start node into the single item that it contains. *) let start2item node = let state : Lr0.lr1state = node.state in let core : Lr0.node = Lr0.core state in let items : Item.Set.t = Lr0.items core in assert (Item.Set.cardinal items = 1); Item.Set.choose items (* ------------------------------------------------------------------------ *) (* Accessors. *) let number node = node.number let state node = node.state let transitions node = node.transitions let reductions node = node.reductions let conflicts f = List.iter (fun node -> f node.conflict_tokens node ) conflict_nodes let predecessors node = node.predecessors (* ------------------------------------------------------------------------ *) (* This inverts a mapping of tokens to productions into a mapping of productions to sets of tokens. *) (* This is needed, in [CodeBackend], to avoid producing two (or more) separate branches that call the same [reduce] function. Instead, we generate just one branch, guarded by a [POr] pattern. *) let invert reductions : TerminalSet.t ProductionMap.t = TerminalMap.fold (fun tok prods inverse -> let prod = Misc.single prods in let toks = try ProductionMap.lookup prod inverse with Not_found -> TerminalSet.empty in ProductionMap.add prod (TerminalSet.add tok toks) inverse ) reductions ProductionMap.empty (* ------------------------------------------------------------------------ *) (* [has_beforeend s] tests whether the state [s] can reduce a production whose semantic action uses [$endpos($0)]. Note that [$startpos] and [$endpos] have been expanded away already, so we need not worry about the fact that (in an epsilon production) they expand to [$endpos($0)]. *) let has_beforeend node = TerminalMap.fold (fun _ prods accu -> accu || let prod = Misc.single prods in not (Production.is_start prod) && let action = Production.action prod in Action.has_beforeend action ) (reductions node) false (* ------------------------------------------------------------------------ *) (* Computing which terminal symbols a state is willing to act upon. One must keep in mind that, due to the merging of states, a state might be willing to perform a reduction on a certain token, yet the reduction can take us to another state where this token causes an error. 
In other words, the set of terminal symbols that is computed here is really an over-approximation of the set of symbols that will not cause an error. And there seems to be no way of performing an exact computation, as we would need to know not only the current state, but the contents of the stack as well. *) let acceptable_tokens (s : node) = (* If this state is willing to act on the error token, ignore it -- we do not wish to report that an error would be accepted in this state :-) *) let transitions = SymbolMap.remove (Symbol.T Terminal.error) (transitions s) and reductions = TerminalMap.remove Terminal.error (reductions s) in (* Accumulate the tokens carried by outgoing transitions. *) let covered = SymbolMap.fold (fun symbol _ covered -> match symbol with | Symbol.T tok -> TerminalSet.add tok covered | Symbol.N _ -> covered ) transitions TerminalSet.empty in (* Accumulate the tokens that permit reduction. *) let covered = ProductionMap.fold (fun _ toks covered -> TerminalSet.union toks covered ) (invert reductions) covered in (* That's it. *) covered (* ------------------------------------------------------------------------ *) (* Report statistics. *) (* Produce the reports. *) let () = if !shift_reduce = 1 then Error.grammar_warning [] "one state has shift/reduce conflicts." else if !shift_reduce > 1 then Error.grammar_warning [] "%d states have shift/reduce conflicts." !shift_reduce; if !reduce_reduce = 1 then Error.grammar_warning [] "one state has reduce/reduce conflicts." else if !reduce_reduce > 1 then Error.grammar_warning [] "%d states have reduce/reduce conflicts." !reduce_reduce (* ------------------------------------------------------------------------ *) (* If any fatal error was signaled up to this point, stop now. *) let () = if Error.errors() then exit 1 (* ------------------------------------------------------------------------ *) (* For each production, compute where (that is, in which states) this production can be reduced. This computation is done AFTER default conflict resolution (see below). It is an error to call the accessor function [production_where] before default conflict resolution has taken place. *) let production_where : NodeSet.t ProductionMap.t option ref = ref None let initialize_production_where () = production_where := Some ( fold (fun accu node -> TerminalMap.fold (fun _ prods accu -> let prod = Misc.single prods in let nodes = try ProductionMap.lookup prod accu with Not_found -> NodeSet.empty in ProductionMap.add prod (NodeSet.add node nodes) accu ) (reductions node) accu ) ProductionMap.empty ) let production_where (prod : Production.index) : NodeSet.t = match !production_where with | None -> (* It is an error to call this function before conflict resolution. *) assert false | Some production_where -> try (* Production [prod] may be reduced at [nodes]. *) let nodes = ProductionMap.lookup prod production_where in assert (not (NodeSet.is_empty nodes)); nodes with Not_found -> (* The production [prod] is never reduced. *) NodeSet.empty (* ------------------------------------------------------------------------ *) (* Warn about productions that are never reduced. *) (* These are productions that can never, ever be reduced, because there is no state that is willing to reduce them. There could be other productions that are never reduced because the only states that are willing to reduce them are unreachable. We do not report those. 
In fact, through the use of the inspection API, it might be possible to bring the automaton into a state where one of those productions can be reduced. *) let warn_about_productions_never_reduced () = let count = ref 0 in Production.iter (fun prod -> if NodeSet.is_empty (production_where prod) then match Production.classify prod with | Some nt -> incr count; Error.grammar_warning (Nonterminal.positions nt) "symbol %s is never accepted." (Nonterminal.print false nt) | None -> incr count; Error.grammar_warning (Production.positions prod) "production %sis never reduced." (Production.print prod) ); if !count > 0 then Error.grammar_warning [] "in total, %d productions are never reduced." !count (* ------------------------------------------------------------------------ *) (* When requested by the code generator, apply default conflict resolution to ensure that the automaton is deterministic. *) (* [best prod prods] chooses which production should be reduced among the list [prod :: prods]. It fails if no best choice exists. *) let rec best choice = function | [] -> choice | prod :: prods -> match Precedence.reduce_reduce choice prod with | Some choice -> best choice prods | None -> (* The cause for not knowing which production is best could be: 1- the productions originate in different source files; 2- they are derived, via inlining, from the same production. *) Error.signal (Production.positions choice @ Production.positions prod) "do not know how to resolve a reduce/reduce conflict\n\ between the following two productions:\n%s\n%s" (Production.print choice) (Production.print prod); choice (* dummy *) (* Go ahead. *) let default_conflict_resolution () = let shift_reduce = ref 0 and reduce_reduce = ref 0 in List.iter (fun node -> node.reductions <- TerminalMap.fold (fun tok prods reductions -> try let (_ : node) = SymbolMap.find (Symbol.T tok) node.transitions in (* There is a transition at this symbol, so this is a (possibly multiway) shift/reduce conflict. Resolve in favor of shifting by suppressing all reductions. *) shift_reduce := List.length prods + !shift_reduce; reductions with Not_found -> (* There is no transition at this symbol. Check whether we have multiple reductions. *) match prods with | [] -> assert false | [ _ ] -> TerminalMap.add tok prods reductions | prod :: ((_ :: _) as prods) -> (* We have a reduce/reduce conflict. Resolve, if possible, in favor of a single reduction. This reduction must be preferable to each of the others. *) reduce_reduce := List.length prods + !reduce_reduce; TerminalMap.add tok [ best prod prods ] reductions ) node.reductions TerminalMap.empty ) conflict_nodes; if !shift_reduce = 1 then Error.warning [] "one shift/reduce conflict was arbitrarily resolved." else if !shift_reduce > 1 then Error.warning [] "%d shift/reduce conflicts were arbitrarily resolved." !shift_reduce; if !reduce_reduce = 1 then Error.warning [] "one reduce/reduce conflict was arbitrarily resolved." else if !reduce_reduce > 1 then Error.warning [] "%d reduce/reduce conflicts were arbitrarily resolved." !reduce_reduce; (* Now, ensure that states that have a reduce action at the pseudo-token "#" have no other action. *) let ambiguities = ref 0 in fold (fun () node -> try let prods, reductions = TerminalMap.lookup_and_remove Terminal.sharp node.reductions in let prod = Misc.single prods in (* This node has a reduce action at "#". Determine whether there exist other actions. If there exist any other actions, suppress this reduce action, and signal an ambiguity.
We signal an ambiguity even in the case where all actions at this node call for reducing a single production. Indeed, in that case, even though we know that this production must be reduced, we do not know whether we should first discard the current token (and call the lexer). *) let has_ambiguity = ref false in let toks = ref TerminalSet.empty in TerminalMap.iter (fun tok _prods -> node.reductions <- reductions; has_ambiguity := true; toks := TerminalSet.add tok !toks ) reductions; SymbolMap.iter (fun symbol _ -> match symbol with | Symbol.N _ -> () | Symbol.T tok -> node.reductions <- reductions; has_ambiguity := true; toks := TerminalSet.add tok !toks ) node.transitions; if !has_ambiguity then begin incr ambiguities; if Settings.dump then begin Printf.fprintf (Lazy.force out) "State %d has an end-of-stream conflict. There is a tension between\n\ (1) %s\n\ without even requesting a lookahead token, and\n\ (2) checking whether the lookahead token is %s%s,\n\ which would require some other action.\n\n" (number node) (match Production.classify prod with | Some nt -> Printf.sprintf "accepting %s" (Nonterminal.print false nt) | None -> Printf.sprintf "reducing production %s" (Production.print prod)) (if TerminalSet.cardinal !toks > 1 then "one of " else "") (TerminalSet.print !toks) end end with Not_found -> () ) (); if !ambiguities = 1 then Error.grammar_warning [] "one state has an end-of-stream conflict." else if !ambiguities > 1 then Error.grammar_warning [] "%d states have an end-of-stream conflict." !ambiguities; (* We can now compute where productions are reduced. *) initialize_production_where(); warn_about_productions_never_reduced() (* ------------------------------------------------------------------------ *) (* Extra reductions. *) (* 2015/10/19 Original implementation. *) (* 2016/07/13 Use priority levels to choose which productions to reduce when several productions are eligible. *) (* If a state can reduce some productions whose left-hand symbol has been marked [%on_error_reduce], and if one such production [prod] is preferable to every other (according to the priority rules of [%on_error_reduce] declarations), then every error action in this state is replaced with a reduction of [prod]. This is done even though this state may have outgoing shift transitions: thus, we are forcing one interpretation of the past, among several possible interpretations. *) (* The code below looks like the decision on a default reduction in [Default], except we do not impose the absence of outgoing terminal transitions. Also, we actually modify the automaton, so the back-ends, the reference interpreter, etc., need not be aware of this feature, whereas they are aware of default reductions. *) (* This code can run before we decide on the default reductions; this does not affect which default reductions will be permitted. *) (* This code does not affect which productions can be reduced where. Thus, it is OK for it to run after [initialize_production_where()]. *) (* A count of how many states receive extra reductions through this mechanism. *) let extra = ref 0 (* A count of how many states have more than one eligible production, but one is preferable to every other (so priority plays a role). *) let prioritized = ref 0 (* The set of nonterminal symbols in the left-hand side of an extra reduction. *) let extra_nts = ref NonterminalSet.empty let extra_reductions_in_node node = (* Compute the productions which this node can reduce. 
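This relies on [invert], defined earlier in this file: the token-indexed table [reductions node] is turned into a production-indexed table. For instance, if the tokens [A] and [B] both trigger a reduction of [prod], then [productions] maps [prod] to the token set { A, B }, so [prod] appears exactly once in the list [prods] built below, no matter how many tokens trigger it.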
*) let productions : _ ProductionMap.t = invert (reductions node) in let prods : Production.index list = ProductionMap.fold (fun prod _ prods -> prod :: prods) productions [] in (* Keep only those whose left-hand symbol is marked [%on_error_reduce]. *) let prods = List.filter OnErrorReduce.reduce prods in (* Check if one of them is preferable to every other one. *) match Misc.best OnErrorReduce.preferable prods with | None -> (* Either no production is marked [%on_error_reduce], or several of them are marked and none is preferable. *) () | Some prod -> let acceptable = acceptable_tokens node in (* An extra reduction is possible. Replace every error action with a reduction of [prod]. If we replace at least one error action with a reduction, update [extra] and [extra_nts]. *) let triggered = lazy ( incr extra; if List.length prods > 1 then incr prioritized; extra_nts := NonterminalSet.add (Production.nt prod) !extra_nts ) in Terminal.iter_real (fun tok -> if not (TerminalSet.mem tok acceptable) then begin node.reductions <- TerminalMap.add tok [ prod ] node.reductions; Lazy.force triggered end ) let extra_reductions () = (* Examine every node. *) iter (fun node -> (* Just like a default reduction, an extra reduction should be forbidden (it seems) if [forbid_default_reduction] is set. *) if not node.forbid_default_reduction then extra_reductions_in_node node ); (* Info message. *) if !extra > 0 then Error.logA 1 (fun f -> Printf.fprintf f "Extra reductions on error were added in %d states.\n" !extra; Printf.fprintf f "Priority played a role in %d of these states.\n" !prioritized ); (* Warn about useless %on_error_reduce declarations. *) OnErrorReduce.iter (fun nt -> if not (NonterminalSet.mem nt !extra_nts) then Error.grammar_warning [] "the declaration %%on_error_reduce %s is never useful." (Nonterminal.print false nt) ) (* ------------------------------------------------------------------------ *) (* Define [fold_entry], which in some cases facilitates the use of [entry]. *) let fold_entry f accu = ProductionMap.fold (fun prod state accu -> let nt : Nonterminal.t = match Production.classify prod with | Some nt -> nt | None -> assert false (* this is a start production *) in let t : Stretch.ocamltype = Nonterminal.ocamltype_of_start_symbol nt in f prod state nt t accu ) entry accu let entry_of_nt nt = (* Find the entry state that corresponds to [nt]. *) try ProductionMap.find (Production.startsymbol2startprod nt) entry with Not_found -> assert false exception Found of Nonterminal.t let nt_of_entry s = (* [s] should be an initial state. *) assert (incoming_symbol s = None); try ProductionMap.iter (fun prod entry -> if Node.compare s entry = 0 then match Production.classify prod with | None -> assert false | Some nt -> raise (Found nt) ) entry; (* This should not happen if [s] is indeed an initial state. *) assert false with Found nt -> nt menhir-20171222/src/unparameterizedPrinter.ml0000664000175000017500000003443413217215730021407 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. 
*) (* *) (******************************************************************************) open Printf open Positions open Syntax open Stretch open UnparameterizedSyntax open Settings (* When the original grammar is split over several files, it may be IMPOSSIBLE to print it out into a single file, as that would introduce a total ordering (between rules, between priority declarations, between %on_error_reduce declarations) that did not exist originally. We currently do not warn about this problem. Nobody has ever complained about it. *) (* -------------------------------------------------------------------------- *) (* The printing mode. *) (* [PrintNormal] is the normal mode: the result is a Menhir grammar. [PrintForOCamlyacc] is close to the normal mode, but attempts to produce ocamlyacc-compatible output. This means, in particular, that we cannot bind identifiers to semantic values, but must use [$i] instead. [PrintUnitActions _] causes all OCaml code to be suppressed: the semantic actions are replaced with unit actions, preludes and postludes disappear, %parameter declarations disappear. Every %type declaration carries the [unit] type. [PrintUnitActions true] in addition declares that every token carries a semantic value of type [unit]. *) module Print (X : sig val mode : Settings.print_mode end) = struct open X (* -------------------------------------------------------------------------- *) (* Printing an OCaml type. *) let print_ocamltype ty : string = Printf.sprintf " <%s>" ( match ty with | Declared stretch -> stretch.stretch_raw_content | Inferred t -> t ) let print_ocamltype ty : string = let s = print_ocamltype ty in match mode with | PrintForOCamlyacc -> (* ocamlyacc does not allow a %type declaration to contain a new line. Replace it with a space. *) String.map (function '\r' | '\n' -> ' ' | c -> c) s | PrintNormal | PrintUnitActions _ -> s (* -------------------------------------------------------------------------- *) (* Printing the type of a terminal symbol. *) let print_token_type (prop : token_properties) = match mode with | PrintNormal | PrintForOCamlyacc | PrintUnitActions false -> Misc.o2s prop.tk_ocamltype print_ocamltype | PrintUnitActions true -> "" (* omitted ocamltype after %token means <unit> *) (* -------------------------------------------------------------------------- *) (* Printing the type of a nonterminal symbol. *) let print_nonterminal_type ty = match mode with | PrintNormal | PrintForOCamlyacc -> print_ocamltype ty | PrintUnitActions _ -> " <unit>" (* -------------------------------------------------------------------------- *) (* Printing a binding for a semantic value. *) let print_binding id = match mode with | PrintNormal -> id ^ " = " | PrintForOCamlyacc | PrintUnitActions _ -> (* need not, or must not, bind a semantic value *) "" (* -------------------------------------------------------------------------- *) (* Testing whether it is permitted to print OCaml code (semantic actions, prelude, postlude). *) let if_ocaml_code_permitted f x = match mode with | PrintNormal | PrintForOCamlyacc -> f x | PrintUnitActions _ -> (* In these modes, all OCaml code is omitted: semantic actions, preludes, postludes, etc. *) () (* -------------------------------------------------------------------------- *) (* Testing whether attributes should be printed.
*) let attributes_printed : bool = match mode with | PrintNormal | PrintUnitActions _ -> true | PrintForOCamlyacc -> false (* -------------------------------------------------------------------------- *) (* Printing a semantic action. *) let print_semantic_action f g branch = let e = Action.to_il_expr branch.action in match mode with | PrintUnitActions _ -> (* In the unit-action modes, we print a pair of empty braces, which is fine. *) () | PrintNormal -> Printer.print_expr f e | PrintForOCamlyacc -> (* In ocamlyacc-compatibility mode, the code must be wrapped in [let]-bindings whose right-hand side uses the [$i] keywords. *) let bindings = List.mapi (fun i producer -> let id = producer_identifier producer and symbol = producer_symbol producer in (* Test if [symbol] is a terminal symbol whose type is [unit]. *) let is_unit_token = try let prop = StringMap.find symbol g.tokens in prop.tk_ocamltype = None with Not_found -> false in (* Define the variable [id] as a synonym for [$(i+1)]. *) (* As an exception to this rule, if [symbol] is a terminal symbol which has been declared *not* to carry a semantic value, then we cannot use [$(i+1)] -- ocamlyacc does not allow it -- so we use the unit value instead. *) IL.PVar id, if is_unit_token then IL.EUnit else IL.EVar (sprintf "$%d" (i + 1)) ) branch.producers in (* The identifiers that we bind are pairwise distinct. *) (* We must use simultaneous bindings (that is, a [let/and] form), as opposed to a cascade of [let] bindings. Indeed, ocamlyacc internally translates [$i] to [_i] (just like us!), so name captures will occur unless we restrict the use of [$i] to the outermost scope. (Reported by Kenji Maillard.) *) let e = CodeBits.eletand (bindings, e) in Printer.print_expr f e (* -------------------------------------------------------------------------- *) (* Printing preludes and postludes. *) let print_preludes f g = List.iter (fun prelude -> fprintf f "%%{%s%%}\n" prelude.stretch_raw_content ) g.preludes let print_postludes f g = List.iter (fun postlude -> fprintf f "%s\n" postlude.stretch_raw_content ) g.postludes (* -------------------------------------------------------------------------- *) (* Printing %start declarations. *) let print_start_symbols f g = StringSet.iter (fun symbol -> fprintf f "%%start %s\n" (Misc.normalize symbol) ) g.start_symbols (* -------------------------------------------------------------------------- *) (* Printing %parameter declarations. *) let print_parameter f stretch = fprintf f "%%parameter<%s>\n" stretch.stretch_raw_content let print_parameters f g = match mode with | PrintNormal -> List.iter (print_parameter f) g.parameters | PrintForOCamlyacc | PrintUnitActions _ -> (* %parameter declarations are not supported by ocamlyacc, and presumably become useless when the semantic actions are removed. *) () (* -------------------------------------------------------------------------- *) (* Printing attributes. *) let print_attribute f ((name, payload) : attribute) = if attributes_printed then fprintf f " [@%s %s]" (Positions.value name) payload.stretch_raw_content let print_attributes f attrs = List.iter (print_attribute f) attrs (* -------------------------------------------------------------------------- *) (* Printing token declarations and precedence declarations. 
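Concretely, the code below first emits one %token declaration per declared token, then one %left, %right, or %nonassoc line per precedence level. The output thus has the following shape -- an illustrative sketch; the token names and types here are hypothetical:

     %token <int> INT
     %token PLUS MINUS TIMES
     %left PLUS MINUS
     %left TIMES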
*) let print_assoc = function | LeftAssoc -> Printf.sprintf "%%left" | RightAssoc -> Printf.sprintf "%%right" | NonAssoc -> Printf.sprintf "%%nonassoc" | UndefinedAssoc -> "" let compare_pairs compare1 compare2 (x1, x2) (y1, y2) = let c = compare1 x1 y1 in if c <> 0 then c else compare2 x2 y2 let compare_tokens (_token, prop) (_token', prop') = match prop.tk_precedence, prop'.tk_precedence with | UndefinedPrecedence, UndefinedPrecedence -> 0 | UndefinedPrecedence, PrecedenceLevel _ -> -1 | PrecedenceLevel _, UndefinedPrecedence -> 1 | PrecedenceLevel (m, v, _, _), PrecedenceLevel (m', v', _, _) -> compare_pairs InputFile.compare_input_files Pervasives.compare (m, v) (m', v') let print_tokens f g = (* Print the %token declarations. *) StringMap.iter (fun token prop -> if prop.tk_is_declared then fprintf f "%%token%s %s%a\n" (print_token_type prop) token print_attributes prop.tk_attributes ) g.tokens; (* Sort the tokens wrt. precedence, and group them into levels. *) let levels : (string * token_properties) list list = Misc.levels compare_tokens (List.sort compare_tokens ( StringMap.bindings g.tokens )) in (* Print the precedence declarations: %left, %right, %nonassoc. *) List.iter (fun level -> let (_token, prop) = try List.hd level with Failure _ -> assert false in (* Do nothing about the tokens that have no precedence. *) if prop.tk_precedence <> UndefinedPrecedence then begin fprintf f "%s" (print_assoc prop.tk_associativity); List.iter (fun (token, _prop) -> fprintf f " %s" token ) level; fprintf f "\n" end ) levels (* -------------------------------------------------------------------------- *) (* Printing %type declarations. *) let print_types f g = StringMap.iter (fun symbol ty -> fprintf f "%%type%s %s\n" (print_nonterminal_type ty) (Misc.normalize symbol) ) g.types (* -------------------------------------------------------------------------- *) (* Printing branches and rules. *) let print_producer sep f producer = fprintf f "%s%s%s%a" (sep()) (print_binding (producer_identifier producer)) (Misc.normalize (producer_symbol producer)) print_attributes (producer_attributes producer) let print_branch f g branch = (* Print the producers. *) let sep = Misc.once "" " " in List.iter (print_producer sep f) branch.producers; (* Print the %prec annotation, if there is one. *) Option.iter (fun x -> fprintf f " %%prec %s" x.value ) branch.branch_prec_annotation; (* Newline, indentation, semantic action. *) fprintf f "\n {"; print_semantic_action f g branch; fprintf f "}\n" (* Because the resolution of reduce/reduce conflicts is implicitly dictated by the order in which productions appear in the grammar, the printer should be careful to preserve this order. *) (* 2016/08/25: As noted above, when two productions originate in different files, we have a problem. We MUST print them in some order, even though they should be incomparable. In that case, we use the order in which the source files are specified on the command line. However, this behavior is undocumented, and should not be exploited. (In previous versions of Menhir, the function passed to [List.sort] was not transitive, so it did not make any sense!) 
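The comparison functions defined below are lexicographic: they are built out of [compare_pairs] above, and a lexicographic product of total orders is itself a total order, hence transitive. For instance:

     compare_pairs compare compare (1, 5) (2, 0) < 0 (* decided by the first components *)
     compare_pairs compare compare (1, 5) (1, 9) < 0 (* a tie is broken by the second components *)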
*) let compare_branch_production_levels bpl bpl' = match bpl, bpl' with | ProductionLevel (m, l), ProductionLevel (m', l') -> compare_pairs InputFile.compare_input_files Pervasives.compare (m, l) (m', l') let compare_branches (b : branch) (b' : branch) = compare_branch_production_levels b.branch_production_level b'.branch_production_level let compare_rules (_nt, (r : rule)) (_nt', (r' : rule)) = match r.branches, r'.branches with | [], [] -> 0 | [], _ -> -1 | _, [] -> 1 | b :: _, b' :: _ -> (* To compare two rules, it suffices to compare their first productions. *) compare_branches b b' let print_rule f g (nt, r) = fprintf f "\n%s%a:\n" (Misc.normalize nt) print_attributes r.attributes; (* Menhir accepts a leading "|", but bison does not. Let's not print it. So, we print a bar-separated list. *) let sep = Misc.once (" ") ("| ") in List.iter (fun br -> fprintf f "%s" (sep()); print_branch f g br ) r.branches let print_rules f g = let rules = List.sort compare_rules (StringMap.bindings g.rules) in List.iter (print_rule f g) rules (* -------------------------------------------------------------------------- *) (* Printing %on_error_reduce declarations. *) let print_on_error_reduce_declarations f g = let cmp (_nt, oel) (_nt', oel') = compare_branch_production_levels oel oel' in let levels : (string * on_error_reduce_level) list list = Misc.levels cmp (List.sort cmp ( StringMap.bindings g.on_error_reduce )) in List.iter (fun level -> fprintf f "%%on_error_reduce"; List.iter (fun (nt, _level) -> fprintf f " %s" nt ) level; fprintf f "\n" ) levels let print_on_error_reduce_declarations f g = match mode with | PrintNormal | PrintUnitActions _ -> print_on_error_reduce_declarations f g | PrintForOCamlyacc -> (* %on_error_reduce declarations are not supported by ocamlyacc *) () (* -------------------------------------------------------------------------- *) (* Printing %attribute declarations. *) let print_grammar_attribute f ((name, payload) : attribute) = if attributes_printed then fprintf f "%%[@%s %s]\n" (Positions.value name) payload.stretch_raw_content let print_grammar_attributes f g = List.iter (print_grammar_attribute f) g.gr_attributes (* -------------------------------------------------------------------------- *) (* The main entry point. *) let print f g = print_parameters f g; if_ocaml_code_permitted (print_preludes f) g; print_start_symbols f g; print_tokens f g; print_types f g; print_on_error_reduce_declarations f g; print_grammar_attributes f g; fprintf f "%%%%\n"; print_rules f g; fprintf f "\n%%%%\n"; if_ocaml_code_permitted (print_postludes f) g end let print mode = let module P = Print(struct let mode = mode end) in P.print menhir-20171222/src/nonTerminalDefinitionInlining.mli0000664000175000017500000000241313217215730022774 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (** [inline g] traverses the rules of [g] and inlines the non terminal definitions that are marked with [%inline]. It returns a pair of the transformed grammar and a flag that tells whether any inlining was actually done. 
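A typical use at a call site might look as follows -- a hypothetical sketch; the surrounding driver code is not shown here:

     let grammar, inlined = NonTerminalDefinitionInlining.inline grammar in
     (* [inlined] tells whether some %inline definitions were expanded away. *)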
*) val inline: UnparameterizedSyntax.grammar -> UnparameterizedSyntax.grammar * bool menhir-20171222/src/Memoize.mli0000664000175000017500000000315313217215730016414 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) module type MEMOIZER = sig (* A type of keys. *) type key (* A memoization combinator for this type. *) val memoize: (key -> 'a) -> (key -> 'a) end module type IMPERATIVE_MAP = sig (* A type of keys. *) type key (* A type of imperative maps. *) type 'a t (* Creation, insertion, lookup. *) val create: int -> 'a t val add: 'a t -> key -> 'a -> unit val find: 'a t -> key -> 'a end module Make (M : IMPERATIVE_MAP) : MEMOIZER with type key = M.key module MakeViaMap (O : Map.OrderedType) : MEMOIZER with type key = O.t module MakeViaHashtbl (H : Hashtbl.HashedType) : MEMOIZER with type key = H.t module Int : MEMOIZER with type key = int menhir-20171222/src/InfiniteArray.ml0000664000175000017500000000366013217215730017405 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. *) (* *) (******************************************************************************) (** This module implements infinite arrays, that is, arrays that grow transparently upon demand. *) type 'a t = { default: 'a; mutable table: 'a array; mutable extent: int; (* the index of the greatest [set] ever, plus one *) } let default_size = 16384 (* must be non-zero *) let make x = { default = x; table = Array.make default_size x; extent = 0; } let rec new_length length i = if i < length then length else new_length (2 * length) i let ensure a i = assert (0 <= i); let table = a.table in let length = Array.length table in if i >= length then begin let table' = Array.make (new_length (2 * length) i) a.default in Array.blit table 0 table' 0 length; a.table <- table' end let get a i = ensure a i; Array.unsafe_get a.table (i) let set a i x = ensure a i; Array.unsafe_set a.table (i) x; if a.extent <= i then a.extent <- i + 1 let extent a = a.extent let domain a = Array.sub a.table 0 a.extent menhir-20171222/src/interface.mli0000664000175000017500000000342013217215730016744 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module defines the interface of the generated parser. *) (* This is the [Error] exception. 
*) val excname: string val excdef: IL.excdef (* The type of the entry point for the start symbol [nt]. *) val entrytypescheme: UnparameterizedSyntax.grammar -> string -> IL.typescheme (* The name of the interpreter sub-module, when the table back-end is used. *) val interpreter: string (* The type ['a checkpoint], defined in the interpreter sub-module. *) val checkpoint: IL.typ -> IL.typ (* The name of the sub-module that contains the incremental entry points. *) val incremental: string (* The name of the sub-module that contains the inspection API. *) val inspection: string (* This writes the interface of the generated parser to the [.mli] file. *) val write: UnparameterizedSyntax.grammar -> unit -> unit menhir-20171222/src/sentenceParser.mly0000664000175000017500000000755113217215730020016 0ustar fpottierfpottier/******************************************************************************/ /* */ /* Menhir */ /* */ /* François Pottier, Inria Paris */ /* Yann Régis-Gianas, PPS, Université Paris Diderot */ /* */ /* Copyright Inria. All rights reserved. This file is distributed under the */ /* terms of the GNU General Public License version 2, as described in the */ /* file LICENSE. */ /* */ /******************************************************************************/ /* This is two parsers in one. */ /* This parser is used to read the sentences provided on the standard input channel when [--interpret] is set. The entry point is [optional_sentence]. */ /* It is used also to read a [.messages] file. The entry point is [entry]. */ /* This parser must be compatible with both ocamlyacc and menhir, so we use $ notation, do not use Menhir's standard library, and collect positions manually. */ /* ------------------------------------------------------------------------ */ /* Tokens. */ %token COLON EOF EOL %token TERMINAL %token NONTERMINAL %token COMMENT /* only manually-written comments, beginning with a single # */ /* ------------------------------------------------------------------------ */ /* Types. */ %{ open SentenceParserAux (* Removing the position information in a terminal or non-terminal symbol. *) let strip_symbol (x, _, _) = x (* Removing the position information in a sentence. *) let strip_sentence (nto, terminals) = Option.map strip_symbol nto, List.map strip_symbol terminals (* Computing the start and end positions of a sentence. *) let locate_sentence (nto, terminals) = let opening = match nto, terminals with | Some (_, opening, _), _ | None, (_, opening, _) :: _ -> opening | None, [] -> Lexing.dummy_pos (* cannot happen *) and closing = match nto, List.rev terminals with | _, (_, _, closing) :: _ | Some (_, _, closing), _ -> closing | None, [] -> Lexing.dummy_pos (* cannot happen *) in Positions.two opening closing, strip_sentence (nto, terminals) %} %type located_sentence %type optional_sentence %start optional_sentence %type entry %start entry %% /* ------------------------------------------------------------------------ */ /* An entry is a list of located sentences or comments. */ entry: located_sentences_or_comments EOF { $1 } /* A list of located sentences or comments. */ located_sentences_or_comments: { [] } | located_sentence located_sentences_or_comments { Thing $1 :: $2 } | COMMENT located_sentences_or_comments { Comment $1 :: $2 } /* A located sentence. */ located_sentence: sentence { locate_sentence $1 } /* An optional sentence. 
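A sentence is optional because the input may simply end (EOF). When Menhir is run with --interpret, each line of the standard input channel is parsed as an optional sentence. With hypothetical symbol names, each of the following two lines is a valid sentence, the first one with an explicit start symbol and the second one without:

     expr: INT PLUS INT
     INT PLUS INT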
*/ optional_sentence: | EOF { None } | sentence { Some (strip_sentence $1) } /* A sentence is a pair of an optional non-terminal start symbol and a list of terminal symbols. It is terminated by a newline. */ sentence: | NONTERMINAL COLON terminals EOL { Some $1, $3 } | terminals EOL { None, $1 } /* A list of terminal symbols. */ terminals: | { [] } | TERMINAL terminals { $1 :: $2 } menhir-20171222/src/StaticVersion.ml0000664000175000017500000000003213217215730017424 0ustar fpottierfpottierlet require_20171222 = () menhir-20171222/src/misc.mli0000664000175000017500000001726513217215730015753 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* Projecting out of an option. May fail abruptly! *) val unSome: 'a option -> 'a (* Converting an option to a string, with [None] converted to the empty string. *) val o2s: 'a option -> ('a -> string) -> string (* Projection out of a singleton list. *) val single: 'a list -> 'a (* A variant of [List.map] where [f] returns a pair of elements, to be flattened into the new list. *) val mapd: ('a -> 'b * 'b) -> 'a list -> 'b list (* Tabulating a function using an internal array. [tabulate n f] returns a function that is extensionally equal to [f], but relies on an internal array. Arguments to [f] are of type [int] and are supposed to lie in the range [0..n). *) val tabulate: int -> (int -> 'a) -> (int -> 'a) (* Tabulating a function using an internal array. [tabulateb n f] returns a function that is extensionally equal to [f], but relies on an internal array. Arguments to [f] are of type [int] and are supposed to lie in the range [0..n). The result type of [f] is assumed to be of type [bool]. [tabulateb] also returns the number of points where [f] is [true]. *) val tabulateb: int -> (int -> bool) -> (int -> bool) * int (* [tabulateo number fold n f] returns a function that is extensionally equal to [f], but relies on an internal array. Arguments to [f] are of type ['a] and are mapped by [number] into the range [0..n). [fold] allows folding over the domain of [f]. The result type of [f] is an option type, and [tabulateo] also returns the number of points where [f] is [Some _]. *) val tabulateo: ('a -> int) -> ((unit -> 'a -> unit) -> unit -> unit) -> int -> ('a -> 'b option) -> ('a -> 'b option) * int (* Reverse function application. *) val ( $$ ) : 'a -> ('a -> 'b) -> 'b (* [separated_list_to_string printer sep l] converts [l] into a string representation built by using [printer] on each element and [sep] as a separator. *) type 'a iter = ('a -> unit) -> unit val separated_iter_to_string: ('a -> string) -> string -> 'a iter -> string val separated_list_to_string: ('a -> string) -> string -> 'a list -> string (* If [a] is an array, therefore a mapping of integers to elements, then [inverse a] computes its inverse, a mapping of elements to integers. The type ['a] of elements must support the use of OCaml's generic equality and hashing functions. *) val inverse: 'a array -> ('a -> int) (* [support_assoc l x] returns the second component of the first couple in [l] whose first component is [x]. If it does not exist, it returns [x]. 
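For instance:

     support_assoc [ ("a", "b"); ("c", "d") ] "a" = "b"
     support_assoc [ ("a", "b"); ("c", "d") ] "x" = "x"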
*) val support_assoc : ('a * 'a) list -> 'a -> 'a (* [index] indexes a list of (distinct) strings, that is, assigns an integer index to each string and builds mappings both ways between strings and indices. *) val index: string list -> int * string array * int StringMap.t (* Turning an implicit list, stored using pointers through a hash table, into an explicit list. The head of the implicit list is not included in the explicit list. *) val materialize: ('a, 'a option) Hashtbl.t -> 'a -> 'a list (* [iteri] implements a [for] loop over integers, from 0 to [n-1]. *) val iteri: int -> (int -> unit) -> unit (* [foldi] implements a [for] loop over integers, from 0 to [n-1], with an accumulator. [foldij] implements a [for] loop over integers, from [start] to [n-1], with an accumulator. *) val foldi: int -> (int -> 'a -> 'a) -> 'a -> 'a val foldij: int -> int -> (int -> 'a -> 'a) -> 'a -> 'a (* [mapij start n f] produces the list [ f start; ... f (n-1) ]. *) val mapij: int -> int -> (int -> 'a) -> 'a list (* [mapi n f] produces the list [ f 0; ... f (n-1) ]. *) val mapi: int -> (int -> 'a) -> 'a list (* [qfold f accu q] repeatedly takes an element [x] off the queue [q] and applies [f] to the accumulator and to [x], until [q] becomes empty. Of course, [f] can add elements to [q] as a side-effect. *) val qfold: ('a -> 'b -> 'a) -> 'a -> 'b Queue.t -> 'a (* [qiter f q] repeatedly takes an element [x] off the queue [q] and applies [f] to [x], until [q] becomes empty. Of course, [f] can add elements to [q] as a side-effect. *) val qiter: ('b -> unit) -> 'b Queue.t -> unit (* [smap] has the same semantics as [List.map], but attempts to physically return the input list when [f] is the identity. *) val smap: ('a -> 'a) -> 'a list -> 'a list (* [smapa] is a variant of [smap] that maintains an accumulator. *) val smapa: ('b -> 'a -> 'b * 'a) -> 'b -> 'a list -> 'b * 'a list (* [normalize s] returns a copy of [s] where parentheses and commas are replaced with underscores. *) val normalize: string -> string (* [postincrement r] increments [r] and returns its original value. *) val postincrement: int ref -> int (* [map_opt f l] returns the list of [y]s such that [f x = Some y] where [x] is in [l], preserving the order of elements of [l]. *) val map_opt : ('a -> 'b option) -> 'a list -> 'b list (* [new_encode_decode capacity] creates a new service for assigning unique integer codes to strings. [capacity] is the initial capacity of the internal hash table. [new_encode_decode] returns a triple [encode, decode, verbose], where [encode] and [decode] translate between strings and unique integer codes and [verbose] prints statistics about the use of the service so far. *) val new_encode_decode: int -> (string -> int) * (int -> string) * (unit -> unit) (* [new_claim()] creates a new service for claiming names. It returns a function [claim] of type [string -> unit] such that the call [claim x] succeeds if and only if [claim x] has never been called before. *) val new_claim: unit -> (string -> unit) (* If [preferable] is a partial order on elements, then [best preferable xs] returns the best (least) element of [xs], if there is one. Its complexity is quadratic. *) val best: ('a -> 'a -> bool) -> 'a list -> 'a option (* Assuming that the list [xs] is sorted with respect to the ordering [cmp], [levels cmp xs] is the list of levels of [xs], where a level is a maximal run of adjacent equal elements. Every level is a nonempty list.
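For instance, assuming [cmp] is OCaml's polymorphic [compare], [levels compare [1; 1; 2; 3; 3]] is [[[1; 1]; [2]; [3; 3]]].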
*) val levels: ('a -> 'a -> int) -> 'a list -> 'a list list (* Assuming that the list [xs] is sorted with respect to the ordering [cmp], [dup cmp xs] returns a duplicate element of the list [xs], if one exists. *) val dup: ('a -> 'a -> int) -> 'a list -> 'a option (* [once x y] produces a function [f] which produces [x] the first time it is called and produces [y] forever thereafter. *) val once: 'a -> 'a -> (unit -> 'a) (* Equality and hashing for lists, parameterized over equality and hashing for elements. *) module ListExtras : sig val equal: ('a -> 'a -> bool) -> 'a list -> 'a list -> bool val hash: ('a -> int) -> 'a list -> int end (* A nice way of printing "nth" in English, for concrete values of [n]. *) val nth: int -> string menhir-20171222/src/StaticVersion.mli0000664000175000017500000000003413217215730017577 0ustar fpottierfpottierval require_20171222 : unit menhir-20171222/src/TableFormat.ml0000664000175000017500000001402713217215730017040 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. *) (* *) (******************************************************************************) (* This signature defines the format of the parse tables. It is used as an argument to [TableInterpreter.Make]. *) module type TABLES = sig (* This is the parser's type of tokens. *) type token (* This maps a token to its internal (generation-time) integer code. *) val token2terminal: token -> int (* This is the integer code for the error pseudo-token. *) val error_terminal: int (* This maps a token to its semantic value. *) val token2value: token -> Obj.t (* Traditionally, an LR automaton is described by two tables, namely, an action table and a goto table. See, for instance, the Dragon book. The action table is a two-dimensional matrix that maps a state and a lookahead token to an action. An action is one of: shift to a certain state, reduce a certain production, accept, or fail. The goto table is a two-dimensional matrix that maps a state and a non-terminal symbol to either a state or undefined. By construction, this table is sparse: its undefined entries are never looked up. A compression technique is free to overlap them with other entries. In Menhir, things are slightly different. If a state has a default reduction on token [#], then that reduction must be performed without consulting the lookahead token. As a result, we must first determine whether that is the case, before we can obtain a lookahead token and use it as an index in the action table. Thus, Menhir's tables are as follows. A one-dimensional default reduction table maps a state to either ``no default reduction'' (encoded as: 0) or ``by default, reduce prod'' (encoded as: 1 + prod). The action table is looked up only when there is no default reduction. *) val default_reduction: PackedIntArray.t (* Menhir follows Dencker, Dürre and Heuft, who point out that, although the action table is not sparse by nature (i.e., the error entries are significant), it can be made sparse by first factoring out a binary error matrix, then replacing the error entries in the action table with undefined entries. 
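Concretely, suppose (as a hypothetical example) that some state [s] carries meaningful actions on just two terminal symbols; then row [s] of the error matrix contains [1] in these two columns and [0] everywhere else, while row [s] of the action table retains only two significant entries, so it can be overlapped with other rows by the compression scheme described below.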
Thus: A two-dimensional error bitmap maps a state and a terminal to either ``fail'' (encoded as: 0) or ``do not fail'' (encoded as: 1). The action table, which is now sparse, is looked up only in the latter case. *) (* The error bitmap is flattened into a one-dimensional table; its width is recorded so as to allow indexing. The table is then compressed via [PackedIntArray]. The bit width of the resulting packed array must be [1], so it is not explicitly recorded. *) (* The error bitmap does not contain a column for the [#] pseudo-terminal. Thus, its width is [Terminal.n - 1]. We exploit the fact that the integer code assigned to [#] is greatest: the fact that the right-most column in the bitmap is missing does not affect the code for accessing it. *) val error: int (* width of the bitmap *) * string (* second component of [PackedIntArray.t] *) (* A two-dimensional action table maps a state and a terminal to one of ``shift to state s and discard the current token'' (encoded as: s | 10), ``shift to state s without discarding the current token'' (encoded as: s | 11), or ``reduce prod'' (encoded as: prod | 01). *) (* The action table is first compressed via [RowDisplacement], then packed via [PackedIntArray]. *) (* Like the error bitmap, the action table does not contain a column for the [#] pseudo-terminal. *) val action: PackedIntArray.t * PackedIntArray.t (* A one-dimensional lhs table maps a production to its left-hand side (a non-terminal symbol). *) val lhs: PackedIntArray.t (* A two-dimensional goto table maps a state and a non-terminal symbol to either undefined (encoded as: 0) or a new state s (encoded as: 1 + s). *) (* The goto table is first compressed via [RowDisplacement], then packed via [PackedIntArray]. *) val goto: PackedIntArray.t * PackedIntArray.t (* The number of start productions. A production [prod] is a start production if and only if [prod < start] holds. This is also the number of start symbols. A nonterminal symbol [nt] is a start symbol if and only if [nt < start] holds. *) val start: int (* A one-dimensional semantic action table maps productions to semantic actions. The calling convention for semantic actions is described in [EngineTypes]. This table contains ONLY NON-START PRODUCTIONS, so the indexing is off by [start]. Be careful. *) val semantic_action: ((int, Obj.t, token) EngineTypes.env -> (int, Obj.t) EngineTypes.stack) array (* The parser defines its own [Error] exception. This exception can be raised by semantic actions and caught by the engine, and raised by the engine towards the final user. *) exception Error (* The parser indicates whether to generate a trace. Generating a trace requires two extra tables, which respectively map a terminal symbol and a production to a string. *) val trace: (string array * string array) option end menhir-20171222/src/MySet.mli0000664000175000017500000000301513217215730016045 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This is a stripped-down copy of the [Set] module from OCaml's standard library. 
The only difference is that [add x t] guarantees that it returns [t] (physically unchanged) if [x] is already a member of [t]. This yields fewer memory allocations and an easy way of testing whether the element was already present in the set before it was added. *) module Make (Ord: Map.OrderedType) : sig type elt = Ord.t type t val empty: t val add: elt -> t -> t val find: elt -> t -> elt (* may raise [Not_found] *) val iter: (elt -> unit) -> t -> unit end menhir-20171222/src/FixSolver.ml0000664000175000017500000000472313217215730016563 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) module Make (M : Fix.IMPERATIVE_MAPS) (P : sig include Fix.PROPERTY val union: property -> property -> property end) = struct type variable = M.key type property = P.property (* A constraint is represented as a mapping of each variable to an expression, which represents its lower bound. We could represent an expression as a list of constants and variables; we can also represent it as a binary tree, as follows. *) type expression = | EBottom | ECon of property | EVar of variable | EJoin of expression * expression type constraint_ = expression M.t (* Looking up a variable's lower bound. *) let consult (m : constraint_) (x : variable) : expression = try M.find x m with Not_found -> EBottom (* Evaluation of an expression in an environment. *) let rec evaluate get e = match e with | EBottom -> P.bottom | ECon p -> p | EVar x -> get x | EJoin (e1, e2) -> P.union (evaluate get e1) (evaluate get e2) (* Solving a constraint. *) let solve (m : constraint_) : variable -> property = let module F = Fix.Make(M)(P) in F.lfp (fun x get -> evaluate get (consult m x) ) (* The imperative interface. *) let create () = let m = M.create() in let record_ConVar p y = M.add y (EJoin (ECon p, consult m y)) m and record_VarVar x y = M.add y (EJoin (EVar x, consult m y)) m in record_ConVar, record_VarVar, fun () -> solve m end menhir-20171222/src/stringSet.mli0000664000175000017500000000202213217215730016763 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) include Set.S with type elt = string val of_list: elt list -> t menhir-20171222/src/dot.mli0000664000175000017500000000433413217215730015577 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. 
*) (* *) (******************************************************************************) (* This module displays graphs in graphviz dot format. It is much more basic than the one bundled with the ocamlgraph library, but offers the advantage of being stand-alone. *) (* ------------------------------------------------------------------------- *) (* Type definitions. *) type size = float * float (* in inches *) type orientation = | Portrait | Landscape type rankdir = | LeftToRight | TopToBottom type ratio = | Compress | Fill | Auto type style = (* Both nodes and edges. *) | Solid | Dashed | Dotted | Bold | Invisible (* Nodes only. *) | Filled | Diagonals | Rounded type shape = | Box | Oval | Circle | DoubleCircle (* there are many others, let's stop here *) (* ------------------------------------------------------------------------- *) (* The graph printer. *) module Print (G : sig type vertex val name: vertex -> string val successors: (?style:style -> label:string -> vertex -> unit) -> vertex -> unit val iter: (?shape:shape -> ?style:style -> label:string -> vertex -> unit) -> unit end) : sig val print: ?directed: bool -> ?size: size -> ?orientation: orientation -> ?rankdir: rankdir -> ?ratio: ratio -> out_channel -> unit end menhir-20171222/src/grammarFunctor.mli0000664000175000017500000004705513217215730020007 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* The functor [Make] transforms an abstract syntax tree for the grammar into a rich internal representation of the grammar. *) (* The reason why this is now a functor, and the reason why its verbosity can be controlled, is that we may wish to invoke it several times, e.g. on the grammar before %inlining, and on the grammar after %inlining. 2015/11/10 *) module Make (G : sig (* An abstract syntax tree for the grammar. *) val grammar: UnparameterizedSyntax.grammar (* This flag indicates whether it is OK to produce warnings, verbose information, etc., when this functor is invoked. If it is set to [false], then only serious errors can be signaled. *) val verbose: bool end) : sig (* ------------------------------------------------------------------------ *) (* Nonterminals. *) module Nonterminal : sig (* The type of nonterminals. *) type t (* Comparison. *) val compare: t -> t -> int (* The number of nonterminals. This includes the extra nonterminals that are internally generated for the grammar's entry points. *) val n: int (* [lookup] maps an identifier to a nonterminal, or raises [Not_found]. *) val lookup : string -> t (* Nonterminals can be converted to integers. This feature is exploited in the table-based back-end. *) val n2i: t -> int (* This produces a string representation of a nonterminal. It should in principle never be applied to one of the internally generated nonterminals, as we do not wish users to become aware of the existence of these extra nonterminals. However, we do sometimes violate this rule when it is difficult to do otherwise. The Boolean parameter tells whether the string representation should be normalized, that is, whether parentheses and commas should be eliminated. 
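For instance, the normalized form of [option(expr)] would presumably be [option_expr_], parentheses and commas being turned into underscores, in the style of [Misc.normalize] (see misc.mli).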
This is necessary if the string is intended for use as a valid nonterminal name or as a valid OCaml identifier. *) val print: bool -> t -> string (* This is the OCaml type associated with a nonterminal symbol. It is known only if a %type declaration was provided. This function is not applicable to the internally generated nonterminals. *) val ocamltype: t -> Stretch.ocamltype option (* A start symbol always has a type. This allows us to define a simplified version of [ocamltype] for start symbols. *) val ocamltype_of_start_symbol: t -> Stretch.ocamltype (* Creation of a table indexed by nonterminals. *) val init: (t -> 'a) -> 'a array (* Iteration over nonterminals. The order in which elements are examined, and the order of [map]'s output list, correspond to the numeric indices produced by [n2i] above. *) val iter: (t -> unit) -> unit val fold: (t -> 'a -> 'a) -> 'a -> 'a val map: (t -> 'a) -> 'a list (* Iteration over all nonterminals, except the start nonterminals. *) val iterx: (t -> unit) -> unit val foldx: (t -> 'a -> 'a) -> 'a -> 'a (* Tabulation of a function over nonterminals. *) val tabulate: (t -> 'a) -> (t -> 'a) (* [positions nt] is a list of the positions associated with the definition of [nt]. There can be more than one position because definitions can be split over multiple files. *) val positions: t -> Positions.t list (* This tells whether a non-terminal symbol is one of the start symbols. *) val is_start: t -> bool (* [attributes nt] is the list of attributes attached with the nonterminal symbol [nt]. *) val attributes: t -> Syntax.attribute list end (* ------------------------------------------------------------------------ *) (* Sets of nonterminals. *) module NonterminalMap : GMap.S with type key = Nonterminal.t module NonterminalSet = NonterminalMap.Domain (* ------------------------------------------------------------------------ *) (* Terminals. *) module Terminal : sig (* The type of terminals. *) type t (* The number of terminals. This includes the two pseudo-tokens [#] and [error]. *) val n: int (* Comparison. *) val equal: t -> t -> bool val compare: t -> t -> int (* [lookup] maps an identifier to a terminal, or raises [Not_found]. *) val lookup : string -> t (* Terminals can be converted to integers. This feature is exploited in the table-based back-end and in [LRijkstra]. The reverse conversion, [i2t], is unsafe and should not be used. [LRijkstra] uses it :-) *) val t2i: t -> int val i2t: int -> t (* unsafe! *) (* This produces a string representation of a terminal. *) val print: t -> string (* This is the OCaml type associated with a terminal symbol. It is known only if the %token declaration was accompanied with a type. *) val ocamltype: t -> Stretch.ocamltype option (* These are the two pseudo-tokens [#] and [error]. The former is used to denote the end of the token stream. The latter is accessible to the user and is used for handling errors. *) val sharp: t val error: t (* This is the programmer-defined [EOF] token, if there is one. It is recognized based solely on its name, which is fragile, but this behavior is documented. This token is assumed to represent [ocamllex]'s [eof] pattern. It is used only by the reference interpreter, and in a rather non-essential way. *) val eof: t option (* A terminal symbol is pseudo if it is [#] or [error]. It is real otherwise. *) val pseudo: t -> bool val real: t -> bool (* Creation of a table indexed by terminals. *) val init: (t -> 'a) -> 'a array (* Iteration over terminals. 
The order in which elements are examined, and the order of [map]'s output list, correspond to the numeric indices produced by [t2i] above. *) val iter: (t -> unit) -> unit val fold: (t -> 'a -> 'a) -> 'a -> 'a val map: (t -> 'a) -> 'a list (* Iteration over all terminals except [#]. *) val foldx: (t -> 'a -> 'a) -> 'a -> 'a val mapx: (t -> 'a) -> 'a list (* [iter_real] offers iteration over all real terminals. *) val iter_real: (t -> unit) -> unit (* [attributes t] is the list of attributes attached with the terminal symbol [t]. *) val attributes: t -> Syntax.attribute list (* The sub-module [Word] offers an implementation of words (that is, sequences) of terminal symbols. It is used by [LRijkstra]. We make it a functor, because it has internal state (a hash table) and a side effect (failure if there are more than 256 terminal symbols). *) (* The type [word] should be treated, as much as possible, as an abstract type. In fact, for efficiency reasons, we represent a word as a unique integer code, and we allocate these integer codes sequentially, from 0 upwards. The conversion from [int] to [word] is of course unsafe and should be used wisely. *) module Word (X : sig end) : sig type word = int val epsilon: word val singleton: t -> word val append: word -> word -> word val length: word -> int (* [first w z] returns the first symbol of the word [w.z]. *) val first: word -> t -> t val elements: word -> t list val print: word -> string (* [verbose()] prints statistics about the use of the internal hash-consing table so far. *) val verbose: unit -> unit (* Lexicographic ordering. *) val compare: word -> word -> int end end (* ------------------------------------------------------------------------ *) (* Sets and maps over terminals. *) module TerminalSet : sig (* All of the operations documented in [GSet] are available. *) include GSet.S with type element = Terminal.t (* This offers a string representation of a set of terminals. The symbols are simply listed one after the other and separated with spaces. *) val print: t -> string (* This is the set of all terminal symbols except the pseudo-tokens [#] and [error]. *) val universe: t end (* All of the operations documented in [GMap] are available. *) module TerminalMap : GMap.S with type key = Terminal.t (* ------------------------------------------------------------------------ *) (* Symbols. *) module Symbol : sig (* A symbol is either a nonterminal or a terminal. *) type t = | N of Nonterminal.t | T of Terminal.t (* [lookup] maps an identifier to a symbol, or raises [Not_found]. *) val lookup : string -> t (* Comparison. *) val equal: t -> t -> bool val lequal: t list -> t list -> bool (* These produce a string representation of a symbol, of a list of symbols, or of an array of symbols. The symbols are simply listed one after the other and separated with spaces. [printao] prints an array of symbols, starting at a particular offset. [printaod] is analogous, but can also print a single dot at a particular position between two symbols. *) val print: t -> string val printl: t list -> string val printa: t array -> string val printao: int -> t array -> string val printaod: int -> int -> t array -> string end (* ------------------------------------------------------------------------ *) (* Sets and maps over symbols. *) (* All of the operations documented in [Set] are available. *) module SymbolSet : Set.S with type elt = Symbol.t module SymbolMap : sig (* All of the operations documented in [Map] are available.
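In particular, [add], [find], [iter], and [fold] behave exactly as in OCaml's standard library.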
*) include Map.S with type key = Symbol.t val domain: 'a t -> key list (* This returns [true] if and only if all of the symbols in the domain of the map at hand are nonterminals. *) val purelynonterminal: 'a t -> bool end (* ------------------------------------------------------------------------ *) (* Productions. *) module Production : sig (* This is the type of productions. This includes user-defined productions as well as the internally generated productions associated with the start symbols. *) type index (* Comparison. *) val compare: index -> index -> int (* Productions can be converted to integers and back. This is unsafe and should be avoided as much as possible. This feature is exploited, for efficiency, in the encoding of items. *) val p2i: index -> int val i2p: int -> index (* The number of productions. *) val n: int (* These map a production index to the production's definition, that is, a nonterminal (the left-hand side) and an array of symbols (the right-hand side). *) val def: index -> Nonterminal.t * Symbol.t array val nt: index -> Nonterminal.t val rhs: index -> Symbol.t array val length: index -> int (* This maps a production index to an array of the identifiers that should be used for naming the semantic values of the symbols in the right-hand side. *) val identifiers: index -> Syntax.identifier array (* This maps a production index to the production's semantic action. This function is not applicable to a start production. *) val action: index -> Syntax.action (* [positions prod] is a list of the positions associated with production [prod]. This is usually a singleton list, but there can be more than one position for start productions when the definition of the corresponding start symbol is split over multiple files. *) val positions: index -> Positions.t list (* [lhs_attributes prod] returns the attributes attached with the head symbol of the production [prod]. It is equivalent to [Nonterminal.attributes (nt prod)]. [rhs_attributes prod] returns an array of the attributes attached with each element in the right-hand side of the production [prod]. *) val lhs_attributes: index -> Syntax.attributes val rhs_attributes: index -> Syntax.attributes array (* Creation of a table indexed by productions. *) val init: (index -> 'a) -> 'a array (* Iteration over all productions. The order in which elements are examined, and the order of [map]'s output list, correspond to the numeric indices produced by [p2i] above. *) val iter: (index -> unit) -> unit val fold: (index -> 'a -> 'a) -> 'a -> 'a val map: (index -> 'a) -> 'a list val amap: (index -> 'a) -> 'a array (* Iteration over all productions, except the start productions. *) val iterx: (index -> unit) -> unit val foldx: (index -> 'a -> 'a) -> 'a -> 'a val mapx: (index -> 'a) -> 'a list (* This maps a (user) non-terminal start symbol to the corresponding start production. *) val startsymbol2startprod: Nonterminal.t -> index (* Iteration over the productions associated with a specific nonterminal. *) val iternt: Nonterminal.t -> (index -> unit) -> unit val foldnt: Nonterminal.t -> 'a -> (index -> 'a -> 'a) -> 'a (* This allows determining whether a production is a start production. If it is a start production, the start symbol that it is associated with is returned. If it is a regular production, nothing is returned. *) val classify: index -> Nonterminal.t option (* [is_start] is easier to use than [classify] when the start symbol is not needed. 
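For instance, instead of writing [match classify prod with Some _ -> e1 | None -> e2], one can simply write [if is_start prod then e1 else e2].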
*) val is_start: index -> bool (* The integer [start] is published so as to allow the table back-end to produce code for [is_start]. It should not be used otherwise. *) val start: int (* This produces a string representation of a production. It should never be applied to a start production, as we do not wish users to become aware of the existence of these extra productions. *) val print: index -> string (* Tabulation of a Boolean function over productions. [tabulateb f] returns a tabulated version of [f] as well as the number of productions where [f] is true. *) val tabulate: (index -> 'a) -> (index -> 'a) val tabulateb: (index -> bool) -> (index -> bool) * int end (* ------------------------------------------------------------------------ *) (* Maps over productions. *) module ProductionMap : sig include GMap.S with type key = Production.index (* Iteration over the start productions only. *) val start: (Production.index -> 'a) -> 'a t end (* ------------------------------------------------------------------------ *) (* Analysis of the grammar. *) module Analysis : sig (* [nullable nt] is the NULLABLE flag of the non-terminal symbol [nt]. That is, it is true if and only if this symbol produces the empty word [epsilon]. *) val nullable: Nonterminal.t -> bool val nullable_symbol: Symbol.t -> bool (* [first nt] is the FIRST set of the non-terminal symbol [nt]. *) val first: Nonterminal.t -> TerminalSet.t val first_symbol: Symbol.t -> TerminalSet.t (* [nullable_first_prod prod i] considers the suffix of the production [prod] defined by offset [i]. It returns its NULLABLE flag as well as its FIRST set. The offset [i] must be contained between [0] and [n], inclusive, where [n] is the length of production [prod]. *) val nullable_first_prod: Production.index -> int -> bool * TerminalSet.t (* [first_prod_lookahead prod i t] computes [FIRST(alpha.t)], where [alpha] is the suffix of the production defined by offset [i], and [t] is a terminal symbol. The offset [i] must be contained between [0] and [n], inclusive, where [n] is the length of production [prod]. *) val first_prod_lookahead: Production.index -> int -> Terminal.t -> TerminalSet.t (* [explain_first_rhs tok rhs i] explains why the token [tok] appears in the FIRST set for the string of symbols found at offset [i] in the array [rhs]. *) val explain_first_rhs: Terminal.t -> Symbol.t array -> int -> string (* [follow nt] is the FOLLOW set of the non-terminal symbol [nt], that is, the set of terminal symbols that could follow an expansion of [nt] in a valid sentence. *) val follow: Nonterminal.t -> TerminalSet.t (* [attributes] are the attributes attached with the grammar. *) val attributes: Syntax.attributes end (* ------------------------------------------------------------------------ *) (* Conflict resolution via precedences. *) module Precedence : sig (* Shift/reduce conflicts require making a choice between shifting a token and reducing a production. How these choices are made is of no concern to the back-end, but here is a rough explanation. Shifting is preferred when the token has higher precedence than the production, or they have same precedence and the token is right-associative. Reducing is preferred when the token has lower precedence than the production, or they have same precedence and the token is left-associative. Neither is allowed when the token and the production have same precedence and the token is non-associative. No preference is explicitly specified when the token or the production has undefined precedence. 
In that case, the default choice is to prefer shifting, but a conflict will be reported. *) type choice = | ChooseShift | ChooseReduce | ChooseNeither | DontKnow val shift_reduce: Terminal.t -> Production.index -> choice (* Reduce/reduce conflicts require making a choice between reducing two distinct productions. This is done by exploiting a partial order on productions. For compatibility with ocamlyacc, this order should be total and should correspond to textual order when the two productions originate in the same source file. When they originate in different source files, the two productions should be incomparable. *) val reduce_reduce: Production.index -> Production.index -> Production.index option end (* ------------------------------------------------------------------------ *) (* [%on_error_reduce] declarations. *) module OnErrorReduce : sig (* [reduce prod] tells whether the left-hand side of [prod] (a nonterminal symbol) appears in an [%on_error_reduce] declaration. *) val reduce: Production.index -> bool (* [iter f] applies the function [f] in turn, in an arbitrary order, to every nonterminal symbol that appears in an [%on_error_reduce] declaration. *) val iter: (Nonterminal.t -> unit) -> unit (* When two productions could be reduced, in a single state, due to [%on_error_reduce] declarations, these productions can be compared, using [preferable], to test if one of them takes precedence over the other. This is a partial order; two productions may be incomparable. *) val preferable: Production.index -> Production.index -> bool end (* ------------------------------------------------------------------------ *) (* Diagnostics. *) (* This function prints warnings about useless precedence declarations for terminal symbols (%left, %right, %nonassoc) and productions (%prec). It should be invoked only after the automaton has been constructed. *) val diagnostics: unit -> unit (* ------------------------------------------------------------------------ *) end (* module Make *) menhir-20171222/src/stringMap.mli0000664000175000017500000000262413217215730016755 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) include Map.S with type key = string val cardinal : 'a t -> int (* [restrict s m] restricts the domain of the map [m] to (its intersection with) the set [s]. *) val restrict: StringSet.t -> 'a t -> 'a t (* [filter pred m] restricts the domain of the map [m] to (key, value) pairs that satisfy [pred]. *) val filter: (string -> 'a -> bool) -> 'a t -> 'a t (* [domain m] returns the domain of the map [m]. *) val domain: 'a t -> StringSet.t menhir-20171222/src/stringSet.ml0000664000175000017500000000203413217215730016615 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE.
*) (* *) (******************************************************************************) include Set.Make (String) let of_list xs = List.fold_right add xs empty menhir-20171222/src/InputFile.mli0000664000175000017500000000541613217215730016712 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module keeps track of which input file is currently being read. It defines a type [input_file] of input files, which is used to record the origin of certain elements (productions, declarations, etc.). *) (* ---------------------------------------------------------------------------- *) (* The identity of the current input file. *) type input_file (* [new_input_file filename] must be called when a new input file is about to be read. *) val new_input_file: string -> unit (* [get_input_file()] indicates which input file is currently being read. [get_input_file_name()] is the name of this file. *) val get_input_file: unit -> input_file val get_input_file_name: unit -> string (* This fictitious "built-in" input file is used as the origin of the start productions. This technical detail is probably entirely irrelevant. *) val builtin_input_file: input_file (* This equality test for input files is used (for instance) when determining which of two productions has greater priority. *) val same_input_file: input_file -> input_file -> bool (* This ordering between input files reflects their ordering on the command line. Ideally, it should NOT be used. *) val compare_input_files: input_file -> input_file -> int (* ---------------------------------------------------------------------------- *) (* The contents of the current input file. *) (* [with_file_contents contents f] records that the contents of the current input file is [contents] while the action [f] runs. The function [f] can then call [chunk] (below) to retrieve certain segments of [contents]. *) val with_file_contents: string -> (unit -> 'a) -> 'a (* [chunk pos1 pos2] extracts a chunk out of the current input file, delimited by the positions [pos1] and [pos2]. *) val chunk: (Lexing.position * Lexing.position) -> string menhir-20171222/src/parserMessages.messages0000664000175000017500000002423613217215730021026 0ustar fpottierfpottier# ---------------------------------------------------------------------------- grammar: UID grammar: HEADER UID Either a declaration or %% is expected at this point. # ---------------------------------------------------------------------------- grammar: TYPE UID grammar: TYPE OCAMLTYPE TYPE grammar: TYPE OCAMLTYPE UID PREC grammar: TYPE OCAMLTYPE UID LPAREN TYPE grammar: TYPE OCAMLTYPE UID COMMA TYPE grammar: TYPE OCAMLTYPE UID LPAREN UID UID grammar: TYPE OCAMLTYPE UID LPAREN UID COMMA TYPE grammar: TYPE OCAMLTYPE UID PLUS RPAREN grammar: ON_ERROR_REDUCE TYPE # %type and %on_error_reduce are both followed with clist(strict_actual), # so they are not distinguished in the automaton. Ill-formed declaration.
Examples of well-formed declarations: %type expression %type date time %type option(date) %on_error_reduce expression %on_error_reduce date time %on_error_reduce option(date) # ---------------------------------------------------------------------------- grammar: TOKEN TYPE grammar: TOKEN OCAMLTYPE TYPE grammar: TOKEN UID STAR grammar: TOKEN UID COMMA TYPE grammar: TOKEN UID ATTRIBUTE STAR Ill-formed %token declaration. Examples of well-formed declarations: %token FOO %token DOT SEMICOLON %token LID UID %token FOO [@cost 0] # ---------------------------------------------------------------------------- grammar: START UID grammar: START OCAMLTYPE LEFT grammar: START LID UID grammar: START LID COMMA UID Ill-formed %start declaration. A start symbol must begin with a lowercase letter. Examples of well-formed declarations: %start program %start expression phrase %start date time # ---------------------------------------------------------------------------- grammar: RIGHT TYPE grammar: RIGHT UID STAR grammar: RIGHT UID COMMA TYPE Ill-formed precedence declaration. Examples of well-formed declarations: %left PLUS %left PLUS MINUS %nonassoc unary_minus %right CONCAT # ---------------------------------------------------------------------------- grammar: PARAMETER UID Ill-formed %parameter declaration. Examples of well-formed declarations: %parameter # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT TYPE Either a rule or %% is expected at this point. # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT UID COLON ACTION TYPE Either another rule or %% is expected at this point. # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT PUBLIC TYPE grammar: PERCENTPERCENT INLINE TYPE grammar: PERCENTPERCENT PUBLIC INLINE TYPE Ill-formed rule. %inline, %public, or a non-terminal symbol is expected at this point. Examples of well-formed rules: %public option(X): { None } | x = X { Some x } %inline clist(X): xs = separated_nonempty_list(COMMA?, X) { xs } %public %inline pair(X, Y): x = X; y = Y { (x, y) } # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT UID UID grammar: PERCENTPERCENT LID ATTRIBUTE UID Ill-formed rule. Either a parenthesized, comma-delimited list of formal parameters or an attribute or a colon is expected at this point. Examples of well-formed rules: main: e = expr EOL { e } expr: i = INT { i } | e1 = expr PLUS e2 = expr { e1 + e2 } option(X): { None } | x = X { Some x } main [@cost 0]: e = expr EOL { e } # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT UID LPAREN TYPE grammar: PERCENTPERCENT UID LPAREN UID UID grammar: PERCENTPERCENT UID LPAREN UID COMMA TYPE Ill-formed rule. A comma-delimited list of formal parameters is expected at this point. Examples of well-formed rules: option(X): { None } | x = X { Some x } pair(X, Y): x = X; y = Y { (x, y) } # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT UID COLON TYPE grammar: PERCENTPERCENT UID COLON BAR TYPE grammar: PERCENTPERCENT UID COLON ACTION BAR TYPE grammar: PERCENTPERCENT UID COLON UID BAR TYPE Ill-formed rule. A list of productions is expected at this point. 
Examples of well-formed rules: main: e = expr EOL { e } expr: i = INT { i } | e1 = expr PLUS e2 = expr { e1 + e2 } symbol: s = LID | s = UID { s } # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT UID COLON UID TYPE grammar: PERCENTPERCENT UID COLON UID ATTRIBUTE TYPE grammar: PERCENTPERCENT UID COLON LID TYPE grammar: PERCENTPERCENT UID COLON LID EQUAL TYPE grammar: PERCENTPERCENT UID COLON UID PLUS TYPE grammar: PERCENTPERCENT UID COLON LID EQUAL UID PLUS TYPE Ill-formed production. A production is a sequence of producers, followed with a semantic action. Examples of well-formed producers: expr option(COMMA) separated_list(COMMA, expr) e = expr ds = declaration* es = list(terminated(expr, SEMI)) es = list(e = expr SEMI { e }) xs = list(x = var { Some x } | WILDCARD { None }) expr [@cost 0] # The following sentences are similar, but have an open parenthesis. # Suggesting that a parenthesis could be closed seems a safe bet. These # sentences are otherwise tricky. In front of us could be many things (comma, # closing parenthesis, identifier, modifier, %prec keyword, etc.). We expect # to reduce to actual or lax_actual. Let's just back up to a safe level of # abstraction and say this is an ill-formed production. grammar: PERCENTPERCENT UID COLON UID LPAREN UID TYPE grammar: PERCENTPERCENT UID COLON UID LPAREN UID STAR TYPE grammar: PERCENTPERCENT UID COLON UID LPAREN UID LPAREN LID RPAREN TYPE grammar: PERCENTPERCENT UID COLON UID LPAREN LID TYPE grammar: PERCENTPERCENT UID COLON UID LPAREN ACTION BAR TYPE Ill-formed production. Maybe you meant to close a parenthesis at this point? A production is a sequence of producers, followed with a semantic action. Examples of well-formed producers: expr option(COMMA) separated_list(COMMA, expr) e = expr ds = declaration* es = list(terminated(expr, SEMI)) es = list(e = expr SEMI { e }) xs = list(x = var { Some x } | WILDCARD { None }) # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT UID COLON UID LPAREN ACTION UID # In the non-canonical automaton, this is a tricky case where, looking at the # description of the state, it seems that only COMMA and RPAREN can follow # here. But in fact, other tokens are possible, such as BAR, simply because # they will NOT take us into this state. In the canonical automaton, the list # of possibilities is explicit in the lookahead sets. grammar: PERCENTPERCENT UID COLON UID LPAREN ACTION PREC UID UID # In the first case above, we may expect a %prec annotation, whereas in the # second case above, we have just seen it. In the error message, we merge # these two situations and do not mention the possibility of a %prec # annotation. Either another production | ... or a comma or a closing parenthesis is expected at this point. # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT UID COLON PREC TYPE grammar: PERCENTPERCENT UID COLON UID LPAREN ACTION PREC TYPE grammar: PERCENTPERCENT UID COLON ACTION PREC TYPE Ill-formed %prec annotation. A symbol is expected at this point. Examples of well-formed annotations: expr: MINUS e = expr %prec UMINUS { -e } # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT UID COLON UID LPAREN TYPE grammar: PERCENTPERCENT UID COLON UID LPAREN UID COMMA TYPE Ill-formed rule. A comma-delimited list of actual parameters is expected at this point. 
Examples of well-formed rules: call: f = callee LPAREN args = separated_list(COMMA, expr) RPAREN { f, args } list(X): { [] } | x = X; xs = list(X) { x :: xs } # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT UID COLON PREC LID UID Ill-formed rule. Either a semantic action { ... } or another production | ... is expected here. Examples of well-formed rules: expr: MINUS e = expr %prec UMINUS { -e } # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT UID LPAREN UID RPAREN BAR Ill-formed rule. A colon is expected at this point. Examples of well-formed rules: option(X): { None } | x = X { Some x } # ---------------------------------------------------------------------------- grammar: PERCENTPERCENT UID COLON ACTION PREC UID TYPE Either another rule or another production | ... is expected at this point. Examples of well-formed rules: option(X): { None } | x = X { Some x } # ---------------------------------------------------------------------------- grammar: TYPE OCAMLTYPE UID LPAREN UID LPAREN TYPE grammar: PERCENTPERCENT UID COLON UID LPAREN UID LPAREN TYPE Ill-formed list of actual parameters. A comma-delimited list of actual parameters is expected at this point. Examples of well-formed actual parameters: expr expr+ option(expr) separated_list(COMMA, expr) # Omitting the fact that an anonymous rule is a valid actual parameter... # Also omitting the subtle distinctions between lax_actual, actual, etc. # ---------------------------------------------------------------------------- grammar: TYPE OCAMLTYPE UID LPAREN UID PLUS UID Ill-formed list of actual parameters. A modifier, a closing parenthesis, or a comma is expected at this point. A modifier is * or + or ?. Examples of well-formed actual parameters: expr expr+ option(expr) separated_list(COMMA, expr) # ------------------------------------------------------------------------------ grammar: PERCENTATTRIBUTE TYPE grammar: PERCENTATTRIBUTE UID COMMA TYPE grammar: PERCENTATTRIBUTE UID TYPE grammar: PERCENTATTRIBUTE UID PLUS TYPE grammar: PERCENTATTRIBUTE UID LPAREN TYPE grammar: PERCENTATTRIBUTE UID ATTRIBUTE UID Ill-formed %attribute declaration. An %attribute declaration should contain a nonempty list of symbols, followed with a nonempty list of attributes. Examples of well-formed declarations: %attribute FOO [@printer "foo"] %attribute bar BAZ [@printer "bar/BAZ"] [@cost 2.0] # Local Variables: # mode: shell-script # End: menhir-20171222/src/gSet.ml0000664000175000017500000000714413217215730015544 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This is a stripped down version of [GSet] that describes both [Patricia] and [CompressedBitSet]. The full version of [GSet] is in [AlphaLib]. *) module type S = sig (* Elements are assumed to have a natural total order. *) type element (* Sets. *) type t (* The empty set. *) val empty: t (* [is_empty s] tells whether [s] is the empty set. *) val is_empty: t -> bool (* [singleton x] returns a singleton set containing [x] as its only element. 
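Thus, for instance, [mem x (singleton x)] is [true], and [cardinal (singleton x)] is [1].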
*) val singleton: element -> t (* [is_singleton s] tests whether [s] is a singleton set. *) val is_singleton: t -> bool (* [cardinal s] returns the cardinal of [s]. *) val cardinal: t -> int (* [choose s] returns an arbitrarily chosen element of [s], if [s] is nonempty, and raises [Not_found] otherwise. *) val choose: t -> element (* [mem x s] returns [true] if and only if [x] appears in the set [s]. *) val mem: element -> t -> bool (* [add x s] returns a set whose elements are all elements of [s], plus [x]. *) val add: element -> t -> t (* [remove x s] returns a set whose elements are all elements of [s], except [x]. *) val remove: element -> t -> t (* [union s1 s2] returns the union of the sets [s1] and [s2]. *) val union: t -> t -> t (* [inter s t] returns the set intersection of [s] and [t], that is, $s\cap t$. *) val inter: t -> t -> t (* [disjoint s1 s2] returns [true] if and only if the sets [s1] and [s2] are disjoint, i.e. iff their intersection is empty. *) val disjoint: t -> t -> bool (* [iter f s] invokes [f x], in turn, for each element [x] of the set [s]. Elements are presented to [f] in increasing order. *) val iter: (element -> unit) -> t -> unit (* [fold f s seed] invokes [f x accu], in turn, for each element [x] of the set [s]. Elements are presented to [f] in increasing order. The initial value of [accu] is [seed]; then, at each new call, its value is the value returned by the previous invocation of [f]. The value returned by [fold] is the final value of [accu]. In other words, if $s = \{ x_1, x_2, \ldots, x_n \}$, where $x_1 < x_2 < \ldots < x_n$, then [fold f s seed] computes $([f]\,x_n\,\ldots\,([f]\,x_2\,([f]\,x_1\,[seed]))\ldots)$. *) val fold: (element -> 'b -> 'b) -> t -> 'b -> 'b (* [elements s] is a list of all elements in the set [s]. *) val elements: t -> element list (* [compare] is an ordering over sets. *) val compare: t -> t -> int (* [equal] implements equality over sets. *) val equal: t -> t -> bool (* [subset] implements the subset predicate over sets. *) val subset: (t -> t -> bool) end menhir-20171222/src/back.ml0000664000175000017500000000655513217215730015547 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* Driver for the back-end. *) module I = Interpret (* artificial dependency; ensures that [Interpret] runs first *) (* If [--list-errors] is set, produce a list of erroneous input sentences, then stop. *) let () = if Settings.list_errors then begin let module L = LRijkstra.Run(struct (* Undocumented: if [--log-automaton 2] is set, be verbose. *) let verbose = Settings.logA >= 2 (* For my own purposes, LRijkstra can print one line of statistics to a .csv file. *) let statistics = if false then Some "lr.csv" else None end) in exit 0 end (* Define an .ml file writer . *) let write program = let module P = Printer.Make (struct let filename = Settings.base ^ ".ml" let f = open_out filename let locate_stretches = (* 2017/05/09: always include line number directives in generated .ml files. Indeed, they affect the semantics of [assert] instructions in the semantic actions. *) (* 2011/10/19: do not use [Filename.basename]. 
The line number directives that we insert in the [.ml] file must retain their full path. This does mean that the line number directives depend on how menhir is invoked -- e.g. [menhir foo/bar.mly] and [cd foo && menhir bar.mly] will produce different files. Nevertheless, this seems useful/reasonable. *) Some filename end) in P.program program (* If requested, generate a .cmly file. *) let () = if Settings.cmly then Cmly_write.write (Settings.base ^ ".cmly") (* The following DEAD code forces [Cmly_read] to be typechecked. *) let () = if false then let module R = Cmly_read.Read (struct let filename = "" end) in () (* Construct the code, using either the table-based or the code-based back-end, and pass it on to the printer. (This continuation-passing style is imposed by the fact that there is no conditional in ocaml's module language.) *) let () = if Settings.coq then let module B = CoqBackend.Run (struct end) in let filename = Settings.base ^ ".v" in let f = open_out filename in B.write_all f; exit 0 else if Settings.table then let module B = TableBackend.Run (struct end) in write B.program else let module B = CodeBackend.Run (struct end) in write (Inliner.inline B.program) (* Write the interface file. *) let () = Interface.write Front.grammar () let () = Time.tick "Printing" menhir-20171222/src/option.mli0000664000175000017500000000242613217215730016321 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) val map: ('a -> 'b) -> 'a option -> 'b option val iter: ('a -> unit) -> 'a option -> unit val fold: ('a -> 'b -> 'b) -> 'a option -> 'b -> 'b val project: 'a option -> 'a (* careful: calls [exit 1] in case of failure *) val equal: ('a -> 'b -> bool) -> 'a option -> 'b option -> bool val hash: ('a -> int) -> 'a option -> int menhir-20171222/src/lexmli.mll0000664000175000017500000000504313217215730016304 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This code analyzes the output of [ocamlc -i] and returns a list of identifiers together with their types. Types are represented by offsets in the source string. *) { let fail () = Error.error [] "failed to make sense of ocamlc's output." } let whitespace = [ ' ' '\t' '\n' '\r' ] let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_'] let identchar = ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '0'-'9'] (* '\'' forbidden *) (* Read a list of bindings. We start immediately after a [val] keyword, so we expect either an end marker, or an identifier, followed by a colon, followed by a type, followed by another list of bindings. In the latter case, we recognize the identifier and the colon, record where the type begins, and pass control to [type_then_bindings]. 
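For instance, on input of the (hypothetical) form [val foo : int -> int val menhir_end_marker : int], we would record the identifier [foo] together with the offsets that delimit the type [int -> int].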
*) rule bindings env = parse | "menhir_end_marker : int" { env } | whitespace* ((lowercase identchar*) as id) whitespace* ':' whitespace* { type_then_bindings env id (Lexing.lexeme_end lexbuf) lexbuf } | _ | eof { fail() } (* Read a type followed by a list of bindings. *) and type_then_bindings env id openingofs = parse | whitespace+ "val" whitespace { let closingofs = Lexing.lexeme_start lexbuf in bindings ((id, openingofs, closingofs) :: env) lexbuf } | _ { type_then_bindings env id openingofs lexbuf } | eof { fail() } (* Skip up to the first [val] keyword that follows the begin marker, and start from there. *) and main = parse | _* "val menhir_begin_marker : int" whitespace+ "val" whitespace+ { bindings [] lexbuf } | _ | eof { fail() } menhir-20171222/src/General.mli0000664000175000017500000000453613217215730016372 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. *) (* *) (******************************************************************************) (* This module offers general-purpose functions on lists and streams. *) (* As of 2017/03/31, this module is DEPRECATED. It might be removed in the future. *) (* --------------------------------------------------------------------------- *) (* Lists. *) (* [take n xs] returns the [n] first elements of the list [xs]. It is acceptable for the list [xs] to have length less than [n], in which case [xs] itself is returned. *) val take: int -> 'a list -> 'a list (* [drop n xs] returns the list [xs], deprived of its [n] first elements. It is acceptable for the list [xs] to have length less than [n], in which case an empty list is returned. *) val drop: int -> 'a list -> 'a list (* [uniq cmp xs] assumes that the list [xs] is sorted according to the ordering [cmp] and returns the list [xs] deprived of any duplicate elements. *) val uniq: ('a -> 'a -> int) -> 'a list -> 'a list (* [weed cmp xs] returns the list [xs] deprived of any duplicate elements. *) val weed: ('a -> 'a -> int) -> 'a list -> 'a list (* --------------------------------------------------------------------------- *) (* A stream is a list whose elements are produced on demand. *) type 'a stream = 'a head Lazy.t and 'a head = | Nil | Cons of 'a * 'a stream (* The length of a stream. *) val length: 'a stream -> int (* Folding over a stream. *) val foldr: ('a -> 'b -> 'b) -> 'a stream -> 'b -> 'b menhir-20171222/src/interface.ml0000664000175000017500000001364113217215730016601 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open UnparameterizedSyntax open IL open CodeBits (* -------------------------------------------------------------------------- *) (* The [Error] exception. 
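When [Settings.fixedexc] is set (presumably via the [--fixed-exception] command line switch), this exception is declared equal to [Parsing.Parse_error], for compatibility with ocamlyacc; this is reflected in [excdef] below.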
*) let excname = "Error" let excdef = { excname = excname; exceq = (if Settings.fixedexc then Some "Parsing.Parse_error" else None); } (* -------------------------------------------------------------------------- *) (* The type of the monolithic entry point for the start symbol [symbol]. *) let entrytypescheme grammar symbol = let typ = TypTextual (ocamltype_of_start_symbol grammar symbol) in type2scheme (marrow [ arrow tlexbuf TokenType.ttoken; tlexbuf ] typ) (* -------------------------------------------------------------------------- *) (* When the table back-end is active, the generated parser contains, as a sub-module, an application of [Engine.Make]. This sub-module is named as follows. *) let interpreter = "MenhirInterpreter" let checkpoint t = TypApp (interpreter ^ ".checkpoint", [ t ]) let lr1state = "lr1state" let tlr1state a : typ = TypApp (lr1state, [a]) (* -------------------------------------------------------------------------- *) (* The name of the sub-module that contains the incremental entry points. *) let incremental = "Incremental" (* The type of the incremental entry point for the start symbol [symbol]. *) let entrytypescheme_incremental grammar symbol = let t = TypTextual (ocamltype_of_start_symbol grammar symbol) in type2scheme (marrow [ tposition ] (checkpoint t)) (* -------------------------------------------------------------------------- *) (* The name of the sub-module that contains the inspection API. *) let inspection = "Inspection" (* -------------------------------------------------------------------------- *) (* The monolithic (traditional) API: the type [token], the exception [Error], and the parser's entry points. *) let monolithic_api grammar = TokenType.tokentypedef grammar @ IIComment "This exception is raised by the monolithic API functions." :: IIExcDecls [ excdef ] :: IIComment "The monolithic API." :: IIValDecls ( StringSet.fold (fun symbol decls -> (Misc.normalize symbol, entrytypescheme grammar symbol) :: decls ) grammar.start_symbols [] ) :: [] (* -------------------------------------------------------------------------- *) (* The inspection API. *) let inspection_api grammar () = let a = "a" in (* Define the types [terminal] and [nonterminal]. *) TokenType.tokengadtdef grammar @ NonterminalType.nonterminalgadtdef grammar @ (* Include the signature that lists the inspection functions, with appropriate type instantiations. *) IIComment "The inspection API." :: IIInclude ( with_types WKDestructive "MenhirLib.IncrementalEngine.INSPECTION" [ [ a ], "lr1state", tlr1state (TypVar a); [], "production", TypApp ("production", []); [ a ], TokenType.tctokengadt, TokenType.ttokengadt (TypVar a); [ a ], NonterminalType.tcnonterminalgadt, NonterminalType.tnonterminalgadt (TypVar a); [ a ], "env", TypApp ("env", [ TypVar a ]); ] ) :: [] (* -------------------------------------------------------------------------- *) (* The incremental API. *) let incremental_engine () : module_type = with_types WKNonDestructive "MenhirLib.IncrementalEngine.INCREMENTAL_ENGINE" [ [], "token", (* NOT [tctoken], which is qualified if [--external-tokens] is used *) TokenType.ttoken ] let incremental_entry_points grammar : interface = IIComment "The entry point(s) to the incremental API." :: IIModule (incremental, MTSigEnd [ IIValDecls ( StringSet.fold (fun symbol decls -> (symbol, entrytypescheme_incremental grammar symbol) :: decls ) grammar.start_symbols [] ) ]) :: [] let incremental_api grammar () : interface = IIModule ( interpreter, MTSigEnd ( IIComment "The incremental API." 
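(* A sketch of what this include elaborates to in the generated [.mli] file (hypothetical, for illustration only): include MenhirLib.IncrementalEngine.INCREMENTAL_ENGINE with type token = token *)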
:: IIInclude (incremental_engine()) :: listiflazy Settings.inspection (inspection_api grammar) ) ) :: (* The entry points must come after the incremental API, because their type refers to the type [checkpoint]. *) incremental_entry_points grammar (* -------------------------------------------------------------------------- *) (* The complete interface of the generated parser. *) let interface grammar = [ IIFunctor (grammar.parameters, monolithic_api grammar @ listiflazy Settings.table (incremental_api grammar) ) ] (* -------------------------------------------------------------------------- *) (* Writing the interface to a file. *) let write grammar () = (* We have a dependency on [TokenType], which takes care of the case where [token_type_mode] is [TokenTypeOnly]. *) assert (Settings.token_type_mode <> Settings.TokenTypeOnly); let mli = open_out (Settings.base ^ ".mli") in let module P = Printer.Make (struct let f = mli let locate_stretches = None end) in P.interface (interface grammar); close_out mli menhir-20171222/src/coqBackend.ml0000664000175000017500000005171313217215730016675 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Printf open Grammar module Run (T: sig end) = struct let print_term t = assert (not (Terminal.pseudo t)); sprintf "%s't" (Terminal.print t) let print_nterm nt = sprintf "%s'nt" (Nonterminal.print true nt) let print_symbol = function | Symbol.N nt -> sprintf "NT %s" (print_nterm nt) | Symbol.T t -> sprintf "T %s" (print_term t) let print_type ty = if Settings.coq_no_actions then "unit" else match ty with | None -> raise Not_found (* fpottier: argh! *) | Some t -> match t with | Stretch.Declared s -> s.Stretch.stretch_content | Stretch.Inferred _ -> assert false (* We cannot infer coq types *) let is_final_state node = match Default.has_default_reduction node with | Some (prod, _) -> Production.is_start prod | None -> false let lr1_iter_nonfinal f = Lr1.iter (fun node -> if not (is_final_state node) then f node) let lr1_iterx_nonfinal f = Lr1.iterx (fun node -> if not (is_final_state node) then f node) let lr1_foldx_nonfinal f = Lr1.foldx (fun accu node -> if not (is_final_state node) then f accu node else accu) let print_nis nis = sprintf "Nis'%d" (Lr1.number nis) let print_init init = sprintf "Init'%d" (Lr1.number init) let print_st st = match Lr1.incoming_symbol st with | Some _ -> sprintf "Ninit %s" (print_nis st) | None -> sprintf "Init %s" (print_init st) let (prod_ids, _) = Production.foldx (fun p (prod_ids, counters) -> let lhs = Production.nt p in let id = try SymbolMap.find (Symbol.N lhs) counters with Not_found -> 0 in (ProductionMap.add p id prod_ids, SymbolMap.add (Symbol.N lhs) (id+1) counters)) (ProductionMap.empty, SymbolMap.empty) let print_prod p = sprintf "Prod'%s'%d" (Nonterminal.print true (Production.nt p)) (ProductionMap.find p prod_ids) let () = if not Settings.coq_no_actions then begin Nonterminal.iterx (fun nonterminal -> match Nonterminal.ocamltype nonterminal with | None -> Error.error [] "I don't know the type of the nonterminal symbol %s." 
(Nonterminal.print false nonterminal) | Some _ -> ()); Production.iterx (fun prod -> if not (Keyword.KeywordSet.is_empty (Action.keywords (Production.action prod))) then Error.error [] "The Coq back-end supports none of the $ keywords." ) end; Production.iterx (fun prod -> Array.iter (fun symb -> match symb with | Symbol.T t -> if t = Terminal.error then Error.error [] "the Coq back-end does not support the error token." | _ -> ()) (Production.rhs prod)); if Front.grammar.UnparameterizedSyntax.parameters <> [] then Error.error [] "the Coq back-end does not support %%parameter." (* Optimized because if we extract some constants to the right caml term, the ocaml inlining+constant unfolding replaces that by the actual constant *) let rec write_optimized_int31 f n = match n with | 0 -> fprintf f "Int31.On" | 1 -> fprintf f "Int31.In" | k when k land 1 = 0 -> fprintf f "(twice "; write_optimized_int31 f (n lsr 1); fprintf f ")" | _ -> fprintf f "(twice_plus_one "; write_optimized_int31 f (n lsr 1); fprintf f ")" let write_inductive_alphabet f name constrs = fprintf f "Inductive %s' : Set :=" name; List.iter (fprintf f "\n| %s") constrs; fprintf f ".\n"; fprintf f "Definition %s := %s'.\n\n" name name; if List.length constrs > 0 then begin let iteri f = ignore (List.fold_left (fun k x -> f k x; succ k) 0 constrs) in fprintf f "Program Instance %sNum : Numbered %s :=\n" name name; fprintf f " { inj := fun x => match x return _ with "; iteri (fun k constr -> fprintf f "| %s => " constr; write_optimized_int31 f k; fprintf f " "; ); fprintf f "end;\n"; fprintf f " surj := (fun n => match n return _ with "; iteri (fprintf f "| %d => %s "); fprintf f "| _ => %s end)%%int31;\n" (List.hd constrs); fprintf f " inj_bound := %d%%int31 }.\n" (List.length constrs); end else begin fprintf f "Program Instance %sAlph : Alphabet %s :=\n" name name; fprintf f " { AlphabetComparable := {| compare := fun x y =>\n"; fprintf f " match x, y return comparison with end |};\n"; fprintf f " AlphabetEnumerable := {| all_list := [] |} }."; end let write_terminals f = write_inductive_alphabet f "terminal" ( Terminal.fold (fun t l -> if Terminal.pseudo t then l else print_term t::l) []); fprintf f "Instance TerminalAlph : Alphabet terminal := _.\n\n" let write_nonterminals f = write_inductive_alphabet f "nonterminal" ( Nonterminal.foldx (fun nt l -> (print_nterm nt)::l) []); fprintf f "Instance NonTerminalAlph : Alphabet nonterminal := _.\n\n" let write_symbol_semantic_type f = fprintf f "Definition terminal_semantic_type (t:terminal) : Type:=\n"; fprintf f " match t with\n"; Terminal.iter (fun terminal -> if not (Terminal.pseudo terminal) then fprintf f " | %s => %s%%type\n" (print_term terminal) (try print_type (Terminal.ocamltype terminal) with Not_found -> "unit") ); fprintf f " end.\n\n"; fprintf f "Definition nonterminal_semantic_type (nt:nonterminal) : Type:=\n"; fprintf f " match nt with\n"; Nonterminal.iterx (fun nonterminal -> fprintf f " | %s => %s%%type\n" (print_nterm nonterminal) (print_type (Nonterminal.ocamltype nonterminal))); fprintf f " end.\n\n"; fprintf f "Definition symbol_semantic_type (s:symbol) : Type:=\n"; fprintf f " match s with\n"; fprintf f " | T t => terminal_semantic_type t\n"; fprintf f " | NT nt => nonterminal_semantic_type nt\n"; fprintf f " end.\n\n" let write_productions f = write_inductive_alphabet f "production" ( Production.foldx (fun prod l -> (print_prod prod)::l) []); fprintf f "Instance ProductionAlph : Alphabet production := _.\n\n" let write_productions_contents f = fprintf f 
"Definition prod_contents (p:production) :\n"; fprintf f " { p:nonterminal * list symbol &\n"; fprintf f " arrows_left (map symbol_semantic_type (rev (snd p)))\n"; fprintf f " (symbol_semantic_type (NT (fst p))) }\n"; fprintf f " :=\n"; fprintf f " let box := existT (fun p =>\n"; fprintf f " arrows_left (map symbol_semantic_type (rev (snd p)))\n"; fprintf f " (symbol_semantic_type (NT (fst p))))\n"; fprintf f " in\n"; fprintf f " match p with\n"; Production.iterx (fun prod -> fprintf f " | %s => box\n" (print_prod prod); fprintf f " (%s, [%s])\n" (print_nterm (Production.nt prod)) (String.concat "; " (List.map print_symbol (List.rev (Array.to_list (Production.rhs prod))))); if Production.length prod = 0 then fprintf f " (\n" else fprintf f " (fun %s =>\n" (String.concat " " (List.rev (Array.to_list (Production.identifiers prod)))); if Settings.coq_no_actions then fprintf f "()" else Printer.print_expr f (Action.to_il_expr (Production.action prod)); fprintf f "\n)\n"); fprintf f " end.\n\n"; fprintf f "Definition prod_lhs (p:production) :=\n"; fprintf f " fst (projT1 (prod_contents p)).\n"; fprintf f "Definition prod_rhs_rev (p:production) :=\n"; fprintf f " snd (projT1 (prod_contents p)).\n"; fprintf f "Definition prod_action (p:production) :=\n"; fprintf f " projT2 (prod_contents p).\n\n" let write_nullable_first f = fprintf f "Definition nullable_nterm (nt:nonterminal) : bool :=\n"; fprintf f " match nt with\n"; Nonterminal.iterx (fun nt -> fprintf f " | %s => %b\n" (print_nterm nt) (Analysis.nullable nt)); fprintf f " end.\n\n"; fprintf f "Definition first_nterm (nt:nonterminal) : list terminal :=\n"; fprintf f " match nt with\n"; Nonterminal.iterx (fun nt -> let firstSet = Analysis.first nt in fprintf f " | %s => [" (print_nterm nt); let first = ref true in TerminalSet.iter (fun t -> if !first then first := false else fprintf f "; "; fprintf f "%s" (print_term t) ) firstSet; fprintf f "]\n"); fprintf f " end.\n\n" let write_grammar f = fprintf f "Module Import Gram <: Grammar.T.\n\n"; fprintf f "Local Obligation Tactic := let x := fresh in intro x; case x; reflexivity.\n\n"; write_terminals f; write_nonterminals f; fprintf f "Include Grammar.Symbol.\n\n"; write_symbol_semantic_type f; write_productions f; write_productions_contents f; fprintf f "Include Grammar.Defs.\n\n"; fprintf f "End Gram.\n\n" let write_nis f = write_inductive_alphabet f "noninitstate" ( lr1_foldx_nonfinal (fun l node -> (print_nis node)::l) []); fprintf f "Instance NonInitStateAlph : Alphabet noninitstate := _.\n\n" let write_init f = write_inductive_alphabet f "initstate" ( ProductionMap.fold (fun _prod node l -> (print_init node)::l) Lr1.entry []); fprintf f "Instance InitStateAlph : Alphabet initstate := _.\n\n" let write_start_nt f = fprintf f "Definition start_nt (init:initstate) : nonterminal :=\n"; fprintf f " match init with\n"; Lr1.fold_entry (fun _prod node startnt _t () -> fprintf f " | %s => %s\n" (print_init node) (print_nterm startnt) ) (); fprintf f " end.\n\n" let write_actions f = fprintf f "Definition action_table (state:state) : action :=\n"; fprintf f " match state with\n"; lr1_iter_nonfinal (fun node -> fprintf f " | %s => " (print_st node); match Default.has_default_reduction node with | Some (prod, _) -> fprintf f "Default_reduce_act %s\n" (print_prod prod) | None -> fprintf f "Lookahead_act (fun terminal:terminal =>\n"; fprintf f " match terminal return lookahead_action terminal with\n"; let has_fail = ref false in Terminal.iter (fun t -> if not (Terminal.pseudo t) then begin try let 
target = SymbolMap.find (Symbol.T t) (Lr1.transitions node) in fprintf f " | %s => Shift_act %s (eq_refl _)\n" (print_term t) (print_nis target) with Not_found -> try let prod = Misc.single (TerminalMap.find t (Lr1.reductions node)) in fprintf f " | %s => Reduce_act %s\n" (print_term t) (print_prod prod) with Not_found -> has_fail := true end); if !has_fail then fprintf f " | _ => Fail_act\n"; fprintf f " end)\n" ); fprintf f " end.\n\n" let write_gotos f = fprintf f "Definition goto_table (state:state) (nt:nonterminal) :=\n"; fprintf f " match state, nt return option { s:noninitstate | NT nt = last_symb_of_non_init_state s } with\n"; let has_none = ref false in lr1_iter_nonfinal (fun node -> Nonterminal.iterx (fun nt -> try let target = SymbolMap.find (Symbol.N nt) (Lr1.transitions node) in fprintf f " | %s, %s => " (print_st node) (print_nterm nt); if is_final_state target then fprintf f "None" else fprintf f "Some (exist _ %s (eq_refl _))\n" (print_nis target) with Not_found -> has_none := true)); if !has_none then fprintf f " | _, _ => None\n"; fprintf f " end.\n\n" let write_last_symb f = fprintf f "Definition last_symb_of_non_init_state (noninitstate:noninitstate) : symbol :=\n"; fprintf f " match noninitstate with\n"; lr1_iterx_nonfinal (fun node -> match Lr1.incoming_symbol node with | Some s -> fprintf f " | %s => %s\n" (print_nis node) (print_symbol s) | None -> assert false); fprintf f " end.\n\n" let write_past_symb f = fprintf f "Definition past_symb_of_non_init_state (noninitstate:noninitstate) : list symbol :=\n"; fprintf f " match noninitstate with\n"; lr1_iterx_nonfinal (fun node -> let s = String.concat "; " (List.tl (Invariant.fold (fun l _ symb _ -> print_symbol symb::l) [] (Invariant.stack node))) in fprintf f " | %s => [%s]\n" (print_nis node) s); fprintf f " end.\n"; fprintf f "Extract Constant past_symb_of_non_init_state => \"fun _ -> assert false\".\n\n" module NodeSetMap = Map.Make(Lr1.NodeSet) let write_past_states f = let get_stateset_id = let memo = ref NodeSetMap.empty in let next_id = ref 1 in fun stateset -> try NodeSetMap.find stateset !memo with | Not_found -> let id = sprintf "state_set_%d" !next_id in memo := NodeSetMap.add stateset id !memo; incr next_id; fprintf f "Definition %s (s:state) : bool :=\n" id; fprintf f " match s with\n"; fprintf f " "; Lr1.NodeSet.iter (fun st -> fprintf f "| %s " (print_st st)) stateset; fprintf f "=> true\n"; fprintf f " | _ => false\n"; fprintf f " end.\n"; fprintf f "Extract Inlined Constant %s => \"assert false\".\n\n" id; id in let b = Buffer.create 256 in bprintf b "Definition past_state_of_non_init_state (s:noninitstate) : list (state -> bool) :=\n"; bprintf b " match s with\n"; lr1_iterx_nonfinal (fun node -> let s = String.concat "; " (Invariant.fold (fun accu _ _ states -> get_stateset_id states::accu) [] (Invariant.stack node)) in bprintf b " | %s => [ %s ]\n" (print_nis node) s); bprintf b " end.\n"; Buffer.output_buffer f b; fprintf f "Extract Constant past_state_of_non_init_state => \"fun _ -> assert false\".\n\n" module TerminalSetMap = Map.Make(TerminalSet) let write_items f = if not Settings.coq_no_complete then begin let get_lookaheadset_id = let memo = ref TerminalSetMap.empty in let next_id = ref 1 in fun lookaheadset -> let lookaheadset = if TerminalSet.mem Terminal.sharp lookaheadset then TerminalSet.universe else lookaheadset in try TerminalSetMap.find lookaheadset !memo with Not_found -> let id = sprintf "lookahead_set_%d" !next_id in memo := TerminalSetMap.add lookaheadset id !memo; incr next_id; 
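(* Emit the Coq definition of this fresh lookahead set. Note that its extracted OCaml counterpart is an [assert false] stub. *)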
fprintf f "Definition %s : list terminal :=\n [" id; let first = ref true in TerminalSet.iter (fun lookahead -> if !first then first := false else fprintf f "; "; fprintf f "%s" (print_term lookahead) ) lookaheadset; fprintf f "].\nExtract Inlined Constant %s => \"assert false\".\n\n" id; id in let b = Buffer.create 256 in lr1_iter_nonfinal (fun node -> bprintf b "Definition items_of_state_%d : list item :=\n" (Lr1.number node); bprintf b " [ "; let first = ref true in Item.Map.iter (fun item lookaheads -> let prod, pos = Item.export item in if not (Production.is_start prod) then begin if !first then first := false else bprintf b ";\n "; bprintf b "{| prod_item := %s; dot_pos_item := %d; lookaheads_item := %s |}" (print_prod prod) pos (get_lookaheadset_id lookaheads); end ) (Lr0.closure (Lr0.export (Lr1.state node))); bprintf b " ].\n"; bprintf b "Extract Inlined Constant items_of_state_%d => \"assert false\".\n\n" (Lr1.number node) ); Buffer.output_buffer f b; fprintf f "Definition items_of_state (s:state) : list item :=\n"; fprintf f " match s with\n"; lr1_iter_nonfinal (fun node -> fprintf f " | %s => items_of_state_%d\n" (print_st node) (Lr1.number node)); fprintf f " end.\n"; end else fprintf f "Definition items_of_state (s:state): list item := [].\n"; fprintf f "Extract Constant items_of_state => \"fun _ -> assert false\".\n\n" let write_automaton f = fprintf f "Module Aut <: Automaton.T.\n\n"; fprintf f "Local Obligation Tactic := let x := fresh in intro x; case x; reflexivity.\n\n"; fprintf f "Module Gram := Gram.\n"; fprintf f "Module GramDefs := Gram.\n\n"; write_nullable_first f; write_nis f; write_last_symb f; write_init f; fprintf f "Include Automaton.Types.\n\n"; write_start_nt f; write_actions f; write_gotos f; write_past_symb f; write_past_states f; write_items f; fprintf f "End Aut.\n\n" let write_theorems f = fprintf f "Require Import Main.\n\n"; fprintf f "Module Parser := Main.Make Aut.\n"; fprintf f "Theorem safe:\n"; fprintf f " Parser.safe_validator () = true.\n"; fprintf f "Proof eq_refl true<:Parser.safe_validator () = true.\n\n"; if not Settings.coq_no_complete then begin fprintf f "Theorem complete:\n"; fprintf f " Parser.complete_validator () = true.\n"; fprintf f "Proof eq_refl true<:Parser.complete_validator () = true.\n\n"; end; Lr1.fold_entry (fun _prod node startnt _t () -> let funName = Nonterminal.print true startnt in fprintf f "Definition %s := Parser.parse safe Aut.%s.\n\n" funName (print_init node); fprintf f "Theorem %s_correct iterator buffer:\n" funName; fprintf f " match %s iterator buffer with\n" funName; fprintf f " | Parser.Inter.Parsed_pr sem buffer_new =>\n"; fprintf f " exists word,\n"; fprintf f " buffer = Parser.Inter.app_str word buffer_new /\\\n"; fprintf f " inhabited (Gram.parse_tree (%s) word sem)\n" (print_symbol (Symbol.N startnt)); fprintf f " | _ => True\n"; fprintf f " end.\n"; fprintf f "Proof. apply Parser.parse_correct. 
Qed.\n\n"; if not Settings.coq_no_complete then begin fprintf f "Theorem %s_complete (iterator:nat) word buffer_end (output:%s):\n" funName (print_type (Nonterminal.ocamltype startnt)); fprintf f " forall tree:Gram.parse_tree (%s) word output,\n" (print_symbol (Symbol.N startnt)); fprintf f " match %s iterator (Parser.Inter.app_str word buffer_end) with\n" funName; fprintf f " | Parser.Inter.Fail_pr => False\n"; fprintf f " | Parser.Inter.Parsed_pr output_res buffer_end_res =>\n"; fprintf f " output_res = output /\\ buffer_end_res = buffer_end /\\\n"; fprintf f " le (Gram.pt_size tree) iterator\n"; fprintf f " | Parser.Inter.Timeout_pr => lt iterator (Gram.pt_size tree)\n"; fprintf f " end.\n"; fprintf f "Proof. apply Parser.parse_complete with (init:=Aut.%s); exact complete. Qed.\n\n" (print_init node); end ) () let write_all f = if not Settings.coq_no_actions then List.iter (fun s -> fprintf f "%s\n\n" s.Stretch.stretch_content) Front.grammar.UnparameterizedSyntax.preludes; fprintf f "Require Import List.\n"; fprintf f "Require Import Int31.\n"; fprintf f "Require Import Syntax.\n"; fprintf f "Require Import Tuples.\n"; fprintf f "Require Import Alphabet.\n"; fprintf f "Require Grammar.\n"; fprintf f "Require Automaton.\n\n"; fprintf f "Unset Elimination Schemes.\n\n"; write_grammar f; write_automaton f; write_theorems f; if not Settings.coq_no_actions then List.iter (fun stretch -> fprintf f "\n\n%s" stretch.Stretch.stretch_raw_content) Front.grammar.UnparameterizedSyntax.postludes end menhir-20171222/src/error.ml0000664000175000017500000000410213217215730015762 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Printf (* ---------------------------------------------------------------------------- *) (* Logging and log levels. *) let log kind verbosity msg = if kind >= verbosity then Printf.fprintf stderr "%t%!" msg let logG = log Settings.logG let logA = log Settings.logA let logC = log Settings.logC (* ---------------------------------------------------------------------------- *) (* Errors and warnings. *) let errors = ref false let display continuation header positions format = List.iter (fun position -> fprintf stderr "%s:\n" (Positions.string_of_pos position) ) positions; Printf.kfprintf continuation stderr (header ^^ format ^^ "\n%!") let error positions format = display (fun _ -> exit 1) "Error: " positions format let signal positions format = display (fun _ -> errors := true) "Error: " positions format let warning positions format = display (fun _ -> ()) "Warning: " positions format let errors () = !errors let errorp v = error [ Positions.position v ] let grammar_warning = if Settings.strict then signal else warning menhir-20171222/src/unionFind.ml0000664000175000017500000001201113217215730016560 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. 
This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (** This module implements a simple and efficient union/find algorithm. See Robert E. Tarjan, ``Efficiency of a Good But Not Linear Set Union Algorithm'', JACM 22(2), 1975. *) (** The abstraction defined by this module is a set of points, partitioned into equivalence classes. With each equivalence class, a piece of information, of abstract type ['a], is associated; we call it a descriptor. A point is implemented as a cell, whose (mutable) contents consist of a single link to either information about the equivalence class, or another point. Thus, points form a graph, which must be acyclic, and whose connected components are the equivalence classes. In every equivalence class, exactly one point has no outgoing edge, and carries information about the class instead. It is the class's representative element. Information about a class consists of an integer weight (the number of elements in the class) and of the class's descriptor. *) type 'a point = { mutable link: 'a link } and 'a link = | Info of 'a info | Link of 'a point and 'a info = { mutable weight: int; mutable descriptor: 'a } (** [fresh desc] creates a fresh point and returns it. It forms an equivalence class of its own, whose descriptor is [desc]. *) let fresh desc = { link = Info { weight = 1; descriptor = desc } } (** [repr point] returns the representative element of [point]'s equivalence class. It is found by starting at [point] and following the links. For efficiency, the function performs path compression at the same time. *) let rec repr point = match point.link with | Link point' -> let point'' = repr point' in if point'' != point' then (* [point''] is [point']'s representative element. Because we just invoked [repr point'], [point'.link] must be [Link point'']. We write this value into [point.link], thus performing path compression. Note that this function never performs memory allocation. *) point.link <- point'.link; point'' | Info _ -> point (** [get point] returns the descriptor associated with [point]'s equivalence class. *) let rec get point = (* By not calling [repr] immediately, we optimize the common cases where the path starting at [point] has length 0 or 1, at the expense of the general case. *) match point.link with | Info info | Link { link = Info info } -> info.descriptor | Link { link = Link _ } -> get (repr point) let rec set point v = match point.link with | Info info | Link { link = Info info } -> info.descriptor <- v | Link { link = Link _ } -> set (repr point) v (** [union point1 point2] merges the equivalence classes associated with [point1] and [point2] into a single class whose descriptor is that originally associated with [point2]. It does nothing if [point1] and [point2] already are in the same class. The weights are used to determine whether [point1] should be made to point to [point2], or vice-versa. By making the representative of the smaller class point to that of the larger class, we guarantee that paths remain of logarithmic length (not accounting for path compression, which makes them yet smaller). 
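Example (a hypothetical usage sketch; this code is not part of Menhir): {[ let p1 = fresh "a" and p2 = fresh "b" in union p1 p2; assert (equivalent p1 p2); assert (get p1 = "b" && get p2 = "b") ]}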
*) let union point1 point2 = let point1 = repr point1 and point2 = repr point2 in if point1 != point2 then match point1.link, point2.link with | Info info1, Info info2 -> let weight1 = info1.weight and weight2 = info2.weight in if weight1 >= weight2 then begin point2.link <- Link point1; info1.weight <- weight1 + weight2; info1.descriptor <- info2.descriptor end else begin point1.link <- Link point2; info2.weight <- weight1 + weight2 end | _, _ -> assert false (* [repr] guarantees that [link] matches [Info _]. *) (** [equivalent point1 point2] tells whether [point1] and [point2] belong to the same equivalence class. *) let equivalent point1 point2 = repr point1 == repr point2 menhir-20171222/src/tableBackend.mli0000664000175000017500000000207113217215730017344 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* The (table-based) code generator. *) module Run (T : sig end) : sig val program: IL.program end menhir-20171222/src/LowIntegerPriorityQueue.mli0000664000175000017500000000365513217215730021644 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (** This module implements a simple-minded priority queue, under the assumption that priorities are low nonnegative integers. *) (** The type of priority queues. *) type 'a t (** [create default] creates an empty priority queue. The [default] value is used to fill empty physical slots, but is otherwise irrelevant. *) val create: 'a -> 'a t (** [add q x p] inserts the element [x], with priority [p], into the queue [q]. *) val add: 'a t -> 'a -> int -> unit (** [remove q] extracts out of [q] and returns an element with minimum priority. *) val remove: 'a t -> 'a option (** [is_empty q] tests whether the queue [q] is empty. *) val is_empty: 'a t -> bool (** [cardinal q] returns the number of elements in the queue [q]. *) val cardinal: 'a t -> int (** [repeat q f] repeatedly extracts an element with minimum priority out of [q] and passes it to [f] (which may insert new elements into [q]), until [q] is exhausted. *) val repeat: 'a t -> ('a -> unit) -> unit menhir-20171222/src/ErrorReports.mli0000664000175000017500000000374413217215730017465 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. 
*) (* *) (******************************************************************************) (* -------------------------------------------------------------------------- *) (* The following functions help keep track of the start and end positions of the last two tokens in a two-place buffer. This is used to nicely display where a syntax error took place. *) type 'a buffer (* [wrap lexer] returns a pair of a new (initially empty) buffer and a lexer which internally relies on [lexer] and updates [buffer] on the fly whenever a token is demanded. *) open Lexing val wrap: (lexbuf -> 'token) -> (position * position) buffer * (lexbuf -> 'token) (* [show f buffer] prints the contents of the buffer, producing a string that is typically of the form "after '%s' and before '%s'". The function [f] is used to print an element. The buffer MUST be nonempty. *) val show: ('a -> string) -> 'a buffer -> string (* [last buffer] returns the last element of the buffer. The buffer MUST be nonempty. *) val last: 'a buffer -> 'a (* -------------------------------------------------------------------------- *) menhir-20171222/src/default.ml0000664000175000017500000001007613217215730016264 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Grammar module C = Conflict (* artificial dependency; ensures that [Conflict] runs first *) (* Here is how we check whether state [s] should have a default reduction. We check whether [s] has no outgoing shift transitions and only has one possible reduction action. In that case, we produce a default reduction action, that is, we perform reduction without consulting the lookahead token. This saves code, but can alter the parser's behavior in the presence of errors. The check for default actions subsumes the check for the case where [s] admits a reduce action with lookahead symbol "#". In that case, it must be the only possible action -- see [Lr1.default_conflict_resolution]. That is, we have reached a point where we have recognized a well-formed input and are now expecting an end-of-stream. In that case, performing reduction without looking at the next token is the right thing to do, since there should in fact be none. The state that we reduce to will also have the same property, and so on, so we will in fact end up rewinding the entire stack and accepting the input when the stack becomes empty. (New as of 2012/01/23.) A state where a shift/reduce conflict was solved in favor of neither (due to a use of the %nonassoc directive) must not perform a default reduction. Indeed, this would effectively mean that the failure that was requested by the user is forgotten and replaced with a reduction. This surprising behavior is present in ocamlyacc and was present in earlier versions of Menhir. See e.g. http://caml.inria.fr/mantis/view.php?id=5462 There is a chance that we might run into trouble if the ideas described in the above two paragraphs collide, that is, if we forbid a default reduction (due to a shift/reduce conflict solved by %nonassoc) in a node where we would like to have default reduction on "#". 
This situation seems unlikely to arise, so I will not do anything about it for the moment. (Furthermore, someone who uses precedence declarations is looking for trouble anyway.) Between 2012/05/25 and 2015/09/25, if [--canonical] has been specified, then we disallow default reductions on a normal token, because we do not want to introduce any spurious actions into the automaton. We do still allow default reductions on "#", since they are needed for the automaton to terminate properly. From 2015/09/25 on, we again always allow default reductions, as they seem to be beneficial when explaining syntax errors. *) let has_default_reduction, count = Misc.tabulateo Lr1.number Lr1.fold Lr1.n (fun s -> if Lr1.forbid_default_reduction s then None else let reduction = ProductionMap.is_singleton (Lr1.invert (Lr1.reductions s)) in match reduction with | Some _ -> if SymbolMap.purelynonterminal (Lr1.transitions s) then reduction else None | None -> reduction ) let () = Error.logC 1 (fun f -> Printf.fprintf f "%d out of %d states have a default reduction.\n" count Lr1.n) let () = Time.tick "Computing default reductions" menhir-20171222/src/action.ml0000664000175000017500000001203713217215730016114 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Keyword type t = { (* The code for this semantic action. *) expr: IL.expr; (* The files where this semantic action originates. Via inlining, several semantic actions can be combined into one, so there can be several files. *) filenames: string list; (* The set of keywords that appear in this semantic action. They can be thought of as free variables that refer to positions. They must be renamed during inlining. *) keywords : KeywordSet.t; } (* Creation. *) let from_stretch s = { expr = IL.ETextual s; filenames = [ s.Stretch.stretch_filename ]; keywords = KeywordSet.of_list s.Stretch.stretch_keywords } (* Defining a keyword in terms of other keywords. *) let define keyword keywords f action = assert (KeywordSet.mem keyword action.keywords); { action with expr = f action.expr; keywords = KeywordSet.union keywords (KeywordSet.remove keyword action.keywords) } (* Composition, used during inlining. *) let compose x a1 a2 = (* 2015/07/20: there used to be a call to [parenthesize_stretch] here, which would insert parentheses around every stretch in [a1]. This is not necessary, as far as I can see, since every stretch that represents a semantic action is already parenthesized by the lexer. *) { expr = IL.ELet ([ IL.PVar x, a1.expr ], a2.expr); keywords = KeywordSet.union a1.keywords a2.keywords; filenames = a1.filenames @ a2.filenames; } (* Substitutions, represented as association lists. In principle, no name appears twice in the domain. 
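For example, applying the substitution [[ ("x", "y") ]] to the name "x" yields "y", while every name outside the domain is mapped to itself; this lookup is implemented by [apply] below.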
*) type subst = (string * string) list let apply (phi : subst) (s : string) : string = try List.assoc s phi with Not_found -> s let apply_subject (phi : subst) (subject : subject) : subject = match subject with | Before | Left -> subject | RightNamed s -> RightNamed (apply phi s) let extend x y (phi : subst ref) = assert (not (List.mem_assoc x !phi)); if x <> y then phi := (x, y) :: !phi (* Renaming of keywords, used during inlining. *) type sw = Keyword.subject * Keyword.where (* [rename_keyword f phi keyword] applies the function [f] to possibly change the keyword [keyword]. If [f] decides to change this keyword (by returning [Some _]) then this decision is obeyed. Otherwise, the keyword is renamed by the substitution [phi]. In either case, [phi] is extended with a renaming decision. *) let rename_keyword (f : sw -> sw option) (phi : subst ref) keyword : keyword = match keyword with | SyntaxError -> SyntaxError | Position (subject, where, flavor) -> let subject', where' = match f (subject, where) with | Some (subject', where') -> subject', where' | None -> apply_subject !phi subject, where in extend (Keyword.posvar subject where flavor) (Keyword.posvar subject' where' flavor) phi; Position (subject', where', flavor) (* [rename f phi a] applies to the semantic action [a] the renaming [phi] as well as the transformations decided by the function [f]. The function [f] is applied to each (not-yet-renamed) keyword and may decide to transform it, by returning [Some _], or to not transform it, by returning [None]. (In the latter case, [phi] still applies to the keyword.) *) let rename f phi a = (* Rename all keywords, growing [phi] as we go. *) let keywords = a.keywords in let phi = ref phi in let keywords = KeywordSet.map (rename_keyword f phi) keywords in let phi = !phi in (* Construct a new semantic action, where [phi] is translated into a series of [let] bindings. *) let phi = List.map (fun (x, y) -> IL.PVar x, IL.EVar y) phi in let expr = IL.ELet (phi, a.expr) in { expr = expr; filenames = a.filenames; keywords = keywords; } let to_il_expr action = action.expr let filenames action = action.filenames let keywords action = action.keywords let has_syntaxerror action = KeywordSet.mem SyntaxError (keywords action) let has_beforeend action = KeywordSet.mem (Position (Before, WhereEnd, FlavorPosition)) action.keywords menhir-20171222/src/derivation.mli0000664000175000017500000000543213217215730017155 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Grammar (* -------------------------------------------------------------------------- *) (* This is the type of derivations. Derivations are forests: see inside. *) type t (* This is the type of derivations contexts, or derivations with a derivation-shaped hole. *) type context (* -------------------------------------------------------------------------- *) (* Construction. *) (* [empty] is the forest that consists of a single empty tree. 
*) val empty: t (* [tail pos rhs] is the forest: (i) whose first element is the empty tree, and (ii) whose remaining elements are the symbols found at positions greater than or equal to [pos] in the array [rhs]. *) val tail: int -> Symbol.t array -> t (* [build pos rhs forest comment] is the forest: (i) whose first element is the tree that has the non-terminal symbol [rhs.(pos)] at its root and the forest [forest] below its root, and (ii) whose remaining elements are the symbols found at positions greater than [pos] in the array [rhs]. *) val build: int -> Symbol.t array -> t -> string option -> t (* [prepend symbol forest] is the forest: (i) whose first element is the symbol [symbol], and (ii) whose remaining elements form the forest [forest]. *) val prepend: Symbol.t -> t -> t (* -------------------------------------------------------------------------- *) (* Factoring. *) (* [factor] factors the maximal common derivation context out of a nonempty family of derivations. It produces a pair of the context and of the residual derivations. *) val factor: t Item.Map.t -> context * t Item.Map.t (* -------------------------------------------------------------------------- *) (* Display. *) (* [print] prints a derivation. *) val print: out_channel -> t -> unit (* [printc] prints a derivation context. *) val printc: out_channel -> context -> unit menhir-20171222/src/installation.mli0000664000175000017500000000244613217215730017514 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module defines a number of installation settings. Its source code is generated by the main [Makefile]. *) (* The directory where Menhir's standard library, [standard.mly], is installed. *) val libdir: string (* Whether MenhirLib was installed via [ocamlfind] or (manually) in the above directory. *) val ocamlfind: bool menhir-20171222/src/Convert.ml0000664000175000017500000001101213217215730016247 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. *) (* *) (******************************************************************************) (* An ocamlyacc-style, or Menhir-style, parser requires access to the lexer, which must be parameterized with a lexing buffer, and to the lexing buffer itself, where it reads position information. *) (* This traditional API is convenient when used with ocamllex, but inelegant when used with other lexer generators. *) type ('token, 'semantic_value) traditional = (Lexing.lexbuf -> 'token) -> Lexing.lexbuf -> 'semantic_value (* This revised API is independent of any lexer generator. Here, the parser only requires access to the lexer, and the lexer takes no parameters. The tokens returned by the lexer may contain position information. 
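For example (a hypothetical sketch, where [A], [B], and [EOF] stand for made-up tokens), a revised parser [p] can be driven by a mutable token list: let input = ref [A; B; EOF] in p (fun () -> match !input with t :: ts -> input := ts; t | [] -> EOF).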
*) type ('token, 'semantic_value) revised = (unit -> 'token) -> 'semantic_value (* --------------------------------------------------------------------------- *) (* Converting a traditional parser, produced by ocamlyacc or Menhir, into a revised parser. *) (* A token of the revised lexer is essentially a triple of a token of the traditional lexer (or raw token), a start position, and an end position. The three [get] functions are accessors. *) (* We do not require the type ['token] to actually be a triple type. This enables complex applications where it is a record type with more than three fields. It also enables simple applications where positions are of no interest, so ['token] is just ['raw_token] and [get_startp] and [get_endp] return dummy positions. *) let traditional2revised (get_raw_token : 'token -> 'raw_token) (get_startp : 'token -> Lexing.position) (get_endp : 'token -> Lexing.position) (parser : ('raw_token, 'semantic_value) traditional) : ('token, 'semantic_value) revised = (* Accept a revised lexer. *) fun (lexer : unit -> 'token) -> (* Create a dummy lexing buffer. *) let lexbuf : Lexing.lexbuf = Lexing.from_string "" in (* Wrap the revised lexer as a traditional lexer. A traditional lexer returns a raw token and updates the fields of the lexing buffer with new positions, which will be read by the parser. *) let lexer (lexbuf : Lexing.lexbuf) : 'raw_token = let token : 'token = lexer() in lexbuf.Lexing.lex_start_p <- get_startp token; lexbuf.Lexing.lex_curr_p <- get_endp token; get_raw_token token in (* Invoke the traditional parser. *) parser lexer lexbuf (* --------------------------------------------------------------------------- *) (* Converting a revised parser back to a traditional parser. *) let revised2traditional (make_token : 'raw_token -> Lexing.position -> Lexing.position -> 'token) (parser : ('token, 'semantic_value) revised) : ('raw_token, 'semantic_value) traditional = (* Accept a traditional lexer and a lexing buffer. *) fun (lexer : Lexing.lexbuf -> 'raw_token) (lexbuf : Lexing.lexbuf) -> (* Wrap the traditional lexer as a revised lexer. *) let lexer () : 'token = let token : 'raw_token = lexer lexbuf in make_token token lexbuf.Lexing.lex_start_p lexbuf.Lexing.lex_curr_p in (* Invoke the revised parser. *) parser lexer (* --------------------------------------------------------------------------- *) (* Simplified versions of the above, where concrete triples are used. *) module Simplified = struct let traditional2revised parser = traditional2revised (fun (token, _, _) -> token) (fun (_, startp, _) -> startp) (fun (_, _, endp) -> endp) parser let revised2traditional parser = revised2traditional (fun token startp endp -> (token, startp, endp)) parser end menhir-20171222/src/sentenceParserAux.ml0000664000175000017500000000270613217215730020300 0ustar fpottierfpottier(******************************************************************************) (*                                                                            *) (*                                   Menhir                                   *) (*                                                                            *) (*                       François Pottier, Inria Paris                        *) (*              Yann Régis-Gianas, PPS, Université Paris Diderot              *) (*                                                                            *) (*  Copyright Inria. All rights reserved. This file is distributed under the  *) (*  terms of the GNU General Public License version 2, as described in the    *) (*  file LICENSE.
*) (* *) (******************************************************************************) open Grammar type terminals = Terminal.t list type sentence = Nonterminal.t option * terminals type located_sentence = Positions.positions * sentence type comment = string type 'a or_comment = | Thing of 'a | Comment of comment let or_comment_iter f = function | Thing s -> f s | Comment _ -> () let or_comment_map f = function | Thing s -> Thing (f s) | Comment c -> Comment c let unThing = function | Thing x -> [ x ] | Comment _ -> [] menhir-20171222/src/codeBits.ml0000664000175000017500000001337513217215730016401 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module provides a number of tiny functions that help produce [IL] code. *) open IL (* A list subject to a condition. *) let listif condition xs = if condition then xs else [] let elementif condition x = if condition then [ x ] else [] let listiflazy condition xs = if condition then xs() else [] (* The unit type. *) let tunit = TypApp ("unit", []) (* The Boolean type. *) let tbool = TypApp ("bool", []) (* The integer type. *) let tint = TypApp ("int", []) (* The string type. *) let tstring = TypApp ("string", []) (* The exception type. *) let texn = TypApp ("exn", []) (* The type of lexer positions. *) let tposition = TypApp ("Lexing.position", []) (* The type of lexer buffers. *) let tlexbuf = TypApp ("Lexing.lexbuf", []) (* The type of untyped semantic values. *) let tobj = TypApp ("Obj.t", []) (* Building a type variable. *) let tvar x : typ = TypVar x (* Building a type scheme. *) let scheme qs t = { quantifiers = qs; body = t } (* Building a type scheme with no quantifiers out of a type. *) let type2scheme t = scheme [] t let pat2var = function | PVar x -> x | _ -> assert false (* [simplify] removes bindings of the form [let v = v in ...] and [let _ = v in ...]. *) let rec simplify = function | [] -> [] | (PVar v1, EVar v2) :: bindings when v1 = v2 -> (* Avoid a useless let binding. *) simplify bindings | (PWildcard, EVar _) :: bindings -> (* Avoid a useless let binding. *) simplify bindings | binding :: bindings -> binding :: simplify bindings (* Building a [let] construct, with on-the-fly simplification. *) let blet (bindings, body) = match simplify bindings with | [] -> body | bindings -> ELet (bindings, body) let mlet formals actuals body = blet (List.combine formals actuals, body) (* Simulating a [let/and] construct using tuples. *) let eletand (bindings, body) = match bindings with | [] -> (* special case: zero bindings *) body | [ _ ] -> (* special case: one binding *) ELet (bindings, body) | _ :: _ :: _ -> (* general case: at least two bindings *) let pats, exprs = List.split bindings in ELet ([ PTuple pats, ETuple exprs ], body) (* [eraisenotfound] is an expression that raises [Not_found]. *) let eraisenotfound = ERaise (EData ("Not_found", [])) (* [bottom] is an expression that has every type. Its semantics is irrelevant. *) let bottom = eraisenotfound (* Boolean constants. 
*) let efalse : expr = EData ("false", []) let etrue : expr = EData ("true", []) let eboolconst b = if b then etrue else efalse (* Option constructors. *) let enone = EData ("None", []) let esome e = EData ("Some", [e]) (* List constructors. *) let rec elist xs = match xs with | [] -> EData ("[]", []) | x :: xs -> EData ("::", [ x; elist xs ]) (* Integer constants as patterns. *) let pint k : pattern = PData (string_of_int k, []) (* These help build function types. *) let arrow typ body : typ = TypArrow (typ, body) let arrowif flag typ body : typ = if flag then arrow typ body else body let marrow typs body : typ = List.fold_right arrow typs body (* ------------------------------------------------------------------------ *) (* Here is a bunch of naming conventions. Our names are chosen to minimize the likelihood that a name in a semantic action is captured. In other words, all global definitions as well as the parameters to [reduce] are given far-fetched names, unless [--no-prefix] was specified. Note that the prefix must begin with '_'. This allows avoiding warnings about unused variables with ocaml 3.09 and later. *) let prefix name = if Settings.noprefix then name else "_menhir_" ^ name let dataprefix name = if Settings.noprefix then name else "Menhir" ^ name let tvprefix name = if Settings.noprefix then name else "ttv_" ^ name (* ------------------------------------------------------------------------ *) (* Converting an interface to a structure. Only exception and type definitions go through. *) let interface_item_to_structure_item = function | IIExcDecls defs -> [ SIExcDefs defs ] | IITypeDecls defs -> [ SITypeDefs defs ] | IIFunctor (_, _) | IIValDecls _ | IIInclude _ | IIModule (_, _) | IIComment _ -> [] let interface_to_structure i = List.flatten (List.map interface_item_to_structure_item i) (* Constructing a named module type together with a list of "with type" constraints. *) let with_types wk name tys = List.fold_left (fun mt (params, name, ty) -> MTWithType (mt, params, name, wk, ty) ) (MTNamedModuleType name) tys let mapp me1 me2 = MApp (me1, me2) let mapp me1 mes2 = List.fold_left mapp me1 mes2 menhir-20171222/src/lr1.mli0000664000175000017500000001642413217215730015512 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Grammar (* This module constructs an LR(1) automaton by following Pager's method, that is, by merging states on the fly when they are found to be (weakly) compatible. *) (* Shift/reduce conflicts are silently resolved when (and only when) that is allowed in a clean way by user-specified priorities. This includes shift/reduce/reduce conflicts when (and only when) there is agreement that the shift action should be preferred. Conflicts that cannot be silently resolved in this phase will be reported, explained, and arbitrarily resolved immediately before code generation. *) (* ------------------------------------------------------------------------- *) (* Accessors. *) (* This is the type of the automaton's nodes. 
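The type is deliberately abstract; nodes are in bijection with the integers 0 .. n-1, see [n] and [number] below. *)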
*) type node module Node : Set.OrderedType with type t = node module NodeSet : Set.S with type elt = node module NodeMap : Map.S with type key = node (* These are the automaton's entry states, indexed by the start productions. *) val entry: node ProductionMap.t (* [fold_entry] folds over [entry]. For convenience, it gives access not only to the start production and start state, but also to the nonterminal symbol and to the OCaml type associated with this production. *) val fold_entry: (Production.index -> node -> Nonterminal.t -> Stretch.ocamltype -> 'a -> 'a) -> 'a -> 'a (* [entry_of_nt] maps a (user) non-terminal start symbol to the corresponding start state. [nt_of_entry] does the reverse. *) val entry_of_nt: Nonterminal.t -> node val nt_of_entry: node -> Nonterminal.t (* Nodes are numbered sequentially from [0] to [n-1]. *) val n: int val number: node -> int (* This provides access to the LR(1) state that a node stands for. *) val state: node -> Lr0.lr1state (* This converts a start node into the single item that it contains. *) val start2item: node -> Item.t (* This maps a node to its incoming symbol, that is, the symbol carried by all of the edges that enter this node. A node has zero incoming edges (and, thus, no incoming symbol) if and only if it is a start node. *) val incoming_symbol: node -> Symbol.t option (* This maps a node to its predecessors. *) val predecessors: node -> node list (* This provides access to a node's transitions and reductions. *) val transitions: node -> node SymbolMap.t val reductions: node -> Production.index list TerminalMap.t (* (New as of 2012/01/23.) This tells whether a shift/reduce conflict in this node was solved in favor of neither (%nonassoc). This implies that one must forbid a default reduction at this node. *) val forbid_default_reduction: node -> bool (* This inverts a mapping of tokens to productions into a mapping of productions to sets of tokens. *) val invert : ProductionMap.key list TerminalMap.t -> TerminalSet.t ProductionMap.t (* [has_beforeend s] tests whether the state [s] can reduce a production whose semantic action uses [$endpos($0)]. Note that [$startpos] and [$endpos] have been expanded away already, so we need not worry about the fact that (in an epsilon production) they expand to [$endpos($0)]. *) val has_beforeend: node -> bool (* Computing which terminal symbols a state is willing to act upon. This function is currently unused, but could be used as part of an error reporting system. *) val acceptable_tokens: node -> TerminalSet.t (* Iteration over all nodes. The order in which elements are examined, and the order of [map]'s output list, correspond to the numeric indices produced by [number] above. *) val fold: ('a -> node -> 'a) -> 'a -> 'a val iter: (node -> unit) -> unit val map: (node -> 'a) -> 'a list (* Iteration over non-start nodes *) val foldx: ('a -> node -> 'a) -> 'a -> 'a val iterx: (node -> unit) -> unit (* Iteration over all edges that carry a certain symbol. Edges are grouped in families, where all edges in a single family have the same target node. [targets f accu symbol] invokes [f accu sources target] once for every family, where [sources] are the sources of the edges in the family and [target] is their common target. *) val targets: ('a -> node list -> node -> 'a) -> 'a -> Symbol.t -> 'a (* Iteration over all nodes with conflicts. [conflicts f] invokes [f toks node] once for every node [node] with a conflict, where [toks] are the tokens involved in the conflicts at that node. 
*) val conflicts: (TerminalSet.t -> node -> unit) -> unit (* [reverse_dfs goal] performs a reverse depth-first search through the automaton, starting at node [goal], and marking the nodes traversed. It returns a function that tells whether a node is marked, that is, whether a path leads from that node to the goal node. *) val reverse_dfs: node -> (node -> bool) (* ------------------------------------------------------------------------- *) (* Modifications of the automaton. *) (* This function performs default conflict resolution. First, it resolves standard (shift/reduce and reduce/reduce) conflicts (thus ensuring that the automaton is deterministic) by removing some reduction actions. Second, it resolves end-of-stream conflicts by ensuring that states that have a reduce action at the pseudo-token "#" have no other action. It is called after conflicts have been explained and before code generation takes place. The automaton is modified in place. *) val default_conflict_resolution: unit -> unit (* This function adds extra reduction actions in the face of an error, if requested by the user via [%on_error_reduce]. *) (* It must be called after conflict resolution has taken place. The automaton is modified in place. *) (* If a state can reduce only one production, whose left-hand symbol has been declared [%on_error_reduce], then every error action in this state is replaced with a reduction action. This is done even though this state may have outgoing shift transitions: thus, we are forcing one interpretation of the past, among several possible interpretations. *) val extra_reductions: unit -> unit (* ------------------------------------------------------------------------- *) (* Information about which productions are reduced and where. *) (* [production_where prod] is the set of all states [s] where production [prod] might be reduced. It is an error to call this function before default conflict resolution has taken place. *) val production_where: Production.index -> NodeSet.t menhir-20171222/src/tarjan.mli0000664000175000017500000000441413217215730016267 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module provides an implementation of Tarjan's algorithm for finding the strongly connected components of a graph. The algorithm runs when the functor is applied. Its complexity is $O(V+E)$, where $V$ is the number of vertices in the graph $G$, and $E$ is the number of edges. *) module Run (G : sig type node (* We assume each node has a unique index. Indices must range from $0$ to $n-1$, where $n$ is the number of nodes in the graph. *) val n: int val index: node -> int (* Iterating over a node's immediate successors. *) val successors: (node -> unit) -> node -> unit (* Iterating over all nodes. *) val iter: (node -> unit) -> unit end) : sig open G (* This function maps each node to a representative element of its strongly connected component. *) val representative: node -> node (* This function maps each representative element to a list of all members of its strongly connected component. Non-representative elements are mapped to an empty list. 
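Thus, for any node [n], [scc (representative n)] yields all members of the strongly connected component of [n]. As a sketch, the components that contain at least two nodes could be collected with the help of [iter] below: [let nontrivial = ref [] in iter (fun repr members -> if List.length members >= 2 then nontrivial := (repr, members) :: !nontrivial)].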
*) val scc: node -> node list (* [iter action] allows iterating over all strongly connected components. For each component, the [action] function is applied to the representative element and to a (non-empty) list of all elements. *) val iter: (node -> node list -> unit) -> unit end menhir-20171222/src/_tags0000664000175000017500000000074513217215730015330 0ustar fpottierfpottier # Compile options. true: \ safe_string, \ bin_annot, \ debug # Menhir needs the Unix library. : use_unix # Turn off assertions in some modules, where they are useful when debugging, but costly. : noassert # Declare that our temporary build directories should not be traversed. # This is required by ocamlbuild 4.03; it will otherwise complain that # these build directories violate its hygiene rules. <_sdk>: -traverse <_stage*>: -traverse menhir-20171222/src/reachability.mli0000664000175000017500000000226713217215730017454 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This extremely simple analysis restricts a grammar to the set of nonterminals that are reachable, via productions, from the start nonterminals. *) val trim: UnparameterizedSyntax.grammar -> UnparameterizedSyntax.grammar menhir-20171222/src/SortInference.mli0000664000175000017500000000234713217215730017561 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Syntax open GroundSort (* [infer_grammar g] performs sort inference for the grammar [g], rejecting the grammar if it is ill-sorted. It returns a map of (terminal and nonterminal) symbols to ground sorts. *) type sorts = sort StringMap.t val infer: grammar -> sorts menhir-20171222/src/cmly_write.mli0000664000175000017500000000233513217215730017166 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* [write filename] queries the modules [Front] and [Grammar] for information about the grammar and queries the modules [Lr0] and [Lr1] for information about the automaton. It writes this information to the .cmly file [filename]. 
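A typical invocation sits in the driver, guarded by a command line switch; assuming that the [Settings] module exposes a Boolean flag [cmly] and the base name [base] of the output files (an assumption made for the sake of this sketch), it might read: [if Settings.cmly then write (Settings.base ^ ".cmly")].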
*) val write: string -> unit menhir-20171222/src/GroundSort.ml0000664000175000017500000000210413217215730016737 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) type sort = | GArrow of sort list let star = GArrow [] let domain sort = let GArrow sorts = sort in sorts menhir-20171222/src/compressedBitSet.mli0000664000175000017500000000176713217215730020277 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) include GSet.S with type element = int menhir-20171222/src/infer.mli0000664000175000017500000000305313217215730016111 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* [ntvar symbol] is the name of the type variable associated with a nonterminal symbol. *) val ntvar: string -> string (* [infer grammar] analyzes the grammar [grammar] and returns a new grammar, augmented with a [%type] declaration for every nonterminal symbol. The [ocamlc] compiler is used to infer types. *) val infer: UnparameterizedSyntax.grammar -> UnparameterizedSyntax.grammar (* [depend grammar] prints (on the standard output channel) the OCaml dependencies induced by the semantic actions. Then, it exits the program. *) val depend: UnparameterizedSyntax.grammar -> 'a menhir-20171222/src/positions.ml0000664000175000017500000000757713217215730016703 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. 
*) (* *) (******************************************************************************) open Lexing type t = { start_p : Lexing.position; end_p : Lexing.position } type 'a located = { value : 'a; position : t; } let value { value = v } = v let position { position = p } = p let with_pos p v = { value = v; position = p; } let with_poss p1 p2 v = with_pos { start_p = p1; end_p = p2 } v let map f v = { value = f v.value; position = v.position; } let iter f { value = v } = f v let mapd f v = let w1, w2 = f v.value in let pos = v.position in { value = w1; position = pos }, { value = w2; position = pos } let dummy = { start_p = Lexing.dummy_pos; end_p = Lexing.dummy_pos } let unknown_pos v = { value = v; position = dummy } let start_of_position p = p.start_p let end_of_position p = p.end_p let filename_of_position p = p.start_p.Lexing.pos_fname let line p = p.pos_lnum let column p = p.pos_cnum - p.pos_bol let characters p1 p2 = (column p1, p2.pos_cnum - p1.pos_bol) (* intentionally [p1.pos_bol] *) let join x1 x2 = { start_p = if x1 = dummy then x2.start_p else x1.start_p; end_p = if x2 = dummy then x1.end_p else x2.end_p } let lex_join x1 x2 = { start_p = x1; end_p = x2 } let join_located l1 l2 f = { value = f l1.value l2.value; position = join l1.position l2.position; } let string_of_lex_pos p = let c = p.pos_cnum - p.pos_bol in (string_of_int p.pos_lnum)^":"^(string_of_int c) let string_of_pos p = let filename = filename_of_position p in (* [filename] is hopefully not "". *) let l = line p.start_p in let c1, c2 = characters p.start_p p.end_p in Printf.sprintf "File \"%s\", line %d, characters %d-%d" filename l c1 c2 let pos_or_undef = function | None -> dummy | Some x -> x let cpos lexbuf = { start_p = Lexing.lexeme_start_p lexbuf; end_p = Lexing.lexeme_end_p lexbuf; } let with_cpos lexbuf v = with_pos (cpos lexbuf) v let string_of_cpos lexbuf = string_of_pos (cpos lexbuf) let joinf f t1 t2 = join (f t1) (f t2) let ljoinf f = List.fold_left (fun p t -> join p (f t)) dummy let join_located_list ls f = { value = f (List.map (fun l -> l.value) ls); position = ljoinf (fun x -> x.position) ls } (* The functions that print error messages and warnings require a list of positions. The following auxiliary functions help build such lists. *) type positions = t list let one (pos : Lexing.position) : positions = [ { start_p = pos; end_p = pos } ] (* or: lex_join pos pos *) let two (pos1 : Lexing.position) (pos2 : Lexing.position) : positions = [ lex_join pos1 pos2 ] let lexbuf (lexbuf : Lexing.lexbuf) : positions = [ lex_join lexbuf.Lexing.lex_start_p lexbuf.Lexing.lex_curr_p ] let print (pos : Lexing.position) = Printf.printf "{ pos_fname = \"%s\"; pos_lnum = %d; pos_bol = %d; pos_cnum = %d }\n" pos.pos_fname pos.pos_lnum pos.pos_bol pos.pos_cnum menhir-20171222/src/InfiniteArray.mli0000664000175000017500000000343113217215730017552 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. *) (* *) (******************************************************************************) (** This module implements infinite arrays. **) type 'a t (** [make x] creates an infinite array, where every slot contains [x]. 
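For instance (a small sketch of typical usage): [let a = make 0 in set a 42 7; assert (get a 42 = 7); assert (get a 100 = 0)], where reading at an offset that was never written yields the default element [0].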
**) val make: 'a -> 'a t (** [get a i] returns the element contained at offset [i] in the array [a]. Slots are numbered 0 and up. **) val get: 'a t -> int -> 'a (** [set a i x] sets the element contained at offset [i] in the array [a] to [x]. Slots are numbered 0 and up. **) val set: 'a t -> int -> 'a -> unit (** [extent a] is the length of an initial segment of the array [a] that is sufficiently large to contain all [set] operations ever performed. In other words, all elements beyond that segment have the default value. *) val extent: 'a t -> int (** [domain a] is a fresh copy of an initial segment of the array [a] whose length is [extent a]. *) val domain: 'a t -> 'a array menhir-20171222/src/sentenceLexer.mll0000664000175000017500000000610713217215730017620 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This lexer is used to read the sentences provided on the standard input channel when [--interpret] is enabled. *) { open Lexing open SentenceParser open Grammar (* A short-hand. *) let error2 lexbuf = Error.error (Positions.two lexbuf.lex_start_p lexbuf.lex_curr_p) } let newline = ('\010' | '\013' | "\013\010") let whitespace = [ ' ' '\t' ';' ] let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_'] let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222'] let identchar = ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '0'-'9'] (* '\'' forbidden *) let autocomment = "##" [^'\010''\013']* newline let comment = "#" [^'\010''\013']* newline let skip = newline whitespace* newline rule lex = parse (* An identifier that begins with a lowercase letter is considered a non-terminal symbol. It should be a start symbol. *) | (lowercase identchar *) as lid { try let nt = Nonterminal.lookup lid in if StringSet.mem lid Front.grammar.UnparameterizedSyntax.start_symbols then NONTERMINAL (nt, lexbuf.lex_start_p, lexbuf.lex_curr_p) else error2 lexbuf "\"%s\" is not a start symbol." lid with Not_found -> error2 lexbuf "\"%s\" is not a known non-terminal symbol." lid } (* An identifier that begins with an uppercase letter is considered a terminal symbol. *) | (uppercase identchar *) as uid { try TERMINAL (Terminal.lookup uid, lexbuf.lex_start_p, lexbuf.lex_curr_p) with Not_found -> error2 lexbuf "\"%s\" is not a known terminal symbol." uid } (* Whitespace is ignored. *) | whitespace { lex lexbuf } (* The end of a line is translated to [EOL]. *) | newline { new_line lexbuf; EOL } (* An auto-generated comment is ignored. *) | autocomment { new_line lexbuf; lex lexbuf } (* A manually-written comment is preserved. *) | comment as c { new_line lexbuf; COMMENT c } (* The end of file is translated to [EOF]. *) | eof { EOF } (* A colon. *) | ':' { COLON } | _ { error2 lexbuf "unexpected character." } menhir-20171222/src/resizableArray.mli0000664000175000017500000000624613217215730017772 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. 
This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module implements resizable arrays, that is, arrays that can grow upon explicit request. *) type 'a t (* [make capacity default init] creates a resizable array of logical length 0, whose physical length is initially [capacity], and whose default element is [default]. The default element is used to fill empty slots in the physical array; it is otherwise irrelevant. The [init] function is used to initialize new logical slots when the logical size of the array grows, so, unlike [default], it is semantically meaningful. *) val make: int -> 'a -> (int -> 'a) -> 'a t (* [make_] is a simplified variant of [make] where the [init] function always returns [default], i.e., where new logical slots are initialized with [default] when the array is grown. *) val make_: int -> 'a -> 'a t (* [length a] returns the current logical length of the array [a]. *) val length: 'a t -> int (* [resize a n] changes the logical length of the array [a] to [n]. If the length decreases, any excess elements are lost. The capacity of the underlying physical array remains the same. If the length increases, the new positions are filled with the array's default element, as initially supplied to [make]. The capacity of the underlying physical array grows by at least a factor of two. *) val resize: 'a t -> int -> unit (* [get a i] returns the element contained at offset [i] in the array [a]. Slots are numbered 0 and up. [i] must be strictly less than the array's current logical length. *) val get: 'a t -> int -> 'a (* [set a i x] sets the element contained at offset [i] in the array [a] to [x]. Slots are numbered 0 and up. [i] must be strictly less than the array's current logical length. *) val set: 'a t -> int -> 'a -> unit (* [push a x] appends the element [x] at the end of the array [a], whose length increases by one. *) val push: 'a t -> 'a -> unit (* [pop a] removes the element [x] found at the end of the array [a], whose length decreases by one. The array must have nonzero length. *) val pop: 'a t -> 'a (* [default a] returns the default value that was used when the array [a] was created. This should be seldom useful, but can be convenient. *) val default: 'a t -> 'a menhir-20171222/src/RowDisplacement.mli0000664000175000017500000000504513217215730020111 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. *) (* *) (******************************************************************************) (* This module compresses a two-dimensional table, where some values are considered insignificant, via row displacement. *) (* A compressed table is represented as a pair of arrays. The displacement array is an array of offsets into the data array. *) type 'a table = int array * (* displacement *) 'a array (* data *) (* [compress equal insignificant dummy m n t] turns the two-dimensional table [t] into a compressed table. The parameter [equal] is equality of data values. 
The parameter [insignificant] tells which data values are insignificant, and can thus be overwritten with other values. The parameter [dummy] is used to fill holes in the data array. [m] and [n] are the integer dimensions of the table [t]. *) val compress: ('a -> 'a -> bool) -> ('a -> bool) -> 'a -> int -> int -> 'a array array -> 'a table (* [get ct i j] returns the value found at indices [i] and [j] in the compressed table [ct]. This function call is permitted only if the value found at indices [i] and [j] in the original table is significant -- otherwise, it could fail abruptly. *) (* Together, [compress] and [get] have the property that, if the value found at indices [i] and [j] in an uncompressed table [t] is significant, then [get (compress t) i j] is equal to that value. *) val get: 'a table -> int -> int -> 'a (* [getget] is a variant of [get] which only requires read access, via accessors, to the two components of the table. *) val getget: ('displacement -> int -> int) -> ('data -> int -> 'a) -> 'displacement * 'data -> int -> int -> 'a menhir-20171222/src/yaccDriver.ml0000664000175000017500000000300313217215730016733 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* The module [Driver] serves to offer a unified API to the parser, which could be produced by either ocamlyacc or Menhir. *) (* This is the ocamlyacc-specific driver. There is nothing special to do. We handle syntax errors in a minimalistic manner. This error handling code will be exercised only if there is a syntax error in [fancy-parser.mly], during stage 2 of the bootstrap process. *) let grammar lexer lexbuf = try Parser.grammar lexer lexbuf with Parsing.Parse_error -> Error.error (Positions.lexbuf lexbuf) "syntax error." menhir-20171222/src/anonymous.mli0000664000175000017500000000203713217215730017037 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Syntax val transform_partial_grammar: partial_grammar -> partial_grammar menhir-20171222/src/dot.ml0000664000175000017500000001040513217215730015422 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Printf (* ------------------------------------------------------------------------- *) (* Type definitions. 
*) type size = float * float (* in inches *) type orientation = | Portrait | Landscape type rankdir = | LeftToRight | TopToBottom type ratio = | Compress | Fill | Auto type style = (* Both nodes and edges. *) | Solid | Dashed | Dotted | Bold | Invisible (* Nodes only. *) | Filled | Diagonals | Rounded type shape = | Box | Oval | Circle | DoubleCircle (* there are many others, let's stop here *) (* ------------------------------------------------------------------------- *) (* Basic printers. *) let print_style = function | None -> "" | Some style -> let style = match style with | Solid -> "solid" | Dashed -> "dashed" | Dotted -> "dotted" | Bold -> "bold" | Invisible -> "invis" | Filled -> "filled" | Diagonals -> "diagonals" | Rounded -> "rounded" in sprintf ", style = %s" style let print_shape = function | None -> "" | Some shape -> let shape = match shape with | Box -> "box" | Oval -> "oval" | Circle -> "circle" | DoubleCircle -> "doublecircle" in sprintf ", shape = %s" shape (* ------------------------------------------------------------------------- *) (* The graph printer. *) module Print (G : sig type vertex val name: vertex -> string val successors: (?style:style -> label:string -> vertex -> unit) -> vertex -> unit val iter: (?shape:shape -> ?style:style -> label:string -> vertex -> unit) -> unit end) = struct let print ?(directed = true) ?size ?(orientation = Landscape) ?(rankdir = LeftToRight) ?(ratio = Compress) (f : out_channel) = fprintf f "%s G {\n" (if directed then "digraph" else "graph"); Option.iter (fun (hsize, vsize) -> fprintf f "size=\"%f, %f\";\n" hsize vsize ) size; begin match orientation with | Portrait -> fprintf f "orientation = portrait;\n" | Landscape -> fprintf f "orientation = landscape;\n" end; begin match rankdir with | LeftToRight -> fprintf f "rankdir = LR;\n" | TopToBottom -> fprintf f "rankdir = TB;\n" end; begin match ratio with | Compress -> fprintf f "ratio = compress;\n" | Fill -> fprintf f "ratio = fill;\n" | Auto -> fprintf f "ratio = auto;\n" end; G.iter (fun ?shape ?style ~label vertex -> fprintf f "%s [ label=\"%s\"%s%s ] ;\n" (G.name vertex) label (print_style style) (print_shape shape) ); G.iter (fun ?shape ?style ~label source -> ignore shape; (* avoid unused variable warnings *) ignore style; ignore label; G.successors (fun ?style ~label destination -> fprintf f "%s %s %s [ label=\"%s\"%s ] ;\n" (G.name source) (if directed then "->" else "--") (G.name destination) label (print_style style) ) source ); fprintf f "\n}\n" end menhir-20171222/src/Boolean.ml0000664000175000017500000000222313217215730016212 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* The Boolean lattice. 
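This lattice is intended to serve as the codomain of a monotone fixed point computation: [bottom] is falsity, [union] is disjunction, and [is_maximal] allows such a computation to stop early, since a property that has become [true] can no longer grow. For instance, [union bottom true] is [true], and [is_maximal (union true false)] holds.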
*) type property = bool let bottom = false let equal (b1 : bool) (b2 : bool) = b1 = b2 let is_maximal b = b let union (b1 : bool) (b2 : bool) = b1 || b2 menhir-20171222/src/SelectiveExpansion.ml0000664000175000017500000004466413217215730020450 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) let value = Positions.value let unknown = Positions.unknown_pos open Syntax open GroundSort (* -------------------------------------------------------------------------- *) (* Expansion modes. *) type mode = | ExpandHigherSort | ExpandAll (* -------------------------------------------------------------------------- *) (* Expansion can be understood as traversing a graph where every vertex is labeled with a pair of a nonterminal symbol [nt] and an instantiation of the formal parameters of [nt]. *) (* We allow partial instantiations, where some of the formal parameters of [nt] are instantiated, while others remain present. For this reason, we represent an instantiation as a list of *optional* actual parameters. *) (* The actual parameters that appear in an instantiation make sense *in the source namespace* (at the toplevel). That is, they refer to (terminal and nonterminal) symbols that exist (at the toplevel) in the original grammar. *) type instantiation = parameter option list type label = nonterminal * instantiation (* Equality and hashing for labels. *) module Label = struct type t = label let equal (nt1, inst1) (nt2, inst2) = nt1 = nt2 && Misc.ListExtras.equal (Option.equal Parameters.equal) inst1 inst2 let hash (nt, inst) = Hashtbl.hash (nt, Misc.ListExtras.hash (Option.hash Parameters.hash) inst) end (* -------------------------------------------------------------------------- *) (* [mangle label] chooses a concrete name for the new nonterminal symbol that corresponds to the label [label]. *) (* We include parentheses and commas in this name, because that is readable and acceptable in many situations. We replace them with underscores in situations where these characters are not valid; see [Misc.normalize]. *) let mangle_po (po : parameter option) = match po with | None -> (* When a parameter remains uninstantiated, we put an underscore in its place. *) "_" | Some p -> Parameters.print false p let mangle ((nt, pos) : label) : nonterminal = if pos = [] then nt else Printf.sprintf "%s(%s)" nt (Misc.separated_list_to_string mangle_po "," pos) (* -------------------------------------------------------------------------- *) (* An environment maps all of the formal parameters of a rule to actual parameters, which make sense in the source namespace. *) module Env = StringMap type env = parameter Env.t let subst_symbol env sym : parameter = try Env.find (value sym) env with Not_found -> (* [x] is not a formal parameter. It is a toplevel symbol. *) ParameterVar sym let apply (param : parameter) (params : parameter list) : parameter = match param with | ParameterVar sym -> assert (params <> []); ParameterApp (sym, params) | ParameterApp _ -> (* In a well-sorted grammar, only a variable can have higher sort. Here, [param] must have higher sort, so [param] must be a variable. 
This case cannot arise. *) assert false | ParameterAnonymous _ -> (* Anonymous rules are eliminated early on. *) assert false let rec subst_parameter env param : parameter = match param with | ParameterVar sym -> subst_symbol env sym | ParameterApp (sym, params) -> assert (params <> []); apply (subst_symbol env sym) (subst_parameters env params) | ParameterAnonymous _ -> (* Anonymous rules are eliminated early on. *) assert false and subst_parameters env params = List.map (subst_parameter env) params (* -------------------------------------------------------------------------- *) (* -------------------------------------------------------------------------- *) (* For syntactic convenience, the rest of this file is a functor. *) module Run (G : sig (* Expansion mode. *) val mode: mode (* Sort information. *) val sorts: SortInference.sorts (* The grammar [g] whose expansion is desired. *) val g : grammar end) = struct open G (* -------------------------------------------------------------------------- *) (* Determining the sort of a symbol or parameter. *) (* Be careful: these functions use the toplevel sort environment [sorts], so they must not be used within a rule. (The sort environment would have to be extended with information about the formal parameters.) *) let sort symbol = try StringMap.find (value symbol) sorts with Not_found -> assert false let sort param = match param with | ParameterVar sym -> sort sym | ParameterApp (_, params) -> assert (params <> []); (* An application always has sort [*]. *) star | ParameterAnonymous _ -> assert false (* -------------------------------------------------------------------------- *) (* Looking up the [%attribute] declarations, looking for attributes attached with a nonterminal symbol [nt]. This is used when we create a specialized version of this symbol. *) (* We use an inefficient linear search, but that shouldn't be a problem. *) let global_attributes (nt : symbol) : attribute list = let param = ParameterVar (unknown nt) in List.concat (List.map (fun (params, attrs) -> if List.exists (Parameters.equal param) params then attrs else [] ) g.p_symbol_attributes) (* -------------------------------------------------------------------------- *) (* A queue keeps track of the graph vertices that have been discovered but not yet visited. *) let enqueue, repeatedly = let queue = Queue.create() in let enqueue label = Queue.add label queue and repeatedly visit = Misc.qiter visit queue in enqueue, repeatedly (* -------------------------------------------------------------------------- *) (* A hash table is used to mark the graph vertices that have been discovered. *) let mark, marked = let module H = Hashtbl.Make(Label) in let table = H.create 128 in let mark label = H.add table label () and marked label = H.mem table label in mark, marked (* -------------------------------------------------------------------------- *) (* The rules of the expanded grammar are gradually collected. *) let emit, rules = let rules = ref StringMap.empty in let emit rule = assert (not (StringMap.mem rule.pr_nt !rules)); rules := StringMap.add rule.pr_nt rule !rules and rules() = !rules in emit, rules (* -------------------------------------------------------------------------- *) (* On top of the function [mangle], we set up a mechanism that checks that every (normalized) mangled name is unique. (Indeed, in principle, there could be clashes, although in practice this is unlikely.) 
We must check that every application of [mangle] to a *new* argument yields a *new* (normalized) result. This is succinctly expressed by combining a claim and a memoizer. *) let mangle : label -> nonterminal = let ensure_fresh = Misc.new_claim() in let module M = Memoize.MakeViaHashtbl(Label) in M.memoize (fun label -> let name = mangle label in ensure_fresh (Misc.normalize name); name ) (* -------------------------------------------------------------------------- *) (* [recognize] receives an actual parameter [param] that makes sense in the source namespace and transforms it into a parameter that makes sense in the target namespace. This involves examining each application and "recognizing" it as an application of a label to a sequence of residual actual parameters, as explained next. All labels thus recognized are enqueued. *) (* [recognize] governs how much specialization is performed. For instance, [F(X, Y, Z)] could be recognized as: - an application of the symbol [F] to the residual arguments [X, Y, Z]. Then, no specialization at all takes place. - an application of the symbol [F(X,Y,Z)] to no residual arguments. Then, [F] is fully specialized for [X, Y, Z]. - in between these extremes, say, an application of the symbol [F(X,_,Z)] to the residual argument [Y]. Then, [F] is partially specialized. If there are any residual arguments, then they must be recursively recognized. For instance, [F(X,G(Y),Z)] could be recognized as an application of the symbol [F(X,_,Z)] to [G(Y)], which itself could be recognized as an application of the symbol [G(Y)] to no residual arguments. *) let rec recognize (param : parameter) : parameter = (* [param] must have sort [star], in an appropriate sort environment. *) match param with | ParameterAnonymous _ -> assert false | ParameterVar _ -> param | ParameterApp (sym, ps) -> assert (ps <> []); let x = value sym in (* This symbol is applied to at least one argument, so cannot be a terminal symbol. It must be either a nonterminal symbol or an (uninstantiated) formal parameter of the current rule. *) (* Actually, in both modes, formal parameters of higher sort are expanded away, so [sym] cannot be an uninstantiated parameter of the current rule. It must be a nonterminal symbol. We can therefore look up its sort in the toplevel environment [sorts]. *) let inst, residuals = match mode with | ExpandAll -> (* Expansion of all parameters. *) let inst = List.map (fun p -> Some p) ps and residuals = [] in inst, residuals | ExpandHigherSort -> (* Expansion of only the parameters of higher sort. *) let ss : sort list = domain (sort (ParameterVar sym)) in assert (List.length ps = List.length ss); let pss = List.combine ps ss in let inst = pss |> List.map (fun (param, sort) -> if sort = star then None else Some param) in let residuals = pss |> List.filter (fun (_, sort) -> sort = star) |> List.map (fun (param, _) -> recognize param) in inst, residuals in let label = (x, inst) in enqueue label; let sym = mangle label in Parameters.app (unknown sym) residuals (* -------------------------------------------------------------------------- *) (* The following functions take arguments in the source namespace and produce results in the target namespace. *) let subst_parameter env param = (* [param] must have sort [star], in an appropriate sort environment. 
*) recognize (subst_parameter env param) let subst_producer env (id, param, attrs) = let param = subst_parameter env param in (id, param, attrs) let subst_producers env producers = List.map (subst_producer env) producers let subst_branch env branch = { branch with pr_producers = subst_producers env branch.pr_producers } let subst_branches env branches = List.map (subst_branch env) branches (* -------------------------------------------------------------------------- *) (* A quick and dirty way of mapping a name to a fresh name. *) let freshen : string -> string = let c = ref 0 in fun x -> Printf.sprintf "%s__menhir__%d" x (Misc.postincrement c) (* -------------------------------------------------------------------------- *) (* [instantiation_env] expects the formal parameters of a rule, [formals], and an instantiation [inst] that dictates how this rule must be specialized. It returns an environment [env] that can be used to perform specialization and a list of residual formal parameters (those that are not specialized). *) let instantiation_env formals inst : env * symbol list = assert (List.length formals = List.length inst); let env, residuals = List.fold_right2 (fun formal po (env, residuals) -> let param, residuals = match po with | Some param -> (* This formal parameter is instantiated. *) param, residuals | None -> (* This formal parameter is not instantiated. *) (* We would like to map it to itself. *) (* However, we must in principle be a bit careful: if a toplevel symbol by the same name as [formal] appears free in the codomain of the environment that we are building, then we will run into trouble. We avoid this problem by systematically renaming every formal parameter to a fresh unlikely name. *) let formal = freshen formal in ParameterVar (unknown formal), formal :: residuals in Env.add formal param env, residuals ) formals inst (Env.empty, []) in env, residuals (* -------------------------------------------------------------------------- *) (* [visit label] visits a vertex labeled [label] in the graph. This label is a pair of a nonterminal symbol [nt] and an instantiation [inst]. Unless this vertex has been visited already, we create a specialized copy of [nt] for this instantiation. This involves a call to [subst_branches], which can cause more vertices to be discovered and enqueued. *) (* The specialized symbol retains any attributes carried by the original parameterized symbol. These attributes could be either attached with this rule ([rule.pr_attributes]) or specified via an [%attribute] declaration. We have to look up [%attribute] declarations now (as opposed to letting [Drop] handle them) if this is a parameterized symbol, as the connection between the original parameterized symbol and its specialized version is evident here but is lost afterwards. *) let visit label = if not (marked label) then begin mark label; let (nt, inst) = label in let rule = StringMap.find nt g.p_rules in let formals = rule.pr_parameters in let env, residuals = instantiation_env formals inst in emit { rule with pr_nt = mangle label; pr_parameters = residuals; pr_branches = subst_branches env rule.pr_branches; pr_attributes = (if formals = [] then [] else global_attributes nt) @ rule.pr_attributes } end (* -------------------------------------------------------------------------- *) (* The entry points of the graph traversal include the nonterminal symbols of sort [*]. 
(Not just the start symbols, as we haven't run the reachability analysis, and the grammar may contain unreachable parts, which we still want to expand.) Because a start symbol must have sort [*], this includes the start symbols. *) let () = StringMap.iter (fun nt prule -> if prule.pr_parameters = [] then let label = (nt, []) in enqueue label ) g.p_rules (* -------------------------------------------------------------------------- *) (* The parameters that appear in [%type] declarations and [%on_error_reduce] declarations are also considered entry points. They have sort [*]. *) let subst_parameter param = subst_parameter Env.empty param let subst_declaration (param, info) = assert (sort param = star); (subst_parameter param, info) let subst_declarations decls = List.map subst_declaration decls (* -------------------------------------------------------------------------- *) (* An [%attribute] declaration for a parameter of sort [*] is treated as an entry point. An [%attribute] declaration for a symbol of higher sort is not regarded as an entry point, and at the end, is kept only if this symbol still appears in the expanded grammar. *) (* This is done in two passes over the list of [%attribute] declarations, named [thingify] and [unthingify]. The first pass runs as part of the discovery of entry points, before the graph traversal. The second pass runs after the graph traversal is complete. *) type thing = | TargetParameterOfSortStar of parameter | SourceParameterOfHigherSort of parameter let thingify_parameter param : thing = if sort param = star then TargetParameterOfSortStar (subst_parameter param) else SourceParameterOfHigherSort param let thingify_attribute_declaration (params, attrs) = (List.map thingify_parameter params, attrs) let thingify_attribute_declarations decls = List.map thingify_attribute_declaration decls let unthingify_parameter rules thing = match thing with | TargetParameterOfSortStar param -> (* This parameter has sort [star]. Keep it. *) Some param | SourceParameterOfHigherSort param -> (* This parameter has higher sort. It must be a symbol. Keep it if it still appears in the expanded grammar. *) let symbol = value (Parameters.unvar param) in if StringMap.mem symbol rules then Some param else None let unthingify_attribute_declaration rules (params, attrs) = (Misc.map_opt (unthingify_parameter rules) params, attrs) let unthingify_attribute_declarations rules decls = List.map (unthingify_attribute_declaration rules) decls (* -------------------------------------------------------------------------- *) (* Put everything together and construct a new grammar. *) let g = (* Discovery of entry points. *) let p_types = subst_declarations g.p_types and p_on_error_reduce = subst_declarations g.p_on_error_reduce and things = thingify_attribute_declarations g.p_symbol_attributes in (* Graph traversal. *) repeatedly visit; (* Construction of the new grammar. *) let p_rules = rules() in let p_symbol_attributes = unthingify_attribute_declarations p_rules things in { g with p_types; p_on_error_reduce; p_symbol_attributes; p_rules } end (* of the functor *) (* -------------------------------------------------------------------------- *) (* Re-package the above functor as a function. 
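A client might therefore write (a sketch, where the sort information [sorts] has previously been computed by [SortInference.infer]): [let g = SelectiveExpansion.expand ExpandHigherSort sorts g].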
*) let expand mode sorts g = let module G = Run(struct let mode = mode let sorts = sorts let g = g end) in G.g menhir-20171222/src/slr.mli0000664000175000017500000000251313217215730015606 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module extends the LR(0) automaton with lookahead information in order to construct an SLR(1) automaton. The lookahead information is obtained by considering the FOLLOW sets. *) (* This construction is not used by Menhir, but can be used to check whether the grammar is in the class SLR(1). This check is performed when the log level [lg] is at least 1. *) menhir-20171222/src/Printers.mli0000664000175000017500000000520113217215730016611 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. *) (* *) (******************************************************************************) (* This module is part of MenhirLib. *) module Make (I : IncrementalEngine.EVERYTHING) (User : sig (* [print s] is supposed to send the string [s] to some output channel. *) val print: string -> unit (* [print_symbol s] is supposed to print a representation of the symbol [s]. *) val print_symbol: I.xsymbol -> unit (* [print_element e] is supposed to print a representation of the element [e]. This function is optional; if it is not provided, [print_element_as_symbol] (defined below) is used instead. *) val print_element: (I.element -> unit) option end) : sig open I (* Printing a list of symbols. *) val print_symbols: xsymbol list -> unit (* Printing an element as a symbol. This prints just the symbol that this element represents; nothing more. *) val print_element_as_symbol: element -> unit (* Printing a stack as a list of elements. This function needs an element printer. It uses [print_element] if provided by the user; otherwise it uses [print_element_as_symbol]. (Ending with a newline.) *) val print_stack: 'a env -> unit (* Printing an item. (Ending with a newline.) *) val print_item: item -> unit (* Printing a production. (Ending with a newline.) *) val print_production: production -> unit (* Printing the current LR(1) state. The current state is first displayed as a number; then the list of its LR(0) items is printed. (Ending with a newline.) *) val print_current_state: 'a env -> unit (* Printing a summary of the stack and current state. This function just calls [print_stack] and [print_current_state] in succession. 
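As an illustration, for a parser generated with [--table --inspection], whose incremental interface is [Parser.MenhirInterpreter], this functor could be instantiated as follows (a sketch; [print_symbol] is necessarily application-specific): [module P = MenhirLib.Printers.Make (Parser.MenhirInterpreter) (struct let print = print_string let print_symbol _ = print_string "?" let print_element = None end)]. Then, [P.print_env env] displays a summary of the parser's stack and current state.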
*) val print_env: 'a env -> unit end menhir-20171222/src/syntax.ml0000664000175000017500000002174313217215730016171 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* The type [partial_grammar] describes the abstract syntax that is produced by the parsers (yacc-parser and fancy-parser). The type [grammar] describes the abstract syntax that is obtained after one or more partial grammars are joined (see [PartialGrammar]). It differs in that declarations are organized in a more useful way and a number of well-formedness checks have been performed. *) (* ------------------------------------------------------------------------ *) (* Terminals and nonterminal symbols are strings. Identifiers (which are used to refer to a symbol's semantic value) are strings. A file name is a string. *) type terminal = string type nonterminal = string type symbol = string type identifier = string type filename = string (* ------------------------------------------------------------------------ *) (* A postlude is a source file fragment. *) type postlude = Stretch.t (* ------------------------------------------------------------------------ *) (* OCaml semantic actions are represented as stretches. *) type action = Action.t (* ------------------------------------------------------------------------ *) (* An attribute consists of an attribute name and an attribute payload. The payload is an uninterpreted stretch of source text. *) type attribute = string Positions.located * Stretch.t type attributes = attribute list (* Attributes allow the user to annotate the grammar with information that is ignored by Menhir, but can be exploited by other tools, via the SDK. *) (* Attributes can be attached in the following places: - with the grammar: %[@bar ...] - with a terminal symbol: %token FOO [@bar ...] - with a rule: foo(X) [@bar ...]: ... - with a producer: e = foo(quux) [@bar ...] - with an arbitrary symbol: %attribute FOO foo(quux) [@bar ...] After expanding away parameterized nonterminal symbols, things become a bit simpler, as %attribute declarations are desugared away. *) (* ------------------------------------------------------------------------ *) (* Information about tokens. (Only after joining.) *) type token_associativity = LeftAssoc | RightAssoc | NonAssoc | UndefinedAssoc type precedence_level = UndefinedPrecedence (* Items are incomparable when they originate in different files. A value of type [input_file] is used to record an item's origin. The positions allow locating certain warnings. *) | PrecedenceLevel of InputFile.input_file * int * Lexing.position * Lexing.position type token_properties = { tk_filename : filename; tk_ocamltype : Stretch.ocamltype option; tk_position : Positions.t; tk_attributes : attributes; mutable tk_associativity : token_associativity; mutable tk_precedence : precedence_level; mutable tk_is_declared : bool; } (* ------------------------------------------------------------------------ *) (* A [%prec] annotation is optional. A production can carry at most one. If there is one, it is a symbol name. See [ParserAux]. 
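In concrete syntax, such an annotation appears at the end of a production, as in [expr: MINUS e = expr %prec UMINUS { - e }], where the production is explicitly given the priority level of the token [UMINUS].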
*) type branch_prec_annotation = symbol Positions.located option (* ------------------------------------------------------------------------ *) (* A "production level" is used to solve reduce/reduce conflicts. It reflects which production appears first in the grammar. See [ParserAux]. *) type branch_production_level = | ProductionLevel of InputFile.input_file * int (* ------------------------------------------------------------------------ *) (* A level is attached to every [%on_error_reduce] declaration. It is used to decide what to do when several such declarations are applicable in a single state. *) type on_error_reduce_level = branch_production_level (* we re-use the above type, to save code *) (* ------------------------------------------------------------------------ *) (* A parameter is either just a symbol or an application of a symbol to a nonempty tuple of parameters. Before anonymous rules have been eliminated, it can also be an anonymous rule, represented as a list of branches. *) type parameter = | ParameterVar of symbol Positions.located | ParameterApp of symbol Positions.located * parameters | ParameterAnonymous of parameterized_branch list Positions.located and parameters = parameter list (* ------------------------------------------------------------------------ *) (* A producer is a pair of an identifier and a parameter. In concrete syntax, it could be [e = expr], for instance. It carries a number of attributes. *) and producer = identifier Positions.located * parameter * attributes (* ------------------------------------------------------------------------ *) (* A branch contains a series of producers and a semantic action. *) and parameterized_branch = { pr_branch_position : Positions.t; pr_producers : producer list; pr_action : action; pr_branch_prec_annotation : branch_prec_annotation; pr_branch_production_level : branch_production_level } (* ------------------------------------------------------------------------ *) (* A rule has a header and several branches. *) type parameterized_rule = { pr_public_flag : bool; pr_inline_flag : bool; pr_nt : nonterminal; pr_positions : Positions.t list; pr_attributes : attributes; pr_parameters : symbol list; pr_branches : parameterized_branch list; } (* ------------------------------------------------------------------------ *) (* A declaration. (Only before joining.) *) type declaration = (* Raw OCaml code. *) | DCode of Stretch.t (* Raw OCaml functor parameter. *) | DParameter of Stretch.ocamltype (* really a stretch *) (* Terminal symbol (token) declaration. *) | DToken of Stretch.ocamltype option * terminal * attributes (* Start symbol declaration. *) | DStart of nonterminal (* Priority and associativity declaration. *) | DTokenProperties of terminal * token_associativity * precedence_level (* Type declaration. *) | DType of Stretch.ocamltype * parameter (* Grammar-level attribute declaration. *) | DGrammarAttribute of attribute (* Attributes shared among multiple symbols, i.e., [%attribute]. *) | DSymbolAttributes of parameter list * attributes (* On-error-reduce declaration. *) | DOnErrorReduce of parameter * on_error_reduce_level (* ------------------------------------------------------------------------ *) (* A partial grammar. (Only before joining.) *) type partial_grammar = { pg_filename : filename; pg_postlude : postlude option; pg_declarations : declaration Positions.located list; pg_rules : parameterized_rule list; } (* ------------------------------------------------------------------------ *) (* A grammar. 
(Only after joining.) *) (* The differences with partial grammars (above) are as follows: 1. the file name is gone (there could be several file names, anyway). 2. there can be several postludes. 3. declarations are organized by kind: preludes, postludes, functor %parameters, %start symbols, %types, %tokens, %on_error_reduce, grammar attributes, %attributes. 4. rules are stored in a map, indexed by symbol names, instead of a list. *) type grammar = { p_preludes : Stretch.t list; p_postludes : postlude list; p_parameters : Stretch.t list; p_start_symbols : Positions.t StringMap.t; p_types : (parameter * Stretch.ocamltype Positions.located) list; p_tokens : token_properties StringMap.t; p_on_error_reduce : (parameter * on_error_reduce_level) list; p_grammar_attributes : attributes; p_symbol_attributes : (parameter list * attributes) list; p_rules : parameterized_rule StringMap.t; } menhir-20171222/src/META0000664000175000017500000000002513217215730014750 0ustar fpottierfpottierversion = "20171222" menhir-20171222/src/error.mli0000664000175000017500000000542313217215730016142 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module helps report errors. *) (* ---------------------------------------------------------------------------- *) (* Logging and log levels. *) val logG: int -> (out_channel -> unit) -> unit val logA: int -> (out_channel -> unit) -> unit val logC: int -> (out_channel -> unit) -> unit (* ---------------------------------------------------------------------------- *) (* Errors and warnings. *) (* [error ps format ...] displays the list of positions [ps], followed with the error message [format ...], and exits. The strings "Error: " and "\n" are automatically added at the beginning and end of the error message. The message should begin with a lowercase letter and end with a dot. *) val error: Positions.positions -> ('a, out_channel, unit, 'b) format4 -> 'a (* [errorp] is like [error], but uses the position range carried by [v]. *) val errorp: _ Positions.located -> ('a, out_channel, unit, 'b) format4 -> 'a (* [signal] is like [error], except it does not exit immediately. It sets a flag which can be tested using [errors]. *) val signal: Positions.positions -> ('a, out_channel, unit, unit) format4 -> 'a (* [errors] returns [true] if [signal] was previously called. Together [signal] and [errors] allow reporting multiple errors before aborting. *) val errors: unit -> bool (* [warning] is like [signal], except it does not set a flag. *) val warning: Positions.positions -> ('a, out_channel, unit, unit) format4 -> 'a (* Certain warnings about the grammar can optionally be treated as errors. The following function emits a warning or error message, via [warning] or [signal]. It does not stop the program; the client must at some point call [errors] and stop the program if any errors have been reported. 
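A typical driver therefore ends its front-end phase with a check of the form (a sketch): [if errors() then exit 1].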
*) val grammar_warning: Positions.positions -> ('a, out_channel, unit, unit) format4 -> 'a menhir-20171222/src/LinearizedArray.ml0000664000175000017500000000530613217215730017725 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. *) (* *) (******************************************************************************) (* The [entry] array contains offsets into the [data] array. It has [n+1] elements if the original (unencoded) array has [n] elements. The value of [entry.(n)] is the length of the [data] array. This convention is natural and allows avoiding a special case. *) type 'a t = (* data: *) 'a array * (* entry: *) int array let make (a : 'a array array) : 'a t = let n = Array.length a in (* Build the entry array. *) let size = ref 0 in let entry = Array.init (n + 1) (fun i -> let s = !size in if i < n then size := s + Array.length a.(i); s ) in assert (entry.(n) = !size); (* Build the data array. *) let i = ref 0 and j = ref 0 in let data = Array.init !size (fun _ -> while !j = Array.length a.(!i) do i := !i + 1; j := 0; done; let x = a.(!i).(!j) in j := !j + 1; x ) in data, entry let length ((_, entry) : 'a t) : int = Array.length entry let row_length ((_, entry) : 'a t) i : int = entry.(i + 1) - entry.(i) let row_length_via get_entry i = get_entry (i + 1) - get_entry i let read ((data, entry) as la : 'a t) i j : 'a = assert (0 <= j && j < row_length la i); data.(entry.(i) + j) let read_via get_data get_entry i j = assert (0 <= j && j < row_length_via get_entry i); get_data (get_entry i + j) let write ((data, entry) as la : 'a t) i j (v : 'a) : unit = assert (0 <= j && j < row_length la i); data.(entry.(i) + j) <- v let rec read_interval_via get_data i j = if i = j then [] else get_data i :: read_interval_via get_data (i + 1) j let read_row_via get_data get_entry i = read_interval_via get_data (get_entry i) (get_entry (i + 1)) let read_row ((data, entry) : 'a t) i : 'a list = read_row_via (Array.get data) (Array.get entry) i menhir-20171222/src/GroundSort.mli0000664000175000017500000000230513217215730017113 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* The syntax of sorts is: sort ::= (sort, ..., sort) -> * where the arity (the number of sorts on the left-hand side of the arrow) can be zero. *) type sort = | GArrow of sort list val star: sort val domain: sort -> sort list menhir-20171222/src/menhir.ml0000664000175000017500000000206613217215730016122 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. 
This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* The main program. *) (* Everything is in [Back]. *) module B = Back (* artificial dependency *) menhir-20171222/src/codeBackend.mli0000664000175000017500000000207013217215730017166 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* The (code-based) code generator. *) module Run (T : sig end) : sig val program: IL.program end menhir-20171222/src/Convert.mli0000664000175000017500000000657013217215730016433 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. *) (* *) (******************************************************************************) (* An ocamlyacc-style, or Menhir-style, parser requires access to the lexer, which must be parameterized with a lexing buffer, and to the lexing buffer itself, where it reads position information. *) (* This traditional API is convenient when used with ocamllex, but inelegant when used with other lexer generators. *) type ('token, 'semantic_value) traditional = (Lexing.lexbuf -> 'token) -> Lexing.lexbuf -> 'semantic_value (* This revised API is independent of any lexer generator. Here, the parser only requires access to the lexer, and the lexer takes no parameters. The tokens returned by the lexer may contain position information. *) type ('token, 'semantic_value) revised = (unit -> 'token) -> 'semantic_value (* --------------------------------------------------------------------------- *) (* Converting a traditional parser, produced by ocamlyacc or Menhir, into a revised parser. *) (* A token of the revised lexer is essentially a triple of a token of the traditional lexer (or raw token), a start position, and an end position. The three [get] functions are accessors. *) (* We do not require the type ['token] to actually be a triple type. This enables complex applications where it is a record type with more than three fields. It also enables simple applications where positions are of no interest, so ['token] is just ['raw_token] and [get_startp] and [get_endp] return dummy positions. *) val traditional2revised: ('token -> 'raw_token) -> ('token -> Lexing.position) -> ('token -> Lexing.position) -> ('raw_token, 'semantic_value) traditional -> ('token, 'semantic_value) revised (* --------------------------------------------------------------------------- *) (* Converting a revised parser back to a traditional parser.
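For illustration, assuming that [p] is a revised parser whose token type is the triple type, one can recover a traditional parser via [Simplified.revised2traditional p] (see below), which amounts to [revised2traditional (fun raw startp endp -> (raw, startp, endp)) p].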
*) val revised2traditional: ('raw_token -> Lexing.position -> Lexing.position -> 'token) -> ('token, 'semantic_value) revised -> ('raw_token, 'semantic_value) traditional (* --------------------------------------------------------------------------- *) (* Simplified versions of the above, where concrete triples are used. *) module Simplified : sig val traditional2revised: ('token, 'semantic_value) traditional -> ('token * Lexing.position * Lexing.position, 'semantic_value) revised val revised2traditional: ('token * Lexing.position * Lexing.position, 'semantic_value) revised -> ('token, 'semantic_value) traditional end menhir-20171222/src/lineCount.mll0000664000175000017500000000234413217215730016753 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This simple function counts the number of newline characters in a string. *) let newline = ('\010' | '\013' | "\013\010") let ordinary = [^ '\010' '\013']+ rule count n = parse | eof { n } | newline { count (n + 1) lexbuf } | ordinary { count n lexbuf } menhir-20171222/src/time.ml0000664000175000017500000000326613217215730015601 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) let channel = stderr open Unix open Printf let clock = ref (times()) let tick msg = if Settings.timings then let times1 = !clock in let times2 = times() in fprintf channel "%s: %.02fs\n%!" msg (times2.tms_utime -. times1.tms_utime); clock := times() type chrono = float ref let fresh () = ref 0. let chrono (chrono : float ref) (task : unit -> 'a) : 'a = if Settings.timings then begin let times1 = times() in let result = task() in let times2 = times() in chrono := !chrono +. times2.tms_utime -. times1.tms_utime; result end else task() let display (chrono : float ref) msg = if Settings.timings then fprintf channel "%s: %.02fs\n" msg !chrono menhir-20171222/src/lr0.mli0000664000175000017500000001125013217215730015501 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Grammar (* This module builds the LR(0) automaton associated with the grammar, then provides access to it. It also provides facilities for efficiently performing LR(1) constructions. *) (* ------------------------------------------------------------------------ *) (* The LR(0) automaton. 
*) (* The nodes of the LR(0) automaton are numbered. *) type node = int (* This is the number of nodes in the LR(0) automaton. *) val n: int (* These are the automaton's entry states, indexed by the start productions. *) val entry: node ProductionMap.t (* A node can be converted to the underlying LR(0) set of items. This set is not closed. *) val items: node -> Item.Set.t (* The incoming symbol of an LR(0) node is the symbol carried by all of the edges that enter this node. A node has zero incoming edges (and, thus, no incoming symbol) if and only if it is a start node. *) val incoming_symbol: node -> Symbol.t option (* ------------------------------------------------------------------------ *) (* Help for building the LR(1) automaton. *) (* An LR(1) state is internally represented as a pair of an LR(0) state number and an array of concrete lookahead sets (whose length depends on the LR(0) state). *) type lr1state (* An encoded LR(1) state can be turned into a concrete representation, that is, a mapping of items to concrete lookahead sets. *) type concretelr1state = TerminalSet.t Item.Map.t val export: lr1state -> concretelr1state (* One can take the closure of a concrete LR(1) state. *) val closure: concretelr1state -> concretelr1state (* The core of an LR(1) state is the underlying LR(0) state. *) val core: lr1state -> node (* One can create an LR(1) start state out of an LR(0) start node. *) val start: node -> lr1state (* Information about the transitions and reductions at a state. *) val transitions: lr1state -> lr1state SymbolMap.t val outgoing_symbols: node -> Symbol.t list val transition: Symbol.t -> lr1state -> lr1state val reductions: lr1state -> (TerminalSet.t * Production.index) list (* Equality of states. The two states must have the same core. Then, they are equal if and only if their lookahead sets are pointwise equal. *) val equal: lr1state -> lr1state -> bool (* Subsumption between states. The two states must have the same core. Then, one subsumes the other if and only if their lookahead sets are (pointwise) in the subset relation. *) val subsume: lr1state -> lr1state -> bool (* A slightly modified version of Pager's weak compatibility criterion. The two states must have the same core. *) val compatible: lr1state -> lr1state -> bool (* This function determines whether two (core-equivalent) states can be merged without creating an end-of-stream conflict. *) val eos_compatible: lr1state -> lr1state -> bool (* This function determines whether two (core-equivalent) states can be merged without creating spurious reductions on the [error] token. *) val error_compatible: lr1state -> lr1state -> bool (* Union of two states. The two states must have the same core. The new state is obtained by pointwise union of the lookahead sets. *) val union: lr1state -> lr1state -> lr1state (* Restriction of a state to a set of tokens of interest. Every lookahead set is intersected with that set. *) val restrict: TerminalSet.t -> lr1state -> lr1state (* The following functions display: 1- a concrete state; 2- a state (only the kernel, not the closure); 3- the closure of a state. The first parameter is a fixed string that is added at the beginning of every line.
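For instance (an illustrative call), [print "  " s] describes the kernel of the state [s], with every line indented by two blanks.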
*) val print_concrete: string -> concretelr1state -> string val print: string -> lr1state -> string val print_closure: string -> lr1state -> string menhir-20171222/src/pprint.ml0000664000175000017500000007063413217215730016160 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This is an adaptation of Daan Leijen's [PPrint] library, which itself is based on the ideas developed by Philip Wadler in ``A Prettier Printer''. For more information, see: http://www.cs.uu.nl/~daan/pprint.html http://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf *) (* ------------------------------------------------------------------------- *) (* A uniform interface for output channels. *) module type OUTPUT = sig type channel val char: channel -> char -> unit val substring: channel -> string -> int (* offset *) -> int (* length *) -> unit end (* ------------------------------------------------------------------------- *) (* Two implementations of the above interface, respectively based on output channels and memory buffers. This compensates for the fact that ocaml's standard library does not allow creating an output channel out of a memory buffer (a regrettable omission). *) module ChannelOutput : OUTPUT with type channel = out_channel = struct type channel = out_channel let char = output_char let substring = output_substring end module BufferOutput : OUTPUT with type channel = Buffer.t = struct type channel = Buffer.t let char = Buffer.add_char let substring = Buffer.add_substring end (* ------------------------------------------------------------------------- *) (* Here is the algebraic data type of documents. It is analogous to Daan Leijen's version, but the binary constructor [Union] is replaced with the unary constructor [Group], and the constant [Line] is replaced with more general constructions, namely [IfFlat], which provides alternative forms depending on the current flattening mode, and [HardLine], which represents a newline character, and is invalid in flattening mode. *) type document = (* [Empty] is the empty document. *) | Empty (* [Char c] is a document that consists of the single character [c]. We enforce the invariant that [c] is not a newline character. *) | Char of char (* [String (s, ofs, len)] is a document that consists of the portion of the string [s] delimited by the offset [ofs] and the length [len]. We assume, but do not check, that this portion does not contain a newline character. *) | String of string * int * int (* [Blank n] is a document that consists of [n] blank characters. *) | Blank of int (* When in flattening mode, [IfFlat (d1, d2)] turns into the document [d1]. When not in flattening mode, it turns into the document [d2]. *) | IfFlat of document * document (* When in flattening mode, [HardLine] is illegal. When not in flattening mode, it represents a newline character, followed with an appropriate amount of indentation. A safe way of using [HardLine] is to only use it directly within the right branch of an [IfFlat] construct. *) | HardLine (* [Cat doc1 doc2] is the concatenation of the documents [doc1] and [doc2].
*) | Cat of document * document (* [Nest (j, doc)] is the document [doc], in which the indentation level has been increased by [j], that is, in which [j] blanks have been inserted after every newline character. *) | Nest of int * document (* [Group doc] represents an alternative: it is either a flattened form of [doc], in which occurrences of [Group] disappear and occurrences of [IfFlat] resolve to their left branch, or [doc] itself. *) | Group of document (* [Column f] is the document obtained by applying [f] to the current column number. *) | Column of (int -> document) (* [Nesting f] is the document obtained by applying [f] to the current indentation level, that is, the number of blanks that were printed at the beginning of the current line. *) | Nesting of (int -> document) (* ------------------------------------------------------------------------- *) (* A signature for document renderers. *) module type RENDERER = sig (* Output channels. *) type channel (* [pretty rfrac width channel document] pretty-prints the document [document] to the output channel [channel]. The parameter [width] is the maximum number of characters per line. The parameter [rfrac] is the ribbon width, a fraction relative to [width]. The ribbon width is the maximum number of non-indentation characters per line. *) val pretty: float -> int -> channel -> document -> unit (* [compact channel document] prints the document [document] to the output channel [channel]. No indentation is used. All newline instructions are respected, that is, no groups are flattened. *) val compact: channel -> document -> unit end (* ------------------------------------------------------------------------- *) (* The pretty rendering algorithm: preliminary declarations. *) (* The renderer is supposed to behave exactly like Daan Leijen's, although its implementation is quite radically different. Instead of relying on Haskell's lazy evaluation mechanism, we implement an abstract machine with mutable current state, forking, backtracking (via an explicit stack of choice points), and cut (disposal of earlier choice points). *) (* The renderer's input consists of an ordered sequence of documents. Each document carries an extra indentation level, akin to an implicit [Nest] constructor, and a ``flattening'' flag, which, if set, means that this document should be printed in flattening mode. *) (* An alternative coding style would be to avoid decorating each input document with an indentation level and a flattening mode, and allow the input sequence to contain instructions that set the current nesting level or reset the flattening mode. That would perhaps be slightly more readable, and slightly less efficient. *) type input = | INil | ICons of int * bool * document * input (* When possible (that is, when the stack is empty), the renderer writes directly to the output channel. Otherwise, output is buffered until either a failure point is reached (then, the buffered output is discarded) or a cut is reached (then, all buffered output is committed to the output channel). At all times, the length of the buffered output is at most one line. *) (* The buffered output consists of a list of characters and strings. It is stored in reverse order (the head of the list should be printed last). *) type output = | OEmpty | OChar of char * output | OString of string * int * int * output | OBlank of int * output (* The renderer maintains the following state record. For efficiency, the record is mutable; it is copied when the renderer forks, that is, at choice points. 
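(As a concrete illustration of the two width parameters stored below: with [width = 80] and [rfrac = 0.8], the ribbon width is 64, so that a line overflows as soon as it extends beyond column 80 or carries more than 64 characters beyond the current indentation.)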
*) type 'channel state = { (* The line width and ribbon width. *) width: int; ribbon: int; (* The output channel. *) channel: 'channel; (* The current indentation level. This is the number of blanks that were printed at the beginning of the current line. *) mutable indentation: int; (* The current column. *) mutable column: int; (* The renderer's input. For efficiency, the input is assumed to never be empty, and the leading [ICons] constructor is inlined within the state record. In other words, the fields [nest1], [flatten1], and [input1] concern the first input document, and the field [input] contains the rest of the input sequence. *) mutable indent1: int; mutable flatten1: bool; mutable input1: document; mutable input: input; (* The renderer's buffer output. *) mutable output: output; } (* The renderer maintains a stack of resumptions, that is, states in which execution should be resumed if the current thread of execution fails by lack of space on the current line. *) (* It is not difficult to prove that the stack is empty if and only if flattening mode is off. Furthermore, when flattening mode is on, all groups are ignored, so no new choice points are pushed onto the stack. As a result, the stack has height one at most at all times, so that the stack height is zero when flattening mode is off and one when flattening mode is on. *) type 'channel stack = 'channel state list (* ------------------------------------------------------------------------- *) (* The pretty rendering algorithm: code. *) (* The renderer is parameterized over an implementation of output channels. *) module Renderer (Output : OUTPUT) = struct type channel = Output.channel (* Printing blank space (indentation characters). *) let blank_length = 80 let blank_buffer = String.make blank_length ' ' let rec blanks channel n = if n <= 0 then () else if n <= blank_length then Output.substring channel blank_buffer 0 n else begin Output.substring channel blank_buffer 0 blank_length; blanks channel (n - blank_length) end (* Committing buffered output to the output channel. The list is printed in reverse order. The code is not tail recursive, but there is no risk of stack overflow, since the length of the buffered output cannot exceed one line. *) let rec commit channel = function | OEmpty -> () | OChar (c, output) -> commit channel output; Output.char channel c | OString (s, ofs, len, output) -> commit channel output; Output.substring channel s ofs len | OBlank (n, output) -> commit channel output; blanks channel n (* The renderer's abstract machine. *) (* The procedures [run], [shift], [emit_char], [emit_string], and [emit_blanks] are mutually recursive, and are tail recursive. They maintain a stack and a current state. The states in the stack, and the current state, are pairwise distinct, so that the current state can be mutated without affecting the contents of the stack. *) (* An invariant is: the buffered output is nonempty only when the stack is nonempty. The contrapositive is: if the stack is empty, then the buffered output is empty. Indeed, the fact that the stack is empty means that no choices were made, so we are not in a speculative mode of execution: as a result, all output can be sent directly to the output channel. On the contrary, when the stack is nonempty, there is a possibility that we might backtrack in the future, so all output should be held in a buffer. *) (* [run] is allowed to call itself recursively only when no material is printed. 
In that case, the check for failure is skipped -- indeed, this test is performed only within [shift]. *) let rec run (stack : channel stack) (state : channel state) : unit = (* Examine the first piece of input, as well as (in some cases) the current flattening mode. *) match state.input1, state.flatten1 with (* The first piece of input is an empty document. Discard it and continue. *) | Empty, _ -> shift stack state (* The first piece of input is a character. Emit it and continue. *) | Char c, _ -> emit_char stack state c (* The first piece of input is a string. Emit it and continue. *) | String (s, ofs, len), _ -> emit_string stack state s ofs len | Blank n, _ -> emit_blanks stack state n (* The first piece of input is a hard newline instruction. Such an instruction is valid only when flattening mode is off. *) (* We emit a newline character, followed by the prescribed amount of indentation. We update the current state to record how many indentation characters were printed and to reflect the new column number. Then, we discard the current piece of input and continue. *) | HardLine, flattening -> assert (not flattening); (* flattening mode must be off. *) assert (stack = []); (* since flattening mode is off, the stack must be empty. *) Output.char state.channel '\n'; let i = state.indent1 in blanks state.channel i; state.column <- i; state.indentation <- i; shift stack state (* The first piece of input is an [IfFlat] conditional instruction. *) | IfFlat (doc, _), true | IfFlat (_, doc), false -> state.input1 <- doc; run stack state (* The first piece of input is a concatenation operator. We take it apart and queue both documents in the input sequence. *) | Cat (doc1, doc2), _ -> state.input1 <- doc1; state.input <- ICons (state.indent1, state.flatten1, doc2, state.input); run stack state (* The first piece of input is a [Nest] operator. We increase the amount of indentation to be applied to the first input document. *) | Nest (j, doc), _ -> state.indent1 <- state.indent1 + j; state.input1 <- doc; run stack state (* The first piece of input is a [Group] operator, and flattening mode is currently off. This introduces a choice point: either we flatten this whole group, or we don't. We try the former possibility first: this is done by enabling flattening mode. Should this avenue fail, we push the current state, in which flattening mode is disabled, onto the stack. *) (* Note that the current state is copied before continuing, so that the state that is pushed on the stack is not affected by future modifications. This is a fork. *) | Group doc, false -> state.input1 <- doc; run (state :: stack) { state with flatten1 = true } (* The first piece of input is a [Group] operator, and flattening mode is currently on. The operator is ignored. *) | Group doc, true -> state.input1 <- doc; run stack state (* The first piece of input is a [Column] operator. The current column is fed into it, so as to produce a document, with which we continue. *) | Column f, _ -> state.input1 <- f state.column; run stack state (* The first piece of input is a [Nesting] operator. The current indentation level is fed into it, so as to produce a document, with which we continue. *) | Nesting f, _ -> state.input1 <- f state.indentation; run stack state (* [shift] discards the first document in the input sequence, so that the second input document, if there is one, becomes first. The renderer stops if there is none.
*) and shift stack state = assert (state.output = OEmpty || stack <> []); assert (state.flatten1 = (stack <> [])); (* If the stack is nonempty and we have exceeded either the width or the ribbon width parameters, then fail. Backtracking is implemented by discarding the current state, popping a state off the stack, and making it the current state. *) match stack with | resumption :: stack when state.column > state.width || state.column - state.indentation > state.ribbon -> run stack resumption | _ -> match state.input with | INil -> (* End of input. Commit any buffered output and stop. *) commit state.channel state.output | ICons (indent, flatten, head, tail) -> (* There is an input document. Move it one slot ahead and check if we are leaving flattening mode. *) state.indent1 <- indent; state.input1 <- head; state.input <- tail; if state.flatten1 && not flatten then begin (* Leaving flattening mode means success: we have flattened a certain group, and fitted it all on a line, without reaching a failure point. We would now like to commit our decision to flatten this group. This is a Prolog cut. We discard the stack of choice points, replacing it with an empty stack, and commit all buffered output. *) state.flatten1 <- flatten; (* false *) commit state.channel state.output; state.output <- OEmpty; run [] state end else run stack state (* [emit_char] prints a character (either to the output channel or to the output buffer), increments the current column, discards the first piece of input, and continues. *) and emit_char stack state c = begin match stack with | [] -> Output.char state.channel c | _ -> state.output <- OChar (c, state.output) end; state.column <- state.column + 1; shift stack state (* [emit_string] prints a string (either to the output channel or to the output buffer), updates the current column, discards the first piece of input, and continues. *) and emit_string stack state s ofs len = begin match stack with | [] -> Output.substring state.channel s ofs len | _ -> state.output <- OString (s, ofs, len, state.output) end; state.column <- state.column + len; shift stack state (* [emit_blanks] prints a blank string (either to the output channel or to the output buffer), updates the current column, discards the first piece of input, and continues. *) and emit_blanks stack state n = begin match stack with | [] -> blanks state.channel n | _ -> state.output <- OBlank (n, state.output) end; state.column <- state.column + n; shift stack state (* This is the renderer's main entry point. *) let pretty rfrac width channel document = run [] { width = width; ribbon = max 0 (min width (truncate (float_of_int width *. rfrac))); channel = channel; indentation = 0; column = 0; indent1 = 0; flatten1 = false; input1 = document; input = INil; output = OEmpty; } (* ------------------------------------------------------------------------- *) (* The compact rendering algorithm. 
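Note that, as the code below shows, an [IfFlat] construct resolves to its flat branch, while [HardLine] always produces a newline; [Nest] and [Group] are traversed transparently.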
*) let compact channel document = let column = ref 0 in let rec scan = function | Empty -> () | Char c -> Output.char channel c; column := !column + 1 | String (s, ofs, len) -> Output.substring channel s ofs len; column := !column + len | Blank n -> blanks channel n; column := !column + n | HardLine -> Output.char channel '\n'; column := 0 | Cat (doc1, doc2) -> scan doc1; scan doc2 | IfFlat (doc, _) | Nest (_, doc) | Group doc -> scan doc | Column f -> scan (f !column) | Nesting f -> scan (f 0) in scan document end (* ------------------------------------------------------------------------- *) (* Instantiating the renderers for the two kinds of output channels. *) module Channel = Renderer(ChannelOutput) module Buffer = Renderer(BufferOutput) (* ------------------------------------------------------------------------- *) (* Constructors. *) let empty = Empty let (^^) x y = match x, y with | Empty, x | x, Empty -> x | _, _ -> Cat (x, y) let ifflat doc1 doc2 = IfFlat (doc1, doc2) let hardline = HardLine let char c = assert (c <> '\n'); Char c let substring s ofs len = if len = 0 then Empty else String (s, ofs, len) let text s = substring s 0 (String.length s) let blank n = if n = 0 then Empty else Blank n let nest i x = assert (i >= 0); Nest (i, x) let column f = Column f let nesting f = Nesting f let group x = Group x (* ------------------------------------------------------------------------- *) (* Low-level combinators for alignment and indentation. *) let align d = column (fun k -> nesting (fun i -> nest (k - i) d ) ) let hang i d = align (nest i d) let indent i d = hang i (blank i ^^ d) (* ------------------------------------------------------------------------- *) (* High-level combinators. *) let lparen = char '(' let rparen = char ')' let langle = char '<' let rangle = char '>' let lbrace = char '{' let rbrace = char '}' let lbracket = char '[' let rbracket = char ']' let squote = char '\'' let dquote = char '"' let bquote = char '`' let semi = char ';' let colon = char ':' let comma = char ',' let space = char ' ' let dot = char '.' let sharp = char '#' let backslash = char '\\' let equals = char '=' let qmark = char '?' let tilde = char '~' let at = char '@' let percent = char '%' let dollar = char '$' let caret = char '^' let ampersand = char '&' let star = char '*' let plus = char '+' let minus = char '-' let underscore = char '_' let bang = char '!' 
let bar = char '|' let break i = ifflat (text (String.make i ' ')) hardline let break0 = ifflat empty hardline let break1 = ifflat space hardline let string s = let n = String.length s in let rec chop i = try let j = String.index_from s i '\n' in substring s i (j - i) ^^ break1 ^^ chop (j + 1) with Not_found -> substring s i (n - i) in chop 0 let group_break1 = group break1 let words s = let n = String.length s in let rec blank accu i = (* we have skipped over at least one blank character *) if i = n then accu ^^ group_break1 else match s.[i] with | ' ' | '\t' | '\n' | '\r' -> blank accu (i + 1) | _ -> word break1 accu i (i + 1) and word prefix accu i j = (* we have skipped over at least one non-blank character *) if j = n then accu ^^ group (prefix ^^ substring s i (j - i)) else match s.[j] with | ' ' | '\t' | '\n' | '\r' -> blank (accu ^^ group (prefix ^^ substring s i (j - i))) (j + 1) | _ -> word prefix accu i (j + 1) in if n = 0 then empty else match s.[0] with | ' ' | '\t' | '\n' | '\r' -> blank empty 1 | _ -> word empty empty 0 1 let enclose l r x = l ^^ x ^^ r let squotes = enclose squote squote let dquotes = enclose dquote dquote let bquotes = enclose bquote bquote let braces = enclose lbrace rbrace let parens = enclose lparen rparen let angles = enclose langle rangle let brackets = enclose lbracket rbracket let fold f docs = List.fold_right f docs empty let rec fold1 f docs = match docs with | [] -> empty | [ doc ] -> doc | doc :: docs -> f doc (fold1 f docs) let rec fold1map f g docs = match docs with | [] -> empty | [ doc ] -> g doc | doc :: docs -> let doc = g doc in (* force left-to-right evaluation *) f doc (fold1map f g docs) let sepmap sep g docs = fold1map (fun x y -> x ^^ sep ^^ y) g docs let optional f = function | None -> empty | Some x -> f x let group1 d = group (nest 1 d) let group2 d = group (nest 2 d) module Operators = struct let ( !^ ) = text let ( ^^ ) = ( ^^ ) let ( ^/^ ) x y = x ^^ break1 ^^ y let ( ^//^ ) x y = group (x ^^ nest 2 (break1 ^^ y)) let ( ^@^ ) x y = group (x ^^ break1 ^^ y) let ( ^@@^ ) x y = group2 (x ^^ break1 ^^ y) end open Operators let prefix op x = !^op ^//^ x let infix op x y = (x ^^ space ^^ !^op) ^//^ y let infix_dot op x y = group2 ((x ^^ !^op) ^^ break0 ^^ y) let infix_com op x y = x ^^ !^op ^^ group_break1 ^^ y let surround n sep open_doc contents close_doc = group (open_doc ^^ nest n (sep ^^ contents) ^^ sep ^^ close_doc) let surround1 open_txt contents close_txt = surround 1 break0 !^open_txt contents !^close_txt let surround2 open_txt contents close_txt = surround 2 break1 !^open_txt contents !^close_txt let soft_surround n sep open_doc contents close_doc = group (open_doc ^^ nest n (group sep ^^ contents) ^^ group (sep ^^ close_doc)) let seq indent break empty_seq open_seq sep_seq close_seq = function | [] -> empty_seq | xs -> surround indent break open_seq (fold1 (fun x xs -> x ^^ sep_seq ^^ xs) xs) close_seq let seq1 open_txt sep_txt close_txt = seq 1 break0 !^(open_txt ^ close_txt) !^open_txt (!^sep_txt ^^ break1) !^close_txt let seq2 open_txt sep_txt close_txt = seq 2 break1 !^(open_txt ^ close_txt) !^open_txt (!^sep_txt ^^ break1) !^close_txt let sprintf fmt = Printf.ksprintf string fmt (* A signature for value representations. 
This is compatible with the associated Camlp4 generator: SwitchValueRepresentation *) module type VALUE_REPRESENTATION = sig (* The type of value representations *) type t (* [variant type_name data_constructor_name tag arguments] Given information about the variant and its arguments, this function produces a new value representation. *) val variant : string -> string -> int -> t list -> t (* [record type_name fields] Given a type name and a list of record fields, this function produces the value representation of a record. *) val record : string -> (string * t) list -> t (* [tuple arguments] Given a list of value representations, this function produces a new value representation. *) val tuple : t list -> t (* ------------------------------------------------------------------------- *) (* Value representation for primitive types. *) val string : string -> t val int : int -> t val int32 : int32 -> t val int64 : int64 -> t val nativeint : nativeint -> t val float : float -> t val char : char -> t val bool : bool -> t val option : ('a -> t) -> 'a option -> t val list : ('a -> t) -> 'a list -> t val array : ('a -> t) -> 'a array -> t val ref : ('a -> t) -> 'a ref -> t (* Value representation for any other value. *) val unknown : string -> 'a -> t end module type DOCUMENT_VALUE_REPRESENTATION = VALUE_REPRESENTATION with type t = document (* please remove as soon as this becomes available in OCaml *) module MissingFloatRepr = struct let valid_float_lexeme s = let l = String.length s in let rec loop i = if i >= l then s ^ "." else match s.[i] with | '0' .. '9' | '-' -> loop (i+1) | _ -> s in loop 0 let float_repres f = match classify_float f with FP_nan -> "nan" | FP_infinite -> if f < 0.0 then "neg_infinity" else "infinity" | _ -> let s1 = Printf.sprintf "%.12g" f in if f = float_of_string s1 then valid_float_lexeme s1 else let s2 = Printf.sprintf "%.15g" f in if f = float_of_string s2 then valid_float_lexeme s2 else Printf.sprintf "%.18g" f end module ML = struct type t = document let tuple = seq1 "(" "," ")" let variant _ cons _ args = if args = [] then !^cons else !^cons ^^ tuple args let record _ fields = seq2 "{" ";" "}" (List.map (fun (k, v) -> infix ":" !^k v) fields) let option f = function | Some x -> !^"Some" ^^ tuple [f x] | None -> !^"None" let list f xs = seq2 "[" ";" "]" (List.map f xs) let array f xs = seq2 "[|" ";" "|]" (Array.to_list (Array.map f xs)) let ref f x = record "ref" ["contents", f !x] let float f = string (MissingFloatRepr.float_repres f) let int = sprintf "%d" let int32 = sprintf "%ld" let int64 = sprintf "%Ld" let nativeint = sprintf "%nd" let char = sprintf "%C" let bool = sprintf "%B" let string = sprintf "%S" let unknown tyname _ = sprintf "<%s>" tyname end (* Deprecated *) let line = ifflat space hardline let linebreak = ifflat empty hardline let softline = group line let softbreak = group linebreak menhir-20171222/src/TableInterpreter.ml0000664000175000017500000001615413217215730020116 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE.
*) (* *) (******************************************************************************) module MakeEngineTable (T : TableFormat.TABLES) = struct type state = int let number s = s type token = T.token type terminal = int type nonterminal = int type semantic_value = Obj.t let token2terminal = T.token2terminal let token2value = T.token2value let error_terminal = T.error_terminal let error_value = Obj.repr () (* The function [foreach_terminal] exploits the fact that the first component of [T.error] is [Terminal.n - 1], i.e., the number of terminal symbols, including [error] but not [#]. *) (* There is similar code in [InspectionTableInterpreter]. The code there contains an additional conversion of the type [terminal] to the type [xsymbol]. *) let rec foldij i j f accu = if i = j then accu else foldij (i + 1) j f (f i accu) let foreach_terminal f accu = let n, _ = T.error in foldij 0 n (fun i accu -> f i accu ) accu type production = int (* In principle, only non-start productions are exposed to the user, at type [production] or at type [int]. This is checked dynamically. *) let non_start_production i = assert (T.start <= i && i - T.start < Array.length T.semantic_action) let production_index i = non_start_production i; i let find_production i = non_start_production i; i let default_reduction state defred nodefred env = let code = PackedIntArray.get T.default_reduction state in if code = 0 then nodefred env else defred env (code - 1) let is_start prod = prod < T.start (* This auxiliary function helps access a compressed, two-dimensional matrix, like the action and goto tables. *) let unmarshal2 table i j = RowDisplacement.getget PackedIntArray.get PackedIntArray.get table i j let action state terminal value shift reduce fail env = match PackedIntArray.unflatten1 T.error state terminal with | 1 -> let action = unmarshal2 T.action state terminal in let opcode = action land 0b11 and param = action lsr 2 in if opcode >= 0b10 then (* 0b10 : shift/discard *) (* 0b11 : shift/nodiscard *) let please_discard = (opcode = 0b10) in shift env please_discard terminal value param else (* 0b01 : reduce *) (* 0b00 : cannot happen *) reduce env param | c -> assert (c = 0); fail env let goto_nt state nt = let code = unmarshal2 T.goto state nt in (* code = 1 + state *) code - 1 let goto_prod state prod = goto_nt state (PackedIntArray.get T.lhs prod) let maybe_goto_nt state nt = let code = unmarshal2 T.goto state nt in (* If [code] is 0, there is no outgoing transition. If [code] is [1 + state], there is a transition towards [state]. *) assert (0 <= code); if code = 0 then None else Some (code - 1) exception Error = T.Error type semantic_action = (state, semantic_value, token) EngineTypes.env -> (state, semantic_value) EngineTypes.stack let semantic_action prod = (* Indexing into the array [T.semantic_action] is off by [T.start], because the start productions do not have entries in this array. *) T.semantic_action.(prod - T.start) (* [may_reduce state prod] tests whether the state [state] is capable of reducing the production [prod]. This information could be determined in constant time if we were willing to create a bitmap for it, but that would take up a lot of space. Instead, we obtain this information by iterating over a line in the action table. This is costly, but this function is not normally used by the LR engine anyway; it is supposed to be used only by programmers who wish to develop error recovery strategies. 
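For illustration, an error recovery strategy might keep only those candidate states that allow a certain production to be reduced, e.g. [List.filter (fun s -> may_reduce s prod) candidates], where [candidates] and [prod] are hypothetical.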
*) (* In the future, if desired, we could memoize this function, so as to pay the cost in (memory) space only if and where this function is actually used. We could also replace [foreach_terminal] with a function [exists_terminal] which stops as soon as the accumulator is [true]. *) let may_reduce state prod = (* Test if there is a default reduction of [prod]. *) default_reduction state (fun () prod' -> prod = prod') (fun () -> (* If not, then for each terminal [t], ... *) foreach_terminal (fun t accu -> accu || (* ... test if there is a reduction of [prod] on [t]. *) action state t () (* shift: *) (fun () _ _ () _ -> false) (* reduce: *) (fun () prod' -> prod = prod') (* fail: *) (fun () -> false) () ) false ) () (* If [T.trace] is [None], then the logging functions do nothing. *) let log = match T.trace with Some _ -> true | None -> false module Log = struct open Printf let state state = match T.trace with | Some _ -> fprintf stderr "State %d:\n%!" state | None -> () let shift terminal state = match T.trace with | Some (terminals, _) -> fprintf stderr "Shifting (%s) to state %d\n%!" terminals.(terminal) state | None -> () let reduce_or_accept prod = match T.trace with | Some (_, productions) -> fprintf stderr "%s\n%!" productions.(prod) | None -> () let lookahead_token token startp endp = match T.trace with | Some (terminals, _) -> fprintf stderr "Lookahead token is now %s (%d-%d)\n%!" terminals.(token) startp.Lexing.pos_cnum endp.Lexing.pos_cnum | None -> () let initiating_error_handling () = match T.trace with | Some _ -> fprintf stderr "Initiating error handling\n%!" | None -> () let resuming_error_handling () = match T.trace with | Some _ -> fprintf stderr "Resuming error handling\n%!" | None -> () let handling_error state = match T.trace with | Some _ -> fprintf stderr "Handling error in state %d\n%!" state | None -> () end end menhir-20171222/src/astar.mli0000664000175000017500000000537613217215730016132 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This signature defines an implicit representation for graphs where edges have integer costs, there is a distinguished start node, and there is a set of distinguished goal nodes. It is also assumed that some geometric knowledge of the graph allows safely estimating the cost of shortest paths to goal nodes. If no such knowledge is available, [estimate] should be the constant zero function. *) module Make (G : sig (* Graph nodes. *) type node include Hashtbl.HashedType with type t := node (* Edge labels. *) type label (* The source node(s). *) val sources: (node -> unit) -> unit (* [successors n f] presents each of [n]'s successors, in an arbitrary order, to [f], together with the cost of the edge that was followed. *) val successors: node -> (label -> int -> node -> unit) -> unit (* An estimate of the cost of the shortest path from the supplied node to some goal node. This estimate must be a correct under-approximation of the actual cost. *) val estimate: node -> int end) : sig (* A path (from a target node back to some source node) is described by a series of labels and ends in a source node. 
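As an aside, here is an illustrative sketch of how the functor [Make] above might be used from the outside (the ten-node chain graph below is made up for the example, and [estimate] is the trivial under-approximation):

     module A = Astar.Make (struct
       type node = int
       let equal = (=)
       let hash = Hashtbl.hash
       type label = unit
       let sources f = f 0
       let successors n f = if n < 10 then f () 1 (n + 1)
       let estimate _ = 0
     end)

     let distance, path = A.search (fun _ -> ())

   Then, [distance] maps every reachable node to its distance from the source, and [path] maps it to a shortest path.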
*) type path = | Edge of G.label * path | Source of G.node (* A path can also be presented as a pair of a source node and a list of labels, which describe the edges from the source node to a target node. *) val reverse: path -> G.node * G.label list (* Search. Newly discovered nodes are presented to the user, in order of increasing distance from the source nodes, by invoking the user-supplied function [f]. At the end, a mapping of nodes to distances to the source nodes and a mapping of nodes to shortest paths are returned. *) val search: (G.node * path -> unit) -> (G.node -> int) * (G.node -> path) end menhir-20171222/src/unparameterizedPrinter.mli0000664000175000017500000000303213217215730021546 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This is a pretty-printer for grammars. *) (* If the [mode] parameter requests ``unit actions'', then semantic actions are dropped: that is, they are replaced with trivial semantic actions that return unit. Accordingly, all [%type] declarations are changed to unit. The prologue and epilogue are dropped. All bindings for semantic values are suppressed. If, furthermore, the [mode] parameter requests ``unit tokens'', then the types carried by tokens are changed to unit. *) val print: Settings.print_mode -> out_channel -> UnparameterizedSyntax.grammar -> unit menhir-20171222/src/traverse.ml0000664000175000017500000003270713217215730016500 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* Code for traversing or transforming [IL] terms. *) open IL open CodeBits (* This turns a list of value definitions into a hash table. It also counts and numbers the definitions. We assume that the left-hand side of every definition is a variable. *) let tabulate_defs (defs : valdef list) : int * (string, int * valdef) Hashtbl.t = let count = ref 0 in let table = Hashtbl.create 1023 in List.iter (fun def -> let k = !count in count := k + 1; Hashtbl.add table (pat2var def.valpat) (k, def) ) defs; !count, table (* This mixin class, used by [map] and [fold] below, helps maintain environments, which can be used to keep track of local variable bindings. *) class virtual ['env] env = object(self) (* The virtual method [pvar] records a local variable binding in the environment. 
*) method virtual pvar: 'env -> string -> 'env method pat env = function | PWildcard | PUnit -> env | PVar id -> self#pvar env id | PTuple ps | POr ps | PData (_, ps) -> self#pats env ps | PAnnot (p, _) -> self#pat env p | PRecord fps -> self#fpats env fps method pats env ps = List.fold_left self#pat env ps method fpats env fps = List.fold_left self#fpat env fps method fpat env (_, p) = self#pat env p end (* A class that helps transform expressions. The environment [env] can be used to keep track of local variable bindings. *) exception NoChange class virtual ['env] map = object (self) inherit ['env] env method expr (env : 'env) e = try match e with | EVar x -> self#evar env x | EFun (ps, e) -> self#efun env ps e | EApp (e, es) -> self#eapp env e es | ELet (bs, e) -> self#elet env bs e | EMatch (e, bs) -> self#ematch env e bs | EIfThen (e, e1) -> self#eifthen env e e1 | EIfThenElse (e, e1, e2) -> self#eifthenelse env e e1 e2 | ERaise e -> self#eraise env e | ETry (e, bs) -> self#etry env e bs | EUnit -> self#eunit env | EIntConst k -> self#eintconst env k | EStringConst s -> self#estringconst env s | EData (d, es) -> self#edata env d es | ETuple es -> self#etuple env es | EAnnot (e, t) -> self#eannot env e t | EMagic e -> self#emagic env e | ERepr _ -> self#erepr env e | ERecord fs -> self#erecord env fs | ERecordAccess (e, f) -> self#erecordaccess env e f | ERecordWrite (e, f, e1) -> self#erecordwrite env e f e1 | ETextual action -> self#etextual env action | EComment (s, e) -> self#ecomment env s e | EPatComment (s, p, e) -> self#epatcomment env s p e | EArray es -> self#earray env es | EArrayAccess (e, i) -> self#earrayaccess env e i with NoChange -> e method evar _env _x = raise NoChange method efun env ps e = let e' = self#expr (self#pats env ps) e in if e == e' then raise NoChange else EFun (ps, e') method eapp env e es = let e' = self#expr env e and es' = self#exprs env es in if e == e' && es == es' then raise NoChange else EApp (e', es') method elet env bs e = let env, bs' = self#bindings env bs in let e' = self#expr env e in if bs == bs' && e == e' then raise NoChange else ELet (bs', e') method ematch env e bs = let e' = self#expr env e and bs' = self#branches env bs in if e == e' && bs == bs' then raise NoChange else EMatch (e', bs') method eifthen env e e1 = let e' = self#expr env e and e1' = self#expr env e1 in if e == e' && e1 == e1' then raise NoChange else EIfThen (e', e1') method eifthenelse env e e1 e2 = let e' = self#expr env e and e1' = self#expr env e1 and e2' = self#expr env e2 in if e == e' && e1 == e1' && e2 == e2' then raise NoChange else EIfThenElse (e', e1', e2') method eraise env e = let e' = self#expr env e in if e == e' then raise NoChange else ERaise e' method etry env e bs = let e' = self#expr env e and bs' = self#branches env bs in if e == e' && bs == bs' then raise NoChange else ETry (e', bs') method eunit _env = raise NoChange method eintconst _env _k = raise NoChange method estringconst _env _s = raise NoChange method edata env d es = let es' = self#exprs env es in if es == es' then raise NoChange else EData (d, es') method etuple env es = let es' = self#exprs env es in if es == es' then raise NoChange else ETuple es' method eannot env e t = let e' = self#expr env e in if e == e' then raise NoChange else EAnnot (e', t) method emagic env e = let e' = self#expr env e in if e == e' then raise NoChange else EMagic e' method erepr env e = let e' = self#expr env e in if e == e' then raise NoChange else ERepr e' method erecord env fs = let fs' = self#fields env 
fs in if fs == fs' then raise NoChange else ERecord fs' method erecordaccess env e f = let e' = self#expr env e in if e == e' then raise NoChange else ERecordAccess (e', f) method erecordwrite env e f e1 = let e' = self#expr env e and e1' = self#expr env e1 in if e == e' && e1 == e1' then raise NoChange else ERecordWrite (e', f, e1') method earray env es = let es' = self#exprs env es in if es == es' then raise NoChange else EArray es' method earrayaccess env e i = let e' = self#expr env e in if e == e' then raise NoChange else EArrayAccess (e', i) method etextual _env _action = raise NoChange method ecomment env s e = let e' = self#expr env e in if e == e' then raise NoChange else EComment (s, e') method epatcomment env s p e = let e' = self#expr env e in if e == e' then raise NoChange else EPatComment (s, p, e') method exprs env es = Misc.smap (self#expr env) es method fields env fs = Misc.smap (self#field env) fs method field env ((f, e) as field) = let e' = self#expr env e in if e == e' then field else (f, e') method branches env bs = Misc.smap (self#branch env) bs method branch env b = let e = b.branchbody in let e' = self#expr (self#pat env b.branchpat) e in if e == e' then b else { b with branchbody = e' } (* The method [binding] produces a pair of an updated environment and a transformed binding. *) method binding env ((p, e) as b) = let e' = self#expr env e in self#pat env p, if e == e' then b else (p, e') (* For nested non-recursive bindings, the environment produced by each binding is used to traverse the following bindings. The method [binding] produces a pair of an updated environment and a transformed list of bindings. *) method bindings env bs = Misc.smapa self#binding env bs method valdef env def = let e = def.valval in let e' = self#expr env e in if e == e' then def else { def with valval = e' } method valdefs env defs = Misc.smap (self#valdef env) defs end (* A class that helps iterate, or fold, over expressions. 
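For illustration, here is a sketch of the way this class might be instantiated so as to compute the free variables of an expression ([StringSet] is Menhir's string set module; the method [pvar] records a binding, while [evar] inspects a variable occurrence):

     class fv = object
       inherit [StringSet.t, StringSet.t] fold
       method pvar env x = StringSet.add x env
       method evar env accu x =
         if StringSet.mem x env then accu else StringSet.add x accu
     end

   The free variables of [e] are then [(new fv)#expr StringSet.empty StringSet.empty e].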
*) class virtual ['env, 'a] fold = object (self) inherit ['env] env method expr (env : 'env) (accu : 'a) e = match e with | EVar x -> self#evar env accu x | EFun (ps, e) -> self#efun env accu ps e | EApp (e, es) -> self#eapp env accu e es | ELet (bs, e) -> self#elet env accu bs e | EMatch (e, bs) -> self#ematch env accu e bs | EIfThen (e, e1) -> self#eifthen env accu e e1 | EIfThenElse (e, e1, e2) -> self#eifthenelse env accu e e1 e2 | ERaise e -> self#eraise env accu e | ETry (e, bs) -> self#etry env accu e bs | EUnit -> self#eunit env accu | EIntConst k -> self#eintconst env accu k | EStringConst s -> self#estringconst env accu s | EData (d, es) -> self#edata env accu d es | ETuple es -> self#etuple env accu es | EAnnot (e, t) -> self#eannot env accu e t | EMagic e -> self#emagic env accu e | ERepr _ -> self#erepr env accu e | ERecord fs -> self#erecord env accu fs | ERecordAccess (e, f) -> self#erecordaccess env accu e f | ERecordWrite (e, f, e1) -> self#erecordwrite env accu e f e1 | ETextual action -> self#etextual env accu action | EComment (s, e) -> self#ecomment env accu s e | EPatComment (s, p, e) -> self#epatcomment env accu s p e | EArray es -> self#earray env accu es | EArrayAccess (e, i) -> self#earrayaccess env accu e i method evar (_env : 'env) (accu : 'a) _x = accu method efun (env : 'env) (accu : 'a) ps e = let accu = self#expr (self#pats env ps) accu e in accu method eapp (env : 'env) (accu : 'a) e es = let accu = self#expr env accu e in let accu = self#exprs env accu es in accu method elet (env : 'env) (accu : 'a) bs e = let env, accu = self#bindings env accu bs in let accu = self#expr env accu e in accu method ematch (env : 'env) (accu : 'a) e bs = let accu = self#expr env accu e in let accu = self#branches env accu bs in accu method eifthen (env : 'env) (accu : 'a) e e1 = let accu = self#expr env accu e in let accu = self#expr env accu e1 in accu method eifthenelse (env : 'env) (accu : 'a) e e1 e2 = let accu = self#expr env accu e in let accu = self#expr env accu e1 in let accu = self#expr env accu e2 in accu method eraise (env : 'env) (accu : 'a) e = let accu = self#expr env accu e in accu method etry (env : 'env) (accu : 'a) e bs = let accu = self#expr env accu e in let accu = self#branches env accu bs in accu method eunit (_env : 'env) (accu : 'a) = accu method eintconst (_env : 'env) (accu : 'a) _k = accu method estringconst (_env : 'env) (accu : 'a) _s = accu method edata (env : 'env) (accu : 'a) _d es = let accu = self#exprs env accu es in accu method etuple (env : 'env) (accu : 'a) es = let accu = self#exprs env accu es in accu method eannot (env : 'env) (accu : 'a) e _t = let accu = self#expr env accu e in accu method emagic (env : 'env) (accu : 'a) e = let accu = self#expr env accu e in accu method erepr (env : 'env) (accu : 'a) e = let accu = self#expr env accu e in accu method erecord (env : 'env) (accu : 'a) fs = let accu = self#fields env accu fs in accu method erecordaccess (env : 'env) (accu : 'a) e _f = let accu = self#expr env accu e in accu method erecordwrite (env : 'env) (accu : 'a) e _f e1 = let accu = self#expr env accu e in let accu = self#expr env accu e1 in accu method earray (env : 'env) (accu : 'a) es = let accu = self#exprs env accu es in accu method earrayaccess (env : 'env) (accu : 'a) e _i = let accu = self#expr env accu e in accu method etextual (_env : 'env) (accu : 'a) _action = accu method ecomment (env : 'env) (accu : 'a) _s e = let accu = self#expr env accu e in accu method epatcomment (env : 'env) (accu : 'a) _s _p e = let accu = 
self#expr env accu e in accu method exprs (env : 'env) (accu : 'a) es = List.fold_left (self#expr env) accu es method fields (env : 'env) (accu : 'a) fs = List.fold_left (self#field env) accu fs method field (env : 'env) (accu : 'a) (_f, e) = let accu = self#expr env accu e in accu method branches (env : 'env) (accu : 'a) bs = List.fold_left (self#branch env) accu bs method branch (env : 'env) (accu : 'a) b = let accu = self#expr (self#pat env b.branchpat) accu b.branchbody in accu method binding ((env, accu) : 'env * 'a) (p, e) = let accu = self#expr env accu e in self#pat env p, accu method bindings (env : 'env) (accu : 'a) bs = List.fold_left self#binding (env, accu) bs method valdef (env : 'env) (accu : 'a) def = let accu = self#expr env accu def.valval in accu method valdefs (env : 'env) (accu : 'a) defs = List.fold_left (self#valdef env) accu defs end menhir-20171222/src/TableInterpreter.mli0000664000175000017500000000325313217215730020263 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. *) (* *) (******************************************************************************) (* This module provides a thin decoding layer for the generated tables, thus providing an API that is suitable for use by [Engine.Make]. It is part of [MenhirLib]. *) (* The exception [Error] is declared within the generated parser. This is preferable to pre-declaring it here, as it ensures that each parser gets its own, distinct [Error] exception. This is consistent with the code-based back-end. *) (* This functor is invoked by the generated parser. *) module MakeEngineTable (T : TableFormat.TABLES) : EngineTypes.TABLE with type state = int and type token = T.token and type semantic_value = Obj.t and type production = int and type terminal = int and type nonterminal = int menhir-20171222/src/codePieces.ml0000664000175000017500000001625513217215730016710 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module defines many internal naming conventions for use by the two code generators, [CodeBackend] and [TableBackend]. It also offers a few code generation facilities. *) open IL open CodeBits open Grammar (* ------------------------------------------------------------------------ *) (* Naming conventions. *) (* The type variable associated with a nonterminal [nt]. *) let ntvar nt = Infer.ntvar (Nonterminal.print true nt) (* The variable that holds the environment. This is a parameter to all functions. We do not make it a global variable because we wish to preserve re-entrancy. *) let env = prefix "env" (* A variable used to hold a semantic value. *) let semv = "_v" (* A variable used to hold a stack. *) let stack = prefix "stack" (* A variable used to hold a state. 
*) let state = prefix "s" (* A variable used to hold a token. *) let token = "_tok" (* Variables used to hold start and end positions. Do not change these names! They are chosen to coincide with the $startpos and $endpos keywords, which the lexer rewrites to _startpos and _endpos, so that binding these variables before executing a semantic action is meaningful. *) (* These names should agree with the printing function [Keyword.posvar]. *) let beforeendp = "_endpos__0_" let startp = "_startpos" let endp = "_endpos" (* ------------------------------------------------------------------------ *) (* Types for semantic values. *) (* [semvtypent nt] is the type of the semantic value associated with nonterminal [nt]. *) let semvtypent nt = match Nonterminal.ocamltype nt with | None -> (* [nt] has unknown type. If we have run [Infer], then this can't happen. However, running type inference is only an option, so we still have to deal with that case. *) TypVar (ntvar nt) | Some ocamltype -> (* [nt] has known type. *) TypTextual ocamltype (* [semvtypetok tok] is the type of the semantic value associated with token [tok]. There is no such type if the token does not have a semantic value. *) let semvtypetok tok = match Terminal.ocamltype tok with | None -> (* Token has unit type and is omitted in stack cell. *) [] | Some ocamltype -> (* Token has known type. *) [ TypTextual ocamltype ] (* [semvtype symbol] is the type of the semantic value associated with [symbol]. *) let semvtype = function | Symbol.T tok -> semvtypetok tok | Symbol.N nt -> [ semvtypent nt ] (* [symvalt] returns the empty list if the symbol at hand carries no semantic value and the singleton list [[f t]] if it carries a semantic value of type [t]. *) let symvalt symbol f = match semvtype symbol with | [] -> [] | [ t ] -> [ f t ] | _ -> assert false (* [symval symbol x] returns either the empty list or the singleton list [[x]], depending on whether [symbol] carries a semantic value. *) let symval symbol x = match semvtype symbol with | [] -> [] | [ _t ] -> [ x ] | _ -> assert false (* [tokval] is a version of [symval], specialized for terminal symbols. *) let tokval tok x = symval (Symbol.T tok) x (* ------------------------------------------------------------------------ *) (* Patterns for tokens. *) (* [tokpat tok] is a pattern that matches the token [tok], without binding its semantic value. *) let tokpat tok = PData (TokenType.tokendata (Terminal.print tok), tokval tok PWildcard) (* [tokpatv tok] is a pattern that matches the token [tok], and binds its semantic value, if it has one, to the variable [semv]. *) let tokpatv tok = PData (TokenType.tokendata (Terminal.print tok), tokval tok (PVar semv)) (* [tokspat toks] is a pattern that matches any token in the set [toks], without binding its semantic value. *) let tokspat toks = POr ( TerminalSet.fold (fun tok pats -> tokpat tok :: pats ) toks [] ) (* [destructuretokendef name codomain bindsemv branch] generates the definition of a function that destructures tokens. [name] is the name of the function that is generated. [codomain] is its return type. [bindsemv] tells whether the variable [semv] should be bound. [branch] is applied to each (non-pseudo) terminal and must produce code for each branch. 
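For instance, for a toy token type with two constructors [A] and
   [B of int], and with [branch] mapping every terminal to an integer
   code, the generated definition could take the following shape (a
   hypothetical sketch of the emitted code, shown with [bindsemv] set,
   so that the semantic value of [B] is bound to the variable [_v]):

     let f : token -> int =
       function
       | A -> 11
       | B _v -> 12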
*) let destructuretokendef name codomain bindsemv branch = { valpublic = false; valpat = PVar name; valval = EAnnot ( EFun ([ PVar token ], EMatch (EVar token, Terminal.fold (fun tok branches -> if Terminal.pseudo tok then branches else { branchpat = (if bindsemv then tokpatv else tokpat) tok; branchbody = branch tok } :: branches ) [] ) ), type2scheme (arrow TokenType.ttoken codomain) ) } (* ------------------------------------------------------------------------ *) (* A global variable holds the exception [Error]. *) (* We preallocate the [Error] exception and store it into a global variable. This allows saving code at the sites where the exception is raised. Don't change the conventional name [_eRR], it is shared with the lexer, which replaces occurrences of the [$syntaxerror] keyword with [(raise _eRR)]. *) let parse_error = "_eRR" let errorval = EVar parse_error let basics = "MenhirBasics" (* 2017/01/20 The name [basics] must be an unlikely name, as it might otherwise hide a user-defined module by the same name. *) let excvaldef = { valpublic = false; valpat = PVar parse_error; valval = EData (basics ^ "." ^ Interface.excname, []) (* 2016/06/23 We now use the qualified name [Basics.Error], instead of just [Error], so as to avoid OCaml's warning 41. *) } (* ------------------------------------------------------------------------ *) (* Define the internal sub-module [Basics], which contains the definitions of the exception [Error] and of the type [token]. Then, include this sub-module. This is used both in the code and table back-ends. *) let mbasics grammar = [ SIModuleDef (basics, MStruct ( SIExcDefs [ Interface.excdef ] :: interface_to_structure ( TokenType.tokentypedef grammar ) )); SIInclude (MVar basics); SIValDefs (false, [ excvaldef ]); ] menhir-20171222/src/Fix.mli0000664000175000017500000001015413217215730015534 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This code is described in the paper ``Lazy Least Fixed Points in ML''. *) (* -------------------------------------------------------------------------- *) (* Maps. *) (* We require imperative maps, that is, maps that can be updated in place. An implementation of persistent maps, such as the one offered by ocaml's standard library, can easily be turned into an implementation of imperative maps, so this is a weak requirement. *) module type IMPERATIVE_MAPS = sig type key type 'data t val create: unit -> 'data t val clear: 'data t -> unit val add: key -> 'data -> 'data t -> unit val find: key -> 'data t -> 'data val iter: (key -> 'data -> unit) -> 'data t -> unit end (* -------------------------------------------------------------------------- *) (* Properties. *) (* Properties must form a partial order, equipped with a least element, and must satisfy the ascending chain condition: every monotone sequence eventually stabilizes. *) (* [is_maximal] determines whether a property [p] is maximal with respect to the partial order. Only a conservative check is required: in any event, it is permitted for [is_maximal p] to return [false]. 
If [is_maximal p] returns [true], then [p] must have no upper bound other than itself. In particular, if properties form a lattice, then [p] must be the top element. This feature, not described in the paper, enables a couple of minor optimizations. *) module type PROPERTY = sig type property val bottom: property val equal: property -> property -> bool val is_maximal: property -> bool end (* -------------------------------------------------------------------------- *) (* The code is parametric in an implementation of maps over variables and in an implementation of properties. *) module Make (M : IMPERATIVE_MAPS) (P : PROPERTY) : sig type variable = M.key type property = P.property (* A valuation is a mapping of variables to properties. *) type valuation = variable -> property (* A right-hand side, when supplied with a valuation that gives meaning to its free variables, evaluates to a property. More precisely, a right-hand side is a monotone function of valuations to properties. *) type rhs = valuation -> property (* A system of equations is a mapping of variables to right-hand sides. *) type equations = variable -> rhs (* [lfp eqs] produces the least solution of the system of monotone equations [eqs]. *) (* It is guaranteed that, for each variable [v], the application [eqs v] is performed at most once (whereas the right-hand side produced by this application is, in general, evaluated multiple times). This guarantee can be used to perform costly pre-computation, or memory allocation, when [eqs] is applied to its first argument. *) (* When [lfp] is applied to a system of equations [eqs], it performs no actual computation. It produces a valuation, [get], which represents the least solution of the system of equations. The actual fixed point computation takes place, on demand, when [get] is applied. *) val lfp: equations -> valuation end menhir-20171222/src/InspectionTableInterpreter.mli0000664000175000017500000000414013217215730022313 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. *) (* *) (******************************************************************************) (* This functor is invoked inside the generated parser, in [--table] mode. It produces no code! It simply constructs the types [symbol] and [xsymbol] on top of the generated types [terminal] and [nonterminal]. *) module Symbols (T : sig type 'a terminal type 'a nonterminal end) : IncrementalEngine.SYMBOLS with type 'a terminal := 'a T.terminal and type 'a nonterminal := 'a T.nonterminal (* This functor is invoked inside the generated parser, in [--table] mode. It constructs the inspection API on top of the inspection tables described in [InspectionTableFormat]. 
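As an illustration of what the [Symbols] functor above constructs: it
   builds (a sketch, following [IncrementalEngine.SYMBOLS]) the two GADTs

     type 'a symbol =
       | T : 'a terminal -> 'a symbol
       | N : 'a nonterminal -> 'a symbol

     type xsymbol =
       | X : 'a symbol -> xsymbol

   so that, for a hypothetical terminal [T_ID : string terminal], the
   value [X (T T_ID)] packages it as an existentially quantified symbol.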
*) module Make (TT : TableFormat.TABLES) (IT : InspectionTableFormat.TABLES with type 'a lr1state = int) (ET : EngineTypes.TABLE with type terminal = int and type nonterminal = int and type semantic_value = Obj.t) (E : sig type 'a env = (ET.state, ET.semantic_value, ET.token) EngineTypes.env end) : IncrementalEngine.INSPECTION with type 'a terminal := 'a IT.terminal and type 'a nonterminal := 'a IT.nonterminal and type 'a lr1state := 'a IT.lr1state and type production := int and type 'a env := 'a E.env menhir-20171222/src/EngineTypes.ml0000664000175000017500000003331013217215730017066 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. *) (* *) (******************************************************************************) (* This file defines several types and module types that are used in the specification of module [Engine]. *) (* --------------------------------------------------------------------------- *) (* It would be nice if we could keep the structure of stacks and environments hidden. However, stacks and environments must be accessible to semantic actions, so the following data structure definitions must be public. *) (* --------------------------------------------------------------------------- *) (* A stack is a linked list of cells. A sentinel cell -- which is its own successor -- is used to mark the bottom of the stack. The sentinel cell itself is not significant -- it contains dummy values. *) type ('state, 'semantic_value) stack = { (* The state that we should go back to if we pop this stack cell. *) (* This convention means that the state contained in the top stack cell is not the current state [env.current]. It also means that the state found within the sentinel is a dummy -- it is never consulted. This convention is the same as that adopted by the code-based back-end. *) state: 'state; (* The semantic value associated with the chunk of input that this cell represents. *) semv: 'semantic_value; (* The start and end positions of the chunk of input that this cell represents. *) startp: Lexing.position; endp: Lexing.position; (* The next cell down in the stack. If this is a self-pointer, then this cell is the sentinel, and the stack is conceptually empty. *) next: ('state, 'semantic_value) stack; } (* --------------------------------------------------------------------------- *) (* A parsing environment contains all of the parser's state (except for the current program point). *) type ('state, 'semantic_value, 'token) env = { (* If this flag is true, then the first component of [env.triple] should be ignored, as it has been logically overwritten with the [error] pseudo-token. *) error: bool; (* The last token that was obtained from the lexer, together with its start and end positions. Warning: before the first call to the lexer has taken place, a dummy (and possibly invalid) token is stored here. *) triple: 'token * Lexing.position * Lexing.position; (* The stack. In [CodeBackend], it is passed around on its own, whereas, here, it is accessed via the environment. *) stack: ('state, 'semantic_value) stack; (* The current state. 
In [CodeBackend], it is passed around on its own, whereas, here, it is accessed via the environment. *) current: 'state; } (* --------------------------------------------------------------------------- *) (* This signature describes the parameters that must be supplied to the LR engine. *) module type TABLE = sig (* The type of automaton states. *) type state (* States are numbered. *) val number: state -> int (* The type of tokens. These can be thought of as real tokens, that is, tokens returned by the lexer. They carry a semantic value. This type does not include the [error] pseudo-token. *) type token (* The type of terminal symbols. These can be thought of as integer codes. They do not carry a semantic value. This type does include the [error] pseudo-token. *) type terminal (* The type of nonterminal symbols. *) type nonterminal (* The type of semantic values. *) type semantic_value (* A token is conceptually a pair of a (non-[error]) terminal symbol and a semantic value. The following two functions are the pair projections. *) val token2terminal: token -> terminal val token2value: token -> semantic_value (* Even though the [error] pseudo-token is not a real token, it is a terminal symbol. Furthermore, for regularity, it must have a semantic value. *) val error_terminal: terminal val error_value: semantic_value (* [foreach_terminal] allows iterating over all terminal symbols. *) val foreach_terminal: (terminal -> 'a -> 'a) -> 'a -> 'a (* The type of productions. *) type production val production_index: production -> int val find_production: int -> production (* If a state [s] has a default reduction on production [prod], then, upon entering [s], the automaton should reduce [prod] without consulting the lookahead token. The following function allows determining which states have default reductions. *) (* Instead of returning a value of a sum type -- either [DefRed prod], or [NoDefRed] -- it accepts two continuations, and invokes just one of them. This mechanism allows avoiding a memory allocation. *) val default_reduction: state -> ('env -> production -> 'answer) -> ('env -> 'answer) -> 'env -> 'answer (* An LR automaton can normally take three kinds of actions: shift, reduce, or fail. (Acceptance is a particular case of reduction: it consists in reducing a start production.) *) (* There are two variants of the shift action. [shift/discard s] instructs the automaton to discard the current token, request a new one from the lexer, and move to state [s]. [shift/nodiscard s] instructs it to move to state [s] without requesting a new token. This instruction should be used when [s] has a default reduction on [#]. See [CodeBackend.gettoken] for details. *) (* This is the automaton's action table. It maps a pair of a state and a terminal symbol to an action. *) (* Instead of returning a value of a sum type -- one of shift/discard, shift/nodiscard, reduce, or fail -- this function accepts three continuations, and invokes just one of them. This mechanism allows avoiding a memory allocation. 
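For illustration, such a continuation-passing [action] function could
   take the following shape. This is a hypothetical sketch: the function
   [lookup_action] and the constructors [Shift], [Reduce] and [Fail] are
   made up for the purpose of the example.

     let action s t v shift reduce fail env =
       match lookup_action s t with
       | Shift (please_discard, s') -> shift env please_discard t v s'
       | Reduce prod -> reduce env prod
       | Fail -> fail env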
*) (* In summary, the parameters to [action] are as follows: - the first two parameters, a state and a terminal symbol, are used to look up the action table; - the next parameter is the semantic value associated with the above terminal symbol; it is not used, only passed along to the shift continuation, as explained below; - the shift continuation expects an environment; a flag that tells whether to discard the current token; the terminal symbol that is being shifted; its semantic value; and the target state of the transition; - the reduce continuation expects an environment and a production; - the fail continuation expects an environment; - the last parameter is the environment; it is not used, only passed along to the selected continuation. *) val action: state -> terminal -> semantic_value -> ('env -> bool -> terminal -> semantic_value -> state -> 'answer) -> ('env -> production -> 'answer) -> ('env -> 'answer) -> 'env -> 'answer (* This is the automaton's goto table. This table maps a pair of a state and a nonterminal symbol to a new state. By extension, it also maps a pair of a state and a production to a new state. *) (* The function [goto_nt] can be applied to [s] and [nt] ONLY if the state [s] has an outgoing transition labeled [nt]. Otherwise, its result is undefined. Similarly, the call [goto_prod prod s] is permitted ONLY if the state [s] has an outgoing transition labeled with the nonterminal symbol [lhs prod]. The function [maybe_goto_nt] involves an additional dynamic check and CAN be called even if there is no outgoing transition. *) val goto_nt : state -> nonterminal -> state val goto_prod: state -> production -> state val maybe_goto_nt: state -> nonterminal -> state option (* [is_start prod] tells whether the production [prod] is a start production. *) val is_start: production -> bool (* By convention, a semantic action is responsible for: 1. fetching whatever semantic values and positions it needs off the stack; 2. popping an appropriate number of cells off the stack, as dictated by the length of the right-hand side of the production; 3. computing a new semantic value, as well as new start and end positions; 4. pushing a new stack cell, which contains the three values computed in step 3; 5. returning the new stack computed in steps 2 and 4. Point 1 is essentially forced upon us: if semantic values were fetched off the stack by this interpreter, then the calling convention for semantic actions would be variadic: not all semantic actions would have the same number of arguments. The rest follows rather naturally. *) (* Semantic actions are allowed to raise [Error]. *) exception Error type semantic_action = (state, semantic_value, token) env -> (state, semantic_value) stack val semantic_action: production -> semantic_action (* [may_reduce state prod] tests whether the state [state] is capable of reducing the production [prod]. This function is currently costly and is not used by the core LR engine. It is used in the implementation of certain functions, such as [force_reduction], which allow the engine to be driven programmatically. *) val may_reduce: state -> production -> bool (* The LR engine requires a number of hooks, which are used for logging. *) (* The comments below indicate the conventional messages that correspond to these hooks in the code-based back-end; see [CodeBackend]. *) (* If the flag [log] is false, then the logging functions are not called. If it is [true], then they are called. 
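Schematically, a typical call site inside the engine reads as follows,
   where [tok] and [s'] stand for the terminal being shifted and the
   target state (a sketch):

     if log then
       Log.shift tok s'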
*) val log : bool module Log : sig (* State %d: *) val state: state -> unit (* Shifting () to state *) val shift: terminal -> state -> unit (* Reducing a production should be logged either as a reduction event (for regular productions) or as an acceptance event (for start productions). *) (* Reducing production / Accepting *) val reduce_or_accept: production -> unit (* Lookahead token is now (-) *) val lookahead_token: terminal -> Lexing.position -> Lexing.position -> unit (* Initiating error handling *) val initiating_error_handling: unit -> unit (* Resuming error handling *) val resuming_error_handling: unit -> unit (* Handling error in state *) val handling_error: state -> unit end end (* --------------------------------------------------------------------------- *) (* This signature describes the monolithic (traditional) LR engine. *) (* In this interface, the parser controls the lexer. *) module type MONOLITHIC_ENGINE = sig type state type token type semantic_value (* An entry point to the engine requires a start state, a lexer, and a lexing buffer. It either succeeds and produces a semantic value, or fails and raises [Error]. *) exception Error val entry: state -> (Lexing.lexbuf -> token) -> Lexing.lexbuf -> semantic_value end (* --------------------------------------------------------------------------- *) (* The following signatures describe the incremental LR engine. *) (* First, see [INCREMENTAL_ENGINE] in the file [IncrementalEngine.ml]. *) (* The [start] function is set apart because we do not wish to publish it as part of the generated [parser.mli] file. Instead, the table back-end will publish specialized versions of it, with a suitable type cast. *) module type INCREMENTAL_ENGINE_START = sig (* [start] is an entry point. It requires a start state and a start position and begins the parsing process. If the lexer is based on an OCaml lexing buffer, the start position should be [lexbuf.lex_curr_p]. [start] produces a checkpoint, which usually will be an [InputNeeded] checkpoint. (It could be [Accepted] if this starting state accepts only the empty word. It could be [Rejected] if this starting state accepts no word at all.) It does not raise any exception. *) (* [start s pos] should really produce a checkpoint of type ['a checkpoint], for a fixed ['a] that depends on the state [s]. We cannot express this, so we use [semantic_value checkpoint], which is safe. The table back-end uses [Obj.magic] to produce safe specialized versions of [start]. *) type state type semantic_value type 'a checkpoint val start: state -> Lexing.position -> semantic_value checkpoint end (* --------------------------------------------------------------------------- *) (* This signature describes the LR engine, which combines the monolithic and incremental interfaces. *) module type ENGINE = sig include MONOLITHIC_ENGINE include IncrementalEngine.INCREMENTAL_ENGINE with type token := token and type 'a lr1state = state (* useful for us; hidden from the end user *) include INCREMENTAL_ENGINE_START with type state := state and type semantic_value := semantic_value and type 'a checkpoint := 'a checkpoint end menhir-20171222/src/tokenType.mli0000664000175000017500000000576013217215730016777 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. 
This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module deals with the definitions of the type(s) that describe the tokens and the terminal symbols. *) (* By default, following [ocamlyacc], we produce just one type, [token], which describes the tokens. A token contains a tag (a terminal symbol) and possibly a semantic value. *) (* In addition to that, in [--inspection] mode only, we produce a GADT which describes the terminal symbols. A terminal symbol is just a tag; it does not carry a semantic value. *) (* In this module, we also deal with [--only-tokens] and [--external-tokens]. If [--only-tokens] is specified on the command line, [produce_tokentypes] emits the type definition(s) and exit. If [--external-tokens M] is set, then the token type and the token GADT are defined as abbreviations for [M.token] and ['a M.terminal]. *) (* The conventional name of the [token] type, for use by the code generators. *) val ttoken: IL.typ (* [tokendata] maps the name of a token to a data constructor of the [token] type. (If [--external-tokens] is set, then it prefixes its argument with an appropriate OCaml module name. Otherwise, it is the identity.) *) val tokendata: string -> string (* The conventional name of the [terminal] type, a.k.a. the token GADT. This is an indexed type (i.e., it has one type parameter). Its data constructors carry zero value arguments. *) val tctokengadt: string val ttokengadt: IL.typ -> IL.typ (* [tokengadtdata] maps the name of a token to a data constructor of the token GADT. *) val tokengadtdata: string -> string (* The definitions of the token type and of the token GADT, for use by the code generators. Each of these lists defines zero or one type. *) val tokentypedef: UnparameterizedSyntax.grammar -> IL.interface val tokengadtdef: UnparameterizedSyntax.grammar -> IL.interface (* If [--only-tokens] is set, then [produce_tokentypes] writes the type definitions to the [.ml] and [.mli] files and stops Menhir. Otherwise, it does nothing. *) val produce_tokentypes: UnparameterizedSyntax.grammar -> unit menhir-20171222/src/patricia.ml0000664000175000017500000007760013217215730016442 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This is an implementation of Patricia trees, following Chris Okasaki's paper at the 1998 ML Workshop in Baltimore. Both big-endian and little-endian trees are provided. Both sets and maps are implemented on top of Patricia trees. *) (*i --------------------------------------------------------------------------------------------------------------- i*) (*s \mysection{Little-endian vs big-endian trees} *) (* A tree is little-endian if it expects the key's least significant bits to be tested first during a search. It is big-endian if it expects the key's most significant bits to be tested first. Most of the code is independent of this design choice, so it is written as a functor, parameterized by a small structure which defines endianness. 
Here is the interface which must be adhered to by such a structure. *) module Endianness = struct module type S = sig (* A mask is an integer with a single one bit (i.e. a power of 2). *) type mask = int (* [branching_bit] accepts two distinct integers and returns a mask which identifies the first bit where they differ. The meaning of ``first'' varies according to the endianness being implemented. *) val branching_bit: int -> int -> mask (* [mask i m] returns an integer [i'], where all bits which [m] says are relevant are identical to those in [i], and all others are set to some unspecified, but fixed value. Which bits are ``relevant'' according to a given mask varies according to the endianness being implemented. *) val mask: int -> mask -> int (* [shorter m1 m2] returns [true] if and only if [m1] describes a shorter prefix than [m2], i.e. if it makes fewer bits relevant. Which bits are ``relevant'' according to a given mask varies according to the endianness being implemented. *) val shorter: mask -> mask -> bool end (* Now, let us define [Little] and [Big], two possible [Endianness] choices. *) module Little = struct type mask = int let lowest_bit x = x land (-x) (* Performing a logical ``xor'' of [i0] and [i1] yields a bit field where all differences between [i0] and [i1] show up as one bits. (There must be at least one, since [i0] and [i1] are distinct.) The ``first'' one is the lowest bit in this bit field, since we are checking least significant bits first. *) let branching_bit i0 i1 = lowest_bit (i0 lxor i1) (* The ``relevant'' bits in an integer [i] are those which are found (strictly) to the right of the single one bit in the mask [m]. We keep these bits, and set all others to 0. *) let mask i m = i land (m-1) (* The smaller [m] is, the fewer bits are relevant. *) let shorter = (<) end module Big = struct type mask = int let lowest_bit x = x land (-x) let rec highest_bit x = let m = lowest_bit x in if x = m then m else highest_bit (x - m) (* Performing a logical ``xor'' of [i0] and [i1] yields a bit field where all differences between [i0] and [i1] show up as one bits. (There must be at least one, since [i0] and [i1] are distinct.) The ``first'' one is the highest bit in this bit field, since we are checking most significant bits first. In Okasaki's paper, this loop is sped up by computing a conservative initial guess. Indeed, the bit at which the two prefixes disagree must be somewhere within the shorter prefix, so we can begin searching at the least-significant valid bit in the shorter prefix. Unfortunately, to allow computing the initial guess, the main code has to pass in additional parameters, e.g. a mask which describes the length of each prefix. This ``pollutes'' the endianness-independent code. For this reason, this optimization isn't implemented here. *) let branching_bit i0 i1 = highest_bit (i0 lxor i1) (* The ``relevant'' bits in an integer [i] are those which are found (strictly) to the left of the single one bit in the mask [m]. We keep these bits, and set all others to 0. Okasaki uses a different convention, which allows big-endian Patricia trees to masquerade as binary search trees. This feature does not seem to be useful here. *) let mask i m = i land (lnot (2*m-1)) (* The smaller [m] is, the more bits are relevant. 
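As a small worked example, consider [i0 = 0b0101] and [i1 = 0b0001],
   which differ in bit 2 only. Then:

     Little.branching_bit i0 i1   evaluates to   0b0100
     Little.mask i0 0b0100        evaluates to   0b0001
     Big.branching_bit i0 i1      evaluates to   0b0100
     Big.mask i0 0b0100           evaluates to   0b0000

   In the little-endian case, the bits to the right of the mask are
   kept; in the big-endian case, the bits to its left are kept.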
*) let shorter = (>) end end (*i --------------------------------------------------------------------------------------------------------------- i*) (*s \mysection{Patricia-tree-based maps} *) module Make (X : Endianness.S) = struct (* Patricia trees are maps whose keys are integers. *) type key = int (* A tree is either empty, or a leaf node, containing both the integer key and a piece of data, or a binary node. Each binary node carries two integers. The first one is the longest common prefix of all keys in this sub-tree. The second integer is the branching bit. It is an integer with a single one bit (i.e. a power of 2), which describes the bit being tested at this node. *) type 'a t = | Empty | Leaf of int * 'a | Branch of int * X.mask * 'a t * 'a t (* The empty map. *) let empty = Empty (* [choose m] returns an arbitrarily chosen binding in [m], if [m] is nonempty, and raises [Not_found] otherwise. *) let rec choose = function | Empty -> raise Not_found | Leaf (key, data) -> key, data | Branch (_, _, tree0, _) -> choose tree0 (* [lookup k m] looks up the value associated to the key [k] in the map [m], and raises [Not_found] if no value is bound to [k]. This implementation takes branches \emph{without} checking whether the key matches the prefix found at the current node. This means that a query for a non-existent key shall be detected only when finally reaching a leaf, rather than higher up in the tree. This strategy is better when (most) queries are expected to be successful. *) let rec lookup key = function | Empty -> raise Not_found | Leaf (key', data) -> if key = key' then data else raise Not_found | Branch (_, mask, tree0, tree1) -> lookup key (if (key land mask) = 0 then tree0 else tree1) let find = lookup (* [mem k m] tells whether the key [k] appears in the domain of the map [m]. *) let mem k m = try let _ = lookup k m in true with Not_found -> false (* The auxiliary function [join] merges two trees in the simple case where their prefixes disagree. Assume $t_0$ and $t_1$ are non-empty trees, with longest common prefixes $p_0$ and $p_1$, respectively. Further, suppose that $p_0$ and $p_1$ disagree, that is, neither prefix is contained in the other. Then, no matter how large $t_0$ and $t_1$ are, we can merge them simply by creating a new [Branch] node that has $t_0$ and $t_1$ as children! *) let join p0 t0 p1 t1 = let m = X.branching_bit p0 p1 in let p = X.mask p0 (* for instance *) m in if (p0 land m) = 0 then Branch(p, m, t0, t1) else Branch(p, m, t1, t0) (* The auxiliary function [match_prefix] tells whether a given key has a given prefix. More specifically, [match_prefix k p m] returns [true] if and only if the key [k] has prefix [p] up to bit [m]. Throughout our implementation of Patricia trees, prefixes are assumed to be in normal form, i.e. their irrelevant bits are set to some predictable value. Formally, we assume [X.mask p m] equals [p] whenever [p] is a prefix with [m] relevant bits. This allows implementing [match_prefix] using only one call to [X.mask]. On the other hand, this requires normalizing prefixes, as done e.g. in [join] above, where [X.mask p0 m] has to be used instead of [p0]. *) let match_prefix k p m = X.mask k m = p (* [fine_add decide k d m] returns a map whose bindings are all bindings in [m], plus a binding of the key [k] to the datum [d]. If a binding from [k] to [d0] already exists, then the resulting map contains a binding from [k] to [decide d0 d]. 
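For instance, with string data:

     let m = add 1 "a" (add 2 "b" empty) in
     fine_add (fun d0 d -> d0 ^ d) 1 "c" m

   yields a map that binds 1 to "ac" and still binds 2 to "b".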
*) type 'a decision = 'a -> 'a -> 'a exception Unchanged let basic_add decide k d m = let rec add t = match t with | Empty -> Leaf (k, d) | Leaf (k0, d0) -> if k = k0 then let d' = decide d0 d in if d' == d0 then raise Unchanged else Leaf (k, d') else join k (Leaf (k, d)) k0 t | Branch (p, m, t0, t1) -> if match_prefix k p m then if (k land m) = 0 then Branch (p, m, add t0, t1) else Branch (p, m, t0, add t1) else join k (Leaf (k, d)) p t in add m let strict_add k d m = basic_add (fun _ _ -> raise Unchanged) k d m let fine_add decide k d m = try basic_add decide k d m with Unchanged -> m (* [add k d m] returns a map whose bindings are all bindings in [m], plus a binding of the key [k] to the datum [d]. If a binding already exists for [k], it is overridden. *) let add k d m = fine_add (fun _old_binding new_binding -> new_binding) k d m (* [singleton k d] returns a map whose only binding is from [k] to [d]. *) let singleton k d = Leaf (k, d) (* [is_singleton m] returns [Some (k, d)] if [m] is a singleton map that maps [k] to [d]. Otherwise, it returns [None]. *) let is_singleton = function | Leaf (k, d) -> Some (k, d) | Empty | Branch _ -> None (* [is_empty m] returns [true] if and only if the map [m] defines no bindings at all. *) let is_empty = function | Empty -> true | Leaf _ | Branch _ -> false (* [cardinal m] returns [m]'s cardinal, that is, the number of keys it binds, or, in other words, its domain's cardinal. *) let rec cardinal = function | Empty -> 0 | Leaf _ -> 1 | Branch (_, _, t0, t1) -> cardinal t0 + cardinal t1 (* [remove k m] returns the map [m] deprived from any binding involving [k]. *) let remove key m = let rec remove = function | Empty -> raise Not_found | Leaf (key', _) -> if key = key' then Empty else raise Not_found | Branch (prefix, mask, tree0, tree1) -> if (key land mask) = 0 then match remove tree0 with | Empty -> tree1 | tree0 -> Branch (prefix, mask, tree0, tree1) else match remove tree1 with | Empty -> tree0 | tree1 -> Branch (prefix, mask, tree0, tree1) in try remove m with Not_found -> m (* [lookup_and_remove k m] looks up the value [v] associated to the key [k] in the map [m], and raises [Not_found] if no value is bound to [k]. The call returns the value [v], together with the map [m] deprived from the binding from [k] to [v]. *) let rec lookup_and_remove key = function | Empty -> raise Not_found | Leaf (key', data) -> if key = key' then data, Empty else raise Not_found | Branch (prefix, mask, tree0, tree1) -> if (key land mask) = 0 then match lookup_and_remove key tree0 with | data, Empty -> data, tree1 | data, tree0 -> data, Branch (prefix, mask, tree0, tree1) else match lookup_and_remove key tree1 with | data, Empty -> data, tree0 | data, tree1 -> data, Branch (prefix, mask, tree0, tree1) let find_and_remove = lookup_and_remove (* [fine_union decide m1 m2] returns the union of the maps [m1] and [m2]. If a key [k] is bound to [x1] (resp. [x2]) within [m1] (resp. [m2]), then [decide] is called. It is passed [x1] and [x2], and must return the value which shall be bound to [k] in the final map. The operation returns [m2] itself (as opposed to a copy of it) when its result is equal to [m2]. 
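For instance, if [m1] binds the key 1 to "x" while [m2] binds 1 to "y"
   and 2 to "z", then

     fine_union (fun d1 d2 -> d1 ^ d2) m1 m2

   produces a map that binds 1 to "xy" and 2 to "z": the first argument
   of [decide] comes from [m1], the second one from [m2].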
*) let reverse decision elem1 elem2 = decision elem2 elem1 let fine_union decide m1 m2 = let rec union s t = match s, t with | Empty, _ -> t | (Leaf _ | Branch _), Empty -> s | Leaf(key, value), _ -> fine_add (reverse decide) key value t | Branch _, Leaf(key, value) -> fine_add decide key value s | Branch(p, m, s0, s1), Branch(q, n, t0, t1) -> if (p = q) && (m = n) then (* The trees have the same prefix. Merge their sub-trees. *) let u0 = union s0 t0 and u1 = union s1 t1 in if t0 == u0 && t1 == u1 then t else Branch(p, m, u0, u1) else if (X.shorter m n) && (match_prefix q p m) then (* [q] contains [p]. Merge [t] with a sub-tree of [s]. *) if (q land m) = 0 then Branch(p, m, union s0 t, s1) else Branch(p, m, s0, union s1 t) else if (X.shorter n m) && (match_prefix p q n) then (* [p] contains [q]. Merge [s] with a sub-tree of [t]. *) if (p land n) = 0 then let u0 = union s t0 in if t0 == u0 then t else Branch(q, n, u0, t1) else let u1 = union s t1 in if t1 == u1 then t else Branch(q, n, t0, u1) else (* The prefixes disagree. *) join p s q t in union m1 m2 (* [union m1 m2] returns the union of the maps [m1] and [m2]. Bindings in [m2] take precedence over those in [m1]. *) let union m1 m2 = fine_union (fun _d d' -> d') m1 m2 (* [iter f m] invokes [f k x], in turn, for each binding from key [k] to element [x] in the map [m]. Keys are presented to [f] according to some unspecified, but fixed, order. *) let rec iter f = function | Empty -> () | Leaf (key, data) -> f key data | Branch (_, _, tree0, tree1) -> iter f tree0; iter f tree1 (* [fold f m seed] invokes [f k d accu], in turn, for each binding from key [k] to datum [d] in the map [m]. Keys are presented to [f] in increasing order according to the map's ordering. The initial value of [accu] is [seed]; then, at each new call, its value is the value returned by the previous invocation of [f]. The value returned by [fold] is the final value of [accu]. *) let rec fold f m accu = match m with | Empty -> accu | Leaf (key, data) -> f key data accu | Branch (_, _, tree0, tree1) -> fold f tree1 (fold f tree0 accu) (* [fold_rev] performs exactly the same job as [fold], but presents keys to [f] in the opposite order. *) let rec fold_rev f m accu = match m with | Empty -> accu | Leaf (key, data) -> f key data accu | Branch (_, _, tree0, tree1) -> fold_rev f tree0 (fold_rev f tree1 accu) (* It is valid to evaluate [iter2 f m1 m2] if and only if [m1] and [m2] have the same domain. Doing so invokes [f k x1 x2], in turn, for each key [k] bound to [x1] in [m1] and to [x2] in [m2]. Bindings are presented to [f] according to some unspecified, but fixed, order. *) let rec iter2 f t1 t2 = match t1, t2 with | Empty, Empty -> () | Leaf (key1, data1), Leaf (key2, data2) -> assert (key1 = key2); f key1 (* for instance *) data1 data2 | Branch (p1, m1, left1, right1), Branch (p2, m2, left2, right2) -> assert (p1 = p2); assert (m1 = m2); iter2 f left1 left2; iter2 f right1 right2 | _, _ -> assert false (* [map f m] returns the map obtained by composing the map [m] with the function [f]; that is, the map $k\mapsto f(m(k))$. *) let rec map f = function | Empty -> Empty | Leaf (key, data) -> Leaf(key, f data) | Branch (p, m, tree0, tree1) -> Branch (p, m, map f tree0, map f tree1) (* [endo_map] is similar to [map], but attempts to physically share its result with its input. This saves memory when [f] is the identity function. 
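For instance, the following assertion holds, since no sub-tree is
   rebuilt when [f] returns every datum unchanged:

     let m = add 1 "a" (add 2 "b" empty) in
     let m' = endo_map (fun d -> d) m in
     assert (m' == m)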
*) let rec endo_map f tree = match tree with | Empty -> tree | Leaf (key, data) -> let data' = f data in if data == data' then tree else Leaf(key, data') | Branch (p, m, tree0, tree1) -> let tree0' = endo_map f tree0 in let tree1' = endo_map f tree1 in if (tree0' == tree0) && (tree1' == tree1) then tree else Branch (p, m, tree0', tree1') (* [filter f m] returns a copy of the map [m] where only the bindings that satisfy [f] have been retained. *) let filter f m = fold (fun key data accu -> if f key data then add key data accu else accu ) m empty (* [iterator m] returns a stateful iterator over the map [m]. *) (* TEMPORARY performance could be improved, see JCF's paper *) let iterator m = let remainder = ref [ m ] in let rec next () = match !remainder with | [] -> None | Empty :: parent -> remainder := parent; next() | (Leaf (key, data)) :: parent -> remainder := parent; Some (key, data) | (Branch(_, _, s0, s1)) :: parent -> remainder := s0 :: s1 :: parent; next () in next (* If [dcompare] is an ordering over data, then [compare dcompare] is an ordering over maps. *) exception Got of int let compare dcompare m1 m2 = let iterator2 = iterator m2 in try iter (fun key1 data1 -> match iterator2() with | None -> raise (Got 1) | Some (key2, data2) -> let c = Pervasives.compare key1 key2 in if c <> 0 then raise (Got c) else let c = dcompare data1 data2 in if c <> 0 then raise (Got c) ) m1; match iterator2() with | None -> 0 | Some _ -> -1 with Got c -> c (*i --------------------------------------------------------------------------------------------------------------- i*) (*s \mysection{Patricia-tree-based sets} *) (* To enhance code sharing, it would be possible to implement maps as sets of pairs, or (vice-versa) to implement sets as maps to the unit element. However, both possibilities introduce some space and time inefficiency. To avoid it, we define each structure separately. *) module Domain = struct type element = int type t = | Empty | Leaf of int | Branch of int * X.mask * t * t (* The empty set. *) let empty = Empty (* [is_empty s] returns [true] if and only if the set [s] is empty. *) let is_empty = function | Empty -> true | Leaf _ | Branch _ -> false (* [singleton x] returns a set whose only element is [x]. *) let singleton x = Leaf x (* [is_singleton s] tests whether [s] is a singleton set. *) let is_singleton = function | Leaf _ -> true | Empty | Branch _ -> false (* [choose s] returns an arbitrarily chosen element of [s], if [s] is nonempty, and raises [Not_found] otherwise. *) let rec choose = function | Empty -> raise Not_found | Leaf x -> x | Branch (_, _, tree0, _) -> choose tree0 (* [cardinal s] returns [s]'s cardinal. *) let rec cardinal = function | Empty -> 0 | Leaf _ -> 1 | Branch (_, _, t0, t1) -> cardinal t0 + cardinal t1 (* [mem x s] returns [true] if and only if [x] appears in the set [s]. *) let rec mem x = function | Empty -> false | Leaf x' -> x = x' | Branch (_, mask, tree0, tree1) -> mem x (if (x land mask) = 0 then tree0 else tree1) (* The auxiliary function [join] merges two trees in the simple case where their prefixes disagree. *) let join p0 t0 p1 t1 = let m = X.branching_bit p0 p1 in let p = X.mask p0 (* for instance *) m in if (p0 land m) = 0 then Branch(p, m, t0, t1) else Branch(p, m, t1, t0) (* [add x s] returns a set whose elements are all elements of [s], plus [x]. 
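If [x] is already a member of [s], then [s] itself is returned, thanks
   to the exception [Unchanged] below, which aborts the rebuilding of the
   spine. For instance:

     let s = add 3 (add 1 empty) in
     assert (add 1 s == s)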
*) exception Unchanged let rec strict_add x t = match t with | Empty -> Leaf x | Leaf x0 -> if x = x0 then raise Unchanged else join x (Leaf x) x0 t | Branch (p, m, t0, t1) -> if match_prefix x p m then if (x land m) = 0 then Branch (p, m, strict_add x t0, t1) else Branch (p, m, t0, strict_add x t1) else join x (Leaf x) p t let add x s = try strict_add x s with Unchanged -> s (* [remove x s] returns a set whose elements are all elements of [s], except [x]. *) let remove x s = let rec strict_remove = function | Empty -> raise Not_found | Leaf x' -> if x = x' then Empty else raise Not_found | Branch (prefix, mask, tree0, tree1) -> if (x land mask) = 0 then match strict_remove tree0 with | Empty -> tree1 | tree0 -> Branch (prefix, mask, tree0, tree1) else match strict_remove tree1 with | Empty -> tree0 | tree1 -> Branch (prefix, mask, tree0, tree1) in try strict_remove s with Not_found -> s (* [union s1 s2] returns the union of the sets [s1] and [s2]. *) let rec union s t = match s, t with | Empty, _ -> t | _, Empty -> s | Leaf x, _ -> add x t | _, Leaf x -> add x s | Branch(p, m, s0, s1), Branch(q, n, t0, t1) -> if (p = q) && (m = n) then (* The trees have the same prefix. Merge their sub-trees. *) let u0 = union s0 t0 and u1 = union s1 t1 in if t0 == u0 && t1 == u1 then t else Branch(p, m, u0, u1) else if (X.shorter m n) && (match_prefix q p m) then (* [q] contains [p]. Merge [t] with a sub-tree of [s]. *) if (q land m) = 0 then Branch(p, m, union s0 t, s1) else Branch(p, m, s0, union s1 t) else if (X.shorter n m) && (match_prefix p q n) then (* [p] contains [q]. Merge [s] with a sub-tree of [t]. *) if (p land n) = 0 then let u0 = union s t0 in if t0 == u0 then t else Branch(q, n, u0, t1) else let u1 = union s t1 in if t1 == u1 then t else Branch(q, n, t0, u1) else (* The prefixes disagree. *) join p s q t (* [build] is a ``smart constructor''. It builds a [Branch] node with the specified arguments, but ensures that the newly created node does not have an [Empty] child. *) let build p m t0 t1 = match t0, t1 with | Empty, Empty -> Empty | Empty, _ -> t1 | _, Empty -> t0 | _, _ -> Branch(p, m, t0, t1) (* [inter s t] returns the set intersection of [s] and [t], that is, $s\cap t$. *) let rec inter s t = match s, t with | Empty, _ | _, Empty -> Empty | (Leaf x as s), t | t, (Leaf x as s) -> if mem x t then s else Empty | Branch(p, m, s0, s1), Branch(q, n, t0, t1) -> if (p = q) && (m = n) then (* The trees have the same prefix. Compute the intersections of their sub-trees. *) build p m (inter s0 t0) (inter s1 t1) else if (X.shorter m n) && (match_prefix q p m) then (* [q] contains [p]. Intersect [t] with a sub-tree of [s]. *) inter (if (q land m) = 0 then s0 else s1) t else if (X.shorter n m) && (match_prefix p q n) then (* [p] contains [q]. Intersect [s] with a sub-tree of [t]. *) inter s (if (p land n) = 0 then t0 else t1) else (* The prefixes disagree. *) Empty (* [disjoint s1 s2] returns [true] if and only if the sets [s1] and [s2] are disjoint, i.e. iff their intersection is empty. It is a specialized version of [inter], which uses less space. 
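For instance:

     assert (disjoint (add 1 empty) (add 2 empty));
     assert (not (disjoint (add 1 empty) (add 1 (add 2 empty))))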
*) exception NotDisjoint let disjoint s t = let rec inter s t = match s, t with | Empty, _ | _, Empty -> () | Leaf x, _ -> if mem x t then raise NotDisjoint | _, Leaf x -> if mem x s then raise NotDisjoint | Branch(p, m, s0, s1), Branch(q, n, t0, t1) -> if (p = q) && (m = n) then begin inter s0 t0; inter s1 t1 end else if (X.shorter m n) && (match_prefix q p m) then inter (if (q land m) = 0 then s0 else s1) t else if (X.shorter n m) && (match_prefix p q n) then inter s (if (p land n) = 0 then t0 else t1) else () in try inter s t; true with NotDisjoint -> false (* [iter f s] invokes [f x], in turn, for each element [x] of the set [s]. Elements are presented to [f] according to some unspecified, but fixed, order. *) let rec iter f = function | Empty -> () | Leaf x -> f x | Branch (_, _, tree0, tree1) -> iter f tree0; iter f tree1 (* [fold f s seed] invokes [f x accu], in turn, for each element [x] of the set [s]. Elements are presented to [f] according to some unspecified, but fixed, order. The initial value of [accu] is [seed]; then, at each new call, its value is the value returned by the previous invocation of [f]. The value returned by [fold] is the final value of [accu]. In other words, if $s = \{ x_1, x_2, \ldots, x_n \}$, where $x_1 < x_2 < \ldots < x_n$, then [fold f s seed] computes $([f]\,x_n\,\ldots\,([f]\,x_2\,([f]\,x_1\,[seed]))\ldots)$. *) let rec fold f s accu = match s with | Empty -> accu | Leaf x -> f x accu | Branch (_, _, s0, s1) -> fold f s1 (fold f s0 accu) (* [elements s] is a list of all elements in the set [s]. *) let elements s = fold (fun tl hd -> tl :: hd) s [] (* [iterator s] returns a stateful iterator over the set [s]. That is, if $s = \{ x_1, x_2, \ldots, x_n \}$, where $x_1 < x_2 < \ldots < x_n$, then [iterator s] is a function which, when invoked for the $k^{\text{th}}$ time, returns [Some]$x_k$, if $k\leq n$, and [None] otherwise. Such a function can be useful when one wishes to iterate over a set's elements, without being restricted by the call stack's discipline. For more comments about this algorithm, please see module [Baltree], which defines a similar one. *) let iterator s = let remainder = ref [ s ] in let rec next () = match !remainder with | [] -> None | Empty :: parent -> remainder := parent; next() | (Leaf x) :: parent -> remainder := parent; Some x | (Branch(_, _, s0, s1)) :: parent -> remainder := s0 :: s1 :: parent; next () in next (* [compare] is an ordering over sets. *) exception Got of int let compare s1 s2 = let iterator2 = iterator s2 in try iter (fun x1 -> match iterator2() with | None -> raise (Got 1) | Some x2 -> let c = Pervasives.compare x1 x2 in if c <> 0 then raise (Got c) ) s1; match iterator2() with | None -> 0 | Some _ -> -1 with Got c -> c (* [equal] implements equality over sets. *) let equal s1 s2 = compare s1 s2 = 0 (* [subset] implements the subset predicate over sets. In other words, [subset s t] returns [true] if and only if $s\subseteq t$. It is a specialized version of [diff]. *) exception NotSubset let subset s t = let rec diff s t = match s, t with | Empty, _ -> () | _, Empty | Branch _, Leaf _ -> raise NotSubset | Leaf x, _ -> if not (mem x t) then raise NotSubset | Branch(p, m, s0, s1), Branch(q, n, t0, t1) -> if (p = q) && (m = n) then begin diff s0 t0; diff s1 t1 end else if (X.shorter n m) && (match_prefix p q n) then diff s (if (p land n) = 0 then t0 else t1) else (* Either [q] contains [p], which means at least one of [s]'s sub-trees is not contained within [t], or the prefixes disagree. 
In either case, the subset relationship cannot possibly hold. *) raise NotSubset in try diff s t; true with NotSubset -> false end (*i --------------------------------------------------------------------------------------------------------------- i*) (*s \mysection{Relating sets and maps} *) (* Back to the world of maps. Let us now describe the relationship which exists between maps and their domains. *) (* [domain m] returns [m]'s domain. *) let rec domain = function | Empty -> Domain.Empty | Leaf (k, _) -> Domain.Leaf k | Branch (p, m, t0, t1) -> Domain.Branch (p, m, domain t0, domain t1) (* [lift f s] returns the map $k\mapsto f(k)$, where $k$ ranges over a set of keys [s]. *) let rec lift f = function | Domain.Empty -> Empty | Domain.Leaf k -> Leaf (k, f k) | Domain.Branch (p, m, t0, t1) -> Branch(p, m, lift f t0, lift f t1) (* [build] is a ``smart constructor''. It builds a [Branch] node with the specified arguments, but ensures that the newly created node does not have an [Empty] child. *) let build p m t0 t1 = match t0, t1 with | Empty, Empty -> Empty | Empty, _ -> t1 | _, Empty -> t0 | _, _ -> Branch(p, m, t0, t1) (* [corestrict m d] performs a co-restriction of the map [m] to the domain [d]. That is, it returns the map $k\mapsto m(k)$, where $k$ ranges over all keys bound in [m] but \emph{not} present in [d]. Its code resembles [diff]'s. *) let rec corestrict s t = match s, t with | Empty, _ | _, Domain.Empty -> s | Leaf (k, _), _ -> if Domain.mem k t then Empty else s | _, Domain.Leaf k -> remove k s | Branch(p, m, s0, s1), Domain.Branch(q, n, t0, t1) -> if (p = q) && (m = n) then build p m (corestrict s0 t0) (corestrict s1 t1) else if (X.shorter m n) && (match_prefix q p m) then if (q land m) = 0 then build p m (corestrict s0 t) s1 else build p m s0 (corestrict s1 t) else if (X.shorter n m) && (match_prefix p q n) then corestrict s (if (p land n) = 0 then t0 else t1) else s end (*i --------------------------------------------------------------------------------------------------------------- i*) (*s \mysection{Instantiating the functor} *) module Little = Make(Endianness.Little) module Big = Make(Endianness.Big) menhir-20171222/src/lookahead.mli0000664000175000017500000000271513217215730016741 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* These are the operations required of lookahead sets during a closure computation. This signature is exploited by the functor [Item.Closure]. *) module type S = sig (* The type of lookahead sets. *) type t (* The empty lookahead set. Redundant with the following, but convenient. *) val empty: t (* A concrete, constant set of terminal symbols. *) val constant: Grammar.TerminalSet.t -> t (* [union s1 s2] returns the union of [s1] and [s2]. *) val union: t -> t -> t end menhir-20171222/src/IncrementalEngine.ml0000664000175000017500000004644413217215730020237 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. 
All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. *) (* *) (******************************************************************************) type position = Lexing.position open General (* This signature describes the incremental LR engine. *) (* In this mode, the user controls the lexer, and the parser suspends itself when it needs to read a new token. *) module type INCREMENTAL_ENGINE = sig type token (* A value of type [production] is (an index for) a production. The start productions (which do not exist in an \mly file, but are constructed by Menhir internally) are not part of this type. *) type production (* The type ['a checkpoint] represents an intermediate or final state of the parser. An intermediate checkpoint is a suspension: it records the parser's current state, and allows parsing to be resumed. The parameter ['a] is the type of the semantic value that will eventually be produced if the parser succeeds. *) (* [Accepted] and [Rejected] are final checkpoints. [Accepted] carries a semantic value. *) (* [InputNeeded] is an intermediate checkpoint. It means that the parser wishes to read one token before continuing. *) (* [Shifting] is an intermediate checkpoint. It means that the parser is taking a shift transition. It exposes the state of the parser before and after the transition. The Boolean parameter tells whether the parser intends to request a new token after this transition. (It always does, except when it is about to accept.) *) (* [AboutToReduce] is an intermediate checkpoint. It means that the parser is about to perform a reduction step. It exposes the parser's current state as well as the production that is about to be reduced. *) (* [HandlingError] is an intermediate checkpoint. It means that the parser has detected an error and is currently handling it, in several steps. *) (* A value of type ['a env] represents a configuration of the automaton: current state, stack, lookahead token, etc. The parameter ['a] is the type of the semantic value that will eventually be produced if the parser succeeds. *) (* In normal operation, the parser works with checkpoints: see the functions [offer] and [resume]. However, it is also possible to work directly with environments (see the functions [pop], [force_reduction], and [feed]) and to reconstruct a checkpoint out of an environment (see [input_needed]). This is considered advanced functionality; its purpose is to allow error recovery strategies to be programmed by the user. *) type 'a env type 'a checkpoint = private | InputNeeded of 'a env | Shifting of 'a env * 'a env * bool | AboutToReduce of 'a env * production | HandlingError of 'a env | Accepted of 'a | Rejected (* [offer] allows the user to resume the parser after it has suspended itself with a checkpoint of the form [InputNeeded env]. [offer] expects the old checkpoint as well as a new token and produces a new checkpoint. It does not raise any exception. *) val offer: 'a checkpoint -> token * position * position -> 'a checkpoint (* [resume] allows the user to resume the parser after it has suspended itself with a checkpoint of the form [AboutToReduce (env, prod)] or [HandlingError env]. [resume] expects the old checkpoint and produces a new checkpoint. It does not raise any exception. 
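As an illustration, a complete driver can be hand-written out of [offer] and [resume] alone. The following sketch assumes a function [read] of type [unit -> token * position * position] that produces the next token; it also assumes, as the ready-made loops below do, that a [Shifting] checkpoint may be submitted to [resume]:

      let rec drive read checkpoint =
        match checkpoint with
        | InputNeeded _ ->
            drive read (offer checkpoint (read()))
        | Shifting _ | AboutToReduce _ | HandlingError _ ->
            drive read (resume checkpoint)
        | Accepted v ->
            v
        | Rejected ->
            failwith "syntax error"

   The function [loop], below, implements essentially this scheme.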
*) val resume: 'a checkpoint -> 'a checkpoint (* A token supplier is a function of no arguments which delivers a new token (together with its start and end positions) every time it is called. *) type supplier = unit -> token * position * position (* A pair of a lexer and a lexing buffer can be easily turned into a supplier. *) val lexer_lexbuf_to_supplier: (Lexing.lexbuf -> token) -> Lexing.lexbuf -> supplier (* The functions [offer] and [resume] are sufficient to write a parser loop. One can imagine many variations (which is why we expose these functions in the first place!). Here, we expose a few variations of the main loop, ready for use. *) (* [loop supplier checkpoint] begins parsing from [checkpoint], reading tokens from [supplier]. It continues parsing until it reaches a checkpoint of the form [Accepted v] or [Rejected]. In the former case, it returns [v]. In the latter case, it raises the exception [Error]. *) val loop: supplier -> 'a checkpoint -> 'a (* [loop_handle succeed fail supplier checkpoint] begins parsing from [checkpoint], reading tokens from [supplier]. It continues parsing until it reaches a checkpoint of the form [Accepted v] or [HandlingError env] (or [Rejected], but that should not happen, as [HandlingError _] will be observed first). In the former case, it calls [succeed v]. In the latter case, it calls [fail] with this checkpoint. It cannot raise [Error]. This means that Menhir's traditional error-handling procedure (which pops the stack until a state that can act on the [error] token is found) does not get a chance to run. Instead, the user can implement her own error handling code, in the [fail] continuation. *) val loop_handle: ('a -> 'answer) -> ('a checkpoint -> 'answer) -> supplier -> 'a checkpoint -> 'answer (* [loop_handle_undo] is analogous to [loop_handle], except it passes a pair of checkpoints to the failure continuation. The first (and oldest) checkpoint is the last [InputNeeded] checkpoint that was encountered before the error was detected. The second (and newest) checkpoint is where the error was detected, as in [loop_handle]. Going back to the first checkpoint can be thought of as undoing any reductions that were performed after seeing the problematic token. (These reductions must be default reductions or spurious reductions.) [loop_handle_undo] must initially be applied to an [InputNeeded] checkpoint. The parser's initial checkpoints satisfy this constraint. *) val loop_handle_undo: ('a -> 'answer) -> ('a checkpoint -> 'a checkpoint -> 'answer) -> supplier -> 'a checkpoint -> 'answer (* [shifts checkpoint] assumes that [checkpoint] has been obtained by submitting a token to the parser. It runs the parser from [checkpoint], through an arbitrary number of reductions, until the parser either accepts this token (i.e., shifts) or rejects it (i.e., signals an error). If the parser decides to shift, then [Some env] is returned, where [env] is the parser's state just before shifting. Otherwise, [None] is returned. *) (* It is desirable that the semantic actions be side-effect free, or that their side-effects be harmless (replayable). *) val shifts: 'a checkpoint -> 'a env option (* The function [acceptable] allows testing, after an error has been detected, which tokens would have been accepted at this point. It is implemented using [shifts]. Its argument should be an [InputNeeded] checkpoint. 
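For instance, if [candidates] is a user-supplied list of tokens, then (a sketch) [List.filter (fun t -> acceptable checkpoint t pos) candidates] computes which of these tokens would be accepted at the position [pos] where the error was detected.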
*) (* For completeness, one must undo any spurious reductions before carrying out this test -- that is, one must apply [acceptable] to the FIRST checkpoint that is passed by [loop_handle_undo] to its failure continuation. *) (* This test causes some semantic actions to be run! The semantic actions should be side-effect free, or their side-effects should be harmless. *) (* The position [pos] is used as the start and end positions of the hypothetical token, and may be picked up by the semantic actions. We suggest using the position where the error was detected. *) val acceptable: 'a checkpoint -> token -> position -> bool (* The abstract type ['a lr1state] describes the non-initial states of the LR(1) automaton. The index ['a] represents the type of the semantic value associated with this state's incoming symbol. *) type 'a lr1state (* The states of the LR(1) automaton are numbered (from 0 and up). *) val number: _ lr1state -> int (* Productions are numbered. *) (* [find_production i] requires the index [i] to be valid. Use with care. *) val production_index: production -> int val find_production: int -> production (* An element is a pair of a non-initial state [s] and a semantic value [v] associated with the incoming symbol of this state. The idea is, the value [v] was pushed onto the stack just before the state [s] was entered. Thus, for some type ['a], the state [s] has type ['a lr1state] and the value [v] has type ['a]. In other words, the type [element] is an existential type. *) type element = | Element: 'a lr1state * 'a * position * position -> element (* The parser's stack is (or, more precisely, can be viewed as) a stream of elements. The type [stream] is defined by the module [General]. *) (* As of 2017/03/31, the types [stream] and [stack] and the function [stack] are DEPRECATED. They might be removed in the future. An alternative way of inspecting the stack is via the functions [top] and [pop]. *) type stack = (* DEPRECATED *) element stream (* This is the parser's stack, a stream of elements. This stream is empty if the parser is in an initial state; otherwise, it is non-empty. The LR(1) automaton's current state is the one found in the top element of the stack. *) val stack: 'a env -> stack (* DEPRECATED *) (* [top env] returns the parser's top stack element. The state contained in this stack element is the current state of the automaton. If the stack is empty, [None] is returned. In that case, the current state of the automaton must be an initial state. *) val top: 'a env -> element option (* [pop_many i env] pops [i] cells off the automaton's stack. This is done via [i] successive invocations of [pop]. Thus, [pop_many 1] is [pop]. The index [i] must be nonnegative. The time complexity is O(i). *) val pop_many: int -> 'a env -> 'a env option (* [get i env] returns the parser's [i]-th stack element. The index [i] is 0-based: thus, [get 0] is [top]. If [i] is greater than or equal to the number of elements in the stack, [None] is returned. The time complexity is O(i). *) val get: int -> 'a env -> element option (* [current_state_number env] is (the integer number of) the automaton's current state. This works even if the automaton's stack is empty, in which case the current state is an initial state. This number can be passed as an argument to a [message] function generated by [menhir --compile-errors]. 
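For instance, a [fail] continuation for [loop_handle] might be written as follows (a sketch, where [message] is the function of type [int -> string] generated by [menhir --compile-errors]):

      let fail checkpoint =
        match checkpoint with
        | HandlingError env ->
            prerr_string (message (current_state_number env))
        | _ ->
            assert false (* [loop_handle] calls [fail] with a [HandlingError] checkpoint *)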
*) val current_state_number: 'a env -> int (* [equal env1 env2] tells whether the parser configurations [env1] and [env2] are equal in the sense that the automaton's current state is the same in [env1] and [env2] and the stack is *physically* the same in [env1] and [env2]. If [equal env1 env2] is [true], then the sequence of the stack elements, as observed via [pop] and [top], must be the same in [env1] and [env2]. Also, if [equal env1 env2] holds, then the checkpoints [input_needed env1] and [input_needed env2] must be equivalent. The function [equal] has time complexity O(1). *) val equal: 'a env -> 'a env -> bool (* These are the start and end positions of the current lookahead token. If invoked in an initial state, this function returns a pair of twice the initial position. *) val positions: 'a env -> position * position (* When applied to an environment taken from a checkpoint of the form [AboutToReduce (env, prod)], the function [env_has_default_reduction] tells whether the reduction that is about to take place is a default reduction. *) val env_has_default_reduction: 'a env -> bool (* [state_has_default_reduction s] tells whether the state [s] has a default reduction. This includes the case where [s] is an accepting state. *) val state_has_default_reduction: _ lr1state -> bool (* [pop env] returns a new environment, where the parser's top stack cell has been popped off. (If the stack is empty, [None] is returned.) This amounts to pretending that the (terminal or nonterminal) symbol that corresponds to this stack cell has not been read. *) val pop: 'a env -> 'a env option (* [force_reduction prod env] should be called only if in the state [env] the parser is capable of reducing the production [prod]. If this condition is satisfied, then this production is reduced, which means that its semantic action is executed (this can have side effects!) and the automaton makes a goto (nonterminal) transition. If this condition is not satisfied, [Invalid_argument _] is raised. *) val force_reduction: production -> 'a env -> 'a env (* [input_needed env] returns [InputNeeded env]. That is, out of an [env] that might have been obtained via a series of calls to the functions [pop], [force_reduction], [feed], etc., it produces a checkpoint, which can be used to resume normal parsing, by supplying this checkpoint as an argument to [offer]. *) (* This function should be used with some care. It could "mess up the lookahead" in the sense that it allows parsing to resume in an arbitrary state [s] with an arbitrary lookahead symbol [t], even though Menhir's reachability analysis (menhir --list-errors) might well think that it is impossible to reach this particular configuration. If one is using Menhir's new error reporting facility, this could cause the parser to reach an error state for which no error message has been prepared. *) val input_needed: 'a env -> 'a checkpoint end (* This signature is a fragment of the inspection API that is made available to the user when [--inspection] is used. This fragment contains type definitions for symbols. *) module type SYMBOLS = sig (* The type ['a terminal] represents a terminal symbol. The type ['a nonterminal] represents a nonterminal symbol. In both cases, the index ['a] represents the type of the semantic values associated with this symbol. The concrete definitions of these types are generated. *) type 'a terminal type 'a nonterminal (* The type ['a symbol] represents a terminal or nonterminal symbol. 
It is the disjoint union of the types ['a terminal] and ['a nonterminal]. *) type 'a symbol = | T : 'a terminal -> 'a symbol | N : 'a nonterminal -> 'a symbol (* The type [xsymbol] is an existentially quantified version of the type ['a symbol]. This type is useful in situations where the index ['a] is not statically known. *) type xsymbol = | X : 'a symbol -> xsymbol end (* This signature describes the inspection API that is made available to the user when [--inspection] is used. *) module type INSPECTION = sig (* The types of symbols are described above. *) include SYMBOLS (* The type ['a lr1state] is meant to be the same as in [INCREMENTAL_ENGINE]. *) type 'a lr1state (* The type [production] is meant to be the same as in [INCREMENTAL_ENGINE]. It represents a production of the grammar. A production can be examined via the functions [lhs] and [rhs] below. *) type production (* An LR(0) item is a pair of a production [prod] and a valid index [i] into this production. That is, if the length of [rhs prod] is [n], then [i] is comprised between 0 and [n], inclusive. *) type item = production * int (* Ordering functions. *) val compare_terminals: _ terminal -> _ terminal -> int val compare_nonterminals: _ nonterminal -> _ nonterminal -> int val compare_symbols: xsymbol -> xsymbol -> int val compare_productions: production -> production -> int val compare_items: item -> item -> int (* [incoming_symbol s] is the incoming symbol of the state [s], that is, the symbol that the parser must recognize before (has recognized when) it enters the state [s]. This function gives access to the semantic value [v] stored in a stack element [Element (s, v, _, _)]. Indeed, by case analysis on the symbol [incoming_symbol s], one discovers the type ['a] of the value [v]. *) val incoming_symbol: 'a lr1state -> 'a symbol (* [items s] is the set of the LR(0) items in the LR(0) core of the LR(1) state [s]. This set is not epsilon-closed. This set is presented as a list, in an arbitrary order. *) val items: _ lr1state -> item list (* [lhs prod] is the left-hand side of the production [prod]. This is always a non-terminal symbol. *) val lhs: production -> xsymbol (* [rhs prod] is the right-hand side of the production [prod]. This is a (possibly empty) sequence of (terminal or nonterminal) symbols. *) val rhs: production -> xsymbol list (* [nullable nt] tells whether the non-terminal symbol [nt] is nullable. That is, it is true if and only if this symbol produces the empty word [epsilon]. *) val nullable: _ nonterminal -> bool (* [first nt t] tells whether the FIRST set of the nonterminal symbol [nt] contains the terminal symbol [t]. That is, it is true if and only if [nt] produces a word that begins with [t]. *) val first: _ nonterminal -> _ terminal -> bool (* [xfirst] is analogous to [first], but expects a first argument of type [xsymbol] instead of [_ terminal]. *) val xfirst: xsymbol -> _ terminal -> bool (* [foreach_terminal] enumerates the terminal symbols, including [error]. [foreach_terminal_but_error] enumerates the terminal symbols, excluding [error]. *) val foreach_terminal: (xsymbol -> 'a -> 'a) -> 'a -> 'a val foreach_terminal_but_error: (xsymbol -> 'a -> 'a) -> 'a -> 'a (* The type [env] is meant to be the same as in [INCREMENTAL_ENGINE]. *) type 'a env (* [feed symbol startp semv endp env] causes the parser to consume the (terminal or nonterminal) symbol [symbol], accompanied with the semantic value [semv] and with the start and end positions [startp] and [endp]. 
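(As an illustration, assuming that the generated type [terminal] offers a constructor [T_ID : string terminal], the hypothetical call [feed (T T_ID) startp "x" endp env] consumes one identifier token.)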
Thus, the automaton makes a transition, and reaches a new state. The stack grows by one cell. This operation is permitted only if the current state (as determined by [env]) has an outgoing transition labeled with [symbol]. Otherwise, [Invalid_argument _] is raised. *) val feed: 'a symbol -> position -> 'a -> position -> 'b env -> 'b env end (* This signature combines the incremental API and the inspection API. *) module type EVERYTHING = sig include INCREMENTAL_ENGINE include INSPECTION with type 'a lr1state := 'a lr1state with type production := production with type 'a env := 'a env end menhir-20171222/src/pprint.mli0000664000175000017500000002203213217215730016320 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* ------------------------------------------------------------------------- *) (* Basic combinators for building documents. *) type document val empty: document val hardline: document val char: char -> document val substring: string -> int -> int -> document val text: string -> document val blank: int -> document val (^^): document -> document -> document val nest: int -> document -> document val column: (int -> document) -> document val nesting: (int -> document) -> document val group: document -> document val ifflat: document -> document -> document (* ------------------------------------------------------------------------- *) (* Low-level combinators for alignment and indentation. *) val align: document -> document val hang: int -> document -> document val indent: int -> document -> document (* ------------------------------------------------------------------------- *) (* High-level combinators for building documents. *) (* [break n] Puts [n] spaces in flat mode and a new line otherwise. 
Equivalent to: [ifflat (String.make n ' ') hardline] *) val break: int -> document (* [break0] equivalent to [break 0] *) val break0: document (* [break1] equivalent to [break 1] *) val break1: document val string: string -> document val words: string -> document val lparen: document val rparen: document val langle: document val rangle: document val lbrace: document val rbrace: document val lbracket: document val rbracket: document val squote: document val dquote: document val bquote: document val semi: document val colon: document val comma: document val space: document val dot: document val sharp: document val backslash: document val equals: document val qmark: document val tilde: document val at: document val percent: document val dollar: document val caret: document val ampersand: document val star: document val plus: document val minus: document val underscore: document val bang: document val bar: document val squotes: document -> document val dquotes: document -> document val bquotes: document -> document val braces: document -> document val parens: document -> document val angles: document -> document val brackets: document -> document val fold: (document -> document -> document) -> document list -> document val fold1: (document -> document -> document) -> document list -> document val fold1map: (document -> document -> document) -> ('a -> document) -> 'a list -> document val sepmap: document -> ('a -> document) -> 'a list -> document val optional: ('a -> document) -> 'a option -> document (* [prefix left right] Flat layout: [left] [right] Otherwise: [left] [right] *) val prefix: string -> document -> document (* [infix middle left right] Flat layout: [left] [middle] [right] Otherwise: [left] [middle] [right] *) val infix: string -> document -> document -> document (* [infix_com middle left right] Flat layout: [left][middle] [right] Otherwise: [left][middle] [right] *) val infix_com: string -> document -> document -> document (* [infix_dot middle left right] Flat layout: [left][middle][right] Otherwise: [left][middle] [right] *) val infix_dot: string -> document -> document -> document (* [surround nesting break open_doc contents close_doc] *) val surround: int -> document -> document -> document -> document -> document (* [surround1 open_txt contents close_txt] Flat: [open_txt][contents][close_txt] Otherwise: [open_txt] [contents] [close_txt] *) val surround1: string -> document -> string -> document (* [surround2 open_txt contents close_txt] Flat: [open_txt] [contents] [close_txt] Otherwise: [open_txt] [contents] [close_txt] *) val surround2: string -> document -> string -> document (* [soft_surround nesting break open_doc contents close_doc] *) val soft_surround: int -> document -> document -> document -> document -> document (* [seq indent break empty_seq open_seq sep_seq close_seq contents] *) val seq: int -> document -> document -> document -> document -> document -> document list -> document (* [seq1 open_seq sep_seq close_seq contents] Flat layout: [open_seq][contents][sep_seq]...[sep_seq][contents][close_seq] Otherwise: [open_seq] [contents][sep_seq]...[sep_seq][contents] [close_seq] *) val seq1: string -> string -> string -> document list -> document (* [seq2 open_seq sep_seq close_seq contents] Flat layout: [open_seq] [contents][sep_seq]...[sep_seq][contents] [close_seq] Otherwise: [open_seq] [contents][sep_seq]...[sep_seq][contents] [close_seq] *) val seq2: string -> string -> string -> document list -> document (* [group1 d] equivalent to [group (nest 1 d)] *) val 
group1: document -> document (* [group2 d] equivalent to [group (nest 2 d)] *) val group2: document -> document module Operators : sig val ( ^^ ) : document -> document -> document val ( !^ ) : string -> document val ( ^/^ ) : document -> document -> document val ( ^//^ ) : document -> document -> document val ( ^@^ ) : document -> document -> document val ( ^@@^ ) : document -> document -> document end (* ------------------------------------------------------------------------- *) (* A signature for document renderers. *) module type RENDERER = sig (* Output channels. *) type channel (* [pretty rfrac width channel document] pretty-prints the document [document] to the output channel [channel]. The parameter [width] is the maximum number of characters per line. The parameter [rfrac] is the ribbon width, a fraction relative to [width]. The ribbon width is the maximum number of non-indentation characters per line. *) val pretty: float -> int -> channel -> document -> unit (* [compact channel document] prints the document [document] to the output channel [channel]. No indentation is used. All newline instructions are respected, that is, no groups are flattened. *) val compact: channel -> document -> unit end (* ------------------------------------------------------------------------- *) (* Renderers to output channels and to memory buffers. *) module Channel : RENDERER with type channel = out_channel module Buffer : RENDERER with type channel = Buffer.t (* ------------------------------------------------------------------------- *) (* A signature for value representations. This is compatible with the associated Camlp4 generator: SwitchValueRepresentation *) module type VALUE_REPRESENTATION = sig (* The type of value representation *) type t (* [variant type_name data_constructor_name tag arguments] Given information about the variant and its arguments, this function produces a new value representation. *) val variant : string -> string -> int -> t list -> t (* [record type_name fields] Given a type name and a list of record fields, this function produces the value representation of a record. *) val record : string -> (string * t) list -> t (* [tuple arguments] Given a list of value representation this function produces a new value representation. *) val tuple : t list -> t (* ------------------------------------------------------------------------- *) (* Value representation for primitive types. *) val string : string -> t val int : int -> t val int32 : int32 -> t val int64 : int64 -> t val nativeint : nativeint -> t val float : float -> t val char : char -> t val bool : bool -> t val option : ('a -> t) -> 'a option -> t val list : ('a -> t) -> 'a list -> t val array : ('a -> t) -> 'a array -> t val ref : ('a -> t) -> 'a ref -> t (* Value representation for any other value. *) val unknown : string -> 'a -> t end (* A signature for source printers. *) module type DOCUMENT_VALUE_REPRESENTATION = VALUE_REPRESENTATION with type t = document module ML : DOCUMENT_VALUE_REPRESENTATION (* Deprecated *) val line: document val linebreak: document val softline: document val softbreak: document menhir-20171222/src/fancy-parser.mly0000664000175000017500000003230613217215730017423 0ustar fpottierfpottier/******************************************************************************/ /* */ /* Menhir */ /* */ /* François Pottier, Inria Paris */ /* Yann Régis-Gianas, PPS, Université Paris Diderot */ /* */ /* Copyright Inria. All rights reserved. 
This file is distributed under the */ /* terms of the GNU General Public License version 2, as described in the */ /* file LICENSE. */ /* */ /******************************************************************************/ /* This is the fancy version of the parser, to be processed by menhir. It is kept in sync with [Parser], but exercises menhir's features. */ /* As of 2014/12/02, the $previouserror keyword and the --error-recovery mode no longer exist. Thus, we replace all calls to [Error.signal] with calls to [Error.error], and report just one error. */ /* ------------------------------------------------------------------------- */ /* Imports. */ %{ open Syntax open Positions %} /* ------------------------------------------------------------------------- */ /* Tokens. */ %token TOKEN TYPE LEFT RIGHT NONASSOC START PREC PUBLIC COLON BAR EOF EQUAL %token INLINE LPAREN RPAREN COMMA QUESTION STAR PLUS PARAMETER ON_ERROR_REDUCE %token <string Positions.located> LID UID %token <Stretch.t> HEADER %token <Stretch.ocamltype> OCAMLTYPE %token <Stretch.t Lazy.t> PERCENTPERCENT %token <Syntax.identifier option array -> Action.t> ACTION %token <Syntax.attribute> ATTRIBUTE GRAMMARATTRIBUTE %token PERCENTATTRIBUTE /* ------------------------------------------------------------------------- */ /* Type annotations and start symbol. */ %type <ParserAux.early_producer> producer %type <ParserAux.early_production> production %start <Syntax.partial_grammar> grammar /* ------------------------------------------------------------------------- */ /* Priorities. */ /* These declarations solve a shift-reduce conflict in favor of shifting: when the declaration of a non-terminal symbol begins with a leading bar, it is understood as an (insignificant) leading optional bar, *not* as an empty right-hand side followed by a bar. This ambiguity arises due to the existence of a new notation for letting several productions share a single semantic action. */ %nonassoc no_optional_bar %nonassoc BAR %% /* ------------------------------------------------------------------------- */ /* A grammar consists of declarations and rules, followed by an optional postlude, which we do not parse. */ grammar: ds = declaration* PERCENTPERCENT rs = rule* t = postlude { { pg_filename = ""; (* filled in by the caller *) pg_declarations = List.flatten ds; pg_rules = rs; pg_postlude = t } } /* ------------------------------------------------------------------------- */ /* A declaration is an %{ OCaml header %}, or a %token, %start, %type, %left, %right, or %nonassoc declaration. */ declaration: | h = HEADER /* lexically delimited by %{ ... %} */ { [ with_poss $startpos $endpos (DCode h) ] } | TOKEN ty = OCAMLTYPE? ts = clist(terminal) { List.map (Positions.map (fun (terminal, attrs) -> DToken (ty, terminal, attrs))) ts } | START t = OCAMLTYPE?
nts = clist(nonterminal) /* %start foo is syntactic sugar for %start foo %type foo */ { match t with | None -> List.map (Positions.map (fun nonterminal -> DStart nonterminal)) nts | Some t -> Misc.mapd (fun ntloc -> Positions.mapd (fun nt -> DStart nt, DType (t, ParameterVar ntloc)) ntloc) nts } | TYPE t = OCAMLTYPE ss = clist(strict_actual) { List.map (Positions.map (fun nt -> DType (t, nt))) (List.map Parameters.with_pos ss) } | k = priority_keyword ss = clist(symbol) { let prec = ParserAux.new_precedence_level $startpos(k) $endpos(k) in List.map (Positions.map (fun symbol -> DTokenProperties (symbol, k, prec))) ss } | PARAMETER t = OCAMLTYPE { [ with_poss $startpos $endpos (DParameter t) ] } | attr = GRAMMARATTRIBUTE { [ with_poss $startpos $endpos (DGrammarAttribute attr) ] } | PERCENTATTRIBUTE actuals = clist(strict_actual) attrs = ATTRIBUTE+ { [ with_poss $startpos $endpos (DSymbolAttributes (actuals, attrs)) ] } | ON_ERROR_REDUCE ss = clist(strict_actual) { let prec = ParserAux.new_on_error_reduce_level() in List.map (Positions.map (fun nt -> DOnErrorReduce (nt, prec))) (List.map Parameters.with_pos ss) } /* This production recognizes tokens that are valid in the rules section, but not in the declarations section. This is a hint that a %% was forgotten. */ | rule_specific_token { Error.error (Positions.two $startpos $endpos) "syntax error inside a declaration.\n\ Did you perhaps forget the %%%% that separates declarations and rules?" } priority_keyword: LEFT { LeftAssoc } | RIGHT { RightAssoc } | NONASSOC { NonAssoc } %inline rule_specific_token: | PUBLIC | INLINE | COLON | EOF { () } /* ------------------------------------------------------------------------- */ /* Our lists of symbols are separated with optional commas. Order is irrelevant. */ %inline clist(X): xs = separated_nonempty_list(COMMA?, X) { xs } /* ------------------------------------------------------------------------- */ /* A symbol is a terminal or nonterminal symbol. One would like to require nonterminal symbols to begin with a lowercase letter, so as to lexically distinguish them from terminal symbols, which must begin with an uppercase letter. However, for compatibility with ocamlyacc, this is impossible. It can be required only for nonterminal symbols that are also start symbols. */ symbol: id = LID | id = UID { id } /* ------------------------------------------------------------------------- */ /* Terminals must begin with an uppercase letter. Nonterminals that are declared to be start symbols must begin with a lowercase letter. */ %inline terminal: id = UID attrs = ATTRIBUTE* { Positions.map (fun uid -> (uid, attrs)) id } %inline nonterminal: id = LID { id } /* ------------------------------------------------------------------------- */ /* A rule defines a symbol. It is optionally declared %public, and optionally carries a number of formal parameters. The right-hand side of the definition consists of a list of productions. 
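For instance, a definition in this syntax might read as follows; this is an illustrative sketch, not part of the present grammar:

     %public pair(X, Y): x = X y = Y { (x, y) }

   This rule is declared public, carries two formal parameters, and consists of a single production with two producers.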
*/ rule: flags = flags /* flags */ symbol = symbol /* the symbol that is being defined */ attributes = ATTRIBUTE* params = plist(symbol) /* formal parameters */ COLON optional_bar branches = branches { let public, inline = flags in { pr_public_flag = public; pr_inline_flag = inline; pr_nt = Positions.value symbol; pr_positions = [ Positions.position symbol ]; pr_attributes = attributes; pr_parameters = List.map Positions.value params; pr_branches = branches } } %inline branches: prods = separated_nonempty_list(BAR, production_group) { List.flatten prods } flags: /* epsilon */ { false, false } | PUBLIC { true, false } | INLINE { false, true } | PUBLIC INLINE | INLINE PUBLIC { true, true } optional_bar: /* epsilon */ %prec no_optional_bar | BAR { () } /* ------------------------------------------------------------------------- */ /* A production group consists of a list of productions, followed by a semantic action and an optional precedence specification. */ production_group: productions = separated_nonempty_list(BAR, production) action = ACTION oprec2 = ioption(precedence) { (* If multiple productions share a single semantic action, check that all of them bind the same names. *) ParserAux.check_production_group productions; (* Then, *) List.map (fun (producers, oprec1, level, pos) -> (* Replace [$i] with [_i]. *) let pr_producers = ParserAux.normalize_producers producers in (* Distribute the semantic action. Also, check that every [$i] is within bounds. *) let action : Syntax.identifier option array -> Action.t = action in let pr_action = action (ParserAux.producer_names producers) in { pr_producers; pr_action; pr_branch_prec_annotation = ParserAux.override pos oprec1 oprec2; pr_branch_production_level = level; pr_branch_position = pos }) productions } %inline precedence: PREC symbol = symbol { symbol } /* ------------------------------------------------------------------------- */ /* A production is a list of producers, optionally followed by a precedence declaration. */ production: producers = producer* oprec = ioption(precedence) { producers, oprec, ParserAux.new_production_level(), Positions.lex_join $startpos $endpos } /* ------------------------------------------------------------------------- */ /* A producer is an actual parameter, possibly preceded by a binding, and possibly followed with attributes. Because both [ioption] and [terminated] are defined as inlined by the standard library, this definition expands to two productions, one of which begins with id = LID, the other of which begins with p = actual. The token LID is in FIRST(actual), but the LR(1) formalism can deal with that. If [option] was used instead of [ioption], an LR(1) conflict would arise -- looking ahead at LID would not allow determining whether to reduce an empty [option] or to shift. */ producer: | id = ioption(terminated(LID, EQUAL)) p = actual attrs = ATTRIBUTE* { position (with_poss $startpos $endpos ()), id, p, attrs } /* ------------------------------------------------------------------------- */ /* The ideal syntax of actual parameters includes: 1. a symbol, optionally applied to a list of actual parameters; 2. an actual parameter followed with a modifier; 3. an anonymous rule. (Not delimited by parentheses! Otherwise one would often end up writing two pairs of parentheses.) */ /* In order to avoid a few ambiguities, we restrict this ideal syntax as follows: a. 
Within a %type declaration, we use [strict_actual], which allows 1- and 2- (this is undocumented; the documentation says we require a symbol) but not 3-, which would not make semantic sense anyway. b. Within a producer, we use [actual], which allows 1- and 2- but not 3-. Case 3- is allowed by switching to [lax_actual] within the actual arguments of an application, which are clearly delimited by parentheses and commas. c. In front of a modifier, we can never allow [lax_actual], as this would create an ambiguity: basically, [A | B?] could be interpreted either as [(A | B)?] or as [A | (B?)]. */ %inline generic_actual(A, B): (* 1- *) symbol = symbol actuals = plist(A) { Parameters.app symbol actuals } (* 2- *) | p = B m = modifier { ParameterApp (m, [ p ]) } strict_actual: p = generic_actual(strict_actual, strict_actual) { p } actual: p = generic_actual(lax_actual, actual) { p } lax_actual: p = generic_actual(lax_actual, /* cannot be lax_ */ actual) { p } (* 3- *) | /* leading bar disallowed */ branches = branches { ParameterAnonymous (with_poss $startpos $endpos branches) } (* 2016/05/18: we used to eliminate anonymous rules on the fly during parsing. However, when an anonymous rule appears in a parameterized definition, the fresh nonterminal symbol that is created should be parameterized. This was not done, and is not easy to do on the fly, as it requires inherited attributes (or a way of simulating them). We now use explicit abstract syntax for anonymous rules. *) /* ------------------------------------------------------------------------- */ /* Formal or actual parameter lists are delimited with parentheses and separated with commas. They are optional. */ %inline plist(X): params = loption(delimited(LPAREN, separated_nonempty_list(COMMA, X), RPAREN)) { params } /* ------------------------------------------------------------------------- */ /* The "?", "+", and "*" modifiers are short-hands for applications of certain parameterized nonterminals, defined in the standard library. */ modifier: QUESTION { with_poss $startpos $endpos "option" } | PLUS { with_poss $startpos $endpos "nonempty_list" } | STAR { with_poss $startpos $endpos "list" } /* ------------------------------------------------------------------------- */ /* A postlude is announced by %%, but is optional. */ postlude: EOF { None } | p = PERCENTPERCENT /* followed by actual postlude */ { Some (Lazy.force p) } %% menhir-20171222/src/unionFind.mli0000664000175000017500000000427113217215730016742 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (** This module implements a simple and efficient union/find algorithm. See Robert E. Tarjan, ``Efficiency of a Good But Not Linear Set Union Algorithm'', JACM 22(2), 1975. *) (** The abstraction defined by this module is a set of points, partitioned into equivalence classes. With each equivalence class, a piece of information, of abstract type ['a], is associated; we call it a descriptor. *) type 'a point (** [fresh desc] creates a fresh point and returns it. It forms an equivalence class of its own, whose descriptor is [desc]. 
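As a sketch of typical use, combining the operations below: [let p = fresh 1 and q = fresh 2 in union p q; assert (equivalent p q && get p = 2)]. After the union, both points belong to a single class, whose descriptor is the one originally carried by [q].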
*) val fresh: 'a -> 'a point (** [get point] returns the descriptor associated with [point]'s equivalence class. *) val get: 'a point -> 'a (** [union point1 point2] merges the equivalence classes associated with [point1] and [point2] into a single class whose descriptor is that originally associated with [point2]. It does nothing if [point1] and [point2] already are in the same class. *) val union: 'a point -> 'a point -> unit (** [equivalent point1 point2] tells whether [point1] and [point2] belong to the same equivalence class. *) val equivalent: 'a point -> 'a point -> bool (** [set p d] updates the descriptor of [p] to [d]. *) val set: 'a point -> 'a -> unit menhir-20171222/src/derivation.ml0000664000175000017500000002331313217215730017002 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Grammar (* -------------------------------------------------------------------------- *) (* This is a data structure for linear derivation trees. These are derivation trees that are list-like (that is, they do not branch), because a single path is of interest. A tree is either empty or formed of a non-terminal symbol at the root and a forest below the root. A forest is an ordered list of elements. However, its elements are not trees, as one would perhaps expect. Because we are interested in *linear* derivation trees, only one element of the forest receives focus and is a tree. All other elements remain un-expanded, so they are just symbols. In other words, a linear derivation tree is roughly just a list of levels, where each forest corresponds to one level. *) type 'focus level = { prefix: Symbol.t list; focus: 'focus; suffix: Symbol.t list; comment: string } type tree = | TEmpty | TRooted of Symbol.t * forest and forest = tree level (* We make use of contexts with a forest-shaped hole. We have tree contexts and forest contexts. Tree contexts do not have a case for holes, since we work with forest-shaped holes only. Forest contexts have one. *) type ctree = | CRooted of Symbol.t * cforest and cforest = | CHole | CCons of ctree level (* Make a few types visible to clients. *) type t = forest type context = cforest (* -------------------------------------------------------------------------- *) (* Construction. *) let rec array_to_list a i j = if i = j then [] else a.(i) :: array_to_list a (i + 1) j let empty = { prefix = []; focus = TEmpty; suffix = []; comment = "" } let tail pos rhs = let length = Array.length rhs in assert (pos < length); { prefix = []; focus = TEmpty; suffix = array_to_list rhs pos length; comment = "" } let build pos rhs forest comment = let length = Array.length rhs in assert (pos < length); match rhs.(pos) with | Symbol.T _ -> assert false | Symbol.N _ as symbol -> { prefix = []; focus = TRooted (symbol, forest); suffix = array_to_list rhs (pos + 1) length; comment = (match comment with None -> "" | Some comment -> comment) } let prepend symbol forest = { forest with prefix = symbol :: forest.prefix } (* -------------------------------------------------------------------------- *) (* Display. 
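The functions below render a derivation (or a derivation context) as indented text: each level is printed on a line of its own, shifted right so that it appears below the focus of the previous level, and the hole of a context is displayed as [(?)]. The text is accumulated in [buffer] and flushed to an output channel by [wrap].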
*) let buffer = Buffer.create 32768 let rec print_blank k = if k > 0 then begin Buffer.add_char buffer ' '; print_blank (k - 1) end let print_symbol symbol = let word = Symbol.print symbol in Buffer.add_string buffer word; Buffer.add_char buffer ' '; String.length word + 1 let print_symbols symbols = List.fold_left (fun offset symbol -> offset + print_symbol symbol ) 0 symbols let print_level print_focus_root print_focus_remainder offset forest = print_blank offset; let offset = offset + print_symbols forest.prefix in print_focus_root forest.focus; let (_ : int) = print_symbols forest.suffix in if String.length forest.comment > 0 then begin Buffer.add_string buffer "// "; Buffer.add_string buffer forest.comment end; Buffer.add_char buffer '\n'; print_focus_remainder offset forest.focus let print_tree_root = function | TEmpty -> Buffer.add_string buffer ". " | TRooted (symbol, _) -> let (_ : int) = print_symbol symbol in () let rec print_forest offset forest = print_level print_tree_root print_tree_remainder offset forest and print_tree_remainder offset = function | TEmpty -> () | TRooted (_, forest) -> print_forest offset forest let print_ctree_root = function | CRooted (symbol, _) -> let (_ : int) = print_symbol symbol in () let rec print_cforest offset cforest = match cforest with | CHole -> print_blank offset; Buffer.add_string buffer "(?)\n" | CCons forest -> print_level print_ctree_root print_ctree_remainder offset forest and print_ctree_remainder offset = function | CRooted (_, cforest) -> print_cforest offset cforest let wrap print channel x = Buffer.clear buffer; print 0 x; Buffer.output_buffer channel buffer let print = wrap print_forest let printc = wrap print_cforest (* -------------------------------------------------------------------------- *) (* [punch] turns a (tree or forest) into a pair of a (tree or forest) context and a residual forest, where the context is chosen maximal. In other words, the residual forest consists of a single level -- its focus is [TEmpty]. *) let rec punch_tree tree : (ctree * forest) option = match tree with | TEmpty -> None | TRooted (symbol, forest) -> let forest1, forest2 = punch_forest forest in Some (CRooted (symbol, forest1), forest2) and punch_forest forest : cforest * forest = match punch_tree forest.focus with | None -> CHole, forest | Some (ctree1, forest2) -> CCons { prefix = forest.prefix; focus = ctree1; suffix = forest.suffix; comment = forest.comment }, forest2 (* [fill] fills a (tree or forest) context with a forest so as to produce a new (tree or forest). *) let rec fill_tree ctree1 forest2 : tree = match ctree1 with | CRooted (symbol1, cforest1) -> TRooted (symbol1, fill_forest cforest1 forest2) and fill_forest cforest1 forest2 : forest = match cforest1 with | CHole -> forest2 | CCons level1 -> { prefix = level1.prefix; focus = fill_tree level1.focus forest2; suffix = level1.suffix; comment = level1.comment } (* [common] factors the maximal common (tree or forest) context out of a pair of a (tree or forest) context and a (tree or forest). It returns the (tree or forest) context as well as the residuals of the two parameters. 
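That is, if [common_forest c f] returns a triple [(c0, c1, f1)], then filling [c0] with [f1] yields the original forest [f], while plugging [c1] into the hole of [c0] yields the original context [c].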
*) let rec common_tree ctree1 tree2 : (ctree * cforest * forest) option = match ctree1, tree2 with | CRooted _, TEmpty -> None | CRooted (symbol1, cforest1), TRooted (symbol2, forest2) -> if Symbol.equal symbol1 symbol2 then let cforest, cforest1, forest2 = common_forest cforest1 forest2 in Some (CRooted (symbol1, cforest), cforest1, forest2) else None and common_forest cforest1 forest2 : cforest * cforest * forest = match cforest1 with | CHole -> CHole, cforest1, forest2 | CCons forest1 -> if Symbol.lequal forest1.prefix forest2.prefix && Symbol.lequal forest1.suffix forest2.suffix && forest1.comment = forest2.comment then begin match common_tree forest1.focus forest2.focus with | None -> CHole, cforest1, forest2 | Some (ctree, csubforest1, subforest2) -> let cforest = { prefix = forest1.prefix; focus = ctree; suffix = forest1.suffix; comment = forest1.comment } in CCons cforest, csubforest1, subforest2 end else CHole, cforest1, forest2 (* [factor] factors the maximal common forest context out of a nonempty family of forests. We assume that the family is represented as a map indexed by items, because this is convenient for the application that we have in mind, but this assumption is really irrelevant. *) let factor forests = match Item.Map.fold (fun item forest accu -> match accu with | None -> (* First time through the loop, so [forest] is the first forest that we examine. Punch it, so as to produce a maximal forest context and a residual forest. *) let context, residual = punch_forest forest in Some (context, Item.Map.singleton item residual) | Some (context, residuals) -> (* Another iteration through the loop. [context] and [residuals] are the maximal common context and the residuals of the forests examined so far. *) (* Combine the common context obtained so far with the forest at hand. This yields a new, smaller common context, as well as residuals for the previous common context and for the forest at hand. *) let context, contextr, forestr = common_forest context forest in (* The residual forests are now: (i) the residual forest [forestr]; and (ii) the previous residual forests [residuals], each of which must be placed with the residual context [contextr]. *) let residuals = Item.Map.add item forestr (Item.Map.map (fill_forest contextr) residuals) in Some (context, residuals) ) forests None with | None -> assert false (* parameter [forests] was an empty map *) | Some (context, residuals) -> context, residuals menhir-20171222/src/codeBits.mli0000664000175000017500000000633113217215730016544 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module provides a number of tiny functions that help produce [IL] code. *) open IL (* A list subject to a condition. (Be careful, though: the list is of course constructed even if the condition is false.) *) val listif: bool -> 'a list -> 'a list val elementif: bool -> 'a -> 'a list (* A lazy version of [listif], where the list is constructed only if the condition is true. *) val listiflazy: bool -> (unit -> 'a list) -> 'a list (* Standard types. 
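Each value below is the [IL] representation of the corresponding built-in OCaml type; for instance, [tposition] and [tlexbuf] stand for [Lexing.position] and [Lexing.lexbuf], while [tobj] stands for [Obj.t].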
*) val tunit: typ val tbool: typ val tint: typ val tstring: typ val texn: typ val tposition: typ val tlexbuf: typ val tobj : typ (* Building a type variable. *) val tvar: string -> typ (* Building a type scheme. *) val scheme: string list -> typ -> typescheme val type2scheme: typ -> typescheme (* Projecting out of a [PVar] pattern. *) val pat2var: pattern -> string (* Building a [let] construct, with on-the-fly simplification. *) val blet: (pattern * expr) list * expr -> expr val mlet: pattern list -> expr list -> expr -> expr (* Simulating a [let/and] construct. *) val eletand: (pattern * expr) list * expr -> expr (* [eraisenotfound] is an expression that raises [Not_found]. *) val eraisenotfound: expr (* [bottom] is an expression that has every type. Its semantics is irrelevant. *) val bottom: expr (* Boolean constants. *) val etrue: expr val efalse: expr val eboolconst: bool -> expr (* Option constructors. *) val enone: expr val esome: expr -> expr (* List constructors. *) val elist: expr list -> expr (* Integer constants as patterns. *) val pint: int -> pattern (* These help build function types. *) val arrow: typ -> typ -> typ val arrowif: bool -> typ -> typ -> typ val marrow: typ list -> typ -> typ (* These functions are used to generate names in menhir's namespace. *) val prefix: string -> string val dataprefix: string -> string val tvprefix: string -> string (* Converting an interface to a structure. Only exception and type definitions go through. *) val interface_to_structure: interface -> structure (* Constructing a named module type together with a list of "with type" constraints. *) val with_types: IL.with_kind -> string -> (string list * string * IL.typ) list -> IL.module_type (* Functor applications. *) val mapp: IL.modexpr -> IL.modexpr list -> IL.modexpr menhir-20171222/src/listMonad.mli0000664000175000017500000000242413217215730016741 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (** Monad type which represents a list of results. *) type 'a m = 'a list (** [bind x f] applies [f] to a list of results, returning a list of results. *) val bind: 'a m -> ('a -> 'b m) -> 'b m val ( >>= ) : 'a m -> ('a -> 'b m) -> 'b m (** [return x] is the left and right unit of [bind]. *) val return: 'a -> 'a m menhir-20171222/src/action.mli0000664000175000017500000000577013217215730016273 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Keyword (** Semantic action's type. *) type t (** [compose x a1 a2] builds the action [let x = a1 in a2]. This feature is used during the processing of the %inline keyword. *) val compose : string -> t -> t -> t (* [define keyword keywords f action] defines away the keyword [keyword]. 
It is removed from the set of keywords of this semantic action; the set [keywords] is added in its place. The body of the semantic action is transformed by the function [f], which typically wraps it in some new [let] bindings. *) val define: keyword -> KeywordSet.t -> (IL.expr -> IL.expr) -> t -> t (* Variable-to-variable substitutions, used by [rename], below. *) type subst = (string * string) list (* [Subject/where] pairs, as defined in [Keyword], encode a position keyword. *) type sw = subject * where (** [rename f phi a] applies to the semantic action [a] the renaming [phi] as well as the transformations decided by the function [f]. The function [f] is applied to each (not-yet-renamed) keyword and may decide to transform it, by returning [Some _], or to not transform it, by returning [None]. (In the latter case, [phi] still applies to the keyword.) *) val rename: (sw -> sw option) -> subst -> t -> t (** Semantic actions are translated into [IL] code using the [IL.ETextual] and [IL.ELet] constructors. *) val to_il_expr: t -> IL.expr (** A semantic action might be the inlining of several others. The filenames of the different parts are given by [filenames a]. This can be used, for instance, to check whether all parts come from the standard library. *) val filenames: t -> string list (** [keywords a] is the set of keywords used in the semantic action [a]. *) val keywords: t -> KeywordSet.t (** [from_stretch s] builds an action out of a textual piece of code. *) val from_stretch: Stretch.t -> t (** Test whether the keyword [$syntaxerror] is used in the action. *) val has_syntaxerror: t -> bool (** Test whether the keyword [$endpos($0)] is used in the action. *) val has_beforeend: t -> bool menhir-20171222/src/PackedIntArray.ml0000664000175000017500000001411713217215730017501 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. *) (* *) (******************************************************************************) (* A packed integer array is represented as a pair of an integer [k] and a string [s]. The integer [k] is the number of bits per integer that we use. The string [s] is just an array of bits, which is read in 8-bit chunks. *) (* The ocaml programming language treats string literals and array literals in slightly different ways: the former are statically allocated, while the latter are dynamically allocated. (This is rather arbitrary.) In the context of Menhir's table-based back-end, where compact, immutable integer arrays are needed, ocaml strings are preferable to ocaml arrays. *) type t = int * string (* The magnitude [k] of an integer [v] is the number of bits required to represent [v]. It is rounded up to the nearest power of two, so that [k] divides [Sys.word_size]. *) let magnitude (v : int) = if v < 0 then Sys.word_size else let rec check k max = (* [max] equals [2^k] *) if (max <= 0) || (v < max) then k (* if [max] just overflew, then [v] requires a full ocaml integer, and [k] is the number of bits in an ocaml integer plus one, that is, [Sys.word_size]. *) else check (2 * k) (max * max) in check 1 2 (* [pack a] turns an array of integers into a packed integer array. 
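For instance, [pack [| 0; 1; 2; 3 |]] is [(2, "\027")]: the largest element requires [k = 2] bits, so the four entries are packed, in order and most significant bits first, into the single byte 0b00011011. Conversely, [get (2, "\027") 2] is [2].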
*) (* Because the sign bit is the most significant bit, the magnitude of any negative number is the word size. In other words, [pack] does not achieve any space savings as soon as [a] contains any negative numbers, even if they are ``small''. *) let pack (a : int array) : t = let m = Array.length a in (* Compute the maximum magnitude of the array elements. This tells us how many bits per element we are going to use. *) let k = Array.fold_left (fun k v -> max k (magnitude v) ) 1 a in (* Because access to ocaml strings is performed on an 8-bit basis, two cases arise. If [k] is less than 8, then we can pack multiple array entries into a single character. If [k] is greater than 8, then we must use multiple characters to represent a single array entry. *) if k <= 8 then begin (* [w] is the number of array entries that we pack in a character. *) assert (8 mod k = 0); let w = 8 / k in (* [n] is the length of the string that we allocate. *) let n = if m mod w = 0 then m / w else m / w + 1 in let s = Bytes.create n in (* Define a reader for the source array. The reader might run off the end if [w] does not divide [m]. *) let i = ref 0 in let next () = let ii = !i in if ii = m then 0 (* ran off the end, pad with zeroes *) else let v = a.(ii) in i := ii + 1; v in (* Fill up the string. *) for j = 0 to n - 1 do let c = ref 0 in for _x = 1 to w do c := (!c lsl k) lor next() done; Bytes.set s j (Char.chr !c) done; (* Done. *) k, Bytes.unsafe_to_string s end else begin (* k > 8 *) (* [w] is the number of characters that we use to encode an array entry. *) assert (k mod 8 = 0); let w = k / 8 in (* [n] is the length of the string that we allocate. *) let n = m * w in let s = Bytes.create n in (* Fill up the string. *) for i = 0 to m - 1 do let v = ref a.(i) in for x = 1 to w do Bytes.set s ((i + 1) * w - x) (Char.chr (!v land 255)); v := !v lsr 8 done done; (* Done. *) k, Bytes.unsafe_to_string s end (* Access to a string. *) let read (s : string) (i : int) : int = Char.code (String.unsafe_get s i) (* [get1 t i] returns the integer stored in the packed array [t] at index [i]. It assumes (and does not check) that the array's bit width is [1]. The parameter [t] is just a string. *) let get1 (s : string) (i : int) : int = let c = read s (i lsr 3) in let c = c lsr ((lnot i) land 0b111) in let c = c land 0b1 in c (* [get t i] returns the integer stored in the packed array [t] at index [i]. *) (* Together, [pack] and [get] satisfy the following property: if the index [i] is within bounds, then [get (pack a) i] equals [a.(i)]. *) let get ((k, s) : t) (i : int) : int = match k with | 1 -> get1 s i | 2 -> let c = read s (i lsr 2) in let c = c lsr (2 * ((lnot i) land 0b11)) in let c = c land 0b11 in c | 4 -> let c = read s (i lsr 1) in let c = c lsr (4 * ((lnot i) land 0b1)) in let c = c land 0b1111 in c | 8 -> read s i | 16 -> let j = 2 * i in (read s j) lsl 8 + read s (j + 1) | _ -> assert (k = 32); (* 64 bits unlikely, not supported *) let j = 4 * i in (((read s j lsl 8) + read s (j + 1)) lsl 8 + read s (j + 2)) lsl 8 + read s (j + 3) (* [unflatten1 (n, data) i j] accesses the two-dimensional bitmap represented by [(n, data)] at indices [i] and [j]. The integer [n] is the width of the bitmap; the string [data] is the second component of the packed array obtained by encoding the table as a one-dimensional array. 
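For instance, if the width [n] is 8, then [unflatten1 (8, data) i j] reads bit [8 * i + j] of [data], so row [i] occupies exactly byte [i].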
*) let unflatten1 (n, data) i j = get1 data (n * i + j) menhir-20171222/src/time.mli0000664000175000017500000000266513217215730015754 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* Call [tick msg] to stop timing a task and start timing the next task. A message is displayed. The message includes [msg] as well as timing information. The very first task is deemed to begin when this module is initialized. *) val tick: string -> unit (* Another timing method, with separate chronometers; useful for more precise profiling. *) type chrono val fresh: unit -> chrono val chrono: chrono -> (unit -> 'a) -> 'a val display: chrono -> string -> unit menhir-20171222/src/FixSolver.mli0000664000175000017500000000320013217215730016721 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) module Make (M : Fix.IMPERATIVE_MAPS) (P : sig include Fix.PROPERTY val union: property -> property -> property end) : sig (* Variables and constraints. A constraint is an inequality between a constant or a variable, on the left-hand side, and a variable, on the right-hand side. *) type variable = M.key type property = P.property (* An imperative interface, where we create a new constraint system, and are given three functions to add constraints and (once we are done adding) to solve the system. *) val create: unit -> (property -> variable -> unit) * (variable -> variable -> unit) * (unit -> (variable -> property)) end menhir-20171222/src/CheckSafeParameterizedGrammar.ml0000664000175000017500000001566013217215730022504 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) let value = Positions.value open Syntax (* This test accepts a parameterized grammar, with the restriction that all parameters must have sort [*]. This implies that the head of every application must be a toplevel nonterminal symbol: it cannot be a formal parameter of the current rule. *) (* -------------------------------------------------------------------------- *) (* This flag causes graph edges to be logged on the standard error channel. *) let debug = false (* -------------------------------------------------------------------------- *) (* For syntactic convenience, the code is wrapped in a functor. 
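   This allows the grammar [g] to remain in scope throughout, instead of
   being passed as an argument to every auxiliary function. The functor
   is applied exactly once, by the function [check] at the end of this
   file.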
*) module Run (G : sig val g : grammar end) = struct open G (* -------------------------------------------------------------------------- *) (* We build a graph whose vertices are all formal parameters of all rules. A formal parameter is represented as a pair of a nonterminal symbol and a 0-based integer index (the number of this parameter within this rule). We use OCaml's generic equality and hash functions at this type. *) type formal = symbol * int let formals (nt, rule) : formal list = let arity = List.length rule.pr_parameters in Misc.mapi arity (fun i -> nt, i) let formals : formal array = StringMap.bindings g.p_rules |> List.map formals |> List.concat |> Array.of_list (* -------------------------------------------------------------------------- *) (* The graph edges are as follows. First, for every rule of the following form: F(..., X, ...): # where X is the i-th formal parameter of F ... G(..., X, ...) ... # where X is the j-th actual parameter of G there is a "safe" edge from the formal parameter F/i to the formal G/j. This reflects the fact that there is a flow from F/i to G/j. It is "safe" in the sense that it is not size-increasing: the same parameter X is passed from F to G. Second, for every rule of the following form: F(..., X, ...): # where X is the i-th formal parameter of F ... G(..., H(..., X, ...) , ...) ... # where H(...) is the j-th actual parameter of G there is a "dangerous" edge from the formal parameter F/i to the formal G/j. This reflects the fact that there is a flow from F/i to G/j. This flow is "dangerous" in the sense that it is size-increasing: X is transformed to H(..., X, ...). *) type edge = | Safe | Dangerous let successors_parameter (f : edge -> formal -> unit) x (param : parameter) = match param with | ParameterVar _ -> (* This is not an application. No successors. *) () | ParameterApp (sym, params) -> let nt = value sym in (* If [x] occurs in the [i]-th actual parameter of this application, then there is an edge to the formal [nt, i]. Whether it is a safe or dangerous edge depends on whether [x] occurs shallow or deep. *) List.iteri (fun i param -> if Parameters.occurs_shallow x param then f Safe (nt, i) else if Parameters.occurs_deep x param then f Dangerous (nt, i) ) params | ParameterAnonymous _ -> assert false let successors_producer f x ((_, param, _) : producer) = successors_parameter f x param let successors_branch f x (branch : parameterized_branch) = List.iter (successors_producer f x) branch.pr_producers let successors f ((nt, i) : formal) = let rule = try StringMap.find nt g.p_rules with Not_found -> assert false in let x = try List.nth rule.pr_parameters i with Failure _ -> assert false in List.iter (successors_branch f x) rule.pr_branches (* -------------------------------------------------------------------------- *) (* We now have a full description of the graph. *) module G = struct type node = formal let n = Array.length formals let index = Misc.inverse formals let successors f = successors (fun _ target -> f target) let iter f = Array.iter f formals end (* -------------------------------------------------------------------------- *) (* Display the graph. 
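   When [debug] is set, every edge is printed on the standard error
   channel. For instance (with a hypothetical grammar), the rule
   [f(X): g(X) h(f(X)) {}] would give rise to the two lines
   [f/0 ->(safe) g/0] and [f/0 ->(dangerous) h/0].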
*) let () = if debug then G.iter (fun (x, i) -> successors (fun edge (y, j) -> let kind = match edge with Safe -> "safe" | Dangerous -> "dangerous" in Printf.eprintf "%s/%d ->(%s) %s/%d\n" x i kind y j ) (x, i) ) (* -------------------------------------------------------------------------- *) (* Compute its strongly connected components, ignoring the distinction between safe and dangerous edges. *) module T = Tarjan.Run(G) (* -------------------------------------------------------------------------- *) (* The safety criterion is: no dangerous edge is part of a cycle. Indeed, if this criterion is satisfied, then expansion must terminate: only a finite number of well-sorted terms (involving toplevel symbols and applications) can arise. (This sentence is not a proof!) Conversely, if a dangerous edge appears in a cycle, then expansion will not terminate. (That is, unless the dangerous cycle is unreachable. We choose to reject it anyway in that case.) In other words, this criterion is sound and complete. *) (* Checking that no dangerous edge is part of a cycle is done by examining the source and destination of every dangerous edge and ensuring that they lie in distinct components. *) let () = G.iter (fun source -> successors (fun edge target -> match edge with | Safe -> () | Dangerous -> if T.representative source = T.representative target then let (nt, i) = source in Error.error [] "the parameterized nonterminal symbols in this grammar\n\ cannot be expanded away: expansion would not terminate.\n\ The %s formal parameter of \"%s\" grows without bound." (Misc.nth (i + 1)) nt ) source ) end (* of the functor *) (* -------------------------------------------------------------------------- *) (* Re-package the above functor as a function. *) let check g = let module T = Run(struct let g = g end) in () menhir-20171222/src/General.ml0000664000175000017500000000420613217215730016213 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. *) (* *) (******************************************************************************) (* --------------------------------------------------------------------------- *) (* Lists. *) let rec take n xs = match n, xs with | 0, _ | _, [] -> [] | _, (x :: xs as input) -> let xs' = take (n - 1) xs in if xs == xs' then input else x :: xs' let rec drop n xs = match n, xs with | 0, _ -> xs | _, [] -> [] | _, _ :: xs -> drop (n - 1) xs let rec uniq1 cmp x ys = match ys with | [] -> [] | y :: ys -> if cmp x y = 0 then uniq1 compare x ys else y :: uniq1 cmp y ys let uniq cmp xs = match xs with | [] -> [] | x :: xs -> x :: uniq1 cmp x xs let weed cmp xs = uniq cmp (List.sort cmp xs) (* --------------------------------------------------------------------------- *) (* Streams. *) type 'a stream = 'a head Lazy.t and 'a head = | Nil | Cons of 'a * 'a stream (* The length of a stream. *) let rec length xs = match Lazy.force xs with | Nil -> 0 | Cons (_, xs) -> 1 + length xs (* Folding over a stream. 
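   For example (an illustrative use, not found in this file),
   [foldr (fun x xs -> x :: xs) xs []] forces the stream [xs] in its
   entirety and turns it into a list.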
*) let rec foldr f xs accu = match Lazy.force xs with | Nil -> accu | Cons (x, xs) -> f x (foldr f xs accu) menhir-20171222/src/Drop.mli0000664000175000017500000000235713217215730015720 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This function translates a grammar from the [Syntax] format to the [UnparameterizedSyntax] format. Naturally, the grammar must not have any parameterized symbols, since these are not allowed by the latter format. *) val drop: Syntax.grammar -> UnparameterizedSyntax.grammar menhir-20171222/src/option.ml0000664000175000017500000000310713217215730016145 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) let map f = function | None -> None | Some x -> Some (f x) let iter f o = match o with | None -> () | Some x -> f x let fold f o accu = match o with | None -> accu | Some x -> f x accu let project = function | Some x -> x | None -> (* Presumably, an error message has already been printed. *) exit 1 let equal equal o1 o2 = match o1, o2 with | None, None -> true | Some x1, Some x2 -> equal x1 x2 | None, Some _ | Some _, None -> false let hash hash = function | Some x -> hash x | None -> Hashtbl.hash None menhir-20171222/src/Boolean.mli0000664000175000017500000000205513217215730016366 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) include Fix.PROPERTY with type property = bool val union: property -> property -> property menhir-20171222/src/item.mli0000664000175000017500000000575713217215730015761 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Grammar (* An LR(0) item encodes a pair of integers, namely the index of the production and the index of the bullet in the production's right-hand side. *) type t val import: Production.index * int -> t val export: t -> Production.index * int (* An item can be encoded as an integer. 
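   (Judging by the decoder in [InspectionTableInterpreter], the
   production index and the bullet position are packed into a single
   integer, with the position in the low 7 bits; the item (2, 3), for
   instance, is encoded as 2 * 128 + 3 = 259.)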
This is used in the table back-end only. The decoding function (really a copy of [export]) is in [TableInterpreter]. *) val marshal: t -> int (* Comparison. *) val equal: t -> t -> bool (* [def item] looks up the production associated with this item in the grammar and returns [prod, nt, rhs, pos, length], where [prod] is the production's index, [nt] and [rhs] represent the production, [pos] is the position of the bullet in the item, and [length] is the length of the production's right-hand side. *) val def: t -> Production.index * Nonterminal.t * Symbol.t array * int * int (* If [item] is a start item, [startnt item] returns the start nonterminal that corresponds to [item]. *) val startnt: t -> Nonterminal.t (* Printing. *) val print: t -> string (* Classifying items as shift or reduce items. A shift item is one where the bullet can still advance. A reduce item is one where the bullet has reached the end of the right-hand side. *) type kind = | Shift of Symbol.t * t | Reduce of Production.index val classify: t -> kind (* Sets of items and maps over items. Hashing these data structures is specifically allowed. *) module Set : GSet.S with type element = t module Map : GMap.S with type key = t and type Domain.t = Set.t (* This functor performs precomputation that helps efficiently compute the closure of an LR(0) or LR(1) state. The precomputation requires time linear in the size of the grammar. The nature of the lookahead sets remains abstract. *) module Closure (L : Lookahead.S) : sig (* A state maps items to lookahead information. *) type state = L.t Map.t (* This takes the closure of a state through all epsilon transitions. *) val closure: state -> state end menhir-20171222/src/cst.ml0000664000175000017500000000675413217215730015441 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Grammar (* Concrete syntax trees. *) (* A concrete syntax tree is one of a leaf -- which corresponds to a terminal symbol; a node -- which corresponds to a non-terminal symbol, and whose immediate descendants form an expansion of that symbol; or an error leaf -- which corresponds to a point where the [error] pseudo-token was shifted. *) type cst = | CstTerminal of Terminal.t | CstNonTerminal of Production.index * cst array | CstError (* This is a (mostly) unambiguous printer for concrete syntax trees, in an sexp-like notation. *) let rec pcst b = function | CstTerminal tok -> (* A leaf is denoted by a terminal symbol. *) Printf.bprintf b "%s" (Terminal.print tok) | CstNonTerminal (prod, csts) -> (* A node is denoted by a bracketed, whitespace-separated list, whose head is a non-terminal symbol (followed with a colon) and whose tail consists of the node's descendants. *) (* There is in fact some ambiguity in this notation, since we only print the non-terminal symbol that forms the left-hand side of production [prod], instead of the production itself. This abuse makes things much more readable, and should be acceptable for the moment. The cases where ambiguity actually arises should be rare. 
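   As an illustrative example (with a hypothetical grammar): an
   arithmetic expression could be printed as
   [[expr: [expr: INT] PLUS [expr: INT]]], where [expr] is a
   nonterminal symbol and [INT] and [PLUS] are terminal symbols.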
*) Printf.bprintf b "[%s:%a]" (Nonterminal.print false (Production.nt prod)) pcsts csts | CstError -> (* An error leaf is denoted by [error]. *) Printf.bprintf b "error" and pcsts b (csts : cst array) = Array.iter (fun cst -> Printf.bprintf b " %a" pcst cst ) csts (* This is the public interface. *) let wrap print f x = let b = Buffer.create 32768 in print b x; Buffer.output_buffer f b let print = wrap pcst (* This is a pretty-printer for concrete syntax trees. The notation is the same as that used by the above printer; the only difference is that the [Pprint] library is used to manage indentation. *) open Pprint let rec build : cst -> document = function | CstTerminal tok -> text (Terminal.print tok) | CstNonTerminal (prod, csts) -> brackets ( group ( text (Nonterminal.print false (Production.nt prod)) ^^ colon ^^ group ( nest 2 ( Array.fold_left (fun doc cst -> doc ^^ break1 ^^ build cst ) empty csts ) ) ^^ break0 ) ) | CstError -> text "error" let show f cst = Channel.pretty 0.8 80 f (build cst) menhir-20171222/src/ErrorReports.ml0000664000175000017500000000521313217215730017305 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. *) (* *) (******************************************************************************) (* -------------------------------------------------------------------------- *) (* A two-place buffer stores zero, one, or two elements. *) type 'a content = | Zero | One of 'a | Two of 'a * (* most recent: *) 'a type 'a buffer = 'a content ref (* [update buffer x] pushes [x] into [buffer], causing the buffer to slide. *) let update buffer x = buffer := match !buffer, x with | Zero, _ -> One x | One x1, x2 | Two (_, x1), x2 -> Two (x1, x2) (* [show f buffer] prints the contents of the buffer. The function [f] is used to print an element. *) let show f buffer : string = match !buffer with | Zero -> (* The buffer cannot be empty. If we have read no tokens, we cannot have detected a syntax error. *) assert false | One invalid -> (* It is unlikely, but possible, that we have read just one token. *) Printf.sprintf "before '%s'" (f invalid) | Two (valid, invalid) -> (* In the most likely case, we have read two tokens. *) Printf.sprintf "after '%s' and before '%s'" (f valid) (f invalid) (* [last buffer] returns the last element of the buffer (that is, the invalid token). *) let last buffer = match !buffer with | Zero -> (* The buffer cannot be empty. If we have read no tokens, we cannot have detected a syntax error. *) assert false | One invalid | Two (_, invalid) -> invalid (* [wrap buffer lexer] *) open Lexing let wrap lexer = let buffer = ref Zero in buffer, fun lexbuf -> let token = lexer lexbuf in update buffer (lexbuf.lex_start_p, lexbuf.lex_curr_p); token (* -------------------------------------------------------------------------- *) menhir-20171222/src/Unifier.mli0000664000175000017500000000536513217215730016417 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. 
This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module provides a simple-minded implementation of first-order unification over an arbitrary signature. *) (* -------------------------------------------------------------------------- *) (* The signature must be described by the client, as follows. *) module type STRUCTURE = sig (* The type ['a structure] should be understood as a type of shallow terms whose leaves have type ['a]. *) type 'a structure val map: ('a -> 'b) -> 'a structure -> 'b structure val iter: ('a -> unit) -> 'a structure -> unit (* [iter2] fails if the head constructors differ. *) exception Iter2 val iter2: ('a -> 'b -> unit) -> 'a structure -> 'b structure -> unit end (* -------------------------------------------------------------------------- *) (* The unifier. *) module Make (S : STRUCTURE) : sig (* The type of unification variables. *) type variable (* [fresh s] creates a fresh variable that carries the structure [s]. *) val fresh: variable S.structure option -> variable (* [structure x] returns the structure (currently) carried by variable [x]. *) val structure: variable -> variable S.structure option (* [unify x y] attempts to unify the terms represented by the variables [x] and [y]. The creation of cycles is not permitted; an eager occurs check rules them out. *) exception Unify of variable * variable exception Occurs of variable * variable val unify: variable -> variable -> unit (* This is the type of deep terms over the signature [S]. *) type term = | TVar of int (* the variable's unique identity *) | TNode of term S.structure (* [decode x] turns the variable [x] into the term that it represents. Sharing is lost, so this operation can in the worst case have exponential cost. *) val decode: variable -> term end menhir-20171222/src/InspectionTableInterpreter.ml0000664000175000017500000002520613217215730022150 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. *) (* *) (******************************************************************************) (* -------------------------------------------------------------------------- *) (* The type functor. *) module Symbols (T : sig type 'a terminal type 'a nonterminal end) = struct open T (* This should be the only place in the whole library (and generator!) where these types are defined. *) type 'a symbol = | T : 'a terminal -> 'a symbol | N : 'a nonterminal -> 'a symbol type xsymbol = | X : 'a symbol -> xsymbol end (* -------------------------------------------------------------------------- *) (* The code functor. *) module Make (TT : TableFormat.TABLES) (IT : InspectionTableFormat.TABLES with type 'a lr1state = int) (ET : EngineTypes.TABLE with type terminal = int and type nonterminal = int and type semantic_value = Obj.t) (E : sig type 'a env = (ET.state, ET.semantic_value, ET.token) EngineTypes.env end) = struct (* Including [IT] is an easy way of inheriting the definitions of the types [symbol] and [xsymbol]. 
*) include IT (* This auxiliary function decodes a packed linearized array, as created by [TableBackend.linearize_and_marshal1]. Here, we read a row all at once. *) let read_packed_linearized (data, entry : PackedIntArray.t * PackedIntArray.t) (i : int) : int list = LinearizedArray.read_row_via (PackedIntArray.get data) (PackedIntArray.get entry) i (* This auxiliary function decodes a symbol. The encoding was done by [encode_symbol] or [encode_symbol_option] in the table back-end. *) let decode_symbol (symbol : int) : IT.xsymbol = (* If [symbol] is 0, then we have no symbol. This could mean e.g. that the function [incoming_symbol] has been applied to an initial state. In principle, this cannot happen. *) assert (symbol > 0); (* The low-order bit distinguishes terminal and nonterminal symbols. *) let kind = symbol land 1 in let symbol = symbol lsr 1 in if kind = 0 then IT.terminal (symbol - 1) else IT.nonterminal symbol (* These auxiliary functions convert a symbol to its integer code. For speed and for convenience, we use an unsafe type cast. This relies on the fact that the data constructors of the [terminal] and [nonterminal] GADTs are declared in an order that reflects their internal code. In the case of nonterminal symbols, we add [start] to account for the presence of the start symbols. *) let n2i (nt : 'a IT.nonterminal) : int = let answer = TT.start + Obj.magic nt in (* For safety, check that the above cast produced a correct result. *) assert (IT.nonterminal answer = X (N nt)); answer let t2i (t : 'a IT.terminal) : int = let answer = Obj.magic t in (* For safety, check that the above cast produced a correct result. *) assert (IT.terminal answer = X (T t)); answer (* Ordering functions. *) let compare_terminals t1 t2 = (* Subtraction is safe because overflow is impossible. *) t2i t1 - t2i t2 let compare_nonterminals nt1 nt2 = (* Subtraction is safe because overflow is impossible. *) n2i nt1 - n2i nt2 let compare_symbols symbol1 symbol2 = match symbol1, symbol2 with | X (T _), X (N _) -> -1 | X (N _), X (T _) -> 1 | X (T t1), X (T t2) -> compare_terminals t1 t2 | X (N nt1), X (N nt2) -> compare_nonterminals nt1 nt2 let compare_productions prod1 prod2 = (* Subtraction is safe because overflow is impossible. *) prod1 - prod2 let compare_items (prod1, index1) (prod2, index2) = let c = compare_productions prod1 prod2 in (* Subtraction is safe because overflow is impossible. *) if c <> 0 then c else index1 - index2 (* The function [incoming_symbol] goes through the tables [IT.lr0_core] and [IT.lr0_incoming]. This yields a representation of type [xsymbol], out of which we strip the [X] quantifier, so as to get a naked symbol. This last step is ill-typed and potentially dangerous. It is safe only because this function is used at type ['a lr1state -> 'a symbol], which forces an appropriate choice of ['a]. *) let incoming_symbol (s : 'a IT.lr1state) : 'a IT.symbol = let core = PackedIntArray.get IT.lr0_core s in let symbol = decode_symbol (PackedIntArray.get IT.lr0_incoming core) in match symbol with | IT.X symbol -> Obj.magic symbol (* The function [lhs] reads the table [TT.lhs] and uses [IT.nonterminal] to decode the symbol. *) let lhs prod = IT.nonterminal (PackedIntArray.get TT.lhs prod) (* The function [rhs] reads the table [IT.rhs] and uses [decode_symbol] to decode the symbol. *) let rhs prod = List.map decode_symbol (read_packed_linearized IT.rhs prod) (* The function [items] maps the LR(1) state [s] to its LR(0) core, then uses [core] as an index into the table [IT.lr0_items]. 
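   (This table, like [IT.rhs] above, is a packed linearized array, so
   the row is fetched via [read_packed_linearized].)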
The items are then decoded by the function [export] below, which is essentially a copy of [Item.export]. *) type item = int * int let export t : item = (t lsr 7, t mod 128) let items s = (* Map [s] to its LR(0) core. *) let core = PackedIntArray.get IT.lr0_core s in (* Now use [core] to look up the table [IT.lr0_items]. *) List.map export (read_packed_linearized IT.lr0_items core) (* The function [nullable] maps the nonterminal symbol [nt] to its integer code, which it uses to look up the array [IT.nullable]. This yields 0 or 1, which we map back to a Boolean result. *) let decode_bool i = assert (i = 0 || i = 1); i = 1 let nullable nt = decode_bool (PackedIntArray.get1 IT.nullable (n2i nt)) (* The function [first] maps the symbols [nt] and [t] to their integer codes, which it uses to look up the matrix [IT.first]. *) let first nt t = decode_bool (PackedIntArray.unflatten1 IT.first (n2i nt) (t2i t)) let xfirst symbol t = match symbol with | X (T t') -> compare_terminals t t' = 0 | X (N nt) -> first nt t (* The function [foreach_terminal] exploits the fact that the first component of [TT.error] is [Terminal.n - 1], i.e., the number of terminal symbols, including [error] but not [#]. *) let rec foldij i j f accu = if i = j then accu else foldij (i + 1) j f (f i accu) let foreach_terminal f accu = let n, _ = TT.error in foldij 0 n (fun i accu -> f (IT.terminal i) accu ) accu let foreach_terminal_but_error f accu = let n, _ = TT.error in foldij 0 n (fun i accu -> if i = TT.error_terminal then accu else f (IT.terminal i) accu ) accu (* ------------------------------------------------------------------------ *) (* The following is the implementation of the function [feed]. This function is logically part of the LR engine, so it would be nice if it were placed in the module [Engine], but it must be placed here because, to ensure type safety, its arguments must be a symbol of type ['a symbol] and a semantic value of type ['a]. The type ['a symbol] is not available in [Engine]. It is available here. *) open EngineTypes open ET open E (* [feed] fails if the current state does not have an outgoing transition labeled with the desired symbol. This check is carried out at runtime. *) let feed_failure () = invalid_arg "feed: outgoing transition does not exist" (* Feeding a nonterminal symbol [nt]. Here, [nt] has type [nonterminal], which is a synonym for [int], and [semv] has type [semantic_value], which is a synonym for [Obj.t]. This type is unsafe, because pushing a semantic value of arbitrary type into the stack can later cause a semantic action to crash and burn. The function [feed] is given a safe type below. *) let feed_nonterminal (nt : nonterminal) startp (semv : semantic_value) endp (env : 'b env) : 'b env = (* Check if the source state has an outgoing transition labeled [nt]. This is done by consulting the [goto] table. *) let source = env.current in match ET.maybe_goto_nt source nt with | None -> feed_failure() | Some target -> (* Push a new cell onto the stack, containing the identity of the state that we are leaving. The semantic value [semv] and positions [startp] and [endp] contained in the new cell are provided by the caller. *) let stack = { state = source; semv; startp; endp; next = env.stack } in (* Move to the target state. 
*) { env with stack; current = target } let reduce _env _prod = feed_failure() let initiate _env = feed_failure() let feed_terminal (terminal : terminal) startp (semv : semantic_value) endp (env : 'b env) : 'b env = (* Check if the source state has an outgoing transition labeled [terminal]. This is done by consulting the [action] table. *) let source = env.current in ET.action source terminal semv (fun env _please_discard _terminal semv target -> (* There is indeed a transition toward the state [target]. Push a new cell onto the stack and move to the target state. *) let stack = { state = source; semv; startp; endp; next = env.stack } in { env with stack; current = target } ) reduce initiate env (* The type assigned to [feed] ensures that the type of the semantic value [semv] is appropriate: it must be the semantic-value type of the symbol [symbol]. *) let feed (symbol : 'a symbol) startp (semv : 'a) endp env = let semv : semantic_value = Obj.repr semv in match symbol with | N nt -> feed_nonterminal (n2i nt) startp semv endp env | T terminal -> feed_terminal (t2i terminal) startp semv endp env end menhir-20171222/src/resizableArray.ml0000664000175000017500000000757713217215730017633 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module implements resizable arrays, that is, arrays that can grow upon explicit request. *) type 'a t = { (* The default element is used to fill empty slots when growing or shrinking the physical array. *) default: 'a; (* The init function is used to initialize newly allocated slots when growing the logical array. *) init: int -> 'a; (* The logical size of this array. *) mutable size: int; (* The physical array, whose length is at least [size]. *) mutable table: 'a array } let make capacity default init = (* [capacity] must be nonzero, so that doubling it actually enlarges the array. *) assert (capacity >= 0); let capacity = if capacity = 0 then 1 else capacity in let table = Array.make capacity default in { default; init; size = 0; table } let make_ capacity default = make capacity default (fun _ -> default) let length a = a.size let get a i = assert (0 <= i && i < a.size); Array.unsafe_get a.table (i) let set a i x = assert (0 <= i && i < a.size); Array.unsafe_set a.table (i) x let shrink a s = (* This is [resize a s], assuming [0 <= s < a.size]. *) Array.fill a.table s (a.size - s) a.default; a.size <- s let grow a s = (* This is [resize a s], assuming [0 <= s && a.size < s]. *) let n = Array.length a.table in if s > n then begin (* The physical size of the array must increase. The new size is at least double of the previous size, and larger if requested. *) let table = Array.make (max (2 * n) s) a.default in Array.blit a.table 0 table 0 n; a.table <- table end; (* From [a.size] to [s], we have new logical slots. Initialize them. *) let init = a.init and table = a.table in for i = a.size to s - 1 do Array.unsafe_set table i (init i) done; (* Update the array's logical size. 
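   (Because the physical size at least doubles whenever it must grow, a
   sequence of [push] operations runs in amortized constant time.)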
*) a.size <- s let resize a s = assert (0 <= s); if s < a.size then shrink a s else if s > a.size then grow a s let push a x = let s = a.size in (* equivalent to: [length a] *) begin (* equivalent to: [resize a (s + 1)] *) let s = s + 1 in let n = Array.length a.table in if s > n then begin (* assert (s = n + 1); *) (* assert (max (2 * n) s = 2 * n); *) let table = Array.make (2 * n) a.default in Array.blit a.table 0 table 0 n; a.table <- table end; (* No need to call [init], since there is just one new logical slot and we are about to write it anyway. *) a.size <- s end; Array.unsafe_set a.table (s) x (* equivalent to: [set a s x] *) let pop a = let s = a.size in (* equivalent to: [length a] *) assert (s > 0); let s = s - 1 in a.size <- s; let table = a.table in let x = Array.unsafe_get table (s) in (* equivalent to: [get a s] *) Array.unsafe_set table (s) a.default; (* equivalent to: [resize a s] *) x let default a = a.default menhir-20171222/src/Makefile0000664000175000017500000000566213217215730015753 0ustar fpottierfpottier.PHONY: everyday bootstrap stage1 stage2 stage3 sdk clean # ---------------------------------------------------------------------------- # Choose a target. ifndef TARGET TARGET := native endif # ---------------------------------------------------------------------------- # Ocamlbuild tool and settings. OCAMLBUILD := ocamlbuild -classic-display -j 0 ifeq ($(TARGET),byte) OCAMLBUILD := $(OCAMLBUILD) -byte-plugin endif # ---------------------------------------------------------------------------- # For everyday development. # Typing "make" will perform just stage 1. This is enough to ensure that # the source code is correct. everyday: installation.ml stage1 # ---------------------------------------------------------------------------- # Building Menhir from scratch (a.k.a. bootstrapping). bootstrap: @ $(MAKE) stage1 @ $(MAKE) stage2 @ $(MAKE) stage3 # ---------------------------------------------------------------------------- # Stage 1. # Build Menhir using ocamlyacc. stage1: @ $(OCAMLBUILD) -build-dir _stage1 menhir.$(TARGET) # ---------------------------------------------------------------------------- # Stage 2. # Build Menhir using Menhir (from stage 1). # Do not use . to refer to the current directory, because ocamlbuild # descends into another directory when executing commands. # Do not use $(shell pwd) either, because this assumes we are running # on a Unix platform, and can fail on Windows. # So, use .., which works fine if ocamlbuild has effectively descended # into a subdirectory. SRC := .. FLAGS := -v -lg 1 -la 1 -lc 1 --table --infer --stdlib $(SRC) --strict --fixed-exception --canonical stage2: @ $(OCAMLBUILD) -build-dir _stage2 -tag fancy_parser \ -use-menhir -menhir "$(SRC)/_stage1/menhir.$(TARGET) $(FLAGS)" \ menhir.$(TARGET) # ---------------------------------------------------------------------------- # Stage 3 (optional). # Re-generate Menhir's parser using Menhir (from stage 2) and check that it # is identical to the stage 2 parser. stage3: @ $(OCAMLBUILD) -build-dir _stage3 -tag fancy_parser \ -use-menhir -menhir "$(SRC)/_stage2/menhir.$(TARGET) $(FLAGS)" \ parser.ml parser.mli @ for i in parser.ml parser.mli ; do \ if ! diff _stage2/$$i _stage3/$$i 2>&1 >/dev/null ; then \ echo "Bootstrap FAILED: $$i did not reach a fixed point."; exit 1 ; \ fi ; \ done; \ echo "Bootstrap successful." # ------------------------------------------------------------------------- # The ocamlbuild targets that should be used to build menhirSdk. 
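# By default, only the bytecode object menhirSdk.cmo is requested; the
# native object menhirSdk.cmx is added below, unless TARGET is byte.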
MENHIRSDK := menhirSdk.cmo ifneq ($(TARGET),byte) MENHIRSDK := $(MENHIRSDK) menhirSdk.cmx endif # ---------------------------------------------------------------------------- # Building menhirSdk. sdk: @ $(OCAMLBUILD) \ -build-dir _sdk \ -tag sdk \ $(MENHIRSDK) # ---------------------------------------------------------------------------- # Cleaning up. clean:: rm -rf _stage1 _stage2 _stage3 _sdk menhir-20171222/src/IL.mli0000664000175000017500000001443113217215730015314 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* Abstract syntax of the language used for code production. *) type interface = interface_item list and interface_item = (* Functor. Called [Make]. No functor if no parameters. Very ad hoc! *) | IIFunctor of Stretch.t list * interface (* Exception declarations. *) | IIExcDecls of excdef list (* Algebraic data type declarations (mutually recursive). *) | IITypeDecls of typedef list (* Value declarations. *) | IIValDecls of (string * typescheme) list (* Include directive. *) | IIInclude of module_type (* Submodule. *) | IIModule of string * module_type (* Comment. *) | IIComment of string and module_type = | MTNamedModuleType of string | MTWithType of module_type * string list * string * with_kind * typ | MTSigEnd of interface and with_kind = | WKNonDestructive (* = *) | WKDestructive (* := *) and excdef = { (* Name of the exception. *) excname: string; (* Optional equality. *) exceq: string option; } and typedef = { (* Name of the algebraic data type. *) typename: string; (* Type parameters. This is a list of type variable names, without the leading quote, which will be added by the pretty-printer. Can also be "_". *) typeparams: string list; (* Data constructors. *) typerhs: typedefrhs; (* Constraint. *) typeconstraint: (typ * typ) option } and typedefrhs = | TDefRecord of fielddef list | TDefSum of datadef list | TAbbrev of typ and fielddef = { (* Whether the field is mutable. *) modifiable: bool; (* Name of the field. *) fieldname: string; (* Type of the field. *) fieldtype: typescheme } and datadef = { (* Name of the data constructor. *) dataname: string; (* Types of the value parameters. *) datavalparams: typ list; (* Instantiated type parameters, if this is a GADT -- [None] if this is an ordinary ADT. *) datatypeparams: typ list option; } and typ = (* Textual OCaml type. *) | TypTextual of Stretch.ocamltype (* Type variable, without its leading quote. Can also be "_". *) | TypVar of string (* Application of an algebraic data type constructor. *) | TypApp of string * typ list (* Anonymous tuple. *) | TypTuple of typ list (* Arrow type. *) | TypArrow of typ * typ and typescheme = { (* Universal quantifiers, without leading quotes. *) quantifiers: string list; (* Body. *) body: typ; } and valdef = { (* Whether the value is public. Public values cannot be suppressed by the inliner. They serve as seeds for the dead code analysis. *) valpublic: bool; (* Definition's left-hand side. *) valpat: pattern; (* Value to which it is bound. *) valval: expr } and expr = (* Variable. *) | EVar of string (* Function. 
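   For example (an illustrative value, not found in this file): the
   identity function [fun x -> x] is represented as
   [EFun ([ PVar "x" ], EVar "x")].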
*) | EFun of pattern list * expr (* Function call. *) | EApp of expr * expr list (* Local definitions. This is a nested sequence of [let] definitions. *) | ELet of (pattern * expr) list * expr (* Case analysis. *) | EMatch of expr * branch list | EIfThen of expr * expr | EIfThenElse of expr * expr * expr (* Raising exceptions. *) | ERaise of expr (* Exception analysis. *) | ETry of expr * branch list (* Data construction. Tuples of length 1 are considered nonexistent, that is, [ETuple [e]] is considered the same expression as [e]. *) | EUnit | EIntConst of int | EStringConst of string | EData of string * expr list | ETuple of expr list (* Type annotation. *) | EAnnot of expr * typescheme (* Cheating on the typechecker. *) | EMagic of expr (* Obj.magic *) | ERepr of expr (* Obj.repr *) (* Records. *) | ERecord of (string * expr) list | ERecordAccess of expr * string | ERecordWrite of expr * string * expr (* Textual OCaml code. *) | ETextual of Stretch.t (* Comments. *) | EComment of string * expr | EPatComment of string * pattern * expr (* Arrays. *) | EArray of expr list | EArrayAccess of expr * expr and branch = { (* Branch pattern. *) branchpat: pattern; (* Branch body. *) branchbody: expr; } and pattern = (* Wildcard. *) | PWildcard (* Variable. *) | PVar of string (* Data deconstruction. Tuples of length 1 are considered nonexistent, that is, [PTuple [p]] is considered the same pattern as [p]. *) | PUnit | PData of string * pattern list | PTuple of pattern list | PRecord of (string * pattern) list (* Disjunction. *) | POr of pattern list (* Type annotation. *) | PAnnot of pattern * typ (* Module expressions. *) and modexpr = | MVar of string | MStruct of structure | MApp of modexpr * modexpr (* Structures. *) and program = structure and structure = structure_item list and structure_item = (* Functor. Called [Make]. No functor if no parameters. Very ad hoc! *) | SIFunctor of Stretch.t list * structure (* Exception definitions. *) | SIExcDefs of excdef list (* Algebraic data type definitions (mutually recursive). *) | SITypeDefs of typedef list (* Value definitions (mutually recursive or not, as per the flag). *) | SIValDefs of bool * valdef list (* Raw OCaml code. *) | SIStretch of Stretch.t list (* Sub-module definition. *) | SIModuleDef of string * modexpr (* Module inclusion. *) | SIInclude of modexpr (* Comment. *) | SIComment of string menhir-20171222/src/positions.mli0000664000175000017500000001053513217215730017040 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* TEMPORARY clean up this over-complicated API? *) (** Extension of standard library's positions. *) (** {2 Extended lexing positions} *) (** Abstract type for pairs of positions in the lexing stream. *) type t (** Decoration of a value with a position. *) type 'a located = { value : 'a; position : t; } (** [value dv] returns the raw value that underlies the decorated value [dv]. *) val value: 'a located -> 'a (** [position dv] returns the position that decorates the decorated value [dv]. *) val position: 'a located -> t (** [with_pos p v] decorates [v] with a position [p]. 
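   In other words, it builds the record [{ value = v; position = p }].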
*) val with_pos : t -> 'a -> 'a located val with_cpos: Lexing.lexbuf -> 'a -> 'a located val with_poss : Lexing.position -> Lexing.position -> 'a -> 'a located val unknown_pos : 'a -> 'a located (** [map f v] extends the decoration from [v] to [f v]. *) val map: ('a -> 'b) -> 'a located -> 'b located (** [iter f dv] applies [f] to the value inside [dv]. *) val iter: ('a -> unit) -> 'a located -> unit (** [mapd f v] extends the decoration from [v] to both members of the pair [f v]. *) val mapd: ('a -> 'b1 * 'b2) -> 'a located -> 'b1 located * 'b2 located (** This value is used when an object does not from a particular input location. *) val dummy: t (** {2 Accessors} *) (** [column p] returns the number of characters from the beginning of the line of the Lexing.position [p]. *) val column : Lexing.position -> int (** [column p] returns the line number of to the Lexing.position [p]. *) val line : Lexing.position -> int (** [characters p1 p2] returns the character interval between [p1] and [p2] assuming they are located in the same line. *) val characters : Lexing.position -> Lexing.position -> int * int val start_of_position: t -> Lexing.position val end_of_position: t -> Lexing.position val filename_of_position: t -> string (** {2 Position handling} *) (** [join p1 p2] returns a position that starts where [p1] starts and stops where [p2] stops. *) val join : t -> t -> t val lex_join : Lexing.position -> Lexing.position -> t val ljoinf : ('a -> t) -> 'a list -> t val joinf : ('a -> t) -> 'a -> 'a -> t val join_located : 'a located -> 'b located -> ('a -> 'b -> 'c) -> 'c located val join_located_list : ('a located) list -> ('a list -> 'b list) -> ('b list) located (** [string_of_lex_pos p] returns a string representation for the lexing position [p]. *) val string_of_lex_pos : Lexing.position -> string (** [string_of_pos p] returns the standard (Emacs-like) representation of the position [p]. *) val string_of_pos : t -> string (** [pos_or_undef po] is the identity function except if po = None, in that case, it returns [undefined_position]. *) val pos_or_undef : t option -> t (** {2 Interaction with the lexer runtime} *) (** [cpos lexbuf] returns the current position of the lexer. *) val cpos : Lexing.lexbuf -> t (** [string_of_cpos p] returns a string representation of the lexer's current position. *) val string_of_cpos : Lexing.lexbuf -> string (* The functions that print error messages and warnings require a list of positions. The following auxiliary functions help build such lists. *) type positions = t list val one: Lexing.position -> positions val two: Lexing.position -> Lexing.position -> positions val lexbuf: Lexing.lexbuf -> positions (* Low-level printing function, for debugging. *) val print: Lexing.position -> unit menhir-20171222/src/Maps.mli0000664000175000017500000000640113217215730015706 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module defines three signatures for association maps, together with a number of conversion functors. 
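   The three signatures are [PERSISTENT_MAPS], [IMPERATIVE_MAPS], and
   [IMPERATIVE_MAP]. The functors below convert between them: for
   instance, [PersistentMapsToImperativeMaps] wraps a persistent map in
   a reference cell so that it satisfies the imperative interface.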
*) (* Following the convention of the ocaml standard library, the [find] functions raise [Not_found] when the key is not a member of the domain of the map. By contrast, [get] returns an option. *) (* BEGIN PERSISTENT_MAPS *) module type PERSISTENT_MAPS = sig type key type 'data t val empty: 'data t val add: key -> 'data -> 'data t -> 'data t val find: key -> 'data t -> 'data val iter: (key -> 'data -> unit) -> 'data t -> unit end (* END PERSISTENT_MAPS *) (* BEGIN IMPERATIVE_MAPS *) module type IMPERATIVE_MAPS = sig type key type 'data t val create: unit -> 'data t val clear: 'data t -> unit val add: key -> 'data -> 'data t -> unit val find: key -> 'data t -> 'data val iter: (key -> 'data -> unit) -> 'data t -> unit end (* END IMPERATIVE_MAPS *) (* BEGIN IMPERATIVE_MAP *) module type IMPERATIVE_MAP = sig type key type data val set: key -> data -> unit val get: key -> data option end (* END IMPERATIVE_MAP *) (* An implementation of persistent maps can be made to satisfy the interface of imperative maps. An imperative map is represented as a persistent map, wrapped within a reference cell. *) module PersistentMapsToImperativeMaps (M : PERSISTENT_MAPS) : IMPERATIVE_MAPS with type key = M.key and type 'data t = 'data M.t ref (* An implementation of imperative maps can be made to satisfy the interface of a single imperative map. This map is obtained via a single call to [create]. *) module ImperativeMapsToImperativeMap (M : IMPERATIVE_MAPS) (D : sig type data end) : IMPERATIVE_MAP with type key = M.key and type data = D.data (* An implementation of imperative maps as arrays is possible if keys are consecutive integers. *) module ArrayAsImperativeMaps (K : sig val n: int end) : IMPERATIVE_MAPS with type key = int and type 'data t = 'data option array (* An implementation of imperative maps as a hash table. *) module HashTableAsImperativeMaps (H : Hashtbl.HashedType) : IMPERATIVE_MAPS with type key = H.t (* A trivial implementation of equality and hashing. *) module TrivialHashedType (T : sig type t end) : Hashtbl.HashedType with type t = T.t menhir-20171222/src/DependencyGraph.mli0000664000175000017500000000227413217215730020052 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* Build and print the forward reference graph of the grammar. There is an edge of a nonterminal symbol [nt1] to every nonterminal symbol [nt2] that occurs in the definition of [nt1]. *) val print_dependency_graph: unit -> unit menhir-20171222/src/partialGrammar.ml0000664000175000017500000006444013217215730017607 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. 
*) (* *) (******************************************************************************) open Misc open Syntax open Positions (* ------------------------------------------------------------------------- *) (* This adds one declaration [decl], as found in file [filename], to the grammar [grammar]. *) let join_declaration filename (grammar : grammar) decl = match decl.value with (* Preludes are stored in an arbitrary order. The order of preludes within a single source file is preserved. Same treatment for functor parameters. *) | DCode code -> { grammar with p_preludes = grammar.p_preludes @ [ code ] } | DParameter (Stretch.Declared stretch) -> { grammar with p_parameters = grammar.p_parameters @ [ stretch ] } | DParameter (Stretch.Inferred _) -> assert false (* Token declarations are recorded. Things are made somewhat difficult by the fact that %token and %left-%right-%nonassoc declarations are independent. *) | DToken (ocamltype, terminal, attributes) -> let token_property = try (* Retrieve any previous definition for this token. *) let token_property = StringMap.find terminal grammar.p_tokens in (* If the previous definition was actually a %token declaration (as opposed to a %left, %right, or %nonassoc specification), signal an error. *) if token_property.tk_is_declared then Error.errorp decl "the token %s has multiple definitions." terminal; (* Otherwise, update the previous definition. *) { token_property with tk_is_declared = true; tk_ocamltype = ocamltype; tk_filename = filename; tk_position = decl.position; tk_attributes = attributes; } with Not_found -> (* If no previous definition exists, create one. *) { tk_filename = filename; tk_ocamltype = ocamltype; tk_associativity = UndefinedAssoc; tk_precedence = UndefinedPrecedence; tk_position = decl.position; tk_attributes = attributes; tk_is_declared = true } in { grammar with p_tokens = StringMap.add terminal token_property grammar.p_tokens } (* Start symbols. *) | DStart nonterminal -> { grammar with p_start_symbols = StringMap.add nonterminal decl.position grammar.p_start_symbols } (* Type declarations for nonterminals. *) | DType (ocamltype, nonterminal) -> { grammar with p_types = (nonterminal, with_pos (position decl) ocamltype)::grammar.p_types } (* Reductions on error for nonterminals. *) | DOnErrorReduce (nonterminal, prec) -> { grammar with p_on_error_reduce = (nonterminal, prec) :: grammar.p_on_error_reduce } (* Token associativity and precedence. *) | DTokenProperties (terminal, assoc, prec) -> (* Retrieve the property record for this token, creating one if none existed (but without deeming the token to have been declared). *) let token_properties, grammar = try StringMap.find terminal grammar.p_tokens, grammar with Not_found -> let p = { tk_filename = filename; tk_ocamltype = None; tk_associativity = UndefinedAssoc; tk_precedence = prec; tk_is_declared = false; tk_attributes = []; (* Will be updated later. *) tk_position = decl.position; } in p, { grammar with p_tokens = StringMap.add terminal p grammar.p_tokens } in (* Reject duplicate precedence declarations. *) if token_properties.tk_associativity <> UndefinedAssoc then Error.error [ decl.position; token_properties.tk_position ] "there are multiple precedence declarations for token %s." terminal; (* Record the new declaration. 
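   (The token-properties record is mutable, so it is updated in place;
   the [grammar] value itself is returned unchanged.)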
*) token_properties.tk_precedence <- prec; token_properties.tk_associativity <- assoc; grammar
| DGrammarAttribute attr -> { grammar with p_grammar_attributes = attr :: grammar.p_grammar_attributes }
| DSymbolAttributes (actuals, attrs) -> { grammar with p_symbol_attributes = (actuals, attrs) :: grammar.p_symbol_attributes }
(* ------------------------------------------------------------------------- *)
(* This stores an optional postlude into a grammar. Postludes are stored in an arbitrary order. *)
let join_postlude postlude grammar = match postlude with | None -> grammar | Some postlude -> { grammar with p_postludes = postlude :: grammar.p_postludes }
(* ------------------------------------------------------------------------- *)
(* We rewrite definitions when nonterminals are renamed. The renaming [phi] is an association list of names to names. *)
type renaming = (nonterminal * nonterminal) list
let identity_renaming = []
let rewrite_nonterminal (phi : renaming) nonterminal = Misc.support_assoc phi nonterminal
let rewrite_parameter phi parameter = Parameters.map (Positions.map (Misc.support_assoc phi)) parameter
let rewrite_producer phi ((ido, parameter, attrs) : producer) = ido, rewrite_parameter phi parameter, attrs
let rewrite_branch phi ({ pr_producers = producers } as branch) = { branch with pr_producers = List.map (rewrite_producer phi) producers }
let rewrite_branches phi branches = match phi with | [] -> branches | _ -> List.map (rewrite_branch phi) branches
let fresh_counter = ref 0
let names = ref StringSet.empty
let use_name name = names := StringSet.add name !names
let used_name name = StringSet.mem name !names
let rec fresh ?(hint = "v") () = let name = incr fresh_counter; hint ^ string_of_int !fresh_counter in if used_name name then fresh ~hint () else ( use_name name; name )
(* Alpha conversion of [prule]. We rename bound parameters using fresh names. *)
let alphaconvert_rule parameters prule = let phi = List.combine parameters (List.map (fun x -> fresh ~hint:x ()) parameters) in { prule with pr_parameters = List.map (Misc.support_assoc phi) prule.pr_parameters; pr_branches = rewrite_branches phi prule.pr_branches }
(* Rewrite a rule taking bound names into account. We rename parameters to avoid capture. *)
let rewrite_rule phi prule = let ids = List.fold_left (fun acu (f, d) -> StringSet.add f (StringSet.add d acu)) StringSet.empty phi in let captured_parameters = List.filter (fun p -> StringSet.mem p ids) prule.pr_parameters in let prule = alphaconvert_rule captured_parameters prule in { prule with pr_nt = rewrite_nonterminal phi prule.pr_nt; pr_branches = rewrite_branches phi prule.pr_branches }
let rewrite_rules phi rules = List.map (rewrite_rule phi) rules
let rewrite_grammar phi grammar = (* We assume that [phi] affects only private symbols, so it does not affect the start symbols. *) if phi = identity_renaming then grammar else { grammar with pg_rules = rewrite_rules phi grammar.pg_rules }
(* ------------------------------------------------------------------------- *)
(* To rename (internalize) a nonterminal, we prefix it with its filename. This guarantees that names are unique. *)
let is_valid_nonterminal_character = function | 'A' .. 'Z' | 'a' .. 'z' | '_' | '\192' .. '\214' | '\216' .. '\246' | '\248' .. '\255' | '0' .. '9' -> true | _ -> false
let restrict filename = let m = Bytes.of_string (Filename.chop_suffix filename (if Settings.coq then ".vy" else ".mly")) in for i = 0 to Bytes.length m - 1 do if not (is_valid_nonterminal_character (Bytes.get m i)) then Bytes.set m i '_' done; Bytes.unsafe_to_string m
let rename nonterminal filename = let name = restrict filename ^ "_" ^ nonterminal in if used_name name then fresh ~hint:name () else (use_name name; name)
(* ------------------------------------------------------------------------- *)
type symbol_kind = (* The nonterminal is declared public at a particular position. *) | PublicNonTerminal of Positions.t (* The nonterminal is declared (nonpublic) at a particular position. *) | PrivateNonTerminal of Positions.t (* The symbol is a token. *) | Token of token_properties (* We do not know yet what the symbol means. This is defined in the sequel or it is free in the partial grammar. *) | DontKnow of Positions.t
type symbol_table = (symbol, symbol_kind) Hashtbl.t
let find_symbol (symbols : symbol_table) symbol = Hashtbl.find symbols symbol
let add_in_symbol_table (symbols : symbol_table) symbol kind = use_name symbol; Hashtbl.add symbols symbol kind; symbols
let replace_in_symbol_table (symbols : symbol_table) symbol kind = Hashtbl.replace symbols symbol kind; symbols
let empty_symbol_table () : symbol_table = Hashtbl.create 13
let store_symbol (symbols : symbol_table) symbol kind = match find_symbol symbols symbol, kind with (* The symbol is not known so far. Add it. *) | exception Not_found -> add_in_symbol_table symbols symbol kind (* There are two definitions of this symbol in one grammatical unit (that is, one .mly file), and at least one of them is private. This is forbidden. *) | PrivateNonTerminal p, PrivateNonTerminal p' | PublicNonTerminal p, PrivateNonTerminal p' | PrivateNonTerminal p, PublicNonTerminal p' -> Error.error [ p; p'] "the nonterminal symbol %s is multiply defined.\n\ Only %%public symbols can have split definitions." symbol (* The symbol is known to be a token but declared as a nonterminal.*) | Token tkp, (PrivateNonTerminal p | PublicNonTerminal p) | (PrivateNonTerminal p | PublicNonTerminal p), Token tkp -> Error.error [ p; tkp.tk_position ] "the identifier %s is a reference to a token." symbol (* In the following cases, we do not gain any piece of information. As of 2017/03/29, splitting the definition of a %public nonterminal symbol is permitted. (It used to be permitted over multiple units, but forbidden within a single unit.) *) | _, DontKnow _ | Token _, Token _ | PublicNonTerminal _, PublicNonTerminal _ -> symbols (* We learn that the symbol is a nonterminal or a token. *) | DontKnow _, _ -> replace_in_symbol_table symbols symbol kind
let store_used_symbol position tokens symbols symbol = let kind = try Token (StringMap.find symbol tokens) with Not_found -> DontKnow position in store_symbol symbols symbol kind
let non_terminal_is_not_reserved symbol positions = if symbol = "error" then Error.error positions "%s is reserved and thus cannot be used \ as a non-terminal symbol." symbol
let non_terminal_is_not_a_token tokens symbol positions = try let tkp = StringMap.find symbol tokens in Error.error (positions @ [ tkp.tk_position ]) "the identifier %s is a reference to a token."
symbol with Not_found -> () let store_public_nonterminal tokens symbols symbol positions = non_terminal_is_not_reserved symbol positions; non_terminal_is_not_a_token tokens symbol positions; store_symbol symbols symbol (PublicNonTerminal (List.hd positions)) let store_private_nonterminal tokens symbols symbol positions = non_terminal_is_not_reserved symbol positions; non_terminal_is_not_a_token tokens symbol positions; store_symbol symbols symbol (PrivateNonTerminal (List.hd positions)) (* for debugging, presumably: let string_of_kind = function | PublicNonTerminal p -> Printf.sprintf "public (%s)" (Positions.string_of_pos p) | PrivateNonTerminal p -> Printf.sprintf "private (%s)" (Positions.string_of_pos p) | Token tk -> Printf.sprintf "token (%s)" tk.tk_filename | DontKnow p -> Printf.sprintf "only used at (%s)" (Positions.string_of_pos p) let string_of_symbol_table t = let b = Buffer.create 13 in let m = 1 + Hashtbl.fold (fun k v acu -> max (String.length k) acu) t 0 in let fill_blank s = let s' = String.make m ' ' in String.blit s 0 s' 0 (String.length s); s' in Hashtbl.iter (fun k v -> Buffer.add_string b (Printf.sprintf "%s: %s\n" (fill_blank k) (string_of_kind v))) t; Buffer.contents b *) let is_private_symbol t x = try match Hashtbl.find t x with | PrivateNonTerminal _ -> true | _ -> false with Not_found -> false let fold_on_private_symbols f init t = Hashtbl.fold (fun k -> function PrivateNonTerminal _ -> (fun acu -> f acu k) | _ -> (fun acu -> acu)) t init let fold_on_public_symbols f init t = Hashtbl.fold (fun k -> function PublicNonTerminal _ -> (fun acu -> f acu k) | _ -> (fun acu -> acu)) t init let iter_on_only_used_symbols f t = Hashtbl.iter (fun k -> function DontKnow pos -> f k pos | _ -> ()) t let symbols_of grammar (pgrammar : Syntax.partial_grammar) = let tokens = grammar.p_tokens in let symbols_of_rule symbols prule = let rec store_except_rule_parameters symbols parameter = let symbol, parameters = Parameters.unapp parameter in (* Process the reference to [symbol]. *) let symbols = if List.mem symbol.value prule.pr_parameters then (* Rule parameters are bound locally, so they are not taken into account. *) symbols else store_used_symbol symbol.position tokens symbols symbol.value in (* Process the parameters. *) List.fold_left store_except_rule_parameters symbols parameters in (* Analyse each branch. *) let symbols = List.fold_left (fun symbols branch -> List.fold_left (fun symbols (_, p, _) -> store_except_rule_parameters symbols p ) symbols branch.pr_producers ) symbols prule.pr_branches in (* Store the symbol declaration. *) (* A nonterminal symbol is considered public if it is declared using %public or %start. *) if prule.pr_public_flag || StringMap.mem prule.pr_nt grammar.p_start_symbols then store_public_nonterminal tokens symbols prule.pr_nt prule.pr_positions else store_private_nonterminal tokens symbols prule.pr_nt prule.pr_positions in List.fold_left symbols_of_rule (empty_symbol_table ()) pgrammar.pg_rules let merge_rules symbols pgs = (* Retrieve all the public symbols. *) let public_symbols = List.fold_left (fold_on_public_symbols (fun s k -> StringSet.add k s)) (StringSet.singleton "error") symbols in (* We check the references in each grammar can be bound to a public symbol. *) let _ = List.iter (iter_on_only_used_symbols (fun k pos -> if not (StringSet.mem k public_symbols) then Error.error [ pos ] "%s is undefined." k)) symbols in (* Detect private symbol clashes and rename them if necessary. 
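To give a purely hypothetical example: if two units a.mly and b.mly each define a private nonterminal symbol expr, then expr is detected as a clash, and each occurrence is renamed -- via [rename] above, which prefixes the file name -- to something like a_expr and b_expr, with a note printed on the standard error channel.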
*) let detect_private_symbol_clashes = fold_on_private_symbols (fun (defined, clashes) symbol -> if StringSet.mem symbol defined || StringSet.mem symbol public_symbols then (defined, StringSet.add symbol clashes) else (StringSet.add symbol defined, clashes)) in let _private_symbols, clashes = List.fold_left detect_private_symbol_clashes (StringSet.empty, StringSet.empty) symbols in let rpgs = List.map (fun (symbol_table, pg) -> let renaming = StringSet.fold (fun x phi -> if is_private_symbol symbol_table x then begin let x' = rename x pg.pg_filename in Printf.fprintf stderr "Note: the nonterminal symbol %s (from %s) is renamed %s.\n" x pg.pg_filename x'; (x, x') :: phi end else phi) clashes [] in rewrite_grammar renaming pg) pgs in (* Merge public nonterminal definitions and copy private nonterminal definitions. Since the clash between private symbols have already been resolved, these copies are safe. *) List.fold_left (fun rules rpg -> List.fold_left (fun rules r -> let r = try let r' = StringMap.find r.pr_nt rules in let positions = r.pr_positions @ r'.pr_positions in let ra, ra' = List.length r.pr_parameters, List.length r'.pr_parameters in (* The arity of the parameterized symbols must be constant.*) if ra <> ra' then Error.error positions "the symbol %s is defined with arities %d and %d." r.pr_nt ra ra' else if r.pr_inline_flag <> r'.pr_inline_flag then Error.error positions "not all definitions of %s are marked %%inline." r.pr_nt else (* We combine the different branches. The parameters could have different names, we rename them with the fresh names assigned earlier (see the next comment). *) let phi = List.combine r.pr_parameters r'.pr_parameters in let rbr = rewrite_branches phi r.pr_branches in { r' with pr_positions = positions; pr_branches = rbr @ r'.pr_branches; pr_attributes = r.pr_attributes @ r'.pr_attributes; } with Not_found -> (* We alphaconvert the rule in order to avoid the capture of private symbols coming from another unit. *) alphaconvert_rule r.pr_parameters r in StringMap.add r.pr_nt r rules) rules rpg.pg_rules) StringMap.empty rpgs let empty_grammar = { p_preludes = []; p_postludes = []; p_parameters = []; p_start_symbols = StringMap.empty; p_types = []; p_tokens = StringMap.empty; p_rules = StringMap.empty; p_on_error_reduce = []; p_grammar_attributes = []; p_symbol_attributes = []; } let join grammar pgrammar = let filename = pgrammar.pg_filename in List.fold_left (join_declaration filename) grammar pgrammar.pg_declarations $$ join_postlude pgrammar.pg_postlude (* If a rule is marked %inline, then it must not carry an attribute. *) let check_inline_attribute prule = match prule.pr_inline_flag, prule.pr_attributes with | true, (id, _payload) :: _attributes -> Error.error [Positions.position id] "the nonterminal symbol %s is declared %%inline.\n\ It cannot carry an attribute." prule.pr_nt | true, [] | false, _ -> () let check_parameterized_grammar_is_well_defined grammar = (* Every start symbol is defined and has a %type declaration. *) StringMap.iter (fun nonterminal p -> if not (StringMap.mem nonterminal grammar.p_rules) then Error.error [p] "the start symbol %s is undefined." nonterminal; if not (List.exists (function | ParameterVar { value = id }, _ -> id = nonterminal | _ -> false) grammar.p_types) then Error.error [p] "the type of the start symbol %s is unspecified." nonterminal; ) grammar.p_start_symbols; (* Every %type definition refers to well-defined (terminal or nonterminal) symbols and has, at its head, a nonterminal symbol. 
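For example, a (hypothetical) declaration such as %type <int> PLUS, where PLUS is a token, is rejected by the check below, since a %type declaration is applicable only to a nonterminal symbol.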
*) (* Same check for %on_error_reduce definitions. *) let reserved = [ "error" ] in let rec check (kind : string) (must_be_nonterminal : bool) (p : Syntax.parameter) = (* Destructure head and arguments. *) let head, ps = Parameters.unapp p in let head = value head in (* Check if [head] is a nonterminal or terminal symbol. *) let is_nonterminal = StringMap.mem head grammar.p_rules and is_terminal = StringMap.mem head grammar.p_tokens || List.mem head reserved in (* If [head] is not satisfactory, error. *) if not (is_terminal || is_nonterminal) then Error.error [Parameters.position p] "%s is undefined." head; if (must_be_nonterminal && not is_nonterminal) then Error.error [Parameters.position p] "%s is a terminal symbol,\n\ but %s declarations are applicable only to nonterminal symbols." (Parameters.print true p) kind; (* Then, check the arguments. *) List.iter (check kind false) ps in let check_fst kind must_be_nonterminal (p, _) = check kind must_be_nonterminal p in List.iter (check_fst "%type" true) grammar.p_types; List.iter (check_fst "%on_error_reduce" true) grammar.p_on_error_reduce; List.iter (fun (params, _) -> List.iter (check "%attribute" false) params ) grammar.p_symbol_attributes; (* Every reference to a symbol is well defined. *) let used_tokens = ref StringSet.empty in let mark_token_as_used token = used_tokens := StringSet.add token !used_tokens in let check_identifier_reference grammar prule s p = (* Mark the symbol as a used token if this is a token. *) if StringMap.mem s grammar.p_tokens then mark_token_as_used s; if not (StringMap.mem s grammar.p_rules || StringMap.mem s grammar.p_tokens || List.mem s prule.pr_parameters || List.mem s reserved) then Error.error [ p ] "%s is undefined." s in StringMap.iter (fun k prule -> (* The formal parameters of each rule must have distinct names. *) prule.pr_parameters |> List.sort compare |> Misc.dup compare |> Option.iter (fun x -> Error.error prule.pr_positions "several parameters of this rule are named \"%s\"." x ); (* Check each branch. *) List.iter (fun { pr_producers = producers; pr_branch_prec_annotation; } -> ignore (List.fold_left (* Check the producers. *) (fun already_seen (id, p, _) -> let symbol, parameters = Parameters.unapp p in let s = symbol.value and p = symbol.position in let already_seen = (* Check the producer id is unique. *) if StringSet.mem id.value already_seen then Error.error [ id.position ] "there are multiple producers named %s in this sequence." id.value; StringSet.add id.value already_seen in (* Check that the producer is defined somewhere. *) check_identifier_reference grammar prule s p; StringMap.iter (check_identifier_reference grammar prule) (List.fold_left Parameters.identifiers StringMap.empty parameters); (* If this producer seems to be a reference to a token, make sure it is a real token, as opposed to a pseudo-token introduced in a priority declaration. *) (try if not ((StringMap.find s grammar.p_tokens).tk_is_declared || List.mem s reserved) then Error.errorp symbol "%s has not been declared as a token." s with Not_found -> ()); already_seen ) StringSet.empty producers); match pr_branch_prec_annotation with | None -> () | Some terminal -> check_identifier_reference grammar prule terminal.value terminal.position; (* Furthermore, the symbol following %prec must be a valid token identifier. *) if not (StringMap.mem terminal.value grammar.p_tokens) then Error.errorp terminal "%s is undefined." terminal.value) prule.pr_branches; (* It is forbidden to use %inline on a %start symbol. 
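For example, the (hypothetical) combination of %start main with %inline main: ... is rejected by the check below.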
*) if (prule.pr_inline_flag && StringMap.mem k grammar.p_start_symbols) then Error.error prule.pr_positions "%s cannot be both a start symbol and inlined." k; (* If a rule is marked %inline, then it must not carry an attribute. *) check_inline_attribute prule ) grammar.p_rules; (* Check that every token is used. *) if not Settings.ignore_all_unused_tokens then begin match Settings.token_type_mode with | Settings.TokenTypeOnly -> () | Settings.TokenTypeAndCode | Settings.CodeOnly _ -> StringMap.iter (fun token { tk_position = p } -> if not (StringSet.mem token !used_tokens || StringSet.mem token Settings.ignored_unused_tokens) then Error.warning [p] "the token %s is unused." token ) grammar.p_tokens end; grammar let join_partial_grammars pgs = let grammar = List.fold_left join empty_grammar pgs in let symbols = List.map (symbols_of grammar) pgs in let tpgs = List.combine symbols pgs in let rules = merge_rules symbols tpgs in check_parameterized_grammar_is_well_defined { grammar with p_rules = rules } menhir-20171222/src/Fix.ml0000664000175000017500000004434713217215730015376 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* -------------------------------------------------------------------------- *) (* Maps. *) (* We require imperative maps, that is, maps that can be updated in place. An implementation of persistent maps, such as the one offered by ocaml's standard library, can easily be turned into an implementation of imperative maps, so this is a weak requirement. *) module type IMPERATIVE_MAPS = sig type key type 'data t val create: unit -> 'data t val clear: 'data t -> unit val add: key -> 'data -> 'data t -> unit val find: key -> 'data t -> 'data val iter: (key -> 'data -> unit) -> 'data t -> unit end (* -------------------------------------------------------------------------- *) (* Properties. *) (* Properties must form a partial order, equipped with a least element, and must satisfy the ascending chain condition: every monotone sequence eventually stabilizes. *) (* [is_maximal] determines whether a property [p] is maximal with respect to the partial order. Only a conservative check is required: in any event, it is permitted for [is_maximal p] to return [false]. If [is_maximal p] returns [true], then [p] must have no upper bound other than itself. In particular, if properties form a lattice, then [p] must be the top element. This feature, not described in the paper, enables a couple of minor optimizations. *) module type PROPERTY = sig type property val bottom: property val equal: property -> property -> bool val is_maximal: property -> bool end (* -------------------------------------------------------------------------- *) (* The dynamic dependency graph. *) (* An edge from [node1] to [node2] means that [node1] depends on [node2], or (equivalently) that [node1] observes [node2]. Then, an update of the current property at [node2] causes a signal to be sent to [node1]. A node can observe itself. *) (* This module could be placed in a separate file, but is included here in order to make [Fix] self-contained. 
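As a rough illustration of the interface that follows, a hypothetical client could write:

  let a = Graph.create "A" in
  let b = Graph.create "B" in
  Graph.set_successors a [b; b]; (* the duplicate edge is not created *)
  assert (Graph.predecessors b = [a]);
  Graph.clear_successors a

This snippet is not part of the library; it merely exercises [create], [set_successors], [predecessors], and [clear_successors] as specified below.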
*) module Graph : sig (* This module provides a data structure for maintaining and modifying a directed graph. Each node is allowed to carry a piece of client data. There are functions for creating a new node, looking up a node's data, looking up a node's predecessors, and setting or clearing a node's successors (all at once). *) type 'data node (* [create data] creates a new node, with no incident edges, with client information [data]. Time complexity: constant. *) val create: 'data -> 'data node (* [data node] returns the client information associated with the node [node]. Time complexity: constant. *) val data: 'data node -> 'data (* [predecessors node] returns a list of [node]'s predecessors. Amortized time complexity: linear in the length of the output list. *) val predecessors: 'data node -> 'data node list (* [set_successors src dsts] creates an edge from the node [src] to each of the nodes in the list [dsts]. Duplicate elements in the list [dsts] are removed, so that no duplicate edges are created. It is assumed that [src] initially has no successors. Time complexity: linear in the length of the input list. *) val set_successors: 'data node -> 'data node list -> unit (* [clear_successors node] removes all of [node]'s outgoing edges. Time complexity: linear in the number of edges that are removed. *) val clear_successors: 'data node -> unit (* That's it. *) end = struct (* Using doubly-linked adjacency lists, one could implement [predecessors] in worst-case linear time with respect to the length of the output list, [set_successors] in worst-case linear time with respect to the length of the input list, and [clear_successors] in worst-case linear time with respect to the number of edges that are removed. We use a simpler implementation, based on singly-linked adjacency lists, with deferred removal of edges. It achieves the same complexity bounds, except [predecessors] only offers an amortized complexity bound. This is good enough for our purposes, and, in practice, is more efficient by a constant factor. This simplification was suggested by Arthur Charguéraud. *) type 'data node = { (* The client information associated with this node. *) data: 'data; (* This node's incoming and outgoing edges. *) mutable outgoing: 'data edge list; mutable incoming: 'data edge list; (* A transient mark, always set to [false], except when checking against duplicate elements in a successor list. *) mutable marked: bool; } and 'data edge = { (* This edge's nodes. Edges are symmetric: source and destination are not distinguished. Thus, an edge appears both in the outgoing edge list of its source node and in the incoming edge list of its destination node. This allows edges to be easily marked as destroyed. *) node1: 'data node; node2: 'data node; (* Edges that are destroyed are marked as such, but are not immediately removed from the adjacency lists. *) mutable destroyed: bool; } let create (data : 'data) : 'data node = { data = data; outgoing = []; incoming = []; marked = false; } let data (node : 'data node) : 'data = node.data (* [follow src edge] returns the node that is connected to [src] by [edge]. Time complexity: constant. *) let follow src edge = if edge.node1 == src then edge.node2 else begin assert (edge.node2 == src); edge.node1 end (* The [predecessors] function removes edges that have been marked destroyed. The cost of removing these has already been paid for, so the amortized time complexity of [predecessors] is linear in the length of the output list. 
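In other words, each destroyed edge is lazily removed at most once, by a later call to [predecessors] at one of its endpoints, and the cost of this removal can be charged to the [clear_successors] call that destroyed the edge.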
*) let predecessors (node : 'data node) : 'data node list = let predecessors = List.filter (fun edge -> not edge.destroyed) node.incoming in node.incoming <- predecessors; List.map (follow node) predecessors (* [link src dst] creates a new edge from [src] to [dst], together with its reverse edge. Time complexity: constant. *) let link (src : 'data node) (dst : 'data node) : unit = let edge = { node1 = src; node2 = dst; destroyed = false; } in src.outgoing <- edge :: src.outgoing; dst.incoming <- edge :: dst.incoming let set_successors (src : 'data node) (dsts : 'data node list) : unit = assert (src.outgoing = []); let rec loop = function | [] -> () | dst :: dsts -> if dst.marked then loop dsts (* skip duplicate elements *) else begin dst.marked <- true; link src dst; loop dsts; dst.marked <- false end in loop dsts let clear_successors (node : 'data node) : unit = List.iter (fun edge -> assert (not edge.destroyed); edge.destroyed <- true; ) node.outgoing; node.outgoing <- [] end (* -------------------------------------------------------------------------- *) (* The code is parametric in an implementation of maps over variables and in an implementation of properties. *) module Make (M : IMPERATIVE_MAPS) (P : PROPERTY) = struct type variable = M.key type property = P.property type valuation = variable -> property type rhs = valuation -> property type equations = variable -> rhs (* -------------------------------------------------------------------------- *) (* Data. *) (* Each node in the dependency graph carries information about a fixed variable [v]. *) type node = data Graph.node and data = { (* This is the result of the application of [rhs] to the variable [v]. It must be stored in order to guarantee that this application is performed at most once. *) rhs: rhs; (* This is the current property at [v]. It evolves monotonically with time. *) mutable property: property; (* That's it! *) } (* [property node] returns the current property at [node]. *) let property node = (Graph.data node).property (* -------------------------------------------------------------------------- *) (* Many definitions must be made within the body of the function [lfp]. For greater syntactic convenience, we place them in a local module. *) let lfp (eqs : equations) : valuation = let module LFP = struct (* -------------------------------------------------------------------------- *) (* The workset. *) (* When the algorithm is inactive, the workset is empty. *) (* Our workset is based on a Queue, but it could just as well be based on a Stack. A textual replacement is possible. It could also be based on a priority queue, provided a sensible way of assigning priorities could be found. *) module Workset : sig (* [insert node] inserts [node] into the workset. [node] must have no successors. *) val insert: node -> unit (* [repeat f] repeatedly applies [f] to a node extracted out of the workset, until the workset becomes empty. [f] is allowed to use [insert]. *) val repeat: (node -> unit) -> unit (* That's it! *) end = struct (* Initialize the workset. *) let workset = Queue.create() let insert node = Queue.push node workset let repeat f = while not (Queue.is_empty workset) do f (Queue.pop workset) done end (* -------------------------------------------------------------------------- *) (* Signals. *) (* A node in the workset has no successors. (It can have predecessors.) In other words, a predecessor (an observer) of some node is never in the workset. Furthermore, a node never appears twice in the workset. 
*) (* When a variable broadcasts a signal, all of its predecessors (observers) receive the signal. Any variable that receives the signal loses all of its successors (that is, it ceases to observe anything) and is inserted into the workset. This preserves the above invariant. *) let signal subject = List.iter (fun observer -> Graph.clear_successors observer; Workset.insert observer ) (Graph.predecessors subject) (* At this point, [subject] has no predecessors. This plays no role in the correctness proof, though. *) (* -------------------------------------------------------------------------- *) (* Tables. *) (* The permanent table maps variables that have reached a fixed point to properties. It persists forever. *) let permanent : property M.t = M.create() (* The transient table maps variables that have not yet reached a fixed point to nodes. (A node contains not only a property, but also a memoized right-hand side, and carries edges.) At the beginning of a run, it is empty. It fills up during a run. At the end of a run, it is copied into the permanent table and cleared. *) let transient : node M.t = M.create() (* [freeze()] copies the transient table into the permanent table, and empties the transient table. This allows all nodes to be reclaimed by the garbage collector. *) let freeze () = M.iter (fun v node -> M.add v (property node) permanent ) transient; M.clear transient (* -------------------------------------------------------------------------- *) (* Workset processing. *) (* [solve node] re-evaluates the right-hand side at [node]. If this leads to a change, then the current property is updated, and [node] emits a signal towards its observers. *) (* When [solve node] is invoked, [node] has no subjects. Indeed, when [solve] is invoked by [node_for], [node] is newly created; when [solve] is invoked by [Workset.repeat], [node] has just been extracted out of the workset, and a node in the workset has no subjects. *) (* [node] must not be in the workset. *) (* In short, when [solve node] is invoked, [node] is neither awake nor asleep. When [solve node] finishes, [node] is either awake or asleep again. (Chances are, it is asleep, unless it is its own observer; then, it is awakened by the final call to [signal node].) *) let rec solve (node : node) : unit = (* Retrieve the data record carried by this node. *) let data = Graph.data node in (* Prepare to compute an updated value at this node. This is done by invoking the client's right-hand side function. *) (* The flag [alive] is used to prevent the client from invoking [request] after this interaction phase is over. In theory, this dynamic check seems required in order to argue that [request] behaves like a pure function. In practice, this check is not very useful: only a bizarre client would store a [request] function and invoke it after it has become stale. *) let alive = ref true and subjects = ref [] in (* We supply the client with [request], a function that provides access to the current valuation, and dynamically records dependencies. This yields a set of dependencies that is correct by construction. *) let request (v : variable) : property = assert !alive; try M.find v permanent with Not_found -> let subject = node_for v in let p = property subject in if not (P.is_maximal p) then subjects := subject :: !subjects; p in (* Give control to the client. *) let new_property = data.rhs request in (* From now on, prevent any invocation of this instance of [request] the client. 
*) alive := false; (* At this point, [node] has no subjects, as noted above. Thus, the precondition of [set_successors] is met. We can install [data.subjects] as the new set of subjects for this node. *) (* If we have gathered no subjects in the list [data.subjects], then this node must have stabilized. If [new_property] is maximal, then this node must have stabilized. *) (* If this node has stabilized, then it need not observe any more, so the call to [set_successors] is skipped. In practice, this seems to be a minor optimization. In the particular case where every node stabilizes at the very first call to [rhs], this means that no edges are ever built. This particular case is unlikely, as it means that we are just doing memoization, not a true fixed point computation. *) (* One could go further and note that, if this node has stabilized, then it could immediately be taken out of the transient table and copied into the permanent table. This would have the beneficial effect of allowing the detection of further nodes that have stabilized. Furthermore, it would enforce the property that no node in the transient table has a maximal value, hence the call to [is_maximal] above would become useless. *) if not (!subjects = [] || P.is_maximal new_property) then Graph.set_successors node !subjects; (* If the updated value differs from the previous value, record the updated value and send a signal to all observers of [node]. *) if not (P.equal data.property new_property) then begin data.property <- new_property; signal node end (* Note that equality of the two values does not imply that this node has stabilized forever. *) (* -------------------------------------------------------------------------- *) (* [node_for v] returns the graph node associated with the variable [v]. It is assumed that [v] does not appear in the permanent table. If [v] appears in the transient table, the associated node is returned. Otherwise, [v] is a newly discovered variable: a new node is created on the fly, and the transient table is grown. The new node can either be inserted into the workset (it is then awake) or handled immediately via a recursive call to [solve] (it is then asleep, unless it observes itself). *) (* The recursive call to [solve node] can be replaced, if desired, by a call to [Workset.insert node]. Using a recursive call to [solve] permits eager top-down discovery of new nodes. This can save a constant factor, because it allows new nodes to move directly from [bottom] to a good first approximation, without sending any signals, since [node] has no observers when [solve node] is invoked. In fact, if the dependency graph is acyclic, the algorithm discovers nodes top-down, performs computation on the way back up, and runs without ever inserting a node into the workset! Unfortunately, this causes the stack to grow as deep as the longest path in the dependency graph, which can blow up the stack. *) and node_for (v : variable) : node = try M.find v transient with Not_found -> let node = Graph.create { rhs = eqs v; property = P.bottom } in (* Adding this node to the transient table prior to calling [solve] recursively is mandatory, otherwise [solve] might loop, creating an infinite number of nodes for the same variable. *) M.add v node transient; solve node; (* or: Workset.insert node *) node (* -------------------------------------------------------------------------- *) (* Invocations of [get] trigger the fixed point computation. *) (* The flag [inactive] prevents reentrant calls by the client. 
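To illustrate how the pieces fit together, here is a hypothetical, self-contained use of this solver. This is client code, not part of this file, and the names StringMaps, B, Solver, v are made up:

  module StringMaps = struct
    type key = string
    type 'data t = (string, 'data) Hashtbl.t
    let create () = Hashtbl.create 13
    let clear = Hashtbl.reset
    let add k d t = Hashtbl.replace t k d
    let find k t = Hashtbl.find t k
    let iter = Hashtbl.iter
  end

  module B = struct
    type property = bool
    let bottom = false
    let equal = (=)
    let is_maximal p = p (* [true] is the top element *)
  end

  module Solver = Fix.Make (StringMaps) (B)

  (* A tiny system of monotone Boolean equations, with a cycle at "w". *)
  let v : string -> bool =
    Solver.lfp (fun x request ->
      match x with
      | "x" -> true
      | "y" -> request "x"
      | "z" -> request "y"
      | "w" -> request "w"
      | _ -> false)

Then v "z" is true (via "y" and "x"), while v "w" is false: the self-dependency at "w" is resolved by taking the least fixed point.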
*) let inactive = ref true let get (v : variable) : property = try M.find v permanent with Not_found -> assert !inactive; inactive := false; let node = node_for v in Workset.repeat solve; freeze(); inactive := true; property node (* -------------------------------------------------------------------------- *) (* Close the local module [LFP]. *) end in LFP.get end menhir-20171222/src/anonymous.ml0000664000175000017500000001351513217215730016671 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Syntax (* For each anonymous rule, we define a fresh nonterminal symbol, and replace the anonymous rule with a reference to this symbol. If the anonymous rule appears inside a parameterized rule, then we must define a parameterized nonterminal symbol. *) (* ------------------------------------------------------------------------ *) (* Computing the free names of some syntactic categories. *) let rec fn_parameter accu (p : parameter) = (* [p] cannot be [ParameterAnonymous _]. *) let x, ps = Parameters.unapp p in let accu = StringSet.add (Positions.value x) accu in fn_parameters accu ps and fn_parameters accu ps = List.fold_left fn_parameter accu ps let fn_producer accu ((_, p, _) : producer) = fn_parameter accu p let fn_branch accu branch = List.fold_left fn_producer accu branch.pr_producers let fn_branches accu branches = List.fold_left fn_branch accu branches (* ------------------------------------------------------------------------ *) (* This functor makes it easy to share mutable internal state between the functions that follow. *) module Run (X : sig end) = struct (* ------------------------------------------------------------------------ *) (* A fresh name generator. *) let fresh : unit -> string = let next = ref 0 in fun () -> Printf.sprintf "__anonymous_%d" (Misc.postincrement next) (* ------------------------------------------------------------------------ *) (* A rule accumulator. Used to collect the fresh definitions that we produce. *) let rules = ref [] (* ------------------------------------------------------------------------ *) (* [anonymous pos parameters branches] deals with an anonymous rule, at position [pos], which appears inside a possibly-parameterized rule whose parameters are [parameters], and whose body is [branches]. We assume that [branches] does not itself contain any anonymous rules. As a side effect, we create a fresh definition, and return its name. *) let var (symbol : symbol) : parameter = ParameterVar (Positions.with_pos Positions.dummy symbol) let anonymous pos (parameters : symbol list) (branches : parameterized_branch list) : parameter = (* Compute the free symbols of [branches]. They should form a subset of [parameters], although we have not yet checked this. We create a definition that is parameterized only over the parameters that actually occur free in the definition -- i.e., a definition without useless parameters. This seems important, as (in some situations) it avoids duplication and leads to fewer states in the automaton. 
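To give a hypothetical example: if the anonymous rule [y = B { y }] appears inside a rule that is parameterized over (A, B), as in

  pair(A, B): a = A bs = list(y = B { y }) { (a, bs) }

then the generated symbol is parameterized over B alone, since A does not occur free in the anonymous rule.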
*) let used = fn_branches StringSet.empty branches in let parameters = List.filter (fun x -> StringSet.mem x used) parameters in (* Generate a fresh non-terminal symbol. *) let symbol = fresh() in (* Construct its definition. Note that it is implicitly marked %inline. Also, it does not carry any attributes; this is consistent with the fact that %inline symbols cannot carry attributes. *) let rule = { pr_public_flag = false; pr_inline_flag = true; pr_nt = symbol; pr_positions = [ pos ]; (* this list is not allowed to be empty *) pr_attributes = []; pr_parameters = parameters; pr_branches = branches } in (* Record this definition. *) rules := rule :: !rules; (* Return the symbol that stands for it. *) Parameters.app (Positions.with_pos pos symbol) (List.map var parameters) (* ------------------------------------------------------------------------ *) (* Traversal code. *) let rec transform_parameter (parameters : symbol list) (p : parameter) : parameter = match p with | ParameterVar _ -> p | ParameterApp (x, ps) -> ParameterApp (x, List.map (transform_parameter parameters) ps) | ParameterAnonymous branches -> let pos = Positions.position branches and branches = Positions.value branches in (* Do not forget the recursive invocation! *) let branches = List.map (transform_parameterized_branch parameters) branches in (* This is where the real work is done. *) anonymous pos parameters branches and transform_producer parameters ((x, p, attrs) : producer) = x, transform_parameter parameters p, attrs and transform_parameterized_branch parameters branch = let pr_producers = List.map (transform_producer parameters) branch.pr_producers in { branch with pr_producers } let transform_parameterized_rule rule = let pr_branches = List.map (transform_parameterized_branch rule.pr_parameters) rule.pr_branches in { rule with pr_branches } end (* ------------------------------------------------------------------------ *) (* The main entry point invokes the functor and reads its result. *) let transform_partial_grammar g = let module R = Run(struct end) in let pg_rules = List.map R.transform_parameterized_rule g.pg_rules in let pg_rules = !R.rules @ pg_rules in { g with pg_rules } menhir-20171222/src/yacc-parser.mly0000664000175000017500000002516013217215730017242 0ustar fpottierfpottier/******************************************************************************/ /* */ /* Menhir */ /* */ /* François Pottier, Inria Paris */ /* Yann Régis-Gianas, PPS, Université Paris Diderot */ /* */ /* Copyright Inria. All rights reserved. This file is distributed under the */ /* terms of the GNU General Public License version 2, as described in the */ /* file LICENSE. */ /* */ /******************************************************************************/ /* This is the crude version of the parser. It is meant to be processed by ocamlyacc. Its existence is necessary for bootstrapping. It is kept in sync with [fancy-parser], with a few differences: 0. [yacc-parser] produces dummy position information; 1. [fancy-parser] exploits many features of Menhir; 2. [fancy-parser] performs slightly more refined error handling; 3. [fancy-parser] supports anonymous rules. 
*/ %{ open Syntax open Positions %} %token TOKEN TYPE LEFT RIGHT NONASSOC START PREC PUBLIC COLON BAR EOF EQUAL %token INLINE LPAREN RPAREN COMMA QUESTION STAR PLUS PARAMETER ON_ERROR_REDUCE %token LID UID %token HEADER %token OCAMLTYPE %token PERCENTPERCENT %token Syntax.action> ACTION %token ATTRIBUTE GRAMMARATTRIBUTE %token PERCENTATTRIBUTE %start grammar %type producer %type production %type grammar /* These declarations solve a shift-reduce conflict in favor of shifting: when the declaration of a non-terminal symbol begins with a leading bar, it is understood as an (insignificant) leading optional bar, *not* as an empty right-hand side followed by a bar. This ambiguity arises due to the existence of a new notation for letting several productions share a single semantic action. */ %nonassoc no_optional_bar %nonassoc BAR %% /* ------------------------------------------------------------------------- */ /* A grammar consists of declarations and rules, followed by an optional postlude, which we do not parse. */ grammar: declarations PERCENTPERCENT rules postlude { { pg_filename = ""; (* filled in by the caller *) pg_declarations = List.rev $1; pg_rules = $3; pg_postlude = $4 } } postlude: EOF { None } | PERCENTPERCENT /* followed by actual postlude */ { Some (Lazy.force $1) } /* ------------------------------------------------------------------------- */ /* A declaration is an %{ OCaml header %}, or a %token, %start, %type, %left, %right, or %nonassoc declaration. */ declarations: /* epsilon */ { [] } | declarations declaration { $2 @ $1 } declaration: | HEADER /* lexically delimited by %{ ... %} */ { [ unknown_pos (DCode $1) ] } | TOKEN optional_ocamltype terminals { List.map (Positions.map (fun (terminal, attrs) -> DToken ($2, terminal, attrs))) $3 } | START nonterminals { List.map (Positions.map (fun nonterminal -> DStart nonterminal)) $2 } | TYPE OCAMLTYPE actuals { List.map (Positions.map (fun nt -> DType ($2, nt))) (List.map Parameters.with_pos $3) } | START OCAMLTYPE nonterminals /* %start foo is syntactic sugar for %start foo %type foo */ { Misc.mapd (fun ntloc -> Positions.mapd (fun nt -> DStart nt, DType ($2, ParameterVar ntloc)) ntloc) $3 } | priority_keyword symbols { let prec = ParserAux.new_precedence_level (rhs_start_pos 1) (rhs_end_pos 1) in List.map (Positions.map (fun symbol -> DTokenProperties (symbol, $1, prec))) $2 } | PARAMETER OCAMLTYPE { [ unknown_pos (DParameter $2) ] } | GRAMMARATTRIBUTE { [ unknown_pos (DGrammarAttribute $1) ] } | PERCENTATTRIBUTE actuals attributes { [ unknown_pos (DSymbolAttributes ($2, $3)) ] } | ON_ERROR_REDUCE actuals { let prec = ParserAux.new_on_error_reduce_level() in List.map (Positions.map (fun nt -> DOnErrorReduce (nt, prec))) (List.map Parameters.with_pos $2) } optional_ocamltype: /* epsilon */ { None } | OCAMLTYPE /* lexically delimited by angle brackets */ { Some $1 } priority_keyword: LEFT { LeftAssoc } | RIGHT { RightAssoc } | NONASSOC { NonAssoc } /* ------------------------------------------------------------------------- */ /* A symbol is a terminal or nonterminal symbol. One would like to require nonterminal symbols to begin with a lowercase letter, so as to lexically distinguish them from terminal symbols, which must begin with an uppercase letter. However, for compatibility with ocamlyacc, this is impossible. It can be required only for nonterminal symbols that are also start symbols. 
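Thus, to give a hypothetical example, %start Main is rejected (only a lowercase identifier is accepted in that position), whereas an ordinary nonterminal symbol named Main must be tolerated.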
*/ symbols: /* epsilon */ { [] } | symbols optional_comma symbol { $3 :: $1 }
symbol: LID { $1 } | UID { $1 }
optional_comma: /* epsilon */ { () } | COMMA { () }
attributes: /* epsilon */ { [] } | ATTRIBUTE attributes { $1 :: $2 }
/* ------------------------------------------------------------------------- */
/* Terminals must begin with an uppercase letter. Nonterminals that are declared to be start symbols must begin with a lowercase letter. */
terminals: /* epsilon */ { [] } | terminals optional_comma UID attributes { (Positions.map (fun uid -> (uid, $4)) $3) :: $1 }
nonterminals: /* epsilon */ { [] } | nonterminals LID { $2 :: $1 }
/* ------------------------------------------------------------------------- */
/* A rule defines a symbol. It is optionally declared %public, and optionally carries a number of formal parameters. The right-hand side of the definition consists of a list of production groups. */
rules: /* epsilon */ { [] } | rules rule { $2 :: $1 }
rule: flags symbol attributes optional_formal_parameters COLON optional_bar production_group production_groups { let public, inline = $1 in { pr_public_flag = public; pr_inline_flag = inline; pr_nt = Positions.value $2; pr_positions = [ Positions.position $2 ]; pr_attributes = $3; pr_parameters = $4; pr_branches = List.flatten ($7 :: List.rev $8) } }
flags: /* epsilon */ { false, false } | PUBLIC { true, false } | INLINE { false, true } | PUBLIC INLINE { true, true } | INLINE PUBLIC { true, true }
/* ------------------------------------------------------------------------- */
/* Parameters are surrounded with parentheses and delimited by commas. The syntax of actual parameters allows applications, whereas the syntax of formal parameters does not. It also allows use of the "?", "+", and "*" shortcuts. */
optional_formal_parameters: /* epsilon */ { [] } | LPAREN formal_parameters RPAREN { $2 }
formal_parameters: symbol { [ Positions.value $1 ] } | symbol COMMA formal_parameters { Positions.value $1 :: $3 }
optional_actuals: /* epsilon */ { [] } | LPAREN actuals_comma RPAREN { $2 }
actuals_comma: actual { [ $1 ] } | actual COMMA actuals_comma { $1 :: $3 }
actual: symbol optional_actuals { Parameters.app $1 $2 } | actual modifier { ParameterApp ($2, [ $1 ]) }
actuals: /* epsilon */ { [] } | actuals optional_comma actual { $3::$1 }
optional_bar: /* epsilon */ %prec no_optional_bar { () } | BAR { () }
/* ------------------------------------------------------------------------- */
/* The "?", "+", and "*" modifiers are short-hands for applications of certain parameterized nonterminals, defined in the standard library. */
modifier: QUESTION { unknown_pos "option" } | PLUS { unknown_pos "nonempty_list" } | STAR { unknown_pos "list" }
/* ------------------------------------------------------------------------- */
/* A production group consists of a list of productions, followed by a semantic action and an optional precedence specification. */
production_groups: /* epsilon */ { [] } | production_groups BAR production_group { $3 :: $1 }
production_group: productions ACTION /* action is lexically delimited by braces */ optional_precedence { let productions, action, oprec2 = $1, $2, $3 in (* If multiple productions share a single semantic action, check that all of them bind the same names. *) ParserAux.check_production_group productions; (* Then, *) List.map (fun (producers, oprec1, level, pos) -> (* Replace [$i] with [_i]. *) let pr_producers = ParserAux.normalize_producers producers in (* Distribute the semantic action.
Also, check that every [$i] is within bounds. *) let pr_action = action (ParserAux.producer_names producers) in { pr_producers; pr_action; pr_branch_prec_annotation = ParserAux.override pos oprec1 oprec2; pr_branch_production_level = level; pr_branch_position = pos }) productions } optional_precedence: /* epsilon */ { None } | PREC symbol { Some $2 } /* ------------------------------------------------------------------------- */ /* A production is a list of producers, optionally followed by a precedence declaration. Lists of productions are nonempty and separated with bars. */ productions: production { [ $1 ] } | production bar_productions { $1 :: $2 } bar_productions: BAR production { [ $2 ] } | BAR production bar_productions { $2 :: $3 } production: producers optional_precedence { List.rev $1, $2, ParserAux.new_production_level(), Positions.lex_join (symbol_start_pos()) (symbol_end_pos()) } producers: /* epsilon */ { [] } | producers producer { $2 :: $1 } /* ------------------------------------------------------------------------- */ /* A producer is an actual parameter, possibly preceded by a binding, and possibly followed with attributes. */ producer: | actual attributes { Positions.lex_join (symbol_start_pos()) (symbol_end_pos()), None, $1, $2 } | LID EQUAL actual attributes { Positions.lex_join (symbol_start_pos()) (symbol_end_pos()), Some $1, $3, $4 } %% menhir-20171222/src/cmly_read.ml0000664000175000017500000002176413217215730016605 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Cmly_format open Cmly_api (* ------------------------------------------------------------------------ *) (* Reading a .cmly file. *) exception Error of string let read (ic : in_channel) : grammar = (* .cmly file format: CMLY ++ version string ++ grammar *) let magic = "CMLY" ^ Version.version in try let m = really_input_string ic (String.length magic) in if m <> magic then raise (Error (Printf.sprintf "Invalid magic string in .cmly file.\n\ Expecting %S, but got %S." magic m)) else (input_value ic : grammar) with | End_of_file (* [really_input_string], [input_value] *) | Failure _ -> (* [input_value] *) raise (Error (Printf.sprintf "Invalid or damaged .cmly file.")) let read (filename : string) : grammar = let ic = open_in_bin filename in match read ic with | x -> close_in_noerr ic; x | exception exn -> close_in_noerr ic; raise exn (* ------------------------------------------------------------------------ *) (* Packaging the interval [0..count) as a module of type [INDEXED]. 
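For instance, a hypothetical instantiation (the names are made up):

  module T = Index (struct let name = "Terminal" let count = 16 end)

yields a module where [T.of_int] checks that its argument lies in the interval [0..15], [T.to_int] is the identity, and [T.iter] and [T.fold] enumerate the indices 0 through 15 in order.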
*) module Index (P : sig val name: string (* for error messages only *) val count: int end) : INDEXED with type t = int = struct type t = int let count = P.count let of_int n = if 0 <= n && n < count then n else invalid_arg (P.name ^ ".of_int: index out of bounds") let to_int n = n let iter f = for i = 0 to count - 1 do f i done let fold f x = let r = ref x in for i = 0 to count - 1 do r := f i !r done; !r let tabulate f = let a = Array.init count f in Array.get a end (* ------------------------------------------------------------------------ *) (* Packaging a data structure of type [Cmly_format.grammar] as a module of type [Cmly_api.GRAMMAR]. *) module Make (G : sig val grammar : grammar end) : GRAMMAR = struct open G type terminal = int type nonterminal = int type production = int type lr0 = int type lr1 = int type item = production * int type ocamltype = string type ocamlexpr = string module Range = struct type t = Cmly_format.range let startp range = range.r_start let endp range = range.r_end end module Attribute = struct type t = Cmly_format.attribute let label attr = attr.a_label let has_label label attr = label = attr.a_label let payload attr = attr.a_payload let position attr = attr.a_position end module Grammar = struct let basename = grammar.g_basename let preludes = grammar.g_preludes let postludes = grammar.g_postludes let entry_points = grammar.g_entry_points let attributes = grammar.g_attributes let parameters = grammar.g_parameters end module Terminal = struct let table = grammar.g_terminals let name i = table.(i).t_name let kind i = table.(i).t_kind let typ i = table.(i).t_type let attributes i = table.(i).t_attributes include Index(struct let name = "Terminal" let count = Array.length table end) end module Nonterminal = struct let table = grammar.g_nonterminals let name i = table.(i).n_name let mangled_name i = table.(i).n_mangled_name let kind i = table.(i).n_kind let typ i = table.(i).n_type let positions i = table.(i).n_positions let nullable i = table.(i).n_nullable let first i = table.(i).n_first let attributes i = table.(i).n_attributes include Index(struct let name = "Nonterminal" let count = Array.length table end) end type symbol = Cmly_format.symbol = | T of terminal | N of nonterminal let symbol_name ?(mangled=false) = function | T t -> Terminal.name t | N n -> if mangled then Nonterminal.mangled_name n else Nonterminal.name n type identifier = string module Action = struct type t = action let expr t = t.a_expr let keywords t = t.a_keywords end module Production = struct let table = grammar.g_productions let kind i = table.(i).p_kind let lhs i = table.(i).p_lhs let rhs i = table.(i).p_rhs let positions i = table.(i).p_positions let action i = table.(i).p_action let attributes i = table.(i).p_attributes include Index(struct let name = "Production" let count = Array.length table end) end module Lr0 = struct let table = grammar.g_lr0_states let incoming i = table.(i).lr0_incoming let items i = table.(i).lr0_items include Index(struct let name = "Lr0" let count = Array.length table end) end module Lr1 = struct let table = grammar.g_lr1_states let lr0 i = table.(i).lr1_lr0 let transitions i = table.(i).lr1_transitions let reductions i = table.(i).lr1_reductions include Index(struct let name = "Lr1" let count = Array.length table end) end module Print = struct let terminal ppf t = Format.pp_print_string ppf (Terminal.name t) let nonterminal ppf t = Format.pp_print_string ppf (Nonterminal.name t) let symbol ppf = function | T t -> terminal ppf t | N n -> nonterminal 
ppf n let mangled_nonterminal ppf t = Format.pp_print_string ppf (Nonterminal.name t) let mangled_symbol ppf = function | T t -> terminal ppf t | N n -> mangled_nonterminal ppf n let rec lengths l acc = function | [] -> if l = -1 then [] else l :: lengths (-1) [] acc | [] :: rows -> lengths l acc rows | (col :: cols) :: rows -> lengths (max l (String.length col)) (cols :: acc) rows let rec adjust_length lengths cols = match lengths, cols with | l :: ls, c :: cs -> let pad = l - String.length c in let c = if pad = 0 then c else c ^ String.make pad ' ' in c :: adjust_length ls cs | _, [] -> [] | [], _ -> assert false let align_tabular rows = let lengths = lengths (-1) [] rows in List.map (adjust_length lengths) rows let print_line ppf = function | [] -> () | x :: xs -> Format.fprintf ppf "%s" x; List.iter (Format.fprintf ppf " %s") xs let print_table ppf table = let table = align_tabular table in List.iter (Format.fprintf ppf "%a\n" print_line) table let annot_itemset annots ppf items = let last_lhs = ref (-1) in let prepare (p, pos) annot = let rhs = Array.map (fun (sym, id, _) -> if id <> "" && id.[0] <> '_' then "(" ^ id ^ " = " ^ symbol_name sym ^ ")" else symbol_name sym ) (Production.rhs p) in if pos >= 0 && pos < Array.length rhs then rhs.(pos) <- ". " ^ rhs.(pos) else if pos > 0 && pos = Array.length rhs then rhs.(pos - 1) <- rhs.(pos - 1) ^ " ."; let lhs = Production.lhs p in let rhs = Array.to_list rhs in let rhs = if !last_lhs = lhs then "" :: " |" :: rhs else begin last_lhs := lhs; Nonterminal.name lhs :: "::=" :: rhs end in if annot = [] then [rhs] else [rhs; ("" :: "" :: annot)] in let rec prepare_all xs ys = match xs, ys with | [], _ -> [] | (x :: xs), (y :: ys) -> let z = prepare x y in z :: prepare_all xs ys | (x :: xs), [] -> let z = prepare x [] in z :: prepare_all xs [] in print_table ppf (List.concat (prepare_all items annots)) let itemset ppf t = annot_itemset [] ppf t let annot_item annot ppf item = annot_itemset [annot] ppf [item] let item ppf t = annot_item [] ppf t let production ppf t = item ppf (t, -1) end end module Read (X : sig val filename : string end) = Make (struct let grammar = read X.filename end) menhir-20171222/src/MySet.ml0000664000175000017500000000755113217215730015705 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) module Make (Ord: Map.OrderedType) = struct type elt = Ord.t type t = Empty | Node of t * elt * t * int (* Sets are represented by balanced binary trees (the heights of the children differ by at most 2 *) let height = function Empty -> 0 | Node(_, _, _, h) -> h (* Creates a new node with left son l, value v and right son r. We must have all elements of l < v < all elements of r. l and r must be balanced and | height l - height r | <= 2. Inline expansion of height for better speed. *) let create l v r = let hl = match l with Empty -> 0 | Node(_,_,_,h) -> h in let hr = match r with Empty -> 0 | Node(_,_,_,h) -> h in Node(l, v, r, (if hl >= hr then hl + 1 else hr + 1)) (* Same as create, but performs one step of rebalancing if necessary. Assumes l and r balanced and | height l - height r | <= 3. 
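When the left subtree is too tall, a single rotation to the right suffices if the outer subtree [ll] is at least as tall as the inner subtree [lr]; otherwise, a double rotation promotes the root of [lr]. The right-heavy case is symmetric.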
Inline expansion of create for better speed in the most frequent case where no rebalancing is required. *) let bal l v r = let hl = match l with Empty -> 0 | Node(_,_,_,h) -> h in let hr = match r with Empty -> 0 | Node(_,_,_,h) -> h in if hl > hr + 2 then begin match l with Empty -> invalid_arg "Set.bal" | Node(ll, lv, lr, _) -> if height ll >= height lr then create ll lv (create lr v r) else begin match lr with Empty -> invalid_arg "Set.bal" | Node(lrl, lrv, lrr, _)-> create (create ll lv lrl) lrv (create lrr v r) end end else if hr > hl + 2 then begin match r with Empty -> invalid_arg "Set.bal" | Node(rl, rv, rr, _) -> if height rr >= height rl then create (create l v rl) rv rr else begin match rl with Empty -> invalid_arg "Set.bal" | Node(rll, rlv, rlr, _) -> create (create l v rll) rlv (create rlr rv rr) end end else Node(l, v, r, (if hl >= hr then hl + 1 else hr + 1)) (* [add x t] guarantees that it returns [t] (physically unchanged) if [x] is already a member of [t]. *) let rec add x = function Empty -> Node(Empty, x, Empty, 1) | Node(l, v, r, _) as t -> let c = Ord.compare x v in if c = 0 then t else if c < 0 then let l' = add x l in if l == l' then t else bal l' v r else let r' = add x r in if r == r' then t else bal l v r' let empty = Empty let rec find x = function Empty -> raise Not_found | Node(l, v, r, _) -> let c = Ord.compare x v in if c = 0 then v else find x (if c < 0 then l else r) let rec iter f = function Empty -> () | Node(l, v, r, _) -> iter f l; f v; iter f r end menhir-20171222/src/Printers.ml0000664000175000017500000000670613217215730016453 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. *) (* *) (******************************************************************************) module Make (I : IncrementalEngine.EVERYTHING) (User : sig val print: string -> unit val print_symbol: I.xsymbol -> unit val print_element: (I.element -> unit) option end) = struct let arrow = " -> " let dot = "." let space = " " let newline = "\n" open User open I (* Printing a list of symbols. An optional dot is printed at offset [i] into the list [symbols], if this offset lies between [0] and the length of the list (included). *) let rec print_symbols i symbols = if i = 0 then begin print dot; print space; print_symbols (-1) symbols end else begin match symbols with | [] -> () | symbol :: symbols -> print_symbol symbol; print space; print_symbols (i - 1) symbols end (* Printing an element as a symbol. *) let print_element_as_symbol element = match element with | Element (s, _, _, _) -> print_symbol (X (incoming_symbol s)) (* Some of the functions that follow need an element printer. They use [print_element] if provided by the user; otherwise they use [print_element_as_symbol]. *) let print_element = match print_element with | Some print_element -> print_element | None -> print_element_as_symbol (* Printing a stack as a list of symbols. Stack bottom on the left, stack top on the right. 
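Note that the recursive call below prints the deeper part of the stack first, which is what yields this left-to-right order.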
*) let rec print_stack env = match top env, pop env with | Some element, Some env -> print_stack env; print space; print_element element | _, _ -> () let print_stack env = print_stack env; print newline (* Printing an item. *) let print_item (prod, i) = print_symbol (lhs prod); print arrow; print_symbols i (rhs prod); print newline (* Printing a list of symbols (public version). *) let print_symbols symbols = print_symbols (-1) symbols (* Printing a production (without a dot). *) let print_production prod = print_item (prod, -1) (* Printing the current LR(1) state. *) let print_current_state env = print "Current LR(1) state: "; match top env with | None -> print ""; (* TEMPORARY unsatisfactory *) print newline | Some (Element (current, _, _, _)) -> print (string_of_int (number current)); print newline; List.iter print_item (items current) let print_env env = print_stack env; print_current_state env; print newline end menhir-20171222/src/LinearizedArray.mli0000664000175000017500000000530013217215730020070 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. *) (* *) (******************************************************************************) (* An array of arrays (of possibly different lengths!) can be ``linearized'', i.e., encoded as a data array (by concatenating all of the little arrays) and an entry array (which contains offsets into the data array). *) type 'a t = (* data: *) 'a array * (* entry: *) int array (* [make a] turns the array of arrays [a] into a linearized array. *) val make: 'a array array -> 'a t (* [read la i j] reads the linearized array [la] at indices [i] and [j]. Thus, [read (make a) i j] is equivalent to [a.(i).(j)]. *) val read: 'a t -> int -> int -> 'a (* [write la i j v] writes the value [v] into the linearized array [la] at indices [i] and [j]. *) val write: 'a t -> int -> int -> 'a -> unit (* [length la] is the number of rows of the array [la]. Thus, [length (make a)] is equivalent to [Array.length a]. *) val length: 'a t -> int (* [row_length la i] is the length of the row at index [i] in the linearized array [la]. Thus, [row_length (make a) i] is equivalent to [Array.length a.(i)]. *) val row_length: 'a t -> int -> int (* [read_row la i] reads the row at index [i], producing a list. Thus, [read_row (make a) i] is equivalent to [Array.to_list a.(i)]. *) val read_row: 'a t -> int -> 'a list (* The following variants read the linearized array via accessors [get_data : int -> 'a] and [get_entry : int -> int]. *) val row_length_via: (* get_entry: *) (int -> int) -> (* i: *) int -> int val read_via: (* get_data: *) (int -> 'a) -> (* get_entry: *) (int -> int) -> (* i: *) int -> (* j: *) int -> 'a val read_row_via: (* get_data: *) (int -> 'a) -> (* get_entry: *) (int -> int) -> (* i: *) int -> 'a list menhir-20171222/src/interpret.mli0000664000175000017500000000357613217215730017034 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. 
This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module is in charge of handling several command line options, namely [--interpret], [--interpret-error], [--compile-errors], [--compare-errors]. If any of these options is present, the execution of Menhir stops here. *) (* This default error message is produced by [--list-errors] when it creates a [.messages] file, and is recognized by [--compare-errors] when it compares two such files. *) val default_message: string (* [print_messages_item] displays one data item. The item is of the form [nt, sentence, target], which means that beginning at the start symbol [nt], the sentence [sentence] ends in an error in the target state given by [target]. [target] also contains information about which spurious reductions are performed at the end. The display obeys the [.messages] file format. *) open Grammar val print_messages_item: Nonterminal.t * Terminal.t list * ReferenceInterpreter.target -> unit menhir-20171222/src/lexer.mll0000664000175000017500000005174013217215730016136 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) { open Lexing open Parser open Positions open Keyword (* ------------------------------------------------------------------------ *) (* Short-hands. *) let error1 pos = Error.error (Positions.one pos) let error2 lexbuf = Error.error (Positions.two lexbuf.lex_start_p lexbuf.lex_curr_p) (* ------------------------------------------------------------------------ *) (* This wrapper saves the current lexeme start, invokes its argument, and restores it. This allows transmitting better positions to the parser. *) let savestart lexbuf f = let startp = lexbuf.lex_start_p in let token = f lexbuf in lexbuf.lex_start_p <- startp; token (* ------------------------------------------------------------------------ *) (* Overwrites an old character with a new one at a specified offset in a [bytes] buffer. *) let overwrite content offset c1 c2 = assert (Bytes.get content offset = c1); Bytes.set content offset c2 (* ------------------------------------------------------------------------ *) (* Keyword recognition and construction. *) (* A monster is a spot where we have identified a keyword in concrete syntax. We describe a monster as an object with the following methods: *) type monster = { (* The position of the monster. *) pos: Positions.t; (* This method is passed an array of (optional) names for the producers, that is, the elements of the production's right-hand side. It may perform some checks and is allowed to fail. *) check: string option array -> unit; (* This method transforms the keyword (in place) into a conventional OCaml identifier. This is done by replacing '$', '(', and ')' with '_'. Bloody. The arguments are [ofs1] and [content]. [ofs1] is the offset where [content] begins in the source file. *) transform: int -> bytes -> unit; (* This is the keyword, in abstract syntax. 
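It is [None] in the case of the [$i] monster below, which is rewritten to an ordinary identifier and therefore does not correspond to a keyword in the sense of the [Keyword] module.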
*) keyword: keyword option; } (* ------------------------------------------------------------------------ *) (* The [$syntaxerror] monster. *) let syntaxerror pos : monster = let check _ = () and transform ofs1 content = (* [$syntaxerror] is replaced with [(raise _eRR)]. Same length. *) let pos = start_of_position pos in let ofs = pos.pos_cnum - ofs1 in let source = "(raise _eRR)" in Bytes.blit_string source 0 content ofs (String.length source) and keyword = Some SyntaxError in { pos; check; transform; keyword } (* ------------------------------------------------------------------------ *) (* We check that every [$i] is within range. Also, we forbid using [$i] when a producer has been given a name; this is bad style and may be a mistake. (Plus, this simplifies our life, as we rewrite [$i] to [_i], and we would have to rewrite it to a different identifier otherwise.) *) let check_dollar pos i producers = if not (0 <= i - 1 && i - 1 < Array.length producers) then Error.error [pos] "$%d refers to a nonexistent symbol." i else producers.(i - 1) |> Option.iter (fun x -> Error.error [pos] "please do not say: $%d. Instead, say: %s." i x ) (* We check that every reference to a producer [x] in a position keyword, such as [$startpos(x)], exists. *) let check_producer pos x producers = if not (List.mem (Some x) (Array.to_list producers)) then Error.error [pos] "%s refers to a nonexistent symbol." x (* ------------------------------------------------------------------------ *) (* The [$i] monster. *) let dollar pos i : monster = let check = check_dollar pos i and transform ofs1 content = (* [$i] is replaced with [_i]. Thus, it is no longer a keyword. *) let pos = start_of_position pos in let ofs = pos.pos_cnum - ofs1 in overwrite content ofs '$' '_' and keyword = None in { pos; check; transform; keyword } (* ------------------------------------------------------------------------ *) (* The position-keyword monster. The most horrible of all. *) let position pos (where : string) (flavor : string) (i : string option) (x : string option) = let none _ = () in let where, ofslpar (* offset of the opening parenthesis, if there is one *) = match where with | "symbolstart" -> WhereSymbolStart, 15 | "start" -> WhereStart, 9 | "end" -> WhereEnd, 7 | _ -> assert false in let () = match where, i, x with | WhereSymbolStart, Some _, _ | WhereSymbolStart, _, Some _ -> Error.error [pos] "$symbolstart%s does not take a parameter." flavor | _, _, _ -> () in let flavor = match flavor with | "pos" -> FlavorPosition | "ofs" -> FlavorOffset | _ -> assert false in let subject, check = match i, x with | Some i, None -> let ii = int_of_string i in (* cannot fail *) if ii = 0 && where = WhereEnd then (* [$endpos($0)] *) Before, none else (* [$startpos($i)] is rewritten to [$startpos(_i)].
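This is consistent with the [dollar] monster above, which rewrites [$i] itself to [_i].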
*) RightNamed ("_" ^ i), check_dollar pos ii | None, Some x -> (* [$startpos(x)] *) RightNamed x, check_producer pos x | None, None -> (* [$startpos] *) Left, none | Some _, Some _ -> assert false in let transform ofs1 content = let pos = start_of_position pos in let ofs = pos.pos_cnum - ofs1 in overwrite content ofs '$' '_'; let ofslpar = ofs + ofslpar in match i, x with | None, Some x -> overwrite content ofslpar '(' '_'; overwrite content (ofslpar + 1 + String.length x) ')' '_' | Some i, None -> overwrite content ofslpar '(' '_'; overwrite content (ofslpar + 1) '$' '_'; overwrite content (ofslpar + 2 + String.length i) ')' '_' | _, _ -> () in let keyword = Some (Position (subject, where, flavor)) in { pos; check; transform; keyword } (* ------------------------------------------------------------------------ *) (* In an OCaml header, there should be no monsters. This is just a sanity check. *) let no_monsters monsters = match monsters with | [] -> () | monster :: _ -> Error.error [monster.pos] "a Menhir keyword cannot be used in an OCaml header." (* ------------------------------------------------------------------------ *) (* Creates a stretch. *) let mk_stretch pos1 pos2 parenthesize monsters = (* Read the specified chunk of the file. *) let raw_content : string = InputFile.chunk (pos1, pos2) in (* Transform the monsters, if there are any. (This explicit test allows saving one string copy and keeping just one live copy.) *) let content : string = match monsters with | [] -> raw_content | _ :: _ -> let content : bytes = Bytes.of_string raw_content in List.iter (fun monster -> monster.transform pos1.pos_cnum content) monsters; Bytes.unsafe_to_string content in (* Add whitespace so that the column numbers match those of the source file. If requested, add parentheses so that the semantic action can be inserted into other code without ambiguity. *) let content = if parenthesize then (String.make (pos1.pos_cnum - pos1.pos_bol - 1) ' ') ^ "(" ^ content ^ ")" else (String.make (pos1.pos_cnum - pos1.pos_bol) ' ') ^ content in Stretch.({ stretch_filename = InputFile.get_input_file_name(); stretch_linenum = pos1.pos_lnum; stretch_linecount = pos2.pos_lnum - pos1.pos_lnum; stretch_content = content; stretch_raw_content = raw_content; stretch_keywords = Misc.map_opt (fun monster -> monster.keyword) monsters }) (* ------------------------------------------------------------------------ *) (* OCaml's reserved words. *) let reserved = let table = Hashtbl.create 149 in List.iter (fun word -> Hashtbl.add table word ()) [ "and"; "as"; "assert"; "begin"; "class"; "constraint"; "do"; "done"; "downto"; "else"; "end"; "exception"; "external"; "false"; "for"; "fun"; "function"; "functor"; "if"; "in"; "include"; "inherit"; "initializer"; "lazy"; "let"; "match"; "method"; "module"; "mutable"; "new"; "object"; "of"; "open"; "or"; "parser"; "private"; "rec"; "sig"; "struct"; "then"; "to"; "true"; "try"; "type"; "val"; "virtual"; "when"; "while"; "with"; "mod"; "land"; "lor"; "lxor"; "lsl"; "lsr"; "asr"; ]; table } (* ------------------------------------------------------------------------ *) (* Patterns. *) let newline = ('\010' | '\013' | "\013\010") let whitespace = [ ' ' '\t' ';' ] let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_'] let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222'] let identchar = ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '0'-'9'] (* '\'' forbidden *) let attributechar = identchar | '.' 
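(* For example, the keyword [$startpos(x)] is matched by [poskeyword] below, binding [where] to "start", [flavor] to "pos", and [x] to "x"; similarly, [$endpos($2)] binds [where] to "end", [flavor] to "pos", and [i] to "2". *)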
let poskeyword = '$' (("symbolstart" | "start" | "end") as where) (("pos" | "ofs") as flavor) ( '(' ( '$' (['0'-'9']+ as i) | ((lowercase identchar*) as x)) ')')? let previouserror = "$previouserror" let syntaxerror = "$syntaxerror" (* ------------------------------------------------------------------------ *) (* The lexer. *) rule main = parse | "%token" { TOKEN } | "%type" { TYPE } | "%left" { LEFT } | "%right" { RIGHT } | "%nonassoc" { NONASSOC } | "%start" { START } | "%prec" { PREC } | "%public" { PUBLIC } | "%parameter" { PARAMETER } | "%inline" { INLINE } | "%attribute" { PERCENTATTRIBUTE } | "%on_error_reduce" { ON_ERROR_REDUCE } | "%%" { (* The token [PERCENTPERCENT] carries a stretch that contains everything that follows %% in the input file. This string must be created lazily. The parser decides (based on the context) whether this stretch is needed. If it is indeed needed, then constructing this stretch drives the lexer to the end of the file. *) PERCENTPERCENT (lazy ( let openingpos = lexeme_end_p lexbuf in let closingpos = finish lexbuf in mk_stretch openingpos closingpos false [] )) } | ":" { COLON } | "," { COMMA } | "=" { EQUAL } | "(" { LPAREN } | ")" { RPAREN } | "|" { BAR } | "?" { QUESTION } | "*" { STAR } | "+" { PLUS } | (lowercase identchar *) as id { if Hashtbl.mem reserved id then error2 lexbuf "this is an OCaml reserved word." else LID (with_pos (cpos lexbuf) id) } | (uppercase identchar *) as id { UID (with_pos (cpos lexbuf) id) } | "//" [^ '\010' '\013']* newline (* skip C++ style comment *) | newline { new_line lexbuf; main lexbuf } | whitespace+ { main lexbuf } | "/*" { comment (lexeme_start_p lexbuf) lexbuf; main lexbuf } | "(*" { ocamlcomment (lexeme_start_p lexbuf) lexbuf; main lexbuf } | "<" { savestart lexbuf (ocamltype (lexeme_end_p lexbuf)) } | "%{" { savestart lexbuf (fun lexbuf -> let openingpos = lexeme_start_p lexbuf in let stretchpos = lexeme_end_p lexbuf in let closingpos, monsters = action true openingpos [] lexbuf in no_monsters monsters; HEADER (mk_stretch stretchpos closingpos false []) ) } | "{" { savestart lexbuf (fun lexbuf -> let openingpos = lexeme_start_p lexbuf in let stretchpos = lexeme_end_p lexbuf in let closingpos, monsters = action false openingpos [] lexbuf in ACTION ( fun (producers : string option array) -> List.iter (fun monster -> monster.check producers) monsters; let stretch = mk_stretch stretchpos closingpos true monsters in Action.from_stretch stretch ) ) } | ('%'? as percent) "[@" (attributechar+ as id) whitespace* { let openingpos = lexeme_start_p lexbuf in let stretchpos = lexeme_end_p lexbuf in let closingpos = attribute openingpos lexbuf in let pos = Positions.lex_join openingpos (lexeme_end_p lexbuf) in let attr = mk_stretch stretchpos closingpos false [] in if percent = "" then (* No [%] sign: this is a normal attribute. *) ATTRIBUTE (Positions.with_pos pos id, attr) else (* A [%] sign is present: this is a grammar-wide attribute. *) GRAMMARATTRIBUTE (Positions.with_pos pos id, attr) } | eof { EOF } | _ { error2 lexbuf "unexpected character(s)." } (* ------------------------------------------------------------------------ *) (* Skip C style comments. *) and comment openingpos = parse | newline { new_line lexbuf; comment openingpos lexbuf } | "*/" { () } | eof { error1 openingpos "unterminated comment." } | _ { comment openingpos lexbuf } (* ------------------------------------------------------------------------ *) (* Collect an O'Caml type delimited by angle brackets. 
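Such a type occurs, for instance, in a [%token] or [%type] declaration.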
Angle brackets can appear as part of O'Caml function types and variant types, so we must recognize them and *not* treat them as a closing bracket. *) and ocamltype openingpos = parse | "->" | "[>" { ocamltype openingpos lexbuf } | '>' { OCAMLTYPE (Stretch.Declared (mk_stretch openingpos (lexeme_start_p lexbuf) true [])) } | "(*" { ocamlcomment (lexeme_start_p lexbuf) lexbuf; ocamltype openingpos lexbuf } | newline { new_line lexbuf; ocamltype openingpos lexbuf } | eof { error1 openingpos "unterminated OCaml type." } | _ { ocamltype openingpos lexbuf } (* ------------------------------------------------------------------------ *) (* Collect O'Caml code delimited by curly brackets. The monsters that are encountered along the way are accumulated in the list [monsters]. Nested curly brackets must be properly counted. Nested parentheses are also kept track of, so as to better report errors when they are not balanced. *) and action percent openingpos monsters = parse | '{' { let _, monsters = action false (lexeme_start_p lexbuf) monsters lexbuf in action percent openingpos monsters lexbuf } | ("}" | "%}") as delimiter { match percent, delimiter with | true, "%}" | false, "}" -> (* This is the delimiter we were instructed to look for. *) lexeme_start_p lexbuf, monsters | _, _ -> (* This is not it. *) error1 openingpos "unbalanced opening brace." } | '(' { let _, monsters = parentheses (lexeme_start_p lexbuf) monsters lexbuf in action percent openingpos monsters lexbuf } | '$' (['0'-'9']+ as i) { let monster = dollar (cpos lexbuf) (int_of_string i) in action percent openingpos (monster :: monsters) lexbuf } | poskeyword { let monster = position (cpos lexbuf) where flavor i x in action percent openingpos (monster :: monsters) lexbuf } | previouserror { error2 lexbuf "$previouserror is no longer supported." } | syntaxerror { let monster = syntaxerror (cpos lexbuf) in action percent openingpos (monster :: monsters) lexbuf } | '"' { string (lexeme_start_p lexbuf) lexbuf; action percent openingpos monsters lexbuf } | "'" { char lexbuf; action percent openingpos monsters lexbuf } | "(*" { ocamlcomment (lexeme_start_p lexbuf) lexbuf; action percent openingpos monsters lexbuf } | newline { new_line lexbuf; action percent openingpos monsters lexbuf } | ')' | eof { error1 openingpos "unbalanced opening brace." } | _ { action percent openingpos monsters lexbuf } (* ------------------------------------------------------------------------ *) (* Inside a semantic action, we keep track of nested parentheses, so as to better report errors when they are not balanced. *) and parentheses openingpos monsters = parse | '(' { let _, monsters = parentheses (lexeme_start_p lexbuf) monsters lexbuf in parentheses openingpos monsters lexbuf } | ')' { lexeme_start_p lexbuf, monsters } | '{' { let _, monsters = action false (lexeme_start_p lexbuf) monsters lexbuf in parentheses openingpos monsters lexbuf } | '$' (['0'-'9']+ as i) { let monster = dollar (cpos lexbuf) (int_of_string i) in parentheses openingpos (monster :: monsters) lexbuf } | poskeyword { let monster = position (cpos lexbuf) where flavor i x in parentheses openingpos (monster :: monsters) lexbuf } | previouserror { error2 lexbuf "$previouserror is no longer supported." 
} | syntaxerror { let monster = syntaxerror (cpos lexbuf) in parentheses openingpos (monster :: monsters) lexbuf } | '"' { string (lexeme_start_p lexbuf) lexbuf; parentheses openingpos monsters lexbuf } | "'" { char lexbuf; parentheses openingpos monsters lexbuf } | "(*" { ocamlcomment (lexeme_start_p lexbuf) lexbuf; parentheses openingpos monsters lexbuf } | newline { new_line lexbuf; parentheses openingpos monsters lexbuf } | '}' | eof { error1 openingpos "unbalanced opening parenthesis." } | _ { parentheses openingpos monsters lexbuf } (* ------------------------------------------------------------------------ *) (* Collect an attribute payload, which is terminated by a closing square bracket. Nested square brackets must be properly counted. Nested curly brackets and nested parentheses are also kept track of, so as to better report errors when they are not balanced. *) and attribute openingpos = parse | '[' { let _ = attribute (lexeme_start_p lexbuf) lexbuf in attribute openingpos lexbuf } | ']' { lexeme_start_p lexbuf } | '{' { let _, _ = action false (lexeme_start_p lexbuf) [] lexbuf in attribute openingpos lexbuf } | '(' { let _, _ = parentheses (lexeme_start_p lexbuf) [] lexbuf in attribute openingpos lexbuf } | '"' { string (lexeme_start_p lexbuf) lexbuf; attribute openingpos lexbuf } | "'" { char lexbuf; attribute openingpos lexbuf } | "(*" { ocamlcomment (lexeme_start_p lexbuf) lexbuf; attribute openingpos lexbuf } | newline { new_line lexbuf; attribute openingpos lexbuf } | '}' | ')' | eof { error1 openingpos "unbalanced opening bracket." } | _ { attribute openingpos lexbuf } (* ------------------------------------------------------------------------ *) (* Skip O'Caml comments. Comments can be nested and can contain strings or characters, which must be correctly analyzed. (A string could contain begin-of-comment or end-of-comment sequences, which must be ignored; a character could contain a begin-of-string sequence.) *) and ocamlcomment openingpos = parse | "*)" { () } | "(*" { ocamlcomment (lexeme_start_p lexbuf) lexbuf; ocamlcomment openingpos lexbuf } | '"' { string (lexeme_start_p lexbuf) lexbuf; ocamlcomment openingpos lexbuf } | "'" { char lexbuf; ocamlcomment openingpos lexbuf } | newline { new_line lexbuf; ocamlcomment openingpos lexbuf } | eof { error1 openingpos "unterminated OCaml comment." } | _ { ocamlcomment openingpos lexbuf } (* ------------------------------------------------------------------------ *) (* Skip O'Caml strings. *) and string openingpos = parse | '"' { () } | '\\' newline | newline { new_line lexbuf; string openingpos lexbuf } | '\\' _ (* Upon finding a backslash, skip the character that follows, unless it is a newline. Pretty crude, but should work. *) { string openingpos lexbuf } | eof { error1 openingpos "unterminated OCaml string." } | _ { string openingpos lexbuf } (* ------------------------------------------------------------------------ *) (* Skip O'Caml characters. A lone quote character is legal inside a comment, so if we don't recognize the matching closing quote, we simply abandon. *) and char = parse | '\\'? newline "'" { new_line lexbuf } | [^ '\\' '\''] "'" | '\\' _ "'" | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'" | '\\' 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] "'" | "" { () } (* ------------------------------------------------------------------------ *) (* Read until the end of the file. This is used after finding a %% that marks the end of the grammar specification. We update the current position as we go. 
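In particular, [new_line] is invoked at every newline, so that line numbers remain accurate.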
This allows us to build a stretch for the postlude. *) and finish = parse | newline { new_line lexbuf; finish lexbuf } | eof { lexeme_start_p lexbuf } | _ { finish lexbuf } menhir-20171222/src/lr1partial.mli0000664000175000017500000000413513217215730017063 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Grammar (* This exception is raised by [Run] if we fail to reach the goal state. This is known to happen in a few pathological cases (e.g., when a shift/reduce conflict is solved in favor of reduction, the only path towards the goal state may disappear). So we report this situation gracefully in the .conflicts file instead of failing abruptly. *) exception Oops module Run (X : sig (* A restricted set of tokens of interest. *) val tokens: TerminalSet.t (* A state of the (merged) LR(1) automaton that we're trying to simulate. *) val goal: Lr1.node end) : sig (* What we are after is a path, in the canonical LR(1) automaton, that leads from some entry node to a node [N] such that (i) [N] has a conflict involving one of the tokens of interest and (ii) [N] corresponds to the goal node, that is, the path that leads to [N] in the canonical LR(1) automaton leads to the goal node in the merged LR(1) automaton. *) val source: Item.t val path: Symbol.t array val goal: Lr0.concretelr1state (* An (arbitrarily chosen) conflict token in the goal state. *) val token: Terminal.t end menhir-20171222/src/keywordExpansion.ml0000664000175000017500000002175013217215730020212 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open UnparameterizedSyntax open Keyword open IL open CodeBits (* [posvar_ keyword] constructs the conventional name of the variable that stands for the position keyword [keyword]. *) let posvar_ = function | Position (subject, where, flavor) -> posvar subject where flavor | _ -> assert false (* [posvar_] should be applied to a position keyword *) (* [symbolstartpos producers i n] constructs an expression which, beginning at index [i], looks for the first non-empty producer and returns its start position. If none is found, this expression returns the end position of the right-hand side. This computation is modeled after the function [Parsing.symbol_start_pos] in OCaml's standard library. *) (* This cascade of [if] constructs could be quite big, and this could be a problem in terms of code size. Fortunately, we can optimize this code by computing, ahead of time, the outcome of certain comparisons. We assume that the lexer never produces a token whose start and end positions are the same. It follows that a non-nullable symbol cannot have the same start and end positions.
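(Indeed, such a symbol must derive at least one token.)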
Conversely, a symbol that generates (a subset of) the language {epsilon} must have the same start and end positions. *) (* Although this code is modeled after [Parsing.symbol_start_pos], we compare positions using physical equality, whereas they use structural equality. If for some reason a symbol has start and end positions that are structurally equal but physically different, then a difference will be observable. However, this is very unlikely. It would mean that a token has the same start and end positions (and furthermore, this position has been re-allocated). *) (* The reason why we expand [$symbolstartpos] away prior to inlining is that we want its meaning to be preserved by inlining. If we tried to preserve this keyword through the inlining phase, then (I suppose) we would have to introduce a family of keywords [$symbolstartpos(i, j)], computing over the interval from [i] to [j], and the preservation would not be exact -- because a nonempty symbol, once inlined, can be seen to be a sequence of empty and nonempty symbols. *) let rec symbolstartpos ((nullable, epsilon) as analysis) producers i n : IL.expr * KeywordSet.t = if i = n then (* Return [$endpos]. *) let keyword = Position (Left, WhereEnd, FlavorPosition) in EVar (posvar_ keyword), KeywordSet.singleton keyword else (* [symbol] is the symbol that appears in the right-hand side at position i. [x] is the identifier that is bound to it. We generate code that compares [$startpos($i)] and [$endpos($i)]. If they differ, we return [$startpos($i)]. Otherwise, we continue. Furthermore, as noted above, if [symbol] is not nullable, then we know that the start and end positions must differ, so we optimize this case. *) let producer = List.nth producers i in let symbol = producer_symbol producer and x = producer_identifier producer in let startp = Position (RightNamed x, WhereStart, FlavorPosition) and endp = Position (RightNamed x, WhereEnd, FlavorPosition) in if not (nullable symbol) then (* The start and end positions must differ. *) EVar (posvar_ startp), KeywordSet.singleton startp else let continue, keywords = symbolstartpos analysis producers (i + 1) n in if epsilon symbol then (* The start and end positions must be the same. *) continue, keywords else (* In the general case, a runtime test is required. *) EIfThenElse ( EApp (EVar "Pervasives.(!=)", [ EVar (posvar_ startp); EVar (posvar_ endp) ]), EVar (posvar_ startp), continue ), KeywordSet.add startp (KeywordSet.add endp keywords) (* [define keyword1 f keyword2] macro-expands [keyword1] as [f(keyword2)], where [f] is a function of expressions to expressions. *) let define keyword1 f keyword2 = Action.define keyword1 (KeywordSet.singleton keyword2) (mlet [ PVar (posvar_ keyword1) ] [ f (EVar (posvar_ keyword2)) ]) (* An [ofs] keyword is expanded away. It is defined in terms of the corresponding [pos] keyword. *) let expand_ofs keyword action = match keyword with | Position (subject, where, FlavorOffset) -> define keyword (fun e -> ERecordAccess (e, "Lexing.pos_cnum")) (Position (subject, where, FlavorPosition)) action | _ -> action (* [$symbolstartpos] is expanded into a cascade of [if] constructs, modeled after [Parsing.symbol_start_pos]. 
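For instance, if the first producer [x] carries a symbol that is nullable but does not generate (a subset of) {epsilon}, the expansion reads roughly: if [$startpos(x)] and [$endpos(x)] differ, return [$startpos(x)]; otherwise, examine the remaining producers in the same way.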
*) let expand_symbolstartpos analysis producers n keyword action = match keyword with | Position (Left, WhereSymbolStart, FlavorPosition) -> let expansion, keywords = symbolstartpos analysis producers 0 n in Action.define keyword keywords (mlet [ PVar (posvar_ keyword) ] [ expansion ]) action | Position (RightNamed _, WhereSymbolStart, FlavorPosition) -> (* [$symbolstartpos(x)] does not exist. *) assert false | _ -> action (* [$startpos] and [$endpos] are expanded away. *) let expand_startend producers n keyword action = match keyword with | Position (Left, WhereStart, flavor) -> (* [$startpos] is defined as [$startpos($1)] if this production has nonzero length and [$endpos($0)] otherwise. *) define keyword (fun e -> e) ( if n > 0 then let x = producer_identifier (List.hd producers) in Position (RightNamed x, WhereStart, flavor) else Position (Before, WhereEnd, flavor) ) action | Position (Left, WhereEnd, flavor) -> (* [$endpos] is defined as [$endpos($n)] if this production has nonzero length and [$endpos($0)] otherwise. *) define keyword (fun e -> e) ( if n > 0 then let x = producer_identifier (List.hd (List.rev producers)) in Position (RightNamed x, WhereEnd, flavor) else Position (Before, WhereEnd, flavor) ) action | _ -> action (* [expand_round] performs one round of expansion on [action], using [f] as a rewriting rule. *) let expand_round f action = KeywordSet.fold f (Action.keywords action) action (* [expand_action] performs macro-expansion in [action]. We do this in several rounds: first, expand the [ofs] keywords away; then, expand [symbolstart] away; then, expand the rest. We do this in this order because each round can cause new keywords to appear, which must be eliminated by the following rounds. *) let expand_action analysis producers action = let n = List.length producers in (* The [ofs] keyword family is defined in terms of the [pos] family by accessing the [pos_cnum] field. Expand these keywords away first. *) let action = expand_round expand_ofs action in (* Expand [$symbolstartpos] away. *) let action = expand_round (expand_symbolstartpos analysis producers n) action in (* Then, expand away the non-[ofs] keywords. *) let action = expand_round (expand_startend producers n) action in action (* Silently analyze the grammar so as to find out which symbols are nullable and which symbols generate a subset of {epsilon}. This is used to optimize the expansion of $symbolstartpos. *) let analysis grammar = let module G = GrammarFunctor.Make(struct let grammar = grammar let verbose = false end) in let lookup (nt : Syntax.symbol) : G.Symbol.t = try G.Symbol.lookup nt with Not_found -> assert false in let nullable nt : bool = G.Analysis.nullable_symbol (lookup nt) and epsilon nt : bool = G.TerminalSet.is_empty (G.Analysis.first_symbol (lookup nt)) in nullable, epsilon (* Put everything together. *) let expand_branch analysis branch = { branch with action = expand_action analysis branch.producers branch.action } let expand_rule analysis rule = { rule with branches = List.map (expand_branch analysis) rule.branches } let expand_grammar grammar = let analysis = analysis grammar in { grammar with rules = StringMap.map (expand_rule analysis) grammar.rules } menhir-20171222/src/SortUnification.ml0000664000175000017500000000762513217215730017766 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria.
All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module implements sort inference. *) (* -------------------------------------------------------------------------- *) (* The syntax of sorts is: sort ::= (sort, ..., sort) -> * where the arity (the number of sorts on the left-hand side of the arrow) can be zero. *) module S = struct type 'a structure = | Arrow of 'a list let map f (Arrow xs) = Arrow (List.map f xs) let iter f (Arrow xs) = List.iter f xs exception Iter2 let iter2 f (Arrow xs1) (Arrow xs2) = let n1 = List.length xs1 and n2 = List.length xs2 in if n1 = n2 then List.iter2 f xs1 xs2 else raise Iter2 end include S (* -------------------------------------------------------------------------- *) (* Instantiate the unification algorithm with the above signature. *) include Unifier.Make(S) type sort = term = | TVar of int | TNode of sort structure (* -------------------------------------------------------------------------- *) (* Sort constructors. *) let arrow (args : variable list) : variable = fresh (Some (Arrow args)) let star : variable = arrow [] let fresh () = fresh None (* Sort accessors. *) let domain (x : variable) : variable list option = match structure x with | Some (Arrow xs) -> Some xs | None -> None (* -------------------------------------------------------------------------- *) (* Converting between sorts and ground sorts. *) let rec ground s = match s with | TVar _ -> (* All variables are replaced with [*]. *) GroundSort.GArrow [] | TNode (Arrow ss) -> GroundSort.GArrow (List.map ground ss) let rec unground (GroundSort.GArrow ss) = TNode (Arrow (List.map unground ss)) (* -------------------------------------------------------------------------- *) (* A name generator for unification variables. *) let make_gensym () : unit -> string = let c = ref 0 in let gensym () = let n = Misc.postincrement c in Printf.sprintf "%c%s" (char_of_int (Char.code 'a' + n mod 26)) (let d = n / 26 in if d = 0 then "" else string_of_int d) in gensym (* A memoized name generator. *) let make_name () : int -> string = let gensym = make_gensym() in Memoize.Int.memoize (fun _x -> gensym()) (* -------------------------------------------------------------------------- *) (* A printer. *) let rec print name (b : Buffer.t) (sort : sort) = match sort with | TVar x -> Printf.bprintf b "%s" (name x) | TNode (S.Arrow []) -> Printf.bprintf b "*" | TNode (S.Arrow (sort :: sorts)) -> (* Always parenthesize the domain, so there is no ambiguity. *) Printf.bprintf b "(%a%a) -> *" (print name) sort (print_comma_sorts name) sorts and print_comma_sorts name b sorts = List.iter (print_comma_sort name b) sorts and print_comma_sort name b sort = Printf.bprintf b ", %a" (print name) sort let print sort : string = let b = Buffer.create 32 in print (make_name()) b sort; Buffer.contents b menhir-20171222/src/IO.ml0000664000175000017500000001065513217215730015152 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. 
*) (* *) (******************************************************************************) (* Input-output utilities. *) (* ------------------------------------------------------------------------- *) (* [try/finally] has the same semantics as in Java. *) let try_finally action handler = let result = try action() with e -> handler(); raise e in handler(); result (* ------------------------------------------------------------------------- *) (* [moving_away filename action] moves the file [filename] away (if it exists), performs [action], then moves the file back into place (if it was moved away). *) let moving_away filename action = if Sys.file_exists filename then let newname = filename ^ ".moved_by_menhir" in Sys.rename filename newname; try_finally action (fun () -> Sys.rename newname filename ) else action() (* ------------------------------------------------------------------------- *) (* [with_file filename creation action] creates the file [filename] by running [creation], then runs [action], and ensures that the file is removed in the end. *) let with_file filename creation action = creation(); try_finally action (fun () -> Sys.remove filename) (* ------------------------------------------------------------------------- *) (* [exhaust channel] reads all of the data that's available on [channel]. It does not assume that the length of the data is known ahead of time. It does not close the channel. *) let chunk_size = 16384 let exhaust channel = let buffer = Buffer.create chunk_size in let chunk = Bytes.create chunk_size in let rec loop () = let length = input channel chunk 0 chunk_size in if length = 0 then Buffer.contents buffer else begin Buffer.add_subbytes buffer chunk 0 length; loop() end in loop() (* ------------------------------------------------------------------------- *) (* [invoke command] invokes an external command (which expects no input) and returns its output, if the command succeeds. It returns [None] if the command fails. *) let invoke command = let ic = Unix.open_process_in command in (* 20130911 Be careful to read in text mode, so as to avoid newline translation problems (which would manifest themselves on Windows). *) set_binary_mode_in ic false; let result = exhaust ic in match Unix.close_process_in ic with | Unix.WEXITED 0 -> Some result | _ -> None (* ------------------------------------------------------------------------- *) (* [read_whole_file filename] reads the file [filename] in text mode and returns its contents as a string. *) let read_whole_file filename = (* Open the file in text mode, so that (under Windows) CRLF is converted to LF. This guarantees that one byte is one character and seems to be required in order to report accurate positions. *) let channel = open_in filename in (* The standard library functions [pos_in] and [seek_in] do not work correctly when CRLF conversion is being performed, so we abandon their use. (They were used to go and extract the text of semantic actions.) Instead we load the entire file into memory up front, and work with a string. *) (* The standard library function [in_channel_length] does not work correctly when CRLF conversion is being performed, so we do not use it to read the whole file. And the standard library function [Buffer.add_channel] uses [really_input] internally, so we cannot use it either. Bummer. 
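Hence, we simply reuse [exhaust], which reads chunk by chunk into a [Buffer].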
*) let s = exhaust channel in close_in channel; s menhir-20171222/src/partialGrammar.mli0000664000175000017500000000203313217215730017746 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Syntax val join_partial_grammars : partial_grammar list -> grammar menhir-20171222/src/settings.mli0000664000175000017500000001646513217215730016655 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module parses the command line. *) (* The list of file names that appear on the command line. *) val filenames: string list (* How to deal with the type of tokens. *) type token_type_mode = | TokenTypeAndCode (* produce the definition of the [token] type and code for the parser *) | TokenTypeOnly (* produce the type definition only *) | CodeOnly of string (* produce the code only, by relying on an external token type *) val token_type_mode: token_type_mode (* How to construct the automaton. *) type construction_mode = | ModeCanonical (* --canonical: canonical Knuth LR(1) automaton *) | ModeInclusionOnly (* --no-pager : states are merged when there is an inclusion relationship, default reductions are used *) | ModePager (* normal mode: states are merged as per Pager's criterion, default reductions are used *) | ModeLALR (* --lalr : states are merged as in an LALR generator, i.e. as soon as they have the same LR(0) core *) val construction_mode: construction_mode (* Whether conflicts should be explained. *) val explain: bool (* Whether the automaton should be dumped. *) val dump: bool (* Whether the automaton's construction should be explained (very verbose). *) val follow: bool (* Whether the grammar's dependence graph should be dumped. *) val graph: bool (* Whether tracing instructions should be generated. *) val trace: bool (* Whether one should stop and print the grammar after joining and expanding the grammar. *) type print_mode = | PrintNormal | PrintForOCamlyacc | PrintUnitActions of bool (* if true, declare unit tokens *) type preprocess_mode = | PMNormal (* preprocess and continue *) | PMOnlyPreprocess of print_mode (* preprocess, print grammar, stop *) val preprocess_mode: preprocess_mode (* Whether one should invoke ocamlc in order to infer types for all nonterminals. *) val infer: bool (* Whether one should inline the nonterminal definitions marked with the %inline keyword. *) val inline: bool (* Whether and how one should invoke ocamldep in order to compute and display dependencies.
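(This corresponds to the [--depend] and [--raw-depend] command line switches.)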
*) type ocamldep_mode = | OMNone (* do not invoke ocamldep *) | OMRaw (* invoke ocamldep and echo its raw output *) | OMPostprocess (* invoke ocamldep and postprocess its output *) val depend: ocamldep_mode (* Whether comments should be printed or discarded. *) val comment: bool (* This undocumented flag suppresses prefixing of identifiers with an unlikely prefix in the generated code. This increases the code's readability, but can cause identifiers in semantic actions to be captured. *) val noprefix: bool (* This undocumented flag causes the code to be transformed by [Inline]. It is on by default. *) val code_inlining: bool (* How [ocamlc] and [ocamldep] should be invoked. *) val ocamlc: string val ocamldep: string (* How verbose we should be. *) val logG: int (* diagnostics on the grammar *) val logA: int (* diagnostics on the automaton *) val logC: int (* diagnostics on the generated code *) (* Whether tasks should be timed. *) val timings: bool (* The base name that should be used for the files that we create. This name can contain a path. *) val base: string (* The filename of the standard library. *) val stdlib_filename : string (* Whether Menhir should behave as an interpreter. *) val interpret : bool (* Whether the interpreter should build and display concrete syntax trees. *) val interpret_show_cst : bool (* Whether Menhir should behave as an interpreter, in a special mode where it checks one input sentence, expecting it to trigger an error at the last token, and displays which state was reached. *) val interpret_error : bool (* Whether to use the table-based back-end ([true]) or the code-based back-end ([false]). *) val table : bool (* Whether to generate the inspection API (which requires GADTs, and requires producing more tables). *) val inspection : bool (* Whether to generate a coq description of the grammar and automaton. *) val coq : bool (* Whether the coq description must contain completeness proofs. *) val coq_no_complete : bool (* Whether the coq backend should ignore types and semantic actions. *) val coq_no_actions : bool (* Whether unresolved LR(1) conflicts, useless precedence declarations, productions that are never reduced, etc. should be treated as errors. *) val strict: bool (* This flag causes the exception [Error] to be declared equal to [Parsing.Parse_error]. This is useful when full compatibility with ocamlyacc is desired. In particular, this is used when building Menhir itself, since Menhir is compiled first using ocamlyacc, then using Menhir. *) val fixedexc: bool (* This is a set of tokens which may be unused and about which we should not emit a warning. *) val ignored_unused_tokens: StringSet.t (* This flag supersedes the set [ignored_unused_tokens]. If it is set, then we should not emit a warning about any unused tokens. *) val ignore_all_unused_tokens: bool (* This flag suppresses all warnings about unused precedence levels. *) val ignore_all_unused_precedence_levels: bool (* This flag causes Menhir to produce a list of erroneous input sentences. Enough sentences are computed to produce exactly one error in every state where an error can occur. *) val list_errors: bool (* This flag causes Menhir to read the error message descriptions stored in [filename] and compile them to OCaml code. *) val compile_errors: string option (* If present, this is a pair of .messages files whose contents should be compared.
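(The [--compare-errors] switch is expected to appear twice on the command line, once per file.)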
*) val compare_errors: (string * string) option (* This flag causes Menhir to read the error message descriptions stored in [filename] and re-generate the auto-generated comments, which begin with [##]. This allows bringing these comments up to date when the grammar evolves. *) val update_errors: string option (* This flag causes Menhir to read the error message descriptions stored in [filename] and echo the error sentences (and nothing else; no messages, no comments). *) val echo_errors: string option (* This flag causes Menhir to produce a [.cmly] file, which contains a binary-format description of the grammar and automaton. *) val cmly: bool menhir-20171222/src/SortUnification.mli0000664000175000017500000000442413217215730020131 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module implements sort inference. *) (* -------------------------------------------------------------------------- *) (* The syntax of sorts is: sort ::= (sort, ..., sort) -> * where the arity (the number of sorts on the left-hand side of the arrow) can be zero. See [GroundSort]. *) type 'a structure = | Arrow of 'a list type sort = | TVar of int | TNode of sort structure (* -------------------------------------------------------------------------- *) (* Sort unification. *) type variable val star: variable val arrow: variable list -> variable val fresh: unit -> variable (* [domain] is the opposite of [arrow]. If [x] has been unified with an arrow, then [domain x] returns its domain. Otherwise, it returns [None]. Use with caution. *) val domain: variable -> variable list option exception Unify of variable * variable exception Occurs of variable * variable val unify: variable -> variable -> unit (* Once unification is over, a unification variable can be decoded as a sort. *) val decode: variable -> sort (* Grounding a sort replaces all sort variables with the sort [*]. *) val ground: sort -> GroundSort.sort val unground: GroundSort.sort -> sort (* -------------------------------------------------------------------------- *) (* A sort can be printed. *) val print: sort -> string menhir-20171222/src/fancyDriver.ml0000664000175000017500000000563313217215730017117 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* The module [Driver] serves to offer a unified API to the parser, which could be produced by either ocamlyacc or Menhir. *) (* This is the Menhir-specific driver. We wish to handle syntax errors in a more ambitious manner, so as to help our end users understand their mistakes. *) open Parser.MenhirInterpreter (* incremental API to our parser *) (* [fail buffer lexbuf s] is invoked if a syntax error is encountered in state [s]. 
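The number [s] is used to look up a diagnostic message in the table generated from the file [ParserMessages.messages].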
*) let fail buffer lexbuf (s : int) = (* Display a nice error message. In principle, the table found in [ParserMessages] should be complete, so we should obtain a nice message. If [Not_found] is raised, we produce a generic message, which is better than nothing. Note that the OCaml code in [ParserMessages] is auto-generated based on the table in [ParserMessages.messages]. *) let message = try ParserMessages.message s with Not_found -> Printf.sprintf "Unknown syntax error (in state %d).\n" s in (* Show the two tokens between which the error took place. *) let where = MenhirLib.ErrorReports.show InputFile.chunk buffer in (* Hack: remove the final newline, because [Error.error] adds one. *) let message = String.sub message 0 (String.length message - 1) in (* Display our message and die. *) Error.error (Positions.lexbuf lexbuf) "syntax error %s.\n%s" where message (* Same as above, except we expect a checkpoint instead of a state [s]. *) let fail buffer lexbuf checkpoint = match checkpoint with | HandlingError env -> let s = current_state_number env in fail buffer lexbuf s | _ -> assert false (* this cannot happen *) (* The entry point. *) let grammar lexer lexbuf = (* Keep track of the last two tokens in a buffer. *) let buffer, lexer = MenhirLib.ErrorReports.wrap lexer in loop_handle (fun v -> v) (fail buffer lexbuf) (lexer_lexbuf_to_supplier lexer lexbuf) (Parser.Incremental.grammar lexbuf.Lexing.lex_curr_p) menhir-20171222/src/cmly_read.mli0000664000175000017500000000244613217215730016752 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* The functor [Read] reads a .cmly file. If the file is unreadable, the exception [Error] is raised. Otherwise, the functor builds a module of type [Cmly_api.GRAMMAR], which gives access to a description of the grammar and automaton. *) exception Error of string module Read (X : sig val filename : string end) : Cmly_api.GRAMMAR menhir-20171222/src/RowDisplacement.ml0000664000175000017500000002142113217215730017734 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. *) (* *) (******************************************************************************) (* This module compresses a two-dimensional table, where some values are considered insignificant, via row displacement. *) (* This idea reportedly appears in Aho and Ullman's ``Principles of Compiler Design'' (1977). It is evaluated in Tarjan and Yao's ``Storing a Sparse Table'' (1979) and in Dencker, Dürre, and Heuft's ``Optimization of Parser Tables for Portable Compilers'' (1984). *) (* A compressed table is represented as a pair of arrays. The displacement array is an array of offsets into the data array. 
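Reading the entry at row [i] and column [j] then amounts to reading the data array at index [displacement.(i) + j], up to the sign encoding performed by [encode] and [decode] below.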
*) type 'a table = int array * (* displacement *) 'a array (* data *) (* In a natural version of this algorithm, displacements would be greater than (or equal to) [-n]. However, in the particular setting of Menhir, both arrays are intended to be compressed with [PackedIntArray], which does not efficiently support negative numbers. For this reason, we are careful not to produce negative displacements. *) (* In order to avoid producing negative displacements, we simply use the least significant bit as the sign bit. This is implemented by [encode] and [decode] below. *) (* One could also think, say, of adding [n] to every displacement, so as to ensure that all displacements are nonnegative. This would work, but would require [n] to be published, for use by the decoder. *) let encode (displacement : int) : int = if displacement >= 0 then displacement lsl 1 else (-displacement) lsl 1 + 1 let decode (displacement : int) : int = if displacement land 1 = 0 then displacement lsr 1 else -(displacement lsr 1) (* It is reasonable to assume that, as matrices grow large, their density becomes low, i.e., they have many insignificant entries. As a result, it is important to work with a sparse data structure for rows. We internally represent a row as a list of its significant entries, where each entry is a pair of a [j] index and an element. *) type 'a row = (int * 'a) list (* [compress equal insignificant dummy m n t] turns the two-dimensional table [t] into a compressed table. The parameter [equal] is equality of data values. The parameter [insignificant] tells which data values are insignificant, and can thus be overwritten with other values. The parameter [dummy] is used to fill holes in the data array. [m] and [n] are the integer dimensions of the table [t]. *) let compress (equal : 'a -> 'a -> bool) (insignificant : 'a -> bool) (dummy : 'a) (m : int) (n : int) (t : 'a array array) : 'a table = (* Be defensive. *) assert (Array.length t = m); assert begin for i = 0 to m - 1 do assert (Array.length t.(i) = n) done; true end; (* This turns a row-as-array into a row-as-sparse-list. The row is accompanied by its index [i] and by its rank (the number of its significant entries, that is, the length of the row-as-a-list). *) let sparse (i : int) (line : 'a array) : int * int * 'a row (* index, rank, row *) = let rec loop (j : int) (rank : int) (row : 'a row) = if j < 0 then i, rank, row else let x = line.(j) in if insignificant x then loop (j - 1) rank row else loop (j - 1) (1 + rank) ((j, x) :: row) in loop (n - 1) 0 [] in (* Construct an array of all rows, together with their index and rank. *) let rows : (int * int * 'a row) array = (* index, rank, row *) Array.mapi sparse t in (* Sort this array by decreasing rank. This does not have any impact on correctness, but reportedly improves compression. The intuitive idea is that rows with few significant elements are easy to fit, so they should be inserted last, after the problem has become quite constrained by fitting the heavier rows. This heuristic is attributed to Ziegler. *) Array.fast_sort (fun (_, rank1, _) (_, rank2, _) -> compare rank2 rank1 ) rows; (* Allocate a one-dimensional array of displacements. *) let displacement : int array = Array.make m 0 in (* Allocate a one-dimensional, infinite array of values. Indices into this array are written [k]. *) let data : 'a InfiniteArray.t = InfiniteArray.make dummy in (* Determine whether [row] fits at offset [k] within the current [data] array, up to extension of this array.
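A row fits if each of its significant entries falls beyond the current extent of the data array, or else on a cell that is insignificant or holds an equal value.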
*) (* Note that this check always succeeds when [k] equals the length of the [data] array. Indeed, the loop is then skipped. This property guarantees the termination of the recursive function [fit] below. *) let fits k (row : 'a row) : bool = let d = InfiniteArray.extent data in let rec loop = function | [] -> true | (j, x) :: row -> (* [x] is a significant element. *) (* By hypothesis, [k + j] is nonnegative. If it is greater than or equal to the current length of the data array, stop -- the row fits. *) assert (k + j >= 0); if k + j >= d then true (* We now know that [k + j] is within bounds of the data array. Check whether it is compatible with the element [y] found there. If it is, continue. If it isn't, stop -- the row does not fit. *) else let y = InfiniteArray.get data (k + j) in if insignificant y || equal x y then loop row else false in loop row in (* Find the leftmost position where a row fits. *) (* If the leftmost significant element in this row is at offset [j], then we can hope to fit as far left as [-j] -- so this element lands at offset [0] in the data array. *) (* Note that displacements may be negative. This means that, for insignificant elements, accesses to the data array could fail: they could be out of bounds, either towards the left or towards the right. This is not a problem, as long as [get] is invoked only at significant elements. *) let rec fit k row : int = if fits k row then k else fit (k + 1) row in let fit row = match row with | [] -> 0 (* irrelevant *) | (j, _) :: _ -> fit (-j) row in (* Write [row] at (compatible) offset [k]. *) let rec write k = function | [] -> () | (j, x) :: row -> InfiniteArray.set data (k + j) x; write k row in (* Iterate over the sorted array of rows. Fit and write each row at the leftmost compatible offset. Update the displacement table. *) Array.iter (fun (i, _, row) -> let k = fit row in (* if [row] has leading insignificant elements, then [k] can be negative *) write k row; displacement.(i) <- encode k ) rows; (* Return the compressed tables. *) displacement, InfiniteArray.domain data (* [get ct i j] returns the value found at indices [i] and [j] in the compressed table [ct]. This function call is permitted only if the value found at indices [i] and [j] in the original table is significant -- otherwise, it could fail abruptly. *) (* Together, [compress] and [get] have the property that, if the value found at indices [i] and [j] in an uncompressed table [t] is significant, then [get (compress t) i j] is equal to that value. *) let get (displacement, data) i j = assert (0 <= i && i < Array.length displacement); let k = decode displacement.(i) in assert (0 <= k + j && k + j < Array.length data); (* failure of this assertion indicates an attempt to access an insignificant element that happens to be mapped out of the bounds of the [data] array. *) data.(k + j) (* [getget] is a variant of [get] which only requires read access, via accessors, to the two components of the table. *) let getget get_displacement get_data (displacement, data) i j = let k = decode (get_displacement displacement i) in get_data data (k + j) menhir-20171222/src/LRijkstra.mli0000664000175000017500000000314613217215730016716 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. 
This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* The purpose of this algorithm is to find, for each pair of a state [s] and a terminal symbol [z] such that looking at [z] in state [s] causes an error, a minimal path (starting in some initial state) that actually triggers this error. *) (* The result of this analysis is a [.messages] file. It is written to the standard output channel. No result is returned. *) module Run (X : sig (* If [verbose] is set, produce various messages on [stderr]. *) val verbose: bool (* If [statistics] is defined, it is interpreted as the name of a file to which one line of statistics is appended. *) val statistics: string option end) : sig end menhir-20171222/src/Memoize.ml0000664000175000017500000000402113217215730016236 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) module type MEMOIZER = sig (* A fixed type of keys. *) type key (* A memoization combinator for this type. *) val memoize: (key -> 'a) -> (key -> 'a) end module type IMPERATIVE_MAP = sig (* A type of keys. *) type key (* A type of imperative maps. *) type 'a t (* Creation, insertion, lookup. *) val create: int -> 'a t val add: 'a t -> key -> 'a -> unit val find: 'a t -> key -> 'a end module Make (M : IMPERATIVE_MAP) = struct type key = M.key let memoize (f : key -> 'a) = let table = M.create 127 in fun x -> try M.find table x with Not_found -> let y = f x in M.add table x y; y end module MakeViaMap (O : Map.OrderedType) = Make(struct module M = Map.Make(O) type key = O.t type 'a t = 'a M.t ref let create _ = ref M.empty let add table key data = table := M.add key data !table let find table key = M.find key !table end) module MakeViaHashtbl (H : Hashtbl.HashedType) = Make(Hashtbl.Make(H)) module Int = MakeViaHashtbl(struct type t = int let equal = (=) let hash = Hashtbl.hash end) menhir-20171222/src/segment.mll0000664000175000017500000001132413217215730016453 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This lexer is used to cut an input into segments, delimited by a blank line. (More precisely, by a run of at least one blank line and zero or more comment lines.) It produces a list of segments, where each segment is represented as a pair of positions. It is stand-alone and cannot fail. *) (* The whitespace in between two segments can contain comments, and the user may wish to preserve them. For this reason, we view a run of whitespace as a segment, too, and we accompany each segment with a tag which is either [Segment] or [Whitespace]. 
The two kinds of segments must alternate in the list that we produce. *) { type tag = | Segment | Whitespace open Lexing } let newline = ('\010' | '\013' | "\013\010") let whitespace = [ ' ' '\t' ';' ] let comment = '#' [^'\010''\013']* newline (* In the idle state, we skip whitespace, newlines and comments (while updating the line counter). If we reach the end of file, we return the list of all segments found so far. If we reach a non-blank non-comment character, we record its position and switch to the busy state. *) rule idle opening segments = parse | whitespace { idle opening segments lexbuf } | newline { new_line lexbuf; idle opening segments lexbuf } | comment { new_line lexbuf; idle opening segments lexbuf } | eof { let closing = lexbuf.lex_start_p in let segment = Whitespace, opening, closing in let segments = segment :: segments in List.rev segments } | _ { let closing = lexbuf.lex_start_p in let segment = Whitespace, opening, closing in let segments = segment :: segments in let opening = closing in busy segments opening false lexbuf } (* In the busy state, we skip everything, maintaining one bit [just_saw_a_newline], until [just_saw_a_newline] is true and we find a second newline. This marks the end of a segment, and we revert back to the idle state. If we reach the end of file, we consider that this is also the end of a segment. *) and busy segments opening just_saw_a_newline = parse | whitespace { busy segments opening just_saw_a_newline lexbuf } | newline { new_line lexbuf; (* The newline that we just saw is already included in the segment. This one is not included. *) let closing = lexbuf.lex_start_p in if just_saw_a_newline then let segment = Segment, opening, closing in let segments = segment :: segments in let opening = closing in idle opening segments lexbuf else busy segments opening true lexbuf } | eof { let closing = lexbuf.lex_start_p in let segment = Segment, opening, closing in let segments = segment :: segments in List.rev segments } | _ { busy segments opening false lexbuf } { (* This wrapper function reads a file, cuts it into segments, and creates a fresh lexbuf for each segment, taking care to adjust its start position. *) let segment filename : (tag * string * lexbuf) list = let content = IO.read_whole_file filename in let lexbuf = from_string content in lexbuf.lex_curr_p <- { lexbuf.lex_curr_p with pos_fname = filename }; let segments : (tag * position * position) list = idle lexbuf.lex_curr_p [] lexbuf in List.map (fun (tag, startp, endp) -> let start = startp.pos_cnum in let length = endp.pos_cnum - start in let content = String.sub content start length in let lexbuf = from_string content in lexbuf.lex_start_p <- startp; lexbuf.lex_curr_p <- startp; lexbuf.lex_abs_pos <- startp.pos_cnum; (* That was tricky to find out. See [Lexing.engine]. [pos_cnum] is updated based on [buf.lex_abs_pos + buf.lex_curr_pos]. *) tag, content, lexbuf ) segments } menhir-20171222/src/PackedIntArray.mli0000664000175000017500000000534213217215730017652 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. 
*) (* *) (******************************************************************************) (* A packed integer array is represented as a pair of an integer [k] and a string [s]. The integer [k] is the number of bits per integer that we use. The string [s] is just an array of bits, which is read in 8-bit chunks. *) (* The ocaml programming language treats string literals and array literals in slightly different ways: the former are statically allocated, while the latter are dynamically allocated. (This is rather arbitrary.) In the context of Menhir's table-based back-end, where compact, immutable integer arrays are needed, ocaml strings are preferable to ocaml arrays. *) type t = int * string (* [pack a] turns an array of integers into a packed integer array. *) (* Because the sign bit is the most significant bit, the magnitude of any negative number is the word size. In other words, [pack] does not achieve any space savings as soon as [a] contains any negative numbers, even if they are ``small''. *) val pack: int array -> t (* [get t i] returns the integer stored in the packed array [t] at index [i]. *) (* Together, [pack] and [get] satisfy the following property: if the index [i] is within bounds, then [get (pack a) i] equals [a.(i)]. *) val get: t -> int -> int (* [get1 t i] returns the integer stored in the packed array [t] at index [i]. It assumes (and does not check) that the array's bit width is [1]. The parameter [t] is just a string. *) val get1: string -> int -> int (* [unflatten1 (n, data) i j] accesses the two-dimensional bitmap represented by [(n, data)] at indices [i] and [j]. The integer [n] is the width of the bitmap; the string [data] is the second component of the packed array obtained by encoding the table as a one-dimensional array. *) val unflatten1: int * string -> int -> int -> int menhir-20171222/src/LowIntegerPriorityQueue.ml0000664000175000017500000001010213217215730021454 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module implements a simple-minded priority queue, under the assumption that priorities are low nonnegative integers. *) module MyArray = ResizableArray module MyStack = ResizableArray type 'a t = { (* A priority queue is represented as a resizable array, indexed by priorities, of stacks (implemented as resizable arrays). There is no a priori bound on the size of the main array -- its size is increased if needed. It is up to the user to use priorities of reasonable magnitude. *) a: 'a MyStack.t MyArray.t; (* Index of lowest nonempty stack, if there is one; or lower (sub-optimal, but safe). If the queue is empty, [best] is arbitrary. *) mutable best: int; (* Current number of elements in the queue. Used in [remove] to stop the search for a nonempty bucket. *) mutable cardinal: int; } let create default = (* Set up the main array so that it initially has 16 priority levels and, whenever new levels are added, each of them is initialized with a fresh empty stack. The dummy stack is never accessed; it is used to fill empty physical slots in the main array. 
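As an illustration of the queue's intended behavior (a sketch, with
   made-up values):

     let q = create 0 in
     add q 10 3;
     add q 20 1;
     assert (remove q = Some 20); (* the lowest priority comes out first *)
     assert (remove q = Some 10);
     assert (remove q = None)

   Elements that share a priority level come out in LIFO order, since each
   level is organized as a stack.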
*) let dummy = MyStack.make_ 0 default in let a = MyArray.make 16 dummy (fun _ -> MyStack.make_ 1024 default) in { a; best = 0; cardinal = 0 } let add q x priority = assert (0 <= priority); q.cardinal <- q.cardinal + 1; (* Grow the main array if necessary. *) if MyArray.length q.a <= priority then MyArray.resize q.a (priority + 1); (* Find out which stack we should push into. *) let xs = MyArray.get q.a priority in (* assert (xs != MyArray.default q.a); *) (* Push. *) MyStack.push xs x; (* Decrease [q.best], if necessary, so as not to miss the new element. In the special case of Dijkstra's algorithm or A*, this never happens. *) if priority < q.best then q.best <- priority let is_empty q = q.cardinal = 0 let cardinal q = q.cardinal let rec remove_nonempty q = (* Look for the next nonempty bucket. We know there is one. This may seem inefficient, because it is a linear search. However, in applications where [q.best] never decreases, the cumulated cost of this loop is the maximum priority ever used, which is good. *) let xs = MyArray.get q.a q.best in if MyStack.length xs = 0 then begin (* As noted below, [MyStack.pop] does not physically shrink the stack. When we find that a priority level has become empty, we physically empty it, so as to free the (possibly large) space that it takes up. This strategy is good when the client is Dijkstra's algorithm or A*. *) let dummy = MyArray.default q.a in MyArray.set q.a q.best dummy; q.best <- q.best + 1; remove_nonempty q end else begin q.cardinal <- q.cardinal - 1; Some (MyStack.pop xs) (* Note: [MyStack.pop] does not shrink the physical array underlying the stack. This is good, because we are likely to push new elements into this stack. *) end let remove q = if q.cardinal = 0 then None else remove_nonempty q let rec repeat q f = match remove q with | None -> () | Some x -> f x; repeat q f menhir-20171222/src/Engine.ml0000664000175000017500000010044413217215730016044 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. *) (* *) (******************************************************************************) type position = Lexing.position open EngineTypes (* The LR parsing engine. *) (* This module is used: - at compile time, if so requested by the user, via the --interpret options; - at run time, in the table-based back-end. *) module Make (T : TABLE) = struct (* This propagates type and exception definitions. The functions [number], [production_index], [find_production], too, are defined by this [include] declaration. *) include T type 'a env = (state, semantic_value, token) EngineTypes.env (* ------------------------------------------------------------------------ *) (* The type [checkpoint] represents an intermediate or final result of the parser. See [EngineTypes]. *) (* The type [checkpoint] is presented to the user as a private type (see [IncrementalEngine]). This prevents the user from manufacturing checkpoints (i.e., continuations) that do not make sense. (Such continuations could potentially violate the LR invariant and lead to crashes.) *) (* 2017/03/29 Although [checkpoint] is a private type, we now expose a constructor function, [input_needed]. 
This function allows manufacturing a checkpoint out of an environment. For this reason, the type [env] must also be parameterized with ['a]. *) type 'a checkpoint = | InputNeeded of 'a env | Shifting of 'a env * 'a env * bool | AboutToReduce of 'a env * production | HandlingError of 'a env | Accepted of 'a | Rejected (* ------------------------------------------------------------------------ *) (* In the code-based back-end, the [run] function is sometimes responsible for pushing a new cell on the stack. This is motivated by code sharing concerns. In this interpreter, there is no such concern; [run]'s caller is always responsible for updating the stack. *) (* In the code-based back-end, there is a [run] function for each state [s]. This function can behave in two slightly different ways, depending on when it is invoked, or (equivalently) depending on [s]. If [run] is invoked after shifting a terminal symbol (or, equivalently, if [s] has a terminal incoming symbol), then [run] discards a token, unless [s] has a default reduction on [#]. (Indeed, in that case, requesting the next token might drive the lexer off the end of the input stream.) If, on the other hand, [run] is invoked after performing a goto transition, or invoked directly by an entry point, then there is nothing to discard. These two cases are reflected in [CodeBackend.gettoken]. Here, the code is structured in a slightly different way. It is up to the caller of [run] to indicate whether to discard a token, via the parameter [please_discard]. This flag is set when [s] is being entered by shifting a terminal symbol and [s] does not have a default reduction on [#]. *) (* The following recursive group of functions are tail recursive, produce a checkpoint of type [semantic_value checkpoint], and cannot raise an exception. A semantic action can raise [Error], but this exception is immediately caught within [reduce]. *) let rec run env please_discard : semantic_value checkpoint = (* Log the fact that we just entered this state. *) if log then Log.state env.current; (* If [please_discard] is set, we discard the current lookahead token and fetch the next one. In order to request a token from the user, we return an [InputNeeded] continuation, which, when invoked by the user, will take us to [discard]. If [please_discard] is not set, we skip this step and jump directly to [check_for_default_reduction]. *) if please_discard then InputNeeded env else check_for_default_reduction env (* [discard env triple] stores [triple] into [env], overwriting the previous token. It is invoked by [offer], which itself is invoked by the user in response to an [InputNeeded] checkpoint. *) and discard env triple = if log then begin let (token, startp, endp) = triple in Log.lookahead_token (T.token2terminal token) startp endp end; let env = { env with error = false; triple } in check_for_default_reduction env and check_for_default_reduction env = (* Examine what situation we are in. This case analysis is analogous to that performed in [CodeBackend.gettoken], in the sub-case where we do not have a terminal incoming symbol. *) T.default_reduction env.current announce_reduce (* there is a default reduction; perform it *) check_for_error_token (* there is none; continue below *) env and check_for_error_token env = (* There is no default reduction. Consult the current lookahead token so as to determine which action should be taken. *) (* Peeking at the first input token, without taking it off the input stream, is done by reading [env.triple]. 
We are careful to first check [env.error]. *) (* Note that, if [please_discard] was true, then we have just called [discard], so the lookahead token cannot be [error]. *) (* Returning [HandlingError env] is equivalent to calling [error env] directly, except it allows the user to regain control. *) if env.error then begin if log then Log.resuming_error_handling(); HandlingError env end else let (token, _, _) = env.triple in (* We consult the two-dimensional action table, indexed by the current state and the current lookahead token, in order to determine which action should be taken. *) T.action env.current (* determines a row *) (T.token2terminal token) (* determines a column *) (T.token2value token) shift (* shift continuation *) announce_reduce (* reduce continuation *) initiate (* failure continuation *) env (* ------------------------------------------------------------------------ *) (* This function takes care of shift transitions along a terminal symbol. (Goto transitions are taken care of within [reduce] below.) The symbol can be either an actual token or the [error] pseudo-token. *) (* Here, the lookahead token CAN be [error]. *) and shift env (please_discard : bool) (terminal : terminal) (value : semantic_value) (s' : state) = (* Log the transition. *) if log then Log.shift terminal s'; (* Push a new cell onto the stack, containing the identity of the state that we are leaving. *) let (_, startp, endp) = env.triple in let stack = { state = env.current; semv = value; startp; endp; next = env.stack; } in (* Switch to state [s']. *) let new_env = { env with stack; current = s' } in (* Expose the transition to the user. (In principle, we have a choice between exposing the transition before we take it, after we take it, or at some point in between. This affects the number and type of the parameters carried by [Shifting]. Here, we choose to expose the transition after we take it; this allows [Shifting] to carry only three parameters, whose meaning is simple.) *) Shifting (env, new_env, please_discard) (* ------------------------------------------------------------------------ *) (* The function [announce_reduce] stops the parser and returns a checkpoint which allows the parser to be resumed by calling [reduce]. *) (* Only ordinary productions are exposed to the user. Start productions are not exposed to the user. Reducing a start production simply leads to the successful termination of the parser. *) and announce_reduce env (prod : production) = if T.is_start prod then accept env prod else AboutToReduce (env, prod) (* The function [reduce] takes care of reductions. It is invoked by [resume] after an [AboutToReduce] event has been produced. *) (* Here, the lookahead token CAN be [error]. *) (* The production [prod] CANNOT be a start production. *) and reduce env (prod : production) = (* Log a reduction event. *) if log then Log.reduce_or_accept prod; (* Invoke the semantic action. The semantic action is responsible for truncating the stack and pushing a new cell onto the stack, which contains a new semantic value. It can raise [Error]. *) (* If the semantic action terminates normally, it returns a new stack, which becomes the current stack. *) (* If the semantic action raises [Error], we catch it and initiate error handling. *) (* This [match/with/exception] construct requires OCaml 4.02. *) match T.semantic_action prod env with | stack -> (* By our convention, the semantic action has produced an updated stack. The state now found in the top stack cell is the return state. 
*) (* Perform a goto transition. The target state is determined by consulting the goto table at the return state and at production [prod]. *) let current = T.goto_prod stack.state prod in let env = { env with stack; current } in run env false | exception Error -> initiate env and accept env prod = (* Log an accept event. *) if log then Log.reduce_or_accept prod; (* Extract the semantic value out of the stack. *) let v = env.stack.semv in (* Finish. *) Accepted v (* ------------------------------------------------------------------------ *) (* The following functions deal with errors. *) (* [initiate] initiates or resumes error handling. *) (* Here, the lookahead token CAN be [error]. *) and initiate env = if log then Log.initiating_error_handling(); let env = { env with error = true } in HandlingError env (* [error] handles errors. *) and error env = assert env.error; (* Consult the column associated with the [error] pseudo-token in the action table. *) T.action env.current (* determines a row *) T.error_terminal (* determines a column *) T.error_value error_shift (* shift continuation *) error_reduce (* reduce continuation *) error_fail (* failure continuation *) env and error_shift env please_discard terminal value s' = (* Here, [terminal] is [T.error_terminal], and [value] is [T.error_value]. *) assert (terminal = T.error_terminal && value = T.error_value); (* This state is capable of shifting the [error] token. *) if log then Log.handling_error env.current; shift env please_discard terminal value s' and error_reduce env prod = (* This state is capable of performing a reduction on [error]. *) if log then Log.handling_error env.current; reduce env prod (* Intentionally calling [reduce] instead of [announce_reduce]. It does not seem very useful, and it could be confusing, to expose the reduction steps taken during error handling. *) and error_fail env = (* This state is unable to handle errors. Attempt to pop a stack cell. *) let cell = env.stack in let next = cell.next in if next == cell then (* The stack is empty. Die. *) Rejected else begin (* The stack is nonempty. Pop a cell, updating the current state with that found in the popped cell, and try again. *) let env = { env with stack = next; current = cell.state } in HandlingError env end (* End of the nest of tail recursive functions. *) (* ------------------------------------------------------------------------ *) (* ------------------------------------------------------------------------ *) (* The incremental interface. See [EngineTypes]. *) (* [start s] begins the parsing process. *) let start (s : state) (initial : position) : semantic_value checkpoint = (* Build an empty stack. This is a dummy cell, which is its own successor. Its [next] field WILL be accessed by [error_fail] if an error occurs and is propagated all the way until the stack is empty. Its [endp] field WILL be accessed (by a semantic action) if an epsilon production is reduced when the stack is empty. *) let rec empty = { state = s; (* dummy *) semv = T.error_value; (* dummy *) startp = initial; (* dummy *) endp = initial; next = empty; } in (* Build an initial environment. *) (* Unfortunately, there is no type-safe way of constructing a dummy token. Tokens carry semantic values, which in general we cannot manufacture. This instance of [Obj.magic] could be avoided by adopting a different representation (e.g., no [env.error] field, and an option in the first component of [env.triple]), but I like this representation better. 
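(For comparison, a sketch of that alternative representation: the field
   [triple] would have type [token option * position * position], the initial
   environment would carry [(None, initial, initial)], and [discard] would
   store [Some token]. This would avoid [Obj.magic], at the cost of an extra
   memory allocation and an extra pattern match on every access to the
   lookahead token.)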
*) let dummy_token = Obj.magic () in let env = { error = false; triple = (dummy_token, initial, initial); (* dummy *) stack = empty; current = s; } in (* Begin parsing. *) (* The parameter [please_discard] here is [true], which means we know that we must read at least one token. This claim relies on the fact that we have ruled out the two special cases where a start symbol recognizes the empty language or the singleton language {epsilon}. *) run env true (* [offer checkpoint triple] is invoked by the user in response to a checkpoint of the form [InputNeeded env]. It checks that [checkpoint] is indeed of this form, and invokes [discard]. *) (* [resume checkpoint] is invoked by the user in response to a checkpoint of the form [AboutToReduce (env, prod)] or [HandlingError env]. It checks that [checkpoint] is indeed of this form, and invokes [reduce] or [error], as appropriate. *) (* In reality, [offer] and [resume] accept an argument of type [semantic_value checkpoint] and produce a checkpoint of the same type. The choice of [semantic_value] is forced by the fact that this is the parameter of the checkpoint [Accepted]. *) (* We change this as follows. *) (* We change the argument and result type of [offer] and [resume] from [semantic_value checkpoint] to ['a checkpoint]. This is safe, in this case, because we give the user access to values of type [t checkpoint] only if [t] is indeed the type of the eventual semantic value for this run. (More precisely, by examining the signatures [INCREMENTAL_ENGINE] and [INCREMENTAL_ENGINE_START], one finds that the user can build a value of type ['a checkpoint] only if ['a] is [semantic_value]. The table back-end goes further than this and produces versions of [start] composed with a suitable cast, which give the user access to a value of type [t checkpoint] where [t] is the type of the start symbol.) *) let offer : 'a . 'a checkpoint -> token * position * position -> 'a checkpoint = function | InputNeeded env -> Obj.magic discard env | _ -> invalid_arg "offer expects InputNeeded" let resume : 'a . 'a checkpoint -> 'a checkpoint = function | HandlingError env -> Obj.magic error env | Shifting (_, env, please_discard) -> Obj.magic run env please_discard | AboutToReduce (env, prod) -> Obj.magic reduce env prod | _ -> invalid_arg "resume expects HandlingError | Shifting | AboutToReduce" (* ------------------------------------------------------------------------ *) (* ------------------------------------------------------------------------ *) (* The traditional interface. See [EngineTypes]. *) (* ------------------------------------------------------------------------ *) (* Wrapping a lexer and lexbuf as a token supplier. *) type supplier = unit -> token * position * position let lexer_lexbuf_to_supplier (lexer : Lexing.lexbuf -> token) (lexbuf : Lexing.lexbuf) : supplier = fun () -> let token = lexer lexbuf in let startp = lexbuf.Lexing.lex_start_p and endp = lexbuf.Lexing.lex_curr_p in token, startp, endp (* ------------------------------------------------------------------------ *) (* The main loop repeatedly handles intermediate checkpoints, until a final checkpoint is obtained. This allows implementing the monolithic interface ([entry]) in terms of the incremental interface ([start], [offer], [handle], [reduce]). *) (* By convention, acceptance is reported by returning a semantic value, whereas rejection is reported by raising [Error]. *) (* [loop] is polymorphic in ['a]. No cheating is involved in achieving this. 
All of the cheating resides in the types assigned to [offer] and [handle] above. *) let rec loop : 'a . supplier -> 'a checkpoint -> 'a = fun read checkpoint -> match checkpoint with | InputNeeded _ -> (* The parser needs a token. Request one from the lexer, and offer it to the parser, which will produce a new checkpoint. Then, repeat. *) let triple = read() in let checkpoint = offer checkpoint triple in loop read checkpoint | Shifting _ | AboutToReduce _ | HandlingError _ -> (* The parser has suspended itself, but does not need new input. Just resume the parser. Then, repeat. *) let checkpoint = resume checkpoint in loop read checkpoint | Accepted v -> (* The parser has succeeded and produced a semantic value. Return this semantic value to the user. *) v | Rejected -> (* The parser rejects this input. Raise an exception. *) raise Error let entry (s : state) lexer lexbuf : semantic_value = let initial = lexbuf.Lexing.lex_curr_p in loop (lexer_lexbuf_to_supplier lexer lexbuf) (start s initial) (* ------------------------------------------------------------------------ *) (* [loop_handle] stops if it encounters an error, and at this point, invokes its failure continuation, without letting Menhir do its own traditional error-handling (which involves popping the stack, etc.). *) let rec loop_handle succeed fail read checkpoint = match checkpoint with | InputNeeded _ -> let triple = read() in let checkpoint = offer checkpoint triple in loop_handle succeed fail read checkpoint | Shifting _ | AboutToReduce _ -> let checkpoint = resume checkpoint in loop_handle succeed fail read checkpoint | HandlingError _ | Rejected -> (* The parser has detected an error. Invoke the failure continuation. *) fail checkpoint | Accepted v -> (* The parser has succeeded and produced a semantic value. Invoke the success continuation. *) succeed v (* ------------------------------------------------------------------------ *) (* [loop_handle_undo] is analogous to [loop_handle], except it passes a pair of checkpoints to the failure continuation. The first (and oldest) checkpoint is the last [InputNeeded] checkpoint that was encountered before the error was detected. The second (and newest) checkpoint is where the error was detected, as in [loop_handle]. Going back to the first checkpoint can be thought of as undoing any reductions that were performed after seeing the problematic token. (These reductions must be default reductions or spurious reductions.) *) let rec loop_handle_undo succeed fail read (inputneeded, checkpoint) = match checkpoint with | InputNeeded _ -> (* Update the last recorded [InputNeeded] checkpoint. *) let inputneeded = checkpoint in let triple = read() in let checkpoint = offer checkpoint triple in loop_handle_undo succeed fail read (inputneeded, checkpoint) | Shifting _ | AboutToReduce _ -> let checkpoint = resume checkpoint in loop_handle_undo succeed fail read (inputneeded, checkpoint) | HandlingError _ | Rejected -> fail inputneeded checkpoint | Accepted v -> succeed v (* For simplicity, we publish a version of [loop_handle_undo] that takes a single checkpoint as an argument, instead of a pair of checkpoints. We check that the argument is [InputNeeded _], and duplicate it. *) (* The parser cannot accept or reject before it asks for the very first character of input. (Indeed, we statically reject a symbol that generates the empty language or the singleton language {epsilon}.) So, the [start] checkpoint must match [InputNeeded _]. 
Hence, it is permitted to call [loop_handle_undo] with a [start] checkpoint. *) let loop_handle_undo succeed fail read checkpoint = assert (match checkpoint with InputNeeded _ -> true | _ -> false); loop_handle_undo succeed fail read (checkpoint, checkpoint) (* ------------------------------------------------------------------------ *) let rec shifts checkpoint = match checkpoint with | Shifting (env, _, _) -> (* The parser is about to shift, which means it is willing to consume the terminal symbol that we have fed it. Return the state just before this transition. *) Some env | AboutToReduce _ -> (* The parser wishes to reduce. Just follow. *) shifts (resume checkpoint) | HandlingError _ -> (* The parser fails, which means it rejects the terminal symbol that we have fed it. *) None | InputNeeded _ | Accepted _ | Rejected -> (* None of these cases can arise. Indeed, after a token is submitted to it, the parser must shift, reduce, or signal an error, before it can request another token or terminate. *) assert false let acceptable checkpoint token pos = let triple = (token, pos, pos) in let checkpoint = offer checkpoint triple in match shifts checkpoint with | None -> false | Some _env -> true (* ------------------------------------------------------------------------ *) (* The type ['a lr1state] describes the (non-initial) states of the LR(1) automaton. The index ['a] represents the type of the semantic value associated with the state's incoming symbol. *) (* The type ['a lr1state] is defined as an alias for [state], which itself is usually defined as [int] (see [TableInterpreter]). So, ['a lr1state] is technically a phantom type, but should really be thought of as a GADT whose data constructors happen to be represented as integers. It is presented to the user as an abstract type (see [IncrementalEngine]). *) type 'a lr1state = state (* ------------------------------------------------------------------------ *) (* Stack inspection. *) (* We offer a read-only view of the parser's state as a stream of elements. Each element contains a pair of a (non-initial) state and a semantic value associated with (the incoming symbol of) this state. Note that the type [element] is an existential type. *) (* As of 2017/03/31, the type [stack] and the function [stack] are DEPRECATED. If desired, they could now be implemented outside Menhir, by relying on the functions [top] and [pop]. *) type element = | Element: 'a lr1state * 'a * position * position -> element open General type stack = element stream (* If [current] is the current state and [cell] is the top stack cell, then [stack cell current] is a view of the parser's state as a stream of elements. *) let rec stack cell current : element stream = lazy ( (* The stack is empty iff the top stack cell is its own successor. In that case, the current state [current] should be an initial state (which has no incoming symbol). We do not allow the user to inspect this state. *) let next = cell.next in if next == cell then Nil else (* Construct an element containing the current state [current] as well as the semantic value contained in the top stack cell. This semantic value is associated with the incoming symbol of this state, so it makes sense to pair them together. The state has type ['a state] and the semantic value has type ['a], for some type ['a]. Here, the OCaml type-checker thinks ['a] is [semantic_value] and considers this code well-typed. Outside, we will use magic to provide the user with a way of inspecting states and recovering the value of ['a]. 
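From the user's point of view, a typical inspection step therefore looks
   like this (a sketch, using the function [top] defined below; recall that
   [number] maps a state to an integer):

     match top env with
     | Some (Element (s, _v, _startp, _endp)) ->
         (* [_v] has an existentially quantified type; it can be used only
            once the state [s] has been identified, e.g. via [number s] or
            via the inspection API. *)
         Printf.printf "state atop the stack: %d\n" (number s)
     | None ->
         () (* the stack is empty; we are in an initial state *)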
*) let element = Element ( current, cell.semv, cell.startp, cell.endp ) in Cons (element, stack next cell.state) ) let stack env : element stream = stack env.stack env.current (* As explained above, the function [top] allows access to the top stack element only if the stack is nonempty, i.e., only if the current state is not an initial state. *) let top env : element option = let cell = env.stack in let next = cell.next in if next == cell then None else Some (Element (env.current, cell.semv, cell.startp, cell.endp)) (* [equal] compares the stacks for physical equality, and compares the current states via their numbers (this seems cleaner than using OCaml's polymorphic equality). *) (* The two fields that are not compared by [equal], namely [error] and [triple], are overwritten by the function [discard], which handles [InputNeeded] checkpoints. Thus, if [equal env1 env2] holds, then the checkpoints [input_needed env1] and [input_needed env2] are equivalent: they lead the parser to behave in the same way. *) let equal env1 env2 = env1.stack == env2.stack && number env1.current = number env2.current let current_state_number env = number env.current (* ------------------------------------------------------------------------ *) (* Access to the position of the lookahead token. *) let positions { triple = (_, startp, endp); _ } = startp, endp (* ------------------------------------------------------------------------ *) (* Access to information about default reductions. *) (* This can be a function of states, or a function of environments. We offer both. *) (* Instead of a Boolean result, we could return a [production option]. However, we would have to explicitly test whether [prod] is a start production, and in that case, return [None], I suppose. Indeed, we have decided not to expose the start productions. *) let state_has_default_reduction (state : _ lr1state) : bool = T.default_reduction state (fun _env _prod -> true) (fun _env -> false) () let env_has_default_reduction env = state_has_default_reduction env.current (* ------------------------------------------------------------------------ *) (* The following functions work at the level of environments (as opposed to checkpoints). The function [pop] causes the automaton to go back into the past, pretending that the last input symbol has never been read. The function [force_reduction] causes the automaton to re-interpret the past, by recognizing the right-hand side of a production and reducing this production. The function [feed] causes the automaton to progress into the future by pretending that a (terminal or nonterminal) symbol has been read. *) (* The function [feed] would ideally be defined here. However, for this function to be type-safe, the GADT ['a symbol] is needed. For this reason, we move its definition to [InspectionTableInterpreter], where the inspection API is available. *) (* [pop] pops one stack cell. It cannot go wrong. *) let pop (env : 'a env) : 'a env option = let cell = env.stack in let next = cell.next in if next == cell then (* The stack is empty. *) None else (* The stack is nonempty. Pop off one cell. *) Some { env with stack = next; current = cell.state } (* [force_reduction] is analogous to [reduce], except that it does not continue by calling [run env] or [initiate env]. Instead, it returns [env] to the user. *) (* [force_reduction] is dangerous insofar as it executes a semantic action. This semantic action could have side effects: nontermination, state, exceptions, input/output, etc. 
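(A crude sketch of how a client might guard a call -- the helper [attempt]
   is hypothetical, not part of this API:

     let attempt prod env =
       try Some (input_needed (force_reduction prod env))
       with Invalid_argument _ -> None

   Note that catching [Invalid_argument] in this way is coarse: it would also
   intercept an [invalid_arg] raised by the semantic action itself.)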
*) let force_reduction prod (env : 'a env) : 'a env = (* Check if this reduction is permitted. This check is REALLY important. The stack must have the correct shape: that is, it must be sufficiently high, and must contain semantic values of appropriate types, otherwise the semantic action will crash and burn. *) (* We currently check whether the current state is WILLING to reduce this production (i.e., there is a reduction action in the action table row associated with this state), whereas it would be more liberal to check whether this state is CAPABLE of reducing this production (i.e., the stack has an appropriate shape). We currently have no means of performing such a check. *) if not (T.may_reduce env.current prod) then invalid_arg "force_reduction: this reduction is not permitted in this state" else begin (* We do not expose the start productions to the user, so this cannot be a start production. Hence, it has a semantic action. *) assert (not (T.is_start prod)); (* Invoke the semantic action. *) let stack = T.semantic_action prod env in (* Perform a goto transition. *) let current = T.goto_prod stack.state prod in { env with stack; current } end (* The environment manipulation functions -- [pop] and [force_reduction] above, plus [feed] -- manipulate the automaton's stack and current state, but do not affect the automaton's lookahead symbol. When the function [input_needed] is used to go back from an environment to a checkpoint (and therefore, resume normal parsing), the lookahead symbol is clobbered anyway, since the only action that the user can take is to call [offer]. So far, so good. One problem, though, is that this call to [offer] may well place the automaton in a configuration of a state [s] and a lookahead symbol [t] that is normally unreachable. Also, perhaps the state [s] is a state where an input symbol normally is never demanded, so this [InputNeeded] checkpoint is fishy. There does not seem to be a deep problem here, but, when programming an error recovery strategy, one should pay some attention to this issue. Ideally, perhaps, one should use [input_needed] only in a state [s] where an input symbol is normally demanded, that is, a state [s] whose incoming symbol is a terminal symbol and which does not have a default reduction on [#]. *) let input_needed (env : 'a env) : 'a checkpoint = InputNeeded env (* The following functions are compositions of [top] and [pop]. *) let rec pop_many i env = if i = 0 then Some env else match pop env with | None -> None | Some env -> pop_many (i - 1) env let get i env = match pop_many i env with | None -> None | Some env -> top env end menhir-20171222/src/Driver.mli0000664000175000017500000000225613217215730016245 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* The module [Driver] serves to offer a unified API to the parser, which could be produced by either ocamlyacc or Menhir. 
*) val grammar : (Lexing.lexbuf -> Parser.token) -> Lexing.lexbuf -> Syntax.partial_grammar menhir-20171222/src/Engine.mli0000664000175000017500000000312713217215730016215 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. *) (* *) (******************************************************************************) open EngineTypes (* The LR parsing engine. *) module Make (T : TABLE) : ENGINE with type state = T.state and type token = T.token and type semantic_value = T.semantic_value and type production = T.production and type 'a env = (T.state, T.semantic_value, T.token) EngineTypes.env (* We would prefer not to expose the definition of the type [env]. However, it must be exposed because some of the code in the inspection API needs access to the engine's internals; see [InspectionTableInterpreter]. Everything would be simpler if --inspection was always ON, but that would lead to bigger parse tables for everybody. *) menhir-20171222/src/referenceInterpreter.mli0000664000175000017500000000563613217215730021201 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Grammar open Cst (* This reference interpreter animates the LR automaton. It uses the grammar and automaton descriptions, as provided by [Grammar] and [Lr1], as well as the generic LR engine in [MenhirLib.Engine]. *) (* The first parameter to the interpreter is a Boolean flag that tells whether a trace should be produced on the standard error channel. *) (* The interpreter requires a start symbol, a lexer, and a lexing buffer. It either succeeds and produces a concrete syntax tree, or fails. *) val interpret: bool -> Nonterminal.t -> (Lexing.lexbuf -> Terminal.t) -> Lexing.lexbuf -> cst option (* This variant of the reference interpreter is used internally by us. We use it to debug [LRijkstra]. It checks that a sentence leads to a syntax error in the expected state. It is also used by several of the command line options [--interpret-error], [--compile-errors], etc. *) type spurious_reduction = Lr1.node * Production.index type target = Lr1.node * spurious_reduction list type check_error_path_outcome = (* Bad: the input was read past its end. *) | OInputReadPastEnd (* Bad: a syntax error occurred before all of the input was read. *) | OInputNotFullyConsumed (* Bad: the parser unexpectedly accepted (part of) this input. *) | OUnexpectedAccept (* Good: a syntax error occurred after reading the last input token. We report in which state the error took place, as well as a list of spurious reductions. A non-default reduction that takes place after looking at the last input token (i.e., the erroneous token) is spurious. Furthermore, any reduction that takes place after a spurious reduction is itself spurious. 
We note that a spurious reduction can take place only in a non-canonical LR automaton. *) | OK of target val check_error_path: bool -> (* --trace *) Nonterminal.t -> (* initial non-terminal symbol *) Terminal.t list -> (* input *) check_error_path_outcome menhir-20171222/src/patricia.mli0000664000175000017500000000243013217215730016600 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This is an implementation of Patricia trees, following Chris Okasaki's paper at the 1998 ML Workshop in Baltimore. Both big-endian and little-endian trees are provided. Both sets and maps are implemented on top of Patricia trees. *) module Little : GMap.S with type key = int module Big : GMap.S with type key = int menhir-20171222/src/referenceInterpreter.ml0000664000175000017500000002616013217215730021023 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Grammar open Cst (* ------------------------------------------------------------------------ *) (* Set up all of the information required by the LR engine. Everything is read directly from [Grammar] and [Lr1]. *) module T = struct type state = Lr1.node let number = Lr1.number type token = Terminal.t type terminal = Terminal.t type nonterminal = Nonterminal.t type semantic_value = cst let token2terminal (token : token) : terminal = token let token2value (token : token) : semantic_value = CstTerminal token let error_terminal = Terminal.error let error_value = CstError let foreach_terminal = Terminal.foldx type production = Production.index let production_index = Production.p2i let find_production = Production.i2p let default_reduction (s : state) defred nodefred env = match Default.has_default_reduction s with | Some (prod, _) -> defred env prod | None -> nodefred env let action (s : state) (tok : terminal) value shift reduce fail env = (* Check whether [s] has an outgoing shift transition along [tok]. *) try let s' : state = SymbolMap.find (Symbol.T tok) (Lr1.transitions s) in (* There is such a transition. Return either [ShiftDiscard] or [ShiftNoDiscard], depending on the existence of a default reduction on [#] at [s']. *) match Default.has_default_reduction s' with | Some (_, toks) when TerminalSet.mem Terminal.sharp toks -> shift env false tok value s' | _ -> shift env true tok value s' (* There is no such transition. Look for a reduction. *) with Not_found -> try let prod = Misc.single (TerminalMap.find tok (Lr1.reductions s)) in reduce env prod (* There is no reduction either. Fail. 
*) with Not_found -> fail env let goto_nt (s : state) (nt : nonterminal) : state = try SymbolMap.find (Symbol.N nt) (Lr1.transitions s) with Not_found -> assert false let goto_prod (s : state) (prod : production) : state = goto_nt s (Production.nt prod) let maybe_goto_nt (s : state) (nt : nonterminal) : state option = try Some (SymbolMap.find (Symbol.N nt) (Lr1.transitions s)) with Not_found -> None open MenhirLib.EngineTypes exception Error (* By convention, a semantic action returns a new stack. It does not affect [env]. *) let is_start = Production.is_start type semantic_action = (state, semantic_value, token) env -> (state, semantic_value) stack let semantic_action (prod : production) : semantic_action = fun env -> assert (not (Production.is_start prod)); (* Reduce. Pop a suffix of the stack, and use it to construct a new concrete syntax tree node. *) let n = Production.length prod in let values : semantic_value array = Array.make n CstError (* dummy *) and startp = ref Lexing.dummy_pos and endp = ref Lexing.dummy_pos and current = ref env.current and stack = ref env.stack in (* We now enter a loop to pop [k] stack cells and (after that) push a new cell onto the stack. *) (* This loop does not update [env.current]. Instead, the state in the newly pushed stack cell will be used (by our caller) as a basis for a goto transition, and [env.current] will be updated (if necessary) then. *) for k = n downto 1 do (* Fetch a semantic value. *) values.(k - 1) <- !stack.semv; (* Pop one cell. The stack must be non-empty. As we pop a cell, change the automaton's current state to the one stored within the cell. (It is sufficient to do this only when [k] is 1, since the last write overwrites any and all previous writes.) If this is the first (last) cell that we pop, update [endp] ([startp]). *) let next = !stack.next in assert (!stack != next); if k = n then begin endp := !stack.endp end; if k = 1 then begin current := !stack.state; startp := !stack.startp end; stack := next done; (* Done popping. *) (* Construct and push a new stack cell. The associated semantic value is a new concrete syntax tree. *) { state = !current; semv = CstNonTerminal (prod, values); startp = !startp; endp = !endp; next = !stack } let may_reduce node prod = Lr1.NodeSet.mem node (Lr1.production_where prod) (* The logging functions that follow are called only if [log] is [true]. *) module Log = struct open Printf let state s = fprintf stderr "State %d:" (Lr1.number s); prerr_newline() let shift tok s' = fprintf stderr "Shifting (%s) to state %d" (Terminal.print tok) (Lr1.number s'); prerr_newline() let reduce_or_accept prod = match Production.classify prod with | Some _ -> fprintf stderr "Accepting"; prerr_newline() | None -> fprintf stderr "Reducing production %s" (Production.print prod); prerr_newline() let lookahead_token tok startp endp = fprintf stderr "Lookahead token is now %s (%d-%d)" (Terminal.print tok) startp.Lexing.pos_cnum endp.Lexing.pos_cnum; prerr_newline() let initiating_error_handling () = fprintf stderr "Initiating error handling"; prerr_newline() let resuming_error_handling () = fprintf stderr "Resuming error handling"; prerr_newline() let handling_error s = fprintf stderr "Handling error in state %d" (Lr1.number s); prerr_newline() end end (* ------------------------------------------------------------------------ *) (* Define a palatable user entry point. *) let interpret log nt lexer lexbuf = (* Instantiate the LR engine. 
*) let module E = MenhirLib.Engine.Make (struct include T let log = log end) in (* Run it. *) try Some (E.entry (Lr1.entry_of_nt nt) lexer lexbuf) with T.Error -> None (* ------------------------------------------------------------------------ *) (* Another entry point, used internally by [LRijkstra] to check that the sentences that [LRijkstra] produces do lead to an error in the expected state. *) type spurious_reduction = Lr1.node * Production.index type target = Lr1.node * spurious_reduction list type check_error_path_outcome = (* Bad: the input was read past its end. *) | OInputReadPastEnd (* Bad: a syntax error occurred before all of the input was read. *) | OInputNotFullyConsumed (* Bad: the parser unexpectedly accepted (part of) this input. *) | OUnexpectedAccept (* Good: a syntax error occurred after reading the last input token. We report in which state the error took place, as well as a list of spurious reductions. A non-default reduction that takes place after looking at the last input token (i.e., the erroneous token) is spurious. Furthermore, any reduction that takes place after a spurious reduction is itself spurious. We note that a spurious reduction can take place only in a non-canonical LR automaton. *) | OK of target let check_error_path log nt input = (* Instantiate the LR engine. *) let module E = MenhirLib.Engine.Make (struct include T let log = log end) in (* Determine the initial state. *) let entry = Lr1.entry_of_nt nt in (* This function helps extract the current parser state out of [env]. It may become unnecessary if the [Engine] API offers it. *) let current env = (* Peek at the stack. If empty, then we must be in the initial state. *) match E.top env with | None -> entry | Some (E.Element (s, _, _, _)) -> s in (* Set up a function that delivers tokens one by one. *) let input = ref input in let next () = match !input with | [] -> None | t :: ts -> input := ts; Some t in let looking_at_last_token () : bool = !input = [] in (* Run. We wish to stop at the first error (without handling the error in any way) and report in which state the error occurred. A clean way of doing this is to use the incremental API, as follows. The main loop resembles the [loop] function in [Engine]. *) (* Another reason why we write our own loop is that we wish to detect spurious reductions. We accumulate these reductions in [spurious], a (reversed) list of productions. *) let rec loop (checkpoint : cst E.checkpoint) (spurious : spurious_reduction list) = match checkpoint with | E.InputNeeded _ -> begin match next() with | None -> OInputReadPastEnd | Some t -> loop (E.offer checkpoint (t, Lexing.dummy_pos, Lexing.dummy_pos)) spurious end | E.Shifting _ -> loop (E.resume checkpoint) spurious | E.AboutToReduce (env, prod) -> (* If we have requested the last input token and if this is not a default reduction, then this is a spurious reduction. Furthermore, if a spurious reduction has taken place already, then this is also a spurious reduction. *) let spurious = if looking_at_last_token() && not (E.env_has_default_reduction env) || spurious <> [] then (current env, prod) :: spurious else spurious in loop (E.resume checkpoint) spurious | E.HandlingError env -> (* Check that all of the input has been read. Otherwise, the error has occurred sooner than expected. *) if !input = [] then (* Return the current state and the list of spurious reductions. *) OK (current env, List.rev spurious) else OInputNotFullyConsumed | E.Accepted _ -> (* The parser has succeeded. This is unexpected. 
*) OUnexpectedAccept | E.Rejected -> (* The parser rejects this input. This should not happen; we should observe [HandlingError _] first. *) assert false in loop (E.start entry Lexing.dummy_pos) [] menhir-20171222/src/version.ml0000664000175000017500000000003113217215730016313 0ustar fpottierfpottierlet version = "20171222" menhir-20171222/src/printer.mli0000664000175000017500000000457613217215730016504 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* A pretty-printer for [IL]. *) module Make (X : sig (* This is the channel that is being written to. *) val f: out_channel (* [locate_stretches] controls the way we print OCaml stretches (types and semantic actions). If it is [Some dstfilename], where [dstfilename] is the name of the file that is being written, then we surround stretches with OCaml line number directives of the form # . If it is [None], then we don't. *) (* Providing line number directives allows the OCaml typechecker to report type errors in the .mly file, instead of in the generated .ml / .mli files. Line number directives also affect the dynamic semantics of any [assert] statements contained in semantic actions: when they are provided, the [Assert_failure] exception carries a location in the .mly file. As a general rule of thumb, line number directives should always be provided, except perhaps where we think that they decrease readability (e.g., in a generated .mli file). *) val locate_stretches: string option end) : sig val program: IL.program -> unit val expr: IL.expr -> unit val interface: IL.interface -> unit end (* Common instantiations. In the following two functions, [locate_stretches] is [None], so no line number directives are printed. *) val print_expr: out_channel -> IL.expr -> unit val string_of_expr: IL.expr -> string menhir-20171222/src/back.mli0000664000175000017500000000204713217215730015710 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module drives the back-end. No functionality is offered by this module. *) menhir-20171222/src/conflict.mli0000664000175000017500000000213213217215730016604 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module explains conflicts. Explanations are written to the .conflicts file. 
No functionality is offered by this module. *) menhir-20171222/src/cst.mli0000664000175000017500000000344113217215730015600 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Grammar (* Concrete syntax trees. *) (* A concrete syntax tree is one of a leaf -- which corresponds to a terminal symbol; a node -- which corresponds to a non-terminal symbol, and whose immediate descendants form an expansion of that symbol; or an error leaf -- which corresponds to a point where the [error] pseudo-token was shifted. *) type cst = | CstTerminal of Terminal.t | CstNonTerminal of Production.index * cst array | CstError (* This is a (mostly) unambiguous printer for concrete syntax trees, in an sexp-like notation. *) val print: out_channel -> cst -> unit (* This is a pretty-printer for concrete syntax trees. The notation is the same as that used by the above printer; the only difference is that the [Pprint] library is used to manage indentation. *) val show: out_channel -> cst -> unit menhir-20171222/src/DependencyGraph.ml0000664000175000017500000000423313217215730017676 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Grammar let print_dependency_graph() = (* Allocate. *) let forward : NonterminalSet.t NonterminalMap.t ref = ref NonterminalMap.empty in let successors nt = try NonterminalMap.find nt !forward with Not_found -> NonterminalSet.empty in (* Populate. *) Production.iter (fun prod -> let nt1 = Production.nt prod and rhs = Production.rhs prod in Array.iter (function | Symbol.T _ -> () | Symbol.N nt2 -> forward := NonterminalMap.add nt1 (NonterminalSet.add nt2 (successors nt1)) !forward ) rhs ); (* Print. *) let module P = Dot.Print (struct type vertex = Nonterminal.t let name nt = Printf.sprintf "nt%d" (Nonterminal.n2i nt) let successors (f : ?style:Dot.style -> label:string -> vertex -> unit) nt = NonterminalSet.iter (fun successor -> f ~label:"" successor ) (successors nt) let iter (f : ?shape:Dot.shape -> ?style:Dot.style -> label:string -> vertex -> unit) = Nonterminal.iter (fun nt -> f ~label:(Nonterminal.print false nt) nt ) end) in let f = open_out (Settings.base ^ ".dot") in P.print f; close_out f menhir-20171222/src/stretch.mli0000664000175000017500000000373513217215730016471 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. 
*) (* *) (******************************************************************************) (* A stretch is a fragment of a source file. It holds the file name, the line number, and the line count (that is, the length) of the fragment. These are used to generate line number directives when the fragment is copied to an output file. It also holds the textual content of the fragment, as a string. The [raw_content] field holds the text that was found in the source file, while the [content] field holds the same text after transformation by the lexer (which may substitute keywords, insert padding, insert parentheses, etc.). See [Lexer.mk_stretch] and its various call sites in [Lexer]. *) type t = { stretch_filename : string; stretch_linenum : int; stretch_linecount : int; stretch_raw_content : string; stretch_content : string; stretch_keywords : Keyword.keyword list } (* An OCaml type is either a stretch (if it was found in some source file) or a string (if it was inferred via [Infer]). *) type ocamltype = | Declared of t | Inferred of string menhir-20171222/src/myocamlbuild.ml0000664000175000017500000002547613217215730017333 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Ocamlbuild_plugin open Command (* ---------------------------------------------------------------------------- *) (* This compatibility layer allows us to support both OCaml 4.02 and 4.03, with deprecation errors activated. We define our own copies of certain 4.03 functions. *) module Compatibility = struct module Char = struct let lowercase_ascii c = if (c >= 'A' && c <= 'Z') then Char.chr (Char.code c + 32) else c let uppercase_ascii c = if (c >= 'a' && c <= 'z') then Char.chr (Char.code c - 32) else c end module Bytes = struct include Bytes let apply1 f s = if Bytes.length s = 0 then s else begin let r = Bytes.copy s in Bytes.unsafe_set r 0 (f (Bytes.unsafe_get s 0)); r end let capitalize_ascii s = apply1 Char.uppercase_ascii s let uncapitalize_ascii s = apply1 Char.lowercase_ascii s end module String = struct let capitalize_ascii s = Bytes.unsafe_to_string (Bytes.capitalize_ascii (Bytes.unsafe_of_string s)) let uncapitalize_ascii s = Bytes.unsafe_to_string (Bytes.uncapitalize_ascii (Bytes.unsafe_of_string s)) end end (* ---------------------------------------------------------------------------- *) (* The following rules can be copied into other projects. *) (* ---------------------------------------------------------------------------- *) (* The auxiliary function [lines] reads a file, line by line. *) let lines filename : string list = let c = open_in filename in let lines = ref [] in try while true do lines := input_line c :: !lines done; assert false with End_of_file -> close_in c; List.rev !lines (* The auxiliary function [noncomment] recognizes a non-blank non-comment line. 
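   That is, leading spaces, tabs, and newline characters are skipped;
   the line is deemed blank or a comment if nothing remains or if the
   first nonblank character is a hash sign. For instance,
   [noncomment "# a comment"] and [noncomment "   "] yield [false],
   while [noncomment "  Foo"] yields [true].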
*) let rec noncomment s i n = i < n && match s.[i] with | ' ' | '\t' | '\r' | '\n' -> noncomment s (i + 1) n | '#' -> false | _ -> true let noncomment s = noncomment s 0 (String.length s) (* ---------------------------------------------------------------------------- *) (* If [m] is the name of a module, [cmx m] are the possible names of its [.cmx] file. There are two candidate names, because of OCaml's convention where the first letter of the file name is capitalized to obtain the module name. We do *not* decide between them by accessing the file system, because we do not understand or control when ocamlbuild copies files to the build directory. *) let cmx (m : string) : string list = let candidate = m ^ ".cmx" in [ candidate; Compatibility.String.uncapitalize_ascii candidate ] (* ---------------------------------------------------------------------------- *) (* If there is a file [foo.mlpack], then the modules that are listed in this file are meant to be part of the library [Foo], and should receive the tag [for-pack(Foo)]. ocamlbuild doesn't do this automatically, so we program it. *) (* The argument [basename] should be the basename of the [.mlpack] file. *) let for_pack (basename : string) = let filename = basename ^ ".mlpack" in let modules = List.filter noncomment (lines filename) in let library = Compatibility.String.capitalize_ascii basename in let tags = [ Printf.sprintf "for-pack(%s)" library ] in List.iter (fun m -> List.iter (fun candidate -> tag_file candidate tags ) (cmx m) ) modules (* ---------------------------------------------------------------------------- *) (* The following rules can be copied into other projects. *) (* ---------------------------------------------------------------------------- *) (* This rule generates an .ml file [target] from an .mly file [grammar] and a .messages file [messages]. *) (* If the name of a witness file is passed, it is made an additional dependency. This triggers a separate rule (see below) which performs a completeness check, that is, which checks that the .messages file lists every possible syntax error. *) let compile_errors grammar messages (witness : string list) target = rule "menhir/compile_errors" ~prod:target ~deps:([ grammar; messages ] @ witness) (fun env _ -> let grammar = env grammar in let tags = tags_of_pathname grammar ++ "ocaml" ++ "menhir" in Cmd(S[ !Options.ocamlyacc; (* menhir *) T tags; P grammar; A "--compile-errors"; P (env messages); Sh ">"; Px (env target); ])) (* A generic version of the above rule, with uniform naming. *) let generic_compile_errors (check_completeness : bool) = compile_errors (* sources: *) "%.mly" "%Messages.messages" (* if present, this dependency forces a completeness check: *) (if check_completeness then [ "%Messages.witness" ] else []) (* target: *) "%Messages.ml" (* ---------------------------------------------------------------------------- *) (* This rule generates a .messages file [messages] from an .mly file [grammar]. *) let list_errors grammar messages = rule "produce a list of messages" ~prod:messages ~dep:grammar (fun env _ -> let grammar = env grammar in let tags = tags_of_pathname grammar ++ "ocaml" ++ "menhir" in Cmd(S[ !Options.ocamlyacc; (* menhir *) T tags; P grammar; A "--list-errors"; Sh ">"; Px (env messages); ])) (* ---------------------------------------------------------------------------- *) (* This rule compares the .messages files [messages1] and [messages2]. This is used to ensure complete coverage, i.e., check that every possible error is covered. 
The file [witness] is used as a witness that the comparison has been carried out. *) let compare_errors grammar messages1 messages2 witness = rule "compare two lists of messages" ~stamp:witness ~deps:[ grammar; messages1; messages2 ] (fun env _ -> let grammar = env grammar in let tags = tags_of_pathname grammar ++ "ocaml" ++ "menhir" in Cmd(S[ !Options.ocamlyacc; (* menhir *) T tags; P grammar; A "--compare-errors"; P (env messages1); A "--compare-errors"; P (env messages2); ])) (* ---------------------------------------------------------------------------- *) (* This rule combines the above two rules and makes sure that the [messages] file is complete, i.e., covers all possible errors. This rule creates a witness file. *) let completeness_check grammar messages witness = (* We need a name for a temporary [.messages] file, which we produce, and which lists all possible errors. *) let complete_messages = grammar ^ ".auto.messages" in (* Use the above two rules. *) list_errors grammar complete_messages; compare_errors grammar complete_messages messages witness (* A generic version of the above rule, with uniform naming. *) let generic_completeness_check () = completeness_check (* sources: *) "%.mly" "%Messages.messages" (* target: *) "%Messages.witness" (* ---------------------------------------------------------------------------- *) (* The following rules and settings are specific to the compilation of Menhir. *) (* ---------------------------------------------------------------------------- *) (* Dealing with the two parsers. *) (* Just for fun, Menhir comes with two parsers for its own input files. One is called [yacc-parser.mly] and is built using [ocamlyacc]. The other is called [fancy-parser.mly] and is built using Menhir. It depends on [standard.mly]. The choice between the two parsers is determined by the presence of the tag [fancy_parser]. *) let fancy () : bool = mark_tag_used "fancy_parser"; Tags.mem "fancy_parser" (tags_of_pathname "") let parser_configuration () = (* Create [parser.mly] by copying the appropriate source file. *) copy_rule "create parser.mly" (* source: *) (if fancy() then "fancy-parser.mly" else "yacc-parser.mly") (* target: *) "parser.mly" ; (* Create [Driver.ml] by copying the appropriate source file. *) copy_rule "create Driver.ml" (* source: *) (if fancy() then "fancyDriver.ml" else "yaccDriver.ml") (* target: *) "Driver.ml" ; (* In the fancy case, use Menhir to generate [parserMessages.ml] based on [parserMessages.messages], which is maintained by hand. Also, check that [parserMessages.messages] covers all possible syntax errors. *) if fancy() then begin generic_compile_errors true; (* We might wish to perform the completeness check only if [Sys.word_size] is at least 64. Indeed, on a 32-bit machine, [menhir --list-errors] is restricted to small grammars. For the moment, this works, because our grammar is small enough. *) generic_completeness_check() end (* ---------------------------------------------------------------------------- *) (* If the tag [sdk] is provided, then the modules listed in [menhirSdk.mlpack] must be built using [for-pack(MenhirSdk)]. Otherwise, we are building Menhir and menhirLib, so the modules listed in [menhirLib.mlpack] must be built using [for-pack(MenhirLib)]. There could be a nonempty intersection between the two, which is why we do not supply both sets of flags at once. 
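   As an illustration (the module name below is hypothetical): if
   [menhirSdk.mlpack] lists a module Cmly, then [for_pack] attaches the
   tag for-pack(MenhirSdk) to both candidate file names Cmly.cmx and
   cmly.cmx, which is what one would otherwise write by hand in a _tags
   file, along the lines of:

     <cmly.cmx>: for-pack(MenhirSdk)
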
*) let sdk () : bool = mark_tag_used "sdk"; Tags.mem "sdk" (tags_of_pathname "") (* ---------------------------------------------------------------------------- *) (* Compilation flags for Menhir. *) let flags () = (* -noassert (if enabled by tag) *) flag ["ocaml"; "compile"; "noassert"] (S [A "-noassert"]); (* nazi warnings *) flag ["ocaml"; "compile"; "my_warnings"] (S[A "-w"; A "@1..60-4-9-41-44-60"]) (* ---------------------------------------------------------------------------- *) (* Define custom compilation rules. *) let () = dispatch (function After_rules -> (* Add our rules after the standard ones. *) parser_configuration(); flags(); if sdk() then for_pack "menhirSdk" else for_pack "menhirLib" | _ -> () ) menhir-20171222/src/interpret.ml0000664000175000017500000006535213217215730016663 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) module I = Invariant (* artificial dependency *) module D = Default (* artificial dependency *) (* --------------------------------------------------------------------------- *) open Grammar open SentenceParserAux (* A delimiter. *) type delimiter = string (* An error message. *) type message = string (* A run is a series of sentences or comments, followed with a delimiter (at least one blank line; comments), followed with an error message. *) type run = located_sentence or_comment list * delimiter * message (* A targeted sentence is a located sentence together with the target into which it leads. A target tells us which state a sentence leads to, as well as which spurious reductions are performed at the end. *) type target = ReferenceInterpreter.target let target2state (s, _spurious) = s type maybe_targeted_sentence = located_sentence * target option type targeted_sentence = located_sentence * target (* A targeted run is a series of targeted sentences or comments together with an error message. *) type maybe_targeted_run = maybe_targeted_sentence or_comment list * delimiter * message type targeted_run = targeted_sentence or_comment list * delimiter * message (* A filtered targeted run is a series of targeted sentences together with an error message. (The comments have been filtered out.) *) type filtered_targeted_run = targeted_sentence list * message (* --------------------------------------------------------------------------- *) (* Display and debugging. *) let print_sentence (nto, terminals) : string = let b = Buffer.create 128 in Option.iter (fun nt -> Printf.bprintf b "%s: " (Nonterminal.print false nt) ) nto; List.iter (fun t -> Printf.bprintf b "%s " (Terminal.print t) ) terminals; Printf.bprintf b "\n"; Buffer.contents b (* --------------------------------------------------------------------------- *) (* [stream] turns a finite list of terminals into a stream of terminals. *) exception EndOfStream let stream (toks : Terminal.t list) : unit -> Terminal.t * Lexing.position * Lexing.position = let toks = ref toks in fun () -> let tok = match !toks with | tok :: more -> (* Take a token off the list, and return it. *) toks := more; tok | [] -> (* The finite list has been exhausted. 
Here, two plausible behaviors come to mind. The first behavior consists in raising an exception. In that case, we are creating a finite stream, and it is up to the parser to not read past its end. The second behavior consists in returning a designated token. In that case, we are creating an infinite, eventually constant, stream. The choice between these two behaviors is somewhat arbitrary; furthermore, in the second case, the choice of the designated token is arbitrary as well. Here, we adopt the second behavior if and only if the grammar has an EOF token, and we use EOF as the designated token. Again, this is arbitrary, and could be changed in the future. *) match Terminal.eof with | Some eof -> eof | None -> raise EndOfStream in (* For now, return dummy positions. *) tok, Lexing.dummy_pos, Lexing.dummy_pos (* --------------------------------------------------------------------------- *) (* [start sentence] returns the start symbol that we should use to interpret the sentence [sentence]. *) (* If a start symbol was explicitly provided as part of the sentence, we use it. Otherwise, we use the grammar's unique start symbol, if there is one. *) let start poss ((nto, _) : sentence) : Nonterminal.t = match nto with | Some nt -> nt | None -> match ProductionMap.is_singleton Lr1.entry with | None -> Error.error poss "because the grammar has multiple start symbols, each of the\n\ sentences provided on the standard input channel must be of the\n\ form: : *" | Some (prod, _) -> match Production.classify prod with | Some nt -> nt | None -> assert false (* --------------------------------------------------------------------------- *) (* [interpret] interprets a sentence. *) let interpret ((_, toks) as sentence) : unit = let nt = start [] sentence in (* Run the reference interpreter. This can produce a concrete syntax tree ([Some cst]), fail with a parser error ([None]), or fail with a lexer error ([EndOfStream]). *) (* In either case, we produce just one line of output, so it should be clear to the user which outcomes correspond to which sentences (should multiple sentences be supplied). *) begin try match MenhirLib.Convert.Simplified.traditional2revised (ReferenceInterpreter.interpret Settings.trace nt) (stream toks) with | Some cst -> (* Success. *) Printf.printf "ACCEPT"; if Settings.interpret_show_cst then begin print_newline(); Cst.show stdout cst end | None -> (* Parser failure. *) Printf.printf "REJECT" with EndOfStream -> (* Lexer failure. *) Printf.printf "OVERSHOOT" end; print_newline() (* --------------------------------------------------------------------------- *) (* [interpret_error_aux] interprets a sentence, expecting it to end in an error. Failure or success is reported via two continuations. *) let interpret_error_aux log poss ((_, terminals) as sentence) fail succeed = let nt = start poss sentence in let open ReferenceInterpreter in match check_error_path log nt terminals with | OInputReadPastEnd -> fail "no syntax error occurs." | OInputNotFullyConsumed -> fail "a syntax error occurs before the last token is reached." | OUnexpectedAccept -> fail "no syntax error occurs; in fact, this input is accepted." | OK target -> succeed nt terminals target (* --------------------------------------------------------------------------- *) (* This default error message is produced by [--list-errors] when it creates a [.messages] file, and is recognized by [--compare-errors] when it compares two such files. 
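   For the record, a single entry of a .messages file, as printed by
   [print_messages_item] below, has roughly the following shape, where
   the sentence, the state number, and the bracketed placeholders are
   hypothetical:

     main: INT PLUS
     ##
     ## Ends in an error in state: 42.
     ##
     ## <the items of state 42, one per line>
     ##
     ## The known suffix of the stack is as follows:
     ## <a sequence of symbols>
     ##

     <the error message for this state>
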
*) let default_message = "\n" (* [print_messages_auto] displays just the sentence and the auto-generated comments. [otarget] may be [None], in which case the auto-generated comment is just a warning that this sentence does not end in an error. *) let print_messages_auto (nt, sentence, otarget) : unit = (* Print the sentence, followed with auto-generated comments. *) print_string (print_sentence (Some nt, sentence)); match (otarget : target option) with | None -> Printf.printf "##\n\ ## WARNING: This sentence does NOT end with a syntax error, as it should.\n\ ##\n" | Some (s', spurious) -> Printf.printf "##\n\ ## Ends in an error in state: %d.\n\ ##\n\ %s##\n" (Lr1.number s') (* [Lr0.print] or [Lr0.print_closure] could be used here. The latter could sometimes be helpful, but is usually intolerably verbose. *) (Lr0.print "## " (Lr1.state s')) ; Printf.printf "## The known suffix of the stack is as follows:\n\ ## %s\n\ ##\n" (Invariant.print (Invariant.stack s')) ; if spurious <> [] then begin Printf.printf "## WARNING: This example involves spurious reductions.\n\ ## This implies that, although the LR(1) items shown above provide an\n\ ## accurate view of the past (what has been recognized so far), they\n\ ## may provide an INCOMPLETE view of the future (what was expected next).\n" ; List.iter (fun (s, prod) -> Printf.printf "## In state %d, spurious reduction of production %s\n" (Lr1.number s) (Production.print prod) ) spurious; Printf.printf "##\n" end (* [print_messages_item] displays one data item. The item is of the form [nt, sentence, target], which means that beginning at the start symbol [nt], the sentence [sentence] ends in an error in the target state given by [target]. [target] also contains information about which spurious reductions are performed at the end. The display obeys the [.messages] file format. *) let print_messages_item (nt, sentence, target) : unit = (* Print the sentence, followed with auto-generated comments. *) print_messages_auto (nt, sentence, Some target); (* Then, print a proposed error message, between two blank lines. *) Printf.printf "\n%s\n" default_message (* --------------------------------------------------------------------------- *) (* [write_run run] writes a run into a new [.messages] file. Manually-written comments are preserved. New auto-generated comments are produced. *) let write_run : maybe_targeted_run or_comment -> unit = function | Thing (sentences_or_comments, delimiter, message) -> (* First, print every sentence and human comment. *) List.iter (fun sentence_or_comment -> match sentence_or_comment with | Thing ((poss, ((_, toks) as sentence)), target) -> let nt = start poss sentence in (* Every sentence is followed with newly generated auto-comments. *) print_messages_auto (nt, toks, target) | Comment c -> print_string c ) sentences_or_comments; (* Then, print the delimiter, which must begin with a blank line and may include comments. *) print_string delimiter; (* Then, print the error message. *) print_string message (* No need for another blank line. It will be printed as part of a separate [Comment]. *) | Comment comments -> (* Must begin with a blank line. *) print_string comments (* --------------------------------------------------------------------------- *) (* [interpret_error] interprets a sentence, expecting it to end in an error. Failure or success is reported on the standard output channel. This is used by [--interpret-error]. 
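   Concretely (a hypothetical session): if the standard input channel
   contains the single sentence

     main: INT INT

   and if this sentence does end in a syntax error, then the sentence
   is echoed, together with its auto-generated comments and a default
   message, in the .messages format shown above, and the process exits
   with code 0; otherwise, an error is reported.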
*) let fail msg = Error.error [] "%s" msg let succeed nt terminals target = print_messages_item (nt, terminals, target); exit 0 let interpret_error sentence = interpret_error_aux Settings.trace [] sentence fail succeed (* --------------------------------------------------------------------------- *) (* [target_sentence] interprets a (located) sentence, expecting it to end in an error, computes the state in which the error is obtained, and constructs a targeted sentence. *) let target_sentence (signal : Positions.positions -> ('a, out_channel, unit, unit) format4 -> 'a) : located_sentence -> maybe_targeted_sentence = fun (poss, sentence) -> (poss, sentence), interpret_error_aux false poss sentence (* failure: *) (fun msg -> signal poss "this sentence does not end with a syntax error, as it should.\n%s" msg ; None ) (* success: *) (fun _nt _terminals target -> Some target) let target_run_1 signal : run -> maybe_targeted_run = fun (sentences, delimiter, message) -> List.map (or_comment_map (target_sentence signal)) sentences, delimiter, message let target_run_2 : maybe_targeted_run -> targeted_run = fun (sentences, delimiter, message) -> let aux (x, y) = (x, Misc.unSome y) in List.map (or_comment_map aux) sentences, delimiter, message let target_runs : run list -> targeted_run list = fun runs -> (* Interpret all sentences, possibly displaying multiple errors. *) let runs = List.map (target_run_1 Error.signal) runs in (* Abort if an error occurred. *) if Error.errors() then exit 1; (* Remove the options introduced by the first phase above. *) let runs = List.map target_run_2 runs in runs (* --------------------------------------------------------------------------- *) (* [filter_things] filters out the comments in a list of things or comments. *) let filter_things : 'a or_comment list -> 'a list = fun things -> List.flatten (List.map unThing things) (* [filter_run] filters out the comments within a run. *) let filter_run : targeted_run -> filtered_targeted_run = fun (sentences, _, message) -> filter_things sentences, message (* --------------------------------------------------------------------------- *) (* [setup()] returns a function [read] which reads one sentence from the standard input channel. *) let setup () : unit -> sentence option = let open Lexing in let lexbuf = from_channel stdin in lexbuf.lex_curr_p <- { lexbuf.lex_curr_p with pos_fname = "(stdin)" }; let read () = try SentenceParser.optional_sentence SentenceLexer.lex lexbuf with Parsing.Parse_error -> Error.error (Positions.lexbuf lexbuf) "ill-formed input sentence." in read (* --------------------------------------------------------------------------- *) (* If [--interpret] is set, interpret the sentences found on the standard input channel, then stop, without generating a parser. *) (* We read a series of sentences from the standard input channel. To allow interactive use, we interpret each sentence as soon as it is read. *) let () = if Settings.interpret then let read = setup() in while true do match read() with | None -> exit 0 | Some sentence -> interpret sentence done (* --------------------------------------------------------------------------- *) (* If [--interpret-error] is set, interpret one sentence found on the standard input channel, then stop, without generating a parser. *) (* We read just one sentence, confirm that this sentence ends in an error, and (if that is the case) display the number of the state that is reached. 
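   On success, [succeed] prints the item in the .messages format and
   exits with code 0; on failure, [fail] reports the problem. Note that
   if no sentence at all is found on the standard input channel, the
   code below exits with code 1.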
*) let () = if Settings.interpret_error then let read = setup() in match read() with | None -> exit 1 (* abnormal: no input *) | Some sentence -> interpret_error sentence (* never returns *) (* --------------------------------------------------------------------------- *) (* Display an informational message about the contents of a [.messages] file. *) let stats (runs : run or_comment list) = (* [s] counts the sample input sentences. [m] counts the error messages. *) let s = ref 0 and m = ref 0 in List.iter (function | Thing (sentences, _, _) -> incr m; List.iter (function | Thing _ -> incr s | Comment _ -> () ) sentences | Comment _ -> () ) runs; Printf.eprintf "Read %d sample input sentences and %d error messages.\n%!" !s !m; runs (* --------------------------------------------------------------------------- *) (* Reading a [.messages] file. *) (* Our life is slightly complicated by the fact that the whitespace between two runs can contain comments, which we wish to preserve when performing [--update-errors]. *) let read_messages filename : run or_comment list = let open Segment in (* Read and segment the file. *) let segments : (tag * string * Lexing.lexbuf) list = segment filename in (* Process the segments, two by two. We expect one segment to contain a non-empty series of sentences, and the next segment to contain free-form text. *) let rec loop accu segments = match segments with | [] -> List.rev accu | (Whitespace, comments, _) :: segments -> loop (Comment comments :: accu) segments | (Segment, _, lexbuf) :: segments -> (* Read a series of located sentences. *) match SentenceParser.entry SentenceLexer.lex lexbuf with | exception Parsing.Parse_error -> Error.error [Positions.cpos lexbuf] "ill-formed sentence." | sentences -> (* In principle, we should now find a segment of whitespace followed with a segment of text. By construction, the two kinds of segments alternate. *) match segments with | (Whitespace, comments, _) :: (Segment, message, _) :: segments -> let run : run = sentences, comments, message in loop (Thing run :: accu) segments | [] | [ _ ] -> Error.error (Positions.one (Lexing.lexeme_end_p lexbuf)) "missing a final message. I may be desynchronized." | (Segment, _, _) :: _ | (Whitespace, _, _) :: (Whitespace, _, _) :: _ -> (* Should not happen, thanks to the alternation between the two kinds of segments. *) assert false in stats (loop [] segments) (* --------------------------------------------------------------------------- *) (* [message_table] converts a list of targeted runs to a table (a mapping) of states to located sentences and messages. Optionally, it can detect that two sentences lead to the same state, and report an error. *) let message_table (detect_redundancy : bool) (runs : filtered_targeted_run list) : (located_sentence * message) Lr1.NodeMap.t = let table = List.fold_left (fun table (sentences_and_states, message) -> List.fold_left (fun table (sentence2, target) -> let s = target2state target in match Lr1.NodeMap.find s table with | sentence1, _ -> if detect_redundancy then Error.signal (fst sentence1 @ fst sentence2) "these sentences both cause an error in state %d." (Lr1.number s); table | exception Not_found -> Lr1.NodeMap.add s (sentence2, message) table ) table sentences_and_states ) Lr1.NodeMap.empty runs in if Error.errors() then exit 1; table (* --------------------------------------------------------------------------- *) (* [compile_runs] converts a list of targeted runs to OCaml code that encodes a mapping of state numbers to error messages. 
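   Roughly speaking, the generated code has the following shape, where
   the state numbers and the message are hypothetical:

     let message s = match s with
       | 42 | 43 -> <the message for these two states>
       | _ -> raise Not_found

   The wildcard branch raises [Not_found], which the user can catch in
   order to produce a generic error message.
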
The code is sent to the standard output channel. *) let compile_runs filename (runs : filtered_targeted_run list) : unit = (* We wish to produce a function that maps a state number to a message. By convention, we call this function [message]. *) let name = "message" in let open IL in let open CodeBits in let default = { branchpat = PWildcard; branchbody = eraisenotfound (* The default branch raises an exception, which can be caught by the user, who can then produce a generic error message. *) } in let branches = List.fold_left (fun branches (sentences_and_states, message) -> (* Create an or-pattern for these states. *) let states = List.map (fun (_, target) -> let s = target2state target in pint (Lr1.number s) ) sentences_and_states in (* Map all these states to this message. *) { branchpat = POr states; branchbody = EStringConst message } :: branches ) [ default ] runs in let messagedef = { valpublic = true; valpat = PVar name; valval = EFun ([ PVar "s" ], EMatch (EVar "s", branches)) } in let program = [ SIComment (Printf.sprintf "This file was auto-generated based on \"%s\"." filename); SIComment (Printf.sprintf "Please note that the function [%s] can raise [Not_found]." name); SIValDefs (false, [ messagedef ]); ] in (* Write this program to the standard output channel. *) let module P = Printer.Make (struct let f = stdout let locate_stretches = None end) in P.program program (* --------------------------------------------------------------------------- *) (* If [--compile-errors ] is set, compile the error message descriptions found in file [filename] down to OCaml code, then stop. *) let () = Settings.compile_errors |> Option.iter (fun filename -> (* Read the file. *) let runs : run or_comment list = read_messages filename in (* Drop the comments in between two runs. *) let runs : run list = filter_things runs in (* Convert every sentence to a state number. We signal an error if a sentence does not end in an error, as expected. *) let runs : targeted_run list = target_runs runs in (* Remove comments within the runs. *) let runs : filtered_targeted_run list = List.map filter_run runs in (* Build a mapping of states to located sentences. This allows us to detect if two sentences lead to the same state. *) let _ = message_table true runs in (* In principle, we would like to check whether this set of sentences is complete (i.e., covers all states where an error can arise), but this may be costly -- it requires running [LRijkstra]. Instead, we offer a separate facility for comparing two [.messages] files, one of which can be produced via [--list-errors]. This can be used to ensure completeness. *) (* Now, compile this information down to OCaml code. We wish to produce a function that maps a state number to a message. By convention, we call this function [message]. *) compile_runs filename runs; exit 0 ) (* --------------------------------------------------------------------------- *) (* If two [--compare-errors ] directives are provided, compare the two message descriptions files, and stop. We wish to make sure that every state that appears on the left-hand side appears on the right-hand side as well. *) let () = Settings.compare_errors |> Option.iter (fun (filename1, filename2) -> (* Read and convert both files, as above. 
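   Note that this comparison is asymmetric: we check that the file
   [filename1] is covered by the file [filename2], and not the other
   way around. Thus, a typical way of checking that a hand-written
   .messages file is complete (a hypothetical sketch, mirroring the
   ocamlbuild rules in myocamlbuild.ml above) is:

     menhir grammar.mly --list-errors > complete.messages
     menhir grammar.mly --compare-errors complete.messages \
                        --compare-errors handwritten.messages
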
*) let runs1 = read_messages filename1 and runs2 = read_messages filename2 in let runs1 = filter_things runs1 and runs2 = filter_things runs2 in let runs1 = target_runs runs1 and runs2 = target_runs runs2 in (* here, it would be OK to ignore errors *) let runs1 = List.map filter_run runs1 and runs2 = List.map filter_run runs2 in let table1 = message_table false runs1 and table2 = message_table false runs2 in (* Check that the domain of [table1] is a subset of the domain of [table2]. *) table1 |> Lr1.NodeMap.iter (fun s ((poss1, _), _) -> if not (Lr1.NodeMap.mem s table2) then Error.signal poss1 "this sentence leads to an error in state %d.\n\ No sentence that leads to this state exists in \"%s\"." (Lr1.number s) filename2 ); (* Check that [table1] is a subset of [table2], that is, for every state [s] in the domain of [table1], [s] is mapped by [table1] and [table2] to the same error message. As an exception, if the message found in [table1] is the default message, then no comparison takes place. This allows using [--list-errors] and [--compare-errors] in conjunction to ensure that a [.messages] file is complete, without seeing warnings about different messages. *) table1 |> Lr1.NodeMap.iter (fun s ((poss1, _), message1) -> if message1 <> default_message then try let (poss2, _), message2 = Lr1.NodeMap.find s table2 in if message1 <> message2 then Error.warning (poss1 @ poss2) "these sentences lead to an error in state %d.\n\ The corresponding messages in \"%s\" and \"%s\" differ." (Lr1.number s) filename1 filename2 with Not_found -> () ); if Error.errors() then exit 1; exit 0 ) (* --------------------------------------------------------------------------- *) (* If [--update-errors ] is set, update the error message descriptions found in file [filename]. The idea is to re-generate the auto-comments, which are marked with ##, while leaving the rest untouched. *) let () = Settings.update_errors |> Option.iter (fun filename -> (* Read the file. *) let runs : run or_comment list = read_messages filename in (* Convert every sentence to a state number. Warn, but do not fail, if a sentence does not end in an error, as it should. *) let runs : maybe_targeted_run or_comment list = List.map (or_comment_map (target_run_1 Error.warning)) runs in (* We might wish to detect if two sentences lead to the same state. We might also wish to detect if this set of sentences is incomplete, and complete it automatically. However, the first task is carried out by [--compile-errors] already, and the second task is carried out by [--list-errors] and [--compare-errors] together. For now, let's try and keep things as simple as possible. The task of [--update-errors] should be to update the auto-generated comments, without failing, and without adding or removing sentences. *) (* Now, write a new [.messages] to the standard output channel, with new auto-generated comments. *) List.iter write_run runs; exit 0 ) (* --------------------------------------------------------------------------- *) (* If [--echo-errors ] is set, echo the error sentences found in file [filename]. Do not echo the error messages or the comments. *) (* In principle, we should be able to run this command without even giving an .mly file name on the command line, and without building the automaton. This is not possible at the moment, because our code is organized in too rigid a manner. *) let () = Settings.echo_errors |> Option.iter (fun filename -> (* Read the file. *) let runs : run or_comment list = read_messages filename in (* Echo.
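   That is, we print every sentence of every run, one sentence per
   line, in the same concrete syntax that the sentence parser accepts;
   the comments, the blank lines, and the messages themselves are not
   echoed.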
*) List.iter (or_comment_iter (fun run -> let (sentences : located_sentence or_comment list), _, _ = run in List.iter (or_comment_iter (fun (_, sentence) -> print_string (print_sentence sentence) )) sentences )) runs; exit 0 ) menhir-20171222/src/lexdep.mll0000664000175000017500000000407113217215730016273 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This code analyzes the output of [ocamldep] and returns the list of [.cmi] files that the [.cmo] file depends on. *) { open Lexing exception Error of string let fail lexbuf = raise (Error (Printf.sprintf "failed to make sense of ocamldep's output (character %d).\n" lexbuf.lex_curr_p.pos_cnum) ) } let newline = ('\n' | '\r' | "\r\n") let whitespace = ( ' ' | '\t' | ('\\' newline) ) let entrychar = [^ '\n' '\r' '\t' ' ' '\\' ':' ] let entry = ((entrychar+ as basename) ".cm" ('i' | 'o' | 'x') as filename) (* [main] recognizes a sequence of lines, where a line consists of an entry, followed by a colon, followed by a list of entries. *) rule main = parse | eof { [] } | entry whitespace* ":" { let bfs = collect [] lexbuf in ((basename, filename), bfs) :: main lexbuf } | _ { fail lexbuf } (* [collect] recognizes a list of entries, separated with spaces and ending in a newline. *) and collect bfs = parse | whitespace+ entry { collect ((basename, filename) :: bfs) lexbuf } | whitespace* newline { bfs } | _ | eof { fail lexbuf } menhir-20171222/src/compressedBitSet.ml0000664000175000017500000001336413217215730020122 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* A compressed (or should we say sparse?) bit set is a list of pairs of integers. The first component of every pair is an index, while the second component is a bit field. The list is sorted by order of increasing indices. *) type t = | N | C of int * int * t type element = int let word_size = Sys.word_size - 1 let empty = N let is_empty = function | N -> true | C _ -> false let add i s = let ioffset = i mod word_size in let iaddr = i - ioffset and imask = 1 lsl ioffset in let rec add = function | N -> (* Insert at end. *) C (iaddr, imask, N) | C (addr, ss, qs) as s -> if iaddr < addr then (* Insert in front. *) C (iaddr, imask, s) else if iaddr = addr then (* Found appropriate cell, update bit field. *) let ss' = ss lor imask in if ss' = ss then s else C (addr, ss', qs) else (* Not there yet, continue. 
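   (As an aside, here is the encoding at work in this function: on a
   64-bit machine, [word_size] is 63, so the element 200 has offset
   200 mod 63, that is, 11, hence address 189 and mask 1 lsl 11. An
   element is a member of the set if and only if the list has a cell
   that carries its address and whose bit field has the corresponding
   bit set.)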
*) let qs' = add qs in if qs == qs' then s else C (addr, ss, qs') in add s let singleton i = add i N let remove i s = let ioffset = i mod word_size in let iaddr = i - ioffset and imask = 1 lsl ioffset in let rec remove = function | N -> N | C (addr, ss, qs) as s -> if iaddr < addr then s else if iaddr = addr then (* Found appropriate cell, update bit field. *) let ss' = ss land (lnot imask) in if ss' = 0 then qs else if ss' = ss then s else C (addr, ss', qs) else (* Not there yet, continue. *) let qs' = remove qs in if qs == qs' then s else C (addr, ss, qs') in remove s let rec fold f s accu = match s with | N -> accu | C (base, ss, qs) -> loop f qs base ss accu and loop f qs i ss accu = if ss = 0 then fold f qs accu else (* One could in principle check whether [ss land 0x3] is zero and if so move to [i + 2] and [ss lsr 2], and similarly for various sizes. In practice, this does not seem to make a measurable difference. *) loop f qs (i + 1) (ss lsr 1) (if ss land 1 = 1 then f i accu else accu) let iter f s = fold (fun x () -> f x) s () let is_singleton s = match s with | C (_, ss, N) -> (* Test whether only one bit is set in [ss]. We do this by turning off the rightmost bit, then comparing to zero. *) ss land (ss - 1) = 0 | C (_, _, C _) | N -> false let cardinal s = fold (fun _ m -> m + 1) s 0 let elements s = fold (fun tl hd -> tl :: hd) s [] let rec subset s1 s2 = match s1, s2 with | N, _ -> true | _, N -> false | C (addr1, ss1, qs1), C (addr2, ss2, qs2) -> if addr1 < addr2 then false else if addr1 = addr2 then if (ss1 land ss2) <> ss1 then false else subset qs1 qs2 else subset s1 qs2 let mem i s = subset (singleton i) s let rec union s1 s2 = match s1, s2 with | N, s | s, N -> s | C (addr1, ss1, qs1), C (addr2, ss2, qs2) -> if addr1 < addr2 then C (addr1, ss1, union qs1 s2) else if addr1 > addr2 then let s = union s1 qs2 in if s == qs2 then s2 else C (addr2, ss2, s) else let ss = ss1 lor ss2 in let s = union qs1 qs2 in if ss == ss2 && s == qs2 then s2 else C (addr1, ss, s) let rec inter s1 s2 = match s1, s2 with | N, _ | _, N -> N | C (addr1, ss1, qs1), C (addr2, ss2, qs2) -> if addr1 < addr2 then inter qs1 s2 else if addr1 > addr2 then inter s1 qs2 else let ss = ss1 land ss2 in let s = inter qs1 qs2 in if ss = 0 then s else if (ss = ss1) && (s == qs1) then s1 else C (addr1, ss, s) exception Found of int let choose s = try iter (fun x -> raise (Found x) ) s; raise Not_found with Found x -> x let rec compare s1 s2 = match s1, s2 with N, N -> 0 | _, N -> 1 | N, _ -> -1 | C (addr1, ss1, qs1), C (addr2, ss2, qs2) -> if addr1 < addr2 then -1 else if addr1 > addr2 then 1 else if ss1 < ss2 then -1 else if ss1 > ss2 then 1 else compare qs1 qs2 let equal s1 s2 = compare s1 s2 = 0 let rec disjoint s1 s2 = match s1, s2 with | N, _ | _, N -> true | C (addr1, ss1, qs1), C (addr2, ss2, qs2) -> if addr1 = addr2 then if (ss1 land ss2) = 0 then disjoint qs1 qs2 else false else if addr1 < addr2 then disjoint qs1 s2 else disjoint s1 qs2 menhir-20171222/src/astar.ml0000664000175000017500000002441113217215730015750 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. 
*) (* *) (******************************************************************************) (* This module implements A* search, following Hart, Nilsson, and Raphael (1968). To each visited graph node, the algorithm associates an internal record, carrying various information. For this reason, the algorithm's space complexity is, in the worst case, linear in the size of the graph. The mapping of nodes to internal records is implemented via a hash table, while the converse mapping is direct (via a record field). Nodes that remain to be examined are kept in a priority queue, where the priority of a node is the cost of the shortest known path from the start node to it plus the estimated cost of a path from this node to a goal node. (Lower priority nodes are considered first). It is the use of the second summand that makes A* more efficient than Dijkstra's standard algorithm for finding shortest paths in an arbitrary graph. In fact, when [G.estimate] is the constant zero function, A* coincides with Dijkstra's algorithm. One should note that A* is faster than Dijkstra's algorithm only when a path to some goal node exists. Otherwise, both algorithms explore the entire graph, and have similar time requirements. The priority queue is implemented as an array of doubly linked lists. *) module Make (G : sig (* Graph nodes. *) type node include Hashtbl.HashedType with type t := node (* Edge labels. *) type label (* The source node(s). *) val sources: (node -> unit) -> unit (* [successors n f] presents each of [n]'s successors, in an arbitrary order, to [f], together with the cost of the edge that was followed. *) val successors: node -> (label -> int -> node -> unit) -> unit (* An estimate of the cost of the shortest path from the supplied node to some goal node. For algorithms such as A* and IDA* to find shortest paths, this estimate must be a correct under-approximation of the actual cost. *) val estimate: node -> int end) = struct type cost = int (* Nodes with low priorities are dealt with first. *) type priority = cost (* Paths back to a source (visible by the user). *) type path = | Edge of G.label * path | Source of G.node let rec follow labels path = match path with | Source node -> node, labels | Edge (label, path) -> follow (label :: labels) path let reverse path = follow [] path type inode = { (* Graph node associated with this internal record. *) this: G.node; (* Cost of the best known path from a source node to this node. (ghat) *) mutable cost: cost; (* Estimated cost of the best path from this node to a goal node. (hhat) *) estimate: cost; (* Best known path from a source node to this node. *) mutable path: path; (* Previous node on doubly linked priority list *) mutable prev: inode; (* Next node on doubly linked priority list *) mutable next: inode; (* The node's priority, if the node is in the queue; -1 otherwise *) mutable priority: priority; } (* This auxiliary module maintains a mapping of graph nodes to internal records. *) module M : sig (* Adds a binding to the mapping. *) val add: G.node -> inode -> unit (* Retrieves the internal record for this node. Raises [Not_found] no such record exists. *) val get: G.node -> inode end = struct module H = Hashtbl.Make(struct include G type t = node end) let t = H.create 100003 let add node inode = H.add t node inode let get node = H.find t node end (* This auxiliary module maintains a priority queue of internal records. *) module P : sig (* Adds this node to the queue. 
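   It is assumed that the priority is nonnegative and that the node is
   not already in the queue; when the node may already be present,
   [add_or_decrease], below, must be used instead.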
*) val add: inode -> priority -> unit (* Adds this node to the queue, or changes its priority, if it already was in the queue. It is assumed, in the second case, that the priority can only decrease. *) val add_or_decrease: inode -> priority -> unit (* Retrieve a node with lowest priority of the queue. *) val get: unit -> inode option end = struct module InfiniteArray = MenhirLib.InfiniteArray (* Array of pointers to the doubly linked lists, indexed by priorities. There is no a priori bound on the size of this array -- its size is increased if needed. It is up to the user to use a graph where paths have reasonable lengths. *) let a = InfiniteArray.make None (* Index of lowest nonempty list, if there is one; or lower (sub-optimal, but safe). If the queue is empty, [best] is arbitrary. *) let best = ref 0 (* Current number of elements in the queue. Used in [get] to stop the search for a nonempty bucket. *) let cardinal = ref 0 (* Adjust node's priority and insert into doubly linked list. *) let add inode priority = assert (0 <= priority); cardinal := !cardinal + 1; inode.priority <- priority; match InfiniteArray.get a priority with | None -> InfiniteArray.set a priority (Some inode); (* Decrease [best], if necessary, so as not to miss the new element. In the special case of A*, this never happens. *) assert (!best <= priority); (* if priority < !best then best := priority *) | Some inode' -> inode.next <- inode'; inode.prev <- inode'.prev; inode'.prev.next <- inode; inode'.prev <- inode (* Takes a node off its doubly linked list. Does not adjust [best], as this is not necessary in order to preserve the invariant. *) let remove inode = cardinal := !cardinal - 1; if inode.next == inode then InfiniteArray.set a inode.priority None else begin InfiniteArray.set a inode.priority (Some inode.next); inode.next.prev <- inode.prev; inode.prev.next <- inode.next; inode.next <- inode; inode.prev <- inode end; inode.priority <- -1 let rec get () = if !cardinal = 0 then None else get_nonempty() and get_nonempty () = (* Look for next nonempty bucket. We know there is one. This may seem inefficient, because it is a linear search. However, in A*, [best] never decreases, so the total cost of this loop is the maximum priority ever used. *) match InfiniteArray.get a !best with | None -> best := !best + 1; get_nonempty() | Some inode as result -> remove inode; result let add_or_decrease inode priority = if inode.priority >= 0 then remove inode; add inode priority end (* Initialization. *) let estimate node = let e = G.estimate node in assert (0 <= e); (* failure means user error *) e let () = G.sources (fun node -> let rec inode = { this = node; cost = 0; estimate = estimate node; path = Source node; prev = inode; next = inode; priority = -1 } in M.add node inode; P.add inode inode.estimate ) (* Access to the search results (after the search is over). *) let distance node = try (M.get node).cost with Not_found -> max_int let path node = (M.get node).path (* let [Not_found] escape if no path was found *) (* Search. *) let rec search f = (* Pick the open node that currently has lowest fhat, that is, lowest estimated distance to a goal node. *) match P.get() with | None -> (* Finished. *) distance, path | Some inode -> let node = inode.this in (* Let the user know about this newly discovered node. *) f (node, inode.path); (* Otherwise, examine its successors. 
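   This is the relaxation step of A*: for each outgoing edge, the best
   known cost of a path to the son through [node] is [inode.cost +
   edge_cost]; if this improves on the son's recorded cost, the son is
   inserted into (or moved up in) the priority queue, with priority
   fhat = ghat + hhat, that is, its new cost plus its estimate.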
*) G.successors node (fun label edge_cost son -> assert (0 <= edge_cost); (* failure means user error *) (* Determine the cost of the best known path from the start node, through this node, to this son. *) let new_cost = inode.cost + edge_cost in assert (0 <= new_cost); (* failure means overflow *) try let ison = M.get son in if new_cost < ison.cost then begin (* This son has been visited before, but this new path to it is shorter. If it was already open and waiting in the priority queue, increase its priority; otherwise, mark it as open and insert it into the queue. *) let new_fhat = new_cost + ison.estimate in assert (0 <= new_fhat); (* failure means overflow *) P.add_or_decrease ison new_fhat; ison.cost <- new_cost; ison.path <- Edge (label, inode.path) end with Not_found -> (* This son was never visited before. Allocate a new status record for it and mark it as open. *) let rec ison = { this = son; cost = new_cost; estimate = estimate son; path = Edge (label, inode.path); prev = ison; next = ison; priority = -1 } in M.add son ison; let fhat = new_cost + ison.estimate in assert (0 <= fhat); (* failure means overflow *) P.add ison fhat ); search f end menhir-20171222/src/InspectionTableFormat.ml0000664000175000017500000000606513217215730021077 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. *) (* *) (******************************************************************************) (* This signature defines the format of the tables that are produced (in addition to the tables described in [TableFormat]) when the command line switch [--inspection] is enabled. It is used as an argument to [InspectionTableInterpreter.Make]. *) module type TABLES = sig (* The types of symbols. *) include IncrementalEngine.SYMBOLS (* The type ['a lr1state] describes an LR(1) state. The generated parser defines it internally as [int]. *) type 'a lr1state (* Some of the tables that follow use encodings of (terminal and nonterminal) symbols as integers. So, we need functions that map the integer encoding of a symbol to its algebraic encoding. *) val terminal: int -> xsymbol val nonterminal: int -> xsymbol (* The left-hand side of every production already appears in the signature [TableFormat.TABLES], so we need not repeat it here. *) (* The right-hand side of every production. This a linearized array of arrays of integers, whose [data] and [entry] components have been packed. The encoding of symbols as integers in described in [TableBackend]. *) val rhs: PackedIntArray.t * PackedIntArray.t (* A mapping of every (non-initial) state to its LR(0) core. *) val lr0_core: PackedIntArray.t (* A mapping of every LR(0) state to its set of LR(0) items. Each item is represented in its packed form (see [Item]) as an integer. Thus the mapping is an array of arrays of integers, which is linearized and packed, like [rhs]. *) val lr0_items: PackedIntArray.t * PackedIntArray.t (* A mapping of every LR(0) state to its incoming symbol, if it has one. *) val lr0_incoming: PackedIntArray.t (* A table that tells which non-terminal symbols are nullable. *) val nullable: string (* This is a packed int array of bit width 1. 
It can be read using [PackedIntArray.get1]. *) (* A two-dimensional table, indexed by a nonterminal symbol and by a terminal symbol (other than [#]), encodes the FIRST sets. *) val first: int (* width of the bitmap *) * string (* second component of [PackedIntArray.t] *) end menhir-20171222/src/slr.ml0000664000175000017500000001332313217215730015436 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module extends the LR(0) automaton with lookahead information in order to construct an SLR(1) automaton. The lookahead information is obtained by considering the FOLLOW sets. *) (* This construction is not used by Menhir, but can be used to check whether the grammar is in the class SLR(1). This check is performed when the log level [lg] is at least 1. *) open Grammar (* This flag, which is reserved for internal use, causes more information about SLR(1) conflict states to be printed. *) let tell_me_everything = false (* The following function turns an LR(0) state into an SLR(1) state. *) let make_slr_state (s : Lr0.node) : Lr0.concretelr1state = (* Obtain the set of LR(0) items associated with the state [s]. *) let items = Lr0.items s in (* Unfortunately, this set is not closed. We do not have a function that computes the closure of a set of LR(0) items -- we could build one using [Item.Closure], but that would be overkill. So, we first convert this set to a set of LR(1) items, then compute the closure at this level, and finally we turn this LR(1) state into an SLR(1) state by letting the lookahead sets be the FOLLOW sets. This is somewhat ugly and naïve, but seems to work. *) (* Convert this set to a set of LR(1) items. Here, we can use any set of tokens as the lookahead set. We use the empty set. *) let s = Item.Map.lift (fun _item -> TerminalSet.empty) items in (* Compute the LR(1) closure. *) let s = Lr0.closure s in (* We now have an LR(1) state that has the correct set of LR(0) items but phony lookahead information. We convert it into an SLR(1) state by deciding that, for each item, the lookahead set is the FOLLOW set of the symbol that appears on the left-hand side of the item. *) Item.Map.fold (fun item toks accu -> let _, nt, _, _, _ = Item.def item in let follow_nt = Analysis.follow nt in assert (TerminalSet.subset toks follow_nt); (* sanity check *) Item.Map.add item follow_nt accu ) s Item.Map.empty (* Insertion of a new reduce action into the table of reductions. Copied from [Lr1] (boo, hiss). *) let addl prod tok reductions = let prods = try TerminalMap.lookup tok reductions with Not_found -> [] in TerminalMap.add tok (prod :: prods) reductions (* Same thing, for a set of tokens. *) let addl prod toks reductions = TerminalSet.fold (addl prod) toks reductions (* The following function turns a closed LR(1) state into a map of terminal symbols to reduction actions. Copied from a related function in [Lr0].
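(In such a map, a terminal symbol that is associated with two or more productions indicates a reduce/reduce conflict; the function [state_is_ok] below relies on this observation.)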
*) let reductions (s : Lr0.concretelr1state) : Production.index list TerminalMap.t = Item.Map.fold (fun item toks reductions -> match Item.classify item with | Item.Reduce prod -> addl prod toks reductions | Item.Shift _ -> reductions ) s TerminalMap.empty (* The following function turns a closed LR(1) state into a set of shift actions. *) let transitions (s : Lr0.concretelr1state) : TerminalSet.t = Item.Map.fold (fun item _ transitions -> match Item.classify item with | Item.Shift (Symbol.T tok, _) -> TerminalSet.add tok transitions | Item.Shift (Symbol.N _, _) | Item.Reduce _ -> transitions ) s TerminalSet.empty (* This function computes the domain of a terminal map, producing a terminal set. *) let domain (m : 'a TerminalMap.t) : TerminalSet.t = TerminalMap.fold (fun tok _ accu -> TerminalSet.add tok accu ) m TerminalSet.empty (* The following function checks whether a closed LR(1) state is free of conflicts. *) let state_is_ok (s : Lr0.concretelr1state) : bool = let reductions = reductions s and transitions = transitions s in (* Check for shift/reduce conflicts. *) TerminalSet.disjoint transitions (domain reductions) && (* Check for reduce/reduce conflicts. *) TerminalMap.fold (fun _ prods ok -> ok && match prods with | [] | [ _ ] -> true | _ :: _ :: _ -> false ) reductions true (* The following function counts the number of states in the SLR(1) automaton that have a conflict. *) let count_slr_violations () : int = let count = ref 0 in for s = 0 to Lr0.n - 1 do let s = make_slr_state s in if not (state_is_ok s) then begin incr count; if tell_me_everything then Printf.fprintf stderr "The following SLR(1) state has a conflict:\n%s" (Lr0.print_concrete "" s) end done; !count (* At log level 1, indicate whether the grammar is SLR(1). *) let () = Error.logG 1 (fun f -> let count = count_slr_violations() in if count = 0 then Printf.fprintf f "The grammar is SLR(1).\n" else Printf.fprintf f "The grammar is not SLR(1) -- %d states have a conflict.\n" count ) menhir-20171222/src/stringMap.ml0000664000175000017500000000253313217215730016603 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) include Map.Make (String) let cardinal s = fold (fun _ _ x -> x + 1) s 0 let filter pred map = fold (fun key value map -> if pred key value then add key value map else map) map empty let restrict domain map = filter (fun k _ -> StringSet.mem k domain) map let domain map = fold (fun key _ acu -> StringSet.add key acu) map StringSet.empty menhir-20171222/src/misc.ml0000664000175000017500000002261413217215730015574 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. 
*) (* *) (******************************************************************************) let ( $$ ) x f = f x let unSome = function None -> assert false | Some x -> x let o2s o f = match o with | None -> "" | Some x -> f x let single = function | [ x ] -> x | _ -> assert false let rec mapd f = function | [] -> [] | x :: xs -> let y1, y2 = f x in y1 :: y2 :: mapd f xs let tabulate n f = let a = Array.init n f in Array.get a let tabulateb n f = let a = Array.init n f in Array.get a, Array.fold_left (fun count element -> if element then count + 1 else count ) 0 a (* [tabulatef number fold n dummy f] returns a function that is extensionally equal to [f], but relies on an internal array. Arguments to [f] are of type ['a] and are mapped by [number] into the range [0..n). [fold] allows folding over the domain of [f]. [dummy] is used to initialize the internal array. Its value has no impact if [fold] is surjective. *) let tabulatef number fold n dummy f = let a = Array.make n dummy in let () = fold (fun () element -> a.(number element) <- f element ) () in let get element = a.(number element) in get let tabulateo number fold n f = let c = ref 0 in let get = tabulatef number fold n None (fun element -> let image = f element in begin match image with | Some _ -> incr c | None -> () end; image ) in get, !c type 'a iter = ('a -> unit) -> unit let separated_iter_to_string printer separator iter = let b = Buffer.create 32 in let first = ref true in iter (fun x -> if !first then begin Buffer.add_string b (printer x); first := false end else begin Buffer.add_string b separator; Buffer.add_string b (printer x) end ); Buffer.contents b let separated_list_to_string printer separator xs = separated_iter_to_string printer separator (fun f -> List.iter f xs) let inverse (a : 'a array) : 'a -> int = let table = Hashtbl.create (Array.length a) in Array.iteri (fun i data -> assert (not (Hashtbl.mem table data)); Hashtbl.add table data i ) a; fun data -> try Hashtbl.find table data with Not_found -> assert false let support_assoc l x = try List.assoc x l with Not_found -> x let index (strings : string list) : int * string array * int StringMap.t = let name = Array.of_list strings and n, map = List.fold_left (fun (n, map) s -> n+1, StringMap.add s n map ) (0, StringMap.empty) strings in n, name, map (* Turning an implicit list, stored using pointers through a hash table, into an explicit list. The head of the implicit list is not included in the explicit list. *) let materialize (table : ('a, 'a option) Hashtbl.t) (x : 'a) : 'a list = let rec loop x = match Hashtbl.find table x with | None -> [] | Some x -> x :: loop x in loop x (* [iteri] implements a [for] loop over integers, from 0 to [n-1]. *) let iteri n f = for i = 0 to n - 1 do f i done (* [foldi] implements a [for] loop over integers, from 0 to [n-1], with an accumulator. [foldij] implements a [for] loop over integers, from [start] to [n-1], with an accumulator. *) let foldij start n f accu = let rec loop i accu = if i = n then accu else loop (i+1) (f i accu) in loop start accu let foldi n f accu = foldij 0 n f accu (* [mapij start n f] produces the list [ f start; ... f (n-1) ]. *) let mapij start n f = List.rev ( foldij start n (fun i accu -> f i :: accu ) [] ) (* [mapi n f] produces the list [ f 0; ... f (n-1) ]. *) let mapi n f = mapij 0 n f (* [qfold f accu q] repeatedly takes an element [x] off the queue [q] and applies [f] to the accumulator and to [x], until [q] becomes empty. Of course, [f] can add elements to [q] as a side-effect. 
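For instance (a sketch), [qfold (+) 0 q] computes the sum of the elements of an integer queue [q], draining the queue in the process.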
We allocate an option to ensure that [qfold] is tail-recursive. *) let rec qfold f accu q = match try Some (Queue.take q) with Queue.Empty -> None with | Some x -> qfold f (f accu x) q | None -> accu (* [qiter f q] repeatedly takes an element [x] off the queue [q] and applies [f] to [x], until [q] becomes empty. Of course, [f] can add elements to [q] as a side-effect. *) let qiter f q = try while true do f (Queue.take q) done with Queue.Empty -> () let rec smap f = function | [] -> [] | (x :: xs) as l -> let x' = f x and xs' = smap f xs in if x == x' && xs == xs' then l else x' :: xs' let rec smapa f accu = function | [] -> accu, [] | (x :: xs) as l -> let accu, x' = f accu x in let accu, xs' = smapa f accu xs in accu, if x == x' && xs == xs' then l else x' :: xs' let normalize s = let s = Bytes.of_string s in let n = Bytes.length s in for i = 0 to n - 1 do match Bytes.get s i with | '(' | ')' | ',' -> Bytes.set s i '_' | _ -> () done; Bytes.unsafe_to_string s (* [postincrement r] increments [r] and returns its original value. *) let postincrement r = let x = !r in r := x + 1; x (* [map_opt f l] returns the list of [y]s such that [f x = Some y] where [x] is in [l], preserving the order of elements of [l]. *) let map_opt f l = List.(rev (fold_left (fun ys x -> match f x with | None -> ys | Some y -> y :: ys ) [] l)) let new_encode_decode capacity = (* Set up a hash table, mapping strings to unique integers. *) let module H = Hashtbl.Make(struct type t = string let equal = (=) let hash = Hashtbl.hash end) in let table = H.create capacity in (* Set up a resizable array, mapping integers to strings. *) let text = MenhirLib.InfiniteArray.make "" in (* This counts the calls to [encode]. *) let c = ref 0 in (* A string is mapped to a unique integer, as follows. *) let encode (s : string) : int = c := !c + 1; try H.find table s with Not_found -> (* The number of elements in the hash table is the next available unique integer code. *) let i = H.length table in H.add table s i; MenhirLib.InfiniteArray.set text i s; i (* An integer code can be mapped back to a string, as follows. *) and decode (i : int) : string = MenhirLib.InfiniteArray.get text i and verbose () = Printf.fprintf stderr "%d calls to intern; %d unique strings.\n%!" !c (H.length table) in encode, decode, verbose let new_claim () = let names = ref StringSet.empty in let claim name = if StringSet.mem name !names then Error.error [] "internal name clash over %s" name; names := StringSet.add name !names in claim let rec best (preferable : 'a -> 'a -> bool) (xs : 'a list) : 'a option = match xs with | [] -> (* Special case: no elements at all, so no best element. This case does not participate in the recursion. *) None | [x] -> Some x | x :: xs -> (* If [x] is preferable to every element of [xs], then it is the best element of [x :: xs]. *) if List.for_all (preferable x) xs then Some x else (* [xs] is nonempty, so the recursive call is permitted. *) match best preferable xs with | Some y -> if preferable y x then (* If [y] is the best element of [xs] and [y] is preferable to [x], then [y] is the best element of [x :: xs]. *) Some y else (* There is no best element. *) None | None -> (* There is no best element.
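(For instance, if [preferable] is the divisibility ordering, then the list [2; 3] has no best element: neither element is preferable to the other.)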
*) None let rec levels1 cmp x1 xs = match xs with | [] -> [x1], [] | x2 :: xs -> let ys1, yss = levels1 cmp x2 xs in if cmp x1 x2 = 0 then x1 :: ys1, yss else [x1], ys1 :: yss let levels cmp xs = match xs with | [] -> [] | x1 :: xs -> let ys1, yss = levels1 cmp x1 xs in ys1 :: yss let rec dup1 cmp x ys = match ys with | [] -> None | y :: ys -> if cmp x y = 0 then Some x else dup1 cmp y ys let dup cmp xs = match xs with | [] -> None | x :: xs -> dup1 cmp x xs let once x y = let s = ref x in fun () -> let result = !s in s := y; result module ListExtras = struct let equal = List.for_all2 let hash hash xs = Hashtbl.hash (List.map hash xs) end let nth = function | 1 -> "first" | 2 -> "second" | 3 -> "third" | i -> Printf.sprintf "%dth" i menhir-20171222/src/gMap.ml0000664000175000017500000001513213217215730015522 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) module type S = sig (* Keys are assumed to have a natural total order. *) type key (* The type of maps whose data have type ['a]. *) type 'a t (* The empty map. *) val empty: 'a t (* [lookup k m] looks up the value associated to the key [k] in the map [m], and raises [Not_found] if no value is bound to [k]. *) val lookup: key -> 'a t -> 'a val find: key -> 'a t -> 'a (* [add k d m] returns a map whose bindings are all bindings in [m], plus a binding of the key [k] to the datum [d]. If a binding already exists for [k], it is overridden. *) val add: key -> 'a -> 'a t -> 'a t (* [strict_add k d m] returns a map whose bindings are all bindings in [m], plus a binding of the key [k] to the datum [d]. If a binding already exists for [k] then [Unchanged] is raised. *) exception Unchanged val strict_add: key -> 'a -> 'a t -> 'a t (* [fine_add decide k d m] returns a map whose bindings are all bindings in [m], plus a binding of the key [k] to the datum [d]. If a binding from [k] to [d0] already exists, then the resulting map contains a binding from [k] to [decide d0 d]. *) type 'a decision = 'a -> 'a -> 'a val fine_add: 'a decision -> key -> 'a -> 'a t -> 'a t (* [mem k m] tells whether the key [k] appears in the domain of the map [m]. *) val mem: key -> 'a t -> bool (* [singleton k d] returns a map whose only binding is from [k] to [d]. *) val singleton: key -> 'a -> 'a t (* [is_empty m] returns [true] if and only if the map [m] defines no bindings at all. *) val is_empty: 'a t -> bool (* [is_singleton s] returns [Some x] if [s] is a singleton containing [x] as its only element; otherwise, it returns [None]. *) val is_singleton: 'a t -> (key * 'a) option (* [cardinal m] returns [m]'s cardinal, that is, the number of keys it binds, or, in other words, the cardinal of its domain. *) val cardinal: 'a t -> int (* [choose m] returns an arbitrarily chosen binding in [m], if [m] is nonempty, and raises [Not_found] otherwise. *) val choose: 'a t -> key * 'a (* [lookup_and_remove k m] looks up the value [v] associated to the key [k] in the map [m], and raises [Not_found] if no value is bound to [k]. The call returns the value [v], together with the map [m] deprived from the binding from [k] to [v]. 
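For instance (a sketch), [lookup_and_remove k (singleton k v)] returns the pair of [v] and the empty map.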
*) val lookup_and_remove: key -> 'a t -> 'a * 'a t val find_and_remove: key -> 'a t -> 'a * 'a t (* [remove k m] is the map [m] deprived from any binding for [k]. *) val remove: key -> 'a t -> 'a t (* [union m1 m2] returns the union of the maps [m1] and [m2]. Bindings in [m2] take precedence over those in [m1]. *) val union: 'a t -> 'a t -> 'a t (* [fine_union decide m1 m2] returns the union of the maps [m1] and [m2]. If a key [k] is bound to [x1] (resp. [x2]) within [m1] (resp. [m2]), then [decide] is called. It is passed [x1] and [x2], and must return the value that shall be bound to [k] in the final map. *) val fine_union: 'a decision -> 'a t -> 'a t -> 'a t (* [iter f m] invokes [f k x], in turn, for each binding from key [k] to element [x] in the map [m]. Keys are presented to [f] in increasing order. *) val iter: (key -> 'a -> unit) -> 'a t -> unit (* [fold f m seed] invokes [f k d accu], in turn, for each binding from key [k] to datum [d] in the map [m]. Keys are presented to [f] in increasing order. The initial value of [accu] is [seed]; then, at each new call, its value is the value returned by the previous invocation of [f]. The value returned by [fold] is the final value of [accu]. *) val fold: (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b (* [fold_rev] performs exactly the same job as [fold], but presents keys to [f] in the opposite order. *) val fold_rev: (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b (* [filter f m] returns a copy of the map [m] where only the bindings that satisfy [f] have been retained. *) val filter: (key -> 'a -> bool) -> 'a t -> 'a t (* It is valid to evaluate [iter2 f m1 m2] if and only if [m1] and [m2] have equal domains. Doing so invokes [f k x1 x2], in turn, for each key [k] bound to [x1] in [m1] and to [x2] in [m2]. Bindings are presented to [f] in increasing order. *) val iter2: (key -> 'a -> 'b -> unit) -> 'a t -> 'b t -> unit (* [map f m] returns the map obtained by composing the map [m] with the function [f]; that is, the map $k\mapsto f(m(k))$. *) val map: ('a -> 'b) -> 'a t -> 'b t (* [endo_map] is similar to [map], but attempts to physically share its result with its input. This saves memory when [f] is the identity function. *) val endo_map: ('a -> 'a) -> 'a t -> 'a t (* If [dcompare] is an ordering over data, then [compare dcompare] is an ordering over maps. *) val compare: ('a -> 'a -> int) -> 'a t -> 'a t -> int (* A map's domain is a set. Thus, to be able to perform operations on domains, we need set operations, provided by the [Domain] sub-module. The two-way connection between maps and their domains is given by two additional functions, [domain] and [lift]. [domain m] returns [m]'s domain. [lift f s] returns the map $k\mapsto f(k)$, where $k$ ranges over a set of keys [s]. *) module Domain : GSet.S with type element = key val domain: 'a t -> Domain.t val lift: (key -> 'a) -> Domain.t -> 'a t (* [corestrict m d] performs a co-restriction of the map [m] to the domain [d]. That is, it returns the map $k\mapsto m(k)$, where $k$ ranges over all keys bound in [m] but \emph{not} present in [d]. *) val corestrict: 'a t -> Domain.t -> 'a t end menhir-20171222/src/menhirLib.mlpack0000664000175000017500000000062113217215730017403 0ustar fpottierfpottier# This is the list of modules that must go into MenhirLib. # They must be listed in dependency order, as this list is # used to construct menhirLib.ml at installation time. 
General Convert IncrementalEngine EngineTypes Engine ErrorReports Printers InfiniteArray PackedIntArray RowDisplacement LinearizedArray TableFormat InspectionTableFormat InspectionTableInterpreter TableInterpreter StaticVersion menhir-20171222/src/keyword.mli0000664000175000017500000000507413217215730016477 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module provides some type and function definitions that help deal with the keywords that we recognize within semantic actions. *) (* The user can request position information either at type [int] (a simple offset) or at type [Lexing.position]. *) type flavor = | FlavorOffset | FlavorPosition (* The user can request position information about the $start or $end of a symbol. Also, $symbolstart requests the computation of the start position of the first nonempty element in a production. *) type where = | WhereSymbolStart | WhereStart | WhereEnd (* The user can request position information about a production's left-hand side or about one of the symbols in its right-hand side, which he must refer to by name. (Referring to its symbol by its position, using [$i], is permitted in the concrete syntax, but the lexer eliminates this form.) We add a new subject, [Before], which corresponds to [$endpos($0)] in concrete syntax. We adopt the (slightly awkward) convention that when the subject is [Before], the [where] component must be [WhereEnd]. *) type subject = | Before | Left | RightNamed of string (* Keywords inside semantic actions. They allow access to semantic values or to position information. *) type keyword = | Position of subject * where * flavor | SyntaxError (* This maps a [Position] keyword to the name of the variable that the keyword is replaced with. *) val posvar: subject -> where -> flavor -> string (* Sets of keywords. *) module KeywordSet : sig include Set.S with type elt = keyword val map: (keyword -> keyword) -> t -> t end menhir-20171222/src/menhirLib.META0000664000175000017500000000021513217215730016661 0ustar fpottierfpottierrequires = "" description = "Runtime support for code generated by Menhir" archive(byte) = "menhirLib.cmo" archive(native) = "menhirLib.cmx" menhir-20171222/src/menhirSdk.META0000664000175000017500000000023113217215730016672 0ustar fpottierfpottierrequires = "" description = "Toolkit for postprocessing Menhir automata (.cmly files)" archive(byte) = "menhirSdk.cmo" archive(native) = "menhirSdk.cmx" menhir-20171222/src/invariant.mli0000664000175000017500000001144713217215730017007 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module discovers and publishes information about the automaton. 
It determines the shape of the stack when a state is about to be entered, when a production is about to be reduced, and when a goto transition is about to be taken. It also determines which states should be represented (that is, need to physically exist on the stack at runtime) and which symbols need to keep track of (start or end) positions. It also determines which automaton states could have to deal with an [error] token. The information computed in this module is used in the code back-end, in the Coq back-end, and in the automated production of .messages files. It is not used in the table back-end. *) open Grammar (* ------------------------------------------------------------------------- *) (* A representation of stack shapes. *) (* A word is a representation of a stack or stack suffix. *) type word (* [fold] folds over a word. At each cell, [f] is applied to the accumulator, to a Boolean flag that tells whether the cell holds a state, to the set of possible states of the cell, and to the symbol associated with the cell. The stack is visited from bottom to top. *) val fold: ('a -> bool -> Symbol.t -> Lr1.NodeSet.t -> 'a) -> 'a -> word -> 'a (* [fold_top f accu s] is analogous to [fold], but only folds over the top stack cell, if there is one, so that [f] is either not invoked at all or invoked just once. *) val fold_top: (bool -> Symbol.t -> 'a) -> 'a -> word -> 'a (* [print w] produces a string representation of the word [w]. Only the symbols are shown. *) val print: word -> string (* ------------------------------------------------------------------------- *) (* Information about the stack. *) (* [stack s] is the structure of the stack at state [s]. *) val stack: Lr1.node -> word (* [prodstack prod] is the structure of the stack when production [prod] is about to be reduced. This function should not be called if production [prod] is never reduced. *) val prodstack: Production.index -> word (* [gotostack nt] is the structure of the stack when a shift transition over nonterminal [nt] is about to be taken. It consists of just one cell. *) val gotostack: Nonterminal.t -> word (* [rewind s] explains how to rewind the stack when dealing with an error in state [s]. It produces an instruction to either die (because no state on the stack can handle errors) or pop a suffix of the stack. In the latter case, one reaches a state that is either represented (its identity is physically stored in the bottommost cell that is popped) or unrepresented (its identity is statically known). *) type instruction = | Die | DownTo of word * state and state = | Represented | UnRepresented of Lr1.node val rewind: Lr1.node -> instruction (* ------------------------------------------------------------------------- *) (* Information about which states and positions need to physically exist on the stack. *) (* [represented s] tells whether state [s] must have an explicit representation, that is, whether it is pushed onto the stack. *) val represented: Lr1.node -> bool (* [startp symbol] and [endp symbol] tell whether start or end positions must be recorded for symbol [symbol]. *) val startp: Symbol.t -> bool val endp: Symbol.t -> bool (* ------------------------------------------------------------------------- *) (* Information about error handling. *) (* [errorpeeker s] tells whether state [s] can potentially peek at an error. This is the case if, in state [s], an error token may be on the stream. 
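(The code back-end exploits this information: in a state that can peek at an error, the generated code must test the [error] flag before reading the lookahead token.)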
*) val errorpeeker: Lr1.node -> bool (* ------------------------------------------------------------------------- *) (* Miscellaneous. *) (* [universal symbol] tells whether every represented state has an outgoing transition along [symbol]. *) val universal: Symbol.t -> bool menhir-20171222/src/codeBackend.ml0000664000175000017500000015562313217215730017032 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* The code generator. *) module Run (T : sig end) = struct open Grammar open IL open CodeBits open CodePieces open TokenType open Interface (* ------------------------------------------------------------------------ *) (* Here is a description of our code generation mechanism. Every internal function that we produce is parameterized by the parser environment [env], which contains (pointers to) the lexer, the lexing buffer, the last token read, etc. No global variables are exploited, so our parsers are reentrant. The functions that we export do not expect an environment as a parameter; they create a fresh one when invoked. Every state [s] is translated to a [run] function. To a first approximation, the only parameter of the [run] function, besides [env], is the stack. However, in some cases (consult the predicate [runpushes]), the top stack cell is not yet allocated when [run s] is called. The cell's contents are passed as extra parameters, and it is [run]'s responsibility to allocate that cell. (When [run] is the target of a shift transition, the position parameters [startp] and [endp] are redundant with the [env] parameter, because they are always equal to [env.startp] and [env.endp]. However, this does not appear to make a great difference in terms of code size, and makes our life easier, so we do not attempt to eliminate this redundancy.) The first thing in [run] is to discard a token, if the state was entered through a shift transition, and to peek at the lookahead token. When the current token is to be discarded, the [discard] function is invoked. It discards the current token, invokes the lexer to obtain a new token, and returns an updated environment. When we only wish to peek at the current token, without discarding it, we simply read [env.token]. (We have to be careful in cases where the current lookahead token might be [error], since, in those cases, [env.token] is meaningless; see below.) Once the lookahead token is obtained, [run] performs a case analysis of the lookahead token. Each branch performs one of the following. In shift branches, control is dispatched to another [run] function, with appropriate parameters, typically the current stack plus the information that should go into the new top stack cell (a state, a semantic value, locations). In reduce branches, a [reduce] function is invoked. In the default branch, error handling is initiated (see below). The [reduce] function associated with production [prod] pops as many stack cells as necessary, retrieving semantic values and the state [s] that initiated the reduction. It then evaluates the semantic action, which yields a new semantic value. 
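(As a rough sketch, with hypothetical names and many details -- positions, type annotations, the exact calling convention -- omitted, a [reduce] function looks like [let reduce42 env stack = let stack, state, v1, v2 = pop stack in let v = action v1 v2 in goto_nt env stack state v].)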
(This is the only place where semantic actions are evaluated, so that semantic actions are never duplicated.) It then passes control on to the [goto] function associated with the nonterminal [nt], where [nt] is the left-hand side of the production [prod]. The [goto] function associated with nonterminal [nt] expects just one parameter besides the environment -- namely, the stack. However, in some cases (consult the predicate [gotopushes]), the top stack cell is not allocated yet, so its contents are passed as extra parameters. In that case, [goto] first allocates that cell. Then, it examines the state found in that cell and performs a goto transition, that is, a shift transition on the nonterminal symbol [nt]. This simply consists in passing control to the [run] function associated with the transition's target state. If this case analysis only has one branch, because all transitions for [nt] lead to the same target state, then no case analysis is required. In principle, a stack cell contains a state, a semantic value, and start and end positions. However, the state can be omitted if it is never consulted by a [goto] function. The semantic value can be omitted if it is associated with a token that was declared not to carry a semantic value. (One could also omit semantic values for nonterminals whose type was declared to be [unit], but that does not seem very useful.) The start or end position can be omitted if they are associated with a symbol that does not require keeping track of positions. When all components of a stack cell are omitted, the entire cell disappears, so that no memory allocation is required. For each start symbol [nt], an entry point function, named after [nt], is generated. Its parameters are a lexer and a lexing buffer. The function allocates and initializes a parser environment and transfers control to the appropriate [run] function. Our functions are grouped into one huge [let rec] definition. The inliner, implemented as a separate module, will inline functions that are called at most once, remove dead code (although there should be none or next to none), and possibly perform other transformations. I note that, if a state can be entered only through (nondefault) reductions, then, in that state, the lookahead token must be a member of the set of tokens that allow these reductions, and by construction, there must exist an action on that token in that state. Thus, the default branch (which signals an error when the lookahead token is not a member of the expected set) is in fact dead. It would be nice (but difficult) to exploit types to prove that. However, one could at least replace the code of that branch with a simple [assert false]. TEMPORARY do it *) (* ------------------------------------------------------------------------ *) (* Here is a description of our error handling mechanism. With every state [s], we associate an [error] function. If [s] is willing to act when the lookahead token is [error], then this function tells how. This includes *both* shift *and* reduce actions. (For some reason, yacc/ocamlyacc/mule/bison can only shift on [error].) If [s] is unable to act when the lookahead token is [error], then this function pops a stack cell, extracts a state [s'] out of it, and transfers control, via a global [errorcase] dispatch function, to the [error] function associated with [s']. (Because some stack cells do not physically hold a state, this description is somewhat simpler than the truth, but that's the idea.) 
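(Again, a rough sketch with hypothetical names: for a state that is unable to act when the lookahead token is [error], one obtains something like [let error42 env stack = let stack, state = pop stack in errorcase env stack state], where [errorcase] dispatches on [state].)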
When an error is detected in state [s], then (see [initiate]) the [error] function associated with [s] is invoked. Immediately before invoking the [error] function, the flag [env.error] is set. By convention, this means that the current token is discarded and replaced with an [error] token. The [error] token transparently inherits the positions associated with the underlying concrete token. Whenever we attempt to consult the current token, we check whether [env.error] is set and, if that is the case, resume error handling by calling the [error] function associated with the current state. This allows a series of reductions to correctly take place when the lookahead token is [error]. In many states, though, it is possible to statically prove that [env.error] cannot be set. In that case, we produce a lookup of [env.token] without checking [env.error]. The flag [env.error] is cleared when a token is shifted. States with default reductions perform a reduction regardless of the current lookahead token, which can be either [error] or a regular token. A question that bothered me for a while was, when unwinding the stack, do we stop at a state that has a default reduction? Should it be considered able to handle the error token? I now believe that the answer is, this cannot happen. Indeed, if a state has a default reduction, then, whenever it is entered, reduction is performed and that state is exited, which means that it is never pushed onto the stack. So, it is fine to consider that a state with a default reduction is unable to handle errors. I note that a state that can handle [error] and has a default reduction must in fact have a reduction action on [error]. *) (* The type of environments. *) let tcenv = env let tenv = TypApp (tcenv, []) (* The [assertfalse] function. We have just one of these, in order to save code size. It should become unnecessary when we add GADTs. *) let assertfalse = prefix "fail" (* The [discard] function. *) let discard = prefix "discard" (* The [initenv] function. *) let initenv = prefix "init" (* The [run] function associated with a state [s]. *) let run s = prefix (Printf.sprintf "run%d" (Lr1.number s)) (* The [goto] function associated with a nonterminal [nt]. *) let goto nt = prefix (Printf.sprintf "goto_%s" (Nonterminal.print true nt)) (* The [reduce] function associated with a production [prod]. *) let reduce prod = prefix (Printf.sprintf "reduce%d" (Production.p2i prod)) (* The [errorcase] function. *) let errorcase = prefix "errorcase" (* The [error] function associated with a state [s]. *) let error s = prefix (Printf.sprintf "error%d" (Lr1.number s)) (* The constant associated with a state [s]. *) let statecon s = dataprefix (Printf.sprintf "State%d" (Lr1.number s)) let estatecon s = EData (statecon s, []) let pstatecon s = PData (statecon s, []) let pstatescon ss = POr (List.map pstatecon ss) (* The type of states. *) let tcstate = prefix "state" let tstate = TypApp (tcstate, []) (* The [print_token] function. This automatically generated function is used in [--trace] mode. *) let print_token = prefix "print_token" (* Fields in the environment record. *) let flexer = prefix "lexer" let flexbuf = prefix "lexbuf" let ftoken = prefix "token" let ferror = prefix "error" (* The type variable that represents the stack tail. *) let tvtail = tvprefix "tail" let ttail = TypVar tvtail (* The result type for every function. 
TEMPORARY *) let tvresult = tvprefix "return" let tresult = TypVar tvresult (* ------------------------------------------------------------------------ *) (* Helpers for code production. *) let var x : expr = EVar x let pvar x : pattern = PVar x let magic e : expr = EMagic e let nomagic e = e (* The following assertion checks that [env.error] is [false]. *) let assertnoerror : pattern * expr = PUnit, EApp (EVar "assert", [ EApp (EVar "not", [ ERecordAccess (EVar env, ferror) ]) ]) let etuple = function | [] -> assert false | [ e ] -> e | es -> ETuple es let ptuple = function | [] -> assert false | [ p ] -> p | ps -> PTuple ps let trace (format : string) (args : expr list) : (pattern * expr) list = if Settings.trace then [ PUnit, EApp (EVar "Printf.fprintf", (EVar "Pervasives.stderr") :: (EStringConst (format ^"\n%!")) :: args) ] else [] let tracecomment (comment : string) (body : expr) : expr = if Settings.trace then blet (trace comment [], body) else EComment (comment, body) let auto2scheme t = scheme [ tvtail; tvresult ] t (* ------------------------------------------------------------------------ *) (* Accessing the positions of the current token. *) (* There are two ways we can go about this. We can read the positions from the lexbuf immediately after we request a new token, or we can wait until we need the positions and read them at that point. As of 2014/12/12, we switch to the latter approach. The speed difference in a micro-benchmark is not measurable, but this allows us to save two fields in the [env] record, which should be a good thing, as it implies less frequent minor collections. *) let getstartp = ERecordAccess (ERecordAccess (EVar env, flexbuf), "Lexing.lex_start_p") let getendp = ERecordAccess (ERecordAccess (EVar env, flexbuf), "Lexing.lex_curr_p") (* ------------------------------------------------------------------------ *) (* Determine whether the [goto] function for nonterminal [nt] will push a new cell onto the stack. If it doesn't, then that job is delegated to the [run] functions called by [goto]. One could decide that [gotopushes] always returns true, and produce decent code. As a refinement, we decide to drive the [push] operation inside the [run] functions if all of them are able to eliminate this operation via shiftreduce optimization. This will be the case if all of these [run] functions implement a default reduction of a non-epsilon production. If that is not the case, then [gotopushes] returns true. In general, it is good to place the [push] operation inside [goto], because multiple [reduce] functions transfer control to [goto], and [goto] in turn transfers control to multiple [run] functions. Hence, this is where code sharing is maximal. All of the [run] functions that [goto] can transfer control to expect a stack cell of the same shape (indeed, the symbol [nt] is the same in every case, and the state is always represented), which makes this decision possible. *) let gotopushes : Nonterminal.t -> bool = Nonterminal.tabulate (fun nt -> not ( Lr1.targets (fun accu _ target -> accu && match Default.has_default_reduction target with | Some (prod, _) -> Production.length prod > 0 | None -> false ) true (Symbol.N nt) ) ) (* ------------------------------------------------------------------------ *) (* Determine whether the [run] function for state [s] will push a new cell onto the stack. Our convention is this. If this [run] function is entered via a shift transition, then it is in charge of pushing a new stack cell. 
If it is entered via a goto transition, then it is in charge of pushing a new cell if and only if the [goto] function that invoked it did not do so. Last, if this [run] function is invoked directly by an entry point, then it does not push a stack cell. *) let runpushes s = match Lr1.incoming_symbol s with | Some (Symbol.T _) -> true | Some (Symbol.N nt) -> not (gotopushes nt) | None -> false (* ------------------------------------------------------------------------ *) (* In some situations, we are able to fuse a shift (or goto) transition with a reduce transition, which means that we save the cost (in speed and in code size) of pushing and popping the top stack cell. This involves creating a modified version of the [reduce] function associated with a production [prod], where the contents of the top stack cell are passed as extra parameters. Because we wish to avoid code duplication, we perform this change only if all call sites for [reduce] agree on this modified calling convention. At the call site, the optimization is possible only if a stack cell allocation exists and is immediately followed by a call to [reduce]. This is the case inside the [run] function for state [s] when [run] pushes a stack cell and performs a default reduction. This optimization amounts to coalescing the push operation inside [run] with the pop operation that follows inside [reduce]. Unit production elimination, on the other hand, would coalesce the pop operation inside [reduce] with the push operation that follows inside [goto]. For this reason, the two are contradictory. As a result, we do not attempt to perform unit production elimination. In fact, we did implement it at one point and found that it was seldom applicable, because preference was given to the shiftreduce optimization. There are cases where shiftreduce optimization does not make any difference, for instance, if production [prod] is never reduced, or if the top stack cell is in fact nonexistent. *) let (shiftreduce : Production.index -> bool), shiftreducecount = Production.tabulateb (fun prod -> (* Check that this production pops at least one stack cell. *) Production.length prod > 0 && (* Check that all call sites push a stack cell and have a default reduction. *) Lr1.NodeSet.fold (fun s accu -> accu && (match Default.has_default_reduction s with None -> false | Some _ -> true) && (runpushes s) ) (Lr1.production_where prod) true ) let () = Error.logC 1 (fun f -> Printf.fprintf f "%d out of %d productions exploit shiftreduce optimization.\n" shiftreducecount Production.n) (* Check that, as predicted above, [gotopushes nt] returns [false] only when all of the [run] functions that follow it perform shiftreduce optimization. This can be proved as follows. If [gotopushes nt] returns [false], then every successor state [s] has a default reduction for some non-epsilon production [prod]. Furthermore, all states that can reduce [prod] must be successors of that same [goto] function: indeed, because the right-hand side of the production ends with symbol [nt], every state that can reduce [prod] must be entered through [nt]. So, at all such states, [runpushes] is true, which guarantees that [shiftreduce prod] is true as well. 
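(The following assertion checks this property each time the code generator runs.)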
*) let () = assert ( Nonterminal.fold (fun nt accu -> accu && if gotopushes nt then true else Lr1.targets (fun accu _ target -> accu && match Default.has_default_reduction target with | Some (prod, _) -> shiftreduce prod | None -> false ) true (Symbol.N nt) ) true ) (* ------------------------------------------------------------------------ *) (* Type production. *) (* This is the type of states. Only states that are represented are declared. *) let statetypedef = { typename = tcstate; typeparams = []; typerhs = TDefSum ( Lr1.fold (fun defs s -> if Invariant.represented s then { dataname = statecon s; datavalparams = []; datatypeparams = None } :: defs else defs ) [] ); typeconstraint = None } (* The type of lexers. *) let tlexer = TypArrow (tlexbuf, ttoken) (* This is the type of parser environments. *) let field modifiable name t = { modifiable = modifiable; fieldname = name; fieldtype = type2scheme t } let envtypedef = { typename = tcenv; typeparams = []; typerhs = TDefRecord [ (* The lexer itself. *) field false flexer tlexer; (* The lexing buffer. *) field false flexbuf tlexbuf; (* The last token that was read from the lexer. This is the head of the token stream, unless [env.error] is set. *) field false ftoken ttoken; (* A flag which tells whether we currently have an [error] token at the head of the stream. When this flag is set, the head of the token stream is the [error] token, and the contents of the [token] field is irrelevant. The token following [error] is obtained by invoking the lexer again. *) field true ferror tbool; ]; typeconstraint = None } (* [curry] curries the top stack cell in a type [t] of the form [(stack type) arrow (result type)]. [t] remains unchanged if the stack type does not make at least one cell explicit. *) let curry = function | TypArrow (TypTuple (tstack :: tcell), tresult) -> TypArrow (tstack, marrow tcell tresult) | TypArrow _ as t -> t | _ -> assert false (* [curryif true] is [curry], [curryif false] is the identity. *) let curryif flag t = if flag then curry t else t (* Types for stack cells. [celltype tailtype holds_state symbol] returns the type of a stack cell. The parameter [tailtype] is the type of the tail of the stack. The flag [holds_state] tells whether the cell holds a state. The parameter [symbol] is used to determine whether the cell holds a semantic value and what its type is. A subtlety here and in [curry] above is that singleton stack cells give rise to singleton tuple types, which the type printer eliminates, but which do exist internally. As a result, [curry] always correctly removes the top stack cell, even if it is a singleton tuple cell. *) let celltype tailtype holds_state symbol _ = TypTuple ( tailtype :: elementif (Invariant.endp symbol) tposition @ elementif holds_state tstate @ semvtype symbol @ elementif (Invariant.startp symbol) tposition ) (* Types for stacks. [stacktype s] is the type of the stack at state [s]. [reducestacktype prod] is the type of the stack when about to reduce production [prod]. [gotostacktype nt] is the type of the stack when the [goto] function associated with [nt] is called. In all cases, the tail (that is, the unknown part) of the stack is represented by [ttail], currently a type variable. These stack types are obtained by folding [celltype] over a description of the stack provided by module [Invariant]. 
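For instance, a cell that holds a state, a semantic value of type [t], and a start position contributes a tuple layer of the shape [tail * state * t * position] (an illustrative sketch; the exact layout is dictated by [celltype] above).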
*) let stacktype s = Invariant.fold celltype ttail (Invariant.stack s) let reducestacktype prod = Invariant.fold celltype ttail (Invariant.prodstack prod) let gotostacktype nt = Invariant.fold celltype ttail (Invariant.gotostack nt) (* The type of the [run] function. As announced earlier, if [s] is the target of shift transitions, the type of the stack is curried, that is, the top stack cell is not yet allocated, so its contents are passed as extra parameters. If [s] is the target of goto transitions, the top stack cell is allocated. If [s] is a start state, this issue makes no difference. *) let runtypescheme s = auto2scheme ( arrow tenv ( curryif (runpushes s) ( arrow (stacktype s) tresult ) ) ) (* The type of the [goto] function. The top stack cell is curried. *) let gototypescheme nt = auto2scheme (arrow tenv (curry (arrow (gotostacktype nt) tresult))) (* If [prod] is an epsilon production and if the [goto] function associated with it expects a state parameter, then the [reduce] function associated with [prod] also requires a state parameter. *) let reduce_expects_state_param prod = let nt = Production.nt prod in Production.length prod = 0 && Invariant.fold (fun _ holds_state _ _ -> holds_state) false (Invariant.gotostack nt) (* The type of the [reduce] function. If shiftreduce optimization is performed for this production, then the top stack cell is not explicitly allocated. *) let reducetypescheme prod = auto2scheme ( arrow tenv ( curryif (shiftreduce prod) ( arrow (reducestacktype prod) ( arrowif (reduce_expects_state_param prod) tstate tresult ) ) ) ) (* The type of the [errorcase] function. The shape of the stack is unknown, and is determined by examining the state parameter. *) let errorcasetypescheme = auto2scheme (marrow [ tenv; ttail; tstate ] tresult) (* The type of the [error] function. The shape of the stack is the one associated with state [s]. *) let errortypescheme s = auto2scheme ( marrow [ tenv; stacktype s ] tresult) (* ------------------------------------------------------------------------ *) (* Code production preliminaries. *) (* This flag will be set to [true] if we ever raise the [Error] exception. This happens when we unwind the entire stack without finding a state that can handle errors. *) let can_die = ref false (* A code pattern for an exception handling construct where both alternatives are in tail position. Concrete syntax in OCaml 4.02 is [match e with x -> e1 | exception Error -> e2]. Earlier versions of OCaml do not support this construct. We continue to emulate it using a combination of [try/with], [match/with], and an [option] value. It is used only in a very rare case anyway. *) let letunless e x e1 e2 = EMatch ( ETry ( EData ("Some", [ e ]), [ { branchpat = PData (excdef.excname, []); branchbody = EData ("None", []) } ] ), [ { branchpat = PData ("Some", [ PVar x ]); branchbody = e1 }; { branchpat = PData ("None", []); branchbody = e2 } ] ) (* ------------------------------------------------------------------------ *) (* Calling conventions. *) (* The layout of a stack cell is determined here. The first field in a stack cell is always a pointer to the rest of the stack; it is followed by the fields listed below, each of which may or may not appear. [runpushcell] and [gotopushcell] are the two places where stack cells are allocated. *) (* 2015/11/04. We make [endp] the first element in the list of optional fields, so we are able to access it at a fixed offset, provided we know that it exists. This is exploited when reducing an epsilon production. 
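Concretely, a complete cell is laid out as [(tail, endp, state, semv, startp)]; optional fields that are not needed are omitted, and a cell all of whose fields are omitted disappears entirely, costing no allocation.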
*) (* The contents of a stack cell, exposed as individual parameters. The choice of identifiers is suitable for use in the definition of [run]. *) let runcellparams var holds_state symbol = elementif (Invariant.endp symbol) (var endp) @ elementif holds_state (var state) @ symval symbol (var semv) @ elementif (Invariant.startp symbol) (var startp) (* The contents of a stack cell, exposed as individual parameters, again. The choice of identifiers is suitable for use in the definition of a [reduce] function. [prod] is the production's index. The integer [i] tells which symbol on the right-hand side we are focusing on, that is, which symbol this stack cell is associated with. *) let reducecellparams prod i holds_state symbol = let ids = Production.identifiers prod in (* The semantic value is bound to the variable [ids.(i)]. Its type is [t]. As of 2016/03/11, we generate a type annotation. Indeed, because of our use of [magic], the semantic value would otherwise have an unknown type; and, if it is a function, the OCaml compiler could warn (incorrectly) that this function does not use its argument. *) let semvpat t = PAnnot (PVar ids.(i), t) in elementif (Invariant.endp symbol) (PVar (Printf.sprintf "_endpos_%s_" ids.(i))) @ elementif holds_state (if i = 0 then PVar state else PWildcard) @ symvalt symbol semvpat @ elementif (Invariant.startp symbol) (PVar (Printf.sprintf "_startpos_%s_" ids.(i))) (* The contents of a stack cell, exposed as individual parameters, again. The choice of identifiers is suitable for use in the definition of [error]. *) let errorcellparams (i, pat) holds_state symbol _ = i + 1, ptuple ( pat :: elementif (Invariant.endp symbol) PWildcard @ elementif holds_state (if i = 0 then PVar state else PWildcard) @ symval symbol PWildcard @ elementif (Invariant.startp symbol) PWildcard ) (* Calls to [run]. *) let runparams magic var s = var env :: magic (var stack) :: listif (runpushes s) (Invariant.fold_top (runcellparams var) [] (Invariant.stack s)) let call_run s actuals = EApp (EVar (run s), actuals) (* The parameters to [reduce]. When shiftreduce optimization is in effect, the top stack cell is not allocated, so extra parameters are required. Note that [shiftreduce prod] and [reduce_expects_state_param prod] are mutually exclusive conditions, so the [state] parameter is never bound twice. *) let reduceparams prod = PVar env :: PVar stack :: listif (shiftreduce prod) ( Invariant.fold_top (reducecellparams prod (Production.length prod - 1)) [] (Invariant.prodstack prod) ) @ elementif (reduce_expects_state_param prod) (PVar state) (* Calls to [reduce]. One must specify the production [prod] as well as the current state [s]. *) let call_reduce prod s = let actuals = (EVar env) :: (EMagic (EVar stack)) :: listif (shiftreduce prod) (Invariant.fold_top (runcellparams var) [] (Invariant.stack s)) (* compare with [runpushcell s] *) @ elementif (reduce_expects_state_param prod) (estatecon s) in EApp (EVar (reduce prod), actuals) (* Calls to [goto]. *) let gotoparams var nt = var env :: var stack :: Invariant.fold_top (runcellparams var) [] (Invariant.gotostack nt) let call_goto nt = EApp (EVar (goto nt), gotoparams var nt) (* Calls to [errorcase]. *) let errorcaseparams magic var = [ var env; magic (var stack); var state ] let call_errorcase = EApp (EVar errorcase, errorcaseparams magic var) (* Calls to [error]. 
*) let errorparams magic var = [ var env; magic (var stack) ] let call_error magic s = EApp (EVar (error s), errorparams magic var) let call_error_via_errorcase magic s = (* TEMPORARY document *) if Invariant.represented s then EApp (EVar errorcase, [ var env; magic (var stack); estatecon s ]) else call_error magic s (* Calls to [assertfalse]. *) let call_assertfalse = EApp (EVar assertfalse, [ EVar "()" ]) (* ------------------------------------------------------------------------ *) (* Code production for the automaton functions. *) (* Count how many states actually can peek at an error token. This figure is, in general, inferior or equal to the number of states at which [Invariant.errorpeeker] is true, because some of these states have a default reduction and will not consult the lookahead token. *) let errorpeekers = ref 0 (* Code for calling the reduction function for token [prod] upon finding a token within [toks]. This produces a branch, to be inserted in a [run] function for state [s]. *) let reducebranch toks prod s = { branchpat = tokspat toks; branchbody = call_reduce prod s } (* Code for shifting from state [s] to state [s'] via the token [tok]. This produces a branch, to be inserted in a [run] function for state [s]. The callee, [run s'], is responsible for taking the current token off the input stream. (There is actually a case where the token is *not* taken off the stream: when [s'] has a default reduction on [#].) It is also responsible for pushing a new stack cell. The rationale behind this decision is that there may be multiple shift transitions into [s'], so we actually share that code by placing it inside [run s'] rather than inside every transition. *) let shiftbranchbody s tok s' = (* Construct the actual parameters for [run s']. *) let actuals = (EVar env) :: (EMagic (EVar stack)) :: Invariant.fold_top (fun holds_state symbol -> assert (Symbol.equal (Symbol.T tok) symbol); elementif (Invariant.endp symbol) getendp @ elementif holds_state (estatecon s) @ tokval tok (EVar semv) @ elementif (Invariant.startp symbol) getstartp ) [] (Invariant.stack s') in (* Call [run s']. *) tracecomment (Printf.sprintf "Shifting (%s) to state %d" (Terminal.print tok) (Lr1.number s')) (call_run s' actuals) let shiftbranch s tok s' = assert (not (Terminal.pseudo tok)); { branchpat = PData (tokendata (Terminal.print tok), tokval tok (PVar semv)); branchbody = shiftbranchbody s tok s' } (* This generates code for pushing a new stack cell upon entering the [run] function for state [s]. *) let runpushcell s e = if runpushes s then let contents = var stack :: Invariant.fold_top (runcellparams var) [] (Invariant.stack s) in mlet [ pvar stack ] [ etuple contents ] e else e let runpushcellunless shiftreduce s e = if shiftreduce then EComment ("Not allocating top stack cell", e) else runpushcell s e (* This generates code for dealing with the lookahead token upon entering the [run] function for state [s]. If [s] is the target of a shift transition, then we must take the current token (which was consumed in the shift transition) off the input stream. Whether [s] was entered through a shift or a goto transition, we want to peek at the next token, unless we are performing a default reduction. The parameter [defred] tells which default reduction, if any, we are about to perform. *) (* 2014/12/06 New convention regarding initial states (i.e., states which have no incoming symbol). The function [initenv] does not invoke the lexer, so the [run] function for an initial state must do it. 
(Except in the very special case where the initial state has a default reduction on [#] -- this means the grammar recognizes only the empty word. We have ruled out this case.) *) let gettoken s defred e = match Lr1.incoming_symbol s, defred with | (Some (Symbol.T _) | None), Some (_, toks) when TerminalSet.mem Terminal.sharp toks -> assert (TerminalSet.cardinal toks = 1); (* There is a default reduction on token [#]. We cannot request the next token, since that might drive the lexer off the end of the input stream, so we cannot call [discard]. Do nothing. *) e | (Some (Symbol.T _) | None), Some _ -> (* There is some other default reduction. Discard the first input token. *) blet ([ PVar env, EApp (EVar discard, [ EVar env ]) (* Note that we do not read [env.token]. *) ], e) | (Some (Symbol.T _) | None), None -> (* There is no default reduction. Discard the first input token and peek at the next one. *) blet ([ PVar env, EApp (EVar discard, [ EVar env ]); PVar token, ERecordAccess (EVar env, ftoken) ], e) | Some (Symbol.N _), Some _ -> (* There is some default reduction. Do not peek at the input token. *) e | Some (Symbol.N _), None -> (* There is no default reduction. Peek at the first input token, without taking it off the input stream. This is normally done by reading [env.token], unless the token might be [error]: then, we check [env.error] first. *) if Invariant.errorpeeker s then begin incr errorpeekers; EIfThenElse ( ERecordAccess (EVar env, ferror), tracecomment "Resuming error handling" (call_error_via_errorcase magic s), blet ([ PVar token, ERecordAccess (EVar env, ftoken) ], e) ) end else blet ([ assertnoerror; PVar token, ERecordAccess (EVar env, ftoken) ], e) (* This produces the header of a [run] function. *) let runheader s body = let body = tracecomment (Printf.sprintf "State %d:" (Lr1.number s)) body in { valpublic = false; valpat = PVar (run s); valval = EAnnot (EFun (runparams nomagic pvar s, body), runtypescheme s) } (* This produces the comment attached with a default reduction. *) let defaultreductioncomment toks e = EPatComment ( "Reducing without looking ahead at ", tokspat toks, e ) (* This produces some bookkeeping code that is used when initiating error handling. We set the flag [env.error]. By convention, the field [env.token] becomes meaningless and one considers that the first token on the input stream is [error]. As a result, the next peek at the lookahead token will cause error handling to be resumed. The next call to [discard] will take the [error] token off the input stream and clear [env.error]. *) (* It seems convenient for [env.error] to be a mutable field, as this allows us to generate compact code. Re-allocating the whole record would produce less compact code. And speed is not an issue in this error-handling code. *) let errorbookkeeping e = tracecomment "Initiating error handling" (blet ( [ PUnit, ERecordWrite (EVar env, ferror, etrue) ], e )) (* This code is used to indicate that a new error has been detected in state [s]. If I am correct, [env.error] is never set here. Indeed, that would mean that we first found an error, and then signaled another error before being able to shift the first error token. My understanding is that this cannot happen: when the first error is signaled, we end up at a state that is willing to handle the error token, by a series of reductions followed by a shift. We initiate error handling by first performing the standard bookkeeping described above, then transferring control to the [error] function associated with [s]. 
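A toy model of this protocol (purely illustrative; the Boolean mirrors the
   [error] field of the environment, but the representation is invented): *)

let _toy_initiate (error_flag : bool ref) (handle : unit -> 'a) : 'a =
  (* A new error can be detected only while the flag is clear... *)
  assert (not !error_flag);
  (* ... perform the standard bookkeeping: set the flag... *)
  error_flag := true;
  (* ... and transfer control to the error-handling code. *)
  handle ()

(* [initiate s] signals that a new error has been detected in state [s].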
*) let initiate s = blet ( [ assertnoerror ], errorbookkeeping (call_error_via_errorcase magic s) ) (* This produces the body of the [run] function for state [s]. *) let rundef s : valdef = match Default.has_default_reduction s with | Some (prod, toks) as defred -> (* Perform reduction without looking ahead. If shiftreduce optimization is being performed, then no stack cell is allocated. The contents of the top stack cell are passed do [reduce] as extra parameters. *) runheader s ( runpushcellunless (shiftreduce prod) s ( gettoken s defred ( defaultreductioncomment toks ( call_reduce prod s ) ) ) ) | None -> (* If this state is willing to act on the error token, ignore that -- this is taken care of elsewhere. *) let transitions = SymbolMap.remove (Symbol.T Terminal.error) (Lr1.transitions s) and reductions = TerminalMap.remove Terminal.error (Lr1.reductions s) in (* Construct the main case analysis that determines what action should be taken next. A default branch, where an error is detected, is added if the analysis is not exhaustive. In the default branch, we initiate error handling. *) let covered, branches = ProductionMap.fold (fun prod toks (covered, branches) -> (* There is a reduction for these tokens. *) TerminalSet.union toks covered, reducebranch toks prod s :: branches ) (Lr1.invert reductions) (TerminalSet.empty, []) in let covered, branches = SymbolMap.fold (fun symbol s' (covered, branches) -> match symbol with | Symbol.T tok -> (* There is a shift transition for this token. *) TerminalSet.add tok covered, shiftbranch s tok s' :: branches | Symbol.N _ -> covered, branches ) transitions (covered, branches) in let branches = if TerminalSet.subset TerminalSet.universe covered then branches else branches @ [ { branchpat = PWildcard; branchbody = initiate s } ] in (* Finally, construct the code for [run]. The former pushes things onto the stack, obtains the lookahead token, then performs the main case analysis on the lookahead token. *) runheader s ( runpushcell s ( gettoken s None ( EMatch ( EVar token, branches ) ) ) ) (* This is the body of the [reduce] function associated with production [prod]. *) let reducebody prod = (* Find out about the left-hand side of this production and about the identifiers that have been bound to the symbols in the right-hand side. These represent variables that we should bind to semantic values before invoking the semantic action. *) let nt, rhs = Production.def prod and ids = Production.identifiers prod and length = Production.length prod in (* Build a pattern that represents the shape of the stack. Out of the stack, we extract a state (except when the production is an epsilon production) and a number of semantic values. If shiftreduce optimization is being performed, then the top stack cell is not explicitly allocated, so we do not include it in the pattern that is built. *) let (_ : int), pat = Invariant.fold (fun (i, pat) holds_state symbol _ -> i + 1, if i = length - 1 && shiftreduce prod then pat else ptuple (pat :: reducecellparams prod i holds_state symbol) ) (0, PVar stack) (Invariant.prodstack prod) in (* If any identifiers refer to terminal symbols without a semantic value, then bind these identifiers to the unit value. This provides the illusion that every symbol, terminal or nonterminal, has a semantic value. This is more regular and allows applying operators such as ? to terminal symbols without a semantic value. 
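For example (invented production): in [e1 = expr PLUS e2 = expr { e1 + e2 }],
   the token PLUS carries no semantic value, so the code generated here adds
   a binding of the corresponding identifier (conventionally [_2]) to the
   unit value, roughly [let _2 = () in ...], allowing the semantic action to
   refer to every right-hand-side element uniformly.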
*) let unitbindings = Misc.foldi length (fun i unitbindings -> match semvtype rhs.(i) with | [] -> (PVar ids.(i), EUnit) :: unitbindings | _ -> unitbindings ) [] in (* If necessary, determine start and end positions for the left-hand side of the production. If the right-hand side is nonempty, this is done by extracting position information out of the first and last symbols of the right-hand side. If it is empty, then (as of 2015/11/04) this is done by taking the end position stored in the top stack cell (whatever it is). The constraints imposed by the module [Invariant], the layout of cells, and our creation of a sentinel cell (see [entrydef] further on), ensure that this cell exists and has an [endp] field at offset 1. Yes, we live dangerously. You only live once. *) let extract x = (* Extract the end position (i.e., the field at offset 1) in the top stack cell and bind it to the variable [x]. *) PTuple [ PWildcard; PVar x ], EMagic (EVar stack) in let symbol = Symbol.N nt in let posbindings action = let bind_startp = Invariant.startp symbol in elementif (Action.has_beforeend action) ( extract beforeendp ) @ elementif bind_startp ( if length > 0 then PVar startp, EVar (Printf.sprintf "_startpos_%s_" ids.(0)) else extract startp ) @ elementif (Invariant.endp symbol) ( if length > 0 then PVar endp, EVar (Printf.sprintf "_endpos_%s_" ids.(length - 1)) else if bind_startp then PVar endp, EVar startp else extract endp ) in (* If this production is one of the start productions, then reducing it means accepting the input. In that case, we return a final semantic value and stop. Otherwise, we transfer control to the [goto] function, unless the semantic action raises [Error], in which case we transfer control to [errorcase]. *) if Production.is_start prod then tracecomment "Accepting" (blet ( [ pat, EVar stack ], EMagic (EVar ids.(0)) )) else let action = Production.action prod in let act = EAnnot (Action.to_il_expr action, type2scheme (semvtypent nt)) in tracecomment (Printf.sprintf "Reducing production %s" (Production.print prod)) (blet ( (pat, EVar stack) :: unitbindings @ posbindings action, (* If the semantic action is susceptible of raising [Error], use a [let/unless] construct, otherwise use [let]. *) if Action.has_syntaxerror action then letunless act semv (call_goto nt) (errorbookkeeping call_errorcase) else blet ([ PVar semv, act ], call_goto nt) )) (* This is the definition of the [reduce] function associated with production [prod]. *) let reducedef prod = { valpublic = false; valpat = PVar (reduce prod); valval = EAnnot ( EFun ( reduceparams prod, reducebody prod ), reducetypescheme prod ) } (* This generates code for pushing a new stack cell inside [goto]. *) let gotopushcell nt e = if gotopushes nt then let contents = var stack :: Invariant.fold_top (runcellparams var) [] (Invariant.gotostack nt) in mlet [ pvar stack ] [ etuple contents ] e else e (* This is the heart of the [goto] function associated with nonterminal [nt]. *) let gotobody nt = (* Examine the current state to determine where to go next. *) let branches = Lr1.targets (fun branches sources target -> { branchpat = pstatescon sources; branchbody = call_run target (runparams magic var target) } :: branches ) [] (Symbol.N nt) in match branches with | [] -> (* If there are no branches, then this [goto] function is never invoked. The inliner will drop it, so whatever we generate here is unimportant. *) call_assertfalse | [ branch ] -> (* If there is only one branch, no case analysis is required. 
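For instance (hypothetical output), a one-branch [goto] function may be
   printed as

     let goto_expr env stack = (* State should be State12 *) run12 env stack

   instead of a [match] on the current state with a single case.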
This optimization is not strictly necessary if GADTs are used by the compiler to prove that the case analysis is exhaustive. It does improve readability, though, and is also useful if the compiler does not have GADTs. *) EPatComment ( "State should be ", branch.branchpat, branch.branchbody ) | _ -> (* In the general case, we keep the branches computed above and, unless [nt] is universal, add a default branch, which is theoretically useless but helps avoid warnings if the compiler does not have GADTs. *) let default = { branchpat = PWildcard; branchbody = call_assertfalse } in EMatch ( EVar state, branches @ (if Invariant.universal (Symbol.N nt) then [] else [ default ]) ) (* This the [goto] function associated with nonterminal [nt]. *) let gotodef nt = { valpublic = false; valpat = PVar (goto nt); valval = EAnnot (EFun (gotoparams pvar nt, gotopushcell nt (gotobody nt)), gototypescheme nt) } (* ------------------------------------------------------------------------ *) (* Code production for the error handling functions. *) (* This is the body of the [error] function associated with state [s]. *) let handle s e = tracecomment (Printf.sprintf "Handling error in state %d" (Lr1.number s)) e let errorbody s = try let s' = SymbolMap.find (Symbol.T Terminal.error) (Lr1.transitions s) in (* There is a shift transition on error. *) handle s ( shiftbranchbody s Terminal.error s' ) with Not_found -> try let prods = TerminalMap.lookup Terminal.error (Lr1.reductions s) in let prod = Misc.single prods in (* There is a reduce transition on error. If shiftreduce optimization is enabled for this production, then we must pop an extra cell for [reduce]'s calling convention to be met. *) let extrapop e = if shiftreduce prod then let pat = ptuple (PVar stack :: Invariant.fold_top (runcellparams pvar) [] (Invariant.stack s)) in blet ([ pat, EVar stack ], e) else e in handle s ( extrapop ( call_reduce prod s ) ) with Not_found -> (* This state is unable to handle errors. Pop the stack to find a state that does handle errors, a state that can further pop the stack, or die. *) match Invariant.rewind s with | Invariant.Die -> can_die := true; ERaise errorval | Invariant.DownTo (w, st) -> let _, pat = Invariant.fold errorcellparams (0, PVar stack) w in blet ( [ pat, EVar stack ], match st with | Invariant.Represented -> call_errorcase | Invariant.UnRepresented s -> call_error magic s ) (* This is the [error] function associated with state [s]. *) let errordef s = { valpublic = false; valpat = PVar (error s); valval = EAnnot ( EFun ( errorparams nomagic pvar, errorbody s ), errortypescheme s ) } (* This is the [errorcase] function. It examines its state parameter and dispatches control to an appropriate [error] function. *) let errorcasedef = let branches = Lr1.fold (fun branches s -> if Invariant.represented s then { branchpat = pstatecon s; branchbody = EApp (EVar (error s), [ EVar env; EMagic (EVar stack) ]) } :: branches else branches ) [] in { valpublic = false; valpat = PVar errorcase; valval = EAnnot ( EFun ( errorcaseparams nomagic pvar, EMatch ( EVar state, branches ) ), errorcasetypescheme ) } (* ------------------------------------------------------------------------ *) (* Code production for the entry points. *) (* This is the entry point associated with a start state [s]. By convention, it is named after the nonterminal [nt] that corresponds to this state. This is a public definition. The code initializes a parser environment, an empty stack, and invokes [run]. 2015/11/11. 
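For intuition, a generated entry point roughly has this shape (the names
   are invented; [Obj.magic] stands for the [EMagic] coercion used below):

     let main lexer lexbuf =
       let env = initenv lexer lexbuf in
       Obj.magic (run0 env ((), initial_position))

   where the pair [((), initial_position)] is the sentinel cell discussed
   next.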
If the state [s] can reduce an epsilon production whose left-hand symbol keeps track of its start or end position, or if [s] can reduce any production that mentions [$endpos($0)], then the initial stack should contain a sentinel cell with a valid [endp] field at offset 1. For simplicity, we always create a sentinel cell. *) let entrydef s = let nt = Item.startnt (Lr1.start2item s) in let lexer = "lexer" and lexbuf = "lexbuf" in let initial_stack = let initial_position = getendp in etuple [ EUnit; initial_position ] in { valpublic = true; valpat = PVar (Nonterminal.print true nt); valval = EAnnot ( EFun ( [ PVar lexer; PVar lexbuf ], blet ( [ PVar env, EApp (EVar initenv, [ EVar lexer; EVar lexbuf ]) ], EMagic (EApp (EVar (run s), [ EVar env; initial_stack ])) ) ), entrytypescheme Front.grammar (Nonterminal.print true nt) ) } (* ------------------------------------------------------------------------ *) (* Code production for auxiliary functions. *) (* This is [assertfalse], used when internal failure is detected. This should never happen if our tool is correct. *) let assertfalsedef = { valpublic = false; valpat = PVar assertfalse; valval = EAnnot ( EFun ([ PUnit ], blet ([ PUnit, EApp (EVar "Printf.fprintf", [ EVar "Pervasives.stderr"; EStringConst "Internal failure -- please contact the parser generator's developers.\n%!" ]); ], EApp (EVar "assert", [ efalse ]) ) ), scheme [ "a" ] (arrow tunit (tvar "a")) ) } (* This is [print_token], used to print tokens in [--trace] mode. *) let printtokendef = destructuretokendef print_token tstring false (fun tok -> EStringConst (Terminal.print tok)) (* This is [discard], used to take a token off the input stream and query the lexer for a new one. The code queries the lexer for a new token and stores it into [env.token], overwriting the previous token. It also stores the start and positions of the new token. Last, [env.error] is cleared. We use the lexer's [lex_start_p] and [lex_curr_p] fields to extract the start and end positions of the token that we just read. In practice, it seems that [lex_start_p] can be inaccurate (that is the case when the lexer calls itself recursively, instead of simply recognizing an atomic pattern and returning immediately). However, we are 100% compatible with ocamlyacc here, and there is no better solution anyway. As of 2014/12/12, we re-allocate the environment record instead of updating it. Perhaps surprisingly, this makes the code TWICE FASTER overall. The write barrier is really costly! *) let discardbody = let lexer = "lexer" and lexbuf = "lexbuf" in EFun ( [ PVar env ], blet ([ PVar lexer, ERecordAccess (EVar env, flexer); PVar lexbuf, ERecordAccess (EVar env, flexbuf); PVar token, EApp (EVar lexer, [ EVar lexbuf ]); ] @ trace "Lookahead token is now %s (%d-%d)" [ EApp (EVar print_token, [ EVar token ]); ERecordAccess (ERecordAccess (EVar lexbuf, "Lexing.lex_start_p"), "Lexing.pos_cnum"); ERecordAccess (ERecordAccess (EVar lexbuf, "Lexing.lex_curr_p"), "Lexing.pos_cnum") ], ERecord [ flexer, EVar lexer; flexbuf, EVar lexbuf; ftoken, EVar token; ferror, efalse ] ) ) let discarddef = { valpublic = false; valpat = PVar discard; valval = EAnnot ( discardbody, type2scheme (arrow tenv tenv) ) } (* This is [initenv], used to allocate a fresh parser environment. It fills in all fields in a straightforward way. The [token] field receives a dummy value. It will be overwritten by the first call to [run], which will invoke [discard]. This allows us to invoke the lexer in just one place. 
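(An aside on the performance note above, in miniature, with an invented
   record type -- this is not Menhir code: re-allocating an immutable record
   avoids the write barrier that an in-place update of a heap-allocated
   field would incur.) *)

type _toy = { _tok : string; _err : bool }

let _toy_discard (e : _toy) (next : string) : _toy =
  (* Build a fresh record rather than mutating the old one. *)
  { e with _tok = next }

(* [initenv], continued: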
*) let initenvdef = let lexer = "lexer" and lexbuf = "lexbuf" in { valpublic = false; valpat = PVar initenv; valval = EAnnot ( EFun ( [ PVar lexer; PVar lexbuf ], blet ( (* We do not have a dummy token at hand, so we forge one. *) (* It will be overwritten by the first call to the lexer. *) [ PVar token, EMagic EUnit ], ERecord ([ (flexer, EVar lexer); (flexbuf, EVar lexbuf); (ftoken, EVar token); (ferror, efalse) ] ) ) ), type2scheme (marrow [ tlexer; tlexbuf ] tenv) ) } (* ------------------------------------------------------------------------ *) (* Here is complete code for the parser. *) open UnparameterizedSyntax let grammar = Front.grammar let program = [ SIFunctor (grammar.parameters, mbasics grammar @ SITypeDefs [ envtypedef; statetypedef ] :: SIStretch grammar.preludes :: SIValDefs (true, ProductionMap.fold (fun _ s defs -> entrydef s :: defs ) Lr1.entry ( Lr1.fold (fun defs s -> rundef s :: errordef s :: defs ) ( Nonterminal.foldx (fun nt defs -> gotodef nt :: defs ) (Production.fold (fun prod defs -> if Lr1.NodeSet.is_empty (Lr1.production_where prod) then defs else reducedef prod :: defs ) [ discarddef; initenvdef; printtokendef; assertfalsedef; errorcasedef ]))) ) :: SIStretch grammar.postludes :: [])] (* ------------------------------------------------------------------------ *) (* We are done! *) let () = Error.logC 1 (fun f -> Printf.fprintf f "%d out of %d states can peek at an error.\n" !errorpeekers Lr1.n) let () = if not !can_die then Error.logC 1 (fun f -> Printf.fprintf f "The generated parser cannot raise Error.\n") let () = Time.tick "Producing abstract syntax" end menhir-20171222/src/nonTerminalDefinitionInlining.ml0000664000175000017500000003404613217215730022632 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Keyword open UnparameterizedSyntax open ListMonad (* This exception will be raised when a branch does not need inlining. *) exception NoInlining (* Color are used to detect cycles. *) type 'a color = | BeingExpanded | Expanded of 'a (* [index2id] converts a 0-based index (into a list of producers) to an identifier (the name of the producer). *) let index2id producers i = try producer_identifier (List.nth producers i) with Failure _ -> assert false (* should not happen *) (* [rename_sw_outer] transforms the keywords in the outer production (the caller) during inlining. It replaces [$startpos(x)] and [$endpos(x)], where [x] is the name of the callee, with [startpx] and [endpx], respectively. *) let rename_sw_outer (x, startpx, endpx) (subject, where) : (subject * where) option = match subject, where with | Before, _ -> None | RightNamed x', _ -> if x' = x then match where with | WhereStart -> Some startpx | WhereEnd -> Some endpx | WhereSymbolStart -> assert false (* has been expanded away *) else None | Left, _ -> (* [$startpos], [$endpos], and [$symbolstartpos] have been expanded away earlier; see [KeywordExpansion]. *) assert false (* [rename_sw_inner] transforms the keywords in the inner production (the callee) during inlining. It replaces [$endpos($0)] with [beforeendp]. 
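For example (invented grammar fragment): when the %inline symbol [m] is
   expanded in [a = A x = m C { ... }], [rename_sw_outer] rewrites
   [$startpos(x)] and [$endpos(x)] in the outer action in terms of the
   producers inlined in place of [x], while [rename_sw_inner] rewrites an
   occurrence of [$endpos($0)] inside [m]'s own action into the end
   position of [A], the symbol that precedes the inlined fragment.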
*) let rename_sw_inner beforeendp (subject, where) : (subject * where) option = match subject, where with | Before, _ -> assert (where = WhereEnd); Some beforeendp | RightNamed _, _ -> None | Left, _ -> (* [$startpos] and [$endpos] have been expanded away earlier; see [KeywordExpansion]. *) assert false (* This auxiliary function checks that a use site of an %inline symbol does not carry any attributes. *) let check_no_producer_attributes producer = match producer_attributes producer with | [] -> () | (id, _payload) :: _attributes -> Error.error [Positions.position id] "the nonterminal symbol %s is declared %%inline.\n\ A use of it cannot carry an attribute." (producer_symbol producer) let names (producers : producers) : StringSet.t = List.fold_left (fun s producer -> StringSet.add (producer_identifier producer) s ) StringSet.empty producers (* Inline a grammar. The resulting grammar does not contain any definitions that can be inlined. *) let inline grammar = (* This function returns a fresh name beginning with [prefix] and that is not in the set of names [names]. *) let rec fresh ?(c=0) names prefix = let name = prefix^string_of_int c in if StringSet.mem name names then fresh ~c:(c+1) names prefix else name in let use_inline = ref false in (* This table associates a color to each non terminal that can be expanded. *) let expanded_non_terminals = Hashtbl.create 13 in let expanded_state k = Hashtbl.find expanded_non_terminals k in let mark_as_being_expanded k = Hashtbl.add expanded_non_terminals k BeingExpanded in let mark_as_expanded k r = Hashtbl.replace expanded_non_terminals k (Expanded r); r in (* [find_inline_producer b] traverses the producers of the branch [b] and looks for the first nonterminal symbol that can be inlined. If it finds one, it inlines its branches into [b], which is why this function can return several branches. Otherwise, it raises [NoInlining]. *) let rec chop_inline (prefix, suffix) = match suffix with | [] -> raise NoInlining | x :: xs -> let nt = producer_symbol x and id = producer_identifier x in try let r = StringMap.find nt grammar.rules in if r.inline_flag then begin (* We have checked earlier than an %inline symbol does not carry any attributes. In addition, we now check that the use site of this symbol does not carry any attributes either. Thus, we need not worry about propagating these attributes through inlining. *) check_no_producer_attributes x; (* We inline the rule [r] into [b] between [prefix] and [xs]. *) List.rev prefix, nt, r, id, xs end else chop_inline (x :: prefix, xs) with Not_found -> chop_inline (x :: prefix, xs) in let rec find_inline_producer b = let prefix, nt, p, psym, suffix = chop_inline ([], b.producers) in prefix, expand_rule nt p, nt, psym, suffix (* We have to rename producers' names of the inlined production if they clash with the producers' names of the branch into which we do the inlining. *) and rename_if_necessary b producers = (* First we compute the set of names already in use. *) let producers_names = names (b.producers @ producers) in (* Compute a renaming and the new inlined producers' names. *) let phi, producers' = List.fold_left (fun (phi, producers) producer -> let x = producer_identifier producer in if StringSet.mem x producers_names then let x' = fresh producers_names x in ((x, x') :: phi, { producer with producer_identifier = x' } :: producers) else (phi, producer :: producers) ) ([], []) producers in phi, List.rev producers' (* Inline the non terminals that can be inlined in [b]. 
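A single branch of [b] may thus give rise to several branches. A minimal
   sketch of the list-monad operations assumed here (their real definitions
   live in the module ListMonad, opened above):

     let return x = [ x ]
     let ( >>= ) xs f = List.concat (List.map f xs)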
We use the ListMonad to combine the results. *) and expand_branch (b : branch) : branch ListMonad.m = try (* [c] is the identifier under which the callee is known. *) let prefix, p, nt, c, suffix = find_inline_producer b in use_inline := true; (* Inline a branch of [nt] at position [prefix] ... [suffix] in the branch [b]. *) let inline_branch pb = (* 2015/11/18. The interaction of %prec and %inline is not documented. It used to be the case that we would disallow marking a production both %inline and %prec. Now, we allow it, but we check that (1) it is inlined at the last position of the host production and (2) the host production does not already have a %prec annotation. *) pb.branch_prec_annotation |> Option.iter (fun callee_prec -> (* The callee has a %prec annotation. *) (* Check condition 1. *) if List.length suffix > 0 then Error.error [ Positions.position callee_prec; b.branch_position ] "this production carries a %%prec annotation,\n\ and the nonterminal symbol %s is marked %%inline.\n\ For this reason, %s can be used only in tail position." nt nt; (* Check condition 2. *) b.branch_prec_annotation |> Option.iter (fun caller_prec -> Error.error [ Positions.position callee_prec; Positions.position caller_prec ] "this production carries a %%prec annotation,\n\ and the nonterminal symbol %s is marked %%inline.\n\ For this reason, %s cannot be used in a production\n\ which itself carries a %%prec annotation." nt nt ) ); (* Rename the producers of this branch if they conflict with the name of the host's producers. *) let phi, inlined_producers = rename_if_necessary b pb.producers in (* After inlining, the producers are as follows. *) let producers = prefix @ inlined_producers @ suffix in let index2id = index2id producers in let prefix = List.length prefix and inlined_producers = List.length inlined_producers in (* Define how the start and end positions of the inner production should be computed once it is inlined into the outer production. These definitions of [startp] and [endp] are then used to transform [$startpos] and [$endpos] in the inner production and to transform [$startpos(x)] and [$endpos(x)] in the outer production. *) (* 2015/11/04. We ensure that positions are computed in the same manner, regardless of whether inlining is performed. *) let startp = if inlined_producers > 0 then (* If the inner production is non-epsilon, things are easy. The start position of the inner production is the start position of its first element. *) RightNamed (index2id prefix), WhereStart else if prefix > 0 then (* If the inner production is epsilon, we are supposed to compute the end position of whatever comes in front of it. If the prefix is nonempty, then this is the end position of the last symbol in the prefix. *) RightNamed (index2id (prefix - 1)), WhereEnd else (* If the inner production is epsilon and the prefix is empty, then we need to look up the end position stored in the top stack cell. This is the reason why we need the keyword [$endpos($0)]. It is required in this case to preserve the semantics of $startpos and $endpos. *) Before, WhereEnd (* Note that, to contrary to intuition perhaps, we do NOT have that if the prefix is empty, then the start position of the inner production is the start production of the outer production. This is true only if the inner production is non-epsilon. *) in let endp = if inlined_producers > 0 then (* If the inner production is non-epsilon, things are easy, then its end position is the end position of its last element. 
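Concretely (invented example): when [m] expands to a nonempty sequence in
   [a = A x = m C], [$endpos(x)] becomes the end position of the last
   producer inlined in place of [x]; when [m] expands to epsilon, the end
   position collapses to the start position computed above (the end of
   [A], or [$endpos($0)] when the prefix is empty).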
*) RightNamed (index2id (prefix + inlined_producers - 1)), WhereEnd else (* If the inner production is epsilon, then its end position is equal to its start position. *) startp in (* We must also transform [$endpos($0)] if it used by the inner production. It refers to the end position of the stack cell that comes before the inner production. So, if the prefix is non-empty, then it translates to the end position of the last element of the prefix. Otherwise, it translates to [$endpos($0)]. *) let beforeendp = if prefix > 0 then RightNamed (index2id (prefix - 1)), WhereEnd else Before, WhereEnd in (* Rename the outer and inner semantic action. *) let outer_action = Action.rename (rename_sw_outer (c, startp, endp)) [] b.action and action' = Action.rename (rename_sw_inner beforeendp) phi pb.action in (* 2015/11/18. If the callee has a %prec annotation (which implies the caller does not have one, and the callee appears in tail position in the caller) then the annotation is inherited. This seems reasonable, but remains undocumented. *) let branch_prec_annotation = match pb.branch_prec_annotation with | (Some _) as annotation -> assert (b.branch_prec_annotation = None); annotation | None -> b.branch_prec_annotation in { b with producers; action = Action.compose c action' outer_action; branch_prec_annotation; } in List.map inline_branch p.branches >>= expand_branch with NoInlining -> return b (* Expand a rule if necessary. *) and expand_rule k r = try (match expanded_state k with | BeingExpanded -> Error.error r.positions "there is a cycle in the definition of %s." k | Expanded r -> r) with Not_found -> mark_as_being_expanded k; mark_as_expanded k { r with branches = r.branches >>= expand_branch } in (* If we are in Coq mode, %inline is forbidden. *) let _ = if Settings.coq then StringMap.iter (fun _ r -> if r.inline_flag then Error.error r.positions "%%inline is not supported by the Coq back-end.") grammar.rules in (* To expand a grammar, we expand all its rules and remove the %inline rules. *) let expanded_rules = StringMap.mapi expand_rule grammar.rules in let useful (k : string) : bool = try not (StringMap.find k grammar.rules).inline_flag with Not_found -> true (* could be: assert false? *) in (* Remove %on_error_reduce declarations for symbols that are expanded away, and warn about them, at the same time. *) let useful_warn (k : string) : bool = let u = useful k in if not u then Error.grammar_warning [] "the declaration %%on_error_reduce %s\n\ has no effect, since this symbol is marked %%inline and is expanded away." k; u in { grammar with rules = StringMap.filter (fun _ r -> not r.inline_flag) expanded_rules; types = StringMap.filter (fun k _ -> useful k) grammar.types; on_error_reduce = StringMap.filter (fun k _ -> useful_warn k) grammar.on_error_reduce; }, !use_inline menhir-20171222/src/parserAux.mli0000664000175000017500000000741313217215730016764 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module provides utilities that are shared by the two versions of the parser. *) open Positions open Syntax (* A few types used in the parser. 
*) type early_producer = Positions.t * identifier located option * parameter * attributes type early_producers = early_producer list type early_production = early_producers * string located option * (* optional precedence *) branch_production_level * Positions.t type early_productions = early_production list (* [new_precedence_level pos1 pos2] creates a new precendence level, which is stronger than any levels previously created by this function. It should be called every time a [%left], [%right], or [%nonassoc] declaration is found. The positions are the positions of this declaration in the source code. The precedence levels created by this function are attached to tokens and (via %prec) to productions. They are used in solving shift/reduce and shift/reduce/reduce conflicts. *) val new_precedence_level: Lexing.position -> Lexing.position -> precedence_level (* [new_production_level()] creates a new production level, which is stronger than any levels previously created by this function. It should be called every time a new production is found. The production levels created by this function are attached to productions. They are used in solving reduce/reduce conflicts: following ocamlyacc and bison, the production that appears first in the grammar receives preference. It may seem very strange that %prec annotations do not influence this process, but that's how it is, at least for the moment. *) val new_production_level: unit -> branch_production_level (* [new_on_error_reduce_level()] creates a new level, which is attached to an [%on_error_reduce] declaration. *) val new_on_error_reduce_level: unit -> on_error_reduce_level (* [check_production_group] accepts a production group and checks that all productions in the group define the same set of identifiers. *) val check_production_group: early_productions -> unit (* [normalize_producers] accepts a list of producers where identifiers are optional and returns a list of producers where identifiers are mandatory. A missing identifier in the [i]-th position receives the conventional name [_i]. *) val normalize_producers: early_producers -> producer list (* [override pos oprec1 oprec2] decides which of the two optional %prec declarations [oprec1] and [oprec2] applies to a production. It signals an error if the two are present. *) val override: Positions.t -> 'a option -> 'a option -> 'a option (* [producer_names producers] returns an array [names] such that [names.(idx) = None] if the (idx + 1)-th producer is unnamed and [names.(idx) = Some id] if it is called [id]. *) val producer_names: early_producers -> identifier option array menhir-20171222/src/nonterminalType.ml0000664000175000017500000000706713217215730020036 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open UnparameterizedSyntax open IL (* This is the conventional name of the nonterminal GADT, which describes the nonterminal symbols. *) let tcnonterminalgadt = "nonterminal" let tnonterminalgadt a = TypApp (tcnonterminalgadt, [ a ]) (* This is the conventional name of the data constructors of the nonterminal GADT. 
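For illustration, with two invented nonterminal symbols [expr], of type
   [int], and [decl], of type [unit], the generated declaration roughly
   reads: *)

type _ example_nonterminal =
  | N_expr : int example_nonterminal
  | N_decl : unit example_nonterminal

(* Naming the data constructors: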
*) let tnonterminalgadtdata nt = "N_" ^ Misc.normalize nt (* This is the definition of the nonterminal GADT. Here, the data constructors have no value argument, but have a type index. *) exception MissingOCamlType of string let nonterminalgadtdef grammar = assert Settings.inspection; let comment, datadefs = try (* The ordering of this list matters. We want the data constructors to respect the internal ordering (as determined by [nonterminals] in [UnparameterizedSyntax]) of the nonterminal symbols. This may be exploited in the table back-end to allow an unsafe conversion of a data constructor to an integer code. See [n2i] in [InspectionTableInterpreter]. *) "The indexed type of nonterminal symbols.", List.map (fun nt -> let index = match ocamltype_of_symbol grammar nt with | Some t -> TypTextual t | None -> raise (MissingOCamlType nt) in { dataname = tnonterminalgadtdata nt; datavalparams = []; datatypeparams = Some [ index ] } ) (nonterminals grammar) with MissingOCamlType nt -> (* If the type of some nonterminal symbol is unknown, give up and define ['a nonterminal] as an abstract type. This is useful when we are in [--(raw)-depend] mode and we do not wish to fail. Instead, we produce a mock [.mli] file that is an approximation of the real [.mli] file. When we are not in [--(raw)-depend] mode, though, this is a problem. We display an error message and stop. *) match Settings.depend with | Settings.OMRaw | Settings.OMPostprocess -> "The indexed type of nonterminal symbols (mock!).", [] | Settings.OMNone -> Error.error [] "\ the type of the nonterminal symbol %s is unknown.\n\ When --inspection is set, the type of every nonterminal symbol must be known.\n\ Please use --infer or specify the type of every symbol via %%type declarations." nt in [ IIComment comment; IITypeDecls [{ typename = tcnonterminalgadt; typeparams = [ "_" ]; typerhs = TDefSum datadefs; typeconstraint = None }] ] menhir-20171222/src/cmly_write.ml0000664000175000017500000001344713217215730017023 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. 
*) (* *) (******************************************************************************) open UnparameterizedSyntax open Grammar open Cmly_format let raw_content stretch = stretch.Stretch.stretch_raw_content let ocamltype (typ : Stretch.ocamltype) : ocamltype = match typ with | Stretch.Declared stretch -> raw_content stretch | Stretch.Inferred typ -> typ let ocamltype (typo : Stretch.ocamltype option) : ocamltype option = match typo with | None -> None | Some typ -> Some (ocamltype typ) let range (pos : Positions.t) : range = { r_start = Positions.start_of_position pos; r_end = Positions.end_of_position pos; } let ranges = List.map range let attribute (label, payload : Syntax.attribute) : attribute = { a_label = Positions.value label; a_payload = raw_content payload; a_position = range (Positions.position label); } let attributes : Syntax.attributes -> attributes = List.map attribute let terminal (t : Terminal.t) : terminal_def = { t_kind = ( if Terminal.equal t Terminal.error then `ERROR else if (match Terminal.eof with | None -> false | Some eof -> Terminal.equal t eof) then `EOF else if Terminal.pseudo t then `PSEUDO else `REGULAR ); t_name = Terminal.print t; t_type = ocamltype (Terminal.ocamltype t); t_attributes = attributes (Terminal.attributes t); } let nonterminal (nt : Nonterminal.t) : nonterminal_def = let is_start = Nonterminal.is_start nt in { n_kind = if is_start then `START else `REGULAR; n_name = Nonterminal.print false nt; n_mangled_name = Nonterminal.print true nt; n_type = if is_start then None else ocamltype (Nonterminal.ocamltype nt); n_positions = if is_start then [] else ranges (Nonterminal.positions nt); n_nullable = Analysis.nullable nt; n_first = List.map Terminal.t2i (TerminalSet.elements (Analysis.first nt)); n_attributes = if is_start then [] else attributes (Nonterminal.attributes nt); } let symbol (sym : Symbol.t) : symbol = match sym with | Symbol.N n -> N (Nonterminal.n2i n) | Symbol.T t -> T (Terminal.t2i t) let action (a : Action.t) : action = { a_expr = Printer.string_of_expr (Action.to_il_expr a); a_keywords = Keyword.KeywordSet.elements (Action.keywords a); } let rhs (prod : Production.index) : producer_def array = match Production.classify prod with | Some n -> [| (N (Nonterminal.n2i n), "", []) |] | None -> Array.mapi (fun i sym -> let id = (Production.identifiers prod).(i) in let attrs = attributes (Production.rhs_attributes prod).(i) in symbol sym, id, attrs ) (Production.rhs prod) let production (prod : Production.index) : production_def = { p_kind = if Production.is_start prod then `START else `REGULAR; p_lhs = Nonterminal.n2i (Production.nt prod); p_rhs = rhs prod; p_positions = ranges (Production.positions prod); p_action = if Production.is_start prod then None else Some (action (Production.action prod)); p_attributes = attributes (Production.lhs_attributes prod); } let item (i : Item.t) : production * int = let p, i = Item.export i in (Production.p2i p, i) let itemset (is : Item.Set.t) : (production * int) list = List.map item (Item.Set.elements is) let lr0_state (node : Lr0.node) : lr0_state_def = { lr0_incoming = Option.map symbol (Lr0.incoming_symbol node); lr0_items = itemset (Lr0.items node) } let transition (sym, node) : symbol * lr1 = (symbol sym, Lr1.number node) let lr1_state (node : Lr1.node) : lr1_state_def = { lr1_lr0 = Lr0.core (Lr1.state node); lr1_transitions = List.map transition (SymbolMap.bindings (Lr1.transitions node)); lr1_reductions = let add t ps rs = (Terminal.t2i t, List.map Production.p2i ps) :: rs in 
TerminalMap.fold_rev add (Lr1.reductions node) [] } let entry_point prod node nt _typ accu : (nonterminal * production * lr1) list = (Nonterminal.n2i nt, Production.p2i prod, Lr1.number node) :: accu let encode () : grammar = { g_basename = Settings.base; g_preludes = List.map raw_content Front.grammar.preludes; g_postludes = List.map raw_content Front.grammar.postludes; g_terminals = Terminal.init terminal; g_nonterminals = Nonterminal.init nonterminal; g_productions = Production.init production; g_lr0_states = Array.init Lr0.n lr0_state; g_lr1_states = Array.of_list (Lr1.map lr1_state); g_entry_points = Lr1.fold_entry entry_point []; g_attributes = attributes Analysis.attributes; g_parameters = List.map raw_content Front.grammar.parameters; } let write oc t = (* .cmly file format: CMLY ++ version string ++ grammar *) let magic = "CMLY" ^ Version.version in output_string oc magic; output_value oc (t : grammar) let write filename = let oc = open_out filename in write oc (encode()); close_out oc menhir-20171222/src/codePieces.mli0000664000175000017500000001112613217215730017051 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module defines many internal naming conventions for use by the two code generators, [CodeBackend] and [TableBackend]. It also offers a few code generation facilities. *) open IL open Grammar (* ------------------------------------------------------------------------ *) (* Naming conventions. *) (* The type variable associated with a nonterminal [nt]. *) val ntvar : Nonterminal.t -> string (* The variable that holds the environment. This is a parameter to all functions. We do not make it a global variable because we wish to preserve re-entrancy. *) val env : string (* A variable used to hold a semantic value. *) val semv : string (* A variable used to hold a stack. *) val stack: string (* A variable used to hold a state. *) val state: string (* A variable used to hold a token. *) val token: string (* Variables used to hold start and end positions. *) val beforeendp: string val startp: string val endp: string (* ------------------------------------------------------------------------ *) (* Types for semantic values. *) (* [semvtypent nt] is the type of the semantic value associated with nonterminal [nt]. *) val semvtypent : Nonterminal.t -> typ (* [semvtypetok tok] is the type of the semantic value associated with token [tok]. There is no such type if the token does not have a semantic value. *) val semvtypetok : Terminal.t -> typ list (* [semvtype symbol] is the type of the semantic value associated with [symbol]. *) val semvtype : Symbol.t -> typ list (* [symvalt] returns the empty list if the symbol at hand carries no semantic value and the singleton list [[f t]] if it carries a semantic value of type [t]. *) val symvalt : Symbol.t -> (typ -> 'a) -> 'a list (* [symval symbol x] returns either the empty list or the singleton list [[x]], depending on whether [symbol] carries a semantic value. *) val symval : Symbol.t -> 'a -> 'a list (* [tokval] is a version of [symval], specialized for terminal symbols. 
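For instance (invented tokens), if INT carries a semantic value while EOL
   does not, then [tokval INT e] returns the singleton list containing [e],
   whereas [tokval EOL e] returns the empty list.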
*) val tokval : Terminal.t -> 'a -> 'a list (* ------------------------------------------------------------------------ *) (* Patterns for tokens. *) (* [tokpat tok] is a pattern that matches the token [tok], without binding its semantic value. *) val tokpat: Terminal.t -> pattern (* [tokpatv tok] is a pattern that matches the token [tok], and binds its semantic value, if it has one, to the variable [semv]. *) val tokpatv: Terminal.t -> pattern (* [tokspat toks] is a pattern that matches any token in the set [toks], without binding its semantic value. *) val tokspat: TerminalSet.t -> pattern (* [destructuretokendef name codomain bindsemv branch] generates the definition of a function that destructure tokens. [name] is the name of the function that is generated. [codomain] is its return type. [bindsemv] tells whether the variable [semv] should be bound. [branch] is applied to each (non-pseudo) terminal and must produce code for each branch. *) val destructuretokendef: string -> typ -> bool -> (Terminal.t -> expr) -> valdef (* ------------------------------------------------------------------------ *) (* A global variable holds the exception [Error]. *) (* A reference to this global variable. *) val errorval: expr (* ------------------------------------------------------------------------ *) (* The structure items [mbasics grammar] define and include the internal sub-module [Basics], which contains the definitions of the exception [Error] and of the type [token]. Then, they define the global variable mentioned above, which holds the exception [Error]. *) val basics: string val mbasics: UnparameterizedSyntax.grammar -> structure menhir-20171222/src/reachability.ml0000664000175000017500000000452613217215730017303 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open UnparameterizedSyntax let rec visit grammar visited symbol = try let rule = StringMap.find symbol grammar.rules in if not (StringSet.mem symbol visited) then let visited = StringSet.add symbol visited in List.fold_left (visitb grammar) visited rule.branches else visited with Not_found -> (* This is a terminal symbol. *) assert (symbol = "error" || StringMap.mem symbol grammar.tokens); visited and visitb grammar visited { producers = symbols } = List.fold_left (visits grammar) visited symbols and visits grammar visited producer = visit grammar visited (producer_symbol producer) let trim grammar = if StringSet.cardinal grammar.start_symbols = 0 then Error.error [] "no start symbol has been declared." else let reachable = StringSet.fold (fun symbol visited -> visit grammar visited symbol ) grammar.start_symbols StringSet.empty in StringMap.iter (fun symbol rule -> if not (StringSet.mem symbol reachable) then Error.grammar_warning rule.positions "symbol %s is unreachable from any of the start symbol(s)." 
symbol ) grammar.rules; { grammar with rules = StringMap.restrict reachable grammar.rules; types = StringMap.restrict reachable grammar.types; on_error_reduce = StringMap.restrict reachable grammar.on_error_reduce; } menhir-20171222/src/default.mli0000664000175000017500000000225313217215730016433 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Grammar (* [has_default_reduction s] tells whether state [s] has a default reduction, and, if so, upon which set of tokens. *) val has_default_reduction : Lr1.node -> (Production.index * TerminalSet.t) option menhir-20171222/src/cmly_format.ml0000664000175000017500000000641513217215730017156 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module defines the data that is stored in .cmly files. In short, a .cmly file contains a value of type [grammar], defined below. *) (* The type definitions in this module are used by [Cmly_write], which writes a .cmly file, and by [Cmly_read], which reads a .cmly file. They should not be used anywhere else. *) (* All entities (terminal symbols, nonterminal symbols, and so on) are represented as integers. These integers serve as indices into arrays. This enables simple and efficient hashing, comparison, indexing, etc. 
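(Digression: a hedged sketch of reading such a file back, assuming the
   magic-string convention implemented in [Cmly_write.write] and the same
   [Version] module as the writer; [really_input_string] and [input_value]
   come from the standard library.) *)

let _read_sketch (filename : string) =
  let ic = open_in_bin filename in
  let magic = "CMLY" ^ Version.version in
  let actual = really_input_string ic (String.length magic) in
  if actual <> magic then failwith "not a valid .cmly file";
  (* The grammar itself was written with [output_value]. *)
  let g = input_value ic in
  close_in ic;
  g

(* Entities are represented as follows.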
*) type terminal = int type nonterminal = int type production = int type lr0 = int type lr1 = int type ocamltype = string type ocamlexpr = string type range = { r_start: Lexing.position; r_end: Lexing.position; } type attribute = { a_label: string; a_payload: string; a_position: range; } type attributes = attribute list type terminal_def = { t_name: string; t_kind: [`REGULAR | `ERROR | `EOF | `PSEUDO]; t_type: ocamltype option; t_attributes: attributes; } type nonterminal_def = { n_name: string; n_kind: [`REGULAR | `START]; n_mangled_name: string; n_type: ocamltype option; n_positions: range list; n_nullable: bool; n_first: terminal list; n_attributes: attributes; } type symbol = | T of terminal | N of nonterminal type identifier = string type action = { a_expr: ocamlexpr; a_keywords: Keyword.keyword list; } type producer_def = symbol * identifier * attributes type production_def = { p_kind: [`REGULAR | `START]; p_lhs: nonterminal; p_rhs: producer_def array; p_positions: range list; p_action: action option; p_attributes: attributes; } type lr0_state_def = { lr0_incoming: symbol option; lr0_items: (production * int) list; } type lr1_state_def = { lr1_lr0: lr0; lr1_transitions: (symbol * lr1) list; lr1_reductions: (terminal * production list) list; } type grammar = { g_basename : string; g_preludes : string list; g_postludes : string list; g_terminals : terminal_def array; g_nonterminals : nonterminal_def array; g_productions : production_def array; g_lr0_states : lr0_state_def array; g_lr1_states : lr1_state_def array; g_entry_points : (nonterminal * production * lr1) list; g_attributes : attributes; g_parameters : string list; } menhir-20171222/src/mark.ml0000664000175000017500000000241213217215730015565 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (** This module implements a very simple notion of ``mark''. A mark is really a reference cell (without content). Creating a new mark requires allocating a new cell, and comparing marks requires comparing pointers. *) type t = unit ref let fresh = ref let same = (==) let none = fresh() menhir-20171222/src/grammar.ml0000664000175000017500000000220513217215730016261 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module runs the grammar functor on the grammar produced by the front-end. 
*) include GrammarFunctor.Make(struct let grammar = Front.grammar let verbose = true end) menhir-20171222/src/CheckSafeParameterizedGrammar.mli0000664000175000017500000000246213217215730022651 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This test accepts a parameterized grammar, with the restriction that all parameters must have sort [*]. Parameters of higher sort must be eliminated prior to running this test: see [SelectiveExpansion]. *) (* This test succeeds if and only if the expansion of this grammar is safe, that is, terminates. *) val check: Syntax.grammar -> unit menhir-20171222/src/Maps.ml0000664000175000017500000000706513217215730015544 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* BEGIN PERSISTENT_MAPS *) module type PERSISTENT_MAPS = sig type key type 'data t val empty: 'data t val add: key -> 'data -> 'data t -> 'data t val find: key -> 'data t -> 'data val iter: (key -> 'data -> unit) -> 'data t -> unit end (* END PERSISTENT_MAPS *) (* BEGIN IMPERATIVE_MAPS *) module type IMPERATIVE_MAPS = sig type key type 'data t val create: unit -> 'data t val clear: 'data t -> unit val add: key -> 'data -> 'data t -> unit val find: key -> 'data t -> 'data val iter: (key -> 'data -> unit) -> 'data t -> unit end (* END IMPERATIVE_MAPS *) (* BEGIN IMPERATIVE_MAP *) module type IMPERATIVE_MAP = sig type key type data val set: key -> data -> unit val get: key -> data option end (* END IMPERATIVE_MAP *) module PersistentMapsToImperativeMaps (M : PERSISTENT_MAPS) : IMPERATIVE_MAPS with type key = M.key and type 'data t = 'data M.t ref = struct type key = M.key type 'data t = 'data M.t ref let create () = ref M.empty let clear t = t := M.empty let add k d t = t := M.add k d !t let find k t = M.find k !t let iter f t = M.iter f !t end module ImperativeMapsToImperativeMap (M : IMPERATIVE_MAPS) (D : sig type data end) : IMPERATIVE_MAP with type key = M.key and type data = D.data = struct type key = M.key type data = D.data let m = M.create() let set k d = M.add k d m let get k = try Some (M.find k m) with Not_found -> None end module ArrayAsImperativeMaps (K : sig val n: int end) : IMPERATIVE_MAPS with type key = int and type 'data t = 'data option array = struct open K type key = int type 'data t = 'data option array let create () = Array.make n None let clear m = Array.fill m 0 n None let add key data m = m.(key) <- Some data let find key m = match m.(key) with | None -> raise Not_found | Some data -> data let iter f m = Array.iteri (fun key data -> match data with | None -> () | Some data -> f key data ) m end module HashTableAsImperativeMaps (H : Hashtbl.HashedType) : IMPERATIVE_MAPS with type key = H.t = struct include 
Hashtbl.Make(H) let create () = create 1023 let add key data table = add table key data let find table key = find key table end module TrivialHashedType (T : sig type t end) : Hashtbl.HashedType with type t = T.t = struct include T let equal = (=) let hash = Hashtbl.hash end menhir-20171222/src/grammarFunctor.ml0000664000175000017500000012766513217215730017644 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open UnparameterizedSyntax open Syntax open Positions module Make (G : sig (* An abstract syntax tree for the grammar. *) val grammar: UnparameterizedSyntax.grammar (* This flag indicates whether it is OK to produce warnings, verbose information, etc., when this functor is invoked. If it is set to [false], then only serious errors can be signaled. *) val verbose: bool end) = struct open G (* ------------------------------------------------------------------------ *) (* Precedence levels for tokens or pseudo-tokens alike. *) module TokPrecedence = struct (* This set records, on a token by token basis, whether the token's precedence level is ever useful. This allows emitting warnings about useless precedence declarations. *) let ever_useful : StringSet.t ref = ref StringSet.empty let use id = ever_useful := StringSet.add id !ever_useful (* This function is invoked when someone wants to consult a token's precedence level. This does not yet mean that this level is useful, though. Indeed, if it is subsequently compared against [UndefinedPrecedence], it will not allow solving a conflict. So, in addition to the desired precedence level, we return a delayed computation which, when evaluated, records that this precedence level was useful. *) let levelip id properties = lazy (use id), properties.tk_precedence let leveli id = let properties = try StringMap.find id grammar.tokens with Not_found -> assert false (* well-formedness check has been performed earlier *) in levelip id properties (* This function prints warnings about useless precedence declarations for terminal symbols (%left, %right, %nonassoc). It should be invoked after only the automaton has been constructed. *) let diagnostics () = StringMap.iter (fun id properties -> if not (StringSet.mem id !ever_useful) then match properties.tk_precedence with | UndefinedPrecedence -> () | PrecedenceLevel (_, _, pos1, pos2) -> Error.grammar_warning (Positions.two pos1 pos2) "the precedence level assigned to %s is never useful." id ) grammar.tokens end (* ------------------------------------------------------------------------ *) (* Nonterminals. *) module Nonterminal = struct type t = int let n2i i = i let compare = (-) (* Determine how many nonterminals we have and build mappings both ways between names and indices. A new nonterminal is created for every start symbol. 
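A hedged sketch of the indexing step, under the assumption that
   [Misc.index] numbers the given names from 0 and returns the count, an
   array mapping indices to names, and the reverse map: *)

let _index_sketch (xs : string list) : int * string array * int StringMap.t =
  let name = Array.of_list xs in
  let map = ref StringMap.empty in
  Array.iteri (fun i x -> map := StringMap.add x i !map) name;
  Array.length name, name, !map

(* One new start symbol is created per %start declaration: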
*) let new_start_nonterminals = StringSet.fold (fun symbol ss -> (symbol ^ "'") :: ss) grammar.start_symbols [] let original_nonterminals = nonterminals grammar let start = List.length new_start_nonterminals let (n : int), (name : string array), (map : int StringMap.t) = Misc.index (new_start_nonterminals @ original_nonterminals) let () = if verbose then Error.logG 1 (fun f -> Printf.fprintf f "Grammar has %d nonterminal symbols, among which %d start symbols.\n" (n - start) start ) let is_start nt = nt < start let print normalize nt = if normalize then Misc.normalize name.(nt) else name.(nt) let lookup name = StringMap.find name map let positions nt = (StringMap.find (print false nt) grammar.rules).positions let init f = Array.init n f let iter f = Misc.iteri n f let fold f accu = Misc.foldi n f accu let map f = Misc.mapi n f let iterx f = for nt = start to n - 1 do f nt done let foldx f accu = Misc.foldij start n f accu let ocamltype nt = assert (not (is_start nt)); try Some (StringMap.find (print false nt) grammar.types) with Not_found -> None let ocamltype_of_start_symbol nt = match ocamltype nt with | Some typ -> typ | None -> (* Every start symbol has a type. *) assert false let tabulate f = Array.get (Array.init n f) let attributes : Syntax.attributes array = Array.make n [] let () = StringMap.iter (fun nonterminal { attributes = attrs } -> let nt = lookup nonterminal in attributes.(nt) <- attrs ) grammar.rules let attributes nt = attributes.(nt) end (* Sets and maps over nonterminals. *) module NonterminalMap = Patricia.Big module NonterminalSet = Patricia.Big.Domain (* ------------------------------------------------------------------------ *) (* Terminals. *) module Terminal = struct type t = int let t2i i = i let i2t i = i let compare = (-) let equal (tok1 : t) (tok2 : t) = tok1 = tok2 (* Determine how many terminals we have and build mappings both ways between names and indices. A new terminal "#" is created. A new terminal "error" is created. The fact that the integer code assigned to the "#" pseudo-terminal is the last one is exploited in the table-based back-end. (The right-most row of the action table is not created.) Pseudo-tokens (used in %prec declarations, but never declared using %token) are filtered out. *) (* In principle, the number of the [error] token is irrelevant. It is currently 0, but we do not rely on that. *) let (n : int), (name : string array), (map : int StringMap.t) = let tokens = tokens grammar in match tokens with | [] when verbose -> Error.error [] "no tokens have been declared." 
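(* Thus, the numbering of the terminal symbols is fixed: [error] receives the code 0, the declared tokens receive the following codes, and [#] receives the largest code, [n - 1], a fact that the table-based back-end exploits. The assertions on [sharp] and [error] further down check this numbering. *)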
| _ -> Misc.index ("error" :: tokens @ [ "#" ]) let print tok = name.(tok) let lookup name = StringMap.find name map let sharp = lookup "#" let error = lookup "error" let pseudo tok = (tok = sharp) || (tok = error) let real t = error <> t && t <> sharp let token_properties = let not_so_dummy_properties = (* applicable to [error] and [#] *) { tk_filename = "__primitives__"; tk_precedence = UndefinedPrecedence; tk_associativity = UndefinedAssoc; tk_ocamltype = None; tk_is_declared = true; tk_position = Positions.dummy; tk_attributes = []; } in Array.init n (fun tok -> try StringMap.find name.(tok) grammar.tokens with Not_found -> assert (tok = sharp || tok = error); not_so_dummy_properties ) let () = if verbose then Error.logG 1 (fun f -> Printf.fprintf f "Grammar has %d terminal symbols.\n" (n - 2) ) let precedence_level tok = TokPrecedence.levelip (print tok) token_properties.(tok) let associativity tok = token_properties.(tok).tk_associativity let ocamltype tok = token_properties.(tok).tk_ocamltype let init f = Array.init n f let iter f = Misc.iteri n f let fold f accu = Misc.foldi n f accu let map f = Misc.mapi n f let () = assert (sharp = n - 1) let foldx f accu = Misc.foldi sharp f accu let mapx f = Misc.mapi sharp f let () = assert (error = 0) let iter_real f = for i = 1 to n-2 do f i done (* If a token named [EOF] exists, then it is assumed to represent ocamllex's [eof] pattern. *) let eof = try Some (lookup "EOF") with Not_found -> None let attributes tok = token_properties.(tok).tk_attributes (* The sub-module [Word] offers an implementation of words (that is, sequences) of terminal symbols. It is used by [LRijkstra]. We make it a functor, because it has internal state (a hash table) and a side effect (failure if there are more than 256 terminal symbols). *) module Word (X : sig end) = struct (* We could use lists, or perhaps the sequences offered by the module [Seq], which support constant time concatenation. However, we need a much more compact representation: [LRijkstra] stores tens of millions of such words. We use strings, because they are very compact (8 bits per symbol), and on top of that, we use a hash-consing facility. In practice, hash-consing allows us to save 1000x in space. *) (* A drawback of this approach is that it works only if the number of terminal symbols is at most 256. For the moment, this is good enough. [LRijkstra] already has difficulty at 100 terminal symbols or so. *) let () = assert (n <= 256) let (encode : string -> int), (decode : int -> string), verbose = Misc.new_encode_decode 1024 type word = int let epsilon = encode "" let singleton t = encode (String.make 1 (Char.chr t)) let append i1 i2 = let w1 = decode i1 and w2 = decode i2 in if String.length w1 = 0 then i2 else if String.length w2 = 0 then i1 else encode (w1 ^ w2) let length i = String.length (decode i) let first i z = let w = decode i in if String.length w > 0 then Char.code w.[0] else z let rec elements i n w = if i = n then [] else Char.code w.[i] :: elements (i + 1) n w let elements i = let w = decode i in elements 0 (String.length w) w let print i = let w = decode i in Misc.separated_iter_to_string (fun c -> print (Char.code c)) " " (fun f -> String.iter f w) (* [Pervasives.compare] implements a lexicographic ordering on strings. *) let compare i1 i2 = Pervasives.compare (decode i1) (decode i2) end end (* Sets of terminals are used intensively in the LR(1) construction, so it is important that they be as efficient as possible. 
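Here, [CompressedBitSet] represents such a set as a compact bit vector, indexed by the integer codes assigned to the terminal symbols above, so membership tests, unions, and intersections are inexpensive.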
*) module TerminalSet = struct include CompressedBitSet let print toks = Misc.separated_iter_to_string Terminal.print " " (fun f -> iter f toks) let universe = remove Terminal.sharp ( remove Terminal.error ( Terminal.fold add empty ) ) (* The following definitions are used in the computation of FIRST sets below. They are not exported outside of this file. *) type property = t let bottom = empty let is_maximal _ = false end (* Maps over terminals. *) module TerminalMap = Patricia.Big (* ------------------------------------------------------------------------ *) (* Symbols. *) module Symbol = struct type t = | N of Nonterminal.t | T of Terminal.t let compare sym1 sym2 = match sym1, sym2 with | N nt1, N nt2 -> Nonterminal.compare nt1 nt2 | T tok1, T tok2 -> Terminal.compare tok1 tok2 | N _, T _ -> 1 | T _, N _ -> -1 let equal sym1 sym2 = compare sym1 sym2 = 0 let rec lequal syms1 syms2 = match syms1, syms2 with | [], [] -> true | sym1 :: syms1, sym2 :: syms2 -> equal sym1 sym2 && lequal syms1 syms2 | _ :: _, [] | [], _ :: _ -> false let print = function | N nt -> Nonterminal.print false nt | T tok -> Terminal.print tok let nonterminal = function | T _ -> false | N _ -> true (* Printing an array of symbols. [offset] is the start offset -- we print everything to its right. [dot] is the dot offset -- we print a dot at this offset, if we find it. *) let printaod offset dot symbols = let buffer = Buffer.create 512 in let length = Array.length symbols in for i = offset to length do if i = dot then Buffer.add_string buffer ". "; if i < length then begin Buffer.add_string buffer (print symbols.(i)); Buffer.add_char buffer ' ' end done; Buffer.contents buffer let printao offset symbols = printaod offset (-1) symbols let printa symbols = printao 0 symbols let printl symbols = printa (Array.of_list symbols) let lookup name = try T (Terminal.lookup name) with Not_found -> try N (Nonterminal.lookup name) with Not_found -> assert false (* well-formedness check has been performed earlier *) end (* Sets of symbols. *) module SymbolSet = struct include Set.Make(Symbol) let print symbols = Symbol.printl (elements symbols) (* The following definitions are used in the computation of symbolic FOLLOW sets below. They are not exported outside of this file. *) type property = t let bottom = empty let is_maximal _ = false end (* Maps over symbols. *) module SymbolMap = struct include Map.Make(Symbol) let domain m = fold (fun symbol _ accu -> symbol :: accu ) m [] let purelynonterminal m = fold (fun symbol _ accu -> accu && Symbol.nonterminal symbol ) m true end (* ------------------------------------------------------------------------ *) (* Productions. *) module Production = struct type index = int let compare = (-) (* Create an array of productions. Record which productions are associated with every nonterminal. A new production S' -> S is created for every start symbol S. It is known as a start production. 
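The start productions are created first, so they occupy the indices 0 to [start - 1] in the table; the productions found in the grammar follow them. The predicate [is_start] below relies on this numbering.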
*) let n : int = let n = StringMap.fold (fun _ { branches = branches } n -> n + List.length branches ) grammar.rules 0 in if verbose then Error.logG 1 (fun f -> Printf.fprintf f "Grammar has %d productions.\n" n); n + StringSet.cardinal grammar.start_symbols let p2i prod = prod let i2p prod = assert (prod >= 0 && prod < n); prod let table : (Nonterminal.t * Symbol.t array) array = Array.make n (-1, [||]) let identifiers : identifier array array = Array.make n [||] let actions : action option array = Array.make n None let ntprods : (int * int) array = Array.make Nonterminal.n (-1, -1) let positions : Positions.t list array = Array.make n [] let rhs_attributes : Syntax.attributes array array = Array.make n [||] let (start : int), (startprods : index NonterminalMap.t) = StringSet.fold (fun nonterminal (k, startprods) -> let nt = Nonterminal.lookup nonterminal and nt' = Nonterminal.lookup (nonterminal ^ "'") in table.(k) <- (nt', [| Symbol.N nt |]); identifiers.(k) <- [| "_1" |]; ntprods.(nt') <- (k, k+1); positions.(k) <- Nonterminal.positions nt; k+1, NonterminalMap.add nt k startprods ) grammar.start_symbols (0, NonterminalMap.empty) let prec_decl : symbol located option array = Array.make n None let production_level : branch_production_level array = (* The start productions receive a level that pretends that they originate in a fictitious "builtin" file. So, a reduce/reduce conflict that involves a start production will not be solved. *) let dummy = ProductionLevel (InputFile.builtin_input_file, 0) in Array.make n dummy let (_ : int) = StringMap.fold (fun nonterminal { branches } k -> let nt = Nonterminal.lookup nonterminal in let k' = List.fold_left (fun k branch -> let symbols = Array.of_list branch.producers in table.(k) <- (nt, Array.map (fun producer -> Symbol.lookup (producer_symbol producer)) symbols); identifiers.(k) <- Array.map producer_identifier symbols; actions.(k) <- Some branch.action; rhs_attributes.(k) <- Array.map producer_attributes symbols; production_level.(k) <- branch.branch_production_level; prec_decl.(k) <- branch.branch_prec_annotation; positions.(k) <- [ branch.branch_position ]; k+1 ) k branches in ntprods.(nt) <- (k, k'); k' ) grammar.rules start (* Iteration over the productions associated with a specific nonterminal. *) let iternt nt f = let k, k' = ntprods.(nt) in for prod = k to k' - 1 do f prod done let foldnt (nt : Nonterminal.t) (accu : 'a) (f : index -> 'a -> 'a) : 'a = let k, k' = ntprods.(nt) in let rec loop accu prod = if prod < k' then loop (f prod accu) (prod + 1) else accu in loop accu k (* This funny variant is lazy. If at some point [f] does not demand its second argument, then iteration stops. *) let foldnt_lazy (nt : Nonterminal.t) (f : index -> (unit -> 'a) -> 'a) (seed : 'a) : 'a = let k, k' = ntprods.(nt) in let rec loop prod seed = if prod < k' then f prod (fun () -> loop (prod + 1) seed) else seed in loop k seed (* Accessors. *) let def prod = table.(prod) let nt prod = let nt, _ = table.(prod) in nt let rhs prod = let _, rhs = table.(prod) in rhs let length prod = Array.length (rhs prod) let identifiers prod = identifiers.(prod) let is_start prod = prod < start let classify prod = if is_start prod then match (rhs prod).(0) with | Symbol.N nt -> Some nt | Symbol.T _ -> assert false else None let action prod = match actions.(prod) with | Some action -> action | None -> (* Start productions have no action. 
*) assert (is_start prod); assert false let positions prod = positions.(prod) let lhs_attributes prod = Nonterminal.attributes (nt prod) let rhs_attributes prod = rhs_attributes.(prod) let startsymbol2startprod nt = try NonterminalMap.find nt startprods with Not_found -> assert false (* [nt] is not a start symbol *) (* Iteration. *) let init f = Array.init n f let iter f = Misc.iteri n f let fold f accu = Misc.foldi n f accu let map f = Misc.mapi n f let amap f = Array.init n f let iterx f = for prod = start to n - 1 do f prod done let foldx f accu = Misc.foldij start n f accu let mapx f = Misc.mapij start n f (* Printing a production. *) let print prod = assert (not (is_start prod)); let nt, rhs = table.(prod) in Printf.sprintf "%s -> %s" (Nonterminal.print false nt) (Symbol.printao 0 rhs) (* Tabulation. *) let tabulate f = Misc.tabulate n f let tabulateb f = Misc.tabulateb n f (* This array allows recording, for each %prec declaration, whether it is ever useful. This allows us to emit a warning about useless %prec declarations. *) (* 2015/10/06: We take into account the fact that a %prec declaration can be duplicated by inlining or by the expansion of parameterized non-terminal symbols. Our table is not indexed by productions, but by positions (of %prec declarations in the source). Thus, if a %prec declaration is duplicated, at least one of its copies should be found useful for the warning to be suppressed. *) let ever_useful : (Positions.t, unit) Hashtbl.t = (* assuming that generic hashing and equality on positions are OK *) Hashtbl.create 16 let consult_prec_decl prod = let osym = prec_decl.(prod) in lazy ( Option.iter (fun sym -> (* Mark this %prec declaration as useful. *) let pos = Positions.position sym in Hashtbl.add ever_useful pos () ) osym ), osym (* This function prints warnings about useless precedence declarations for productions (%prec). It should be invoked after only the automaton has been constructed. *) let diagnostics () = iterx (fun prod -> let osym = prec_decl.(prod) in Option.iter (fun sym -> (* Check whether this %prec declaration was useless. *) let pos = Positions.position sym in if not (Hashtbl.mem ever_useful pos) then begin Error.grammar_warning [pos] "this %%prec declaration is never useful."; Hashtbl.add ever_useful pos () (* hack: avoid two warnings at the same position *) end ) osym ) (* Determining the precedence level of a production. If no %prec declaration was explicitly supplied, it is the precedence level of the rightmost terminal symbol in the production's right-hand side. *) type production_level = | PNone | PRightmostToken of Terminal.t | PPrecDecl of symbol let rightmost_terminal prod = Array.fold_left (fun accu symbol -> match symbol with | Symbol.T tok -> PRightmostToken tok | Symbol.N _ -> accu ) PNone (rhs prod) let combine e1 e2 = lazy (Lazy.force e1; Lazy.force e2) let precedence prod = let fact1, prec_decl = consult_prec_decl prod in let oterminal = match prec_decl with | None -> rightmost_terminal prod | Some { value = terminal } -> PPrecDecl terminal in match oterminal with | PNone -> fact1, UndefinedPrecedence | PRightmostToken tok -> let fact2, level = Terminal.precedence_level tok in combine fact1 fact2, level | PPrecDecl id -> let fact2, level = TokPrecedence.leveli id in combine fact1 fact2, level end (* ------------------------------------------------------------------------ *) (* Maps over productions. *) module ProductionMap = struct include Patricia.Big (* Iteration over the start productions only. 
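Because the start productions occupy the integer range from 0 to [Production.start - 1] (see above), it suffices to fold over this range.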
*) let start f = Misc.foldi Production.start (fun prod m -> add prod (f prod) m ) empty end (* ------------------------------------------------------------------------ *) (* Support for analyses of the grammar, expressed as fixed point computations. We exploit the generic fixed point algorithm in [Fix]. *) (* We perform memoization only at nonterminal symbols. We assume that the analysis of a symbol is the analysis of its definition (as opposed to, say, a computation that depends on the occurrences of this symbol in the grammar). *) module GenericAnalysis (P : Fix.PROPERTY) (S : sig open P (* An analysis is specified by the following functions. *) (* [terminal] maps a terminal symbol to a property. *) val terminal: Terminal.t -> property (* [disjunction] abstracts a binary alternative. That is, when we analyze an alternative between several productions, we compute a property for each of them independently, then we combine these properties using [disjunction]. *) val disjunction: property -> (unit -> property) -> property (* [P.bottom] should be a neutral element for [disjunction]. We use it in the analysis of an alternative with zero branches. *) (* [conjunction] abstracts a binary sequence. That is, when we analyze a sequence, we compute a property for each member independently, then we combine these properties using [conjunction]. In general, conjunction needs access to the first member of the sequence (a symbol), not just to its analysis (a property). *) val conjunction: Symbol.t -> property -> (unit -> property) -> property (* [epsilon] abstracts the empty sequence. It should be a neutral element for [conjunction]. *) val epsilon: property end) : sig open P (* The results of the analysis take the following form. *) (* To every nonterminal symbol, we associate a property. *) val nonterminal: Nonterminal.t -> property (* To every symbol, we associate a property. *) val symbol: Symbol.t -> property (* To every suffix of every production, we associate a property. The offset [i], which determines the beginning of the suffix, must be contained between [0] and [n], inclusive, where [n] is the length of the production. *) val production: Production.index -> int -> property end = struct open P (* The following analysis functions are parameterized over [get], which allows making a recursive call to the analysis at a nonterminal symbol. [get] maps a nonterminal symbol to a property. *) (* Analysis of a symbol. *) let symbol sym get : property = match sym with | Symbol.T tok -> S.terminal tok | Symbol.N nt -> (* Recursive call to the analysis, via [get]. *) get nt (* Analysis of (a suffix of) a production [prod], starting at index [i]. *) let production prod i get : property = let rhs = Production.rhs prod in let n = Array.length rhs in (* Conjunction over all symbols in the right-hand side. This can be viewed as a version of [Array.fold_right], which does not necessarily begin at index [0]. Note that, because [conjunction] is lazy, it is possible to stop early. *) let rec loop i = if i = n then S.epsilon else let sym = rhs.(i) in S.conjunction sym (symbol sym get) (fun () -> loop (i+1)) in loop i (* The analysis is the least fixed point of the following function, which analyzes a nonterminal symbol by looking up and analyzing its definition as a disjunction of conjunctions of symbols. *) let nonterminal nt get : property = (* Disjunction over all productions for this nonterminal symbol. 
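Because [Production.foldnt_lazy] does not demand its continuation when [S.disjunction] ignores its second argument, this disjunction can stop early. For instance, in the [NONEMPTY] analysis below, where disjunction is Boolean disjunction, the remaining productions are not examined once one nonempty production has been found.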
*) Production.foldnt_lazy nt (fun prod rest -> S.disjunction (production prod 0 get) rest ) P.bottom (* The least fixed point is taken as follows. Note that it is computed on demand, as [lfp] is called by the user. *) module F = Fix.Make (Maps.ArrayAsImperativeMaps(Nonterminal)) (P) let nonterminal = F.lfp nonterminal (* The auxiliary functions can be published too. *) let symbol sym = symbol sym nonterminal let production prod i = production prod i nonterminal end (* ------------------------------------------------------------------------ *) (* Compute which nonterminals are nonempty, that is, recognize a nonempty language. Also, compute which nonterminals are nullable. The two computations are almost identical. The only difference is in the base case: a single terminal symbol is not nullable, but is nonempty. *) module NONEMPTY = GenericAnalysis (Boolean) (struct (* A terminal symbol is nonempty. *) let terminal _ = true (* An alternative is nonempty if at least one branch is nonempty. *) let disjunction p q = p || q() (* A sequence is nonempty if both members are nonempty. *) let conjunction _ p q = p && q() (* The sequence epsilon is nonempty. It generates the singleton language {epsilon}. *) let epsilon = true end) module NULLABLE = GenericAnalysis (Boolean) (struct (* A terminal symbol is not nullable. *) let terminal _ = false (* An alternative is nullable if at least one branch is nullable. *) let disjunction p q = p || q() (* A sequence is nullable if both members are nullable. *) let conjunction _ p q = p && q() (* The sequence epsilon is nullable. *) let epsilon = true end) (* ------------------------------------------------------------------------ *) (* Compute FIRST sets. *) module FIRST = GenericAnalysis (TerminalSet) (struct (* A terminal symbol has a singleton FIRST set. *) let terminal = TerminalSet.singleton (* The FIRST set of an alternative is the union of the FIRST sets. *) let disjunction p q = TerminalSet.union p (q()) (* The FIRST set of a sequence is the union of: the FIRST set of the first member, and the FIRST set of the second member, if the first member is nullable. *) let conjunction symbol p q = if NULLABLE.symbol symbol then TerminalSet.union p (q()) else p (* The FIRST set of the empty sequence is empty. *) let epsilon = TerminalSet.empty end) (* ------------------------------------------------------------------------ *) let () = if verbose then begin (* If a start symbol generates the empty language or generates the language {epsilon}, report an error. In principle, this could be just a warning. However, in [Engine], in the function [start], it is convenient to assume that neither of these situations can arise. This means that at least one token must be read. *) StringSet.iter (fun symbol -> let nt = Nonterminal.lookup symbol in if not (NONEMPTY.nonterminal nt) then Error.error (Nonterminal.positions nt) "%s generates the empty language." (Nonterminal.print false nt); if TerminalSet.is_empty (FIRST.nonterminal nt) then Error.error (Nonterminal.positions nt) "%s generates the language {epsilon}." (Nonterminal.print false nt) ) grammar.start_symbols; (* If a nonterminal symbol generates the empty language, issue a warning. *) for nt = Nonterminal.start to Nonterminal.n - 1 do if not (NONEMPTY.nonterminal nt) then Error.grammar_warning (Nonterminal.positions nt) "%s generates the empty language." (Nonterminal.print false nt); done end (* ------------------------------------------------------------------------ *) (* Dump the analysis results. 
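At log level 2, this produces one line of the form [nullable(X) = ...] and one line of the form [first(X) = ...] for each nonterminal symbol [X].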
*) let () = if verbose then Error.logG 2 (fun f -> for nt = Nonterminal.start to Nonterminal.n - 1 do Printf.fprintf f "nullable(%s) = %b\n" (Nonterminal.print false nt) (NULLABLE.nonterminal nt) done; for nt = Nonterminal.start to Nonterminal.n - 1 do Printf.fprintf f "first(%s) = %s\n" (Nonterminal.print false nt) (TerminalSet.print (FIRST.nonterminal nt)) done ) let () = if verbose then Time.tick "Analysis of the grammar" (* ------------------------------------------------------------------------ *) (* Compute FOLLOW sets. Unnecessary for us, but requested by a user. Also, this is useful for the SLR(1) test. Thus, we perform this analysis only on demand. *) (* The computation of the symbolic FOLLOW sets follows exactly the same pattern as that of the traditional FOLLOW sets. We share code and parameterize this computation over a module [P]. The type [P.property] intuitively represents a set of symbols. *) module FOLLOW (P : sig include Fix.PROPERTY val union: property -> property -> property val terminal: Terminal.t -> property val first: Production.index -> int -> property end) = struct module S = FixSolver.Make (Maps.ArrayAsImperativeMaps(Nonterminal)) (P) (* Build a system of constraints. *) let record_ConVar, record_VarVar, solve = S.create() (* Iterate over all start symbols. *) let () = let sharp = P.terminal Terminal.sharp in for nt = 0 to Nonterminal.start - 1 do assert (Nonterminal.is_start nt); (* Add # to FOLLOW(nt). *) record_ConVar sharp nt done (* We need to do this explicitly because our start productions are of the form S' -> S, not S' -> S #, so # will not automatically appear into FOLLOW(S) when the start productions are examined. *) (* Iterate over all productions. *) let () = Array.iteri (fun prod (nt1, rhs) -> (* Iterate over all nonterminal symbols [nt2] in the right-hand side. *) Array.iteri (fun i symbol -> match symbol with | Symbol.T _ -> () | Symbol.N nt2 -> let nullable = NULLABLE.production prod (i+1) and first = P.first prod (i+1) in (* The FIRST set of the remainder of the right-hand side contributes to the FOLLOW set of [nt2]. *) record_ConVar first nt2; (* If the remainder of the right-hand side is nullable, FOLLOW(nt1) contributes to FOLLOW(nt2). *) if nullable then record_VarVar nt1 nt2 ) rhs ) Production.table (* Second pass. Solve the equations (on demand). *) let follow : Nonterminal.t -> P.property = solve() end (* Use the above functor to obtain the standard (concrete) FOLLOW sets. *) let follow : Nonterminal.t -> TerminalSet.t = let module F = FOLLOW(struct include TerminalSet let terminal = singleton let first = FIRST.production end) in F.follow (* At log level 2, display the FOLLOW sets. *) let () = if verbose then Error.logG 2 (fun f -> for nt = Nonterminal.start to Nonterminal.n - 1 do Printf.fprintf f "follow(%s) = %s\n" (Nonterminal.print false nt) (TerminalSet.print (follow nt)) done ) (* Compute FOLLOW sets for the terminal symbols as well. Again, unnecessary for us, but requested by a user. This is done in a single pass over the grammar -- no new fixpoint computation is required. *) let tfollow : TerminalSet.t array Lazy.t = lazy ( let tfollow = Array.make Terminal.n TerminalSet.empty in (* Iterate over all productions. *) Array.iteri (fun prod (nt1, rhs) -> (* Iterate over all terminal symbols [t2] in the right-hand side. 
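In other words, for every production [nt1 -> alpha nt2 beta], the set FIRST(beta) contributes to FOLLOW(nt2), and, if [beta] is nullable, FOLLOW(nt1) contributes to FOLLOW(nt2) as well. These are exactly the two constraints recorded below.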
*) Array.iteri (fun i symbol -> match symbol with | Symbol.N _ -> () | Symbol.T t2 -> let nullable = NULLABLE.production prod (i+1) and first = FIRST.production prod (i+1) in (* The FIRST set of the remainder of the right-hand side contributes to the FOLLOW set of [t2]. *) tfollow.(t2) <- TerminalSet.union first tfollow.(t2); (* If the remainder of the right-hand side is nullable, FOLLOW(nt1) contributes to FOLLOW(t2). *) if nullable then tfollow.(t2) <- TerminalSet.union (follow nt1) tfollow.(t2) ) rhs ) Production.table; tfollow ) (* Define another accessor. *) let tfollow t = (Lazy.force tfollow).(t) (* At log level 3, display the FOLLOW sets for terminal symbols. *) let () = if verbose then Error.logG 3 (fun f -> for t = 0 to Terminal.n - 1 do Printf.fprintf f "follow(%s) = %s\n" (Terminal.print t) (TerminalSet.print (tfollow t)) done ) (* ------------------------------------------------------------------------ *) (* Compute symbolic FIRST and FOLLOW sets. *) (* The symbolic FIRST set of the word determined by [prod/i] is defined (and computed) as follows. *) let sfirst prod i = let rhs = Production.rhs prod in let n = Array.length rhs in let rec loop i = if i = n then (* If the word [prod/i] is empty, the set is empty. *) SymbolSet.empty else let sym = rhs.(i) in (* If the word [prod/i] begins with a symbol [sym], then [sym] itself is part of the symbolic FIRST set, unconditionally. *) SymbolSet.union (SymbolSet.singleton sym) (* Furthermore, if [sym] is nullable, then the symbolic FIRST set of the sub-word [prod/i+1] contributes, too. *) (if NULLABLE.symbol sym then loop (i + 1) else SymbolSet.empty) in loop i (* The symbolic FOLLOW sets are computed just like the FOLLOW sets, except we use a symbolic FIRST set instead of a standard FIRST set. *) let sfollow : Nonterminal.t -> SymbolSet.t = let module F = FOLLOW(struct include SymbolSet let terminal t = SymbolSet.singleton (Symbol.T t) let first = sfirst end) in F.follow (* At log level 3, display the symbolic FOLLOW sets. *) let () = if verbose then Error.logG 3 (fun f -> for nt = Nonterminal.start to Nonterminal.n - 1 do Printf.fprintf f "sfollow(%s) = %s\n" (Nonterminal.print false nt) (SymbolSet.print (sfollow nt)) done ) (* ------------------------------------------------------------------------ *) (* Provide explanations about FIRST sets. *) (* The idea is to explain why a certain token appears in the FIRST set for a certain sequence of symbols. Such an explanation involves basic assertions of the form (i) symbol N is nullable and (ii) the token appears in the FIRST set for symbol N. We choose to take these basic facts for granted, instead of recursively explaining them, so as to keep explanations short. *) (* We first produce an explanation in abstract syntax, then convert it to a human-readable string. *) type explanation = | EObvious (* sequence begins with desired token *) | EFirst of Terminal.t * Nonterminal.t (* sequence begins with a nonterminal that produces desired token *) | ENullable of Symbol.t list * explanation (* sequence begins with a list of nullable symbols and ... 
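that is, the nested explanation accounts for the remainder of the sequence, beyond these nullable symbols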
*) let explain (tok : Terminal.t) (rhs : Symbol.t array) (i : int) = let length = Array.length rhs in let rec loop i = assert (i < length); let symbol = rhs.(i) in match symbol with | Symbol.T tok' -> assert (Terminal.equal tok tok'); EObvious | Symbol.N nt -> if TerminalSet.mem tok (FIRST.nonterminal nt) then EFirst (tok, nt) else begin assert (NULLABLE.nonterminal nt); match loop (i + 1) with | ENullable (symbols, e) -> ENullable (symbol :: symbols, e) | e -> ENullable ([ symbol ], e) end in loop i let rec convert = function | EObvious -> "" | EFirst (tok, nt) -> Printf.sprintf "%s can begin with %s" (Nonterminal.print false nt) (Terminal.print tok) | ENullable (symbols, e) -> let e = convert e in Printf.sprintf "%scan vanish%s%s" (Symbol.printl symbols) (if e = "" then "" else " and ") e (* ------------------------------------------------------------------------ *) (* Package the analysis results. *) module Analysis = struct let nullable = NULLABLE.nonterminal let nullable_symbol = NULLABLE.symbol let first = FIRST.nonterminal let first_symbol = FIRST.symbol (* An initial definition of [nullable_first_prod]. *) let nullable_first_prod prod i = NULLABLE.production prod i, FIRST.production prod i (* A memoised version, so as to avoid recomputing along a production's right-hand side. *) let nullable_first_prod = Misc.tabulate Production.n (fun prod -> Misc.tabulate (Production.length prod + 1) (fun i -> nullable_first_prod prod i ) ) let first_prod_lookahead prod i z = let nullable, first = nullable_first_prod prod i in if nullable then TerminalSet.add z first else first let explain_first_rhs (tok : Terminal.t) (rhs : Symbol.t array) (i : int) = convert (explain tok rhs i) let follow = follow let attributes = grammar.gr_attributes end (* ------------------------------------------------------------------------ *) (* Conflict resolution via precedences. *) module Precedence = struct type choice = | ChooseShift | ChooseReduce | ChooseNeither | DontKnow type order = Lt | Gt | Eq | Ic let precedence_order p1 p2 = match p1, p2 with | UndefinedPrecedence, _ | _, UndefinedPrecedence -> Ic | PrecedenceLevel (m1, l1, _, _), PrecedenceLevel (m2, l2, _, _) -> if not (InputFile.same_input_file m1 m2) then Ic else if l1 > l2 then Gt else if l1 < l2 then Lt else Eq let production_order p1 p2 = match p1, p2 with | ProductionLevel (m1, l1), ProductionLevel (m2, l2) -> if not (InputFile.same_input_file m1 m2) then Ic else if l1 > l2 then Gt else if l1 < l2 then Lt else Eq let shift_reduce tok prod = let fact1, tokp = Terminal.precedence_level tok and fact2, prodp = Production.precedence prod in match precedence_order tokp prodp with (* Our information is inconclusive. Drop [fact1] and [fact2], that is, do not record that this information was useful. *) | Ic -> DontKnow (* Our information is useful. Record that fact by evaluating [fact1] and [fact2]. *) | (Eq | Lt | Gt) as c -> Lazy.force fact1; Lazy.force fact2; match c with | Ic -> assert false (* already dispatched *) | Eq -> begin match Terminal.associativity tok with | LeftAssoc -> ChooseReduce | RightAssoc -> ChooseShift | NonAssoc -> ChooseNeither | _ -> assert false (* If [tok]'s precedence level is defined, then its associativity must be defined as well. 
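Indeed, a precedence level and an associativity status are declared together, via a single %left, %right, or %nonassoc declaration.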
*) end | Lt -> ChooseReduce | Gt -> ChooseShift let reduce_reduce prod1 prod2 = let pl1 = Production.production_level.(prod1) and pl2 = Production.production_level.(prod2) in match production_order pl1 pl2 with | Lt -> Some prod1 | Gt -> Some prod2 | Eq -> (* The order is strict except in the presence of parameterized non-terminals and/or inlining. Two productions can have the same precedence level if they originate, via macro-expansion or via inlining, from a single production in the source grammar. *) None | Ic -> None end (* This function prints warnings about useless precedence declarations for terminal symbols (%left, %right, %nonassoc) and productions (%prec). It should be invoked after only the automaton has been constructed. *) let diagnostics () = if not Settings.ignore_all_unused_precedence_levels then begin TokPrecedence.diagnostics(); Production.diagnostics() end (* ------------------------------------------------------------------------ *) (* %on_error_reduce declarations. *) module OnErrorReduce = struct (* We keep a [StringMap] internally, and convert back and forth between the types [Nonterminal.t] and [string] when querying this map. This is not very elegant, and could be changed if desired. *) let declarations : Syntax.on_error_reduce_level StringMap.t = grammar.on_error_reduce let print (nt : Nonterminal.t) : string = Nonterminal.print false nt let lookup (nt : string) : Nonterminal.t = try Nonterminal.lookup nt with Not_found -> (* If this fails, then we have an [%on_error_reduce] declaration for an invalid symbol. *) assert false let reduce prod = let nt = Production.nt prod in StringMap.mem (print nt) declarations let iter f = StringMap.iter (fun nt _prec -> f (lookup nt) ) declarations open Precedence let preferable prod1 prod2 = (* The two productions that we are comparing must be distinct. *) assert (prod1 <> prod2); let nt1 = Production.nt prod1 and nt2 = Production.nt prod2 in (* If they have the same left-hand side (which seems rather unlikely?), declare them incomparable. *) nt1 <> nt2 && (* Otherwise, look up the priority levels associated with their left-hand symbols. *) let prec1, prec2 = try StringMap.find (print nt1) declarations, StringMap.find (print nt2) declarations with Not_found -> (* [preferable] should be used to compare two symbols for which there exist [%on_error_reduce] declarations. *) assert false in match production_order prec1 prec2 with | Gt -> (* [prec1] is a higher integer than [prec2], therefore comes later in the file. By analogy with [%left] and friends, we give higher priority to later declarations. *) true | Lt -> false | Eq | Ic -> (* We could issue a warning or an information message in these cases. *) false end (* ------------------------------------------------------------------------ *) end (* module Make *) menhir-20171222/src/tokenType.ml0000664000175000017500000001517613217215730016630 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module deals with a few details regarding the definition of the [token] type. 
In particular, if [--only-tokens] was specified, it emits the type definition and exits. *) open UnparameterizedSyntax open IL open CodeBits (* This is the conventional name of the [token] type, with no prefix. A prefix is possibly appended to it below, where [tctoken] is redefined before being exported. *) let tctoken = "token" let ttoken = TypApp (tctoken, []) (* This is the conventional name of the token GADT, which describes the tokens. Same setup as above. *) let tctokengadt = "terminal" let ttokengadt a = TypApp (tctokengadt, [ a ]) (* This is the conventional name of the data constructors of the token GADT. *) let ttokengadtdata token = "T_" ^ token (* This is the definition of the type of tokens. It is defined as an algebraic data type, unless [--external-tokens M] is set, in which case it is defined as an abbreviation for the type [M.token]. *) let tokentypedef grammar = let typerhs = match Settings.token_type_mode with | Settings.TokenTypeOnly | Settings.TokenTypeAndCode -> (* Algebraic data type. *) TDefSum ( List.map (fun (tok, typo) -> { dataname = tok; datavalparams = (match typo with None -> [] | Some t -> [ TypTextual t ]); datatypeparams = None }) (typed_tokens grammar) ) | Settings.CodeOnly m -> (* Type abbreviation. *) TAbbrev (TypApp (m ^ "." ^ tctoken, [])) in [ IIComment "The type of tokens."; IITypeDecls [{ typename = tctoken; typeparams = []; typerhs; typeconstraint = None }] ] (* This is the definition of the token GADT. Here, the data constructors have no value argument, but have a type index. *) (* The token GADT is produced only when [Settings.inspection] is true. Thus, when [Settings.inspection] is false, we remain compatible with old versions of OCaml, without GADTs. *) (* Although the [token] type does not include the [error] token (because this token is never produced by the lexer), the token GADT must include the [error] token (because this GADT must describe all of the tokens that are allowed to appear in a production). *) (* It is defined as a generalized algebraic data type, unless [--external-tokens M] is set, in which case it is defined as an abbreviation for the type ['a M.tokengadt]. *) let tokengadtdef grammar = assert Settings.inspection; let param, typerhs = match Settings.token_type_mode with | Settings.TokenTypeOnly | Settings.TokenTypeAndCode -> (* Generalized algebraic data type. *) let param = "_" in param, TDefSum ( (* The ordering of this list matters. We want the data constructors to respect the internal ordering (as determined by [typed_tokens] in [UnparameterizedSyntax]) of the terminal symbols. This may be exploited in the table back-end to allow an unsafe conversion of a data constructor to an integer code. See [t2i] in [InspectionTableInterpreter]. *) { dataname = ttokengadtdata "error"; datavalparams = []; datatypeparams = Some [ tunit ] (* the [error] token has a semantic value of type [unit] *) } :: List.map (fun (token, typo) -> { dataname = ttokengadtdata token; datavalparams = []; datatypeparams = Some [ match typo with None -> tunit | Some t -> TypTextual t ] }) (typed_tokens grammar) ) | Settings.CodeOnly m -> (* Type abbreviation. *) let param = "a" in param, TAbbrev (TypApp (m ^ "." ^ tctokengadt, [ TypVar param ])) in [ IIComment "The indexed type of terminal symbols."; IITypeDecls [{ typename = tctokengadt; typeparams = [ param ]; typerhs; typeconstraint = None }] ] (* If we were asked to only produce a type definition, then do so and stop. 
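Concretely, under --only-tokens, the function below writes both an .mli file and an .ml file containing just the type definitions, then exits. For instance, given the hypothetical declarations %token <int> INT and %token PLUS, the emitted definition takes the form

     type token =
       | INT of int
       | PLUS

with the data constructors in the order determined by [typed_tokens].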
*) let produce_tokentypes grammar = match Settings.token_type_mode with | Settings.TokenTypeOnly -> (* Create both an .mli file and an .ml file. This is made necessary by the fact that the two can be different when there are functor parameters. *) let i = tokentypedef grammar @ listiflazy Settings.inspection (fun () -> tokengadtdef grammar ) in let module P = Printer.Make (struct let f = open_out (Settings.base ^ ".mli") let locate_stretches = None end) in P.interface [ IIFunctor (grammar.parameters, i) ]; let module P = Printer.Make (struct let f = open_out (Settings.base ^ ".ml") let locate_stretches = None end) in P.program [ SIFunctor (grammar.parameters, interface_to_structure i ) ]; exit 0 | Settings.CodeOnly _ | Settings.TokenTypeAndCode -> () (* The token type and the token GADTs can be referred to via a short (unqualified) name, regardless of how they have been defined (either directly or as an abbreviation). However, their data constructors must be qualified if [--external-tokens] is set. *) let tokenprefix id = match Settings.token_type_mode with | Settings.CodeOnly m -> m ^ "." ^ id | Settings.TokenTypeAndCode -> id | Settings.TokenTypeOnly -> id (* irrelevant, really *) let tokendata = tokenprefix let tokengadtdata token = tokenprefix (ttokengadtdata token) menhir-20171222/src/SelectiveExpansion.mli0000664000175000017500000000377613217215730020632 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Syntax open SortInference (* [expand sorts g] expands away some or all of the parameterized nonterminal symbols in the grammar [g], producing a new grammar. [sorts] is the sort environment produced by [SortInference]. *) (* The mode [ExpandHigherSort] causes a partial expansion: only the parameters of higher sort (i.e., of sort other than [*]) are expanded away. This mode is safe, in the sense that expansion always terminates. A proof sketch is as follows: 1- an application always has sort [*]; 2- therefore, only a variable can have higher sort; 3- therefore, only a finite number of terms can appear during expansion. *) (* The mode [ExpandAll] causes a complete expansion: all parameters are expanded away. This process is potentially nonterminating. One must first run the termination test in [CheckSafeParameterizedGrammar] (which itself is applicable only after the parameters of higher sort have been expanded away). *) type mode = | ExpandHigherSort | ExpandAll val expand: mode -> sorts -> grammar -> grammar menhir-20171222/src/mark.mli0000664000175000017500000000270313217215730015741 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (** This module implements a very simple notion of ``mark''. *) (** The type of marks. 
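A mark can be thought of as a unique stamp. A typical use, found in [item.ml], is to mark the nodes of a graph during a traversal: one creates a fresh mark [this] at the beginning of the traversal, writes [node.mark <- this] when a node is visited, and tests [same node.mark this] to recognize the nodes that have been visited already.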
*) type t (** [fresh()] generates a fresh mark, that is, a mark that is guaranteed to be distinct from all existing marks. *) val fresh: unit -> t (** [same mark1 mark2] tells whether [mark1] and [mark2] are the same mark, that is, were created by the same call to [fresh]. *) val same: t -> t -> bool (** [none] is a distinguished mark, created via an initial call to [fresh()]. *) val none: t menhir-20171222/src/item.ml0000664000175000017500000003006113217215730015572 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open Grammar (* ------------------------------------------------------------------------ *) (* Items. *) (* An LR(0) item encodes a pair of integers, namely the index of the production and the index of the bullet in the production's right-hand side. *) (* Both integers are packed into a single integer, using 7 bits for the bullet position and the rest (usually 24 bits) for the production index. These widths could be adjusted. *) (* The function [export] is duplicated in [TableInterpreter]. Do not modify it; or modify it here and there in a consistent manner. *) type t = int let import (prod, pos) = assert (pos < 128); (Production.p2i prod) lsl 7 + pos let export t = (Production.i2p (t lsr 7), t mod 128) let marshal (item : t) : int = item (* Comparison. *) let equal (item1 : t) (item2: t) = item1 = item2 (* Position. *) let positions (item : t) = let prod, _ = export item in Production.positions prod (* [def item] looks up the production associated with this item in the grammar and returns [prod, nt, rhs, pos, length], where [prod] is the production's index, [nt] and [rhs] represent the production, [pos] is the position of the bullet in the item, and [length] is the length of the production's right-hand side. *) let def t = let prod, pos = export t in let nt, rhs = Production.def prod in let length = Array.length rhs in assert ((pos >= 0) && (pos <= length)); prod, nt, rhs, pos, length let startnt t = let _, _, rhs, pos, length = def t in assert (pos = 0 && length = 1); match rhs.(0) with | Symbol.N nt -> nt | Symbol.T _ -> assert false (* Printing. *) let print item = let _, nt, rhs, pos, _ = def item in Printf.sprintf "%s -> %s" (Nonterminal.print false nt) (Symbol.printaod 0 pos rhs) (* Classifying items. *) type kind = | Shift of Symbol.t * t | Reduce of Production.index let classify item = let prod, _, rhs, pos, length = def item in if pos = length then Reduce prod else Shift (rhs.(pos), import (prod, pos + 1)) (* Sets of items and maps over items. Hashing these data structures is specifically allowed, so balanced trees (for instance) would not be applicable here. *) module Map = Patricia.Big module Set = Map.Domain (* This functor performs precomputation that helps efficiently compute the closure of an LR(0) or LR(1) state. The precomputation requires time linear in the size of the grammar. The nature of the lookahead sets remains abstract. *) (* The precomputation consists in building the LR(0) nondeterministic automaton. This is a graph whose nodes are items and whose edges are epsilon transitions. 
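There is an epsilon transition out of every item of the form [A -> alpha . B beta] towards every item of the form [B -> . gamma], that is, towards the initial item of every production associated with [B].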
(We do not care about shift transitions here.) Lookahead information can be attached to nodes and is propagated through the graph during closure computations. *) module Closure (L : Lookahead.S) = struct type state = L.t Map.t type node = { (* Nodes are sequentially numbered so as to allow applying Tarjan's algorithm (below). *) num: int; (* Each node is associated with an item. *) item: t; (* All of the epsilon transitions that leave a node have the same behavior with respect to lookahead information. *) (* The lookahead set transmitted along an epsilon transition is either a constant, or the union of a constant and the lookahead set at the source node. The former case corresponds to a source item whose trailer is not nullable, the latter to a source item whose trailer is nullable. *) epsilon_constant: L.t; epsilon_transmits: bool; (* Each node carries pointers to its successors through epsilon transitions. This field is never modified once initialization is over. *) mutable epsilon_transitions: node list; (* The following fields are transient, that is, only used temporarily during graph traversals. Marks are used to recognize which nodes have been traversed already. Lists of predecessors are used to record which edges have been traversed. Lookahead information is attached with each node. *) mutable mark: Mark.t; mutable predecessors: node list; mutable lookahead: L.t; } (* Allocate one graph node per item and build a mapping of items to nodes. *) let count = ref 0 let mapping : node array array = Array.make Production.n [||] let item2node item = let prod, pos = export item in mapping.(Production.p2i prod).(pos) let () = Production.iter (fun prod -> let _nt, rhs = Production.def prod in let length = Array.length rhs in mapping.(Production.p2i prod) <- Array.init (length+1) (fun pos -> let item = import (prod, pos) in let num = !count in count := num + 1; (* The lookahead set transmitted through an epsilon transition is the FIRST set of the remainder of the source item, plus, if that is nullable, the lookahead set of the source item. *) let constant, transmits = if pos < length then let nullable, first = Analysis.nullable_first_prod prod (pos + 1) in L.constant first, nullable else (* No epsilon transitions leave this item. *) L.empty, false in { num = num; item = item; epsilon_constant = constant; epsilon_transmits = transmits; epsilon_transitions = []; (* temporary placeholder *) mark = Mark.none; predecessors = []; lookahead = L.empty; } ) ) (* At each node, compute transitions. *) let () = Production.iter (fun prod -> let _nt, rhs = Production.def prod in let length = Array.length rhs in Array.iteri (fun pos node -> node.epsilon_transitions <- if pos < length then match rhs.(pos) with | Symbol.N nt -> Production.foldnt nt [] (fun prod nodes -> (item2node (import (prod, 0))) :: nodes ) | Symbol.T _ -> [] else [] ) mapping.(Production.p2i prod) ) (* Detect and reject cycles of transitions that transmit a lookahead set. We need to ensure that there are no such cycles in order to be able to traverse these transitions in topological order. Each such cycle corresponds to a set of productions of the form A1 -> A2, A2 -> A3, ..., An -> A1 (modulo nullable trailers). Such cycles are unlikely to occur in realistic grammars, so our current approach is to reject the grammar if such a cycle exists. Actually, according to DeRemer and Pennello (1982), such a cycle is exactly an includes cycle, and implies that the grammar is not LR(k) for any k, unless A1, ..., An are in fact uninhabited. 
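(A minimal example of such a cycle is the pair of productions A -> B and B -> A.)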
In other words, this is a pathological case. *) (* Yes, indeed, this is called a cycle in Aho & Ullman's book, and a loop in Grune & Jacobs' book. It is not difficult to see that (provided all symbols are inhabited) the grammar is infinitely ambiguous if and only if there is a loop. *) module P = struct type foo = node type node = foo let n = !count let index node = node.num let iter f = Array.iter (fun nodes -> Array.iter f nodes ) mapping let successors f node = if node.epsilon_transmits then List.iter f node.epsilon_transitions end module T = Tarjan.Run (P) let cycle scc = let items = List.map (fun node -> node.item) scc in let positions = List.flatten (List.map positions items) in let names = String.concat "\n" (List.map print items) in Error.error positions "the grammar is ambiguous.\n\ The following items participate in an epsilon-cycle:\n\ %s" names let () = P.iter (fun node -> let scc = T.scc node in match scc with | [] -> () | [ node ] -> (* This is a strongly connected component of one node. Check whether it carries a self-loop. Forbidding self-loops is not strictly required by the code that follows, but is consistent with the fact that we forbid cycles of length greater than 1. *) P.successors (fun successor -> if successor.num = node.num then cycle scc ) node | _ -> (* This is a strongly connected component of at least two elements. *) cycle scc ) (* Closure computation. *) let closure (items : state) : state = (* Explore the graph forwards, starting from these items. Marks are used to tell which nodes have been visited. Build a list of all visited nodes; this is in fact the list of all items in the closure. At initial nodes and when reaching a node through a transition, record a lookahead set. When we reach a node through a transition that transmits the lookahead set found at its source, record its source, so as to allow re-traversing this transition backwards (below). *) let this = Mark.fresh() in let nodes = ref [] in let rec visit father transmits toks node = if Mark.same node.mark this then begin (* Node has been visited already. *) node.lookahead <- L.union toks node.lookahead; if transmits then node.predecessors <- father :: node.predecessors end else begin (* Node is new. *) node.predecessors <- if transmits then [ father ] else []; node.lookahead <- toks; follow node end and follow node = node.mark <- this; nodes := node :: !nodes; List.iter (visit node node.epsilon_transmits node.epsilon_constant) node.epsilon_transitions in Map.iter (fun item toks -> let node = item2node item in visit node (* dummy! *) false toks node ) items; let nodes = !nodes in (* Explore the graph of transmitting transitions backwards. By hypothesis, it is acyclic, so this is a topological walk. Lookahead sets are inherited through transitions. *) let this = Mark.fresh() in let rec walk node = if not (Mark.same node.mark this) then begin (* Node is new. *) node.mark <- this; (* Explore all predecessors and merge their lookahead sets into the current node's own lookahead set. *) List.iter (fun predecessor -> walk predecessor; node.lookahead <- L.union predecessor.lookahead node.lookahead ) node.predecessors end in List.iter walk nodes; (* Done. Produce a mapping of items to lookahead sets. Clear all transient fields so as to reduce pressure on the GC -- this does not make much difference. 
*) List.fold_left (fun closure node -> node.predecessors <- []; let closure = Map.add node.item node.lookahead closure in node.lookahead <- L.empty; closure ) Map.empty nodes (* End of closure computation *) end menhir-20171222/src/LRijkstra.ml0000664000175000017500000014747513217215730016563 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* The purpose of this algorithm is to find, for each pair of a state [s] and a terminal symbol [z] such that looking at [z] in state [s] causes an error, a minimal path (starting in some initial state) that actually triggers this error. *) (* This is potentially useful for grammar designers who wish to better understand the properties of their grammar, or who wish to produce a list of all possible syntax errors (or, at least, one syntax error in each automaton state where an error may occur). *) (* The problem seems rather tricky. One might think that it suffices to compute shortest paths in the automaton, and to use [Analysis.minimal] to replace each non-terminal symbol in a path with a minimal word that this symbol generates. One can indeed do so, but this yields only a lower bound on the actual shortest path to the error at [s, z]. Indeed, several difficulties arise, including the fact that reductions are subject to a lookahead hypothesis; the fact that some states have a default reduction, hence will never trigger an error; the fact that conflict resolution removes some (shift or reduce) actions, hence may suppress the shortest path. *) (* We explicitly choose to ignore the [error] token. Thus, we disregard any reductions or transitions that take place when the lookahead symbol is [error]. As a result, any state whose incoming symbol is [error] is found unreachable. It would be too complicated to have to create a first error in order to be able to take certain transitions or drop certain parts of the input. *) (* We never work with the terminal symbol [#] either. This symbol never appears in the maps returned by [Lr1.transitions] and [Lr1.reductions]. Thus, in principle, we work with ``real'' terminal symbols only. However, we encode [any] as [#] -- see below. *) (* NOTE: THIS FILE IS COMPILED WITH -noassert BY DEFAULT. If you would like the assertions to be tested at runtime, change that in the file _tags. The performance impact of the assertions is about 10%. *) (* ------------------------------------------------------------------------ *) (* To delay the side effects performed by this module, we wrap everything in in a big functor. The functor also serves to pass verbosity parameters. *) module Run (X : sig (* If [verbose] is set, produce various messages on [stderr]. *) val verbose: bool (* If [statistics] is defined, it is interpreted as the name of a file to which one line of statistics is appended. *) val statistics: string option end) = struct open Grammar (* ------------------------------------------------------------------------ *) (* Record our start time. 
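We measure user CPU time, via [Unix.times], and we do so only if statistics have been requested, so that no cost is incurred otherwise.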
*) let now () = match X.statistics with | Some _ -> Unix.((times()).tms_utime) | None -> 0.0 let start = now() (* ------------------------------------------------------------------------ *) (* Because of our encoding of terminal symbols as 8-bit characters, this algorithm supports at most 256 terminal symbols. *) let () = if Terminal.n > 256 then Error.error [] "--list-errors supports at most 256 terminal symbols.\n\ The grammar has %d terminal symbols." Terminal.n (* ------------------------------------------------------------------------ *) (* Build a module that represents words as (hash-consed) strings. Note: this functor application has a side effect (it allocates memory, and more importantly, it may fail). *) module W = Terminal.Word(struct end) (* ------------------------------------------------------------------------ *) (* The [error] token may appear in the maps returned by [Lr1.transitions] and [Lr1.reductions], so we sometimes need to explicitly check for it. *) let non_error z = not (Terminal.equal z Terminal.error) (* We introduce a pseudo-terminal symbol [any]. It is used in several places later on, in particular in the [lookahead] field of a fact, to encode the absence of a lookahead hypothesis -- i.e., any terminal symbol will do. *) (* We choose to encode [any] as [#]. There is no risk of confusion, since we do not use [#] anywhere. Thus, the assertion [Terminal.real z] implies [z <> any]. *) let any = Terminal.sharp (* ------------------------------------------------------------------------ *) (* We begin with a number of auxiliary functions that provide information about the LR(1) automaton. These functions could perhaps be moved to a separate module. We keep them here, for the moment, because they are not used anywhere else. *) (* [reductions_on s z] is the list of reductions permitted in state [s] when the lookahead symbol is [z]. This is a list of zero or one elements. This does not take default reductions into account. *) let reductions_on s z : Production.index list = assert (Terminal.real z); try TerminalMap.find z (Lr1.reductions s) with Not_found -> [] (* [has_reduction s z] tells whether state [s] is willing to reduce some production (and if so, which one) when the lookahead symbol is [z]. It takes a possible default reduction into account. *) let has_reduction s z : Production.index option = assert (Terminal.real z); match Default.has_default_reduction s with | Some (prod, _) -> Some prod | None -> match reductions_on s z with | prod :: prods -> assert (prods = []); Some prod | [] -> None (* [can_reduce s prod] indicates whether state [s] is able to reduce production [prod] (either as a default reduction, or as a normal reduction). *) let can_reduce s prod = match Default.has_default_reduction s with | Some (prod', _) when prod = prod' -> true | _ -> TerminalMap.fold (fun z prods accu -> (* A reduction on [#] is always a default reduction. (See [lr1.ml].) *) assert (not (Terminal.equal z Terminal.sharp)); accu || non_error z && List.mem prod prods ) (Lr1.reductions s) false (* [causes_an_error s z] tells whether state [s] will initiate an error on the lookahead symbol [z]. *) let causes_an_error s z : bool = assert (Terminal.real z); match Default.has_default_reduction s with | Some _ -> false | None -> reductions_on s z = [] && not (SymbolMap.mem (Symbol.T z) (Lr1.transitions s)) (* [foreach_terminal f] applies the function [f] to every terminal symbol in turn, except [error] and [#]. 
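   As a small illustration of this iterator (the function [count_real] is hypothetical, shown for exposition only):

     let count_real () : int =
       let k = ref 0 in
       foreach_terminal (fun _z -> incr k);
       !k

   would count the real terminal symbols, i.e., every terminal symbol except [error] and [#].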
*) let foreach_terminal = Terminal.iter_real (* [foreach_terminal_not_causing_an_error s f] applies the function [f] to every terminal symbol [z] such that [causes_an_error s z] is false. This could be implemented in a naive manner using [foreach_terminal] and [causes_an_error]. This implementation is significantly more efficient. *) let foreach_terminal_not_causing_an_error s f = match Default.has_default_reduction s with | Some _ -> (* There is a default reduction. No symbol causes an error. *) foreach_terminal f | None -> (* Enumerate every terminal symbol [z] for which there is a reduction. *) TerminalMap.iter (fun z _ -> (* A reduction on [#] is always a default reduction. (See [lr1.ml].) *) assert (not (Terminal.equal z Terminal.sharp)); if non_error z then f z ) (Lr1.reductions s); (* Enumerate every terminal symbol [z] for which there is a transition. *) SymbolMap.iter (fun sym _ -> match sym with | Symbol.T z -> assert (not (Terminal.equal z Terminal.sharp)); if non_error z then f z | Symbol.N _ -> () ) (Lr1.transitions s) (* Let us say a state [s] is solid if its incoming symbol is a terminal symbol (or if it has no incoming symbol at all, i.e., it is an initial state). It is fragile if its incoming symbol is a non-terminal symbol. *) let is_solid s = match Lr1.incoming_symbol s with | None | Some (Symbol.T _) -> true | Some (Symbol.N _) -> false (* [reduction_path_exists s w prod] tests whether the path determined by the sequence of symbols [w] out of the state [s] exists in the automaton and leads to a state where [prod] can be reduced. It further requires [w] to not contain the [error] token. Finally, if it sees the [error] token, it sets the flag [grammar_uses_error]. *) let grammar_uses_error = ref false let rec reduction_path_exists s (w : Symbol.t list) prod : bool = match w with | [] -> can_reduce s prod | (Symbol.T t) :: _ when Terminal.equal t Terminal.error -> grammar_uses_error := true; false | a :: w -> match SymbolMap.find a (Lr1.transitions s) with | s -> reduction_path_exists s w prod | exception Not_found -> false (* ------------------------------------------------------------------------ *) (* Suppose [s] is a state that carries an outgoing edge labeled with a non-terminal symbol [nt]. We are interested in finding out how this edge can be taken. In order to do that, we must determine how, by starting in [s], one can follow a path that corresponds to (the right-hand side of) a production [prod] associated with [nt]. There are in general several such productions. The paths that they determine in the automaton form a "star". We represent the star rooted at [s] as a trie. For every state [s], the star rooted at [s] is constructed in advance, before the algorithm runs. While the algorithm runs, a point in the trie (that is, a sub-trie) tells us where we come from, where we are, and which production(s) we are hoping to reduce in the future. *) module Trie : sig type trie (* [star s] creates a (new) trie whose source is [s], populated with its branches. (There is one branch for every production [prod] associated with every non-terminal symbol [nt] for which [s] carries an outgoing edge.) If the star turns out to be trivial then [None] is returned. *) val star: Lr1.node -> trie option (* After [star s] has been called, [size (Lr1.number s)] reports the size of the trie that has been constructed for state [s]. *) val size: int -> int (* After [star] has been called a number of times, [total_size()] reports the total size of the tries that have been constructed.
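   As an illustration of how these operations fit together ([spells] is a hypothetical client, shown for exposition only):

     let rec spells (t : trie) (w : Symbol.t list) (prod : Production.index) : bool =
       match w with
       | [] ->
           accepts prod t
       | sym :: w ->
           match step sym t with
           | t -> spells t w prod
           | exception Not_found -> false

   tests whether the symbol sequence [w] spells out a branch of [t] that ends with a reduction of [prod].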
*) val total_size: unit -> int (* Every (sub-)trie has a unique identity. (One can think of it as its address.) [compare] compares the identity of two tries. This can be used, e.g., to set up a map whose keys are tries. *) val compare: trie -> trie -> int (* [source t] returns the source state of the (sub-)trie [t]. This is the root of the star of which [t] is a sub-trie. In other words, this tells us "where we come from". *) val source: trie -> Lr1.node (* [current t] returns the current state of the (sub-)trie [t]. This is the root of the sub-trie [t]. In other words, this tells us "where we are". *) val current: trie -> Lr1.node (* [accepts prod t] tells whether the current state of the trie [t] is the end of a branch associated with production [prod]. If so, this means that we have successfully followed a path that corresponds to the right-hand side of production [prod]. *) val accepts: Production.index -> trie -> bool (* [step sym t] is the immediate sub-trie of [t] along the symbol [sym]. This function raises [Not_found] if [t] has no child labeled [sym]. *) val step: Symbol.t -> trie -> trie (* [verbose()] outputs debugging & performance information. *) val verbose: unit -> unit (* Since every (sub-)trie has a unique identity, its identity can serve as a unique integer code for this (sub-)trie. We allow this conversion, both ways. This mechanism is used only as a way of saving space in the encoding of facts. *) val encode: trie -> int val decode: int -> trie end = struct (* A trie has the following structure. *) type trie = { (* A unique identity, used by [compare]. The trie construction code ensures that these numbers are indeed unique: see [fresh], [insert], [star]. *) identity: int; (* The root state of this star: "where we come from". *) source: Lr1.node; (* The current state, i.e., the root of this sub-trie: "where we are". *) current: Lr1.node; (* The productions that we can reduce in the current state. In other words, if this list is nonempty, then the current state is the end of one (or several) branches. It can nonetheless have children. *) mutable productions: Production.index list; (* The children, or sub-tries. *) mutable transitions: trie SymbolMap.t (* The two fields above are written only during the construction of a trie. Once every trie has been constructed, they are frozen. *) } (* This counter is used by [mktrie] to produce unique identities. *) let c = ref 0 (* We keep a mapping of integer identities to tries. Whenever a new identity is assigned, this mapping must be updated. *) let tries = let s : Lr1.node = Obj.magic () in (* yes, this hurts *) let dummy = { identity = -1; source = s; current = s; productions = []; transitions = SymbolMap.empty } in MenhirLib.InfiniteArray.make dummy (* This smart constructor creates a new trie with a unique identity. *) let mktrie source current productions transitions = let identity = Misc.postincrement c in let t = { identity; source; current; productions; transitions } in MenhirLib.InfiniteArray.set tries identity t; t (* [insert t w prod] updates the trie (in place) by adding a new branch, corresponding to the sequence of symbols [w], and ending with a reduction of production [prod]. We assume [reduction_path_exists t.current w prod] holds, so we need not worry about this being a dead branch, and we can use destructive updates without having to set up an undo mechanism.
*) let rec insert (t : trie) (w : Symbol.t list) prod : unit = match w with | [] -> assert (can_reduce t.current prod); t.productions <- prod :: t.productions | a :: w -> match SymbolMap.find a (Lr1.transitions t.current) with | exception Not_found -> assert false | successor -> (* Find our child at [a], or create it. *) let t' = try SymbolMap.find a t.transitions with Not_found -> let t' = mktrie t.source successor [] SymbolMap.empty in t.transitions <- SymbolMap.add a t' t.transitions; t' in (* Update our child. *) insert t' w prod (* [insert t prod] inserts a new branch, corresponding to production [prod], into the trie [t], which is updated in place. *) let insert t prod : unit = let w = Array.to_list (Production.rhs prod) in (* Check whether the path [w] leads to a state where [prod] can be reduced. If not, then some transition or reduction action must have been suppressed by conflict resolution; or the path [w] involves the [error] token. In that case, the branch is dead, and is not added. This test is superfluous (i.e., it would be OK to add a dead branch) but allows us to build a slightly smaller star in some cases. *) if reduction_path_exists t.current w prod then insert t w prod (* [fresh s] creates a new empty trie whose source is [s]. *) let fresh source = mktrie source source [] SymbolMap.empty (* The star at [s] is obtained by starting with a fresh empty trie and inserting into it every production [prod] whose left-hand side [nt] is the label of an outgoing edge at [s]. *) let star s = let t = fresh s in SymbolMap.iter (fun sym _ -> match sym with | Symbol.T _ -> () | Symbol.N nt -> Production.iternt nt (insert t) ) (Lr1.transitions s); t (* A trie [t] is nontrivial if it has at least one branch, i.e., contains at least one sub-trie whose [productions] field is nonempty. Trivia: a trie of size greater than 1 is necessarily nontrivial, but the converse is not true: a nontrivial trie can have size 1. (This occurs if all productions have zero length.) *) let trivial t = t.productions = [] && SymbolMap.is_empty t.transitions (* Redefine [star] to include a [nontrivial] test and to record the size of the newly built trie. *) let size = Array.make Lr1.n (-1) let star s = let initial = !c in let t = star s in let final = !c in size.(Lr1.number s) <- final - initial; if trivial t then None else Some t let size s = assert (size.(s) >= 0); size.(s) let total_size () = !c let compare t1 t2 = Pervasives.compare t1.identity t2.identity let source t = t.source let current t = t.current let accepts prod t = List.mem prod t.productions let step a t = SymbolMap.find a t.transitions (* careful: may raise [Not_found] *) let verbose () = Printf.eprintf "Total star size: %d\n%!" (total_size()) let decode i = let t = MenhirLib.InfiniteArray.get tries i in assert (t.identity = i); (* ensure we do not get the [dummy] trie *) t let encode t = assert (decode t.identity == t); (* round-trip property *) t.identity end (* ------------------------------------------------------------------------ *) (* The main algorithm, [LRijkstra], accumulates facts. A fact is a triple of a [position] (that is, a sub-trie), a [word], and a [lookahead] assumption. Such a fact means that this [position] can be reached, from the source state [Trie.source position], by consuming [word], under the assumption that the next input symbol is [lookahead]. *) (* We allow [lookahead] to be [any] so as to indicate that this fact does not have a lookahead assumption. 
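   As a worked instance of the packed encoding defined below (with small hypothetical values, on a 64-bit machine, where the widths are 25, 30 and 8 bits): a fact whose position has identity 5, whose word has code 12, and whose lookahead symbol has code 3 is encoded as

     (5 lsl 30 lor 12) lsl 8 lor 3     (* that is, 5 * 2^38 + 12 * 2^8 + 3 *)

   and its three components are recovered by

     let position = code lsr 38
     let word = (code lsr 8) land (1 lsl 30 - 1)
     let lookahead = code land (1 lsl 8 - 1)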
*) (* type fact = { position: Trie.trie; word: W.word; lookahead: Terminal.t (* may be [any] *) } *) (* To save memory (and therefore time), we encode a fact in a single OCaml integer value. This is made possible by the fact that tries, words, and terminal symbols are represented as (or can be encoded as) integers. This admittedly horrible hack allows us to save roughly a factor of 2 in space, and to gain 10% in time. *) type fact = int let dummy : fact = -1 (* should never be accessed! *) (* Encoding and decoding facts. *) (* We encode [position|word|lookahead] in a single word of memory. *) (* The lookahead symbol fits in 8 bits. *) (* In the largest grammars that we have seen, the number of unique words is about 3.10^5, so a word should fit in about 19 bits (2^19 = 524288). In the largest grammars that we have seen, the total star size is about 64000, so a trie should fit in about 17 bits (2^17 = 131072). *) (* On a 64-bit machine, we have ample space in a 63-bit word! We allocate 30 bits for [word] and the rest (i.e., 25 bits) for [position]. *) (* On a 32-bit machine, we are a bit more cramped! In Menhir's own fancy-parser, the number of terminal symbols is 27, the number of unique words is 566, and the total star size is 546. We allocate 12 bits for [word] and 11 bits for [position]. This is better than refusing to work altogether, but still not great. A more satisfactory approach might be to revert to heap allocation of facts when in 32-bit mode, but that would make the code somewhat ugly. *) let w_lookahead = 8 let w_word = if Sys.word_size < 64 then 12 else 30 let w_position = Sys.word_size - 1 - (w_word + w_lookahead) (* 25, on a 64-bit machine *) let identity (fact : fact) : int = assert (fact <> dummy); fact lsr (w_word + w_lookahead) let position (fact : fact) : Trie.trie = assert (fact <> dummy); Trie.decode (identity fact) let word (fact : fact) : W.word = assert (fact <> dummy); (fact lsr w_lookahead) land (1 lsl w_word - 1) let lookahead (fact : fact) : Terminal.t = Terminal.i2t (fact land (1 lsl w_lookahead - 1)) let mkfact position (word : W.word) lookahead = let position : int = Trie.encode position and word : int = word and lookahead : int = Terminal.t2i lookahead in assert (0 <= position && 0 <= word && 0 <= lookahead); assert (lookahead < 1 lsl w_lookahead); if position < 1 lsl w_position && word < 1 lsl w_word then (* [lsl] binds tighter than [lor] *) (position lsl w_word lor word) lsl w_lookahead lor lookahead else let advice = if Sys.word_size < 64 then "Please use a 64-bit machine." else "Please report this error to Menhir's developers." in Error.error [] "an internal limit was exceeded.\n\ Sys.word_size = %d. Position = %d. Word = %d.\n\ %s%!" Sys.word_size position word advice let mkfact p w l = let fact = mkfact p w l in assert (word fact == w); (* round-trip property *) assert (lookahead fact == l); (* round-trip property *) assert (position fact == p); (* round-trip property *) fact (* Two invariants reduce the number of facts that we consider: 1. If [lookahead] is a real terminal symbol [z] (i.e., not [any]), then [z] does not cause an error in the [current] state. It would be useless to consider a fact that violates this property; this cannot possibly lead to a successful reduction. In practice, this refinement allows reducing the number of facts that go through the queue by a factor of two. 2. [lookahead] is [any] iff the [current] state is solid. 
This sounds rather reasonable (when a state is entered by shifting, it is entered regardless of which symbol follows) and simplifies the implementation of the sub-module [F]. *) let invariant1 position _word lookahead = let current = Trie.current position in lookahead = any || not (causes_an_error current lookahead) let invariant2 position _word lookahead = let current = Trie.current position in (lookahead = any) = is_solid current (* [compatible z a] checks whether the terminal symbol [a] satisfies the lookahead assumption [z] -- which can be [any]. *) let compatible z a = assert (non_error z); assert (Terminal.real a); z = any || z = a (* ------------------------------------------------------------------------ *) (* As in Dijkstra's algorithm, a priority queue contains the facts that await examination. The length of [word fact] serves as the priority of a fact. This guarantees that we discover shortest paths. (We never insert into the queue a fact whose priority is less than the priority of the last fact extracted out of the queue.) *) (* [LowIntegerPriorityQueue] offers very efficient operations (essentially constant time, for a small constant). It exploits the fact that priorities are low nonnegative integers. *) module Q = LowIntegerPriorityQueue let q = Q.create dummy (* In principle, there is no need to insert the fact into the queue if [F] already stores a comparable fact. We could perform this test in [enqueue]. However, a few experiments suggest that this is not worthwhile. The run time increases (because membership in [F] is tested twice, upon inserting and upon extracting) and the memory consumption does not seem to go down significantly. *) let enqueue position word lookahead = (* [lookahead] can be [any], but cannot be [error] *) assert (non_error lookahead); assert (invariant1 position word lookahead); assert (invariant2 position word lookahead); (* The length of [word] serves as the priority of this fact. *) let priority = W.length word in (* Encode and enqueue this fact. *) Q.add q (mkfact position word lookahead) priority (* ------------------------------------------------------------------------ *) (* Construct the [star] of every state [s]. Initialize the priority queue. *) let () = (* For every state [s]... *) Lr1.iter (fun s -> (* If the trie rooted at [s] is nontrivial...*) match Trie.star s with | None -> () | Some position -> (* ...then insert an initial fact into the priority queue. *) (* In order to respect invariants 1 and 2, we must distinguish two cases. If [s] is solid, then we insert a single fact, whose lookahead assumption is [any]. Otherwise, we must insert one initial fact for every terminal symbol [z] that does not cause an error in state [s]. *) let word = W.epsilon in if is_solid s then enqueue position word any else foreach_terminal_not_causing_an_error s (fun z -> enqueue position word z ) ); if X.verbose then Trie.verbose() (* Produce a warning if the grammar uses the [error] pseudo-token. *) let () = if !grammar_uses_error then Error.warning [] "--list-errors ignores all productions that involve the error token." (* ------------------------------------------------------------------------ *) (* The module [F] maintains a set of known facts. *) (* Three aspects of a fact are of particular interest: - its position [position], given by [position fact]; - its first symbol [a], given by [W.first (word fact) (lookahead fact)]; - its lookahead assumption [z], given by [lookahead fact].
For every triple of [position], [a], and [z], we store at most one fact (whose word has minimal length). Indeed, we are not interested in keeping track of several words that produce the same effect. Only the shortest such word is of interest. Thus, the total number of facts accumulated by the algorithm is at most [T.n^2], where [T] is the total size of the tries that we have constructed, and [n] is the number of terminal symbols. (This number can be quite large. [T] can be in the tens of thousands, and [n] can be over one hundred. These figures lead to a theoretical upper bound of 100M. In practice, for T=25K and n=108, we observe that the algorithm gathers about 7M facts.) *) module F : sig (* [register fact] registers the fact [fact]. It returns [true] if this fact is new, i.e., no fact concerning the same triple of [position], [a], and [z] was previously known. *) val register: fact -> bool (* [query current z f] enumerates all known facts whose current state is [current] and whose lookahead assumption is compatible with [z]. The symbol [z] must be a real terminal symbol, i.e., cannot be [any]. *) val query: Lr1.node -> Terminal.t -> (fact -> unit) -> unit (* [size()] returns the number of facts currently stored in the set. *) val size: unit -> int (* [verbose()] outputs debugging & performance information. *) val verbose: unit -> unit end = struct (* We need to query the set of facts in two ways. In [register], we must test whether a proposed triple of [position], [a], [z] already appears in the set. In [query], we must find all facts that match a pair [current, z], where [current] is a state. (Note that [position] determines [current], but the converse is not true: a position contains more information besides the current state.) To address these needs, we use a two-level table. The first level is a matrix indexed by [current] and [z]. At the second level, we find sets of facts, where two facts are considered equal if they have the same triple of [position], [a], and [z]. In fact, we know at this level that all facts have the same [z] component, so only [position] and [a] are compared. Because our facts satisfy invariant 2, [z] is [any] if and only if the state [current] is solid. This means that we are wasting quite a lot of space in the matrix (for a solid state, the whole line is empty, except for the [any] column). *) (* The level-2 sets. *) module M = MySet.Make(struct type t = fact let compare fact1 fact2 = assert (lookahead fact1 = lookahead fact2); (* Compare the two positions first. This can be done without going through [Trie.decode], by directly comparing the two integer identities. *) let c = Pervasives.compare (identity fact1) (identity fact2) in assert (c = Trie.compare (position fact1) (position fact2)); if c <> 0 then c else let z = lookahead fact1 in let a1 = W.first (word fact1) z and a2 = W.first (word fact2) z in (* note: [a1] and [a2] can be [any] here *) Terminal.compare a1 a2 end) (* The level-1 matrix. *) let table = Array.make (Lr1.n * Terminal.n) M.empty let index current z = Terminal.n * (Lr1.number current) + Terminal.t2i z let count = ref 0 let register fact = let current = Trie.current (position fact) in let z = lookahead fact in let i = index current z in let m = table.(i) in (* We crucially rely on the fact that [M.add] guarantees not to change the set if an ``equal'' fact already exists. Thus, a later, longer path is ignored in favor of an earlier, shorter path.
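      This idiom deserves a stand-alone illustration (a sketch using the standard [Set] module, which documents the same physical-equality guarantee since OCaml 4.03):

        module IS = Set.Make(struct type t = int let compare = compare end)

        let insert (x : int) (s : IS.t ref) : bool =
          let s' = IS.add x !s in
          s' != !s && begin s := s'; true end

      The test [s' != !s] detects, in constant time, whether the addition had any effect.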
*) let m' = M.add fact m in m != m' && begin incr count; table.(i) <- m'; true end let query current z f = assert (not (Terminal.equal z any)); (* If the state [current] is solid then the facts that concern it are stored in the column [any], and all of them are compatible with [z]. Otherwise, they are stored in all columns except [any], and only those stored in the column [z] are compatible with [z]. *) let i = index current (if is_solid current then any else z) in let m = table.(i) in M.iter f m let size () = !count let verbose () = Printf.eprintf "F stores %d facts.\n%!" (size()) end (* ------------------------------------------------------------------------ *) (* The module [E] is in charge of recording the non-terminal edges that we have discovered, or more precisely, the conditions under which these edges can be taken. It maintains a set of quadruples [s, nt, w, z], where such a quadruple means that in the state [s], the outgoing edge labeled [nt] can be taken by consuming the word [w], under the assumption that the next symbol is [z]. Again, the terminal symbol [a], given by [W.first w z], plays a role. For each quadruple [s, nt, a, z], we store at most one quadruple [s, nt, w, z]. Thus, internally, we maintain a mapping of [s, nt, a, z] to [w]. For greater simplicity, we do not allow [z] to be [any] in [register] or [query]. Allowing it would complicate things significantly, it seems. *) module E : sig (* [register s nt w z] records that, in state [s], the outgoing edge labeled [nt] can be taken by consuming the word [w], if the next symbol is [z]. It returns [true] if this information is new, i.e., if the underlying quadruple [s, nt, a, z] is new. The symbol [z] cannot be [any]. *) val register: Lr1.node -> Nonterminal.t -> W.word -> Terminal.t -> bool (* [query s nt a foreach] enumerates all words [w] and all real symbols [z] such that, in state [s], the outgoing edge labeled [nt] can be taken by consuming the word [w], under the assumption that the next symbol is [z], and the first symbol of the word [w.z] is [a]. The symbol [a] can be [any]. The function [foreach] can be either [foreach_terminal] or of the form [foreach_terminal_not_causing_an_error _]. It limits the symbols [z] that are considered. *) val query: Lr1.node -> Nonterminal.t -> Terminal.t -> (* foreach: *) ((Terminal.t -> unit) -> unit) -> (W.word -> Terminal.t -> unit) -> unit (* [size()] returns the number of edges currently stored in the set. *) val size: unit -> int (* [verbose()] outputs debugging & performance information. *) val verbose: unit -> unit end = struct (* At a high level, we must implement a mapping of [s, nt, a, z] to [w]. In practice, we can implement this specification using any combination of arrays, hash tables, balanced binary trees, and perfect hashing (i.e., packing several of [s], [nt], [a], [z] in one word.) Here, we choose to use an array, indexed by [s], of hash tables, indexed by a key that packs [nt], [a], and [z] in one word. According to a quick experiment, the final population of the hash table [table.(index s)] seems to be roughly [Terminal.n * Trie.size s]. We note that using an initial capacity of 0 and relying on the hash table's resizing mechanism has a significant cost, which is why we try to guess a good initial capacity. 
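   As a worked instance of this packing scheme (with hypothetical small codes): if [nt] is numbered 4, [a] is numbered 7, and [z] is numbered 9, then the key is

     (4 lsl 16) lor (7 lsl 8) lor 9    (* that is, 4 * 65536 + 7 * 256 + 9 = 263945 *)

   and, although the code below never needs to unpack a key, the components could be recovered as

     let z = key land 0xff
     let a = (key lsr 8) land 0xff
     let nt = key lsr 16

   This is sound because there are at most 256 terminal symbols, a limit that is checked near the top of this file.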
*) module H = Hashtbl let table = Array.init Lr1.n (fun i -> let size = Trie.size i in H.create (if size = 1 then 0 else Terminal.n * size) ) let index s = Lr1.number s let pack nt a z : int = (* We rely on the fact that we have at most 256 terminal symbols. *) (Nonterminal.n2i nt lsl 16) lor (Terminal.t2i a lsl 8) lor (Terminal.t2i z) let count = ref 0 let register s nt w z = assert (Terminal.real z); let i = index s in let m = table.(i) in let a = W.first w z in (* Note that looking at [a] in state [s] cannot cause an error. *) assert (not (causes_an_error s a)); let key = pack nt a z in if H.mem m key then false else begin incr count; H.add m key w; true end let rec query s nt a foreach f = if Terminal.equal a any then begin (* If [a] is [any], we query the table for every real symbol [a]. We can limit ourselves to symbols that do not cause an error in state [s]. Those that do certainly do not have an entry; see the assertion in [register] above. *) foreach_terminal_not_causing_an_error s (fun a -> query s nt a foreach f ) end else let i = index s in let m = table.(i) in foreach (fun z -> assert (Terminal.real z); let key = pack nt a z in match H.find m key with | w -> f w z | exception Not_found -> () ) let size () = !count let verbose () = Printf.eprintf "E stores %d edges.\n%!" (size()) end (* ------------------------------------------------------------------------ *) (* [new_edge s nt w z] is invoked when we discover that in the state [s], the outgoing edge labeled [nt] can be taken by consuming the word [w], under the assumption that the next symbol is [z]. We check whether this quadruple already exists in the set [E]. If not, then we add it, and we compute its consequences, in the form of new facts, which we insert into the priority queue for later examination. *) let new_edge s nt w z = assert (Terminal.real z); if E.register s nt w z then let sym = Symbol.N nt in (* Query [F] for existing facts which could be extended by following this newly discovered edge. They must be facts whose current state is [s] and whose lookahead assumption is compatible with [a]. For each such fact, ... *) F.query s (W.first w z) (fun fact -> assert (compatible (lookahead fact) (W.first w z)); (* ... try to take one step in the trie along an edge labeled [nt]. *) match Trie.step sym (position fact) with | position -> (* This takes us to a new state whose incoming symbol is [nt]. Hence, this state is not solid. In order to satisfy invariant 2, we must create a fact whose lookahead assumption is not [any]. That's fine, since our lookahead assumption is [z]. In order to satisfy invariant 1, we must check that [z] does not cause an error in this state. *) assert (not (is_solid (Trie.current position))); if not (causes_an_error (Trie.current position) z) then let word = W.append (word fact) w in enqueue position word z | exception Not_found -> (* Could not take a step in the trie. This means this branch leads nowhere of interest, and was pruned when the trie was constructed. *) () ) (* ------------------------------------------------------------------------ *) (* [new_fact fact] is invoked when we discover a new fact (i.e., one that was not previously known). It studies the consequences of this fact. These consequences are of two kinds: - As in Dijkstra's algorithm, the new fact can be viewed as a newly discovered vertex. We study its (currently known) outgoing edges, and enqueue new facts in the priority queue. - Sometimes, a fact can also be viewed as a newly discovered edge.
This is the case when the word that took us from [source] to [current] represents a production of the grammar and [current] is willing to reduce this production. We record the existence of this edge, and re-inspect any previously discovered vertices which are interested in this outgoing edge. *) let new_fact fact = (* Throughout this rather long function, there is just one [fact]. Let's name its components right now, so as to avoid accessing them several times. (That could be costly, as it requires decoding the fact.) *) let position = position fact and lookahead = lookahead fact and word = word fact in let source = Trie.source position and current = Trie.current position in (* 1. View [fact] as a vertex. Examine the transitions out of [current]. For every transition labeled by a symbol [sym] and into a state [target], ... *) Lr1.transitions current |> SymbolMap.iter (fun sym target -> (* ... try to follow this transition in the trie [position], down to a child which we call [child]. *) match Trie.step sym position, sym with | exception Not_found -> (* Could not take a step in the trie. This means this transition leads nowhere of interest. *) () | child, Symbol.T t -> (* 1a. The transition exists in the trie, and [sym] is in fact a terminal symbol [t]. We note that [t] cannot be the [error] token, because the trie does not have any edges labeled [error]. *) assert (Lr1.Node.compare (Trie.current child) target = 0); assert (is_solid target); assert (non_error t); (* If the lookahead assumption [lookahead] is compatible with [t], then we derive a new fact, where one more edge has been taken, and enqueue this new fact for later examination. *) (* The state [target] is solid, i.e., its incoming symbol is terminal. This state is always entered without consideration for the next lookahead symbol. Thus, we can use [any] as the lookahead assumption in the new fact that we produce. If we did not have [any], we would have to produce one fact for every possible lookahead symbol. *) if compatible lookahead t then let word = W.append word (W.singleton t) in enqueue child word any | child, Symbol.N nt -> (* 1b. The transition exists in the trie, and [sym] is in fact a nonterminal symbol [nt]. *) assert (Lr1.Node.compare (Trie.current child) target = 0); assert (not (is_solid target)); (* We need to know how this nonterminal edge can be taken. We query [E] for a word [w] that allows us to take this edge. In general, the answer depends on the terminal symbol [z] that comes *after* this word: we try all such symbols. We must make sure that the first symbol of the word [w.z] satisfies the lookahead assumption [lookahead]; this is ensured by passing this information to [E.query]. *) (* It could be the case that, due to a default reduction, the answer to our query does not depend on [z], and we are wasting work. However, allowing [z] to be [any] in [E.query], and taking advantage of this to increase performance, seems difficult. *) let foreach = foreach_terminal_not_causing_an_error target in E.query current nt lookahead foreach (fun w z -> assert (compatible lookahead (W.first w z)); let word = W.append word w in enqueue child word z ) ); (* 2. View [fact] as a possible edge. This is possible if the path from [source] to the [current] state represents a production [prod] and [current] is willing to reduce this production. Then, reducing [prod] takes us all the way back to [source]. Thus, this production gives rise to an edge labeled [nt] -- the left-hand side of [prod] -- out of [source]. 
*) let z = lookahead in if not (Terminal.equal z any) then begin (* 2a. The lookahead assumption [z] is a real terminal symbol. We check whether [current] is willing to reduce some production [prod] on [z], and whether the sub-trie [position] accepts [prod], which means that this reduction takes us back to the root of the trie. If so, we have discovered a new edge. *) match has_reduction current z with | Some prod when Trie.accepts prod position -> new_edge source (Production.nt prod) word z | _ -> () end else begin (* 2b. The lookahead assumption is [any]. We must consider every pair [prod, z] such that the [current] state can reduce [prod] on [z] and [position] accepts [prod]. *) match Default.has_default_reduction current with | Some (prod, _) -> if Trie.accepts prod position then (* [new_edge] does not accept [any] as its 4th parameter, so we must iterate over all terminal symbols. *) foreach_terminal (fun z -> new_edge source (Production.nt prod) word z ) | None -> TerminalMap.iter (fun z prods -> if non_error z then let prod = Misc.single prods in if Trie.accepts prod position then new_edge source (Production.nt prod) word z ) (Lr1.reductions current) end (* ------------------------------------------------------------------------ *) (* The main loop of the algorithm. *) (* [level] is the length of [word fact] for the facts that we are examining at the moment. [extracted] counts how many facts we have extracted out of the priority queue. [considered] counts how many of these were found to be new, and subsequently passed to [new_fact]. *) let level, extracted, considered = ref 0, ref 0, ref 0 let done_with_level () = Printf.eprintf "Done with level %d.\n" !level; W.verbose(); F.verbose(); E.verbose(); Printf.eprintf "Q stores %d facts.\n" (Q.cardinal q); Printf.eprintf "%d facts extracted out of Q, of which %d considered.\n%!" !extracted !considered let () = Q.repeat q (fun fact -> incr extracted; if F.register fact then begin if X.verbose && W.length (word fact) > !level then begin done_with_level(); level := W.length (word fact); end; incr considered; new_fact fact end ); if X.verbose then done_with_level(); Time.tick "Running LRijkstra" (* ------------------------------------------------------------------------ *) (* The following code validates the fact that an error can be triggered in state [s'] by beginning at the start symbol [nt] and reading the sequence of terminal symbols [w]. We use this for debugging purposes. Furthermore, this gives us a list of spurious reductions, which we use to produce a comment. *) let fail msg = Printf.eprintf "LRijkstra: internal error: %s.\n%!" msg; exit 1 let validate nt s' w : ReferenceInterpreter.target = let open ReferenceInterpreter in match check_error_path false nt (W.elements w) with | OInputReadPastEnd -> fail "input was read past its end" | OInputNotFullyConsumed -> fail "input was not fully consumed" | OUnexpectedAccept -> fail "input was unexpectedly accepted" | OK ((state, _) as target) -> if Lr1.Node.compare state s' <> 0 then fail ( Printf.sprintf "error occurred in state %d instead of %d" (Lr1.number state) (Lr1.number s') ) else target (* ------------------------------------------------------------------------ *) (* We now wish to determine, given a state [s'] and a terminal symbol [z], a minimal path that takes us from some entry state to state [s'] with [z] as the next (unconsumed) symbol. *) (* This can be formulated as a search for a shortest path in a graph. The graph is not just the automaton, though. 
It is a (much) larger graph whose vertices are pairs [s, z] and whose edges are obtained by querying the module [E] above. For this purpose, we use Dijkstra's algorithm, unmodified. Experiments show that the running time of this phase is typically 10x shorter than the running time of the main loop above. *) module A = Astar.Make(struct (* A vertex is a pair [s, z], where [z] is a real terminal symbol. *) type node = Lr1.node * Terminal.t let equal (s'1, z1) (s'2, z2) = Lr1.Node.compare s'1 s'2 = 0 && Terminal.compare z1 z2 = 0 let hash (s, z) = Hashtbl.hash (Lr1.number s, z) (* An edge is labeled with a word. *) type label = W.word (* We search forward from every [s, z], where [s] is an initial state. *) let sources f = foreach_terminal (fun z -> ProductionMap.iter (fun _ s -> f (s, z) ) Lr1.entry ) (* The successors of [s, z] are defined as follows. *) let successors (s, z) edge = assert (Terminal.real z); (* For every transition out of [s], labeled [sym], leading to [s']... *) Lr1.transitions s |> SymbolMap.iter (fun sym s' -> match sym with | Symbol.T t -> if Terminal.equal z t then (* If [sym] is the terminal symbol [z], then this transition matches our lookahead assumption, so we can take it. For every [z'], we have an edge to [s', z'], labeled with the singleton word [z]. *) let w = W.singleton z in foreach_terminal (fun z' -> edge w 1 (s', z') ) | Symbol.N nt -> (* If [sym] is a nonterminal symbol [nt], then we query [E] in order to find out which (minimal) words [w] allow us to take this transition. We must again try every [z'], and must respect the constraint that the first symbol of the word [w.z'] is [z]. For every [z'] and [w] that fulfill these requirements, we have an edge to [s', z'], labeled with the word [w]. *) E.query s nt z foreach_terminal (fun w z' -> edge w (W.length w) (s', z') ) ) (* Algorithm A*, used with a zero estimate, is Dijkstra's algorithm. We have experimented with a non-zero estimate, but the performance increase was minimal. *) let estimate _ = 0 end) (* ------------------------------------------------------------------------ *) (* [explored] counts how many graph nodes we have discovered during the search. *) let explored = ref 0 (* We wish to store a set of triples [nt, w, (s', spurious)], meaning that an error can be triggered in state [s'] by beginning in the initial state that corresponds to [nt] and by reading the sequence of terminal symbols [w]. We wish to store at most one such triple for every state [s'], so we organize the data as a set [domain] of states [s'] and a list [data] of triples [nt, w, (s', spurious)]. The list [spurious] documents the spurious reductions that are performed by the parser at the end. *) (* We could print this data as we go, which would naturally result in sorting the output by increasing word sizes. However, it seems preferable to sort the sentences lexicographically, so that similar sentences end up close to one another. (We could also sort them by state number. The result would be roughly similar.) This is why we store a list of triples and sort it before printing it out. *) let domain = ref Lr1.NodeSet.empty let data : (Nonterminal.t * W.word * ReferenceInterpreter.target) list ref = ref [] (* The set [reachable] stores every reachable state (regardless of whether an error can be triggered in that state). *) let reachable = ref Lr1.NodeSet.empty (* Perform the forward search. 
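   For intuition, here is how the same functor degenerates to a textbook Dijkstra on a toy three-node graph (a sketch, for exposition only; the graph is hypothetical):

     module D = Astar.Make(struct
       type node = int
       let equal = (=)
       let hash = Hashtbl.hash
       type label = unit
       let sources f = f 0
       let successors n edge =
         if n < 2 then edge () 1 (n + 1)   (* a single edge, of weight 1 *)
       let estimate _ = 0                  (* zero estimate: plain Dijkstra *)
     end)

   Running [D.search] then visits the nodes 0, 1, 2 in order of increasing distance, just as the search below visits pairs [s, z] in order of increasing path length.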
*) let _, _ = A.search (fun ((s', z), path) -> incr explored; reachable := Lr1.NodeSet.add s' !reachable; (* If [z] causes an error in state [s'] and this is the first time we are able to trigger an error in this state, ... *) if causes_an_error s' z && not (Lr1.NodeSet.mem s' !domain) then begin (* Reconstruct the initial state [s] and the word [w] that lead to this error. *) let (s, _), ws = A.reverse path in let w = List.fold_right W.append ws (W.singleton z) in (* Check that the reference interpreter confirms our finding. At the same time, compute a list of spurious reductions. *) let nt = Lr1.nt_of_entry s in let target = validate nt s' w in (* Store this new data. *) domain := Lr1.NodeSet.add s' !domain; data := (nt, w, target) :: !data end ) (* Sort and output the data. *) let () = !data |> List.fast_sort (fun (nt1, w1, _) (nt2, w2, _) -> let c = Nonterminal.compare nt1 nt2 in if c <> 0 then c else W.compare w2 w1 ) |> List.map (fun (nt, w, target) -> (nt, W.elements w, target)) |> List.iter Interpret.print_messages_item (* ------------------------------------------------------------------------ *) (* Verbosity. *) let max_heap_size = if X.verbose || X.statistics <> None then let stat = Gc.quick_stat() in (stat.Gc.top_heap_words * (Sys.word_size / 8) / 1024 / 1024) else 0 (* dummy *) let () = Time.tick "Forward search"; if X.verbose then begin Printf.eprintf "%d graph nodes explored by forward search.\n\ %d out of %d states are reachable.\n\ Found %d states where an error can occur.\n\ Maximum size reached by the major heap: %dM\n%!" !explored (Lr1.NodeSet.cardinal !reachable) Lr1.n (Lr1.NodeSet.cardinal !domain) max_heap_size end (* ------------------------------------------------------------------------ *) (* If requested by the client, write one line of statistics to a .csv file. *) let stop = now() let () = X.statistics |> Option.iter (fun filename -> let c = open_out_gen [ Open_creat; Open_append; Open_text ] 0o644 filename in Printf.fprintf c "%s,%d,%d,%d,%d,%d,%d,%d,%.2f,%d\n%!" (* Grammar name. *) Settings.base (* Number of terminal symbols. *) Terminal.n (* Number of nonterminal symbols. *) Nonterminal.n (* Grammar size (not counting the error productions). *) begin Production.foldx (fun prod accu -> let rhs = Production.rhs prod in if List.mem (Symbol.T Terminal.error) (Array.to_list rhs) then accu else accu + Array.length rhs ) 0 end (* Automaton size (i.e., number of states). *) Lr1.n (* Total trie size. *) (Trie.total_size()) (* Size of [F]. *) (F.size()) (* Size of [E]. *) (E.size()) (* Elapsed user time, in seconds. *) (stop -. start) (* Max heap size, in megabytes. *) max_heap_size ; close_out c ) (* ------------------------------------------------------------------------ *) end menhir-20171222/src/inliner.mli0000664000175000017500000000241613217215730016450 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This transformer inlines every function that is called at most once. It also inlines some functions whose body consists of a single function call. At the same time, every function that is never called is dropped. 
Public functions are never inlined or dropped. *) val inline: IL.program -> IL.program menhir-20171222/src/tableBackend.ml0000664000175000017500000007350313217215730017203 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) open CodeBits open Grammar open IL open Interface open Printf open TokenType open NonterminalType open CodePieces module Run (T : sig end) = struct (* ------------------------------------------------------------------------ *) (* Conventional names for modules, exceptions, record fields, functions. *) let menhirlib = "MenhirLib" let make_engine_table = menhirlib ^ ".TableInterpreter.MakeEngineTable" let make_engine = menhirlib ^ ".Engine.Make" let make_symbol = menhirlib ^ ".InspectionTableInterpreter.Symbols" let make_inspection = menhirlib ^ ".InspectionTableInterpreter.Make" let engineTypes = menhirlib ^ ".EngineTypes" let field x = engineTypes ^ "." ^ x let fstate = field "state" let fsemv = field "semv" let fstartp = field "startp" let fendp = field "endp" let fnext = field "next" let fstack = field "stack" let fcurrent = field "current" let entry = interpreter ^ ".entry" let start = interpreter ^ ".start" let staticVersion = menhirlib ^ ".StaticVersion" (* The following are names of internal sub-modules. *) let tables = "Tables" let symbols = "Symbols" let et = "ET" let ti = "TI" (* ------------------------------------------------------------------------ *) (* Statistics. *) (* Integer division, rounded up. *) let div a b = if a mod b = 0 then a / b else a / b + 1 (* [size] provides a rough measure of the size of its argument, in words. The [unboxed] parameter is true if we have already counted 1 for the pointer to the object. *) let rec size unboxed = function | EIntConst _ | ETuple [] | EData (_, []) -> if unboxed then 0 else 1 | EStringConst s -> 1 + div (String.length s * 8) Sys.word_size | ETuple es | EData (_, es) | EArray es -> 1 + List.length es + List.fold_left (fun s e -> s + size true e) 0 es | _ -> assert false (* not implemented *) let size = size false (* Optionally, print a measure of each of the tables that we are defining. *) let define (name, expr) = { valpublic = true; valpat = PVar name; valval = expr } let define_and_measure (x, e) = Error.logC 1 (fun f -> fprintf f "The %s table occupies roughly %d bytes.\n" x (size e * (Sys.word_size / 8)) ); define (x, e) (* ------------------------------------------------------------------------ *) (* Code generation for semantic actions. *) (* The functions [reducecellparams] and [reducebody] are adapted from [CodeBackend]. *) (* Things are slightly more regular here than in the code-based back-end, since there is no optimization: every stack cell has the same structure and holds a state, a semantic value, and a pair of positions. Because every semantic value is represented, we do not have a separate [unitbindings]. *) (* [reducecellparams] constructs a pattern that describes the contents of a stack cell. If this is the bottom cell, the variable [state] is bound to the state found in the cell. If [ids.(i)] is used in the semantic action, then it is bound to the semantic value. 
The position variables are always bound. *) let reducecellparams prod i _symbol (next : pattern) : pattern = let ids = Production.identifiers prod in PRecord [ fstate, (if i = 0 then PVar state else PWildcard); fsemv, PVar ids.(i); fstartp, PVar (Printf.sprintf "_startpos_%s_" ids.(i)); fendp, PVar (Printf.sprintf "_endpos_%s_" ids.(i)); fnext, next; ] (* The semantic values bound in [reducecellparams] have type [Obj.t]. They should now be cast to their real type. If we had [PMagic] in the syntax of patterns, we could do that in one swoop; since we don't, we have to issue a series of casts a posteriori. *) let reducecellcasts prod i symbol casts = let ids = Production.identifiers prod in let id = ids.(i) in let t : typ = match semvtype symbol with | [] -> tunit | [ t ] -> t | _ -> assert false in (* Cast: [let id = ((Obj.magic id) : t) in ...]. *) ( PVar id, EAnnot (EMagic (EVar id), type2scheme t) ) :: casts (* 2015/11/04. The start and end positions of an epsilon production are obtained by taking the end position stored in the top stack cell (whatever it is). *) let endpos_of_top_stack_cell = ERecordAccess(EVar stack, fendp) (* This is the body of the [reduce] function associated with production [prod]. It assumes that the variables [env] and [stack] have been bound. *) let reducebody prod = let nt, rhs = Production.def prod and ids = Production.identifiers prod and length = Production.length prod in (* Build a pattern that represents the shape of the stack. Out of the stack, we extract a state (except when the production is an epsilon production) and a number of semantic values. *) (* At the same time, build a series of casts. *) (* We want a [fold] that begins with the deepest cells in the stack. Folding from left to right on [rhs] is appropriate. *) let (_ : int), pat, casts = Array.fold_left (fun (i, pat, casts) symbol -> i + 1, reducecellparams prod i symbol pat, reducecellcasts prod i symbol casts ) (0, PVar stack, []) rhs in (* Determine beforeend/start/end positions for the left-hand side of the production, and bind them to the conventional variables [beforeendp], [startp], and [endp]. These variables may be unused by the semantic action, in which case these bindings are dead code and can be ignored by the OCaml compiler. *) let posbindings = ( PVar beforeendp, endpos_of_top_stack_cell ) :: ( PVar startp, if length > 0 then EVar (Printf.sprintf "_startpos_%s_" ids.(0)) else endpos_of_top_stack_cell ) :: ( PVar endp, if length > 0 then EVar (Printf.sprintf "_endpos_%s_" ids.(length - 1)) else EVar startp ) :: [] in (* This cannot be one of the start productions. *) assert (not (Production.is_start prod)); (* This is a regular production. Perform a reduction. *) let action = Production.action prod in let act = EAnnot (Action.to_il_expr action, type2scheme (semvtypent nt)) in EComment ( Production.print prod, blet ( (pat, EVar stack) :: (* destructure the stack *) casts @ (* perform type casts *) posbindings @ (* bind [startp] and [endp] *) [ PVar semv, act ], (* run the user's code and bind [semv] *) (* Return a new stack, onto which we have pushed a new stack cell. 
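      To make the target of this code generation scheme concrete, here is roughly what the generated function looks like for a hypothetical production [expr: e1 = expr PLUS e2 = expr] whose semantic action is [e1 + e2] (a sketch; field and variable names follow the conventions above, and the real output is built as an [IL] term):

        fun env ->
          let stack = env.stack in
          let { semv = e2; endp = _endpos_e2_; next =
                { semv = _2; next =
                  { state; semv = e1; startp = _startpos_e1_; next = stack } } } = stack in
          let e1 = (Obj.magic e1 : int) and e2 = (Obj.magic e2 : int) in
          let semv = e1 + e2 in
          { state; semv = Obj.repr semv;
            startp = _startpos_e1_; endp = _endpos_e2_; next = stack }

      Note how the innermost [next = stack] binding shadows the original stack: the cell built at the end is allocated on top of the popped stack.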
*) ERecord [ (* the new stack cell *) fstate, EVar state; (* the current state after popping; it will be updated by [goto] *) fsemv, ERepr (EVar semv); (* the newly computed semantic value *) fstartp, EVar startp; (* the newly computed start and end positions *) fendp, EVar endp; fnext, EVar stack; (* this is the stack after popping *) ] ) ) let semantic_action prod = EFun ( [ PVar env ], (* Access the stack and current state via the environment. *) (* In fact, the current state needs be bound here only if this is an epsilon production. Otherwise, the variable [state] will be bound by the pattern produced by [reducecellparams] above. *) ELet ( [ PVar stack, ERecordAccess (EVar env, fstack) ] @ (if Production.length prod = 0 then [ PVar state, ERecordAccess (EVar env, fcurrent) ] else []), reducebody prod ) ) (* Export the number of start productions. *) let start_def = define ( "start", EIntConst Production.start ) (* ------------------------------------------------------------------------ *) (* Table encodings. *) (* Encodings of entries in the default reduction table. *) let encode_DefRed prod = (* 1 + prod *) 1 + Production.p2i prod let encode_NoDefRed = (* 0 *) 0 (* Encodings of entries in the action table. *) let encode_Reduce prod = (* prod | 01 *) (Production.p2i prod lsl 2) lor 1 let encode_ShiftDiscard s = (* s | 10 *) ((Lr1.number s) lsl 2) lor 0b10 let encode_ShiftNoDiscard s = (* s | 11 *) ((Lr1.number s) lsl 2) lor 0b11 let encode_Fail = (* 00 *) 0 (* Encodings of entries in the goto table. *) let encode_Goto node = (* 1 + node *) 1 + Lr1.number node let encode_NoGoto = (* 0 *) 0 (* Encodings of the hole in the action and goto tables. *) let hole = assert (encode_Fail = 0); assert (encode_NoGoto = 0); 0 (* Encodings of entries in the error bitmap. *) let encode_Error = (* 0 *) 0 let encode_NoError = (* 1 *) 1 (* Encodings of terminal and nonterminal symbols in the production table. *) let encode_no_symbol = 0 (* 0 | 0 *) let encode_terminal tok = (Terminal.t2i tok + 1) lsl 1 (* t + 1 | 0 *) let encode_nonterminal nt = ((Nonterminal.n2i nt) lsl 1) lor 1 (* nt | 1 *) let encode_symbol = function | Symbol.T tok -> encode_terminal tok | Symbol.N nt -> encode_nonterminal nt let encode_symbol_option = function | None -> encode_no_symbol | Some symbol -> encode_symbol symbol (* Encoding a Boolean as an integer value. *) let encode_bool b = if b then 1 else 0 (* ------------------------------------------------------------------------ *) (* Table compression. *) (* Our sparse, two-dimensional tables are turned into one-dimensional tables via [RowDisplacement]. *) (* The error bitmap, which is two-dimensional but not sparse, is made one-dimensional by simple flattening. *) (* Every one-dimensional table is then packed via [PackedIntArray]. *) (* Optionally, we print some information about the compression ratio. *) (* [population] counts the number of significant entries in a two-dimensional matrix. *) let population (matrix : int array array) = Array.fold_left (fun population row -> Array.fold_left (fun population entry -> if entry = hole then population else population + 1 ) population row ) 0 matrix (* [marshal1] marshals a one-dimensional array. *) let marshal1 (table : int array) = let (bits : int), (text : string) = MenhirLib.PackedIntArray.pack table in ETuple [ EIntConst bits; EStringConst text ] (* [marshal11] marshals a one-dimensional array whose bit width is statically known to be [1]. 
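   For instance (an illustrative call on a hypothetical table):

     let (bits, text) = MenhirLib.PackedIntArray.pack [| 0; 1; 1; 0; 1; 0; 0; 1 |]

   yields [bits = 1] and a one-character string [text], since eight 1-bit entries fit in a single 8-bit character.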
*) let marshal11 (table : int array) = let (bits : int), (text : string) = MenhirLib.PackedIntArray.pack table in assert (bits = 1); EStringConst text (* List-based versions of the above functions. *) let marshal1_list (table : int list) = marshal1 (Array.of_list table) let marshal11_list (table : int list) = marshal11 (Array.of_list table) (* [linearize_and_marshal1] marshals an array of integer arrays (of possibly different lengths). *) let linearize_and_marshal1 (table : int array array) = let data, entry = MenhirLib.LinearizedArray.make table in ETuple [ marshal1 data; marshal1 entry ] (* [flatten_and_marshal11_list] marshals a two-dimensional bitmap, whose width (for now) is assumed to be [Terminal.n - 1]. *) let flatten_and_marshal11_list (table : int list list) = ETuple [ (* Store the table width. *) EIntConst (Terminal.n - 1); (* View the table as a one-dimensional array, and marshal it. *) marshal11_list (List.flatten table) ] (* [marshal2] marshals a two-dimensional table, with row displacement. *) let marshal2 name m n (matrix : int list list) = let matrix : int array array = Array.of_list (List.map Array.of_list matrix) in let (displacement : int array), (data : int array) = MenhirLib.RowDisplacement.compress (=) (fun x -> x = hole) hole m n matrix in Error.logC 1 (fun f -> fprintf f "The %s table is %d entries; %d non-zero; %d compressed.\n" name (m * n) (population matrix) (Array.length displacement + Array.length data) ); ETuple [ marshal1 displacement; marshal1 data; ] (* ------------------------------------------------------------------------ *) (* Table generation. *) (* The action table. *) let action node t = match Default.has_default_reduction node with | Some _ -> (* [node] has a default reduction; in that case, the action table is never looked up. *) hole | None -> try let target = SymbolMap.find (Symbol.T t) (Lr1.transitions node) in (* [node] has a transition to [target]. If [target] has a default reduction on [#], use [ShiftNoDiscard], otherwise [ShiftDiscard]. *) match Default.has_default_reduction target with | Some (_, toks) when TerminalSet.mem Terminal.sharp toks -> assert (TerminalSet.cardinal toks = 1); encode_ShiftNoDiscard target | _ -> encode_ShiftDiscard target with Not_found -> try (* [node] has a reduction. *) let prod = Misc.single (TerminalMap.find t (Lr1.reductions node)) in encode_Reduce prod with Not_found -> (* [node] has no action. *) encode_Fail (* In the error bitmap and in the action table, the row that corresponds to the [#] pseudo-terminal is never accessed. Thus, we do not create this row. This does not create a gap in the table, because this is the right-most row. For sanity, we check this fact here. *) let () = assert (Terminal.t2i Terminal.sharp = Terminal.n - 1) (* The goto table. *) let goto node nt = try let target = SymbolMap.find (Symbol.N nt) (Lr1.transitions node) in encode_Goto target with Not_found -> encode_NoGoto (* The error bitmap reflects which entries in the action table are [Fail]. Like the action table, it is not accessed when [node] has a default reduction. *) let error node t = if action node t = encode_Fail then encode_Error else encode_NoError (* The default reductions table. *) let default_reduction node = match Default.has_default_reduction node with | Some (prod, _) -> encode_DefRed prod | None -> encode_NoDefRed (* Generate the table definitions. 
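   Before defining the tables, it is worth recalling how a row-displacement-compressed table is consulted at parsing time. Conceptually, the lookup is just this (a sketch; the actual decoder in [MenhirLib.RowDisplacement] additionally copes with the encoding of displacements and with packed storage):

     let get (displacement : int array) (data : int array) i j =
       data.(displacement.(i) + j)

   Rows are shifted so that their significant entries never collide; an insignificant entry may read an arbitrary value, which is harmless, since such entries are never consulted.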
*) let action = define_and_measure ( "action", marshal2 "action" Lr1.n (Terminal.n - 1) ( Lr1.map (fun node -> Terminal.mapx (fun t -> action node t ) ) ) ) let goto = define_and_measure ( "goto", marshal2 "goto" Lr1.n Nonterminal.n ( Lr1.map (fun node -> Nonterminal.map (fun nt -> goto node nt ) ) ) ) let error = define_and_measure ( "error", flatten_and_marshal11_list ( Lr1.map (fun node -> Terminal.mapx (fun t -> error node t ) ) ) ) let default_reduction = define_and_measure ( "default_reduction", marshal1_list ( Lr1.map (fun node -> default_reduction node ) ) ) let lhs = define_and_measure ( "lhs", marshal1 ( Production.amap (fun prod -> Nonterminal.n2i (Production.nt prod) ) ) ) let semantic_action = define ( "semantic_action", (* Non-start productions only. *) EArray (Production.mapx semantic_action) ) (* ------------------------------------------------------------------------ *) (* When [--trace] is enabled, we need tables that map terminals and productions to strings. *) let stringwrap f x = EStringConst (f x) let reduce_or_accept prod = match Production.classify prod with | Some _ -> "Accepting" | None -> "Reducing production " ^ (Production.print prod) let trace = define_and_measure ( "trace", if Settings.trace then EData ("Some", [ ETuple [ EArray (Terminal.map (stringwrap Terminal.print)); EArray (Production.map (stringwrap reduce_or_accept)); ] ]) else EData ("None", []) ) (* ------------------------------------------------------------------------ *) (* Generate the two functions that map a token to its integer code and to its semantic value, respectively. *) let token2terminal = destructuretokendef "token2terminal" tint false (fun tok -> EIntConst (Terminal.t2i tok)) let token2value = destructuretokendef "token2value" tobj true (fun tok -> ERepr ( match Terminal.ocamltype tok with | None -> EUnit | Some _ -> EVar semv ) ) (* ------------------------------------------------------------------------ *) (* The client APIs invoke the interpreter with an appropriate start state. The monolithic API calls [entry] (see [Engine]), while the incremental API calls [start]. *) (* An entry point to the monolithic API. *) let monolithic_entry_point state nt t = define ( Nonterminal.print true nt, let lexer = "lexer" and lexbuf = "lexbuf" in EFun ( [ PVar lexer; PVar lexbuf ], EAnnot ( EMagic ( EApp ( EVar entry, [ EIntConst (Lr1.number state); EVar lexer; EVar lexbuf ] ) ), type2scheme (TypTextual t) ) ) ) (* The whole monolithic API. *) let monolithic_api : IL.valdef list = Lr1.fold_entry (fun _prod state nt t api -> monolithic_entry_point state nt t :: api ) [] (* An entry point to the incremental API. *) let incremental_entry_point state nt t = let initial = "initial_position" in define ( Nonterminal.print true nt, (* In principle the eta-expansion [fun initial_position -> start s initial_position] should not be necessary, since [start] is a pure function. However, when [--trace] is enabled, [start] will log messages to the standard error channel. *) EFun ( [ PVar initial ], EAnnot ( EMagic ( EApp ( EVar start, [ EIntConst (Lr1.number state); EVar initial; ] ) ), type2scheme (checkpoint (TypTextual t)) ) ) ) (* The whole incremental API. *) let incremental_api : IL.valdef list = Lr1.fold_entry (fun _prod state nt t api -> incremental_entry_point state nt t :: api ) [] (* ------------------------------------------------------------------------ *) (* Constructing representations of symbols. 
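As a concrete example of what follows (a sketch; the constructor names depend on the grammar): for a terminal symbol [INT] whose GADT constructor is [T_INT], [eterminal] builds the IL expression [EData ("T_INT", [])], [esymbol] wraps it with the injection [T], and [xsymbol] adds the existential wrapper [X], so that the generated OCaml value reads [X (T T_INT)].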
*) (* [eterminal t] is a value of type ['a terminal] (for some ['a]) that encodes the terminal symbol [t]. It is just a data constructor of the terminal GADT. *) let eterminal (t : Terminal.t) : expr = EData (tokengadtdata (Terminal.print t), []) (* [enonterminal nt] is a value of type ['a nonterminal] (for some ['a]) that encodes the nonterminal symbol [nt]. It is just a data constructor of the nonterminal GADT. *) let enonterminal (nt : Nonterminal.t) : expr = EData (tnonterminalgadtdata (Nonterminal.print false nt), []) (* [esymbol symbol] is a value of type ['a symbol] (for some ['a]) that encodes the symbol [symbol]. It is built by applying the injection [T] or [N] to the terminal or nonterminal encoding. *) let dataT = "T" let dataN = "N" let esymbol (symbol : Symbol.t) : expr = match symbol with | Symbol.T t -> EData (dataT, [ eterminal t ]) | Symbol.N nt -> EData (dataN, [ enonterminal nt ]) (* [xsymbol symbol] is a value of type [xsymbol] that encodes the symbol [symbol]. It is built by applying the injection [X] (an existential quantifier) to [esymbol symbol]. *) let dataX = "X" let xsymbol (symbol : Symbol.t) : expr = EData (dataX, [ esymbol symbol ]) (* ------------------------------------------------------------------------ *) (* Produce a function that maps a terminal symbol (represented as an integer code) to its representation as an [xsymbol]. Include [error] but not [#], i.e., include all of the symbols which can appear in a production. *) (* Note that, instead of generating a function, we could (a) use an array or (b) use an unsafe conversion of an integer to a data constructor, then wrap it using [X] and [T/N]. Approach (b) is unsafe and causes memory allocation (due to the wrapping) at each call. *) let terminal () = assert Settings.inspection; let t = "t" in define ( "terminal", EFun ([ PVar t ], EMatch (EVar t, Terminal.mapx (fun tok -> { branchpat = pint (Terminal.t2i tok); branchbody = xsymbol (Symbol.T tok) } ) @ [ { branchpat = PWildcard; branchbody = EComment ("This terminal symbol does not exist.", EApp (EVar "assert", [ efalse ]) ) } ] ) ) ) (* ------------------------------------------------------------------------ *) (* Produce a function that maps a (non-start) nonterminal symbol (represented as an integer code) to its representation as an [xsymbol]. *) let nonterminal () = assert Settings.inspection; let nt = "nt" in define ( "nonterminal", EFun ([ PVar nt ], EMatch (EVar nt, Nonterminal.foldx (fun nt branches -> { branchpat = pint (Nonterminal.n2i nt); branchbody = xsymbol (Symbol.N nt) } :: branches ) [ { branchpat = PWildcard; branchbody = EComment ("This nonterminal symbol does not exist.", EApp (EVar "assert", [ efalse ]) ) } ] ) ) ) (* ------------------------------------------------------------------------ *) (* Produce a mapping of every LR(0) state to its incoming symbol (encoded as an integer value). (Note that the initial states do not have one.) *) let lr0_incoming () = assert Settings.inspection; define_and_measure ( "lr0_incoming", marshal1 (Array.init Lr0.n (fun node -> encode_symbol_option (Lr0.incoming_symbol node) )) ) (* ------------------------------------------------------------------------ *) (* A table that maps a production (i.e., an integer index) to the production's right-hand side. In principle, we use this table for ordinary productions only, as opposed to the start productions, whose existence is not exposed to the user. However, it is simpler (and not really costly) to include all productions in this table. 
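For instance (a sketch): for a production [expr -> expr PLUS expr], the corresponding row is the image of the right-hand side under [encode_symbol], that is, the encodings of [N expr], [T PLUS], and [N expr]. Because rows can have different lengths, the table is linearized by [linearize_and_marshal1] below.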
*) let rhs () = assert Settings.inspection; let productions : int array array = Production.amap (fun prod -> Array.map encode_symbol (Production.rhs prod) ) in define_and_measure ( "rhs", linearize_and_marshal1 productions ) (* ------------------------------------------------------------------------ *) (* A table that maps an LR(1) state to its LR(0) core. *) let lr0_core () = assert Settings.inspection; define_and_measure ( "lr0_core", marshal1_list (Lr1.map (fun (node : Lr1.node) -> Lr0.core (Lr1.state node) )) ) (* A table that maps an LR(0) state to a set of LR(0) items. *) let lr0_items () = assert Settings.inspection; let items : int array array = Array.init Lr0.n (fun node -> Array.map Item.marshal (Array.of_list (Item.Set.elements (Lr0.items node))) ) in define_and_measure ( "lr0_items", linearize_and_marshal1 items ) (* ------------------------------------------------------------------------ *) (* A table that tells which nonterminal symbols are nullable. (For simplicity, this table includes the start symbols.) *) let nullable () = assert Settings.inspection; define_and_measure ( "nullable", marshal11_list ( Nonterminal.map (fun nt -> encode_bool (Analysis.nullable nt) ) ) ) (* ------------------------------------------------------------------------ *) (* A two-dimensional bitmap, indexed first by nonterminal symbols, then by terminal symbols, encodes the FIRST sets. *) let first () = assert Settings.inspection; define_and_measure ( "first", flatten_and_marshal11_list ( Nonterminal.map (fun nt -> Terminal.mapx (fun t -> encode_bool (TerminalSet.mem t (Analysis.first nt)) ) ) ) ) (* ------------------------------------------------------------------------ *) (* A reference to [MenhirLib.StaticVersion.require_XXXXXXXX], where [XXXXXXXX] is our 8-digit version number. This ensures that the generated code can be linked only with an appropriate version of MenhirLib. This is important because we use unsafe casts, and a version mismatch could cause a crash. *) let versiondef = { valpublic = true; valpat = PUnit; valval = EVar (staticVersion ^ ".require_" ^ Version.version); } (* ------------------------------------------------------------------------ *) (* Let's put everything together. *) open UnparameterizedSyntax let grammar = Front.grammar let program = [ SIFunctor (grammar.parameters, (* Make a reference to [MenhirLib.StaticVersion.require_XXXXXXXX], where [XXXXXXXX] is our 8-digit version number. This ensures that the generated code can be linked only with an appropriate version of MenhirLib. This is important because we use unsafe casts, and a version mismatch could cause a crash. *) SIComment "This generated code requires the following version of MenhirLib:" :: SIValDefs (false, [ versiondef ]) :: (* Define the internal sub-module [basics], which contains the definitions of the exception [Error] and of the type [token]. Then, include this sub-module. This sub-module is used again below, as part of the application of the functor [TableInterpreter.Make]. *) mbasics grammar @ (* In order to avoid hiding user-defined identifiers, only the exception [Error] and the type [token] should be defined (at top level, with non-mangled names) above this line. We also define the value [_eRR] above this line so that we do not have a problem if a user prelude hides the name [Error]. *) SIStretch grammar.preludes :: (* Define the tables. *) SIModuleDef (tables, MStruct [ (* The internal sub-module [basics] contains the definitions of the exception [Error] and of the type [token]. 
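Including this sub-module here helps ensure that the [tables] module, which is defined below and later handed to [TableInterpreter.MakeEngineTable] and to the inspection functor, presents the expected signature, [TableFormat.TABLES]. *)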
*) SIInclude (MVar basics); (* This is a non-recursive definition, so none of the names defined here are visible in the semantic actions. *) SIValDefs (false, [ token2terminal; define ("error_terminal", EIntConst (Terminal.t2i Terminal.error)); token2value; default_reduction; error; start_def; action; lhs; goto; semantic_action; trace; ]) ] ) :: SIModuleDef (interpreter, MStruct ( (* Apply the functor [TableInterpreter.MakeEngineTable] to the tables. *) SIModuleDef (et, MApp (MVar make_engine_table, MVar tables)) :: (* Apply the functor [Engine.Make] to obtain an engine. *) SIModuleDef (ti, MApp (MVar make_engine, MVar et)) :: SIInclude (MVar ti) :: listiflazy Settings.inspection (fun () -> (* Define the internal sub-module [symbols], which contains type definitions. Then, include this sub-module. This sub-module is used again below, as part of the application of the functor [TableInterpreter.MakeInspection]. *) SIModuleDef (symbols, MStruct ( interface_to_structure ( tokengadtdef grammar @ nonterminalgadtdef grammar ) )) :: SIInclude (MVar symbols) :: (* Apply the functor [InspectionTableInterpreter.Make], which expects four arguments. *) SIInclude (mapp (MVar make_inspection) [ (* Argument 1, of type [TableFormat.TABLES]. *) MVar tables; (* Argument 2, of type [InspectionTableFormat.TABLES]. *) MStruct ( (* [lr1state] *) SIInclude (MVar ti) :: (* [terminal], [nonterminal]. *) SIInclude (MVar symbols) :: (* This functor application builds the types [symbol] and [xsymbol] in terms of the types [terminal] and [nonterminal]. This saves us the trouble of generating these definitions. *) SIInclude (MApp (MVar make_symbol, MVar symbols)) :: SIValDefs (false, terminal() :: nonterminal() :: lr0_incoming() :: rhs() :: lr0_core() :: lr0_items() :: nullable() :: first() :: [] ) :: [] ); (* Argument 3, of type [EngineTypes.TABLE]. *) MVar et; (* Argument 4, of type [EngineTypes.ENGINE with ...]. *) MVar ti; ]) :: [] ) )) :: SIValDefs (false, monolithic_api) :: SIModuleDef (incremental, MStruct [ SIValDefs (false, incremental_api) ]) :: SIStretch grammar.postludes :: [])] let () = Time.tick "Producing abstract syntax" end menhir-20171222/src/InputFile.ml0000664000175000017500000000726513217215730016545 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* ---------------------------------------------------------------------------- *) (* The identity of the current input file. *) (* 2011/10/19: do not use [Filename.basename]. The [#] annotations that we insert in the [.ml] file must retain their full path. This does mean that the [#] annotations depend on how menhir is invoked -- e.g. [menhir foo/bar.mly] and [cd foo && menhir bar.mly] will produce different files. Nevertheless, this seems useful/reasonable. *) (* This also influences the type error messages produced by [--infer]. *) (* 2016/08/25: in principle, the order in which file names appear on the command line (when there are several of them) does not matter. It is however used in [UnparameterizedPrinter] (see the problem description there). 
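(For instance, with [menhir a.mly b.mly], the file [a.mly] receives index 1 and [b.mly] receives index 2; [compare_input_files], defined below, compares these indices.)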
For this reason, we define a type [input_file] which includes the file's name as well as its index on the command line. *) type input_file = { input_file_name: string; input_file_index: int } let builtin_input_file = { input_file_name = ""; input_file_index = -1 } let dummy_input_file = { input_file_name = ""; input_file_index = 0 } let same_input_file file1 file2 = file1.input_file_index = file2.input_file_index (* could also use physical equality [file1 == file2] *) let compare_input_files file1 file2 = Pervasives.compare file1.input_file_index file2.input_file_index (* Ideally, this function should NOT be used, as it reflects the order of the input files on the command line. As of 2016/08/25, it is used by [UnparameterizedPrinter], for lack of a better solution. *) let current_input_file = ref dummy_input_file (* This declares that a new file is being processed. *) let new_input_file name : unit = current_input_file := { input_file_name = name; input_file_index = !current_input_file.input_file_index + 1 } let get_input_file () : input_file = assert (!current_input_file != dummy_input_file); !current_input_file let get_input_file_name () : string = (get_input_file()).input_file_name (* ---------------------------------------------------------------------------- *) (* The contents of the current input file. *) let get_initialized_ref ref = match !ref with | None -> assert false | Some contents -> contents let file_contents = ref (None : string option) let get_file_contents () = get_initialized_ref file_contents let with_file_contents contents f = file_contents := Some contents; let result = f() in file_contents := None; (* avoid memory leak *) result open Lexing let chunk (pos1, pos2) = let ofs1 = pos1.pos_cnum and ofs2 = pos2.pos_cnum in let contents = get_file_contents() in let len = ofs2 - ofs1 in String.sub contents ofs1 len menhir-20171222/src/coqBackend.mli0000664000175000017500000000207013217215730017036 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* The coq code generator. *) module Run (T: sig end) : sig val write_all: out_channel -> unit end menhir-20171222/src/SortInference.ml0000664000175000017500000002237613217215730017414 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) let value = Positions.value let position = Positions.position let error = Error.error open Syntax open SortUnification (* -------------------------------------------------------------------------- *) (* Error handling. *) (* In [check_arity], in principle, [arity1] is the expected arity and [arity2] is the actual arity. This distinction does not make much sense, though, as we do not know which is wrong, the declaration site or the use site. 
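(For instance, if [pair] is declared with two formal parameters but is applied as [pair(X)], the declaration and the application are equally plausible culprits.)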
So, we display a neutral error message. *) let check_arity sym arity1 arity2 = let plural = max arity1 arity2 > 1 in if arity1 <> arity2 then error [position sym] "does the symbol \"%s\" expect %d or %d argument%s?" (value sym) (min arity1 arity2) (max arity1 arity2) (if plural then "s" else "") (* This variant of [unify] is used when no unification error can arise. *) let unify_cannot_fail sort1 sort2 = try unify sort1 sort2 with | Unify _ | Occurs _ -> (* If the caller is right, this unification step cannot fail! *) assert false (* In [unify], in principle, [sort1] is the expected sort and [sort2] is the actual sort. Again, this distinction does not make much sense, so we display a neutral error message. *) let unify sym sort1 sort2 = try unify sort1 sort2 with | Unify (v1, v2) -> let print v = print (decode v) in error [position sym] "how is the symbol \"%s\" parameterized?\n\ It is used at sorts %s and %s.\n\ The sort %s is not compatible with the sort %s." (value sym) (print sort1) (print sort2) (print v1) (print v2) | Occurs (v1, v2) -> let print v = print (decode v) in error [position sym] "how is the symbol \"%s\" parameterized?\n\ It is used at sorts %s and %s.\n\ The sort %s cannot be unified with the sort %s." (value sym) (print sort1) (print sort2) (print v1) (print v2) (* -------------------------------------------------------------------------- *) (* An environment maps (terminal and nonterminal) symbols to unification variables. *) type symbol = string module Env = StringMap type env = variable Env.t let find x env : variable = try Env.find x env with Not_found -> assert false (* unbound terminal or nonterminal symbol *) let extend env (xvs : (symbol * variable) list) = List.fold_left (fun env (x, v) -> Env.add x v env ) env xvs (* -------------------------------------------------------------------------- *) (* [allocate xs] allocates a fresh unification variable [v] for every element [x] of the list [xs]. It returns the lists [xvs] and [vs]. *) let allocate (xs : 'a list) : ('a * variable) list * variable list = let xvs = List.map (fun x -> x, fresh()) xs in let vs = List.map snd xvs in xvs, vs (* -------------------------------------------------------------------------- *) (* [check_parameter env param expected] checks that the parameter [param] has sort [expected]. A parameter is either a symbol or an application of a symbol to a number of parameters. Every application is total -- the language does not have partial applications. The sort of every application is [star], but the sort of a variable is unrestricted. *) let rec check_parameter env (param : parameter) (expected : variable) = match param with | ParameterVar sym -> let x = value sym in unify sym expected (find x env) | ParameterApp (sym, actuals) -> let x = value sym in (* This application has sort [star]. *) unify sym expected star; (* Retrieve the expected sort of each parameter. Two cases arise: if [x] has already been assigned an arrow sort, then we can retrieve its domain, which gives us the expected sort of each actual parameter; otherwise, we just make up a fresh arrow sort of appropriate arity. We could avoid this case distinction and always use the latter method, but the former method, when applicable, yields better error messages. If [sym] is a toplevel (nonterminal or terminal) symbol, then we will be in the first case, as we have been careful to initially assign an arrow sort of appropriate arity to each such symbol. 
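For example (a sketch): when checking the application [list(expr)], the sort of [list] has already been unified with an arrow of arity 1, so [domain] returns the expected argument sorts, and [expr] is then checked against the corresponding sort, here [*].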
*) let v = find x env in let expected = match domain v with | Some expected -> check_arity sym (List.length expected) (List.length actuals); expected | None -> let _, expected = allocate actuals in unify_cannot_fail v (arrow expected); expected in (* Check the sort of each actual parameter. *) List.iter2 (check_parameter env) actuals expected | ParameterAnonymous _ -> (* Anonymous rules have been eliminated already. *) assert false (* -------------------------------------------------------------------------- *) (* The following functions respectively check that a producer, a branch, a rule, and a grammar are well-sorted under an environment [env]. *) let check_producer env (producer : producer) = let (_, param, _) = producer in (* A producer must have sort [star]. *) check_parameter env param star let check_branch env (branch : parameterized_branch) = List.iter (check_producer env) branch.pr_producers let enter_rule env (nt : symbol) (rule : parameterized_rule) : env = (* For each formal parameter, allocate a fresh variable. *) let formals, domain = allocate rule.pr_parameters in (* Connect these variables with the sort of the symbol [nt]. *) (* Because it is performed first, this particular unification cannot fail. *) unify_cannot_fail (find nt env) (arrow domain); (* Extend the environment. *) extend env formals let check_rule env (nt : symbol) (rule : parameterized_rule) = (* Extend the environment within this rule. *) let env = enter_rule env nt rule in (* Check each branch in this extended environment. *) List.iter (check_branch env) rule.pr_branches let check_grammar env g = (* Each rule must be well-sorted. *) StringMap.iter (check_rule env) g.p_rules; (* The start symbols must have sort [star]. *) StringMap.iter (fun nt position -> let sym = Positions.with_pos position nt in unify sym star (find nt env) ) g.p_start_symbols; (* Every symbol that appears in a [%type] declaration must have sort [star]. *) List.iter (fun (param, _) -> check_parameter env param star ) g.p_types; (* Same rule for [%on_error_reduce] declarations. *) List.iter (fun (param, _) -> check_parameter env param star ) g.p_on_error_reduce; (* The symbols that appear in [%attribute] declarations must be well-sorted. Their sort is not necessarily [star]: it is legal to attach an attribute with a parameterized symbol. *) List.iter (fun (params, _) -> List.iter (fun param -> check_parameter env param (fresh()) ) params ) g.p_symbol_attributes (* -------------------------------------------------------------------------- *) type sorts = GroundSort.sort Env.t let infer (g : grammar) : sorts = (* For each (terminal or nonterminal) symbol, allocate a unification variable. The terminal symbols have sort [star], so we can use this particular variable. *) let env = StringMap.fold (fun tok _ env -> Env.add tok star env ) g.p_tokens Env.empty in let env = Env.add "error" star env in let env = StringMap.fold (fun nt rule env -> let env = Env.add nt (fresh()) env in (* The following line unifies the sort of [nt] with an arrow of appropriate arity. It cannot fail. This strategy should lead to slightly better unification error messages. *) let _ : env = enter_rule env nt rule in env ) g.p_rules env in (* Impose sort equality constraints. *) check_grammar env g; (* Decode the environment, so our user doesn't have to deal with unification variables. *) let env = Env.map decode env in (* Ground any unassigned sort variables. (These should occur only in unreachable parts of the grammar.) 
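(Presumably, such a variable is mapped to some fixed ground sort, say [*]; this situation can arise, for instance, for a formal parameter that is never used in its rule.)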
This guarantees that the user does not have to deal with sort variables. *) let env = Env.map ground env in (* At log level 3, display the inferred sort of every symbol. *) Error.logG 3 (fun f -> Env.iter (fun x gsort -> Printf.fprintf f "%s :: %s\n" x (print (unground gsort)) ) env ); env menhir-20171222/src/front.mli0000664000175000017500000000266013217215730016141 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* This module drives the front-end. It opens and parses the input files, which yields a number of partial grammars. It joins these grammars, expands them to get rid of parameterized nonterminals, and performs reachability analysis. This yields a single unified grammar. It then performs type inference. This yields the grammar that the back-end works with (often through the interface provided by module [Grammar]). *) val grammar: UnparameterizedSyntax.grammar menhir-20171222/src/front.ml0000664000175000017500000001676713217215730016005 0ustar fpottierfpottier(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) (* The front-end. This module performs a series of toplevel side effects. *) (* ------------------------------------------------------------------------- *) (* Reading a grammar from a file. *) let load_partial_grammar filename : Syntax.partial_grammar = let validExt = if Settings.coq then ".vy" else ".mly" in if not (Filename.check_suffix filename validExt) then Error.error [] "argument file names should end in %s. \"%s\" is not accepted." validExt filename; InputFile.new_input_file filename; try let contents = IO.read_whole_file filename in InputFile.with_file_contents contents (fun () -> let open Lexing in let lexbuf = Lexing.from_string contents in lexbuf.lex_curr_p <- { lexbuf.lex_curr_p with pos_fname = filename }; (* the grammar: *) { (Driver.grammar Lexer.main lexbuf) with Syntax.pg_filename = filename } ) with Sys_error msg -> Error.error [] "%s" msg (* ------------------------------------------------------------------------- *) (* Read all of the grammar files that are named on the command line. *) let grammars : Syntax.partial_grammar list = List.map load_partial_grammar Settings.filenames let () = Time.tick "Lexing and parsing" (* ------------------------------------------------------------------------- *) (* Eliminate anonymous rules. *) let grammars : Syntax.partial_grammar list = List.map Anonymous.transform_partial_grammar grammars (* ------------------------------------------------------------------------- *) (* If several grammar files were specified, merge them. 
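For instance, if the tokens are declared in [tokens.mly] while the rules live in [parser.mly] (illustrative names), then [menhir tokens.mly parser.mly] joins the two partial grammars into one.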
*) let grammar : Syntax.grammar = PartialGrammar.join_partial_grammars grammars (* ------------------------------------------------------------------------- *) (* Check that the grammar is well-sorted; infer the sort of every symbol. *) let sorts = SortInference.infer grammar (* ------------------------------------------------------------------------- *) (* Expand away all applications of parameterized nonterminal symbols, so as to obtain a grammar without parameterized nonterminal symbols. *) let grammar : UnparameterizedSyntax.grammar = let module S = SelectiveExpansion in (* First, perform a selective expansion: expand away all parameters of higher sort, keeping the parameters of sort [*]. This process always terminates. *) let grammar1 = S.expand S.ExpandHigherSort sorts grammar in (* This "first-order parameterized grammar" can then be submitted to the termination check. *) CheckSafeParameterizedGrammar.check grammar1; (* If it passes the check, then full expansion is safe. We drop [grammar1] and start over from [grammar]. This is required in order to get correct names. (Expanding [grammar1] would yield an equivalent grammar, with more complicated names, reflecting the two steps of expansion.) *) let grammar = S.expand S.ExpandAll sorts grammar in (* This yields an unparameterized grammar. *) Drop.drop grammar let () = Time.tick "Joining and expanding" (* ------------------------------------------------------------------------- *) (* If [--only-tokens] was specified on the command line, produce the definition of the [token] type and stop. *) let () = TokenType.produce_tokentypes grammar (* ------------------------------------------------------------------------- *) (* Perform reachability analysis. *) let grammar = Reachability.trim grammar let () = Time.tick "Trimming" (* ------------------------------------------------------------------------- *) (* If [--depend] or [--raw-depend] was specified on the command line, perform dependency analysis and stop. *) let () = match Settings.depend with | Settings.OMRaw | Settings.OMPostprocess -> Infer.depend grammar (* never returns *) | Settings.OMNone -> () (* The purpose of [--depend] and [--raw-depend] is to support [--infer]. Indeed, [--infer] is implemented by producing a mock [.ml] file (which contains just the semantic actions) and invoking [ocamlc]. This requires certain [.cmi] files to exist. So, [--(raw-)depend] is a way for us to announce which [.cmi] files we need. It is implemented by producing the mock [.ml] file and running [ocamldep] on it. We also produce a mock [.mli] file, even though in principle it should be unnecessary -- see comment in [nonterminalType.mli]. *) (* ------------------------------------------------------------------------- *) (* If some flags imply that we will NOT produce an OCaml parser, then there is no need to perform type inference, so we act as if --infer was absent. This saves time and dependency nightmares. *) let skipping_parser_generation = Settings.coq || Settings.compile_errors <> None || Settings.interpret_error || Settings.list_errors || Settings.compare_errors <> None || Settings.update_errors <> None || Settings.echo_errors <> None || false (* maybe also: [preprocess_mode <> PMNormal] *) (* ------------------------------------------------------------------------- *) (* If [--infer] was specified on the command line, perform type inference. The OCaml type of every nonterminal is then known. 
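For example (a sketch): given the rule [expr: e1 = expr PLUS e2 = expr { e1 + e2 }], inference assigns [expr] the OCaml type [int], even in the absence of a [%type] declaration.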
*) let grammar = if Settings.infer && not skipping_parser_generation then let grammar = Infer.infer grammar in Time.tick "Inferring types for nonterminals"; grammar else grammar (* ------------------------------------------------------------------------- *) (* Expand away some of the position keywords. *) let grammar = KeywordExpansion.expand_grammar grammar (* ------------------------------------------------------------------------- *) (* If [--no-inline] was specified on the command line, skip the inlining of non terminal definitions marked with %inline. *) let grammar = if Settings.inline then begin let grammar, inlined = NonTerminalDefinitionInlining.inline grammar in if not Settings.infer && inlined && not skipping_parser_generation then Error.warning [] "you are using the standard library and/or the %%inline keyword. We\n\ recommend switching on --infer in order to avoid obscure type error messages."; Time.tick "Inlining"; grammar end else grammar (* ------------------------------------------------------------------------- *) (* If [--only-preprocess] or [--only-preprocess-drop] was specified on the command line, print the grammar and stop. Otherwise, continue. *) let () = match Settings.preprocess_mode with | Settings.PMOnlyPreprocess mode -> UnparameterizedPrinter.print mode stdout grammar; exit 0 | Settings.PMNormal -> () menhir-20171222/CHANGES.md0000664000175000017500000006166213217215727015126 0ustar fpottierfpottier# Changes ## 2017/12/22 * Add a flag `--unused-precedence-levels` to suppress all warnings about useless `%left`, `%right`, `%nonassoc` and `%prec` declarations. (Suggested by Zachary Tatlock.) ## 2017/12/06 * Fix the termination test that takes place before parameterized symbols are expanded away. The previous test was both unsound (it would accept grammars whose expansion did not terminate) and incomplete (it would reject grammars whose expansion did terminate). The new test is believed to be sound and complete. (Thanks to Martin Bodin for prompting us to look into this issue.) ## 2017/11/12 * Documentation: clarify the fact that `%type` declarations should carry types whose meaning does not depend on the headers `%{ ... %}`. ## 2017/10/13 * Remove the OCaml version check at installation time, for greater simplicity, and because for some reason it did not work properly under Cygwin. (Reported by Andrew Appel.) ## 2017/09/26 * `Makefile` fix: when determining whether the suffix `.exe` should be used, one should test whether the OS is Windows, not whether the compiler is MSVC. (Suggested by Jonathan Protzenko.) ## 2017/07/12 * Include the LaTeX sources of the manual in the official `.tar.gz` archive. This should allow the manual to be included as part of the Debian package. * Mention [Obelisk](https://github.com/Lelio-Brun/Obelisk), a pretty-printer for `.mly` files, in the manual. ## 2017/06/07 * Removed an undeclared dependency of MenhirSdk on Unix. (Reported and fixed by Frédéric Bour.) ## 2017/05/09 * Menhir now always places OCaml line number directives in the generated `.ml` file. (Until now, this was done only when `--infer` was off.) Thus, if a semantic action contains an `assert` statement, the file name and line number information carried by the `Assert_failure` exception should now be correct. (Reported by Helmut Brandl.) ## 2017/04/18 * Changed Menhir's license from QPL to GPLv2. MenhirLib remains under LGPLv2, with a linking exception. * Moved the repository to [gitlab.inria.fr](https://gitlab.inria.fr/fpottier/menhir/). 
* Introduced a new command line switch, `--cmly`, which causes Menhir to create a `.cmly` file, containing a description of the grammar and automaton. (Suggested by Frédéric Bour.) * Introduced a new library, MenhirSdk, which allows reading a `.cmly` file. The purpose of this library is to allow external tools to take advantage of the work performed by Menhir's front-end. (Suggested by Frédéric Bour.) * Introduced new syntax for attributes in a `.mly` file. Attributes are ignored by Menhir's back-ends, but are written to `.cmly` files, thus can be exploited by external tools via MenhirSdk. (Suggested by Frédéric Bour.) * The definition of a `%public` nonterminal symbol can now be split into several parts within a single `.mly` file. (This used to be permitted only over multiple `.mly` files.) (Suggested by Frédéric Bour.) * New functions in the incremental API: `shifts`, `acceptable`, `current_state_number`. * New functions in the incremental API and inspection API: `top`, `pop`, `pop_many`, `get`, `equal`, `force_reduction`, `feed`, `input_needed`, `state_has_default_reduction`, `production_index`, `find_production`. (Suggested by Frédéric Bour.) * New module `MenhirLib.ErrorReports`. This module is supposed to offer auxiliary functions that help produce good syntax error messages. This module does not yet contain much functionality and is expected to evolve in the future. * Incompatible change in the incremental API: the type `env` becomes `'a env`. * Incompatible change in the incremental API: the function `has_default_reduction` is renamed `env_has_default_reduction`. * The type `stack` and the function `stack` in the incremental API are deprecated. The new functions `top` and `pop` can be used instead to inspect the parser's stack. The module `MenhirLib.General` is deprecated as well. Deprecated functionality will be removed in the future. * Incompatible change in the incremental API: the type of the function `print_stack` in the result signature of the functor `MenhirLib.Printers.Make` changes to `'a env -> unit`. (Anyway, as of now, `MenhirLib.Printers` remains undocumented.) * Improved the syntax error message that is displayed when a `.mly` file is incorrect: the previous and next token are shown. * Fixed a bug where the module name `Basics` was shadowed (that is, if the user's project happened to contain a toplevel module by this name, then it could not be referred to from a `.mly` file). (Reported by François Thiré.) ## 2017/01/01 * Add `$MENHIR_STDLIB` as a way of controlling where Menhir looks for the file `standard.mly`. This environment variable overrides the installation-time default setting, and is itself overridden by the `--stdlib` command line switch. (Requested by Jonathan Protzenko.) * `Makefile` fix: filter out `'\r'` in the output of `menhir --suggest-ocamlfind`, so that the `Makefile` works when Menhir is compiled as a Windows executable. (Suggested by Jonathan Protzenko.) ## 2016/12/01 * Updated the Coq back-end for compatibility with Coq 8.6. (Jacques-Henri Jourdan.) ## 2016/11/15 * Fix in `--only-preprocess-for-ocamlyacc` mode: avoid printing newline characters inside a `%type` declaration, as this is forbidden by `ocamlyacc`. (Reported by Kenji Maillard.) * Fix in `--only-preprocess-for-ocamlyacc` mode: avoid variable capture caused by `ocamlyacc` internally translating `$i` to `_i`. (Reported by Kenji Maillard.) ## 2016/09/01 * New command line switch `--only-preprocess-for-ocamlyacc`, supposed to print the grammar in a form that `ocamlyacc` can accept. 
As of now, this feature is incomplete (in particular, support for Menhir's position keywords is missing), untested, and undocumented. It could be removed in the future. ## 2016/08/26 * Fixes in the output of `--only-preprocess`: * The order of productions is now preserved. (It was not. This matters if there are reduce/reduce conflicts.) * `%parameter` directives are now printed. (They were not). * `%on_error_reduce` directives are now printed. (They were not.) ## 2016/08/25 * `Makefile` fix, undoing a change made on 2016/03/03, which caused installation to fail under (some versions of?) Windows where dynamic linking is not supported. (Reported by Andrew Appel.) ## 2016/08/05 * `%on_error_reduce` declarations now have implicit priority levels, so as to tell Menhir what to do when two such declarations are applicable. Also, the well-formedness checks on `%type` and `%on_error_reduce` declarations have been reinforced. ## 2016/06/23 * A small change in the generated code (both in the code and table back-ends) so as to avoid OCaml's warning 41. The warning would arise (when compiling a generated parser with OCaml 4.03) because Menhir's exception `Error` has the same name as the data constructor `Error` in OCaml's pervasive library. (Reported by Bernhard Schommer.) ## 2016/05/18 * Anonymous rules now work also when used inside a parameterized rule. (This did not work until now.) When an anonymous rule is hoisted out of a parameterized rule, it may itself become parameterized. Menhir parameterizes it only over the parameters that it actually needs. ## 2016/05/04 * In the Coq backend, split the largest definitions into smaller ones. This circumvents a limitation of vm_compute on 32 bit machines. This also enables us to perform sharing between definitions, so that the generated files are much smaller. ## 2016/04/10 * When printing a grammar (which is done by the `--only-preprocess` options), remove the leading bar `|`, for compatibility with `yacc` and `bison`. ## 2016/03/11 * In the code back-end, generate type annotations when extracting a semantic value out of the stack. When working with a semantic value of some function type, OCaml would incorrectly warn that this function does not use its argument. This warning should now be gone. ## 2016/03/03 * Makefile changes, so as to support `ocamlbuild` 4.03, which seems to have stricter hygiene rules than previous versions. ## 2015/12/30 * Prevented an incorrect installation that would take place if `USE_OCAMLFIND` was given during `make all` but not during `make install`. Added a command line directive `--suggest-ocamlfind`. ## 2015/11/11 * Fixed a severe bug in Menhir 20151110 which (when using the code back-end) could cause a generated parser to crash. Thanks to ygrek for reporting the bug. * The code produced by version `XXXXXXXX` of `menhir --table` can now be linked only against a matching version of MenhirLib. If an incorrect version of MenhirLib is installed, the OCaml compiler should complain that `MenhirLib.StaticVersion.require_XXXXXXXX` is undefined. ## 2015/11/10 * Optimized the computation of `$symbolstartpos`, based on a couple of assumptions about the lexer. (See the manual.) ## 2015/11/04 * Modified the treatment of `%inline` so that the positions that are computed are the same, regardless of whether `%inline` is used. This property did not hold until now. It now does. Of course, this means that the positions computed by the new Menhir are not the same as those computed by older versions of Menhir. 
* Fixed a bug in the treatment of `%inline` that would lead to an incorrect position being computed when the caller and callee had a variable by the same name. * Modified Menhir so as to compute the start and end positions in the exact same way as `ocamlyacc`. (There used to be a difference in the treatment of epsilon productions.) Of course, this means that the positions computed by the new Menhir are not the same as those computed by older versions of Menhir. Added the keyword `$symbolstartpos` so as to simulate `Parsing.symbol_start_pos()` in the `ocamlyacc` world. The keyword `$startpos` sometimes produces a position that is too far off to the left; `$symbolstartpos` produces a more accurate position. * Incompatible change of the incremental API: instead of a unit argument, the entry points (which are named after the start symbols) now require an initial position, which typically should be `lexbuf.lex_curr_p`. ## 2015/11/03 * Fix-fix-and-re-fix the `Makefile` in an attempt to allow installation under opam/Windows. Thanks to Daniel Weil for patient explanations and testing. ## 2015/10/29 * MenhirLib is now installed in both binary and source forms. `menhir --suggest-menhirLib` reports where MenhirLib is installed. This can be used to retrieve a snapshot of MenhirLib in source form and include it in your project (if you wish to use `--table` mode, yet do not wish to have a dependency on MenhirLib). ## 2015/10/26 * Allow `--list-errors` to work on 32-bit machines (with low hard limits). This should fix a problem whereby the 2015/10/23 release could not bootstrap on a 32-bit machine. ## 2015/10/23 * New declaration `%on_error_reduce foo`, where `foo` is a nonterminal symbol. This modifies the automaton as follows. In every state where a production of the form `foo -> ...` is ready to be reduced, every error action is replaced with a reduction of this production. (If there is a conflict between several productions that could be reduced in this manner, nothing is done.) This does not affect the language that is accepted by the automaton, but delays the detection of an error: more reductions take place before the error is detected. * Fixed a bug whereby Menhir would warn about a useless `%prec` declaration, even though it was useful. This would happen when the declaration was duplicated (by inlining or by macro-expansion) and some but not all of the copies were useful. * Added `has_default_reduction` to the incremental API. * Modified the meaning of `--canonical` to allow default reductions to take place. This implies no loss of precision in terms of lookahead sets, and should allow gaining more contextual information when a syntax error is encountered. (It should also lead to a smaller automaton.) * A brand new set of tools to work on syntax errors. * New command `--list-errors`, which produces a list of input sentences which are representative of all possible syntax errors. (Costly.) * New command `--interpret-error`, which confirms that one particular input sentence ends in a syntax error, and prints the number of the state in which this error occurs. * New command `--compile-errors`, which compiles a list of erroneous sentences (together with error messages) to OCaml code. * New command `--compare-errors`, which compares two lists of erroneous sentences to check if they cover the same error states. * New command `--update-errors`, which updates the auto-generated comments in a list of erroneous sentences. 
* New command `--echo-errors`, which removes all comments and messages from a list of erroneous sentences, and echoes just the sentences. ## 2015/10/16 * Additions to the incremental API. * A `supplier` is a function that produces tokens on demand. * `lexer_lexbuf_to_supplier` turns a lexer and a lexbuf into a supplier. * `loop` is a ready-made made main parsing loop. * `loop_handle` is a variant that lets the user do her own error handling. * `loop_handle_undo` is a variant that additionally allows undoing the last few "spurious" reductions. * `number` maps a state of the LR(1) automaton to its number. * Incompatible change of the incremental API: renamed the type `'a result` to `'a checkpoint`. This is a better name anyway, and should help avoid confusion with the type `'a result` introduced in OCaml 4.03. ## 2015/10/12 * Avoid using `$(shell pwd)` in `Makefile`, for better Windows compatibility. ## 2015/10/05 * Fixed a bug where inconsistent OCaml code was generated when `--table` and `--external-tokens` were used together. (Reported by Darin Morrison.) * In `--infer` mode, leave the `.ml` file around (instead of removing it) if `ocamlc` fails, so we have a chance to understand what's wrong. ## 2015/09/21 * Re-established some error messages concerning the mis-use of `$i` which had disappeared on 2015/06/29. ## 2015/09/11 * Fixed the mysterious message that would appear when a nonterminal symbol begins with an uppercase letter and `--infer` is turned on. Clarified the documentation to indicate that a (non-start) nonterminal symbol can begin with an uppercase letter, but this is not recommended. ## 2015/08/27 * New option `--inspection` (added last January, documented only now). This generates an inspection API which allows inspecting the automaton's stack, among other things. This API can in principle be used to write custom code for error reporting, error recovery, etc. It is not yet mature and may change in the future. ## 2015/07/20 * Added the command line options `--unused-token ` and `--unused-tokens`. ## 2015/06/29 * Changed the treatment of the positional keywords `$i`. They are now rewritten into variables of the form `_i` where `i` is an integer. Users are advised not to use variables of this form inside semantic actions. ## 2015/02/11 * Added support for anonymous rules. This allows writing, e.g., `list(e = expression SEMI { e })` whereas previously one should have written `list(terminated(e, SEMI))`. ## 2015/02/09 * Moved all of the demos to `ocamlbuild` (instead of `make`). ## 2015/01/18 * Incompatible change of the incremental API. The incremental API now exposes shift events too. ## 2015/01/16 * Fixed a couple bugs in `Makefile` and `src/Makefile` which would cause compilation and installation to fail with `TARGET=byte`. (Reported by Jérémie Courrèges-Anglas and Daniel Dickman.) ## 2015/01/01 * Incompatible change of the incremental API. The entry point `main_incremental` is now named `Incremental.main`. ## 2014/12/29 * Incompatible change of the incremental API. * The API now exposes reduction events. * The type `'a result` is now private. * The type `env` is no longer parameterized. * `handle` is renamed to `resume`. * `offer` and `resume` now expect a result, not an environment. ## 2014/12/22 * Documented the Coq back-end (designed and implemented by Jacques-Henri Jourdan). ## 2014/12/15 * New incremental API (in `--table` mode only), inspired by Frédéric Bour. 
## 2014/12/11 * Menhir now reports an error if one of the start symbols produces either the empty language or the singleton language {epsilon}. * Although some people out there actually define a start symbol that recognizes {epsilon} (and use it as a way of initializing or re-initializing some global state), this is considered bad style. Furthermore, by ruling out this case, we are able to simplify the table back-end a little bit. ## 2014/12/12 * A speed improvement in the code back-end. ## 2014/12/08 * Menhir now requires OCaml 4.02 (instead of 3.09). ## 2014/12/02 * Removed support for the `$previouserror` keyword. * Removed support for `--error-recovery` mode. ## 2014/02/18 * In the Coq backend, use `'` instead of `_` as separator in identifiers. Also, correct a serious bug that was inadvertently introduced on 2013/03/01 (r319). ## 2014/02/14 * Lexer fix so as to support an open variant type `[> ...]` within a `%type<...>` declaration. ## 2013/12/16 * Updated the `Makefile` so that `install` no longer depends on `all`. * Updated the demos so that the lexer does not invoke `exit 0` when encountering `eof`. (This should be more intuitive.) ## 2013/09/11 * Fixed a newline conversion problem that would prevent Menhir from building on Windows when using ocaml 4.01. ## 2013/03/02 * Switched to ocamlbuild. Many thanks to Daniel Weil for offering very useful guidance. ## 2013/01/16 * `menhir --depend` was broken since someone added new whitespace in the output of `ocamldep`. Fixed. ## 2012/12/19 * Fixed a compilation problem that would arise when a file produced by Menhir on a 64-bit platform was compiled by ocaml on a 32-bit platform. ## 2012/08/25 * Performance improvements in the computation of various information about the automaton (module `Invariant`). The improvements will be noticeable only for very large automata. ## 2012/06/07 * The option `--log-grammar 3` (and above) now causes the `FOLLOW` sets for terminal symbols to be computed and displayed. ## 2012/05/25 * Added the flag `--canonical`, which causes Menhir to produce a canonical LR(1) automaton in the style of Knuth. This means that no merging of states takes place during the construction of the automaton, and that no default reductions are allowed. ## 2012/01/23 * Fixed a bug whereby a `%nonassoc` declaration was not respected. This declaration requests that a shift/reduce conflict be reduced in favor of neither shifting nor reducing, that is, a syntax error must occur. However, due to an unforeseen interaction with the default reduction mechanism, this declaration was sometimes ignored and reduction would take place. ## 2012/01/09 * Changes in the (undocumented) Coq back-end so as to match the ESOP 2012 paper. ## 2011/10/19 * The `Makefile` now tests whether Unix or Windows is used (the test is performed by evaluating `Sys.os_type` under `ocaml`) and changes a couple settings accordingly: * the executable file name is either `menhir` or `menhir.exe` * the object file suffix is either `.o` or `.obj` * Added `--strict`, which causes many warnings about the grammar and about the automaton to be considered errors. * The `#` annotations that are inserted in the generated `.ml` file now retain their full path. (That is, we no longer use `Filename.basename`.) This implies that the `#` annotations depend on how Menhir is invoked -- e.g., `menhir foo/bar.mly` and `cd foo && menhir bar.mly` will produce different results. Nevertheless, this seems reasonable and useful (e.g., in conjunction with `ocamlbuild` and a hierarchy of files).
Thanks to Daniel Weil. ## 2011/10/06 * With the `-lg 1` switch, Menhir now indicates whether the grammar is SLR(1). ## 2011/05/24 * Removed the lock in `ocamldep.wrapper`. It is the responsibility of the user to avoid interferences with other processes (or other instances of the script) that create and/or remove files. ## 2011/04/28 * The (internal) computation of the automaton's invariant was broken and has been fixed. Surprisingly, this does not seem to affect the generated code, (which was correct,) so no observable bug is fixed. Hopefully no bug is introduced! ## 2011/04/07 * The grammar description files (`.mly`) are now read in up front and stored in memory while they are parsed. This allows us to avoid the use of `pos_in` and `seek_in`, which do not work correctly when CRLF conversion is being performed. ## 2011/04/05 * Fixed a bug in the type inference module (for parameterized non-terminals) which would cause an infinite loop. ## 2011/01/24 * Fixed a bug that would cause an assertion failure in the generated parser in some situations where the input stream was incorrect and the grammar involved the error token. The fix might cause grammars that use the error token to behave differently (hopefully more accurately) as of now. ## 2009/06/18 * `Makefile` changes: build and install only the bytecode version of MenhirLib when `TARGET=byte` is set. ## 2009/02/06 * Fixed `ocamldep.wrapper` to avoid quoting the name of the `ocaml` command. This is hoped to fix a compilation problem under MinGW. ## 2009/02/04 * A `Makefile` fix to avoid a problem under Windows/Cygwin. * Renamed the `ocaml-check-version` script so as to avoid a warning. ## 2008/09/05 * Ocaml summer project: added `--interpret`, `--table`, and `--suggest-*`. ## 2008/08/06 * Fixed a problem that would cause the code inliner to abort when a semantic value and a non-terminal symbol happened to have the same name. * Removed code sharing. ## 2008/06/20 * Removed an incorrect assertion that caused failures (`lr1.ml`, line 134). ## 2007/12/05 * Disabled code sharing by default, as it is currently broken. (See Yann's message; assertion failure at runtime.) ## 2007/12/01 * Added an optimization to share code among states that have identical outgoing transition tables. ## 2007/08/30 * Small `Makefile` change: create an executable file for `check-ocaml-version` in order to work around the absence of dynamic loading on some platforms. ## 2007/05/20 * Made a fundamental change in the construction of the LR(1) automaton in order to eliminate a bug that could lead to spurious conflicts -- thanks to Ketti for submitting a bug report. ## 2007/05/18 * Added `--follow-construction` to help understand the construction of the LR(1) automaton (very verbose). ## 2007/05/11 * Code generation: more explicit qualifications with `Pervasives` so as to avoid capture when the user redefines some of the built-in operators, such as `(+)`. * Added a new demo (`calc-param`) that shows how to use `%parameter`. ## 2007/03/22 * `Makefile` improvements (check for `PREFIX`; bootstrap in bytecode now also available). Slight changes to `OMakefile.shared`. ## 2007/02/15 * Portability fix in `Makefile` and `Makefile.shared` (avoided `which`). ## 2006/12/15 * Portability fix in `Makefile.shared` (replaced `&>` with `2>&1 >`). ## 2006/06/23 * Made a slight restriction to Pager's criterion so as to never introduce fake conflict tokens (see `Lr0.compatible`). This might help make conflict explanations more accurate in the future. 
## 2006/06/16 * Fixed bug that would cause positions to become invalid after inlining. ## 2006/06/15 * Fixed `--depend` to be more lenient when analyzing `ocamldep`'s output. * Added `--raw-depend` which transmits `ocamldep`'s output unchanged (for use in conjunction with `omake`). ## 2006/06/12 * Fixed bug that would cause `--only-preprocess` to print `%token` declarations also for pseudo-tokens. * Fixed bug that caused some precedence declarations to be incorrectly reported as useless. * Improved things so that useless pseudo-tokens now also cause warnings. * Fixed bug that would cause `%type` directives for terminal symbols to be incorrectly accepted. * Fixed bug that would occur when a semantic action containing `$i` keywords was inlined. ## 2006/05/05 * Fixed problem that caused some end-of-stream conflicts not to be reported. * Fixed Pager's compatibility criterion to avoid creating end-of-stream conflicts. ## 2006/04/21 * Fixed problem that allowed generating incorrect but apparently well-typed Objective Caml code when a semantic action was ill-typed and `--infer` was omitted. ## 2006/03/29 * Improved conflict reports by factoring out maximal common derivation contexts. ## 2006/03/28 * Fixed bug that could arise when explaining a conflict in a non-LALR(1) grammar. ## 2006/03/27 * Changed count of reduce/reduce conflicts to allow a comparison with `ocamlyacc`'s diagnostics. * When refusing to resolve a conflict, report all diagnostics before dying. ## 2006/03/18 * Added display of `FOLLOW` sets when using `--log-grammar 2`. * Added `--graph` option. * Fixed behavior of `--depend` option. ## 2006/01/06 * Removed reversed lists from the standard library. menhir-20171222/INSTALLATION.md0000664000175000017500000000253013217215727015744 0ustar fpottierfpottier# Installation ## Requirements You need OCaml 4.02 or later, ocamlbuild, and GNU make. ## Configuration Choices ### `PREFIX` The value of the `PREFIX` variable can be changed to control where the software, the standard library, and the documentation are stored. These files are copied to the following places: ``` $PREFIX/bin/ $PREFIX/share/menhir/ $PREFIX/share/doc/menhir/ $PREFIX/share/man/man1/ ``` `PREFIX` must be set when invoking `make all` and `make install` (see below). ### `USE_OCAMLFIND` The support libraries, `MenhirLib` and `MenhirSdk`, are installed either via ocamlfind or directly in the directory `$PREFIX/share/menhir`. Installing via ocamlfind is recommended (and is the default). It requires the `ocamlfind` executable to be found in the `PATH`. An explicit choice can be made by setting `USE_OCAMLFIND` to `true` or `false` when running `make all` (see below). ### `TARGET` If your machine does not have the native code OCaml compiler (`ocamlopt`), but does have the bytecode compiler (`ocamlc`), then you should define `TARGET=byte` when running `make all` and `make install`. ## Compilation and Installation Compile and install as follows: ``` make PREFIX=/usr/local USE_OCAMLFIND=true all sudo make PREFIX=/usr/local install ``` If necessary, adjust `PREFIX`, `USE_OCAMLFIND` and `TARGET` as described above. menhir-20171222/Makefile0000664000175000017500000001740513217215727015170 0ustar fpottierfpottier# This is the main Makefile that is shipped as part of the source package. # Keep in mind that the hierarchy that is shipped is not identical to the # hierarchy within the git repository. Some sub-directories are not shipped. # The documentation (manual.pdf, menhir.1) is pre-built and stored at the root. 
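# A typical build-and-install sequence, as documented in INSTALLATION.md, is: # make PREFIX=/usr/local USE_OCAMLFIND=true all # sudo make PREFIX=/usr/local install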
# This Makefile can also be used directly in the repository. In that case,
# the documentation and demos are not installed.

# The hierarchy that is shipped includes:
#   demos
#   menhir.1
#   manual.pdf
#   src
#   Makefile (this one)

# ----------------------------------------------------------------------------
# The following variables must/can be configured.

ifndef PREFIX
  $(error Please define PREFIX)
endif

ifndef TARGET
  TARGET := native
endif

# ----------------------------------------------------------------------------

# By default, we attempt to use ocamlfind (if present in the PATH), but it
# is possible to prevent that externally by setting USE_OCAMLFIND to false.

# USE_OCAMLFIND is used only at build time (i.e., by "make all"). At
# (un)installation time, instead, we query menhir using --suggest-ocamlfind.
# This should protect us against people who pass USE_OCAMLFIND at build time
# and forget to pass it at (un)installation time.

ifndef USE_OCAMLFIND
  USE_OCAMLFIND = ocamlfind ocamlc -v >/dev/null 2>&1
endif

# ----------------------------------------------------------------------------
# Installation paths.
# These may be overridden from outside; e.g., our opam package description
# provides its own values of docdir, libdir, and mandir.

bindir := $(PREFIX)/bin
docdir := $(PREFIX)/share/doc/menhir
libdir := $(PREFIX)/share/menhir
mandir := $(PREFIX)/share/man/man1
MANS   := menhir.1
DOCS   := manual.pdf demos
MLYLIB := src/standard.mly

# ----------------------------------------------------------------------------
# A few settings differ on Windows versus Unix.

# If the compiler is MSVC, then object file names end in .obj instead of .o.

ifneq (,$(shell ocamlc -config | grep -E "ccomp_type: msvc"))
  OBJ       := obj
# LIBSUFFIX := lib
else
  OBJ       := o
# LIBSUFFIX := a
endif

# If we are under Windows (regardless of whether we are using MSVC or mingw)
# then the name of the executable file ends in .exe.

ifeq ($(OS),Windows_NT)
  MENHIREXE := menhir.exe
else
  MENHIREXE := menhir
endif

# The path $(installation_libdir), which is recorded in src/installation.ml (see
# below), must sometimes be translated using cygpath.

# This one is tricky. To summarize, if I understood correctly, we can assume
# that Cygwin always exists when Menhir is compiled and installed (because
# executing a Makefile, like this one, requires Cygwin), but we cannot assume
# that Menhir will be executed under Cygwin. If the OCaml compiler is
# configured to produce a Cygwin executable, then, yes, Cygwin is there at
# execution time, so path translation is not necessary (and should not be
# performed). On the other hand, if the OCaml compiler is configured to
# produce a native Windows executable, then Cygwin is not there at execution
# time and path translation is required. In summary, path translation must be
# performed if "os_type" is "Win32" or "Win64", and must not be performed if
# "os_type" is "Cygwin" or "Unix".

ifneq (,$(shell ocamlc -config | grep -E "os_type: (Win32|Win64)"))
installation_libdir := $(shell cygpath -m $(libdir))
else
installation_libdir := $(libdir)
endif

# -------------------------------------------------------------------------

# The names of the modules in MenhirLib are obtained by reading the
# non-comment lines in menhirLib.mlpack.

MENHIRLIB_MODULES := $(shell grep -ve "^[ \t\n\r]*\#" src/menhirLib.mlpack)

# ----------------------------------------------------------------------------
# The directories where things are built.

# For Menhir and MenhirLib.
BUILDDIR := src/_stage2
# For MenhirSdk.
SDKDIR := src/_sdk

# ----------------------------------------------------------------------------
# Compilation.

.PHONY: all install uninstall

all:
# Installation time settings are recorded within src/installation.ml.
# This file is recreated every time so as to avoid becoming stale.
	@ rm -f src/installation.ml
	@ echo "let libdir = \"$(installation_libdir)\"" > src/installation.ml
	@ if $(USE_OCAMLFIND) ; then \
	    echo "let ocamlfind = true" >> src/installation.ml ; \
	  else \
	    echo "let ocamlfind = false" >> src/installation.ml ; \
	  fi
# Compile the Menhir executable.
# This causes MenhirLib to be compiled, too, as it is used inside Menhir.
# Compile MenhirSdk.
	@ $(MAKE) -C src bootstrap sdk
# The source file menhirLib.ml is created by concatenating all of the source
# files that make up MenhirLib. This file is not needed to compile Menhir or
# MenhirLib. It is installed at the same time as MenhirLib and is copied by
# Menhir when the user requests a self-contained parser (one that is not
# dependent on MenhirLib).
	@ echo "Creating menhirLib.ml"
	@ rm -f $(BUILDDIR)/menhirLib.ml
	@ for m in $(MENHIRLIB_MODULES) ; do \
	    echo "module $$m = struct" >> $(BUILDDIR)/menhirLib.ml ; \
	    cat src/$$m.ml >> $(BUILDDIR)/menhirLib.ml ; \
	    echo "end" >> $(BUILDDIR)/menhirLib.ml ; \
	  done
# The source file menhirLib.mli is created in the same way. If a module
# does not have an .mli file, then we assume that its .ml file contains
# type (and module type) definitions only, so we copy it instead of the
# (non-existent) .mli file.
	@ echo "Creating menhirLib.mli"
	@ rm -f $(BUILDDIR)/menhirLib.mli
	@ for m in $(MENHIRLIB_MODULES) ; do \
	    echo "module $$m : sig" >> $(BUILDDIR)/menhirLib.mli ; \
	    if [ -f src/$$m.mli ] ; then \
	      cat src/$$m.mli >> $(BUILDDIR)/menhirLib.mli ; \
	    else \
	      cat src/$$m.ml >> $(BUILDDIR)/menhirLib.mli ; \
	    fi ; \
	    echo "end" >> $(BUILDDIR)/menhirLib.mli ; \
	  done

# -------------------------------------------------------------------------

# The files that should be installed as part of menhirLib.

MENHIRLIB := menhirLib.mli menhirLib.ml menhirLib.cmi menhirLib.cmo
ifneq ($(TARGET),byte)
MENHIRLIB := $(MENHIRLIB) menhirLib.cmx menhirLib.$(OBJ)
endif

# -------------------------------------------------------------------------

# The files that should be installed as part of menhirSdk.

MENHIRSDK := menhirSdk.cmi menhirSdk.cmo
ifneq ($(TARGET),byte)
MENHIRSDK := $(MENHIRSDK) menhirSdk.cmx menhirSdk.$(OBJ)
endif

# ----------------------------------------------------------------------------
# Installation.

install:
# Install the executable.
	mkdir -p $(bindir)
	install $(BUILDDIR)/menhir.$(TARGET) $(bindir)/$(MENHIREXE)
# Install Menhir's standard library.
	mkdir -p $(libdir)
	install -m 644 $(MLYLIB) $(libdir)
# Install MenhirLib and MenhirSdk.
	@if `$(BUILDDIR)/menhir.$(TARGET) --suggest-ocamlfind | tr -d '\r'` ; then \
	  echo 'Installing MenhirLib and MenhirSdk via ocamlfind.' ; \
	  cp -f src/menhirLib.META META ; \
	  ocamlfind install menhirLib META $(patsubst %,$(BUILDDIR)/%,$(MENHIRLIB)) ; \
	  cp -f src/menhirSdk.META META ; \
	  ocamlfind install menhirSdk META $(patsubst %,$(SDKDIR)/%,$(MENHIRSDK)) ; \
	  rm -f META ; \
	else \
	  echo 'Installing MenhirLib and MenhirSdk manually.' ; \
	  install -m 644 $(patsubst %,$(BUILDDIR)/%,$(MENHIRLIB)) $(libdir) ; \
	  install -m 644 $(patsubst %,$(SDKDIR)/%,$(MENHIRSDK)) $(libdir) ; \
	fi
# Install the documentation, if it has been built.
	if [ -f manual.pdf ] ; then \
	  mkdir -p $(docdir) $(mandir) && \
	  cp -r $(DOCS) $(docdir) && \
	  cp -r $(MANS) $(mandir) ; \
	fi

uninstall:
	@if `$(bindir)/$(MENHIREXE) --suggest-ocamlfind` ; then \
	  echo 'Un-installing MenhirLib and MenhirSdk via ocamlfind.' ; \
	  ocamlfind remove menhirLib ; \
	  ocamlfind remove menhirSdk ; \
	fi
	rm -rf $(bindir)/$(MENHIREXE)
	rm -rf $(libdir)
	rm -rf $(docdir)
	rm -rf $(mandir)/$(MANS)
menhir-20171222/menhir.10000664000175000017500000000612613217215730015064 0ustar fpottierfpottier.\" Hey, EMACS: -*- nroff -*-
.TH MENHIR 1 "April 19, 2006"
.\" Please adjust this date whenever revising the manpage.
.\"
.\" Some roff macros, for reference:
.\" .nh        disable hyphenation
.\" .hy        enable hyphenation
.\" .ad l      left justify
.\" .ad b      justify to both left and right margins
.\" .nf        disable filling
.\" .fi        enable filling
.\" .br        insert line break
.\" .sp <n>    insert n+1 empty lines
.\" for manpage-specific macros, see man(7)
.SH NAME
menhir \- parser generator for OCaml
.SH SYNOPSIS
.B menhir
.RI [ options ] " files"
.SH DESCRIPTION
.B menhir
is an LR(1) parser generator for the Objective Caml programming language.
That is, Menhir compiles LR(1) grammar specifications down to Objective
Caml code. It is mostly compatible with
.BR ocamlyacc (1).
.SH OPTIONS
.TP
.B \-h, \-\-help
Show summary of options.
.TP
.BI \-b,\ \-\-base\ basename
Specifies a base name for the output file(s).
.TP
.B \-\-comment
Include comments in the generated code.
.TP
.B \-\-depend
Invoke ocamldep and display dependencies.
.TP
.B \-\-dump
Describe the automaton in
.IR basename .automaton.
.TP
.B \-\-error\-recovery
Attempt recovery by discarding tokens after errors.
.TP
.B \-\-explain
Explain conflicts in
.IR basename .conflicts.
.TP
.BI \-\-external\-tokens\ module
Import token type definition from
.IR module .
.TP
.B \-\-graph
Write grammar's dependency graph to
.IR basename .dot.
.TP
.B \-\-infer
Invoke ocamlc for ahead-of-time type inference.
.TP
.B \-\-interpret
Interpret the sentences provided on stdin.
.TP
.B \-\-interpret\-show\-cst
Show a concrete syntax tree upon acceptance.
.TP
.BI \-la,\ \-\-log\-automaton\ level
Log information about the automaton.
.TP
.BI \-lc,\ \-\-log\-code\ level
Log information about the generated code.
.TP
.BI \-lg,\ \-\-log\-grammar\ level
Log information about the grammar.
.TP
.B \-\-no\-inline
Ignore the %inline keyword.
.TP
.B \-\-no\-stdlib
Do not load the standard library.
.TP
.BI \-\-ocamlc\ command
Specifies how ocamlc should be invoked.
.TP
.BI \-\-ocamldep\ command
Specifies how ocamldep should be invoked.
.TP
.B \-\-only\-preprocess
Print a simplified grammar and exit.
.TP
.B \-\-only\-tokens
Generate token type definition only, no code.
.TP
.B \-\-raw\-depend
Invoke ocamldep and echo its raw output.
.TP
.BI \-\-stdlib\ directory
Specify where the standard library lies.
.TP
.B \-\-suggest\-comp\-flags
Suggest compilation flags for ocaml{c,opt}.
.TP
.B \-\-suggest\-link\-flags-byte
Suggest link flags for ocamlc.
.TP
.B \-\-suggest\-link\-flags-opt
Suggest link flags for ocamlopt.
.TP
.B \-t, \-\-table
Use the table-based back-end.
.TP
.B \-\-timings
Display internal timings.
.TP
.B \-\-trace
Include tracing instructions in the generated code.
.TP
.B \-\-version
Show version number and exit.
.TP
.B \-v
Synonymous with
.BR \-\-dump\ \-\-explain .
.SH SEE ALSO
.BR ocaml (1).
.SH AUTHOR
.B menhir
was written by Fran\(,cois Pottier and Yann R\('egis-Gianas.
.PP
This manual page was written by Samuel Mimram, for the Debian project
(but may be used by others).
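.SH EXAMPLES
A typical invocation, where
.I parser.mly
stands for your own grammar specification, might be:
.PP
.B menhir \-\-infer \-\-explain parser.mly
.PP
This generates parser.ml and parser.mli, letting ocamlc infer the types of
the semantic actions, and describes any conflicts in parser.conflicts.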
menhir-20171222/LICENSE0000664000175000017500000012776613217215727014551 0ustar fpottierfpottierIn the following, "THE LIBRARY" refers to the following files: 1- the file src/standard.mly; 2- the OCaml source files whose basename appears in the file src/menhirLib.mlpack and whose extension is ".ml" or ".mli". "THE GENERATOR" refers to all other files in this archive or repository, except those in the subdirectory test/, which are not covered by this license. THE GENERATOR is distributed under the terms of the GNU General Public License version 2 (included below). THE LIBRARY is distributed under the terms of the GNU Library General Public License version 2 (included below). As a special exception to the GNU Library General Public License, you may link, statically or dynamically, a "work that uses the Library" with a publicly distributed version of the Library to produce an executable file containing portions of the Library, and distribute that executable file under terms of your choice, without any of the additional requirements listed in clause 6 of the GNU Library General Public License. By "a publicly distributed version of the Library", we mean either the unmodified Library as distributed by INRIA, or a modified version of the Library that is distributed under the conditions defined in clause 3 of the GNU Library General Public License. This exception does not however invalidate any other reasons why the executable file might be covered by the GNU Library General Public License. ---------------------------------------------------------------------- GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. 
We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. 
c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. 
If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. 
If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. 
Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. ---------------------------------------------------------------------- GNU LIBRARY GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1991 Free Software Foundation, Inc. 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. [This is the first released version of the library GPL. It is numbered 2 because it goes with version 2 of the ordinary GPL.] Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This license, the Library General Public License, applies to some specially designated Free Software Foundation software, and to any other libraries whose authors decide to use it. You can use it for your libraries, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. 
To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library, or if you modify it. For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source code. If you link a program with the library, you must provide complete object files to the recipients so that they can relink them with the library, after making changes to the library and recompiling it. And you must show them these terms so they know their rights. Our method of protecting your rights has two steps: (1) copyright the library, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the library. Also, for each distributor's protection, we want to make certain that everyone understands that there is no warranty for this free library. If the library is modified by someone else and passed on, we want its recipients to know that what they have is not the original version, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that companies distributing free software will individually obtain patent licenses, thus in effect transforming the program into proprietary software. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. Most GNU software, including some libraries, is covered by the ordinary GNU General Public License, which was designed for utility programs. This license, the GNU Library General Public License, applies to certain designated libraries. This license is quite different from the ordinary one; be sure to read it in full, and don't assume that anything in it is the same as in the ordinary license. The reason we have a separate public license for some libraries is that they blur the distinction we usually make between modifying or adding to a program and simply using it. Linking a program with a library, without changing the library, is in some sense simply using the library, and is analogous to running a utility program or application program. However, in a textual and legal sense, the linked executable is a combined work, a derivative of the original library, and the ordinary General Public License treats it as such. Because of this blurred distinction, using the ordinary General Public License for libraries did not effectively promote software sharing, because most developers did not use the libraries. We concluded that weaker conditions might promote sharing better. However, unrestricted linking of non-free programs would deprive the users of those programs of all benefit from the free status of the libraries themselves. This Library General Public License is intended to permit developers of non-free programs to use free libraries, while preserving your freedom as a user of such programs to change the free libraries that are incorporated in them. (We have not seen how to achieve this as regards changes in header files, but we have achieved it as regards changes in the actual functions of the Library.) The hope is that this will lead to faster development of free libraries. 
The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a "work based on the library" and a "work that uses the library". The former contains code derived from the library, while the latter only works together with the library. Note that it is possible for a library to be covered by the ordinary General Public License rather than by this special one. GNU LIBRARY GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any software library which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Library General Public License (also called "this License"). Each licensee is addressed as "you". A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables. The "Library", below, refers to any such software library or work which has been distributed under these terms. A "work based on the Library" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term "modification".) "Source code" for a work means the preferred form of the work for making modifications to it. For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library. Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). Whether that is true depends on what the Library does and what the program that uses the Library does. 1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) The modified work must itself be a software library. b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change. c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License. 
d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that, in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful. (For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library. In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2, instead of to this License. (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) Do not make any other change in these notices. Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. This option is useful when you wish to copy part of the code of the Library into a program that is not a library. 4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange. If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code. 5. 
A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a "work that uses the Library". Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License. However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a "work that uses the library". The executable is therefore covered by this License. Section 6 states terms for distribution of such executables. When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. The threshold for this to be true is not precisely defined by law. If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.) Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself. 6. As an exception to the Sections above, you may also compile or link a "work that uses the Library" with the Library to produce a work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications. You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. If the work during execution displays copyright notices, you must include the copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. Also, you must do one of these things: a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked with the Library, with the complete machine-readable "work that uses the Library", as object code and/or source code, so that the user can modify the Library and then relink to produce a modified executable containing the modified Library. (It is understood that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.) b) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution. c) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place. 
d) Verify that the user has already received a copy of these materials or that you have already sent this user a copy. For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for reproducing the executable from it. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute. 7. You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above. b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 9. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it. 10. Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. 
If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Library at all. For example, if a patent license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 13. The Free Software Foundation may publish revised and/or new versions of the Library General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation. 14. If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH
YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
NECESSARY SERVICING, REPAIR OR CORRECTION.

16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO
LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR
THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER
SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.

END OF TERMS AND CONDITIONS

Appendix: How to Apply These Terms to Your New Libraries

If you develop a new library, and you want it to be of the greatest possible
use to the public, we recommend making it free software that everyone can
redistribute and change. You can do so by permitting redistribution under
these terms (or, alternatively, under the terms of the ordinary General
Public License).

To apply these terms, attach the following notices to the library. It is
safest to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least the
"copyright" line and a pointer to where the full notice is found.

<one line to give the library's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>

This library is free software; you can redistribute it and/or modify it
under the terms of the GNU Library General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at your
option) any later version.

This library is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License
for more details.

You should have received a copy of the GNU Library General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA

Also add information on how to contact you by electronic and paper mail.

You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the library, if
necessary. Here is a sample; alter the names:

Yoyodyne, Inc., hereby disclaims all copyright interest in the library
`Frob' (a library for tweaking knobs) written by James Random Hacker.

<signature of Ty Coon>, 1 April 1990
Ty Coon, President of Vice

That's all there is to it!
menhir-20171222/manual.pdf0000664000175000017500000151655313217215732015475 0ustar fpottierfpottier[binary PDF data omitted: pre-built copy of the Menhir reference manual]
HP pV)HX\C,0dz}d/,2_ Bl0͢ ,p !3WIn4~а<:Mrv΅z蚏um5'Z੬#ay]c xIjX}@4 {J0%&eث^#^ 0cdO$8 z.AC$KÄXX )D= ;hҢ'=~Җs]׏;٭*g,^X:x)YVs~223̅ʌ"TгV](IR#\Q-J$ad, D~i {$K L.FF̘<.*O>.9ș(YL},O;(R.jW&zվzAT-?0V!`ms悗8o\RMor~ӽCΗbu%I᳜!B?u6+"E]W]xa-W?{}v,.TPũXH.&0m+`5]XAYco C)ͦjh+LR l2M=խyʧ)|R-%QT=}n+tzᒒ3Fsx 1ZM,O'=O\ B\3&%ʥP.[X(9F6Ilz|*p|5-۬NJ`I-Tь\ZrMՙ f%Y۷(IW&2 S}!X!սk~H3M(V[!w ӼEi+yћxdS>X^7e>APAlJ2T8Z@=Tk2Jl>l!e(Ӭ[W|c{?˗;DկTK&,B?C$ jp/2@1M{>}oSϻOy~6и7]YQ0+SW*U|q(JYMP$+I"n?.y endstream endobj 412 0 obj << /Length 3372 /Filter /FlateDecode >> stream xڝ]ܶݿ^{H}&&.PW˻ε|tO"p8o*yn~x='FGJ}swU&P7M= F:S&KzZ`,ѥ&rqqPw?Ƿxw?nsUvRh0 3~QՠRMGc ݯ8ѓːMW=JO,_g|0}s^ Zl2-ؙD< \lrdhdч Yh55/gAq% [1E PK#|PVߣ|H xwߪ6GdN9dIHfWA-XX,rIgȟگ0"Y8"1,h)XA2s#aJU&*Pը3Vm͎60)؆HZD#oeEZ{9@ECAЂ0Hx ByG_UXtfB;yw滰}]`#-11Uk7xtkJ\d\x,eywK_/$GNlT4]$(r+@D#{U7smY.(FlN'Z$ZEŞGsA.:5tK!Tde slW_,Xp qiR%:[2a/$S.b; L;TlΖx)KI/KHpLO1w秥V~I+FD,` Kjd\*_bAN"C 6k['K[ cir(2K۴g(c@ߣ$uLj5>Ǽ2UDqTJ5܈95xcۇ}TJmO|484:E3L"t ԻG<'La8}w9Fr\/ V0JK@c+OX衉f[g=ut[Cb*K~z4S78_ĺ %P蘘 3'#tZUW!zA ='/TKhq?nJo4 Ԛ$f%`L>o,R R^U/<ق2A|5ml:6j ,G$Wci=4SdYoL$G Z T`c*Kp̞+kP[ߧP 7 w( *H^ L`lLsmUW_0 endstream endobj 423 0 obj << /Length 2907 /Filter /FlateDecode >> stream xZY~_/lfS,0 f6d'I ȶ-ĖI$=U,RWGX,bnoo_EW}' XF ɛ3|ɣ(}h>oWQϗBG˛L͒LKE3/Up,٧HG6kV4 n]&E,ڢ*eU)v=?65؛&ﳲ?1 i*Y,s)pd_C+YuҚ2X\Lꉊx!msg+tXw&<"\ RǖKa{Xc̈́#"|Tzh,).'0ᘧW,՛K(LJ-@56= l{)bs2O=Iv9:)`kD8O>-~IEQӓDDwFU.}6ePi,k óuzj*1ϤL2)CX_eG^ 26q15m~yˣZtq>-y)hS&8Y k g?ZC0A ~ Ō'egXvÆߢƥW_N$Su7 S\r\P/ %לM~ 9;\f}2is{AMm*M'M`1eR }SܗԺV7/c7~Ƭ= 9DN㪥bqpn+bP63gha| L2k:CR#ק_%,5߅@t&fW!]a)!Lz1.5]KulxCu4R7>w}Ɨni]\ /3׵{y~O"ߍI&X݆nC2r,B/Ox嘛I^V|v9 M}Lxht2]AѵmWpS[J( 9>BA/d| 6eq%p> Sv bam$yd$rlob\{(,'}f$MO0ۛ)Drn|^Ǔ< 5q^+tzV5vPxw zv7縲[ lq档ß ź6&|Qa H|TtJDJ1J..}fAi lxV+s>PxѴnfQȺFpbi3 oӕPz@e޴h.4rw}n8hǦ~3Pk@b6h˒a`v.1-/}q"v=aC] ΦRn^ĄM+ JG,cy||D?QM~A_ f&?(n+'.zݓE8(Y‘x4קP#g9cB8UB'ˇ7qD;l> stream xZ[۶~_>xF%7Iu$N$J1E*$6!kL/K8<88@K' 킞x+'lr@s 'd|@N\ `f>CR:-x@Cli_Y0M*|[l [N}oG*MPDnj1@ 8Omϼj j+"훇Y64jBԞS#^W1iZ4(cI[.~NV0ɽMd$ t哛c` ys6 =_eC5c4EpS*BwrޜR.NgC4Y*`9K`Wi]C|.a=.I Ġ2#fVZSp:'UK㒴yw`L dMkI.'^iҲȊIe:W6 5>|T.4H;U#H7۴\s_tuqV3ll"Z d+O!٤t;."qv_$H1 Ts-AbΊfԏK_R5rп|BnQ"p n&w:RbzDgї,bhp*ņD9zv]: \ ުGIL' ڊ+҈VQ_;N/< θ)mpʼnA\551cGv)i+8Ӂ%#mZQRҢG[P,1ip|gie3LSr5[H rT=D4#~ ,oXŦ1yxq4'sHe+a.-D.)V>nuG9@TV5oق,$(Sr辰|% VqZ&:ɪAuO&*hc ;1mƦ&u$5>ϛ.,|e.bym~ǐVF)b3b*SA 53[?a!gE(<4lP@#Qy(s`YLC2֊hRKB5V tx|+l)Ow:|_cX0(~hkZ#A74b|t8}WV TR0f|EaI2myV-i2dڄkYCO:_>(׏?*9ϋQzF>I ~\N?!1A*Xs f yRYqU >j'N'{~hkNJMT8lZ,}rT(AG)i, m2X#`vMIwJ+.fzK4da%Q+ewQup AL'B81Ɋ uOXU3qgϡ*48ҢʁJTW 4]#mp0`e!i^;ssYDӏ=}%ŅⰻԚ@)=1m5ZHu 4"Jk$Is/9\pRچjuXO|hz.'Uk*޲ܥ'FA( -d M_<+t5Z Gڰvxo p 5W>D5A&.pP)LZ 0e¡X.kե(iO0ǩٲgM:AZ@|n}mnW/@ٲɱ~ꗟh,GHm'_y¢vZH֬x 2'TY[o11eOL_Uзuۯч__x_ umblӒ} :e1}H-QDnX 1E#!]w W]Y۟ (no~sQݩ2 lIeD2~AS=VjTGDX*{Q>Z_?"P) m*xcN/&_/5aǨajp3{#Ik5#]ӏu.M1_h9Wo${͌n`p4\cH^aҁfÂf,{Ć]?k^#J:!2CK*e@XI?8}Zj|9-,/4!ɪQkĩ0ObrSt݃1QBp,b'tkt1H]O"&=dd+$JN65*W*tgkZ ܁>0fR0;K @P qwSX](mw'|7Y*$ %G15娻yh&c:} dR Տ mH"L#)s|vI?tB))$ಿ{ZN) 78Tw9Ye>;|OOqu{o66 endstream endobj 336 0 obj << /Type /ObjStm /N 100 /First 874 /Length 2266 /Filter /FlateDecode >> stream xZo~M8I\Zȃ\.΀VNҞMސ;7Ȭ=_NhA[;Z&I.`&YeS̄rB ;C.g6IA5M|V#U&S㧖^iA{|*Nf5S'`*;~S|qf%S#y;L뜣K$LS KB˯@е̙Ac37 V*\K0à [$\z+4vh_V\ԸA ,YZf\\ >Sp];\%Nvz 55x|SC1$:;R1|e)Zb>`tJm3q;:I M%Zh]0|C (kfa6wvJh5; ƴ餆EA1/Ʈr[Cw ;jɍvt*FwΈե'ZVϞ֯&V?j8[}kV6ovuGEޘń/', &<{a6?ݼ#nb+ a"yW߽i$ ]ل}ym8v_.w Ëj7Bk0Pq"T|*$;A|C7\<֯6vQ//ݬns*S~7o67^so q*|k=ݰY7*m!y-ܷml۷2:ylk9`c:Mb `bz1ݯ2ylhF5r5l[UL,DžaN%ra였r.IK) XiͫE4XZ;(R$&M鮊F9;Q-Njtdžr&*)jac,)~d*'˱/Bl"R}0@3eb;" b~  tR' ] "/R&^d_&.eQʑ($ǑK=t6:2:2:K:3LxMDT'Ι f!6 Ejc cu>FYe9),5!`?qrDN"81awPMTy>BSVPH$cb*@E [z?__o`E 
L"`˹h(#2k8]PO9)>!KpVtZ%Z`4OpHgugD['N{ R'WwT W9p*}L-&!?Q'z$0QˡW%Agd,xp;r,Jf{},mlGLwqԂBy+b~Z }ˋ1|i; m8y*T#*DzBjp0Cs"!,'A2f^ m#SkNdcAEa +%VNl6 FʓnrLI٪5M5])*N%G";(C_@_~ːWDcc(%kȾ= h , [X:U`7(әnA8㖗[`?XV9,f ~6Ԝ1l<.y:de,Nӱhdʯawx;#O厉?XsX endstream endobj 443 0 obj << /Length 3517 /Filter /FlateDecode >> stream x[ݓ#7PxiCpKX.I]LARG^ 3ffV|kCqKjZݿni_|秛W?B^gb/ˢhb/LseTw_~*3=h|׵;MMkWD~<;7-uC,'jj#QPS<靁1N*vEKi=ךx0ϛ-BIEEaF4:eUagcڳݬSq陫zj[hᔪ=]-Zip2|k%&7){xwD 㫘?gAL6 Qh?ybb"S`P迹_1bj6xV5F_@>7,/XkQXS!xHFV$֧B4˂!L#Ki\x}BYąov^߫@}(ξ-61jr0{q~)z&hْǩvI*5#o_Q:>Υ>LF&OTX؟̉哱C <;C/qm)kmEhX=wYHʀ wQe@vi-z;ץ5_x75o ?f R8 +& %Fg#qy=Z0ELڛ= ^@߰AxrǞL*D4t `.Ơ[RA֘X']#یM:Xڜ6s)v-Z`,̕b:@Jj[!fZ3Ww)ٴo\+Lf~va0tʼnjZsG['Ilj ٰE-mb yd'J*[Y5IEoN ScBHAA"#?w!"ۓKEx5̇^Q.D|r]iuܡ> &~ᄎop yCr&#a+'h@o!SE+3@|Y,^֋4^Zg5K" Kx"m 1f< L!\ )_WwD$"Z,ju0QW 5BݘDyA&<ϵz]2:r9#uк3S];RdrYPY=`\Ϥ m B >)KW+?h [sl[WtxVGV/@,ʁ+hDH?OU|H']cq~8ݳnkIĆC|}fBk I0lbk`p02}> Ne^<-MeI~H`<[Sqp0"F Q#q tá)i?Y$ιnhk;;Qu^ w9M@{ujQ<ꬂ~;iζ= 1~KĵGHVNcjٲsrm~E܇Ȥu|Ψcb%ÅN kFhx O{pCЭ; /=1K!Zx*yf2褐 "8I^μd8a-¡|;y87{HL䪩[aVH-iA HmJ!_*ޮ$ 56mzl+ЂΧ9[ QϧM&H!U<[Q 4jSv&m7|&T(еwGXgXgGd Ip f[ѥ~ߞ5FAH* dZ›վ)zI"[K8VoǦGz9Gp}#Dx0ckgZ[—Qm) b<>nlZaN$axb4=vUQ΍Bk~͋`Rs%!Q3[[D<#9szÍgȟYm@5߂۠ [:ފh߾L:.gQVk<[~"9qjʍyڝqw"|*G:gA`,)c |v^#ES]vE €Dj,!LC@M/jt[Քvy8Zi(^,ky[}b Ef?b,bJD)VdA{.k 5+)o"=pV(v!d; oXy"T!1bz>=9BjpC9,ͧ'f/h EQP ѰH*aܿOg4!Kv Th%t/KYNB`Xa:ww,p.N8L&XX'_VEc!Ŧ̑eT<% 3%1p‡EluTD 0 +zjl0A7x+ãMI_6yKh'{$E 9P5]1uU-jqiQ6>_÷_o~=E7#X; Nz" Mb+@Vًآ ԓ I1/wJk{ e>ߑ{'M&u|:L*8cD{q> stream xZKϯ-0lsb6⃓xaPRψE$.Rd]n6=EFwO|IzER(I&h%2S'M650d鷳t`8 #pe]ٞ@Cr ᠡ# !\g/ATpͦ:`Q}gGUv@B+. d/>({eD0&d%H5?it> >y>?@.0Dq~EZr)_^'ғ?:HQo dZ&VAϑE.g𢌊y\̡hWLEo?_©*q -z,ʁqhW"lR7Onܜ=X6Y 05m{Ęx>DQ†: ޴lL⽇Mt\?Q4"C3QgݚܗJI/mAJM=eZk?C(/N c"X|h\WvDc?~3M1~,ꎃL>J[kNxۯD@Q M4󥫀~%P(gÏ&ѲYΉPKX^Rׁ>,G7X^KD5(Ij9ZM k%oIif"#9VPf>eX4`(# VoP)#Z<.Y)U =]q/ڇ1gC ]_ۢET庵{d`P)ŝ۲\*DXKE tJ/5ݒlC7*?v:snWkgxܴ~MXyqfȾ`yS`'~W 8i5tw5'(lMTڸ5w `A;VS?N;UiTć}qn~>;*[V^K\ rį.74ZM-rlkM %ӫajhFN@ iG`zgޛ cOۡ*ߕl#;l2Ai+ 0\CUi{-@;Wn}0/v8w_vDFڕi编JAyL^e6oLGy`ڲ?恷P ?1&9 Nd;[ endstream endobj 462 0 obj << /Length 3087 /Filter /FlateDecode >> stream xَF}B/Jİ<dx}@Q=aTxV6[We]]]U]]W;w_ݕrMayN%@ WWm~ڔx:=}bw}k=s7ᴓ; NM"Zd;n-CK4M^K`O{DVyNd_-wH{Dquhez,B ߢ0oSNN[y~[ eyȢyZ'^a]fK:<%JDtǀ1~'XQ@M!\|k 9 =,rѠ_m=N]|Uv{:hp P?8x/`Y37*ۤEo-#igV[T/9׵9t dUuutT‚KOi &^1\]6]f4$/lq#i^_ Ϝy2nnOb":\_u;t] ?yprѺ* Hs0MV痖IvmuN۪ mljO1\r9Ʌ3nao*}o|l+IDI0tBP5I^RJ)A<{Q_Uveښ n4QoZ4eICuʱ+d<ѧKabs3r^C= Z(V(![8)-yWhiApXy-8x^M LYyʭaE%/vixjg r Kⰷ0^硰0 $n\僮?K^WjНhMtbZW O gI$qLtDӁKyٴ&=l%<ʹ*+J3.q0v(uok'q 7DsYVt`#L1]|4-f#՚:Mv;ez46ANqh,%)uV-e2|ZTt™h)bG^@{JQRp kYwYO[wG? 
\TED#--#RZ0pꀘ)EHlt0χ<3[IJ}$wi#7ZwWfS^KJuk~ꥮLj乪'wwQ1$#}6)eX.簃 #_ Mi (ݭ=0ᙲ6~h+&O-gdTe&d8OO\ºk1G]g(z.Ã5Y+`8>܇lbU ,g/rb},#,ԏ!̈n<Wܕix-u)/HD:"gz](%iwT(K=EIAFx !n+Rȥmb-]Xyp %%'GdFX*ri;uZZ)ux/B 6oTdqB*0`' ocHW%/T]Ϳr^>TZH6,g;I0q{÷?:N|/ĉc[EJBHj> stream xZ[s~`*X 'wd&gAGmR<$$Cd/9 )`,>,v]O+a' ^Jxa9+kVͿ p7u/6ӿ0t)dx #y =zWijNSpd*y%p??|iċ"MlK+9n_n>~{ ֤z~ w?mۛgB``XZstq%8 +N.tX.G/IkbϡoEpUU*N 5w$=Kf+i-)aFC@}L*"Wg*o.3ɨ} ˜Ux262WY9/W% ,g@6)+CmY=%YR7O4)Z930P8>baBٮ;j;=_G|ٝW/wcS@B5 ,7~p)廦oݾ.5韛8ΩzoZ)-ϋHȄ䘡߹g8sm^gjQQsS֢4+*GnwjGLP"BVF&KƊx jfM4F-aY*x5i3eO˅x>K3ED.SMn9 cB}skS^/ڲOG?C7H Ox<)vt{jy"&Ohyزww?mW-Lkh>m%?<mQ)8x| ~O?ö0k Mkd?DnwgmVw!N kp%?[fB P e|3ZYw[Դ oDq\74omcM-]2rtgXeUHgAkrNٚ-jܒ ܼ}E+u9>En.RD7\n2i[ !:`C00Fa1R.D_df3#9>}B9t] ś\^O 쎓e 2߻tO1YX1?ߟ$xm&~6OmqGN;H ; lq` 6bHm1s%nga4-16Ѻ߸?rN:Wutij@Z# ~y49 -KMG=nՇ}A/7D~073h5}ZUdL|x&D٠h;KK6/tFEWmtb~.,ֵLYpY \?4xi<>4@6$J znh`3Pb 63pr9V k w|Rj\zuDXlp<%uA*D"z~p0a8GQc:x0S dIGGx2WwQ2w،:̥=<χ|yI~1Pſe$%"=#60~t|eT`*}u{?T>7dBϣ_fYʊѳ:i/ݘkph`4;2[͆ݓ36ǀn a^z,nTwB<˗zu׬\5\ZW͸ֆ^Vˑ} don0 M9 N/MC'ɦ5ùַ廬KdӋXHEikx^Y4 ?WR I^9^`r!D40O endstream endobj 488 0 obj << /Length 2769 /Filter /FlateDecode >> stream xY[o~`jքs%}(Rs)i!,hٕDE⸿HI98Eވ3s9ߜ+&I3,Eg` D@ڇUKmY:'aq4DQ13X JSAL88L<! T@΁1$a@Qe(,3hIHɓD1S)6)1Rx@XH8{T *91֓s迤LH$5k[hz>_LhNc|@ #xdJHU>g24#P>g+x>y'o Aͮ\/ ʶF0  w7SEh *,YTPnm5-cQ UFa91;0 3{ќ'` Am<٤Zrd^1 D4D ̸ &CQ@)I4o :堅20K 2o֎Z}qh֭C`)ڮ뛹hӿM[ {)7q(U.N;r`n=3 7PHyc\c*/Ůjz(QLx]ކj7]BtO0Xg*{}{ }̩-J(1tƗ0u$iv,BMO2^[ȥb!_"[O~ĉRL#pH9aQzgCK?pM\>a$My[lf_[Q9hYNbߔn*i(n\x5]uNMS]oF7+{Ns+;/6~rotAȼ#pjNf6=+)ۍo],,37şUXt՜ڎva&ua/`]׃0ܸxG4a}Ꮹ7sqzg?̫u(OKs@3f]g.ah'h{t4k 5Nq/B~E +6ʵr떸u[3m-/F2`s),juW Cu{n+qW4Xv#jc ^Hylf]Ī q^}X7#E_wnTsI>/$3qI]3غr܆s. HʲGo*n!MI3<}YEaPu=HeE^k67‚Q-qfdCVv00zDez$k R/GQDOQ7\q7F\DLx 396!n3PLX֘!>mY0|Mk+:85`U6@yyLsutV6pOVƕn޺]j|9 Ǥm='0ozp˹;Ei;Bt˱jɦ"V4k&+U_0HW(N2+5O}QM'`4f߱Aц }fs44f́rҵ1g #~ t_TF_BGMsGIv^zR@@6gZ}5A#OݙW/I[FʥFgU+ޔn9h nlr'Q˗V錳85ecW%>bN8beiW瘵y5B[3߃+7`aꏜE "xKN Aw!ɳyFdN{G,_vXAtզcDq㥵dz ֨sPeɸ>8⸞I+_v0WpV]oeV4耡dhjU]u]=0e^ߔ~h9]7YJ.> stream xڝْ6_1yZM&@ʋ+[o%y؝VHeTHʓٯ߾~_ou~⨌Kuu{ҨLӫ\(NnWo~77m7j?^5z@&inw7koڸByv $cB*Tݠ=`_atZu. ߦL{uF(rWSˠ+nQQ97ΠD)s/vmii}AtEмtF,i$3nX! +:5Z8gݟl[0!Dx[_C,J&q\m.6U$n jL+>F窗Vw/dk"߯5 _doglW|͊gGH}3a俢!7Z;6$Q xKx0>]{٪8^`j0h  .{0yDZ]ڑ)Qc YRrr#@K2mOE?6qkh8“+_=gOTbO&>=듐1%me`ŸvgGb(>($=11eA96 ]7W@C|]\\FN 8k;vV@dhf32fZ)S(+^d(T6Ptn>+=т*2uqgBYQ2Ux:1!^V:OAw nX8y~yN޽"-Mu8ZCeQ N/rW=Їaxuh$\K IX96̸ʲ.}+AdOl 8*@p n//3ɫ, eF0wUAX8LWLt"\1$ꋴdAmXBr5;GO[pxk5j83 )CN dξN}fVe,rE,W;tG<&I1~{)?__pf˥F+b4h=wsHzjgz7wLnV"=U$*'RlFٔ]M-.NUj"j-A-vA0_$ =(V/ĹIk!q.|YZ R@ iEܞ3p\hu޶0c-f̼7;@"e\UG` !h5lg6@ϳPJ~f1l?% 3t`ąKb#>a,l`~c\c;iPI RDj RYefw *'TkXtH⊖LYk\;oAk6_~_/ֶNr@ dwK#fRMͺ&xb{AF̫=r#CDE &o B, @޼F%(~wL-͎h읕*8Rx&9wboy>]-܎< y]-]Q&N183.Sdpl] :w{еgzX3:Icc+J@/U{!6CCkpC#+0wi&O*M9Q3)f_궱=u=OYO7R/#+p>d\YrT3]gp7JkN""Q0|%&co@Q$aOZē'j.jpYSt)ӚH/Dq.B݇{P^if28ȌtboP.d#$lj 8:m0ňBhH] 8$.)ޓ3KY`mCx @]FOLgiOWO.pa(x咃1~>ƢTI61&Ĩ$ܣ ;sE'kbWvXiWIdb.H|K9Uņݍ'qVb w)I㤱n[smHv|S4?Lee n;4$ THKdQr}8!*r! 
:Gt_}|=CCw(-GZp߄x2D:r gLj?bCLpET^^bw5Rf!z'+(o]u:>V/\:Zm&*XWJMPIJ#JRr 1`ha|sOCEZ{P^Ǔ5r+I 9xnkkISehd# *eǥZGK󅧋Z3B\㜟$_i%;g3Pޫg[:aUL`*GU^S$ G@-%RbPil6fB/.Ʊ;*r!fdK2$"B$e(O̪A'2I5ԔaC5qz ]H#ʉIkMCv=kI2|A{'B^{!0f=vCwn㲊nEiTB[ɠw.j"?\<~ gE> stream xڥ;ێ븑K)} g'H ɃlmؒWbK}CdV,֝|x~FOI(dY`U"~x~>rvqݣ7 ߟtatJ5cmzf.`7-6@G%7 `?LݹvIQeWc"v>^PϼK=h0(ب`{(/pX#F&YJT4`avi2 Ў^\j~';^ڎKu] d{Î}A]S7<ᄑ \==/߾?%v9z%K(DxH/0^>d(W?fJ`BuBL/ r+a߶?w]|aIq.4w D1My~'L*7<˩>ӂvmu;8\P<5&U̖*!xEP xKWk5DTN8*e]R6 ;(^'ʧ'%j&1{hPؒb`]Gg׸^ːCcVe 2i 峣_xL|!+v0߇y˥>W0I~(}{3y#@.ڙXPOQԸ$0gb7mW?t,fcB{I 4GMRi\/YYg6hd4bzELpڄaVW10yJ&6H+FZZSRrm9F̗$E S8) }1gzJ|mX./#O"0 VKy:.# .4A>Ry5:5V{RQ]{Y#adF6l"k656.:]0&SgƖ>D{`=[7JO4+ 㕧ؾnx*zhQ"-Ĺbݱ(%i.df>)ietJO X.@Wi*$ oagy&ifٷ`;מ*s2p</us =8< K* 0״' ۵2J :YK{^X 驉am)ELJfJ!3XHMT]Igب;c6:P"L+VoDNG)2wA#ܧЉ\m7;. ,$X3O@rAidŊ6ImzELp 6C% TeWni{nYP×8 ~0/JV 6!V˓) F,TY1v& 5 [ԃ[aܳ.q4a4E4ru@F 0˜{x#/I{ WḠx$xP߻C UK`KR?xclZe^gzUP*0d!;r"iҟnخΠ5TsĹk5Cq uf GAh>L$EP$܇U `|6`3ĥ'jv}3J\-"ɶѾ`_P)F=!waDS/uo!d7jޙj-K28WQCW1-'~͐+04& Rz=1SŘ³zCl @\U<yӣN m><1 88WRwnBwkY Uhaί$&1x7Ad\ nF"B.\1eȹjInL&i49WS9\m1s`iqsJ9W,ʓ5jQO`d]ө -P׹6U"`Ǡ6bADxbےK9xOK=cSf"swo6H ,D"+Ai r*+;Q?C-Fg9fIjD?k?O^ kLCEx]YfT5I-FzWP8X^7 &5xu{̲ߊREvtjpw=Iڐ8?1 AݍsvT  6cy0` HUPԔs4q0o K4Kӊ=w 6v⎨"_EsAU>񩆗O8HsVh?_ [0T1B~џnϖ@+xpkFxz?o{ }`.U͐)\g]qŀx)I qM2=8wTZEgXk=$f$ܥ`^X&v(ը+gyXw,Q4*/ ͭ'499˷ !P"Eu%/|CIϯ w"Ewlb!@ķ7 gLj/vjoG(gu6۫8M)|ڻwk 9ɿ*q$ژr cS7@keo=wl$W!B/Bp♉q?*')j Y4Ӌ4s>( OD23ef~cc)1>?2&(4Afֿ ]Q4mFrN}I WPr%Q0 Tf6r.WrN &0u7>'֯[b=Z{^sZת.8''ZGNsrԴmH AE_F$ ;du.1dd}XC$Pj8}ݚ򲯟oV h l$h@S5Ò"!_aD%rB`$+_L>{m2{L/T^r au5,OL0Cǵ[5' %Wo9P‹r(p.cD/XF,E̓񼥜zwcjY|1YWp5E%v.& b3]ݼ8T% > k]7HGbToHdIH9G-+()!GA1B*lmp+q| (7bh@O ︉]ӫ1EKS9Nh; t*N} ;-Eoq~׷O@e"\K"@g՗?}^,mP3%\Z]@<#3&$H}Rs&P) V%\>?}* endstream endobj 519 0 obj << /Length 1066 /Filter /FlateDecode >> stream xWo6_A(*ooևbI"{ Ȳ+,<[ {(Œi[NҮ~w4C#BD 6P4"Ͱai°feEא ] C&If66lQ8:X^ PcN 攣r,1Ӄ+8jW3㷿_yumy43XJڂa:`7tT:u[.VŁLtHYqXѮ}-FmD 4)ʬn!~B+OĺI}#D_?cHdېV 1aMBys e>p%0tp S! ޖgjkrC/kd~jp`k1tmz32, pa-k4H")4Fd>!h!9J91id-^4uh-qmGJܢJE6[ )W v%k\952ȧ..&a> *է$_Iϒmgܴ'"Wꯊ$Ձ*k_]:;|U_]T04(}K7Qu%Z\Ǒ۠mM>~)_C[j@eNc o+-k xg҉}Ku.)|%FviŜ˝(8SKI->w ɒzO*t|]2`{S Ur?0ɽ~`a@yBNvgˢC3:]U> stream x]} g>4%HѤic @Z,9= ޡ}Ù|Ka ^zw7*_ȃ<\펫8QRaVןwnKm~od:J1KV֗}4kߟ ~ vME㲾YyiP /6qږ~{QsZQ6D.-n6 DGqk SՕ39/LJ4qF,bI!"݊!V0~82D^xƀ(QpcK7HV K_q_Y*$1(f_"ue*Xh>PP޲pK>8tc9BL_M$BRDɅFY >fB}: } ^) JHE|Q[{ )Y:^,ZPDR¹\e;_=r>姟Y|DfI} [qyza] x0hZ2^;ӻ]oaiw(BQJ>eڦgW̩ݫw@ {,`0|`u8(P2_=UUE"]TIɷ7J'8lnI|Md娲? m[S%Lm`mæ=; sDJGBQB CRfx@Î2ֽ!pDMkMvu0E8<(6*iue+!NV==844 1G4:mۮo>ӣ!Ve Z;d&JdYT)!/tdd Èta}CaN gٽnTDNi ݅R%`Դϼg?Af }pݜbД6;Ƭς2 T+4o4QI9Wo+Ȓ'>-ٿv4 :R3 x~XM4`87X /LZ^"$%jW?իnX*ĞZ؉$R_F5R (ټVT:؅ǦK5=!v'Y?hci b.bhy0i)+Fà")\ֺ0A(a|[i;z,f ;;t7GRsAW u9MI&LUYڣm[t ̌naVu\MԮ!ڎHM)_hEKN؍MfXmg[.?j"Q>1I5g P)7 R / `.TyO(d9ڍ?&Ep!r,GBʼ!b-mVؗJofS^dfUss?\EtA4ox:1nɛ3k2Եbj #bh[?e U9IHe6EN0N-ۦ> =Sc-׋z_>^c?ՉrJlJ^dFv@N8-ڈ@ʩd~Wf-G+a{5Ș7.98q6σޘP-r1lµp2K!܋$rW% ## )k{ I2z3V;$RV>H)oz8{=x*K9 騩ъU]C< KhǤ7nX }$Y ɕ1z{r.sO 9+G3|Y?آNU8Nm)g R ]U+g`,mGrFB3./rɉqjz*x> 1 F$Rw-PP%YJn)>֩u? ۴k[ɷʋŘ @:Yݙxe6W뚢X.[~fi 疁*C=3n }wӕA{ţ' {5~O@WhRmvbAB wW͙f89uM3)`9kX3VSv8TCRVz_qκCDZ GbEuVn֩a.,.`np%B1sŎL;50=wn~MĔIWZi `zC$иD\ŕ>s8S!٧%9إNnf-U&g(n*"dCEHhCLK LE(Dݼ8ʽ:_EH)N96@=ptln_NCa8. 
|aӄsx AxlsȎCkAfM=f;opf9_+0H #ŊY oU7}\ W!#djƁ# D,s03)N"i]m`EçJލ \G2"\ Sڋ`M!}OB8׆-XEako}2Pe!6Ore)q.Ԍy *^ S endstream endobj 535 0 obj << /Length 3304 /Filter /FlateDecode >> stream xZ_ O1WtQ$Yh^yGcǞ؞l6E{Ien<,ƢhRHGzy#7onze%E&3yxXI`hUyo~vǦy{S>H+OAf K?>T$ q# Jgw0xdn{vЌ-$բw Rzp[' Bؔ "Gz`bAь#&mILJGrMN,4mr굍`4ְ١" #9Mir8-[- #G@iV}oPB Q2ITx}ig2&p~7d̛>t,7 oX&dSo~Qn@6/1*5\m']EzfPfV$HBs{ҭ ʭbmV G?FvuW}Y?ӛʾl]D-"'::m~b̑M[k^LP|dJrj@;xӓD_+m~]!_C4M펧Fm|wC]wwWEA ~7bDΌcɬ-?Sޭ3!*ySģ+~7LdSh)@ɢ{x(I-3KV.GBo _lݚȈtʖ)C[@rKx\هH8|f2-taoXDծͫ6mDs^:ƈ@j8ƈ\Rsu Vְa+kQaܙn&85YuS G3KYUdG6W`!#?*um|*d:sgfBkβYl9<:?-yHn_n3W?rO=b ɿ؜=󗟐ޣ-pK~-tYrYIi+,0>5UEk%]GNP*$ؽ+śuڱٗ8~*]gBR?m?VF!>8d3];Lu3lVhWc꾱R׆9[wjFdGXoe荴L6'+"u>Havd@dɵβ }J^%YYvIE%s=>h;1MqGDzSw4FAd x<mpM Ui×7/k'±Xe X]O9 +@(3ůDa3eݻX5QOр>۞;8RGF*XדYOҁ2:Էܫ7j@3k ۡ`6$~P::f5 0j թN*HD&PTwLvAB}$u aeo^1t xp|RcI6zA*0I%/ <ˡ)G)!-٢'[<0/?WX:|?Tѩ E;NT Sv[ӘĐ/ˑ &k60:002INS4PRQ':zHeuH'<8Ϥ3誼uHu#a#t\R=FV/x؛FCIG*2tȬ}k;r. Yd szWRTw_( {K -֢u Ecżq9loFLWO,``4lǚk "ktQ.U2pU$euR1gnP\*E̾q\z$8WECuy QPp%s"m0kks/Ļ-sʉ gA&<3)!鍆 cjKHba-V=> .s/xͰWdE9wlVH,|7#݅un~*;o!^8t>F0bMKGÜ~g??zT@޴ KL2*yORDa~/j͏G濛/Ce2+h%e_NߦCke06Œ' 配[ *3>Kj޾KX&)@^(:9{,NPq*bcJF46w-N8qjW-ߧ@HJ)D "RT 95dVt6ǨKXXVgc{O ҫK*5RR{t)qF0똞.15Z35; ) ʒpGKhSYo!öA42`H}FV]q(몬1Q!cڛ .Ӌ9hkEL@ ^Ds)J!.]{A81bc/4KE=YEO Cvnn3[kNz+^TA-_v<P4LTHw7kteÝY?.v>Z~kc|isH'CtseTjg;z/ c]S SۈP4c_ I FM(¡u٪ׯ(0. P`8w<aeKƯM{b{̉> stream xZo#7~_ǻ*HI@\w@C`ҭ[,n/.㌽Ɍ=$E~"!,r B@2sBs ۤZl" lx3RCCD2(Z Z,iE g WV )٢eJU !Y_xB^1T+Noe 6 ljh)] :l>WC )IJA Yn^E|)SL d-%`qKv5;8Yt(›-JAS~8(]JBY\ aP4a+bPm%c55Q01˰*n0X/Ӕ fTlO2 |n%a:XZcxK x3d+ uW (z?JcR`;EpXd>,،{[H)'՜@Q0bMb,UG o3Rڀq (S:fv.-{-{:'[!҉@)xG+ڞuG<0 7CǜAVi8ƊXbOnoln\cBaLp0|H'@1Ҥ#qYsi9c)9-V=)W0nr`jtnɐhh,K4$_ϕ@}ZD̏pllkɇ>dt Hdd [aUB&r4oo,XI 8?צTsmʶm7/ۆ6 .CmK{Z%1[C)NE' 8FDz!um$cBMm ͸w" hrDۢB-C/bY-GJ G)aQdF/۝clx,^!~=85h#v4=oy;?2K޶oY>2}3d/u̚˜94¼F}|}ZTǞCj1m> >FF R^H,2wNNAQgDAp8g.Gb- مPh]k NMolB;Ak9vn-lB'sSf2)~Tޏ&(7D Oq?VS:bVe=mۀ9!G)Ɔ=2 M#S}YA`RHdKК_Y>`]Lb8N6k!TI#A4tywM:<{8ps H LHu7j/soSwwHr9q=> stream xڽ]sܶݿ:&ogz4'j "q>N ɳb$@QtDv`q~x̾zur'*Y]Vq&,\22WWz}t*]@A.IV SlU}ͣ^7nJ}AӢ,ۮ5 >vXtp{n-HiFtӡg:y(ۣ +sFu5ԵEûnfMw?us:4,E3N^W T^*Xm̲(˄leOS2q0%}3}2GT]{8'1D YG3IX$D"oi[)=o <Bj22NKbN;Q4i $ObX Pg{Pod5K"ު<* #5Anݱ87I>,̬g?8O?DߺkyxϠc>m &%>*{0 K)bMaϣ}=TG2&ƁH#P[MОQAF?ԍݮ(& KqoPa:]qmu+Pt$hoڰq1ǐ$aD)29Z O>qL 1* ҈{|:Xt/K{'_l s0߸^mc6k@'IpTQ_Q=O(jmD< &3v.Fj>Z=(0AjP/]Ƕ^rIb| 㱨rL?rE_l#'#OИ#ҹޏ 0MI8 Md2i,HMJSBF.qFR4FجE4¬Zu L-sɖfA =3[]^ӄEM2p4-}}R,17!)p>\ew I  FY@9]Bvdvh ICk"8\U5QM]R&)%@'{|< I #Fx f 6qFUY=ѧ4VJ{]w$O&D)'OmzNr둜<]. 
cn Ic*$_ChPLঋ02>d\aJ+u;!^h~}?Zΐ\a&Al99Cio&i{56#QT^&٨2P>+6?:[*uj9ŵ[HXʱ}:mCN\8ռFLd"tFO9Zħ.N.c!1=b"U]E3.nqX:fKS3"Z8z6=[” la{G[K)+@48`ko)CCէI%R7B5nY r:4^|^?ٱ47M1~Ә_PS(jB/wW{O/_u1քȦ*g6r3-RVe8٘_;?3t *fE:nIaz{OZ$ h+u|Lês^P9맷Z܎0%;ϝ_ջU!.|DvQ7Jx>J d`-ӂvX6z "$KEMl׵bJ2~ /GN@ȩmV ö߷w۲[BM0LI H/8 m+ =P 4܍jjj\(4/9O%7 FH~uM% H0bG CU/3m}r "j[#HJ&mB^&G\\8H0Fp"OD&g}R,7,3[c~Za Vˢjj%SRn>uU&;i"/>9\u?qhx]7HgZS.Ss^s IP/%fy_o-r,2xQy (qW%4ʳDf_r%Oz g9>DɘX<=Ӏ>ӄ3Q8{C?d 9:t,KcJ l)@Rʃqy?$t endstream endobj 549 0 obj << /Length 2463 /Filter /FlateDecode >> stream xڥYKs6W谩nQ6$pr,MFOa$_挟勗oLpL]^EY"$eyݿ/H$g`+ͼ~zoh;LGoiWqoe2U',&~IYo W*}[hCQʤUDi.7ZDUJD# +2kbr]榜0nqkjZڽm~I̮G@-v \hM @R씛7ȠTlMDsDJ l.he#Ҵn^o`=J}EuU NʋBAхQ9Qw=tF)ꡁq}UɈnN`fq\ k`iW~vUQ5{E)9zDdUA 9Q>me f4."DChWw?d.Eȑ|ECyMjj/ bˆ$;dVʀLd ڱvWk7A8L硦T}Yuj @ &H6Shp_7uMz}طT d%HBc/wΫkxN@-ts-Z7ˉ%줙%nv>{/ ؜$\ >柖-ǟ1#\v*3̧ǐf7/=Vఒu7`$>MmÐl|2epH}wcjҕ}rXT xsTY ~Z҇3Rpܰ3y|^'Q++4$J F0uw|U`; RA2!ƝNչNnnC^ձց/eLy`̳R s1`TZ|޿{F6"0)VpP͵Td0BK`yS GߡvBNɢt `P {d , sܫ!ĒhL @'ƳRgLk*xVXf+{oܦ}Go k=LNu[F>]*'"I,l5vq~=7L 87[ѐt9*<:N`U: G >N@J @#{b2MY"#4'~^SK1r[}vVl^âMN ǗcHfߑu,nEd iǫL{ ڿh׾4>J(84bg `0wb'JQ 38da)C&gO_y{M ^H+a]EXMdy}OJF-j7>$Myb7Hpde 8vuEc '~*Y&0 !N _k]u%.&PR Z&7u_͠W mV7ՁLU n34c"X'ϱb,%#gNj:ǯ$0ՕrbNȽ&8}: :O({ȍm aY@E`VwH:j+7зvZ@Ae |?.dM_ O2y\XHw:ɘܲI\ O!}f`x&E Nf,MSPƘ:L$R֜_n endstream endobj 560 0 obj << /Length 3255 /Filter /FlateDecode >> stream xڽZY~_1oi7H;Y;k q`՜ieՒcg'>Uf OMHVXb˛yy7_}d7JLf&N›Di!?vSaڡl}ݻk>ou*EJEVEO^hΦfSjyחuǺ >:wjlʼnCzT#'^缻 _Lg̃N(!FylN0NȢ~?vǴHNrbyG:ǵ֊W2A¹؅yY̍E~ix2H+E" Ĺ*}L(7h$7|jnu{CUU/p+^'xeI򖚿H~o>'QO?|05Qkr^xuB=FXJp*Qi4]ٷ|䮡XJwO`d ͢zyxeuvSi}i":iڮ9=}^|'*vBԍk^^ڇ:i@cIt(L;,MҠJ#=xM>ZN06kQ`A6҄<&г; bH%؂*ɫ Ee밃o%xDe<ߢ0Ī,ޕcӝs2b~G ԧǣ|8xՕ&"MWV$d.i+@j`up 7{U/WrJǖռqV vT )4^/}Cx$d8 $r@"2Aҿ~ MHr1l<64\JP݋Moy]fՎU3FSǮ9 !ŮgAХPr(0rЁߙa\&9!s^eA83FQ$mbDŽe/.t42ځC$TgS W 6tiâ(Ji8Rs 6CHDD{HN~.wo|GDNcHp oEj" kS#ǒ3􆆾qavi!nɉX4], bjuoCh\[7jb6y fby݌y3zq:jJ?xG瓩y1~󽞁ˎ LW|pT4ncR`i:߁ۂÊNkӔZ[Rx<9 $ShGW@!5xmoEv(xfqyt6 He`0ATH;B'@ '=s+.1PJ{4ϙ;H>=,1Xt3eUAZAc {1Z~Y{<*n|J8 GXMP:ќ#)GFkƇ-sR;au{ݭ+tp*X_\6f9{qn;g&ÉI.$3x|m [ha;< "v f|/pc6`|1ËA(mA8UH3h{4=b}`q=ܶ; 7'Es hMN qf)(Rڀ(i֞ڹv0|jĉ: S9΢1&ݹ<+n[Ũ>n31,@.g1Vuy16oLS̀|[%W&dcv,"=f6(<->Ҁ Ƒ9JW2/cAI"`Br ZR "N[5E1ԫ8*NMGV2Sz%dj21~D"[sŔwV %/:k30(( (EK3DVsf@A i%ӕ~bx;Z{ˤP"~*"qYSq[f~ pc /$A'[4pQ+3~s5Q%L%{u9Qf2uGXrOzɁ#l (6}9UEۤ[=<9v(N6_&r/&m2H%_N,q+q c֗S3V5Mn1~EH`㑤(*嫻3\-O}$dA"RǢjR,8/Dj(rE^㜆Y|QY6MȜ[._RWJF>vuxXua+u"'>Ugџ ֒,_%cI{ּ7X>i}ѕB %QSjW$Qfwki ܳ& amtDgV{Z> <, ZM4^fYjږX}Q md!;6օ pZ/xa4Y5ri[w[m^2 ` OJ$kAlToHJԡd,,:3~zKu/bO6Fܿ~x}. c?Dh6=gzJ`z endstream endobj 564 0 obj << /Length 2591 /Filter /FlateDecode >> stream xڵYYo#~_$} n Y,&z-wZJȏO״l'A^Dv(Xաh6q?t##G61T*v[FWrO:f+vF[[=9;4arc/mmյ"p>Mdht/EfgVKd८ WH:-6RB:Z8ݭ [#v[->]] 8Om۟]isQu@_O;eEۖvs!0mZvZ},:W]؏;!{F%ڞssg}@Ѷv4 v+ڥTDJMkEd,b3A9#^s=~X "G 0L{v;-KI1IO+vҀw}Q/D$;h/M}^TbA}k@nv))³()dJg՚"dWʉk'U"5"54 aO"\ l[ت@ūU }?[ĆW9ۙ]O%\ǦwI(tVwgH7*ZXa&T5NާMrGGFFBY"*X'h"e3{;JDBlھت7C 4P7GL;< ! ,auc,kt[xsM+m` 1 nO^qQ[[ Gs)żT 0q5u~V2`s8j gllE.q*25ȅo6D9bT&MtN?hgM>*_*WsM} 8hVq=mxyAS:8$ ??vkO ő(ueyNUDu(W`ojEfS@QGf!I (RA/C# 10銯xAOʣ Х rz]ֶ#/,o22P-**6APǀ]}\i-˖' 5WϒxWGk4YPh)?nЦW!7]شScM3|+b`҄C1#10x`W=}*L6tʜ/0RFeS:j22!xdB+jfЩ3Siv.4^V-[iN9^ }14RWQP Oe(/ =KAT#g)si) cIKr! (DD`:AapbLH ܁=;|K&DHF}>]M͂9MWKG}|Ye?yw|ڻ 'jW[]o5}L= m6= G91v J' { .pDNDmY:xB"UBXLNGQэoJQ W r\»`1}"mc8~ۺg^{o, (\rp$* 7 uDž$ɟ (2?zI rqTJ\})fI)0lSt*Y2x> stream xYKm+R`tlo٠!Kt[Y2OԋzxA.YdU_Uym~Γ~KB&Fykx{{۲[{uU0`hkNxV΄O;nm;8uaʅDT<[0EͤcY˖8iekǎ~g"/!_x/{F@A+u^tL d(¿c^PYY[f;+lLU;(rgr}+/}B,:!qe? 
~ oT?t4I ߹SMpzƩu[u&L bFnOc~Le,T|pLAƄs Б"~g<'y1p{Fh!ǰK;ʹb3^"}ƓVK+9GfTcG_$~;&++,b7|gG4>8cubKKFe/7ڰHxBtS J.LaVk蒙%B@g1rgtP]@Vvnh7n{ 4]E@3{.Nҷ0z*0-XY> g[/=4-wN=R4WIMў(,[pX1SԫPի 1 }g*cx`T tBÏkI g˫yҪAγVBt0*lOhyl C65uR?4 5e:A[\#pZ JCm@_߳K s> ™.ENg⌛E}߈&քpC3 ^UͦhմU? t>.G: Y C5t⮟HE~p?օTH8Se}OrA]+ʎ.jFXy񨓟۶Y576UcKBErl*E.nQp^ :K=`9] S%_!'mi,Є_E@MKR`b&_pAqjH 5d3"FdG.##/kCz[@My7}28vKa<(UKotlQs@Eq0ypk0C2hCd$ O2їnvoR!*ݱGFPZfALڜݼv$Q&饬*rV4`!A1C"\Cr 3Pܹ@%Y@ROǃO&pjSy =s ca荱-i!0Zk2q4) B'q"k}SH^ `zxw D?~#3ڞlVKWlw7aVU<`I0 Cib2ɭƢY#3˝$u8ۥt7$3z3>U"#U-|5VxHS=XIo yϢ2 g+[jŽ!nMg8bJ8m"fs?KMHf̜i-<f"b`õPo@t2a)d&X~wTl<*[77h'}%T>47穟+y3 !FS[#qˡ⥬p`iDݟ*l7㳨_&(hQfMB:7թlSn6˚R$rzP IVK۰VqllKn١8k0=DePA.3.sP`;˛@ޝLf{X ZQJ&D:m@ri0Тa"00t0T> stream xڭn+~2*$h)\iCS#&w4b@?g!g(mbYȳӝ'iNI\=Xq|*-dxgP5Xʬ6*x}^y($t4tjFq5\fQ-PR }}ia9M3yw4禬{.:mmÎKN5\OtQqk=V8e8Cf٭6:As = D0'S"66Z *Fl* cy<߹8nng4lH;)L-0+>sŹd`j}"OǽXX[cjA=]n؞ʾ/'J%%%F Eۙvǁ{b &T"bCOsFC6'1NL E-h_yZ-1ZXk;h=,Iۙs? L"YLlnYae)@ydU [TS]TvOamZ'x~u)'7ro:^'zt.d6AW Z8 *9}şȫ#pkOY(ؒॿVY{?'6hjSG`ْ~~8yDC6O5ُIwzH*Y^3lH.*x)MHgvvEY2"/<؏hvRXe?n \ٙSf} l'$+_Rq 1G@}Ͽ=SŶ34pc+0͡5fKvv'ˆYR=2]@yFe@?uA# A+YmTbDuA sK0T% oHSٚ5 HL֘f|[ԷO>OF7C ˾+&%ݷ5%uIɊ&i4Hi"$ O+Sn!Cu M 85Cp(2%l V ㏕t_S4DJ].3 tpu<4"dwaZiA? N\*ܓWѰj4mV<ܪu6RaPOuϦ>}MYgF4L ۍ_`|jCe̕TiV׹'_->ض[TRd#4Q1sY=7ǝpT [3"oxLMjNEJxCQ@axvĜ+(UME[L83/XL!SVBp,d2Y3>`Vf{Cǀ]R; vZo]q#|08_p&d.o'TڽM%'za"u;lvfBm ܮ |"mROݣG}$.nH\SBKKrm~ۄ텈4 Ež91d{\^st̞1@=?veIb->jmSѹ*^l*oR$^\UE[(v1hx{˵9Ĥ-l/LٗԔ+eJDqz2r@b? 5cW FE$Z4_n5nCzt Sm_F&#x&8p.x/ z 0 #c2 ΗD뢣W; 9\3ș! k.Cd"S(3`.#o)X< cQW cZ7\I[Ok hʍﴳ:r9SK ?Fs'担_X?N-X Ep*6H$%cUR2@sj6EHK3YJۯ~`¹7*9@7D]xH7J""R3K]!.nR~zCҁ$߼ |o拉w9W !тsVoi(EGޠviilLC\w@T)g++EvbvX)n9\kM:b,ϖ/ev7pll۟uBѐEz5]%BN-obEZ$x+gqkV:oAyY.x5NiBJp- >W߃Ee|(LrhR^? endstream endobj 579 0 obj << /Length 3223 /Filter /FlateDecode >> stream xZKϯmzp LbrHM9d,Ӷ21)HQRS{CN(X,"q~?3KW4"2t|XbRF"zޯ^v͖qN eUԗ:[z?Ϝ)Q PcA3RfS])K[NH6D:& >Wa74Z;}mu~S1G[P90-I-+UW]uNNF0jˣXH+%1FbQq9[S|{>(l la/ [5,5.?AIl N2ONnaKTx4]:baJ?$eΫD +aovLδZeWUF߃.okhZ8cA%#b_U탔X DWRj#+vhx7cƟڻ_Ԇr/`}aWgԖ!@4edo@ w!OƏeT-Ǡi8<HU;/}֭ и$YTOpӱj+K Z LSVeYtrnܢG= Pܽⳬګ*(n*I̔(M,? ݠ FeFDG?[\~*L;}T<-wmEdWW7ו&/(>4.W4}_N裭2> KG_H<ъzt_>Ih{8ƚo,h0f_]N xIE&-W[FI +?Nv#ުn$fK?a[E';kߝRGLg{z}Gν4h@2]_U8^On8ME j@ pƥ«q f]fG9*X ™ X5.@uw%DB<0 3S]&fC:RBIp29J b[\\"oӌE/ͯv(hx[^/Z SԨLҩNZSM4Z__.yjվDN۟ !6n;M[` aP0DH:@ȰӜqlln93F {MխoeDzhuն/ նf<** ,1[M`-OIe22 `Nl@0!]tki0Hɱ,2hs@{apw2“!ǵ dw|UWoU7ڟ9#]y-ܯgd^-m0{?ch(Ua >آ *$Ii.Yg^pÔg$^쪲!,1 b ǣh=GcX8lF:w?|l 6QKyb%ک)J}$]NO니9YލK;cy> stream xڥYH}~Ex@HUCV+4rJb:vzf'U}QVǕΓwwބ+KjwX*FyjWNQf=[Zd]Y7[zdq͚6lxHec'O{U | 򮬫ƶlݲoƄeGki?vG/> JNe)M3|DUD}{\{?b`N#ܿ6Pdk]Vg}1%:UABXW bYqfntomF{l:l~P3sʭKݺ%zw}CmVȎGf:,Zܿ^kD ?_}8%2h-Cm%* 3!Hy1Hhwi;05OA_$Ah߃ŨvV+;_nnxW_mVIQ{D5CUz^&PnVeO mRNU7d`Ⱦ ƕl] > ;8ݛ{W|`}~lS~焹yu Lh氵ܜRScѣ:RgO܂ckn15Sa'+ͶfkT+? ,fxE |kv$0Tpo.tdsOW@|JҺ],9bQxRNٹsG,ȃ73$:cF+'T P~jl&]/X4 DXeBv }*P%(Yb'?Hr0_Lkeɗ<= rtҐ{}wu?k|?UM'Q"B΅݉â8}[d"cb1.4gqD=5HO4OәQyAIbٓ,jtbTM"LxU݀'2}.By{.%btEnh"Z1&sd'8N ffr|n[^g{ئ!3`t3L/4&+Ki=PF@0vuҶj1ZpZg$8Xo5 jlD}uThmF{zFCX*zWgc+sR@/h#N/d @l#: 6 N@b̂S&!p+2GA=͉eL 7^& ;Z^7` uU8D*/i9f`(LlB库0-F&+*շ,׆4MֽtĿgri?U;uM 9XfQ9*$h Nn Ck-5sf:vI_Ee Q \u癡ϣM >q^حq^ޖ1 yog,B\ %.(1fb,b#|""‘ iRr/YdžISwno:/I%3I|4@B<#j.rz_X㭖`2B!ܴYV %J v'+d*% $CR/&BYuExىo &p_T"וy4X"Tp Ɠ)F%/_}z ƵX騦3kNN$> stream xڽn}"o+.t-H:E L,yEyCJB_f[IE]tӇ>'+"[?ݥRe1g[XJ??I2.9rqhڦWݾjʚ~SiPֺŕ\Z/dhp>*m.Kĕ7v)^{@ܭDPbdRߴrE_cO%>V-m ўQ.f\xQd\Lx Op{P>!SRv[wdpXHcD̵ȬfS4c#;jBLsɢg;`6 WSD$?4_Ueξ-rl/yDxXb3X/rƋMMRLFNq2Z?=Gz 7*Dq+V &wmvvjʽQ=roA6  -Ձ/E"??}Y 'җeBlBHLJގ X-MDt0C|N%} A |A'`2 & ?bdx.H2}NCiΒ1zݤ\") vaRxF dž4D)o.1ޚb;,X8'|*너 H'м4G1X3)OuPZOU:[ 5}Y}D2AVYET[ ֲ(;1"``oChɸB-y|A%J{0ΪMrϿњPFŰNS(? 
(d:˄z;@ cӱueIU,1]3 *,f#kgk:f-hq"PH!AM`MJz`_U8)kE'-N"**ÃT82F>bzKqs5De9-+I7v,&~Զ}n)3Unl &s3(AApMFYN;le~KY*P~3hji|)8,Kg lj0=V*!x4$^6b'̇3ɢ4 3kxД w-RhKSՀsKg*@Ζ&tɠUϮ]c@y>r6"j#ةzM>uWvJä.o(h^7.LdT`wS 6dٴh jX@<-.L"O"_'8Dqk`F'< ?YW%Rd 4y<5 s a "nu]}"K@fŴIfDQ vlL-0XFFC%85pIUmdsGUyN[NvREO''T!C;mv6%ZnQ{yKg$2F.}[MY6yE}Jzʵ:W#n.gRBL:PY)̶~ [L8n!H4fQYdT\EAJI;0};|1lyG' ieI8EVWѾ}yqf}Q*B߿l^(K]"8 U];G^Wz`\;ZW#6!Xg8[ףey2CX5k۽;X//DN 3ő% !gI bι>D endstream endobj 595 0 obj << /Length 3463 /Filter /FlateDecode >> stream xڵZ~~(X+"NW^@@>4Łp$q{ۿ3!E^~k8o »߅ww" w4 deBatp拪Dd<@O]X J_˾u5ۃoHqnC9 i\BCg" aii-λ78*qUxQXݬ5>* Ϣf#b'4r49U#=Uq#+ǩ&= b>~:3Xf/bߑyX 쩻fiRR܊dOAƳx U*J%=Uǯd)>MT΢HC,RCcٌEp>ي rin@/rz(q4ޕ?PjiVR6Li]?2յCo8رTGgjqlύݥkWQ&gGED0¡ZeF́>ؕK- 7fqJSۗp4j7y 2!̨EըM>2vP ^6Pk9ӱj7 [9, %3e߷[{M@1 ,8A,%EEGjn~?!KaB.qhհt,A|D(M|hR`YqˤT(}Ыߩ;tE1!$#J]kU?v\^S;ūD aAMY-SHR4=۪'J`imO#d.O :0b*+9-FSNX,Ma|I) V&UOen2A 餵jp!pcVf4vqx!!gXNˌo"ً3Iµmpy9Bli3I{"c4s _gDrZ BwػW]|ofo1 4%@#%A >)69^qC{SYm}Ml)GCa{y&:Z|,Ttd 1ogY;T<8v_R"U)M3bI1tZL ˮY91USxfXvIA{sN/9!MVnb1 áTkbK2aD71 WWb9*Aצ^V`ѧFa}t  _-,ur*&68hX /W_&jMJZf8}~BW-y0!5ldG9ΨB\_coNw60hm^9w)^zY܌ ȞEvwozw]B,^ eM :Y殴-2ƙ9Q -uQyr̠\n/gwr(LZڔ2JN=yb!!?Ҳ\8$@B@R aQv!72R.F}xx?Š~_ endstream endobj 616 0 obj << /Length 3660 /Filter /FlateDecode >> stream xڕَ6},nHԝ},& !,d#(m~<"Gu.](߯}0(2{<ܥ] w?ot!ͩji;۽qm;/xz߂ E}qg<^:#⪽/I9qYT#qx*"S{> zV]M-O5'ƶr,{XÒAF8!à&?Dq&%CiNeᦱsu՞#}[{nwH28xm,xYn4ڙ5:GmI]#`'lnӇ ;\ t"2˥=ER0C3ӻ( 4UsgAZCCGE ֭~SvxjQF%dD>U3 6!gJrT|B{Q) I8@Zsc'$I7{AV80Hߊ_J-p{'@_ōd>lm`#%ԸL4ȼAn-yV(J/u'=qO_]1TsKP%n ImO%B/ط,n̥FAimح0hlcWa؏_qOB[-(3?&&olڃ֣H`"kZ^c0b7h-ܨ,v*$ȳZ𮢒o1M4 a|SxH)W _FTE6A%˅71L$tY.4: I#'cĖ02 2溙J0h:>%V3WkLUSS x[;!xw (r}MNU ŠAz#C'N:N˔$\d*ekkW$ti+ @Sk~e|9pHۡ?-Zۚ3ǏHD$ID&>4j(XGrC%$fҍZ!.]:ZSE>/ 2tLfI@/ Yy `,ǰ K^>OJ>죀IJlyp*~>*Q|Lpn,dΦY 3ړ=CkI?ܮd1>C4; 6AkT$QH[kz&Y|0j=%k <]>d__oxAMqZ^xb3jg.#H ;E4z8ˏ,Q0blUfn ]O<^kO^j*I''+)r7]ݙ ZC vME6 *xbx@XaOU$]֕ <OZK`twR<~KTW_n\2R_!qfAT*!JLtӞ}TI Ɛ Q>ղX (L;%XGI9`9=f%I1)oR[+E%bPamIfF ï;eI$39ڷ3bŨm;e R`+#d E]ϿQ 1 Kz§8 nrMXqN` .z}*/3#%|gdf_dAp7?_M|yٱ&hҍ\a_[nq?6"މ#%Ugy78p|UhN]@#D: z +1t=s*Jp`QSu ,<`""[ȩ+;7J*ը륚 aOSr F^TcbzQ˽`BM[)! x{C)LK4㜅M?ƮTcotE|*r?cw#Ќ3hnf[U+ 4dDg㋖|agnZ/4(K8b?BߡGj9*EWO6{6kFb5D:|7voj. 70lr,JQ$tY地$,q% $VQ#Z6 endstream endobj 624 0 obj << /Length 945 /Filter /FlateDecode >> stream xV[o6~  P%uW1P&K,? i`hŕ8 ln=\x.>A+xZs%!E :2BquERR?n=!'Wo<͈jНñ;ѝpy}ıE v!)m-+JcY+[K![hUKޔ-N.GTibFDT+nT.X~_݆_gM0{@`͚ǾNncmfVƤI+mV0xˀp(ҰThY Jƚm@cj\lQ@B5!SMEIM u~/T(.Z=8"Rl#wB̤)ѯ`vѸ7tҰnQsq%_#ژR4ێC,8a킼2ÚtCFU unZ>iY/( PѻGiڰ ıtQV%+:ܶE5-P@Jt ZRex؋ 5`Ӳ]$1_|oTvBŠQ,h߃OR"^s⸑;b=qc]a #ϝfWY=/HYY7! :GI2^[@aӃ @ss_&X'$u#,pχgr@?pd@ Ap(t;P++ewG_ K_#Tl~NJuo~(bSdزO_eJ"5VսeBf탬V5+فW9'C!&ljfa:**UU16 endstream endobj 631 0 obj << /Length 3674 /Filter /FlateDecode >> stream xڍۮ}lE[AM4@pFsLYz mw$PK<$̙kَeEvIMStqȧ0gq)t:w sM }ƧP;^$N|c1\7Lx-xAX (nnoL[`M_/E39C%DB 08Iz0W 0PQpZ&;B8 W9ܩ믥09K*|;6R` b܅˃"/\ƠdkD"DЄKdhl׷8މ34nltwfǍ/̗͗d+KC;ǖq~gp= ;人&Ze#~=tVً, #-xtb>"8K E$$TM.DvsЍb36#Xoڕ#<<JY$|QU/}2DZr|;ua: =Ў ?uz떇ƹΓܡkn%ofG\*y1][W$Н9DtM_l);6^6CPp"tW[И[5RO1 ? 
Ae@6~ Kb̴)t(6 ߶k #c?Em;-˿\E2bMB[%SxtlEqN㾔:bW[dpNͰeW[oZw/j;ÝAWi('[wC%:6~DDjlE6K;`f-:ְ+;J<,Ң11mAQ\dU+> stream xڝZYo#~X@,I9 8l0l&5@ZR}hHS*!lϼXd5b_Vn~NVnH*p]vh/{ڟ'??uylZ~˷i?Un8%\z`ߔ]vGݜ^2 %t`XPfy%GD>[б4 ON븝ȌVR^]YJȔ-zg_ոѡ7HX[I4 UZZ%U2]~'Y V()բDhg3Me,,X(M(49ר+ndMSg0ɎyK|wϞҾ(AFI]RH,o8v{|J&slOb˪{Zf(J Š2e&8.ܡ}u_lf<̴R$Ipɵ0X 0ޔu1<$] -5{miQS.X J_<oVU XϬ8X5eLM>}G!cǴfox)+L"<7kSJHD"R2IN-XN"G 'utl%6aPo3d <8K4TPAhmȢ4~ < $, 3ͦn+D '>"jo{XhI G.E1B86‰"CxH&u-E_qaEd%H$H#o6xNd0%tOiK':Ix戯 ǀ8_ _x3`Gxrx.a[uB 9q(D^ĭNJX ԲkژiqwfUYÁv 8ex C9v5ѢvuA(fEG|_ SSYDv9](OLOHXЎM/uYNvYڝ@\r),5}Ji=DJwtu.='i7x/*d$ڀL1p8Xejk-}GmTB̶&$/oHQ$ pHy0 !͎pŅ39%qO7 RHaoYbgCyty̦jr2NudAOAP/^DPhF^e̋B oRA 0/:UȷW[Â`x>z&y#1횺C q^XLճ 8%{Wd Y+r=hN쳑(tijV"i2-I8k!~&ұnEjHp*`]Ҭ0]:!5ƦkΪr@H<=}Lez_@}c4  ԰a1`2g!o]mX c6F{20"-M sWtNqG icܒ\r [ V3A9h~^3oSބБwܒ8'!/ >Z3݅QUqrp& }w` x_\"ΖĴH~ I^|4Lz!5+j*XE WB*CWjVtIi GԼ 5 ㊫Ь Ӳ/uӭb}4Z[MiZµvʟQ2TtjhdGφ96#.<9HDnt nN-HKf! (HkQB~eS,95K3I>O17uni-r[)꼼Hw_8U]W̟=x@ i|aMqZ}3TWu R%qqE">m.bNML_0Njb[xپPT;2hbn9{F b( O mH2m> ZC1;kQ5JUTLcĊ Թ<];ug75)/S0ʹ!.i%ɎԳ ܚbVӣ33ATcq@GZȲDr5;y},%6Q"6}E/ig',4B~U?Xуd3[C-Ptv;FdW"W4ʮpft_x #%^PgU*FB݅0HLQ݀P5 Hy5ECC {Z7T)x;!@q[UOk0(V^'ERg%{fAT !Hz:~<;ZB#l!*@:=Wׄcr{f ִOJM8 *'*" ,jCpDwk@X7/L]whRՊ3K|b~u-O[>Q}xGmI$xsn//E2)!Mza׮nS 9m֦M~َ؝!V(l9M n"ZʖY,}zY8+KϓA$]VdtJ-#ۆzSH$a+c.`a%fxS?s@4O^haZ{wGi+O)L]HG@ ,'8)e esy \EK}2EAZ/X8~f{d~Bтgۅ wr87Ш1@?:W<IlM endstream endobj 654 0 obj << /Length 3228 /Filter /FlateDecode >> stream xڥks:PybyiI/ɵN>$q/!QggX.ž`w]gﯾOuR7U" 塮޿^*^?w];^0H7\wuýVS-M}R˺yhVF8cKk{{G^f2;zXUn#_y8|~gXVG+n~m2^bQ/TEQW8ѫW])t .SNjE]:._I$~p/_/opF1/ck,1/t*%{N?e\ug 05#vI- ;xeZQF L[G\"|Aa'7m%TU?pOv})^ I(umd O)=|VIS Ii^we=@y(X-47\o˪;{4zk,-r6b HoK.#C1.!fت7@Nqxn$Q8O#SQU7 ?~-5i$ x*#Z[a ǾXDс#D f`gmZDh.+4|@P/|-= "es0ښڃhQ|%f4u],29~ӺJ<JՓ.1HLYhQ`{F7OQv? _y/y(Hx{Sأd}2ZP\%](@dVCpp|A<wycRj*in]5(=uzB/>+^N7B<.讟&ZlMf}^r *f9H!ȓqKӼ 7~«fq>iTޣE3>^|x7x7@[g BdHf#Bփ֦_️[%[6dՍ,9Vs'M>MQpAH^?qs OlxLiŒ@yV z\C+{tF0q"ow}甘9QH9$&[>`ZӒqKRYBc.2dL0\6궻&tc薙&I峻:U(iLx-^a sRL)ɒ7vy3{VQ gYV# ^ݧV=Q޻ԑJ;>6 {K#] d(I:2$6QV\e$%GojVmS[: e^S;=EN'r 8IbL"k+f#IjWafByTJ@t%HU :zr!_&UU*q gs8'Kys|05fiƋWyN1 R)B6r ܿlTntIjj>_r4Rց5:Q` oeXUkt#nT{][_BS`o<"_ko:^Fϧ%qQI\~,x鮡 FRM)17EK6Q!shbxqwg^VF"w5wʋJ16 -,aF8x̹RM" %kI3ĨRA~bA%"p{>MV+/Yq3׉$b2o47,t)l#[`$՚:(#s)Jn\% w!B9G5 Iet;j͏GR#r$_O k M`jo!> stream xZmo9_)ݗMmjh=O7vĎ3$C $NbqŚ舫]L-+IMN#U]&H*u0"BV b3H5:EtIm8;|Z>rJ /%8FcFG,J1ܰ`fLX:6"8:.QLX(FU[BD(: a1&(Q9#).fT"ŊS?3F&*vOY]0D] :ѥqQ̧hB`5UTuUql)±⩲ ֪j:XYD:Z]{c5j|cJ4092&yv*̮Q~Dq9T.M1quYbr? ^ϵ_= c cCaHBq @ ?e:@|~آj;`Wq-% fP"^sdW9ZxfFx[9VJSj2&` jj }ٝ "׮E+ Cחo'?p'qo(+k~Z^.Wgߦ]׮׏|~=]1i^.;=uK웫^Bt Ѭ _BcI_$~F4Cm;?t͛s/Zv-v]AV\u{^ګa>΃ [,3]a,tMbXb/O닮|Ǥq]SߛWO?33"j38쵘 T=^>sͯ7K c˅/7l~7j&4Ix.u/] W1hS=~ү-]ү =ZqG)Φ0!'S$gM#æ}E|I明= ;!X+N i8ihehuhV(C-WV}`WgIw`$&4>(7U *Sac|1t6œ{ykq`BG7^qÚ}6$ wӼ8=h^k7gͿ^>tݧ5yw9j>VͻOˮc0_5ˋ F\fͫl[7e~hǒl1G)xhɰ~nG-d@! 
P!$A34B2^G>t;K=oCZy/] P̴97~+oP~t$sdže(-m o A]AтJOwñtnE&4)FЩy󁘷m8eLidܾ"g d} Px<5 ğPr9:rxO4.W%W,/|/qLb?h%aЊs=4}~|KWgz|=]f!U| MM"dps[QWIW9aɾ*j6N`vqܶGǦ,G4[hȎTj> ҹܺ۲MagOi'V)\bXdơMC+CC Z(V$Y|4G@Ŀv#QB~ֻ!Շ{"@DbD{‡]m^۩?O%Fhr^J>F?w_]rx̔^n(l)pujӋr2Sf3y+`oY@\veE۵UY4)xQS}u/쭴L ,؍N%n̫BroHg"l=!I1)1bsEF]]cq}8\ #VhJ$ouaVǩ%(&օH:yD-bBSV j %eG5 ?UӁ[MC.;H=}>n\"mp endstream endobj 664 0 obj << /Length 1101 /Filter /FlateDecode >> stream xWnF}W,keI E7U MD¼)m)Rr83gf,u@:'_fޙ":tjYeȲ&vϧӛOrz|:ՍN>\=ۖԯJ(PmY䋂SunM_AWw%X6xj}1 D9r |K/葂MӶ 9%eض㡈:bK +-!ү' (̺iDAjD1 )l;6-^G!qlڈRd&~\0}.`0M LGb~/bH\ GD1NGaGB$.8[H2Rz 7:rpSW,ӋOK^u+,aA [[]C+ȴ۵bDy\"0E}fEG,H@:m]-NňnO=2A?^ο}6-0ML=.7yχ2qCR397!΃JSo]W=Bn+9UVa&˽XS!6 q#IM||y'ELN|S;TVyFfDG+_ࠈ@q/Zab?]SC-5^l=gɫo YU,z/6N߽0p0uM4\,xz<h|ݷ endstream endobj 682 0 obj << /Length 3918 /Filter /FlateDecode >> stream xڵZY~_~i7Xa`QkxSufg O$W㫢qo{K/5F*3D&Fիl}cݛ l߿6fS"lky|8TQe '!=mo-p5Mcwo6DJ?x?wL>[yۇ:yTmE'50?л]Su;n=֩2:J0㯽oH2`M5" }5V7gۻSsj]o{b;"Zv⠘vSK^luaϗ1凢>-/ms;svg=mϭ[#^mRF/U{m&O%.r5gZCy_U FO撏}Wn|܍GrBpRW[qKtPh|cQM,7sQ$n|(t۱v_'t2ړC[0֫זd.6P'C{$nP{kfy ]Qg=2d~G)JWTxnD*s9,hk\'t c6ziwP|9qW @êV˚h4Q7::Cƨ4LŁ~I6؊5TAji`dZe\DL\$hmXПmDd/M44},SIyM<Pi,L;]6 զ5h Q꞉,Pa¬1F8nZ֋sA0 ?PMlSZW^0Fgc9ܢo2+fԗx2,3AXu a, @ьS-ᑫtyP.5кAzZqӸaԓ{ T6'WJSzqeqk -I`DGS<7(rT`|UVKB&yAWmݼRAWnNHqo~y?Q*e3e"?oV  G~\ȉ+) v+'t=QҸEG)m{$ZD>3ڵGc7"q.*߲IA%zym1etlw?1,}Fgg~)?=?Y x.$0$,:caHvĖ+I"1;6;{eoHKhp KK> 8vk̋ r釞v>`}}y1>}G^ >*XF=Ny`x ab 8` (d *X8Rbu=R3nU'YmzA Ba:HqXdf/u`2"+ C[ aDq/#B1t9ÀP{{NtgW5oΓ]ѯ^7XsÐ02Ź*ǭxa`A(aNh9mUx S #C@_p{<2+e 1nH%MLx(-^q߭P=GXVLe-Itn:;^͊E8? _3W-׼Q51ͼh{h]Qw⎯^Ib.@^t p`EOֵcNn_UYD [M-ADP;4E",!U|PIjTU7506HʢhnPg #pÀ*|VR k脱ջ[E@ gxq>҉`IUuUMbt jVhpJ%"PzfU)V邭;n^z0f17@9Gb|*<Ɣ 0L㊘_焸)N;8ا,j>N5m[ɄFso`!~+"xz2au5&,Dg7(+/ߠX6†"Y%$y.lĊ}Ţ)sNH}q'xn+cE72)݄pǤO%$ozc]c) ݰv " ̛Eih&q o??­cOЖ,TRZlAm:S/1N`m#;m4bΰk5un1Vʲ=JՔڥhe !#>IhI+鍻Z7GZ d2Q0vky6q!ÇTqW FL(R?LZ2#R!N'F$c,8AH^q獨Yg*̂AޜRm>I|Q>^ ڰyTKso)A|LMDuU͟qN1$O?ڑSy/fqncZ#1BtqaOvQ""̔m DX芘TϴWsPqϭXR; []}:hrU8랽c tLv4 "  -t|tRr1LN깜\<ĈT% by1X;p0h,1$ؗȚHrSw}KI5i1X~:]t(ʙ9 Tt;vڮ:EIVKVd@ϥXA|)t~L'i?%`p0G:G?"Hfm;:(2\-rᝣ28ſ4ޝ W H^W4٘5Rh_JVr3Cf ~L%|bU!5y@"/LI:6}ٯt0Qr 2n7Vj9O©Q"6B+Z⽒HR\gQ&hӤSIyQ{m2YB␰0& K!ί {:ҿ->$Oql^ yDI*qh ) Β,O2Ok7浀l6 <ɏ$aE&H)`.3=OmIv0std?9M}I_M$s凌k[6΍Db2zšL!(j[̀'}Pǧ7ʪ>`N$r1R8㇟ {k㽧Kf4Ա<4|zLsre^L,nPn+ܤL7X!Iu$UsKʙCo0ǤQBd4K ^ѳde\K (A,H&ŎIG݊xo` /&KS@/B0hs(_5[F i轃O}x[zpRV,0E d{᧒_4G¬?-t .ŷP)n  Х-ST%y/IǺpv?LC+_<bX88 ~|ya 5Oܻi2x<3E8k̇Vq-c8ySAV_w5:"p2c#fʊz% lyt'M{yk1VÎ=bVn9 {y#eB${F1E]Ȳ/c,a"+";{ I?a@7>>/{F q6oUc̶7>Y L<31xwDE.z endstream endobj 702 0 obj << /Length 3597 /Filter /FlateDecode >> stream xڽZK6W!FiE޻'Lj/ Eq%N6j-J*Vzzf/$T$Hp|omypswG7i Mɳ#AR.$ސv=/Fl*}mnZVVJnjXu~{ pVQkjÔ `hx;E?FX1D  Hdxs8șԋÈulwQWkE%<~Q0>$<q LX!;}DAP]fT #YE>I}t-yK+ď_Le7~e^~6ڞ a&L;=xj+V9$;sp6C3^;þ /C;L50 H`/'FRW 7EN/cƂg~݊ڔPb!7>"O ƭ[<|4x$$p*RnY"|>^V%`Yz9p 3/qqWgts0)XsLhUnfhOsDpNׇ pmë`χ <]Fg]+s$`iۦXxQS(FRA3Y5Lc(SkX 5Gظzy#v|CXmqLLjt$sڣՓ}-0{4Wxy$YbS }JSD;ԛ{/Ci:]Pd]cD~*1K&Kg5(*-kq0aL{_:فq{ ңݺa ;s:%=s)ьՇ<`JC?#y(86 63oiΆ阱)$xd=*'۞4gCKÊe,[f_:xTx!fbh;b~^cV\[Do@̚qA.3“yP+˯\ݫ4r^WҘTpy5 涔s1HcUZ~ E^UFr$fXDaA4WМۡ*jM\ˬnTVLi1 rϖ7ɝNs"UܤjpQ}\+apPH B)5,.M5-|5r57a.Oaᝩ|BɁ0I8_GY8m/IA\u_{TnjU3JL"5zOKLb؇Tc*+!i-| 7L7X|Z&i SP3eu[T%OM/Sk1eߘZߘ&Sof ڮm#AujIr-Mzs&㼚әghSQ;2)ۡ+?840 D6^8rں͕Pk D 6 !+9W7>{Ox+ݼ^BIoƬ"gEeY׭x+ٮ,s&  iITE9[=(,)Nh)T[IN VDGUPp8d!M1O~8VVFT飻1O]h^pbb{MhS&5?Iz58l;tVS3b YG߉2XQsIrpΛ]-阰6L)e8.@ g[Γ%f]2f mk \g(u_  2I8#V^4&D`_Wn Z*kȲY=gi1lgͅ#+ƍ^=ׁl3IrF3ɦ^b4_SG.&9\aY0QR$oSyƜ')@3V2 :w @WKE2؍F9aUb2/ot"?Θ*6 R~c2J}GCfٍNɉ<+_\ѦGXeNv@!Xp u|Cy\h+l)Oڄk"`yeuV5rUV\B6ۉc:K[j@CfުNObg [,Yla ̋(~R 7pgK =` 7I 4$N#~Fw8z!u`%#>M#^5'+_m DI_5p<[.I,DH.{Wb y.; ]n+_ܣI&mZ=A{gmUi 
ۄ99Li4`@ԠySQ;!|+k":y8YlxMf[FQ!r-,@U+i,6o灝JR<0͉P3A k[jLRSkѵbU7Z*86X 16L.#,˲% u/V0#ut!8 \UeOLH 1 ]?26': 'ju-vs `3AT'K!!5ww_}ې endstream endobj 714 0 obj << /Length 3222 /Filter /FlateDecode >> stream xZݏblFԷR@sM[`!K9[r;_%vsK QCr88rpp|&woxf7:PEP蛻Md:TAtsWvzs)tܶ=>My:ay45w 6ݡΊ7Bj[l~y^n媭͜woIV2col겫yw߂ v:RI\^ Hj߳B\iA6v0B?c3pM<Ăfmh8)f$ e]TL&JR H'K$:tm ÕLA).JBh`dyd876 6?cÝ=  $j8T[ߙKgzE}`?l^9OH*KR{5k{N3Ɠ-AqIh#ƴmê)b%9&a|9']y6?ξ-*51L˔btauUENj0$]LuY=a!@mҞz~9(PF>R]* PҌ}#ԟ v/֎y;0HWLf&wlxDS~CkDyߞ{s.]랼v}3ShT.)(?IaLyC4 ^Zs]U:CHR@>=S¥=هJ؉%OP$umw$t֔ IӘ^i|.ù.i^>E tl?y˓~->_MwN{t{P*5H"ژ0SCKkaITy 3V# Una% l9Zf^cSJZ8QAatǶ!9"Q3Lm'MzS ᴊt(HY牊|HoQH[r$І.ؚ*KRZs{߳s@οڮĴ ঳? Qܻ,nY0gq !jLZQ89sIoچDL%7'A)ʌ,z[B\ yvZn\G\ #CxJL˜l72R&,0XDŽY3>GYlC(9DbkEx$d<ʞ'[Dǁ0Ďr."\; :0@)W7 5.  |* ]`3`ZN;qU$"'f!ZI_Ν}qa_V` ɺqOdKpKzTLCiJ!؎CN9og ˒綗}13[9|r[uzs9xktztx%kVLb|!Q(JS&Ud\P*hWpW'YT@omoRVvyyD0dOxK9h%9+i)\=MJz,s5J5}aQ APK' ?ny*祩z+,w+?vՖ~P䐅=<}m1鶬afdx­^FĈ,;[48J^J*?[~r5 <Ҝv#J[۝˓Ih! ^ڢ}) ''Г+$B )Al%`pt me.l{(cTRFe?@=X+j5<0A1L+#lfٞ"S2S5F:z& mnڐQ0NYvhHƉq2?/-bSo] a0Nd9ܩĒs'"̨LvYU=L+AnɈ.([N mr'g1gJl~|)uz5' JjC{@l移 RcWIM Ci,^q6D:web8pX'=tY+1GZaS dSkKRWUfpwϱks*^J0q1v@7]6}=~ | 쵓,)fn0yl~O| pO+JRV 0HUE]ǼaPDp`+ǣBO3AAz2=ac`O/ ߋ Oke*8"ZjqCX &.SFsU'#:ϨZR3|mDkYrkr 3 C꬗S:Uн۽ f M | 4AR^]7LTNdI--Yֵ|2`MN wH* W/=Q\W5'UX՜D4."CL> GGqU\g e`WKw~U0ZG\'m:Ĺ~>× ^Z" SB<" ؙUgz84NTƋy[EP0^# nf\Y8׶G"ѩbeoߡĜ|k\ tnG1J707xU$1}? i"9Keى<6~^(OT*M¥~?M y]g |hw=B&R^z  8ªRZp z' endstream endobj 726 0 obj << /Length 2622 /Filter /FlateDecode >> stream xڽYo6bQ =2CIC P1 J J I[g~3\p4_ً̟72X$, E]1S\D\2_U><:Gڋ7"Zp%~%x'Kx(镼,2/Ζ"=}@*۬.h־Yg4HuU6K>o7ҋ7k"q Z5=2,C22dY,E8 `M]m@G^j]HL" {~r(PPt۬nh ]qzUM:ө1as)d9 >(2Fru Kө5=.+GQr}j{^qgA-Y %/P wq5GhC -LtΚ hj2+1+uLn{z&"; #7$F;Xb.J}>CE\).c{e4^9Hܵ˘%CL~\;09TC|7N,$l84]Bl#)V1O8|kJf-:!1[S#(:F.-ra*0@V4;(cN"pLw\BXDN_L ;JDb8oGBEa\AyDe#K&gݐu `X\hXd!MT&pa2M%xH7Nd=M𺶇 3ZE芨~R[>,]Φqr $ْfLbµ7Q0^1O#J/u:wWθ $Ӑ(lէ pA3'= $&OQ†u &-$R 84=Bz%Xv+v$0,ͤ O8ΡI-9a$x1Gal+8@KҴH8"!n4:8A+ hy±4F=iugf7VMC8q,B"WITwLOAOhO[/,_ӻhւב%ޗc1w[882>hg_,=l["Mii2VJw| /[d akmxNgjGt;(oVi"2391>Oɡ|لd~8哳/A/2 (/Pu3љ/Uβ\>2szO.Å820gE#/#*Uh5n.7_=Jy8cG2 z#mHmHsfx7$88I{8g5 30=#MQsbSeק/ OxXBޓЙ .f<3]s:;I]HqhHrQi0WWo~񮕺2a◤d~wU Vv3;9na>]񢻏mR*ϭqdekoHwWo@N& V.uAk-FNԼ+ЕՕdED*x:i5oTƉm-[ L;>ͮ•dJ;8\_B B&)oudNIἻxL[9N6t$~!WcHP&Q}hE0i;8ɴӇ;^OZ8u8#y"ճz2 endstream endobj 747 0 obj << /Length 3374 /Filter /FlateDecode >> stream xڽْܶ]_1v(e.r8$f O7YY/3`j >Em_~ln$dA ֛_կEOG 0c,ovAŊ;U[ШjfWy;首 ﻡn}JzZ4?ʫ ]Wz={u W8 εfl "LEWWT }( |꥖݀DQɲo_%'h?oqWXv5M`6* Bynݪ¦'>T%^IQ$4rd$(HI G`\d~8Д% cEȄRaBO!dPp;%^:wӕ-qkw|M X$'`b&ii]Q'4!9CΈ]C_v Fx @ѿDds:=]%xwmoEtҡ㩥%LWd1J /r_[$ȼU']ŭ7#)nq;s @ ~#YgP"1F+EȎ-I 6|5amhY\*z FfI ,D:Z뵥qt[n`?<( cPR+{4}6} 8Tp m*Oj~XY$,GqWRm7z<ٌ0dUCcMQi>ٗ&:SA;8ŏtgS&y V2JdʰM4+|௫{X{9XQL/#XEeGe\qkه1X 1|sH*= * [ I]3_d9x0y Q |nfU7Aܬu DTUVkoT0f? K2ߨ~wmP|eJ-4fvkL:;9rU8x>v <$XM/E{^`̇V=E{=YD˰eȯxal9`5밐 16 EQ?S'E e(^R$D5G+N2FNiE;^m鶱랗-ԇnҟ0Ih{Ɩ8`7=R(g8QhV10++f %<]˾4`a~MmFdy46-;%ʾ~98au6њ >o|a"dvor+^2&ʝ#M!BV3ib;9ba$1jEεUm" գ@>4(+.=Tm^*4t *y + ?"OGSr,f/Lp%ҷ#ZI~MGF @Zc}ӏjx-ӌ_0k""O|t&_ .还\$B MES@fpzd{ҴҌk4zUKy uJC׌57(Bzn ?@p2Xtr b6oAQ\l~j~\0#E (bG!?(a 8?g=ƅR^( K΃:{)Kߕ쌌E-;6 ߆ Qj/ݬN)d, +ȳg a֮(xX\̸Qw@@L<2HGyD'B(…5l[&Q.CF't ǃ.\Li '&|&m98"|{u%/ktA3b^#G)o>7<ȐWQ}U}\f@Sh;(-YaxJt,v. 
'EVnp,ӕ2ɔ[AV빥28iy\- Zst؞K'?P~º5Nͼw*ᦲ֟JͲy^.~\%yGtNOXМZkRۏGR% _ZFOƉ 1aE^L-߀X`Cg=üN0(җqhg֥.:ءt[ms%+c;nCtyG/"ԧl.~J6ZL-_@\e.P-Jeٞm/Q`iפXIM׫o]Jj<] eM/8A.s|>p=n0Td" h*6bnfhYb:s87IKGݙ {dWo~^92yJ='MݻhdiD%X$鏠.(zx@a׬؁njƭC3Rsuz=t[ .+cUF"c]qDe&qILdYYB{[4U< ؝MdvP1% 殢XSGA,6& !ܯK75YAyi/rN=IV|oBBˎKM jcZtf}YPSi(Ay#8oFUi$<3fyx˂4Wf*}6 x$(bl^.Èp O5c{*f]}P(A6O&J嶓=:,~ A/ Or+ N5a֪*|g \"s [9 Bjy} [A{?)E\;~Ee,k" _)HvJ fK_|o6&\/WH,oԪ! |_Dc0Lgv䐂POQs=sW]z} lQY:=h,rŴƥ'{yj:X?W~򉼲ի(=4PB-%]4cOFް1)nA_.x⫂pvM[7ߔƱqw.1 AIܶ|G\'> stream x[mo7_sH_+pI (d{jKjS4ϳi${#43<3dRPL x%#9 FG&*?љ99WdH1(RNEDA񊡓B8fUt \0r*q>,SFTd<Db$y W)'1' R1NV4P8 ar6`R.F]+ H߮`iL$t"F.L:X!S+&K11`%RXu|31q:՛X<~“rd>*e0ęDΝ?sR堎NPg8_dL 'l^VDv` 'LXnUx 5v/hJ 8ۋŔQ24)JeԡM2bjS % (IebtY3;Eщ TqavP Bbk -rE TRs;ա=ex)zxL{_2*COuMk'_4/ޞֿ.o6e۽tpW{cK?|X4ۋ޼ )ۈYŤ[0LsPrOǦya6?mLꗷ]k}ydz3Q:le rG/4i~R-^mqP PT: %'[l $[Pf I)QSia&$g#j7d𵓩᳭\,qsuNh%S`j@-=7ZlV`!W-PZJ|g^&3Mלybt0WWBQuprFa18z[4n6݋Ek&g˾o?}kŢyY2=hH8=bh ? 郒@Yxbg޼g_4]"cqKE?f0 z,bv⭤EԅhlG_dYqD8@RP*f@'s6*6,'`iPI+|pGupc|82:H)@"Ul@]u>ܹBE>rBqTy؎1 ,SV6Nm%s0`q tMNN D,Kle)Il 聪XM>bO4F52 S"JKBR1u%B I="PaMOf`]S ggG1]b;jͻM߯?(j |8hcGjXқ C[DmII(@n?Co= Æex0ǔ;:j5xjM尟ꌜ* ɡQQc^A+o.GA2gvC2)ht6*fR1*aMtΪ_Hj 񚜍d!sc$ay*$rohf9 Px P, waQZe{}^vo637g 0p P^!}O!yú'Br4`#tq#D' N32ѓb0S'l OLmxPgp1)[坤mEwnvVKk^Obsݮ߬jsEsq}]^"]=c "ä"z㩦Y=,C `L8£qguNTex8FҦby2g:,O-2'&-F,3׆$8-DXq#4倎!r4 ^HoCn%|/%v U}!5L9XkcvaaabX'(ix-P* H=\:[-A; 4^] 2Q~#* 2S~'%@vxdrM;#͠p r:-^`S`rß2MGPBJ/o]+:|˜ endstream endobj 756 0 obj << /Length 3399 /Filter /FlateDecode >> stream xڵZm۶_~Tp@36;NӦif|IЉ5$M],7A%u?b"Z|"R-r'"Y*cq&),bq]|ǻ?~)X?ol'>F}ܬ-ƴ>udNޢ/h,,٬ϺKmhtzEMu*-E!7{8E^ |t$P D hHjw }ڔvEyP\CvԂs+%(x뉜IPGQI4`>2ndj@HGvGc#pEopws| ˭|9xͤEv8Z:ymyuwBͩ=$Gᩙ瘔1TS)c'm~)+iY<٤J flH?C`$+Ʈ u qa2l>>*i,h K@ޖIPHy5+Re\q< Gv4=L]͆^YcFw"G'PA/e5L?ODJC؝;]SdV(!OcguT|=58%: ݇`=PϑRMѸep) [l-k仵z瞞zgj,)Xw.SrA0K WֱYI:It J8K@rSo/׎hW+P_ӓ+؇ǎAeQs*5a`$\0aP~6" FtOp1˲>xɄ?]HӇth8iAQi;xvr nOcUR2ČTܕ؍ޜ!ȃĒXޮv#cHS? m]3-v:T;ՆV}HF/v> UvDct !F(I&"=b~!1 7iLl4˱pAE`gL+C6uzueT!mr^L`YDj8T}2T҇+` LG?ݮ.\- 葫x/F7nEǝ9nl$Oo EtgCi}1L*SꙻD> ES%vÕtG88T`V;͘5DNL ݭxQn8 bx⿎ endstream endobj 778 0 obj << /Length 3501 /Filter /FlateDecode >> stream xڝ]۶=:iF ~/;'Ҵ 1E( .vA2uN"~C&yM<{7" ʰ7, 4E0L>y*M-$K`7l(fwiwt& USkmN׊r?"CNkuPZ5Չ!rVT&Ai{WS$a<;_Y8Sm@0i -a[,< D,EII;v;".Yej-2(.JV@.~ ӰɎz]Pɍ> pdQx2Nj5En\~*\=%dɄ3}[Uoi:5QaRYs Tpy'=[/MkH&37iyIBğer6`TqJT~b:7Ej[1-m){}?S hwfݢ,gQ!݊wH+Eq$\Tݪ[:/vxR(  `tas#e$۩[s:OQYeV3,pcE>Q'xA vvN4f}HGi-b ڵtQo~00vK2I%T{ &#y{c G|@/hQ+D̢Pτx&hy-$##~ J#N4^du 츿]$<$j걓UY8p6^[Y u6yLM 1BqPDVsD>RGJZj;%JD ʕ w?@1 \W|>%#1JrbFX \7p|]C;cmcTZS]p_qKf=WFVͺD}nTfҌ,4A?)QD }v.`uTgk%XhK o?[P4 .s0r5tG}糰R4p!R4@CJ=dyL!m`?vvHyC' r2}>c/ܐ Db >@lymvA/{ΞB|8jA'ξPZ::SP SF|JЉsg:sj6E.>u/D]k5$d59 6H쐲5N:[cXLR 7itIO=Rޮu[^|zо皘T4>8{r㆏(ߖ]+;k;{N}\] .I^Vrykxt4': =R -vKv4i u ZZ9`H ܨj+WTc Q5ü77'\ōP E9\A6"Lw #qeu@MWCqxe4ZkRۙ"BV`fɘ;O'm5vy\YRJw|_$s[yˏqX:n^M^"eOw:`VWZnS(@P %8Hm;ߣ{_͞pziS>h㆚9MkK1JXI@ X"& l.{RAxi/侦&*]Yzyt\wsKWk/ `.%ZYlbdNR oY*o_>\ayϳ$l 6ܺl? .4Zy6l(14tku-Ƹ]YC<`+_וA9/4N?SZρW< ޹xڳ ;uO^Y(h~8Ayp{1d\Ӫ.:6"uIR|$Nn#ouSH*^#r_>5Tg|dI+=%(v:>U) Lp, ‹0foZ񷕪N WV6$7.?#l 4@S.Lھz8 9@sl֊OX1lxUy!vv7Cs 9t?{/SzNC@}.\8ky}dQ{vj/@{b3A9yw^O˭8 ay؞`%۲3I_W%W@0WLzLVƥld b;wgwxm J4nwʍ/_?odA56ˑSoqz(ٴ.^˕ۄ vmPs]<#:۹( endstream endobj 810 0 obj << /Length 2682 /Filter /FlateDecode >> stream xڵYMs6Wԗ`|$";I'(NN,ATI*$V! pgWtgs}q% I" ) fP1_޴/ xq[z= +cA:O7~1nhPS ޖy4_Ai77S4e L/QoC@ÐhVa!3vʔ..g ld Ky,\Xw˗!SFDJbQˡχ#Z-zV3#NCƔa‰?  
48 W{=s=KdIGLD0 7ͽ,K f ><,>e˗^(#QzS=\j@VX#ѥX]$ *<$EeGߚڔ+W 3`][]f~^aY7X<7ovQ6>lt -r|XDWG0,jtX䧙 ØĬ5x({مQ(8VYW=L:eY˘W DHh~ ο fmmc wb+5`,+NX\rO)(Zl[{S7%Qhp3*0*0EV; ^]ߑ$ ەYQ|*SH~.~P=Pew:X!#0 hVPd~:#'{3YAhxU8xG~%Ձpc"T064F$pUVzG+R.h:)SR=c,}۵*t-bX=~덍bgF@I| \`k> uIHDtC \@w&rN]^~}{.r%*7P9ϦFY:/c(ڧ#cIE¸+ 3)gqVg}Ƣu˿[d۵ {!i)5$#P`x]dETsYyl2+6y5Drd)"pඨ봭%n~VRg6x Jip *}mtL d }/}d]-o ;C5v ?f[-d3dF[{U8 Uo3?6ɪt*]H}sbQɗYLY\*=;,tŘucE2L(JmUuR*O!IVgMmW k{bo])lMkB ܄Y]ߣ(>I$<ڑDzo2) +-JՎ2Da:":ҵ:5bGoN [6FȆ=5 tbJ ^|+Ufks|]tz: -1į e{SUVGQ RFHYB1 @a[M{XA3NRZ(/O*DP:`v,JUbǕ^" |g:qK^pkimj^>eoTy/Ц ;!'AM$;}D'C]8o΋s,l+j>[VWvF US4| +<:ǥԫ‹Lp Dp/W`? |+)=}LjHh#!IP3[Oo#LfPM?ab9Ԏ;ǁ\)y'ۋ;n ᆧڦP?Yja5 6uzU#>qfunif- o#dWnC-T n{y2yQO;tjWA&H!DqD!? U܃Us|tamVWuu>WI{֏u<\(6^gfLP6 s]*ǡr5 JIuۯ,J S{E-N: SOMAiv8+Ƅfc[]zw3Je}Jfx~w\<RxԱr>juZ< ߉} 'q8q/΃u+xG4_Y ϊ(uGr8^LNpU /r%dD(:CumokGN($%%6(!Thb2g endstream endobj 833 0 obj << /Length1 1689 /Length2 10043 /Length3 0 /Length 11147 /Filter /FlateDecode >> stream xڍTk6t )=tww 50Htw]Hҝ (xy_֬5\{_aaZv.Na'Ap-f]!Pg1d`A& B<ՠW.00''So". Aje3A Clq~0Y10H9+3@ ;=D9tV0\0!0a ݝۊ3!;6 w[~ P9*kqSA`bvv}0yl tT0d? lwYvqdeu=!ζ#!ʁ@@ֿ GW= qY>H*>W+8pp819[@WB`s\glm W03XI/΃-]`+;0J|aP / rW`_+asq!V%1O8`0~\ߟ<>L5-ii)UP7;7+xxo? _yV wwn _ ogЇtN>N/yo{Nf$=ӟq0k}X]55jur}iv.^N?WyZsl^8G3X }POJ <  5@urفrɛwK\d/{oQ{SR[Nߟ=J>kF3H~.\ CҤ’ayQ;HT"/ҦU]JL)}Mb9wB"gmzɛtZюKx*TݑnG9AJY^*'79nO-%ʖ/jm7aJHuIE[ NlEXgQ4 'A{*֨BTI0ՎL)$)}bf 6p=q>^2KVձ {$%@8Fc2Cub]y޾ue3O?n K=]:d#|ݠ{͔,{!>d3XweLm+wm=zAHO_t2\՞dd)/X74X}f[r#Ry˲0-5 0PkrٽOoɿt2"CzY!+S2NMQr<<1x 3wwG`'Q@kq Lcemt_cq -slO7qBJOW`izwY|Z/yI(Eƽ.ȪQjo!AܦV!gx7*;3nEo5>&'Fo  xX>Fr #Ρ<ƢU <0ׯ&1?,z=t|xへ{蚢Qtu?vB_ yv K"/Y-mKH]Y8dg輗i'u R,aZ5XLEpe`;%E.zeL7!$FIH?RZO*C0&l40 䔼\4Qru+}YHew^lW#rS4}]yqz)g0+]EA;ό`)ԅh)54mc«%`Q1i#F"wTD[{fI]v I_L%׭s]Lv|OÆ&V"xZjoӑ)p]QC.5ʡ^Ӄ;X]deN,ױDBr-EsQxՊ-' E紘&^=rhY\oFtkUo'< T|'S|ֻ!b^xt_mv4}屸u˽9Ãfql#vRw+j .B Gp58~\`ӊOL/0+PdOr4u?IIoM`!Y.f=e'phВݟPzrAySyԋ]O1XNS(]ѰGGop nngZwn&E/-r1jͲ6A(a]>Obˮn> ayCUQtpxXU9^p3`|lF`*ƫ+WvBb^&Y*A+G,koB29<hne҄vG?:VQރ; Ѭnv١*%h9w!jFrpOq2j]Hw .=0\{&pʁ Zú6-fy&SrzkHT? o7o tv Zů­I`Ǧ*2mʌ>Cy?bv :v@ʥgfA6COtkP1U#qq싧I\T]Տ%av#db+NH{;ȶ9)gA!•J uIHTjH7Tϫ9KH(8fHb'FUSM;wbxwޢOb/>3 #b[rqUf+"&*ErΜbݔHL^A.UQ/W WKo͒D m@u`4 (f9Ǵ4ӫ`n$V3Bc.pZbb(.nD/c 3 :b*r[*uo &X3Q@iO^^KZ E.\ "AOs}Ly+WtPl)z<"s,8[;**AteQI09qb98 9bJo,>%2k{.Ƣ|瘳+lU$9l𺻅Xzha@ *qb}Η׽>ʚOtwUR- 7N8A]d5eyULmIv8O[u[9ɽQa?Q4A@'eKpӓeN>4-̸-[֔ P)<UO3<612=o50%B$7e>D#Qǜx&~p2xgNo Oqg&PX腀@>~vv,]-Ϡ$㖧d@ :zrW2x1Nこt|:B"x8f~%zBn+_UbLML`S{Bt {[8oQˁUI;32SjzmGwQ1` ^'#6bJFf#D,wѬXRT qXrT0Qt޻ !srS/Rݪ%NǦϮH\y,oɂ~]yAQ{g /T[8$8ٿXk˫}]كu-#;!usYPX۳^WY. 
hܳ~C k%nQόoa a{}+lSRGlaKӘ:SUPCFI3M`9h۝;J2<~!qdڬzɲEis˘w{; yfq 0/}"ejii"^ hyH!_SGy`8i\W 88Xʹw S3aVIQځ*^G-Gy_Jxi3B4)JSEo7{g?m9+ubG'F֛ฝ6kqV-aqZm"P?u!(Gn<*r:ls6ix;n|GAz*m -'*1i\@Q^ˀqc'"/?hSֹIUgH)giF5XRHF_|qsXI +ŤR)\7UPYv({K^ T?*E#޷\Iİ?~)_Q/W܊lxP82$3ZЛ,Mط܄1v58G^3JCĝHn!ݾ%ւ.'xgnf}=Zi(kR6nX/ςySWz_\#Vkd Ndjy <76ԕ )o>z@%/X֨Y#u3i$_h0d޸qMNJ(/Ijÿ*[4+ss ҵk(Lw$J%NhGtxz?sL>Fj:+[e$wߟP;/ c!h ԮIӊI*p<5L`.k}/MR08VP}dDub9Jz06zdJކL)5#o *)yp "u8[B9j:[.KV5& n&:#͋h1=xX%>ZdݶkSv ߌ@3&.)BE3K>EG=s7DGłTf*\V+8c=# 0uW˘U=u}{0*rWb Eɼnd}z %Kd?uV+g~~H6oPp'CzUBmrcvidE-+3 l"9O3Ȫ%zi#S=6:dzٔ>vme2.v  uqя02~Zl.]oS ՘8ֱ(CieIYVQRk0$nC1։{ >;* [ֱ"Uzz89 U(r5m`]I~.y^~LXߵ/DJZOVzш34<~]sjN$­Eԕȗvϧ)ׄ?3vNJ1ZPO 'F=5)'F.ٲ:9cm8\wj1/\SO5{>o~PC9ѻiyD3Qރ*L뉘Yfh Lޫ}M'q{,}"@.(zdwrOGgrmagLqݪ-,xfh&c2,zPr9]Z=$0`S^gV*m$E,",ixo vgXPG++q7q5Ck&{>˷&,٬uhi@`)Щ/2)d y]H]ÜCzχvz4-:ӫ,o1Sw_ NJ<5!]F{~\"FlpeWD$-tvBn39$„"ڷD??VL raM&XD[;;MU oOZh xE tYsʻNL6GM㗎0cZd Us6,&}vEA͹5fw%2ǭa@DmuUAӺ8FvFS4GkA7" ] ֋XfGkN%:[Zرb]a)PMĕh:bH>PK+O͍`G:R[0ݔADV]pbMc?%[~>2YmV)IPUe`Ό%:Eᙰy'RHŜ ]V7oz27YŵIϞvkJ3/xX=3nR?ŀp}^.i 8* oq[\Z6|FL5?e*9}{Dܲ q-:)A s?5;0BC->r5a QU-+3۾_D \"BA@ZUXph؏'*+g[A_ Kgڐ|-/GqB@aTwiriusPY!f.vv-WςJJVbԋvfv̅m?`ҕT@q.v)ZiWRZJ^Ԣl{l VWq9&#P4kc41 YN7+3YWj|'x}?* Aghpe>&L! 2flVJK% =`u|V`+)K]L`֯FV2+H qq+Z5c4ު,c煘$ GU_4/QT8Zqƙ+ʜP9QXE{Y#3u.זH2a (Dx6wNϟ+%HҤlugp1$[fSӔ2(%)(:m'/pSudzja^ s 6d 7 $=3R򶡩ibEf_a?!^`;VAV՛97Щ /:۠9'%2OsƔK\o_*ֲ)hۮ'+W^7=: ^]g=Zȅh-}V=0%NzcajԮǡ6E|VlɦoRKe-0Ã׏{1s$<:&#y_w/V_kWh˓ &Gf%?N3GJ&M/O4zP=,}+zhE,=Z 3 O5'm Lz!lSAb.ٗVK,Z"qW^;7Ұ"Lެ}|Ğ$q'?|obtqyڮA!XY+ґTrb41Q߇0V'Jel=pDūaʡ幫r Ŋ}~2O>KÍdGu& v |u@RC"#-? sn`Sߺ RZi@=9m9k=hX8l̫폣+/bʬ>׭m+Lo?My endstream endobj 835 0 obj << /Length1 1386 /Length2 6067 /Length3 0 /Length 7018 /Filter /FlateDecode >> stream xڍtTS.JS!Ф{bBI.J R" HrsZgygy¨#k+/X T`^0quRP ֧`q($@ PH8E톰`+ p7 4{3D Gp?JKc0.b||Pg4/ND`=8~ hA&%b ?~}-' Dc3ܑ6p7{8h߻ ; ]HoE8m% ^"m~Nh6E8AߝC%Y]xhE#~ 6(gg8&՟ ^7ߟ:"QH߿-ickw>C$ugB`"{~7vB~\[p?-G䋆z;iA v$cp?6vn/=^6(˧g gP^/ / }g(o"mQ]=e `ŴPX,aoNQW!%w'a# uF8y`Y*@?Մ ܝ;b ò" G^pf3@uPhį 6 V`0G%Upه"%<~!a&®k Bm^ "Ql EZ/!Q=7n8 #BC^6z򬾗86y_dyӥ8f4{N\idТ%zqzw^!=u{ SW&}蘦H^1uGJu鳐ǘ?NdRh>!J&]'oR^!SS$6/u[걕IkL!W_3QW 9!}2ɻ/ (_%7cG+Nĭ#٠Gd2`f >S*NMɲF̕VkE%wݘ 8d-z}KR%džzYgr\A|1Yz=fN$7R$DJk:ct$۷ V|Fz2pk^ H:PvH:0Bᬲ"oL$>v?wJgjІvt{_LG,Z&vLyv. 
r ȱHQ.L:"umm-GE?rF_=,gMT%Xϧ׭jhvnu1 R00s?,Wl{R4(eЋeA~g!uΫ >vWi{ĉɮ,t{+=pm%nOur#8\-I^p{RƓr*-KYne4`E&Fo-㜋w@R>{K1arBZu7ƁG7g Ց" y;u۞ӔE1M( q+rAxMaxl,TjSSƕik[b:^ZX\iU˴7g}wN9-LIݔ>΄n:2$AFW-l*h[3'oRH}Qhnkv0JU 65LO*OjJV]cLƜoC&agd#r4xtZHҼLp`º`k|ɓz.{?e m'fwu {Os[]kX P5r`EO^*qVT,| S0}^{jxC#)d槉섚|#R+X:XC鹪Ť_߬`\dAEG[Ons-h"<Z)A|\[mN2_`FM8(.r%)ݢr(9&aTPhj„ܗ@z!=AfX?VRm ߶x7gA Ѧ7Ka`Iý\^6>_)'zBR2̺cFMseqq_?}Ԅv,w6X^Tߘ&3z|R!qÔ,"v,՗ l} 1mwNT寮<Y5o/ғy!+Wv8F $( T'm0uM$#VՉA~= q܂LY+\.5W$lJw${,07:tLUA~)Uli{RѸzF^l)oSY^݉NOfq;/rKe쾀UW|a tk^B&Ʀ %؃NS± _7O՟ܡ[ӻθA~f!"u ̭m&qF0'm?5rQ%S)I£{Do+ECw&w Lǟ "oJGR<LDRzэsz:ܜ;KNAeki\b7s^hҊ ?zs;gQy)Ujll$6~QTě[ZL D s4+#&̈/2wa%zwջ0$;џ.+-ȫue//tvt?mC{JB6ѤQUp' vA'=GS ׄ" R'mڦSiV\7琧/͝rKq)ڞ]Q%La1rPd]gAt|a/`+;ɅS uG ѡ՜I>d$FI晄>H3Ch:q=8f)Vx T[ &nçb0ƽlq'{űGPD+r+Ň3G R[ȯe}EX3x3[kICeB\C K bLCk͚XaaRrQ+Ց,{TA16MmHAmXEy>N׀[G#h#.wWrx1Pũ܋a|e/xEF͒[]D6݌JKzϞ8?!16e9͋wYk kɜl7 O,G.,Z8㎰.ALg"y0".9>nUJ=nԝtqMv}(gs=Ig߫Vo:e|6j_{+^ Y[NAldԽ5v}1ff^(A%;> N5.y;b84>;B"VzW+FI&Q"mEGѢ,ZL>Ko cOл]>q<͝Л\#yC ozWjw'y3lGiJgQq: l~MB&^Qڍ[lo;]Y5p+Ԥ7)Zl;Ȫu6\{썃+1/pk_UZ-MɸnꈌGsO4 D%~t,_agj _>I dAP}U{ eT\j(EE{霰a=DDNLW gy36qQ͞9 ¡͹Er^{W9+ɮ\ =FE4gɑ%X~2.z 3|%E<+Z@P!/=s9?DmdE19SGdYĶWeQLXZAhW-lin8I9N7r`(˫}I h  5c7U8,] 98+㱁|uQW-w3JR a2C+L&Y",OcRen!AQc$;*m֋R?5ڶMqO0, 3q}w̕>&F=QH"Ј#I*ϖb~}*TCe1ЧNdH-١-ދ?"6QyGB˰jLJ&䜩">[0Xaz7ZӅvy-ޣ͚H#{&̄kvW}( ؍x~Lgl_e"Sa*B}9ap6hy4\C ӹIH"b\va2C!E3ŞYhONa&2ԘOShj8Owezg@Iʈ}* Sm+ m> gk_>+prZ<9]-OTeSN9oۡIDžMaq!~ Btqi 79n\' ،ܰL5 &Ṁ z:`[9w+UuL#Z5B7W"}hw%ɲtjpv*Ttx0)ak[⻏L_I$,+gP81LzR*X~"sN ?cw'SJGB}[B*ƅD'jJ\<"l-* b^(Z͏GY7#k=ŀΡIV/v_!V͖8K'%~E[,Y ;*.+x3l~]|uY^vG4_;M_ V%J\,vÒJg2Mg /ۃ*17|3c*gUrKuWЬq S%͇bE RB2JoF$vE3q<`jtE<{?u} endstream endobj 837 0 obj << /Length1 1512 /Length2 8590 /Length3 0 /Length 9593 /Filter /FlateDecode >> stream xڍwT. CJHww  Cw#*t CIIH %H+)y;߹ZYk}~^kY *0_Pm ᳳAP0> qIE8Ô@;=mw@ b@qIAAĿ%p=g]\8sq . 
0twp%@xH ܼ2ܼ_`}t@n*g@ ݝ 8p@!`ם7 kt=R6 ?c rppw!0g h#"~gA ;?*]x % vs0GEw770 ;?%pu  st]1 VW[ FD%`OE{#!7|WApp0 |popp!=;pR{@ϿOwrtAfz<Uo OD'$*&!n@0'w_=ӿ2\7tX pVw_o17GGAn wF [j!n-UGA 3BT ~`G=/2(s^+nܭ;J&!aGLHT AwMwCb?qg+/Q @rx~N¿pu_Gw]B\=Z/DD psxܭ?BF@.@G;m6=Ŀv vrwrj={'O˷>x}57_,7:+b ~"uaUXn:p7%EeЕK3TFz1m]{?lFXW׃Zߨ$B~K4~fv=>{Ljdd,wY%~NpqŲPdbW- #1GVt`i§fb\"[BXcwl;p-/I|-ΪKY6Q#̸zb0'0AdxY?4&嬿q`3l,/𡲋e¼d&c:61a V; -SR$(h}aO4/gRpeÁN4ԗ:6RLcA]V W~r9j HJ*~2fљl"UV{&^곬$ m5)DF$NVNl(T)'}3nᥛ*Qr:}[sSC%WHu%vYұA3Jݴ䨞7 նqf_Lbژ|`hNA2ZUclJ"& LKհ7s=qoj+_?P9 7MXBfxG PTƙUv*%u% [y*/گ叭:ٖdݸK⧚-Ofz2=%F]ch8SYiڪ$'E_z,Q|57$سZEa4 ;PX pю)I_ e*ξ*4c wחN'  y/, ˸m|&k׼ax eOۦ"öLmOOZ81bL mHJ,) .|X ^ QY;6|O4unfҲ!+bi{RnӆCcƹv`s@%WSVg xgVaǧk/hV )Z(31skrt W뱱A/_T4P/[vlOzh59؝_WĞj fzhWhz'gN=|a QOQح%YT+*n#߬seI#KѶo[F+[bSfԈ\#O-/U!k@g} j.vY;vGO{Q)}}&d^T^˪|&WpSlͤ9Ft}0FhD }9UJyǙ5xݪ~sQ?K ~ qtĆ4tpjpC5NTU-س ξ~xaf"tܝ mcSʗd26TؐFR퉒#s NRU;̘aTpY^E= 4qO&DQsٚ`߄jkؒO.0l#_޺ +ϫi;!rth*?_gz@==cqhVq0L%y^ڥ!Sƚ1QxnikoB˛UUq^Ld^6VXV 尙I)u#j"#cȼs|o]$<%)ś狨gv<@:S [9'j1a:%=X!O@ȵD<a1R{T[&&R3CqDdU9Gʧ"wcs[=^cUYT93ʷ|$< u̞RҴLl$~7(X4)nz;X Hy[嘔 #xى[YY$9Š@kDz #}tU%Ե 7)3`P1X{Ut5o & Ĥ~-l83_F3]( R܃~gCץ V<+h׼{W" =G-E g}_8eE{:`:E r)q"_?Z2U|EQ媡J2\ۅ4Նoø c{@GyVgY/~H׏^|A:GKf TٛJ_8߸Ll3?n#f~B簓(1eLO)?\A)%\RxB >B};=o\D 4\ `ίtoŎ7]ɕK*O]!"GbzuHH7>'RT6^֏bOV)vkCF!_ ILKX$zIĢsڭV2L%u^yjdq$̲a4]%+lM.Fѕ-\g-%2~Bf(Pb|_s/~jCĈg▍ѶP.(ƐݨNESy5AԎ%8|Z3@Xp:cwq?n\fd$3t }ae{mZVe)8*eMXCݜsbި'zsOUʾBFH5[(9Mv+ӯCÌN3)_O(*2'R;Zg,{D;{vd:ВED%' jɬuߏJ{4_ͶV!j 67HHgǯn3tP!#"7XgY\|3u{r!?s12*ϡ S+R1eo5ƫ \\+S^YE8EQ5WeoN*p$:j웚x%0m8cATL!#$AW^{g2?J}^beʣUpIf6 k1 7*nHmOw׮- qۯ6o<[VTm5<~<.y҇JKע|v8LkSfl8jKo7IEB&g~(imv` ]xDWyl>-"XZB,W*yg䆐Oϥލj*~=w.0Wl—y2 t#}yC@!-'MKU_y]2p 弾'FRY'F9r_xfּP-E` :Dr8\v-a4hPac ki DB~710G* V&5H_JyUɟ<9,~i#7SO 8 /en.]eU-K#tÝ`635:ihl2G5$ML'%1۞Oڊ> }O\nmI2ϩ".!eHsaΟr)4ހ*(2CcCM뵹> A1|Z8աyס5,Wz]; p_?mϸl>n\8 |?L[#rh=}AֱCl[ގހHF湪lULoYIQep}/9]t; &z ȡ(,ufeazpOƾ{l܌uXWkAC@jEYG|Zc}v2R.ŏ^H>X]=}L9u?T)gkT;ٷZ~9p%Wː&7_lBVb\5z!Rf sM稧ؽ6QqOE2`/r0ZJ+Y*H !{zνK.H%;ea܎\_ϖ)MKh7CyCq9sj<-}y"F/[<q?͑=>N_g532'}`J+Vdަl&iy /&eMѲ@Sg}7V ˳s (2)Mf+ݚvktAm7򮉒dPzKI[Ex|'>-'kׅɯ􂥰Y[5O 4k$V9ڤ>Hd$ʸd R}͙/g/)Y$՞tw W@f _v/n5]3N ~ngu4SԽf}^RNrHYU~(s܌>-Y`te)6fF3uF6c|L0k^8 &eQESsDlpC@Ɂp+3ݛovϩUBl3:#1s8͒zcTHX1gDm.\I^bLH#]c277UcU7>]AGRMx*S?r᫴q$Vܼ rͽ] 1gu)TV52WXg!u|ápM>Ebu*y*g$ pA]7ePy_hfTMOi|ʹV3ꛢ%%C0Jƌ†${]" &߲ȋ &zcw^k~ 9{,%?h,R'N]#h&sop0X0iSA ^R_cZ^)H痢Bn![)hZ;Ql2{3u1UY/>l ކo5<i1|lQZѭ/]D)lj8$!O!z ҹ,I}gNW.LY @vZ}}zhM3-T.)Ô' Yΐ?ߕk"LXװ+Ǟ9ltXbڜ\MKǚ;x8i|-͊V8Wc}n\sayV[}O`G;gZFhD]^G=GdPzkÀ !SK7x` fGM?"*٦DGhxrddzu(p.®]jb M!k#(` U kgI0~!?MbsیNb"dГ(y+ըR3+%іͅ%iA\7Vn#pJeSSċD](V%6BYH}$Wقp, &_@gY/s|Zv➞̕-ܝ8wr7?LUxBY#`T..9G}619J:ot.+LL)9/}|15&TE5U8YxiB.Mh}l$Ju|9끑Ú:o^6͵ѫ ;׶3I[8# Y+~o{ % ?nψ(R/$I&k3ƓR)lAo,x9p`9E\[8۴,! eJi^B c Ju ,)AݕZa>s5̞t*GU󖆧>acK>| ?޳ZbXIDK˭a6qgV_57L7&iHVcgl[[&z%Y; }HIxNf_zc"#zq~8nH> %`ftZGBiRUӪY -+eQO޶:x켈qg!Շ#@A#OGz De`~;RsE%E-dm Lu[ܪs4Q%?K1Bi #LO3:R9aM)O[y=QNj:v~Te|Rӕ!3}.ofpyY}>4(IRCl,S>TAǪv^nJ[`㉤݈X5!(@Pl_?@aO \e֮p R͖%aZ9[c! Bg1 JX=ї0̮KZ`݅$!;м/yk2%x8ZV~*՚\?%ŽP3THu0L-pL~&) I+ Ǥ0'DG"SX+ 8b?U3AI`&%f!DM^7'4z`zDƂŅb}O٭/6v5. 
cLTLAgV"o|%Ta^@wtw~Oh˙ IexImRGPo;S]^poUmIܬkQ7y_OD=ܧ endstream endobj 750 0 obj << /Type /ObjStm /N 100 /First 900 /Length 4436 /Filter /FlateDecode >> stream x[[sF~ׯǸ*_bU$+ywq 1ErA*v!}"(V2>} )sBEFyx3;Z)5ΊA(i P:PVK ^ K4&1RbH͌Zj B;}:(yc0xdeH0^$ qDAID1NRhftt0)V)Lª0z,Q[dmN*#: '* p>.zKê XE[b?%`=rv)=l-i- 8PTuαu~xT(+K\PJCCXko;(iӣz-DP˷,kzV` u=_H~P֫e;WYd䢟ꓦz,Kx70Mբ/l>_`Y( !Ayty?6E{Ry*|SxbU7E ZX`Oa<>ۅ(_V+tl" Qt7M j35dabmmZRgOgy7|;_/˳f=f6UqږzmyQϛ<-ˋj*W{ݮYUʸ-7%r{\ٍQ=K)!&*e:>l,"ݞ,vCoTU!0WŞuP$u|moۿ];~<ߏB?^ x>&d!tD.b vf.w<a:仠 E-aUF0ͺ.Y}|/H`^ RӲs;4K@; E-,$$tߩr;(j1<8(a)0֏p:ƭ/e=׳bcn(d b$SvΫYV?&vQl#¦]a-nHh'dv\,fӬ&I>"1$ i"ʄ"ABLLOeV61Ģ_"## K`и vˤϚ䬩j¹O z{p TId[F;|A`fuЪQ %I[9! W/j3C =; >aU~ Ci1nY-$nn .TMϫf@DMr~]0@rCd_EV3?y0*KFx}4{/T}ȩtV\Ve @d^'Wm`vw uwROlOn'{^ c7qGJ^&ay7z [SƺޫWɨX{dzw?];|G0ya <02⪪mWb9\V˺]oT,W{ ]AH+ %~粘(ӝ+[b i)H!xUދ( k!ڷj{#5g(rG<%zȽt?8yWM>U05ZQ,3GFTpWC43Ds <8nw;x| v.ǭvqh{<Ilg;xAٽavg;̼vǸO_eXwKr7Mf ~v:V<̠mѼͶW݂~Dryvlώ [}I|K_oPl[J&5@/Q@yo2Gv9F:S.Cv2Uc'e~"h|>.Ȯ=C]f OK u OJ>Ax^¬c[|}7 }2^5o`ӿ7H7ӻg-4,W,mc;0Hy O--i"8ؾuT*2{ sT

[unrecoverable binary data: FlateDecode-compressed PDF font streams (PDF objects 839-864 of an embedded PDF file in the archive); compressed bytes omitted]
p'Q'B-_l*fVAEn ⴦ގ-݀0⪇q8IL Nbt@O5R>̨;G$fႥ:ujTXR3<"pkB t%/ j+GB.0#G9_zU>+IIneu|DOFُ]Bn } IdLPzӊD 98վݖ=ŅV]McVdۤC*E -9q~R(<|'LlaØȜZĀU9[?m#c͡C361Uas8ǘT(cHt#\?([苇~\3؟J`wˏdE*7)GqFqeyS/6T?(tٙ&d ,Bo#z̗i2[[G*(0RMljGHsamɚV6/ MJ@2+޶$w#DM,@IrK&Yu?E7tbdaP vS{f@P:cT1S9/Qp50m|5W8R熄Cx~5bfz@%CasNnsh 1>,P`@TrtxX;n:bWO_uC0ߠz ''[O8H'Y_ jrix.Gʹm*N_`OޗwWASˆNp?0 ܠygT*"Sq9N6#0̟i삅̰9s'&]2vbZH:>2+=MIi㧷!0zy 1t{ hn38KW_ogRꞴ~p *8.W|^{3ǻТ}yUJF¦H[+Oghq#Y{h8xzAAv߭9yW0F)v3堅8E.7ZEB, Wʵi}.׋VL*&m Zf p*,bn&nP/ 4 Q09r EPIn/`A3+L"⡨O@C%8<]0cp ' YT,RGQ4q#UXF}xG ðg{fB쨏mT~EgLH |эx >4@ Ú'54Wal;H[yh7ߝG%%L;AIc&7Zc v0+h*#n1kxЪG*ַg=5WOꡒLm52+h?mn^zzbjr~4NYZqy)Cь@mT(uk8}ԓk9 hQ&$i(3E;2  zl;\XzT^Rbb"HJnV2Fr{s,2å؜ZY&QΝ/fqo#~KϏOR͕%< h85 |Uh;{EIïl{|Nd}=7ЋtȖfIHKj@-\!z=ȅ(9McqkJR˔[2ۊtI;cwbi 1&VɌ5zjĜx12#:V:3ombHX̩d|9U#a($%BB1PVgp8J>!`&[ۋN"Ëy{D).'zR8G|b/t&x,,c Rn.ӘzS7;i!lU`ĥDH>H/÷/g]]FJ!$~]ob3kwR wӽͱ |r˗5BsP1 |ۓ?t?MV6 >"㉃ (&q)t!KQ_@OګŇKhh"bq>|Ļ~oP+3)2Qi! t{XV 7?[bF5?s2G:$𔔿N %X|\{O̤9څIkW P!/ϼ+{C3;" =[I?@amʁv7.ŰH nޮX=4Rɞ&e|&q-SPS|xOn?|tET@Mv)M[]3ԆaSXҭ]^\G:+?br9XBC%OI 3#l5ߦg#UYt\wATF\$oGGŶľw7A9440lQ I#nL>t ѱ!s/1Cj?  CPE 籬֛D߉m-}5 L~G[H ΜX˵'N}M _k.S.tj8Zfqmt,mXiCmW9=}=!:)А7 [BF"d`n \(-Q( O~i7}Be(t~=2nڮ8SD"ʍX~;;HhšTj^=:uV!)V[ d%gIINNKy uȟOB,:Xwa_Ŕ9{qtN'.d\fĎp#`ă-B<\SGn <!RS nnKzG/lk}E]W_OCu2po(lۏi4"͘uݓ!Sr"Dn 8 LjrC/ r΍!)3\  >ێ?pDBLm  &Ylѧ*t>I`cU(GQb*mbiS::4=ou4u=梹{5>mMd+Dg<[B4!b1zQrs.H{5J= ZieE"y?"9,fۦtIqә8xUsk:* _օ}S;Xxfu`fר 9g;r]O֠ 9? ׯ.P|-@ J`CeT9!9љmS`fn9_3F_{i-u4w/yA 2e`v[$h1тLl01d1ceĂAc뮙ӣLMOx>i3 NZn%x}wFY&ćg$rq쉨!)(sYgAIE${Wel$ZF,W2%__hԨ&>Ʒ+DWq'"Id>Ӽl(rMmg)`'?V0}\*Mh3tP7E61U3C;@_SjO'_qsM+@'l' zqu)d~P>T7*mlYb™{[NDR~), ){乂`Z,p$|hUʔӑ8 j K+H2C&,~xtsTS1@@O */#b|? /*jڊgCWη<), y7S# tK+$Sj[%>pױEyk"3Z]< =*%ޖ q9/m{d_׉:V_مJk:_G-+U P<2x!09ۨ_X"KXIe Mx+T3g$zh+p\He() ղ8>Lyq=xJ7 vI{ sJc6xf4 t=$_6n\ 5P.1݋2)Rt,HEb]4 @l= NX^diUAayS"l5SSDxp 5vaDjR[jhp $];|r|MnJ h𜒇ʫHүsT6BlىC٧jk1nzj;8hB"KqS->ӣp;4CZ;;zl\Il#+Q.ᨹ;m׵ U9U71 /κ 2ORq/4FUtã_jtUï /n0ZN~я;`K'O`TCP]HkvT mǪ؉ }7`#=" ? ;EFPR`YvÀdĖʐڜZi`+wm8sg3Ilk17Hn>2X, <yĜUFN*jIC_ȼ֎(տb NPOJl$)Q FW4D^m:n?kpȲ-9{px=g<-(L#DWǗU<2jWE8cK@bՑ'.\JWS2:R\Kdd< f,e,gnE,ǝE>xDF#%,|" GfXXNyPˆ)T4 ͛[y("ϧ^Mv1ăV6C 8r'?6SgLQi }N166Sn]6|tM < p"e&,IhDXվ2ĴF/%׉=9%z0Ѩ(1 .s/6eUeh,Z㱬W?ōW?`dJaj^y/b2VY ι@*PT}Nѧ$.T96%|+M(.=r2Y+;TΙ^ N ͏vݧ f0K!&8rѻ-'M'R,tj~-c?L"`mo$17#zF|^!Wm 7 ŋJ `i#4bHD&bsc|P:w@x} GgdnYb:W,>Dӭ]Ib4!PE9cUxRĩɚha>)ؓ7Y9]O Z8)4쏱WrHmQPɻ#[%R,^mƈB ^THT7Ar9ѤPA}o&9h)Ŭ)[lc1syRj$ 8~6'fόpd2ds9]%D|&w(ҔW.tVEoᤷzTw=(/m$@k^,OT߀!ՕI+u2FXZbQ@w f7'[//h^*T~G4>@V j.qF3/^H%~c?V$nq<~G#7],6n^ endstream endobj 841 0 obj << /Type /ObjStm /N 100 /First 909 /Length 3451 /Filter /FlateDecode >> stream xڽZYs~#}J5&6͖qcwB E4s_ut;$#8%`~5Q%k՜p/$hM'J '[MDjab9hRH6 =JuD:E cbOsA P2b>IX'`s&Ɓ{0*R3xf{챆̉}:|00|0t@Z< h8swށόX\:@=9H ߼4 yXlxp`{o8LFNS f?B$ȃLD9VH\  ÙWXq3ls\+ {RaC[a-s Eap5R{'d`qO-0L%A_E*S{觐Ja@CHB{DQ@pp.&C o4Qb89 .~e^|-}1]{h _.z3'%Q'o}}0Q}$|:=(xtrhRϮ'a{|ULB^%9-΋y@aP%bl_ɳ'+8; X䲺s<}wh/r=+zUt׋b*ŗhB'rFdUN'@@~7+ *`DvZo4%l=:q~yP8هcE>'cf=zA~#:vq1)>գ@7oo--L_]U'o>9G\1/ZUOE? 
menhir-20171222/README.md0000664000175000017500000000121413217215727014776 0ustar fpottierfpottier# Menhir

Menhir is an LR(1) parser generator for OCaml.

Menhir has a [home page](http://gallium.inria.fr/~fpottier/menhir/).

## Installation

OCaml (4.02 or later), ocamlbuild, and GNU make are needed.

The latest released version of Menhir can be easily installed via `opam`, OCaml's package manager. Just type `opam install menhir`. (A quick sanity check is sketched at the end of this file.)

For manual installation, see [INSTALLATION.md](INSTALLATION.md).

## Authors

* [François Pottier](Francois.Pottier@inria.fr)
* [Yann Régis-Gianas](Yann.Regis-Gianas@pps.jussieu.fr)

## Contributors

* Frédéric Bour (incremental engine, inspection API, attributes, SDK)
* Jacques-Henri Jourdan (Coq back-end)
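
## Quick sanity check

As a minimal sketch of a first test (assuming the `opam` installation described above succeeded), the commands below verify that Menhir is on the `PATH` and then run it on a grammar file. The file name `parser.mly` is a placeholder for a grammar of your own, not a file shipped with this distribution.

```sh
# Confirm that Menhir is installed and visible on the PATH.
menhir --version

# Process a grammar specification: this reads parser.mly and generates
# parser.ml and parser.mli. The --infer switch asks Menhir to invoke
# the OCaml compiler so as to infer the types of the semantic actions.
menhir --infer parser.mly
```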