pax_global_header00006660000000000000000000000064117131014020014500gustar00rootroot0000000000000052 comment=bdf606616a398f74a54d0a4bc2d42947b67aeff4 cppo-0.9.3/000077500000000000000000000000001171310140200124525ustar00rootroot00000000000000cppo-0.9.3/Changes000066400000000000000000000007321171310140200137470ustar00rootroot000000000000002012-02-03: release 0.9.3 [pkg] New way of building the tar.gz archive. 2011-08-12: release 0.9.2 [+ui] Added two predefined macros STRINGIFY and CONCAT for making string literals and for building identifiers respectively. 2011-07-20: release 0.9.1 [+ui] Added support for processing sections of files using external programs (#ext/#endext, -x option) [doc] Moved and extended documentation into the README file. 2009-11-17: initial version 0.9.0 cppo-0.9.3/LICENSE000066400000000000000000000025651171310140200134670ustar00rootroot00000000000000Copyright (c) 2009-2011 Martin Jambon All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. cppo-0.9.3/Makefile000066400000000000000000000051061171310140200141140ustar00rootroot00000000000000VERSION = 0.9.3 ifndef OCAMLYACC OCAMLYACC = ocamlyacc #OCAMLYACC = menhir endif export OCAMLYACC ifndef PREFIX PREFIX = /usr/local endif export PREFIX ifndef BINDIR BINDIR = $(PREFIX)/bin endif export BINDIR .PHONY: default all opt install clean test default: opt ML = cppo_version.ml cppo_types.ml \ cppo_parser.mli cppo_parser.ml \ cppo_lexer.ml \ cppo_command.ml \ cppo_eval.ml cppo_main.ml all: $(ML) ocamlc -o cppo -dtypes unix.cma $(ML) opt: $(ML) ocamlopt -o cppo -dtypes unix.cmxa $(ML) install: install -m 0755 cppo $(BINDIR) || \ install -m 0755 cppo.exe $(BINDIR) cppo_version.ml: Makefile echo 'let cppo_version = "$(VERSION)"' > cppo_version.ml cppo_lexer.ml: cppo_lexer.mll cppo_types.ml cppo_parser.ml ocamllex cppo_lexer.mll ifeq ($(DEV),true) cppo_parser.mli cppo_parser.ml: cppo_parser.mly cppo_types.ml menhir -v cppo_parser.mly else cppo_parser.mli cppo_parser.ml: cppo_parser.mly cppo_types.ml $(OCAMLYACC) cppo_parser.mly endif test: cd testdata; $(MAKE) clean: rm -f *.cm[iox] *.o *.annot *.conflicts *.automaton \ cppo \ cppo_parser.mli cppo_parser.ml cppo_lexer.ml cppo_version.ml cd examples; $(MAKE) clean SUBDIRS = testdata examples SVNURL = svn+ssh://mjambon@svn.forge.ocamlcore.org/svnroot/cppo/trunk/cppo archive: @echo "Making archive for version $(VERSION)" @if [ -z "$$WWW" ]; then \ echo '*** Environment variable WWW is undefined ***' >&2; \ exit 1; \ fi @if [ -n "$$(svn status -q)" ]; then \ echo "*** There are uncommitted changes, aborting. ***" >&2; \ exit 1; \ fi $(MAKE) && ./cppo -help > $$WWW/cppo-help.txt rm -rf /tmp/cppo /tmp/cppo-$(VERSION) && \ cd /tmp && \ svn co "$(SVNURL)" && \ for x in "." $(SUBDIRS); do \ rm -rf /tmp/cppo/$$x/.svn; \ done && \ cd /tmp && cp -r cppo cppo-$(VERSION) && \ tar czf cppo.tar.gz cppo && \ tar cjf cppo.tar.bz2 cppo && \ tar czf cppo-$(VERSION).tar.gz cppo-$(VERSION) && \ tar cjf cppo-$(VERSION).tar.bz2 cppo-$(VERSION) mv /tmp/cppo.tar.gz /tmp/cppo.tar.bz2 ../releases mv /tmp/cppo-$(VERSION).tar.gz /tmp/cppo-$(VERSION).tar.bz2 ../releases cp ../releases/cppo.tar.gz $$WWW/ cp ../releases/cppo.tar.bz2 $$WWW/ cp ../releases/cppo-$(VERSION).tar.gz $$WWW/ cp ../releases/cppo-$(VERSION).tar.bz2 $$WWW/ cd ../releases && \ svn add cppo.tar.gz cppo.tar.bz2 \ cppo-$(VERSION).tar.gz cppo-$(VERSION).tar.bz2 && \ svn commit -m "cppo version $(VERSION)" cp README $$WWW/cppo-manual-$(VERSION).txt cp LICENSE $$WWW/cppo-license.txt cp Changes $$WWW/cppo-changes.txt echo 'let cppo_version = "$(VERSION)"' \ > $$WWW/cppo-version.ml cppo-0.9.3/README000066400000000000000000000221721171310140200133360ustar00rootroot00000000000000 +=======================+ | cppo: cpp for OCaml | +=======================+ Introduction ============ Cppo is an equivalent of the C preprocessor for OCaml programs. It allows the definition of simple macros and file inclusion. Cppo is: - OCaml-friendly (unlike cpp) - easy to learn without consulting a manual (unlike m4 or camlp4) - reasonably fast (unlike camlmix) - simple to install and to maintain (unlike camlp4-based tools) User guide ========== Cppo is a preprocessor for programming languages that follow lexical rules compatible with OCaml. Cppo supports a number of directives. A directive is a '#' sign placed at the beginning of a line, possibly preceded by some whitespace, and followed by a valid directive name or by a number: BLANK* "#" BLANK* ("define"|"undef" |"if"|"ifdef"|"ifndef"|"else"|"elif"|"endif" |"include" |"warning"|"error" |"ext"|"endext") ... Directives can be split into multiple lines by placing a backslash \ at the end of the line to be continued. In general, any special character can used as a normal character by preceding it with backslash. 1. File inclusion ----------------- #include "hello.ml" This is how a source file "hello.ml" can be included. Relative paths are searched first in the directory of the current file and then in the search paths added on the command line using -I, if any. 2. Macros --------- This is a simple macro that doesn't take an argument ("object-like macro" in the cpp jargon): #define Ms Mississippi match state with Ms -> true | _ -> false After preprocessing by cppo, the code above becomes: match state with Mississippi -> true | _ -> false If needed, defined macros can be undefined. This is required prior to redefining a macro: #undef X An important distinction with cpp is that only previously-defined macros are accessible. Defining, undefining or redefining a macro has no effect on how previous macros will expand. Macros can take arguments ("function-like macro" in the cpp jargon). Both in the definition (#define) and in macro application the opening parenthesis must stick to the macro's identifier: #define debug(args) if !debugging then Printf.eprintf args else () debug("Testing %i" (1 + 1)) is expanded into: if !debugging then Printf.eprintf "Testing %i" (1 + 1) else () Here is a multiline macro definition. Newlines occurring between tokens must be protected by a backslash: #define repeat_until(action,condition) \ action; \ while not (condition) do \ action \ done All user-definable macros are constant. There are however two predefined variable macros: __FILE__ and __LINE__ which take the value of the position in the source file where the macro is being expanded. #define loc (Printf.sprintf "File %S, line %i" __FILE__ __LINE__) Macros can be defined on the command line as follows: # preprocessing only cppo -D 'VERSION 1.0' example.ml # preprocessing and compiling ocamlopt -c -pp "cppo -D 'VERSION 1.0'" example.ml 3. Conditionals --------------- Here is a quick reference on conditionals available in cppo. If you are not familiar with #ifdef, #ifndef, #if, #else and #elif, please refer to the corresponding section in the cpp manual. #ifndef VERSION #warning "VERSION is undefined" #define VERSION "n/a" #endif #ifndef VERSION #error "VERSION is undefined" #endif #if OCAML_MAJOR >= 3 && OCAML_MINOR >= 10 ... #endif #ifdef X ... #elif defined Y ... #else ... #endif The boolean expressions following #if and #elif may perform arithmetic operations and tests over 64-bit ints. Boolean expressions: defined ... followed by an identifier, returns true if such a macro exists true false ( ... ) ... && ... ... || ... not ... Arithmetic comparisons used in boolean expressions: ... = ... ... < ... ... > ... ... <> ... ... <= ... ... >= ... Arithmetic operators over signed 64-bit ints: ( ... ) ... + ... ... - ... ... * ... ... / ... ... mod ... ... lsl ... ... lsr ... ... asr ... ... land ... ... lor ... ... lxor ... lnot ... Macro identifiers can be used in place of ints as long as they expand to an int literal, e.g.: #define one 1 #if one + one <> 2 #error "Something's wrong." #endif 4. Source file location ----------------------- Location directives are the same as OCaml and are echoed in the output. They consist of a line number optionally followed by a file name: # 123 # 456 "source" 5. Messages ----------- Warnings and error messages can be produced by the preprocessor: #ifndef X #warning "Assuming default value for X" #define X 1 #elif X = 0 #error "X may not be null" #endif 6. Calling an external processor -------------------------------- Cppo provides a mechanism for converting sections of a file using and external program. Such a section must be placed between #ext and #endext directives. $ cat foo ABC #ext lowercase DEF #endext GHI #ext lowercase KLM NOP #endext QRS $ cppo -x lowercase:'tr "[A-Z]" "[a-z]"' foo # 1 "foo" ABC def # 5 "foo" GHI klm nop # 10 "foo" QRS In the example above, "lowercase" is the name given on the command-line to external command 'tr "[A-Z]" "[a-z]"' that reads input from stdin and writes its output to stdout. 7. Escaping ----------- The following characters can be escaped by a backslash when needed: ( ) , # In OCaml # is used for method calls. It is usually not a problem because in order to be interpreted as a preprocessor directive, it must be the first non-blank character of a line and be a known directive. If an object has a define method and you want # to appear first on a line, you would have to use \# instead: obj \#define Line directives in the usual format supported by OCaml are correctly interpreted by cppo. Comments and string literals constitute single tokens even when they span across multiple lines. Therefore newlines within string literals and comments should remain as-is (no preceding backslash) even in a macro body: #define welcome \ "********** *Welcome!* ********** " 8. Concatenation ---------------- CONCAT() is a predefined macro that takes two arguments, removes any whitespace between and around them and fuses them into a single identifier. The result of the concatenation must be a valid identifier of the form [A-Za-z_][A-Za-z0-9_]+ or [A-Za-z], or empty. For example, #define x 123 CONCAT(z, x) expands into: z123 However the following is illegal: #define x 123 CONCAT(x, z) because 123z does not form a valid identifier. CONCAT(a,b) is roughly the equivalent a##b in cpp syntax. 9. Stringification ------------------ STRINGIFY() is a predefined macro that takes one argument, removes any leading and trailing whitespace, reduces each internal whitespace sequence to a single space character and produces a valid OCaml string literal. For example, #define TRACE(f) Printf.printf ">>> %s\n" STRINGIFY(f); f TRACE(print_endline) "Hello" is expanded into: Printf.printf ">>> %s\n" "print_endline"; print_endline "Hello" STRINGIFY(x) is the equivalent of #x in cpp syntax. 10. Detailed command-line usage and options ------------------------------------------ Usage: ./cppo [OPTIONS] [FILE1 [FILE2 ...]] Options: -D DEF Equivalent of interpreting '#define DEF' before processing the input -U IDENT Equivalent of interpreting '#undef IDENT' before processing the input -I DIR Add directory DIR to the search path for included files -o FILE Output file -q Identify and preserve camlp4 quotations -s Output line directives pointing to the exact source location of each token, including those coming from the body of macro definitions. This behavior is off by default. -n Do not output any line directive other than those found in the input (overrides -s). -version Print the version of the program and exit. -x NAME:CMD_TEMPLATE Define a custom preprocessor target section starting with: #ext "NAME" and ending with: #endext NAME must be a lowercase identifier of the form [a-z][A-Za-z0-9_]* CMD_TEMPLATE is a command template supporting the following special sequences: %F file name (unescaped; beware of potential scripting attacks) %B number of the first line %E number of the last line %% a single percent sign Filename, first line number and last line number are also available from the following environment variables: CPPO_FILE, CPPO_FIRST_LINE, CPPO_LAST_LINE. The command produced is expected to read the data lines from stdin and to write its output to stdout. -help Display this list of options --help Display this list of options ------------------------------------------------------------------------------ Martin Jambon cppo-0.9.3/cppo_command.ml000066400000000000000000000030331171310140200154420ustar00rootroot00000000000000(* $Id$ *) open Printf type command_token = [ `Text of string | `Loc_file | `Loc_first_line | `Loc_last_line ] type command_template = command_token list let parse s : command_template = let rec loop acc buf s len i = if i >= len then let s = Buffer.contents buf in if s = "" then acc else `Text s :: acc else if i = len - 1 then ( Buffer.add_char buf s.[i]; `Text (Buffer.contents buf) :: acc ) else let c = s.[i] in if c = '%' then let acc = let s = Buffer.contents buf in Buffer.clear buf; if s = "" then acc else `Text s :: acc in let x = match s.[i+1] with 'F' -> `Loc_file | 'B' -> `Loc_first_line | 'E' -> `Loc_last_line | '%' -> `Text "%" | _ -> failwith ( sprintf "Invalid escape sequence in command template %S. \ Use %%%% for a %% sign." s ) in loop (x :: acc) buf s len (i + 2) else ( Buffer.add_char buf c; loop acc buf s len (i + 1) ) in let len = String.length s in List.rev (loop [] (Buffer.create len) s len 0) let subst (cmd : command_template) file first last = let l = List.map ( function `Text s -> s | `Loc_file -> file | `Loc_first_line -> string_of_int first | `Loc_last_line -> string_of_int last ) cmd in String.concat "" l cppo-0.9.3/cppo_eval.ml000066400000000000000000000400161171310140200147550ustar00rootroot00000000000000(* $Id$ *) open Printf open Cppo_types module S = Set.Make (String) module M = Map.Make (String) let empty_env = M.empty let builtins = [ "__FILE__", (fun env -> `Special); "__LINE__", (fun env -> `Special); "STRINGIFY", (fun env -> `Defun (dummy_loc, "STRINGIFY", ["x"], [`Stringify (`Ident (dummy_loc, "x", None))], env) ); "CONCAT", (fun env -> `Defun (dummy_loc, "CONCAT", ["x";"y"], [`Concat (`Ident (dummy_loc, "x", None), `Ident (dummy_loc, "y", None))], env) ); ] let is_reserved s = List.exists (fun (s', _) -> s = s') builtins let builtin_env = List.fold_left (fun env (s, f) -> M.add s (f env) env) M.empty builtins let line_directive buf prev_file pos = let file = pos.Lexing.pos_fname in let len = Buffer.length buf in if len > 0 && Buffer.nth buf (len - 1) <> '\n' then Buffer.add_char buf '\n'; (match prev_file with Some s when s = file -> bprintf buf "# %i\n" pos.Lexing.pos_lnum | _ -> bprintf buf "# %i %S\n" pos.Lexing.pos_lnum pos.Lexing.pos_fname ); bprintf buf "%s" (String.make (pos.Lexing.pos_cnum - pos.Lexing.pos_bol) ' ') let rec add_sep sep last = function [] -> [ last ] | [x] -> [ x; last ] | x :: l -> x :: sep :: add_sep sep last l let trim s = let len = String.length s in let is_space = function ' ' | '\t' | '\n' | '\r' -> true | _ -> false in let first = let x = ref len in (try for i = 0 to len - 1 do if not (is_space s.[i]) then ( x := i; raise Exit ) done with Exit -> () ); !x in let last = let x = ref (-1) in (try for i = len - 1 downto 0 do if not (is_space s.[i]) then ( x := i; raise Exit ) done with Exit -> () ); !x in if first <= last then String.sub s first (last - first + 1) else "" let int_of_string_with_space s = try Some (Int64.of_string (trim s)) with _ -> None let remove_space l = List.filter (function `Text (_, true, _) -> false | _ -> true) l let trim_and_compact buf s = let started = ref false in let need_space = ref false in for i = 0 to String.length s - 1 do match s.[i] with ' ' | '\t' | '\n' | '\r' -> if !started then need_space := true | c -> if !need_space then Buffer.add_char buf ' '; (match c with '\"' -> Buffer.add_string buf "\\\"" | '\\' -> Buffer.add_string buf "\\\\" | c -> Buffer.add_char buf c); started := true; need_space := false done let stringify buf s = Buffer.add_char buf '\"'; trim_and_compact buf s; Buffer.add_char buf '\"' let trim_and_compact_string s = let buf = Buffer.create (String.length s) in trim_and_compact buf s; Buffer.contents buf let is_ident s = let len = String.length s in len > 0 && (match s.[0] with 'A'..'Z' | 'a'..'z' -> true | '_' when len > 1 -> true | _ -> false) && (try for i = 1 to len - 1 do match s.[i] with 'A'..'Z' | 'a'..'z' | '_' | '0'..'9' -> () | _ -> raise Exit done; true with Exit -> false) let concat loc x y = let s = trim_and_compact_string x ^ trim_and_compact_string y in if not (s = "" || is_ident s) then error loc (sprintf "CONCAT() does not expand into a valid identifier nor \ into whitespace:\n%S" s) else if s = "" then " " else " " ^ s ^ " " let rec eval_int env (x : arith_expr) : int64 = match x with `Int x -> x | `Ident (loc, name) -> let l = try match M.find name env with `Def (_, _, l, _) -> l | `Defun _ -> error loc (sprintf "%S expects arguments" name) | `Special -> assert false with Not_found -> error loc (sprintf "Undefined identifier %S" name) in (try match remove_space l with [ `Ident (loc, name, None) ] -> eval_int env (`Ident (loc, name)) | _ -> let text = List.map ( function `Text (_, is_space, s) -> s | _ -> error loc (sprintf "Identifier %S is not bound to a constant" name) ) l in let s = String.concat "" text in (match int_of_string_with_space s with None -> error loc (sprintf "Identifier %S is not bound to an int literal" name) | Some n -> n ) with Cppo_error s -> error loc (sprintf "Identifier %S does not expand to an int:\n%s" name s) ) | `Neg x -> Int64.neg (eval_int env x) | `Add (a, b) -> Int64.add (eval_int env a) (eval_int env b) | `Sub (a, b) -> Int64.sub (eval_int env a) (eval_int env b) | `Mul (a, b) -> Int64.mul (eval_int env a) (eval_int env b) | `Div (loc, a, b) -> (try Int64.div (eval_int env a) (eval_int env b) with Division_by_zero -> error loc "Division by zero") | `Mod (loc, a, b) -> (try Int64.rem (eval_int env a) (eval_int env b) with Division_by_zero -> error loc "Division by zero") | `Lnot a -> Int64.lognot (eval_int env a) | `Lsl (a, b) -> let n = eval_int env a in let shift = eval_int env b in let shift = if shift >= 64L then 64L else if shift <= -64L then -64L else shift in Int64.shift_left n (Int64.to_int shift) | `Lsr (a, b) -> let n = eval_int env a in let shift = eval_int env b in let shift = if shift >= 64L then 64L else if shift <= -64L then -64L else shift in Int64.shift_right_logical n (Int64.to_int shift) | `Asr (a, b) -> let n = eval_int env a in let shift = eval_int env b in let shift = if shift >= 64L then 64L else if shift <= -64L then -64L else shift in Int64.shift_right n (Int64.to_int shift) | `Land (a, b) -> Int64.logand (eval_int env a) (eval_int env b) | `Lor (a, b) -> Int64.logor (eval_int env a) (eval_int env b) | `Lxor (a, b) -> Int64.logxor (eval_int env a) (eval_int env b) let rec eval_bool env (x : bool_expr) = match x with `True -> true | `False -> false | `Defined s -> M.mem s env | `Not x -> not (eval_bool env x) | `And (a, b) -> eval_bool env a && eval_bool env b | `Or (a, b) -> eval_bool env a || eval_bool env b | `Eq (a, b) -> eval_int env a = eval_int env b | `Lt (a, b) -> eval_int env a < eval_int env b | `Gt (a, b) -> eval_int env a > eval_int env b type globals = { call_loc : Cppo_types.loc; (* location used to set the value of __FILE__ and __LINE__ global variables *) mutable buf : Buffer.t; (* buffer where the output is written *) included : S.t; (* set of already-included files *) require_location : bool ref; (* whether a line directive should be printed before outputting the next token *) last_file_loc : string option ref; (* used to test whether a line directive should include the file name *) show_exact_locations : bool; (* whether line directives should be printed even for expanded macro bodies *) enable_loc : bool ref; (* whether line directives should be printed *) g_preserve_quotations : bool; (* identify and preserve camlp4 quotations *) incdirs : string list; (* directories for finding included files *) current_directory : string; (* directory containing the current file *) extensions : (string, Cppo_command.command_template) Hashtbl.t; (* mapping from extension ID to pipeline command *) } let parse ~preserve_quotations file lexbuf = let lexer_env = Cppo_lexer.init ~preserve_quotations file lexbuf in try Cppo_parser.main (Cppo_lexer.line lexer_env) lexbuf with Parsing.Parse_error -> error (Cppo_lexer.loc lexbuf) "syntax error" | Cppo_types.Cppo_error _ as e -> raise e | e -> error (Cppo_lexer.loc lexbuf) (Printexc.to_string e) let plural n = if abs n <= 1 then "" else "s" let maybe_print_location g pos = if !(g.enable_loc) then let prev_file = !(g.last_file_loc) in let file = pos.Lexing.pos_fname in if !(g.require_location) then ( line_directive g.buf prev_file pos; g.last_file_loc := Some file ) let expand_ext g loc id data = let cmd_tpl = try Hashtbl.find g.extensions id with Not_found -> error loc (sprintf "Undefined extension %s" id) in let p1, p2 = loc in let file = p1.Lexing.pos_fname in let first = p1.Lexing.pos_lnum in let last = p2.Lexing.pos_lnum in let cmd = Cppo_command.subst cmd_tpl file first last in Unix.putenv "CPPO_FILE" file; Unix.putenv "CPPO_FIRST_LINE" (string_of_int first); Unix.putenv "CPPO_LAST_LINE" (string_of_int last); let (ic, oc) as p = Unix.open_process cmd in output_string oc data; close_out oc; (try while true do bprintf g.buf "%s\n" (input_line ic) done with End_of_file -> () ); match Unix.close_process p with Unix.WEXITED 0 -> () | Unix.WEXITED n -> failwith (sprintf "Command %S exited with status %i" cmd n) | _ -> failwith (sprintf "Command %S failed" cmd) let rec include_file g loc rel_file env = let file = if not (Filename.is_relative rel_file) then if Sys.file_exists rel_file then rel_file else error loc (sprintf "Included file %S does not exist" rel_file) else try let dir = List.find ( fun dir -> let file = Filename.concat dir rel_file in Sys.file_exists file ) (g.current_directory :: g.incdirs) in if dir = Filename.current_dir_name then rel_file else Filename.concat dir rel_file with Not_found -> error loc (sprintf "Cannot find included file %S" rel_file) in if S.mem file g.included then failwith (sprintf "Cyclic inclusion of file %S" file) else let ic = open_in file in let lexbuf = Lexing.from_channel ic in let l = parse ~preserve_quotations:g.g_preserve_quotations file lexbuf in close_in ic; expand_list { g with included = S.add file g.included; current_directory = Filename.dirname file } env l and expand_list ?(top = false) g env l = List.fold_left (expand_node ~top g) env l and expand_node ?(top = false) g env0 x = match x with `Ident (loc, name, opt_args) -> let def = try Some (M.find name env0) with Not_found -> None in let g = if top && def <> None then { g with call_loc = loc } else g in let enable_loc0 = !(g.enable_loc) in if def <> None then ( g.require_location := true; if not g.show_exact_locations then ( (* error reports will point more or less to the point where the code is included rather than the source location of the macro definition *) maybe_print_location g (fst loc); g.enable_loc := false ) ); let env = match def, opt_args with None, None -> expand_node g env0 (`Text (loc, false, name)) | None, Some args -> let with_sep = add_sep [`Text (loc, false, ",")] [`Text (loc, false, ")")] args in let l = `Text (loc, false, name ^ "(") :: List.flatten with_sep in expand_list g env0 l | Some (`Defun (_, _, arg_names, _, _)), None -> error loc (sprintf "%S expects %i arguments but is applied to none." name (List.length arg_names)) | Some (`Def _), Some l -> error loc (sprintf "%S expects no arguments" name) | Some (`Def (_, _, l, env)), None -> ignore (expand_list g env l); env0 | Some (`Defun (_, _, arg_names, l, env)), Some args -> let argc = List.length arg_names in let n = List.length args in let args = (* it's ok to pass an empty arg if one arg is expected *) if n = 0 && argc = 1 then [[]] else args in if argc <> n then error loc (sprintf "%S expects %i argument%s but is applied to \ %i argument%s." name argc (plural argc) n (plural n)) else let app_env = List.fold_left2 ( fun env name l -> M.add name (`Def (loc, name, l, env0)) env ) env arg_names args in ignore (expand_list g app_env l); env0 | Some `Special, _ -> assert false in if def = None then g.require_location := false else g.require_location := true; (* restore initial setting *) g.enable_loc := enable_loc0; env | `Def (loc, name, body)-> g.require_location := true; if M.mem name env0 then error loc (sprintf "%S is already defined" name) else M.add name (`Def (loc, name, body, env0)) env0 | `Defun (loc, name, arg_names, body) -> g.require_location := true; if M.mem name env0 then error loc (sprintf "%S is already defined" name) else M.add name (`Defun (loc, name, arg_names, body, env0)) env0 | `Undef (loc, name) -> g.require_location := true; if is_reserved name then error loc (sprintf "%S is a built-in variable that cannot be undefined" name) else M.remove name env0 | `Include (loc, file) -> g.require_location := true; let env = include_file g loc file env0 in g.require_location := true; env | `Ext (loc, id, data) -> g.require_location := true; expand_ext g loc id data; g.require_location := true; g.last_file_loc := None; env0 | `Cond (loc, test, if_true, if_false) -> let l = if eval_bool env0 test then if_true else if_false in g.require_location := true; let env = expand_list g env0 l in g.require_location := true; env | `Error (loc, msg) -> error loc msg | `Warning (loc, msg) -> warning loc msg; env0 | `Text (loc, is_space, s) -> if not is_space then ( maybe_print_location g (fst loc); g.require_location := false ); Buffer.add_string g.buf s; env0 | `Seq l -> expand_list g env0 l | `Stringify x -> let enable_loc0 = !(g.enable_loc) in g.enable_loc := false; let buf0 = g.buf in let local_buf = Buffer.create 100 in g.buf <- local_buf; ignore (expand_node g env0 x); stringify buf0 (Buffer.contents local_buf); g.buf <- buf0; g.enable_loc := enable_loc0; env0 | `Concat (x, y) -> let enable_loc0 = !(g.enable_loc) in g.enable_loc := false; let buf0 = g.buf in let local_buf = Buffer.create 100 in g.buf <- local_buf; ignore (expand_node g env0 x); let xs = Buffer.contents local_buf in Buffer.clear local_buf; ignore (expand_node g env0 y); let ys = Buffer.contents local_buf in let s = concat g.call_loc xs ys in Buffer.add_string buf0 s; g.buf <- buf0; g.enable_loc := enable_loc0; env0 | `Line (loc, opt_file, n) -> (* printing a line directive is not strictly needed *) (match opt_file with None -> maybe_print_location g (fst loc); bprintf g.buf "\n# %i\n" n | Some file -> bprintf g.buf "\n# %i %S\n" n file ); (* printing the location next time is needed because it just changed *) g.require_location := true; env0 | `Current_line loc -> maybe_print_location g (fst loc); g.require_location := true; let pos, _ = g.call_loc in bprintf g.buf " %i " pos.Lexing.pos_lnum; env0 | `Current_file loc -> maybe_print_location g (fst loc); g.require_location := true; let pos, _ = g.call_loc in bprintf g.buf " %S " pos.Lexing.pos_fname; env0 let include_inputs ~extensions ~preserve_quotations ~incdirs ~show_exact_locations ~show_no_locations buf env l = let enable_loc = not show_no_locations in List.fold_left ( fun env (dir, file, open_, close) -> let l = parse ~preserve_quotations file (open_ ()) in close (); let g = { call_loc = dummy_loc; buf = buf; included = S.empty; require_location = ref true; last_file_loc = ref None; show_exact_locations = show_exact_locations; enable_loc = ref enable_loc; g_preserve_quotations = preserve_quotations; incdirs = incdirs; current_directory = dir; extensions = extensions; } in expand_list ~top:true { g with included = S.add file g.included } env l ) env l cppo-0.9.3/cppo_lexer.mll000066400000000000000000000354271171310140200153330ustar00rootroot00000000000000(* $Id$ *) { open Printf open Lexing open Cppo_types open Cppo_parser let pos1 lexbuf = lexbuf.lex_start_p let pos2 lexbuf = lexbuf.lex_curr_p let loc lexbuf = (pos1 lexbuf, pos2 lexbuf) let lexer_error lexbuf descr = error (loc lexbuf) descr let new_file lb name = lb.lex_curr_p <- { lb.lex_curr_p with pos_fname = name } let lex_new_lines lb = let n = ref 0 in let s = lb.lex_buffer in for i = lb.lex_start_pos to lb.lex_curr_pos do if s.[i] = '\n' then incr n done; let p = lb.lex_curr_p in lb.lex_curr_p <- { p with pos_lnum = p.pos_lnum + !n; pos_bol = p.pos_cnum } let count_new_lines lb n = let p = lb.lex_curr_p in lb.lex_curr_p <- { p with pos_lnum = p.pos_lnum + n; pos_bol = p.pos_cnum } (* must start a new line *) let update_pos lb p added_chars added_breaks = let cnum = p.pos_cnum + added_chars in lb.lex_curr_p <- { pos_fname = p.pos_fname; pos_lnum = p.pos_lnum + added_breaks; pos_bol = cnum; pos_cnum = cnum } let set_lnum lb opt_file lnum = let p = lb.lex_curr_p in let cnum = p.pos_cnum in let fname = match opt_file with None -> p.pos_fname | Some file -> file in lb.lex_curr_p <- { pos_fname = fname; pos_bol = cnum; pos_cnum = cnum; pos_lnum = lnum } let shift lb n = let p = lb.lex_curr_p in lb.lex_curr_p <- { p with pos_cnum = p.pos_cnum + n } let read_hexdigit c = match c with '0'..'9' -> Char.code c - 48 | 'A'..'F' -> Char.code c - 55 | 'a'..'z' -> Char.code c - 87 | _ -> invalid_arg "read_hexdigit" let read_hex2 c1 c2 = Char.chr (read_hexdigit c1 * 16 + read_hexdigit c2) type env = { preserve_quotations : bool; mutable lexer : [ `Ocaml | `Test ]; mutable line_start : bool; mutable in_directive : bool; (* true while processing a directive, until the final newline *) buf : Buffer.t; mutable token_start : Lexing.position; lexbuf : Lexing.lexbuf; } let new_line env = env.line_start <- true; count_new_lines env.lexbuf 1 let clear env = Buffer.clear env.buf let add env s = env.line_start <- false; Buffer.add_string env.buf s let add_char env c = env.line_start <- false; Buffer.add_char env.buf c let get env = Buffer.contents env.buf let long_loc e = (e.token_start, pos2 e.lexbuf) } (* standard character classes used for macro identifiers *) let upper = ['A'-'Z'] let lower = ['a'-'z'] let digit = ['0'-'9'] let identchar = upper | lower | digit | [ '_' '\'' ] (* iso-8859-1 upper and lower characters used for ocaml identifiers *) let oc_upper = ['A'-'Z' '\192'-'\214' '\216'-'\222'] let oc_lower = ['a'-'z' '\223'-'\246' '\248'-'\255'] let oc_identchar = oc_upper | oc_lower | digit | ['_' '\''] (* Identifiers: ident is used for macro names and is a subset of oc_ident *) let ident = (lower | '_' identchar | upper) identchar* let oc_ident = (oc_lower | '_' oc_identchar | oc_upper) oc_identchar* let hex = ['0'-'9' 'a'-'f' 'A'-'F'] let oct = ['0'-'7'] let bin = ['0'-'1'] let operator_char = [ '!' '$' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~'] let infix_symbol = ['=' '<' '>' '@' '^' '|' '&' '+' '-' '*' '/' '$' '%'] operator_char* let prefix_symbol = ['!' '?' '~'] operator_char* let blank = [ ' ' '\t' ] let space = [ ' ' '\t' '\r' '\n' ] let line = ( [^'\n'] | '\\' ('\r'? '\n') )* ('\n' | eof) let dblank0 = (blank | '\\' '\r'? '\n')* let dblank1 = blank (blank | '\\' '\r'? '\n')* rule token e = parse "" { (* We use two different lexers for boolean expressions in #if directives and for regular OCaml tokens. *) match e.lexer with `Ocaml -> ocaml_token e lexbuf | `Test -> test_token e lexbuf } and line e = parse blank* "#" as s { match e.lexer with `Test -> lexer_error lexbuf "Syntax error in boolean expression" | `Ocaml -> if e.line_start then ( e.in_directive <- true; clear e; add e s; e.token_start <- pos1 lexbuf; e.line_start <- false; directive e lexbuf ) else ( e.line_start <- false; clear e; TEXT (loc lexbuf, false, s) ) } | "" { clear e; token e lexbuf } and directive e = parse blank* "define" dblank1 (ident as id) "(" { DEFUN (long_loc e, id) } | blank* "define" dblank1 (ident as id) { assert e.in_directive; DEF (long_loc e, id) } | blank* "undef" dblank1 (ident as id) { blank_until_eol e lexbuf; UNDEF (long_loc e, id) } | blank* "if" dblank1 { e.lexer <- `Test; IF (long_loc e) } | blank* "elif" dblank1 { e.lexer <- `Test; ELIF (long_loc e) } | blank* "ifdef" dblank1 (ident as id) { blank_until_eol e lexbuf; IFDEF (long_loc e, `Defined id) } | blank* "ifndef" dblank1 (ident as id) { blank_until_eol e lexbuf; IFDEF (long_loc e, `Not (`Defined id)) } | blank* "ext" dblank1 (ident as id) { blank_until_eol e lexbuf; clear e; let s = read_ext e lexbuf in EXT (long_loc e, id, s) } | blank* "define" dblank1 oc_ident | blank* "undef" dblank1 oc_ident | blank* "ifdef" dblank1 oc_ident | blank* "ifndef" dblank1 oc_ident | blank* "ext" dblank1 oc_ident { error (loc lexbuf) "Identifiers containing non-ASCII characters \ may not be used as macro identifiers" } | blank* "else" { blank_until_eol e lexbuf; ELSE (long_loc e) } | blank* "endif" { blank_until_eol e lexbuf; ENDIF (long_loc e) } | blank* "include" dblank0 '"' { clear e; eval_string e lexbuf; blank_until_eol e lexbuf; INCLUDE (long_loc e, get e) } | blank* "error" dblank0 '"' { clear e; eval_string e lexbuf; blank_until_eol e lexbuf; ERROR (long_loc e, get e) } | blank* "warning" dblank0 '"' { clear e; eval_string e lexbuf; blank_until_eol e lexbuf; WARNING (long_loc e, get e) } | blank* (['0'-'9']+ as lnum) dblank0 '\r'? '\n' { e.in_directive <- false; new_line e; let here = long_loc e in let fname = None in let lnum = int_of_string lnum in (* Apply line directive regardless of possible #if condition. *) set_lnum lexbuf fname lnum; LINE (here, None, lnum) } | blank* (['0'-'9']+ as lnum) dblank0 '"' { clear e; eval_string e lexbuf; blank_until_eol e lexbuf; let here = long_loc e in let fname = Some (get e) in let lnum = int_of_string lnum in (* Apply line directive regardless of possible #if condition. *) set_lnum lexbuf fname lnum; LINE (here, fname, lnum) } | blank* { e.in_directive <- false; add e (lexeme lexbuf); TEXT (long_loc e, true, get e) } | blank* ['a'-'z']+ { e.in_directive <- false; add e (lexeme lexbuf); TEXT (long_loc e, false, get e) } and blank_until_eol e = parse blank* eof | blank* '\r'? '\n' { new_line e; e.in_directive <- false } | "" { lexer_error lexbuf "syntax error in directive" } and read_ext e = parse blank* "#" blank* "endext" blank* ('\r'? '\n' | eof) { let s = get e in clear e; new_line e; e.in_directive <- false; s } | (blank* as a) "\\" ("#" blank* "endext" blank* '\r'? '\n' as b) { add e a; add e b; new_line e; read_ext e lexbuf } | [^'\n']* '\n' as x { add e x; new_line e; read_ext e lexbuf } | eof { lexer_error lexbuf "End of file within #ext ... #endext" } and ocaml_token e = parse "__LINE__" { e.line_start <- false; CURRENT_LINE (loc lexbuf) } | "__FILE__" { e.line_start <- false; CURRENT_FILE (loc lexbuf) } | ident as s { e.line_start <- false; IDENT (loc lexbuf, s) } | oc_ident as s { e.line_start <- false; TEXT (loc lexbuf, false, s) } | ident as s "(" { e.line_start <- false; FUNIDENT (loc lexbuf, s) } | "'\n'" | "'\r\n'" { new_line e; TEXT (loc lexbuf, false, lexeme lexbuf) } | ")" { e.line_start <- false; CL_PAREN (loc lexbuf) } | "," { e.line_start <- false; COMMA (loc lexbuf) } | "\\)" { e.line_start <- false; TEXT (loc lexbuf, false, " )") } | "\\," { e.line_start <- false; TEXT (loc lexbuf, false, " ,") } | "\\(" { e.line_start <- false; TEXT (loc lexbuf, false, " (") } | "\\#" { e.line_start <- false; TEXT (loc lexbuf, false, " #") } | '`' | "!=" | "#" | "&" | "&&" | "(" | "*" | "+" | "-" | "-." | "->" | "." | ".. :" | "::" | ":=" | ":>" | ";" | ";;" | "<" | "<-" | "=" | ">" | ">]" | ">}" | "?" | "??" | "[" | "[<" | "[>" | "[|" | "]" | "_" | "`" | "{" | "{<" | "|" | "|]" | "}" | "~" | ">>" | prefix_symbol | infix_symbol | "'" ([^ '\'' '\\'] | '\\' (_ | digit digit digit | 'x' hex hex)) "'" { e.line_start <- false; TEXT (loc lexbuf, false, lexeme lexbuf) } | blank+ { TEXT (loc lexbuf, true, lexeme lexbuf) } | '\\' ('\r'? '\n' as nl) { new_line e; if e.in_directive then TEXT (loc lexbuf, true, nl) else TEXT (loc lexbuf, false, lexeme lexbuf) } | '\r'? '\n' { new_line e; if e.in_directive then ( e.in_directive <- false; ENDEF (loc lexbuf) ) else TEXT (loc lexbuf, true, lexeme lexbuf) } | "(*" { clear e; add e "(*"; e.token_start <- pos1 lexbuf; comment e 1 lexbuf } | '"' { clear e; add e "\""; e.token_start <- pos1 lexbuf; string e lexbuf; e.line_start <- false; TEXT (long_loc e, false, get e) } | "<:" | "<<" { if e.preserve_quotations then ( clear e; add e (lexeme lexbuf); e.token_start <- pos1 lexbuf; quotation e lexbuf; e.line_start <- false; TEXT (long_loc e, false, get e) ) else ( e.line_start <- false; TEXT (loc lexbuf, false, lexeme lexbuf) ) } | '-'? ( digit (digit | '_')* | ("0x"| "0X") hex (hex | '_')* | ("0o"| "0O") oct (oct | '_')* | ("0b"| "0B") bin (bin | '_')* ) | '-'? digit (digit | '_')* ('.' (digit | '_')* )? (['e' 'E'] ['+' '-']? digit (digit | '_')* )? { e.line_start <- false; TEXT (loc lexbuf, false, lexeme lexbuf) } | blank+ { TEXT (loc lexbuf, true, lexeme lexbuf) } | _ { e.line_start <- false; TEXT (loc lexbuf, false, lexeme lexbuf) } | eof { EOF } and comment e depth = parse "(*" { add e "(*"; comment e (depth + 1) lexbuf } | "*)" { let depth = depth - 1 in add e "*)"; if depth > 0 then comment e depth lexbuf else ( e.line_start <- false; TEXT (long_loc e, false, get e) ) } | '"' { add_char e '"'; string e lexbuf; comment e depth lexbuf } | "'\n'" | "'\r\n'" { new_line e; add e (lexeme lexbuf); comment e depth lexbuf } | "'" ([^ '\'' '\\'] | '\\' (_ | digit digit digit | 'x' hex hex)) "'" { add e (lexeme lexbuf); comment e depth lexbuf } | '\r'? '\n' { new_line e; add e (lexeme lexbuf); comment e depth lexbuf } | [^'(' '*' '"' '\r' '\n']+ { (* tolerates unmatched single quotes in comments, unlike the standard ocaml lexer *) add e (lexeme lexbuf); comment e depth lexbuf } | _ { add e (lexeme lexbuf); comment e depth lexbuf } | eof { lexer_error lexbuf "Unterminated comment reaching the end of file" } and string e = parse '"' { add_char e '"' } | "\\\\" | '\\' '"' { add e (lexeme lexbuf); string e lexbuf } | '\\' '\r'? '\n' { add e (lexeme lexbuf); new_line e; string e lexbuf } | '\r'? '\n' { if e.in_directive then lexer_error lexbuf "Unterminated string literal" else ( add e (lexeme lexbuf); new_line e; string e lexbuf ) } | _ as c { add_char e c; string e lexbuf } | eof { } and eval_string e = parse '"' { } | '\\' (['\'' '\"' '\\'] as c) { add_char e c; eval_string e lexbuf } | '\\' '\r'? '\n' { assert e.in_directive; eval_string e lexbuf } | '\r'? '\n' { assert e.in_directive; lexer_error lexbuf "Unterminated string literal" } | '\\' (digit digit digit as s) { add_char e (Char.chr (int_of_string s)); eval_string e lexbuf } | '\\' 'x' (hex as c1) (hex as c2) { add_char e (read_hex2 c1 c2); eval_string e lexbuf } | '\\' 'b' { add_char e '\b'; eval_string e lexbuf } | '\\' 'n' { add_char e '\n'; eval_string e lexbuf } | '\\' 'r' { add_char e '\r'; eval_string e lexbuf } | '\\' 't' { add_char e '\t'; eval_string e lexbuf } | [^ '\"' '\\']+ { add e (lexeme lexbuf); eval_string e lexbuf } | eof { lexer_error lexbuf "Unterminated string literal" } and quotation e = parse ">>" { add e ">>" } | "\\>>" { add e "\\>>"; quotation e lexbuf } | '\\' '\r'? '\n' { if e.in_directive then ( new_line e; quotation e lexbuf ) else ( add e (lexeme lexbuf); new_line e; quotation e lexbuf ) } | '\r'? '\n' { if e.in_directive then lexer_error lexbuf "Unterminated quotation" else ( add e (lexeme lexbuf); new_line e; quotation e lexbuf ) } | [^'>' '\\' '\r' '\n']+ { add e (lexeme lexbuf); quotation e lexbuf } | eof { lexer_error lexbuf "Unterminated quotation" } and test_token e = parse "true" { TRUE } | "false" { FALSE } | "defined" { DEFINED } | "(" { OP_PAREN } | ")" { CL_PAREN (loc lexbuf) } | "&&" { AND } | "||" { OR } | "not" { NOT } | "=" { EQ } | "<" { LT } | ">" { GT } | "<>" { NE } | "<=" { LE } | ">=" { GE } | '-'? ( digit (digit | '_')* | ("0x"| "0X") hex (hex | '_')* | ("0o"| "0O") oct (oct | '_')* | ("0b"| "0B") bin (bin | '_')* ) { let s = Lexing.lexeme lexbuf in try INT (Int64.of_string s) with _ -> error (loc lexbuf) (sprintf "Integer constant %s is out the valid range for int64" s) } | "+" { PLUS } | "-" { MINUS } | "*" { STAR } | "/" { SLASH (loc lexbuf) } | "mod" { MOD (loc lexbuf) } | "lsl" { LSL } | "lsr" { LSR } | "asr" { ASR } | "land" { LAND } | "lor" { LOR } | "lxor" { LXOR } | "lnot" { LNOT } | ident { IDENT (loc lexbuf, lexeme lexbuf) } | blank+ { test_token e lexbuf } | '\\' '\r'? '\n' { new_line e; test_token e lexbuf } | '\r'? '\n' | eof { assert e.in_directive; e.in_directive <- false; new_line e; e.lexer <- `Ocaml; ENDTEST (loc lexbuf) } | _ { error (loc lexbuf) (sprintf "Invalid token %s" (Lexing.lexeme lexbuf)) } { let init ~preserve_quotations file lexbuf = new_file lexbuf file; { preserve_quotations = preserve_quotations; lexer = `Ocaml; line_start = true; in_directive = false; buf = Buffer.create 200; token_start = Lexing.dummy_pos; lexbuf = lexbuf; } } cppo-0.9.3/cppo_main.ml000066400000000000000000000103661171310140200147570ustar00rootroot00000000000000(* $Id$ *) open Printf let add_extension tbl s = let i = try String.index s ':' with Not_found -> failwith "Invalid -x argument" in let id = String.sub s 0 i in let raw_tpl = String.sub s (i+1) (String.length s - i - 1) in let cmd_tpl = Cppo_command.parse raw_tpl in if Hashtbl.mem tbl id then failwith ("Multiple definitions for extension " ^ id) else Hashtbl.add tbl id cmd_tpl let () = let extensions = Hashtbl.create 10 in let files = ref [] in let header = ref [] in let incdirs = ref [] in let out_file = ref None in let preserve_quotations = ref false in let show_exact_locations = ref false in let show_no_locations = ref false in let options = [ "-D", Arg.String (fun s -> header := ("#define " ^ s ^ "\n") :: !header), "DEF Equivalent of interpreting '#define DEF' before processing the input"; "-U", Arg.String (fun s -> header := ("#undef " ^ s ^ "\n") :: !header), "IDENT Equivalent of interpreting '#undef IDENT' before processing the input"; "-I", Arg.String (fun s -> incdirs := s :: !incdirs), "DIR Add directory DIR to the search path for included files"; "-o", Arg.String (fun s -> out_file := Some s), "FILE Output file"; "-q", Arg.Set preserve_quotations, " Identify and preserve camlp4 quotations"; "-s", Arg.Set show_exact_locations, " Output line directives pointing to the exact source location of each token, including those coming from the body of macro definitions. This behavior is off by default."; "-n", Arg.Set show_no_locations, " Do not output any line directive other than those found in the input (overrides -s)."; "-version", Arg.Unit (fun () -> print_endline Cppo_version.cppo_version; exit 0), " Print the version of the program and exit."; "-x", Arg.String (fun s -> add_extension extensions s), "NAME:CMD_TEMPLATE Define a custom preprocessor target section starting with: #ext \"NAME\" and ending with: #endext NAME must be a lowercase identifier of the form [a-z][A-Za-z0-9_]* CMD_TEMPLATE is a command template supporting the following special sequences: %F file name (unescaped; beware of potential scripting attacks) %B number of the first line %E number of the last line %% a single percent sign Filename, first line number and last line number are also available from the following environment variables: CPPO_FILE, CPPO_FIRST_LINE, CPPO_LAST_LINE. The command produced is expected to read the data lines from stdin and to write its output to stdout." ] in let msg = sprintf "\ Usage: %s [OPTIONS] [FILE1 [FILE2 ...]] Options:" Sys.argv.(0) in let add_file s = files := s :: !files in Arg.parse options add_file msg; let inputs = let preliminaries = match List.rev !header with [] -> [] | l -> let s = String.concat "" l in [ Sys.getcwd (), "", (fun () -> Lexing.from_string s), (fun () -> ()) ] in let main = match List.rev !files with [] -> [ Sys.getcwd (), "", (fun () -> Lexing.from_channel stdin), (fun () -> ()) ] | l -> List.map ( fun file -> let ic = lazy (open_in file) in Filename.dirname file, file, (fun () -> Lexing.from_channel (Lazy.force ic)), (fun () -> close_in (Lazy.force ic)) ) l in preliminaries @ main in let env = Cppo_eval.builtin_env in let buf = Buffer.create 10_000 in let _env = try Cppo_eval.include_inputs ~extensions ~preserve_quotations: !preserve_quotations ~incdirs: (List.rev !incdirs) ~show_exact_locations: !show_exact_locations ~show_no_locations: !show_no_locations buf env inputs with Cppo_types.Cppo_error msg | Failure msg -> eprintf "Error: %s\n%!" msg; exit 1 in match !out_file with None -> print_string (Buffer.contents buf); flush stdout | Some file -> let oc = open_out file in output_string oc (Buffer.contents buf); close_out oc cppo-0.9.3/cppo_parser.mly000066400000000000000000000152321171310140200155150ustar00rootroot00000000000000/* $Id$ */ %{ open Printf open Cppo_types let print = print_string let rhs_loc n1 n2 = (Parsing.rhs_start_pos n1, Parsing.rhs_end_pos n2) %} /* Directives */ %token < Cppo_types.loc * string > DEF DEFUN UNDEF INCLUDE WARNING ERROR %token < Cppo_types.loc * string option * int > LINE %token < Cppo_types.loc * Cppo_types.bool_expr > IFDEF %token < Cppo_types.loc * string * string > EXT %token < Cppo_types.loc > ENDEF IF ELIF ELSE ENDIF ENDTEST /* Boolean expressions in #if/#elif directives */ %token OP_PAREN TRUE FALSE DEFINED NOT AND OR EQ LT GT NE LE GE PLUS MINUS STAR LNOT LSL LSR ASR LAND LOR LXOR %token < Cppo_types.loc > SLASH MOD %token < int64 > INT /* Regular program and shared terminals */ %token < Cppo_types.loc > CL_PAREN COMMA CURRENT_LINE CURRENT_FILE %token < Cppo_types.loc * string > IDENT FUNIDENT %token < Cppo_types.loc * bool * string > TEXT /* bool means "is space" */ %token EOF /* Priorities for boolean expressions */ %left OR %left AND /* Priorities for arithmetics */ %left PLUS MINUS %left STAR SLASH %left MOD LSL LSR ASR LAND LOR LXOR %nonassoc NOT %nonassoc LNOT %nonassoc UMINUS %start main %type < Cppo_types.node list > main %% main: full_node main { $1 :: $2 } | EOF { [] } ; full_node: CL_PAREN { `Text ($1, false, ")") } | COMMA { `Text ($1, false, ",") } | node { $1 } ; node_list0: node node_list0 { $1 :: $2 } | { [] } ; full_node_list0: full_node full_node_list0 { $1 :: $2 } | { [] } ; /* TODO: make lone COMMAs valid only in "main" rule */ /* TODO: same for parentheses */ node: TEXT { `Text $1 } | IDENT { let loc, name = $1 in `Ident (loc, name, None) } | FUNIDENT args1 CL_PAREN { (* macro application that receives at least one argument, possibly empty. We cannot distinguish syntactically between zero argument and one empty argument. *) let (pos1, _), name = $1 in let _, pos2 = $3 in `Ident ((pos1, pos2), name, Some $2) } | FUNIDENT error { error (fst $1) "Invalid macro application" } | CURRENT_LINE { `Current_line $1 } | CURRENT_FILE { `Current_file $1 } | DEF full_node_list0 ENDEF { let (pos1, _), name = $1 in (* Additional spacing is needed for cases like '+foo+' expanding into '++' instead of '+ +'. *) let safe_space = `Text ($3, true, " ") in let body = $2 @ [safe_space] in let _, pos2 = $3 in `Def ((pos1, pos2), name, body) } | DEFUN def_args1 CL_PAREN full_node_list0 ENDEF { let (pos1, _), name = $1 in let args = $2 in (* Additional spacing is needed for cases like 'foo()bar' where 'foo()' expands into 'abc', giving 'abcbar' instead of 'abc bar'; Also needed for '+foo()+' expanding into '++' instead of '+ +'. *) let safe_space = `Text ($5, true, " ") in let body = $4 @ [safe_space] in let _, pos2 = $5 in `Defun ((pos1, pos2), name, args, body) } | DEFUN CL_PAREN { error (fst (fst $1), snd $2) "At least one argument is required" } | UNDEF { `Undef $1 } | WARNING { `Warning $1 } | ERROR { `Error $1 } | INCLUDE { `Include $1 } | EXT { `Ext $1 } | IF test full_node_list0 elif_list ENDIF { let pos1, _ = $1 in let _, pos2 = $5 in let loc = (pos1, pos2) in let test = $2 in let if_true = $3 in let if_false = List.fold_right ( fun (loc, test, if_true) if_false -> [`Cond (loc, test, if_true, if_false) ] ) $4 [] in `Cond (loc, test, if_true, if_false) } | IF test full_node_list0 elif_list error { (* BUG? ocamlyacc fails to reduce that rule but not menhir *) error $1 "missing #endif" } | IFDEF full_node_list0 elif_list ENDIF { let (pos1, _), test = $1 in let _, pos2 = $4 in let loc = (pos1, pos2) in let if_true = $2 in let if_false = List.fold_right ( fun (loc, test, if_true) if_false -> [`Cond (loc, test, if_true, if_false) ] ) $3 [] in `Cond (loc, test, if_true, if_false) } | IFDEF full_node_list0 elif_list error { error (fst $1) "missing #endif" } | IF test full_node_list0 ELSE full_node_list0 ENDIF { `Cond ((fst $1, snd $6), $2, $3, $5) } | IF test full_node_list0 ELSE full_node_list0 error { error $1 "missing #endif" } | IFDEF full_node_list0 ELSE full_node_list0 ENDIF { `Cond ((fst (fst $1), snd $5), (snd $1), $2, $4) } | IFDEF full_node_list0 ELSE full_node_list0 error { error (fst $1) "missing #endif" } | LINE { `Line $1 } ; elif_list: ELIF test full_node_list0 elif_list { let pos1, _ = $1 in let pos2 = Parsing.rhs_end_pos 4 in ((pos1, pos2), $2, $3) :: $4 } | { [] } ; args1: node_list0 COMMA args1 { $1 :: $3 } | node_list0 { [ $1 ] } ; def_args1: IDENT COMMA def_args1 { (snd $1) :: $3 } | IDENT { [ snd $1 ] } ; test: bexpr ENDTEST { $1 } ; /* Boolean expressions after #if or #elif */ bexpr: | TRUE { `True } | FALSE { `False } | DEFINED IDENT { `Defined (snd $2) } | OP_PAREN bexpr CL_PAREN { $2 } | NOT bexpr { `Not $2 } | bexpr AND bexpr { `And ($1, $3) } | bexpr OR bexpr { `Or ($1, $3) } | aexpr EQ aexpr { `Eq ($1, $3) } | aexpr LT aexpr { `Lt ($1, $3) } | aexpr GT aexpr { `Gt ($1, $3) } | aexpr NE aexpr { `Not (`Eq ($1, $3)) } | aexpr LE aexpr { `Not (`Gt ($1, $3)) } | aexpr GE aexpr { `Not (`Lt ($1, $3)) } ; /* Arithmetic expressions within boolean expressions */ aexpr: | INT { `Int $1 } | IDENT { `Ident $1 } | OP_PAREN aexpr CL_PAREN { $2 } | aexpr PLUS aexpr { `Add ($1, $3) } | aexpr MINUS aexpr { `Sub ($1, $3) } | aexpr STAR aexpr { `Mul ($1, $3) } | aexpr SLASH aexpr { `Div ($2, $1, $3) } | aexpr MOD aexpr { `Mod ($2, $1, $3) } | aexpr LSL aexpr { `Lsl ($1, $3) } | aexpr LSR aexpr { `Lsr ($1, $3) } | aexpr ASR aexpr { `Lsr ($1, $3) } | aexpr LAND aexpr { `Land ($1, $3) } | aexpr LOR aexpr { `Lor ($1, $3) } | aexpr LXOR aexpr { `Lxor ($1, $3) } | LNOT aexpr { `Lnot $2 } | MINUS aexpr %prec UMINUS { `Neg $2 } ; cppo-0.9.3/cppo_types.ml000066400000000000000000000055111171310140200151730ustar00rootroot00000000000000(* $Id$ *) open Printf open Lexing module String_set = Set.Make (String) module String_map = Map.Make (String) type loc = position * position type bool_expr = [ `True | `False | `Defined of string | `Not of bool_expr (* not *) | `And of (bool_expr * bool_expr) (* && *) | `Or of (bool_expr * bool_expr) (* || *) | `Eq of (arith_expr * arith_expr) (* = *) | `Lt of (arith_expr * arith_expr) (* < *) | `Gt of (arith_expr * arith_expr) (* > *) (* syntax for additional operators: <>, <=, >= *) ] and arith_expr = (* signed int64 *) [ `Int of int64 | `Ident of (loc * string) (* must be bound to a valid int literal. Expansion of macro functions is not supported. *) | `Neg of arith_expr (* - *) | `Add of (arith_expr * arith_expr) (* + *) | `Sub of (arith_expr * arith_expr) (* - *) | `Mul of (arith_expr * arith_expr) (* * *) | `Div of (loc * arith_expr * arith_expr) (* / *) | `Mod of (loc * arith_expr * arith_expr) (* mod *) (* Bitwise operations on 64 bits *) | `Lnot of arith_expr (* lnot *) | `Lsl of (arith_expr * arith_expr) (* lsl *) | `Lsr of (arith_expr * arith_expr) (* lsr *) | `Asr of (arith_expr * arith_expr) (* asr *) | `Land of (arith_expr * arith_expr) (* land *) | `Lor of (arith_expr * arith_expr) (* lor *) | `Lxor of (arith_expr * arith_expr) (* lxor *) ] and node = [ `Ident of (loc * string * node list list option) | `Def of (loc * string * node list) | `Defun of (loc * string * string list * node list) | `Undef of (loc * string) | `Include of (loc * string) | `Ext of (loc * string * string) | `Cond of (loc * bool_expr * node list * node list) | `Error of (loc * string) | `Warning of (loc * string) | `Text of (loc * bool * string) (* bool is true for space tokens *) | `Seq of node list | `Stringify of node | `Concat of (node * node) | `Line of (loc * string option * int) | `Current_line of loc | `Current_file of loc ] let string_of_loc (pos1, pos2) = let line1 = pos1.pos_lnum and start1 = pos1.pos_bol in Printf.sprintf "File %S, line %i, characters %i-%i" pos1.pos_fname line1 (pos1.pos_cnum - start1) (pos2.pos_cnum - start1) exception Cppo_error of string let error loc s = let msg = sprintf "%s\nError: %s" (string_of_loc loc) s in raise (Cppo_error msg) let warning loc s = let msg = sprintf "%s\nWarning: %s" (string_of_loc loc) s in eprintf "%s\n%!" msg let make_line_directive ?(fname = true) pos = let spaces = String.make (pos.pos_cnum - pos.pos_bol) ' ' in if fname then sprintf "# %i %S\n%s" pos.pos_lnum pos.pos_fname spaces else sprintf "# %i\n%s" pos.pos_lnum spaces let parse_file = ref ((fun file -> assert false) : string -> node list) let dummy_loc = (Lexing.dummy_pos, Lexing.dummy_pos) cppo-0.9.3/examples/000077500000000000000000000000001171310140200142705ustar00rootroot00000000000000cppo-0.9.3/examples/Makefile000066400000000000000000000002441171310140200157300ustar00rootroot00000000000000.PHONY: all clean all: ../cppo debug.ml > debug.out ../cppo french.ml > french.out ocamllex lexer.mll ../cppo lexer.ml > lexer.out clean: rm -f *.out lexer.ml cppo-0.9.3/examples/debug.ml000066400000000000000000000002021171310140200157020ustar00rootroot00000000000000#ifdef DEBUG #define debug(s) Printf.eprintf "[%S %i] %s\n%!" __FILE__ __LINE__ s #else #define debug(s) () #endif debug("test") cppo-0.9.3/examples/french.ml000066400000000000000000000012441171310140200160700ustar00rootroot00000000000000#define soit let #define fonction function #define fon fun #define dans in #define si if #define alors then #define sinon else #define Liste List #define Affichef Printf #define affichef printf #define separation split #define tri sort soit rec separation x = fonction y :: l -> soit l1, l2 = separation x l dans si y < x alors (y :: l1), l2 sinon l1, (y :: l2) | [] -> [], [] soit rec tri = fonction x :: l -> soit l1, l2 = separation x l dans tri l1 @ [x] @ tri l2 | [] -> [] soit () = soit l = tri [ 5; 3; 7; 1; 7; 4; 99; 22 ] dans Liste.iter (fon i -> Affichef.affichef "%i " i) l; Affichef.affichef "\n" cppo-0.9.3/examples/lexer.mll000066400000000000000000000003171171310140200161160ustar00rootroot00000000000000(* Warning: ocamllex doesn't accept cppo directives within the rules section. *) rule token = parse ['a'-'z']+ { `String (Lexing.lexeme lexbuf) } { #ifndef NOFOO let foo () = () #endif } cppo-0.9.3/test.cppo000066400000000000000000000041531171310140200143170ustar00rootroot00000000000000(* comment *) #define pi 3.14 f(1) #define f(x) x+pi f(2) #undef pi f(3) #ifdef g "g" is defined #else "g" is not defined #endif #define a(x) b() #define b(x) a() a() debug("a") debug("b") #define z 123 #define y z #define x y #if x lsl 1 = 2*123 #if 1 = 2 #error "test" #endif success #else failure #endif #define test_multiline \ "abc\ def" \ (* 123 \ 456 *) test_multiline #define test_args(x,y) x y test_args("a","b") #define test_argc(x) x y test_argc(aa\,bb) #define test_esc(x) x test_esc(\,\)\() blah #define xyz #ifdef xyz #error "xyz should not have been defined" #endif #define sticky1(x) _ #define sticky2(x) sticky1()_ (* the 2 underscores should be space-separated *) sticky2() #define empty1 #define empty2 +empty1+ (* there should be some space between the pluses *) empty2 (* (* nested comment with single single quote: ' *) "*)" *) #define arg obj \# define arg ' (* lone single quote *) #define one 1 one๊ is not 1 ๊ #undef x #define x # x is # #undef one #define one 1 #if (one+one = 100 + \ 64 lsr 3 / 4 - lnot lnot 100) && \ 1 + 3 * 5 = 16 && \ 22 mod 7 = 1 && \ lnot 0 = 0xffffffffffffffff && \ -1 asr 100 = -1 && \ -1 land (1 lsl 1 lsr 1) = 1 && \ -1 lor 1 = -1 && \ -2 lxor 1 = -1 && \ lnot -1 = 0 && \ true && not false && defined one && \ (true || true && false) good maths #else #error "math error" #endif #undef f #undef g #undef x #undef y #define trace(f) \ let f x = \ printf "call %s\n%!" STRINGIFY(f); \ let y = f x in \ printf "return %s\n%!" STRINGIFY(f); \ y \ ;; trace(g) #define field(name,type) \ val mutable name : type option \ method CONCAT(get_, name) = name \ method CONCAT(set_, name) x = name <- Some x class foo () = object field(field_1, int) field(field_2, string) end #define DEBUG(x) \ (if !debug then \ eprintf "[debug] %s %i: " __FILE__ __LINE__; \ eprintf x; \ eprintf "\n") DEBUG("test1 %i %i" x y) DEBUG("test2 %i" x) #include "testdata/incl.cppo" # 123456 #789 "test" #include "testdata/incl.cppo" #define debug(s) Printf.eprintf "%S %i: %s\n%!" __FILE__ __LINE__ s end cppo-0.9.3/testdata/000077500000000000000000000000001171310140200142635ustar00rootroot00000000000000cppo-0.9.3/testdata/Makefile000066400000000000000000000002331171310140200157210ustar00rootroot00000000000000.PHONY: all ext all: ext ../cppo -x rot13:"tr '[a-z]' '[n-za-m]'" \ -x source:"./source.sh '%F' %B %E" \ ext.cppo > ext.out diff -u ext.ref ext.out cppo-0.9.3/testdata/ext.cppo000066400000000000000000000001071171310140200157440ustar00rootroot00000000000000hello #ext rot13 abc \#endext def #endext goodbye #ext source #endext cppo-0.9.3/testdata/ext.ref000066400000000000000000000003711171310140200155620ustar00rootroot00000000000000# 1 "ext.cppo" hello nop #raqrkg qrs # 7 "ext.cppo" goodbye # 9 (* hello #ext rot13 abc \#endext def #endext goodbye #ext source #endext *) (* Environment variables: CPPO_FILE=ext.cppo CPPO_FIRST_LINE=9 CPPO_LAST_LINE=11 *) # 11 cppo-0.9.3/testdata/incl.cppo000066400000000000000000000000401171310140200160650ustar00rootroot00000000000000included #include "incl2.cppo" cppo-0.9.3/testdata/incl2.cppo000066400000000000000000000000031171310140200161460ustar00rootroot00000000000000ok cppo-0.9.3/testdata/source.sh000077500000000000000000000003621171310140200161230ustar00rootroot00000000000000#! /bin/sh -e echo "# $2" echo "(*" cat "$1" echo "*)" echo "(*" echo " Environment variables:" echo " CPPO_FILE=$CPPO_FILE" echo " CPPO_FIRST_LINE=$CPPO_FIRST_LINE" echo " CPPO_LAST_LINE=$CPPO_LAST_LINE" echo "*)" echo "# $3"