pax_global_header00006660000000000000000000000064124410565510014515gustar00rootroot0000000000000052 comment=5240119a39a079d25e07eecd9852d673aee87acb ocaml-text-0.8/000077500000000000000000000000001244105655100134415ustar00rootroot00000000000000ocaml-text-0.8/.gitignore000066400000000000000000000000551244105655100154310ustar00rootroot00000000000000_build setup.exe setup.data setup.log manual ocaml-text-0.8/.project000066400000000000000000000000131244105655100151020ustar00rootroot00000000000000ocaml-text ocaml-text-0.8/CHANGES000066400000000000000000000023531244105655100144370ustar00rootroot00000000000000===== 0.7.1 ==== * Use OASIS 0.4.x compiled_setup alpha feature * Fixed _oasis file for the syntax extension ===== 0.7 ===== * Switched to git and github * Using OASIS 0.4.x ===== 0.6 ===== * Update for latest oasis ===== 0.5 ===== * use more search pathes for iconv * fix a bug in Encoding.recode_string * several optimizations for the Text module ===== 0.4 ===== * Fix a bug in regular expression generation for PCRE * Adding the function Text.transform * Adding the module Encoding_bigarray for encoding/decoding to/from bigarrays instead of strings * Automatically detect whether -liconv is needed or not ===== 0.3 ===== * Adding a user manual * Adding support for regular expressions inside arbitrary pattern matching * Adding a syntax extension for converting human readable regular expressions over UTF-8 strings into PCRE ones * Faster reimplementation of Encoding.recode_string. (note: the ?fallback argument have been removed, use //TRANSLIT instead) * Text.encode use transliteration instead of a fallback string * Fixing various bugs in the module Text (thanks to unit tests) * Adding unit tests * Adding support for windows (using GetACP instead of nl_langinfo) ===== 0.2 ===== ocaml-text-0.8/LICENSE000066400000000000000000000027561244105655100144600ustar00rootroot00000000000000Copyright (c) 2009, Jeremie Dimino All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Jeremie Dimino nor the names of his contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ocaml-text-0.8/Makefile000066400000000000000000000017421244105655100151050ustar00rootroot00000000000000# OASIS_START # DO NOT EDIT (digest: 46f8bd9984975bd4727bed22d0876cd2) SETUP = ./setup.exe build: setup.data $(SETUP) $(SETUP) -build $(BUILDFLAGS) doc: setup.data $(SETUP) build $(SETUP) -doc $(DOCFLAGS) test: setup.data $(SETUP) build $(SETUP) -test $(TESTFLAGS) all: $(SETUP) $(SETUP) -all $(ALLFLAGS) install: setup.data $(SETUP) $(SETUP) -install $(INSTALLFLAGS) uninstall: setup.data $(SETUP) $(SETUP) -uninstall $(UNINSTALLFLAGS) reinstall: setup.data $(SETUP) $(SETUP) -reinstall $(REINSTALLFLAGS) clean: $(SETUP) $(SETUP) -clean $(CLEANFLAGS) distclean: $(SETUP) $(SETUP) -distclean $(DISTCLEANFLAGS) $(RM) $(SETUP) setup.data: $(SETUP) $(SETUP) -configure $(CONFIGUREFLAGS) configure: $(SETUP) $(SETUP) -configure $(CONFIGUREFLAGS) setup.exe: setup.ml ocamlfind ocamlopt -o $@ $< || ocamlfind ocamlc -o $@ $< || true $(RM) setup.cmi setup.cmo setup.cmx setup.o .PHONY: build doc test all install uninstall reinstall clean distclean configure # OASIS_STOP ocaml-text-0.8/README000066400000000000000000000025761244105655100143330ustar00rootroot00000000000000 OCaml-Text Jeremie Dimino February 2009 url: https://forge.ocamlcore.org/projects/ocaml-text/ * What it is ? OCaml-Text is a library for dealing with ``text'', i.e. sequence of unicode characters, in a convenient way It supports: - character encoding/decoding using iconv - manipulation of text as UTF-8 encoded strings - localised text functions such as compare, upper, ... - human readable regular expression inside patterns In order to be the compatible with other ocaml library without pain, OCaml-Text choose to use only regular strings for dealing with text. * Requirement OCaml-Text needs ocaml >= 3.11 and libiconv. It require also pcre bindings for ocaml to have regular expression support. * Building and installation To build and install it just type: $ ocaml setup.ml -configure $ ocaml setup.ml -build $ ocaml setup.ml -install This will install the "text" package (ocamlfind is required), and the "text.pcre" if compiled with support for pcre. If you get the development version you must obtain oasis (http://oasis.forge.ocamlcore.org/). * Development The last development version of ocaml-text can always be found in the darcs repository hosted at darcs.ocamlcore.org: $ darcs get http://darcs.ocamlcore.org/repos/ocaml-text local variables: mode: outline end: ocaml-text-0.8/_oasis000066400000000000000000000064001244105655100146410ustar00rootroot00000000000000# +-------------------------------------------------------------------+ # | Package parameters | # +-------------------------------------------------------------------+ OASISFormat: 0.4 Name: ocaml-text Version: 0.7 LicenseFile: LICENSE License: BSD-3-clause Authors: Jérémie Dimino Homepage: http://ocaml-text.forge.ocamlcore.org/ BuildTools: ocamlbuild Plugins: META (0.4), DevFiles (0.4) Synopsis: ocaml-text AlphaFeatures: compiled_setup_ml Description: OCaml-Text is an OCaml library for dealing with "text", i.e. sequences of unicode characters, in a convenient way. # +-------------------------------------------------------------------+ # | Flags | # +-------------------------------------------------------------------+ Flag pcre Description: support for pcre Default: false # +-------------------------------------------------------------------+ # | Libraries | # +-------------------------------------------------------------------+ Library text Path: src Install: true BuildDepends: bytes Modules: Text, Encoding CSources: ml_text.c, common.h Library "bigarray" Path: src Install: true FindlibParent: text BuildDepends: text, bigarray Modules: Encoding_bigarray CSources: ml_text_bigarray.c, common.h Library "pcre" Path: src FindlibParent: text Build$: flag(pcre) Install$: flag(pcre) Modules: Text_pcre BuildDepends: text, pcre Library "pcre-syntax" Path: syntax FindlibParent: text Build$: flag(pcre) Install$: flag(pcre) InternalModules: Pa_text_main, Pa_text_parse, Pa_text_regexp, Pa_text_env, Pa_text_types, Pa_text_util BuildDepends: text, camlp4, camlp4.quotations.o, camlp4.extend XMETAType: syntax XMETADescription: Syntax extension for writing human readable regexps in OCaml sources XMETARequires: camlp4, text # +-------------------------------------------------------------------+ # | Doc | # +-------------------------------------------------------------------+ Document "manual" Title: OCamlText user manual Type: custom (0.4) Install: false XCustom: make -C manual manual.pdf DataFiles: manual/manual.pdf InstallDir: $pdfdir Document "api" Title: API reference for OCamlText Type: ocamlbuild (0.4) Install: false InstallDir: $htmldir/api DataFiles: style.css BuildTools: ocamldoc XOCamlbuildPath: ./ XOCamlbuildLibraries: text # +-------------------------------------------------------------------+ # | Tests | # +-------------------------------------------------------------------+ Executable test Path: tests Install: false CompiledObject: best MainIs: test.ml BuildDepends: text Test main Command: $test TestTools: test # +-------------------------------------------------------------------+ # | Misc | # +-------------------------------------------------------------------+ SourceRepository head Type: git Location: git://github.com/vbmithr/ocaml-text Browser: http://github.com/vbmithr/ocaml-text ocaml-text-0.8/_tags000066400000000000000000000026131244105655100144630ustar00rootroot00000000000000# -*- conf -*- : syntax_camlp4o : use_iconv # OASIS_START # DO NOT EDIT (digest: 4e0f9df4e481ad80c07cec09691a505f) # Ignore VCS directories, you can use the same kind of rule outside # OASIS_START/STOP if you want to exclude directories that contains # useless stuff for the build process true: annot, bin_annot <**/.svn>: -traverse <**/.svn>: not_hygienic ".bzr": -traverse ".bzr": not_hygienic ".hg": -traverse ".hg": not_hygienic ".git": -traverse ".git": not_hygienic "_darcs": -traverse "_darcs": not_hygienic # Library text "src/text.cmxs": use_text : use_libtext_stubs "src/ml_text.c": pkg_bytes # Library bigarray "src/bigarray.cmxs": use_bigarray : use_libbigarray_stubs : pkg_bigarray "src/ml_text_bigarray.c": pkg_bigarray "src/ml_text_bigarray.c": pkg_bytes "src/ml_text_bigarray.c": use_text # Library pcre "src/pcre.cmxs": use_pcre : pkg_bytes : pkg_pcre : use_text # Library pcre-syntax "syntax/pcre-syntax.cmxs": use_pcre-syntax : pkg_bytes : pkg_camlp4 : pkg_camlp4.extend : pkg_camlp4.quotations.o : use_text # Executable test : pkg_bytes : use_text : pkg_bytes : use_text # OASIS_STOP ocaml-text-0.8/api.odocl000066400000000000000000000001521244105655100152320ustar00rootroot00000000000000# OASIS_START # DO NOT EDIT (digest: 409ce00c58649525879357a67c770e4e) src/Text src/Encoding # OASIS_STOP ocaml-text-0.8/configure000077500000000000000000000005571244105655100153570ustar00rootroot00000000000000#!/bin/sh # OASIS_START # DO NOT EDIT (digest: 6f7b8221311e800a7093dc3b793f67ca) set -e FST=true for i in "$@"; do if $FST; then set -- FST=false fi case $i in --*=*) ARG=${i%%=*} VAL=${i##*=} set -- "$@" "$ARG" "$VAL" ;; *) set -- "$@" "$i" ;; esac done make configure CONFIGUREFLAGS="$*" # OASIS_STOP ocaml-text-0.8/dist.sh000077500000000000000000000002521244105655100147420ustar00rootroot00000000000000#!/bin/sh NAME=`oasis query Name 2> /dev/null` VERSION=`oasis query Version 2> /dev/null` DARCS_REPO=`pwd` export DARCS_REPO exec darcs dist --dist-name $NAME-$VERSION ocaml-text-0.8/examples/000077500000000000000000000000001244105655100152575ustar00rootroot00000000000000ocaml-text-0.8/examples/Makefile000066400000000000000000000002441244105655100167170ustar00rootroot00000000000000.PHONY: all clean all: regexp regexp: regexp.ml ocamlfind ocamlc -linkpkg -syntax camlp4o -package text.pcre -o regexp regexp.ml clean: rm -f regexp *.cm* *.o ocaml-text-0.8/examples/regexp.ml000066400000000000000000000011021244105655100170750ustar00rootroot00000000000000(* * regexp.ml * --------- * Copyright : (c) 2010, Jeremie Dimino * Licence : BSD3 * * This file is a part of ocaml-text. *) let digit3 = <:re< ["0".."9"]{1-3} >> let () = match Sys.argv with | [|program_name; <:re< (digit3 as d1 : int) "." (digit3 as d2 : int) "." (digit3 as d3 : int) "." (digit3 as d4 : int) >>|] -> Printf.printf "d1 = %d, d2 = %d, d3 = %d, d4 = %d\n" d1 d2 d3 d4 | _ -> Printf.printf "usage: %s \n" Sys.argv.(0) ocaml-text-0.8/manual/000077500000000000000000000000001244105655100147165ustar00rootroot00000000000000ocaml-text-0.8/manual/Makefile000066400000000000000000000003271244105655100163600ustar00rootroot00000000000000.PHONY: all clean clean-aux all: manual.pdf %.pdf: %.tex rubber --pdf $< %.html: %.tex hevea -fix manual.tex clean: clean-aux rm -f *.pdf clean-aux: rm -f *.aux *.dvi *.log *.out *.toc *.html *.htoc *.haux ocaml-text-0.8/manual/manual.tex000066400000000000000000000501171244105655100167210ustar00rootroot00000000000000%% manual.tex %% ---------- %% Copyright : (c) 2010, Jeremie Dimino %% Licence : BSD3 %% %% This file is a part of ocaml-text. \documentclass{article} \usepackage[utf8]{inputenc} \usepackage{xspace} \usepackage{verbatim} \usepackage[english]{babel} \usepackage{amsmath} \usepackage{amssymb} \usepackage{hyperref} \usepackage{listings} \usepackage{xcolor} \usepackage{fullpage} %% +-----------------------------------------------------------------+ %% | Configuration | %% +-----------------------------------------------------------------+ \hypersetup{% a4paper=true, pdfstartview=FitH, colorlinks=false, pdfborder=0 0 0, pdftitle = {OCaml-text user manual}, pdfauthor = {Jérémie Dimino}, pdfkeywords = {OCaml, Unicode} } \lstset{ language=[Objective]Caml, extendedchars=\true, inputencoding=utf8, showspaces=false, showstringspaces=false, showtabs=false, basicstyle=\ttfamily, frame=l, framerule=1.5mm, xleftmargin=6mm, framesep=4mm, rulecolor=\color{lightgray}, moredelim=*[s][\itshape]{(*}{*)}, moredelim=[is][\textcolor{darkgray}]{§}{§}, escapechar=°, keywordstyle=\color[rgb]{0.627451, 0.125490, 0.941176}, stringstyle=\color[rgb]{0.545098, 0.278431, 0.364706}, commentstyle=\color[rgb]{0.698039, 0.133333, 0.133333}, numberstyle=\color[rgb]{0.372549, 0.619608, 0.627451} } %% +-----------------------------------------------------------------+ %% | Aliases | %% +-----------------------------------------------------------------+ \newcommand{\oct}{\texttt{ocamlt-text}\xspace} %% +-----------------------------------------------------------------+ %% | Headers | %% +-----------------------------------------------------------------+ \title{OCaml-text user manual} \author{Jérémie Dimino} \begin{document} \maketitle %% +-----------------------------------------------------------------+ %% | Table of contents | %% +-----------------------------------------------------------------+ \setcounter{tocdepth}{2} \tableofcontents %% +-----------------------------------------------------------------+ %% | Section | %% +-----------------------------------------------------------------+ \section{Introduction} \oct is a library for manipulation of unicode text. It can replace the \texttt{String} module of the standard library when you need to access to strings as sequence of UTF-8 encoded unicode characters. It also supports encoding (resp. decoding) of unicode text into (resp. from) a lot of different characrter encodings. %% +-----------------------------------------------------------------+ %% | Section | %% +-----------------------------------------------------------------+ \section{Character encoding} \oct uses \texttt{libiconv} to transcode between variaous character encodings. The \texttt{libiconv} itself is quite painfull to use, \oct tries to offer a cleaner interface, which is located in the module \texttt{Encoding}. \subsection{Decoding} Decoding means extracting a unicode character from a sequence of bytes. To decode text, the first thing to do is to create a decoder; this is done by the \texttt{Encoding.decoder} function: \lstset{language=[Objective]Caml}\begin{lstlisting} val decoder : Encoding.t -> Encoding.decoder \end{lstlisting} The type \texttt{Encoding.t} is the type of character encoding. It is defined as an alias to the type string; in fact it is simply the name of the character encoding, such as \texttt{``UTF-8''}, \texttt{``ASCII''}, ... The decoder allow you to decode characters, by using the decode function: \lstset{language=[Objective]Caml}\begin{lstlisting} val decode : decoder -> string -> int -> int -> decoding_result \end{lstlisting} It takes as arguments: \begin{itemize} \item a decoder, of type \texttt{Encoding.decoder} \item a buffer $buf$, which contains encoded characters \item an offset $ofs$ in the buffer \item a length $len$. \end{itemize} \texttt{decode} will read up to $len$ bytes in the buffer, starting at the offset $ofs$. If the bytes does not contains a valid multi-byte sequence, it will returns \texttt{Dec\_error}. If the decoder read $len$ bytes without reaching the end of the multi-byte sequence, it returns \texttt{Dec\_need\_more}. If it succeed, it returns \texttt{Dec\_ok(code\_point, num)} where \texttt{code\_point} is the code-point that has been successfully decoded, ad \texttt{num} is the number of bytes consumed. \subsection{Encoding} Encoding means transforming a unicode character into a sequence of bytes, depending of the character encoding. Encoding characters works almost like decoding. The first things is to create a decoder with: \lstset{language=[Objective]Caml}\begin{lstlisting} val encoder : Encoding.t -> Encoding.encoder \end{lstlisting} then, encoding is done by: \lstset{language=[Objective]Caml}\begin{lstlisting} val encode : encoder -> string -> int -> int -> code_point -> encoding_result \end{lstlisting} Arguments have the same meaning that for decoding, except that the buffer will be written insteand of read. \texttt{encode} will write into the buffer the multi-byte sequence correspoing to the given code-point. On success it returns \texttt{Enc\_ok num} where \texttt{num} is the number of bytes written. If the unicode character cannot be represented in the given encoding, it returns \texttt{Enc\_error}. If the buffer does not contain enough room for the multi-byte seuqnece, it returns \texttt{Enc\_need\_more}.. \subsection{The system encoding} The system character encoding, \texttt{Encoding.system} is determined by environment variables. If you print non-ascii text on a terminal, it is a good idea to encode it in the system encoding. You may also use transliteration (see section \ref{special-encoding}) to prevent failling when unicode character cannot be encoded in the system encoding. \subsection{Special encodings} \label{special-encoding} The \texttt{libiconv} library allow character encoding names to be suffixed by \texttt{//IGNORE} or \texttt{//TRANSLIT}. The first one means that character that cannot be represented in given encoding are skipped silently. The secong means that these characters will be approximated. Note that \texttt{//TRANSLIT} depends on the current locale settings. For example, consider the following program: \lstset{language=[Objective]Caml}\begin{lstlisting} print_endline (Encoding.recode_string ~src:"UTF-8" ~dst:"ASCII//TRANSLIT" "Mon nom est J\xc3\xa9r\xc3\xa9mie") \end{lstlisting} (where \texttt{c3a9} is the UTF-8 representation of ``é''). According to the current locale settings, the printing will be different: \lstset{language=bash}\begin{lstlisting} $ LANG=fr_FR.UTF-8 ./a.out Mon nom est Jeremie $ LANG=C ./a.out Mon nom est J?r?mie \end{lstlisting} The advantage of transliteration is that encoding text will never fail, and give an acceptable result. %% +-----------------------------------------------------------------+ %% | Section | %% +-----------------------------------------------------------------+ \section{Text manipulation} The \texttt{Text} module of \oct is designed to handle unicode text manipulation. By unicode text, we means sequence of unicode characters, and not sequence of bytes. However, to stay compatible with the rest of the ocaml world which uses only standard latin-1 strings, and to keep pattern matching over unicode text, \oct choose to represent text as UTF-8 strings, without using an abstract type. This is OK as long as you respect the following rules: \begin{itemize} \item \textbf{Text is immutable:} never modify in place the contents of a string containing text \item \textbf{Never trust inputs:} always check for validity text comming from the outside world \item \textbf{Use the right functions:} if you want to iterate over characters of a text, compute the number of characters contained in a text, ... use \texttt{UTF-8} aware functions \end{itemize} The module \texttt{Text} always assumes that strings it receive contains valid \texttt{UTF-8} encoded text. It is your job to ensure it is the case. \subsection{UTF-8 validation} \emph{UTF-8 validation} consists on verifying whether a string contains valid UTF-8 encoded text. This can be done with one of these two functions: \lstset{language=[Objective]Caml}\begin{lstlisting} val check : string -> string option val validate : string -> unit \end{lstlisting} \texttt{Text.check} receive a string, and returns \texttt{Some error} if the given string does not contains valid UTF-8 encoded text. Otherwise it returns \texttt{None}. \texttt{Text.validate} does the same thing but raises an exception instead of returning an option. For example: \lstset{language=[Objective]Caml}\begin{lstlisting} # Text.check "Hello";; - : string option = None # Text.check "\xff";; - : string option = Some "at offset 0: invalid start of UTF-8 sequence" # Text.validate "Hello";; - : unit = () # Text.validate "\xff";; Exception: Text.Invalid ("at offset 0: invalid start of UTF-8 sequence", "\255"). \end{lstlisting} \subsection{Iteration} Since UTF-8 encoded character use variable sequence length, iteration over a text can not be done the same way as iteration over a byte array. Indeed, to get the nth character of a text, you need to scan the whole text before the character. Instead, \oct provides an API to iterate over a text by using pointers (of type \texttt{Text.pointer}). A pointer represent the position of a UTF-8 encoded unicode character in a text. You can create a pointer by using one of these three functions: \lstset{language=[Objective]Caml}\begin{lstlisting} val pointer_l : t -> pointer (** Returns a pointer to the left of the given text *) val pointer_r : t -> pointer (** Returns a pointer to the right of the given text *) val pointer_at : t -> int -> pointer (** [pointer_at txt n] returns a pointer to the character at position [n] in [txt]. *) \end{lstlisting} Once you have a pointer, you scan text to the right or left. Here is a simple example, where we scan the given text to find a character ``.'': \lstset{language=[Objective]Caml}\begin{lstlisting} let search txt = let rec loop pointer = match Text.next pointer with | None -> (* End of the text *) false | Some(".", pointer) -> true | Some(ch, pointer) -> loop pointer in loop (Text.pointer_l txt) \end{lstlisting} Each call to \texttt{Text.next} returns either \texttt{None} if the end of the text have been reached, or \texttt{Some(ch, pointer)} where: \begin{itemize} \item \texttt{ch} is the character pointed by the pointer \item \texttt{pointer} is a pointer to the next character or the end of text \end{itemize} \texttt{Text.prev} works in the same way. Of course, using pointers is the last resort when the functions of the module \texttt{Text} are not sufficient. %% +-----------------------------------------------------------------+ %% | Section | %% +-----------------------------------------------------------------+ \section{Regular expressions with PCRE} If compiled with support for PCRE, \oct define a syntax extension for writing human readable regular expressions in ocaml sources. \subsection{Enabling the syntax extension} If you are using \texttt{ocamlfind}, simply adds the ``text.pcre'' to the list of packages. For example, to compile a file ``foo.ml'' using the syntax extension, just type: \lstset{language=[Objective]Caml}\begin{lstlisting} $ ocamlfind ocamlc -syntax camlp4o -package text.pcre -linkpkg -o foo foo.ml \end{lstlisting} \subsection{Syntax of regular expression} Here is the grammar of regular expressions: \begin{itemize} \item \textbf{\emph{string literal}} matches exactly the given string \item \textbf{\emph{\_}} (underscore) matches any character, except new-line in non multiline mode \item \textbf{\emph{regexp regexp}} match the concatenation of the two given regular expression \item \textbf{\emph{regexp $\mid$ regexp}} matches the first regular expression or the second \item \textbf{\emph{regexp\{n\}}} matches n times the given regular expression \item \textbf{\emph{regexp\{n-m\}}} matches at least $n$ times and up to $m$ times the given regular expression \item \textbf{\emph{regexp\{n+\}}} matches at least $n$ times the given regular expression, and maybe more \item \textbf{\emph{regexp*}} matches the given regular expression 0 time or more. This is the same as \textbf{\emph{regexp\{0+\}}} \item \textbf{\emph{regexp+}} matches the given regular expression 1 time or more. This is the same as \textbf{\emph{regexp\{1+\}}} \item \textbf{\emph{regexp?}} matches the given regular expression 0 time or 1 time. This is the same as \textbf{\emph{regexp\{0-1\}}} \item \textbf{\emph{[ character-set ]}} matches any character of \textbf{\emph{character-set}} \item \textbf{\emph{[\^\ character-set ]}} matches any character that is not a member of \textbf{\emph{character-set}} \item \textbf{\emph{( regexp )}} matches \textbf{\emph{regexp}} \item \textbf{\emph{$<$ regexp}} does a look behing \item \textbf{\emph{$$ regexp}} does a look ahead (without consuming any character) \item \textbf{\emph{$>!$ regexp}} does a negative look ahead (without consuming any character) \item \textbf{\emph{regexp as ident}} matches \textbf{\emph{regexp}} and bind the result to the variable \textbf{\emph{ident}} \item \textbf{\emph{regexp as ident : type}} matches \textbf{\emph{regexp}} and bind the result to the variable \textbf{\emph{ident}}, mapping it with the function \texttt{\emph{type\_of\_string}} \item \textbf{\emph{regexp as ident := func}} matches \textbf{\emph{regexp}} and bind the result to the variable \textbf{\emph{ident}}, mapping it with the function \texttt{\emph{func}} which may be any ocaml expression \item \textbf{\emph{$\backslash$ ident}} is a backward reference to a previously bounded variable \item \textbf{\emph{ident}} matches the regular expression contained in the variable \textbf{\emph{ident}} \item \textbf{\emph{if ident then regexp}} matches \textbf{\emph{regexp}} if \textbf{\emph{ident}} as been previously matched. \item \textbf{\emph{if ident then regexp else regexp}} is the same as the previous contruction but with an else branch. \item \textbf{\emph{$\&+$ mode}} enable the given mode \item \textbf{\emph{$\&-$ mode}} disable the given mode \end{itemize} A \emph{string literal} can be any string, with classic ocaml escape sequence. Moreover, it also support the new escape sequence $\backslash u\{XXXX\}$ where $XXXX$ is a unicode code-point written in hexadecimal. For example $\backslash u\{e9\}$ correspond to the latin-1 character ``é''. \subsection{Quotations} The syntax extension defines two camlp4 quotations, that might be used in expressions or in patterns. The first one is ``\texttt{re\_text}''. It takes a regular expression as defined before and convert it to a string, following the syntax of PCRE. For example: \lstset{language=[Objective]Caml}\begin{lstlisting} let re = Pcre.regexp <:re_text< "foo" _* "bar" >> \end{lstlisting} The goal of this quotation is to make regular expression more readable. The second quotation, ``\texttt{re}'' expands into a compiled regular expression, of type \texttt{Pcre.regexp}, for examples: \lstset{language=[Objective]Caml}\begin{lstlisting} let re = <:re< "foo" _* "bar" >> let f str = Pcre.exec ~rex:<:re< "foo" _* "bar" >> str \end{lstlisting} Note that in both case, the regular expression will be compiled only one time. And in the second example, it will be compiled the first time it is used (by using lazy evaluation). But the more interesting use of this quotation is in pattern matchings. It is possible to put a regular expression in an arbitrary pattern, and capture variables. Here is a simple example of what you can do: \lstset{language=[Objective]Caml}\begin{lstlisting} let rec f = function | <:re< "foo" (_* as x) "bar" >> :: _ -> Some x | _ :: l -> f l | [] -> None \end{lstlisting} If is also possible to use several regular expressions in the same pattern: \lstset{language=[Objective]Caml}\begin{lstlisting} match v with | <:re< "foo" (_* as x) "bar" >> :: <:re< "a"* " " "b"* >> :: _ -> ... ... \end{lstlisting} \subsection{Variables} Variables are identifiers, starting with a lower or upper case letter, which are bound to a regular expression. By default \oct defines variables for each posix characters class: \texttt{lower}, \texttt{upper}, \texttt{alpha}, \texttt{digit}, \texttt{alnum}, \texttt{punct}, \texttt{graph} \texttt{print}, \texttt{blank}, \texttt{cntrl}, \texttt{xdigit}, \texttt{space} \texttt{ascii}, \texttt{word}. Each one matches exactly one character. Note that they match only ASCII letters. \oct also defines variables for unicode properties. For example \texttt{Ll}, will match all lowercase letters, including non-ASCII ones. For a list of all unicode properties, look at the manual page \texttt{pcresyntax(3)}. \oct defines variables for each script, such as \texttt{Arabic} or \texttt{Greek}. In addition, it defines the following variables: \begin{itemize} \item \texttt{hspace} matching any horizontal space character, including non-ASCII ones \item \texttt{vspace} matching any vertical space character, including non-ASCII ones \item \texttt{bound} matching any word boundary character, including non-ASCII ones \item \texttt{bos} matching the beginning of a subject, whatever the current mode is \item \texttt{eos} matching the end of a subject, whatever the current mode is \end{itemize} New variables can be defined by toplevel bindings. For instance: \lstset{language=[Objective]Caml}\begin{lstlisting} let digit3 = <:re< ["0"-"9"]{3} >> \end{lstlisting} will generate the binding for the \texttt{digit3} variable and define the regexp variable \texttt{digit3} for the rest of the file. If the contents of a variable matches text of length 1, it can be used in character set. And if possible, it can be negated by prefixing it with a ``\texttt{!}''. All predefined variables and all character set variables can be negated. \subsection{Modes} Modes may be activated (resp. disabled) by using the syntax ``\texttt{$\&+$ mode}'' (resp. ``\texttt{$\&-$ mode}'') in a regular expression. Available modes are: \begin{itemize} \item \emph{i} or \emph{caseless}: performs case-insentive matching \item \emph{m} or \emph{multiline}: pass into multiline mode; \texttt{\^} and \texttt{\$} will match the beginning and end of lines instead of beginning and end of subject \item \emph{s}, \emph{singleline} or \emph{dotall}: the \texttt{\_} will match any characters, including newline ones. \end{itemize} \subsection{Greedy vs possessive vs lazy} The post-operators $?$, $+$, $*$, and more generally $\{...\}$ may be suffixed with one of $?$ or $+$ to modify their behaviour. By default regular expressions are greedy, which means that they match the maximum possible they can. Suffixing them with $?$ will make them lazy, which means the contrary. For example, consider the following function: \lstset{language=[Objective]Caml}\begin{lstlisting} let f = function | <:re< "a"* as x >> -> Some x | _ -> None \end{lstlisting} if we apply it on $aaa$ we got: \lstset{language=[Objective]Caml}\begin{lstlisting} $ f "aaa";; - : Text.t option = Some "aaa" \end{lstlisting} Now, if we make the matching $"a"*$ lazy: \lstset{language=[Objective]Caml}\begin{lstlisting} let f = function | <:re< "a"*? as x >> -> Some x | _ -> None \end{lstlisting} we got: \lstset{language=[Objective]Caml}\begin{lstlisting} $ f "aaa";; - : Text.t option = Some "" \end{lstlisting} Possessive prevents backtracking. For example, with: \lstset{language=[Objective]Caml}\begin{lstlisting} let f = function | <:re< ("a"* as x) ("a" as y) >> -> Some(x, y) | _ -> None \end{lstlisting} we got: \lstset{language=[Objective]Caml}\begin{lstlisting} $ f "aaa";; - : Text.t option = None \end{lstlisting} \end{document} ocaml-text-0.8/myocamlbuild.ml000066400000000000000000000441111244105655100164550ustar00rootroot00000000000000(* * myocamlbuild.ml * --------------- * Copyright : (c) 2011, Jeremie Dimino * Licence : BSD3 * * This file is a part of ocaml-text. *) (* OASIS_START *) (* DO NOT EDIT (digest: 0d3ee811b8ff2dae0e7af52092a7e05d) *) module OASISGettext = struct (* # 22 "src/oasis/OASISGettext.ml" *) let ns_ str = str let s_ str = str let f_ (str: ('a, 'b, 'c, 'd) format4) = str let fn_ fmt1 fmt2 n = if n = 1 then fmt1^^"" else fmt2^^"" let init = [] end module OASISExpr = struct (* # 22 "src/oasis/OASISExpr.ml" *) open OASISGettext type test = string type flag = string type t = | EBool of bool | ENot of t | EAnd of t * t | EOr of t * t | EFlag of flag | ETest of test * string type 'a choices = (t * 'a) list let eval var_get t = let rec eval' = function | EBool b -> b | ENot e -> not (eval' e) | EAnd (e1, e2) -> (eval' e1) && (eval' e2) | EOr (e1, e2) -> (eval' e1) || (eval' e2) | EFlag nm -> let v = var_get nm in assert(v = "true" || v = "false"); (v = "true") | ETest (nm, vl) -> let v = var_get nm in (v = vl) in eval' t let choose ?printer ?name var_get lst = let rec choose_aux = function | (cond, vl) :: tl -> if eval var_get cond then vl else choose_aux tl | [] -> let str_lst = if lst = [] then s_ "" else String.concat (s_ ", ") (List.map (fun (cond, vl) -> match printer with | Some p -> p vl | None -> s_ "") lst) in match name with | Some nm -> failwith (Printf.sprintf (f_ "No result for the choice list '%s': %s") nm str_lst) | None -> failwith (Printf.sprintf (f_ "No result for a choice list: %s") str_lst) in choose_aux (List.rev lst) end # 132 "myocamlbuild.ml" module BaseEnvLight = struct (* # 22 "src/base/BaseEnvLight.ml" *) module MapString = Map.Make(String) type t = string MapString.t let default_filename = Filename.concat (Sys.getcwd ()) "setup.data" let load ?(allow_empty=false) ?(filename=default_filename) () = if Sys.file_exists filename then begin let chn = open_in_bin filename in let st = Stream.of_channel chn in let line = ref 1 in let st_line = Stream.from (fun _ -> try match Stream.next st with | '\n' -> incr line; Some '\n' | c -> Some c with Stream.Failure -> None) in let lexer = Genlex.make_lexer ["="] st_line in let rec read_file mp = match Stream.npeek 3 lexer with | [Genlex.Ident nm; Genlex.Kwd "="; Genlex.String value] -> Stream.junk lexer; Stream.junk lexer; Stream.junk lexer; read_file (MapString.add nm value mp) | [] -> mp | _ -> failwith (Printf.sprintf "Malformed data file '%s' line %d" filename !line) in let mp = read_file MapString.empty in close_in chn; mp end else if allow_empty then begin MapString.empty end else begin failwith (Printf.sprintf "Unable to load environment, the file '%s' doesn't exist." filename) end let rec var_expand str env = let buff = Buffer.create ((String.length str) * 2) in Buffer.add_substitute buff (fun var -> try var_expand (MapString.find var env) env with Not_found -> failwith (Printf.sprintf "No variable %s defined when trying to expand %S." var str)) str; Buffer.contents buff let var_get name env = var_expand (MapString.find name env) env let var_choose lst env = OASISExpr.choose (fun nm -> var_get nm env) lst end # 237 "myocamlbuild.ml" module MyOCamlbuildFindlib = struct (* # 22 "src/plugins/ocamlbuild/MyOCamlbuildFindlib.ml" *) (** OCamlbuild extension, copied from * http://brion.inria.fr/gallium/index.php/Using_ocamlfind_with_ocamlbuild * by N. Pouillard and others * * Updated on 2009/02/28 * * Modified by Sylvain Le Gall *) open Ocamlbuild_plugin type conf = { no_automatic_syntax: bool; } (* these functions are not really officially exported *) let run_and_read = Ocamlbuild_pack.My_unix.run_and_read let blank_sep_strings = Ocamlbuild_pack.Lexers.blank_sep_strings let exec_from_conf exec = let exec = let env_filename = Pathname.basename BaseEnvLight.default_filename in let env = BaseEnvLight.load ~filename:env_filename ~allow_empty:true () in try BaseEnvLight.var_get exec env with Not_found -> Printf.eprintf "W: Cannot get variable %s\n" exec; exec in let fix_win32 str = if Sys.os_type = "Win32" then begin let buff = Buffer.create (String.length str) in (* Adapt for windowsi, ocamlbuild + win32 has a hard time to handle '\\'. *) String.iter (fun c -> Buffer.add_char buff (if c = '\\' then '/' else c)) str; Buffer.contents buff end else begin str end in fix_win32 exec let split s ch = let buf = Buffer.create 13 in let x = ref [] in let flush () = x := (Buffer.contents buf) :: !x; Buffer.clear buf in String.iter (fun c -> if c = ch then flush () else Buffer.add_char buf c) s; flush (); List.rev !x let split_nl s = split s '\n' let before_space s = try String.before s (String.index s ' ') with Not_found -> s (* ocamlfind command *) let ocamlfind x = S[Sh (exec_from_conf "ocamlfind"); x] (* This lists all supported packages. *) let find_packages () = List.map before_space (split_nl & run_and_read (exec_from_conf "ocamlfind" ^ " list")) (* Mock to list available syntaxes. *) let find_syntaxes () = ["camlp4o"; "camlp4r"] let well_known_syntax = [ "camlp4.quotations.o"; "camlp4.quotations.r"; "camlp4.exceptiontracer"; "camlp4.extend"; "camlp4.foldgenerator"; "camlp4.listcomprehension"; "camlp4.locationstripper"; "camlp4.macro"; "camlp4.mapgenerator"; "camlp4.metagenerator"; "camlp4.profiler"; "camlp4.tracer" ] let dispatch conf = function | After_options -> (* By using Before_options one let command line options have an higher * priority on the contrary using After_options will guarantee to have * the higher priority override default commands by ocamlfind ones *) Options.ocamlc := ocamlfind & A"ocamlc"; Options.ocamlopt := ocamlfind & A"ocamlopt"; Options.ocamldep := ocamlfind & A"ocamldep"; Options.ocamldoc := ocamlfind & A"ocamldoc"; Options.ocamlmktop := ocamlfind & A"ocamlmktop"; Options.ocamlmklib := ocamlfind & A"ocamlmklib" | After_rules -> (* When one link an OCaml library/binary/package, one should use * -linkpkg *) flag ["ocaml"; "link"; "program"] & A"-linkpkg"; if not (conf.no_automatic_syntax) then begin (* For each ocamlfind package one inject the -package option when * compiling, computing dependencies, generating documentation and * linking. *) List.iter begin fun pkg -> let base_args = [A"-package"; A pkg] in (* TODO: consider how to really choose camlp4o or camlp4r. *) let syn_args = [A"-syntax"; A "camlp4o"] in let (args, pargs) = (* Heuristic to identify syntax extensions: whether they end in ".syntax"; some might not. *) if Filename.check_suffix pkg "syntax" || List.mem pkg well_known_syntax then (syn_args @ base_args, syn_args) else (base_args, []) in flag ["ocaml"; "compile"; "pkg_"^pkg] & S args; flag ["ocaml"; "ocamldep"; "pkg_"^pkg] & S args; flag ["ocaml"; "doc"; "pkg_"^pkg] & S args; flag ["ocaml"; "link"; "pkg_"^pkg] & S base_args; flag ["ocaml"; "infer_interface"; "pkg_"^pkg] & S args; (* TODO: Check if this is allowed for OCaml < 3.12.1 *) flag ["ocaml"; "compile"; "package("^pkg^")"] & S pargs; flag ["ocaml"; "ocamldep"; "package("^pkg^")"] & S pargs; flag ["ocaml"; "doc"; "package("^pkg^")"] & S pargs; flag ["ocaml"; "infer_interface"; "package("^pkg^")"] & S pargs; end (find_packages ()); end; (* Like -package but for extensions syntax. Morover -syntax is useless * when linking. *) List.iter begin fun syntax -> flag ["ocaml"; "compile"; "syntax_"^syntax] & S[A"-syntax"; A syntax]; flag ["ocaml"; "ocamldep"; "syntax_"^syntax] & S[A"-syntax"; A syntax]; flag ["ocaml"; "doc"; "syntax_"^syntax] & S[A"-syntax"; A syntax]; flag ["ocaml"; "infer_interface"; "syntax_"^syntax] & S[A"-syntax"; A syntax]; end (find_syntaxes ()); (* The default "thread" tag is not compatible with ocamlfind. * Indeed, the default rules add the "threads.cma" or "threads.cmxa" * options when using this tag. When using the "-linkpkg" option with * ocamlfind, this module will then be added twice on the command line. * * To solve this, one approach is to add the "-thread" option when using * the "threads" package using the previous plugin. *) flag ["ocaml"; "pkg_threads"; "compile"] (S[A "-thread"]); flag ["ocaml"; "pkg_threads"; "doc"] (S[A "-I"; A "+threads"]); flag ["ocaml"; "pkg_threads"; "link"] (S[A "-thread"]); flag ["ocaml"; "pkg_threads"; "infer_interface"] (S[A "-thread"]); flag ["ocaml"; "package(threads)"; "compile"] (S[A "-thread"]); flag ["ocaml"; "package(threads)"; "doc"] (S[A "-I"; A "+threads"]); flag ["ocaml"; "package(threads)"; "link"] (S[A "-thread"]); flag ["ocaml"; "package(threads)"; "infer_interface"] (S[A "-thread"]); | _ -> () end module MyOCamlbuildBase = struct (* # 22 "src/plugins/ocamlbuild/MyOCamlbuildBase.ml" *) (** Base functions for writing myocamlbuild.ml @author Sylvain Le Gall *) open Ocamlbuild_plugin module OC = Ocamlbuild_pack.Ocaml_compiler type dir = string type file = string type name = string type tag = string (* # 62 "src/plugins/ocamlbuild/MyOCamlbuildBase.ml" *) type t = { lib_ocaml: (name * dir list * string list) list; lib_c: (name * dir * file list) list; flags: (tag list * (spec OASISExpr.choices)) list; (* Replace the 'dir: include' from _tags by a precise interdepends in * directory. *) includes: (dir * dir list) list; } let env_filename = Pathname.basename BaseEnvLight.default_filename let dispatch_combine lst = fun e -> List.iter (fun dispatch -> dispatch e) lst let tag_libstubs nm = "use_lib"^nm^"_stubs" let nm_libstubs nm = nm^"_stubs" let dispatch t e = let env = BaseEnvLight.load ~filename:env_filename ~allow_empty:true () in match e with | Before_options -> let no_trailing_dot s = if String.length s >= 1 && s.[0] = '.' then String.sub s 1 ((String.length s) - 1) else s in List.iter (fun (opt, var) -> try opt := no_trailing_dot (BaseEnvLight.var_get var env) with Not_found -> Printf.eprintf "W: Cannot get variable %s\n" var) [ Options.ext_obj, "ext_obj"; Options.ext_lib, "ext_lib"; Options.ext_dll, "ext_dll"; ] | After_rules -> (* Declare OCaml libraries *) List.iter (function | nm, [], intf_modules -> ocaml_lib nm; let cmis = List.map (fun m -> (String.uncapitalize m) ^ ".cmi") intf_modules in dep ["ocaml"; "link"; "library"; "file:"^nm^".cma"] cmis | nm, dir :: tl, intf_modules -> ocaml_lib ~dir:dir (dir^"/"^nm); List.iter (fun dir -> List.iter (fun str -> flag ["ocaml"; "use_"^nm; str] (S[A"-I"; P dir])) ["compile"; "infer_interface"; "doc"]) tl; let cmis = List.map (fun m -> dir^"/"^(String.uncapitalize m)^".cmi") intf_modules in dep ["ocaml"; "link"; "library"; "file:"^dir^"/"^nm^".cma"] cmis) t.lib_ocaml; (* Declare directories dependencies, replace "include" in _tags. *) List.iter (fun (dir, include_dirs) -> Pathname.define_context dir include_dirs) t.includes; (* Declare C libraries *) List.iter (fun (lib, dir, headers) -> (* Handle C part of library *) flag ["link"; "library"; "ocaml"; "byte"; tag_libstubs lib] (S[A"-dllib"; A("-l"^(nm_libstubs lib)); A"-cclib"; A("-l"^(nm_libstubs lib))]); flag ["link"; "library"; "ocaml"; "native"; tag_libstubs lib] (S[A"-cclib"; A("-l"^(nm_libstubs lib))]); flag ["link"; "program"; "ocaml"; "byte"; tag_libstubs lib] (S[A"-dllib"; A("dll"^(nm_libstubs lib))]); (* When ocaml link something that use the C library, then one need that file to be up to date. This holds both for programs and for libraries. *) dep ["link"; "ocaml"; tag_libstubs lib] [dir/"lib"^(nm_libstubs lib)^"."^(!Options.ext_lib)]; dep ["compile"; "ocaml"; tag_libstubs lib] [dir/"lib"^(nm_libstubs lib)^"."^(!Options.ext_lib)]; (* TODO: be more specific about what depends on headers *) (* Depends on .h files *) dep ["compile"; "c"] headers; (* Setup search path for lib *) flag ["link"; "ocaml"; "use_"^lib] (S[A"-I"; P(dir)]); ) t.lib_c; (* Add flags *) List.iter (fun (tags, cond_specs) -> let spec = BaseEnvLight.var_choose cond_specs env in let rec eval_specs = function | S lst -> S (List.map eval_specs lst) | A str -> A (BaseEnvLight.var_expand str env) | spec -> spec in flag tags & (eval_specs spec)) t.flags | _ -> () let dispatch_default conf t = dispatch_combine [ dispatch t; MyOCamlbuildFindlib.dispatch conf; ] end # 606 "myocamlbuild.ml" open Ocamlbuild_plugin;; let package_default = { MyOCamlbuildBase.lib_ocaml = [ ("text", ["src"], []); ("bigarray", ["src"], []); ("pcre", ["src"], []); ("pcre-syntax", ["syntax"], []) ]; lib_c = [ ("text", "src", ["src/common.h"]); ("bigarray", "src", ["src/common.h"]) ]; flags = []; includes = [("tests", ["src"]); ("syntax", ["src"])] } ;; let conf = {MyOCamlbuildFindlib.no_automatic_syntax = false} let dispatch_default = MyOCamlbuildBase.dispatch_default conf package_default;; # 632 "myocamlbuild.ml" (* OASIS_STOP *) open Ocamlbuild_plugin let my_dispatch = function | Before_options -> Options.make_links := false | After_rules -> let env = BaseEnvLight.load () in if BaseEnvLight.var_get "need_liconv" env = "true" then begin flag ["ocamlmklib"; "c"; "use_iconv"] & A"-liconv"; flag ["link"; "ocaml"; "use_iconv"] & S[A"-cclib"; A"-liconv"] end; let dir = BaseEnvLight.var_get "iconv_prefix" env in if dir <> "" then begin flag ["ocamlmklib"; "c"; "use_iconv"] & A("-L" ^ dir ^ "/lib"); flag ["c"; "compile"; "use_iconv"] & S[A"-ccopt"; A("-I" ^ dir ^ "/include")]; flag ["link"; "ocaml"; "use_iconv"] & S[A"-cclib"; A("-L" ^ dir ^ "/lib")] end | _ -> () let () = dispatch (fun hook -> dispatch_default hook; my_dispatch hook) ocaml-text-0.8/opam000066400000000000000000000003701244105655100143200ustar00rootroot00000000000000opam-version: "1.2" name: "text" maintainer: "vb@luminar.eu.org" build: [ ["./configure" "--%{pcre:enable}%-pcre"] [make] [make "install"] ] remove: [["ocamlfind" "remove" "text"]] depends: ["ocamlfind" {build} "base-bytes"] depopts: ["pcre"]ocaml-text-0.8/predist.sh000077500000000000000000000004541244105655100154550ustar00rootroot00000000000000#!/bin/sh # Generate CHANGES.darcs [ -d "$DARCS_REPO" ] && darcs changes --repodir "$DARCS_REPO" > CHANGES.darcs # Build the user manual for release cd manual make manual.pdf # Remove intermediate files make clean-aux cd .. # Add OASIS stuff oasis setup # Cleanup rm -f predist.sh boring dist.sh ocaml-text-0.8/setup.ml000066400000000000000000005745761244105655100151630ustar00rootroot00000000000000(* * setup.ml * -------- * Copyright : (c) 2011, Jeremie Dimino * Licence : BSD3 * * This file is a part of ocaml-text. *) (* OASIS_START *) (* DO NOT EDIT (digest: accafa1ae06ad235d9eeddf57e24958f) *) (* Regenerated by OASIS v0.4.5 Visit http://oasis.forge.ocamlcore.org for more information and documentation about functions used in this file. *) module OASISGettext = struct (* # 22 "src/oasis/OASISGettext.ml" *) let ns_ str = str let s_ str = str let f_ (str: ('a, 'b, 'c, 'd) format4) = str let fn_ fmt1 fmt2 n = if n = 1 then fmt1^^"" else fmt2^^"" let init = [] end module OASISContext = struct (* # 22 "src/oasis/OASISContext.ml" *) open OASISGettext type level = [ `Debug | `Info | `Warning | `Error] type t = { (* TODO: replace this by a proplist. *) quiet: bool; info: bool; debug: bool; ignore_plugins: bool; ignore_unknown_fields: bool; printf: level -> string -> unit; } let printf lvl str = let beg = match lvl with | `Error -> s_ "E: " | `Warning -> s_ "W: " | `Info -> s_ "I: " | `Debug -> s_ "D: " in prerr_endline (beg^str) let default = ref { quiet = false; info = false; debug = false; ignore_plugins = false; ignore_unknown_fields = false; printf = printf; } let quiet = {!default with quiet = true} let fspecs () = (* TODO: don't act on default. *) let ignore_plugins = ref false in ["-quiet", Arg.Unit (fun () -> default := {!default with quiet = true}), s_ " Run quietly"; "-info", Arg.Unit (fun () -> default := {!default with info = true}), s_ " Display information message"; "-debug", Arg.Unit (fun () -> default := {!default with debug = true}), s_ " Output debug message"; "-ignore-plugins", Arg.Set ignore_plugins, s_ " Ignore plugin's field."; "-C", (* TODO: remove this chdir. *) Arg.String (fun str -> Sys.chdir str), s_ "dir Change directory before running."], fun () -> {!default with ignore_plugins = !ignore_plugins} end module OASISString = struct (* # 22 "src/oasis/OASISString.ml" *) (** Various string utilities. Mostly inspired by extlib and batteries ExtString and BatString libraries. @author Sylvain Le Gall *) let nsplitf str f = if str = "" then [] else let buf = Buffer.create 13 in let lst = ref [] in let push () = lst := Buffer.contents buf :: !lst; Buffer.clear buf in let str_len = String.length str in for i = 0 to str_len - 1 do if f str.[i] then push () else Buffer.add_char buf str.[i] done; push (); List.rev !lst (** [nsplit c s] Split the string [s] at char [c]. It doesn't include the separator. *) let nsplit str c = nsplitf str ((=) c) let find ~what ?(offset=0) str = let what_idx = ref 0 in let str_idx = ref offset in while !str_idx < String.length str && !what_idx < String.length what do if str.[!str_idx] = what.[!what_idx] then incr what_idx else what_idx := 0; incr str_idx done; if !what_idx <> String.length what then raise Not_found else !str_idx - !what_idx let sub_start str len = let str_len = String.length str in if len >= str_len then "" else String.sub str len (str_len - len) let sub_end ?(offset=0) str len = let str_len = String.length str in if len >= str_len then "" else String.sub str 0 (str_len - len) let starts_with ~what ?(offset=0) str = let what_idx = ref 0 in let str_idx = ref offset in let ok = ref true in while !ok && !str_idx < String.length str && !what_idx < String.length what do if str.[!str_idx] = what.[!what_idx] then incr what_idx else ok := false; incr str_idx done; if !what_idx = String.length what then true else false let strip_starts_with ~what str = if starts_with ~what str then sub_start str (String.length what) else raise Not_found let ends_with ~what ?(offset=0) str = let what_idx = ref ((String.length what) - 1) in let str_idx = ref ((String.length str) - 1) in let ok = ref true in while !ok && offset <= !str_idx && 0 <= !what_idx do if str.[!str_idx] = what.[!what_idx] then decr what_idx else ok := false; decr str_idx done; if !what_idx = -1 then true else false let strip_ends_with ~what str = if ends_with ~what str then sub_end str (String.length what) else raise Not_found let replace_chars f s = let buf = Buffer.create (String.length s) in String.iter (fun c -> Buffer.add_char buf (f c)) s; Buffer.contents buf end module OASISUtils = struct (* # 22 "src/oasis/OASISUtils.ml" *) open OASISGettext module MapExt = struct module type S = sig include Map.S val add_list: 'a t -> (key * 'a) list -> 'a t val of_list: (key * 'a) list -> 'a t val to_list: 'a t -> (key * 'a) list end module Make (Ord: Map.OrderedType) = struct include Map.Make(Ord) let rec add_list t = function | (k, v) :: tl -> add_list (add k v t) tl | [] -> t let of_list lst = add_list empty lst let to_list t = fold (fun k v acc -> (k, v) :: acc) t [] end end module MapString = MapExt.Make(String) module SetExt = struct module type S = sig include Set.S val add_list: t -> elt list -> t val of_list: elt list -> t val to_list: t -> elt list end module Make (Ord: Set.OrderedType) = struct include Set.Make(Ord) let rec add_list t = function | e :: tl -> add_list (add e t) tl | [] -> t let of_list lst = add_list empty lst let to_list = elements end end module SetString = SetExt.Make(String) let compare_csl s1 s2 = String.compare (String.lowercase s1) (String.lowercase s2) module HashStringCsl = Hashtbl.Make (struct type t = string let equal s1 s2 = (String.lowercase s1) = (String.lowercase s2) let hash s = Hashtbl.hash (String.lowercase s) end) module SetStringCsl = SetExt.Make (struct type t = string let compare = compare_csl end) let varname_of_string ?(hyphen='_') s = if String.length s = 0 then begin invalid_arg "varname_of_string" end else begin let buf = OASISString.replace_chars (fun c -> if ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9') then c else hyphen) s; in let buf = (* Start with a _ if digit *) if '0' <= s.[0] && s.[0] <= '9' then "_"^buf else buf in String.lowercase buf end let varname_concat ?(hyphen='_') p s = let what = String.make 1 hyphen in let p = try OASISString.strip_ends_with ~what p with Not_found -> p in let s = try OASISString.strip_starts_with ~what s with Not_found -> s in p^what^s let is_varname str = str = varname_of_string str let failwithf fmt = Printf.ksprintf failwith fmt end module PropList = struct (* # 22 "src/oasis/PropList.ml" *) open OASISGettext type name = string exception Not_set of name * string option exception No_printer of name exception Unknown_field of name * name let () = Printexc.register_printer (function | Not_set (nm, Some rsn) -> Some (Printf.sprintf (f_ "Field '%s' is not set: %s") nm rsn) | Not_set (nm, None) -> Some (Printf.sprintf (f_ "Field '%s' is not set") nm) | No_printer nm -> Some (Printf.sprintf (f_ "No default printer for value %s") nm) | Unknown_field (nm, schm) -> Some (Printf.sprintf (f_ "Field %s is not defined in schema %s") nm schm) | _ -> None) module Data = struct type t = (name, unit -> unit) Hashtbl.t let create () = Hashtbl.create 13 let clear t = Hashtbl.clear t (* # 78 "src/oasis/PropList.ml" *) end module Schema = struct type ('ctxt, 'extra) value = { get: Data.t -> string; set: Data.t -> ?context:'ctxt -> string -> unit; help: (unit -> string) option; extra: 'extra; } type ('ctxt, 'extra) t = { name: name; fields: (name, ('ctxt, 'extra) value) Hashtbl.t; order: name Queue.t; name_norm: string -> string; } let create ?(case_insensitive=false) nm = { name = nm; fields = Hashtbl.create 13; order = Queue.create (); name_norm = (if case_insensitive then String.lowercase else fun s -> s); } let add t nm set get extra help = let key = t.name_norm nm in if Hashtbl.mem t.fields key then failwith (Printf.sprintf (f_ "Field '%s' is already defined in schema '%s'") nm t.name); Hashtbl.add t.fields key { set = set; get = get; help = help; extra = extra; }; Queue.add nm t.order let mem t nm = Hashtbl.mem t.fields nm let find t nm = try Hashtbl.find t.fields (t.name_norm nm) with Not_found -> raise (Unknown_field (nm, t.name)) let get t data nm = (find t nm).get data let set t data nm ?context x = (find t nm).set data ?context x let fold f acc t = Queue.fold (fun acc k -> let v = find t k in f acc k v.extra v.help) acc t.order let iter f t = fold (fun () -> f) () t let name t = t.name end module Field = struct type ('ctxt, 'value, 'extra) t = { set: Data.t -> ?context:'ctxt -> 'value -> unit; get: Data.t -> 'value; sets: Data.t -> ?context:'ctxt -> string -> unit; gets: Data.t -> string; help: (unit -> string) option; extra: 'extra; } let new_id = let last_id = ref 0 in fun () -> incr last_id; !last_id let create ?schema ?name ?parse ?print ?default ?update ?help extra = (* Default value container *) let v = ref None in (* If name is not given, create unique one *) let nm = match name with | Some s -> s | None -> Printf.sprintf "_anon_%d" (new_id ()) in (* Last chance to get a value: the default *) let default () = match default with | Some d -> d | None -> raise (Not_set (nm, Some (s_ "no default value"))) in (* Get data *) let get data = (* Get value *) try (Hashtbl.find data nm) (); match !v with | Some x -> x | None -> default () with Not_found -> default () in (* Set data *) let set data ?context x = let x = match update with | Some f -> begin try f ?context (get data) x with Not_set _ -> x end | None -> x in Hashtbl.replace data nm (fun () -> v := Some x) in (* Parse string value, if possible *) let parse = match parse with | Some f -> f | None -> fun ?context s -> failwith (Printf.sprintf (f_ "Cannot parse field '%s' when setting value %S") nm s) in (* Set data, from string *) let sets data ?context s = set ?context data (parse ?context s) in (* Output value as string, if possible *) let print = match print with | Some f -> f | None -> fun _ -> raise (No_printer nm) in (* Get data, as a string *) let gets data = print (get data) in begin match schema with | Some t -> Schema.add t nm sets gets extra help | None -> () end; { set = set; get = get; sets = sets; gets = gets; help = help; extra = extra; } let fset data t ?context x = t.set data ?context x let fget data t = t.get data let fsets data t ?context s = t.sets data ?context s let fgets data t = t.gets data end module FieldRO = struct let create ?schema ?name ?parse ?print ?default ?update ?help extra = let fld = Field.create ?schema ?name ?parse ?print ?default ?update ?help extra in fun data -> Field.fget data fld end end module OASISMessage = struct (* # 22 "src/oasis/OASISMessage.ml" *) open OASISGettext open OASISContext let generic_message ~ctxt lvl fmt = let cond = if ctxt.quiet then false else match lvl with | `Debug -> ctxt.debug | `Info -> ctxt.info | _ -> true in Printf.ksprintf (fun str -> if cond then begin ctxt.printf lvl str end) fmt let debug ~ctxt fmt = generic_message ~ctxt `Debug fmt let info ~ctxt fmt = generic_message ~ctxt `Info fmt let warning ~ctxt fmt = generic_message ~ctxt `Warning fmt let error ~ctxt fmt = generic_message ~ctxt `Error fmt end module OASISVersion = struct (* # 22 "src/oasis/OASISVersion.ml" *) open OASISGettext type s = string type t = string type comparator = | VGreater of t | VGreaterEqual of t | VEqual of t | VLesser of t | VLesserEqual of t | VOr of comparator * comparator | VAnd of comparator * comparator (* Range of allowed characters *) let is_digit c = '0' <= c && c <= '9' let is_alpha c = ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') let is_special = function | '.' | '+' | '-' | '~' -> true | _ -> false let rec version_compare v1 v2 = if v1 <> "" || v2 <> "" then begin (* Compare ascii string, using special meaning for version * related char *) let val_ascii c = if c = '~' then -1 else if is_digit c then 0 else if c = '\000' then 0 else if is_alpha c then Char.code c else (Char.code c) + 256 in let len1 = String.length v1 in let len2 = String.length v2 in let p = ref 0 in (** Compare ascii part *) let compare_vascii () = let cmp = ref 0 in while !cmp = 0 && !p < len1 && !p < len2 && not (is_digit v1.[!p] && is_digit v2.[!p]) do cmp := (val_ascii v1.[!p]) - (val_ascii v2.[!p]); incr p done; if !cmp = 0 && !p < len1 && !p = len2 then val_ascii v1.[!p] else if !cmp = 0 && !p = len1 && !p < len2 then - (val_ascii v2.[!p]) else !cmp in (** Compare digit part *) let compare_digit () = let extract_int v p = let start_p = !p in while !p < String.length v && is_digit v.[!p] do incr p done; let substr = String.sub v !p ((String.length v) - !p) in let res = match String.sub v start_p (!p - start_p) with | "" -> 0 | s -> int_of_string s in res, substr in let i1, tl1 = extract_int v1 (ref !p) in let i2, tl2 = extract_int v2 (ref !p) in i1 - i2, tl1, tl2 in match compare_vascii () with | 0 -> begin match compare_digit () with | 0, tl1, tl2 -> if tl1 <> "" && is_digit tl1.[0] then 1 else if tl2 <> "" && is_digit tl2.[0] then -1 else version_compare tl1 tl2 | n, _, _ -> n end | n -> n end else begin 0 end let version_of_string str = str let string_of_version t = t let version_compare_string s1 s2 = version_compare (version_of_string s1) (version_of_string s2) let chop t = try let pos = String.rindex t '.' in String.sub t 0 pos with Not_found -> t let rec comparator_apply v op = match op with | VGreater cv -> (version_compare v cv) > 0 | VGreaterEqual cv -> (version_compare v cv) >= 0 | VLesser cv -> (version_compare v cv) < 0 | VLesserEqual cv -> (version_compare v cv) <= 0 | VEqual cv -> (version_compare v cv) = 0 | VOr (op1, op2) -> (comparator_apply v op1) || (comparator_apply v op2) | VAnd (op1, op2) -> (comparator_apply v op1) && (comparator_apply v op2) let rec string_of_comparator = function | VGreater v -> "> "^(string_of_version v) | VEqual v -> "= "^(string_of_version v) | VLesser v -> "< "^(string_of_version v) | VGreaterEqual v -> ">= "^(string_of_version v) | VLesserEqual v -> "<= "^(string_of_version v) | VOr (c1, c2) -> (string_of_comparator c1)^" || "^(string_of_comparator c2) | VAnd (c1, c2) -> (string_of_comparator c1)^" && "^(string_of_comparator c2) let rec varname_of_comparator = let concat p v = OASISUtils.varname_concat p (OASISUtils.varname_of_string (string_of_version v)) in function | VGreater v -> concat "gt" v | VLesser v -> concat "lt" v | VEqual v -> concat "eq" v | VGreaterEqual v -> concat "ge" v | VLesserEqual v -> concat "le" v | VOr (c1, c2) -> (varname_of_comparator c1)^"_or_"^(varname_of_comparator c2) | VAnd (c1, c2) -> (varname_of_comparator c1)^"_and_"^(varname_of_comparator c2) let rec comparator_ge v' = let cmp v = version_compare v v' >= 0 in function | VEqual v | VGreaterEqual v | VGreater v -> cmp v | VLesserEqual _ | VLesser _ -> false | VOr (c1, c2) -> comparator_ge v' c1 || comparator_ge v' c2 | VAnd (c1, c2) -> comparator_ge v' c1 && comparator_ge v' c2 end module OASISLicense = struct (* # 22 "src/oasis/OASISLicense.ml" *) (** License for _oasis fields @author Sylvain Le Gall *) type license = string type license_exception = string type license_version = | Version of OASISVersion.t | VersionOrLater of OASISVersion.t | NoVersion type license_dep_5_unit = { license: license; excption: license_exception option; version: license_version; } type license_dep_5 = | DEP5Unit of license_dep_5_unit | DEP5Or of license_dep_5 list | DEP5And of license_dep_5 list type t = | DEP5License of license_dep_5 | OtherLicense of string (* URL *) end module OASISExpr = struct (* # 22 "src/oasis/OASISExpr.ml" *) open OASISGettext type test = string type flag = string type t = | EBool of bool | ENot of t | EAnd of t * t | EOr of t * t | EFlag of flag | ETest of test * string type 'a choices = (t * 'a) list let eval var_get t = let rec eval' = function | EBool b -> b | ENot e -> not (eval' e) | EAnd (e1, e2) -> (eval' e1) && (eval' e2) | EOr (e1, e2) -> (eval' e1) || (eval' e2) | EFlag nm -> let v = var_get nm in assert(v = "true" || v = "false"); (v = "true") | ETest (nm, vl) -> let v = var_get nm in (v = vl) in eval' t let choose ?printer ?name var_get lst = let rec choose_aux = function | (cond, vl) :: tl -> if eval var_get cond then vl else choose_aux tl | [] -> let str_lst = if lst = [] then s_ "" else String.concat (s_ ", ") (List.map (fun (cond, vl) -> match printer with | Some p -> p vl | None -> s_ "") lst) in match name with | Some nm -> failwith (Printf.sprintf (f_ "No result for the choice list '%s': %s") nm str_lst) | None -> failwith (Printf.sprintf (f_ "No result for a choice list: %s") str_lst) in choose_aux (List.rev lst) end module OASISText = struct (* # 22 "src/oasis/OASISText.ml" *) type elt = | Para of string | Verbatim of string | BlankLine type t = elt list end module OASISTypes = struct (* # 22 "src/oasis/OASISTypes.ml" *) type name = string type package_name = string type url = string type unix_dirname = string type unix_filename = string type host_dirname = string type host_filename = string type prog = string type arg = string type args = string list type command_line = (prog * arg list) type findlib_name = string type findlib_full = string type compiled_object = | Byte | Native | Best type dependency = | FindlibPackage of findlib_full * OASISVersion.comparator option | InternalLibrary of name type tool = | ExternalTool of name | InternalExecutable of name type vcs = | Darcs | Git | Svn | Cvs | Hg | Bzr | Arch | Monotone | OtherVCS of url type plugin_kind = [ `Configure | `Build | `Doc | `Test | `Install | `Extra ] type plugin_data_purpose = [ `Configure | `Build | `Install | `Clean | `Distclean | `Install | `Uninstall | `Test | `Doc | `Extra | `Other of string ] type 'a plugin = 'a * name * OASISVersion.t option type all_plugin = plugin_kind plugin type plugin_data = (all_plugin * plugin_data_purpose * (unit -> unit)) list (* # 115 "src/oasis/OASISTypes.ml" *) type 'a conditional = 'a OASISExpr.choices type custom = { pre_command: (command_line option) conditional; post_command: (command_line option) conditional; } type common_section = { cs_name: name; cs_data: PropList.Data.t; cs_plugin_data: plugin_data; } type build_section = { bs_build: bool conditional; bs_install: bool conditional; bs_path: unix_dirname; bs_compiled_object: compiled_object; bs_build_depends: dependency list; bs_build_tools: tool list; bs_c_sources: unix_filename list; bs_data_files: (unix_filename * unix_filename option) list; bs_ccopt: args conditional; bs_cclib: args conditional; bs_dlllib: args conditional; bs_dllpath: args conditional; bs_byteopt: args conditional; bs_nativeopt: args conditional; } type library = { lib_modules: string list; lib_pack: bool; lib_internal_modules: string list; lib_findlib_parent: findlib_name option; lib_findlib_name: findlib_name option; lib_findlib_containers: findlib_name list; } type object_ = { obj_modules: string list; obj_findlib_fullname: findlib_name list option; } type executable = { exec_custom: bool; exec_main_is: unix_filename; } type flag = { flag_description: string option; flag_default: bool conditional; } type source_repository = { src_repo_type: vcs; src_repo_location: url; src_repo_browser: url option; src_repo_module: string option; src_repo_branch: string option; src_repo_tag: string option; src_repo_subdir: unix_filename option; } type test = { test_type: [`Test] plugin; test_command: command_line conditional; test_custom: custom; test_working_directory: unix_filename option; test_run: bool conditional; test_tools: tool list; } type doc_format = | HTML of unix_filename | DocText | PDF | PostScript | Info of unix_filename | DVI | OtherDoc type doc = { doc_type: [`Doc] plugin; doc_custom: custom; doc_build: bool conditional; doc_install: bool conditional; doc_install_dir: unix_filename; doc_title: string; doc_authors: string list; doc_abstract: string option; doc_format: doc_format; doc_data_files: (unix_filename * unix_filename option) list; doc_build_tools: tool list; } type section = | Library of common_section * build_section * library | Object of common_section * build_section * object_ | Executable of common_section * build_section * executable | Flag of common_section * flag | SrcRepo of common_section * source_repository | Test of common_section * test | Doc of common_section * doc type section_kind = [ `Library | `Object | `Executable | `Flag | `SrcRepo | `Test | `Doc ] type package = { oasis_version: OASISVersion.t; ocaml_version: OASISVersion.comparator option; findlib_version: OASISVersion.comparator option; alpha_features: string list; beta_features: string list; name: package_name; version: OASISVersion.t; license: OASISLicense.t; license_file: unix_filename option; copyrights: string list; maintainers: string list; authors: string list; homepage: url option; synopsis: string; description: OASISText.t option; categories: url list; conf_type: [`Configure] plugin; conf_custom: custom; build_type: [`Build] plugin; build_custom: custom; install_type: [`Install] plugin; install_custom: custom; uninstall_custom: custom; clean_custom: custom; distclean_custom: custom; files_ab: unix_filename list; sections: section list; plugins: [`Extra] plugin list; disable_oasis_section: unix_filename list; schema_data: PropList.Data.t; plugin_data: plugin_data; } end module OASISFeatures = struct (* # 22 "src/oasis/OASISFeatures.ml" *) open OASISTypes open OASISUtils open OASISGettext open OASISVersion module MapPlugin = Map.Make (struct type t = plugin_kind * name let compare = Pervasives.compare end) module Data = struct type t = { oasis_version: OASISVersion.t; plugin_versions: OASISVersion.t option MapPlugin.t; alpha_features: string list; beta_features: string list; } let create oasis_version alpha_features beta_features = { oasis_version = oasis_version; plugin_versions = MapPlugin.empty; alpha_features = alpha_features; beta_features = beta_features } let of_package pkg = create pkg.OASISTypes.oasis_version pkg.OASISTypes.alpha_features pkg.OASISTypes.beta_features let add_plugin (plugin_kind, plugin_name, plugin_version) t = {t with plugin_versions = MapPlugin.add (plugin_kind, plugin_name) plugin_version t.plugin_versions} let plugin_version plugin_kind plugin_name t = MapPlugin.find (plugin_kind, plugin_name) t.plugin_versions let to_string t = Printf.sprintf "oasis_version: %s; alpha_features: %s; beta_features: %s; \ plugins_version: %s" (OASISVersion.string_of_version t.oasis_version) (String.concat ", " t.alpha_features) (String.concat ", " t.beta_features) (String.concat ", " (MapPlugin.fold (fun (_, plg) ver_opt acc -> (plg^ (match ver_opt with | Some v -> " "^(OASISVersion.string_of_version v) | None -> "")) :: acc) t.plugin_versions [])) end type origin = | Field of string * string | Section of string | NoOrigin type stage = Alpha | Beta let string_of_stage = function | Alpha -> "alpha" | Beta -> "beta" let field_of_stage = function | Alpha -> "AlphaFeatures" | Beta -> "BetaFeatures" type publication = InDev of stage | SinceVersion of OASISVersion.t type t = { name: string; plugin: all_plugin option; publication: publication; description: unit -> string; } (* TODO: mutex protect this. *) let all_features = Hashtbl.create 13 let since_version ver_str = SinceVersion (version_of_string ver_str) let alpha = InDev Alpha let beta = InDev Beta let to_string t = Printf.sprintf "feature: %s; plugin: %s; publication: %s" t.name (match t.plugin with | None -> "" | Some (_, nm, _) -> nm) (match t.publication with | InDev stage -> string_of_stage stage | SinceVersion ver -> ">= "^(OASISVersion.string_of_version ver)) let data_check t data origin = let no_message = "no message" in let check_feature features stage = let has_feature = List.mem t.name features in if not has_feature then match origin with | Field (fld, where) -> Some (Printf.sprintf (f_ "Field %s in %s is only available when feature %s \ is in field %s.") fld where t.name (field_of_stage stage)) | Section sct -> Some (Printf.sprintf (f_ "Section %s is only available when features %s \ is in field %s.") sct t.name (field_of_stage stage)) | NoOrigin -> Some no_message else None in let version_is_good ~min_version version fmt = let version_is_good = OASISVersion.comparator_apply version (OASISVersion.VGreaterEqual min_version) in Printf.ksprintf (fun str -> if version_is_good then None else Some str) fmt in match origin, t.plugin, t.publication with | _, _, InDev Alpha -> check_feature data.Data.alpha_features Alpha | _, _, InDev Beta -> check_feature data.Data.beta_features Beta | Field(fld, where), None, SinceVersion min_version -> version_is_good ~min_version data.Data.oasis_version (f_ "Field %s in %s is only valid since OASIS v%s, update \ OASISFormat field from '%s' to '%s' after checking \ OASIS changelog.") fld where (string_of_version min_version) (string_of_version data.Data.oasis_version) (string_of_version min_version) | Field(fld, where), Some(plugin_knd, plugin_name, _), SinceVersion min_version -> begin try let plugin_version_current = try match Data.plugin_version plugin_knd plugin_name data with | Some ver -> ver | None -> failwithf (f_ "Field %s in %s is only valid for the OASIS \ plugin %s since v%s, but no plugin version is \ defined in the _oasis file, change '%s' to \ '%s (%s)' in your _oasis file.") fld where plugin_name (string_of_version min_version) plugin_name plugin_name (string_of_version min_version) with Not_found -> failwithf (f_ "Field %s in %s is only valid when the OASIS plugin %s \ is defined.") fld where plugin_name in version_is_good ~min_version plugin_version_current (f_ "Field %s in %s is only valid for the OASIS plugin %s \ since v%s, update your plugin from '%s (%s)' to \ '%s (%s)' after checking the plugin's changelog.") fld where plugin_name (string_of_version min_version) plugin_name (string_of_version plugin_version_current) plugin_name (string_of_version min_version) with Failure msg -> Some msg end | Section sct, None, SinceVersion min_version -> version_is_good ~min_version data.Data.oasis_version (f_ "Section %s is only valid for since OASIS v%s, update \ OASISFormat field from '%s' to '%s' after checking OASIS \ changelog.") sct (string_of_version min_version) (string_of_version data.Data.oasis_version) (string_of_version min_version) | Section sct, Some(plugin_knd, plugin_name, _), SinceVersion min_version -> begin try let plugin_version_current = try match Data.plugin_version plugin_knd plugin_name data with | Some ver -> ver | None -> failwithf (f_ "Section %s is only valid for the OASIS \ plugin %s since v%s, but no plugin version is \ defined in the _oasis file, change '%s' to \ '%s (%s)' in your _oasis file.") sct plugin_name (string_of_version min_version) plugin_name plugin_name (string_of_version min_version) with Not_found -> failwithf (f_ "Section %s is only valid when the OASIS plugin %s \ is defined.") sct plugin_name in version_is_good ~min_version plugin_version_current (f_ "Section %s is only valid for the OASIS plugin %s \ since v%s, update your plugin from '%s (%s)' to \ '%s (%s)' after checking the plugin's changelog.") sct plugin_name (string_of_version min_version) plugin_name (string_of_version plugin_version_current) plugin_name (string_of_version min_version) with Failure msg -> Some msg end | NoOrigin, None, SinceVersion min_version -> version_is_good ~min_version data.Data.oasis_version "%s" no_message | NoOrigin, Some(plugin_knd, plugin_name, _), SinceVersion min_version -> begin try let plugin_version_current = match Data.plugin_version plugin_knd plugin_name data with | Some ver -> ver | None -> raise Not_found in version_is_good ~min_version plugin_version_current "%s" no_message with Not_found -> Some no_message end let data_assert t data origin = match data_check t data origin with | None -> () | Some str -> failwith str let data_test t data = match data_check t data NoOrigin with | None -> true | Some str -> false let package_test t pkg = data_test t (Data.of_package pkg) let create ?plugin name publication description = let () = if Hashtbl.mem all_features name then failwithf "Feature '%s' is already declared." name in let t = { name = name; plugin = plugin; publication = publication; description = description; } in Hashtbl.add all_features name t; t let get_stage name = try (Hashtbl.find all_features name).publication with Not_found -> failwithf (f_ "Feature %s doesn't exist.") name let list () = Hashtbl.fold (fun _ v acc -> v :: acc) all_features [] (* * Real flags. *) let features = create "features_fields" (since_version "0.4") (fun () -> s_ "Enable to experiment not yet official features.") let flag_docs = create "flag_docs" (since_version "0.3") (fun () -> s_ "Building docs require '-docs' flag at configure.") let flag_tests = create "flag_tests" (since_version "0.3") (fun () -> s_ "Running tests require '-tests' flag at configure.") let pack = create "pack" (since_version "0.3") (fun () -> s_ "Allow to create packed library.") let section_object = create "section_object" beta (fun () -> s_ "Implement an object section.") let dynrun_for_release = create "dynrun_for_release" alpha (fun () -> s_ "Make '-setup-update dynamic' suitable for releasing project.") let compiled_setup_ml = create "compiled_setup_ml" alpha (fun () -> s_ "It compiles the setup.ml and speed-up actions done with it.") let disable_oasis_section = create "disable_oasis_section" alpha (fun () -> s_ "Allows the OASIS section comments and digest to be omitted in \ generated files.") let no_automatic_syntax = create "no_automatic_syntax" alpha (fun () -> s_ "Disable the automatic inclusion of -syntax camlp4o for packages \ that matches the internal heuristic (if a dependency ends with \ a .syntax or is a well known syntax).") end module OASISUnixPath = struct (* # 22 "src/oasis/OASISUnixPath.ml" *) type unix_filename = string type unix_dirname = string type host_filename = string type host_dirname = string let current_dir_name = "." let parent_dir_name = ".." let is_current_dir fn = fn = current_dir_name || fn = "" let concat f1 f2 = if is_current_dir f1 then f2 else let f1' = try OASISString.strip_ends_with ~what:"/" f1 with Not_found -> f1 in f1'^"/"^f2 let make = function | hd :: tl -> List.fold_left (fun f p -> concat f p) hd tl | [] -> invalid_arg "OASISUnixPath.make" let dirname f = try String.sub f 0 (String.rindex f '/') with Not_found -> current_dir_name let basename f = try let pos_start = (String.rindex f '/') + 1 in String.sub f pos_start ((String.length f) - pos_start) with Not_found -> f let chop_extension f = try let last_dot = String.rindex f '.' in let sub = String.sub f 0 last_dot in try let last_slash = String.rindex f '/' in if last_slash < last_dot then sub else f with Not_found -> sub with Not_found -> f let capitalize_file f = let dir = dirname f in let base = basename f in concat dir (String.capitalize base) let uncapitalize_file f = let dir = dirname f in let base = basename f in concat dir (String.uncapitalize base) end module OASISHostPath = struct (* # 22 "src/oasis/OASISHostPath.ml" *) open Filename module Unix = OASISUnixPath let make = function | [] -> invalid_arg "OASISHostPath.make" | hd :: tl -> List.fold_left Filename.concat hd tl let of_unix ufn = if Sys.os_type = "Unix" then ufn else make (List.map (fun p -> if p = Unix.current_dir_name then current_dir_name else if p = Unix.parent_dir_name then parent_dir_name else p) (OASISString.nsplit ufn '/')) end module OASISSection = struct (* # 22 "src/oasis/OASISSection.ml" *) open OASISTypes let section_kind_common = function | Library (cs, _, _) -> `Library, cs | Object (cs, _, _) -> `Object, cs | Executable (cs, _, _) -> `Executable, cs | Flag (cs, _) -> `Flag, cs | SrcRepo (cs, _) -> `SrcRepo, cs | Test (cs, _) -> `Test, cs | Doc (cs, _) -> `Doc, cs let section_common sct = snd (section_kind_common sct) let section_common_set cs = function | Library (_, bs, lib) -> Library (cs, bs, lib) | Object (_, bs, obj) -> Object (cs, bs, obj) | Executable (_, bs, exec) -> Executable (cs, bs, exec) | Flag (_, flg) -> Flag (cs, flg) | SrcRepo (_, src_repo) -> SrcRepo (cs, src_repo) | Test (_, tst) -> Test (cs, tst) | Doc (_, doc) -> Doc (cs, doc) (** Key used to identify section *) let section_id sct = let k, cs = section_kind_common sct in k, cs.cs_name let string_of_section sct = let k, nm = section_id sct in (match k with | `Library -> "library" | `Object -> "object" | `Executable -> "executable" | `Flag -> "flag" | `SrcRepo -> "src repository" | `Test -> "test" | `Doc -> "doc") ^" "^nm let section_find id scts = List.find (fun sct -> id = section_id sct) scts module CSection = struct type t = section let id = section_id let compare t1 t2 = compare (id t1) (id t2) let equal t1 t2 = (id t1) = (id t2) let hash t = Hashtbl.hash (id t) end module MapSection = Map.Make(CSection) module SetSection = Set.Make(CSection) end module OASISBuildSection = struct (* # 22 "src/oasis/OASISBuildSection.ml" *) end module OASISExecutable = struct (* # 22 "src/oasis/OASISExecutable.ml" *) open OASISTypes let unix_exec_is (cs, bs, exec) is_native ext_dll suffix_program = let dir = OASISUnixPath.concat bs.bs_path (OASISUnixPath.dirname exec.exec_main_is) in let is_native_exec = match bs.bs_compiled_object with | Native -> true | Best -> is_native () | Byte -> false in OASISUnixPath.concat dir (cs.cs_name^(suffix_program ())), if not is_native_exec && not exec.exec_custom && bs.bs_c_sources <> [] then Some (dir^"/dll"^cs.cs_name^"_stubs"^(ext_dll ())) else None end module OASISLibrary = struct (* # 22 "src/oasis/OASISLibrary.ml" *) open OASISTypes open OASISUtils open OASISGettext open OASISSection (* Look for a module file, considering capitalization or not. *) let find_module source_file_exists bs modul = let possible_base_fn = List.map (OASISUnixPath.concat bs.bs_path) [modul; OASISUnixPath.uncapitalize_file modul; OASISUnixPath.capitalize_file modul] in (* TODO: we should be able to be able to determine the source for every * files. Hence we should introduce a Module(source: fn) for the fields * Modules and InternalModules *) List.fold_left (fun acc base_fn -> match acc with | `No_sources _ -> begin let file_found = List.fold_left (fun acc ext -> if source_file_exists (base_fn^ext) then (base_fn^ext) :: acc else acc) [] [".ml"; ".mli"; ".mll"; ".mly"] in match file_found with | [] -> acc | lst -> `Sources (base_fn, lst) end | `Sources _ -> acc) (`No_sources possible_base_fn) possible_base_fn let source_unix_files ~ctxt (cs, bs, lib) source_file_exists = List.fold_left (fun acc modul -> match find_module source_file_exists bs modul with | `Sources (base_fn, lst) -> (base_fn, lst) :: acc | `No_sources _ -> OASISMessage.warning ~ctxt (f_ "Cannot find source file matching \ module '%s' in library %s") modul cs.cs_name; acc) [] (lib.lib_modules @ lib.lib_internal_modules) let generated_unix_files ~ctxt ~is_native ~has_native_dynlink ~ext_lib ~ext_dll ~source_file_exists (cs, bs, lib) = let find_modules lst ext = let find_module modul = match find_module source_file_exists bs modul with | `Sources (base_fn, [fn]) when ext <> "cmi" && Filename.check_suffix fn ".mli" -> None (* No implementation files for pure interface. *) | `Sources (base_fn, _) -> Some [base_fn] | `No_sources lst -> OASISMessage.warning ~ctxt (f_ "Cannot find source file matching \ module '%s' in library %s") modul cs.cs_name; Some lst in List.fold_left (fun acc nm -> match find_module nm with | None -> acc | Some base_fns -> List.map (fun base_fn -> base_fn ^"."^ext) base_fns :: acc) [] lst in (* The .cmx that be compiled along *) let cmxs = let should_be_built = match bs.bs_compiled_object with | Native -> true | Best -> is_native | Byte -> false in if should_be_built then if lib.lib_pack then find_modules [cs.cs_name] "cmx" else find_modules (lib.lib_modules @ lib.lib_internal_modules) "cmx" else [] in let acc_nopath = [] in (* The headers and annot/cmt files that should be compiled along *) let headers = let sufx = if lib.lib_pack then [".cmti"; ".cmt"; ".annot"] else [".cmi"; ".cmti"; ".cmt"; ".annot"] in List.map begin List.fold_left begin fun accu s -> let dot = String.rindex s '.' in let base = String.sub s 0 dot in List.map ((^) base) sufx @ accu end [] end (find_modules lib.lib_modules "cmi") in (* Compute what libraries should be built *) let acc_nopath = (* Add the packed header file if required *) let add_pack_header acc = if lib.lib_pack then [cs.cs_name^".cmi"; cs.cs_name^".cmti"; cs.cs_name^".cmt"] :: acc else acc in let byte acc = add_pack_header ([cs.cs_name^".cma"] :: acc) in let native acc = let acc = add_pack_header (if has_native_dynlink then [cs.cs_name^".cmxs"] :: acc else acc) in [cs.cs_name^".cmxa"] :: [cs.cs_name^ext_lib] :: acc in match bs.bs_compiled_object with | Native -> byte (native acc_nopath) | Best when is_native -> byte (native acc_nopath) | Byte | Best -> byte acc_nopath in (* Add C library to be built *) let acc_nopath = if bs.bs_c_sources <> [] then begin ["lib"^cs.cs_name^"_stubs"^ext_lib] :: ["dll"^cs.cs_name^"_stubs"^ext_dll] :: acc_nopath end else acc_nopath in (* All the files generated *) List.rev_append (List.rev_map (List.rev_map (OASISUnixPath.concat bs.bs_path)) acc_nopath) (headers @ cmxs) end module OASISObject = struct (* # 22 "src/oasis/OASISObject.ml" *) open OASISTypes open OASISGettext let source_unix_files ~ctxt (cs, bs, obj) source_file_exists = List.fold_left (fun acc modul -> match OASISLibrary.find_module source_file_exists bs modul with | `Sources (base_fn, lst) -> (base_fn, lst) :: acc | `No_sources _ -> OASISMessage.warning ~ctxt (f_ "Cannot find source file matching \ module '%s' in object %s") modul cs.cs_name; acc) [] obj.obj_modules let generated_unix_files ~ctxt ~is_native ~source_file_exists (cs, bs, obj) = let find_module ext modul = match OASISLibrary.find_module source_file_exists bs modul with | `Sources (base_fn, _) -> [base_fn ^ ext] | `No_sources lst -> OASISMessage.warning ~ctxt (f_ "Cannot find source file matching \ module '%s' in object %s") modul cs.cs_name ; lst in let header, byte, native, c_object, f = match obj.obj_modules with | [ m ] -> (find_module ".cmi" m, find_module ".cmo" m, find_module ".cmx" m, find_module ".o" m, fun x -> x) | _ -> ([cs.cs_name ^ ".cmi"], [cs.cs_name ^ ".cmo"], [cs.cs_name ^ ".cmx"], [cs.cs_name ^ ".o"], OASISUnixPath.concat bs.bs_path) in List.map (List.map f) ( match bs.bs_compiled_object with | Native -> native :: c_object :: byte :: header :: [] | Best when is_native -> native :: c_object :: byte :: header :: [] | Byte | Best -> byte :: header :: []) end module OASISFindlib = struct (* # 22 "src/oasis/OASISFindlib.ml" *) open OASISTypes open OASISUtils open OASISGettext open OASISSection type library_name = name type findlib_part_name = name type 'a map_of_findlib_part_name = 'a OASISUtils.MapString.t exception InternalLibraryNotFound of library_name exception FindlibPackageNotFound of findlib_name type group_t = | Container of findlib_name * group_t list | Package of (findlib_name * common_section * build_section * [`Library of library | `Object of object_] * group_t list) type data = common_section * build_section * [`Library of library | `Object of object_] type tree = | Node of (data option) * (tree MapString.t) | Leaf of data let findlib_mapping pkg = (* Map from library name to either full findlib name or parts + parent. *) let fndlb_parts_of_lib_name = let fndlb_parts cs lib = let name = match lib.lib_findlib_name with | Some nm -> nm | None -> cs.cs_name in let name = String.concat "." (lib.lib_findlib_containers @ [name]) in name in List.fold_left (fun mp -> function | Library (cs, _, lib) -> begin let lib_name = cs.cs_name in let fndlb_parts = fndlb_parts cs lib in if MapString.mem lib_name mp then failwithf (f_ "The library name '%s' is used more than once.") lib_name; match lib.lib_findlib_parent with | Some lib_name_parent -> MapString.add lib_name (`Unsolved (lib_name_parent, fndlb_parts)) mp | None -> MapString.add lib_name (`Solved fndlb_parts) mp end | Object (cs, _, obj) -> begin let obj_name = cs.cs_name in if MapString.mem obj_name mp then failwithf (f_ "The object name '%s' is used more than once.") obj_name; let findlib_full_name = match obj.obj_findlib_fullname with | Some ns -> String.concat "." ns | None -> obj_name in MapString.add obj_name (`Solved findlib_full_name) mp end | Executable _ | Test _ | Flag _ | SrcRepo _ | Doc _ -> mp) MapString.empty pkg.sections in (* Solve the above graph to be only library name to full findlib name. *) let fndlb_name_of_lib_name = let rec solve visited mp lib_name lib_name_child = if SetString.mem lib_name visited then failwithf (f_ "Library '%s' is involved in a cycle \ with regard to findlib naming.") lib_name; let visited = SetString.add lib_name visited in try match MapString.find lib_name mp with | `Solved fndlb_nm -> fndlb_nm, mp | `Unsolved (lib_nm_parent, post_fndlb_nm) -> let pre_fndlb_nm, mp = solve visited mp lib_nm_parent lib_name in let fndlb_nm = pre_fndlb_nm^"."^post_fndlb_nm in fndlb_nm, MapString.add lib_name (`Solved fndlb_nm) mp with Not_found -> failwithf (f_ "Library '%s', which is defined as the findlib parent of \ library '%s', doesn't exist.") lib_name lib_name_child in let mp = MapString.fold (fun lib_name status mp -> match status with | `Solved _ -> (* Solved initialy, no need to go further *) mp | `Unsolved _ -> let _, mp = solve SetString.empty mp lib_name "" in mp) fndlb_parts_of_lib_name fndlb_parts_of_lib_name in MapString.map (function | `Solved fndlb_nm -> fndlb_nm | `Unsolved _ -> assert false) mp in (* Convert an internal library name to a findlib name. *) let findlib_name_of_library_name lib_nm = try MapString.find lib_nm fndlb_name_of_lib_name with Not_found -> raise (InternalLibraryNotFound lib_nm) in (* Add a library to the tree. *) let add sct mp = let fndlb_fullname = let cs, _, _ = sct in let lib_name = cs.cs_name in findlib_name_of_library_name lib_name in let rec add_children nm_lst (children: tree MapString.t) = match nm_lst with | (hd :: tl) -> begin let node = try add_node tl (MapString.find hd children) with Not_found -> (* New node *) new_node tl in MapString.add hd node children end | [] -> (* Should not have a nameless library. *) assert false and add_node tl node = if tl = [] then begin match node with | Node (None, children) -> Node (Some sct, children) | Leaf (cs', _, _) | Node (Some (cs', _, _), _) -> (* TODO: allow to merge Package, i.e. * archive(byte) = "foo.cma foo_init.cmo" *) let cs, _, _ = sct in failwithf (f_ "Library '%s' and '%s' have the same findlib name '%s'") cs.cs_name cs'.cs_name fndlb_fullname end else begin match node with | Leaf data -> Node (Some data, add_children tl MapString.empty) | Node (data_opt, children) -> Node (data_opt, add_children tl children) end and new_node = function | [] -> Leaf sct | hd :: tl -> Node (None, MapString.add hd (new_node tl) MapString.empty) in add_children (OASISString.nsplit fndlb_fullname '.') mp in let rec group_of_tree mp = MapString.fold (fun nm node acc -> let cur = match node with | Node (Some (cs, bs, lib), children) -> Package (nm, cs, bs, lib, group_of_tree children) | Node (None, children) -> Container (nm, group_of_tree children) | Leaf (cs, bs, lib) -> Package (nm, cs, bs, lib, []) in cur :: acc) mp [] in let group_mp = List.fold_left (fun mp -> function | Library (cs, bs, lib) -> add (cs, bs, `Library lib) mp | Object (cs, bs, obj) -> add (cs, bs, `Object obj) mp | _ -> mp) MapString.empty pkg.sections in let groups = group_of_tree group_mp in let library_name_of_findlib_name = lazy begin (* Revert findlib_name_of_library_name. *) MapString.fold (fun k v mp -> MapString.add v k mp) fndlb_name_of_lib_name MapString.empty end in let library_name_of_findlib_name fndlb_nm = try MapString.find fndlb_nm (Lazy.force library_name_of_findlib_name) with Not_found -> raise (FindlibPackageNotFound fndlb_nm) in groups, findlib_name_of_library_name, library_name_of_findlib_name let findlib_of_group = function | Container (fndlb_nm, _) | Package (fndlb_nm, _, _, _, _) -> fndlb_nm let root_of_group grp = let rec root_lib_aux = (* We do a DFS in the group. *) function | Container (_, children) -> List.fold_left (fun res grp -> if res = None then root_lib_aux grp else res) None children | Package (_, cs, bs, lib, _) -> Some (cs, bs, lib) in match root_lib_aux grp with | Some res -> res | None -> failwithf (f_ "Unable to determine root library of findlib library '%s'") (findlib_of_group grp) end module OASISFlag = struct (* # 22 "src/oasis/OASISFlag.ml" *) end module OASISPackage = struct (* # 22 "src/oasis/OASISPackage.ml" *) end module OASISSourceRepository = struct (* # 22 "src/oasis/OASISSourceRepository.ml" *) end module OASISTest = struct (* # 22 "src/oasis/OASISTest.ml" *) end module OASISDocument = struct (* # 22 "src/oasis/OASISDocument.ml" *) end module OASISExec = struct (* # 22 "src/oasis/OASISExec.ml" *) open OASISGettext open OASISUtils open OASISMessage (* TODO: I don't like this quote, it is there because $(rm) foo expands to * 'rm -f' foo... *) let run ~ctxt ?f_exit_code ?(quote=true) cmd args = let cmd = if quote then if Sys.os_type = "Win32" then if String.contains cmd ' ' then (* Double the 1st double quote... win32... sigh *) "\""^(Filename.quote cmd) else cmd else Filename.quote cmd else cmd in let cmdline = String.concat " " (cmd :: args) in info ~ctxt (f_ "Running command '%s'") cmdline; match f_exit_code, Sys.command cmdline with | None, 0 -> () | None, i -> failwithf (f_ "Command '%s' terminated with error code %d") cmdline i | Some f, i -> f i let run_read_output ~ctxt ?f_exit_code cmd args = let fn = Filename.temp_file "oasis-" ".txt" in try begin let () = run ~ctxt ?f_exit_code cmd (args @ [">"; Filename.quote fn]) in let chn = open_in fn in let routput = ref [] in begin try while true do routput := (input_line chn) :: !routput done with End_of_file -> () end; close_in chn; Sys.remove fn; List.rev !routput end with e -> (try Sys.remove fn with _ -> ()); raise e let run_read_one_line ~ctxt ?f_exit_code cmd args = match run_read_output ~ctxt ?f_exit_code cmd args with | [fst] -> fst | lst -> failwithf (f_ "Command return unexpected output %S") (String.concat "\n" lst) end module OASISFileUtil = struct (* # 22 "src/oasis/OASISFileUtil.ml" *) open OASISGettext let file_exists_case fn = let dirname = Filename.dirname fn in let basename = Filename.basename fn in if Sys.file_exists dirname then if basename = Filename.current_dir_name then true else List.mem basename (Array.to_list (Sys.readdir dirname)) else false let find_file ?(case_sensitive=true) paths exts = (* Cardinal product of two list *) let ( * ) lst1 lst2 = List.flatten (List.map (fun a -> List.map (fun b -> a, b) lst2) lst1) in let rec combined_paths lst = match lst with | p1 :: p2 :: tl -> let acc = (List.map (fun (a, b) -> Filename.concat a b) (p1 * p2)) in combined_paths (acc :: tl) | [e] -> e | [] -> [] in let alternatives = List.map (fun (p, e) -> if String.length e > 0 && e.[0] <> '.' then p ^ "." ^ e else p ^ e) ((combined_paths paths) * exts) in List.find (fun file -> (if case_sensitive then file_exists_case file else Sys.file_exists file) && not (Sys.is_directory file) ) alternatives let which ~ctxt prg = let path_sep = match Sys.os_type with | "Win32" -> ';' | _ -> ':' in let path_lst = OASISString.nsplit (Sys.getenv "PATH") path_sep in let exec_ext = match Sys.os_type with | "Win32" -> "" :: (OASISString.nsplit (Sys.getenv "PATHEXT") path_sep) | _ -> [""] in find_file ~case_sensitive:false [path_lst; [prg]] exec_ext (**/**) let rec fix_dir dn = (* Windows hack because Sys.file_exists "src\\" = false when * Sys.file_exists "src" = true *) let ln = String.length dn in if Sys.os_type = "Win32" && ln > 0 && dn.[ln - 1] = '\\' then fix_dir (String.sub dn 0 (ln - 1)) else dn let q = Filename.quote (**/**) let cp ~ctxt ?(recurse=false) src tgt = if recurse then match Sys.os_type with | "Win32" -> OASISExec.run ~ctxt "xcopy" [q src; q tgt; "/E"] | _ -> OASISExec.run ~ctxt "cp" ["-r"; q src; q tgt] else OASISExec.run ~ctxt (match Sys.os_type with | "Win32" -> "copy" | _ -> "cp") [q src; q tgt] let mkdir ~ctxt tgt = OASISExec.run ~ctxt (match Sys.os_type with | "Win32" -> "md" | _ -> "mkdir") [q tgt] let rec mkdir_parent ~ctxt f tgt = let tgt = fix_dir tgt in if Sys.file_exists tgt then begin if not (Sys.is_directory tgt) then OASISUtils.failwithf (f_ "Cannot create directory '%s', a file of the same name already \ exists") tgt end else begin mkdir_parent ~ctxt f (Filename.dirname tgt); if not (Sys.file_exists tgt) then begin f tgt; mkdir ~ctxt tgt end end let rmdir ~ctxt tgt = if Sys.readdir tgt = [||] then begin match Sys.os_type with | "Win32" -> OASISExec.run ~ctxt "rd" [q tgt] | _ -> OASISExec.run ~ctxt "rm" ["-r"; q tgt] end else begin OASISMessage.error ~ctxt (f_ "Cannot remove directory '%s': not empty.") tgt end let glob ~ctxt fn = let basename = Filename.basename fn in if String.length basename >= 2 && basename.[0] = '*' && basename.[1] = '.' then begin let ext_len = (String.length basename) - 2 in let ext = String.sub basename 2 ext_len in let dirname = Filename.dirname fn in Array.fold_left (fun acc fn -> try let fn_ext = String.sub fn ((String.length fn) - ext_len) ext_len in if fn_ext = ext then (Filename.concat dirname fn) :: acc else acc with Invalid_argument _ -> acc) [] (Sys.readdir dirname) end else begin if file_exists_case fn then [fn] else [] end end # 2893 "setup.ml" module BaseEnvLight = struct (* # 22 "src/base/BaseEnvLight.ml" *) module MapString = Map.Make(String) type t = string MapString.t let default_filename = Filename.concat (Sys.getcwd ()) "setup.data" let load ?(allow_empty=false) ?(filename=default_filename) () = if Sys.file_exists filename then begin let chn = open_in_bin filename in let st = Stream.of_channel chn in let line = ref 1 in let st_line = Stream.from (fun _ -> try match Stream.next st with | '\n' -> incr line; Some '\n' | c -> Some c with Stream.Failure -> None) in let lexer = Genlex.make_lexer ["="] st_line in let rec read_file mp = match Stream.npeek 3 lexer with | [Genlex.Ident nm; Genlex.Kwd "="; Genlex.String value] -> Stream.junk lexer; Stream.junk lexer; Stream.junk lexer; read_file (MapString.add nm value mp) | [] -> mp | _ -> failwith (Printf.sprintf "Malformed data file '%s' line %d" filename !line) in let mp = read_file MapString.empty in close_in chn; mp end else if allow_empty then begin MapString.empty end else begin failwith (Printf.sprintf "Unable to load environment, the file '%s' doesn't exist." filename) end let rec var_expand str env = let buff = Buffer.create ((String.length str) * 2) in Buffer.add_substitute buff (fun var -> try var_expand (MapString.find var env) env with Not_found -> failwith (Printf.sprintf "No variable %s defined when trying to expand %S." var str)) str; Buffer.contents buff let var_get name env = var_expand (MapString.find name env) env let var_choose lst env = OASISExpr.choose (fun nm -> var_get nm env) lst end # 2998 "setup.ml" module BaseContext = struct (* # 22 "src/base/BaseContext.ml" *) (* TODO: get rid of this module. *) open OASISContext let args () = fst (fspecs ()) let default = default end module BaseMessage = struct (* # 22 "src/base/BaseMessage.ml" *) (** Message to user, overrid for Base @author Sylvain Le Gall *) open OASISMessage open BaseContext let debug fmt = debug ~ctxt:!default fmt let info fmt = info ~ctxt:!default fmt let warning fmt = warning ~ctxt:!default fmt let error fmt = error ~ctxt:!default fmt end module BaseEnv = struct (* # 22 "src/base/BaseEnv.ml" *) open OASISGettext open OASISUtils open PropList module MapString = BaseEnvLight.MapString type origin_t = | ODefault | OGetEnv | OFileLoad | OCommandLine type cli_handle_t = | CLINone | CLIAuto | CLIWith | CLIEnable | CLIUser of (Arg.key * Arg.spec * Arg.doc) list type definition_t = { hide: bool; dump: bool; cli: cli_handle_t; arg_help: string option; group: string option; } let schema = Schema.create "environment" (* Environment data *) let env = Data.create () (* Environment data from file *) let env_from_file = ref MapString.empty (* Lexer for var *) let var_lxr = Genlex.make_lexer [] let rec var_expand str = let buff = Buffer.create ((String.length str) * 2) in Buffer.add_substitute buff (fun var -> try (* TODO: this is a quick hack to allow calling Test.Command * without defining executable name really. I.e. if there is * an exec Executable toto, then $(toto) should be replace * by its real name. It is however useful to have this function * for other variable that depend on the host and should be * written better than that. *) let st = var_lxr (Stream.of_string var) in match Stream.npeek 3 st with | [Genlex.Ident "utoh"; Genlex.Ident nm] -> OASISHostPath.of_unix (var_get nm) | [Genlex.Ident "utoh"; Genlex.String s] -> OASISHostPath.of_unix s | [Genlex.Ident "ocaml_escaped"; Genlex.Ident nm] -> String.escaped (var_get nm) | [Genlex.Ident "ocaml_escaped"; Genlex.String s] -> String.escaped s | [Genlex.Ident nm] -> var_get nm | _ -> failwithf (f_ "Unknown expression '%s' in variable expansion of %s.") var str with | Unknown_field (_, _) -> failwithf (f_ "No variable %s defined when trying to expand %S.") var str | Stream.Error e -> failwithf (f_ "Syntax error when parsing '%s' when trying to \ expand %S: %s") var str e) str; Buffer.contents buff and var_get name = let vl = try Schema.get schema env name with Unknown_field _ as e -> begin try MapString.find name !env_from_file with Not_found -> raise e end in var_expand vl let var_choose ?printer ?name lst = OASISExpr.choose ?printer ?name var_get lst let var_protect vl = let buff = Buffer.create (String.length vl) in String.iter (function | '$' -> Buffer.add_string buff "\\$" | c -> Buffer.add_char buff c) vl; Buffer.contents buff let var_define ?(hide=false) ?(dump=true) ?short_desc ?(cli=CLINone) ?arg_help ?group name (* TODO: type constraint on the fact that name must be a valid OCaml id *) dflt = let default = [ OFileLoad, (fun () -> MapString.find name !env_from_file); ODefault, dflt; OGetEnv, (fun () -> Sys.getenv name); ] in let extra = { hide = hide; dump = dump; cli = cli; arg_help = arg_help; group = group; } in (* Try to find a value that can be defined *) let var_get_low lst = let errors, res = List.fold_left (fun (errors, res) (o, v) -> if res = None then begin try errors, Some (v ()) with | Not_found -> errors, res | Failure rsn -> (rsn :: errors), res | e -> (Printexc.to_string e) :: errors, res end else errors, res) ([], None) (List.sort (fun (o1, _) (o2, _) -> Pervasives.compare o2 o1) lst) in match res, errors with | Some v, _ -> v | None, [] -> raise (Not_set (name, None)) | None, lst -> raise (Not_set (name, Some (String.concat (s_ ", ") lst))) in let help = match short_desc with | Some fs -> Some fs | None -> None in let var_get_lst = FieldRO.create ~schema ~name ~parse:(fun ?(context=ODefault) s -> [context, fun () -> s]) ~print:var_get_low ~default ~update:(fun ?context x old_x -> x @ old_x) ?help extra in fun () -> var_expand (var_get_low (var_get_lst env)) let var_redefine ?hide ?dump ?short_desc ?cli ?arg_help ?group name dflt = if Schema.mem schema name then begin (* TODO: look suspsicious, we want to memorize dflt not dflt () *) Schema.set schema env ~context:ODefault name (dflt ()); fun () -> var_get name end else begin var_define ?hide ?dump ?short_desc ?cli ?arg_help ?group name dflt end let var_ignore (e: unit -> string) = () let print_hidden = var_define ~hide:true ~dump:false ~cli:CLIAuto ~arg_help:"Print even non-printable variable. (debug)" "print_hidden" (fun () -> "false") let var_all () = List.rev (Schema.fold (fun acc nm def _ -> if not def.hide || bool_of_string (print_hidden ()) then nm :: acc else acc) [] schema) let default_filename = BaseEnvLight.default_filename let load ?allow_empty ?filename () = env_from_file := BaseEnvLight.load ?allow_empty ?filename () let unload () = env_from_file := MapString.empty; Data.clear env let dump ?(filename=default_filename) () = let chn = open_out_bin filename in let output nm value = Printf.fprintf chn "%s=%S\n" nm value in let mp_todo = (* Dump data from schema *) Schema.fold (fun mp_todo nm def _ -> if def.dump then begin try let value = Schema.get schema env nm in output nm value with Not_set _ -> () end; MapString.remove nm mp_todo) !env_from_file schema in (* Dump data defined outside of schema *) MapString.iter output mp_todo; (* End of the dump *) close_out chn let print () = let printable_vars = Schema.fold (fun acc nm def short_descr_opt -> if not def.hide || bool_of_string (print_hidden ()) then begin try let value = Schema.get schema env nm in let txt = match short_descr_opt with | Some s -> s () | None -> nm in (txt, value) :: acc with Not_set _ -> acc end else acc) [] schema in let max_length = List.fold_left max 0 (List.rev_map String.length (List.rev_map fst printable_vars)) in let dot_pad str = String.make ((max_length - (String.length str)) + 3) '.' in Printf.printf "\nConfiguration: \n"; List.iter (fun (name, value) -> Printf.printf "%s: %s %s\n" name (dot_pad name) value) (List.rev printable_vars); Printf.printf "\n%!" let args () = let arg_concat = OASISUtils.varname_concat ~hyphen:'-' in [ "--override", Arg.Tuple ( let rvr = ref "" in let rvl = ref "" in [ Arg.Set_string rvr; Arg.Set_string rvl; Arg.Unit (fun () -> Schema.set schema env ~context:OCommandLine !rvr !rvl) ] ), "var+val Override any configuration variable."; ] @ List.flatten (Schema.fold (fun acc name def short_descr_opt -> let var_set s = Schema.set schema env ~context:OCommandLine name s in let arg_name = OASISUtils.varname_of_string ~hyphen:'-' name in let hlp = match short_descr_opt with | Some txt -> txt () | None -> "" in let arg_hlp = match def.arg_help with | Some s -> s | None -> "str" in let default_value = try Printf.sprintf (f_ " [%s]") (Schema.get schema env name) with Not_set _ -> "" in let args = match def.cli with | CLINone -> [] | CLIAuto -> [ arg_concat "--" arg_name, Arg.String var_set, Printf.sprintf (f_ "%s %s%s") arg_hlp hlp default_value ] | CLIWith -> [ arg_concat "--with-" arg_name, Arg.String var_set, Printf.sprintf (f_ "%s %s%s") arg_hlp hlp default_value ] | CLIEnable -> let dflt = if default_value = " [true]" then s_ " [default: enabled]" else s_ " [default: disabled]" in [ arg_concat "--enable-" arg_name, Arg.Unit (fun () -> var_set "true"), Printf.sprintf (f_ " %s%s") hlp dflt; arg_concat "--disable-" arg_name, Arg.Unit (fun () -> var_set "false"), Printf.sprintf (f_ " %s%s") hlp dflt ] | CLIUser lst -> lst in args :: acc) [] schema) end module BaseArgExt = struct (* # 22 "src/base/BaseArgExt.ml" *) open OASISUtils open OASISGettext let parse argv args = (* Simulate command line for Arg *) let current = ref 0 in try Arg.parse_argv ~current:current (Array.concat [[|"none"|]; argv]) (Arg.align args) (failwithf (f_ "Don't know what to do with arguments: '%s'")) (s_ "configure options:") with | Arg.Help txt -> print_endline txt; exit 0 | Arg.Bad txt -> prerr_endline txt; exit 1 end module BaseCheck = struct (* # 22 "src/base/BaseCheck.ml" *) open BaseEnv open BaseMessage open OASISUtils open OASISGettext let prog_best prg prg_lst = var_redefine prg (fun () -> let alternate = List.fold_left (fun res e -> match res with | Some _ -> res | None -> try Some (OASISFileUtil.which ~ctxt:!BaseContext.default e) with Not_found -> None) None prg_lst in match alternate with | Some prg -> prg | None -> raise Not_found) let prog prg = prog_best prg [prg] let prog_opt prg = prog_best prg [prg^".opt"; prg] let ocamlfind = prog "ocamlfind" let version var_prefix cmp fversion () = (* Really compare version provided *) let var = var_prefix^"_version_"^(OASISVersion.varname_of_comparator cmp) in var_redefine ~hide:true var (fun () -> let version_str = match fversion () with | "[Distributed with OCaml]" -> begin try (var_get "ocaml_version") with Not_found -> warning (f_ "Variable ocaml_version not defined, fallback \ to default"); Sys.ocaml_version end | res -> res in let version = OASISVersion.version_of_string version_str in if OASISVersion.comparator_apply version cmp then version_str else failwithf (f_ "Cannot satisfy version constraint on %s: %s (version: %s)") var_prefix (OASISVersion.string_of_comparator cmp) version_str) () let package_version pkg = OASISExec.run_read_one_line ~ctxt:!BaseContext.default (ocamlfind ()) ["query"; "-format"; "%v"; pkg] let package ?version_comparator pkg () = let var = OASISUtils.varname_concat "pkg_" (OASISUtils.varname_of_string pkg) in let findlib_dir pkg = let dir = OASISExec.run_read_one_line ~ctxt:!BaseContext.default (ocamlfind ()) ["query"; "-format"; "%d"; pkg] in if Sys.file_exists dir && Sys.is_directory dir then dir else failwithf (f_ "When looking for findlib package %s, \ directory %s return doesn't exist") pkg dir in let vl = var_redefine var (fun () -> findlib_dir pkg) () in ( match version_comparator with | Some ver_cmp -> ignore (version var ver_cmp (fun _ -> package_version pkg) ()) | None -> () ); vl end module BaseOCamlcConfig = struct (* # 22 "src/base/BaseOCamlcConfig.ml" *) open BaseEnv open OASISUtils open OASISGettext module SMap = Map.Make(String) let ocamlc = BaseCheck.prog_opt "ocamlc" let ocamlc_config_map = (* Map name to value for ocamlc -config output (name ^": "^value) *) let rec split_field mp lst = match lst with | line :: tl -> let mp = try let pos_semicolon = String.index line ':' in if pos_semicolon > 1 then ( let name = String.sub line 0 pos_semicolon in let linelen = String.length line in let value = if linelen > pos_semicolon + 2 then String.sub line (pos_semicolon + 2) (linelen - pos_semicolon - 2) else "" in SMap.add name value mp ) else ( mp ) with Not_found -> ( mp ) in split_field mp tl | [] -> mp in let cache = lazy (var_protect (Marshal.to_string (split_field SMap.empty (OASISExec.run_read_output ~ctxt:!BaseContext.default (ocamlc ()) ["-config"])) [])) in var_redefine "ocamlc_config_map" ~hide:true ~dump:false (fun () -> (* TODO: update if ocamlc change !!! *) Lazy.force cache) let var_define nm = (* Extract data from ocamlc -config *) let avlbl_config_get () = Marshal.from_string (ocamlc_config_map ()) 0 in let chop_version_suffix s = try String.sub s 0 (String.index s '+') with _ -> s in let nm_config, value_config = match nm with | "ocaml_version" -> "version", chop_version_suffix | _ -> nm, (fun x -> x) in var_redefine nm (fun () -> try let map = avlbl_config_get () in let value = SMap.find nm_config map in value_config value with Not_found -> failwithf (f_ "Cannot find field '%s' in '%s -config' output") nm (ocamlc ())) end module BaseStandardVar = struct (* # 22 "src/base/BaseStandardVar.ml" *) open OASISGettext open OASISTypes open OASISExpr open BaseCheck open BaseEnv let ocamlfind = BaseCheck.ocamlfind let ocamlc = BaseOCamlcConfig.ocamlc let ocamlopt = prog_opt "ocamlopt" let ocamlbuild = prog "ocamlbuild" (**/**) let rpkg = ref None let pkg_get () = match !rpkg with | Some pkg -> pkg | None -> failwith (s_ "OASIS Package is not set") let var_cond = ref [] let var_define_cond ~since_version f dflt = let holder = ref (fun () -> dflt) in let since_version = OASISVersion.VGreaterEqual (OASISVersion.version_of_string since_version) in var_cond := (fun ver -> if OASISVersion.comparator_apply ver since_version then holder := f ()) :: !var_cond; fun () -> !holder () (**/**) let pkg_name = var_define ~short_desc:(fun () -> s_ "Package name") "pkg_name" (fun () -> (pkg_get ()).name) let pkg_version = var_define ~short_desc:(fun () -> s_ "Package version") "pkg_version" (fun () -> (OASISVersion.string_of_version (pkg_get ()).version)) let c = BaseOCamlcConfig.var_define let os_type = c "os_type" let system = c "system" let architecture = c "architecture" let ccomp_type = c "ccomp_type" let ocaml_version = c "ocaml_version" (* TODO: Check standard variable presence at runtime *) let standard_library_default = c "standard_library_default" let standard_library = c "standard_library" let standard_runtime = c "standard_runtime" let bytecomp_c_compiler = c "bytecomp_c_compiler" let native_c_compiler = c "native_c_compiler" let model = c "model" let ext_obj = c "ext_obj" let ext_asm = c "ext_asm" let ext_lib = c "ext_lib" let ext_dll = c "ext_dll" let default_executable_name = c "default_executable_name" let systhread_supported = c "systhread_supported" let flexlink = BaseCheck.prog "flexlink" let flexdll_version = var_define ~short_desc:(fun () -> "FlexDLL version (Win32)") "flexdll_version" (fun () -> let lst = OASISExec.run_read_output ~ctxt:!BaseContext.default (flexlink ()) ["-help"] in match lst with | line :: _ -> Scanf.sscanf line "FlexDLL version %s" (fun ver -> ver) | [] -> raise Not_found) (**/**) let p name hlp dflt = var_define ~short_desc:hlp ~cli:CLIAuto ~arg_help:"dir" name dflt let (/) a b = if os_type () = Sys.os_type then Filename.concat a b else if os_type () = "Unix" then OASISUnixPath.concat a b else OASISUtils.failwithf (f_ "Cannot handle os_type %s filename concat") (os_type ()) (**/**) let prefix = p "prefix" (fun () -> s_ "Install architecture-independent files dir") (fun () -> match os_type () with | "Win32" -> let program_files = Sys.getenv "PROGRAMFILES" in program_files/(pkg_name ()) | _ -> "/usr/local") let exec_prefix = p "exec_prefix" (fun () -> s_ "Install architecture-dependent files in dir") (fun () -> "$prefix") let bindir = p "bindir" (fun () -> s_ "User executables") (fun () -> "$exec_prefix"/"bin") let sbindir = p "sbindir" (fun () -> s_ "System admin executables") (fun () -> "$exec_prefix"/"sbin") let libexecdir = p "libexecdir" (fun () -> s_ "Program executables") (fun () -> "$exec_prefix"/"libexec") let sysconfdir = p "sysconfdir" (fun () -> s_ "Read-only single-machine data") (fun () -> "$prefix"/"etc") let sharedstatedir = p "sharedstatedir" (fun () -> s_ "Modifiable architecture-independent data") (fun () -> "$prefix"/"com") let localstatedir = p "localstatedir" (fun () -> s_ "Modifiable single-machine data") (fun () -> "$prefix"/"var") let libdir = p "libdir" (fun () -> s_ "Object code libraries") (fun () -> "$exec_prefix"/"lib") let datarootdir = p "datarootdir" (fun () -> s_ "Read-only arch-independent data root") (fun () -> "$prefix"/"share") let datadir = p "datadir" (fun () -> s_ "Read-only architecture-independent data") (fun () -> "$datarootdir") let infodir = p "infodir" (fun () -> s_ "Info documentation") (fun () -> "$datarootdir"/"info") let localedir = p "localedir" (fun () -> s_ "Locale-dependent data") (fun () -> "$datarootdir"/"locale") let mandir = p "mandir" (fun () -> s_ "Man documentation") (fun () -> "$datarootdir"/"man") let docdir = p "docdir" (fun () -> s_ "Documentation root") (fun () -> "$datarootdir"/"doc"/"$pkg_name") let htmldir = p "htmldir" (fun () -> s_ "HTML documentation") (fun () -> "$docdir") let dvidir = p "dvidir" (fun () -> s_ "DVI documentation") (fun () -> "$docdir") let pdfdir = p "pdfdir" (fun () -> s_ "PDF documentation") (fun () -> "$docdir") let psdir = p "psdir" (fun () -> s_ "PS documentation") (fun () -> "$docdir") let destdir = p "destdir" (fun () -> s_ "Prepend a path when installing package") (fun () -> raise (PropList.Not_set ("destdir", Some (s_ "undefined by construct")))) let findlib_version = var_define "findlib_version" (fun () -> BaseCheck.package_version "findlib") let is_native = var_define "is_native" (fun () -> try let _s: string = ocamlopt () in "true" with PropList.Not_set _ -> let _s: string = ocamlc () in "false") let ext_program = var_define "suffix_program" (fun () -> match os_type () with | "Win32" | "Cygwin" -> ".exe" | _ -> "") let rm = var_define ~short_desc:(fun () -> s_ "Remove a file.") "rm" (fun () -> match os_type () with | "Win32" -> "del" | _ -> "rm -f") let rmdir = var_define ~short_desc:(fun () -> s_ "Remove a directory.") "rmdir" (fun () -> match os_type () with | "Win32" -> "rd" | _ -> "rm -rf") let debug = var_define ~short_desc:(fun () -> s_ "Turn ocaml debug flag on") ~cli:CLIEnable "debug" (fun () -> "true") let profile = var_define ~short_desc:(fun () -> s_ "Turn ocaml profile flag on") ~cli:CLIEnable "profile" (fun () -> "false") let tests = var_define_cond ~since_version:"0.3" (fun () -> var_define ~short_desc:(fun () -> s_ "Compile tests executable and library and run them") ~cli:CLIEnable "tests" (fun () -> "false")) "true" let docs = var_define_cond ~since_version:"0.3" (fun () -> var_define ~short_desc:(fun () -> s_ "Create documentations") ~cli:CLIEnable "docs" (fun () -> "true")) "true" let native_dynlink = var_define ~short_desc:(fun () -> s_ "Compiler support generation of .cmxs.") ~cli:CLINone "native_dynlink" (fun () -> let res = let ocaml_lt_312 () = OASISVersion.comparator_apply (OASISVersion.version_of_string (ocaml_version ())) (OASISVersion.VLesser (OASISVersion.version_of_string "3.12.0")) in let flexdll_lt_030 () = OASISVersion.comparator_apply (OASISVersion.version_of_string (flexdll_version ())) (OASISVersion.VLesser (OASISVersion.version_of_string "0.30")) in let has_native_dynlink = let ocamlfind = ocamlfind () in try let fn = OASISExec.run_read_one_line ~ctxt:!BaseContext.default ocamlfind ["query"; "-predicates"; "native"; "dynlink"; "-format"; "%d/%a"] in Sys.file_exists fn with _ -> false in if not has_native_dynlink then false else if ocaml_lt_312 () then false else if (os_type () = "Win32" || os_type () = "Cygwin") && flexdll_lt_030 () then begin BaseMessage.warning (f_ ".cmxs generation disabled because FlexDLL needs to be \ at least 0.30. Please upgrade FlexDLL from %s to 0.30.") (flexdll_version ()); false end else true in string_of_bool res) let init pkg = rpkg := Some pkg; List.iter (fun f -> f pkg.oasis_version) !var_cond end module BaseFileAB = struct (* # 22 "src/base/BaseFileAB.ml" *) open BaseEnv open OASISGettext open BaseMessage let to_filename fn = let fn = OASISHostPath.of_unix fn in if not (Filename.check_suffix fn ".ab") then warning (f_ "File '%s' doesn't have '.ab' extension") fn; Filename.chop_extension fn let replace fn_lst = let buff = Buffer.create 13 in List.iter (fun fn -> let fn = OASISHostPath.of_unix fn in let chn_in = open_in fn in let chn_out = open_out (to_filename fn) in ( try while true do Buffer.add_string buff (var_expand (input_line chn_in)); Buffer.add_char buff '\n' done with End_of_file -> () ); Buffer.output_buffer chn_out buff; Buffer.clear buff; close_in chn_in; close_out chn_out) fn_lst end module BaseLog = struct (* # 22 "src/base/BaseLog.ml" *) open OASISUtils let default_filename = Filename.concat (Filename.dirname BaseEnv.default_filename) "setup.log" module SetTupleString = Set.Make (struct type t = string * string let compare (s11, s12) (s21, s22) = match String.compare s11 s21 with | 0 -> String.compare s12 s22 | n -> n end) let load () = if Sys.file_exists default_filename then begin let chn = open_in default_filename in let scbuf = Scanf.Scanning.from_file default_filename in let rec read_aux (st, lst) = if not (Scanf.Scanning.end_of_input scbuf) then begin let acc = try Scanf.bscanf scbuf "%S %S\n" (fun e d -> let t = e, d in if SetTupleString.mem t st then st, lst else SetTupleString.add t st, t :: lst) with Scanf.Scan_failure _ -> failwith (Scanf.bscanf scbuf "%l" (fun line -> Printf.sprintf "Malformed log file '%s' at line %d" default_filename line)) in read_aux acc end else begin close_in chn; List.rev lst end in read_aux (SetTupleString.empty, []) end else begin [] end let register event data = let chn_out = open_out_gen [Open_append; Open_creat; Open_text] 0o644 default_filename in Printf.fprintf chn_out "%S %S\n" event data; close_out chn_out let unregister event data = if Sys.file_exists default_filename then begin let lst = load () in let chn_out = open_out default_filename in let write_something = ref false in List.iter (fun (e, d) -> if e <> event || d <> data then begin write_something := true; Printf.fprintf chn_out "%S %S\n" e d end) lst; close_out chn_out; if not !write_something then Sys.remove default_filename end let filter events = let st_events = List.fold_left (fun st e -> SetString.add e st) SetString.empty events in List.filter (fun (e, _) -> SetString.mem e st_events) (load ()) let exists event data = List.exists (fun v -> (event, data) = v) (load ()) end module BaseBuilt = struct (* # 22 "src/base/BaseBuilt.ml" *) open OASISTypes open OASISGettext open BaseStandardVar open BaseMessage type t = | BExec (* Executable *) | BExecLib (* Library coming with executable *) | BLib (* Library *) | BObj (* Library *) | BDoc (* Document *) let to_log_event_file t nm = "built_"^ (match t with | BExec -> "exec" | BExecLib -> "exec_lib" | BLib -> "lib" | BObj -> "obj" | BDoc -> "doc")^ "_"^nm let to_log_event_done t nm = "is_"^(to_log_event_file t nm) let register t nm lst = BaseLog.register (to_log_event_done t nm) "true"; List.iter (fun alt -> let registered = List.fold_left (fun registered fn -> if OASISFileUtil.file_exists_case fn then begin BaseLog.register (to_log_event_file t nm) (if Filename.is_relative fn then Filename.concat (Sys.getcwd ()) fn else fn); true end else registered) false alt in if not registered then warning (f_ "Cannot find an existing alternative files among: %s") (String.concat (s_ ", ") alt)) lst let unregister t nm = List.iter (fun (e, d) -> BaseLog.unregister e d) (BaseLog.filter [to_log_event_file t nm; to_log_event_done t nm]) let fold t nm f acc = List.fold_left (fun acc (_, fn) -> if OASISFileUtil.file_exists_case fn then begin f acc fn end else begin warning (f_ "File '%s' has been marked as built \ for %s but doesn't exist") fn (Printf.sprintf (match t with | BExec | BExecLib -> (f_ "executable %s") | BLib -> (f_ "library %s") | BObj -> (f_ "object %s") | BDoc -> (f_ "documentation %s")) nm); acc end) acc (BaseLog.filter [to_log_event_file t nm]) let is_built t nm = List.fold_left (fun is_built (_, d) -> (try bool_of_string d with _ -> false)) false (BaseLog.filter [to_log_event_done t nm]) let of_executable ffn (cs, bs, exec) = let unix_exec_is, unix_dll_opt = OASISExecutable.unix_exec_is (cs, bs, exec) (fun () -> bool_of_string (is_native ())) ext_dll ext_program in let evs = (BExec, cs.cs_name, [[ffn unix_exec_is]]) :: (match unix_dll_opt with | Some fn -> [BExecLib, cs.cs_name, [[ffn fn]]] | None -> []) in evs, unix_exec_is, unix_dll_opt let of_library ffn (cs, bs, lib) = let unix_lst = OASISLibrary.generated_unix_files ~ctxt:!BaseContext.default ~source_file_exists:(fun fn -> OASISFileUtil.file_exists_case (OASISHostPath.of_unix fn)) ~is_native:(bool_of_string (is_native ())) ~has_native_dynlink:(bool_of_string (native_dynlink ())) ~ext_lib:(ext_lib ()) ~ext_dll:(ext_dll ()) (cs, bs, lib) in let evs = [BLib, cs.cs_name, List.map (List.map ffn) unix_lst] in evs, unix_lst let of_object ffn (cs, bs, obj) = let unix_lst = OASISObject.generated_unix_files ~ctxt:!BaseContext.default ~source_file_exists:(fun fn -> OASISFileUtil.file_exists_case (OASISHostPath.of_unix fn)) ~is_native:(bool_of_string (is_native ())) (cs, bs, obj) in let evs = [BObj, cs.cs_name, List.map (List.map ffn) unix_lst] in evs, unix_lst end module BaseCustom = struct (* # 22 "src/base/BaseCustom.ml" *) open BaseEnv open BaseMessage open OASISTypes open OASISGettext let run cmd args extra_args = OASISExec.run ~ctxt:!BaseContext.default ~quote:false (var_expand cmd) (List.map var_expand (args @ (Array.to_list extra_args))) let hook ?(failsafe=false) cstm f e = let optional_command lst = let printer = function | Some (cmd, args) -> String.concat " " (cmd :: args) | None -> s_ "No command" in match var_choose ~name:(s_ "Pre/Post Command") ~printer lst with | Some (cmd, args) -> begin try run cmd args [||] with e when failsafe -> warning (f_ "Command '%s' fail with error: %s") (String.concat " " (cmd :: args)) (match e with | Failure msg -> msg | e -> Printexc.to_string e) end | None -> () in let res = optional_command cstm.pre_command; f e in optional_command cstm.post_command; res end module BaseDynVar = struct (* # 22 "src/base/BaseDynVar.ml" *) open OASISTypes open OASISGettext open BaseEnv open BaseBuilt let init pkg = (* TODO: disambiguate exec vs other variable by adding exec_VARNAME. *) (* TODO: provide compile option for library libary_byte_args_VARNAME... *) List.iter (function | Executable (cs, bs, exec) -> if var_choose bs.bs_build then var_ignore (var_redefine (* We don't save this variable *) ~dump:false ~short_desc:(fun () -> Printf.sprintf (f_ "Filename of executable '%s'") cs.cs_name) (OASISUtils.varname_of_string cs.cs_name) (fun () -> let fn_opt = fold BExec cs.cs_name (fun _ fn -> Some fn) None in match fn_opt with | Some fn -> fn | None -> raise (PropList.Not_set (cs.cs_name, Some (Printf.sprintf (f_ "Executable '%s' not yet built.") cs.cs_name))))) | Library _ | Object _ | Flag _ | Test _ | SrcRepo _ | Doc _ -> ()) pkg.sections end module BaseTest = struct (* # 22 "src/base/BaseTest.ml" *) open BaseEnv open BaseMessage open OASISTypes open OASISExpr open OASISGettext let test lst pkg extra_args = let one_test (failure, n) (test_plugin, cs, test) = if var_choose ~name:(Printf.sprintf (f_ "test %s run") cs.cs_name) ~printer:string_of_bool test.test_run then begin let () = info (f_ "Running test '%s'") cs.cs_name in let back_cwd = match test.test_working_directory with | Some dir -> let cwd = Sys.getcwd () in let chdir d = info (f_ "Changing directory to '%s'") d; Sys.chdir d in chdir dir; fun () -> chdir cwd | None -> fun () -> () in try let failure_percent = BaseCustom.hook test.test_custom (test_plugin pkg (cs, test)) extra_args in back_cwd (); (failure_percent +. failure, n + 1) with e -> begin back_cwd (); raise e end end else begin info (f_ "Skipping test '%s'") cs.cs_name; (failure, n) end in let failed, n = List.fold_left one_test (0.0, 0) lst in let failure_percent = if n = 0 then 0.0 else failed /. (float_of_int n) in let msg = Printf.sprintf (f_ "Tests had a %.2f%% failure rate") (100. *. failure_percent) in if failure_percent > 0.0 then failwith msg else info "%s" msg; (* Possible explanation why the tests where not run. *) if OASISFeatures.package_test OASISFeatures.flag_tests pkg && not (bool_of_string (BaseStandardVar.tests ())) && lst <> [] then BaseMessage.warning "Tests are turned off, consider enabling with \ 'ocaml setup.ml -configure --enable-tests'" end module BaseDoc = struct (* # 22 "src/base/BaseDoc.ml" *) open BaseEnv open BaseMessage open OASISTypes open OASISGettext let doc lst pkg extra_args = let one_doc (doc_plugin, cs, doc) = if var_choose ~name:(Printf.sprintf (f_ "documentation %s build") cs.cs_name) ~printer:string_of_bool doc.doc_build then begin info (f_ "Building documentation '%s'") cs.cs_name; BaseCustom.hook doc.doc_custom (doc_plugin pkg (cs, doc)) extra_args end in List.iter one_doc lst; if OASISFeatures.package_test OASISFeatures.flag_docs pkg && not (bool_of_string (BaseStandardVar.docs ())) && lst <> [] then BaseMessage.warning "Docs are turned off, consider enabling with \ 'ocaml setup.ml -configure --enable-docs'" end module BaseSetup = struct (* # 22 "src/base/BaseSetup.ml" *) open BaseEnv open BaseMessage open OASISTypes open OASISSection open OASISGettext open OASISUtils type std_args_fun = package -> string array -> unit type ('a, 'b) section_args_fun = name * (package -> (common_section * 'a) -> string array -> 'b) type t = { configure: std_args_fun; build: std_args_fun; doc: ((doc, unit) section_args_fun) list; test: ((test, float) section_args_fun) list; install: std_args_fun; uninstall: std_args_fun; clean: std_args_fun list; clean_doc: (doc, unit) section_args_fun list; clean_test: (test, unit) section_args_fun list; distclean: std_args_fun list; distclean_doc: (doc, unit) section_args_fun list; distclean_test: (test, unit) section_args_fun list; package: package; oasis_fn: string option; oasis_version: string; oasis_digest: Digest.t option; oasis_exec: string option; oasis_setup_args: string list; setup_update: bool; } (* Associate a plugin function with data from package *) let join_plugin_sections filter_map lst = List.rev (List.fold_left (fun acc sct -> match filter_map sct with | Some e -> e :: acc | None -> acc) [] lst) (* Search for plugin data associated with a section name *) let lookup_plugin_section plugin action nm lst = try List.assoc nm lst with Not_found -> failwithf (f_ "Cannot find plugin %s matching section %s for %s action") plugin nm action let configure t args = (* Run configure *) BaseCustom.hook t.package.conf_custom (fun () -> (* Reload if preconf has changed it *) begin try unload (); load (); with _ -> () end; (* Run plugin's configure *) t.configure t.package args; (* Dump to allow postconf to change it *) dump ()) (); (* Reload environment *) unload (); load (); (* Save environment *) print (); (* Replace data in file *) BaseFileAB.replace t.package.files_ab let build t args = BaseCustom.hook t.package.build_custom (t.build t.package) args let doc t args = BaseDoc.doc (join_plugin_sections (function | Doc (cs, e) -> Some (lookup_plugin_section "documentation" (s_ "build") cs.cs_name t.doc, cs, e) | _ -> None) t.package.sections) t.package args let test t args = BaseTest.test (join_plugin_sections (function | Test (cs, e) -> Some (lookup_plugin_section "test" (s_ "run") cs.cs_name t.test, cs, e) | _ -> None) t.package.sections) t.package args let all t args = let rno_doc = ref false in let rno_test = ref false in let arg_rest = ref [] in Arg.parse_argv ~current:(ref 0) (Array.of_list ((Sys.executable_name^" all") :: (Array.to_list args))) [ "-no-doc", Arg.Set rno_doc, s_ "Don't run doc target"; "-no-test", Arg.Set rno_test, s_ "Don't run test target"; "--", Arg.Rest (fun arg -> arg_rest := arg :: !arg_rest), s_ "All arguments for configure."; ] (failwithf (f_ "Don't know what to do with '%s'")) ""; info "Running configure step"; configure t (Array.of_list (List.rev !arg_rest)); info "Running build step"; build t [||]; (* Load setup.log dynamic variables *) BaseDynVar.init t.package; if not !rno_doc then begin info "Running doc step"; doc t [||]; end else begin info "Skipping doc step" end; if not !rno_test then begin info "Running test step"; test t [||] end else begin info "Skipping test step" end let install t args = BaseCustom.hook t.package.install_custom (t.install t.package) args let uninstall t args = BaseCustom.hook t.package.uninstall_custom (t.uninstall t.package) args let reinstall t args = uninstall t args; install t args let clean, distclean = let failsafe f a = try f a with e -> warning (f_ "Action fail with error: %s") (match e with | Failure msg -> msg | e -> Printexc.to_string e) in let generic_clean t cstm mains docs tests args = BaseCustom.hook ~failsafe:true cstm (fun () -> (* Clean section *) List.iter (function | Test (cs, test) -> let f = try List.assoc cs.cs_name tests with Not_found -> fun _ _ _ -> () in failsafe (f t.package (cs, test)) args | Doc (cs, doc) -> let f = try List.assoc cs.cs_name docs with Not_found -> fun _ _ _ -> () in failsafe (f t.package (cs, doc)) args | Library _ | Object _ | Executable _ | Flag _ | SrcRepo _ -> ()) t.package.sections; (* Clean whole package *) List.iter (fun f -> failsafe (f t.package) args) mains) () in let clean t args = generic_clean t t.package.clean_custom t.clean t.clean_doc t.clean_test args in let distclean t args = (* Call clean *) clean t args; (* Call distclean code *) generic_clean t t.package.distclean_custom t.distclean t.distclean_doc t.distclean_test args; (* Remove generated file *) List.iter (fun fn -> if Sys.file_exists fn then begin info (f_ "Remove '%s'") fn; Sys.remove fn end) (BaseEnv.default_filename :: BaseLog.default_filename :: (List.rev_map BaseFileAB.to_filename t.package.files_ab)) in clean, distclean let version t _ = print_endline t.oasis_version let update_setup_ml, no_update_setup_ml_cli = let b = ref true in b, ("-no-update-setup-ml", Arg.Clear b, s_ " Don't try to update setup.ml, even if _oasis has changed.") let default_oasis_fn = "_oasis" let update_setup_ml t = let oasis_fn = match t.oasis_fn with | Some fn -> fn | None -> default_oasis_fn in let oasis_exec = match t.oasis_exec with | Some fn -> fn | None -> "oasis" in let ocaml = Sys.executable_name in let setup_ml, args = match Array.to_list Sys.argv with | setup_ml :: args -> setup_ml, args | [] -> failwith (s_ "Expecting non-empty command line arguments.") in let ocaml, setup_ml = if Sys.executable_name = Sys.argv.(0) then (* We are not running in standard mode, probably the script * is precompiled. *) "ocaml", "setup.ml" else ocaml, setup_ml in let no_update_setup_ml_cli, _, _ = no_update_setup_ml_cli in let do_update () = let oasis_exec_version = OASISExec.run_read_one_line ~ctxt:!BaseContext.default ~f_exit_code: (function | 0 -> () | 1 -> failwithf (f_ "Executable '%s' is probably an old version \ of oasis (< 0.3.0), please update to version \ v%s.") oasis_exec t.oasis_version | 127 -> failwithf (f_ "Cannot find executable '%s', please install \ oasis v%s.") oasis_exec t.oasis_version | n -> failwithf (f_ "Command '%s version' exited with code %d.") oasis_exec n) oasis_exec ["version"] in if OASISVersion.comparator_apply (OASISVersion.version_of_string oasis_exec_version) (OASISVersion.VGreaterEqual (OASISVersion.version_of_string t.oasis_version)) then begin (* We have a version >= for the executable oasis, proceed with * update. *) (* TODO: delegate this check to 'oasis setup'. *) if Sys.os_type = "Win32" then failwithf (f_ "It is not possible to update the running script \ setup.ml on Windows. Please update setup.ml by \ running '%s'.") (String.concat " " (oasis_exec :: "setup" :: t.oasis_setup_args)) else begin OASISExec.run ~ctxt:!BaseContext.default ~f_exit_code: (function | 0 -> () | n -> failwithf (f_ "Unable to update setup.ml using '%s', \ please fix the problem and retry.") oasis_exec) oasis_exec ("setup" :: t.oasis_setup_args); OASISExec.run ~ctxt:!BaseContext.default ocaml (setup_ml :: args) end end else failwithf (f_ "The version of '%s' (v%s) doesn't match the version of \ oasis used to generate the %s file. Please install at \ least oasis v%s.") oasis_exec oasis_exec_version setup_ml t.oasis_version in if !update_setup_ml then begin try match t.oasis_digest with | Some dgst -> if Sys.file_exists oasis_fn && dgst <> Digest.file default_oasis_fn then begin do_update (); true end else false | None -> false with e -> error (f_ "Error when updating setup.ml. If you want to avoid this error, \ you can bypass the update of %s by running '%s %s %s %s'") setup_ml ocaml setup_ml no_update_setup_ml_cli (String.concat " " args); raise e end else false let setup t = let catch_exn = ref true in try let act_ref = ref (fun _ -> failwithf (f_ "No action defined, run '%s %s -help'") Sys.executable_name Sys.argv.(0)) in let extra_args_ref = ref [] in let allow_empty_env_ref = ref false in let arg_handle ?(allow_empty_env=false) act = Arg.Tuple [ Arg.Rest (fun str -> extra_args_ref := str :: !extra_args_ref); Arg.Unit (fun () -> allow_empty_env_ref := allow_empty_env; act_ref := act); ] in Arg.parse (Arg.align ([ "-configure", arg_handle ~allow_empty_env:true configure, s_ "[options*] Configure the whole build process."; "-build", arg_handle build, s_ "[options*] Build executables and libraries."; "-doc", arg_handle doc, s_ "[options*] Build documents."; "-test", arg_handle test, s_ "[options*] Run tests."; "-all", arg_handle ~allow_empty_env:true all, s_ "[options*] Run configure, build, doc and test targets."; "-install", arg_handle install, s_ "[options*] Install libraries, data, executables \ and documents."; "-uninstall", arg_handle uninstall, s_ "[options*] Uninstall libraries, data, executables \ and documents."; "-reinstall", arg_handle reinstall, s_ "[options*] Uninstall and install libraries, data, \ executables and documents."; "-clean", arg_handle ~allow_empty_env:true clean, s_ "[options*] Clean files generated by a build."; "-distclean", arg_handle ~allow_empty_env:true distclean, s_ "[options*] Clean files generated by a build and configure."; "-version", arg_handle ~allow_empty_env:true version, s_ " Display version of OASIS used to generate this setup.ml."; "-no-catch-exn", Arg.Clear catch_exn, s_ " Don't catch exception, useful for debugging."; ] @ (if t.setup_update then [no_update_setup_ml_cli] else []) @ (BaseContext.args ()))) (failwithf (f_ "Don't know what to do with '%s'")) (s_ "Setup and run build process current package\n"); (* Build initial environment *) load ~allow_empty:!allow_empty_env_ref (); (** Initialize flags *) List.iter (function | Flag (cs, {flag_description = hlp; flag_default = choices}) -> begin let apply ?short_desc () = var_ignore (var_define ~cli:CLIEnable ?short_desc (OASISUtils.varname_of_string cs.cs_name) (fun () -> string_of_bool (var_choose ~name:(Printf.sprintf (f_ "default value of flag %s") cs.cs_name) ~printer:string_of_bool choices))) in match hlp with | Some hlp -> apply ~short_desc:(fun () -> hlp) () | None -> apply () end | _ -> ()) t.package.sections; BaseStandardVar.init t.package; BaseDynVar.init t.package; if t.setup_update && update_setup_ml t then () else !act_ref t (Array.of_list (List.rev !extra_args_ref)) with e when !catch_exn -> error "%s" (Printexc.to_string e); exit 1 end # 5409 "setup.ml" module InternalConfigurePlugin = struct (* # 22 "src/plugins/internal/InternalConfigurePlugin.ml" *) (** Configure using internal scheme @author Sylvain Le Gall *) open BaseEnv open OASISTypes open OASISUtils open OASISGettext open BaseMessage (** Configure build using provided series of check to be done * and then output corresponding file. *) let configure pkg argv = let var_ignore_eval var = let _s: string = var () in () in let errors = ref SetString.empty in let buff = Buffer.create 13 in let add_errors fmt = Printf.kbprintf (fun b -> errors := SetString.add (Buffer.contents b) !errors; Buffer.clear b) buff fmt in let warn_exception e = warning "%s" (Printexc.to_string e) in (* Check tools *) let check_tools lst = List.iter (function | ExternalTool tool -> begin try var_ignore_eval (BaseCheck.prog tool) with e -> warn_exception e; add_errors (f_ "Cannot find external tool '%s'") tool end | InternalExecutable nm1 -> (* Check that matching tool is built *) List.iter (function | Executable ({cs_name = nm2}, {bs_build = build}, _) when nm1 = nm2 -> if not (var_choose build) then add_errors (f_ "Cannot find buildable internal executable \ '%s' when checking build depends") nm1 | _ -> ()) pkg.sections) lst in let build_checks sct bs = if var_choose bs.bs_build then begin if bs.bs_compiled_object = Native then begin try var_ignore_eval BaseStandardVar.ocamlopt with e -> warn_exception e; add_errors (f_ "Section %s requires native compilation") (OASISSection.string_of_section sct) end; (* Check tools *) check_tools bs.bs_build_tools; (* Check depends *) List.iter (function | FindlibPackage (findlib_pkg, version_comparator) -> begin try var_ignore_eval (BaseCheck.package ?version_comparator findlib_pkg) with e -> warn_exception e; match version_comparator with | None -> add_errors (f_ "Cannot find findlib package %s") findlib_pkg | Some ver_cmp -> add_errors (f_ "Cannot find findlib package %s (%s)") findlib_pkg (OASISVersion.string_of_comparator ver_cmp) end | InternalLibrary nm1 -> (* Check that matching library is built *) List.iter (function | Library ({cs_name = nm2}, {bs_build = build}, _) when nm1 = nm2 -> if not (var_choose build) then add_errors (f_ "Cannot find buildable internal library \ '%s' when checking build depends") nm1 | _ -> ()) pkg.sections) bs.bs_build_depends end in (* Parse command line *) BaseArgExt.parse argv (BaseEnv.args ()); (* OCaml version *) begin match pkg.ocaml_version with | Some ver_cmp -> begin try var_ignore_eval (BaseCheck.version "ocaml" ver_cmp BaseStandardVar.ocaml_version) with e -> warn_exception e; add_errors (f_ "OCaml version %s doesn't match version constraint %s") (BaseStandardVar.ocaml_version ()) (OASISVersion.string_of_comparator ver_cmp) end | None -> () end; (* Findlib version *) begin match pkg.findlib_version with | Some ver_cmp -> begin try var_ignore_eval (BaseCheck.version "findlib" ver_cmp BaseStandardVar.findlib_version) with e -> warn_exception e; add_errors (f_ "Findlib version %s doesn't match version constraint %s") (BaseStandardVar.findlib_version ()) (OASISVersion.string_of_comparator ver_cmp) end | None -> () end; (* Make sure the findlib version is fine for the OCaml compiler. *) begin let ocaml_ge4 = OASISVersion.version_compare (OASISVersion.version_of_string (BaseStandardVar.ocaml_version())) (OASISVersion.version_of_string "4.0.0") >= 0 in if ocaml_ge4 then let findlib_lt132 = OASISVersion.version_compare (OASISVersion.version_of_string (BaseStandardVar.findlib_version())) (OASISVersion.version_of_string "1.3.2") < 0 in if findlib_lt132 then add_errors "OCaml >= 4.0.0 requires Findlib version >= 1.3.2" end; (* FlexDLL *) if BaseStandardVar.os_type () = "Win32" || BaseStandardVar.os_type () = "Cygwin" then begin try var_ignore_eval BaseStandardVar.flexlink with e -> warn_exception e; add_errors (f_ "Cannot find 'flexlink'") end; (* Check build depends *) List.iter (function | Executable (_, bs, _) | Library (_, bs, _) as sct -> build_checks sct bs | Doc (_, doc) -> if var_choose doc.doc_build then check_tools doc.doc_build_tools | Test (_, test) -> if var_choose test.test_run then check_tools test.test_tools | _ -> ()) pkg.sections; (* Check if we need native dynlink (presence of libraries that compile to * native) *) begin let has_cmxa = List.exists (function | Library (_, bs, _) -> var_choose bs.bs_build && (bs.bs_compiled_object = Native || (bs.bs_compiled_object = Best && bool_of_string (BaseStandardVar.is_native ()))) | _ -> false) pkg.sections in if has_cmxa then var_ignore_eval BaseStandardVar.native_dynlink end; (* Check errors *) if SetString.empty != !errors then begin List.iter (fun e -> error "%s" e) (SetString.elements !errors); failwithf (fn_ "%d configuration error" "%d configuration errors" (SetString.cardinal !errors)) (SetString.cardinal !errors) end end module InternalInstallPlugin = struct (* # 22 "src/plugins/internal/InternalInstallPlugin.ml" *) (** Install using internal scheme @author Sylvain Le Gall *) open BaseEnv open BaseStandardVar open BaseMessage open OASISTypes open OASISFindlib open OASISGettext open OASISUtils let exec_hook = ref (fun (cs, bs, exec) -> cs, bs, exec) let lib_hook = ref (fun (cs, bs, lib) -> cs, bs, lib, []) let obj_hook = ref (fun (cs, bs, obj) -> cs, bs, obj, []) let doc_hook = ref (fun (cs, doc) -> cs, doc) let install_file_ev = "install-file" let install_dir_ev = "install-dir" let install_findlib_ev = "install-findlib" let win32_max_command_line_length = 8000 let split_install_command ocamlfind findlib_name meta files = if Sys.os_type = "Win32" then (* Arguments for the first command: *) let first_args = ["install"; findlib_name; meta] in (* Arguments for remaining commands: *) let other_args = ["install"; findlib_name; "-add"] in (* Extract as much files as possible from [files], [len] is the current command line length: *) let rec get_files len acc files = match files with | [] -> (List.rev acc, []) | file :: rest -> let len = len + 1 + String.length file in if len > win32_max_command_line_length then (List.rev acc, files) else get_files len (file :: acc) rest in (* Split the command into several commands. *) let rec split args files = match files with | [] -> [] | _ -> (* Length of "ocamlfind install [META|-add]" *) let len = List.fold_left (fun len arg -> len + 1 (* for the space *) + String.length arg) (String.length ocamlfind) args in match get_files len [] files with | ([], _) -> failwith (s_ "Command line too long.") | (firsts, others) -> let cmd = args @ firsts in (* Use -add for remaining commands: *) let () = let findlib_ge_132 = OASISVersion.comparator_apply (OASISVersion.version_of_string (BaseStandardVar.findlib_version ())) (OASISVersion.VGreaterEqual (OASISVersion.version_of_string "1.3.2")) in if not findlib_ge_132 then failwithf (f_ "Installing the library %s require to use the \ flag '-add' of ocamlfind because the command \ line is too long. This flag is only available \ for findlib 1.3.2. Please upgrade findlib from \ %s to 1.3.2") findlib_name (BaseStandardVar.findlib_version ()) in let cmds = split other_args others in cmd :: cmds in (* The first command does not use -add: *) split first_args files else ["install" :: findlib_name :: meta :: files] let install pkg argv = let in_destdir = try let destdir = destdir () in (* Practically speaking destdir is prepended * at the beginning of the target filename *) fun fn -> destdir^fn with PropList.Not_set _ -> fun fn -> fn in let install_file ?tgt_fn src_file envdir = let tgt_dir = in_destdir (envdir ()) in let tgt_file = Filename.concat tgt_dir (match tgt_fn with | Some fn -> fn | None -> Filename.basename src_file) in (* Create target directory if needed *) OASISFileUtil.mkdir_parent ~ctxt:!BaseContext.default (fun dn -> info (f_ "Creating directory '%s'") dn; BaseLog.register install_dir_ev dn) tgt_dir; (* Really install files *) info (f_ "Copying file '%s' to '%s'") src_file tgt_file; OASISFileUtil.cp ~ctxt:!BaseContext.default src_file tgt_file; BaseLog.register install_file_ev tgt_file in (* Install data into defined directory *) let install_data srcdir lst tgtdir = let tgtdir = OASISHostPath.of_unix (var_expand tgtdir) in List.iter (fun (src, tgt_opt) -> let real_srcs = OASISFileUtil.glob ~ctxt:!BaseContext.default (Filename.concat srcdir src) in if real_srcs = [] then failwithf (f_ "Wildcard '%s' doesn't match any files") src; List.iter (fun fn -> install_file fn (fun () -> match tgt_opt with | Some s -> OASISHostPath.of_unix (var_expand s) | None -> tgtdir)) real_srcs) lst in let make_fnames modul sufx = List.fold_right begin fun sufx accu -> (String.capitalize modul ^ sufx) :: (String.uncapitalize modul ^ sufx) :: accu end sufx [] in (** Install all libraries *) let install_libs pkg = let files_of_library (f_data, acc) data_lib = let cs, bs, lib, lib_extra = !lib_hook data_lib in if var_choose bs.bs_install && BaseBuilt.is_built BaseBuilt.BLib cs.cs_name then begin let acc = (* Start with acc + lib_extra *) List.rev_append lib_extra acc in let acc = (* Add uncompiled header from the source tree *) let path = OASISHostPath.of_unix bs.bs_path in List.fold_left begin fun acc modul -> begin try [List.find OASISFileUtil.file_exists_case (List.map (Filename.concat path) (make_fnames modul [".mli"; ".ml"]))] with Not_found -> warning (f_ "Cannot find source header for module %s \ in library %s") modul cs.cs_name; [] end @ List.filter OASISFileUtil.file_exists_case (List.map (Filename.concat path) (make_fnames modul [".annot";".cmti";".cmt"])) @ acc end acc lib.lib_modules in let acc = (* Get generated files *) BaseBuilt.fold BaseBuilt.BLib cs.cs_name (fun acc fn -> fn :: acc) acc in let f_data () = (* Install data associated with the library *) install_data bs.bs_path bs.bs_data_files (Filename.concat (datarootdir ()) pkg.name); f_data () in (f_data, acc) end else begin (f_data, acc) end and files_of_object (f_data, acc) data_obj = let cs, bs, obj, obj_extra = !obj_hook data_obj in if var_choose bs.bs_install && BaseBuilt.is_built BaseBuilt.BObj cs.cs_name then begin let acc = (* Start with acc + obj_extra *) List.rev_append obj_extra acc in let acc = (* Add uncompiled header from the source tree *) let path = OASISHostPath.of_unix bs.bs_path in List.fold_left begin fun acc modul -> begin try [List.find OASISFileUtil.file_exists_case (List.map (Filename.concat path) (make_fnames modul [".mli"; ".ml"]))] with Not_found -> warning (f_ "Cannot find source header for module %s \ in object %s") modul cs.cs_name; [] end @ List.filter OASISFileUtil.file_exists_case (List.map (Filename.concat path) (make_fnames modul [".annot";".cmti";".cmt"])) @ acc end acc obj.obj_modules in let acc = (* Get generated files *) BaseBuilt.fold BaseBuilt.BObj cs.cs_name (fun acc fn -> fn :: acc) acc in let f_data () = (* Install data associated with the object *) install_data bs.bs_path bs.bs_data_files (Filename.concat (datarootdir ()) pkg.name); f_data () in (f_data, acc) end else begin (f_data, acc) end in (* Install one group of library *) let install_group_lib grp = (* Iterate through all group nodes *) let rec install_group_lib_aux data_and_files grp = let data_and_files, children = match grp with | Container (_, children) -> data_and_files, children | Package (_, cs, bs, `Library lib, children) -> files_of_library data_and_files (cs, bs, lib), children | Package (_, cs, bs, `Object obj, children) -> files_of_object data_and_files (cs, bs, obj), children in List.fold_left install_group_lib_aux data_and_files children in (* Findlib name of the root library *) let findlib_name = findlib_of_group grp in (* Determine root library *) let root_lib = root_of_group grp in (* All files to install for this library *) let f_data, files = install_group_lib_aux (ignore, []) grp in (* Really install, if there is something to install *) if files = [] then begin warning (f_ "Nothing to install for findlib library '%s'") findlib_name end else begin let meta = (* Search META file *) let _, bs, _ = root_lib in let res = Filename.concat bs.bs_path "META" in if not (OASISFileUtil.file_exists_case res) then failwithf (f_ "Cannot find file '%s' for findlib library %s") res findlib_name; res in let files = (* Make filename shorter to avoid hitting command max line length * too early, esp. on Windows. *) let remove_prefix p n = let plen = String.length p in let nlen = String.length n in if plen <= nlen && String.sub n 0 plen = p then begin let fn_sep = if Sys.os_type = "Win32" then '\\' else '/' in let cutpoint = plen + (if plen < nlen && n.[plen] = fn_sep then 1 else 0) in String.sub n cutpoint (nlen - cutpoint) end else n in List.map (remove_prefix (Sys.getcwd ())) files in info (f_ "Installing findlib library '%s'") findlib_name; let ocamlfind = ocamlfind () in let commands = split_install_command ocamlfind findlib_name meta files in List.iter (OASISExec.run ~ctxt:!BaseContext.default ocamlfind) commands; BaseLog.register install_findlib_ev findlib_name end; (* Install data files *) f_data (); in let group_libs, _, _ = findlib_mapping pkg in (* We install libraries in groups *) List.iter install_group_lib group_libs in let install_execs pkg = let install_exec data_exec = let cs, bs, exec = !exec_hook data_exec in if var_choose bs.bs_install && BaseBuilt.is_built BaseBuilt.BExec cs.cs_name then begin let exec_libdir () = Filename.concat (libdir ()) pkg.name in BaseBuilt.fold BaseBuilt.BExec cs.cs_name (fun () fn -> install_file ~tgt_fn:(cs.cs_name ^ ext_program ()) fn bindir) (); BaseBuilt.fold BaseBuilt.BExecLib cs.cs_name (fun () fn -> install_file fn exec_libdir) (); install_data bs.bs_path bs.bs_data_files (Filename.concat (datarootdir ()) pkg.name) end in List.iter (function | Executable (cs, bs, exec)-> install_exec (cs, bs, exec) | _ -> ()) pkg.sections in let install_docs pkg = let install_doc data = let cs, doc = !doc_hook data in if var_choose doc.doc_install && BaseBuilt.is_built BaseBuilt.BDoc cs.cs_name then begin let tgt_dir = OASISHostPath.of_unix (var_expand doc.doc_install_dir) in BaseBuilt.fold BaseBuilt.BDoc cs.cs_name (fun () fn -> install_file fn (fun () -> tgt_dir)) (); install_data Filename.current_dir_name doc.doc_data_files doc.doc_install_dir end in List.iter (function | Doc (cs, doc) -> install_doc (cs, doc) | _ -> ()) pkg.sections in install_libs pkg; install_execs pkg; install_docs pkg (* Uninstall already installed data *) let uninstall _ argv = List.iter (fun (ev, data) -> if ev = install_file_ev then begin if OASISFileUtil.file_exists_case data then begin info (f_ "Removing file '%s'") data; Sys.remove data end else begin warning (f_ "File '%s' doesn't exist anymore") data end end else if ev = install_dir_ev then begin if Sys.file_exists data && Sys.is_directory data then begin if Sys.readdir data = [||] then begin info (f_ "Removing directory '%s'") data; OASISFileUtil.rmdir ~ctxt:!BaseContext.default data end else begin warning (f_ "Directory '%s' is not empty (%s)") data (String.concat ", " (Array.to_list (Sys.readdir data))) end end else begin warning (f_ "Directory '%s' doesn't exist anymore") data end end else if ev = install_findlib_ev then begin info (f_ "Removing findlib library '%s'") data; OASISExec.run ~ctxt:!BaseContext.default (ocamlfind ()) ["remove"; data] end else failwithf (f_ "Unknown log event '%s'") ev; BaseLog.unregister ev data) (* We process event in reverse order *) (List.rev (BaseLog.filter [install_file_ev; install_dir_ev; install_findlib_ev])) end # 6273 "setup.ml" module OCamlbuildCommon = struct (* # 22 "src/plugins/ocamlbuild/OCamlbuildCommon.ml" *) (** Functions common to OCamlbuild build and doc plugin *) open OASISGettext open BaseEnv open BaseStandardVar open OASISTypes type extra_args = string list let ocamlbuild_clean_ev = "ocamlbuild-clean" let ocamlbuildflags = var_define ~short_desc:(fun () -> "OCamlbuild additional flags") "ocamlbuildflags" (fun () -> "") (** Fix special arguments depending on environment *) let fix_args args extra_argv = List.flatten [ if (os_type ()) = "Win32" then [ "-classic-display"; "-no-log"; "-no-links"; "-install-lib-dir"; (Filename.concat (standard_library ()) "ocamlbuild") ] else []; if not (bool_of_string (is_native ())) || (os_type ()) = "Win32" then [ "-byte-plugin" ] else []; args; if bool_of_string (debug ()) then ["-tag"; "debug"] else []; if bool_of_string (tests ()) then ["-tag"; "tests"] else []; if bool_of_string (profile ()) then ["-tag"; "profile"] else []; OASISString.nsplit (ocamlbuildflags ()) ' '; Array.to_list extra_argv; ] (** Run 'ocamlbuild -clean' if not already done *) let run_clean extra_argv = let extra_cli = String.concat " " (Array.to_list extra_argv) in (* Run if never called with these args *) if not (BaseLog.exists ocamlbuild_clean_ev extra_cli) then begin OASISExec.run ~ctxt:!BaseContext.default (ocamlbuild ()) (fix_args ["-clean"] extra_argv); BaseLog.register ocamlbuild_clean_ev extra_cli; at_exit (fun () -> try BaseLog.unregister ocamlbuild_clean_ev extra_cli with _ -> ()) end (** Run ocamlbuild, unregister all clean events *) let run_ocamlbuild args extra_argv = (* TODO: enforce that target in args must be UNIX encoded i.e. toto/index.html *) OASISExec.run ~ctxt:!BaseContext.default (ocamlbuild ()) (fix_args args extra_argv); (* Remove any clean event, we must run it again *) List.iter (fun (e, d) -> BaseLog.unregister e d) (BaseLog.filter [ocamlbuild_clean_ev]) (** Determine real build directory *) let build_dir extra_argv = let rec search_args dir = function | "-build-dir" :: dir :: tl -> search_args dir tl | _ :: tl -> search_args dir tl | [] -> dir in search_args "_build" (fix_args [] extra_argv) end module OCamlbuildPlugin = struct (* # 22 "src/plugins/ocamlbuild/OCamlbuildPlugin.ml" *) (** Build using ocamlbuild @author Sylvain Le Gall *) open OASISTypes open OASISGettext open OASISUtils open OASISString open BaseEnv open OCamlbuildCommon open BaseStandardVar open BaseMessage let cond_targets_hook = ref (fun lst -> lst) let build extra_args pkg argv = (* Return the filename in build directory *) let in_build_dir fn = Filename.concat (build_dir argv) fn in (* Return the unix filename in host build directory *) let in_build_dir_of_unix fn = in_build_dir (OASISHostPath.of_unix fn) in let cond_targets = List.fold_left (fun acc -> function | Library (cs, bs, lib) when var_choose bs.bs_build -> begin let evs, unix_files = BaseBuilt.of_library in_build_dir_of_unix (cs, bs, lib) in let tgts = List.flatten (List.filter (fun l -> l <> []) (List.map (List.filter (fun fn -> ends_with ~what:".cma" fn || ends_with ~what:".cmxs" fn || ends_with ~what:".cmxa" fn || ends_with ~what:(ext_lib ()) fn || ends_with ~what:(ext_dll ()) fn)) unix_files)) in match tgts with | _ :: _ -> (evs, tgts) :: acc | [] -> failwithf (f_ "No possible ocamlbuild targets for library %s") cs.cs_name end | Object (cs, bs, obj) when var_choose bs.bs_build -> begin let evs, unix_files = BaseBuilt.of_object in_build_dir_of_unix (cs, bs, obj) in let tgts = List.flatten (List.filter (fun l -> l <> []) (List.map (List.filter (fun fn -> ends_with ".cmo" fn || ends_with ".cmx" fn)) unix_files)) in match tgts with | _ :: _ -> (evs, tgts) :: acc | [] -> failwithf (f_ "No possible ocamlbuild targets for object %s") cs.cs_name end | Executable (cs, bs, exec) when var_choose bs.bs_build -> begin let evs, unix_exec_is, unix_dll_opt = BaseBuilt.of_executable in_build_dir_of_unix (cs, bs, exec) in let target ext = let unix_tgt = (OASISUnixPath.concat bs.bs_path (OASISUnixPath.chop_extension exec.exec_main_is))^ext in let evs = (* Fix evs, we want to use the unix_tgt, without copying *) List.map (function | BaseBuilt.BExec, nm, lst when nm = cs.cs_name -> BaseBuilt.BExec, nm, [[in_build_dir_of_unix unix_tgt]] | ev -> ev) evs in evs, [unix_tgt] in (* Add executable *) let acc = match bs.bs_compiled_object with | Native -> (target ".native") :: acc | Best when bool_of_string (is_native ()) -> (target ".native") :: acc | Byte | Best -> (target ".byte") :: acc in acc end | Library _ | Object _ | Executable _ | Test _ | SrcRepo _ | Flag _ | Doc _ -> acc) [] (* Keep the pkg.sections ordered *) (List.rev pkg.sections); in (* Check and register built files *) let check_and_register (bt, bnm, lst) = List.iter (fun fns -> if not (List.exists OASISFileUtil.file_exists_case fns) then failwithf (fn_ "Expected built file %s doesn't exist." "None of expected built files %s exists." (List.length fns)) (String.concat (s_ " or ") (List.map (Printf.sprintf "'%s'") fns))) lst; (BaseBuilt.register bt bnm lst) in (* Run the hook *) let cond_targets = !cond_targets_hook cond_targets in (* Run a list of target... *) run_ocamlbuild (List.flatten (List.map snd cond_targets) @ extra_args) argv; (* ... and register events *) List.iter check_and_register (List.flatten (List.map fst cond_targets)) let clean pkg extra_args = run_clean extra_args; List.iter (function | Library (cs, _, _) -> BaseBuilt.unregister BaseBuilt.BLib cs.cs_name | Executable (cs, _, _) -> BaseBuilt.unregister BaseBuilt.BExec cs.cs_name; BaseBuilt.unregister BaseBuilt.BExecLib cs.cs_name | _ -> ()) pkg.sections end module OCamlbuildDocPlugin = struct (* # 22 "src/plugins/ocamlbuild/OCamlbuildDocPlugin.ml" *) (* Create documentation using ocamlbuild .odocl files @author Sylvain Le Gall *) open OASISTypes open OASISGettext open OASISMessage open OCamlbuildCommon open BaseStandardVar type run_t = { extra_args: string list; run_path: unix_filename; } let doc_build run pkg (cs, doc) argv = let index_html = OASISUnixPath.make [ run.run_path; cs.cs_name^".docdir"; "index.html"; ] in let tgt_dir = OASISHostPath.make [ build_dir argv; OASISHostPath.of_unix run.run_path; cs.cs_name^".docdir"; ] in run_ocamlbuild (index_html :: run.extra_args) argv; List.iter (fun glb -> BaseBuilt.register BaseBuilt.BDoc cs.cs_name [OASISFileUtil.glob ~ctxt:!BaseContext.default (Filename.concat tgt_dir glb)]) ["*.html"; "*.css"] let doc_clean run pkg (cs, doc) argv = run_clean argv; BaseBuilt.unregister BaseBuilt.BDoc cs.cs_name end # 6651 "setup.ml" module CustomPlugin = struct (* # 22 "src/plugins/custom/CustomPlugin.ml" *) (** Generate custom configure/build/doc/test/install system @author *) open BaseEnv open OASISGettext open OASISTypes type t = { cmd_main: command_line conditional; cmd_clean: (command_line option) conditional; cmd_distclean: (command_line option) conditional; } let run = BaseCustom.run let main t _ extra_args = let cmd, args = var_choose ~name:(s_ "main command") t.cmd_main in run cmd args extra_args let clean t pkg extra_args = match var_choose t.cmd_clean with | Some (cmd, args) -> run cmd args extra_args | _ -> () let distclean t pkg extra_args = match var_choose t.cmd_distclean with | Some (cmd, args) -> run cmd args extra_args | _ -> () module Build = struct let main t pkg extra_args = main t pkg extra_args; List.iter (fun sct -> let evs = match sct with | Library (cs, bs, lib) when var_choose bs.bs_build -> begin let evs, _ = BaseBuilt.of_library OASISHostPath.of_unix (cs, bs, lib) in evs end | Executable (cs, bs, exec) when var_choose bs.bs_build -> begin let evs, _, _ = BaseBuilt.of_executable OASISHostPath.of_unix (cs, bs, exec) in evs end | _ -> [] in List.iter (fun (bt, bnm, lst) -> BaseBuilt.register bt bnm lst) evs) pkg.sections let clean t pkg extra_args = clean t pkg extra_args; (* TODO: this seems to be pretty generic (at least wrt to ocamlbuild * considering moving this to BaseSetup? *) List.iter (function | Library (cs, _, _) -> BaseBuilt.unregister BaseBuilt.BLib cs.cs_name | Executable (cs, _, _) -> BaseBuilt.unregister BaseBuilt.BExec cs.cs_name; BaseBuilt.unregister BaseBuilt.BExecLib cs.cs_name | _ -> ()) pkg.sections let distclean t pkg extra_args = distclean t pkg extra_args end module Test = struct let main t pkg (cs, test) extra_args = try main t pkg extra_args; 0.0 with Failure s -> BaseMessage.warning (f_ "Test '%s' fails: %s") cs.cs_name s; 1.0 let clean t pkg (cs, test) extra_args = clean t pkg extra_args let distclean t pkg (cs, test) extra_args = distclean t pkg extra_args end module Doc = struct let main t pkg (cs, _) extra_args = main t pkg extra_args; BaseBuilt.register BaseBuilt.BDoc cs.cs_name [] let clean t pkg (cs, _) extra_args = clean t pkg extra_args; BaseBuilt.unregister BaseBuilt.BDoc cs.cs_name let distclean t pkg (cs, _) extra_args = distclean t pkg extra_args end end # 6799 "setup.ml" open OASISTypes;; let setup_t = { BaseSetup.configure = InternalConfigurePlugin.configure; build = OCamlbuildPlugin.build []; test = [ ("main", CustomPlugin.Test.main { CustomPlugin.cmd_main = [(OASISExpr.EBool true, ("$test", []))]; cmd_clean = [(OASISExpr.EBool true, None)]; cmd_distclean = [(OASISExpr.EBool true, None)] }) ]; doc = [ ("manual", CustomPlugin.Doc.main { CustomPlugin.cmd_main = [ (OASISExpr.EBool true, ("make", ["-C"; "manual"; "manual.pdf"])) ]; cmd_clean = [(OASISExpr.EBool true, None)]; cmd_distclean = [(OASISExpr.EBool true, None)] }); ("api", OCamlbuildDocPlugin.doc_build {OCamlbuildDocPlugin.extra_args = []; run_path = "./"}) ]; install = InternalInstallPlugin.install; uninstall = InternalInstallPlugin.uninstall; clean = [OCamlbuildPlugin.clean]; clean_test = [ ("main", CustomPlugin.Test.clean { CustomPlugin.cmd_main = [(OASISExpr.EBool true, ("$test", []))]; cmd_clean = [(OASISExpr.EBool true, None)]; cmd_distclean = [(OASISExpr.EBool true, None)] }) ]; clean_doc = [ ("manual", CustomPlugin.Doc.clean { CustomPlugin.cmd_main = [ (OASISExpr.EBool true, ("make", ["-C"; "manual"; "manual.pdf"])) ]; cmd_clean = [(OASISExpr.EBool true, None)]; cmd_distclean = [(OASISExpr.EBool true, None)] }); ("api", OCamlbuildDocPlugin.doc_clean {OCamlbuildDocPlugin.extra_args = []; run_path = "./"}) ]; distclean = []; distclean_test = [ ("main", CustomPlugin.Test.distclean { CustomPlugin.cmd_main = [(OASISExpr.EBool true, ("$test", []))]; cmd_clean = [(OASISExpr.EBool true, None)]; cmd_distclean = [(OASISExpr.EBool true, None)] }) ]; distclean_doc = [ ("manual", CustomPlugin.Doc.distclean { CustomPlugin.cmd_main = [ (OASISExpr.EBool true, ("make", ["-C"; "manual"; "manual.pdf"])) ]; cmd_clean = [(OASISExpr.EBool true, None)]; cmd_distclean = [(OASISExpr.EBool true, None)] }) ]; package = { oasis_version = "0.4"; ocaml_version = None; findlib_version = None; alpha_features = ["compiled_setup_ml"]; beta_features = []; name = "ocaml-text"; version = "0.7"; license = OASISLicense.DEP5License (OASISLicense.DEP5Unit { OASISLicense.license = "BSD-3-clause"; excption = None; version = OASISLicense.NoVersion }); license_file = Some "LICENSE"; copyrights = []; maintainers = []; authors = ["J\195\169r\195\169mie Dimino"]; homepage = Some "http://ocaml-text.forge.ocamlcore.org/"; synopsis = "ocaml-text"; description = Some [ OASISText.Para "OCaml-Text is an OCaml library for dealing with \"text\", i.e. sequences of unicode characters, in a convenient way." ]; categories = []; conf_type = (`Configure, "internal", Some "0.4"); conf_custom = { pre_command = [(OASISExpr.EBool true, None)]; post_command = [(OASISExpr.EBool true, None)] }; build_type = (`Build, "ocamlbuild", Some "0.4"); build_custom = { pre_command = [(OASISExpr.EBool true, None)]; post_command = [(OASISExpr.EBool true, None)] }; install_type = (`Install, "internal", Some "0.4"); install_custom = { pre_command = [(OASISExpr.EBool true, None)]; post_command = [(OASISExpr.EBool true, None)] }; uninstall_custom = { pre_command = [(OASISExpr.EBool true, None)]; post_command = [(OASISExpr.EBool true, None)] }; clean_custom = { pre_command = [(OASISExpr.EBool true, None)]; post_command = [(OASISExpr.EBool true, None)] }; distclean_custom = { pre_command = [(OASISExpr.EBool true, None)]; post_command = [(OASISExpr.EBool true, None)] }; files_ab = []; sections = [ Flag ({ cs_name = "pcre"; cs_data = PropList.Data.create (); cs_plugin_data = [] }, { flag_description = Some "support for pcre"; flag_default = [(OASISExpr.EBool true, false)] }); Library ({ cs_name = "text"; cs_data = PropList.Data.create (); cs_plugin_data = [] }, { bs_build = [(OASISExpr.EBool true, true)]; bs_install = [(OASISExpr.EBool true, true)]; bs_path = "src"; bs_compiled_object = Best; bs_build_depends = [FindlibPackage ("bytes", None)]; bs_build_tools = [ExternalTool "ocamlbuild"]; bs_c_sources = ["ml_text.c"; "common.h"]; bs_data_files = []; bs_ccopt = [(OASISExpr.EBool true, [])]; bs_cclib = [(OASISExpr.EBool true, [])]; bs_dlllib = [(OASISExpr.EBool true, [])]; bs_dllpath = [(OASISExpr.EBool true, [])]; bs_byteopt = [(OASISExpr.EBool true, [])]; bs_nativeopt = [(OASISExpr.EBool true, [])] }, { lib_modules = ["Text"; "Encoding"]; lib_pack = false; lib_internal_modules = []; lib_findlib_parent = None; lib_findlib_name = None; lib_findlib_containers = [] }); Library ({ cs_name = "bigarray"; cs_data = PropList.Data.create (); cs_plugin_data = [] }, { bs_build = [(OASISExpr.EBool true, true)]; bs_install = [(OASISExpr.EBool true, true)]; bs_path = "src"; bs_compiled_object = Best; bs_build_depends = [ InternalLibrary "text"; FindlibPackage ("bigarray", None) ]; bs_build_tools = [ExternalTool "ocamlbuild"]; bs_c_sources = ["ml_text_bigarray.c"; "common.h"]; bs_data_files = []; bs_ccopt = [(OASISExpr.EBool true, [])]; bs_cclib = [(OASISExpr.EBool true, [])]; bs_dlllib = [(OASISExpr.EBool true, [])]; bs_dllpath = [(OASISExpr.EBool true, [])]; bs_byteopt = [(OASISExpr.EBool true, [])]; bs_nativeopt = [(OASISExpr.EBool true, [])] }, { lib_modules = ["Encoding_bigarray"]; lib_pack = false; lib_internal_modules = []; lib_findlib_parent = Some "text"; lib_findlib_name = None; lib_findlib_containers = [] }); Library ({ cs_name = "pcre"; cs_data = PropList.Data.create (); cs_plugin_data = [] }, { bs_build = [ (OASISExpr.EBool true, false); (OASISExpr.EFlag "pcre", true) ]; bs_install = [ (OASISExpr.EBool true, false); (OASISExpr.EFlag "pcre", true) ]; bs_path = "src"; bs_compiled_object = Best; bs_build_depends = [ InternalLibrary "text"; FindlibPackage ("pcre", None) ]; bs_build_tools = [ExternalTool "ocamlbuild"]; bs_c_sources = []; bs_data_files = []; bs_ccopt = [(OASISExpr.EBool true, [])]; bs_cclib = [(OASISExpr.EBool true, [])]; bs_dlllib = [(OASISExpr.EBool true, [])]; bs_dllpath = [(OASISExpr.EBool true, [])]; bs_byteopt = [(OASISExpr.EBool true, [])]; bs_nativeopt = [(OASISExpr.EBool true, [])] }, { lib_modules = ["Text_pcre"]; lib_pack = false; lib_internal_modules = []; lib_findlib_parent = Some "text"; lib_findlib_name = None; lib_findlib_containers = [] }); Library ({ cs_name = "pcre-syntax"; cs_data = PropList.Data.create (); cs_plugin_data = [] }, { bs_build = [ (OASISExpr.EBool true, false); (OASISExpr.EFlag "pcre", true) ]; bs_install = [ (OASISExpr.EBool true, false); (OASISExpr.EFlag "pcre", true) ]; bs_path = "syntax"; bs_compiled_object = Best; bs_build_depends = [ InternalLibrary "text"; FindlibPackage ("camlp4", None); FindlibPackage ("camlp4.quotations.o", None); FindlibPackage ("camlp4.extend", None) ]; bs_build_tools = [ExternalTool "ocamlbuild"]; bs_c_sources = []; bs_data_files = []; bs_ccopt = [(OASISExpr.EBool true, [])]; bs_cclib = [(OASISExpr.EBool true, [])]; bs_dlllib = [(OASISExpr.EBool true, [])]; bs_dllpath = [(OASISExpr.EBool true, [])]; bs_byteopt = [(OASISExpr.EBool true, [])]; bs_nativeopt = [(OASISExpr.EBool true, [])] }, { lib_modules = []; lib_pack = false; lib_internal_modules = [ "Pa_text_main"; "Pa_text_parse"; "Pa_text_regexp"; "Pa_text_env"; "Pa_text_types"; "Pa_text_util" ]; lib_findlib_parent = Some "text"; lib_findlib_name = None; lib_findlib_containers = [] }); Doc ({ cs_name = "manual"; cs_data = PropList.Data.create (); cs_plugin_data = [] }, { doc_type = (`Doc, "custom", Some "0.4"); doc_custom = { pre_command = [(OASISExpr.EBool true, None)]; post_command = [(OASISExpr.EBool true, None)] }; doc_build = [ (OASISExpr.ENot (OASISExpr.EFlag "docs"), false); (OASISExpr.EFlag "docs", true) ]; doc_install = [(OASISExpr.EBool true, false)]; doc_install_dir = "$pdfdir"; doc_title = "OCamlText user manual"; doc_authors = []; doc_abstract = None; doc_format = OtherDoc; doc_data_files = [("manual/manual.pdf", None)]; doc_build_tools = [ExternalTool "ocamlbuild"] }); Doc ({ cs_name = "api"; cs_data = PropList.Data.create (); cs_plugin_data = [] }, { doc_type = (`Doc, "ocamlbuild", Some "0.4"); doc_custom = { pre_command = [(OASISExpr.EBool true, None)]; post_command = [(OASISExpr.EBool true, None)] }; doc_build = [ (OASISExpr.ENot (OASISExpr.EFlag "docs"), false); (OASISExpr.EFlag "docs", true) ]; doc_install = [(OASISExpr.EBool true, false)]; doc_install_dir = "$htmldir/api"; doc_title = "API reference for OCamlText"; doc_authors = []; doc_abstract = None; doc_format = OtherDoc; doc_data_files = [("style.css", None)]; doc_build_tools = [ExternalTool "ocamlbuild"; ExternalTool "ocamldoc"] }); Executable ({ cs_name = "test"; cs_data = PropList.Data.create (); cs_plugin_data = [] }, { bs_build = [(OASISExpr.EBool true, true)]; bs_install = [(OASISExpr.EBool true, false)]; bs_path = "tests"; bs_compiled_object = Best; bs_build_depends = [InternalLibrary "text"]; bs_build_tools = [ExternalTool "ocamlbuild"]; bs_c_sources = []; bs_data_files = []; bs_ccopt = [(OASISExpr.EBool true, [])]; bs_cclib = [(OASISExpr.EBool true, [])]; bs_dlllib = [(OASISExpr.EBool true, [])]; bs_dllpath = [(OASISExpr.EBool true, [])]; bs_byteopt = [(OASISExpr.EBool true, [])]; bs_nativeopt = [(OASISExpr.EBool true, [])] }, {exec_custom = false; exec_main_is = "test.ml"}); Test ({ cs_name = "main"; cs_data = PropList.Data.create (); cs_plugin_data = [] }, { test_type = (`Test, "custom", Some "0.4"); test_command = [(OASISExpr.EBool true, ("$test", []))]; test_custom = { pre_command = [(OASISExpr.EBool true, None)]; post_command = [(OASISExpr.EBool true, None)] }; test_working_directory = None; test_run = [ (OASISExpr.ENot (OASISExpr.EFlag "tests"), false); (OASISExpr.EFlag "tests", true) ]; test_tools = [ExternalTool "ocamlbuild"; InternalExecutable "test" ] }); SrcRepo ({ cs_name = "head"; cs_data = PropList.Data.create (); cs_plugin_data = [] }, { src_repo_type = Git; src_repo_location = "git://github.com/vbmithr/ocaml-text"; src_repo_browser = Some "http://github.com/vbmithr/ocaml-text"; src_repo_module = None; src_repo_branch = None; src_repo_tag = None; src_repo_subdir = None }) ]; plugins = [(`Extra, "META", Some "0.4"); (`Extra, "DevFiles", Some "0.4")]; disable_oasis_section = []; schema_data = PropList.Data.create (); plugin_data = [] }; oasis_fn = Some "_oasis"; oasis_version = "0.4.5"; oasis_digest = Some "h\251\144\022\1879u\190\230\218\199,\158.\202\192"; oasis_exec = None; oasis_setup_args = []; setup_update = false };; let setup () = BaseSetup.setup setup_t;; # 7262 "setup.ml" (* OASIS_STOP *) (* List of paths to search for iconv *) let search_paths = [ "/usr"; "/usr/local"; "/opt"; "/opt/local"; "/sw"; "/mingw"; "/mingw/local"; ] (* +-----------------------------------------------------------------+ | Search for iconv.h | +-----------------------------------------------------------------+ *) let search_iconv () = let rec loop = function | [] -> "" | dir :: dirs -> if Sys.file_exists (dir ^ "/include/iconv.h") then dir else loop dirs in loop search_paths let iconv_prefix = BaseEnv.var_define ~short_desc:(fun () -> "iconv installation prefix") "iconv_prefix" search_iconv (* +-----------------------------------------------------------------+ | Test whether -liconv is needed or not | +-----------------------------------------------------------------+ *) let stub_code = " #include #include CAMLprim value ocaml_text_test() { iconv_open(0, 0); return Val_unit; } " let caml_code = " external test : unit -> unit = \"ocaml_text_test\" let () = test () " let compile ocamlc log_file stub_file caml_file args = let result = ref false and dir = iconv_prefix () in OASISExec.run ~ctxt:(!OASISContext.default) ~f_exit_code:(fun x -> result := x = 0) ocamlc (List.flatten [ ["-custom"]; (if dir = "" then [] else ["-ccopt"; "-I" ^ dir ^ "/include"; "-cclib"; "-L" ^ dir ^ "/lib"]); args; [Filename.quote stub_file; Filename.quote caml_file; "2>"; Filename.quote log_file]; ]); !result let safe_remove file_name = try Sys.remove file_name with exn -> () let printf level msg = !(OASISContext.default).OASISContext.printf level msg let check_iconv () = printf `Info "Testing whether -liconv is needed"; let ocamlc = BaseEnv.var_get "ocamlc" and ext_obj = BaseEnv.var_get "ext_obj" and exec_name = BaseEnv.var_get "default_executable_name" in (* Put the code into a temporary file. *) let stub_file, oc = Filename.open_temp_file "ocaml_text_stub" ".c" in output_string oc stub_code; close_out oc; let caml_file, oc = Filename.open_temp_file "ocaml_text_caml" ".ml" in output_string oc caml_code; close_out oc; let log_file = Filename.temp_file "ocaml_text" ".log" in (* Cleanup things on exit. *) at_exit (fun () -> safe_remove log_file; safe_remove stub_file; safe_remove (Filename.chop_extension (Filename.basename stub_file) ^ ext_obj); safe_remove exec_name; safe_remove caml_file; safe_remove (Filename.chop_extension caml_file ^ ".cmi"); safe_remove (Filename.chop_extension caml_file ^ ".cmo")); (* Compile it without -liconv. *) if compile ocamlc log_file stub_file caml_file [] then "false" else if compile ocamlc log_file stub_file caml_file ["-cclib"; "-liconv"] then "true" else begin printf `Error "libiconv seems to be missing!"; exit 1 end (* Define the need_liconv variable *) let need_liconv = BaseEnv.var_define ~short_desc:(fun () -> "-liconv is needed") "need_liconv" check_iconv let () = setup () ocaml-text-0.8/src/000077500000000000000000000000001244105655100142305ustar00rootroot00000000000000ocaml-text-0.8/src/META000066400000000000000000000023221244105655100147000ustar00rootroot00000000000000# OASIS_START # DO NOT EDIT (digest: 73eda264e5285750a38263859be858c4) version = "0.7" description = "ocaml-text" requires = "bytes" archive(byte) = "text.cma" archive(byte, plugin) = "text.cma" archive(native) = "text.cmxa" archive(native, plugin) = "text.cmxs" exists_if = "text.cma" package "pcre-syntax" ( version = "0.7" description = "Syntax extension for writing human readable regexps in OCaml sources" requires = "camlp4 text" archive(syntax, preprocessor) = "pcre-syntax.cma" archive(syntax, toploop) = "pcre-syntax.cma" archive(syntax, preprocessor, native) = "pcre-syntax.cmxa" archive(syntax, preprocessor, native, plugin) = "pcre-syntax.cmxs" exists_if = "pcre-syntax.cma" ) package "pcre" ( version = "0.7" description = "ocaml-text" requires = "text pcre" archive(byte) = "pcre.cma" archive(byte, plugin) = "pcre.cma" archive(native) = "pcre.cmxa" archive(native, plugin) = "pcre.cmxs" exists_if = "pcre.cma" ) package "bigarray" ( version = "0.7" description = "ocaml-text" requires = "text bigarray" archive(byte) = "bigarray.cma" archive(byte, plugin) = "bigarray.cma" archive(native) = "bigarray.cmxa" archive(native, plugin) = "bigarray.cmxs" exists_if = "bigarray.cma" ) # OASIS_STOP ocaml-text-0.8/src/bigarray.mldylib000066400000000000000000000001461244105655100174070ustar00rootroot00000000000000# OASIS_START # DO NOT EDIT (digest: 16470106de26238ac006aa6f5cacf6c5) Encoding_bigarray # OASIS_STOP ocaml-text-0.8/src/bigarray.mllib000066400000000000000000000001461244105655100170520ustar00rootroot00000000000000# OASIS_START # DO NOT EDIT (digest: 16470106de26238ac006aa6f5cacf6c5) Encoding_bigarray # OASIS_STOP ocaml-text-0.8/src/common.h000066400000000000000000000010671244105655100156750ustar00rootroot00000000000000/* * common.h * -------- * Copyright : (c) 2010, Jeremie Dimino * Licence : BSD3 * * This file is a part of ocaml-text. */ #ifndef __COMMON_H #define __COMMON_H #include #include /* define the easiest encoding to use: */ #ifdef ARCH_BIG_ENDIAN #define NATIVE_UCS "UCS-4BE" #else #define NATIVE_UCS "UCS-4LE" #endif /* Constant for ocaml constructors: */ #define Val_need_more (Val_int(0)) #define Val_error (Val_int(1)) #define Iconv_val(v) (*(iconv_t*)Data_custom_val(v)) #endif /* __COMMON_H */ ocaml-text-0.8/src/encoding.ml000066400000000000000000000033321244105655100163510ustar00rootroot00000000000000(* * encoding.ml * ----------- * Copyright : (c) 2009, Jeremie Dimino * Licence : BSD3 * * This file is a part of ocaml-text. *) type t = string type code_point = int type decoder type encoder type decoding_result = | Dec_ok of code_point * int | Dec_need_more | Dec_error type encoding_result = | Enc_ok of int | Enc_need_more | Enc_error external init : unit -> t = "ml_text_init" external decoder : t -> decoder = "ml_text_decoder" external encoder : t -> encoder = "ml_text_encoder" external stub_decode : decoder -> string -> int -> int -> decoding_result = "ml_text_decode" external stub_encode : encoder -> string -> int -> int -> code_point -> encoding_result = "ml_text_encode" external stub_recode_string : t -> t -> string -> string = "ml_text_recode_string" let system = init () let decode decoder buf pos len = if pos < 0 || pos + len > String.length buf then invalid_arg "Encoding.decode" else stub_decode decoder buf pos len let encode decoder buf pos len code = if pos < 0 || pos + len > String.length buf then invalid_arg "Encoding.encode" else stub_encode decoder buf pos len code let equal a b = let len_a = String.length a and len_b = String.length b in let rec loop i = let end_of_a = i = len_a || (i + 2 <= len_a && a.[i] = '/' && a.[i + 1] = '/') and end_of_b = i = len_b || (i + 2 <= len_b && b.[i] = '/' && b.[i + 1] = '/') in if end_of_a && end_of_b then true else if end_of_a || end_of_b then false else if Char.lowercase a.[i] = Char.lowercase b.[i] then loop (i + 1) else false in loop 0 let recode_string ~src ~dst str = if equal src dst then str else stub_recode_string src dst str ocaml-text-0.8/src/encoding.mli000066400000000000000000000044121244105655100165220ustar00rootroot00000000000000(* * encoding.mli * ------------ * Copyright : (c) 2009, Jeremie Dimino * Licence : BSD3 * * This file is a part of ocaml-text. *) (** {6 Character encodings} *) type t = string (** Type of a character encoding *) val equal : t -> t -> bool (** [equal e1 e2] returns whether [e1] and [e2] denotes the same encoding. It does a caseless comparison of [e1] or [e2] without optionnal suffixes ("//IGNORE" or "//TRANSLIT"). *) val system : t (** The character encoding used by the system *) type code_point = int (** Type of a unicode code-point. *) val recode_string : src : t -> dst : t -> string -> string (** [recode_string ~src ~dst str] recode [str] from [src] encoding to [dst] encoding. *) (** {6 Decoding} *) type decoder (** Type of a decoder *) val decoder : t -> decoder (** Creates a decoder from an encoding-name *) (** Result of a decoding operation *) type decoding_result = | Dec_ok of code_point * int (** [Dec_ok(code, num)] means that the operation succeed. [code] is the unicode code-point read and [num] is the number of bytes read by the decoder. *) | Dec_need_more (** [Dec_not_finished] means that the input contains a not-terminated sequence *) | Dec_error (** [Dec_error] means that the input contains an invalid sequence *) val decode : decoder -> string -> int -> int -> decoding_result (** [decode decoder buffer ptr len] decodes with [decoder] bytes at position [ptr] in [buffer] *) (** {6 Encoding} *) type encoder (** Type of an encoder *) val encoder : t -> encoder (** Creates an encoder from an encoding-name *) (** Result of an encoding operation *) type encoding_result = | Enc_ok of int (** [Enc_ok num] means that the operation succeed and [num] bytes have been written. *) | Enc_need_more (** [Enc_need_more] means that there is not enough space in the output to ouput all bytes. *) | Enc_error (** [Enc_error] means that the given code-point cannot be encoded in the given encoding *) val encode : encoder -> string -> int -> int -> code_point -> encoding_result (** [encode decoder buffer ptr len code] encodes [code] with [encoder] at position [ptr] in [buffer] *) ocaml-text-0.8/src/encoding_bigarray.ml000066400000000000000000000015521244105655100202330ustar00rootroot00000000000000(* * encoding_bigarray.ml * -------------------- * Copyright : (c) 2010, Jeremie Dimino * Licence : BSD3 * * This file is a part of ocaml-text. *) open Bigarray type byte_array = (char, int8_unsigned_elt, c_layout) Array1.t external stub_decode : Encoding.decoder -> byte_array -> int -> int -> Encoding.decoding_result = "ml_text_decode_bigarray" external stub_encode : Encoding.encoder -> byte_array -> int -> int -> Encoding.code_point -> Encoding.encoding_result = "ml_text_encode_bigarray" let decode decoder buf pos len = if pos < 0 || pos + len > Array1.dim buf then invalid_arg "Encoding_bigarray.decode" else stub_decode decoder buf pos len let encode decoder buf pos len code = if pos < 0 || pos + len > Array1.dim buf then invalid_arg "Encoding_bigarray.encode" else stub_encode decoder buf pos len code ocaml-text-0.8/src/encoding_bigarray.mli000066400000000000000000000010231244105655100203750ustar00rootroot00000000000000(* * encoding_bigarray.mli * --------------------- * Copyright : (c) 2010, Jeremie Dimino * Licence : BSD3 * * This file is a part of ocaml-text. *) (** Encoding/decoding to/from bigarrays *) open Bigarray type byte_array = (char, int8_unsigned_elt, c_layout) Array1.t (** Type of array of bytes. *) val decode : Encoding.decoder -> byte_array -> int -> int -> Encoding.decoding_result val encode : Encoding.encoder -> byte_array -> int -> int -> Encoding.code_point -> Encoding.encoding_result ocaml-text-0.8/src/libbigarray_stubs.clib000066400000000000000000000001471244105655100205740ustar00rootroot00000000000000# OASIS_START # DO NOT EDIT (digest: c88a58b3bacd1ec84bad4de7d87430a9) ml_text_bigarray.o # OASIS_STOP ocaml-text-0.8/src/libtext-bigarray_stubs.clib000066400000000000000000000001471244105655100215560ustar00rootroot00000000000000# OASIS_START # DO NOT EDIT (digest: c88a58b3bacd1ec84bad4de7d87430a9) ml_text_bigarray.o # OASIS_STOP ocaml-text-0.8/src/libtext_stubs.clib000066400000000000000000000001361244105655100177560ustar00rootroot00000000000000# OASIS_START # DO NOT EDIT (digest: 4e640edfc439673dda97ff9ec3ddb65a) ml_text.o # OASIS_STOP ocaml-text-0.8/src/ml_text.c000066400000000000000000000206561244105655100160610ustar00rootroot00000000000000/* * ml_text.c * --------- * Copyright : (c) 2009, Jeremie Dimino * Licence : BSD3 * * This file is a part of ocaml-text. */ #define _ISOC99_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include /* There is no nl_langinfo on windows: */ #ifdef _WIN32 #include #else #include #endif #include "common.h" /* +-----------------------------------------------------------------+ | Custom block for iconv descriptors | +-----------------------------------------------------------------+ */ void ml_iconv_finalize(value cd) { iconv_close(Iconv_val(cd)); } int ml_iconv_compare(value v1, value v2) { return (int)((long)Iconv_val(v1) - (long)Iconv_val(v2)); } long ml_iconv_hash(value v) { return (long)Iconv_val(v); } static struct custom_operations ops = { "iconv", ml_iconv_finalize, ml_iconv_compare, ml_iconv_hash, custom_serialize_default, custom_deserialize_default }; /* +-----------------------------------------------------------------+ | Initialization | +-----------------------------------------------------------------+ */ /* This function returns the system encoding: */ CAMLprim value ml_text_init(value unit) { CAMLparam1(unit); /* Set the locale acording to environment variables: */ setlocale(LC_CTYPE, ""); setlocale(LC_COLLATE, ""); #ifdef _WIN32 /* Use codepage on windows */ char codeset[128]; sprintf(codeset, "CP%d", GetACP()); CAMLreturn(caml_copy_string(codeset)); #else /* Get the codeset used by current locale: */ char *codeset = nl_langinfo(CODESET); /* If the encoding cannot be determined, just use ascii: */ CAMLreturn(caml_copy_string(codeset ? codeset : "ASCII")); #endif } /* +-----------------------------------------------------------------+ | Decoding | +-----------------------------------------------------------------+ */ CAMLprim value ml_text_decoder(value enc) { CAMLparam1(enc); /* A decoder is an iconv descriptor from enc to UCS-4: */ iconv_t cd = iconv_open(NATIVE_UCS, String_val(enc)); if (cd == (iconv_t)-1) caml_failwith("Encoding.decoder: invalid encoding"); else { value result = caml_alloc_custom(&ops, sizeof(iconv_t), 0, 1); *(iconv_t*) Data_custom_val(result) = cd; CAMLreturn(result); } } CAMLprim value ml_text_decode(value cd_val, value buf_val, value pos_val, value len_val) { CAMLparam4(cd_val, buf_val, pos_val, len_val); uint32_t code; size_t len = Long_val(len_val); size_t in_left = len; char *in_bytes = String_val(buf_val) + Long_val(pos_val); size_t out_left = 4; char *out_bytes = (char*)&code; iconv(Iconv_val(cd_val), &in_bytes, &in_left, &out_bytes, &out_left); if (out_left == 0) { value result = caml_alloc_tuple(2); Store_field(result, 0, Val_int(code)); Store_field(result, 1, Val_int(len - in_left)); CAMLreturn(result); } else if (errno == EINVAL) CAMLreturn(Val_need_more); else CAMLreturn(Val_error); } /* +-----------------------------------------------------------------+ | Encoding | +-----------------------------------------------------------------+ */ CAMLprim value ml_text_encoder(value enc) { CAMLparam1(enc); /* A decoder is an iconv descriptor from UCS-4 to enc: */ iconv_t cd = iconv_open(String_val(enc), NATIVE_UCS); if (cd == (iconv_t)-1) caml_failwith("Encoding.encoder: invalid encoding"); else { value result = caml_alloc_custom(&ops, sizeof(iconv_t), 0, 1); *(iconv_t*) Data_custom_val(result) = cd; CAMLreturn(result); } } CAMLprim value ml_text_encode(value cd_val, value buf_val, value pos_val, value len_val, value code_val) { CAMLparam5(cd_val, buf_val, pos_val, len_val, code_val); uint32_t code = Int_val(code_val); size_t len = Long_val(len_val); size_t in_left = 4; char *in_bytes = (char*)&code; size_t out_left = len; char *out_bytes = String_val(buf_val) + Long_val(pos_val); iconv(Iconv_val(cd_val), &in_bytes, &in_left, &out_bytes, &out_left); if (in_left == 0) { value result = caml_alloc_tuple(1); Store_field(result, 0, Val_int(len - out_left)); CAMLreturn(result); } else if (errno == E2BIG) CAMLreturn(Val_need_more); else CAMLreturn(Val_error); } /* +-----------------------------------------------------------------+ | Character utilities | +-----------------------------------------------------------------+ */ value ml_text_upper(value ch) { return Val_int(towupper(Int_val(ch))); } value ml_text_lower(value ch) { return Val_int(towlower(Int_val(ch))); } #define IS(name) value ml_text_is_##name(value ch) { return Val_bool(isw##name(Int_val(ch))); } IS(alnum) IS(alpha) IS(blank) IS(cntrl) IS(digit) IS(graph) IS(lower) IS(print) IS(punct) IS(space) IS(upper) IS(xdigit) /* +-----------------------------------------------------------------+ | Text comparison | +-----------------------------------------------------------------+ */ CAMLprim value ml_text_compare(value s1, value s2) { CAMLparam2(s1, s2); int res = strcoll(String_val(s1), String_val(s2)); if (res < 0) CAMLreturn(Val_int(-1)); else if (res > 0) CAMLreturn(Val_int(1)); else CAMLreturn(Val_int(0)); } /* +-----------------------------------------------------------------+ | String recoding | +-----------------------------------------------------------------+ */ CAMLprim value ml_text_recode_string(value enc_src, value enc_dst, value str) { CAMLparam3(str, enc_src, enc_dst); CAMLlocal1(result); iconv_t cd = iconv_open(String_val(enc_dst), String_val(enc_src)); if (cd == (iconv_t)-1) caml_failwith("Encoding.recode_string: invalid encoding"); /* Length of the output buffer. It is initialised to the length of the input string, which should be a good approximation: */ size_t len = caml_string_length(str); /* Pointer to the beginning of the output buffer. The +1 is for the NULL terminating byte. */ char *dst_buffer = malloc(len + 1); if (dst_buffer == NULL) caml_failwith("Encoding.recode_string: out of memory"); /* iconv arguments */ char *src_bytes = String_val(str); char *dst_bytes = dst_buffer; size_t src_remaining = len; size_t dst_remaining = len; while (src_remaining) { size_t count = iconv (cd, &src_bytes, &src_remaining, &dst_bytes, &dst_remaining); if (count == (size_t) -1) { switch (errno) { case EILSEQ: free(dst_buffer); iconv_close(cd); caml_failwith("Encoding.recode_string: invalid multibyte sequence found in the input"); case EINVAL: free(dst_buffer); iconv_close(cd); caml_failwith("Encoding.recode_string: incomplete multibyte sequence found in the input"); case E2BIG: { /* Ouput offest relative to the beginning of the destination buffer: */ size_t offset = dst_bytes - dst_buffer; /* Try with a buffer 2 times bigger: */ len *= 2; dst_buffer = realloc(dst_buffer, len + 1); if (dst_buffer == NULL) caml_failwith("Encoding.recode_string: out of memory"); dst_bytes = dst_buffer + offset; dst_remaining += len; break; } default: free(dst_buffer); iconv_close(cd); caml_failwith("Encoding.recode_string: unknown error"); } } }; *dst_bytes = 0; result = caml_alloc_string(dst_bytes - dst_buffer); memcpy(String_val(result), dst_buffer, dst_bytes - dst_buffer); /* Clean-up */ free(dst_buffer); iconv_close(cd); CAMLreturn(result); } /* +-----------------------------------------------------------------+ | Text normalization | +-----------------------------------------------------------------+ */ CAMLprim value ml_text_strxfrm(value string) { CAMLparam1(string); size_t length = strxfrm(NULL, String_val(string), 0); char buffer[length + 1]; strxfrm(buffer, String_val(string), length + 1); CAMLreturn(caml_copy_string(buffer)); } ocaml-text-0.8/src/ml_text_bigarray.c000066400000000000000000000034321244105655100177320ustar00rootroot00000000000000/* * ml_text_bigarray.c * ------------------ * Copyright : (c) 2010, Jeremie Dimino * Licence : BSD3 * * This file is a part of ocaml-text. */ #include #include #include #include #include #include #include #include #include #include "common.h" CAMLprim value ml_text_decode_bigarray(value cd_val, value buf_val, value pos_val, value len_val) { CAMLparam4(cd_val, buf_val, pos_val, len_val); uint32_t code; size_t len = Long_val(len_val); size_t in_left = len; char *in_bytes = (char*)Caml_ba_data_val(buf_val) + Long_val(pos_val); size_t out_left = 4; char *out_bytes = (char*)&code; iconv(Iconv_val(cd_val), &in_bytes, &in_left, &out_bytes, &out_left); if (out_left == 0) { value result = caml_alloc_tuple(2); Store_field(result, 0, Val_int(code)); Store_field(result, 1, Val_int(len - in_left)); CAMLreturn(result); } else if (errno == EINVAL) CAMLreturn(Val_need_more); else CAMLreturn(Val_error); } CAMLprim value ml_text_encode_bigarray(value cd_val, value buf_val, value pos_val, value len_val, value code_val) { CAMLparam5(cd_val, buf_val, pos_val, len_val, code_val); uint32_t code = Int_val(code_val); size_t len = Long_val(len_val); size_t in_left = 4; char *in_bytes = (char*)&code; size_t out_left = len; char *out_bytes = (char*)Caml_ba_data_val(buf_val) + Long_val(pos_val); iconv(Iconv_val(cd_val), &in_bytes, &in_left, &out_bytes, &out_left); if (in_left == 0) { value result = caml_alloc_tuple(1); Store_field(result, 0, Val_int(len - out_left)); CAMLreturn(result); } else if (errno == E2BIG) CAMLreturn(Val_need_more); else CAMLreturn(Val_error); } ocaml-text-0.8/src/pcre.mldylib000066400000000000000000000001361244105655100165370ustar00rootroot00000000000000# OASIS_START # DO NOT EDIT (digest: 2d5d79d2f665014686840169f9f5f4f3) Text_pcre # OASIS_STOP ocaml-text-0.8/src/pcre.mllib000066400000000000000000000001361244105655100162020ustar00rootroot00000000000000# OASIS_START # DO NOT EDIT (digest: 2d5d79d2f665014686840169f9f5f4f3) Text_pcre # OASIS_STOP ocaml-text-0.8/src/text-bigarray.mldylib000066400000000000000000000001461244105655100203710ustar00rootroot00000000000000# OASIS_START # DO NOT EDIT (digest: 16470106de26238ac006aa6f5cacf6c5) Encoding_bigarray # OASIS_STOP ocaml-text-0.8/src/text-bigarray.mllib000066400000000000000000000001461244105655100200340ustar00rootroot00000000000000# OASIS_START # DO NOT EDIT (digest: 16470106de26238ac006aa6f5cacf6c5) Encoding_bigarray # OASIS_STOP ocaml-text-0.8/src/text-pcre.mldylib000066400000000000000000000001361244105655100175210ustar00rootroot00000000000000# OASIS_START # DO NOT EDIT (digest: 2d5d79d2f665014686840169f9f5f4f3) Text_pcre # OASIS_STOP ocaml-text-0.8/src/text-pcre.mllib000066400000000000000000000001361244105655100171640ustar00rootroot00000000000000# OASIS_START # DO NOT EDIT (digest: 2d5d79d2f665014686840169f9f5f4f3) Text_pcre # OASIS_STOP ocaml-text-0.8/src/text.ml000066400000000000000000000636331244105655100155610ustar00rootroot00000000000000(* * text.ml * ------- * Copyright : (c) 2009, Jeremie Dimino * Licence : BSD3 * * This file is a part of ocaml-text. *) type t = string exception Invalid of string * string let fail str pos msg = raise (Invalid(Printf.sprintf "at position %d: %s" pos msg, str)) (* +-----------------------------------------------------------------+ | Unsafe primitives (use with caution!) | +-----------------------------------------------------------------+ *) let byte str i = Char.code (String.unsafe_get str i) let set_byte str i n = Bytes.unsafe_set str i (Char.unsafe_chr n) let unsafe_sub str ofs len = let res = Bytes.create len in String.unsafe_blit str ofs res 0 len; Bytes.unsafe_to_string res (* +-----------------------------------------------------------------+ | UTF-8 validation | +-----------------------------------------------------------------+ *) let check s = let fail i msg = Some(Printf.sprintf "at position %d: %s" i msg) in let len = String.length s in let rec main i = if i = len then None else let ch = String.unsafe_get s i in match ch with | '\x00' .. '\x7f' -> main (i + 1) | '\xc0' .. '\xdf' -> if i + 1 >= len then fail len "premature end of UTF8 sequence" else begin let byte1 = Char.code (String.unsafe_get s (i + 1)) in if byte1 land 0xc0 != 0x80 then fail (i + 1) "malformed UTF8 sequence" else if ((Char.code ch land 0x1f) lsl 6) lor (byte1 land 0x3f) < 0x80 then fail i "overlong UTF8 sequence" else main (i + 2) end | '\xe0' .. '\xef' -> if i + 2 >= len then fail len "premature end of UTF8 sequence" else begin let byte1 = Char.code (String.unsafe_get s (i + 1)) and byte2 = Char.code (String.unsafe_get s (i + 2)) in if byte1 land 0xc0 != 0x80 then fail (i + 1) "malformed UTF8 sequence" else if byte2 land 0xc0 != 0x80 then fail (i + 2) "malformed UTF8 sequence" else if ((Char.code ch land 0x0f) lsl 12) lor ((byte1 land 0x3f) lsl 6) lor (byte2 land 0x3f) < 0x800 then fail i "overlong UTF8 sequence" else main (i + 3) end | '\xf0' .. '\xf7' -> if i + 3 >= len then fail len "premature end of UTF8 sequence" else begin let byte1 = Char.code (String.unsafe_get s (i + 1)) and byte2 = Char.code (String.unsafe_get s (i + 2)) and byte3 = Char.code (String.unsafe_get s (i + 3)) in if byte1 land 0xc0 != 0x80 then fail (i + 1) "malformed UTF8 sequence" else if byte2 land 0xc0 != 0x80 then fail (i + 2) "malformed UTF8 sequence" else if byte3 land 0xc0 != 0x80 then fail (i + 3) "malformed UTF8 sequence" else if ((Char.code ch land 0x07) lsl 18) lor ((byte1 land 0x3f) lsl 12) lor ((byte2 land 0x3f) lsl 6) lor (byte3 land 0x3f) < 0x10000 then fail i "overlong UTF8 sequence" else main (i + 4) end | _ -> fail i "invalid start of UTF8 sequence" in main 0 let invalid str = match check str with | None -> raise (Invalid("", str)) | Some msg -> raise (Invalid(msg, str)) let validate str = match check str with | None -> () | Some msg -> raise (Invalid(msg, str)) (* +-----------------------------------------------------------------+ | Encoding/decoding | +-----------------------------------------------------------------+ *) let sys_encoding = Encoding.system ^ "//TRANSLIT" let encode ?(encoding=sys_encoding) txt = Encoding.recode_string ~src:"UTF-8" ~dst:encoding txt let decode ?(encoding=sys_encoding) txt = Encoding.recode_string ~src:encoding ~dst:"UTF-8" txt let to_ascii txt = encode ~encoding:"ASCII//TRANSLIT" txt (* +-----------------------------------------------------------------+ | Unsafe UTF-8 naviguation | +-----------------------------------------------------------------+ *) let unsafe_next str ofs = match String.unsafe_get str ofs with | '\x00' .. '\x7f' -> ofs + 1 | '\xc0' .. '\xdf' -> if ofs + 2 > String.length str then fail str ofs "unterminated UTF-8 sequence" else ofs + 2 | '\xe0' .. '\xef' -> if ofs + 3 > String.length str then fail str ofs "unterminated UTF-8 sequence" else ofs + 3 | '\xf0' .. '\xf7' -> if ofs + 4 > String.length str then fail str ofs "unterminated UTF-8 sequence" else ofs + 4 | _ -> fail str ofs "invalid start of UTF-8 sequence" let unsafe_prev str ofs = match String.unsafe_get str (ofs - 1) with | '\x00' .. '\x7f' -> ofs - 1 | '\x80' .. '\xbf' -> if ofs >= 2 then match String.unsafe_get str (ofs - 2) with | '\xc0' .. '\xdf' -> ofs - 2 | '\x80' .. '\xbf' -> if ofs >= 3 then match String.unsafe_get str (ofs - 3) with | '\xe0' .. '\xef' -> ofs - 3 | '\x80' .. '\xbf' -> if ofs >= 4 then match String.unsafe_get str (ofs - 4) with | '\xf0' .. '\xf7' -> ofs - 4 | _ -> fail str (ofs - 4) "invalid start of UTF-8 sequence" else fail str (ofs - 3) "invalid start of UTF-8 string" | _ -> fail str (ofs - 3) "invalid middle of UTF-8 sequence" else fail str (ofs - 2) "invaild start of UTF-8 string" | _ -> fail str (ofs - 2) "invalid middle of UTF-8 sequence" else fail str (ofs - 1) "invalid start of UTF-8 string" | _ -> fail str (ofs - 1) "invalid end of UTF-8 sequence" let unsafe_extract_next str ofs = let ch = String.unsafe_get str ofs in match ch with | '\x00' .. '\x7f' -> (Char.code ch, ofs + 1) | '\xc0' .. '\xdf' -> if ofs + 2 > String.length str then fail str ofs "unterminated UTF-8 sequence" else (((Char.code ch land 0x1f) lsl 6) lor (byte str (ofs + 1) land 0x3f), ofs + 2) | '\xe0' .. '\xef' -> if ofs + 3 > String.length str then fail str ofs "unterminated UTF-8 sequence" else (((Char.code ch land 0x0f) lsl 12) lor ((byte str (ofs + 1) land 0x3f) lsl 6) lor (byte str (ofs + 2) land 0x3f), ofs + 3) | '\xf0' .. '\xf7' -> if ofs + 4 > String.length str then fail str ofs "unterminated UTF-8 sequence" else (((Char.code ch land 0x07) lsl 18) lor ((byte str (ofs + 1) land 0x3f) lsl 12) lor ((byte str (ofs + 2) land 0x3f) lsl 6) lor (byte str (ofs + 3) land 0x3f), ofs + 4) | _ -> fail str ofs "invalid start of UTF-8 sequence" (* +-----------------------------------------------------------------+ | Pointers | +-----------------------------------------------------------------+ *) type pointer = { txt : t; (* The text to which we are pointing *) ofs : int; (* The position in bytes *) } let pointer_l txt = { txt = txt; ofs = 0 } let pointer_r txt = { txt = txt; ofs = String.length txt } let offset ptr = ptr.ofs let next ptr = let len = String.length ptr.txt in if ptr.ofs = len then None else let ofs = unsafe_next ptr.txt ptr.ofs in Some(unsafe_sub ptr.txt ptr.ofs (ofs - ptr.ofs), { ptr with ofs = ofs }) let prev ptr = if ptr.ofs = 0 then None else let ofs = unsafe_prev ptr.txt ptr.ofs in Some(unsafe_sub ptr.txt ofs (ptr.ofs - ofs), { ptr with ofs = ofs }) let rec move_offset_l str ofs len = if len = 0 then ofs else if ofs = String.length str then invalid_arg "Text.move" else move_offset_l str (unsafe_next str ofs) (len - 1) let rec move_offset_r str ofs len = if len = 0 then ofs else if ofs = 0 then invalid_arg "Text.move" else move_offset_r str (unsafe_prev str ofs) (len + 1) let move_offset str ofs len = if len = 0 then ofs else if len > 0 then move_offset_l str ofs len else move_offset_r str ofs len let move len ptr = { ptr with ofs = move_offset ptr.txt ptr.ofs len } let chunk a b = if a.txt != b.txt then invalid_arg "Text.chunk" else if a.ofs < b.ofs then unsafe_sub a.txt a.ofs (b.ofs - a.ofs) else unsafe_sub a.txt b.ofs (a.ofs - b.ofs) let offset_at txt idx = if idx < 0 then move_offset_r txt (String.length txt) idx else move_offset_l txt 0 idx let pointer_at txt idx = { txt = txt; ofs = offset_at txt idx } let rec position_rec ptr ofs pos = if ofs >= ptr.ofs then pos else position_rec ptr (unsafe_next ptr.txt ofs) (pos + 1) let position ptr = position_rec ptr 0 0 let rec ptr_equal_at_aux txt sub ofs = function | -1 -> true | n -> byte txt (ofs + n) = byte sub n && ptr_equal_at_aux txt sub ofs (n - 1) let ptr_equal_at txt ofs sub len sub_len = if ofs + sub_len > len then false else ptr_equal_at_aux txt sub ofs (sub_len - 1) let equal_at ptr sub = ptr_equal_at ptr.txt ptr.ofs sub (String.length ptr.txt) (String.length sub) (* +-----------------------------------------------------------------+ | High-level functions | +-----------------------------------------------------------------+ *) let rec length_rec str ofs len = if ofs = String.length str then len else length_rec str (unsafe_next str ofs) (len + 1) let length str = length_rec str 0 0 let code str = if str = "" then invalid_arg "Text.code" else let ch = String.unsafe_get str 0 in match ch with | '\x00' .. '\x7f' -> Char.code ch | '\xc0' .. '\xdf' -> if 2 > String.length str then fail str 0 "unterminated UTF-8 sequence" else ((Char.code ch land 0x1f) lsl 6) lor (byte str 1 land 0x3f) | '\xe0' .. '\xef' -> if 3 > String.length str then fail str 0 "unterminated UTF-8 sequence" else ((Char.code ch land 0x0f) lsl 12) lor ((byte str 1 land 0x3f) lsl 6) lor (byte str 2 land 0x3f) | '\xf0' .. '\xf7' -> if 4 > String.length str then fail str 0 "unterminated UTF-8 sequence" else ((Char.code ch land 0x07) lsl 18) lor ((byte str 1 land 0x3f) lsl 12) lor ((byte str 2 land 0x3f) lsl 6) lor (byte str 3 land 0x3f) | _ -> fail str 0 "invalid start of UTF-8 sequence" let char code = if code < 0 then invalid_arg "Text.char" else if code < 0x80 then begin let s = Bytes.create 1 in set_byte s 0 code; Bytes.unsafe_to_string s end else if code <= 0x800 then begin let s = Bytes.create 2 in set_byte s 0 ((code lsr 6) lor 0xc0); set_byte s 1 ((code land 0x3f) lor 0x80); Bytes.unsafe_to_string s end else if code <= 0x10000 then begin let s = Bytes.create 3 in set_byte s 0 ((code lsr 12) lor 0xe0); set_byte s 1 (((code lsr 6) land 0x3f) lor 0x80); set_byte s 2 ((code land 0x3f) lor 0x80); Bytes.unsafe_to_string s end else if code <= 0x10ffff then begin let s = Bytes.create 4 in set_byte s 0 ((code lsr 18) lor 0xf0); set_byte s 1 (((code lsr 12) land 0x3f) lor 0x80); set_byte s 2 (((code lsr 6) land 0x3f) lor 0x80); set_byte s 3 ((code land 0x3f) lor 0x80); Bytes.unsafe_to_string s end else invalid_arg "Text.char" let get txt idx = let ofs = offset_at txt idx in if ofs = String.length txt then invalid_arg "Text.get" else let ofs' = unsafe_next txt ofs in unsafe_sub txt ofs (ofs' - ofs) let sub txt idx len = let a = offset_at txt idx in let b = move_offset txt a len in if a > b then unsafe_sub txt b (a - b) else unsafe_sub txt a (b - a) let slice txt a b = let a = offset_at txt a and b = offset_at txt b in if a > b then invalid_arg "Text.slice" else unsafe_sub txt a (b - a) let splice txt a b repl = let a = offset_at txt a and b = offset_at txt b in if a > b then invalid_arg "Text.slice" else begin let res = Bytes.create (a + String.length repl + String.length txt - b) in String.unsafe_blit txt 0 res 0 a; String.unsafe_blit repl 0 res a (String.length repl); String.unsafe_blit txt b res (a + String.length repl) (String.length txt - b); Bytes.unsafe_to_string res end let repeat n txt = let len = String.length txt in let res = Bytes.create (n * len) in let ofs = ref 0 in for i = 1 to n do String.unsafe_blit txt 0 res !ofs len; ofs := !ofs + len done; Bytes.unsafe_to_string res let rec iter_rec f txt ofs = if ofs <> String.length txt then begin let ofs' = unsafe_next txt ofs in f (unsafe_sub txt ofs (ofs' - ofs)); iter_rec f txt ofs' end let iter f txt = iter_rec f txt 0 let rec rev_iter_rec f txt ofs = if ofs <> 0 then begin let ofs' = unsafe_prev txt ofs in f (unsafe_sub txt ofs' (ofs - ofs')); rev_iter_rec f txt ofs' end let rev_iter f txt = rev_iter_rec f txt (String.length txt) let rev txt = let len = String.length txt in let ofs_src = ref len and ofs_dst = ref 0 in let res = Bytes.create len in while !ofs_src > 0 do let ofs = unsafe_prev txt !ofs_src in let len = !ofs_src - ofs in String.unsafe_blit txt ofs res !ofs_dst len; ofs_src := ofs; ofs_dst := !ofs_dst + len done; Bytes.unsafe_to_string res let init n f = let buf = Buffer.create n in for i = 0 to n - 1 do Buffer.add_string buf (f i) done; Buffer.contents buf let rev_init n f = let buf = Buffer.create n in for i = n - 1 downto 0 do Buffer.add_string buf (f i) done; Buffer.contents buf let concat sep l = match l with | [] -> "" | x :: l -> let sep_len = String.length sep in let len = List.fold_left (fun len str -> len + sep_len + String.length str) (String.length x) l in let res = Bytes.create len in String.unsafe_blit x 0 res 0 (String.length x); ignore (List.fold_left (fun ofs str -> String.unsafe_blit sep 0 res ofs sep_len; let ofs = ofs + sep_len in let len = String.length str in String.unsafe_blit str 0 res ofs len; ofs + len) (String.length x) l); Bytes.unsafe_to_string res let rev_concat sep l = match l with | [] -> "" | x :: l -> let sep_len = String.length sep in let len = List.fold_left (fun len str -> len + sep_len + String.length str) (String.length x) l in let res = Bytes.create len in let ofs = len - String.length x in String.unsafe_blit x 0 res ofs (String.length x); ignore (List.fold_left (fun ofs str -> let ofs = ofs - sep_len in String.unsafe_blit sep 0 res ofs sep_len; let len = String.length str in let ofs = ofs - len in String.unsafe_blit str 0 res ofs len; ofs) ofs l); Bytes.unsafe_to_string res let explode txt = let l = ref [] in rev_iter (fun t -> l := t :: !l) txt; !l let rev_explode txt = let l = ref [] in iter (fun t -> l := t :: !l) txt; !l let implode l = concat "" l let rev_implode l = rev_concat "" l let map f txt = let buf = Buffer.create (String.length txt) in iter (fun ch -> Buffer.add_string buf (f ch)) txt; Buffer.contents buf let rev_map f txt = let buf = Buffer.create (String.length txt) in rev_iter (fun ch -> Buffer.add_string buf (f ch)) txt; Buffer.contents buf let fold f txt acc = let acc = ref acc in iter (fun ch -> acc := f ch !acc) txt; !acc let rev_fold f txt acc = let acc = ref acc in rev_iter (fun ch -> acc := f ch !acc) txt; !acc let filter f txt = map (fun ch -> if f ch then ch else "") txt let rev_filter f txt = rev_map (fun ch -> if f ch then ch else "") txt let rec for_all_rec f txt ofs = if ofs = String.length txt then true else begin let ofs' = unsafe_next txt ofs in f (unsafe_sub txt ofs (ofs' - ofs)) && for_all_rec f txt ofs' end let for_all f txt = for_all_rec f txt 0 let rec exists_rec f txt ofs = if ofs = String.length txt then false else begin let ofs' = unsafe_next txt ofs in f (unsafe_sub txt ofs (ofs' - ofs)) || exists_rec f txt ofs' end let exists f txt = exists_rec f txt 0 let count f txt = let c = ref 0 in iter (fun ch -> if f ch then incr c) txt; !c (* +-----------------------------------------------------------------+ | Character class | +-----------------------------------------------------------------+ *) external ml_is_alnum : int -> bool = "ml_text_is_alnum" external ml_is_alpha : int -> bool = "ml_text_is_alpha" external ml_is_blank : int -> bool = "ml_text_is_blank" external ml_is_cntrl : int -> bool = "ml_text_is_cntrl" external ml_is_digit : int -> bool = "ml_text_is_digit" external ml_is_graph : int -> bool = "ml_text_is_graph" external ml_is_lower : int -> bool = "ml_text_is_lower" external ml_is_print : int -> bool = "ml_text_is_print" external ml_is_punct : int -> bool = "ml_text_is_punct" external ml_is_space : int -> bool = "ml_text_is_space" external ml_is_upper : int -> bool = "ml_text_is_upper" external ml_is_xdigit : int -> bool = "ml_text_is_xdigit" let rec for_all_code_rec f txt ofs = if ofs = String.length txt then true else begin let code, ofs' = unsafe_extract_next txt ofs in f code && for_all_code_rec f txt ofs' end let for_all_code f txt = for_all_code_rec f txt 0 let is_ascii s = let rec loop = function | -1 -> true | i -> byte s i < 128 && loop (i - 1) in loop (String.length s - 1) let is_alnum = for_all_code ml_is_alnum let is_alpha = for_all_code ml_is_alpha let is_blank = for_all_code ml_is_blank let is_cntrl = for_all_code ml_is_cntrl let is_digit = for_all_code ml_is_digit let is_graph = for_all_code ml_is_graph let is_lower = for_all_code ml_is_lower let is_print = for_all_code ml_is_print let is_punct = for_all_code ml_is_punct let is_space = for_all_code ml_is_space let is_upper = for_all_code ml_is_upper let is_xdigit = for_all_code ml_is_xdigit (* +-----------------------------------------------------------------+ | Searching, Splitting, ... | +-----------------------------------------------------------------+ *) let words txt = let rec loop ptr = match next ptr with | Some(ch, ptr') -> if is_punct ch || is_space ch then loop ptr' else loop_word ptr ptr' | None -> [] and loop_word ptr_start ptr = match next ptr with | Some(ch, ptr') -> if is_punct ch || is_space ch then chunk ptr_start ptr :: loop ptr' else loop_word ptr_start ptr' | None -> [chunk ptr_start ptr] in loop (pointer_l txt) let lines txt = let rec loop start_ptr ptr = match next ptr with | Some("\n", ptr') -> chunk start_ptr ptr :: loop ptr' ptr' | Some("\r", ptr') -> begin match next ptr' with | Some("\n", ptr') -> chunk start_ptr ptr :: loop ptr' ptr' | Some(ch, ptr') -> loop start_ptr ptr' | None -> match chunk start_ptr ptr with | "" -> [] | t -> [t] end | Some(ch, ptr) -> loop start_ptr ptr | None -> match chunk start_ptr ptr with | "" -> [] | t -> [t] in let ptr = pointer_l txt in loop ptr ptr let split ?(max=max_int) ?(sep=" ") txt = let len = String.length txt and sep_len = String.length sep in let rec loop ofs = function | 0 -> [] | 1 -> [String.sub txt ofs (len - ofs)] | rem -> loop_word ofs ofs rem and loop_word start_ofs ofs rem = if ofs = len then [String.sub txt start_ofs (ofs - start_ofs)] else if ptr_equal_at txt ofs sep len sep_len then String.sub txt start_ofs (ofs - start_ofs) :: loop (ofs + sep_len) (rem - 1) else loop_word start_ofs (unsafe_next txt ofs) rem in if sep = "" then explode txt else loop 0 max let rev_split ?(max=max_int) ?(sep=" ") txt = let len = String.length txt and sep_len = String.length sep in let rec loop acc ofs = function | 0 -> acc | 1 -> String.sub txt 0 ofs :: acc | rem -> loop_word acc ofs ofs rem and loop_word acc ofs end_ofs rem = if ofs = 0 then String.sub txt 0 end_ofs :: acc else let ofs = unsafe_prev txt ofs in if ptr_equal_at txt ofs sep len sep_len then loop (String.sub txt (ofs + sep_len) (end_ofs - ofs - sep_len) :: acc) ofs (rem - 1) else loop_word acc ofs end_ofs rem in if sep = "" then explode txt else loop [] len max let find ?from txt patt = let len = String.length txt and patt_len = String.length patt in let rec loop ofs = if ofs + patt_len > len then None else if ptr_equal_at txt ofs patt len patt_len then Some{ txt = txt; ofs = ofs } else loop (unsafe_next txt ofs) in loop (match from with | Some ptr -> ptr.ofs | None -> 0) let rev_find ?from txt patt = let len = String.length txt and patt_len = String.length patt in let rec loop ofs = if ofs < 0 then None else if ptr_equal_at txt ofs patt len patt_len then Some{ txt = txt; ofs = ofs } else loop (unsafe_prev txt ofs) in loop (match from with | Some ptr -> ptr.ofs | None -> len) let replace text ~patt ~repl = let len = String.length text and patt_len = String.length patt in let res = Buffer.create len in let rec loop ofs_start ofs = if ofs = len then begin Buffer.add_substring res text ofs_start (ofs - ofs_start); Buffer.contents res end else if ptr_equal_at text ofs patt len patt_len then begin Buffer.add_substring res text ofs_start (ofs - ofs_start); Buffer.add_string res repl; let ofs = ofs + patt_len in loop ofs ofs end else loop ofs_start (unsafe_next text ofs) in match patt, text with | "", "" -> repl | "", _ -> concat "" [repl; concat repl (explode text); repl] | _ -> loop 0 0 let contains txt = function | "" -> true | sub -> let len = String.length txt and sub_len = String.length sub in let rec loop ofs = if ofs = len then false else ptr_equal_at txt ofs sub len sub_len || loop (unsafe_next txt ofs) in loop 0 let starts_with txt sub = equal_at (pointer_l txt) sub let ends_with txt sub = let idx = length txt - length sub in if idx < 0 then false else equal_at (pointer_at txt idx) sub let strip ?(chars=[" "; "\t"; "\r"; "\n"]) txt = let rec loop_a a = match next a with | Some(t, a) when List.mem t chars -> loop_a a | _ -> a in let a = loop_a (pointer_l txt) in let rec loop_b b = if b.ofs = a.ofs then b else match prev b with | Some(t, b) when List.mem t chars -> loop_b b | _ -> b in let b = loop_b (pointer_r txt) in chunk a b let rstrip ?(chars=[" "; "\t"; "\r"; "\n"]) txt = let rec loop_b b = match prev b with | Some(t, b) when List.mem t chars -> loop_b b | _ -> chunk (pointer_l txt) b in loop_b (pointer_r txt) let lstrip ?(chars=[" "; "\t"; "\r"; "\n"]) txt = let rec loop_a a = match next a with | Some(t, a) when List.mem t chars -> loop_a a | _ -> chunk a (pointer_r txt) in loop_a (pointer_l txt) let rchop = function | "" -> "" | txt -> let len = String.length txt in let ofs = unsafe_prev txt len in unsafe_sub txt 0 ofs let lchop = function | "" -> "" | txt -> let len = String.length txt in let ofs = unsafe_next txt 0 in unsafe_sub txt ofs (len - ofs) (* +-----------------------------------------------------------------+ | Upper/lower casing | +-----------------------------------------------------------------+ *) let rec map_code_rec f buf txt ofs = if ofs = String.length txt then Buffer.contents buf else begin let code, ofs = unsafe_extract_next txt ofs in Buffer.add_string buf (char (f code)); map_code_rec f buf txt ofs end let map_code f txt = let buf = Buffer.create (String.length txt) in map_code_rec f buf txt 0 external ml_upper : int -> int = "ml_text_upper" external ml_lower : int -> int = "ml_text_lower" let upper = map_code ml_upper let lower = map_code ml_lower let map_first_code f = function | "" -> "" | txt -> let code, ofs = unsafe_extract_next txt 0 in char (f code) ^ unsafe_sub txt ofs (String.length txt - ofs) let capitalize = map_first_code ml_upper let uncapitalize = map_first_code ml_lower (* +-----------------------------------------------------------------+ | Comparison | +-----------------------------------------------------------------+ *) external ml_compare : string -> string -> int = "ml_text_compare" let compare t1 t2 = ml_compare (encode t1) (encode t2) let icompare t1 t2 = ml_compare (encode (lower t1)) (encode (lower t2)) external ml_transform : t -> t = "ml_text_strxfrm" let transform str = ml_transform (encode str) ocaml-text-0.8/src/text.mldylib000066400000000000000000000001421244105655100165670ustar00rootroot00000000000000# OASIS_START # DO NOT EDIT (digest: 8471c49331aeb032ffe7f20b5509e5d1) Text Encoding # OASIS_STOP ocaml-text-0.8/src/text.mli000066400000000000000000000323111244105655100157170ustar00rootroot00000000000000(* * text.mli * -------- * Copyright : (c) 2009, Jeremie Dimino * Licence : BSD3 * * This file is a part of ocaml-text. *) (** UTF-8 encoded strings *) (** This modules is intended for ``text'' manipulation. By text we mean sequence of unicode characters. For compatibility and simplicity reasons, text is represented by UTF-8 encoded strings, and there is no special types for unicode characters, whose are just represented by 1-length text. All functions of this module expect to by applied on valid UTF-8 encoded strings, and may raise [Invalid] if this is not the case. *) type t = string (** Type of text *) exception Invalid of string * string (** [Invalid(error, text)] Exception raised when an invalid UTF-8 encoded string is encountered. [text] is the faulty text and [error] is a description of the first error in [text]. *) val check : string -> string option (** [check str] checks that [str] is a valid UTF-8 encoded string. Returns [None] if it is the case, or [Some error] otherwise. *) val validate : string -> unit (** Same as check but raises an exception in case the argument is not a valid text. *) (** {6 Encoding/decoding} *) val encode : ?encoding : Encoding.t -> t -> string (** [encode ?encoding txt] encode the given text with [encoding], which defaults to {!Encoding.system} plus transliteration. *) val decode : ?encoding : Encoding.t -> string -> t (** [decode ?encoding str] decode the given string encoded in [encoding], which defaults to {!Encoding.system} *) val to_ascii : t -> t (** [to_ascii txt] returns an approximative ascii version of [txt]. This is the same as [encode ~encoding:"ASCII//TRANSLIT" txt] *) (** {6 Informations} *) val length : t -> int (** Return the number of unicode character contained in the given text *) (** {6 Construction/access} *) val code : t -> int (** [code text] returns the unicode code-point of first character of [text]. For example: - [code "A" = 65] - [code "é" = 0xe9] *) val char : int -> t (** [char code] returns the character corresponding to the given unicode code-point. For example: - [char 65 = "A"] - [char 0xe9 = "é"] @raise Invalid_argument if [code] is not a valid unicode code-point. Valid code-point are all integers in the range [0..0x10ffff]. *) val get : t -> int -> t (** [get text n] returns the [n]-th character of [text]. [n] is a number of unicode character, not bytes. A negative value is interpreted as a position from the end of the text. For example: - [get "abc" 0 = "a"] - [get "abc" 2 = "c"] - [get "aéb" 1 = "é"] - [get "aéb" 2 = "b"] - [nth "abc" (-1) = "c"] *) val sub : t -> int -> int -> t (** [sub text pos len] Returns the sub-text of [text] starting at position [pos] and of length [len]. [pos] and/or [len] may be negative. For example: - [sub "ocaml" 1 2 = "ca"] - [sub "ocaml" 3 (-2) = "ca"] - [sub "ocaml" (-2) 1 = "m"] *) val slice : t -> int -> int -> t (** [slice text a b] returns the text contained in [txt] between [a] and [b] (exlusive). [a] and/or [b] may be negative. For example: - [slice "abc" 0 1 = "a"] - [slice "abcdef" 1 (-1) = "bcde"] *) val splice : t -> int -> int -> t -> t (** [splice text a b repl] replace the text between [a] and [b] (exclusive) by [repl]. For example: - [splice "abcd" 1 2 "plop" = "aplopcd"] - [splice "abcd" 1 2 "" = "acd"] *) val repeat : int -> t -> t (** [repeat n text] returns [text] concatened [n]-times with itself. *) val init : int -> (int -> t) -> t (** [init n f] returns [f 0 ^ f 1 ^ ... ^ f (n - 1)] *) val rev_init : int -> (int -> t) -> t (** [rev_init n f] returns [f (n - 1) ^ f 1 ^ ... ^ f 0] *) (** {6 Locale specific functions} *) val upper : t -> t (** [upper t] returns the upper-cased version of [t]. *) val lower : t -> t (** [lower t] returns the upper-cased version of [t]. *) val capitalize : t -> t (** [capitalize t] returns [t] with its first letter upper-cased *) val uncapitalize : t -> t (** [capitalize t] returns [t] with its first letter lower-cased *) val compare : t -> t -> int (** Compares two texts according to the current locale *) val icompare : t -> t -> int (** Compares two texts, case-insensitive *) val transform : t -> t (** [transform str] transforms [str] in a way such that comparing two string [str1] and [str2] transformed with [Pervasives.compare] give the same result as comparing them with {!compare}. *) (** {6 Transformations} *) val rev : t -> t (** [rev t] returns the sequence of characters of [t] in reverse order. For example: - [rev "ocaml" = "lmaco"] - [rev "héhé" = "éhéh"] *) val concat : t -> t list -> t (** [concat sep l] returns the concatenation of all texts contained in [l], separated by [sep]. For example: - [concat "/" ["a"; "b"; "c"] = "a/b/c"] *) val rev_concat : t -> t list -> t (** [rev_concat sep l] returns the concatenation of all texts contained in [l], separated by [sep]. For example: - [concat "/" ["a"; "b"; "c"] = "c/b/a"] *) val explode : t -> t list (** [explode txt] returns the list of all characters of [txt]. For example: - [explode "" = []] - [explode "abé" = ["a"; "b"; "é"]] *) val rev_explode : t -> t list (** [rev_explode txt] returns the list of all characters of [txt], in reverse order. For example: - [rev_explode "ocaml" = ["l"; "m"; "a"; "c"; "o"]] *) val implode : t list -> t (** [implode l] returns the concatenation of all texts contained in [l]. This is the same as [concat "" l], but a bit more efficient. For example: - [implode ["o"; "c"; "a"; "m"; "l"] = "ocaml"] - [implode ["abc"; "def"] = "abcdef"] *) val rev_implode : t list -> t (** [rev_implode l] returns the concatenation of all texts contained in [l], in reverse order. For example: - [implode ["o"; "c"; "a"; "m"; "l"] = "lmaco"] - [implode ["abc"; "def"] = "defabc"] *) (** {6 Tests} *) (** The following functions tests whether all characters of the given text verify a property: *) val is_ascii : t -> bool val is_alnum : t -> bool val is_alpha : t -> bool val is_blank : t -> bool val is_cntrl : t -> bool val is_digit : t -> bool val is_graph : t -> bool val is_lower : t -> bool val is_print : t -> bool val is_punct : t -> bool val is_space : t -> bool val is_upper : t -> bool val is_xdigit : t -> bool (** {6 Text traversals} *) (** For all the following functions we give a equivalent implementation, and examples. They have the same semantic as the equivalent implementation but are more efficient. *) val map : (t -> t) -> t -> t (** [map f text] ~ [implode (List.map f (explode text))] [map (function "a" -> "x" | t -> t) "abc" = "xbc"] *) val rev_map : (t -> t) -> t -> t (** [rev_map f text] ~ [implode (List.rev_map f (explode text))] [rev_map (function "a" -> "x" | t -> t) "abc" = "cbx"] *) val fold : (t -> 'a -> 'a) -> t -> 'a -> 'a (** [fold f x text] ~ [List.fold_left f x (explode text)] [fold (fun acc t -> acc + code t) 0 "ABC" = 198] *) val rev_fold : (t -> 'a -> 'a) -> t -> 'a -> 'a (** [fold f text x] ~ [List.fold_left f x (rev_explode text)] [rev_fold (fun t acc -> acc + code t) "ABC" 0 = 198] *) val filter : (t -> bool) -> t -> t (** [filter text] ~ [implode (List.filter f (explode text))] [filter is_alpha "1a2E" = "aE"] *) val rev_filter : (t -> bool) -> t -> t (** [rev_filter text] ~ [implode (List.filter f (rev_explode text))] [rev_filter is_alpha "1a2E" = "Ea"] *) val iter : (t -> unit) -> t -> unit (** [iter f text] ~ [List.iter f (explode text)] *) val rev_iter : (t -> unit) -> t -> unit (** [iter f text] ~ [List.iter f (rev_explode text)] *) (** {6 Scanning} *) val for_all : (t -> bool) -> t -> bool (** [for_all f text] returns whether all characters of [text] verify the predicate [f] *) val exists : (t -> bool) -> t -> bool (** [exists f text] returns whether at least one character of [text] verify [f] *) val count : (t -> bool) -> t -> int (** [count f text] returhs the number of characters of [text] verifying [f] *) (** {6 Splitting} *) val words : t -> t list (** Returns all words of the given text. Words are sequence of non-space and non-punct characters. *) val lines : t -> t list (** Returns all lines of the given text, without end of line characters. Both ["\r\n"] and ["\n"] are recognized as end of line delimiters. *) val split : ?max : int -> ?sep : t -> t -> t list (** [split ?max ?sep text] split [text] according to [sep]. If [max] is specified, returns at most [max] splits. [sep] defaults to [" "]. For example: - [split ~sep:"/" "a/b/c" = ["a"; "b"; "c"]] - [split ~sep:".." "a..b..c" = ["a"; "b"; "c"]] - [split ~max:1 "a b c" = ["a b c"]] - [split ~max:2 "a b c" = ["a"; "b c"]] *) val rev_split : ?max : int -> ?sep : t -> t -> t list (** [rev_split ?max text sep] split [text] according to [sep] in reverse order. For example: - [split ~sep:"/" "a/b/c" = ["c"; "b"; "a"]] - [split ~max:1 "a b c" = ["a b c"]] - [split ~max:2 "a b c" = ["a b"; "c"]] - [rev_split ~max:2 ~sep:"." "toto.mli" = ["toto"; "mli"]] *) val replace : t -> patt : t -> repl : t -> t (** [replace text ~patt ~repl] replace all occurences of [patt] in [text] by [repl]. For example: - [replace "abcd" ~patt:"b" ~repl:"x" = "axcd"] - [replace "Hello world!" ~patt:"world" ~repl:"you" = "Hello you!"] *) (** {6 Tests} *) val contains : t -> t -> bool (** [contains text sub] returns whether [sub] appears in [text] *) val starts_with : t -> t -> bool (** [starts_with text prefix] returns [true] iff [s] starts with [prefix]. For example: - [starts_with "abcd" "ab" = true] - [starts_with "abcd" "af" = false] - [starts_with "ab" "abcd" = false] *) val ends_with : t -> t -> bool (** [ends_with s suffix] returns [true] iff [s] ends with [suffix]. For example: - [ends_with "abcd" "cd" = true] - [ends_with "abcd" "hd" = false] - [ends_with "ab" "abc" = false] *) (** {6 Stripping} *) val strip : ?chars : t list -> t -> t (** [strip ?chars text] removes all characters of [text] which are part of [chars] at the right and left. [chars] defaults to whitespaces. *) val rstrip : ?chars : t list -> t -> t (** [rstrip ?chars text] removes all characters of [text] which are part of [chars] at the right. *) val lstrip : ?chars : t list -> t -> t (** [lstrip ?chars text] removes all characters of [text] which are part of [chars] at the left. *) val rchop : t -> t (** [rchop t] returns [t] without is last character. Returns [""] if [t = ""]. *) val lchop : t -> t (** [lchop t] returns [t] without is first character. Returns [""] if [t = ""] *) (** {6 Pointers} *) (** Since characters are not encoded by a fixed number of bytes, accessing them by character position is not efficient. The following functions allow you to iterates in a string in an efficient way. *) type pointer (** A pointer to a unicode character in a text. *) val pointer_l : t -> pointer (** Returns a pointer to the left of the given text *) val pointer_r : t -> pointer (** Returns a pointer to the right of the given text *) val pointer_at : t -> int -> pointer (** [pointer_at txt n] returns a pointer to the character at position [n] in [txt]. *) val next : pointer -> (t * pointer) option (** [next ptr] if [ptr] is at the end of text, returns [None], otherwise, returns [Some(ch, ptr')] where [ch] is the character at current position and [ptr'] is the pointer to the next character of the text. *) val prev : pointer -> (t * pointer) option (** [prev ptr] if [ptr] is at the beginning of text, returns [None], otherwise, returns [Some(ch, ptr')] where [ptr'] points to the previous character and [ch] is the character at [ptr']. *) val move : int -> pointer -> pointer (** [move n ptr] moves [ptr] by [n] unicode characters. If [n < 0] then [ptr] is moved to the left. Raises [Invalid_argument] if the result is outside the text. *) val chunk : pointer -> pointer -> t (** [chunk a b] returns the chunk of text between [a] and [b]. Raises [Invalid_arugment] if [a] or [b]. *) val offset : pointer -> int (** [offset ptr] returns the position in bytes of [ptr] *) val position : pointer -> int (** [position ptr] returns the position in unicode character of [ptr] *) val equal_at : pointer -> t -> bool (** [equal_at ptr str] returns wether [ptr] points to a substring equal to [str] *) val find : ?from : pointer -> t -> t -> pointer option (** [find ?from text patt] returns a pointer to the first occurrence of [patt] in [text]. *) val rev_find : ?from : pointer -> t -> t -> pointer option (** [find ?from text patt] returns a pointer to the last occurrence of [patt] in [text]. *) ocaml-text-0.8/src/text.mllib000066400000000000000000000001421244105655100162320ustar00rootroot00000000000000# OASIS_START # DO NOT EDIT (digest: 8471c49331aeb032ffe7f20b5509e5d1) Text Encoding # OASIS_STOP ocaml-text-0.8/src/text_pcre.ml000066400000000000000000000012071244105655100165570ustar00rootroot00000000000000(* * text_pcre.ml * ------------ * Copyright : (c) 2010, Jeremie Dimino * Licence : BSD3 * * This file is a part of ocaml-text. *) let iflags = Pcre.cflags [`UTF8; `ANCHORED] let regexp str = Pcre.regexp ~iflags str let exec store array = try store := Array.map (fun (regexp, text) -> Pcre.exec ~rex:(Lazy.force regexp) text) array; true with Not_found -> false let get_substring substrings index = try Pcre.get_substring substrings index with Not_found -> "" let get_substring_ofs substrings index = try fst (Pcre.get_substring_ofs substrings index) with Not_found -> -1 ocaml-text-0.8/src/text_pcre.mli000066400000000000000000000006721244105655100167350ustar00rootroot00000000000000(* * text_pcre.mli * ------------- * Copyright : (c) 2010, Jeremie Dimino * Licence : BSD3 * * This file is a part of ocaml-text. *) (** This module is used by the syntax extension *) val exec : Pcre.substrings array ref -> (Pcre.regexp Lazy.t * Text.t) array -> bool val regexp : string -> Pcre.regexp val get_substring : Pcre.substrings -> int -> Text.t val get_substring_ofs : Pcre.substrings -> int -> int ocaml-text-0.8/style.css000066400000000000000000000050361244105655100153170ustar00rootroot00000000000000/* A style for ocamldoc. Daniel C. Buenzli, Jérémie Dimino */ body { padding: 0em; border: 0em; margin: 2em 10% 2em 10%; font-weight: normal; line-height: 130%; text-align: justify; background: white; color : black; min-width: 40ex; } pre, p, div, span, img, table, td, ol, ul, li { padding: 0em; border: 0em; margin: 0em } h1, h2, h3, h4, h5, h6, div.h7, div.h8, div.h9 { fontsize: 100%; margin-bottom: 1em padding: 1ex 0em 0em 0em; border: 0em; margin: 1em 0em 0em 0em; font-weight : bold; text-align: center; } h1 { font-size : 140% } h2, h3, h4, h5, h6, div.h7, div.h8, div.h9 { font-size : 100%; border-top-style : none; margin: 1ex 0em 0em 0em; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px; text-align: center; padding: 2px; } h2 { font-size : 120%; background-color: #90BDFF ; } h3 { background-color: #90DDFF; } h4 { background-color: #90EDFF; } h5 { background-color: #90FDFF; } h6 { background-color: #C0FFFF; } div.h7 { background-color: #E0FFFF; } div.h8 { background-color: #F0FFFF; } div.h9 { background-color: #FFFFFF; } .navbar { padding-bottom : 1em; margin-bottom: 1em; border-bottom: 1px solid #000000; border-bottom-style: dotted; } p { padding: 1em 0ex 0em 0em } a, a:link, a:visited, a:active, a:hover { color : #009; text-decoration: none } a:hover { color : #009; text-decoration : none; background-color: #5FFF88 } hr { border-style: none; } table { font-size : 100% /* Why ? */ } ul li { padding: 1em 0em 0em 0em; margin:0em 0em 0em 2.5ex } ol li { padding: 1em 0em 0em 0em; margin:0em 0em 0em 2em } pre { margin: 3ex 0em 1ex 0em; background-color: #edf0f9; } .keyword { font-weight: bold; color: #a020f0; } .keywordsign { font-weight: bold; color: #a020f0; } .typefieldcomment { color : #b22222; } .keywordsign { color: #a020f0; } .code { font-size: 120%; color: #5f5f5f; } .info { margin: 0em 0em 0em 2em } .comment { color : #b22222; } .constructor { color : #072 } .type { color : #228b22; } .string { color : #bc8f8f; } .warning { color : Red; font-weight : bold } div.sig_block { margin-left: 2em } .typetable { color : #b8860b; border-style : hidden } .indextable { border-style : hidden } .paramstable { border-style : hidden; padding: 5pt 5pt } .superscript { font-size : 80% } .subscript { font-size : 80% } ocaml-text-0.8/syntax/000077500000000000000000000000001244105655100147675ustar00rootroot00000000000000ocaml-text-0.8/syntax/pa_text_env.ml000066400000000000000000000004051244105655100176340ustar00rootroot00000000000000(* * pa_text_env.ml * -------------- * Copyright : (c) 2010, Jeremie Dimino * Licence : BSD3 * * This file is a part of ocaml-text. *) include Map.Make(String) let lookup key env = try Some(find key env) with Not_found -> None ocaml-text-0.8/syntax/pa_text_env.mli000066400000000000000000000005311244105655100200050ustar00rootroot00000000000000(* * pa_text_env.mli * --------------- * Copyright : (c) 2010, Jeremie Dimino * Licence : BSD3 * * This file is a part of ocaml-text. *) (** Environment *) include Map.S with type key = string val lookup : string -> 'a t -> 'a option (** [lookup key env] returns the binding of [key] in [env] if any. *) ocaml-text-0.8/syntax/pa_text_main.ml000066400000000000000000000434611244105655100200010ustar00rootroot00000000000000(* * pa_text_main.ml * --------------- * Copyright : (c) 2010, Jeremie Dimino * Licence : BSD3 * * This file is a part of ocaml-text. *) open Camlp4.PreCast open Syntax open Pa_text_types open Pa_text_regexp let lookup tbl key = try Some(Hashtbl.find tbl key) with Not_found -> None (* +-----------------------------------------------------------------+ | Unicode quotations | +-----------------------------------------------------------------+ *) (* The syntax extension added the escape sequence "\u{XXXX}". To prevent the lexer to fail and die since this is not a valid ocaml escape sequece, we replace the backslash and "u" by a null character. The restoring of the escape sequenc will be done later, during printing of regular expression by [Pa_text_regexp.to_string]. *) (* replace "\\u{XXXX}" -> "\x00\x00{XXXX}" *) let hide_unicode_quotations txt = let rec loop_in_string acc = function | [] -> Text.rev_implode acc | "\\" :: "\"" :: l -> loop_in_string ("\\\"" :: acc) l | "\"" :: l -> loop_search_string ("\"" :: acc) l | "\\" :: "\\" :: l -> loop_in_string ( "\\\\" :: acc) l | "\\" :: "u" :: l -> begin match Pa_text_util.split_hexa_quotation l with | Some(txt, l) -> loop_in_string (txt :: "\x00\x00" :: acc) l | None -> loop_in_string ("\\u" :: acc) l end | x :: l -> loop_in_string (x :: acc) l and loop_search_string acc = function | [] -> Text.rev_implode acc | "\"" :: l -> loop_in_string ("\"" :: acc) l | x :: l -> loop_search_string (x :: acc) l in loop_search_string [] (Text.explode txt) (* +-----------------------------------------------------------------+ | Initial environment | +-----------------------------------------------------------------+ *) let global_env = ref Pa_text_env.empty let add_vars l = global_env := List.fold_left (fun env (id, exp_regexp) -> Pa_text_env.add id exp_regexp env) !global_env l let () = add_vars [ ("lower", posix "lower" true); ("upper", posix "upper" true); ("alpha", posix "alpha" true); ("digit", posix "digit" true); ("alnum", posix "alnum" true); ("punct", posix "punct" true); ("graph", posix "graph" true); ("print", posix "print" true); ("blank", posix "blank" true); ("cntrl", posix "cntrl" true); ("xdigit", posix "xdigit" true); ("space", posix "space" true); ("ascii", posix "ascii" true); ("word", posix "word" true); ("newline", meta "\\R" None); ("hspace", meta "\\h" (Some "\\H")); ("vspace", meta "\\v" (Some "\\V")); ("bound", meta "\\b" (Some "\\B")); ("bos", meta "\\A" None); ("eos", meta "\\z" None); ]; (* Unicode properties *) add_vars (List.map (fun name -> (name, meta ("\\p{" ^ name ^ "}") (Some ("\\P{" ^ name ^ "}")))) [ "C"; "Cc"; "Cf"; "Cn"; "Co"; "Cs"; "L"; "Ll"; "Lm"; "Lo"; "Lt"; "Lu"; "M"; "Mc"; "Me"; "Mn"; "N"; "Nd"; "Nl"; "No"; "P"; "Pc"; "Pd"; "Pe"; "Pf"; "Pi"; "Po"; "Ps"; "S"; "Sc"; "Sk"; "Sm"; "So"; "Z"; "Zl"; "Zp"; "Zs"; ]); (* Scripts *) add_vars (List.map (fun name -> (name, meta ("\\p{" ^ name ^ "}") (Some ("\\P{" ^ name ^ "}")))) [ "Arabic"; "Armenian"; "Balinese"; "Bengali"; "Bopomofo"; "Braille"; "Buginese"; "Buhid"; "Canadian_boriginal"; "Cherokee"; "Common"; "Coptic"; "Cuneiform"; "Cypriot"; "Cyrillic"; "Deseret"; "Devanagari"; "Ethiopic"; "Georgian"; "Glagolitic"; "Gothic"; "Greek"; "Gujarati"; "Gurmukhi"; "Han"; "Hangul"; "Hanunoo"; "Hebrew"; "Hiragana"; "Inherited"; "Kannada"; "Katakana"; "Kharoshthi"; "Khmer"; "Lao"; "Latin"; "Limbu"; "Linear_B"; "Malayalam"; "Mongolian"; "Myanmar"; "New_Tai_Lue"; "Nko"; "Ogham"; "Old_Italic"; "Old_Persian"; "Oriya"; "Osmanya"; "Phags_Pa"; "Phoenician"; "Runic"; "Shavian"; "Sinhala"; "Syloti_Nagri"; "Syriac"; "Tagalog"; "Tagbanwa"; "Tai_Le"; "Tamil"; "Telugu"; "Thaana"; "Thai"; "Tibetan"; "Tifinagh"; "Ugaritic"; "Yi"; ]) (* +-----------------------------------------------------------------+ | Quotation expansion | +-----------------------------------------------------------------+ *) let prefix = "__pa_text_pcre_" (* Mapping from unique identifier of the form [__pa_text_pcre_NNN] to its corresponding regular expression ast *) let regexps : (string, Pa_text_parse.parse_tree * [ `text | `regexp ]) Hashtbl.t = Hashtbl.create 42 let gen_id = let nb = ref 0 in fun () -> let x = !nb in nb := x + 1; prefix ^ string_of_int x let expand_patt_regexp _loc _loc_name_opt quotation_contents = let ast = Pa_text_parse.parse _loc (hide_unicode_quotations quotation_contents) in let id = gen_id () in Hashtbl.add regexps id (ast, `regexp); <:patt< $lid:id$ >> let expand_expr_regexp _loc _loc_name_opt quotation_contents = let ast = Pa_text_parse.parse _loc (hide_unicode_quotations quotation_contents) in let id = gen_id () in Hashtbl.add regexps id (ast, `regexp); <:expr< $lid:id$ >> let expand_expr_text _loc _loc_name_opt quotation_contents = let ast = Pa_text_parse.parse _loc (hide_unicode_quotations quotation_contents) in let id = gen_id () in Hashtbl.add regexps id (ast, `text); <:expr< $lid:id$ >> (* +-----------------------------------------------------------------+ | Code generation via ast filters | +-----------------------------------------------------------------+ *) type 'a collector = { prefix : string; mutable next_id : int; mutable collect : (Loc.t * string * 'a) list; } let collect collector _loc data = let id = Printf.sprintf "%s%s_%d" prefix collector.prefix collector.next_id in collector.next_id <- collector.next_id + 1; collector.collect <- (_loc, id, data) :: collector.collect; <:ident< $lid:id$ >> (* Verify that [id] is an id generated by [gen_id] *) let is_special_id id = let rec aux1 i = if i = String.length prefix then aux2 i else i < String.length id && id.[i] = prefix.[i] && aux1 (i + 1) and aux2 i = (i < String.length id) && match id.[i] with | '0' .. '9' -> aux3 (i + 1) | _ -> false and aux3 i = if i = String.length id then true else match id.[i] with | '0' .. '9' -> aux3 (i + 1) | _ -> false in aux1 0 (* Generate the expression for the given regular expression: *) let gen_compile_regexp _loc env regexp = <:expr< lazy(Text_pcre.regexp $str:Pa_text_regexp.to_string (Pa_text_regexp.of_parse_tree env regexp)$) >> (* Collects all regular expressions in the pattern of a match case branch. [global_regexp_collector] collect all regular expression found in the toplevel expression, and [local_regexp_collector] collects all regular expression of the current branch. *) class map_pattern env global_regexp_collector local_regexp_collector = object inherit Ast.map as super method patt p = match super#patt p with | <:patt@_loc< $lid:id$ >> as p when is_special_id id -> begin match lookup regexps id with | Some(regexp, `regexp) -> (* [regexp_id] is the variable which will appears at the toplevel: *) let regexp_id = collect global_regexp_collector _loc (gen_compile_regexp _loc env regexp) in (* [capture_id] is the variable which will capture the string in the pattern: *) let capture_id = collect local_regexp_collector _loc (<:expr< $id:regexp_id$ >>, regexp) in <:patt< $id:capture_id$ >> | _ -> p end | p -> p end module StringSet = Set.Make(String) class collect_pattern_lids set = object inherit Ast.map as super method patt patt = match super#patt patt with | <:patt< $lid:id$ >> as patt -> set := StringSet.add id !set; patt | p -> patt end (* Check that all variables contained in variables are distincts *) let check_collision patt variables = let add set (_loc, id, n, conv) = if StringSet.mem id set then Loc.raise _loc (Failure (Printf.sprintf "Variable %s is bound several times in this matching" id)) else StringSet.add id set in let set = ref StringSet.empty in let _ = (new collect_pattern_lids set)#patt patt in let _ = List.fold_left (fun set vars -> List.fold_left add set vars) !set variables in () (* Maps all branch of the given match case. It returns [(b, mc)] where [b] is [true] iff at least one branch have been modified and [mc] is the result. *) let rec map_match mapper env global_regexp_collector = function | <:match_case@_loc< $patt$ when $cond$ -> $expr$ >> as mc -> let local_regexp_collector = { prefix = "var"; next_id = 0; collect = [] } in (* Map the pattern and collect regexp it contains *) let patt = (new map_pattern env global_regexp_collector local_regexp_collector)#patt patt in if local_regexp_collector.collect = [] then (* If nothing has changed, keep the branch unchanged *) (false, mc) else let cond = mapper#expr cond and expr = mapper#expr expr in (* Generate the array [|(__pa_text_pcre_var_0, __pa_text_pcre_regexp_0); ... |] *) let arr = Ast.ExArr(_loc, Ast.exSem_of_list (List.rev_map (fun (_loc, id, (expr, regexp)) -> <:expr< ($expr$, $lid:id$) >>) local_regexp_collector.collect)) in (* The when condition: *) let check_expr = <:expr< Text_pcre.exec __pa_text_pcre_result $arr$ >> in (* Merge the original condition with our new one: *) let cond = match cond with | <:expr< >> -> check_expr | _ -> <:expr< $cond$ && $check_expr$ >> in (* Collect all capture variables in regexps: *) let variables_by_regexp = List.map (fun (_loc, id, (expr, regexp)) -> (Pa_text_parse.collect_regexp_bindings regexp)) local_regexp_collector.collect in (* Check for conflicts *) check_collision patt variables_by_regexp; (* Bind pattern variables *) let rec make_bindings regexp_number acc = function | [] -> acc | variables :: rest -> let acc = List.fold_left begin fun acc (_loc, id, n, conv) -> let binding = match conv with | Pa_text_parse.Identity -> <:binding< $lid:id$ = Text_pcre.get_substring (Array.unsafe_get !__pa_text_pcre_result $int:string_of_int regexp_number$) $int:string_of_int n$ >> | Pa_text_parse.Constant e -> <:binding< $lid:id$ = $e$ >> | Pa_text_parse.Function f -> <:binding< $lid:id$ = $f$ (Text_pcre.get_substring (Array.unsafe_get !__pa_text_pcre_result $int:string_of_int regexp_number$) $int:string_of_int n$) >> | Pa_text_parse.Position -> <:binding< $lid:id$ = Text_pcre.get_substring_ofs (Array.unsafe_get !__pa_text_pcre_result $int:string_of_int regexp_number$) $int:string_of_int n$ >> in binding :: acc end acc variables in make_bindings (regexp_number + 1) acc rest in (* Make the compiler happy (no "unused variables") *) let expr = List.fold_left (fun expr vars -> List.fold_left (fun expr (_loc, id, _, _) -> <:expr< ignore $lid:id$; $expr$ >>) expr vars) expr variables_by_regexp in (true, <:match_case< $patt$ when $cond$ -> let $Ast.biAnd_of_list (make_bindings 0 [] variables_by_regexp)$ in $expr$ >>) | <:match_case@_loc< $mc1$ | $mc2$ >> -> let (b1, mc1) = map_match mapper env global_regexp_collector mc1 and (b2, mc2) = map_match mapper env global_regexp_collector mc2 in (b1 || b2, <:match_case< $mc1$ | $mc2$ >>) | mc -> (false, mc) (* [global_regexp_collector] collects all regular expression found in the expression *) class map env global_regexp_collector = object(self) inherit Ast.map as super method expr expr = let expr = super#expr expr in match expr with | <:expr@_loc< match $e$ with $mc$ >> -> let modified, mc = map_match self env global_regexp_collector mc in if modified then <:expr< let __pa_text_pcre_result = ref [||] in match $e$ with $mc$ >> else expr | <:expr@_loc< function $mc$ >> -> let modified, mc = map_match self env global_regexp_collector mc in if modified then <:expr< let __pa_text_pcre_result = ref [||] in function $mc$ >> else expr | <:expr@_loc< $lid:id$ >> when is_special_id id -> begin match lookup regexps id with | Some(regexp, `regexp) -> let regexp_id = collect global_regexp_collector _loc (gen_compile_regexp _loc env regexp) in <:expr< Lazy.force $id:regexp_id$ >> | Some(regexp, `text) -> <:expr< $str:Pa_text_regexp.to_string (Pa_text_regexp.of_parse_tree env regexp)$ >> | None -> expr end | expr -> expr end (* map expressions: [expr] becomes: {[ let __pa_text_pcre_regexp_0 = lazy(Pcre.compile "....") in let __pa_text_pcre_regexp_1 = lazy(Pcre.compile "....") in ... let __pa_text_pcre_regexp_N = lazy(Pcre.compile "....") in expr ]} *) let map_expr e = let collector = { prefix = "regexp"; next_id = 0; collect = [] } in let e = (new map !global_env collector)#expr e in match collector.collect with | [] -> e | (_loc, id, expr) :: collect -> let binding = List.fold_left (fun acc (_loc, id, expr) -> <:binding< $acc$ and $lid:id$ = $expr$ >>) <:binding< $lid:id$ = $expr$ >> collect in <:expr< let $binding$ in $e$ >> (* Map class expresions: {[ class class_expr ]} becomes: {[ class let __pa_text_pcre_regexp_0 = lazy(Pcre.compile "....") in let __pa_text_pcre_regexp_1 = lazy(Pcre.compile "....") in ... let __pa_text_pcre_regexp_N = lazy(Pcre.compile "....") in class_expr ]} *) let map_class_expr e = let collector = { prefix = "regexp"; next_id = 0; collect = [] } in let e = (new map !global_env collector)#class_expr e in match collector.collect with | [] -> e | (_loc, id, expr) :: collect -> let binding = List.fold_left (fun acc (_loc, id, expr) -> <:binding< $acc$ and $lid:id$ = $expr$ >>) <:binding< $lid:id$ = $expr$ >> collect in <:class_expr< let $binding$ in $e$ >> (* map let bindings: {[ let id = expr ]} becomes: {[ let id = let __pa_text_pcre_regexp_0 = lazy(Pcre.compile "....") in let __pa_text_pcre_regexp_1 = lazy(Pcre.compile "....") in ... let __pa_text_pcre_regexp_N = lazy(Pcre.compile "....") in expr ]} *) let rec map_binding new_env = function | <:binding@_loc< $lid:id$ = $lid:id_re$ >> as binding when is_special_id id_re -> begin match lookup regexps id_re with | Some(parse_tree, `regexp) -> let regexp = Pa_text_regexp.of_parse_tree !global_env parse_tree in (Pa_text_env.add id regexp new_env, <:binding< $lid:id$ = Text_pcre.regexp $str:Pa_text_regexp.to_string regexp$ >>) | Some(parse_tree, `text) -> let regexp = Pa_text_regexp.of_parse_tree !global_env parse_tree in (new_env, <:binding< $lid:id$ = $str:Pa_text_regexp.to_string regexp$ >>) | None -> (new_env, binding) end | <:binding@_loc< $id$ = $e$ >> -> (new_env, <:binding< $id$ = $map_expr e$ >>) | <:binding@_loc< $a$ and $b$ >> -> let new_env, binding_a = map_binding new_env a in let new_env, binding_b = map_binding new_env b in (new_env, <:binding< $binding_a$ and $binding_b$ >>) | binding -> (new_env, binding) (* Map top-level definitions *) let map_def = function | Ast.StVal(loc, is_rec, binding) -> (* let id = expr *) let new_env, binding = map_binding !global_env binding in global_env := new_env; Ast.StVal(loc, is_rec, binding) | Ast.StExp(loc, expr) -> (* expr *) Ast.StExp(loc, map_expr expr) | Ast.StCls(loc, ce) -> (* class class_expr *) Ast.StCls(loc, map_class_expr ce) | x -> x (* +-----------------------------------------------------------------+ | Registration | +-----------------------------------------------------------------+ *) let () = Quotation.add "re" Quotation.DynAst.patt_tag expand_patt_regexp; Quotation.add "re" Quotation.DynAst.expr_tag expand_expr_regexp; Quotation.add "re_text" Quotation.DynAst.expr_tag expand_expr_text; let map = (Ast.map_str_item map_def)#str_item in AstFilters.register_str_item_filter map; AstFilters.register_topphrase_filter map ocaml-text-0.8/syntax/pa_text_parse.ml000066400000000000000000000165051244105655100201660ustar00rootroot00000000000000(* * pa_text_parse.ml * ---------------- * Copyright : (c) 2010, Jeremie Dimino * Licence : BSD3 * * This file is a part of ocaml-text. *) open Camlp4.PreCast open Syntax open Pa_text_types (* +-----------------------------------------------------------------+ | Types | +-----------------------------------------------------------------+ *) type converter = | Constant of Ast.expr | Function of Ast.expr | Position | Identity type charset_atom = | Ca_variable of Loc.t * string * bool | Ca_range of Loc.t * Text.t * Text.t | Ca_literal of Loc.t * Text.t type charset = charset_atom list type parse_tree = | Literal of Loc.t * Text.t | Repeat of Loc.t * parse_tree * int * int option * greediness | Concat of Loc.t * parse_tree * parse_tree | Alternative of Loc.t * parse_tree * parse_tree | Bind of Loc.t * parse_tree * string * converter | Charset of Loc.t * charset * bool | Meta of Loc.t * Text.t * Text.t option | Variable of Loc.t * string * bool | Backward_reference of Loc.t * string | Mode of Loc.t * mode * bool | Look of Loc.t * direction * parse_tree * bool | Group of Loc.t * parse_tree | Condition of Loc.t * string * parse_tree * parse_tree option (* +-----------------------------------------------------------------+ | Grammar of regular expression | +-----------------------------------------------------------------+ *) let regexp_eoi = Gram.Entry.mk "regexp_eoi" EXTEND Gram GLOBAL: regexp_eoi; utf8_string: [ [ s = STRING -> match Text.check s with | Some error -> Loc.raise _loc (Failure("invalid UTF-8 string: " ^ error)) | None -> s ] ]; range: [ [ a = INT -> let a = int_of_string a in if a < 0 then Loc.raise _loc (Failure "range bounds must be positive number") else (a, Some a) | a = INT; "-"; b = INT -> let a = int_of_string a and b = int_of_string b in if a < 0 || b < a then Loc.raise _loc (Failure "invalid range bounds") else (a, Some b) | a = INT; "+" -> let a = int_of_string a in if a < 0 then Loc.raise _loc (Failure "range bounds must be positive number") else (a, None) ] ]; state: [ [ "!" -> false | -> true ] ]; charset_atom: [ [ a = utf8_string; ["-" | ".."]; b = utf8_string -> if Text.length a <> 1 || Text.length b <> 1 then Loc.raise _loc (Failure("UTF-8 string literals in charset range must contain only one unicode character")) else if Text.code a < Text.code b then Ca_range(_loc, a, b) else Loc.raise _loc (Failure "invalid charset: the upper limit must be greater than the lower limit") | s = utf8_string -> Ca_literal(_loc, s) | st = state; id = LIDENT -> Ca_variable(_loc, id, st) | st = state; id = UIDENT -> Ca_variable(_loc, id, st) ] ]; charset: [ [ l = LIST0 charset_atom -> l ] ]; mode: [ [ mode = LIDENT -> match mode with | "i" | "caseless" -> Caseless | "m" | "multiline" -> Multiline | "s" | "singleline" | "dotall" -> Dot_all | _ -> Loc.raise _loc (Failure(Printf.sprintf "invalid mode: '%s'" mode)) ] ]; regexp: [ [ r = SELF; "as"; i = LIDENT; conv = OPT [ ":"; s = LIDENT -> Function <:expr< $lid: s ^ "_of_string"$ >> | ":="; e = expr -> Function e | "="; e = expr -> Constant e ] -> Bind(_loc, r, i, match conv with Some c -> c | None -> Identity) | r1 = SELF; "|"; r2 = SELF -> Alternative(_loc, r1, r2) | r1 = SELF; r2 = SELF -> Concat(_loc, r1, r2) ] | "postop" NONA [ r = SELF; "*" -> Repeat(_loc, r, 0, None, Greedy) | r = SELF; "+" -> Repeat(_loc, r, 1, None, Greedy) | r = SELF; "?" -> Repeat(_loc, r, 0, Some 1, Greedy) | r = SELF; "{"; (a, b) = range; "}" -> Repeat(_loc, r, a, b, Greedy) | r = SELF; "*?" -> Repeat(_loc, r, 0, None, Lazy) | r = SELF; "+?" -> Repeat(_loc, r, 1, None, Lazy) | r = SELF; "??" -> Repeat(_loc, r, 0, Some 1, Lazy) | r = SELF; "{"; (a, b) = range; "}"; "?" -> Repeat(_loc, r, a, b, Lazy) | r = SELF; "*+" -> Repeat(_loc, r, 0, None, Possessive) | r = SELF; "++" -> Repeat(_loc, r, 1, None, Possessive) | r = SELF; "?+" -> Repeat(_loc, r, 0, Some 1, Possessive) | r = SELF; "{"; (a, b) = range; "}"; "+" -> Repeat(_loc, r, a, b, Possessive) ] | "preop" NONA [ "\\"; id = LIDENT -> Backward_reference (_loc, id) ] | "simple" NONA [ "["; cs = charset; "]" -> Charset(_loc, cs, true) | "[^"; cs = charset; "]" -> Charset(_loc, cs, false) | s = utf8_string -> Literal(_loc, s) | "_" -> Meta(_loc, ".", None) | st = state; i = LIDENT -> Variable(_loc, i, st) | st = state; i = UIDENT -> Variable(_loc, i, st) | "^" -> Meta(_loc, "^", None) | "$" -> Meta(_loc, "$", None) | "&+"; mode = mode -> Mode(_loc, mode, true) | "&-"; mode = mode -> Mode(_loc, mode, false) | "@"; name = LIDENT -> Bind(_loc, Literal(_loc, ""), name, Position) | "("; r = SELF; ")" -> Group(_loc, r) | "<"; r = SELF -> Look(_loc, Behind, r, true) | " Look(_loc, Behind, r, false) | ">"; r = SELF -> Look(_loc, Ahead, r, true) | ">!"; r = SELF -> Look(_loc, Ahead, r, false) | "if"; id = LIDENT; "then"; r_then = SELF; r_else = maybe_else -> Condition(_loc, id, r_then, r_else) ] ]; maybe_else: [ [ "else"; r = regexp -> Some r | -> None ] ]; regexp_eoi: [ [ re = regexp; `EOI -> re ] ]; END (* +-----------------------------------------------------------------+ | Manipulation | +-----------------------------------------------------------------+ *) let collect_regexp_bindings ast = let rec loop n acc = function | Literal _ | Variable _ | Charset _ | Meta _ | Backward_reference _ | Mode _ -> (n, acc) | Group(_, r) -> loop n acc r | Look(_, _, r, _) -> loop n acc r | Repeat(_, r, _, _, _) -> loop n acc r | Concat(_, r1, r2) -> let n, acc = loop n acc r1 in loop n acc r2 | Alternative(_, r1, r2) -> let n, acc = loop n acc r1 in loop n acc r2 | Bind(_loc, r, id, conv) -> loop (n + 1) ((_loc, id, n, conv) :: acc) r | Condition(_, _, r_then, None) -> loop n acc r_then | Condition(_, _, r_then, Some r_else) -> let n, acc = loop n acc r_then in loop n acc r_else in snd (loop 1 [] ast) (* +-----------------------------------------------------------------+ | Parsing | +-----------------------------------------------------------------+ *) let parse loc contents = Gram.parse_string regexp_eoi loc contents ocaml-text-0.8/syntax/pa_text_parse.mli000066400000000000000000000064741244105655100203430ustar00rootroot00000000000000(* * pa_text_parse.mli * ----------------- * Copyright : (c) 2010, Jeremie Dimino * Licence : BSD3 * * This file is a part of ocaml-text. *) (** Parsing of regular expression quotations *) open Camlp4.PreCast open Pa_text_types (** {6 Parse tree} *) (** Variable converters. A converter is used to set how capture variables are bound. *) type converter = | Constant of Ast.expr (** Set the variable to this expression, independantly of the matched string. *) | Function of Ast.expr (** Maps the result of a capture with the given function. *) | Position (** Returns the position of the matched string. *) | Identity (** No convertion. Binds the variable to captured string. *) (** Atom in a range (between "[" and "]"): *) type charset_atom = | Ca_variable of Loc.t * string * bool (** [Ca_variable(loc, id, state)] *) | Ca_range of Loc.t * Text.t * Text.t (** [Ca_range(loc, min, max)] matches characters with a code-point between [min] and [max]. *) | Ca_literal of Loc.t * Text.t (** A literal text, that match any character that belong to him *) type charset = charset_atom list (** AST of a parsed regular expression *) type parse_tree = | Literal of Loc.t * Text.t (** A literal string. *) | Repeat of Loc.t * parse_tree * int * int option * greediness (** [Pt_repeat(loc, pt, min, max, greediness)] *) | Concat of Loc.t * parse_tree * parse_tree (** Concatenates two regular expression *) | Alternative of Loc.t * parse_tree * parse_tree (** Union of regular expression *) | Bind of Loc.t * parse_tree * string * converter (** [Pt_bind(loc, pt, id, conv)] Bind a regular expression to an identifier *) | Charset of Loc.t * charset * bool (** [Pt_charset(loc, charset, state)] defines a character set. If [state] is [false], the charset is negated. *) | Meta of Loc.t * Text.t * Text.t option (** [Pt_meta(loc, normal, negated)] negated is the negation of normal *) | Variable of Loc.t * string * bool (** [Pt_variable(loc, id, state)] inline the variable [id] from an environment. If [state] is [false], it tries to negate the contents of [id]. *) | Backward_reference of Loc.t * string (** [Pt_backward_reference(loc, id)] *) | Mode of Loc.t * mode * bool (** [Pt_mode(loc, mode, state)] enable or disable a mode. If [state] is [true], the mode is enabled, otherwise it is disabled. *) | Look of Loc.t * direction * parse_tree * bool (** [Pt_look(loc, dir, pt, state)] defines a look around. *) | Group of Loc.t * parse_tree (** [Group pt] represent a regular expression that cannot be inlined. *) | Condition of Loc.t * string * parse_tree * parse_tree option (** [Condition(loc, id, r_then, r_else)] is [r_then] if the capture variable [id] matched something, and r_else otherwise. *) (** {6 Manipulation} *) val collect_regexp_bindings : parse_tree -> (Loc.t * string * int * converter) list (** Returns the list of variables contained in the given parse-tree, with their group number and converter *) (** {6 Parsing} *) val parse : Loc.t -> string -> parse_tree (** [parse loc string] parses the given string into a parse-tree. *) ocaml-text-0.8/syntax/pa_text_regexp.ml000066400000000000000000000335261244105655100203500ustar00rootroot00000000000000(* * pa_text_regexp.ml * ----------------- * Copyright : (c) 2010, Jeremie Dimino * Licence : BSD3 * * This file is a part of ocaml-text. *) open Camlp4.PreCast open Pa_text_types module Ast : sig type charset_atom = | Ca_range of Text.t * Text.t | Ca_literal of Text.t | Ca_posix of Text.t * bool | Ca_meta of Text.t type charset = charset_atom list type t = private | Literal of Text.t | Group of t | Capture of t | Repeat of t * int * int option * greediness | Concat of t list | Alternatives of t list | Charset of charset * bool | Posix of Text.t * bool | Meta of Text.t * Text.t option | Backward_reference of int | Mode of mode * bool | Look of direction * t * bool | Condition of int * t * t option val epsilon : t val literal : Text.t -> t val group : t -> t val capture : t -> t val repeat : t -> int -> int option -> greediness -> t val concat : t list -> t val alternatives : t list -> t val charset : charset -> bool -> t val posix : Text.t -> bool -> t val meta : Text.t -> Text.t option -> t val backward_reference : int -> t val mode : mode -> bool -> t val look : direction -> t -> bool -> t val condition : int -> t -> t option -> t end = struct type charset_atom = | Ca_range of Text.t * Text.t | Ca_literal of Text.t | Ca_posix of Text.t * bool | Ca_meta of Text.t type charset = charset_atom list type t = | Literal of Text.t | Group of t | Capture of t | Repeat of t * int * int option * greediness | Concat of t list | Alternatives of t list | Charset of charset * bool | Posix of Text.t * bool | Meta of Text.t * Text.t option | Backward_reference of int | Mode of mode * bool | Look of direction * t * bool | Condition of int * t * t option (* +---------------------------------------------------------------+ | Constructors | +---------------------------------------------------------------+ *) let epsilon = Literal "" let literal text = Literal text let group r = match r with | Group _ | Capture _ | Condition _ | Charset _ | Meta(".", None) -> r | _ -> Group r let capture r = match r with | Group r -> Capture r | _ -> Capture r let repeat r min max greediness = match r with | Literal "" -> epsilon | _ -> Repeat(r, min, max, greediness) let concat = function | [] -> epsilon | [r] -> r | l -> (* Inline concatenations: *) Concat(List.flatten (List.map (function | Concat l -> l | Group(Concat l) -> l | re -> [re]) l)) let alternatives = function | [] -> epsilon | [r] -> r | l -> (* Inline non-grouped alternatives: *) Alternatives(List.flatten (List.map (function | Alternatives l -> l | re -> [re]) l)) let charset l state = Charset(l, state) let posix name state = Posix(name, state) let meta text ntext = Meta(text, ntext) let backward_reference n = Backward_reference n let mode mode state = Mode(mode, state) let look dir r state = Look(dir, r, state) let condition id r_then r_else = let r_then = match r_then with | Group r -> r | r -> r and r_else = match r_else with | Some(Group r) -> Some r | x -> x in Condition(id, r_then, r_else) end include Ast (* +-----------------------------------------------------------------+ | Manipulation | +-----------------------------------------------------------------+ *) let rec negate = function | Literal _ -> None | Group r -> begin match negate r with | Some r -> Some(group r) | None -> None end | Capture r -> begin match negate r with | Some r -> Some(capture r) | None -> None end | Repeat _ -> None | Concat _ -> None | Alternatives _ -> None | Charset(cs, state) -> Some(charset cs (not state)) | Posix(name, state) -> Some(posix name (not state)) | Meta(a, None) -> None | Meta(a, Some b) -> Some(meta b (Some a)) | Backward_reference _ -> None | Mode _ -> None | Look _ -> None | Condition _ -> None (* +-----------------------------------------------------------------+ | Parse tree -> regular expression | +-----------------------------------------------------------------+ *) module P = Pa_text_parse let of_parse_tree ~env ~parse_tree = (* [vars] is the mapping from capture to their index, and [n] is the next available index: *) let rec loop vars n = function | P.Group(_, r) -> let vars, n, r = loop vars n r in (vars, n, group r) | P.Literal(_, lit) -> (vars, n, literal lit) | P.Repeat(_, r, min, max, greediness) -> let vars, n, r = loop vars n r in (vars, n, repeat (group r) min max greediness) | P.Concat(_, r1, r2) -> let vars, n, r1 = loop vars n r1 in let vars, n, r2 = loop vars n r2 in (vars, n, concat [r1; r2]) | P.Alternative(_, r1, r2) -> let vars, n, r1 = loop vars n r1 in let vars, n, r2 = loop vars n r2 in (vars, n, alternatives [r1; r2]) | P.Bind(_, r, id, _) -> let vars = Pa_text_env.add id n vars in let vars, n, r = loop vars (n + 1) r in (vars, n, capture r) | P.Charset(_, cs, state) -> let l = List.map begin function | P.Ca_variable(_loc, var, state) -> begin match Pa_text_env.lookup var env with | Some re -> (* Try to negate the regular expression if required: *) let re = if state then re else match negate re with | Some re -> re | None -> Loc.raise _loc (Failure "cannot negate this regular expression") in (* Inline the variable if possible *) let rec loop = function | Group r | Capture r -> loop r | Posix(name, state) -> [Ca_posix(name, state)] | Charset(atoms, state') -> if state <> state' then Loc.raise _loc (Failure "cannot inline a charset with a difference state") else atoms | Literal txt -> [Ca_literal txt] | Meta(txt, _) -> [Ca_meta txt] | _ -> Loc.raise _loc (Failure(var ^ " is not a charset or a literal")) in loop re | None -> Loc.raise _loc (Failure("unbounded variable: " ^ var)) end | P.Ca_range(_loc, a, b) -> [Ca_range(a, b)] | P.Ca_literal(_loc, lit) -> [Ca_literal lit] end cs in (vars, n, charset (List.flatten l) state) | P.Meta(_, text, ntext) -> (vars, n, meta text ntext) | P.Variable(loc, id, state) -> begin match Pa_text_env.lookup id env with | Some re -> let re = if state then re else match negate re with | Some re -> re | None -> Loc.raise loc (Failure "cannot negate this regular expression") in (vars, n, re) | None -> Loc.raise loc (Failure("unbounded variable: " ^ id)) end | P.Backward_reference(loc, id) -> begin try (vars, n, backward_reference (Pa_text_env.find id vars)) with Not_found -> Loc.raise loc (Failure "invalid backward reference") end | P.Condition(loc, id, r_then, None) -> begin try let vars, n, r_then = loop vars n r_then in (vars, n, condition (Pa_text_env.find id vars) r_then None) with Not_found -> Loc.raise loc (Failure "invalid backward reference") end | P.Condition(loc, id, r_then, Some r_else) -> begin try let vars, n, r_then = loop vars n r_then in let vars, n, r_else = loop vars n r_else in (vars, n, condition (Pa_text_env.find id vars) r_then None) with Not_found -> Loc.raise loc (Failure "invalid backward reference") end | P.Mode(loc, mode, state) -> (vars, n, Ast.mode mode state) | P.Look(_, dir, r, state) -> let vars, n, r = loop vars n r in (vars, n, look dir r state) in let vars, n, re = loop Pa_text_env.empty 1 parse_tree in re (* +-----------------------------------------------------------------+ | Literal escaping | +-----------------------------------------------------------------+ *) (* Escape special characters in literals, and restore unicode quotations: *) let escape text = let rec loop acc = function | ("\\" | "^" | "$" | "." | "[" | "|" | "(" | ")" | "?" | "*" | "+" | "{" as ch) :: l -> loop (ch :: "\\" :: acc) l | "\x00" :: "\x00" :: l -> begin match Pa_text_util.split_hexa_quotation l with | Some(txt, l) -> loop (txt :: "\\\\x" :: acc) l | None -> loop ("\x00\x00" :: acc) l end | x :: l -> loop (x :: acc) l | [] -> Text.rev_implode acc in loop [] (Text.explode text) (* Same as [espace] but for text in charset (between "[" and "]"), and restore unicode quotations: *) let escape_in_charset text = let rec loop acc = function | ("\\" | "-" | "[" | "]" | "^" as ch) :: l -> loop (ch :: "\\" :: acc) l | "\x00" :: "\x00" :: l -> begin match Pa_text_util.split_hexa_quotation l with | Some(txt, l) -> loop (txt :: "\\\\x" :: acc) l | None -> loop ("\x00\x00" :: acc) l end | x :: l -> loop (x :: acc) l | [] -> Text.rev_implode acc in loop [] (Text.explode text) (* +-----------------------------------------------------------------+ | Marshaling | +-----------------------------------------------------------------+ *) let string_of_mode = function | Caseless -> "i" | Multiline -> "m" | Dot_all -> "s" let to_string re = let buffer = Buffer.create 42 in let add str = Buffer.add_string buffer str in let addg = function | Greedy -> () | Lazy -> add "?" | Possessive -> add "+" in let rec loop = function | Literal lit -> add (escape lit) | Group re -> add "(?:"; loop re; add ")" | Capture re -> add "("; loop re; add ")" | Repeat(re, 0, None, g) -> loop re; add "*"; addg g | Repeat(re, 1, None, g) -> loop re; add "+"; addg g | Repeat(re, 0, Some 1, g) -> loop re; add "?"; addg g | Repeat(re, min, Some max, g) -> loop re; add "{"; add (string_of_int min); add ","; add (string_of_int max); add "}"; addg g | Repeat(re, min, None, g) -> loop re; add "{"; add (string_of_int min); add ",}"; addg g | Concat l -> List.iter loop l | Alternatives(r :: l) -> loop r; List.iter (fun r -> add "|"; loop r) l | Alternatives [] -> assert false | Charset(cs, true) -> add "["; List.iter loop_charset cs; add "]" | Charset(cs, false) -> add "[^"; List.iter loop_charset cs; add "]" | Posix(name, true) -> add "[[:"; add name; add ":]]" | Posix(name, false) -> add "[[:^"; add name; add ":]]" | Meta(t, nt) -> add t | Backward_reference n -> add "\\g{"; add (string_of_int n); add "}" | Mode(mode, true) -> add "(?"; add (string_of_mode mode); add ")" | Mode(mode, false) -> add "(?-"; add (string_of_mode mode); add ")" | Look(Ahead, r, true) -> add "(?="; loop r; add ")" | Look(Ahead, r, false) -> add "(?!"; loop r; add ")" | Look(Behind, r, true) -> add "(?<="; loop r; add ")" | Look(Behind, r, false) -> add "(? add "(?("; add (string_of_int n); add ")"; loop r_then; begin match r_else with | Some r -> add "|"; loop r; | None -> () end; add ")" and loop_charset = function | Ca_range(a, b) -> add (escape_in_charset a); add "-"; add (escape_in_charset b) | Ca_literal lit -> add (escape_in_charset lit) | Ca_posix(name, state) -> if state then add "[:" else add "[:^"; add name; add ":]" | Ca_meta t -> add t in loop re; Buffer.contents buffer ocaml-text-0.8/syntax/pa_text_regexp.mli000066400000000000000000000034401244105655100205110ustar00rootroot00000000000000(* * pa_text_regexp.mli * ------------------ * Copyright : (c) 2010, Jeremie Dimino * Licence : BSD3 * * This file is a part of ocaml-text. *) (** Manipulation of regular expressions *) open Pa_text_types (** {6 AST of regular expression} *) type charset_atom = | Ca_range of Text.t * Text.t | Ca_literal of Text.t | Ca_posix of Text.t * bool | Ca_meta of Text.t type charset = charset_atom list type t = private | Literal of Text.t | Group of t | Capture of t | Repeat of t * int * int option * greediness | Concat of t list | Alternatives of t list | Charset of charset * bool | Posix of Text.t * bool | Meta of Text.t * Text.t option | Backward_reference of int | Mode of mode * bool | Look of direction * t * bool | Condition of int * t * t option (** {6 Constructors} *) val epsilon : t val literal : Text.t -> t val group : t -> t val capture : t -> t val repeat : t -> int -> int option -> greediness -> t val concat : t list -> t val alternatives : t list -> t val charset : charset -> bool -> t val posix : Text.t -> bool -> t val meta : Text.t -> Text.t option -> t val backward_reference : int -> t val mode : mode -> bool -> t val look : direction -> t -> bool -> t (** {6 T manipulation} *) val negate : t -> t option (** Try to negate the given regular expression. *) (** {6 parse-tree --> ast} *) val of_parse_tree : env : t Pa_text_env.t -> parse_tree : Pa_text_parse.parse_tree -> t (** [of_parse_tree ~env ~parse_tree] returns the t corresponding to the given parse-tree, with all variables inlined (using [env]) and backward reference resolved. *) (** {6 Marshaling} *) val to_string : t -> string (** [to_string regexp] returns the string representation of [regexp] that will be passed to PCRE. *) ocaml-text-0.8/syntax/pa_text_types.ml000066400000000000000000000007111244105655100202100ustar00rootroot00000000000000(* * pa_text_types.ml * ---------------- * Copyright : (c) 2010, Jeremie Dimino * Licence : BSD3 * * This file is a part of ocaml-text. *) (** Common types *) type mode = Caseless | Multiline | Dot_all (** Modes that can be enabled or disabled *) type greediness = Greedy | Lazy | Possessive (** Greediness of a regular expression *) type direction = Behind | Ahead (** Direction of a look-around expression *) ocaml-text-0.8/syntax/pa_text_util.ml000066400000000000000000000010141244105655100200160ustar00rootroot00000000000000(* * pa_text_util.ml * --------------- * Copyright : (c) 2010, Jeremie Dimino * Licence : BSD3 * * This file is a part of ocaml-text. *) let split_hexa_quotation = function | "{" :: x :: l when Text.is_xdigit x -> let rec skip_hexa acc = function | "}" :: l -> Some(Text.rev_implode ("}" :: acc), l) | x :: l when Text.is_xdigit x -> skip_hexa (x :: acc) l | _ -> None in skip_hexa [x; "{"] l | _ -> None ocaml-text-0.8/syntax/pa_text_util.mli000066400000000000000000000006371244105655100202010ustar00rootroot00000000000000(* * pa_text_util.mli * ---------------- * Copyright : (c) 2010, Jeremie Dimino * Licence : BSD3 * * This file is a part of ocaml-text. *) (** Utilities *) val split_hexa_quotation : Text.t list -> (Text.t * Text.t list) option (** Recognises patterns of the form "{XXX}" at the beginning of a string. If it match, it returns the quotation and the rest of the list. *) ocaml-text-0.8/syntax/pcre-syntax.mldylib000066400000000000000000000002451244105655100206230ustar00rootroot00000000000000# OASIS_START # DO NOT EDIT (digest: 8e482e804b0e43cbc187f6fa04681622) Pa_text_main Pa_text_parse Pa_text_regexp Pa_text_env Pa_text_types Pa_text_util # OASIS_STOP ocaml-text-0.8/syntax/pcre-syntax.mllib000066400000000000000000000002451244105655100202660ustar00rootroot00000000000000# OASIS_START # DO NOT EDIT (digest: 8e482e804b0e43cbc187f6fa04681622) Pa_text_main Pa_text_parse Pa_text_regexp Pa_text_env Pa_text_types Pa_text_util # OASIS_STOP ocaml-text-0.8/syntax/text-pcre-syntax.mldylib000066400000000000000000000002451244105655100216050ustar00rootroot00000000000000# OASIS_START # DO NOT EDIT (digest: 8e482e804b0e43cbc187f6fa04681622) Pa_text_main Pa_text_parse Pa_text_regexp Pa_text_env Pa_text_types Pa_text_util # OASIS_STOP ocaml-text-0.8/syntax/text-pcre-syntax.mllib000066400000000000000000000002451244105655100212500ustar00rootroot00000000000000# OASIS_START # DO NOT EDIT (digest: 8e482e804b0e43cbc187f6fa04681622) Pa_text_main Pa_text_parse Pa_text_regexp Pa_text_env Pa_text_types Pa_text_util # OASIS_STOP ocaml-text-0.8/tests/000077500000000000000000000000001244105655100146035ustar00rootroot00000000000000ocaml-text-0.8/tests/test.ml000066400000000000000000000122051244105655100161140ustar00rootroot00000000000000(* * test.ml * ------- * Copyright : (c) 2010, Jeremie Dimino * Licence : BSD3 * * This file is a part of ocaml-text. *) open Printf open Text let success = ref 0 let failure = ref 0 let test name func = try match func () with | true -> incr success | false -> printf "test %s failed\n%!" name; incr failure with exn -> incr failure; printf "test %s raised an exception: %s" name (Printexc.to_string exn) let () = test "length" (fun () -> length "abc" = 3); test "length" (fun () -> length "ééé" = 3); test "code" (fun () -> code "é" = 0xe9); test "char" (fun () -> char 0xe9 = "é"); test "get" (fun () -> get "éà" 1 = "à"); test "get" (fun () -> get "abc" (-1) = "c"); test "sub" (fun () -> sub "ocaml" 1 2 = "ca"); test "sub" (fun () -> sub "ocaml" 3 (-2) = "ca"); test "sub" (fun () -> sub "ocaml" (-2) 1 = "m"); test "slice" (fun () -> slice "abc" 0 1 = "a"); test "slice" (fun () -> slice "abcdef" 1 (-1) = "bcde"); test "splice" (fun () -> splice "abcd" 1 2 "plop" = "aplopcd"); test "splice" (fun () -> splice "abcd" 1 2 "" = "acd"); test "repeat" (fun () -> repeat 5 "a" = "aaaaa"); test "init" (fun () -> init 5 string_of_int = "01234"); test "rev_init" (fun () -> rev_init 5 string_of_int = "43210"); test "upper" (fun () -> upper "abcd" = "ABCD"); test "lower" (fun () -> lower "ABCD" = "abcd"); test "capitalize" (fun () -> capitalize "oCaml" = "OCaml"); test "uncapitalize" (fun () -> uncapitalize "OCaml" = "oCaml"); test "capitalize" (fun () -> capitalize "" = ""); test "uncapitalize" (fun () -> uncapitalize "" = ""); test "compare" (fun () -> compare "abc" "abd" = -1); test "icompare" (fun () -> icompare "abc" "ABC" = 0); test "rev" (fun () -> rev "ocaml" = "lmaco"); test "rev" (fun () -> rev "héhé" = "éhéh"); test "concat" (fun () -> concat "/" ["a"; "b"; "c"] = "a/b/c"); test "rev_concat" (fun () -> rev_concat "/" ["a"; "b"; "c"] = "c/b/a"); test "explode" (fun () -> explode "" = []); test "explode" (fun () -> explode "abé" = ["a"; "b"; "é"]); test "rev_explode" (fun () -> rev_explode "ocaml" = ["l"; "m"; "a"; "c"; "o"]); test "implode" (fun () -> implode ["o"; "c"; "a"; "m"; "l"] = "ocaml"); test "implode" (fun () -> implode ["abc"; "def"] = "abcdef"); test "implode" (fun () -> rev_implode ["o"; "c"; "a"; "m"; "l"] = "lmaco"); test "implode" (fun () -> rev_implode ["abc"; "def"] = "defabc"); test "map" (fun () -> map (function "a" -> "x" | t -> t) "abc" = "xbc"); test "rev_map" (fun () -> rev_map (function "a" -> "x" | t -> t) "abc" = "cbx"); test "fold" (fun () -> fold (fun t acc -> acc + code t) "ABC" 0 = 198); test "rev_fold" (fun () -> rev_fold (fun t acc -> acc + code t) "ABC" 0 = 198); test "filter" (fun () -> filter is_alpha "1a2E" = "aE"); test "rev_filter" (fun () -> rev_filter is_alpha "1a2E" = "Ea"); test "for_all" (fun () -> for_all is_ascii "abcd" = true); test "for_all" (fun () -> for_all is_digit "1234" = true); test "for_all" (fun () -> for_all is_digit "12a" = false); test "exists" (fun () -> exists is_alpha "1234" = false); test "exists" (fun () -> exists is_alpha "123a" = true); test "words" (fun () -> words "Hello, world!" = ["Hello"; "world"]); test "lines" (fun () -> lines "foo\nbar" = ["foo"; "bar"]); test "lines" (fun () -> lines "foo\nbar\n" = ["foo"; "bar"]); test "lines" (fun () -> lines "foo\r\nbar\r\n" = ["foo"; "bar"]); test "split" (fun () -> split ~sep:"/" "/usr/share/doc" = [""; "usr"; "share"; "doc"]); test "split" (fun () -> split ~sep:"." "192.168.1.1" = ["192"; "168"; "1"; "1"]); test "split" (fun () -> split ~sep:"." ~max:2 "192.168.1.1" = ["192"; "168.1.1"]); test "split" (fun () -> split ~sep:"/" "a/b/c" = ["a"; "b"; "c"]); test "split" (fun () -> split ~sep:".." "a..b..c" = ["a"; "b"; "c"]); test "split" (fun () -> split ~max:1 "a b c" = ["a b c"]); test "split" (fun () -> split ~max:2 "a b c" = ["a"; "b c"]); test "rev_split" (fun () -> rev_split ~sep:"." ~max:2 "192.168.1.1" = ["192.168.1"; "1"]); test "replace" (fun () -> replace "abcd" ~patt:"b" ~repl:"x" = "axcd"); test "replace" (fun () -> replace "Hello world!" ~patt:"world" ~repl:"you" = "Hello you!"); test "contains" (fun () -> contains "bbaacc" "aa" = true); test "contains" (fun () -> contains "" "" = true); test "contains" (fun () -> contains "aaa" "" = true); test "contains" (fun () -> contains "abc" "aa" = false); test "starts_with" (fun () -> starts_with "abcd" "ab" = true); test "starts_with" (fun () -> starts_with "abcd" "af" = false); test "starts_with" (fun () -> starts_with "ab" "abcd" = false); test "ends_with" (fun () -> ends_with "abcd" "cd" = true); test "ends_with" (fun () -> ends_with "abcd" "hd" = false); test "ends_with" (fun () -> ends_with "ab" "abc" = false); test "strip" (fun () -> strip " \t\r\n toto \r\n \t" = "toto"); test "rstrip" (fun () -> rstrip " foo " = " foo"); test "lstrip" (fun () -> lstrip " foo " = "foo "); test "rchop" (fun () -> rchop "abcd" = "abc"); test "lchop" (fun () -> lchop "abcd" = "bcd"); Printf.printf "\nsuccess: %d\nfailure: %d\n%!" !success !failure ocaml-text-0.8/text-api.odocl000066400000000000000000000001521244105655100162140ustar00rootroot00000000000000# OASIS_START # DO NOT EDIT (digest: 409ce00c58649525879357a67c770e4e) src/Text src/Encoding # OASIS_STOP