pax_global_header 0000666 0000000 0000000 00000000064 12267100573 0014515 g ustar 00root root 0000000 0000000 52 comment=50df36703ab34c15541170f36ca9be4e8723e3da
xml-light-2.4/ 0000775 0000000 0000000 00000000000 12267100573 0013267 5 ustar 00root root 0000000 0000000 xml-light-2.4/.gitignore 0000664 0000000 0000000 00000000132 12267100573 0015253 0 ustar 00root root 0000000 0000000 *.ml
*.mli
*.byte
*.native
*.cmi
*.cmo
*.cma
*.cmx
*.cmxs
*.cmxa
*~
*.o
*.a
*.exe
META
doc xml-light-2.4/META.in 0000664 0000000 0000000 00000000173 12267100573 0014346 0 ustar 00root root 0000000 0000000 version="@VERSION@"
archive(byte)="xml-light.cma"
archive(native)="xml-light.cmxa"
archive(native,plugin)="xml-light.cmxs"
xml-light-2.4/Makefile 0000664 0000000 0000000 00000006255 12267100573 0014737 0 ustar 00root root 0000000 0000000 # Makefile generated by OCamake
# http://tech.motion-twin.com
VERSION=2.4
OCAMLOPT=ocamlopt
OCAMLC=ocamlc
OCAMLFIND=ocamlfind
.SUFFIXES : .ml .mli .cmo .cmx .cmi .mll .mly
INSTALLDIR=`$(OCAMLC) -where`
CFLAGS=
LFLAGS= -a
LIBS=
NATDYNLINK := $(shell if [ -f `ocamlc -where`/dynlink.cmxa ]; then echo YES; else echo NO; fi)
ifeq "${NATDYNLINK}" "YES"
CMXS=xml-light.cmxs
endif
all: xml-light.cma test.exe doc
opt: xml-light.cmxa $(CMXS) test_opt.exe
installcommon: all
cp xml.mli xmlParser.mli dtd.mli xml.cmi xmlParser.cmi dtd.cmi $(INSTALLDIR)
installbyte: all installcommon
cp xml-light.cma $(INSTALLDIR)
installopt: opt installcommon
cp xml-light.a xml-light.cmxa $(CMXS) xml.cmx dtd.cmx xmlParser.cmx $(INSTALLDIR)
install: installbyte installopt
wininstall: all opt
cp xml-light.cmxa xml-light.lib xml-light.cma xml.mli xmlParser.mli dtd.mli xml.cmi xmlParser.cmi dtd.cmi xml.cmx dtd.cmx xmlParser.cmx c:\ocaml\lib
install_ocamlfind: all opt
cp META.in META
sed -itmp "s|@VERSION@|$(VERSION)|g" META
rm -rf METAtmp
$(OCAMLFIND) install xml-light META xml-light.a xml-light.cma xml-light.cmxa $(CMXS) xml.cmx dtd.cmx xmlParser.cmx xml.mli xmlParser.mli dtd.mli xml.cmi xmlParser.cmi dtd.cmi
doc:
mkdir doc
ocamldoc -sort -html -d doc xml.mli dtd.mli xmlParser.mli
test.exe: xml-light.cma
$(OCAMLC) xml-light.cma test.ml -o test.exe
test_opt.exe: xml-light.cmxa
$(OCAMLOPT) xml-light.cmxa test.ml -o test_opt.exe
xml-light.cma: xml_parser.cmo xml_lexer.cmo dtd.cmo xmlParser.cmo xml.cmo
$(OCAMLC) -o xml-light.cma $(LFLAGS) $(LIBS) xml_parser.cmo xml_lexer.cmo dtd.cmo xmlParser.cmo xml.cmo
xml-light.cmxa: xml_parser.cmx xml_lexer.cmx dtd.cmx xmlParser.cmx xml.cmx
$(OCAMLOPT) -o xml-light.cmxa $(LFLAGS) $(LIBS) xml_parser.cmx xml_lexer.cmx dtd.cmx xmlParser.cmx xml.cmx
xml-light.cmxs: xml-light.cmxa
$(OCAMLOPT) -shared -linkall -I . -o xml-light.cmxs xml-light.cmxa
dtd.cmo: xml.cmi xml_lexer.cmi dtd.cmi
dtd.cmx: xml.cmi xml_lexer.cmi dtd.cmi
xml.cmo: dtd.cmi xmlParser.cmi xml_lexer.cmi xml.cmi
xml.cmx: dtd.cmi xmlParser.cmi xml_lexer.cmi xml.cmi
xmlParser.cmo: dtd.cmi xml.cmi xml_lexer.cmi xmlParser.cmi
xmlParser.cmx: dtd.cmi xml.cmi xml_lexer.cmi xmlParser.cmi
dtd.cmi: xml.cmi
xml.cmi:
xmlParser.cmi: dtd.cmi xml.cmi
xml_lexer.cmi: dtd.cmi
xml_parser.cmo: xml_parser.ml dtd.cmi xml_parser.mli xml_parser.cmi
xml_parser.cmx: xml_parser.ml dtd.cmi xml_parser.mli xml_parser.cmi
xml_parser.cmi: xml_parser.mli dtd.cmi xml.cmi
xml_lexer.cmo: xml_lexer.ml xml_lexer.cmi
xml_lexer.cmx: xml_lexer.ml xml_lexer.cmi
clean:
rm -f xml-light.cma test.exe dtd.cmo dtd.cmi test.cmo test.cmi xml.cmo xml.cmi xmlParser.cmo xmlParser.cmi dtd.cmi xml.cmi xmlParser.cmi xml_lexer.cmi xml_lexer.cmo xml_lexer.ml xml_parser.mli xml_parser.cmi xml_parser.ml xml_parser.cmo
rm -f xml-light.lib xml-light.a xml-light.cmxa test_opt.exe dtd.cmx dtd.obj dtd.o test.cmx test.obj test.o xml.cmx xml.obj xml.o xmlParser.cmx xmlParser.obj xmlParser.o xml_lexer.cmx xml_lexer.obj xml_lexer.o xml_parser.cmx xml_parser.obj xml_parser.o
# SUFFIXES
.ml.cmo:
$(OCAMLC) $(CFLAGS) -c $<
.ml.cmx:
$(OCAMLOPT) $(CFLAGS) -c $<
.mli.cmi:
$(OCAMLC) $(CFLAGS) $<
.mll.ml:
ocamllex $<
.mly.ml:
ocamlyacc $<
xml-light-2.4/README 0000664 0000000 0000000 00000004535 12267100573 0014156 0 ustar 00root root 0000000 0000000 Xml-Light Version 2.4 :
-----------------------
Xml Light is a minimal Xml parser & printer for OCaml.
It provide few functions to parse a basic Xml document into
an OCaml data structure and to print back the data structures
to an Xml document.
Xml Light has also support for DTD (Document Type Definition).
Install
-------
make install
by default, Xml Light is installed in the 'ocamlc -where' directory.
you can change it by editing the Makefile.
for Windows users, if you're using the MSVC version of ocaml and
don't have cygwin tools installed, you can do : nmake all
and then copy manually the files to the place you want.
Usage
-----
simple samples :
-- parse / print an xml string ---
let x = Xml.parse_string "TEXT" in
Printf.printf "XML formated = \n%s" (Xml.to_string_fmt x);
-- load an xml and a dtd , prove and print ---
let x = Xml.parse_file "myfile.xml" in
let dtd = Dtd.parse_file "myfile.dtd" in
let x = Dtd.prove (Dtd.check dtd) "start" x in
print_endline (Xml.to_string x)
Documentation
-------------
HTML documentation can be generated with ocamldoc :
make doc
you can also directly browse the MLI files to read it.
Licence
-------
Xml Light is distributed under the terms of the GNU Library General
Public License, with the special exception on linking described
below. (This is the OCaml library licence.)
As a special exception to the GNU Library General Public License, you
may link, statically or dynamically, a "work that uses the Library"
with a publicly distributed version of the Library to produce an
executable file containing portions of the Library, and distribute
that executable file under terms of your choice, without any of the
additional requirements listed in clause 6 of the GNU Library General
Public License. By "a publicly distributed version of the Library", we
mean either the unmodified Library as distributed by INRIA, or a
modified version of the Library that is distributed under the
conditions defined in clause 3 of the GNU Library General Public
License. This exception does not however invalidate any other reasons
why the executable file might be covered by the GNU Library General
Public License.
Credits
-------
(c)2003-2005 Nicolas Cannasse (ncannasse@motion-twin.com)
(c)2003-2005 Motion-Twin
Some parts of this code source has an additionnal copyright to Jacques Garrigue
xml-light-2.4/dtd.ml 0000664 0000000 0000000 00000036712 12267100573 0014405 0 ustar 00root root 0000000 0000000 (*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library has the special exception on linking described in file
* README.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301 USA
*)
open Xml
open Printf
type parse_error_msg =
| InvalidDTDDecl
| InvalidDTDElement
| InvalidDTDAttribute
| InvalidDTDTag
| DTDItemExpected
type check_error =
| ElementDefinedTwice of string
| AttributeDefinedTwice of string * string
| ElementEmptyContructor of string
| ElementReferenced of string * string
| ElementNotDeclared of string
| WrongImplicitValueForID of string * string
type prove_error =
| UnexpectedPCData
| UnexpectedTag of string
| UnexpectedAttribute of string
| InvalidAttributeValue of string
| RequiredAttribute of string
| ChildExpected of string
| EmptyExpected
| DuplicateID of string
| MissingID of string
type dtd_child =
| DTDTag of string
| DTDPCData
| DTDOptional of dtd_child
| DTDZeroOrMore of dtd_child
| DTDOneOrMore of dtd_child
| DTDChoice of dtd_child list
| DTDChildren of dtd_child list
type dtd_element_type =
| DTDEmpty
| DTDAny
| DTDChild of dtd_child
type dtd_attr_default =
| DTDDefault of string
| DTDRequired
| DTDImplied
| DTDFixed of string
type dtd_attr_type =
| DTDCData
| DTDNMToken
| DTDEnum of string list
| DTDID
| DTDIDRef
type dtd_item =
| DTDAttribute of string * string * dtd_attr_type * dtd_attr_default
| DTDElement of string * dtd_element_type
type dtd_result =
| DTDNext
| DTDNotMatched
| DTDMatched
| DTDMatchedResult of dtd_child
type error_pos = {
eline : int;
eline_start : int;
emin : int;
emax : int;
}
type parse_error = parse_error_msg * Xml.error_pos
exception Parse_error of parse_error
exception Check_error of check_error
exception Prove_error of prove_error
type dtd = dtd_item list
module StringMap = Map.Make(String)
type 'a map = 'a StringMap.t ref
type checked = {
c_elements : dtd_element_type map;
c_attribs : (dtd_attr_type * dtd_attr_default) map map;
}
type dtd_state = {
elements : dtd_element_type map;
attribs : (dtd_attr_type * dtd_attr_default) map map;
mutable current : dtd_element_type;
mutable curtag : string;
state : (string * dtd_element_type) Stack.t;
}
let file_not_found = ref (fun _ -> assert false)
let _raises e =
file_not_found := e
let create_map() = ref StringMap.empty
let empty_map = create_map()
let find_map m k = StringMap.find k (!m)
let set_map m k v = m := StringMap.add k v (!m)
let unset_map m k = m := StringMap.remove k (!m)
let iter_map f m = StringMap.iter f (!m)
let fold_map f m = StringMap.fold f (!m)
let mem_map m k = StringMap.mem k (!m)
let pos source =
let line, lstart, min, max = Xml_lexer.pos source in
(Obj.magic {
eline = line;
eline_start = lstart;
emin = min;
emax = max;
} : Xml.error_pos)
let convert = function
| Xml_lexer.EInvalidDTDDecl -> InvalidDTDDecl
| Xml_lexer.EInvalidDTDElement -> InvalidDTDElement
| Xml_lexer.EInvalidDTDTag -> InvalidDTDTag
| Xml_lexer.EDTDItemExpected -> DTDItemExpected
| Xml_lexer.EInvalidDTDAttribute -> InvalidDTDAttribute
let parse source =
try
Xml_lexer.init source;
(* local cast Dtd.dtd -> dtd *)
let dtd = (Obj.magic Xml_lexer.dtd source : dtd) in
Xml_lexer.close source;
dtd
with
| Xml_lexer.DTDError e ->
Xml_lexer.close source;
raise (Parse_error (convert e,pos source))
let parse_string s = parse (Lexing.from_string s)
let parse_in ch = parse (Lexing.from_channel ch)
let parse_file fname =
let ch = (try open_in fname with Sys_error _ -> raise (!file_not_found fname)) in
try
let x = parse (Lexing.from_channel ch) in
close_in ch;
x
with
e ->
close_in ch;
raise e
let check dtd =
let attribs = create_map() in
let hdone = create_map() in
let htodo = create_map() in
let ftodo tag from =
try
ignore(find_map hdone tag);
with
Not_found ->
try
match find_map htodo tag with
| None -> set_map htodo tag from
| Some _ -> ()
with
Not_found ->
set_map htodo tag from
in
let fdone tag edata =
try
ignore(find_map hdone tag);
raise (Check_error (ElementDefinedTwice tag));
with
Not_found ->
unset_map htodo tag;
set_map hdone tag edata
in
let fattrib tag aname adata =
(match adata with
| DTDID,DTDImplied -> ()
| DTDID,DTDRequired -> ()
| DTDID,_ -> raise (Check_error (WrongImplicitValueForID (tag,aname)))
| _ -> ());
let h = (try
find_map attribs tag
with
Not_found ->
let h = create_map() in
set_map attribs tag h;
h) in
try
ignore(find_map h aname);
raise (Check_error (AttributeDefinedTwice (tag,aname)));
with
Not_found ->
set_map h aname adata
in
let check_item = function
| DTDAttribute (tag,aname,atype,adef) ->
let utag = String.uppercase tag in
ftodo utag None;
fattrib utag (String.uppercase aname) (atype,adef)
| DTDElement (tag,etype) ->
let utag = String.uppercase tag in
fdone utag etype;
let check_type = function
| DTDEmpty -> ()
| DTDAny -> ()
| DTDChild x ->
let rec check_child = function
| DTDTag s -> ftodo (String.uppercase s) (Some utag)
| DTDPCData -> ()
| DTDOptional c
| DTDZeroOrMore c
| DTDOneOrMore c ->
check_child c
| DTDChoice []
| DTDChildren [] ->
raise (Check_error (ElementEmptyContructor tag))
| DTDChoice l
| DTDChildren l ->
List.iter check_child l
in
check_child x
in
check_type etype
in
List.iter check_item dtd;
iter_map (fun t from ->
match from with
| None -> raise (Check_error (ElementNotDeclared t))
| Some tag -> raise (Check_error (ElementReferenced (t,tag)))
) htodo;
{
c_elements = hdone;
c_attribs = attribs;
}
let start_prove dtd root =
let d = {
elements = dtd.c_elements;
attribs = dtd.c_attribs;
state = Stack.create();
current = DTDChild (DTDTag root);
curtag = "_root";
} in
try
ignore(find_map d.elements (String.uppercase root));
d
with
Not_found -> raise (Check_error (ElementNotDeclared root))
(* - for debug only - *)
let to_string_ref = ref (fun _ -> assert false)
let trace dtd tag =
let item = DTDElement ("current",dtd.current) in
printf "%s : %s\n"
(match tag with None -> "#PCDATA" | Some t -> t)
(!to_string_ref item)
exception TmpResult of dtd_result
let prove_child dtd tag =
match dtd.current with
| DTDEmpty -> raise (Prove_error EmptyExpected)
| DTDAny -> ()
| DTDChild elt ->
let rec update = function
| DTDTag s ->
(match tag with
| None -> DTDNotMatched
| Some t when t = String.uppercase s -> DTDMatched
| Some _ -> DTDNotMatched)
| DTDPCData ->
(match tag with
| None -> DTDMatched
| Some _ -> DTDNotMatched)
| DTDOptional x ->
(match update x with
| DTDNotMatched
| DTDNext -> DTDNext
| DTDMatched
| DTDMatchedResult _ -> DTDMatched)
| DTDZeroOrMore x ->
(match update x with
| DTDNotMatched
| DTDNext -> DTDNext
| DTDMatched
| DTDMatchedResult _ -> DTDMatchedResult (DTDZeroOrMore x))
| DTDOneOrMore x ->
(match update x with
| DTDNotMatched
| DTDNext -> DTDNotMatched
| DTDMatched
| DTDMatchedResult _ -> DTDMatchedResult (DTDZeroOrMore x))
| DTDChoice l ->
(try
(match List.exists (fun x ->
match update x with
| DTDMatched -> true
| DTDMatchedResult _ as r -> raise (TmpResult r)
| DTDNext | DTDNotMatched -> false) l with
| true -> DTDMatched
| false -> DTDNotMatched)
with
TmpResult r -> r)
| DTDChildren [] -> assert false (* DTD is checked ! *)
| DTDChildren (h :: t) ->
(match update h with
| DTDNext ->
(match t with
| [] -> DTDNotMatched
| _ -> update (DTDChildren t))
| DTDNotMatched -> DTDNotMatched
| DTDMatchedResult r ->
DTDMatchedResult (DTDChildren (r::t))
| DTDMatched ->
match t with
| [] -> DTDMatched
| _ -> DTDMatchedResult (DTDChildren t))
in
match update elt with
| DTDNext | DTDNotMatched ->
(match tag with
| None -> raise (Prove_error UnexpectedPCData)
| Some t -> raise (Prove_error (UnexpectedTag t)))
| DTDMatched ->
dtd.current <- DTDEmpty
| DTDMatchedResult r ->
dtd.current <- DTDChild r
let is_nmtoken_char = function
| 'A'..'Z' | 'a'..'z' | '0'..'9' | '.' | '-' | '_' | ':' -> true
| _ -> false
let prove_attrib dtd hid hidref attr aname (atype,adef) accu =
let aval = (try Some (List.assoc aname attr) with Not_found -> None) in
(match atype, aval with
| DTDCData, _ -> ()
| DTDNMToken, None -> ()
| DTDNMToken, Some v ->
for i = 0 to String.length v - 1 do
if not (is_nmtoken_char v.[i]) then raise (Prove_error (InvalidAttributeValue aname));
done
| DTDEnum l, None -> ()
| DTDEnum l, Some v ->
if not (List.exists ((=) v) l) then raise (Prove_error (InvalidAttributeValue aname))
| DTDID, None -> ()
| DTDID, Some id ->
if mem_map hid id then raise (Prove_error (DuplicateID id));
set_map hid id ()
| DTDIDRef, None -> ()
| DTDIDRef, Some idref ->
set_map hidref idref ());
match adef, aval with
| DTDRequired, None -> raise (Prove_error (RequiredAttribute aname))
| DTDFixed v, Some av when v <> av -> raise (Prove_error (InvalidAttributeValue aname))
| DTDImplied, None -> accu
| DTDFixed v , None
| DTDDefault _, Some v
| DTDDefault v, None
| DTDRequired, Some v
| DTDImplied, Some v
| DTDFixed _, Some v -> (aname,v) :: accu
let check_attrib ahash (aname,_) =
try
ignore(find_map ahash aname);
with
Not_found -> raise (Prove_error (UnexpectedAttribute aname))
let rec do_prove hid hidref dtd = function
| PCData s ->
prove_child dtd None;
PCData s
| Element (tag,attr,childs) ->
let utag = String.uppercase tag in
let uattr = List.map (fun (aname,aval) -> String.uppercase aname , aval) attr in
prove_child dtd (Some utag);
Stack.push (dtd.curtag,dtd.current) dtd.state;
let elt = (try find_map dtd.elements utag with Not_found -> raise (Prove_error (UnexpectedTag tag))) in
let ahash = (try find_map dtd.attribs utag with Not_found -> empty_map) in
dtd.curtag <- tag;
dtd.current <- elt;
List.iter (check_attrib ahash) uattr;
let attr = fold_map (prove_attrib dtd hid hidref uattr) ahash [] in
let childs = ref (List.map (do_prove hid hidref dtd) childs) in
(match dtd.current with
| DTDAny
| DTDEmpty -> ()
| DTDChild elt ->
let name = ref "" in
let rec check = function
| DTDTag t ->
name := t;
false
| DTDPCData when !childs = [] ->
childs := [PCData ""];
true
| DTDPCData ->
name := "#PCDATA";
false
| DTDOptional _ -> true
| DTDZeroOrMore _ -> true
| DTDOneOrMore e ->
ignore(check e);
false
| DTDChoice l -> List.exists check l
| DTDChildren l -> List.for_all check l
in
match check elt with
| true -> ()
| false -> raise (Prove_error (ChildExpected !name)));
let ctag, cur = Stack.pop dtd.state in
dtd.curtag <- tag;
dtd.current <- cur;
Element (tag,attr,!childs)
let prove dtd root xml =
let hid = create_map() in
let hidref = create_map() in
let x = do_prove hid hidref (start_prove dtd root) xml in
iter_map (fun id () ->
if not (mem_map hid id) then raise (Prove_error (MissingID id))
) hidref;
x
let parse_error_msg = function
| InvalidDTDDecl -> "Invalid DOCTYPE declaration"
| InvalidDTDElement -> "Invalid DTD element declaration"
| InvalidDTDAttribute -> "Invalid DTD attribute declaration"
| InvalidDTDTag -> "Invalid DTD tag"
| DTDItemExpected -> "DTD item expected"
let parse_error (msg,pos) =
let pos = (Obj.magic pos : error_pos) in
if pos.emin = pos.emax then
sprintf "%s line %d character %d" (parse_error_msg msg) pos.eline (pos.emin - pos.eline_start)
else
sprintf "%s line %d characters %d-%d" (parse_error_msg msg) pos.eline (pos.emin - pos.eline_start) (pos.emax - pos.eline_start)
let check_error = function
| ElementDefinedTwice tag -> sprintf "Element '%s' defined twice" tag
| AttributeDefinedTwice (tag,aname) -> sprintf "Attribute '%s' of element '%s' defined twice" aname tag
| ElementEmptyContructor tag -> sprintf "Element '%s' has empty constructor" tag
| ElementReferenced (tag,from) -> sprintf "Element '%s' referenced by '%s' is not declared" tag from
| ElementNotDeclared tag -> sprintf "Element '%s' needed but is not declared" tag
| WrongImplicitValueForID (tag,idname) -> sprintf "Attribute '%s' of type ID of element '%s' not defined with implicit value #REQUIRED or #IMPLIED" idname tag
let prove_error = function
| UnexpectedPCData -> "Unexpected PCData"
| UnexpectedTag tag -> sprintf "Unexpected tag : '%s'" tag
| UnexpectedAttribute att -> sprintf "Unexpected attribute : '%s'" att
| InvalidAttributeValue att -> sprintf "Invalid attribute value for '%s'" att
| RequiredAttribute att -> sprintf "Required attribute not found : '%s'" att
| ChildExpected cname -> sprintf "Child expected : '%s'" cname
| EmptyExpected -> "No more children expected"
| DuplicateID id -> sprintf "ID '%s' used several times" id
| MissingID idref -> sprintf "missing ID value for IDREF '%s'" idref
let to_string = function
| DTDAttribute (tag,aname,atype,adef) ->
let atype_to_string = function
| DTDCData -> "CDATA"
| DTDNMToken -> "NMTOKEN"
| DTDEnum l -> sprintf "(%s)" (String.concat "|" l)
| DTDID -> "ID"
| DTDIDRef -> "IDREF"
in
let adefault_to_string = function
| DTDDefault s -> sprintf "\"%s\"" s
| DTDRequired -> "#REQUIRED"
| DTDImplied -> "#IMPLIED"
| DTDFixed s -> sprintf "#FIXED \"%s\"" s
in
sprintf "" tag aname (atype_to_string atype) (adefault_to_string adef)
| DTDElement (tag,etype) ->
let rec echild_to_string = function
| DTDTag s -> s
| DTDPCData -> "#PCDATA"
| DTDOptional c -> sprintf "%s?" (echild_to_string c)
| DTDZeroOrMore c -> sprintf "%s*" (echild_to_string c)
| DTDOneOrMore c -> sprintf "%s+" (echild_to_string c)
| DTDChoice [c] -> echild_to_string c
| DTDChoice l -> sprintf "(%s)" (String.concat "|" (List.map echild_to_string l))
| DTDChildren [c] -> echild_to_string c
| DTDChildren l -> sprintf "(%s)" (String.concat "," (List.map echild_to_string l))
in
let etype_to_string = function
| DTDEmpty -> "EMPTY"
| DTDAny -> "ANY"
| DTDChild x ->
let rec op_to_string = function
| DTDOptional c -> sprintf "%s?" (op_to_string c)
| DTDZeroOrMore c -> sprintf "%s*" (op_to_string c)
| DTDOneOrMore c -> sprintf "%s+" (op_to_string c)
| _ -> ""
in
let rec root = function
| DTDOptional c
| DTDZeroOrMore c
| DTDOneOrMore c ->
root c
| DTDChoice [_]
| DTDChildren [_] as x ->
x, false
| DTDChoice _
| DTDChildren _ as x ->
x, true
| x -> x, false
in
match root x with
| r, true -> sprintf "%s%s" (echild_to_string r) (op_to_string x)
| r, false -> sprintf "(%s%s)" (echild_to_string r) (op_to_string x)
in
sprintf "" tag (etype_to_string etype)
;;
to_string_ref := to_string
xml-light-2.4/dtd.mli 0000664 0000000 0000000 00000013050 12267100573 0014544 0 ustar 00root root 0000000 0000000 (*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library has the special exception on linking described in file
* README.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301 USA
*)
(** Xml Light DTD
This module provide several functions to create, check, and use DTD
to prove Xml documents : {ul
{li using the DTD types, you can directly create your own DTD structure}
{li the {!Dtd.check} function can then be used to check that all DTD
states have been declared, that no attributes are declared twice,
and so on.}
{li the {!Dtd.prove} function can be used to check an {!Xml} data
structure with a checked DTD. The function will return the
expanded Xml document or raise an exception if the DTD proving
fails.}
}
{i Note about ENTITIES:}
While parsing Xml, PCDATA is always parsed and
the Xml entities & > < ' " are replaced by their
corresponding ASCII characters. For Xml attributes, theses can be
put between either double or simple quotes, and the backslash character
can be used to escape inner quotes. There is no support for CDATA Xml
nodes or PCDATA attributes declarations in DTD, and no support for
user-defined entities using the ENTITY DTD element.
*)
(** {6 The DTD Types} *)
type dtd_child =
| DTDTag of string
| DTDPCData
| DTDOptional of dtd_child
| DTDZeroOrMore of dtd_child
| DTDOneOrMore of dtd_child
| DTDChoice of dtd_child list
| DTDChildren of dtd_child list
type dtd_element_type =
| DTDEmpty
| DTDAny
| DTDChild of dtd_child
type dtd_attr_default =
| DTDDefault of string
| DTDRequired
| DTDImplied
| DTDFixed of string
type dtd_attr_type =
| DTDCData
| DTDNMToken
| DTDEnum of string list
| DTDID
| DTDIDRef
type dtd_item =
| DTDAttribute of string * string * dtd_attr_type * dtd_attr_default
| DTDElement of string * dtd_element_type
type dtd = dtd_item list
type checked
(** {6 The DTD Functions} *)
(** Parse the named file into a Dtd data structure. Raise
{!Xml.File_not_found} if an error occured while opening the file.
Raise {!Dtd.Parse_error} if parsing failed. *)
val parse_file : string -> dtd
(** Read the content of the in_channel and parse it into a Dtd data
structure. Raise {!Dtd.Parse_error} if parsing failed. *)
val parse_in : in_channel -> dtd
(** Parse the string containing a Dtd document into a Dtd data
structure. Raise {!Dtd.Parse_error} if parsing failed. *)
val parse_string : string -> dtd
(** Check the Dtd data structure declaration and return a checked
DTD. Raise {!Dtd.Check_error} if the DTD checking failed. *)
val check : dtd -> checked
(** Prove an Xml document using a checked DTD and an entry point.
The entry point is the first excepted tag of the Xml document,
the returned Xml document has the same structure has the original
one, excepted that non declared optional attributes have been set
to their default value specified in the DTD.
Raise {!Dtd.Check_error} [ElementNotDeclared] if the entry point
is not found, raise {!Dtd.Prove_error} if the Xml document failed
to be proved with the DTD. *)
val prove : checked -> string -> Xml.xml -> Xml.xml
(** Print a DTD element into a string. You can easily get a DTD
document from a DTD data structure using for example
[String.concat "\n" (List.map Dtd.to_string) my_dtd] *)
val to_string : dtd_item -> string
(** {6 The DTD Exceptions} *)
(** There is three types of DTD excecptions : {ul
{li {!Dtd.Parse_error} is raised when an error occured while
parsing a DTD document into a DTD data structure.}
{li {!Dtd.Check_error} is raised when an error occured while
checking a DTD data structure for completeness, or when the
prove entry point is not found when calling {!Dtd.prove}.}
{li {!Dtd.Prove_error} is raised when an error occured while
proving an Xml document.}
}
Several string conversion functions are provided to enable you
to report errors to the user.
*)
type parse_error_msg =
| InvalidDTDDecl
| InvalidDTDElement
| InvalidDTDAttribute
| InvalidDTDTag
| DTDItemExpected
type check_error =
| ElementDefinedTwice of string
| AttributeDefinedTwice of string * string
| ElementEmptyContructor of string
| ElementReferenced of string * string
| ElementNotDeclared of string
| WrongImplicitValueForID of string * string
type prove_error =
| UnexpectedPCData
| UnexpectedTag of string
| UnexpectedAttribute of string
| InvalidAttributeValue of string
| RequiredAttribute of string
| ChildExpected of string
| EmptyExpected
| DuplicateID of string
| MissingID of string
type parse_error = parse_error_msg * Xml.error_pos
exception Parse_error of parse_error
exception Check_error of check_error
exception Prove_error of prove_error
val parse_error : parse_error -> string
val check_error : check_error -> string
val prove_error : prove_error -> string
(**/**)
(* internal usage only... *)
val _raises : (string -> exn) -> unit
xml-light-2.4/makedoc.bat 0000664 0000000 0000000 00000000101 12267100573 0015352 0 ustar 00root root 0000000 0000000 @ocamldoc -sort -html -d doc xml.mli dtd.mli xmlParser.mli
@pause xml-light-2.4/test.ml 0000664 0000000 0000000 00000003553 12267100573 0014606 0 ustar 00root root 0000000 0000000 (*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library has the special exception on linking described in file
* README.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301 USA
*)
open Xml
open Dtd
let parse data =
match data.[0] with
| '#' -> Xml.parse_file (String.sub data 1 ((String.length data)-2))
| _ -> Xml.parse_string data
;;
let buf = ref "" in
print_endline "Please enter some XML data followed (press return twice to parse) :";
try
while true do
match read_line() with
| "" when !buf <> "" ->
let data = !buf in
buf := "";
(try
let x = parse data in
print_endline "Parsing...";
print_endline (Xml.to_string_fmt x);
with
| Xml.Error msg ->
Printf.printf "Xml error : %s\n" (Xml.error msg)
| Dtd.Parse_error msg ->
Printf.printf "Dtd parse error : %s\n" (Dtd.parse_error msg)
| Dtd.Check_error msg ->
Printf.printf "Dtd check error : %s\n" (Dtd.check_error msg)
| Dtd.Prove_error msg ->
Printf.printf "Dtd prove error : %s\n" (Dtd.prove_error msg))
| s ->
buf := !buf ^ s ^ "\n"
done
with
End_of_file -> print_endline "Exit."
xml-light-2.4/xml.dsp 0000664 0000000 0000000 00000004473 12267100573 0014607 0 ustar 00root root 0000000 0000000 # Microsoft Developer Studio Project File - Name="xml" - Package Owner=<4>
# Microsoft Developer Studio Generated Build File, Format Version 6.00
# ** DO NOT EDIT **
# TARGTYPE "Win32 (x86) External Target" 0x0106
CFG=xml - Win32 Bytecode
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
!MESSAGE use the Export Makefile command and run
!MESSAGE
!MESSAGE NMAKE /f "xml.mak".
!MESSAGE
!MESSAGE You can specify a configuration when running NMAKE
!MESSAGE by defining the macro CFG on the command line. For example:
!MESSAGE
!MESSAGE NMAKE /f "xml.mak" CFG="xml - Win32 Bytecode"
!MESSAGE
!MESSAGE Possible choices for configuration are:
!MESSAGE
!MESSAGE "xml - Win32 Bytecode" (based on "Win32 (x86) External Target")
!MESSAGE
# Begin Project
# PROP AllowPerConfigDependencies 0
# PROP Scc_ProjName ""
# PROP Scc_LocalPath ""
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 1
# PROP BASE Output_Dir ""
# PROP BASE Intermediate_Dir ""
# PROP BASE Cmd_Line "ocamake xml.dsp"
# PROP BASE Rebuild_Opt "-all"
# PROP BASE Target_File "xml.exe"
# PROP BASE Bsc_Name ""
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 1
# PROP Output_Dir ""
# PROP Intermediate_Dir ""
# PROP Cmd_Line "ocamake xml.dsp -P xml_parser.ml -P xml_lexer.ml -P dtd.ml -P xmlParser.ml"
# PROP Rebuild_Opt "-all"
# PROP Target_File "xml.exe"
# PROP Bsc_Name ""
# PROP Target_Dir ""
# Begin Target
# Name "xml - Win32 Bytecode"
!IF "$(CFG)" == "xml - Win32 Bytecode"
!ENDIF
# Begin Group "ML Files"
# PROP Default_Filter "ml;mly;mll"
# Begin Source File
SOURCE=.\dtd.ml
# End Source File
# Begin Source File
SOURCE=.\test.ml
# End Source File
# Begin Source File
SOURCE=.\xml.ml
# End Source File
# Begin Source File
SOURCE=.\xml_lexer.mll
# End Source File
# Begin Source File
SOURCE=.\xml_parser.mly
# End Source File
# Begin Source File
SOURCE=.\xmlParser.ml
# End Source File
# End Group
# Begin Group "MLI Files"
# PROP Default_Filter "mli"
# Begin Source File
SOURCE=.\dtd.mli
# End Source File
# Begin Source File
SOURCE=.\xml.mli
# End Source File
# Begin Source File
SOURCE=.\xml_lexer.mli
# End Source File
# Begin Source File
SOURCE=.\xmlParser.mli
# End Source File
# End Group
# End Target
# End Project
xml-light-2.4/xml.dsw 0000664 0000000 0000000 00000001023 12267100573 0014602 0 ustar 00root root 0000000 0000000 Microsoft Developer Studio Workspace File, Format Version 6.00
# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE!
###############################################################################
Project: "xml"=".\xml.dsp" - Package Owner=<4>
Package=<5>
{{{
}}}
Package=<4>
{{{
}}}
###############################################################################
Global:
Package=<5>
{{{
}}}
Package=<3>
{{{
}}}
###############################################################################
xml-light-2.4/xml.ml 0000664 0000000 0000000 00000015760 12267100573 0014432 0 ustar 00root root 0000000 0000000 (*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library has the special exception on linking described in file
* README.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301 USA
*)
open Printf
type xml =
| Element of (string * (string * string) list * xml list)
| PCData of string
type error_pos = {
eline : int;
eline_start : int;
emin : int;
emax : int;
}
type error_msg =
| UnterminatedComment
| UnterminatedString
| UnterminatedEntity
| IdentExpected
| CloseExpected
| NodeExpected
| AttributeNameExpected
| AttributeValueExpected
| EndOfTagExpected of string
| EOFExpected
type error = error_msg * error_pos
exception Error of error
exception File_not_found of string
exception Not_element of xml
exception Not_pcdata of xml
exception No_attribute of string
let default_parser = XmlParser.make()
let pos source =
let line, lstart, min, max = Xml_lexer.pos source in
{
eline = line;
eline_start = lstart;
emin = min;
emax = max;
}
let parse (p:XmlParser.t) (source:XmlParser.source) =
(* local cast Xml.xml -> xml *)
(Obj.magic XmlParser.parse p source : xml)
let parse_in ch = parse default_parser (XmlParser.SChannel ch)
let parse_string str = parse default_parser (XmlParser.SString str)
let parse_file f =
let p = XmlParser.make() in
let path = Filename.dirname f in
XmlParser.resolve p (fun file ->
let name = (match path with "." -> file | _ -> path ^ "/" ^ file) in
Dtd.check (Dtd.parse_file name)
);
parse p (XmlParser.SFile f)
let error_msg = function
| UnterminatedComment -> "Unterminated comment"
| UnterminatedString -> "Unterminated string"
| UnterminatedEntity -> "Unterminated entity"
| IdentExpected -> "Ident expected"
| CloseExpected -> "Element close expected"
| NodeExpected -> "Xml node expected"
| AttributeNameExpected -> "Attribute name expected"
| AttributeValueExpected -> "Attribute value expected"
| EndOfTagExpected tag -> sprintf "End of tag expected : '%s'" tag
| EOFExpected -> "End of file expected"
let error (msg,pos) =
if pos.emin = pos.emax then
sprintf "%s line %d character %d" (error_msg msg) pos.eline (pos.emin - pos.eline_start)
else
sprintf "%s line %d characters %d-%d" (error_msg msg) pos.eline (pos.emin - pos.eline_start) (pos.emax - pos.eline_start)
let line e = e.eline
let range e =
e.emin - e.eline_start , e.emax - e.eline_start
let abs_range e =
e.emin , e.emax
let tag = function
| Element (tag,_,_) -> tag
| x -> raise (Not_element x)
let pcdata = function
| PCData text -> text
| x -> raise (Not_pcdata x)
let attribs = function
| Element (_,attr,_) -> attr
| x -> raise (Not_element x)
let attrib x att =
match x with
| Element (_,attr,_) ->
(try
let att = String.lowercase att in
snd (List.find (fun (n,_) -> String.lowercase n = att) attr)
with
Not_found ->
raise (No_attribute att))
| x ->
raise (Not_element x)
let children = function
| Element (_,_,clist) -> clist
| x -> raise (Not_element x)
(*let enum = function
| Element (_,_,clist) -> List.to_enum clist
| x -> raise (Not_element x)
*)
let iter f = function
| Element (_,_,clist) -> List.iter f clist
| x -> raise (Not_element x)
let map f = function
| Element (_,_,clist) -> List.map f clist
| x -> raise (Not_element x)
let fold f v = function
| Element (_,_,clist) -> List.fold_left f v clist
| x -> raise (Not_element x)
let tmp = Buffer.create 200
let buffer_pcdata text =
let l = String.length text in
for p = 0 to l-1 do
match text.[p] with
| '>' -> Buffer.add_string tmp ">"
| '<' -> Buffer.add_string tmp "<"
| '&' ->
if p < l-1 && text.[p+1] = '#' then
Buffer.add_char tmp '&'
else
Buffer.add_string tmp "&"
| '\'' -> Buffer.add_string tmp "'"
| '"' -> Buffer.add_string tmp """
| c -> Buffer.add_char tmp c
done
let buffer_attr (n,v) =
Buffer.add_char tmp ' ';
Buffer.add_string tmp n;
Buffer.add_string tmp "=\"";
let l = String.length v in
for p = 0 to l-1 do
match v.[p] with
| '\\' -> Buffer.add_string tmp "\\\\"
| '"' -> Buffer.add_string tmp "\\\""
| c -> Buffer.add_char tmp c
done;
Buffer.add_char tmp '"'
let to_string x =
let pcdata = ref false in
let rec loop = function
| Element (tag,alist,[]) ->
Buffer.add_char tmp '<';
Buffer.add_string tmp tag;
List.iter buffer_attr alist;
Buffer.add_string tmp "/>";
pcdata := false;
| Element (tag,alist,l) ->
Buffer.add_char tmp '<';
Buffer.add_string tmp tag;
List.iter buffer_attr alist;
Buffer.add_char tmp '>';
pcdata := false;
List.iter loop l;
Buffer.add_string tmp "";
Buffer.add_string tmp tag;
Buffer.add_char tmp '>';
pcdata := false;
| PCData text ->
if !pcdata then Buffer.add_char tmp ' ';
buffer_pcdata text;
pcdata := true;
in
Buffer.reset tmp;
loop x;
let s = Buffer.contents tmp in
Buffer.reset tmp;
s
let to_string_fmt x =
let rec loop ?(newl=false) tab = function
| Element (tag,alist,[]) ->
Buffer.add_string tmp tab;
Buffer.add_char tmp '<';
Buffer.add_string tmp tag;
List.iter buffer_attr alist;
Buffer.add_string tmp "/>";
if newl then Buffer.add_char tmp '\n';
| Element (tag,alist,[PCData text]) ->
Buffer.add_string tmp tab;
Buffer.add_char tmp '<';
Buffer.add_string tmp tag;
List.iter buffer_attr alist;
Buffer.add_string tmp ">";
buffer_pcdata text;
Buffer.add_string tmp "";
Buffer.add_string tmp tag;
Buffer.add_char tmp '>';
if newl then Buffer.add_char tmp '\n';
| Element (tag,alist,l) ->
Buffer.add_string tmp tab;
Buffer.add_char tmp '<';
Buffer.add_string tmp tag;
List.iter buffer_attr alist;
Buffer.add_string tmp ">\n";
List.iter (loop ~newl:true (tab^" ")) l;
Buffer.add_string tmp tab;
Buffer.add_string tmp "";
Buffer.add_string tmp tag;
Buffer.add_char tmp '>';
if newl then Buffer.add_char tmp '\n';
| PCData text ->
buffer_pcdata text;
if newl then Buffer.add_char tmp '\n';
in
Buffer.reset tmp;
loop "" x;
let s = Buffer.contents tmp in
Buffer.reset tmp;
s
;;
XmlParser._raises (fun x p ->
(* local cast : Xml.error_msg -> error_msg *)
Error ((Obj.magic x : error_msg),pos p))
(fun f -> File_not_found f)
(fun x p -> Dtd.Parse_error (x,
(* local cast : Xml.error_pos -> error_pos *)
(Obj.magic (pos p))));
Dtd._raises (fun f -> File_not_found f); xml-light-2.4/xml.mli 0000664 0000000 0000000 00000013271 12267100573 0014576 0 ustar 00root root 0000000 0000000 (*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library has the special exception on linking described in file
* README.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301 USA
*)
(** Xml Light
Xml Light is a minimal Xml parser & printer for OCaml.
It provide few functions to parse a basic Xml document into
an OCaml data structure and to print back the data structures
to an Xml document.
Xml Light has also support for {b DTD} (Document Type Definition).
{i (c)Copyright 2002-2003 Nicolas Cannasse}
*)
(** {6 Xml Data Structure} *)
(** An Xml node is either
[Element (tag-name, attributes, children)] or [PCData text] *)
type xml =
| Element of (string * (string * string) list * xml list)
| PCData of string
(** {6 Xml Parsing} *)
(** For easily parsing an Xml data source into an xml data structure,
you can use theses functions. But if you want advanced parsing usage,
please look at the {!XmlParser} module.
All the parsing functions can raise some exceptions, see the
{{:#exc}Exceptions} section for more informations. *)
(** Parse the named file into an Xml data structure. *)
val parse_file : string -> xml
(** Read the content of the in_channel and parse it into an Xml data
structure. *)
val parse_in : in_channel -> xml
(** Parse the string containing an Xml document into an Xml data
structure. *)
val parse_string : string -> xml
(** {6:exc Xml Exceptions} *)
(** Several exceptions can be raised when parsing an Xml document : {ul
{li {!Xml.Error} is raised when an xml parsing error occurs. the
{!Xml.error_msg} tells you which error occured during parsing
and the {!Xml.error_pos} can be used to retreive the document
location where the error occured at.}
{li {!Xml.File_not_found} is raised when and error occured while
opening a file with the {!Xml.parse_file} function or when a
DTD file declared by the Xml document is not found {i (see the
{!XmlParser} module for more informations on how to handle the
DTD file loading)}.}
}
If the Xml document is containing a DTD, then some other exceptions
can be raised, see the module {!Dtd} for more informations.
*)
type error_pos
type error_msg =
| UnterminatedComment
| UnterminatedString
| UnterminatedEntity
| IdentExpected
| CloseExpected
| NodeExpected
| AttributeNameExpected
| AttributeValueExpected
| EndOfTagExpected of string
| EOFExpected
type error = error_msg * error_pos
exception Error of error
exception File_not_found of string
(** Get a full error message from an Xml error. *)
val error : error -> string
(** Get the Xml error message as a string. *)
val error_msg : error_msg -> string
(** Get the line the error occured at. *)
val line : error_pos -> int
(** Get the relative character range (in current line) the error occured at.*)
val range : error_pos -> int * int
(** Get the absolute character range the error occured at. *)
val abs_range : error_pos -> int * int
(** {6 Xml Functions} *)
exception Not_element of xml
exception Not_pcdata of xml
exception No_attribute of string
(** [tag xdata] returns the tag value of the xml node.
Raise {!Xml.Not_element} if the xml is not an element *)
val tag : xml -> string
(** [pcdata xdata] returns the PCData value of the xml node.
Raise {!Xml.Not_pcdata} if the xml is not a PCData *)
val pcdata : xml -> string
(** [attribs xdata] returns the attribute list of the xml node.
First string if the attribute name, second string is attribute value.
Raise {!Xml.Not_element} if the xml is not an element *)
val attribs : xml -> (string * string) list
(** [attrib xdata "href"] returns the value of the ["href"]
attribute of the xml node (attribute matching is case-insensitive).
Raise {!Xml.No_attribute} if the attribute does not exists in the node's
attribute list
Raise {!Xml.Not_element} if the xml is not an element *)
val attrib : xml -> string -> string
(** [children xdata] returns the children list of the xml node
Raise {!Xml.Not_element} if the xml is not an element *)
val children : xml -> xml list
(*** [enum xdata] returns the children enumeration of the xml node
Raise {!Xml.Not_element} if the xml is not an element *)
(* val enum : xml -> xml Enum.t *)
(** [iter f xdata] calls f on all children of the xml node.
Raise {!Xml.Not_element} if the xml is not an element *)
val iter : (xml -> unit) -> xml -> unit
(** [map f xdata] is equivalent to [List.map f (Xml.children xdata)]
Raise {!Xml.Not_element} if the xml is not an element *)
val map : (xml -> 'a) -> xml -> 'a list
(** [fold f init xdata] is equivalent to
[List.fold_left f init (Xml.children xdata)]
Raise {!Xml.Not_element} if the xml is not an element *)
val fold : ('a -> xml -> 'a) -> 'a -> xml -> 'a
(** {6 Xml Printing} *)
(** Print the xml data structure into a compact xml string (without
any user-readable formating ). *)
val to_string : xml -> string
(** Print the xml data structure into an user-readable string with
tabs and lines break between different nodes. *)
val to_string_fmt : xml -> string
xml-light-2.4/xmlParser.ml 0000664 0000000 0000000 00000012161 12267100573 0015577 0 ustar 00root root 0000000 0000000 (*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
* Copyright (C) 2003 Jacques Garrigue
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library has the special exception on linking described in file
* README.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301 USA
*)
open Printf
open Dtd
open Xml
type t = {
mutable prove : bool;
mutable check_eof : bool;
mutable concat_pcdata : bool;
mutable resolve : (string -> checked);
}
type source =
| SFile of string
| SChannel of in_channel
| SString of string
| SLexbuf of Lexing.lexbuf
type state = {
source : Lexing.lexbuf;
stack : Xml_lexer.token Stack.t;
xparser : t;
}
exception Internal_error of Xml.error_msg
exception NoMoreData
let xml_error = ref (fun _ -> assert false)
let dtd_error = ref (fun _ -> assert false)
let file_not_found = ref (fun _ -> assert false)
let _raises e f d =
xml_error := e;
file_not_found := f;
dtd_error := d
let make () =
{
prove = true;
check_eof = true;
concat_pcdata = true;
resolve = (fun file -> raise (!file_not_found file))
}
let prove p v = p.prove <- v
let resolve p f = p.resolve <- f
let check_eof p v = p.check_eof <- v
let concat_pcdata p v = p.concat_pcdata <- v
let pop s =
try
Stack.pop s.stack
with
Stack.Empty ->
Xml_lexer.token s.source
let push t s =
Stack.push t s.stack
let rec read_node s =
match pop s with
| Xml_lexer.PCData s -> PCData s
| Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, [])
| Xml_lexer.Tag (tag, attr, false) -> Element (tag, attr, read_elems ~tag s)
| t ->
push t s;
raise NoMoreData
and
read_elems ?tag s =
let elems = ref [] in
(try
while true do
match s.xparser.concat_pcdata , read_node s , !elems with
| true , PCData c , (PCData c2) :: q ->
elems := PCData (sprintf "%s\n%s" c2 c) :: q
| _ , x , l ->
elems := x :: l
done
with
NoMoreData -> ());
match pop s with
| Xml_lexer.Endtag s when Some s = tag -> List.rev !elems
| Xml_lexer.Eof when tag = None -> List.rev !elems
| t ->
match tag with
| None -> raise (Internal_error EOFExpected)
| Some s -> raise (Internal_error (EndOfTagExpected s))
let read_xml s =
match s.xparser.prove, pop s with
| true, Xml_lexer.DocType (root, Xml_lexer.DTDFile file) ->
let pos = Xml_lexer.pos s.source in
let dtd = s.xparser.resolve file in
Xml_lexer.restore pos;
let x = read_node s in
Dtd.prove dtd root x
| true, Xml_lexer.DocType (root, Xml_lexer.DTDData dtd) ->
let dtd = Dtd.check dtd in
let x = read_node s in
Dtd.prove dtd root x
| false, Xml_lexer.DocType _ ->
read_node s
| _, t ->
push t s;
read_node s
let convert = function
| Xml_lexer.EUnterminatedComment -> UnterminatedComment
| Xml_lexer.EUnterminatedString -> UnterminatedString
| Xml_lexer.EIdentExpected -> IdentExpected
| Xml_lexer.ECloseExpected -> CloseExpected
| Xml_lexer.ENodeExpected -> NodeExpected
| Xml_lexer.EAttributeNameExpected -> AttributeNameExpected
| Xml_lexer.EAttributeValueExpected -> AttributeValueExpected
| Xml_lexer.EUnterminatedEntity -> UnterminatedEntity
let dtd_convert = function
| Xml_lexer.EInvalidDTDDecl -> InvalidDTDDecl
| Xml_lexer.EInvalidDTDTag -> InvalidDTDTag
| Xml_lexer.EDTDItemExpected -> DTDItemExpected
| Xml_lexer.EInvalidDTDElement -> InvalidDTDElement
| Xml_lexer.EInvalidDTDAttribute -> InvalidDTDAttribute
let do_parse xparser source =
try
Xml_lexer.init source;
let s = { source = source; xparser = xparser; stack = Stack.create(); } in
let tk = pop s in
(* skip UTF8 BOM *)
if tk <> Xml_lexer.PCData "\239\187\191" then push tk s;
let x = read_xml s in
if xparser.check_eof && pop s <> Xml_lexer.Eof then raise (Internal_error EOFExpected);
Xml_lexer.close source;
x
with
| NoMoreData ->
Xml_lexer.close source;
raise (!xml_error NodeExpected source)
| Internal_error e ->
Xml_lexer.close source;
raise (!xml_error e source)
| Xml_lexer.Error e ->
Xml_lexer.close source;
raise (!xml_error (convert e) source)
| Xml_lexer.DTDError e ->
Xml_lexer.close source;
raise (!dtd_error (dtd_convert e) source)
let parse p = function
| SChannel ch -> do_parse p (Lexing.from_channel ch)
| SString str -> do_parse p (Lexing.from_string str)
| SLexbuf lex -> do_parse p lex
| SFile fname ->
let ch = (try open_in fname with Sys_error _ -> raise (!file_not_found fname)) in
try
let x = do_parse p (Lexing.from_channel ch) in
close_in ch;
x
with
e ->
close_in ch;
raise e
xml-light-2.4/xmlParser.mli 0000664 0000000 0000000 00000006705 12267100573 0015757 0 ustar 00root root 0000000 0000000 (*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library has the special exception on linking described in file
* README.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301 USA
*)
(** Xml Light Parser
While basic parsing functions can be used in the {!Xml} module, this module
is providing a way to create, configure and run an Xml parser.
*)
(** Abstract type for an Xml parser. *)
type t
(** Several kind of resources can contain Xml documents. *)
type source =
| SFile of string
| SChannel of in_channel
| SString of string
| SLexbuf of Lexing.lexbuf
(** This function returns a new parser with default options. *)
val make : unit -> t
(** This function enable or disable automatic DTD proving with the parser.
Note that Xml documents having no reference to a DTD are never proved
when parsed (but you can prove them later using the {!Dtd} module
{i (by default, prove is true)}. *)
val prove : t -> bool -> unit
(** When parsing an Xml document from a file using the {!Xml.parse_file}
function, the DTD file if declared by the Xml document has to be in the
same directory as the xml file. When using other parsing functions,
such as on a string or on a channel, the parser will raise everytime
{!Xml.File_not_found} if a DTD file is needed and prove enabled. To enable
the DTD loading of the file, the user have to configure the Xml parser
with a [resolve] function which is taking as argument the DTD filename and
is returning a checked DTD. The user can then implement any kind of DTD
loading strategy, and can use the {!Dtd} module functions to parse and check
the DTD file {i (by default, the resolve function is raising}
{!Xml.File_not_found}). *)
val resolve : t -> (string -> Dtd.checked) -> unit
(** When a Xml document is parsed, the parser will check that the end of the
document is reached, so for example parsing [""] will fail instead
of returning only the A element. You can turn off this check by setting
[check_eof] to [false] {i (by default, check_eof is true)}. *)
val check_eof : t -> bool -> unit
(** Once the parser is configurated, you can run the parser on a any kind
of xml document source to parse its contents into an Xml data structure. *)
val parse : t -> source -> Xml.xml
(** When several PCData elements are separed by a \n (or \r\n), you can
either split the PCData in two distincts PCData or merge them with \n
as seperator into one PCData. The default behavior is to concat the
PCData, but this can be changed for a given parser with this flag. *)
val concat_pcdata : t -> bool -> unit
(**/**)
(* internal usage only... *)
val _raises : (Xml.error_msg -> Lexing.lexbuf -> exn) -> (string -> exn) -> (Dtd.parse_error_msg -> Lexing.lexbuf -> exn) -> unit xml-light-2.4/xml_lexer.mli 0000664 0000000 0000000 00000003346 12267100573 0015777 0 ustar 00root root 0000000 0000000 (*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library has the special exception on linking described in file
* README.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301 USA
*)
type error =
| EUnterminatedComment
| EUnterminatedString
| EIdentExpected
| ECloseExpected
| ENodeExpected
| EAttributeNameExpected
| EAttributeValueExpected
| EUnterminatedEntity
type dtd_error =
| EInvalidDTDDecl
| EInvalidDTDTag
| EDTDItemExpected
| EInvalidDTDElement
| EInvalidDTDAttribute
exception Error of error
exception DTDError of dtd_error
type dtd_decl =
| DTDFile of string
| DTDData of Dtd.dtd
type token =
| Tag of string * (string * string) list * bool
| PCData of string
| Endtag of string
| DocType of (string * dtd_decl)
| Eof
type pos = int * int * int * int
val init : Lexing.lexbuf -> unit
val close : Lexing.lexbuf -> unit
val token : Lexing.lexbuf -> token
val dtd : Lexing.lexbuf -> Dtd.dtd
val pos : Lexing.lexbuf -> pos
val restore : pos -> unit xml-light-2.4/xml_lexer.mll 0000664 0000000 0000000 00000030311 12267100573 0015772 0 ustar 00root root 0000000 0000000 {(*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library has the special exception on linking described in file
* README.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301 USA
*)
open Lexing
open Xml_parser
open Dtd
type error =
| EUnterminatedComment
| EUnterminatedString
| EIdentExpected
| ECloseExpected
| ENodeExpected
| EAttributeNameExpected
| EAttributeValueExpected
| EUnterminatedEntity
type dtd_error =
| EInvalidDTDDecl
| EInvalidDTDTag
| EDTDItemExpected
| EInvalidDTDElement
| EInvalidDTDAttribute
exception Error of error
exception DTDError of dtd_error
type pos = int * int * int * int
type dtd_decl =
| DTDFile of string
| DTDData of dtd
type dtd_item_type =
| TElement
| TAttribute
type token =
| Tag of string * (string * string) list * bool
| PCData of string
| Endtag of string
| DocType of (string * dtd_decl)
| Eof
let last_pos = ref 0
and current_line = ref 0
and current_line_start = ref 0
let tmp = Buffer.create 200
let entities = [
"gt" ,62, ">";
"lt" ,60, "<";
"amp" ,38, "&";
"apos",39, "'";
"quot",34, "\"";
]
let idents = Hashtbl.create 0
let _ = begin
List.iter (fun (str,code,res) ->
Hashtbl.add idents (str^";") res;
if code > 0
then Hashtbl.add idents ("#" ^ string_of_int code ^ ";") res
) entities
end
let init lexbuf =
current_line := 1;
current_line_start := lexeme_start lexbuf;
last_pos := !current_line_start
let close lexbuf =
Buffer.reset tmp
let pos lexbuf =
!current_line , !current_line_start ,
!last_pos ,
lexeme_start lexbuf
let restore (cl,cls,lp,_) =
current_line := cl;
current_line_start := cls;
last_pos := lp
let newline lexbuf =
incr current_line;
last_pos := lexeme_end lexbuf;
current_line_start := !last_pos
let error lexbuf e =
last_pos := lexeme_start lexbuf;
raise (Error e)
let dtd_error lexbuf e =
last_pos := lexeme_start lexbuf;
raise (DTDError e)
}
let newline = ['\n']
let break = ['\r']
let space = [' ' '\t']
let identchar = ['A'-'Z' 'a'-'z' '_' '0'-'9' ':' '-']
let entitychar = ['A'-'Z' 'a'-'z' '0'-'9']
let pcchar = [^ '\r' '\n' '<' '>' '&']
let cdata_start = ['c''C']['d''D']['a''A']['t''T']['a''A']
rule token = parse
| newline
{
newline lexbuf;
token lexbuf
}
| (space | break) +
{
last_pos := lexeme_end lexbuf;
token lexbuf
}
| ""
{ () }
| eof
{ raise (Error EUnterminatedComment) }
| _
{ comment lexbuf }
and header = parse
| newline
{
newline lexbuf;
header lexbuf
}
| "?>"
{ () }
| eof
{ error lexbuf ECloseExpected }
| _
{ header lexbuf }
and cdata = parse
| [^ ']' '\n']+
{
Buffer.add_string tmp (lexeme lexbuf);
cdata lexbuf
}
| newline
{
newline lexbuf;
Buffer.add_string tmp (lexeme lexbuf);
cdata lexbuf
}
| "]]>"
{ Buffer.contents tmp }
| ']'
{
Buffer.add_string tmp (lexeme lexbuf);
cdata lexbuf
}
| eof
{ error lexbuf ECloseExpected }
and pcdata = parse
| pcchar+
{
Buffer.add_string tmp (lexeme lexbuf);
pcdata lexbuf
}
| '&'
{
Buffer.add_string tmp (entity lexbuf);
pcdata lexbuf
}
| ""
{ Buffer.contents tmp }
and entity = parse
| entitychar+ ';'
{
let ident = lexeme lexbuf in
try
Hashtbl.find idents (String.lowercase ident)
with
Not_found -> "&" ^ ident
}
| '#' ['0'-'9']+ ';'
{
let ident = lexeme lexbuf in
try
Hashtbl.find idents (String.lowercase ident)
with
Not_found -> "&" ^ ident
}
| _ | eof
{ raise (Error EUnterminatedEntity) }
and ident_name = parse
| identchar+
{ lexeme lexbuf }
| _ | eof
{ error lexbuf EIdentExpected }
and close_tag = parse
| '>'
{ () }
| _ | eof
{ error lexbuf ECloseExpected }
and attributes = parse
| '>'
{ [], false }
| "/>"
{ [], true }
| "" (* do not read a char ! *)
{
let key = attribute lexbuf in
let data = attribute_data lexbuf in
ignore_spaces lexbuf;
let others, closed = attributes lexbuf in
(key, data) :: others, closed
}
and attribute = parse
| identchar+
{ lexeme lexbuf }
| _ | eof
{ error lexbuf EAttributeNameExpected }
and attribute_data = parse
| space* '=' space* '"'
{
Buffer.reset tmp;
last_pos := lexeme_end lexbuf;
dq_string lexbuf
}
| space* '=' space* '\''
{
Buffer.reset tmp;
last_pos := lexeme_end lexbuf;
q_string lexbuf
}
| _ | eof
{ error lexbuf EAttributeValueExpected }
and dq_string = parse
| '"'
{ Buffer.contents tmp }
| '\\' [ '"' '\\' ]
{
Buffer.add_char tmp (lexeme_char lexbuf 1);
dq_string lexbuf
}
| eof
{ raise (Error EUnterminatedString) }
| _
{
Buffer.add_char tmp (lexeme_char lexbuf 0);
dq_string lexbuf
}
and q_string = parse
| '\''
{ Buffer.contents tmp }
| '\\' [ '\'' '\\' ]
{
Buffer.add_char tmp (lexeme_char lexbuf 1);
q_string lexbuf
}
| eof
{ raise (Error EUnterminatedString) }
| _
{
Buffer.add_char tmp (lexeme_char lexbuf 0);
q_string lexbuf
}
and dtd_data = parse
| "PUBLIC"
{
ignore_spaces lexbuf;
(* skipping Public ID *)
let _ = dtd_file lexbuf in
let file = dtd_file lexbuf in
dtd_end_decl lexbuf;
DTDFile file
}
| "SYSTEM"
{
ignore_spaces lexbuf;
let file = dtd_file lexbuf in
dtd_end_decl lexbuf;
DTDFile file
}
| '['
{
ignore_spaces lexbuf;
let data = dtd_intern lexbuf in
dtd_end_decl lexbuf;
DTDData data
}
| _ | eof
{ dtd_error lexbuf EInvalidDTDDecl }
and dtd_file = parse
| '"'
{
Buffer.reset tmp;
let s = dq_string lexbuf in
ignore_spaces lexbuf;
s
}
| '\''
{
Buffer.reset tmp;
let s = q_string lexbuf in
ignore_spaces lexbuf;
s
}
| _ | eof
{ dtd_error lexbuf EInvalidDTDDecl }
and dtd_intern = parse
| ']'
{
ignore_spaces lexbuf;
[]
}
| ""
{
let l = dtd_item lexbuf in
l @ (dtd_intern lexbuf)
}
and dtd = parse
| eof
{ [] }
| newline
{
newline lexbuf;
dtd lexbuf
}
| (space | break)+
{ dtd lexbuf }
| ""
{
let l = dtd_item lexbuf in
l @ (dtd lexbuf)
}
and dtd_end_decl = parse
| '>'
{ ignore_spaces lexbuf }
| _ | eof
{ dtd_error lexbuf EInvalidDTDDecl }
and dtd_item = parse
| "