pax_global_header00006660000000000000000000000064127370277000014517gustar00rootroot0000000000000052 comment=633f285a5062016eee881878d8ee44fb12664149 biniou-1.0.12/000077500000000000000000000000001273702770000130655ustar00rootroot00000000000000biniou-1.0.12/.gitignore000066400000000000000000000001661273702770000150600ustar00rootroot00000000000000*~ *.cm[ioxat] *.[oa] *.cmx[as] *.cmti *.annot META bdump test-stream.dat test2.bin *.byte *.native *.exe test_biniou biniou-1.0.12/Changes000066400000000000000000000020351273702770000143600ustar00rootroot00000000000000Releases of biniou ================== !!! = some incompatibilities opt = optimizations +ui = additions in the user interface -ui = restrictions in the user interface bug = bug or security fix doc = major changes in the documentation pkg = changes in the structure of the package or in the installation procedure 2012-03-19 1.0.2: [+ui] support for flushing output to abstract OO channels 2012-01-03 1.0.1: [+ui] new experimental array streaming utility [+ui] compiling with -g, allowing stack trace recording [doc] fixed description of the format for string encoding 2010-12-04 1.0.0: [+ui] added system of references for sharing values [!!!] new variant `Shared [!!!] new fields in input and output buffers [+ui] bdump: option -h to specify alternate dictionary [pkg] standalone reference for the biniou format 2010-09-13 0.9.1: [bug] fixed Bi_inbuf.from_channel [pkg] added INSTALL file 2010-08-04 0.9.0: first release biniou-1.0.12/INSTALL000066400000000000000000000012321273702770000141140ustar00rootroot00000000000000 Installation instructions for biniou Requirements: - Objective Caml (>= 3.11 is fine, earlier versions are probably fine too) - GNU make - Findlib (`ocamlfind' command) - easy-format GODI makes the installation process straightforward, although other package managers can be equally convenient. Manual installation is done using: make # or `make all' for the bytecode-only version make install # or `make BINDIR=/foo/bin install' for installing executables # in a place other than the guessed default. Uninstallation: make uninstall Bugs and feedback should be sent to Martin Jambon . biniou-1.0.12/LICENSE000066400000000000000000000025601273702770000140750ustar00rootroot00000000000000Copyright (c) 2010 Martin Jambon All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. biniou-1.0.12/META.in000066400000000000000000000003341273702770000141430ustar00rootroot00000000000000name = "biniou" version = "@@VERSION@@" description = "Extensible binary serialization format" requires = "easy-format" archive(byte) = "biniou.cma" archive(native) = "biniou.cmxa" archive(native,plugin) = "biniou.cmxs" biniou-1.0.12/Makefile000066400000000000000000000104361273702770000145310ustar00rootroot00000000000000VERSION = 1.0.12 FLAGS = -g -annot -bin-annot PACKS = easy-format ifeq "$(shell ocamlfind ocamlc -config |grep os_type)" "os_type: Win32" EXE=.exe else EXE= endif BEST = $(shell \ if ocamlfind ocamlopt 2>/dev/null; then \ echo .native; \ else \ echo .byte; \ fi \ ) NATDYNLINK = $(shell \ if [ -f `ocamlfind ocamlc -where`/dynlink.cmxa ]; then \ echo YES; \ else \ echo NO; \ fi \ ) ifeq "${NATDYNLINK}" "YES" CMXS=biniou.cmxs endif .PHONY: default all byte opt install doc test default: all test_biniou$(EXE) ifeq "$(BEST)" ".native" all: byte opt doc META else all: byte doc META endif byte: biniou.cma bdump.byte opt: biniou.cmxa $(CMXS) bdump.native test: test_biniou$(EXE) ./$< ifndef PREFIX PREFIX = $(shell dirname $$(dirname $$(which ocamlfind))) export PREFIX endif ifndef BINDIR BINDIR = $(PREFIX)/bin export BINDIR endif META: META.in Makefile sed -e 's:@@VERSION@@:$(VERSION):' META.in > META SOURCES = bi_util.mli bi_util.ml \ bi_share.mli bi_share.ml \ bi_outbuf.mli bi_outbuf.ml bi_inbuf.mli bi_inbuf.ml \ bi_vint.mli bi_vint.ml bi_io.mli bi_io.ml \ bi_dump.ml bi_stream.mli bi_stream.ml MLI = $(filter %.mli, $(SOURCES)) ML = $(filter %.ml, $(SOURCES)) CMI = $(MLI:.mli=.cmi) CMT = $(MLI:.mli=.cmt) ANNOT = $(MLI:.mli=.annot) CMO = $(ML:.ml=.cmo) CMX = $(ML:.ml=.cmx) O = $(ML:.ml=.o) biniou.cma: $(SOURCES) Makefile ocamlfind ocamlc -a $(FLAGS) -o biniou.cma \ -package "$(PACKS)" $(SOURCES) biniou.cmxa: $(SOURCES) Makefile ocamlfind ocamlopt -a $(FLAGS) \ -o biniou.cmxa -package "$(PACKS)" $(SOURCES) biniou.cmxs: biniou.cmxa ocamlfind ocamlopt -shared -linkall -I . -o $@ $^ bdump.byte: biniou.cma bdump.ml ocamlfind ocamlc -o $@ $(FLAGS) \ -package $(PACKS) -linkpkg $^ bdump.native: biniou.cmxa bdump.ml ocamlfind ocamlopt -o $@ $(FLAGS) \ -package $(PACKS) -linkpkg $^ test_biniou.byte: biniou.cma test_biniou.ml ocamlfind ocamlc -o $@ $(FLAGS) \ -package "$(PACKS) unix" -linkpkg $^ test_biniou.native: biniou.cmxa test_biniou.ml ocamlfind ocamlopt -o $@ $(FLAGS) \ -package "$(PACKS) unix" -linkpkg $^ %$(EXE): %$(BEST) cp $< $@ doc: doc/index.html doc/index.html: $(MLI) mkdir -p doc ocamlfind ocamldoc -d doc -html -package easy-format $(MLI) install: META byte if [ -f bdump.native ]; then \ cp bdump.native $(BINDIR)/bdump$(EXE); \ else \ cp bdump.byte $(BINDIR)/bdump$(EXE); \ fi ocamlfind install biniou META \ $(MLI) $(CMI) $(CMT) $(ANNOT) $(CMO) biniou.cma \ -optional $(CMX) $(O) biniou.cmxa biniou.a biniou.cmxs uninstall: rm -f $(BINDIR)/bdump{.exe,} ocamlfind remove biniou .PHONY: clean clean: rm -f *.o *.a *.cm[ioxa] *.cmxa *~ *.annot META rm -f {bdump,test_biniou}{.exe,.byte,.native,} rm -rf doc rm -f test.bin test_channels.bin SUBDIRS = SVNURL = svn://svn.forge.ocamlcore.org/svnroot/biniou/trunk/biniou .PHONY: archive archive: @echo "Making archive for version $(VERSION)" @if [ -z "$$WWW" ]; then \ echo '*** Environment variable WWW is undefined ***' >&2; \ exit 1; \ fi @if [ -n "$$(svn status -q)" ]; then \ echo "*** There are uncommitted changes, aborting. ***" >&2; \ exit 1; \ fi $(MAKE) && ./bdump -help > $$WWW/bdump-help.txt mkdir -p $$WWW/biniou-doc $(MAKE) doc && cp doc/* $$WWW/biniou-doc/ rm -rf /tmp/biniou /tmp/biniou-$(VERSION) && \ cd /tmp && \ svn co "$(SVNURL)" && \ for x in "." $(SUBDIRS); do \ rm -rf /tmp/biniou/$$x/.svn; \ done && \ cd /tmp && cp -r biniou biniou-$(VERSION) && \ tar czf biniou.tar.gz biniou && \ tar cjf biniou.tar.bz2 biniou && \ tar czf biniou-$(VERSION).tar.gz biniou-$(VERSION) && \ tar cjf biniou-$(VERSION).tar.bz2 biniou-$(VERSION) mv /tmp/biniou.tar.gz /tmp/biniou.tar.bz2 ../releases mv /tmp/biniou-$(VERSION).tar.gz \ /tmp/biniou-$(VERSION).tar.bz2 ../releases cp ../releases/biniou.tar.gz $$WWW/ cp ../releases/biniou.tar.bz2 $$WWW/ cp ../releases/biniou-$(VERSION).tar.gz $$WWW/ cp ../releases/biniou-$(VERSION).tar.bz2 $$WWW/ cd ../releases && \ svn add biniou.tar.gz biniou.tar.bz2 \ biniou-$(VERSION).tar.gz biniou-$(VERSION).tar.bz2 && \ svn commit -m "biniou version $(VERSION)" cp LICENSE $$WWW/biniou-license.txt cp Changes $$WWW/biniou-changes.txt cp biniou-format.txt $$WWW/biniou-format.txt echo 'let biniou_version = "$(VERSION)"' \ > $$WWW/biniou-version.ml biniou-1.0.12/README.md000066400000000000000000000015101273702770000143410ustar00rootroot00000000000000Biniou ====== Biniou (pronounced "be new") is a binary data format designed for speed, safety, ease of use and backward compatibility as protocols evolve. Biniou is vastly equivalent to JSON in terms of functionality but allows implementations several times faster (4 times faster than [yojson](https://github.com/mjambon/yojson)), with 25-35% space savings. Biniou data can be decoded into human-readable form without knowledge of type definitions except for field and variant names which are represented by 31-bit hashes. A program named `bdump` is provided for routine visualization of biniou data files. The program [atdgen](https://mjambon.github.io/atdgen-doc/) is used to derive OCaml-Biniou serializers and deserializers from type definitions. Biniou format specification: https://mjambon.github.io/atdgen-doc/biniou-format.txt biniou-1.0.12/bdump.ml000066400000000000000000000041051273702770000145260ustar00rootroot00000000000000open Printf let () = let file = ref None in let dic = ref [] in let dic_file = ref (Bi_dump.default_dict_path ()) in let use_global_dictionary = ref true in let options = [ "-d", Arg.String (fun s -> dic := Bi_dump.load_lines !dic s), "file File containing words to add to the dictionary, one per line"; "-h", Arg.String (fun s -> dic_file := Some s), "file Location of the dictionary used for unhashing. Default: $HOME/.bdump-dict on Unix, $HOMEPATH\\_bdump-dict on Windows"; "-w", Arg.String (fun s -> dic := List.rev_append (Bi_dump.split s) !dic), "word1,word2,... Comma-separated list of words to add to the dictionary"; "-x", Arg.Clear use_global_dictionary, sprintf " Do not load nor update the dictionary used for name unhashing."; ] in let msg = sprintf "Usage: %s [file] [options]" Sys.argv.(0) in let error () = Arg.usage options msg in let set_file s = match !file with None -> file := Some s | Some _ -> error () in Arg.parse options set_file msg; if !use_global_dictionary then ( let must_save = !dic <> [] in dic := Bi_dump.load_dictionary !dic_file !dic; if must_save then Bi_dump.save_dictionary !dic_file !dic ); let unhash = Bi_io.make_unhash !dic in let ic = match !file with None -> stdin | Some s -> open_in_bin s in let inbuf = Bi_inbuf.from_string (Bi_dump.load ic) in let value_count = ref 0 in Printexc.record_backtrace true; (try while true do (try ignore (Bi_inbuf.peek inbuf) with Bi_inbuf.End_of_input -> raise Exit); Bi_io.print_view_of_tree (Bi_io.read_tree ~unhash inbuf); print_newline (); incr value_count; done; with Exit -> () | e -> Printf.eprintf "Broken input after reading %i value%s: \ exception %s\n" !value_count (if !value_count > 1 then "s" else "") (Printexc.to_string e); Printexc.print_backtrace stderr; flush stderr ); close_in ic biniou-1.0.12/bi_dump.ml000066400000000000000000000037161273702770000150450ustar00rootroot00000000000000open Printf (* let split s = Str.split (Str.regexp ",") s *) let split s = let acc = ref [] in let stop = ref (String.length s) in for i = !stop - 1 downto 0 do if s.[i] = ',' then ( let start = i + 1 in acc := String.sub s start (!stop - start) :: !acc; stop := i ) done; String.sub s 0 !stop :: !acc let load_lines accu s = let ic = open_in s in let l = ref accu in (try while true do l := input_line ic :: List.rev !l done with End_of_file -> close_in ic ); !l let load ic = let buf = Buffer.create 1000 in try while true do Buffer.add_char buf (input_char ic); done; assert false with End_of_file -> Buffer.contents buf let ( // ) = Filename.concat let default_dict_path () = try match Sys.os_type with "Unix" -> Some (Sys.getenv "HOME" // ".bdump-dict") | "Win32" -> Some (Sys.getenv "HOMEPATH" // "_bdump-dict") | "Cygwin" -> Some (Sys.getenv "HOME" // ".bdump-dict") | _ -> None with Not_found -> None let load_dictionary dic_file accu = match dic_file with None -> accu | Some fn -> if Sys.file_exists fn then try load_lines accu fn with e -> failwith (sprintf "Cannot load dictionary from %S: %s\n%!" fn (Printexc.to_string e)) else accu let write_uniq oc a = if Array.length a > 0 then ( fprintf oc "%s\n" a.(0); ignore ( Array.fold_left ( fun last x -> if last <> x then fprintf oc "%s\n" x; x ) a.(0) a ) ) let save_dictionary dic_file l = match dic_file with None -> () | Some fn -> let a = Array.of_list l in Array.sort String.compare a; let oc = open_out fn in let finally () = close_out_noerr oc in try write_uniq oc a; finally () with e -> finally (); raise e biniou-1.0.12/bi_inbuf.ml000066400000000000000000000045221273702770000151770ustar00rootroot00000000000000type t = { mutable i_s : string; mutable i_pos : int; mutable i_len : int; mutable i_offs : int; mutable i_max_len : int; i_refill : (t -> int -> unit); i_shared : Bi_share.Rd.tbl; } exception End_of_input let try_preread ib n = if ib.i_len - ib.i_pos < n then ( ib.i_refill ib n; min (ib.i_len - ib.i_pos) n ) else n let read ib n = let pos = ib.i_pos in if ib.i_len - pos >= n then ( ib.i_pos <- pos + n; pos ) else if try_preread ib n >= n then let pos = ib.i_pos in ib.i_pos <- ib.i_pos + n; pos else raise End_of_input let read_char ib = let pos = ib.i_pos in if ib.i_len - pos > 0 then ( let c = String.unsafe_get ib.i_s pos in ib.i_pos <- pos + 1; c ) else if try_preread ib 1 > 0 then let pos = ib.i_pos in let c = String.unsafe_get ib.i_s pos in ib.i_pos <- pos + 1; c else raise End_of_input let peek ib = let pos = ib.i_pos in if ib.i_len - pos > 0 then ( String.unsafe_get ib.i_s pos ) else if try_preread ib 1 > 0 then String.unsafe_get ib.i_s ib.i_pos else raise End_of_input let from_string ?(pos = 0) ?(shrlen = 16) s = { i_s = s; i_pos = pos; i_len = String.length s; i_offs = -pos; i_max_len = String.length s; i_refill = (fun ib n -> ()); i_shared = Bi_share.Rd.create shrlen; } (* Like Pervasives.really_input but returns the number of bytes read instead of raising End_of_file when the end of file is reached. *) let rec not_really_input ic s pos len accu = let n = input ic s pos len in if n < len && n > 0 then not_really_input ic s (pos + n) (len - n) (accu + n) else accu + n let refill_from_channel ic ib n = if n > ib.i_max_len then invalid_arg "Bi_inbuf.refill_from_channel" else ( let rem_len = ib.i_len - ib.i_pos in if rem_len < n then let s = ib.i_s in String.blit s ib.i_pos s 0 rem_len; let to_read = n - rem_len in let really_read = not_really_input ic s rem_len to_read 0 in ib.i_offs <- ib.i_offs + ib.i_pos; ib.i_pos <- 0; ib.i_len <- rem_len + really_read ) let from_channel ?(len = 4096) ?(shrlen = 16) ic = { i_s = String.create len; i_pos = 0; i_len = 0; i_offs = 0; i_max_len = len; i_refill = refill_from_channel ic; i_shared = Bi_share.Rd.create shrlen; } biniou-1.0.12/bi_inbuf.mli000066400000000000000000000073301273702770000153500ustar00rootroot00000000000000(** Input buffer *) type t = { mutable i_s : string; (** This is the buffer string. It can be accessed for reading but should normally only be written to or replaced only by the [i_refill] function. *) mutable i_pos : int; (** This is the current position in the input buffer. All data before that may be erased at anytime. *) mutable i_len : int; (** This is the position of the first byte of invalid input data. Data starting at [i_pos] and ending at [i_len-1] is considered valid input data that is available to the user. Beware that calls to [try_preread], [read] and other read functions may move data around and therefore modify the values of [i_pos] and [i_len] in order to keep pointing to the correct data segment. *) mutable i_offs : int; (** Length of data read and discarded from the buffer. This indicates the position in the input stream of the first byte of the buffer. The current position in the input stream is [i_offs + i_pos]. The total length of input stream data put into the buffer is [i_offs + i_len]. *) mutable i_max_len : int; (** This is the length of [i_s]. *) i_refill : t -> int -> unit; (** Function called when not enough data is available in the buffer. The int argument is the total number of bytes that must be available starting at position [i_pos] when the function returns. This function typically does nothing if all input data already has been placed into the buffer. The [i_pos] and [i_len] fields can be modified the [i_refill] function, as long as the available data that was starting from [i_pos] still starts from the new value of [i_pos]. All the other fields can be modified as well. *) i_shared : Bi_share.Rd.tbl; (** Hash table used to map positions in the input stream to shared objects (if any). *) } exception End_of_input (** Exception raised by all the functions of this module when it is not possible to return a valid result because there is not enough data to read from the buffer. *) val try_preread : t -> int -> int (** [try_preread ib n] make at least [n] bytes available for reading in [ib.i_s], unless the end of the input is reached. The result indicates how many bytes were made available. If smaller than [n], the result indicates that the end of the input was reached. [ib.i_pos] is set to point to the first available byte. *) val read : t -> int -> int (** [read ib n] makes at least [n] bytes available for reading or raises the [End_of_input] exception. The result is the position of the first available byte. [ib.i_pos] is moved to point to the next position after the [n] bytes. @raise End_of_input if there is less than [n] bytes before the end of input. *) val read_char : t -> char (** Read just one byte. @raise End_of_input if the end of input has already been reached. *) val peek : t -> char (** Return the next byte without moving forward. @raise End_of_input if the end of input has already been reached. *) val from_string : ?pos:int -> ?shrlen:int -> string -> t (** Create an input buffer from a string. @param pos position to start from. Default: 0. @param shrlen initial length of the table used to store shared values. *) val from_channel : ?len:int -> ?shrlen:int -> in_channel -> t (** Create an input buffer from an in_channel. Such a buffer is not extensible and [read] requests may not exceed [len]. @param len buffer length. @param shrlen initial length of the table used to store shared values. *) biniou-1.0.12/bi_io.ml000066400000000000000000000562301273702770000145060ustar00rootroot00000000000000open Printf open Bi_outbuf open Bi_inbuf type node_tag = int let bool_tag = 0 let int8_tag = 1 let int16_tag = 2 let int32_tag = 3 let int64_tag = 4 let float32_tag = 11 let float64_tag = 12 let uvint_tag = 16 let svint_tag = 17 let string_tag = 18 let array_tag = 19 let tuple_tag = 20 let record_tag = 21 let num_variant_tag = 22 let variant_tag = 23 let unit_tag = 24 let table_tag = 25 let shared_tag = 26 type hash = int (* Data tree, for testing purposes. *) type tree = [ `Unit | `Bool of bool | `Int8 of char | `Int16 of int | `Int32 of Int32.t | `Int64 of Int64.t | `Float32 of float | `Float64 of float | `Uvint of int | `Svint of int | `String of string | `Array of (node_tag * tree array) option | `Tuple of tree array | `Record of (string option * hash * tree) array | `Num_variant of (int * tree option) | `Variant of (string option * hash * tree option) | `Table of ((string option * hash * node_tag) array * tree array array) option | `Shared of tree ] (* extend sign bit *) let make_signed x = if x > 0x3FFFFFFF then x - (1 lsl 31) else x (* Same function as the one used for OCaml variants and object methods. *) let hash_name s = let accu = ref 0 in for i = 0 to String.length s - 1 do accu := 223 * !accu + Char.code s.[i] done; (* reduce to 31 bits *) accu := !accu land (1 lsl 31 - 1); (* make it signed for 64 bits architectures *) make_signed !accu (* Structure of a hashtag: 4 bytes, argbit 7bits 8bits 8bits 8bits +---------------------+ 31-bit hash argbit = 1 iff hashtag is followed by an argument, this is always 1 for record fields. *) let write_hashtag ob h has_arg = let h = h land 0x7fffffff in let pos = Bi_outbuf.alloc ob 4 in let s = ob.o_s in String.unsafe_set s (pos+3) (Char.chr (h land 0xff)); let h = h lsr 8 in String.unsafe_set s (pos+2) (Char.chr (h land 0xff)); let h = h lsr 8 in String.unsafe_set s (pos+1) (Char.chr (h land 0xff)); let h = h lsr 8 in String.unsafe_set s pos ( Char.chr ( if has_arg then h lor 0x80 else h ) ) let string_of_hashtag h has_arg = let ob = Bi_outbuf.create 4 in write_hashtag ob h has_arg; Bi_outbuf.contents ob let read_hashtag ib cont = let i = Bi_inbuf.read ib 4 in let s = ib.i_s in let x0 = Char.code s.[i] in let has_arg = x0 >= 0x80 in let x1 = (x0 land 0x7f) lsl 24 in let x2 = (Char.code s.[i+1]) lsl 16 in let x3 = (Char.code s.[i+2]) lsl 8 in let x4 = Char.code s.[i+3] in let h = make_signed (x1 lor x2 lor x3 lor x4) in cont ib h has_arg let read_field_hashtag ib = let i = Bi_inbuf.read ib 4 in let s = ib.i_s in let x0 = Char.code (String.unsafe_get s i) in if x0 < 0x80 then Bi_util.error "Corrupted data (invalid field hashtag)"; let x1 = (x0 land 0x7f) lsl 24 in let x2 = (Char.code (String.unsafe_get s (i+1))) lsl 16 in let x3 = (Char.code (String.unsafe_get s (i+2))) lsl 8 in let x4 = Char.code (String.unsafe_get s (i+3)) in make_signed (x1 lor x2 lor x3 lor x4) type int7 = int let write_numtag ob i has_arg = if i < 0 || i > 0x7f then Bi_util.error "Corrupted data (invalid numtag)"; let x = if has_arg then i lor 0x80 else i in Bi_outbuf.add_char ob (Char.chr x) let read_numtag ib cont = let i = Bi_inbuf.read ib 1 in let x = Char.code ib.i_s.[i] in let has_arg = x >= 0x80 in cont ib (x land 0x7f) has_arg let make_unhash l = let tbl = Hashtbl.create (4 * List.length l) in List.iter ( fun s -> let h = hash_name s in try match Hashtbl.find tbl h with Some s' -> if s <> s' then failwith ( sprintf "Bi_io.make_unhash: \ %S and %S have the same hash, please pick another name" s s' ) | None -> assert false with Not_found -> Hashtbl.add tbl h (Some s) ) l; fun h -> try Hashtbl.find tbl h with Not_found -> None let write_tag ob x = Bi_outbuf.add_char ob (Char.chr x) let write_untagged_unit ob () = Bi_outbuf.add_char ob '\x00' let write_untagged_bool ob x = Bi_outbuf.add_char ob (if x then '\x01' else '\x00') let write_untagged_char ob x = Bi_outbuf.add_char ob x let write_untagged_int8 ob x = Bi_outbuf.add_char ob (Char.chr x) let write_untagged_int16 ob x = Bi_outbuf.add_char ob (Char.chr (x lsr 8)); Bi_outbuf.add_char ob (Char.chr (x land 0xff)) let write_untagged_int32 ob x = let high = Int32.to_int (Int32.shift_right_logical x 16) in Bi_outbuf.add_char ob (Char.chr (high lsr 8)); Bi_outbuf.add_char ob (Char.chr (high land 0xff)); let low = Int32.to_int x in Bi_outbuf.add_char ob (Char.chr ((low lsr 8) land 0xff)); Bi_outbuf.add_char ob (Char.chr (low land 0xff)) let write_untagged_float32 ob x = write_untagged_int32 ob (Int32.bits_of_float x) let float_endianness = lazy ( match String.unsafe_get (Obj.magic 1.0) 0 with '\x3f' -> `Big | '\x00' -> `Little | _ -> assert false ) let read_untagged_float64 ib = let i = Bi_inbuf.read ib 8 in let s = ib.i_s in let x = Obj.new_block Obj.double_tag 8 in (match Lazy.force float_endianness with `Little -> for j = 0 to 7 do String.unsafe_set (Obj.obj x) (7-j) (String.unsafe_get s (i+j)) done | `Big -> for j = 0 to 7 do String.unsafe_set (Obj.obj x) j (String.unsafe_get s (i+j)) done ); (Obj.obj x : float) let write_untagged_float64 ob x = let i = Bi_outbuf.alloc ob 8 in let s = ob.o_s in (match Lazy.force float_endianness with `Little -> for j = 0 to 7 do String.unsafe_set s (i+j) (String.unsafe_get (Obj.magic x) (7-j)) done | `Big -> for j = 0 to 7 do String.unsafe_set s (i+j) (String.unsafe_get (Obj.magic x) j) done ) (* let write_untagged_int64 ob x = let x4 = Int64.to_int (Int64.shift_right_logical x 48) in Bi_outbuf.add_char ob (Char.chr (x4 lsr 8)); Bi_outbuf.add_char ob (Char.chr (x4 land 0xff)); let x3 = Int64.to_int (Int64.shift_right_logical x 32) in Bi_outbuf.add_char ob (Char.chr ((x3 lsr 8) land 0xff)); Bi_outbuf.add_char ob (Char.chr (x3 land 0xff)); let x2 = Int64.to_int (Int64.shift_right_logical x 16) in Bi_outbuf.add_char ob (Char.chr ((x2 lsr 8) land 0xff)); Bi_outbuf.add_char ob (Char.chr (x2 land 0xff)); let x1 = Int64.to_int x in Bi_outbuf.add_char ob (Char.chr ((x1 lsr 8) land 0xff)); Bi_outbuf.add_char ob (Char.chr (x1 land 0xff)) *) let write_untagged_int64 ob x = write_untagged_float64 ob (Int64.float_of_bits x) let safety_test () = let s = "\x3f\xf0\x06\x05\x04\x03\x02\x01" in let x = 1.00146962706651288 in let y = read_untagged_float64 (Bi_inbuf.from_string s) in if x <> y then assert false; let ob = Bi_outbuf.create 8 in write_untagged_float64 ob x; if Bi_outbuf.contents ob <> s then assert false let write_untagged_string ob s = Bi_vint.write_uvint ob (String.length s); Bi_outbuf.add_string ob s let write_untagged_uvint = Bi_vint.write_uvint let write_untagged_svint = Bi_vint.write_svint let write_unit ob () = write_tag ob unit_tag; write_untagged_unit ob () let write_bool ob x = write_tag ob bool_tag; write_untagged_bool ob x let write_char ob x = write_tag ob int8_tag; write_untagged_char ob x let write_int8 ob x = write_tag ob int8_tag; write_untagged_int8 ob x let write_int16 ob x = write_tag ob int16_tag; write_untagged_int16 ob x let write_int32 ob x = write_tag ob int32_tag; write_untagged_int32 ob x let write_int64 ob x = write_tag ob int64_tag; write_untagged_int64 ob x let write_float32 ob x = write_tag ob float32_tag; write_untagged_float32 ob x let write_float64 ob x = write_tag ob float64_tag; write_untagged_float64 ob x let write_string ob x = write_tag ob string_tag; write_untagged_string ob x let write_uvint ob x = write_tag ob uvint_tag; write_untagged_uvint ob x let write_svint ob x = write_tag ob svint_tag; write_untagged_svint ob x let rec write_t ob tagged (x : tree) = match x with `Unit -> if tagged then write_tag ob unit_tag; write_untagged_unit ob () | `Bool x -> if tagged then write_tag ob bool_tag; write_untagged_bool ob x | `Int8 x -> if tagged then write_tag ob int8_tag; write_untagged_char ob x | `Int16 x -> if tagged then write_tag ob int16_tag; write_untagged_int16 ob x | `Int32 x -> if tagged then write_tag ob int32_tag; write_untagged_int32 ob x | `Int64 x -> if tagged then write_tag ob int64_tag; write_untagged_int64 ob x | `Float32 x -> if tagged then write_tag ob float32_tag; write_untagged_float32 ob x | `Float64 x -> if tagged then write_tag ob float64_tag; write_untagged_float64 ob x | `Uvint x -> if tagged then write_tag ob uvint_tag; Bi_vint.write_uvint ob x | `Svint x -> if tagged then write_tag ob svint_tag; Bi_vint.write_svint ob x | `String s -> if tagged then write_tag ob string_tag; write_untagged_string ob s | `Array o -> if tagged then write_tag ob array_tag; (match o with None -> Bi_vint.write_uvint ob 0 | Some (node_tag, a) -> let len = Array.length a in Bi_vint.write_uvint ob len; if len > 0 then ( write_tag ob node_tag; Array.iter (write_t ob false) a ) ) | `Tuple a -> if tagged then write_tag ob tuple_tag; Bi_vint.write_uvint ob (Array.length a); Array.iter (write_t ob true) a | `Record a -> if tagged then write_tag ob record_tag; Bi_vint.write_uvint ob (Array.length a); Array.iter (write_field ob) a | `Num_variant (i, x) -> if tagged then write_tag ob num_variant_tag; write_numtag ob i (x <> None); (match x with None -> () | Some v -> write_t ob true v) | `Variant (o, h, x) -> if tagged then write_tag ob variant_tag; write_hashtag ob h (x <> None); (match x with None -> () | Some v -> write_t ob true v) | `Table o -> if tagged then write_tag ob table_tag; (match o with None -> Bi_vint.write_uvint ob 0 | Some (fields, a) -> let row_num = Array.length a in Bi_vint.write_uvint ob row_num; if row_num > 0 then let col_num = Array.length fields in Bi_vint.write_uvint ob col_num; Array.iter ( fun (name, h, tag) -> write_hashtag ob h true; write_tag ob tag ) fields; if row_num > 0 then ( for i = 0 to row_num - 1 do let ai = a.(i) in if Array.length ai <> col_num then invalid_arg "Bi_io.write_t: Malformed `Table"; for j = 0 to col_num - 1 do write_t ob false ai.(j) done done ) ) | `Shared x -> if tagged then write_tag ob shared_tag; let offset = Bi_share.Wr.put ob.o_shared (x, Bi_share.dummy_type_id) (ob.o_offs + ob.o_len) in Bi_vint.write_uvint ob offset; if offset = 0 then write_t ob true x and write_field ob (s, h, x) = write_hashtag ob h true; write_t ob true x let write_tree ob x = write_t ob true x let string_of_tree x = let ob = Bi_outbuf.create 1000 in write_tree ob x; Bi_outbuf.contents ob let tag_of_tree (x : tree) = match x with `Unit -> unit_tag | `Bool _ -> bool_tag | `Int8 _ -> int8_tag | `Int16 _ -> int16_tag | `Int32 _ -> int32_tag | `Int64 _ -> int64_tag | `Float32 _ -> float32_tag | `Float64 _ -> float64_tag | `Uvint _ -> uvint_tag | `Svint _ -> svint_tag | `String _ -> string_tag | `Array _ -> array_tag | `Tuple _ -> tuple_tag | `Record _ -> record_tag | `Num_variant _ -> num_variant_tag | `Variant _ -> variant_tag | `Table _ -> table_tag | `Shared _ -> shared_tag let read_tag ib = Char.code (Bi_inbuf.read_char ib) let read_untagged_unit ib = match Bi_inbuf.read_char ib with '\x00' -> () | _ -> Bi_util.error "Corrupted data (unit value)" let read_untagged_bool ib = match Bi_inbuf.read_char ib with '\x00' -> false | '\x01' -> true | _ -> Bi_util.error "Corrupted data (bool value)" let read_untagged_char ib = Bi_inbuf.read_char ib let read_untagged_int8 ib = Char.code (Bi_inbuf.read_char ib) let read_untagged_int16 ib = let i = Bi_inbuf.read ib 2 in let s = ib.i_s in ((Char.code s.[i]) lsl 8) lor (Char.code s.[i+1]) let read_untagged_int32 ib = let i = Bi_inbuf.read ib 4 in let s = ib.i_s in let x1 = Int32.of_int (((Char.code s.[i ]) lsl 8) lor (Char.code s.[i+1])) in let x2 = Int32.of_int (((Char.code s.[i+2]) lsl 8) lor (Char.code s.[i+3])) in Int32.logor (Int32.shift_left x1 16) x2 let read_untagged_float32 ib = Int32.float_of_bits (read_untagged_int32 ib) (* let read_untagged_int64 ib = let i = Bi_inbuf.read ib 8 in let s = ib.i_s in let x1 = Int64.of_int (((Char.code s.[i ]) lsl 8) lor (Char.code s.[i+1])) in let x2 = Int64.of_int (((Char.code s.[i+2]) lsl 8) lor (Char.code s.[i+3])) in let x3 = Int64.of_int (((Char.code s.[i+4]) lsl 8) lor (Char.code s.[i+5])) in let x4 = Int64.of_int (((Char.code s.[i+6]) lsl 8) lor (Char.code s.[i+7])) in Int64.logor (Int64.shift_left x1 48) (Int64.logor (Int64.shift_left x2 32) (Int64.logor (Int64.shift_left x3 16) x4)) *) let read_untagged_int64 ib = Int64.bits_of_float (read_untagged_float64 ib) let read_untagged_string ib = let len = Bi_vint.read_uvint ib in let str = String.create len in let pos = ref 0 in let rem = ref len in while !rem > 0 do let bytes_read = Bi_inbuf.try_preread ib !rem in if bytes_read = 0 then Bi_util.error "Corrupted data (string)" else ( String.blit ib.i_s ib.i_pos str !pos bytes_read; ib.i_pos <- ib.i_pos + bytes_read; pos := !pos + bytes_read; rem := !rem - bytes_read ) done; str let read_untagged_uvint = Bi_vint.read_uvint let read_untagged_svint = Bi_vint.read_svint let read_unit ib = read_untagged_unit ib; `Unit let read_bool ib = `Bool (read_untagged_bool ib) let read_int8 ib = `Int8 (read_untagged_char ib) let read_int16 ib = `Int16 (read_untagged_int16 ib) let read_int32 ib = `Int32 (read_untagged_int32 ib) let read_int64 ib = `Int64 (read_untagged_int64 ib) let read_float32 ib = `Float32 (read_untagged_float32 ib) let read_float64 ib = `Float64 (read_untagged_float64 ib) let read_uvint ib = `Uvint (read_untagged_uvint ib) let read_svint ib = `Svint (read_untagged_svint ib) let read_string ib = `String (read_untagged_string ib) let print s = print_string s; print_newline () let read_tree ?(unhash = make_unhash []) ib : tree = let rec read_array ib = let len = Bi_vint.read_uvint ib in if len = 0 then `Array None else let tag = read_tag ib in let read = reader_of_tag tag in `Array (Some (tag, Array.init len (fun _ -> read ib))) and read_tuple ib = let len = Bi_vint.read_uvint ib in `Tuple (Array.init len (fun _ -> read_tree ib)) and read_field ib = let h = read_field_hashtag ib in let name = unhash h in let x = read_tree ib in (name, h, x) and read_record ib = let len = Bi_vint.read_uvint ib in `Record (Array.init len (fun _ -> read_field ib)) and read_num_variant_cont ib i has_arg = let x = if has_arg then Some (read_tree ib) else None in `Num_variant (i, x) and read_num_variant ib = read_numtag ib read_num_variant_cont and read_variant_cont ib h has_arg = let name = unhash h in let x = if has_arg then Some (read_tree ib) else None in `Variant (name, h, x) and read_variant ib = read_hashtag ib read_variant_cont and read_table ib = let row_num = Bi_vint.read_uvint ib in if row_num = 0 then `Table None else let col_num = Bi_vint.read_uvint ib in let fields = Array.init col_num ( fun _ -> let h = read_field_hashtag ib in let name = unhash h in let tag = read_tag ib in (name, h, tag) ) in let readers = Array.map (fun (name, h, tag) -> reader_of_tag tag) fields in let a = Array.init row_num (fun _ -> Array.init col_num (fun j -> readers.(j) ib)) in `Table (Some (fields, a)) and read_shared ib = let pos = ib.i_offs + ib.i_pos in let offset = Bi_vint.read_uvint ib in if offset = 0 then let rec r = `Shared r in Bi_share.Rd.put ib.i_shared (pos, Bi_share.dummy_type_id) (Obj.repr r); let x = read_tree ib in Obj.set_field (Obj.repr r) 1 (Obj.repr x); r else Obj.obj (Bi_share.Rd.get ib.i_shared (pos - offset, Bi_share.dummy_type_id)) and reader_of_tag = function 0 (* bool *) -> read_bool | 1 (* int8 *) -> read_int8 | 2 (* int16 *) -> read_int16 | 3 (* int32 *) -> read_int32 | 4 (* int64 *) -> read_int64 | 11 (* float32 *) -> read_float32 | 12 (* float64 *) -> read_float64 | 16 (* uvint *) -> read_uvint | 17 (* svint *) -> read_svint | 18 (* string *) -> read_string | 19 (* array *) -> read_array | 20 (* tuple *) -> read_tuple | 21 (* record *) -> read_record | 22 (* num_variant *) -> read_num_variant | 23 (* variant *) -> read_variant | 24 (* unit *) -> read_unit | 25 (* table *) -> read_table | 26 (* shared *) -> read_shared | _ -> Bi_util.error "Corrupted data (invalid tag)" and read_tree ib : tree = reader_of_tag (read_tag ib) ib in read_tree ib let tree_of_string ?unhash s = read_tree ?unhash (Bi_inbuf.from_string s) let skip_bytes ib n = ignore (Bi_inbuf.read ib n) let skip_unit ib = skip_bytes ib 1 let skip_bool ib = skip_bytes ib 1 let skip_int8 ib = skip_bytes ib 1 let skip_int16 ib = skip_bytes ib 2 let skip_int32 ib = skip_bytes ib 4 let skip_int64 ib = skip_bytes ib 8 let skip_float32 ib = skip_bytes ib 4 let skip_float64 ib = skip_bytes ib 8 let skip_uvint ib = ignore (read_untagged_uvint ib) let skip_svint ib = ignore (read_untagged_svint ib) let skip_string ib = let len = Bi_vint.read_uvint ib in skip_bytes ib len let rec skip_array ib = let len = Bi_vint.read_uvint ib in if len = 0 then () else let tag = read_tag ib in let read = skipper_of_tag tag in for i = 1 to len do read ib done and skip_tuple ib = let len = Bi_vint.read_uvint ib in for i = 1 to len do skip ib done and skip_field ib = ignore (read_field_hashtag ib); skip ib and skip_record ib = let len = Bi_vint.read_uvint ib in for i = 1 to len do skip_field ib done and skip_num_variant_cont ib i has_arg = if has_arg then skip ib and skip_num_variant ib = read_numtag ib skip_num_variant_cont and skip_variant_cont ib h has_arg = if has_arg then skip ib and skip_variant ib = read_hashtag ib skip_variant_cont and skip_table ib = let row_num = Bi_vint.read_uvint ib in if row_num = 0 then () else let col_num = Bi_vint.read_uvint ib in let readers = Array.init col_num ( fun _ -> ignore (read_field_hashtag ib); skipper_of_tag (read_tag ib) ) in for i = 1 to row_num do for j = 1 to col_num do readers.(j) ib done done and skipper_of_tag = function 0 (* bool *) -> skip_bool | 1 (* int8 *) -> skip_int8 | 2 (* int16 *) -> skip_int16 | 3 (* int32 *) -> skip_int32 | 4 (* int64 *) -> skip_int64 | 11 (* float32 *) -> skip_float32 | 12 (* float64 *) -> skip_float64 | 16 (* uvint *) -> skip_uvint | 17 (* svint *) -> skip_svint | 18 (* string *) -> skip_string | 19 (* array *) -> skip_array | 20 (* tuple *) -> skip_tuple | 21 (* record *) -> skip_record | 22 (* num_variant *) -> skip_num_variant | 23 (* variant *) -> skip_variant | 24 (* unit *) -> skip_unit | 25 (* table *) -> skip_table | _ -> Bi_util.error "Corrupted data (invalid tag)" and skip ib : unit = skipper_of_tag (read_tag ib) ib (* Equivalent of Array.map that guarantees a left-to-right order *) let array_map f a = let len = Array.length a in if len = 0 then [||] else ( let r = Array.make len (f (Array.unsafe_get a 0)) in for i = 1 to len - 1 do Array.unsafe_set r i (f (Array.unsafe_get a i)) done; r ) module Pp = struct open Easy_format let array = list let record = list let tuple = { list with space_after_opening = false; space_before_closing = false; align_closing = false } let variant = { list with separators_stick_left = true } let map f a = Array.to_list (array_map f a) let rec format shared (x : tree) = match x with `Unit -> Atom ("unit", atom) | `Bool x -> Atom ((if x then "true" else "false"), atom) | `Int8 x -> Atom (sprintf "0x%02x" (Char.code x), atom) | `Int16 x -> Atom (sprintf "0x%04x" x, atom) | `Int32 x -> Atom (sprintf "0x%08lx" x, atom) | `Int64 x -> Atom (sprintf "0x%016Lx" x, atom) | `Float32 x -> Atom (string_of_float x, atom) | `Float64 x -> Atom (string_of_float x, atom) | `Uvint x -> Atom (string_of_int x, atom) | `Svint x -> Atom (string_of_int x, atom) | `String s -> Atom (sprintf "%S" s, atom) | `Array None -> Atom ("[]", atom) | `Array (Some (_, a)) -> List (("[", ",", "]", array), map (format shared) a) | `Tuple a -> List (("(", ",", ")", tuple), map (format shared) a) | `Record a -> List (("{", ",", "}", record), map (format_field shared) a) | `Num_variant (i, o) -> let suffix = if i = 0 then "" else string_of_int i in (match o with None -> Atom ("None" ^ suffix, atom) | Some x -> let cons = Atom ("Some" ^ suffix, atom) in Label ((cons, label), format shared x)) | `Variant (opt_name, h, o) -> let name = match opt_name with None -> sprintf "#%08lx" (Int32.of_int h) | Some s -> sprintf "%S" s in (match o with None -> Atom ("<" ^ name ^ ">", atom) | Some x -> List (("<", "", ">", tuple), [ Label ((Atom (name ^ ":", atom), label), format shared x) ]) ) | `Table None -> Atom ("[]", atom) | `Table (Some (header, aa)) -> let record_array = `Array ( Some ( record_tag, Array.map ( fun a -> `Record ( Array.mapi ( fun i x -> let s, h, _ = header.(i) in (s, h, x) ) a ) ) aa ) ) in format shared record_array | `Shared x -> let tbl, p = shared in incr p; let pos = !p in let offset = Bi_share.Wr.put tbl (x, Bi_share.dummy_type_id) pos in if offset = 0 then Label ((Atom (sprintf "shared%i ->" pos, atom), label), format shared x) else Atom (sprintf "shared%i" (pos - offset), atom) and format_field shared (o, h, x) = let s = match o with None -> sprintf "#%08lx" (Int32.of_int h) | Some s -> sprintf "%S" s in Label ((Atom (sprintf "%s:" s, atom), label), format shared x) end let init () = (Bi_share.Wr.create 512, ref 0) let view_of_tree t = Easy_format.Pretty.to_string (Pp.format (init ()) t) let print_view_of_tree t = Easy_format.Pretty.to_stdout (Pp.format (init ()) t) let output_view_of_tree oc t = Easy_format.Pretty.to_channel oc (Pp.format (init ()) t) let view ?unhash s = view_of_tree (tree_of_string ?unhash s) let print_view ?unhash s = print_view_of_tree (tree_of_string ?unhash s) let output_view ?unhash oc s = output_view_of_tree oc (tree_of_string ?unhash s) biniou-1.0.12/bi_io.mli000066400000000000000000000162251273702770000146570ustar00rootroot00000000000000(** Input and output functions for the Biniou serialization format *) (** {1 Node tags} *) type node_tag = int val bool_tag : node_tag (** Tag indicating a bool node. *) val int8_tag : node_tag (** Tag indicating an int8 node. *) val int16_tag : node_tag (** Tag indicating an int16 node. *) val int32_tag : node_tag (** Tag indicating an int32 node. *) val int64_tag : node_tag (** Tag indicating an int64 node. *) val float32_tag : node_tag (** Tag indicating a float32 node. *) val float64_tag : node_tag (** Tag indicating a float64 node. *) val uvint_tag : node_tag (** Tag indicating a uvint node. *) val svint_tag : node_tag (** Tag indicating an svint node. *) val string_tag : node_tag (** Tag indicating a string node. *) val array_tag : node_tag (** Tag indicating an array node. *) val tuple_tag : node_tag (** Tag indicating a tuple node. *) val record_tag : node_tag (** Tag indicating a record node. *) val num_variant_tag : node_tag (** Tag indicating a num_variant node. *) val variant_tag : node_tag (** Tag indicating a variant node. *) val unit_tag : node_tag (** Tag indicating a unit node. *) val table_tag : node_tag (** Tag indicating a table node. *) val shared_tag : node_tag (** Tag indicating a shared node. *) val write_tag : Bi_outbuf.t -> node_tag -> unit (** Write one-byte tag to a buffer. *) val read_tag : Bi_inbuf.t -> node_tag (** Read one-byte tag from a buffer. *) (** {1 Tags of variants and record fields} *) type hash = int (** 31-bit hash *) val hash_name : string -> hash (** Hash function used to compute field name tags and variant tags from their full name. *) val write_hashtag : Bi_outbuf.t -> hash -> bool -> unit (** [write_hashtag ob h has_arg] writes variant tag [h] to buffer [ob]. [has_arg] indicates whether the variant has an argument. This function can be used for record field names as well, in which case [has_arg] may only be [true]. *) val string_of_hashtag : hash -> bool -> string (** Same as [write_hashtag] but writes to a string. *) val read_hashtag : Bi_inbuf.t -> (Bi_inbuf.t -> hash -> bool -> 'a) -> 'a (** [read_hashtag ib f] reads a variant tag as hash [h] and flag [has_arg] and returns [f h has_arg]. *) val read_field_hashtag : Bi_inbuf.t -> hash (** [read_field_hashtag ib] reads a field tag and returns the 31-bit hash. *) val make_unhash : string list -> (hash -> string option) (** Compute the hash of each string of the input list and return a function that converts a hash back to the original string. Lookups do not allocate memory blocks. @raise Failure if the input list contains two different strings with the same hash. *) type int7 = int (** 7-bit int used to represent a num_variant tag. *) val write_numtag : Bi_outbuf.t -> int7 -> bool -> unit (** [write_numtag ob i has_arg] writes the tag of a num_variant. The tag name is represented by [i] which must be within \[0, 127\] and the flag [has_arg] which indicates the presence of an argument. *) val read_numtag : Bi_inbuf.t -> (Bi_inbuf.t -> int7 -> bool -> 'a) -> 'a (** [read_numtag ib f] reads a num_variant tag and processes the tag name [i] and flag [has_arg] using [f]. *) (** {1 Atom writers} *) (** The [write_untagged_] functions write an untagged value (VAL) to an output buffer while the other [write_] functions write a tagged value (TAGVAL). *) val write_untagged_unit : Bi_outbuf.t -> unit -> unit val write_untagged_bool : Bi_outbuf.t -> bool -> unit val write_untagged_char : Bi_outbuf.t -> char -> unit val write_untagged_int8 : Bi_outbuf.t -> int -> unit val write_untagged_int16 : Bi_outbuf.t -> int -> unit val write_untagged_int32 : Bi_outbuf.t -> int32 -> unit val write_untagged_int64 : Bi_outbuf.t -> int64 -> unit val write_untagged_float32 : Bi_outbuf.t -> float -> unit val write_untagged_float64 : Bi_outbuf.t -> float -> unit val write_untagged_string : Bi_outbuf.t -> string -> unit val write_untagged_uvint : Bi_outbuf.t -> int -> unit val write_untagged_svint : Bi_outbuf.t -> int -> unit val write_unit : Bi_outbuf.t -> unit -> unit val write_bool : Bi_outbuf.t -> bool -> unit val write_char : Bi_outbuf.t -> char -> unit val write_int8 : Bi_outbuf.t -> int -> unit val write_int16 : Bi_outbuf.t -> int -> unit val write_int32 : Bi_outbuf.t -> int32 -> unit val write_int64 : Bi_outbuf.t -> int64 -> unit val write_float32 : Bi_outbuf.t -> float -> unit val write_float64 : Bi_outbuf.t -> float -> unit val write_string : Bi_outbuf.t -> string -> unit val write_uvint : Bi_outbuf.t -> int -> unit val write_svint : Bi_outbuf.t -> int -> unit (** {1 Atom readers} *) (** The [read_untagged_] functions read an untagged value (VAL) from an input buffer. *) val read_untagged_unit : Bi_inbuf.t -> unit val read_untagged_bool : Bi_inbuf.t -> bool val read_untagged_char : Bi_inbuf.t -> char val read_untagged_int8 : Bi_inbuf.t -> int val read_untagged_int16 : Bi_inbuf.t -> int val read_untagged_int32 : Bi_inbuf.t -> int32 val read_untagged_int64 : Bi_inbuf.t -> int64 val read_untagged_float32 : Bi_inbuf.t -> float val read_untagged_float64 : Bi_inbuf.t -> float val read_untagged_string : Bi_inbuf.t -> string val read_untagged_uvint : Bi_inbuf.t -> int val read_untagged_svint : Bi_inbuf.t -> int val skip : Bi_inbuf.t -> unit (** Read and discard a value. Useful for skipping unknown record fields. *) (** {1 Generic tree} *) type tree = [ | `Unit | `Bool of bool | `Int8 of char | `Int16 of int | `Int32 of Int32.t | `Int64 of Int64.t | `Float32 of float | `Float64 of float | `Uvint of int | `Svint of int | `String of string | `Array of (node_tag * tree array) option | `Tuple of tree array | `Record of (string option * hash * tree) array | `Num_variant of (int * tree option) | `Variant of (string option * hash * tree option) | `Table of ((string option * hash * node_tag) array * tree array array) option | `Shared of tree ] (** Tree representing serialized data, useful for testing and for untyped transformations. *) val write_tree : Bi_outbuf.t -> tree -> unit (** Serialization of a tree to a buffer. *) val string_of_tree : tree -> string (** Serialization of a tree into a string. *) val read_tree : ?unhash:(hash -> string option) -> Bi_inbuf.t -> tree (** Deserialization of a tree from a buffer. *) val tree_of_string : ?unhash:(hash -> string option) -> string -> tree (** Deserialization of a tree from a string. *) val tag_of_tree : tree -> node_tag (** Returns the node tag of the given tree. *) val view_of_tree : tree -> string val view : ?unhash:(hash -> string option) -> string -> string (** Prints a human-readable representation of the data into a string. *) val print_view_of_tree : tree -> unit val print_view : ?unhash:(hash -> string option) -> string -> unit (** Prints a human-readable representation of the data to stdout. *) val output_view_of_tree : out_channel -> tree -> unit val output_view : ?unhash:(hash -> string option) -> out_channel -> string -> unit (** Prints a human-readable representation of the data to an out_channel. *) val safety_test : unit -> unit (** Check that certain low-level hacks work as expected *) biniou-1.0.12/bi_outbuf.ml000066400000000000000000000052011273702770000153730ustar00rootroot00000000000000type t = { mutable o_s : string; mutable o_max_len : int; mutable o_len : int; mutable o_offs : int; o_init_len : int; o_make_room : (t -> int -> unit); mutable o_shared : Bi_share.Wr.tbl; o_shared_init_len : int; } let really_extend b n = let slen0 = b.o_max_len in let reqlen = b.o_len + n in let slen = let x = max reqlen (2 * slen0) in if x <= Sys.max_string_length then x else if Sys.max_string_length < reqlen then invalid_arg "Buf.extend: reached Sys.max_string_length" else Sys.max_string_length in let s = String.create slen in String.blit b.o_s 0 s 0 b.o_len; b.o_s <- s; b.o_max_len <- slen let flush_to_output abstract_output b n = abstract_output b.o_s 0 b.o_len; b.o_offs <- b.o_offs + b.o_len; b.o_len <- 0; if n > b.o_max_len then really_extend b n let flush_to_channel oc = flush_to_output (output oc) let create ?(make_room = really_extend) ?(shrlen = 16) n = { o_s = String.create n; o_max_len = n; o_len = 0; o_offs = 0; o_init_len = n; o_make_room = make_room; o_shared = Bi_share.Wr.create shrlen; o_shared_init_len = shrlen; } let create_channel_writer ?(len = 4096) ?shrlen oc = create ~make_room:(flush_to_channel oc) ?shrlen len let flush_channel_writer b = b.o_make_room b 0 let create_output_writer ?(len = 4096) ?shrlen out = create ~make_room:(flush_to_output out#output) ?shrlen len let flush_output_writer = flush_channel_writer (* Guarantee that the buffer string has enough room for n additional bytes. *) let extend b n = if b.o_len + n > b.o_max_len then b.o_make_room b n let alloc b n = extend b n; let pos = b.o_len in b.o_len <- pos + n; pos let add_substring b s pos len = extend b len; String.blit s pos b.o_s b.o_len len; b.o_len <- b.o_len + len let add_string b s = add_substring b s 0 (String.length s) let add_char b c = let pos = alloc b 1 in b.o_s.[pos] <- c let unsafe_add_char b c = let len = b.o_len in b.o_s.[len] <- c; b.o_len <- len + 1 let add_char2 b c1 c2 = let pos = alloc b 2 in let s = b.o_s in String.unsafe_set s pos c1; String.unsafe_set s (pos+1) c2 let add_char4 b c1 c2 c3 c4 = let pos = alloc b 4 in let s = b.o_s in String.unsafe_set s pos c1; String.unsafe_set s (pos+1) c2; String.unsafe_set s (pos+2) c3; String.unsafe_set s (pos+3) c4 let clear b = b.o_offs <- 0; b.o_len <- 0; Bi_share.Wr.clear b.o_shared let reset b = if String.length b.o_s <> b.o_init_len then b.o_s <- String.create b.o_init_len; b.o_offs <- 0; b.o_len <- 0; b.o_shared <- Bi_share.Wr.create b.o_shared_init_len let contents b = String.sub b.o_s 0 b.o_len biniou-1.0.12/bi_outbuf.mli000066400000000000000000000067111273702770000155530ustar00rootroot00000000000000(** Output buffer *) type t = { mutable o_s : string; (** Buffer string *) mutable o_max_len : int; (** Same as [String.length s] *) mutable o_len : int; (** Length of the data present in the buffer = current position in the buffer *) mutable o_offs : int; (** Length of data written and flushed out of the buffer. The total number of bytes written to the buffer is therefore [o_offs + o_len]. *) o_init_len : int; (** Initial length of the buffer *) o_make_room : t -> int -> unit; (** [make_room buf n] must provide space for at least the requested number of bytes [n], typically by reallocating a larger buffer string or by flushing the data to a channel. This function is only called when there is not enough space for [n] bytes. *) mutable o_shared : Bi_share.Wr.tbl; (** Hash table used to map shared objects to positions in the input stream. *) o_shared_init_len : int; (** Initial length of the [o_shared] table. *) } val really_extend : t -> int -> unit (** Default make_room function: reallocate a larger buffer string. *) val flush_to_channel : out_channel -> t -> int -> unit (** Alternate make_room function: write to an out_channel. *) val create : ?make_room:(t -> int -> unit) -> ?shrlen:int -> int -> t (** Create a buffer. The default [make_room] function is [really_extend]. @param shrlen initial size of the table used to store shared values. *) val contents : t -> string (** Returns the data currently in the buffer. *) val create_channel_writer : ?len:int -> ?shrlen:int -> out_channel -> t val flush_channel_writer : t -> unit (** Pair of convenience functions for creating a buffer that flushes data to an out_channel when it is full. *) val create_output_writer : ?len:int -> ?shrlen:int -> < output : string -> int -> int -> int; .. > -> t val flush_output_writer : t -> unit (** Pair of convenience functions for creating a buffer that flushes data to an object when it is full. *) val extend : t -> int -> unit (** Guarantee that the buffer string has enough room for n additional bytes. *) val alloc : t -> int -> int (** [alloc buf n] makes room for [n] bytes and returns the position of the first byte in the buffer string [buf.s]. It behaves as if [n] arbitrary bytes were added and it is the user's responsibility to set them to some meaningful values by accessing [buf.s] directly. *) val add_string : t -> string -> unit (** Add a string to the buffer. *) val add_substring : t -> string -> int -> int -> unit (** [add_substring dst src srcpos len] copies [len] bytes from string [src] to buffer [dst] starting from position [srcpos]. *) val add_char : t -> char -> unit (** Add a byte to the buffer. *) val add_char2 : t -> char -> char -> unit (** Add two bytes to the buffer. *) val add_char4 : t -> char -> char -> char -> char -> unit (** Add four bytes to the buffer. *) val unsafe_add_char : t -> char -> unit (** Add a byte to the buffer without checking that there is enough room for it. *) val clear : t -> unit (** Remove any data present in the buffer and in the table holding shared objects. *) val reset : t -> unit (** Remove any data present in the buffer and reset it to its original size. Remove any data present in the table holding shared objects and reset it to its original size. *) biniou-1.0.12/bi_share.ml000066400000000000000000000020061273702770000151710ustar00rootroot00000000000000type type_id = int let dummy_type_id = 0 let create_type_id = let n = ref dummy_type_id in fun () -> incr n; if !n < 0 then failwith "Bi_share.Rd_poly.create_type_id: \ exhausted available type_id's" else !n module Wr = struct module H = Hashtbl.Make ( struct type t = Obj.t * type_id let equal (x1, t1) (x2, t2) = x1 == x2 && t1 == t2 let hash = Hashtbl.hash end ) type tbl = int H.t let create = H.create let clear tbl = if H.length tbl > 0 then H.clear tbl let put tbl k pos = try let pos0 = H.find tbl (Obj.magic k) in pos - pos0 with Not_found -> H.add tbl (Obj.magic k) pos; 0 end module Rd = struct type tbl = ((int * type_id), Obj.t) Hashtbl.t let create n = Hashtbl.create n let clear = Hashtbl.clear let put tbl pos x = Hashtbl.add tbl pos x let get tbl pos = try Hashtbl.find tbl pos with Not_found -> Bi_util.error "Corrupted data (invalid reference)" end biniou-1.0.12/bi_share.mli000066400000000000000000000017611273702770000153510ustar00rootroot00000000000000(** \[not for general use\] *) (**/**) type type_id val dummy_type_id : type_id val create_type_id : unit -> type_id module Wr : sig type tbl val create : int -> tbl val clear : tbl -> unit val put : tbl -> ('a * type_id) -> int -> int (** [put tbl x pos] returns 0 if [x] is not already in the table and adds [x] to the table. [pos] is the absolute position of the first byte of the ref value excluding its tag. If [x] is found in the table, then the difference between [pos] and the original position is returned. *) end module Rd : sig type tbl val create : int -> tbl val clear : tbl -> unit val put : tbl -> (int * type_id) -> Obj.t -> unit (** [put tbl pos x] puts the position of a new shared value into the table. [pos] is the absolute position of the first byte of the ref value excluding its tag. *) val get : tbl -> (int * type_id) -> Obj.t (** [get tbl pos] returns the value stored at this position or raises a {!Bi_util.Error} exception. *) end biniou-1.0.12/bi_stream.ml000066400000000000000000000070231273702770000153660ustar00rootroot00000000000000open Printf let error s = failwith ("Bi_stream: " ^ s) let input_int64 ic = match Sys.word_size with 64 -> let n = ref 0 in for i = 1 to 8 do n := (!n lsl 8) lor (input_byte ic); done; if !n < 0 then error "Corrupted stream: excessive chunk length"; !n | 32 -> for i = 1 to 4 do if input_byte ic <> 0 then error "Chunk length exceeds supported range on this platform" done; let n = ref 0 in for i = 1 to 4 do n := (!n lsl 8) lor (input_byte ic); done; if !n < 0 then error "Chunk length exceeds supported range on this platform"; !n | n -> error (sprintf "unsupported word size (%i)" n) let output_int64 oc n = match Sys.word_size with 64 -> let n = ref n in for i = 1 to 8 do output_char oc (char_of_int (!n lsr 56)); n := !n lsl 8 done | 32 -> output_string oc "\000\000\000\000"; let n = ref n in for i = 1 to 4 do output_char oc (char_of_int (!n lsr 24)); n := !n lsl 8 done | n -> error (sprintf "unsupported word size (%i)" n) let rec read_chunk of_string ic = match input_char ic with '\001' -> let len = input_int64 ic in if len > Sys.max_string_length then error (sprintf "Corrupted stream: excessive chunk length (%i bytes)" len); let s = String.create len in really_input ic s 0 len; Some (of_string s) | '\000' -> None | c -> error (sprintf "Corrupted stream: %C" c) let flatten st = let a = ref [| |] in let pos = ref 0 in let rec next i = if !pos >= Array.length !a then ( match Stream.peek st with None -> None | Some a' -> Stream.junk st; a := a'; pos := 0; next i ) else ( let x = (!a).(!pos) in incr pos; Some x ) in Stream.from next let read_stream of_string ic = flatten (Stream.from (fun i -> read_chunk of_string ic)) let rev_array_of_list l = match l with [] -> [||] | x :: tl -> let r = ref tl in let len = List.length l in let a = Array.make len x in for i = len - 2 downto 0 do match !r with hd :: tl -> a.(i) <- hd; r := tl; | [] -> assert false done; a let write_stream ?(chunk_len = 1024) to_string oc st = let n = ref 0 in let acc = ref [] in let flush_chunk () = let a = rev_array_of_list !acc in acc := []; n := 0; let s = to_string a in output_char oc '\001'; output_int64 oc (String.length s); output_string oc s in Stream.iter ( fun x -> incr n; acc := x :: !acc; if !n >= chunk_len then flush_chunk () ) st; if !n > 0 then flush_chunk (); output_char oc '\000' let test l = List.iter (fun x -> assert (x >= 0 && x <= 9)) l; let to_string a = String.concat "" (List.map string_of_int (Array.to_list a)) in let of_string s = Array.init (String.length s) (fun i -> int_of_string (String.make 1 s.[i])) in let st = Stream.of_list l in let oc = open_out "test-stream.dat" in write_stream ~chunk_len:2 to_string oc st; close_out oc; let ic = open_in "test-stream.dat" in let st' = read_stream of_string ic in let l' = ref [] in Stream.iter (fun i -> l' := i :: !l') st'; close_in ic; l = List.rev !l' biniou-1.0.12/bi_stream.mli000066400000000000000000000025311273702770000155360ustar00rootroot00000000000000(** Streaming utilities (experimental) *) (** This module offers a streaming interface for representing long lists of elements that cannot fit in memory. Stream items are serialized as chunks of configurable length. Stream format (independent from the biniou serialization format): {v ( ONE INT64 BYTE* )* ZERO v} where [INT64] is the length of a chunk (unsigned big-endian 64-bit int), i.e. the number of following [BYTE]s. [ONE] and [ZERO] are the single-byte representations of 1 and 0 and are used to indicate whether the end of the stream is reached. *) val read_stream : (string -> 'a array) -> in_channel -> 'a Stream.t (** [read_stream of_string ic] creates an OCaml stream from an input channel [ic]. The data come in chunks and each chunk is converted from a string to an array by calling [of_string]. *) val write_stream : ?chunk_len:int -> ('a array -> string) -> out_channel -> 'a Stream.t -> unit (** [write_stream to_string oc st] writes an OCaml stream to the output channel [oc]. It creates chunks of [chunk_len], except for the last chunk which is usually smaller. @param chunk_len has a default value of 1024. The limit supported by this OCaml implementation on 32-bit platforms is 16777215. *) (**/**) val test : int list -> bool biniou-1.0.12/bi_util.ml000066400000000000000000000017101273702770000150450ustar00rootroot00000000000000exception Error of string let error s = raise (Error s) (* Debugging utilities. *) let string8_of_int x = let s = String.create 8 in for i = 0 to 7 do s.[7-i] <- Char.chr (0xff land (x lsr (8 * i))) done; s let string4_of_int x = let s = String.create 4 in for i = 0 to 3 do s.[3-i] <- Char.chr (0xff land (x lsr (8 * i))) done; s let print_bits ?(pos = 0) ?len s = let slen = String.length s in if pos < 0 || (pos > 0 && pos >= slen) then invalid_arg "Bi_util.print_bits"; let len = match len with None -> slen - pos | Some len -> if len > slen - pos then invalid_arg "Bi_util.print_bits" else len in let r = String.create (len * 9) in for i = 0 to len - 1 do let k = i * 9 in let x = Char.code s.[pos+i] in for j = 0 to 7 do r.[k+j] <- if (x lsr (7 - j)) land 1 = 0 then '0' else '1' done; r.[k+8] <- if (i + 1) mod 8 = 0 then '\n' else ' ' done; r biniou-1.0.12/bi_util.mli000066400000000000000000000006051273702770000152200ustar00rootroot00000000000000(** Error handling etc. *) exception Error of string (** Multipurpose exception normally raised when invalid data is found by a read or write operation. *) val error : string -> 'a (** [error msg] is equivalent to [raise (Error msg)]. *) (**/**) val string8_of_int : int -> string val string4_of_int : int -> string val print_bits : ?pos:int -> ?len:int -> string -> string biniou-1.0.12/bi_vint.ml000066400000000000000000000065161273702770000150610ustar00rootroot00000000000000(* Variable-byte encoding of 8-byte integers (starting from 0). *) open Printf open Bi_outbuf open Bi_inbuf type uint = int (* Word size in bytes *) let word_size = if 0x7fffffff = -1 then 4 else 8 (* Maximum int size in bits *) let max_int_bits = 8 * word_size - 1 (* Maximum length of a vint decodable into an OCaml int, maximum value of the highest byte of the largest vint supported *) let max_vint_bytes, max_highest_byte = if max_int_bits mod 7 = 0 then let m = max_int_bits / 7 in let h = 1 lsl 7 - 1 in m, h else let m = max_int_bits / 7 + 1 in let h = 1 lsl (max_int_bits mod 7) - 1 in m, h let check_highest_byte x = if x > max_highest_byte then Bi_util.error "Vint exceeding range of OCaml ints" let unsigned_of_signed i = if i >= 0 then (* 0 -> 0 1 -> 2 2 -> 4 3 -> 6 *) i lsl 1 else (* -1 -> 1 -2 -> 3 -3 -> 5 *) ((-1-i) lsl 1) lor 1 let signed_of_unsigned i = if i land 1 = 0 then i lsr 1 else -1 - (i lsr 1) let write_uvint buf i = Bi_outbuf.extend buf max_vint_bytes; let x = ref i in while !x lsr 7 <> 0 do let byte = 0x80 lor (!x land 0x7f) in Bi_outbuf.unsafe_add_char buf (Char.chr byte); x := !x lsr 7; done; Bi_outbuf.unsafe_add_char buf (Char.chr !x) let write_svint buf i = write_uvint buf (unsigned_of_signed i) (* convenience *) let uvint_of_uint ?buf i = let buffer = match buf with | None -> Bi_outbuf.create 10 | Some b -> b in Bi_outbuf.clear buffer; write_uvint buffer i; Bi_outbuf.contents buffer let svint_of_int ?buf i = uvint_of_uint ?buf (unsigned_of_signed i) let read_uvint ib = let avail = Bi_inbuf.try_preread ib max_vint_bytes in let s = ib.i_s in let pos = ib.i_pos in let x = ref 0 in (try for i = 0 to avail - 1 do let b = Char.code s.[pos+i] in x := ((b land 0x7f) lsl (7*i)) lor !x; if b < 0x80 then ( ib.i_pos <- pos + i + 1; if i + 1 = max_vint_bytes then check_highest_byte b; raise Exit ) done; Bi_util.error "Unterminated vint or vint exceeding range of OCaml ints" with Exit -> () ); !x let read_svint ib = signed_of_unsigned (read_uvint ib) (* convenience *) let check_end_of_input ib = if Bi_inbuf.try_preread ib 1 > 0 then Bi_util.error "Junk input after end of vint" let uint_of_uvint s = let ib = Bi_inbuf.from_string s in let x = read_uvint ib in check_end_of_input ib; x let int_of_svint s = let ib = Bi_inbuf.from_string s in let x = read_svint ib in check_end_of_input ib; x (* Testing *) let string_of_list l = let ob = Bi_outbuf.create 100 in List.iter (write_uvint ob) l; Bi_outbuf.contents ob let rec read_list ib = if ib.i_pos < ib.i_len then let x = read_uvint ib in x :: read_list ib else [] let list_of_string s = read_list (Bi_inbuf.from_string s) let print_list l = List.iter ( fun i -> printf "dec %i\nhex %x\nbin %s\n" i i (Bi_util.print_bits (Bi_util.string8_of_int i)) ) l let test () = let l = [ 0; 0xfffffff; (0x01020304 lsl 32) lor 0x05060708; max_int; min_int ] in printf "Input:\n"; print_list l; let l' = list_of_string (string_of_list l) in printf "Output:\n"; print_list l'; if l = l' then print_endline "SUCCESS" else print_endline "FAILURE" biniou-1.0.12/bi_vint.mli000066400000000000000000000047631273702770000152340ustar00rootroot00000000000000(** Vint: variable-length representation of integers *) (** This module currently provides only conversions between vint and the OCaml int type. Here are the current limits of OCaml ints on 32-bit and 64-bit systems: {v word length (bits) 32 64 int length (bits) 31 63 min_int (lowest signed int) 0x40000000 0x4000000000000000 -1073741824 -4611686018427387904 max_int (greatest signed int) 0x3fffffff 0x3fffffffffffffff 1073741823 4611686018427387903 lowest unsigned int 0x0 0x0 0 0 greatest unsigned int 0x7fffffff 0x7fffffffffffffff 2147483647 9223372036854775807 maximum vint length (data bits) 31 63 maximum vint length (total bytes) 5 9 v} *) type uint = int (** Unsigned int. Note that ints (signed) and uints use the same representation for integers within \[0, [max_int]\]. *) val uvint_of_uint : ?buf:Bi_outbuf.t -> uint -> string (** Convert an unsigned int to a vint. @param buf existing output buffer that could be reused by this function instead of creating a new one. *) val svint_of_int : ?buf:Bi_outbuf.t -> int -> string (** Convert a signed int to a vint. @param buf existing output buffer that could be reused by this function instead of creating a new one. *) val uint_of_uvint : string -> uint (** Interpret a vint as an unsigned int. @raise Bi_util.Error if the input string is not a single valid uvint that is representable using the uint type. *) val int_of_svint : string -> int (** Interpret a vint as a signed int. @raise Bi_util.Error if the input string is not a single valid svint that is representable using the int type. *) val write_uvint : Bi_outbuf.t -> uint -> unit (** Write an unsigned int to a buffer. *) val write_svint : Bi_outbuf.t -> int -> unit (** Write a signed int to a buffer. *) val read_uvint : Bi_inbuf.t -> uint (** Read an unsigned int from a buffer. @raise Bi_util.Error if there is no data to read from or if the uvint is not representable using the uint type. *) val read_svint : Bi_inbuf.t -> int (** Read a signed int from a buffer. @raise Bi_util.Error if there is no data to read from or if the svint is not representable using the int type. *) biniou-1.0.12/biniou-format.txt000066400000000000000000000142521273702770000164050ustar00rootroot00000000000000 The Biniou format ----------------- Contents: 1. Grammar 2. Tags 3. Fixed-length types 4. Vints 5. Field and variant name hashing 6. Numeric variants 1. Grammar TAGVAL ::= TAG VAL // A biniou value with its matching tag VAL ::= ATOM | ARRAY | TUPLE | RECORD | NUM_VARIANT | VARIANT | TABLE | SHARED ATOM ::= unit // 0, using one byte | bool // 0 for false, 1 for true, using one byte | int8 // 1 arbitrary byte | int16 // 2 arbitrary bytes | int32 // 4 arbitrary bytes | int64 // 8 arbitrary bytes | float32 // IEEE-754 binary32 | float64 // IEEE-754 binary64 | uvint // unsigned variable-length int | svint // signed variable-length int | STRING // sequence of any number of bytes prefixed by its length STRING ::= LENGTH byte* ARRAY ::= LENGTH (TAG VAL* )? NUM_VARIANT ::= NUM_VARIANT_TAG TAGVAL? VARIANT ::= VARIANT_TAG TAGVAL? TUPLE ::= LENGTH TAGVAL* RECORD ::= LENGTH (FIELD_TAG TAGVAL)* TABLE ::= LENGTH (LENGTH (FIELD_TAG TAG)* (VAL* )* )? // list of records SHARED ::= OFFSET TAGVAL? // Value given iff the offset is 0. // Otherwise, the offset indicates the // relative position to the left of a SHARED // to which we are redirected. TAG ::= int8 // identifies a type of node LENGTH ::= uvint OFFSET ::= uvint NUM_VARIANT_TAG ::= int8 // 0-127 if no argument, 128-255 if has argument VARIANT_TAG ::= int32 // first bit indicates argument, then 31-bit hash FIELD_TAG ::= int32 // 31-bit hash (first bit always 1) 2. Tags Tags indicate the shallow structure of any biniou value. The biniou format is such that the tag of any value is known from the input data. This allows decoding biniou data as a tree where each node represents a biniou value, without requiring external type information. The tag values for the various kinds of biniou values are: Type of value Tag --------------------------- bool 0 int8 1 int16 2 int32 3 int64 4 float32 11 float64 12 uvint 16 svint 17 string 18 ARRAY 19 TUPLE 20 RECORD 21 NUM_VARIANT 22 VARIANT 23 unit 24 TABLE 25 SHARED 26 3. Fixed-length types Atomic values of type unit, bool, int8, int16, int32, int64, float32 and float64 represent arbitrary sequences of 1, 2, 4 or 8 bytes. In order to make the visualization of data easier, the default interpretation of these values shall be used: Length in bytes Type of value Default interpretation --------------------------------------------------------------------- 1 unit 0 represents the unit value 1 bool 0 represents false, 1 represents true 1 int8 unsigned 8-bit int 2 int16 big endian unsigned 16-bit int 4 int32 big endian unsigned 32-bit int 8 int64 big endian unsigned 64-bit int 4 float32 big endian IEEE-754 binary32 (float) 8 float64 big endian IEEE-754 binary64 (double) 4. Vints Vints are a variable-length, byte-aligned representation of positive integers. A vint is represented by a sequence of bytes from least significant to most significant. In all the bytes except the last one, the high bit is set to 1 and indicates that more bytes follow. The high bit of the last byte is set to 0. The remaining 7 bits in each byte represent data. Here is the representation of some sample values: 0xxxxxxx 0 00000000 1 00000001 2 00000010 127 01111111 1xxxxxxx 0xxxxxxx 128 10000000 00000001 129 10000001 00000001 255 11111111 00000001 256 11111111 00000010 16383 11111111 01111111 1xxxxxxx 1xxxxxxx 0xxxxxxx 16384 10000000 10000000 00000001 16385 10000001 10000000 00000001 Positive integers can be represented by standard vints. We call this representation unsigned vint or uvint. Arbitrary integers can also be represented using vints, after mapping to positive integers. We call this representation signed vint or svint. Positive numbers and 0 are mapped to even numbers and negative numbers are mapped to odd positive numbers. Here is the mapping for small numbers: vint unsigned signed representation interpretation interpretation (uvint) (svint) 0xxxxxx0 00000000 0 0 00000010 2 1 00000100 4 2 00000110 6 3 0xxxxxx1 00000001 1 -1 00000011 3 -2 00000101 5 -3 5. Field and variant name hashing Record field names and variant names are represented by a 31-bit tag which must be a hash of the name. The following hash function must be used: hash(s): h <- 0 for i = 0 to length(s) - 1 do h <- 223 * h + s[i] done h <- h mod 2^31 return h For example, hash("Hello") is 0x37eea2f2. A full field tag or variant tag is made of 32 bits. The first bit is 0 for variants without an argument, and 1 for variants with an argument or record fields. The remaining 31 bits are the hash of field or variant name described above. 6. Numeric variants Numeric variants are a more compact alternative to variants using 32-bit hash-based tags since the tag of numeric variants takes only one byte. The most common use of numeric variants is for an option type. A value of type option is either None or Some value, e.g. None, Some 123 or Some 0. This allows to represent undefined values without reserving a special value called null or undefined. biniou-1.0.12/test_biniou.ml000066400000000000000000000121121273702770000157400ustar00rootroot00000000000000open Printf open Bi_io let rec deep_cycle = `Tuple [| `Shared deep_cycle |] let test_tree : tree = `Tuple [| `Unit; `Num_variant (0, None); `Num_variant (0, Some (`Svint 127)); `Array (Some (svint_tag, [| `Svint 1; `Svint 2 |])); `Tuple [| `Shared deep_cycle; `Shared deep_cycle |]; `Record [| (Some "abc", hash_name "abc", `String "hello"); (Some "number", hash_name "number", `Svint 123); (Some "variant1", hash_name "variant1", `Variant (Some "Foo", hash_name "Foo", Some (`Svint (-456)))); (Some "variant2", hash_name "variant2", `Variant (Some "Bar", hash_name "Bar", None)); |]; `Table ( Some ( [| (Some "name", hash_name "name", string_tag); (Some "age", hash_name "age", uvint_tag) |], [| [| `String "Francisco"; `Uvint 67 |]; [| `String "Mateo"; `Uvint 23 |]; [| `String "Clara"; `Uvint 27 |]; [| `String "Jose"; `Uvint 39 |]; |] ) ); `Array ( Some ( array_tag, [| `Array ( Some ( float64_tag, [| `Float64 1.234567; `Float64 2.345678; `Float64 3.456789; |] ) ); `Array ( Some ( float64_tag, [| `Float64 4.567890; `Float64 5.678901; `Float64 6.789012 |] ) ); `Array ( Some ( float64_tag, [| `Float64 7.890123; `Float64 8.901234; `Float64 9.012345 |] ) ); `Array ( Some ( float64_tag, [| `Float64 10.123456; `Float64 11.234567; `Float64 12.345678 |] ) ); |] ) ) |] let unhash = make_unhash [ "abc"; "number"; "variant1"; "variant2"; "Foo"; "Bar"; "name"; "age" ] let test () = let s = string_of_tree test_tree in let test_tree2 = tree_of_string ~unhash s in (s, String.length s, test_tree2, test_tree2 = test_tree) let test_json () = let s = "[\ null,\ null,\ 127,\ [1,2],\ [[1,[1]],1]\ {\"abc\":\"hello\",\ \"number\":123,\ \"variant1\":[\"Foo\",-456],\ \"variant2\":\"Bar\"},\ [[1,\"first\"],[2,\"second\"],[3,\"third\"],[4,\"fourth\"]],\ [\ {\"name\":\"Francisco\",\"age\":67},\ {\"name\":\"Mateo\",\"age\":23},\ {\"name\":\"Clara\",\"age\":27},\ {\"name\":\"Jose\",\"age\":39}\ ],\ [\ [1.234567,2.345678,3.456789],\ [4.567890,5.678901,6.789012],\ [7.890123,8.901234,9.012345],\ [10.123456,11.234567,12.345678]\ ],\ ]" in s, String.length s type foo = { abc : string; number : int; variant1 : [ `Foo of int ]; variant2 : [ `Bar ] } type person = { name : string; age : int } let native_test_tree = ( (), None, Some 127, [| 1; 2 |], { abc = "hello"; number = 123; variant1 = `Foo (-456); variant2 = `Bar }, [| 1, "first"; 2, "second"; 3, "third"; 4, "fourth"; |], [| { name = "Francisco"; age = 67 }; { name = "Mateo"; age = 23 }; { name = "Clara"; age = 27 }; { name = "Jose"; age = 39 }; |], [| [| 1.234567; 2.345678; 3.456789 |]; [| 4.567890; 5.678901; 6.789012 |]; [| 7.890123; 8.901234; 9.012345 |]; [| 10.123456; 11.234567; 12.345678 |] |] ) let marshal x = Marshal.to_string x [(*Marshal.No_sharing*)] let unmarshal s = Marshal.from_string s 0 let native_test_tree_marshalled = marshal native_test_tree let marshal_wr_perf n = for i = 1 to n do ignore (marshal native_test_tree) done let marshal_rd_perf n = for i = 1 to n do ignore (unmarshal native_test_tree_marshalled) done let test_tree_binioued = string_of_tree test_tree let biniou_wr_perf n = for i = 1 to n do ignore (string_of_tree test_tree) done let biniou_rd_perf n = for i = 1 to n do ignore (tree_of_string test_tree_binioued) done let time name f x = let t1 = Unix.gettimeofday () in ignore (f x); let t2 = Unix.gettimeofday () in Printf.printf "%s: %.3f\n%!" name (t2 -. t1) let wr_perf () = let n = 1_000_000 in time "wr biniou" biniou_wr_perf n; time "wr marshal" marshal_wr_perf n let rd_perf () = let n = 1_000_000 in time "rd biniou" biniou_rd_perf n; time "rd marshal" marshal_rd_perf n let eq x y = Marshal.to_string x [] = Marshal.to_string y [] let test_channels x = let file = "test_channels.bin" in let oc = open_out_bin file in let ob = Bi_outbuf.create_channel_writer oc in write_tree ob x; Bi_outbuf.flush_channel_writer ob; close_out oc; let ic = open_in_bin file in let ib = Bi_inbuf.from_channel ic in let x' = read_tree ib in if not (eq x x') then ( printf "Error in writing or reading via channels:\n"; Bi_io.print_view (string_of_tree x'); print_newline (); ) let () = Bi_io.safety_test (); let s = string_of_tree test_tree in Bi_io.print_view s; print_newline (); let x = tree_of_string s in if s <> string_of_tree x then printf "Error in writing or reading\n%!"; test_channels x; let oc = open_out_bin "test.bin" in output_string oc s; close_out oc; wr_perf (); rd_perf (); assert (Bi_stream.test [5; 3; 8; 4]); assert (Bi_stream.test [])