zandoye-charinfo_width-6a2ed28ba68c/.hg_archival.txt0000644000000000000000000000017113401501066020621 0ustar 00000000000000repo: 36ea4d623720763d564a4f7da08fa26d13ae7108 node: 6a2ed28ba68cddab6927ac27a9b991f01ea85ec5 branch: default tag: 1.1.0 zandoye-charinfo_width-6a2ed28ba68c/.hgignore0000644000000000000000000000021013401501066017330 0ustar 00000000000000syntax: glob **/*.annot **/.\#* **/*~ **/Session.vim **/.*.swp **/*.o **/*.a **/*.so **/*.cm* _build .merlin charInfo_width.install zandoye-charinfo_width-6a2ed28ba68c/.hgtags0000644000000000000000000000013613401501066017012 0ustar 00000000000000d7f73843b16fe7b87f54498d58bb26bd7b82350e 0.1.0 8e2f23adc6b1d587f487d80b8a4a04ec0bf89e2e 1.0.0 zandoye-charinfo_width-6a2ed28ba68c/CHANGES.md0000644000000000000000000000057013401501066017130 0ustar 00000000000000## 1.1.0 (2018-12-04) * Loosen the restriction of parameter module of `CharInfo_width.String` functor. Only the `length` and `get` functions are required. ## 1.0.0 (2018-12-03) * remove the `width\_utext` function * add `CharInfo_width.String` functor to calculates all kind of strings compatible with `Camomile.UnicodeString.Type.t` ## 0.1.0 (2018-11-16) initial release zandoye-charinfo_width-6a2ed28ba68c/Makefile0000644000000000000000000000046413401501066017200 0ustar 00000000000000default: dune build install: dune install uninstall: dune uninstall doc: dune build @doc clean: dune clean runtest: dune runtest all-supported-ocaml-versions: dune build @install --workspace dune-workspace.4.02.dev --root . dune build @install @runtest --workspace dune-workspace.dev --root . zandoye-charinfo_width-6a2ed28ba68c/README.md0000644000000000000000000000305313401501066017014 0ustar 00000000000000# CharInfo\_width Determine column width for a character. # How to use `CharInfo_width.width c` returns the column width of `c` where `c` is of type `Camomile.UChar.t` and the value returned is of type `int`. 
This module is implemented purely in OCaml, and the `width` function follows the prototype of POSIX's wcwidth, i.e. if `c` is a printable character, the value is at least 0; if `c` is the null character (L'\0'), the value is 0; otherwise, -1 is returned. When the `width_exn` function encounters an unprintable character, it raises `Failure "unprintable character"` instead of returning -1. By default, the `width` and `width_exn` functions are compatible with ncursesw, ncursesw-based CLIs, and terminals: they determine the width of a character in the same way. An optional parameter, `cfg`, can supply extra width info. The current width info table of ncursesw, xterm, and xterm-compatible terminals is inadequate and limited, and so is the default cfg of this module. When implementing a raw-mode command-line interface, e.g. readline or a text editor, it is better to supply extra width info via `cfg`. An ongoing sample repository of width tables is here: [charInfo\_width\_extra](https://bitbucket.org/zandoye/charinfo_width_extra) This module also provides a functor, `CharInfo_width.String`. This functor accepts a `Camomile.UnicodeString` compatible module to calculate the width of a Unicode string. The returned value is either `Ok width` or `Error pos-of-unprintable-character`. # Document The documentation is available [here](https://zandoye.bitbucket.io/doc/_html/charInfo_width/). 
zandoye-charinfo_width-6a2ed28ba68c/VERSION0000644000000000000000000000000613401501066016600 0ustar 000000000000001.1.0 zandoye-charinfo_width-6a2ed28ba68c/charInfo_width.opam0000644000000000000000000000133713401501066021346 0ustar 00000000000000opam-version: "2.0" maintainer: "zandoye@gmail.com" authors: [ "ZAN DoYe" ] homepage: "https://bitbucket.org/zandoye/charinfo_width/" bug-reports: "https://bitbucket.org/zandoye/charinfo_width/issues" license: "MIT" dev-repo: "hg://https://bitbucket.org/zandoye/charinfo_width" build: [ ["dune" "build" "-p" name "-j" jobs] ["dune" "runtest" "-p" name "-j" jobs] {with-test & (switch > "4.03.0") & (switch < "999.0~")} ] depends: [ "ocaml" {>= "4.02.3"} "result" "camomile" {>= "1.0.0" & < "2.0~"} "dune" {build} "ppx_expect" {with-test} ] synopsis: "Determine column width for a character" description: """ This module is implemented purely in OCaml and the width function follows the prototype of POSIX's wcwidth.""" zandoye-charinfo_width-6a2ed28ba68c/dune-project0000644000000000000000000000002013401501066020046 0ustar 00000000000000(lang dune 1.4) zandoye-charinfo_width-6a2ed28ba68c/dune-workspace.4.02.dev0000644000000000000000000000010413401501066021540 0ustar 00000000000000(lang dune 1.4) (context (opam (switch 4.02.3))) (profile release) zandoye-charinfo_width-6a2ed28ba68c/dune-workspace.dev0000644000000000000000000000016413401501066021164 0ustar 00000000000000(lang dune 1.4) (context (opam (switch 4.04.2))) (context (opam (switch 4.06.1))) (context (opam (switch 4.07.1))) zandoye-charinfo_width-6a2ed28ba68c/src/cfg.ml0000644000000000000000000002727313401501066017427 0ustar 00000000000000open Result module MiniParsec = struct open Printf type pos= { cnum: int; line: int; bol: int; } type state= { data: string; maxlen: int; pos: pos } let initState data= { data; maxlen= String.length data; pos= { cnum= 0; line= 1; bol= 0; }; } type error= pos * string type 'a reply= (('a * state), error) result type 'a parser= state -> 'a reply 
type 'a t= 'a parser

(* Render a position as its line number and the column offset from the
   beginning of that line (cnum - bol). *)
let string_of_pos pos= sprintf "line %d, characters %d"
  pos.line
  (pos.cnum - pos.bol)

(* Same as string_of_pos, prefixed with the absolute offset cnum. *)
let string_of_pos_full pos= sprintf "offset %d, line %d, characters %d"
  pos.cnum
  pos.line
  (pos.cnum - pos.bol)

(* parser generator *)

(* The primitives below only advance cnum; line and bol are left
   untouched even when the consumed character is a line terminator. *)

(* [any] consumes and returns the character at the current offset, or
   fails when the offset is not below maxlen. *)
let any= fun state->
  let pos= state.pos in
  if pos.cnum < state.maxlen then
    let found= String.get state.data state.pos.cnum in
    let pos= { pos with cnum= pos.cnum + 1 } in
    (Ok (found, { state with pos }))
  else
    (Error (state.pos, "out of bounds"))

(* [char c] consumes the current character if it equals [c] and returns
   it; otherwise fails at the current position without consuming. *)
let char c= fun state->
  let pos= state.pos in
  if pos.cnum < state.maxlen then
    let found= String.get state.data pos.cnum in
    if found = c then
      let pos= { pos with cnum= pos.cnum + 1 } in
      (Ok (found, { state with pos }))
    else
      Error (
        state.pos,
        sprintf "\"%c\" expected but \"%c\" found" c found)
  else
    (Error (state.pos, "out of bounds"))

(* [string str] consumes exactly [str] when the input continues with it
   and returns the matched text; fails when fewer than
   [String.length str] characters remain or when they differ. *)
let string str= fun state->
  let pos= state.pos in
  let len= String.length str in
  if state.maxlen - pos.cnum >= len then
    let found= String.sub state.data pos.cnum len in
    if found = str then
      let pos= { pos with cnum= pos.cnum + len } in
      (Ok (found, { state with pos }))
    else
      Error (
        state.pos,
        sprintf "\"%s\" expected but \"%s\" found" str found)
  else
    (Error (state.pos, "out of bounds"))

(* [satisfy test] consumes and returns the current character when
   [test] holds for it; fails otherwise. *)
let satisfy test= fun state->
  let pos= state.pos in
  if pos.cnum < state.maxlen then
    let found= String.get state.data pos.cnum in
    if test found then
      let pos= { pos with cnum= pos.cnum + 1 } in
      (Ok (found, { state with pos }))
    else
      Error (
        state.pos,
        sprintf "\"%c\" isn't satisfied" found)
  else
    (Error (state.pos, "out of bounds"))

(* combinator *)

(* [fail msg] always fails with [msg] at the current position. *)
let fail msg= fun state-> Error (state.pos, msg)

(* [return v] succeeds with [v] and leaves the state unchanged. *)
let return v= fun state-> Ok (v, state)

(* [bind p f] runs [p]; on success its value is passed to [f] and the
   resulting parser is run on the state left by [p]. An error from [p]
   is propagated unchanged. *)
let bind (p: 'a parser) (f: 'a -> 'b parser)= fun state->
  let result= p state in
  match result with
  | Error e-> Error e
  | Ok (v,state)-> f v state

(* Infix alias of bind. *)
let (>>=)= bind
(* Sequence two parsers, keeping the result of the second. *)
let (>>) p1 p2= p1 >>= fun _ -> p2
(* Sequence two parsers, keeping the result of the first. *)
let (<<) p1 p2= p1 >>= fun x-> p2 >> return x
(* Map [f] over the result of [p]. *)
let (|>>) p f= p >>= fun v-> return (f v)
(* Run [p], discard its result and return the constant [v]. *)
let (>>$) p v= p >> return v

let (<|>) (p1:'a parser) (p2:'a
parser)= fun state->
  (* Ordered choice: the result of p1 is kept when it succeeds; otherwise
     p2 is run from the same starting state, so a failing p1 never
     consumes input. The error of p1 is discarded. *)
  let result= p1 state in
  match result with
  | Error _-> p2 state
  | Ok _-> result

(* [between left right p] parses [left], [p], [right] in that order and
   returns the result of [p]. *)
let between left right p= left >> p << right

(* [many p] applies [p] until it fails and returns the collected
   results, possibly the empty list. It never fails itself. *)
let many p=
  let rec parser s=
    (((p |>> fun v-> Some v) <|> return None) >>= (function
      | Some v-> parser |>> (fun r-> v :: r)
      | None-> return [])) s
  in parser

(* Like [many], but [p] must succeed at least once. *)
let many1 p=
  p >>= fun v-> many p |>> fun l-> v :: l

(* [times num p] applies [p] [num] times and returns the results in
   order; any [num] that is not positive yields the empty list. *)
let rec times num p s=
  if num > 0 then
    (p >>= (fun v-> times (num-1) p |>> (fun r-> v::r))) s
  else
    (return []) s

(* One or more [p] separated by [sep]; separator results are dropped. *)
let sepBy1 sep p=
  p >>= fun head-> many (sep >> p) >>= fun body-> return (head :: body)

(* Zero or more [p] separated by [sep]. *)
let sepBy sep p= sepBy1 sep p <|> return []

(* Zero or more [p], each one followed by [sep]. *)
let sepEndBy sep p= many (p << sep)

(* One or more [p], each one followed by [sep]. *)
let sepEndBy1 sep p= many1 (p << sep)

(* [opt default p] yields [default] without consuming when [p] fails. *)
let opt default p= p <|> return default

(* [option p] yields [Some v] when [p] succeeds with [v], else [None]. *)
let option p= p |>> (fun v-> Some v) <|> return None

(* [lookAhead p] returns the result of [p] but restores the original
   state on success; a failure of [p] is returned as is. The state
   produced by [p] is deliberately unused, hence the disabled warning. *)
let lookAhead p= fun state->
  let reply= p state in
  match reply with
  | Ok (r, newState)-> Ok (r, state)
  | Error _-> reply
  [@@ocaml.warning "-27"]

(* [followedBy p msg] succeeds with unit, consuming nothing, when [p]
   would succeed here; otherwise fails with [msg]. *)
let followedBy p msg= fun state->
  let reply= p state in
  match reply with
  | Ok _-> Ok ((), state)
  | Error _-> Error (state.pos, msg)

(* [notFollowedBy p msg] is the negation of [followedBy]: it fails with
   [msg] when [p] would succeed and succeeds without consuming when [p]
   fails. *)
let notFollowedBy p msg= fun state->
  let reply= p state in
  match reply with
  | Ok _-> Error (state.pos, msg)
  | Error _-> Ok ((), state)

(* parser *)

(* Succeeds only when the whole input has been consumed. *)
let eof state=
  if state.pos.cnum >= state.maxlen
  then Ok ((), state)
  else Error (state.pos, "not eof")

(* The newline parsers, unlike the primitives above, also maintain the
   line counter and set bol to the offset just after the terminator. *)

(* Parses a single line feed and returns it as a one-character string. *)
let newline_lf state=
  let pos= state.pos in
  if pos.cnum < state.maxlen then
    let found= String.get state.data pos.cnum in
    if found = '\n' then
      let cnum= pos.cnum + 1
      and line= pos.line + 1 in
      let bol= cnum in
      let pos= { cnum; line; bol } in
      (Ok (String.make 1 found, { state with pos }))
    else
      Error (
        state.pos,
        sprintf "newline-lf expected but \"%c\" found" found)
  else
    (Error (state.pos, "out of bounds"))

(* Parses a single carriage return and returns it as a one-character
   string. *)
let newline_cr state=
  let pos= state.pos in
  if pos.cnum < state.maxlen then
    let found= String.get state.data pos.cnum in
    if found = '\r' then
      let cnum= pos.cnum + 1
      and line= pos.line + 1 in
      let bol= cnum in
      let pos= { cnum; line; bol } in
      (Ok (String.make 1 found, { state with pos }))
    else
Error (
  state.pos,
  sprintf "newline-cr expected but \"%c\" found" found)
else
  (Error (state.pos, "out of bounds"))

(* Parses the two-character terminator CR LF and returns it.
   Both characters are consumed, so cnum advances by 2 and the line
   counter is incremented exactly once; bol points just past the
   terminator. *)
let newline_crlf state=
  let pos= state.pos in
  if pos.cnum + 2 <= state.maxlen then
    let found= String.sub state.data pos.cnum 2 in
    if found = "\r\n" then
      let cnum= pos.cnum + 2
      and line= pos.line + 1 in
      let bol= cnum in
      let pos= { cnum; line; bol } in
      (Ok (found, { state with pos }))
    else
      Error (
        state.pos,
        sprintf "newline-crlf expected but \"%s\" found" found)
  else
    (Error (state.pos, "out of bounds"))

(* Parses the two-character terminator LF CR and returns it.
   As with [newline_crlf], both characters are consumed. *)
let newline_lfcr state=
  let pos= state.pos in
  if pos.cnum + 2 <= state.maxlen then
    let found= String.sub state.data pos.cnum 2 in
    if found = "\n\r" then
      let cnum= pos.cnum + 2
      and line= pos.line + 1 in
      let bol= cnum in
      let pos= { cnum; line; bol } in
      (Ok (found, { state with pos }))
    else
      Error (
        state.pos,
        sprintf "newline-lfcr expected but \"%s\" found" found)
  else
    (Error (state.pos, "out of bounds"))

(* Any line terminator. The two-character forms are tried first so that
   they are not split into two separate one-character newlines. *)
let newline=
  newline_crlf <|> newline_lfcr
  <|> newline_lf <|> newline_cr

(* Binary integer readers. The plain variants read the least significant
   part first (little-endian); the _net variants read the most
   significant part first (network byte order). *)

let int8= any |>> int_of_char

let int16= any >>= fun l-> any
  |>> fun h-> int_of_char h lsl 8 + int_of_char l
let int16_net= any >>= fun h-> any
  |>> fun l-> int_of_char h lsl 8 + int_of_char l

let int32= int16 >>= fun l-> int16
  |>> fun h-> Int32.(add (shift_left (of_int h) 16) (of_int l))
let int32_net= int16_net >>= fun h-> int16_net
  |>> fun l-> Int32.(add (shift_left (of_int h) 16) (of_int l))

(* The low word is an Int32 and may be negative; Int64.of_int32 would
   sign-extend it into the high word, so it is masked to 32 bits before
   being combined with the shifted high word. *)
let int64= int32 >>= fun l-> int32
  |>> fun h-> Int64.(logor
    (shift_left (of_int32 h) 32)
    (logand (of_int32 l) 0xFFFFFFFFL))
let int64_net= int32_net >>= fun h-> int32_net
  |>> fun l-> Int64.(logor
    (shift_left (of_int32 h) 32)
    (logand (of_int32 l) 0xFFFFFFFFL))

(* Single-character classes. *)
let num_dec= satisfy (fun c-> '0' <= c && c <= '9')

let num_bin= satisfy (fun c-> c = '0' || c = '1')

let num_oct= satisfy (fun c-> '0' <= c && c <= '7')

let num_hex= satisfy (fun c->
  '0' <= c && c <= '9'
  || 'a' <= c && c <= 'f'
  || 'A' <= c && c <= 'F')

let lowercase= satisfy (fun c-> 'a' <= c && c <= 'z')

let uppercase= satisfy (fun c-> 'A' <= c && c <= 'Z')

(* start parsing *)
let
parse_string parser str= parser (initState str)
end [@@ocaml.warning "-32-34"]

(* Grammar of the width-table configuration file, built on MiniParsec. *)
module Parser = struct
  open MiniParsec

  (* Concatenate a list of characters into a string. *)
  let string_of_cl cl= String.concat "" (List.map (String.make 1) cl)

  (* OCaml comments. Nested comments are handled correctly. *)
  (* Inside a comment, a star is accepted only when it is not followed by
     a closing parenthesis, so the closing delimiter ends the comment. *)
  let rec p_comment state=
    (string "(*" >>
      many
        ((p_comment |>> String.concat "")
        <|> (((newline |>> fun _-> '\n')
          <|> char '*' << notFollowedBy (char ')') ""
          <|> satisfy ((<>) '*'))
          |>> String.make 1))
      << string "*)") state

  (* One unit of blank: a space, a tab, a line terminator or a whole
     comment. Line terminators are normalised to a line feed and a
     comment counts as a single space. *)
  let p_space= char ' '
    <|> char '\t'
    <|> (newline |>> (fun _-> '\n'))
    <|> (p_comment |>> (fun _-> ' '))

  let p_spaces= many p_space
  let p_spaces1= many1 p_space

  (* Integer literals in decimal, binary, octal and hexadecimal. The
     prefix is normalised to lower case so that the digits can be handed
     to int_of_string unchanged. *)
  let p_ocaml_num_dec= many1 num_dec
  let p_ocaml_num_bin= (string "0b" <|> string "0B")
    >> many1 num_bin >>= fun v-> return ('0'::'b'::v)
  let p_ocaml_num_oct= (string "0o" <|> string "0O")
    >> many1 num_oct >>= fun v-> return ('0'::'o'::v)
  let p_ocaml_num_hex= (string "0x" <|> string "0X")
    >> many1 num_hex >>= fun v-> return ('0'::'x'::v)

  (* Prefixed forms are tried before the decimal one, which would
     otherwise stop after the leading zero of a prefix. *)
  let p_ocaml_num=
    p_ocaml_num_bin
    <|> p_ocaml_num_oct
    <|> p_ocaml_num_hex
    <|> p_ocaml_num_dec

  (* A code point written as an integer literal. *)
  let p_code= p_ocaml_num |>> string_of_cl |>> int_of_string

  (* A parenthesised pair of code points, returned as (start, stop). *)
  let p_tuple=
    char '(' >> p_spaces >> p_code >>= fun start->
      p_spaces >> char ',' >> p_spaces >>
      p_code >>= fun stop->
        p_spaces >> char ')' >> return (start, stop)

  (* One or more pairs separated by semicolons; a trailing semicolon is
     allowed. *)
  let p_tuples=
    let p_tuples_tl=
      many (p_spaces >> char ';' >> p_spaces >> p_tuple)
      << option (p_spaces >> char ';')
    in
    p_tuple >>= fun hd-> p_tuples_tl >>= fun tl-> return (hd::tl)

  (* A bracketed, possibly empty list of pairs. The closing bracket must
     be followed either by at least one blank or by the end of the
     input, so a file that ends right after the bracket, without a
     trailing newline, is accepted as well. *)
  let p_list=
    (p_spaces >> char '[' >> p_spaces)
    >> opt [] p_tuples
    << (p_spaces >> char ']' >> ((p_spaces1 >>$ ()) <|> eof))

  (* A top-level binding of the given name to a list of pairs; returns
     the list. *)
  let p_assgin name=
    p_spaces >> string "let" >>
    p_spaces >> string name >>
    p_spaces >> char '=' >>
    p_spaces >> p_list

  (* One parser per recognised set name, each tagging its list. *)
  let p_unprintable= p_assgin "unprintable" |>> fun s-> `Unprintable s
  let p_combining= p_assgin "combining" |>> fun s-> `Combining s
  let p_w2= p_assgin "w2" |>> fun s-> `W2 s
  let p_w3= p_assgin "w3" |>> fun s-> `W3 s
  let p_w4= p_assgin "w4" |>> fun s-> `W4 s
  let p_w5= p_assgin "w5" |>> fun s-> `W5 s
  let p_w6=
p_assgin "w6" |>> fun s-> `W6 s

  (* One item of the configuration: any of the named sets, or the end of
     the input. *)
  let p_set=
    p_unprintable <|> p_combining
    <|> p_w2 <|> p_w3 <|> p_w4 <|> p_w5 <|> p_w6
    <|> (eof |>> fun _-> `Eof)

  (* Parses a whole configuration and returns the seven lists in the
     order unprintable, combining, w2, w3, w4, w5, w6. Sets that are not
     defined in the input come back empty; when a set is defined more
     than once the last definition wins.
     The accumulators are allocated on every run of the parser, so the
     result of one parse never leaks into a later one. *)
  let p_cfg state=
    let unprintable= ref []
    and combining= ref []
    and w2= ref []
    and w3= ref []
    and w4= ref []
    and w5= ref []
    and w6= ref [] in
    let rec p_cfg_aux state=
      let result= p_set state in
      match result with
      | Error e-> Error e
      | Ok (`Eof, state)-> Ok ((), state)
      | Ok (`Unprintable set, state)->
        unprintable:= set;
        p_cfg_aux state
      | Ok (`Combining set, state)->
        combining:= set;
        p_cfg_aux state
      | Ok (`W2 set, state)->
        w2:= set;
        p_cfg_aux state
      | Ok (`W3 set, state)->
        w3:= set;
        p_cfg_aux state
      | Ok (`W4 set, state)->
        w4:= set;
        p_cfg_aux state
      | Ok (`W5 set, state)->
        w5:= set;
        p_cfg_aux state
      | Ok (`W6 set, state)->
        w6:= set;
        p_cfg_aux state
    in
    (p_cfg_aux |>> fun ()->
      (!unprintable, !combining, !w2, !w3, !w4, !w5, !w6)) state
end

type widthTable= {
  unprintable: Codes.t;
  combining: Codes.t;
  w2: Codes.t;
  w3: Codes.t;
  w4: Codes.t;
  w5: Codes.t;
  w6: Codes.t;
}

type t= widthTable

(* [load_from_string cfg] parses the configuration text [cfg].
   Returns [Ok table] on success, or [Error offset] where [offset] is
   the character offset reported by the parser for the failure. *)
let load_from_string cfg=
  match MiniParsec.parse_string Parser.p_cfg cfg with
  | Ok ((unprintable, combining, w2, w3, w4, w5, w6), _)->
    let unprintable= Codes.of_tuple_list unprintable
    and combining= Codes.of_tuple_list combining
    and w2= Codes.of_tuple_list w2
    and w3= Codes.of_tuple_list w3
    and w4= Codes.of_tuple_list w4
    and w5= Codes.of_tuple_list w5
    and w6= Codes.of_tuple_list w6 in
    Ok { unprintable; combining; w2; w3; w4; w5; w6 }
  | Error (pos, _)-> Error pos.cnum

(* [load_from_path path] reads the whole file at [path] and parses it
   with [load_from_string]. Exceptions raised while opening or reading
   the file propagate to the caller.
   The file is opened in binary mode so that the number of bytes read
   matches [in_channel_length], and the channel is closed on both the
   normal and the exceptional path. *)
let load_from_path path=
  let ic= open_in_bin path in
  let cfg=
    try
      let length= in_channel_length ic in
      really_input_string ic length
    with e->
      close_in_noerr ic;
      raise e
  in
  close_in ic;
  load_from_string cfg

(* [union cfg1 cfg2] merges two width tables field by field with
   [Codes.union]. *)
let union cfg1 cfg2=
  let unprintable= Codes.union cfg1.unprintable cfg2.unprintable
  and combining= Codes.union cfg1.combining cfg2.combining
  and w2= Codes.union cfg1.w2 cfg2.w2
  and w3= Codes.union cfg1.w3 cfg2.w3
  and w4= Codes.union cfg1.w4 cfg2.w4
  and w5= Codes.union cfg1.w5 cfg2.w5
  and w6= Codes.union cfg1.w6 cfg2.w6 in
  { unprintable; combining; w2; w3; w4;
w5; w6 } zandoye-charinfo_width-6a2ed28ba68c/src/cfg.mli0000644000000000000000000000255413401501066017573 0ustar 00000000000000(** {2 Cfg.widthTable} The configuration file contains width info that can be loaded by [load_from_path path_of_config_file]. The config syntax is a subset of OCaml. You can define several codes set in it: - unprintable - combining - w2 - w3 - w4 - w5 - w6 The type of there value is [(int * int) list]. We don't have to define all of the values and the sequence of definition doesn't matter. Here is a sample config file: {[ let unprintable= [ (888, 0x379)(* dec, hex *); (0b1110001011, 0o1613)(* bin, oct *) ] let w2= [(0x01c4, 0x01cc)] (* DŽ , nj *)(* (* nested comments *) *) ]} *) type widthTable = { unprintable : Codes.t; (** set contains unprintable characters *) combining : Codes.t; (** set contains combinging characters *) w2 : Codes.t; (** set contains characters of width 2 *) w3 : Codes.t; (** set contains characters of width 3 *) w4 : Codes.t; (** set contains characters of width 4 *) w5 : Codes.t; (** set contains characters of width 5 *) w6 : Codes.t; (** set contains characters of width 6 *) } type t = widthTable val load_from_string : string -> (t, int) Result.result (** [load_from_string str] parse configurations in string [str] *) val load_from_path : string -> (t, int) Result.result (** [load_from_path path] open and loads the config file from [path] *) val union : t -> t -> t (** [widthTable union] *) zandoye-charinfo_width-6a2ed28ba68c/src/charInfo_width.ml0000644000000000000000000000302213401501066021602 0ustar 00000000000000open CamomileLibraryDefault.Camomile open Result module Cfg = Cfg let width ?(cfg: Cfg.t option= None) uchar= let ucs= UChar.int_of uchar in if ucs >= 0x20 && ucs < 0x7f then 1 (* ascii printing char *) else if ucs = 0 then 0 else if ucs < 0x20 || ucs >= 0x7f && ucs < 0xa0 then -1 (* control characters *) else if Combining.(Codes.mem uchar set) then 0 else if Fullwidth.is_fullwidth ucs then 2 else match 
cfg with | Some widthTable-> if Codes.mem uchar widthTable.unprintable then -1 else if Codes.mem uchar widthTable.combining then 0 else if Codes.mem uchar widthTable.w2 then 2 else if Codes.mem uchar widthTable.w3 then 3 else if Codes.mem uchar widthTable.w4 then 4 else if Codes.mem uchar widthTable.w5 then 5 else if Codes.mem uchar widthTable.w6 then 6 else 1 | None-> 1 let width_exn ?(cfg: Cfg.t option= None) uchar= let w= width ~cfg uchar in if w = -1 then raise (Failure "unprintable character") else w module type UnicodeString_mini = sig type t val get : t -> int -> UChar.t val length : t -> int end module String(US:UnicodeString_mini) = struct let width ?(cfg: Cfg.t option= None) (us: US.t)= let length= US.length us in let rec aux ws i= if i < length then let wc= width ~cfg (US.get us i) in if wc = -1 then Error i else aux (ws+wc) (i+1) else Ok ws in aux 0 0 end zandoye-charinfo_width-6a2ed28ba68c/src/charInfo_width.mli0000644000000000000000000000230713401501066021760 0ustar 00000000000000open CamomileLibraryDefault.Camomile open Result module Cfg = Cfg val width: ?cfg: Cfg.t option -> UChar.t -> int (** [width c] returns the column width of [c] where [c] is of type [Camomile.UChar.t] and the value returned is of type [int]. This module is implemented purely in OCaml and follows the prototype of POSIX's wcwidth. i.e. If [c] is a printable character, the value is at least 0. If [c] is null character (L'\0'), the value is 0. Otherwise, -1 is returned. *) val width_exn: ?cfg: Cfg.t option -> UChar.t -> int (** when encounter an unprintable character, [width_exn c] raises [Failure "unprintable character"] instead of returning -1. 
*) module type UnicodeString_mini = sig type t val get : t -> int -> UChar.t val length : t -> int end (** minimization of Camomile.UnicodeString.Type *) module String : functor (US : UnicodeString_mini) -> sig val width : ?cfg:Cfg.widthTable option -> US.t -> (int, int) result (** [width str] returns the column width of [str] and the value returned is of type [(int, int) result]. When [Ok width] returnted, [width] is the width of [str]. When [Error pos] returned, [pos] is the offset of the left most unprintable character in [str]. *) end zandoye-charinfo_width-6a2ed28ba68c/src/codes.ml0000644000000000000000000000053213401501066017752 0ustar 00000000000000open CamomileLibraryDefault.Camomile include USet let add_ranges l s= List.fold_left (fun s (start, stop)-> add_range start stop s) s l let tuple_to_range (start, stop)= let start= UChar.of_int start and stop= UChar.of_int stop in start, stop let of_tuple_list l= let ranges= List.map tuple_to_range l in add_ranges ranges empty zandoye-charinfo_width-6a2ed28ba68c/src/codes.mli0000644000000000000000000000454713401501066020135 0ustar 00000000000000(** [Codes] expands some functions based on CamomileLibrary.USet, a module implements Sets of Unicode characters, implemented as sets of intervals. *) (** [USet.t] *) type t (** {3 extended functions} *) (** based on add_range, [add_ranges l s] add a list of ranges [l] to [s]. 
*) val add_ranges : (CamomileLibrary.UChar.t * CamomileLibrary.UChar.t) list -> t -> t (** [tuple_to_range tuple] convert [tuple] to a UChar range *) val tuple_to_range : int * int -> CamomileLibrary.UChar.t * CamomileLibrary.UChar.t (** [of_tuple_list l] convert int tuple list [l] to a [USet.t] *) val of_tuple_list : (int * int) list -> t (** {3 Below are type signatures of the original [CamomileLibrary.USet] module} *) val empty : t val is_empty : t -> bool val mem : CamomileLibrary.UChar.t -> t -> bool val add : CamomileLibrary.UChar.t -> t -> t val add_range : CamomileLibrary.UChar.t -> CamomileLibrary.UChar.t -> t -> t val singleton : CamomileLibrary.UChar.t -> t val remove : CamomileLibrary.UChar.t -> t -> t val remove_range : CamomileLibrary.UChar.t -> CamomileLibrary.UChar.t -> t -> t val union : t -> t -> t val inter : t -> t -> t val diff : t -> t -> t val compl : t -> t val compare : t -> t -> int val equal : t -> t -> bool val subset : t -> t -> bool val from : CamomileLibrary.UChar.t -> t -> t val after : CamomileLibrary.UChar.t -> t -> t val until : CamomileLibrary.UChar.t -> t -> t val before : CamomileLibrary.UChar.t -> t -> t val iter : (CamomileLibrary.UChar.t -> unit) -> t -> unit val iter_range : (CamomileLibrary.UChar.t -> CamomileLibrary.UChar.t -> unit) -> t -> unit val fold : (CamomileLibrary.UChar.t -> 'a -> 'a) -> t -> 'a -> 'a val fold_range : (CamomileLibrary.UChar.t -> CamomileLibrary.UChar.t -> 'a -> 'a) -> t -> 'a -> 'a val for_all : (CamomileLibrary.UChar.t -> bool) -> t -> bool val exists : (CamomileLibrary.UChar.t -> bool) -> t -> bool val filter : (CamomileLibrary.UChar.t -> bool) -> t -> t val partition : (CamomileLibrary.UChar.t -> bool) -> t -> t * t val cardinal : t -> int val elements : t -> CamomileLibrary.UChar.t list val ranges : t -> (CamomileLibrary.UChar.t * CamomileLibrary.UChar.t) list val min_elt : t -> CamomileLibrary.UChar.t val max_elt : t -> CamomileLibrary.UChar.t val choose : t -> CamomileLibrary.UChar.t val 
uset_of_iset : CamomileLibrary.Private.ISet.t -> t val iset_of_uset : t -> CamomileLibrary.Private.ISet.t zandoye-charinfo_width-6a2ed28ba68c/src/combining.ml0000644000000000000000000000526613401501066020633 0ustar 00000000000000let set= Codes.of_tuple_list [ (0x0300, 0x036F); (0x0483, 0x0486); (0x0488, 0x0489); (0x0591, 0x05BD); (0x05BF, 0x05BF); (0x05C1, 0x05C2); (0x05C4, 0x05C5); (0x05C7, 0x05C7); (0x0600, 0x0603); (0x0610, 0x0615); (0x064B, 0x065E); (0x0670, 0x0670); (0x06D6, 0x06E4); (0x06E7, 0x06E8); (0x06EA, 0x06ED); (0x070F, 0x070F); (0x0711, 0x0711); (0x0730, 0x074A); (0x07A6, 0x07B0); (0x07EB, 0x07F3); (0x0901, 0x0902); (0x093C, 0x093C); (0x0941, 0x0948); (0x094D, 0x094D); (0x0951, 0x0954); (0x0962, 0x0963); (0x0981, 0x0981); (0x09BC, 0x09BC); (0x09C1, 0x09C4); (0x09CD, 0x09CD); (0x09E2, 0x09E3); (0x0A01, 0x0A02); (0x0A3C, 0x0A3C); (0x0A41, 0x0A42); (0x0A47, 0x0A48); (0x0A4B, 0x0A4D); (0x0A70, 0x0A71); (0x0A81, 0x0A82); (0x0ABC, 0x0ABC); (0x0AC1, 0x0AC5); (0x0AC7, 0x0AC8); (0x0ACD, 0x0ACD); (0x0AE2, 0x0AE3); (0x0B01, 0x0B01); (0x0B3C, 0x0B3C); (0x0B3F, 0x0B3F); (0x0B41, 0x0B43); (0x0B4D, 0x0B4D); (0x0B56, 0x0B56); (0x0B82, 0x0B82); (0x0BC0, 0x0BC0); (0x0BCD, 0x0BCD); (0x0C3E, 0x0C40); (0x0C46, 0x0C48); (0x0C4A, 0x0C4D); (0x0C55, 0x0C56); (0x0CBC, 0x0CBC); (0x0CBF, 0x0CBF); (0x0CC6, 0x0CC6); (0x0CCC, 0x0CCD); (0x0CE2, 0x0CE3); (0x0D41, 0x0D43); (0x0D4D, 0x0D4D); (0x0DCA, 0x0DCA); (0x0DD2, 0x0DD4); (0x0DD6, 0x0DD6); (0x0E31, 0x0E31); (0x0E34, 0x0E3A); (0x0E47, 0x0E4E); (0x0EB1, 0x0EB1); (0x0EB4, 0x0EB9); (0x0EBB, 0x0EBC); (0x0EC8, 0x0ECD); (0x0F18, 0x0F19); (0x0F35, 0x0F35); (0x0F37, 0x0F37); (0x0F39, 0x0F39); (0x0F71, 0x0F7E); (0x0F80, 0x0F84); (0x0F86, 0x0F87); (0x0F90, 0x0F97); (0x0F99, 0x0FBC); (0x0FC6, 0x0FC6); (0x102D, 0x1030); (0x1032, 0x1032); (0x1036, 0x1037); (0x1039, 0x1039); (0x1058, 0x1059); (0x1160, 0x11FF); (0x135F, 0x135F); (0x1712, 0x1714); (0x1732, 0x1734); (0x1752, 0x1753); (0x1772, 0x1773); (0x17B4, 0x17B5); (0x17B7, 0x17BD); (0x17C6, 
0x17C6); (0x17C9, 0x17D3); (0x17DD, 0x17DD); (0x180B, 0x180D); (0x18A9, 0x18A9); (0x1920, 0x1922); (0x1927, 0x1928); (0x1932, 0x1932); (0x1939, 0x193B); (0x1A17, 0x1A18); (0x1B00, 0x1B03); (0x1B34, 0x1B34); (0x1B36, 0x1B3A); (0x1B3C, 0x1B3C); (0x1B42, 0x1B42); (0x1B6B, 0x1B73); (0x1DC0, 0x1DCA); (0x1DFE, 0x1DFF); (0x200B, 0x200F); (0x202A, 0x202E); (0x2060, 0x2063); (0x206A, 0x206F); (0x20D0, 0x20EF); (0x302A, 0x302F); (0x3099, 0x309A); (0xA806, 0xA806); (0xA80B, 0xA80B); (0xA825, 0xA826); (0xFB1E, 0xFB1E); (0xFE00, 0xFE0F); (* Variation Selectors *) (0xFE20, 0xFE23); (0xFEFF, 0xFEFF); (0xFFF9, 0xFFFB); (0x10A01, 0x10A03); (0x10A05, 0x10A06); (0x10A0C, 0x10A0F); (0x10A38, 0x10A3A); (0x10A3F, 0x10A3F); (0x1D167, 0x1D169); (0x1D173, 0x1D182); (0x1D185, 0x1D18B); (0x1D1AA, 0x1D1AD); (0x1D242, 0x1D244); (0xE0001, 0xE0001); (0xE0020, 0xE007F); (0xE0100, 0xE01EF); ] zandoye-charinfo_width-6a2ed28ba68c/src/dune0000644000000000000000000000020213401501066017173 0ustar 00000000000000(library (name charInfo_width) (public_name charInfo_width) (libraries result camomile) (flags (:standard -safe-string))) zandoye-charinfo_width-6a2ed28ba68c/src/fullwidth.ml0000644000000000000000000000120413401501066020654 0ustar 00000000000000let is_fullwidth ucs= (ucs >= 0x1100) && ( ucs <= 0x115f (* Hangul Jamo init. consonants *) || ucs == 0x2329 || ucs == 0x232a || (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) (* CJK ... 
Yi *) || (ucs >= 0xac00 && ucs <= 0xd7a3) (* Hangul Syllables *) || (ucs >= 0xf900 && ucs <= 0xfaff) (* CJK Compatibility Ideographs *) || (ucs >= 0xfe10 && ucs <= 0xfe19) (* Vertical forms *) || (ucs >= 0xfe30 && ucs <= 0xfe6f) (* CJK Compatibility Forms *) || (ucs >= 0xff00 && ucs <= 0xff60) (* Fullwidth Forms *) || (ucs >= 0xffe0 && ucs <= 0xffe6) || (ucs >= 0x20000 && ucs <= 0x2fffd) || (ucs >= 0x30000 && ucs <= 0x3fffd)) zandoye-charinfo_width-6a2ed28ba68c/test/dune0000644000000000000000000000021113401501066017363 0ustar 00000000000000(library (name test) (flags (:standard -safe-string)) (libraries charInfo_width) (inline_tests) (preprocess (pps ppx_expect))) zandoye-charinfo_width-6a2ed28ba68c/test/test.ml0000644000000000000000000000101413401501066020020 0ustar 00000000000000open CamomileLibraryDefault open Result open Printf let msg= "a͜b͡c字符宽度" let%expect_test "width"= let length= Camomile.UTF8.length msg in for i= 0 to length - 1 do let c= Camomile.UTF8.get msg i in let len= CharInfo_width.width c in printf " %d" len done; [%expect "1 0 1 0 1 2 2 2 2"] module UTF8 = CharInfo_width.String(Camomile.UTF8) let%test "string: width"= UTF8.width msg = Ok (1 + 0 + 1 + 0 + 1 + 2 + 2 + 2 + 2) let%test "string: error position"= UTF8.width "ab\ncd" = Error 2