pax_global_header00006660000000000000000000000064144643651220014521gustar00rootroot0000000000000052 comment=be395792a79ec99a30cd4858e5b3f2a561d1b4f3 charInfo_width-2.0.0/000077500000000000000000000000001446436512200144505ustar00rootroot00000000000000charInfo_width-2.0.0/.gitignore000066400000000000000000000001731446436512200164410ustar00rootroot00000000000000**/*.annot **/.\#* **/*~ **/Session.vim **/.*.swp **/*.o **/*.a **/*.so **/*.cm* /_build .merlin charInfo_width.install charInfo_width-2.0.0/CHANGES.md000066400000000000000000000007161446436512200160460ustar00rootroot00000000000000## 2.0.0 (2023-08-08) * compatible with camomile 2.0 (#1, @dmbaturin Daniil Baturin) ## 1.1.0 (2018-12-04) * Loosen the restriction of parameter module of `CharInfo_width.String` functor. Only the `length` and `get` functions are required. ## 1.0.0 (2018-12-03) * remove the `width\_utext` function * add `CharInfo_width.String` functor to calculates all kind of strings compatible with `Camomile.UnicodeString.Type.t` ## 0.1.0 (2018-11-16) initial release charInfo_width-2.0.0/LICENSE000066400000000000000000000021031446436512200154510ustar00rootroot00000000000000The MIT License Copyright (c) 2019, ZAN DoYe Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. charInfo_width-2.0.0/Makefile000066400000000000000000000003621446436512200161110ustar00rootroot00000000000000default: dune build install: dune install uninstall: dune uninstall doc: dune build @doc clean: dune clean runtest: dune runtest all-supported-ocaml-versions: dune build @install @runtest --workspace dune-workspace.dev --root . charInfo_width-2.0.0/README.md000066400000000000000000000030531446436512200157300ustar00rootroot00000000000000# CharInfo\_width Determine column width for a character. # How to use `CharInfo_width.width c` returns the column width of `c` where `c` is of type `Camomile.UChar.t` and the value returned is of type `int`. This module is implemented purely in OCaml and the `width` function follows the prototype of POSIX's wcwidth. i.e. If `c` is a printable character, the value is at least 0. If `c` is null character (L'\0'), the value is 0. Otherwise, -1 is returned. The `width_exn` function, when encounter an unprintable character, it raises `Failure "unprintable character"` instead of returning -1. By default, the `width` and `width_exn` function is compatible with ncursesw, ncursesw based CLIs, terminals. The way they consider the width of a character is the same. An optional parameter, `cfg`, can extend extra width info. The current width info table of ncursesw, xterm, xterm-compatible terminal is inadequate and limited, so is the default cfg of this module. When implement raw mode command-line interface, e.g. readline, a text editor, better extend extra width info by `cfg`. An on going sample repository of width table is here: [charInfo\_width\_extra](https://bitbucket.org/zandoye/charinfo_width_extra) This module also provides a functor, `CharInfo_width.String`. This functor accepts a `Camomile.UnicodeString` compatible module to calculate the width of a unicode string. The returned value is either `Ok width` or `Error pos-of-unprintable-character`. # Document The document is available [here](https://zandoye.bitbucket.io/doc/_html/charInfo_width/). charInfo_width-2.0.0/VERSION000066400000000000000000000000061446436512200155140ustar00rootroot000000000000002.0.0 charInfo_width-2.0.0/charInfo_width.opam000066400000000000000000000013211446436512200202530ustar00rootroot00000000000000opam-version: "2.0" maintainer: "zandoye@gmail.com" authors: [ "ZAN DoYe" ] homepage: "https://bitbucket.org/zandoye/charinfo_width/" bug-reports: "https://bitbucket.org/zandoye/charinfo_width/issues" license: "MIT" dev-repo: "hg://https://bitbucket.org/zandoye/charinfo_width" build: [ ["dune" "build" "-p" name "-j" jobs] ["dune" "runtest" "-p" name "-j" jobs] {with-test & (ocaml:version >= "4.04.0")} ] depends: [ "ocaml" {>= "4.02.3"} "result" "camomile" {>= "1.0.0" & < "2.0~"} "dune" {build} "ppx_expect" {with-test} ] synopsis: "Determine column width for a character" description: """ This module is implemented purely in OCaml and the width function follows the prototype of POSIX's wcwidth.""" charInfo_width-2.0.0/dune-project000066400000000000000000000000201446436512200167620ustar00rootroot00000000000000(lang dune 1.4) charInfo_width-2.0.0/dune-workspace.dev000066400000000000000000000000431446436512200200740ustar00rootroot00000000000000(lang dune 1.4) (profile release) charInfo_width-2.0.0/src/000077500000000000000000000000001446436512200152375ustar00rootroot00000000000000charInfo_width-2.0.0/src/cfg.ml000066400000000000000000000272731446436512200163430ustar00rootroot00000000000000open Result module MiniParsec = struct open Printf type pos= { cnum: int; line: int; bol: int; } type state= { data: string; maxlen: int; pos: pos } let initState data= { data; maxlen= String.length data; pos= { cnum= 0; line= 1; bol= 0; }; } type error= pos * string type 'a reply= (('a * state), error) result type 'a parser= state -> 'a reply type 'a t= 'a parser let string_of_pos pos= sprintf "line %d, characters %d" pos.line (pos.cnum - pos.bol) let string_of_pos_full pos= sprintf "offset %d, line %d, characters %d" pos.cnum pos.line (pos.cnum - pos.bol) (* parser generator *) let any= fun state-> let pos= state.pos in if pos.cnum < state.maxlen then let found= String.get state.data state.pos.cnum in let pos= { pos with cnum= pos.cnum + 1 } in (Ok (found, { state with pos })) else (Error (state.pos, "out of bounds")) let char c= fun state-> let pos= state.pos in if pos.cnum < state.maxlen then let found= String.get state.data pos.cnum in if found = c then let pos= { pos with cnum= pos.cnum + 1 } in (Ok (found, { state with pos })) else Error ( state.pos, sprintf "\"%c\" expected but \"%c\" found" c found) else (Error (state.pos, "out of bounds")) let string str= fun state-> let pos= state.pos in let len= String.length str in if state.maxlen - pos.cnum >= len then let found= String.sub state.data pos.cnum len in if found = str then let pos= { pos with cnum= pos.cnum + len } in (Ok (found, { state with pos })) else Error ( state.pos, sprintf "\"%s\" expected but \"%s\" found" str found) else (Error (state.pos, "out of bounds")) let satisfy test= fun state-> let pos= state.pos in if pos.cnum < state.maxlen then let found= String.get state.data pos.cnum in if test found then let pos= { pos with cnum= pos.cnum + 1 } in (Ok (found, { state with pos })) else Error ( state.pos, sprintf "\"%c\" isn't satisfied" found) else (Error (state.pos, "out of bounds")) (* combinator *) let fail msg= fun state-> Error (state.pos, msg) let return v= fun state-> Ok (v, state) let bind (p: 'a parser) (f: 'a -> 'b parser)= fun state-> let result= p state in match result with | Error e-> Error e | Ok (v,state)-> f v state let (>>=)= bind let (>>) p1 p2= p1 >>= fun _ -> p2 let (<<) p1 p2= p1 >>= fun x-> p2 >> return x let (|>>) p f= p >>= fun v-> return (f v) let (>>$) p v= p >> return v let (<|>) (p1:'a parser) (p2:'a parser)= fun state-> let result= p1 state in match result with | Error _-> p2 state | Ok _-> result let between left right p= left >> p << right let many p= let rec parser s= (((p |>> fun v-> Some v) <|> return None) >>= (function | Some v-> parser |>> (fun r-> v :: r) | None-> return [])) s in parser let many1 p= p >>= fun v-> many p |>> fun l-> v :: l let rec times num p s= if num > 0 then (p >>= (fun v-> times (num-1) p |>> (fun r-> v::r))) s else (return []) s let sepBy1 sep p= p >>= fun head-> many (sep >> p) >>= fun body-> return (head :: body) let sepBy sep p= sepBy1 sep p <|> return [] let sepEndBy sep p= many (p << sep) let sepEndBy1 sep p= many1 (p << sep) let opt default p= p <|> return default let option p= p |>> (fun v-> Some v) <|> return None let lookAhead p= fun state-> let reply= p state in match reply with | Ok (r, newState)-> Ok (r, state) | Error _-> reply [@@ocaml.warning "-27"] let followedBy p msg= fun state-> let reply= p state in match reply with | Ok _-> Ok ((), state) | Error _-> Error (state.pos, msg) let notFollowedBy p msg= fun state-> let reply= p state in match reply with | Ok _-> Error (state.pos, msg) | Error _-> Ok ((), state) (* parser *) let eof state= if state.pos.cnum >= state.maxlen then Ok ((), state) else Error (state.pos, "not eof") let newline_lf state= let pos= state.pos in if pos.cnum < state.maxlen then let found= String.get state.data pos.cnum in if found = '\n' then let cnum= pos.cnum + 1 and line= pos.line + 1 in let bol= cnum in let pos= { cnum; line; bol } in (Ok (String.make 1 found, { state with pos })) else Error ( state.pos, sprintf "newline-lf expected but \"%c\" found" found) else (Error (state.pos, "out of bounds")) let newline_cr state= let pos= state.pos in if pos.cnum < state.maxlen then let found= String.get state.data pos.cnum in if found = '\r' then let cnum= pos.cnum + 1 and line= pos.line + 1 in let bol= cnum in let pos= { cnum; line; bol } in (Ok (String.make 1 found, { state with pos })) else Error ( state.pos, sprintf "newline-cr expected but \"%c\" found" found) else (Error (state.pos, "out of bounds")) let newline_crlf state= let pos= state.pos in if pos.cnum + 2 <= state.maxlen then let found= String.sub state.data pos.cnum 2 in if found = "\r\n" then let cnum= pos.cnum + 1 and line= pos.line + 1 in let bol= cnum in let pos= { cnum; line; bol } in (Ok (found, { state with pos })) else Error ( state.pos, sprintf "newline-crlf expected but \"%s\" found" found) else (Error (state.pos, "out of bounds")) let newline_lfcr state= let pos= state.pos in if pos.cnum + 2 <= state.maxlen then let found= String.sub state.data pos.cnum 2 in if found = "\n\r" then let cnum= pos.cnum + 1 and line= pos.line + 1 in let bol= cnum in let pos= { cnum; line; bol } in (Ok (found, { state with pos })) else Error ( state.pos, sprintf "newline-lfcr expected but \"%s\" found" found) else (Error (state.pos, "out of bounds")) let newline= newline_crlf <|> newline_lfcr <|> newline_lf <|> newline_cr let int8= any |>> int_of_char let int16= any >>= fun l-> any |>> fun h-> int_of_char h lsl 8 + int_of_char l let int16_net= any >>= fun h-> any |>> fun l-> int_of_char h lsl 8 + int_of_char l let int32= int16 >>= fun l-> int16 |>> fun h-> Int32.(add (shift_left (of_int h) 16) (of_int l)) let int32_net= int16_net >>= fun h-> int16_net |>> fun l-> Int32.(add (shift_left (of_int h) 16) (of_int l)) let int64= int32 >>= fun l-> int32 |>> fun h-> Int64.(add (shift_left (of_int32 h) 32) (of_int32 l)) let int64_net= int32_net >>= fun h-> int32_net |>> fun l-> Int64.(add (shift_left (of_int32 h) 32) (of_int32 l)) let num_dec= satisfy (fun c-> '0' <= c && c <= '9') let num_bin= satisfy (fun c-> c = '0' || c = '1') let num_oct= satisfy (fun c-> '0' <= c && c <= '7') let num_hex= satisfy (fun c-> '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F') let lowercase= satisfy (fun c-> 'a' <= c && c <= 'z') let uppercase= satisfy (fun c-> 'A' <= c && c <= 'Z') (* start parsing *) let parse_string parser str= parser (initState str) end [@@ocaml.warning "-32-34"] module Parser = struct open MiniParsec let string_of_cl cl= String.concat "" (List.map (String.make 1) cl) (* OCaml comments. Nested comments are handled correctly. *) let rec p_comment state= (string "(*" >> many ((p_comment |>> String.concat "") <|> (((newline |>> fun _-> '\n') <|> char '*' << notFollowedBy (char ')') "" <|> satisfy ((<>) '*')) |>> String.make 1)) << string "*)") state let p_space= char ' ' <|> char '\t' <|> (newline |>> (fun _-> '\n')) <|> (p_comment |>> (fun _-> ' ')) let p_spaces= many p_space let p_spaces1= many1 p_space let p_ocaml_num_dec= many1 num_dec let p_ocaml_num_bin= (string "0b" <|> string "0B") >> many1 num_bin >>= fun v-> return ('0'::'b'::v) let p_ocaml_num_oct= (string "0o" <|> string "0O") >> many1 num_oct >>= fun v-> return ('0'::'o'::v) let p_ocaml_num_hex= (string "0x" <|> string "0X") >> many1 num_hex >>= fun v-> return ('0'::'x'::v) let p_ocaml_num= p_ocaml_num_bin <|> p_ocaml_num_oct <|> p_ocaml_num_hex <|> p_ocaml_num_dec let p_code= p_ocaml_num |>> string_of_cl |>> int_of_string let p_tuple= char '(' >> p_spaces >> p_code >>= fun start-> p_spaces >> char ',' >> p_spaces >> p_code >>= fun stop-> p_spaces >> char ')' >> return (start, stop) let p_tuples= let p_tuples_tl= many (p_spaces >> char ';' >> p_spaces >> p_tuple) << option (p_spaces >> char ';') in p_tuple >>= fun hd-> p_tuples_tl >>= fun tl-> return (hd::tl) let p_list= (p_spaces >> char '[' >> p_spaces) >> opt [] p_tuples << (p_spaces >> char ']' >> p_spaces1) let p_assgin name= p_spaces >> string "let" >> p_spaces >> string name >> p_spaces >> char '=' >> p_spaces >> p_list let p_unprintable= p_assgin "unprintable" |>> fun s-> `Unprintable s let p_combining= p_assgin "combining" |>> fun s-> `Combining s let p_w2= p_assgin "w2" |>> fun s-> `W2 s let p_w3= p_assgin "w3" |>> fun s-> `W3 s let p_w4= p_assgin "w4" |>> fun s-> `W4 s let p_w5= p_assgin "w5" |>> fun s-> `W5 s let p_w6= p_assgin "w6" |>> fun s-> `W6 s let p_set= p_unprintable <|> p_combining <|> p_w2 <|> p_w3 <|> p_w4 <|> p_w5 <|> p_w6 <|> (eof |>> fun _-> `Eof) let p_cfg= let unprintable= ref [] and combining= ref [] and w2= ref [] and w3= ref [] and w4= ref [] and w5= ref [] and w6= ref [] in let rec p_cfg_aux state= let result= p_set state in match result with | Error e-> Error e | Ok (`Eof, state)-> Ok ((), state) | Ok (`Unprintable set, state)-> unprintable:= set; p_cfg_aux state | Ok (`Combining set, state)-> combining:= set; p_cfg_aux state | Ok (`W2 set, state)-> w2:= set; p_cfg_aux state | Ok (`W3 set, state)-> w3:= set; p_cfg_aux state | Ok (`W4 set, state)-> w4:= set; p_cfg_aux state | Ok (`W5 set, state)-> w5:= set; p_cfg_aux state | Ok (`W6 set, state)-> w6:= set; p_cfg_aux state in p_cfg_aux |>> fun ()-> (!unprintable, !combining, !w2, !w3, !w4, !w5, !w6) end type widthTable= { unprintable: Codes.t; combining: Codes.t; w2: Codes.t; w3: Codes.t; w4: Codes.t; w5: Codes.t; w6: Codes.t; } type t= widthTable let load_from_string cfg= match MiniParsec.parse_string Parser.p_cfg cfg with | Ok ((unprintable, combining, w2, w3, w4, w5, w6), _)-> let unprintable= Codes.of_tuple_list unprintable and combining= Codes.of_tuple_list combining and w2= Codes.of_tuple_list w2 and w3= Codes.of_tuple_list w3 and w4= Codes.of_tuple_list w4 and w5= Codes.of_tuple_list w5 and w6= Codes.of_tuple_list w6 in Ok { unprintable; combining; w2; w3; w4; w5; w6 } | Error (pos, _)-> Error pos.cnum let load_from_path path= let ic= open_in path in let length= in_channel_length ic in let cfg= really_input_string ic length in load_from_string cfg let union cfg1 cfg2= let unprintable= Codes.union cfg1.unprintable cfg2.unprintable and combining= Codes.union cfg1.combining cfg2.combining and w2= Codes.union cfg1.w2 cfg2.w2 and w3= Codes.union cfg1.w3 cfg2.w3 and w4= Codes.union cfg1.w4 cfg2.w4 and w5= Codes.union cfg1.w5 cfg2.w5 and w6= Codes.union cfg1.w6 cfg2.w6 in { unprintable; combining; w2; w3; w4; w5; w6 } charInfo_width-2.0.0/src/cfg.mli000066400000000000000000000025541446436512200165070ustar00rootroot00000000000000(** {2 Cfg.widthTable} The configuration file contains width info that can be loaded by [load_from_path path_of_config_file]. The config syntax is a subset of OCaml. You can define several codes set in it: - unprintable - combining - w2 - w3 - w4 - w5 - w6 The type of there value is [(int * int) list]. We don't have to define all of the values and the sequence of definition doesn't matter. Here is a sample config file: {[ let unprintable= [ (888, 0x379)(* dec, hex *); (0b1110001011, 0o1613)(* bin, oct *) ] let w2= [(0x01c4, 0x01cc)] (* DŽ , nj *)(* (* nested comments *) *) ]} *) type widthTable = { unprintable : Codes.t; (** set contains unprintable characters *) combining : Codes.t; (** set contains combinging characters *) w2 : Codes.t; (** set contains characters of width 2 *) w3 : Codes.t; (** set contains characters of width 3 *) w4 : Codes.t; (** set contains characters of width 4 *) w5 : Codes.t; (** set contains characters of width 5 *) w6 : Codes.t; (** set contains characters of width 6 *) } type t = widthTable val load_from_string : string -> (t, int) Result.result (** [load_from_string str] parse configurations in string [str] *) val load_from_path : string -> (t, int) Result.result (** [load_from_path path] open and loads the config file from [path] *) val union : t -> t -> t (** [widthTable union] *) charInfo_width-2.0.0/src/charInfo_width.ml000066400000000000000000000027731446436512200205320ustar00rootroot00000000000000open Camomile open Result module Cfg = Cfg let width ?(cfg: Cfg.t option= None) uchar= let ucs= UChar.int_of uchar in if ucs >= 0x20 && ucs < 0x7f then 1 (* ascii printing char *) else if ucs = 0 then 0 else if ucs < 0x20 || ucs >= 0x7f && ucs < 0xa0 then -1 (* control characters *) else if Combining.(Codes.mem uchar set) then 0 else if Fullwidth.is_fullwidth ucs then 2 else match cfg with | Some widthTable-> if Codes.mem uchar widthTable.unprintable then -1 else if Codes.mem uchar widthTable.combining then 0 else if Codes.mem uchar widthTable.w2 then 2 else if Codes.mem uchar widthTable.w3 then 3 else if Codes.mem uchar widthTable.w4 then 4 else if Codes.mem uchar widthTable.w5 then 5 else if Codes.mem uchar widthTable.w6 then 6 else 1 | None-> 1 let width_exn ?(cfg: Cfg.t option= None) uchar= let w= width ~cfg uchar in if w = -1 then raise (Failure "unprintable character") else w module type UnicodeString_mini = sig type t val get : t -> int -> UChar.t val length : t -> int end module String(US:UnicodeString_mini) = struct let width ?(cfg: Cfg.t option= None) (us: US.t)= let length= US.length us in let rec aux ws i= if i < length then let wc= width ~cfg (US.get us i) in if wc = -1 then Error i else aux (ws+wc) (i+1) else Ok ws in aux 0 0 end charInfo_width-2.0.0/src/charInfo_width.mli000066400000000000000000000022601446436512200206720ustar00rootroot00000000000000open Camomile open Result module Cfg = Cfg val width: ?cfg: Cfg.t option -> UChar.t -> int (** [width c] returns the column width of [c] where [c] is of type [Camomile.UChar.t] and the value returned is of type [int]. This module is implemented purely in OCaml and follows the prototype of POSIX's wcwidth. i.e. If [c] is a printable character, the value is at least 0. If [c] is null character (L'\0'), the value is 0. Otherwise, -1 is returned. *) val width_exn: ?cfg: Cfg.t option -> UChar.t -> int (** when encounter an unprintable character, [width_exn c] raises [Failure "unprintable character"] instead of returning -1. *) module type UnicodeString_mini = sig type t val get : t -> int -> UChar.t val length : t -> int end (** minimization of Camomile.UnicodeString.Type *) module String : functor (US : UnicodeString_mini) -> sig val width : ?cfg:Cfg.widthTable option -> US.t -> (int, int) result (** [width str] returns the column width of [str] and the value returned is of type [(int, int) result]. When [Ok width] returnted, [width] is the width of [str]. When [Error pos] returned, [pos] is the offset of the left most unprintable character in [str]. *) end charInfo_width-2.0.0/src/codes.ml000066400000000000000000000005031446436512200166640ustar00rootroot00000000000000open Camomile include USet let add_ranges l s= List.fold_left (fun s (start, stop)-> add_range start stop s) s l let tuple_to_range (start, stop)= let start= UChar.of_int start and stop= UChar.of_int stop in start, stop let of_tuple_list l= let ranges= List.map tuple_to_range l in add_ranges ranges empty charInfo_width-2.0.0/src/codes.mli000066400000000000000000000041531446436512200170420ustar00rootroot00000000000000(** [Codes] expands some functions based on Camomile.USet, a module implements Sets of Unicode characters, implemented as sets of intervals. *) (** [USet.t] *) type t (** {3 extended functions} *) (** based on add_range, [add_ranges l s] add a list of ranges [l] to [s]. *) val add_ranges : (Camomile.UChar.t * Camomile.UChar.t) list -> t -> t (** [tuple_to_range tuple] convert [tuple] to a UChar range *) val tuple_to_range : int * int -> Camomile.UChar.t * Camomile.UChar.t (** [of_tuple_list l] convert int tuple list [l] to a [USet.t] *) val of_tuple_list : (int * int) list -> t (** {3 Below are type signatures of the original [Camomile.USet] module} *) val empty : t val is_empty : t -> bool val mem : Camomile.UChar.t -> t -> bool val add : Camomile.UChar.t -> t -> t val add_range : Camomile.UChar.t -> Camomile.UChar.t -> t -> t val singleton : Camomile.UChar.t -> t val remove : Camomile.UChar.t -> t -> t val remove_range : Camomile.UChar.t -> Camomile.UChar.t -> t -> t val union : t -> t -> t val inter : t -> t -> t val diff : t -> t -> t val compl : t -> t val compare : t -> t -> int val equal : t -> t -> bool val subset : t -> t -> bool val from : Camomile.UChar.t -> t -> t val after : Camomile.UChar.t -> t -> t val until : Camomile.UChar.t -> t -> t val before : Camomile.UChar.t -> t -> t val iter : (Camomile.UChar.t -> unit) -> t -> unit val iter_range : (Camomile.UChar.t -> Camomile.UChar.t -> unit) -> t -> unit val fold : (Camomile.UChar.t -> 'a -> 'a) -> t -> 'a -> 'a val fold_range : (Camomile.UChar.t -> Camomile.UChar.t -> 'a -> 'a) -> t -> 'a -> 'a val for_all : (Camomile.UChar.t -> bool) -> t -> bool val exists : (Camomile.UChar.t -> bool) -> t -> bool val filter : (Camomile.UChar.t -> bool) -> t -> t val partition : (Camomile.UChar.t -> bool) -> t -> t * t val cardinal : t -> int val elements : t -> Camomile.UChar.t list val ranges : t -> (Camomile.UChar.t * Camomile.UChar.t) list val min_elt : t -> Camomile.UChar.t val max_elt : t -> Camomile.UChar.t val choose : t -> Camomile.UChar.t val uset_of_iset : Camomile.Private.ISet.t -> t val iset_of_uset : t -> Camomile.Private.ISet.t charInfo_width-2.0.0/src/combining.ml000066400000000000000000000052661446436512200175470ustar00rootroot00000000000000let set= Codes.of_tuple_list [ (0x0300, 0x036F); (0x0483, 0x0486); (0x0488, 0x0489); (0x0591, 0x05BD); (0x05BF, 0x05BF); (0x05C1, 0x05C2); (0x05C4, 0x05C5); (0x05C7, 0x05C7); (0x0600, 0x0603); (0x0610, 0x0615); (0x064B, 0x065E); (0x0670, 0x0670); (0x06D6, 0x06E4); (0x06E7, 0x06E8); (0x06EA, 0x06ED); (0x070F, 0x070F); (0x0711, 0x0711); (0x0730, 0x074A); (0x07A6, 0x07B0); (0x07EB, 0x07F3); (0x0901, 0x0902); (0x093C, 0x093C); (0x0941, 0x0948); (0x094D, 0x094D); (0x0951, 0x0954); (0x0962, 0x0963); (0x0981, 0x0981); (0x09BC, 0x09BC); (0x09C1, 0x09C4); (0x09CD, 0x09CD); (0x09E2, 0x09E3); (0x0A01, 0x0A02); (0x0A3C, 0x0A3C); (0x0A41, 0x0A42); (0x0A47, 0x0A48); (0x0A4B, 0x0A4D); (0x0A70, 0x0A71); (0x0A81, 0x0A82); (0x0ABC, 0x0ABC); (0x0AC1, 0x0AC5); (0x0AC7, 0x0AC8); (0x0ACD, 0x0ACD); (0x0AE2, 0x0AE3); (0x0B01, 0x0B01); (0x0B3C, 0x0B3C); (0x0B3F, 0x0B3F); (0x0B41, 0x0B43); (0x0B4D, 0x0B4D); (0x0B56, 0x0B56); (0x0B82, 0x0B82); (0x0BC0, 0x0BC0); (0x0BCD, 0x0BCD); (0x0C3E, 0x0C40); (0x0C46, 0x0C48); (0x0C4A, 0x0C4D); (0x0C55, 0x0C56); (0x0CBC, 0x0CBC); (0x0CBF, 0x0CBF); (0x0CC6, 0x0CC6); (0x0CCC, 0x0CCD); (0x0CE2, 0x0CE3); (0x0D41, 0x0D43); (0x0D4D, 0x0D4D); (0x0DCA, 0x0DCA); (0x0DD2, 0x0DD4); (0x0DD6, 0x0DD6); (0x0E31, 0x0E31); (0x0E34, 0x0E3A); (0x0E47, 0x0E4E); (0x0EB1, 0x0EB1); (0x0EB4, 0x0EB9); (0x0EBB, 0x0EBC); (0x0EC8, 0x0ECD); (0x0F18, 0x0F19); (0x0F35, 0x0F35); (0x0F37, 0x0F37); (0x0F39, 0x0F39); (0x0F71, 0x0F7E); (0x0F80, 0x0F84); (0x0F86, 0x0F87); (0x0F90, 0x0F97); (0x0F99, 0x0FBC); (0x0FC6, 0x0FC6); (0x102D, 0x1030); (0x1032, 0x1032); (0x1036, 0x1037); (0x1039, 0x1039); (0x1058, 0x1059); (0x1160, 0x11FF); (0x135F, 0x135F); (0x1712, 0x1714); (0x1732, 0x1734); (0x1752, 0x1753); (0x1772, 0x1773); (0x17B4, 0x17B5); (0x17B7, 0x17BD); (0x17C6, 0x17C6); (0x17C9, 0x17D3); (0x17DD, 0x17DD); (0x180B, 0x180D); (0x18A9, 0x18A9); (0x1920, 0x1922); (0x1927, 0x1928); (0x1932, 0x1932); (0x1939, 0x193B); (0x1A17, 0x1A18); (0x1B00, 0x1B03); (0x1B34, 0x1B34); (0x1B36, 0x1B3A); (0x1B3C, 0x1B3C); (0x1B42, 0x1B42); (0x1B6B, 0x1B73); (0x1DC0, 0x1DCA); (0x1DFE, 0x1DFF); (0x200B, 0x200F); (0x202A, 0x202E); (0x2060, 0x2063); (0x206A, 0x206F); (0x20D0, 0x20EF); (0x302A, 0x302F); (0x3099, 0x309A); (0xA806, 0xA806); (0xA80B, 0xA80B); (0xA825, 0xA826); (0xFB1E, 0xFB1E); (0xFE00, 0xFE0F); (* Variation Selectors *) (0xFE20, 0xFE23); (0xFEFF, 0xFEFF); (0xFFF9, 0xFFFB); (0x10A01, 0x10A03); (0x10A05, 0x10A06); (0x10A0C, 0x10A0F); (0x10A38, 0x10A3A); (0x10A3F, 0x10A3F); (0x1D167, 0x1D169); (0x1D173, 0x1D182); (0x1D185, 0x1D18B); (0x1D1AA, 0x1D1AD); (0x1D242, 0x1D244); (0xE0001, 0xE0001); (0xE0020, 0xE007F); (0xE0100, 0xE01EF); ] charInfo_width-2.0.0/src/dune000066400000000000000000000002021446436512200161070ustar00rootroot00000000000000(library (name charInfo_width) (public_name charInfo_width) (libraries result camomile) (flags (:standard -safe-string))) charInfo_width-2.0.0/src/fullwidth.ml000066400000000000000000000012041446436512200175700ustar00rootroot00000000000000let is_fullwidth ucs= (ucs >= 0x1100) && ( ucs <= 0x115f (* Hangul Jamo init. consonants *) || ucs == 0x2329 || ucs == 0x232a || (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) (* CJK ... Yi *) || (ucs >= 0xac00 && ucs <= 0xd7a3) (* Hangul Syllables *) || (ucs >= 0xf900 && ucs <= 0xfaff) (* CJK Compatibility Ideographs *) || (ucs >= 0xfe10 && ucs <= 0xfe19) (* Vertical forms *) || (ucs >= 0xfe30 && ucs <= 0xfe6f) (* CJK Compatibility Forms *) || (ucs >= 0xff00 && ucs <= 0xff60) (* Fullwidth Forms *) || (ucs >= 0xffe0 && ucs <= 0xffe6) || (ucs >= 0x20000 && ucs <= 0x2fffd) || (ucs >= 0x30000 && ucs <= 0x3fffd)) charInfo_width-2.0.0/test/000077500000000000000000000000001446436512200154275ustar00rootroot00000000000000charInfo_width-2.0.0/test/dune000066400000000000000000000002111446436512200162770ustar00rootroot00000000000000(library (name test) (flags (:standard -safe-string)) (libraries charInfo_width) (inline_tests) (preprocess (pps ppx_expect))) charInfo_width-2.0.0/test/test.ml000066400000000000000000000007761446436512200167520ustar00rootroot00000000000000open Camomile open Result open Printf let msg= "a͜b͡c字符宽度" let%expect_test "width"= let length= Camomile.UTF8.length msg in for i= 0 to length - 1 do let c= Camomile.UTF8.get msg i in let len= CharInfo_width.width c in printf " %d" len done; [%expect "1 0 1 0 1 2 2 2 2"] module UTF8 = CharInfo_width.String(Camomile.UTF8) let%test "string: width"= UTF8.width msg = Ok (1 + 0 + 1 + 0 + 1 + 2 + 2 + 2 + 2) let%test "string: error position"= UTF8.width "ab\ncd" = Error 2