pax_global_header00006660000000000000000000000064142135770640014523gustar00rootroot0000000000000052 comment=9afbf57ac7ef10a03bece6cae677a151fa164666 markup.ml-1.0.3/000077500000000000000000000000001421357706400134325ustar00rootroot00000000000000markup.ml-1.0.3/.github/000077500000000000000000000000001421357706400147725ustar00rootroot00000000000000markup.ml-1.0.3/.github/FUNDING.yml000066400000000000000000000000201421357706400165770ustar00rootroot00000000000000github: aantron markup.ml-1.0.3/.github/workflows/000077500000000000000000000000001421357706400170275ustar00rootroot00000000000000markup.ml-1.0.3/.github/workflows/test.yml000066400000000000000000000021121421357706400205250ustar00rootroot00000000000000name: test on: [push, pull_request] jobs: opam: runs-on: ubuntu-latest strategy: fail-fast: false matrix: ocaml: - 4.13.1 - 4.12.1 - 4.11.2 - 4.10.2 - 4.09.1 - 4.08.1 - 4.07.1 - 4.06.1 - 4.05.0 - 4.04.2 - 4.03.0 steps: - uses: actions/checkout@v2 - uses: ocaml/setup-ocaml@v2 with: ocaml-compiler: ${{matrix.ocaml}} - run: sudo apt-get install python-bs4 - run: opam install --deps-only --with-test . 
--yes - run: opam install js_of_ocaml --yes - run: opam exec -- make test - run: opam exec -- make js-test - run: opam exec -- make dependency-test - run: opam lint - if: ${{matrix.ocaml == '4.13.1'}} env: COVERALLS_REPO_TOKEN: ${{secrets.GITHUB_TOKEN}} PULL_REQUEST_NUMBER: ${{github.event.number}} run: | opam install xmlm ocamlnet --yes opam exec -- make performance-test opam exec -- make clean coverage opam exec -- bisect-ppx-report send-to Coveralls markup.ml-1.0.3/.gitignore000066400000000000000000000002011421357706400154130ustar00rootroot00000000000000scratch/ _opam/ _build/ bisect*.out _coverage *.install *.merlin *.sexp doc/markup.odocl doc/html doc/publish doc/*.zip .vscode/ markup.ml-1.0.3/LICENSE.md000066400000000000000000000020541421357706400150370ustar00rootroot00000000000000Copyright © 2016-2021 Anton Bachin Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
markup.ml-1.0.3/Makefile000066400000000000000000000055671421357706400151070ustar00rootroot00000000000000.PHONY : build build : dune build -p markup,markup-lwt # This is not part of the ordinary build process. The output file, entities.ml, # is checked into git. .PHONY : entities entities : dune exec src/translate_entities/translate_entities.exe \ > src/entities.ml .PHONY : test test : dune runtest .PHONY : coverage coverage : find . -name '*.coverage' | xargs rm -f dune runtest --instrument-with bisect_ppx --force bisect-ppx-report html --expect src/ --do-not-expect src/translate_entities/ bisect-ppx-report summary @echo See _coverage/index.html .PHONY : performance-test performance-test : dune exec test/performance/performance_markup.exe dune exec test/performance/performance_nethtml.exe dune exec test/performance/performance_xmlm.exe .PHONY : js-test js-test : dune build test/js_of_ocaml/test_js_of_ocaml.bc.js .PHONY : dependency-test dependency-test : dune exec test/dependency/dep_core.exe dune exec test/dependency/dep_lwt.exe dune exec test/dependency/dep_lwt_unix.exe # Everything from here to "clean" is inactive, pending porting to odoc. OCAML_VERSION := \ $(shell ocamlc -version | grep -E -o '^[0-9]+\.[0-9]+' | sed 's/\.//') OCAMLBUILD := ocamlbuild -use-ocamlfind -j 0 -no-links HTML := docs/html DOCFLAGS := -docflags -colorize-code if_package = ! 
ocamlfind query $(1) > /dev/null 2> /dev/null || ( $(2) ) .PHONY : docs docs : docs-odocl $(OCAMLBUILD) $(DOCFLAGS) doc/$(LIB).docdir/index.html rm -rf $(HTML) mkdir -p $(HTML) rsync -r _build/doc/$(LIB).docdir/* $(HTML)/ cp doc/style.css $(HTML)/ $(call if_package,lambdasoup,\ test $(OCAML_VERSION) -eq 402 \ || ( make docs-postprocess \ && rm -f $(HTML)/type_*.html $(HTML)/html.stamp $(HTML)/index*.html \ && _build/doc/postprocess.native )) @echo "\nSee $(HTML)/index.html" .PHONY : docs-postprocess docs-postprocess : $(OCAMLBUILD) postprocess.native ODOCL := doc/markup.odocl .PHONY : docs-odocl docs-odocl : echo Markup > $(ODOCL) $(call if_package,lwt,echo Markup_lwt >> $(ODOCL)) $(call if_package,lwt.unix,echo Markup_lwt_unix >> $(ODOCL)) PUBLISH := docs/publish .PHONY : publish-docs publish-docs : check-doc-prereqs docs rm -rf $(PUBLISH) mkdir -p $(PUBLISH) cd $(PUBLISH) \ && git init \ && git remote add github git@github.com:aantron/markup.ml.git \ && rsync -r ../html/* ./ \ && git add -A \ && git commit -m 'Markup.ml documentation.' 
\ && git push -uf github master:gh-pages DOC_ZIP := docs/$(LIB)-$(VERSION)-doc.zip .PHONY : package-docs package-docs : check-doc-prereqs docs rm -f $(DOC_ZIP) zip -9 $(DOC_ZIP) $(HTML)/* .PHONY : check-doc-prereqs check-doc-prereqs : @ocamlfind query lwt.unix > /dev/null 2> /dev/null \ || (echo "\nLwt not installed" && false) @ocamlfind query lambdasoup > /dev/null 2> /dev/null \ || (echo "\nLambda Soup not installed" && false) .PHONY : clean clean : rm -rf $(HTML) $(PUBLISH) $(DOC_ZIP) dune clean rm -rf _coverage markup.ml-1.0.3/README.md000066400000000000000000000170301421357706400147120ustar00rootroot00000000000000# Markup.ml   [![CI status][ci-img]][ci] [![Coverage][coveralls-img]][coveralls] [ci]: https://github.com/aantron/markup.ml/actions [ci-img]: https://img.shields.io/github/workflow/status/aantron/markup.ml/test/master [coveralls]: https://coveralls.io/github/aantron/markup.ml?branch=master [coveralls-img]: https://img.shields.io/coveralls/aantron/markup.ml/master.svg Markup.ml is a pair of parsers implementing the [HTML5][HTML5] and [XML][XML] specifications, including error recovery. Usage is simple, because each parser is a function from byte streams to parsing signal streams: ![Usage example][sample] [sample]: https://github.com/aantron/markup.ml/blob/master/docs/sample.png In addition to being error-correcting, the parsers are: - **streaming**: parsing partial input and emitting signals while more input is still being received; - **lazy**: not parsing input unless you have requested the next parsing signal, so you can easily stop parsing partway through a document; - **non-blocking**: they can be used with [Lwt][lwt], but still provide a straightforward synchronous interface for simple usage; and - **one-pass**: memory consumption is limited since the parsers don't build up a document representation, nor buffer input beyond a small amount of lookahead. The parsers detect character encodings automatically, and emit everything in UTF-8. 
The HTML parser understands SVG and MathML, in addition to HTML5. Here is a breakdown showing the signal stream and errors emitted during the parsing and pretty-printing of `bad_html`: ```ocaml string bad_html "

Markup.ml

rocks!" |> parse_html `Start_element "body" |> signals `Start_element "p" `Start_element "em" `Text ["Markup.ml"] ~report (1, 10) (`Unmatched_start_tag "em") `End_element (* : recovery *) `End_element (*

: not an error *) `Start_element "p" `Start_element "em" (* recovery *) `Text ["rocks!"] `End_element (* *) `End_element (*

*) `End_element (* *) |> pretty_print (* adjusts the `Text signals *) |> write_html |> to_channel stdout;; "...shown above..." (* valid HTML *) ``` The parsers are [tested][tests] thoroughly. For a higher-level parser, see [Lambda Soup][lambdasoup], which is based on Markup.ml, but can search documents using CSS selectors, and perform various manipulations.
## Overview and basic usage The interface is centered around four functions between byte streams and signal streams: [`parse_html`][parse_html], [`write_html`][write_html], [`parse_xml`][parse_xml], and [`write_xml`][write_xml]. These have several optional arguments for fine-tuning their behavior. The rest of the functions either [input][input] or [output][output] byte streams, or [transform][transform] signal streams in some interesting way. Here is an example with an optional argument: ```ocaml (* Show up to 10 XML well-formedness errors to the user. Stop after the 10th, without reading more input. *) let report = let count = ref 0 in fun location error -> error |> Error.to_string ~location |> prerr_endline; count := !count + 1; if !count >= 10 then raise_notrace Exit file "some.xml" |> fst |> parse_xml ~report |> signals |> drain ``` [input]: http://aantron.github.io/markup.ml/#2_Inputsources [output]: http://aantron.github.io/markup.ml/#2_Outputdestinations [transform]: http://aantron.github.io/markup.ml/#2_Utility
## Advanced: [Cohttp][cohttp] + Markup.ml + [Lambda Soup][lambdasoup] + [Lwt][lwt] This program requests a Google search, then does a streaming scrape of result titles. It exits when it finds a GitHub link, without reading more input. Only one `h3` element is converted into an in-memory tree at a time. ```ocaml let () = Lwt_main.run begin (* Send request. Assume success. *) let url = "https://www.google.com/search?q=markup.ml" in let%lwt _, body = Cohttp_lwt_unix.Client.get (Uri.of_string url) in (* Adapt response to a Markup.ml stream. *) let body = body |> Cohttp_lwt.Body.to_stream |> Markup_lwt.lwt_stream in (* Set up a lazy stream of h3 elements. *) let h3s = Markup.(body |> strings_to_bytes |> parse_html |> signals |> elements (fun (_ns, name) _attrs -> name = "h3")) in (* Find the GitHub link. .iter and .load cause actual reading of data. *) h3s |> Markup_lwt.iter (fun h3 -> let%lwt h3 = Markup_lwt.load h3 in match Soup.(from_signals h3 $? "a[href*=github]") with | None -> Lwt.return_unit | Some anchor -> print_endline (String.concat "" (Soup.texts anchor)); exit 0) end ``` This prints `GitHub - aantron/markup.ml: Error-recovering streaming HTML5 and ...`. To run it, do: ```sh ocamlfind opt -linkpkg -package lwt.ppx,cohttp.lwt,markup.lwt,lambdasoup \ scrape.ml && ./a.out ``` You can get all the necessary packages by ``` opam install lwt_ssl opam install cohttp-lwt-unix lambdasoup markup ```
## Installing ``` opam install markup ```
## Documentation The interface of Markup.ml is three modules: [`Markup`][Markup], [`Markup_lwt`][Markup_lwt], and [`Markup_lwt_unix`][Markup_lwt_unix]. The last two are available only if you have [Lwt][lwt] installed (OPAM package `lwt`). The documentation includes a summary of the [conformance status][conformance] of Markup.ml.
## Depending Markup.ml uses [semantic versioning][semver], but is currently in `0.x.x`. The minor version number will be incremented on breaking changes.
## Contributing Contributions are very much welcome. Please see [`CONTRIBUTING`][contributing] for instructions, suggestions, and an overview of the code. There is also a list of [easy issues][easy].
## License Markup.ml is distributed under the [MIT license][license]. The Markup.ml source distribution includes a copy of the HTML5 entity list, which is distributed under the [W3C document license][w3c-license]. [parse_html]: http://aantron.github.io/markup.ml/#VALparse_html [write_html]: http://aantron.github.io/markup.ml/#VALwrite_html [parse_xml]: http://aantron.github.io/markup.ml/#VALparse_xml [write_xml]: http://aantron.github.io/markup.ml/#VALwrite_xml [HTML5]: https://www.w3.org/TR/html5/ [XML]: https://www.w3.org/TR/xml/ [tests]: https://github.com/aantron/markup.ml/tree/master/test [signal]: http://aantron.github.io/markup.ml/#TYPEsignal [lwt]: https://github.com/ocsigen/lwt [lambdasoup]: https://github.com/aantron/lambda-soup [cohttp]: https://github.com/mirage/ocaml-cohttp [license]: https://github.com/aantron/markup.ml/blob/master/LICENSE.md [contributing]: https://github.com/aantron/markup.ml/blob/master/docs/CONTRIBUTING.md [email]: mailto:antonbachin@yahoo.com [Markup]: http://aantron.github.io/markup.ml [Markup_lwt]: http://aantron.github.io/markup.ml/Markup_lwt.html [Markup_lwt_unix]: http://aantron.github.io/markup.ml/Markup_lwt_unix.html [conformance]: http://aantron.github.io/markup.ml/#2_Conformancestatus [w3c-license]: https://www.w3.org/Consortium/Legal/2002/copyright-documents-20021231 [semver]: http://semver.org/ [easy]: https://github.com/aantron/markup.ml/labels/easy markup.ml-1.0.3/docs/000077500000000000000000000000001421357706400143625ustar00rootroot00000000000000markup.ml-1.0.3/docs/CONTRIBUTING.md000066400000000000000000000233201421357706400166130ustar00rootroot00000000000000# Contributing to Markup.ml
#### Table of contents - [Getting started](#getting-started) - [Building and testing](#building) - [Code overview](#code-overview) - [Common concepts](#common-concepts) - [Structure](#structure)
## Getting started To get a development version of Markup.ml, do: ``` git clone https://github.com/aantron/markup.ml.git cd markup.ml opam install --deps-only . ```
## Building and testing To test the code, run `make test`. To generate a coverage report, run `make coverage`. There are several other kinds of testing: - `make performance-test` measures time for Markup.ml to parse some XML and HTML files. You should have `ocamlnet` and `xmlm` installed. Those libraries will also be measured, for comparison. - `make js-test` checks that `Markup_lwt` can be linked into a `js_of_ocaml` program, i.e. that it is not accidentally pulling in any Unix dependencies. - `make dependency-test` pins and installs Markup.ml using opam, then builds some small programs that depend on Markup.ml. This tests correct installation and that no dependencies are missing.
## Code overview ### Common concepts The library is internally written entirely in continuation-passing style (CPS), i.e., roughly speaking, *using callbacks*. Except for really trivial helpers, most internal functions in Markup.ml take two continuations (callbacks): one to call if the function succeeds, and one to call if it fails with an exception. So, for a function `f` we would think of as taking as one `int` argument, and returning a `string`, the type signature would look like this: ```ocaml val f : int -> (exn -> unit) -> (string -> unit) -> unit ``` The code will call it on `1337` as `f 1337 throw k`. If `f` succeeds, say with result `"foo"`, it will call `k "foo"`. If it fails, say with `Exit`, it will call `throw Exit`. The point of all this is that `f` doesn't have to return right away: it can, perhaps transitively, trigger some I/O, and call `throw` or `k` only later, when the I/O completes. Due to pervasive use of CPS, there are two useful type aliases defined in [`Markup.Common`][common]: ```ocaml type 'a cont = 'a -> unit type 'a cps = exn cont -> 'a cont -> unit ``` With these aliases, the signature of `f` can be abbreviated as: ```ocaml val f : int -> string cps ``` which is much more legible. The other important internal type in Markup.ml is the continuation-passing style stream, or *kstream* (`k` being the traditional meta-variable for a continuation). The fundamental operation on a stream is getting the next element, and for kstreams this looks like: ```ocaml Kstream.next : 'a Kstream.t -> exn cont -> unit cont -> 'a cont -> unit ``` When you call `next kstream on_exn on_empty k`, `next` eventually calls: - `on_exn exn` if trying to retrieve the next element resulted in exception `exn`, - `on_empty ()` if the stream ended, or - `k v` in the remaining case, when the stream has a next value `v`. Each of the parsers and serializers in Markup.ml is a chain of stream processors, tied together by these kstreams. 
For example, the HTML and XML parsers both... - take a stream of bytes, - transform it into a stream of Unicode characters paired with locations, - transform that into a stream of language tokens, like "start tag," - and transform that into a stream of parsing signals, like "start element."
The synchronous default API of Markup.ml, seen in the [`README`][readme], is a thin wrapper over this internal implementation. What makes it synchronous is that the underlying I/O functions guarantee that each call to a CPS function `f` will call one of its continuations (callbacks) *before* `f` returns. Likewise, the Lwt API is another thin wrapper, which translates between CPS and Lwt promises. What makes this API asynchronous is that underlying I/O functions might not call their continuations until long after the functions have returned, and this delay is propagated to the continuations nearest to the surface API. [readme]: https://github.com/aantron/markup.ml#readme
### Structure As for how the stream processors are chained together, The HTML specification strongly suggests a structure for the parser in the section [*8.2.1 Overview of the parsing model*][model], from where the following diagram is taken:

[model]: https://www.w3.org/TR/html5/syntax.html#overview-of-the-parsing-model The XML parser follows the same structure, even though it is not explicitly suggested by the XML specification. The modules can be arranged in the following categories. Where a module directly implements a box from the diagram, the box name is indicated in boldface. Until the modules dealing with Lwt, only `Markup.Stream_io` does I/O. The rest of the modules are pure with respect to I/O. Almost everything is based directly on specifications. Most functions are commented with the HTML or XML specification section number they are implementing. It may also be useful to see the [conformance status][conformance] – these are all the known deviations by Markup.ml from the specifications. #### Helpers - [`Markup.Common`][common] – shared definitions, compiler compatibility, etc. - [`Markup.Error`][error] – parsing and serialization error type. Markup.ml does not throw exceptions, because all errors are recoverable. - [`Markup.Namespace`][namespace] – namespace URI to prefix conversion and back. - [`Markup.Entities`][entities] – checked-in auto-generated HTML5 entity list. The source for this file is `src/entities.json`, and the generator is `src/translate_entities.ml`. Neither of these latter two files is part of the built Markup.ml, nor of the build process. - [`Markup.Trie`][trie] – trie for incrementally searching the entity list. - [`Markup.Kstream`][kstream] – above-mentioned CPS streams. - [`Markup.Text`][text] – some utilities for `Markup.Html_tokenizer` and `Markup.Xml_tokenizer`; see below. #### I/O - [`Markup.Stream_io`][stream_io] – make byte streams from files, strings, etc., write byte streams to strings, etc. – the first stage of parsing and the last stage of serialization (**Network** in the diagram). This uses the I/O functions in `Pervasives`. #### Encodings - [`Markup.Encoding`][encoding] – byte streams to Unicode character streams (**Byte Stream Decoder** in the diagram). 
For UTF-8, this is a wrapper around `uutf`. - [`Markup.Detect`][detect] – prescans byte streams to detect encodings. - [`Markup.Input`][input] – Unicode streams to "preprocessed" Unicode streams – in HTML5 parlance, this just means normalizing CR-LF to CR, and attaching locations (**Input Stream Preprocessor** in the diagram). #### HTML parsing - [`Markup.Html_tokenizer`][html_tokenizer] – preprocessed Unicode streams to HTML lexeme streams (**Tokenizer** in the diagram). HTML lexemes are things like start tags, end tags, and runs of text. - [`Markup.Html_parser`][html_parser] – HTML lexeme streams to HTML signal streams (**Tree Construction** in the diagram). Signal streams are things like "start an element," "start another element as its child," "now end the child," "now end the root element." They are basically a left-to-right traversal of a DOM tree, without the DOM tree actually being in memory. #### XML parsing - [`Markup.Xml_tokenizer`][xml_tokenizer] – as for HTML above, but for XML. - [`Markup.Xml_parser`][xml_parser] - as for HTML above, but for XML. #### HTML writing - [`Markup.Html_writer`][html_writer] – HTML signal streams back to UTF-8-encoded byte streams. #### XML writing - [`Markup.Xml_writer`][xml_writer] - as for HTML above, but for XML. #### User-friendly APIs - [`Markup.Utility`][utility] – convenience functions on signal streams for the user. - [`Markup`][main], [`Markup_lwt`][lwt], [`Markup_lwt_unix`][lwt_unix] – the public interface for operating all of the above machinery without having to touch CPS. 
[common]: https://github.com/aantron/markup.ml/blob/master/src/common.ml [error]: https://github.com/aantron/markup.ml/blob/master/src/error.ml [namespace]: https://github.com/aantron/markup.ml/blob/master/src/namespace.mli [entities]: https://github.com/aantron/markup.ml/blob/master/src/entities.ml [trie]: https://github.com/aantron/markup.ml/blob/master/src/trie.ml [kstream]: https://github.com/aantron/markup.ml/blob/master/src/kstream.mli [stream_io]: https://github.com/aantron/markup.ml/blob/master/src/stream_io.ml [encoding]: https://github.com/aantron/markup.ml/blob/master/src/encoding.ml [input]: https://github.com/aantron/markup.ml/blob/master/src/input.mli [html_tokenizer]: https://github.com/aantron/markup.ml/blob/master/src/html_tokenizer.mli [html_parser]: https://github.com/aantron/markup.ml/blob/master/src/html_parser.mli [html_writer]: https://github.com/aantron/markup.ml/blob/master/src/html_writer.mli [xml_tokenizer]: https://github.com/aantron/markup.ml/blob/master/src/xml_tokenizer.mli [xml_parser]: https://github.com/aantron/markup.ml/blob/master/src/xml_parser.mli [xml_writer]: https://github.com/aantron/markup.ml/blob/master/src/xml_writer.mli [text]: https://github.com/aantron/markup.ml/blob/master/src/text.ml [detect]: https://github.com/aantron/markup.ml/blob/master/src/detect.mli [utility]: https://github.com/aantron/markup.ml/blob/master/src/utility.ml [main]: https://github.com/aantron/markup.ml/blob/master/src/markup.mli [lwt]: https://github.com/aantron/markup.ml/blob/master/src/markup_lwt.mli [lwt_unix]: https://github.com/aantron/markup.ml/blob/master/src/markup_lwt_unix.mli [conformance]: http://aantron.github.io/markup.ml/#2_Conformancestatus markup.ml-1.0.3/docs/footer.html000066400000000000000000000005061421357706400165470ustar00rootroot00000000000000 markup.ml-1.0.3/docs/header.html000066400000000000000000000005211421357706400164760ustar00rootroot00000000000000

Markup.ml

markup.ml-1.0.3/docs/postprocess.ml000066400000000000000000000226301421357706400173030ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open Soup let (|>) x f = f x type transform = | Rename of string | TableOfContents | UpTo of string | Class of string | WithType of string | Meta of (string * string option) let transforms = ["Markup.html", [Rename "index.html"; TableOfContents; Class "index"; Meta ("Markup.ml - Error-recovering HTML and XML parsers for OCaml", Some ("Streaming, error-recovering, standards-based HTML(5) and " ^ "XML parsers with an interface designed for ease of use."))]; "Markup.Error.html", [UpTo "index.html"; Meta ("Error - Markup.ml", None)]; "Markup.Encoding.html", [UpTo "index.html"; Meta ("Encoding - Markup.ml", None)]; "Markup.Ns.html", [UpTo "index.html"; Meta ("Ns - Markup.ml", None)]; "Markup.ASYNCHRONOUS.html", [UpTo "index.html"; Class "asynchronous"; Meta ("ASYNCHRONOUS - Markup.ml", None)]; "Markup_lwt.html", [UpTo "index.html"; Class "asynchronous"; WithType "Lwt"; Meta ("Markup_lwt - Markup.ml", None)]; "Markup_lwt_unix.html", [UpTo "index.html"; Meta ("Markup_lwt_unix - Markup.ml", None)]; "Markup.ASYNCHRONOUS.Encoding.html", [UpTo "Markup.ASYNCHRONOUS.html"; Meta ("ASYNCHRONOUS.Encoding - Markup.ml", None)]] let rec find_map f = function | [] -> None | x::l -> match f x with | None -> find_map f l | Some _ as v -> v let lookup f file = try transforms |> List.assoc file |> find_map f with Not_found -> None let should_rename file = lookup (function | Rename name -> Some name | _ -> None) file let new_name file = match should_rename file with | None -> file | Some name -> name let should_make_toc file = try transforms |> List.assoc file |> List.mem TableOfContents with Not_found -> false let should_make_up file = lookup (function | UpTo name -> Some name | _ -> None) file let should_add_class file = lookup 
(function | Class name -> Some name | _ -> None) file let should_add_with_type file = lookup (function | WithType name -> Some name | _ -> None) file let html_directory = "doc/html" let read_output_file name = Filename.concat html_directory name |> read_file let write_output_file name text = write_file (Filename.concat html_directory name) text let read_fragment name = Filename.concat "doc" name |> read_file let clean_up_head soup name = soup $$ "head link:not([rel=stylesheet])" |> iter delete; let address = "http://aantron.github.io/markup.ml" in let canonical = match new_name name with | "index.html" -> address | name -> address ^ "/" ^ name in let title, description = let result = lookup (function | Meta v -> Some v | _ -> None) name in match result with | None -> failwith ("no metadata for " ^ name) | Some v -> v in let meta_content = "" ^ title ^ "\n\n" ^ "\n" ^ "" in let meta_content = match description with | None -> meta_content | Some text -> meta_content ^ "\n" in soup $ "title" |> delete; meta_content |> parse |> children |> iter (append_child (soup $ "head")) let clean_up_header soup = soup $ ".navbar" |> delete; soup $ "hr" |> delete; read_fragment "header.html" |> parse |> replace (soup $ "h1"); read_fragment "footer.html" |> parse |> append_child (soup $ "body") let clean_up_content soup = soup $$ "body > br" |> iter delete; soup $$ "a:contains(\"..\")" |> iter unwrap; begin match soup $? "table.indextable" with | None -> () | Some table -> table |> R.previous_element |> delete end; soup $$ "a[href]" |> iter (fun a -> let link = R.attribute "href" a in let prefix = "Markup.html" in if String.length link >= String.length prefix && String.sub link 0 (String.length prefix) = prefix then let suffix = String.sub link (String.length prefix) (String.length link - String.length prefix) in set_attribute "href" ("index.html" ^ suffix) a); soup $$ "a:not(.protect):contains(\"Markup.\")" |> iter (fun a -> match a $? 
".constructor" with | None -> let text = R.leaf_text a in let prefix = "Markup." in let text = String.sub text (String.length prefix) (String.length text - String.length prefix) in clear a; create_text text |> append_child a | Some element -> delete element; let inner_html = a $ ".code" |> children |> fold (fun s n -> s ^ (to_string n)) "" in let inner_html = String.sub inner_html 1 (String.length inner_html - 1) in a $ ".code" |> clear; inner_html |> parse |> children |> iter (append_child (a $ ".code"))); soup $$ "pre" |> filter (fun e -> e $? ".type" <> None) |> filter (fun e -> e $? "br" <> None) |> filter (fun e -> e $? "+ .info" <> None) |> iter (fun e -> e $ "+ .info" |> add_class "multiline-member"); let rec reassemble_lists () = match soup $? "ul + ul" with | None -> () | Some ul -> let ul = R.previous_element ul in let rec consume () = match ul $? "+ ul" with | None -> () | Some ul' -> R.child_element ul' |> append_child ul; delete ul'; consume () in consume (); reassemble_lists () in reassemble_lists (); soup $$ "ul" |> iter (fun ul -> ul |> R.previous_element |> delete); soup $$ "pre > .type" |> filter (fun e -> e $? "br" <> None) |> iter (fun e -> create_text " " |> prepend_child e; create_element "br" |> prepend_child e); let uncolor class_ content = soup $$ ("span." 
^ class_) |> filter at_most_one_child |> filter (fun e -> leaf_text e = Some content) |> iter unwrap in uncolor "constructor" "Error"; uncolor "constructor" "Encoding"; uncolor "constructor" "Markup"; uncolor "constructor" "Markup_lwt"; uncolor "constructor" "Markup_lwt_unix"; uncolor "constructor" "Markup_async"; uncolor "constructor" "ASYNCHRONOUS"; uncolor "constructor" "Pervasives"; uncolor "constructor" "Lwt_io"; uncolor "keyword" "false"; uncolor "keyword" "parser"; soup $$ "span[id]" |> iter (fun span -> set_name "a" span; set_attribute "href" ("#" ^ (R.attribute "id" span)) span); soup $$ "h2[id]" |> iter (fun h2 -> let href = "#" ^ (R.attribute "id" h2) in let a = create_element ~attributes:["href", href] ~inner_text:(R.leaf_text h2) "a"; in clear h2; append_child h2 a) let add_with_type soup type_name = let extra = " with type 'a io = 'a " ^ "" ^ type_name ^ ".t" in parse extra |> children |> iter (append_child (soup $ "pre:contains(\"ASYNCHRONOUS\")")) let add_table_of_contents soup = let sections = soup $$ "h2" |> to_list |> List.map (fun h2 -> R.id h2, R.leaf_text h2) in let toc = create_element ~class_:"toc" "div" in create_element ~inner_text:"Module contents" "p" |> append_child toc; let links = create_element ~class_:"links" "div" in append_child toc links; ("", "[Top]")::sections |> List.iter (fun (id, title) -> create_element ~attributes:["href", "#" ^ id] ~inner_text:title "a" |> append_child links; create_element "br" |> append_child links); create_element "br" |> insert_after (toc $ "a"); create_element "br" |> append_child toc; create_element "br" |> append_child toc; create_element ~attributes:["href", "https://github.com/aantron/markup.ml"] ~classes:["github"; "hide-narrow"] ~inner_text:"GitHub" "a" |> append_child toc; toc $ "a" |> set_attribute "class" "hide-narrow"; append_child (soup $ ".info") toc let add_up_link soup to_ = let toc = match soup $? 
".toc" with | Some element -> element | None -> let toc = create_element ~class_:"toc" "div" in append_child (soup $ ".info") toc; toc in let container = create_element ~class_:"hide-narrow" "div" in create_element ~inner_text:"[Up]" ~attributes:["href", to_] "a" |> append_child container; create_element "br" |> append_child container; create_element "br" |> append_child container; container |> prepend_child toc let () = html_directory |> Sys.readdir |> Array.to_list |> List.filter (fun f -> Filename.check_suffix f ".html") |> List.iter begin fun file -> let soup = file |> read_output_file |> parse in clean_up_head soup file; clean_up_header soup; clean_up_content soup; begin match should_add_with_type file with | None -> () | Some type_name -> add_with_type soup type_name end; if should_make_toc file then add_table_of_contents soup; begin match should_make_up file with | None -> () | Some target -> add_up_link soup target end; begin match should_add_class file with | None -> () | Some class_ -> soup $ "body" |> add_class class_ end; begin match should_rename file with | None -> soup |> to_string |> write_output_file file | Some name -> Sys.remove (Filename.concat html_directory file); soup |> to_string |> write_output_file name end end markup.ml-1.0.3/docs/sample.png000066400000000000000000002467761421357706400163770ustar00rootroot00000000000000PNG  IHDR=(Z)'iCCPICC ProfileXYTU[^{ġKn* "()%&E@E"DDQ1wo\s5Ws` i%񺸺'poL8Zc|lP+`}ȱ`GP@Bl&bF2JancM쳍etQlۛi?oo GG( #<lApLTws8h "m曘 ]>V(G`-M<gK7F3 Q% ylQ}*8!GA|DYUF(Fg|%9y'|#> $Ąڛo[gY_Fv:kx~!R[XQ`mǸXo`pAg/0_H8#M1mb =t6[T67 X}`xAZ}@$ m # _[-$(1ZA<*_#~J-PvFc>u*QV!g3‰ዲC+OߖQ4!vh7Fxw K0j=[#Va4Q(w 3HbОbѾ)26$q/>ioM$F!Hx^B7jk&~POoyv_ [ B?2*˫R;xdd߷/v[6|oY,e^h A4em0p7-l>Р+ p I(u xQ l#TZp\͠ tk#!x΍Y,o` C?`a.XU`]vwp4 gp\[k K@(fDT}qC2CJ"҉>2, 18 #O#ك9)ǜŴbn`c0Xj,'V5ź` ll) {]Qo8'SFצ+.;;kFq3%<φkX|6<?ů( <9A zc9*(DT#ZIB)b'qJAG!BI@BNQFq&3/ieMw((SSSS*zBZZڍ:: 
IdJ#%UZIc4D!]dR4hh´޴{h+h;h.1YӅ;G7@O/LoHGI_K~a`gep&,#Qє110"=S"SS7$3,l\I#S.S_zE3ef7̩ͧ--e3+!6k`mj]bF&-ƶ]m{{Os ::9;8twZv6p.vtv2wsr;n~}vDv%`r:m]cS{ayMb̀7%AAA BLBCCCB7œ ^7"##G$&բD/gb]1Ug(N4.+n*^+"~%)r"]bDPxRn\QLoJ*_jzn'@{|؛w68l:Ezh_2_9L˜2j&eW_ Ε=3/nL~iw,;QP0\TXuw(xvb˒ü=ydT(Ѹee:VTB2r*\?Nx|dkpMi-6)SO?~&z]DY7q+laߩrUj]_WE7SwaEOfForR_Tµk3O\pMwnݺ~[v;]jwU * )W˰p={##=ccܿC|4'aO>MO>M{}y 5/^6N*MvOL MO?y*lkץskY׋n,Ut;ξw+vԽՑє1o=xX>c'BĉOG~X]\4ɯYKA̗WV?ѷZ`c?DpY@k)Uׯxxs]A!Ϳ,$f_adzO>E1>zb2pvzȬsonO]Y }1%K_V`]Z[  >^Bm0 ['UbU8 >AMT6t!.'3QdIU4!99F>f!tG"r՝u]>G'~n=s7ϒU"a#!Q*%i1;:%/K5I74˶*PQaRPTՐզׁu>>կ114r13661GW,-ZZOLپ{ga݉"f3e&{CsPx3h&x=>/\sv Ȱ㽙O?O50S8Kx[ GϞ_>.-ӬZp+,@DjplA7/0?lgm‡}`0ɘ6Vaqָ [: pH FQXPQ*֣v~KCDDkM^~8=5]\ Vz d]Nw\Od9 sIJL*y%GP +^T6!ZBM_qYc([*&wtvƻ+\:@$<4l>")r-:<kw9. E-ʽis.;sr[5 +SBwTh1J'Լ<%~:ͳL:ϓ.]jbw~쓻OמӾy)1865JgVڜyH:'|P'O]~is ,|6wsVEV 573 552 @IDATxTם9n?ʂ~ȘrVl#fݨ(T)L modh5!#jl# Y-ubAm yA\<}}{x5 qB@!  =^>)B@! 4HCB@! QzD5K!B@Qz ! S(=SB@! (=B@)A@)QRH! B@iB@!  Jϔf)B@! J! B`JgJTR! GڀB@!0%3%Y )B@#m@! D,B@! D6 B@L LjB ! HB@! QzD5K!B@i/9xx#6.b*qe ݄D] Y3py-&9ۻ_+0Grik"L5Z~Da,߀g2iu!Fغk-@7q#,aǍW_w6F+s(qi<;!L&<(ԸEǭqTzJzOo=#CV|ɞh?T陵)Of% 8[<žle" TKu*=XGXlڻYtEn[ +5/tbȺ <[_ 2〚}Ϣ*3cZs;J6sd3چ ;|0?APH'0fmMX _Hm8`qCg{\2W 9_HNMAJJq~4Nܷl5 ;NjǷ#<n.OZ㷼e`oFÉt* aŤ 5ti'q G<+f$bn:G/HX[K֕ x{ù]FF$6/9Q QaӽCF{쮗VD=HmLx:)ùʹfmT*Z^u8}Hc(/fgd{*<,J u@ah3lsϖiű/_Yv~R]t튊S͙Bq7Aю JV.7p&4'4.  >BG1 8RAä ̟q9vjOȕZ.hR_3[+l=fY 1)w$PAaGgh@tXuJCDkπÇD'YP{a㐔LSl1n6a(< gtΨP+^ ʏS(>֗onn;>wq1H L㪳'W d\ s}Lɫ]˨'h\զO [d@C!-f_Nځ'£V*ңpZF\n!&bb tpOA1yWl7m_d ݿΙX+i*ߚn5:haj ,7=Hj~;~Ձ?wa*9v<ŏNo{,Jmo:4L]y)6,uuJW ݅3lE'ZwsatP+ՍE펳 7?s#:/0P xI=ph&biN ^OWve 6|q4f-1۽zNSH:\:xd*T ?>PЀ~N}ֆpdg4!2g/;p6 k&ۊI_1X'ǜAc| BmnqX*ߣ}T39Rv=oGeI9*Š1g/x I -4khwi-\j/&m{ +ФrdH=v> UTU +/-*D:枰3|ۡVwOe Ԫ}51?O'k^~Uu\J,w=$*5Z>v>؊4]u(uǖS6|dr JEMQUK&P&Ư]Sj~dUgT`Dͷ@;簾ʠ)2O]S)]3b)TRSE᷌+`(+7 5e%h)1krӊC)A`lJskPvckV/Jf(Ǎϔ^l{wl*•l,LK&\ྞm58mTQ)sTu…vȐQ**z,eɍƪ׀@r3 KBB$f ʷ,.d'bi B %#sB&*4E!t( ZAnr=@CiN4cO>ZcYR~ԩ%X}7}W_F*?:Yj. 
qH3846gk1ɳSi ._<"r/1VRōoM}s6pIKwC-縸Wށ\Z.熑7NC/r_p_Sut<)V Flvc~[a32S=F+ܛ$K7͎^Gqg5H;|M eY5g0?KOQcl_;SٴkmfXp1BQxr tϫQ/b%$7`ݱhP{l4sϪ506_QmP}{uWmWV ( Y֪c(Rva~l*RS;Jƣ+:ƽUTXS ^}8G3\R2Q"6!ZqrېE8;}Շ<q?alڭZ* W&u3pӹӐ^;2#RĖtrr9PIKi5@'2!0>Ʀ(tƪ^JVT 9܉o`723: 7)Nnz)|*:A>*= 炭%pۨL&ss0е9ΡEKTms-u ՠ{aT\CoCOvз_]xu^s!2 ܇el͢V* S$@wph+Ͻ7^e)n^oaXi 6h{=_a?}Gd|YLM'Aڄ76I`t I2s7EMvntwCvqk=5ʽ`uƀ5G˽Oߧ7IbD=Gk7Y9]j>8?|R\ڤއR8oIi: 82inĎ 3ae!@Gw?piv,gw17dJn5_Q W+*+ V6ZǷ0C%1mo_bÞ~}?9)Xw=bh嵁UCrv oZۜm\Q/H2rF\vFga$) OGec@gԇ f|/*7B10"dY8qǯ|yR611:;%DY>E+Icru"?*[#psl6N0>ƹꀩ AsJa-ca1%`㞂 /cSֈG@SW*9W'**w9y۰d} Hnp/7O w{~a#AŞ ujL4F[1BM9wC0 ZMKT@gUM΍0Pz2 (~"8w8*=QV Pu%=k'ƷőP8dF$DO)HҬZ翈eq+\J^ lIl l>@&nF?e!OtceAVŐf60hj1;-U)\녗єI}[?Zw?2Vފ'd^6[U  TzpSzfD֦ݜ9_ Q*j^h@|͢pP?ٹ=⩙x8 54Z'M$f% t5> Vpdžs_6ꋣr oͺQc^=V)HmVU3ERaAR_p*{4'Z7a-9H4vuj=lb 9nRPT&.^i=ҝ yˣ;;.ޫf% eK?16D=f'-yrhMg}v0 ™-(.zCHBwK Pag͕q-8SK+H.|_46<]^a@O[Ʊscl"$£Z@Q˴ xh}Frf뛝w^}djI&ۧ=\y\+5ve34"z;Jo HUeae?BYqEtqj\Snދ9;qsH)ltn_vR oc~I_^#7QJ?# ?6nB=ZC|eQ\ʮ7wޏטwn)\֩vɇG^=ݴ1 ʥukes 0oD )4~ʬBlY='=&Dkͩ8T^T0^b,çl^QVWkolsek!qJvi8ϛh=[m>qZK%ЁgnI܊BK~swa݊/^Fu?5e4V!g~W/c2Fvo $[]Wiݹ<9蚝p8pk 'lj&l봄o=?چ'S!'G{wN sBBlEs3gKFf|uDFxR+ˢ1l]yYi<5WĮfQ_]JQ٨] -A4}w({-~F7fg8L;L0ģqTl  H5n`.9_qI9)qF V3|J5 ([:e]liyv:zdxb7+vԊټJcɕWv7w*~I˭꺉ͷx3C7h=Nm⢅,Β*){A+O<Vzv[alMFAZ&ֳMdIτ}:9@npY`av|iIM0~ufFVԜ[xV9p(g=~hgnKVYOg=5K i ~Xkj^޸kQtXgTUR1+d~4F9)wqGN~ 䘃lʮ&?uU5My62ӵ1Mx^<"cӧNqhqR1v̶t=؝8K`=`nbXgl=Ѷ`G>2Rgo6=gϞBrTKUط?z#ʮڗx({eaOI oh{Kdecf-x:?^*ipB*݋lEKO#9K돯wMF4wStv5_ƙ_]RꋢMLiRɧB~n|?#f$2!De5' pxXfhM~hT{79Q Ms v&m6g-ߏD. ģ뻫1Yweo-ίuG:E)YOaNe?a-ш9S4yWʶER<ʦ+1hJKJCj?јYO5:Ү5kk7ւߗK#;::iY zixJ? 
rdpwAC֛HjN/h_=* G.dȉF* ,g;^Oo޼W,gnӖf3}091}monτQ>qW|_/Cjck,xڮKnx`j̒]rEXbE¬Xu_ nhZX6̹ívDx9k 1bY%XyI#|i*ێFОUЄeThVSkr׋|[:]vt+ڬR9J{AO}V]S.p⻸8?2ׯ [tu4MN-jʹĻtʕ!ae \wTdeaNZS/Ւ fF7_'{7xMB,7YH[Y{MZ'4tm74 ^AziNym)2Yƣ8?"脺 g,=B):M]d9=ZʶEPabn_=%mPN^>^ x(#ef`6523gp&fA\oGHh3/a!}nf;pj8o^*zM\Cy_4#5\ea>r@]M o7;ۧq'?#t+_<5Mn'c8V\.ʔίWCR?(%o:2XeӁOngc0ӏqȺaZ}00G?{/3K|e?tmYk9 6eT#`ܧq 6ٱ0=+$DR9E:o}r'{[rE#Mh9 {@IvG'V lϭAHh>MYq( ;P\7$f \OȆVk(0:*e=1(mG~ A +lңQ7FA^*Xo)O)eˢlzn8Ȧ tu Yfe'ξBʪ[͙bu=n\տֵTg'1ԋ~b pS, XIK&ڗQCXm,kE/mۏ2ފql&\l;f*4-BK^+gz癃!Gw{NZѸ!TgBʐ󛕯2}~#۸E-6SAUL姻@Ofkοis?^T}/&.,RJGB&^-O U^hEI'bcn-1ܨMͭ/S&R;j;l})6Qp(\r*¡W6Xu)Eds hM%QE3nH7sqEy >-䃕GhnlSHM*!gɬ tw_-.^O\HK7uFK9r?@.zݪ=7wTl%^nIF95_M4TPQۄjO56Rf1ܸv(v3 ɈP#xЖb<؍T漮6&n:Ց)˧;}39hr9?VTўЂ6p+D~hP}o#wԩu i 2[x*Z*q|0A=ťju~ M/AYV~"{lUzGY~!tN8IHNAJnޥ_K_F/ڧ? ڟfcg;oY}ʺ gGKGqp##x|40te\0i W#VbPwgV-O\7ضR79]ZHb([eS2enȖDZp kjވ ά.7vr"rwPg-GŅ 'Lpaiv̦>tMzQ/| z\sTzIux8"Mp?a6,^@޻ͩı^dz@NQ.٭Z澾ځ}Pi ƾ&>ʧhžTr l*Vhڬ}а1QGNZ<^J?Nc{Iŵ?-[ Ǜ>G7cJNJzK7l|o?zb.ʦloɖ&o8YP֨jT 9@]d:_px19e{(S2YǶ/uO31d 5:QϨUV*i4RQ⭘OF_|sVxC.+؆!gTc/:!C7I?>RLZVj)Dd?8/G~MD@j.ES8_ȗ9pب?'ƽqiK.+&4%jif`ŷߚ:u\Yz0%Y.- zyR_['I̚Qߞ{R(! Inr! $CDB@3?5IrB@! LUIS! tDtU" ! A@*i ! (=JD ! B`"3T%M! B`gU$B@LQz&)B@L:L*B@ JDP4B@IG@IW%"B@!0DB@!03DB@! &(=AUB@! &Qz&]@B@! Dg"JB@! # JϤH! LUIS! tDtU" ! A@*i ! (=JD ! B`"3T%M! B`gU$B@LQz&)B@L:L*B@ JDP4B@IG@IW%"B@!0DB@!03DB@! &(=AUB@! &Qz&]@B@! Dg"JB@! # JϤH! LUIS! tDtU" ! A@*i ! I'єhrnĂ+_AIō)Sr)B@σ8*=QV Pu%=k'ƷőϣX㑧eg 0̴M~NJ [(K>HHAkB@!0,[ʲc^]'ғt nL!9Q)vgLi~%b͒ú#xCHLKL}ـspEOZ xqpIZrHۀ犖"=fyJy3#T Uy(zUڝ~bjl()/@2uM s(QADfVnA~ E6l߂BD yr6m3ʋnG[ l.\~(-ɯ 9{s4~/OK˩B@{-=]Wp\u&8oktp8pkhXtǣ>̈G,3Џ }idzd ,vlyi#T 6Lf \[9J~fMTKьV"~h]MPq.<_SR:;[qZ+z(gT3Snj9fIn"#Cl6^ibUX*D6Mvy""O8SWqUy)<: =-SG(h gqB@! ecVz+a.B\]ՍbUa<`Vdh;V ޞ'jڵ )g9*MzM~u =ytexA~ 灵(:LeJs4vz|QjႣn#ﰈ(_z|kzdg Qs T!҂Uw)Z{mRY}/7hܪ_oIB@{ؖ|t f}0g<([vn(z6DҠ\S{*oIR2t ZnjẸL9jmIG?$sۤ'kx:66z5 -* )pӪfJM ͥ XR|_͎>EV)D ppoHd%Kg"ʋ|PwxOW! 
=M`lJONb{KHຍ=\ 4DRF8D%@Z K྘ZT#[jr+xEYp,ST[-ë43RgBOeVi~1S |;GNJ@q*ks~w"$%aKj1 ԕwȂD4f<㞞p)r`]O!1҄|?6r")='=v@Bylz͊E9!+~啅kJLCsi n]Wy]ߛinU|$DSCވn-~4ô_"x 8uǧw?ڲTXb&K@ ՛$6]y%,9%_Xs@IDATFWDۄ7UTypɪovZZl@W|l~I|LUU΄B#0==H[ 57V g|iEU bRttfRQOgN>qa:MyqLq) hW&n`MRxSW===V#^?*Gon6) ؗKGBc[ 鍇Ḧ%2S*Hg\75?ۋ'l_n`ɑB@_<Hc(|vWj>VpjU7ه* 3+_ J_.F_|0rhêqp4ƸgOIZS'W-G-P/:ԔL11?ݜBwrܥB@L՟_F.?dHp?Gcu0*G{}xAg\]ISY1p%J#3k?'bn\pB/B@{W\~qyXO7qu`@mN,U℀B@Lgyk " ! oddB@!2QzBF%B@(=wsB@! @D B@! f͵' ! !'dTP!  s7מ.B@L@QI@! Bn& J\{"B@!2QzBF%B@(=wsB@! @D B@! f͵' ! !'dTP!  s7מ.B@L@QI@! Bn& J\{"B@!2QzBF%B@(=wsB@! @89yH4QUqwuaDx!0 ؑ oAn)B`HPjҳ*g$˝(Q0w;䗠RĬ]t]Aٮ:8hinfmH¾-|^ϡNsw#4VEIōq,5.ވ0VzbBkp9,_U%8BRX,t|H '7w mq?PT "Gg1*g ~D8ֳ82NuP1סl1EG=cPw#99i='[m@eIƤ6/LHwj%טTؒ;Ql~isK  2R4< Qz6ݏpt:"7deӊ4{ b꧍U(٘=6pB[^ii.b<{ F5Oɲc^Yvs:ye| e8baF8#Föeh.nF4s=cxS ~VN(V9 1T~YXIի8%=Fcif*fD&ZG@ U ےDji*ײR6*`"qQ" Ƭ>V컏î&C8Suڥ_nz;wO+ہ:;^*ϣI컶82L)rK@-E+6zLTvq9i] ^u)WT,/zFqш, Y-W-$ok\V 8{ze{\a5-@sC刲SP%dCI޺5'~fl{~_՗n8!&>I,UUfuY0XdU0j=LmӨة~HRxdP\M9ye1o9 )ƧIu_Ǐ~[%JpY ǵ/~EĮ6 ܬ;iQNwbBu6› UvjWoRz]6u=[ +Xy {w94+wbW؍bUʾX@Ug;.jE婣 V:=g=W ytnQALv:Nci^²*8g5p*jE++Zf[q7 {z[r:hCJ*MfxӴXiӧ"}Bk َuOa\תpؐZi4^9e3n'xR|\Fs0-"G!GyXcpu5mO‚tMʡ/7 z,]eMੇ ^`uUz;s*k೑gY2=kQWGkgnd˦G]4xg 4c.1LLGT{?'}+zad(a`cQ/`H44_Pi5Mt:t%_C M .ʰQ˗'Žr~ P-oqyHAc$30}f[vrjhJZ6m<.qc, F'58U9u~AԼ}(GA;o2$ZTב]";\-|^i[\&%)Cs}V.!igL|*J9wg4崦-Q{zqy e+AR0e6YXwĕ5#z_!*~#i6ؠ ٳ Noo;rVUDP51 ≭U*5o66ͽWT,Aj]4c[x/c !vp#"Wִ_Yz4]Yѩ.4Kk'،Mhɨ4\0 8rx~r*x7*.\.jO v4VtҮe PGQ-ƦTgf L'0TAǕv,!72"hl{,})%p ƅ 2|1'';Ů ݍor#,ݛp*P\ekm] Ke+<|[piMope@!Nc⭼z - x>w kz]l8Qz\!9yj4p:fFb 5&H?J؂,lY?m(:ڧ]pY4q7o~m=~5l_8b(<}j0;bLJni&{Vns_{K^GkuS!0 Mi<5(;{ 5+:lQ7{6KYBV?3cIѫwׯ\ů~Y= Cr圈 2Cg6Һ3/r{hٲ?{u0 (A]m]kHm˥;&{*]T\C3--6D#mINk(^B@_ li+O'^ЊPۥ-D&Xa%j*](/UWuWwg8h}j4"cX* ,ڜIeAk $kKv&m#rj&s#'!;9w+a*ϣFD{|l 0b=pM)[£W!>A09`MeʨwJH^XCΜx _xQ+<*Zx eŴ(]s{ۅi;pANm`WO&Zpk`U#ѬPflWj{Vy;'Jߨ2|W7xndg-'~`azr jjsj_Z˦mfVmO=5-MnO/@Ch*5yX"Tw5^9F%{E)nmiwAGk{uRivvR**C45:#RTɊ:Z.{:wl\Rf<:|z\[wUp/xb:Kը`IZAL +nTV20 :- 㔉GEr,"^1p܇fhCւ8nUHb٥ N__Ao 05Hniva͍ \|lڜMf܃Ab6WjGRcțOkts 
VZL13X _܎^8j!H~g_6:b3'oKy[l5j aacy|r^"nTH&t hA ;xԵ '}8M`(=]Q_˷:heVn\=et*jKW|ki 8ЏDFč[\tkgi]?70k4D?sֵ̽UpĦ"]G>3.%, ՖRzB`b L孉->Y ~}]UFk  f}I ! &޹$z葁.MPlY)O`OoMy@! sj)oC_hC ZD ! B&?btEB8yO |=:+qkrZ<'~[\a’ª8O?4+#L-a_H֋'=s{qRT)!,X̎1SJ-@ s?7C+[ی'6FQYw9£r-˕;3Hk$PRqO߇5/COw.^4GRJl_J9xrfl}˃/B&s~:]Ŵԡ7VO"1qVk [{T]MGj5= ~zr3 Tw|y*1sgSe#~6E&̚=4$.1WǖGŗ_iA= ϭ+ᵆhl5̍Ϡ؋Cod$>_}ƖO"^WY{ÜB6|l˷A T;zfO o\F "5<m^);xuC9Uo}z{]#4,Ƕ5p?pmơW#igv_=]xǯ kqW%6O=_|N'17-*90:6۲ \8QtXIk6 -xbʓؚ3G`ݼcxrE`z>ig]u n]怂%%!D@:`SGDKФۤK$Nd'.im&W HȢ- s7ckPAǨ5x?yi8_l)"6#w+(Q#Ĉu7r_{3[k#$ϟw:f<99\U.Ԝ*ġRȈ )Bu~TQ ,dib/O b'w73Dyi)2py;ːMGsǢ],}"k }[_Kg? m]]?^M8.h]xlN +3L'D@"0nvX̝q89E)Qd2R`'1%anl<Œ7]uQS:&"4]ևig*~bN拨ЊMWʗC| .=~9Waxk΢Q&nf5>M\j8lXINgJ,Ū|sIopjϔ3A$)d֪息Y$2SWQ}"S+q&D͟/Ԕ =S` u8֘#-b 3WG5}Lw?Wq Ai1"+p8>lJ׈MMɗh02ްl.UJ#Ca\2<=1wzB"V AN&l|J2@F6\>Unig?+Twޅ(bgZ[S5HZ4E+08B%0 dT_DKo0R&">12D)g! -9{IIFԅBU(6 #gV.hӵ{;iIl&;B?wxنFy)WFp%E]lڜ$ PioC@j7*0ga udnӖn*"GnT"aĒ(oLLO 'NxF̭QHzzԐ;s*c2QT}i\ fEm"v꿰8 ͌מV44&( FD~r܃A0p쏐voHJ*HdO/GEUJr )#4>E YE:j)݇"7ߍo0^2fq[?&N,-b>qS}fE2#lGuc֊[WАȈLO7JJ%֬h>{;ٌGJV_Z:V2KTFNkGâ7JM;z) @AQY{o!<1(9uVH^լ `?hUEY0sU2Z{(@2>J `>Mi:.X14Z@ZG|sEḐkRO窏j\ ψ_rI1(upniy45-(KӅvRs!4YEw:ӳG}Wm?(rRF]0:;֔1 !eo}~pNI1R4q]JPu2ޚw"8z07k{xw]ouljA9hHPjcmT,֊ lE2'c 4vѽ39[isuĈɬ/Z^( I^r WBK h<()M֩,="2W ئSjg" #yl)ꭳ'4VY3(Ҷe_*%g8!+8. Jv&4Dl{y֞w?4]-!;8:$}@JY煻(qIjzWhgkFd>$xAX"WL^ mRu(j 6>XeZ2X.".NO]ex&MH(mTv}P B" i^(0?)}qc0uD9/bG~GM6aQC= 8̯oFhǗN#hDJhQ\rOՊ4~yݦMml?T3u.EXZlS%2G6u]u<(dTؕF k$v L|m֊FGl;mw}4+J0ƱtrF>VptH$7W!!N=zd /"E77'ȯx(η9FdY(n9 eTڤ iI`רJpMN-2GYD@" $h2<z~! 
CX(c]W9fx:FH1rG+sӣL]]}h%H$񁀔~TJ$D@"0B%D@"  $3.I)H$HLH%D@" Hg\t$R" H$" "(K$D@"0.Lϸ&ID@" H$#E@2=#EPH$D`\ qMHD@" HFdzF,/H$@@2=㢛$D@" AY^" H$qdzE7I"%D@" ))D@" H$nDJ$D@"0R$3ReyD@" H$H$D`HgD@"  $3.I)H$HLH%D@" *~S2PX .$%D@" IOr2[iu`n5rV=Q[V(H$x@`T){rM\:N֌{ŁcٰAHTc":ڽ2bF YD@" H$=Vz"fk^(:r.__ dM< ,u9FN%D@" #fzֿ)'(<E[סj"ᝏ(j~ӀbfWDݴ]FD̃_vu4աk 6,Sә}da gj qLӂ߆eISk1%p:ؤY\9Fp׭B`42EK̕'-JUHͶ5q2/e;Z~+6 /ޫH;.¦4J!{HwiT<FEڄ mTkZ~<&Lv\Ka^#:՘jF[ɘ6%OêZ,7=D#cS0_ǔiWU8D@" HA`dL8!FӲaDJUi1_dnEM_TeڍYHN (^rb)Fޤ#g 5+q KQ3edA >ţ7,iA\r;A׆o<C`zVn{ iBjퟌ.+ֶ4'OA%`8܍^#b뿥Sd|tv-q;ؼ@exbhfmiE+GGĹu7}>#+rRpDwv}Yo]u"D;׉CZ'LxL7ߊw4F84޴J<Ǽ;W$pkfFo5.~ zJw.o%;¦grX(&Y:m2Q7 Mk]5'ǁi LnԞ.Q"@.K[\8 5f:M2cj%|1n\_;q4_fFPo=CBX:^ z[K*fHj ^0'oߌptzn}v5XN B_O(!'/4Eo@zyQ GpZ>OmI (xZ?-]TaNO#?GWk(-)Va9Jٜnӣo2}xwYB]"y| x2޵p3<(}h[С.i+ϣ%QM4]s H#h+ mp\¹lT+`^\~e[Pԍ+K~H:N։7N|H1>DP*6J絎".=̵[q?k暣S[w>LIY,|$,'{Ii-Y&!) O_5\sHyk#DJndjDaÞCkq|hwpq!1!`BCryFiw>d| {ɊDh O L@_eـx =߼Dexq|}`<{nm HZEq2㶈e>>v |V?ni;Oq=ULy{t˿CVo!wb|{c#B O&+&[6@/G_xdnsk1ޡJ0<<.Z8^mh `80e,aYB[TK JHKWǫ-m6-Sh`'<.>֤6 nnfrf&.53jPר_S÷hjmzZQtu 8-yaϋiDcq ׮>[T\Uĵ̬\df=;1l+B!)>)w+~ֆ]^|_#Sg.Mn5?/] MFeqM#s@`<WJNam(ׯ.{V9i\P 81.|k"r/܄rV_>dʷP5a)B0KwJ%V_ð=dERw[ڍ%(<ЀK`+pգHF ݇sg#P#fow>KcP}J^U &J^QH_P?_V"!%QØdX n[(:R KEwXZX=Q_weH#i\B-e36੢u@HM`A!w}[քTfvDI85d?-G+z~2 4:Eʪ&`tp,j]3<%7 }T|q5jW4lT"u~%' )zf{!s lj^Y<♨n66+H6PNVUDZЄвGs+<͵v2Yk1(N{'pmS>4̳3yJ-$}WS^E#tM /r!Q"奟1 NAoޥ`q-2M1a:Y/c L-DYI Bį]RbZ | Oo@ ?287K`$FYj% c~4:MqCjÜ06/5)ojS`UW?g(l1!#tPqՍ:w:Xޥys,6?lߟCӔ8~>K_9j[e[`3R wf@>JĝLAw5 baeO܍g}94g5Ѹ}CD,_v͒܇,./,JY?oPjm*gj~$O l]r-ˊ~oRStE(s9{XNi.,6Lߊp O6:QuD׉rޜZ*܀ih#2³zp%z)ݚS^I5ސ}cxD'DFh;+b}2v*ϐBhܗw֒!_<->ͷ]%ت@V&UNN1mS}MCyjEL )1JM{%cy%n9:'n |7:hQ~isXL̓';.=KpD~u'2RlSQ.#ы2&7И%ȚUdr*bŘjd]s*X+M(v2u_5eUKmD mXZj^yk&$GNT3 40?QOvD`|# 1v€<5|%ɸ>=Ls ?<;a"@+T]c'ܑ$\*9# nw$oW""+-Qp_EpMxy ӾtIVdGyJvGFTqM:/ġ=NynXnXe[ m/>~χo!]R +-}r`4j-LOdZB ?:\rYOVm1 :nm/q'ny.m^ĎtXjaV Eڜ•sb m SL7](*%ADORŌ:=wݨܓO&*5хIߋ6螥'Qc<^SOE_/ꏾG`[PTebs GsA$wy[~XGbs$_B7[lF1j>:)z@>q1i׍>_SCO޷%}>Ϧ/Ϸϭ]JjNMDTza<KA)#Xo#E3&_ L],SO9f3\tǴ+Kr }o*#HJ-Kc&._G%?#Z߆.k(:P 
Ź$6Ź1a]}#Fq‹H2nw8[RèXĚ}Ne Ÿ 쳭s՞>< ڹK8ЇkH-+xkq6g\|钄CNI8s3N)y!7Ex@_4#"EY_QAk6OsxlτϣyVR Gi.Z\z(kV~uO@\ Ŝ'=15T"Du8Җf^y!m_ -FH6p_4 ľX}q;+e1!hs]m!=a-4?{߶!_׺\mGx=%=e-YF1P-'MaxO~u#%92G-蛛~h܊nj'v28?fy%& v6)#.ltmZ>xxEAMPLB }q,oKS̗\-PZ",~d|&r2P Sj2 ӗ0'h& Fq;Uc ^6l\I`xT{A&9 lnr7N&s3}9(\.|<_s짳[%ɦڮ5]%bf b$Sblb|{sJ~_u(mpu01g/#%3cOer8]EV ZE1~-voiN-E "b k[ź#* l™;Q-+~pC"b) VG1b[>PBASoެprl&0ɱ`nFL J}mi.NX0p`7,cVEe4Zsц!]n6 ~?L9KͶ@\9JSӶ`p3 ql +Jz| :b>E}]q_g;&6h:{2OSlP%M>5cZצ!ζ ܭx+?ǑR8^T+hb4 ,IWop(9ײ/:z$P| ba޺ܺ;z ;NSӮUKA 5G@j}yOHyD;}z-꓉8۠>43=wKvLIyKF}]5=v]MK\usGcQמWYq,Wg^\(*RXU=0TSFRN%HYdϻ˴>d[ZպPl~YF:rR>T~YYOGv`MppQ]p̈"mL*^M Wsr.ul՜.ފe0#݁+3\,\,6&eG֭[a'Ve܇?ߚbg"Lx&ed:>"Ӎӕ`c8B >HYXl/.J:>qpA\q}oazq%w+vq9vvī˸l,jʶ(u.7nY]rέpLGO ;Z|Z_*.K8<8vF+lⵯƲ+Tc'Q뫡4ЗڳO`V)JPfu.=}Q}(Eu n_TinL4i ,J.bY+Wl..S_lT=\s,c5 z,]%lߔL^=)uq=>cYmfZГYU .{p査M$hd|/Ӣ.ւk <οura[`A09(h$⍼J,$Y.2[Loo/$Ty$:R=o[J9~_ZGjF))*T/J\q}]j/^Gt P[](ԀU"wW+goxU%b\\n.hiryѠ6uHo֭B`O ץc*UP*OvuglDZ%X=&O;+הX@oޭaJYU܆hmMMġ[\TgP*H%7qS)!ST}bTx3"t'2C/LkT}*:T—ơ߮oVK:+67jqO[0ȄPg^#Ƅ)8͐89{. wٮ08W,U8 [n?waOǓC 3^y~D1I9r@*[`Ԓk:]dfNgk.'5SJ9fIOܷ߭඙Dܚ[ޞr4-=Ake֥ \]kqU18! dlel+R=nމKU٭Q6.kh1hK;/QsL ?Wp~T%siz:m@W$ C}а>+Ld%1s0چZ@&w]*=ZX~=dp0yrm_h8 4ŋ^Vi 4Ǐ=bڹo92=jxIq~||(0/+ xC&6u-|I4\Pі~(iRru:7CatG5M=י\fj!r͏wu*ŝ¾'Ѥp:llO/eK"uY n\钞~b 5\,(xٚ*,h>wht"]P#L. 
+>Pb71-okvyF۵>WI3!]uO;ќЃ>:k|0.5#A me_ ,Z%cc ǤX0ŸҞ]8B a iW_1vZƲZb"m.r.9F+sQ$Xö(]WP}o#axЦ?2&GyY{:0fܫ(ZYô0ڍd:  mږQ:)šQb9yD@}i{j<-#谛:չ m hdVHI0PO7Xp4M Pari>J[/*CL)}C?8jÞ0ЫL_8.=1弹C@bz 7Kn^G0҄T.ʼn8xCl]b3i~fM]XzR KG}DXlBy)lc-0|omCnEyuDyۻ8~K^%j(!uXWdIHN_J|E C mUCQm.s.z'5Jc7)9V͹VhkHa{'͞*K2u} MCv27\ۻjlV})]Ͽ4Qq4JȈKsE.cIgc,pު)|:u5򠿂[a[)W?DOmh .~<\l lZD@@2=E7ʇH$D!ǕD@" ܩHNyD@" Lq%D@"p"0 @ӏ"t肟G%ʐ=^b=bYHЌ*38)8rF}6آH$1 ă{J>Gڽ.Ė/޿620v16dφA<\utOZ "_Xxr~4&*_ab쁶c?6:+6fi~uQ;/N~O|g﹂S{G);=d o7jH}}\syB1~4HzL՘oE}{LQ:@Ÿbbo7ϥ5 $DC`:T%1,,Jfԍ na'đq: ,8oR ^GMW8:&L.k-LE%*Λ^|ee ~@|ɸt ^eFM=i8 D}'Q }cD&tu&ha%f T .J+3L1wa ֬ i%3 I+1GKNGga`T,ZE8s牋/QvKTT["~EBw2b:,D$-Ūx՟]l{{_sXel|.^ȼg2U" H4XuiK@[s L47A1m;̫):/w#0DXi|mgWݨ(*FEÈFhQZDc‰CW466#BcÝ%,5GpaCəc&RayUlH"3!@O)ư82<@sQeE3B^{be %9?Ci1=HGő;ɠv"9tZTɌg rN)50&},cB1ޑƯQT$$> 4'jZEcd9 w%$9kz? 02F49әN \i, ?f"Df}5a_Z>C/!W%އ`LZuu?`.3W({qlD5D@"0V%8M8-_aX>%RhH6_(Rf|z,BҢ4f2=fc6N6j0$^Ӟ۟-(ֈ{Ÿn0%(‡',.EnG{D Z!Sf!H$w</7 :nL7YO`ӕslɓeK34vO$U4=~FSFJqۯTEoHWU 9SWrD@" ܶ4EpOKZ\Cj p)D@" H$Q@3=+eߡ'(110!薓6 *$D@" +rb5{uPZdO՘o:/://=c +wi)>,n#T\BͩB*gأ,$D@"p{#p k+ǿ,^A! 
֡,]>HZU .:ѬD?|NA9J=EJ>1[.z:cԷtϥ?B $@#s{Bij.g-$D@" S; H0FᡂOԯ_D"qHxcoz#K$D@"pG"p˙'j.ۺ0?ӣ:7_'4+=ٳ1qNsdb\x4djZ11d톌0ه"݂H$-gzں31 sO*>\ ZA%r ߞRC4BӭWnЛˏ4<ҳ&cD@" H$7czOd/SP@DBG7hlzY}[+GD@" H( sG.V" H$-d?lP" H$;ܑ.Z" H$w\>D@" HH$sGv|hD@" yVLO`<'EO8Dz(3H$DEg1nޜ_eƲ_cq^V5feywηS9-3+c\?t^ol!63 Kꯛȓ7D@" !s)b_jo0𝊗_~˫xW>H9u9 >6?4ϼXȍˌ0q:/|z:3`a2KG ~n$>ww$ϿDG?"Lm g<\%|jcj =hSjhlFskE|櫨wקg @mI{N` Okm% K.gVfc +jn KkG‡(d,&SCx5|[ofz76Yf)"/$D@"0$W!a9H3lIU8LsXv% (y蠴"889 Dkj:D Cəc&UJ3QODDa*L@0/8vblSIƯQTD A5~Eɏ*u2LA.ƚR<1 \qG*'PewԴ>$2Gi2P>g}E3~;UdH${m|ESf2(*>|}9}g>Aa0P vkTUZlH"N&O|EP,hمx~*FBiK !:Љ՗h<,ZajZ@Ca)/,[ӄ)Eh$-M&9UD@" LO#7~sVWP.V8"EUW[scgwu0w bL1u+krE&4 2$ fE^U|:D@" HFmaӃsߟJ"pGZLW1'ʖiE3LәHA~0;ǢWV 5D>UOuN-8G4L0u"6xzE\e?8 iBŅ+}7ҨvzA+u F"~<2Cyft©X"2`X|A&ڷtDE+ TeND@" 8n IGil޻Uu;61`y,N )`*ȋHII+jL1yzNX:毱 &!Ӂ*d%( {K-K~뱿o}3c-S w=86 lBR_/9\B)w|$<5 fXſSPji7cʀﺫ_q'3}?'>V;cNP'A@P$R_vH W3WS}" [mWI?#xUW+J"pb U4욷D B̎mL[ e.e*3٩ K vN8N/eӔYNJ̰bzQK7$:qג6ZM7da>sZ%a2X2ty;0;i##^ctn+f5&ղL18fX,3 -v'ʑJ-}qع-xĀ9 Ֆ[ӑA" ڳs1 ='RS]:TJxyՍ/v:Zz~%+Y[I3ޅ-`Iuz>*M2'qV?aO>ť#3Ft/豣 _f=ʦXDەO[*~+?RɋbRMQ1-Wb@=h0%+C+Cx}S6GKVs|r t5bVݠoB) 4qE8(54i.\Nu;;TP ;Z[qS<.yS݉hkw7|- Z^NDgX]B mܚ~kGK sR_Gy9-CK-Ib2ޯ,χuz°\o>W ?^R`Q<,/?GmG_ַ[|f ^GdPUx$MC"ehܵd(?qc#CCPIcV:vsVB)gA2d7w]y$5/s.2 ;Bv7|ǵk׆;suui+061ouؚcW 4 -+4tNh:CsAeB'wTKQmr]ǚu'bNDFOѴɏazl;K3P8ZiaRZOi*bUzUL{5ݨtvP곴k.lCM_%uuK&Q}hA`}_#qʚXt+^'Œ_Yz+S84Q3m}ʧ Ѐ"woWk U Ic򿄎ϰ4Wk2J +sx5jb=e[&9p"qmާ)p,mbTo~B1tlg OwF4Xy)! 
~TdSZRI=X[/{_!pb>fͅcfW鲗,EIf=~Ak'pr5c/s,KG ^q%8g8C}wagik0|Ӻ֩hNS5'x~CY\}>i-N@́mߠƍoɡMEj$5[]BI1V$޻+ 8ax =IpzBy˹(>m_BH+׆ݟҞEZ3KԎ:8m'PE 6.muߣdPV6֮[D8ݕ5O9w&~RF[19 Jr u51oGÍhnѹZ`b:?>4\wvvˊ\VX{ΡEFn\$Yʺ6ا$`JE)gGw9XO*k퀵ʼh^j1&Od[>׭@^E'cݲx0GSo":)O{[W;vieHFqZu"sq,@il |+UhP<&`DW(eaDk6caVWdqRg*pE&Lj>=T?* 3ƙ1'И.T صRu-x"{*R[+ Z4[i Xp吝(vnuS{aIg{YfSշ"4&x^E޶7?Z5p;wRtQH&St>)ڜ?v{LYηQ6 S8y\KyJ6&l {moșX@P8Utb\ya4]Į[MIm k@ONL--P}VELYnb윓]lnW_ s-ıgT{.]at5ȍP:G+q>Fvm@W>[&_p'> 鳞%:w^1 ռU &PhWlie..&jSރM<OZ|}شPƳveϑG|.d8<1XkkʹA~+Yϫ r99?-;jogGͦmz;׷V `DŽwv}v5k[ Eu5塴GzPV b|BC}34Qk񩙞˔8r@c7w @a^G\ҧDqFj^{YHrT)Jˊl,Xby";MAr4 W8Ӯ& 2%ԕɘ 8kH3Yi/# ۔cb}̳5y6,a(͢pӮ@IAV,p2!LէK!AM QX8OýFž9?g|8x<#Vc?WFҶgn$ اY7Qdrs1o<_q.1ה1 *HNSq~H*)qITjF潥X҈;o4^ˉ3 <9I sj05x1#zS)toB,弦gksoZIz0uq1}=ஈɅHc$5RD:e=UWf<&O#G{]yP8AOT :5!bSұ`w&=n[ ^|%v$7 tR.$dR>8ϛ|鳼zj#@ssm`IZ3ҁ{AYQ7[[֭hE%p_^,^X^obR*TUU;d{:5xlm()*.?4;pMoVz_K$~Y$:s0E38 g,q+%k:}Iiˈ}dk?B/)8.&~B]fWX.b>'wu~/Sp633[Yw|bkA g[83@j\+~Ӣ~\?P1vEŇ#1 McG^sk#wJ2IGxpR ~3#&RL]AMS 5!3GW6iغkLh@_arA{)\YF̿k_Wb|AdzW18-u3g1g>¦;d{d\L^[eVڗǠ6;A٧ҩWxpl{#G !UdFv^rqc2%'Tc*9} ϸW=غ)obb:J=Lku%ۻd]|PO<՞a{Їwj{l/gv`[x|1MVdA(>ƛ_[3&fGh^?pślV*Id>9R8HlzU~stTSA@Ɔ?-A@A@@  0FDgErle-?ߞHzghU{&a/JiԑنT#0$CL~47െ$f΃z|O޶N0^h)fE@s"QD-7g,]D+?6S4 +h5R$Hx7;f>KWt&R4kB%w"F5TEr^1-G/:TU]w4^i\TCөx lq ?+N^*\r4Pۄ.-~)yEׇh4mLZbJܣiU)Z;idz&`eRi~/M}8bFQ:xi beUnp;;ʽ4`'ukæPW\G}JޗZ .3ux(Km… -kXf )oW B)CGsضt2Ӱky=0ayKEA'/G2KR$xT5mTu9ʘKߓ E DgeFjUyՍ/PxVЖeE1]$%!ɢE+$#zac9;P[..Y4xDz4[ı <ڕ;[*~+ Oavĺ||䡼cE18vGB=L0~Nji]*j'FCfQYfϯ.B %ڷM(.%PKc|w'.Z5mviZ[qӔ<=Awv>JїoQ8R aZ#mxEjA@dzB~kGK sR&_Gy9-lQӺ%I=B WVũM~]aaܡu\RqyX-cM%#TN(Z߆cn}=ꛕ*|j{9֯]#hrGb?4#GbY>/Mqcb$ #&}n?Yuj Y5_CKHr?.qq$k: `ܽk&8<}ugDG?kznhXik=.ep{gCx Mc83r{tOgL(ZzxuXY7yH>b5NO}Q͝c@ fVMiŰplGױ&lUmc!֭T@d4gե ֛g+W]u??OX2;MWF!6ޕkQTgD .laC7 Tў*r8f^\7(lv݊mCPsHmz(s췁~SRqvjW<&nG\{h'޻Y!qwS֭+BxzZRIBg|B=lڈKxtv9:x_ akK[ޜOMF-1>,-!Cg#$, 3w֋,Y#Q5W sgdȦ $zOAL>[ӟ9V5V{U\VQ"RtݟUxLrBM[Ei]B*v_e\,PSn|% W*"M9;;zC3j2.X}pUq n}4^\zgI < U|VD'gED]9ńSwb-cRuL7&;NiIko=F} Rή 8Iz : H 6էY)G 
иdq~k1~a7-PInsI<@BٔS}l;V[9+Gui&"?Ý)#b&;=j?C#$xzq.՜>-qᨽ|RSuz8Gj\Xav|rF7.ڑAMZھK4 6@Wt(l86:8:Fajf鈈/"k$nE o$֊Mn +^5ec<0I"n5ښ0+A+R2`S{*urm Ŭ q jzHKyࢶ`ʹDm!$P1(o1Ό9wz^-LSBZU`b Qَb'm9ܾ?r!a%^ sOӰ"z {\tEUm8E9#LApwGre[)WrQBy7ynGقdRCmJAM;oݪe}șXPm0#cR{Qcu7I\VC)x*n*w61ʛw܄D#ʥ~ns5CFID(.c3,mH7Hh.frjɏG\Zb?={NIdxĽFLFhjGddjdC2 v}g?ar9 1aG*^.Q튽rszԶhf(FnjUqי٤zajw iܜto0䞡39U7%kqKmğD)N_/Sc\abz>trc`_6-j*2'i[Q-U,ݓ;WmV iO#c=`2lC'6?{3eNA@=tvb Gpȯ$#:S wX=f*l Y+zZuס!uYM0'/1NjԟhNJ1^gD?y.ڑǘ7xvzHjMNy9=qH MyKP ]ۣ&eٍrq')<'#iy޼KR&q϶0YY eyTvpި5T`wR, uD;>{uӰKSY߯<-ILd/SYq!!ѭQDH?1 8:x3"@w.9AG<^ ".\dl8\c}J4sésYMi\df9 TUXYEUdI_륐] ߕIG;|Oɾn|KV?X]x18ʺs.ֽHll{O.Y=Aq@Ő=B[8&zrxdmRqP^Q%mBL'!z&pܬ)@]]iȑArTnPtT`ޖc}\V`{ArK-~ JӀ6xq8hyxnAj#4+@GvU#By݀WVYqyo^Tm0wԗoQP aIa<:V81C0,^&9^O8c"qvpv>P}(p 9Ⱥq&#E"}v(qMștq]#yKBr4DF0=qn!z|O(N-_`U,f`[%q{`E$4DhGk+'矐\xk "i p 0~DOq.n4EOd<2zDzƍZ6碆tn4GfLϣ=չ!GnUqwګ?;:\·Q@G֡xHC0BIsѰb]?fHtG!>u8u=n@6F=q^=9UGC555J rkfTH"A@ YGl|hWt/~\1 _UVI g?$w Πjﱣ)[% }G(`DkU9V79 C8{v3,8zOT㑵xsBeSi\Ĺ*rI5qen20d'}7/f6I46=1_d1XQ#/ -HITt?6` ֲU; t\z@ O,XƊh gb𻪹AEvo I\#ʱ)FZ%a15ICwzMCEH=Qkk=gYY[$ nc\|h!wxkYWM.OkNo}瘫1j`ߩϹml&t*~BE \ILWШZox6QڊlZtcsgPCʷky չ<*}{ vЧϩnT\͹տ0CneZEבi]^Q7fhOIFs#ϘY(+OBlӆoτ^ް=xxV Xw:X톿ÊR܅O "V(FYǒ9_e ܇7c-?<woŽSkހ҃oU|F$Aoccm4*c0Zk-"G.ZjnX3?:mj?׿mkr HJIGc;x`?5k5C!scV0Z3]]Do dBM Zw$#Wox GjDBTѿcI٭'o< ܲ/cܒ[_-Q-m'qE;Wd7:7(jȑ;V.FnZP -<.m9.N`9c"P.I9WI{[Q ^rn(i<g݃J :1sd {2:[ڎ[aGL7O5ʎ@Flg1;]ܝ`>y1ZpHuDSz=j߮^r1[^~mg1\ OPEcp1V*kA?qq?E﬈:JpNN mMWjU#Kk/qub31epJ"O pX.^ E?D…f zWwej5B i$ x\51a wHeV:w[3ĺ* O pӾkWcobO7CP {Fgℨ/:\n>r^xncDqQ(O}oD. 
[;{d皖t_;B>3(pw& %9{5Eb)97ʥo$j*psc?}(qwp HjYG;_!5#%otX8P]"G;` *FAfJkrkHI=hPxȑêUQ<[gд9ha,֐17,EܛKiS8>47<*sy+GqoxQ;~BVu(!T}\i;1( R4.Ρ{:3H,h7X %Oq'?η5AASc٩OJYsgxmi*dY(0c/ȵU?AZ{P;>9<Ҽ>/pƧr?Gn)f'NLGISC58cxv)~@5RA@LIT `JR.u"G@ /&N ^jB&Ϋ7/ -UG`췷B@A@s51u(-C(;InM`{C7Z~].7SA{xo8"HBA*L;72aIa<:VZH7)ReNF ms?]f㐪Pk_H!z⒐L=)셫I](=G$XUïJTȦYTy?8z|U!g!'7ðQ,w$4D1Sx(\O\HB7rhb= Y1-宧}z^mU`\>C56ydą jE,lz'uSQ෹85&ڒioNLM ȑkv?MG#Sgp5_#n`p8b3QV(c"Uo#>ύJDE ^Lػ6\Xw8Xzy_ɜ~9Oab sC$AqM]׎QϠ島H#5҆Ts?61HN.-qx?B?X֪r02f nr۰%bX.9H[H!m팕^{릕;i j7wtΞ CjmY<sG=e[)gy|NYv;Ͻ=H8{'Rd89eF𬁻6ݾ%tZCYCnC |6A, 7ia܈=֟OaMTttpj[Fep\bUι؈78|Y\=_aY_>}a(Iǐ$-UAF.eGKN|U;Ɂ1C)OaAULix'$xX]r]No[ Eq3œ#C V*4[fBcO 1׎}; EX:lXKK恪aF}QdŁibeIئ4u/;mQlE @)iXl9^{ kW>4Ssyc.:h\sY"2];B$xH[a=K[z4Xq!x2X\?-Xx(c&*cwTnd҇چIk0-g$QH/'+A|<$~LGhmmķ~42^C+tY. oUٶ?*4بumirۘnXZ#˙FT7ˣJ-X]= ".Y Ake( BtvB ylpy|A(g#O0k)b-$?:6GtTgUF+np 9i7e )巻׿mkr HJIGc;Wry2۰Cѥ/-<&.ڑ4v^j?n_7fk|NкㅆX[ierI@sV;mXoٻ9 4Ym>O[ iaR3n'i=Ӯu-6- [Fm8ޑظ0Ek[E_ [8؅OuX'ІrYڣ-3rUpSwkCkoʯ ޶AqQmgdipV{inTvօVcS.AFl f]O<(Jܤ4e%no8tʋ;K#-;~#؉ȅXuAFT~w^;xݣMsgO8wVDtNsV0+f bWwa4s (sftq YOn`ǡHmouwX_8fxi3,$Nȥ~C ;K!وweƈآ=ZǀLɘqm$aj;ӆ&M{^">~ $>K@燐4,Dd|xj> :NZt\sqGnd$N+߭'D:djwx@3|> %l1*RoxABh= _8 gz}%_ \dst%tcZ_A: )bJWx +cC/Xսx ` zp3:ì_ݚ|y1ԇ@#P/C{R!c?vh!b,fQΡ 5 cĕܷcI3IDAT3xĪș7X?㢢FUYU&qdMFL]/.rHלּc:__ᡎ#mԌŻen^86p,%dz`~,15?;ǤjDŧPn8cr=潉HSdX5*2 (Cr'?Q(Ɋ앯))$xxjzhף0 B {Œ{r6vvx" XVl+sY#[xZTkr[~{ ø.kɽq!A;C㱌cI}hE qyGYP7\(_!{IgAO'7ǢDb~/r''&1s fqO:q Fݯ`yw=q'lEPo?Be e5K[Q.. ˰7\RO ~kmro<[4]ۧZQ.;[ wa_Q۠.KFzt^yzQrF{3qpS~Wb0\vmAՒgk.H5μzkPoRhMrkΞpx|+`D5Hw~BOQ-l^12Gː۳zmϞ›7HuT7V1n7-L_7? -tU6 e>牳ʣݯa$ɼ`=ՇF'Kp-(zGL7nyݧ~w~!OZ Cf4?Y=u\k3z!⭊uUx;2fwl÷?cHݦ}̩w:+ҳ2N.Z~jZwsV8[nu|9ϏxutSki`9@ڽDyg&D^O/r-'9eviaѿ!¦sP=Lv#R(H9mLL{iB|s_xY}<8vhy߀b~Fuoo!`"2T;"xs4y:.;CK>5)+[;ux% 9l7 Ca*qT<ֹ#GJբC2[Vj3! k|%~h:" Gc3KSf2s}ihUkohޭnÓe5 I$BI>U9vP]u({[P,yG'x51|u)mms)*=G)RlkQ?,C GTK'!1fBHZ_'[PYxklGT5\n$ W]_s#GvxH`PJ!z5cPxlExUy5ُ)+r~1_C)ssP# sdG(6ik߉`e^Md jȫʮR^nuRTZuR:i*A mSB s{oXIWc? 
9W  p5!kO:dp9+!}~9.$A@.AJA@k!z  =ԪvM3t-y-%X#A@A@`≞$dXpKy|KƧV  Lr&tuA?N7=hi]}wC-A@+2#$7 6K*XW5ӗ=mxoK`hrxtjvvV*cdrdL2e>3H-_`k7cEv%ae mߡfèl]ΑA@A`'NO 2 ie5xQ q4ݡۺ FF+)4\}ފr! U~rK k³"*@Y'~p~(ITd\Igw)EX:lXKN 59=KW`qV}QO6E}]-zRieu~ē@  |NT [RP? u:=cξO~K!y{}Xe# tBK%$Dy<|9Tb58{"Ҁjmr^>HYPS׀.GA@0C` &coX) C[ N\9P_v^aBJA %ģ/k.lr1K_P-e=ζʯ  3OkJMWP83qm/HN7z3> ݊{?YA@q9 B[OhU7|9 $m?8'[(e&HVwpYA@\VY8>DGGS>t;ݎƱJA@.54AA@RƬ*mT[A@ !z¬å @" DO[A@3   ኀ=nA@A ':\+ +OOOl#3Zv!c)V%>`ǔ)Cˤ]-uغ%be mߡf: ڙnA@A@ zh ":D"{d/E\a!2'v/6ddiΡ 8J[A@A@p!p3]$V3hn>G$|UʄE^[ymƖֿє9UHs+luRyA@FmI|wN;$x\=cǀC4|v@g/`bc'A@AGW:3PVe" ]>Y=hK\,?i)0Ey ]'O7hL+  \aG2_pR܆ը|#l)N~X E0ӯD >kjPڥŖ?  x"0 & ==+CE^A@A zuo9f'x! Wѣ)n[kQA@?LzvXG  E`aNA@wȳ  =+  !z&UwHeA@A`!ťȏwu$2e"3ݺ^TRn] J(@# D(;ߡ4qoЈeX`@W#6n!14/ Eq3,I3l˧Q_Ğö@ٕ' 7r5jDZX j"rdnWe[ղ:>ku4=@kigRTBkkӓw nE-`i \m*[w۔&HFo")4`:Zm( O{:h9^ePPp0 SPpx)3LePLd`)J:4>{'ii|&{eZkGrtyJgُޑ(L816H,`0H 4|dc+i`k ]h/ R=?pu' ʝ"]Gk >Euu}"4 \d JSm1[/0L.l(uՅ>Ln>Z 4eZ3чr1t;: 3Ӏv 0A'R8u/;"a߇G]gJա7/*\>R#N^‘y7a5]`-jjKa+Qw GU7C_1-,'JPme2At*.bgXJ*46ɾ52P`Ũ*ye4d;:}$,mn%g)yaqR>OKS F^9}DzO9@X+_91ZԀr^=ỏ;#.Ǔ~56LLl0&#! 
5ͨv F,ЙnGu%>~]=i(@ zŌ쪕ʹ]PIQ%ʰ䭼al|(@bAz?~Y=xd>ǮJ<{1pGL_F_ƷclC\.Ztҏ.; ?cK}D`?ovVz+N~͈=cV+V"(T9M*Yھ6ku oXO}rU $Vh(Q'F][$E%]J-eQ,Li#(@ P891Df= >F-#%ܙQ0L_Mm6v904p]۲T퇣 v$Kk P6dĤ',r<7zyr'"/xDl!D (@X瞞8^ O`c~C\)=L/ <ʹk^3Fal֭@Rx;7o=3g(@ @^^D^ȢZ!vZqY)Lao+RVttwWhj)SHoOK1 PDcB.#i1d eeZit Q(.zai#::)] _"2&&(@M(PA w+!%aDreMdMנD3Ѿn=>w(}qx%x'\?&QV!A'`7&ğ经ѡ{ȦϲdJQ_ c/fn+pPpTW@]so4ܕ(@$szta2ㅀ'Brᅤt[ l!DӐԢ׿y P(N׹qtK=*K¡͍c9B}aڍ.ج[%Rx;7oy/P(@uJHYR<WXrZ #s ^&^x<4`އGP-]aV^؎S?O6DLަz,.ҾHpI>rSp5ԎTKi`${c' )rU.A8< g Mʢdp,GepKQR:DoTC 5}˟dߊd<;!A!F"I#8ֶSi"g>%}=U8G,KaZ(_%IɾJ }R=[%jDB"ⴭFtx*Ӧp7vW#Q؝V8!w`ȷ6PJ!װNL`KNX :?t.2,ѢFtlձc3LIУ|!Ak %ڃ$Dzpi`vaJ]jωw9%f Rb  0|`~)َsd/c0{Di %#so.y4u8aV{,>= y]*wM<)3'\w2;-('[tfu+2lAGʣrzp-4JoO 5/b0$EF~@uzh/AϏyD}Bkyg~BNz$hKVzۛ3<[^ ou59[L3nQ?iLp қ/(@ P`5:Q.'#+ه]pybx#sg szЛvDk_*pcAk7A9Av6ttwWm:9$CIJy' WuۑAHzV$ʱ冱'}߇c#ٷ~1g$=>دj9q E>v0Ent9#7Eq6P'a5Iu<}Kzb7V 8$$2^{6V\ ysvVr:k}'R/WqQT=E(@ g(@ P` 0|-(@ P` m84NT(@ P+ lmViɴXaq-(@ PP >=*4{\p2}dQй%Dܸ? _6v~:K)R)Fd(,DS`oʝ(@ PG0fJ¨OR2F$mD'%%O"i5 EH%R%KE|,H PXGYğ经ѡ{XJ~??*Ɨ)t9-8f.gn+ 8ՠj?U{>Put/)@ P|>GoH<&cQL?^^y<ӑB2"U" H|z=^IB P@sOOg/R넧~l1?p!~sʔScwk^T5CEIv 6VT )sN<ĝc"ͬ)@ PE!-_{#/zdQJ\u;`އz F8L[銭8Zw"'8(5 /(@ P-PpAaFkvY //F0?GYG,R(@ P =(@ PQ(& zքP(Pvhg;mkb=#Im(@"=:'r4Ν:64Qc.qML Rպv˚TJ(@ P${=QYIy&9ͩĢkR)+(@ P` lvα)'eiJVCۙqΓNq_M6ln mD4$i&27"ՠ4eD^} 7.|'>]5{O!0 4 U ė( =CJo=O*oYҁ)JOPsu:'f+zWz~,^MFY%k)wmC#@c 7R_β {Pm*{]ek#(@ PO`sz*a'i{b2} NvNʕS)$! _WSF]R))]X[>H2Ƭ>٢3/GR^zpz'N,Ʈyqm!-=dHn7FE|A Pz# "MHjU%A4fzy|?1:t!WǕ'@ݰ 'oۣ 2Ir7ڝӬ4)|w! S2#})wx\A|& (@ P%&AO~b{қ#ITN3n(\3 t@h=puĘIϷw>~֣˻X e2;n#8#S޼R t}j +(@M+ֳ73<24vY e%Eg+~E {2oHZ:IW쭩©s ^(@"xAIT(G4ڎ2(9h;?(@oN?|Yjeb4Bv=:(@ P0W(@`S(@ PA(@ 8l$(@ P z(@ P(if#)@ P`(@ PE!(N3I P~(@ P( =EqH P;@ P@Q0)FR( PBAOQf6(@=P(P z4(@ 0w(@`S(@ PA(@ 8l$(@ P z(@ P(if#)@ P`(@ PE!(N3I P~[IENDB`markup.ml-1.0.3/docs/style.css000066400000000000000000000064401421357706400162400ustar00rootroot00000000000000/* Syntax elements. 
*/ .keyword { font-weight: bold; } .superscript { font-size: 4; } .subscript { font-size: 4; } .string { color: gray; } .comment { color: #0090E4; font-style: oblique; } .constructor { color: #114CA5; } /* Formatting. */ .warning { margin-right: 1ex; font-weight: bold; } .info { margin-left: 3em; margin-right: 3em; } .param_info { margin-top: 4px; margin-left: 3em; margin-right: 3em; } .info pre, pre.codepre { background-color: #f4f4f4; padding: 0.75em 1em; } .info .code { background-color: #eee; padding: 0 3px; } .info pre .code { background-color: transparent; padding: 0; } .typetable { border-style: hidden; } .paramstable { border-style: hidden; padding: 5pt; } td.typefieldcomment { font-size: smaller; } div.sig_block { margin-left: 2em; } table { font: inherit; border-spacing: 0; } td { padding: 0; } td.module { font-family: monospace; } /* General. */ body { font-size: 14px; font-family: "Helvetica Neue", Helvetica, sans-serif; line-height: 1.5; padding-top: 1.5em; padding-bottom: 1.5em; margin: 0; margin-left: 20em; max-width: 50em; background-color: #fefefe; } .toc { position: fixed; top: 3em; left: 1em; } .toc p { display: none; } .top-links { float: right; margin-top: 1.5em; margin-right: 3em; } .top-links a { margin-right: 1em; } p { margin-top: 1.5em; margin-bottom: 1.5em; } ul { margin-bottom: 1.5em; } p + ul { margin-top: -1.5em; } .asynchronous pre + pre { margin-top: 1.5em; } @media (max-width:980px) { body { margin-left: 1em; } .toc { position: static; } .toc p { display: block; margin-top: 3em; font-weight: bold; } .toc .hide-narrow { display: none; } .toc .links br:nth-of-type(1), .toc .links br:nth-of-type(2) { display: none; } .toc .links { columns: 15em 2; -webkit-columns: 15em 2; -moz-columns: 15em 2; } } .footer { margin-top: 9em; opacity: 0.3; } /* Sections. 
*/ h2 { font-size: 150%; margin-top: 3em; margin-bottom: 1em; line-height: 1; } h3 { font-size: 100%; margin-top: 1.5em; margin-bottom: 1.5em; } body > pre:first-of-type { margin-top: 1.5em; } .info { margin-bottom: 1.5em; } .info.top { margin-bottom: 6em; } table .info { margin-bottom: 0; } .multiline-member { margin-top: 1.5em; } .end-of-section-text { margin-bottom: 3em; } /* Links. */ a { color: #416DFF; text-decoration: none; } a:hover { background-color: #ddd; text-decoration: underline; } a .code { color: #416DFF; } a.github { font-weight: bold; } h2:target, a:target { background-color: yellow; } h2[id] > a, a[id] { color: inherit; outline: none; } h2[id] > a:hover, a[id]:hover { background-color: yellow; text-decoration: none; } /* Code. */ pre { margin: 0; overflow: auto; } pre.codepre br { display: none; } /* Abuse of CSS for semantic structure. */ body.index .top ul, body.index .top pre:first-of-type, body.index .top pre:nth-of-type(2) + p, body.index .top table + p { margin-bottom: 3em; } markup.ml-1.0.3/dune-project000066400000000000000000000000201421357706400157440ustar00rootroot00000000000000(lang dune 2.7) markup.ml-1.0.3/markup-lwt.opam000066400000000000000000000010261421357706400164120ustar00rootroot00000000000000opam-version: "2.0" synopsis: "Adapter between Markup.ml and Lwt" license: "MIT" homepage: "https://github.com/aantron/markup.ml" doc: "http://aantron.github.io/markup.ml" bug-reports: "https://github.com/aantron/markup.ml/issues" authors: "Anton Bachin " maintainer: "Anton Bachin " dev-repo: "git+https://github.com/aantron/markup.ml.git" depends: [ "base-unix" "dune" {>= "2.7.0"} "lwt" "markup" "ocaml" {>= "4.03.0"} ] build: [ ["dune" "build" "-p" name "-j" jobs] ] markup.ml-1.0.3/markup.opam000066400000000000000000000033421421357706400156110ustar00rootroot00000000000000opam-version: "2.0" synopsis: "Error-recovering functional HTML5 and XML parsers and writers" license: "MIT" homepage: "https://github.com/aantron/markup.ml" doc: 
"http://aantron.github.io/markup.ml" bug-reports: "https://github.com/aantron/markup.ml/issues" authors: "Anton Bachin " maintainer: "Anton Bachin " dev-repo: "git+https://github.com/aantron/markup.ml.git" depends: [ "dune" {>= "2.7.0"} "ocaml" {>= "4.03.0"} "uchar" "uutf" {>= "1.0.0"} "bisect_ppx" {dev & >= "2.5.0"} "ounit2" {dev} ] # Markup.ml implicitly requires OCaml 4.02.3, as this is a contraint of Dune. build: [ ["dune" "build" "-p" name "-j" jobs] ] description: """ Markup.ml provides an HTML parser and an XML parser. The parsers are wrapped in a simple interface: they are functions that transform byte streams to parsing signal streams. Streams can be manipulated in various ways, such as processing by fold, filter, and map, assembly into DOM tree structures, or serialization back to HTML or XML. Both parsers are based on their respective standards. The HTML parser, in particular, is based on the state machines defined in HTML5. The parsers are error-recovering by default, and accept fragments. This makes it very easy to get a best-effort parse of some input. The parsers can, however, be easily configured to be strict, and to accept only full documents. Apart from this, the parsers are streaming (do not build up a document in memory), non-blocking (can be used with threading libraries), lazy (do not consume input unless the signal stream is being read), and process the input in a single pass. They automatically detect the character encoding of the input stream, and convert everything to UTF-8.""" markup.ml-1.0.3/src/000077500000000000000000000000001421357706400142215ustar00rootroot00000000000000markup.ml-1.0.3/src/common.ml000066400000000000000000000157371421357706400160600ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. 
*) type 'a cont = 'a -> unit type 'a cps = exn cont -> 'a cont -> unit type location = int * int let compare_locations (line, column) (line', column') = match line - line' with | 0 -> column - column' | order -> order type name = string * string let xml_ns = "http://www.w3.org/XML/1998/namespace" let xmlns_ns = "http://www.w3.org/2000/xmlns/" let xlink_ns = "http://www.w3.org/1999/xlink" let html_ns = "http://www.w3.org/1999/xhtml" let svg_ns = "http://www.w3.org/2000/svg" let mathml_ns = "http://www.w3.org/1998/Math/MathML" module Token_tag = struct type t = {name : string; attributes : (string * string) list; self_closing : bool} end type xml_declaration = {version : string; encoding : string option; standalone : bool option} type doctype = {doctype_name : string option; public_identifier : string option; system_identifier : string option; raw_text : string option; force_quirks : bool} type signal = [ `Start_element of name * (name * string) list | `End_element | `Text of string list | `Xml of xml_declaration | `Doctype of doctype | `PI of string * string | `Comment of string ] type general_token = [ `Xml of xml_declaration | `Doctype of doctype | `Start of Token_tag.t | `End of Token_tag.t | `Chars of string list | `Char of int | `PI of string * string | `Comment of string | `EOF ] let u_rep = Uchar.to_int Uutf.u_rep let add_utf_8 buffer c = Uutf.Buffer.add_utf_8 buffer (Uchar.unsafe_of_int c) let format_char = Printf.sprintf "U+%04X" (* Type constraints are necessary to avoid polymorphic comparison, which would greatly reduce performance: https://github.com/aantron/markup.ml/pull/15. *) let is_in_range (lower : int) (upper : int) c = c >= lower && c <= upper (* HTML 8.2.2.5. *) let is_control_character = function | 0x000B -> true | c when is_in_range 0x0001 0x0008 c -> true | c when is_in_range 0x000E 0x001F c -> true | c when is_in_range 0x007F 0x009F c -> true | _ -> false (* HTML 8.2.2.5. 
*) let is_non_character = function | c when is_in_range 0xFDD0 0xFDEF c -> true | c when (c land 0xFFFF = 0xFFFF) || (c land 0xFFFF = 0xFFFE) -> true | _ -> false let is_digit = is_in_range 0x0030 0x0039 let is_hex_digit = function | c when is_digit c -> true | c when is_in_range 0x0041 0x0046 c -> true | c when is_in_range 0x0061 0x0066 c -> true | _ -> false let is_scalar = function | c when (c >= 0x10FFFF) || ((c >= 0xD800) && (c <= 0xDFFF)) -> false | _ -> true let is_uppercase = is_in_range 0x0041 0x005A let is_lowercase = is_in_range 0x0061 0x007A let is_alphabetic = function | c when is_uppercase c -> true | c when is_lowercase c -> true | _ -> false let is_alphanumeric = function | c when is_alphabetic c -> true | c when is_digit c -> true | _ -> false let is_whitespace c = c = 0x0020 || c = 0x000A || c = 0x0009 || c = 0x000D let is_whitespace_only s = try s |> String.iter (fun c -> if is_whitespace (int_of_char c) then () else raise Exit); true with Exit -> false let to_lowercase = function | c when is_uppercase c -> c + 0x20 | c -> c let is_printable = is_in_range 0x0020 0x007E let char c = if is_printable c then begin let buffer = Buffer.create 4 in add_utf_8 buffer c; Buffer.contents buffer end else format_char c let is_valid_html_char c = not (is_control_character c || is_non_character c) let is_valid_xml_char c = is_in_range 0x0020 0xD7FF c || c = 0x0009 || c = 0x000A || c = 0x000D || is_in_range 0xE000 0xFFFD c || is_in_range 0x10000 0x10FFFF c let signal_to_string = function | `Comment s -> Printf.sprintf "" s | `Doctype d -> let text = match d.doctype_name with | None -> begin match d.raw_text with | None -> "" | Some s -> " " ^ s end | Some name -> match d.public_identifier, d.system_identifier with | None, None -> " " ^ name | Some p, None -> Printf.sprintf " %s PUBLIC \"%s\"" name p | None, Some s -> Printf.sprintf " %s SYSTEM \"%s\"" name s | Some p, Some s -> Printf.sprintf " %s PUBLIC \"%s\" \"%s\"" name p s in Printf.sprintf "" text | 
`Start_element (name, attributes) -> let name_to_string = function | "", local_name -> local_name | ns, local_name -> ns ^ ":" ^ local_name in let attributes = attributes |> List.map (fun (name, value) -> Printf.sprintf " %s=\"%s\"" (name_to_string name) value) |> String.concat "" in Printf.sprintf "<%s%s>" (name_to_string name) attributes | `End_element -> "" | `Text ss -> String.concat "" ss | `Xml x -> let s = Printf.sprintf "" x.version in let s = match x.encoding with | None -> s | Some encoding -> Printf.sprintf "%s encoding=\"%s\"" s encoding in let s = match x.standalone with | None -> s | Some standalone -> Printf.sprintf "%s standalone=\"%s\"" s (if standalone then "yes" else "no") in s ^ "?>" | `PI (target, s) -> Printf.sprintf "" target s let token_to_string = function | `Xml x -> signal_to_string (`Xml x) | `Doctype d -> signal_to_string (`Doctype d) | `Start t -> let name = "", t.Token_tag.name in let attributes = t.Token_tag.attributes |> List.map (fun (n, v) -> ("", n), v) in let s = signal_to_string (`Start_element (name, attributes)) in if not t.Token_tag.self_closing then s else (String.sub s 0 (String.length s - 1)) ^ "/>" | `End t -> Printf.sprintf "" t.Token_tag.name | `Chars ss -> String.concat "" ss | `Char i -> char i | `PI v -> signal_to_string (`PI v) | `Comment s -> signal_to_string (`Comment s) | `EOF -> "EOF" let whitespace_chars = " \t\n\r" let whitespace_prefix_length s = let rec loop index = if index = String.length s then index else if String.contains whitespace_chars s.[index] then loop (index + 1) else index in loop 0 let whitespace_suffix_length s = let rec loop rindex = if rindex = String.length s then rindex else if String.contains whitespace_chars s.[String.length s - rindex - 1] then loop (rindex + 1) else rindex in loop 0 let trim_string_left s = let prefix_length = whitespace_prefix_length s in String.sub s prefix_length (String.length s - prefix_length) let trim_string_right s = let suffix_length = 
whitespace_suffix_length s in String.sub s 0 (String.length s - suffix_length) (* String.trim not available for OCaml < 4.00. *) let trim_string s = s |> trim_string_left |> trim_string_right (* Specialization of List.mem at string list, to avoid polymorphic comparison. *) let list_mem_string (s : string) l = List.exists (fun s' -> s' = s) l markup.ml-1.0.3/src/detect.ml000066400000000000000000000406521421357706400160320ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open Common open Kstream open Encoding let name_to_encoding = function | "utf-8" -> Some utf_8 | "utf-16be" -> Some utf_16be | "utf-16le" -> Some utf_16le | "iso-8859-1" -> Some iso_8859_1 | "iso-8859-15" -> Some iso_8859_15 | "us-ascii" -> Some us_ascii | "windows-1251" -> Some windows_1251 | "windows-1252" -> Some windows_1252 | "ucs-4be" -> Some ucs_4be | "ucs-4le" -> Some ucs_4le | _ -> None (* 8.2.2.2. *) let guess_from_bom_html source throw k = peek_n 3 source throw (function | '\xFE'::'\xFF'::_ -> k (Some "utf-16be") | '\xFF'::'\xFE'::_ -> k (Some "utf-16le") | ['\xEF'; '\xBB'; '\xBF'] -> k (Some "utf-8") | _ -> k None) (* Appendix F.1. *) let guess_from_bom_xml source throw k = peek_n 4 source throw (function | ['\x00'; '\x00'; '\xFE'; '\xFF'] -> k (Some "ucs-4be") | ['\xFF'; '\xFE'; '\x00'; '\x00'] -> k (Some "ucs-4le") | ['\x00'; '\x00'; '\xFF'; '\xFE'] -> k (Some "ucs-4be-transposed") | ['\xFE'; '\xFF'; '\x00'; '\x00'] -> k (Some "ucs-4le-transposed") | '\xFE'::'\xFF'::_ -> k (Some "utf-16be") | '\xFF'::'\xFE'::_ -> k (Some "utf-16le") | '\xEF'::'\xBB'::'\xBF'::_ -> k (Some "utf-8") | _ -> k None) (* Appendix F.1. 
*) let guess_family_xml source throw k = peek_n 4 source throw (function | ['\x00'; '\x00'; '\x00'; '\x3C'] -> k (Some "ucs-4be") | ['\x3C'; '\x00'; '\x00'; '\x00'] -> k (Some "ucs-4le") | ['\x00'; '\x00'; '\x3C'; '\x00'] -> k (Some "ucs-4be-transposed") | ['\x00'; '\x3C'; '\x00'; '\x00'] -> k (Some "ucs-4le-transposed") | ['\x00'; '\x3C'; '\x00'; '\x3F'] -> k (Some "utf-16be") | ['\x3C'; '\x00'; '\x3F'; '\x00'] -> k (Some "utf-16le") | ['\x3C'; '\x3F'; '\x78'; '\x6D'] -> k (Some "utf-8") | ['\x4C'; '\x6F'; '\xA7'; '\x94'] -> k (Some "ebcdic") | _ -> k None) (* 5.2 in the Encoding Candidate Recommendation. *) let normalize_name for_html s = match String.lowercase_ascii (trim_string s) with | "unicode-1-1-utf-8" | "utf-8" | "utf8" -> "utf-8" | "866" | "cp866" | "csibm866" | "ibm866" -> "ibm866" | "csisolatin2" | "iso-8859-2" | "iso-ir-101" | "iso8859-2" | "iso88592" | "iso_8859-2" | "iso_8859-2:1987" | "l2" | "latin2" -> "iso-8859-2" | "csisolatin3" | "iso-8859-3" | "iso-ir-109" | "iso8859-3" | "iso88593" | "iso_8859-3" | "iso_8859-3:1988" | "l3" | "latin3" -> "iso-8859-3" | "csisolatin4" | "iso-8859-4" | "iso-ir-110" | "iso8859-4" | "iso88594" | "iso_8859-4" | "iso_8859-4:1988" | "l4" | "latin4" -> "iso-8859-4" | "csisolatincyrillic" | "cyrillic" | "iso-8859-5" | "iso-ir-144" | "iso8859-5" | "iso88595" | "iso_8859-5" | "iso_8859-5:1988" -> "iso-8859-5" | "arabic" | "asmo-708" | "csiso88596e" | "csiso88596i" | "csisolatinarabic" | "ecma-114" | "iso-8859-6" | "iso-8859-6-e" | "iso-8859-6-i" | "iso-ir-127" | "iso8859-6" | "iso88596" | "iso_8859-6" | "iso_8859-6:1987" -> "iso-8859-6" | "csisolatingreek" | "ecma-118" | "elot_928" | "greek" | "greek8" | "iso-8859-7" | "iso-ir-126" | "iso8859-7" | "iso88597" | "iso_8859-7" | "iso_8859-7:1987" | "sun_eu_greek" -> "iso-8859-7" | "csiso88598e" | "csisolatinhebrew" | "hebrew" | "iso-8859-8" | "iso-8859-8-e" | "iso-ir-138" | "iso8859-8" | "iso88598" | "iso_8859-8" | "iso_8859-8:1988" | "visual" -> "iso-8859-8" | "csiso88598i" 
| "iso-8859-8-i" | "logical" -> "iso-8859-8-i" | "csisolatin6" | "iso-8859-10" | "iso-ir-157" | "iso8859-10" | "iso885910" | "l6" | "latin6" -> "iso-8859-10" | "iso-8859-13" | "iso8859-13" | "iso885913" -> "iso-8859-13" | "iso-8859-14" | "iso8859-14" | "iso885914" -> "iso-8859-14" | "csisolatin9" | "iso-8859-15" | "iso8859-15" | "iso885915" | "iso_8859-15" | "l9" -> "iso-8859-15" | "iso-8859-16" -> "iso-8859-16" | "cskoi8r" | "koi" | "koi8" | "koi8-r" | "koi8_r" -> "koi8-r" | "koi8-ru" | "koi8-u" -> "koi8-u" | "csmacintosh" | "mac" | "macintosh" | "x-mac-roman" -> "macintosh" | "dos-874" | "iso-8859-11" | "iso8859-11" | "iso885911" | "tis-620" | "windows-874" -> "windows-874" | "cp1250" | "windows-1250" | "x-cp1250" -> "windows-1250" | "cp1251" | "windows-1251" | "x-cp1251" -> "windows-1251" | "ansi_x3.4-1968" | "ascii" | "us-ascii" -> if for_html then "windows-1252" else "us-ascii" | "cp819" | "csisolatin1" | "ibm819" | "iso-8859-1" | "iso-ir-100" | "iso8859-1" | "iso88591" | "iso_8859-1" | "iso_8859-1:1987" | "l1" | "latin1" -> if for_html then "windows-1252" else "iso-8859-1" | "cp1252" | "windows-1252" | "x-cp1252" -> "windows-1252" | "cp1253" | "windows-1253" | "x-cp1253" -> "windows-1253" | "cp1254" | "csisolatin5" | "iso-8859-9" | "iso-ir-148" | "iso8859-9" | "iso88599" | "iso_8859-9" | "iso_8859-9:1989" | "l5" | "latin5" | "windows-1254" | "x-cp1254" -> "windows-1254" | "cp1255" | "windows-1255" | "x-cp1255" -> "windows-1255" | "cp1256" | "windows-1256" | "x-cp1256" -> "windows-1256" | "cp1257" | "windows-1257" | "x-cp1257" -> "windows-1257" | "cp1258" | "windows-1258" | "x-cp1258" -> "windows-1258" | "x-mac-cyrillic" | "x-mac-ukrainian" -> "x-mac-cyrillic" | "chinese" | "csgb2312" | "csiso58gb231280" | "gb2312" | "gb_2312" | "gb_2312-80" | "gbk" | "iso-ir-58" | "x-gbk" -> "gbk" | "gb18030" -> "gb18030" | "big5" | "big5-hkscs" | "cn-big5" | "csbig5" | "x-x-big5" -> "big5" | "cseucpkdfmtjapanese" | "euc-jp" | "x-euc-jp" -> "euc-jp" | "csiso2022jp" | 
"iso-2022-jp" -> "iso-2022-jp" | "csshiftjis" | "ms932" | "ms_kanji" | "shift-jis" | "shift_jis" | "sjis" | "windows-31j" | "x-sjis" -> "shift_jis" | "cseuckr" | "csksc56011987" | "euc-kr" | "iso-ir-149" | "korean" | "ks_c_5601-1987" | "ks_c_5601-1989" | "ksc5601" | "ksc_5601" | "windows-949" -> "euc-kr" | "csiso2022kr" | "hz-gb-2312" | "iso-2022-cn" | "iso-2022-cn-ext" | "iso-2022-kr" -> "replacement" | "utf-16be" -> "utf-16be" | "utf-16" | "utf-16le" -> "utf-16le" | "x-user-defined" -> "x-user-defined" | s -> s (* 8.2.2.2. *) let meta_tag_prescan = let is_uppercase c = c >= 'A' && c <= 'Z' in let is_lowercase c = c >= 'a' && c <= 'z' in let is_letter c = is_uppercase c || is_lowercase c in let is_whitespace c = String.contains "\t\n\r\x0C " c in let rec skip_whitespace source throw k = next source throw k (function | c when is_whitespace c -> skip_whitespace source throw k | c -> push source c; k ()) in let read_quoted_value quote source throw k = let buffer = Buffer.create 32 in let rec iterate () = next source throw (fun () -> k "") (function | c when c = quote -> k (Buffer.contents buffer) | c -> add_utf_8 buffer (Char.code (Char.lowercase_ascii c)); iterate ()) in iterate () in let read_unquoted_value terminator source throw k = let buffer = Buffer.create 32 in let rec iterate () = next source throw (fun () -> k (Buffer.contents buffer)) (function | c when is_whitespace c || c = terminator -> push source c; k (Buffer.contents buffer) | c -> add_utf_8 buffer (Char.code (Char.lowercase_ascii c)); iterate ()) in iterate () in (* 2.6.5. 
*) let extract_encoding source throw k = let rec scan () = next source throw (fun () -> k None) begin function | 'c' -> next_n 6 source throw begin fun l -> match List.map Char.lowercase_ascii l with | ['h'; 'a'; 'r'; 's'; 'e'; 't'] -> skip_whitespace source throw (fun () -> next source throw (fun () -> k None) begin function | '=' -> skip_whitespace source throw (fun () -> next source throw (fun () -> k None) (fun c -> let continue_with = match c with | '"' | '\'' as c -> read_quoted_value c source throw | _ -> push source c; read_unquoted_value ';' source throw in continue_with (function | "" -> k None | s -> k (Some s)))) | c -> push source c; scan () end) | _ -> scan () end | _ -> scan () end in scan () in let everything = fun _ k -> k true in fun ?(supported = everything) ?(limit = 1024) source throw k -> let source, restore = checkpoint source in let finish result = restore (); k result in let source = let count = ref 0 in (fun throw empty k -> if !count >= limit then empty () else next source throw empty (fun c -> count := !count + 1; k c)) |> make in let get_attribute k' = let rec skip_leading k = next source throw (fun () -> k' None) (function | c when is_whitespace c || c = '/' -> skip_leading k | c -> push source c; k ()) in let read_name k = let buffer = Buffer.create 32 in let rec iterate () = next_option source throw begin function | Some ('=' as c) when Buffer.length buffer > 0 -> push source c; k (Buffer.contents buffer) | Some '/' | Some '>' | None as c -> push_option source c; if Buffer.length buffer = 0 then k' None else k' (Some (Buffer.contents buffer, "")) | Some c when is_whitespace c -> k (Buffer.contents buffer) | Some c -> add_utf_8 buffer (Char.code (Char.lowercase_ascii c)); iterate () end in iterate () in skip_leading (fun () -> read_name (fun name -> skip_whitespace source throw (fun () -> next_option source throw begin function | Some '=' -> skip_whitespace source throw (fun () -> next_option source throw (fun maybe_c -> let 
continue_with = match maybe_c with | Some ('\'' | '"' as c) -> read_quoted_value c source throw | Some c -> push source c; read_unquoted_value '>' source throw | None -> read_unquoted_value '>' source throw in continue_with (fun value -> k' (Some (name, value))))) | c -> push_option source c; k' (Some (name, "")) end))) in let read_attributes k = let rec iterate names got_pragma need_pragma charset = get_attribute begin function | None -> k got_pragma need_pragma charset | Some (name, value) -> if list_mem_string name names then iterate names got_pragma need_pragma charset else let names = name::names in match name with | "http-equiv" -> if value = "content-type" then iterate names true need_pragma charset else iterate names got_pragma need_pragma charset | "content" -> if charset <> None then iterate names got_pragma need_pragma charset else extract_encoding (Stream_io.string value) throw begin function | None -> iterate names got_pragma need_pragma charset | Some encoding -> iterate names got_pragma (Some true) (Some encoding) end | "charset" -> if value = "" then iterate names got_pragma need_pragma charset else iterate names got_pragma (Some false) (Some value) | _ -> iterate names got_pragma need_pragma charset end in iterate [] false None None in let process_attributes got_pragma need_pragma charset k = match need_pragma with | None -> k None | Some need_pragma -> if need_pragma && (not got_pragma) then k None else match charset with | None -> k None | Some charset -> let charset = match normalize_name true charset with | "utf-16be" | "utf-16le" | "utf-16" -> "utf-8" | s -> s in supported charset (function | true -> k (Some charset) | false -> k None) in let process_meta_tag k = read_attributes (fun got_pragma need_pragma charset -> process_attributes got_pragma need_pragma charset (function | None -> k () | v -> finish v)) in let rec close_comment k = next source throw (fun () -> finish None) (function | '-' -> next_n 2 source throw (function | ['-'; '>'] -> 
k () | l -> push_list source l; close_comment k) | _ -> close_comment k) in let close_tag k = let rec skip () = next source throw (fun () -> finish None) (function | c when is_whitespace c || c = '>' -> push source c; let rec drain_attributes () = get_attribute (function | None -> k () | Some _ -> drain_attributes ()) in drain_attributes () | _ -> skip ()) in skip () in let rec close_tag_like k = next source throw (fun () -> finish None) (function | '>' -> k () | _ -> close_tag_like k) in let rec scan () = next source throw (fun () -> finish None) begin function | '<' -> peek source throw (fun () -> finish None) begin function | '!' -> peek_n 3 source throw (function | ['!'; '-'; '-'] -> close_comment scan | _ -> close_tag_like scan) | '/' -> peek_n 2 source throw (function | ['/'; c] when is_letter c -> close_tag scan | _ -> close_tag_like scan) | '?' -> close_tag_like scan | 'm' -> peek_n 5 source throw (fun l -> match List.map Char.lowercase_ascii l with | ['m'; 'e'; 't'; 'a'; c] when is_whitespace c || c = '/' -> next_n 4 source throw (fun _ -> process_meta_tag scan) | _ -> close_tag scan) | c when is_letter c -> close_tag scan | _ -> scan () end | _ -> scan () end in scan () let read_xml_encoding_declaration bytes (family : Encoding.t) throw k = let bytes, restore = Kstream.checkpoint bytes in let k v = restore (); k v in let tokens = bytes |> family |> Input.preprocess is_valid_xml_char Error.ignore_errors |> Xml_tokenizer.tokenize Error.ignore_errors (fun _ -> None) in let rec prescan () = Kstream.next tokens throw (fun () -> k None) begin function | _, `Xml {Common.encoding} -> k encoding | _, `Comment _ -> prescan () | _, `Chars s when List.for_all is_whitespace_only s -> prescan () | _ -> k None end in prescan () let name_to_encoding_or_utf_8 encoding = match name_to_encoding encoding with | Some e -> e | None -> utf_8 let select_html ?limit bytes throw k = guess_from_bom_html bytes throw (function | Some encoding -> k (name_to_encoding_or_utf_8 encoding) 
| None -> meta_tag_prescan ?limit bytes throw (function | Some encoding -> k (name_to_encoding_or_utf_8 encoding) | None -> k utf_8)) let select_xml bytes throw k = guess_from_bom_xml bytes throw (function | Some encoding -> k (name_to_encoding_or_utf_8 encoding) | None -> (fun k' -> guess_family_xml bytes throw (function | None -> k' "utf-8" utf_8 | Some family -> k' family (name_to_encoding_or_utf_8 family))) (fun name family -> read_xml_encoding_declaration bytes family throw (function | None -> k (name_to_encoding_or_utf_8 name) | Some encoding -> match name, normalize_name false encoding with | "utf-8", "iso-8859-1" -> k iso_8859_1 | "utf-8", "us-ascii" -> k us_ascii | "utf-8", "windows-1251" -> k windows_1251 | "utf-8", "windows-1252" -> k windows_1252 | _ -> k (name_to_encoding_or_utf_8 name)))) markup.ml-1.0.3/src/detect.mli000066400000000000000000000014571421357706400162030ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open Common val select_html : ?limit:int -> char Kstream.t -> Encoding.t cps val select_xml : char Kstream.t -> Encoding.t cps (* The following values are exposed for testing. They are not used outside the module. 
*) val normalize_name : bool -> string -> string val guess_from_bom_html : char Kstream.t -> string option cps val guess_from_bom_xml : char Kstream.t -> string option cps val guess_family_xml : char Kstream.t -> string option cps val meta_tag_prescan : ?supported:(string -> bool cont -> unit) -> ?limit:int -> char Kstream.t -> string option cps val read_xml_encoding_declaration : char Kstream.t -> Encoding.t -> string option cps markup.ml-1.0.3/src/dune000066400000000000000000000003041421357706400150740ustar00rootroot00000000000000(library (name markup) (public_name markup) (synopsis "Error-recovering functional HTML5 and XML parsers") (instrumentation (backend bisect_ppx)) (libraries uutf) (flags (:standard -w -9))) markup.ml-1.0.3/src/encoding.ml000066400000000000000000000324631421357706400163510ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open Common open Kstream type t = ?report:Error.parse_handler -> char Kstream.t -> int Kstream.t let wrap f = fun ?(report = Error.ignore_errors) s -> f report s let bytes_empty = Bytes.create 0 (* Decoders based on the Uutf library. 
*) let uutf_decoder encoding name = (fun report bytes -> let decoder = Uutf.decoder ~encoding `Manual in (fun throw empty k -> let rec run () = match Uutf.decode decoder with | `End -> empty () | `Uchar c -> k (Uchar.to_int c) | `Malformed s -> let location = Uutf.decoder_line decoder, Uutf.decoder_col decoder in report location (`Decoding_error (s, name)) throw (fun () -> k u_rep) | `Await -> next bytes throw (fun () -> Uutf.Manual.src decoder bytes_empty 0 0; run ()) (fun c -> Uutf.Manual.src decoder (Bytes.make 1 c) 0 1; run ()) in run ()) |> make) |> wrap let utf_8 : t = uutf_decoder `UTF_8 "utf-8" let utf_16be : t = uutf_decoder `UTF_16BE "utf-16be" let utf_16le : t = uutf_decoder `UTF_16LE "utf-16le" let iso_8859_1 : t = uutf_decoder `ISO_8859_1 "iso-8859-1" let us_ascii : t = uutf_decoder `US_ASCII "us-ascii" (* Chooses UTF-16LE unless the BE BOM is present, as in http://www.w3.org/TR/encoding/ *) let utf_16 : t = (fun report bytes -> let constructor = fun throw k -> peek_n 2 bytes throw (function | ['\xFE'; '\xFF'] -> k (utf_16be ~report bytes) | _ -> k (utf_16le ~report bytes)) in construct constructor) |> wrap let ucs_4_decoder arrange name = (fun report bytes -> let first = ref true in let line = ref 1 in let column = ref 1 in let char k c = column := !column + 1; k c in let newline k c = column := 1; line := !line + 1; k c in (fun throw empty k -> let rec run () = next_n 4 bytes throw begin function | [b1; b2; b3; b4] -> let low, b2', b3', high = arrange (b1, b2, b3, b4) in let low, b2', b3', high = Char.code low, Char.code b2', Char.code b3', Char.code high in if high land 0x80 <> 0 then let s = Printf.sprintf "%c%c%c%c" b1 b2 b3 b4 in report (!line, !column) (`Decoding_error (s, name)) throw (fun () -> char k u_rep) else let scalar = (high lsl 24) lor (b3' lsl 16) lor (b2' lsl 8) lor low in let skip = if !first then begin first := false; scalar = Uchar.to_int Uutf.u_bom end else false in if skip then run () else if scalar = 0x000A then newline k 
scalar else char k scalar | [] -> empty () | l -> let buffer = Buffer.create 4 in l |> List.iter (Buffer.add_char buffer); report (!line, !column) (`Decoding_error (Buffer.contents buffer, name)) throw (fun () -> char k u_rep) end in run ()) |> make) |> wrap let ucs_4be : t = ucs_4_decoder (fun (b1, b2, b3, b4) -> b4, b3, b2, b1) "ucs-4be" let ucs_4le : t = ucs_4_decoder (fun bs -> bs) "ucs-4le" let ucs_4be_transposed : t = ucs_4_decoder (fun (b1, b2, b3, b4) -> b3, b4, b1, b2) "ucs-4be-transposed" let ucs_4le_transposed : t = ucs_4_decoder (fun (b1, b2, b3, b4) -> b2, b1, b4, b3) "ucs-4le-transposed" let code_page table = if Array.length table < 256 then raise (Invalid_argument "Markup.Encoding.code_page: array does not have 256 entries"); (fun _ bytes -> (fun throw empty k -> next bytes throw empty (fun c -> k table.(Char.code c))) |> make) |> wrap let windows_1251_table = [| (* ASCII *) 0x0000; 0x0001; 0x0002; 0x0003; 0x0004; 0x0005; 0x0006; 0x0007; 0x0008; 0x0009; 0x000A; 0x000B; 0x000C; 0x000D; 0x000E; 0x000F; 0x0010; 0x0011; 0x0012; 0x0013; 0x0014; 0x0015; 0x0016; 0x0017; 0x0018; 0x0019; 0x001A; 0x001B; 0x001C; 0x001D; 0x001E; 0x001F; 0x0020; 0x0021; 0x0022; 0x0023; 0x0024; 0x0025; 0x0026; 0x0027; 0x0028; 0x0029; 0x002A; 0x002B; 0x002C; 0x002D; 0x002E; 0x002F; 0x0030; 0x0031; 0x0032; 0x0033; 0x0034; 0x0035; 0x0036; 0x0037; 0x0038; 0x0039; 0x003A; 0x003B; 0x003C; 0x003D; 0x003E; 0x003F; 0x0040; 0x0041; 0x0042; 0x0043; 0x0044; 0x0045; 0x0046; 0x0047; 0x0048; 0x0049; 0x004A; 0x004B; 0x004C; 0x004D; 0x004E; 0x004F; 0x0050; 0x0051; 0x0052; 0x0053; 0x0054; 0x0055; 0x0056; 0x0057; 0x0058; 0x0059; 0x005A; 0x005B; 0x005C; 0x005D; 0x005E; 0x005F; 0x0060; 0x0061; 0x0062; 0x0063; 0x0064; 0x0065; 0x0066; 0x0067; 0x0068; 0x0069; 0x006A; 0x006B; 0x006C; 0x006D; 0x006E; 0x006F; 0x0070; 0x0071; 0x0072; 0x0073; 0x0074; 0x0075; 0x0076; 0x0077; 0x0078; 0x0079; 0x007A; 0x007B; 0x007C; 0x007D; 0x007E; 0x007F; (* 0x8_ *) 0x0402; 0x0403; 0x201A; 0x0453; 0x201E; 0x2026; 0x2020; 
0x2021; 0x20AC; 0x2030; 0x0409; 0x2039; 0x040A; 0x040C; 0x040B; 0x040F; (* 0x9_ *) 0x0452; 0x2018; 0x2019; 0x201C; 0x201D; 0x2022; 0x2013; 0x2014; 0xFFFD; 0x2122; 0x0459; 0x203A; 0x045A; 0x045C; 0x045B; 0x045F; (* 0xA_ *) 0x00A0; 0x040E; 0x045E; 0x0408; 0x00A4; 0x0490; 0x00A6; 0x00A7; 0x0401; 0x00A9; 0x0404; 0x00AB; 0x00AC; 0x00AD; 0x00AE; 0x0407; (* 0xB_ *) 0x00B0; 0x00B1; 0x0406; 0x0456; 0x0491; 0x00B5; 0x00B6; 0x00B7; 0x0451; 0x2116; 0x0454; 0x00BB; 0x0458; 0x0405; 0x0455; 0x0457; (* 0xC_ *) 0x0410; 0x0411; 0x0412; 0x0413; 0x0414; 0x0415; 0x0416; 0x0417; 0x0418; 0x0419; 0x041A; 0x041B; 0x041C; 0x041D; 0x041E; 0x041F; (* 0xD_ *) 0x0410; 0x0421; 0x0422; 0x0423; 0x0424; 0x0425; 0x0426; 0x0427; 0x0428; 0x0429; 0x042A; 0x042B; 0x042C; 0x042D; 0x042E; 0x042F; (* 0xE_ *) 0x0430; 0x0431; 0x0432; 0x0433; 0x0434; 0x0435; 0x0436; 0x0437; 0x0438; 0x0439; 0x043A; 0x043B; 0x043C; 0x043D; 0x043E; 0x043F; (* 0xF_ *) 0x0440; 0x0441; 0x0442; 0x0443; 0x0444; 0x0445; 0x0446; 0x0447; 0x0448; 0x0449; 0x044A; 0x044B; 0x044C; 0x044D; 0x044E; 0x044F |] let windows_1251 : t = code_page windows_1251_table let windows_1252_table = [| (* ASCII *) 0x0000; 0x0001; 0x0002; 0x0003; 0x0004; 0x0005; 0x0006; 0x0007; 0x0008; 0x0009; 0x000A; 0x000B; 0x000C; 0x000D; 0x000E; 0x000F; 0x0010; 0x0011; 0x0012; 0x0013; 0x0014; 0x0015; 0x0016; 0x0017; 0x0018; 0x0019; 0x001A; 0x001B; 0x001C; 0x001D; 0x001E; 0x001F; 0x0020; 0x0021; 0x0022; 0x0023; 0x0024; 0x0025; 0x0026; 0x0027; 0x0028; 0x0029; 0x002A; 0x002B; 0x002C; 0x002D; 0x002E; 0x002F; 0x0030; 0x0031; 0x0032; 0x0033; 0x0034; 0x0035; 0x0036; 0x0037; 0x0038; 0x0039; 0x003A; 0x003B; 0x003C; 0x003D; 0x003E; 0x003F; 0x0040; 0x0041; 0x0042; 0x0043; 0x0044; 0x0045; 0x0046; 0x0047; 0x0048; 0x0049; 0x004A; 0x004B; 0x004C; 0x004D; 0x004E; 0x004F; 0x0050; 0x0051; 0x0052; 0x0053; 0x0054; 0x0055; 0x0056; 0x0057; 0x0058; 0x0059; 0x005A; 0x005B; 0x005C; 0x005D; 0x005E; 0x005F; 0x0060; 0x0061; 0x0062; 0x0063; 0x0064; 0x0065; 0x0066; 0x0067; 0x0068; 0x0069; 0x006A; 
0x006B; 0x006C; 0x006D; 0x006E; 0x006F; 0x0070; 0x0071; 0x0072; 0x0073; 0x0074; 0x0075; 0x0076; 0x0077; 0x0078; 0x0079; 0x007A; 0x007B; 0x007C; 0x007D; 0x007E; 0x007F; (* 0x8_ *) 0x20AC; 0x0081; 0x201A; 0x0192; 0x201E; 0x2026; 0x2020; 0x2021; 0x02C6; 0x2030; 0x0160; 0x2039; 0x0152; 0x008D; 0x017D; 0x008F; (* 0x9_ *) 0x0090; 0x2018; 0x2019; 0x201C; 0x201D; 0x2022; 0x2013; 0x2014; 0x02DC; 0x2122; 0x0161; 0x203A; 0x0153; 0x009D; 0x017E; 0x0178; (* ISO-8859-1 *) 0x00A0; 0x00A1; 0x00A2; 0x00A3; 0x00A4; 0x00A5; 0x00A6; 0x00A7; 0x00A8; 0x00A9; 0x00AA; 0x00AB; 0x00AC; 0x00AD; 0x00AE; 0x00AF; 0x00B0; 0x00B1; 0x00B2; 0x00B3; 0x00B4; 0x00B5; 0x00B6; 0x00B7; 0x00B8; 0x00B9; 0x00BA; 0x00BB; 0x00BC; 0x00BD; 0x00BE; 0x00BF; 0x00C0; 0x00C1; 0x00C2; 0x00C3; 0x00C4; 0x00C5; 0x00C6; 0x00C7; 0x00C8; 0x00C9; 0x00CA; 0x00CB; 0x00CC; 0x00CD; 0x00CE; 0x00CF; 0x00D0; 0x00D1; 0x00D2; 0x00D3; 0x00D4; 0x00D5; 0x00D6; 0x00D7; 0x00D8; 0x00D9; 0x00DA; 0x00DB; 0x00DC; 0x00DD; 0x00DE; 0x00DF; 0x00E0; 0x00E1; 0x00E2; 0x00E3; 0x00E4; 0x00E5; 0x00E6; 0x00E7; 0x00E8; 0x00E9; 0x00EA; 0x00EB; 0x00EC; 0x00ED; 0x00EE; 0x00EF; 0x00F0; 0x00F1; 0x00F2; 0x00F3; 0x00F4; 0x00F5; 0x00F6; 0x00F7; 0x00F8; 0x00F9; 0x00FA; 0x00FB; 0x00FC; 0x00FD; 0x00FE; 0x00FF |] let windows_1252 : t = code_page windows_1252_table let ebcdic_37_table = [| (* 0x0_ *) 0x0000; 0x0001; 0x0002; 0x0003; 0x009C; 0x0009; 0x0086; 0x007F; 0x0097; 0x008D; 0x008E; 0x000B; 0x000C; 0x000D; 0x000E; 0x000F; (* 0x1_ *) 0x0010; 0x0011; 0x0012; 0x0013; 0x009D; 0x0085; 0x0008; 0x0087; 0x0018; 0x0019; 0x0092; 0x008F; 0x001C; 0x001D; 0x001E; 0x001F; (* 0x2_ *) 0x0080; 0x0081; 0x0082; 0x0083; 0x0084; 0x000A; 0x0017; 0x001B; 0x0088; 0x0089; 0x008A; 0x008B; 0x008C; 0x0005; 0x0006; 0x0007; (* 0x3_ *) 0x0090; 0x0091; 0x0016; 0x0093; 0x0094; 0x0095; 0x0096; 0x0004; 0x0098; 0x0099; 0x009A; 0x009B; 0x0014; 0x0015; 0x009E; 0x001A; (* 0x4_ *) 0x0020; 0x00A0; 0x00E2; 0x00E4; 0x00E0; 0x00E1; 0x00E3; 0x00E5; 0x00E7; 0x00F1; 0x00A2; 0x002E; 0x003C; 0x0028; 0x002B; 
0x007C; (* 0x5_ *) 0x0026; 0x00E9; 0x00EA; 0x00EB; 0x00E8; 0x00ED; 0x00EE; 0x00EF; 0x00EC; 0x00DF; 0x0021; 0x0024; 0x002A; 0x0029; 0x003B; 0x00AC; (* 0x6_ *) 0x002D; 0x002F; 0x00C2; 0x00C4; 0x00C0; 0x00C1; 0x00C3; 0x00C5; 0x00C7; 0x00D1; 0x00A6; 0x002C; 0x0025; 0x005F; 0x003E; 0x003F; (* 0x7_ *) 0x00F8; 0x00C9; 0x00CA; 0x00CB; 0x00C8; 0x00CD; 0x00CE; 0x00CF; 0x00CC; 0x0060; 0x003A; 0x0023; 0x0040; 0x0027; 0x003D; 0x0022; (* 0x8_ *) 0x00D8; 0x0061; 0x0062; 0x0063; 0x0064; 0x0065; 0x0066; 0x0067; 0x0068; 0x0069; 0x00AB; 0x00BB; 0x00F0; 0x00FD; 0x00FE; 0x00B1; (* 0x9_ *) 0x00B0; 0x006A; 0x006B; 0x006C; 0x006D; 0x006E; 0x006F; 0x0070; 0x0071; 0x0072; 0x00AA; 0x00BA; 0x00E6; 0x00B8; 0x00C6; 0x00A4; (* 0xA_ *) 0x00B5; 0x007E; 0x0073; 0x0074; 0x0075; 0x0076; 0x0077; 0x0078; 0x0079; 0x007A; 0x00A1; 0x00BF; 0x00D0; 0x00DD; 0x00DE; 0x00AE; (* 0xB_ *) 0x005E; 0x00A3; 0x00A5; 0x00B7; 0x00A9; 0x00A7; 0x00B6; 0x00BC; 0x00BD; 0x00BE; 0x005B; 0x005D; 0x00AF; 0x00A8; 0x00B4; 0x00D7; (* 0xC_ *) 0x007B; 0x0041; 0x0042; 0x0043; 0x0044; 0x0045; 0x0046; 0x0047; 0x0048; 0x0049; 0x00AD; 0x00F4; 0x00F6; 0x00F2; 0x00F3; 0x00F5; (* 0xD_ *) 0x007D; 0x004A; 0x004B; 0x004C; 0x004D; 0x004E; 0x004F; 0x0050; 0x0051; 0x0052; 0x00B9; 0x00FB; 0x00FC; 0x00F9; 0x00FA; 0x00FF; (* 0xE_ *) 0x005C; 0x00F7; 0x0053; 0x0054; 0x0055; 0x0056; 0x0057; 0x0058; 0x0059; 0x005A; 0x00B2; 0x00D4; 0x00D6; 0x00D2; 0x00D3; 0x00D5; (* 0xF_ *) 0x0030; 0x0031; 0x0032; 0x0033; 0x0034; 0x0035; 0x0036; 0x0037; 0x0038; 0x0039; 0x00B3; 0x00DB; 0x00DC; 0x00D9; 0x00DA; 0x009F |] let ebcdic : t = code_page ebcdic_37_table let iso_8859_15_table = [| (* ASCII *) 0x0000; 0x0001; 0x0002; 0x0003; 0x0004; 0x0005; 0x0006; 0x0007; 0x0008; 0x0009; 0x000A; 0x000B; 0x000C; 0x000D; 0x000E; 0x000F; 0x0010; 0x0011; 0x0012; 0x0013; 0x0014; 0x0015; 0x0016; 0x0017; 0x0018; 0x0019; 0x001A; 0x001B; 0x001C; 0x001D; 0x001E; 0x001F; 0x0020; 0x0021; 0x0022; 0x0023; 0x0024; 0x0025; 0x0026; 0x0027; 0x0028; 0x0029; 0x002A; 0x002B; 0x002C; 0x002D; 0x002E; 
0x002F; 0x0030; 0x0031; 0x0032; 0x0033; 0x0034; 0x0035; 0x0036; 0x0037; 0x0038; 0x0039; 0x003A; 0x003B; 0x003C; 0x003D; 0x003E; 0x003F; 0x0040; 0x0041; 0x0042; 0x0043; 0x0044; 0x0045; 0x0046; 0x0047; 0x0048; 0x0049; 0x004A; 0x004B; 0x004C; 0x004D; 0x004E; 0x004F; 0x0050; 0x0051; 0x0052; 0x0053; 0x0054; 0x0055; 0x0056; 0x0057; 0x0058; 0x0059; 0x005A; 0x005B; 0x005C; 0x005D; 0x005E; 0x005F; 0x0060; 0x0061; 0x0062; 0x0063; 0x0064; 0x0065; 0x0066; 0x0067; 0x0068; 0x0069; 0x006A; 0x006B; 0x006C; 0x006D; 0x006E; 0x006F; 0x0070; 0x0071; 0x0072; 0x0073; 0x0074; 0x0075; 0x0076; 0x0077; 0x0078; 0x0079; 0x007A; 0x007B; 0x007C; 0x007D; 0x007E; 0x007F; (* ISO-8859-1 *) 0x0080; 0x0081; 0x0082; 0x0083; 0x0084; 0x0085; 0x0086; 0x0087; 0x0088; 0x0089; 0x008A; 0x008B; 0x008C; 0x008D; 0x008E; 0x008F; 0x0090; 0x0091; 0x0092; 0x0093; 0x0094; 0x0095; 0x0096; 0x0097; 0x0098; 0x0099; 0x009A; 0x009B; 0x009C; 0x009D; 0x009E; 0x009F; (* 0xA_ *) 0x00A0; 0x00A1; 0x00A2; 0x00A3; 0x20AC; 0x00A5; 0x0160; 0x00A7; 0x0161; 0x00A9; 0x00AA; 0x00AB; 0x00AC; 0x00AD; 0x00AE; 0x00AF; (* 0xB_ *) 0x00B0; 0x00B1; 0x00B2; 0x00B3; 0x017D; 0x00B5; 0x00B6; 0x00B7; 0x017E; 0x00B9; 0x00BA; 0x00BB; 0x0152; 0x0153; 0x0178; 0x00BF; (* ISO-8859-1 *) 0x00C0; 0x00C1; 0x00C2; 0x00C3; 0x00C4; 0x00C5; 0x00C6; 0x00C7; 0x00C8; 0x00C9; 0x00CA; 0x00CB; 0x00CC; 0x00CD; 0x00CE; 0x00CF; 0x00D0; 0x00D1; 0x00D2; 0x00D3; 0x00D4; 0x00D5; 0x00D6; 0x00D7; 0x00D8; 0x00D9; 0x00DA; 0x00DB; 0x00DC; 0x00DD; 0x00DE; 0x00DF; 0x00E0; 0x00E1; 0x00E2; 0x00E3; 0x00E4; 0x00E5; 0x00E6; 0x00E7; 0x00E8; 0x00E9; 0x00EA; 0x00EB; 0x00EC; 0x00ED; 0x00EE; 0x00EF; 0x00F0; 0x00F1; 0x00F2; 0x00F3; 0x00F4; 0x00F5; 0x00F6; 0x00F7; 0x00F8; 0x00F9; 0x00FA; 0x00FB; 0x00FC; 0x00FD; 0x00FE; 0x00FF |] let iso_8859_15: t = code_page iso_8859_15_table markup.ml-1.0.3/src/entities.json000066400000000000000000004352761421357706400167610ustar00rootroot00000000000000/* Copyright © 2014 W3C® (MIT, ERCIM, Keio, Beihang). 
This software or document includes material copied from or derived from W3C Recommendation HTML5 [https://www.w3.org/TR/2014/REC-html5-20141028/]. */ { "Á": { "codepoints": [193], "characters": "\u00C1" }, "Á": { "codepoints": [193], "characters": "\u00C1" }, "á": { "codepoints": [225], "characters": "\u00E1" }, "á": { "codepoints": [225], "characters": "\u00E1" }, "Ă": { "codepoints": [258], "characters": "\u0102" }, "ă": { "codepoints": [259], "characters": "\u0103" }, "∾": { "codepoints": [8766], "characters": "\u223E" }, "∿": { "codepoints": [8767], "characters": "\u223F" }, "∾̳": { "codepoints": [8766, 819], "characters": "\u223E\u0333" }, "Â": { "codepoints": [194], "characters": "\u00C2" }, "Â": { "codepoints": [194], "characters": "\u00C2" }, "â": { "codepoints": [226], "characters": "\u00E2" }, "â": { "codepoints": [226], "characters": "\u00E2" }, "´": { "codepoints": [180], "characters": "\u00B4" }, "´": { "codepoints": [180], "characters": "\u00B4" }, "А": { "codepoints": [1040], "characters": "\u0410" }, "а": { "codepoints": [1072], "characters": "\u0430" }, "Æ": { "codepoints": [198], "characters": "\u00C6" }, "Æ": { "codepoints": [198], "characters": "\u00C6" }, "æ": { "codepoints": [230], "characters": "\u00E6" }, "æ": { "codepoints": [230], "characters": "\u00E6" }, "⁡": { "codepoints": [8289], "characters": "\u2061" }, "𝔄": { "codepoints": [120068], "characters": "\uD835\uDD04" }, "𝔞": { "codepoints": [120094], "characters": "\uD835\uDD1E" }, "À": { "codepoints": [192], "characters": "\u00C0" }, "À": { "codepoints": [192], "characters": "\u00C0" }, "à": { "codepoints": [224], "characters": "\u00E0" }, "à": { "codepoints": [224], "characters": "\u00E0" }, "ℵ": { "codepoints": [8501], "characters": "\u2135" }, "ℵ": { "codepoints": [8501], "characters": "\u2135" }, "Α": { "codepoints": [913], "characters": "\u0391" }, "α": { "codepoints": [945], "characters": "\u03B1" }, "Ā": { "codepoints": [256], "characters": "\u0100" }, "ā": { "codepoints": [257], 
"characters": "\u0101" }, "⨿": { "codepoints": [10815], "characters": "\u2A3F" }, "&": { "codepoints": [38], "characters": "\u0026" }, "&": { "codepoints": [38], "characters": "\u0026" }, "&": { "codepoints": [38], "characters": "\u0026" }, "&": { "codepoints": [38], "characters": "\u0026" }, "⩓": { "codepoints": [10835], "characters": "\u2A53" }, "∧": { "codepoints": [8743], "characters": "\u2227" }, "⩕": { "codepoints": [10837], "characters": "\u2A55" }, "⩜": { "codepoints": [10844], "characters": "\u2A5C" }, "⩘": { "codepoints": [10840], "characters": "\u2A58" }, "⩚": { "codepoints": [10842], "characters": "\u2A5A" }, "∠": { "codepoints": [8736], "characters": "\u2220" }, "⦤": { "codepoints": [10660], "characters": "\u29A4" }, "∠": { "codepoints": [8736], "characters": "\u2220" }, "∡": { "codepoints": [8737], "characters": "\u2221" }, "⦨": { "codepoints": [10664], "characters": "\u29A8" }, "⦩": { "codepoints": [10665], "characters": "\u29A9" }, "⦪": { "codepoints": [10666], "characters": "\u29AA" }, "⦫": { "codepoints": [10667], "characters": "\u29AB" }, "⦬": { "codepoints": [10668], "characters": "\u29AC" }, "⦭": { "codepoints": [10669], "characters": "\u29AD" }, "⦮": { "codepoints": [10670], "characters": "\u29AE" }, "⦯": { "codepoints": [10671], "characters": "\u29AF" }, "∟": { "codepoints": [8735], "characters": "\u221F" }, "⊾": { "codepoints": [8894], "characters": "\u22BE" }, "⦝": { "codepoints": [10653], "characters": "\u299D" }, "∢": { "codepoints": [8738], "characters": "\u2222" }, "Å": { "codepoints": [197], "characters": "\u00C5" }, "⍼": { "codepoints": [9084], "characters": "\u237C" }, "Ą": { "codepoints": [260], "characters": "\u0104" }, "ą": { "codepoints": [261], "characters": "\u0105" }, "𝔸": { "codepoints": [120120], "characters": "\uD835\uDD38" }, "𝕒": { "codepoints": [120146], "characters": "\uD835\uDD52" }, "≈": { "codepoints": [8776], "characters": "\u2248" }, "⩯": { "codepoints": [10863], "characters": "\u2A6F" }, "⩰": { "codepoints": 
[10864], "characters": "\u2A70" }, "≊": { "codepoints": [8778], "characters": "\u224A" }, "≋": { "codepoints": [8779], "characters": "\u224B" }, "'": { "codepoints": [39], "characters": "\u0027" }, "⁡": { "codepoints": [8289], "characters": "\u2061" }, "≈": { "codepoints": [8776], "characters": "\u2248" }, "≊": { "codepoints": [8778], "characters": "\u224A" }, "Å": { "codepoints": [197], "characters": "\u00C5" }, "Å": { "codepoints": [197], "characters": "\u00C5" }, "å": { "codepoints": [229], "characters": "\u00E5" }, "å": { "codepoints": [229], "characters": "\u00E5" }, "𝒜": { "codepoints": [119964], "characters": "\uD835\uDC9C" }, "𝒶": { "codepoints": [119990], "characters": "\uD835\uDCB6" }, "≔": { "codepoints": [8788], "characters": "\u2254" }, "*": { "codepoints": [42], "characters": "\u002A" }, "≈": { "codepoints": [8776], "characters": "\u2248" }, "≍": { "codepoints": [8781], "characters": "\u224D" }, "Ã": { "codepoints": [195], "characters": "\u00C3" }, "Ã": { "codepoints": [195], "characters": "\u00C3" }, "ã": { "codepoints": [227], "characters": "\u00E3" }, "ã": { "codepoints": [227], "characters": "\u00E3" }, "Ä": { "codepoints": [196], "characters": "\u00C4" }, "Ä": { "codepoints": [196], "characters": "\u00C4" }, "ä": { "codepoints": [228], "characters": "\u00E4" }, "ä": { "codepoints": [228], "characters": "\u00E4" }, "∳": { "codepoints": [8755], "characters": "\u2233" }, "⨑": { "codepoints": [10769], "characters": "\u2A11" }, "≌": { "codepoints": [8780], "characters": "\u224C" }, "϶": { "codepoints": [1014], "characters": "\u03F6" }, "‵": { "codepoints": [8245], "characters": "\u2035" }, "∽": { "codepoints": [8765], "characters": "\u223D" }, "⋍": { "codepoints": [8909], "characters": "\u22CD" }, "∖": { "codepoints": [8726], "characters": "\u2216" }, "⫧": { "codepoints": [10983], "characters": "\u2AE7" }, "⊽": { "codepoints": [8893], "characters": "\u22BD" }, "⌆": { "codepoints": [8966], "characters": "\u2306" }, "⌅": { "codepoints": [8965], 
"characters": "\u2305" }, "⌅": { "codepoints": [8965], "characters": "\u2305" }, "⎵": { "codepoints": [9141], "characters": "\u23B5" }, "⎶": { "codepoints": [9142], "characters": "\u23B6" }, "≌": { "codepoints": [8780], "characters": "\u224C" }, "Б": { "codepoints": [1041], "characters": "\u0411" }, "б": { "codepoints": [1073], "characters": "\u0431" }, "„": { "codepoints": [8222], "characters": "\u201E" }, "∵": { "codepoints": [8757], "characters": "\u2235" }, "∵": { "codepoints": [8757], "characters": "\u2235" }, "∵": { "codepoints": [8757], "characters": "\u2235" }, "⦰": { "codepoints": [10672], "characters": "\u29B0" }, "϶": { "codepoints": [1014], "characters": "\u03F6" }, "ℬ": { "codepoints": [8492], "characters": "\u212C" }, "ℬ": { "codepoints": [8492], "characters": "\u212C" }, "Β": { "codepoints": [914], "characters": "\u0392" }, "β": { "codepoints": [946], "characters": "\u03B2" }, "ℶ": { "codepoints": [8502], "characters": "\u2136" }, "≬": { "codepoints": [8812], "characters": "\u226C" }, "𝔅": { "codepoints": [120069], "characters": "\uD835\uDD05" }, "𝔟": { "codepoints": [120095], "characters": "\uD835\uDD1F" }, "⋂": { "codepoints": [8898], "characters": "\u22C2" }, "◯": { "codepoints": [9711], "characters": "\u25EF" }, "⋃": { "codepoints": [8899], "characters": "\u22C3" }, "⨀": { "codepoints": [10752], "characters": "\u2A00" }, "⨁": { "codepoints": [10753], "characters": "\u2A01" }, "⨂": { "codepoints": [10754], "characters": "\u2A02" }, "⨆": { "codepoints": [10758], "characters": "\u2A06" }, "★": { "codepoints": [9733], "characters": "\u2605" }, "▽": { "codepoints": [9661], "characters": "\u25BD" }, "△": { "codepoints": [9651], "characters": "\u25B3" }, "⨄": { "codepoints": [10756], "characters": "\u2A04" }, "⋁": { "codepoints": [8897], "characters": "\u22C1" }, "⋀": { "codepoints": [8896], "characters": "\u22C0" }, "⤍": { "codepoints": [10509], "characters": "\u290D" }, "⧫": { "codepoints": [10731], "characters": "\u29EB" }, "▪": { "codepoints": 
[9642], "characters": "\u25AA" }, "▴": { "codepoints": [9652], "characters": "\u25B4" }, "▾": { "codepoints": [9662], "characters": "\u25BE" }, "◂": { "codepoints": [9666], "characters": "\u25C2" }, "▸": { "codepoints": [9656], "characters": "\u25B8" }, "␣": { "codepoints": [9251], "characters": "\u2423" }, "▒": { "codepoints": [9618], "characters": "\u2592" }, "░": { "codepoints": [9617], "characters": "\u2591" }, "▓": { "codepoints": [9619], "characters": "\u2593" }, "█": { "codepoints": [9608], "characters": "\u2588" }, "=⃥": { "codepoints": [61, 8421], "characters": "\u003D\u20E5" }, "≡⃥": { "codepoints": [8801, 8421], "characters": "\u2261\u20E5" }, "⫭": { "codepoints": [10989], "characters": "\u2AED" }, "⌐": { "codepoints": [8976], "characters": "\u2310" }, "𝔹": { "codepoints": [120121], "characters": "\uD835\uDD39" }, "𝕓": { "codepoints": [120147], "characters": "\uD835\uDD53" }, "⊥": { "codepoints": [8869], "characters": "\u22A5" }, "⊥": { "codepoints": [8869], "characters": "\u22A5" }, "⋈": { "codepoints": [8904], "characters": "\u22C8" }, "⧉": { "codepoints": [10697], "characters": "\u29C9" }, "╗": { "codepoints": [9559], "characters": "\u2557" }, "╖": { "codepoints": [9558], "characters": "\u2556" }, "╕": { "codepoints": [9557], "characters": "\u2555" }, "┐": { "codepoints": [9488], "characters": "\u2510" }, "╔": { "codepoints": [9556], "characters": "\u2554" }, "╓": { "codepoints": [9555], "characters": "\u2553" }, "╒": { "codepoints": [9554], "characters": "\u2552" }, "┌": { "codepoints": [9484], "characters": "\u250C" }, "═": { "codepoints": [9552], "characters": "\u2550" }, "─": { "codepoints": [9472], "characters": "\u2500" }, "╦": { "codepoints": [9574], "characters": "\u2566" }, "╤": { "codepoints": [9572], "characters": "\u2564" }, "╥": { "codepoints": [9573], "characters": "\u2565" }, "┬": { "codepoints": [9516], "characters": "\u252C" }, "╩": { "codepoints": [9577], "characters": "\u2569" }, "╧": { "codepoints": [9575], "characters": "\u2567" 
}, "╨": { "codepoints": [9576], "characters": "\u2568" }, "┴": { "codepoints": [9524], "characters": "\u2534" }, "⊟": { "codepoints": [8863], "characters": "\u229F" }, "⊞": { "codepoints": [8862], "characters": "\u229E" }, "⊠": { "codepoints": [8864], "characters": "\u22A0" }, "╝": { "codepoints": [9565], "characters": "\u255D" }, "╜": { "codepoints": [9564], "characters": "\u255C" }, "╛": { "codepoints": [9563], "characters": "\u255B" }, "┘": { "codepoints": [9496], "characters": "\u2518" }, "╚": { "codepoints": [9562], "characters": "\u255A" }, "╙": { "codepoints": [9561], "characters": "\u2559" }, "╘": { "codepoints": [9560], "characters": "\u2558" }, "└": { "codepoints": [9492], "characters": "\u2514" }, "║": { "codepoints": [9553], "characters": "\u2551" }, "│": { "codepoints": [9474], "characters": "\u2502" }, "╬": { "codepoints": [9580], "characters": "\u256C" }, "╫": { "codepoints": [9579], "characters": "\u256B" }, "╪": { "codepoints": [9578], "characters": "\u256A" }, "┼": { "codepoints": [9532], "characters": "\u253C" }, "╣": { "codepoints": [9571], "characters": "\u2563" }, "╢": { "codepoints": [9570], "characters": "\u2562" }, "╡": { "codepoints": [9569], "characters": "\u2561" }, "┤": { "codepoints": [9508], "characters": "\u2524" }, "╠": { "codepoints": [9568], "characters": "\u2560" }, "╟": { "codepoints": [9567], "characters": "\u255F" }, "╞": { "codepoints": [9566], "characters": "\u255E" }, "├": { "codepoints": [9500], "characters": "\u251C" }, "‵": { "codepoints": [8245], "characters": "\u2035" }, "˘": { "codepoints": [728], "characters": "\u02D8" }, "˘": { "codepoints": [728], "characters": "\u02D8" }, "¦": { "codepoints": [166], "characters": "\u00A6" }, "¦": { "codepoints": [166], "characters": "\u00A6" }, "ℬ": { "codepoints": [8492], "characters": "\u212C" }, "𝒷": { "codepoints": [119991], "characters": "\uD835\uDCB7" }, "⁏": { "codepoints": [8271], "characters": "\u204F" }, "∽": { "codepoints": [8765], "characters": "\u223D" }, "⋍": { 
"codepoints": [8909], "characters": "\u22CD" }, "\": { "codepoints": [92], "characters": "\u005C" }, "⧅": { "codepoints": [10693], "characters": "\u29C5" }, "⟈": { "codepoints": [10184], "characters": "\u27C8" }, "•": { "codepoints": [8226], "characters": "\u2022" }, "•": { "codepoints": [8226], "characters": "\u2022" }, "≎": { "codepoints": [8782], "characters": "\u224E" }, "⪮": { "codepoints": [10926], "characters": "\u2AAE" }, "≏": { "codepoints": [8783], "characters": "\u224F" }, "≎": { "codepoints": [8782], "characters": "\u224E" }, "≏": { "codepoints": [8783], "characters": "\u224F" }, "Ć": { "codepoints": [262], "characters": "\u0106" }, "ć": { "codepoints": [263], "characters": "\u0107" }, "⋒": { "codepoints": [8914], "characters": "\u22D2" }, "∩": { "codepoints": [8745], "characters": "\u2229" }, "⩄": { "codepoints": [10820], "characters": "\u2A44" }, "⩉": { "codepoints": [10825], "characters": "\u2A49" }, "⩋": { "codepoints": [10827], "characters": "\u2A4B" }, "⩇": { "codepoints": [10823], "characters": "\u2A47" }, "⩀": { "codepoints": [10816], "characters": "\u2A40" }, "ⅅ": { "codepoints": [8517], "characters": "\u2145" }, "∩︀": { "codepoints": [8745, 65024], "characters": "\u2229\uFE00" }, "⁁": { "codepoints": [8257], "characters": "\u2041" }, "ˇ": { "codepoints": [711], "characters": "\u02C7" }, "ℭ": { "codepoints": [8493], "characters": "\u212D" }, "⩍": { "codepoints": [10829], "characters": "\u2A4D" }, "Č": { "codepoints": [268], "characters": "\u010C" }, "č": { "codepoints": [269], "characters": "\u010D" }, "Ç": { "codepoints": [199], "characters": "\u00C7" }, "Ç": { "codepoints": [199], "characters": "\u00C7" }, "ç": { "codepoints": [231], "characters": "\u00E7" }, "ç": { "codepoints": [231], "characters": "\u00E7" }, "Ĉ": { "codepoints": [264], "characters": "\u0108" }, "ĉ": { "codepoints": [265], "characters": "\u0109" }, "∰": { "codepoints": [8752], "characters": "\u2230" }, "⩌": { "codepoints": [10828], "characters": "\u2A4C" }, "⩐": { 
"codepoints": [10832], "characters": "\u2A50" }, "Ċ": { "codepoints": [266], "characters": "\u010A" }, "ċ": { "codepoints": [267], "characters": "\u010B" }, "¸": { "codepoints": [184], "characters": "\u00B8" }, "¸": { "codepoints": [184], "characters": "\u00B8" }, "¸": { "codepoints": [184], "characters": "\u00B8" }, "⦲": { "codepoints": [10674], "characters": "\u29B2" }, "¢": { "codepoints": [162], "characters": "\u00A2" }, "¢": { "codepoints": [162], "characters": "\u00A2" }, "·": { "codepoints": [183], "characters": "\u00B7" }, "·": { "codepoints": [183], "characters": "\u00B7" }, "ℭ": { "codepoints": [8493], "characters": "\u212D" }, "𝔠": { "codepoints": [120096], "characters": "\uD835\uDD20" }, "Ч": { "codepoints": [1063], "characters": "\u0427" }, "ч": { "codepoints": [1095], "characters": "\u0447" }, "✓": { "codepoints": [10003], "characters": "\u2713" }, "✓": { "codepoints": [10003], "characters": "\u2713" }, "Χ": { "codepoints": [935], "characters": "\u03A7" }, "χ": { "codepoints": [967], "characters": "\u03C7" }, "○": { "codepoints": [9675], "characters": "\u25CB" }, "ˆ": { "codepoints": [710], "characters": "\u02C6" }, "≗": { "codepoints": [8791], "characters": "\u2257" }, "↺": { "codepoints": [8634], "characters": "\u21BA" }, "↻": { "codepoints": [8635], "characters": "\u21BB" }, "⊛": { "codepoints": [8859], "characters": "\u229B" }, "⊚": { "codepoints": [8858], "characters": "\u229A" }, "⊝": { "codepoints": [8861], "characters": "\u229D" }, "⊙": { "codepoints": [8857], "characters": "\u2299" }, "®": { "codepoints": [174], "characters": "\u00AE" }, "Ⓢ": { "codepoints": [9416], "characters": "\u24C8" }, "⊖": { "codepoints": [8854], "characters": "\u2296" }, "⊕": { "codepoints": [8853], "characters": "\u2295" }, "⊗": { "codepoints": [8855], "characters": "\u2297" }, "⧃": { "codepoints": [10691], "characters": "\u29C3" }, "≗": { "codepoints": [8791], "characters": "\u2257" }, "⨐": { "codepoints": [10768], "characters": "\u2A10" }, "⫯": { "codepoints": 
[10991], "characters": "\u2AEF" }, "⧂": { "codepoints": [10690], "characters": "\u29C2" }, "∲": { "codepoints": [8754], "characters": "\u2232" }, "”": { "codepoints": [8221], "characters": "\u201D" }, "’": { "codepoints": [8217], "characters": "\u2019" }, "♣": { "codepoints": [9827], "characters": "\u2663" }, "♣": { "codepoints": [9827], "characters": "\u2663" }, "∷": { "codepoints": [8759], "characters": "\u2237" }, ":": { "codepoints": [58], "characters": "\u003A" }, "⩴": { "codepoints": [10868], "characters": "\u2A74" }, "≔": { "codepoints": [8788], "characters": "\u2254" }, "≔": { "codepoints": [8788], "characters": "\u2254" }, ",": { "codepoints": [44], "characters": "\u002C" }, "@": { "codepoints": [64], "characters": "\u0040" }, "∁": { "codepoints": [8705], "characters": "\u2201" }, "∘": { "codepoints": [8728], "characters": "\u2218" }, "∁": { "codepoints": [8705], "characters": "\u2201" }, "ℂ": { "codepoints": [8450], "characters": "\u2102" }, "≅": { "codepoints": [8773], "characters": "\u2245" }, "⩭": { "codepoints": [10861], "characters": "\u2A6D" }, "≡": { "codepoints": [8801], "characters": "\u2261" }, "∯": { "codepoints": [8751], "characters": "\u222F" }, "∮": { "codepoints": [8750], "characters": "\u222E" }, "∮": { "codepoints": [8750], "characters": "\u222E" }, "ℂ": { "codepoints": [8450], "characters": "\u2102" }, "𝕔": { "codepoints": [120148], "characters": "\uD835\uDD54" }, "∐": { "codepoints": [8720], "characters": "\u2210" }, "∐": { "codepoints": [8720], "characters": "\u2210" }, "©": { "codepoints": [169], "characters": "\u00A9" }, "©": { "codepoints": [169], "characters": "\u00A9" }, "©": { "codepoints": [169], "characters": "\u00A9" }, "©": { "codepoints": [169], "characters": "\u00A9" }, "℗": { "codepoints": [8471], "characters": "\u2117" }, "∳": { "codepoints": [8755], "characters": "\u2233" }, "↵": { "codepoints": [8629], "characters": "\u21B5" }, "⨯": { "codepoints": [10799], "characters": "\u2A2F" }, "✗": { "codepoints": [10007], 
"characters": "\u2717" }, "𝒞": { "codepoints": [119966], "characters": "\uD835\uDC9E" }, "𝒸": { "codepoints": [119992], "characters": "\uD835\uDCB8" }, "⫏": { "codepoints": [10959], "characters": "\u2ACF" }, "⫑": { "codepoints": [10961], "characters": "\u2AD1" }, "⫐": { "codepoints": [10960], "characters": "\u2AD0" }, "⫒": { "codepoints": [10962], "characters": "\u2AD2" }, "⋯": { "codepoints": [8943], "characters": "\u22EF" }, "⤸": { "codepoints": [10552], "characters": "\u2938" }, "⤵": { "codepoints": [10549], "characters": "\u2935" }, "⋞": { "codepoints": [8926], "characters": "\u22DE" }, "⋟": { "codepoints": [8927], "characters": "\u22DF" }, "↶": { "codepoints": [8630], "characters": "\u21B6" }, "⤽": { "codepoints": [10557], "characters": "\u293D" }, "⋓": { "codepoints": [8915], "characters": "\u22D3" }, "∪": { "codepoints": [8746], "characters": "\u222A" }, "⩈": { "codepoints": [10824], "characters": "\u2A48" }, "≍": { "codepoints": [8781], "characters": "\u224D" }, "⩆": { "codepoints": [10822], "characters": "\u2A46" }, "⩊": { "codepoints": [10826], "characters": "\u2A4A" }, "⊍": { "codepoints": [8845], "characters": "\u228D" }, "⩅": { "codepoints": [10821], "characters": "\u2A45" }, "∪︀": { "codepoints": [8746, 65024], "characters": "\u222A\uFE00" }, "↷": { "codepoints": [8631], "characters": "\u21B7" }, "⤼": { "codepoints": [10556], "characters": "\u293C" }, "⋞": { "codepoints": [8926], "characters": "\u22DE" }, "⋟": { "codepoints": [8927], "characters": "\u22DF" }, "⋎": { "codepoints": [8910], "characters": "\u22CE" }, "⋏": { "codepoints": [8911], "characters": "\u22CF" }, "¤": { "codepoints": [164], "characters": "\u00A4" }, "¤": { "codepoints": [164], "characters": "\u00A4" }, "↶": { "codepoints": [8630], "characters": "\u21B6" }, "↷": { "codepoints": [8631], "characters": "\u21B7" }, "⋎": { "codepoints": [8910], "characters": "\u22CE" }, "⋏": { "codepoints": [8911], "characters": "\u22CF" }, "∲": { "codepoints": [8754], "characters": "\u2232" }, "∱": { 
"codepoints": [8753], "characters": "\u2231" }, "⌭": { "codepoints": [9005], "characters": "\u232D" }, "‡": { "codepoints": [8225], "characters": "\u2021" }, "†": { "codepoints": [8224], "characters": "\u2020" }, "ℸ": { "codepoints": [8504], "characters": "\u2138" }, "↡": { "codepoints": [8609], "characters": "\u21A1" }, "⇓": { "codepoints": [8659], "characters": "\u21D3" }, "↓": { "codepoints": [8595], "characters": "\u2193" }, "‐": { "codepoints": [8208], "characters": "\u2010" }, "⫤": { "codepoints": [10980], "characters": "\u2AE4" }, "⊣": { "codepoints": [8867], "characters": "\u22A3" }, "⤏": { "codepoints": [10511], "characters": "\u290F" }, "˝": { "codepoints": [733], "characters": "\u02DD" }, "Ď": { "codepoints": [270], "characters": "\u010E" }, "ď": { "codepoints": [271], "characters": "\u010F" }, "Д": { "codepoints": [1044], "characters": "\u0414" }, "д": { "codepoints": [1076], "characters": "\u0434" }, "ⅅ": { "codepoints": [8517], "characters": "\u2145" }, "ⅆ": { "codepoints": [8518], "characters": "\u2146" }, "‡": { "codepoints": [8225], "characters": "\u2021" }, "⇊": { "codepoints": [8650], "characters": "\u21CA" }, "⤑": { "codepoints": [10513], "characters": "\u2911" }, "⩷": { "codepoints": [10871], "characters": "\u2A77" }, "°": { "codepoints": [176], "characters": "\u00B0" }, "°": { "codepoints": [176], "characters": "\u00B0" }, "∇": { "codepoints": [8711], "characters": "\u2207" }, "Δ": { "codepoints": [916], "characters": "\u0394" }, "δ": { "codepoints": [948], "characters": "\u03B4" }, "⦱": { "codepoints": [10673], "characters": "\u29B1" }, "⥿": { "codepoints": [10623], "characters": "\u297F" }, "𝔇": { "codepoints": [120071], "characters": "\uD835\uDD07" }, "𝔡": { "codepoints": [120097], "characters": "\uD835\uDD21" }, "⥥": { "codepoints": [10597], "characters": "\u2965" }, "⇃": { "codepoints": [8643], "characters": "\u21C3" }, "⇂": { "codepoints": [8642], "characters": "\u21C2" }, "´": { "codepoints": [180], "characters": "\u00B4" }, "˙": { 
"codepoints": [729], "characters": "\u02D9" }, "˝": { "codepoints": [733], "characters": "\u02DD" }, "`": { "codepoints": [96], "characters": "\u0060" }, "˜": { "codepoints": [732], "characters": "\u02DC" }, "⋄": { "codepoints": [8900], "characters": "\u22C4" }, "⋄": { "codepoints": [8900], "characters": "\u22C4" }, "⋄": { "codepoints": [8900], "characters": "\u22C4" }, "♦": { "codepoints": [9830], "characters": "\u2666" }, "♦": { "codepoints": [9830], "characters": "\u2666" }, "¨": { "codepoints": [168], "characters": "\u00A8" }, "ⅆ": { "codepoints": [8518], "characters": "\u2146" }, "ϝ": { "codepoints": [989], "characters": "\u03DD" }, "⋲": { "codepoints": [8946], "characters": "\u22F2" }, "÷": { "codepoints": [247], "characters": "\u00F7" }, "÷": { "codepoints": [247], "characters": "\u00F7" }, "÷": { "codepoints": [247], "characters": "\u00F7" }, "⋇": { "codepoints": [8903], "characters": "\u22C7" }, "⋇": { "codepoints": [8903], "characters": "\u22C7" }, "Ђ": { "codepoints": [1026], "characters": "\u0402" }, "ђ": { "codepoints": [1106], "characters": "\u0452" }, "⌞": { "codepoints": [8990], "characters": "\u231E" }, "⌍": { "codepoints": [8973], "characters": "\u230D" }, "$": { "codepoints": [36], "characters": "\u0024" }, "𝔻": { "codepoints": [120123], "characters": "\uD835\uDD3B" }, "𝕕": { "codepoints": [120149], "characters": "\uD835\uDD55" }, "¨": { "codepoints": [168], "characters": "\u00A8" }, "˙": { "codepoints": [729], "characters": "\u02D9" }, "⃜": { "codepoints": [8412], "characters": "\u20DC" }, "≐": { "codepoints": [8784], "characters": "\u2250" }, "≑": { "codepoints": [8785], "characters": "\u2251" }, "≐": { "codepoints": [8784], "characters": "\u2250" }, "∸": { "codepoints": [8760], "characters": "\u2238" }, "∔": { "codepoints": [8724], "characters": "\u2214" }, "⊡": { "codepoints": [8865], "characters": "\u22A1" }, "⌆": { "codepoints": [8966], "characters": "\u2306" }, "∯": { "codepoints": [8751], "characters": "\u222F" }, "¨": { "codepoints": 
[168], "characters": "\u00A8" }, "⇓": { "codepoints": [8659], "characters": "\u21D3" }, "⇐": { "codepoints": [8656], "characters": "\u21D0" }, "⇔": { "codepoints": [8660], "characters": "\u21D4" }, "⫤": { "codepoints": [10980], "characters": "\u2AE4" }, "⟸": { "codepoints": [10232], "characters": "\u27F8" }, "⟺": { "codepoints": [10234], "characters": "\u27FA" }, "⟹": { "codepoints": [10233], "characters": "\u27F9" }, "⇒": { "codepoints": [8658], "characters": "\u21D2" }, "⊨": { "codepoints": [8872], "characters": "\u22A8" }, "⇑": { "codepoints": [8657], "characters": "\u21D1" }, "⇕": { "codepoints": [8661], "characters": "\u21D5" }, "∥": { "codepoints": [8741], "characters": "\u2225" }, "↓": { "codepoints": [8595], "characters": "\u2193" }, "⇓": { "codepoints": [8659], "characters": "\u21D3" }, "↓": { "codepoints": [8595], "characters": "\u2193" }, "⤓": { "codepoints": [10515], "characters": "\u2913" }, "⇵": { "codepoints": [8693], "characters": "\u21F5" }, "̑": { "codepoints": [785], "characters": "\u0311" }, "⇊": { "codepoints": [8650], "characters": "\u21CA" }, "⇃": { "codepoints": [8643], "characters": "\u21C3" }, "⇂": { "codepoints": [8642], "characters": "\u21C2" }, "⥐": { "codepoints": [10576], "characters": "\u2950" }, "⥞": { "codepoints": [10590], "characters": "\u295E" }, "↽": { "codepoints": [8637], "characters": "\u21BD" }, "⥖": { "codepoints": [10582], "characters": "\u2956" }, "⥟": { "codepoints": [10591], "characters": "\u295F" }, "⇁": { "codepoints": [8641], "characters": "\u21C1" }, "⥗": { "codepoints": [10583], "characters": "\u2957" }, "⊤": { "codepoints": [8868], "characters": "\u22A4" }, "↧": { "codepoints": [8615], "characters": "\u21A7" }, "⤐": { "codepoints": [10512], "characters": "\u2910" }, "⌟": { "codepoints": [8991], "characters": "\u231F" }, "⌌": { "codepoints": [8972], "characters": "\u230C" }, "𝒟": { "codepoints": [119967], "characters": "\uD835\uDC9F" }, "𝒹": { "codepoints": [119993], "characters": "\uD835\uDCB9" }, "Ѕ": { 
"codepoints": [1029], "characters": "\u0405" }, "ѕ": { "codepoints": [1109], "characters": "\u0455" }, "⧶": { "codepoints": [10742], "characters": "\u29F6" }, "Đ": { "codepoints": [272], "characters": "\u0110" }, "đ": { "codepoints": [273], "characters": "\u0111" }, "⋱": { "codepoints": [8945], "characters": "\u22F1" }, "▿": { "codepoints": [9663], "characters": "\u25BF" }, "▾": { "codepoints": [9662], "characters": "\u25BE" }, "⇵": { "codepoints": [8693], "characters": "\u21F5" }, "⥯": { "codepoints": [10607], "characters": "\u296F" }, "⦦": { "codepoints": [10662], "characters": "\u29A6" }, "Џ": { "codepoints": [1039], "characters": "\u040F" }, "џ": { "codepoints": [1119], "characters": "\u045F" }, "⟿": { "codepoints": [10239], "characters": "\u27FF" }, "É": { "codepoints": [201], "characters": "\u00C9" }, "É": { "codepoints": [201], "characters": "\u00C9" }, "é": { "codepoints": [233], "characters": "\u00E9" }, "é": { "codepoints": [233], "characters": "\u00E9" }, "⩮": { "codepoints": [10862], "characters": "\u2A6E" }, "Ě": { "codepoints": [282], "characters": "\u011A" }, "ě": { "codepoints": [283], "characters": "\u011B" }, "≖": { "codepoints": [8790], "characters": "\u2256" }, "Ê": { "codepoints": [202], "characters": "\u00CA" }, "Ê": { "codepoints": [202], "characters": "\u00CA" }, "ê": { "codepoints": [234], "characters": "\u00EA" }, "ê": { "codepoints": [234], "characters": "\u00EA" }, "≕": { "codepoints": [8789], "characters": "\u2255" }, "Э": { "codepoints": [1069], "characters": "\u042D" }, "э": { "codepoints": [1101], "characters": "\u044D" }, "⩷": { "codepoints": [10871], "characters": "\u2A77" }, "Ė": { "codepoints": [278], "characters": "\u0116" }, "≑": { "codepoints": [8785], "characters": "\u2251" }, "ė": { "codepoints": [279], "characters": "\u0117" }, "ⅇ": { "codepoints": [8519], "characters": "\u2147" }, "≒": { "codepoints": [8786], "characters": "\u2252" }, "𝔈": { "codepoints": [120072], "characters": "\uD835\uDD08" }, "𝔢": { "codepoints": 
[120098], "characters": "\uD835\uDD22" }, "⪚": { "codepoints": [10906], "characters": "\u2A9A" }, "È": { "codepoints": [200], "characters": "\u00C8" }, "È": { "codepoints": [200], "characters": "\u00C8" }, "è": { "codepoints": [232], "characters": "\u00E8" }, "è": { "codepoints": [232], "characters": "\u00E8" }, "⪖": { "codepoints": [10902], "characters": "\u2A96" }, "⪘": { "codepoints": [10904], "characters": "\u2A98" }, "⪙": { "codepoints": [10905], "characters": "\u2A99" }, "∈": { "codepoints": [8712], "characters": "\u2208" }, "⏧": { "codepoints": [9191], "characters": "\u23E7" }, "ℓ": { "codepoints": [8467], "characters": "\u2113" }, "⪕": { "codepoints": [10901], "characters": "\u2A95" }, "⪗": { "codepoints": [10903], "characters": "\u2A97" }, "Ē": { "codepoints": [274], "characters": "\u0112" }, "ē": { "codepoints": [275], "characters": "\u0113" }, "∅": { "codepoints": [8709], "characters": "\u2205" }, "∅": { "codepoints": [8709], "characters": "\u2205" }, "◻": { "codepoints": [9723], "characters": "\u25FB" }, "∅": { "codepoints": [8709], "characters": "\u2205" }, "▫": { "codepoints": [9643], "characters": "\u25AB" }, " ": { "codepoints": [8195], "characters": "\u2003" }, " ": { "codepoints": [8196], "characters": "\u2004" }, " ": { "codepoints": [8197], "characters": "\u2005" }, "Ŋ": { "codepoints": [330], "characters": "\u014A" }, "ŋ": { "codepoints": [331], "characters": "\u014B" }, " ": { "codepoints": [8194], "characters": "\u2002" }, "Ę": { "codepoints": [280], "characters": "\u0118" }, "ę": { "codepoints": [281], "characters": "\u0119" }, "𝔼": { "codepoints": [120124], "characters": "\uD835\uDD3C" }, "𝕖": { "codepoints": [120150], "characters": "\uD835\uDD56" }, "⋕": { "codepoints": [8917], "characters": "\u22D5" }, "⧣": { "codepoints": [10723], "characters": "\u29E3" }, "⩱": { "codepoints": [10865], "characters": "\u2A71" }, "ε": { "codepoints": [949], "characters": "\u03B5" }, "Ε": { "codepoints": [917], "characters": "\u0395" }, "ε": { "codepoints": 
[949], "characters": "\u03B5" }, "ϵ": { "codepoints": [1013], "characters": "\u03F5" }, "≖": { "codepoints": [8790], "characters": "\u2256" }, "≕": { "codepoints": [8789], "characters": "\u2255" }, "≂": { "codepoints": [8770], "characters": "\u2242" }, "⪖": { "codepoints": [10902], "characters": "\u2A96" }, "⪕": { "codepoints": [10901], "characters": "\u2A95" }, "⩵": { "codepoints": [10869], "characters": "\u2A75" }, "=": { "codepoints": [61], "characters": "\u003D" }, "≂": { "codepoints": [8770], "characters": "\u2242" }, "≟": { "codepoints": [8799], "characters": "\u225F" }, "⇌": { "codepoints": [8652], "characters": "\u21CC" }, "≡": { "codepoints": [8801], "characters": "\u2261" }, "⩸": { "codepoints": [10872], "characters": "\u2A78" }, "⧥": { "codepoints": [10725], "characters": "\u29E5" }, "⥱": { "codepoints": [10609], "characters": "\u2971" }, "≓": { "codepoints": [8787], "characters": "\u2253" }, "ℰ": { "codepoints": [8496], "characters": "\u2130" }, "ℯ": { "codepoints": [8495], "characters": "\u212F" }, "≐": { "codepoints": [8784], "characters": "\u2250" }, "⩳": { "codepoints": [10867], "characters": "\u2A73" }, "≂": { "codepoints": [8770], "characters": "\u2242" }, "Η": { "codepoints": [919], "characters": "\u0397" }, "η": { "codepoints": [951], "characters": "\u03B7" }, "Ð": { "codepoints": [208], "characters": "\u00D0" }, "Ð": { "codepoints": [208], "characters": "\u00D0" }, "ð": { "codepoints": [240], "characters": "\u00F0" }, "ð": { "codepoints": [240], "characters": "\u00F0" }, "Ë": { "codepoints": [203], "characters": "\u00CB" }, "Ë": { "codepoints": [203], "characters": "\u00CB" }, "ë": { "codepoints": [235], "characters": "\u00EB" }, "ë": { "codepoints": [235], "characters": "\u00EB" }, "€": { "codepoints": [8364], "characters": "\u20AC" }, "!": { "codepoints": [33], "characters": "\u0021" }, "∃": { "codepoints": [8707], "characters": "\u2203" }, "∃": { "codepoints": [8707], "characters": "\u2203" }, "ℰ": { "codepoints": [8496], "characters": 
"\u2130" }, "ⅇ": { "codepoints": [8519], "characters": "\u2147" }, "ⅇ": { "codepoints": [8519], "characters": "\u2147" }, "≒": { "codepoints": [8786], "characters": "\u2252" }, "Ф": { "codepoints": [1060], "characters": "\u0424" }, "ф": { "codepoints": [1092], "characters": "\u0444" }, "♀": { "codepoints": [9792], "characters": "\u2640" }, "ffi": { "codepoints": [64259], "characters": "\uFB03" }, "ff": { "codepoints": [64256], "characters": "\uFB00" }, "ffl": { "codepoints": [64260], "characters": "\uFB04" }, "𝔉": { "codepoints": [120073], "characters": "\uD835\uDD09" }, "𝔣": { "codepoints": [120099], "characters": "\uD835\uDD23" }, "fi": { "codepoints": [64257], "characters": "\uFB01" }, "◼": { "codepoints": [9724], "characters": "\u25FC" }, "▪": { "codepoints": [9642], "characters": "\u25AA" }, "fj": { "codepoints": [102, 106], "characters": "\u0066\u006A" }, "♭": { "codepoints": [9837], "characters": "\u266D" }, "fl": { "codepoints": [64258], "characters": "\uFB02" }, "▱": { "codepoints": [9649], "characters": "\u25B1" }, "ƒ": { "codepoints": [402], "characters": "\u0192" }, "𝔽": { "codepoints": [120125], "characters": "\uD835\uDD3D" }, "𝕗": { "codepoints": [120151], "characters": "\uD835\uDD57" }, "∀": { "codepoints": [8704], "characters": "\u2200" }, "∀": { "codepoints": [8704], "characters": "\u2200" }, "⋔": { "codepoints": [8916], "characters": "\u22D4" }, "⫙": { "codepoints": [10969], "characters": "\u2AD9" }, "ℱ": { "codepoints": [8497], "characters": "\u2131" }, "⨍": { "codepoints": [10765], "characters": "\u2A0D" }, "½": { "codepoints": [189], "characters": "\u00BD" }, "½": { "codepoints": [189], "characters": "\u00BD" }, "⅓": { "codepoints": [8531], "characters": "\u2153" }, "¼": { "codepoints": [188], "characters": "\u00BC" }, "¼": { "codepoints": [188], "characters": "\u00BC" }, "⅕": { "codepoints": [8533], "characters": "\u2155" }, "⅙": { "codepoints": [8537], "characters": "\u2159" }, "⅛": { "codepoints": [8539], "characters": "\u215B" }, "⅔": { 
"codepoints": [8532], "characters": "\u2154" }, "⅖": { "codepoints": [8534], "characters": "\u2156" }, "¾": { "codepoints": [190], "characters": "\u00BE" }, "¾": { "codepoints": [190], "characters": "\u00BE" }, "⅗": { "codepoints": [8535], "characters": "\u2157" }, "⅜": { "codepoints": [8540], "characters": "\u215C" }, "⅘": { "codepoints": [8536], "characters": "\u2158" }, "⅚": { "codepoints": [8538], "characters": "\u215A" }, "⅝": { "codepoints": [8541], "characters": "\u215D" }, "⅞": { "codepoints": [8542], "characters": "\u215E" }, "⁄": { "codepoints": [8260], "characters": "\u2044" }, "⌢": { "codepoints": [8994], "characters": "\u2322" }, "ℱ": { "codepoints": [8497], "characters": "\u2131" }, "𝒻": { "codepoints": [119995], "characters": "\uD835\uDCBB" }, "ǵ": { "codepoints": [501], "characters": "\u01F5" }, "Γ": { "codepoints": [915], "characters": "\u0393" }, "γ": { "codepoints": [947], "characters": "\u03B3" }, "Ϝ": { "codepoints": [988], "characters": "\u03DC" }, "ϝ": { "codepoints": [989], "characters": "\u03DD" }, "⪆": { "codepoints": [10886], "characters": "\u2A86" }, "Ğ": { "codepoints": [286], "characters": "\u011E" }, "ğ": { "codepoints": [287], "characters": "\u011F" }, "Ģ": { "codepoints": [290], "characters": "\u0122" }, "Ĝ": { "codepoints": [284], "characters": "\u011C" }, "ĝ": { "codepoints": [285], "characters": "\u011D" }, "Г": { "codepoints": [1043], "characters": "\u0413" }, "г": { "codepoints": [1075], "characters": "\u0433" }, "Ġ": { "codepoints": [288], "characters": "\u0120" }, "ġ": { "codepoints": [289], "characters": "\u0121" }, "≧": { "codepoints": [8807], "characters": "\u2267" }, "≥": { "codepoints": [8805], "characters": "\u2265" }, "⪌": { "codepoints": [10892], "characters": "\u2A8C" }, "⋛": { "codepoints": [8923], "characters": "\u22DB" }, "≥": { "codepoints": [8805], "characters": "\u2265" }, "≧": { "codepoints": [8807], "characters": "\u2267" }, "⩾": { "codepoints": [10878], "characters": "\u2A7E" }, "⩾": { "codepoints": [10878], 
"characters": "\u2A7E" }, "⪩": { "codepoints": [10921], "characters": "\u2AA9" }, "⪀": { "codepoints": [10880], "characters": "\u2A80" }, "⪂": { "codepoints": [10882], "characters": "\u2A82" }, "⪄": { "codepoints": [10884], "characters": "\u2A84" }, "⋛︀": { "codepoints": [8923, 65024], "characters": "\u22DB\uFE00" }, "⪔": { "codepoints": [10900], "characters": "\u2A94" }, "𝔊": { "codepoints": [120074], "characters": "\uD835\uDD0A" }, "𝔤": { "codepoints": [120100], "characters": "\uD835\uDD24" }, "⋙": { "codepoints": [8921], "characters": "\u22D9" }, "≫": { "codepoints": [8811], "characters": "\u226B" }, "⋙": { "codepoints": [8921], "characters": "\u22D9" }, "ℷ": { "codepoints": [8503], "characters": "\u2137" }, "Ѓ": { "codepoints": [1027], "characters": "\u0403" }, "ѓ": { "codepoints": [1107], "characters": "\u0453" }, "≷": { "codepoints": [8823], "characters": "\u2277" }, "⪥": { "codepoints": [10917], "characters": "\u2AA5" }, "⪒": { "codepoints": [10898], "characters": "\u2A92" }, "⪤": { "codepoints": [10916], "characters": "\u2AA4" }, "⪊": { "codepoints": [10890], "characters": "\u2A8A" }, "⪊": { "codepoints": [10890], "characters": "\u2A8A" }, "≩": { "codepoints": [8809], "characters": "\u2269" }, "⪈": { "codepoints": [10888], "characters": "\u2A88" }, "⪈": { "codepoints": [10888], "characters": "\u2A88" }, "≩": { "codepoints": [8809], "characters": "\u2269" }, "⋧": { "codepoints": [8935], "characters": "\u22E7" }, "𝔾": { "codepoints": [120126], "characters": "\uD835\uDD3E" }, "𝕘": { "codepoints": [120152], "characters": "\uD835\uDD58" }, "`": { "codepoints": [96], "characters": "\u0060" }, "≥": { "codepoints": [8805], "characters": "\u2265" }, "⋛": { "codepoints": [8923], "characters": "\u22DB" }, "≧": { "codepoints": [8807], "characters": "\u2267" }, "⪢": { "codepoints": [10914], "characters": "\u2AA2" }, "≷": { "codepoints": [8823], "characters": "\u2277" }, "⩾": { "codepoints": [10878], "characters": "\u2A7E" }, "≳": { "codepoints": [8819], "characters": 
"\u2273" }, "𝒢": { "codepoints": [119970], "characters": "\uD835\uDCA2" }, "ℊ": { "codepoints": [8458], "characters": "\u210A" }, "≳": { "codepoints": [8819], "characters": "\u2273" }, "⪎": { "codepoints": [10894], "characters": "\u2A8E" }, "⪐": { "codepoints": [10896], "characters": "\u2A90" }, ">": { "codepoints": [62], "characters": "\u003E" }, ">": { "codepoints": [62], "characters": "\u003E" }, "≫": { "codepoints": [8811], "characters": "\u226B" }, ">": { "codepoints": [62], "characters": "\u003E" }, ">": { "codepoints": [62], "characters": "\u003E" }, "⪧": { "codepoints": [10919], "characters": "\u2AA7" }, "⩺": { "codepoints": [10874], "characters": "\u2A7A" }, "⋗": { "codepoints": [8919], "characters": "\u22D7" }, "⦕": { "codepoints": [10645], "characters": "\u2995" }, "⩼": { "codepoints": [10876], "characters": "\u2A7C" }, "⪆": { "codepoints": [10886], "characters": "\u2A86" }, "⥸": { "codepoints": [10616], "characters": "\u2978" }, "⋗": { "codepoints": [8919], "characters": "\u22D7" }, "⋛": { "codepoints": [8923], "characters": "\u22DB" }, "⪌": { "codepoints": [10892], "characters": "\u2A8C" }, "≷": { "codepoints": [8823], "characters": "\u2277" }, "≳": { "codepoints": [8819], "characters": "\u2273" }, "≩︀": { "codepoints": [8809, 65024], "characters": "\u2269\uFE00" }, "≩︀": { "codepoints": [8809, 65024], "characters": "\u2269\uFE00" }, "ˇ": { "codepoints": [711], "characters": "\u02C7" }, " ": { "codepoints": [8202], "characters": "\u200A" }, "½": { "codepoints": [189], "characters": "\u00BD" }, "ℋ": { "codepoints": [8459], "characters": "\u210B" }, "Ъ": { "codepoints": [1066], "characters": "\u042A" }, "ъ": { "codepoints": [1098], "characters": "\u044A" }, "⇔": { "codepoints": [8660], "characters": "\u21D4" }, "↔": { "codepoints": [8596], "characters": "\u2194" }, "⥈": { "codepoints": [10568], "characters": "\u2948" }, "↭": { "codepoints": [8621], "characters": "\u21AD" }, "^": { "codepoints": [94], "characters": "\u005E" }, "ℏ": { "codepoints": [8463], 
"characters": "\u210F" }, "Ĥ": { "codepoints": [292], "characters": "\u0124" }, "ĥ": { "codepoints": [293], "characters": "\u0125" }, "♥": { "codepoints": [9829], "characters": "\u2665" }, "♥": { "codepoints": [9829], "characters": "\u2665" }, "…": { "codepoints": [8230], "characters": "\u2026" }, "⊹": { "codepoints": [8889], "characters": "\u22B9" }, "ℌ": { "codepoints": [8460], "characters": "\u210C" }, "𝔥": { "codepoints": [120101], "characters": "\uD835\uDD25" }, "ℋ": { "codepoints": [8459], "characters": "\u210B" }, "⤥": { "codepoints": [10533], "characters": "\u2925" }, "⤦": { "codepoints": [10534], "characters": "\u2926" }, "⇿": { "codepoints": [8703], "characters": "\u21FF" }, "∻": { "codepoints": [8763], "characters": "\u223B" }, "↩": { "codepoints": [8617], "characters": "\u21A9" }, "↪": { "codepoints": [8618], "characters": "\u21AA" }, "ℍ": { "codepoints": [8461], "characters": "\u210D" }, "𝕙": { "codepoints": [120153], "characters": "\uD835\uDD59" }, "―": { "codepoints": [8213], "characters": "\u2015" }, "─": { "codepoints": [9472], "characters": "\u2500" }, "ℋ": { "codepoints": [8459], "characters": "\u210B" }, "𝒽": { "codepoints": [119997], "characters": "\uD835\uDCBD" }, "ℏ": { "codepoints": [8463], "characters": "\u210F" }, "Ħ": { "codepoints": [294], "characters": "\u0126" }, "ħ": { "codepoints": [295], "characters": "\u0127" }, "≎": { "codepoints": [8782], "characters": "\u224E" }, "≏": { "codepoints": [8783], "characters": "\u224F" }, "⁃": { "codepoints": [8259], "characters": "\u2043" }, "‐": { "codepoints": [8208], "characters": "\u2010" }, "Í": { "codepoints": [205], "characters": "\u00CD" }, "Í": { "codepoints": [205], "characters": "\u00CD" }, "í": { "codepoints": [237], "characters": "\u00ED" }, "í": { "codepoints": [237], "characters": "\u00ED" }, "⁣": { "codepoints": [8291], "characters": "\u2063" }, "Î": { "codepoints": [206], "characters": "\u00CE" }, "Î": { "codepoints": [206], "characters": "\u00CE" }, "î": { "codepoints": [238], 
"characters": "\u00EE" }, "î": { "codepoints": [238], "characters": "\u00EE" }, "И": { "codepoints": [1048], "characters": "\u0418" }, "и": { "codepoints": [1080], "characters": "\u0438" }, "İ": { "codepoints": [304], "characters": "\u0130" }, "Е": { "codepoints": [1045], "characters": "\u0415" }, "е": { "codepoints": [1077], "characters": "\u0435" }, "¡": { "codepoints": [161], "characters": "\u00A1" }, "¡": { "codepoints": [161], "characters": "\u00A1" }, "⇔": { "codepoints": [8660], "characters": "\u21D4" }, "ℑ": { "codepoints": [8465], "characters": "\u2111" }, "𝔦": { "codepoints": [120102], "characters": "\uD835\uDD26" }, "Ì": { "codepoints": [204], "characters": "\u00CC" }, "Ì": { "codepoints": [204], "characters": "\u00CC" }, "ì": { "codepoints": [236], "characters": "\u00EC" }, "ì": { "codepoints": [236], "characters": "\u00EC" }, "ⅈ": { "codepoints": [8520], "characters": "\u2148" }, "⨌": { "codepoints": [10764], "characters": "\u2A0C" }, "∭": { "codepoints": [8749], "characters": "\u222D" }, "⧜": { "codepoints": [10716], "characters": "\u29DC" }, "℩": { "codepoints": [8489], "characters": "\u2129" }, "IJ": { "codepoints": [306], "characters": "\u0132" }, "ij": { "codepoints": [307], "characters": "\u0133" }, "ℑ": { "codepoints": [8465], "characters": "\u2111" }, "Ī": { "codepoints": [298], "characters": "\u012A" }, "ī": { "codepoints": [299], "characters": "\u012B" }, "ℑ": { "codepoints": [8465], "characters": "\u2111" }, "ⅈ": { "codepoints": [8520], "characters": "\u2148" }, "ℐ": { "codepoints": [8464], "characters": "\u2110" }, "ℑ": { "codepoints": [8465], "characters": "\u2111" }, "ı": { "codepoints": [305], "characters": "\u0131" }, "⊷": { "codepoints": [8887], "characters": "\u22B7" }, "Ƶ": { "codepoints": [437], "characters": "\u01B5" }, "⇒": { "codepoints": [8658], "characters": "\u21D2" }, "∈": { "codepoints": [8712], "characters": "\u2208" }, "℅": { "codepoints": [8453], "characters": "\u2105" }, "∞": { "codepoints": [8734], "characters": 
"\u221E" }, "⧝": { "codepoints": [10717], "characters": "\u29DD" }, "ı": { "codepoints": [305], "characters": "\u0131" }, "∬": { "codepoints": [8748], "characters": "\u222C" }, "∫": { "codepoints": [8747], "characters": "\u222B" }, "⊺": { "codepoints": [8890], "characters": "\u22BA" }, "ℤ": { "codepoints": [8484], "characters": "\u2124" }, "∫": { "codepoints": [8747], "characters": "\u222B" }, "⊺": { "codepoints": [8890], "characters": "\u22BA" }, "⋂": { "codepoints": [8898], "characters": "\u22C2" }, "⨗": { "codepoints": [10775], "characters": "\u2A17" }, "⨼": { "codepoints": [10812], "characters": "\u2A3C" }, "⁣": { "codepoints": [8291], "characters": "\u2063" }, "⁢": { "codepoints": [8290], "characters": "\u2062" }, "Ё": { "codepoints": [1025], "characters": "\u0401" }, "ё": { "codepoints": [1105], "characters": "\u0451" }, "Į": { "codepoints": [302], "characters": "\u012E" }, "į": { "codepoints": [303], "characters": "\u012F" }, "𝕀": { "codepoints": [120128], "characters": "\uD835\uDD40" }, "𝕚": { "codepoints": [120154], "characters": "\uD835\uDD5A" }, "Ι": { "codepoints": [921], "characters": "\u0399" }, "ι": { "codepoints": [953], "characters": "\u03B9" }, "⨼": { "codepoints": [10812], "characters": "\u2A3C" }, "¿": { "codepoints": [191], "characters": "\u00BF" }, "¿": { "codepoints": [191], "characters": "\u00BF" }, "ℐ": { "codepoints": [8464], "characters": "\u2110" }, "𝒾": { "codepoints": [119998], "characters": "\uD835\uDCBE" }, "∈": { "codepoints": [8712], "characters": "\u2208" }, "⋵": { "codepoints": [8949], "characters": "\u22F5" }, "⋹": { "codepoints": [8953], "characters": "\u22F9" }, "⋴": { "codepoints": [8948], "characters": "\u22F4" }, "⋳": { "codepoints": [8947], "characters": "\u22F3" }, "∈": { "codepoints": [8712], "characters": "\u2208" }, "⁢": { "codepoints": [8290], "characters": "\u2062" }, "Ĩ": { "codepoints": [296], "characters": "\u0128" }, "ĩ": { "codepoints": [297], "characters": "\u0129" }, "І": { "codepoints": [1030], "characters": 
"\u0406" }, "і": { "codepoints": [1110], "characters": "\u0456" }, "Ï": { "codepoints": [207], "characters": "\u00CF" }, "Ï": { "codepoints": [207], "characters": "\u00CF" }, "ï": { "codepoints": [239], "characters": "\u00EF" }, "ï": { "codepoints": [239], "characters": "\u00EF" }, "Ĵ": { "codepoints": [308], "characters": "\u0134" }, "ĵ": { "codepoints": [309], "characters": "\u0135" }, "Й": { "codepoints": [1049], "characters": "\u0419" }, "й": { "codepoints": [1081], "characters": "\u0439" }, "𝔍": { "codepoints": [120077], "characters": "\uD835\uDD0D" }, "𝔧": { "codepoints": [120103], "characters": "\uD835\uDD27" }, "ȷ": { "codepoints": [567], "characters": "\u0237" }, "𝕁": { "codepoints": [120129], "characters": "\uD835\uDD41" }, "𝕛": { "codepoints": [120155], "characters": "\uD835\uDD5B" }, "𝒥": { "codepoints": [119973], "characters": "\uD835\uDCA5" }, "𝒿": { "codepoints": [119999], "characters": "\uD835\uDCBF" }, "Ј": { "codepoints": [1032], "characters": "\u0408" }, "ј": { "codepoints": [1112], "characters": "\u0458" }, "Є": { "codepoints": [1028], "characters": "\u0404" }, "є": { "codepoints": [1108], "characters": "\u0454" }, "Κ": { "codepoints": [922], "characters": "\u039A" }, "κ": { "codepoints": [954], "characters": "\u03BA" }, "ϰ": { "codepoints": [1008], "characters": "\u03F0" }, "Ķ": { "codepoints": [310], "characters": "\u0136" }, "ķ": { "codepoints": [311], "characters": "\u0137" }, "К": { "codepoints": [1050], "characters": "\u041A" }, "к": { "codepoints": [1082], "characters": "\u043A" }, "𝔎": { "codepoints": [120078], "characters": "\uD835\uDD0E" }, "𝔨": { "codepoints": [120104], "characters": "\uD835\uDD28" }, "ĸ": { "codepoints": [312], "characters": "\u0138" }, "Х": { "codepoints": [1061], "characters": "\u0425" }, "х": { "codepoints": [1093], "characters": "\u0445" }, "Ќ": { "codepoints": [1036], "characters": "\u040C" }, "ќ": { "codepoints": [1116], "characters": "\u045C" }, "𝕂": { "codepoints": [120130], "characters": "\uD835\uDD42" }, 
"𝕜": { "codepoints": [120156], "characters": "\uD835\uDD5C" }, "𝒦": { "codepoints": [119974], "characters": "\uD835\uDCA6" }, "𝓀": { "codepoints": [120000], "characters": "\uD835\uDCC0" }, "⇚": { "codepoints": [8666], "characters": "\u21DA" }, "Ĺ": { "codepoints": [313], "characters": "\u0139" }, "ĺ": { "codepoints": [314], "characters": "\u013A" }, "⦴": { "codepoints": [10676], "characters": "\u29B4" }, "ℒ": { "codepoints": [8466], "characters": "\u2112" }, "Λ": { "codepoints": [923], "characters": "\u039B" }, "λ": { "codepoints": [955], "characters": "\u03BB" }, "⟪": { "codepoints": [10218], "characters": "\u27EA" }, "⟨": { "codepoints": [10216], "characters": "\u27E8" }, "⦑": { "codepoints": [10641], "characters": "\u2991" }, "⟨": { "codepoints": [10216], "characters": "\u27E8" }, "⪅": { "codepoints": [10885], "characters": "\u2A85" }, "ℒ": { "codepoints": [8466], "characters": "\u2112" }, "«": { "codepoints": [171], "characters": "\u00AB" }, "«": { "codepoints": [171], "characters": "\u00AB" }, "↞": { "codepoints": [8606], "characters": "\u219E" }, "⇐": { "codepoints": [8656], "characters": "\u21D0" }, "←": { "codepoints": [8592], "characters": "\u2190" }, "⇤": { "codepoints": [8676], "characters": "\u21E4" }, "⤟": { "codepoints": [10527], "characters": "\u291F" }, "⤝": { "codepoints": [10525], "characters": "\u291D" }, "↩": { "codepoints": [8617], "characters": "\u21A9" }, "↫": { "codepoints": [8619], "characters": "\u21AB" }, "⤹": { "codepoints": [10553], "characters": "\u2939" }, "⥳": { "codepoints": [10611], "characters": "\u2973" }, "↢": { "codepoints": [8610], "characters": "\u21A2" }, "⪫": { "codepoints": [10923], "characters": "\u2AAB" }, "⤛": { "codepoints": [10523], "characters": "\u291B" }, "⤙": { "codepoints": [10521], "characters": "\u2919" }, "⪭": { "codepoints": [10925], "characters": "\u2AAD" }, "⪭︀": { "codepoints": [10925, 65024], "characters": "\u2AAD\uFE00" }, "⤎": { "codepoints": [10510], "characters": "\u290E" }, "⤌": { "codepoints": 
[10508], "characters": "\u290C" }, "❲": { "codepoints": [10098], "characters": "\u2772" }, "{": { "codepoints": [123], "characters": "\u007B" }, "[": { "codepoints": [91], "characters": "\u005B" }, "⦋": { "codepoints": [10635], "characters": "\u298B" }, "⦏": { "codepoints": [10639], "characters": "\u298F" }, "⦍": { "codepoints": [10637], "characters": "\u298D" }, "Ľ": { "codepoints": [317], "characters": "\u013D" }, "ľ": { "codepoints": [318], "characters": "\u013E" }, "Ļ": { "codepoints": [315], "characters": "\u013B" }, "ļ": { "codepoints": [316], "characters": "\u013C" }, "⌈": { "codepoints": [8968], "characters": "\u2308" }, "{": { "codepoints": [123], "characters": "\u007B" }, "Л": { "codepoints": [1051], "characters": "\u041B" }, "л": { "codepoints": [1083], "characters": "\u043B" }, "⤶": { "codepoints": [10550], "characters": "\u2936" }, "“": { "codepoints": [8220], "characters": "\u201C" }, "„": { "codepoints": [8222], "characters": "\u201E" }, "⥧": { "codepoints": [10599], "characters": "\u2967" }, "⥋": { "codepoints": [10571], "characters": "\u294B" }, "↲": { "codepoints": [8626], "characters": "\u21B2" }, "≦": { "codepoints": [8806], "characters": "\u2266" }, "≤": { "codepoints": [8804], "characters": "\u2264" }, "⟨": { "codepoints": [10216], "characters": "\u27E8" }, "←": { "codepoints": [8592], "characters": "\u2190" }, "⇐": { "codepoints": [8656], "characters": "\u21D0" }, "←": { "codepoints": [8592], "characters": "\u2190" }, "⇤": { "codepoints": [8676], "characters": "\u21E4" }, "⇆": { "codepoints": [8646], "characters": "\u21C6" }, "↢": { "codepoints": [8610], "characters": "\u21A2" }, "⌈": { "codepoints": [8968], "characters": "\u2308" }, "⟦": { "codepoints": [10214], "characters": "\u27E6" }, "⥡": { "codepoints": [10593], "characters": "\u2961" }, "⇃": { "codepoints": [8643], "characters": "\u21C3" }, "⥙": { "codepoints": [10585], "characters": "\u2959" }, "⌊": { "codepoints": [8970], "characters": "\u230A" }, "↽": { "codepoints": [8637], 
"characters": "\u21BD" }, "↼": { "codepoints": [8636], "characters": "\u21BC" }, "⇇": { "codepoints": [8647], "characters": "\u21C7" }, "↔": { "codepoints": [8596], "characters": "\u2194" }, "⇔": { "codepoints": [8660], "characters": "\u21D4" }, "↔": { "codepoints": [8596], "characters": "\u2194" }, "⇆": { "codepoints": [8646], "characters": "\u21C6" }, "⇋": { "codepoints": [8651], "characters": "\u21CB" }, "↭": { "codepoints": [8621], "characters": "\u21AD" }, "⥎": { "codepoints": [10574], "characters": "\u294E" }, "⊣": { "codepoints": [8867], "characters": "\u22A3" }, "↤": { "codepoints": [8612], "characters": "\u21A4" }, "⥚": { "codepoints": [10586], "characters": "\u295A" }, "⋋": { "codepoints": [8907], "characters": "\u22CB" }, "⊲": { "codepoints": [8882], "characters": "\u22B2" }, "⧏": { "codepoints": [10703], "characters": "\u29CF" }, "⊴": { "codepoints": [8884], "characters": "\u22B4" }, "⥑": { "codepoints": [10577], "characters": "\u2951" }, "⥠": { "codepoints": [10592], "characters": "\u2960" }, "↿": { "codepoints": [8639], "characters": "\u21BF" }, "⥘": { "codepoints": [10584], "characters": "\u2958" }, "↼": { "codepoints": [8636], "characters": "\u21BC" }, "⥒": { "codepoints": [10578], "characters": "\u2952" }, "⪋": { "codepoints": [10891], "characters": "\u2A8B" }, "⋚": { "codepoints": [8922], "characters": "\u22DA" }, "≤": { "codepoints": [8804], "characters": "\u2264" }, "≦": { "codepoints": [8806], "characters": "\u2266" }, "⩽": { "codepoints": [10877], "characters": "\u2A7D" }, "⩽": { "codepoints": [10877], "characters": "\u2A7D" }, "⪨": { "codepoints": [10920], "characters": "\u2AA8" }, "⩿": { "codepoints": [10879], "characters": "\u2A7F" }, "⪁": { "codepoints": [10881], "characters": "\u2A81" }, "⪃": { "codepoints": [10883], "characters": "\u2A83" }, "⋚︀": { "codepoints": [8922, 65024], "characters": "\u22DA\uFE00" }, "⪓": { "codepoints": [10899], "characters": "\u2A93" }, "⪅": { "codepoints": [10885], "characters": "\u2A85" }, "⋖": { 
"codepoints": [8918], "characters": "\u22D6" }, "⋚": { "codepoints": [8922], "characters": "\u22DA" }, "⪋": { "codepoints": [10891], "characters": "\u2A8B" }, "⋚": { "codepoints": [8922], "characters": "\u22DA" }, "≦": { "codepoints": [8806], "characters": "\u2266" }, "≶": { "codepoints": [8822], "characters": "\u2276" }, "≶": { "codepoints": [8822], "characters": "\u2276" }, "⪡": { "codepoints": [10913], "characters": "\u2AA1" }, "≲": { "codepoints": [8818], "characters": "\u2272" }, "⩽": { "codepoints": [10877], "characters": "\u2A7D" }, "≲": { "codepoints": [8818], "characters": "\u2272" }, "⥼": { "codepoints": [10620], "characters": "\u297C" }, "⌊": { "codepoints": [8970], "characters": "\u230A" }, "𝔏": { "codepoints": [120079], "characters": "\uD835\uDD0F" }, "𝔩": { "codepoints": [120105], "characters": "\uD835\uDD29" }, "≶": { "codepoints": [8822], "characters": "\u2276" }, "⪑": { "codepoints": [10897], "characters": "\u2A91" }, "⥢": { "codepoints": [10594], "characters": "\u2962" }, "↽": { "codepoints": [8637], "characters": "\u21BD" }, "↼": { "codepoints": [8636], "characters": "\u21BC" }, "⥪": { "codepoints": [10602], "characters": "\u296A" }, "▄": { "codepoints": [9604], "characters": "\u2584" }, "Љ": { "codepoints": [1033], "characters": "\u0409" }, "љ": { "codepoints": [1113], "characters": "\u0459" }, "⋘": { "codepoints": [8920], "characters": "\u22D8" }, "≪": { "codepoints": [8810], "characters": "\u226A" }, "⇇": { "codepoints": [8647], "characters": "\u21C7" }, "⌞": { "codepoints": [8990], "characters": "\u231E" }, "⇚": { "codepoints": [8666], "characters": "\u21DA" }, "⥫": { "codepoints": [10603], "characters": "\u296B" }, "◺": { "codepoints": [9722], "characters": "\u25FA" }, "Ŀ": { "codepoints": [319], "characters": "\u013F" }, "ŀ": { "codepoints": [320], "characters": "\u0140" }, "⎰": { "codepoints": [9136], "characters": "\u23B0" }, "⎰": { "codepoints": [9136], "characters": "\u23B0" }, "⪉": { "codepoints": [10889], "characters": "\u2A89" }, 
"⪉": { "codepoints": [10889], "characters": "\u2A89" }, "≨": { "codepoints": [8808], "characters": "\u2268" }, "⪇": { "codepoints": [10887], "characters": "\u2A87" }, "⪇": { "codepoints": [10887], "characters": "\u2A87" }, "≨": { "codepoints": [8808], "characters": "\u2268" }, "⋦": { "codepoints": [8934], "characters": "\u22E6" }, "⟬": { "codepoints": [10220], "characters": "\u27EC" }, "⇽": { "codepoints": [8701], "characters": "\u21FD" }, "⟦": { "codepoints": [10214], "characters": "\u27E6" }, "⟵": { "codepoints": [10229], "characters": "\u27F5" }, "⟸": { "codepoints": [10232], "characters": "\u27F8" }, "⟵": { "codepoints": [10229], "characters": "\u27F5" }, "⟷": { "codepoints": [10231], "characters": "\u27F7" }, "⟺": { "codepoints": [10234], "characters": "\u27FA" }, "⟷": { "codepoints": [10231], "characters": "\u27F7" }, "⟼": { "codepoints": [10236], "characters": "\u27FC" }, "⟶": { "codepoints": [10230], "characters": "\u27F6" }, "⟹": { "codepoints": [10233], "characters": "\u27F9" }, "⟶": { "codepoints": [10230], "characters": "\u27F6" }, "↫": { "codepoints": [8619], "characters": "\u21AB" }, "↬": { "codepoints": [8620], "characters": "\u21AC" }, "⦅": { "codepoints": [10629], "characters": "\u2985" }, "𝕃": { "codepoints": [120131], "characters": "\uD835\uDD43" }, "𝕝": { "codepoints": [120157], "characters": "\uD835\uDD5D" }, "⨭": { "codepoints": [10797], "characters": "\u2A2D" }, "⨴": { "codepoints": [10804], "characters": "\u2A34" }, "∗": { "codepoints": [8727], "characters": "\u2217" }, "_": { "codepoints": [95], "characters": "\u005F" }, "↙": { "codepoints": [8601], "characters": "\u2199" }, "↘": { "codepoints": [8600], "characters": "\u2198" }, "◊": { "codepoints": [9674], "characters": "\u25CA" }, "◊": { "codepoints": [9674], "characters": "\u25CA" }, "⧫": { "codepoints": [10731], "characters": "\u29EB" }, "(": { "codepoints": [40], "characters": "\u0028" }, "⦓": { "codepoints": [10643], "characters": "\u2993" }, "⇆": { "codepoints": [8646], "characters": 
"\u21C6" }, "⌟": { "codepoints": [8991], "characters": "\u231F" }, "⇋": { "codepoints": [8651], "characters": "\u21CB" }, "⥭": { "codepoints": [10605], "characters": "\u296D" }, "‎": { "codepoints": [8206], "characters": "\u200E" }, "⊿": { "codepoints": [8895], "characters": "\u22BF" }, "‹": { "codepoints": [8249], "characters": "\u2039" }, "ℒ": { "codepoints": [8466], "characters": "\u2112" }, "𝓁": { "codepoints": [120001], "characters": "\uD835\uDCC1" }, "↰": { "codepoints": [8624], "characters": "\u21B0" }, "↰": { "codepoints": [8624], "characters": "\u21B0" }, "≲": { "codepoints": [8818], "characters": "\u2272" }, "⪍": { "codepoints": [10893], "characters": "\u2A8D" }, "⪏": { "codepoints": [10895], "characters": "\u2A8F" }, "[": { "codepoints": [91], "characters": "\u005B" }, "‘": { "codepoints": [8216], "characters": "\u2018" }, "‚": { "codepoints": [8218], "characters": "\u201A" }, "Ł": { "codepoints": [321], "characters": "\u0141" }, "ł": { "codepoints": [322], "characters": "\u0142" }, "<": { "codepoints": [60], "characters": "\u003C" }, "<": { "codepoints": [60], "characters": "\u003C" }, "≪": { "codepoints": [8810], "characters": "\u226A" }, "<": { "codepoints": [60], "characters": "\u003C" }, "<": { "codepoints": [60], "characters": "\u003C" }, "⪦": { "codepoints": [10918], "characters": "\u2AA6" }, "⩹": { "codepoints": [10873], "characters": "\u2A79" }, "⋖": { "codepoints": [8918], "characters": "\u22D6" }, "⋋": { "codepoints": [8907], "characters": "\u22CB" }, "⋉": { "codepoints": [8905], "characters": "\u22C9" }, "⥶": { "codepoints": [10614], "characters": "\u2976" }, "⩻": { "codepoints": [10875], "characters": "\u2A7B" }, "◃": { "codepoints": [9667], "characters": "\u25C3" }, "⊴": { "codepoints": [8884], "characters": "\u22B4" }, "◂": { "codepoints": [9666], "characters": "\u25C2" }, "⦖": { "codepoints": [10646], "characters": "\u2996" }, "⥊": { "codepoints": [10570], "characters": "\u294A" }, "⥦": { "codepoints": [10598], "characters": "\u2966" }, 
"≨︀": { "codepoints": [8808, 65024], "characters": "\u2268\uFE00" }, "≨︀": { "codepoints": [8808, 65024], "characters": "\u2268\uFE00" }, "¯": { "codepoints": [175], "characters": "\u00AF" }, "¯": { "codepoints": [175], "characters": "\u00AF" }, "♂": { "codepoints": [9794], "characters": "\u2642" }, "✠": { "codepoints": [10016], "characters": "\u2720" }, "✠": { "codepoints": [10016], "characters": "\u2720" }, "⤅": { "codepoints": [10501], "characters": "\u2905" }, "↦": { "codepoints": [8614], "characters": "\u21A6" }, "↦": { "codepoints": [8614], "characters": "\u21A6" }, "↧": { "codepoints": [8615], "characters": "\u21A7" }, "↤": { "codepoints": [8612], "characters": "\u21A4" }, "↥": { "codepoints": [8613], "characters": "\u21A5" }, "▮": { "codepoints": [9646], "characters": "\u25AE" }, "⨩": { "codepoints": [10793], "characters": "\u2A29" }, "М": { "codepoints": [1052], "characters": "\u041C" }, "м": { "codepoints": [1084], "characters": "\u043C" }, "—": { "codepoints": [8212], "characters": "\u2014" }, "∺": { "codepoints": [8762], "characters": "\u223A" }, "∡": { "codepoints": [8737], "characters": "\u2221" }, " ": { "codepoints": [8287], "characters": "\u205F" }, "ℳ": { "codepoints": [8499], "characters": "\u2133" }, "𝔐": { "codepoints": [120080], "characters": "\uD835\uDD10" }, "𝔪": { "codepoints": [120106], "characters": "\uD835\uDD2A" }, "℧": { "codepoints": [8487], "characters": "\u2127" }, "µ": { "codepoints": [181], "characters": "\u00B5" }, "µ": { "codepoints": [181], "characters": "\u00B5" }, "∣": { "codepoints": [8739], "characters": "\u2223" }, "*": { "codepoints": [42], "characters": "\u002A" }, "⫰": { "codepoints": [10992], "characters": "\u2AF0" }, "·": { "codepoints": [183], "characters": "\u00B7" }, "·": { "codepoints": [183], "characters": "\u00B7" }, "−": { "codepoints": [8722], "characters": "\u2212" }, "⊟": { "codepoints": [8863], "characters": "\u229F" }, "∸": { "codepoints": [8760], "characters": "\u2238" }, "⨪": { "codepoints": [10794], 
"characters": "\u2A2A" }, "∓": { "codepoints": [8723], "characters": "\u2213" }, "⫛": { "codepoints": [10971], "characters": "\u2ADB" }, "…": { "codepoints": [8230], "characters": "\u2026" }, "∓": { "codepoints": [8723], "characters": "\u2213" }, "⊧": { "codepoints": [8871], "characters": "\u22A7" }, "𝕄": { "codepoints": [120132], "characters": "\uD835\uDD44" }, "𝕞": { "codepoints": [120158], "characters": "\uD835\uDD5E" }, "∓": { "codepoints": [8723], "characters": "\u2213" }, "ℳ": { "codepoints": [8499], "characters": "\u2133" }, "𝓂": { "codepoints": [120002], "characters": "\uD835\uDCC2" }, "∾": { "codepoints": [8766], "characters": "\u223E" }, "Μ": { "codepoints": [924], "characters": "\u039C" }, "μ": { "codepoints": [956], "characters": "\u03BC" }, "⊸": { "codepoints": [8888], "characters": "\u22B8" }, "⊸": { "codepoints": [8888], "characters": "\u22B8" }, "∇": { "codepoints": [8711], "characters": "\u2207" }, "Ń": { "codepoints": [323], "characters": "\u0143" }, "ń": { "codepoints": [324], "characters": "\u0144" }, "∠⃒": { "codepoints": [8736, 8402], "characters": "\u2220\u20D2" }, "≉": { "codepoints": [8777], "characters": "\u2249" }, "⩰̸": { "codepoints": [10864, 824], "characters": "\u2A70\u0338" }, "≋̸": { "codepoints": [8779, 824], "characters": "\u224B\u0338" }, "ʼn": { "codepoints": [329], "characters": "\u0149" }, "≉": { "codepoints": [8777], "characters": "\u2249" }, "♮": { "codepoints": [9838], "characters": "\u266E" }, "♮": { "codepoints": [9838], "characters": "\u266E" }, "ℕ": { "codepoints": [8469], "characters": "\u2115" }, " ": { "codepoints": [160], "characters": "\u00A0" }, " ": { "codepoints": [160], "characters": "\u00A0" }, "≎̸": { "codepoints": [8782, 824], "characters": "\u224E\u0338" }, "≏̸": { "codepoints": [8783, 824], "characters": "\u224F\u0338" }, "⩃": { "codepoints": [10819], "characters": "\u2A43" }, "Ň": { "codepoints": [327], "characters": "\u0147" }, "ň": { "codepoints": [328], "characters": "\u0148" }, "Ņ": { "codepoints": 
[325], "characters": "\u0145" }, "ņ": { "codepoints": [326], "characters": "\u0146" }, "≇": { "codepoints": [8775], "characters": "\u2247" }, "⩭̸": { "codepoints": [10861, 824], "characters": "\u2A6D\u0338" }, "⩂": { "codepoints": [10818], "characters": "\u2A42" }, "Н": { "codepoints": [1053], "characters": "\u041D" }, "н": { "codepoints": [1085], "characters": "\u043D" }, "–": { "codepoints": [8211], "characters": "\u2013" }, "≠": { "codepoints": [8800], "characters": "\u2260" }, "⤤": { "codepoints": [10532], "characters": "\u2924" }, "⇗": { "codepoints": [8663], "characters": "\u21D7" }, "↗": { "codepoints": [8599], "characters": "\u2197" }, "↗": { "codepoints": [8599], "characters": "\u2197" }, "≐̸": { "codepoints": [8784, 824], "characters": "\u2250\u0338" }, "​": { "codepoints": [8203], "characters": "\u200B" }, "​": { "codepoints": [8203], "characters": "\u200B" }, "​": { "codepoints": [8203], "characters": "\u200B" }, "​": { "codepoints": [8203], "characters": "\u200B" }, "≢": { "codepoints": [8802], "characters": "\u2262" }, "⤨": { "codepoints": [10536], "characters": "\u2928" }, "≂̸": { "codepoints": [8770, 824], "characters": "\u2242\u0338" }, "≫": { "codepoints": [8811], "characters": "\u226B" }, "≪": { "codepoints": [8810], "characters": "\u226A" }, " ": { "codepoints": [10], "characters": "\u000A" }, "∄": { "codepoints": [8708], "characters": "\u2204" }, "∄": { "codepoints": [8708], "characters": "\u2204" }, "𝔑": { "codepoints": [120081], "characters": "\uD835\uDD11" }, "𝔫": { "codepoints": [120107], "characters": "\uD835\uDD2B" }, "≧̸": { "codepoints": [8807, 824], "characters": "\u2267\u0338" }, "≱": { "codepoints": [8817], "characters": "\u2271" }, "≱": { "codepoints": [8817], "characters": "\u2271" }, "≧̸": { "codepoints": [8807, 824], "characters": "\u2267\u0338" }, "⩾̸": { "codepoints": [10878, 824], "characters": "\u2A7E\u0338" }, "⩾̸": { "codepoints": [10878, 824], "characters": "\u2A7E\u0338" }, "⋙̸": { "codepoints": [8921, 824], "characters": 
"\u22D9\u0338" }, "≵": { "codepoints": [8821], "characters": "\u2275" }, "≫⃒": { "codepoints": [8811, 8402], "characters": "\u226B\u20D2" }, "≯": { "codepoints": [8815], "characters": "\u226F" }, "≯": { "codepoints": [8815], "characters": "\u226F" }, "≫̸": { "codepoints": [8811, 824], "characters": "\u226B\u0338" }, "⇎": { "codepoints": [8654], "characters": "\u21CE" }, "↮": { "codepoints": [8622], "characters": "\u21AE" }, "⫲": { "codepoints": [10994], "characters": "\u2AF2" }, "∋": { "codepoints": [8715], "characters": "\u220B" }, "⋼": { "codepoints": [8956], "characters": "\u22FC" }, "⋺": { "codepoints": [8954], "characters": "\u22FA" }, "∋": { "codepoints": [8715], "characters": "\u220B" }, "Њ": { "codepoints": [1034], "characters": "\u040A" }, "њ": { "codepoints": [1114], "characters": "\u045A" }, "⇍": { "codepoints": [8653], "characters": "\u21CD" }, "↚": { "codepoints": [8602], "characters": "\u219A" }, "‥": { "codepoints": [8229], "characters": "\u2025" }, "≦̸": { "codepoints": [8806, 824], "characters": "\u2266\u0338" }, "≰": { "codepoints": [8816], "characters": "\u2270" }, "⇍": { "codepoints": [8653], "characters": "\u21CD" }, "↚": { "codepoints": [8602], "characters": "\u219A" }, "⇎": { "codepoints": [8654], "characters": "\u21CE" }, "↮": { "codepoints": [8622], "characters": "\u21AE" }, "≰": { "codepoints": [8816], "characters": "\u2270" }, "≦̸": { "codepoints": [8806, 824], "characters": "\u2266\u0338" }, "⩽̸": { "codepoints": [10877, 824], "characters": "\u2A7D\u0338" }, "⩽̸": { "codepoints": [10877, 824], "characters": "\u2A7D\u0338" }, "≮": { "codepoints": [8814], "characters": "\u226E" }, "⋘̸": { "codepoints": [8920, 824], "characters": "\u22D8\u0338" }, "≴": { "codepoints": [8820], "characters": "\u2274" }, "≪⃒": { "codepoints": [8810, 8402], "characters": "\u226A\u20D2" }, "≮": { "codepoints": [8814], "characters": "\u226E" }, "⋪": { "codepoints": [8938], "characters": "\u22EA" }, "⋬": { "codepoints": [8940], "characters": "\u22EC" }, "≪̸": { 
"codepoints": [8810, 824], "characters": "\u226A\u0338" }, "∤": { "codepoints": [8740], "characters": "\u2224" }, "⁠": { "codepoints": [8288], "characters": "\u2060" }, " ": { "codepoints": [160], "characters": "\u00A0" }, "ℕ": { "codepoints": [8469], "characters": "\u2115" }, "𝕟": { "codepoints": [120159], "characters": "\uD835\uDD5F" }, "⫬": { "codepoints": [10988], "characters": "\u2AEC" }, "¬": { "codepoints": [172], "characters": "\u00AC" }, "¬": { "codepoints": [172], "characters": "\u00AC" }, "≢": { "codepoints": [8802], "characters": "\u2262" }, "≭": { "codepoints": [8813], "characters": "\u226D" }, "∦": { "codepoints": [8742], "characters": "\u2226" }, "∉": { "codepoints": [8713], "characters": "\u2209" }, "≠": { "codepoints": [8800], "characters": "\u2260" }, "≂̸": { "codepoints": [8770, 824], "characters": "\u2242\u0338" }, "∄": { "codepoints": [8708], "characters": "\u2204" }, "≯": { "codepoints": [8815], "characters": "\u226F" }, "≱": { "codepoints": [8817], "characters": "\u2271" }, "≧̸": { "codepoints": [8807, 824], "characters": "\u2267\u0338" }, "≫̸": { "codepoints": [8811, 824], "characters": "\u226B\u0338" }, "≹": { "codepoints": [8825], "characters": "\u2279" }, "⩾̸": { "codepoints": [10878, 824], "characters": "\u2A7E\u0338" }, "≵": { "codepoints": [8821], "characters": "\u2275" }, "≎̸": { "codepoints": [8782, 824], "characters": "\u224E\u0338" }, "≏̸": { "codepoints": [8783, 824], "characters": "\u224F\u0338" }, "∉": { "codepoints": [8713], "characters": "\u2209" }, "⋵̸": { "codepoints": [8949, 824], "characters": "\u22F5\u0338" }, "⋹̸": { "codepoints": [8953, 824], "characters": "\u22F9\u0338" }, "∉": { "codepoints": [8713], "characters": "\u2209" }, "⋷": { "codepoints": [8951], "characters": "\u22F7" }, "⋶": { "codepoints": [8950], "characters": "\u22F6" }, "⋪": { "codepoints": [8938], "characters": "\u22EA" }, "⧏̸": { "codepoints": [10703, 824], "characters": "\u29CF\u0338" }, "⋬": { "codepoints": [8940], "characters": "\u22EC" }, "≮": { 
"codepoints": [8814], "characters": "\u226E" }, "≰": { "codepoints": [8816], "characters": "\u2270" }, "≸": { "codepoints": [8824], "characters": "\u2278" }, "≪̸": { "codepoints": [8810, 824], "characters": "\u226A\u0338" }, "⩽̸": { "codepoints": [10877, 824], "characters": "\u2A7D\u0338" }, "≴": { "codepoints": [8820], "characters": "\u2274" }, "⪢̸": { "codepoints": [10914, 824], "characters": "\u2AA2\u0338" }, "⪡̸": { "codepoints": [10913, 824], "characters": "\u2AA1\u0338" }, "∌": { "codepoints": [8716], "characters": "\u220C" }, "∌": { "codepoints": [8716], "characters": "\u220C" }, "⋾": { "codepoints": [8958], "characters": "\u22FE" }, "⋽": { "codepoints": [8957], "characters": "\u22FD" }, "⊀": { "codepoints": [8832], "characters": "\u2280" }, "⪯̸": { "codepoints": [10927, 824], "characters": "\u2AAF\u0338" }, "⋠": { "codepoints": [8928], "characters": "\u22E0" }, "∌": { "codepoints": [8716], "characters": "\u220C" }, "⋫": { "codepoints": [8939], "characters": "\u22EB" }, "⧐̸": { "codepoints": [10704, 824], "characters": "\u29D0\u0338" }, "⋭": { "codepoints": [8941], "characters": "\u22ED" }, "⊏̸": { "codepoints": [8847, 824], "characters": "\u228F\u0338" }, "⋢": { "codepoints": [8930], "characters": "\u22E2" }, "⊐̸": { "codepoints": [8848, 824], "characters": "\u2290\u0338" }, "⋣": { "codepoints": [8931], "characters": "\u22E3" }, "⊂⃒": { "codepoints": [8834, 8402], "characters": "\u2282\u20D2" }, "⊈": { "codepoints": [8840], "characters": "\u2288" }, "⊁": { "codepoints": [8833], "characters": "\u2281" }, "⪰̸": { "codepoints": [10928, 824], "characters": "\u2AB0\u0338" }, "⋡": { "codepoints": [8929], "characters": "\u22E1" }, "≿̸": { "codepoints": [8831, 824], "characters": "\u227F\u0338" }, "⊃⃒": { "codepoints": [8835, 8402], "characters": "\u2283\u20D2" }, "⊉": { "codepoints": [8841], "characters": "\u2289" }, "≁": { "codepoints": [8769], "characters": "\u2241" }, "≄": { "codepoints": [8772], "characters": "\u2244" }, "≇": { "codepoints": [8775], 
"characters": "\u2247" }, "≉": { "codepoints": [8777], "characters": "\u2249" }, "∤": { "codepoints": [8740], "characters": "\u2224" }, "∦": { "codepoints": [8742], "characters": "\u2226" }, "∦": { "codepoints": [8742], "characters": "\u2226" }, "⫽⃥": { "codepoints": [11005, 8421], "characters": "\u2AFD\u20E5" }, "∂̸": { "codepoints": [8706, 824], "characters": "\u2202\u0338" }, "⨔": { "codepoints": [10772], "characters": "\u2A14" }, "⊀": { "codepoints": [8832], "characters": "\u2280" }, "⋠": { "codepoints": [8928], "characters": "\u22E0" }, "⪯̸": { "codepoints": [10927, 824], "characters": "\u2AAF\u0338" }, "⊀": { "codepoints": [8832], "characters": "\u2280" }, "⪯̸": { "codepoints": [10927, 824], "characters": "\u2AAF\u0338" }, "⇏": { "codepoints": [8655], "characters": "\u21CF" }, "↛": { "codepoints": [8603], "characters": "\u219B" }, "⤳̸": { "codepoints": [10547, 824], "characters": "\u2933\u0338" }, "↝̸": { "codepoints": [8605, 824], "characters": "\u219D\u0338" }, "⇏": { "codepoints": [8655], "characters": "\u21CF" }, "↛": { "codepoints": [8603], "characters": "\u219B" }, "⋫": { "codepoints": [8939], "characters": "\u22EB" }, "⋭": { "codepoints": [8941], "characters": "\u22ED" }, "⊁": { "codepoints": [8833], "characters": "\u2281" }, "⋡": { "codepoints": [8929], "characters": "\u22E1" }, "⪰̸": { "codepoints": [10928, 824], "characters": "\u2AB0\u0338" }, "𝒩": { "codepoints": [119977], "characters": "\uD835\uDCA9" }, "𝓃": { "codepoints": [120003], "characters": "\uD835\uDCC3" }, "∤": { "codepoints": [8740], "characters": "\u2224" }, "∦": { "codepoints": [8742], "characters": "\u2226" }, "≁": { "codepoints": [8769], "characters": "\u2241" }, "≄": { "codepoints": [8772], "characters": "\u2244" }, "≄": { "codepoints": [8772], "characters": "\u2244" }, "∤": { "codepoints": [8740], "characters": "\u2224" }, "∦": { "codepoints": [8742], "characters": "\u2226" }, "⋢": { "codepoints": [8930], "characters": "\u22E2" }, "⋣": { "codepoints": [8931], "characters": "\u22E3" 
}, "⊄": { "codepoints": [8836], "characters": "\u2284" }, "⫅̸": { "codepoints": [10949, 824], "characters": "\u2AC5\u0338" }, "⊈": { "codepoints": [8840], "characters": "\u2288" }, "⊂⃒": { "codepoints": [8834, 8402], "characters": "\u2282\u20D2" }, "⊈": { "codepoints": [8840], "characters": "\u2288" }, "⫅̸": { "codepoints": [10949, 824], "characters": "\u2AC5\u0338" }, "⊁": { "codepoints": [8833], "characters": "\u2281" }, "⪰̸": { "codepoints": [10928, 824], "characters": "\u2AB0\u0338" }, "⊅": { "codepoints": [8837], "characters": "\u2285" }, "⫆̸": { "codepoints": [10950, 824], "characters": "\u2AC6\u0338" }, "⊉": { "codepoints": [8841], "characters": "\u2289" }, "⊃⃒": { "codepoints": [8835, 8402], "characters": "\u2283\u20D2" }, "⊉": { "codepoints": [8841], "characters": "\u2289" }, "⫆̸": { "codepoints": [10950, 824], "characters": "\u2AC6\u0338" }, "≹": { "codepoints": [8825], "characters": "\u2279" }, "Ñ": { "codepoints": [209], "characters": "\u00D1" }, "Ñ": { "codepoints": [209], "characters": "\u00D1" }, "ñ": { "codepoints": [241], "characters": "\u00F1" }, "ñ": { "codepoints": [241], "characters": "\u00F1" }, "≸": { "codepoints": [8824], "characters": "\u2278" }, "⋪": { "codepoints": [8938], "characters": "\u22EA" }, "⋬": { "codepoints": [8940], "characters": "\u22EC" }, "⋫": { "codepoints": [8939], "characters": "\u22EB" }, "⋭": { "codepoints": [8941], "characters": "\u22ED" }, "Ν": { "codepoints": [925], "characters": "\u039D" }, "ν": { "codepoints": [957], "characters": "\u03BD" }, "#": { "codepoints": [35], "characters": "\u0023" }, "№": { "codepoints": [8470], "characters": "\u2116" }, " ": { "codepoints": [8199], "characters": "\u2007" }, "≍⃒": { "codepoints": [8781, 8402], "characters": "\u224D\u20D2" }, "⊯": { "codepoints": [8879], "characters": "\u22AF" }, "⊮": { "codepoints": [8878], "characters": "\u22AE" }, "⊭": { "codepoints": [8877], "characters": "\u22AD" }, "⊬": { "codepoints": [8876], "characters": "\u22AC" }, "≥⃒": { "codepoints": [8805, 
8402], "characters": "\u2265\u20D2" }, ">⃒": { "codepoints": [62, 8402], "characters": "\u003E\u20D2" }, "⤄": { "codepoints": [10500], "characters": "\u2904" }, "⧞": { "codepoints": [10718], "characters": "\u29DE" }, "⤂": { "codepoints": [10498], "characters": "\u2902" }, "≤⃒": { "codepoints": [8804, 8402], "characters": "\u2264\u20D2" }, "<⃒": { "codepoints": [60, 8402], "characters": "\u003C\u20D2" }, "⊴⃒": { "codepoints": [8884, 8402], "characters": "\u22B4\u20D2" }, "⤃": { "codepoints": [10499], "characters": "\u2903" }, "⊵⃒": { "codepoints": [8885, 8402], "characters": "\u22B5\u20D2" }, "∼⃒": { "codepoints": [8764, 8402], "characters": "\u223C\u20D2" }, "⤣": { "codepoints": [10531], "characters": "\u2923" }, "⇖": { "codepoints": [8662], "characters": "\u21D6" }, "↖": { "codepoints": [8598], "characters": "\u2196" }, "↖": { "codepoints": [8598], "characters": "\u2196" }, "⤧": { "codepoints": [10535], "characters": "\u2927" }, "Ó": { "codepoints": [211], "characters": "\u00D3" }, "Ó": { "codepoints": [211], "characters": "\u00D3" }, "ó": { "codepoints": [243], "characters": "\u00F3" }, "ó": { "codepoints": [243], "characters": "\u00F3" }, "⊛": { "codepoints": [8859], "characters": "\u229B" }, "⊚": { "codepoints": [8858], "characters": "\u229A" }, "Ô": { "codepoints": [212], "characters": "\u00D4" }, "Ô": { "codepoints": [212], "characters": "\u00D4" }, "ô": { "codepoints": [244], "characters": "\u00F4" }, "ô": { "codepoints": [244], "characters": "\u00F4" }, "О": { "codepoints": [1054], "characters": "\u041E" }, "о": { "codepoints": [1086], "characters": "\u043E" }, "⊝": { "codepoints": [8861], "characters": "\u229D" }, "Ő": { "codepoints": [336], "characters": "\u0150" }, "ő": { "codepoints": [337], "characters": "\u0151" }, "⨸": { "codepoints": [10808], "characters": "\u2A38" }, "⊙": { "codepoints": [8857], "characters": "\u2299" }, "⦼": { "codepoints": [10684], "characters": "\u29BC" }, "Œ": { "codepoints": [338], "characters": "\u0152" }, "œ": { 
"codepoints": [339], "characters": "\u0153" }, "⦿": { "codepoints": [10687], "characters": "\u29BF" }, "𝔒": { "codepoints": [120082], "characters": "\uD835\uDD12" }, "𝔬": { "codepoints": [120108], "characters": "\uD835\uDD2C" }, "˛": { "codepoints": [731], "characters": "\u02DB" }, "Ò": { "codepoints": [210], "characters": "\u00D2" }, "Ò": { "codepoints": [210], "characters": "\u00D2" }, "ò": { "codepoints": [242], "characters": "\u00F2" }, "ò": { "codepoints": [242], "characters": "\u00F2" }, "⧁": { "codepoints": [10689], "characters": "\u29C1" }, "⦵": { "codepoints": [10677], "characters": "\u29B5" }, "Ω": { "codepoints": [937], "characters": "\u03A9" }, "∮": { "codepoints": [8750], "characters": "\u222E" }, "↺": { "codepoints": [8634], "characters": "\u21BA" }, "⦾": { "codepoints": [10686], "characters": "\u29BE" }, "⦻": { "codepoints": [10683], "characters": "\u29BB" }, "‾": { "codepoints": [8254], "characters": "\u203E" }, "⧀": { "codepoints": [10688], "characters": "\u29C0" }, "Ō": { "codepoints": [332], "characters": "\u014C" }, "ō": { "codepoints": [333], "characters": "\u014D" }, "Ω": { "codepoints": [937], "characters": "\u03A9" }, "ω": { "codepoints": [969], "characters": "\u03C9" }, "Ο": { "codepoints": [927], "characters": "\u039F" }, "ο": { "codepoints": [959], "characters": "\u03BF" }, "⦶": { "codepoints": [10678], "characters": "\u29B6" }, "⊖": { "codepoints": [8854], "characters": "\u2296" }, "𝕆": { "codepoints": [120134], "characters": "\uD835\uDD46" }, "𝕠": { "codepoints": [120160], "characters": "\uD835\uDD60" }, "⦷": { "codepoints": [10679], "characters": "\u29B7" }, "“": { "codepoints": [8220], "characters": "\u201C" }, "‘": { "codepoints": [8216], "characters": "\u2018" }, "⦹": { "codepoints": [10681], "characters": "\u29B9" }, "⊕": { "codepoints": [8853], "characters": "\u2295" }, "⩔": { "codepoints": [10836], "characters": "\u2A54" }, "∨": { "codepoints": [8744], "characters": "\u2228" }, "↻": { "codepoints": [8635], "characters": "\u21BB" 
}, "⩝": { "codepoints": [10845], "characters": "\u2A5D" }, "ℴ": { "codepoints": [8500], "characters": "\u2134" }, "ℴ": { "codepoints": [8500], "characters": "\u2134" }, "ª": { "codepoints": [170], "characters": "\u00AA" }, "ª": { "codepoints": [170], "characters": "\u00AA" }, "º": { "codepoints": [186], "characters": "\u00BA" }, "º": { "codepoints": [186], "characters": "\u00BA" }, "⊶": { "codepoints": [8886], "characters": "\u22B6" }, "⩖": { "codepoints": [10838], "characters": "\u2A56" }, "⩗": { "codepoints": [10839], "characters": "\u2A57" }, "⩛": { "codepoints": [10843], "characters": "\u2A5B" }, "Ⓢ": { "codepoints": [9416], "characters": "\u24C8" }, "𝒪": { "codepoints": [119978], "characters": "\uD835\uDCAA" }, "ℴ": { "codepoints": [8500], "characters": "\u2134" }, "Ø": { "codepoints": [216], "characters": "\u00D8" }, "Ø": { "codepoints": [216], "characters": "\u00D8" }, "ø": { "codepoints": [248], "characters": "\u00F8" }, "ø": { "codepoints": [248], "characters": "\u00F8" }, "⊘": { "codepoints": [8856], "characters": "\u2298" }, "Õ": { "codepoints": [213], "characters": "\u00D5" }, "Õ": { "codepoints": [213], "characters": "\u00D5" }, "õ": { "codepoints": [245], "characters": "\u00F5" }, "õ": { "codepoints": [245], "characters": "\u00F5" }, "⨷": { "codepoints": [10807], "characters": "\u2A37" }, "⊗": { "codepoints": [8855], "characters": "\u2297" }, "⨶": { "codepoints": [10806], "characters": "\u2A36" }, "Ö": { "codepoints": [214], "characters": "\u00D6" }, "Ö": { "codepoints": [214], "characters": "\u00D6" }, "ö": { "codepoints": [246], "characters": "\u00F6" }, "ö": { "codepoints": [246], "characters": "\u00F6" }, "⌽": { "codepoints": [9021], "characters": "\u233D" }, "‾": { "codepoints": [8254], "characters": "\u203E" }, "⏞": { "codepoints": [9182], "characters": "\u23DE" }, "⎴": { "codepoints": [9140], "characters": "\u23B4" }, "⏜": { "codepoints": [9180], "characters": "\u23DC" }, "∥": { "codepoints": [8741], "characters": "\u2225" }, "¶": { 
"codepoints": [182], "characters": "\u00B6" }, "¶": { "codepoints": [182], "characters": "\u00B6" }, "∥": { "codepoints": [8741], "characters": "\u2225" }, "⫳": { "codepoints": [10995], "characters": "\u2AF3" }, "⫽": { "codepoints": [11005], "characters": "\u2AFD" }, "∂": { "codepoints": [8706], "characters": "\u2202" }, "∂": { "codepoints": [8706], "characters": "\u2202" }, "П": { "codepoints": [1055], "characters": "\u041F" }, "п": { "codepoints": [1087], "characters": "\u043F" }, "%": { "codepoints": [37], "characters": "\u0025" }, ".": { "codepoints": [46], "characters": "\u002E" }, "‰": { "codepoints": [8240], "characters": "\u2030" }, "⊥": { "codepoints": [8869], "characters": "\u22A5" }, "‱": { "codepoints": [8241], "characters": "\u2031" }, "𝔓": { "codepoints": [120083], "characters": "\uD835\uDD13" }, "𝔭": { "codepoints": [120109], "characters": "\uD835\uDD2D" }, "Φ": { "codepoints": [934], "characters": "\u03A6" }, "φ": { "codepoints": [966], "characters": "\u03C6" }, "ϕ": { "codepoints": [981], "characters": "\u03D5" }, "ℳ": { "codepoints": [8499], "characters": "\u2133" }, "☎": { "codepoints": [9742], "characters": "\u260E" }, "Π": { "codepoints": [928], "characters": "\u03A0" }, "π": { "codepoints": [960], "characters": "\u03C0" }, "⋔": { "codepoints": [8916], "characters": "\u22D4" }, "ϖ": { "codepoints": [982], "characters": "\u03D6" }, "ℏ": { "codepoints": [8463], "characters": "\u210F" }, "ℎ": { "codepoints": [8462], "characters": "\u210E" }, "ℏ": { "codepoints": [8463], "characters": "\u210F" }, "+": { "codepoints": [43], "characters": "\u002B" }, "⨣": { "codepoints": [10787], "characters": "\u2A23" }, "⊞": { "codepoints": [8862], "characters": "\u229E" }, "⨢": { "codepoints": [10786], "characters": "\u2A22" }, "∔": { "codepoints": [8724], "characters": "\u2214" }, "⨥": { "codepoints": [10789], "characters": "\u2A25" }, "⩲": { "codepoints": [10866], "characters": "\u2A72" }, "±": { "codepoints": [177], "characters": "\u00B1" }, "±": { 
"codepoints": [177], "characters": "\u00B1" }, "±": { "codepoints": [177], "characters": "\u00B1" }, "⨦": { "codepoints": [10790], "characters": "\u2A26" }, "⨧": { "codepoints": [10791], "characters": "\u2A27" }, "±": { "codepoints": [177], "characters": "\u00B1" }, "ℌ": { "codepoints": [8460], "characters": "\u210C" }, "⨕": { "codepoints": [10773], "characters": "\u2A15" }, "ℙ": { "codepoints": [8473], "characters": "\u2119" }, "𝕡": { "codepoints": [120161], "characters": "\uD835\uDD61" }, "£": { "codepoints": [163], "characters": "\u00A3" }, "£": { "codepoints": [163], "characters": "\u00A3" }, "⪻": { "codepoints": [10939], "characters": "\u2ABB" }, "≺": { "codepoints": [8826], "characters": "\u227A" }, "⪷": { "codepoints": [10935], "characters": "\u2AB7" }, "≼": { "codepoints": [8828], "characters": "\u227C" }, "⪳": { "codepoints": [10931], "characters": "\u2AB3" }, "⪯": { "codepoints": [10927], "characters": "\u2AAF" }, "≺": { "codepoints": [8826], "characters": "\u227A" }, "⪷": { "codepoints": [10935], "characters": "\u2AB7" }, "≼": { "codepoints": [8828], "characters": "\u227C" }, "≺": { "codepoints": [8826], "characters": "\u227A" }, "⪯": { "codepoints": [10927], "characters": "\u2AAF" }, "≼": { "codepoints": [8828], "characters": "\u227C" }, "≾": { "codepoints": [8830], "characters": "\u227E" }, "⪯": { "codepoints": [10927], "characters": "\u2AAF" }, "⪹": { "codepoints": [10937], "characters": "\u2AB9" }, "⪵": { "codepoints": [10933], "characters": "\u2AB5" }, "⋨": { "codepoints": [8936], "characters": "\u22E8" }, "≾": { "codepoints": [8830], "characters": "\u227E" }, "″": { "codepoints": [8243], "characters": "\u2033" }, "′": { "codepoints": [8242], "characters": "\u2032" }, "ℙ": { "codepoints": [8473], "characters": "\u2119" }, "⪹": { "codepoints": [10937], "characters": "\u2AB9" }, "⪵": { "codepoints": [10933], "characters": "\u2AB5" }, "⋨": { "codepoints": [8936], "characters": "\u22E8" }, "∏": { "codepoints": [8719], "characters": "\u220F" }, "∏": { 
"codepoints": [8719], "characters": "\u220F" }, "⌮": { "codepoints": [9006], "characters": "\u232E" }, "⌒": { "codepoints": [8978], "characters": "\u2312" }, "⌓": { "codepoints": [8979], "characters": "\u2313" }, "∝": { "codepoints": [8733], "characters": "\u221D" }, "∷": { "codepoints": [8759], "characters": "\u2237" }, "∝": { "codepoints": [8733], "characters": "\u221D" }, "∝": { "codepoints": [8733], "characters": "\u221D" }, "≾": { "codepoints": [8830], "characters": "\u227E" }, "⊰": { "codepoints": [8880], "characters": "\u22B0" }, "𝒫": { "codepoints": [119979], "characters": "\uD835\uDCAB" }, "𝓅": { "codepoints": [120005], "characters": "\uD835\uDCC5" }, "Ψ": { "codepoints": [936], "characters": "\u03A8" }, "ψ": { "codepoints": [968], "characters": "\u03C8" }, " ": { "codepoints": [8200], "characters": "\u2008" }, "𝔔": { "codepoints": [120084], "characters": "\uD835\uDD14" }, "𝔮": { "codepoints": [120110], "characters": "\uD835\uDD2E" }, "⨌": { "codepoints": [10764], "characters": "\u2A0C" }, "ℚ": { "codepoints": [8474], "characters": "\u211A" }, "𝕢": { "codepoints": [120162], "characters": "\uD835\uDD62" }, "⁗": { "codepoints": [8279], "characters": "\u2057" }, "𝒬": { "codepoints": [119980], "characters": "\uD835\uDCAC" }, "𝓆": { "codepoints": [120006], "characters": "\uD835\uDCC6" }, "ℍ": { "codepoints": [8461], "characters": "\u210D" }, "⨖": { "codepoints": [10774], "characters": "\u2A16" }, "?": { "codepoints": [63], "characters": "\u003F" }, "≟": { "codepoints": [8799], "characters": "\u225F" }, """: { "codepoints": [34], "characters": "\u0022" }, """: { "codepoints": [34], "characters": "\u0022" }, """: { "codepoints": [34], "characters": "\u0022" }, """: { "codepoints": [34], "characters": "\u0022" }, "⇛": { "codepoints": [8667], "characters": "\u21DB" }, "∽̱": { "codepoints": [8765, 817], "characters": "\u223D\u0331" }, "Ŕ": { "codepoints": [340], "characters": "\u0154" }, "ŕ": { "codepoints": [341], "characters": "\u0155" }, "√": { "codepoints": 
[8730], "characters": "\u221A" }, "⦳": { "codepoints": [10675], "characters": "\u29B3" }, "⟫": { "codepoints": [10219], "characters": "\u27EB" }, "⟩": { "codepoints": [10217], "characters": "\u27E9" }, "⦒": { "codepoints": [10642], "characters": "\u2992" }, "⦥": { "codepoints": [10661], "characters": "\u29A5" }, "⟩": { "codepoints": [10217], "characters": "\u27E9" }, "»": { "codepoints": [187], "characters": "\u00BB" }, "»": { "codepoints": [187], "characters": "\u00BB" }, "↠": { "codepoints": [8608], "characters": "\u21A0" }, "⇒": { "codepoints": [8658], "characters": "\u21D2" }, "→": { "codepoints": [8594], "characters": "\u2192" }, "⥵": { "codepoints": [10613], "characters": "\u2975" }, "⇥": { "codepoints": [8677], "characters": "\u21E5" }, "⤠": { "codepoints": [10528], "characters": "\u2920" }, "⤳": { "codepoints": [10547], "characters": "\u2933" }, "⤞": { "codepoints": [10526], "characters": "\u291E" }, "↪": { "codepoints": [8618], "characters": "\u21AA" }, "↬": { "codepoints": [8620], "characters": "\u21AC" }, "⥅": { "codepoints": [10565], "characters": "\u2945" }, "⥴": { "codepoints": [10612], "characters": "\u2974" }, "⤖": { "codepoints": [10518], "characters": "\u2916" }, "↣": { "codepoints": [8611], "characters": "\u21A3" }, "↝": { "codepoints": [8605], "characters": "\u219D" }, "⤜": { "codepoints": [10524], "characters": "\u291C" }, "⤚": { "codepoints": [10522], "characters": "\u291A" }, "∶": { "codepoints": [8758], "characters": "\u2236" }, "ℚ": { "codepoints": [8474], "characters": "\u211A" }, "⤐": { "codepoints": [10512], "characters": "\u2910" }, "⤏": { "codepoints": [10511], "characters": "\u290F" }, "⤍": { "codepoints": [10509], "characters": "\u290D" }, "❳": { "codepoints": [10099], "characters": "\u2773" }, "}": { "codepoints": [125], "characters": "\u007D" }, "]": { "codepoints": [93], "characters": "\u005D" }, "⦌": { "codepoints": [10636], "characters": "\u298C" }, "⦎": { "codepoints": [10638], "characters": "\u298E" }, "⦐": { "codepoints": 
[10640], "characters": "\u2990" }, "Ř": { "codepoints": [344], "characters": "\u0158" }, "ř": { "codepoints": [345], "characters": "\u0159" }, "Ŗ": { "codepoints": [342], "characters": "\u0156" }, "ŗ": { "codepoints": [343], "characters": "\u0157" }, "⌉": { "codepoints": [8969], "characters": "\u2309" }, "}": { "codepoints": [125], "characters": "\u007D" }, "Р": { "codepoints": [1056], "characters": "\u0420" }, "р": { "codepoints": [1088], "characters": "\u0440" }, "⤷": { "codepoints": [10551], "characters": "\u2937" }, "⥩": { "codepoints": [10601], "characters": "\u2969" }, "”": { "codepoints": [8221], "characters": "\u201D" }, "”": { "codepoints": [8221], "characters": "\u201D" }, "↳": { "codepoints": [8627], "characters": "\u21B3" }, "ℜ": { "codepoints": [8476], "characters": "\u211C" }, "ℜ": { "codepoints": [8476], "characters": "\u211C" }, "ℛ": { "codepoints": [8475], "characters": "\u211B" }, "ℜ": { "codepoints": [8476], "characters": "\u211C" }, "ℝ": { "codepoints": [8477], "characters": "\u211D" }, "▭": { "codepoints": [9645], "characters": "\u25AD" }, "®": { "codepoints": [174], "characters": "\u00AE" }, "®": { "codepoints": [174], "characters": "\u00AE" }, "®": { "codepoints": [174], "characters": "\u00AE" }, "®": { "codepoints": [174], "characters": "\u00AE" }, "∋": { "codepoints": [8715], "characters": "\u220B" }, "⇋": { "codepoints": [8651], "characters": "\u21CB" }, "⥯": { "codepoints": [10607], "characters": "\u296F" }, "⥽": { "codepoints": [10621], "characters": "\u297D" }, "⌋": { "codepoints": [8971], "characters": "\u230B" }, "ℜ": { "codepoints": [8476], "characters": "\u211C" }, "𝔯": { "codepoints": [120111], "characters": "\uD835\uDD2F" }, "⥤": { "codepoints": [10596], "characters": "\u2964" }, "⇁": { "codepoints": [8641], "characters": "\u21C1" }, "⇀": { "codepoints": [8640], "characters": "\u21C0" }, "⥬": { "codepoints": [10604], "characters": "\u296C" }, "Ρ": { "codepoints": [929], "characters": "\u03A1" }, "ρ": { "codepoints": [961], 
"characters": "\u03C1" }, "ϱ": { "codepoints": [1009], "characters": "\u03F1" }, "⟩": { "codepoints": [10217], "characters": "\u27E9" }, "→": { "codepoints": [8594], "characters": "\u2192" }, "⇒": { "codepoints": [8658], "characters": "\u21D2" }, "→": { "codepoints": [8594], "characters": "\u2192" }, "⇥": { "codepoints": [8677], "characters": "\u21E5" }, "⇄": { "codepoints": [8644], "characters": "\u21C4" }, "↣": { "codepoints": [8611], "characters": "\u21A3" }, "⌉": { "codepoints": [8969], "characters": "\u2309" }, "⟧": { "codepoints": [10215], "characters": "\u27E7" }, "⥝": { "codepoints": [10589], "characters": "\u295D" }, "⇂": { "codepoints": [8642], "characters": "\u21C2" }, "⥕": { "codepoints": [10581], "characters": "\u2955" }, "⌋": { "codepoints": [8971], "characters": "\u230B" }, "⇁": { "codepoints": [8641], "characters": "\u21C1" }, "⇀": { "codepoints": [8640], "characters": "\u21C0" }, "⇄": { "codepoints": [8644], "characters": "\u21C4" }, "⇌": { "codepoints": [8652], "characters": "\u21CC" }, "⇉": { "codepoints": [8649], "characters": "\u21C9" }, "↝": { "codepoints": [8605], "characters": "\u219D" }, "⊢": { "codepoints": [8866], "characters": "\u22A2" }, "↦": { "codepoints": [8614], "characters": "\u21A6" }, "⥛": { "codepoints": [10587], "characters": "\u295B" }, "⋌": { "codepoints": [8908], "characters": "\u22CC" }, "⊳": { "codepoints": [8883], "characters": "\u22B3" }, "⧐": { "codepoints": [10704], "characters": "\u29D0" }, "⊵": { "codepoints": [8885], "characters": "\u22B5" }, "⥏": { "codepoints": [10575], "characters": "\u294F" }, "⥜": { "codepoints": [10588], "characters": "\u295C" }, "↾": { "codepoints": [8638], "characters": "\u21BE" }, "⥔": { "codepoints": [10580], "characters": "\u2954" }, "⇀": { "codepoints": [8640], "characters": "\u21C0" }, "⥓": { "codepoints": [10579], "characters": "\u2953" }, "˚": { "codepoints": [730], "characters": "\u02DA" }, "≓": { "codepoints": [8787], "characters": "\u2253" }, "⇄": { "codepoints": [8644], 
"characters": "\u21C4" }, "⇌": { "codepoints": [8652], "characters": "\u21CC" }, "‏": { "codepoints": [8207], "characters": "\u200F" }, "⎱": { "codepoints": [9137], "characters": "\u23B1" }, "⎱": { "codepoints": [9137], "characters": "\u23B1" }, "⫮": { "codepoints": [10990], "characters": "\u2AEE" }, "⟭": { "codepoints": [10221], "characters": "\u27ED" }, "⇾": { "codepoints": [8702], "characters": "\u21FE" }, "⟧": { "codepoints": [10215], "characters": "\u27E7" }, "⦆": { "codepoints": [10630], "characters": "\u2986" }, "ℝ": { "codepoints": [8477], "characters": "\u211D" }, "𝕣": { "codepoints": [120163], "characters": "\uD835\uDD63" }, "⨮": { "codepoints": [10798], "characters": "\u2A2E" }, "⨵": { "codepoints": [10805], "characters": "\u2A35" }, "⥰": { "codepoints": [10608], "characters": "\u2970" }, ")": { "codepoints": [41], "characters": "\u0029" }, "⦔": { "codepoints": [10644], "characters": "\u2994" }, "⨒": { "codepoints": [10770], "characters": "\u2A12" }, "⇉": { "codepoints": [8649], "characters": "\u21C9" }, "⇛": { "codepoints": [8667], "characters": "\u21DB" }, "›": { "codepoints": [8250], "characters": "\u203A" }, "ℛ": { "codepoints": [8475], "characters": "\u211B" }, "𝓇": { "codepoints": [120007], "characters": "\uD835\uDCC7" }, "↱": { "codepoints": [8625], "characters": "\u21B1" }, "↱": { "codepoints": [8625], "characters": "\u21B1" }, "]": { "codepoints": [93], "characters": "\u005D" }, "’": { "codepoints": [8217], "characters": "\u2019" }, "’": { "codepoints": [8217], "characters": "\u2019" }, "⋌": { "codepoints": [8908], "characters": "\u22CC" }, "⋊": { "codepoints": [8906], "characters": "\u22CA" }, "▹": { "codepoints": [9657], "characters": "\u25B9" }, "⊵": { "codepoints": [8885], "characters": "\u22B5" }, "▸": { "codepoints": [9656], "characters": "\u25B8" }, "⧎": { "codepoints": [10702], "characters": "\u29CE" }, "⧴": { "codepoints": [10740], "characters": "\u29F4" }, "⥨": { "codepoints": [10600], "characters": "\u2968" }, "℞": { "codepoints": 
[8478], "characters": "\u211E" }, "Ś": { "codepoints": [346], "characters": "\u015A" }, "ś": { "codepoints": [347], "characters": "\u015B" }, "‚": { "codepoints": [8218], "characters": "\u201A" }, "⪼": { "codepoints": [10940], "characters": "\u2ABC" }, "≻": { "codepoints": [8827], "characters": "\u227B" }, "⪸": { "codepoints": [10936], "characters": "\u2AB8" }, "Š": { "codepoints": [352], "characters": "\u0160" }, "š": { "codepoints": [353], "characters": "\u0161" }, "≽": { "codepoints": [8829], "characters": "\u227D" }, "⪴": { "codepoints": [10932], "characters": "\u2AB4" }, "⪰": { "codepoints": [10928], "characters": "\u2AB0" }, "Ş": { "codepoints": [350], "characters": "\u015E" }, "ş": { "codepoints": [351], "characters": "\u015F" }, "Ŝ": { "codepoints": [348], "characters": "\u015C" }, "ŝ": { "codepoints": [349], "characters": "\u015D" }, "⪺": { "codepoints": [10938], "characters": "\u2ABA" }, "⪶": { "codepoints": [10934], "characters": "\u2AB6" }, "⋩": { "codepoints": [8937], "characters": "\u22E9" }, "⨓": { "codepoints": [10771], "characters": "\u2A13" }, "≿": { "codepoints": [8831], "characters": "\u227F" }, "С": { "codepoints": [1057], "characters": "\u0421" }, "с": { "codepoints": [1089], "characters": "\u0441" }, "⋅": { "codepoints": [8901], "characters": "\u22C5" }, "⊡": { "codepoints": [8865], "characters": "\u22A1" }, "⩦": { "codepoints": [10854], "characters": "\u2A66" }, "⤥": { "codepoints": [10533], "characters": "\u2925" }, "⇘": { "codepoints": [8664], "characters": "\u21D8" }, "↘": { "codepoints": [8600], "characters": "\u2198" }, "↘": { "codepoints": [8600], "characters": "\u2198" }, "§": { "codepoints": [167], "characters": "\u00A7" }, "§": { "codepoints": [167], "characters": "\u00A7" }, ";": { "codepoints": [59], "characters": "\u003B" }, "⤩": { "codepoints": [10537], "characters": "\u2929" }, "∖": { "codepoints": [8726], "characters": "\u2216" }, "∖": { "codepoints": [8726], "characters": "\u2216" }, "✶": { "codepoints": [10038], 
"characters": "\u2736" }, "𝔖": { "codepoints": [120086], "characters": "\uD835\uDD16" }, "𝔰": { "codepoints": [120112], "characters": "\uD835\uDD30" }, "⌢": { "codepoints": [8994], "characters": "\u2322" }, "♯": { "codepoints": [9839], "characters": "\u266F" }, "Щ": { "codepoints": [1065], "characters": "\u0429" }, "щ": { "codepoints": [1097], "characters": "\u0449" }, "Ш": { "codepoints": [1064], "characters": "\u0428" }, "ш": { "codepoints": [1096], "characters": "\u0448" }, "↓": { "codepoints": [8595], "characters": "\u2193" }, "←": { "codepoints": [8592], "characters": "\u2190" }, "∣": { "codepoints": [8739], "characters": "\u2223" }, "∥": { "codepoints": [8741], "characters": "\u2225" }, "→": { "codepoints": [8594], "characters": "\u2192" }, "↑": { "codepoints": [8593], "characters": "\u2191" }, "­": { "codepoints": [173], "characters": "\u00AD" }, "­": { "codepoints": [173], "characters": "\u00AD" }, "Σ": { "codepoints": [931], "characters": "\u03A3" }, "σ": { "codepoints": [963], "characters": "\u03C3" }, "ς": { "codepoints": [962], "characters": "\u03C2" }, "ς": { "codepoints": [962], "characters": "\u03C2" }, "∼": { "codepoints": [8764], "characters": "\u223C" }, "⩪": { "codepoints": [10858], "characters": "\u2A6A" }, "≃": { "codepoints": [8771], "characters": "\u2243" }, "≃": { "codepoints": [8771], "characters": "\u2243" }, "⪞": { "codepoints": [10910], "characters": "\u2A9E" }, "⪠": { "codepoints": [10912], "characters": "\u2AA0" }, "⪝": { "codepoints": [10909], "characters": "\u2A9D" }, "⪟": { "codepoints": [10911], "characters": "\u2A9F" }, "≆": { "codepoints": [8774], "characters": "\u2246" }, "⨤": { "codepoints": [10788], "characters": "\u2A24" }, "⥲": { "codepoints": [10610], "characters": "\u2972" }, "←": { "codepoints": [8592], "characters": "\u2190" }, "∘": { "codepoints": [8728], "characters": "\u2218" }, "∖": { "codepoints": [8726], "characters": "\u2216" }, "⨳": { "codepoints": [10803], "characters": "\u2A33" }, "⧤": { "codepoints": [10724], 
"characters": "\u29E4" }, "∣": { "codepoints": [8739], "characters": "\u2223" }, "⌣": { "codepoints": [8995], "characters": "\u2323" }, "⪪": { "codepoints": [10922], "characters": "\u2AAA" }, "⪬": { "codepoints": [10924], "characters": "\u2AAC" }, "⪬︀": { "codepoints": [10924, 65024], "characters": "\u2AAC\uFE00" }, "Ь": { "codepoints": [1068], "characters": "\u042C" }, "ь": { "codepoints": [1100], "characters": "\u044C" }, "/": { "codepoints": [47], "characters": "\u002F" }, "⧄": { "codepoints": [10692], "characters": "\u29C4" }, "⌿": { "codepoints": [9023], "characters": "\u233F" }, "𝕊": { "codepoints": [120138], "characters": "\uD835\uDD4A" }, "𝕤": { "codepoints": [120164], "characters": "\uD835\uDD64" }, "♠": { "codepoints": [9824], "characters": "\u2660" }, "♠": { "codepoints": [9824], "characters": "\u2660" }, "∥": { "codepoints": [8741], "characters": "\u2225" }, "⊓": { "codepoints": [8851], "characters": "\u2293" }, "⊓︀": { "codepoints": [8851, 65024], "characters": "\u2293\uFE00" }, "⊔": { "codepoints": [8852], "characters": "\u2294" }, "⊔︀": { "codepoints": [8852, 65024], "characters": "\u2294\uFE00" }, "√": { "codepoints": [8730], "characters": "\u221A" }, "⊏": { "codepoints": [8847], "characters": "\u228F" }, "⊑": { "codepoints": [8849], "characters": "\u2291" }, "⊏": { "codepoints": [8847], "characters": "\u228F" }, "⊑": { "codepoints": [8849], "characters": "\u2291" }, "⊐": { "codepoints": [8848], "characters": "\u2290" }, "⊒": { "codepoints": [8850], "characters": "\u2292" }, "⊐": { "codepoints": [8848], "characters": "\u2290" }, "⊒": { "codepoints": [8850], "characters": "\u2292" }, "□": { "codepoints": [9633], "characters": "\u25A1" }, "□": { "codepoints": [9633], "characters": "\u25A1" }, "□": { "codepoints": [9633], "characters": "\u25A1" }, "⊓": { "codepoints": [8851], "characters": "\u2293" }, "⊏": { "codepoints": [8847], "characters": "\u228F" }, "⊑": { "codepoints": [8849], "characters": "\u2291" }, "⊐": { "codepoints": [8848], "characters": 
"\u2290" }, "⊒": { "codepoints": [8850], "characters": "\u2292" }, "⊔": { "codepoints": [8852], "characters": "\u2294" }, "▪": { "codepoints": [9642], "characters": "\u25AA" }, "▪": { "codepoints": [9642], "characters": "\u25AA" }, "→": { "codepoints": [8594], "characters": "\u2192" }, "𝒮": { "codepoints": [119982], "characters": "\uD835\uDCAE" }, "𝓈": { "codepoints": [120008], "characters": "\uD835\uDCC8" }, "∖": { "codepoints": [8726], "characters": "\u2216" }, "⌣": { "codepoints": [8995], "characters": "\u2323" }, "⋆": { "codepoints": [8902], "characters": "\u22C6" }, "⋆": { "codepoints": [8902], "characters": "\u22C6" }, "☆": { "codepoints": [9734], "characters": "\u2606" }, "★": { "codepoints": [9733], "characters": "\u2605" }, "ϵ": { "codepoints": [1013], "characters": "\u03F5" }, "ϕ": { "codepoints": [981], "characters": "\u03D5" }, "¯": { "codepoints": [175], "characters": "\u00AF" }, "⋐": { "codepoints": [8912], "characters": "\u22D0" }, "⊂": { "codepoints": [8834], "characters": "\u2282" }, "⪽": { "codepoints": [10941], "characters": "\u2ABD" }, "⫅": { "codepoints": [10949], "characters": "\u2AC5" }, "⊆": { "codepoints": [8838], "characters": "\u2286" }, "⫃": { "codepoints": [10947], "characters": "\u2AC3" }, "⫁": { "codepoints": [10945], "characters": "\u2AC1" }, "⫋": { "codepoints": [10955], "characters": "\u2ACB" }, "⊊": { "codepoints": [8842], "characters": "\u228A" }, "⪿": { "codepoints": [10943], "characters": "\u2ABF" }, "⥹": { "codepoints": [10617], "characters": "\u2979" }, "⋐": { "codepoints": [8912], "characters": "\u22D0" }, "⊂": { "codepoints": [8834], "characters": "\u2282" }, "⊆": { "codepoints": [8838], "characters": "\u2286" }, "⫅": { "codepoints": [10949], "characters": "\u2AC5" }, "⊆": { "codepoints": [8838], "characters": "\u2286" }, "⊊": { "codepoints": [8842], "characters": "\u228A" }, "⫋": { "codepoints": [10955], "characters": "\u2ACB" }, "⫇": { "codepoints": [10951], "characters": "\u2AC7" }, "⫕": { "codepoints": [10965], 
"characters": "\u2AD5" }, "⫓": { "codepoints": [10963], "characters": "\u2AD3" }, "≻": { "codepoints": [8827], "characters": "\u227B" }, "⪸": { "codepoints": [10936], "characters": "\u2AB8" }, "≽": { "codepoints": [8829], "characters": "\u227D" }, "≻": { "codepoints": [8827], "characters": "\u227B" }, "⪰": { "codepoints": [10928], "characters": "\u2AB0" }, "≽": { "codepoints": [8829], "characters": "\u227D" }, "≿": { "codepoints": [8831], "characters": "\u227F" }, "⪰": { "codepoints": [10928], "characters": "\u2AB0" }, "⪺": { "codepoints": [10938], "characters": "\u2ABA" }, "⪶": { "codepoints": [10934], "characters": "\u2AB6" }, "⋩": { "codepoints": [8937], "characters": "\u22E9" }, "≿": { "codepoints": [8831], "characters": "\u227F" }, "∋": { "codepoints": [8715], "characters": "\u220B" }, "∑": { "codepoints": [8721], "characters": "\u2211" }, "∑": { "codepoints": [8721], "characters": "\u2211" }, "♪": { "codepoints": [9834], "characters": "\u266A" }, "⋑": { "codepoints": [8913], "characters": "\u22D1" }, "⊃": { "codepoints": [8835], "characters": "\u2283" }, "¹": { "codepoints": [185], "characters": "\u00B9" }, "¹": { "codepoints": [185], "characters": "\u00B9" }, "²": { "codepoints": [178], "characters": "\u00B2" }, "²": { "codepoints": [178], "characters": "\u00B2" }, "³": { "codepoints": [179], "characters": "\u00B3" }, "³": { "codepoints": [179], "characters": "\u00B3" }, "⪾": { "codepoints": [10942], "characters": "\u2ABE" }, "⫘": { "codepoints": [10968], "characters": "\u2AD8" }, "⫆": { "codepoints": [10950], "characters": "\u2AC6" }, "⊇": { "codepoints": [8839], "characters": "\u2287" }, "⫄": { "codepoints": [10948], "characters": "\u2AC4" }, "⊃": { "codepoints": [8835], "characters": "\u2283" }, "⊇": { "codepoints": [8839], "characters": "\u2287" }, "⟉": { "codepoints": [10185], "characters": "\u27C9" }, "⫗": { "codepoints": [10967], "characters": "\u2AD7" }, "⥻": { "codepoints": [10619], "characters": "\u297B" }, "⫂": { "codepoints": [10946], 
"characters": "\u2AC2" }, "⫌": { "codepoints": [10956], "characters": "\u2ACC" }, "⊋": { "codepoints": [8843], "characters": "\u228B" }, "⫀": { "codepoints": [10944], "characters": "\u2AC0" }, "⋑": { "codepoints": [8913], "characters": "\u22D1" }, "⊃": { "codepoints": [8835], "characters": "\u2283" }, "⊇": { "codepoints": [8839], "characters": "\u2287" }, "⫆": { "codepoints": [10950], "characters": "\u2AC6" }, "⊋": { "codepoints": [8843], "characters": "\u228B" }, "⫌": { "codepoints": [10956], "characters": "\u2ACC" }, "⫈": { "codepoints": [10952], "characters": "\u2AC8" }, "⫔": { "codepoints": [10964], "characters": "\u2AD4" }, "⫖": { "codepoints": [10966], "characters": "\u2AD6" }, "⤦": { "codepoints": [10534], "characters": "\u2926" }, "⇙": { "codepoints": [8665], "characters": "\u21D9" }, "↙": { "codepoints": [8601], "characters": "\u2199" }, "↙": { "codepoints": [8601], "characters": "\u2199" }, "⤪": { "codepoints": [10538], "characters": "\u292A" }, "ß": { "codepoints": [223], "characters": "\u00DF" }, "ß": { "codepoints": [223], "characters": "\u00DF" }, " ": { "codepoints": [9], "characters": "\u0009" }, "⌖": { "codepoints": [8982], "characters": "\u2316" }, "Τ": { "codepoints": [932], "characters": "\u03A4" }, "τ": { "codepoints": [964], "characters": "\u03C4" }, "⎴": { "codepoints": [9140], "characters": "\u23B4" }, "Ť": { "codepoints": [356], "characters": "\u0164" }, "ť": { "codepoints": [357], "characters": "\u0165" }, "Ţ": { "codepoints": [354], "characters": "\u0162" }, "ţ": { "codepoints": [355], "characters": "\u0163" }, "Т": { "codepoints": [1058], "characters": "\u0422" }, "т": { "codepoints": [1090], "characters": "\u0442" }, "⃛": { "codepoints": [8411], "characters": "\u20DB" }, "⌕": { "codepoints": [8981], "characters": "\u2315" }, "𝔗": { "codepoints": [120087], "characters": "\uD835\uDD17" }, "𝔱": { "codepoints": [120113], "characters": "\uD835\uDD31" }, "∴": { "codepoints": [8756], "characters": "\u2234" }, "∴": { "codepoints": [8756], 
"characters": "\u2234" }, "∴": { "codepoints": [8756], "characters": "\u2234" }, "Θ": { "codepoints": [920], "characters": "\u0398" }, "θ": { "codepoints": [952], "characters": "\u03B8" }, "ϑ": { "codepoints": [977], "characters": "\u03D1" }, "ϑ": { "codepoints": [977], "characters": "\u03D1" }, "≈": { "codepoints": [8776], "characters": "\u2248" }, "∼": { "codepoints": [8764], "characters": "\u223C" }, "  ": { "codepoints": [8287, 8202], "characters": "\u205F\u200A" }, " ": { "codepoints": [8201], "characters": "\u2009" }, " ": { "codepoints": [8201], "characters": "\u2009" }, "≈": { "codepoints": [8776], "characters": "\u2248" }, "∼": { "codepoints": [8764], "characters": "\u223C" }, "Þ": { "codepoints": [222], "characters": "\u00DE" }, "Þ": { "codepoints": [222], "characters": "\u00DE" }, "þ": { "codepoints": [254], "characters": "\u00FE" }, "þ": { "codepoints": [254], "characters": "\u00FE" }, "∼": { "codepoints": [8764], "characters": "\u223C" }, "˜": { "codepoints": [732], "characters": "\u02DC" }, "≃": { "codepoints": [8771], "characters": "\u2243" }, "≅": { "codepoints": [8773], "characters": "\u2245" }, "≈": { "codepoints": [8776], "characters": "\u2248" }, "×": { "codepoints": [215], "characters": "\u00D7" }, "×": { "codepoints": [215], "characters": "\u00D7" }, "⊠": { "codepoints": [8864], "characters": "\u22A0" }, "⨱": { "codepoints": [10801], "characters": "\u2A31" }, "⨰": { "codepoints": [10800], "characters": "\u2A30" }, "∭": { "codepoints": [8749], "characters": "\u222D" }, "⤨": { "codepoints": [10536], "characters": "\u2928" }, "⊤": { "codepoints": [8868], "characters": "\u22A4" }, "⌶": { "codepoints": [9014], "characters": "\u2336" }, "⫱": { "codepoints": [10993], "characters": "\u2AF1" }, "𝕋": { "codepoints": [120139], "characters": "\uD835\uDD4B" }, "𝕥": { "codepoints": [120165], "characters": "\uD835\uDD65" }, "⫚": { "codepoints": [10970], "characters": "\u2ADA" }, "⤩": { "codepoints": [10537], "characters": "\u2929" }, "‴": { "codepoints": 
[8244], "characters": "\u2034" }, "™": { "codepoints": [8482], "characters": "\u2122" }, "™": { "codepoints": [8482], "characters": "\u2122" }, "▵": { "codepoints": [9653], "characters": "\u25B5" }, "▿": { "codepoints": [9663], "characters": "\u25BF" }, "◃": { "codepoints": [9667], "characters": "\u25C3" }, "⊴": { "codepoints": [8884], "characters": "\u22B4" }, "≜": { "codepoints": [8796], "characters": "\u225C" }, "▹": { "codepoints": [9657], "characters": "\u25B9" }, "⊵": { "codepoints": [8885], "characters": "\u22B5" }, "◬": { "codepoints": [9708], "characters": "\u25EC" }, "≜": { "codepoints": [8796], "characters": "\u225C" }, "⨺": { "codepoints": [10810], "characters": "\u2A3A" }, "⃛": { "codepoints": [8411], "characters": "\u20DB" }, "⨹": { "codepoints": [10809], "characters": "\u2A39" }, "⧍": { "codepoints": [10701], "characters": "\u29CD" }, "⨻": { "codepoints": [10811], "characters": "\u2A3B" }, "⏢": { "codepoints": [9186], "characters": "\u23E2" }, "𝒯": { "codepoints": [119983], "characters": "\uD835\uDCAF" }, "𝓉": { "codepoints": [120009], "characters": "\uD835\uDCC9" }, "Ц": { "codepoints": [1062], "characters": "\u0426" }, "ц": { "codepoints": [1094], "characters": "\u0446" }, "Ћ": { "codepoints": [1035], "characters": "\u040B" }, "ћ": { "codepoints": [1115], "characters": "\u045B" }, "Ŧ": { "codepoints": [358], "characters": "\u0166" }, "ŧ": { "codepoints": [359], "characters": "\u0167" }, "≬": { "codepoints": [8812], "characters": "\u226C" }, "↞": { "codepoints": [8606], "characters": "\u219E" }, "↠": { "codepoints": [8608], "characters": "\u21A0" }, "Ú": { "codepoints": [218], "characters": "\u00DA" }, "Ú": { "codepoints": [218], "characters": "\u00DA" }, "ú": { "codepoints": [250], "characters": "\u00FA" }, "ú": { "codepoints": [250], "characters": "\u00FA" }, "↟": { "codepoints": [8607], "characters": "\u219F" }, "⇑": { "codepoints": [8657], "characters": "\u21D1" }, "↑": { "codepoints": [8593], "characters": "\u2191" }, "⥉": { "codepoints": 
[10569], "characters": "\u2949" }, "Ў": { "codepoints": [1038], "characters": "\u040E" }, "ў": { "codepoints": [1118], "characters": "\u045E" }, "Ŭ": { "codepoints": [364], "characters": "\u016C" }, "ŭ": { "codepoints": [365], "characters": "\u016D" }, "Û": { "codepoints": [219], "characters": "\u00DB" }, "Û": { "codepoints": [219], "characters": "\u00DB" }, "û": { "codepoints": [251], "characters": "\u00FB" }, "û": { "codepoints": [251], "characters": "\u00FB" }, "У": { "codepoints": [1059], "characters": "\u0423" }, "у": { "codepoints": [1091], "characters": "\u0443" }, "⇅": { "codepoints": [8645], "characters": "\u21C5" }, "Ű": { "codepoints": [368], "characters": "\u0170" }, "ű": { "codepoints": [369], "characters": "\u0171" }, "⥮": { "codepoints": [10606], "characters": "\u296E" }, "⥾": { "codepoints": [10622], "characters": "\u297E" }, "𝔘": { "codepoints": [120088], "characters": "\uD835\uDD18" }, "𝔲": { "codepoints": [120114], "characters": "\uD835\uDD32" }, "Ù": { "codepoints": [217], "characters": "\u00D9" }, "Ù": { "codepoints": [217], "characters": "\u00D9" }, "ù": { "codepoints": [249], "characters": "\u00F9" }, "ù": { "codepoints": [249], "characters": "\u00F9" }, "⥣": { "codepoints": [10595], "characters": "\u2963" }, "↿": { "codepoints": [8639], "characters": "\u21BF" }, "↾": { "codepoints": [8638], "characters": "\u21BE" }, "▀": { "codepoints": [9600], "characters": "\u2580" }, "⌜": { "codepoints": [8988], "characters": "\u231C" }, "⌜": { "codepoints": [8988], "characters": "\u231C" }, "⌏": { "codepoints": [8975], "characters": "\u230F" }, "◸": { "codepoints": [9720], "characters": "\u25F8" }, "Ū": { "codepoints": [362], "characters": "\u016A" }, "ū": { "codepoints": [363], "characters": "\u016B" }, "¨": { "codepoints": [168], "characters": "\u00A8" }, "¨": { "codepoints": [168], "characters": "\u00A8" }, "_": { "codepoints": [95], "characters": "\u005F" }, "⏟": { "codepoints": [9183], "characters": "\u23DF" }, "⎵": { "codepoints": [9141], 
"characters": "\u23B5" }, "⏝": { "codepoints": [9181], "characters": "\u23DD" }, "⋃": { "codepoints": [8899], "characters": "\u22C3" }, "⊎": { "codepoints": [8846], "characters": "\u228E" }, "Ų": { "codepoints": [370], "characters": "\u0172" }, "ų": { "codepoints": [371], "characters": "\u0173" }, "𝕌": { "codepoints": [120140], "characters": "\uD835\uDD4C" }, "𝕦": { "codepoints": [120166], "characters": "\uD835\uDD66" }, "↑": { "codepoints": [8593], "characters": "\u2191" }, "⇑": { "codepoints": [8657], "characters": "\u21D1" }, "↑": { "codepoints": [8593], "characters": "\u2191" }, "⤒": { "codepoints": [10514], "characters": "\u2912" }, "⇅": { "codepoints": [8645], "characters": "\u21C5" }, "↕": { "codepoints": [8597], "characters": "\u2195" }, "⇕": { "codepoints": [8661], "characters": "\u21D5" }, "↕": { "codepoints": [8597], "characters": "\u2195" }, "⥮": { "codepoints": [10606], "characters": "\u296E" }, "↿": { "codepoints": [8639], "characters": "\u21BF" }, "↾": { "codepoints": [8638], "characters": "\u21BE" }, "⊎": { "codepoints": [8846], "characters": "\u228E" }, "↖": { "codepoints": [8598], "characters": "\u2196" }, "↗": { "codepoints": [8599], "characters": "\u2197" }, "ϒ": { "codepoints": [978], "characters": "\u03D2" }, "υ": { "codepoints": [965], "characters": "\u03C5" }, "ϒ": { "codepoints": [978], "characters": "\u03D2" }, "Υ": { "codepoints": [933], "characters": "\u03A5" }, "υ": { "codepoints": [965], "characters": "\u03C5" }, "⊥": { "codepoints": [8869], "characters": "\u22A5" }, "↥": { "codepoints": [8613], "characters": "\u21A5" }, "⇈": { "codepoints": [8648], "characters": "\u21C8" }, "⌝": { "codepoints": [8989], "characters": "\u231D" }, "⌝": { "codepoints": [8989], "characters": "\u231D" }, "⌎": { "codepoints": [8974], "characters": "\u230E" }, "Ů": { "codepoints": [366], "characters": "\u016E" }, "ů": { "codepoints": [367], "characters": "\u016F" }, "◹": { "codepoints": [9721], "characters": "\u25F9" }, "𝒰": { "codepoints": [119984], 
"characters": "\uD835\uDCB0" }, "𝓊": { "codepoints": [120010], "characters": "\uD835\uDCCA" }, "⋰": { "codepoints": [8944], "characters": "\u22F0" }, "Ũ": { "codepoints": [360], "characters": "\u0168" }, "ũ": { "codepoints": [361], "characters": "\u0169" }, "▵": { "codepoints": [9653], "characters": "\u25B5" }, "▴": { "codepoints": [9652], "characters": "\u25B4" }, "⇈": { "codepoints": [8648], "characters": "\u21C8" }, "Ü": { "codepoints": [220], "characters": "\u00DC" }, "Ü": { "codepoints": [220], "characters": "\u00DC" }, "ü": { "codepoints": [252], "characters": "\u00FC" }, "ü": { "codepoints": [252], "characters": "\u00FC" }, "⦧": { "codepoints": [10663], "characters": "\u29A7" }, "⦜": { "codepoints": [10652], "characters": "\u299C" }, "ϵ": { "codepoints": [1013], "characters": "\u03F5" }, "ϰ": { "codepoints": [1008], "characters": "\u03F0" }, "∅": { "codepoints": [8709], "characters": "\u2205" }, "ϕ": { "codepoints": [981], "characters": "\u03D5" }, "ϖ": { "codepoints": [982], "characters": "\u03D6" }, "∝": { "codepoints": [8733], "characters": "\u221D" }, "⇕": { "codepoints": [8661], "characters": "\u21D5" }, "↕": { "codepoints": [8597], "characters": "\u2195" }, "ϱ": { "codepoints": [1009], "characters": "\u03F1" }, "ς": { "codepoints": [962], "characters": "\u03C2" }, "⊊︀": { "codepoints": [8842, 65024], "characters": "\u228A\uFE00" }, "⫋︀": { "codepoints": [10955, 65024], "characters": "\u2ACB\uFE00" }, "⊋︀": { "codepoints": [8843, 65024], "characters": "\u228B\uFE00" }, "⫌︀": { "codepoints": [10956, 65024], "characters": "\u2ACC\uFE00" }, "ϑ": { "codepoints": [977], "characters": "\u03D1" }, "⊲": { "codepoints": [8882], "characters": "\u22B2" }, "⊳": { "codepoints": [8883], "characters": "\u22B3" }, "⫫": { "codepoints": [10987], "characters": "\u2AEB" }, "⫨": { "codepoints": [10984], "characters": "\u2AE8" }, "⫩": { "codepoints": [10985], "characters": "\u2AE9" }, "В": { "codepoints": [1042], "characters": "\u0412" }, "в": { "codepoints": [1074], 
"characters": "\u0432" }, "⊫": { "codepoints": [8875], "characters": "\u22AB" }, "⊩": { "codepoints": [8873], "characters": "\u22A9" }, "⊨": { "codepoints": [8872], "characters": "\u22A8" }, "⊢": { "codepoints": [8866], "characters": "\u22A2" }, "⫦": { "codepoints": [10982], "characters": "\u2AE6" }, "⋁": { "codepoints": [8897], "characters": "\u22C1" }, "∨": { "codepoints": [8744], "characters": "\u2228" }, "⊻": { "codepoints": [8891], "characters": "\u22BB" }, "≚": { "codepoints": [8794], "characters": "\u225A" }, "⋮": { "codepoints": [8942], "characters": "\u22EE" }, "‖": { "codepoints": [8214], "characters": "\u2016" }, "|": { "codepoints": [124], "characters": "\u007C" }, "‖": { "codepoints": [8214], "characters": "\u2016" }, "|": { "codepoints": [124], "characters": "\u007C" }, "∣": { "codepoints": [8739], "characters": "\u2223" }, "|": { "codepoints": [124], "characters": "\u007C" }, "❘": { "codepoints": [10072], "characters": "\u2758" }, "≀": { "codepoints": [8768], "characters": "\u2240" }, " ": { "codepoints": [8202], "characters": "\u200A" }, "𝔙": { "codepoints": [120089], "characters": "\uD835\uDD19" }, "𝔳": { "codepoints": [120115], "characters": "\uD835\uDD33" }, "⊲": { "codepoints": [8882], "characters": "\u22B2" }, "⊂⃒": { "codepoints": [8834, 8402], "characters": "\u2282\u20D2" }, "⊃⃒": { "codepoints": [8835, 8402], "characters": "\u2283\u20D2" }, "𝕍": { "codepoints": [120141], "characters": "\uD835\uDD4D" }, "𝕧": { "codepoints": [120167], "characters": "\uD835\uDD67" }, "∝": { "codepoints": [8733], "characters": "\u221D" }, "⊳": { "codepoints": [8883], "characters": "\u22B3" }, "𝒱": { "codepoints": [119985], "characters": "\uD835\uDCB1" }, "𝓋": { "codepoints": [120011], "characters": "\uD835\uDCCB" }, "⫋︀": { "codepoints": [10955, 65024], "characters": "\u2ACB\uFE00" }, "⊊︀": { "codepoints": [8842, 65024], "characters": "\u228A\uFE00" }, "⫌︀": { "codepoints": [10956, 65024], "characters": "\u2ACC\uFE00" }, "⊋︀": { "codepoints": [8843, 65024], 
"characters": "\u228B\uFE00" }, "⊪": { "codepoints": [8874], "characters": "\u22AA" }, "⦚": { "codepoints": [10650], "characters": "\u299A" }, "Ŵ": { "codepoints": [372], "characters": "\u0174" }, "ŵ": { "codepoints": [373], "characters": "\u0175" }, "⩟": { "codepoints": [10847], "characters": "\u2A5F" }, "⋀": { "codepoints": [8896], "characters": "\u22C0" }, "∧": { "codepoints": [8743], "characters": "\u2227" }, "≙": { "codepoints": [8793], "characters": "\u2259" }, "℘": { "codepoints": [8472], "characters": "\u2118" }, "𝔚": { "codepoints": [120090], "characters": "\uD835\uDD1A" }, "𝔴": { "codepoints": [120116], "characters": "\uD835\uDD34" }, "𝕎": { "codepoints": [120142], "characters": "\uD835\uDD4E" }, "𝕨": { "codepoints": [120168], "characters": "\uD835\uDD68" }, "℘": { "codepoints": [8472], "characters": "\u2118" }, "≀": { "codepoints": [8768], "characters": "\u2240" }, "≀": { "codepoints": [8768], "characters": "\u2240" }, "𝒲": { "codepoints": [119986], "characters": "\uD835\uDCB2" }, "𝓌": { "codepoints": [120012], "characters": "\uD835\uDCCC" }, "⋂": { "codepoints": [8898], "characters": "\u22C2" }, "◯": { "codepoints": [9711], "characters": "\u25EF" }, "⋃": { "codepoints": [8899], "characters": "\u22C3" }, "▽": { "codepoints": [9661], "characters": "\u25BD" }, "𝔛": { "codepoints": [120091], "characters": "\uD835\uDD1B" }, "𝔵": { "codepoints": [120117], "characters": "\uD835\uDD35" }, "⟺": { "codepoints": [10234], "characters": "\u27FA" }, "⟷": { "codepoints": [10231], "characters": "\u27F7" }, "Ξ": { "codepoints": [926], "characters": "\u039E" }, "ξ": { "codepoints": [958], "characters": "\u03BE" }, "⟸": { "codepoints": [10232], "characters": "\u27F8" }, "⟵": { "codepoints": [10229], "characters": "\u27F5" }, "⟼": { "codepoints": [10236], "characters": "\u27FC" }, "⋻": { "codepoints": [8955], "characters": "\u22FB" }, "⨀": { "codepoints": [10752], "characters": "\u2A00" }, "𝕏": { "codepoints": [120143], "characters": "\uD835\uDD4F" }, "𝕩": { "codepoints": 
[120169], "characters": "\uD835\uDD69" }, "⨁": { "codepoints": [10753], "characters": "\u2A01" }, "⨂": { "codepoints": [10754], "characters": "\u2A02" }, "⟹": { "codepoints": [10233], "characters": "\u27F9" }, "⟶": { "codepoints": [10230], "characters": "\u27F6" }, "𝒳": { "codepoints": [119987], "characters": "\uD835\uDCB3" }, "𝓍": { "codepoints": [120013], "characters": "\uD835\uDCCD" }, "⨆": { "codepoints": [10758], "characters": "\u2A06" }, "⨄": { "codepoints": [10756], "characters": "\u2A04" }, "△": { "codepoints": [9651], "characters": "\u25B3" }, "⋁": { "codepoints": [8897], "characters": "\u22C1" }, "⋀": { "codepoints": [8896], "characters": "\u22C0" }, "Ý": { "codepoints": [221], "characters": "\u00DD" }, "Ý": { "codepoints": [221], "characters": "\u00DD" }, "ý": { "codepoints": [253], "characters": "\u00FD" }, "ý": { "codepoints": [253], "characters": "\u00FD" }, "Я": { "codepoints": [1071], "characters": "\u042F" }, "я": { "codepoints": [1103], "characters": "\u044F" }, "Ŷ": { "codepoints": [374], "characters": "\u0176" }, "ŷ": { "codepoints": [375], "characters": "\u0177" }, "Ы": { "codepoints": [1067], "characters": "\u042B" }, "ы": { "codepoints": [1099], "characters": "\u044B" }, "¥": { "codepoints": [165], "characters": "\u00A5" }, "¥": { "codepoints": [165], "characters": "\u00A5" }, "𝔜": { "codepoints": [120092], "characters": "\uD835\uDD1C" }, "𝔶": { "codepoints": [120118], "characters": "\uD835\uDD36" }, "Ї": { "codepoints": [1031], "characters": "\u0407" }, "ї": { "codepoints": [1111], "characters": "\u0457" }, "𝕐": { "codepoints": [120144], "characters": "\uD835\uDD50" }, "𝕪": { "codepoints": [120170], "characters": "\uD835\uDD6A" }, "𝒴": { "codepoints": [119988], "characters": "\uD835\uDCB4" }, "𝓎": { "codepoints": [120014], "characters": "\uD835\uDCCE" }, "Ю": { "codepoints": [1070], "characters": "\u042E" }, "ю": { "codepoints": [1102], "characters": "\u044E" }, "Ÿ": { "codepoints": [376], "characters": "\u0178" }, "ÿ": { "codepoints": 
[255], "characters": "\u00FF" }, "ÿ": { "codepoints": [255], "characters": "\u00FF" }, "Ź": { "codepoints": [377], "characters": "\u0179" }, "ź": { "codepoints": [378], "characters": "\u017A" }, "Ž": { "codepoints": [381], "characters": "\u017D" }, "ž": { "codepoints": [382], "characters": "\u017E" }, "З": { "codepoints": [1047], "characters": "\u0417" }, "з": { "codepoints": [1079], "characters": "\u0437" }, "Ż": { "codepoints": [379], "characters": "\u017B" }, "ż": { "codepoints": [380], "characters": "\u017C" }, "ℨ": { "codepoints": [8488], "characters": "\u2128" }, "​": { "codepoints": [8203], "characters": "\u200B" }, "Ζ": { "codepoints": [918], "characters": "\u0396" }, "ζ": { "codepoints": [950], "characters": "\u03B6" }, "ℨ": { "codepoints": [8488], "characters": "\u2128" }, "𝔷": { "codepoints": [120119], "characters": "\uD835\uDD37" }, "Ж": { "codepoints": [1046], "characters": "\u0416" }, "ж": { "codepoints": [1078], "characters": "\u0436" }, "⇝": { "codepoints": [8669], "characters": "\u21DD" }, "ℤ": { "codepoints": [8484], "characters": "\u2124" }, "𝕫": { "codepoints": [120171], "characters": "\uD835\uDD6B" }, "𝒵": { "codepoints": [119989], "characters": "\uD835\uDCB5" }, "𝓏": { "codepoints": [120015], "characters": "\uD835\uDCCF" }, "‍": { "codepoints": [8205], "characters": "\u200D" }, "‌": { "codepoints": [8204], "characters": "\u200C" } } markup.ml-1.0.3/src/entities.ml000066400000000000000000001657501421357706400164150ustar00rootroot00000000000000(* Copyright © 2014 W3C® (MIT, ERCIM, Keio, Beihang). This software or document includes material copied from or derived from W3C Recommendation HTML5 [https://www.w3.org/TR/2014/REC-html5-20141028/]. *) (* Generated automatically from entities.json. 
*) let entities : (string * [ `One of int | `Two of int * int ]) array = [| "Aacute", `One 0x000C1; "Aacut", `One 0x000C1; "aacute", `One 0x000E1; "aacut", `One 0x000E1; "Abreve", `One 0x00102; "abreve", `One 0x00103; "ac", `One 0x0223E; "acd", `One 0x0223F; "acE", `Two (0x0223E, 0x00333); "Acirc", `One 0x000C2; "Acir", `One 0x000C2; "acirc", `One 0x000E2; "acir", `One 0x000E2; "acute", `One 0x000B4; "acut", `One 0x000B4; "Acy", `One 0x00410; "acy", `One 0x00430; "AElig", `One 0x000C6; "AEli", `One 0x000C6; "aelig", `One 0x000E6; "aeli", `One 0x000E6; "af", `One 0x02061; "Afr", `One 0x1D504; "afr", `One 0x1D51E; "Agrave", `One 0x000C0; "Agrav", `One 0x000C0; "agrave", `One 0x000E0; "agrav", `One 0x000E0; "alefsym", `One 0x02135; "aleph", `One 0x02135; "Alpha", `One 0x00391; "alpha", `One 0x003B1; "Amacr", `One 0x00100; "amacr", `One 0x00101; "amalg", `One 0x02A3F; "AMP", `One 0x00026; "AM", `One 0x00026; "amp", `One 0x00026; "am", `One 0x00026; "And", `One 0x02A53; "and", `One 0x02227; "andand", `One 0x02A55; "andd", `One 0x02A5C; "andslope", `One 0x02A58; "andv", `One 0x02A5A; "ang", `One 0x02220; "ange", `One 0x029A4; "angle", `One 0x02220; "angmsd", `One 0x02221; "angmsdaa", `One 0x029A8; "angmsdab", `One 0x029A9; "angmsdac", `One 0x029AA; "angmsdad", `One 0x029AB; "angmsdae", `One 0x029AC; "angmsdaf", `One 0x029AD; "angmsdag", `One 0x029AE; "angmsdah", `One 0x029AF; "angrt", `One 0x0221F; "angrtvb", `One 0x022BE; "angrtvbd", `One 0x0299D; "angsph", `One 0x02222; "angst", `One 0x000C5; "angzarr", `One 0x0237C; "Aogon", `One 0x00104; "aogon", `One 0x00105; "Aopf", `One 0x1D538; "aopf", `One 0x1D552; "ap", `One 0x02248; "apacir", `One 0x02A6F; "apE", `One 0x02A70; "ape", `One 0x0224A; "apid", `One 0x0224B; "apos", `One 0x00027; "ApplyFunction", `One 0x02061; "approx", `One 0x02248; "approxeq", `One 0x0224A; "Aring", `One 0x000C5; "Arin", `One 0x000C5; "aring", `One 0x000E5; "arin", `One 0x000E5; "Ascr", `One 0x1D49C; "ascr", `One 0x1D4B6; "Assign", `One 0x02254; 
"ast", `One 0x0002A; "asymp", `One 0x02248; "asympeq", `One 0x0224D; "Atilde", `One 0x000C3; "Atild", `One 0x000C3; "atilde", `One 0x000E3; "atild", `One 0x000E3; "Auml", `One 0x000C4; "Aum", `One 0x000C4; "auml", `One 0x000E4; "aum", `One 0x000E4; "awconint", `One 0x02233; "awint", `One 0x02A11; "backcong", `One 0x0224C; "backepsilon", `One 0x003F6; "backprime", `One 0x02035; "backsim", `One 0x0223D; "backsimeq", `One 0x022CD; "Backslash", `One 0x02216; "Barv", `One 0x02AE7; "barvee", `One 0x022BD; "Barwed", `One 0x02306; "barwed", `One 0x02305; "barwedge", `One 0x02305; "bbrk", `One 0x023B5; "bbrktbrk", `One 0x023B6; "bcong", `One 0x0224C; "Bcy", `One 0x00411; "bcy", `One 0x00431; "bdquo", `One 0x0201E; "becaus", `One 0x02235; "Because", `One 0x02235; "because", `One 0x02235; "bemptyv", `One 0x029B0; "bepsi", `One 0x003F6; "bernou", `One 0x0212C; "Bernoullis", `One 0x0212C; "Beta", `One 0x00392; "beta", `One 0x003B2; "beth", `One 0x02136; "between", `One 0x0226C; "Bfr", `One 0x1D505; "bfr", `One 0x1D51F; "bigcap", `One 0x022C2; "bigcirc", `One 0x025EF; "bigcup", `One 0x022C3; "bigodot", `One 0x02A00; "bigoplus", `One 0x02A01; "bigotimes", `One 0x02A02; "bigsqcup", `One 0x02A06; "bigstar", `One 0x02605; "bigtriangledown", `One 0x025BD; "bigtriangleup", `One 0x025B3; "biguplus", `One 0x02A04; "bigvee", `One 0x022C1; "bigwedge", `One 0x022C0; "bkarow", `One 0x0290D; "blacklozenge", `One 0x029EB; "blacksquare", `One 0x025AA; "blacktriangle", `One 0x025B4; "blacktriangledown", `One 0x025BE; "blacktriangleleft", `One 0x025C2; "blacktriangleright", `One 0x025B8; "blank", `One 0x02423; "blk12", `One 0x02592; "blk14", `One 0x02591; "blk34", `One 0x02593; "block", `One 0x02588; "bne", `Two (0x0003D, 0x020E5); "bnequiv", `Two (0x02261, 0x020E5); "bNot", `One 0x02AED; "bnot", `One 0x02310; "Bopf", `One 0x1D539; "bopf", `One 0x1D553; "bot", `One 0x022A5; "bottom", `One 0x022A5; "bowtie", `One 0x022C8; "boxbox", `One 0x029C9; "boxDL", `One 0x02557; "boxDl", `One 0x02556; 
"boxdL", `One 0x02555; "boxdl", `One 0x02510; "boxDR", `One 0x02554; "boxDr", `One 0x02553; "boxdR", `One 0x02552; "boxdr", `One 0x0250C; "boxH", `One 0x02550; "boxh", `One 0x02500; "boxHD", `One 0x02566; "boxHd", `One 0x02564; "boxhD", `One 0x02565; "boxhd", `One 0x0252C; "boxHU", `One 0x02569; "boxHu", `One 0x02567; "boxhU", `One 0x02568; "boxhu", `One 0x02534; "boxminus", `One 0x0229F; "boxplus", `One 0x0229E; "boxtimes", `One 0x022A0; "boxUL", `One 0x0255D; "boxUl", `One 0x0255C; "boxuL", `One 0x0255B; "boxul", `One 0x02518; "boxUR", `One 0x0255A; "boxUr", `One 0x02559; "boxuR", `One 0x02558; "boxur", `One 0x02514; "boxV", `One 0x02551; "boxv", `One 0x02502; "boxVH", `One 0x0256C; "boxVh", `One 0x0256B; "boxvH", `One 0x0256A; "boxvh", `One 0x0253C; "boxVL", `One 0x02563; "boxVl", `One 0x02562; "boxvL", `One 0x02561; "boxvl", `One 0x02524; "boxVR", `One 0x02560; "boxVr", `One 0x0255F; "boxvR", `One 0x0255E; "boxvr", `One 0x0251C; "bprime", `One 0x02035; "Breve", `One 0x002D8; "breve", `One 0x002D8; "brvbar", `One 0x000A6; "brvba", `One 0x000A6; "Bscr", `One 0x0212C; "bscr", `One 0x1D4B7; "bsemi", `One 0x0204F; "bsim", `One 0x0223D; "bsime", `One 0x022CD; "bsol", `One 0x0005C; "bsolb", `One 0x029C5; "bsolhsub", `One 0x027C8; "bull", `One 0x02022; "bullet", `One 0x02022; "bump", `One 0x0224E; "bumpE", `One 0x02AAE; "bumpe", `One 0x0224F; "Bumpeq", `One 0x0224E; "bumpeq", `One 0x0224F; "Cacute", `One 0x00106; "cacute", `One 0x00107; "Cap", `One 0x022D2; "cap", `One 0x02229; "capand", `One 0x02A44; "capbrcup", `One 0x02A49; "capcap", `One 0x02A4B; "capcup", `One 0x02A47; "capdot", `One 0x02A40; "CapitalDifferentialD", `One 0x02145; "caps", `Two (0x02229, 0x0FE00); "caret", `One 0x02041; "caron", `One 0x002C7; "Cayleys", `One 0x0212D; "ccaps", `One 0x02A4D; "Ccaron", `One 0x0010C; "ccaron", `One 0x0010D; "Ccedil", `One 0x000C7; "Ccedi", `One 0x000C7; "ccedil", `One 0x000E7; "ccedi", `One 0x000E7; "Ccirc", `One 0x00108; "ccirc", `One 0x00109; "Cconint", `One 0x02230; 
"ccups", `One 0x02A4C; "ccupssm", `One 0x02A50; "Cdot", `One 0x0010A; "cdot", `One 0x0010B; "cedil", `One 0x000B8; "cedi", `One 0x000B8; "Cedilla", `One 0x000B8; "cemptyv", `One 0x029B2; "cent", `One 0x000A2; "cen", `One 0x000A2; "CenterDot", `One 0x000B7; "centerdot", `One 0x000B7; "Cfr", `One 0x0212D; "cfr", `One 0x1D520; "CHcy", `One 0x00427; "chcy", `One 0x00447; "check", `One 0x02713; "checkmark", `One 0x02713; "Chi", `One 0x003A7; "chi", `One 0x003C7; "cir", `One 0x025CB; "circ", `One 0x002C6; "circeq", `One 0x02257; "circlearrowleft", `One 0x021BA; "circlearrowright", `One 0x021BB; "circledast", `One 0x0229B; "circledcirc", `One 0x0229A; "circleddash", `One 0x0229D; "CircleDot", `One 0x02299; "circledR", `One 0x000AE; "circledS", `One 0x024C8; "CircleMinus", `One 0x02296; "CirclePlus", `One 0x02295; "CircleTimes", `One 0x02297; "cirE", `One 0x029C3; "cire", `One 0x02257; "cirfnint", `One 0x02A10; "cirmid", `One 0x02AEF; "cirscir", `One 0x029C2; "ClockwiseContourIntegral", `One 0x02232; "CloseCurlyDoubleQuote", `One 0x0201D; "CloseCurlyQuote", `One 0x02019; "clubs", `One 0x02663; "clubsuit", `One 0x02663; "Colon", `One 0x02237; "colon", `One 0x0003A; "Colone", `One 0x02A74; "colone", `One 0x02254; "coloneq", `One 0x02254; "comma", `One 0x0002C; "commat", `One 0x00040; "comp", `One 0x02201; "compfn", `One 0x02218; "complement", `One 0x02201; "complexes", `One 0x02102; "cong", `One 0x02245; "congdot", `One 0x02A6D; "Congruent", `One 0x02261; "Conint", `One 0x0222F; "conint", `One 0x0222E; "ContourIntegral", `One 0x0222E; "Copf", `One 0x02102; "copf", `One 0x1D554; "coprod", `One 0x02210; "Coproduct", `One 0x02210; "COPY", `One 0x000A9; "COP", `One 0x000A9; "copy", `One 0x000A9; "cop", `One 0x000A9; "copysr", `One 0x02117; "CounterClockwiseContourIntegral", `One 0x02233; "crarr", `One 0x021B5; "Cross", `One 0x02A2F; "cross", `One 0x02717; "Cscr", `One 0x1D49E; "cscr", `One 0x1D4B8; "csub", `One 0x02ACF; "csube", `One 0x02AD1; "csup", `One 0x02AD0; "csupe", `One 
0x02AD2; "ctdot", `One 0x022EF; "cudarrl", `One 0x02938; "cudarrr", `One 0x02935; "cuepr", `One 0x022DE; "cuesc", `One 0x022DF; "cularr", `One 0x021B6; "cularrp", `One 0x0293D; "Cup", `One 0x022D3; "cup", `One 0x0222A; "cupbrcap", `One 0x02A48; "CupCap", `One 0x0224D; "cupcap", `One 0x02A46; "cupcup", `One 0x02A4A; "cupdot", `One 0x0228D; "cupor", `One 0x02A45; "cups", `Two (0x0222A, 0x0FE00); "curarr", `One 0x021B7; "curarrm", `One 0x0293C; "curlyeqprec", `One 0x022DE; "curlyeqsucc", `One 0x022DF; "curlyvee", `One 0x022CE; "curlywedge", `One 0x022CF; "curren", `One 0x000A4; "curre", `One 0x000A4; "curvearrowleft", `One 0x021B6; "curvearrowright", `One 0x021B7; "cuvee", `One 0x022CE; "cuwed", `One 0x022CF; "cwconint", `One 0x02232; "cwint", `One 0x02231; "cylcty", `One 0x0232D; "Dagger", `One 0x02021; "dagger", `One 0x02020; "daleth", `One 0x02138; "Darr", `One 0x021A1; "dArr", `One 0x021D3; "darr", `One 0x02193; "dash", `One 0x02010; "Dashv", `One 0x02AE4; "dashv", `One 0x022A3; "dbkarow", `One 0x0290F; "dblac", `One 0x002DD; "Dcaron", `One 0x0010E; "dcaron", `One 0x0010F; "Dcy", `One 0x00414; "dcy", `One 0x00434; "DD", `One 0x02145; "dd", `One 0x02146; "ddagger", `One 0x02021; "ddarr", `One 0x021CA; "DDotrahd", `One 0x02911; "ddotseq", `One 0x02A77; "deg", `One 0x000B0; "de", `One 0x000B0; "Del", `One 0x02207; "Delta", `One 0x00394; "delta", `One 0x003B4; "demptyv", `One 0x029B1; "dfisht", `One 0x0297F; "Dfr", `One 0x1D507; "dfr", `One 0x1D521; "dHar", `One 0x02965; "dharl", `One 0x021C3; "dharr", `One 0x021C2; "DiacriticalAcute", `One 0x000B4; "DiacriticalDot", `One 0x002D9; "DiacriticalDoubleAcute", `One 0x002DD; "DiacriticalGrave", `One 0x00060; "DiacriticalTilde", `One 0x002DC; "diam", `One 0x022C4; "Diamond", `One 0x022C4; "diamond", `One 0x022C4; "diamondsuit", `One 0x02666; "diams", `One 0x02666; "die", `One 0x000A8; "DifferentialD", `One 0x02146; "digamma", `One 0x003DD; "disin", `One 0x022F2; "div", `One 0x000F7; "divide", `One 0x000F7; "divid", `One 
0x000F7; "divideontimes", `One 0x022C7; "divonx", `One 0x022C7; "DJcy", `One 0x00402; "djcy", `One 0x00452; "dlcorn", `One 0x0231E; "dlcrop", `One 0x0230D; "dollar", `One 0x00024; "Dopf", `One 0x1D53B; "dopf", `One 0x1D555; "Dot", `One 0x000A8; "dot", `One 0x002D9; "DotDot", `One 0x020DC; "doteq", `One 0x02250; "doteqdot", `One 0x02251; "DotEqual", `One 0x02250; "dotminus", `One 0x02238; "dotplus", `One 0x02214; "dotsquare", `One 0x022A1; "doublebarwedge", `One 0x02306; "DoubleContourIntegral", `One 0x0222F; "DoubleDot", `One 0x000A8; "DoubleDownArrow", `One 0x021D3; "DoubleLeftArrow", `One 0x021D0; "DoubleLeftRightArrow", `One 0x021D4; "DoubleLeftTee", `One 0x02AE4; "DoubleLongLeftArrow", `One 0x027F8; "DoubleLongLeftRightArrow", `One 0x027FA; "DoubleLongRightArrow", `One 0x027F9; "DoubleRightArrow", `One 0x021D2; "DoubleRightTee", `One 0x022A8; "DoubleUpArrow", `One 0x021D1; "DoubleUpDownArrow", `One 0x021D5; "DoubleVerticalBar", `One 0x02225; "DownArrow", `One 0x02193; "Downarrow", `One 0x021D3; "downarrow", `One 0x02193; "DownArrowBar", `One 0x02913; "DownArrowUpArrow", `One 0x021F5; "DownBreve", `One 0x00311; "downdownarrows", `One 0x021CA; "downharpoonleft", `One 0x021C3; "downharpoonright", `One 0x021C2; "DownLeftRightVector", `One 0x02950; "DownLeftTeeVector", `One 0x0295E; "DownLeftVector", `One 0x021BD; "DownLeftVectorBar", `One 0x02956; "DownRightTeeVector", `One 0x0295F; "DownRightVector", `One 0x021C1; "DownRightVectorBar", `One 0x02957; "DownTee", `One 0x022A4; "DownTeeArrow", `One 0x021A7; "drbkarow", `One 0x02910; "drcorn", `One 0x0231F; "drcrop", `One 0x0230C; "Dscr", `One 0x1D49F; "dscr", `One 0x1D4B9; "DScy", `One 0x00405; "dscy", `One 0x00455; "dsol", `One 0x029F6; "Dstrok", `One 0x00110; "dstrok", `One 0x00111; "dtdot", `One 0x022F1; "dtri", `One 0x025BF; "dtrif", `One 0x025BE; "duarr", `One 0x021F5; "duhar", `One 0x0296F; "dwangle", `One 0x029A6; "DZcy", `One 0x0040F; "dzcy", `One 0x0045F; "dzigrarr", `One 0x027FF; "Eacute", `One 0x000C9; 
"Eacut", `One 0x000C9; "eacute", `One 0x000E9; "eacut", `One 0x000E9; "easter", `One 0x02A6E; "Ecaron", `One 0x0011A; "ecaron", `One 0x0011B; "ecir", `One 0x02256; "Ecirc", `One 0x000CA; "Ecir", `One 0x000CA; "ecirc", `One 0x000EA; "ecir", `One 0x000EA; "ecolon", `One 0x02255; "Ecy", `One 0x0042D; "ecy", `One 0x0044D; "eDDot", `One 0x02A77; "Edot", `One 0x00116; "eDot", `One 0x02251; "edot", `One 0x00117; "ee", `One 0x02147; "efDot", `One 0x02252; "Efr", `One 0x1D508; "efr", `One 0x1D522; "eg", `One 0x02A9A; "Egrave", `One 0x000C8; "Egrav", `One 0x000C8; "egrave", `One 0x000E8; "egrav", `One 0x000E8; "egs", `One 0x02A96; "egsdot", `One 0x02A98; "el", `One 0x02A99; "Element", `One 0x02208; "elinters", `One 0x023E7; "ell", `One 0x02113; "els", `One 0x02A95; "elsdot", `One 0x02A97; "Emacr", `One 0x00112; "emacr", `One 0x00113; "empty", `One 0x02205; "emptyset", `One 0x02205; "EmptySmallSquare", `One 0x025FB; "emptyv", `One 0x02205; "EmptyVerySmallSquare", `One 0x025AB; "emsp", `One 0x02003; "emsp13", `One 0x02004; "emsp14", `One 0x02005; "ENG", `One 0x0014A; "eng", `One 0x0014B; "ensp", `One 0x02002; "Eogon", `One 0x00118; "eogon", `One 0x00119; "Eopf", `One 0x1D53C; "eopf", `One 0x1D556; "epar", `One 0x022D5; "eparsl", `One 0x029E3; "eplus", `One 0x02A71; "epsi", `One 0x003B5; "Epsilon", `One 0x00395; "epsilon", `One 0x003B5; "epsiv", `One 0x003F5; "eqcirc", `One 0x02256; "eqcolon", `One 0x02255; "eqsim", `One 0x02242; "eqslantgtr", `One 0x02A96; "eqslantless", `One 0x02A95; "Equal", `One 0x02A75; "equals", `One 0x0003D; "EqualTilde", `One 0x02242; "equest", `One 0x0225F; "Equilibrium", `One 0x021CC; "equiv", `One 0x02261; "equivDD", `One 0x02A78; "eqvparsl", `One 0x029E5; "erarr", `One 0x02971; "erDot", `One 0x02253; "Escr", `One 0x02130; "escr", `One 0x0212F; "esdot", `One 0x02250; "Esim", `One 0x02A73; "esim", `One 0x02242; "Eta", `One 0x00397; "eta", `One 0x003B7; "ETH", `One 0x000D0; "ET", `One 0x000D0; "eth", `One 0x000F0; "et", `One 0x000F0; "Euml", `One 
0x000CB; "Eum", `One 0x000CB; "euml", `One 0x000EB; "eum", `One 0x000EB; "euro", `One 0x020AC; "excl", `One 0x00021; "exist", `One 0x02203; "Exists", `One 0x02203; "expectation", `One 0x02130; "ExponentialE", `One 0x02147; "exponentiale", `One 0x02147; "fallingdotseq", `One 0x02252; "Fcy", `One 0x00424; "fcy", `One 0x00444; "female", `One 0x02640; "ffilig", `One 0x0FB03; "fflig", `One 0x0FB00; "ffllig", `One 0x0FB04; "Ffr", `One 0x1D509; "ffr", `One 0x1D523; "filig", `One 0x0FB01; "FilledSmallSquare", `One 0x025FC; "FilledVerySmallSquare", `One 0x025AA; "fjlig", `Two (0x00066, 0x0006A); "flat", `One 0x0266D; "fllig", `One 0x0FB02; "fltns", `One 0x025B1; "fnof", `One 0x00192; "Fopf", `One 0x1D53D; "fopf", `One 0x1D557; "ForAll", `One 0x02200; "forall", `One 0x02200; "fork", `One 0x022D4; "forkv", `One 0x02AD9; "Fouriertrf", `One 0x02131; "fpartint", `One 0x02A0D; "frac12", `One 0x000BD; "frac1", `One 0x000BD; "frac13", `One 0x02153; "frac14", `One 0x000BC; "frac1", `One 0x000BC; "frac15", `One 0x02155; "frac16", `One 0x02159; "frac18", `One 0x0215B; "frac23", `One 0x02154; "frac25", `One 0x02156; "frac34", `One 0x000BE; "frac3", `One 0x000BE; "frac35", `One 0x02157; "frac38", `One 0x0215C; "frac45", `One 0x02158; "frac56", `One 0x0215A; "frac58", `One 0x0215D; "frac78", `One 0x0215E; "frasl", `One 0x02044; "frown", `One 0x02322; "Fscr", `One 0x02131; "fscr", `One 0x1D4BB; "gacute", `One 0x001F5; "Gamma", `One 0x00393; "gamma", `One 0x003B3; "Gammad", `One 0x003DC; "gammad", `One 0x003DD; "gap", `One 0x02A86; "Gbreve", `One 0x0011E; "gbreve", `One 0x0011F; "Gcedil", `One 0x00122; "Gcirc", `One 0x0011C; "gcirc", `One 0x0011D; "Gcy", `One 0x00413; "gcy", `One 0x00433; "Gdot", `One 0x00120; "gdot", `One 0x00121; "gE", `One 0x02267; "ge", `One 0x02265; "gEl", `One 0x02A8C; "gel", `One 0x022DB; "geq", `One 0x02265; "geqq", `One 0x02267; "geqslant", `One 0x02A7E; "ges", `One 0x02A7E; "gescc", `One 0x02AA9; "gesdot", `One 0x02A80; "gesdoto", `One 0x02A82; "gesdotol", `One 
0x02A84; "gesl", `Two (0x022DB, 0x0FE00); "gesles", `One 0x02A94; "Gfr", `One 0x1D50A; "gfr", `One 0x1D524; "Gg", `One 0x022D9; "gg", `One 0x0226B; "ggg", `One 0x022D9; "gimel", `One 0x02137; "GJcy", `One 0x00403; "gjcy", `One 0x00453; "gl", `One 0x02277; "gla", `One 0x02AA5; "glE", `One 0x02A92; "glj", `One 0x02AA4; "gnap", `One 0x02A8A; "gnapprox", `One 0x02A8A; "gnE", `One 0x02269; "gne", `One 0x02A88; "gneq", `One 0x02A88; "gneqq", `One 0x02269; "gnsim", `One 0x022E7; "Gopf", `One 0x1D53E; "gopf", `One 0x1D558; "grave", `One 0x00060; "GreaterEqual", `One 0x02265; "GreaterEqualLess", `One 0x022DB; "GreaterFullEqual", `One 0x02267; "GreaterGreater", `One 0x02AA2; "GreaterLess", `One 0x02277; "GreaterSlantEqual", `One 0x02A7E; "GreaterTilde", `One 0x02273; "Gscr", `One 0x1D4A2; "gscr", `One 0x0210A; "gsim", `One 0x02273; "gsime", `One 0x02A8E; "gsiml", `One 0x02A90; "GT", `One 0x0003E; "G", `One 0x0003E; "Gt", `One 0x0226B; "gt", `One 0x0003E; "g", `One 0x0003E; "gtcc", `One 0x02AA7; "gtcir", `One 0x02A7A; "gtdot", `One 0x022D7; "gtlPar", `One 0x02995; "gtquest", `One 0x02A7C; "gtrapprox", `One 0x02A86; "gtrarr", `One 0x02978; "gtrdot", `One 0x022D7; "gtreqless", `One 0x022DB; "gtreqqless", `One 0x02A8C; "gtrless", `One 0x02277; "gtrsim", `One 0x02273; "gvertneqq", `Two (0x02269, 0x0FE00); "gvnE", `Two (0x02269, 0x0FE00); "Hacek", `One 0x002C7; "hairsp", `One 0x0200A; "half", `One 0x000BD; "hamilt", `One 0x0210B; "HARDcy", `One 0x0042A; "hardcy", `One 0x0044A; "hArr", `One 0x021D4; "harr", `One 0x02194; "harrcir", `One 0x02948; "harrw", `One 0x021AD; "Hat", `One 0x0005E; "hbar", `One 0x0210F; "Hcirc", `One 0x00124; "hcirc", `One 0x00125; "hearts", `One 0x02665; "heartsuit", `One 0x02665; "hellip", `One 0x02026; "hercon", `One 0x022B9; "Hfr", `One 0x0210C; "hfr", `One 0x1D525; "HilbertSpace", `One 0x0210B; "hksearow", `One 0x02925; "hkswarow", `One 0x02926; "hoarr", `One 0x021FF; "homtht", `One 0x0223B; "hookleftarrow", `One 0x021A9; "hookrightarrow", `One 0x021AA; 
"Hopf", `One 0x0210D; "hopf", `One 0x1D559; "horbar", `One 0x02015; "HorizontalLine", `One 0x02500; "Hscr", `One 0x0210B; "hscr", `One 0x1D4BD; "hslash", `One 0x0210F; "Hstrok", `One 0x00126; "hstrok", `One 0x00127; "HumpDownHump", `One 0x0224E; "HumpEqual", `One 0x0224F; "hybull", `One 0x02043; "hyphen", `One 0x02010; "Iacute", `One 0x000CD; "Iacut", `One 0x000CD; "iacute", `One 0x000ED; "iacut", `One 0x000ED; "ic", `One 0x02063; "Icirc", `One 0x000CE; "Icir", `One 0x000CE; "icirc", `One 0x000EE; "icir", `One 0x000EE; "Icy", `One 0x00418; "icy", `One 0x00438; "Idot", `One 0x00130; "IEcy", `One 0x00415; "iecy", `One 0x00435; "iexcl", `One 0x000A1; "iexc", `One 0x000A1; "iff", `One 0x021D4; "Ifr", `One 0x02111; "ifr", `One 0x1D526; "Igrave", `One 0x000CC; "Igrav", `One 0x000CC; "igrave", `One 0x000EC; "igrav", `One 0x000EC; "ii", `One 0x02148; "iiiint", `One 0x02A0C; "iiint", `One 0x0222D; "iinfin", `One 0x029DC; "iiota", `One 0x02129; "IJlig", `One 0x00132; "ijlig", `One 0x00133; "Im", `One 0x02111; "Imacr", `One 0x0012A; "imacr", `One 0x0012B; "image", `One 0x02111; "ImaginaryI", `One 0x02148; "imagline", `One 0x02110; "imagpart", `One 0x02111; "imath", `One 0x00131; "imof", `One 0x022B7; "imped", `One 0x001B5; "Implies", `One 0x021D2; "in", `One 0x02208; "incare", `One 0x02105; "infin", `One 0x0221E; "infintie", `One 0x029DD; "inodot", `One 0x00131; "Int", `One 0x0222C; "int", `One 0x0222B; "intcal", `One 0x022BA; "integers", `One 0x02124; "Integral", `One 0x0222B; "intercal", `One 0x022BA; "Intersection", `One 0x022C2; "intlarhk", `One 0x02A17; "intprod", `One 0x02A3C; "InvisibleComma", `One 0x02063; "InvisibleTimes", `One 0x02062; "IOcy", `One 0x00401; "iocy", `One 0x00451; "Iogon", `One 0x0012E; "iogon", `One 0x0012F; "Iopf", `One 0x1D540; "iopf", `One 0x1D55A; "Iota", `One 0x00399; "iota", `One 0x003B9; "iprod", `One 0x02A3C; "iquest", `One 0x000BF; "iques", `One 0x000BF; "Iscr", `One 0x02110; "iscr", `One 0x1D4BE; "isin", `One 0x02208; "isindot", `One 
0x022F5; "isinE", `One 0x022F9; "isins", `One 0x022F4; "isinsv", `One 0x022F3; "isinv", `One 0x02208; "it", `One 0x02062; "Itilde", `One 0x00128; "itilde", `One 0x00129; "Iukcy", `One 0x00406; "iukcy", `One 0x00456; "Iuml", `One 0x000CF; "Ium", `One 0x000CF; "iuml", `One 0x000EF; "ium", `One 0x000EF; "Jcirc", `One 0x00134; "jcirc", `One 0x00135; "Jcy", `One 0x00419; "jcy", `One 0x00439; "Jfr", `One 0x1D50D; "jfr", `One 0x1D527; "jmath", `One 0x00237; "Jopf", `One 0x1D541; "jopf", `One 0x1D55B; "Jscr", `One 0x1D4A5; "jscr", `One 0x1D4BF; "Jsercy", `One 0x00408; "jsercy", `One 0x00458; "Jukcy", `One 0x00404; "jukcy", `One 0x00454; "Kappa", `One 0x0039A; "kappa", `One 0x003BA; "kappav", `One 0x003F0; "Kcedil", `One 0x00136; "kcedil", `One 0x00137; "Kcy", `One 0x0041A; "kcy", `One 0x0043A; "Kfr", `One 0x1D50E; "kfr", `One 0x1D528; "kgreen", `One 0x00138; "KHcy", `One 0x00425; "khcy", `One 0x00445; "KJcy", `One 0x0040C; "kjcy", `One 0x0045C; "Kopf", `One 0x1D542; "kopf", `One 0x1D55C; "Kscr", `One 0x1D4A6; "kscr", `One 0x1D4C0; "lAarr", `One 0x021DA; "Lacute", `One 0x00139; "lacute", `One 0x0013A; "laemptyv", `One 0x029B4; "lagran", `One 0x02112; "Lambda", `One 0x0039B; "lambda", `One 0x003BB; "Lang", `One 0x027EA; "lang", `One 0x027E8; "langd", `One 0x02991; "langle", `One 0x027E8; "lap", `One 0x02A85; "Laplacetrf", `One 0x02112; "laquo", `One 0x000AB; "laqu", `One 0x000AB; "Larr", `One 0x0219E; "lArr", `One 0x021D0; "larr", `One 0x02190; "larrb", `One 0x021E4; "larrbfs", `One 0x0291F; "larrfs", `One 0x0291D; "larrhk", `One 0x021A9; "larrlp", `One 0x021AB; "larrpl", `One 0x02939; "larrsim", `One 0x02973; "larrtl", `One 0x021A2; "lat", `One 0x02AAB; "lAtail", `One 0x0291B; "latail", `One 0x02919; "late", `One 0x02AAD; "lates", `Two (0x02AAD, 0x0FE00); "lBarr", `One 0x0290E; "lbarr", `One 0x0290C; "lbbrk", `One 0x02772; "lbrace", `One 0x0007B; "lbrack", `One 0x0005B; "lbrke", `One 0x0298B; "lbrksld", `One 0x0298F; "lbrkslu", `One 0x0298D; "Lcaron", `One 0x0013D; 
"lcaron", `One 0x0013E; "Lcedil", `One 0x0013B; "lcedil", `One 0x0013C; "lceil", `One 0x02308; "lcub", `One 0x0007B; "Lcy", `One 0x0041B; "lcy", `One 0x0043B; "ldca", `One 0x02936; "ldquo", `One 0x0201C; "ldquor", `One 0x0201E; "ldrdhar", `One 0x02967; "ldrushar", `One 0x0294B; "ldsh", `One 0x021B2; "lE", `One 0x02266; "le", `One 0x02264; "LeftAngleBracket", `One 0x027E8; "LeftArrow", `One 0x02190; "Leftarrow", `One 0x021D0; "leftarrow", `One 0x02190; "LeftArrowBar", `One 0x021E4; "LeftArrowRightArrow", `One 0x021C6; "leftarrowtail", `One 0x021A2; "LeftCeiling", `One 0x02308; "LeftDoubleBracket", `One 0x027E6; "LeftDownTeeVector", `One 0x02961; "LeftDownVector", `One 0x021C3; "LeftDownVectorBar", `One 0x02959; "LeftFloor", `One 0x0230A; "leftharpoondown", `One 0x021BD; "leftharpoonup", `One 0x021BC; "leftleftarrows", `One 0x021C7; "LeftRightArrow", `One 0x02194; "Leftrightarrow", `One 0x021D4; "leftrightarrow", `One 0x02194; "leftrightarrows", `One 0x021C6; "leftrightharpoons", `One 0x021CB; "leftrightsquigarrow", `One 0x021AD; "LeftRightVector", `One 0x0294E; "LeftTee", `One 0x022A3; "LeftTeeArrow", `One 0x021A4; "LeftTeeVector", `One 0x0295A; "leftthreetimes", `One 0x022CB; "LeftTriangle", `One 0x022B2; "LeftTriangleBar", `One 0x029CF; "LeftTriangleEqual", `One 0x022B4; "LeftUpDownVector", `One 0x02951; "LeftUpTeeVector", `One 0x02960; "LeftUpVector", `One 0x021BF; "LeftUpVectorBar", `One 0x02958; "LeftVector", `One 0x021BC; "LeftVectorBar", `One 0x02952; "lEg", `One 0x02A8B; "leg", `One 0x022DA; "leq", `One 0x02264; "leqq", `One 0x02266; "leqslant", `One 0x02A7D; "les", `One 0x02A7D; "lescc", `One 0x02AA8; "lesdot", `One 0x02A7F; "lesdoto", `One 0x02A81; "lesdotor", `One 0x02A83; "lesg", `Two (0x022DA, 0x0FE00); "lesges", `One 0x02A93; "lessapprox", `One 0x02A85; "lessdot", `One 0x022D6; "lesseqgtr", `One 0x022DA; "lesseqqgtr", `One 0x02A8B; "LessEqualGreater", `One 0x022DA; "LessFullEqual", `One 0x02266; "LessGreater", `One 0x02276; "lessgtr", `One 0x02276; 
"LessLess", `One 0x02AA1; "lesssim", `One 0x02272; "LessSlantEqual", `One 0x02A7D; "LessTilde", `One 0x02272; "lfisht", `One 0x0297C; "lfloor", `One 0x0230A; "Lfr", `One 0x1D50F; "lfr", `One 0x1D529; "lg", `One 0x02276; "lgE", `One 0x02A91; "lHar", `One 0x02962; "lhard", `One 0x021BD; "lharu", `One 0x021BC; "lharul", `One 0x0296A; "lhblk", `One 0x02584; "LJcy", `One 0x00409; "ljcy", `One 0x00459; "Ll", `One 0x022D8; "ll", `One 0x0226A; "llarr", `One 0x021C7; "llcorner", `One 0x0231E; "Lleftarrow", `One 0x021DA; "llhard", `One 0x0296B; "lltri", `One 0x025FA; "Lmidot", `One 0x0013F; "lmidot", `One 0x00140; "lmoust", `One 0x023B0; "lmoustache", `One 0x023B0; "lnap", `One 0x02A89; "lnapprox", `One 0x02A89; "lnE", `One 0x02268; "lne", `One 0x02A87; "lneq", `One 0x02A87; "lneqq", `One 0x02268; "lnsim", `One 0x022E6; "loang", `One 0x027EC; "loarr", `One 0x021FD; "lobrk", `One 0x027E6; "LongLeftArrow", `One 0x027F5; "Longleftarrow", `One 0x027F8; "longleftarrow", `One 0x027F5; "LongLeftRightArrow", `One 0x027F7; "Longleftrightarrow", `One 0x027FA; "longleftrightarrow", `One 0x027F7; "longmapsto", `One 0x027FC; "LongRightArrow", `One 0x027F6; "Longrightarrow", `One 0x027F9; "longrightarrow", `One 0x027F6; "looparrowleft", `One 0x021AB; "looparrowright", `One 0x021AC; "lopar", `One 0x02985; "Lopf", `One 0x1D543; "lopf", `One 0x1D55D; "loplus", `One 0x02A2D; "lotimes", `One 0x02A34; "lowast", `One 0x02217; "lowbar", `One 0x0005F; "LowerLeftArrow", `One 0x02199; "LowerRightArrow", `One 0x02198; "loz", `One 0x025CA; "lozenge", `One 0x025CA; "lozf", `One 0x029EB; "lpar", `One 0x00028; "lparlt", `One 0x02993; "lrarr", `One 0x021C6; "lrcorner", `One 0x0231F; "lrhar", `One 0x021CB; "lrhard", `One 0x0296D; "lrm", `One 0x0200E; "lrtri", `One 0x022BF; "lsaquo", `One 0x02039; "Lscr", `One 0x02112; "lscr", `One 0x1D4C1; "Lsh", `One 0x021B0; "lsh", `One 0x021B0; "lsim", `One 0x02272; "lsime", `One 0x02A8D; "lsimg", `One 0x02A8F; "lsqb", `One 0x0005B; "lsquo", `One 0x02018; "lsquor", `One 
0x0201A; "Lstrok", `One 0x00141; "lstrok", `One 0x00142; "LT", `One 0x0003C; "L", `One 0x0003C; "Lt", `One 0x0226A; "lt", `One 0x0003C; "l", `One 0x0003C; "ltcc", `One 0x02AA6; "ltcir", `One 0x02A79; "ltdot", `One 0x022D6; "lthree", `One 0x022CB; "ltimes", `One 0x022C9; "ltlarr", `One 0x02976; "ltquest", `One 0x02A7B; "ltri", `One 0x025C3; "ltrie", `One 0x022B4; "ltrif", `One 0x025C2; "ltrPar", `One 0x02996; "lurdshar", `One 0x0294A; "luruhar", `One 0x02966; "lvertneqq", `Two (0x02268, 0x0FE00); "lvnE", `Two (0x02268, 0x0FE00); "macr", `One 0x000AF; "mac", `One 0x000AF; "male", `One 0x02642; "malt", `One 0x02720; "maltese", `One 0x02720; "Map", `One 0x02905; "map", `One 0x021A6; "mapsto", `One 0x021A6; "mapstodown", `One 0x021A7; "mapstoleft", `One 0x021A4; "mapstoup", `One 0x021A5; "marker", `One 0x025AE; "mcomma", `One 0x02A29; "Mcy", `One 0x0041C; "mcy", `One 0x0043C; "mdash", `One 0x02014; "mDDot", `One 0x0223A; "measuredangle", `One 0x02221; "MediumSpace", `One 0x0205F; "Mellintrf", `One 0x02133; "Mfr", `One 0x1D510; "mfr", `One 0x1D52A; "mho", `One 0x02127; "micro", `One 0x000B5; "micr", `One 0x000B5; "mid", `One 0x02223; "midast", `One 0x0002A; "midcir", `One 0x02AF0; "middot", `One 0x000B7; "middo", `One 0x000B7; "minus", `One 0x02212; "minusb", `One 0x0229F; "minusd", `One 0x02238; "minusdu", `One 0x02A2A; "MinusPlus", `One 0x02213; "mlcp", `One 0x02ADB; "mldr", `One 0x02026; "mnplus", `One 0x02213; "models", `One 0x022A7; "Mopf", `One 0x1D544; "mopf", `One 0x1D55E; "mp", `One 0x02213; "Mscr", `One 0x02133; "mscr", `One 0x1D4C2; "mstpos", `One 0x0223E; "Mu", `One 0x0039C; "mu", `One 0x003BC; "multimap", `One 0x022B8; "mumap", `One 0x022B8; "nabla", `One 0x02207; "Nacute", `One 0x00143; "nacute", `One 0x00144; "nang", `Two (0x02220, 0x020D2); "nap", `One 0x02249; "napE", `Two (0x02A70, 0x00338); "napid", `Two (0x0224B, 0x00338); "napos", `One 0x00149; "napprox", `One 0x02249; "natur", `One 0x0266E; "natural", `One 0x0266E; "naturals", `One 0x02115; "nbsp", 
`One 0x000A0; "nbs", `One 0x000A0; "nbump", `Two (0x0224E, 0x00338); "nbumpe", `Two (0x0224F, 0x00338); "ncap", `One 0x02A43; "Ncaron", `One 0x00147; "ncaron", `One 0x00148; "Ncedil", `One 0x00145; "ncedil", `One 0x00146; "ncong", `One 0x02247; "ncongdot", `Two (0x02A6D, 0x00338); "ncup", `One 0x02A42; "Ncy", `One 0x0041D; "ncy", `One 0x0043D; "ndash", `One 0x02013; "ne", `One 0x02260; "nearhk", `One 0x02924; "neArr", `One 0x021D7; "nearr", `One 0x02197; "nearrow", `One 0x02197; "nedot", `Two (0x02250, 0x00338); "NegativeMediumSpace", `One 0x0200B; "NegativeThickSpace", `One 0x0200B; "NegativeThinSpace", `One 0x0200B; "NegativeVeryThinSpace", `One 0x0200B; "nequiv", `One 0x02262; "nesear", `One 0x02928; "nesim", `Two (0x02242, 0x00338); "NestedGreaterGreater", `One 0x0226B; "NestedLessLess", `One 0x0226A; "NewLine", `One 0x0000A; "nexist", `One 0x02204; "nexists", `One 0x02204; "Nfr", `One 0x1D511; "nfr", `One 0x1D52B; "ngE", `Two (0x02267, 0x00338); "nge", `One 0x02271; "ngeq", `One 0x02271; "ngeqq", `Two (0x02267, 0x00338); "ngeqslant", `Two (0x02A7E, 0x00338); "nges", `Two (0x02A7E, 0x00338); "nGg", `Two (0x022D9, 0x00338); "ngsim", `One 0x02275; "nGt", `Two (0x0226B, 0x020D2); "ngt", `One 0x0226F; "ngtr", `One 0x0226F; "nGtv", `Two (0x0226B, 0x00338); "nhArr", `One 0x021CE; "nharr", `One 0x021AE; "nhpar", `One 0x02AF2; "ni", `One 0x0220B; "nis", `One 0x022FC; "nisd", `One 0x022FA; "niv", `One 0x0220B; "NJcy", `One 0x0040A; "njcy", `One 0x0045A; "nlArr", `One 0x021CD; "nlarr", `One 0x0219A; "nldr", `One 0x02025; "nlE", `Two (0x02266, 0x00338); "nle", `One 0x02270; "nLeftarrow", `One 0x021CD; "nleftarrow", `One 0x0219A; "nLeftrightarrow", `One 0x021CE; "nleftrightarrow", `One 0x021AE; "nleq", `One 0x02270; "nleqq", `Two (0x02266, 0x00338); "nleqslant", `Two (0x02A7D, 0x00338); "nles", `Two (0x02A7D, 0x00338); "nless", `One 0x0226E; "nLl", `Two (0x022D8, 0x00338); "nlsim", `One 0x02274; "nLt", `Two (0x0226A, 0x020D2); "nlt", `One 0x0226E; "nltri", `One 0x022EA; 
"nltrie", `One 0x022EC; "nLtv", `Two (0x0226A, 0x00338); "nmid", `One 0x02224; "NoBreak", `One 0x02060; "NonBreakingSpace", `One 0x000A0; "Nopf", `One 0x02115; "nopf", `One 0x1D55F; "Not", `One 0x02AEC; "not", `One 0x000AC; "no", `One 0x000AC; "NotCongruent", `One 0x02262; "NotCupCap", `One 0x0226D; "NotDoubleVerticalBar", `One 0x02226; "NotElement", `One 0x02209; "NotEqual", `One 0x02260; "NotEqualTilde", `Two (0x02242, 0x00338); "NotExists", `One 0x02204; "NotGreater", `One 0x0226F; "NotGreaterEqual", `One 0x02271; "NotGreaterFullEqual", `Two (0x02267, 0x00338); "NotGreaterGreater", `Two (0x0226B, 0x00338); "NotGreaterLess", `One 0x02279; "NotGreaterSlantEqual", `Two (0x02A7E, 0x00338); "NotGreaterTilde", `One 0x02275; "NotHumpDownHump", `Two (0x0224E, 0x00338); "NotHumpEqual", `Two (0x0224F, 0x00338); "notin", `One 0x02209; "notindot", `Two (0x022F5, 0x00338); "notinE", `Two (0x022F9, 0x00338); "notinva", `One 0x02209; "notinvb", `One 0x022F7; "notinvc", `One 0x022F6; "NotLeftTriangle", `One 0x022EA; "NotLeftTriangleBar", `Two (0x029CF, 0x00338); "NotLeftTriangleEqual", `One 0x022EC; "NotLess", `One 0x0226E; "NotLessEqual", `One 0x02270; "NotLessGreater", `One 0x02278; "NotLessLess", `Two (0x0226A, 0x00338); "NotLessSlantEqual", `Two (0x02A7D, 0x00338); "NotLessTilde", `One 0x02274; "NotNestedGreaterGreater", `Two (0x02AA2, 0x00338); "NotNestedLessLess", `Two (0x02AA1, 0x00338); "notni", `One 0x0220C; "notniva", `One 0x0220C; "notnivb", `One 0x022FE; "notnivc", `One 0x022FD; "NotPrecedes", `One 0x02280; "NotPrecedesEqual", `Two (0x02AAF, 0x00338); "NotPrecedesSlantEqual", `One 0x022E0; "NotReverseElement", `One 0x0220C; "NotRightTriangle", `One 0x022EB; "NotRightTriangleBar", `Two (0x029D0, 0x00338); "NotRightTriangleEqual", `One 0x022ED; "NotSquareSubset", `Two (0x0228F, 0x00338); "NotSquareSubsetEqual", `One 0x022E2; "NotSquareSuperset", `Two (0x02290, 0x00338); "NotSquareSupersetEqual", `One 0x022E3; "NotSubset", `Two (0x02282, 0x020D2); "NotSubsetEqual", 
`One 0x02288; "NotSucceeds", `One 0x02281; "NotSucceedsEqual", `Two (0x02AB0, 0x00338); "NotSucceedsSlantEqual", `One 0x022E1; "NotSucceedsTilde", `Two (0x0227F, 0x00338); "NotSuperset", `Two (0x02283, 0x020D2); "NotSupersetEqual", `One 0x02289; "NotTilde", `One 0x02241; "NotTildeEqual", `One 0x02244; "NotTildeFullEqual", `One 0x02247; "NotTildeTilde", `One 0x02249; "NotVerticalBar", `One 0x02224; "npar", `One 0x02226; "nparallel", `One 0x02226; "nparsl", `Two (0x02AFD, 0x020E5); "npart", `Two (0x02202, 0x00338); "npolint", `One 0x02A14; "npr", `One 0x02280; "nprcue", `One 0x022E0; "npre", `Two (0x02AAF, 0x00338); "nprec", `One 0x02280; "npreceq", `Two (0x02AAF, 0x00338); "nrArr", `One 0x021CF; "nrarr", `One 0x0219B; "nrarrc", `Two (0x02933, 0x00338); "nrarrw", `Two (0x0219D, 0x00338); "nRightarrow", `One 0x021CF; "nrightarrow", `One 0x0219B; "nrtri", `One 0x022EB; "nrtrie", `One 0x022ED; "nsc", `One 0x02281; "nsccue", `One 0x022E1; "nsce", `Two (0x02AB0, 0x00338); "Nscr", `One 0x1D4A9; "nscr", `One 0x1D4C3; "nshortmid", `One 0x02224; "nshortparallel", `One 0x02226; "nsim", `One 0x02241; "nsime", `One 0x02244; "nsimeq", `One 0x02244; "nsmid", `One 0x02224; "nspar", `One 0x02226; "nsqsube", `One 0x022E2; "nsqsupe", `One 0x022E3; "nsub", `One 0x02284; "nsubE", `Two (0x02AC5, 0x00338); "nsube", `One 0x02288; "nsubset", `Two (0x02282, 0x020D2); "nsubseteq", `One 0x02288; "nsubseteqq", `Two (0x02AC5, 0x00338); "nsucc", `One 0x02281; "nsucceq", `Two (0x02AB0, 0x00338); "nsup", `One 0x02285; "nsupE", `Two (0x02AC6, 0x00338); "nsupe", `One 0x02289; "nsupset", `Two (0x02283, 0x020D2); "nsupseteq", `One 0x02289; "nsupseteqq", `Two (0x02AC6, 0x00338); "ntgl", `One 0x02279; "Ntilde", `One 0x000D1; "Ntild", `One 0x000D1; "ntilde", `One 0x000F1; "ntild", `One 0x000F1; "ntlg", `One 0x02278; "ntriangleleft", `One 0x022EA; "ntrianglelefteq", `One 0x022EC; "ntriangleright", `One 0x022EB; "ntrianglerighteq", `One 0x022ED; "Nu", `One 0x0039D; "nu", `One 0x003BD; "num", `One 0x00023; 
"numero", `One 0x02116; "numsp", `One 0x02007; "nvap", `Two (0x0224D, 0x020D2); "nVDash", `One 0x022AF; "nVdash", `One 0x022AE; "nvDash", `One 0x022AD; "nvdash", `One 0x022AC; "nvge", `Two (0x02265, 0x020D2); "nvgt", `Two (0x0003E, 0x020D2); "nvHarr", `One 0x02904; "nvinfin", `One 0x029DE; "nvlArr", `One 0x02902; "nvle", `Two (0x02264, 0x020D2); "nvlt", `Two (0x0003C, 0x020D2); "nvltrie", `Two (0x022B4, 0x020D2); "nvrArr", `One 0x02903; "nvrtrie", `Two (0x022B5, 0x020D2); "nvsim", `Two (0x0223C, 0x020D2); "nwarhk", `One 0x02923; "nwArr", `One 0x021D6; "nwarr", `One 0x02196; "nwarrow", `One 0x02196; "nwnear", `One 0x02927; "Oacute", `One 0x000D3; "Oacut", `One 0x000D3; "oacute", `One 0x000F3; "oacut", `One 0x000F3; "oast", `One 0x0229B; "ocir", `One 0x0229A; "Ocirc", `One 0x000D4; "Ocir", `One 0x000D4; "ocirc", `One 0x000F4; "ocir", `One 0x000F4; "Ocy", `One 0x0041E; "ocy", `One 0x0043E; "odash", `One 0x0229D; "Odblac", `One 0x00150; "odblac", `One 0x00151; "odiv", `One 0x02A38; "odot", `One 0x02299; "odsold", `One 0x029BC; "OElig", `One 0x00152; "oelig", `One 0x00153; "ofcir", `One 0x029BF; "Ofr", `One 0x1D512; "ofr", `One 0x1D52C; "ogon", `One 0x002DB; "Ograve", `One 0x000D2; "Ograv", `One 0x000D2; "ograve", `One 0x000F2; "ograv", `One 0x000F2; "ogt", `One 0x029C1; "ohbar", `One 0x029B5; "ohm", `One 0x003A9; "oint", `One 0x0222E; "olarr", `One 0x021BA; "olcir", `One 0x029BE; "olcross", `One 0x029BB; "oline", `One 0x0203E; "olt", `One 0x029C0; "Omacr", `One 0x0014C; "omacr", `One 0x0014D; "Omega", `One 0x003A9; "omega", `One 0x003C9; "Omicron", `One 0x0039F; "omicron", `One 0x003BF; "omid", `One 0x029B6; "ominus", `One 0x02296; "Oopf", `One 0x1D546; "oopf", `One 0x1D560; "opar", `One 0x029B7; "OpenCurlyDoubleQuote", `One 0x0201C; "OpenCurlyQuote", `One 0x02018; "operp", `One 0x029B9; "oplus", `One 0x02295; "Or", `One 0x02A54; "or", `One 0x02228; "orarr", `One 0x021BB; "ord", `One 0x02A5D; "order", `One 0x02134; "orderof", `One 0x02134; "ordf", `One 0x000AA; "ord", 
`One 0x000AA; "ordm", `One 0x000BA; "ord", `One 0x000BA; "origof", `One 0x022B6; "oror", `One 0x02A56; "orslope", `One 0x02A57; "orv", `One 0x02A5B; "oS", `One 0x024C8; "Oscr", `One 0x1D4AA; "oscr", `One 0x02134; "Oslash", `One 0x000D8; "Oslas", `One 0x000D8; "oslash", `One 0x000F8; "oslas", `One 0x000F8; "osol", `One 0x02298; "Otilde", `One 0x000D5; "Otild", `One 0x000D5; "otilde", `One 0x000F5; "otild", `One 0x000F5; "Otimes", `One 0x02A37; "otimes", `One 0x02297; "otimesas", `One 0x02A36; "Ouml", `One 0x000D6; "Oum", `One 0x000D6; "ouml", `One 0x000F6; "oum", `One 0x000F6; "ovbar", `One 0x0233D; "OverBar", `One 0x0203E; "OverBrace", `One 0x023DE; "OverBracket", `One 0x023B4; "OverParenthesis", `One 0x023DC; "par", `One 0x02225; "para", `One 0x000B6; "par", `One 0x000B6; "parallel", `One 0x02225; "parsim", `One 0x02AF3; "parsl", `One 0x02AFD; "part", `One 0x02202; "PartialD", `One 0x02202; "Pcy", `One 0x0041F; "pcy", `One 0x0043F; "percnt", `One 0x00025; "period", `One 0x0002E; "permil", `One 0x02030; "perp", `One 0x022A5; "pertenk", `One 0x02031; "Pfr", `One 0x1D513; "pfr", `One 0x1D52D; "Phi", `One 0x003A6; "phi", `One 0x003C6; "phiv", `One 0x003D5; "phmmat", `One 0x02133; "phone", `One 0x0260E; "Pi", `One 0x003A0; "pi", `One 0x003C0; "pitchfork", `One 0x022D4; "piv", `One 0x003D6; "planck", `One 0x0210F; "planckh", `One 0x0210E; "plankv", `One 0x0210F; "plus", `One 0x0002B; "plusacir", `One 0x02A23; "plusb", `One 0x0229E; "pluscir", `One 0x02A22; "plusdo", `One 0x02214; "plusdu", `One 0x02A25; "pluse", `One 0x02A72; "PlusMinus", `One 0x000B1; "plusmn", `One 0x000B1; "plusm", `One 0x000B1; "plussim", `One 0x02A26; "plustwo", `One 0x02A27; "pm", `One 0x000B1; "Poincareplane", `One 0x0210C; "pointint", `One 0x02A15; "Popf", `One 0x02119; "popf", `One 0x1D561; "pound", `One 0x000A3; "poun", `One 0x000A3; "Pr", `One 0x02ABB; "pr", `One 0x0227A; "prap", `One 0x02AB7; "prcue", `One 0x0227C; "prE", `One 0x02AB3; "pre", `One 0x02AAF; "prec", `One 0x0227A; "precapprox", 
`One 0x02AB7; "preccurlyeq", `One 0x0227C; "Precedes", `One 0x0227A; "PrecedesEqual", `One 0x02AAF; "PrecedesSlantEqual", `One 0x0227C; "PrecedesTilde", `One 0x0227E; "preceq", `One 0x02AAF; "precnapprox", `One 0x02AB9; "precneqq", `One 0x02AB5; "precnsim", `One 0x022E8; "precsim", `One 0x0227E; "Prime", `One 0x02033; "prime", `One 0x02032; "primes", `One 0x02119; "prnap", `One 0x02AB9; "prnE", `One 0x02AB5; "prnsim", `One 0x022E8; "prod", `One 0x0220F; "Product", `One 0x0220F; "profalar", `One 0x0232E; "profline", `One 0x02312; "profsurf", `One 0x02313; "prop", `One 0x0221D; "Proportion", `One 0x02237; "Proportional", `One 0x0221D; "propto", `One 0x0221D; "prsim", `One 0x0227E; "prurel", `One 0x022B0; "Pscr", `One 0x1D4AB; "pscr", `One 0x1D4C5; "Psi", `One 0x003A8; "psi", `One 0x003C8; "puncsp", `One 0x02008; "Qfr", `One 0x1D514; "qfr", `One 0x1D52E; "qint", `One 0x02A0C; "Qopf", `One 0x0211A; "qopf", `One 0x1D562; "qprime", `One 0x02057; "Qscr", `One 0x1D4AC; "qscr", `One 0x1D4C6; "quaternions", `One 0x0210D; "quatint", `One 0x02A16; "quest", `One 0x0003F; "questeq", `One 0x0225F; "QUOT", `One 0x00022; "QUO", `One 0x00022; "quot", `One 0x00022; "quo", `One 0x00022; "rAarr", `One 0x021DB; "race", `Two (0x0223D, 0x00331); "Racute", `One 0x00154; "racute", `One 0x00155; "radic", `One 0x0221A; "raemptyv", `One 0x029B3; "Rang", `One 0x027EB; "rang", `One 0x027E9; "rangd", `One 0x02992; "range", `One 0x029A5; "rangle", `One 0x027E9; "raquo", `One 0x000BB; "raqu", `One 0x000BB; "Rarr", `One 0x021A0; "rArr", `One 0x021D2; "rarr", `One 0x02192; "rarrap", `One 0x02975; "rarrb", `One 0x021E5; "rarrbfs", `One 0x02920; "rarrc", `One 0x02933; "rarrfs", `One 0x0291E; "rarrhk", `One 0x021AA; "rarrlp", `One 0x021AC; "rarrpl", `One 0x02945; "rarrsim", `One 0x02974; "Rarrtl", `One 0x02916; "rarrtl", `One 0x021A3; "rarrw", `One 0x0219D; "rAtail", `One 0x0291C; "ratail", `One 0x0291A; "ratio", `One 0x02236; "rationals", `One 0x0211A; "RBarr", `One 0x02910; "rBarr", `One 0x0290F; 
"rbarr", `One 0x0290D; "rbbrk", `One 0x02773; "rbrace", `One 0x0007D; "rbrack", `One 0x0005D; "rbrke", `One 0x0298C; "rbrksld", `One 0x0298E; "rbrkslu", `One 0x02990; "Rcaron", `One 0x00158; "rcaron", `One 0x00159; "Rcedil", `One 0x00156; "rcedil", `One 0x00157; "rceil", `One 0x02309; "rcub", `One 0x0007D; "Rcy", `One 0x00420; "rcy", `One 0x00440; "rdca", `One 0x02937; "rdldhar", `One 0x02969; "rdquo", `One 0x0201D; "rdquor", `One 0x0201D; "rdsh", `One 0x021B3; "Re", `One 0x0211C; "real", `One 0x0211C; "realine", `One 0x0211B; "realpart", `One 0x0211C; "reals", `One 0x0211D; "rect", `One 0x025AD; "REG", `One 0x000AE; "RE", `One 0x000AE; "reg", `One 0x000AE; "re", `One 0x000AE; "ReverseElement", `One 0x0220B; "ReverseEquilibrium", `One 0x021CB; "ReverseUpEquilibrium", `One 0x0296F; "rfisht", `One 0x0297D; "rfloor", `One 0x0230B; "Rfr", `One 0x0211C; "rfr", `One 0x1D52F; "rHar", `One 0x02964; "rhard", `One 0x021C1; "rharu", `One 0x021C0; "rharul", `One 0x0296C; "Rho", `One 0x003A1; "rho", `One 0x003C1; "rhov", `One 0x003F1; "RightAngleBracket", `One 0x027E9; "RightArrow", `One 0x02192; "Rightarrow", `One 0x021D2; "rightarrow", `One 0x02192; "RightArrowBar", `One 0x021E5; "RightArrowLeftArrow", `One 0x021C4; "rightarrowtail", `One 0x021A3; "RightCeiling", `One 0x02309; "RightDoubleBracket", `One 0x027E7; "RightDownTeeVector", `One 0x0295D; "RightDownVector", `One 0x021C2; "RightDownVectorBar", `One 0x02955; "RightFloor", `One 0x0230B; "rightharpoondown", `One 0x021C1; "rightharpoonup", `One 0x021C0; "rightleftarrows", `One 0x021C4; "rightleftharpoons", `One 0x021CC; "rightrightarrows", `One 0x021C9; "rightsquigarrow", `One 0x0219D; "RightTee", `One 0x022A2; "RightTeeArrow", `One 0x021A6; "RightTeeVector", `One 0x0295B; "rightthreetimes", `One 0x022CC; "RightTriangle", `One 0x022B3; "RightTriangleBar", `One 0x029D0; "RightTriangleEqual", `One 0x022B5; "RightUpDownVector", `One 0x0294F; "RightUpTeeVector", `One 0x0295C; "RightUpVector", `One 0x021BE; "RightUpVectorBar", 
`One 0x02954; "RightVector", `One 0x021C0; "RightVectorBar", `One 0x02953; "ring", `One 0x002DA; "risingdotseq", `One 0x02253; "rlarr", `One 0x021C4; "rlhar", `One 0x021CC; "rlm", `One 0x0200F; "rmoust", `One 0x023B1; "rmoustache", `One 0x023B1; "rnmid", `One 0x02AEE; "roang", `One 0x027ED; "roarr", `One 0x021FE; "robrk", `One 0x027E7; "ropar", `One 0x02986; "Ropf", `One 0x0211D; "ropf", `One 0x1D563; "roplus", `One 0x02A2E; "rotimes", `One 0x02A35; "RoundImplies", `One 0x02970; "rpar", `One 0x00029; "rpargt", `One 0x02994; "rppolint", `One 0x02A12; "rrarr", `One 0x021C9; "Rrightarrow", `One 0x021DB; "rsaquo", `One 0x0203A; "Rscr", `One 0x0211B; "rscr", `One 0x1D4C7; "Rsh", `One 0x021B1; "rsh", `One 0x021B1; "rsqb", `One 0x0005D; "rsquo", `One 0x02019; "rsquor", `One 0x02019; "rthree", `One 0x022CC; "rtimes", `One 0x022CA; "rtri", `One 0x025B9; "rtrie", `One 0x022B5; "rtrif", `One 0x025B8; "rtriltri", `One 0x029CE; "RuleDelayed", `One 0x029F4; "ruluhar", `One 0x02968; "rx", `One 0x0211E; "Sacute", `One 0x0015A; "sacute", `One 0x0015B; "sbquo", `One 0x0201A; "Sc", `One 0x02ABC; "sc", `One 0x0227B; "scap", `One 0x02AB8; "Scaron", `One 0x00160; "scaron", `One 0x00161; "sccue", `One 0x0227D; "scE", `One 0x02AB4; "sce", `One 0x02AB0; "Scedil", `One 0x0015E; "scedil", `One 0x0015F; "Scirc", `One 0x0015C; "scirc", `One 0x0015D; "scnap", `One 0x02ABA; "scnE", `One 0x02AB6; "scnsim", `One 0x022E9; "scpolint", `One 0x02A13; "scsim", `One 0x0227F; "Scy", `One 0x00421; "scy", `One 0x00441; "sdot", `One 0x022C5; "sdotb", `One 0x022A1; "sdote", `One 0x02A66; "searhk", `One 0x02925; "seArr", `One 0x021D8; "searr", `One 0x02198; "searrow", `One 0x02198; "sect", `One 0x000A7; "sec", `One 0x000A7; "semi", `One 0x0003B; "seswar", `One 0x02929; "setminus", `One 0x02216; "setmn", `One 0x02216; "sext", `One 0x02736; "Sfr", `One 0x1D516; "sfr", `One 0x1D530; "sfrown", `One 0x02322; "sharp", `One 0x0266F; "SHCHcy", `One 0x00429; "shchcy", `One 0x00449; "SHcy", `One 0x00428; "shcy", `One 
0x00448; "ShortDownArrow", `One 0x02193; "ShortLeftArrow", `One 0x02190; "shortmid", `One 0x02223; "shortparallel", `One 0x02225; "ShortRightArrow", `One 0x02192; "ShortUpArrow", `One 0x02191; "shy", `One 0x000AD; "sh", `One 0x000AD; "Sigma", `One 0x003A3; "sigma", `One 0x003C3; "sigmaf", `One 0x003C2; "sigmav", `One 0x003C2; "sim", `One 0x0223C; "simdot", `One 0x02A6A; "sime", `One 0x02243; "simeq", `One 0x02243; "simg", `One 0x02A9E; "simgE", `One 0x02AA0; "siml", `One 0x02A9D; "simlE", `One 0x02A9F; "simne", `One 0x02246; "simplus", `One 0x02A24; "simrarr", `One 0x02972; "slarr", `One 0x02190; "SmallCircle", `One 0x02218; "smallsetminus", `One 0x02216; "smashp", `One 0x02A33; "smeparsl", `One 0x029E4; "smid", `One 0x02223; "smile", `One 0x02323; "smt", `One 0x02AAA; "smte", `One 0x02AAC; "smtes", `Two (0x02AAC, 0x0FE00); "SOFTcy", `One 0x0042C; "softcy", `One 0x0044C; "sol", `One 0x0002F; "solb", `One 0x029C4; "solbar", `One 0x0233F; "Sopf", `One 0x1D54A; "sopf", `One 0x1D564; "spades", `One 0x02660; "spadesuit", `One 0x02660; "spar", `One 0x02225; "sqcap", `One 0x02293; "sqcaps", `Two (0x02293, 0x0FE00); "sqcup", `One 0x02294; "sqcups", `Two (0x02294, 0x0FE00); "Sqrt", `One 0x0221A; "sqsub", `One 0x0228F; "sqsube", `One 0x02291; "sqsubset", `One 0x0228F; "sqsubseteq", `One 0x02291; "sqsup", `One 0x02290; "sqsupe", `One 0x02292; "sqsupset", `One 0x02290; "sqsupseteq", `One 0x02292; "squ", `One 0x025A1; "Square", `One 0x025A1; "square", `One 0x025A1; "SquareIntersection", `One 0x02293; "SquareSubset", `One 0x0228F; "SquareSubsetEqual", `One 0x02291; "SquareSuperset", `One 0x02290; "SquareSupersetEqual", `One 0x02292; "SquareUnion", `One 0x02294; "squarf", `One 0x025AA; "squf", `One 0x025AA; "srarr", `One 0x02192; "Sscr", `One 0x1D4AE; "sscr", `One 0x1D4C8; "ssetmn", `One 0x02216; "ssmile", `One 0x02323; "sstarf", `One 0x022C6; "Star", `One 0x022C6; "star", `One 0x02606; "starf", `One 0x02605; "straightepsilon", `One 0x003F5; "straightphi", `One 0x003D5; "strns", 
`One 0x000AF; "Sub", `One 0x022D0; "sub", `One 0x02282; "subdot", `One 0x02ABD; "subE", `One 0x02AC5; "sube", `One 0x02286; "subedot", `One 0x02AC3; "submult", `One 0x02AC1; "subnE", `One 0x02ACB; "subne", `One 0x0228A; "subplus", `One 0x02ABF; "subrarr", `One 0x02979; "Subset", `One 0x022D0; "subset", `One 0x02282; "subseteq", `One 0x02286; "subseteqq", `One 0x02AC5; "SubsetEqual", `One 0x02286; "subsetneq", `One 0x0228A; "subsetneqq", `One 0x02ACB; "subsim", `One 0x02AC7; "subsub", `One 0x02AD5; "subsup", `One 0x02AD3; "succ", `One 0x0227B; "succapprox", `One 0x02AB8; "succcurlyeq", `One 0x0227D; "Succeeds", `One 0x0227B; "SucceedsEqual", `One 0x02AB0; "SucceedsSlantEqual", `One 0x0227D; "SucceedsTilde", `One 0x0227F; "succeq", `One 0x02AB0; "succnapprox", `One 0x02ABA; "succneqq", `One 0x02AB6; "succnsim", `One 0x022E9; "succsim", `One 0x0227F; "SuchThat", `One 0x0220B; "Sum", `One 0x02211; "sum", `One 0x02211; "sung", `One 0x0266A; "Sup", `One 0x022D1; "sup", `One 0x02283; "sup1", `One 0x000B9; "sup", `One 0x000B9; "sup2", `One 0x000B2; "sup", `One 0x000B2; "sup3", `One 0x000B3; "sup", `One 0x000B3; "supdot", `One 0x02ABE; "supdsub", `One 0x02AD8; "supE", `One 0x02AC6; "supe", `One 0x02287; "supedot", `One 0x02AC4; "Superset", `One 0x02283; "SupersetEqual", `One 0x02287; "suphsol", `One 0x027C9; "suphsub", `One 0x02AD7; "suplarr", `One 0x0297B; "supmult", `One 0x02AC2; "supnE", `One 0x02ACC; "supne", `One 0x0228B; "supplus", `One 0x02AC0; "Supset", `One 0x022D1; "supset", `One 0x02283; "supseteq", `One 0x02287; "supseteqq", `One 0x02AC6; "supsetneq", `One 0x0228B; "supsetneqq", `One 0x02ACC; "supsim", `One 0x02AC8; "supsub", `One 0x02AD4; "supsup", `One 0x02AD6; "swarhk", `One 0x02926; "swArr", `One 0x021D9; "swarr", `One 0x02199; "swarrow", `One 0x02199; "swnwar", `One 0x0292A; "szlig", `One 0x000DF; "szli", `One 0x000DF; "Tab", `One 0x00009; "target", `One 0x02316; "Tau", `One 0x003A4; "tau", `One 0x003C4; "tbrk", `One 0x023B4; "Tcaron", `One 0x00164; 
"tcaron", `One 0x00165; "Tcedil", `One 0x00162; "tcedil", `One 0x00163; "Tcy", `One 0x00422; "tcy", `One 0x00442; "tdot", `One 0x020DB; "telrec", `One 0x02315; "Tfr", `One 0x1D517; "tfr", `One 0x1D531; "there4", `One 0x02234; "Therefore", `One 0x02234; "therefore", `One 0x02234; "Theta", `One 0x00398; "theta", `One 0x003B8; "thetasym", `One 0x003D1; "thetav", `One 0x003D1; "thickapprox", `One 0x02248; "thicksim", `One 0x0223C; "ThickSpace", `Two (0x0205F, 0x0200A); "thinsp", `One 0x02009; "ThinSpace", `One 0x02009; "thkap", `One 0x02248; "thksim", `One 0x0223C; "THORN", `One 0x000DE; "THOR", `One 0x000DE; "thorn", `One 0x000FE; "thor", `One 0x000FE; "Tilde", `One 0x0223C; "tilde", `One 0x002DC; "TildeEqual", `One 0x02243; "TildeFullEqual", `One 0x02245; "TildeTilde", `One 0x02248; "times", `One 0x000D7; "time", `One 0x000D7; "timesb", `One 0x022A0; "timesbar", `One 0x02A31; "timesd", `One 0x02A30; "tint", `One 0x0222D; "toea", `One 0x02928; "top", `One 0x022A4; "topbot", `One 0x02336; "topcir", `One 0x02AF1; "Topf", `One 0x1D54B; "topf", `One 0x1D565; "topfork", `One 0x02ADA; "tosa", `One 0x02929; "tprime", `One 0x02034; "TRADE", `One 0x02122; "trade", `One 0x02122; "triangle", `One 0x025B5; "triangledown", `One 0x025BF; "triangleleft", `One 0x025C3; "trianglelefteq", `One 0x022B4; "triangleq", `One 0x0225C; "triangleright", `One 0x025B9; "trianglerighteq", `One 0x022B5; "tridot", `One 0x025EC; "trie", `One 0x0225C; "triminus", `One 0x02A3A; "TripleDot", `One 0x020DB; "triplus", `One 0x02A39; "trisb", `One 0x029CD; "tritime", `One 0x02A3B; "trpezium", `One 0x023E2; "Tscr", `One 0x1D4AF; "tscr", `One 0x1D4C9; "TScy", `One 0x00426; "tscy", `One 0x00446; "TSHcy", `One 0x0040B; "tshcy", `One 0x0045B; "Tstrok", `One 0x00166; "tstrok", `One 0x00167; "twixt", `One 0x0226C; "twoheadleftarrow", `One 0x0219E; "twoheadrightarrow", `One 0x021A0; "Uacute", `One 0x000DA; "Uacut", `One 0x000DA; "uacute", `One 0x000FA; "uacut", `One 0x000FA; "Uarr", `One 0x0219F; "uArr", `One 
0x021D1; "uarr", `One 0x02191; "Uarrocir", `One 0x02949; "Ubrcy", `One 0x0040E; "ubrcy", `One 0x0045E; "Ubreve", `One 0x0016C; "ubreve", `One 0x0016D; "Ucirc", `One 0x000DB; "Ucir", `One 0x000DB; "ucirc", `One 0x000FB; "ucir", `One 0x000FB; "Ucy", `One 0x00423; "ucy", `One 0x00443; "udarr", `One 0x021C5; "Udblac", `One 0x00170; "udblac", `One 0x00171; "udhar", `One 0x0296E; "ufisht", `One 0x0297E; "Ufr", `One 0x1D518; "ufr", `One 0x1D532; "Ugrave", `One 0x000D9; "Ugrav", `One 0x000D9; "ugrave", `One 0x000F9; "ugrav", `One 0x000F9; "uHar", `One 0x02963; "uharl", `One 0x021BF; "uharr", `One 0x021BE; "uhblk", `One 0x02580; "ulcorn", `One 0x0231C; "ulcorner", `One 0x0231C; "ulcrop", `One 0x0230F; "ultri", `One 0x025F8; "Umacr", `One 0x0016A; "umacr", `One 0x0016B; "uml", `One 0x000A8; "um", `One 0x000A8; "UnderBar", `One 0x0005F; "UnderBrace", `One 0x023DF; "UnderBracket", `One 0x023B5; "UnderParenthesis", `One 0x023DD; "Union", `One 0x022C3; "UnionPlus", `One 0x0228E; "Uogon", `One 0x00172; "uogon", `One 0x00173; "Uopf", `One 0x1D54C; "uopf", `One 0x1D566; "UpArrow", `One 0x02191; "Uparrow", `One 0x021D1; "uparrow", `One 0x02191; "UpArrowBar", `One 0x02912; "UpArrowDownArrow", `One 0x021C5; "UpDownArrow", `One 0x02195; "Updownarrow", `One 0x021D5; "updownarrow", `One 0x02195; "UpEquilibrium", `One 0x0296E; "upharpoonleft", `One 0x021BF; "upharpoonright", `One 0x021BE; "uplus", `One 0x0228E; "UpperLeftArrow", `One 0x02196; "UpperRightArrow", `One 0x02197; "Upsi", `One 0x003D2; "upsi", `One 0x003C5; "upsih", `One 0x003D2; "Upsilon", `One 0x003A5; "upsilon", `One 0x003C5; "UpTee", `One 0x022A5; "UpTeeArrow", `One 0x021A5; "upuparrows", `One 0x021C8; "urcorn", `One 0x0231D; "urcorner", `One 0x0231D; "urcrop", `One 0x0230E; "Uring", `One 0x0016E; "uring", `One 0x0016F; "urtri", `One 0x025F9; "Uscr", `One 0x1D4B0; "uscr", `One 0x1D4CA; "utdot", `One 0x022F0; "Utilde", `One 0x00168; "utilde", `One 0x00169; "utri", `One 0x025B5; "utrif", `One 0x025B4; "uuarr", `One 0x021C8; 
"Uuml", `One 0x000DC; "Uum", `One 0x000DC; "uuml", `One 0x000FC; "uum", `One 0x000FC; "uwangle", `One 0x029A7; "vangrt", `One 0x0299C; "varepsilon", `One 0x003F5; "varkappa", `One 0x003F0; "varnothing", `One 0x02205; "varphi", `One 0x003D5; "varpi", `One 0x003D6; "varpropto", `One 0x0221D; "vArr", `One 0x021D5; "varr", `One 0x02195; "varrho", `One 0x003F1; "varsigma", `One 0x003C2; "varsubsetneq", `Two (0x0228A, 0x0FE00); "varsubsetneqq", `Two (0x02ACB, 0x0FE00); "varsupsetneq", `Two (0x0228B, 0x0FE00); "varsupsetneqq", `Two (0x02ACC, 0x0FE00); "vartheta", `One 0x003D1; "vartriangleleft", `One 0x022B2; "vartriangleright", `One 0x022B3; "Vbar", `One 0x02AEB; "vBar", `One 0x02AE8; "vBarv", `One 0x02AE9; "Vcy", `One 0x00412; "vcy", `One 0x00432; "VDash", `One 0x022AB; "Vdash", `One 0x022A9; "vDash", `One 0x022A8; "vdash", `One 0x022A2; "Vdashl", `One 0x02AE6; "Vee", `One 0x022C1; "vee", `One 0x02228; "veebar", `One 0x022BB; "veeeq", `One 0x0225A; "vellip", `One 0x022EE; "Verbar", `One 0x02016; "verbar", `One 0x0007C; "Vert", `One 0x02016; "vert", `One 0x0007C; "VerticalBar", `One 0x02223; "VerticalLine", `One 0x0007C; "VerticalSeparator", `One 0x02758; "VerticalTilde", `One 0x02240; "VeryThinSpace", `One 0x0200A; "Vfr", `One 0x1D519; "vfr", `One 0x1D533; "vltri", `One 0x022B2; "vnsub", `Two (0x02282, 0x020D2); "vnsup", `Two (0x02283, 0x020D2); "Vopf", `One 0x1D54D; "vopf", `One 0x1D567; "vprop", `One 0x0221D; "vrtri", `One 0x022B3; "Vscr", `One 0x1D4B1; "vscr", `One 0x1D4CB; "vsubnE", `Two (0x02ACB, 0x0FE00); "vsubne", `Two (0x0228A, 0x0FE00); "vsupnE", `Two (0x02ACC, 0x0FE00); "vsupne", `Two (0x0228B, 0x0FE00); "Vvdash", `One 0x022AA; "vzigzag", `One 0x0299A; "Wcirc", `One 0x00174; "wcirc", `One 0x00175; "wedbar", `One 0x02A5F; "Wedge", `One 0x022C0; "wedge", `One 0x02227; "wedgeq", `One 0x02259; "weierp", `One 0x02118; "Wfr", `One 0x1D51A; "wfr", `One 0x1D534; "Wopf", `One 0x1D54E; "wopf", `One 0x1D568; "wp", `One 0x02118; "wr", `One 0x02240; "wreath", `One 0x02240; 
"Wscr", `One 0x1D4B2; "wscr", `One 0x1D4CC; "xcap", `One 0x022C2; "xcirc", `One 0x025EF; "xcup", `One 0x022C3; "xdtri", `One 0x025BD; "Xfr", `One 0x1D51B; "xfr", `One 0x1D535; "xhArr", `One 0x027FA; "xharr", `One 0x027F7; "Xi", `One 0x0039E; "xi", `One 0x003BE; "xlArr", `One 0x027F8; "xlarr", `One 0x027F5; "xmap", `One 0x027FC; "xnis", `One 0x022FB; "xodot", `One 0x02A00; "Xopf", `One 0x1D54F; "xopf", `One 0x1D569; "xoplus", `One 0x02A01; "xotime", `One 0x02A02; "xrArr", `One 0x027F9; "xrarr", `One 0x027F6; "Xscr", `One 0x1D4B3; "xscr", `One 0x1D4CD; "xsqcup", `One 0x02A06; "xuplus", `One 0x02A04; "xutri", `One 0x025B3; "xvee", `One 0x022C1; "xwedge", `One 0x022C0; "Yacute", `One 0x000DD; "Yacut", `One 0x000DD; "yacute", `One 0x000FD; "yacut", `One 0x000FD; "YAcy", `One 0x0042F; "yacy", `One 0x0044F; "Ycirc", `One 0x00176; "ycirc", `One 0x00177; "Ycy", `One 0x0042B; "ycy", `One 0x0044B; "yen", `One 0x000A5; "ye", `One 0x000A5; "Yfr", `One 0x1D51C; "yfr", `One 0x1D536; "YIcy", `One 0x00407; "yicy", `One 0x00457; "Yopf", `One 0x1D550; "yopf", `One 0x1D56A; "Yscr", `One 0x1D4B4; "yscr", `One 0x1D4CE; "YUcy", `One 0x0042E; "yucy", `One 0x0044E; "Yuml", `One 0x00178; "yuml", `One 0x000FF; "yum", `One 0x000FF; "Zacute", `One 0x00179; "zacute", `One 0x0017A; "Zcaron", `One 0x0017D; "zcaron", `One 0x0017E; "Zcy", `One 0x00417; "zcy", `One 0x00437; "Zdot", `One 0x0017B; "zdot", `One 0x0017C; "zeetrf", `One 0x02128; "ZeroWidthSpace", `One 0x0200B; "Zeta", `One 0x00396; "zeta", `One 0x003B6; "Zfr", `One 0x02128; "zfr", `One 0x1D537; "ZHcy", `One 0x00416; "zhcy", `One 0x00436; "zigrarr", `One 0x021DD; "Zopf", `One 0x02124; "zopf", `One 0x1D56B; "Zscr", `One 0x1D4B5; "zscr", `One 0x1D4CF; "zwj", `One 0x0200D; "zwnj", `One 0x0200C |] markup.ml-1.0.3/src/error.ml000066400000000000000000000043201421357706400157030ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. 
*) open Common type t = [ `Decoding_error of string * string | `Bad_token of string * string * string | `Unexpected_eoi of string | `Bad_document of string | `Unmatched_start_tag of string | `Unmatched_end_tag of string | `Bad_namespace of string | `Misnested_tag of string * string * (string * string) list | `Bad_content of string ] let explode_string s = let rec iterate index acc = if index >= String.length s then List.rev acc else iterate (index + 1) (s.[index]::acc) in iterate 0 [] let to_string ?location error = let fmt = Printf.sprintf in let message = match error with | `Decoding_error (bytes, encoding) -> begin match String.length bytes with | 0 -> fmt "bad bytes for encoding '%s'" encoding | 1 -> fmt "bad byte '0x%02X' for encoding '%s'" (Char.code bytes.[0]) encoding | _ -> fmt "bad bytes '%s' for encoding '%s'" (explode_string bytes |> List.map Char.code |> List.map (fmt "0x%02X") |> String.concat " ") encoding end | `Bad_token (s, production, reason) -> fmt "bad token '%s' in %s: %s" s production reason | `Unexpected_eoi in_ -> fmt "unexpected end of input in %s" in_ | `Bad_document reason -> fmt "bad document: %s" reason | `Unmatched_start_tag s -> fmt "unmatched start tag '%s'" s | `Unmatched_end_tag s -> fmt "unmatched end tag '%s'" s | `Bad_namespace s -> fmt "unknown namespace '%s'" s | `Misnested_tag (s, in_, _attributes) -> fmt "misnested tag: '%s' in '%s'" s in_ | `Bad_content s -> fmt "bad content in '%s'" s in match location with | None -> message | Some (line, column) -> fmt "line %i, column %i: %s" line column message type 'a handler = 'a -> t -> unit cps type parse_handler = location handler type write_handler = (signal * int) handler let ignore_errors _ _ _ resume = resume () let report_if report condition location detail throw k = if condition then report location (detail ()) throw k else k () markup.ml-1.0.3/src/html_parser.ml000066400000000000000000002674501421357706400171110ustar00rootroot00000000000000(* This file is part of Markup.ml, 
released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open Common open Token_tag open Kstream (* Namespaces for pattern matching. *) type ns = [ `HTML | `MathML | `SVG | `Other of string ] type qname = ns * string module Ns : sig val to_string : ns -> string end = struct let to_string = function | `HTML -> html_ns | `MathML -> mathml_ns | `SVG -> svg_ns | `Other s -> s end (* Specialization of List.mem at qname list, to avoid polymorphic comparison. *) let list_mem_qname ((ns, tag) : qname) l = let rec loop = function | [] -> false | (ns', tag')::_ when ns' = ns && tag' = tag -> true | _::rest -> loop rest in loop l (* Elements. *) type element = {element_name : qname; location : location; is_html_integration_point : bool; suppress : bool; mutable buffering : bool; mutable is_open : bool; mutable attributes : (name * string) list; mutable end_location : location; mutable children : annotated_node list; mutable parent : element} and node = | Element of element | Text of string list | PI of string * string | Comment of string and annotated_node = location * node (* Element helpers. 
*) module Element : sig val create : ?is_html_integration_point:bool -> ?suppress:bool -> qname -> location -> element val dummy : element val is_special : qname -> bool val is_not_hidden : Token_tag.t -> bool end = struct let rec dummy = {element_name = `HTML, "dummy"; location = 1, 1; is_html_integration_point = false; suppress = true; buffering = false; is_open = false; attributes = []; end_location = 1, 1; children = []; parent = dummy} let create ?(is_html_integration_point = false) ?(suppress = false) name location = {element_name = name; location; is_html_integration_point; suppress; buffering = false; is_open = true; attributes = []; end_location = 1, 1; children = []; parent = dummy} let is_special name = list_mem_qname name [`HTML, "address"; `HTML, "applet"; `HTML, "area"; `HTML, "article"; `HTML, "aside"; `HTML, "base"; `HTML, "basefont"; `HTML, "bgsound"; `HTML, "blockquote"; `HTML, "body"; `HTML, "br"; `HTML, "button"; `HTML, "caption"; `HTML, "center"; `HTML, "col"; `HTML, "colgroup"; `HTML, "dd"; `HTML, "details"; `HTML, "dir"; `HTML, "div"; `HTML, "dl"; `HTML, "dt"; `HTML, "embed"; `HTML, "fieldset"; `HTML, "figcaption"; `HTML, "figure"; `HTML, "footer"; `HTML, "form"; `HTML, "frame"; `HTML, "frameset"; `HTML, "h1"; `HTML, "h2"; `HTML, "h3"; `HTML, "h4"; `HTML, "h5"; `HTML, "h6"; `HTML, "head"; `HTML, "header"; `HTML, "hgroup"; `HTML, "hr"; `HTML, "html"; `HTML, "iframe"; `HTML, "img"; `HTML, "input"; `HTML, "isindex"; `HTML, "li"; `HTML, "link"; `HTML, "listing"; `HTML, "main"; `HTML, "marquee"; `HTML, "meta"; `HTML, "nav"; `HTML, "noembed"; `HTML, "noframes"; `HTML, "noscript"; `HTML, "object"; `HTML, "ol"; `HTML, "p"; `HTML, "param"; `HTML, "plaintext"; `HTML, "pre"; `HTML, "script"; `HTML, "section"; `HTML, "select"; `HTML, "source"; `HTML, "style"; `HTML, "summary"; `HTML, "table"; `HTML, "tbody"; `HTML, "td"; `HTML, "template"; `HTML, "textarea"; `HTML, "tfoot"; `HTML, "th"; `HTML, "thead"; `HTML, "title"; `HTML, "tr"; `HTML, "track"; `HTML, 
"ul"; `HTML, "wbr"; `HTML, "xmp"; `MathML, "mi"; `MathML, "mo"; `MathML, "mn"; `MathML, "ms"; `MathML, "mtext"; `MathML, "annotation-xml"; `SVG, "foreignObject"; `SVG, "desc"; `SVG, "title"] let is_not_hidden tag = tag.Token_tag.attributes |> List.exists (fun (name, value) -> name = "type" && value <> "hidden") end (* Context detection. *) type simple_context = [ `Document | `Fragment of string ] type context = [ `Document | `Fragment of qname ] module Context : sig type t val uninitialized : unit -> t val initialize : (location * Html_tokenizer.token) Kstream.t -> [< simple_context ] option -> t -> unit cps val the_context : t -> context val element : t -> element option val token : t -> string option end = struct let detect tokens throw k = let tokens, restore = checkpoint tokens in let last_name = ref None in let next_token k = next_expected tokens throw (fun token -> begin match token with | _, `Start {name} -> last_name := Some name | _ -> () end; k token) in let k context = restore (); k (context, !last_name) in let rec scan () = next_token begin function | _, `Doctype _ -> k `Document | _, `Char c when not @@ is_whitespace c -> k (`Fragment "body") | _, `Char _ -> scan () | _, `EOF -> k (`Fragment "body") | _, `Start {name = "html"} -> k `Document | _, `Start {name = "head" | "body" | "frameset"} -> k (`Fragment "html") | _, `Start {name = "base" | "basefont" | "bgsound" | "link" | "meta" | "noframes" | "style" | "template" | "title"} -> k (`Fragment "head") | _, `Start {name = "frame"} -> k (`Fragment "frameset") | _, `Start {name = "li"} -> k (`Fragment "ul") | _, `Start {name = "caption" | "col" | "colgroup" | "tbody" | "tfoot" | "thead"} -> k (`Fragment "table") | _, `Start {name = "tr"} -> k (`Fragment "tbody") | _, `Start {name = "td" | "th"} -> k (`Fragment "tr") | _, `Start {name = "optgroup" | "option"} -> k (`Fragment "select") | _, `Start {name = "altglyph" | "altglyphdef" | "altglyphitem" | "animate" | "animatecolor" | "animatemotion" | 
"animatetransform" | "circle" | "clippath" | "color-profile" | "cursor" | "defs" | "desc" | "ellipse" | "feblend" | "fecolormatrix" | "fecomponenttransfer" | "fecomposite" | "fediffuselighting" | "fedisplacementmap" | "fedistantlight" | "feflood" | "fefunca" | "fefuncb" | "fefuncg" | "fefuncr" | "fegaussianblur" | "feimage" | "femerge" | "femergenode" | "femorphology" | "feoffset" | "fepointlight" | "fespecularlighting" | "fespotlight" | "fetile" | "feturbulence" | "filter" | "font-face" | "font-face-format" | "font-face-name" | "font-face-src" | "font-face-uri" | "foreignobject" | "g" | "glyph" | "glyphref" | "hkern" | "image" | "line" | "lineargradient" | "marker" | "mask" | "metadata" | "missing-glyph" | "mpath" | "path" | "pattern" | "polygon" | "polyline" | "radialgradient" | "rect" | "set" | "stop" | "switch" | "symbol" | "text" | "textpath" | "tref" | "tspan" | "use"} -> k (`Fragment "svg") | _, `Start {name = "maction" | "maligngroup" | "malignmark" | "menclose" | "merror" | "mfenced" | "mfrac" | "mglyph" | "mi" | "mlabeledtr" | "mlongdiv" | "mmultiscripts" | "mn" | "mo" | "mover" | "mpadded" | "mphantom" | "mroot" | "mrow" | "ms" | "mscarries" | "mscarry" | "msgroup" | "msline" | "mspace" | "msqrt" | "msrow" | "mstack" | "mstyle" | "msub" | "msup" | "msubsup" | "mtable" | "mtd" | "mtext" | "mtr" | "munder" | "munderover" | "semantics" | "annotation" | "annotation-xml"} -> k (`Fragment "math") | _, `Start _ -> k (`Fragment "body") | _, (`End _ | `Comment _) -> scan () end in scan () type t = (context * element option * string option) ref let uninitialized () = ref (`Document, None, None) let initialize tokens requested_context state throw k = (fun k -> match requested_context with | Some (`Fragment element) -> (* HTML element names are case-insensitive, even in foreign content. Lowercase the element name given by the user before analysis by the parser, to match this convention. 
[String.lowercase] is acceptable here because the API assumes the string [element] is in UTF-8. *) k (`Fragment (String.lowercase_ascii element), None) | Some (`Document as c) -> k (c, None) | None -> detect tokens throw k) (fun (detected_context, deciding_token) -> let context = match detected_context with | `Document -> `Document | `Fragment "math" -> `Fragment (`MathML, "math") | `Fragment "svg" -> `Fragment (`SVG, "svg") | `Fragment name -> `Fragment (`HTML, name) in let context_element = match context with | `Document -> None | `Fragment name -> let is_html_integration_point = match name with | `SVG, ("foreignObject" | "desc" | "title") -> true | _ -> false in Some (Element.create ~is_html_integration_point ~suppress:true name (1, 1)) in state := context, context_element, deciding_token; k ()) let the_context {contents = (c, _, _)} = c let element {contents = (_, e, _)} = e let token {contents = (_, _, t)} = t end (* Heplers for foreign content. *) module Foreign : sig val is_mathml_text_integration_point : qname -> bool val is_html_integration_point : ns -> string -> (string * string) list -> bool val adjust_mathml_attributes : ((string * string) * string) list -> ((string * string) * string) list val adjust_svg_attributes : ((string * string) * string) list -> ((string * string) * string) list val adjust_svg_tag_name : string -> string end = struct let is_mathml_text_integration_point qname = list_mem_qname qname [`MathML, "mi"; `MathML, "mo"; `MathML, "mn"; `MathML, "ms"; `MathML, "mtext"] let is_html_integration_point namespace tag_name attributes = match namespace with | `HTML | `Other _ -> false | `MathML -> tag_name = "annotation-xml" && attributes |> List.exists (function | "encoding", "text/html" -> true | "encoding", "application/xhtml+xml" -> true | _ -> false) | `SVG -> list_mem_string tag_name ["foreignObject"; "desc"; "title"] let adjust_mathml_attributes attributes = attributes |> List.map (fun ((ns, name), value) -> let name = if ns = mathml_ns 
&& name = "definitionurl" then "definitionURL" else name in (ns, name), value) let adjust_svg_attributes attributes = attributes |> List.map (fun ((ns, name), value) -> if ns <> svg_ns then (ns, name), value else let name = match name with | "attributename" -> "attributeName" | "attributetype" -> "attributeType" | "basefrequency" -> "baseFrequency" | "baseprofile" -> "baseProfile" | "calcmode" -> "calcMode" | "clippathunits" -> "clipPathUnits" | "contentscripttype" -> "contentScriptType" | "contentstyletype" -> "contentStyleType" | "diffuseconstant" -> "diffuseConstant" | "edgemode" -> "edgeMode" | "externalresourcesrequired" -> "externalResourcesRequired" | "filterres" -> "filterRes" | "filterunits" -> "filterUnits" | "glyphref" -> "glyphRef" | "gradienttransform" -> "gradientTransform" | "gradientunits" -> "gradientUnits" | "kernelmatrix" -> "kernelMatrix" | "kernelunitlength" -> "kernelUnitLength" | "keypoints" -> "keyPoints" | "keysplines" -> "keySplines" | "keytimes" -> "keyTimes" | "lengthadjust" -> "lengthAdjust" | "limitingconeangle" -> "limitingConeAngle" | "markerheight" -> "markerHeight" | "markerunits" -> "markerUnits" | "markerwidth" -> "markerWidth" | "maskcontentunits" -> "maskContentUnits" | "maskunits" -> "maskUnits" | "numoctaves" -> "numOctaves" | "pathlength" -> "pathLength" | "patterncontentunits" -> "patternContentUnits" | "patterntransform" -> "patternTransform" | "patternunits" -> "patternUnits" | "pointsatx" -> "pointsAtX" | "pointsaty" -> "pointsAtY" | "pointsatz" -> "pointsAtZ" | "preservealpha" -> "preserveAlpha" | "preserveaspectratio" -> "preserveAspectRatio" | "primitiveunits" -> "primitiveUnits" | "refx" -> "refX" | "refy" -> "refY" | "repeatcount" -> "repeatCount" | "repeatdur" -> "repeatDur" | "requiredextensions" -> "requiredExtensions" | "requiredfeatures" -> "requiredFeatures" | "specularconstant" -> "specularConstant" | "specularexponent" -> "specularExponent" | "spreadmethod" -> "spreadMethod" | "startoffset" -> "startOffset" 
| "stddeviation" -> "stdDeviation" | "stitchtiles" -> "stitchTiles" | "surfacescale" -> "surfaceScale" | "systemlanguage" -> "systemLanguage" | "tablevalues" -> "tableValues" | "targetx" -> "targetX" | "targety" -> "targetY" | "textlength" -> "textLength" | "viewbox" -> "viewBox" | "viewtarget" -> "viewTarget" | "xchannelselector" -> "xChannelSelector" | "ychannelselector" -> "yChannelSelector" | "zoomandpan" -> "zoomAndPan" | _ -> name in (ns, name), value) let adjust_svg_tag_name = function | "altglyph" -> "altGlyph" | "altglyphdef" -> "altGlyphDef" | "altglyphitem" -> "altGlyphItem" | "animatecolor" -> "animateColor" | "animatemotion" -> "animateMotion" | "animatetransform" -> "animateTransform" | "clippath" -> "clipPath" | "feblend" -> "feBlend" | "fecolormatrix" -> "feColorMatrix" | "fecomponenttransfer" -> "feComponentTransfer" | "fecomposite" -> "feComposite" | "feconvolvematrix" -> "feConvolveMatrix" | "fediffuselighting" -> "feDiffuseLighting" | "fedisplacementmap" -> "feDisplacementMap" | "fedistantlight" -> "feDistantLight" | "fedropshadow" -> "feDropShadow" | "feflood" -> "feFlood" | "fefunca" -> "feFuncA" | "fefuncb" -> "feFuncB" | "fefuncg" -> "feFuncG" | "fefuncr" -> "feFuncR" | "fegaussianblur" -> "feGaussianBlur" | "feimage" -> "feImage" | "femerge" -> "feMerge" | "femergenode" -> "feMergeNode" | "femorphology" -> "feMorphology" | "feoffset" -> "feOffset" | "fepointlight" -> "fePointLight" | "fespecularlighting" -> "feSpecularLighting" | "fespotlight" -> "feSpotLight" | "fetile" -> "feTile" | "feturbulence" -> "feTurbulence" | "foreignobject" -> "foreignObject" | "glyphref" -> "glyphRef" | "lineargradient" -> "linearGradient" | "radialgradient" -> "radialGradient" | "textpath" -> "textPath" | s -> s end (* Stack of open elements. 
*) module Stack : sig type t = element list ref val create : unit -> t val current_element : t -> element option val require_current_element : t -> element val adjusted_current_element : Context.t -> t -> element option val current_element_is : t -> string list -> bool val current_element_is_foreign : Context.t -> t -> bool val has : t -> string -> bool val in_scope : t -> string -> bool val in_button_scope : t -> string -> bool val in_list_item_scope : t -> string -> bool val in_table_scope : t -> string -> bool val in_select_scope : t -> string -> bool val one_in_scope : t -> string list -> bool val one_in_table_scope : t -> string list -> bool val target_in_scope : t -> element -> bool val remove : t -> element -> unit val replace : t -> old:element -> new_:element -> unit val insert_below : t -> anchor:element -> new_:element -> unit end = struct type t = element list ref let create () = ref [] let current_element open_elements = match !open_elements with | [] -> None | element::_ -> Some element let require_current_element open_elements = match current_element open_elements with | None -> failwith "require_current_element: None" | Some element -> element let adjusted_current_element context open_elements = match !open_elements, Context.element context with | [_], Some element -> Some element | [], _ -> None | element::_, _ -> Some element let current_element_is open_elements names = match !open_elements with | {element_name = `HTML, name}::_ -> list_mem_string name names | _ -> false let current_element_is_foreign context open_elements = match adjusted_current_element context open_elements with | Some {element_name = ns, _} when ns <> `HTML -> true | _ -> false let has open_elements name = List.exists (fun {element_name = ns, name'} -> ns = `HTML && name' = name) !open_elements let in_scope_general scope_delimiters open_elements name' = let rec scan = function | [] -> false | {element_name = ns, name'' as name}::more -> if ns = `HTML && name'' = name' then 
true else if list_mem_qname name scope_delimiters then false else scan more in scan !open_elements let scope_delimiters = [`HTML, "applet"; `HTML, "caption"; `HTML, "html"; `HTML, "table"; `HTML, "td"; `HTML, "th"; `HTML, "marquee"; `HTML, "object"; `HTML, "template"; `MathML, "mi"; `MathML, "mo"; `MathML, "mn"; `MathML, "ms"; `MathML, "mtext"; `MathML, "annotation-xml"; `SVG, "foreignObject"; `SVG, "desc"; `SVG, "title"] let in_scope = in_scope_general scope_delimiters let in_button_scope = in_scope_general ((`HTML, "button")::scope_delimiters) let in_list_item_scope = in_scope_general ((`HTML, "ol")::(`HTML, "ul")::scope_delimiters) let in_table_scope = in_scope_general [`HTML, "html"; `HTML, "table"; `HTML, "template"] let in_select_scope open_elements name = let rec scan = function | [] -> false | {element_name = ns, name'}::more -> if ns <> `HTML then false else if name' = name then true else if name' = "optgroup" || name' = "option" then scan more else false in scan !open_elements let one_in_scope open_elements names = let rec scan = function | [] -> false | {element_name = ns, name' as name}::more -> if ns = `HTML && list_mem_string name' names then true else if list_mem_qname name scope_delimiters then false else scan more in scan !open_elements let one_in_table_scope open_elements names = let rec scan = function | [] -> false | {element_name = ns, name' as name}::more -> if ns = `HTML && list_mem_string name' names then true else if list_mem_qname name [`HTML, "html"; `HTML, "table"; `HTML, "template"] then false else scan more in scan !open_elements let target_in_scope open_elements node = let rec scan = function | [] -> false | e::more -> if e == node then true else if list_mem_qname node.element_name scope_delimiters then false else scan more in scan !open_elements let remove open_elements element = open_elements := List.filter ((!=) element) !open_elements; element.is_open <- false let replace open_elements ~old ~new_ = open_elements := List.map (fun e 
-> if e == old then (e.is_open <- false; new_) else e) !open_elements let insert_below open_elements ~anchor ~new_ = let rec insert prefix = function | [] -> List.rev prefix | e::more when e == anchor -> (List.rev prefix) @ (new_::e::more) | e::more -> insert (e::prefix) more in open_elements := insert [] !open_elements end (* List of active formatting elements. *) module Active : sig type entry = | Marker | Element_ of element * location * Token_tag.t type t = entry list ref val create : unit -> t val add_marker : t -> unit val clear_until_marker : t -> unit val has : t -> element -> bool val remove : t -> element -> unit val replace : t -> old:element -> new_:element -> unit val insert_after : t -> anchor:element -> new_:element -> unit val has_before_marker : t -> string -> element option end = struct type entry = | Marker | Element_ of element * location * Token_tag.t type t = entry list ref let create () = ref [] let add_marker active_formatting_elements = active_formatting_elements := Marker::!active_formatting_elements let clear_until_marker active_formatting_elements = let rec iterate = function | Marker::rest -> rest | (Element_ _)::rest -> iterate rest | [] -> [] in active_formatting_elements := iterate !active_formatting_elements let has active_formatting_elements element = !active_formatting_elements |> List.exists (function | Element_ (e, _, _) when e == element -> true | _ -> false) let remove active_formatting_elements element = active_formatting_elements := !active_formatting_elements |> List.filter (function | Element_ (e, _, _) when e == element -> false | _ -> true) let replace active_formatting_elements ~old ~new_ = active_formatting_elements := !active_formatting_elements |> List.map (function | Element_ (e, l, t) when e == old -> Element_ (new_, l, t) | e -> e) let insert_after active_formatting_elements ~anchor ~new_ = let rec insert prefix = function | [] -> List.rev prefix | (Element_ (e, l, t) as v)::more when e == anchor -> let new_entry 
= Element_ (new_, l, t) in (List.rev prefix) @ (v::new_entry::more) | v::more -> insert (v::prefix) more in active_formatting_elements := insert [] !active_formatting_elements let has_before_marker active_formatting_elements name = let rec scan = function | [] | Marker::_ -> None | Element_ (n, _, _)::_ when n.element_name = (`HTML, name) -> Some n | _::more -> scan more in scan !active_formatting_elements end type mode = unit -> unit (* Stack of template insertion modes. *) module Template : sig type t = mode list ref val create : unit -> t val push : t -> mode -> unit val pop : t -> unit end = struct type t = (unit -> unit) list ref let create () = ref [] let push template_insertion_modes mode = template_insertion_modes := mode::!template_insertion_modes let pop template_insertion_modes = match !template_insertion_modes with | [] -> () | _::rest -> template_insertion_modes := rest end (* Subtree buffers. HTML specifies the "adoption agency algorithm" for recovering from certain kinds of errors. This algorithm is (apparently) incompatible with streaming parsers that do not maintain a DOM - such as Markup.ml. So, when the Markup.ml parser encounters a situation in which it may be necessary to later run the adoption agency algorithm, it buffers its signal output. Instead of being emitted, the signals are used to construct a DOM subtree. If the algorithm is run, it is run on this subtree. Whenever the parser can "prove" that the subtree can no longer be involved in the adoption agency algorithm, it serializes the subtree into the signal stream. In practice, this means that buffering begins when a formatting element is encountered, and ends when the parent of the formatting element is popped off the open element stack. 
*) module Subtree : sig type t val create : Stack.t -> t val accumulate : t -> location -> signal -> bool val enable : t -> unit val disable : t -> (location * signal) list val adoption_agency_algorithm : t -> Active.t -> location -> string -> bool * (location * Error.t) list end = struct type t = {open_elements : Stack.t; mutable enabled : bool; mutable position : element} let create open_elements = {open_elements; enabled = false; position = Element.dummy} let accumulate subtree_buffer l s = if not subtree_buffer.enabled then true else begin begin match s with | `Start_element (_, attributes) -> let parent = subtree_buffer.position in let child = Stack.require_current_element subtree_buffer.open_elements in child.attributes <- attributes; child.parent <- parent; parent.children <- (l, Element child)::parent.children; subtree_buffer.position <- child | `End_element -> subtree_buffer.position.end_location <- l; subtree_buffer.position <- Stack.require_current_element subtree_buffer.open_elements | `Text ss -> subtree_buffer.position.children <- (l, Text ss)::subtree_buffer.position.children | `PI (t, s) -> subtree_buffer.position.children <- (l, PI (t, s))::subtree_buffer.position.children | `Comment s -> subtree_buffer.position.children <- (l, Comment s)::subtree_buffer.position.children | `Xml _ | `Doctype _ -> () end; false end let enable subtree_buffer = if subtree_buffer.enabled then () else match Stack.current_element subtree_buffer.open_elements with | None -> () | Some element -> element.buffering <- true; subtree_buffer.position <- element; subtree_buffer.enabled <- true let disable subtree_buffer = let rec traverse acc = function | l, Element {element_name; attributes; end_location; children} -> let name = Ns.to_string (fst element_name), snd element_name in let start_signal = l, `Start_element (name, attributes) in let end_signal = end_location, `End_element in start_signal::(List.fold_left traverse (end_signal::acc) children) | l, Text ss -> begin match 
acc with | (_, `Text ss')::rest -> (l, `Text (ss @ ss'))::rest | _ -> (l, `Text ss)::acc end | l, PI (t, s) -> (l, `PI (t, s))::acc | l, Comment s -> (l, `Comment s)::acc in
(* NOTE(review): the fold above is the tail of the preceding Subtree
   function (its head lies before this chunk): it flattens the buffered
   children into a signal list, merging adjacent `Text runs, then switches
   buffering off. *)
let result = List.fold_left traverse [] (Stack.require_current_element subtree_buffer.open_elements).children in subtree_buffer.enabled <- false; result (* Part of 8.2.5.4.7. *)
(* The HTML5 "adoption agency algorithm": recovers from mis-nested
   formatting elements (e.g. "<b><p>x</b>") when an end tag named [subject]
   is seen at location [l].  Returns [(handled, errors)]; when [handled] is
   false, the caller falls back to the "any other end tag" rules.  The
   algorithm mutates the buffered subtree, so nodes can be reparented
   before their signals are emitted. *)
let adoption_agency_algorithm subtree_buffer active_formatting_elements l subject = let open_elements = subtree_buffer.open_elements in
(* Nodes already removed from the open-element stack, each paired with the
   element that was above it at removal time, so [above_node] keeps working
   for them. *)
let above_removed_nodes = ref [] in let rec above_in_stack node = function | e::e'::_ when e == node -> e' | _::more -> above_in_stack node more | [] -> failwith "above_in_stack: not found" in let above_node node = if node.is_open then above_in_stack node !open_elements else try List.find (fun (e, _) -> e == node) !above_removed_nodes |> snd with Not_found -> failwith "above_node: not found" in let remove_node node = above_removed_nodes := (node, above_in_stack node !open_elements)::!above_removed_nodes; Stack.remove open_elements node in
(* Detach [node] from its current parent's child list and prepend it to
   [new_parent]'s children. *)
let reparent node new_parent = let old_parent = node.parent in let entry, filtered_children = let rec remove prefix = function | (_, Element e as entry)::rest when e == node -> entry, (List.rev prefix) @ rest | e::rest -> remove (e::prefix) rest | [] -> (node.location, Element node), old_parent.children in remove [] old_parent.children in old_parent.children <- filtered_children; new_parent.children <- entry::new_parent.children; node.parent <- new_parent in
(* The spec's "inner loop": walk up from [furthest_block] towards
   [formatting_element], cloning each intervening active formatting element
   and reparenting the last-moved node into the clone.  Returns the last
   node moved and the bookmark (clone to insert after in the
   active-formatting list, if any). *)
let inner_loop formatting_element furthest_block = let rec repeat inner_loop_counter node last_node bookmark = let node = above_node node in if node == formatting_element then last_node, bookmark else begin if inner_loop_counter > 3 then Active.remove active_formatting_elements node; if not @@ Active.has active_formatting_elements node then begin remove_node node; repeat (inner_loop_counter + 1) node last_node bookmark end else begin let new_node = {node with is_open = true; children = []; parent =
Element.dummy} in node.end_location <- l; Stack.replace open_elements ~old:node ~new_:new_node; Active.replace active_formatting_elements ~old:node ~new_:new_node; reparent last_node new_node; repeat (inner_loop_counter + 1) new_node new_node (if last_node == furthest_block then Some new_node else bookmark) end end in repeat 1 furthest_block furthest_block None in
(* Most recent entry (up to the nearest marker) in the active formatting
   elements list whose HTML tag name is [subject]. *)
let find_formatting_element () = let rec scan = function | [] -> None | Active.Marker::_ -> None | (Active.Element_ ({element_name = `HTML, n} as e, _, _))::_ when n = subject -> Some e | _::rest -> scan rest in scan !active_formatting_elements in
(* Topmost "special" element sitting below the formatting element on the
   stack of open elements. *)
let find_furthest_block formatting_element = let rec scan furthest = function | [] -> furthest | e::_ when e == formatting_element -> furthest | e::more when Element.is_special e.element_name -> scan (Some e) more | _::more -> scan furthest more in scan None !open_elements in
let pop_to_formatting_element formatting_element = let rec pop () = match !open_elements with | [] -> () | e::more -> open_elements := more; e.is_open <- false; e.end_location <- l; if e != formatting_element then pop () in pop (); subtree_buffer.position <- Stack.require_current_element open_elements in
(* The spec's "outer loop": runs at most 8 times, accumulating recoverable
   parse errors (collected innermost-first, reversed on return for the
   caller to report in order). *)
let rec outer_loop outer_loop_counter errors = let outer_loop_counter = outer_loop_counter + 1 in if outer_loop_counter >= 8 then true, List.rev errors else begin match find_formatting_element () with | None -> false, List.rev errors | Some formatting_element -> if not formatting_element.is_open then begin Active.remove active_formatting_elements formatting_element; true, List.rev ((l, `Unmatched_end_tag subject)::errors) end else begin if not @@ Stack.target_in_scope open_elements formatting_element then begin true, List.rev ((l, `Unmatched_end_tag subject)::errors) end else begin let errors = if Stack.require_current_element open_elements == formatting_element then errors else (l, `Unmatched_end_tag subject)::errors in match find_furthest_block formatting_element with | None ->
(* No furthest block: just pop up to and including the formatting element
   and drop it from the active-formatting list. *)
pop_to_formatting_element formatting_element; Active.remove active_formatting_elements formatting_element; true, List.rev errors | Some furthest_block -> formatting_element.end_location <- l; let common_ancestor = above_in_stack formatting_element !open_elements in let last_node, bookmark = inner_loop formatting_element furthest_block in reparent last_node common_ancestor;
(* Clone the formatting element, move the furthest block's children into
   the clone, then place the clone inside the furthest block. *)
let new_node = {formatting_element with is_open = true; children = []; parent = Element.dummy} in new_node.children <- furthest_block.children; furthest_block.children <- []; new_node.children |> List.iter (function | _, Element child -> child.parent <- new_node | _ -> ()); reparent new_node furthest_block; begin match bookmark with | None -> Active.replace active_formatting_elements ~old:formatting_element ~new_:new_node | Some node -> Active.remove active_formatting_elements formatting_element; Active.insert_after active_formatting_elements ~anchor:node ~new_:new_node end; Stack.remove open_elements formatting_element; Stack.insert_below open_elements ~anchor:furthest_block ~new_:new_node; outer_loop outer_loop_counter errors end end end in
(* Fast path: the current node already matches [subject]; pop it directly
   without running the outer loop. *)
let current_node = Stack.require_current_element open_elements in if current_node.element_name = (`HTML, subject) then begin open_elements := List.tl !open_elements; current_node.is_open <- false; current_node.end_location <- l; subtree_buffer.position <- Stack.require_current_element open_elements; Active.remove active_formatting_elements current_node; true, [] end else outer_loop 0 [] end
(* 8.2.5: tree construction.  Consumes the token stream and produces the
   signal stream; [set_tokenizer_state] and [set_foreign] let the tree
   builder drive the tokenizer, as the HTML parser requires. *)
let parse requested_context report (tokens, set_tokenizer_state, set_foreign) = let context = Context.uninitialized () in let throw = ref (fun _ -> ()) in let ended = ref (fun _ -> ()) in let output = ref (fun _ -> ()) in let report_if = Error.report_if report in let unmatched_end_tag l name k = report l (`Unmatched_end_tag name) !throw k in let misnested_tag l t context_name k = report l (`Misnested_tag (t.name, context_name, t.Token_tag.attributes)) !throw k in let
open_elements = Stack.create () in let active_formatting_elements = Active.create () in let subtree_buffer = Subtree.create open_elements in let text = Text.prepare () in let template_insertion_modes = Template.create () in
(* Parser state: the spec's "frameset-ok" flag, whether a head element has
   been seen, and the form element pointer. *)
let frameset_ok = ref true in let head_seen = ref false in let form_element_pointer = ref None in let add_character = Text.add text in set_foreign (fun () -> Stack.current_element_is_foreign context open_elements); let report_if_stack_has_other_than names k = let rec iterate = function | [] -> k () | {element_name = ns, name; location}::more -> report_if (not (ns = `HTML && list_mem_string name names)) location (fun () -> `Unmatched_start_tag name) !throw (fun () -> iterate more) in iterate !open_elements in
(* Determine the parsing context, pick the initial tokenizer state for
   fragment parsing, create the notional root for fragments, and select the
   initial insertion mode. *)
let rec current_mode = ref initial_mode and constructor throw_ k = Context.initialize tokens requested_context context throw_ (fun () -> let initial_tokenizer_state = match Context.the_context context with | `Fragment (`HTML, ("title" | "textarea")) -> `RCDATA | `Fragment (`HTML, ("style" | "xmp" | "iframe" | "noembed" | "noframes")) -> `RAWTEXT | `Fragment (`HTML, "script") -> `Script_data | `Fragment (`HTML, "plaintext") -> `PLAINTEXT | _ -> `Data in set_tokenizer_state initial_tokenizer_state; begin match Context.the_context context with | `Document -> () | `Fragment _ -> let notional_root = Element.create ~suppress:true (`HTML, "html") (1, 1) in open_elements := [notional_root] end; begin match Context.the_context context with | `Fragment (`HTML, "template") -> Template.push template_insertion_modes in_template_mode | _ -> () end; (* The following is a deviation from conformance. The goal is to avoid insertion of a <head> element into a fragment beginning with a <body> or <frameset> element.
*) begin match Context.token context with | Some ("body" | "frameset") -> head_seen := true | _ -> () end;
(* Choose the starting insertion mode, then hand the driver continuation to
   [make]. *)
current_mode := begin match Context.the_context context with | `Fragment _ -> reset_mode () | `Document -> initial_mode end; (fun throw_ e k -> throw := throw_; ended := e; output := k; !current_mode ()) |> make |> k)
(* 8.2.3.1. "Resetting the insertion mode appropriately": scan the stack of
   open elements, innermost first, and pick the insertion mode matching the
   first recognized element. *)
and reset_mode () = let rec iterate last = function | [e] when not last && Context.the_context context <> `Document -> begin match Context.the_context context with | `Document -> assert false | `Fragment name -> iterate true [{e with element_name = name}] end
| {element_name = _, "select"}::ancestors -> let rec iterate' = function | [] -> in_select_mode | {element_name = _, "template"}::_ -> in_select_mode | {element_name = _, "table"}::_ -> in_select_in_table_mode | _::ancestors -> iterate' ancestors in iterate' ancestors
(* BUG FIX: this case implements the spec item "If node is a td or th
   element and last is false, switch to in cell" — it previously matched
   "tr" instead of "td", which both contradicts the specification and
   shadowed the "tr" -> in_row_mode case below for any non-last tr. *)
| {element_name = _, ("td" | "th")}::_::_ -> in_cell_mode | {element_name = _, "tr"}::_ -> in_row_mode | {element_name = _, ("tbody" | "thead" | "tfoot")}::_ -> in_table_body_mode | {element_name = _, "caption"}::_ -> in_caption_mode | {element_name = _, "colgroup"}::_ -> in_column_group_mode | {element_name = _, "table"}::_ -> in_table_mode | {element_name = _, "template"}::_ -> begin match !template_insertion_modes with | [] -> initial_mode (* This is an internal error, actually. *) | mode::_ -> mode end (* The next case corresponds to item 12 of "Resetting the insertion mode appropriately." It is commented out as deliberate deviation from the specification, because that makes parsing of fragments intended for <head> elements more intuitive. For conformance, the pattern in the following case would have to end with ::_::_, not ::_.
*) (* | [{element_name = _, "head"}] -> in_body_mode *) | {element_name = _, "head"}::_ -> in_head_mode | {element_name = _, "body"}::_ -> in_body_mode | {element_name = _, "frameset"}::_ -> in_frameset_mode | {element_name = _, "html"}::_ -> if !head_seen then after_head_mode else before_head_mode | _::rest -> iterate last rest | [] -> in_body_mode in iterate false !open_elements
(* Signal emitters.  [emit'] offers the signal to the subtree buffer; if
   [Subtree.accumulate] returns [true] the signal goes straight to the
   output continuation, otherwise it stays buffered.  [emit] first flushes
   any pending character data. *)
and emit' l s m = if Subtree.accumulate subtree_buffer l s then begin current_mode := m; !output (l, s) end else m () and emit_list ss m = match ss with | [] -> m () | (l, s)::more -> emit' l s (fun () -> emit_list more m) and emit_text m = match Text.emit text with | None -> m () | Some (l', strings) -> emit' l' (`Text strings) m and emit l s m = emit_text (fun () -> emit' l s m)
(* Push a new element onto the stack of open elements and emit its
   `Start_element signal, applying SVG/MathML tag- and attribute-name
   adjustments for foreign content.  [formatting] also records the element
   in the active formatting elements list; [acknowledge] suppresses the
   self-closing-tag error for void elements. *)
and push_and_emit ?(formatting = false) ?(acknowledge = false) ?(namespace = `HTML) ?(set_form_element_pointer = false) location ({Token_tag.name; attributes; self_closing} as tag) mode = report_if (self_closing && not acknowledge) location (fun () -> `Bad_token ("/>", "tag", "should not be self-closing")) !throw (fun () -> let namespace_string = Ns.to_string namespace in let tag_name = match namespace with | `SVG -> Foreign.adjust_svg_tag_name name | _ -> name in let is_html_integration_point = Foreign.is_html_integration_point namespace tag_name attributes in let attributes = List.map (fun (n, v) -> Namespace.Parsing.parse n, v) attributes in let attributes = match namespace with | `HTML | `Other _ -> attributes | `MathML -> Foreign.adjust_mathml_attributes attributes | `SVG -> Foreign.adjust_svg_attributes attributes in let element_entry = Element.create ~is_html_integration_point (namespace, name) location in open_elements := element_entry::!open_elements; if set_form_element_pointer then form_element_pointer := Some element_entry; if formatting then active_formatting_elements := Active.Element_ (element_entry, location, tag):: !active_formatting_elements; emit location (`Start_element ((namespace_string,
tag_name), attributes)) mode) and push_implicit location name mode = push_and_emit location {Token_tag.name = name; attributes = []; self_closing = false} mode
(* Pop the current element, flushing pending text and (if the element was
   buffering) the accumulated subtree signals; suppressed elements (e.g.
   the notional fragment root) emit no `End_element. *)
and pop location mode = match !open_elements with | [] -> mode () | element::more -> emit_text (fun () -> (fun k -> if not element.buffering then k () else emit_list (Subtree.disable subtree_buffer) k) (fun () -> open_elements := more; element.is_open <- false; if element.suppress then mode () else emit' location `End_element mode)) and pop_until condition location mode = let rec iterate () = match !open_elements with | [] -> mode () | element::_ -> if condition element then mode () else pop location iterate in iterate () and close_element ?(ns = `HTML) l name mode = pop_until (fun {element_name = ns', name'} -> ns' = ns && name' = name) l (fun () -> pop l mode) and pop_until_and_raise_errors names location mode = let rec iterate () = match !open_elements with | [] -> mode () | {element_name = ns, name}::_ -> if ns = `HTML && list_mem_string name names then pop location mode else report location (`Unmatched_start_tag name) !throw (fun () -> pop location iterate) in iterate ()
(* "Generate implied end tags": pop elements whose end tag may be omitted,
   stopping at [except]. *)
and pop_implied ?(except = "") location mode = pop_until (fun {element_name = _, name} -> name = except || not @@ list_mem_string name ["dd"; "dt"; "li"; "option"; "optgroup"; "p"; "rb"; "rp"; "rt"; "rtc"]) location mode and pop_to_table_context location mode = pop_until (function | {element_name = `HTML, ("table" | "template" | "html")} -> true | _ -> false) location mode and pop_to_table_body_context location mode = pop_until (function | {element_name = `HTML, ("tbody" | "thead" | "tfoot" | "template" | "html")} -> true | _ -> false) location mode and pop_to_table_row_context location mode = pop_until (function | {element_name = `HTML, ("tr" | "template" | "html")} -> true | _ -> false) location mode
(* Close the named element, generating implied end tags first and reporting
   any elements left open in between. *)
and close_element_with_implied name location mode = pop_implied ~except:name location (fun () -> let check_element k = match
Stack.current_element open_elements with | Some {element_name = `HTML, name'} when name' = name -> k () | Some {element_name = _, name; location} -> report location (`Unmatched_start_tag name) !throw k | None -> unmatched_end_tag location name k in check_element (fun () -> close_element location name mode))
(* Close the current table cell (td/th), reporting anything still open
   inside it. *)
and close_cell location mode = pop_implied location (fun () -> (fun mode -> match Stack.current_element open_elements with | Some {element_name = `HTML, ("td" | "th")} -> mode () | Some {element_name = _, name} -> unmatched_end_tag location name mode | None -> unmatched_end_tag location "" mode) @@ (fun () -> pop_until (function | {element_name = `HTML, ("td" | "th")} -> true | _ -> false) location (fun () -> pop location mode))) and close_current_p_element l mode = if Stack.in_button_scope open_elements "p" then close_element_with_implied "p" l mode else mode ()
(* Used by li/dd/dt handling: close a preceding open element from [names],
   unless a "special" element (other than address/div/p) intervenes. *)
and close_preceding_tag names l mode = let rec scan = function | [] -> mode () | {element_name = (ns, name) as name'}::more -> if ns = `HTML && list_mem_string name names then close_element_with_implied name l mode else if Element.is_special name' && not @@ list_mem_qname name' [`HTML, "address"; `HTML, "div"; `HTML, "p"] then mode () else scan more in scan !open_elements and emit_end l = pop_until (fun _ -> false) l (fun () -> emit_text (fun () -> !ended ()))
(* "Reconstruct the active formatting elements": reopen formatting elements
   that were closed while still on the active list.  Subtree buffering is
   enabled whenever anything is reopened, so a later adoption agency run
   can rewrite the tree. *)
and reconstruct_active_formatting_elements mode = let rec get_prefix prefix = function | [] -> prefix, [] | Active.Marker::_ as l -> prefix, l | Active.Element_ ({is_open = true}, _, _)::_ as l -> prefix, l | Active.Element_ ({is_open = false}, l, tag)::more -> get_prefix ((l, tag)::prefix) more in let to_reopen, remainder = get_prefix [] !active_formatting_elements in active_formatting_elements := remainder; begin match to_reopen with | [] -> () | _::_ -> Subtree.enable subtree_buffer end; let rec reopen = function | [] -> mode () | (l, tag)::more -> push_and_emit ~formatting:true l tag (fun () -> reopen more) in reopen to_reopen (* 8.2.5.
*)
(* Token dispatcher: decides, from the adjusted current node, whether a
   token is handled by the current HTML insertion mode or by the foreign
   (SVG/MathML) content rules. *)
and dispatch tokens rules = next tokens !throw (fun () -> !ended ()) begin fun ((_, t) as v) -> let foreign = match Stack.adjusted_current_element context open_elements, t with | None, _ -> false | Some {element_name = `HTML, _}, _ -> false | Some {element_name}, `Start {name} when Foreign.is_mathml_text_integration_point element_name && name <> "mglyph" && name <> "malignmark" -> false | Some {element_name = `MathML, "annotation-xml"}, `Start {name = "svg"} -> false | Some {is_html_integration_point = true}, `Start _ -> false | Some {is_html_integration_point = true}, `Char _ -> false | _, `EOF -> false | _ -> true in if not foreign then rules v else foreign_content !current_mode (fun () -> rules v) v end (* 8.2.5.4.1. *)
and initial_mode () = dispatch tokens begin function | _, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020) -> initial_mode () | l, `Comment s -> emit l (`Comment s) initial_mode | l, `Doctype d -> emit l (`Doctype d) before_html_mode | v -> push tokens v; before_html_mode () end (* 8.2.5.4.2. *)
and before_html_mode () = dispatch tokens begin function | l, `Doctype _ -> report l (`Bad_document "doctype should be first") !throw before_html_mode | l, `Comment s -> emit l (`Comment s) before_html_mode | _, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020) -> before_html_mode () | l, `Start ({name = "html"} as t) -> push_and_emit l t before_head_mode | l, `End {name} when not @@ list_mem_string name ["head"; "body"; "html"; "br"] -> unmatched_end_tag l name before_html_mode | l, _ as v -> push tokens v; push_implicit l "html" before_head_mode end (* 8.2.5.4.3.
*) and before_head_mode () = dispatch tokens begin function | _, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020) -> before_head_mode () | l, `Comment s -> emit l (`Comment s) before_head_mode | l, `Doctype _ -> report l (`Bad_document "doctype should be first") !throw before_head_mode | _, `Start {name = "html"} as v -> in_body_mode_rules "html" before_head_mode v | l, `Start ({name = "head"} as t) -> head_seen := true; push_and_emit l t in_head_mode | l, `End {name} when not @@ list_mem_string name ["head"; "body"; "html"; "br"] -> report l (`Unmatched_end_tag name) !throw before_head_mode | l, _ as v -> head_seen := true; push tokens v; push_implicit l "head" in_head_mode end (* 8.2.5.4.4. *)
and in_head_mode () = dispatch tokens (fun v -> in_head_mode_rules in_head_mode v) (* 8.2.5.4.4. *)
(* Shared by in_head_mode and the modes that delegate to it (noscript,
   after-head, in-body); [mode] is the continuation mode to return to. *)
and in_head_mode_rules mode = function | l, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020 as c) -> add_character l c; mode () | l, `Comment s -> emit l (`Comment s) mode | l, `Doctype _ -> report l (`Bad_document "doctype should be first") !throw mode | _, `Start {name = "html"} as v -> in_body_mode_rules "head" in_head_mode v | l, `Start ({name = "base" | "basefont" | "bgsound" | "link" | "meta"} as t) -> push_and_emit ~acknowledge:true l t (fun () -> pop l mode) | l, `Start ({name = "title"} as t) -> push_and_emit l t (fun () -> parse_rcdata mode) | l, `Start ({name = "noframes" | "style"} as t) -> push_and_emit l t (fun () -> parse_rawtext mode) | l, `Start ({name = "noscript"} as t) -> push_and_emit l t in_head_noscript_mode | l, `Start ({name = "script"} as t) -> push_and_emit l t (fun () -> set_tokenizer_state `Script_data; text_mode mode) | l, `End {name = "head"} -> pop l after_head_mode | l, `Start ({name = "template"} as t) -> Active.add_marker active_formatting_elements; frameset_ok := false; Template.push template_insertion_modes in_template_mode; push_and_emit l t in_template_mode | l, `End {name = "template"} -> if not @@ Stack.has open_elements "template"
then report l (`Unmatched_end_tag "template") !throw mode else begin Active.clear_until_marker active_formatting_elements; Template.pop template_insertion_modes; close_element_with_implied "template" l (fun () -> reset_mode () ()) end | l, `Start ({name = "head"} as t) -> misnested_tag l t "head" mode | l, `End {name} when not @@ list_mem_string name ["body"; "html"; "br"] -> report l (`Unmatched_end_tag name) !throw mode | l, _ as v -> push tokens v; pop l after_head_mode (* 8.2.5.4.5. *)
and in_head_noscript_mode () = dispatch tokens begin function | l, `Doctype _ -> report l (`Bad_document "doctype should be first") !throw in_head_noscript_mode | _, `Start {name = "html"} as v -> in_body_mode_rules "noscript" in_head_noscript_mode v | l, `End {name = "noscript"} -> pop l in_head_mode | _, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020) | _, `Comment _ | _, `Start {name = "basefont" | "bgsound" | "link" | "meta" | "noframes" | "style"} as v -> in_head_mode_rules in_head_noscript_mode v | l, `Start ({name = "head" | "noscript"} as t) -> misnested_tag l t "noscript" in_head_noscript_mode | l, `End {name} when name <> "br" -> report l (`Unmatched_end_tag name) !throw in_head_noscript_mode | l, _ as v -> report l (`Bad_content "noscript") !throw (fun () -> push tokens v; pop l in_head_mode) end (* 8.2.5.4.6.
*) and after_head_mode () = dispatch tokens begin function | l, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020 as c) -> add_character l c; after_head_mode () | l, `Comment s -> emit l (`Comment s) after_head_mode | l, `Doctype _ -> report l (`Bad_document "doctype should be first") !throw after_head_mode | _, `Start {name = "html"} as v -> in_body_mode_rules "html" after_head_mode v | l, `Start ({name = "body"} as t) -> frameset_ok := false; push_and_emit l t in_body_mode | l, `Start ({name = "frameset"} as t) -> push_and_emit l t in_frameset_mode | l, `Start ({name = "base" | "basefont" | "bgsound" | "link" | "meta" | "noframes" | "script" | "style" | "template" | "title"} as t) as v -> misnested_tag l t "html" (fun () -> in_head_mode_rules after_head_mode v) | _, `End {name = "template"} as v -> in_head_mode_rules after_head_mode v | l, `Start {name = "head"} -> report l (`Bad_document "duplicate head element") !throw after_head_mode | l, `End {name} when not @@ list_mem_string name ["body"; "html"; "br"] -> report l (`Unmatched_end_tag name) !throw after_head_mode (* This case is not found in the specification. It is a deliberate deviation from conformance, so that fragments "<head>...</head>" don't get an implicit <body> element generated after the <head> element. *) | l, `EOF when (Context.the_context context = `Fragment (`HTML, "html") || Context.the_context context = `Fragment (`HTML, "head")) -> emit_end l | l, _ as t -> push tokens t; push_implicit l "body" in_body_mode end (* 8.2.5.4.7. *)
and in_body_mode () = dispatch tokens (fun v -> in_body_mode_rules "body" in_body_mode v) (* 8.2.5.4.7.
*)
(* 8.2.5.4.7 "in body": the bulk of HTML tree construction.  [context_name]
   is the element name used in misnesting error reports; [mode] is the
   continuation mode to return to. *)
and in_body_mode_rules context_name mode = function | l, `Char 0 -> report l (`Bad_token ("U+0000", "body", "null")) !throw mode | l, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020 as c) -> reconstruct_active_formatting_elements (fun () -> add_character l c; mode ()) | l, `Char c -> frameset_ok := false; reconstruct_active_formatting_elements (fun () -> add_character l c; mode ()) | l, `Comment s -> emit l (`Comment s) mode | l, `Doctype _ -> report l (`Bad_document "doctype should be first") !throw mode | l, `Start ({name = "html"} as t) -> misnested_tag l t context_name mode | _, `Start {name = "base" | "basefont" | "bgsound" | "link" | "meta" | "noframes" | "script" | "style" | "template" | "title"} | _, `End {name = "template"} as v -> in_head_mode_rules mode v | l, `Start ({name = "body"} as t) -> misnested_tag l t context_name mode | l, `Start ({name = "frameset"} as t) -> misnested_tag l t context_name (fun () -> match !open_elements with | [_] -> mode () | _ -> let rec second_is_body = function | [{element_name = `HTML, "body"}; _] -> true | [] -> false | _::more -> second_is_body more in if not @@ second_is_body !open_elements then mode () else if not !frameset_ok then mode () else (* There is a deviation here due to the nature of the parser: if a body element has been emitted, it can't be suppressed.
*) pop_until (fun _ -> match !open_elements with [_] -> true | _ -> false) l (fun () -> push_and_emit l t in_frameset_mode)) | l, `EOF as v -> report_if_stack_has_other_than ["dd"; "dt"; "li"; "p"; "tbody"; "td"; "tfoot"; "th"; "thead"; "tr"; "body"; "html"] (fun () -> match !template_insertion_modes with | [] -> emit_end l | _ -> in_template_mode_rules mode v) | l, `End {name = "body"} -> if not @@ Stack.in_scope open_elements "body" then report l (`Unmatched_end_tag "body") !throw mode else report_if_stack_has_other_than ["dd"; "dt"; "li"; "optgroup"; "option"; "p"; "rb"; "rp"; "rt"; "rtc"; "tbody"; "td"; "tfoot"; "th"; "thead"; "tr"; "body"; "html"] (fun () -> after_body_mode ()) | l, `End {name = "html"} as v -> if not @@ Stack.in_scope open_elements "body" then report l (`Unmatched_end_tag "html") !throw mode else report_if_stack_has_other_than ["dd"; "dt"; "li"; "optgroup"; "option"; "p"; "rb"; "rp"; "rt"; "rtc"; "tbody"; "td"; "tfoot"; "th"; "thead"; "tr"; "body"; "html"] (fun () -> push tokens v; after_body_mode ())
(* Block-level start tags close any open p element first. *)
| l, `Start ({name = "address" | "article" | "aside" | "blockquote" | "center" | "details" | "dialog" | "dir" | "div" | "dl" | "fieldset" | "figcaption" | "figure" | "footer" | "header" | "hgroup" | "main" | "nav" | "ol" | "p" | "section" | "summary" | "ul"} as t) -> close_current_p_element l (fun () -> push_and_emit l t mode) | l, `Start ({name = "h1" | "h2" | "h3" | "h4" | "h5" | "h6"} as t) -> close_current_p_element l (fun () -> (fun mode' -> match Stack.current_element open_elements with | Some {element_name = `HTML, ("h1" | "h2" | "h3" | "h4" | "h5" | "h6" as name')} -> misnested_tag l t name' (fun () -> pop l mode') | _ -> mode' ()) (fun () -> push_and_emit l t mode)) | l, `Start ({name = "pre" | "listing"} as t) -> frameset_ok := false; close_current_p_element l (fun () -> push_and_emit l t (fun () -> next_expected tokens !throw (function | _, `Char 0x000A -> mode () | v -> push tokens v; mode ()))) | l, `Start ({name = "form"} as t) ->
if !form_element_pointer <> None && not @@ Stack.has open_elements "template" then misnested_tag l t "form" mode else begin close_current_p_element l (fun () -> let in_template = Stack.has open_elements "template" in push_and_emit ~set_form_element_pointer:(not in_template) l t mode) end | l, `Start ({name = "li"} as t) -> frameset_ok := false; close_preceding_tag ["li"] l (fun () -> close_current_p_element l (fun () -> push_and_emit l t mode)) | l, `Start ({name = "dd" | "dt"} as t) -> frameset_ok := false; close_preceding_tag ["dd"; "dt"] l (fun () -> close_current_p_element l (fun () -> push_and_emit l t mode)) | l, `Start ({name = "plaintext"} as t) -> close_current_p_element l (fun () -> set_tokenizer_state `PLAINTEXT; push_and_emit l t mode) | l, `Start ({name = "button"} as t) -> (fun mode' -> if Stack.in_scope open_elements "button" then misnested_tag l t "button" (fun () -> close_element_with_implied "button" l mode') else mode' ()) (fun () -> frameset_ok := false; reconstruct_active_formatting_elements (fun () -> push_and_emit l t mode)) | l, `End {name = "address" | "article" | "aside" | "blockquote" | "button" | "center" | "details" | "dialog" | "dir" | "div" | "dl" | "fieldset" | "figcaption" | "figure" | "footer" | "header" | "hgroup" | "listing" | "main" | "nav" | "ol" | "pre" | "section" | "summary" | "ul" as name} -> if not @@ Stack.in_scope open_elements name then report l (`Unmatched_end_tag name) !throw mode else close_element_with_implied name l mode
(* </form>: outside a template, close the element the form pointer refers
   to; inside a template, fall back to ordinary scoped closing. *)
| l, `End {name = "form"} -> if not @@ Stack.has open_elements "template" then begin let form_element = !form_element_pointer in form_element_pointer := None; match form_element with | Some element when Stack.target_in_scope open_elements element -> pop_implied l (fun () -> match Stack.current_element open_elements with | Some element' when element' == element -> pop l mode | _ -> report element.location (`Unmatched_start_tag "form") !throw (fun () -> pop_until (fun element' -> element' == element)
l (fun () -> pop l mode))) | _ -> report l (`Unmatched_end_tag "form") !throw mode end else if not @@ Stack.in_scope open_elements "form" then report l (`Unmatched_end_tag "form") !throw mode else close_element_with_implied "form" l mode | l, `End {name = "p"} -> (fun mode' -> if not @@ Stack.in_button_scope open_elements "p" then report l (`Unmatched_end_tag "p") !throw (fun () -> push_implicit l "p" mode') else mode' ()) (fun () -> close_element_with_implied "p" l mode) | l, `End {name = "li"} -> if not @@ Stack.in_list_item_scope open_elements "li" then report l (`Unmatched_end_tag "li") !throw mode else close_element_with_implied "li" l mode | l, `End {name = "dd" | "dt" as name} -> if not @@ Stack.in_scope open_elements name then report l (`Unmatched_end_tag name) !throw mode else close_element_with_implied name l mode | l, `End {name = "h1" | "h2" | "h3" | "h4" | "h5" | "h6" as name} -> if not @@ Stack.one_in_scope open_elements ["h1"; "h2"; "h3"; "h4"; "h5"; "h6"] then report l (`Unmatched_end_tag name) !throw mode else pop_implied l (fun () -> (fun next -> match Stack.current_element open_elements with | Some {element_name = `HTML, name'} when list_mem_string name' ["h1"; "h2"; "h3"; "h4"; "h5"; "h6"] -> next () | _ -> report l (`Unmatched_end_tag name) !throw next) @@ (fun () -> pop_until_and_raise_errors ["h1"; "h2"; "h3"; "h4"; "h5"; "h6"] l mode))
(* Formatting elements: subtree buffering is enabled so the adoption
   agency algorithm can rewrite the tree before signals are emitted.  An
   open <a> in the active list is adopted and removed first. *)
| l, `Start ({name = "a"} as t) -> (fun k -> match Active.has_before_marker active_formatting_elements "a" with | None -> k () | Some existing -> misnested_tag l t "a" (fun () -> adoption_agency_algorithm l "a" (fun () -> Stack.remove open_elements existing; Active.remove active_formatting_elements existing; k ()))) (fun () -> Subtree.enable subtree_buffer; reconstruct_active_formatting_elements (fun () -> push_and_emit ~formatting:true l t mode)) | l, `Start ({name = "b" | "big" | "code" | "em" | "font" | "i" | "s" | "small" | "strike" | "strong" | "tt" | "u"} as t) -> Subtree.enable subtree_buffer;
reconstruct_active_formatting_elements (fun () -> push_and_emit ~formatting:true l t mode) | l, `Start ({name = "nobr"} as t) -> Subtree.enable subtree_buffer; reconstruct_active_formatting_elements (fun () -> (fun k -> if not @@ Stack.in_scope open_elements "nobr" then k () else misnested_tag l t "nobr" (fun () -> adoption_agency_algorithm l "nobr" (fun () -> reconstruct_active_formatting_elements k))) (fun () -> push_and_emit ~formatting:true l t mode)) | l, `End {name = "a" | "b" | "big" | "code" | "em" | "font" | "i" | "nobr" | "s" | "small" | "strike" | "strong" | "tt" | "u" as name} -> adoption_agency_algorithm l name mode | l, `Start ({name = "applet" | "marquee" | "object"} as t) -> frameset_ok := false; reconstruct_active_formatting_elements (fun () -> Active.add_marker active_formatting_elements; push_and_emit l t mode) | l, `End {name = "applet" | "marquee" | "object" as name} -> if not @@ Stack.in_scope open_elements name then report l (`Unmatched_end_tag name) !throw mode else begin Active.clear_until_marker active_formatting_elements; close_element_with_implied name l mode end | l, `Start ({name = "table"} as t) -> frameset_ok := false; close_current_p_element l (fun () -> push_and_emit l t in_table_mode)
(* </br> is re-dispatched as a <br> start tag, per the spec. *)
| l, `End {name = "br"} -> report l (`Unmatched_end_tag "br") !throw (fun () -> in_body_mode_rules context_name mode (l, `Start {Token_tag.name = "br"; attributes = []; self_closing = false})) | l, `Start ({name = "area" | "br" | "embed" | "img" | "keygen" | "wbr"} as t) -> frameset_ok := false; reconstruct_active_formatting_elements (fun () -> push_and_emit ~acknowledge:true l t (fun () -> pop l mode)) | l, `Start ({name = "input"} as t) -> if Element.is_not_hidden t then frameset_ok := false; reconstruct_active_formatting_elements (fun () -> push_and_emit ~acknowledge:true l t (fun () -> pop l mode)) | l, `Start ({name = "param" | "source" | "track"} as t) -> push_and_emit ~acknowledge:true l t (fun () -> pop l mode) | l, `Start ({name = "hr"} as
t) -> frameset_ok := false; close_current_p_element l (fun () -> push_and_emit ~acknowledge:true l t (fun () -> pop l mode))
(* <image> is a historical misspelling; re-dispatch it as <img>. *)
| l, `Start ({name = "image"} as t) -> report l (`Bad_token ("image", "tag", "should be 'img'")) !throw (fun () -> push tokens (l, `Start {t with name = "img"}); mode ()) | l, `Start ({name = "textarea"} as t) -> frameset_ok := false; push_and_emit l t (fun () -> set_tokenizer_state `RCDATA; next_expected tokens !throw (function | _, `Char 0x000A -> text_mode mode | v -> push tokens v; text_mode mode))
(* BUG FIX: <xmp> follows the generic raw text element parsing algorithm,
   which inserts the element before switching the tokenizer to RAWTEXT —
   the tag was previously discarded (no push_and_emit), so no xmp element
   was ever emitted, unlike the iframe/noembed cases below. *)
| l, `Start ({name = "xmp"} as t) -> frameset_ok := false; close_current_p_element l (fun () -> reconstruct_active_formatting_elements (fun () -> push_and_emit l t (fun () -> parse_rawtext mode))) | l, `Start ({name = "iframe"} as t) -> frameset_ok := false; push_and_emit l t (fun () -> parse_rawtext mode) | l, `Start ({name = "noembed"} as t) -> push_and_emit l t (fun () -> parse_rawtext mode) | l, `Start ({name = "select"} as t) -> frameset_ok := false; select_in_body l t in_select_mode | l, `Start ({name = "optgroup" | "option"} as t) -> (fun mode' -> if Stack.current_element_is open_elements ["option"] then pop l mode' else mode' ()) (fun () -> reconstruct_active_formatting_elements (fun () -> push_and_emit l t mode))
(* Ruby annotation containers: generate implied end tags inside an open
   ruby element, then check the current node is the expected container. *)
| l, `Start ({name = "rb" | "rtc"} as t) -> (fun mode' -> let finish () = if Stack.current_element_is open_elements ["ruby"] then mode' () else misnested_tag l t context_name mode' in if Stack.in_scope open_elements "ruby" then pop_implied l finish else finish ()) (fun () -> push_and_emit l t mode) | l, `Start ({name = "rp" | "rt"} as t) -> (fun mode' -> let finish () = if Stack.current_element_is open_elements ["ruby"; "rtc"] then mode' () else misnested_tag l t context_name mode' in if Stack.in_scope open_elements "ruby" then pop_implied ~except:"rtc" l finish else finish ()) (fun () -> push_and_emit l t mode) | l, `Start ({name = "math"} as t) -> reconstruct_active_formatting_elements (fun () -> push_and_emit ~acknowledge:true ~namespace:`MathML l t (fun () ->
if t.self_closing then pop l mode else mode ())) | l, `Start ({name = "svg"} as t) -> reconstruct_active_formatting_elements (fun () -> push_and_emit ~acknowledge:true ~namespace:`SVG l t (fun () -> if t.self_closing then pop l mode else mode ())) | l, `Start ({name = "caption" | "col" | "colgroup" | "frame" | "head" | "tbody" | "td" | "tfoot" | "th" | "thead" | "tr"} as t) -> misnested_tag l t context_name mode | l, `Start t -> reconstruct_active_formatting_elements (fun () -> push_and_emit l t mode) | l, `End {name} -> any_other_end_tag_in_body l name mode (* Part of 8.2.5.4.7. *)
(* "Any other end tag" in body: close the first matching open HTML element,
   unless a "special" element intervenes (then the end tag is unmatched). *)
and any_other_end_tag_in_body l name mode = let rec close = function | [] -> mode () | {element_name = (ns, name') as name''}::rest -> if ns = `HTML && name' = name then pop_implied ~except:name l (fun () -> pop l mode) else if Element.is_special name'' then report l (`Unmatched_end_tag name) !throw mode else close rest in close !open_elements (* Part of 8.2.5.4.7. *)
(* Wrapper over Subtree.adoption_agency_algorithm: enables buffering,
   reports the collected errors, then falls back to the generic end-tag
   rules when the algorithm did not handle the tag. *)
and adoption_agency_algorithm l name mode = Subtree.enable subtree_buffer; emit_text (fun () -> let handled, errors = Subtree.adoption_agency_algorithm subtree_buffer active_formatting_elements l name in let rec report_all errors k = match errors with | [] -> k () | (l, error)::more -> report l error !throw (fun () -> report_all more k) in report_all errors (fun () -> if not handled then any_other_end_tag_in_body l name mode else mode ())) (* Part of 8.2.5.4.7. *)
and select_in_body l t next_mode = frameset_ok := false; reconstruct_active_formatting_elements (fun () -> push_and_emit l t next_mode) (* 8.2.5.4.8. *)
(* Generic RCDATA/RAWTEXT/script text mode: buffer characters until an end
   tag (or EOF, which is reported), then return to [original_mode]. *)
and text_mode original_mode = dispatch tokens begin function | l, `Char c -> add_character l c; text_mode original_mode | l, `EOF as v -> report l (`Unexpected_eoi "content") !throw (fun () -> push tokens v; pop l original_mode) | l, `End _ -> pop l original_mode | _ -> text_mode original_mode end (* 8.2.5.2. *)
and parse_rcdata original_mode = set_tokenizer_state `RCDATA; text_mode original_mode (* 8.2.5.2.
*) and parse_rawtext original_mode = set_tokenizer_state `RAWTEXT; text_mode original_mode and anything_else_in_table mode (l, _ as v) = report l (`Bad_content "table") !throw (fun () -> in_body_mode_rules "table" mode v) (* 8.2.5.4.9. *)
and in_table_mode () = dispatch tokens (fun v -> in_table_mode_rules in_table_mode v)
(* 8.2.5.4.9 "in table".  NOTE(review): this match continues past the end
   of this chunk. *)
and in_table_mode_rules mode = function | _, `Char _ as v when Stack.current_element_is open_elements ["table"; "tbody"; "tfoot"; "thead"; "tr"] -> push tokens v; in_table_text_mode true [] mode | l, `Comment s -> emit l (`Comment s) mode | l, `Doctype _ -> report l (`Bad_document "doctype should be first") !throw mode | l, `Start ({name = "caption"} as t) -> pop_to_table_context l (fun () -> Active.add_marker active_formatting_elements; push_and_emit l t in_caption_mode) | l, `Start ({name = "colgroup"} as t) -> pop_to_table_context l (fun () -> push_and_emit l t in_column_group_mode) | l, `Start {name = "col"} as v -> pop_to_table_context l (fun () -> push tokens v; push_implicit l "colgroup" in_column_group_mode) | l, `Start ({name = "tbody" | "tfoot" | "thead"} as t) -> pop_to_table_context l (fun () -> push_and_emit l t in_table_body_mode) | l, `Start {name = "td" | "th" | "tr"} as v -> pop_to_table_context l (fun () -> push tokens v; push_implicit l "tbody" in_table_body_mode) | l, `Start ({name = "table"} as t) as v -> misnested_tag l t "table" (fun () -> if not @@ Stack.has open_elements "table" then mode () else begin push tokens v; close_element l "table" (fun () -> reset_mode () ()) end) | l, `End {name = "table"} -> if not @@ Stack.in_table_scope open_elements "table" then report l (`Unmatched_end_tag "table") !throw mode else close_element l "table" (fun () -> reset_mode () ()) | l, `End {name = "body" | "caption" | "col" | "colgroup" | "html" | "tbody" | "td" | "tfoot" | "th" | "thead" | "tr" as name} -> report l (`Unmatched_end_tag name) !throw mode | _, `Start {name = "style" | "script" | "template"} | _, `End {name = "template"}
as v -> in_head_mode_rules mode v | l, `Start ({name = "input"} as t) when Element.is_not_hidden t -> misnested_tag l t "table" (fun () -> push_and_emit ~acknowledge:true l t (fun () -> pop l mode)) | l, `Start ({name = "form"} as t) -> misnested_tag l t "table" (fun () -> push_and_emit l t (fun () -> pop l mode)) | _, `EOF as v -> in_body_mode_rules "table" mode v | v -> anything_else_in_table mode v (* 8.2.5.4.10. *) and in_table_text_mode only_space cs mode = dispatch tokens begin function | l, `Char 0 -> report l (`Bad_token ("U+0000", "table", "null")) !throw (fun () -> in_table_text_mode only_space cs mode) | _, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020) as v -> in_table_text_mode only_space (v::cs) mode | _, `Char _ as v -> in_table_text_mode false (v::cs) mode | v -> push tokens v; if not only_space then let rec reprocess = function | [] -> mode () | v::more -> anything_else_in_table (fun () -> reprocess more) v in reprocess (List.rev cs) else begin List.rev cs |> List.iter (function | l, `Char c -> add_character l c | _ -> ()); mode () end end (* 8.2.5.4.11. 
*) and in_caption_mode () = dispatch tokens begin function | l, `End {name = "caption"} -> if not @@ Stack.in_table_scope open_elements "caption" then report l (`Unmatched_end_tag "caption") !throw in_caption_mode else begin Active.clear_until_marker active_formatting_elements; close_element_with_implied "caption" l in_table_mode end | l, `Start ({name = "caption" | "col" | "colgroup" | "tbody" | "td" | "tfoot" | "th" | "thead" | "tr"} as t) as v -> misnested_tag l t "caption" (fun () -> if not @@ Stack.in_table_scope open_elements "caption" then in_caption_mode () else begin Active.clear_until_marker active_formatting_elements; push tokens v; close_element l "caption" in_table_mode end) | l, `End {name = "table"} as v -> report l (`Unmatched_end_tag "table") !throw (fun () -> if not @@ Stack.in_table_scope open_elements "caption" then in_caption_mode () else begin Active.clear_until_marker active_formatting_elements; push tokens v; close_element l "caption" in_table_mode end) | l, `End {name = ("body" | "col" | "colgroup" | "html" | "tbody" | "td" | "tfoot" | "th" | "thead" | "tr") as name} -> report l (`Unmatched_end_tag name) !throw in_caption_mode | l, `Start ({name = "select"} as t) -> select_in_body l t in_select_in_table_mode | v -> in_body_mode_rules "caption" in_caption_mode v end (* 8.2.5.4.12. 
*) and in_column_group_mode () = dispatch tokens begin function | l, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020 as c) -> add_character l c; in_column_group_mode () | l, `Comment s -> emit l (`Comment s) in_column_group_mode | l, `Doctype _ -> report l (`Bad_document "doctype should be first") !throw in_column_group_mode | _, `Start {name = "html"} as v -> in_body_mode_rules "colgroup" in_column_group_mode v | l, `Start ({name = "col"} as t) -> push_and_emit ~acknowledge:true l t (fun () -> pop l in_column_group_mode) | l, `End {name = "colgroup"} -> if not @@ Stack.current_element_is open_elements ["colgroup"] then report l (`Unmatched_end_tag "colgroup") !throw in_column_group_mode else pop l in_table_mode | l, `End {name = "col"} -> report l (`Unmatched_end_tag "col") !throw in_column_group_mode | _, `Start {name = "template"} | _, `End {name = "template"} as v -> in_head_mode_rules in_column_group_mode v | _, `EOF as v -> in_body_mode_rules "colgroup" in_column_group_mode v | l, _ as v -> if not @@ Stack.current_element_is open_elements ["colgroup"] then report l (`Bad_content "colgroup") !throw in_table_mode else begin push tokens v; pop l in_table_mode end end (* 8.2.5.4.13. 
*) and in_table_body_mode () = dispatch tokens begin function | l, `Start ({name = "tr"} as t) -> pop_to_table_body_context l (fun () -> push_and_emit l t in_row_mode) | l, `Start ({name = "th" | "td"} as t) as v -> misnested_tag l t "table" (fun () -> pop_to_table_body_context l (fun () -> push tokens v; push_implicit l "tr" in_row_mode)) | l, `End {name = "tbody" | "tfoot" | "thead" as name} -> if not @@ Stack.in_table_scope open_elements name then report l (`Unmatched_end_tag name) !throw in_table_body_mode else pop_to_table_body_context l (fun () -> pop l in_table_mode) | l, `Start ({name = "caption" | "col" | "colgroup" | "tbody" | "tfoot" | "thead"} as t) as v -> if not @@ Stack.one_in_table_scope open_elements ["tbody"; "thead"; "tfoot"] then misnested_tag l t "table" in_table_body_mode else begin push tokens v; pop_to_table_body_context l (fun () -> pop l in_table_mode) end | l, `End {name = "table" as name} as v -> if not @@ Stack.one_in_table_scope open_elements ["tbody"; "thead"; "tfoot"] then report l (`Unmatched_end_tag name) !throw in_table_body_mode else begin push tokens v; pop_to_table_body_context l (fun () -> pop l in_table_mode) end | l, `End {name = "body" | "caption" | "col" | "colgroup" | "html" | "td" | "th" | "tr" as name} -> report l (`Unmatched_end_tag name) !throw in_table_body_mode | v -> in_table_mode_rules in_table_body_mode v end (* 8.2.5.4.14. 
*) and in_row_mode () = dispatch tokens begin function | l, `Start ({name = "th" | "td"} as t) -> Active.add_marker active_formatting_elements; pop_to_table_row_context l (fun () -> push_and_emit l t in_cell_mode) | l, `End {name = "tr"} -> if not @@ Stack.in_table_scope open_elements "tr" then report l (`Unmatched_end_tag "tr") !throw in_row_mode else pop_to_table_row_context l (fun () -> pop l in_table_body_mode) | l, `Start {name = ("caption" | "col" | "colgroup" | "tbody" | "tfoot" | "thead" | "tr")} | l, `End {name = "table"} as v -> if not @@ Stack.in_table_scope open_elements "tr" then match snd v with | `Start t -> misnested_tag l t "tr" in_row_mode | `End {name} -> report l (`Unmatched_end_tag name) !throw in_row_mode else pop_to_table_row_context l (fun () -> push tokens v; pop l in_table_body_mode) | l, `End {name = "tbody" | "tfoot" | "thead" as name} as v -> if not @@ Stack.in_table_scope open_elements name then report l (`Unmatched_end_tag name) !throw in_row_mode else if not @@ Stack.in_table_scope open_elements "tr" then in_row_mode () else pop_to_table_row_context l (fun () -> push tokens v; pop l in_table_body_mode) | l, `End {name = "body" | "caption" | "col" | "colgroup" | "html" | "td" | "th" as name} -> report l (`Unmatched_end_tag name) !throw in_row_mode | v -> in_table_mode_rules in_row_mode v end (* 8.2.5.4.15. 
*) and in_cell_mode () = dispatch tokens begin function | l, `End {name = "td" | "th" as name} -> if not @@ Stack.in_table_scope open_elements name then report l (`Unmatched_end_tag name) !throw in_cell_mode else close_element_with_implied name l (fun () -> Active.clear_until_marker active_formatting_elements; in_row_mode ()) | l, `Start ({name = "caption" | "col" | "colgroup" | "tbody" | "td" | "tfoot" | "th" | "thead" | "tr"} as t) as v -> if not @@ Stack.one_in_table_scope open_elements ["td"; "th"] then misnested_tag l t "td/th" in_cell_mode else close_cell l (fun () -> Active.clear_until_marker active_formatting_elements; push tokens v; in_row_mode ()) | l, `End {name = "body" | "caption" | "col" | "colgroup" | "html" as name} -> report l (`Unmatched_end_tag name) !throw in_cell_mode | l, `End {name = "table" | "tbody" | "tfoot" | "thead" | "tr" as name} as v -> if not @@ Stack.in_table_scope open_elements name then report l (`Unmatched_end_tag name) !throw in_cell_mode else close_cell l (fun () -> Active.clear_until_marker active_formatting_elements; push tokens v; in_row_mode ()) | l, `Start ({name = "select"} as t) -> select_in_body l t in_select_in_table_mode | v -> in_body_mode_rules "td" in_cell_mode v end (* 8.2.5.4.16. 
*) and in_select_mode () = dispatch tokens (fun v -> in_select_mode_rules in_select_mode v) and in_select_mode_rules mode = function | l, `Char 0 -> report l (`Bad_token ("U+0000", "select", "null")) !throw mode | l, `Char c -> add_character l c; mode () | l, `Comment s -> emit l (`Comment s) mode | l, `Doctype _ -> report l (`Bad_document "doctype should be first") !throw mode | _, `Start {name = "html"} as v -> in_body_mode_rules "select" mode v | l, `Start ({name = "option"} as t) -> (fun mode' -> if Stack.current_element_is open_elements ["option"] then pop l mode' else mode' ()) (fun () -> push_and_emit l t mode) | l, `Start ({name = "optgroup"} as t) -> (fun mode' -> if Stack.current_element_is open_elements ["option"] then pop l mode' else mode' ()) @@ (fun mode' () -> if Stack.current_element_is open_elements ["optgroup"] then pop l mode' else mode' ()) @@ (fun () -> push_and_emit l t mode) | l, `End {name = "optgroup"} -> (fun mode' -> match !open_elements with | {element_name = `HTML, "option"}:: {element_name = `HTML, "optgroup"}::_ -> pop l mode' | _ -> mode' ()) (fun () -> if Stack.current_element_is open_elements ["optgroup"] then pop l mode else report l (`Unmatched_end_tag "optgroup") !throw mode) | l, `End {name = "option"} -> if Stack.current_element_is open_elements ["option"] then pop l mode else report l (`Unmatched_end_tag "option") !throw mode | l, `End {name = "select"} -> if not @@ Stack.in_select_scope open_elements "select" then report l (`Unmatched_end_tag "select") !throw mode else close_element l "select" (fun () -> reset_mode () ()) | l, `Start ({name = "select"} as t) -> misnested_tag l t "select" (fun () -> close_element l "select" (fun () -> reset_mode () ())) | l, `Start ({name = "input" | "keygen" | "textarea"} as t) as v -> misnested_tag l t "select" (fun () -> if not @@ Stack.in_select_scope open_elements "select" then mode () else begin push tokens v; close_element l "select" (fun () -> reset_mode () ()) end) | _, (`Start 
{name = "script" | "template"} | `End {name = "template"}) as v -> in_head_mode_rules mode v | _, `EOF as v -> in_body_mode_rules "select" mode v | l, _ -> report l (`Bad_content "select") !throw mode (* 8.2.5.4.17. *) and in_select_in_table_mode () = dispatch tokens begin function | l, `Start ({name = "caption" | "table" | "tbody" | "tfoot" | "thead" | "tr" | "td" | "th"} as t) as v -> misnested_tag l t "table" (fun () -> push tokens v; close_element l "select" (fun () -> reset_mode () ())) | l, `End {name = "caption" | "table" | "tbody" | "tfoot" | "thead" | "tr" | "td" | "th" as name} as v -> report l (`Unmatched_end_tag "name") !throw (fun () -> if not @@ Stack.in_table_scope open_elements name then in_select_in_table_mode () else begin push tokens v; close_element l "select" (fun () -> reset_mode () ()) end) | v -> in_select_mode_rules in_select_in_table_mode v end (* 8.2.5.4.18. *) and in_template_mode () = dispatch tokens (fun v -> in_table_mode_rules in_template_mode v) (* 8.2.5.4.18. 
*) and in_template_mode_rules mode = function | _, (`Char _ | `Comment _ | `Doctype _) as v -> in_body_mode_rules "template" mode v | _, `Start {name = "base" | "basefont" | "bgsound" | "link" | "meta" | "noframes" | "script" | "style" | "template" | "title"} | _, `End {name = "template"} as v -> in_head_mode_rules mode v | _, `Start {name = "caption" | "colgroup" | "tbody" | "tfoot" | "thead"} as v -> Template.pop template_insertion_modes; Template.push template_insertion_modes in_table_mode; push tokens v; in_table_mode () | _, `Start {name = "col"} as v -> Template.pop template_insertion_modes; Template.push template_insertion_modes in_column_group_mode; push tokens v; in_column_group_mode () | _, `Start {name = "tr"} as v -> Template.pop template_insertion_modes; Template.push template_insertion_modes in_table_body_mode; push tokens v; in_table_body_mode () | _, `Start {name = "td" | "th"} as v -> Template.pop template_insertion_modes; Template.push template_insertion_modes in_row_mode; push tokens v; in_row_mode () | _, `Start _ as v -> Template.pop template_insertion_modes; Template.push template_insertion_modes in_body_mode; push tokens v; in_body_mode () | l, `End {name} -> report l (`Unmatched_end_tag name) !throw mode | l, `EOF as v -> if not @@ Stack.has open_elements "template" then emit_end l else begin report l (`Unmatched_end_tag "template") !throw (fun () -> Active.clear_until_marker active_formatting_elements; Template.pop template_insertion_modes; push tokens v; close_element l "template" (fun () -> reset_mode () ())) end (* 8.2.5.4.19. 
*) and after_body_mode () = dispatch tokens begin function | _, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020) as v -> in_body_mode_rules "html" after_body_mode v | l, `Comment s -> emit l (`Comment s) after_body_mode | l, `Doctype _ -> report l (`Bad_document "doctype should be first") !throw after_body_mode | _, `Start {name = "html"} as v -> in_body_mode_rules "html" after_body_mode v | _, `End {name = "html"} -> after_after_body_mode () | l, `EOF -> emit_end l | l, _ as v -> report l (`Bad_document "content after body") !throw (fun () -> push tokens v; in_body_mode ()) end (* 8.2.5.4.20. *) and in_frameset_mode () = dispatch tokens begin function | l, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020 as c) -> add_character l c; in_frameset_mode () | l, `Comment s -> emit l (`Comment s) in_frameset_mode | l, `Doctype _ -> report l (`Bad_document "doctype should be first") !throw in_frameset_mode | _, `Start {name = "html"} as v -> in_body_mode_rules "frameset" in_frameset_mode v | l, `Start ({name = "frameset"} as t) -> push_and_emit l t in_frameset_mode | l, `End {name = "frameset"} -> (fun mode' -> if Stack.current_element_is open_elements ["html"] then report l (`Unmatched_end_tag "frameset") !throw mode' else pop l mode') (fun () -> if Stack.current_element_is open_elements ["frameset"] then in_frameset_mode () else after_frameset_mode ()) | l, `Start ({name = "frame"} as t) -> push_and_emit ~acknowledge:true l t (fun () -> pop l in_frameset_mode) | _, `Start {name = "noframes"} as v -> in_head_mode_rules in_frameset_mode v | l, `EOF -> (fun mode' -> if not @@ Stack.current_element_is open_elements ["html"] then report l (`Unexpected_eoi "frameset") !throw mode' else mode' ()) (fun () -> emit_end l) | l, _ -> report l (`Bad_content "frameset") !throw in_frameset_mode end (* 8.2.5.4.21. 
*) and after_frameset_mode () = dispatch tokens begin function | l, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020 as c) -> add_character l c; after_frameset_mode () | l, `Comment s -> emit l (`Comment s) after_frameset_mode | l, `Doctype _ -> report l (`Bad_document "doctype should be first") !throw after_frameset_mode | _, `Start {name = "html"} as v -> in_body_mode_rules "html" after_frameset_mode v | l, `End {name = "html"} -> close_element l "html" after_after_frameset_mode | _, `Start {name = "noframes"} as v -> in_head_mode_rules after_frameset_mode v | l, `EOF -> emit_end l | l, _ -> report l (`Bad_content "html") !throw after_frameset_mode end (* 8.2.5.4.22. *) and after_after_body_mode () = dispatch tokens begin function | l, `Comment s -> emit l (`Comment s) after_after_body_mode | _, `Doctype _ | _, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020) | _, `Start {name = "html"} as v -> in_body_mode_rules "html" after_after_body_mode v | l, `EOF -> emit_end l | l, _ as v -> push tokens v; report l (`Bad_content "html") !throw in_body_mode end (* 8.2.5.4.23. *) and after_after_frameset_mode () = dispatch tokens begin function | l, `Comment s -> emit l (`Comment s) after_after_frameset_mode | _, `Doctype _ | _, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020) | _, `Start {name = "html"} as v -> in_body_mode_rules "html" after_after_frameset_mode v | l, `EOF -> emit_end l | _, `Start {name = "noframes"} as v -> in_head_mode_rules after_after_frameset_mode v | l, _ -> report l (`Bad_content "html") !throw after_after_frameset_mode end (* 8.2.5.5. 
*) and foreign_start_tag mode l tag = let namespace = match Stack.adjusted_current_element context open_elements with | None -> `HTML | Some {element_name = ns, _} -> ns in push_and_emit ~acknowledge:true ~namespace l tag (fun () -> if tag.self_closing then pop l mode else mode ()) and is_html_font_tag tag = tag.Token_tag.attributes |> List.exists (function | ("color" | "face" | "size"), _ -> true | _ -> false) and foreign_content mode force_html v = match v with | l, `Char 0 -> report l (`Bad_token ("U+0000", "foreign content", "null")) !throw (fun () -> add_character l u_rep; mode ()) | l, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020 as c) -> add_character l c; mode () | l, `Char c -> frameset_ok := false; add_character l c; mode () | l, `Comment s -> emit l (`Comment s) mode | l, `Doctype _ -> report l (`Bad_document "doctype should be first") !throw mode | l, `Start ({name = "b" | "big" | "blockquote" | "body" | "br" | "center" | "code" | "dd" | "div" | "dl" | "dt" | "em" | "embed" | "font" | "h1" | "h2" | "h3" | "h4" | "h5" | "h6" | "head" | "hr" | "i" | "img" | "li" | "listing" | "main" | "meta" | "nobr" | "ol" | "p" | "pre" | "ruby" | "s" | "small" | "span" | "strong" | "strike" | "sub" | "sup" | "table" | "tt" | "u" | "ul" | "var" as name} as t) as v -> if name = "font" && not @@ is_html_font_tag t then foreign_start_tag mode l t else misnested_tag l t "xml tag" (fun () -> push tokens v; pop l (fun () -> pop_until (function | {element_name = `HTML, _} -> true | {is_html_integration_point = true} -> true | {element_name} -> Foreign.is_mathml_text_integration_point element_name) l mode)) | l, `Start t -> foreign_start_tag mode l t | l, `End {name = "script"} when match Stack.current_element open_elements with | Some {element_name = `SVG, "script"} -> true | _ -> false -> pop l mode | l, `End {name} -> (fun mode' -> match Stack.current_element open_elements with | Some {element_name = _, name'} when String.lowercase_ascii name' = name -> mode' () | _ -> 
report l (`Unmatched_end_tag name) !throw (fun () -> mode' ())) (fun () -> let rec scan = function | [] -> mode () | {element_name = ns, name'}::_ when String.lowercase_ascii name' = name -> close_element ~ns l name mode | {element_name = `HTML, _}::_ -> force_html () | _::rest -> scan rest in scan !open_elements) | _, `EOF -> force_html () in construct constructor markup.ml-1.0.3/src/html_parser.mli000066400000000000000000000006131421357706400172440ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open Common val parse : [< `Document | `Fragment of string ] option -> Error.parse_handler -> (location * Html_tokenizer.token) Kstream.t * (Html_tokenizer.state -> unit) * ((unit -> bool) -> unit) -> (location * signal) Kstream.t markup.ml-1.0.3/src/html_tokenizer.ml000066400000000000000000001531071421357706400176200ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. 
*) open Common open Token_tag type token = [ `Doctype of doctype | `Start of Token_tag.t | `End of Token_tag.t | `Char of int | `Comment of string | `EOF ] type state = [ `Data | `RCDATA | `RAWTEXT | `Script_data | `PLAINTEXT ] let replace_windows_1252_entity = function | 0x80 -> 0x20AC | 0x82 -> 0x201A | 0x83 -> 0x0192 | 0x84 -> 0x201E | 0x85 -> 0x2026 | 0x86 -> 0x2020 | 0x87 -> 0x2021 | 0x88 -> 0x02C6 | 0x89 -> 0x2030 | 0x8A -> 0x0160 | 0x8B -> 0x2039 | 0x8C -> 0x0152 | 0x8E -> 0x017D | 0x91 -> 0x2018 | 0x92 -> 0x2019 | 0x93 -> 0x201C | 0x94 -> 0x201D | 0x95 -> 0x2022 | 0x96 -> 0x2013 | 0x97 -> 0x2014 | 0x98 -> 0x02DC | 0x99 -> 0x2122 | 0x9A -> 0x0161 | 0x9B -> 0x203A | 0x9C -> 0x0153 | 0x9E -> 0x017E | 0x9F -> 0x0178 | c -> c let named_entity_trie = lazy begin let trie = Trie.create () in Array.fold_left (fun trie (name, characters) -> Trie.add name characters trie) trie Entities.entities end type doctype_buffers = {mutable doctype_name : Buffer.t option; mutable public_identifier : Buffer.t option; mutable system_identifier : Buffer.t option; mutable force_quirks : bool} module Doctype_buffers = struct type t = doctype_buffers = {mutable doctype_name : Buffer.t option; mutable public_identifier : Buffer.t option; mutable system_identifier : Buffer.t option; mutable force_quirks : bool} end let add_doctype_char buffer c = let buffer = match buffer with | None -> Buffer.create 32 | Some buffer -> buffer in add_utf_8 buffer c; Some buffer type tag_buffers = {mutable start : bool; tag_name : Buffer.t; mutable self_closing : bool; mutable attributes : (string * string) list} module Tag_buffers = struct type t = tag_buffers = {mutable start : bool; tag_name : Buffer.t; mutable self_closing : bool; mutable attributes : (string * string) list} end let sequence_to_lowercase = List.map (fun (l, c) -> l, to_lowercase c) open Kstream let tokenize report (input, get_location) = let foreign = ref (fun () -> false) in let last_start_tag_name : string option ref = ref None in 
let is_appropriate_end_tag name_buffer = match !last_start_tag_name with | None -> false | Some name -> Buffer.contents name_buffer = name in let throw = ref (fun _ -> ()) in let ended = ref (fun _ -> ()) in let output = ref (fun _ -> ()) in let rec current_state = ref data_state and emit t s = current_state := s; !output t and emit_character l c s = emit (l, `Char c) s and emit_characters cs s = match cs with | [] -> s () | (l, c)::cs -> emit_character l c (fun () -> emit_characters cs s) and emit_eof () = emit (get_location (), `EOF) (fun () -> !ended ()) and emit_tag l tag' = let rec rev_deduplicate accumulator seen attributes k = match attributes with | [] -> k accumulator | (n, v)::more -> if list_mem_string n seen then report l (`Bad_token (n, "tag", "duplicate attribute")) !throw (fun () -> rev_deduplicate accumulator seen more k) else rev_deduplicate ((n, v)::accumulator) (n::seen) more k in rev_deduplicate [] [] (List.rev tag'.Tag_buffers.attributes) (fun attributes -> let tag = {Token_tag.name = Buffer.contents tag'.tag_name; self_closing = tag'.self_closing; attributes = List.rev attributes} in (fun k -> if tag'.start then begin last_start_tag_name := Some tag.name; k (`Start tag) end else (fun k -> match attributes with | (n, _)::_ -> report l (`Bad_token (n, "tag", "end tag with attributes")) !throw k | _ -> k ()) @@ (fun k () -> if tag.Token_tag.self_closing then report l (`Bad_token ("/>", "tag", "end tag cannot be self-closing")) !throw k else k ()) @@ (fun () -> k (`End tag))) (fun token -> emit (l, token) data_state)) and emit_comment l buffer = emit (l, `Comment (Buffer.contents buffer)) data_state and emit_doctype ?(quirks = false) l doctype = if quirks then doctype.Doctype_buffers.force_quirks <- true; let if_not_missing = function | None -> None | Some buffer -> Some (Buffer.contents buffer) in let doctype = {Common.doctype_name = if_not_missing doctype.doctype_name; public_identifier = if_not_missing doctype.public_identifier; 
system_identifier = if_not_missing doctype.system_identifier; raw_text = None; force_quirks = doctype.force_quirks} in emit (l, `Doctype doctype) data_state (* Implementation of 8.2.4.69 Tokenizing character references. *) and consume_character_reference in_attribute additional location k = peek_option input !throw (function | Some (_, (0x0009 | 0x000A | 0x000C | 0x0020 | 0x003C | 0x0026)) | None -> k None | Some (_, c) when Some c = additional -> k None | Some (_, 0x0023 as pound) -> let consume_digits filter k = let buffer = Buffer.create 8 in let rec iterate () = next_option input !throw (function | Some (_, c) when filter c -> Buffer.add_char buffer (Char.chr c); iterate () | v -> push_option input v; if Buffer.length buffer = 0 then k None else k (Some (Buffer.contents buffer))) in iterate () in let finish_digits prefix text s = let consume_semicolon k = next_option input !throw begin function | Some (_, 0x003B) -> k ";" | v -> push_option input v; report location (`Bad_token (prefix ^ text, "character reference", "missing ';' at end")) !throw (fun () -> k "") end in let convert s semicolon k' = let maybe_n = try Some (int_of_string s) with Failure _ -> None in match maybe_n with | Some n -> k' n | None -> report location (`Bad_token (prefix ^ text ^ semicolon, "character reference", "out of range")) !throw (fun () -> k (Some (`One u_rep))) in consume_semicolon begin fun semicolon -> convert s semicolon begin fun n' -> let n = replace_windows_1252_entity n' in if n <> n' then report location (`Bad_token (prefix ^ text ^ semicolon, "character reference", "Windows-1252 character")) !throw (fun () -> k (Some (`One n))) else match n with | n when not @@ is_scalar n || n = 0 -> report location (`Bad_token (prefix ^ text ^ semicolon, "character reference", "out of range")) !throw (fun () -> k (Some (`One u_rep))) | n when is_control_character n || is_non_character n -> report location (`Bad_token (prefix ^ text ^ semicolon, "character reference", "invalid HTML 
character")) !throw (fun () -> k (Some (`One n))) | n -> k (Some (`One n)) end end in next_expected input !throw (fun _ -> peek_option input !throw (function | Some (_, (0x0078 | 0x0058 as c) as x) -> let prefix = Printf.sprintf "&#%c" (Char.chr c) in next_expected input !throw (fun _ -> consume_digits is_hex_digit (function | None -> push_list input [pound; x]; report location (`Bad_token (prefix, "character reference", "expected digits")) !throw (fun () -> k None) | Some s -> finish_digits prefix s ("0x" ^ s))) | _ -> let prefix = "&#" in consume_digits is_digit (function | None -> push input pound; report location (`Bad_token (prefix, "character reference", "expected digits")) !throw (fun () -> k None) | Some s -> finish_digits prefix s s))) | _ -> let is_entity_like k = let finish replace text = push_list input (List.rev replace); k text in let buffer = Buffer.create 16 in let rec iterate replace = next_option input !throw (function | None -> finish replace None | Some ((_, c) as v) when is_alphanumeric c -> Buffer.add_char buffer (Char.chr c); iterate (v::replace) | Some ((_, 0x003B) as v) -> finish (v::replace) (Some (Buffer.contents buffer)) | Some v -> finish (v::replace) None) in iterate [] in let finish best matched replace = push_list input (List.rev replace); match best with | None -> is_entity_like (function | None -> k None | Some s -> report location (`Bad_token ("&" ^ s ^ ";", "entity reference", "no such entity")) !throw (fun () -> k None)) | Some (text, code_points) -> next_option input !throw (function | Some (_, 0x003B) -> k (Some code_points) | maybe_v -> let unterminated () = push_option input maybe_v; report location (`Bad_token ("&" ^ text, "entity reference", "missing ';' at end")) !throw (fun () -> k (Some code_points)) in if not in_attribute then unterminated () else match maybe_v with | Some ((_, c) as v) when is_alphanumeric c -> push_list input (List.rev (v::matched)); k None | Some ((_, 0x003D) as v) -> push_list input (List.rev 
(v::matched)); report location (`Bad_token ("&" ^ text ^ "=", "attribute", "unterminated entity reference followed by '='")) !throw(fun () -> k None) | _ -> unterminated ()) in let rec match_named best matched replace trie text = next_option input !throw (function | None -> finish best matched replace | Some ((_, c) as v) -> let trie = Trie.advance c trie in add_utf_8 text c; match Trie.matches trie with | Trie.No -> finish best matched (v::replace) | Trie.Prefix -> match_named best matched (v::replace) trie text | Trie.Multiple m -> let w = Buffer.contents text in match_named (Some (w, m)) (v::(replace @ matched)) [] trie text | Trie.Yes m -> let w = Buffer.contents text in finish (Some (w, m)) (v::(replace @ matched)) []) in match_named None [] [] (Lazy.force named_entity_trie) (Buffer.create 16)) (* 8.2.4.1. *) and data_state () = next_option input !throw begin function | Some (l, 0x0026) -> character_reference_state data_state l | Some (l, 0x003C) -> tag_open_state l | Some (l, 0) -> report l (`Bad_token ("U+0000", "content", "null")) !throw (fun () -> emit (l, `Char 0) data_state) | None -> emit_eof () | Some (l, c) -> emit (l, `Char c) data_state end (* 8.2.4.2, 8.2.4.4. *) and character_reference_state state l = consume_character_reference false None l begin function | None -> emit (l, `Char 0x0026) state | Some (`One c) -> emit (l, `Char c) state | Some (`Two (c, c')) -> emit (l, `Char c) (fun () -> emit (l, `Char c') state) end (* 8.2.4.3. *) and rcdata_state () = next_option input !throw begin function | Some (l, 0x0026) -> character_reference_state rcdata_state l | Some (l, 0x003C as v) -> text_less_than_sign_state rcdata_state l [v] | Some (l, 0) -> report l (`Bad_token ("U+0000", "content", "null")) !throw (fun () -> emit (l, `Char u_rep) rcdata_state) | None -> emit_eof () | Some (l, c) -> emit (l, `Char c) rcdata_state end (* 8.2.4.5. 
*) and rawtext_state () = next_option input !throw begin function | Some (l, 0x003C as v) -> text_less_than_sign_state rawtext_state l [v] | Some (l, 0) -> report l (`Bad_token ("U+0000", "content", "null")) !throw (fun () -> emit (l, `Char u_rep) rawtext_state) | None -> emit_eof () | Some (l, c) -> emit (l, `Char c) rawtext_state end (* 8.2.4.6. *) and script_data_state () = next_option input !throw begin function | Some (l, 0x003C as v) -> script_data_less_than_sign_state l [v] | Some (l, 0) -> report l (`Bad_token ("U+0000", "content", "null")) !throw (fun () -> emit_character l u_rep script_data_state) | None -> emit_eof () | Some (l, c) -> emit_character l c script_data_state end (* 8.2.4.7. *) and plaintext_state () = next_option input !throw begin function | Some (l, 0) -> report l (`Bad_token ("U+0000", "content", "null")) !throw (fun () -> emit (l, `Char u_rep) plaintext_state) | None -> emit_eof () | Some (l, c) -> emit (l, `Char c) plaintext_state end (* 8.2.4.8. *) and tag_open_state l' = let tag = {start = true; tag_name = Buffer.create 16; self_closing = false; attributes = []} in next_option input !throw begin function | Some (_, 0x0021) -> markup_declaration_open_state l' | Some (_, 0x002F) -> end_tag_open_state l' tag | Some (_, c) when is_alphabetic c -> add_utf_8 tag.tag_name (to_lowercase c); tag_name_state l' tag | Some (_, 0x003F) -> report l' (`Bad_token (" bogus_comment_state l') | Some ((l, c) as v) -> report l (`Bad_token (char c, "tag", "invalid start character")) !throw (fun () -> push input v; emit_character l' 0x003C data_state) | None -> report (get_location ()) (`Unexpected_eoi "tag") !throw (fun () -> emit_character l' 0x003C data_state) end (* 8.2.4.9. 
*) and end_tag_open_state l' tag = tag.start <- false; next_option input !throw begin function | Some (_, c) when is_alphabetic c -> add_utf_8 tag.tag_name (to_lowercase c); tag_name_state l' tag | Some (_, 0x003E) -> report l' (`Bad_token ("", "tag", "no tag name")) !throw data_state | None -> report (get_location ()) (`Unexpected_eoi "tag") !throw (fun () -> let line, column = l' in emit (l', `Char 0x003C) (fun () -> emit ((line, column + 1), `Char 0x002F) data_state)) | Some (l, c) -> report l (`Bad_token (char c, "tag", "invalid start character")) !throw (fun () -> bogus_comment_state l') end (* 8.2.4.10. *) and tag_name_state l' tag = next_option input !throw begin function | Some (_, (0x0009 | 0x000A | 0x000C | 0x0020)) -> before_attribute_name_state l' tag | Some (_, 0x002F) -> self_closing_start_tag_state l' tag | Some (_, 0x003E) -> emit_tag l' tag | Some (l, 0) -> report l (`Bad_token ("U+0000", "tag name", "null")) !throw (fun () -> add_utf_8 tag.tag_name u_rep; tag_name_state l' tag) | None -> report (get_location ()) (`Unexpected_eoi "tag") !throw data_state | Some (_, c) -> add_utf_8 tag.tag_name (to_lowercase c); tag_name_state l' tag end (* 8.2.4.11, 8.2.4.14. *) and text_less_than_sign_state state l' cs = next_option input !throw begin function | Some (_, 0x002F as v) -> text_end_tag_open_state state l' (v::cs) | maybe_v -> push_option input maybe_v; emit_characters cs state end (* 8.2.4.12, 8.2.4.15, 8.2.4.18, 8.2.4.26. *) and text_end_tag_open_state state l' cs = next_option input !throw begin function | Some (_, c as v) when is_alphabetic c -> let name_buffer = Buffer.create 32 in add_utf_8 name_buffer (to_lowercase c); text_end_tag_name_state state l' (v::cs) name_buffer | maybe_v -> push_option input maybe_v; emit_characters (List.rev cs) state end (* 8.2.4.13, 8.2.4.16, 8.2.4.19, 8.2.4.27. 
*) and text_end_tag_name_state state l' cs name_buffer = let create_tag () = {start = false; tag_name = name_buffer; self_closing = false; attributes = []} in next_option input !throw begin function | Some (_, (0x0009 | 0x000A | 0x000C | 0x0020)) when is_appropriate_end_tag name_buffer -> before_attribute_name_state l' (create_tag ()) | Some (_, 0x002F) when is_appropriate_end_tag name_buffer -> self_closing_start_tag_state l' (create_tag ()) | Some (_, 0x003E) when is_appropriate_end_tag name_buffer -> emit_tag l' (create_tag ()) | Some ((_, c) as v) when is_alphabetic c -> add_utf_8 name_buffer (to_lowercase c); text_end_tag_name_state state l' (v::cs) name_buffer | maybe_v -> push_option input maybe_v; emit_characters (List.rev cs) state end (* 8.2.4.17. *) and script_data_less_than_sign_state l' cs = next_option input !throw begin function | Some (_, 0x002F as v) -> text_end_tag_open_state script_data_state l' (v::cs) | Some (_, 0x0021 as v) -> emit_characters (List.rev (v::cs)) (fun () -> script_data_escape_start_state l') | maybe_v -> push_option input maybe_v; emit_characters cs script_data_state end (* 8.2.4.20. *) and script_data_escape_start_state l' = next_option input !throw begin function | Some (l, 0x002D) -> emit_character l 0x002D (fun () -> script_data_escape_start_dash_state l') | maybe_v -> push_option input maybe_v; script_data_state () end (* 8.2.4.21. *) and script_data_escape_start_dash_state l' = next_option input !throw begin function | Some (l, 0x002D) -> emit_character l 0x002D (fun () -> script_data_escaped_dash_dash_state l') | maybe_v -> push_option input maybe_v; script_data_state () end (* 8.2.4.22. 
*) and script_data_escaped_state l' = next_option input !throw begin function | Some (l, 0x002D) -> emit_character l 0x002D (fun () -> script_data_escaped_dash_state l') | Some ((l, 0x003C) as v) -> script_data_escaped_less_than_sign_state l' l [v] | Some (l, 0) -> report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () -> emit_character l u_rep (fun () -> script_data_escaped_state l')) | None -> report (get_location ()) (`Unexpected_eoi "script") !throw data_state | Some (l, c) -> emit_character l c (fun () -> script_data_escaped_state l') end (* 8.2.4.23. *) and script_data_escaped_dash_state l' = next_option input !throw begin function | Some (l, 0x002D) -> emit_character l 0x002D (fun () -> script_data_escaped_dash_dash_state l') | Some (l, 0x003C as v) -> script_data_escaped_less_than_sign_state l' l [v] | Some (l, 0) -> report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () -> emit_character l u_rep (fun () -> script_data_escaped_state l')) | None -> report (get_location ()) (`Unexpected_eoi "script") !throw data_state | Some (l, c) -> emit_character l c (fun () -> script_data_escaped_state l') end (* 8.2.4.24. *) and script_data_escaped_dash_dash_state l' = next_option input !throw begin function | Some (l, 0x002D) -> emit_character l 0x002D (fun () -> script_data_escaped_dash_dash_state l') | Some (l, 0x003C as v) -> script_data_escaped_less_than_sign_state l' l [v] | Some (l, 0x003E) -> emit_character l 0x003E script_data_state | Some (l, 0) -> report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () -> emit_character l u_rep (fun () -> script_data_escaped_state l')) | None -> report (get_location ()) (`Unexpected_eoi "script") !throw data_state | Some (l, c) -> emit_character l c (fun () -> script_data_escaped_state l') end (* 8.2.4.25. 
*) and script_data_escaped_less_than_sign_state l' l'' cs = next_option input !throw begin function | Some (_, 0x002F as v) -> text_end_tag_open_state (fun () -> script_data_escaped_state l') l'' (v::cs) | Some (_, c as v) when is_alphabetic c -> let tag_buffer = Buffer.create 32 in add_utf_8 tag_buffer (to_lowercase c); emit_characters (List.rev (v::cs)) (fun () -> script_data_double_escape_start_state l' tag_buffer) | maybe_v -> push_option input maybe_v; emit_characters cs (fun () -> script_data_escaped_state l') end (* 8.2.4.28. *) and script_data_double_escape_start_state l' tag_buffer = next_option input !throw begin function | Some (l, (0x0009 | 0x000A | 0x000C | 0x0020 | 0x002F | 0x003E as c)) -> emit_character l c (fun () -> if Buffer.contents tag_buffer = "script" then script_data_double_escaped_state l' else script_data_escaped_state l') | Some (l, c) when is_alphabetic c -> add_utf_8 tag_buffer (to_lowercase c); emit_character l c (fun () -> script_data_double_escape_start_state l' tag_buffer) | maybe_v -> push_option input maybe_v; script_data_escaped_state l' end (* 8.2.4.29. *) and script_data_double_escaped_state l' = next_option input !throw begin function | Some (l, 0x002D) -> emit_character l 0x002D (fun () -> script_data_double_escaped_dash_state l') | Some (l, 0x003C) -> emit_character l 0x003C (fun () -> script_data_double_escaped_less_than_sign_state l') | Some (l, 0) -> report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () -> emit_character l u_rep (fun () -> script_data_double_escaped_state l')) | None -> report (get_location ()) (`Unexpected_eoi "script") !throw data_state | Some (l, c) -> emit_character l c (fun () -> script_data_double_escaped_state l') end (* 8.2.4.30. 
*) and script_data_double_escaped_dash_state l' = next_option input !throw begin function | Some (l, 0x002D) -> emit_character l 0x002D (fun () -> script_data_double_escaped_dash_dash_state l') | Some (l, 0x003C) -> emit_character l 0x003C (fun () -> script_data_double_escaped_less_than_sign_state l') | Some (l, 0) -> report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () -> emit_character l u_rep (fun () -> script_data_double_escaped_state l')) | None -> report (get_location ()) (`Unexpected_eoi "script") !throw data_state | Some (l, c) -> emit_character l c (fun () -> script_data_double_escaped_state l') end (* 8.2.4.31. *) and script_data_double_escaped_dash_dash_state l' = next_option input !throw begin function | Some (l, 0x002D) -> emit_character l 0x002D (fun () -> script_data_double_escaped_dash_dash_state l') | Some (l, 0x003C) -> emit_character l 0x003C (fun () -> script_data_double_escaped_less_than_sign_state l') | Some (l, 0x003E) -> emit_character l 0x003E script_data_state | Some (l, 0) -> report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () -> emit_character l u_rep (fun () -> script_data_double_escaped_state l')) | None -> report (get_location ()) (`Unexpected_eoi "script") !throw data_state | Some (l, c) -> emit_character l c (fun () -> script_data_double_escaped_state l') end (* 8.2.4.32. *) and script_data_double_escaped_less_than_sign_state l' = next_option input !throw begin function | Some (l, 0x002F) -> let tag_buffer = Buffer.create 32 in emit_character l 0x002F (fun () -> script_data_double_escape_end_state l' tag_buffer) | maybe_v -> push_option input maybe_v; script_data_double_escaped_state l' end (* 8.2.4.33. 
*) and script_data_double_escape_end_state l' tag_buffer = next_option input !throw begin function | Some (l, (0x0009 | 0x000A | 0x000C | 0x0020 | 0x002F | 0x003E as c)) -> emit_character l c (fun () -> if Buffer.contents tag_buffer = "script" then script_data_escaped_state l' else script_data_double_escaped_state l') | Some (l, c) when is_alphabetic c -> add_utf_8 tag_buffer (to_lowercase c); emit_character l c (fun () -> script_data_double_escape_end_state l' tag_buffer) | maybe_v -> push_option input maybe_v; script_data_double_escaped_state l' end (* 8.2.4.34. *) and before_attribute_name_state l' tag = let start_attribute c = let name_buffer = Buffer.create 32 in add_utf_8 name_buffer c; attribute_name_state l' tag name_buffer in next_option input !throw begin function | Some (_, (0x0009 | 0x000A | 0x000C | 0x0020)) -> before_attribute_name_state l' tag | Some (_, 0x002F) -> self_closing_start_tag_state l' tag | Some (_, 0x003E) -> emit_tag l' tag | None -> report (get_location ()) (`Unexpected_eoi "tag") !throw data_state | Some (l, 0) -> report l (`Bad_token ("U+0000", "attribute name", "null")) !throw (fun () -> start_attribute u_rep) | Some (l, (0x0022 | 0x0027 | 0x003C | 0x003D as c)) -> report l (`Bad_token (char c, "attribute name", "invalid start character")) !throw (fun () -> start_attribute c) | Some (_, c) -> start_attribute (to_lowercase c) end (* 8.2.4.35. 
*) and attribute_name_state l' tag name_buffer = next_option input !throw begin function | Some (_, (0x0009 | 0x000A | 0x000C | 0x0020)) -> after_attribute_name_state l' tag (Buffer.contents name_buffer) | Some (_, 0x002F) -> tag.attributes <- (Buffer.contents name_buffer, "")::tag.attributes; self_closing_start_tag_state l' tag | Some (_, 0x003D) -> before_attribute_value_state l' tag (Buffer.contents name_buffer) | Some (_, 0x003E) -> tag.attributes <- (Buffer.contents name_buffer, "")::tag.attributes; emit_tag l' tag | Some (l, 0) -> report l (`Bad_token ("U+0000", "attribute name", "null")) !throw (fun () -> add_utf_8 name_buffer u_rep; attribute_name_state l' tag name_buffer) | Some (l, (0x0022 | 0x0027 | 0x003C as c)) -> report l (`Bad_token (char c, "attribute name", "invalid name character")) !throw (fun () -> add_utf_8 name_buffer c; attribute_name_state l' tag name_buffer) | None -> report (get_location ()) (`Unexpected_eoi "tag") !throw data_state | Some (_, c) -> add_utf_8 name_buffer (to_lowercase c); attribute_name_state l' tag name_buffer end (* 8.2.4.36. 
*) and after_attribute_name_state l' tag name = let start_next_attribute c = tag.attributes <- (name, "")::tag.attributes; let name_buffer = Buffer.create 32 in add_utf_8 name_buffer c; attribute_name_state l' tag name_buffer in next_option input !throw begin function | Some (_, (0x0009 | 0x000A | 0x000C | 0x0020)) -> after_attribute_name_state l' tag name | Some (_, 0x002F) -> tag.attributes <- (name, "")::tag.attributes; self_closing_start_tag_state l' tag | Some (_, 0x003D) -> before_attribute_value_state l' tag name | Some (_, 0x003E) -> tag.attributes <- (name, "")::tag.attributes; emit_tag l' tag | Some (l, 0) -> report l (`Bad_token ("U+0000", "attribute name", "null")) !throw (fun () -> start_next_attribute u_rep) | Some (l, (0x0022 | 0x0027 | 0x003C as c)) -> report l (`Bad_token (char c, "attribute name", "invalid start character")) !throw (fun () -> start_next_attribute c) | None -> report (get_location ()) (`Unexpected_eoi "tag") !throw data_state | Some (_, c) -> start_next_attribute (to_lowercase c) end (* 8.2.4.37. 
*) and before_attribute_value_state l' tag name = let start_value state maybe_c = let value_buffer = Buffer.create 32 in begin match maybe_c with | None -> () | Some c -> add_utf_8 value_buffer c end; state l' tag name value_buffer in next_option input !throw begin function | Some (_, (0x0009 | 0x000A | 0x000C | 0x0020)) -> before_attribute_value_state l' tag name | Some (_, (0x0022 | 0x0027 as c)) -> start_value (attribute_value_quoted_state c) None | Some (_, 0x0026 as v) -> push input v; start_value attribute_value_unquoted_state None | Some (l, 0) -> report l (`Bad_token ("U+0000", "attribute value", "null")) !throw (fun () -> start_value attribute_value_unquoted_state (Some u_rep)) | Some (l, 0x003E) -> report l (`Bad_token (">", "tag", "expected attribute value after '='")) !throw (fun () -> tag.attributes <- (name, "")::tag.attributes; emit_tag l' tag) | Some (l, (0x003C | 0x003D | 0x0060 as c)) -> report l (`Bad_token (char c, "attribute value", "invalid start character")) !throw (fun () -> start_value attribute_value_unquoted_state (Some c)) | None -> report (get_location ()) (`Unexpected_eoi "tag") !throw data_state | Some (_, c) -> start_value attribute_value_unquoted_state (Some c) end (* 8.2.4.38 and 8.2.4.39. 
*) and attribute_value_quoted_state quote l' tag name value_buffer = next_option input !throw begin function | Some (_, c) when c = quote -> tag.attributes <- (name, Buffer.contents value_buffer)::tag.attributes; after_attribute_value_quoted_state l' tag | Some (l, 0x0026) -> character_reference_in_attribute quote l value_buffer (fun () -> attribute_value_quoted_state quote l' tag name value_buffer) | Some (l, 0) -> report l (`Bad_token ("U+0000", "attribute value", "null")) !throw (fun () -> add_utf_8 value_buffer u_rep; attribute_value_quoted_state quote l' tag name value_buffer) | None -> report (get_location ()) (`Unexpected_eoi "attribute value") !throw data_state | Some (_, c) -> add_utf_8 value_buffer c; attribute_value_quoted_state quote l' tag name value_buffer end (* 8.2.4.40. *) and attribute_value_unquoted_state l' tag name value_buffer = next_option input !throw begin function | Some (_, (0x0009 | 0x000A | 0x000C | 0x0020)) -> tag.attributes <- (name, Buffer.contents value_buffer)::tag.attributes; before_attribute_name_state l' tag | Some (l, 0x0026) -> character_reference_in_attribute 0x003E l value_buffer (fun () -> attribute_value_unquoted_state l' tag name value_buffer) | Some (_, 0x003E) -> tag.attributes <- (name, Buffer.contents value_buffer)::tag.attributes; emit_tag l' tag | Some (l, 0) -> report l (`Bad_token ("U+0000", "attribute value", "null")) !throw (fun () -> add_utf_8 value_buffer u_rep; attribute_value_unquoted_state l' tag name value_buffer) | Some (l, (0x0022 | 0x0027 | 0x003C | 0x003D | 0x0060 as c)) -> report l (`Bad_token (char c, "attribute value", "invalid character")) !throw (fun () -> add_utf_8 value_buffer c; attribute_value_unquoted_state l' tag name value_buffer) | None -> report (get_location ()) (`Unexpected_eoi "tag") !throw data_state | Some (_, c) -> add_utf_8 value_buffer c; attribute_value_unquoted_state l' tag name value_buffer end (* 8.2.4.41. 
*) and character_reference_in_attribute allowed l value_buffer k = consume_character_reference true (Some allowed) l begin function | None -> add_utf_8 value_buffer 0x0026; k () | Some (`One c) -> add_utf_8 value_buffer c; k () | Some (`Two (c, c')) -> add_utf_8 value_buffer c; add_utf_8 value_buffer c'; k () end (* 8.2.4.42. *) and after_attribute_value_quoted_state l' tag = next_option input !throw begin function | Some (_, (0x0009 | 0x000A | 0x000C | 0x0020)) -> before_attribute_name_state l' tag | Some (_, 0x002F) -> self_closing_start_tag_state l' tag | Some (_, 0x003E) -> emit_tag l' tag | None -> report (get_location ()) (`Unexpected_eoi "tag") !throw data_state | Some (l, c as v) -> push input v; report l (`Bad_token (char c, "tag", "expected whitespace before attribute")) !throw (fun () -> before_attribute_name_state l' tag) end (* 8.2.4.43. *) and self_closing_start_tag_state l' tag = next_option input !throw begin function | Some (_, 0x003E) -> tag.self_closing <- true; emit_tag l' tag | None -> report (get_location ()) (`Unexpected_eoi "tag") !throw data_state | Some (l, c as v) -> push input v; report l (`Bad_token (char c, "tag", "expected '/>'")) !throw (fun () -> before_attribute_name_state l' tag) end (* 8.2.4.44. *) and bogus_comment_state l' = let buffer = Buffer.create 256 in let rec consume () = next_option input !throw begin function | Some (_, 0x003E) -> emit_comment l' buffer | Some (_, 0) -> add_utf_8 buffer u_rep; consume () | None -> emit_comment l' buffer | Some (_, c) -> add_utf_8 buffer c; consume () end in consume () (* 8.2.4.45. 
*) and markup_declaration_open_state l' = peek_n 2 input !throw begin function | [_, 0x002D; _, 0x002D] -> next_n 2 input !throw (fun _ -> comment_start_state l' (Buffer.create 64)) | _ -> peek_n 7 input !throw begin fun l -> match sequence_to_lowercase l with | [_, 0x64; _, 0x6F; _, 0x63; _, 0x74; _, 0x79; _, 0x70; _, 0x65] -> next_n 7 input !throw (fun _ -> doctype_state l') | _ -> peek_n 7 input !throw (function | [_, 0x5B; _, 0x43; _, 0x44; _, 0x41; _, 0x54; _, 0x41; _, 0x5B] -> if !foreign () then next_n 7 input !throw (fun _ -> cdata_section_state ()) else report l' (`Bad_token (" bogus_comment_state l') | _ -> report l' (`Bad_token (" bogus_comment_state l')) end end (* 8.2.4.46. *) and comment_start_state l' buffer = next_option input !throw begin function | Some (_, 0x002D) -> comment_start_dash_state l' buffer | Some (l, 0) -> report l (`Bad_token ("U+0000", "comment", "null")) !throw (fun () -> add_utf_8 buffer u_rep; comment_state l' buffer) | Some (_, 0x003E) -> report l' (`Bad_token ("", "comment", "'-->' overlaps '", "comment", "'-->' overlaps ''")) !throw (fun () -> comment_end_bang_state l' buffer) | Some (l, 0x002D) -> report l (`Bad_token ("---", "comment", "'--' should be in '-->'")) !throw (fun () -> Buffer.add_char buffer '-'; comment_end_state l' buffer) | None -> report (get_location ()) (`Unexpected_eoi "comment") !throw (fun () -> emit_comment l' buffer) | Some (l, c) -> report l (`Bad_token ("--" ^ (char c), "comment", "'--' should be in '-->'")) !throw (fun () -> Buffer.add_string buffer "--"; add_utf_8 buffer c; comment_state l' buffer) end (* 8.2.4.51. 
*) and comment_end_bang_state l' buffer = next_option input !throw begin function | Some (_, 0x002D) -> Buffer.add_string buffer "--!"; comment_end_dash_state l' buffer | Some (_, 0x003E) -> emit_comment l' buffer | Some (l, 0) -> report l (`Bad_token ("U+0000", "comment", "null")) !throw (fun () -> Buffer.add_string buffer "--!"; add_utf_8 buffer u_rep; comment_state l' buffer) | None -> report (get_location ()) (`Unexpected_eoi "comment") !throw (fun () -> emit_comment l' buffer) | Some (_, c) -> Buffer.add_string buffer "--!"; add_utf_8 buffer c; comment_state l' buffer end (* 8.2.5.52. *) and doctype_state l' = let doctype = {doctype_name = None; public_identifier = None; system_identifier = None; force_quirks = false} in next_option input !throw begin function | Some (_, (0x0009 | 0x000A | 0x000C | 0x0020)) -> before_doctype_name_state l' doctype | None -> report (get_location ()) (`Unexpected_eoi "doctype") !throw (fun () -> emit_doctype ~quirks:true l' doctype) | Some (l, c as v) -> report l (`Bad_token (char c, "doctype", "expected whitespace")) !throw (fun () -> push input v; before_doctype_name_state l' doctype) end (* 8.2.5.53. *) and before_doctype_name_state l' doctype = next_option input !throw begin function | Some (_, (0x0009 | 0x000A | 0x000C | 0x0020)) -> before_doctype_name_state l' doctype | Some (l, 0) -> report l (`Bad_token ("U+0000", "doctype", "null")) !throw (fun () -> doctype.doctype_name <- add_doctype_char doctype.doctype_name u_rep; doctype_name_state l' doctype) | Some (l, 0x003E) -> report l (`Bad_token (">", "doctype", "expected name")) !throw (fun () -> emit_doctype ~quirks:true l' doctype) | None -> report (get_location ()) (`Unexpected_eoi "doctype") !throw (fun () -> emit_doctype ~quirks:true l' doctype) | Some (_, c) -> doctype.doctype_name <- add_doctype_char doctype.doctype_name (to_lowercase c); doctype_name_state l' doctype end (* 8.2.5.54. 
*) and doctype_name_state l' doctype = next_option input !throw begin function | Some (_, (0x0009 | 0x000A | 0x000C | 0x0020)) -> after_doctype_name_state l' doctype | Some (_, 0x003E) -> emit_doctype l' doctype | Some (l, 0) -> report l (`Bad_token ("U+0000", "doctype", "null")) !throw (fun () -> doctype.doctype_name <- add_doctype_char doctype.doctype_name u_rep; doctype_name_state l' doctype) | None -> report (get_location ()) (`Unexpected_eoi "doctype") !throw (fun () -> emit_doctype ~quirks:true l' doctype) | Some (_, c) -> doctype.doctype_name <- add_doctype_char doctype.doctype_name (to_lowercase c); doctype_name_state l' doctype end (* 8.2.4.55. *) and after_doctype_name_state l' doctype = next_option input !throw begin function | Some (_, (0x0009 | 0x000A | 0x000C | 0x0020)) -> after_doctype_name_state l' doctype | Some (_, 0x003E) -> emit_doctype l' doctype | None -> report (get_location ()) (`Unexpected_eoi "doctype") !throw (fun () -> emit_doctype ~quirks:true l' doctype) | Some (l'', c as v) -> push input v; next_n 6 input !throw begin fun l -> match sequence_to_lowercase l with | [_, 0x70; _, 0x75; _, 0x62; _, 0x6C; _, 0x69; _, 0x63] -> after_doctype_public_keyword_state l' doctype | [_, 0x73; _, 0x79; _, 0x73; _, 0x74; _, 0x65; _, 0x6D] -> after_doctype_system_keyword_state l' doctype | vs -> push_list input vs; report l'' (`Bad_token (char c, "doctype", "expected 'PUBLIC' or 'SYSTEM'")) !throw (fun () -> doctype.force_quirks <- true; bogus_doctype_state l' doctype) end end (* Helper. *) and begin_public_identifier quote l' doctype = doctype.Doctype_buffers.public_identifier <- Some (Buffer.create 32); doctype_identifier_quoted_state (fun doctype c -> doctype.Doctype_buffers.public_identifier <- add_doctype_char doctype.Doctype_buffers.public_identifier c) quote after_doctype_public_identifier_state l' doctype (* Helper. 
*) and begin_system_identifier quote l' doctype = doctype.Doctype_buffers.system_identifier <- Some (Buffer.create 32); doctype_identifier_quoted_state (fun doctype c -> doctype.system_identifier <- add_doctype_char doctype.system_identifier c) quote after_doctype_system_identifier_state l' doctype (* 8.2.4.56. *) and after_doctype_public_keyword_state l' doctype = next_option input !throw begin function | Some (_, (0x0009 | 0x000A | 0x000C | 0x0020)) -> before_doctype_public_identifier_state l' doctype | Some (l, (0x0022 | 0x0027 as c)) -> report l (`Bad_token (char c, "doctype", "expected whitespace")) !throw (fun () -> begin_public_identifier c l' doctype) | Some (l, 0x003E) -> report l (`Bad_token (">", "doctype", "expected public identifier")) !throw (fun () -> emit_doctype ~quirks:true l' doctype) | None -> report (get_location ()) (`Unexpected_eoi "doctype") !throw (fun () -> emit_doctype ~quirks:true l' doctype) | Some (l, c) -> report l (`Bad_token (char c, "doctype", "expected whitespace")) !throw (fun () -> doctype.force_quirks <- true; bogus_doctype_state l' doctype) end (* 8.2.4.57. *) and before_doctype_public_identifier_state l' doctype = next_option input !throw begin function | Some (_, (0x0009 | 0x000A | 0x000C | 0x0020)) -> before_doctype_public_identifier_state l' doctype | Some (_, (0x0022 | 0x0027 as c)) -> begin_public_identifier c l' doctype | Some (l, 0x003E) -> report l (`Bad_token (">", "doctype", "expected public identifier")) !throw (fun () -> emit_doctype ~quirks:true l' doctype) | None -> report (get_location ()) (`Unexpected_eoi "doctype") !throw (fun () -> emit_doctype ~quirks:true l' doctype) | Some (l, c) -> report l (`Bad_token (char c, "doctype", "public identifier must be quoted")) !throw (fun () -> doctype.force_quirks <- true; bogus_doctype_state l' doctype) end (* 8.2.4.58, 8.2.4.59, 8.2.4.64, 8.2.4.65. 
*) and doctype_identifier_quoted_state add quote next_state l' doctype = next_option input !throw begin function | Some (_, c) when c = quote -> next_state l' doctype | Some (l, 0) -> report l (`Bad_token ("U+0000", "doctype", "null")) !throw (fun () -> add doctype u_rep; doctype_identifier_quoted_state add quote next_state l' doctype) | Some (l, 0x003E) -> report l (`Bad_token (">", "doctype", "'>' in identifier")) !throw (fun () -> emit_doctype ~quirks:true l' doctype) | None -> report (get_location ()) (`Unexpected_eoi "doctype") !throw (fun () -> emit_doctype ~quirks:true l' doctype) | Some (_, c) -> add doctype c; doctype_identifier_quoted_state add quote next_state l' doctype end (* 8.2.4.60. *) and after_doctype_public_identifier_state l' doctype = next_option input !throw begin function | Some (_, (0x0009 | 0x000A | 0x000C | 0x0020)) -> between_doctype_public_and_system_identifiers l' doctype | Some (_, 0x003E) -> emit_doctype l' doctype | Some (l, (0x0022 | 0x0027 as c)) -> report l (`Bad_token (char c, "doctype", "expected whitespace")) !throw (fun () -> begin_system_identifier c l' doctype) | None -> report (get_location ()) (`Unexpected_eoi "doctype") !throw (fun () -> emit_doctype ~quirks:true l' doctype) | Some (l, c) -> report l (`Bad_token (char c, "doctype", "system identifier must be quoted")) !throw (fun () -> doctype.force_quirks <- true; bogus_doctype_state l' doctype) end (* 8.2.4.61. 
*) and between_doctype_public_and_system_identifiers l' doctype = next_option input !throw begin function | Some (_, (0x0009 | 0x000A | 0x000C | 0x0020)) -> between_doctype_public_and_system_identifiers l' doctype | Some (_, 0x003E) -> emit_doctype l' doctype | Some (_, (0x0022 | 0x0027 as c)) -> begin_system_identifier c l' doctype | None -> report (get_location ()) (`Unexpected_eoi "doctype") !throw (fun () -> emit_doctype ~quirks:true l' doctype) | Some (l, c) -> report l (`Bad_token (char c, "doctype", "system identifier must be quoted")) !throw (fun () -> doctype.force_quirks <- true; bogus_doctype_state l' doctype) end (* 8.2.4.62. *) and after_doctype_system_keyword_state l' doctype = next_option input !throw begin function | Some (_, (0x0009 | 0x000A | 0x000C | 0x0020)) -> before_doctype_system_identifier_state l' doctype | Some (l, (0x0022 | 0x0027 as c)) -> report l (`Bad_token (char c, "doctype", "expected whitespace")) !throw (fun () -> begin_system_identifier c l' doctype) | Some (l, 0x003E) -> report l (`Bad_token (">", "doctype", "expected system identifier")) !throw (fun () -> emit_doctype ~quirks:true l' doctype) | None -> report (get_location ()) (`Unexpected_eoi "doctype") !throw (fun () -> emit_doctype ~quirks:true l' doctype) | Some (l, c) -> report l (`Bad_token (char c, "doctype", "expected whitespace")) !throw (fun () -> doctype.force_quirks <- true; bogus_doctype_state l' doctype) end (* 8.2.4.63. 
*) and before_doctype_system_identifier_state l' doctype = next_option input !throw begin function | Some (_, (0x0009 | 0x000A | 0x000C | 0x0020)) -> before_doctype_system_identifier_state l' doctype | Some (_, (0x0022 | 0x0027 as c)) -> begin_system_identifier c l' doctype | Some (l, 0x003E) -> report l (`Bad_token (">", "doctype", "expected system identifier")) !throw (fun () -> emit_doctype ~quirks:true l' doctype) | None -> report (get_location ()) (`Unexpected_eoi "doctype") !throw (fun () -> emit_doctype ~quirks:true l' doctype) | Some (l, c) -> report l (`Bad_token (char c, "doctype", "system identifier must be quoted")) !throw (fun () -> doctype.force_quirks <- true; bogus_doctype_state l' doctype) end (* 8.2.4.66. *) and after_doctype_system_identifier_state l' doctype = next_option input !throw begin function | Some (_, (0x0009 | 0x000A | 0x000C | 0x0020)) -> after_doctype_system_identifier_state l' doctype | Some (_, 0x003E) -> emit_doctype l' doctype | None -> report (get_location ()) (`Unexpected_eoi "doctype") !throw (fun () -> emit_doctype ~quirks:true l' doctype) | Some (l, c) -> report l (`Bad_token (char c, "doctype", "junk after system identifier")) !throw (fun () -> bogus_doctype_state l' doctype) end (* 8.2.4.67. *) and bogus_doctype_state l' doctype = next_option input !throw begin function | Some (_, 0x003E) -> emit_doctype l' doctype | None -> emit_doctype l' doctype | _ -> bogus_doctype_state l' doctype end (* 8.2.4.68. 
*) and cdata_section_state () = next_option input !throw begin function | None -> data_state () | Some (l, 0x005D) -> peek_n 2 input !throw begin function | [_, 0x005D; _, 0x003E] -> next_n 2 input !throw (fun _ -> data_state ()) | _ -> emit (l, `Char 0x005D) cdata_section_state end | Some (l, c) -> emit (l, `Char c) cdata_section_state end in let stream = (fun throw_ e k -> throw := throw_; ended := e; output := k; !current_state ()) |> make in let set_state = function | `Data -> current_state := data_state | `RCDATA -> current_state := rcdata_state | `RAWTEXT -> current_state := rawtext_state | `Script_data -> current_state := script_data_state | `PLAINTEXT -> current_state := plaintext_state in let set_foreign = (:=) foreign in stream, set_state, set_foreign markup.ml-1.0.3/src/html_tokenizer.mli000066400000000000000000000010511421357706400177570ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open Common type token = [ `Doctype of doctype | `Start of Token_tag.t | `End of Token_tag.t | `Char of int | `Comment of string | `EOF ] type state = [ `Data | `RCDATA | `RAWTEXT | `Script_data | `PLAINTEXT ] val tokenize : Error.parse_handler -> (location * int) Kstream.t * (unit -> location) -> (location * token) Kstream.t * (state -> unit) * ((unit -> bool) -> unit) markup.ml-1.0.3/src/html_writer.ml000066400000000000000000000123461421357706400171210ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. 
*) open Common let escape_attribute s = let buffer = Buffer.create (String.length s) in Uutf.String.fold_utf_8 (fun () _ -> function | `Malformed _ -> () | `Uchar c -> let c = Uchar.to_int c in match c with | 0x0026 -> Buffer.add_string buffer "&" | 0x00A0 -> Buffer.add_string buffer " " | 0x0022 -> Buffer.add_string buffer """ | _ -> add_utf_8 buffer c) () s; Buffer.contents buffer let escape_text s = let buffer = Buffer.create (String.length s) in Uutf.String.fold_utf_8 (fun () _ -> function | `Malformed _ -> () | `Uchar c -> let c = Uchar.to_int c in match c with | 0x0026 -> Buffer.add_string buffer "&" | 0x00A0 -> Buffer.add_string buffer " " | 0x003C -> Buffer.add_string buffer "<" | 0x003E -> Buffer.add_string buffer ">" | _ -> add_utf_8 buffer c) () s; Buffer.contents buffer let void_elements = ["area"; "base"; "basefont"; "bgsound"; "br"; "col"; "embed"; "frame"; "hr"; "img"; "input"; "keygen"; "link"; "meta"; "param"; "source"; "track"; "wbr"] let prepend_newline_for = ["pre"; "textarea"; "listing"] let rec starts_with_newline = function | [] -> false | s::more -> if String.length s = 0 then starts_with_newline more else s.[0] = '\x0A' open Kstream let literal_text_elements = ["style"; "script"; "xmp"; "iframe"; "noembed"; "noframes"; "plaintext"] let write ?(escape_attribute=escape_attribute) ?(escape_text=escape_text) signals = let open_elements = ref [] in let in_literal_text_element () = match !open_elements with | element :: _ -> List.mem element literal_text_elements | _ -> false in let rec queue = ref next_signal and emit_list l throw e k = match l with | [] -> next_signal throw e k | s::more -> queue := emit_list more; k s and next_signal throw e k = next signals throw e begin function | `Start_element ((ns, name') as name, attributes) -> let tag_name = match name with | ns, local_name when list_mem_string ns [html_ns; svg_ns; mathml_ns] -> local_name | ns, local_name when ns = xml_ns -> "xml:" ^ local_name | ns, local_name when ns = xmlns_ns -> 
"xmlns:" ^ local_name | ns, local_name when ns = xlink_ns -> "xlink:" ^ local_name | _, local_name -> (* An error. *) local_name in let attributes = attributes |> List.map (fun ((ns, local_name) as name, value) -> let name = match name with | "", _ -> local_name | _ when ns = xml_ns -> "xml:" ^ local_name | _, "xmlns" when ns = xmlns_ns -> "xmlns" | _ when ns = xmlns_ns -> "xmlns:" ^ local_name | _ when ns = xlink_ns -> "xlink:" ^ local_name | _ -> (* An error. *) local_name in name, value) in let rec prepend_attributes words = function | [] -> words | (name, value)::more -> prepend_attributes (" "::name::"=\""::(escape_attribute value)::"\""::words) more in let tag = "<"::tag_name::(prepend_attributes [">"] (List.rev attributes)) in let is_void = ns = html_ns && list_mem_string name' void_elements in if is_void then peek signals throw (fun () -> emit_list tag throw e k) (function | `End_element -> next_option signals throw (fun _ -> emit_list tag throw e k) | `Start_element _ | `Text _ | `Comment _ | `PI _ | `Xml _ | `Doctype _ -> open_elements := tag_name::!open_elements; emit_list tag throw e k) else begin open_elements := tag_name::!open_elements; if ns = html_ns && list_mem_string name' prepend_newline_for then peek_option signals throw (function | Some (`Text ss) when starts_with_newline ss -> emit_list (tag @ ["\n"]) throw e k | Some (`Text _ | `Start_element _ | `End_element | `Comment _ | `PI _ | `Doctype _ | `Xml _) | None -> emit_list tag throw e k) else emit_list tag throw e k end | `End_element -> begin match !open_elements with | [] -> next_signal throw e k | name::rest -> open_elements := rest; emit_list [""] throw e k end | `Text ss -> if List.for_all (fun s -> String.length s = 0) ss then next_signal throw e k else if in_literal_text_element () then emit_list ss throw e k else emit_list (List.map escape_text ss) throw e k | `Comment s -> emit_list [""] throw e k | `PI (target, s) -> emit_list [""] throw e k | `Doctype _ as doctype -> emit_list 
[signal_to_string doctype] throw e k | `Xml _ -> next_signal throw e k end in (fun throw e k -> !queue throw e k) |> make markup.ml-1.0.3/src/html_writer.mli000066400000000000000000000004511421357706400172640ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open Common val write : ?escape_attribute:(string -> string) -> ?escape_text:(string -> string) -> [< signal ] Kstream.t -> string Kstream.t markup.ml-1.0.3/src/input.ml000066400000000000000000000024001421357706400157060ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open Common open Kstream let preprocess is_valid_char report source = let first_char = ref true in let line = ref 1 in let column = ref 1 in let get_location () = !line, !column in let stream = (fun throw empty k -> let newline () = let location = !line, !column in line := !line + 1; column := 1; k (location, 0x0A) in let symbol c = let location = !line, !column in column := !column + 1; k (location, c) in let rec iterate () = next source throw empty (function | 0xFEFF when !first_char -> first_char := false; iterate () | 0x0D -> next source throw newline (function | 0x0A -> newline () | c -> push source c; newline ()) | 0x0A -> newline () | c when not (is_valid_char c) -> report (!line, !column) (`Bad_token (format_char c, "input", "out of range")) throw (fun () -> symbol c) | c -> symbol c) in iterate ()) |> make in stream, get_location markup.ml-1.0.3/src/input.mli000066400000000000000000000004421421357706400160630ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. 
*) open Common val preprocess : (int -> bool) -> Error.parse_handler -> int Kstream.t -> (location * int) Kstream.t * (unit -> location) markup.ml-1.0.3/src/kstream.ml000066400000000000000000000064041421357706400162250ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open Common type 'a t = {mutable f : exn cont -> unit cont -> 'a cont -> unit} let make f = {f} let construct c = let s = ref None in (fun throw e k -> match !s with | None -> c throw (fun s' -> s := Some s'; s'.f throw e k) | Some s' -> s'.f throw e k) |> make let empty () = (fun _ e _ -> e ()) |> make let next {f} throw e k = f throw e k let next_option {f} throw k = f throw (fun () -> k None) (fun v -> k (Some v)) let next_expected {f} throw k = f throw (fun () -> throw (Failure "stream empty")) k let next_n n s throw k = if n < 0 then throw (Invalid_argument "n is negative") else let rec iterate acc = function | 0 -> k (List.rev acc) | n -> next s throw (fun () -> iterate acc 0) (fun v -> iterate (v::acc) (n - 1)) in iterate [] n let push ({f} as s) v = s.f <- fun _ _ k -> s.f <- f; k v let push_option s = function | None -> () | Some v -> push s v let push_list ({f} as s) = function | [] -> () | vs -> let remainder = ref vs in s.f <- fun throw e k -> match !remainder with | [] -> s.f <- f; f throw e k | v::vs -> remainder := vs; k v let peek s throw e k = next s throw e (fun v -> push s v; k v) let peek_option s throw k = peek s throw (fun () -> k None) (fun v -> k (Some v)) let peek_expected s throw k = peek s throw (fun () -> throw (Failure "stream empty")) k let peek_n n s throw k = next_n n s throw (fun vs -> push_list s vs; k vs) let tap g ({f} as s) = (s.f <- fun throw e k -> f throw e (fun v -> g v; k v)); fun () -> s.f <- f let checkpoint s = let buffer = ref [] in let s' = (fun throw e k -> s.f throw e (fun v -> buffer := v::!buffer; k v)) |> make in let restore () = 
push_list s (List.rev !buffer) in s', restore let transform f init s = let current_acc = ref (Some init) in let to_emit = ref [] in let rec operate throw e k = match !to_emit with | v::more -> to_emit := more; k v | [] -> match !current_acc with | None -> e () | Some acc -> next s throw e (fun v -> f acc v throw (fun (vs, acc') -> to_emit := vs; current_acc := acc'; operate throw e k)) in make operate let map f s = (fun throw e k -> next s throw e (fun v -> f v throw k)) |> make let rec fold f v s throw k = next s throw (fun () -> k v) (fun v' -> f v v' throw (fun v'' -> fold f v'' s throw k)) let iter f s throw k = fold (fun () v throw k -> f v throw k) () s throw k let filter_map f s = let rec emit throw e k = next s throw e (fun v -> f v throw (function | None -> emit throw e k | Some v -> k v)) in make emit let filter f s = s |> filter_map (fun v throw k -> f v throw (function | true -> k (Some v) | false -> k None)) let of_list l = let l = ref l in (fun _ e k -> match !l with | [] -> e () | v::l' -> l := l'; k v) |> make let to_list s throw k = fold (fun l v _ k -> k (v::l)) [] s throw (fun l -> k (List.rev l)) let enumerate s = let index = ref 0 in s |> map (fun v _ k -> index := !index + 1; k ((!index - 1), v)) markup.ml-1.0.3/src/kstream.mli000066400000000000000000000036521421357706400164000ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) (* CPS-friendly streams with elimination form next : 'a t -> (exn -> unit) -> (unit -> unit) -> ('a -> unit) -> unit where next stream throw e k calls e () if the stream is empty, f v if the next value in the stream is v, and throw exn if retrieving the next value raises exception exn. The *_expected and *_n functions can pass Failure and Invalid_argument, respectively, to their exception continuations. 
Occurence of these exceptions indicates programming errors, since the functions are not part of the interface of Markup.ml, and the internal code should be calling them only when it is statically provable that the functions will succeed. *) open Common type 'a t val make : (exn cont -> unit cont -> 'a cont -> unit) -> 'a t val construct : 'a t cps -> 'a t val empty : unit -> 'a t val next : 'a t -> exn cont -> unit cont -> 'a cont -> unit val next_option : 'a t -> 'a option cps val next_expected : 'a t -> 'a cps val next_n : int -> 'a t -> 'a list cps val push : 'a t -> 'a -> unit val push_option : 'a t -> 'a option -> unit val push_list : 'a t -> 'a list -> unit val peek : 'a t -> exn cont -> unit cont -> 'a cont -> unit val peek_option : 'a t -> 'a option cps val peek_expected : 'a t -> 'a cps val peek_n : int -> 'a t -> 'a list cps val tap : ('a -> unit) -> 'a t -> (unit -> unit) val checkpoint : 'a t -> 'a t * (unit -> unit) val transform : ('a -> 'b -> ('c list * 'a option) cps) -> 'a -> 'b t -> 'c t val map : ('a -> 'b cps) -> 'a t -> 'b t val fold : ('a -> 'b -> 'a cps) -> 'a -> 'b t -> 'a cps val iter : ('a -> unit cps) -> 'a t -> unit cps val filter_map : ('a -> 'b option cps) -> 'a t -> 'b t val filter : ('a -> bool cps) -> 'a t -> 'a t val of_list : 'a list -> 'a t val to_list : 'a t -> 'a list cps val enumerate : 'a t -> (int * 'a) t markup.ml-1.0.3/src/lwt/000077500000000000000000000000001421357706400150275ustar00rootroot00000000000000markup.ml-1.0.3/src/lwt/dune000066400000000000000000000002371421357706400157070ustar00rootroot00000000000000(library (name markup_lwt) (public_name markup-lwt) (synopsis "Lwt support for Markup.ml") (instrumentation (backend bisect_ppx)) (libraries lwt markup)) markup.ml-1.0.3/src/lwt/markup_lwt.ml000066400000000000000000000014121421357706400175440ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. 
*) let ensure_tail_calls ?hook:_hook = ignore let to_cps thread = fun throw k -> let thread = thread () in match Lwt.state thread with | Lwt.Return x -> k x | Lwt.Fail e -> throw e | Lwt.Sleep -> Lwt.on_any thread k throw module Adapter = struct type 'a t = 'a Lwt.t let return = Lwt.return let of_cps f = let thread, wake = Lwt.wait () in f (Lwt.wakeup_later_exn wake) (Lwt.wakeup_later wake); thread let to_cps = to_cps end include Markup.Asynchronous (Adapter) let lwt_stream s = (fun () -> Lwt_stream.get s) |> stream let to_lwt_stream s = (fun () -> next s) |> Lwt_stream.from markup.ml-1.0.3/src/lwt/markup_lwt.mli000066400000000000000000000024071421357706400177220ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) (** Lwt interface to Markup.ml. The majority of the functions in this interface are listed in the signature {!Markup.ASYNCHRONOUS}, and are not directly included on this page. There are also additional Lwt functions in module {!Markup_lwt_unix}. Those are based on [Lwt_io], and have been separated to make this module [Markup_lwt] usable on [js_of_ocaml], which does not support [Lwt_io]. This module is available if Markup.ml is installed when Lwt is installed, i.e. {[ opam install lwt markup ]} To link with this module, depend on the findlib package [markup.lwt] instead of package [markup]. *) open Markup include Markup.ASYNCHRONOUS with type 'a io := 'a Lwt.t val lwt_stream : 'a Lwt_stream.t -> ('a, async) stream (** Adapts an Lwt stream to a Markup.ml stream. *) val to_lwt_stream : ('a, _) stream -> 'a Lwt_stream.t (** Adapts a Markup.ml stream to an Lwt stream. *) val ensure_tail_calls : ?hook:((exn -> unit) ref) -> unit -> unit (** @deprecated Not necessary since Markup.ml 0.7.4. 
*) (**/**) val to_cps : (unit -> 'a Lwt.t) -> (exn -> unit) -> ('a -> unit) -> unit (**/**) markup.ml-1.0.3/src/lwt_unix/000077500000000000000000000000001421357706400160725ustar00rootroot00000000000000markup.ml-1.0.3/src/lwt_unix/dune000066400000000000000000000003021421357706400167430ustar00rootroot00000000000000(library (name markup_lwt_unix) (public_name markup-lwt.unix) (synopsis "Lwt_unix helpers for Markup.ml") (instrumentation (backend bisect_ppx)) (libraries lwt lwt.unix markup markup-lwt)) markup.ml-1.0.3/src/lwt_unix/markup_lwt_unix.ml000066400000000000000000000031111421357706400216500ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) module Kstream = Markup__Kstream (* Lwt.Infix not available for Lwt 2.4.6 (Ocaml 4.00). *) let (>>=) = Lwt.(>>=) let channel c = let ended = ref false in (fun () -> if !ended then Lwt.return_none else Lwt_io.read_char_opt c >>= function | Some _ as v -> Lwt.return v | None -> ended := true; Lwt.return_none) |> Markup_lwt.stream let file = let open_file name = (fun () -> Lwt_io.open_file ~mode:Lwt_io.input name) |> Markup_lwt.to_cps in let close c k = ((fun () -> Lwt_io.close c) |> Markup_lwt.to_cps) (fun _ -> k ()) (fun _ -> k ()) in fun name -> let closed = ref false in let close_fn = ref (fun () -> closed := true; Lwt.return_unit) in let constructor throw k = open_file name throw (fun c -> if !closed then throw (Lwt_io.Channel_closed "input") else begin close_fn := (fun () -> Lwt_io.close c); let s = channel c |> Markup.kstream in (fun throw e k -> Kstream.next s (fun exn -> close c (fun () -> throw exn)) (fun () -> close c e) k) |> Kstream.make |> k end) in let s = Kstream.construct constructor |> Markup.of_kstream in let close () = !close_fn () in s, close let to_channel c s = s |> Markup_lwt.iter (Lwt_io.write_char c) let to_file name s = Lwt_io.with_file ~mode:Lwt_io.output name (fun c -> to_channel 
c s) markup.ml-1.0.3/src/lwt_unix/markup_lwt_unix.mli000066400000000000000000000030371421357706400220300ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) (** Stream functions based on [Lwt_io]. This module contains additional functions over {!Markup_lwt}. [Markup_lwt_unix] is available if Markup.ml is installed when Lwt is installed, i.e. {[ opam install lwt markup ]} To link with this module, depend on the findlib package [markup.lwt.unix] instead of [markup] or [markup.lwt]. *) open Markup val channel : Lwt_io.input Lwt_io.channel -> (char, async) stream (** Evaluates to a stream that retrieves successive bytes from the given channel. If the channel cannot be read, the next read of the stream results in the thread failing with an exception, as specified in [Lwt_io]. *) val file : string -> (char, async) stream * (unit -> unit Lwt.t) (** Evaluates to a pair [s, close], where reading from stream [s] retrieves successive bytes from the given file, and completing [close ()] closes the file. If the file cannot be opened, the first read of the stream results in failure with an exception, as specified in [Lwt_io]. If the file cannot be read, reading the stream results in the reading thread failing with an exception, also as in [Lwt_io]. *) val to_channel : Lwt_io.output Lwt_io.channel -> (char, _) stream -> unit Lwt.t (** Writes bytes from the given stream to the given channel. *) val to_file : string -> (char, _) stream -> unit Lwt.t (** Writes bytes from the given stream to the given file. *) markup.ml-1.0.3/src/markup.ml000066400000000000000000000205361421357706400160600ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. 
*) module type IO = sig type 'a t val return : 'a -> 'a t val of_cps : ((exn -> unit) -> ('a -> unit) -> unit) -> 'a t val to_cps : (unit -> 'a t) -> ((exn -> unit) -> ('a -> unit) -> unit) end module Synchronous : IO with type 'a t = 'a = struct type 'a t = 'a exception Not_synchronous let return x = x let of_cps f = let result = ref None in f raise (fun v -> result := Some v); match !result with | None -> raise Not_synchronous | Some v -> v let to_cps f = fun throw k -> match f () with | v -> k v | exception exn -> throw exn end type async = unit type sync = unit type ('data, 'sync) stream = 'data Kstream.t let kstream s = s let of_kstream s = s let of_list = Kstream.of_list type location = Common.location let compare_locations = Common.compare_locations module Error = Error type name = Common.name type xml_declaration = Common.xml_declaration = {version : string; encoding : string option; standalone : bool option} type doctype = Common.doctype = {doctype_name : string option; public_identifier : string option; system_identifier : string option; raw_text : string option; force_quirks : bool} type signal = Common.signal let signal_to_string = Common.signal_to_string type 's parser = {mutable location : location; mutable signals : (signal, 's) stream} let signals parser = parser.signals let location parser = parser.location let stream_to_parser s = let parser = {location = (1, 1); signals = Kstream.empty ()} in parser.signals <- s |> Kstream.map (fun (l, v) _ k -> parser.location <- l; k v); parser module Cps = struct let parse_xml report ?encoding namespace entity context source = let with_encoding (encoding : Encoding.t) k = source |> encoding ~report |> Input.preprocess Common.is_valid_xml_char report |> Xml_tokenizer.tokenize report entity |> Xml_parser.parse context namespace report |> k in let constructor throw k = match encoding with | Some encoding -> with_encoding encoding k | None -> Detect.select_xml source throw (fun encoding -> with_encoding encoding 
k) in Kstream.construct constructor |> stream_to_parser let write_xml report prefix signals = signals |> Xml_writer.write report prefix |> Utility.strings_to_bytes let parse_html report ?encoding context source = let with_encoding (encoding : Encoding.t) k = source |> encoding ~report |> Input.preprocess Common.is_valid_html_char report |> Html_tokenizer.tokenize report |> Html_parser.parse context report |> k in let constructor throw k = match encoding with | Some encoding -> with_encoding encoding k | None -> Detect.select_html source throw (fun encoding -> with_encoding encoding k) in Kstream.construct constructor |> stream_to_parser let write_html ?escape_attribute ?escape_text signals = signals |> Html_writer.write ?escape_attribute ?escape_text |> Utility.strings_to_bytes end let string = Stream_io.string let buffer = Stream_io.buffer let channel = Stream_io.channel let file = Stream_io.file let to_channel c bytes = Stream_io.to_channel c bytes |> Synchronous.of_cps let to_file f bytes = Stream_io.to_file f bytes |> Synchronous.of_cps let preprocess_input_stream source = Input.preprocess (fun _ -> true) Error.ignore_errors source include Utility module Ns = struct let html = Common.html_ns let svg = Common.svg_ns let mathml = Common.mathml_ns let xml = Common.xml_ns let xmlns = Common.xmlns_ns let xlink = Common.xlink_ns end module type ASYNCHRONOUS = sig type 'a io module Encoding : sig type t = Encoding.t val decode : ?report:(location -> Error.t -> unit io) -> t -> (char, _) stream -> (int, async) stream end val parse_xml : ?report:(location -> Error.t -> unit io) -> ?encoding:Encoding.t -> ?namespace:(string -> string option) -> ?entity:(string -> string option) -> ?context:[< `Document | `Fragment ] -> (char, _) stream -> async parser val write_xml : ?report:((signal * int) -> Error.t -> unit io) -> ?prefix:(string -> string option) -> ([< signal ], _) stream -> (char, async) stream val parse_html : ?report:(location -> Error.t -> unit io) -> 
?encoding:Encoding.t -> ?context:[< `Document | `Fragment of string ] -> (char, _) stream -> async parser val write_html : ?escape_attribute:(string -> string) -> ?escape_text:(string -> string) -> ([< signal ], _) stream -> (char, async) stream val fn : (unit -> char option io) -> (char, async) stream val to_string : (char, _) stream -> string io val to_buffer : (char, _) stream -> Buffer.t io val stream : (unit -> 'a option io) -> ('a, async) stream val next : ('a, _) stream -> 'a option io val peek : ('a, _) stream -> 'a option io val transform : ('a -> 'b -> ('c list * 'a option) io) -> 'a -> ('b, _) stream -> ('c, async) stream val fold : ('a -> 'b -> 'a io) -> 'a -> ('b, _) stream -> 'a io val map : ('a -> 'b io) -> ('a, _) stream -> ('b, async) stream val filter : ('a -> bool io) -> ('a, _) stream -> ('a, async) stream val filter_map : ('a -> 'b option io) -> ('a, _) stream -> ('b, async) stream val iter : ('a -> unit io) -> ('a, _) stream -> unit io val drain : ('a, _) stream -> unit io val to_list : ('a, _) stream -> 'a list io val load : ('a, _) stream -> ('a, sync) stream io val tree : ?text:(string list -> 'a) -> ?element:(name -> (name * string) list -> 'a list -> 'a) -> ?comment:(string -> 'a) -> ?pi:(string -> string -> 'a) -> ?xml:(xml_declaration -> 'a) -> ?doctype:(doctype -> 'a) -> ([< signal ], _) stream -> 'a option io end module Asynchronous (IO : IO) = struct let wrap_report report = fun l e -> IO.to_cps (fun () -> report l e) module Encoding = struct include Encoding let decode ?(report = fun _ _ -> IO.return ()) (f : Encoding.t) s = f ~report:(wrap_report report) s end let parse_xml ?(report = fun _ _ -> IO.return ()) ?encoding ?(namespace = fun _ -> None) ?(entity = fun _ -> None) ?context source = Cps.parse_xml (wrap_report report) ?encoding namespace entity context source let write_xml ?(report = fun _ _ -> IO.return ()) ?(prefix = fun _ -> None) signals = Cps.write_xml (wrap_report report) prefix signals let parse_html ?(report = fun _ 
_ -> IO.return ()) ?encoding ?context source = Cps.parse_html (wrap_report report) ?encoding context source let write_html ?escape_attribute ?escape_text signals = Cps.write_html ?escape_attribute ?escape_text signals let to_string bytes = Stream_io.to_string bytes |> IO.of_cps let to_buffer bytes = Stream_io.to_buffer bytes |> IO.of_cps let stream f = let f = IO.to_cps f in (fun throw e k -> f throw (function | None -> e () | Some v -> k v)) |> Kstream.make let fn = stream let next s = Kstream.next_option s |> IO.of_cps let peek s = Kstream.peek_option s |> IO.of_cps (* Without Flambda, thunks are repeatedly created and passed on IO.to_cps, resulting in a performance penalty. Flambda seems to optimize this away, however. *) let transform f v s = Kstream.transform (fun v s -> IO.to_cps (fun () -> f v s)) v s let fold f v s = Kstream.fold (fun v v' -> IO.to_cps (fun () -> f v v')) v s |> IO.of_cps let map f s = Kstream.map (fun v -> IO.to_cps (fun () -> f v)) s let filter f s = Kstream.filter (fun v -> IO.to_cps (fun () -> f v)) s let filter_map f s = Kstream.filter_map (fun v -> IO.to_cps (fun () -> f v)) s let iter f s = Kstream.iter (fun v -> IO.to_cps (fun () -> f v)) s |> IO.of_cps let drain s = iter (fun _ -> IO.return ()) s let to_list s = Kstream.to_list s |> IO.of_cps let load s = (fun throw k -> Kstream.to_list s throw (fun l -> k (Kstream.of_list l))) |> IO.of_cps let tree ?text ?element ?comment ?pi ?xml ?doctype s = Utility.tree ?text ?element ?comment ?pi ?xml ?doctype s |> IO.of_cps end include Asynchronous (Synchronous) markup.ml-1.0.3/src/markup.mli000066400000000000000000001051451421357706400162310ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) (** Error-recovering streaming HTML and XML parsers and writers. Markup.ml is an HTML and XML parsing and serialization library. 
It: - Is error-recovering, so you can get a best-effort parse of malformed input. - Reports all errors before recovery, so you can get strict parsing instead. - Conforms closely to the XML grammar and HTML parser from the respective specifications. - Accepts document fragments, but can be told to accept only full documents. - Detects character encodings automatically. - Supports both simple synchronous (this module) and non-blocking usage ({!Markup_lwt}). - Is streaming and lazy. Partial input is processed as soon as received, but only as needed. - Does one pass over the input and emits a stream of SAX-style parsing signals. A helper ({!tree}) allows that to be easily converted into DOM-style trees. The usage is straightforward. For example: {[ open Markup (* Correct and pretty-print HTML. *) channel stdin |> parse_html |> signals |> pretty_print |> write_html |> to_channel stdout (* Show up to 10 XML well-formedness errors to the user. Stop after the 10th, without reading more input. *) let report = let count = ref 0 in fun location error -> error |> Error.to_string ~location |> prerr_endline; count := !count + 1; if !count >= 10 then raise_notrace Exit string "some xml" |> parse_xml ~report |> signals |> drain (* Load HTML into a custom document tree data type. *) type html = Text of string | Element of string * html list file "some_file" |> fst |> parse_html |> signals |> tree ~text:(fun ss -> Text (String.concat "" ss)) ~element:(fun (_, name) _ children -> Element (name, children)) ]} The interface is centered around four functions. In pseudocode: {[ val parse_html : char stream -> signal stream val write_html : signal stream -> char stream val parse_xml : char stream -> signal stream val write_xml : signal stream -> char stream ]} Most of the remaining functions create streams from, or write streams to, strings, files, and channels, or manipulate streams, such as {!next} and the combinators {!map} and {!fold}. 
Apart from this module, Markup.ml provides two other top-level modules: {!modules:Markup_lwt Markup_lwt_unix} Most of the interface of {!Markup_lwt} is specified in signature {!ASYNCHRONOUS}, which will be shared with a [Markup_async] module, should it be implemented. Markup.ml is developed on {{:https://github.com/aantron/markup.ml} GitHub} and distributed under the {{:https://github.com/aantron/markup.ml/blob/master/LICENSE.md} MIT license}. This documentation is for version 1.0.0 of the library. Documentation for older versions can be found on the {{: https://github.com/aantron/markup.ml/releases} releases page}. *) (** {2 Streams} *) type async type sync (** Phantom types for use with [('a, 's) stream] in place of ['s]. See explanation below. *) type ('a, 's) stream (** Streams of elements of type ['a]. In simple usage, when using only this module [Markup], the additional type parameter ['s] is always [sync], and there is no need to consider it further. However, if you are using {!Markup_lwt}, you may create some [async] streams. The difference between the two is that {!next} on a [sync] stream retrieves an element before {!next} "returns," while {!next} on an [async] stream might not retrieve an element until later. As a result, it is not safe to pass an [async] stream where a [sync] stream is required. The phantom types are used to make the type checker catch such errors at compile time. *) (** {2 Errors} The parsers recover from errors automatically. If that is sufficient, you can ignore this section. However, if you want stricter behavior, or need to debug parser output, use optional argument [?report] of the parsers, and look in module {!Error}. *) type location = int * int (** Line and column for parsing errors. Both numbers are one-based. *) (** Error type and [to_string] function. 
*) module Error : sig type t = [ `Decoding_error of string * string | `Bad_token of string * string * string | `Unexpected_eoi of string | `Bad_document of string | `Unmatched_start_tag of string | `Unmatched_end_tag of string | `Bad_namespace of string | `Misnested_tag of string * string * (string * string) list | `Bad_content of string ] (** Errors reported by the parsers. A few of these are also used by the writers. - [`Decoding_error (bytes, encoding)] is reported by the decoders in module {! Encoding}. For example, if the UTF-8 decoder encounters a bare [0xA0] byte, it will report [`Decoding_error ("\xA0", "utf-8")]. - [`Bad_token (token, where, s)] is reported when there is a "local" problem with the syntax of the input stream, such as an invalid character or a duplicate attribute. For example, if the XML parser detects a [&] that is not part of an entity reference while reading an attribute, it will report [`Bad_token ("&", "attribute", "replace with '&'")] - [`Unexpected_eoi where] is reported by the parsers when the input ends before an item, such as a tag, element, or comment, is closed. [where] describes the kind of item that wasn't closed. - [`Bad_document s] is reported by the parsers when there is a problem with the top-level structure of the document. For example, if you are parsing an input stream as XML with [~context:`Document], and the parser finds an element after the root element, it will report [`Bad_document "not allowed after root element"]. - [`Unmatched_start_tag name] and [`Unmatched_end_tag name] are reported when tags aren't properly balanced. Note that not all unbalanced tags are parse errors in HTML. - [`Bad_namespace s] is reported by parsers when the prefix [s] can't be resolved to a namespace, and by the writers when the namespace [s] can't be resolved to a prefix (or the default namespace). - [`Misnested_tag (what, where, attributes)] is reported by the HTML parser when a tag appears where it is not allowed. 
For example, if the input has a [] tag inside a [

] tag, the parser will report [`Misnested_tag ("body", "p", [("class", "")])]. - [`Bad_content where] is reported by the HTML parser if an element has content it is not allowed to have. For example, if there is stray text at the top level of a [] element, the parser will report [`Bad_content "table"]. *) val to_string : ?location:location -> t -> string (** Converts an error to human-readable form. If [~location] is specified, location information is prepended to the output. *) end (** {2 Encodings} The parsers detect encodings automatically. If you need to specify an encoding, use optional argument [?encoding] of the parsers, and look in module {!Encoding}. *) (** Common Internet encodings such as UTF-8 and UTF-16; also includes some less popular encodings that are sometimes used for XML. *) module Encoding : sig type t (** Decoders. These are notionally maps from byte streams to Unicode scalar value streams, i.e. pseudocode type [char stream -> int stream]. *) val decode : ?report:(location -> Error.t -> unit) -> t -> (char, 's) stream -> (int, 's) stream (** Applies a decoder to a byte stream. Illegal input byte sequences result in calls to the error handler [~report] with error kind [`Decoding_error]. The illegal bytes are then skipped, and zero or more U+FFFD replacement characters are emitted. The default handler ignores errors. The locations provided to the error handler by the built-in decoders below in this module are fully accurate only if the input byte stream uses LF characters as line breaks. *) val utf_8 : t val utf_16be : t val utf_16le : t val utf_16 : t val iso_8859_1 : t val us_ascii : t val windows_1251 : t val windows_1252 : t val ucs_4be : t val ucs_4le : t val ucs_4be_transposed : t val ucs_4le_transposed : t val ebcdic : t (** Code page 37. *) end (** {2 Signals} *) type name = string * string (** Expanded name: a namespace URI followed by a local name. 
*) type xml_declaration = {version : string; encoding : string option; standalone : bool option} (** Representation of an XML declaration, i.e. []. *) type doctype = {doctype_name : string option; public_identifier : string option; system_identifier : string option; raw_text : string option; force_quirks : bool} (** Representation of a document type declaration. The HTML parser fills in all fields besides [raw_text]. The XML parser reads declarations roughly, and fills only the [raw_text] field with the text found in the declaration. *) type signal = [ `Start_element of name * (name * string) list | `End_element | `Text of string list | `Doctype of doctype | `Xml of xml_declaration | `PI of string * string | `Comment of string ] (** Parsing signals. The parsers emit them according to the following grammar: {[ doc ::= `Xml? misc* `Doctype? misc* element misc* misc ::= `PI | `Comment element ::= `Start_element content* `End_element content ::= `Text | element | `PI | `Comment ]} As a result, emitted [`Start_element] and [`End_element] signals are always balanced, and, if there is an XML declaration, it is the first signal. If parsing with [~context:`Document], the signal sequence will match the [doc] production until the first error. If parsing with [~context:`Fragment], it will match [content*]. If [~context] is not specified, the parser will pick one of the two by examining the input. As an example, if the XML parser is parsing {[ textmore text ]} it will emit the signal sequence {[ `Xml {version = "1.0"; encoding = None; standalone = None} `Start_element (("", "root"), []) `Text ["text"] `Start_element (("", "nested"), []) `Text ["more text"] `End_element `End_element ]} The [`Text] signal carries a [string list] instead of a single [string] because on 32-bit platforms, OCaml strings cannot be larger than 16MB. 
In case the parsers encounter a very long sequence of text, one whose length exceeds about [Sys.max_string_length / 2], they will emit a [`Text] signal with several strings. *) val signal_to_string : [< signal ] -> string (** Provides a human-readable representation of signals for debugging. *) (** {2 Parsers} *) type 's parser (** An ['s parser] is a thin wrapper around a [(signal, 's) stream] that supports access to additional information that is not carried directly in the stream, such as source locations. *) val signals : 's parser -> (signal, 's) stream (** Converts a parser to its underlying signal stream. *) val location : _ parser -> location (** Evaluates to the location of the last signal emitted on the parser's signal stream. If no signals have yet been emitted, evaluates to [(1, 1)]. *) (** {2 XML} *) val parse_xml : ?report:(location -> Error.t -> unit) -> ?encoding:Encoding.t -> ?namespace:(string -> string option) -> ?entity:(string -> string option) -> ?context:[< `Document | `Fragment ] -> (char, 's) stream -> 's parser (** Creates a parser that converts an XML byte stream to a signal stream. For simple usage, [string "foo" |> parse_xml |> signals]. If [~report] is provided, [report] is called for every error encountered. You may raise an exception in [report], and it will propagate to the code reading the signal stream. If [~encoding] is {e not} specified, the parser detects the input encoding automatically. Otherwise, the given encoding is used. [~namespace] is called when the parser is unable to resolve a namespace prefix. If it evaluates to [Some s], the parser maps the prefix to [s]. Otherwise, the parser reports [`Bad_namespace]. [~entity] is called when the parser is unable to resolve an entity reference. If it evaluates to [Some s], the parser inserts [s] into the text or attribute being parsed without any further parsing of [s]. [s] is assumed to be encoded in UTF-8. If [entity] evaluates to [None] instead, the parser reports [`Bad_token]. 
See {!xhtml_entity} if you are parsing XHTML. The meaning of [~context] is described at {! signal}, above. *) val write_xml : ?report:((signal * int) -> Error.t -> unit) -> ?prefix:(string -> string option) -> ([< signal ], 's) stream -> (char, 's) stream (** Converts an XML signal stream to a byte stream. If [~report] is provided, it is called for every error encountered. The first argument is a pair of the signal causing the error and its index in the signal stream. You may raise an exception in [report], and it will propagate to the code reading the byte stream. [~prefix] is called when the writer is unable to find a prefix in scope for a namespace URI. If it evaluates to [Some s], the writer uses [s] for the URI. Otherwise, the writer reports [`Bad_namespace]. *) (** {2 HTML} *) val parse_html : ?report:(location -> Error.t -> unit) -> ?encoding:Encoding.t -> ?context:[< `Document | `Fragment of string ] -> (char, 's) stream -> 's parser (** Similar to {!parse_xml}, but parses HTML with embedded SVG and MathML, never emits signals [`Xml] or [`PI], and [~context] has a different type on tag [`Fragment]. For HTML fragments, you should specify the enclosing element, e.g. [`Fragment "body"]. This is because, when parsing HTML, error recovery and the interpretation of text depend on the current element. For example, the text {[ foo ]} parses differently in [title] elements than in [p] elements. In the former, it is parsed as [foo], while in the latter, it is [foo] followed by a parse error due to unmatched tag []. To get these behaviors, set [~context] to [`Fragment "title"] and [`Fragment "p"], respectively. If you use [`Fragment "svg"], the fragment is assumed to be SVG markup. Likewise, [`Fragment "math"] causes the parser to parse MathML markup. If [~context] is omitted, the parser guesses it from the input stream. 
For example, if the first signal would be [`Doctype], the context is set to [`Document], but if the first signal would be [`Start_element "td"], the context is set to [`Fragment "tr"]. If the first signal would be [`Start_element "g"], the context is set to [`Fragment "svg"]. *) val write_html : ?escape_attribute:(string -> string) -> ?escape_text:(string -> string) -> ([< signal ], 's) stream -> (char, 's) stream (** Similar to {!write_xml}, but emits HTML5 instead of XML. If [~escape_attribute] and/or [~escape_text] are provided, they are used instead of default escaping functions. *) (** {2 Input sources} *) val string : string -> (char, sync) stream (** Evaluates to a stream that retrieves successive bytes from the given string. *) val buffer : Buffer.t -> (char, sync) stream (** Evaluates to a stream that retrieves successive bytes from the given buffer. Be careful of changing the buffer while it is being iterated by the stream. *) val channel : in_channel -> (char, sync) stream (** Evaluates to a stream that retrieves bytes from the given channel. If the channel cannot be read, the next read of the stream results in raising [Sys_error]. Note that this input source is synchronous because [Pervasives.in_channel] reads are blocking. For non-blocking channels, see {!Markup_lwt_unix}. *) val file : string -> (char, sync) stream * (unit -> unit) (** [file path] opens the file at [path], then evaluates to a pair [s, close], where reading from stream [s] retrieves successive bytes from the file, and calling [close ()] closes the file. The file is closed automatically if [s] is read to completion, or if reading [s] raises an exception. It is not necessary to call [close ()] in these cases. If the file cannot be opened, raises [Sys_error] immediately. If the file cannot be read, reading the stream raises [Sys_error]. *) val fn : (unit -> char option) -> (char, sync) stream (** [fn f] is a stream that retrives bytes by calling [f ()]. 
If the call results in [Some c], the stream emits [c]. If the call results in [None], the stream is considered to have ended. This is actually an alias for {!stream}, restricted to type [char]. *) (** {2 Output destinations} *) val to_string : (char, sync) stream -> string (** Eagerly retrieves bytes from the given stream and assembles a string. *) val to_buffer : (char, sync) stream -> Buffer.t (** Eagerly retrieves bytes from the given stream and places them into a buffer. *) val to_channel : out_channel -> (char, sync) stream -> unit (** Eagerly retrieves bytes from the given stream and writes them to the given channel. If writing fails, raises [Sys_error]. *) val to_file : string -> (char, sync) stream -> unit (** Eagerly retrieves bytes from the given stream and writes them to the given file. If writing fails, or the file cannot be opened, raises [Sys_error]. Note that the file is truncated (cleared) before writing. If you wish to append to file, open it with the appropriate flags and use [to_channel] on the resulting channel. *) (** {2 Stream operations} *) val stream : (unit -> 'a option) -> ('a, sync) stream (** [stream f] creates a stream that repeatedly calls [f ()]. Each time [f ()] evaluates to [Some v], the next item in the stream is [v]. The first time [f ()] evaluates to [None], the stream ends. *) val next : ('a, sync) stream -> 'a option (** Retrieves the next item in the stream, if any, and removes it from the stream. *) val peek : ('a, sync) stream -> 'a option (** Retrieves the next item in the stream, if any, but does not remove the item from the stream. *) val transform : ('a -> 'b -> 'c list * 'a option) -> 'a -> ('b, 's) stream -> ('c, 's) stream (** [transform f init s] lazily creates a stream by repeatedly applying [f acc v], where [acc] is an accumulator whose initial value is [init], and [v] is consecutive values of [s]. Each time, [f acc v] evaluates to a pair [(vs, maybe_acc')]. The values [vs] are added to the result stream. 
If [maybe_acc'] is [Some acc'], the accumulator is set to [acc']. Otherwise, if [maybe_acc'] is [None], the result stream ends. *) val fold : ('a -> 'b -> 'a) -> 'a -> ('b, sync) stream -> 'a (** [fold f init s] eagerly folds over the items [v], [v'], [v''], ... of [s], i.e. evaluates [f (f (f init v) v') v'']... *) val map : ('a -> 'b) -> ('a, 's) stream -> ('b, 's) stream (** [map f s] lazily applies [f] to each item of [s], and produces the resulting stream. *) val filter : ('a -> bool) -> ('a, 's) stream -> ('a, 's) stream (** [filter f s] is [s] without the items for which [f] evaluates to [false]. [filter] is lazy. *) val filter_map : ('a -> 'b option) -> ('a, 's) stream -> ('b, 's) stream (** [filter_map f s] lazily applies [f] to each item [v] of [s]. If [f v] evaluates to [Some v'], the result stream has [v']. If [f v] evaluates to [None], no item corresponding to [v] appears in the result stream. *) val iter : ('a -> unit) -> ('a, sync) stream -> unit (** [iter f s] eagerly applies [f] to each item of [s], i.e. evaluates [f v; f v'; f v'']... *) val drain : ('a, sync) stream -> unit (** [drain s] eagerly consumes [s]. This is useful for observing side effects, such as parsing errors, when you don't care about the parsing signals themselves. It is equivalent to [iter ignore s]. *) val of_list : 'a list -> ('a, sync) stream (** Produces a (lazy) stream from the given list. *) val to_list : ('a, sync) stream -> 'a list (** Eagerly converts the given stream to a list. *) (** {2 Utility} *) val content : ([< signal ], 's) stream -> (signal, 's) stream (** Filters out all signals besides [`Start_element], [`End_element], and [`Text]. 
*) val tree : ?text:(string list -> 'a) -> ?element:(name -> (name * string) list -> 'a list -> 'a) -> ?comment:(string -> 'a) -> ?pi:(string -> string -> 'a) -> ?xml:(xml_declaration -> 'a) -> ?doctype:(doctype -> 'a) -> ([< signal ], sync) stream -> 'a option (** This function's type signature may look intimidating, but it is actually easy to use. It is best introduced by example: {[ type my_dom = Text of string | Element of name * my_dom list "

HTML5 is easy to parse" |> string |> parse_html |> signals |> tree ~text:(fun ss -> Text (String.concat "" ss)) ~element:(fun (_ns, name) _attrs children -> Element (name, children)) ]} results in the structure {[ Element ("p" [ Text "HTML5 is "; Element ("em", [Text "easy"]); Text " to parse"]) ]} Formally, [tree] assembles a tree data structure of type ['a] from a signal stream. The stream is parsed according to the following grammar: {[ stream ::= node* node ::= element | `Text | `Comment | `PI | `Xml | `Doctype element ::= `Start_element node* `End_element ]} Each time [tree] matches a production of [node], it calls the corresponding function to convert the node into your tree type ['a]. For example, when [tree] matches [`Text ss], it calls [~text ss], if [~text] is supplied. Similarly, when [tree] matches [element], it calls [~element name attributes children], if [~element] is supplied. [tree] returns [None] when its input signal stream is empty. In terms of the original input bytes, this can correspond to either an empty input, or a non-empty input which the parser's error recovery completely discarded, producing no signals. See {!trees} if the input stream might have multiple top-level trees. This function [tree] only retrieves the first one. *) val trees : ?text:(string list -> 'a) -> ?element:(name -> (name * string) list -> 'a list -> 'a) -> ?comment:(string -> 'a) -> ?pi:(string -> string -> 'a) -> ?xml:(xml_declaration -> 'a) -> ?doctype:(doctype -> 'a) -> ([< signal ], 's) stream -> ('a, 's) stream (** Like {!tree}, but converts all top-level trees, not only the first one. The trees are emitted on the resulting stream, in the sequence that they appear in the input. *) type 'a node = [ `Element of name * (name * string) list * 'a list | `Text of string | `Doctype of doctype | `Xml of xml_declaration | `PI of string * string | `Comment of string ] (** See {!from_tree} below. 
*) val from_tree : ('a -> 'a node) -> 'a -> (signal, sync) stream (** Deconstructs tree data structures of type ['a] into signal streams. The function argument is applied to each data structure node. For example, {[ type my_dom = Text of string | Element of string * my_dom list let dom = Element ("p", [ Text "HTML5 is "; Element ("em", [Text "easy"]); Text " to parse"]) dom |> from_tree (function | Text s -> `Text s | Element (name, children) -> `Element (("", name), [], children)) ]} results in the signal stream {[ `Start_element (("", "p"), []) `Text ["HTML5 is "] `Start_element (("", "em"), []) `Text ["easy"] `End_element `Text " to parse" `End_element ]} *) val elements : (name -> (name * string) list -> bool) -> ([< signal ] as 'a, 's) stream -> (('a, 's) stream, 's) stream (** [elements f s] scans the signal stream [s] for [`Start_element (name, attributes)] signals that satisfy [f name attributes]. Each such matching signal is the beginning of a substream that ends with the corresponding [`End_element] signal. The result of [elements f s] is the stream of these substreams. Matches don't nest. If there is a matching element contained in another matching element, only the top one results in a substream. Code using [elements] does not have to read each substream to completion, or at all. However, once the using code has tried to get the next substream, it should not try to read a previous one. *) val text : ([< signal ], 's) stream -> (char, 's) stream (** Extracts all the text in a signal stream by discarding all markup. For each [`Text ss] signal, the result stream has the bytes of the strings [ss], and all other signals are ignored. *) val trim : (signal, 's) stream -> (signal, 's) stream (** Trims insignificant whitespace in an HTML signal stream. Whitespace around flow ("block") content does not matter, but whitespace in phrasing ("inline") content does. So, if the input stream is {[

foo bar

]} passing it through [Markup.trim] will result in {[

foo bar

]} Note that whitespace around the [] tag was preserved. *) val normalize_text : ([> `Text of string list ] as 'a, 's) stream -> ('a, 's) stream (** Concatenates adjacent [`Text] signals, then eliminates all empty strings, then all [`Text []] signals. Signals besides [`Text] are unaffected. Note that signal streams emitted by the parsers already have normalized text. This function is useful when you are inserting text into a signal stream after parsing, or generating streams from scratch, and would like to clean up the [`Text] signals. *) val pretty_print : (signal, 's) stream -> (signal, 's) stream (** Adjusts the whitespace in the [`Text] signals in the given stream so that the output appears nicely-indented when the stream is converted to bytes and written. This function is aware of the significance of whitespace in HTML, so it avoids changing the whitespace in phrasing ("inline") content. For example, pretty printing {[

foobar

]} results in {[

foobar

]} Note that no whitespace was inserted around [] and [], because doing so would create a word break that wasn't present in the original stream. *) val html5 : ([< signal ], 's) stream -> (signal, 's) stream (** Converts a signal stream into an HTML5 signal stream by stripping any document type declarations, XML declarations, and processing instructions, and prefixing the HTML5 doctype declaration. This is useful when converting between XHTML and HTML. *) val xhtml : ?dtd:[< `Strict_1_0 | `Transitional_1_0 | `Frameset_1_0 | `Strict_1_1 ] -> ([< signal ], 's) stream -> (signal, 's) stream (** Similar to {!html5}, but does not strip processing instructions, and prefixes an XHTML document type declaration and an XML declaration. The [~dtd] argument specifies which DTD to refer to in the doctype declaration. The default is [`Strict_1_1]. *) val xhtml_entity : string -> string option (** Translates XHTML entities. This function is for use with the [~entity] argument of {!parse_xml} when parsing XHTML. *) val strings_to_bytes : (string, 's) stream -> (char, 's) stream (** [strings_to_bytes s] is the stream of all the bytes of all strings in [s]. *) val compare_locations : location -> location -> int (** Orders locations according to their appearance in an input stream, i.e. first by line, and then, for locations on the same line, by column. *) (** {2 Namespaces} *) (** Common namespace URIs. *) module Ns : sig val html : string (** [http://www.w3.org/1999/xhtml]. Use for HTML and XHTML. *) val svg : string (** [http://www.w3.org/2000/svg]. *) val mathml : string (** [http://www.w3.org/1998/Math/MathML]. *) val xml : string (** [http://www.w3.org/XML/1998/namespace]. *) val xmlns : string (** [http://www.w3.org/2000/xmlns/]. *) val xlink : string (** [http://www.w3.org/1999/xlink]. 
*) end (** {2 Asynchronous interface} *) (**/**) module type IO = sig type 'a t val return : 'a -> 'a t val of_cps : ((exn -> unit) -> ('a -> unit) -> unit) -> 'a t val to_cps : (unit -> 'a t) -> ((exn -> unit) -> ('a -> unit) -> unit) end (**/**) (** Markup.ml interface for monadic I/O libraries such as Lwt and Async. This signature is implemented by {!Markup_lwt}, with a few additions. Each function here corresponds directly to the function in the basic module {!Markup} that has the same name. So, see {!Markup} for details. The only difference is that functions here, all of which are higher-order functions, take a function as argument that returns an ['a io] promise, rather than returning an already-computed value. *) module type ASYNCHRONOUS = sig (** {2 Promises} *) type 'a io (** Promise type. Replaced by ['a Lwt.t] in {!Markup_lwt}. *) (** {2 Encodings} *) (** Asynchronous counterpart to {!Markup.Encoding}. *) module Encoding : sig (**/**) type t = Encoding.t (**/**) val decode : ?report:(location -> Error.t -> unit io) -> Encoding.t -> (char, _) stream -> (int, async) stream end (** {2 XML} *) val parse_xml : ?report:(location -> Error.t -> unit io) -> ?encoding:Encoding.t -> ?namespace:(string -> string option) -> ?entity:(string -> string option) -> ?context:[< `Document | `Fragment ] -> (char, _) stream -> async parser val write_xml : ?report:((signal * int) -> Error.t -> unit io) -> ?prefix:(string -> string option) -> ([< signal ], _) stream -> (char, async) stream (** {2 HTML} *) val parse_html : ?report:(location -> Error.t -> unit io) -> ?encoding:Encoding.t -> ?context:[< `Document | `Fragment of string ] -> (char, _) stream -> async parser val write_html : ?escape_attribute:(string -> string) -> ?escape_text:(string -> string) -> ([< signal ], _) stream -> (char, async) stream (** {2 I/O} *) val fn : (unit -> char option io) -> (char, async) stream val to_string : (char, _) stream -> string io val to_buffer : (char, _) stream -> Buffer.t io (** {2 
Stream manipulation} *) val stream : (unit -> 'a option io) -> ('a, async) stream val next : ('a, _) stream -> 'a option io val peek : ('a, _) stream -> 'a option io val transform : ('a -> 'b -> ('c list * 'a option) io) -> 'a -> ('b, _) stream -> ('c, async) stream val fold : ('a -> 'b -> 'a io) -> 'a -> ('b, _) stream -> 'a io val map : ('a -> 'b io) -> ('a, _) stream -> ('b, async) stream val filter : ('a -> bool io) -> ('a, _) stream -> ('a, async) stream val filter_map : ('a -> 'b option io) -> ('a, _) stream -> ('b, async) stream val iter : ('a -> unit io) -> ('a, _) stream -> unit io val drain : ('a, _) stream -> unit io val to_list : ('a, _) stream -> 'a list io val load : ('a, _) stream -> ('a, sync) stream io (** [load s] converts a general stream [s] to a synchronous stream by buffering it. *) (** {2 Utility} *) val tree : ?text:(string list -> 'a) -> ?element:(name -> (name * string) list -> 'a list -> 'a) -> ?comment:(string -> 'a) -> ?pi:(string -> string -> 'a) -> ?xml:(xml_declaration -> 'a) -> ?doctype:(doctype -> 'a) -> ([< signal ], _) stream -> 'a option io end (**/**) module Asynchronous (IO : IO) : ASYNCHRONOUS with type 'a io := 'a IO.t val kstream : ('a, _) stream -> 'a Kstream.t val of_kstream : 'a Kstream.t -> ('a, _) stream val preprocess_input_stream : (int, 's) stream -> (location * int, 's) stream * (unit -> location) (**/**) (** {2 Conformance status} The HTML parser seeks to implement {{:https://www.w3.org/TR/html5/syntax.html} section 8 of the HTML5 specification}. That section describes a parser, part of a full-blown user agent, that is building up a DOM representation of an HTML document. Markup.ml is neither inherently part of a user agent, nor does it build up a DOM representation. With respect to section 8 of HTML5, Markup.ml is concerned with only the syntax. 
When that section requires that the user agent perform an action, Markup.ml emits enough information for a hypothetical user agent based on it to be able to decide to perform this action. Likewise, Markup.ml seeks to emit enough information for a hypothetical user agent to build up a conforming DOM. The XML parser seeks to be a non-validating implementation of the {{:https://www.w3.org/TR/xml/} XML} and {{:https://www.w3.org/TR/xml-names/} Namespaces in XML} specifications. This rest of this section lists known deviations from HTML5, XML, and Namespaces in XML. Some of these deviations are meant to be corrected in future versions of Markup.ml, while others will probably remain. The latter satisfy some or all of the following properties: - They require non-local adjustment, especially of past nodes. For example, adjusting the start signal of the root node mid-way through the signal stream is difficult for a one-pass parser. - They are minor. Users implementing less than a conforming browser typically don't care about them. They typically have to do with obscure error recovery. There are no deviations affecting the parsing of well-formed input. - They can easily be corrected by code written over Markup.ml that builds up a DOM or maintains other auxiliary data structures during parsing. {3 To be corrected:} - XML: There is no attribute value normalization. - HTML: {e foster parenting} is not implemented, because it requires non-local adjustments. - HTML: Quirks mode is not honored. This affects the interaction between automatic closing of [p] elements and opening of [table] elements. - HTML: The parser has non-standard recovery from unmatched closing [form] tags in {{: https://github.com/aantron/markup.ml/commit/0bf4f1b} some situations}. - HTML: The parser ignores interactions between [form] and [template]. - HTML: The form translation for [isindex] is completely ignored. [isindex] is handled as an unknown element. 
{3 To remain:} - HTML: Except when detecting encodings, the parser does not try to read [] tags for encoding declarations. The user of Markup.ml should read these, if necessary. They are part of the emitted signal stream. - HTML: [noscript] elements are always parsed, as are [script] elements. For conforming behavior, if the user of Markup.ml "supports scripts," the user should serialize the content of [noscript] to a [`Text] signal using [write_html]. - HTML: Elements such as [title] that belong in [head], but are found between [head] and [body], are not moved into [head]. - HTML: [] tags found in the body do not have their attributes added to the [`Start_element "html"] signal emitted at the beginning of the document. *) markup.ml-1.0.3/src/namespace.ml000066400000000000000000000140701421357706400165110ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open Common let list_map_cps : ('a -> 'b cps) -> 'a list -> 'b list cps = fun f l throw k -> let rec loop accumulator = function | [] -> k (List.rev accumulator) | x::l -> f x throw (fun x' -> loop (x'::accumulator) l) in loop [] l module Parsing = struct type context_entry = {f : string -> string option; previous : context_entry} type context = context_entry ref let parse qualified_name = try let colon_index = String.index qualified_name ':' in if colon_index = 0 then raise Not_found; let prefix = String.sub qualified_name 0 colon_index in let suffix = String.sub qualified_name (colon_index + 1) (String.length qualified_name - colon_index - 1) in prefix, suffix with Not_found -> ("", qualified_name) let init top_level = let f = function | "xml" -> Some xml_ns | "xmlns" -> Some xmlns_ns | s -> top_level s in let rec entry = {f; previous = entry} in ref entry let expand_element report context raw_element_name throw k = let ns, name = parse raw_element_name in match !context.f ns with | Some uri 
-> k (uri, name) | None -> match ns with | "" -> k ("", name) | prefix -> report () (`Bad_namespace prefix) throw (fun () -> k (prefix, name)) let push report context raw_element_name raw_attributes throw k = let parsed_attributes = raw_attributes |> List.map (fun (name, value) -> parse name, value) in let f = parsed_attributes |> List.fold_left (fun f -> function | ("xmlns", prefix), uri -> (fun p -> if p = prefix then Some uri else f p) | ("", "xmlns"), uri -> (fun p -> if p = "" then Some uri else f p) | _ -> f) !context.f in let entry = {f; previous = !context} in context := entry; expand_element report context raw_element_name throw (fun expanded_element_name -> list_map_cps begin fun (name, value) _ k -> match name with | "", "xmlns" -> k ((xmlns_ns, "xmlns"), value) | "", name -> k (("", name), value) | ns, name -> match f ns with | Some uri -> k ((uri, name), value) | None -> report () (`Bad_namespace ns) throw (fun () -> k ((ns, name), value)) end parsed_attributes throw (fun expanded_attributes -> k (expanded_element_name, expanded_attributes))) let pop ({contents = {previous}} as context) = context := previous end module StringMap = Map.Make (String) module Writing = struct type context_entry = {namespace_to_prefix : string list StringMap.t; prefix_to_namespace : string StringMap.t; previous : context_entry} type context = context_entry ref * (string -> string option) let init top_level = let namespace_to_prefix = StringMap.empty |> StringMap.add "" [""] |> StringMap.add xml_ns ["xml"] |> StringMap.add xmlns_ns ["xmlns"] in let prefix_to_namespace = StringMap.empty |> StringMap.add "" "" |> StringMap.add "xml" xml_ns |> StringMap.add "xmlns" xmlns_ns in let rec entry = {namespace_to_prefix; prefix_to_namespace; previous = entry} in ref entry, top_level let lookup report allow_default context namespace throw k = let candidate_prefixes = try StringMap.find namespace !(fst context).namespace_to_prefix with Not_found -> [] in let prefix = try Some 
(candidate_prefixes |> List.find (fun prefix -> (allow_default || prefix <> "") && begin try StringMap.find prefix !(fst context).prefix_to_namespace = namespace with Not_found -> false end)) with Not_found -> None in let prefix = match prefix with | Some _ -> prefix | None -> match snd context namespace with | None -> None | Some prefix -> if not allow_default && prefix = "" || StringMap.mem prefix !(fst context).prefix_to_namespace then None else Some prefix in match prefix with | None -> report () (`Bad_namespace namespace) throw (fun () -> k "") | Some prefix -> k prefix let format prefix name = match prefix with | "" -> name | prefix -> prefix ^ ":" ^ name let unexpand_element report context (namespace, name) throw k = lookup report true context namespace throw (fun prefix -> k (format prefix name)) let unexpand_attribute report context ((namespace, name), value) throw k = match namespace with | "" -> k (name, value) | uri -> if uri = xmlns_ns && name = "xmlns" then k ("xmlns", value) else lookup report false context namespace throw (fun prefix -> k (format prefix name, value)) let extend k v map = let vs = try StringMap.find k map with Not_found -> [] in StringMap.add k (v::vs) map let push report context element_name attributes throw k = let namespace_to_prefix, prefix_to_namespace = attributes |> List.fold_left (fun (ns_to_prefix, prefix_to_ns) -> function | (ns, "xmlns"), uri when ns = xmlns_ns -> extend uri "" ns_to_prefix, StringMap.add "" uri prefix_to_ns | (ns, prefix), uri when ns = xmlns_ns -> extend uri prefix ns_to_prefix, StringMap.add prefix uri prefix_to_ns | _ -> ns_to_prefix, prefix_to_ns) (!(fst context).namespace_to_prefix, !(fst context).prefix_to_namespace) in let entry = {namespace_to_prefix; prefix_to_namespace; previous = !(fst context)} in (fst context) := entry; unexpand_element report context element_name throw (fun element_name -> list_map_cps (unexpand_attribute report context) attributes throw (fun attributes -> k (element_name, 
attributes))) let pop ({contents = {previous}}, _ as context) = (fst context) := previous end markup.ml-1.0.3/src/namespace.mli000066400000000000000000000014211421357706400166560ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open Common module Parsing : sig type context val init : (string -> string option) -> context val push : unit Error.handler -> context -> string -> (string * string) list -> (name * (name * string) list) cps val pop : context -> unit val expand_element : unit Error.handler -> context -> string -> name cps val parse : string -> string * string end module Writing : sig type context val init : (string -> string option) -> context val push : unit Error.handler -> context -> name -> (name * string) list -> (string * (string * string) list) cps val pop : context -> unit end markup.ml-1.0.3/src/stream_io.ml000066400000000000000000000042541421357706400165420ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open Kstream let state_fold f initial = let state = ref initial in (fun throw e k -> f !state throw e (fun (c, new_state) -> state := new_state; k c)) |> make let string s = state_fold (fun i _ e k -> if i >= String.length s then e () else k (s.[i], i + 1)) 0 let buffer b = state_fold (fun i _ e k -> if i >= Buffer.length b then e () else k (Buffer.nth b i, i + 1)) 0 (* Optimized away by Flambda. 
*) type result = Count of int | Exn of exn let channel c = let ended = ref false in let buffer_length = 4096 in let buffer = Bytes.create buffer_length in let position = ref 0 in let buffered = ref 0 in (fun throw e k -> let position' = !position in if position' < !buffered then begin position := position' + 1; k (Bytes.get buffer position') end else let result = try Count (input c buffer 0 buffer_length) with exn -> Exn exn in match result with | Count 0 -> ended := true; e () | Count n -> position := 1; buffered := n; k (Bytes.get buffer 0) | Exn exn -> if !ended then e () else throw exn) |> make let file f = let c = open_in f in let s = channel c in let s' = (fun throw e k -> next s (fun exn -> close_in_noerr c; throw exn) (fun () -> close_in_noerr c; e ()) k) |> make in s', fun () -> close_in_noerr c let to_buffer s throw k = let buffer = Buffer.create 4096 in iter (fun b _ k -> Buffer.add_char buffer b; k ()) s throw (fun () -> k buffer) let to_string s throw k = to_buffer s throw (fun buffer -> k (Buffer.contents buffer)) let to_channel c s throw k = let write b throw k = let exn = try output_char c b; None with exn -> Some exn in match exn with | None -> k () | Some exn -> throw exn in iter write s throw k let to_file f s throw k = let c = open_out f in to_channel c s (fun exn -> close_out_noerr c; throw exn) (fun () -> close_out_noerr c; k ()) markup.ml-1.0.3/src/text.ml000066400000000000000000000025761421357706400155510ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open Common type t = {mutable strings : string list; buffer : Buffer.t; mutable location : location option} (* This is changed for unit testing. 
*) let length_limit = ref (Sys.max_string_length / 2) let prepare () = {strings = []; buffer = Buffer.create 256; location = None} let note_location text location = begin match text.location with | None -> text.location <- Some location | Some _ -> () end let adding text location = note_location text location; if Buffer.length text.buffer >= !length_limit then begin text.strings <- (Buffer.contents text.buffer)::text.strings; Buffer.clear text.buffer end let add text location c = adding text location; add_utf_8 text.buffer c (* This is only used for strings that are expected to be very small, at the moment. *) let add_string text location s = adding text location; Buffer.add_string text.buffer s let emit text = match text.location with | None -> None | Some location -> text.location <- None; if Buffer.length text.buffer = 0 then None else begin let strings = (Buffer.contents text.buffer)::text.strings |> List.rev in text.strings <- []; Buffer.clear text.buffer; Some (location, strings) end markup.ml-1.0.3/src/translate_entities/000077500000000000000000000000001421357706400201225ustar00rootroot00000000000000markup.ml-1.0.3/src/translate_entities/dune000066400000000000000000000000741421357706400210010ustar00rootroot00000000000000(executable (name translate_entities) (libraries yojson)) markup.ml-1.0.3/src/translate_entities/translate_entities.ml000066400000000000000000000026211421357706400243560ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) (* This is run to generate entities.ml. It is not part of the normal build process, as entities.ml is checked into the source repository. It is only needed when the layout of the data structure in entities.ml is changed. *) open! Yojson.Basic.Util let () = print_endline ("(* Copyright © 2014 W3C® (MIT, ERCIM, Keio, Beihang). 
This software or " ^ "document\n includes material copied from or derived from W3C " ^ "Recommendation HTML5\n " ^ "[https://www.w3.org/TR/2014/REC-html5-20141028/]. *)"); print_newline (); print_endline "(* Generated automatically from entities.json. *)"; print_newline (); print_string "let entities : "; print_string "(string * [ `One of int | `Two of int * int ]) array"; print_string " = [|\n "; Yojson.Basic.from_file "src/entities.json" |> to_assoc |> List.map (fun (k, v) -> let k = String.sub k 1 (String.length k - 2) in let v = v |> member "codepoints" |> to_list |> List.map to_int |> function | [c] -> Printf.sprintf "`One 0x%05X" c | [c; c'] -> Printf.sprintf "`Two (0x%05X, 0x%05X)" c c' | _ -> failwith "expected one or two code points" in Printf.sprintf "\"%s\", %s" k v) |> String.concat ";\n " |> print_endline; print_endline "|]" markup.ml-1.0.3/src/trie.ml000066400000000000000000000041031421357706400155140ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) (* Tries. These aren't fully functional nor fully mutable. To accumulate a trie, it is necessary to retain the latest result of [add]. However, previous tries become invalid after [add]. 
*) type 'a trie = | Empty | Leaf of 'a | Node of 'a option * 'a trie array let lower_limit = Char.code '0' let upper_limit = Char.code 'z' let array_size = upper_limit - lower_limit + 1 let create () = Empty let edge_index c = Char.code c - lower_limit let add key value trie = let rec traverse index trie = if index >= String.length key then match trie with | Empty | Leaf _ -> Leaf value | Node (_, children) -> Node (Some value, children) else let edge_index = edge_index key.[index] in let value', children, current_child = match trie with | Empty -> None, None, Empty | Leaf v -> Some v, None, Empty | Node (v, children) -> v, Some children, children.(edge_index) in let child = traverse (index + 1) current_child in let children = match children with | None -> Array.init array_size (fun i -> if i = edge_index then child else Empty) | Some children -> children.(edge_index) <- child; children in Node (value', children) in traverse 0 trie type 'a match_ = | No | Yes of 'a | Prefix | Multiple of 'a let matches = function | Empty -> No | Leaf v -> Yes v | Node (None, _) -> Prefix | Node (Some v, _) -> Multiple v let advance c = function | Empty | Leaf _ -> Empty | Node (_, children) -> if c < lower_limit || c > upper_limit then Empty else children.(c - lower_limit) let guess_memory_usage trie = let rec accumulate words = function | Empty -> words + 1 | Leaf _ -> words + 2 | Node (_, children) -> let words = words + 4 + Array.length children in Array.fold_left accumulate words children in accumulate 0 trie markup.ml-1.0.3/src/utility.ml000066400000000000000000000300261421357706400162570ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. 
*) open Common open Kstream let content s = let filter signal _ k = match signal with | `Start_element _ | `End_element | `Text _ as signal -> k (Some signal) | `Comment _ | `PI _ | `Doctype _ | `Xml _ -> k None in filter_map filter s let strings_to_bytes strings = let current_string = ref "" in let index = ref 0 in let rec emit throw e k = if !index < String.length !current_string then begin index := !index + 1; k (!current_string.[!index - 1]) end else next strings throw e (fun s -> current_string := s; index := 0; emit throw e k) in make emit let unwrap_lists ls = let current_list = ref [] in let rec emit throw e k = match !current_list with | v::l -> current_list := l; k v | [] -> next ls throw e (fun l -> current_list := l; emit throw e k) in make emit let trees ?text ?element ?comment ?pi ?xml ?doctype s = let rec match_node throw k none = next s throw none begin function | `Start_element (name, attributes) -> match_content [] throw (fun children -> match element with | None -> match_node throw k none | Some element -> k (element name attributes children)) | `End_element -> none () | `Text ss -> begin match text with | None -> match_node throw k none | Some text -> k (text ss) end | `Doctype d -> begin match doctype with | None -> match_node throw k none | Some doctype -> k (doctype d) end | `Xml x -> begin match xml with | None -> match_node throw k none | Some xml -> k (xml x) end | `PI (t, s) -> begin match pi with | None -> match_node throw k none | Some pi -> k (pi t s) end | `Comment s -> begin match comment with | None -> match_node throw k none | Some comment -> k (comment s) end end and match_content acc throw k = match_node throw (fun n -> match_content (n::acc) throw k) (fun () -> k (List.rev acc)) in (fun throw e k -> match_node throw k e) |> make let tree ?text ?element ?comment ?pi ?xml ?doctype s throw k = let s' = trees ?text ?element ?comment ?pi ?xml ?doctype s in next s' throw (fun () -> k None) (fun t -> k (Some t)) type 'a node = [ 
`Element of name * (name * string) list * 'a list | `Text of string | `Doctype of doctype | `Xml of xml_declaration | `PI of string * string | `Comment of string ] let from_tree f node = let rec traverse acc node = match f node with | `Element (name, attributes, children) -> children |> List.fold_left traverse ((`Start_element (name, attributes))::acc) |> fun acc -> `End_element::acc | `Text s -> (`Text [s])::acc | `Doctype _ | `Xml _ | `PI _ | `Comment _ as node -> node::acc in traverse [] node |> List.rev |> of_list let elements select s = let depth = ref 0 in let started = ref 0 in let finished = ref 0 in let rec scan throw e k = next s throw e begin fun signal -> match signal with | `Start_element (name, attributes) when !started = !finished && select name attributes -> let index = !started + 1 in started := index; depth := 0; let constructor _ k = push s signal; (fun throw e k -> if !finished >= index then e () else next s throw e begin fun signal -> match signal with | `Start_element _ -> depth := !depth + 1; k signal | `End_element -> depth := !depth - 1; if !depth = 0 then finished := index; k signal | `Text _ | `Comment _ | `PI _ | `Doctype _ | `Xml _ -> k signal end) |> make |> k in construct constructor |> k | `Start_element _ when !started > !finished -> depth := !depth + 1; scan throw e k | `End_element when !started > !finished -> depth := !depth - 1; if !depth = 0 then finished := !started; scan throw e k | `Text _ | `Start_element _ | `End_element | `Comment _ | `PI _ | `Doctype _ | `Xml _ -> scan throw e k end in make scan let text s = let filter v _ k = match v with | `Text ss -> k (Some ss) | `Start_element _ | `End_element | `Comment _ | `PI _ | `Doctype _ | `Xml _ -> k None in filter_map filter s |> unwrap_lists |> strings_to_bytes let normalize_text s = let rec match_text acc throw e k = next_option s throw begin function | Some (`Text ss) -> match_text (ss::acc) throw e k | v -> push_option s v; let ss = List.rev acc |> List.flatten |> 
List.filter (fun s -> String.length s > 0) in match ss with | [] -> match_other throw e k | _ -> k (`Text ss) end and match_other throw e k = next s throw e (function | `Text ss -> match_text [ss] throw e k | signal -> k signal) in make match_other let is_phrasing_element (namespace, element_name) = if namespace <> html_ns then false else match element_name with | "a" | "abbr" | "b" | "bdi" | "bdo" | "br" | "button" | "cite" | "code" | "data" | "dfn" | "em" | "i" | "img" | "input" | "kbd" | "label" | "mark" | "pre" | "q" | "rb" | "rt" | "ruby" | "s" | "samp" | "select" | "small" | "span" | "strong" | "sub" | "sup" | "textarea" | "time" | "u" | "var" | "wbr" -> true | _ -> false let rec trim_string_list trim = function | [] -> [] | s::more -> match trim s with | "" -> trim_string_list trim more | s -> s::more let trim signals = let signals = normalize_text signals in let signals_and_flow = Kstream.transform begin fun phrasing_nesting_level signal _throw k -> match signal with | `Start_element (name, _) -> if phrasing_nesting_level > 0 then k ([signal, false], Some (phrasing_nesting_level + 1)) else if is_phrasing_element name then k ([signal, false], Some 1) else k ([signal, true], Some 0) | `End_element -> if phrasing_nesting_level > 0 then k ([signal, false], Some (phrasing_nesting_level - 1)) else k ([signal, true], Some 0) | _ -> k ([signal, false], Some phrasing_nesting_level) end 0 signals in let signals = Kstream.transform begin fun saw_flow_tag (signal, is_flow_tag) throw k -> match signal with | `Text ss -> let ss = if saw_flow_tag then trim_string_list Common.trim_string_left ss else ss in Kstream.peek_option signals_and_flow throw (fun maybe_signal -> let ss = match maybe_signal with | Some (_, true) -> ss |> List.rev |> trim_string_list Common.trim_string_right |> List.rev | _ -> ss in k ([`Text ss], Some false)) | _ -> k ([signal], Some is_flow_tag) end true signals_and_flow in normalize_text signals let tab_width = 1 let pretty_print signals = let 
signals = trim signals in let indent n = let n = if n < 0 then 0 else n in String.make (n * tab_width) ' ' in let rec current_state = ref (fun throw e k -> flow 0 throw e k) and flow indentation throw e k = next signals throw e begin fun signal -> match signal with | `Start_element (name, _) when not @@ is_phrasing_element name -> (* If the next signal is `End_element, don't insert a line break. This is mainly for collapsing inherently empty tags like and
. *) peek_expected signals throw begin fun next_signal -> match next_signal with | `End_element -> next_expected signals throw begin fun _ -> list [`Text [indent indentation]; signal; next_signal; `Text ["\n"]] (flow indentation) throw e k end | _ -> list [`Text [indent indentation]; signal; `Text ["\n"]] (flow (indentation + 1)) throw e k end | `End_element -> list [`Text [indent (indentation - 1)]; signal; `Text ["\n"]] (flow (indentation - 1)) throw e k | `Start_element _ | `Text _ -> push signals signal; list [`Text [indent indentation]] (phrasing indentation 0) throw e k | `Doctype _ -> list [signal; `Text ["\n"]] (flow indentation) throw e k | _ -> list [signal] (flow indentation) throw e k end and phrasing indentation phrasing_nesting_level throw e k = next signals throw e begin fun signal -> match signal with | `Start_element (name, _) when is_phrasing_element name -> list [signal] (phrasing indentation (phrasing_nesting_level + 1)) throw e k | `End_element when phrasing_nesting_level > 0 -> list [signal] (phrasing indentation (phrasing_nesting_level - 1)) throw e k | `Text _ -> list [signal] (phrasing indentation phrasing_nesting_level) throw e k | _ -> push signals signal; list [`Text ["\n"]] (flow indentation) throw e k end and list signals state throw e k = match signals with | [] -> state throw e k | signal::more -> current_state := list more state; k signal in (fun throw e k -> !current_state throw e k) |> make |> normalize_text let html5 s = let remove_markup v _ k = match v with | `Doctype _ | `Xml _ | `PI _ -> k None | `Text _ | `Start_element _ | `End_element | `Comment _ as v -> k (Some v) in s |> filter_map remove_markup |> fun s -> push s (`Doctype {doctype_name = Some "html"; public_identifier = None; system_identifier = None; raw_text = None; force_quirks = false}); s let xhtml ?dtd s = let doctype_text = match dtd with | Some `Strict_1_0 -> "html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" " ^ 
"\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"" | Some `Transitional_1_0 -> "html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" " ^ "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"" | Some `Frameset_1_0 -> "html PUBLIC \"-//W3C//DTD XHTML 1.0 Frameset//EN\" " ^ "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd\"" | Some `Strict_1_1 | None -> "html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" " ^ "\"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\"" in let remove_markup v _ k = match v with | `Doctype _ | `Xml _ -> k None | `Text _ | `Start_element _ | `End_element | `Comment _ | `PI _ as v -> k (Some v) in s |> filter_map remove_markup |> fun s -> push s (`Doctype {doctype_name = None; public_identifier = None; system_identifier = None; raw_text = Some doctype_text; force_quirks = false}); push s (`Xml {version = "1.0"; encoding = Some "utf-8"; standalone = None}); s let xhtml_entity name = let rec lookup index = if index >= Array.length Entities.entities then raise Exit else if fst Entities.entities.(index) <> name then lookup (index + 1) else snd Entities.entities.(index) in try let buffer = Buffer.create 8 in match lookup 0 with | `One c -> add_utf_8 buffer c; Some (Buffer.contents buffer) | `Two (c, c') -> add_utf_8 buffer c; add_utf_8 buffer c'; Some (Buffer.contents buffer) with Exit -> None markup.ml-1.0.3/src/xml_parser.ml000066400000000000000000000153531421357706400167360ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. 
*) open Common open Kstream open Token_tag let is_whitespace_only strings = List.for_all is_whitespace_only strings let parse context namespace report tokens = let open_elements = ref [] in let namespaces = Namespace.Parsing.init namespace in let is_fragment = ref false in let fragment_allowed = ref true in let throw = ref (fun _ -> ()) in let ended = ref (fun _ -> ()) in let output = ref (fun _ -> ()) in let rec current_state = ref (fun () -> match context with | None -> initial_state [] | Some `Document -> fragment_allowed := false; document_state () | Some `Fragment -> is_fragment := false; content_state ()) and emit l signal state = current_state := state; !output (l, signal) and push_and_emit l {name = raw_name; attributes} state = Namespace.Parsing.push (fun () -> report l) namespaces raw_name attributes !throw (fun (expanded_name, attributes) -> let rec deduplicate acc attributes k = match attributes with | [] -> k (List.rev acc) | ((n, _) as attr)::more -> if acc |> List.exists (fun (n', _) -> n' = n) then report l (`Bad_token (snd n, "tag", "duplicate attribute")) !throw (fun () -> deduplicate acc more k) else deduplicate (attr::acc) more k in deduplicate [] attributes (fun attributes -> open_elements := (l, expanded_name, raw_name)::!open_elements; emit l (`Start_element (expanded_name, attributes)) state)) and pop l state = match !open_elements with | [] -> state () | _::more -> Namespace.Parsing.pop namespaces; open_elements := more; emit l `End_element state and emit_end () = current_state := (fun () -> !ended ()); !ended () and initial_state leading = next_expected tokens !throw begin function | _, (`Xml _ | `Doctype _ | `Start _ | `End _) as v -> push tokens v; push_list tokens (List.rev leading); document_state () | _, `Chars s as v when is_whitespace_only s -> initial_state (v::leading) | _, (`Comment _ | `PI _) as v -> initial_state (v::leading) | _, (`Chars _ | `EOF) as v -> is_fragment := true; push tokens v; push_list tokens (List.rev leading); 
content_state () end and document_state () = next_expected tokens !throw begin function | l, `Xml declaration -> fragment_allowed := false; emit l (`Xml declaration) doctype_state | v -> push tokens v; doctype_state () end and doctype_state () = next_expected tokens !throw begin function | l, `Doctype d -> fragment_allowed := false; emit l (`Doctype d) root_state | _, `Chars s when is_whitespace_only s -> doctype_state () | l, `Comment s -> emit l (`Comment s) doctype_state | l, `PI s -> emit l (`PI s) doctype_state | l, `Xml _ -> report l (`Bad_document "XML declaration must be first") !throw doctype_state | l, `Chars _ -> report l (`Bad_document "text at top level") !throw doctype_state | v -> push tokens v; root_state () end and root_state () = next_expected tokens !throw begin function | l, `Start t -> if t.self_closing then push_and_emit l t (fun () -> pop l after_root_state) else push_and_emit l t content_state | _, `Chars s when is_whitespace_only s -> root_state () | l, `Comment s -> emit l (`Comment s) root_state | l, `PI s -> emit l (`PI s) root_state | l, `Xml _ -> report l (`Bad_document "XML declaration must be first") !throw root_state | l, `EOF -> report l (`Unexpected_eoi "document before root element") !throw emit_end | l, _ -> report l (`Bad_document "expected root element") !throw root_state end and after_root_state () = next_expected tokens !throw begin function | _, `Chars s when is_whitespace_only s -> after_root_state () | l, `Comment s -> emit l (`Comment s) after_root_state | l, `PI s -> emit l (`PI s) after_root_state | _, `EOF -> emit_end () | _, (`Chars _ | `Start _ | `End _) as v when !fragment_allowed -> is_fragment := true; push tokens v; content_state () | l, _ as v -> report l (`Bad_document "not allowed after root element") !throw (fun () -> is_fragment := true; push tokens v; content_state ()) end and content_state () = next_expected tokens !throw begin function | l, `Start t -> if t.self_closing then push_and_emit l t (fun () -> 
pop l content_state) else push_and_emit l t content_state | l, `End {name = raw_name} -> Namespace.Parsing.expand_element (fun () -> report l) namespaces raw_name !throw (fun expanded_name -> let is_on_stack = !open_elements |> List.exists (fun (_, name, _) -> name = expanded_name) in if not is_on_stack then report l (`Unmatched_end_tag raw_name) !throw content_state else let rec pop_until_match () = match !open_elements with | (_, name, _)::_ when name = expanded_name -> pop l (fun () -> match !open_elements with | [] when not !is_fragment -> after_root_state () | _ -> content_state ()) | (l', _, name)::_ -> report l' (`Unmatched_start_tag name) !throw (fun () -> pop l pop_until_match) | _ -> failwith "impossible" in pop_until_match ()) | l, `Chars s -> emit l (`Text s) content_state | l, `PI s -> emit l (`PI s) content_state | l, `Comment s -> emit l (`Comment s) content_state | l, `EOF -> let rec pop_stack () = match !open_elements with | [] -> emit_end () | (l', _, raw_name)::_ -> report l' (`Unmatched_start_tag raw_name) !throw (fun () -> pop l pop_stack) in pop_stack () | l, `Xml _ -> report l (`Bad_document "XML declaration should be at top level") !throw content_state | l, `Doctype _ -> report l (`Bad_document "doctype should be at top level") !throw content_state end in (fun throw_ e k -> throw := throw_; ended := e; output := k; !current_state ()) |> make markup.ml-1.0.3/src/xml_parser.mli000066400000000000000000000005371421357706400171050ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. 
*) open Common val parse : [< `Document | `Fragment ] option -> (string -> string option) -> Error.parse_handler -> (location * Xml_tokenizer.token) Kstream.t -> (location * signal) Kstream.t markup.ml-1.0.3/src/xml_tokenizer.ml000066400000000000000000000721061421357706400174530ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open Common type token = [ `Xml of xml_declaration | `Doctype of doctype | `Start of Token_tag.t | `End of Token_tag.t | `Chars of string list | `PI of string * string | `Comment of string | `EOF ] let is_name_start_char c = is_in_range 0x0041 0x005A c || is_in_range 0x0061 0x007A c || c = 0x003A || c = 0x005F || is_in_range 0x00C0 0x00D6 c || is_in_range 0x00D8 0x00F6 c || is_in_range 0x00F8 0x02FF c || is_in_range 0x0370 0x037D c || is_in_range 0x037F 0x1FFF c || is_in_range 0x200C 0x200D c || is_in_range 0x2070 0x218F c || is_in_range 0x2C00 0x2FEF c || is_in_range 0x3001 0xD7EF c || is_in_range 0xF900 0xFDCF c || is_in_range 0xFDF0 0xFFFD c || is_in_range 0x10000 0xEFFFF c let is_name_char c = is_name_start_char c || is_in_range 0x0030 0x0039 c || c = 0x002D || c = 0x002E || c = 0x00B7 || is_in_range 0x0300 0x036F c || is_in_range 0x203F 0x2040 c let resolve_builtin_reference = function | "quot" -> Some "\"" | "amp" -> Some "&" | "apos" -> Some "'" | "lt" -> Some "<" | "gt" -> Some ">" | _ -> None open Kstream open Common.Token_tag let tokenize report resolve_reference (input, get_location) = let resolve_reference s = match resolve_builtin_reference s with | Some _ as v -> v | None -> resolve_reference s in let report_if = Error.report_if report in let throw = ref (fun _ -> ()) in let ended = ref (fun _ -> ()) in let output = ref (fun _ -> ()) in let parse_reference l' k = let input, restore = checkpoint input in let unresolved () = restore (); k None in let k s = k (Some s) in let unexpected_eoi () = report 
(get_location ()) (`Unexpected_eoi "reference") !throw (fun () -> unresolved ()) in let character_reference filter notation_prefix reference_prefix = let buffer = Buffer.create 32 in let rec read () = next input !throw unexpected_eoi begin function | _, 0x003B -> if Buffer.length buffer = 0 then report l' (`Bad_token (Printf.sprintf "&#%s;" reference_prefix, "reference", "empty character reference")) !throw unresolved else let s = Buffer.contents buffer in let maybe_n = try Some (int_of_string (notation_prefix ^ s)) with Failure _ -> None in begin match maybe_n with | None -> report l' (`Bad_token (Printf.sprintf "&#%s%s;" reference_prefix s, "reference", "number out of range")) !throw unresolved | Some n -> let utf_8_encoded = Buffer.create 8 in add_utf_8 utf_8_encoded n; k (Buffer.contents utf_8_encoded) end | _, c when filter c -> add_utf_8 buffer c; read () | l, c -> report l (`Bad_token (char c, "reference", "expected digit")) !throw unresolved end in read () in next input !throw unexpected_eoi begin function | _, 0x003B -> report l' (`Bad_token ("&;", "reference", "empty reference")) !throw unresolved | _, 0x0023 -> next input !throw unexpected_eoi begin function | _, 0x0078 -> character_reference is_hex_digit "0x" "x" | _, c as v when is_digit c || c = 0x003B -> push input v; character_reference is_digit "" "" | l, c -> report l (`Bad_token (char c, "reference", "expected digit")) !throw unresolved end | _, c when is_name_start_char c -> let buffer = Buffer.create 32 in add_utf_8 buffer c; let rec read () = next input !throw unexpected_eoi begin function | _, 0x003B -> let s = Buffer.contents buffer in begin match resolve_reference s with | Some s -> k s | None -> report l' (`Bad_token (s, "reference", "unknown entity")) !throw unresolved end | _, c when is_name_char c -> add_utf_8 buffer c; read () | l, c -> report l (`Bad_token (char c, "reference", "invalid name character")) !throw unresolved end in read () | l, c -> report l (`Bad_token (char c, 
"reference", "invalid start character")) !throw unresolved end in let extra_whitespace where l c k = report l (`Bad_token (char c, where, "whitespace not allowed here")) !throw k in let rec consume_whitespace k = next input !throw k (function | _, c when is_whitespace c -> consume_whitespace k | v -> push input v; k ()) in let parse_attribute with_references terminators l k' = let name_buffer = Buffer.create 32 in let value_buffer = Buffer.create 256 in let quote_opened = ref false in let quote_closed = ref false in let finish () = if Buffer.length name_buffer = 0 then k' None else let emit () = k' (Some (Buffer.contents name_buffer, Buffer.contents value_buffer)) in if !quote_opened then if not !quote_closed then report (get_location ()) (`Unexpected_eoi "attribute value") !throw emit else emit () else if Buffer.length value_buffer = 0 then report l (`Bad_token (Buffer.contents name_buffer, "attribute", "has no value")) !throw emit else emit () in let next' f = next input !throw finish begin function | _, c as v when List.mem c terminators -> push input v; finish (); | v -> f v end in let rec name_start_state () = next' begin function | l, c -> report_if (not @@ is_name_start_char c) l (fun () -> `Bad_token (char c, "attribute", "invalid start character")) !throw (fun () -> add_utf_8 name_buffer c; name_state ()) end and name_state () = next' begin function | _, 0x003D -> value_state () | l, c when is_whitespace c -> extra_whitespace "attribute" l c (fun () -> consume_whitespace equals_state) | l, c -> report_if (not @@ is_name_char c) l (fun () -> `Bad_token (char c, "attribute", "invalid name character")) !throw (fun () -> add_utf_8 name_buffer c; name_state ()) end and equals_state () = next' begin function | _, 0x003D -> value_state () | v -> push input v; finish () end and value_state () = next' begin function | l, c when is_whitespace c -> extra_whitespace "attribute" l c (fun () -> consume_whitespace value_state) | _, (0x0022 | 0x0027 as c) -> quote_opened 
:= true; quoted_value_state c | l, c as v -> push input v; report l (`Bad_token (char c, "attribute", "unquoted value")) !throw unquoted_value_state end and handle_ampersand l state = parse_reference l begin function | Some s -> Buffer.add_string value_buffer s; state () | None -> report l (`Bad_token ("&", "attribute", "replace with '&'")) !throw (fun () -> add_utf_8 value_buffer 0x0026; state ()) end and handle_lt l state = report l (`Bad_token ("<", "attribute", "replace with '<'")) !throw (fun () -> add_utf_8 value_buffer 0x003C; state ()) and quoted_value_state quote = next input !throw finish begin function | _, c when c = quote -> quote_closed := true; finish () | l, 0x0026 when with_references -> handle_ampersand l (fun () -> quoted_value_state quote) | l, 0x003C -> handle_lt l (fun () -> quoted_value_state quote) | _, c -> add_utf_8 value_buffer c; quoted_value_state quote end and unquoted_value_state () = next' begin function | _, c as v when is_whitespace c -> push input v; finish () | l, 0x0026 when with_references -> handle_ampersand l unquoted_value_state | l, 0x003C -> handle_lt l unquoted_value_state | _, c -> add_utf_8 value_buffer c; unquoted_value_state () end in name_start_state () in let parse_declaration_or_processing_instruction l k = let pi = "processing instruction" in let xml = "xml declaration" in let target_buffer = Buffer.create 32 in let text_buffer = Buffer.create 512 in let attributes = ref [] in let next' context finish f = let rec initial_state () = next input !throw (fun () -> report (get_location ()) (`Unexpected_eoi context) !throw finish) begin function | l, 0x003F -> question_mark_state l | v -> f v end and question_mark_state l = next input !throw (fun () -> report (get_location ()) (`Unexpected_eoi context) !throw finish) begin function | _, 0x003E -> finish () | v -> push input v; f (l, 0x003F) end in initial_state () in let rec target_start_state () = next' pi finish_pi begin function | l, c when is_whitespace c -> 
extra_whitespace pi l c (fun () -> consume_whitespace target_start_state) | l, c -> report_if (not @@ is_name_start_char c) l (fun () -> `Bad_token (char c, pi, "invalid start character")) !throw (fun () -> add_utf_8 target_buffer c; target_state ()) end and target_state () = next' pi finish_pi begin function | _, c when is_whitespace c -> if String.lowercase_ascii (Buffer.contents target_buffer) = "xml" then xml_declaration_state () else text_state () | l, c -> report_if (not @@ is_name_char c) l (fun () -> `Bad_token (char c, pi, "invalid name character")) !throw (fun () -> add_utf_8 target_buffer c; target_state ()) end and text_state () = next' pi finish_pi (fun (_, c) -> add_utf_8 text_buffer c; text_state ()) and xml_declaration_state () = next' xml finish_xml begin function | _, c when is_whitespace c -> xml_declaration_state () | _, 0x003F -> xml_declaration_state () | l, _ as v -> push input v; parse_attribute false [0x003F] l (function | None -> xml_declaration_state () | Some (name, value) -> attributes := (l, name, value)::!attributes; xml_declaration_state ()) end and finish_pi () = if Buffer.length target_buffer = 0 then report l (`Bad_token (" k None) else if String.lowercase_ascii (Buffer.contents target_buffer) = "xml" then finish_xml () else k (Some (`PI (Buffer.contents target_buffer, Buffer.contents text_buffer))) and finish_xml () = let split f l = let rec scan prefix = function | x::suffix when f x -> Some (List.rev prefix, x, suffix) | x::suffix -> scan (x::prefix) suffix | [] -> None in scan [] l in let matches s (_, name, _) = String.lowercase_ascii name = s in let version_valid s = String.length s = 3 && s.[0] = '1' && s.[1] = '.' 
&& is_digit (Char.code s.[2]) in let rec check_name attributes = let target = Buffer.contents target_buffer in report_if (target <> "xml") l (fun () -> `Bad_token (target, xml, "must be 'xml'")) !throw (fun () -> version_state attributes) and version_state attributes = match split (matches "version") attributes with | None -> report l (`Bad_token (" encoding_state "1.0" attributes) | Some (prefix, (l, name, value), suffix) -> report_if (name <> "version") l (fun () -> `Bad_token (name, xml, "must be 'version'")) !throw (fun () -> report_if (List.length prefix <> 0) l (fun () -> `Bad_token (name, xml, "must be first")) !throw (fun () -> report_if (not @@ version_valid value) l (fun () -> `Bad_token (value, xml, "must match 1.x")) !throw (fun () -> encoding_state value (prefix @ suffix)))) and encoding_state version attributes = match split (matches "encoding") attributes with | None -> standalone_state version None 0 attributes | Some (prefix, (l, name, value), suffix) -> report_if (name <> "encoding") l (fun () -> `Bad_token (name, xml, "must be 'encoding'")) !throw (fun () -> standalone_state version (Some value) (List.length prefix) (prefix @ suffix)) and standalone_state version encoding encoding_index attributes = match split (matches "standalone") attributes with | None -> final_state version encoding None attributes | Some (prefix, (l, name, value), suffix) -> report_if (name <> "standalone") l (fun () -> `Bad_token (name, xml, "must be 'standalone'")) !throw (fun () -> report_if (List.length prefix < encoding_index) l (fun () -> `Bad_token (name, xml, "must come after 'encoding'")) !throw (fun () -> (fun k -> match value with | "yes" -> k (Some true) | "no" -> k (Some false) | _ -> report l (`Bad_token (value, xml, "must be 'yes' or 'no'")) !throw (fun () -> match String.lowercase_ascii value with | "yes" -> k (Some true) | "no" -> k (Some false) | _ -> k None)) (fun v -> final_state version encoding v (prefix @ suffix)))) and final_state version encoding 
standalone attributes = (fun k -> match attributes with | (l, name, _)::_ -> report l (`Bad_token (name, xml, "not allowed here")) !throw k | [] -> k ()) (fun () -> k (Some (`Xml {version; encoding; standalone}))) in check_name (List.rev !attributes) in target_start_state () in let text = Text.prepare () in let note_character_location = Text.note_location text in let add_character = Text.add text in let add_string = Text.add_string text in let rec current_state = ref initial_state and emit' l t s = current_state := s; !output (l, t) and emit_chars state = match Text.emit text with | None -> state () | Some (l, strings) -> emit' l (`Chars strings) state and emit l t state = emit_chars (fun () -> emit' l t state) and emit_eoi ?during () = let l = get_location () in emit_chars (fun () -> (fun k' -> match during with | None -> k' () | Some production -> report l (`Unexpected_eoi production) !throw k') (fun () -> emit' l `EOF (fun () -> !ended ()))) and emit_start l name self_closing attributes state = let tag = {name = name; self_closing; attributes = List.rev attributes} in emit l (`Start tag) state and emit_end l name state = let tag = {name = name; self_closing = false; attributes = []} in emit l (`End tag) state and emit_doctype l buffer s = let doctype = {doctype_name = None; public_identifier = None; system_identifier = None; raw_text = Some (Buffer.contents buffer); force_quirks = false} in emit l (`Doctype doctype) s and lt_in_text l k = report l (`Bad_token ("<", "text", "replace with '<'")) !throw k and initial_state () = next input !throw (fun () -> emit_eoi ()) begin function | l, (0x005D as c) -> add_character l c; one_bracket_state l | l, 0x003C -> begin_markup_state l | l, (0x0026 as c) -> parse_reference l (function | None -> report l (`Bad_token (char c, "text", "replace with '&'")) !throw (fun () -> add_character l c; initial_state ()) | Some s -> add_string l s; initial_state ()) | l, c -> add_character l c; initial_state () end and one_bracket_state 
l' = next_option input !throw begin function | Some (l, (0x005D as c)) -> add_character l c; two_brackets_state l' l | v -> push_option input v; initial_state () end and two_brackets_state l' l'' = next_option input !throw begin function | Some (l, (0x003E as c)) -> report l' (`Bad_token ("]]>", "text", "must end a CDATA section")) !throw (fun () -> add_character l c; initial_state ()) | Some (l, (0x005D as c)) -> add_character l c; two_brackets_state l'' l | v -> push_option input v; initial_state () end and begin_markup_state l' = let recover v = lt_in_text l' (fun () -> add_character l' 0x003C; push_option input v; initial_state ()) in next input !throw (fun () -> report (get_location ()) (`Unexpected_eoi "tag") !throw (fun () -> recover None)) begin function | _, 0x0021 -> comment_cdata_or_doctype_state l' | _, 0x003F -> parse_declaration_or_processing_instruction l' (function | None -> initial_state () | Some token -> emit l' token initial_state) | _, 0x002F -> end_tag_state l' | _, c when is_name_start_char c -> let tag_name_buffer = Buffer.create 32 in add_utf_8 tag_name_buffer c; start_tag_state l' tag_name_buffer | l, c as v -> report l (`Bad_token (char c, "tag", "invalid start character")) !throw (fun () -> recover (Some v)) end and start_tag_state l' buffer = let recover v = lt_in_text l' (fun () -> add_character l' 0x003C; add_string l' (Buffer.contents buffer); push_option input v; initial_state ()) in next input !throw (fun () -> report (get_location ()) (`Unexpected_eoi "tag") !throw (fun () -> recover None)) begin function | _, 0x003E -> emit_start l' (Buffer.contents buffer) false [] initial_state | l, 0x002F -> close_empty_element_state l' l (Buffer.contents buffer) [] | _, c when is_whitespace c -> attributes_state l' (Buffer.contents buffer) [] | _, c when is_name_char c -> add_utf_8 buffer c; start_tag_state l' buffer | l, c as v -> report l (`Bad_token (char c, "tag", "invalid name character")) !throw (fun () -> recover (Some v)) end and 
attributes_state l' tag_name attributes = next input !throw begin fun () -> emit_start l' tag_name false attributes (fun () -> emit_eoi ~during:"tag" ()) end begin function | _, c when is_whitespace c -> attributes_state l' tag_name attributes | _, 0x003E -> emit_start l' tag_name false attributes initial_state | l, 0x002F -> close_empty_element_state l' l tag_name attributes | l, _ as v -> push input v; parse_attribute true [0x003E; 0x002F] l (function | None -> attributes_state l' tag_name attributes | Some (name, value) -> attributes_state l' tag_name ((name, value)::attributes)) end and close_empty_element_state l' l'' name attributes = next input !throw begin fun () -> emit_start l' name true attributes (fun () -> emit_eoi ~during:"tag" ()) end begin function | _, 0x003E -> emit_start l' name true attributes initial_state | v -> report l'' (`Bad_token (char 0x002F, "tag", "should be part of '/>'")) !throw (fun () -> push input v; attributes_state l' name attributes) end and end_tag_state l' = let recover v = lt_in_text l' (fun () -> add_character l' 0x003C; add_character l' 0x002F; push_option input v; initial_state ()) in next input !throw (fun () -> report (get_location ()) (`Unexpected_eoi "tag") !throw (fun () -> recover None)) begin function | _, c when is_name_start_char c -> let name_buffer = Buffer.create 32 in add_utf_8 name_buffer c; end_tag_name_state l' name_buffer | l, c as v -> report l (`Bad_token (char c, "tag", "invalid start character")) !throw (fun () -> recover (Some v)) end and end_tag_name_state l' buffer = let recover v = lt_in_text l' (fun () -> add_character l' 0x003C; add_character l' 0x002F; add_string l' (Buffer.contents buffer); push_option input v; initial_state ()) in next input !throw (fun () -> report (get_location ()) (`Unexpected_eoi "tag") !throw (fun () -> recover None)) begin function | _, 0x003E -> emit_end l' (Buffer.contents buffer) initial_state | _, c when is_whitespace c -> end_tag_whitespace_state false l' 
(Buffer.contents buffer) | _, c when is_name_char c -> add_utf_8 buffer c; end_tag_name_state l' buffer | l, c as v -> report l (`Bad_token (char c, "tag", "invalid name character")) !throw (fun () -> recover (Some v)) end and end_tag_whitespace_state reported l' name = next input !throw begin fun () -> emit_end l' name (fun () -> emit_eoi ~during:"tag" ()) end begin function | _, 0x003E -> emit_end l' name initial_state | _, c when is_whitespace c -> end_tag_whitespace_state reported l' name | l, c -> if not reported then report l (`Bad_token (char c, "tag", "attribute in end tag")) !throw (fun () -> end_tag_whitespace_state true l' name) else end_tag_whitespace_state reported l' name end and bad_comment_start s l k' = report l (`Bad_token (s, "comment", "should start with '"] throw e k end in (fun throw e k -> !queue throw e k) |> make markup.ml-1.0.3/src/xml_writer.mli000066400000000000000000000004261421357706400171220ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open Common val write : Error.write_handler -> (string -> string option) -> [< signal ] Kstream.t -> string Kstream.t markup.ml-1.0.3/test/000077500000000000000000000000001421357706400144115ustar00rootroot00000000000000markup.ml-1.0.3/test/dependency/000077500000000000000000000000001421357706400165275ustar00rootroot00000000000000markup.ml-1.0.3/test/dependency/dep_core.ml000066400000000000000000000004141421357706400206400ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. 
*) open Markup let (|>) x f = f x let () = string "foo" |> parse_html |> signals |> write_html |> to_string |> ignore markup.ml-1.0.3/test/dependency/dep_lwt.ml000066400000000000000000000003601421357706400205160ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) let (|>) x f = f x let () = [1; 2; 3] |> Markup.of_list |> Markup_lwt.to_list |> ignore markup.ml-1.0.3/test/dependency/dep_lwt_unix.ml000066400000000000000000000004021421357706400215560ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) let (|>) x f = f x let () = Markup.of_list [] |> Markup_lwt_unix.to_channel Lwt_io.stdout |> Lwt_main.run markup.ml-1.0.3/test/dependency/dune000066400000000000000000000003471421357706400174110ustar00rootroot00000000000000(executable (name dep_core) (modules dep_core) (libraries markup)) (executable (name dep_lwt) (modules dep_lwt) (libraries markup-lwt)) (executable (name dep_lwt_unix) (modules dep_lwt_unix) (libraries markup-lwt.unix)) markup.ml-1.0.3/test/dune000066400000000000000000000002401421357706400152630ustar00rootroot00000000000000(executable (name test) (libraries markup ounit2 test_support)) (rule (alias runtest) (package markup) (action (run %{exe:test.exe} -runner sequential))) markup.ml-1.0.3/test/js_of_ocaml/000077500000000000000000000000001421357706400166645ustar00rootroot00000000000000markup.ml-1.0.3/test/js_of_ocaml/dune000066400000000000000000000001211421357706400175340ustar00rootroot00000000000000(executable (name test_js_of_ocaml) (modes js) (libraries markup markup-lwt)) markup.ml-1.0.3/test/js_of_ocaml/test_js_of_ocaml.ml000066400000000000000000000003311421357706400225250ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. 
See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) let () = Markup.of_list [1; 2; 3] |> Markup_lwt.to_list |> ignore markup.ml-1.0.3/test/lwt_unix/000077500000000000000000000000001421357706400162625ustar00rootroot00000000000000markup.ml-1.0.3/test/lwt_unix/dune000066400000000000000000000002761421357706400171450ustar00rootroot00000000000000(executable (name test_lwt) (libraries lwt.unix markup-lwt.unix ounit2 test_support)) (rule (alias runtest) (package markup-lwt) (action (run %{exe:test_lwt.exe} -runner sequential))) markup.ml-1.0.3/test/lwt_unix/test_asynchronous.ml000066400000000000000000000021111421357706400224010ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open OUnit2 (* Lwt.Infix not available for Lwt 2.4.6 (Ocaml 4.00). *) let (>>=) = Lwt.(>>=) let (>|=) = Lwt.(>|=) open Markup_lwt let tests = [ ("asynchronous.stream,next" >:: fun _ -> let returned = ref false in let s = (fun () -> if not !returned then begin returned := true; Lwt.return (Some 1337) end else Lwt.return None) |> stream in (next s >|= assert_equal ~msg:"first" (Some 1337) >>= fun () -> next s >|= assert_equal ~msg:"second" None) |> Lwt_main.run); ("asynchronous.peek,drain" >:: fun _ -> let s = Markup.of_list [1; 2; 3] in (peek s >|= assert_equal ~msg:"1" (Some 1) >>= fun () -> peek s >|= assert_equal ~msg:"1b" (Some 1) >>= fun () -> next s >|= ignore >>= fun () -> peek s >|= assert_equal ~msg:"2" (Some 2) >>= fun () -> drain s >>= fun () -> peek s >|= assert_equal ~msg:"empty" None) |> Lwt_main.run) ] markup.ml-1.0.3/test/lwt_unix/test_lwt.ml000066400000000000000000000117611421357706400204670ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. 
*) open OUnit2 open Test_support (* Lwt.Infix not available for Lwt 2.4.6 (Ocaml 4.0.0). *) let (>>=) = Lwt.(>>=) let (>|=) = Lwt.(>|=) open Markup_lwt open Markup_lwt_unix let self = "./test_lwt.ml" let suite = "markup_lwt" >::: Test_asynchronous.tests @ [ ("lwt.stream" >:: fun _ -> let s = (fun () -> Lwt_unix.sleep 0.1 >|= fun () -> Some 1337) |> stream in next s >|= assert_equal (Some 1337) |> Lwt_main.run); ("lwt.stream.tail_call.to_cps" >:: fun _ -> let s = (fun () -> Lwt.return (Some 1337)) |> stream in let limit = 10000 in Lwt.catch (fun () -> fold (fun count _ -> if count >= limit then Lwt.fail Exit else Lwt.return (count + 1)) 0 s >|= ignore) (function | Exit -> Lwt.return_unit | exn -> Lwt.fail exn) |> Lwt_main.run); ("lwt.stream.tail_call.of_cps" >:: fun _ -> let t = ref (Lwt.wait ()) in let s = (fun () -> fst !t) |> stream in let rec repeat n = if n = 0 then Lwt.return_unit else begin let proceed = next s >>= (function | Some () -> repeat (n - 1) | None -> Lwt.fail_with "unexpected result") in let push = snd !t in t := Lwt.wait (); Lwt.wakeup push (Some ()); proceed end in Lwt_main.run (repeat 10000)); ("lwt.lwt_stream" >:: fun _ -> [1; 2; 3] |> Lwt_stream.of_list |> lwt_stream |> to_list >|= assert_equal [1; 2; 3] |> Lwt_main.run); ("lwt.to_lwt_stream" >:: fun _ -> [1; 2; 3] |> Markup.of_list |> to_lwt_stream |> Lwt_stream.to_list >|= assert_equal [1; 2; 3] |> Lwt_main.run); ("lwt.channel" >:: fun _ -> Lwt_io.with_file ~mode:Lwt_io.input self (fun c -> let s = channel c in next s >|= assert_equal (Some '(') >>= fun () -> next s >|= assert_equal (Some '*') >>= fun () -> next s >|= assert_equal (Some ' ') >>= fun () -> next s >|= assert_equal (Some 'T') >>= fun () -> drain s >>= fun () -> next s >|= assert_equal None >>= fun () -> next s >|= assert_equal None >>= fun () -> Lwt_io.close c >>= fun () -> next s >|= assert_equal None) |> Lwt_main.run); ("lwt.channel.closed" >:: fun _ -> Lwt_io.with_file ~mode:Lwt_io.input self (fun c -> let s = channel c in 
Lwt_io.close c >>= fun () -> Lwt.catch (fun () -> next s >|= wrong_k "did not fail") (function | Lwt_io.Channel_closed "input" -> Lwt.return_unit | _ -> wrong_k "wrong exception" () |> Lwt.return)) |> Lwt_main.run); ("lwt.to_channel" >:: fun context -> let name, c = bracket_tmpfile context in close_out_noerr c; (Lwt_io.with_file ~mode:Lwt_io.output name (fun c -> Markup.of_list ['f'; 'o'; 'o'] |> to_channel c) >>= fun () -> Markup.file name |> fst |> to_list >|= assert_equal ['f'; 'o'; 'o']) |> Lwt_main.run); ("lwt.file" >:: fun _ -> let s, close = file self in (next s >|= assert_equal (Some '(') >>= fun () -> next s >|= assert_equal (Some '*') >>= fun () -> next s >|= assert_equal (Some ' ') >>= fun () -> next s >|= assert_equal (Some 'T') >>= fun () -> drain s >>= fun () -> next s >|= assert_equal None >>= fun () -> next s >|= assert_equal None >>= fun () -> close () >>= fun () -> next s >|= assert_equal None) |> Lwt_main.run); ("lwt.file.closed" >:: fun _ -> let s, close = file self in (next s >|= assert_equal (Some '(') >>= fun () -> close () >>= fun () -> Lwt.catch (fun () -> next s >|= wrong_k "did not fail") (function | Lwt_io.Channel_closed "input" -> Lwt.return_unit | _ -> wrong_k "wrong exception" () |> Lwt.return)) |> Lwt_main.run); ("lwt.file.closed_early" >:: fun _ -> let s, close = file self in (close () >>= fun () -> Lwt.catch (fun () -> next s >|= wrong_k "did not fail") (function | Lwt_io.Channel_closed "input" -> Lwt.return_unit | _ -> wrong_k "wrong exception" () |> Lwt.return)) |> Lwt_main.run); ("lwt.to_file" >:: fun context -> let name, c = bracket_tmpfile context in close_out_noerr c; (Markup.of_list ['f'; 'o'; 'o'] |> to_file name >>= fun () -> Markup.file name |> fst |> to_list >|= assert_equal ['f'; 'o'; 'o']) |> Lwt_main.run); ("lwt.load" >:: fun _ -> (Markup.of_list ['f'; 'o'; 'o'] |> Markup_lwt.load >|= Markup.to_list >|= assert_equal ['f'; 'o'; 'o']) |> Lwt_main.run); ] let () = Printf.printf "\nRunning tests in %s\n" 
(Filename.basename Sys.argv.(0)); run_test_tt_main suite markup.ml-1.0.3/test/pages/000077500000000000000000000000001421357706400155105ustar00rootroot00000000000000markup.ml-1.0.3/test/pages/google000066400000000000000000000447441421357706400167240ustar00rootroot00000000000000Google
×
A better way to browse the web



 

Advanced searchLanguage tools

© 2015 - Privacy - Terms

markup.ml-1.0.3/test/pages/xml_spec000066400000000000000000004671751421357706400172710ustar00rootroot00000000000000 "> '"> amp, lt, gt, apos, quot"> ]>
Extensible Markup Language (XML) 1.0 REC-xml-&iso6.doc.date; W3C Recommendation &draft.day;&draft.month;&draft.year; http://www.w3.org/TR/1998/REC-xml-&iso6.doc.date; http://www.w3.org/TR/1998/REC-xml-&iso6.doc.date;.xml http://www.w3.org/TR/1998/REC-xml-&iso6.doc.date;.html http://www.w3.org/TR/1998/REC-xml-&iso6.doc.date;.pdf http://www.w3.org/TR/1998/REC-xml-&iso6.doc.date;.ps http://www.w3.org/TR/REC-xml http://www.w3.org/TR/PR-xml-971208 Tim Bray Textuality and Netscape tbray@textuality.com Jean Paoli Microsoft jeanpa@microsoft.com C. M. Sperberg-McQueen University of Illinois at Chicago cmsmcq@uic.edu

The Extensible Markup Language (XML) is a subset of SGML that is completely described in this document. Its goal is to enable generic SGML to be served, received, and processed on the Web in the way that is now possible with HTML. XML has been designed for ease of implementation and for interoperability with both SGML and HTML.

This document has been reviewed by W3C Members and other interested parties and has been endorsed by the Director as a W3C Recommendation. It is a stable document and may be used as reference material or cited as a normative reference from another document. W3C's role in making the Recommendation is to draw attention to the specification and to promote its widespread deployment. This enhances the functionality and interoperability of the Web.

This document specifies a syntax created by subsetting an existing, widely used international text processing standard (Standard Generalized Markup Language, ISO 8879:1986(E) as amended and corrected) for use on the World Wide Web. It is a product of the W3C XML Activity, details of which can be found at http://www.w3.org/XML. A list of current W3C Recommendations and other technical documents can be found at http://www.w3.org/TR.

This specification uses the term URI, which is defined by , a work in progress expected to update and .

The list of known errors in this specification is available at http://www.w3.org/XML/xml-19980210-errata.

Please report errors in this document to xml-editor@w3.org.

Chicago, Vancouver, Mountain View, et al.: World-Wide Web Consortium, XML Working Group, 1996, 1997.

Created in electronic form.

English Extended Backus-Naur Form (formal grammar) 1997-12-03 : CMSMcQ : yet further changes 1997-12-02 : TB : further changes (see TB to XML WG, 2 December 1997) 1997-12-02 : CMSMcQ : deal with as many corrections and comments from the proofreaders as possible: entify hard-coded document date in pubdate element, change expansion of entity WebSGML, update status description as per Dan Connolly (am not sure about refernece to Berners-Lee et al.), add 'The' to abstract as per WG decision, move Relationship to Existing Standards to back matter and combine with References, re-order back matter so normative appendices come first, re-tag back matter so informative appendices are tagged informdiv1, remove XXX XXX from list of 'normative' specs in prose, move some references from Other References to Normative References, add RFC 1738, 1808, and 2141 to Other References (they are not normative since we do not require the processor to enforce any rules based on them), add reference to 'Fielding draft' (Berners-Lee et al.), move notation section to end of body, drop URIchar non-terminal and use SkipLit instead, lose stray reference to defunct nonterminal 'markupdecls', move reference to Aho et al. into appendix (Tim's right), add prose note saying that hash marks and fragment identifiers are NOT part of the URI formally speaking, and are NOT legal in system identifiers (processor 'may' signal an error). Work through: Tim Bray reacting to James Clark, Tim Bray on his own, Eve Maler, NOT DONE YET: change binary / text to unparsed / parsed. 
handle James's suggestion about < in attriubte values uppercase hex characters, namechar list, 1997-12-01 : JB : add some column-width parameters 1997-12-01 : CMSMcQ : begin round of changes to incorporate recent WG decisions and other corrections: binding sources of character encoding info (27 Aug / 3 Sept), correct wording of Faust quotation (restore dropped line), drop SDD from EncodingDecl, change text at version number 1.0, drop misleading (wrong!) sentence about ignorables and extenders, modify definition of PCData to make bar on msc grammatical, change grammar's handling of internal subset (drop non-terminal markupdecls), change definition of includeSect to allow conditional sections, add integral-declaration constraint on internal subset, drop misleading / dangerous sentence about relationship of entities with system storage objects, change table body tag to htbody as per EM change to DTD, add rule about space normalization in public identifiers, add description of how to generate our name-space rules from Unicode character database (needs further work!). 1997-10-08 : TB : Removed %-constructs again, new rules for PE appearance. 1997-10-01 : TB : Case-sensitive markup; cleaned up element-type defs, lotsa little edits for style 1997-09-25 : TB : Change to elm's new DTD, with substantial detail cleanup as a side-effect 1997-07-24 : CMSMcQ : correct error (lost *) in definition of ignoreSectContents (thanks to Makoto Murata) Allow all empty elements to have end-tags, consistent with SGML TC (as per JJC). 1997-07-23 : CMSMcQ : pre-emptive strike on pending corrections: introduce the term 'empty-element tag', note that all empty elements may use it, and elements declared EMPTY must use it. Add WFC requiring encoding decl to come first in an entity. Redefine notations to point to PIs as well as binary entities. Change autodetection table by removing bytes 3 and 4 from examples with Byte Order Mark. 
Add content model as a term and clarify that it applies to both mixed and element content. 1997-06-30 : CMSMcQ : change date, some cosmetic changes, changes to productions for choice, seq, Mixed, NotationType, Enumeration. Follow James Clark's suggestion and prohibit conditional sections in internal subset. TO DO: simplify production for ignored sections as a result, since we don't need to worry about parsers which don't expand PErefs finding a conditional section. 1997-06-29 : TB : various edits 1997-06-29 : CMSMcQ : further changes: Suppress old FINAL EDIT comments and some dead material. Revise occurrences of % in grammar to exploit Henry Thompson's pun, especially markupdecl and attdef. Remove RMD requirement relating to element content (?). 1997-06-28 : CMSMcQ : Various changes for 1 July draft: Add text for draconian error handling (introduce the term Fatal Error). RE deleta est (changing wording from original announcement to restrict the requirement to validating parsers). Tag definition of validating processor and link to it. Add colon as name character. Change def of %operator. Change standard definitions of lt, gt, amp. Strip leading zeros from #x00nn forms. 1997-04-02 : CMSMcQ : final corrections of editorial errors found in last night's proofreading. Reverse course once more on well-formed: Webster's Second hyphenates it, and that's enough for me. 1997-04-01 : CMSMcQ : corrections from JJC, EM, HT, and self 1997-03-31 : Tim Bray : many changes 1997-03-29 : CMSMcQ : some Henry Thompson (on entity handling), some Charles Goldfarb, some ERB decisions (PE handling in miscellaneous declarations. Changed Ident element to accept def attribute. Allow normalization of Unicode characters. move def of systemliteral into section on literals. 1997-03-28 : CMSMcQ : make as many corrections as possible, from Terry Allen, Norbert Mikula, James Clark, Jon Bosak, Henry Thompson, Paul Grosso, and self. 
Among other things: give in on "well formed" (Terry is right), tentatively rename QuotedCData as AttValue and Literal as EntityValue to be more informative, since attribute values are the only place QuotedCData was used, and vice versa for entity text and Literal. (I'd call it Entity Text, but 8879 uses that name for both internal and external entities.) 1997-03-26 : CMSMcQ : resynch the two forks of this draft, reapply my changes dated 03-20 and 03-21. Normalize old 'may not' to 'must not' except in the one case where it meant 'may or may not'. 1997-03-21 : TB : massive changes on plane flight from Chicago to Vancouver 1997-03-21 : CMSMcQ : correct as many reported errors as possible. 1997-03-20 : CMSMcQ : correct typos listed in CMSMcQ hand copy of spec. 1997-03-20 : CMSMcQ : cosmetic changes preparatory to revision for WWW conference April 1997: restore some of the internal entity references (e.g. to docdate, etc.), change character xA0 to &nbsp; and define nbsp as &#160;, and refill a lot of paragraphs for legibility. 1996-11-12 : CMSMcQ : revise using Tim's edits: Add list type of NUMBERED and change most lists either to BULLETS or to NUMBERED. Suppress QuotedNames, Names (not used). Correct trivial-grammar doc type decl. Rename 'marked section' as 'CDATA section' passim. Also edits from James Clark: Define the set of characters from which [^abc] subtracts. Charref should use just [0-9] not Digit. Location info needs cleaner treatment: remove? (ERB question). One example of a PI has wrong pic. Clarify discussion of encoding names. Encoding failure should lead to unspecified results; don't prescribe error recovery. Don't require exposure of entity boundaries. Ignore white space in element content. Reserve entity names of the form u-NNNN. Clarify relative URLs. And some of my own: Correct productions for content model: model cannot consist of a name, so "elements ::= cp" is no good. 1996-11-11 : CMSMcQ : revise for style. 
Add new rhs to entity declaration, for parameter entities. 1996-11-10 : CMSMcQ : revise for style. Fix / complete section on names, characters. Add sections on parameter entities, conditional sections. Still to do: Add compatibility note on deterministic content models. Finish stylistic revision. 1996-10-31 : TB : Add Entity Handling section 1996-10-30 : TB : Clean up term & termdef. Slip in ERB decision re EMPTY. 1996-10-28 : TB : Change DTD. Implement some of Michael's suggestions. Change comments back to //. Introduce language for XML namespace reservation. Add section on white-space handling. Lots more cleanup. 1996-10-24 : CMSMcQ : quick tweaks, implement some ERB decisions. Characters are not integers. Comments are /* */ not //. Add bibliographic refs to 10646, HyTime, Unicode. Rename old Cdata as MsData since it's only seen in marked sections. Call them attribute-value pairs not name-value pairs, except once. Internal subset is optional, needs '?'. Implied attributes should be signaled to the app, not have values supplied by processor. 1996-10-16 : TB : track down & excise all DSD references; introduce some EBNF for entity declarations. 1996-10-?? : TB : consistency check, fix up scraps so they all parse, get formatter working, correct a few productions. 1996-10-10/11 : CMSMcQ : various maintenance, stylistic, and organizational changes: Replace a few literals with xmlpio and pic entities, to make them consistent and ensure we can change pic reliably when the ERB votes. Drop paragraph on recognizers from notation section. Add match, exact match to terminology. Move old 2.2 XML Processors and Apps into intro. Mention comments, PIs, and marked sections in discussion of delimiter escaping. Streamline discussion of doctype decl syntax. Drop old section of 'PI syntax' for doctype decl, and add section on partial-DTD summary PIs to end of Logical Structures section. Revise DSD syntax section to use Tim's subset-in-a-PI mechanism. 
1996-10-10 : TB : eliminate name recognizers (and more?) 1996-10-09 : CMSMcQ : revise for style, consistency through 2.3 (Characters) 1996-10-09 : CMSMcQ : re-unite everything for convenience, at least temporarily, and revise quickly 1996-10-08 : TB : first major homogenization pass 1996-10-08 : TB : turn "current" attribute on div type into CDATA 1996-10-02 : TB : remould into skeleton + entities 1996-09-30 : CMSMcQ : add a few more sections prior to exchange with Tim. 1996-09-20 : CMSMcQ : finish transcribing notes. 1996-09-19 : CMSMcQ : begin transcribing notes for draft. 1996-09-13 : CMSMcQ : made outline from notes of 09-06, do some housekeeping
Introduction

Extensible Markup Language, abbreviated XML, describes a class of data objects called XML documents and partially describes the behavior of computer programs which process them. XML is an application profile or restricted form of SGML, the Standard Generalized Markup Language . By construction, XML documents are conforming SGML documents.

XML documents are made up of storage units called entities, which contain either parsed or unparsed data. Parsed data is made up of characters, some of which form character data, and some of which form markup. Markup encodes a description of the document's storage layout and logical structure. XML provides a mechanism to impose constraints on the storage layout and logical structure.

A software module called an XML processor is used to read XML documents and provide access to their content and structure. It is assumed that an XML processor is doing its work on behalf of another module, called the application. This specification describes the required behavior of an XML processor in terms of how it must read XML data and the information it must provide to the application.

Origin and Goals

XML was developed by an XML Working Group (originally known as the SGML Editorial Review Board) formed under the auspices of the World Wide Web Consortium (W3C) in 1996. It was chaired by Jon Bosak of Sun Microsystems with the active participation of an XML Special Interest Group (previously known as the SGML Working Group) also organized by the W3C. The membership of the XML Working Group is given in an appendix. Dan Connolly served as the WG's contact with the W3C.

The design goals for XML are:

XML shall be straightforwardly usable over the Internet.

XML shall support a wide variety of applications.

XML shall be compatible with SGML.

It shall be easy to write programs which process XML documents.

The number of optional features in XML is to be kept to the absolute minimum, ideally zero.

XML documents should be human-legible and reasonably clear.

The XML design should be prepared quickly.

The design of XML shall be formal and concise.

XML documents shall be easy to create.

Terseness in XML markup is of minimal importance.

This specification, together with associated standards (Unicode and ISO/IEC 10646 for characters, Internet RFC 1766 for language identification tags, ISO 639 for language name codes, and ISO 3166 for country name codes), provides all the information necessary to understand XML Version &XML.version; and construct computer programs to process it.

This version of the XML specification &doc.distribution;.

Terminology

The terminology used to describe XML documents is defined in the body of this specification. The terms defined in the following list are used in building those definitions and in describing the actions of an XML processor:

Conforming documents and XML processors are permitted to but need not behave as described.

Conforming documents and XML processors are required to behave as described; otherwise they are in error.

A violation of the rules of this specification; results are undefined. Conforming software may detect and report an error and may recover from it.

An error which a conforming XML processor must detect and report to the application. After encountering a fatal error, the processor may continue processing the data to search for further errors and may report such errors to the application. In order to support correction of errors, the processor may make unprocessed data from the document (with intermingled character data and markup) available to the application. Once a fatal error is detected, however, the processor must not continue normal processing (i.e., it must not continue to pass character data and information about the document's logical structure to the application in the normal way).

Conforming software may or must (depending on the modal verb in the sentence) behave as described; if it does, it must provide users a means to enable or disable the behavior described.

A rule which applies to all valid XML documents. Violations of validity constraints are errors; they must, at user option, be reported by validating XML processors.

A rule which applies to all well-formed XML documents. Violations of well-formedness constraints are fatal errors.

(Of strings or names:) Two strings or names being compared must be identical. Characters with multiple possible representations in ISO/IEC 10646 (e.g. characters with both precomposed and base+diacritic forms) match only if they have the same representation in both strings. At user option, processors may normalize such characters to some canonical form. No case folding is performed. (Of strings and rules in the grammar:) A string matches a grammatical production if it belongs to the language generated by that production. (Of content and content models:) An element matches its declaration when it conforms in the fashion described in the constraint .

A feature of XML included solely to ensure that XML remains compatible with SGML.

A non-binding recommendation included to increase the chances that XML documents can be processed by the existing installed base of SGML processors which predate the &WebSGML;.

Documents

A data object is an XML document if it is well-formed, as defined in this specification. A well-formed XML document may in addition be valid if it meets certain further constraints.

Each XML document has both a logical and a physical structure. Physically, the document is composed of units called entities. An entity may refer to other entities to cause their inclusion in the document. A document begins in a "root" or document entity. Logically, the document is composed of declarations, elements, comments, character references, and processing instructions, all of which are indicated in the document by explicit markup. The logical and physical structures must nest properly, as described in .

Well-Formed XML Documents

A textual object is a well-formed XML document if:

Taken as a whole, it matches the production labeled document.

It meets all the well-formedness constraints given in this specification.

Each of the parsed entities which is referenced directly or indirectly within the document is well-formed.

Document document prolog element Misc*

Matching the document production implies that:

It contains one or more elements.

There is exactly one element, called the root, or document element, no part of which appears in the content of any other element. For all other elements, if the start-tag is in the content of another element, the end-tag is in the content of the same element. More simply stated, the elements, delimited by start- and end-tags, nest properly within each other.

As a consequence of this, for each non-root element C in the document, there is one other element P in the document such that C is in the content of P, but is not in the content of any other element that is in the content of P. P is referred to as the parent of C, and C as a child of P.

Characters

A parsed entity contains text, a sequence of characters, which may represent markup or character data. A character is an atomic unit of text as specified by ISO/IEC 10646 . Legal characters are tab, carriage return, line feed, and the legal graphic characters of Unicode and ISO/IEC 10646. The use of "compatibility characters", as defined in section 6.8 of , is discouraged. Character Range Char #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.

The mechanism for encoding character code points into bit patterns may vary from entity to entity. All XML processors must accept the UTF-8 and UTF-16 encodings of 10646; the mechanisms for signaling which of the two is in use, or for bringing other encodings into play, are discussed later, in .

Common Syntactic Constructs

This section defines some symbols used widely in the grammar.

S (white space) consists of one or more space (#x20) characters, carriage returns, line feeds, or tabs. White Space S (#x20 | #x9 | #xD | #xA)+

Characters are classified for convenience as letters, digits, or other characters. Letters consist of an alphabetic or syllabic base character possibly followed by one or more combining characters, or of an ideographic character. Full definitions of the specific characters in each class are given in .

A Name is a token beginning with a letter or one of a few punctuation characters, and continuing with letters, digits, hyphens, underscores, colons, or full stops, together known as name characters. Names beginning with the string "xml", or any string which would match (('X'|'x') ('M'|'m') ('L'|'l')), are reserved for standardization in this or future versions of this specification.

The colon character within XML names is reserved for experimentation with name spaces. Its meaning is expected to be standardized at some future point, at which point those documents using the colon for experimental purposes may need to be updated. (There is no guarantee that any name-space mechanism adopted for XML will in fact use the colon as a name-space delimiter.) In practice, this means that authors should not use the colon in XML names except as part of name-space experiments, but that XML processors should accept the colon as a name character.

An Nmtoken (name token) is any mixture of name characters. Names and Tokens NameChar Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender Name (Letter | '_' | ':') (NameChar)* Names Name (S Name)* Nmtoken (NameChar)+ Nmtokens Nmtoken (S Nmtoken)*

Literal data is any quoted string not containing the quotation mark used as a delimiter for that string. Literals are used for specifying the content of internal entities (EntityValue), the values of attributes (AttValue), and external identifiers (SystemLiteral). Note that a SystemLiteral can be parsed without scanning for markup. Literals EntityValue '"' ([^%&"] | PEReference | Reference)* '"' |  "'" ([^%&'] | PEReference | Reference)* "'" AttValue '"' ([^<&"] | Reference)* '"' |  "'" ([^<&'] | Reference)* "'" SystemLiteral ('"' [^"]* '"') | ("'" [^']* "'") PubidLiteral '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" PubidChar #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]

Character Data and Markup

Text consists of intermingled character data and markup. Markup takes the form of start-tags, end-tags, empty-element tags, entity references, character references, comments, CDATA section delimiters, document type declarations, and processing instructions.

All text that is not markup constitutes the character data of the document.

The ampersand character (&) and the left angle bracket (<) may appear in their literal form only when used as markup delimiters, or within a comment, a processing instruction, or a CDATA section. They are also legal within the literal entity value of an internal entity declaration; see . If they are needed elsewhere, they must be escaped using either numeric character references or the strings "&amp;" and "&lt;" respectively. The right angle bracket (>) may be represented using the string "&gt;", and must, for compatibility, be escaped using "&gt;" or a character reference when it appears in the string "]]>" in content, when that string is not marking the end of a CDATA section.

In the content of elements, character data is any string of characters which does not contain the start-delimiter of any markup. In a CDATA section, character data is any string of characters not including the CDATA-section-close delimiter, "]]>".

To allow attribute values to contain both single and double quotes, the apostrophe or single-quote character (') may be represented as "&apos;", and the double-quote character (") as "&quot;". Character Data CharData [^<&]* - ([^<&]* ']]>' [^<&]*)

Comments

Comments may appear anywhere in a document outside other markup; in addition, they may appear within the document type declaration at places allowed by the grammar. They are not part of the document's character data; an XML processor may, but need not, make it possible for an application to retrieve the text of comments. For compatibility, the string "--" (double-hyphen) must not occur within comments. Comments Comment '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'

An example of a comment: <!&como; declarations for <head> & <body> &comc;>

Processing Instructions

Processing instructions (PIs) allow documents to contain instructions for applications. Processing Instructions PI '<?' PITarget (S (Char* - (Char* &pic; Char*)))? &pic; PITarget Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) PIs are not part of the document's character data, but must be passed through to the application. The PI begins with a target (PITarget) used to identify the application to which the instruction is directed. The target names "XML", "xml", and so on are reserved for standardization in this or future versions of this specification. The XML Notation mechanism may be used for formal declaration of PI targets.

CDATA Sections

CDATA sections may occur anywhere character data may occur; they are used to escape blocks of text containing characters which would otherwise be recognized as markup. CDATA sections begin with the string "<![CDATA[" and end with the string "]]>": CDATA Sections CDSect CDStart CData CDEnd CDStart '<![CDATA[' CData (Char* - (Char* ']]>' Char*)) CDEnd ']]>' Within a CDATA section, only the CDEnd string is recognized as markup, so that left angle brackets and ampersands may occur in their literal form; they need not (and cannot) be escaped using "&lt;" and "&amp;". CDATA sections cannot nest.

An example of a CDATA section, in which "<greeting>" and "</greeting>" are recognized as character data, not markup: <![CDATA[<greeting>Hello, world!</greeting>]]>

Prolog and Document Type Declaration

XML documents may, and should, begin with an XML declaration which specifies the version of XML being used. For example, the following is a complete XML document, well-formed but not valid: Hello, world! ]]> and so is this: Hello, world! ]]>

The version number "1.0" should be used to indicate conformance to this version of this specification; it is an error for a document to use the value "1.0" if it does not conform to this version of this specification. It is the intent of the XML working group to give later versions of this specification numbers other than "1.0", but this intent does not indicate a commitment to produce any future versions of XML, nor if any are produced, to use any particular numbering scheme. Since future versions are not ruled out, this construct is provided as a means to allow the possibility of automatic version recognition, should it become necessary. Processors may signal an error if they receive documents labeled with versions they do not support.

The function of the markup in an XML document is to describe its storage and logical structure and to associate attribute-value pairs with its logical structures. XML provides a mechanism, the document type declaration, to define constraints on the logical structure and to support the use of predefined storage units. An XML document is valid if it has an associated document type declaration and if the document complies with the constraints expressed in it.

The document type declaration must appear before the first element in the document. Prolog prolog XMLDecl? Misc* (doctypedecl Misc*)? XMLDecl &xmlpio; VersionInfo EncodingDecl? SDDecl? S? &pic; VersionInfo S 'version' Eq (' VersionNum ' | " VersionNum ") Eq S? '=' S? VersionNum ([a-zA-Z0-9_.:] | '-')+ Misc Comment | PI | S

The XML document type declaration contains or points to markup declarations that provide a grammar for a class of documents. This grammar is known as a document type definition, or DTD. The document type declaration can point to an external subset (a special kind of external entity) containing markup declarations, or can contain the markup declarations directly in an internal subset, or can do both. The DTD for a document consists of both subsets taken together.

A markup declaration is an element type declaration, an attribute-list declaration, an entity declaration, or a notation declaration. These declarations may be contained in whole or in part within parameter entities, as described in the well-formedness and validity constraints below. For fuller information, see .

Document Type Definition doctypedecl '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>' markupdecl elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment

The markup declarations may be made up in whole or in part of the replacement text of parameter entities. The productions later in this specification for individual nonterminals (elementdecl, AttlistDecl, and so on) describe the declarations after all the parameter entities have been included.

Root Element Type

The Name in the document type declaration must match the element type of the root element.

Proper Declaration/PE Nesting

Parameter-entity replacement text must be properly nested with markup declarations. That is to say, if either the first character or the last character of a markup declaration (markupdecl above) is contained in the replacement text for a parameter-entity reference, both must be contained in the same replacement text.

PEs in Internal Subset

In the internal DTD subset, parameter-entity references can occur only where markup declarations can occur, not within markup declarations. (This does not apply to references that occur in external parameter entities or to the external subset.)

Like the internal subset, the external subset and any external parameter entities referred to in the DTD must consist of a series of complete markup declarations of the types allowed by the non-terminal symbol markupdecl, interspersed with white space or parameter-entity references. However, portions of the contents of the external subset or of external parameter entities may conditionally be ignored by using the conditional section construct; this is not allowed in the internal subset. External Subset extSubset TextDecl? extSubsetDecl extSubsetDecl ( markupdecl | conditionalSect | PEReference | S )*

The external subset and external parameter entities also differ from the internal subset in that in them, parameter-entity references are permitted within markup declarations, not only between markup declarations.

An example of an XML document with a document type declaration: Hello, world! ]]> The system identifier "hello.dtd" gives the URI of a DTD for the document.

The declarations can also be given locally, as in this example: ]> Hello, world! ]]> If both the external and internal subsets are used, the internal subset is considered to occur before the external subset. This has the effect that entity and attribute-list declarations in the internal subset take precedence over those in the external subset.

Standalone Document Declaration

Markup declarations can affect the content of the document, as passed from an XML processor to an application; examples are attribute defaults and entity declarations. The standalone document declaration, which may appear as a component of the XML declaration, signals whether or not there are such declarations which appear external to the document entity. Standalone Document Declaration SDDecl S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))

In a standalone document declaration, the value "yes" indicates that there are no markup declarations external to the document entity (either in the DTD external subset, or in an external parameter entity referenced from the internal subset) which affect the information passed from the XML processor to the application. The value "no" indicates that there are or may be such external markup declarations. Note that the standalone document declaration only denotes the presence of external declarations; the presence, in a document, of references to external entities, when those entities are internally declared, does not change its standalone status.

If there are no external markup declarations, the standalone document declaration has no meaning. If there are external markup declarations but there is no standalone document declaration, the value "no" is assumed.

Any XML document for which standalone="no" holds can be converted algorithmically to a standalone document, which may be desirable for some network delivery applications.

Standalone Document Declaration

The standalone document declaration must have the value "no" if any external markup declarations contain declarations of:

attributes with default values, if elements to which these attributes apply appear in the document without specifications of values for these attributes, or

entities (other than &magicents;), if references to those entities appear in the document, or

attributes with values subject to normalization, where the attribute appears in the document with a value which will change as a result of normalization, or

element types with element content, if white space occurs directly within any instance of those types.

An example XML declaration with a standalone document declaration:<?xml version="&XML.version;" standalone='yes'?>

White Space Handling

In editing XML documents, it is often convenient to use "white space" (spaces, tabs, and blank lines, denoted by the nonterminal S in this specification) to set apart the markup for greater readability. Such white space is typically not intended for inclusion in the delivered version of the document. On the other hand, "significant" white space that should be preserved in the delivered version is common, for example in poetry and source code.

An XML processor must always pass all characters in a document that are not markup through to the application. A validating XML processor must also inform the application which of these characters constitute white space appearing in element content.

A special attribute named xml:space may be attached to an element to signal an intention that in that element, white space should be preserved by applications. In valid documents, this attribute, like any other, must be declared if it is used. When declared, it must be given as an enumerated type whose only possible values are "default" and "preserve". For example:]]>

The value "default" signals that applications' default white-space processing modes are acceptable for this element; the value "preserve" indicates the intent that applications preserve all the white space. This declared intent is considered to apply to all elements within the content of the element where it is specified, unless overridden with another instance of the xml:space attribute.

The root element of any document is considered to have signaled no intentions as regards application space handling, unless it provides a value for this attribute or the attribute is declared with a default value.

End-of-Line Handling

XML parsed entities are often stored in computer files which, for editing convenience, are organized into lines. These lines are typically separated by some combination of the characters carriage-return (#xD) and line-feed (#xA).

To simplify the tasks of applications, wherever an external parsed entity or the literal entity value of an internal parsed entity contains either the literal two-character sequence "#xD#xA" or a standalone literal #xD, an XML processor must pass to the application the single character #xA. (This behavior can conveniently be produced by normalizing all line breaks to #xA on input, before parsing.)

Language Identification

In document processing, it is often useful to identify the natural or formal language in which the content is written. A special attribute named xml:lang may be inserted in documents to specify the language used in the contents and attribute values of any element in an XML document. In valid documents, this attribute, like any other, must be declared if it is used. The values of the attribute are language identifiers as defined by , "Tags for the Identification of Languages": Language Identification LanguageID Langcode ('-' Subcode)* Langcode ISO639Code | IanaCode | UserCode ISO639Code ([a-z] | [A-Z]) ([a-z] | [A-Z]) IanaCode ('i' | 'I') '-' ([a-z] | [A-Z])+ UserCode ('x' | 'X') '-' ([a-z] | [A-Z])+ Subcode ([a-z] | [A-Z])+ The Langcode may be any of the following:

a two-letter language code as defined by , "Codes for the representation of names of languages"

a language identifier registered with the Internet Assigned Numbers Authority ; these begin with the prefix "i-" (or "I-")

a language identifier assigned by the user, or agreed on between parties in private use; these must begin with the prefix "x-" or "X-" in order to ensure that they do not conflict with names later standardized or registered with IANA

There may be any number of Subcode segments; if the first subcode segment exists and the Subcode consists of two letters, then it must be a country code from , "Codes for the representation of names of countries." If the first subcode consists of more than two letters, it must be a subcode for the language in question registered with IANA, unless the Langcode begins with the prefix "x-" or "X-".

It is customary to give the language code in lower case, and the country code (if any) in upper case. Note that these values, unlike other names in XML documents, are case insensitive.

For example: The quick brown fox jumps over the lazy dog.

What colour is it?

What color is it?

Habe nun, ach! Philosophie, Juristerei, und Medizin und leider auch Theologie durchaus studiert mit heißem Bemüh'n. ]]>

The intent declared with xml:lang is considered to apply to all attributes and content of the element where it is specified, unless overridden with an instance of xml:lang on another element within that content.

A simple declaration for xml:lang might take the form xml:lang NMTOKEN #IMPLIED but specific default values may also be given, if appropriate. In a collection of French poems for English students, with glosses and notes in English, the xml:lang attribute might be declared this way: ]]>

Logical Structures

Each XML document contains one or more elements, the boundaries of which are either delimited by start-tags and end-tags, or, for empty elements, by an empty-element tag. Each element has a type, identified by name, sometimes called its "generic identifier" (GI), and may have a set of attribute specifications. Each attribute specification has a name and a value.

Element element EmptyElemTag | STag content ETag

This specification does not constrain the semantics, use, or (beyond syntax) names of the element types and attributes, except that names beginning with a match to (('X'|'x')('M'|'m')('L'|'l')) are reserved for standardization in this or future versions of this specification.

Element Type Match

The Name in an element's end-tag must match the element type in the start-tag.

Element Valid

An element is valid if there is a declaration matching elementdecl where the Name matches the element type, and one of the following holds:

The declaration matches EMPTY and the element has no content.

The declaration matches children and the sequence of child elements belongs to the language generated by the regular expression in the content model, with optional white space (characters matching the nonterminal S) between each pair of child elements.

The declaration matches Mixed and the content consists of character data and child elements whose types match names in the content model.

The declaration matches ANY, and the types of any child elements have been declared.

Start-Tags, End-Tags, and Empty-Element Tags

The beginning of every non-empty XML element is marked by a start-tag. Start-tag STag '<' Name (S Attribute)* S? '>' Attribute Name Eq AttValue The Name in the start- and end-tags gives the element's type. The Name-AttValue pairs are referred to as the attribute specifications of the element, with the Name in each pair referred to as the attribute name and the content of the AttValue (the text between the ' or " delimiters) as the attribute value.

Unique Att Spec

No attribute name may appear more than once in the same start-tag or empty-element tag.

Attribute Value Type

The attribute must have been declared; the value must be of the type declared for it. (For attribute types, see .)

No External Entity References

Attribute values cannot contain direct or indirect entity references to external entities.

No < in Attribute Values

The replacement text of any entity referred to directly or indirectly in an attribute value (other than "&lt;") must not contain a <.

An example of a start-tag: <termdef id="dt-dog" term="dog">

The end of every element that begins with a start-tag must be marked by an end-tag containing a name that echoes the element's type as given in the start-tag: End-tag ETag '</' Name S? '>'

An example of an end-tag:</termdef>

The text between the start-tag and end-tag is called the element's content: Content of Elements content (element | CharData | Reference | CDSect | PI | Comment)*

If an element is empty, it must be represented either by a start-tag immediately followed by an end-tag or by an empty-element tag. An empty-element tag takes a special form: Tags for Empty Elements EmptyElemTag '<' Name (S Attribute)* S? '/>'

Empty-element tags may be used for any element which has no content, whether or not it is declared using the keyword EMPTY. For interoperability, the empty-element tag must be used, and can only be used, for elements which are declared EMPTY.

Examples of empty elements: <IMG align="left" src="http://www.w3.org/Icons/WWW/w3c_home" /> <br></br> <br/>

Element Type Declarations

The element structure of an XML document may, for validation purposes, be constrained using element type and attribute-list declarations. An element type declaration constrains the element's content.

Element type declarations often constrain which element types can appear as children of the element. At user option, an XML processor may issue a warning when a declaration mentions an element type for which no declaration is provided, but this is not an error.

An element type declaration takes the form: Element Type Declaration elementdecl '<!ELEMENT' S Name S contentspec S? '>' contentspec 'EMPTY' | 'ANY' | Mixed | children where the Name gives the element type being declared.

Unique Element Type Declaration

No element type may be declared more than once.

Examples of element type declarations: <!ELEMENT br EMPTY> <!ELEMENT p (#PCDATA|emph)* > <!ELEMENT %name.para; %content.para; > <!ELEMENT container ANY>

Element Content

An element type has element content when elements of that type must contain only child elements (no character data), optionally separated by white space (characters matching the nonterminal S). In this case, the constraint includes a content model, a simple grammar governing the allowed types of the child elements and the order in which they are allowed to appear. The grammar is built on content particles (cps), which consist of names, choice lists of content particles, or sequence lists of content particles: Element-content Models children (choice | seq) ('?' | '*' | '+')? cp (Name | choice | seq) ('?' | '*' | '+')? choice '(' S? cp ( S? '|' S? cp )* S? ')' seq '(' S? cp ( S? ',' S? cp )* S? ')' where each Name is the type of an element which may appear as a child. Any content particle in a choice list may appear in the element content at the location where the choice list appears in the grammar; content particles occurring in a sequence list must each appear in the element content in the order given in the list. The optional character following a name or list governs whether the element or the content particles in the list may occur one or more (+), zero or more (*), or zero or one times (?). The absence of such an operator means that the element or content particle must appear exactly once. This syntax and meaning are identical to those used in the productions in this specification.

The content of an element matches a content model if and only if it is possible to trace out a path through the content model, obeying the sequence, choice, and repetition operators and matching each element in the content against an element type in the content model. For compatibility, it is an error if an element in the document can match more than one occurrence of an element type in the content model. For more information, see .

Proper Group/PE Nesting

Parameter-entity replacement text must be properly nested with parenthesized groups. That is to say, if either of the opening or closing parentheses in a choice, seq, or Mixed construct is contained in the replacement text for a parameter entity, both must be contained in the same replacement text.

For interoperability, if a parameter-entity reference appears in a choice, seq, or Mixed construct, its replacement text should not be empty, and neither the first nor last non-blank character of the replacement text should be a connector (| or ,).

Examples of element-content models: <!ELEMENT spec (front, body, back?)> <!ELEMENT div1 (head, (p | list | note)*, div2*)> <!ELEMENT dictionary-body (%div.mix; | %dict.mix;)*>

Mixed Content

An element type has mixed content when elements of that type may contain character data, optionally interspersed with child elements. In this case, the types of the child elements may be constrained, but not their order or their number of occurrences: Mixed-content Declaration Mixed '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')' where the Names give the types of elements that may appear as children.

No Duplicate Types

The same name must not appear more than once in a single mixed-content declaration.

Examples of mixed content declarations: <!ELEMENT p (#PCDATA|a|ul|b|i|em)*> <!ELEMENT p (#PCDATA | %font; | %phrase; | %special; | %form;)* > <!ELEMENT b (#PCDATA)>

Attribute-List Declarations

Attributes are used to associate name-value pairs with elements. Attribute specifications may appear only within start-tags and empty-element tags; thus, the productions used to recognize them appear in . Attribute-list declarations may be used:

To define the set of attributes pertaining to a given element type.

To establish type constraints for these attributes.

To provide default values for attributes.

Attribute-list declarations specify the name, data type, and default value (if any) of each attribute associated with a given element type: Attribute-list Declaration AttlistDecl '<!ATTLIST' S Name AttDef* S? '>' AttDef S Name S AttType S DefaultDecl The Name in the AttlistDecl rule is the type of an element. At user option, an XML processor may issue a warning if attributes are declared for an element type not itself declared, but this is not an error. The Name in the AttDef rule is the name of the attribute.

When more than one AttlistDecl is provided for a given element type, the contents of all those provided are merged. When more than one definition is provided for the same attribute of a given element type, the first declaration is binding and later declarations are ignored. For interoperability, writers of DTDs may choose to provide at most one attribute-list declaration for a given element type, at most one attribute definition for a given attribute name, and at least one attribute definition in each attribute-list declaration. For interoperability, an XML processor may at user option issue a warning when more than one attribute-list declaration is provided for a given element type, or more than one attribute definition is provided for a given attribute, but this is not an error.

Attribute Types

XML attribute types are of three kinds: a string type, a set of tokenized types, and enumerated types. The string type may take any literal string as a value; the tokenized types have varying lexical and semantic constraints, as noted: Attribute Types AttType StringType | TokenizedType | EnumeratedType StringType 'CDATA' TokenizedType 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'

ID

Values of type ID must match the Name production. A name must not appear more than once in an XML document as a value of this type; i.e., ID values must uniquely identify the elements which bear them.

One ID per Element Type

No element type may have more than one ID attribute specified.

ID Attribute Default

An ID attribute must have a declared default of #IMPLIED or #REQUIRED.

IDREF

Values of type IDREF must match the Name production, and values of type IDREFS must match Names; each Name must match the value of an ID attribute on some element in the XML document; i.e. IDREF values must match the value of some ID attribute.

Entity Name

Values of type ENTITY must match the Name production, values of type ENTITIES must match Names; each Name must match the name of an unparsed entity declared in the DTD.

Name Token

Values of type NMTOKEN must match the Nmtoken production; values of type NMTOKENS must match Nmtokens.

Enumerated attributes can take one of a list of values provided in the declaration. There are two kinds of enumerated types: Enumerated Attribute Types EnumeratedType NotationType | Enumeration NotationType 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' Enumeration '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' A NOTATION attribute identifies a notation, declared in the DTD with associated system and/or public identifiers, to be used in interpreting the element to which the attribute is attached.

Notation Attributes

Values of this type must match one of the notation names included in the declaration; all notation names in the declaration must be declared.

Enumeration

Values of this type must match one of the Nmtoken tokens in the declaration.

For interoperability, the same Nmtoken should not occur more than once in the enumerated attribute types of a single element type.

Attribute Defaults

An attribute declaration provides information on whether the attribute's presence is required, and if not, how an XML processor should react if a declared attribute is absent in a document. Attribute Defaults DefaultDecl '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)

In an attribute declaration, #REQUIRED means that the attribute must always be provided, #IMPLIED that no default value is provided. If the declaration is neither #REQUIRED nor #IMPLIED, then the AttValue value contains the declared default value; the #FIXED keyword states that the attribute must always have the default value. If a default value is declared, when an XML processor encounters an omitted attribute, it is to behave as though the attribute were present with the declared default value.

Required Attribute

If the default declaration is the keyword #REQUIRED, then the attribute must be specified for all elements of the type in the attribute-list declaration.

Attribute Default Legal

The declared default value must meet the lexical constraints of the declared attribute type.

Fixed Attribute Default

If an attribute has a default value declared with the #FIXED keyword, instances of that attribute must match the default value.

Examples of attribute-list declarations: <!ATTLIST termdef id ID #REQUIRED name CDATA #IMPLIED> <!ATTLIST list type (bullets|ordered|glossary) "ordered"> <!ATTLIST form method CDATA #FIXED "POST">

Attribute-Value Normalization

Before the value of an attribute is passed to the application or checked for validity, the XML processor must normalize it as follows:

a character reference is processed by appending the referenced character to the attribute value

an entity reference is processed by recursively processing the replacement text of the entity

a whitespace character (#x20, #xD, #xA, #x9) is processed by appending #x20 to the normalized value, except that only a single #x20 is appended for a "#xD#xA" sequence that is part of an external parsed entity or the literal entity value of an internal parsed entity

other characters are processed by appending them to the normalized value

If the declared value is not CDATA, then the XML processor must further process the normalized attribute value by discarding any leading and trailing space (#x20) characters, and by replacing sequences of space (#x20) characters by a single space (#x20) character.

All attributes for which no declaration has been read should be treated by a non-validating parser as if declared CDATA.

Conditional Sections

Conditional sections are portions of the document type declaration external subset which are included in, or excluded from, the logical structure of the DTD based on the keyword which governs them. Conditional Section conditionalSect includeSect | ignoreSect includeSect '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' ignoreSect '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' ignoreSectContents Ignore ('<![' ignoreSectContents ']]>' Ignore)* Ignore Char* - (Char* ('<![' | ']]>') Char*)

Like the internal and external DTD subsets, a conditional section may contain one or more complete declarations, comments, processing instructions, or nested conditional sections, intermingled with white space.

If the keyword of the conditional section is INCLUDE, then the contents of the conditional section are part of the DTD. If the keyword of the conditional section is IGNORE, then the contents of the conditional section are not logically part of the DTD. Note that for reliable parsing, the contents of even ignored conditional sections must be read in order to detect nested conditional sections and ensure that the end of the outermost (ignored) conditional section is properly detected. If a conditional section with a keyword of INCLUDE occurs within a larger conditional section with a keyword of IGNORE, both the outer and the inner conditional sections are ignored.

If the keyword of the conditional section is a parameter-entity reference, the parameter entity must be replaced by its content before the processor decides whether to include or ignore the conditional section.

An example: <!ENTITY % draft 'INCLUDE' > <!ENTITY % final 'IGNORE' > <![%draft;[ <!ELEMENT book (comments*, title, body, supplements?)> ]]> <![%final;[ <!ELEMENT book (title, body, supplements?)> ]]>

Physical Structures

An XML document may consist of one or many storage units. These are called entities; they all have content and are all (except for the document entity, see below, and the external DTD subset) identified by name. Each XML document has one entity called the document entity, which serves as the starting point for the XML processor and may contain the whole document.

Entities may be either parsed or unparsed. A parsed entity's contents are referred to as its replacement text; this text is considered an integral part of the document.

An unparsed entity is a resource whose contents may or may not be text, and if text, may not be XML. Each unparsed entity has an associated notation, identified by name. Beyond a requirement that an XML processor make the identifiers for the entity and notation available to the application, XML places no constraints on the contents of unparsed entities.

Parsed entities are invoked by name using entity references; unparsed entities by name, given in the value of ENTITY or ENTITIES attributes.

General entities are entities for use within the document content. In this specification, general entities are sometimes referred to with the unqualified term entity when this leads to no ambiguity. Parameter entities are parsed entities for use within the DTD. These two types of entities use different forms of reference and are recognized in different contexts. Furthermore, they occupy different namespaces; a parameter entity and a general entity with the same name are two distinct entities.

Character and Entity References

A character reference refers to a specific character in the ISO/IEC 10646 character set, for example one not directly accessible from available input devices. Character Reference CharRef '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' Legal Character

Characters referred to using character references must match the production for Char.

If the character reference begins with "&#x", the digits and letters up to the terminating ; provide a hexadecimal representation of the character's code point in ISO/IEC 10646. If it begins just with "&#", the digits up to the terminating ; provide a decimal representation of the character's code point.

An entity reference refers to the content of a named entity. References to parsed general entities use ampersand (&) and semicolon (;) as delimiters. Parameter-entity references use percent-sign (%) and semicolon (;) as delimiters.

Entity Reference Reference EntityRef | CharRef EntityRef '&' Name ';' PEReference '%' Name ';' Entity Declared

In a document without any DTD, a document with only an internal DTD subset which contains no parameter entity references, or a document with "standalone='yes'", the Name given in the entity reference must match that in an entity declaration, except that well-formed documents need not declare any of the following entities: amp, lt, gt, apos, quot. The declaration of a parameter entity must precede any reference to it. Similarly, the declaration of a general entity must precede any reference to it which appears in a default value in an attribute-list declaration.

Note that if entities are declared in the external subset or in external parameter entities, a non-validating processor is not obligated to read and process their declarations; for such documents, the rule that an entity must be declared is a well-formedness constraint only if standalone='yes'.

Entity Declared

In a document with an external subset or external parameter entities with "standalone='no'", the Name given in the entity reference must match that in an entity declaration. For interoperability, valid documents should declare the entities amp, lt, gt, apos, quot, in the form specified in . The declaration of a parameter entity must precede any reference to it. Similarly, the declaration of a general entity must precede any reference to it which appears in a default value in an attribute-list declaration.

Parsed Entity

An entity reference must not contain the name of an unparsed entity. Unparsed entities may be referred to only in attribute values declared to be of type ENTITY or ENTITIES.

No Recursion

A parsed entity must not contain a recursive reference to itself, either directly or indirectly.

In DTD

Parameter-entity references may only appear in the DTD.

Examples of character and entity references: Type <key>less-than</key> (&#x3C;) to save options. This document was prepared on &docdate; and is classified &security-level;.

Example of a parameter-entity reference: <!-- declare the parameter entity "ISOLat2"... --> <!ENTITY % ISOLat2 SYSTEM "http://www.xml.com/iso/isolat2-def.entities" > <!-- ... now reference it. --> %ISOLat2;

Entity Declarations

Entities are declared thus: Entity Declaration EntityDecl GEDecl | PEDecl GEDecl '<!ENTITY' S Name S EntityDef S? '>' PEDecl '<!ENTITY' S '%' S Name S PEDef S? '>' EntityDef EntityValue | (ExternalID NDataDecl?) PEDef EntityValue | ExternalID The Name identifies the entity in an entity reference or, in the case of an unparsed entity, in the value of an ENTITY or ENTITIES attribute. If the same entity is declared more than once, the first declaration encountered is binding; at user option, an XML processor may issue a warning if entities are declared multiple times.

Internal Entities

If the entity definition is an EntityValue, the defined entity is called an internal entity. There is no separate physical storage object, and the content of the entity is given in the declaration. Note that some processing of entity and character references in the literal entity value may be required to produce the correct replacement text: see .

An internal entity is a parsed entity.

Example of an internal entity declaration: <!ENTITY Pub-Status "This is a pre-release of the specification.">

External Entities

If the entity is not internal, it is an external entity, declared as follows: External Entity Declaration ExternalID 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral NDataDecl S 'NDATA' S Name If the NDataDecl is present, this is a general unparsed entity; otherwise it is a parsed entity.

Notation Declared

The Name must match the declared name of a notation.

The SystemLiteral is called the entity's system identifier. It is a URI, which may be used to retrieve the entity. Note that the hash mark (#) and fragment identifier frequently used with URIs are not, formally, part of the URI itself; an XML processor may signal an error if a fragment identifier is given as part of a system identifier. Unless otherwise provided by information outside the scope of this specification (e.g. a special XML element type defined by a particular DTD, or a processing instruction defined by a particular application specification), relative URIs are relative to the location of the resource within which the entity declaration occurs. A URI might thus be relative to the document entity, to the entity containing the external DTD subset, or to some other external parameter entity.

An XML processor should handle a non-ASCII character in a URI by representing the character in UTF-8 as one or more bytes, and then escaping these bytes with the URI escaping mechanism (i.e., by converting each byte to %HH, where HH is the hexadecimal notation of the byte value).

In addition to a system identifier, an external identifier may include a public identifier. An XML processor attempting to retrieve the entity's content may use the public identifier to try to generate an alternative URI. If the processor is unable to do so, it must use the URI specified in the system literal. Before a match is attempted, all strings of white space in the public identifier must be normalized to single space characters (#x20), and leading and trailing white space must be removed.

Examples of external entity declarations: <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml"> <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml"> <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif >

Parsed Entities The Text Declaration

External parsed entities may each begin with a text declaration. Text Declaration TextDecl '<?xml' VersionInfo? EncodingDecl S? '?>'

The text declaration must be provided literally, not by reference to a parsed entity. No text declaration may appear at any position other than the beginning of an external parsed entity.

Well-Formed Parsed Entities

The document entity is well-formed if it matches the production labeled document. An external general parsed entity is well-formed if it matches the production labeled extParsedEnt. An external parameter entity is well-formed if it matches the production labeled extPE. Well-Formed External Parsed Entity extParsedEnt TextDecl? content extPE TextDecl? extSubsetDecl An internal general parsed entity is well-formed if its replacement text matches the production labeled content. All internal parameter entities are well-formed by definition.

A consequence of well-formedness in entities is that the logical and physical structures in an XML document are properly nested; no start-tag, end-tag, empty-element tag, element, comment, processing instruction, character reference, or entity reference can begin in one entity and end in another.

Character Encoding in Entities

Each external parsed entity in an XML document may use a different encoding for its characters. All XML processors must be able to read entities in either UTF-8 or UTF-16.

Entities encoded in UTF-16 must begin with the Byte Order Mark described by ISO/IEC 10646 Annex E and Unicode Appendix B (the ZERO WIDTH NO-BREAK SPACE character, #xFEFF). This is an encoding signature, not part of either the markup or the character data of the XML document. XML processors must be able to use this character to differentiate between UTF-8 and UTF-16 encoded documents.

Although an XML processor is required to read only entities in the UTF-8 and UTF-16 encodings, it is recognized that other encodings are used around the world, and it may be desired for XML processors to read entities that use them. Parsed entities which are stored in an encoding other than UTF-8 or UTF-16 must begin with a text declaration containing an encoding declaration: Encoding Declaration EncodingDecl S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) EncName [A-Za-z] ([A-Za-z0-9._] | '-')* Encoding name contains only Latin characters In the document entity, the encoding declaration is part of the XML declaration. The EncName is the name of the encoding used.

In an encoding declaration, the values "UTF-8", "UTF-16", "ISO-10646-UCS-2", and "ISO-10646-UCS-4" should be used for the various encodings and transformations of Unicode / ISO/IEC 10646, the values "ISO-8859-1", "ISO-8859-2", ... "ISO-8859-9" should be used for the parts of ISO 8859, and the values "ISO-2022-JP", "Shift_JIS", and "EUC-JP" should be used for the various encoded forms of JIS X-0208-1997. XML processors may recognize other encodings; it is recommended that character encodings registered (as charsets) with the Internet Assigned Numbers Authority , other than those just listed, should be referred to using their registered names. Note that these registered names are defined to be case-insensitive, so processors wishing to match against them should do so in a case-insensitive way.

In the absence of information provided by an external transport protocol (e.g. HTTP or MIME), it is an error for an entity including an encoding declaration to be presented to the XML processor in an encoding other than that named in the declaration, for an encoding declaration to occur other than at the beginning of an external entity, or for an entity which begins with neither a Byte Order Mark nor an encoding declaration to use an encoding other than UTF-8. Note that since ASCII is a subset of UTF-8, ordinary ASCII entities do not strictly need an encoding declaration.

It is a fatal error when an XML processor encounters an entity with an encoding that it is unable to process.

Examples of encoding declarations: <?xml encoding='UTF-8'?> <?xml encoding='EUC-JP'?>

XML Processor Treatment of Entities and References

The table below summarizes the contexts in which character references, entity references, and invocations of unparsed entities might appear and the required behavior of an XML processor in each case. The labels in the leftmost column describe the recognition context:

as a reference anywhere after the start-tag and before the end-tag of an element; corresponds to the nonterminal content.

as a reference within either the value of an attribute in a start-tag, or a default value in an attribute declaration; corresponds to the nonterminal AttValue.

as a Name, not a reference, appearing either as the value of an attribute which has been declared as type ENTITY, or as one of the space-separated tokens in the value of an attribute which has been declared as type ENTITIES.

as a reference within a parameter or internal entity's literal entity value in the entity's declaration; corresponds to the nonterminal EntityValue.

as a reference within either the internal or external subsets of the DTD, but outside of an EntityValue or AttValue.

Entity Type Character Parameter Internal General External Parsed General Unparsed Reference in Content Not recognized Included Included if validating Forbidden Included Reference in Attribute Value Not recognized Included in literal Forbidden Forbidden Included Occurs as Attribute Value Not recognized Forbidden Forbidden Notify Not recognized Reference in EntityValue Included in literal Bypassed Bypassed Forbidden Included Reference in DTD Included as PE Forbidden Forbidden Forbidden Forbidden Not Recognized

Outside the DTD, the % character has no special significance; thus, what would be parameter entity references in the DTD are not recognized as markup in content. Similarly, the names of unparsed entities are not recognized except when they appear in the value of an appropriately declared attribute.

Included

An entity is included when its replacement text is retrieved and processed, in place of the reference itself, as though it were part of the document at the location the reference was recognized. The replacement text may contain both character data and (except for parameter entities) markup, which must be recognized in the usual way, except that the replacement text of entities used to escape markup delimiters (the entities amp, lt, gt, apos, quot) is always treated as data. (The string "AT&amp;T;" expands to "AT&T;" and the remaining ampersand is not recognized as an entity-reference delimiter.) A character reference is included when the indicated character is processed in place of the reference itself.

Included If Validating

When an XML processor recognizes a reference to a parsed entity, in order to validate the document, the processor must include its replacement text. If the entity is external, and the processor is not attempting to validate the XML document, the processor may, but need not, include the entity's replacement text. If a non-validating parser does not include the replacement text, it must inform the application that it recognized, but did not read, the entity.

This rule is based on the recognition that the automatic inclusion provided by the SGML and XML entity mechanism, primarily designed to support modularity in authoring, is not necessarily appropriate for other applications, in particular document browsing. Browsers, for example, when encountering an external parsed entity reference, might choose to provide a visual indication of the entity's presence and retrieve it for display only on demand.

Forbidden

The following are forbidden, and constitute fatal errors:

the appearance of a reference to an unparsed entity.

the appearance of any character or general-entity reference in the DTD except within an EntityValue or AttValue.

a reference to an external entity in an attribute value.

Included in Literal

When an entity reference appears in an attribute value, or a parameter entity reference appears in a literal entity value, its replacement text is processed in place of the reference itself as though it were part of the document at the location the reference was recognized, except that a single or double quote character in the replacement text is always treated as a normal data character and will not terminate the literal. For example, this is well-formed: <!ENTITY % YN '"Yes"' > <!ENTITY WhatHeSaid "He said %YN;" > while this is not: <!ENTITY EndAttr "27'" > <element attribute='a-&EndAttr;>

Notify

When the name of an unparsed entity appears as a token in the value of an attribute of declared type ENTITY or ENTITIES, a validating processor must inform the application of the system and public (if any) identifiers for both the entity and its associated notation.

Bypassed

When a general entity reference appears in the EntityValue in an entity declaration, it is bypassed and left as is.

Included as PE

Just as with external parsed entities, parameter entities need only be included if validating. When a parameter-entity reference is recognized in the DTD and included, its replacement text is enlarged by the attachment of one leading and one following space (#x20) character; the intent is to constrain the replacement text of parameter entities to contain an integral number of grammatical tokens in the DTD.

Construction of Internal Entity Replacement Text

In discussing the treatment of internal entities, it is useful to distinguish two forms of the entity's value. The literal entity value is the quoted string actually present in the entity declaration, corresponding to the non-terminal EntityValue. The replacement text is the content of the entity, after replacement of character references and parameter-entity references.

The literal entity value as given in an internal entity declaration (EntityValue) may contain character, parameter-entity, and general-entity references. Such references must be contained entirely within the literal entity value. The actual replacement text that is included as described above must contain the replacement text of any parameter entities referred to, and must contain the character referred to, in place of any character references in the literal entity value; however, general-entity references must be left as-is, unexpanded. For example, given the following declarations: <!ENTITY % pub "&#xc9;ditions Gallimard" > <!ENTITY rights "All rights reserved" > <!ENTITY book "La Peste: Albert Camus, &#xA9; 1947 %pub;. &rights;" > then the replacement text for the entity "book" is: La Peste: Albert Camus, © 1947 Éditions Gallimard. &rights; The general-entity reference "&rights;" would be expanded should the reference "&book;" appear in the document's content or an attribute value.

These simple rules may have complex interactions; for a detailed discussion of a difficult example, see .

Predefined Entities

Entity and character references can both be used to escape the left angle bracket, ampersand, and other delimiters. A set of general entities (amp, lt, gt, apos, quot) is specified for this purpose. Numeric character references may also be used; they are expanded immediately when recognized and must be treated as character data, so the numeric character references "&#60;" and "&#38;" may be used to escape < and & when they occur in character data.

All XML processors must recognize these entities whether they are declared or not. For interoperability, valid XML documents should declare these entities, like any others, before using them. If the entities in question are declared, they must be declared as internal entities whose replacement text is the single character being escaped or a character reference to that character, as shown below. <!ENTITY lt "&#38;#60;"> <!ENTITY gt "&#62;"> <!ENTITY amp "&#38;#38;"> <!ENTITY apos "&#39;"> <!ENTITY quot "&#34;"> Note that the < and & characters in the declarations of "lt" and "amp" are doubly escaped to meet the requirement that entity replacement be well-formed.

Notation Declarations

Notations identify by name the format of unparsed entities, the format of elements which bear a notation attribute, or the application to which a processing instruction is addressed.

Notation declarations provide a name for the notation, for use in entity and attribute-list declarations and in attribute specifications, and an external identifier for the notation which may allow an XML processor or its client application to locate a helper application capable of processing data in the given notation. Notation Declarations NotationDecl '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' PublicID 'PUBLIC' S PubidLiteral

XML processors must provide applications with the name and external identifier(s) of any notation declared and referred to in an attribute value, attribute definition, or entity declaration. They may additionally resolve the external identifier into the system identifier, file name, or other information needed to allow the application to call a processor for data in the notation described. (It is not an error, however, for XML documents to declare and refer to notations for which notation-specific applications are not available on the system where the XML processor or application is running.)

Document Entity

The document entity serves as the root of the entity tree and a starting-point for an XML processor. This specification does not specify how the document entity is to be located by an XML processor; unlike other entities, the document entity has no name and might well appear on a processor input stream without any identification at all.

Conformance Validating and Non-Validating Processors

Conforming XML processors fall into two classes: validating and non-validating.

Validating and non-validating processors alike must report violations of this specification's well-formedness constraints in the content of the document entity and any other parsed entities that they read.

Validating processors must report violations of the constraints expressed by the declarations in the DTD, and failures to fulfill the validity constraints given in this specification. To accomplish this, validating XML processors must read and process the entire DTD and all external parsed entities referenced in the document.

Non-validating processors are required to check only the document entity, including the entire internal DTD subset, for well-formedness. While they are not required to check the document for validity, they are required to process all the declarations they read in the internal DTD subset and in any parameter entity that they read, up to the first reference to a parameter entity that they do not read; that is to say, they must use the information in those declarations to normalize attribute values, include the replacement text of internal entities, and supply default attribute values. They must not process entity declarations or attribute-list declarations encountered after a reference to a parameter entity that is not read, since the entity may have contained overriding declarations.

Using XML Processors

The behavior of a validating XML processor is highly predictable; it must read every piece of a document and report all well-formedness and validity violations. Less is required of a non-validating processor; it need not read any part of the document other than the document entity. This has two effects that may be important to users of XML processors:

Certain well-formedness errors, specifically those that require reading external entities, may not be detected by a non-validating processor. Examples include the constraints entitled Entity Declared, Parsed Entity, and No Recursion, as well as some of the cases described as forbidden in .

The information passed from the processor to the application may vary, depending on whether the processor reads parameter and external entities. For example, a non-validating processor may not normalize attribute values, include the replacement text of internal entities, or supply default attribute values, where doing so depends on having read declarations in external or parameter entities.

For maximum reliability in interoperating between different XML processors, applications which use non-validating processors should not rely on any behaviors not required of such processors. Applications which require facilities such as the use of default attributes or internal entities which are declared in external entities should use validating XML processors.

Notation

The formal grammar of XML is given in this specification using a simple Extended Backus-Naur Form (EBNF) notation. Each rule in the grammar defines one symbol, in the form symbol ::= expression

Symbols are written with an initial capital letter if they are defined by a regular expression, or with an initial lower case letter otherwise. Literal strings are quoted.

Within the expression on the right-hand side of a rule, the following expressions are used to match strings of one or more characters:

where N is a hexadecimal integer, the expression matches the character in ISO/IEC 10646 whose canonical (UCS-4) code value, when interpreted as an unsigned binary number, has the value indicated. The number of leading zeros in the #xN form is insignificant; the number of leading zeros in the corresponding code value is governed by the character encoding in use and is not significant for XML.

matches any character with a value in the range(s) indicated (inclusive).

matches any character with a value outside the range indicated.

matches any character with a value not among the characters given.

matches a literal string matching that given inside the double quotes.

matches a literal string matching that given inside the single quotes.

These symbols may be combined to match more complex patterns as follows, where A and B represent simple expressions:

expression is treated as a unit and may be combined as described in this list.

matches A or nothing; optional A.

matches A followed by B.

matches A or B but not both.

matches any string that matches A but does not match B.

matches one or more occurrences of A.

matches zero or more occurrences of A.

Other notations used in the productions are:

comment.

well-formedness constraint; this identifies by name a constraint on well-formed documents associated with a production.

validity constraint; this identifies by name a constraint on valid documents associated with a production.

References Normative References (Internet Assigned Numbers Authority) Official Names for Character Sets, ed. Keld Simonsen et al. See ftp://ftp.isi.edu/in-notes/iana/assignments/character-sets. IETF (Internet Engineering Task Force). RFC 1766: Tags for the Identification of Languages, ed. H. Alvestrand. 1995. (International Organization for Standardization). ISO 639:1988 (E). Code for the representation of names of languages. [Geneva]: International Organization for Standardization, 1988. (International Organization for Standardization). ISO 3166-1:1997 (E). Codes for the representation of names of countries and their subdivisions — Part 1: Country codes [Geneva]: International Organization for Standardization, 1997. ISO (International Organization for Standardization). ISO/IEC 10646-1993 (E). Information technology — Universal Multiple-Octet Coded Character Set (UCS) — Part 1: Architecture and Basic Multilingual Plane. [Geneva]: International Organization for Standardization, 1993 (plus amendments AM 1 through AM 7). The Unicode Consortium. The Unicode Standard, Version 2.0. Reading, Mass.: Addison-Wesley Developers Press, 1996. Other References Aho, Alfred V., Ravi Sethi, and Jeffrey D. Ullman. Compilers: Principles, Techniques, and Tools. Reading: Addison-Wesley, 1986, rpt. corr. 1988. Berners-Lee, T., R. Fielding, and L. Masinter. Uniform Resource Identifiers (URI): Generic Syntax and Semantics. 1997. (Work in progress; see updates to RFC1738.) Brggemann-Klein, Anne. Regular Expressions into Finite Automata. Extended abstract in I. Simon, Hrsg., LATIN 1992, S. 97-98. Springer-Verlag, Berlin 1992. Full Version in Theoretical Computer Science 120: 197-213, 1993. Brggemann-Klein, Anne, and Derick Wood. Deterministic Regular Languages. Universitt Freiburg, Institut fr Informatik, Bericht 38, Oktober 1991. James Clark. Comparison of SGML and XML. See http://www.w3.org/TR/NOTE-sgml-xml-971215. IETF (Internet Engineering Task Force). 
RFC 1738: Uniform Resource Locators (URL), ed. T. Berners-Lee, L. Masinter, M. McCahill. 1994. IETF (Internet Engineering Task Force). RFC 1808: Relative Uniform Resource Locators, ed. R. Fielding. 1995. IETF (Internet Engineering Task Force). RFC 2141: URN Syntax, ed. R. Moats. 1997. ISO (International Organization for Standardization). ISO 8879:1986(E). Information processing — Text and Office Systems — Standard Generalized Markup Language (SGML). First edition — 1986-10-15. [Geneva]: International Organization for Standardization, 1986. ISO (International Organization for Standardization). ISO/IEC 10744-1992 (E). Information technology — Hypermedia/Time-based Structuring Language (HyTime). [Geneva]: International Organization for Standardization, 1992. Extended Facilities Annexe. [Geneva]: International Organization for Standardization, 1996. Character Classes

Following the characteristics defined in the Unicode standard, characters are classed as base characters (among others, these contain the alphabetic characters of the Latin alphabet, without diacritics), ideographic characters, and combining characters (among others, this class contains most diacritics); these classes combine to form the class of letters. Digits and extenders are also distinguished. Characters Letter BaseChar | Ideographic BaseChar [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] | [#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | [#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | [#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] | [#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | [#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | [#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | [#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | [#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | [#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] | [#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | [#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | [#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | [#x0B99-#x0B9A] | #x0B9C 
| [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | [#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | [#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | [#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A | #x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | #x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | [#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | [#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C | #x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 | [#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | [#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | [#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | [#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B | #x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | [#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | [#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | [#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3] Ideographic [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029] CombiningChar [#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] | [#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | [#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] | #x093C | [#x093E-#x094C] | 
#x094D | [#x0951-#x0954] | [#x0962-#x0963] | [#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 | #x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | [#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | [#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] | [#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | [#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | [#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | #x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | [#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | #x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | [#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | #x3099 | #x309A Digit [#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29] Extender #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]

The character classes defined here can be derived from the Unicode character database as follows:

Name start characters must have one of the categories Ll, Lu, Lo, Lt, Nl.

Name characters other than Name-start characters must have one of the categories Mc, Me, Mn, Lm, or Nd.

Characters in the compatibility area (i.e. with character code greater than #xF900 and less than #xFFFE) are not allowed in XML names.

Characters which have a font or compatibility decomposition (i.e. those with a "compatibility formatting tag" in field 5 of the database -- marked by field 5 beginning with a "<") are not allowed.

The following characters are treated as name-start characters rather than name characters, because the property file classifies them as Alphabetic: [#x02BB-#x02C1], #x0559, #x06E5, #x06E6.

Characters #x20DD-#x20E0 are excluded (in accordance with Unicode, section 5.14).

Character #x00B7 is classified as an extender, because the property list so identifies it.

Character #x0387 is added as a name character, because #x00B7 is its canonical equivalent.

Characters ':' and '_' are allowed as name-start characters.

Characters '-' and '.' are allowed as name characters.

XML and SGML

XML is designed to be a subset of SGML, in that every valid XML document should also be a conformant SGML document. For a detailed comparison of the additional restrictions that XML places on documents beyond those of SGML, see .

Expansion of Entity and Character References

This appendix contains some examples illustrating the sequence of entity- and character-reference recognition and expansion, as specified in .

If the DTD contains the declaration An ampersand (&#38;) may be escaped numerically (&#38;#38;) or with a general entity (&amp;).

" > ]]> then the XML processor will recognize the character references when it parses the entity declaration, and resolve them before storing the following string as the value of the entity "example": An ampersand (&) may be escaped numerically (&#38;) or with a general entity (&amp;).

]]>
A reference in the document to "&example;" will cause the text to be reparsed, at which time the start- and end-tags of the "p" element will be recognized and the three references will be recognized and expanded, resulting in a "p" element with the following content (all data, no delimiters or markup):

A more complex example will illustrate the rules and their effects fully. In the following example, the line numbers are solely for reference. 2 4 5 ' > 6 %xx; 7 ]> 8 This sample shows a &tricky; method. ]]> This produces the following:

in line 4, the reference to character 37 is expanded immediately, and the parameter entity "xx" is stored in the symbol table with the value "%zz;". Since the replacement text is not rescanned, the reference to parameter entity "zz" is not recognized. (And it would be an error if it were, since "zz" is not yet declared.)

in line 5, the character reference "&#60;" is expanded immediately and the parameter entity "zz" is stored with the replacement text "<!ENTITY tricky "error-prone" >", which is a well-formed entity declaration.

in line 6, the reference to "xx" is recognized, and the replacement text of "xx" (namely "%zz;") is parsed. The reference to "zz" is recognized in its turn, and its replacement text ("<!ENTITY tricky "error-prone" >") is parsed. The general entity "tricky" has now been declared, with the replacement text "error-prone".

in line 8, the reference to the general entity "tricky" is recognized, and it is expanded, so the full content of the "test" element is the self-describing (and ungrammatical) string This sample shows a error-prone method.

Deterministic Content Models

For compatibility, it is required that content models in element type declarations be deterministic.

SGML requires deterministic content models (it calls them "unambiguous"); XML processors built using SGML systems may flag non-deterministic content models as errors.

For example, the content model ((b, c) | (b, d)) is non-deterministic, because given an initial b the parser cannot know which b in the model is being matched without looking ahead to see which element follows the b. In this case, the two references to b can be collapsed into a single reference, making the model read (b, (c | d)). An initial b now clearly matches only a single name in the content model. The parser doesn't need to look ahead to see what follows; either c or d would be accepted.

More formally: a finite state automaton may be constructed from the content model using the standard algorithms, e.g. algorithm 3.5 in section 3.9 of Aho, Sethi, and Ullman . In many such algorithms, a follow set is constructed for each position in the regular expression (i.e., each leaf node in the syntax tree for the regular expression); if any position has a follow set in which more than one following position is labeled with the same element type name, then the content model is in error and may be reported as an error.

Algorithms exist which allow many but not all non-deterministic content models to be reduced automatically to equivalent deterministic models; see Brggemann-Klein 1991 .

Autodetection of Character Encodings

The XML encoding declaration functions as an internal label on each entity, indicating which character encoding is in use. Before an XML processor can read the internal label, however, it apparently has to know what character encoding is in use—which is what the internal label is trying to indicate. In the general case, this is a hopeless situation. It is not entirely hopeless in XML, however, because XML limits the general case in two ways: each implementation is assumed to support only a finite set of character encodings, and the XML encoding declaration is restricted in position and content in order to make it feasible to autodetect the character encoding in use in each entity in normal cases. Also, in many cases other sources of information are available in addition to the XML data stream itself. Two cases may be distinguished, depending on whether the XML entity is presented to the processor without, or with, any accompanying (external) information. We consider the first case first.

Because each XML entity not in UTF-8 or UTF-16 format must begin with an XML encoding declaration, in which the first characters must be '<?xml', any conforming processor can detect, after two to four octets of input, which of the following cases apply. In reading this list, it may help to know that in UCS-4, '<' is "#x0000003C" and '?' is "#x0000003F", and the Byte Order Mark required of UTF-16 data streams is "#xFEFF".

00 00 00 3C: UCS-4, big-endian machine (1234 order)

3C 00 00 00: UCS-4, little-endian machine (4321 order)

00 00 3C 00: UCS-4, unusual octet order (2143)

00 3C 00 00: UCS-4, unusual octet order (3412)

FE FF: UTF-16, big-endian

FF FE: UTF-16, little-endian

00 3C 00 3F: UTF-16, big-endian, no Byte Order Mark (and thus, strictly speaking, in error)

3C 00 3F 00: UTF-16, little-endian, no Byte Order Mark (and thus, strictly speaking, in error)

3C 3F 78 6D: UTF-8, ISO 646, ASCII, some part of ISO 8859, Shift-JIS, EUC, or any other 7-bit, 8-bit, or mixed-width encoding which ensures that the characters of ASCII have their normal positions, width, and values; the actual encoding declaration must be read to detect which of these applies, but since all of these encodings use the same bit patterns for the ASCII characters, the encoding declaration itself may be read reliably

4C 6F A7 94: EBCDIC (in some flavor; the full encoding declaration must be read to tell which code page is in use)

other: UTF-8 without an encoding declaration, or else the data stream is corrupt, fragmentary, or enclosed in a wrapper of some kind

This level of autodetection is enough to read the XML encoding declaration and parse the character-encoding identifier, which is still necessary to distinguish the individual members of each family of encodings (e.g. to tell UTF-8 from 8859, and the parts of 8859 from each other, or to distinguish the specific EBCDIC code page in use, and so on).

Because the contents of the encoding declaration are restricted to ASCII characters, a processor can reliably read the entire encoding declaration as soon as it has detected which family of encodings is in use. Since in practice, all widely used character encodings fall into one of the categories above, the XML encoding declaration allows reasonably reliable in-band labeling of character encodings, even when external sources of information at the operating-system or transport-protocol level are unreliable.

Once the processor has detected the character encoding in use, it can act appropriately, whether by invoking a separate input routine for each case, or by calling the proper conversion function on each character of input.

Like any self-labeling system, the XML encoding declaration will not work if any software changes the entity's character set or encoding without updating the encoding declaration. Implementors of character-encoding routines should be careful to ensure the accuracy of the internal and external information used to label the entity.

The second possible case occurs when the XML entity is accompanied by encoding information, as in some file systems and some network protocols. When multiple sources of information are available, their relative priority and the preferred method of handling conflict should be specified as part of the higher-level protocol used to deliver XML. Rules for the relative priority of the internal label and the MIME-type label in an external header, for example, should be part of the RFC document defining the text/xml and application/xml MIME types. In the interests of interoperability, however, the following rules are recommended.

If an XML entity is in a file, the Byte-Order Mark and encoding-declaration PI are used (if present) to determine the character encoding. All other heuristics and sources of information are solely for error recovery.

If an XML entity is delivered with a MIME type of text/xml, then the charset parameter on the MIME type determines the character encoding method; all other heuristics and sources of information are solely for error recovery.

If an XML entity is delivered with a MIME type of application/xml, then the Byte-Order Mark and encoding-declaration PI are used (if present) to determine the character encoding. All other heuristics and sources of information are solely for error recovery.

These rules apply only in the absence of protocol-level documentation; in particular, when the MIME types text/xml and application/xml are defined, the recommendations of the relevant RFC will supersede these rules.

W3C XML Working Group

This specification was prepared and approved for publication by the W3C XML Working Group (WG). WG approval of this specification does not necessarily imply that all WG members voted for its approval. The current and former members of the XML WG are:

Jon Bosak, SunChair James ClarkTechnical Lead Tim Bray, Textuality and NetscapeXML Co-editor Jean Paoli, MicrosoftXML Co-editor C. M. Sperberg-McQueen, U. of Ill.XML Co-editor Dan Connolly, W3CW3C Liaison Paula Angerstein, Texcel Steve DeRose, INSO Dave Hollander, HP Eliot Kimber, ISOGEN Eve Maler, ArborText Tom Magliery, NCSA Murray Maloney, Muzmo and Grif Makoto Murata, Fuji Xerox Information Systems Joel Nava, Adobe Conleth O'Connell, Vignette Peter Sharpe, SoftQuad John Tigue, DataChannel
markup.ml-1.0.3/test/performance/000077500000000000000000000000001421357706400167125ustar00rootroot00000000000000markup.ml-1.0.3/test/performance/dune000066400000000000000000000006601421357706400175720ustar00rootroot00000000000000(library (name performance_common) (modules performance_common) (libraries unix)) (executable (name performance_markup) (modules performance_markup) (libraries markup performance_common unix)) (executable (name performance_nethtml) (modules performance_nethtml) (libraries threads netstring performance_common)) (executable (name performance_xmlm) (modules performance_xmlm) (libraries unix performance_common xmlm)) markup.ml-1.0.3/test/performance/performance_common.ml000066400000000000000000000011731421357706400231170ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) let measure runs library source format f = let name = Printf.sprintf "%s: %s (%s)" library source format in let rec run = function | 0 -> () | n -> f (); run (n - 1) in let start_time = Unix.gettimeofday () in run runs; let duration = (Unix.gettimeofday ()) -. start_time in let average = duration /. (float_of_int runs) *. 1000000. in Printf.printf " %s: %.0f us\n" name average let google_page = "test/pages/google" let xml_spec = "test/pages/xml_spec" markup.ml-1.0.3/test/performance/performance_markup.ml000066400000000000000000000006771421357706400231360ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. 
*) open Performance_common open Markup let (|>) x f = f x let () = measure 100 "markup.ml" google_page "html" (fun () -> file google_page |> fst |> parse_html |> signals |> drain); measure 100 "markup.ml" xml_spec "xml" (fun () -> file xml_spec |> fst |> parse_xml |> signals |> drain) markup.ml-1.0.3/test/performance/performance_nethtml.ml000066400000000000000000000007451421357706400233060ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open Performance_common open Nethtml let (|>) x f = f x let parse file = file |> open_in |> Lexing.from_channel |> parse_document ~dtd:relaxed_html40_dtd |> ignore let () = measure 100 "nethtml" google_page "html" (fun () -> parse google_page); measure 100 "nethtml" xml_spec "html" (fun () -> parse xml_spec) markup.ml-1.0.3/test/performance/performance_xmlm.ml000066400000000000000000000010451421357706400226020ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open Performance_common open Xmlm let (|>) x f = f x let parse file = try make_input ~entity:(fun _ -> Some "") (`Channel (open_in file)) |> input_doc_tree ~el:(fun _ _ -> ()) ~data:ignore |> ignore with Xmlm.Error ((l, c), e) as exn -> Printf.printf "%i %i %s\n" l c (error_message e); raise exn let () = measure 100 "xmlm" xml_spec "xml" (fun () -> parse xml_spec) markup.ml-1.0.3/test/support/000077500000000000000000000000001421357706400161255ustar00rootroot00000000000000markup.ml-1.0.3/test/support/dune000066400000000000000000000000721421357706400170020ustar00rootroot00000000000000(library (name test_support) (libraries markup ounit2)) markup.ml-1.0.3/test/support/test_support.ml000066400000000000000000000062601421357706400212360ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. 
See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open OUnit2 open Markup__Common module Text = Markup__Text module Error = Markup__Error module Kstream = Markup__Kstream let sprintf = Printf.sprintf let wrong_k message = fun _ -> assert_failure message let with_text_limit n f = let limit = !Text.length_limit in Text.length_limit := n; try f (); Text.length_limit := limit with exn -> Text.length_limit := limit; raise exn let expect_error : ?allow_recovery:int -> location -> Error.t -> (Error.parse_handler -> unit) -> unit = fun ?(allow_recovery = 0) l error f -> let errors = ref 0 in let report l' error' _ k = errors := !errors + 1; if !errors > 1 + allow_recovery then sprintf "got additional error '%s'" (Error.to_string ~location:l' error') |> assert_failure; if !errors = 1 && (l' <> l || error' <> error) then sprintf "got error \"%s\"\nexpected \"%s\"" (Error.to_string ~location:l' error') (Error.to_string ~location:l error) |> assert_failure; k () in f report; if !errors = 0 then sprintf "no error\nexpected \"%s\"" (Error.to_string ~location:l error) |> assert_failure let expect_sequence ?(prefix = false) id to_string sequence = let assert_failure s = assert_failure (id ^ "\n" ^ s) in let sequence = ref sequence in let invalid = ref false in let receive s throw = if !invalid then () else match !sequence with | [] -> if not prefix then begin invalid := true; sprintf "got \"%s\"\nexpected no more output" (to_string s) |> assert_failure end | first::rest -> if s = first then sequence := rest else begin invalid := true; sprintf "got \"%s\"\nexpected \"%s\"" (to_string s) (to_string first) |> assert_failure end; match rest, prefix with | [], true -> throw Exit | _ -> () in let ended () = if !invalid then () else match !sequence with | [] -> () | first::_ -> sprintf "got end\nexpected \"%s\"" (to_string first) |> assert_failure in receive, ended let iter iterate s = Kstream.iter iterate s (function | Exit -> () | exn -> raise exn) ignore 
type 'a general_signal = S of 'a | E of Error.t let expect_signals ?prefix signal_to_string id signals = let to_string = function | l, c, S s -> sprintf "line %i, column %i: %s" l c (signal_to_string s) | l, c, E e -> sprintf "line %i, column %i: %s" l c (Error.to_string e) in let receive, ended = expect_sequence ?prefix id to_string signals in let report (l, c) e throw k = receive (l, c, E e) throw; k () in let signal ((l, c), s) throw k = receive (l, c, S s) throw; k () in report, signal, ended let expect_strings id strings = let to_string = function | S s -> s | E e -> Error.to_string e in let receive, ended = expect_sequence id to_string strings in let report _ e throw k = receive (E e) throw; k () in let string s throw k = receive (S s) throw; k () in report, string, ended markup.ml-1.0.3/test/support/test_support.mli000066400000000000000000000015071421357706400214060ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open Markup__Common val wrong_k : string -> _ cont val with_text_limit : int -> (unit -> unit) -> unit val expect_error : ?allow_recovery:int -> location -> Markup.Error.t -> (Markup__Error.parse_handler -> unit) -> unit type 'a general_signal = S of 'a | E of Markup.Error.t val expect_signals : ?prefix:bool -> ('a -> string) -> string -> (int * int * 'a general_signal) list -> Markup__Error.parse_handler * ((location * 'a) -> unit cps) * (unit -> unit) val expect_strings : string -> string general_signal list -> Markup__Error.write_handler * (string -> unit cps) * (unit -> unit) val iter : ('a -> unit cps) -> 'a Markup__Kstream.t -> unit markup.ml-1.0.3/test/test.ml000066400000000000000000000012561421357706400157260ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. 
*) open OUnit2 let suite = "markup.ml" >::: List.flatten [ Test_kstream.tests; Test_stream_io.tests; Test_encoding.tests; Test_input.tests; Test_trie.tests; Test_xml_tokenizer.tests; Test_xml_parser.tests; Test_xml_writer.tests; Test_html_tokenizer.tests; Test_html_parser.tests; Test_html_writer.tests; Test_detect.tests; Test_utility.tests; Test_integration.tests ] let () = Printf.printf "\nRunning tests in %s\n" (Filename.basename Sys.argv.(0)); run_test_tt_main suite markup.ml-1.0.3/test/test_detect.ml000066400000000000000000000177671421357706400172740ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open OUnit2 open Test_support open Markup__Kstream open Markup__Stream_io open Markup__Detect let ok = wrong_k "failed" let _check_rewound chars s = if String.length s > 0 then next_option chars ok (assert_equal (Some s.[0])) else next_option chars ok (assert_equal None) let _check_encoding_guess f s guess = let chars = string s in f chars ok (assert_equal guess); _check_rewound chars s let tests = [ ("detect.normalize_name" >:: fun _ -> normalize_name true "l2" |> assert_equal "iso-8859-2"; normalize_name true "utf8" |> assert_equal "utf-8"; normalize_name true "\t utf-8 " |> assert_equal "utf-8"; normalize_name true "sjis" |> assert_equal "shift_jis"; normalize_name true "foobar" |> assert_equal "foobar"; normalize_name true " foobar " |> assert_equal "foobar"); ("detect.guess_from_bom_html" >:: fun _ -> let check = _check_encoding_guess guess_from_bom_html in check "\xfe\xff\x00f\x00o\x00o" (Some "utf-16be"); check "\xff\xfef\x00o\x00o\x00" (Some "utf-16le"); check "\xef\xbb\xbffoo" (Some "utf-8"); check "foo" None; check "\xfe\xff" (Some "utf-16be"); check "\xff\xfe" (Some "utf-16le"); check "\xef\xbb\xbf" (Some "utf-8"); check "" None); ("detect.guess_from_bom_xml" >:: fun _ -> let check = _check_encoding_guess guess_from_bom_xml in check 
"\x00\x00\xfe\xff\x00\x00\x00f" (Some "ucs-4be"); check "\x00\x00\xfe\xff" (Some "ucs-4be"); check "\xff\xfe\x00\x00f\x00\x00\x00" (Some "ucs-4le"); check "\xff\xfe\x00\x00" (Some "ucs-4le"); check "\x00\x00\xff\xfe\x00\x00f\x00" (Some "ucs-4be-transposed"); check "\x00\x00\xff\xfe" (Some "ucs-4be-transposed"); check "\xfe\xff\x00\x00\x00f\x00\x00" (Some "ucs-4le-transposed"); check "\xfe\xff\x00\x00" (Some "ucs-4le-transposed"); check "\xfe\xff\x00f\x00o\x00o" (Some "utf-16be"); check "\xff\xfef\x00o\x00o\x00" (Some "utf-16le"); check "\xef\xbb\xbffoo" (Some "utf-8"); check "foo" None; check "\xfe\xff" (Some "utf-16be"); check "\xff\xfe" (Some "utf-16le"); check "\xef\xbb\xbf" (Some "utf-8"); check "" None); ("detect.guess_family_xml" >:: fun _ -> let check = _check_encoding_guess guess_family_xml in check "\x00\x00\x00\x3c" (Some "ucs-4be"); check "\x3c\x00\x00\x00" (Some "ucs-4le"); check "\x00\x00\x3c\x00" (Some "ucs-4be-transposed"); check "\x00\x3c\x00\x00" (Some "ucs-4le-transposed"); check "\x00\x3c\x00\x3f" (Some "utf-16be"); check "\x3c\x00\x3f\x00" (Some "utf-16le"); check "\x3c\x3f\x78\x6d" (Some "utf-8"); check "\x4c\x6f\xa7\x94" (Some "ebcdic"); check "foo" None; check "" None); ("detect.meta_tag_prescan" >:: fun _ -> let check ?supported ?limit s result = let chars = string s in meta_tag_prescan ?supported ?limit chars ok (assert_equal result); _check_rewound chars s in check "" None; check "foobar" None; check " " None; check "" (Some "shift_jis"); check "" (Some "shift_jis"); check "" (Some "shift_jis"); check "" (Some "shift_jis"); check "" None; check "" None; check "" (Some "shift_jis"); check "" (Some "shift_jis"); check "" (Some "shift_jis"); check "" (Some "shift_jis"); check "" (Some "shift_jis"); check "" (Some "shift_jis"); check ("") (Some "iso-8859-15"); check "" None; check "" None; check "" (Some "shift_jis"); check "" (Some "shift_jis"); check "" (Some "x-user-defined"); check "" (Some "shift_jis"); check "" (Some "shift_jis"); check 
"" (Some "shift_jis"); check "" None; check "" None; check "" (Some "shift_jis"); check "" (Some "shift_jis"); check "" (Some "utf-8"); check "" (Some "shift_jis"); check "" (Some "shift_jis"); check "" (Some "shift_jis"); check "" (Some "shift_jis"); check "" (Some "shift_jis"); check "" (Some "shift_jis"); check "" (Some "shift_jis"); check "" (Some "shift_jis"); check " " (Some "shift_jis"); check " foobar " (Some "shift_jis"); check "" (Some "shift_jis"); check "" (Some "shift_jis"); check "" (Some "shift_jis"); check "" (Some "shift_jis"); check "" (Some "shift_jis"); check "" (Some "shift_jis"); check "" (Some "shift_jis"); check "<" (Some "shift_jis"); check "" (Some "shift_jis"); check "" (Some "shift_jis"); check "" (Some "shift_jis"); check "" (Some "utf-8"); let no_utf_8 = fun s k -> match s with | "utf-8" -> k false | _ -> k true in check ~supported:no_utf_8 "" None; check ~supported:no_utf_8 "" None; check ~supported:no_utf_8 "" (Some "shift_jis"); check ~supported:no_utf_8 "" (Some "shift_jis"); check ~supported:no_utf_8 "" (Some "shift_jis"); check ~limit:0 "" None; check ~limit:16 "" None; check ~limit:32 "" (Some "shift_jis")); ("detect.read_xml_encoding_declaration" >:: fun _ -> let check family s result = let chars = string s in read_xml_encoding_declaration chars family ok (assert_equal result); _check_rewound chars s in let open Markup__Encoding in check utf_8 "" (Some "utf-8"); check utf_16be "" None; check utf_8 "" (Some "us-ascii"); check utf_8 " " (Some "utf-8"); check utf_8 " a " None) ] markup.ml-1.0.3/test/test_encoding.ml000066400000000000000000000145211421357706400175730ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. 
*) open OUnit2 open Test_support open Markup__Common open Markup__Kstream open Markup__Stream_io open Markup__Encoding let ok = wrong_k "failed" let test_ucs_4 (f : Markup__Encoding.t) name s1 s2 bad_bytes = expect_error (1, 2) (`Decoding_error (bad_bytes, name)) begin fun report -> let chars = s1 |> string |> f ~report in next_option chars ok (assert_equal (Some (Char.code 'f'))); next_option chars ok (assert_equal (Some u_rep)); next_option chars ok (assert_equal (Some (Char.code 'o'))); next_option chars ok (assert_equal None); next_option chars ok (assert_equal None) end; expect_error (2, 2) (`Decoding_error ("\x00\x00\x00", name)) begin fun report -> let chars = s2 |> string |> f ~report in next_option chars ok (assert_equal (Some (Char.code 'f'))); next_option chars ok (assert_equal (Some 0x000A)); next_option chars ok (assert_equal (Some (Char.code 'o'))); next_option chars ok (assert_equal (Some u_rep)); next_option chars ok (assert_equal None); next_option chars ok (assert_equal None) end let tests = [ ("encoding.utf_8" >:: fun _ -> let s = "\xef\xbb\xbffoo\xf0\x9f\x90\x99bar\xa0more" in expect_error (1, 8) (`Decoding_error ("\xa0", "utf-8")) begin fun report -> let chars = s |> string |> utf_8 ~report in next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); next_option chars ok (assert_equal (Some 0x1F419)); next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r'])); next_option chars ok (assert_equal (Some u_rep)); next_n 4 chars ok (assert_equal (List.map Char.code ['m'; 'o'; 'r'; 'e'])); next_option chars ok (assert_equal None); next_option chars ok (assert_equal None) end); ("encoding.utf_16be" >:: fun _ -> let s = "\xfe\xff\x00f\x00o\x00o\xd8\x3d\xdc\x19\x00b\xdc\x19\x00a\x00r" in expect_error (1, 6) (`Decoding_error ("\xdc\x19", "utf-16be")) begin fun report -> let chars = s |> string |> utf_16be ~report in next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); next_option chars ok (assert_equal (Some 
0x1F419)); next_option chars ok (assert_equal (Some (Char.code 'b'))); next_option chars ok (assert_equal (Some u_rep)); next_n 16 chars ok (assert_equal (List.map Char.code ['a'; 'r'])); next_option chars ok (assert_equal None); next_option chars ok (assert_equal None) end); ("encoding.utf_16le" >:: fun _ -> let s = "\xff\xfef\x00o\x00o\x00\x3d\xd8\x19\xdcb\x00\x19\xdca\x00r\x00" in expect_error (1, 6) (`Decoding_error ("\x19\xdc", "utf-16le")) begin fun report -> let chars = s |> string |> utf_16le ~report in next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); next_option chars ok (assert_equal (Some 0x1F419)); next_option chars ok (assert_equal (Some (Char.code 'b'))); next_option chars ok (assert_equal (Some u_rep)); next_n 16 chars ok (assert_equal (List.map Char.code ['a'; 'r'])); next_option chars ok (assert_equal None); next_option chars ok (assert_equal None) end); ("encoding.iso_8859_1" >:: fun _ -> let chars = string "foo\xa0\xa4" |> iso_8859_1 in next_n 5 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'; '\xa0'; '\xa4'])); next_option chars ok (assert_equal None); next_option chars ok (assert_equal None)); ("encoding.iso_8859_15" >:: fun _ -> let chars = string "foo\xa0\xa4" |> iso_8859_15 in next_n 4 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'; '\xa0'])); next_option chars ok (assert_equal (Some 0x20AC)); next_option chars ok (assert_equal None); next_option chars ok (assert_equal None)); ("encoding.us_ascii" >:: fun _ -> let s = "foo\xa0bar" in expect_error (1, 4) (`Decoding_error ("\xa0", "us-ascii")) begin fun report -> let chars = s |> string |> us_ascii ~report in next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); next_option chars ok (assert_equal (Some u_rep)); next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r'])); next_option chars ok (assert_equal None); next_option chars ok (assert_equal None) end); ("encoding.windows_1251" >:: fun _ -> let chars = string 
"foo\xe0\xe1\xe2bar" |> windows_1251 in next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); next_n 3 chars ok (assert_equal [0x0430; 0x0431; 0x0432]); next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r'])); next_option chars ok (assert_equal None); next_option chars ok (assert_equal None)); ("encoding.windows_1252" >:: fun _ -> let chars = string "foo\x80\x83bar" |> windows_1252 in next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); next_n 2 chars ok (assert_equal [0x20AC; 0x0192]); next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r'])); next_option chars ok (assert_equal None); next_option chars ok (assert_equal None)); ("encoding.ucs_4be" >:: fun _ -> test_ucs_4 ucs_4be "ucs-4be" "\x00\x00\xfe\xff\x00\x00\x00f\x80\x00\x00\x00\x00\x00\x00o" "\x00\x00\x00f\x00\x00\x00\n\x00\x00\x00o\x00\x00\x00" "\x80\x00\x00\x00"); ("encoding.ucs_4le" >:: fun _ -> test_ucs_4 ucs_4le "ucs-4le" "\xff\xfe\x00\x00f\x00\x00\x00\x00\x00\x00\x80o\x00\x00\x00" "f\x00\x00\x00\n\x00\x00\x00o\x00\x00\x00\x00\x00\x00" "\x00\x00\x00\x80"); ("encoding.ucs_4be_transposed" >:: fun _ -> test_ucs_4 ucs_4be_transposed "ucs-4be-transposed" "\x00\x00\xff\xfe\x00\x00f\x00\x00\x80\x00\x00\x00\x00o\x00" "\x00\x00f\x00\x00\x00\n\x00\x00\x00o\x00\x00\x00\x00" "\x00\x80\x00\x00"); ("encoding.ucs_4le_transposed" >:: fun _ -> test_ucs_4 ucs_4le_transposed "ucs-4le-transposed" "\xfe\xff\x00\x00\x00f\x00\x00\x00\x00\x80\x00\x00o\x00\x00" "\x00f\x00\x00\x00\n\x00\x00\x00o\x00\x00\x00\x00\x00" "\x00\x00\x80\x00"); ("encoding.ebcdic" >:: fun _ -> let chars = string "\x86\x96\x96" |> ebcdic in next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); next_option chars ok (assert_equal None); next_option chars ok (assert_equal None)); ] markup.ml-1.0.3/test/test_html_parser.ml000066400000000000000000001537341421357706400203370ustar00rootroot00000000000000(* This file is part of Markup.ml, released under the MIT license. 
See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open OUnit2 open Test_support open Markup__Common module Error = Markup__Error let doctype = `Doctype {doctype_name = Some "html"; public_identifier = None; system_identifier = None; raw_text = None; force_quirks = false} let start_element name = `Start_element ((html_ns, name), []) let expect ?prefix ?(context = Some `Document) text signals = let report, iterate, ended = expect_signals ?prefix signal_to_string text signals in text |> Markup__Stream_io.string |> Markup__Encoding.utf_8 |> Markup__Input.preprocess is_valid_html_char Error.ignore_errors |> Markup__Html_tokenizer.tokenize Error.ignore_errors |> Markup__Html_parser.parse context report |> iter iterate; ended () let tests = [ ("html.parser.basic" >:: fun _ -> expect "" [ 1, 1, S doctype; 1, 16, S (start_element "html"); 1, 22, S (start_element "head"); 1, 28, S `End_element; 1, 35, S (start_element "body"); 1, 55, S `End_element; 1, 55, S `End_element]; expect ~prefix:true " " [ 1, 2, S (`Comment "foo"); 1, 13, S doctype]; expect ~prefix:true " " [ 1, 1, S doctype; 1, 17, S (`Comment "foo"); 1, 28, S (start_element "html")]; expect ~prefix:true " " [ 1, 1, S (start_element "html"); 1, 8, S (`Comment "foo"); 1, 19, S (start_element "head")]); ("html.parser.implicit-top-level" >:: fun _ -> expect "" [ 1, 1, S doctype; 1, 16, S (start_element "html"); 1, 16, S (start_element "head"); 1, 16, S `End_element; 1, 16, S (start_element "body"); 1, 16, S `End_element; 1, 16, S `End_element]; expect "" [ 1, 1, S doctype; 1, 16, S (start_element "html"); 1, 22, S (start_element "head"); 1, 22, S `End_element; 1, 22, S (start_element "body"); 1, 29, S `End_element; 1, 29, S `End_element]; expect "" [ 1, 1, S doctype; 1, 16, S (start_element "html"); 1, 16, S (start_element "head"); 1, 22, S `End_element; 1, 29, S (start_element "body"); 1, 29, S `End_element; 1, 29, S `End_element]; expect "" [ 1, 1, S doctype; 1, 16, S (start_element 
"html"); 1, 16, S (start_element "head"); 1, 16, S `End_element; 1, 16, S (start_element "body"); 1, 29, S `End_element; 1, 29, S `End_element]; expect "

" [ 1, 1, S doctype; 1, 16, S (start_element "html"); 1, 16, S (start_element "head"); 1, 16, S `End_element; 1, 16, S (start_element "body"); 1, 16, S (start_element "p"); 1, 19, S `End_element; 1, 23, S `End_element; 1, 23, S `End_element]; expect "" [ 1, 1, S doctype; 1, 16, S (start_element "html"); 1, 16, S (start_element "head"); 1, 16, S (start_element "title"); 1, 23, S `End_element; 1, 31, S `End_element; 1, 31, S (start_element "body"); 1, 31, S `End_element; 1, 31, S `End_element]); ("html.parser.no-doctype" >:: fun _ -> expect ~prefix:true "foo" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S (start_element "title"); 1, 8, S (`Text ["foo"])]); ("html.parser.double-doctype" >:: fun _ -> expect ~prefix:true "" [ 1, 1, S doctype; 1, 16, E (`Bad_document "doctype should be first"); 1, 31, S (start_element "html")]); ("html.parser.end-before-html" >:: fun _ -> expect ~prefix:true "

" [ 1, 1, E (`Unmatched_end_tag "p"); 1, 5, S (start_element "html")]); ("html.parser.junk-before-head" >:: fun _ -> expect ~prefix:true "

" [ 1, 1, S (start_element "html"); 1, 7, E (`Bad_document "doctype should be first"); 1, 22, E (`Misnested_tag ("html", "html", [])); 1, 28, E (`Unmatched_end_tag "p"); 1, 32, S (start_element "head")]); ("html.parser.head" >:: fun _ -> expect ~prefix:true " " [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 7, S (`Text [" "]); 1, 8, S (`Comment "foo"); 1, 18, S (start_element "link"); 1, 18, S `End_element; 1, 24, S (start_element "link"); 1, 24, S `End_element; 1, 31, S (start_element "meta"); 1, 31, S `End_element; 1, 37, S (start_element "meta"); 1, 37, S `End_element; 1, 44, S `End_element; 1, 51, S (start_element "body")]); ("html.parser.style" >:: fun _ -> expect ~prefix:true "" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 7, S (start_element "style"); 1, 14, S (`Text ["foo<"]); 1, 28, S `End_element; 1, 36, S `End_element; 1, 43, S (start_element "body")]); ("html.parser.title" >:: fun _ -> expect ~prefix:true "foo</head><" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 7, S (start_element "title"); 1, 14, S (`Text ["foo<"]); 1, 28, S `End_element; 1, 36, S `End_element; 1, 43, S (start_element "body")]); ("html.parser.script" >:: fun _ -> expect ~prefix:true "" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 7, S (start_element "script"); 1, 15, S (`Text ["

" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 7, S `End_element; 1, 14, S (`Text [" "]); 1, 15, S (`Comment "foo"); 1, 25, E (`Bad_document "doctype should be first"); 1, 40, E (`Misnested_tag ("html", "html", [])); 1, 46, E (`Misnested_tag ("meta", "html", [])); 1, 46, S (start_element "meta"); 1, 46, S `End_element; 1, 52, E (`Bad_document "duplicate head element"); 1, 58, E (`Unmatched_end_tag "p"); 1, 62, S (start_element "body")]); ("html.parser.whitespace-after-head" >:: fun _ -> expect " " [ 1, 1, S (start_element "html"); 1, 7, S (start_element "head"); 1, 13, S `End_element; 1, 20, S (`Text [" "]); 1, 21, S (start_element "body"); 1, 28, S `End_element; 1, 28, S `End_element]; expect "foo " [ 1, 1, S (start_element "html"); 1, 7, S (start_element "head"); 1, 13, S (start_element "title"); 1, 20, S (`Text ["foo"]); 1, 23, S `End_element; 1, 31, S `End_element; 1, 38, S (`Text [" "]); 1, 39, S (start_element "body"); 1, 46, S `End_element; 1, 46, S `End_element]); ("html.parser.body-content" >:: fun _ -> expect " bar" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 7, S (`Comment "foo"); 1, 17, S (`Text [" bar"]); 1, 28, S `End_element; 1, 28, S `End_element]); ("html.parser.body.whitespace" >:: fun _ -> expect ~context:(Some (`Fragment "body")) " \n\r\t\x0c " [ 1, 1, S (`Text [" \n\n\t\x0c\r"])]); ("html.parser.paragraphs" >:: fun _ -> expect "

foo

" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, S (start_element "p"); 1, 4, S (`Text ["foo"]); 1, 7, S `End_element; 1, 11, S `End_element; 1, 11, S `End_element]; expect "

foo

bar

baz
" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, S (start_element "p"); 1, 4, S (`Text ["foo"]); 1, 7, S `End_element; 1, 7, S (start_element "p"); 1, 10, S (`Text ["bar"]); 1, 13, S `End_element; 1, 13, S (start_element "div"); 1, 18, S (`Text ["baz"]); 1, 21, S `End_element; 1, 27, S `End_element; 1, 27, S `End_element]); ("html.parser.p.autoclose" >:: fun _ -> expect ("

\n" ^ "

\n" ^ "

\n" ^ "

\n" ^ "

\n" ^ "

\n" ^ "

    \n" ^ "

      \n" ^ "

      \n" ^ "

      ") [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, S (start_element "p"); 1, 4, S `End_element; 1, 4, S (start_element "address"); 1, 13, S `End_element; 1, 23, S (start_element "p"); 1, 26, S `End_element; 1, 26, S (start_element "article"); 1, 35, S `End_element; 1, 45, S (start_element "p"); 1, 48, S `End_element; 1, 48, S (start_element "aside"); 1, 55, S `End_element; 1, 63, S (`Text ["\n"]); 2, 1, S (start_element "p"); 2, 4, S `End_element; 2, 4, S (start_element "blockquote"); 2, 16, S `End_element; 2, 29, S (start_element "p"); 2, 32, S `End_element; 2, 32, S (start_element "center"); 2, 40, S `End_element; 2, 49, S (`Text ["\n"]); 3, 1, S (start_element "p"); 3, 4, S `End_element; 3, 4, S (start_element "details"); 3, 13, S `End_element; 3, 23, S (start_element "p"); 3, 26, S `End_element; 3, 26, S (start_element "dialog"); 3, 34, S `End_element; 3, 43, S (start_element "p"); 3, 46, S `End_element; 3, 46, S (start_element "dir"); 3, 51, S `End_element; 3, 57, S (`Text ["\n"]); 4, 1, S (start_element "p"); 4, 4, S `End_element; 4, 4, S (start_element "div"); 4, 9, S `End_element; 4, 15, S (start_element "p"); 4, 18, S `End_element; 4, 18, S (start_element "dl"); 4, 22, S `End_element; 4, 27, S (start_element "p"); 4, 30, S `End_element; 4, 30, S (start_element "fieldset"); 4, 40, S `End_element; 4, 51, S (`Text ["\n"]); 5, 1, S (start_element "p"); 5, 4, S `End_element; 5, 4, S (start_element "figcaption"); 5, 16, S `End_element; 5, 29, S (start_element "p"); 5, 32, S `End_element; 5, 32, S (start_element "figure"); 5, 40, S `End_element; 5, 49, S (`Text ["\n"]); 6, 1, S (start_element "p"); 6, 4, S `End_element; 6, 4, S (start_element "footer"); 6, 12, S `End_element; 6, 21, S (start_element "p"); 6, 24, S `End_element; 6, 24, S (start_element "header"); 6, 32, S `End_element; 6, 41, S (start_element "p"); 6, 44, S `End_element; 6, 44, S (start_element "hgroup"); 6, 52, S 
`End_element; 6, 61, S (`Text ["\n"]); 7, 1, S (start_element "p"); 7, 4, S `End_element; 7, 4, S (start_element "main"); 7, 10, S `End_element; 7, 17, S (start_element "p"); 7, 20, S `End_element; 7, 20, S (start_element "nav"); 7, 25, S `End_element; 7, 31, S (start_element "p"); 7, 34, S `End_element; 7, 34, S (start_element "ol"); 7, 38, S `End_element; 7, 43, S (start_element "p"); 7, 46, S `End_element; 7, 46, S (start_element "p"); 7, 49, S `End_element; 7, 53, S (`Text ["\n"]); 8, 1, S (start_element "p"); 8, 4, S `End_element; 8, 4, S (start_element "section"); 8, 13, S `End_element; 8, 23, S (start_element "p"); 8, 26, S `End_element; 8, 26, S (start_element "summary"); 8, 35, S `End_element; 8, 45, S (start_element "p"); 8, 48, S `End_element; 8, 48, S (start_element "ul"); 8, 52, S `End_element; 8, 57, S (`Text ["\n"]); 9, 1, S (start_element "p"); 9, 4, S `End_element; 9, 4, S (start_element "h1"); 9, 8, S `End_element; 9, 13, S (start_element "p"); 9, 16, S `End_element; 9, 16, S (start_element "h2"); 9, 20, S `End_element; 9, 25, S (start_element "p"); 9, 28, S `End_element; 9, 28, S (start_element "h3"); 9, 32, S `End_element; 9, 37, S (start_element "p"); 9, 40, S `End_element; 9, 40, S (start_element "h4"); 9, 44, S `End_element; 9, 49, S (start_element "p"); 9, 52, S `End_element; 9, 52, S (start_element "h5"); 9, 56, S `End_element; 9, 61, S (`Text ["\n"]); 10, 1, S (start_element "p"); 10, 4, S `End_element; 10, 4, S (start_element "h6"); 10, 8, S `End_element; 10, 13, S `End_element; 10, 13, S `End_element]); ("html.parser.attributes" >:: fun _ -> expect "
      " [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, S (`Start_element ((html_ns, "div"), [(("", ":class"), "foo")])); 1, 19, S `End_element; 1, 25, S `End_element; 1, 25, S `End_element]); ("html.parser.links" >:: fun _ -> expect {|foo|} [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, S (`Start_element ((html_ns, "a"), [(("", "href"), "foo.com?bar=on&acte=123")])); 1, 35, S (`Text ["foo"]); 1, 38, S `End_element; 1, 42, S `End_element; 1, 42, S `End_element]; expect {|foo|} [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, S (`Start_element ((html_ns, "a"), [(("", "href"), "foo.com?bar=on&image=on")])); 1, 35, S (`Text ["foo"]); 1, 38, S `End_element; 1, 42, S `End_element; 1, 42, S `End_element]; expect {|foo|} [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, S (`Start_element ((html_ns, "a"), [(("", "href"), "foo.com?bar=onℑ")])); 1, 33, S (`Text ["foo"]); 1, 36, S `End_element; 1, 40, S `End_element; 1, 40, S `End_element]); ("html.parser.headings" >:: fun _ -> expect "

      foo

      " [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, S (start_element "h1"); 1, 5, E (`Misnested_tag ("h2", "h1", [])); 1, 5, S `End_element; 1, 5, S (start_element "h2"); 1, 9, E (`Misnested_tag ("h3", "h2", [])); 1, 9, S `End_element; 1, 9, S (start_element "h3"); 1, 13, E (`Misnested_tag ("h4", "h3", [])); 1, 13, S `End_element; 1, 13, S (start_element "h4"); 1, 17, E (`Misnested_tag ("h5", "h4", [])); 1, 17, S `End_element; 1, 17, S (start_element "h5"); 1, 21, E (`Misnested_tag ("h6", "h5", [])); 1, 21, S `End_element; 1, 21, S (start_element "h6"); 1, 25, E (`Misnested_tag ("h1", "h6", [])); 1, 25, S `End_element; 1, 25, S (start_element "h1"); 1, 29, S (`Text ["foo"]); 1, 32, S `End_element; 1, 37, S `End_element; 1, 37, S `End_element]); ("html.parser.pre" >:: fun _ -> expect "

      foo
      " [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, S (start_element "p"); 1, 4, S `End_element; 1, 4, S (start_element "pre"); 1, 9, S (`Text ["foo"]); 1, 12, S `End_element; 1, 18, S `End_element; 1, 18, S `End_element]; expect "

      \n\nfoo
      " [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, S (start_element "p"); 1, 4, S `End_element; 1, 4, S (start_element "pre"); 2, 1, S (`Text ["\nfoo"]); 3, 4, S `End_element; 3, 10, S `End_element; 3, 10, S `End_element]); ("html.parser.listing.leading-newline" >:: fun _ -> expect "\n\nfoo" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, S (start_element "listing"); 2, 1, S (`Text ["\nfoo"]); 3, 4, S `End_element; 3, 14, S `End_element; 3, 14, S `End_element]); ("html.parser.textarea" >:: fun _ -> expect "" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, S (start_element "textarea"); 1, 11, S (`Text ["foo

      "]); 1, 18, S `End_element; 1, 29, S `End_element; 1, 29, S `End_element]; expect "" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, S (start_element "textarea"); 2, 1, S (`Text ["\nfoo

      "]); 3, 8, S `End_element; 3, 19, S `End_element; 3, 19, S `End_element]; expect ~context:(Some (`Fragment "body")) "

      foo

      " [ 1, 1, S (start_element "textarea"); 1, 11, S `End_element; 1, 22, S (start_element "p"); 1, 25, S (`Text ["foo"]); 1, 28, S `End_element]); ("html.parser.list" >:: fun _ -> expect "
      • foo
      • bar
      " [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, S (start_element "ul"); 1, 5, S (start_element "li"); 1, 9, S (`Text ["foo"]); 1, 12, S `End_element; 1, 12, S (start_element "li"); 1, 16, S (`Text ["bar"]); 1, 19, S `End_element; 1, 19, S `End_element; 1, 24, S `End_element; 1, 24, S `End_element]); ("html.parser.definition" >:: fun _ -> expect "

      foo
      bar" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, S (start_element "p"); 1, 4, S `End_element; 1, 4, S (start_element "dt"); 1, 8, S (`Text ["foo"]); 1, 11, S `End_element; 1, 11, S (start_element "dd"); 1, 15, S (`Text ["bar"]); 1, 18, S `End_element; 1, 18, S `End_element; 1, 18, S `End_element]); ("html.parser.plaintext" >:: fun _ -> expect "

      foo</plaintext></p>" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, S (start_element "p"); 1, 4, S `End_element; 1, 4, S (start_element "plaintext"); 1, 4, E (`Unmatched_start_tag "plaintext"); 1, 15, S (`Text ["foo</plaintext></p>"]); 1, 34, S `End_element; 1, 34, S `End_element; 1, 34, S `End_element]); ("html.parser.table" >:: fun _ -> expect "<p><table><tr><td>foo</td><td>bar</td></tr></table>" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, S (start_element "p"); 1, 4, S `End_element; 1, 4, S (start_element "table"); 1, 11, S (start_element "tbody"); 1, 11, S (start_element "tr"); 1, 15, S (start_element "td"); 1, 19, S (`Text ["foo"]); 1, 22, S `End_element; 1, 27, S (start_element "td"); 1, 31, S (`Text ["bar"]); 1, 34, S `End_element; 1, 39, S `End_element; 1, 44, S `End_element; 1, 44, S `End_element; 1, 52, S `End_element; 1, 52, S `End_element]); ("html.parser.select" >:: fun _ -> expect "<select><option>foo<option>bar</select>" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, S (start_element "select"); 1, 9, S (start_element "option"); 1, 17, S (`Text ["foo"]); 1, 20, S `End_element; 1, 20, S (start_element "option"); 1, 28, S (`Text ["bar"]); 1, 31, S `End_element; 1, 31, S `End_element; 1, 40, S `End_element; 1, 40, S `End_element]); ("html.parser.datalist" >:: fun _ -> expect "<datalist><option><option></datalist>" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, S (start_element "datalist"); 1, 11, S (start_element "option"); 1, 19, S `End_element; 1, 19, S (start_element "option"); 1, 27, S `End_element; 1, 27, S `End_element; 1, 38, S `End_element; 1, 38, S `End_element]); ("html.parser.datalist.whitespace" >:: fun _ -> expect 
"<datalist>\n<option>\n<option>\n</datalist>" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, S (start_element "datalist"); 1, 11, S (`Text ["\n"]); 2, 1, S (start_element "option"); 2, 9, S (`Text ["\n"]); 3, 1, S `End_element; 3, 1, S (start_element "option"); 3, 9, S (`Text ["\n"]); 4, 1, S `End_element; 4, 1, S `End_element; 4, 12, S `End_element; 4, 12, S `End_element]); ("html.parser.ruby" >:: fun _ -> expect "<rb>a<rt>b" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, E (`Misnested_tag ("rb", "body", [])); 1, 1, S (start_element "rb"); 1, 6, E (`Misnested_tag ("rt", "body", [])); 1, 5, S (`Text ["a"]); 1, 6, S (start_element "rt"); 1, 6, E (`Unmatched_start_tag "rt"); 1, 1, E (`Unmatched_start_tag "rb"); 1, 10, S (`Text ["b"]); 1, 11, S `End_element; 1, 11, S `End_element; 1, 11, S `End_element; 1, 11, S `End_element]); ("html.parser.truncated-body" >:: fun _ -> expect "<body>" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 7, S `End_element; 1, 7, S `End_element]; expect "<body></html>" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 14, S `End_element; 1, 14, S `End_element]); ("html.parser.junk-in-body" >:: fun _ -> expect "<body>\x00<!DOCTYPE html><html><meta><body attr='value'></body>" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 7, E (`Bad_token ("U+0000", "body", "null")); 1, 8, E (`Bad_document "doctype should be first"); 1, 23, E (`Misnested_tag ("html", "body", [])); 1, 29, S (start_element "meta"); 1, 29, S `End_element; 1, 35, E (`Misnested_tag ("body", "body", ["attr", "value"])); 1, 61, S `End_element; 1, 61, S `End_element]); ("html.parser.nested-html-in-body" >:: 
fun _ -> expect "<div><html></html>foo</div><div>bar</div>" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, S (start_element "div"); 1, 6, E (`Misnested_tag ("html", "body", [])); 1, 1, E (`Unmatched_start_tag "div"); 1, 19, E (`Bad_content "html"); 1, 19, S (`Text ["foo"]); 1, 22, S `End_element; 1, 28, S (start_element "div"); 1, 33, S (`Text ["bar"]); 1, 36, S `End_element; 1, 42, S `End_element; 1, 42, S `End_element]); ("html.parser.nested-html-with-body-in-body" >:: fun _ -> expect "<p><html><body><p></body><br><p>" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, S (start_element "p"); 1, 4, E (`Misnested_tag ("html", "body", [])); 1, 10, E (`Misnested_tag ("body", "body", [])); 1, 16, S `End_element; 1, 16, S (start_element "p"); 1, 26, E (`Bad_document ("content after body")); 1, 26, S (start_element "br"); 1, 26, S `End_element; 1, 30, S `End_element; 1, 30, S (start_element "p"); 1, 33, S `End_element; 1, 33, S `End_element; 1, 33, S `End_element] ); ("html.parser.whitespace-at-end" >:: fun _ -> expect "<html><body></body></html> " [ 1, 1, S (start_element "html"); 1, 7, S (start_element "head"); 1, 7, S `End_element; 1, 7, S (start_element "body"); 1, 27, S (`Text [" "]); 1, 28, S `End_element; 1, 28, S `End_element]); ("html.parser.foreign" >:: fun _ -> expect "<body><svg><g/></svg></body>" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 7, S (`Start_element ((svg_ns, "svg"), [])); 1, 12, S (`Start_element ((svg_ns, "g"), [])); 1, 12, S `End_element; 1, 16, S `End_element; 1, 29, S `End_element; 1, 29, S `End_element]); ("html.parser.foreign.svg-followed-by-html" >:: fun _ -> expect ~context:(Some (`Fragment "body")) "<svg><feTile></feTile></svg><b></b>" [ 1, 1, S (`Start_element ((svg_ns, "svg"), [])); 1, 6, S (`Start_element 
((svg_ns, "feTile"), [])); 1, 14, S `End_element; 1, 23, S `End_element; 1, 29, S (start_element "b"); 1, 32, S `End_element]); ("html.parser.reconstruct-active-formatting-elements" >:: fun _ -> expect "<p><em><strong>foo<p>bar" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, S (start_element "p"); 1, 8, E (`Unmatched_start_tag "strong"); 1, 4, S (start_element "em"); 1, 8, S (start_element "strong"); 1, 16, S (`Text ["foo"]); 1, 19, S `End_element; 1, 19, S `End_element; 1, 19, S `End_element; 1, 19, S (start_element "p"); 1, 8, E (`Unmatched_start_tag "strong"); 1, 4, E (`Unmatched_start_tag "em"); 1, 4, S (start_element "em"); 1, 8, S (start_element "strong"); 1, 22, S (`Text ["bar"]); 1, 25, S `End_element; 1, 25, S `End_element; 1, 25, S `End_element; 1, 25, S `End_element; 1, 25, S `End_element]); ("html.parser.close-formatting-elements" >:: fun _ -> expect ~context:(Some (`Fragment "body")) "<a>fo</a>o" [ 1, 1, S (start_element "a"); 1, 4, S (`Text ["fo"]); 1, 6, S `End_element; 1, 10, S (`Text ["o"])]); ("html.parser.reset-mode" >:: fun _ -> expect ~context:(Some (`Fragment "body")) "<table></table><table></table>" [ 1, 1, S (start_element "table"); 1, 8, S `End_element; 1, 16, S (start_element "table"); 1, 23, S `End_element]); ("html.parser.fragment" >:: fun _ -> expect ~context:(Some (`Fragment "title")) "</p>" [ 1, 1, S (`Text ["</p>"])]; expect ~context:(Some (`Fragment "textarea")) "</p>" [ 1, 1, S (`Text ["</p>"])]; expect ~context:(Some (`Fragment "body")) "</p>" [ 1, 1, E (`Unmatched_end_tag "p"); 1, 1, S (start_element "p"); 1, 1, S `End_element]; expect ~context:(Some (`Fragment "body")) "<!DOCTYPE html>" [ 1, 1, E (`Bad_document "doctype should be first")]); ("html.parser.fragment.rawtext" >:: fun _ -> expect ~context:(Some (`Fragment "style")) "&nbsp;</p>" [ 1, 1, S (`Text ["&nbsp;</p>"])]); ("html.parser.fragment.script" >:: fun _ -> expect ~context:(Some (`Fragment 
"script")) "&nbsp;</p>" [ 1, 1, S (`Text ["&nbsp;</p>"])]); ("html.parser.fragment.plaintext" >:: fun _ -> expect ~context:(Some (`Fragment "plaintext")) "&nbsp;</p></plaintext>" [ 1, 1, S (`Text ["&nbsp;</p></plaintext>"])]); ("html.parser.context-detection" >:: fun _ -> expect ~context:None "<p>foo</p>" [ 1, 1, S (start_element "p"); 1, 4, S (`Text ["foo"]); 1, 7, S `End_element]; expect ~context:None "<html></html>" [ 1, 1, S (start_element "html"); 1, 7, S (start_element "head"); 1, 7, S `End_element; 1, 7, S (start_element "body"); 1, 14, S `End_element; 1, 14, S `End_element]); ("html.parser.foreign-context" >:: fun _ -> expect ~context:None "<g/>" [ 1, 1, S (`Start_element ((svg_ns, "g"), [])); 1, 1, S `End_element]); ("html.parser.context-disambiguation" >:: fun _ -> expect ~context:(Some (`Fragment "svg")) "<a></a>" [ 1, 1, S (`Start_element ((svg_ns, "a"), [])); 1, 4, S `End_element]); ("html.parser.context-case-insensitivity" >:: fun _ -> expect ~context:(Some (`Fragment "SVG")) "<a></a>" [ 1, 1, S (`Start_element ((svg_ns, "a"), [])); 1, 4, S `End_element]); ("html.parser.bad-self-closing-tag" >:: fun _ -> expect "<p/>" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, E (`Bad_token ("/>", "tag", "should not be self-closing")); 1, 1, S (start_element "p"); 1, 5, S `End_element; 1, 5, S `End_element; 1, 5, S `End_element]); ("html.parser.image-tag" >:: fun _ -> expect ~context:(Some (`Fragment "body")) "<image/>" [ 1, 1, E (`Bad_token ("image", "tag", "should be 'img'")); 1, 1, S (start_element "img"); 1, 1, S `End_element]); ("html.parser.nulls" >:: fun _ -> expect ~context:(Some (`Fragment "svg")) "\x00foo" [ 1, 1, E (`Bad_token ("U+0000", "foreign content", "null")); 1, 1, S (`Text ["\xef\xbf\xbdfoo"])]; expect ~context:(Some (`Fragment "body")) "<table>\x00foo</table>" [ 1, 1, S (start_element "table"); 1, 8, E (`Bad_token ("U+0000", "table", "null")); 1, 9, E (`Bad_content 
"table"); 1, 10, E (`Bad_content "table"); 1, 11, E (`Bad_content "table"); 1, 9, S (`Text ["foo"]); 1, 12, S `End_element]; expect ~context:(Some (`Fragment "select")) "\x00foo" [ 1, 1, E (`Bad_token ("U+0000", "select", "null")); 1, 2, S (`Text ["foo"])]); ("html.parser.foreign.cdata" >:: fun _ -> expect ~context:None "<svg><![CDATA[foo]]></svg>" [ 1, 1, S (`Start_element ((svg_ns, "svg"), [])); 1, 15, S (`Text ["foo"]); 1, 21, S `End_element]); ("html.parser.large-text" >:: fun _ -> with_text_limit 8 begin fun () -> expect ~context:None "foobar" [ 1, 1, S (`Text ["foobar"])]; expect ~context:None "foobarbaz" [ 1, 1, S (`Text ["foobarba"; "z"])] end); ("html.parser.adoption-agency.simple" >:: fun _ -> expect ~context:None "foo<b>bar</b>baz" [ 1, 1, S (`Text ["foo"]); 1, 4, S (start_element "b"); 1, 7, S (`Text ["bar"]); 1, 10, S `End_element; 1, 14, S (`Text ["baz"])]); ("html.parser.adoption-agency.stray" >:: fun _ -> expect ~context:None "foo</b>bar" [ 1, 4, E (`Unmatched_end_tag "b"); 1, 1, S (`Text ["foo"; "bar"])]); ("html.parser.adoption-agency.nested" >:: fun _ -> expect ~context:None "foo<b>bar<em>baz</em>quux</b>lulz" [ 1, 1, S (`Text ["foo"]); 1, 4, S (start_element "b"); 1, 7, S (`Text ["bar"]); 1, 10, S (start_element "em"); 1, 14, S (`Text ["baz"]); 1, 17, S `End_element; 1, 22, S (`Text ["quux"]); 1, 26, S `End_element; 1, 30, S (`Text ["lulz"])]); ("html.parser.adoption-agency.nested.stray" >:: fun _ -> expect ~context:None "foo<b>bar</em>baz</b>quux" [ 1, 10, E (`Unmatched_end_tag "em"); 1, 1, S (`Text ["foo"]); 1, 4, S (start_element "b"); 1, 7, S (`Text ["bar"; "baz"]); 1, 18, S `End_element; 1, 22, S (`Text ["quux"])]); ("html.parser.adoption-agency.interleaved" >:: fun _ -> expect ~context:None "foo<b>bar<em>baz</b>quux</em>" [ 1, 17, E (`Unmatched_end_tag "b"); 1, 1, S (`Text ["foo"]); 1, 4, S (start_element "b"); 1, 7, S (`Text ["bar"]); 1, 10, S (start_element "em"); 1, 14, S (`Text ["baz"]); 1, 17, S `End_element; 1, 17, S `End_element; 1, 
10, S (start_element "em"); 1, 21, S (`Text ["quux"]); 1, 25, S `End_element]); ("html.parser.adoption-agency.block" >:: fun _ -> expect ~context:None "foo<b>bar<p>baz</b>quux" [ 1, 16, E (`Unmatched_end_tag "b"); 1, 1, S (`Text ["foo"]); 1, 4, S (start_element "b"); 1, 7, S (`Text ["bar"]); 1, 16, S `End_element; 1, 10, S (start_element "p"); 1, 4, S (start_element "b"); 1, 13, S (`Text ["baz"]); 1, 16, S `End_element; 1, 20, S (`Text ["quux"]); 1, 24, S `End_element]); ("html.parser.adoption-agency.block.nested" >:: fun _ -> expect ~context:None "foo<b>bar<em>baz<strong>quux<p>blah</b>lulz" [ 1, 36, E (`Unmatched_end_tag "b"); 1, 17, E (`Unmatched_start_tag "strong"); 1, 10, E (`Unmatched_start_tag "em"); 1, 1, S (`Text ["foo"]); 1, 4, S (start_element "b"); 1, 7, S (`Text ["bar"]); 1, 10, S (start_element "em"); 1, 14, S (`Text ["baz"]); 1, 17, S (start_element "strong"); 1, 25, S (`Text ["quux"]); 1, 36, S `End_element; 1, 36, S `End_element; 1, 36, S `End_element; 1, 10, S (start_element "em"); 1, 17, S (start_element "strong"); 1, 29, S (start_element "p"); 1, 4, S (start_element "b"); 1, 32, S (`Text ["blah"]); 1, 36, S `End_element; 1, 40, S (`Text ["lulz"]); 1, 44, S `End_element; 1, 44, S `End_element; 1, 44, S `End_element]); ("html.parser.adoption-agency.reconstructed" >:: fun _ -> expect ~context:None "<p><b>foo<p>bar<em>baz</b>quux" [ 1, 1, S (start_element "p"); 1, 4, E (`Unmatched_start_tag "b"); 1, 4, S (start_element "b"); 1, 7, S (`Text ["foo"]); 1, 10, S `End_element; 1, 10, S `End_element; 1, 10, S (start_element "p"); 1, 23, E (`Unmatched_end_tag "b"); 1, 16, E (`Unmatched_start_tag "em"); 1, 4, S (start_element "b"); 1, 13, S (`Text ["bar"]); 1, 16, S (start_element "em"); 1, 20, S (`Text ["baz"]); 1, 23, S `End_element; 1, 23, S `End_element; 1, 16, S (start_element "em"); 1, 27, S (`Text ["quux"]); 1, 31, S `End_element; 1, 31, S `End_element]); ("html.parser.noscript" >:: fun _ -> expect ~context:None 
"<head><noscript><meta></noscript></head>" [ 1, 1, S (start_element "head"); 1, 7, S (start_element "noscript"); 1, 17, S (start_element "meta"); 1, 17, S `End_element; 1, 23, S `End_element; 1, 34, S `End_element]); ("html.parser.noscript.bad" >:: fun _ -> expect ~context:None "<head><noscript><!DOCTYPE html><html><head><noscript></head></noscript>" [ 1, 1, S (start_element "head"); 1, 7, S (start_element "noscript"); 1, 17, E (`Bad_document "doctype should be first"); 1, 32, E (`Misnested_tag ("html", "noscript", [])); 1, 38, E (`Misnested_tag ("head", "noscript", [])); 1, 44, E (`Misnested_tag ("noscript", "noscript", [])); 1, 54, E (`Unmatched_end_tag "head"); 1, 61, S `End_element; 1, 72, S `End_element]); ("html.parser.noscript.head.content" >:: fun _ -> expect ~context:(Some (`Fragment "head")) "<noscript> \t\n<!--foo--> foo</noscript>" [ 1, 1, S (start_element "noscript"); 1, 11, S (`Text [" \t\n"]); 2, 1, S (`Comment "foo"); 2, 12, E (`Bad_content "noscript"); 2, 11, S (`Text [" "]); 2, 12, S `End_element; 2, 12, S (start_element "body"); 2, 15, E (`Unmatched_end_tag "noscript"); 2, 12, S (`Text ["foo"]); 2, 26, S `End_element]); ("html.parser.noscript.inferred.content" >:: fun _ -> expect ~context:None "<noscript> \t\n<!--foo--> foo</noscript>" [ 1, 1, S (start_element "noscript"); 1, 11, S (`Text [" \t\n"]); 2, 1, S (`Comment "foo"); 2, 11, S (`Text [" foo"]); 2, 15, S `End_element]); ("html.parser.noscript-script.inferred.content" >:: fun _ -> expect ~context:None "<script>foo</script><noscript>bar</noscript>" [ 1, 1, S (start_element "script"); 1, 9, S (`Text ["foo"]); 1, 12, S `End_element; 1, 21, S (start_element "noscript"); 1, 31, S (`Text ["bar"]); 1, 34, S `End_element]); ("html.parser.head.fragment" >:: fun _ -> expect ~context:None "<base>" [ 1, 1, S (start_element "base"); 1, 1, S `End_element]; expect ~context:None "<basefont>" [ 1, 1, S (start_element "basefont"); 1, 1, S `End_element]; expect ~context:None "<bgsound>" [ 1, 1, S 
(start_element "bgsound"); 1, 1, S `End_element]; expect ~context:None "<link>" [ 1, 1, S (start_element "link"); 1, 1, S `End_element]; expect ~context:None "<meta>" [ 1, 1, S (start_element "meta"); 1, 1, S `End_element]; expect ~context:None "<noframes></noframes>" [ 1, 1, S (start_element "noframes"); 1, 11, S `End_element]; expect ~context:None "<style></style>" [ 1, 1, S (start_element "style"); 1, 8, S `End_element]); ("html.parser.body.fragment" >:: fun _ -> expect ~context:None "<body></body>" [ 1, 1, S (start_element "body"); 1, 14, S `End_element]); ("html.parser.body.content-truncated" >:: fun _ -> expect ~context:None "<p></body></html>foo" [ 1, 1, S (start_element "p"); 1, 4, E (`Unmatched_end_tag "body"); 1, 11, E (`Unmatched_end_tag "html"); 1, 18, S (`Text ["foo"]); 1, 21, S `End_element]); ("html.parser.nested-button" >:: fun _ -> expect ~context:None "<button><button>submit</button></button>" [ 1, 1, S (start_element "button"); 1, 9, E (`Misnested_tag ("button", "button", [])); 1, 9, S `End_element; 1, 9, S (start_element "button"); 1, 17, S (`Text ["submit"]); 1, 23, S `End_element; 1, 32, E (`Unmatched_end_tag "button")]); ("html.parser.nested-list" >:: fun _ -> expect ~context:None "<ul><li><ul></li></ul></li></ul>" [ 1, 1, S (start_element "ul"); 1, 5, S (start_element "li"); 1, 9, S (start_element "ul"); 1, 13, E (`Unmatched_end_tag "li"); 1, 18, S `End_element; 1, 23, S `End_element; 1, 28, S `End_element]); ("html.parser.definitions" >:: fun _ -> expect ~context:None "</dd><dd></dd>" [ 1, 1, E (`Unmatched_end_tag "dd"); 1, 6, S (start_element "dd"); 1, 10, S `End_element]); ("html.parser.nested-achor" >:: fun _ -> expect ~context:None "<a><a></a></a>" [ 1, 4, E (`Misnested_tag ("a", "a", [])); 1, 11, E (`Unmatched_end_tag "a"); 1, 1, S (start_element "a"); 1, 4, S `End_element; 1, 4, S (start_element "a"); 1, 7, S `End_element]); ("html.parser.nested-anchor.reconstruct" >:: fun _ -> expect ~context:None "<p><a>foo<a>bar<p>baz" [ 1, 1, S 
(start_element "p"); 1, 10, E (`Misnested_tag ("a", "a", [])); 1, 10, E (`Unmatched_start_tag "a"); 1, 4, S (start_element "a"); 1, 7, S (`Text ["foo"]); 1, 10, S `End_element; 1, 10, S (start_element "a"); 1, 13, S (`Text ["bar"]); 1, 16, S `End_element; 1, 16, S `End_element; 1, 16, S (start_element "p"); 1, 10, E (`Unmatched_start_tag "a"); 1, 10, S (start_element "a"); 1, 19, S (`Text ["baz"]); 1, 22, S `End_element; 1, 22, S `End_element]); ("html.parser.nested-nobr" >:: fun _ -> expect ~context:None "foo<nobr>bar<nobr>baz</nobr>quux</nobr>blah" [ 1, 13, E (`Misnested_tag ("nobr", "nobr", [])); 1, 33, E (`Unmatched_end_tag "nobr"); 1, 1, S (`Text ["foo"]); 1, 4, S (start_element "nobr"); 1, 10, S (`Text ["bar"]); 1, 13, S `End_element; 1, 13, S (start_element "nobr"); 1, 19, S (`Text ["baz"]); 1, 22, S `End_element; 1, 29, S (`Text ["quux"; "blah"])]); ("html.parser.end-br" >:: fun _ -> expect ~context:None "<br></br>" [ 1, 1, S (start_element "br"); 1, 1, S `End_element; 1, 5, E (`Unmatched_end_tag "br"); 1, 5, S (start_element "br"); 1, 5, S `End_element]); ("html.parser.hr" >:: fun _ -> expect ~context:None "<p><hr>" [ 1, 1, S (start_element "p"); 1, 4, S `End_element; 1, 4, S (start_element "hr"); 1, 4, S `End_element]); ("html.parser.input" >:: fun _ -> expect ~context:None "<input type='text'>" [ 1, 1, S (`Start_element ((html_ns, "input"), [("", "type"), "text"])); 1, 1, S `End_element]); ("html.parser.iframe" >:: fun _ -> expect ~context:None "<iframe><p>foo&amp;</p></iframe>" [ 1, 1, S (start_element "iframe"); 1, 9, S (`Text ["<p>foo&amp;</p>"]); 1, 24, S `End_element]); ("html.parser.noembed" >:: fun _ -> expect ~context:None "<noembed><p>foo&amp;</p></noembed>" [ 1, 1, S (start_element "noembed"); 1, 10, S (`Text ["<p>foo&amp;</p>"]); 1, 25, S `End_element]); ("html.parser.generic-tag" >:: fun _ -> expect ~context:None "<foo></foo>" [ 1, 1, S (start_element "foo"); 1, 6, S `End_element]); ("html.parser.option.body" >:: fun _ -> expect 
~context:(Some (`Fragment "body")) "<option><optgroup></optgroup>" [ 1, 1, S (start_element "option"); 1, 9, S `End_element; 1, 9, S (start_element "optgroup"); 1, 19, S `End_element]); ("html.parser.table-content-in-body" >:: fun _ -> expect ~context:(Some (`Fragment "body")) "<caption><col><colgroup><tbody><td><tfoot><th><thead><tr>" [ 1, 1, E (`Misnested_tag ("caption", "body", [])); 1, 10, E (`Misnested_tag ("col", "body", [])); 1, 15, E (`Misnested_tag ("colgroup", "body", [])); 1, 25, E (`Misnested_tag ("tbody", "body", [])); 1, 32, E (`Misnested_tag ("td", "body", [])); 1, 36, E (`Misnested_tag ("tfoot", "body", [])); 1, 43, E (`Misnested_tag ("th", "body", [])); 1, 47, E (`Misnested_tag ("thead", "body", [])); 1, 54, E (`Misnested_tag ("tr", "body", []))]); ("html.parser.caption" >:: fun _ -> expect ~context:None "<table><caption>foo<p>bar</caption></table>" [ 1, 1, S (start_element "table"); 1, 8, S (start_element "caption"); 1, 17, S (`Text ["foo"]); 1, 20, S (start_element "p"); 1, 23, S (`Text ["bar"]); 1, 26, S `End_element; 1, 26, S `End_element; 1, 36, S `End_element]); ("html.parser.colgroup" >:: fun _ -> expect ~context:None "<table><colgroup><col></colgroup></table>" [ 1, 1, S (start_element "table"); 1, 8, S (start_element "colgroup"); 1, 18, S (start_element "col"); 1, 18, S `End_element; 1, 23, S `End_element; 1, 34, S `End_element]); ("html.parser.colgroup.implicit" >:: fun _ -> expect ~context:None "<table><col></table>" [ 1, 1, S (start_element "table"); 1, 8, S (start_element "colgroup"); 1, 8, S (start_element "col"); 1, 8, S `End_element; 1, 13, S `End_element; 1, 13, S `End_element]); ("html.parser.td.direct" >:: fun _ -> expect ~context:None "<table><td></td></table>" [ 1, 1, S (start_element "table"); 1, 8, S (start_element "tbody"); 1, 8, E (`Misnested_tag ("td", "table", [])); 1, 8, S (start_element "tr"); 1, 8, S (start_element "td"); 1, 12, S `End_element; 1, 17, S `End_element; 1, 17, S `End_element; 1, 17, S `End_element]); 
("html.parser.tbody" >:: fun _ -> expect ~context:None "<table><tbody></tbody></table>" [ 1, 1, S (start_element "table"); 1, 8, S (start_element "tbody"); 1, 15, S `End_element; 1, 23, S `End_element]); ("html.parser.nested-table" >:: fun _ -> expect ~context:None "<table><table></table>" [ 1, 1, S (start_element "table"); 1, 8, E (`Misnested_tag ("table", "table", [])); 1, 8, S `End_element; 1, 8, S (start_element "table"); 1, 15, S `End_element]); ("html.parser.nested-caption" >:: fun _ -> expect ~context:None "<table><caption><caption></caption></table>" [ 1, 1, S (start_element "table"); 1, 8, S (start_element "caption"); 1, 17, E (`Misnested_tag ("caption", "caption", [])); 1, 17, S `End_element; 1, 17, S (start_element "caption"); 1, 26, S `End_element; 1, 36, S `End_element]); ("html.parser.truncated-caption" >:: fun _ -> expect ~context:None "<table><caption></table>" [ 1, 1, S (start_element "table"); 1, 8, S (start_element "caption"); 1, 17, E (`Unmatched_end_tag "table"); 1, 17, S `End_element; 1, 17, S `End_element]); ("html.parser.nested-tbody" >:: fun _ -> expect ~context:None "<tbody><tbody></tbody>" [ 1, 1, S (start_element "tbody"); 1, 8, S `End_element; 1, 8, S (start_element "tbody"); 1, 15, S `End_element]); ("html.parser.option" >:: fun _ -> expect ~context:None "<option></option>" [ 1, 1, S (start_element "option"); 1, 9, S `End_element]); ("html.parser.optgroup" >:: fun _ -> expect ~context:None "<select><optgroup><option><optgroup><option></optgroup></select>" [ 1, 1, S (start_element "select"); 1, 9, S (start_element "optgroup"); 1, 19, S (start_element "option"); 1, 27, S `End_element; 1, 27, S `End_element; 1, 27, S (start_element "optgroup"); 1, 37, S (start_element "option"); 1, 45, S `End_element; 1, 45, S `End_element; 1, 56, S `End_element]); ("html.parser.form" >:: fun _ -> expect ~context:None "<form></form>" [ 1, 1, S (start_element "form"); 1, 7, S `End_element]); ("html.parser.form.nested" >:: fun _ -> expect ~context:(Some 
(`Fragment "body")) "<form><form></form>" [ 1, 1, S (start_element "form"); 1, 7, E (`Misnested_tag ("form", "form", [])); 1, 13, S `End_element] ); ("html.parser.form.unopened" >:: fun _ -> expect ~context:(Some (`Fragment "body")) "</form>" [ 1, 1, E (`Unmatched_end_tag "form")]); ("html.parser.noframes" >:: fun _ -> expect ~context:None "<noframes>foo&amp;bar</a></noframes>" [ 1, 1, S (start_element "noframes"); 1, 11, S (`Text ["foo&amp;bar</a>"]); 1, 26, S `End_element]); ("html.parser.frameset" >:: fun _ -> expect "<frameset><frame></frameset>" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "frameset"); 1, 11, S (start_element "frame"); 1, 11, S `End_element; 1, 18, S `End_element; 1, 29, S `End_element]); ("html.parser.frameset.fragment" >:: fun _ -> expect ~context:None "<frameset></frameset>" [ 1, 1, S (start_element "frameset"); 1, 11, S `End_element]; expect ~context:None "<frame>" [ 1, 1, S (start_element "frame"); 1, 1, S `End_element]); ("html.parser.frameset.content" >:: fun _ -> expect ~context:None ("<frameset> \t\n<!--foo--><noframes></noframes><frameset></frameset>" ^ "</frameset>") [ 1, 1, S (start_element "frameset"); 1, 11, S (`Text [" \t\n"]); 2, 1, S (`Comment "foo"); 2, 11, S (start_element "noframes"); 2, 21, S `End_element; 2, 32, S (start_element "frameset"); 2, 42, S `End_element; 2, 53, S `End_element]); ("html.parser.frameset.bad" >:: fun _ -> expect ~context:None "<frameset><!DOCTYPE html><html>f" [ 1, 1, S (start_element "frameset"); 1, 11, E (`Bad_document "doctype should be first"); 1, 26, E (`Misnested_tag ("html", "frameset", [])); 1, 32, E (`Bad_content "frameset"); 1, 33, E (`Unexpected_eoi "frameset"); 1, 33, S `End_element]); ("html.parser.after-frameset.content" >:: fun _ -> expect ~context:None "<frameset></frameset> \t\n<!--foo--><noframes></noframes></html>" [ 1, 1, S (start_element "frameset"); 1, 11, S `End_element; 1, 22, S (`Text [" \t\n"]); 2, 1, S 
(`Comment "foo"); 2, 11, S (start_element "noframes"); 2, 21, S `End_element]); ("html.parser.after-frameset.bad" >:: fun _ -> expect ~context:None "<frameset></frameset><!DOCTYPE html><html>f" [ 1, 1, S (start_element "frameset"); 1, 11, S `End_element; 1, 22, E (`Bad_document "doctype should be first"); 1, 37, E (`Misnested_tag ("html", "html", [])); 1, 43, E (`Bad_content "html")]); ("html.parser.frameset-in-body" >:: fun _ -> expect ~context:(Some (`Fragment "body")) "<frameset><p>" [ 1, 1, E (`Misnested_tag ("frameset", "body", [])); 1, 11, S (start_element "p"); 1, 14, S `End_element]; expect ~context:None "<body><p><frameset><p></body>" [ 1, 1, S (start_element "body"); 1, 7, S (start_element "p"); 1, 10, E (`Misnested_tag ("frameset", "body", [])); 1, 20, S `End_element; 1, 20, S (start_element "p"); 1, 30, S `End_element; 1, 30, S `End_element]; expect ~context:None "<p><frameset><p>" [ 1, 1, S (start_element "p"); 1, 4, E (`Misnested_tag ("frameset", "body", [])); 1, 14, S `End_element; 1, 14, S (start_element "p"); 1, 17, S `End_element]; expect "<p><frameset><frame></frameset>" [ 1, 1, S (start_element "html"); 1, 1, S (start_element "head"); 1, 1, S `End_element; 1, 1, S (start_element "body"); 1, 1, S (start_element "p"); 1, 4, E (`Misnested_tag ("frameset", "body", [])); 1, 4, S `End_element; 1, 4, S `End_element; 1, 4, S (start_element "frameset"); 1, 14, S (start_element "frame"); 1, 14, S `End_element; 1, 21, S `End_element; 1, 32, S `End_element]) ] 
������������������������������������markup.ml-1.0.3/test/test_html_tokenizer.ml���������������������������������������������������������0000664�0000000�0000000�00000112512�14213577064�0021042�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open OUnit2 open Test_support open Markup__Common module Error = Markup__Error module Kstream = Markup__Kstream let doctype ?name ?public_identifier ?system_identifier ?(force_quirks = false) () = {doctype_name = name; public_identifier; system_identifier; raw_text = None; force_quirks} let tag ?(self_closing = false) name attributes = {Token_tag.name; attributes; self_closing} let expect ?state ?(foreign = false) text signals = let report, iterate, ended = expect_signals token_to_string text signals in let stream, set_state, set_foreign = text |> Markup__Stream_io.string |> Markup__Encoding.utf_8 |> Markup__Input.preprocess is_valid_html_char Error.ignore_errors |> Markup__Html_tokenizer.tokenize report in set_foreign (fun () -> foreign); let stream = match state with | None -> stream | Some state -> let open Kstream in let switched = ref false in (fun throw e k -> next stream throw e (fun t -> if not !switched then begin switched := true; set_state state; end; k t)) |> make in iter iterate stream; ended () let char_sequence ?(start = 1) ?(no_eof = false) s = let rec assemble acc index = if index >= String.length s then let acc = if no_eof then acc else (1, index + start, S `EOF)::acc in List.rev acc else assemble ((1, index + start, S (`Char (Char.code s.[index])))::acc) (index + 1) 
in assemble [] 0 let tests = [ ("html.tokenizer.empty" >:: fun _ -> expect "" [ 1, 1, S `EOF]); ("html.tokenizer.text" >:: fun _ -> expect "foo" (char_sequence "foo"); expect "f\x00oo" ([ 1, 1, S (`Char 0x66); 1, 2, E (`Bad_token ("U+0000", "content", "null")); 1, 2, S (`Char 0x00)] @ (char_sequence ~start:3 "oo"))); ("html.tokenizer.reference" >:: fun _ -> expect "&lt;&nbsp;&#48;&#x31;&#X32;&acE;" [ 1, 1, S (`Char 0x3C); 1, 5, S (`Char 0xA0); 1, 11, S (`Char 0x30); 1, 16, S (`Char 0x31); 1, 22, S (`Char 0x32); 1, 28, S (`Char 0x223E); 1, 28, S (`Char 0x0333); 1, 33, S `EOF]; expect "&\t" (char_sequence "&\t"); expect "&\n" [ 1, 1, S (`Char 0x26); 1, 2, S (`Char 0x0A); 2, 1, S `EOF]; expect "& " (char_sequence "& "); expect "&<" [ 1, 1, S (`Char 0x26); 1, 3, E (`Unexpected_eoi "tag"); 1, 2, S (`Char 0x3C); 1, 3, S `EOF]; expect "&&" (char_sequence "&&"); expect "&" (char_sequence "&")); ("html.tokenizer.bad-numeric-reference" >:: fun _ -> let reference = "character reference" in expect "&#z" ([ 1, 1, E (`Bad_token ("&#", reference, "expected digits"))] @ (char_sequence "&#z")); expect "&#xz" ([ 1, 1, E (`Bad_token ("&#x", reference, "expected digits"))] @ (char_sequence "&#xz")); expect "&#Xz" ([ 1, 1, E (`Bad_token ("&#X", reference, "expected digits"))] @ (char_sequence "&#Xz")); expect "&#48z" [ 1, 1, E (`Bad_token ("&#48", reference, "missing ';' at end")); 1, 1, S (`Char 0x30); 1, 5, S (`Char 0x7A); 1, 6, S `EOF]; expect "&#x30z" [ 1, 1, E (`Bad_token ("&#x30", reference, "missing ';' at end")); 1, 1, S (`Char 0x30); 1, 6, S (`Char 0x7A); 1, 7, S `EOF]; expect "&#X30z" [ 1, 1, E (`Bad_token ("&#X30", reference, "missing ';' at end")); 1, 1, S (`Char 0x30); 1, 6, S (`Char 0x7A); 1, 7, S `EOF]; expect "&#1000000000000000000000000000000;" [ 1, 1, E (`Bad_token ("&#1000000000000000000000000000000;", reference, "out of range")); 1, 1, S (`Char u_rep); 1, 35, S `EOF]; expect "&#1000000000000000000000000000000" [ 1, 1, E (`Bad_token 
("&#1000000000000000000000000000000", reference, "missing ';' at end")); 1, 1, E (`Bad_token ("&#1000000000000000000000000000000", reference, "out of range")); 1, 1, S (`Char u_rep); 1, 34, S `EOF]; expect "&#xD800;" [ 1, 1, E (`Bad_token ("&#xD800;", reference, "out of range")); 1, 1, S (`Char u_rep); 1, 9, S `EOF]; expect "&#x110000;" [ 1, 1, E (`Bad_token ("&#x110000;", reference, "out of range")); 1, 1, S (`Char u_rep); 1, 11, S `EOF]; expect "&#0;" [ 1, 1, E (`Bad_token ("&#0;", reference, "out of range")); 1, 1, S (`Char u_rep); 1, 5, S `EOF]; expect "&#x01;" [ 1, 1, E (`Bad_token ("&#x01;", reference, "invalid HTML character")); 1, 1, S (`Char 0x01); 1, 7, S `EOF]); ("html.tokenizer.windows-1252-reference" >:: fun _ -> let sequence = let rec generate acc position = function | [] -> List.rev ((1, position, S `EOF)::acc) | (reference, translation)::rest -> let error = 1, position, E (`Bad_token (Printf.sprintf "&#x%02X;" reference, "character reference", "Windows-1252 character")) in let character = 1, position, S (`Char translation) in generate (character::error::acc) (position + 6) rest in generate [] 1 [0x80, 0x20AC; 0x82, 0x201A; 0x83, 0x0192; 0x84, 0x201E; 0x85, 0x2026; 0x86, 0x2020; 0x87, 0x2021; 0x88, 0x02C6; 0x89, 0x2030; 0x8A, 0x0160; 0x8B, 0x2039; 0x8C, 0x0152; 0x8E, 0x017D; 0x91, 0x2018; 0x92, 0x2019; 0x93, 0x201C; 0x94, 0x201D; 0x95, 0x2022; 0x96, 0x2013; 0x97, 0x2014; 0x98, 0x02DC; 0x99, 0x2122; 0x9A, 0x0161; 0x9B, 0x203A; 0x9C, 0x0153; 0x9E, 0x017E; 0x9F, 0x0178] in expect ("&#x80;&#x82;&#x83;&#x84;&#x85;&#x86;&#x87;&#x88;&#x89;&#x8A;" ^ "&#x8B;&#x8C;&#x8E;&#x91;&#x92;&#x93;&#x94;&#x95;&#x96;&#x97;" ^ "&#x98;&#x99;&#x9A;&#x9B;&#x9C;&#x9E;&#x9F;") sequence); ("html.tokenizer.bad-entity-reference" >:: fun _ -> let reference = "entity reference" in expect "&unknown" (char_sequence "&unknown"); expect "&unknown;" ([ 1, 1, E (`Bad_token ("&unknown;", reference, "no such entity"))] @ (char_sequence "&unknown;")); expect "&NBSP" (char_sequence "&NBSP"); 
expect "&nbsp" ([ 1, 1, E (`Bad_token ("&nbsp", reference, "missing ';' at end")); 1, 1, S (`Char 0xA0); 1, 6, S `EOF]); expect "&ltz" ([ 1, 1, E (`Bad_token ("&lt", reference, "missing ';' at end")); 1, 1, S (`Char 0x3C); 1, 4, S (`Char 0x7A); 1, 5, S `EOF]); expect "&ltz;" ([ 1, 1, E (`Bad_token ("&lt", reference, "missing ';' at end")); 1, 1, S (`Char 0x3C); 1, 4, S (`Char 0x7A); 1, 5, S (`Char 0x3B); 1, 6, S `EOF]); expect "&a" (char_sequence "&a"); expect "&\xc2\xa0" (char_sequence "&\xa0")); ("html.tokenizer.rcdata" >:: fun _ -> expect ~state:`RCDATA "f&lt;" [ 1, 1, S (`Char 0x66); 1, 2, S (`Char 0x3C); 1, 6, S `EOF]; expect ~state:`RCDATA "fo<</</FoO>" (char_sequence "fo<</</FoO>"); expect ~state:`RCDATA "<title>foo</bar>&lt;</titlE><a>" ([ 1, 1, S (`Start (tag "title" []))] @ (char_sequence ~start:8 ~no_eof:true "foo</bar><") @ [ 1, 21, S (`End (tag "title" [])); 1, 29, S (`Start (tag "a" [])); 1, 32, S `EOF]); expect ~state:`RCDATA "f\x00</foo>" ([ 1, 1, S (`Char 0x66); 1, 2, E (`Bad_token ("U+0000", "content", "null")); 1, 2, S (`Char u_rep)] @ (char_sequence ~start:3 "</foo>")); expect ~state:`RCDATA "<title>f</title >" [ 1, 1, S (`Start (tag "title" [])); 1, 8, S (`Char 0x66); 1, 9, S (`End (tag "title" [])); 1, 18, S `EOF]; expect ~state:`RCDATA "<title>f</title foo='bar'>" [ 1, 1, S (`Start (tag "title" [])); 1, 8, S (`Char 0x66); 1, 9, E (`Bad_token ("foo", "tag", "end tag with attributes")); 1, 9, S (`End (tag "title" ["foo", "bar"])); 1, 27, S `EOF]; expect ~state:`RCDATA "<title>f</title/>" [ 1, 1, S (`Start (tag "title" [])); 1, 8, S (`Char 0x66); 1, 9, E (`Bad_token ("/>", "tag", "end tag cannot be self-closing")); 1, 9, S (`End (tag ~self_closing:true "title" [])); 1, 18, S `EOF]); ("html.tokenizer.rawtext" >:: fun _ -> expect ~state:`RAWTEXT "f&lt;" (char_sequence "f&lt;"); expect ~state:`RAWTEXT "f<</</FoO>" (char_sequence "f<</</FoO>"); expect ~state:`RAWTEXT "<style>foo</bar>&lt;</style><a>" ([ 1, 1, S (`Start (tag "style" []))] @ 
(char_sequence ~start:8 ~no_eof:true "foo</bar>&lt;") @ [ 1, 21, S (`End (tag "style" [])); 1, 29, S (`Start (tag "a" [])); 1, 32, S `EOF]); expect ~state:`RAWTEXT "f\x00</foo>" ([ 1, 1, S (`Char 0x66); 1, 2, E (`Bad_token ("U+0000", "content", "null")); 1, 2, S (`Char u_rep)] @ (char_sequence ~start:3 "</foo>"))); ("html.tokenizer.script-data" >:: fun _ -> expect ~state:`Script_data "f<</</FoO>" (char_sequence "f<</</FoO>"); expect ~state:`Script_data "f<!a" (char_sequence "f<!a"); expect ~state:`Script_data "f<!-a" (char_sequence "f<!-a"); expect ~state:`Script_data "f<!-->" (char_sequence "f<!-->"); expect ~state:`Script_data "f<!--->" (char_sequence "f<!--->"); expect ~state:`Script_data "f<!--a-->" (char_sequence "f<!--a-->"); expect ~state:`Script_data "f<!--<a-->" (char_sequence "f<!--<a-->"); expect ~state:`Script_data "<script><!--a</script><a>" ([ 1, 1, S (`Start (tag "script" []))] @ (char_sequence ~start:9 ~no_eof:true "<!--a") @ [ 1, 14, S (`End (tag "script" [])); 1, 23, S (`Start (tag "a" [])); 1, 26, S `EOF]); expect ~state:`Script_data "f<!--o\x00o" ((char_sequence ~no_eof:true "f<!--o") @ [1, 7, E (`Bad_token ("U+0000", "script", "null")); 1, 7, S (`Char u_rep); 1, 8, S (`Char 0x6F); 1, 9, E (`Unexpected_eoi "script"); 1, 9, S `EOF]); expect ~state:`Script_data "f<!--a-a-->" (char_sequence "f<!--a-a-->"); expect ~state:`Script_data "f<!--a-<a-->" (char_sequence "f<!--a-<a-->"); expect ~state:`Script_data "f<!--a-<scRipt-->" (char_sequence "f<!--a-<scRipt-->"); expect ~state:`Script_data "f<!--a-<scRipt>-->" (char_sequence "f<!--a-<scRipt>-->"); expect ~state:`Script_data "f<!--a-<scRipt>a</scripT>-->" (char_sequence "f<!--a-<scRipt>a</scripT>-->"); expect ~state:`Script_data "f<!--a-<script>-a-</script>-->" (char_sequence "f<!--a-<script>-a-</script>-->"); expect ~state:`Script_data "f<!--a-<script>--a---<--</script>-->" (char_sequence "f<!--a-<script>--a---<--</script>-->"); expect ~state:`Script_data "f<!--a-<script>a</a></0-->" (char_sequence 
"f<!--a-<script>a</a></0-->"); expect ~state:`Script_data "f<!--a-<a>a-->" (char_sequence "f<!--a-<a>a-->"); expect ~state:`Script_data "f<!--a-\x00-" ((char_sequence ~no_eof:true "f<!--a-") @ [ 1, 8, E (`Bad_token ("U+0000", "script", "null")); 1, 8, S (`Char u_rep); 1, 9, S (`Char 0x02D); 1, 10, E (`Unexpected_eoi "script"); 1, 10, S `EOF]); expect ~state:`Script_data "f<!--a--\x00--" ((char_sequence ~no_eof:true "f<!--a--") @ [ 1, 9, E (`Bad_token ("U+0000", "script", "null")); 1, 9, S (`Char u_rep); 1, 10, S (`Char 0x02D); 1, 11, S (`Char 0x02D); 1, 12, E (`Unexpected_eoi "script"); 1, 12, S `EOF]); expect ~state:`Script_data "f<!--<script>\x00" ((char_sequence ~no_eof:true "f<!--<script>") @ [ 1, 14, E (`Bad_token ("U+0000", "script", "null")); 1, 14, S (`Char u_rep); 1, 15, E (`Unexpected_eoi "script"); 1, 15, S `EOF]); expect ~state:`Script_data "f<!--<script>-\x00-" ((char_sequence ~no_eof:true "f<!--<script>-") @ [ 1, 15, E (`Bad_token ("U+0000", "script", "null")); 1, 15, S (`Char u_rep); 1, 16, S (`Char 0x2D); 1, 17, E (`Unexpected_eoi "script"); 1, 17, S `EOF]); expect ~state:`Script_data "f<!--<script>--\x00--" ((char_sequence ~no_eof:true "f<!--<script>--") @ [ 1, 16, E (`Bad_token ("U+0000", "script", "null")); 1, 16, S (`Char u_rep); 1, 17, S (`Char 0x2D); 1, 18, S (`Char 0x2D); 1, 19, E (`Unexpected_eoi "script"); 1, 19, S `EOF]); expect ~state:`Script_data "f<!--a< -->" (char_sequence "f<!--a< -->"); expect ~state:`Script_data "<script>foo</bar>&lt;</script><a>" ([ 1, 1, S (`Start (tag "script" []))] @ (char_sequence ~start:9 ~no_eof:true "foo</bar>&lt;") @ [ 1, 22, S (`End (tag "script" [])); 1, 31, S (`Start (tag "a" [])); 1, 34, S `EOF]); expect ~state:`Script_data "f\x00</foo>" ([ 1, 1, S (`Char 0x66); 1, 2, E (`Bad_token ("U+0000", "content", "null")); 1, 2, S (`Char u_rep)] @ (char_sequence ~start:3 "</foo>"))); ("html.tokenizer.plaintext" >:: fun _ -> expect ~state:`PLAINTEXT "<plaintext>foo&lt;</plaintext>" ([ 1, 1, S (`Start (tag 
"plaintext" []))] @ (char_sequence ~start:12 "foo&lt;</plaintext>")); expect ~state:`PLAINTEXT "f\x00</foo>" ([ 1, 1, S (`Char 0x66); 1, 2, E (`Bad_token ("U+0000", "content", "null")); 1, 2, S (`Char u_rep)] @ (char_sequence ~start:3 "</foo>"))); ("html.tokenizer.comment" >:: fun _ -> expect "<!--foo-bar-->" [ 1, 1, S (`Comment "foo-bar"); 1, 15, S `EOF]; expect "<!---->" [ 1, 1, S (`Comment ""); 1, 8, S `EOF]; expect "<!---a-->" [ 1, 1, S (`Comment "-a"); 1, 10, S `EOF]); ("html.tokenizer.bad-comment" >:: fun _ -> expect "<!foo>" [ 1, 1, E (`Bad_token ("<!", "comment", "should begin with '<!--'")); 1, 1, S (`Comment "foo"); 1, 7, S `EOF]; expect "<!--\x00foo-->" [ 1, 5, E (`Bad_token ("U+0000", "comment", "null")); 1, 1, S (`Comment "\xef\xbf\xbdfoo"); 1, 12, S `EOF]; expect "<!-->" [ 1, 1, E (`Bad_token ("<!-->", "comment", "'-->' overlaps '<!--'")); 1, 1, S (`Comment ""); 1, 6, S `EOF]; expect "<!--" [ 1, 5, E (`Unexpected_eoi "comment"); 1, 1, S (`Comment ""); 1, 5, S `EOF]; expect "<!--->" [ 1, 1, E (`Bad_token ("<!--->", "comment", "'-->' overlaps '<!--'")); 1, 1, S (`Comment ""); 1, 7, S `EOF]; expect "<!---\x00-->" [ 1, 6, E (`Bad_token ("U+0000", "comment", "null")); 1, 1, S (`Comment "-\xef\xbf\xbd"); 1, 10, S `EOF]; expect "<!---" [ 1, 6, E (`Unexpected_eoi "comment"); 1, 1, S (`Comment ""); 1, 6, S `EOF]; expect "<!--a\x00-->" [ 1, 6, E (`Bad_token ("U+0000", "comment", "null")); 1, 1, S (`Comment "a\xef\xbf\xbd"); 1, 10, S `EOF]; expect "<!--a" [ 1, 6, E (`Unexpected_eoi "comment"); 1, 1, S (`Comment "a"); 1, 6, S `EOF]; expect "<!--a-\x00-" [ 1, 7, E (`Bad_token ("U+0000", "comment", "null")); 1, 9, E (`Unexpected_eoi "comment"); 1, 1, S (`Comment "a-\xef\xbf\xbd"); 1, 9, S `EOF]; expect "<!--a--\x00-->" [ 1, 8, E (`Bad_token ("U+0000", "comment", "null")); 1, 1, S (`Comment "a--\xef\xbf\xbd"); 1, 12, S `EOF]; expect "<!--a--!>" [ 1, 8, E (`Bad_token ("--!", "comment", "'--' should be in '-->'")); 1, 1, S (`Comment "a"); 1, 10, S `EOF]; expect 
"<!--a--->" [ 1, 8, E (`Bad_token ("---", "comment", "'--' should be in '-->'")); 1, 1, S (`Comment "a-"); 1, 10, S `EOF]; expect "<!--a--" [ 1, 8, E (`Unexpected_eoi "comment"); 1, 1, S (`Comment "a"); 1, 8, S `EOF]; expect "<!--a--a-->" [ 1, 8, E (`Bad_token ("--a", "comment", "'--' should be in '-->'")); 1, 1, S (`Comment "a--a"); 1, 12, S `EOF]; expect "<!--a--!-->" [ 1, 8, E (`Bad_token ("--!", "comment", "'--' should be in '-->'")); 1, 1, S (`Comment "a--!"); 1, 12, S `EOF]; expect "<!--a--!\x00a-->" [ 1, 8, E (`Bad_token ("--!", "comment", "'--' should be in '-->'")); 1, 9, E (`Bad_token ("U+0000", "comment", "null")); 1, 1, S (`Comment "a--!\xef\xbf\xbda"); 1, 14, S `EOF]; expect "<!--a--!a-->" [ 1, 8, E (`Bad_token ("--!", "comment", "'--' should be in '-->'")); 1, 1, S (`Comment "a--!a"); 1, 13, S `EOF]; expect "<!--a--!" [ 1, 8, E (`Bad_token ("--!", "comment", "'--' should be in '-->'")); 1, 9, E (`Unexpected_eoi "comment"); 1, 1, S (`Comment "a"); 1, 9, S `EOF]); ("html.tokenizer.doctype" >:: fun _ -> expect "<!DOCTYPE html>" [ 1, 1, S (`Doctype (doctype ~name:"html" ())); 1, 16, S `EOF]; expect "<!DOCTYPE html>" [ 1, 1, S (`Doctype (doctype ~name:"html" ())); 1, 17, S `EOF]; expect "<!DOCTYPE hTmL>" [ 1, 1, S (`Doctype (doctype ~name:"html" ())); 1, 16, S `EOF]; expect "<!DOCTYPE html >" [ 1, 1, S (`Doctype (doctype ~name:"html" ())); 1, 18, S `EOF]; expect "<!DOCTYPE html PUBLIC 'foo'>" [ 1, 1, S (`Doctype (doctype ~name:"html" ~public_identifier:"foo" ())); 1, 29, S `EOF]; expect "<!DOCTYPE html SYSTEM 'bar'>" [ 1, 1, S (`Doctype (doctype ~name:"html" ~system_identifier:"bar" ())); 1, 29, S `EOF]; expect "<!DOCTYPE html PUBLIC 'foo' 'bar'>" [ 1, 1, S (`Doctype (doctype ~name:"html" ~public_identifier:"foo" ~system_identifier:"bar" ())); 1, 36, S `EOF]; expect "<!DOCTYPE html PuBlIc \"foo\" >" [ 1, 1, S (`Doctype (doctype ~name:"html" ~public_identifier:"foo" ())); 1, 31, S `EOF]; expect "<!DOCTYPE html sYsTeM \"bar\" >" [ 1, 1, S (`Doctype (doctype 
~name:"html" ~system_identifier:"bar" ())); 1, 31, S `EOF]); ("html.tokenizer.bad-doctype" >:: fun _ -> expect "<!DOCTYPEhtml>" [ 1, 10, E (`Bad_token ("h", "doctype", "expected whitespace")); 1, 1, S (`Doctype (doctype ~name:"html" ())); 1, 15, S `EOF]; expect "<!DOCTYPE" [ 1, 10, E (`Unexpected_eoi "doctype"); 1, 1, S (`Doctype (doctype ~force_quirks:true ())); 1, 10, S `EOF]; expect "<!DOCTYPE \x00html>" [ 1, 11, E (`Bad_token ("U+0000", "doctype", "null")); 1, 1, S (`Doctype (doctype ~name:"\xef\xbf\xbdhtml" ())); 1, 17, S `EOF]; expect "<!DOCTYPE >" [ 1, 11, E (`Bad_token (">", "doctype", "expected name")); 1, 1, S (`Doctype (doctype ~force_quirks:true ())); 1, 12, S `EOF]; expect "<!DOCTYPE " [ 1, 11, E (`Unexpected_eoi "doctype"); 1, 1, S (`Doctype (doctype ~force_quirks:true ())); 1, 11, S `EOF]; expect "<!DOCTYPE html\x00>" [ 1, 15, E (`Bad_token ("U+0000", "doctype", "null")); 1, 1, S (`Doctype (doctype ~name:"html\xef\xbf\xbd" ())); 1, 17, S `EOF]; expect "<!DOCTYPE html" [ 1, 15, E (`Unexpected_eoi "doctype"); 1, 1, S (`Doctype (doctype ~name:"html" ~force_quirks:true ())); 1, 15, S `EOF]; expect "<!DOCTYPE html " [ 1, 16, E (`Unexpected_eoi "doctype"); 1, 1, S (`Doctype (doctype ~name:"html" ~force_quirks:true ())); 1, 16, S `EOF]; expect "<!DOCTYPE html P>f" [ 1, 16, E (`Bad_token ("P", "doctype", "expected 'PUBLIC' or 'SYSTEM'")); 1, 1, S (`Doctype (doctype ~name:"html" ~force_quirks:true ())); 1, 18, S (`Char 0x66); 1, 19, S `EOF]; expect "<!DOCTYPE html PUBLIC'foo'>" [ 1, 22, E (`Bad_token ("'", "doctype", "expected whitespace")); 1, 1, S (`Doctype (doctype ~name:"html" ~public_identifier:"foo" ())); 1, 28, S `EOF]; expect "<!DOCTYPE html PUBLIC\"foo\">" [ 1, 22, E (`Bad_token ("\"", "doctype", "expected whitespace")); 1, 1, S (`Doctype (doctype ~name:"html" ~public_identifier:"foo" ())); 1, 28, S `EOF]; expect "<!DOCTYPE html PUBLIC>" [ 1, 22, E (`Bad_token (">", "doctype", "expected public identifier")); 1, 1, S (`Doctype (doctype ~name:"html" 
~force_quirks:true ())); 1, 23, S `EOF]; expect "<!DOCTYPE html PUBLIC" [ 1, 22, E (`Unexpected_eoi "doctype"); 1, 1, S (`Doctype (doctype ~name:"html" ~force_quirks:true ())); 1, 22, S `EOF]; expect "<!DOCTYPE html PUBLICfoo 'bar'>" [ 1, 22, E (`Bad_token ("f", "doctype", "expected whitespace")); 1, 1, S (`Doctype (doctype ~name:"html" ~force_quirks:true ())); 1, 32, S `EOF]; expect "<!DOCTYPE html PUBLIC >" [ 1, 23, E (`Bad_token (">", "doctype", "expected public identifier")); 1, 1, S (`Doctype (doctype ~name:"html" ~force_quirks:true ())); 1, 24, S `EOF]; expect "<!DOCTYPE html PUBLIC " [ 1, 23, E (`Unexpected_eoi "doctype"); 1, 1, S (`Doctype (doctype ~name:"html" ~force_quirks:true ())); 1, 23, S `EOF]; expect "<!DOCTYPE html PUBLIC foo>" [ 1, 23, E (`Bad_token ("f", "doctype", "public identifier must be quoted")); 1, 1, S (`Doctype (doctype ~name:"html" ~force_quirks:true ())); 1, 27, S `EOF]; expect "<!DOCTYPE html PUBLIC 'f\x00oo>f" [ 1, 25, E (`Bad_token ("U+0000", "doctype", "null")); 1, 28, E (`Bad_token (">", "doctype", "'>' in identifier")); 1, 1, S (`Doctype (doctype ~name:"html" ~public_identifier:"f\xef\xbf\xbdoo" ~force_quirks:true ())); 1, 29, S (`Char 0x66); 1, 30, S `EOF]; expect "<!DOCTYPE html PUBLIC 'foo" [ 1, 27, E (`Unexpected_eoi "doctype"); 1, 1, S (`Doctype (doctype ~name:"html" ~public_identifier:"foo" ~force_quirks:true ())); 1, 27, S `EOF]; expect "<!DOCTYPE html PUBLIC 'foo''bar'>" [ 1, 28, E (`Bad_token ("'", "doctype", "expected whitespace")); 1, 1, S (`Doctype (doctype ~name:"html" ~public_identifier:"foo" ~system_identifier:"bar" ())); 1, 34, S `EOF]; expect "<!DOCTYPE html PUBLIC 'foo'" [ 1, 28, E (`Unexpected_eoi "doctype"); 1, 1, S (`Doctype (doctype ~name:"html" ~public_identifier:"foo" ~force_quirks:true ())); 1, 28, S `EOF]; expect "<!DOCTYPE html PUBLIC 'foo'bar>" [ 1, 28, E (`Bad_token ("b", "doctype", "system identifier must be quoted")); 1, 1, S (`Doctype (doctype ~name:"html" ~public_identifier:"foo" 
~force_quirks:true ())); 1, 32, S `EOF]; expect "<!DOCTYPE html PUBLIC 'foo' " [ 1, 29, E (`Unexpected_eoi "doctype"); 1, 1, S (`Doctype (doctype ~name:"html" ~public_identifier:"foo" ~force_quirks:true ())); 1, 29, S `EOF]; expect "<!DOCTYPE html PUBLIC 'foo' bar>" [ 1, 29, E (`Bad_token ("b", "doctype", "system identifier must be quoted")); 1, 1, S (`Doctype (doctype ~name:"html" ~public_identifier:"foo" ~force_quirks:true ())); 1, 33, S `EOF]; expect "<!DOCTYPE html SYSTEM'foo'>" [ 1, 22, E (`Bad_token ("'", "doctype", "expected whitespace")); 1, 1, S (`Doctype (doctype ~name:"html" ~system_identifier:"foo" ())); 1, 28, S `EOF]; expect "<!DOCTYPE html SYSTEM\"foo\">" [ 1, 22, E (`Bad_token ("\"", "doctype", "expected whitespace")); 1, 1, S (`Doctype (doctype ~name:"html" ~system_identifier:"foo" ())); 1, 28, S `EOF]; expect "<!DOCTYPE html SYSTEM>" [ 1, 22, E (`Bad_token (">", "doctype", "expected system identifier")); 1, 1, S (`Doctype (doctype ~name:"html" ~force_quirks:true ())); 1, 23, S `EOF]; expect "<!DOCTYPE html SYSTEM" [ 1, 22, E (`Unexpected_eoi "doctype"); 1, 1, S (`Doctype (doctype ~name:"html" ~force_quirks:true ())); 1, 22, S `EOF]; expect "<!DOCTYPE html SYSTEMfoo 'bar'>" [ 1, 22, E (`Bad_token ("f", "doctype", "expected whitespace")); 1, 1, S (`Doctype (doctype ~name:"html" ~force_quirks:true ())); 1, 32, S `EOF]; expect "<!DOCTYPE html SYSTEM >" [ 1, 23, E (`Bad_token (">", "doctype", "expected system identifier")); 1, 1, S (`Doctype (doctype ~name:"html" ~force_quirks:true ())); 1, 24, S `EOF]; expect "<!DOCTYPE html SYSTEM " [ 1, 23, E (`Unexpected_eoi "doctype"); 1, 1, S (`Doctype (doctype ~name:"html" ~force_quirks:true ())); 1, 23, S `EOF]; expect "<!DOCTYPE html SYSTEM foo>" [ 1, 23, E (`Bad_token ("f", "doctype", "system identifier must be quoted")); 1, 1, S (`Doctype (doctype ~name:"html" ~force_quirks:true ())); 1, 27, S `EOF]; expect "<!DOCTYPE html SYSTEM 'foo'" [ 1, 28, E (`Unexpected_eoi "doctype"); 1, 1, S (`Doctype (doctype 
~name:"html" ~system_identifier:"foo" ~force_quirks:true ())); 1, 28, S `EOF]; expect "<!DOCTYPE html SYSTEM 'foo' and stuff>" [ 1, 29, E (`Bad_token ("a", "doctype", "junk after system identifier")); 1, 1, S (`Doctype (doctype ~name:"html" ~system_identifier:"foo" ())); 1, 39, S `EOF]; expect "<!DOCTYPE html P" [ 1, 16, E (`Bad_token ("P", "doctype", "expected 'PUBLIC' or 'SYSTEM'")); 1, 1, S (`Doctype (doctype ~name:"html" ~force_quirks:true ())); 1, 17, S `EOF]); ("html.tokenizer.cdata" >:: fun _ -> expect ~foreign:true "<![CDATA[foo&lt;<bar>]]>" ((char_sequence ~start:10 ~no_eof:true "foo&lt;<bar>") @ [ 1, 25, S `EOF]); expect ~foreign:true "<![CDATA[foo" (char_sequence ~start:10 "foo"); expect ~foreign:true "<![CDATA[foo]foo]]foo]]>" ((char_sequence ~start:10 ~no_eof:true "foo]foo]]foo") @ [ 1, 25, S `EOF])); ("html.tokenizer.bad-cdata" >:: fun _ -> expect "<![CDATA[foo&lt;<bar]]>" [ 1, 1, E (`Bad_token ("<![CDATA[", "content", "CDATA sections not allowed in HTML")); 1, 1, S (`Comment "[CDATA[foo&lt;<bar]]"); 1, 24, S `EOF]); ("html.tokenizer.start-tag" >:: fun _ -> expect "text<foO>text" ((char_sequence ~no_eof:true "text") @ [ 1, 5, S (`Start (tag "foo" []))] @ (char_sequence ~start:10 "text")); expect "<foo >" [ 1, 1, S (`Start (tag "foo" [])); 1, 7, S `EOF]; expect "<FoO >" [ 1, 1, S (`Start (tag "foo" [])); 1, 8, S `EOF]; expect "<foo bar='baz'>" [ 1, 1, S (`Start (tag "foo" ["bar", "baz"])); 1, 16, S `EOF]; expect "<foo BaR = 'baz' quux\t=\t\"lulz\" enabled>" [ 1, 1, S (`Start (tag "foo" ["bar", "baz"; "quux", "lulz"; "enabled", ""])); 1, 44, S `EOF]; expect "<foo enabled >" [ 1, 1, S (`Start (tag "foo" ["enabled", ""])); 1, 15, S `EOF]; expect "<foo enabled bar='baz'>" [ 1, 1, S (`Start (tag "foo" ["enabled", ""; "bar", "baz"])); 1, 24, S `EOF]; expect "<foo bar=baz>" [ 1, 1, S (`Start (tag "foo" ["bar", "baz"])); 1, 14, S `EOF]); ("html.tokenizer.self-closing-tag" >:: fun _ -> expect "<foo/>" [ 1, 1, S (`Start (tag ~self_closing:true "foo" [])); 1, 7, 
S `EOF]; expect "<foO />" [ 1, 1, S (`Start (tag ~self_closing:true "foo" [])); 1, 8, S `EOF]; expect "<foo bar='baz'/>" [ 1, 1, S (`Start (tag ~self_closing:true "foo" ["bar", "baz"])); 1, 17, S `EOF]; expect "<foo bar='baz' />" [ 1, 1, S (`Start (tag ~self_closing:true "foo" ["bar", "baz"])); 1, 18, S `EOF]; expect "<foo bar/>" [ 1, 1, S (`Start (tag ~self_closing:true "foo" ["bar", ""])); 1, 11, S `EOF]; expect "<foo bar />" [ 1, 1, S (`Start (tag ~self_closing:true "foo" ["bar", ""])); 1, 12, S `EOF]); ("html.tokenizer.end-tag" >:: fun _ -> expect "</foo>" [ 1, 1, S (`End (tag "foo" [])); 1, 7, S `EOF]; expect "</foO >" [ 1, 1, S (`End (tag "foo" [])); 1, 8, S `EOF]); ("html.tokenizer.reference-in-attribute" >:: fun _ -> let reference = "entity reference" in expect "<foo bar='&lt;'>" [ 1, 1, S (`Start (tag "foo" ["bar", "<"])); 1, 17, S `EOF]; expect "<foo bar='&'>" [ 1, 1, S (`Start (tag "foo" ["bar", "&"])); 1, 14, S `EOF]; expect "<foo bar='&lt'>" [ 1, 11, E (`Bad_token ("&lt", reference, "missing ';' at end")); 1, 1, S (`Start (tag "foo" ["bar", "<"])); 1, 16, S `EOF]; expect "<foo bar='&ltz'>" [ 1, 1, S (`Start (tag "foo" ["bar", "&ltz"])); 1, 17, S `EOF]; expect "<foo bar='&lt='>" [ 1, 11, E (`Bad_token ("&lt=", "attribute", "unterminated entity reference followed by '='")); 1, 1, S (`Start (tag "foo" ["bar", "&lt="])); 1, 17, S `EOF]; expect "<foo bar='&image='>" [ 1, 11, E (`Bad_token ("&image=", "attribute", "unterminated entity reference followed by '='")); 1, 1, S (`Start (tag "foo" ["bar", "&image="])); 1, 20, S `EOF]; expect "<foo bar=&amp;>" [ 1, 1, S (`Start (tag "foo" ["bar", "&"])); 1, 16, S `EOF]; expect "<foo bar='&acE;'>" [ 1, 1, S (`Start (tag "foo" ["bar", "\xe2\x88\xbe\xcc\xb3"])); 1, 18, S `EOF]); ("html.tokenizer.bad-attribute-set" >:: fun _ -> expect "<foo bar='a' bar='b'>" [ 1, 1, E (`Bad_token ("bar", "tag", "duplicate attribute")); 1, 1, S (`Start (tag "foo" ["bar", "a"])); 1, 22, S `EOF]; expect "<foo BaR='a' bAr='b'>" [ 1, 1, E 
(`Bad_token ("bar", "tag", "duplicate attribute")); 1, 1, S (`Start (tag "foo" ["bar", "a"])); 1, 22, S `EOF]; expect "</foo bar='a'>" [ 1, 1, E (`Bad_token ("bar", "tag", "end tag with attributes")); 1, 1, S (`End (tag "foo" ["bar", "a"])); 1, 15, S `EOF]); ("html.tokenizer.bad-start-tag" >:: fun _ -> expect "< " ([ 1, 2, E (`Bad_token (" ", "tag", "invalid start character"))] @ (char_sequence "< ")); expect "<" [ 1, 2, E (`Unexpected_eoi "tag"); 1, 1, S (`Char 0x3C); 1, 2, S `EOF]; expect "<f" [ 1, 3, E (`Unexpected_eoi "tag"); 1, 3, S `EOF]; expect "<f\x00>" [ 1, 3, E (`Bad_token ("U+0000", "tag name", "null")); 1, 1, S (`Start (tag "f\xef\xbf\xbd" [])); 1, 5, S `EOF]; expect "<foo " [ 1, 6, E (`Unexpected_eoi "tag"); 1, 6, S `EOF]; expect "<foo bar=''" [ 1, 12, E (`Unexpected_eoi "tag"); 1, 12, S `EOF]; expect "<foo bar=''baz=''>" [ 1, 12, E (`Bad_token ("b", "tag", "expected whitespace before attribute")); 1, 1, S (`Start (tag "foo" ["bar", ""; "baz", ""])); 1, 19, S `EOF]); ("html.tokenizer.bad-self-closing-tag" >:: fun _ -> expect "<foo/" [ 1, 6, E (`Unexpected_eoi "tag"); 1, 6, S `EOF]; expect "<foo / bar='baz'>" [ 1, 7, E (`Bad_token (" ", "tag", "expected '/>'")); 1, 1, S (`Start (tag "foo" ["bar", "baz"])); 1, 18, S `EOF]); ("html.tokenizer.bad-end-tag" >:: fun _ -> expect "</foo/>" [ 1, 1, E (`Bad_token ("/>", "tag", "end tag cannot be self-closing")); 1, 1, S (`End (tag ~self_closing:true "foo" [])); 1, 8, S `EOF]; expect "</" ([ 1, 3, E (`Unexpected_eoi "tag")] @ (char_sequence "</")); expect "</>foo" ([ 1, 1, E (`Bad_token ("</>", "tag", "no tag name"))] @ (char_sequence ~start:4 "foo")); expect "</ foo>" [ 1, 3, E (`Bad_token (" ", "tag", "invalid start character")); 1, 1, S (`Comment "foo"); 1, 8, S `EOF]; expect "</f" [ 1, 4, E (`Unexpected_eoi "tag"); 1, 4, S `EOF]; expect "</f\x00>" [ 1, 4, E (`Bad_token ("U+0000", "tag name", "null")); 1, 1, S (`End (tag "f\xef\xbf\xbd" [])); 1, 6, S `EOF]); ("html.tokenizer.bad-attribute" >:: fun _ -> let name 
= "attribute name" in expect "<foo \x00bar=''>" [ 1, 6, E (`Bad_token ("U+0000", name, "null")); 1, 1, S (`Start (tag "foo" ["\xef\xbf\xbdbar", ""])); 1, 14, S `EOF]; expect "<foo \"bar=''>" [ 1, 6, E (`Bad_token ("\"", name, "invalid start character")); 1, 1, S (`Start (tag "foo" ["\"bar", ""])); 1, 14, S `EOF]; expect "<foo 'bar=''>" [ 1, 6, E (`Bad_token ("'", name, "invalid start character")); 1, 1, S (`Start (tag "foo" ["'bar", ""])); 1, 14, S `EOF]; expect "<foo <bar=''>" [ 1, 6, E (`Bad_token ("<", name, "invalid start character")); 1, 1, S (`Start (tag "foo" ["<bar", ""])); 1, 14, S `EOF]; expect "<foo =bar=''>" [ 1, 6, E (`Bad_token ("=", name, "invalid start character")); 1, 1, S (`Start (tag "foo" ["=bar", ""])); 1, 14, S `EOF]; expect "<foo b\"'<\x00ar=''>" [ 1, 7, E (`Bad_token ("\"", name, "invalid name character")); 1, 8, E (`Bad_token ("'", name, "invalid name character")); 1, 9, E (`Bad_token ("<", name, "invalid name character")); 1, 10, E (`Bad_token ("U+0000", name, "null")); 1, 1, S (`Start (tag "foo" ["b\"'<\xef\xbf\xbdar", ""])); 1, 17, S `EOF]; expect "<foo bar" [ 1, 9, E (`Unexpected_eoi "tag"); 1, 9, S `EOF]; expect "<foo bar \x00='baz'>" [ 1, 10, E (`Bad_token ("U+0000", name, "null")); 1, 1, S (`Start (tag "foo" ["bar", ""; "\xef\xbf\xbd", "baz"])); 1, 18, S `EOF]; expect "<foo bar \" \' <>" [ 1, 10, E (`Bad_token ("\"", name, "invalid start character")); 1, 12, E (`Bad_token ("'", name, "invalid start character")); 1, 14, E (`Bad_token ("<", name, "invalid start character")); 1, 1, S (`Start (tag "foo" ["bar", ""; "\"", ""; "'", ""; "<", ""])); 1, 16, S `EOF]; expect "<foo bar " [ 1, 10, E (`Unexpected_eoi "tag"); 1, 10, S `EOF]; let value = "attribute value" in expect "<foo bar=\x00 baz=< quux== lulz=` omg=>" [ 1, 10, E (`Bad_token ("U+0000", value, "null")); 1, 16, E (`Bad_token ("<", value, "invalid start character")); 1, 23, E (`Bad_token ("=", value, "invalid start character")); 1, 30, E (`Bad_token ("`", value, "invalid start 
character")); 1, 36, E (`Bad_token (">", "tag", "expected attribute value after '='")); 1, 1, S (`Start (tag "foo" ["bar", "\xef\xbf\xbd"; "baz", "<"; "quux", "="; "lulz", "`"; "omg", ""])); 1, 37, S `EOF]; expect "<foo bar=" [ 1, 10, E (`Unexpected_eoi "tag"); 1, 10, S `EOF]; expect "<foo bar='\x00'>" [ 1, 11, E (`Bad_token ("U+0000", value, "null")); 1, 1, S (`Start (tag "foo" ["bar", "\xef\xbf\xbd"])); 1, 14, S `EOF]; expect "<foo bar='" [ 1, 11, E (`Unexpected_eoi value); 1, 11, S `EOF]; expect "<foo bar=b\x00\"'<=`az>" [ 1, 11, E (`Bad_token ("U+0000", value, "null")); 1, 12, E (`Bad_token ("\"", value, "invalid character")); 1, 13, E (`Bad_token ("'", value, "invalid character")); 1, 14, E (`Bad_token ("<", value, "invalid character")); 1, 15, E (`Bad_token ("=", value, "invalid character")); 1, 16, E (`Bad_token ("`", value, "invalid character")); 1, 1, S (`Start (tag "foo" ["bar", "b\xef\xbf\xbd\"'<=`az"])); 1, 20, S `EOF]; expect "<foo bar=b" [ 1, 11, E (`Unexpected_eoi "tag"); 1, 11, S `EOF]); ("html.tokenizer.processing-instruction" >:: fun _ -> expect "<?foo?>" [ 1, 1, E (`Bad_token ("<?", "content", "HTML does not have processing instructions")); 1, 1, S (`Comment "foo?"); 1, 8, S `EOF]; expect "<?foo>" [ 1, 1, E (`Bad_token ("<?", "content", "HTML does not have processing instructions")); 1, 1, S (`Comment "foo"); 1, 7, S `EOF]; expect "<?foo\x00?>" [ 1, 1, E (`Bad_token ("<?", "content", "HTML does not have processing instructions")); 1, 1, S (`Comment "foo\xef\xbf\xbd?"); 1, 9, S `EOF]; expect "<?>" [ 1, 1, E (`Bad_token ("<?", "content", "HTML does not have processing instructions")); 1, 1, S (`Comment ""); 1, 4, S `EOF]; expect "<?foo" [ 1, 1, E (`Bad_token ("<?", "content", "HTML does not have processing instructions")); 1, 1, S (`Comment "foo"); 1, 6, S `EOF]) ] 
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������markup.ml-1.0.3/test/test_html_writer.ml������������������������������������������������������������0000664�0000000�0000000�00000010074�14213577064�0020344�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open OUnit2 open Test_support open Markup__Common let expect id signals strings = let _, iterate, ended = expect_strings id strings in signals |> Markup__Kstream.of_list |> Markup__Html_writer.write |> iter iterate; ended () let tests = [ ("html.writer.empty" >:: fun _ -> expect "empty" [] []); ("html.writer.text" >:: fun _ -> expect "text" [`Text ["foo"]] [S "foo"]; expect "adjacent text" [`Text ["foo"]; `Text ["bar"]] [S "foo"; S "bar"]; expect "empty text" [`Text [""]] []); ("html.writer.text-escaping" >:: fun _ -> expect "text escaping" [`Text ["<foo&bar>\xc2\xa0baz"]] [S "&lt;foo&amp;bar&gt;&nbsp;baz"]); ("html.writer.doctype" >:: fun _ -> let doctype = {doctype_name = Some "html"; public_identifier = None; system_identifier = None; raw_text = None; force_quirks = false} in expect "doctype" [`Doctype doctype] [S "<!DOCTYPE html>"]; let doctype = {doctype with doctype_name = None} in expect "bad doctype" [`Doctype doctype] [S "<!DOCTYPE>"]); ("html.writer.comment" >:: fun _ -> expect "comment" [`Comment "foo"] [S "<!--"; S "foo"; S "-->"]); ("html.writer.processing-instruction" >:: fun _ -> expect "processing instruction" [`PI ("foo", 
"bar")] [S "<?"; S "foo"; S " "; S "bar"; S ">"]); ("html.writer.xml-declaration" >:: fun _ -> let xml = {version = "1.0"; encoding = None; standalone = None} in expect "xml declaration" [`Xml xml] []); ("html.writer.element" >:: fun _ -> expect "element" [`Start_element ((html_ns, "p"), []); `End_element] [S "<"; S "p"; S ">"; S "</"; S "p"; S ">"]); ("html.writer.void-element" >:: fun _ -> expect "void element" [`Start_element ((html_ns, "head"), []); `Start_element ((html_ns, "meta"), []); `End_element; `Start_element ((html_ns, "meta"), []); `End_element; `End_element] [S "<"; S "head"; S ">"; S "<"; S "meta"; S ">"; S "<"; S "meta"; S ">"; S "</"; S "head"; S ">"]); ("html.writer.void-element-with-content" >:: fun _ -> expect "void element with content" [`Start_element ((html_ns, "head"), []); `Start_element ((html_ns, "meta"), []); `Text ["foo"]; `End_element; `Start_element ((html_ns, "meta"), []); `End_element; `End_element] [S "<"; S "head"; S ">"; S "<"; S "meta"; S ">"; S "foo"; S "</"; S "meta"; S ">"; S "<"; S "meta"; S ">"; S "</"; S "head"; S ">"]); ("html.writer.pre" >:: fun _ -> expect "pre" [`Start_element ((html_ns, "pre"), []); `Text ["\nfoo"]; `End_element] [S "<"; S "pre"; S ">"; S "\n"; S "\nfoo"; S "</"; S "pre"; S ">"]); ("html.writer.attributes" >:: fun _ -> expect "attributes" [`Start_element ((html_ns, "p"), [("", "id"), "foo"; ("", "class"), "bar"]); `End_element] [S "<"; S "p"; S " "; S "id"; S "=\""; S "foo"; S "\""; S " "; S "class"; S "=\""; S "bar"; S "\""; S ">"; S "</"; S "p"; S ">"]); ("html.writer.attribute-escaping" >:: fun _ -> expect "attribute escaping" [`Start_element ((html_ns, "p"), [("", "id"), "foo<>\"&\xc2\xa0"]); `End_element] [S "<"; S "p"; S " "; S "id"; S "=\""; S "foo<>&quot;&amp;&nbsp;"; S "\""; S ">"; S "</"; S "p"; S ">"]); ("html.writer.foreign-element" >:: fun _ -> expect "foreign element" [`Start_element ((svg_ns, "use"), [(xlink_ns, "href"), "#foo"]); `End_element] [S "<"; S "use"; S " "; S "xlink:href"; S 
"=\""; S "#foo"; S "\""; S ">"; S "</"; S "use"; S ">"]); ("html.writer.script-element" >:: fun _ -> expect "script element" [ `Start_element ((html_ns, "script"), []); `Text ["true && false"]; `End_element ] [ S "<"; S "script"; S ">"; S "true && false"; S "</"; S "script"; S ">" ]); ] ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������markup.ml-1.0.3/test/test_input.ml������������������������������������������������������������������0000664�0000000�0000000�00000003513�14213577064�0017143�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. 
*) open OUnit2 open Test_support open Markup__Common open Markup__Kstream open Markup__Stream_io open Markup__Encoding open Markup__Input let ok = wrong_k "failed" let tests = [ ("input.xml" >:: fun _ -> expect_error (4, 2) (`Bad_token ("U+0000", "input", "out of range")) begin fun report -> let s, get_location = string "fo\no\xc2\xa0ba\rr\xa0ba\r\nz\x00quux" |> utf_8 |> preprocess is_valid_xml_char report in to_list s ok (assert_equal [ (1, 1), 0x66; (1, 2), 0x6F; (1, 3), 0x0A; (2, 1), 0x6F; (2, 2), 0xA0; (2, 3), 0x62; (2, 4), 0x61; (2, 5), 0x0A; (3, 1), 0x72; (3, 2), 0xFFFD; (3, 3), 0x62; (3, 4), 0x61; (3, 5), 0x0A; (4, 1), 0x7A; (4, 2), 0x00; (4, 3), 0x71; (4, 4), 0x75; (4, 5), 0x75; (4, 6), 0x78 ]); get_location () |> assert_equal (4, 7) end); ("input.html" >:: fun _ -> expect_error (1, 8) (`Bad_token ("U+0001", "input", "out of range")) begin fun report -> let s, get_location = string "foo\x00bar\x01" |> utf_8 |> preprocess is_valid_html_char report in to_list s ok (assert_equal [ (1, 1), 0x66; (1, 2), 0x6F; (1, 3), 0x6F; (1, 4), 0x00; (1, 5), 0x62; (1, 6), 0x61; (1, 7), 0x72; (1, 8), 0x01 ]); get_location () |> assert_equal (1, 9) end); ("input.bom" >:: fun _ -> [0xFEFF; 0x66] |> of_list |> preprocess is_valid_xml_char Markup__Error.ignore_errors |> fst |> fun s -> to_list s ok (assert_equal [(1, 1), 0x66])) ] 
�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������markup.ml-1.0.3/test/test_integration.ml������������������������������������������������������������0000664�0000000�0000000�00000007102�14213577064�0020325�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open OUnit2 open Markup let tests = [ ("integration.xml" >:: fun _ -> "<?xml version='1.0' encoding='windows-1252'?><root>\xa0</root><a></a>" |> string |> parse_xml |> signals |> write_xml |> to_string |> assert_equal ("<?xml version=\"1.0\" encoding=\"windows-1252\"?>" ^ "<root>\xc2\xa0</root><a/>"); "\xfe\xff\x00f\x00o\x00o" |> string |> parse_xml |> signals |> write_xml |> to_string |> assert_equal "foo"); ("integration.html" >:: fun _ -> "<!DOCTYPE html><html><body><p><em>foo<p>bar" |> string |> parse_html |> signals |> write_html |> to_string |> assert_equal ("<!DOCTYPE html><html><head></head><body><p><em>foo</em></p>" ^ "<p><em>bar</em></p></body></html>")); ("integration.html.encoding" >:: fun _ -> ("<!DOCTYPE html><html><head><meta http-equiv='content-type' " ^ "content='text/html' charset='iso-8859-15'></head>" ^ "<body><p><em>\xA0\xA4foo<p>bar") |> string |> parse_html |> signals |> write_html |> to_string |> assert_equal ("<!DOCTYPE html><html><head><meta http-equiv=\"content-type\" " ^ "content=\"text/html\" charset=\"iso-8859-15\"></head>" ^ "<body><p><em>&nbsp;\xE2\x82\xACfoo</em></p>" ^ "<p><em>bar</em></p></body></html>")); 
("integration.html.context-disambiguation" >:: fun _ -> "<a></a>" |> string |> parse_html ~context:(`Fragment "svg") |> signals |> to_list |> assert_equal [`Start_element ((Ns.svg, "a"), []); `End_element]); ("integration.pretty_print" >:: fun _ -> "<root>foo<nested>bar</nested><nested>baz</nested></root>" |> string |> parse_xml |> signals |> pretty_print |> write_xml |> to_string |> assert_equal ("<root>\n foo\n <nested>\n bar\n </nested>\n" ^ " <nested>\n baz\n </nested>\n</root>\n")); ("integration.locations" >:: fun _ -> let parser = "<root>foo</root>" |> string |> parse_xml in assert_equal (location parser) (1, 1); parser |> signals |> next |> ignore; assert_equal (location parser) (1, 1); parser |> signals |> next |> ignore; assert_equal (location parser) (1, 7); parser |> signals |> next |> ignore; assert_equal (location parser) (1, 10); parser |> signals |> next |> ignore; assert_equal (location parser) (1, 10)); ("integration.reread-html-tree" >:: fun _ -> let stream = "<p></p>" |> string |> parse_html |> signals in let assemble () = stream |> tree ~text:(fun _ -> ()) ~element:(fun _ _ _ -> ()) in assert_equal ~msg:"fi" (assemble ()) (Some ()); assert_equal ~msg:"fi" (assemble ()) None); ("integration.doctype.round-trip" >:: fun _ -> ({|<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" |} ^ {|"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">|}) |> string |> parse_html |> signals |> write_html |> to_string |> assert_equal ({|<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" |} ^ {|"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">|} ^ {|<html><head></head><body></body></html>|})); ("integration.doctype.pretty_print" >:: fun _ -> "<!DOCTYPE html><div></div>" |> string |> parse_html |> signals |> pretty_print |> write_html |> to_string |> assert_equal ("<!DOCTYPE html>\n<html>\n <head></head>\n" ^ " <body>\n <div></div>\n </body>\n</html>\n")); ] 
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������markup.ml-1.0.3/test/test_kstream.ml����������������������������������������������������������������0000664�0000000�0000000�00000033442�14213577064�0017456�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. 
*) open OUnit2 open Test_support open Markup__Kstream let exn = Failure "failure" let then_exn l = let s = of_list l in (fun throw _ k -> next s throw (fun () -> throw exn) k) |> make let failed_wrong = wrong_k "failed" let failed = assert_equal exn let internal_tests = [ ("kstream.internal.make" >:: fun _ -> let s = (fun _ _ k -> k "foo") |> make in next s failed_wrong (wrong_k "empty") (assert_equal "foo")); ("kstream.internal.of_list,next" >:: fun _ -> let s = of_list [1; 2; 3] in next s failed_wrong (wrong_k "empty (1)") (assert_equal ~msg:"1" 1); next s failed_wrong (wrong_k "empty (2)") (assert_equal ~msg:"2" 2); next s failed_wrong (wrong_k "empty (3)") (assert_equal ~msg:"3" 3); next s failed_wrong ignore (wrong_k "not empty"); next s failed_wrong ignore (wrong_k "not empty")); ("kstream.internal.next.exn" >:: fun _ -> let s = (fun throw _ _ -> throw exn) |> make in next s failed (wrong_k "empty") (wrong_k "not empty")); ("kstream.internal.to_list" >:: fun _ -> to_list (of_list [1; 2; 3]) failed_wrong (assert_equal [1; 2; 3])); ("kstream.internal.to_list.exn" >:: fun _ -> to_list (then_exn [1]) failed (wrong_k "did not fail")); ("kstream.internal.next_option" >:: fun _ -> let s = of_list [1; 2; 3] in next_option s failed_wrong (assert_equal ~msg:"1" (Some 1)); next_option s failed_wrong (assert_equal ~msg:"2" (Some 2)); next_option s failed_wrong (assert_equal ~msg:"3" (Some 3)); next_option s failed_wrong (assert_equal ~msg:"empty" None); next_option s failed_wrong (assert_equal ~msg:"still empty" None)); ("kstream.internal.next_option.exn" >:: fun _ -> let s = then_exn [1] in next_option s failed_wrong (assert_equal ~msg:"1" (Some 1)); next_option s failed (wrong_k "did not fail")); ("kstream.internal.next_expected" >:: fun _ -> let s = of_list [1; 2; 3] in next_expected s failed_wrong (assert_equal ~msg:"1" 1); next_expected s failed_wrong (assert_equal ~msg:"2" 2); next_expected s failed_wrong (assert_equal ~msg:"3" 3); next_expected s (assert_equal 
(Failure "stream empty")) (wrong_k "not empty")); ("kstream.internal.next_expected.exn" >:: fun _ -> let s = then_exn [1] in next_expected s failed_wrong (assert_equal ~msg:"1" 1); next_expected s failed (wrong_k "did not fail")); ("kstream.internal.next_n" >:: fun _ -> let s = of_list [1; 2; 3] in next_n 2 s failed_wrong (assert_equal ~msg:"1,2" [1; 2]); next_n 2 s failed_wrong (assert_equal ~msg:"3" [3]); next_n 2 s failed_wrong (assert_equal ~msg:"empty" []); next_n (-1) s (assert_equal (Invalid_argument "n is negative")) (wrong_k "did not fail")); ("kstream.internal.next_n.exn" >:: fun _ -> let s = then_exn [1; 2; 3] in next_n 2 s failed_wrong (assert_equal ~msg:"1,2" [1; 2]); next_n 2 s failed (wrong_k "did not fail")); ("kstream.internal.push" >:: fun _ -> let s = of_list [1; 2; 3] in push s 4; push s 5; to_list s failed_wrong (assert_equal [5; 4; 1; 2; 3])); ("kstream.internal.push_option" >:: fun _ -> let s = of_list [1; 2; 3] in push_option s (None); push_option s (Some 5); to_list s failed_wrong (assert_equal [5; 1; 2; 3])); ("kstream.internal.push_list" >:: fun _ -> let s = of_list [1; 2; 3] in push_list s [4; 5]; push_list s [6; 7; 8]; push_list s []; to_list s failed_wrong (assert_equal [6; 7; 8; 4; 5; 1; 2; 3])); ("kstream.internal.peek" >:: fun _ -> let s = of_list [1; 2; 3] in peek s failed_wrong (wrong_k "empty (1)") (assert_equal ~msg:"1" 1); peek s failed_wrong (wrong_k "empty (1b)") (assert_equal ~msg:"1b" 1); next s failed_wrong ignore ignore; peek s failed_wrong (wrong_k "empty (2)") (assert_equal ~msg:"2" 2); to_list s failed_wrong ignore; peek s failed_wrong ignore (wrong_k "not empty")); ("kstream.internal.peek.exn" >:: fun _ -> let s = then_exn [1] in peek s failed_wrong (wrong_k "empty (1)") (assert_equal ~msg:"1" 1); next s failed_wrong ignore ignore; peek s failed (wrong_k "empty") (wrong_k "not empty")); ("kstream.internal.peek_option" >:: fun _ -> let s = of_list [1; 2; 3] in peek_option s failed_wrong (assert_equal ~msg:"1" (Some 
1)); peek_option s failed_wrong (assert_equal ~msg:"1b" (Some 1)); next s failed_wrong ignore ignore; peek_option s failed_wrong (assert_equal ~msg:"2" (Some 2)); to_list s failed_wrong ignore; peek_option s failed_wrong (assert_equal ~msg:"empty" None)); ("kstream.internal.peek_option.exn" >:: fun _ -> let s = then_exn [1] in peek_option s failed_wrong (assert_equal ~msg:"1" (Some 1)); next s failed_wrong ignore ignore; peek_option s failed (wrong_k "did not fail")); ("kstream.internal.peek_expected" >:: fun _ -> let s = of_list [1; 2; 3] in peek_expected s failed_wrong (assert_equal ~msg:"1" 1); peek_expected s failed_wrong (assert_equal ~msg:"1b" 1); next s failed_wrong ignore ignore; peek_expected s failed_wrong (assert_equal ~msg:"2" 2); to_list s failed_wrong ignore; peek_expected s (assert_equal (Failure "stream empty")) (wrong_k "did not fail")); ("kstream.internal.peek_expected.exn" >:: fun _ -> let s = then_exn [1] in peek_expected s failed_wrong (assert_equal ~msg:"1" 1); next s failed_wrong ignore ignore; peek_expected s failed (wrong_k "did not fail")); ("kstream.internal.peek_n" >:: fun _ -> let s = of_list [1; 2; 3] in peek_n 2 s failed_wrong (assert_equal ~msg:"1,2" [1; 2]); peek_n 2 s failed_wrong (assert_equal ~msg:"1,2" [1; 2]); next_n 2 s failed_wrong ignore; peek_n 2 s failed_wrong (assert_equal ~msg:"3" [3]); to_list s failed_wrong ignore; peek_n 2 s failed_wrong (assert_equal ~msg:"empty" []); peek_n (-1) s (assert_equal (Invalid_argument "n is negative")) (wrong_k "did not fail")); ("kstream.internal.peek_n.exn" >:: fun _ -> let s = then_exn [1; 2; 3] in peek_n 2 s failed_wrong (assert_equal ~msg:"1,2" [1; 2]); next_n 2 s failed_wrong ignore; peek_n 2 s failed (wrong_k "did not fail")); ("kstream.internal.tap" >:: fun _ -> let buffer = Buffer.create 4 in let s = of_list ['f'; 'o'; 'o'; 'b'; 'a'; 'r'] in let restore = tap (Buffer.add_char buffer) s in peek_n 3 s failed_wrong ignore; next_n 3 s failed_wrong ignore; next s failed_wrong ignore 
ignore; restore (); to_list s failed_wrong ignore; assert_equal (Buffer.contents buffer) "foob"); ("kstream.internal.tap.exn" >:: fun _ -> let buffer = Buffer.create 4 in let s = then_exn ['f'; 'o'; 'o'; 'b'] in (tap (Buffer.add_char buffer) s |> ignore) [@ocaml.warning "-5"]; to_list s failed (wrong_k "did not fail"); assert_equal (Buffer.contents buffer) "foob"); ("kstream.internal.checkpoint" >:: fun _ -> let s = of_list [1; 2; 3] in let s', restore = checkpoint s in next s' failed_wrong (wrong_k "empty") (assert_equal 1); peek_n 2 s' failed_wrong (assert_equal [2; 3]); restore (); to_list s failed_wrong (assert_equal ~msg:"restore" [1; 2; 3]); let s = of_list [1; 2; 3] in push s 0; let s', restore = checkpoint s in next_n 2 s' failed_wrong (assert_equal [0; 1]); restore ()); ("kstream.internal.checkpoint.exn" >:: fun _ -> let s = then_exn [1; 2; 3] in checkpoint s |> ignore; to_list s failed (wrong_k "did not fail")); ("kstream.internal.construct" >:: fun _ -> let called = ref false in let s = construct (fun _ k -> called := true; k (of_list [1; 2; 3])) in assert_bool "not called" (not !called); next s failed_wrong (wrong_k "empty") (assert_equal ~msg:"1" 1); assert_bool "called" !called; to_list s failed_wrong (assert_equal ~msg:"2,3" [2; 3])); ("kstream.internal.construct.exn" >:: fun _ -> let s = construct (fun throw _ -> throw exn) in next s failed (wrong_k "empty") (wrong_k "not empty")); ("kstream.internal.construct.compose" >:: fun _ -> let constructor1_calls = ref 0 in let constructor2_calls = ref 0 in let s = construct (fun _ k -> constructor1_calls := !constructor1_calls + 1; k (construct (fun _ k -> constructor2_calls := !constructor2_calls + 1; k (of_list [1; 2; 3])))) in next_option s failed_wrong (assert_equal (Some 1)); next_option s failed_wrong (assert_equal (Some 2)); assert_equal ~msg:"constructor 1" !constructor1_calls 1; assert_equal ~msg:"constructor 2" !constructor2_calls 1); ("kstream.internal.map" >:: fun _ -> let s = of_list [1; 2; 3] 
|> map (fun v _ k -> k (v + 1)) in to_list s failed_wrong (assert_equal [2; 3; 4])); ("kstream.internal.map.exn" >:: fun _ -> let s = then_exn [1; 2; 3] |> map (fun v _ k -> k (v + 1)) in to_list s failed (wrong_k "did not fail"); let s = of_list [1; 2; 3] |> map (fun _ throw _ -> throw exn) in to_list s failed (wrong_k "did not fail")); ("kstream.internal.transform" >:: fun _ -> let nth_double n = transform (fun acc v _ k -> if acc = n then k ([v; v], None) else k ([], Some (acc + 1))) 0 in let s = of_list [1; 2; 3] |> nth_double 1 in to_list s failed_wrong (assert_equal [2; 2])); ("kstream.internal.fold" >:: fun _ -> fold (fun v v' _ k -> k (v + v')) 0 (of_list [1; 2; 3]) failed_wrong (assert_equal 6)); ("kstream.internal.fold.exn" >:: fun _ -> fold (fun v v' _ k -> k (v + v')) 0 (then_exn [1; 2; 3]) failed (wrong_k "did not fail"); fold (fun _ _ throw _ -> throw exn) 0 (of_list [1; 2; 3]) failed (wrong_k "did not fail")); ("kstream.internal.iter" >:: fun _ -> let sum = ref 0 in iter (fun v _ k -> sum := !sum + v; k ()) (of_list [1; 2; 3]) failed_wrong ignore; assert_equal !sum 6); ("kstream.internal.iter.exn" >:: fun _ -> iter (fun v _ k -> k (ignore v)) (then_exn [1; 2; 3]) failed (wrong_k "did not fail"); iter (fun _ throw _ -> throw exn) (of_list [1; 2; 3]) failed (wrong_k "did not fail")); ("kstream.internal.filter_map" >:: fun _ -> let s = filter_map (fun v _ k -> k (if v mod 2 = 0 then Some (string_of_int v) else None)) (of_list [1; 2; 3; 4]) in to_list s failed_wrong (assert_equal ["2"; "4"])); ("kstream.internal.filter_map.exn" >:: fun _ -> let s = filter_map (fun v _ k -> k (Some v)) (then_exn [1; 2; 3; 4]) in to_list s failed (wrong_k "did not fail"); let s = filter_map (fun _ throw _ -> throw exn) (of_list [1; 2; 3; 4]) in to_list s failed (wrong_k "did not fail")); ("kstream.internal.filter" >:: fun _ -> let s = filter (fun v _ k -> k (v mod 2 = 0)) (of_list [1; 2; 3; 4]) in to_list s failed_wrong (assert_equal [2; 4])); 
("kstream.internal.filter.exn" >:: fun _ -> let s = filter (fun _ _ k -> k true) (then_exn [1; 2; 3; 4]) in to_list s failed (wrong_k "did not fail"); let s = filter (fun _ throw _ -> throw exn) (of_list [1; 2; 3; 4]) in to_list s failed (wrong_k "did not fail")); ("kstream.internal.enumerate" >:: fun _ -> let s = enumerate (of_list ['f'; 'o'; 'o']) in to_list s failed_wrong (assert_equal [0, 'f'; 1, 'o'; 2, 'o'])); ("kstream.internal.enumerate.exn" >:: fun _ -> let s = enumerate (then_exn [1; 2; 3]) in to_list s failed (wrong_k "did not fail")); ("kstream.internal.tail_call" >:: fun _ -> let s = make (fun _ _ k -> k 1337) in let limit = 100000 in fold (fun count _ throw k -> if count >= limit then throw Exit else k (count + 1)) 0 s (function | Exit -> () | exn -> raise exn) (wrong_k "finished")) ] open Markup let synchronous_interface_tests = [ ("kstream.sync.stream,next" >:: fun _ -> let emitted = ref false in let s = stream (fun () -> if not !emitted then (emitted := true; Some "foo") else None) in next s |> assert_equal ~msg:"foo" (Some "foo"); next s |> assert_equal ~msg:"empty" None); ("kstream.sync.of_list" >:: fun _ -> let s = of_list [1; 2; 3] in next s |> assert_equal ~msg:"1" (Some 1); next s |> assert_equal ~msg:"2" (Some 2); next s |> assert_equal ~msg:"3" (Some 3); next s |> assert_equal ~msg:"empty" None; next s |> assert_equal ~msg:"still empty" None); ("kstream.sync.to_list" >:: fun _ -> of_list [1; 2; 3] |> to_list |> assert_equal [1; 2; 3]); ("kstream.sync.peek" >:: fun _ -> let s = of_list [1; 2; 3] in peek s |> assert_equal ~msg:"1" (Some 1); peek s |> assert_equal ~msg:"1 again" (Some 1); next s |> ignore; peek s |> assert_equal ~msg:"2" (Some 2); to_list s |> ignore; peek s |> assert_equal ~msg:"empty" None); ("kstream.sync.fold" >:: fun _ -> of_list [1; 2; 3] |> fold (+) 0 |> assert_equal 6); ("kstream.sync.map" >:: fun _ -> of_list [1; 2; 3] |> map string_of_int |> to_list |> assert_equal ["1"; "2"; "3"]); ("kstream.sync.filter" >:: fun _ 
-> of_list [1; 2; 3; 4] |> filter (fun v -> v mod 2 = 0) |> to_list |> assert_equal [2; 4]); ("kstream.sync.filter_map" >:: fun _ -> of_list [1; 2; 3; 4] |> filter_map (fun v -> if v mod 2 = 0 then Some (string_of_int v) else None) |> to_list |> assert_equal ["2"; "4"]); ("kstream.sync.iter" >:: fun _ -> let sum = ref 0 in of_list [1; 2; 3] |> iter (fun v -> sum := !sum + v); assert_equal !sum 6); ("kstream.sync.drain" >:: fun _ -> let s = of_list [1; 2; 3] in peek s |> assert_equal ~msg:"not empty" (Some 1); drain s; peek s |> assert_equal ~msg:"empty" None); ("kstream.sync.exn" >:: fun _ -> let s = (fun () -> raise exn) |> stream in try drain s; with exn' -> assert_equal exn' exn) ] let tests = internal_tests @ synchronous_interface_tests ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������markup.ml-1.0.3/test/test_stream_io.ml��������������������������������������������������������������0000664�0000000�0000000�00000010211�14213577064�0017757�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open OUnit2 open Markup let self = "./test_stream_io.ml" let no_file = "./no_such_file" let directory = "." 
let exn = Failure ("failure") let fails = fun () -> raise exn let with_file_reading name f = let c = open_in name in try f c; close_in_noerr c with exn -> close_in_noerr c; raise exn let with_file_writing name f = let c = open_out name in try f c; close_out_noerr c with exn -> close_out_noerr c; raise exn let tests = [ ("stream_io.sync.string" >:: fun _ -> let s = string "foo" in to_list s |> assert_equal ['f'; 'o'; 'o']; next s |> assert_equal None; next s |> assert_equal None); ("stream_io.sync.buffer" >:: fun _ -> let b = Buffer.create 4 in let s = buffer b in Buffer.add_string b "foo"; to_list s |> assert_equal ['f'; 'o'; 'o']; next s |> assert_equal None; next s |> assert_equal None); ("stream_io.sync.channel" >:: fun _ -> with_file_reading self (fun c -> let s = channel c in next s |> assert_equal (Some '('); next s |> assert_equal (Some '*'); next s |> assert_equal (Some ' '); next s |> assert_equal (Some 'T'); drain s; next s |> assert_equal None; next s |> assert_equal None; close_in_noerr c; next s |> assert_equal None)); ("stream_io.sync.channel.closed" >:: fun _ -> with_file_reading self (fun c -> let s = channel c in close_in_noerr c; assert_raises (Sys_error "Bad file descriptor") (fun () -> next s |> ignore))); ("stream_io.sync.file" >:: fun _ -> let s, close = file self in next s |> assert_equal (Some '('); next s |> assert_equal (Some '*'); next s |> assert_equal (Some ' '); next s |> assert_equal (Some 'T'); drain s; next s |> assert_equal None; next s |> assert_equal None; close (); next s |> assert_equal None); ("stream_io.sync.file.closed" >:: fun _ -> let s, close = file self in close (); assert_raises (Sys_error "Bad file descriptor") (fun () -> next s |> ignore)); ("stream_io.sync.file.not_found" >:: fun _ -> assert_raises (Sys_error (no_file ^ ": No such file or directory")) (fun () -> file no_file |> ignore)); ("stream_io.sync.to_buffer" >:: fun _ -> of_list ['f'; 'o'; 'o'] |> to_string |> assert_equal "foo"); 
("stream_io.sync.to_string.exn" >:: fun _ -> assert_raises exn (fun () -> fails |> stream |> to_string |> ignore)); ("stream_io.sync.to_buffer" >:: fun _ -> of_list ['f'; 'o'; 'o'] |> to_buffer |> Buffer.contents |> assert_equal "foo"); ("stream_io.sync.to_buffer.exn" >:: fun _ -> assert_raises exn (fun () -> fails |> stream |> to_buffer |> ignore)); ("stream_io.sync.to_channel" >:: fun context -> let name, c = bracket_tmpfile context in of_list ['f'; 'o'; 'o'] |> to_channel c; close_out_noerr c; with_file_reading name (fun c -> input_line c |> assert_equal "foo"; assert_raises End_of_file (fun () -> input_line c |> ignore))); ("stream_io.sync.to_channel.exn" >:: fun _ -> assert_raises exn (fun () -> fails |> stream |> to_channel stdout)); ("stream_io.sync.to_channel.closed" >:: fun context -> let _, c = bracket_tmpfile context in close_out_noerr c; assert_raises (Sys_error "Bad file descriptor") (fun () -> of_list ['f'; 'o'; 'o'] |> to_channel c)); ("stream_io.sync.to_file" >:: fun context -> let name, c = bracket_tmpfile context in close_out_noerr c; of_list ['f'; 'o'; 'o'] |> to_file name; with_file_reading name (fun c -> input_line c |> assert_equal "foo"; assert_raises End_of_file (fun () -> input_line c |> ignore))); ("stream_io.sync.to_file.exn" >:: fun context -> let name, c = bracket_tmpfile context in close_out_noerr c; assert_raises exn (fun () -> fails |> stream |> to_file name)); ("stream_io.sync.to_file.not_a_file" >:: fun _ -> assert_raises (Sys_error (directory ^ ": Is a directory")) (fun () -> of_list ['f'; 'o'; 'o'] |> to_file directory)) ] 
���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������markup.ml-1.0.3/test/test_trie.ml�������������������������������������������������������������������0000664�0000000�0000000�00000005762�14213577064�0016757�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. 
*) open OUnit2 module Trie = Markup__Trie let singleton w value = Trie.create () |> Trie.add w value let advance w trie = let rec loop index trie = if index >= String.length w then trie else loop (index + 1) (Trie.advance (Char.code w.[index]) trie) in loop 0 trie let assert_matches trie w status = assert_equal (Trie.matches (advance w trie)) status let tests = [ ("trie.empty" >:: fun _ -> let trie = Trie.create () in assert_equal (Trie.matches trie) Trie.No); ("trie.one-character" >:: fun _ -> let trie = singleton "a" 0 in assert_matches trie "" Trie.Prefix; assert_matches trie "a" (Trie.Yes 0); assert_matches trie "b" Trie.No); ("trie.simple-word" >:: fun _ -> let trie = singleton "ab" 0 in assert_matches trie "" Trie.Prefix; assert_matches trie "a" Trie.Prefix; assert_matches trie "b" Trie.No; assert_matches trie "ab" (Trie.Yes 0)); ("trie.prefix-match" >:: fun _ -> let trie = Trie.create () in let trie = Trie.add "a" 0 trie in let trie = Trie.add "ab" 1 trie in assert_matches trie "" Trie.Prefix; assert_matches trie "a" (Trie.Multiple 0); assert_matches trie "ab" (Trie.Yes 1)); ("trie.branching" >:: fun _ -> let trie = Trie.create () in let trie = Trie.add "aa" 0 trie in let trie = Trie.add "ab" 1 trie in assert_matches trie "" Trie.Prefix; assert_matches trie "a" Trie.Prefix; assert_matches trie "aa" (Trie.Yes 0); assert_matches trie "ab" (Trie.Yes 1)); ("trie.unsupported-character" >:: fun _ -> let trie = singleton "a" 0 in assert_matches trie " " Trie.No); ("trie.advance-no-match" >:: fun _ -> let trie = singleton "a" 0 in assert_matches trie "a" (Trie.Yes 0); assert_matches trie "aa" Trie.No; assert_matches trie "aaa" Trie.No); ("trie.empty-string" >:: fun _ -> let trie = singleton "" 0 in assert_matches trie "" (Trie.Yes 0)); ("trie.replace-leaf" >:: fun _ -> let trie = singleton "a" 0 in assert_matches trie "a" (Trie.Yes 0); let trie = Trie.add "a" 1 trie in assert_matches trie "a" (Trie.Yes 1)); ("trie.replace-node" >:: fun _ -> let trie = singleton "ab" 
0 in assert_matches trie "a" Trie.Prefix; let trie = Trie.add "a" 1 trie in assert_matches trie "a" (Trie.Multiple 1); let trie = Trie.add "a" 2 trie in assert_matches trie "a" (Trie.Multiple 2)); ("trie.memory_usage" >:: fun _ -> let trie = singleton "ab" 0 in let expected_nodes = 2 in let expected_leaves = 1 in let expected_empty_leaves = expected_nodes * Trie.array_size - expected_leaves - (expected_nodes - 1) in let expected_machine_word_count = expected_nodes * (4 + Trie.array_size) + expected_leaves * 2 + expected_empty_leaves * 1 in assert_equal (Trie.guess_memory_usage trie) expected_machine_word_count); ] ��������������markup.ml-1.0.3/test/test_utility.ml����������������������������������������������������������������0000664�0000000�0000000�00000015132�14213577064�0017507�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *) open OUnit2 open Test_support open! 
Markup module Kstream = Markup__Kstream let doctype = `Doctype {Markup.doctype_name = Some "html"; public_identifier = None; system_identifier = None; raw_text = None; force_quirks = false} let start_element name = `Start_element ((Markup.Ns.html, name), []) let ok = wrong_k "failed" type dom = | Text of string | Element of string * dom list let tests = [ ("utility.content" >:: fun _ -> "<?xml version='1.0'?><!DOCTYPE html><!--blah--><p>foo</p><?bar baz?>" |> string |> parse_xml |> signals |> content |> write_xml |> to_string |> assert_equal "<p>foo</p>"); ("utility.strings_to_bytes" >:: fun _ -> ["foo"; "bar"] |> Kstream.of_list |> Markup__Utility.strings_to_bytes |> fun s -> Kstream.to_list s ok (assert_equal ['f'; 'o'; 'o'; 'b'; 'a'; 'r'])); ("utility.tree" >:: fun _ -> [start_element "a"; `Comment "blah"; `Text ["foo"]; start_element "b"; `Text ["bar"]; `End_element; `Text ["baz"]; `End_element] |> of_list |> tree ~text:(fun ss -> Text (String.concat "" ss)) ~element:(fun (_, name) _ children -> Element (name, children)) |> assert_equal (Some (Element ("a", [Text "foo"; Element ("b", [Text "bar"]); Text "baz"])))); ("utility.tree.empty" >:: fun _ -> [] |> of_list |> tree ~text:ignore ~element:(fun _ _ _ -> ()) |> assert_equal None); ("utility.tree.reread" >:: fun _ -> let signals = [start_element "p"; `End_element; start_element "p"; `End_element] |> of_list in tree signals ~text:ignore ~element:(fun _ _ _ -> ()) |> ignore; signals |> to_list |> List.length |> assert_equal 2); ("utility.from_tree" >:: fun _ -> let dom = Element ("p", [Text "foo"; Element ("em", [Text "bar"]); Text "baz"]) in dom |> from_tree (function | Element (name, children) -> `Element ((Markup.Ns.html, name), [], children) | Text s -> `Text s) |> to_list |> assert_equal [ start_element "p"; `Text ["foo"]; start_element "em"; `Text ["bar"]; `End_element; `Text ["baz"]; `End_element]); ("utility.text" >:: fun _ -> [`Xml {Markup.version = "1.0"; encoding = None; standalone = None}; `Comment 
"blah"; `Text ["foo"]; start_element "a"; `Text ["bar"; "baz"]; `End_element] |> of_list |> text |> to_string |> assert_equal "foobarbaz"); ("utility.trim" >:: fun _ -> [start_element "div"; `Text ["\n "]; start_element "p"; `Text ["\n "]; start_element "em"; `Text ["foo"]; `End_element; `Text [" bar\n "]; `End_element; `Text ["\n "]; start_element "pre"; `Text ["\n baz \n "]; `End_element; `Text ["\n"]; `End_element] |> of_list |> trim |> to_list |> assert_equal [start_element "div"; start_element "p"; start_element "em"; `Text ["foo"]; `End_element; `Text [" bar"]; `End_element; start_element "pre"; `Text ["\n baz \n "]; `End_element; `End_element]); ("utility.trim.doctype" >:: fun _ -> [doctype; `Text ["\n"]; start_element "div"; `End_element] |> of_list |> trim |> to_list |> assert_equal [ doctype; start_element "div"; `End_element]); ("utility.normalize_text" >:: fun _ -> [`Text [""]; start_element "a"; `Text ["foo"]; `Text ["bar"]; `End_element; `Text ["foo"; "bar"]; `Text ["baz"; ""; "quux"]] |> of_list |> normalize_text |> to_list |> assert_equal [ start_element "a"; `Text ["foo"; "bar"]; `End_element; `Text ["foo"; "bar"; "baz"; "quux"]]); ("utility.pretty_print" >:: fun _ -> [start_element "div"; start_element "p"; start_element "em"; `Text ["foo"]; `End_element; `Text ["bar"]; `End_element; start_element "pre"; `Text ["\n baz \n "]; `End_element; `End_element] |> of_list |> pretty_print |> to_list |> assert_equal [ start_element "div"; `Text ["\n"; " "]; start_element "p"; `Text ["\n"; " "]; start_element "em"; `Text ["foo"]; `End_element; `Text ["bar"; "\n"; " "]; `End_element; `Text ["\n"; " "]; start_element "pre"; `Text ["\n baz \n "]; `End_element; `Text ["\n"]; `End_element; `Text ["\n"]]); ("utility.pretty_print.doctype" >:: fun _ -> [doctype; start_element "div"; start_element "p"; `End_element; `End_element] |> of_list |> pretty_print |> to_list |> assert_equal [ doctype; `Text ["\n"]; start_element "div"; `Text ["\n"; " "]; start_element "p"; 
`End_element; `Text ["\n"]; `End_element; `Text ["\n"]]); ("utility.pretty_print.doctype.existing-newline" >:: fun _ -> [doctype; `Text ["\n"]; start_element "div"; `End_element] |> of_list |> pretty_print |> to_list |> assert_equal [ doctype; `Text ["\n"]; start_element "div"; `End_element; `Text ["\n"]]); ("utility.html5" >:: fun _ -> [doctype; doctype; `PI ("foo", "bar"); `Xml {Markup.version = "1.0"; encoding = Some "utf-8"; standalone = None}; `Comment "foo"; start_element "p"; `Text ["foo"]; `End_element] |> of_list |> html5 |> to_list |> assert_equal [ doctype; `Comment "foo"; start_element "p"; `Text ["foo"]; `End_element]); ("utility.xhtml" >:: fun _ -> [doctype; doctype; `PI ("foo", "bar"); `Xml {Markup.version = "1.0"; encoding = Some "utf-8"; standalone = None}; `Comment "foo"; start_element "p"; `Text ["foo"]; `End_element] |> of_list |> xhtml |> to_list |> assert_equal [ `Xml {Markup.version = "1.0"; encoding = Some "utf-8"; standalone = None}; `Doctype { Markup.doctype_name = None; public_identifier = None; system_identifier = None; raw_text = Some ("html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" " ^ "\"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\""); force_quirks = false}; `PI ("foo", "bar"); `Comment "foo"; start_element "p"; `Text ["foo"]; `End_element]); ] 
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������markup.ml-1.0.3/test/test_xml_parser.ml�������������������������������������������������������������0000664�0000000�0000000�00000032406�14213577064�0020163�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. 
*) open OUnit2 open Test_support open Markup__Common module Error = Markup__Error let xml_decl = Test_xml_tokenizer.xml_decl let raw_doctype = Test_xml_tokenizer.raw_doctype let start_element name = `Start_element (("", name), []) let no_custom_entities = fun _ -> None let no_top_level_namespaces = fun _ -> None let expect ?context ?(namespace = no_top_level_namespaces) text signals = let report, iterate, ended = expect_signals signal_to_string text signals in text |> Markup__Stream_io.string |> Markup__Encoding.utf_8 |> Markup__Input.preprocess is_valid_xml_char Error.ignore_errors |> Markup__Xml_tokenizer.tokenize Error.ignore_errors no_custom_entities |> Markup__Xml_parser.parse context namespace report |> iter iterate; ended () let tests = [ ("xml.parser.empty" >:: fun _ -> expect "" []); ("xml.parser.document" >:: fun _ -> expect "<root>foo</root>" [ 1, 1, S (start_element "root"); 1, 7, S (`Text ["foo"]); 1, 10, S `End_element]; expect " <root > foo </root > " [ 1, 3, S (start_element "root"); 1, 10, S (`Text [" foo "]); 1, 15, S `End_element]; expect "<!DOCTYPE html><root>foo</root>" [ 1, 1, S (raw_doctype "html"); 1, 16, S (start_element "root"); 1, 22, S (`Text ["foo"]); 1, 25, S `End_element]; expect "<?xml version='1.0'?><root>foo</root>" [ 1, 1, S (xml_decl "1.0" None None); 1, 22, S (start_element "root"); 1, 28, S (`Text ["foo"]); 1, 31, S `End_element]; expect "<?xml version='1.0'?> <!DOCTYPE html> <root>foo</root>" [ 1, 1, S (xml_decl "1.0" None None); 1, 24, S (raw_doctype "html"); 1, 41, S (start_element "root"); 1, 47, S (`Text ["foo"]); 1, 50, S `End_element]); ("xml.parser.leading-comments" >:: fun _ -> expect "<?xml version='1.0'?> <!--foo--><!DOCTYPE html> <!--bar--><root></root>" [ 1, 1, S (xml_decl "1.0" None None); 1, 23, S (`Comment "foo"); 1, 33, S (raw_doctype "html"); 1, 49, S (`Comment "bar"); 1, 59, S (start_element "root"); 1, 65, S `End_element]; expect " <!-- foo --> <root></root>" [ 1, 2, S (`Comment " foo "); 1, 15, S 
(start_element "root"); 1, 21, S `End_element]); ("xml.parser.trailing-comment" >:: fun _ -> expect "<root></root> <!-- foo --> " [ 1, 1, S (start_element "root"); 1, 7, S `End_element; 1, 15, S (`Comment " foo ")]); ("xml.parser.leading-processing-instructions" >:: fun _ -> expect "<?xml version='1.0'?> <?foo bar?><!DOCTYPE html> <?bar foo?><a></a>" [ 1, 1, S (xml_decl "1.0" None None); 1, 23, S (`PI ("foo", "bar")); 1, 34, S (raw_doctype "html"); 1, 50, S (`PI ("bar", "foo")); 1, 61, S (start_element "a"); 1, 64, S `End_element]; expect " <?foo bar?> <root></root>" [ 1, 2, S (`PI ("foo", "bar")); 1, 14, S (start_element "root"); 1, 20, S `End_element]); ("xml.parser.trailing-processing-instruction" >:: fun _ -> expect "<root></root> <?foo bar?>" [ 1, 1, S (start_element "root"); 1, 7, S `End_element; 1, 15, S (`PI ("foo", "bar"))]); ("xml.parser.junk-before-xml-declaration" >:: fun _ -> expect " <?xml version='1.0'?><root></root>" [ 1, 2, E (`Bad_document "XML declaration must be first"); 1, 23, S (start_element "root"); 1, 29, S `End_element]; expect " <?xml version='1.0'?><!DOCTYPE html><root></root>" [ 1, 2, E (`Bad_document "XML declaration must be first"); 1, 23, S (raw_doctype "html"); 1, 38, S (start_element "root"); 1, 44, S `End_element]; expect "<!-- foo --><?xml version='1.0'?><!DOCTYPE html><root></root>" [ 1, 1, S (`Comment " foo "); 1, 13, E (`Bad_document "XML declaration must be first"); 1, 34, S (raw_doctype "html"); 1, 49, S (start_element "root"); 1, 55, S `End_element]); ("xml.parser.junk-before-doctype" >:: fun _ -> expect "<?xml version='1.0'?>foo<!DOCTYPE html><root></root>" [ 1, 1, S (xml_decl "1.0" None None); 1, 22, E (`Bad_document "text at top level"); 1, 25, S (raw_doctype "html"); 1, 40, S (start_element "root"); 1, 46, S `End_element]); ("xml.parser.junk-before-root" >:: fun _ -> expect "<?xml version='1.0'?><!DOCTYPE html>foo<root></root>" [ 1, 1, S (xml_decl "1.0" None None); 1, 22, S (raw_doctype "html"); 1, 37, E (`Bad_document 
"expected root element"); 1, 40, S (start_element "root"); 1, 46, S `End_element]); ("xml.parser.junk-after-root" >:: fun _ -> expect "<root></root>foo" [ 1, 1, S (start_element "root"); 1, 7, S `End_element; 1, 14, S (`Text ["foo"])]; expect "<root></root> <foo></foo> <bar></bar>" [ 1, 1, S (start_element "root"); 1, 7, S `End_element; 1, 15, S (start_element "foo"); 1, 20, S `End_element; 1, 26, S (`Text [" "]); 1, 27, S (start_element "bar"); 1, 32, S `End_element]; expect "<?xml version='1.0'?><root/>foo" [ 1, 1, S (xml_decl "1.0" None None); 1, 22, S (start_element "root"); 1, 22, S `End_element; 1, 29, E (`Bad_document "not allowed after root element"); 1, 29, S (`Text ["foo"])]); ("xml.parser.self-closing-root" >:: fun _ -> expect "<root/>" [ 1, 1, S (start_element "root"); 1, 1, S `End_element]); ("xml.parser.content" >:: fun _ -> expect "<root>foo<!--bar--><?baz quux?>&lt;<![CDATA[&gt;]]></root>" [ 1, 1, S (start_element "root"); 1, 7, S (`Text ["foo"]); 1, 10, S (`Comment "bar"); 1, 20, S (`PI ("baz", "quux")); 1, 32, S (`Text ["<&gt;"]); 1, 52, S `End_element]; expect "<root><nested><more>foo</more></nested><a>bar</a><blah/></root>" [ 1, 1, S (start_element "root"); 1, 7, S (start_element "nested"); 1, 15, S (start_element "more"); 1, 21, S (`Text ["foo"]); 1, 24, S `End_element; 1, 31, S `End_element; 1, 40, S (start_element "a"); 1, 43, S (`Text ["bar"]); 1, 46, S `End_element; 1, 50, S (start_element "blah"); 1, 50, S `End_element; 1, 57, S `End_element]); ("xml.parser.prolog-in-content" >:: fun _ -> expect "<root><?xml version='1.0'?><!DOCTYPE html></root>" [ 1, 1, S (start_element "root"); 1, 7, E (`Bad_document "XML declaration should be at top level"); 1, 28, E (`Bad_document "doctype should be at top level"); 1, 43, S `End_element]); ("xml.parser.attributes" >:: fun _ -> expect "<root foo='bar'/>" [ 1, 1, S (`Start_element (("", "root"), [("", "foo"), "bar"])); 1, 1, S `End_element]); ("xml.parser.bad-attributes" >:: fun _ -> expect "<root 
foo='bar' foo='baz'/>" [ 1, 1, E (`Bad_token ("foo", "tag", "duplicate attribute")); 1, 1, S (`Start_element (("", "root"), [("", "foo"), "bar"])); 1, 1, S `End_element]; expect "<root xmlns:a='some_ns' xmlns:b='some_ns' a:foo='' b:foo=''/>" [ 1, 1, E (`Bad_token ("foo", "tag", "duplicate attribute")); 1, 1, S (`Start_element (("", "root"), [(xmlns_ns, "a"), "some_ns"; (xmlns_ns, "b"), "some_ns"; ("some_ns", "foo"), ""])); 1, 1, S `End_element]); ("xml.parser.misnested-tags" >:: fun _ -> expect "<foo>" [ 1, 1, S (start_element "foo"); 1, 1, E (`Unmatched_start_tag "foo"); 1, 6, S `End_element]; expect "<foo></bar></foo>" [ 1, 1, S (start_element "foo"); 1, 6, E (`Unmatched_end_tag "bar"); 1, 12, S `End_element]; expect "<foo><bar><baz></foo>" [ 1, 1, S (start_element "foo"); 1, 6, S (start_element "bar"); 1, 11, S (start_element "baz"); 1, 11, E (`Unmatched_start_tag "baz"); 1, 16, S `End_element; 1, 6, E (`Unmatched_start_tag "bar"); 1, 16, S `End_element; 1, 16, S `End_element]); ("xml.parser.fragment" >:: fun _ -> expect "foo<bar/>" [ 1, 1, S (`Text ["foo"]); 1, 4, S (start_element "bar"); 1, 4, S `End_element]; expect " <!-- foo --> bar <baz></baz> <quux/>" [ 1, 1, S (`Text [" "]); 1, 2, S (`Comment " foo "); 1, 14, S (`Text [" bar "]); 1, 19, S (start_element "baz"); 1, 24, S `End_element; 1, 30, S (`Text [" "]); 1, 31, S (start_element "quux"); 1, 31, S `End_element]; expect "foo" [ 1, 1, S (`Text ["foo"])]); ("xml.parser.namespaces" >:: fun _ -> expect "<root xmlns='some_ns' xmlns:a='other_ns' a:foo='bar' baz='quux'></root>" [ 1, 1, S (`Start_element (("some_ns", "root"), [(xmlns_ns, "xmlns"), "some_ns"; (xmlns_ns, "a"), "other_ns"; ("other_ns", "foo"), "bar"; ("", "baz"), "quux"])); 1, 65, S `End_element]; expect "<a:root xmlns:a='some_ns' xmlns:b='other_ns'></b:root>" [ 1, 1, S (`Start_element (("some_ns", "root"), [(xmlns_ns, "a"), "some_ns"; (xmlns_ns, "b"), "other_ns"])); 1, 46, E (`Unmatched_end_tag "b:root"); 1, 1, E (`Unmatched_start_tag "a:root"); 
1, 55, S `End_element]; expect "<a:root xmlns:a='some_ns' xmlns:b='some_ns'></b:root>" [ 1, 1, S (`Start_element (("some_ns", "root"), [(xmlns_ns, "a"), "some_ns"; (xmlns_ns, "b"), "some_ns"])); 1, 45, S `End_element]; expect "<root xmlns='some_ns'><foo bar='baz'/></root>" [ 1, 1, S (`Start_element (("some_ns", "root"), [(xmlns_ns, "xmlns"), "some_ns"])); 1, 23, S (`Start_element (("some_ns", "foo"), [("", "bar"), "baz"])); 1, 23, S `End_element; 1, 39, S `End_element]; expect "<root xmlns:a='some_ns'><a:foo bar='baz'/><quux/></root>" [ 1, 1, S (`Start_element (("", "root"), [(xmlns_ns, "a"), "some_ns"])); 1, 25, S (`Start_element (("some_ns", "foo"), [("", "bar"), "baz"])); 1, 25, S `End_element; 1, 43, S (`Start_element (("", "quux"), [])); 1, 43, S `End_element; 1, 50, S `End_element]; expect ("<root xmlns:a='some_ns' xmlns:b='other_ns'><foo xmlns:a='another_ns'>" ^ "<a:bar/><b:baz/></foo></root>") [ 1, 1, S (`Start_element (("", "root"), [(xmlns_ns, "a"), "some_ns"; (xmlns_ns, "b"), "other_ns"])); 1, 44, S (`Start_element (("", "foo"), [(xmlns_ns, "a"), "another_ns"])); 1, 70, S (`Start_element (("another_ns", "bar"), [])); 1, 70, S `End_element; 1, 78, S (`Start_element (("other_ns", "baz"), [])); 1, 78, S `End_element; 1, 86, S `End_element; 1, 92, S `End_element]; expect "<root xmlns='some_ns'><foo xmlns='other_ns'><bar/></foo></root>" [ 1, 1, S (`Start_element (("some_ns", "root"), [(xmlns_ns, "xmlns"), "some_ns"])); 1, 23, S (`Start_element (("other_ns", "foo"), [(xmlns_ns, "xmlns"), "other_ns"])); 1, 45, S (`Start_element (("other_ns", "bar"), [])); 1, 45, S `End_element; 1, 51, S `End_element; 1, 57, S `End_element]; expect "<root xmlns='some_ns'></root><foo/>" [ 1, 1, S (`Start_element (("some_ns", "root"), [(xmlns_ns, "xmlns"), "some_ns"])); 1, 23, S `End_element; 1, 30, S (start_element "foo"); 1, 30, S `End_element]; expect ("<root xmlns:a='some_ns'><foo xmlns:a='other_ns'><a:bar/></foo>" ^ "<a:baz/></root>") [ 1, 1, S (`Start_element (("", "root"), 
[(xmlns_ns, "a"), "some_ns"])); 1, 25, S (`Start_element (("", "foo"), [(xmlns_ns, "a"), "other_ns"])); 1, 49, S (`Start_element (("other_ns", "bar"), [])); 1, 49, S `End_element; 1, 57, S `End_element; 1, 63, S (`Start_element (("some_ns", "baz"), [])); 1, 63, S `End_element; 1, 71, S `End_element]); ("xml.parser.bad-namespaces" >:: fun _ -> expect "<a:root><foo b:bar=''/></a:root>" [ 1, 1, E (`Bad_namespace "a"); 1, 1, S (`Start_element (("a", "root"), [])); 1, 9, E (`Bad_namespace "b"); 1, 9, S (`Start_element (("", "foo"), [("b", "bar"), ""])); 1, 9, S `End_element; 1, 24, E (`Bad_namespace "a"); 1, 24, S `End_element]); ("xml.parser.custom-namespaces" >:: fun _ -> let namespace = function | "a" -> Some "some_ns" | "b" -> Some "other_ns" | "xmlns" -> Some "bad" | _ -> None in expect ~namespace "<a:root b:foo='bar' xmlns:c='baz'/>" [ 1, 1, S (`Start_element (("some_ns", "root"), [("other_ns", "foo"), "bar"; (xmlns_ns, "c"), "baz"])); 1, 1, S `End_element]) ] ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������markup.ml-1.0.3/test/test_xml_tokenizer.ml����������������������������������������������������������0000664�0000000�0000000�00000066375�14213577064�0020715�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������(* This file is part of Markup.ml, released under the MIT license. See LICENSE.md for details, or visit https://github.com/aantron/markup.ml. 
*) open OUnit2 open Test_support open Markup__Common module Error = Markup__Error let xml_decl version encoding standalone = `Xml {version; encoding; standalone} let raw_doctype s = `Doctype {doctype_name = None; public_identifier = None; system_identifier = None; raw_text = Some s; force_quirks = false} let tag ?(self_closing = false) name attributes = {Token_tag.name; attributes; self_closing} let no_custom_entities = fun _ -> None let expect ?(entity = no_custom_entities) text signals = let report, iterate, ended = expect_signals token_to_string text signals in text |> Markup__Stream_io.string |> Markup__Encoding.utf_8 |> Markup__Input.preprocess is_valid_xml_char Error.ignore_errors |> Markup__Xml_tokenizer.tokenize report entity |> iter iterate; ended () let xml = "xml declaration" let pi = "processing instruction" let tests = [ ("xml.tokenizer.empty" >:: fun _ -> expect "" [ 1, 1, S `EOF]); ("xml.tokenizer.whitespace" >:: fun _ -> expect " \t \n \x09 \x0d \x0d\x0a " [ 1, 1, S (`Chars [" \t \n \x09 \x0a \x0a "]); 4, 2, S `EOF]); ("xml.tokenizer.text" >:: fun _ -> expect "foo bar" [ 1, 1, S (`Chars ["foo bar"]); 1, 8, S `EOF]; expect "foo > bar" [ 1, 1, S (`Chars ["foo > bar"]); 1, 10, S `EOF]); ("xml.tokenizer.spurious-cdata-end" >:: fun _ -> expect "foo]]>bar" [ 1, 4, E (`Bad_token ("]]>", "text", "must end a CDATA section")); 1, 1, S (`Chars ["foo]]>bar"]); 1, 10, S `EOF]; expect "foo]]]>bar" [ 1, 5, E (`Bad_token ("]]>", "text", "must end a CDATA section")); 1, 1, S (`Chars ["foo]]]>bar"]); 1, 11, S `EOF]; expect "foo]]bar" [ 1, 1, S (`Chars ["foo]]bar"]); 1, 9, S `EOF]; expect "foo]>bar" [ 1, 1, S (`Chars ["foo]>bar"]); 1, 9, S `EOF]); ("xml.tokenizer.comment" >:: fun _ -> expect "text<!-- foo -->text" [ 1, 1, S (`Chars ["text"]); 1, 5, S (`Comment " foo "); 1, 17, S (`Chars ["text"]); 1, 21, S `EOF]); ("xml.tokenizer.bad-comment-start" >:: fun _ -> expect "text<!foo -->" [ 1, 5, E (`Bad_token ("<!", "comment", "should start with '<!--'")); 1, 5, E 
(`Bad_token ("<", "text", "replace with '&lt;'")); 1, 1, S (`Chars ["text<!foo -->"]); 1, 14, S `EOF]; expect "<!<!-- foo -->" [ 1, 1, E (`Bad_token ("<!", "comment", "should start with '<!--'")); 1, 1, E (`Bad_token ("<", "text", "replace with '&lt;'")); 1, 1, S (`Chars ["<!"]); 1, 3, S (`Comment " foo "); 1, 15, S `EOF]; expect "<!" [ 1, 1, E (`Bad_token ("<!", "comment", "should start with '<!--'")); 1, 1, E (`Bad_token ("<", "text", "replace with '&lt;'")); 1, 1, S (`Chars ["<!"]); 1, 3, S `EOF]; expect "text<!-foo -->" [ 1, 5, E (`Bad_token ("<!-", "comment", "should start with '<!--'")); 1, 5, E (`Bad_token ("<", "text", "replace with '&lt;'")); 1, 1, S (`Chars ["text<!-foo -->"]); 1, 15, S `EOF]; expect "<!-<!-- foo -->" [ 1, 1, E (`Bad_token ("<!-", "comment", "should start with '<!--'")); 1, 1, E (`Bad_token ("<", "text", "replace with '&lt;'")); 1, 1, S (`Chars ["<!-"]); 1, 4, S (`Comment " foo "); 1, 16, S `EOF]; expect "<!-" [ 1, 1, E (`Bad_token ("<!-", "comment", "should start with '<!--'")); 1, 1, E (`Bad_token ("<", "text", "replace with '&lt;'")); 1, 1, S (`Chars ["<!-"]); 1, 4, S `EOF]); ("xml.tokenizer.comment-end" >:: fun _ -> expect "<!-- foo - -->" [ 1, 1, S (`Comment " foo - "); 1, 15, S `EOF]; expect "<!-- foo -- -->" [ 1, 10, E (`Bad_token ("--", "comment", "should be followed by '>'")); 1, 1, S (`Comment " foo -- "); 1, 16, S `EOF]; expect "<!-- foo --->" [ 1, 10, E (`Bad_token ("--", "comment", "should be followed by '>'")); 1, 1, S (`Comment " foo -"); 1, 14, S `EOF]); ("xml.tokenizer.unterminated-comment" >:: fun _ -> expect "<!--" [ 1, 1, S (`Comment ""); 1, 5, E (`Unexpected_eoi "comment"); 1, 5, S `EOF]; expect "<!-- foo" [ 1, 1, S (`Comment " foo"); 1, 9, E (`Unexpected_eoi "comment"); 1, 9, S `EOF]; expect "<!-- foo -" [ 1, 1, S (`Comment " foo "); 1, 11, E (`Unexpected_eoi "comment"); 1, 11, S `EOF]; expect "<!-- foo --" [ 1, 1, S (`Comment " foo "); 1, 12, E (`Unexpected_eoi "comment"); 1, 12, S `EOF]); 
("xml.tokenizer.markup-in-comment" >:: fun _ -> expect "<!-- <foo> -->" [ 1, 1, S (`Comment " <foo> "); 1, 15, S `EOF]; expect "<!-- &gt; -->" [ 1, 1, S (`Comment " &gt; "); 1, 14, S `EOF]); ("xml.tokenizer.cdata" >:: fun _ -> expect "text<![CDATA[foo<bar>&amp;]]baz]]]quux]]]>text" [ 1, 1, S (`Chars ["textfoo<bar>&amp;]]baz]]]quux]text"]); 1, 47, S `EOF]); ("xml.tokenizer.bad-cdata-start" >:: fun _ -> expect "<![foo" [ 1, 1, E (`Bad_token ("<![", "cdata", "should start with '<![CDATA['")); 1, 1, E (`Bad_token ("<", "text", "replace with '&lt;'")); 1, 1, S (`Chars ["<![foo"]); 1, 7, S `EOF]; expect "<![cdata" [ 1, 1, E (`Bad_token ("<![", "cdata", "should start with '<![CDATA['")); 1, 1, E (`Bad_token ("<", "text", "replace with '&lt;'")); 1, 1, S (`Chars ["<![cdata"]); 1, 9, S `EOF]; expect "<![<![CDATA[bar]]>" [ 1, 1, E (`Bad_token ("<![", "cdata", "should start with '<![CDATA['")); 1, 1, E (`Bad_token ("<", "text", "replace with '&lt;'")); 1, 1, S (`Chars ["<![bar"]); 1, 19, S `EOF]; expect "<![" [ 1, 1, E (`Bad_token ("<![", "cdata", "should start with '<![CDATA['")); 1, 1, E (`Bad_token ("<", "text", "replace with '&lt;'")); 1, 1, S (`Chars ["<!["]); 1, 4, S `EOF]); ("xml.tokenizer.unterminated-cdata" >:: fun _ -> expect "<![CDATA[foo" [ 1, 1, S (`Chars ["foo"]); 1, 13, E (`Unexpected_eoi "cdata"); 1, 13, S `EOF]; expect "<![CDATA[foo]" [ 1, 1, S (`Chars ["foo"]); 1, 14, E (`Unexpected_eoi "cdata"); 1, 14, S `EOF]; expect "<![CDATA[foo]]" [ 1, 1, S (`Chars ["foo"]); 1, 15, E (`Unexpected_eoi "cdata"); 1, 15, S `EOF]); ("xml.tokenizer.doctype" >:: fun _ -> expect "text<!DOCTYPE html [ <!ELEMENT foo (#PCDATA)> ]>text" [ 1, 1, S (`Chars ["text"]); 1, 5, S (raw_doctype "html [ <!ELEMENT foo (#PCDATA)> ]"); 1, 49, S (`Chars ["text"]); 1, 53, S `EOF]; expect "text<!DOCTYPE html SYSTEM \"html.dtd\">text" [ 1, 1, S (`Chars ["text"]); 1, 5, S (raw_doctype "html SYSTEM \"html.dtd\""); 1, 38, S (`Chars ["text"]); 1, 42, S `EOF]; expect "text<!DOCTYPE html SYSTEM 
\"<!ELEMENT\">text" [ 1, 1, S (`Chars ["text"]); 1, 5, S (raw_doctype "html SYSTEM \"<!ELEMENT\""); 1, 39, S (`Chars ["text"]); 1, 43, S `EOF]; expect "text<!DOCTYPE html SYSTEM 'html.dtd'>text" [ 1, 1, S (`Chars ["text"]); 1, 5, S (raw_doctype "html SYSTEM 'html.dtd'"); 1, 38, S (`Chars ["text"]); 1, 42, S `EOF]; expect "text<!DOCTYPE html SYSTEM '<!ELEMENT'>text" [ 1, 1, S (`Chars ["text"]); 1, 5, S (raw_doctype "html SYSTEM '<!ELEMENT'"); 1, 39, S (`Chars ["text"]); 1, 43, S `EOF]); ("xml.tokenizer.bad-doctype-start" >:: fun _ -> let error = `Bad_token ("<!D", "doctype", "should start with '<!DOCTYPE '") in expect "<!Doctype html>" [ 1, 1, E error; 1, 1, E (`Bad_token ("<", "text", "replace with '&lt;'")); 1, 1, S (`Chars ["<!Doctype html>"]); 1, 16, S `EOF]; expect "<!D<!DOCTYPE html>" [ 1, 1, E error; 1, 1, E (`Bad_token ("<", "text", "replace with '&lt;'")); 1, 1, S (`Chars ["<!D"]); 1, 4, S (raw_doctype "html"); 1, 19, S `EOF]; expect "<!DOC" [ 1, 1, E error; 1, 1, E (`Bad_token ("<", "text", "replace with '&lt;'")); 1, 1, S (`Chars ["<!DOC"]); 1, 6, S `EOF]); ("xml.tokenizer.unterminated-doctype" >:: fun _ -> expect "<!DOCTYPE html" [ 1, 1, S (raw_doctype "html"); 1, 15, E (`Unexpected_eoi "doctype"); 1, 15, S `EOF]; expect "<!DOCTYPE html SYSTEM \"foo>" [ 1, 1, S (raw_doctype "html SYSTEM \"foo>"); 1, 28, E (`Unexpected_eoi "doctype"); 1, 28, S `EOF]; expect "<!DOCTYPE html SYSTEM 'foo>" [ 1, 1, S (raw_doctype "html SYSTEM 'foo>"); 1, 28, E (`Unexpected_eoi "doctype"); 1, 28, S `EOF]; expect "<!DOCTYPE html [ <!ELEMENT " [ 1, 1, S (raw_doctype "html [ <!ELEMENT "); 1, 28, E (`Unexpected_eoi "doctype"); 1, 28, S `EOF]); ("xml.tokenizer.pi-in-doctype" >:: fun _ -> expect "<!DOCTYPE html [ <?foo bar?> ]>" [ 1, 1, S (raw_doctype "html [ <?foo bar?> ]"); 1, 32, S `EOF]; expect "<!DOCTYPE html [ <?foo bar ]>" [ 1, 30, E (`Unexpected_eoi "processing instruction"); 1, 1, S (raw_doctype "html [ <?foo bar ]>"); 1, 30, E (`Unexpected_eoi "doctype"); 1, 30, S `EOF]); 
("xml.tokenizer.comment-in-doctype" >:: fun _ -> expect "<!DOCTYPE html [ <!-- foo --> ]>" [ 1, 1, S (raw_doctype "html [ <!-- foo --> ]"); 1, 33, S `EOF]; expect "<!DOCTYPE html [ <!-- foo ]>" [ 1, 1, S (raw_doctype "html [ <!-- foo ]>"); 1, 29, E (`Unexpected_eoi "doctype"); 1, 29, S `EOF]); ("xml.tokenizer.reference" >:: fun _ -> expect "foo&lt;bar&gt;&amp;&quot;&apos;baz&#48;&#x31;quux" [ 1, 1, S (`Chars ["foo<bar>&\"'baz01quux"]); 1, 50, S `EOF]; expect "&#955;" [ 1, 1, S (`Chars ["λ"]); 1, 7, S `EOF]); ("xml.tokenizer.bad-reference" >:: fun _ -> expect "&" [ 1, 2, E (`Unexpected_eoi "reference"); 1, 1, E (`Bad_token ("&", "text", "replace with '&amp;'")); 1, 1, S (`Chars ["&"]); 1, 2, S `EOF]; expect "&lt" [ 1, 4, E (`Unexpected_eoi "reference"); 1, 1, E (`Bad_token ("&", "text", "replace with '&amp;'")); 1, 1, S (`Chars ["&lt"]); 1, 4, S `EOF]; expect "&;" [ 1, 1, E (`Bad_token ("&;", "reference", "empty reference")); 1, 1, E (`Bad_token ("&", "text", "replace with '&amp;'")); 1, 1, S (`Chars ["&;"]); 1, 3, S `EOF]; expect "&<!-- foo -->" [ 1, 2, E (`Bad_token ("<", "reference", "invalid start character")); 1, 1, E (`Bad_token ("&", "text", "replace with '&amp;'")); 1, 1, S (`Chars ["&"]); 1, 2, S (`Comment " foo "); 1, 14, S `EOF]; expect "&lt<!-- foo -->" [ 1, 4, E (`Bad_token ("<", "reference", "invalid name character")); 1, 1, E (`Bad_token ("&", "text", "replace with '&amp;'")); 1, 1, S (`Chars ["&lt"]); 1, 4, S (`Comment " foo "); 1, 16, S `EOF]; expect "&#<!-- foo -->" [ 1, 3, E (`Bad_token ("<", "reference", "expected digit")); 1, 1, E (`Bad_token ("&", "text", "replace with '&amp;'")); 1, 1, S (`Chars ["&#"]); 1, 3, S (`Comment " foo "); 1, 15, S `EOF]; expect "&#;" [ 1, 1, E (`Bad_token ("&#;", "reference", "empty character reference")); 1, 1, E (`Bad_token ("&", "text", "replace with '&amp;'")); 1, 1, S (`Chars ["&#;"]); 1, 4, S `EOF]; expect "&#x<!-- foo -->" [ 1, 4, E (`Bad_token ("<", "reference", "expected digit")); 1, 1, E (`Bad_token ("&", 
"text", "replace with '&amp;'")); 1, 1, S (`Chars ["&#x"]); 1, 4, S (`Comment " foo "); 1, 16, S `EOF]; let empty_character_reference = "empty character reference" in expect "&#x;" [ 1, 1, E (`Bad_token ("&#x;", "reference", empty_character_reference)); 1, 1, E (`Bad_token ("&", "text", "replace with '&amp;'")); 1, 1, S (`Chars ["&#x;"]); 1, 5, S `EOF]; expect "&#6a;" [ 1, 4, E (`Bad_token ("a", "reference", "expected digit")); 1, 1, E (`Bad_token ("&", "text", "replace with '&amp;'")); 1, 1, S (`Chars ["&#6a;"]); 1, 6, S `EOF]; let absurd = "&#x1000000000000000000000000000000000000000;" in expect absurd [ 1, 1, E (`Bad_token (absurd, "reference", "number out of range")); 1, 1, E (`Bad_token ("&", "text", "replace with '&amp;'")); 1, 1, S (`Chars [absurd]); 1, 45, S `EOF]); ("xml.tokenizer.custom-reference" >:: fun _ -> let entity = function | "test" -> Some "custom" | "lt" -> Some "foobar" | _ -> None in expect ~entity "&test; &lt;" [ 1, 1, S (`Chars ["custom <"]); 1, 12, S `EOF]; expect ~entity "&other;" [ 1, 1, E (`Bad_token ("other", "reference", "unknown entity")); 1, 1, E (`Bad_token ("&", "text", "replace with '&amp;'")); 1, 1, S (`Chars ["&other;"]); 1, 8, S `EOF]); ("xml.tokenizer.faux-markup" >:: fun _ -> expect "&lt;!-- foo -->" [ 1, 1, S (`Chars ["<!-- foo -->"]); 1, 16, S `EOF]); ("xml.tokenizer.start-tag" >:: fun _ -> expect "text<foo>text" [ 1, 1, S (`Chars ["text"]); 1, 5, S (`Start (tag "foo" [])); 1, 10, S (`Chars ["text"]); 1, 14, S `EOF]; expect "<foo >" [ 1, 1, S (`Start (tag "foo" [])); 1, 8, S `EOF]); ("xml.tokenizer.self-closing-tag" >:: fun _ -> expect "<foo/>" [ 1, 1, S (`Start (tag ~self_closing:true "foo" [])); 1, 7, S `EOF]; expect "<foo />" [ 1, 1, S (`Start (tag ~self_closing:true "foo" [])); 1, 9, S `EOF]); ("xml.tokenizer.end-tag" >:: fun _ -> expect "</foo>" [ 1, 1, S (`End (tag "foo" [])); 1, 7, S `EOF]; expect "</foo >" [ 1, 1, S (`End (tag "foo" [])); 1, 10, S `EOF]); ("xml.tokenizer.bad-tag-names" >:: fun _ -> expect "<<foo>" [ 
1, 2, E (`Bad_token ("<", "tag", "invalid start character")); 1, 1, E (`Bad_token ("<", "text", "replace with '&lt;'")); 1, 1, S (`Chars ["<"]); 1, 2, S (`Start (tag "foo" [])); 1, 7, S `EOF]; expect "</<foo>" [ 1, 3, E (`Bad_token ("<", "tag", "invalid start character")); 1, 1, E (`Bad_token ("<", "text", "replace with '&lt;'")); 1, 1, S (`Chars ["</"]); 1, 3, S (`Start (tag "foo" [])); 1, 8, S `EOF]; expect "<abc<foo>" [ 1, 5, E (`Bad_token ("<", "tag", "invalid name character")); 1, 1, E (`Bad_token ("<", "text", "replace with '&lt;'")); 1, 1, S (`Chars ["<abc"]); 1, 5, S (`Start (tag "foo" [])); 1, 10, S `EOF]; expect "</abc<foo>" [ 1, 6, E (`Bad_token ("<", "tag", "invalid name character")); 1, 1, E (`Bad_token ("<", "text", "replace with '&lt;'")); 1, 1, S (`Chars ["</abc"]); 1, 6, S (`Start (tag "foo" [])); 1, 11, S `EOF]; expect "< foo>" [ 1, 2, E (`Bad_token (" ", "tag", "invalid start character")); 1, 1, E (`Bad_token ("<", "text", "replace with '&lt;'")); 1, 1, S (`Chars ["< foo>"]); 1, 7, S `EOF]; expect "</ foo>" [ 1, 3, E (`Bad_token (" ", "tag", "invalid start character")); 1, 1, E (`Bad_token ("<", "text", "replace with '&lt;'")); 1, 1, S (`Chars ["</ foo>"]); 1, 8, S `EOF]; expect "<>" [ 1, 2, E (`Bad_token (">", "tag", "invalid start character")); 1, 1, E (`Bad_token ("<", "text", "replace with '&lt;'")); 1, 1, S (`Chars ["<>"]); 1, 3, S `EOF]; expect "</>" [ 1, 3, E (`Bad_token (">", "tag", "invalid start character")); 1, 1, E (`Bad_token ("<", "text", "replace with '&lt;'")); 1, 1, S (`Chars ["</>"]); 1, 4, S `EOF]); ("xml.tokenizer.junk-in-end-tag" >:: fun _ -> expect "</foo bar>" [ 1, 7, E (`Bad_token ("b", "tag", "attribute in end tag")); 1, 1, S (`End (tag "foo" [])); 1, 11, S `EOF]); ("xml.tokenizer.stray-slash-in-tag" >:: fun _ -> expect "<foo / />" [ 1, 6, E (`Bad_token ("/", "tag", "should be part of '/>'")); 1, 1, S (`Start (tag ~self_closing:true "foo" [])); 1, 10, S `EOF]); ("xml.tokenizer.unterminated-tag" >:: fun _ -> expect "<foo" 
[ 1, 5, E (`Unexpected_eoi "tag"); 1, 1, E (`Bad_token ("<", "text", "replace with '&lt;'")); 1, 1, S (`Chars ["<foo"]); 1, 5, S `EOF]; expect "foo<" [ 1, 5, E (`Unexpected_eoi "tag"); 1, 4, E (`Bad_token ("<", "text", "replace with '&lt;'")); 1, 1, S (`Chars ["foo<"]); 1, 5, S `EOF]; expect "</" [ 1, 3, E (`Unexpected_eoi "tag"); 1, 1, E (`Bad_token ("<", "text", "replace with '&lt;'")); 1, 1, S (`Chars ["</"]); 1, 3, S `EOF]; expect "</foo" [ 1, 6, E (`Unexpected_eoi "tag"); 1, 1, E (`Bad_token ("<", "text", "replace with '&lt;'")); 1, 1, S (`Chars ["</foo"]); 1, 6, S `EOF]; expect "</foo " [ 1, 1, S (`End (tag "foo" [])); 1, 7, E (`Unexpected_eoi "tag"); 1, 7, S `EOF]; expect "<foo /" [ 1, 1, S (`Start (tag ~self_closing:true "foo" [])); 1, 7, E (`Unexpected_eoi "tag"); 1, 7, S `EOF]; expect "<foo bar='' " [ 1, 1, S (`Start (tag "foo" ["bar", ""])); 1, 13, E (`Unexpected_eoi "tag"); 1, 13, S `EOF]); ("xml.tokenizer.attributes" >:: fun _ -> expect "<foo bar='baz' blah='blahh'>" [ 1, 1, S (`Start (tag "foo" ["bar", "baz"; "blah", "blahh"])); 1, 29, S `EOF]; let attributes = ["bar", "baz"; "blah", "blahh"] in expect "<foo bar='baz' blah='blahh'/>" [ 1, 1, S (`Start (tag ~self_closing:true "foo" attributes)); 1, 30, S `EOF]; expect "<foo bar='' baz=\"'\" blah='\"' >" [ 1, 1, S (`Start (tag "foo" ["bar", ""; "baz", "'"; "blah", "\""])); 1, 38, S `EOF]; expect "<foo bar='>' baz='/'>" [ 1, 1, S (`Start (tag "foo" ["bar", ">"; "baz", "/"])); 1, 22, S `EOF]); ("xml.tokenizer.references-in-attributes" >:: fun _ -> expect "<foo bar='&lt;'>" [ 1, 1, S (`Start (tag "foo" ["bar", "<"])); 1, 17, S `EOF]; expect "<foo bar=\"&lt;\">" [ 1, 1, S (`Start (tag "foo" ["bar", "<"])); 1, 17, S `EOF]; expect "<foo bar='&#48;&#x31;'>" [ 1, 1, S (`Start (tag "foo" ["bar", "01"])); 1, 24, S `EOF]; expect "<foo bar='&'>text" [ 1, 12, E (`Bad_token ("'", "reference", "invalid start character")); 1, 11, E (`Bad_token ("&", "attribute", "replace with '&amp;'")); 1, 1, S (`Start (tag "foo" 
["bar", "&"])); 1, 14, S (`Chars ["text"]); 1, 18, S `EOF]); ("xml.tokenizer.bad-attribute-name" >:: fun _ -> expect "<foo !bar=''>" [ 1, 6, E (`Bad_token ("!", "attribute", "invalid start character")); 1, 1, S (`Start (tag "foo" ["!bar", ""])); 1, 14, S `EOF]; expect "<foo b!ar=''>" [ 1, 7, E (`Bad_token ("!", "attribute", "invalid name character")); 1, 1, S (`Start (tag "foo" ["b!ar", ""])); 1, 14, S `EOF]); ("xml.tokenizer.good-attribute-name" >:: fun _ -> expect "<foo -bar=''>" [ 1, 6, E (`Bad_token ("-", "attribute", "invalid start character")); 1, 1, S (`Start (tag "foo" ["-bar", ""])); 1, 14, S `EOF]; expect "<foo b-ar=''>" [ 1, 1, S (`Start (tag "foo" ["b-ar", ""])); 1, 14, S `EOF]); ("xml.tokenizer.bad-attribute-whitespace" >:: fun _ -> expect "<foo bar = 'baz'>" [ 1, 9, E (`Bad_token (" ", "attribute", "whitespace not allowed here")); 1, 12, E (`Bad_token (" ", "attribute", "whitespace not allowed here")); 1, 1, S (`Start (tag "foo" ["bar", "baz"])); 1, 20, S `EOF]); ("xml.tokenizer.bad-attribute-value" >:: fun _ -> expect "<foo bar>" [ 1, 6, E (`Bad_token ("bar", "attribute", "has no value")); 1, 1, S (`Start (tag "foo" ["bar", ""])); 1, 10, S `EOF]; expect "<foo bar=>" [ 1, 6, E (`Bad_token ("bar", "attribute", "has no value")); 1, 1, S (`Start (tag "foo" ["bar", ""])); 1, 11, S `EOF]; expect "<foo bar=baz blah=''>" [ 1, 10, E (`Bad_token ("b", "attribute", "unquoted value")); 1, 1, S (`Start (tag "foo" ["bar", "baz"; "blah", ""])); 1, 22, S `EOF]; expect "<foo bar= baz>" [ 1, 10, E (`Bad_token (" ", "attribute", "whitespace not allowed here")); 1, 11, E (`Bad_token ("b", "attribute", "unquoted value")); 1, 1, S (`Start (tag "foo" ["bar", "baz"])); 1, 15, S `EOF]; expect "<foo bar=&amp;>" [ 1, 10, E (`Bad_token ("&", "attribute", "unquoted value")); 1, 1, S (`Start (tag "foo" ["bar", "&"])); 1, 16, S `EOF]; expect "<foo bar=&>" [ 1, 10, E (`Bad_token ("&", "attribute", "unquoted value")); 1, 11, E (`Bad_token (">", "reference", "invalid start 
character")); 1, 10, E (`Bad_token ("&", "attribute", "replace with '&amp;'")); 1, 1, S (`Start (tag "foo" ["bar", "&"])); 1, 12, S `EOF]; expect "<foo bar='<' baz=<>" [ 1, 11, E (`Bad_token ("<", "attribute", "replace with '&lt;'")); 1, 18, E (`Bad_token ("<", "attribute", "unquoted value")); 1, 18, E (`Bad_token ("<", "attribute", "replace with '&lt;'")); 1, 1, S (`Start (tag "foo" ["bar", "<"; "baz", "<"])); 1, 20, S `EOF]; expect "<foo bar='baz" [ 1, 14, E (`Unexpected_eoi "attribute value"); 1, 1, S (`Start (tag "foo" ["bar", "baz"])); 1, 14, E (`Unexpected_eoi "tag"); 1, 14, S `EOF]); ("xml.tokenizer.processing-instruction" >:: fun _ -> expect "text<?target content?>text" [ 1, 1, S (`Chars ["text"]); 1, 5, S (`PI ("target", "content")); 1, 23, S (`Chars ["text"]); 1, 27, S `EOF]; (* Disabled to avoid trigraph warning from the C compiler. *) (* expect "<?target content ? ??>" [ 1, 1, S (`PI ("target", " content ? ?")); 1, 24, S `EOF]; *) expect "<?target &amp;<?>" [ 1, 1, S (`PI ("target", "&amp;<")); 1, 18, S `EOF]); ("xml.tokenizer.bad-processing-instruction" >:: fun _ -> expect "<? target content?>" [ 1, 3, E (`Bad_token (" ", pi, "whitespace not allowed here")); 1, 1, S (`PI ("target", "content")); 1, 20, S `EOF]; expect "<?&amp; content?>" [ 1, 3, E (`Bad_token ("&", pi, "invalid start character")); 1, 7, E (`Bad_token (";", pi, "invalid name character")); 1, 1, S (`PI ("&amp;", "content")); 1, 18, S `EOF]; (* Disabled to avoid trigraph warning from the C compiler. *) (* expect "<??>" [ 1, 1, E (`Bad_token ("<?...", pi, "empty")); 1, 5, S `EOF] *)); ("xml.tokenizer.unterminated-processing-instruction" >:: fun _ -> expect "<?" [ 1, 3, E (`Unexpected_eoi pi); 1, 1, E (`Bad_token ("<?...", pi, "empty")); 1, 3, S `EOF]; expect "<?target" [ 1, 9, E (`Unexpected_eoi pi); 1, 1, S (`PI ("target", "")); 1, 9, S `EOF]; expect "<?target content" [ 1, 17, E (`Unexpected_eoi pi); 1, 1, S (`PI ("target", "content")); 1, 17, S `EOF]; expect "<?target content?" 
[ 1, 18, E (`Unexpected_eoi pi); 1, 1, S (`PI ("target", "content")); 1, 18, S `EOF]); ("xml.tokenizer.xml-declaration" >:: fun _ -> expect "<?xml version='1.0'?>" [ 1, 1, S (xml_decl "1.0" None None); 1, 22, S `EOF]; expect "<?xml version='1.1' encoding=\"utf-8\" standalone='yes' ?>" [ 1, 1, S (xml_decl "1.1" (Some "utf-8") (Some true)); 1, 60, S `EOF]; expect "<?xml version='1.2' standalone='no'?>" [ 1, 1, S (xml_decl "1.2" None (Some false)); 1, 38, S `EOF]); ("xml.tokenizer.case-mismatch-in-xml-declaration" >:: fun _ -> expect "<?XmL VeRsIoN='1.0' ENCODING='utf-8' sTaNdAlOnE='YeS'?>" [ 1, 1, E (`Bad_token ("XmL", xml, "must be 'xml'")); 1, 7, E (`Bad_token ("VeRsIoN", xml, "must be 'version'")); 1, 21, E (`Bad_token ("ENCODING", xml, "must be 'encoding'")); 1, 38, E (`Bad_token ("sTaNdAlOnE", xml, "must be 'standalone'")); 1, 38, E (`Bad_token ("YeS", xml, "must be 'yes' or 'no'")); 1, 1, S (xml_decl "1.0" (Some "utf-8") (Some true)); 1, 56, S `EOF]); ("xml.tokenizer.missing-xml-version" >:: fun _ -> expect "<?xml?>" [ 1, 1, E (`Bad_token ("<?xml...", xml, "missing version")); 1, 1, S (xml_decl "1.0" None None); 1, 8, S `EOF]; expect "<?xml ?>" [ 1, 1, E (`Bad_token ("<?xml...", xml, "missing version")); 1, 1, S (xml_decl "1.0" None None); 1, 9, S `EOF]; expect "<?XmL?>" [ 1, 1, E (`Bad_token ("XmL", xml, "must be 'xml'")); 1, 1, E (`Bad_token ("<?xml...", xml, "missing version")); 1, 1, S (xml_decl "1.0" None None); 1, 8, S `EOF]; expect "<?xml standalone='yes'?>" [ 1, 1, E (`Bad_token ("<?xml...", xml, "missing version")); 1, 1, S (xml_decl "1.0" None (Some true)); 1, 25, S `EOF]); ("xml.tokenizer.version-not-first" >:: fun _ -> expect "<?xml standalone='yes' version='1.0'?>" [ 1, 24, E (`Bad_token ("version", xml, "must be first")); 1, 1, S (xml_decl "1.0" None (Some true)); 1, 39, S `EOF]); ("xml.tokenizer.bad-version" >:: fun _ -> expect "<?xml version='2.0'?>" [ 1, 7, E (`Bad_token ("2.0", xml, "must match 1.x")); 1, 1, S (xml_decl "2.0" None None); 1, 
22, S `EOF]); ("xml.tokenizer.bad-standalone" >:: fun _ -> expect "<?xml version='1.0' standalone='maybe'?>" [ 1, 21, E (`Bad_token ("maybe", xml, "must be 'yes' or 'no'")); 1, 1, S (xml_decl "1.0" None None); 1, 41, S `EOF]; expect "<?xml version='1.0' standalone='yes' encoding='utf-8'?>" [ 1, 21, E (`Bad_token ("standalone", xml, "must come after 'encoding'")); 1, 1, S (xml_decl "1.0" (Some "utf-8") (Some true)); 1, 56, S `EOF]); ("xml.tokenizer.junk-in-xml-declaration" >:: fun _ -> expect "<?xml version='1.0' foo='bar'?>" [ 1, 21, E (`Bad_token ("foo", xml, "not allowed here")); 1, 1, S (xml_decl "1.0" None None); 1, 32, S `EOF]); ("xml.tokenizer.unterminated-xml-declaration" >:: fun _ -> expect "<?xml " [ 1, 7, E (`Unexpected_eoi xml); 1, 1, E (`Bad_token ("<?xml...", xml, "missing version")); 1, 1, S (xml_decl "1.0" None None); 1, 7, S `EOF]; expect "<?xml version='1.0' " [ 1, 21, E (`Unexpected_eoi xml); 1, 1, S (xml_decl "1.0" None None); 1, 21, S `EOF]); ("xml.tokenizer.large-text" >:: fun _ -> with_text_limit 8 begin fun () -> expect "foobar" [ 1, 1, S (`Chars ["foobar"]); 1, 7, S `EOF]; expect "foobarbaz" [ 1, 1, S (`Chars ["foobarba"; "z"]); 1, 10, S `EOF] end) ] �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������markup.ml-1.0.3/test/test_xml_writer.ml�������������������������������������������������������������0000664�0000000�0000000�00000014626�14213577064�0020207�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������(* This file is part of Markup.ml, 
released under the MIT license. See LICENSE.md for details, or visit
   https://github.com/aantron/markup.ml. *)

open OUnit2
open Test_support
open Markup__Common

(* Top-level namespace resolver that declares no prefixes: every namespace URI
   is mapped to [None]. Default [?prefix] argument for [expect] below. *)
let no_prefixes = fun _ -> None

(* Runs [signals] through the XML writer and checks that the writer produces
   exactly the expected output tokens [strings] ([S _] for emitted strings,
   [E _] for reported errors; compared by [expect_strings]).
   [id] labels assertion failures; [prefix] is the top-level namespace-URI to
   prefix resolver handed to [Markup__Xml_writer.write]. *)
let expect ?(prefix = no_prefixes) id signals strings =
  let report, iterate, ended = expect_strings id strings in
  signals
  |> Markup__Kstream.of_list
  |> Markup__Xml_writer.write report prefix
  |> iter iterate;
  (* [ended] asserts that the full expected token list was consumed. *)
  ended ()

let tests = [
  (* An empty signal stream produces no output. *)
  ("xml.writer.empty" >:: fun _ ->
    expect "empty" [] []);

  (* Text signals are written verbatim; adjacent [`Text] signals stay
     separate strings, and empty text produces no output at all. *)
  ("xml.writer.text" >:: fun _ ->
    expect "text" [`Text ["foo"]] [S "foo"];
    expect "adjacent text" [`Text ["foo"]; `Text ["bar"]] [S "foo"; S "bar"];
    expect "empty text" [`Text [""]] []);

  (* '<', '&', '>' in character data are escaped as entity references. *)
  ("xml.writer.text-escaping" >:: fun _ ->
    expect "text escaping" [`Text ["<foo&bar>"]] [S "&lt;foo&amp;bar&gt;"]);

  (* The XML declaration: [encoding] and [standalone] attributes are emitted
     only when present in the [`Xml] signal, in that order. *)
  ("xml.writer.xml-declaration" >:: fun _ ->
    expect "version only"
      [`Xml {version = "1.0"; encoding = None; standalone = None}]
      [S "<?xml"; S " "; S "version"; S "=\""; S "1.0"; S "\""; S "?>"];
    expect "encoding"
      [`Xml {version = "1.0"; encoding = Some "utf-8"; standalone = None}]
      [S "<?xml"; S " "; S "version"; S "=\""; S "1.0"; S "\""; S " ";
       S "encoding"; S "=\""; S "utf-8"; S "\""; S "?>"];
    expect "standalone: yes"
      [`Xml {version = "1.0"; encoding = None; standalone = Some true}]
      [S "<?xml"; S " "; S "version"; S "=\""; S "1.0"; S "\""; S " ";
       S "standalone"; S "=\""; S "yes"; S "\""; S "?>"];
    expect "standalone: no"
      [`Xml {version = "1.0"; encoding = None; standalone = Some false}]
      [S "<?xml"; S " "; S "version"; S "=\""; S "1.0"; S "\""; S " ";
       S "standalone"; S "=\""; S "no"; S "\""; S "?>"];
    expect "encoding and standalone"
      [`Xml {version = "1.0"; encoding = Some "utf-8"; standalone = Some false}]
      [S "<?xml"; S " "; S "version"; S "=\""; S "1.0"; S "\""; S " ";
       S "encoding"; S "=\""; S "utf-8"; S "\""; S " ";
       S "standalone"; S "=\""; S "no"; S "\""; S "?>"]);

  (* A doctype with only [raw_text] set is written back as that raw text. *)
  ("xml.writer.doctype" >:: fun _ ->
    let doctype =
      {doctype_name = None; public_identifier = None;
       system_identifier = None; raw_text = Some "html";
       force_quirks = false}
    in
    expect "doctype" [`Doctype doctype] [S "<!DOCTYPE "; S "html"; S ">"]);

  ("xml.writer.processing-instruction" >:: fun _ ->
    expect "processing instruction" [`PI ("foo", "bar")]
      [S "<?"; S "foo"; S " "; S "bar"; S "?>"]);

  ("xml.writer.comment" >:: fun _ ->
    expect "comment" [`Comment "foo"] [S "<!--"; S "foo"; S "-->"]);

  (* Element serialization: an element with no content is self-closing
     ("<foo/>"); otherwise a matching end tag is written. *)
  ("xml.writer.element" >:: fun _ ->
    expect "self-closing element"
      [`Start_element (("", "foo"), []); `End_element]
      [S "<"; S "foo"; S "/>"];
    expect "element with text"
      [`Start_element (("", "foo"), []); `Text ["bar"]; `End_element]
      [S "<"; S "foo"; S ">"; S "bar"; S "</"; S "foo"; S ">"];
    expect "nested elements"
      [`Start_element (("", "foo"), []);
       `Start_element (("", "bar"), []);
       `Start_element (("", "baz"), []);
       `End_element; `End_element; `End_element]
      [S "<"; S "foo"; S ">"; S "<"; S "bar"; S ">"; S "<"; S "baz"; S "/>";
       S "</"; S "bar"; S ">"; S "</"; S "foo"; S ">"]);

  (* Attributes are emitted in signal order as name="value" pairs. *)
  ("xml.writer.attribute" >:: fun _ ->
    expect "attribute"
      [`Start_element
        (("", "foo"), [("", "bar"), "baz"; ("", "blah"), "lulz"]);
       `End_element]
      [S "<"; S "foo"; S " "; S "bar"; S "=\""; S "baz"; S "\""; S " ";
       S "blah"; S "=\""; S "lulz"; S "\""; S "/>"]);

  (* In attribute values, '<', '>', '&', '\'', '"' are all escaped. *)
  ("xml.writer.attribute-escaping" >:: fun _ ->
    expect "attribute escaping"
      [`Start_element (("", "foo"), [("", "bar"), "<baz>&'\""]); `End_element]
      [S "<"; S "foo"; S " "; S "bar"; S "=\"";
       S "&lt;baz&gt;&amp;&apos;&quot;"; S "\""; S "/>"]);

  (* Namespace handling: xmlns declarations on ancestors determine the
     prefixes used for element and attribute names in the subtree. A
     [`Bad_namespace] error is reported when a namespace URI cannot be
     resolved to a prefix at the point of use (e.g. its binding is shadowed),
     and the name is then written without a prefix. *)
  ("xml.writer.namespace" >:: fun _ ->
    expect "default namespace"
      [`Start_element
        (("some_ns", "foo"),
         [(xmlns_ns, "xmlns"), "some_ns"; ("", "bar"), "baz"]);
       `Start_element (("some_ns", "quux"), []);
       `End_element; `End_element]
      [S "<"; S "foo"; S " "; S "xmlns"; S "=\""; S "some_ns"; S "\""; S " ";
       S "bar"; S "=\""; S "baz"; S "\""; S ">"; S "<"; S "quux"; S "/>";
       S "</"; S "foo"; S ">"];
    expect "prefix"
      [`Start_element
        (("some_ns", "foo"),
         [(xmlns_ns, "a"), "some_ns"; ("some_ns", "bar"), "baz"]);
       `Start_element (("some_ns", "quux"), []);
       `End_element; `End_element]
      [S "<"; S "a:foo"; S " "; S "xmlns:a"; S "=\""; S "some_ns"; S "\"";
       S " "; S "a:bar"; S "=\""; S "baz"; S "\""; S ">"; S "<"; S "a:quux";
       S "/>"; S "</"; S "a:foo"; S ">"];
    (* Re-binding prefix "a" to a different URI shadows the outer binding,
       so the inner element's own namespace becomes unresolvable. *)
    expect "shadowing"
      [`Start_element (("", "foo"), [(xmlns_ns, "a"), "some_ns"]);
       `Start_element (("some_ns", "bar"), [(xmlns_ns, "a"), "other_ns"]);
       `End_element; `End_element]
      [S "<"; S "foo"; S " "; S "xmlns:a"; S "=\""; S "some_ns"; S "\"";
       S ">"; E (`Bad_namespace "some_ns"); S "<"; S "bar"; S " ";
       S "xmlns:a"; S "=\""; S "other_ns"; S "\""; S "/>"; S "</"; S "foo";
       S ">"];
    (* With two bindings for the same URI, shadowing one still leaves the
       other ("b") usable for resolution. *)
    expect "shadowing resolution"
      [`Start_element
        (("", "foo"),
         [(xmlns_ns, "a"), "some_ns"; (xmlns_ns, "b"), "some_ns"]);
       `Start_element (("some_ns", "bar"), [(xmlns_ns, "a"), "other_ns"]);
       `End_element; `End_element]
      [S "<"; S "foo"; S " "; S "xmlns:a"; S "=\""; S "some_ns"; S "\"";
       S " "; S "xmlns:b"; S "=\""; S "some_ns"; S "\""; S ">"; S "<";
       S "b:bar"; S " "; S "xmlns:a"; S "=\""; S "other_ns"; S "\""; S "/>";
       S "</"; S "foo"; S ">"]);

  (* Prefixes supplied by the user's [?prefix] callback are used without
     being declared in the output; an in-document re-binding of the same
     prefix shadows the callback's binding and triggers [`Bad_namespace]. *)
  ("xml.writer.top-level-namespace" >:: fun _ ->
    let prefix = function
      | "some_ns" -> Some "a"
      | _ -> None
    in
    expect ~prefix "top-level namespace"
      [`Start_element (("some_ns", "foo"), []); `End_element]
      [S "<"; S "a:foo"; S "/>"];
    expect ~prefix "top-level namespace shadowed"
      [`Start_element (("some_ns", "foo"), [(xmlns_ns, "a"), "other_ns"]);
       `End_element]
      [E (`Bad_namespace "some_ns"); S "<"; S "foo"; S " "; S "xmlns:a";
       S "=\""; S "other_ns"; S "\""; S "/>"])
]
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������