pax_global_header00006660000000000000000000000064146021277340014520gustar00rootroot0000000000000052 comment=9c03446ae7d26a92300c787b7c283dd899e3a161 detox-2.0.0/000077500000000000000000000000001460212773400126425ustar00rootroot00000000000000detox-2.0.0/.circleci/000077500000000000000000000000001460212773400144755ustar00rootroot00000000000000detox-2.0.0/.circleci/config.yml000066400000000000000000000056401460212773400164720ustar00rootroot00000000000000version: 2.1 executors: docker: # Docker using the Base Convenience Image docker: - image: cimg/base:stable linux: # a Linux VM running Ubuntu 20.04 machine: image: ubuntu-2004:current # Define a job to be invoked later in a workflow. jobs: build: parameters: os: type: executor executor: << parameters.os >> steps: - checkout - run: name: "autoreconf" command: "autoreconf --install" - run: name: "configure" command: "./configure" - run: name: "make" command: "make" - run: name: "simple test" command: "./bin/simple-test.sh" - run: name: "install" command: "sudo make install" build-macos: macos: # macos executor running Xcode xcode: 12.5.1 steps: - checkout - run: name: "install missing packages" command: "HOMEBREW_NO_AUTO_UPDATE=1 brew install pkg-config coreutils" - run: name: "autoreconf" command: "autoreconf --install" - run: name: "configure" command: "./configure" - run: name: "make" command: "make" - run: name: "simple test" command: "./bin/simple-test.sh" - run: name: "install" command: "sudo make install" unit-test: docker: - image: cimg/base:stable steps: - checkout - run: name: "install" command: "sudo apt update && sudo apt install -y check" - run: name: "autoreconf" command: "autoreconf --install" - run: name: "configure" command: "./configure --with-check" - run: name: "make" command: "make" - run: name: "check" command: "make check" - run: name: "distcheck" command: "make distcheck" maintainer-clean: docker: - image: cimg/base:stable steps: - checkout - run: name: "install" command: "sudo apt update && sudo 
apt install -y flex bison" - run: name: "autoreconf" command: "autoreconf --install" - run: name: "configure" command: "./configure" - run: name: "maintainer clean" command: "make maintainer-clean" - run: name: "autoreconf" command: "autoreconf --install" - run: name: "configure" command: "./configure" - run: name: "make" command: "make" - run: name: "check" command: "make check" - run: name: "distcheck" command: "make distcheck" workflows: workflow: jobs: - build: matrix: parameters: os: ["docker", "linux"] - build-macos - unit-test - maintainer-clean detox-2.0.0/.editorconfig000066400000000000000000000013171460212773400153210ustar00rootroot00000000000000# EditorConfig spec: https://editorconfig.org # top-most EditorConfig file root = true # Unix-style newlines with a newline ending every file [**] charset = utf-8 end_of_line = lf insert_final_newline = true trim_trailing_whitespace = true # 4 space indentation for chilly [**.c] indent_size = 4 indent_style = space [**.h] indent_size = 4 indent_style = space [**.l] indent_size = 4 indent_style = space [**.y] indent_size = 4 indent_style = space # Tab indentation for automake, autoconf, shell, and system config files [**.am] indent_style = tab [**.ac] indent_style = tab [**.sh] indent_style = tab [etc/detoxrc] indent_style = tab # 2 spaces for YAML files [**.yml] indent_style = space indent_size = 2 detox-2.0.0/.gitignore000066400000000000000000000007431460212773400146360ustar00rootroot00000000000000# Maintainer files *.bak *.orig *.patch *.rej *~ .idea # Automake / Autoconf aclocal.m4 autom4te.cache/ compile config.h config.log config.status configure depcomp .deps/ install-sh Makefile Makefile.in missing stamp-h1 test-driver ylwrap # Debugging Cruft *.i *.s a.out # Built Objects *.o # Built Packages /detox-*.tar.gz /detox-*.tar.bz2 /detox-*.zip # Coverage files *.gcda *.gcno *.gcov /coverage.info /coverage/ mallocfail_hashes # CLion cmake-build-debug CMakeLists.txt 
detox-2.0.0/BUILD.md000066400000000000000000000054751460212773400140360ustar00rootroot00000000000000# Build Instructions For general instructions, please see the build instructions in `README.md`. # Development Instructions ## Tools Used Basic Development: - autoconf - automake - bash - bison or yacc - flex - gcc - make - php - pkg-config or pkgconf Linting: - astyle - cppcheck - mandoc - sparse Testing: - check - lcov - printf - sed - strace - valgrind ## Rebuild Internals ```bash make make internals ``` ## Code and Content Formatting Check code formatting: ```bash astyle --style=kr --indent-switches --add-braces --pad-oper --pad-header $(ls src/*.[ch] | grep -E -v 'config_file_(lex|yacc)') ``` Check formatting on man pages: ```bash mandoc -T lint man/*.[15] ``` Confirm tests reference correct GitHub issue: ```bash grep -ri github tests/legacy/ | sed -e s'/[^0-9 ]//g' -e s'/ \+/ /g' -e s'/^0\+//' ``` ## Debugging ```bash ./configure --enable-debug --with-check make clean make make check cat tests/unit/*.log ``` ## Testing ### Unit and Regression Testing ```bash ./configure --with-check make clean make make check ``` ### Static Analysis ```bash make clean make cppcheck src/*.[ch] sparse src/*.[ch] ``` ### Code Coverage ```bash ./configure --with-coverage --with-check make clean make make check make coverage ``` A directory, `coverage/`, will be created. Open index.html to see the code coverage report. 
One of the following might work: ```bash gio open coverage/index.html # or firefox coverage/index.html # or chromium coverage/index.html ``` #### Code Coverage - Unit Tests Only ```bash ./configure --with-coverage --with-check make clean make ( cd tests/unit/ && make check ) make coverage ``` #### Code Coverage - Legacy Tests Only ```bash ./configure --with-coverage --with-check make clean make ( cd tests/legacy/ && make check ) make coverage ``` #### Caveats For some reason, if you pass `-ftest-coverage` to gcc when you're linking objects into an executable, whichever object is first on the command line will have its `.gcno` file wiped out. There is a hack in `src/Makefile.am` that puts `-ftest-coverage` on the `DEFS` variable. # Release Instructions ## Release 1. Update version at head of `CHANGELOG.md`. 2. Update version link at footer of `CHANGELOG.md`. 3. Update version in `configure.ac`. 4. Commit and push. ```bash git diff git add -A git commit -m "Release v1.4.0" git tag git tag v1.4.0 git push git push --tags ``` 5. Build tarballs. ```bash TAR_OPTIONS="--owner=0 --group=0 --numeric-owner" export TAR_OPTIONS make dist make dist-bzip2 make dist-zip ``` 6. Create a new release on GitHub, using the rendered contents of the CHANGELOG, and attaching the release files. 7. Create a new release on Sourceforge, using the same items. 8. Update the "latest stable" version in README.md, if appropriate. 9. Play [Alwa's Legacy]. [Alwa's Legacy]: https://eldenpixels.com/alwas-legacy/ detox-2.0.0/CHANGELOG.md000066400000000000000000000317001460212773400144540ustar00rootroot00000000000000# CHANGELOG All notable changes to this project will be documented in this file. For releases after 1.3.0, the format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [2.0.0] - 2024-03-30 ### Added - Look for detoxrc in `$XDG_CONFIG_HOME`. 
[#101] ### Changed - Enabling verbose mode is now done with either `-v` or `--verbose`. - Renamed `.ts` files to `.template` so they don't appear as TypeScript files. ### Merged - detox v1.4.5 ## [2.0.0-beta2] - 2021-08-14 ### Fixed - `inline-detox` no longer chokes when a stream doesn't end in a newline. [#74] - Compilation works under msys2. [#80] ### Merged - detox v1.4.4 - detox v1.4.3 - detox v1.4.2 ## [2.0.0-beta1] - 2021-03-05 ### Added - Added a new transliteration table, `unidecode.tbl`, based on [Text:Unidecode]. [#47] [#53] - A new config file statement telling `detox` to use a builtin table. [#28] [#50] ### Changed - BREAKING CHANGE: Transliteration no longer happens by default. To emulate the old behavior, use `detox -s utf_8`. [#21] - PACKAGE MAINTAINERS: The default config file and translation tables no longer end with `.sample`. [#59] - Builtin config file now matches the default supplied one. [#42] - Builtin translation tables are generated from `table/*.tbl`. [#21] [#29] - Cleaned up man pages. PDF versions are automatically generated. [#22] - Code uses spaces instead of tabs. [#44] - CP-1252 transliteration is now done via a separate table. [#48] - Files and directories starting with a period are ignored during recursion. [#64] - The default config file enables `remove_trailing` in the `wipeup` filter. [#43] - The default config file now explicitly uses the builtin statement. [#28] [#50] - The `max_length` filter no longer acts like the wipeup filter. Excess periods are not reduced within this filter. [#46] - The UTF-8 filter no longer behaves like the safe filter. All characters between 0x20 and 0x7E are preserved. [#40] ## Removed - Removed obsolete BUGS from man pages. One caveat has been moved to CAVEATS. [#37] - The deprecated command line option `--remove-trailing` is now removed. Use the sequence `wipeup { remove_trailing; };` instead. [#24] ### Fixed - Numerous internal bugs and inconsistencies. 
[#21] [#31] [#41] - The `max_length` filter recognizes files with two extensions. [#46] - The `safe` filter converts all ASCII control characters to `_`. [#21] [#29] - The `safe` filter ignores characters between 0x80 and 0xFF. [#21] [#29] - The `uncgi` filter converts `+` to a space as expected. [#72] - The `utf_8` filter no longer eats a byte if an invalid Unicode sequence is encountered. [#72] ### Security - Added additional compiler protection flags. [#31] - Symlinks that point at directories are no longer followed when `--special` and `-r` are specified together. [#23] - UTF-8 encoded NULL (0x0000) values are converted to `_hidden_null_` to make them obvious. [#40] ## [1.4.5] - 2021-08-15 ### Fixed - Autoconf macros have been updated to support 2.70 changes. [#82] ## [1.4.4] - 2021-08-14 ### Fixed - Add explicit large file support via autoconf. [#81] ## [1.4.3] - 2021-07-24 ### Fixed - Fixed build when the `stat` struct is missing `st_blocks`. [#77] ## [1.4.2] - 2021-03-06 ### Fixed - Replaced instances of `cp -an` with `test` and `install` in the Makefile rule that copies `yyz.sample` to `yyz`. [#73] - Fixed `make distcheck`. [#73] ## [1.4.1] - 2021-02-20 ### Fixed - Fixed a memory overflow bug while reading files from the command line, using a patch from David Tardon, which was passed on by UsernameRandomlyGenerated. [#56] [#sf-patch-3] ## [1.4.0] - 2021-02-11 ### Added - Regression tests for basic functionality, based on old custom scripts. - Regression tests confirming fixes for previously fixed issues: [#14], [#19]. ### Changed - Removed one check for `.` and `..` when traversing a directory tree. [#12] - Regenerated config file parser. - Updated the safe filter to translate new lines, carriage returns, and tabs into underscores. [#9] [#11] [#17] ### Fixed - The examples in `detox.1` no longer say `-c` when they mean `-f`. [#30] - The command synopsis in `detox.1` and `inline-detox.1` no longer adds a dash before the `sequence` and `configfile`. 
[#30] ## [1.3.3] - 2021-02-03 ### Fixed - Fix version identifier in `detox` binary. ## [1.3.2] - 2021-01-31 ### Fixed - Table based UTF-8 translation no longer mangles characters. [#14] ## [1.3.1] - 2021-01-30 ### Fixed - Merged fix for Debian #861537, written by Vasily Kolobkov, passed on by Zenaan Harkness, Quentin Guittard, and Joao Eriberto Mota Filho. This addresses an issue with detox generating malformed characters during translation. [#14] ## [1.3.0] - 2017-03-04 - Migrated from `configure.in` and `Makefile.in` to the full autoconf suite. [#1] - Remove `detox_path.h`, in favor of command line defines. [#1] - Removed `libpopt` support. [#2] - Fixed the way `inline-detox` is generated. [#6] - Merged `parse_option_*.[ch]` and `file*.[ch]`. [#1], [#2], [#6] - Added `--inline` as an option to `detox`, to enable inline mode on the main binary. [#6] ## [1.2.1] - 2017-02-27 - Migrated documents to Markdown for better presentation on github. - Applied Debian patch `01-make-upstream-makefiles-parallel-build-safe.patch`, written by Patrick Schoenfeld and updated by Joao Eriberto Mota Filho. This adds additional variables to the `Makefile` for safe parallel builds and GCC hardening. - Applied Debian patch `02-fix-wrong-use-of-hyphens-in-manpage.patch`, written by Patrick Schoenfeld and updated by Joao Eriberto Mota Filho. This fixes an errant "-" in the manpage, and corrects a spelling mistake. - Applied Debian patch `03-remove-build-instructions-from-upstream-readme.patch`, written by Patrick Schoenfeld, in spirit. I had already converted the `README` to `README.md`, so it did not apply. I moved the compilation instructions into a new file, BUILD.md, instead. - Applied Debian patch `04-change-default-sequence-to-use-utf8-table.patch`, written by Teemu Likonen. This changes the default character set from ISO 8859-1 to UTF-8. - Applied Debian patch `05-install-missing-file.patch`, written by Nelson A. de Oliveira. 
This ensures that the `safe.tbl` file gets installed during `make install` (`make install-safe-config`). - Applied Debian patch `06-fix-arguments.patch`, written by Joao Eriberto Mota Filho. This fixes several calls to printf that were causing `-Werror=format-security` to fail. - Removed CVS `$Id$` tags and updated copyright. - Added `inline-detox.1`, from the Debian package, adapted from `detox.1` by Patrick Schoenfeld. - Updated `configure` script from GNU Autoconf 2.61 to 2.69. - Updated config file parsers; `flex` goes from 5.33 to 6.0, `bison` goes from 2.3 to 3.0.4. - Added a minor work around to stop compiler noise regarding `yylex()`. ## [1.2.0] - 2008-04-12 - Modified the safe filter to use a translation table. - Modified the safe filter fallback (previous functionality) to operate without any special behavior. The wipeup filter now picks up where the safe filter left off. - Fixed the default permissions on install (files are 644 now). - Updated `libpopt` support to work on Linux under the PowerPC platform (chars are unsigned by default). - Included the generated `lex` and `yacc` files in the default package. - Added additional logic to allow files on case insensitive filesystems to have their case changed. - Added the ability to set locale specific translations in the translation tables. - Added German specific translations to the translation tables. - Added the ability to ignore specific files. [sourceforge.net tracker #1253826] - Fixed a bug where directories specified on the command line wouldn't get translated. [sourceforge.net tracker #1213623] - Added support for translating large files. [sourceforge.net tracker #1509493] - Added inline-detox for stream based detoxification. ## 1.1.1 - 2005-03-13 - Modified `Makefile` to support parallel builds. - Added `${DESTDIR}` to install paths, for Gentoo package builds. - Modified the install script to not overwrite existing configuration files or translation tables. 
- Modified the install script to install the config file and translation tables as `".sample"` as well as the working version, for all users, but in particular, to make patching the `Makefile` easier for the FreeBSD port. ## 1.1.0 - 2005-03-05 - Added lowercase filter. - Added `libpopt` support to facilitate long options on Darwin or Solaris. - Fixed some compiler gripes with `lex`/`yacc`. - Replaced the hardcoded `-ll` in `Makefile.in` with `@LEXLIB@`. ## 1.0.0 - 2004-08-08 - Added a new filter for translating UTF-8 encoded Unicode characters. - Added handling of configuration files for controlling what sequence filters are run in. - Added handling of loadable translation tables, so the user can control how the ISO 8859-1 and Unicode filters operate. - Added a new filter for trimming based on the max length. - Added command line options: -f set config file -L list sequences -n the same as --dry-run -s set sequence - Added handling for an environmental variable `DETOX_SEQUENCE`, which sets the default sequence name. - Translation of some Icelandic characters has changed. 0xd0, 0xde, 0xf0, 0xfe, the Icelandic characters for "Eth" and "Thorn" have been changed from "D", "Y", "o", "y" to "TH" and "th". - Fixed translation of 0xfc (u), 0xfd (y) and 0xff (y). - Added `.depend` generation to the `Makefile`. - Created more man pages (`detoxrc.5` and `detox.tbl.5`). ## 0.9.1 - 2004-07-15 - Added `-d` flag to install - Broke installation out into a script to handle differences between Solaris and BSD/Linux. - Added function check for `getopt_long`. 
## 0.9.0 - 2004-02-16 - Initial release [Unreleased]: https://github.com/dharple/detox/compare/v2.0.0...main [2.0.0]: https://github.com/dharple/detox/compare/v2.0.0-beta2...v2.0.0 [2.0.0-beta2]: https://github.com/dharple/detox/compare/v2.0.0-beta1...v2.0.0-beta2 [2.0.0-beta1]: https://github.com/dharple/detox/compare/v1.4.1...v2.0.0-beta1 [1.4.5]: https://github.com/dharple/detox/compare/v1.4.4...v1.4.5 [1.4.4]: https://github.com/dharple/detox/compare/v1.4.3...v1.4.4 [1.4.3]: https://github.com/dharple/detox/compare/v1.4.2...v1.4.3 [1.4.2]: https://github.com/dharple/detox/compare/v1.4.1...v1.4.2 [1.4.1]: https://github.com/dharple/detox/compare/v1.4.0...v1.4.1 [1.4.0]: https://github.com/dharple/detox/compare/v1.3.3...v1.4.0 [1.3.3]: https://github.com/dharple/detox/compare/v1.3.2...v1.3.3 [1.3.2]: https://github.com/dharple/detox/compare/v1.3.1...v1.3.2 [1.3.1]: https://github.com/dharple/detox/compare/v1.3.0...v1.3.1 [1.3.0]: https://github.com/dharple/detox/compare/v1.2.1...v1.3.0 [1.2.1]: https://github.com/dharple/detox/compare/v1.2.0...v1.2.1 [1.2.0]: https://github.com/dharple/detox/releases/tag/v1.2.0 [#101]: https://github.com/dharple/detox/pull/101 [#82]: https://github.com/dharple/detox/issues/82 [#81]: https://github.com/dharple/detox/issues/81 [#80]: https://github.com/dharple/detox/issues/80 [#77]: https://github.com/dharple/detox/issues/77 [#74]: https://github.com/dharple/detox/issues/74 [#73]: https://github.com/dharple/detox/issues/73 [#72]: https://github.com/dharple/detox/issues/72 [#64]: https://github.com/dharple/detox/issues/64 [#59]: https://github.com/dharple/detox/issues/59 [#56]: https://github.com/dharple/detox/issues/56 [#53]: https://github.com/dharple/detox/issues/53 [#50]: https://github.com/dharple/detox/issues/50 [#48]: https://github.com/dharple/detox/issues/48 [#47]: https://github.com/dharple/detox/issues/47 [#46]: https://github.com/dharple/detox/issues/46 [#44]: https://github.com/dharple/detox/issues/44 [#43]: 
https://github.com/dharple/detox/issues/43 [#42]: https://github.com/dharple/detox/issues/42 [#41]: https://github.com/dharple/detox/issues/41 [#40]: https://github.com/dharple/detox/issues/40 [#37]: https://github.com/dharple/detox/issues/37 [#31]: https://github.com/dharple/detox/issues/31 [#30]: https://github.com/dharple/detox/issues/30 [#29]: https://github.com/dharple/detox/issues/29 [#28]: https://github.com/dharple/detox/issues/28 [#24]: https://github.com/dharple/detox/issues/24 [#23]: https://github.com/dharple/detox/issues/23 [#22]: https://github.com/dharple/detox/issues/22 [#21]: https://github.com/dharple/detox/issues/21 [#19]: https://github.com/dharple/detox/issues/19 [#17]: https://github.com/dharple/detox/issues/17 [#14]: https://github.com/dharple/detox/issues/14 [#12]: https://github.com/dharple/detox/issues/12 [#11]: https://github.com/dharple/detox/issues/11 [#9]: https://github.com/dharple/detox/issues/9 [#6]: https://github.com/dharple/detox/issues/6 [#2]: https://github.com/dharple/detox/issues/2 [#1]: https://github.com/dharple/detox/issues/1 [#sf-patch-3]: https://sourceforge.net/p/detox/patches/3/ [mikrosimage/detox]: https://github.com/mikrosimage/detox [Text::Unidecode]: https://metacpan.org/pod/Text::Unidecode detox-2.0.0/HACKING-v1.md000066400000000000000000000062151460212773400145600ustar00rootroot00000000000000# Hacking Detox v1.x ## Step 1 - Confirm you're using detox v1.x The instructions are slightly different for version 2. Run `detox -V` to see the version you're using. If you're using an early copy of version 2, please reach out to me via GitHub issues. ## Step 2 - Find Your Config Files Run `detox -L -v | head`. You'll see output that looks something like this: ``` sequence name: default (*) source file: /etc/detoxrc cleaner: utf_8 cleaner: safe cleaner: wipeup sequence name: iso8859_1 source file: /etc/detoxrc cleaner: iso8859_1 cleaner: safe cleaner: wipeup ``` The `source file:` line tells us where detox's config file is. 
Chances are the config file is `/etc/detoxrc` or `/usr/local/etc/detoxrc`. ## Step 3 - Find Your Translation Tables If your config file is `/etc/detoxrc`, your translation tables should be in `/usr/share/detox/`. If your config file is `/usr/local/etc/detoxrc`, your translation tables should be in `/usr/local/share/detox/`. ## Step 4 - Edit Your Translation Tables Note the `default` sequence above; this is the sequence that detox runs if you don't specify the sequence to run on the command line. The sequence above runs three filters. The first filter, `utf_8`, takes multibyte characters and transliterates them to 7-bit ASCII characters. The second filter, `safe`, is what translates characters to `-` or `_`. The third filter, `wipeup`, will deduplicate cases of `-` and `_`, and sometimes `.`. If you want to change the way detox cleans up characters, you'll want to edit the translation table `safe.tbl`, in the location you found. Edit `safe.tbl` in your favorite editor. For instance: ``` cd /usr/share/detox sudo cp safe.tbl original-safe.tbl sudo vi safe.tbl # or nano, emacs, gedit, whatever ``` ## Step 5 - Figuring Out What To Change The translation tables are simple in concept, but potentially overwhelming at first. For instance, this line turns a space into an underscore. ``` 0x20 _ # space ``` You can tell that it's operating on a space one of two ways. I've left comments next to any entry that's changed by default. For any entry that isn't changed, the value will be the same as its Latin-1 (ISO 8859-1) equivalent. For instance: ``` 0x36 6 ``` Leaves the number 6 alone. ## Step 6 - Making Changes To make detox change a space into a period, instead of an underscore, find the line that starts with 0x20 and change it to this: ``` 0x20 . # space ``` To make detox change the number 6 into the word six, find the line that starts with 0x36 and change it to this: ``` 0x36 six # 6 ``` ## Step 7 - Testing Save your changes and open a new terminal. 
Use `inline-detox` to test your change. If you don't have `inline-detox`, you can just run `detox --inline` and it'll do the same thing. ``` echo "this is the 6th test" | inline-detox ``` The old output would be: ``` this_is_the_6th_test ``` The new output will now be: ``` this.is.the.sixth.test ``` ## Conclusion Hopefully this helps you get started with changing the behavior of detox. If not, please reach out via GitHub issues. detox-2.0.0/LICENSE000066400000000000000000000027641460212773400136600ustar00rootroot00000000000000BSD 3-Clause License Copyright (c) 2004-2024, Doug Harple All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. detox-2.0.0/Makefile.am000066400000000000000000000020371460212773400147000ustar00rootroot00000000000000SUBDIRS = src tests dist_doc_DATA = \ BUILD.md \ CHANGELOG.md \ LICENSE \ README.md \ THANKS.md dist_man1_MANS = man/detox.1 man/inline-detox.1 dist_man5_MANS = man/detoxrc.5 man/detox.tbl.5 dist_sysconf_DATA = etc/detoxrc dist_pkgdata_DATA = \ table/cp1252.tbl \ table/iso8859_1.tbl \ table/safe.tbl \ table/unicode.tbl \ table/unidecode.tbl EXTRA_DIST = man/detox.1.pdf man/inline-detox.1.pdf man/detoxrc.5.pdf man/detox.tbl.5.pdf internals: bin/generate-builtin.sh || true bin/generate-legacy-tests.sh || true bin/generate-pdf.sh || true bin/generate-unit-tests.sh || true valgrind: USE_VALGRIND=1 $(top_srcdir)/tests/test.sh src/detox # # code coverage rules # if WITH_COVERAGE coverage: lcov --capture --directory src --output-file coverage.info genhtml coverage.info --output-directory coverage coverage-text: cd src && make coverage-text else coverage: echo "code coverage is not enabled; run ./configure --with-coverage" coverage-text: coverage endif # WITH_COVERAGE clean-local: rm -rf coverage.info coverage/ detox-2.0.0/README.md000066400000000000000000000073471460212773400141340ustar00rootroot00000000000000# Overview `detox` is a program that renames files to make them easier to work with under Unix and related operating systems. Spaces and various other unsafe characters (such as "`$`") get replaced with "`_`". 
ISO 8859-1 (Latin-1) characters can be transliterated to ASCII, as can UTF-8 characters. More details are contained in the `detox.1` [man page]. --- # Notice for Package Maintainers Version 2 adds a new dependency for package builds: `pkg-config` or `pkgconf`. You may also need to add a dependency on `libtool`. Please let me know if you do, by creating an [issue]. I'll update the docs accordingly. Also, the default config file and translation tables are no longer prefixed with `.sample`. Please open an [issue] for any build problems encountered. Thanks! # Notice for Everyone I have renamed the `master` branch to `main`. If you have a copy of detox checked out, you can update your code to point at the new branch using [these steps]. Alternatively, you can clone a fresh copy of the repo. --- # Runtime Notes The most important option to learn is `-n`, aka `--dry-run`. This will let you run `detox` without actually changing any files, so that you can get an idea of what `detox` is all about. The simplest way to run `detox` is to just run it on a directory containing files that need work: ``` detox xfer_files/ ``` You can also just to specify the filename: ``` detox my\ bad\ file.txt ``` You can also specify recursion (this works best on directories): ``` detox -r /music/transferred_from_elsewhere/ ``` # Building Detox Many distributions include a copy of detox with their packages. 
If you wish to build it from scratch, you'll need the following tools: - autoconf - automake - bison / yacc / byacc - flex / lex - gcc / clang - make - pkg-config / pkgconf ## Prerequisites To install the needed packages on Debian, Ubuntu, Mint, and other Debian derivatives, run: ``` sudo apt install autoconf automake bison flex gcc make pkg-config ``` To install the needed packages on macOS, run: ``` brew install autoconf automake bison flex gcc make pkg-config ``` On FreeBSD, run: ``` sudo pkg install autoconf automake gcc pkgconf wget ``` On NetBSD, run: ``` sudo pkgin install autoconf automake mozilla-rootcerts pkgconf wget sudo mozilla-rootcerts install ``` On MSYS2, run: ``` pacman -S --needed base-devel gcc git mingw-w64-x86_64-toolchain ``` ## Install Stable from GitHub Package To build a stable copy of `detox`, run: ``` wget https://github.com/dharple/detox/releases/download/v1.4.5/detox-1.4.5.tar.gz tar xzvf detox-1.4.5.tar.gz cd detox-1.4.5 ./configure make make install ``` If that fails during the configure or make steps, run: ``` autoreconf --install ./configure make make install ``` ## Install Stable from Repository To build a stable copy of `detox` from source, install the package `git`, then run: ``` git clone -b 1.x https://github.com/dharple/detox.git cd detox autoreconf --install ./configure make make install ``` ## Install Development from Repository To build a development copy of `detox` from source, install the package `git`, then run: ``` git clone -b main https://github.com/dharple/detox.git cd detox autoreconf --install ./configure make make install ``` ## Uninstall To remove a copy of `detox` that was installed via these methods, from the same directory that you ran `make install`, run: ``` make uninstall ``` # Contact For support, to report a defect, or to request a new feature, please use the [GitHub Issues system]. 
[GitHub Issues system]: https://github.com/dharple/detox/issues/ [issue]: https://github.com/dharple/detox/issues/ [man page]: https://raw.githubusercontent.com/dharple/detox/main/man/detox.1.pdf [these steps]: https://gist.github.com/dharple/79b51d1c2fc0fea64fb84659581a6dc9 detox-2.0.0/THANKS.md000066400000000000000000000065011460212773400141560ustar00rootroot00000000000000# Thanks *If I missed you, please let me know!* ninedotnine on github added the ability to load config files from `$XDG_CONFIG_HOME`. a1346054 on github contributed maintenance improvments. Thomas Klausner reminded me that `cp -n` does not work on NetBSD. Sean M. Burke for [Text::Unidecode], which helped with Unicode transliteration and became a new table, `unidecode.tbl`. Additionally, Behat's [PHP transliteration library] put Text::Unidecode in a format I was comfortable working with. K Lange's awesome `AC_C_COMPILE_FLAGS` macro helped me write my own, updated version of the same feature. Hans-Peter Jansen gave me the impetus I needed to finally fix the char / unsigned char differences throughout the code. Also, he provided a list of security-based compiler flags. David Tardon fixed a memory overflow bug, which was passed to me by UsernameRandomlyGenerated. Mplx let me know that `detox.1` had a typo in the examples. Daniel Hauck for letting me know that passing `.` as an argument was not working. Kanliot and Tyler Adams for reporting on issues with newlines and spaces causing problems. Thank you to Vasily Kolobkov, Zenaan Harkness, Quentin Guittard, Joao Eriberto Mota Filho, and Andrew Berezovskyi for the fix addressing the issue with detox generating malformed characters during a translation. Sanjaymsh added the PowerPC architecture to the Travis tests. Special thanks to Patrick Schoenfeld and Joao Eriberto Mota Filho, for maintaining the Debian version. Release v1.2.1 is almost entirely comprised of their patches. 
Many thanks to Miguel Angelo Rozsas for suggesting UTF-8 support and his valuable input following that suggestion. This change helped introduce translation tables into detox. Ciaran McCreesh put together a package for Gentoo (rock!), submitted a patch to add a prefix onto install paths (for package builds) and helped fix parallel builds. Lou Alfonso for suggesting that the safe filter be controlled through a table so that it can be tuned easily. Christoph Wegscheider noticed that the install script was installing everything 0755, for pointing out that not everyone has lex or yacc, and for his input on how umlauts should be converted. Gerg Thor informed me that characters on the PowerPC platform are unsigned by default and that the libpopt parser was rolling into an infinite loop as a result. Jon Amundsen pointed out that the lowercase converting sequence was only really working on case-insensitive filesystems. zero_dogg from sourceforge.net suggested adding the ability to prevent certain files from being translated. rsnemmen from sourceforge.net pointed out a bug in the way directories are handled when passed in from the command line. # Additional Thanks Eric S. Raymond for his work on "The Art of UNIX Programming" http://www.bbsinc.com/iso8859.html for their help in building a complete list of Latin-1 translations. SourceForge.net for their generous hosting of this and many other projects. http://en.wikipedia.org/wiki/UTF-8 for its help with explaining UTF-8. Data Structures Using C - Tenenbaum, Langsam and Augenstein - for their help with hashes. Paul Oakenfold for his unbelievable mixes. Version 1.0.0 was developed with the help of his Great Wall mix. 
[PHP transliteration library]: https://github.com/Behat/Transliterator [Text::Unidecode]: https://metacpan.org/pod/Text::Unidecode detox-2.0.0/bin/000077500000000000000000000000001460212773400134125ustar00rootroot00000000000000detox-2.0.0/bin/generate-builtin.sh000077500000000000000000000010161460212773400172050ustar00rootroot00000000000000#!/usr/bin/env bash # # Generate src/builtin_table.c # PROJECT_ROOT=$(dirname "$(dirname "$(realpath "$0")")") cd "$PROJECT_ROOT" || exit SRCDIR=$PROJECT_ROOT/src GENERATE=$SRCDIR/generate-builtin-table if [ ! -x "$GENERATE" ] ; then echo "please build $GENERATE first" exit 1 fi cp "$SRCDIR"/builtin_table.c.in "$SRCDIR"/builtin_table.c for TABLE in safe iso8859_1 unicode cp1252 ; do echo "process builtin $TABLE" $GENERATE "$PROJECT_ROOT"/table/$TABLE.tbl | sed -e"s/NEW/$TABLE/" >> "$SRCDIR"/builtin_table.c done detox-2.0.0/bin/generate-embedded-detoxrc.php000066400000000000000000000014131460212773400211110ustar00rootroot00000000000000 $maxLength) { $output[] = $build; $build = ''; } $build .= $line; } $output[] = $build; printf("static char *detoxrc =\n"); foreach ($output as $line) { printf(" \"%s\"\n", $line); } printf(" ;\n"); detox-2.0.0/bin/generate-legacy-tests.sh000077500000000000000000000011001460212773400201350ustar00rootroot00000000000000#!/usr/bin/env bash # # Generate legacy test fixtures from man pages # PROJECT_ROOT=$(dirname "$(dirname "$(realpath "$0")")") cd "$PROJECT_ROOT" || exit MANDIR=$PROJECT_ROOT/man TESTDIR=$PROJECT_ROOT/tests/legacy echo "process detoxrc.5 into regression test" sed -n '/START SAMPLE/,/END SAMPLE/p' "$MANDIR"/detoxrc.5 | sed -e's/^[.]." /# /' > "$TESTDIR"/man-page-example/detoxrc.detoxrc.5 echo "process detox.tbl.5 into regression test" sed -n '/START SAMPLE/,/END SAMPLE/p' "$MANDIR"/detox.tbl.5 | sed -e's/^[.]." 
/# /' > "$TESTDIR"/man-page-example/detoxrc.detox.tbl.5 detox-2.0.0/bin/generate-pdf.sh000077500000000000000000000006031460212773400163110ustar00rootroot00000000000000#!/usr/bin/env bash # # Generate PDFs from man pages # PROJECT_ROOT=$(dirname "$(dirname "$(realpath "$0")")") cd "$PROJECT_ROOT" || exit MANDIR=$PROJECT_ROOT/man MANDOC=$(command -v mandoc) if [ ! -x "$MANDOC" ] ; then echo "Please install mandoc" exit 1 fi for FILE in "$MANDIR"/*.[15] ; do echo "process man page $(basename "$FILE")" $MANDOC -T pdf "$FILE" > "$FILE".pdf done detox-2.0.0/bin/generate-unit-tests.sh000077500000000000000000000021031460212773400176540ustar00rootroot00000000000000#!/usr/bin/env bash # # Generate tests/unit/*.c from .ts # PROJECT_ROOT=$(dirname "$(dirname "$(realpath "$0")")") cd "$PROJECT_ROOT" || exit TESTDIR=$PROJECT_ROOT/tests/unit cd "$TESTDIR" || exit CHECKMK=$(command -v checkmk) if [ ! -x "$CHECKMK" ] ; then echo "Please install checkmk, possibly through the 'check' package, before using this" exit 1 fi BASE=/tmp/detoxtest/ if [ ! -d "$BASE" ] ; then mkdir "$BASE" fi WORK=$(realpath "$(mktemp -d "$BASE"/work-XXXXXX)") for FILE in *.template ; do INPUT=$(basename "$FILE") OUTPUT="${INPUT%.template}.c" echo -n "process check file $INPUT... " $CHECKMK "$INPUT" > "$WORK"/"$OUTPUT" sed -i -e 's/^ . Edit the original.*/\0\n * Run `make internals` from the base of the project to regenerate this file./' "$WORK"/"$OUTPUT" if [ ! -f "$OUTPUT" ] ; then echo "created" mv "$WORK"/"$OUTPUT" "$OUTPUT" else diff -q "$WORK"/"$OUTPUT" "$OUTPUT" > /dev/null 2>&1 if [ "$?" -eq "0" ] ; then echo rm "$WORK"/"$OUTPUT" else echo "updated" mv "$OUTPUT" "$OUTPUT".bak mv "$WORK"/"$OUTPUT" "$OUTPUT" fi fi done detox-2.0.0/bin/make-cp1252.sh000077500000000000000000000023221460212773400155770ustar00rootroot00000000000000#!/usr/bin/env bash # # This script generates the CP-1252 translation table based on a table found on # Wikipedia which lists Unicode equivalents for CP-1252 characters. 
# PROJECT_ROOT=$(dirname "$(dirname "$(realpath "$0")")") TABLEPATH="$PROJECT_ROOT/table" TABLE1=$TABLEPATH/unicode.tbl TABLE2=$TABLEPATH/unidecode.tbl START_HEX=0x0080 # # CP-1252 Translation - 0x0080-0x009F # # https://en.wikipedia.org/wiki/Windows-1252 # # This list was pulled from the above article on Wikipedia on 2021-02-22. # Simple regexes converted the lines to either "undef" or the first hex code in # the table. # CHARS=" 20AC undef 201A 0192 201E 2026 2020 2021 02C6 2030 0160 2039 0152 undef 017D undef undef 2018 2019 201C 201D 2022 2013 2014 02DC 2122 0161 203A 0153 undef 017E 0178 " CURRENT=$(printf "%d" "$START_HEX") for CHAR in $CHARS ; do CURRENT_HEX=$(printf "0x%04X" "$CURRENT") if [ "$CHAR" = "undef" ] ; then echo "# $CURRENT_HEX undef" else CHECK=$(grep -c 0x"$CHAR" "$TABLE1") if [ "$CHECK" -eq "1" ] ; then grep 0x"$CHAR" "$TABLE1" | sed -e"s/^0x[0-9A-F]\{4,\}/$CURRENT_HEX/" else echo -n "# " grep 0x"$CHAR" "$TABLE2" | sed -e"s/^0x[0-9A-F]\{4,\}/$CURRENT_HEX/" fi fi CURRENT=$((CURRENT + 1)) done detox-2.0.0/bin/mallocfail-wrapper.sh000077500000000000000000000043451460212773400175400ustar00rootroot00000000000000#!/bin/bash -x # # Wrapper around mallocfail # # This is still experimental and most likely will not work on your system. # PROJECT_ROOT=$(dirname "$(dirname "$(realpath "$0")")") SLEEP=0.5s DETOX="$PROJECT_ROOT/src/detox" if [ ! -f "$DETOX" ] ; then cd "$PROJECT_ROOT" || exit make fi # -------------------------------------------------------- # onboard mallocfail # -------------------------------------------------------- MALLOCFAIL=/tmp/detoxtest/mallocfail/ MALLOCFAILSO=$MALLOCFAIL/mallocfail.so if [ ! -d "$MALLOCFAIL" ] ; then cd /tmp || exit mkdir -p detoxtest cd detoxtest || exit git clone https://github.com/ralight/mallocfail.git cd mallocfail || exit make else if [ ! -f "$MALLOCFAILSO" ] ; then cd "$MALLOCFAIL" || exit make clean && make fi fi if [ ! 
-f "$MALLOCFAILSO" ] ; then echo "could not find $MALLOCFAILSO" exit 1 fi cd "$PROJECT_ROOT" || exit # -------------------------------------------------------- BASE="/tmp/detoxtest/$(date +"%Y%m%d")" if [ ! -d "$BASE" ] ; then mkdir -p "$BASE" fi WORK=$(realpath "$(mktemp -d "$BASE"/test-mallocfail-XXXXXX)") FAIL=$WORK/fail mkdir -p "$FAIL" MALLOCFAIL_FILE=$WORK/hashes.txt export MALLOCFAIL_FILE MALLOCFAIL_DEBUG=1 export MALLOCFAIL_DEBUG MALLOCFAIL_FAIL_COUNT=1 export MALLOCFAIL_FAIL_COUNT COUNT=0 while true ; do OUTPUT="$WORK/pass-$(printf "%04d" $COUNT).txt" echo "iteration $COUNT" # LD_PRELOAD=$MALLOCFAILSO $DETOX -L -v > $OUTPUT # LD_PRELOAD=$MALLOCFAILSO $DETOX -f "$PROJECT_ROOT"/etc/detoxrc -L -v > $OUTPUT # dmesg | LD_PRELOAD=$MALLOCFAILSO $DETOX -s utf_8-legacy -f "$PROJECT_ROOT"/etc/detoxrc --inline > $OUTPUT LD_PRELOAD=$MALLOCFAILSO $DETOX -s utf_8-legacy -f "$PROJECT_ROOT"/etc/detoxrc --dry-run --recursive /tmp > "$OUTPUT" EXIT=$? if [ "$EXIT" -eq "139" ] ; then cat "$OUTPUT" echo "segfault" cp "$OUTPUT" "$FAIL"/ sleep $SLEEP fi if [ "$EXIT" -eq "0" ] ; then cat "$OUTPUT" if [ "$(grep -ci "Start trace" "$OUTPUT")" -gt "0" ] ; then echo "malloc failed but the script didn't exit" if [ "$(grep -ci "stdio2/printf" "$OUTPUT")" -gt "0" ] ; then echo "printf caused this one..." fi cp "$OUTPUT" "$FAIL"/ sleep $SLEEP else echo "all tests passed" break fi fi COUNT=$((COUNT+1)) done ls -al "$FAIL" detox-2.0.0/bin/simple-test.sh000077500000000000000000000022371460212773400162230ustar00rootroot00000000000000#!/usr/bin/env bash # # Very simple test suite. # # Run: # bin/simple-test.sh # # Full Test Suite: # tests/test.sh src/detox # set -e PROJECT_ROOT=$(dirname "$(dirname "$(realpath "$0")")") DETOX="$PROJECT_ROOT/src/detox" DETOXRC="$PROJECT_ROOT/etc/detoxrc" if [ ! 
-x "$DETOX" ] ; then echo "please compile detox first" exit 1 fi echo -n "version: " $DETOX -V # ------------------------------------------- # just checking for errors $DETOX -L -v > /dev/null # ------------------------------------------ # still just checking for errors if [ -f "$DETOXRC" ] ; then $DETOX -f "$DETOXRC" -L -v > /dev/null else echo "couldn't find detoxrc" fi # ------------------------------------------- INPUT="hi there" OUTPUT="hi_there" CHECK=$(echo "$INPUT" | $DETOX --inline) if [ "$CHECK" != "$OUTPUT" ] ; then echo "failed to rename \"$INPUT\" to \"$OUTPUT\"" exit 1 fi # ------------------------------------------- INPUT="hi - - - there" OUTPUT="hi-there" CHECK=$(echo "$INPUT" | $DETOX --inline) if [ "$CHECK" != "$OUTPUT" ] ; then echo "failed to rename \"$INPUT\" to \"$OUTPUT\"" exit 1 fi # ------------------------------------------- echo "simple tests passed" detox-2.0.0/configure.ac000066400000000000000000000057271460212773400151430ustar00rootroot00000000000000# Process this file with autoconf to produce a configure script. 
AC_INIT([detox], [2.0.0], [detox.dharple at gmail.com], [], [https://github.com/dharple/detox]) AM_INIT_AUTOMAKE([foreign -Wall -Werror]) AC_PROG_CC AC_PROG_LEX([noyywrap]) AC_PROG_YACC AC_CHECK_FUNCS([getopt_long]) AC_STRUCT_ST_BLOCKS AC_SYS_LARGEFILE AC_CHECK_PROGS([MANDOC], [mandoc]) AM_CONDITIONAL([MANDOC_INSTALLED], [test -n "$MANDOC"]) AC_SEARCH_LIBS([ceil], [m]) # # References and reasons for compiler flags: # # https://github.com/klange/prboom/blob/master/autotools/ac_c_compile_flags.m4 # https://github.com/dharple/detox/issues/31 # https://www.keil.com/support/man/docs/armclang_ref/armclang_ref_cjh1548250046139.htm # https://developers.redhat.com/blog/2020/05/22/stack-clash-mitigation-in-gcc-part-3/ # https://gcc.gnu.org/onlinedocs/gccint/LTO-Overview.html # AC_DEFUN([AC_CHECK_CFLAG], [ HOLD="$CFLAGS" AC_MSG_CHECKING(whether compiler supports $1) CFLAGS="$HOLD $1 -Werror" AC_COMPILE_IFELSE( [AC_LANG_PROGRAM( [[]], [[]] )], [ HOLD="$CFLAGS" AC_MSG_RESULT(yes) ], [ AC_MSG_RESULT(no) ] ) CFLAGS="$HOLD" ]) AC_CHECK_CFLAG([[-flto=auto]]) AC_CHECK_CFLAG([[-fstack-clash-protection]]) AC_CHECK_CFLAG([[-fstack-protector-strong]]) # # Support for check unit tests # # # source: https://www.gnu.org/savannah-checkouts/gnu/autoconf/manual/autoconf-2.70/html_node/External-Software.html # AC_ARG_WITH([check], [AS_HELP_STRING([--with-check], [enable experimental support for check])], [], [with_check=no] ) AS_IF([test "x$with_check" != xno], [ PKG_CHECK_EXISTS([CHECK], [check >= 0.10.0], [ # the name of this macro is deceiving, it checks to see # if the module is available, and configures it PKG_CHECK_MODULES([CHECK], [check >= 0.10.0]) AC_DEFINE([HAVE_LIBCHECK], [1], [Define if you have libcheck]) ], [ AC_MSG_FAILURE([--with-check was given, but test for check failed]) ] ) ]) AM_CONDITIONAL([WITH_CHECK], [test "x$with_check" != xno]) # # Support for code coverage # AC_ARG_WITH([coverage], [AS_HELP_STRING([--with-coverage], [enable experimental support for coverage])], [], 
[with_coverage=no] ) AS_IF([test "x$with_coverage" != xno], [ AC_CHECK_CFLAG([[-fprofile-arcs]]) # see src/Makefile.am # AC_CHECK_CFLAG([[-ftest-coverage]]) AC_DEFINE([SUPPORT_COVERAGE], [1], [Define if you want to support coverage tests]) ]) AM_CONDITIONAL([WITH_COVERAGE], [test "x$with_coverage" != xno]) # # Support for debugging # AC_ARG_ENABLE([debug], [AS_HELP_STRING([--enable-debug], [Turn on debugging])], [case "${enableval}" in yes) debug=true ;; no) debug=false ;; *) AC_MSG_ERROR([bad value ${enableval} for --enable-debug]) ;; esac],[debug=false]) AM_CONDITIONAL([DEBUG], [test x$debug = xtrue]) AM_COND_IF([DEBUG], [ AC_DEFINE([DEBUG], [1], [Enables verbose debugging in key points]) ]); # # # AC_CONFIG_HEADERS([src/config.h]) AC_CONFIG_FILES([ Makefile src/Makefile tests/Makefile tests/legacy/Makefile tests/unit/Makefile ]) AC_OUTPUT detox-2.0.0/etc/000077500000000000000000000000001460212773400134155ustar00rootroot00000000000000detox-2.0.0/etc/detoxrc000066400000000000000000000036241460212773400150150ustar00rootroot00000000000000# # config file for detox(1) # # Remove problematic characters. # # # Default sequence. 
# sequence default { safe { builtin "safe"; }; wipeup { remove_trailing; }; }; # # Sequences meant primarily for detox # # transliterates ISO 8859-1 into ASCII sequence "iso8859_1" { iso8859_1 { builtin "iso8859_1"; }; safe { builtin "safe"; }; wipeup { remove_trailing; }; }; # transliterates CP-1252 and ISO8859-1 into ASCII sequence "iso8859_1-legacy" { iso8859_1 { builtin "cp1252"; }; iso8859_1 { builtin "iso8859_1"; }; safe { builtin "safe"; }; wipeup { remove_trailing; }; }; # transliterates UTF-8 into ASCII sequence "utf_8" { utf_8 { builtin "unicode"; }; safe { builtin "safe"; }; wipeup { remove_trailing; }; }; # transliterates CP-1252 and UTF-8 into ASCII # this is *very* rare sequence "utf_8-legacy" { utf_8 { builtin "cp1252"; }; utf_8 { builtin "unicode"; }; safe { builtin "safe"; }; wipeup { remove_trailing; }; }; # decodes CGI-escaped characters sequence "uncgi" { uncgi; safe { builtin "safe"; }; wipeup { remove_trailing; }; }; # converts files to lower case sequence "lower" { safe { builtin "safe"; }; lower; wipeup { remove_trailing; }; }; # # Sequences meant primarily for inline-detox # # transliterates ISO 8859-1 to ASCII sequence "iso8859_1-only" { iso8859_1 { builtin "iso8859_1"; }; }; # transliterates CP-1252 to ASCII sequence "cp1252-only" { iso8859_1 { builtin "cp1252"; }; }; # transliterates UTF-8 to ASCII sequence "utf_8-only" { utf_8 { builtin "unicode"; }; }; # decodes CGI-escaped characters sequence "uncgi-only" { uncgi; }; # converts files to lower case sequence "lower-only" { lower; }; # # Files to ignore during recursion (detox only) # # Any file or directory starting with '.' is automatically ignored except when # it is passed on the command-line. # ignore { filename "{arch}"; }; detox-2.0.0/man/000077500000000000000000000000001460212773400134155ustar00rootroot00000000000000detox-2.0.0/man/detox.1000066400000000000000000000110321460212773400146170ustar00rootroot00000000000000.\" .\" This file is part of the Detox package. 
.\" .\" Copyright (c) Doug Harple .\" .\" For the full copyright and license information, please view the LICENSE .\" file that was distributed with this source code. .\" .Dd February 24, 2021 .Dt DETOX 1 .Os .Sh NAME .Nm detox .Nd clean up filenames .Sh SYNOPSIS .Nm .Op Fl f Pa configfile .Op Fl n | -dry-run .Op Fl r .Op Fl s Ar sequence .Op Fl -special .Op Fl v .Ar .Nm .Op Fl L .Op Fl f Pa configfile .Op Fl v .Nm .Op Fl h | -help .Nm .Op Fl V .Sh DESCRIPTION The .Nm utility renames files to make them easier to work with under Unix and Unix-like operating systems. It replaces characters that make it hard to type out a filename with dashes and underscores. It also provides transliteration-based filters, converting ISO 8859-1 or UTF-8 to ASCII, in part or in whole. An additional filter unescapes CGI-escaped filenames. .Ss Sequences .Nm is driven by a configurable series of filters, called a sequence. Sequences are covered in more detail in .Xr detoxrc 5 and are discoverable with the .Fl L option. The default sequence will run the .Ar safe and .Ar wipeup filters. Other examples of pre-configured sequences are .Ar iso8859_1 and .Ar utf_8 , which both provide transliteration to ASCII and then finish with the .Ar safe and .Ar wipeup filters. .Ss Options .Bl -tag -width Fl .It Fl f Pa configfile Use .Pa configfile instead of the default configuration files for loading translation sequences. No other config file will be parsed. .It Fl h , -help Display helpful information. .It Fl -inline Run in inline mode. See .Xr inline-detox 1 for more details. .It Fl L List the currently available sequences. When paired with .Fl v this option shows what filters are used in each sequence and any properties applied to the filters. .It Fl n , -dry-run Doesn't actually change anything. This implies the .Fl v option. .It Fl r Recurse into subdirectories. 
Any file or directory that starts with a period, such as .Pa .git/ or .Pa .cache/ , will be ignored during recursion unless specified on the command line. Also, any file or directory specified in the ignore section of the config file will be ignored during recursion. .It Fl s Ar sequence Use .Ar sequence instead of .Cm default . .It Fl -special Works on special files (including links). Normally .Nm ignores these files. .Nm will not recurse into symlinks that point at directories. .It Fl v Be verbose about which files are being renamed. .It Fl V Show the current version of .Nm . .El .Sh FILES .Bl -tag -width Fl .It Pa /etc/detoxrc The system-wide detoxrc file. .It Pa ~/.detoxrc A user's personal detoxrc. Normally it extends the system-wide .Pa detoxrc , unless .Fl f has been specified, in which case, it is ignored. .It Pa /usr/share/detox/cp1252.tbl The provided CP-1252 transliteration table. .It Pa /usr/share/detox/iso8859_1.tbl The provided ISO 8859-1 transliteration table. .It Pa /usr/share/detox/safe.tbl The provided safe character translation table. .It Pa /usr/share/detox/unicode.tbl The provided Unicode transliteration table, used by the UTF-8 filter. .It Pa /usr/share/detox/unidecode.tbl An additional Unicode tranlsiteration table, based on .Xr Text::Unidecode 3pm . .El .Sh EXAMPLES .Bl -tag -width Fl .It Nm Fl s Ar lower Fl r Fl v Fl n Pa /tmp/new_files Will run the sequence .Ar lower recursively, listing any changes, without changing anything, on the files of .Pa /tmp/new_files . .It Nm Fl f Pa my_detoxrc Fl L Fl v Will list the sequences within .Pa my_detoxrc , showing their filters and options. .El .Sh SEE ALSO .Xr inline-detox 1 , .Xr Text::Unidecode 3pm , .Xr detox.tbl 5 , .Xr detoxrc 5 , .Xr ascii 7 , .Xr iso_8859-1 7 , .Xr unicode 7 , .Xr utf-8 7 .Sh HISTORY .Nm was originally designed to clean up files that I had received from friends which had been created using other operating systems. 
It's trivial to create a filename with spaces, parenthesis, brackets, and ampersands under some operating systems. These have special meaning within .Fx and Linux, and cause problems when you go to access them. I created .Nm to clean up these files. .Pp Version 2.0 stepped back from transliteration out of the box, instead focusing on ease of use. The primary motivations for this were user-provided feedback, and the fact that many modern Unix-like OSs use UTF-8 as their primary character set. Transliterating from UTF-8 to ASCII in this scenario is lossy and pointless. .Sh AUTHORS .Nm was written by .An Doug Harple . .Sh CAVEATS If, after the translation of a filename is finished, a file already exists with that same name, .Nm will not rename the file. detox-2.0.0/man/detox.1.pdf000066400000000000000000001007011460212773400153710ustar00rootroot00000000000000%PDF-1.1 1 0 obj << >> endobj 3 0 obj << /Type /Font /Subtype /Type1 /Name /F0 /BaseFont /Times-Roman >> endobj 4 0 obj << /Type /Font /Subtype /Type1 /Name /F1 /BaseFont /Times-Bold >> endobj 5 0 obj << /Type /Font /Subtype /Type1 /Name /F2 /BaseFont /Times-Italic >> endobj 6 0 obj << /Type /Font /Subtype /Type1 /Name /F3 /BaseFont /Times-BoldItalic >> endobj 7 0 obj << /Length 8 0 R >> stream BT /F1 11 Tf 68.024 702.988 Td (NAME) Tj ET BT /F1 11 Tf 81.774 687.599 Td (detox) Tj ET /F0 11 Tf BT /F0 11 Tf 110.187 687.599 Td (-) Tj ET BT /F0 11 Tf 116.600 687.599 Td (clean) Tj ET BT /F0 11 Tf 142.560 687.599 Td (up) Tj ET BT /F0 11 Tf 156.310 687.599 Td (filenames) Tj ET /F1 11 Tf BT /F1 11 Tf 68.024 656.821 Td (SYNOPSIS) Tj ET BT /F1 11 Tf 81.774 641.432 Td (detox) Tj ET /F0 11 Tf BT /F0 11 Tf 110.187 641.432 Td ([) Tj ET /F1 11 Tf BT /F1 11 Tf 113.850 641.432 Td (-f) Tj ET /F2 11 Tf BT /F2 11 Tf 123.926 641.432 Td (configfile) Tj ET /F0 11 Tf BT /F0 11 Tf 165.484 641.432 Td (]) Tj ET BT /F0 11 Tf 171.897 641.432 Td ([) Tj ET /F1 11 Tf BT /F1 11 Tf 175.560 641.432 Td (-n) Tj ET /F0 11 Tf BT /F0 11 Tf 188.089 
641.432 Td (|) Tj ET /F1 11 Tf BT /F1 11 Tf 193.039 641.432 Td (--dry-run) Tj ET /F0 11 Tf BT /F0 11 Tf 237.644 641.432 Td (]) Tj ET BT /F0 11 Tf 244.057 641.432 Td ([) Tj ET /F1 11 Tf BT /F1 11 Tf 247.720 641.432 Td (-r) Tj ET /F0 11 Tf BT /F0 11 Tf 256.267 641.432 Td (]) Tj ET BT /F0 11 Tf 262.680 641.432 Td ([) Tj ET /F1 11 Tf BT /F1 11 Tf 266.343 641.432 Td (-s) Tj ET /F2 11 Tf BT /F2 11 Tf 277.035 641.432 Td (sequence) Tj ET /F0 11 Tf BT /F0 11 Tf 317.350 641.432 Td (]) Tj ET BT /F0 11 Tf 323.763 641.432 Td ([) Tj ET /F1 11 Tf BT /F1 11 Tf 327.426 641.432 Td (--special) Tj ET /F0 11 Tf BT /F0 11 Tf 366.531 641.432 Td (]) Tj ET BT /F0 11 Tf 372.944 641.432 Td ([) Tj ET /F1 11 Tf BT /F1 11 Tf 376.607 641.432 Td (-v) Tj ET /F0 11 Tf BT /F0 11 Tf 385.770 641.432 Td (]) Tj ET /F2 11 Tf BT /F2 11 Tf 392.183 641.432 Td (file) Tj ET BT /F2 11 Tf 408.991 641.432 Td (...) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 626.043 Td (detox) Tj ET /F0 11 Tf BT /F0 11 Tf 110.187 626.043 Td ([) Tj ET /F1 11 Tf BT /F1 11 Tf 113.850 626.043 Td (-L) Tj ET /F0 11 Tf BT /F0 11 Tf 124.850 626.043 Td (]) Tj ET BT /F0 11 Tf 131.263 626.043 Td ([) Tj ET /F1 11 Tf BT /F1 11 Tf 134.926 626.043 Td (-f) Tj ET /F2 11 Tf BT /F2 11 Tf 145.002 626.043 Td (configfile) Tj ET /F0 11 Tf BT /F0 11 Tf 186.560 626.043 Td (]) Tj ET BT /F0 11 Tf 192.973 626.043 Td ([) Tj ET /F1 11 Tf BT /F1 11 Tf 196.636 626.043 Td (-v) Tj ET /F0 11 Tf BT /F0 11 Tf 205.799 626.043 Td (]) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 610.654 Td (detox) Tj ET /F0 11 Tf BT /F0 11 Tf 110.187 610.654 Td ([) Tj ET /F1 11 Tf BT /F1 11 Tf 113.850 610.654 Td (-h) Tj ET /F0 11 Tf BT /F0 11 Tf 126.379 610.654 Td (|) Tj ET /F1 11 Tf BT /F1 11 Tf 131.329 610.654 Td (--help) Tj ET /F0 11 Tf BT /F0 11 Tf 158.829 610.654 Td (]) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 595.265 Td (detox) Tj ET /F0 11 Tf BT /F0 11 Tf 110.187 595.265 Td ([) Tj ET /F1 11 Tf BT /F1 11 Tf 113.850 595.265 Td (-V) Tj ET /F0 11 Tf BT /F0 11 Tf 125.455 595.265 Td (]) Tj ET /F1 11 Tf BT 
/F1 11 Tf 68.024 564.487 Td (DESCRIPTION) Tj ET /F0 11 Tf BT /F0 11 Tf 81.774 549.098 Td (The) Tj ET /F1 11 Tf BT /F1 11 Tf 101.629 549.098 Td (detox) Tj ET /F0 11 Tf BT /F0 11 Tf 130.042 549.098 Td (utility) Tj ET BT /F0 11 Tf 159.082 549.098 Td (renames) Tj ET BT /F0 11 Tf 198.484 549.098 Td (files) Tj ET BT /F0 11 Tf 220.176 549.098 Td (to) Tj ET BT /F0 11 Tf 231.484 549.098 Td (make) Tj ET BT /F0 11 Tf 258.060 549.098 Td (them) Tj ET BT /F0 11 Tf 282.810 549.098 Td (easier) Tj ET BT /F0 11 Tf 311.212 549.098 Td (to) Tj ET BT /F0 11 Tf 322.520 549.098 Td (work) Tj ET BT /F0 11 Tf 347.875 549.098 Td (with) Tj ET BT /F0 11 Tf 370.183 549.098 Td (under) Tj ET BT /F0 11 Tf 397.980 549.098 Td (Unix) Tj ET BT /F0 11 Tf 422.730 549.098 Td (and) Tj ET BT /F0 11 Tf 441.364 549.098 Td (Unix-like) Tj ET BT /F0 11 Tf 486.277 549.098 Td (operating) Tj ET BT /F0 11 Tf 81.774 533.709 Td (systems.) Tj ET BT /F0 11 Tf 124.861 533.709 Td (It) Tj ET BT /F0 11 Tf 134.332 533.709 Td (replaces) Tj ET BT /F0 11 Tf 173.118 533.709 Td (characters) Tj ET BT /F0 11 Tf 220.451 533.709 Td (that) Tj ET BT /F0 11 Tf 239.701 533.709 Td (make) Tj ET BT /F0 11 Tf 266.277 533.709 Td (it) Tj ET BT /F0 11 Tf 275.143 533.709 Td (hard) Tj ET BT /F0 11 Tf 297.440 533.709 Td (to) Tj ET BT /F0 11 Tf 308.748 533.709 Td (type) Tj ET BT /F0 11 Tf 330.440 533.709 Td (out) Tj ET BT /F0 11 Tf 347.248 533.709 Td (a) Tj ET BT /F0 11 Tf 354.882 533.709 Td (filename) Tj ET BT /F0 11 Tf 396.121 533.709 Td (with) Tj ET BT /F0 11 Tf 418.429 533.709 Td (dashes) Tj ET BT /F0 11 Tf 450.505 533.709 Td (and) Tj ET BT /F0 11 Tf 469.139 533.709 Td (underscores.) 
Tj ET BT /F0 11 Tf 529.925 533.709 Td (It) Tj ET BT /F0 11 Tf 81.774 518.320 Td (also) Tj ET BT /F0 11 Tf 102.245 518.320 Td (provides) Tj ET BT /F0 11 Tf 142.879 518.320 Td (transliteration-based) Tj ET BT /F0 11 Tf 235.444 518.320 Td (filters,) Tj ET BT /F0 11 Tf 266.607 518.320 Td (converting) Tj ET BT /F0 11 Tf 316.404 518.320 Td (ISO) Tj ET BT /F0 11 Tf 336.875 518.320 Td (8859-1) Tj ET BT /F0 11 Tf 370.788 518.320 Td (or) Tj ET BT /F0 11 Tf 382.701 518.320 Td (UTF-8) Tj ET BT /F0 11 Tf 415.393 518.320 Td (to) Tj ET BT /F0 11 Tf 426.701 518.320 Td (ASCII,) Tj ET BT /F0 11 Tf 460.922 518.320 Td (in) Tj ET BT /F0 11 Tf 472.230 518.320 Td (part) Tj ET BT /F0 11 Tf 492.085 518.320 Td (or) Tj ET BT /F0 11 Tf 503.998 518.320 Td (in) Tj ET BT /F0 11 Tf 81.774 502.931 Td (whole.) Tj ET BT /F0 11 Tf 116.908 502.931 Td (An) Tj ET BT /F0 11 Tf 133.100 502.931 Td (additional) Tj ET BT /F0 11 Tf 179.850 502.931 Td (filter) Tj ET BT /F0 11 Tf 203.984 502.931 Td (unescapes) Tj ET BT /F0 11 Tf 251.328 502.931 Td (CGI-escaped) Tj ET BT /F0 11 Tf 311.498 502.931 Td (filenames.) Tj ET /F1 11 Tf BT /F1 11 Tf 76.274 472.153 Td (Sequences) Tj ET BT /F1 11 Tf 81.774 456.764 Td (detox) Tj ET /F0 11 Tf BT /F0 11 Tf 110.187 456.764 Td (is) Tj ET BT /F0 11 Tf 120.274 456.764 Td (driven) Tj ET BT /F0 11 Tf 151.129 456.764 Td (by) Tj ET BT /F0 11 Tf 164.879 456.764 Td (a) Tj ET BT /F0 11 Tf 172.513 456.764 Td (configurable) Tj ET BT /F0 11 Tf 230.857 456.764 Td (series) Tj ET BT /F0 11 Tf 258.654 456.764 Td (of) Tj ET BT /F0 11 Tf 270.567 456.764 Td (filters,) Tj ET BT /F0 11 Tf 301.730 456.764 Td (called) Tj ET BT /F0 11 Tf 330.748 456.764 Td (a) Tj ET BT /F0 11 Tf 338.382 456.764 Td (sequence.) 
Tj ET BT /F0 11 Tf 386.947 456.764 Td (Sequences) Tj ET BT /F0 11 Tf 436.128 456.764 Td (are) Tj ET BT /F0 11 Tf 452.309 456.764 Td (covered) Tj ET BT /F0 11 Tf 489.874 456.764 Td (in) Tj ET BT /F0 11 Tf 501.182 456.764 Td (more) Tj ET BT /F0 11 Tf 81.774 441.375 Td (detail) Tj ET BT /F0 11 Tf 108.966 441.375 Td (in) Tj ET BT /F0 11 Tf 120.274 441.375 Td (detoxrc\(5\)) Tj ET BT /F0 11 Tf 168.839 441.375 Td (and) Tj ET BT /F0 11 Tf 187.473 441.375 Td (are) Tj ET BT /F0 11 Tf 203.654 441.375 Td (discoverable) Tj ET BT /F0 11 Tf 261.998 441.375 Td (with) Tj ET BT /F0 11 Tf 284.306 441.375 Td (the) Tj ET /F1 11 Tf BT /F1 11 Tf 300.498 441.375 Td (-L) Tj ET /F0 11 Tf BT /F0 11 Tf 314.248 441.375 Td (option.) Tj ET BT /F0 11 Tf 350.614 441.375 Td (The) Tj ET BT /F0 11 Tf 370.469 441.375 Td (default) Tj ET BT /F0 11 Tf 403.766 441.375 Td (sequence) Tj ET BT /F0 11 Tf 446.831 441.375 Td (will) Tj ET BT /F0 11 Tf 466.697 441.375 Td (run) Tj ET BT /F0 11 Tf 484.110 441.375 Td (the) Tj ET /F2 11 Tf BT /F2 11 Tf 500.302 441.375 Td (safe) Tj ET /F0 11 Tf BT /F0 11 Tf 520.773 441.375 Td (and) Tj ET /F2 11 Tf BT /F2 11 Tf 81.774 425.986 Td (wipeup) Tj ET /F0 11 Tf BT /F0 11 Tf 116.303 425.986 Td (filters.) 
Tj ET BT /F0 11 Tf 150.216 425.986 Td (Other) Tj ET BT /F0 11 Tf 178.013 425.986 Td (examples) Tj ET BT /F0 11 Tf 222.310 425.986 Td (of) Tj ET BT /F0 11 Tf 234.223 425.986 Td (pre-configured) Tj ET BT /F0 11 Tf 302.335 425.986 Td (sequences) Tj ET BT /F0 11 Tf 349.679 425.986 Td (are) Tj ET /F2 11 Tf BT /F2 11 Tf 365.860 425.986 Td (iso8859_1) Tj ET /F0 11 Tf BT /F0 11 Tf 414.447 425.986 Td (and) Tj ET /F2 11 Tf BT /F2 11 Tf 433.081 425.986 Td (utf_8) Tj ET /F0 11 Tf BT /F0 11 Tf 455.697 425.986 Td (,) Tj ET BT /F0 11 Tf 461.197 425.986 Td (which) Tj ET BT /F0 11 Tf 490.831 425.986 Td (both) Tj ET BT /F0 11 Tf 81.774 410.597 Td (provide) Tj ET BT /F0 11 Tf 118.129 410.597 Td (transliteration) Tj ET BT /F0 11 Tf 181.984 410.597 Td (to) Tj ET BT /F0 11 Tf 193.292 410.597 Td (ASCII) Tj ET BT /F0 11 Tf 224.763 410.597 Td (and) Tj ET BT /F0 11 Tf 243.397 410.597 Td (then) Tj ET BT /F0 11 Tf 265.089 410.597 Td (finish) Tj ET BT /F0 11 Tf 292.897 410.597 Td (with) Tj ET BT /F0 11 Tf 315.205 410.597 Td (the) Tj ET /F2 11 Tf BT /F2 11 Tf 331.397 410.597 Td (safe) Tj ET /F0 11 Tf BT /F0 11 Tf 351.868 410.597 Td (and) Tj ET /F2 11 Tf BT /F2 11 Tf 370.502 410.597 Td (wipeup) Tj ET /F0 11 Tf BT /F0 11 Tf 405.031 410.597 Td (filters.) 
Tj ET /F1 11 Tf BT /F1 11 Tf 76.274 379.819 Td (Options) Tj ET BT /F1 11 Tf 81.774 364.430 Td (-f) Tj ET /F2 11 Tf BT /F2 11 Tf 91.850 364.430 Td (configfile) Tj ET /F0 11 Tf BT /F0 11 Tf 141.064 364.430 Td (Use) Tj ET /F2 11 Tf BT /F2 11 Tf 160.919 364.430 Td (configfile) Tj ET /F0 11 Tf BT /F0 11 Tf 205.227 364.430 Td (instead) Tj ET BT /F0 11 Tf 239.140 364.430 Td (of) Tj ET BT /F0 11 Tf 251.053 364.430 Td (the) Tj ET BT /F0 11 Tf 267.245 364.430 Td (default) Tj ET BT /F0 11 Tf 300.542 364.430 Td (configuration) Tj ET BT /F0 11 Tf 362.560 364.430 Td (files) Tj ET BT /F0 11 Tf 384.252 364.430 Td (for) Tj ET BT /F0 11 Tf 399.828 364.430 Td (loading) Tj ET BT /F0 11 Tf 435.578 364.430 Td (translation) Tj ET BT /F0 11 Tf 484.770 364.430 Td (sequences.) Tj ET BT /F0 11 Tf 142.274 349.041 Td (No) Tj ET BT /F0 11 Tf 158.466 349.041 Td (other) Tj ET BT /F0 11 Tf 183.821 349.041 Td (config) Tj ET BT /F0 11 Tf 214.676 349.041 Td (file) Tj ET BT /F0 11 Tf 232.089 349.041 Td (will) Tj ET BT /F0 11 Tf 251.955 349.041 Td (be) Tj ET BT /F0 11 Tf 265.089 349.041 Td (parsed.) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 318.263 Td (-h) Tj ET /F0 11 Tf BT /F0 11 Tf 91.553 318.263 Td (,) Tj ET /F1 11 Tf BT /F1 11 Tf 97.053 318.263 Td (--help) Tj ET /F0 11 Tf BT /F0 11 Tf 144.122 318.263 Td (Display) Tj ET BT /F0 11 Tf 181.093 318.263 Td (helpful) Tj ET BT /F0 11 Tf 215.006 318.263 Td (information.) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 287.485 Td (--inline) Tj ET /F0 11 Tf BT /F0 11 Tf 143.506 287.485 Td (Run) Tj ET BT /F0 11 Tf 164.593 287.485 Td (in) Tj ET BT /F0 11 Tf 175.901 287.485 Td (inline) Tj ET BT /F0 11 Tf 203.709 287.485 Td (mode.) Tj ET BT /F0 11 Tf 236.401 287.485 Td (See) Tj ET BT /F0 11 Tf 255.035 287.485 Td (inline-detox\(1\)) Tj ET BT /F0 11 Tf 323.774 287.485 Td (for) Tj ET BT /F0 11 Tf 339.350 287.485 Td (more) Tj ET BT /F0 11 Tf 364.705 287.485 Td (details.) 
Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 256.707 Td (-L) Tj ET /F0 11 Tf BT /F0 11 Tf 142.890 256.707 Td (List) Tj ET BT /F0 11 Tf 162.756 256.707 Td (the) Tj ET BT /F0 11 Tf 178.948 256.707 Td (currently) Tj ET BT /F0 11 Tf 221.408 256.707 Td (available) Tj ET BT /F0 11 Tf 263.868 256.707 Td (sequences.) Tj ET BT /F0 11 Tf 316.712 256.707 Td (When) Tj ET BT /F0 11 Tf 345.730 256.707 Td (paired) Tj ET BT /F0 11 Tf 375.969 256.707 Td (with) Tj ET /F1 11 Tf BT /F1 11 Tf 398.277 256.707 Td (-v) Tj ET /F0 11 Tf BT /F0 11 Tf 410.190 256.707 Td (this) Tj ET BT /F0 11 Tf 428.835 256.707 Td (option) Tj ET BT /F0 11 Tf 459.701 256.707 Td (shows) Tj ET BT /F0 11 Tf 489.951 256.707 Td (what) Tj ET BT /F0 11 Tf 514.085 256.707 Td (filters) Tj ET BT /F0 11 Tf 142.274 241.318 Td (are) Tj ET BT /F0 11 Tf 158.455 241.318 Td (used) Tj ET BT /F0 11 Tf 181.368 241.318 Td (in) Tj ET BT /F0 11 Tf 192.676 241.318 Td (each) Tj ET BT /F0 11 Tf 215.578 241.318 Td (sequence) Tj ET BT /F0 11 Tf 258.643 241.318 Td (and) Tj ET BT /F0 11 Tf 277.277 241.318 Td (any) Tj ET BT /F0 11 Tf 295.911 241.318 Td (properties) Tj ET BT /F0 11 Tf 342.650 241.318 Td (applied) Tj ET BT /F0 11 Tf 377.784 241.318 Td (to) Tj ET BT /F0 11 Tf 389.092 241.318 Td (the) Tj ET BT /F0 11 Tf 405.284 241.318 Td (filters.) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 210.540 Td (-n) Tj ET /F0 11 Tf BT /F0 11 Tf 91.553 210.540 Td (,) Tj ET /F1 11 Tf BT /F1 11 Tf 97.053 210.540 Td (--dry-run) Tj ET /F0 11 Tf BT /F0 11 Tf 147.180 210.540 Td (Doesn't) Tj ET BT /F0 11 Tf 184.756 210.540 Td (actually) Tj ET BT /F0 11 Tf 222.332 210.540 Td (change) Tj ET BT /F0 11 Tf 256.234 210.540 Td (anything.) Tj ET BT /F0 11 Tf 302.984 210.540 Td (This) Tj ET BT /F0 11 Tf 325.292 210.540 Td (implies) Tj ET BT /F0 11 Tf 360.437 210.540 Td (the) Tj ET /F1 11 Tf BT /F1 11 Tf 376.629 210.540 Td (-v) Tj ET /F0 11 Tf BT /F0 11 Tf 388.542 210.540 Td (option.) 
Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 179.762 Td (-r) Tj ET /F0 11 Tf BT /F0 11 Tf 143.495 179.762 Td (Recurse) Tj ET BT /F0 11 Tf 181.676 179.762 Td (into) Tj ET BT /F0 11 Tf 201.542 179.762 Td (subdirectories.) Tj ET BT /F0 11 Tf 271.502 179.762 Td (Any) Tj ET BT /F0 11 Tf 293.194 179.762 Td (file) Tj ET BT /F0 11 Tf 310.607 179.762 Td (or) Tj ET BT /F0 11 Tf 322.520 179.762 Td (directory) Tj ET BT /F0 11 Tf 364.980 179.762 Td (that) Tj ET BT /F0 11 Tf 384.230 179.762 Td (starts) Tj ET BT /F0 11 Tf 410.201 179.762 Td (with) Tj ET BT /F0 11 Tf 432.509 179.762 Td (a) Tj ET BT /F0 11 Tf 440.143 179.762 Td (period,) Tj ET BT /F0 11 Tf 473.748 179.762 Td (such) Tj ET BT /F0 11 Tf 496.661 179.762 Td (as) Tj ET /F2 11 Tf BT /F2 11 Tf 508.574 179.762 Td (.git/) Tj ET /F0 11 Tf BT /F0 11 Tf 528.748 179.762 Td (or) Tj ET /F2 11 Tf BT /F2 11 Tf 142.274 164.373 Td (.cache/) Tj ET /F0 11 Tf BT /F0 11 Tf 173.734 164.373 Td (,) Tj ET BT /F0 11 Tf 179.234 164.373 Td (will) Tj ET BT /F0 11 Tf 199.100 164.373 Td (be) Tj ET BT /F0 11 Tf 212.234 164.373 Td (ignored) Tj ET BT /F0 11 Tf 248.589 164.373 Td (during) Tj ET BT /F0 11 Tf 280.060 164.373 Td (recursion) Tj ET BT /F0 11 Tf 323.741 164.373 Td (unless) Tj ET BT /F0 11 Tf 353.991 164.373 Td (specified) Tj ET BT /F0 11 Tf 396.451 164.373 Td (on) Tj ET BT /F0 11 Tf 410.201 164.373 Td (the) Tj ET BT /F0 11 Tf 426.393 164.373 Td (command) Tj ET BT /F0 11 Tf 472.527 164.373 Td (line.) 
Tj ET BT /F0 11 Tf 497.277 164.373 Td (Also,) Tj ET BT /F0 11 Tf 523.556 164.373 Td (any) Tj ET BT /F0 11 Tf 142.274 148.984 Td (file) Tj ET BT /F0 11 Tf 159.687 148.984 Td (or) Tj ET BT /F0 11 Tf 171.600 148.984 Td (directory) Tj ET BT /F0 11 Tf 214.060 148.984 Td (specified) Tj ET BT /F0 11 Tf 256.520 148.984 Td (in) Tj ET BT /F0 11 Tf 267.828 148.984 Td (the) Tj ET BT /F0 11 Tf 284.020 148.984 Td (ignore) Tj ET BT /F0 11 Tf 314.875 148.984 Td (section) Tj ET BT /F0 11 Tf 348.788 148.984 Td (of) Tj ET BT /F0 11 Tf 360.701 148.984 Td (the) Tj ET BT /F0 11 Tf 376.893 148.984 Td (config) Tj ET BT /F0 11 Tf 407.748 148.984 Td (file) Tj ET BT /F0 11 Tf 425.161 148.984 Td (will) Tj ET BT /F0 11 Tf 445.027 148.984 Td (be) Tj ET BT /F0 11 Tf 458.161 148.984 Td (ignored) Tj ET BT /F0 11 Tf 494.516 148.984 Td (during) Tj ET BT /F0 11 Tf 142.274 133.595 Td (recursion.) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 102.817 Td (-s) Tj ET /F2 11 Tf BT /F2 11 Tf 92.466 102.817 Td (sequence) Tj ET /F0 11 Tf BT /F0 11 Tf 142.274 102.817 Td (Use) Tj ET /F2 11 Tf BT /F2 11 Tf 162.129 102.817 Td (sequence) Tj ET /F0 11 Tf BT /F0 11 Tf 205.194 102.817 Td (instead) Tj ET BT /F0 11 Tf 239.107 102.817 Td (of) Tj ET /F1 11 Tf BT /F1 11 Tf 251.020 102.817 Td (default) Tj ET /F0 11 Tf BT /F0 11 Tf 284.020 102.817 Td (.) 
Tj ET /F0 11 Tf BT /F0 11 Tf 68.024 739.233 Td (DETOX\(1\)) Tj ET BT /F0 11 Tf 245.498 739.233 Td (General) Tj ET BT /F0 11 Tf 283.063 739.233 Td (Commands) Tj ET BT /F0 11 Tf 335.929 739.233 Td (Manual) Tj ET BT /F0 11 Tf 494.164 739.233 Td (DETOX\(1\)) Tj ET BT /F0 11 Tf 68.024 36.245 Td (Debian) Tj ET BT /F0 11 Tf 267.036 36.245 Td (February) Tj ET BT /F0 11 Tf 309.496 36.245 Td (24,) Tj ET BT /F0 11 Tf 325.996 36.245 Td (2021) Tj ET BT /F0 11 Tf 512.490 36.245 Td (Debian) Tj ET endstream endobj 8 0 obj 15526 endobj 9 0 obj << /ProcSet [/PDF /Text] /Font << /F0 3 0 R /F1 4 0 R /F2 5 0 R /F3 6 0 R >> >> endobj 10 0 obj << /Type /Page /Parent 2 0 R /Resources 9 0 R /Contents 7 0 R >> endobj 11 0 obj << /Length 12 0 R >> stream BT /F1 11 Tf 81.774 702.988 Td (--special) Tj ET /F0 11 Tf BT /F0 11 Tf 143.506 702.988 Td (Works) Tj ET BT /F0 11 Tf 175.582 702.988 Td (on) Tj ET BT /F0 11 Tf 189.332 702.988 Td (special) Tj ET BT /F0 11 Tf 222.629 702.988 Td (files) Tj ET BT /F0 11 Tf 244.321 702.988 Td (\(including) Tj ET BT /F0 11 Tf 292.292 702.988 Td (links\).) Tj ET BT /F0 11 Tf 325.600 702.988 Td (Normally) Tj ET /F1 11 Tf BT /F1 11 Tf 370.513 702.988 Td (detox) Tj ET /F0 11 Tf BT /F0 11 Tf 398.926 702.988 Td (ignores) Tj ET BT /F0 11 Tf 434.060 702.988 Td (these) Tj ET BT /F0 11 Tf 459.415 702.988 Td (files.) Tj ET /F1 11 Tf BT /F1 11 Tf 486.607 702.988 Td (detox) Tj ET /F0 11 Tf BT /F0 11 Tf 515.020 702.988 Td (will) Tj ET BT /F0 11 Tf 142.274 687.599 Td (not) Tj ET BT /F0 11 Tf 159.082 687.599 Td (recurse) Tj ET BT /F0 11 Tf 193.589 687.599 Td (into) Tj ET BT /F0 11 Tf 213.455 687.599 Td (symlinks) Tj ET BT /F0 11 Tf 255.937 687.599 Td (that) Tj ET BT /F0 11 Tf 275.187 687.599 Td (point) Tj ET BT /F0 11 Tf 300.553 687.599 Td (at) Tj ET BT /F0 11 Tf 311.245 687.599 Td (directories.) 
Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 656.821 Td (-v) Tj ET /F0 11 Tf BT /F0 11 Tf 142.274 656.821 Td (Be) Tj ET BT /F0 11 Tf 157.245 656.821 Td (verbose) Tj ET BT /F0 11 Tf 194.205 656.821 Td (about) Tj ET BT /F0 11 Tf 221.397 656.821 Td (which) Tj ET BT /F0 11 Tf 251.031 656.821 Td (files) Tj ET BT /F0 11 Tf 272.723 656.821 Td (are) Tj ET BT /F0 11 Tf 288.904 656.821 Td (being) Tj ET BT /F0 11 Tf 316.096 656.821 Td (renamed.) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 626.043 Td (-V) Tj ET /F0 11 Tf BT /F0 11 Tf 142.274 626.043 Td (Show) Tj ET BT /F0 11 Tf 170.082 626.043 Td (the) Tj ET BT /F0 11 Tf 186.274 626.043 Td (current) Tj ET BT /F0 11 Tf 220.176 626.043 Td (version) Tj ET BT /F0 11 Tf 255.310 626.043 Td (of) Tj ET /F1 11 Tf BT /F1 11 Tf 267.223 626.043 Td (detox) Tj ET /F0 11 Tf BT /F0 11 Tf 292.886 626.043 Td (.) Tj ET /F1 11 Tf BT /F1 11 Tf 68.024 595.265 Td (FILES) Tj ET /F2 11 Tf BT /F2 11 Tf 81.774 579.876 Td (/etc/detoxrc) Tj ET /F0 11 Tf BT /F0 11 Tf 142.274 579.876 Td (The) Tj ET BT /F0 11 Tf 162.129 579.876 Td (system-wide) Tj ET BT /F0 11 Tf 220.484 579.876 Td (detoxrc) Tj ET BT /F0 11 Tf 256.223 579.876 Td (file.) Tj ET /F2 11 Tf BT /F2 11 Tf 81.774 549.098 Td (~/.detoxrc) Tj ET /F0 11 Tf BT /F0 11 Tf 142.274 549.098 Td (A) Tj ET BT /F0 11 Tf 152.966 549.098 Td (user's) Tj ET BT /F0 11 Tf 181.984 549.098 Td (personal) Tj ET BT /F0 11 Tf 222.002 549.098 Td (detoxrc.) 
Tj ET BT /F0 11 Tf 263.241 549.098 Td (Normally) Tj ET BT /F0 11 Tf 308.154 549.098 Td (it) Tj ET BT /F0 11 Tf 317.020 549.098 Td (extends) Tj ET BT /F0 11 Tf 353.375 549.098 Td (the) Tj ET BT /F0 11 Tf 369.567 549.098 Td (system-wide) Tj ET /F2 11 Tf BT /F2 11 Tf 427.922 549.098 Td (detoxrc) Tj ET /F0 11 Tf BT /F0 11 Tf 460.911 549.098 Td (,) Tj ET BT /F0 11 Tf 466.411 549.098 Td (unless) Tj ET /F1 11 Tf BT /F1 11 Tf 496.661 549.098 Td (-f) Tj ET /F0 11 Tf BT /F0 11 Tf 506.737 549.098 Td (has) Tj ET BT /F0 11 Tf 142.274 533.709 Td (been) Tj ET BT /F0 11 Tf 165.792 533.709 Td (specified,) Tj ET BT /F0 11 Tf 211.002 533.709 Td (in) Tj ET BT /F0 11 Tf 222.310 533.709 Td (which) Tj ET BT /F0 11 Tf 251.944 533.709 Td (case,) Tj ET BT /F0 11 Tf 276.375 533.709 Td (it) Tj ET BT /F0 11 Tf 285.241 533.709 Td (is) Tj ET BT /F0 11 Tf 295.328 533.709 Td (ignored.) Tj ET /F2 11 Tf BT /F2 11 Tf 81.774 502.931 Td (/usr/share/detox/cp1252.tbl) Tj ET /F0 11 Tf BT /F0 11 Tf 142.274 487.542 Td (The) Tj ET BT /F0 11 Tf 162.129 487.542 Td (provided) Tj ET BT /F0 11 Tf 203.984 487.542 Td (CP-1252) Tj ET BT /F0 11 Tf 245.850 487.542 Td (transliteration) Tj ET BT /F0 11 Tf 309.705 487.542 Td (table.) Tj ET /F2 11 Tf BT /F2 11 Tf 81.774 456.764 Td (/usr/share/detox/iso8859_1.tbl) Tj ET /F0 11 Tf BT /F0 11 Tf 142.274 441.375 Td (The) Tj ET BT /F0 11 Tf 162.129 441.375 Td (provided) Tj ET BT /F0 11 Tf 203.984 441.375 Td (ISO) Tj ET BT /F0 11 Tf 224.455 441.375 Td (8859-1) Tj ET BT /F0 11 Tf 258.368 441.375 Td (transliteration) Tj ET BT /F0 11 Tf 322.223 441.375 Td (table.) Tj ET /F2 11 Tf BT /F2 11 Tf 81.774 410.597 Td (/usr/share/detox/safe.tbl) Tj ET /F0 11 Tf BT /F0 11 Tf 142.274 395.208 Td (The) Tj ET BT /F0 11 Tf 162.129 395.208 Td (provided) Tj ET BT /F0 11 Tf 203.984 395.208 Td (safe) Tj ET BT /F0 11 Tf 224.444 395.208 Td (character) Tj ET BT /F0 11 Tf 267.498 395.208 Td (translation) Tj ET BT /F0 11 Tf 316.690 395.208 Td (table.) 
Tj ET /F2 11 Tf BT /F2 11 Tf 81.774 364.430 Td (/usr/share/detox/unicode.tbl) Tj ET /F0 11 Tf BT /F0 11 Tf 142.274 349.041 Td (The) Tj ET BT /F0 11 Tf 162.129 349.041 Td (provided) Tj ET BT /F0 11 Tf 203.984 349.041 Td (Unicode) Tj ET BT /F0 11 Tf 244.002 349.041 Td (transliteration) Tj ET BT /F0 11 Tf 307.857 349.041 Td (table,) Tj ET BT /F0 11 Tf 334.741 349.041 Td (used) Tj ET BT /F0 11 Tf 357.654 349.041 Td (by) Tj ET BT /F0 11 Tf 371.404 349.041 Td (the) Tj ET BT /F0 11 Tf 387.596 349.041 Td (UTF-8) Tj ET BT /F0 11 Tf 420.288 349.041 Td (filter.) Tj ET /F2 11 Tf BT /F2 11 Tf 81.774 318.263 Td (/usr/share/detox/unidecode.tbl) Tj ET /F0 11 Tf BT /F0 11 Tf 142.274 302.874 Td (An) Tj ET BT /F0 11 Tf 158.466 302.874 Td (additional) Tj ET BT /F0 11 Tf 205.216 302.874 Td (Unicode) Tj ET BT /F0 11 Tf 245.234 302.874 Td (tranlsiteration) Tj ET BT /F0 11 Tf 309.089 302.874 Td (table,) Tj ET BT /F0 11 Tf 335.973 302.874 Td (based) Tj ET BT /F0 11 Tf 363.770 302.874 Td (on) Tj ET BT /F0 11 Tf 377.520 302.874 Td (Text::Unidecode\(3pm\).) 
Tj ET /F1 11 Tf BT /F1 11 Tf 68.024 272.096 Td (EXAMPLES) Tj ET BT /F1 11 Tf 81.774 256.707 Td (detox) Tj ET BT /F1 11 Tf 110.187 256.707 Td (-s) Tj ET /F2 11 Tf BT /F2 11 Tf 120.879 256.707 Td (lower) Tj ET /F1 11 Tf BT /F1 11 Tf 148.687 256.707 Td (-r) Tj ET BT /F1 11 Tf 159.984 256.707 Td (-v) Tj ET BT /F1 11 Tf 171.897 256.707 Td (-n) Tj ET /F2 11 Tf BT /F2 11 Tf 184.426 256.707 Td (/tmp/new_files) Tj ET /F0 11 Tf BT /F0 11 Tf 142.274 241.318 Td (Will) Tj ET BT /F0 11 Tf 164.582 241.318 Td (run) Tj ET BT /F0 11 Tf 181.995 241.318 Td (the) Tj ET BT /F0 11 Tf 198.187 241.318 Td (sequence) Tj ET /F2 11 Tf BT /F2 11 Tf 241.252 241.318 Td (lower) Tj ET /F0 11 Tf BT /F0 11 Tf 269.060 241.318 Td (recursively,) Tj ET BT /F0 11 Tf 323.433 241.318 Td (listing) Tj ET BT /F0 11 Tf 353.694 241.318 Td (any) Tj ET BT /F0 11 Tf 372.328 241.318 Td (changes,) Tj ET BT /F0 11 Tf 413.259 241.318 Td (without) Tj ET BT /F0 11 Tf 449.625 241.318 Td (changing) Tj ET BT /F0 11 Tf 492.701 241.318 Td (anything,) Tj ET BT /F0 11 Tf 142.274 225.929 Td (on) Tj ET BT /F0 11 Tf 156.024 225.929 Td (the) Tj ET BT /F0 11 Tf 172.216 225.929 Td (files) Tj ET BT /F0 11 Tf 193.908 225.929 Td (of) Tj ET /F2 11 Tf BT /F2 11 Tf 205.821 225.929 Td (/tmp/new_files) Tj ET /F0 11 Tf BT /F0 11 Tf 269.995 225.929 Td (.) 
Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 195.151 Td (detox) Tj ET BT /F1 11 Tf 110.187 195.151 Td (-f) Tj ET /F2 11 Tf BT /F2 11 Tf 120.263 195.151 Td (my_detoxrc) Tj ET /F1 11 Tf BT /F1 11 Tf 174.328 195.151 Td (-L) Tj ET BT /F1 11 Tf 188.078 195.151 Td (-v) Tj ET /F0 11 Tf BT /F0 11 Tf 142.274 179.762 Td (Will) Tj ET BT /F0 11 Tf 164.582 179.762 Td (list) Tj ET BT /F0 11 Tf 180.785 179.762 Td (the) Tj ET BT /F0 11 Tf 196.977 179.762 Td (sequences) Tj ET BT /F0 11 Tf 244.321 179.762 Td (within) Tj ET /F2 11 Tf BT /F2 11 Tf 275.187 179.762 Td (my_detoxrc) Tj ET /F0 11 Tf BT /F0 11 Tf 326.502 179.762 Td (,) Tj ET BT /F0 11 Tf 332.002 179.762 Td (showing) Tj ET BT /F0 11 Tf 372.031 179.762 Td (their) Tj ET BT /F0 11 Tf 394.944 179.762 Td (filters) Tj ET BT /F0 11 Tf 423.357 179.762 Td (and) Tj ET BT /F0 11 Tf 441.991 179.762 Td (options.) Tj ET /F1 11 Tf BT /F1 11 Tf 68.024 148.984 Td (SEE) Tj ET BT /F1 11 Tf 91.564 148.984 Td (ALSO) Tj ET /F0 11 Tf BT /F0 11 Tf 81.774 133.595 Td (inline-detox\(1\),) Tj ET BT /F0 11 Tf 153.263 133.595 Td (Text::Unidecode\(3pm\),) Tj ET BT /F0 11 Tf 259.578 133.595 Td (detox.tbl\(5\),) Tj ET BT /F0 11 Tf 316.712 133.595 Td (detoxrc\(5\),) Tj ET BT /F0 11 Tf 368.027 133.595 Td (ascii\(7\),) Tj ET BT /F0 11 Tf 406.516 133.595 Td (iso_8859-1\(7\),) Tj ET BT /F0 11 Tf 474.342 133.595 Td (unicode\(7\),) Tj ET BT /F0 11 Tf 81.774 118.206 Td (utf-8\(7\)) Tj ET /F0 11 Tf BT /F0 11 Tf 68.024 739.233 Td (DETOX\(1\)) Tj ET BT /F0 11 Tf 245.498 739.233 Td (General) Tj ET BT /F0 11 Tf 283.063 739.233 Td (Commands) Tj ET BT /F0 11 Tf 335.929 739.233 Td (Manual) Tj ET BT /F0 11 Tf 494.164 739.233 Td (DETOX\(1\)) Tj ET BT /F0 11 Tf 68.024 36.245 Td (Debian) Tj ET BT /F0 11 Tf 267.036 36.245 Td (February) Tj ET BT /F0 11 Tf 309.496 36.245 Td (24,) Tj ET BT /F0 11 Tf 325.996 36.245 Td (2021) Tj ET BT /F0 11 Tf 512.490 36.245 Td (Debian) Tj ET endstream endobj 12 0 obj 8652 endobj 13 0 obj << /ProcSet [/PDF /Text] /Font << /F0 3 0 R /F1 4 0 R /F2 5 0 R /F3 6 
0 R >> >> endobj 14 0 obj << /Type /Page /Parent 2 0 R /Resources 13 0 R /Contents 11 0 R >> endobj 15 0 obj << /Length 16 0 R >> stream BT /F1 11 Tf 68.024 702.988 Td (HISTORY) Tj ET BT /F1 11 Tf 81.774 687.599 Td (detox) Tj ET /F0 11 Tf BT /F0 11 Tf 110.187 687.599 Td (was) Tj ET BT /F0 11 Tf 130.042 687.599 Td (originally) Tj ET BT /F0 11 Tf 175.571 687.599 Td (designed) Tj ET BT /F0 11 Tf 217.426 687.599 Td (to) Tj ET BT /F0 11 Tf 228.734 687.599 Td (clean) Tj ET BT /F0 11 Tf 254.694 687.599 Td (up) Tj ET BT /F0 11 Tf 268.444 687.599 Td (files) Tj ET BT /F0 11 Tf 290.136 687.599 Td (that) Tj ET BT /F0 11 Tf 309.386 687.599 Td (I) Tj ET BT /F0 11 Tf 315.799 687.599 Td (had) Tj ET BT /F0 11 Tf 334.433 687.599 Td (received) Tj ET BT /F0 11 Tf 374.440 687.599 Td (from) Tj ET BT /F0 11 Tf 398.574 687.599 Td (friends) Tj ET BT /F0 11 Tf 431.871 687.599 Td (which) Tj ET BT /F0 11 Tf 461.505 687.599 Td (had) Tj ET BT /F0 11 Tf 480.139 687.599 Td (been) Tj ET BT /F0 11 Tf 503.657 687.599 Td (created) Tj ET BT /F0 11 Tf 81.774 672.210 Td (using) Tj ET BT /F0 11 Tf 108.361 672.210 Td (other) Tj ET BT /F0 11 Tf 133.716 672.210 Td (operating) Tj ET BT /F0 11 Tf 178.013 672.210 Td (systems.) Tj ET BT /F0 11 Tf 221.100 672.210 Td (It's) Tj ET BT /F0 11 Tf 238.513 672.210 Td (trivial) Tj ET BT /F0 11 Tf 267.542 672.210 Td (to) Tj ET BT /F0 11 Tf 278.850 672.210 Td (create) Tj ET BT /F0 11 Tf 307.857 672.210 Td (a) Tj ET BT /F0 11 Tf 315.491 672.210 Td (filename) Tj ET BT /F0 11 Tf 356.730 672.210 Td (with) Tj ET BT /F0 11 Tf 379.038 672.210 Td (spaces,) Tj ET BT /F0 11 Tf 413.248 672.210 Td (parenthesis,) Tj ET BT /F0 11 Tf 468.237 672.210 Td (brackets,) Tj ET BT /F0 11 Tf 510.389 672.210 Td (and) Tj ET BT /F0 11 Tf 81.774 656.821 Td (ampersands) Tj ET BT /F0 11 Tf 136.455 656.821 Td (under) Tj ET BT /F0 11 Tf 164.252 656.821 Td (some) Tj ET BT /F0 11 Tf 190.223 656.821 Td (operating) Tj ET BT /F0 11 Tf 234.520 656.821 Td (systems.) 
Tj ET BT /F0 11 Tf 277.607 656.821 Td (These) Tj ET BT /F0 11 Tf 306.625 656.821 Td (have) Tj ET BT /F0 11 Tf 330.143 656.821 Td (special) Tj ET BT /F0 11 Tf 363.440 656.821 Td (meaning) Tj ET BT /F0 11 Tf 404.074 656.821 Td (within) Tj ET BT /F0 11 Tf 434.940 656.821 Td (FreeBSD) Tj ET BT /F0 11 Tf 478.632 656.821 Td (and) Tj ET BT /F0 11 Tf 497.266 656.821 Td (Linux,) Tj ET BT /F0 11 Tf 81.774 641.432 Td (and) Tj ET BT /F0 11 Tf 100.408 641.432 Td (cause) Tj ET BT /F0 11 Tf 127.589 641.432 Td (problems) Tj ET BT /F0 11 Tf 171.281 641.432 Td (when) Tj ET BT /F0 11 Tf 197.857 641.432 Td (you) Tj ET BT /F0 11 Tf 217.107 641.432 Td (go) Tj ET BT /F0 11 Tf 230.857 641.432 Td (to) Tj ET BT /F0 11 Tf 242.165 641.432 Td (access) Tj ET BT /F0 11 Tf 273.009 641.432 Td (them.) Tj ET BT /F0 11 Tf 303.259 641.432 Td (I) Tj ET BT /F0 11 Tf 309.672 641.432 Td (created) Tj ET /F1 11 Tf BT /F1 11 Tf 344.179 641.432 Td (detox) Tj ET /F0 11 Tf BT /F0 11 Tf 372.592 641.432 Td (to) Tj ET BT /F0 11 Tf 383.900 641.432 Td (clean) Tj ET BT /F0 11 Tf 409.860 641.432 Td (up) Tj ET BT /F0 11 Tf 423.610 641.432 Td (these) Tj ET BT /F0 11 Tf 448.965 641.432 Td (files.) Tj ET BT /F0 11 Tf 81.774 610.654 Td (Version) Tj ET BT /F0 11 Tf 119.350 610.654 Td (2.0) Tj ET BT /F0 11 Tf 135.850 610.654 Td (stepped) Tj ET BT /F0 11 Tf 172.205 610.654 Td (back) Tj ET BT /F0 11 Tf 195.723 610.654 Td (from) Tj ET BT /F0 11 Tf 219.857 610.654 Td (transliteration) Tj ET BT /F0 11 Tf 283.712 610.654 Td (out) Tj ET BT /F0 11 Tf 300.520 610.654 Td (of) Tj ET BT /F0 11 Tf 312.433 610.654 Td (the) Tj ET BT /F0 11 Tf 328.625 610.654 Td (box,) Tj ET BT /F0 11 Tf 350.625 610.654 Td (instead) Tj ET BT /F0 11 Tf 384.538 610.654 Td (focusing) Tj ET BT /F0 11 Tf 425.172 610.654 Td (on) Tj ET BT /F0 11 Tf 438.922 610.654 Td (ease) Tj ET BT /F0 11 Tf 460.603 610.654 Td (of) Tj ET BT /F0 11 Tf 472.516 610.654 Td (use.) 
Tj ET BT /F0 11 Tf 495.429 610.654 Td (The) Tj ET BT /F0 11 Tf 81.774 595.265 Td (primary) Tj ET BT /F0 11 Tf 119.350 595.265 Td (motivations) Tj ET BT /F0 11 Tf 174.053 595.265 Td (for) Tj ET BT /F0 11 Tf 189.629 595.265 Td (this) Tj ET BT /F0 11 Tf 208.274 595.265 Td (were) Tj ET BT /F0 11 Tf 232.397 595.265 Td (user-provided) Tj ET BT /F0 11 Tf 296.241 595.265 Td (feedback,) Tj ET BT /F0 11 Tf 341.440 595.265 Td (and) Tj ET BT /F0 11 Tf 360.074 595.265 Td (the) Tj ET BT /F0 11 Tf 376.266 595.265 Td (fact) Tj ET BT /F0 11 Tf 395.505 595.265 Td (that) Tj ET BT /F0 11 Tf 414.755 595.265 Td (many) Tj ET BT /F0 11 Tf 441.947 595.265 Td (modern) Tj ET BT /F0 11 Tf 478.302 595.265 Td (Unix-like) Tj ET BT /F0 11 Tf 523.215 595.265 Td (OSs) Tj ET BT /F0 11 Tf 81.774 579.876 Td (use) Tj ET BT /F0 11 Tf 99.187 579.876 Td (UTF-8) Tj ET BT /F0 11 Tf 131.879 579.876 Td (as) Tj ET BT /F0 11 Tf 143.792 579.876 Td (their) Tj ET BT /F0 11 Tf 166.705 579.876 Td (primary) Tj ET BT /F0 11 Tf 204.281 579.876 Td (character) Tj ET BT /F0 11 Tf 247.335 579.876 Td (set.) Tj ET BT /F0 11 Tf 267.806 579.876 Td (Transliterating) Tj ET BT /F0 11 Tf 335.324 579.876 Td (from) Tj ET BT /F0 11 Tf 359.458 579.876 Td (UTF-8) Tj ET BT /F0 11 Tf 392.150 579.876 Td (to) Tj ET BT /F0 11 Tf 403.458 579.876 Td (ASCII) Tj ET BT /F0 11 Tf 434.929 579.876 Td (in) Tj ET BT /F0 11 Tf 446.237 579.876 Td (this) Tj ET BT /F0 11 Tf 464.882 579.876 Td (scenario) Tj ET BT /F0 11 Tf 504.284 579.876 Td (is) Tj ET BT /F0 11 Tf 514.371 579.876 Td (lossy) Tj ET BT /F0 11 Tf 81.774 564.487 Td (and) Tj ET BT /F0 11 Tf 100.408 564.487 Td (pointless.) Tj ET /F1 11 Tf BT /F1 11 Tf 68.024 533.709 Td (AUTHORS) Tj ET BT /F1 11 Tf 81.774 518.320 Td (detox) Tj ET /F0 11 Tf BT /F0 11 Tf 110.187 518.320 Td (was) Tj ET BT /F0 11 Tf 130.042 518.320 Td (written) Tj ET BT /F0 11 Tf 163.955 518.320 Td (by) Tj ET BT /F0 11 Tf 177.705 518.320 Td (Doug) Tj ET BT /F0 11 Tf 204.897 518.320 Td (Harple.) 
Tj ET /F1 11 Tf BT /F1 11 Tf 68.024 487.542 Td (CAVEATS) Tj ET /F0 11 Tf BT /F0 11 Tf 81.774 472.153 Td (If,) Tj ET BT /F0 11 Tf 94.600 472.153 Td (after) Tj ET BT /F0 11 Tf 117.502 472.153 Td (the) Tj ET BT /F0 11 Tf 133.694 472.153 Td (translation) Tj ET BT /F0 11 Tf 182.886 472.153 Td (of) Tj ET BT /F0 11 Tf 194.799 472.153 Td (a) Tj ET BT /F0 11 Tf 202.433 472.153 Td (filename) Tj ET BT /F0 11 Tf 243.672 472.153 Td (is) Tj ET BT /F0 11 Tf 253.759 472.153 Td (finished,) Tj ET BT /F0 11 Tf 294.701 472.153 Td (a) Tj ET BT /F0 11 Tf 302.335 472.153 Td (file) Tj ET BT /F0 11 Tf 319.748 472.153 Td (already) Tj ET BT /F0 11 Tf 354.871 472.153 Td (exists) Tj ET BT /F0 11 Tf 382.679 472.153 Td (with) Tj ET BT /F0 11 Tf 404.987 472.153 Td (that) Tj ET BT /F0 11 Tf 424.237 472.153 Td (same) Tj ET BT /F0 11 Tf 449.592 472.153 Td (name,) Tj ET /F1 11 Tf BT /F1 11 Tf 478.918 472.153 Td (detox) Tj ET /F0 11 Tf BT /F0 11 Tf 507.331 472.153 Td (will) Tj ET BT /F0 11 Tf 527.197 472.153 Td (not) Tj ET BT /F0 11 Tf 81.774 456.764 Td (rename) Tj ET BT /F0 11 Tf 116.897 456.764 Td (the) Tj ET BT /F0 11 Tf 133.089 456.764 Td (file.) 
Tj ET /F0 11 Tf BT /F0 11 Tf 68.024 739.233 Td (DETOX\(1\)) Tj ET BT /F0 11 Tf 245.498 739.233 Td (General) Tj ET BT /F0 11 Tf 283.063 739.233 Td (Commands) Tj ET BT /F0 11 Tf 335.929 739.233 Td (Manual) Tj ET BT /F0 11 Tf 494.164 739.233 Td (DETOX\(1\)) Tj ET BT /F0 11 Tf 68.024 36.245 Td (Debian) Tj ET BT /F0 11 Tf 267.036 36.245 Td (February) Tj ET BT /F0 11 Tf 309.496 36.245 Td (24,) Tj ET BT /F0 11 Tf 325.996 36.245 Td (2021) Tj ET BT /F0 11 Tf 512.490 36.245 Td (Debian) Tj ET endstream endobj 16 0 obj 7292 endobj 17 0 obj << /ProcSet [/PDF /Text] /Font << /F0 3 0 R /F1 4 0 R /F2 5 0 R /F3 6 0 R >> >> endobj 18 0 obj << /Type /Page /Parent 2 0 R /Resources 17 0 R /Contents 15 0 R >> endobj 2 0 obj << /Type /Pages /MediaBox [0 0 612 790] /Count 3 /Kids [ 10 0 R 14 0 R 18 0 R] >> endobj 19 0 obj << /Type /Catalog /Pages 2 0 R >> endobj xref 0 20 0000000000 65535 f 0000000009 00000 n 0000032590 00000 n 0000000030 00000 n 0000000112 00000 n 0000000193 00000 n 0000000276 00000 n 0000000363 00000 n 0000015941 00000 n 0000015962 00000 n 0000016057 00000 n 0000016138 00000 n 0000024844 00000 n 0000024865 00000 n 0000024961 00000 n 0000025044 00000 n 0000032390 00000 n 0000032411 00000 n 0000032507 00000 n 0000032687 00000 n trailer << /Size 20 /Root 19 0 R /Info 1 0 R >> startxref 32737 %%EOF detox-2.0.0/man/detox.tbl.5000066400000000000000000000061401460212773400154070ustar00rootroot00000000000000.\" .\" This file is part of the Detox package. .\" .\" Copyright (c) Doug Harple .\" .\" For the full copyright and license information, please view the LICENSE .\" file that was distributed with this source code. .\" .Dd February 24, 2021 .Dt DETOX.TBL 5 .Os .Sh NAME .Nm detox.tbl .Nd translation table for .Xr detox 1 .Sh OVERVIEW .Cm detox allows for configuration of how the safe, ISO 8859-1, and UTF-8 (Unicode) filters operate. Through text-based translation tables, it is possible to tune how these character sets are interpreted. 
.Sh SYNTAX The format of the translation tables is simple. There are two levels: one containing meta data and one containing the actual translations. .Bl -tag -width 0.25i .It Cm default Ar _ Default specifies the default translation for a character. An empty or non-existent default indicates that any unknown character should fall through to the next filter. In this manner, it is possible to chain together multiple translation tables in a sequence. .It Cm start Indicates the start of a value list within the translation table. .It Cm start Ar lang Indicates the start of a language specific value list within the translation table. .It Cm end Indicates the end of a value list within the translation table. .It Ar value translation Value can be specified in decimal (1), hex (0x01) or octal (01). The same rules that apply to sscanf apply here. .Pp Translation can be a string or a quoted string, with either single or double quotes. .El .Sh EXAMPLES The following example shows a portion of a .Ar safe table, with only a few character replacements specified. .Bd -literal # # This is a simple example of a "safe" table. It only translates 4 characters. # # The default is commented out, so any character that is not in this table will # be ignored. # # default _ # # This is the main replacement block. Each line specifies a character and a # string to replace it with. # start 0x09 _tab_ # comments work on lines, too 0x24 _dollar_ # $$$ 0x26 _and_ # ampersand end # # Starts an optional, language-specific translation block. detox will read # your locale and load the block if the word after start matches the language # portion of your locale. # # In the example here, the character $ will be replaced with "_money_" if the # user is working in English. If the user is using a different language, $ # will be replaced with the value configured in the previous block, "_dollar_". 
# start en 0x24 _money_ # money money end # EOF .Ed .Pp You could then enable this table in your .Pa ~/.detoxrc , in conjunction with other filters. .Bd -literal .\" START SAMPLE # Sample detoxrc sequence default { safe { filename "/home/MYUSERNAME/.local/share/detox/safe.tbl"; }; safe; wipeup; }; # EOF .\" END SAMPLE .Ed .Pp When .Cm detox is run, it will run the custom safe filter first, then run the default .Ar safe filter, and the finally the .Ar wipeup filter. See .Xr detoxrc 5 for more details on the various filter types. .Sh SEE ALSO .Xr detox 1 , .Xr detoxrc 5 , .Xr ascii 7 , .Xr iso_8859-1 7 , .Xr unicode 7 , .Xr utf-8 7 .Sh AUTHORS detox was written by .An "Doug Harple" . detox-2.0.0/man/detox.tbl.5.pdf000066400000000000000000000564721460212773400161740ustar00rootroot00000000000000%PDF-1.1 1 0 obj << >> endobj 3 0 obj << /Type /Font /Subtype /Type1 /Name /F0 /BaseFont /Times-Roman >> endobj 4 0 obj << /Type /Font /Subtype /Type1 /Name /F1 /BaseFont /Times-Bold >> endobj 5 0 obj << /Type /Font /Subtype /Type1 /Name /F2 /BaseFont /Times-Italic >> endobj 6 0 obj << /Type /Font /Subtype /Type1 /Name /F3 /BaseFont /Times-BoldItalic >> endobj 7 0 obj << /Length 8 0 R >> stream BT /F1 11 Tf 68.024 702.988 Td (NAME) Tj ET BT /F1 11 Tf 81.774 687.599 Td (detox.tbl) Tj ET /F0 11 Tf BT /F0 11 Tf 125.774 687.599 Td (-) Tj ET BT /F0 11 Tf 132.187 687.599 Td (translation) Tj ET BT /F0 11 Tf 181.379 687.599 Td (table) Tj ET BT /F0 11 Tf 205.513 687.599 Td (for) Tj ET BT /F0 11 Tf 221.089 687.599 Td (detox\(1\)) Tj ET /F1 11 Tf BT /F1 11 Tf 68.024 656.821 Td (OVERVIEW) Tj ET BT /F1 11 Tf 81.774 641.432 Td (detox) Tj ET /F0 11 Tf BT /F0 11 Tf 110.187 641.432 Td (allows) Tj ET BT /F0 11 Tf 141.658 641.432 Td (for) Tj ET BT /F0 11 Tf 157.234 641.432 Td (configuration) Tj ET BT /F0 11 Tf 219.252 641.432 Td (of) Tj ET BT /F0 11 Tf 231.165 641.432 Td (how) Tj ET BT /F0 11 Tf 252.857 641.432 Td (the) Tj ET BT /F0 11 Tf 269.049 641.432 Td (safe,) Tj ET BT /F0 11 Tf 292.259 
641.432 Td (ISO) Tj ET BT /F0 11 Tf 312.730 641.432 Td (8859-1,) Tj ET BT /F0 11 Tf 349.393 641.432 Td (and) Tj ET BT /F0 11 Tf 368.027 641.432 Td (UTF-8) Tj ET BT /F0 11 Tf 400.719 641.432 Td (\(Unicode\)) Tj ET BT /F0 11 Tf 448.063 641.432 Td (filters) Tj ET BT /F0 11 Tf 476.476 641.432 Td (operate.) Tj ET BT /F0 11 Tf 81.774 626.043 Td (Through) Tj ET BT /F0 11 Tf 122.408 626.043 Td (text-based) Tj ET BT /F0 11 Tf 170.368 626.043 Td (translation) Tj ET BT /F0 11 Tf 219.560 626.043 Td (tables,) Tj ET BT /F0 11 Tf 250.723 626.043 Td (it) Tj ET BT /F0 11 Tf 259.589 626.043 Td (is) Tj ET BT /F0 11 Tf 269.676 626.043 Td (possible) Tj ET BT /F0 11 Tf 308.484 626.043 Td (to) Tj ET BT /F0 11 Tf 319.792 626.043 Td (tune) Tj ET BT /F0 11 Tf 341.484 626.043 Td (how) Tj ET BT /F0 11 Tf 363.176 626.043 Td (these) Tj ET BT /F0 11 Tf 388.531 626.043 Td (character) Tj ET BT /F0 11 Tf 431.585 626.043 Td (sets) Tj ET BT /F0 11 Tf 450.835 626.043 Td (are) Tj ET BT /F0 11 Tf 467.016 626.043 Td (interpreted.) Tj ET /F1 11 Tf BT /F1 11 Tf 68.024 595.265 Td (SYNTAX) Tj ET /F0 11 Tf BT /F0 11 Tf 81.774 579.876 Td (The) Tj ET BT /F0 11 Tf 101.629 579.876 Td (format) Tj ET BT /F0 11 Tf 133.705 579.876 Td (of) Tj ET BT /F0 11 Tf 145.618 579.876 Td (the) Tj ET BT /F0 11 Tf 161.810 579.876 Td (translation) Tj ET BT /F0 11 Tf 211.002 579.876 Td (tables) Tj ET BT /F0 11 Tf 239.415 579.876 Td (is) Tj ET BT /F0 11 Tf 249.502 579.876 Td (simple.) 
Tj ET BT /F0 11 Tf 287.089 579.876 Td (There) Tj ET BT /F0 11 Tf 315.491 579.876 Td (are) Tj ET BT /F0 11 Tf 331.672 579.876 Td (two) Tj ET BT /F0 11 Tf 350.922 579.876 Td (levels:) Tj ET BT /F0 11 Tf 382.393 579.876 Td (one) Tj ET BT /F0 11 Tf 401.027 579.876 Td (containing) Tj ET BT /F0 11 Tf 450.219 579.876 Td (meta) Tj ET BT /F0 11 Tf 474.353 579.876 Td (data) Tj ET BT /F0 11 Tf 495.429 579.876 Td (and) Tj ET BT /F0 11 Tf 514.063 579.876 Td (one) Tj ET BT /F0 11 Tf 81.774 564.487 Td (containing) Tj ET BT /F0 11 Tf 130.966 564.487 Td (the) Tj ET BT /F0 11 Tf 147.158 564.487 Td (actual) Tj ET BT /F0 11 Tf 176.176 564.487 Td (translations.) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 533.709 Td (default) Tj ET /F2 11 Tf BT /F2 11 Tf 117.524 533.709 Td (_) Tj ET /F0 11 Tf BT /F0 11 Tf 105.270 518.320 Td (Default) Tj ET BT /F0 11 Tf 141.009 518.320 Td (specifies) Tj ET BT /F0 11 Tf 182.248 518.320 Td (the) Tj ET BT /F0 11 Tf 198.440 518.320 Td (default) Tj ET BT /F0 11 Tf 231.737 518.320 Td (translation) Tj ET BT /F0 11 Tf 280.929 518.320 Td (for) Tj ET BT /F0 11 Tf 296.505 518.320 Td (a) Tj ET BT /F0 11 Tf 304.139 518.320 Td (character.) Tj ET BT /F0 11 Tf 352.693 518.320 Td (An) Tj ET BT /F0 11 Tf 368.885 518.320 Td (empty) Tj ET BT /F0 11 Tf 399.135 518.320 Td (or) Tj ET BT /F0 11 Tf 411.048 518.320 Td (non-existent) Tj ET BT /F0 11 Tf 468.182 518.320 Td (default) Tj ET BT /F0 11 Tf 501.479 518.320 Td (indicates) Tj ET BT /F0 11 Tf 105.270 502.931 Td (that) Tj ET BT /F0 11 Tf 124.520 502.931 Td (any) Tj ET BT /F0 11 Tf 143.154 502.931 Td (unknown) Tj ET BT /F0 11 Tf 186.846 502.931 Td (character) Tj ET BT /F0 11 Tf 229.900 502.931 Td (should) Tj ET BT /F0 11 Tf 261.987 502.931 Td (fall) Tj ET BT /F0 11 Tf 279.400 502.931 Td (through) Tj ET BT /F0 11 Tf 316.371 502.931 Td (to) Tj ET BT /F0 11 Tf 327.679 502.931 Td (the) Tj ET BT /F0 11 Tf 343.871 502.931 Td (next) Tj ET BT /F0 11 Tf 365.563 502.931 Td (filter.) 
Tj ET BT /F0 11 Tf 395.197 502.931 Td (In) Tj ET BT /F0 11 Tf 407.110 502.931 Td (this) Tj ET BT /F0 11 Tf 425.755 502.931 Td (manner,) Tj ET BT /F0 11 Tf 464.244 502.931 Td (it) Tj ET BT /F0 11 Tf 473.110 502.931 Td (is) Tj ET BT /F0 11 Tf 483.197 502.931 Td (possible) Tj ET BT /F0 11 Tf 522.005 502.931 Td (to) Tj ET BT /F0 11 Tf 105.270 487.542 Td (chain) Tj ET BT /F0 11 Tf 131.846 487.542 Td (together) Tj ET BT /F0 11 Tf 170.643 487.542 Td (multiple) Tj ET BT /F0 11 Tf 210.067 487.542 Td (translation) Tj ET BT /F0 11 Tf 259.259 487.542 Td (tables) Tj ET BT /F0 11 Tf 287.672 487.542 Td (in) Tj ET BT /F0 11 Tf 298.980 487.542 Td (a) Tj ET BT /F0 11 Tf 306.614 487.542 Td (sequence.) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 456.764 Td (start) Tj ET /F0 11 Tf BT /F0 11 Tf 105.270 441.375 Td (Indicates) Tj ET BT /F0 11 Tf 147.730 441.375 Td (the) Tj ET BT /F0 11 Tf 163.922 441.375 Td (start) Tj ET BT /F0 11 Tf 185.614 441.375 Td (of) Tj ET BT /F0 11 Tf 197.527 441.375 Td (a) Tj ET BT /F0 11 Tf 205.161 441.375 Td (value) Tj ET BT /F0 11 Tf 231.737 441.375 Td (list) Tj ET BT /F0 11 Tf 247.940 441.375 Td (within) Tj ET BT /F0 11 Tf 278.806 441.375 Td (the) Tj ET BT /F0 11 Tf 294.998 441.375 Td (translation) Tj ET BT /F0 11 Tf 344.190 441.375 Td (table.) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 410.597 Td (start) Tj ET /F2 11 Tf BT /F2 11 Tf 106.513 410.597 Td (lang) Tj ET /F0 11 Tf BT /F0 11 Tf 105.270 395.208 Td (Indicates) Tj ET BT /F0 11 Tf 147.730 395.208 Td (the) Tj ET BT /F0 11 Tf 163.922 395.208 Td (start) Tj ET BT /F0 11 Tf 185.614 395.208 Td (of) Tj ET BT /F0 11 Tf 197.527 395.208 Td (a) Tj ET BT /F0 11 Tf 205.161 395.208 Td (language) Tj ET BT /F0 11 Tf 247.621 395.208 Td (specific) Tj ET BT /F0 11 Tf 284.581 395.208 Td (value) Tj ET BT /F0 11 Tf 311.157 395.208 Td (list) Tj ET BT /F0 11 Tf 327.360 395.208 Td (within) Tj ET BT /F0 11 Tf 358.226 395.208 Td (the) Tj ET BT /F0 11 Tf 374.418 395.208 Td (translation) Tj ET BT /F0 11 Tf 423.610 395.208 Td (table.) 
Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 364.430 Td (end) Tj ET /F0 11 Tf BT /F0 11 Tf 106.502 364.430 Td (Indicates) Tj ET BT /F0 11 Tf 148.962 364.430 Td (the) Tj ET BT /F0 11 Tf 165.154 364.430 Td (end) Tj ET BT /F0 11 Tf 183.788 364.430 Td (of) Tj ET BT /F0 11 Tf 195.701 364.430 Td (a) Tj ET BT /F0 11 Tf 203.335 364.430 Td (value) Tj ET BT /F0 11 Tf 229.911 364.430 Td (list) Tj ET BT /F0 11 Tf 246.114 364.430 Td (within) Tj ET BT /F0 11 Tf 276.980 364.430 Td (the) Tj ET BT /F0 11 Tf 293.172 364.430 Td (translation) Tj ET BT /F0 11 Tf 342.364 364.430 Td (table.) Tj ET /F2 11 Tf BT /F2 11 Tf 81.774 333.652 Td (value) Tj ET BT /F2 11 Tf 108.350 333.652 Td (translation) Tj ET /F0 11 Tf BT /F0 11 Tf 105.270 318.263 Td (Value) Tj ET BT /F0 11 Tf 134.288 318.263 Td (can) Tj ET BT /F0 11 Tf 152.306 318.263 Td (be) Tj ET BT /F0 11 Tf 165.440 318.263 Td (specified) Tj ET BT /F0 11 Tf 207.900 318.263 Td (in) Tj ET BT /F0 11 Tf 219.208 318.263 Td (decimal) Tj ET BT /F0 11 Tf 256.784 318.263 Td (\(1\),) Tj ET BT /F0 11 Tf 275.110 318.263 Td (hex) Tj ET BT /F0 11 Tf 293.744 318.263 Td (\(0x01\)) Tj ET BT /F0 11 Tf 325.820 318.263 Td (or) Tj ET BT /F0 11 Tf 337.733 318.263 Td (octal) Tj ET BT /F0 11 Tf 361.867 318.263 Td (\(01\).) Tj ET BT /F0 11 Tf 388.443 318.263 Td (The) Tj ET BT /F0 11 Tf 408.298 318.263 Td (same) Tj ET BT /F0 11 Tf 433.653 318.263 Td (rules) Tj ET BT /F0 11 Tf 457.787 318.263 Td (that) Tj ET BT /F0 11 Tf 477.037 318.263 Td (apply) Tj ET BT /F0 11 Tf 504.229 318.263 Td (to) Tj ET BT /F0 11 Tf 515.537 318.263 Td (sscanf) Tj ET BT /F0 11 Tf 105.270 302.874 Td (apply) Tj ET BT /F0 11 Tf 132.462 302.874 Td (here.) 
Tj ET BT /F0 11 Tf 105.270 272.096 Td (Translation) Tj ET BT /F0 11 Tf 158.125 272.096 Td (can) Tj ET BT /F0 11 Tf 176.143 272.096 Td (be) Tj ET BT /F0 11 Tf 189.277 272.096 Td (a) Tj ET BT /F0 11 Tf 196.911 272.096 Td (string) Tj ET BT /F0 11 Tf 224.719 272.096 Td (or) Tj ET BT /F0 11 Tf 236.632 272.096 Td (a) Tj ET BT /F0 11 Tf 244.266 272.096 Td (quoted) Tj ET BT /F0 11 Tf 276.958 272.096 Td (string,) Tj ET BT /F0 11 Tf 307.516 272.096 Td (with) Tj ET BT /F0 11 Tf 329.824 272.096 Td (either) Tj ET BT /F0 11 Tf 357.621 272.096 Td (single) Tj ET BT /F0 11 Tf 386.650 272.096 Td (or) Tj ET BT /F0 11 Tf 398.563 272.096 Td (double) Tj ET BT /F0 11 Tf 431.255 272.096 Td (quotes.) Tj ET /F1 11 Tf BT /F1 11 Tf 68.024 241.318 Td (EXAMPLES) Tj ET /F0 11 Tf BT /F0 11 Tf 81.774 225.929 Td (The) Tj ET BT /F0 11 Tf 101.629 225.929 Td (following) Tj ET BT /F0 11 Tf 147.158 225.929 Td (example) Tj ET BT /F0 11 Tf 187.176 225.929 Td (shows) Tj ET BT /F0 11 Tf 217.426 225.929 Td (a) Tj ET BT /F0 11 Tf 225.060 225.929 Td (portion) Tj ET BT /F0 11 Tf 259.589 225.929 Td (of) Tj ET BT /F0 11 Tf 271.502 225.929 Td (a) Tj ET /F2 11 Tf BT /F2 11 Tf 279.136 225.929 Td (safe) Tj ET /F0 11 Tf BT /F0 11 Tf 299.607 225.929 Td (table,) Tj ET BT /F0 11 Tf 326.491 225.929 Td (with) Tj ET BT /F0 11 Tf 348.799 225.929 Td (only) Tj ET BT /F0 11 Tf 371.107 225.929 Td (a) Tj ET BT /F0 11 Tf 378.741 225.929 Td (few) Tj ET BT /F0 11 Tf 397.980 225.929 Td (character) Tj ET BT /F0 11 Tf 441.034 225.929 Td (replacements) Tj ET BT /F0 11 Tf 81.774 210.540 Td (specified.) 
Tj ET BT /F0 11 Tf 81.774 179.762 Td (#) Tj ET BT /F0 11 Tf 81.774 164.373 Td (#) Tj ET BT /F0 11 Tf 90.024 164.373 Td (This) Tj ET BT /F0 11 Tf 112.332 164.373 Td (is) Tj ET BT /F0 11 Tf 122.419 164.373 Td (a) Tj ET BT /F0 11 Tf 130.053 164.373 Td (simple) Tj ET BT /F0 11 Tf 162.140 164.373 Td (example) Tj ET BT /F0 11 Tf 202.158 164.373 Td (of) Tj ET BT /F0 11 Tf 214.071 164.373 Td (a) Tj ET BT /F0 11 Tf 221.705 164.373 Td ("safe") Tj ET BT /F0 11 Tf 251.141 164.373 Td (table.) Tj ET BT /F0 11 Tf 280.775 164.373 Td (It) Tj ET BT /F0 11 Tf 290.246 164.373 Td (only) Tj ET BT /F0 11 Tf 312.554 164.373 Td (translates) Tj ET BT /F0 11 Tf 356.851 164.373 Td (4) Tj ET BT /F0 11 Tf 365.101 164.373 Td (characters.) Tj ET BT /F0 11 Tf 81.774 148.984 Td (#) Tj ET BT /F0 11 Tf 81.774 133.595 Td (#) Tj ET BT /F0 11 Tf 90.024 133.595 Td (The) Tj ET BT /F0 11 Tf 109.879 133.595 Td (default) Tj ET BT /F0 11 Tf 143.176 133.595 Td (is) Tj ET BT /F0 11 Tf 153.263 133.595 Td (commented) Tj ET BT /F0 11 Tf 207.339 133.595 Td (out,) Tj ET BT /F0 11 Tf 226.897 133.595 Td (so) Tj ET BT /F0 11 Tf 239.426 133.595 Td (any) Tj ET BT /F0 11 Tf 258.060 133.595 Td (character) Tj ET BT /F0 11 Tf 301.114 133.595 Td (that) Tj ET BT /F0 11 Tf 320.364 133.595 Td (is) Tj ET BT /F0 11 Tf 330.451 133.595 Td (not) Tj ET BT /F0 11 Tf 347.259 133.595 Td (in) Tj ET BT /F0 11 Tf 358.567 133.595 Td (this) Tj ET BT /F0 11 Tf 377.212 133.595 Td (table) Tj ET BT /F0 11 Tf 401.346 133.595 Td (will) Tj ET BT /F0 11 Tf 81.774 118.206 Td (#) Tj ET BT /F0 11 Tf 90.024 118.206 Td (be) Tj ET BT /F0 11 Tf 103.158 118.206 Td (ignored.) 
Tj ET BT /F0 11 Tf 81.774 102.817 Td (#) Tj ET /F0 11 Tf BT /F0 11 Tf 68.024 739.233 Td (DETOX.TBL\(5\)) Tj ET BT /F0 11 Tf 261.371 739.233 Td (File) Tj ET BT /F0 11 Tf 281.237 739.233 Td (Formats) Tj ET BT /F0 11 Tf 320.045 739.233 Td (Manual) Tj ET BT /F0 11 Tf 470.635 739.233 Td (DETOX.TBL\(5\)) Tj ET BT /F0 11 Tf 68.024 36.245 Td (Debian) Tj ET BT /F0 11 Tf 267.036 36.245 Td (February) Tj ET BT /F0 11 Tf 309.496 36.245 Td (24,) Tj ET BT /F0 11 Tf 325.996 36.245 Td (2021) Tj ET BT /F0 11 Tf 512.490 36.245 Td (Debian) Tj ET endstream endobj 8 0 obj 11499 endobj 9 0 obj << /ProcSet [/PDF /Text] /Font << /F0 3 0 R /F1 4 0 R /F2 5 0 R /F3 6 0 R >> >> endobj 10 0 obj << /Type /Page /Parent 2 0 R /Resources 9 0 R /Contents 7 0 R >> endobj 11 0 obj << /Length 12 0 R >> stream BT /F0 11 Tf 81.774 702.988 Td (#) Tj ET BT /F0 11 Tf 90.024 702.988 Td (default) Tj ET BT /F0 11 Tf 128.821 702.988 Td (_) Tj ET BT /F0 11 Tf 81.774 672.210 Td (#) Tj ET BT /F0 11 Tf 81.774 656.821 Td (#) Tj ET BT /F0 11 Tf 90.024 656.821 Td (This) Tj ET BT /F0 11 Tf 112.332 656.821 Td (is) Tj ET BT /F0 11 Tf 122.419 656.821 Td (the) Tj ET BT /F0 11 Tf 138.611 656.821 Td (main) Tj ET BT /F0 11 Tf 163.361 656.821 Td (replacement) Tj ET BT /F0 11 Tf 219.868 656.821 Td (block.) Tj ET BT /F0 11 Tf 252.560 656.821 Td (Each) Tj ET BT /F0 11 Tf 277.299 656.821 Td (line) Tj ET BT /F0 11 Tf 296.549 656.821 Td (specifies) Tj ET BT /F0 11 Tf 337.788 656.821 Td (a) Tj ET BT /F0 11 Tf 345.422 656.821 Td (character) Tj ET BT /F0 11 Tf 388.476 656.821 Td (and) Tj ET BT /F0 11 Tf 407.110 656.821 Td (a) Tj ET BT /F0 11 Tf 81.774 641.432 Td (#) Tj ET BT /F0 11 Tf 90.024 641.432 Td (string) Tj ET BT /F0 11 Tf 117.832 641.432 Td (to) Tj ET BT /F0 11 Tf 129.140 641.432 Td (replace) Tj ET BT /F0 11 Tf 163.647 641.432 Td (it) Tj ET BT /F0 11 Tf 172.513 641.432 Td (with.) 
Tj ET BT /F0 11 Tf 81.774 626.043 Td (#) Tj ET BT /F0 11 Tf 81.774 595.265 Td (start) Tj ET BT /F0 11 Tf 81.774 579.876 Td (0x09) Tj ET BT /F0 11 Tf 125.774 579.876 Td (_tab_) Tj ET BT /F0 11 Tf 213.774 579.876 Td (#) Tj ET BT /F0 11 Tf 222.024 579.876 Td (comments) Tj ET BT /F0 11 Tf 269.995 579.876 Td (work) Tj ET BT /F0 11 Tf 295.350 579.876 Td (on) Tj ET BT /F0 11 Tf 309.100 579.876 Td (lines,) Tj ET BT /F0 11 Tf 335.379 579.876 Td (too) Tj ET BT /F0 11 Tf 81.774 564.487 Td (0x24) Tj ET BT /F0 11 Tf 125.774 564.487 Td (_dollar_) Tj ET BT /F0 11 Tf 169.774 564.487 Td (#) Tj ET BT /F0 11 Tf 178.024 564.487 Td ($$$) Tj ET BT /F0 11 Tf 81.774 549.098 Td (0x26) Tj ET BT /F0 11 Tf 125.774 549.098 Td (_and_) Tj ET BT /F0 11 Tf 213.774 549.098 Td (#) Tj ET BT /F0 11 Tf 222.024 549.098 Td (ampersand) Tj ET BT /F0 11 Tf 81.774 533.709 Td (end) Tj ET BT /F0 11 Tf 81.774 502.931 Td (#) Tj ET BT /F0 11 Tf 81.774 487.542 Td (#) Tj ET BT /F0 11 Tf 90.024 487.542 Td (Starts) Tj ET BT /F0 11 Tf 117.832 487.542 Td (an) Tj ET BT /F0 11 Tf 130.966 487.542 Td (optional,) Tj ET BT /F0 11 Tf 172.524 487.542 Td (language-specific) Tj ET BT /F0 11 Tf 252.857 487.542 Td (translation) Tj ET BT /F0 11 Tf 302.049 487.542 Td (block.) 
Tj ET BT /F0 11 Tf 334.741 487.542 Td (detox) Tj ET BT /F0 11 Tf 361.933 487.542 Td (will) Tj ET BT /F0 11 Tf 381.799 487.542 Td (read) Tj ET BT /F0 11 Tf 81.774 472.153 Td (#) Tj ET BT /F0 11 Tf 90.024 472.153 Td (your) Tj ET BT /F0 11 Tf 112.937 472.153 Td (locale) Tj ET BT /F0 11 Tf 141.955 472.153 Td (and) Tj ET BT /F0 11 Tf 160.589 472.153 Td (load) Tj ET BT /F0 11 Tf 182.281 472.153 Td (the) Tj ET BT /F0 11 Tf 198.473 472.153 Td (block) Tj ET BT /F0 11 Tf 225.665 472.153 Td (if) Tj ET BT /F0 11 Tf 235.136 472.153 Td (the) Tj ET BT /F0 11 Tf 251.328 472.153 Td (word) Tj ET BT /F0 11 Tf 276.683 472.153 Td (after) Tj ET BT /F0 11 Tf 299.585 472.153 Td (start) Tj ET BT /F0 11 Tf 321.277 472.153 Td (matches) Tj ET BT /F0 11 Tf 360.074 472.153 Td (the) Tj ET BT /F0 11 Tf 376.266 472.153 Td (language) Tj ET BT /F0 11 Tf 81.774 456.764 Td (#) Tj ET BT /F0 11 Tf 90.024 456.764 Td (portion) Tj ET BT /F0 11 Tf 124.553 456.764 Td (of) Tj ET BT /F0 11 Tf 136.466 456.764 Td (your) Tj ET BT /F0 11 Tf 159.379 456.764 Td (locale.) 
Tj ET BT /F0 11 Tf 81.774 441.375 Td (#) Tj ET BT /F0 11 Tf 81.774 425.986 Td (#) Tj ET BT /F0 11 Tf 90.024 425.986 Td (In) Tj ET BT /F0 11 Tf 101.937 425.986 Td (the) Tj ET BT /F0 11 Tf 118.129 425.986 Td (example) Tj ET BT /F0 11 Tf 158.147 425.986 Td (here,) Tj ET BT /F0 11 Tf 182.578 425.986 Td (the) Tj ET BT /F0 11 Tf 198.770 425.986 Td (character) Tj ET BT /F0 11 Tf 241.824 425.986 Td ($) Tj ET BT /F0 11 Tf 250.074 425.986 Td (will) Tj ET BT /F0 11 Tf 269.940 425.986 Td (be) Tj ET BT /F0 11 Tf 283.074 425.986 Td (replaced) Tj ET BT /F0 11 Tf 323.081 425.986 Td (with) Tj ET BT /F0 11 Tf 345.389 425.986 Td ("_money_") Tj ET BT /F0 11 Tf 398.057 425.986 Td (if) Tj ET BT /F0 11 Tf 407.528 425.986 Td (the) Tj ET BT /F0 11 Tf 81.774 410.597 Td (#) Tj ET BT /F0 11 Tf 90.024 410.597 Td (user) Tj ET BT /F0 11 Tf 111.100 410.597 Td (is) Tj ET BT /F0 11 Tf 121.187 410.597 Td (working) Tj ET BT /F0 11 Tf 160.600 410.597 Td (in) Tj ET BT /F0 11 Tf 171.908 410.597 Td (English.) Tj ET BT /F0 11 Tf 213.774 410.597 Td (If) Tj ET BT /F0 11 Tf 223.850 410.597 Td (the) Tj ET BT /F0 11 Tf 240.042 410.597 Td (user) Tj ET BT /F0 11 Tf 261.118 410.597 Td (is) Tj ET BT /F0 11 Tf 271.205 410.597 Td (using) Tj ET BT /F0 11 Tf 297.792 410.597 Td (a) Tj ET BT /F0 11 Tf 305.426 410.597 Td (different) Tj ET BT /F0 11 Tf 346.049 410.597 Td (language,) Tj ET BT /F0 11 Tf 391.259 410.597 Td ($) Tj ET BT /F0 11 Tf 81.774 395.208 Td (#) Tj ET BT /F0 11 Tf 90.024 395.208 Td (will) Tj ET BT /F0 11 Tf 109.890 395.208 Td (be) Tj ET BT /F0 11 Tf 123.024 395.208 Td (replaced) Tj ET BT /F0 11 Tf 163.031 395.208 Td (with) Tj ET BT /F0 11 Tf 185.339 395.208 Td (the) Tj ET BT /F0 11 Tf 201.531 395.208 Td (value) Tj ET BT /F0 11 Tf 228.107 395.208 Td (configured) Tj ET BT /F0 11 Tf 278.509 395.208 Td (in) Tj ET BT /F0 11 Tf 289.817 395.208 Td (the) Tj ET BT /F0 11 Tf 306.009 395.208 Td (previous) Tj ET BT /F0 11 Tf 346.643 395.208 Td (block,) Tj ET BT /F0 11 Tf 376.585 395.208 Td ("_dollar_".) 
Tj ET BT /F0 11 Tf 81.774 379.819 Td (#) Tj ET BT /F0 11 Tf 81.774 349.041 Td (start) Tj ET BT /F0 11 Tf 103.466 349.041 Td (en) Tj ET BT /F0 11 Tf 81.774 333.652 Td (0x24) Tj ET BT /F0 11 Tf 125.774 333.652 Td (_money_) Tj ET BT /F0 11 Tf 213.774 333.652 Td (#) Tj ET BT /F0 11 Tf 222.024 333.652 Td (money) Tj ET BT /F0 11 Tf 254.716 333.652 Td (money) Tj ET BT /F0 11 Tf 81.774 318.263 Td (end) Tj ET BT /F0 11 Tf 81.774 287.485 Td (#) Tj ET BT /F0 11 Tf 90.024 287.485 Td (EOF) Tj ET BT /F0 11 Tf 81.774 256.707 Td (You) Tj ET BT /F0 11 Tf 103.466 256.707 Td (could) Tj ET BT /F0 11 Tf 130.658 256.707 Td (then) Tj ET BT /F0 11 Tf 152.350 256.707 Td (enable) Tj ET BT /F0 11 Tf 183.810 256.707 Td (this) Tj ET BT /F0 11 Tf 202.455 256.707 Td (table) Tj ET BT /F0 11 Tf 226.589 256.707 Td (in) Tj ET BT /F0 11 Tf 237.897 256.707 Td (your) Tj ET /F2 11 Tf BT /F2 11 Tf 260.810 256.707 Td (~/.detoxrc) Tj ET /F0 11 Tf BT /F0 11 Tf 305.558 256.707 Td (,) Tj ET BT /F0 11 Tf 311.058 256.707 Td (in) Tj ET BT /F0 11 Tf 322.366 256.707 Td (conjunction) Tj ET BT /F0 11 Tf 377.058 256.707 Td (with) Tj ET BT /F0 11 Tf 399.366 256.707 Td (other) Tj ET BT /F0 11 Tf 424.721 256.707 Td (filters.) 
Tj ET BT /F0 11 Tf 81.774 225.929 Td (#) Tj ET BT /F0 11 Tf 90.024 225.929 Td (Sample) Tj ET BT /F0 11 Tf 125.774 225.929 Td (detoxrc) Tj ET BT /F0 11 Tf 81.774 195.151 Td (sequence) Tj ET BT /F0 11 Tf 124.839 195.151 Td (default) Tj ET BT /F0 11 Tf 158.136 195.151 Td ({) Tj ET BT /F0 11 Tf 125.774 179.762 Td (safe) Tj ET BT /F0 11 Tf 146.234 179.762 Td ({) Tj ET BT /F0 11 Tf 169.774 164.373 Td (filename) Tj ET BT /F0 11 Tf 211.013 164.373 Td ("/home/MYUSERNAME/.local/share/detox/safe.tbl";) Tj ET BT /F0 11 Tf 125.774 148.984 Td (};) Tj ET BT /F0 11 Tf 125.774 133.595 Td (safe;) Tj ET BT /F0 11 Tf 125.774 118.206 Td (wipeup;) Tj ET BT /F0 11 Tf 81.774 102.817 Td (};) Tj ET /F0 11 Tf BT /F0 11 Tf 68.024 739.233 Td (DETOX.TBL\(5\)) Tj ET BT /F0 11 Tf 261.371 739.233 Td (File) Tj ET BT /F0 11 Tf 281.237 739.233 Td (Formats) Tj ET BT /F0 11 Tf 320.045 739.233 Td (Manual) Tj ET BT /F0 11 Tf 470.635 739.233 Td (DETOX.TBL\(5\)) Tj ET BT /F0 11 Tf 68.024 36.245 Td (Debian) Tj ET BT /F0 11 Tf 267.036 36.245 Td (February) Tj ET BT /F0 11 Tf 309.496 36.245 Td (24,) Tj ET BT /F0 11 Tf 325.996 36.245 Td (2021) Tj ET BT /F0 11 Tf 512.490 36.245 Td (Debian) Tj ET endstream endobj 12 0 obj 7675 endobj 13 0 obj << /ProcSet [/PDF /Text] /Font << /F0 3 0 R /F1 4 0 R /F2 5 0 R /F3 6 0 R >> >> endobj 14 0 obj << /Type /Page /Parent 2 0 R /Resources 13 0 R /Contents 11 0 R >> endobj 15 0 obj << /Length 16 0 R >> stream BT /F0 11 Tf 81.774 702.988 Td (#) Tj ET BT /F0 11 Tf 90.024 702.988 Td (EOF) Tj ET BT /F0 11 Tf 81.774 672.210 Td (When) Tj ET /F1 11 Tf BT /F1 11 Tf 110.792 672.210 Td (detox) Tj ET /F0 11 Tf BT /F0 11 Tf 139.205 672.210 Td (is) Tj ET BT /F0 11 Tf 149.292 672.210 Td (run,) Tj ET BT /F0 11 Tf 169.455 672.210 Td (it) Tj ET BT /F0 11 Tf 178.321 672.210 Td (will) Tj ET BT /F0 11 Tf 198.187 672.210 Td (run) Tj ET BT /F0 11 Tf 215.600 672.210 Td (the) Tj ET BT /F0 11 Tf 231.792 672.210 Td (custom) Tj ET BT /F0 11 Tf 266.321 672.210 Td (safe) Tj ET BT /F0 11 Tf 286.781 672.210 
Td (filter) Tj ET BT /F0 11 Tf 310.915 672.210 Td (first,) Tj ET BT /F0 11 Tf 334.136 672.210 Td (then) Tj ET BT /F0 11 Tf 355.828 672.210 Td (run) Tj ET BT /F0 11 Tf 373.241 672.210 Td (the) Tj ET BT /F0 11 Tf 389.433 672.210 Td (default) Tj ET /F2 11 Tf BT /F2 11 Tf 422.730 672.210 Td (safe) Tj ET /F0 11 Tf BT /F0 11 Tf 443.201 672.210 Td (filter,) Tj ET BT /F0 11 Tf 470.085 672.210 Td (and) Tj ET BT /F0 11 Tf 488.719 672.210 Td (the) Tj ET BT /F0 11 Tf 504.911 672.210 Td (finally) Tj ET BT /F0 11 Tf 81.774 656.821 Td (the) Tj ET /F2 11 Tf BT /F2 11 Tf 97.966 656.821 Td (wipeup) Tj ET /F0 11 Tf BT /F0 11 Tf 132.495 656.821 Td (filter.) Tj ET BT /F0 11 Tf 162.129 656.821 Td (See) Tj ET BT /F0 11 Tf 180.763 656.821 Td (detoxrc\(5\)) Tj ET BT /F0 11 Tf 229.328 656.821 Td (for) Tj ET BT /F0 11 Tf 244.904 656.821 Td (more) Tj ET BT /F0 11 Tf 270.259 656.821 Td (details) Tj ET BT /F0 11 Tf 301.730 656.821 Td (on) Tj ET BT /F0 11 Tf 315.480 656.821 Td (the) Tj ET BT /F0 11 Tf 331.672 656.821 Td (various) Tj ET BT /F0 11 Tf 366.806 656.821 Td (filter) Tj ET BT /F0 11 Tf 390.940 656.821 Td (types.) Tj ET /F1 11 Tf BT /F1 11 Tf 68.024 626.043 Td (SEE) Tj ET BT /F1 11 Tf 91.564 626.043 Td (ALSO) Tj ET /F0 11 Tf BT /F0 11 Tf 81.774 610.654 Td (detox\(1\),) Tj ET BT /F0 11 Tf 124.542 610.654 Td (detoxrc\(5\),) Tj ET BT /F0 11 Tf 175.857 610.654 Td (ascii\(7\),) Tj ET BT /F0 11 Tf 214.346 610.654 Td (iso_8859-1\(7\),) Tj ET BT /F0 11 Tf 282.172 610.654 Td (unicode\(7\),) Tj ET BT /F0 11 Tf 335.324 610.654 Td (utf-8\(7\)) Tj ET /F1 11 Tf BT /F1 11 Tf 68.024 579.876 Td (AUTHORS) Tj ET /F0 11 Tf BT /F0 11 Tf 81.774 564.487 Td (detox) Tj ET BT /F0 11 Tf 108.966 564.487 Td (was) Tj ET BT /F0 11 Tf 128.821 564.487 Td (written) Tj ET BT /F0 11 Tf 162.734 564.487 Td (by) Tj ET BT /F0 11 Tf 176.484 564.487 Td (Doug) Tj ET BT /F0 11 Tf 203.676 564.487 Td (Harple.) 
Tj ET /F0 11 Tf BT /F0 11 Tf 68.024 739.233 Td (DETOX.TBL\(5\)) Tj ET BT /F0 11 Tf 261.371 739.233 Td (File) Tj ET BT /F0 11 Tf 281.237 739.233 Td (Formats) Tj ET BT /F0 11 Tf 320.045 739.233 Td (Manual) Tj ET BT /F0 11 Tf 470.635 739.233 Td (DETOX.TBL\(5\)) Tj ET BT /F0 11 Tf 68.024 36.245 Td (Debian) Tj ET BT /F0 11 Tf 267.036 36.245 Td (February) Tj ET BT /F0 11 Tf 309.496 36.245 Td (24,) Tj ET BT /F0 11 Tf 325.996 36.245 Td (2021) Tj ET BT /F0 11 Tf 512.490 36.245 Td (Debian) Tj ET endstream endobj 16 0 obj 2945 endobj 17 0 obj << /ProcSet [/PDF /Text] /Font << /F0 3 0 R /F1 4 0 R /F2 5 0 R /F3 6 0 R >> >> endobj 18 0 obj << /Type /Page /Parent 2 0 R /Resources 17 0 R /Contents 15 0 R >> endobj 2 0 obj << /Type /Pages /MediaBox [0 0 612 790] /Count 3 /Kids [ 10 0 R 14 0 R 18 0 R] >> endobj 19 0 obj << /Type /Catalog /Pages 2 0 R >> endobj xref 0 20 0000000000 65535 f 0000000009 00000 n 0000023239 00000 n 0000000030 00000 n 0000000112 00000 n 0000000193 00000 n 0000000276 00000 n 0000000363 00000 n 0000011914 00000 n 0000011935 00000 n 0000012030 00000 n 0000012111 00000 n 0000019840 00000 n 0000019861 00000 n 0000019957 00000 n 0000020040 00000 n 0000023039 00000 n 0000023060 00000 n 0000023156 00000 n 0000023336 00000 n trailer << /Size 20 /Root 19 0 R /Info 1 0 R >> startxref 23386 %%EOF detox-2.0.0/man/detoxrc.5000066400000000000000000000147421460212773400151630ustar00rootroot00000000000000.\" .\" This file is part of the Detox package. .\" .\" Copyright (c) Doug Harple .\" .\" For the full copyright and license information, please view the LICENSE .\" file that was distributed with this source code. .\" .Dd February 24, 2021 .Dt DETOXRC 5 .Os .Sh NAME .Nm detoxrc .Nd configuration file for .Xr detox 1 .Sh OVERVIEW .Cm detox allows for configuration of its sequences through config files. This document describes how these files work. 
.Sh IMPORTANT When setting up a new set of rules, the .Ar safe and .Ar wipeup filters should always be run after a translating filter (or series thereof), such as the .Ar utf_8 or the .Ar uncgi filters. Otherwise, there is a risk of introducing difficult characters into the filename. .Sh SYNTAX The format of this configuration file is C-like. It is based loosely off the configuration files used by .Cm named . Each statement is semicolon terminated, and modifiers on a particular statement are generally contained within braces. .Bl -tag -width 0.25i .It Cm sequence Qo Ar name Qc Bro Ar sequence; ... Brc ; Defines a sequence of filters to run a filename through. .Ar name specifies how the user will refer to the particular sequence during runtime. Quotes around the sequence name are generally optional, but should be used if the sequence name does not start with a letter. .Pp There is a special sequence, named .Ar default , which is the default sequence used by .Cm detox . This can be overridden through the command line option .Fl s or the environmental variable .Ev DETOX_SEQUENCE . .Pp Sequence names are case sensitive and unique throughout all sequences; that is, if a system-wide file defines .Ar normal_seq and a user has a sequence with the same name in their .Pa .detoxrc , the user's .Ar normal_seq will replace the system-wide version. .It Cm ignore Bro Cm filename Qo Ar filename Qc ; ... Brc ; Any filename listed here will be ignored during recursion. Note that all files beginning with a period, such as .Pa .git or .Pa .config will be ignored by .Cm detox during recursion. .It Cm # comments Anything after a # on any line is ignored. .El .Ss SEQUENCES All of these statements occur within a .Cm sequence block.
.Bl -tag -width 0.25i .It Cm iso8859_1 ; .It Cm iso8859_1 Bro Cm builtin Qo Ar name Qc ; Brc ; .It Cm iso8859_1 Bro Cm filename Qo Ar /path/to/filename Qc ; Brc ; This transliterates ISO 8859-1 characters between 0xA0 and 0xFF into lower ASCII equivalents. The output is not necessarily safe, and should also be run through the .Ar safe filter. .Pp If .Ar builtin is specified, a builtin table with the name specified will be used. .Pp Under normal circumstances, the filename syntax is not needed. .Cm detox looks in several locations for a file called .Pa iso8859_1.tbl , which is a set of rules defining how an ISO 8859-1 character should be translated. If .Cm detox can't find the translation table, it will fall back on the builtin table .Pa iso8859_1 . .Pp You can also download or create your own, and tell .Cm detox the location of it using the filename syntax shown above. .Pp You can chain together multiple .Ar iso8859_1 filters, as long as the default value of all but the last one is empty. This is explained in .Xr detox.tbl 5 . .Pp This filter is mutually exclusive with the .Ar utf_8 filter. .It Cm utf_8 ; .It Cm utf_8 Bro Cm builtin Qo Ar name Qc ; Brc ; .It Cm utf_8 Bro Cm filename Qo Ar /path/to/filename Qc ; Brc ; This transliterates Unicode characters, encoded using UTF-8, into lower ASCII equivalents. .Pp This operates in a manner similar to .Ar iso8859_1 , except it looks for a translation table called .Pa unicode.tbl . .Pp Similar to the .Ar iso8859_1 filter, an internal table exists, based on the stock translation table, called .Pa unicode . .It Cm uncgi ; This translates CGI-escaped strings into their ASCII equivalents. The output of this is not necessarily safe, and should be run through the .Ar safe filter, at the least. .It Cm safe ; .It Cm safe Bro Cm builtin Qo Ar name Qc ; Brc ; .It Cm safe Bro Cm filename Qo Ar /path/to/filename Qc ; Brc ; This could also be called "safe for Unix-like operating systems".
It translates characters that are difficult to work with in Unix environments into characters that are not. .Pp Similar to the .Ar iso8859_1 and .Ar utf_8 filters, this can be controlled using a translation table. This filter also has an internal version of the translation table, which can be accessed via the builtin table .Ar safe . .It Cm wipeup ; .It Cm wipeup Bro Cm remove_trailing ; Brc ; Reduces any series of underscores or dashes to a single character. The dash takes precedence. .Pp If .Cm remove_trailing is set, then periods are added to the set of characters to work on. The period then takes precedence, followed by the dash. .Pp If a hash character, underscore, or dash is present at the start of the filename, it will be removed. .It Cm max_length Bro Cm length Ar value ; Brc ; This trims a filename down to the length specified (or less). It is conscious of extensions and attempts to preserve anything following the last period in a filename. .Pp For instance, given a max length of 12, and a filename of .Pa this_is_my_file.txt , the filter would output .Pa this_is_.txt . .It Cm lower ; This translates uppercase characters into lowercase characters. It only works on ASCII characters. .El .Sh BUILTIN TABLES .Bl -tag -width 0.25i .It cp1252 A translation table for transliterating CP-1252 characters to ASCII. This is no longer a common use case, and has been moved to a separate table. .It iso8859_1 A translation table for transliterating single-byte characters with the high bit set from ISO 8859-1 to ASCII. .It safe A replacement table for characters that are hard to work with under Unix and Unix-like OSs. .It unicode A translation table for transliterating multi-byte characters encoded in UTF-8 to ASCII.
.El .Sh EXAMPLES .Bd -literal .\" START SAMPLE # transliterate UTF-8 to ASCII (using chained tables), clean up sequence utf8 { utf_8 { filename "/usr/local/share/detox/custom.tbl"; }; utf_8 { builtin "unicode"; }; safe { builtin "safe"; }; wipeup { remove_trailing; }; max_length { length 128; }; }; # decode CGI, transliterate CP-1252 to ASCII, clean up sequence "cgi-cp1252" { uncgi; iso8859_1 { builtin "cp1252"; }; safe { builtin "safe"; }; }; .\" END SAMPLE .Ed .Sh SEE ALSO .Xr detox 1 , .Xr inline-detox 1 , .Xr detox.tbl 5 , .Xr ascii 7 , .Xr iso_8859-1 7 , .Xr unicode 7 , .Xr utf-8 7 .Sh AUTHORS detox was written by .An "Doug Harple" . detox-2.0.0/man/detoxrc.5.pdf000066400000000000000000001401601460212773400157250ustar00rootroot00000000000000%PDF-1.1 1 0 obj << >> endobj 3 0 obj << /Type /Font /Subtype /Type1 /Name /F0 /BaseFont /Times-Roman >> endobj 4 0 obj << /Type /Font /Subtype /Type1 /Name /F1 /BaseFont /Times-Bold >> endobj 5 0 obj << /Type /Font /Subtype /Type1 /Name /F2 /BaseFont /Times-Italic >> endobj 6 0 obj << /Type /Font /Subtype /Type1 /Name /F3 /BaseFont /Times-BoldItalic >> endobj 7 0 obj << /Length 8 0 R >> stream BT /F1 11 Tf 68.024 702.988 Td (NAME) Tj ET BT /F1 11 Tf 81.774 687.599 Td (detoxrc) Tj ET /F0 11 Tf BT /F0 11 Tf 119.955 687.599 Td (-) Tj ET BT /F0 11 Tf 126.368 687.599 Td (configuration) Tj ET BT /F0 11 Tf 188.386 687.599 Td (file) Tj ET BT /F0 11 Tf 205.799 687.599 Td (for) Tj ET BT /F0 11 Tf 221.375 687.599 Td (detox\(1\)) Tj ET /F1 11 Tf BT /F1 11 Tf 68.024 656.821 Td (OVERVIEW) Tj ET BT /F1 11 Tf 81.774 641.432 Td (detox) Tj ET /F0 11 Tf BT /F0 11 Tf 110.187 641.432 Td (allows) Tj ET BT /F0 11 Tf 141.658 641.432 Td (for) Tj ET BT /F0 11 Tf 157.234 641.432 Td (configuration) Tj ET BT /F0 11 Tf 219.252 641.432 Td (of) Tj ET BT /F0 11 Tf 231.165 641.432 Td (its) Tj ET BT /F0 11 Tf 244.310 641.432 Td (sequences) Tj ET BT /F0 11 Tf 291.654 641.432 Td (through) Tj ET BT /F0 11 Tf 328.625 641.432 Td (config) Tj ET BT /F0 11 Tf 
359.480 641.432 Td (files.) Tj ET BT /F0 11 Tf 386.672 641.432 Td (This) Tj ET BT /F0 11 Tf 408.980 641.432 Td (document) Tj ET BT /F0 11 Tf 455.114 641.432 Td (describes) Tj ET BT /F0 11 Tf 498.795 641.432 Td (how) Tj ET BT /F0 11 Tf 520.487 641.432 Td (these) Tj ET BT /F0 11 Tf 81.774 626.043 Td (files) Tj ET BT /F0 11 Tf 103.466 626.043 Td (work.) Tj ET /F1 11 Tf BT /F1 11 Tf 68.024 595.265 Td (IMPORTANT) Tj ET /F0 11 Tf BT /F0 11 Tf 81.774 579.876 Td (When) Tj ET BT /F0 11 Tf 110.792 579.876 Td (setting) Tj ET BT /F0 11 Tf 142.879 579.876 Td (up) Tj ET BT /F0 11 Tf 156.629 579.876 Td (a) Tj ET BT /F0 11 Tf 164.263 579.876 Td (new) Tj ET BT /F0 11 Tf 185.339 579.876 Td (set) Tj ET BT /F0 11 Tf 200.310 579.876 Td (of) Tj ET BT /F0 11 Tf 212.223 579.876 Td (rules,) Tj ET BT /F0 11 Tf 239.107 579.876 Td (the) Tj ET /F2 11 Tf BT /F2 11 Tf 255.299 579.876 Td (safe) Tj ET /F0 11 Tf BT /F0 11 Tf 275.770 579.876 Td (and) Tj ET /F2 11 Tf BT /F2 11 Tf 294.404 579.876 Td (wipeup) Tj ET /F0 11 Tf BT /F0 11 Tf 328.933 579.876 Td (filters) Tj ET BT /F0 11 Tf 357.346 579.876 Td (should) Tj ET BT /F0 11 Tf 389.433 579.876 Td (always) Tj ET BT /F0 11 Tf 422.730 579.876 Td (be) Tj ET BT /F0 11 Tf 435.864 579.876 Td (run) Tj ET BT /F0 11 Tf 453.277 579.876 Td (after) Tj ET BT /F0 11 Tf 476.179 579.876 Td (a) Tj ET BT /F0 11 Tf 483.813 579.876 Td (translating) Tj ET BT /F0 11 Tf 81.774 564.487 Td (filter) Tj ET BT /F0 11 Tf 105.908 564.487 Td (\(or) Tj ET BT /F0 11 Tf 121.484 564.487 Td (series) Tj ET BT /F0 11 Tf 149.281 564.487 Td (thereof\),) Tj ET BT /F0 11 Tf 189.596 564.487 Td (such) Tj ET BT /F0 11 Tf 212.509 564.487 Td (as) Tj ET BT /F0 11 Tf 224.422 564.487 Td (the) Tj ET /F2 11 Tf BT /F2 11 Tf 240.614 564.487 Td (utf_8) Tj ET /F0 11 Tf BT /F0 11 Tf 265.980 564.487 Td (or) Tj ET BT /F0 11 Tf 277.893 564.487 Td (the) Tj ET /F2 11 Tf BT /F2 11 Tf 294.085 564.487 Td (uncgi) Tj ET /F0 11 Tf BT /F0 11 Tf 321.277 564.487 Td (filters.) 
Tj ET BT /F0 11 Tf 355.190 564.487 Td (Otherwise,) Tj ET BT /F0 11 Tf 405.900 564.487 Td (the) Tj ET BT /F0 11 Tf 422.092 564.487 Td (risk) Tj ET BT /F0 11 Tf 441.342 564.487 Td (of) Tj ET BT /F0 11 Tf 453.255 564.487 Td (introducing) Tj ET BT /F0 11 Tf 506.726 564.487 Td (difficult) Tj ET BT /F0 11 Tf 81.774 549.098 Td (characters) Tj ET BT /F0 11 Tf 129.107 549.098 Td (into) Tj ET BT /F0 11 Tf 148.973 549.098 Td (the) Tj ET BT /F0 11 Tf 165.165 549.098 Td (filename) Tj ET BT /F0 11 Tf 206.404 549.098 Td (is) Tj ET BT /F0 11 Tf 216.491 549.098 Td (introduced.) Tj ET /F1 11 Tf BT /F1 11 Tf 68.024 518.320 Td (SYNTAX) Tj ET /F0 11 Tf BT /F0 11 Tf 81.774 502.931 Td (The) Tj ET BT /F0 11 Tf 101.629 502.931 Td (format) Tj ET BT /F0 11 Tf 133.705 502.931 Td (of) Tj ET BT /F0 11 Tf 145.618 502.931 Td (this) Tj ET BT /F0 11 Tf 164.263 502.931 Td (configuration) Tj ET BT /F0 11 Tf 226.281 502.931 Td (file) Tj ET BT /F0 11 Tf 243.694 502.931 Td (is) Tj ET BT /F0 11 Tf 253.781 502.931 Td (C-like.) Tj ET BT /F0 11 Tf 289.531 502.931 Td (It) Tj ET BT /F0 11 Tf 299.002 502.931 Td (is) Tj ET BT /F0 11 Tf 309.089 502.931 Td (based) Tj ET BT /F0 11 Tf 336.886 502.931 Td (loosely) Tj ET BT /F0 11 Tf 371.415 502.931 Td (off) Tj ET BT /F0 11 Tf 386.991 502.931 Td (the) Tj ET BT /F0 11 Tf 403.183 502.931 Td (configuration) Tj ET BT /F0 11 Tf 465.201 502.931 Td (files) Tj ET BT /F0 11 Tf 486.893 502.931 Td (used) Tj ET BT /F0 11 Tf 509.806 502.931 Td (by) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 487.542 Td (named) Tj ET /F0 11 Tf BT /F0 11 Tf 113.553 487.542 Td (.) 
Tj ET BT /F0 11 Tf 121.803 487.542 Td (Each) Tj ET BT /F0 11 Tf 146.542 487.542 Td (statement) Tj ET BT /F0 11 Tf 191.455 487.542 Td (is) Tj ET BT /F0 11 Tf 201.542 487.542 Td (semicolon) Tj ET BT /F0 11 Tf 249.513 487.542 Td (terminated,) Tj ET BT /F0 11 Tf 302.060 487.542 Td (and) Tj ET BT /F0 11 Tf 320.694 487.542 Td (modifiers) Tj ET BT /F0 11 Tf 365.607 487.542 Td (on) Tj ET BT /F0 11 Tf 379.357 487.542 Td (a) Tj ET BT /F0 11 Tf 386.991 487.542 Td (particular) Tj ET BT /F0 11 Tf 431.893 487.542 Td (statement) Tj ET BT /F0 11 Tf 476.806 487.542 Td (are) Tj ET BT /F0 11 Tf 492.987 487.542 Td (generally) Tj ET BT /F0 11 Tf 81.774 472.153 Td (contained) Tj ET BT /F0 11 Tf 127.292 472.153 Td (within) Tj ET BT /F0 11 Tf 158.158 472.153 Td (braces.) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 441.375 Td (sequence) Tj ET /F0 11 Tf BT /F0 11 Tf 126.687 441.375 Td (") Tj ET /F2 11 Tf BT /F2 11 Tf 131.175 441.375 Td (name) Tj ET /F0 11 Tf BT /F0 11 Tf 155.001 441.375 Td (") Tj ET BT /F0 11 Tf 162.239 441.375 Td ({) Tj ET /F2 11 Tf BT /F2 11 Tf 167.519 441.375 Td (sequence;) Tj ET BT /F2 11 Tf 214.247 441.375 Td (...) Tj ET /F0 11 Tf BT /F0 11 Tf 222.497 441.375 Td (};) Tj ET BT /F0 11 Tf 105.270 425.986 Td (Defines) Tj ET BT /F0 11 Tf 142.230 425.986 Td (a) Tj ET BT /F0 11 Tf 149.864 425.986 Td (sequence) Tj ET BT /F0 11 Tf 192.929 425.986 Td (of) Tj ET BT /F0 11 Tf 204.842 425.986 Td (filters) Tj ET BT /F0 11 Tf 233.255 425.986 Td (to) Tj ET BT /F0 11 Tf 244.563 425.986 Td (run) Tj ET BT /F0 11 Tf 261.976 425.986 Td (a) Tj ET BT /F0 11 Tf 269.610 425.986 Td (filename) Tj ET BT /F0 11 Tf 310.849 425.986 Td (through.) 
Tj ET /F2 11 Tf BT /F2 11 Tf 353.320 425.986 Td (name) Tj ET /F0 11 Tf BT /F0 11 Tf 379.896 425.986 Td (specifies) Tj ET BT /F0 11 Tf 421.135 425.986 Td (how) Tj ET BT /F0 11 Tf 442.827 425.986 Td (the) Tj ET BT /F0 11 Tf 459.019 425.986 Td (user) Tj ET BT /F0 11 Tf 480.095 425.986 Td (will) Tj ET BT /F0 11 Tf 499.961 425.986 Td (refer) Tj ET BT /F0 11 Tf 523.468 425.986 Td (to) Tj ET BT /F0 11 Tf 105.270 410.597 Td (the) Tj ET BT /F0 11 Tf 121.462 410.597 Td (particular) Tj ET BT /F0 11 Tf 166.364 410.597 Td (sequence) Tj ET BT /F0 11 Tf 209.429 410.597 Td (during) Tj ET BT /F0 11 Tf 240.900 410.597 Td (runtime.) Tj ET BT /F0 11 Tf 283.371 410.597 Td (Quotes) Tj ET BT /F0 11 Tf 317.284 410.597 Td (around) Tj ET BT /F0 11 Tf 350.581 410.597 Td (the) Tj ET BT /F0 11 Tf 366.773 410.597 Td (sequence) Tj ET BT /F0 11 Tf 409.838 410.597 Td (name) Tj ET BT /F0 11 Tf 436.414 410.597 Td (are) Tj ET BT /F0 11 Tf 452.595 410.597 Td (generally) Tj ET BT /F0 11 Tf 496.276 410.597 Td (optional,) Tj ET BT /F0 11 Tf 105.270 395.208 Td (but) Tj ET BT /F0 11 Tf 122.078 395.208 Td (should) Tj ET BT /F0 11 Tf 154.165 395.208 Td (be) Tj ET BT /F0 11 Tf 167.299 395.208 Td (used) Tj ET BT /F0 11 Tf 190.212 395.208 Td (if) Tj ET BT /F0 11 Tf 199.683 395.208 Td (the) Tj ET BT /F0 11 Tf 215.875 395.208 Td (sequence) Tj ET BT /F0 11 Tf 258.940 395.208 Td (name) Tj ET BT /F0 11 Tf 285.516 395.208 Td (does) Tj ET BT /F0 11 Tf 308.429 395.208 Td (not) Tj ET BT /F0 11 Tf 325.237 395.208 Td (start) Tj ET BT /F0 11 Tf 346.929 395.208 Td (with) Tj ET BT /F0 11 Tf 369.237 395.208 Td (a) Tj ET BT /F0 11 Tf 376.871 395.208 Td (letter.) 
Tj ET BT /F0 11 Tf 105.270 364.430 Td (There) Tj ET BT /F0 11 Tf 133.672 364.430 Td (is) Tj ET BT /F0 11 Tf 143.759 364.430 Td (a) Tj ET BT /F0 11 Tf 151.393 364.430 Td (special) Tj ET BT /F0 11 Tf 184.690 364.430 Td (sequence,) Tj ET BT /F0 11 Tf 230.505 364.430 Td (named) Tj ET /F2 11 Tf BT /F2 11 Tf 262.581 364.430 Td (default) Tj ET /F0 11 Tf BT /F0 11 Tf 293.139 364.430 Td (,) Tj ET BT /F0 11 Tf 298.639 364.430 Td (which) Tj ET BT /F0 11 Tf 328.273 364.430 Td (is) Tj ET BT /F0 11 Tf 338.360 364.430 Td (the) Tj ET BT /F0 11 Tf 354.552 364.430 Td (default) Tj ET BT /F0 11 Tf 387.849 364.430 Td (sequence) Tj ET BT /F0 11 Tf 430.914 364.430 Td (used) Tj ET BT /F0 11 Tf 453.827 364.430 Td (by) Tj ET /F1 11 Tf BT /F1 11 Tf 467.577 364.430 Td (detox) Tj ET /F0 11 Tf BT /F0 11 Tf 493.240 364.430 Td (.) Tj ET BT /F0 11 Tf 501.490 364.430 Td (This) Tj ET BT /F0 11 Tf 523.798 364.430 Td (can) Tj ET BT /F0 11 Tf 105.270 349.041 Td (be) Tj ET BT /F0 11 Tf 118.404 349.041 Td (overridden) Tj ET BT /F0 11 Tf 168.806 349.041 Td (through) Tj ET BT /F0 11 Tf 205.777 349.041 Td (the) Tj ET BT /F0 11 Tf 221.969 349.041 Td (command) Tj ET BT /F0 11 Tf 268.103 349.041 Td (line) Tj ET BT /F0 11 Tf 287.353 349.041 Td (option) Tj ET /F1 11 Tf BT /F1 11 Tf 318.219 349.041 Td (-s) Tj ET /F0 11 Tf BT /F0 11 Tf 328.911 349.041 Td (or) Tj ET BT /F0 11 Tf 340.824 349.041 Td (the) Tj ET BT /F0 11 Tf 357.016 349.041 Td (environmental) Tj ET BT /F0 11 Tf 423.313 349.041 Td (variable) Tj ET BT /F0 11 Tf 105.270 333.652 Td (DETOX_SEQUENCE.) 
Tj ET BT /F0 11 Tf 105.270 302.874 Td (Sequence) Tj ET BT /F0 11 Tf 150.172 302.874 Td (names) Tj ET BT /F0 11 Tf 181.027 302.874 Td (are) Tj ET BT /F0 11 Tf 197.208 302.874 Td (case) Tj ET BT /F0 11 Tf 218.889 302.874 Td (sensitive) Tj ET BT /F0 11 Tf 260.139 302.874 Td (and) Tj ET BT /F0 11 Tf 278.773 302.874 Td (unique) Tj ET BT /F0 11 Tf 311.465 302.874 Td (throughout) Tj ET BT /F0 11 Tf 362.494 302.874 Td (all) Tj ET BT /F0 11 Tf 376.244 302.874 Td (sequences;) Tj ET BT /F0 11 Tf 426.646 302.874 Td (that) Tj ET BT /F0 11 Tf 445.896 302.874 Td (is,) Tj ET BT /F0 11 Tf 458.733 302.874 Td (if) Tj ET BT /F0 11 Tf 468.204 302.874 Td (a) Tj ET BT /F0 11 Tf 475.838 302.874 Td (system-wide) Tj ET BT /F0 11 Tf 105.270 287.485 Td (file) Tj ET BT /F0 11 Tf 122.683 287.485 Td (defines) Tj ET /F2 11 Tf BT /F2 11 Tf 157.201 287.485 Td (normal_seq) Tj ET /F0 11 Tf BT /F0 11 Tf 211.893 287.485 Td (and) Tj ET BT /F0 11 Tf 230.527 287.485 Td (a) Tj ET BT /F0 11 Tf 238.161 287.485 Td (user) Tj ET BT /F0 11 Tf 259.237 287.485 Td (has) Tj ET BT /F0 11 Tf 276.650 287.485 Td (a) Tj ET BT /F0 11 Tf 284.284 287.485 Td (sequence) Tj ET BT /F0 11 Tf 327.349 287.485 Td (with) Tj ET BT /F0 11 Tf 349.657 287.485 Td (the) Tj ET BT /F0 11 Tf 365.849 287.485 Td (same) Tj ET BT /F0 11 Tf 391.204 287.485 Td (name) Tj ET BT /F0 11 Tf 417.780 287.485 Td (in) Tj ET BT /F0 11 Tf 429.088 287.485 Td (their) Tj ET /F2 11 Tf BT /F2 11 Tf 452.001 287.485 Td (.detoxrc) Tj ET /F0 11 Tf BT /F0 11 Tf 487.740 287.485 Td (,) Tj ET BT /F0 11 Tf 493.240 287.485 Td (the) Tj ET BT /F0 11 Tf 509.432 287.485 Td (users') Tj ET /F2 11 Tf BT /F2 11 Tf 105.270 272.096 Td (normal_seq) Tj ET /F0 11 Tf BT /F0 11 Tf 159.962 272.096 Td (will) Tj ET BT /F0 11 Tf 179.828 272.096 Td (replace) Tj ET BT /F0 11 Tf 214.335 272.096 Td (the) Tj ET BT /F0 11 Tf 230.527 272.096 Td (system-wide) Tj ET BT /F0 11 Tf 288.882 272.096 Td (version.) 
Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 241.318 Td (ignore) Tj ET /F0 11 Tf BT /F0 11 Tf 114.466 241.318 Td ({) Tj ET /F1 11 Tf BT /F1 11 Tf 119.746 241.318 Td (filename) Tj ET /F0 11 Tf BT /F0 11 Tf 162.822 241.318 Td (") Tj ET /F2 11 Tf BT /F2 11 Tf 167.310 241.318 Td (filename) Tj ET /F0 11 Tf BT /F0 11 Tf 205.194 241.318 Td (";) Tj ET BT /F0 11 Tf 215.490 241.318 Td (...};) Tj ET BT /F0 11 Tf 105.270 225.929 Td (Any) Tj ET BT /F0 11 Tf 126.962 225.929 Td (filename) Tj ET BT /F0 11 Tf 168.201 225.929 Td (listed) Tj ET BT /F0 11 Tf 194.788 225.929 Td (here) Tj ET BT /F0 11 Tf 216.469 225.929 Td (will) Tj ET BT /F0 11 Tf 236.335 225.929 Td (be) Tj ET BT /F0 11 Tf 249.469 225.929 Td (ignored) Tj ET BT /F0 11 Tf 285.824 225.929 Td (during) Tj ET BT /F0 11 Tf 317.295 225.929 Td (recursion.) Tj ET BT /F0 11 Tf 366.476 225.929 Td (Note) Tj ET BT /F0 11 Tf 390.610 225.929 Td (that) Tj ET BT /F0 11 Tf 409.860 225.929 Td (all) Tj ET BT /F0 11 Tf 423.610 225.929 Td (files) Tj ET BT /F0 11 Tf 445.302 225.929 Td (beginning) Tj ET BT /F0 11 Tf 492.052 225.929 Td (with) Tj ET BT /F0 11 Tf 514.360 225.929 Td (a) Tj ET BT /F0 11 Tf 105.270 210.540 Td (period,) Tj ET BT /F0 11 Tf 138.875 210.540 Td (such) Tj ET BT /F0 11 Tf 161.788 210.540 Td (as) Tj ET /F2 11 Tf BT /F2 11 Tf 173.701 210.540 Td (.git) Tj ET /F0 11 Tf BT /F0 11 Tf 190.817 210.540 Td (or) Tj ET /F2 11 Tf BT /F2 11 Tf 202.730 210.540 Td (.config) Tj ET /F0 11 Tf BT /F0 11 Tf 235.730 210.540 Td (will) Tj ET BT /F0 11 Tf 255.596 210.540 Td (be) Tj ET BT /F0 11 Tf 268.730 210.540 Td (ignored) Tj ET BT /F0 11 Tf 305.085 210.540 Td (by) Tj ET /F1 11 Tf BT /F1 11 Tf 318.835 210.540 Td (detox) Tj ET /F0 11 Tf BT /F0 11 Tf 347.248 210.540 Td (during) Tj ET BT /F0 11 Tf 378.719 210.540 Td (recursion.) 
Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 179.762 Td (#) Tj ET BT /F1 11 Tf 90.024 179.762 Td (comments) Tj ET /F0 11 Tf BT /F0 11 Tf 105.270 164.373 Td (Any) Tj ET BT /F0 11 Tf 126.962 164.373 Td (thing) Tj ET BT /F0 11 Tf 152.328 164.373 Td (after) Tj ET BT /F0 11 Tf 175.230 164.373 Td (a) Tj ET BT /F0 11 Tf 182.864 164.373 Td (#) Tj ET BT /F0 11 Tf 191.114 164.373 Td (on) Tj ET BT /F0 11 Tf 204.864 164.373 Td (any) Tj ET BT /F0 11 Tf 223.498 164.373 Td (line) Tj ET BT /F0 11 Tf 242.748 164.373 Td (is) Tj ET BT /F0 11 Tf 252.835 164.373 Td (ignored.) Tj ET /F1 11 Tf BT /F1 11 Tf 76.274 133.595 Td (SEQUENCES) Tj ET /F0 11 Tf BT /F0 11 Tf 81.774 118.206 Td (All) Tj ET BT /F0 11 Tf 98.582 118.206 Td (of) Tj ET BT /F0 11 Tf 110.495 118.206 Td (these) Tj ET BT /F0 11 Tf 135.850 118.206 Td (statements) Tj ET BT /F0 11 Tf 185.042 118.206 Td (occur) Tj ET BT /F0 11 Tf 212.223 118.206 Td (within) Tj ET BT /F0 11 Tf 243.089 118.206 Td (a) Tj ET /F1 11 Tf BT /F1 11 Tf 250.723 118.206 Td (sequence) Tj ET /F0 11 Tf BT /F0 11 Tf 295.636 118.206 Td (block.) 
Tj ET /F0 11 Tf BT /F0 11 Tf 68.024 739.233 Td (DETOXRC\(5\)) Tj ET BT /F0 11 Tf 261.371 739.233 Td (File) Tj ET BT /F0 11 Tf 281.237 739.233 Td (Formats) Tj ET BT /F0 11 Tf 320.045 739.233 Td (Manual) Tj ET BT /F0 11 Tf 479.490 739.233 Td (DETOXRC\(5\)) Tj ET BT /F0 11 Tf 68.024 36.245 Td (Debian) Tj ET BT /F0 11 Tf 267.036 36.245 Td (February) Tj ET BT /F0 11 Tf 309.496 36.245 Td (24,) Tj ET BT /F0 11 Tf 325.996 36.245 Td (2021) Tj ET BT /F0 11 Tf 512.490 36.245 Td (Debian) Tj ET endstream endobj 8 0 obj 14370 endobj 9 0 obj << /ProcSet [/PDF /Text] /Font << /F0 3 0 R /F1 4 0 R /F2 5 0 R /F3 6 0 R >> >> endobj 10 0 obj << /Type /Page /Parent 2 0 R /Resources 9 0 R /Contents 7 0 R >> endobj 11 0 obj << /Length 12 0 R >> stream BT /F1 11 Tf 81.774 702.988 Td (iso8859) Tj ET /F2 11 Tf BT /F2 11 Tf 116.611 702.988 Td (_) Tj ET /F1 11 Tf BT /F1 11 Tf 122.111 702.988 Td (1) Tj ET /F0 11 Tf BT /F0 11 Tf 127.611 702.988 Td (;) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 672.210 Td (iso8859) Tj ET /F2 11 Tf BT /F2 11 Tf 116.611 672.210 Td (_) Tj ET /F1 11 Tf BT /F1 11 Tf 122.111 672.210 Td (1) Tj ET /F0 11 Tf BT /F0 11 Tf 130.361 672.210 Td ({) Tj ET /F1 11 Tf BT /F1 11 Tf 135.641 672.210 Td (builtin) Tj ET /F0 11 Tf BT /F0 11 Tf 169.576 672.210 Td (") Tj ET /F2 11 Tf BT /F2 11 Tf 174.064 672.210 Td (name) Tj ET /F0 11 Tf BT /F0 11 Tf 197.890 672.210 Td (";};) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 641.432 Td (iso8859) Tj ET /F2 11 Tf BT /F2 11 Tf 116.611 641.432 Td (_) Tj ET /F1 11 Tf BT /F1 11 Tf 122.111 641.432 Td (1) Tj ET /F0 11 Tf BT /F0 11 Tf 130.361 641.432 Td ({) Tj ET /F1 11 Tf BT /F1 11 Tf 135.641 641.432 Td (filename) Tj ET /F0 11 Tf BT /F0 11 Tf 178.717 641.432 Td (") Tj ET /F2 11 Tf BT /F2 11 Tf 183.205 641.432 Td (/path/to/filename) Tj ET /F0 11 Tf BT /F0 11 Tf 258.379 641.432 Td (";};) Tj ET BT /F0 11 Tf 105.270 626.043 Td (This) Tj ET BT /F0 11 Tf 127.578 626.043 Td (transliterates) Tj ET BT /F0 11 Tf 186.538 626.043 Td (ISO) Tj ET BT /F0 11 Tf 207.009 626.043 Td 
(8859-1) Tj ET BT /F0 11 Tf 240.922 626.043 Td (characters) Tj ET BT /F0 11 Tf 288.255 626.043 Td (between) Tj ET BT /F0 11 Tf 327.657 626.043 Td (0xA0) Tj ET BT /F0 11 Tf 354.849 626.043 Td (and) Tj ET BT /F0 11 Tf 373.483 626.043 Td (0xFF) Tj ET BT /F0 11 Tf 399.465 626.043 Td (into) Tj ET BT /F0 11 Tf 419.331 626.043 Td (lower) Tj ET BT /F0 11 Tf 447.128 626.043 Td (ASCII) Tj ET BT /F0 11 Tf 478.599 626.043 Td (equivalents.) Tj ET BT /F0 11 Tf 105.270 610.654 Td (The) Tj ET BT /F0 11 Tf 125.125 610.654 Td (output) Tj ET BT /F0 11 Tf 155.991 610.654 Td (is) Tj ET BT /F0 11 Tf 166.078 610.654 Td (not) Tj ET BT /F0 11 Tf 182.886 610.654 Td (necessarily) Tj ET BT /F0 11 Tf 234.509 610.654 Td (safe,) Tj ET BT /F0 11 Tf 257.719 610.654 Td (and) Tj ET BT /F0 11 Tf 276.353 610.654 Td (should) Tj ET BT /F0 11 Tf 308.440 610.654 Td (also) Tj ET BT /F0 11 Tf 328.911 610.654 Td (be) Tj ET BT /F0 11 Tf 342.045 610.654 Td (run) Tj ET BT /F0 11 Tf 359.458 610.654 Td (through) Tj ET BT /F0 11 Tf 396.429 610.654 Td (the) Tj ET /F2 11 Tf BT /F2 11 Tf 412.621 610.654 Td (safe) Tj ET /F0 11 Tf BT /F0 11 Tf 433.092 610.654 Td (filter.) Tj ET BT /F0 11 Tf 105.270 579.876 Td (If) Tj ET /F2 11 Tf BT /F2 11 Tf 115.346 579.876 Td (builtin) Tj ET /F0 11 Tf BT /F0 11 Tf 146.828 579.876 Td (is) Tj ET BT /F0 11 Tf 156.915 579.876 Td (specified,) Tj ET BT /F0 11 Tf 202.125 579.876 Td (a) Tj ET BT /F0 11 Tf 209.759 579.876 Td (builtin) Tj ET BT /F0 11 Tf 241.241 579.876 Td (table) Tj ET BT /F0 11 Tf 265.375 579.876 Td (with) Tj ET BT /F0 11 Tf 287.683 579.876 Td (the) Tj ET BT /F0 11 Tf 303.875 579.876 Td (name) Tj ET BT /F0 11 Tf 330.451 579.876 Td (specified) Tj ET BT /F0 11 Tf 372.911 579.876 Td (will) Tj ET BT /F0 11 Tf 392.777 579.876 Td (be) Tj ET BT /F0 11 Tf 405.911 579.876 Td (used.) 
Tj ET BT /F0 11 Tf 105.270 549.098 Td (Under) Tj ET BT /F0 11 Tf 135.509 549.098 Td (normal) Tj ET BT /F0 11 Tf 169.422 549.098 Td (circumstances,) Tj ET BT /F0 11 Tf 237.237 549.098 Td (the) Tj ET BT /F0 11 Tf 253.429 549.098 Td (filename) Tj ET BT /F0 11 Tf 294.668 549.098 Td (syntax) Tj ET BT /F0 11 Tf 326.139 549.098 Td (is) Tj ET BT /F0 11 Tf 336.226 549.098 Td (not) Tj ET BT /F0 11 Tf 353.034 549.098 Td (needed.) Tj ET /F1 11 Tf BT /F1 11 Tf 392.436 549.098 Td (detox) Tj ET /F0 11 Tf BT /F0 11 Tf 420.849 549.098 Td (looks) Tj ET BT /F0 11 Tf 447.436 549.098 Td (in) Tj ET BT /F0 11 Tf 458.744 549.098 Td (several) Tj ET BT /F0 11 Tf 492.646 549.098 Td (locations) Tj ET BT /F0 11 Tf 105.270 533.709 Td (for) Tj ET BT /F0 11 Tf 120.846 533.709 Td (a) Tj ET BT /F0 11 Tf 128.480 533.709 Td (file) Tj ET BT /F0 11 Tf 145.893 533.709 Td (called) Tj ET /F2 11 Tf BT /F2 11 Tf 174.911 533.709 Td (iso8859_1.tbl) Tj ET /F0 11 Tf BT /F0 11 Tf 235.114 533.709 Td (,) Tj ET BT /F0 11 Tf 240.614 533.709 Td (which) Tj ET BT /F0 11 Tf 270.248 533.709 Td (is) Tj ET BT /F0 11 Tf 280.335 533.709 Td (a) Tj ET BT /F0 11 Tf 287.969 533.709 Td (set) Tj ET BT /F0 11 Tf 302.940 533.709 Td (of) Tj ET BT /F0 11 Tf 314.853 533.709 Td (rules) Tj ET BT /F0 11 Tf 338.987 533.709 Td (defining) Tj ET BT /F0 11 Tf 378.400 533.709 Td (how) Tj ET BT /F0 11 Tf 400.092 533.709 Td (an) Tj ET BT /F0 11 Tf 413.226 533.709 Td (ISO) Tj ET BT /F0 11 Tf 433.697 533.709 Td (8859-1) Tj ET BT /F0 11 Tf 467.610 533.709 Td (character) Tj ET BT /F0 11 Tf 510.664 533.709 Td (should) Tj ET BT /F0 11 Tf 105.270 518.320 Td (be) Tj ET BT /F0 11 Tf 118.404 518.320 Td (translated.) 
Tj ET BT /F0 11 Tf 169.422 518.320 Td (If) Tj ET /F1 11 Tf BT /F1 11 Tf 179.498 518.320 Td (detox) Tj ET /F0 11 Tf BT /F0 11 Tf 207.911 518.320 Td (can't) Tj ET BT /F0 11 Tf 232.650 518.320 Td (find) Tj ET BT /F0 11 Tf 253.121 518.320 Td (the) Tj ET BT /F0 11 Tf 269.313 518.320 Td (translation) Tj ET BT /F0 11 Tf 318.505 518.320 Td (table,) Tj ET BT /F0 11 Tf 345.389 518.320 Td (it) Tj ET BT /F0 11 Tf 354.255 518.320 Td (will) Tj ET BT /F0 11 Tf 374.121 518.320 Td (fall) Tj ET BT /F0 11 Tf 391.534 518.320 Td (back) Tj ET BT /F0 11 Tf 415.052 518.320 Td (on) Tj ET BT /F0 11 Tf 428.802 518.320 Td (the) Tj ET BT /F0 11 Tf 444.994 518.320 Td (builtin) Tj ET BT /F0 11 Tf 476.476 518.320 Td (table) Tj ET /F2 11 Tf BT /F2 11 Tf 105.270 502.931 Td (iso8859_1) Tj ET /F0 11 Tf BT /F0 11 Tf 151.107 502.931 Td (.) Tj ET BT /F0 11 Tf 105.270 472.153 Td (You) Tj ET BT /F0 11 Tf 126.962 472.153 Td (can) Tj ET BT /F0 11 Tf 144.980 472.153 Td (also) Tj ET BT /F0 11 Tf 165.451 472.153 Td (download) Tj ET BT /F0 11 Tf 211.585 472.153 Td (or) Tj ET BT /F0 11 Tf 223.498 472.153 Td (create) Tj ET BT /F0 11 Tf 252.505 472.153 Td (your) Tj ET BT /F0 11 Tf 275.418 472.153 Td (own,) Tj ET BT /F0 11 Tf 299.860 472.153 Td (and) Tj ET BT /F0 11 Tf 318.494 472.153 Td (tell) Tj ET /F1 11 Tf BT /F1 11 Tf 335.302 472.153 Td (detox) Tj ET /F0 11 Tf BT /F0 11 Tf 363.715 472.153 Td (the) Tj ET BT /F0 11 Tf 379.907 472.153 Td (location) Tj ET BT /F0 11 Tf 418.099 472.153 Td (of) Tj ET BT /F0 11 Tf 430.012 472.153 Td (it) Tj ET BT /F0 11 Tf 438.878 472.153 Td (using) Tj ET BT /F0 11 Tf 465.465 472.153 Td (the) Tj ET BT /F0 11 Tf 481.657 472.153 Td (filename) Tj ET BT /F0 11 Tf 105.270 456.764 Td (syntax) Tj ET BT /F0 11 Tf 136.741 456.764 Td (shown) Tj ET BT /F0 11 Tf 168.212 456.764 Td (above.) 
Tj ET BT /F0 11 Tf 105.270 425.986 Td (You) Tj ET BT /F0 11 Tf 126.962 425.986 Td (can) Tj ET BT /F0 11 Tf 144.980 425.986 Td (chain) Tj ET BT /F0 11 Tf 171.556 425.986 Td (together) Tj ET BT /F0 11 Tf 210.353 425.986 Td (multiple) Tj ET /F2 11 Tf BT /F2 11 Tf 249.777 425.986 Td (iso8859_1) Tj ET /F0 11 Tf BT /F0 11 Tf 298.364 425.986 Td (filters,) Tj ET BT /F0 11 Tf 329.527 425.986 Td (as) Tj ET BT /F0 11 Tf 341.440 425.986 Td (long) Tj ET BT /F0 11 Tf 363.748 425.986 Td (as) Tj ET BT /F0 11 Tf 375.661 425.986 Td (the) Tj ET BT /F0 11 Tf 391.853 425.986 Td (default) Tj ET BT /F0 11 Tf 425.150 425.986 Td (value) Tj ET BT /F0 11 Tf 451.726 425.986 Td (of) Tj ET BT /F0 11 Tf 463.639 425.986 Td (all) Tj ET BT /F0 11 Tf 477.389 425.986 Td (but) Tj ET BT /F0 11 Tf 494.197 425.986 Td (the) Tj ET BT /F0 11 Tf 510.389 425.986 Td (last) Tj ET BT /F0 11 Tf 105.270 410.597 Td (one) Tj ET BT /F0 11 Tf 123.904 410.597 Td (it) Tj ET BT /F0 11 Tf 132.770 410.597 Td (empty.) Tj ET BT /F0 11 Tf 168.520 410.597 Td (This) Tj ET BT /F0 11 Tf 190.828 410.597 Td (is) Tj ET BT /F0 11 Tf 200.915 410.597 Td (explained) Tj ET BT /F0 11 Tf 246.433 410.597 Td (in) Tj ET BT /F0 11 Tf 257.741 410.597 Td (detox.tbl\(5\).) Tj ET BT /F0 11 Tf 105.270 379.819 Td (This) Tj ET BT /F0 11 Tf 127.578 379.819 Td (filter) Tj ET BT /F0 11 Tf 151.712 379.819 Td (is) Tj ET BT /F0 11 Tf 161.799 379.819 Td (mutually) Tj ET BT /F0 11 Tf 203.665 379.819 Td (exclusive) Tj ET BT /F0 11 Tf 247.962 379.819 Td (with) Tj ET BT /F0 11 Tf 270.270 379.819 Td (the) Tj ET /F2 11 Tf BT /F2 11 Tf 286.462 379.819 Td (utf_8) Tj ET /F0 11 Tf BT /F0 11 Tf 311.828 379.819 Td (filter.) 
Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 349.041 Td (utf) Tj ET /F2 11 Tf BT /F2 11 Tf 95.216 349.041 Td (_) Tj ET /F1 11 Tf BT /F1 11 Tf 100.716 349.041 Td (8) Tj ET /F0 11 Tf BT /F0 11 Tf 106.216 349.041 Td (;) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 318.263 Td (utf) Tj ET /F2 11 Tf BT /F2 11 Tf 95.216 318.263 Td (_) Tj ET /F1 11 Tf BT /F1 11 Tf 100.716 318.263 Td (8) Tj ET /F0 11 Tf BT /F0 11 Tf 108.966 318.263 Td ({) Tj ET /F1 11 Tf BT /F1 11 Tf 114.246 318.263 Td (builtin) Tj ET /F0 11 Tf BT /F0 11 Tf 148.181 318.263 Td (") Tj ET /F2 11 Tf BT /F2 11 Tf 152.669 318.263 Td (name) Tj ET /F0 11 Tf BT /F0 11 Tf 176.495 318.263 Td (";};) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 287.485 Td (utf) Tj ET /F2 11 Tf BT /F2 11 Tf 95.216 287.485 Td (_) Tj ET /F1 11 Tf BT /F1 11 Tf 100.716 287.485 Td (8) Tj ET /F0 11 Tf BT /F0 11 Tf 108.966 287.485 Td ({) Tj ET /F1 11 Tf BT /F1 11 Tf 114.246 287.485 Td (filename) Tj ET /F0 11 Tf BT /F0 11 Tf 157.322 287.485 Td (") Tj ET /F2 11 Tf BT /F2 11 Tf 161.810 287.485 Td (/path/to/filename) Tj ET /F0 11 Tf BT /F0 11 Tf 236.984 287.485 Td (";};) Tj ET BT /F0 11 Tf 105.270 272.096 Td (This) Tj ET BT /F0 11 Tf 127.578 272.096 Td (transliterations) Tj ET BT /F0 11 Tf 195.712 272.096 Td (Unicode) Tj ET BT /F0 11 Tf 235.730 272.096 Td (characters,) Tj ET BT /F0 11 Tf 285.813 272.096 Td (encoded) Tj ET BT /F0 11 Tf 325.215 272.096 Td (using) Tj ET BT /F0 11 Tf 351.802 272.096 Td (UTF-8,) Tj ET BT /F0 11 Tf 387.244 272.096 Td (into) Tj ET BT /F0 11 Tf 407.110 272.096 Td (lower) Tj ET BT /F0 11 Tf 434.907 272.096 Td (ASCII) Tj ET BT /F0 11 Tf 466.378 272.096 Td (equivalents.) 
Tj ET BT /F0 11 Tf 105.270 241.318 Td (This) Tj ET BT /F0 11 Tf 127.578 241.318 Td (operates) Tj ET BT /F0 11 Tf 166.980 241.318 Td (in) Tj ET BT /F0 11 Tf 178.288 241.318 Td (a) Tj ET BT /F0 11 Tf 185.922 241.318 Td (manner) Tj ET BT /F0 11 Tf 221.661 241.318 Td (similar) Tj ET BT /F0 11 Tf 254.969 241.318 Td (to) Tj ET /F2 11 Tf BT /F2 11 Tf 266.277 241.318 Td (iso8859_1) Tj ET /F0 11 Tf BT /F0 11 Tf 312.114 241.318 Td (,) Tj ET BT /F0 11 Tf 317.614 241.318 Td (except) Tj ET BT /F0 11 Tf 349.074 241.318 Td (it) Tj ET BT /F0 11 Tf 357.940 241.318 Td (looks) Tj ET BT /F0 11 Tf 384.527 241.318 Td (for) Tj ET BT /F0 11 Tf 400.103 241.318 Td (a) Tj ET BT /F0 11 Tf 407.737 241.318 Td (translation) Tj ET BT /F0 11 Tf 456.929 241.318 Td (table) Tj ET BT /F0 11 Tf 481.063 241.318 Td (called) Tj ET /F2 11 Tf BT /F2 11 Tf 105.270 225.929 Td (unicode.tbl) Tj ET /F0 11 Tf BT /F0 11 Tf 154.462 225.929 Td (.) Tj ET BT /F0 11 Tf 105.270 195.151 Td (Similar) Tj ET BT /F0 11 Tf 140.415 195.151 Td (to) Tj ET BT /F0 11 Tf 151.723 195.151 Td (the) Tj ET /F2 11 Tf BT /F2 11 Tf 167.915 195.151 Td (iso8859_1) Tj ET /F0 11 Tf BT /F0 11 Tf 216.502 195.151 Td (filter,) Tj ET BT /F0 11 Tf 243.386 195.151 Td (an) Tj ET BT /F0 11 Tf 256.520 195.151 Td (internal) Tj ET BT /F0 11 Tf 292.875 195.151 Td (table) Tj ET BT /F0 11 Tf 317.009 195.151 Td (exists,) Tj ET BT /F0 11 Tf 347.567 195.151 Td (based) Tj ET BT /F0 11 Tf 375.364 195.151 Td (on) Tj ET BT /F0 11 Tf 389.114 195.151 Td (the) Tj ET BT /F0 11 Tf 405.306 195.151 Td (stock) Tj ET BT /F0 11 Tf 431.277 195.151 Td (translation) Tj ET BT /F0 11 Tf 480.469 195.151 Td (table,) Tj ET BT /F0 11 Tf 507.353 195.151 Td (called) Tj ET /F2 11 Tf BT /F2 11 Tf 105.270 179.762 Td (unicode) Tj ET /F0 11 Tf BT /F0 11 Tf 140.096 179.762 Td (.) 
Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 148.984 Td (uncgi) Tj ET /F0 11 Tf BT /F0 11 Tf 107.448 148.984 Td (;) Tj ET BT /F0 11 Tf 105.270 133.595 Td (This) Tj ET BT /F0 11 Tf 127.578 133.595 Td (translates) Tj ET BT /F0 11 Tf 171.875 133.595 Td (CGI-escaped) Tj ET BT /F0 11 Tf 232.045 133.595 Td (strings) Tj ET BT /F0 11 Tf 264.132 133.595 Td (into) Tj ET BT /F0 11 Tf 283.998 133.595 Td (their) Tj ET BT /F0 11 Tf 306.911 133.595 Td (ASCII) Tj ET BT /F0 11 Tf 338.382 133.595 Td (equivalents.) Tj ET BT /F0 11 Tf 396.737 133.595 Td (The) Tj ET BT /F0 11 Tf 416.592 133.595 Td (output) Tj ET BT /F0 11 Tf 447.458 133.595 Td (of) Tj ET BT /F0 11 Tf 459.371 133.595 Td (this) Tj ET BT /F0 11 Tf 478.016 133.595 Td (is) Tj ET BT /F0 11 Tf 488.103 133.595 Td (not) Tj ET BT /F0 11 Tf 105.270 118.206 Td (necessarily) Tj ET BT /F0 11 Tf 156.893 118.206 Td (safe,) Tj ET BT /F0 11 Tf 180.103 118.206 Td (and) Tj ET BT /F0 11 Tf 198.737 118.206 Td (should) Tj ET BT /F0 11 Tf 230.824 118.206 Td (be) Tj ET BT /F0 11 Tf 243.958 118.206 Td (run) Tj ET BT /F0 11 Tf 261.371 118.206 Td (through) Tj ET BT /F0 11 Tf 298.342 118.206 Td (the) Tj ET /F2 11 Tf BT /F2 11 Tf 314.534 118.206 Td (safe) Tj ET /F0 11 Tf BT /F0 11 Tf 335.005 118.206 Td (filter,) Tj ET BT /F0 11 Tf 361.889 118.206 Td (at) Tj ET BT /F0 11 Tf 372.581 118.206 Td (the) Tj ET BT /F0 11 Tf 388.773 118.206 Td (least.) 
Tj ET /F0 11 Tf BT /F0 11 Tf 68.024 739.233 Td (DETOXRC\(5\)) Tj ET BT /F0 11 Tf 261.371 739.233 Td (File) Tj ET BT /F0 11 Tf 281.237 739.233 Td (Formats) Tj ET BT /F0 11 Tf 320.045 739.233 Td (Manual) Tj ET BT /F0 11 Tf 479.490 739.233 Td (DETOXRC\(5\)) Tj ET BT /F0 11 Tf 68.024 36.245 Td (Debian) Tj ET BT /F0 11 Tf 267.036 36.245 Td (February) Tj ET BT /F0 11 Tf 309.496 36.245 Td (24,) Tj ET BT /F0 11 Tf 325.996 36.245 Td (2021) Tj ET BT /F0 11 Tf 512.490 36.245 Td (Debian) Tj ET endstream endobj 12 0 obj 13400 endobj 13 0 obj << /ProcSet [/PDF /Text] /Font << /F0 3 0 R /F1 4 0 R /F2 5 0 R /F3 6 0 R >> >> endobj 14 0 obj << /Type /Page /Parent 2 0 R /Resources 13 0 R /Contents 11 0 R >> endobj 15 0 obj << /Length 16 0 R >> stream BT /F1 11 Tf 81.774 702.988 Td (safe) Tj ET /F0 11 Tf BT /F0 11 Tf 100.100 702.988 Td (;) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 672.210 Td (safe) Tj ET /F0 11 Tf BT /F0 11 Tf 102.850 672.210 Td ({) Tj ET /F1 11 Tf BT /F1 11 Tf 108.130 672.210 Td (builtin) Tj ET /F0 11 Tf BT /F0 11 Tf 142.065 672.210 Td (") Tj ET /F2 11 Tf BT /F2 11 Tf 146.553 672.210 Td (name) Tj ET /F0 11 Tf BT /F0 11 Tf 170.379 672.210 Td (";};) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 641.432 Td (safe) Tj ET /F0 11 Tf BT /F0 11 Tf 102.850 641.432 Td ({) Tj ET /F1 11 Tf BT /F1 11 Tf 108.130 641.432 Td (filename) Tj ET /F0 11 Tf BT /F0 11 Tf 151.206 641.432 Td (") Tj ET /F2 11 Tf BT /F2 11 Tf 155.694 641.432 Td (/path/to/filename) Tj ET /F0 11 Tf BT /F0 11 Tf 230.868 641.432 Td (";};) Tj ET BT /F0 11 Tf 105.270 626.043 Td (This) Tj ET BT /F0 11 Tf 127.578 626.043 Td (could) Tj ET BT /F0 11 Tf 154.770 626.043 Td (also) Tj ET BT /F0 11 Tf 175.241 626.043 Td (be) Tj ET BT /F0 11 Tf 188.375 626.043 Td (called) Tj ET BT /F0 11 Tf 217.393 626.043 Td ("safe) Tj ET BT /F0 11 Tf 242.341 626.043 Td (for) Tj ET BT /F0 11 Tf 257.917 626.043 Td (Unix-like) Tj ET BT /F0 11 Tf 302.830 626.043 Td (operating) Tj ET BT /F0 11 Tf 347.127 626.043 Td (systems".) 
Tj ET BT /F0 11 Tf 394.702 626.043 Td (It) Tj ET BT /F0 11 Tf 404.173 626.043 Td (translates) Tj ET BT /F0 11 Tf 448.470 626.043 Td (characters) Tj ET BT /F0 11 Tf 495.803 626.043 Td (that) Tj ET BT /F0 11 Tf 515.053 626.043 Td (are) Tj ET BT /F0 11 Tf 105.270 610.654 Td (difficult) Tj ET BT /F0 11 Tf 143.462 610.654 Td (to) Tj ET BT /F0 11 Tf 154.770 610.654 Td (work) Tj ET BT /F0 11 Tf 180.125 610.654 Td (with) Tj ET BT /F0 11 Tf 202.433 610.654 Td (in) Tj ET BT /F0 11 Tf 213.741 610.654 Td (Unix) Tj ET BT /F0 11 Tf 238.491 610.654 Td (environments) Tj ET BT /F0 11 Tf 301.125 610.654 Td (into) Tj ET BT /F0 11 Tf 320.991 610.654 Td (characters) Tj ET BT /F0 11 Tf 368.324 610.654 Td (that) Tj ET BT /F0 11 Tf 387.574 610.654 Td (are) Tj ET BT /F0 11 Tf 403.755 610.654 Td (not.) Tj ET BT /F0 11 Tf 105.270 579.876 Td (Similar) Tj ET BT /F0 11 Tf 140.415 579.876 Td (to) Tj ET BT /F0 11 Tf 151.723 579.876 Td (the) Tj ET /F2 11 Tf BT /F2 11 Tf 167.915 579.876 Td (iso8859_1) Tj ET /F0 11 Tf BT /F0 11 Tf 216.502 579.876 Td (and) Tj ET /F2 11 Tf BT /F2 11 Tf 235.136 579.876 Td (utf_8) Tj ET /F0 11 Tf BT /F0 11 Tf 260.502 579.876 Td (filters,) Tj ET BT /F0 11 Tf 291.665 579.876 Td (this) Tj ET BT /F0 11 Tf 310.310 579.876 Td (can) Tj ET BT /F0 11 Tf 328.328 579.876 Td (be) Tj ET BT /F0 11 Tf 341.462 579.876 Td (controlled) Tj ET BT /F0 11 Tf 388.817 579.876 Td (using) Tj ET BT /F0 11 Tf 415.404 579.876 Td (a) Tj ET BT /F0 11 Tf 423.038 579.876 Td (translation) Tj ET BT /F0 11 Tf 472.230 579.876 Td (table.) 
Tj ET BT /F0 11 Tf 501.864 579.876 Td (This) Tj ET BT /F0 11 Tf 105.270 564.487 Td (filter) Tj ET BT /F0 11 Tf 129.404 564.487 Td (also) Tj ET BT /F0 11 Tf 149.875 564.487 Td (has) Tj ET BT /F0 11 Tf 167.288 564.487 Td (an) Tj ET BT /F0 11 Tf 180.422 564.487 Td (internal) Tj ET BT /F0 11 Tf 216.777 564.487 Td (version) Tj ET BT /F0 11 Tf 251.911 564.487 Td (of) Tj ET BT /F0 11 Tf 263.824 564.487 Td (the) Tj ET BT /F0 11 Tf 280.016 564.487 Td (translation) Tj ET BT /F0 11 Tf 329.208 564.487 Td (table,) Tj ET BT /F0 11 Tf 356.092 564.487 Td (which) Tj ET BT /F0 11 Tf 385.726 564.487 Td (can) Tj ET BT /F0 11 Tf 403.744 564.487 Td (be) Tj ET BT /F0 11 Tf 416.878 564.487 Td (accessed) Tj ET BT /F0 11 Tf 458.106 564.487 Td (via) Tj ET BT /F0 11 Tf 474.298 564.487 Td (the) Tj ET BT /F0 11 Tf 490.490 564.487 Td (builtin) Tj ET BT /F0 11 Tf 521.972 564.487 Td (table) Tj ET /F2 11 Tf BT /F2 11 Tf 105.270 549.098 Td (safe) Tj ET /F0 11 Tf BT /F0 11 Tf 122.991 549.098 Td (.) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 518.320 Td (wipeup) Tj ET /F0 11 Tf BT /F0 11 Tf 116.006 518.320 Td (;) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 487.542 Td (wipeup) Tj ET /F0 11 Tf BT /F0 11 Tf 118.756 487.542 Td ({) Tj ET /F1 11 Tf BT /F1 11 Tf 124.036 487.542 Td (remove) Tj ET /F2 11 Tf BT /F2 11 Tf 158.851 487.542 Td (_) Tj ET /F1 11 Tf BT /F1 11 Tf 164.351 487.542 Td (trailing) Tj ET /F0 11 Tf BT /F0 11 Tf 199.188 487.542 Td (;};) Tj ET BT /F0 11 Tf 105.270 472.153 Td (Reduces) Tj ET BT /F0 11 Tf 145.288 472.153 Td (any) Tj ET BT /F0 11 Tf 163.922 472.153 Td (series) Tj ET BT /F0 11 Tf 191.719 472.153 Td (of) Tj ET BT /F0 11 Tf 203.632 472.153 Td (underscores) Tj ET BT /F0 11 Tf 258.918 472.153 Td (or) Tj ET BT /F0 11 Tf 270.831 472.153 Td (dashes) Tj ET BT /F0 11 Tf 302.907 472.153 Td (to) Tj ET BT /F0 11 Tf 314.215 472.153 Td (a) Tj ET BT /F0 11 Tf 321.849 472.153 Td (single) Tj ET BT /F0 11 Tf 350.878 472.153 Td (character.) 
Tj ET BT /F0 11 Tf 399.432 472.153 Td (The) Tj ET BT /F0 11 Tf 419.287 472.153 Td (dash) Tj ET BT /F0 11 Tf 442.200 472.153 Td (takes) Tj ET BT /F0 11 Tf 467.555 472.153 Td (precedence.) Tj ET BT /F0 11 Tf 105.270 441.375 Td (If) Tj ET /F1 11 Tf BT /F1 11 Tf 115.346 441.375 Td (remove) Tj ET /F2 11 Tf BT /F2 11 Tf 150.161 441.375 Td (_) Tj ET /F1 11 Tf BT /F1 11 Tf 155.661 441.375 Td (trailing) Tj ET /F0 11 Tf BT /F0 11 Tf 193.248 441.375 Td (is) Tj ET BT /F0 11 Tf 203.335 441.375 Td (set,) Tj ET BT /F0 11 Tf 221.056 441.375 Td (then) Tj ET BT /F0 11 Tf 242.748 441.375 Td (periods) Tj ET BT /F0 11 Tf 277.882 441.375 Td (are) Tj ET BT /F0 11 Tf 294.063 441.375 Td (added) Tj ET BT /F0 11 Tf 323.081 441.375 Td (to) Tj ET BT /F0 11 Tf 334.389 441.375 Td (the) Tj ET BT /F0 11 Tf 350.581 441.375 Td (set) Tj ET BT /F0 11 Tf 365.552 441.375 Td (of) Tj ET BT /F0 11 Tf 377.465 441.375 Td (characters) Tj ET BT /F0 11 Tf 424.798 441.375 Td (to) Tj ET BT /F0 11 Tf 436.106 441.375 Td (work) Tj ET BT /F0 11 Tf 461.461 441.375 Td (on.) Tj ET BT /F0 11 Tf 480.711 441.375 Td (The) Tj ET BT /F0 11 Tf 500.566 441.375 Td (period) Tj ET BT /F0 11 Tf 105.270 425.986 Td (then) Tj ET BT /F0 11 Tf 126.962 425.986 Td (takes) Tj ET BT /F0 11 Tf 152.317 425.986 Td (precedence,) Tj ET BT /F0 11 Tf 207.284 425.986 Td (followed) Tj ET BT /F0 11 Tf 249.139 425.986 Td (by) Tj ET BT /F0 11 Tf 262.889 425.986 Td (the) Tj ET BT /F0 11 Tf 279.081 425.986 Td (dash.) 
Tj ET BT /F0 11 Tf 105.270 395.208 Td (If) Tj ET BT /F0 11 Tf 115.346 395.208 Td (a) Tj ET BT /F0 11 Tf 122.980 395.208 Td (hash) Tj ET BT /F0 11 Tf 145.893 395.208 Td (character,) Tj ET BT /F0 11 Tf 191.697 395.208 Td (underscore,) Tj ET BT /F0 11 Tf 245.454 395.208 Td (or) Tj ET BT /F0 11 Tf 257.367 395.208 Td (dash) Tj ET BT /F0 11 Tf 280.280 395.208 Td (are) Tj ET BT /F0 11 Tf 296.461 395.208 Td (present) Tj ET BT /F0 11 Tf 330.979 395.208 Td (at) Tj ET BT /F0 11 Tf 341.671 395.208 Td (the) Tj ET BT /F0 11 Tf 357.863 395.208 Td (start) Tj ET BT /F0 11 Tf 379.555 395.208 Td (of) Tj ET BT /F0 11 Tf 391.468 395.208 Td (the) Tj ET BT /F0 11 Tf 407.660 395.208 Td (filename,) Tj ET BT /F0 11 Tf 451.649 395.208 Td (they) Tj ET BT /F0 11 Tf 473.341 395.208 Td (will) Tj ET BT /F0 11 Tf 493.207 395.208 Td (be) Tj ET BT /F0 11 Tf 105.270 379.819 Td (removed.) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 349.041 Td (max) Tj ET /F2 11 Tf BT /F2 11 Tf 101.937 349.041 Td (_) Tj ET /F1 11 Tf BT /F1 11 Tf 107.437 349.041 Td (length) Tj ET /F0 11 Tf BT /F0 11 Tf 139.524 349.041 Td ({) Tj ET /F1 11 Tf BT /F1 11 Tf 144.804 349.041 Td (length) Tj ET /F2 11 Tf BT /F2 11 Tf 176.891 349.041 Td (value) Tj ET /F0 11 Tf BT /F0 11 Tf 200.717 349.041 Td (;};) Tj ET BT /F0 11 Tf 105.270 333.652 Td (This) Tj ET BT /F0 11 Tf 127.578 333.652 Td (trims) Tj ET BT /F0 11 Tf 152.944 333.652 Td (a) Tj ET BT /F0 11 Tf 160.578 333.652 Td (filename) Tj ET BT /F0 11 Tf 201.817 333.652 Td (down) Tj ET BT /F0 11 Tf 229.009 333.652 Td (to) Tj ET BT /F0 11 Tf 240.317 333.652 Td (the) Tj ET BT /F0 11 Tf 256.509 333.652 Td (length) Tj ET BT /F0 11 Tf 286.759 333.652 Td (specified) Tj ET BT /F0 11 Tf 329.219 333.652 Td (\(or) Tj ET BT /F0 11 Tf 344.795 333.652 Td (less\).) 
Tj ET BT /F0 11 Tf 373.208 333.652 Td (It) Tj ET BT /F0 11 Tf 382.679 333.652 Td (is) Tj ET BT /F0 11 Tf 392.766 333.652 Td (conscious) Tj ET BT /F0 11 Tf 438.900 333.652 Td (of) Tj ET BT /F0 11 Tf 450.813 333.652 Td (extensions) Tj ET BT /F0 11 Tf 500.005 333.652 Td (and) Tj ET BT /F0 11 Tf 105.270 318.263 Td (attempts) Tj ET BT /F0 11 Tf 145.299 318.263 Td (to) Tj ET BT /F0 11 Tf 156.607 318.263 Td (preserve) Tj ET BT /F0 11 Tf 196.614 318.263 Td (anything) Tj ET BT /F0 11 Tf 237.864 318.263 Td (following) Tj ET BT /F0 11 Tf 283.393 318.263 Td (the) Tj ET BT /F0 11 Tf 299.585 318.263 Td (last) Tj ET BT /F0 11 Tf 317.614 318.263 Td (period) Tj ET BT /F0 11 Tf 348.469 318.263 Td (in) Tj ET BT /F0 11 Tf 359.777 318.263 Td (a) Tj ET BT /F0 11 Tf 367.411 318.263 Td (filename.) Tj ET BT /F0 11 Tf 105.270 287.485 Td (For) Tj ET BT /F0 11 Tf 123.299 287.485 Td (instance,) Tj ET BT /F0 11 Tf 164.846 287.485 Td (given) Tj ET BT /F0 11 Tf 192.038 287.485 Td (a) Tj ET BT /F0 11 Tf 199.672 287.485 Td (max) Tj ET BT /F0 11 Tf 221.364 287.485 Td (length) Tj ET BT /F0 11 Tf 251.614 287.485 Td (of) Tj ET BT /F0 11 Tf 263.527 287.485 Td (12,) Tj ET BT /F0 11 Tf 280.027 287.485 Td (and) Tj ET BT /F0 11 Tf 298.661 287.485 Td (a) Tj ET BT /F0 11 Tf 306.295 287.485 Td (filename) Tj ET BT /F0 11 Tf 347.534 287.485 Td (of) Tj ET /F2 11 Tf BT /F2 11 Tf 359.447 287.485 Td (this_is_my_file.txt) Tj ET /F0 11 Tf BT /F0 11 Tf 439.813 287.485 Td (,) Tj ET BT /F0 11 Tf 445.313 287.485 Td (the) Tj ET BT /F0 11 Tf 461.505 287.485 Td (filter) Tj ET BT /F0 11 Tf 485.639 287.485 Td (would) Tj ET BT /F0 11 Tf 105.270 272.096 Td (output) Tj ET /F2 11 Tf BT /F2 11 Tf 136.136 272.096 Td (this_is_.txt) Tj ET /F0 11 Tf BT /F0 11 Tf 184.118 272.096 Td (.) 
Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 241.318 Td (lower) Tj ET /F0 11 Tf BT /F0 11 Tf 108.042 241.318 Td (;) Tj ET BT /F0 11 Tf 105.270 225.929 Td (This) Tj ET BT /F0 11 Tf 127.578 225.929 Td (translates) Tj ET BT /F0 11 Tf 171.875 225.929 Td (uppercase) Tj ET BT /F0 11 Tf 218.603 225.929 Td (characters) Tj ET BT /F0 11 Tf 265.936 225.929 Td (into) Tj ET BT /F0 11 Tf 285.802 225.929 Td (lowercase) Tj ET BT /F0 11 Tf 332.530 225.929 Td (characters.) Tj ET BT /F0 11 Tf 385.363 225.929 Td (It) Tj ET BT /F0 11 Tf 394.834 225.929 Td (only) Tj ET BT /F0 11 Tf 417.142 225.929 Td (works) Tj ET BT /F0 11 Tf 446.776 225.929 Td (on) Tj ET BT /F0 11 Tf 460.526 225.929 Td (ASCII) Tj ET BT /F0 11 Tf 491.997 225.929 Td (characters.) Tj ET /F1 11 Tf BT /F1 11 Tf 68.024 195.151 Td (BUILTIN) Tj ET BT /F1 11 Tf 117.227 195.151 Td (TABLES) Tj ET /F0 11 Tf BT /F0 11 Tf 81.774 179.762 Td (cp1252) Tj ET BT /F0 11 Tf 105.270 164.373 Td (A) Tj ET BT /F0 11 Tf 115.962 164.373 Td (translation) Tj ET BT /F0 11 Tf 165.154 164.373 Td (table) Tj ET BT /F0 11 Tf 189.288 164.373 Td (for) Tj ET BT /F0 11 Tf 204.864 164.373 Td (transliterating) Tj ET BT /F0 11 Tf 268.719 164.373 Td (CP-1252) Tj ET BT /F0 11 Tf 310.585 164.373 Td (characters) Tj ET BT /F0 11 Tf 357.918 164.373 Td (to) Tj ET BT /F0 11 Tf 369.226 164.373 Td (ASCII.) 
Tj ET BT /F0 11 Tf 406.197 164.373 Td (This) Tj ET BT /F0 11 Tf 428.505 164.373 Td (is) Tj ET BT /F0 11 Tf 438.592 164.373 Td (no) Tj ET BT /F0 11 Tf 452.342 164.373 Td (longer) Tj ET BT /F0 11 Tf 483.197 164.373 Td (a) Tj ET BT /F0 11 Tf 490.831 164.373 Td (common) Tj ET BT /F0 11 Tf 105.270 148.984 Td (use) Tj ET BT /F0 11 Tf 122.683 148.984 Td (case,) Tj ET BT /F0 11 Tf 147.114 148.984 Td (and) Tj ET BT /F0 11 Tf 165.748 148.984 Td (has) Tj ET BT /F0 11 Tf 183.161 148.984 Td (been) Tj ET BT /F0 11 Tf 206.679 148.984 Td (moved) Tj ET BT /F0 11 Tf 239.371 148.984 Td (to) Tj ET BT /F0 11 Tf 250.679 148.984 Td (a) Tj ET BT /F0 11 Tf 258.313 148.984 Td (separate) Tj ET BT /F0 11 Tf 297.099 148.984 Td (table.) Tj ET BT /F0 11 Tf 81.774 118.206 Td (iso8859_1) Tj ET BT /F0 11 Tf 105.270 102.817 Td (A) Tj ET BT /F0 11 Tf 115.962 102.817 Td (translation) Tj ET BT /F0 11 Tf 165.154 102.817 Td (table) Tj ET BT /F0 11 Tf 189.288 102.817 Td (for) Tj ET BT /F0 11 Tf 204.864 102.817 Td (transliterating) Tj ET BT /F0 11 Tf 268.719 102.817 Td (single-byte) Tj ET BT /F0 11 Tf 320.353 102.817 Td (characters) Tj ET BT /F0 11 Tf 367.686 102.817 Td (with) Tj ET BT /F0 11 Tf 389.994 102.817 Td (the) Tj ET BT /F0 11 Tf 406.186 102.817 Td (high) Tj ET BT /F0 11 Tf 428.494 102.817 Td (bit) Tj ET BT /F0 11 Tf 442.860 102.817 Td (set) Tj ET BT /F0 11 Tf 457.831 102.817 Td (from) Tj ET BT /F0 11 Tf 481.965 102.817 Td (ISO) Tj ET BT /F0 11 Tf 502.436 102.817 Td (8859-1) Tj ET /F0 11 Tf BT /F0 11 Tf 68.024 739.233 Td (DETOXRC\(5\)) Tj ET BT /F0 11 Tf 261.371 739.233 Td (File) Tj ET BT /F0 11 Tf 281.237 739.233 Td (Formats) Tj ET BT /F0 11 Tf 320.045 739.233 Td (Manual) Tj ET BT /F0 11 Tf 479.490 739.233 Td (DETOXRC\(5\)) Tj ET BT /F0 11 Tf 68.024 36.245 Td (Debian) Tj ET BT /F0 11 Tf 267.036 36.245 Td (February) Tj ET BT /F0 11 Tf 309.496 36.245 Td (24,) Tj ET BT /F0 11 Tf 325.996 36.245 Td (2021) Tj ET BT /F0 11 Tf 512.490 36.245 Td (Debian) Tj ET endstream endobj 16 0 obj 12882 endobj 17 0 
obj << /ProcSet [/PDF /Text] /Font << /F0 3 0 R /F1 4 0 R /F2 5 0 R /F3 6 0 R >> >> endobj 18 0 obj << /Type /Page /Parent 2 0 R /Resources 17 0 R /Contents 15 0 R >> endobj 19 0 obj << /Length 20 0 R >> stream BT /F0 11 Tf 105.270 702.988 Td (to) Tj ET BT /F0 11 Tf 116.578 702.988 Td (ASCII.) Tj ET BT /F0 11 Tf 81.774 672.210 Td (safe) Tj ET BT /F0 11 Tf 105.270 672.210 Td (A) Tj ET BT /F0 11 Tf 115.962 672.210 Td (replacement) Tj ET BT /F0 11 Tf 172.469 672.210 Td (table) Tj ET BT /F0 11 Tf 196.603 672.210 Td (for) Tj ET BT /F0 11 Tf 212.179 672.210 Td (characters) Tj ET BT /F0 11 Tf 259.512 672.210 Td (that) Tj ET BT /F0 11 Tf 278.762 672.210 Td (are) Tj ET BT /F0 11 Tf 294.943 672.210 Td (hard) Tj ET BT /F0 11 Tf 317.240 672.210 Td (to) Tj ET BT /F0 11 Tf 328.548 672.210 Td (work) Tj ET BT /F0 11 Tf 353.903 672.210 Td (with) Tj ET BT /F0 11 Tf 376.211 672.210 Td (under) Tj ET BT /F0 11 Tf 404.008 672.210 Td (Unix) Tj ET BT /F0 11 Tf 428.758 672.210 Td (and) Tj ET BT /F0 11 Tf 447.392 672.210 Td (Unix-like) Tj ET BT /F0 11 Tf 492.305 672.210 Td (OSs.) Tj ET BT /F0 11 Tf 81.774 641.432 Td (unicode) Tj ET BT /F0 11 Tf 105.270 626.043 Td (A) Tj ET BT /F0 11 Tf 115.962 626.043 Td (translation) Tj ET BT /F0 11 Tf 165.154 626.043 Td (table) Tj ET BT /F0 11 Tf 189.288 626.043 Td (for) Tj ET BT /F0 11 Tf 204.864 626.043 Td (transliterating) Tj ET BT /F0 11 Tf 268.719 626.043 Td (multi-byte) Tj ET BT /F0 11 Tf 317.306 626.043 Td (characters) Tj ET BT /F0 11 Tf 364.639 626.043 Td (encoded) Tj ET BT /F0 11 Tf 404.041 626.043 Td (in) Tj ET BT /F0 11 Tf 415.349 626.043 Td (UTF-8) Tj ET BT /F0 11 Tf 448.041 626.043 Td (to) Tj ET BT /F0 11 Tf 459.349 626.043 Td (ASCII.) 
Tj ET /F1 11 Tf BT /F1 11 Tf 68.024 595.265 Td (EXAMPLES) Tj ET /F0 11 Tf BT /F0 11 Tf 81.774 579.876 Td (#) Tj ET BT /F0 11 Tf 90.024 579.876 Td (transliterate) Tj ET BT /F0 11 Tf 144.705 579.876 Td (UTF-8) Tj ET BT /F0 11 Tf 177.397 579.876 Td (to) Tj ET BT /F0 11 Tf 188.705 579.876 Td (ASCII) Tj ET BT /F0 11 Tf 220.176 579.876 Td (\(using) Tj ET BT /F0 11 Tf 250.426 579.876 Td (chained) Tj ET BT /F0 11 Tf 287.386 579.876 Td (tables\),) Tj ET BT /F0 11 Tf 322.212 579.876 Td (clean) Tj ET BT /F0 11 Tf 348.172 579.876 Td (up) Tj ET BT /F0 11 Tf 81.774 564.487 Td (sequence) Tj ET BT /F0 11 Tf 124.839 564.487 Td (utf8) Tj ET BT /F0 11 Tf 145.310 564.487 Td ({) Tj ET BT /F0 11 Tf 87.274 549.098 Td (utf_8) Tj ET BT /F0 11 Tf 113.245 549.098 Td ({) Tj ET BT /F0 11 Tf 92.774 533.709 Td (filename) Tj ET BT /F0 11 Tf 134.013 533.709 Td ("/usr/local/share/detox/custom.tbl";) Tj ET BT /F0 11 Tf 87.274 518.320 Td (};) Tj ET BT /F0 11 Tf 87.274 502.931 Td (utf_8) Tj ET BT /F0 11 Tf 113.245 502.931 Td ({) Tj ET BT /F0 11 Tf 92.774 487.542 Td (builtin) Tj ET BT /F0 11 Tf 124.256 487.542 Td ("unicode";) Tj ET BT /F0 11 Tf 87.274 472.153 Td (};) Tj ET BT /F0 11 Tf 87.274 456.764 Td (safe) Tj ET BT /F0 11 Tf 107.734 456.764 Td ({) Tj ET BT /F0 11 Tf 92.774 441.375 Td (builtin) Tj ET BT /F0 11 Tf 124.256 441.375 Td ("safe";) Tj ET BT /F0 11 Tf 87.274 425.986 Td (};) Tj ET BT /F0 11 Tf 87.274 410.597 Td (wipeup) Tj ET BT /F0 11 Tf 122.408 410.597 Td ({) Tj ET BT /F0 11 Tf 92.774 395.208 Td (remove_trailing;) Tj ET BT /F0 11 Tf 87.274 379.819 Td (};) Tj ET BT /F0 11 Tf 87.274 364.430 Td (max_length) Tj ET BT /F0 11 Tf 141.966 364.430 Td ({) Tj ET BT /F0 11 Tf 92.774 349.041 Td (length) Tj ET BT /F0 11 Tf 123.024 349.041 Td (128;) Tj ET BT /F0 11 Tf 87.274 333.652 Td (};) Tj ET BT /F0 11 Tf 81.774 318.263 Td (};) Tj ET BT /F0 11 Tf 81.774 302.874 Td (#) Tj ET BT /F0 11 Tf 90.024 302.874 Td (decode) Tj ET BT /F0 11 Tf 123.926 302.874 Td (CGI,) Tj ET BT /F0 11 Tf 148.368 302.874 Td 
(transliterate) Tj ET BT /F0 11 Tf 203.049 302.874 Td (CP-1252) Tj ET BT /F0 11 Tf 244.915 302.874 Td (to) Tj ET BT /F0 11 Tf 256.223 302.874 Td (ASCII,) Tj ET BT /F0 11 Tf 290.444 302.874 Td (clean) Tj ET BT /F0 11 Tf 316.404 302.874 Td (up) Tj ET BT /F0 11 Tf 81.774 287.485 Td (sequence) Tj ET BT /F0 11 Tf 124.839 287.485 Td ("cgi-cp1252") Tj ET BT /F0 11 Tf 186.054 287.485 Td ({) Tj ET BT /F0 11 Tf 87.274 272.096 Td (uncgi;) Tj ET BT /F0 11 Tf 87.274 256.707 Td (iso8859_1) Tj ET BT /F0 11 Tf 135.861 256.707 Td ({) Tj ET BT /F0 11 Tf 92.774 241.318 Td (builtin) Tj ET BT /F0 11 Tf 124.256 241.318 Td ("cp1252";) Tj ET BT /F0 11 Tf 87.274 225.929 Td (};) Tj ET BT /F0 11 Tf 87.274 210.540 Td (safe) Tj ET BT /F0 11 Tf 107.734 210.540 Td ({) Tj ET BT /F0 11 Tf 92.774 195.151 Td (builtin) Tj ET BT /F0 11 Tf 124.256 195.151 Td ("safe";) Tj ET BT /F0 11 Tf 87.274 179.762 Td (};) Tj ET BT /F0 11 Tf 81.774 164.373 Td (};) Tj ET /F1 11 Tf BT /F1 11 Tf 68.024 133.595 Td (SEE) Tj ET BT /F1 11 Tf 91.564 133.595 Td (ALSO) Tj ET /F0 11 Tf BT /F0 11 Tf 81.774 118.206 Td (detox\(1\),) Tj ET BT /F0 11 Tf 124.542 118.206 Td (inline-detox\(1\),) Tj ET BT /F0 11 Tf 196.031 118.206 Td (detox.tbl\(5\),) Tj ET BT /F0 11 Tf 253.165 118.206 Td (ascii\(7\),) Tj ET BT /F0 11 Tf 291.654 118.206 Td (iso_8859-1\(7\),) Tj ET BT /F0 11 Tf 359.480 118.206 Td (unicode\(7\),) Tj ET BT /F0 11 Tf 412.632 118.206 Td (utf-8\(7\)) Tj ET /F0 11 Tf BT /F0 11 Tf 68.024 739.233 Td (DETOXRC\(5\)) Tj ET BT /F0 11 Tf 261.371 739.233 Td (File) Tj ET BT /F0 11 Tf 281.237 739.233 Td (Formats) Tj ET BT /F0 11 Tf 320.045 739.233 Td (Manual) Tj ET BT /F0 11 Tf 479.490 739.233 Td (DETOXRC\(5\)) Tj ET BT /F0 11 Tf 68.024 36.245 Td (Debian) Tj ET BT /F0 11 Tf 267.036 36.245 Td (February) Tj ET BT /F0 11 Tf 309.496 36.245 Td (24,) Tj ET BT /F0 11 Tf 325.996 36.245 Td (2021) Tj ET BT /F0 11 Tf 512.490 36.245 Td (Debian) Tj ET endstream endobj 20 0 obj 5371 endobj 21 0 obj << /ProcSet [/PDF /Text] /Font << /F0 3 0 R /F1 4 0 
R /F2 5 0 R /F3 6 0 R >> >> endobj 22 0 obj << /Type /Page /Parent 2 0 R /Resources 21 0 R /Contents 19 0 R >> endobj 23 0 obj << /Length 24 0 R >> stream BT /F1 11 Tf 68.024 702.988 Td (AUTHORS) Tj ET /F0 11 Tf BT /F0 11 Tf 81.774 687.599 Td (detox) Tj ET BT /F0 11 Tf 108.966 687.599 Td (was) Tj ET BT /F0 11 Tf 128.821 687.599 Td (written) Tj ET BT /F0 11 Tf 162.734 687.599 Td (by) Tj ET BT /F0 11 Tf 176.484 687.599 Td (Doug) Tj ET BT /F0 11 Tf 203.676 687.599 Td (Harple.) Tj ET /F0 11 Tf BT /F0 11 Tf 68.024 739.233 Td (DETOXRC\(5\)) Tj ET BT /F0 11 Tf 261.371 739.233 Td (File) Tj ET BT /F0 11 Tf 281.237 739.233 Td (Formats) Tj ET BT /F0 11 Tf 320.045 739.233 Td (Manual) Tj ET BT /F0 11 Tf 479.490 739.233 Td (DETOXRC\(5\)) Tj ET BT /F0 11 Tf 68.024 36.245 Td (Debian) Tj ET BT /F0 11 Tf 267.036 36.245 Td (February) Tj ET BT /F0 11 Tf 309.496 36.245 Td (24,) Tj ET BT /F0 11 Tf 325.996 36.245 Td (2021) Tj ET BT /F0 11 Tf 512.490 36.245 Td (Debian) Tj ET endstream endobj 24 0 obj 811 endobj 25 0 obj << /ProcSet [/PDF /Text] /Font << /F0 3 0 R /F1 4 0 R /F2 5 0 R /F3 6 0 R >> >> endobj 26 0 obj << /Type /Page /Parent 2 0 R /Resources 25 0 R /Contents 23 0 R >> endobj 2 0 obj << /Type /Pages /MediaBox [0 0 612 790] /Count 5 /Kids [ 10 0 R 14 0 R 18 0 R 22 0 R 26 0 R] >> endobj 27 0 obj << /Type /Catalog /Pages 2 0 R >> endobj xref 0 28 0000000000 65535 f 0000000009 00000 n 0000048463 00000 n 0000000030 00000 n 0000000112 00000 n 0000000193 00000 n 0000000276 00000 n 0000000363 00000 n 0000014785 00000 n 0000014806 00000 n 0000014901 00000 n 0000014982 00000 n 0000028436 00000 n 0000028458 00000 n 0000028554 00000 n 0000028637 00000 n 0000041573 00000 n 0000041595 00000 n 0000041691 00000 n 0000041774 00000 n 0000047199 00000 n 0000047220 00000 n 0000047316 00000 n 0000047399 00000 n 0000048264 00000 n 0000048284 00000 n 0000048380 00000 n 0000048574 00000 n trailer << /Size 28 /Root 27 0 R /Info 1 0 R >> startxref 48624 %%EOF 
detox-2.0.0/man/inline-detox.1000066400000000000000000000077361460212773400161130ustar00rootroot00000000000000.\" .\" This file is part of the Detox package. .\" .\" Copyright (c) Doug Harple .\" .\" For the full copyright and license information, please view the LICENSE .\" file that was distributed with this source code. .\" .Dd February 24, 2021 .Dt INLINE-DETOX 1 .Os .Sh NAME .Nm inline-detox .Nd clean up filenames (stream-based) .Sh SYNOPSIS .Nm .Op Fl f Pa configfile .Op Fl s Ar sequence .Op Fl v .Nm .Op Fl f Pa configfile .Op Fl s Ar sequence .Op Fl v .Ar .Nm .Op Fl L .Op Fl f Pa configfile .Op Fl v .Nm .Op Fl h | -help .Nm .Op Fl V .Sh DESCRIPTION The .Nm utility generates new filenames to make them easier to work with under Unix and Unix-like operating systems. It replaces characters that make it hard to type out a filename with dashes and underscores. It also provides transliteration-based filters, converting ISO 8859-1 or UTF-8 to ASCII, in part or in whole. An additional filter unescapes CGI-escaped filenames. .Pp .Nm reads filename(s) from the input stream and writes the updated filename(s) to the output stream. .Pp If a filename is passed on the command line, .Nm reads this file and processes each line before writing it to the output stream. .Pp Running .Cm detox .Fl -inline is identical to running .Nm . .Ss Sequences .Nm is driven by a configurable series of filters, called a sequence. Sequences are covered in more detail in .Xr detoxrc 5 and are discoverable with the .Fl L option. The default sequence will run the .Ar safe and .Ar wipeup filters. Other examples of pre-configured sequences are .Ar iso8859_1 and .Ar utf_8 , which both provide transliteration to ASCII and then finish with the .Ar safe and .Ar wipeup filters. .Ss Options .Bl -tag -width Fl .It Fl f Pa configfile Use .Pa configfile instead of the default configuration files for loading translation sequences. No other config file will be parsed. 
.It Fl h , -help Display helpful information. .It Fl L List the currently available sequences. When paired with .Fl v this option shows what filters are used in each sequence and any properties applied to the filters. .It Fl s Ar sequence Use .Ar sequence instead of .Cm default . .It Fl v Be verbose about which files are being renamed. .It Fl V Show the current version of .Nm . .El .Sh FILES .Bl -tag -width Fl .It Pa /etc/detoxrc The system-wide detoxrc file. .It Pa ~/.detoxrc A user's personal detoxrc. Normally it extends the system-wide .Pa detoxrc , unless .Fl f has been specified, in which case, it is ignored. .It Pa /usr/share/detox/cp1252.tbl The provided CP-1252 transliteration table. .It Pa /usr/share/detox/iso8859_1.tbl The provided ISO 8859-1 transliteration table. .It Pa /usr/share/detox/safe.tbl The provided safe character translation table. .It Pa /usr/share/detox/unicode.tbl The provided Unicode transliteration table, used by the UTF-8 filter. .It Pa /usr/share/detox/unidecode.tbl An additional Unicode transliteration table, based on .Xr Text::Unidecode 3pm . .El .Sh EXAMPLES .Bl -tag -width Fl .It echo \(dqFoo Bar\(dq | Nm Fl s Ar lower Fl v Will run the sequence .Ar lower , listing any changes and returning the result to the output stream. .El .Sh SEE ALSO .Xr detox 1 , .Xr Text::Unidecode 3pm , .Xr detox.tbl 5 , .Xr detoxrc 5 , .Xr ascii 7 , .Xr iso_8859-1 7 , .Xr unicode 7 , .Xr utf-8 7 .Sh HISTORY .Nm was originally designed to clean up files that I had received from friends which had been created using other operating systems. It's trivial to create a filename with spaces, parentheses, brackets, and ampersands under some operating systems. These have special meaning within .Fx and Linux, and cause problems when you go to access them. I created .Nm to clean up these files. .Pp Version 2.0 stepped back from transliteration out of the box, instead focusing on ease of use. 
The primary motivations for this were user-provided feedback, and the fact that many modern Unix-like OSs use UTF-8 as their primary character set. Transliterating from UTF-8 to ASCII in this scenario is lossy and pointless. .Sh AUTHORS .Nm was written by .An Doug Harple . detox-2.0.0/man/inline-detox.1.pdf000066400000000000000000000713211460212773400166520ustar00rootroot00000000000000%PDF-1.1 1 0 obj << >> endobj 3 0 obj << /Type /Font /Subtype /Type1 /Name /F0 /BaseFont /Times-Roman >> endobj 4 0 obj << /Type /Font /Subtype /Type1 /Name /F1 /BaseFont /Times-Bold >> endobj 5 0 obj << /Type /Font /Subtype /Type1 /Name /F2 /BaseFont /Times-Italic >> endobj 6 0 obj << /Type /Font /Subtype /Type1 /Name /F3 /BaseFont /Times-BoldItalic >> endobj 7 0 obj << /Length 8 0 R >> stream BT /F1 11 Tf 68.024 702.988 Td (NAME) Tj ET BT /F1 11 Tf 81.774 687.599 Td (inline-detox) Tj ET /F0 11 Tf BT /F0 11 Tf 140.140 687.599 Td (-) Tj ET BT /F0 11 Tf 146.553 687.599 Td (clean) Tj ET BT /F0 11 Tf 172.513 687.599 Td (up) Tj ET BT /F0 11 Tf 186.263 687.599 Td (filenames) Tj ET BT /F0 11 Tf 231.781 687.599 Td (\(stream-based\)) Tj ET /F1 11 Tf BT /F1 11 Tf 68.024 656.821 Td (SYNOPSIS) Tj ET BT /F1 11 Tf 81.774 641.432 Td (inline-detox) Tj ET /F0 11 Tf BT /F0 11 Tf 140.140 641.432 Td ([) Tj ET /F1 11 Tf BT /F1 11 Tf 143.803 641.432 Td (-f) Tj ET /F2 11 Tf BT /F2 11 Tf 153.879 641.432 Td (configfile) Tj ET /F0 11 Tf BT /F0 11 Tf 195.437 641.432 Td (]) Tj ET BT /F0 11 Tf 201.850 641.432 Td ([) Tj ET /F1 11 Tf BT /F1 11 Tf 205.513 641.432 Td (-s) Tj ET /F2 11 Tf BT /F2 11 Tf 216.205 641.432 Td (sequence) Tj ET /F0 11 Tf BT /F0 11 Tf 256.520 641.432 Td (]) Tj ET BT /F0 11 Tf 262.933 641.432 Td ([) Tj ET /F1 11 Tf BT /F1 11 Tf 266.596 641.432 Td (-v) Tj ET /F0 11 Tf BT /F0 11 Tf 275.759 641.432 Td (]) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 626.043 Td (inline-detox) Tj ET /F0 11 Tf BT /F0 11 Tf 140.140 626.043 Td ([) Tj ET /F1 11 Tf BT /F1 11 Tf 143.803 626.043 Td (-f) Tj ET /F2 11 Tf BT /F2 11 
Tf 153.879 626.043 Td (configfile) Tj ET /F0 11 Tf BT /F0 11 Tf 195.437 626.043 Td (]) Tj ET BT /F0 11 Tf 201.850 626.043 Td ([) Tj ET /F1 11 Tf BT /F1 11 Tf 205.513 626.043 Td (-s) Tj ET /F2 11 Tf BT /F2 11 Tf 216.205 626.043 Td (sequence) Tj ET /F0 11 Tf BT /F0 11 Tf 256.520 626.043 Td (]) Tj ET BT /F0 11 Tf 262.933 626.043 Td ([) Tj ET /F1 11 Tf BT /F1 11 Tf 266.596 626.043 Td (-v) Tj ET /F0 11 Tf BT /F0 11 Tf 275.759 626.043 Td (]) Tj ET /F2 11 Tf BT /F2 11 Tf 282.172 626.043 Td (file) Tj ET BT /F2 11 Tf 298.980 626.043 Td (...) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 610.654 Td (inline-detox) Tj ET /F0 11 Tf BT /F0 11 Tf 140.140 610.654 Td ([) Tj ET /F1 11 Tf BT /F1 11 Tf 143.803 610.654 Td (-L) Tj ET /F0 11 Tf BT /F0 11 Tf 154.803 610.654 Td (]) Tj ET BT /F0 11 Tf 161.216 610.654 Td ([) Tj ET /F1 11 Tf BT /F1 11 Tf 164.879 610.654 Td (-f) Tj ET /F2 11 Tf BT /F2 11 Tf 174.955 610.654 Td (configfile) Tj ET /F0 11 Tf BT /F0 11 Tf 216.513 610.654 Td (]) Tj ET BT /F0 11 Tf 222.926 610.654 Td ([) Tj ET /F1 11 Tf BT /F1 11 Tf 226.589 610.654 Td (-v) Tj ET /F0 11 Tf BT /F0 11 Tf 235.752 610.654 Td (]) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 595.265 Td (inline-detox) Tj ET /F0 11 Tf BT /F0 11 Tf 140.140 595.265 Td ([) Tj ET /F1 11 Tf BT /F1 11 Tf 143.803 595.265 Td (-h) Tj ET /F0 11 Tf BT /F0 11 Tf 156.332 595.265 Td (|) Tj ET /F1 11 Tf BT /F1 11 Tf 161.282 595.265 Td (--help) Tj ET /F0 11 Tf BT /F0 11 Tf 188.782 595.265 Td (]) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 579.876 Td (inline-detox) Tj ET /F0 11 Tf BT /F0 11 Tf 140.140 579.876 Td ([) Tj ET /F1 11 Tf BT /F1 11 Tf 143.803 579.876 Td (-V) Tj ET /F0 11 Tf BT /F0 11 Tf 155.408 579.876 Td (]) Tj ET /F1 11 Tf BT /F1 11 Tf 68.024 549.098 Td (DESCRIPTION) Tj ET /F0 11 Tf BT /F0 11 Tf 81.774 533.709 Td (The) Tj ET /F1 11 Tf BT /F1 11 Tf 101.629 533.709 Td (inline-detox) Tj ET /F0 11 Tf BT /F0 11 Tf 159.995 533.709 Td (utility) Tj ET BT /F0 11 Tf 189.035 533.709 Td (generates) Tj ET BT /F0 11 Tf 233.321 533.709 Td (new) Tj ET BT 
/F0 11 Tf 254.397 533.709 Td (filenames) Tj ET BT /F0 11 Tf 299.915 533.709 Td (to) Tj ET BT /F0 11 Tf 311.223 533.709 Td (make) Tj ET BT /F0 11 Tf 337.799 533.709 Td (them) Tj ET BT /F0 11 Tf 362.549 533.709 Td (easier) Tj ET BT /F0 11 Tf 390.951 533.709 Td (to) Tj ET BT /F0 11 Tf 402.259 533.709 Td (work) Tj ET BT /F0 11 Tf 427.614 533.709 Td (with) Tj ET BT /F0 11 Tf 449.922 533.709 Td (under) Tj ET BT /F0 11 Tf 477.719 533.709 Td (Unix) Tj ET BT /F0 11 Tf 502.469 533.709 Td (and) Tj ET BT /F0 11 Tf 521.103 533.709 Td (Unix-) Tj ET BT /F0 11 Tf 81.774 518.320 Td (like) Tj ET BT /F0 11 Tf 101.024 518.320 Td (operating) Tj ET BT /F0 11 Tf 145.321 518.320 Td (systems.) Tj ET BT /F0 11 Tf 188.408 518.320 Td (It) Tj ET BT /F0 11 Tf 197.879 518.320 Td (replaces) Tj ET BT /F0 11 Tf 236.665 518.320 Td (characters) Tj ET BT /F0 11 Tf 283.998 518.320 Td (that) Tj ET BT /F0 11 Tf 303.248 518.320 Td (make) Tj ET BT /F0 11 Tf 329.824 518.320 Td (it) Tj ET BT /F0 11 Tf 338.690 518.320 Td (hard) Tj ET BT /F0 11 Tf 360.987 518.320 Td (to) Tj ET BT /F0 11 Tf 372.295 518.320 Td (type) Tj ET BT /F0 11 Tf 393.987 518.320 Td (out) Tj ET BT /F0 11 Tf 410.795 518.320 Td (a) Tj ET BT /F0 11 Tf 418.429 518.320 Td (filename) Tj ET BT /F0 11 Tf 459.668 518.320 Td (with) Tj ET BT /F0 11 Tf 481.976 518.320 Td (dashes) Tj ET BT /F0 11 Tf 514.052 518.320 Td (and) Tj ET BT /F0 11 Tf 81.774 502.931 Td (underscores.) 
Tj ET BT /F0 11 Tf 142.560 502.931 Td (It) Tj ET BT /F0 11 Tf 152.031 502.931 Td (also) Tj ET BT /F0 11 Tf 172.502 502.931 Td (provides) Tj ET BT /F0 11 Tf 213.136 502.931 Td (transliteration-based) Tj ET BT /F0 11 Tf 305.701 502.931 Td (filters,) Tj ET BT /F0 11 Tf 336.864 502.931 Td (converting) Tj ET BT /F0 11 Tf 386.661 502.931 Td (ISO) Tj ET BT /F0 11 Tf 407.132 502.931 Td (8859-1) Tj ET BT /F0 11 Tf 441.045 502.931 Td (or) Tj ET BT /F0 11 Tf 452.958 502.931 Td (UTF-8) Tj ET BT /F0 11 Tf 485.650 502.931 Td (to) Tj ET BT /F0 11 Tf 496.958 502.931 Td (ASCII,) Tj ET BT /F0 11 Tf 531.179 502.931 Td (in) Tj ET BT /F0 11 Tf 81.774 487.542 Td (part) Tj ET BT /F0 11 Tf 101.629 487.542 Td (or) Tj ET BT /F0 11 Tf 113.542 487.542 Td (in) Tj ET BT /F0 11 Tf 124.850 487.542 Td (whole.) Tj ET BT /F0 11 Tf 159.984 487.542 Td (An) Tj ET BT /F0 11 Tf 176.176 487.542 Td (additional) Tj ET BT /F0 11 Tf 222.926 487.542 Td (filter) Tj ET BT /F0 11 Tf 247.060 487.542 Td (unescapes) Tj ET BT /F0 11 Tf 294.404 487.542 Td (CGI-escaped) Tj ET BT /F0 11 Tf 354.574 487.542 Td (filenames.) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 456.764 Td (inline-detox) Tj ET /F0 11 Tf BT /F0 11 Tf 140.140 456.764 Td (reads) Tj ET BT /F0 11 Tf 166.100 456.764 Td (filename\(s\)) Tj ET BT /F0 11 Tf 218.944 456.764 Td (from) Tj ET BT /F0 11 Tf 243.078 456.764 Td (the) Tj ET BT /F0 11 Tf 259.270 456.764 Td (input) Tj ET BT /F0 11 Tf 284.636 456.764 Td (stream) Tj ET BT /F0 11 Tf 316.712 456.764 Td (and) Tj ET BT /F0 11 Tf 335.346 456.764 Td (writes) Tj ET BT /F0 11 Tf 364.980 456.764 Td (the) Tj ET BT /F0 11 Tf 381.172 456.764 Td (updated) Tj ET BT /F0 11 Tf 418.748 456.764 Td (filename\(s\)) Tj ET BT /F0 11 Tf 471.592 456.764 Td (to) Tj ET BT /F0 11 Tf 482.900 456.764 Td (the) Tj ET BT /F0 11 Tf 499.092 456.764 Td (output) Tj ET BT /F0 11 Tf 81.774 441.375 Td (stream.) 
Tj ET BT /F0 11 Tf 81.774 410.597 Td (If) Tj ET BT /F0 11 Tf 91.850 410.597 Td (a) Tj ET BT /F0 11 Tf 99.484 410.597 Td (filename) Tj ET BT /F0 11 Tf 140.723 410.597 Td (is) Tj ET BT /F0 11 Tf 150.810 410.597 Td (passed) Tj ET BT /F0 11 Tf 182.886 410.597 Td (on) Tj ET BT /F0 11 Tf 196.636 410.597 Td (the) Tj ET BT /F0 11 Tf 212.828 410.597 Td (command) Tj ET BT /F0 11 Tf 258.962 410.597 Td (line,) Tj ET /F1 11 Tf BT /F1 11 Tf 280.962 410.597 Td (inline-detox) Tj ET /F0 11 Tf BT /F0 11 Tf 339.328 410.597 Td (reads) Tj ET BT /F0 11 Tf 365.288 410.597 Td (this) Tj ET BT /F0 11 Tf 383.933 410.597 Td (file) Tj ET BT /F0 11 Tf 401.346 410.597 Td (and) Tj ET BT /F0 11 Tf 419.980 410.597 Td (processes) Tj ET BT /F0 11 Tf 464.882 410.597 Td (each) Tj ET BT /F0 11 Tf 487.784 410.597 Td (line) Tj ET BT /F0 11 Tf 507.034 410.597 Td (before) Tj ET BT /F0 11 Tf 81.774 395.208 Td (writing) Tj ET BT /F0 11 Tf 116.303 395.208 Td (it) Tj ET BT /F0 11 Tf 125.169 395.208 Td (to) Tj ET BT /F0 11 Tf 136.477 395.208 Td (the) Tj ET BT /F0 11 Tf 152.669 395.208 Td (output) Tj ET BT /F0 11 Tf 183.535 395.208 Td (stream.) Tj ET BT /F0 11 Tf 81.774 364.430 Td (Running) Tj ET /F1 11 Tf BT /F1 11 Tf 122.419 364.430 Td (detox) Tj ET BT /F1 11 Tf 150.832 364.430 Td (--inline) Tj ET /F0 11 Tf BT /F0 11 Tf 187.198 364.430 Td (is) Tj ET BT /F0 11 Tf 197.285 364.430 Td (identical) Tj ET BT /F0 11 Tf 237.919 364.430 Td (to) Tj ET BT /F0 11 Tf 249.227 364.430 Td (running) Tj ET /F1 11 Tf BT /F1 11 Tf 286.198 364.430 Td (inline-detox) Tj ET /F0 11 Tf BT /F0 11 Tf 341.814 364.430 Td (.) 
Tj ET /F1 11 Tf BT /F1 11 Tf 76.274 333.652 Td (Sequences) Tj ET BT /F1 11 Tf 81.774 318.263 Td (inline-detox) Tj ET /F0 11 Tf BT /F0 11 Tf 140.140 318.263 Td (is) Tj ET BT /F0 11 Tf 150.227 318.263 Td (driven) Tj ET BT /F0 11 Tf 181.082 318.263 Td (by) Tj ET BT /F0 11 Tf 194.832 318.263 Td (a) Tj ET BT /F0 11 Tf 202.466 318.263 Td (configurable) Tj ET BT /F0 11 Tf 260.810 318.263 Td (series) Tj ET BT /F0 11 Tf 288.607 318.263 Td (of) Tj ET BT /F0 11 Tf 300.520 318.263 Td (filters,) Tj ET BT /F0 11 Tf 331.683 318.263 Td (called) Tj ET BT /F0 11 Tf 360.701 318.263 Td (a) Tj ET BT /F0 11 Tf 368.335 318.263 Td (sequence.) Tj ET BT /F0 11 Tf 416.900 318.263 Td (Sequences) Tj ET BT /F0 11 Tf 466.081 318.263 Td (are) Tj ET BT /F0 11 Tf 482.262 318.263 Td (covered) Tj ET BT /F0 11 Tf 519.827 318.263 Td (in) Tj ET BT /F0 11 Tf 81.774 302.874 Td (more) Tj ET BT /F0 11 Tf 107.129 302.874 Td (detail) Tj ET BT /F0 11 Tf 134.321 302.874 Td (in) Tj ET BT /F0 11 Tf 145.629 302.874 Td (detoxrc\(5\)) Tj ET BT /F0 11 Tf 194.194 302.874 Td (and) Tj ET BT /F0 11 Tf 212.828 302.874 Td (are) Tj ET BT /F0 11 Tf 229.009 302.874 Td (discoverable) Tj ET BT /F0 11 Tf 287.353 302.874 Td (with) Tj ET BT /F0 11 Tf 309.661 302.874 Td (the) Tj ET /F1 11 Tf BT /F1 11 Tf 325.853 302.874 Td (-L) Tj ET /F0 11 Tf BT /F0 11 Tf 339.603 302.874 Td (option.) Tj ET BT /F0 11 Tf 375.969 302.874 Td (The) Tj ET BT /F0 11 Tf 395.824 302.874 Td (default) Tj ET BT /F0 11 Tf 429.121 302.874 Td (sequence) Tj ET BT /F0 11 Tf 472.186 302.874 Td (will) Tj ET BT /F0 11 Tf 492.052 302.874 Td (run) Tj ET BT /F0 11 Tf 509.465 302.874 Td (the) Tj ET /F2 11 Tf BT /F2 11 Tf 525.657 302.874 Td (safe) Tj ET /F0 11 Tf BT /F0 11 Tf 81.774 287.485 Td (and) Tj ET /F2 11 Tf BT /F2 11 Tf 100.408 287.485 Td (wipeup) Tj ET /F0 11 Tf BT /F0 11 Tf 134.937 287.485 Td (filters.) 
Tj ET BT /F0 11 Tf 168.850 287.485 Td (Other) Tj ET BT /F0 11 Tf 196.647 287.485 Td (examples) Tj ET BT /F0 11 Tf 240.944 287.485 Td (of) Tj ET BT /F0 11 Tf 252.857 287.485 Td (pre-configured) Tj ET BT /F0 11 Tf 320.969 287.485 Td (sequences) Tj ET BT /F0 11 Tf 368.313 287.485 Td (are) Tj ET /F2 11 Tf BT /F2 11 Tf 384.494 287.485 Td (iso8859_1) Tj ET /F0 11 Tf BT /F0 11 Tf 433.081 287.485 Td (and) Tj ET /F2 11 Tf BT /F2 11 Tf 451.715 287.485 Td (utf_8) Tj ET /F0 11 Tf BT /F0 11 Tf 474.331 287.485 Td (,) Tj ET BT /F0 11 Tf 479.831 287.485 Td (which) Tj ET BT /F0 11 Tf 509.465 287.485 Td (both) Tj ET BT /F0 11 Tf 81.774 272.096 Td (provide) Tj ET BT /F0 11 Tf 118.129 272.096 Td (transliteration) Tj ET BT /F0 11 Tf 181.984 272.096 Td (to) Tj ET BT /F0 11 Tf 193.292 272.096 Td (ASCII) Tj ET BT /F0 11 Tf 224.763 272.096 Td (and) Tj ET BT /F0 11 Tf 243.397 272.096 Td (then) Tj ET BT /F0 11 Tf 265.089 272.096 Td (finish) Tj ET BT /F0 11 Tf 292.897 272.096 Td (with) Tj ET BT /F0 11 Tf 315.205 272.096 Td (the) Tj ET /F2 11 Tf BT /F2 11 Tf 331.397 272.096 Td (safe) Tj ET /F0 11 Tf BT /F0 11 Tf 351.868 272.096 Td (and) Tj ET /F2 11 Tf BT /F2 11 Tf 370.502 272.096 Td (wipeup) Tj ET /F0 11 Tf BT /F0 11 Tf 405.031 272.096 Td (filters.) 
Tj ET /F1 11 Tf BT /F1 11 Tf 76.274 241.318 Td (Options) Tj ET BT /F1 11 Tf 81.774 225.929 Td (-f) Tj ET /F2 11 Tf BT /F2 11 Tf 91.850 225.929 Td (configfile) Tj ET /F0 11 Tf BT /F0 11 Tf 141.064 225.929 Td (Use) Tj ET /F2 11 Tf BT /F2 11 Tf 160.919 225.929 Td (configfile) Tj ET /F0 11 Tf BT /F0 11 Tf 205.227 225.929 Td (instead) Tj ET BT /F0 11 Tf 239.140 225.929 Td (of) Tj ET BT /F0 11 Tf 251.053 225.929 Td (the) Tj ET BT /F0 11 Tf 267.245 225.929 Td (default) Tj ET BT /F0 11 Tf 300.542 225.929 Td (configuration) Tj ET BT /F0 11 Tf 362.560 225.929 Td (files) Tj ET BT /F0 11 Tf 384.252 225.929 Td (for) Tj ET BT /F0 11 Tf 399.828 225.929 Td (loading) Tj ET BT /F0 11 Tf 435.578 225.929 Td (translation) Tj ET BT /F0 11 Tf 484.770 225.929 Td (sequences.) Tj ET BT /F0 11 Tf 142.274 210.540 Td (No) Tj ET BT /F0 11 Tf 158.466 210.540 Td (other) Tj ET BT /F0 11 Tf 183.821 210.540 Td (config) Tj ET BT /F0 11 Tf 214.676 210.540 Td (file) Tj ET BT /F0 11 Tf 232.089 210.540 Td (will) Tj ET BT /F0 11 Tf 251.955 210.540 Td (be) Tj ET BT /F0 11 Tf 265.089 210.540 Td (parsed.) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 179.762 Td (-h) Tj ET /F0 11 Tf BT /F0 11 Tf 91.553 179.762 Td (,) Tj ET /F1 11 Tf BT /F1 11 Tf 97.053 179.762 Td (--help) Tj ET /F0 11 Tf BT /F0 11 Tf 144.122 179.762 Td (Display) Tj ET BT /F0 11 Tf 181.093 179.762 Td (helpful) Tj ET BT /F0 11 Tf 215.006 179.762 Td (information.) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 148.984 Td (-L) Tj ET /F0 11 Tf BT /F0 11 Tf 142.890 148.984 Td (List) Tj ET BT /F0 11 Tf 162.756 148.984 Td (the) Tj ET BT /F0 11 Tf 178.948 148.984 Td (currently) Tj ET BT /F0 11 Tf 221.408 148.984 Td (available) Tj ET BT /F0 11 Tf 263.868 148.984 Td (sequences.) 
Tj ET BT /F0 11 Tf 316.712 148.984 Td (When) Tj ET BT /F0 11 Tf 345.730 148.984 Td (paired) Tj ET BT /F0 11 Tf 375.969 148.984 Td (with) Tj ET /F1 11 Tf BT /F1 11 Tf 398.277 148.984 Td (-v) Tj ET /F0 11 Tf BT /F0 11 Tf 410.190 148.984 Td (this) Tj ET BT /F0 11 Tf 428.835 148.984 Td (option) Tj ET BT /F0 11 Tf 459.701 148.984 Td (shows) Tj ET BT /F0 11 Tf 489.951 148.984 Td (what) Tj ET BT /F0 11 Tf 514.085 148.984 Td (filters) Tj ET BT /F0 11 Tf 142.274 133.595 Td (are) Tj ET BT /F0 11 Tf 158.455 133.595 Td (used) Tj ET BT /F0 11 Tf 181.368 133.595 Td (in) Tj ET BT /F0 11 Tf 192.676 133.595 Td (each) Tj ET BT /F0 11 Tf 215.578 133.595 Td (sequence) Tj ET BT /F0 11 Tf 258.643 133.595 Td (and) Tj ET BT /F0 11 Tf 277.277 133.595 Td (any) Tj ET BT /F0 11 Tf 295.911 133.595 Td (properties) Tj ET BT /F0 11 Tf 342.650 133.595 Td (applied) Tj ET BT /F0 11 Tf 377.784 133.595 Td (to) Tj ET BT /F0 11 Tf 389.092 133.595 Td (the) Tj ET BT /F0 11 Tf 405.284 133.595 Td (filters.) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 102.817 Td (-s) Tj ET /F2 11 Tf BT /F2 11 Tf 92.466 102.817 Td (sequence) Tj ET /F0 11 Tf BT /F0 11 Tf 142.274 102.817 Td (Use) Tj ET /F2 11 Tf BT /F2 11 Tf 162.129 102.817 Td (sequence) Tj ET /F0 11 Tf BT /F0 11 Tf 205.194 102.817 Td (instead) Tj ET BT /F0 11 Tf 239.107 102.817 Td (of) Tj ET /F1 11 Tf BT /F1 11 Tf 251.020 102.817 Td (default) Tj ET /F0 11 Tf BT /F0 11 Tf 284.020 102.817 Td (.) 
Tj ET /F0 11 Tf BT /F0 11 Tf 68.024 739.233 Td (INLINE-DETOX\(1\)) Tj ET BT /F0 11 Tf 245.498 739.233 Td (General) Tj ET BT /F0 11 Tf 283.063 739.233 Td (Commands) Tj ET BT /F0 11 Tf 335.929 739.233 Td (Manual) Tj ET BT /F0 11 Tf 453.849 739.233 Td (INLINE-DETOX\(1\)) Tj ET BT /F0 11 Tf 68.024 36.245 Td (Debian) Tj ET BT /F0 11 Tf 267.036 36.245 Td (February) Tj ET BT /F0 11 Tf 309.496 36.245 Td (24,) Tj ET BT /F0 11 Tf 325.996 36.245 Td (2021) Tj ET BT /F0 11 Tf 512.490 36.245 Td (Debian) Tj ET endstream endobj 8 0 obj 14744 endobj 9 0 obj << /ProcSet [/PDF /Text] /Font << /F0 3 0 R /F1 4 0 R /F2 5 0 R /F3 6 0 R >> >> endobj 10 0 obj << /Type /Page /Parent 2 0 R /Resources 9 0 R /Contents 7 0 R >> endobj 11 0 obj << /Length 12 0 R >> stream BT /F1 11 Tf 81.774 702.988 Td (-v) Tj ET /F0 11 Tf BT /F0 11 Tf 142.274 702.988 Td (Be) Tj ET BT /F0 11 Tf 157.245 702.988 Td (verbose) Tj ET BT /F0 11 Tf 194.205 702.988 Td (about) Tj ET BT /F0 11 Tf 221.397 702.988 Td (which) Tj ET BT /F0 11 Tf 251.031 702.988 Td (files) Tj ET BT /F0 11 Tf 272.723 702.988 Td (are) Tj ET BT /F0 11 Tf 288.904 702.988 Td (being) Tj ET BT /F0 11 Tf 316.096 702.988 Td (renamed.) Tj ET /F1 11 Tf BT /F1 11 Tf 81.774 672.210 Td (-V) Tj ET /F0 11 Tf BT /F0 11 Tf 142.274 672.210 Td (Show) Tj ET BT /F0 11 Tf 170.082 672.210 Td (the) Tj ET BT /F0 11 Tf 186.274 672.210 Td (current) Tj ET BT /F0 11 Tf 220.176 672.210 Td (version) Tj ET BT /F0 11 Tf 255.310 672.210 Td (of) Tj ET /F1 11 Tf BT /F1 11 Tf 267.223 672.210 Td (inline-detox) Tj ET /F0 11 Tf BT /F0 11 Tf 322.839 672.210 Td (.) Tj ET /F1 11 Tf BT /F1 11 Tf 68.024 641.432 Td (FILES) Tj ET /F2 11 Tf BT /F2 11 Tf 81.774 626.043 Td (/etc/detoxrc) Tj ET /F0 11 Tf BT /F0 11 Tf 142.274 626.043 Td (The) Tj ET BT /F0 11 Tf 162.129 626.043 Td (system-wide) Tj ET BT /F0 11 Tf 220.484 626.043 Td (detoxrc) Tj ET BT /F0 11 Tf 256.223 626.043 Td (file.) 
Tj ET /F2 11 Tf BT /F2 11 Tf 81.774 595.265 Td (~/.detoxrc) Tj ET /F0 11 Tf BT /F0 11 Tf 142.274 595.265 Td (A) Tj ET BT /F0 11 Tf 152.966 595.265 Td (user's) Tj ET BT /F0 11 Tf 181.984 595.265 Td (personal) Tj ET BT /F0 11 Tf 222.002 595.265 Td (detoxrc.) Tj ET BT /F0 11 Tf 263.241 595.265 Td (Normally) Tj ET BT /F0 11 Tf 308.154 595.265 Td (it) Tj ET BT /F0 11 Tf 317.020 595.265 Td (extends) Tj ET BT /F0 11 Tf 353.375 595.265 Td (the) Tj ET BT /F0 11 Tf 369.567 595.265 Td (system-wide) Tj ET /F2 11 Tf BT /F2 11 Tf 427.922 595.265 Td (detoxrc) Tj ET /F0 11 Tf BT /F0 11 Tf 460.911 595.265 Td (,) Tj ET BT /F0 11 Tf 466.411 595.265 Td (unless) Tj ET /F1 11 Tf BT /F1 11 Tf 496.661 595.265 Td (-f) Tj ET /F0 11 Tf BT /F0 11 Tf 506.737 595.265 Td (has) Tj ET BT /F0 11 Tf 142.274 579.876 Td (been) Tj ET BT /F0 11 Tf 165.792 579.876 Td (specified,) Tj ET BT /F0 11 Tf 211.002 579.876 Td (in) Tj ET BT /F0 11 Tf 222.310 579.876 Td (which) Tj ET BT /F0 11 Tf 251.944 579.876 Td (case,) Tj ET BT /F0 11 Tf 276.375 579.876 Td (it) Tj ET BT /F0 11 Tf 285.241 579.876 Td (is) Tj ET BT /F0 11 Tf 295.328 579.876 Td (ignored.) Tj ET /F2 11 Tf BT /F2 11 Tf 81.774 549.098 Td (/usr/share/detox/cp1252.tbl) Tj ET /F0 11 Tf BT /F0 11 Tf 142.274 533.709 Td (The) Tj ET BT /F0 11 Tf 162.129 533.709 Td (provided) Tj ET BT /F0 11 Tf 203.984 533.709 Td (CP-1252) Tj ET BT /F0 11 Tf 245.850 533.709 Td (transliteration) Tj ET BT /F0 11 Tf 309.705 533.709 Td (table.) Tj ET /F2 11 Tf BT /F2 11 Tf 81.774 502.931 Td (/usr/share/detox/iso8859_1.tbl) Tj ET /F0 11 Tf BT /F0 11 Tf 142.274 487.542 Td (The) Tj ET BT /F0 11 Tf 162.129 487.542 Td (provided) Tj ET BT /F0 11 Tf 203.984 487.542 Td (ISO) Tj ET BT /F0 11 Tf 224.455 487.542 Td (8859-1) Tj ET BT /F0 11 Tf 258.368 487.542 Td (transliteration) Tj ET BT /F0 11 Tf 322.223 487.542 Td (table.) 
Tj ET /F2 11 Tf BT /F2 11 Tf 81.774 456.764 Td (/usr/share/detox/safe.tbl) Tj ET /F0 11 Tf BT /F0 11 Tf 142.274 441.375 Td (The) Tj ET BT /F0 11 Tf 162.129 441.375 Td (provided) Tj ET BT /F0 11 Tf 203.984 441.375 Td (safe) Tj ET BT /F0 11 Tf 224.444 441.375 Td (character) Tj ET BT /F0 11 Tf 267.498 441.375 Td (translation) Tj ET BT /F0 11 Tf 316.690 441.375 Td (table.) Tj ET /F2 11 Tf BT /F2 11 Tf 81.774 410.597 Td (/usr/share/detox/unicode.tbl) Tj ET /F0 11 Tf BT /F0 11 Tf 142.274 395.208 Td (The) Tj ET BT /F0 11 Tf 162.129 395.208 Td (provided) Tj ET BT /F0 11 Tf 203.984 395.208 Td (Unicode) Tj ET BT /F0 11 Tf 244.002 395.208 Td (transliteration) Tj ET BT /F0 11 Tf 307.857 395.208 Td (table,) Tj ET BT /F0 11 Tf 334.741 395.208 Td (used) Tj ET BT /F0 11 Tf 357.654 395.208 Td (by) Tj ET BT /F0 11 Tf 371.404 395.208 Td (the) Tj ET BT /F0 11 Tf 387.596 395.208 Td (UTF-8) Tj ET BT /F0 11 Tf 420.288 395.208 Td (filter.) Tj ET /F2 11 Tf BT /F2 11 Tf 81.774 364.430 Td (/usr/share/detox/unidecode.tbl) Tj ET /F0 11 Tf BT /F0 11 Tf 142.274 349.041 Td (An) Tj ET BT /F0 11 Tf 158.466 349.041 Td (additional) Tj ET BT /F0 11 Tf 205.216 349.041 Td (Unicode) Tj ET BT /F0 11 Tf 245.234 349.041 Td (tranlsiteration) Tj ET BT /F0 11 Tf 309.089 349.041 Td (table,) Tj ET BT /F0 11 Tf 335.973 349.041 Td (based) Tj ET BT /F0 11 Tf 363.770 349.041 Td (on) Tj ET BT /F0 11 Tf 377.520 349.041 Td (Text::Unidecode\(3pm\).) 
Tj ET /F1 11 Tf BT /F1 11 Tf 68.024 318.263 Td (EXAMPLES) Tj ET /F0 11 Tf BT /F0 11 Tf 81.774 302.874 Td (echo) Tj ET BT /F0 11 Tf 105.292 302.874 Td (Foo) Tj ET BT /F0 11 Tf 125.158 302.874 Td (Bar) Tj ET BT /F0 11 Tf 143.792 302.874 Td (|) Tj ET /F1 11 Tf BT /F1 11 Tf 148.742 302.874 Td (inline-detox) Tj ET BT /F1 11 Tf 207.108 302.874 Td (-s) Tj ET /F2 11 Tf BT /F2 11 Tf 217.800 302.874 Td (lower) Tj ET /F1 11 Tf BT /F1 11 Tf 245.608 302.874 Td (-v) Tj ET /F0 11 Tf BT /F0 11 Tf 142.274 287.485 Td (Will) Tj ET BT /F0 11 Tf 164.582 287.485 Td (run) Tj ET BT /F0 11 Tf 181.995 287.485 Td (the) Tj ET BT /F0 11 Tf 198.187 287.485 Td (sequence) Tj ET /F2 11 Tf BT /F2 11 Tf 241.252 287.485 Td (lower) Tj ET /F0 11 Tf BT /F0 11 Tf 266.310 287.485 Td (,) Tj ET BT /F0 11 Tf 271.810 287.485 Td (listing) Tj ET BT /F0 11 Tf 302.071 287.485 Td (any) Tj ET BT /F0 11 Tf 320.705 287.485 Td (changes) Tj ET BT /F0 11 Tf 358.886 287.485 Td (and) Tj ET BT /F0 11 Tf 377.520 287.485 Td (returning) Tj ET BT /F0 11 Tf 420.596 287.485 Td (the) Tj ET BT /F0 11 Tf 436.788 287.485 Td (result) Tj ET BT /F0 11 Tf 463.980 287.485 Td (to) Tj ET BT /F0 11 Tf 475.288 287.485 Td (the) Tj ET BT /F0 11 Tf 491.480 287.485 Td (output) Tj ET BT /F0 11 Tf 142.274 272.096 Td (stream.) 
Tj ET /F1 11 Tf BT /F1 11 Tf 68.024 241.318 Td (SEE) Tj ET BT /F1 11 Tf 91.564 241.318 Td (ALSO) Tj ET /F0 11 Tf BT /F0 11 Tf 81.774 225.929 Td (detox\(1\),) Tj ET BT /F0 11 Tf 124.542 225.929 Td (Text::Unidecode\(3pm\),) Tj ET BT /F0 11 Tf 230.857 225.929 Td (detox.tbl\(5\),) Tj ET BT /F0 11 Tf 287.991 225.929 Td (detoxrc\(5\),) Tj ET BT /F0 11 Tf 339.306 225.929 Td (ascii\(7\),) Tj ET BT /F0 11 Tf 377.795 225.929 Td (iso_8859-1\(7\),) Tj ET BT /F0 11 Tf 445.621 225.929 Td (unicode\(7\),) Tj ET BT /F0 11 Tf 498.773 225.929 Td (utf-8\(7\)) Tj ET /F1 11 Tf BT /F1 11 Tf 68.024 195.151 Td (HISTORY) Tj ET BT /F1 11 Tf 81.774 179.762 Td (inline-detox) Tj ET /F0 11 Tf BT /F0 11 Tf 140.140 179.762 Td (was) Tj ET BT /F0 11 Tf 159.995 179.762 Td (originally) Tj ET BT /F0 11 Tf 205.524 179.762 Td (designed) Tj ET BT /F0 11 Tf 247.379 179.762 Td (to) Tj ET BT /F0 11 Tf 258.687 179.762 Td (clean) Tj ET BT /F0 11 Tf 284.647 179.762 Td (up) Tj ET BT /F0 11 Tf 298.397 179.762 Td (files) Tj ET BT /F0 11 Tf 320.089 179.762 Td (that) Tj ET BT /F0 11 Tf 339.339 179.762 Td (I) Tj ET BT /F0 11 Tf 345.752 179.762 Td (had) Tj ET BT /F0 11 Tf 364.386 179.762 Td (received) Tj ET BT /F0 11 Tf 404.393 179.762 Td (from) Tj ET BT /F0 11 Tf 428.527 179.762 Td (friends) Tj ET BT /F0 11 Tf 461.824 179.762 Td (which) Tj ET BT /F0 11 Tf 491.458 179.762 Td (had) Tj ET BT /F0 11 Tf 510.092 179.762 Td (been) Tj ET BT /F0 11 Tf 81.774 164.373 Td (created) Tj ET BT /F0 11 Tf 116.281 164.373 Td (using) Tj ET BT /F0 11 Tf 142.868 164.373 Td (other) Tj ET BT /F0 11 Tf 168.223 164.373 Td (operating) Tj ET BT /F0 11 Tf 212.520 164.373 Td (systems.) 
Tj ET BT /F0 11 Tf 255.607 164.373 Td (It's) Tj ET BT /F0 11 Tf 273.020 164.373 Td (trivial) Tj ET BT /F0 11 Tf 302.049 164.373 Td (to) Tj ET BT /F0 11 Tf 313.357 164.373 Td (create) Tj ET BT /F0 11 Tf 342.364 164.373 Td (a) Tj ET BT /F0 11 Tf 349.998 164.373 Td (filename) Tj ET BT /F0 11 Tf 391.237 164.373 Td (with) Tj ET BT /F0 11 Tf 413.545 164.373 Td (spaces,) Tj ET BT /F0 11 Tf 447.755 164.373 Td (parenthesis,) Tj ET BT /F0 11 Tf 502.744 164.373 Td (brackets,) Tj ET BT /F0 11 Tf 81.774 148.984 Td (and) Tj ET BT /F0 11 Tf 100.408 148.984 Td (ampersands) Tj ET BT /F0 11 Tf 155.089 148.984 Td (under) Tj ET BT /F0 11 Tf 182.886 148.984 Td (some) Tj ET BT /F0 11 Tf 208.857 148.984 Td (operating) Tj ET BT /F0 11 Tf 253.154 148.984 Td (systems.) Tj ET BT /F0 11 Tf 296.241 148.984 Td (These) Tj ET BT /F0 11 Tf 325.259 148.984 Td (have) Tj ET BT /F0 11 Tf 348.777 148.984 Td (special) Tj ET BT /F0 11 Tf 382.074 148.984 Td (meaning) Tj ET BT /F0 11 Tf 422.708 148.984 Td (within) Tj ET BT /F0 11 Tf 453.574 148.984 Td (FreeBSD) Tj ET BT /F0 11 Tf 497.266 148.984 Td (and) Tj ET BT /F0 11 Tf 81.774 133.595 Td (Linux,) Tj ET BT /F0 11 Tf 113.553 133.595 Td (and) Tj ET BT /F0 11 Tf 132.187 133.595 Td (cause) Tj ET BT /F0 11 Tf 159.368 133.595 Td (problems) Tj ET BT /F0 11 Tf 203.060 133.595 Td (when) Tj ET BT /F0 11 Tf 229.636 133.595 Td (you) Tj ET BT /F0 11 Tf 248.886 133.595 Td (go) Tj ET BT /F0 11 Tf 262.636 133.595 Td (to) Tj ET BT /F0 11 Tf 273.944 133.595 Td (access) Tj ET BT /F0 11 Tf 304.788 133.595 Td (them.) Tj ET BT /F0 11 Tf 335.038 133.595 Td (I) Tj ET BT /F0 11 Tf 341.451 133.595 Td (created) Tj ET /F1 11 Tf BT /F1 11 Tf 375.958 133.595 Td (inline-detox) Tj ET /F0 11 Tf BT /F0 11 Tf 434.324 133.595 Td (to) Tj ET BT /F0 11 Tf 445.632 133.595 Td (clean) Tj ET BT /F0 11 Tf 471.592 133.595 Td (up) Tj ET BT /F0 11 Tf 485.342 133.595 Td (these) Tj ET BT /F0 11 Tf 510.697 133.595 Td (files.) 
Tj ET BT /F0 11 Tf 81.774 102.817 Td (Version) Tj ET BT /F0 11 Tf 119.350 102.817 Td (2.0) Tj ET BT /F0 11 Tf 135.850 102.817 Td (stepped) Tj ET BT /F0 11 Tf 172.205 102.817 Td (back) Tj ET BT /F0 11 Tf 195.723 102.817 Td (from) Tj ET BT /F0 11 Tf 219.857 102.817 Td (transliteration) Tj ET BT /F0 11 Tf 283.712 102.817 Td (out) Tj ET BT /F0 11 Tf 300.520 102.817 Td (of) Tj ET BT /F0 11 Tf 312.433 102.817 Td (the) Tj ET BT /F0 11 Tf 328.625 102.817 Td (box,) Tj ET BT /F0 11 Tf 350.625 102.817 Td (instead) Tj ET BT /F0 11 Tf 384.538 102.817 Td (focusing) Tj ET BT /F0 11 Tf 425.172 102.817 Td (on) Tj ET BT /F0 11 Tf 438.922 102.817 Td (ease) Tj ET BT /F0 11 Tf 460.603 102.817 Td (of) Tj ET BT /F0 11 Tf 472.516 102.817 Td (use.) Tj ET BT /F0 11 Tf 495.429 102.817 Td (The) Tj ET /F0 11 Tf BT /F0 11 Tf 68.024 739.233 Td (INLINE-DETOX\(1\)) Tj ET BT /F0 11 Tf 245.498 739.233 Td (General) Tj ET BT /F0 11 Tf 283.063 739.233 Td (Commands) Tj ET BT /F0 11 Tf 335.929 739.233 Td (Manual) Tj ET BT /F0 11 Tf 453.849 739.233 Td (INLINE-DETOX\(1\)) Tj ET BT /F0 11 Tf 68.024 36.245 Td (Debian) Tj ET BT /F0 11 Tf 267.036 36.245 Td (February) Tj ET BT /F0 11 Tf 309.496 36.245 Td (24,) Tj ET BT /F0 11 Tf 325.996 36.245 Td (2021) Tj ET BT /F0 11 Tf 512.490 36.245 Td (Debian) Tj ET endstream endobj 12 0 obj 10482 endobj 13 0 obj << /ProcSet [/PDF /Text] /Font << /F0 3 0 R /F1 4 0 R /F2 5 0 R /F3 6 0 R >> >> endobj 14 0 obj << /Type /Page /Parent 2 0 R /Resources 13 0 R /Contents 11 0 R >> endobj 15 0 obj << /Length 16 0 R >> stream BT /F0 11 Tf 81.774 702.988 Td (primary) Tj ET BT /F0 11 Tf 119.350 702.988 Td (motivations) Tj ET BT /F0 11 Tf 174.053 702.988 Td (for) Tj ET BT /F0 11 Tf 189.629 702.988 Td (this) Tj ET BT /F0 11 Tf 208.274 702.988 Td (were) Tj ET BT /F0 11 Tf 232.397 702.988 Td (user-provided) Tj ET BT /F0 11 Tf 296.241 702.988 Td (feedback,) Tj ET BT /F0 11 Tf 341.440 702.988 Td (and) Tj ET BT /F0 11 Tf 360.074 702.988 Td (the) Tj ET BT /F0 11 Tf 376.266 702.988 Td (fact) 
Tj ET BT /F0 11 Tf 395.505 702.988 Td (that) Tj ET BT /F0 11 Tf 414.755 702.988 Td (many) Tj ET BT /F0 11 Tf 441.947 702.988 Td (modern) Tj ET BT /F0 11 Tf 478.302 702.988 Td (Unix-like) Tj ET BT /F0 11 Tf 523.215 702.988 Td (OSs) Tj ET BT /F0 11 Tf 81.774 687.599 Td (use) Tj ET BT /F0 11 Tf 99.187 687.599 Td (UTF-8) Tj ET BT /F0 11 Tf 131.879 687.599 Td (as) Tj ET BT /F0 11 Tf 143.792 687.599 Td (their) Tj ET BT /F0 11 Tf 166.705 687.599 Td (primary) Tj ET BT /F0 11 Tf 204.281 687.599 Td (character) Tj ET BT /F0 11 Tf 247.335 687.599 Td (set.) Tj ET BT /F0 11 Tf 267.806 687.599 Td (Transliterating) Tj ET BT /F0 11 Tf 335.324 687.599 Td (from) Tj ET BT /F0 11 Tf 359.458 687.599 Td (UTF-8) Tj ET BT /F0 11 Tf 392.150 687.599 Td (to) Tj ET BT /F0 11 Tf 403.458 687.599 Td (ASCII) Tj ET BT /F0 11 Tf 434.929 687.599 Td (in) Tj ET BT /F0 11 Tf 446.237 687.599 Td (this) Tj ET BT /F0 11 Tf 464.882 687.599 Td (scenario) Tj ET BT /F0 11 Tf 504.284 687.599 Td (is) Tj ET BT /F0 11 Tf 514.371 687.599 Td (lossy) Tj ET BT /F0 11 Tf 81.774 672.210 Td (and) Tj ET BT /F0 11 Tf 100.408 672.210 Td (pointless.) Tj ET /F1 11 Tf BT /F1 11 Tf 68.024 641.432 Td (AUTHORS) Tj ET BT /F1 11 Tf 81.774 626.043 Td (inline-detox) Tj ET /F0 11 Tf BT /F0 11 Tf 140.140 626.043 Td (was) Tj ET BT /F0 11 Tf 159.995 626.043 Td (written) Tj ET BT /F0 11 Tf 193.908 626.043 Td (by) Tj ET BT /F0 11 Tf 207.658 626.043 Td (Doug) Tj ET BT /F0 11 Tf 234.850 626.043 Td (Harple.) 
Tj ET /F0 11 Tf BT /F0 11 Tf 68.024 739.233 Td (INLINE-DETOX\(1\)) Tj ET BT /F0 11 Tf 245.498 739.233 Td (General) Tj ET BT /F0 11 Tf 283.063 739.233 Td (Commands) Tj ET BT /F0 11 Tf 335.929 739.233 Td (Manual) Tj ET BT /F0 11 Tf 453.849 739.233 Td (INLINE-DETOX\(1\)) Tj ET BT /F0 11 Tf 68.024 36.245 Td (Debian) Tj ET BT /F0 11 Tf 267.036 36.245 Td (February) Tj ET BT /F0 11 Tf 309.496 36.245 Td (24,) Tj ET BT /F0 11 Tf 325.996 36.245 Td (2021) Tj ET BT /F0 11 Tf 512.490 36.245 Td (Debian) Tj ET endstream endobj 16 0 obj 2419 endobj 17 0 obj << /ProcSet [/PDF /Text] /Font << /F0 3 0 R /F1 4 0 R /F2 5 0 R /F3 6 0 R >> >> endobj 18 0 obj << /Type /Page /Parent 2 0 R /Resources 17 0 R /Contents 15 0 R >> endobj 2 0 obj << /Type /Pages /MediaBox [0 0 612 790] /Count 3 /Kids [ 10 0 R 14 0 R 18 0 R] >> endobj 19 0 obj << /Type /Catalog /Pages 2 0 R >> endobj xref 0 20 0000000000 65535 f 0000000009 00000 n 0000028766 00000 n 0000000030 00000 n 0000000112 00000 n 0000000193 00000 n 0000000276 00000 n 0000000363 00000 n 0000015159 00000 n 0000015180 00000 n 0000015275 00000 n 0000015356 00000 n 0000025892 00000 n 0000025914 00000 n 0000026010 00000 n 0000026093 00000 n 0000028566 00000 n 0000028587 00000 n 0000028683 00000 n 0000028863 00000 n trailer << /Size 20 /Root 19 0 R /Info 1 0 R >> startxref 28913 %%EOF detox-2.0.0/src/000077500000000000000000000000001460212773400134315ustar00rootroot00000000000000detox-2.0.0/src/.gitignore000066400000000000000000000001601460212773400154160ustar00rootroot00000000000000# Built Binaries check-table detox escape-utf-8 generate-builtin-table inline-detox test-table-regression *.exe detox-2.0.0/src/Makefile.am000066400000000000000000000034251460212773400154710ustar00rootroot00000000000000# # References and reasons for compiler flags: # # https://github.com/dharple/detox/issues/31 # https://gcc.gnu.org/onlinedocs/gcc/Code-Gen-Options.html # https://access.redhat.com/blogs/766093/posts/1976213 # AM_CFLAGS = \ -DDATADIR=\"$(datadir)\" \ 
-DSYSCONFDIR=\"$(sysconfdir)\" \ -DYY_NO_INPUT \ -DYY_NO_UNPUT \ -D_FORTIFY_SOURCE=2 \ -Wall \ -Werror # # # AM_YFLAGS = -d AM_LFLAGS = -i EXTRA_DIST = *.h bin_PROGRAMS = \ detox \ inline-detox noinst_PROGRAMS = \ check-table \ escape-utf-8 \ generate-builtin-table detox_SOURCES = \ config_file_yacc.y \ config_file_lex.l \ builtin_table.c \ clean_string.c \ clean_utf_8.c \ config_file.c \ config_file_dump.c \ config_file_spoof.c \ detox.c \ file.c \ filelist.c \ filter.c \ parse_options.c \ parse_table.c \ sequence.c \ table.c \ wrapped.c inline_detox_SOURCES = $(detox_SOURCES) check_table_SOURCES = \ check_table.c \ table.c \ table_dump.c \ wrapped.c escape_utf_8_SOURCES = \ builtin_table.c \ clean_string.c \ clean_utf_8.c \ escape_utf_8.c \ file.c \ filelist.c \ filter.c \ parse_options.c \ parse_table.c \ sequence.c \ table.c \ table_dump.c \ wrapped.c generate_builtin_table_SOURCES = \ parse_table.c \ table.c \ generate_builtin_table.c \ wrapped.c # # code coverage rules # if WITH_COVERAGE # # This is so hacky. If you go the normal route, and put `-ftest-coverage` on a # CFLAGS variable, it works as expected, BUT, when the linking happens, gcc # will zero out the `.gcno` file for the first file on the link list. # DEFS += -ftest-coverage coverage-text: gcov *.c else coverage-text: echo "code coverage is not enabled; run ./configure --with-coverage" endif # WITH_COVERAGE # # # clean-local: rm -f *.i *.s rm -f *.gcov *.gcno *.gcda rm -f test-table-regression detox-2.0.0/src/builtin_table.c000066400000000000000000000772671460212773400164350ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
* */ #include "table.h" #include "builtin_table.h" /** * Generated from safe.tbl */ static table_row_t builtin_safe_rows[55] = { { .key = 0x0001, .data = "_" }, { .key = 0x0002, .data = "_" }, { .key = 0x0003, .data = "_" }, { .key = 0x0004, .data = "_" }, { .key = 0x0005, .data = "_" }, { .key = 0x0006, .data = "_" }, { .key = 0x0007, .data = "_" }, { .key = 0x0008, .data = "_" }, { .key = 0x0009, .data = "_" }, { .key = 0x000a, .data = "_" }, { .key = 0x000b, .data = "_" }, { .key = 0x000c, .data = "_" }, { .key = 0x000d, .data = "_" }, { .key = 0x000e, .data = "_" }, { .key = 0x000f, .data = "_" }, { .key = 0x0010, .data = "_" }, { .key = 0x0011, .data = "_" }, { .key = 0x0012, .data = "_" }, { .key = 0x0013, .data = "_" }, { .key = 0x0014, .data = "_" }, { .key = 0x0015, .data = "_" }, { .key = 0x0016, .data = "_" }, { .key = 0x0017, .data = "_" }, { .key = 0x0018, .data = "_" }, { .key = 0x0019, .data = "_" }, { .key = 0x001a, .data = "_" }, { .key = 0x001b, .data = "_" }, { .key = 0x001c, .data = "_" }, { .key = 0x001d, .data = "_" }, { .key = 0x001e, .data = "_" }, { .key = 0x001f, .data = "_" }, { .key = 0x0020, .data = "_" }, { .key = 0x0021, .data = "_" }, { .key = 0x0022, .data = "_" }, { .key = 0x0024, .data = "_" }, { .key = 0x0026, .data = "_and_" }, { .key = 0x0027, .data = "_" }, { .key = 0x0028, .data = "-" }, { .key = 0x0029, .data = "-" }, { .key = 0x002a, .data = "_" }, { .key = 0x002f, .data = "_" }, { .key = 0x003a, .data = "_" }, { .key = 0x003b, .data = "_" }, { .key = 0x003c, .data = "_" }, { .key = 0x003e, .data = "_" }, { .key = 0x003f, .data = "_" }, { .key = 0x0040, .data = "_" }, { .key = 0x005b, .data = "-" }, { .key = 0x005c, .data = "_" }, { .key = 0x005d, .data = "-" }, { .key = 0x0060, .data = "_" }, { .key = 0x007b, .data = "-" }, { .key = 0x007c, .data = "_" }, { .key = 0x007d, .data = "-" }, { .key = 0x007f, .data = "_" }, }; static table_t builtin_safe_table = { .length = 55, .used = 55, .max_data_length = 5, .max_key = 
0x007f, .hits = 0, .misses = 0, .seeks = 0, .overwrites = 0, .use_hash = 0, .builtin = 1, .rows = builtin_safe_rows, .default_translation = NULL, }; table_t *load_builtin_safe_table(void) { return table_resize(&builtin_safe_table, 256, 1); } /** * Generated from iso8859_1.tbl */ static table_row_t builtin_iso8859_1_rows[88] = { { .key = 0x00a0, .data = " " }, { .key = 0x00a1, .data = "!" }, { .key = 0x00a2, .data = "_cent_" }, { .key = 0x00a3, .data = "_pound_" }, { .key = 0x00a4, .data = "$" }, { .key = 0x00a5, .data = "_yen_" }, { .key = 0x00a7, .data = "_ss_" }, { .key = 0x00a8, .data = " " }, { .key = 0x00a9, .data = "_copy_" }, { .key = 0x00aa, .data = "_a_" }, { .key = 0x00ab, .data = "\"" }, { .key = 0x00ad, .data = "-" }, { .key = 0x00ae, .data = "_reg_" }, { .key = 0x00b0, .data = "_deg_" }, { .key = 0x00b2, .data = "^2" }, { .key = 0x00b3, .data = "^3" }, { .key = 0x00b4, .data = "'" }, { .key = 0x00b5, .data = "u" }, { .key = 0x00b6, .data = "_pp_" }, { .key = 0x00b7, .data = "*" }, { .key = 0x00b8, .data = " " }, { .key = 0x00b9, .data = "^1" }, { .key = 0x00ba, .data = "_o_" }, { .key = 0x00bb, .data = "\"" }, { .key = 0x00bf, .data = "?" 
}, { .key = 0x00c0, .data = "A" }, { .key = 0x00c1, .data = "A" }, { .key = 0x00c2, .data = "A" }, { .key = 0x00c3, .data = "A" }, { .key = 0x00c4, .data = "A" }, { .key = 0x00c5, .data = "A" }, { .key = 0x00c6, .data = "AE" }, { .key = 0x00c7, .data = "C" }, { .key = 0x00c8, .data = "E" }, { .key = 0x00c9, .data = "E" }, { .key = 0x00ca, .data = "E" }, { .key = 0x00cb, .data = "E" }, { .key = 0x00cc, .data = "I" }, { .key = 0x00cd, .data = "I" }, { .key = 0x00ce, .data = "I" }, { .key = 0x00cf, .data = "I" }, { .key = 0x00d0, .data = "TH" }, { .key = 0x00d1, .data = "N" }, { .key = 0x00d2, .data = "O" }, { .key = 0x00d3, .data = "O" }, { .key = 0x00d4, .data = "O" }, { .key = 0x00d5, .data = "O" }, { .key = 0x00d6, .data = "O" }, { .key = 0x00d7, .data = "x" }, { .key = 0x00d8, .data = "O" }, { .key = 0x00d9, .data = "U" }, { .key = 0x00da, .data = "U" }, { .key = 0x00db, .data = "U" }, { .key = 0x00dc, .data = "U" }, { .key = 0x00dd, .data = "Y" }, { .key = 0x00de, .data = "TH" }, { .key = 0x00df, .data = "ss" }, { .key = 0x00e0, .data = "a" }, { .key = 0x00e1, .data = "a" }, { .key = 0x00e2, .data = "a" }, { .key = 0x00e3, .data = "a" }, { .key = 0x00e4, .data = "a" }, { .key = 0x00e5, .data = "a" }, { .key = 0x00e6, .data = "ae" }, { .key = 0x00e7, .data = "c" }, { .key = 0x00e8, .data = "e" }, { .key = 0x00e9, .data = "e" }, { .key = 0x00ea, .data = "e" }, { .key = 0x00eb, .data = "e" }, { .key = 0x00ec, .data = "i" }, { .key = 0x00ed, .data = "i" }, { .key = 0x00ee, .data = "i" }, { .key = 0x00ef, .data = "i" }, { .key = 0x00f0, .data = "th" }, { .key = 0x00f1, .data = "n" }, { .key = 0x00f2, .data = "o" }, { .key = 0x00f3, .data = "o" }, { .key = 0x00f4, .data = "o" }, { .key = 0x00f5, .data = "o" }, { .key = 0x00f6, .data = "o" }, { .key = 0x00f8, .data = "o" }, { .key = 0x00f9, .data = "u" }, { .key = 0x00fa, .data = "u" }, { .key = 0x00fb, .data = "u" }, { .key = 0x00fc, .data = "u" }, { .key = 0x00fd, .data = "y" }, { .key = 0x00fe, .data = "th" }, { 
.key = 0x00ff, .data = "y" }, }; static table_t builtin_iso8859_1_table = { .length = 88, .used = 88, .max_data_length = 7, .max_key = 0x00ff, .hits = 0, .misses = 0, .seeks = 0, .overwrites = 0, .use_hash = 0, .builtin = 1, .rows = builtin_iso8859_1_rows, .default_translation = "_", }; table_t *load_builtin_iso8859_1_table(void) { return table_resize(&builtin_iso8859_1_table, 256, 1); } /** * Generated from unicode.tbl */ static table_row_t builtin_unicode_rows[627] = { { .key = 0x0020, .data = " " }, { .key = 0x0021, .data = "!" }, { .key = 0x0022, .data = "\"" }, { .key = 0x0023, .data = "#" }, { .key = 0x0024, .data = "$" }, { .key = 0x0025, .data = "%" }, { .key = 0x0026, .data = "&" }, { .key = 0x0027, .data = "'" }, { .key = 0x0028, .data = "(" }, { .key = 0x0029, .data = ")" }, { .key = 0x002a, .data = "*" }, { .key = 0x002b, .data = "+" }, { .key = 0x002c, .data = "," }, { .key = 0x002d, .data = "-" }, { .key = 0x002e, .data = "." }, { .key = 0x002f, .data = "/" }, { .key = 0x0030, .data = "0" }, { .key = 0x0031, .data = "1" }, { .key = 0x0032, .data = "2" }, { .key = 0x0033, .data = "3" }, { .key = 0x0034, .data = "4" }, { .key = 0x0035, .data = "5" }, { .key = 0x0036, .data = "6" }, { .key = 0x0037, .data = "7" }, { .key = 0x0038, .data = "8" }, { .key = 0x0039, .data = "9" }, { .key = 0x003a, .data = ":" }, { .key = 0x003b, .data = ";" }, { .key = 0x003c, .data = "<" }, { .key = 0x003d, .data = "=" }, { .key = 0x003e, .data = ">" }, { .key = 0x003f, .data = "?" 
}, { .key = 0x0040, .data = "@" }, { .key = 0x0041, .data = "A" }, { .key = 0x0042, .data = "B" }, { .key = 0x0043, .data = "C" }, { .key = 0x0044, .data = "D" }, { .key = 0x0045, .data = "E" }, { .key = 0x0046, .data = "F" }, { .key = 0x0047, .data = "G" }, { .key = 0x0048, .data = "H" }, { .key = 0x0049, .data = "I" }, { .key = 0x004a, .data = "J" }, { .key = 0x004b, .data = "K" }, { .key = 0x004c, .data = "L" }, { .key = 0x004d, .data = "M" }, { .key = 0x004e, .data = "N" }, { .key = 0x004f, .data = "O" }, { .key = 0x0050, .data = "P" }, { .key = 0x0051, .data = "Q" }, { .key = 0x0052, .data = "R" }, { .key = 0x0053, .data = "S" }, { .key = 0x0054, .data = "T" }, { .key = 0x0055, .data = "U" }, { .key = 0x0056, .data = "V" }, { .key = 0x0057, .data = "W" }, { .key = 0x0058, .data = "X" }, { .key = 0x0059, .data = "Y" }, { .key = 0x005a, .data = "Z" }, { .key = 0x005b, .data = "[" }, { .key = 0x005c, .data = "\\" }, { .key = 0x005d, .data = "]" }, { .key = 0x005e, .data = "^" }, { .key = 0x005f, .data = "_" }, { .key = 0x0060, .data = "`" }, { .key = 0x0061, .data = "a" }, { .key = 0x0062, .data = "b" }, { .key = 0x0063, .data = "c" }, { .key = 0x0064, .data = "d" }, { .key = 0x0065, .data = "e" }, { .key = 0x0066, .data = "f" }, { .key = 0x0067, .data = "g" }, { .key = 0x0068, .data = "h" }, { .key = 0x0069, .data = "i" }, { .key = 0x006a, .data = "j" }, { .key = 0x006b, .data = "k" }, { .key = 0x006c, .data = "l" }, { .key = 0x006d, .data = "m" }, { .key = 0x006e, .data = "n" }, { .key = 0x006f, .data = "o" }, { .key = 0x0070, .data = "p" }, { .key = 0x0071, .data = "q" }, { .key = 0x0072, .data = "r" }, { .key = 0x0073, .data = "s" }, { .key = 0x0074, .data = "t" }, { .key = 0x0075, .data = "u" }, { .key = 0x0076, .data = "v" }, { .key = 0x0077, .data = "w" }, { .key = 0x0078, .data = "x" }, { .key = 0x0079, .data = "y" }, { .key = 0x007a, .data = "z" }, { .key = 0x007b, .data = "{" }, { .key = 0x007c, .data = "|" }, { .key = 0x007d, .data = "}" }, { .key = 
0x007e, .data = "~" }, { .key = 0x00a0, .data = " " }, { .key = 0x00a1, .data = "!" }, { .key = 0x00a2, .data = "_cent_" }, { .key = 0x00a3, .data = "_pound_" }, { .key = 0x00a4, .data = "$" }, { .key = 0x00a5, .data = "_yen_" }, { .key = 0x00a7, .data = "_ss_" }, { .key = 0x00a8, .data = " " }, { .key = 0x00a9, .data = "_copy_" }, { .key = 0x00aa, .data = "_a_" }, { .key = 0x00ab, .data = "\"" }, { .key = 0x00ad, .data = "-" }, { .key = 0x00ae, .data = "_reg_" }, { .key = 0x00b0, .data = "_deg_" }, { .key = 0x00b2, .data = "^2" }, { .key = 0x00b3, .data = "^3" }, { .key = 0x00b4, .data = "'" }, { .key = 0x00b5, .data = "u" }, { .key = 0x00b6, .data = "_pp_" }, { .key = 0x00b7, .data = "*" }, { .key = 0x00b8, .data = " " }, { .key = 0x00b9, .data = "^1" }, { .key = 0x00ba, .data = "_o_" }, { .key = 0x00bb, .data = "\"" }, { .key = 0x00bf, .data = "?" }, { .key = 0x00c0, .data = "A" }, { .key = 0x00c1, .data = "A" }, { .key = 0x00c2, .data = "A" }, { .key = 0x00c3, .data = "A" }, { .key = 0x00c4, .data = "A" }, { .key = 0x00c5, .data = "A" }, { .key = 0x00c6, .data = "AE" }, { .key = 0x00c7, .data = "C" }, { .key = 0x00c8, .data = "E" }, { .key = 0x00c9, .data = "E" }, { .key = 0x00ca, .data = "E" }, { .key = 0x00cb, .data = "E" }, { .key = 0x00cc, .data = "I" }, { .key = 0x00cd, .data = "I" }, { .key = 0x00ce, .data = "I" }, { .key = 0x00cf, .data = "I" }, { .key = 0x00d0, .data = "TH" }, { .key = 0x00d1, .data = "N" }, { .key = 0x00d2, .data = "O" }, { .key = 0x00d3, .data = "O" }, { .key = 0x00d4, .data = "O" }, { .key = 0x00d5, .data = "O" }, { .key = 0x00d6, .data = "O" }, { .key = 0x00d7, .data = "x" }, { .key = 0x00d8, .data = "O" }, { .key = 0x00d9, .data = "U" }, { .key = 0x00da, .data = "U" }, { .key = 0x00db, .data = "U" }, { .key = 0x00dc, .data = "U" }, { .key = 0x00dd, .data = "Y" }, { .key = 0x00de, .data = "TH" }, { .key = 0x00df, .data = "ss" }, { .key = 0x00e0, .data = "a" }, { .key = 0x00e1, .data = "a" }, { .key = 0x00e2, .data = "a" }, { .key = 
0x00e3, .data = "a" }, { .key = 0x00e4, .data = "a" }, { .key = 0x00e5, .data = "a" }, { .key = 0x00e6, .data = "ae" }, { .key = 0x00e7, .data = "c" }, { .key = 0x00e8, .data = "e" }, { .key = 0x00e9, .data = "e" }, { .key = 0x00ea, .data = "e" }, { .key = 0x00eb, .data = "e" }, { .key = 0x00ec, .data = "i" }, { .key = 0x00ed, .data = "i" }, { .key = 0x00ee, .data = "i" }, { .key = 0x00ef, .data = "i" }, { .key = 0x00f0, .data = "th" }, { .key = 0x00f1, .data = "n" }, { .key = 0x00f2, .data = "o" }, { .key = 0x00f3, .data = "o" }, { .key = 0x00f4, .data = "o" }, { .key = 0x00f5, .data = "o" }, { .key = 0x00f6, .data = "o" }, { .key = 0x00f8, .data = "o" }, { .key = 0x00f9, .data = "u" }, { .key = 0x00fa, .data = "u" }, { .key = 0x00fb, .data = "u" }, { .key = 0x00fc, .data = "u" }, { .key = 0x00fd, .data = "y" }, { .key = 0x00fe, .data = "th" }, { .key = 0x00ff, .data = "y" }, { .key = 0x0100, .data = "A" }, { .key = 0x0101, .data = "a" }, { .key = 0x0102, .data = "A" }, { .key = 0x0103, .data = "a" }, { .key = 0x0104, .data = "A" }, { .key = 0x0105, .data = "a" }, { .key = 0x0106, .data = "C" }, { .key = 0x0107, .data = "c" }, { .key = 0x0108, .data = "C" }, { .key = 0x0109, .data = "c" }, { .key = 0x010a, .data = "C" }, { .key = 0x010b, .data = "c" }, { .key = 0x010c, .data = "C" }, { .key = 0x010d, .data = "c" }, { .key = 0x010e, .data = "D" }, { .key = 0x010f, .data = "d" }, { .key = 0x0110, .data = "D" }, { .key = 0x0111, .data = "d" }, { .key = 0x0112, .data = "E" }, { .key = 0x0113, .data = "e" }, { .key = 0x0114, .data = "E" }, { .key = 0x0115, .data = "e" }, { .key = 0x0116, .data = "E" }, { .key = 0x0117, .data = "e" }, { .key = 0x0118, .data = "E" }, { .key = 0x0119, .data = "e" }, { .key = 0x011a, .data = "E" }, { .key = 0x011b, .data = "e" }, { .key = 0x011c, .data = "G" }, { .key = 0x011d, .data = "g" }, { .key = 0x011e, .data = "G" }, { .key = 0x011f, .data = "g" }, { .key = 0x0120, .data = "G" }, { .key = 0x0121, .data = "g" }, { .key = 0x0122, 
.data = "G" }, { .key = 0x0123, .data = "g" }, { .key = 0x0124, .data = "H" }, { .key = 0x0125, .data = "h" }, { .key = 0x0126, .data = "H" }, { .key = 0x0127, .data = "h" }, { .key = 0x0128, .data = "I" }, { .key = 0x0129, .data = "i" }, { .key = 0x012a, .data = "I" }, { .key = 0x012b, .data = "i" }, { .key = 0x012c, .data = "I" }, { .key = 0x012d, .data = "i" }, { .key = 0x012e, .data = "I" }, { .key = 0x012f, .data = "i" }, { .key = 0x0130, .data = "I" }, { .key = 0x0131, .data = "i" }, { .key = 0x0132, .data = "IJ" }, { .key = 0x0133, .data = "ij" }, { .key = 0x0134, .data = "J" }, { .key = 0x0135, .data = "j" }, { .key = 0x0136, .data = "K" }, { .key = 0x0137, .data = "k" }, { .key = 0x0138, .data = "q" }, { .key = 0x0139, .data = "L" }, { .key = 0x013a, .data = "l" }, { .key = 0x013b, .data = "L" }, { .key = 0x013c, .data = "l" }, { .key = 0x013d, .data = "L" }, { .key = 0x013e, .data = "l" }, { .key = 0x013f, .data = "L" }, { .key = 0x0140, .data = "l" }, { .key = 0x0141, .data = "L" }, { .key = 0x0142, .data = "l" }, { .key = 0x0143, .data = "N" }, { .key = 0x0144, .data = "n" }, { .key = 0x0145, .data = "N" }, { .key = 0x0146, .data = "n" }, { .key = 0x0147, .data = "N" }, { .key = 0x0148, .data = "n" }, { .key = 0x0149, .data = "'n" }, { .key = 0x014a, .data = "NG" }, { .key = 0x014b, .data = "ng" }, { .key = 0x014c, .data = "O" }, { .key = 0x014d, .data = "o" }, { .key = 0x014e, .data = "O" }, { .key = 0x014f, .data = "o" }, { .key = 0x0150, .data = "O" }, { .key = 0x0151, .data = "o" }, { .key = 0x0152, .data = "OE" }, { .key = 0x0153, .data = "oe" }, { .key = 0x0154, .data = "R" }, { .key = 0x0155, .data = "r" }, { .key = 0x0156, .data = "R" }, { .key = 0x0157, .data = "r" }, { .key = 0x0158, .data = "R" }, { .key = 0x0159, .data = "r" }, { .key = 0x015a, .data = "S" }, { .key = 0x015b, .data = "s" }, { .key = 0x015c, .data = "S" }, { .key = 0x015d, .data = "s" }, { .key = 0x015e, .data = "S" }, { .key = 0x015f, .data = "s" }, { .key = 0x0160, .data = 
"S" }, { .key = 0x0161, .data = "s" }, { .key = 0x0162, .data = "T" }, { .key = 0x0163, .data = "t" }, { .key = 0x0164, .data = "T" }, { .key = 0x0165, .data = "t" }, { .key = 0x0166, .data = "T" }, { .key = 0x0167, .data = "t" }, { .key = 0x0168, .data = "U" }, { .key = 0x0169, .data = "u" }, { .key = 0x016a, .data = "U" }, { .key = 0x016b, .data = "u" }, { .key = 0x016c, .data = "U" }, { .key = 0x016d, .data = "u" }, { .key = 0x016e, .data = "U" }, { .key = 0x016f, .data = "u" }, { .key = 0x0170, .data = "U" }, { .key = 0x0171, .data = "u" }, { .key = 0x0172, .data = "U" }, { .key = 0x0173, .data = "u" }, { .key = 0x0174, .data = "W" }, { .key = 0x0175, .data = "w" }, { .key = 0x0176, .data = "Y" }, { .key = 0x0177, .data = "y" }, { .key = 0x0178, .data = "Y" }, { .key = 0x0179, .data = "Z" }, { .key = 0x017a, .data = "z" }, { .key = 0x017b, .data = "Z" }, { .key = 0x017c, .data = "z" }, { .key = 0x017d, .data = "Z" }, { .key = 0x017e, .data = "z" }, { .key = 0x017f, .data = "s" }, { .key = 0x0180, .data = "b" }, { .key = 0x0181, .data = "B" }, { .key = 0x0182, .data = "B" }, { .key = 0x0183, .data = "b" }, { .key = 0x0184, .data = "B" }, { .key = 0x0185, .data = "b" }, { .key = 0x0186, .data = "O" }, { .key = 0x0187, .data = "C" }, { .key = 0x0188, .data = "c" }, { .key = 0x0189, .data = "D" }, { .key = 0x018a, .data = "D" }, { .key = 0x018b, .data = "D" }, { .key = 0x018c, .data = "d" }, { .key = 0x018d, .data = "z" }, { .key = 0x018e, .data = "E" }, { .key = 0x018f, .data = "E" }, { .key = 0x0190, .data = "E" }, { .key = 0x0191, .data = "F" }, { .key = 0x0192, .data = "f" }, { .key = 0x0193, .data = "G" }, { .key = 0x0194, .data = "Y" }, { .key = 0x0195, .data = "hv" }, { .key = 0x0196, .data = "I" }, { .key = 0x0197, .data = "I" }, { .key = 0x0198, .data = "K" }, { .key = 0x0199, .data = "k" }, { .key = 0x019a, .data = "l" }, { .key = 0x019b, .data = "l" }, { .key = 0x019c, .data = "w" }, { .key = 0x019d, .data = "N" }, { .key = 0x019e, .data = "n" }, { .key 
= 0x019f, .data = "O" }, { .key = 0x01a0, .data = "O" }, { .key = 0x01a1, .data = "o" }, { .key = 0x01a2, .data = "OI" }, { .key = 0x01a3, .data = "oi" }, { .key = 0x01a4, .data = "P" }, { .key = 0x01a5, .data = "p" }, { .key = 0x01a6, .data = "YR" }, { .key = 0x01a7, .data = "S" }, { .key = 0x01a8, .data = "s" }, { .key = 0x01a9, .data = "SH" }, { .key = 0x01aa, .data = "sh" }, { .key = 0x01ab, .data = "t" }, { .key = 0x01ac, .data = "T" }, { .key = 0x01ad, .data = "t" }, { .key = 0x01ae, .data = "T" }, { .key = 0x01af, .data = "U" }, { .key = 0x01b0, .data = "u" }, { .key = 0x01b1, .data = "Y" }, { .key = 0x01b2, .data = "V" }, { .key = 0x01b3, .data = "Y" }, { .key = 0x01b4, .data = "y" }, { .key = 0x01b5, .data = "Z" }, { .key = 0x01b6, .data = "z" }, { .key = 0x01b7, .data = "ZH" }, { .key = 0x01b8, .data = "ZH" }, { .key = 0x01b9, .data = "zh" }, { .key = 0x01ba, .data = "zh" }, { .key = 0x01bb, .data = "dz" }, { .key = 0x01bc, .data = "5" }, { .key = 0x01bd, .data = "5" }, { .key = 0x01be, .data = "ts" }, { .key = 0x01bf, .data = "w" }, { .key = 0x01c4, .data = "DZ" }, { .key = 0x01c5, .data = "Dz" }, { .key = 0x01c6, .data = "dz" }, { .key = 0x01c7, .data = "LJ" }, { .key = 0x01c8, .data = "Lj" }, { .key = 0x01c9, .data = "lj" }, { .key = 0x01ca, .data = "NJ" }, { .key = 0x01cb, .data = "Nj" }, { .key = 0x01cc, .data = "nj" }, { .key = 0x01cd, .data = "A" }, { .key = 0x01ce, .data = "a" }, { .key = 0x01cf, .data = "I" }, { .key = 0x01d0, .data = "i" }, { .key = 0x01d1, .data = "O" }, { .key = 0x01d2, .data = "o" }, { .key = 0x01d3, .data = "U" }, { .key = 0x01d4, .data = "u" }, { .key = 0x01d5, .data = "U" }, { .key = 0x01d6, .data = "u" }, { .key = 0x01d7, .data = "U" }, { .key = 0x01d8, .data = "u" }, { .key = 0x01d9, .data = "U" }, { .key = 0x01da, .data = "u" }, { .key = 0x01db, .data = "U" }, { .key = 0x01dc, .data = "u" }, { .key = 0x01dd, .data = "e" }, { .key = 0x01de, .data = "A" }, { .key = 0x01df, .data = "a" }, { .key = 0x01e0, .data = "A" }, { 
.key = 0x01e1, .data = "a" }, { .key = 0x01e2, .data = "AE" }, { .key = 0x01e3, .data = "ae" }, { .key = 0x01e4, .data = "G" }, { .key = 0x01e5, .data = "g" }, { .key = 0x01e6, .data = "G" }, { .key = 0x01e7, .data = "g" }, { .key = 0x01e8, .data = "K" }, { .key = 0x01e9, .data = "k" }, { .key = 0x01ea, .data = "O" }, { .key = 0x01eb, .data = "o" }, { .key = 0x01ec, .data = "O" }, { .key = 0x01ed, .data = "o" }, { .key = 0x01ee, .data = "ZH" }, { .key = 0x01ef, .data = "zh" }, { .key = 0x01f0, .data = "j" }, { .key = 0x01f1, .data = "DZ" }, { .key = 0x01f2, .data = "Dz" }, { .key = 0x01f3, .data = "dz" }, { .key = 0x01f4, .data = "G" }, { .key = 0x01f5, .data = "g" }, { .key = 0x01f6, .data = "HU" }, { .key = 0x01f7, .data = "W" }, { .key = 0x01f8, .data = "N" }, { .key = 0x01f9, .data = "n" }, { .key = 0x01fa, .data = "A" }, { .key = 0x01fb, .data = "a" }, { .key = 0x01fc, .data = "AE" }, { .key = 0x01fd, .data = "ae" }, { .key = 0x01fe, .data = "O" }, { .key = 0x01ff, .data = "o" }, { .key = 0x0200, .data = "A" }, { .key = 0x0201, .data = "a" }, { .key = 0x0202, .data = "A" }, { .key = 0x0203, .data = "a" }, { .key = 0x0204, .data = "E" }, { .key = 0x0205, .data = "e" }, { .key = 0x0206, .data = "E" }, { .key = 0x0207, .data = "e" }, { .key = 0x0208, .data = "I" }, { .key = 0x0209, .data = "i" }, { .key = 0x020a, .data = "I" }, { .key = 0x020b, .data = "i" }, { .key = 0x020c, .data = "O" }, { .key = 0x020d, .data = "o" }, { .key = 0x020e, .data = "O" }, { .key = 0x020f, .data = "o" }, { .key = 0x0210, .data = "R" }, { .key = 0x0211, .data = "r" }, { .key = 0x0212, .data = "R" }, { .key = 0x0213, .data = "r" }, { .key = 0x0214, .data = "U" }, { .key = 0x0215, .data = "u" }, { .key = 0x0216, .data = "U" }, { .key = 0x0217, .data = "u" }, { .key = 0x0218, .data = "S" }, { .key = 0x0219, .data = "s" }, { .key = 0x021a, .data = "T" }, { .key = 0x021b, .data = "t" }, { .key = 0x021c, .data = "Y" }, { .key = 0x021d, .data = "y" }, { .key = 0x021e, .data = "H" }, { .key 
= 0x021f, .data = "h" }, { .key = 0x0220, .data = "N" }, { .key = 0x0221, .data = "d" }, { .key = 0x0222, .data = "OU" }, { .key = 0x0223, .data = "ou" }, { .key = 0x0224, .data = "Z" }, { .key = 0x0225, .data = "z" }, { .key = 0x0226, .data = "A" }, { .key = 0x0227, .data = "a" }, { .key = 0x0228, .data = "E" }, { .key = 0x0229, .data = "e" }, { .key = 0x022a, .data = "O" }, { .key = 0x022b, .data = "o" }, { .key = 0x022c, .data = "O" }, { .key = 0x022d, .data = "o" }, { .key = 0x022e, .data = "O" }, { .key = 0x022f, .data = "o" }, { .key = 0x0230, .data = "O" }, { .key = 0x0231, .data = "o" }, { .key = 0x0232, .data = "Y" }, { .key = 0x0233, .data = "y" }, { .key = 0x0234, .data = "l" }, { .key = 0x0235, .data = "n" }, { .key = 0x0236, .data = "t" }, { .key = 0x0237, .data = "j" }, { .key = 0x0238, .data = "db" }, { .key = 0x0239, .data = "qp" }, { .key = 0x023a, .data = "A" }, { .key = 0x023b, .data = "C" }, { .key = 0x023c, .data = "c" }, { .key = 0x023d, .data = "L" }, { .key = 0x023e, .data = "T" }, { .key = 0x023f, .data = "s" }, { .key = 0x0240, .data = "z" }, { .key = 0x0243, .data = "B" }, { .key = 0x0244, .data = "U" }, { .key = 0x0245, .data = "^" }, { .key = 0x0246, .data = "E" }, { .key = 0x0247, .data = "e" }, { .key = 0x0248, .data = "J" }, { .key = 0x0249, .data = "j" }, { .key = 0x024a, .data = "q" }, { .key = 0x024b, .data = "q" }, { .key = 0x024c, .data = "R" }, { .key = 0x024d, .data = "r" }, { .key = 0x024e, .data = "Y" }, { .key = 0x024f, .data = "y" }, { .key = 0x02c6, .data = "^" }, { .key = 0x02dc, .data = "~" }, { .key = 0x03a1, .data = "P" }, { .key = 0x03c1, .data = "p" }, { .key = 0x1952, .data = "n" }, { .key = 0x1959, .data = "u" }, { .key = 0x1963, .data = "l" }, { .key = 0x1971, .data = "e" }, { .key = 0x1974, .data = "c" }, { .key = 0x1e9e, .data = "SS" }, { .key = 0x2000, .data = " " }, { .key = 0x2001, .data = " " }, { .key = 0x2002, .data = " " }, { .key = 0x2003, .data = " " }, { .key = 0x2004, .data = " " }, { .key = 0x2005, 
.data = " " }, { .key = 0x2006, .data = " " }, { .key = 0x2007, .data = " " }, { .key = 0x2008, .data = " " }, { .key = 0x2009, .data = " " }, { .key = 0x200a, .data = " " }, { .key = 0x200b, .data = "" }, { .key = 0x200c, .data = "" }, { .key = 0x200d, .data = "" }, { .key = 0x200e, .data = "" }, { .key = 0x200f, .data = "" }, { .key = 0x2010, .data = "-" }, { .key = 0x2011, .data = "-" }, { .key = 0x2012, .data = "-" }, { .key = 0x2013, .data = "-" }, { .key = 0x2014, .data = "-" }, { .key = 0x2015, .data = "-" }, { .key = 0x2017, .data = "_" }, { .key = 0x2018, .data = "'" }, { .key = 0x2019, .data = "'" }, { .key = 0x201a, .data = "'" }, { .key = 0x201b, .data = "'" }, { .key = 0x201c, .data = "\"" }, { .key = 0x201d, .data = "\"" }, { .key = 0x201e, .data = "\"" }, { .key = 0x201f, .data = "\"" }, { .key = 0x2020, .data = "+" }, { .key = 0x2021, .data = "++" }, { .key = 0x2022, .data = "*" }, { .key = 0x2024, .data = "." }, { .key = 0x2025, .data = ".." }, { .key = 0x2026, .data = "..." }, { .key = 0x2027, .data = "." }, { .key = 0x202f, .data = " " }, { .key = 0x2030, .data = "%" }, { .key = 0x2031, .data = "%" }, { .key = 0x2032, .data = "'" }, { .key = 0x2033, .data = "''" }, { .key = 0x2034, .data = "'''" }, { .key = 0x2035, .data = "`" }, { .key = 0x2036, .data = "``" }, { .key = 0x2037, .data = "```" }, { .key = 0x2038, .data = "^" }, { .key = 0x203b, .data = "*" }, { .key = 0x203c, .data = "!!" }, { .key = 0x203d, .data = "?" }, { .key = 0x203e, .data = "-" }, { .key = 0x203f, .data = "_" }, { .key = 0x2040, .data = "-" }, { .key = 0x2041, .data = "^" }, { .key = 0x2042, .data = "***" }, { .key = 0x2043, .data = "-" }, { .key = 0x2045, .data = "-[" }, { .key = 0x2046, .data = "]-" }, { .key = 0x2047, .data = "??" }, { .key = 0x2048, .data = "?!" }, { .key = 0x2049, .data = "!?" 
}, { .key = 0x204a, .data = "&" }, { .key = 0x204b, .data = "_pp_" }, { .key = 0x204e, .data = "*" }, { .key = 0x204f, .data = "[)" }, { .key = 0x2051, .data = "**" }, { .key = 0x2052, .data = "%" }, { .key = 0x2053, .data = "~" }, { .key = 0x2054, .data = "_" }, { .key = 0x2055, .data = "*" }, { .key = 0x2056, .data = "..." }, { .key = 0x2057, .data = "''''" }, { .key = 0x2058, .data = "...." }, { .key = 0x2059, .data = "....." }, { .key = 0x205a, .data = ".." }, { .key = 0x205b, .data = "...." }, { .key = 0x205d, .data = ":" }, { .key = 0x205e, .data = ":" }, { .key = 0x205f, .data = " " }, { .key = 0x2060, .data = " " }, { .key = 0x20a0, .data = "ECU" }, { .key = 0x20a1, .data = "CL" }, { .key = 0x20a2, .data = "Cr" }, { .key = 0x20a3, .data = "FF" }, { .key = 0x20a4, .data = "L" }, { .key = 0x20a5, .data = "mil" }, { .key = 0x20a6, .data = "N" }, { .key = 0x20a7, .data = "Pts" }, { .key = 0x20a8, .data = "Rs" }, { .key = 0x20a9, .data = "W" }, { .key = 0x20aa, .data = "NS" }, { .key = 0x20ab, .data = "D" }, { .key = 0x20ac, .data = "EUR" }, { .key = 0x20ad, .data = "K" }, { .key = 0x20ae, .data = "T" }, { .key = 0x20af, .data = "Dr" }, { .key = 0x20b1, .data = "$" }, { .key = 0x20b2, .data = "C" }, { .key = 0x20bb, .data = "M" }, { .key = 0x20bf, .data = "_btc_" }, { .key = 0x2122, .data = "_tm_" }, { .key = 0x10348, .data = "hu" }, { .key = 0x1f37a, .data = "_beer_" }, }; static table_t builtin_unicode_table = { .length = 627, .used = 627, .max_data_length = 7, .max_key = 0x1f37a, .hits = 0, .misses = 0, .seeks = 0, .overwrites = 1, .use_hash = 0, .builtin = 1, .rows = builtin_unicode_rows, .default_translation = "_", }; table_t *load_builtin_unicode_table(void) { return table_resize(&builtin_unicode_table, 768, 1); } /** * Generated from cp1252.tbl */ static table_row_t builtin_cp1252_rows[25] = { { .key = 0x0080, .data = "EUR" }, { .key = 0x0082, .data = "'" }, { .key = 0x0083, .data = "f" }, { .key = 0x0084, .data = "\"" }, { .key = 0x0085, .data = "..." 
}, { .key = 0x0086, .data = "+" }, { .key = 0x0087, .data = "++" }, { .key = 0x0088, .data = "^" }, { .key = 0x0089, .data = "%" }, { .key = 0x008a, .data = "S" }, { .key = 0x008c, .data = "OE" }, { .key = 0x008e, .data = "Z" }, { .key = 0x0091, .data = "'" }, { .key = 0x0092, .data = "'" }, { .key = 0x0093, .data = "\"" }, { .key = 0x0094, .data = "\"" }, { .key = 0x0095, .data = "*" }, { .key = 0x0096, .data = "-" }, { .key = 0x0097, .data = "-" }, { .key = 0x0098, .data = "~" }, { .key = 0x0099, .data = "_tm_" }, { .key = 0x009a, .data = "s" }, { .key = 0x009c, .data = "oe" }, { .key = 0x009e, .data = "z" }, { .key = 0x009f, .data = "Y" }, }; static table_t builtin_cp1252_table = { .length = 25, .used = 25, .max_data_length = 4, .max_key = 0x009f, .hits = 0, .misses = 0, .seeks = 0, .overwrites = 0, .use_hash = 0, .builtin = 1, .rows = builtin_cp1252_rows, .default_translation = NULL, }; table_t *load_builtin_cp1252_table(void) { return table_resize(&builtin_cp1252_table, 256, 1); } detox-2.0.0/src/builtin_table.c.in000066400000000000000000000004411460212773400170160ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. * */ #include "table.h" #include "builtin_table.h" detox-2.0.0/src/builtin_table.h000066400000000000000000000011121460212773400164120ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/ #ifndef __BUILTIN_TABLE_H #define __BUILTIN_TABLE_H #include "detox_struct.h" #define BUILTIN_TABLE_MULTIPLE 256 extern table_t *load_builtin_safe_table(void); extern table_t *load_builtin_iso8859_1_table(void); extern table_t *load_builtin_unicode_table(void); extern table_t *load_builtin_cp1252_table(void); #endif /* __BUILTIN_TABLE_H */ detox-2.0.0/src/check_table.c000066400000000000000000000241251460212773400160250ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ /** * Helper utility to review tables. */ #include #include #include #include #include #include #include #include "builtin_table.h" #include "table_dump.h" #include "table.h" #include "wrapped.h" #define LINE_LENGTH 6 enum { BASE_STATE, INSIDE_STATE }; static table_t *check_table(char *filename, int use_hash) { FILE *table_file; char *work; int code; int offset; char *parsed; int err; int size; int check_max_data_length; int ret; int state; int last; char *check; table_t *table; struct stat table_stat; err = stat(filename, &table_stat); if (err == -1) { return NULL; } size = 0; if (table_stat.st_size > 0) { size = table_stat.st_size / LINE_LENGTH; } #ifdef HAVE_STRUCT_STAT_ST_BLOCKS else { size = (512 * table_stat.st_blocks) / LINE_LENGTH; } #endif if (size < 500) { size = 500; } table = table_init(size); if (table == NULL) { return NULL; } table->use_hash = use_hash; table_file = fopen(filename, "r"); if (table_file == NULL) { fprintf(stderr, "Unable to open translation table: %s\n", strerror(errno)); return NULL; } work = wrapped_malloc(1024); parsed = wrapped_malloc(1024); last = 0; check_max_data_length = 1; state = BASE_STATE; while (fgets(work, 1024, table_file) != NULL) { if (*work == '#') { /* * Don't even bother */ continue; } parsed[0] = '\0'; if (state == BASE_STATE) { ret = sscanf(work, " %s %n", parsed, 
&offset); if (ret == 0) { continue; } if (strncasecmp(parsed, "start", 5) == 0) { last = 0; if (work[offset] == '\0') { // All languages state = INSIDE_STATE; continue; } if (work[offset] == '"') { sscanf(work + offset + 1, "%[^\"]", parsed); } else if (work[offset] == '\'') { sscanf(work + offset + 1, "%[^']", parsed); } else { sscanf(work + offset, "%s", parsed); } printf("skipping language subsection for %s\n", parsed); continue; } if (strncasecmp(parsed, "default", 7) == 0) { if (work[offset] == '\0') { table->default_translation = NULL; continue; } if (work[offset] == '"') { sscanf(work + offset + 1, "%[^\"]", parsed); } else if (work[offset] == '\'') { sscanf(work + offset + 1, "%[^']", parsed); } else { sscanf(work + offset, "%s", parsed); } table->default_translation = wrapped_strdup(parsed); if (strlen(parsed) > check_max_data_length) { check_max_data_length = strlen(parsed); } continue; } continue; } /* * Inside state */ code = -1; ret = sscanf(work, "%i %n", &code, &offset); if (ret == 0 || code < 0 || offset < 0) { /* * Check for end */ ret = sscanf(work, " %s %n", parsed, &offset); if (ret > 0 && strncasecmp(parsed, "end", 5) == 0) { state = BASE_STATE; } continue; } if (work[offset] == '\0') { continue; } if (work[offset] == '"') { sscanf(work + offset + 1, "%[^\"]", parsed); } else if (work[offset] == '\'') { sscanf(work + offset + 1, "%[^']", parsed); } else { sscanf(work + offset, "%s", parsed); } ret = table_put(table, code, parsed); if (ret == -1) { fprintf(stderr, "Failed to add row 0x%04x \"%s\" to translation table\n", code, parsed); table_free(table); fclose(table_file); free(work); free(parsed); return NULL; } check = table_get(table, code); if (check == NULL || strcmp(parsed, check) != 0) { fprintf(stderr, "Failed to retrieve row 0x%04x \"%s\" in translation table\n", code, parsed); table_free(table); fclose(table_file); free(work); free(parsed); return NULL; } if (strlen(parsed) > check_max_data_length) { check_max_data_length = 
strlen(parsed); } if (code < last) { printf("warning - code 0x%04X appears after 0x%04X\n", code, last); } else if (code == last) { printf("warning - code 0x%04X appears twice in a row\n", code); } last = code; } if (table->max_data_length != check_max_data_length) { printf("warning - table max length (%d) doesn't match our calculation (%d)\n", table->max_data_length, check_max_data_length); } free(work); free(parsed); fclose(table_file); return table; } static table_t *check_table_again(char *filename, table_t *source) { FILE *table_file; char *work; int code; int offset; char *parsed; int err; int ret; int state; char *check; table_t *table; struct stat table_stat; err = stat(filename, &table_stat); if (err == -1) { return NULL; } table = table_resize(source, ((int) ceil(source->used / BUILTIN_TABLE_MULTIPLE) + 1) * BUILTIN_TABLE_MULTIPLE, 1); if (table == NULL) { return NULL; } table_file = fopen(filename, "r"); if (table_file == NULL) { fprintf(stderr, "Unable to open translation table: %s\n", strerror(errno)); return NULL; } work = wrapped_malloc(1024); parsed = wrapped_malloc(1024); state = BASE_STATE; while (fgets(work, 1024, table_file) != NULL) { if (*work == '#') { /* * Don't even bother */ continue; } parsed[0] = '\0'; if (state == BASE_STATE) { ret = sscanf(work, " %s %n", parsed, &offset); if (ret == 0) { continue; } if (strncasecmp(parsed, "start", 5) == 0) { if (work[offset] == '\0') { // All languages state = INSIDE_STATE; continue; } if (work[offset] == '"') { sscanf(work + offset + 1, "%[^\"]", parsed); } else if (work[offset] == '\'') { sscanf(work + offset + 1, "%[^']", parsed); } else { sscanf(work + offset, "%s", parsed); } printf("skipping language subsection for %s\n", parsed); continue; } if (strncasecmp(parsed, "default", 7) == 0) { continue; } continue; } /* * Inside state */ code = -1; ret = sscanf(work, "%i %n", &code, &offset); if (ret == 0 || code < 0 || offset < 0) { /* * Check for end */ ret = sscanf(work, " %s %n", parsed, 
&offset); if (ret > 0 && strncasecmp(parsed, "end", 5) == 0) { state = BASE_STATE; } continue; } if (work[offset] == '\0') { continue; } if (work[offset] == '"') { sscanf(work + offset + 1, "%[^\"]", parsed); } else if (work[offset] == '\'') { sscanf(work + offset + 1, "%[^']", parsed); } else { sscanf(work + offset, "%s", parsed); } check = table_get(table, code); if (check == NULL || strcmp(parsed, check) != 0) { fprintf(stderr, "Failed to retrieve row 0x%04x \"%s\" in resized translation table\n", code, parsed); table_free(table); fclose(table_file); free(work); free(parsed); return NULL; } } free(work); free(parsed); fclose(table_file); return table; } int main(int argc, char **argv) { table_t *table; table_t *new_table; int optcode; int builtin = 0; int dump = 0; int use_hash = 1; int verbose = 0; while ((optcode = getopt(argc, argv, "bdlhv")) != -1) { switch (optcode) { case 'b': // simulate a builtin table builtin = 1; break; case 'd': dump = 1; break; case 'h': printf("check-table [-bdlhv] filename.tbl\n"); return 0; case 'l': // linear use_hash = 0; break; case 'v': verbose = 1; break; default: fprintf(stderr, "unknown option: %c\n", optcode); return -1; } } if (optind < argc) { printf("checking: %s\n", argv[optind]); table = check_table(argv[optind], use_hash); if (builtin) { new_table = check_table_again(argv[optind], table); table_free(table); table = new_table; new_table = NULL; } if (table) { if (dump) { table_dump(table, verbose); } table_stats(table); table_free(table); } } else { printf("please specify a file to operate on\n"); return -1; } return 0; } detox-2.0.0/src/clean_string.c000066400000000000000000000176641460212773400162630ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/ #include #include #include #include #include "clean_string.h" #include "wrapped.h" #include "table.h" #define ISO8859_1_UPPER_BIT 0x80 /** * Transliterates ISO 8859-1 characters (Latin-1) into lower ASCII characters. * * @param filename The filename to clean. * @param table_t The transliteration table to use. * * @return The updated filename or NULL if an error occurred. */ char *clean_iso8859_1(char *filename, table_t *table) { char *output, *input_walk, *output_walk, *replace_walk; int new_value; if (filename == NULL) { return NULL; } if (table == NULL) { fprintf(stderr, "internal error\n"); exit(EXIT_FAILURE); } output = wrapped_malloc((strlen(filename) * table->max_data_length) + 1); input_walk = filename; output_walk = output; while (*input_walk != '\0') { if ((*input_walk & ISO8859_1_UPPER_BIT) == 0) { *output_walk++ = *input_walk++; continue; } new_value = (unsigned char) input_walk[0]; replace_walk = table_get(table, new_value); if (replace_walk == NULL) { if (table->default_translation == NULL) { /* * Null translation == leave it alone */ *output_walk++ = *input_walk++; continue; } else { replace_walk = table->default_translation; } } while (*replace_walk != '\0') { *output_walk++ = *replace_walk++; } input_walk++; } *output_walk = '\0'; return output; } /** * Replaces difficult-to-work-with characters with underscores and dashes. * * @param filename The filename to clean. * @param table_t The replacement table to use. * * @return The updated filename or NULL if an error occurred. 
*/ char *clean_safe(char *filename, table_t *table) { char *output, *input_walk, *output_walk, *replace_walk; if (filename == NULL) { return NULL; } if (table == NULL) { fprintf(stderr, "internal error\n"); exit(EXIT_FAILURE); } output = wrapped_malloc((strlen(filename) * table->max_data_length) + 1); input_walk = filename; output_walk = output; while (*input_walk != '\0') { replace_walk = table_get(table, *input_walk); if (replace_walk == NULL) { if (table->default_translation == NULL) { /* * Null translation == leave it alone */ *output_walk++ = *input_walk++; continue; } else { replace_walk = table->default_translation; } } while (*replace_walk != '\0') { *output_walk++ = *replace_walk++; } input_walk++; } *output_walk = '\0'; return output; } /** * Cleans up any CGI encoded characters, in the form "%" followed by 2 hex * digits. * * @param filename The filename to clean. * * @return The updated filename or NULL if an error occurred. */ char *clean_uncgi(char *filename) { char *output, *input_walk, *output_walk; char conv[3]; if (filename == NULL) { return NULL; } output = wrapped_malloc(strlen(filename) + 1); input_walk = filename; output_walk = output; while (*input_walk != '\0') { if (input_walk[0] == '%' && isxdigit((int) input_walk[1]) && isxdigit((int) input_walk[2])) { conv[0] = input_walk[1]; conv[1] = input_walk[2]; conv[2] = 0; *output_walk++ = (char) strtol(conv, NULL, 16); input_walk += 3; } else if (input_walk[0] == '+') { *output_walk++ = ' '; input_walk++; } else { *output_walk++ = *input_walk++; } } *output_walk = '\0'; return output; } /** * Reduces any series of underscores or dashes to a single character. The dash * takes precedence. * * If remove_trailing is set, then periods are added to the set of characters * to work on. The period then takes precedence, followed by the dash. * * If a hash character, underscore, or dash are present at the start of the * filename, they will be removed. * * @param filename The filename to clean. 
* @param remove_trailing Whether or not to include periods in the set of * characters to operate on. * * @return The updated filename or NULL if an error occurred. */ char *clean_wipeup(char *filename, int remove_trailing) { char *output, *input_walk, *output_walk; char *search, *seek, *current; if (filename == NULL) { return NULL; } /* remove any -, _, or # at beginning of string */ while (*filename == '-' || *filename == '_' || *filename == '#') { filename++; } output = wrapped_malloc(strlen(filename) + 1); search = wrapped_strdup(remove_trailing ? ".-_" : "-_"); input_walk = filename; output_walk = output; current = NULL; while (*input_walk != '\0') { seek = strchr(search, *input_walk); if (seek != NULL) { if (current == NULL || seek < current) { current = seek; } input_walk++; continue; } if (current != NULL) { *output_walk++ = current[0]; current = NULL; } *output_walk++ = *input_walk++; } if (current != NULL) { *output_walk++ = current[0]; } *output_walk = '\0'; free(search); return output; } /* * Trims a file down to specified length. 
*/ char *clean_max_length(char *filename, size_t max_length) { char *extension; char *input_walk; char *output; size_t body_length; size_t extension_length; if (filename == NULL) { return NULL; } if (max_length <= 0) { max_length = 256; } // check to see if the file is smaller than the max length if (strlen(filename) <= max_length) { return wrapped_strdup(filename); } output = wrapped_malloc(max_length + 1); snprintf(output, max_length + 1, "%s", filename); // check to see if the file has no extension extension = input_walk = strrchr(filename, '.'); if (input_walk == NULL) { return output; } if (strlen(extension) == 1) { return output; } // look back 5 characters for a second extension while (--input_walk > filename) { if (extension - input_walk > 5) { break; } if (*input_walk == '.') { extension = input_walk; break; } } extension_length = strlen(extension); if (max_length <= extension_length) { fprintf(stderr, "warning: max_length %d is less than required file length for '%s'. giving up.\n", (int) max_length, filename); free(output); return wrapped_strdup(filename); } body_length = max_length - extension_length; snprintf(output + body_length, extension_length + 1, "%s", extension); return output; } /** * Converts all characters to lowercase. * * @param filename The filename to clean. * * @return The updated filename or NULL if an error occurred. */ char *clean_lower(char *filename) { char *output, *input_walk, *output_walk; if (filename == NULL) { return NULL; } output = wrapped_malloc(strlen(filename) + 1); input_walk = filename; output_walk = output; while (*input_walk != '\0') { if (isupper((int) *input_walk)) { *output_walk++ = tolower((int) *input_walk++); } else { *output_walk++ = *input_walk++; } } *output_walk = '\0'; return output; } detox-2.0.0/src/clean_string.h000066400000000000000000000012521460212773400162520ustar00rootroot00000000000000/** * This file is part of the Detox package. 
* * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #ifndef __CLEAN_STRING_H #define __CLEAN_STRING_H #include "detox_struct.h" extern char *clean_iso8859_1(char *filename, table_t *table); extern char *clean_safe(char *filename, table_t *table); extern char *clean_uncgi(char *filename); extern char *clean_wipeup(char *filename, int remove_trailing); extern char *clean_max_length(char *filename, size_t max_length); extern char *clean_lower(char *filename); #endif /* __CLEAN_STRING_H */ detox-2.0.0/src/clean_utf_8.c000066400000000000000000000147751460212773400160020ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #include "config.h" #include #include #include #include "clean_utf_8.h" #include "table.h" #include "wrapped.h" #define UNICODE_MAX_VALUE 0x10FFFF static char *null_replacement = "_hidden_null_"; static char *invalid_replacement = "_"; #define UTF_8_ENCODED_MASK 0xC0 #define UTF_8_ENCODED_START 0xC0 #define UTF_8_ENCODED_CONT 0x80 #define UTF_8_ENCODED_6_BYTES_MASK 0xFE #define UTF_8_ENCODED_6_BYTES 0xFC #define UTF_8_ENCODED_5_BYTES_MASK 0xFC #define UTF_8_ENCODED_5_BYTES 0xF8 #define UTF_8_ENCODED_4_BYTES_MASK 0xF8 #define UTF_8_ENCODED_4_BYTES 0xF0 #define UTF_8_ENCODED_3_BYTES_MASK 0xF0 #define UTF_8_ENCODED_3_BYTES 0xE0 #define UTF_8_ENCODED_2_BYTES_MASK 0xE0 #define UTF_8_ENCODED_2_BYTES 0xC0 #define UPPER_BIT 0x80 #define check_width(chr, size) if ((chr & UTF_8_ENCODED_ ## size ## _BYTES_MASK) == UTF_8_ENCODED_ ## size ## _BYTES) { return size; } #define is_upper_bit_set(chr) ((chr & UPPER_BIT) == UPPER_BIT) #define unpack_cont(chr) ((unsigned char) chr & ~UTF_8_ENCODED_MASK) #define unpack_start(chr, size) ((unsigned char) chr & ~UTF_8_ENCODED_ ## size ## 
_BYTES_MASK) /** * Translates UTF-8 characters (Unicode Translation Format - 8 Bit) into * Unicode and then runs the transliteration table. * * @param filename The filename to clean. * @param table_t The transliteration table to use. * * @return The updated filename or NULL if an error occurred. */ char *clean_utf_8(char *filename, table_t *table) { char *output, *input_walk, *output_walk, *replace_walk; unsigned int new_value; int expected_chars; int characters_eaten; int utf_8_width; int failed; if (filename == NULL) { return NULL; } if (table == NULL) { fprintf(stderr, "internal error\n"); exit(EXIT_FAILURE); } output = wrapped_malloc((strlen(filename) * table->max_data_length) + 1); input_walk = filename; output_walk = output; while (*input_walk != '\0') { utf_8_width = get_utf_8_width(*input_walk); switch (utf_8_width) { case 1: // 0aaaaaaa new_value = (unsigned char) input_walk[0]; break; case 2: // 110aaaaa 10bbbbbb new_value = unpack_start(*input_walk, 2); break; case 3: // 1110aaaa 10bbbbbb 10cccccc new_value = unpack_start(*input_walk, 3); break; case 4: // 11110aaa 10bbbbbb 10cccccc 10dddddd new_value = unpack_start(*input_walk, 4); break; case 5: // 111110aa 10bbbbbb 10cccccc 10dddddd 10eeeeee new_value = unpack_start(*input_walk, 5); break; case 6: // 1111110a 10bbbbbb 10cccccc 10dddddd 10ffffff new_value = unpack_start(*input_walk, 6); break; default: // -1 fprintf(stderr, "detox: warning: invalid encoding\n"); *output_walk++ = invalid_replacement[0]; input_walk++; continue; } expected_chars = utf_8_width - 1; characters_eaten = utf_8_width; failed = 0; while (expected_chars > 0) { new_value <<= 6; input_walk++; if (*input_walk == '\0') { fprintf(stderr, "detox: warning: UTF-8 sequence ended unexpectedly (null)\n"); failed = 1; break; } if (!is_utf_8_cont(*input_walk)) { fprintf(stderr, "detox: warning: UTF-8 sequence ended unexpectedly (missing con't)\n"); failed = 1; break; } new_value += unpack_cont(*input_walk); expected_chars--; } if (failed) { 
*output_walk++ = invalid_replacement[0]; continue; } #ifdef DEBUG if (utf_8_width > 1) { fprintf(stderr, "detox: debug: found character 0x%04x, width: %d\n", new_value, utf_8_width); } #endif input_walk++; replace_walk = table_get(table, new_value); // // Never allow a NULL encoded into 2+ byte UTF-8 to persist. // if (replace_walk == NULL && new_value == 0) { fprintf(stderr, "detox: warning: UTF-8 encoded NULL encountered\n"); replace_walk = null_replacement; } // // If we're off the top, take off, nuke it from orbit. It's the only // way to be sure. // if (replace_walk == NULL && new_value > UNICODE_MAX_VALUE) { fprintf(stderr, "detox: warning: unicode character 0x%08X is invalid\n", new_value); replace_walk = invalid_replacement; } if (replace_walk == NULL) { replace_walk = table->default_translation; } if (replace_walk == NULL) { /* * Null translation == leave it alone */ input_walk -= characters_eaten; while (characters_eaten > 0) { *output_walk++ = *input_walk++; characters_eaten--; } continue; } while (*replace_walk != '\0') { *output_walk++ = *replace_walk++; } } *output_walk = '\0'; return output; } /** * Returns the width, in bytes, of a UTF-8 character, based on the start * character. * * @param c The character to examine. * * @return An integer between 1 and 6. If the character is invalid, -1 will be * returned. */ int get_utf_8_width(char c) { if (is_utf_8_start(c)) { check_width(c, 2); check_width(c, 3); check_width(c, 4); check_width(c, 5); check_width(c, 6); } if (is_upper_bit_set(c)) { return -1; } return 1; } /** * Returns true if the character is part of a UTF-8 sequence, but not the first * byte. * * @param c The character to examine. * * @return bool */ int is_utf_8_cont(char c) { return ((c & UTF_8_ENCODED_MASK) == UTF_8_ENCODED_CONT); } /** * Returns true if the character is the start of a UTF-8 sequence. * * @param c The character to examine. 
* * @return bool */ int is_utf_8_start(char c) { return ((c & UTF_8_ENCODED_MASK) == UTF_8_ENCODED_START); } detox-2.0.0/src/clean_utf_8.h000066400000000000000000000010311460212773400157640ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #ifndef __CLEAN_UTF_8_H #define __CLEAN_UTF_8_H #include "detox_struct.h" #define UTF_8_MAX_LENGTH 6 extern char *clean_utf_8(char *filename, table_t *table); extern int get_utf_8_width(char c); extern int is_utf_8_cont(char c); extern int is_utf_8_start(char c); #endif /* __CLEAN_UTF_8_H */ detox-2.0.0/src/config.h.in000066400000000000000000000044721460212773400154630ustar00rootroot00000000000000/* src/config.h.in. Generated from configure.ac by autoheader. */ /* Enables verbose debugging in key points */ #undef DEBUG /* Define to 1 if you have the `getopt_long' function. */ #undef HAVE_GETOPT_LONG /* Define to 1 if you have the header file. */ #undef HAVE_INTTYPES_H /* Define if you have libcheck */ #undef HAVE_LIBCHECK /* Define to 1 if you have the header file. */ #undef HAVE_MEMORY_H /* Define to 1 if you have the header file. */ #undef HAVE_STDINT_H /* Define to 1 if you have the header file. */ #undef HAVE_STDLIB_H /* Define to 1 if you have the header file. */ #undef HAVE_STRINGS_H /* Define to 1 if you have the header file. */ #undef HAVE_STRING_H /* Define to 1 if `st_blocks' is a member of `struct stat'. */ #undef HAVE_STRUCT_STAT_ST_BLOCKS /* Define to 1 if your `struct stat' has `st_blocks'. Deprecated, use `HAVE_STRUCT_STAT_ST_BLOCKS' instead. */ #undef HAVE_ST_BLOCKS /* Define to 1 if you have the header file. */ #undef HAVE_SYS_STAT_H /* Define to 1 if you have the header file. */ #undef HAVE_SYS_TYPES_H /* Define to 1 if you have the header file. 
*/ #undef HAVE_UNISTD_H /* Name of package */ #undef PACKAGE /* Define to the address where bug reports for this package should be sent. */ #undef PACKAGE_BUGREPORT /* Define to the full name of this package. */ #undef PACKAGE_NAME /* Define to the full name and version of this package. */ #undef PACKAGE_STRING /* Define to the one symbol short name of this package. */ #undef PACKAGE_TARNAME /* Define to the home page for this package. */ #undef PACKAGE_URL /* Define to the version of this package. */ #undef PACKAGE_VERSION /* Define to 1 if you have the ANSI C header files. */ #undef STDC_HEADERS /* Define if you want to support coverage tests */ #undef SUPPORT_COVERAGE /* Version number of package */ #undef VERSION /* Define to 1 if `lex' declares `yytext' as a `char *' by default, not a `char[]'. */ #undef YYTEXT_POINTER /* Enable large inode numbers on Mac OS X 10.5. */ #ifndef _DARWIN_USE_64_BIT_INODE # define _DARWIN_USE_64_BIT_INODE 1 #endif /* Number of bits in a file offset, on hosts where this is settable. */ #undef _FILE_OFFSET_BITS /* Define for large files, on AIX-style hosts. */ #undef _LARGE_FILES detox-2.0.0/src/config_file.c000066400000000000000000000051741460212773400160500ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #include #include #include #include "detox_struct.h" #include "config_file.h" #include "config_file_spoof.h" #include "wrapped.h" config_file_t *config_file_init(void) { config_file_t *ret; ret = wrapped_malloc(sizeof(config_file_t)); memset(ret, 0, sizeof(config_file_t)); return ret; } /** * Loads a config file, any config file. * * @param options_t *main_options The options from the command line. 
* * @return config_file_t */ config_file_t *config_file_load(options_t *main_options) { config_file_t *config_file = NULL; char *check_config_file = NULL; char *file_work = NULL; int err; if (main_options->check_config_file) { check_config_file = wrapped_strdup(main_options->check_config_file); } if (check_config_file != NULL) { config_file = parse_config_file(check_config_file, NULL, main_options); if (config_file == NULL) { fprintf(stderr, "detox: unable to open: %s\n", check_config_file); exit(EXIT_FAILURE); } } else { check_config_file = wrapped_malloc(MAX_PATH_LEN); #ifdef SYSCONFDIR err = snprintf(check_config_file, MAX_PATH_LEN, "%s/detoxrc", SYSCONFDIR); if (err < MAX_PATH_LEN) { config_file = parse_config_file(check_config_file, NULL, main_options); } #endif if (config_file == NULL) { config_file = parse_config_file("/etc/detoxrc", NULL, main_options); } if (config_file == NULL) { config_file = parse_config_file("/usr/local/etc/detoxrc", NULL, main_options); } file_work = getenv("HOME"); if (file_work != NULL) { err = snprintf(check_config_file, MAX_PATH_LEN, "%s/.detoxrc", file_work); if (err < MAX_PATH_LEN) { config_file = parse_config_file(check_config_file, config_file, main_options); } } file_work = getenv("XDG_CONFIG_HOME"); if (file_work != NULL) { err = snprintf(check_config_file, MAX_PATH_LEN, "%s/detox/detoxrc", file_work); if (err < MAX_PATH_LEN) { config_file = parse_config_file(check_config_file, config_file, main_options); } } if (config_file == NULL) { config_file = spoof_config_file(); } free(check_config_file); } return config_file; } detox-2.0.0/src/config_file.h000066400000000000000000000011321460212773400160430ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/ #ifndef __CONFIG_FILE_H #define __CONFIG_FILE_H #include "detox_struct.h" extern int config_file_lineno; extern config_file_t *config_file_init(void); extern config_file_t *config_file_load(options_t *main_options); extern config_file_t *parse_config_file(char *filename, config_file_t *previous_list, options_t *main_options); #endif /* __CONFIG_FILE_H */ detox-2.0.0/src/config_file_dump.c000066400000000000000000000053211460212773400170670ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #include #include "detox_struct.h" #include "config_file_dump.h" #include "filelist.h" void dump_config_file(config_file_t *config_file, options_t *main_options) { sequence_t *sequence = NULL; filter_t *filter = NULL; char *file_walk; int count = 0; if (!main_options->verbose) { printf("available sequences:\n"); } sequence = config_file->sequences; while (sequence != NULL) { if (main_options->verbose) { if (count++ > 0) { printf("\n"); } printf("sequence name: "); } else { printf("\t"); } printf("%s%s\n", sequence->name, (main_options->sequence_to_use == sequence) ? " (*)" : ""); if (main_options->verbose) { printf("\tsource file: %s\n", sequence->source_filename); filter = sequence->filters; while (filter != NULL) { if (filter->cleaner == FILTER_UNCGI) { printf("\tcleaner: uncgi\n"); } else if (filter->cleaner == FILTER_SAFE) { printf("\tcleaner: safe\n"); } else if (filter->cleaner == FILTER_WIPEUP) { printf("\tcleaner: wipeup\n"); printf("\t\tremove trailing: %s\n", filter->remove_trailing ? 
"yes" : "no"); } else if (filter->cleaner == FILTER_ISO8859_1) { printf("\tcleaner: iso8859_1\n"); } else if (filter->cleaner == FILTER_UTF_8) { printf("\tcleaner: utf_8\n"); } else if (filter->cleaner == FILTER_MAX_LENGTH) { printf("\tcleaner: max length\n"); printf("\t\tlength: %d\n", (unsigned int) filter->max_length); } else if (filter->cleaner == FILTER_LOWER) { printf("\tcleaner: lower\n"); } if (filter->builtin != NULL) { printf("\t\tbuiltin table: %s\n", filter->builtin); } else if (filter->filename != NULL) { printf("\t\ttranslation table: %s\n", filter->filename); } filter = filter->next; } } sequence = sequence->next; } if (filelist_count(config_file->files_to_ignore) > 0) { printf("\nfiles to ignore:\n"); while ((file_walk = filelist_get(config_file->files_to_ignore))) { printf("\t%s\n", file_walk); } } } detox-2.0.0/src/config_file_dump.h000066400000000000000000000006741460212773400171020ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #ifndef __CONFIG_FILE_DUMP_H #define __CONFIG_FILE_DUMP_H #include "detox_struct.h" extern void dump_config_file(config_file_t *config_file, options_t *main_options); #endif /* __CONFIG_FILE_DUMP_H */ detox-2.0.0/src/config_file_lex.c000066400000000000000000001466461460212773400167320ustar00rootroot00000000000000 #line 3 "config_file_lex.c" #define YY_INT_ALIGNED short int /* A lexical scanner generated by flex */ #define FLEX_SCANNER #define YY_FLEX_MAJOR_VERSION 2 #define YY_FLEX_MINOR_VERSION 6 #define YY_FLEX_SUBMINOR_VERSION 4 #if YY_FLEX_SUBMINOR_VERSION > 0 #define FLEX_BETA #endif /* First, we deal with platform-specific or compiler-specific issues. */ /* begin standard C headers. */ #include #include #include #include /* end standard C headers. 
*/ /* flex integer type definitions */ #ifndef FLEXINT_H #define FLEXINT_H /* C99 systems have . Non-C99 systems may or may not. */ #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, * if you want the limit (max/min) macros for int types. */ #ifndef __STDC_LIMIT_MACROS #define __STDC_LIMIT_MACROS 1 #endif #include typedef int8_t flex_int8_t; typedef uint8_t flex_uint8_t; typedef int16_t flex_int16_t; typedef uint16_t flex_uint16_t; typedef int32_t flex_int32_t; typedef uint32_t flex_uint32_t; #else typedef signed char flex_int8_t; typedef short int flex_int16_t; typedef int flex_int32_t; typedef unsigned char flex_uint8_t; typedef unsigned short int flex_uint16_t; typedef unsigned int flex_uint32_t; /* Limits of integral types. */ #ifndef INT8_MIN #define INT8_MIN (-128) #endif #ifndef INT16_MIN #define INT16_MIN (-32767-1) #endif #ifndef INT32_MIN #define INT32_MIN (-2147483647-1) #endif #ifndef INT8_MAX #define INT8_MAX (127) #endif #ifndef INT16_MAX #define INT16_MAX (32767) #endif #ifndef INT32_MAX #define INT32_MAX (2147483647) #endif #ifndef UINT8_MAX #define UINT8_MAX (255U) #endif #ifndef UINT16_MAX #define UINT16_MAX (65535U) #endif #ifndef UINT32_MAX #define UINT32_MAX (4294967295U) #endif #ifndef SIZE_MAX #define SIZE_MAX (~(size_t)0) #endif #endif /* ! C99 */ #endif /* ! FLEXINT_H */ /* begin standard C++ headers. */ /* TODO: this is always defined, so inline it */ #define yyconst const #if defined(__GNUC__) && __GNUC__ >= 3 #define yynoreturn __attribute__((__noreturn__)) #else #define yynoreturn #endif /* Returned upon end-of-file. */ #define YY_NULL 0 /* Promotes a possibly negative, possibly signed char to an * integer in range [0..255] for use as an array index. */ #define YY_SC_TO_UI(c) ((YY_CHAR) (c)) /* Enter a start condition. This macro really ought to take a parameter, * but we do it the disgusting crufty way forced on us by the ()-less * definition of BEGIN. 
*/
/* Note: BEGIN deliberately ends in '*' so that "BEGIN state" expands to
 * an assignment of 1 + 2 * state to yy_start.
 */
#define BEGIN (yy_start) = 1 + 2 *

/* Translate the current start state into a value that can be later handed
 * to BEGIN to return to the state.  The YYSTATE alias is for lex
 * compatibility.
 */
#define YY_START (((yy_start) - 1) / 2)
#define YYSTATE YY_START

/* Action number for EOF rule of a given start state. */
#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)

/* Special action meaning "start processing a new file". */
#define YY_NEW_FILE yyrestart( yyin )

/* Sentinel byte written twice after the valid data in every input buffer. */
#define YY_END_OF_BUFFER_CHAR 0

/* Size of default input buffer. */
#ifndef YY_BUF_SIZE
#ifdef __ia64__
/* On IA-64, the buffer size is 16k, not 8k.
 * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
 * Ditto for the __ia64__ case accordingly.
 */
#define YY_BUF_SIZE 32768
#else
#define YY_BUF_SIZE 16384
#endif /* __ia64__ */
#endif

/* The state buf must be large enough to hold one state per character in the main buffer.
 */
#define YY_STATE_BUF_SIZE   ((YY_BUF_SIZE + 2) * sizeof(yy_state_type))

#ifndef YY_TYPEDEF_YY_BUFFER_STATE
#define YY_TYPEDEF_YY_BUFFER_STATE
typedef struct yy_buffer_state *YY_BUFFER_STATE;
#endif

#ifndef YY_TYPEDEF_YY_SIZE_T
#define YY_TYPEDEF_YY_SIZE_T
typedef size_t yy_size_t;
#endif

extern int yyleng;

extern FILE *yyin, *yyout;

/* Result codes returned by yy_get_next_buffer(). */
#define EOB_ACT_CONTINUE_SCAN 0
#define EOB_ACT_END_OF_FILE 1
#define EOB_ACT_LAST_MATCH 2

    /* Line-number tracking hooks; both expand to nothing in this scanner. */
    #define YY_LESS_LINENO(n)
    #define YY_LINENO_REWIND_TO(ptr)

/* Return all but the first "n" matched characters back to the input stream.
 * The expansion uses yy_cp and yy_bp, so it is only usable where those
 * locals are in scope (i.e. inside rule actions in yylex()).
 */
#define yyless(n) \
    do \
        { \
        /* Undo effects of setting up yytext. */ \
        int yyless_macro_arg = (n); \
        YY_LESS_LINENO(yyless_macro_arg);\
        *yy_cp = (yy_hold_char); \
        YY_RESTORE_YY_MORE_OFFSET \
        (yy_c_buf_p) = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \
        YY_DO_BEFORE_ACTION; /* set up yytext again */ \
        } \
    while ( 0 )

/* Push character c back onto the input, in front of the current token. */
#define unput(c) yyunput( c, (yytext_ptr)  )

#ifndef YY_STRUCT_YY_BUFFER_STATE
#define YY_STRUCT_YY_BUFFER_STATE
/* State of one input source: the stream, its character buffer and the
 * scanner's position/status within that buffer.
 */
struct yy_buffer_state
    {
    FILE *yy_input_file;

    char *yy_ch_buf;        /* input buffer */
    char *yy_buf_pos;       /* current position in input buffer */

    /* Size of input buffer in bytes, not including room for EOB
     * characters.
     */
    int yy_buf_size;

    /* Number of characters read into yy_ch_buf, not including EOB
     * characters.
     */
    int yy_n_chars;

    /* Whether we "own" the buffer - i.e., we know we created it,
     * and can realloc() it to grow it, and should free() it to
     * delete it.
     */
    int yy_is_our_buffer;

    /* Whether this is an "interactive" input source; if so, and
     * if we're using stdio for input, then we want to use getc()
     * instead of fread(), to make sure we stop fetching input after
     * each newline.
     */
    int yy_is_interactive;

    /* Whether we're considered to be at the beginning of a line.
     * If so, '^' rules will be active on the next match, otherwise
     * not.
     */
    int yy_at_bol;

    int yy_bs_lineno; /**< The line count. */
    int yy_bs_column; /**< The column count. */

    /* Whether to try to fill the input buffer when we reach the
     * end of it.
     */
    int yy_fill_buffer;

    int yy_buffer_status;

#define YY_BUFFER_NEW 0
#define YY_BUFFER_NORMAL 1
    /* When an EOF's been seen but there's still some text to process
     * then we mark the buffer as YY_EOF_PENDING, to indicate that we
     * shouldn't try reading from the input source any more.  We might
     * still have a bunch of tokens to match, though, because of
     * possible backing-up.
     *
     * When we actually see the EOF, we change the status to "new"
     * (via yyrestart()), so that the user can continue scanning by
     * just pointing yyin at a new input file.
     */
#define YY_BUFFER_EOF_PENDING 2

    };
#endif /* !YY_STRUCT_YY_BUFFER_STATE */

/* Stack of input buffers. */
static size_t yy_buffer_stack_top = 0; /**< index of top of stack. */
static size_t yy_buffer_stack_max = 0; /**< capacity of stack. */
static YY_BUFFER_STATE * yy_buffer_stack = NULL; /**< Stack as an array. */

/* We provide macros for accessing buffer states in case in the
 * future we want to put the buffer states in a more general
 * "scanner state".
 *
 * Returns the top of the stack, or NULL.
 */
#define YY_CURRENT_BUFFER ( (yy_buffer_stack) \
                          ? (yy_buffer_stack)[(yy_buffer_stack_top)] \
                          : NULL)
/* Same as previous macro, but useful when we know that the buffer stack is not
 * NULL or when we need an lvalue. For internal use only.
 */
#define YY_CURRENT_BUFFER_LVALUE (yy_buffer_stack)[(yy_buffer_stack_top)]

/* yy_hold_char holds the character lost when yytext is formed. */
static char yy_hold_char;
static int yy_n_chars;      /* number of characters read into yy_ch_buf */
int yyleng;                 /* length of the current token (set by YY_DO_BEFORE_ACTION) */

/* Points to current character in buffer. */
static char *yy_c_buf_p = NULL;
static int yy_init = 0;     /* whether we need to initialize */
static int yy_start = 0;    /* start state number */

/* Flag which is used to allow yywrap()'s to do buffer switches
 * instead of setting up a fresh yyin.  A bit of a hack ...
*/
static int yy_did_buffer_switch_on_eof;

/* Buffer-management entry points of the scanner. */
void yyrestart ( FILE *input_file  );
void yy_switch_to_buffer ( YY_BUFFER_STATE new_buffer  );
YY_BUFFER_STATE yy_create_buffer ( FILE *file, int size  );
void yy_delete_buffer ( YY_BUFFER_STATE b  );
void yy_flush_buffer ( YY_BUFFER_STATE b  );
void yypush_buffer_state ( YY_BUFFER_STATE new_buffer  );
void yypop_buffer_state ( void );

static void yyensure_buffer_stack ( void );
static void yy_load_buffer_state ( void );
static void yy_init_buffer ( YY_BUFFER_STATE b, FILE *file  );
#define YY_FLUSH_BUFFER yy_flush_buffer( YY_CURRENT_BUFFER )

YY_BUFFER_STATE yy_scan_buffer ( char *base, yy_size_t size  );
YY_BUFFER_STATE yy_scan_string ( const char *yy_str  );
YY_BUFFER_STATE yy_scan_bytes ( const char *bytes, int len  );

/* Memory-allocation hooks used for buffers and the buffer stack. */
void *yyalloc ( yy_size_t  );
void *yyrealloc ( void *, yy_size_t  );
void yyfree ( void *  );

#define yy_new_buffer yy_create_buffer
/* Set the interactive flag of the current buffer, creating a buffer on
 * yyin first if none exists yet.
 */
#define yy_set_interactive(is_interactive) \
    { \
    if ( ! YY_CURRENT_BUFFER ){ \
        yyensure_buffer_stack (); \
        YY_CURRENT_BUFFER_LVALUE =    \
            yy_create_buffer( yyin, YY_BUF_SIZE ); \
    } \
    YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \
    }
/* Set the beginning-of-line flag of the current buffer, creating a buffer
 * on yyin first if none exists yet.
 */
#define yy_set_bol(at_bol) \
    { \
    if ( ! YY_CURRENT_BUFFER ){\
        yyensure_buffer_stack (); \
        YY_CURRENT_BUFFER_LVALUE =    \
            yy_create_buffer( yyin, YY_BUF_SIZE ); \
    } \
    YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \
    }
#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol)

/* Begin user sect3 */

/* yywrap() is a constant 1 here, so end of input always ends the scan. */
#define yywrap() (/*CONSTCOND*/1)
#define YY_SKIP_YYWRAP
typedef flex_uint8_t YY_CHAR;

FILE *yyin = NULL, *yyout = NULL;

typedef int yy_state_type;

extern int yylineno;
int yylineno = 1;

extern char *yytext;
#ifdef yytext_ptr
#undef yytext_ptr
#endif
#define yytext_ptr yytext

static yy_state_type yy_get_previous_state ( void );
static yy_state_type yy_try_NUL_trans ( yy_state_type current_state  );
static int yy_get_next_buffer ( void );
static void yynoreturn yy_fatal_error ( const char* msg  );

/* Done after the current pattern has been matched and before the
 * corresponding action - sets up yytext.
 * Relies on the locals yy_bp (token start) and yy_cp (one past token end);
 * the byte at yy_cp is saved in yy_hold_char and overwritten with NUL.
 */
#define YY_DO_BEFORE_ACTION \
    (yytext_ptr) = yy_bp; \
    yyleng = (int) (yy_cp - yy_bp); \
    (yy_hold_char) = *yy_cp; \
    *yy_cp = '\0'; \
    (yy_c_buf_p) = yy_cp;
#define YY_NUM_RULES 23
#define YY_END_OF_BUFFER 24
/* This struct is not used in this scanner,
   but its presence is necessary.
*/
struct yy_trans_info
    {
    flex_int32_t yy_verify;
    flex_int32_t yy_nxt;
    };
/* Indexed by DFA state: the action (rule) number accepted in that state,
 * or 0 when the state is not accepting.
 */
static const flex_int16_t yy_accept[111] =
    {   0,
        0,    0,   24,   23,    1,    3,   23,    2,   21,   22,
       20,   20,   20,   20,   20,   20,   20,   20,   20,   20,
       17,   18,    1,    0,   19,    2,   21,   20,   20,   20,
       20,   20,   20,   20,   20,   20,   20,   20,   20,   20,
       20,   20,   20,   20,   20,   20,   20,   20,   20,   20,
       20,   20,   20,   20,   20,   20,   20,   20,   20,   20,
       20,   20,   12,   20,   20,   20,   20,   20,   20,   20,
       20,   20,    9,   20,   20,   20,   14,   15,   20,   20,
       20,    6,   20,    8,   20,   20,   20,   16,    4,   20,
       20,   20,   20,   20,    5,   20,   20,   20,   13,    7,
       20,   20,   10,   20,   20,   20,   20,   20,   11,    0
    } ;

/* Indexed by input byte: the equivalence class used to index the
 * transition tables.
 */
static const YY_CHAR yy_ec[256] =
    {   0,
        1,    1,    1,    1,    1,    1,    1,    1,    2,    3,
        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
        1,    2,    1,    4,    5,    1,    1,    1,    1,    1,
        1,    1,    1,    1,    1,    1,    1,    6,    7,    6,
        6,    6,    8,    6,    6,    9,   10,    1,   11,    1,
        1,    1,    1,    1,   12,   13,   14,   15,   16,   17,
       18,   19,   20,   15,   15,   21,   22,   23,   24,   25,
       26,   27,   28,   29,   30,   31,   32,   33,   15,   15,
        1,    1,    1,    1,   34,    1,   35,   36,   37,   15,

       38,   39,   40,   41,   42,   15,   15,   43,   44,   45,
       46,   47,   48,   49,   50,   51,   52,   53,   54,   55,
       15,   15,   56,    1,   57,    1,    1,    1,    1,    1,
        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,

        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
        1,    1,    1,    1,    1
    } ;

/* Indexed by equivalence class: the meta-class substituted when the
 * matcher has fallen back to a template state (state number >= 111).
 */
static const YY_CHAR yy_meta[58] =
    {   0,
        1,    1,    2,    1,    1,    3,    3,    3,    3,    3,
        1,    3,    3,    3,    3,    3,    3,    3,    3,    3,
        3,    3,    3,    3,    3,    3,    3,    3,    3,    3,
        3,    3,    3,    3,    3,    3,    3,    3,    3,    3,
        3,    3,    3,    3,    3,    3,    3,    3,    3,    3,
        3,    3,    3,    3,    3,    1,    1
    } ;

/* Indexed by state: offset of that state's row in yy_nxt / yy_chk.
 * A base of 234 marks the state at which the match loop stops.
 */
static const flex_int16_t yy_base[114] =
    {   0,
        0,    0,  225,  234,  221,  234,   55,    0,   54,  234,
        0,   35,   46,   49,   52,   57,   55,   62,   50,   52,
      234,  234,  218,   77,  234,    0,   76,    0,   71,   75,
       79,   79,   81,   73,   73,   85,   91,   83,   96,   95,
       89,   94,  100,   93,  209,  101,  104,  177,   97,  106,
       93,  114,  176,  119,  111,  123,  120,  200,  119,  122,
      129,  120,    0,  136,  133,  199,  125,  136,  146,  144,
      199,  142,    0,  147,  148,  142,    0,    0,  141,  144,
      154,    0,  187,    0,  156,  125,  166,    0,    0,  168,
       77,  172,  162,  176,    0,   68,  164,  167,    0,    0,

      176,  184,    0,  179,  179,  182,  181,  187,    0,  234,
      227,  230,   67
    } ;

/* Indexed by state: the state to retry from when yy_chk shows that the
 * current state has no entry for the current class.
 */
static const flex_int16_t yy_def[114] =
    {   0,
      110,    1,  110,  110,  110,  110,  111,  112,  110,  110,
      113,  113,  113,  113,  113,  113,  113,  113,  113,  113,
      110,  110,  110,  111,  110,  112,  110,  113,  113,  113,
      113,  113,  113,  113,  113,  113,  113,  113,  113,  113,
      113,  113,  113,  113,  113,  113,  113,  113,  113,  113,
      113,  113,  113,  113,  113,  113,  113,  113,  113,  113,
      113,  113,  113,  113,  113,  113,  113,  113,  113,  113,
      113,  113,  113,  113,  113,  113,  113,  113,  113,  113,
      113,  113,  113,  113,  113,  113,  113,  113,  113,  113,
      113,  113,  113,  113,  113,  113,  113,  113,  113,  113,

      113,  113,  113,  113,  113,  113,  113,  113,  113,    0,
      110,  110,  110
    } ;

/* Next-state entries, indexed by yy_base[state] + class. */
static const flex_int16_t yy_nxt[292] =
    {   0,
        4,    5,    6,    7,    8,    9,    9,    9,    9,    9,
       10,   11,   12,   11,   11,   11,   13,   11,   11,   14,
       15,   16,   11,   11,   11,   11,   17,   18,   11,   19,
       11,   20,   11,    4,   11,   12,   11,   11,   13,   11,
       11,   14,   15,   16,   11,   11,   11,   11,   17,   18,
       11,   19,   11,   20,   11,   21,   22,   25,   25,   27,
       27,   27,   27,   27,   29,   30,   31,   33,   35,   28,
       36,   41,   39,   37,  100,   34,   32,   38,   40,   25,
       25,   27,   27,   27,   27,   27,   29,   30,   31,   33,
       42,   35,   36,   41,   39,   43,   37,   34,   32,   38,

       40,   44,   45,   46,   47,   48,   49,   50,   51,   52,
       96,   53,   42,   54,   55,   56,   57,   43,   59,   60,
       62,   63,   64,   44,   45,   46,   47,   48,   49,   50,
       51,   65,   52,   53,   67,   54,   55,   56,   57,   68,
       59,   60,   62,   63,   64,   69,   70,   72,   73,   74,
       75,   76,   77,   65,   79,   80,   67,   81,   93,   82,
       84,   68,   85,   86,   87,   88,   89,   69,   70,   72,
       73,   74,   75,   76,   77,   90,   79,   80,   92,   94,
       81,   82,   84,   95,   85,   86,   87,   88,   89,   97,
       98,   99,  101,  102,  103,  104,   91,   90,  105,  106,

       92,  107,   94,  108,  109,   95,   83,   78,   71,   66,
       61,   97,   98,   99,  101,  102,  103,   58,  104,   23,
      105,  106,   23,  107,  110,  108,  109,   24,   24,   24,
       26,  110,   26,    3,  110,  110,  110,  110,  110,  110,
      110,  110,  110,  110,  110,  110,  110,  110,  110,  110,
      110,  110,  110,  110,  110,  110,  110,  110,  110,  110,
      110,  110,  110,  110,  110,  110,  110,  110,  110,  110,
      110,  110,  110,  110,  110,  110,  110,  110,  110,  110,
      110,  110,  110,  110,  110,  110,  110,  110,  110,  110,
      110
    } ;

/* Owner check for yy_nxt: the entry at yy_base[state] + class is valid for
 * `state` only when yy_chk holds `state` at the same index.
 */
static const flex_int16_t yy_chk[292] =
    {   0,
        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
        1,    1,    1,    1,    1,    1,    1,    7,    7,    9,
        9,    9,    9,    9,   12,   13,   14,   15,   16,  113,
       17,   20,   19,   18,   96,   15,   14,   18,   19,   24,
       24,   27,   27,   27,   27,   27,   12,   13,   14,   15,
       29,   16,   17,   20,   19,   30,   18,   15,   14,   18,

       19,   31,   32,   33,   34,   35,   36,   37,   38,   39,
       91,   40,   29,   41,   42,   43,   44,   30,   46,   47,
       49,   50,   51,   31,   32,   33,   34,   35,   36,   37,
       38,   52,   39,   40,   54,   41,   42,   43,   44,   55,
       46,   47,   49,   50,   51,   56,   57,   59,   60,   61,
       62,   64,   65,   52,   67,   68,   54,   69,   86,   70,
       72,   55,   74,   75,   76,   79,   80,   56,   57,   59,
       60,   61,   62,   64,   65,   81,   67,   68,   85,   87,
       69,   70,   72,   90,   74,   75,   76,   79,   80,   92,
       93,   94,   97,   98,  101,  102,   83,   81,  104,  105,

       85,  106,   87,  107,  108,   90,   71,   66,   58,   53,
       48,   92,   93,   94,   97,   98,  101,   45,  102,   23,
      104,  105,    5,  106,    3,  107,  108,  111,  111,  111,
      112,    0,  112,  110,  110,  110,  110,  110,  110,  110,
      110,  110,  110,  110,  110,  110,  110,  110,  110,  110,
      110,  110,  110,  110,  110,  110,  110,  110,  110,  110,
      110,  110,  110,  110,  110,  110,  110,  110,  110,  110,
      110,  110,  110,  110,  110,  110,  110,  110,  110,  110,
      110,  110,  110,  110,  110,  110,  110,  110,  110,  110,
      110
    } ;

/* Most recent accepting state and input position seen during a match;
 * yylex() backs up to these when the scan ends in a non-accepting state.
 */
static yy_state_type yy_last_accepting_state;
static char *yy_last_accepting_cpos;

extern int yy_flex_debug;
int yy_flex_debug = 0;

/* The intent behind this definition is that it'll catch
 * any uses of REJECT which flex missed.
 */
#define REJECT reject_used_but_not_detected
#define yymore() yymore_used_but_not_detected
#define YY_MORE_ADJ 0
#define YY_RESTORE_YY_MORE_OFFSET
/* Text of the current token; NUL-terminated by YY_DO_BEFORE_ACTION. */
char *yytext;
#line 1 "config_file_lex.l"
#line 2 "config_file_lex.l"
/**
 * This file is part of the Detox package.
 *
 * Copyright (c) Doug Harple
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
*/ #include "config_file_yacc.h" #include "config_file.h" int config_file_lineno = 1; #line 563 "config_file_lex.c" #line 564 "config_file_lex.c" #define INITIAL 0 #ifndef YY_NO_UNISTD_H /* Special case for "unistd.h", since it is non-ANSI. We include it way * down here because we want the user's section 1 to have been scanned first. * The user has a chance to override it with an option. */ #include #endif #ifndef YY_EXTRA_TYPE #define YY_EXTRA_TYPE void * #endif static int yy_init_globals ( void ); /* Accessor methods to globals. These are made visible to non-reentrant scanners for convenience. */ int yylex_destroy ( void ); int yyget_debug ( void ); void yyset_debug ( int debug_flag ); YY_EXTRA_TYPE yyget_extra ( void ); void yyset_extra ( YY_EXTRA_TYPE user_defined ); FILE *yyget_in ( void ); void yyset_in ( FILE * _in_str ); FILE *yyget_out ( void ); void yyset_out ( FILE * _out_str ); int yyget_leng ( void ); char *yyget_text ( void ); int yyget_lineno ( void ); void yyset_lineno ( int _line_number ); /* Macros after this point can all be overridden by user definitions in * section 1. */ #ifndef YY_SKIP_YYWRAP #ifdef __cplusplus extern "C" int yywrap ( void ); #else extern int yywrap ( void ); #endif #endif #ifndef YY_NO_UNPUT static void yyunput ( int c, char *buf_ptr ); #endif #ifndef yytext_ptr static void yy_flex_strncpy ( char *, const char *, int ); #endif #ifdef YY_NEED_STRLEN static int yy_flex_strlen ( const char * ); #endif #ifndef YY_NO_INPUT #ifdef __cplusplus static int yyinput ( void ); #else static int input ( void ); #endif #endif /* Amount of stuff to slurp up with each read. */ #ifndef YY_READ_BUF_SIZE #ifdef __ia64__ /* On IA-64, the buffer size is 16k, not 8k */ #define YY_READ_BUF_SIZE 16384 #else #define YY_READ_BUF_SIZE 8192 #endif /* __ia64__ */ #endif /* Copy whatever the last rule matched to the standard output. */ #ifndef ECHO /* This used to be an fputs(), but since the string might contain NUL's, * we now use fwrite(). 
*/
#define ECHO do { if (fwrite( yytext, (size_t) yyleng, 1, yyout )) {} } while (0)
#endif

/* Gets input and stuffs it into "buf".  number of characters read, or YY_NULL,
 * is returned in "result".
 *
 * Interactive buffers are read one character at a time with getc(), stopping
 * after a newline; all others use fread(), retrying when a zero-length read
 * failed with EINTR.
 */
#ifndef YY_INPUT
#define YY_INPUT(buf,result,max_size) \
    if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
        { \
        int c = '*'; \
        int n; \
        for ( n = 0; n < max_size && \
                 (c = getc( yyin )) != EOF && c != '\n'; ++n ) \
            buf[n] = (char) c; \
        if ( c == '\n' ) \
            buf[n++] = (char) c; \
        if ( c == EOF && ferror( yyin ) ) \
            YY_FATAL_ERROR( "input in flex scanner failed" ); \
        result = n; \
        } \
    else \
        { \
        errno=0; \
        while ( (result = (int) fread(buf, 1, (yy_size_t) max_size, yyin)) == 0 && ferror(yyin)) \
            { \
            if( errno != EINTR) \
                { \
                YY_FATAL_ERROR( "input in flex scanner failed" ); \
                break; \
                } \
            errno=0; \
            clearerr(yyin); \
            } \
        }\
\

#endif

/* No semi-colon after return; correct usage is to write "yyterminate();" -
 * we don't want an extra ';' after the "return" because that will cause
 * some compilers to complain about unreachable statements.
 */
#ifndef yyterminate
#define yyterminate() return YY_NULL
#endif

/* Number of entries by which start-condition stack grows. */
#ifndef YY_START_STACK_INCR
#define YY_START_STACK_INCR 25
#endif

/* Report a fatal error. */
#ifndef YY_FATAL_ERROR
#define YY_FATAL_ERROR(msg) yy_fatal_error( msg )
#endif

/* end tables serialization structures and prototypes */

/* Default declaration of generated scanner - a define so the user can
 * easily add parameters.
 */
#ifndef YY_DECL
#define YY_DECL_IS_OURS 1

extern int yylex (void);

#define YY_DECL int yylex (void)
#endif /* !YY_DECL */

/* Code executed at the beginning of each rule, after yytext and yyleng
 * have been set up.
 */
#ifndef YY_USER_ACTION
#define YY_USER_ACTION
#endif

/* Code executed at the end of each rule. */
#ifndef YY_BREAK
#define YY_BREAK /*LINTED*/break;
#endif

/* Prologue placed at the start of every rule action; here it only runs
 * YY_USER_ACTION.
 */
#define YY_RULE_SETUP \
    YY_USER_ACTION

/** The main scanner function which does all the work.
*/ YY_DECL { yy_state_type yy_current_state; char *yy_cp, *yy_bp; int yy_act; if ( !(yy_init) ) { (yy_init) = 1; #ifdef YY_USER_INIT YY_USER_INIT; #endif if ( ! (yy_start) ) (yy_start) = 1; /* first start state */ if ( ! yyin ) yyin = stdin; if ( ! yyout ) yyout = stdout; if ( ! YY_CURRENT_BUFFER ) { yyensure_buffer_stack (); YY_CURRENT_BUFFER_LVALUE = yy_create_buffer( yyin, YY_BUF_SIZE ); } yy_load_buffer_state( ); } { #line 20 "config_file_lex.l" #line 784 "config_file_lex.c" while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */ { yy_cp = (yy_c_buf_p); /* Support of yytext. */ *yy_cp = (yy_hold_char); /* yy_bp points to the position in yy_ch_buf of the start of * the current run. */ yy_bp = yy_cp; yy_current_state = (yy_start); yy_match: do { YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)] ; if ( yy_accept[yy_current_state] ) { (yy_last_accepting_state) = yy_current_state; (yy_last_accepting_cpos) = yy_cp; } while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) { yy_current_state = (int) yy_def[yy_current_state]; if ( yy_current_state >= 111 ) yy_c = yy_meta[yy_c]; } yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; ++yy_cp; } while ( yy_base[yy_current_state] != 234 ); yy_find_action: yy_act = yy_accept[yy_current_state]; if ( yy_act == 0 ) { /* have to back up */ yy_cp = (yy_last_accepting_cpos); yy_current_state = (yy_last_accepting_state); yy_act = yy_accept[yy_current_state]; } YY_DO_BEFORE_ACTION; do_action: /* This label is used only to access EOF actions. 
*/ switch ( yy_act ) { /* beginning of action switch */ case 0: /* must back up */ /* undo the effects of YY_DO_BEFORE_ACTION */ *yy_cp = (yy_hold_char); yy_cp = (yy_last_accepting_cpos); yy_current_state = (yy_last_accepting_state); goto yy_find_action; case 1: YY_RULE_SETUP #line 22 "config_file_lex.l" { /* ignore whitespace */ } YY_BREAK case 2: YY_RULE_SETUP #line 24 "config_file_lex.l" { /* ignore comments */ } YY_BREAK case 3: /* rule 3 can match eol */ YY_RULE_SETUP #line 26 "config_file_lex.l" { config_file_lineno++; } YY_BREAK case 4: YY_RULE_SETUP #line 28 "config_file_lex.l" { return BUILTIN; } YY_BREAK case 5: YY_RULE_SETUP #line 29 "config_file_lex.l" { return FILENAME; } YY_BREAK case 6: YY_RULE_SETUP #line 30 "config_file_lex.l" { return IGNORE; } YY_BREAK case 7: YY_RULE_SETUP #line 31 "config_file_lex.l" { return ISO8859_1; } YY_BREAK case 8: YY_RULE_SETUP #line 32 "config_file_lex.l" { return LENGTH; } YY_BREAK case 9: YY_RULE_SETUP #line 33 "config_file_lex.l" { return LOWER; } YY_BREAK case 10: YY_RULE_SETUP #line 34 "config_file_lex.l" { return MAX_LENGTH; } YY_BREAK case 11: YY_RULE_SETUP #line 35 "config_file_lex.l" { return REMOVE_TRAILING; } YY_BREAK case 12: YY_RULE_SETUP #line 36 "config_file_lex.l" { return SAFE; } YY_BREAK case 13: YY_RULE_SETUP #line 37 "config_file_lex.l" { return SEQUENCE; } YY_BREAK case 14: YY_RULE_SETUP #line 38 "config_file_lex.l" { return UNCGI; } YY_BREAK case 15: YY_RULE_SETUP #line 39 "config_file_lex.l" { return UTF_8; } YY_BREAK case 16: YY_RULE_SETUP #line 40 "config_file_lex.l" { return WIPEUP; } YY_BREAK case 17: YY_RULE_SETUP #line 42 "config_file_lex.l" { return OPEN; } YY_BREAK case 18: YY_RULE_SETUP #line 43 "config_file_lex.l" { return CLOSE; } YY_BREAK case 19: /* rule 19 can match eol */ YY_RULE_SETUP #line 45 "config_file_lex.l" { yylval.string = strdup(yytext+1); if (yylval.string[yyleng-2] != '"') { printf("Unterminated character string\n"); } else { yylval.string[yyleng-2] = '\0'; } return 
QSTRING; } YY_BREAK case 20: YY_RULE_SETUP #line 56 "config_file_lex.l" { yylval.string = strdup(yytext); return ID; } YY_BREAK case 21: YY_RULE_SETUP #line 61 "config_file_lex.l" { yylval.nvalue = atoi(yytext); return NVALUE; } YY_BREAK case 22: YY_RULE_SETUP #line 66 "config_file_lex.l" { return EOL; } YY_BREAK case 23: YY_RULE_SETUP #line 68 "config_file_lex.l" ECHO; YY_BREAK #line 973 "config_file_lex.c" case YY_STATE_EOF(INITIAL): yyterminate(); case YY_END_OF_BUFFER: { /* Amount of text matched not including the EOB char. */ int yy_amount_of_matched_text = (int) (yy_cp - (yytext_ptr)) - 1; /* Undo the effects of YY_DO_BEFORE_ACTION. */ *yy_cp = (yy_hold_char); YY_RESTORE_YY_MORE_OFFSET if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW ) { /* We're scanning a new file or input source. It's * possible that this happened because the user * just pointed yyin at a new source and called * yylex(). If so, then we have to assure * consistency between YY_CURRENT_BUFFER and our * globals. Here is the right place to do so, because * this is the first action (other than possibly a * back-up) that will match for the new input source. */ (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin; YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL; } /* Note that here we test for yy_c_buf_p "<=" to the position * of the first EOB in the buffer, since yy_c_buf_p will * already have been incremented past the NUL character * (since all states make transitions on EOB to the * end-of-buffer state). Contrast this with the test * in input(). */ if ( (yy_c_buf_p) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] ) { /* This was really a NUL. */ yy_state_type yy_next_state; (yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text; yy_current_state = yy_get_previous_state( ); /* Okay, we're now positioned to make the NUL * transition. 
We couldn't have * yy_get_previous_state() go ahead and do it * for us because it doesn't know how to deal * with the possibility of jamming (and we don't * want to build jamming into it because then it * will run more slowly). */ yy_next_state = yy_try_NUL_trans( yy_current_state ); yy_bp = (yytext_ptr) + YY_MORE_ADJ; if ( yy_next_state ) { /* Consume the NUL. */ yy_cp = ++(yy_c_buf_p); yy_current_state = yy_next_state; goto yy_match; } else { yy_cp = (yy_c_buf_p); goto yy_find_action; } } else switch ( yy_get_next_buffer( ) ) { case EOB_ACT_END_OF_FILE: { (yy_did_buffer_switch_on_eof) = 0; if ( yywrap( ) ) { /* Note: because we've taken care in * yy_get_next_buffer() to have set up * yytext, we can now set up * yy_c_buf_p so that if some total * hoser (like flex itself) wants to * call the scanner after we return the * YY_NULL, it'll still work - another * YY_NULL will get returned. */ (yy_c_buf_p) = (yytext_ptr) + YY_MORE_ADJ; yy_act = YY_STATE_EOF(YY_START); goto do_action; } else { if ( ! 
(yy_did_buffer_switch_on_eof) ) YY_NEW_FILE; } break; } case EOB_ACT_CONTINUE_SCAN: (yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text; yy_current_state = yy_get_previous_state( ); yy_cp = (yy_c_buf_p); yy_bp = (yytext_ptr) + YY_MORE_ADJ; goto yy_match; case EOB_ACT_LAST_MATCH: (yy_c_buf_p) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)]; yy_current_state = yy_get_previous_state( ); yy_cp = (yy_c_buf_p); yy_bp = (yytext_ptr) + YY_MORE_ADJ; goto yy_find_action; } break; } default: YY_FATAL_ERROR( "fatal flex scanner internal error--no action found" ); } /* end of action switch */ } /* end of scanning one token */ } /* end of user's declarations */ } /* end of yylex */ /* yy_get_next_buffer - try to read in a new buffer * * Returns a code representing an action: * EOB_ACT_LAST_MATCH - * EOB_ACT_CONTINUE_SCAN - continue scanning from current position * EOB_ACT_END_OF_FILE - end of file */ static int yy_get_next_buffer (void) { char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf; char *source = (yytext_ptr); int number_to_move, i; int ret_val; if ( (yy_c_buf_p) > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] ) YY_FATAL_ERROR( "fatal flex scanner internal error--end of buffer missed" ); if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 ) { /* Don't try to fill the buffer, so this is an EOF. */ if ( (yy_c_buf_p) - (yytext_ptr) - YY_MORE_ADJ == 1 ) { /* We matched a single character, the EOB, so * treat this as a final EOF. */ return EOB_ACT_END_OF_FILE; } else { /* We matched some text prior to the EOB, first * process it. */ return EOB_ACT_LAST_MATCH; } } /* Try to read more data. */ /* First move last chars to start of buffer. 
*/ number_to_move = (int) ((yy_c_buf_p) - (yytext_ptr) - 1); for ( i = 0; i < number_to_move; ++i ) *(dest++) = *(source++); if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING ) /* don't do the read, it's not guaranteed to return an EOF, * just force an EOF */ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = 0; else { int num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1; while ( num_to_read <= 0 ) { /* Not enough room in the buffer - grow it. */ /* just a shorter name for the current buffer */ YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE; int yy_c_buf_p_offset = (int) ((yy_c_buf_p) - b->yy_ch_buf); if ( b->yy_is_our_buffer ) { int new_size = b->yy_buf_size * 2; if ( new_size <= 0 ) b->yy_buf_size += b->yy_buf_size / 8; else b->yy_buf_size *= 2; b->yy_ch_buf = (char *) /* Include room in for 2 EOB chars. */ yyrealloc( (void *) b->yy_ch_buf, (yy_size_t) (b->yy_buf_size + 2) ); } else /* Can't grow it, we don't own it. */ b->yy_ch_buf = NULL; if ( ! b->yy_ch_buf ) YY_FATAL_ERROR( "fatal error - scanner input buffer overflow" ); (yy_c_buf_p) = &b->yy_ch_buf[yy_c_buf_p_offset]; num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1; } if ( num_to_read > YY_READ_BUF_SIZE ) num_to_read = YY_READ_BUF_SIZE; /* Read in more data. */ YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]), (yy_n_chars), num_to_read ); YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); } if ( (yy_n_chars) == 0 ) { if ( number_to_move == YY_MORE_ADJ ) { ret_val = EOB_ACT_END_OF_FILE; yyrestart( yyin ); } else { ret_val = EOB_ACT_LAST_MATCH; YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_EOF_PENDING; } } else ret_val = EOB_ACT_CONTINUE_SCAN; if (((yy_n_chars) + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) { /* Extend the array by 50%, plus the number we really need. 
*/ int new_size = (yy_n_chars) + number_to_move + ((yy_n_chars) >> 1); YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc( (void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf, (yy_size_t) new_size ); if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf ) YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" ); /* "- 2" to take care of EOB's */ YY_CURRENT_BUFFER_LVALUE->yy_buf_size = (int) (new_size - 2); } (yy_n_chars) += number_to_move; YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] = YY_END_OF_BUFFER_CHAR; YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] = YY_END_OF_BUFFER_CHAR; (yytext_ptr) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0]; return ret_val; } /* yy_get_previous_state - get the state just before the EOB char was reached */ static yy_state_type yy_get_previous_state (void) { yy_state_type yy_current_state; char *yy_cp; yy_current_state = (yy_start); for ( yy_cp = (yytext_ptr) + YY_MORE_ADJ; yy_cp < (yy_c_buf_p); ++yy_cp ) { YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1); if ( yy_accept[yy_current_state] ) { (yy_last_accepting_state) = yy_current_state; (yy_last_accepting_cpos) = yy_cp; } while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) { yy_current_state = (int) yy_def[yy_current_state]; if ( yy_current_state >= 111 ) yy_c = yy_meta[yy_c]; } yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; } return yy_current_state; } /* yy_try_NUL_trans - try to make a transition on the NUL character * * synopsis * next_state = yy_try_NUL_trans( current_state ); */ static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state ) { int yy_is_jam; char *yy_cp = (yy_c_buf_p); YY_CHAR yy_c = 1; if ( yy_accept[yy_current_state] ) { (yy_last_accepting_state) = yy_current_state; (yy_last_accepting_cpos) = yy_cp; } while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) { yy_current_state = (int) yy_def[yy_current_state]; if ( yy_current_state >= 111 ) yy_c = yy_meta[yy_c]; } yy_current_state = 
yy_nxt[yy_base[yy_current_state] + yy_c]; yy_is_jam = (yy_current_state == 110); return yy_is_jam ? 0 : yy_current_state; } #ifndef YY_NO_UNPUT static void yyunput (int c, char * yy_bp ) { char *yy_cp; yy_cp = (yy_c_buf_p); /* undo effects of setting up yytext */ *yy_cp = (yy_hold_char); if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 ) { /* need to shift things up to make room */ /* +2 for EOB chars. */ int number_to_move = (yy_n_chars) + 2; char *dest = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[ YY_CURRENT_BUFFER_LVALUE->yy_buf_size + 2]; char *source = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]; while ( source > YY_CURRENT_BUFFER_LVALUE->yy_ch_buf ) *--dest = *--source; yy_cp += (int) (dest - source); yy_bp += (int) (dest - source); YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = (int) YY_CURRENT_BUFFER_LVALUE->yy_buf_size; if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 ) YY_FATAL_ERROR( "flex scanner push-back overflow" ); } *--yy_cp = (char) c; (yytext_ptr) = yy_bp; (yy_hold_char) = *yy_cp; (yy_c_buf_p) = yy_cp; } #endif #ifndef YY_NO_INPUT #ifdef __cplusplus static int yyinput (void) #else static int input (void) #endif { int c; *(yy_c_buf_p) = (yy_hold_char); if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR ) { /* yy_c_buf_p now points to the character we want to return. * If this occurs *before* the EOB characters, then it's a * valid NUL; if not, then we've hit the end of the buffer. */ if ( (yy_c_buf_p) < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] ) /* This was really a NUL. */ *(yy_c_buf_p) = '\0'; else { /* need more input */ int offset = (int) ((yy_c_buf_p) - (yytext_ptr)); ++(yy_c_buf_p); switch ( yy_get_next_buffer( ) ) { case EOB_ACT_LAST_MATCH: /* This happens because yy_g_n_b() * sees that we've accumulated a * token and flags that we need to * try matching the token before * proceeding. But for input(), * there's no matching to consider. * So convert the EOB_ACT_LAST_MATCH * to EOB_ACT_END_OF_FILE. 
*/ /* Reset buffer status. */ yyrestart( yyin ); /*FALLTHROUGH*/ case EOB_ACT_END_OF_FILE: { if ( yywrap( ) ) return 0; if ( ! (yy_did_buffer_switch_on_eof) ) YY_NEW_FILE; #ifdef __cplusplus return yyinput(); #else return input(); #endif } case EOB_ACT_CONTINUE_SCAN: (yy_c_buf_p) = (yytext_ptr) + offset; break; } } } c = *(unsigned char *) (yy_c_buf_p); /* cast for 8-bit char's */ *(yy_c_buf_p) = '\0'; /* preserve yytext */ (yy_hold_char) = *++(yy_c_buf_p); return c; } #endif /* ifndef YY_NO_INPUT */ /** Immediately switch to a different input stream. * @param input_file A readable stream. * * @note This function does not reset the start condition to @c INITIAL . */ void yyrestart (FILE * input_file ) { if ( ! YY_CURRENT_BUFFER ){ yyensure_buffer_stack (); YY_CURRENT_BUFFER_LVALUE = yy_create_buffer( yyin, YY_BUF_SIZE ); } yy_init_buffer( YY_CURRENT_BUFFER, input_file ); yy_load_buffer_state( ); } /** Switch to a different input buffer. * @param new_buffer The new input buffer. * */ void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ) { /* TODO. We should be able to replace this entire function body * with * yypop_buffer_state(); * yypush_buffer_state(new_buffer); */ yyensure_buffer_stack (); if ( YY_CURRENT_BUFFER == new_buffer ) return; if ( YY_CURRENT_BUFFER ) { /* Flush out information for old buffer. */ *(yy_c_buf_p) = (yy_hold_char); YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p); YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); } YY_CURRENT_BUFFER_LVALUE = new_buffer; yy_load_buffer_state( ); /* We don't actually know whether we did this switch during * EOF (yywrap()) processing, but the only time this flag * is looked at is after yywrap() is called, so it's safe * to go ahead and always set it. 
*/ (yy_did_buffer_switch_on_eof) = 1; } static void yy_load_buffer_state (void) { (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; (yytext_ptr) = (yy_c_buf_p) = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos; yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file; (yy_hold_char) = *(yy_c_buf_p); } /** Allocate and initialize an input buffer state. * @param file A readable stream. * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE. * * @return the allocated buffer state. */ YY_BUFFER_STATE yy_create_buffer (FILE * file, int size ) { YY_BUFFER_STATE b; b = (YY_BUFFER_STATE) yyalloc( sizeof( struct yy_buffer_state ) ); if ( ! b ) YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); b->yy_buf_size = size; /* yy_ch_buf has to be 2 characters longer than the size given because * we need to put in 2 end-of-buffer characters. */ b->yy_ch_buf = (char *) yyalloc( (yy_size_t) (b->yy_buf_size + 2) ); if ( ! b->yy_ch_buf ) YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); b->yy_is_our_buffer = 1; yy_init_buffer( b, file ); return b; } /** Destroy the buffer. * @param b a buffer created with yy_create_buffer() * */ void yy_delete_buffer (YY_BUFFER_STATE b ) { if ( ! b ) return; if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */ YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0; if ( b->yy_is_our_buffer ) yyfree( (void *) b->yy_ch_buf ); yyfree( (void *) b ); } /* Initializes or reinitializes a buffer. * This function is sometimes called more than once on the same buffer, * such as during a yyrestart() or at EOF. */ static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file ) { int oerrno = errno; yy_flush_buffer( b ); b->yy_input_file = file; b->yy_fill_buffer = 1; /* If b is the current buffer, then yy_init_buffer was _probably_ * called from yyrestart() or through yy_get_next_buffer. * In that case, we don't want to reset the lineno or column. 
*/ if (b != YY_CURRENT_BUFFER){ b->yy_bs_lineno = 1; b->yy_bs_column = 0; } b->yy_is_interactive = file ? (isatty( fileno(file) ) > 0) : 0; errno = oerrno; } /** Discard all buffered characters. On the next scan, YY_INPUT will be called. * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER. * */ void yy_flush_buffer (YY_BUFFER_STATE b ) { if ( ! b ) return; b->yy_n_chars = 0; /* We always need two end-of-buffer characters. The first causes * a transition to the end-of-buffer state. The second causes * a jam in that state. */ b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; b->yy_buf_pos = &b->yy_ch_buf[0]; b->yy_at_bol = 1; b->yy_buffer_status = YY_BUFFER_NEW; if ( b == YY_CURRENT_BUFFER ) yy_load_buffer_state( ); } /** Pushes the new state onto the stack. The new state becomes * the current state. This function will allocate the stack * if necessary. * @param new_buffer The new state. * */ void yypush_buffer_state (YY_BUFFER_STATE new_buffer ) { if (new_buffer == NULL) return; yyensure_buffer_stack(); /* This block is copied from yy_switch_to_buffer. */ if ( YY_CURRENT_BUFFER ) { /* Flush out information for old buffer. */ *(yy_c_buf_p) = (yy_hold_char); YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p); YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); } /* Only push if top exists. Otherwise, replace top. */ if (YY_CURRENT_BUFFER) (yy_buffer_stack_top)++; YY_CURRENT_BUFFER_LVALUE = new_buffer; /* copied from yy_switch_to_buffer. */ yy_load_buffer_state( ); (yy_did_buffer_switch_on_eof) = 1; } /** Removes and deletes the top of the stack, if present. * The next element becomes the new top. 
* */
void yypop_buffer_state (void)
{
    /* Nothing to pop when there is no current buffer. */
    if (!YY_CURRENT_BUFFER)
        return;

    /* Destroy the current buffer and clear the current-buffer slot. */
    yy_delete_buffer(YY_CURRENT_BUFFER );
    YY_CURRENT_BUFFER_LVALUE = NULL;
    /* The top index is only decremented while it is above zero. */
    if ((yy_buffer_stack_top) > 0)
        --(yy_buffer_stack_top);

    /* If a buffer is current after the pop, reload the scanner globals
     * from it and record that a buffer switch took place. */
    if (YY_CURRENT_BUFFER) {
        yy_load_buffer_state( );
        (yy_did_buffer_switch_on_eof) = 1;
    }
}

/* Allocates the stack if it does not exist.
 * Guarantees space for at least one push.
 *
 * On allocation failure YY_FATAL_ERROR() is invoked. Newly allocated slots
 * are always zero-filled.
 */
static void yyensure_buffer_stack (void)
{
    yy_size_t num_to_alloc;

    if (!(yy_buffer_stack)) {

        /* First allocation: the stack does not exist yet. The upstream
         * skeleton comment talks about starting with 2 elements, but the
         * value actually assigned below is 1.
         */
        num_to_alloc = 1; /* After all that talk, this was set to 1 anyways... */
        (yy_buffer_stack) = (struct yy_buffer_state**)yyalloc
                                (num_to_alloc * sizeof(struct yy_buffer_state*)
                                );
        if ( ! (yy_buffer_stack) )
            YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" );

        memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*));

        (yy_buffer_stack_max) = num_to_alloc;
        (yy_buffer_stack_top) = 0;
        return;
    }

    /* Grow as soon as the top index reaches the last allocated slot. */
    if ((yy_buffer_stack_top) >= ((yy_buffer_stack_max)) - 1){

        /* Increase the buffer to prepare for a possible push. */
        yy_size_t grow_size = 8 /* arbitrary grow size */;

        num_to_alloc = (yy_buffer_stack_max) + grow_size;
        (yy_buffer_stack) = (struct yy_buffer_state**)yyrealloc
                                ((yy_buffer_stack),
                                num_to_alloc * sizeof(struct yy_buffer_state*)
                                );
        if ( ! (yy_buffer_stack) )
            YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" );

        /* zero only the new slots.*/
        memset((yy_buffer_stack) + (yy_buffer_stack_max), 0, grow_size * sizeof(struct yy_buffer_state*));
        (yy_buffer_stack_max) = num_to_alloc;
    }
}

/** Setup the input buffer state to scan directly from a user-specified character buffer.
 * @param base the character buffer
 * @param size the size in bytes of the character buffer
 *
 * @return the newly allocated buffer state object.
*/
YY_BUFFER_STATE yy_scan_buffer (char * base, yy_size_t size )
{
    YY_BUFFER_STATE b;

    /* The caller's memory is scanned in place, so it must already end in
     * two YY_END_OF_BUFFER_CHAR bytes; anything else is rejected with NULL. */
    if ( size < 2 ||
         base[size-2] != YY_END_OF_BUFFER_CHAR ||
         base[size-1] != YY_END_OF_BUFFER_CHAR )
        /* They forgot to leave room for the EOB's. */
        return NULL;

    b = (YY_BUFFER_STATE) yyalloc( sizeof( struct yy_buffer_state ) );
    if ( ! b )
        YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" );

    b->yy_buf_size = (int) (size - 2); /* "- 2" to take care of EOB's */
    b->yy_buf_pos = b->yy_ch_buf = base;
    /* 0 here means yy_delete_buffer() will not free yy_ch_buf. */
    b->yy_is_our_buffer = 0;
    b->yy_input_file = NULL;
    b->yy_n_chars = b->yy_buf_size;
    b->yy_is_interactive = 0;
    b->yy_at_bol = 1;
    b->yy_fill_buffer = 0;
    b->yy_buffer_status = YY_BUFFER_NEW;

    /* The new buffer becomes the current buffer immediately. */
    yy_switch_to_buffer( b );

    return b;
}

/** Setup the input buffer state to scan a string. The next call to yylex() will
 * scan from a @e copy of @a yystr.
 * @param yystr a NUL-terminated string to scan
 *
 * @return the newly allocated buffer state object.
 * @note If you want to scan bytes that may contain NUL values, then use
 *       yy_scan_bytes() instead.
 */
YY_BUFFER_STATE yy_scan_string (const char * yystr )
{

    return yy_scan_bytes( yystr, (int) strlen(yystr) );
}

/** Setup the input buffer state to scan the given bytes. The next call to yylex() will
 * scan from a @e copy of @a yybytes.
 * @param yybytes the byte buffer to scan
 * @param _yybytes_len the number of bytes in the buffer pointed to by @a yybytes.
 *
 * @return the newly allocated buffer state object.
 */
YY_BUFFER_STATE yy_scan_bytes (const char * yybytes, int _yybytes_len )
{
    YY_BUFFER_STATE b;
    char *buf;
    yy_size_t n;
    int i;

    /* Get memory for full buffer, including space for trailing EOB's. */
    /* NOTE(review): the "+ 2" is computed in int and a negative length is
     * not rejected here - confirm callers only pass non-negative lengths. */
    n = (yy_size_t) (_yybytes_len + 2);
    buf = (char *) yyalloc( n );
    if ( ! buf )
        YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" );

    for ( i = 0; i < _yybytes_len; ++i )
        buf[i] = yybytes[i];

    buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR;

    b = yy_scan_buffer( buf, n );
    if ( ! b )
        YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" );

    /* It's okay to grow etc. this buffer, and we should throw it
     * away when we're done.
     */
    b->yy_is_our_buffer = 1;

    return b;
}

#ifndef YY_EXIT_FAILURE
#define YY_EXIT_FAILURE 2
#endif

/* Prints msg followed by a newline on stderr and terminates the process
 * with status YY_EXIT_FAILURE. */
static void yynoreturn yy_fatal_error (const char* msg )
{
    fprintf( stderr, "%s\n", msg );
    exit( YY_EXIT_FAILURE );
}

/* Redefine yyless() so it works in section 3 code. */
/* yyless(n) restores the held character at the end of yytext, moves the
 * scan position to yytext + n, and shortens yyleng to n. */

#undef yyless
#define yyless(n) \
    do \
        { \
        /* Undo effects of setting up yytext. */ \
        int yyless_macro_arg = (n); \
        YY_LESS_LINENO(yyless_macro_arg);\
        yytext[yyleng] = (yy_hold_char); \
        (yy_c_buf_p) = yytext + yyless_macro_arg; \
        (yy_hold_char) = *(yy_c_buf_p); \
        *(yy_c_buf_p) = '\0'; \
        yyleng = yyless_macro_arg; \
        } \
    while ( 0 )

/* Accessor methods (get/set functions) to struct members. */

/** Get the current line number.
 *
 */
int yyget_lineno (void)
{

    return yylineno;
}

/** Get the input stream.
 *
 */
FILE *yyget_in (void)
{
    return yyin;
}

/** Get the output stream.
 *
 */
FILE *yyget_out (void)
{
    return yyout;
}

/** Get the length of the current token.
 *
 */
int yyget_leng (void)
{
    return yyleng;
}

/** Get the current token.
 *
 */

char *yyget_text (void)
{
    return yytext;
}

/** Set the current line number.
 * @param _line_number line number
 *
 */
void yyset_lineno (int _line_number )
{

    yylineno = _line_number;
}

/** Set the input stream. This does not discard the current
 * input buffer.
 * @param _in_str A readable stream.
 *
 * @see yy_switch_to_buffer
 */
void yyset_in (FILE * _in_str )
{
    yyin = _in_str ;
}

/** Set the output stream.
 * @param _out_str the stream stored in yyout.
 */
void yyset_out (FILE * _out_str )
{
    yyout = _out_str ;
}

/** Get the current value of the yy_flex_debug flag. */
int yyget_debug (void)
{
    return yy_flex_debug;
}

/** Set the yy_flex_debug flag.
 * @param _bdebug new value of the flag.
 */
void yyset_debug (int _bdebug )
{
    yy_flex_debug = _bdebug ;
}

/* Resets the scanner's global state to its initial values.
 * Always returns 0.
 */
static int yy_init_globals (void)
{
    /* Initialization is the same as for the non-reentrant scanner.
     * This function is called from yylex_destroy(), so don't allocate here.
     */

    (yy_buffer_stack) = NULL;
    (yy_buffer_stack_top) = 0;
    (yy_buffer_stack_max) = 0;
    (yy_c_buf_p) = NULL;
    (yy_init) = 0;
    (yy_start) = 0;

/* Defined in main.c */
#ifdef YY_STDINIT
    yyin = stdin;
    yyout = stdout;
#else
    yyin = NULL;
    yyout = NULL;
#endif

    /* For future reference: Set errno on error, since we are called by
     * yylex_init()
     */
    return 0;
}

/* yylex_destroy is for both reentrant and non-reentrant scanners. */
/* Frees every buffer, frees the buffer stack itself and resets the globals.
 * Always returns 0. */
int yylex_destroy (void)
{

    /* Pop the buffer stack, destroying each element. */
    while(YY_CURRENT_BUFFER){
        yy_delete_buffer( YY_CURRENT_BUFFER );
        YY_CURRENT_BUFFER_LVALUE = NULL;
        yypop_buffer_state();
    }

    /* Destroy the stack itself. */
    yyfree((yy_buffer_stack) );
    (yy_buffer_stack) = NULL;

    /* Reset the globals. This is important in a non-reentrant scanner so the next time
     * yylex() is called, initialization will occur. */
    yy_init_globals( );

    return 0;
}

/*
 * Internal utility routines.
 */

#ifndef yytext_ptr
/* Copies exactly n bytes from s2 to s1; no NUL terminator is appended. */
static void yy_flex_strncpy (char* s1, const char * s2, int n )
{

    int i;
    for ( i = 0; i < n; ++i )
        s1[i] = s2[i];
}
#endif

#ifdef YY_NEED_STRLEN
/* Returns the number of bytes in s before the first NUL. */
static int yy_flex_strlen (const char * s )
{
    int n;
    for ( n = 0; s[n]; ++n )
        ;

    return n;
}
#endif

/* Thin wrapper around malloc(). */
void *yyalloc (yy_size_t size )
{
    return malloc(size);
}

/* Thin wrapper around realloc(). */
void *yyrealloc (void * ptr, yy_size_t size )
{

    /* The cast to (char *) in the following accommodates both
     * implementations that use char* generic pointers, and those
     * that use void* generic pointers. It works with the latter
     * because both ANSI C and C++ allow castless assignment from
     * any pointer type to void*, and deal with argument conversions
     * as though doing an assignment.
     */
    /* NOTE(review): the call below carries no (char *) cast, so the
     * skeleton comment above does not describe this statement. */
    return realloc(ptr, size);
}

/* Thin wrapper around free(). */
void yyfree (void * ptr )
{
    free( (char *) ptr ); /* see yyrealloc() for (char *) cast */
}

#define YYTABLES_NAME "yytables"

#line 68 "config_file_lex.l"
detox-2.0.0/src/config_file_lex.l000066400000000000000000000026271460212773400167310ustar00rootroot00000000000000%{
/**
 * This file is part of the Detox package.
*
 * Copyright (c) Doug Harple
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

#include "config_file_yacc.h"
#include "config_file.h"

/* Number of the config file line currently being scanned; starts at 1. */
int config_file_lineno = 1;

%}

%option noyywrap

%%

[\t ]+	{ /* ignore whitespace */ }

#.*	{ /* ignore comments */ }

\n	{ config_file_lineno++; }

builtin	{ return BUILTIN; }
filename	{ return FILENAME; }
ignore	{ return IGNORE; }
iso8859_1	{ return ISO8859_1; }
length	{ return LENGTH; }
lower	{ return LOWER; }
max_length	{ return MAX_LENGTH; }
remove_trailing	{ return REMOVE_TRAILING; }
safe	{ return SAFE; }
sequence	{ return SEQUENCE; }
uncgi	{ return UNCGI; }
utf_8	{ return UTF_8; }
wipeup	{ return WIPEUP; }

[{]	{ return OPEN; }
[}]	{ return CLOSE; }

\"[^\"\n]*[\"\n]	{
		/*
		 * Quoted string. The match ends at either the closing quote
		 * or, for an unterminated string, the end of the line.
		 * yylval.string receives a heap copy of the text after the
		 * opening quote; the parser owns it from here on.
		 */
		yylval.string = strdup(yytext+1);
		if (yylval.string == NULL) {
			/* Same failure path the scanner uses for its own allocations. */
			YY_FATAL_ERROR("out of dynamic memory in config file lexer");
		}
		if (yylval.string[yyleng-2] != '"') {
			printf("Unterminated character string\n");
			/*
			 * The pattern consumed the newline itself, so the
			 * plain "\n" rule never sees it: count the line here
			 * to keep config_file_lineno accurate. The newline is
			 * left in the token text, as before.
			 */
			config_file_lineno++;
		}
		else {
			/* drop the closing quote */
			yylval.string[yyleng-2] = '\0';
		}
		return QSTRING;
	}

[a-zA-Z][a-zA-Z0-9_]*	{
		/* Identifier: hand a heap copy of the text to the parser. */
		yylval.string = strdup(yytext);
		if (yylval.string == NULL) {
			YY_FATAL_ERROR("out of dynamic memory in config file lexer");
		}
		return ID;
	}

[0-9]+	{
		/* NOTE(review): atoi() has no overflow reporting - confirm that very long digit runs need no diagnosis. */
		yylval.nvalue = atoi(yytext);
		return NVALUE;
	}

[;]	{ return EOL; }

%%
detox-2.0.0/src/config_file_spoof.c000066400000000000000000000107011460212773400172460ustar00rootroot00000000000000/**
 * This file is part of the Detox package.
 *
 * Copyright (c) Doug Harple
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

#include "detox_struct.h"

#include "config_file.h"
#include "config_file_spoof.h"
#include "filelist.h"
#include "filter.h"
#include "sequence.h"
#include "wrapped.h"

/**
 * Generates a filter that uses a builtin table.
 *
 * @param cleaner The filter type, passed through to filter_init().
 * @param builtin The name of the builtin table to use; a copy made with
 *                wrapped_strdup() is stored in the filter.
 *
 * @return The newly initialised filter.
 */
static filter_t *generate_builtin_filter(int cleaner, char *builtin)
{
    filter_t *filter;

    filter = filter_init(cleaner);
    filter->builtin = wrapped_strdup(builtin);

    return filter;
}

/**
 * Generates a filter.
* * @return */ static filter_t *generate_filter(int cleaner) { filter_t *filter; filter = filter_init(cleaner); if (cleaner == FILTER_SAFE) { filter->builtin = wrapped_strdup("safe"); } return filter; } /** * Generates a new sequence. * * @param name The name to give the new sequence. * * @return */ static sequence_t *generate_sequence(char *name) { sequence_t *sequence; sequence = sequence_init(name); sequence->source_filename = wrapped_strdup("built-in config file"); return sequence; } /** * Generates a wipeup filter. * * @param remove_trailing Whether or not to include periods in the reduction list. * * @return */ static filter_t *generate_wipeup_filter(int remove_trailing) { filter_t *filter; filter = filter_init(FILTER_WIPEUP); filter->remove_trailing = remove_trailing; return filter; } /** * Out of nothing, a config file. * * @return */ config_file_t *spoof_config_file(void) { config_file_t *ret = NULL; sequence_t *sequence = NULL; filter_t *filter = NULL; filter_t *safe_wipeup_filter = NULL; // generate safe_wipeup_filter safe_wipeup_filter = generate_filter(FILTER_SAFE); safe_wipeup_filter->next = generate_wipeup_filter(1); // build containing structure ret = config_file_init(); // default sequence ret->sequences = sequence = generate_sequence("default"); sequence->filters = safe_wipeup_filter; // iso8859_1 sequence = sequence->next = generate_sequence("iso8859_1"); sequence->filters = filter = generate_builtin_filter(FILTER_ISO8859_1, "iso8859_1"); filter->next = safe_wipeup_filter; // iso8859_1-legacy sequence = sequence->next = generate_sequence("iso8859_1-legacy"); sequence->filters = filter = generate_builtin_filter(FILTER_ISO8859_1, "cp1252"); filter = filter->next = generate_builtin_filter(FILTER_ISO8859_1, "iso8859_1"); filter->next = safe_wipeup_filter; // utf_8 sequence = sequence->next = generate_sequence("utf_8"); sequence->filters = filter = generate_builtin_filter(FILTER_UTF_8, "unicode"); filter->next = safe_wipeup_filter; // utf_8-legacy 
sequence = sequence->next = generate_sequence("utf_8-legacy"); sequence->filters = filter = generate_builtin_filter(FILTER_UTF_8, "cp1252"); filter = filter->next = generate_builtin_filter(FILTER_UTF_8, "unicode"); filter->next = safe_wipeup_filter; // uncgi sequence = sequence->next = generate_sequence("uncgi"); sequence->filters = filter = generate_filter(FILTER_UNCGI); filter->next = safe_wipeup_filter; // lower sequence = sequence->next = generate_sequence("lower"); sequence->filters = filter = generate_filter(FILTER_SAFE); filter = filter->next = generate_filter(FILTER_LOWER); filter->next = generate_wipeup_filter(1); // iso8859_1-only sequence = sequence->next = generate_sequence("iso8859_1-only"); sequence->filters = generate_builtin_filter(FILTER_ISO8859_1, "iso8859_1"); // cp1252-only sequence = sequence->next = generate_sequence("cp1252-only"); sequence->filters = generate_builtin_filter(FILTER_ISO8859_1, "cp1252"); // utf_8-only sequence = sequence->next = generate_sequence("utf_8-only"); sequence->filters = generate_builtin_filter(FILTER_UTF_8, "unicode"); // uncgi-only sequence = sequence->next = generate_sequence("uncgi-only"); sequence->filters = generate_filter(FILTER_UNCGI); // lower-only sequence = sequence->next = generate_sequence("lower-only"); sequence->filters = generate_filter(FILTER_LOWER); // files to ignore ret->files_to_ignore = filelist_init(); filelist_put(ret->files_to_ignore, "{arch}"); return ret; } detox-2.0.0/src/config_file_spoof.h000066400000000000000000000006331460212773400172560ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/

/*
 * NOTE(review): identifiers that begin with two underscores are reserved for
 * the implementation in C; consider a guard name without the leading
 * underscores.
 */
#ifndef __CONFIG_FILE_SPOOF_H
#define __CONFIG_FILE_SPOOF_H

#include "detox_struct.h"

/*
 * Returns a configuration that is built in code instead of being parsed
 * from a config file.
 */
extern config_file_t *spoof_config_file(void);

#endif /* __CONFIG_FILE_SPOOF_H */
detox-2.0.0/src/config_file_yacc.c000066400000000000000000001674541460212773400170610ustar00rootroot00000000000000/* A Bison parser, made by GNU Bison 3.5.1.  */

/* Bison implementation for Yacc-like parsers in C

   Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2020 Free Software Foundation,
   Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

/* As a special exception, you may create a larger work that contains
   part or all of the Bison parser skeleton and distribute that work
   under terms of your choice, so long as that work isn't itself a
   parser generator using the skeleton or a modified version thereof
   as a parser skeleton.  Alternatively, if you modify or redistribute
   the parser skeleton itself, you may (at your option) remove this
   special exception, which will cause the skeleton and the resulting
   Bison output files to be licensed under the GNU General Public
   License without this special exception.

   This special exception was added by the Free Software Foundation in
   version 2.2 of Bison.  */

/* C LALR(1) parser skeleton written by Richard Stallman, by
   simplifying the original so-called "semantic" parser.  */

/* All symbols defined below should begin with yy or YY, to avoid
   infringing on user name space.
This should be done even for local variables, as they might otherwise be expanded by user macros. There are some unavoidable exceptions within include files to define necessary library symbols; they are noted "INFRINGES ON USER NAME SPACE" below. */ /* Undocumented macros, especially those whose name start with YY_, are private implementation details. Do not rely on them. */ /* Identify Bison output. */ #define YYBISON 1 /* Bison version. */ #define YYBISON_VERSION "3.5.1" /* Skeleton name. */ #define YYSKELETON_NAME "yacc.c" /* Pure parsers. */ #define YYPURE 0 /* Push parsers. */ #define YYPUSH 0 /* Pull parsers. */ #define YYPULL 1 /* First part of user prologue. */ #line 1 "config_file_yacc.y" /** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #include #include #include #include "detox_struct.h" #include "config_file.h" #include "filelist.h" #include "filter.h" #include "sequence.h" #include "wrapped.h" /* * I must apologize in advance for the cryptic, global variable names. 
*/ static sequence_t *cf_sequence_ret, *cf_sequence_current; static filter_t *cf_filter_ret, *cf_filter_current; static char *current_name = NULL; static char *current_filename = NULL; static filelist_t *files_to_ignore; void cf_append_sequence_list(void); void cf_append_filter(int cleaner, char *builtin, char *filename, int max_length, int remove_trailing); void cf_append_ignore_entry(void *str); void yyerror (char *s); int yylex (void); #line 112 "config_file_yacc.c" # ifndef YY_CAST # ifdef __cplusplus # define YY_CAST(Type, Val) static_cast (Val) # define YY_REINTERPRET_CAST(Type, Val) reinterpret_cast (Val) # else # define YY_CAST(Type, Val) ((Type) (Val)) # define YY_REINTERPRET_CAST(Type, Val) ((Type) (Val)) # endif # endif # ifndef YY_NULLPTR # if defined __cplusplus # if 201103L <= __cplusplus # define YY_NULLPTR nullptr # else # define YY_NULLPTR 0 # endif # else # define YY_NULLPTR ((void*)0) # endif # endif /* Enabling verbose error messages. */ #ifdef YYERROR_VERBOSE # undef YYERROR_VERBOSE # define YYERROR_VERBOSE 1 #else # define YYERROR_VERBOSE 0 #endif /* Use api.header.include to #include this header instead of duplicating it here. */ #ifndef YY_YY_CONFIG_FILE_YACC_H_INCLUDED # define YY_YY_CONFIG_FILE_YACC_H_INCLUDED /* Debug traces. */ #ifndef YYDEBUG # define YYDEBUG 0 #endif #if YYDEBUG extern int yydebug; #endif /* Token type. */ #ifndef YYTOKENTYPE # define YYTOKENTYPE enum yytokentype { BUILTIN = 258, CLOSE = 259, EOL = 260, FILENAME = 261, IGNORE = 262, ISO8859_1 = 263, LENGTH = 264, LOWER = 265, MAX_LENGTH = 266, OPEN = 267, REMOVE_TRAILING = 268, SAFE = 269, SEQUENCE = 270, UNCGI = 271, UTF_8 = 272, WIPEUP = 273, NVALUE = 274, ID = 275, QSTRING = 276 }; #endif /* Tokens. 
*/ #define BUILTIN 258 #define CLOSE 259 #define EOL 260 #define FILENAME 261 #define IGNORE 262 #define ISO8859_1 263 #define LENGTH 264 #define LOWER 265 #define MAX_LENGTH 266 #define OPEN 267 #define REMOVE_TRAILING 268 #define SAFE 269 #define SEQUENCE 270 #define UNCGI 271 #define UTF_8 272 #define WIPEUP 273 #define NVALUE 274 #define ID 275 #define QSTRING 276 /* Value type. */ #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED union YYSTYPE { #line 43 "config_file_yacc.y" char *string; /* string buffer */ int cmd; /* command value */ int nvalue; /* nvalue */ #line 212 "config_file_yacc.c" }; typedef union YYSTYPE YYSTYPE; # define YYSTYPE_IS_TRIVIAL 1 # define YYSTYPE_IS_DECLARED 1 #endif extern YYSTYPE yylval; int yyparse (void); #endif /* !YY_YY_CONFIG_FILE_YACC_H_INCLUDED */ #ifdef short # undef short #endif /* On compilers that do not define __PTRDIFF_MAX__ etc., make sure and (if available) are included so that the code can choose integer types of a good width. */ #ifndef __PTRDIFF_MAX__ # include /* INFRINGES ON USER NAME SPACE */ # if defined __STDC_VERSION__ && 199901 <= __STDC_VERSION__ # include /* INFRINGES ON USER NAME SPACE */ # define YY_STDINT_H # endif #endif /* Narrow types that promote to a signed type and that can represent a signed or unsigned integer of at least N bits. In tables they can save space and decrease cache pressure. Promoting to a signed type helps avoid bugs in integer arithmetic. 
*/ #ifdef __INT_LEAST8_MAX__ typedef __INT_LEAST8_TYPE__ yytype_int8; #elif defined YY_STDINT_H typedef int_least8_t yytype_int8; #else typedef signed char yytype_int8; #endif #ifdef __INT_LEAST16_MAX__ typedef __INT_LEAST16_TYPE__ yytype_int16; #elif defined YY_STDINT_H typedef int_least16_t yytype_int16; #else typedef short yytype_int16; #endif #if defined __UINT_LEAST8_MAX__ && __UINT_LEAST8_MAX__ <= __INT_MAX__ typedef __UINT_LEAST8_TYPE__ yytype_uint8; #elif (!defined __UINT_LEAST8_MAX__ && defined YY_STDINT_H \ && UINT_LEAST8_MAX <= INT_MAX) typedef uint_least8_t yytype_uint8; #elif !defined __UINT_LEAST8_MAX__ && UCHAR_MAX <= INT_MAX typedef unsigned char yytype_uint8; #else typedef short yytype_uint8; #endif #if defined __UINT_LEAST16_MAX__ && __UINT_LEAST16_MAX__ <= __INT_MAX__ typedef __UINT_LEAST16_TYPE__ yytype_uint16; #elif (!defined __UINT_LEAST16_MAX__ && defined YY_STDINT_H \ && UINT_LEAST16_MAX <= INT_MAX) typedef uint_least16_t yytype_uint16; #elif !defined __UINT_LEAST16_MAX__ && USHRT_MAX <= INT_MAX typedef unsigned short yytype_uint16; #else typedef int yytype_uint16; #endif #ifndef YYPTRDIFF_T # if defined __PTRDIFF_TYPE__ && defined __PTRDIFF_MAX__ # define YYPTRDIFF_T __PTRDIFF_TYPE__ # define YYPTRDIFF_MAXIMUM __PTRDIFF_MAX__ # elif defined PTRDIFF_MAX # ifndef ptrdiff_t # include /* INFRINGES ON USER NAME SPACE */ # endif # define YYPTRDIFF_T ptrdiff_t # define YYPTRDIFF_MAXIMUM PTRDIFF_MAX # else # define YYPTRDIFF_T long # define YYPTRDIFF_MAXIMUM LONG_MAX # endif #endif #ifndef YYSIZE_T # ifdef __SIZE_TYPE__ # define YYSIZE_T __SIZE_TYPE__ # elif defined size_t # define YYSIZE_T size_t # elif defined __STDC_VERSION__ && 199901 <= __STDC_VERSION__ # include /* INFRINGES ON USER NAME SPACE */ # define YYSIZE_T size_t # else # define YYSIZE_T unsigned # endif #endif #define YYSIZE_MAXIMUM \ YY_CAST (YYPTRDIFF_T, \ (YYPTRDIFF_MAXIMUM < YY_CAST (YYSIZE_T, -1) \ ? 
YYPTRDIFF_MAXIMUM \ : YY_CAST (YYSIZE_T, -1))) #define YYSIZEOF(X) YY_CAST (YYPTRDIFF_T, sizeof (X)) /* Stored state numbers (used for stacks). */ typedef yytype_int8 yy_state_t; /* State numbers in computations. */ typedef int yy_state_fast_t; #ifndef YY_ # if defined YYENABLE_NLS && YYENABLE_NLS # if ENABLE_NLS # include /* INFRINGES ON USER NAME SPACE */ # define YY_(Msgid) dgettext ("bison-runtime", Msgid) # endif # endif # ifndef YY_ # define YY_(Msgid) Msgid # endif #endif #ifndef YY_ATTRIBUTE_PURE # if defined __GNUC__ && 2 < __GNUC__ + (96 <= __GNUC_MINOR__) # define YY_ATTRIBUTE_PURE __attribute__ ((__pure__)) # else # define YY_ATTRIBUTE_PURE # endif #endif #ifndef YY_ATTRIBUTE_UNUSED # if defined __GNUC__ && 2 < __GNUC__ + (7 <= __GNUC_MINOR__) # define YY_ATTRIBUTE_UNUSED __attribute__ ((__unused__)) # else # define YY_ATTRIBUTE_UNUSED # endif #endif /* Suppress unused-variable warnings by "using" E. */ #if ! defined lint || defined __GNUC__ # define YYUSE(E) ((void) (E)) #else # define YYUSE(E) /* empty */ #endif #if defined __GNUC__ && ! defined __ICC && 407 <= __GNUC__ * 100 + __GNUC_MINOR__ /* Suppress an incorrect diagnostic about yylval being uninitialized. */ # define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \ _Pragma ("GCC diagnostic push") \ _Pragma ("GCC diagnostic ignored \"-Wuninitialized\"") \ _Pragma ("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") # define YY_IGNORE_MAYBE_UNINITIALIZED_END \ _Pragma ("GCC diagnostic pop") #else # define YY_INITIAL_VALUE(Value) Value #endif #ifndef YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN # define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN # define YY_IGNORE_MAYBE_UNINITIALIZED_END #endif #ifndef YY_INITIAL_VALUE # define YY_INITIAL_VALUE(Value) /* Nothing. */ #endif #if defined __cplusplus && defined __GNUC__ && ! 
defined __ICC && 6 <= __GNUC__ # define YY_IGNORE_USELESS_CAST_BEGIN \ _Pragma ("GCC diagnostic push") \ _Pragma ("GCC diagnostic ignored \"-Wuseless-cast\"") # define YY_IGNORE_USELESS_CAST_END \ _Pragma ("GCC diagnostic pop") #endif #ifndef YY_IGNORE_USELESS_CAST_BEGIN # define YY_IGNORE_USELESS_CAST_BEGIN # define YY_IGNORE_USELESS_CAST_END #endif #define YY_ASSERT(E) ((void) (0 && (E))) #if ! defined yyoverflow || YYERROR_VERBOSE /* The parser invokes alloca or malloc; define the necessary symbols. */ # ifdef YYSTACK_USE_ALLOCA # if YYSTACK_USE_ALLOCA # ifdef __GNUC__ # define YYSTACK_ALLOC __builtin_alloca # elif defined __BUILTIN_VA_ARG_INCR # include /* INFRINGES ON USER NAME SPACE */ # elif defined _AIX # define YYSTACK_ALLOC __alloca # elif defined _MSC_VER # include /* INFRINGES ON USER NAME SPACE */ # define alloca _alloca # else # define YYSTACK_ALLOC alloca # if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS # include /* INFRINGES ON USER NAME SPACE */ /* Use EXIT_SUCCESS as a witness for stdlib.h. */ # ifndef EXIT_SUCCESS # define EXIT_SUCCESS 0 # endif # endif # endif # endif # endif # ifdef YYSTACK_ALLOC /* Pacify GCC's 'empty if-body' warning. */ # define YYSTACK_FREE(Ptr) do { /* empty */; } while (0) # ifndef YYSTACK_ALLOC_MAXIMUM /* The OS might guarantee only one guard page at the bottom of the stack, and a page size can be as small as 4096 bytes. So we cannot safely invoke alloca (N) if N exceeds 4096. Use a slightly smaller number to allow for a few compiler-allocated temporary stack slots. */ # define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */ # endif # else # define YYSTACK_ALLOC YYMALLOC # define YYSTACK_FREE YYFREE # ifndef YYSTACK_ALLOC_MAXIMUM # define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM # endif # if (defined __cplusplus && ! defined EXIT_SUCCESS \ && ! 
((defined YYMALLOC || defined malloc) \ && (defined YYFREE || defined free))) # include /* INFRINGES ON USER NAME SPACE */ # ifndef EXIT_SUCCESS # define EXIT_SUCCESS 0 # endif # endif # ifndef YYMALLOC # define YYMALLOC malloc # if ! defined malloc && ! defined EXIT_SUCCESS void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */ # endif # endif # ifndef YYFREE # define YYFREE free # if ! defined free && ! defined EXIT_SUCCESS void free (void *); /* INFRINGES ON USER NAME SPACE */ # endif # endif # endif #endif /* ! defined yyoverflow || YYERROR_VERBOSE */ #if (! defined yyoverflow \ && (! defined __cplusplus \ || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL))) /* A type that is properly aligned for any stack member. */ union yyalloc { yy_state_t yyss_alloc; YYSTYPE yyvs_alloc; }; /* The size of the maximum gap between one aligned stack and the next. */ # define YYSTACK_GAP_MAXIMUM (YYSIZEOF (union yyalloc) - 1) /* The size of an array large to enough to hold all stacks, each with N elements. */ # define YYSTACK_BYTES(N) \ ((N) * (YYSIZEOF (yy_state_t) + YYSIZEOF (YYSTYPE)) \ + YYSTACK_GAP_MAXIMUM) # define YYCOPY_NEEDED 1 /* Relocate STACK from its old location to the new one. The local variables YYSIZE and YYSTACKSIZE give the old and new number of elements in the stack, and YYPTR gives the new location of the stack. Advance YYPTR to a properly aligned location for the next stack. */ # define YYSTACK_RELOCATE(Stack_alloc, Stack) \ do \ { \ YYPTRDIFF_T yynewbytes; \ YYCOPY (&yyptr->Stack_alloc, Stack, yysize); \ Stack = &yyptr->Stack_alloc; \ yynewbytes = yystacksize * YYSIZEOF (*Stack) + YYSTACK_GAP_MAXIMUM; \ yyptr += yynewbytes / YYSIZEOF (*yyptr); \ } \ while (0) #endif #if defined YYCOPY_NEEDED && YYCOPY_NEEDED /* Copy COUNT objects from SRC to DST. The source and destination do not overlap. 
*/
/* YYCOPY uses the GCC builtin when available and otherwise falls back to
   an element-by-element copy loop.  */
# ifndef YYCOPY
#  if defined __GNUC__ && 1 < __GNUC__
#   define YYCOPY(Dst, Src, Count) \
      __builtin_memcpy (Dst, Src, YY_CAST (YYSIZE_T, (Count)) * sizeof (*(Src)))
#  else
#   define YYCOPY(Dst, Src, Count)              \
      do                                        \
        {                                       \
          YYPTRDIFF_T yyi;                      \
          for (yyi = 0; yyi < (Count); yyi++)   \
            (Dst)[yyi] = (Src)[yyi];            \
        }                                       \
      while (0)
#  endif
# endif
#endif /* !YYCOPY_NEEDED */

/* YYFINAL -- State number of the termination state.  */
#define YYFINAL  2
/* YYLAST -- Last index in YYTABLE.  */
#define YYLAST   85

/* YYNTOKENS -- Number of terminals.  */
#define YYNTOKENS  22
/* YYNNTS -- Number of nonterminals.  */
#define YYNNTS  19
/* YYNRULES -- Number of rules.  */
#define YYNRULES  43
/* YYNSTATES -- Number of states.  */
#define YYNSTATES  90

/* YYUNDEFTOK -- Symbol number YYTRANSLATE yields for an out-of-range token.
   YYMAXUTOK -- Largest token number YYTRANSLATE looks up in yytranslate.  */
#define YYUNDEFTOK  2
#define YYMAXUTOK   276


/* YYTRANSLATE(TOKEN-NUM) -- Symbol number corresponding to TOKEN-NUM
   as returned by yylex, with out-of-bounds checking.  */
#define YYTRANSLATE(YYX)                                                \
  (0 <= (YYX) && (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)

/* YYTRANSLATE[TOKEN-NUM] -- Symbol number corresponding to TOKEN-NUM
   as returned by yylex.  */
static const yytype_int8 yytranslate[] =
{
       0,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     1,     2,     3,     4,
       5,     6,     7,     8,     9,    10,    11,    12,    13,    14,
      15,    16,    17,    18,    19,    20,    21
};

#if YYDEBUG
  /* YYRLINE[YYN] -- Source line where rule number YYN was defined.  */
static const yytype_uint8 yyrline[] =
{
       0,    73,    73,    75,    78,    80,    83,    86,    89,    92,
      93,    96,    98,   100,   102,   104,   106,   108,   111,   113,
     115,   117,   120,   122,   124,   126,   129,   131,   133,   135,
     138,   140,   142,   145,   147,   149,   152,   155,   158,   161,
     162,   165,   170,   172
};
#endif

#if YYDEBUG || YYERROR_VERBOSE || 0
/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
   First, the terminals, then, starting at YYNTOKENS, nonterminals.  */
static const char *const yytname[] =
{
  "$end", "error", "$undefined", "BUILTIN", "CLOSE", "EOL", "FILENAME",
  "IGNORE", "ISO8859_1", "LENGTH", "LOWER", "MAX_LENGTH", "OPEN",
  "REMOVE_TRAILING", "SAFE", "SEQUENCE", "UNCGI", "UTF_8", "WIPEUP",
  "NVALUE", "ID", "QSTRING", "$accept", "configfile", "rule", "sequence",
  "sequence_open", "sequence_close", "method_list", "method", "iso8859_1",
  "utf_8", "safe", "wipeup", "max_length", "ignore", "ignore_open",
  "ignore_close", "ignore_list", "ignore_filename", "string", YY_NULLPTR
};
#endif

# ifdef YYPRINT
/* YYTOKNUM[NUM] -- (External) token number corresponding to the
   (internal) symbol number NUM (which must be that of a token).  */
static const yytype_int16 yytoknum[] =
{
       0,   256,   257,   258,   259,   260,   261,   262,   263,   264,
     265,   266,   267,   268,   269,   270,   271,   272,   273,   274,
     275,   276
};
# endif

/* YYPACT_NINF marks a yypact entry whose state only has a default action.  */
#define YYPACT_NINF (-29)

#define yypact_value_is_default(Yyn) \
  ((Yyn) == YYPACT_NINF)

#define YYTABLE_NINF (-1)

/* Always 0 for this grammar: no yytable entry is treated as an explicit
   error action.  */
#define yytable_value_is_error(Yyn) \
  0

  /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
     STATE-NUM.  */
static const yytype_int8 yypact[] =
{
     -29,     1,   -29,   -10,   -15,   -29,   -29,    24,   -29,     4,
     -29,   -29,   -29,     6,    35,    10,    36,    37,    45,    39,
      41,     3,   -29,    47,    49,    50,    51,    52,   -15,    18,
     -29,   -29,    23,   -29,     0,    33,   -29,    40,    -1,    53,
     -29,   -29,   -29,   -29,   -29,   -29,   -29,    54,    55,   -29,
     -29,   -15,   -29,   -15,   -29,    26,   -15,   -29,   -15,   -15,
     -29,   -15,   -29,    56,   -29,   -29,   -29,    57,    58,    59,
      60,    61,    62,    63,    65,    66,    67,    68,    69,    70,
      71,    72,   -29,   -29,   -29,   -29,   -29,   -29,   -29,   -29
};

  /* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM.
     Performed when YYTABLE does not specify something else to do.  Zero
     means the default is an error.  */
static const yytype_int8 yydefact[] =
{
       2,     0,     1,     0,     0,     3,     4,     0,     5,     0,
      37,    43,    42,     0,    18,     0,    33,    26,     0,    22,
      30,     0,     9,     0,     0,     0,     0,     0,     0,     0,
      39,     7,     0,    12,     0,     0,    11,     0,     0,     0,
       6,    10,    14,    15,    16,    13,    17,     0,     0,    36,
      40,     0,    19,     0,    34,     0,     0,    27,     0,     0,
      23,     0,    31,     0,     8,    41,    38,     0,     0,     0,
       0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
       0,     0,    32,    21,    20,    35,    29,    28,    25,    24
};

  /* YYPGOTO[NTERM-NUM].  Added to the exposed state number after a
     reduction to index YYTABLE for the goto target.  */
static const yytype_int8 yypgoto[] =
{
     -29,   -29,   -29,   -29,   -29,   -29,   -29,    64,   -29,   -29,
     -29,   -29,   -29,   -29,   -29,   -29,   -29,    48,   -28
};

  /* YYDEFGOTO[NTERM-NUM].  Goto target used when the YYPGOTO lookup does
     not match.  */
static const yytype_int8 yydefgoto[] =
{
      -1,     1,     5,     6,     7,    40,    21,    22,    23,    24,
      25,    26,    27,     8,     9,    49,    29,    30,    13
};

  /* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM.  If
     positive, shift that token.  If negative, reduce the rule whose
     number is the opposite.  If YYTABLE_NINF, syntax error.  */
static const yytype_int8 yytable[] =
{
      47,     2,    10,    62,    54,    11,    12,    39,     3,    55,
      28,    14,    63,    15,    16,    33,     4,    17,    31,    18,
      19,    20,    48,    67,    28,    68,    51,    52,    70,    53,
      71,    72,    14,    73,    15,    16,    56,    57,    17,    58,
      18,    19,    20,    59,    60,    69,    61,    32,    34,    35,
      36,    37,    42,    38,    43,    44,    45,    46,    64,    65,
      66,    74,    75,    76,    77,    78,    79,    80,    81,    82,
      83,    84,    85,    86,    87,    88,    89,    50,     0,     0,
       0,     0,     0,     0,     0,    41
};

  /* YYCHECK -- Guard table parallel to YYTABLE.  A YYTABLE entry is only
     used when the YYCHECK entry at the same index equals the symbol (or
     state) that was used to compute that index.  */
static const yytype_int8 yycheck[] =
{
      28,     0,    12,     4,     4,    20,    21,     4,     7,     9,
       6,     8,    13,    10,    11,     5,    15,    14,    12,    16,
      17,    18,     4,    51,     6,    53,     3,     4,    56,     6,
      58,    59,     8,    61,    10,    11,     3,     4,    14,     6,
      16,    17,    18,     3,     4,    19,     6,    12,    12,    12,
       5,    12,     5,    12,     5,     5,     5,     5,     5,     5,
       5,     5,     5,     5,     5,     5,     5,     5,     5,     4,
       4,     4,     4,     4,     4,     4,     4,    29,    -1,    -1,
      -1,    -1,    -1,    -1,    -1,    21
};

  /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
     symbol of state STATE-NUM.
*/ static const yytype_int8 yystos[] = { 0, 23, 0, 7, 15, 24, 25, 26, 35, 36, 12, 20, 21, 40, 8, 10, 11, 14, 16, 17, 18, 28, 29, 30, 31, 32, 33, 34, 6, 38, 39, 12, 12, 5, 12, 12, 5, 12, 12, 4, 27, 29, 5, 5, 5, 5, 5, 40, 4, 37, 39, 3, 4, 6, 4, 9, 3, 4, 6, 3, 4, 6, 4, 13, 5, 5, 5, 40, 40, 19, 40, 40, 40, 40, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4 }; /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */ static const yytype_int8 yyr1[] = { 0, 22, 23, 23, 24, 24, 25, 26, 27, 28, 28, 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 35, 36, 37, 38, 38, 39, 40, 40 }; /* YYR2[YYN] -- Number of symbols on the right hand side of rule YYN. */ static const yytype_int8 yyr2[] = { 0, 2, 0, 2, 1, 1, 3, 3, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 3, 6, 6, 1, 3, 6, 6, 1, 3, 6, 6, 1, 3, 5, 1, 3, 6, 3, 2, 2, 1, 2, 3, 1, 1 }; #define yyerrok (yyerrstatus = 0) #define yyclearin (yychar = YYEMPTY) #define YYEMPTY (-2) #define YYEOF 0 #define YYACCEPT goto yyacceptlab #define YYABORT goto yyabortlab #define YYERROR goto yyerrorlab #define YYRECOVERING() (!!yyerrstatus) #define YYBACKUP(Token, Value) \ do \ if (yychar == YYEMPTY) \ { \ yychar = (Token); \ yylval = (Value); \ YYPOPSTACK (yylen); \ yystate = *yyssp; \ goto yybackup; \ } \ else \ { \ yyerror (YY_("syntax error: cannot back up")); \ YYERROR; \ } \ while (0) /* Error token number */ #define YYTERROR 1 #define YYERRCODE 256 /* Enable debugging if requested. */ #if YYDEBUG # ifndef YYFPRINTF # include /* INFRINGES ON USER NAME SPACE */ # define YYFPRINTF fprintf # endif # define YYDPRINTF(Args) \ do { \ if (yydebug) \ YYFPRINTF Args; \ } while (0) /* This macro is provided for backward compatibility. 
*/
#ifndef YY_LOCATION_PRINT
# define YY_LOCATION_PRINT(File, Loc) ((void) 0)
#endif


/* Trace one symbol on stderr, prefixed by Title, when yydebug is set.
   The Location argument is accepted but not used by this expansion.  */
# define YY_SYMBOL_PRINT(Title, Type, Value, Location)                    \
do {                                                                      \
  if (yydebug)                                                            \
    {                                                                     \
      YYFPRINTF (stderr, "%s ", Title);                                   \
      yy_symbol_print (stderr,                                            \
                  Type, Value); \
      YYFPRINTF (stderr, "\n");                                           \
    }                                                                     \
} while (0)


/*-----------------------------------.
| Print this symbol's value on YYO.  |
`-----------------------------------*/

/* Does nothing when YYVALUEP is null.  A value is only printed when the
   user supplied YYPRINT, and then only for tokens (YYTYPE < YYNTOKENS).  */
static void
yy_symbol_value_print (FILE *yyo, int yytype, YYSTYPE const * const yyvaluep)
{
  FILE *yyoutput = yyo;
  YYUSE (yyoutput);
  if (!yyvaluep)
    return;
# ifdef YYPRINT
  if (yytype < YYNTOKENS)
    YYPRINT (yyo, yytoknum[yytype], *yyvaluep);
# endif
  YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
  YYUSE (yytype);
  YY_IGNORE_MAYBE_UNINITIALIZED_END
}


/*---------------------------.
| Print this symbol on YYO.  |
`---------------------------*/

/* Output has the form "token NAME (VALUE)" or "nterm NAME (VALUE)", where
   NAME comes from yytname.  */
static void
yy_symbol_print (FILE *yyo, int yytype, YYSTYPE const * const yyvaluep)
{
  YYFPRINTF (yyo, "%s %s (",
             yytype < YYNTOKENS ? "token" : "nterm", yytname[yytype]);

  yy_symbol_value_print (yyo, yytype, yyvaluep);
  YYFPRINTF (yyo, ")");
}

/*------------------------------------------------------------------.
| yy_stack_print -- Print the state stack from its BOTTOM up to its |
| TOP (included).                                                   |
`------------------------------------------------------------------*/

static void
yy_stack_print (yy_state_t *yybottom, yy_state_t *yytop)
{
  YYFPRINTF (stderr, "Stack now");
  for (; yybottom <= yytop; yybottom++)
    {
      /* Widen to int so the state prints with %d whatever yy_state_t is.  */
      int yybot = *yybottom;
      YYFPRINTF (stderr, " %d", yybot);
    }
  YYFPRINTF (stderr, "\n");
}

# define YY_STACK_PRINT(Bottom, Top)                            \
do {                                                            \
  if (yydebug)                                                  \
    yy_stack_print ((Bottom), (Top));                           \
} while (0)


/*------------------------------------------------.
| Report that the YYRULE is going to be reduced.  |
`------------------------------------------------*/

/* YYSSP and YYVSP point at the tops of the state and value stacks; the
   rule's right-hand-side symbols are the topmost yyr2[YYRULE] entries.  */
static void
yy_reduce_print (yy_state_t *yyssp, YYSTYPE *yyvsp, int yyrule)
{
  int yylno = yyrline[yyrule];
  int yynrhs = yyr2[yyrule];
  int yyi;
  YYFPRINTF (stderr, "Reducing stack by rule %d (line %d):\n",
             yyrule - 1, yylno);
  /* The symbols being reduced.  */
  for (yyi = 0; yyi < yynrhs; yyi++)
    {
      YYFPRINTF (stderr, " $%d = ", yyi + 1);
      yy_symbol_print (stderr,
                       yystos[+yyssp[yyi + 1 - yynrhs]],
                       &yyvsp[(yyi + 1) - (yynrhs)]
                                              );
      YYFPRINTF (stderr, "\n");
    }
}

# define YY_REDUCE_PRINT(Rule)          \
do {                                    \
  if (yydebug)                          \
    yy_reduce_print (yyssp, yyvsp, Rule); \
} while (0)

/* Nonzero means print parse trace.  It is left uninitialized so that
   multiple parsers can coexist.  */
int yydebug;
#else /* !YYDEBUG */
/* Without YYDEBUG every tracing macro expands to nothing.  */
# define YYDPRINTF(Args)
# define YY_SYMBOL_PRINT(Title, Type, Value, Location)
# define YY_STACK_PRINT(Bottom, Top)
# define YY_REDUCE_PRINT(Rule)
#endif /* !YYDEBUG */


/* YYINITDEPTH -- initial size of the parser's stacks.  */
#ifndef YYINITDEPTH
# define YYINITDEPTH 200
#endif

/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only
   if the built-in stack extension method is used).

   Do not make this value too large; the results are undefined if
   YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH)
   evaluated with infinite-precision integer arithmetic.  */

#ifndef YYMAXDEPTH
# define YYMAXDEPTH 10000
#endif


#if YYERROR_VERBOSE

# ifndef yystrlen
#  if defined __GLIBC__ && defined _STRING_H
#   define yystrlen(S) (YY_CAST (YYPTRDIFF_T, strlen (S)))
#  else
/* Return the length of YYSTR.  */
static YYPTRDIFF_T
yystrlen (const char *yystr)
{
  YYPTRDIFF_T yylen;
  for (yylen = 0; yystr[yylen]; yylen++)
    continue;
  return yylen;
}
#  endif
# endif

# ifndef yystpcpy
#  if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE
#   define yystpcpy stpcpy
#  else
/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in
   YYDEST.
*/
static char *
yystpcpy (char *yydest, const char *yysrc)
{
  char *yyd = yydest;
  const char *yys = yysrc;

  /* The copy includes the terminator, so yyd ends one past it.  */
  while ((*yyd++ = *yys++) != '\0')
    continue;

  return yyd - 1;
}
#  endif
# endif

# ifndef yytnamerr
/* Copy to YYRES the contents of YYSTR after stripping away unnecessary
   quotes and backslashes, so that it's suitable for yyerror.  The
   heuristic is that double-quoting is unnecessary unless the string
   contains an apostrophe, a comma, or backslash (other than
   backslash-backslash).  YYSTR is taken from yytname.  If YYRES is
   null, do not copy; instead, return the length of what the result
   would have been.  */
static YYPTRDIFF_T
yytnamerr (char *yyres, const char *yystr)
{
  if (*yystr == '"')
    {
      YYPTRDIFF_T yyn = 0;
      char const *yyp = yystr;

      for (;;)
        switch (*++yyp)
          {
          case '\'':
          case ',':
            goto do_not_strip_quotes;

          case '\\':
            if (*++yyp != '\\')
              goto do_not_strip_quotes;
            else
              goto append;

          append:
          default:
            if (yyres)
              yyres[yyn] = *yyp;
            yyn++;
            break;

          case '"':
            /* Closing quote: terminate the result and report its length.  */
            if (yyres)
              yyres[yyn] = '\0';
            return yyn;
          }
    do_not_strip_quotes: ;
    }

  /* Not quoted, or quoting must be kept: use the name verbatim.  */
  if (yyres)
    return yystpcpy (yyres, yystr) - yyres;
  else
    return yystrlen (yystr);
}
# endif

/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message
   about the unexpected token YYTOKEN for the state stack whose top is
   YYSSP.

   Return 0 if *YYMSG was successfully written.  Return 1 if *YYMSG is
   not large enough to hold the message.  In that case, also set
   *YYMSG_ALLOC to the required number of bytes.  Return 2 if the
   required number of bytes is too large to store.  */
static int
yysyntax_error (YYPTRDIFF_T *yymsg_alloc, char **yymsg,
                yy_state_t *yyssp, int yytoken)
{
  enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 };
  /* Internationalized format string. */
  const char *yyformat = YY_NULLPTR;
  /* Arguments of yyformat: reported tokens (one for the "unexpected",
     one per "expected"). */
  char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM];
  /* Actual size of YYARG. */
  int yycount = 0;
  /* Cumulated lengths of YYARG.  */
  YYPTRDIFF_T yysize = 0;

  /* There are many possibilities here to consider:
     - If this state is a consistent state with a default action, then
       the only way this function was invoked is if the default action
       is an error action.  In that case, don't check for expected
       tokens because there are none.
     - The only way there can be no lookahead present (in yychar) is if
       this state is a consistent state with a default action.  Thus,
       detecting the absence of a lookahead is sufficient to determine
       that there is no unexpected or expected token to report.  In that
       case, just report a simple "syntax error".
     - Don't assume there isn't a lookahead just because this state is a
       consistent state with a default action.  There might have been a
       previous inconsistent state, consistent state with a non-default
       action, or user semantic action that manipulated yychar.
     - Of course, the expected token list depends on states to have
       correct lookahead information, and it depends on the parser not
       to perform extra reductions after fetching a lookahead from the
       scanner and before detecting a syntax error.  Thus, state merging
       (from LALR or IELR) and default reductions corrupt the expected
       token list.  However, the list is correct for canonical LR with
       one exception: it will still contain any token that will not be
       accepted due to an error action in a later state.
  */
  if (yytoken != YYEMPTY)
    {
      int yyn = yypact[+*yyssp];
      YYPTRDIFF_T yysize0 = yytnamerr (YY_NULLPTR, yytname[yytoken]);
      yysize = yysize0;
      yyarg[yycount++] = yytname[yytoken];
      if (!yypact_value_is_default (yyn))
        {
          /* Start YYX at -YYN if negative to avoid negative indexes in
             YYCHECK.  In other words, skip the first -YYN actions for
             this state because they are default actions.  */
          int yyxbegin = yyn < 0 ? -yyn : 0;
          /* Stay within bounds of both yycheck and yytname.  */
          int yychecklim = YYLAST - yyn + 1;
          int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS;
          int yyx;

          for (yyx = yyxbegin; yyx < yyxend; ++yyx)
            if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR
                && !yytable_value_is_error (yytable[yyx + yyn]))
              {
                /* Too many expected tokens to list: fall back to naming
                   only the unexpected one.  */
                if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM)
                  {
                    yycount = 1;
                    yysize = yysize0;
                    break;
                  }
                yyarg[yycount++] = yytname[yyx];
                {
                  YYPTRDIFF_T yysize1
                    = yysize + yytnamerr (YY_NULLPTR, yytname[yyx]);
                  /* The first comparison detects arithmetic wrap-around.  */
                  if (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM)
                    yysize = yysize1;
                  else
                    return 2;
                }
              }
        }
    }

  switch (yycount)
    {
# define YYCASE_(N, S)                      \
      case N:                               \
        yyformat = S;                       \
      break
    default: /* Avoid compiler warnings. */
      YYCASE_(0, YY_("syntax error"));
      YYCASE_(1, YY_("syntax error, unexpected %s"));
      YYCASE_(2, YY_("syntax error, unexpected %s, expecting %s"));
      YYCASE_(3, YY_("syntax error, unexpected %s, expecting %s or %s"));
      YYCASE_(4, YY_("syntax error, unexpected %s, expecting %s or %s or %s"));
      YYCASE_(5, YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s"));
# undef YYCASE_
    }

  {
    /* Don't count the "%s"s in the final size, but reserve room for
       the terminator.  */
    YYPTRDIFF_T yysize1 = yysize + (yystrlen (yyformat) - 2 * yycount) + 1;
    if (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM)
      yysize = yysize1;
    else
      return 2;
  }

  /* Buffer too small: request twice the needed size, capped at
     YYSTACK_ALLOC_MAXIMUM, and let the caller retry.  */
  if (*yymsg_alloc < yysize)
    {
      *yymsg_alloc = 2 * yysize;
      if (! (yysize <= *yymsg_alloc
             && *yymsg_alloc <= YYSTACK_ALLOC_MAXIMUM))
        *yymsg_alloc = YYSTACK_ALLOC_MAXIMUM;
      return 1;
    }

  /* Avoid sprintf, as that infringes on the user's name space.
     Don't have undefined behavior even if the translation
     produced a string with the wrong number of "%s"s.  */
  {
    char *yyp = *yymsg;
    int yyi = 0;
    while ((*yyp = *yyformat) != '\0')
      if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount)
        {
          yyp += yytnamerr (yyp, yyarg[yyi++]);
          yyformat += 2;
        }
      else
        {
          ++yyp;
          ++yyformat;
        }
  }
  return 0;
}
#endif /* YYERROR_VERBOSE */


/*-----------------------------------------------.
| Release the memory associated to this symbol.
| `-----------------------------------------------*/

/* No per-symbol cleanup code is present in this body: it only traces the
   symbol (when debugging is compiled in) and marks its arguments as used.  */
static void
yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep)
{
  YYUSE (yyvaluep);
  if (!yymsg)
    yymsg = "Deleting";
  YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp);

  YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
  YYUSE (yytype);
  YY_IGNORE_MAYBE_UNINITIALIZED_END
}




/* The lookahead symbol.  */
int yychar;

/* The semantic value of the lookahead symbol.  */
YYSTYPE yylval;
/* Number of syntax errors so far.  */
int yynerrs;


/*----------.
| yyparse.  |
`----------*/

/* Table-driven parser loop.  Returns 0 via yyacceptlab, 1 via yyabortlab
   and 2 via yyexhaustedlab (memory exhaustion).  */
int
yyparse (void)
{
    yy_state_fast_t yystate;
    /* Number of tokens to shift before error messages enabled.  */
    int yyerrstatus;

    /* The stacks and their tools:
       'yyss': related to states.
       'yyvs': related to semantic values.

       Refer to the stacks through separate pointers, to allow yyoverflow
       to reallocate them elsewhere.  */

    /* The state stack.  */
    yy_state_t yyssa[YYINITDEPTH];
    yy_state_t *yyss;
    yy_state_t *yyssp;

    /* The semantic value stack.  */
    YYSTYPE yyvsa[YYINITDEPTH];
    YYSTYPE *yyvs;
    YYSTYPE *yyvsp;

    YYPTRDIFF_T yystacksize;

  int yyn;
  int yyresult;
  /* Lookahead token as an internal (translated) token number.  */
  int yytoken = 0;
  /* The variables used to return semantic value and location from the
     action routines.  */
  YYSTYPE yyval;

#if YYERROR_VERBOSE
  /* Buffer for error messages, and its allocated size.  */
  char yymsgbuf[128];
  char *yymsg = yymsgbuf;
  YYPTRDIFF_T yymsg_alloc = sizeof yymsgbuf;
#endif

#define YYPOPSTACK(N)   (yyvsp -= (N), yyssp -= (N))

  /* The number of symbols on the RHS of the reduced rule.
     Keep to zero when no symbol should be popped.  */
  int yylen = 0;

  yyssp = yyss = yyssa;
  yyvsp = yyvs = yyvsa;
  yystacksize = YYINITDEPTH;

  YYDPRINTF ((stderr, "Starting parse\n"));

  yystate = 0;
  yyerrstatus = 0;
  yynerrs = 0;
  yychar = YYEMPTY; /* Cause a token to be read.  */
  goto yysetstate;


/*------------------------------------------------------------.
| yynewstate -- push a new state, which is found in yystate.  |
`------------------------------------------------------------*/
yynewstate:
  /* In all cases, when you get here, the value and location stacks
     have just been pushed.  So pushing a state here evens the stacks.  */
  yyssp++;


/*--------------------------------------------------------------------.
| yysetstate -- set current state (the top of the stack) to yystate.  |
`--------------------------------------------------------------------*/
yysetstate:
  YYDPRINTF ((stderr, "Entering state %d\n", yystate));
  YY_ASSERT (0 <= yystate && yystate < YYNSTATES);
  YY_IGNORE_USELESS_CAST_BEGIN
  *yyssp = YY_CAST (yy_state_t, yystate);
  YY_IGNORE_USELESS_CAST_END

  /* The state stack is full: grow it, or give up.  */
  if (yyss + yystacksize - 1 <= yyssp)
#if !defined yyoverflow && !defined YYSTACK_RELOCATE
    goto yyexhaustedlab;
#else
    {
      /* Get the current used size of the three stacks, in elements.  */
      YYPTRDIFF_T yysize = yyssp - yyss + 1;

# if defined yyoverflow
      {
        /* Give user a chance to reallocate the stack.  Use copies of
           these so that the &'s don't force the real ones into
           memory.  */
        yy_state_t *yyss1 = yyss;
        YYSTYPE *yyvs1 = yyvs;

        /* Each stack pointer address is followed by the size of the
           data in use in that stack, in bytes.  This used to be a
           conditional around just the two extra args, but that might
           be undefined if yyoverflow is a macro.  */
        yyoverflow (YY_("memory exhausted"),
                    &yyss1, yysize * YYSIZEOF (*yyssp),
                    &yyvs1, yysize * YYSIZEOF (*yyvsp),
                    &yystacksize);
        yyss = yyss1;
        yyvs = yyvs1;
      }
# else /* defined YYSTACK_RELOCATE */
      /* Extend the stack our own way.  */
      if (YYMAXDEPTH <= yystacksize)
        goto yyexhaustedlab;
      yystacksize *= 2;
      if (YYMAXDEPTH < yystacksize)
        yystacksize = YYMAXDEPTH;

      {
        yy_state_t *yyss1 = yyss;
        union yyalloc *yyptr =
          YY_CAST (union yyalloc *,
                   YYSTACK_ALLOC (YY_CAST (YYSIZE_T, YYSTACK_BYTES (yystacksize))));
        if (! yyptr)
          goto yyexhaustedlab;
        YYSTACK_RELOCATE (yyss_alloc, yyss);
        YYSTACK_RELOCATE (yyvs_alloc, yyvs);
# undef YYSTACK_RELOCATE
        /* Only a previously allocated stack is freed; the initial one is
           the local array yyssa.  */
        if (yyss1 != yyssa)
          YYSTACK_FREE (yyss1);
      }
# endif

      yyssp = yyss + yysize - 1;
      yyvsp = yyvs + yysize - 1;

      YY_IGNORE_USELESS_CAST_BEGIN
      YYDPRINTF ((stderr, "Stack size increased to %ld\n",
                  YY_CAST (long, yystacksize)));
      YY_IGNORE_USELESS_CAST_END

      if (yyss + yystacksize - 1 <= yyssp)
        YYABORT;
    }
#endif /* !defined yyoverflow && !defined YYSTACK_RELOCATE */

  if (yystate == YYFINAL)
    YYACCEPT;

  goto yybackup;


/*-----------.
| yybackup.  |
`-----------*/
yybackup:
  /* Do appropriate processing given the current state.  Read a
     lookahead token if we need one and don't already have one.  */

  /* First try to decide what to do without reference to lookahead token.  */
  yyn = yypact[yystate];
  if (yypact_value_is_default (yyn))
    goto yydefault;

  /* Not known => get a lookahead token if don't already have one.  */

  /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol.  */
  if (yychar == YYEMPTY)
    {
      YYDPRINTF ((stderr, "Reading a token: "));
      yychar = yylex ();
    }

  if (yychar <= YYEOF)
    {
      yychar = yytoken = YYEOF;
      YYDPRINTF ((stderr, "Now at end of input.\n"));
    }
  else
    {
      yytoken = YYTRANSLATE (yychar);
      YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc);
    }

  /* If the proper action on seeing token YYTOKEN is to reduce or to
     detect an error, take that action.  */
  yyn += yytoken;
  if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken)
    goto yydefault;
  yyn = yytable[yyn];
  if (yyn <= 0)
    {
      if (yytable_value_is_error (yyn))
        goto yyerrlab;
      yyn = -yyn;
      goto yyreduce;
    }

  /* Count tokens shifted since error; after three, turn off error
     status.  */
  if (yyerrstatus)
    yyerrstatus--;

  /* Shift the lookahead token.  */
  YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc);
  yystate = yyn;
  YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
  *++yyvsp = yylval;
  YY_IGNORE_MAYBE_UNINITIALIZED_END

  /* Discard the shifted token.  */
  yychar = YYEMPTY;
  goto yynewstate;


/*-----------------------------------------------------------.
| yydefault -- do the default action for the current state.  |
`-----------------------------------------------------------*/
yydefault:
  yyn = yydefact[yystate];
  if (yyn == 0)
    goto yyerrlab;
  goto yyreduce;


/*-----------------------------.
| yyreduce -- do a reduction.  |
`-----------------------------*/
yyreduce:
  /* yyn is the number of a rule to reduce with.  */
  yylen = yyr2[yyn];

  /* If YYLEN is nonzero, implement the default value of the action:
     '$$ = $1'.

     Otherwise, the following line sets YYVAL to garbage.
     This behavior is undocumented and Bison
     users should not rely upon it.  Assigning to YYVAL
     unconditionally makes the parser a bit smaller, and it avoids a
     GCC warning that YYVAL may be used uninitialized.  */
  yyval = yyvsp[1-yylen];


  YY_REDUCE_PRINT (yyn);
  /* Semantic actions; the #line directives point back at the rule in
     config_file_yacc.y that each action was copied from.  */
  switch (yyn)
    {
  case 7:
#line 86 "config_file_yacc.y"
                                    { current_name = (yyvsp[-1].string); }
#line 1437 "config_file_yacc.c"
    break;

  case 8:
#line 89 "config_file_yacc.y"
                           { cf_append_sequence_list(); }
#line 1443 "config_file_yacc.c"
    break;

  case 11:
#line 96 "config_file_yacc.y"
                   { cf_append_filter(FILTER_UNCGI, NULL, NULL, 0, 0); }
#line 1449 "config_file_yacc.c"
    break;

  case 12:
#line 98 "config_file_yacc.y"
                  { cf_append_filter(FILTER_LOWER, NULL, NULL, 0, 0); }
#line 1455 "config_file_yacc.c"
    break;

  case 18:
#line 111 "config_file_yacc.y"
                      { cf_append_filter(FILTER_ISO8859_1, NULL, NULL, 0, 0); }
#line 1461 "config_file_yacc.c"
    break;

  case 19:
#line 113 "config_file_yacc.y"
                           { cf_append_filter(FILTER_ISO8859_1, NULL, NULL, 0, 0); }
#line 1467 "config_file_yacc.c"
    break;

  case 20:
#line 115 "config_file_yacc.y"
                                                     { cf_append_filter(FILTER_ISO8859_1, NULL, (yyvsp[-2].string), 0, 0); }
#line 1473 "config_file_yacc.c"
    break;

  case 21:
#line 117 "config_file_yacc.y"
                                                    { cf_append_filter(FILTER_ISO8859_1, (yyvsp[-2].string), NULL, 0, 0); }
#line 1479 "config_file_yacc.c"
    break;

  case 22:
#line 120 "config_file_yacc.y"
              { cf_append_filter(FILTER_UTF_8, NULL, NULL, 0, 0); }
#line 1485 "config_file_yacc.c"
    break;

  case 23:
#line 122 "config_file_yacc.y"
                       { cf_append_filter(FILTER_UTF_8, NULL, NULL, 0, 0); }
#line 1491 "config_file_yacc.c"
    break;

  case 24:
#line 124 "config_file_yacc.y"
                                                 { cf_append_filter(FILTER_UTF_8, NULL, (yyvsp[-2].string), 0, 0); }
#line 1497 "config_file_yacc.c"
    break;

  case 25:
#line 126 "config_file_yacc.y"
                                                { cf_append_filter(FILTER_UTF_8, (yyvsp[-2].string), NULL, 0, 0); }
#line 1503 "config_file_yacc.c"
    break;

  case 26:
#line 129 "config_file_yacc.y"
            { cf_append_filter(FILTER_SAFE, NULL, NULL, 0, 0); }
#line 1509 "config_file_yacc.c"
    break;

  case 27:
#line 131 "config_file_yacc.y"
                      { cf_append_filter(FILTER_SAFE, NULL, NULL, 0, 0); }
#line 1515 "config_file_yacc.c"
    break;

  case 28:
#line 133 "config_file_yacc.y"
                                                { cf_append_filter(FILTER_SAFE, NULL, (yyvsp[-2].string), 0, 0); }
#line 1521 "config_file_yacc.c"
    break;

  case 29:
#line 135 "config_file_yacc.y"
                                               { cf_append_filter(FILTER_SAFE, (yyvsp[-2].string), NULL, 0, 0); }
#line 1527 "config_file_yacc.c"
    break;

  case 30:
#line 138 "config_file_yacc.y"
                { cf_append_filter(FILTER_WIPEUP, NULL, NULL, 0, 0); }
#line 1533 "config_file_yacc.c"
    break;

  case 31:
#line 140 "config_file_yacc.y"
                        { cf_append_filter(FILTER_WIPEUP, NULL, NULL, 0, 0); }
#line 1539 "config_file_yacc.c"
    break;

  case 32:
#line 142 "config_file_yacc.y"
                                                { cf_append_filter(FILTER_WIPEUP, NULL, NULL, 0, 1); }
#line 1545 "config_file_yacc.c"
    break;

  case 33:
#line 145 "config_file_yacc.y"
                        { cf_append_filter(FILTER_MAX_LENGTH, NULL, NULL, 0, 0); }
#line 1551 "config_file_yacc.c"
    break;

  case 34:
#line 147 "config_file_yacc.y"
                            { cf_append_filter(FILTER_MAX_LENGTH, NULL, NULL, 0, 0); }
#line 1557 "config_file_yacc.c"
    break;

  case 35:
#line 149 "config_file_yacc.y"
                                                        { cf_append_filter(FILTER_MAX_LENGTH, NULL, NULL, (yyvsp[-2].nvalue), 0); }
#line 1563 "config_file_yacc.c"
    break;

  case 41:
#line 165 "config_file_yacc.y"
                                      { cf_append_ignore_entry((yyvsp[-1].string)); }
#line 1571 "config_file_yacc.c"
    break;

  case 42:
#line 170 "config_file_yacc.y"
            { (yyval.string) = (yyvsp[0].string); }
#line 1577 "config_file_yacc.c"
    break;

  case 43:
#line 172 "config_file_yacc.y"
                 { (yyval.string) = (yyvsp[0].string); }
#line 1583 "config_file_yacc.c"
    break;


#line 1587 "config_file_yacc.c"

      default: break;
    }
  /* User semantic actions sometimes alter yychar, and that requires
     that yytoken be updated with the new translation.  We take the
     approach of translating immediately before every use of yytoken.
     One alternative is translating here after every semantic action,
     but that translation would be missed if the semantic action invokes
     YYABORT, YYACCEPT, or YYERROR immediately after altering yychar or
     if it invokes YYBACKUP.  In the case of YYABORT or YYACCEPT, an
     incorrect destructor might then be invoked immediately.  In the
     case of YYERROR or YYBACKUP, subsequent parser actions might lead
     to an incorrect destructor call or verbose syntax error message
     before the lookahead is translated.  */
  YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);

  YYPOPSTACK (yylen);
  yylen = 0;
  YY_STACK_PRINT (yyss, yyssp);

  *++yyvsp = yyval;

  /* Now 'shift' the result of the reduction.  Determine what state
     that goes to, based on the state we popped back to and the rule
     number reduced by.  */
  {
    const int yylhs = yyr1[yyn] - YYNTOKENS;
    const int yyi = yypgoto[yylhs] + *yyssp;
    yystate = (0 <= yyi && yyi <= YYLAST && yycheck[yyi] == *yyssp
               ? yytable[yyi]
               : yydefgoto[yylhs]);
  }

  goto yynewstate;


/*--------------------------------------.
| yyerrlab -- here on detecting error.  |
`--------------------------------------*/
yyerrlab:
  /* Make sure we have latest lookahead translation.  See comments at
     user semantic actions for why this is necessary.  */
  yytoken = yychar == YYEMPTY ? YYEMPTY : YYTRANSLATE (yychar);

  /* If not already recovering from an error, report this error.  */
  if (!yyerrstatus)
    {
      ++yynerrs;
#if ! YYERROR_VERBOSE
      yyerror (YY_("syntax error"));
#else
# define YYSYNTAX_ERROR yysyntax_error (&yymsg_alloc, &yymsg, \
                                        yyssp, yytoken)
      {
        char const *yymsgp = YY_("syntax error");
        int yysyntax_error_status;
        yysyntax_error_status = YYSYNTAX_ERROR;
        if (yysyntax_error_status == 0)
          yymsgp = yymsg;
        else if (yysyntax_error_status == 1)
          {
            /* Message buffer too small: allocate the requested size and
               format again, or fall back to the plain message.  */
            if (yymsg != yymsgbuf)
              YYSTACK_FREE (yymsg);
            yymsg = YY_CAST (char *, YYSTACK_ALLOC (YY_CAST (YYSIZE_T, yymsg_alloc)));
            if (!yymsg)
              {
                yymsg = yymsgbuf;
                yymsg_alloc = sizeof yymsgbuf;
                yysyntax_error_status = 2;
              }
            else
              {
                yysyntax_error_status = YYSYNTAX_ERROR;
                yymsgp = yymsg;
              }
          }
        yyerror (yymsgp);
        if (yysyntax_error_status == 2)
          goto yyexhaustedlab;
      }
# undef YYSYNTAX_ERROR
#endif
    }



  if (yyerrstatus == 3)
    {
      /* If just tried and failed to reuse lookahead token after an
         error, discard it.  */

      if (yychar <= YYEOF)
        {
          /* Return failure if at end of input.  */
          if (yychar == YYEOF)
            YYABORT;
        }
      else
        {
          yydestruct ("Error: discarding",
                      yytoken, &yylval);
          yychar = YYEMPTY;
        }
    }

  /* Else will try to reuse lookahead token after shifting the error
     token.  */
  goto yyerrlab1;


/*---------------------------------------------------.
| yyerrorlab -- error raised explicitly by YYERROR.  |
`---------------------------------------------------*/
yyerrorlab:
  /* Pacify compilers when the user code never invokes YYERROR and the
     label yyerrorlab therefore never appears in user code.  */
  if (0)
    YYERROR;

  /* Do not reclaim the symbols of the rule whose action triggered
     this YYERROR.  */
  YYPOPSTACK (yylen);
  yylen = 0;
  YY_STACK_PRINT (yyss, yyssp);
  yystate = *yyssp;
  goto yyerrlab1;


/*-------------------------------------------------------------.
| yyerrlab1 -- common code for both syntax error and YYERROR.  |
`-------------------------------------------------------------*/
yyerrlab1:
  yyerrstatus = 3;      /* Each real token shifted decrements this.  */

  /* Pop states until one is found that can shift the error token.  */
  for (;;)
    {
      yyn = yypact[yystate];
      if (!yypact_value_is_default (yyn))
        {
          yyn += YYTERROR;
          if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR)
            {
              yyn = yytable[yyn];
              if (0 < yyn)
                break;
            }
        }

      /* Pop the current state because it cannot handle the error token.  */
      if (yyssp == yyss)
        YYABORT;


      yydestruct ("Error: popping",
                  yystos[yystate], yyvsp);
      YYPOPSTACK (1);
      yystate = *yyssp;
      YY_STACK_PRINT (yyss, yyssp);
    }

  YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
  *++yyvsp = yylval;
  YY_IGNORE_MAYBE_UNINITIALIZED_END


  /* Shift the error token.  */
  YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp);

  yystate = yyn;
  goto yynewstate;


/*-------------------------------------.
| yyacceptlab -- YYACCEPT comes here.  |
`-------------------------------------*/
yyacceptlab:
  yyresult = 0;
  goto yyreturn;


/*-----------------------------------.
| yyabortlab -- YYABORT comes here.  |
`-----------------------------------*/
yyabortlab:
  yyresult = 1;
  goto yyreturn;


#if !defined yyoverflow || YYERROR_VERBOSE
/*-------------------------------------------------.
| yyexhaustedlab -- memory exhaustion comes here.  |
`-------------------------------------------------*/
yyexhaustedlab:
  yyerror (YY_("memory exhausted"));
  yyresult = 2;
  /* Fall through.  */
#endif


/*-----------------------------------------------------.
| yyreturn -- parsing is finished, return the result.  |
`-----------------------------------------------------*/
yyreturn:
  if (yychar != YYEMPTY)
    {
      /* Make sure we have latest lookahead translation.  See comments at
         user semantic actions for why this is necessary.  */
      yytoken = YYTRANSLATE (yychar);
      yydestruct ("Cleanup: discarding lookahead",
                  yytoken, &yylval);
    }
  /* Do not reclaim the symbols of the rule whose action triggered
     this YYABORT or YYACCEPT.
*/ YYPOPSTACK (yylen); YY_STACK_PRINT (yyss, yyssp); while (yyssp != yyss) { yydestruct ("Cleanup: popping", yystos[+*yyssp], yyvsp); YYPOPSTACK (1); } #ifndef yyoverflow if (yyss != yyssa) YYSTACK_FREE (yyss); #endif #if YYERROR_VERBOSE if (yymsg != yymsgbuf) YYSTACK_FREE (yymsg); #endif return yyresult; } #line 175 "config_file_yacc.y" extern FILE *yyin; extern FILE *yyout; config_file_t *parse_config_file(char *filename, config_file_t *previous_results, options_t *main_options) { config_file_t *ret = NULL; current_filename = filename; /* * XXX - Should we be closing the default yyin/yyout? If so, should we * be setting them to NULL at the end of this function? */ yyin = fopen(filename, "r"); if (yyin == NULL) { return previous_results; } yyout = fopen("/dev/null", "w"); /* * Initialize the return variable */ if (previous_results) { ret = previous_results; } else { ret = config_file_init(); } /* * Initialize the sequence list */ cf_sequence_ret = NULL; cf_sequence_current = NULL; if (previous_results && previous_results->sequences) { cf_sequence_ret = previous_results->sequences; cf_sequence_current = cf_sequence_ret; while (cf_sequence_current->next != NULL) { cf_sequence_current = cf_sequence_current->next; } } /* * Initialize the ignore list */ if (previous_results && filelist_count(previous_results->files_to_ignore) > 0) { files_to_ignore = previous_results->files_to_ignore; } else { files_to_ignore = filelist_init(); } /* * Reset the sequence entry holding vars */ cf_filter_ret = NULL; cf_filter_current = NULL; do { yyparse(); } while (!feof(yyin)); fclose(yyin); fclose(yyout); /* * Populate returns */ ret->sequences = cf_sequence_ret; ret->files_to_ignore = files_to_ignore; return ret; } void yyerror(char *s) { /* * XXX - Is extern valid here? Does it do what I'm expecting? 
*/ extern char *yytext; fprintf(stderr, "detox: error parsing config file %s: %s\n", current_filename, s); fprintf(stderr, "\tline %d", config_file_lineno); if (yytext != NULL) { fprintf(stderr, ": %s", yytext); } fprintf(stderr, "\n"); exit(EXIT_FAILURE); } void cf_append_sequence_list(void) { sequence_t *work; if (current_name == NULL) { current_name = wrapped_strdup("default"); } work = NULL; if (cf_sequence_ret != NULL) { work = cf_sequence_ret; while (work != NULL) { if (strcmp(work->name, current_name) == 0) { break; } work = work->next; } } if (work != NULL) { /* * XXX - Free Old Tree */ } else { work = sequence_init(current_name); /* * Append to the tree first. If we don't, we could create a * circular reference. */ if (cf_sequence_ret == NULL) { cf_sequence_ret = cf_sequence_current = work; } else { cf_sequence_current->next = work; cf_sequence_current = work; } } work->filters = cf_filter_ret; work->source_filename = wrapped_strdup(current_filename); cf_filter_ret = cf_filter_current = NULL; } void cf_append_filter(int cleaner, char *builtin, char *filename, int max_length, int remove_trailing) { filter_t *work; work = filter_init(cleaner); work->builtin = (builtin == NULL) ? NULL : wrapped_strdup(builtin); work->filename = (filename == NULL) ? NULL : wrapped_strdup(filename); work->max_length = (size_t) max_length; work->remove_trailing = remove_trailing; if (cf_filter_ret == NULL) { cf_filter_ret = cf_filter_current = work; } else { cf_filter_current->next = work; cf_filter_current = work; } } void cf_append_ignore_entry(void *str) { filelist_put(files_to_ignore, str); } detox-2.0.0/src/config_file_yacc.h000066400000000000000000000061001460212773400170420ustar00rootroot00000000000000/* A Bison parser, made by GNU Bison 3.5.1. */ /* Bison interface for Yacc-like parsers in C Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2020 Free Software Foundation, Inc. 
/*
 * Token type.
 *
 * Integer codes for the terminal symbols of the config-file grammar.  The
 * numbering runs consecutively from 258 (BUILTIN) to 276 (QSTRING); the same
 * values are also published as plain #define macros further down this
 * header.  The guard lets a translation unit that has already defined
 * YYTOKENTYPE skip this declaration.
 */
#ifndef YYTOKENTYPE
# define YYTOKENTYPE
  enum yytokentype
  {
    BUILTIN = 258,
    CLOSE = 259,
    EOL = 260,
    FILENAME = 261,
    IGNORE = 262,
    ISO8859_1 = 263,
    LENGTH = 264,
    LOWER = 265,
    MAX_LENGTH = 266,
    OPEN = 267,
    REMOVE_TRAILING = 268,
    SAFE = 269,
    SEQUENCE = 270,
    UNCGI = 271,
    UTF_8 = 272,
    WIPEUP = 273,
    NVALUE = 274,
    ID = 275,
    QSTRING = 276
  };
#endif
/*
 * Value type.
 *
 * Union carrying the semantic value of a grammar symbol; yylval (declared
 * below) has this type.  It is only declared when the including file has
 * not already supplied its own YYSTYPE.  The #line directives map the
 * member declarations back to the %union in config_file_yacc.y.
 */
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
union YYSTYPE
{
#line 43 "config_file_yacc.y"

    char *string; /* string buffer */
    int cmd; /* command value */
    int nvalue; /* nvalue */

#line 105 "config_file_yacc.h"

};
typedef union YYSTYPE YYSTYPE;
# define YYSTYPE_IS_TRIVIAL 1
# define YYSTYPE_IS_DECLARED 1
#endif
*/ static sequence_t *cf_sequence_ret, *cf_sequence_current; static filter_t *cf_filter_ret, *cf_filter_current; static char *current_name = NULL; static char *current_filename = NULL; static filelist_t *files_to_ignore; void cf_append_sequence_list(void); void cf_append_filter(int cleaner, char *builtin, char *filename, int max_length, int remove_trailing); void cf_append_ignore_entry(void *str); void yyerror (char *s); int yylex (void); %} %union { char *string; /* string buffer */ int cmd; /* command value */ int nvalue; /* nvalue */ } %token BUILTIN %token CLOSE %token EOL %token FILENAME %token IGNORE %token ISO8859_1 %token LENGTH %token LOWER %token MAX_LENGTH %token OPEN %token REMOVE_TRAILING %token SAFE %token SEQUENCE %token UNCGI %token UTF_8 %token WIPEUP %token NVALUE %token ID %token QSTRING %type string %% configfile: | configfile rule ; rule: sequence | ignore ; sequence: sequence_open method_list sequence_close ; sequence_open: SEQUENCE string OPEN { current_name = $2; } ; sequence_close: CLOSE EOL { cf_append_sequence_list(); } ; method_list: method | method_list method ; method: UNCGI EOL { cf_append_filter(FILTER_UNCGI, NULL, NULL, 0, 0); } | LOWER EOL { cf_append_filter(FILTER_LOWER, NULL, NULL, 0, 0); } | wipeup EOL | iso8859_1 EOL | utf_8 EOL | safe EOL | max_length EOL ; iso8859_1: ISO8859_1 { cf_append_filter(FILTER_ISO8859_1, NULL, NULL, 0, 0); } | ISO8859_1 OPEN CLOSE { cf_append_filter(FILTER_ISO8859_1, NULL, NULL, 0, 0); } | ISO8859_1 OPEN FILENAME string EOL CLOSE { cf_append_filter(FILTER_ISO8859_1, NULL, $4, 0, 0); } | ISO8859_1 OPEN BUILTIN string EOL CLOSE { cf_append_filter(FILTER_ISO8859_1, $4, NULL, 0, 0); } ; utf_8: UTF_8 { cf_append_filter(FILTER_UTF_8, NULL, NULL, 0, 0); } | UTF_8 OPEN CLOSE { cf_append_filter(FILTER_UTF_8, NULL, NULL, 0, 0); } | UTF_8 OPEN FILENAME string EOL CLOSE { cf_append_filter(FILTER_UTF_8, NULL, $4, 0, 0); } | UTF_8 OPEN BUILTIN string EOL CLOSE { cf_append_filter(FILTER_UTF_8, $4, NULL, 0, 0); } ; 
safe: SAFE { cf_append_filter(FILTER_SAFE, NULL, NULL, 0, 0); } | SAFE OPEN CLOSE { cf_append_filter(FILTER_SAFE, NULL, NULL, 0, 0); } | SAFE OPEN FILENAME string EOL CLOSE { cf_append_filter(FILTER_SAFE, NULL, $4, 0, 0); } | SAFE OPEN BUILTIN string EOL CLOSE { cf_append_filter(FILTER_SAFE, $4, NULL, 0, 0); } ; wipeup: WIPEUP { cf_append_filter(FILTER_WIPEUP, NULL, NULL, 0, 0); } | WIPEUP OPEN CLOSE { cf_append_filter(FILTER_WIPEUP, NULL, NULL, 0, 0); } | WIPEUP OPEN REMOVE_TRAILING EOL CLOSE { cf_append_filter(FILTER_WIPEUP, NULL, NULL, 0, 1); } ; max_length: MAX_LENGTH { cf_append_filter(FILTER_MAX_LENGTH, NULL, NULL, 0, 0); } | MAX_LENGTH OPEN CLOSE { cf_append_filter(FILTER_MAX_LENGTH, NULL, NULL, 0, 0); } | MAX_LENGTH OPEN LENGTH NVALUE EOL CLOSE { cf_append_filter(FILTER_MAX_LENGTH, NULL, NULL, $4, 0); } ; ignore: ignore_open ignore_list ignore_close ; ignore_open: IGNORE OPEN ; ignore_close: CLOSE EOL ; ignore_list: ignore_filename | ignore_list ignore_filename ; ignore_filename: FILENAME string EOL { cf_append_ignore_entry($2); } ; string: QSTRING { $$ = $1; } | ID { $$ = $1; } ; %% extern FILE *yyin; extern FILE *yyout; config_file_t *parse_config_file(char *filename, config_file_t *previous_results, options_t *main_options) { config_file_t *ret = NULL; current_filename = filename; /* * XXX - Should we be closing the default yyin/yyout? If so, should we * be setting them to NULL at the end of this function? 
*/ yyin = fopen(filename, "r"); if (yyin == NULL) { return previous_results; } yyout = fopen("/dev/null", "w"); /* * Initialize the return variable */ if (previous_results) { ret = previous_results; } else { ret = config_file_init(); } /* * Initialize the sequence list */ cf_sequence_ret = NULL; cf_sequence_current = NULL; if (previous_results && previous_results->sequences) { cf_sequence_ret = previous_results->sequences; cf_sequence_current = cf_sequence_ret; while (cf_sequence_current->next != NULL) { cf_sequence_current = cf_sequence_current->next; } } /* * Initialize the ignore list */ if (previous_results && filelist_count(previous_results->files_to_ignore) > 0) { files_to_ignore = previous_results->files_to_ignore; } else { files_to_ignore = filelist_init(); } /* * Reset the sequence entry holding vars */ cf_filter_ret = NULL; cf_filter_current = NULL; do { yyparse(); } while (!feof(yyin)); fclose(yyin); fclose(yyout); /* * Populate returns */ ret->sequences = cf_sequence_ret; ret->files_to_ignore = files_to_ignore; return ret; } void yyerror(char *s) { /* * XXX - Is extern valid here? Does it do what I'm expecting? */ extern char *yytext; fprintf(stderr, "detox: error parsing config file %s: %s\n", current_filename, s); fprintf(stderr, "\tline %d", config_file_lineno); if (yytext != NULL) { fprintf(stderr, ": %s", yytext); } fprintf(stderr, "\n"); exit(EXIT_FAILURE); } void cf_append_sequence_list(void) { sequence_t *work; if (current_name == NULL) { current_name = wrapped_strdup("default"); } work = NULL; if (cf_sequence_ret != NULL) { work = cf_sequence_ret; while (work != NULL) { if (strcmp(work->name, current_name) == 0) { break; } work = work->next; } } if (work != NULL) { /* * XXX - Free Old Tree */ } else { work = sequence_init(current_name); /* * Append to the tree first. If we don't, we could create a * circular reference. 
*/ if (cf_sequence_ret == NULL) { cf_sequence_ret = cf_sequence_current = work; } else { cf_sequence_current->next = work; cf_sequence_current = work; } } work->filters = cf_filter_ret; work->source_filename = wrapped_strdup(current_filename); cf_filter_ret = cf_filter_current = NULL; } void cf_append_filter(int cleaner, char *builtin, char *filename, int max_length, int remove_trailing) { filter_t *work; work = filter_init(cleaner); work->builtin = (builtin == NULL) ? NULL : wrapped_strdup(builtin); work->filename = (filename == NULL) ? NULL : wrapped_strdup(filename); work->max_length = (size_t) max_length; work->remove_trailing = remove_trailing; if (cf_filter_ret == NULL) { cf_filter_ret = cf_filter_current = work; } else { cf_filter_current->next = work; cf_filter_current = work; } } void cf_append_ignore_entry(void *str) { filelist_put(files_to_ignore, str); } detox-2.0.0/src/detox.c000066400000000000000000000070241460212773400147230ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/ #include #include #include #include #include #include "detox_struct.h" #include "config_file_dump.h" #include "config_file.h" #include "file.h" #include "filelist.h" #include "parse_options.h" #include "sequence.h" int main(int argc, char **argv) { struct stat stat_info; int err; config_file_t *config_file = NULL; options_t *main_options; char *file_work = NULL; char *file_walk; main_options = parse_options_getopt(argc, argv); if (main_options == NULL) { fprintf(stderr, "detox: an error occurred while parsing command line arguments\n"); exit(EXIT_FAILURE); } config_file = config_file_load(main_options); if (config_file == NULL) { fprintf(stderr, "detox: no config file to work with\n"); exit(EXIT_FAILURE); } /* * Store the files_to_ignore array in the main_options struct for use in * parse_dir/file/special */ main_options->files_to_ignore = config_file->files_to_ignore; /* * Determine which sequence to use */ main_options->sequence_to_use = sequence_choose_default(config_file->sequences, main_options->sequence_name); /* * List sequences */ if (main_options->list_sequences) { dump_config_file(config_file, main_options); exit(EXIT_SUCCESS); } /* * Fail if no sequence is available */ if (main_options->sequence_to_use == NULL) { /* * XXX - Explain this better */ fprintf(stderr, "detox: no sequence to work with\n"); exit(EXIT_FAILURE); } /* * Check translation tables */ sequence_review(main_options->sequence_to_use); /* * Do some actual work */ if (!main_options->is_inline_mode) { while ((file_walk = filelist_get(main_options->files))) { if (main_options->verbose) { printf("Scanning: %s\n", file_walk); } err = lstat(file_walk, &stat_info); if (err == -1) { fprintf(stderr, "%s: %s\n", file_walk, strerror(errno)); } else { if (S_ISDIR(stat_info.st_mode)) { file_work = parse_file(file_walk, main_options); parse_dir(file_work, main_options); free(file_work); } else if (S_ISREG(stat_info.st_mode)) { parse_file(file_walk, main_options); } else if (main_options->special) { 
parse_file(file_walk, main_options); } } } } else { if (filelist_count(main_options->files) > 0) { while ((file_walk = filelist_get(main_options->files))) { err = lstat(file_walk, &stat_info); if (err == -1) { fprintf(stderr, "%s: %s\n", file_walk, strerror(errno)); } else { if (S_ISDIR(stat_info.st_mode)) { fprintf(stderr, "%s: is a directory\n", file_walk); } else { parse_inline(file_walk, NULL, main_options); } } } } else { parse_inline(NULL, NULL, main_options); } } return 0; } detox-2.0.0/src/detox_struct.h000066400000000000000000000035321460212773400163340ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #ifndef __DETOX_STRUCT_H #define __DETOX_STRUCT_H #include #define MAX_PATH_LEN 256 enum { FILTER_ISO8859_1 = 1, FILTER_LOWER, FILTER_MAX_LENGTH, FILTER_SAFE, FILTER_UNCGI, FILTER_UTF_8, FILTER_WIPEUP }; typedef struct { char **files; int max; int count; int ptr; } filelist_t; typedef struct { unsigned int key; char *data; } table_row_t; typedef struct { int length; int used; int max_data_length; char *default_translation; table_row_t *rows; int hits; int misses; int overwrites; int seeks; int use_hash; int builtin; int max_key; } table_t; typedef struct filter_t_ref { struct filter_t_ref *next; int cleaner; char *filename; char *builtin; int remove_trailing; size_t max_length; table_t *table; } filter_t; /* * Holds information about all of the defined sequences */ typedef struct sequence_t_ref { struct sequence_t_ref *next; char *name; filter_t *filters; char *source_filename; } sequence_t; /* * Holds the result of a config file parse */ typedef struct { sequence_t *sequences; filelist_t *files_to_ignore; } config_file_t; /** * Holds options that affect the entire operation of the program. 
*/ typedef struct { int dry_run; int is_inline_bin; int is_inline_mode; int list_sequences; int recurse; int special; int verbose; sequence_t *sequence_to_use; filelist_t *files_to_ignore; char *sequence_name; char *check_config_file; filelist_t *files; } options_t; #endif /* __DETOX_STRUCT_H */ detox-2.0.0/src/escape_utf_8.c000066400000000000000000000101771460212773400161500ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #include "config.h" #include #include #include #include #include "file.h" #include "filter.h" #include "parse_options.h" #include "sequence.h" #include "table.h" #include "table_dump.h" #include "wrapped.h" #define MAX_LENGTH 16 /** * Generated from c_escape.tbl */ static table_row_t builtin_c_escape_rows[36] = { { .key = 0x0001, .data = "\\x01" }, { .key = 0x0002, .data = "\\x02" }, { .key = 0x0003, .data = "\\x03" }, { .key = 0x0004, .data = "\\x04" }, { .key = 0x0005, .data = "\\x05" }, { .key = 0x0006, .data = "\\x06" }, { .key = 0x0007, .data = "\\a" }, { .key = 0x0008, .data = "\\b" }, { .key = 0x0009, .data = "\\t" }, { .key = 0x000a, .data = "\\n" }, { .key = 0x000b, .data = "\\v" }, { .key = 0x000c, .data = "\\f" }, { .key = 0x000d, .data = "\\r" }, { .key = 0x000e, .data = "\\x0e" }, { .key = 0x000f, .data = "\\x0f" }, { .key = 0x0010, .data = "\\x10" }, { .key = 0x0011, .data = "\\x11" }, { .key = 0x0012, .data = "\\x12" }, { .key = 0x0013, .data = "\\x13" }, { .key = 0x0014, .data = "\\x14" }, { .key = 0x0015, .data = "\\x15" }, { .key = 0x0016, .data = "\\x16" }, { .key = 0x0017, .data = "\\x17" }, { .key = 0x0018, .data = "\\x18" }, { .key = 0x0019, .data = "\\x19" }, { .key = 0x001a, .data = "\\x1a" }, { .key = 0x001b, .data = "\\x1b" }, { .key = 0x001c, .data = "\\x1c" }, { .key = 0x001d, .data = "\\x1d" }, { .key = 0x001e, .data = "\\x1e" }, { 
.key = 0x001f, .data = "\\x1f" }, { .key = 0x0022, .data = "\\\"" }, { .key = 0x0027, .data = "\\'" }, { .key = 0x003f, .data = "\\?" }, { .key = 0x005c, .data = "\\\\" }, { .key = 0x007f, .data = "\\x7f" }, }; static table_t builtin_c_escape_table = { .length = 36, .used = 36, .max_data_length = 4, .max_key = 0x007f, .hits = 0, .misses = 0, .seeks = 0, .overwrites = 0, .use_hash = 0, .builtin = 1, .rows = builtin_c_escape_rows, .default_translation = NULL, }; /** * Generates a translation table that escapes everything not between 0x20 and * 0x7E (inclusive). * * For characters 0x01-0x1F and 0x7F, a \x escape sequence is used. This could * be improved, e.g. use \n instead of \x0A and \t instead of \x09. * * For characters 0x80- */ table_t *generate_c_escape_table(void) { table_t *table; char *work; int i; work = wrapped_malloc(MAX_LENGTH); table = table_resize(&builtin_c_escape_table, 0x110000, 1); for (i = 0x20; i < 0x7F; i++) { if (table_get(table, i) == NULL) { snprintf(work, MAX_LENGTH, "%c", i); table_put(table, i, work); } } for (i = 0x80; i < 0x10000; i++) { snprintf(work, MAX_LENGTH, "\\u%04X", i); table_put(table, i, work); } for (; i < 0x110000; i++) { snprintf(work, MAX_LENGTH, "\\U%08X", i); table_put(table, i, work); } free(work); #ifdef DEBUG table_reset(table); #endif return table; } /** * Reads from stdin and escapes everything so that: * * a) all Unicode is obvious, and * b) the output is safe to use in a C string. 
*/ int main(void) { table_t *table; options_t *options; sequence_t *sequence; filter_t *filter; table = generate_c_escape_table(); options = options_init(); options->sequence_to_use = sequence = sequence_init("utf_8-escape"); sequence->source_filename = wrapped_strdup(__FILE__); sequence->filters = filter = filter_init(FILTER_UTF_8); filter->filename = wrapped_strdup(__FILE__); filter->table = table; parse_inline(NULL, NULL, options); #ifdef DEBUG printf("\n\n"); table_stats(table); table_free(table); #endif } detox-2.0.0/src/file.c000066400000000000000000000247321460212773400145240ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #include "config.h" #include #include #include #include #include #include #include "clean_utf_8.h" #include "file.h" #include "filelist.h" #include "sequence.h" #include "wrapped.h" /** * Determines if the file should be ignored * * @param filename The file to check * @param options The main options * * @return bool */ static int ignore_file(const char *filename, const options_t *options) { char *file_walk; if (filename[0] == '.') { return 1; } while ((file_walk = filelist_get(options->files_to_ignore))) { if (strcmp(filename, file_walk) == 0) { return 1; } } return 0; } /** * Whether or not the file is "." or ".." * * @param filename The file to check * * @return bool */ static int is_protected(const char *filename) { if (filename == NULL) { return 0; } return (filename[0] == '.' && (filename[1] == '\0' || (filename[1] == '.' && filename[2] == '\0'))); } /* * Renames file to a safe filename. 
*/ char *parse_file(char *filename, options_t *options) { char *old_filename, *old_filename_ptr, *new_filename; char *work, *hold; struct stat stat_info_old; struct stat stat_info_new; int err; size_t len; len = strlen(filename) + 1; old_filename = wrapped_malloc(len); memcpy(old_filename, filename, len); old_filename_ptr = strrchr(old_filename, '/'); if (old_filename_ptr != NULL) { old_filename_ptr++; } else { old_filename_ptr = old_filename; } /* * Do the actual filename cleaning */ work = wrapped_strdup(old_filename_ptr); if (is_protected(work)) { work = NULL; } hold = sequence_run_filters(options->sequence_to_use, work); if (work != NULL) { free(work); } work = hold; if (work == NULL) { return old_filename; } /* check to see if nothing changed */ if (strcmp(old_filename_ptr, work) == 0) { return old_filename; } len = (old_filename_ptr - old_filename); new_filename = wrapped_malloc(len + strlen(work) + 1); strncpy(new_filename, old_filename, len); strcpy(new_filename + len, work); free(work); err = lstat(old_filename, &stat_info_old); if (err == -1) { free(new_filename); return old_filename; } err = lstat(new_filename, &stat_info_new); if (err != -1) { // New file exists if (stat_info_old.st_dev != stat_info_new.st_dev || // Different device stat_info_old.st_ino != stat_info_new.st_ino || // Different inode stat_info_old.st_nlink > 1) { // More than one hard link fprintf(stderr, "Cannot rename %s to %s: file already exists\n", old_filename, new_filename); free(new_filename); return old_filename; } } if (options->verbose || options->dry_run) { printf("%s -> %s\n", old_filename, new_filename); } if (options->dry_run) { free(new_filename); return old_filename; } err = rename(old_filename, new_filename); if (err == -1) { fprintf(stderr, "Cannot rename %s to %s: %s\n", old_filename, new_filename, strerror(errno)); free(new_filename); return old_filename; } free(old_filename); return new_filename; } /* * Handles directory. 
*/ void parse_dir(char *filename, options_t *options) { char *new_file, *work; DIR *dir_handle; struct dirent *dir_entry; struct stat stat_info; int check_file; int err; size_t new_file_length; err = lstat(filename, &stat_info); if (err == -1) { return; } if (!S_ISDIR(stat_info.st_mode)) { return; } new_file_length = strlen(filename) + 1024; new_file = wrapped_malloc(new_file_length); /* * Parse directory */ dir_handle = opendir(filename); if (dir_handle == NULL) { fprintf(stderr, "unable to parse: %s\n", strerror(errno)); free(new_file); // too many open files if (errno == EMFILE) { exit(EXIT_FAILURE); } return; } dir_entry = readdir(dir_handle); while (dir_entry != NULL) { /* * Check for files that need to be ignored */ check_file = !ignore_file(dir_entry->d_name, options); if (check_file) { snprintf(new_file, new_file_length, "%s/%s", filename, dir_entry->d_name); lstat(new_file, &stat_info); if (S_ISDIR(stat_info.st_mode)) { work = parse_file(new_file, options); if (options->recurse) { parse_dir(work, options); } free(work); } else if (S_ISREG(stat_info.st_mode) || options->special) { work = parse_file(new_file, options); free(work); } } dir_entry = readdir(dir_handle); } closedir(dir_handle); } /** * Parses files in a stream, either read from STDIN, or a filename. Writes the * results to a stream, either STDOUT, or a filename. * * @param in_filename Filename to read from. Set to NULL to use STDIN. * @param out_filename Filename to write to. Set to NULL to use STDOUT. * @param options Detox options. 
*/ void parse_inline(char *in_filename, char *out_filename, options_t *options) { FILE *in_fp; FILE *out_fp; size_t buf_size; size_t padding; char *base; char *hold; char *seek; char *work; int err; int has_newline; int remaining; if (in_filename != NULL) { if (!(in_fp = fopen(in_filename, "r"))) { fprintf(stderr, "%s: %s\n", in_filename, strerror(errno)); return; } } else { in_fp = stdin; } if (out_filename != NULL) { if (!(out_fp = fopen(out_filename, "w"))) { fprintf(stderr, "%s: %s\n", out_filename, strerror(errno)); return; } } else { out_fp = stdout; } buf_size = INLINE_BUF_SIZE; padding = INLINE_BUF_PADDING; base = wrapped_malloc(buf_size); while (fgets(base, buf_size - padding, in_fp)) { hold = strrchr(base, '\n'); if (hold == NULL) { #ifdef DEBUG fprintf(stderr, "detox: debug: fgets() didn't find a new line\n"); #endif // // Check to see if we stopped in the middle of a UTF-8 character. // hold = seek = strchr(base, '\0'); seek--; #ifdef DEBUG if (is_utf_8_cont(*seek)) { fprintf(stderr, "detox: debug: looks like we're in the middle of a UTF-8 character\n"); } #endif while (is_utf_8_cont(*seek) && (hold - seek) < UTF_8_MAX_LENGTH) { #ifdef DEBUG fprintf(stderr, "detox: debug: at %02x, moving back one\n", (unsigned char) *seek); #endif seek--; } #ifdef DEBUG fprintf(stderr, "detox: debug: now at %02x\n", (unsigned char) *seek); #endif if (is_utf_8_start(*seek)) { #ifdef DEBUG fprintf(stderr, "detox: debug: at %02x, this is the start of a UTF-8 char, seeking ahead\n", (unsigned char) *seek); #endif remaining = get_utf_8_width(*seek); do { #ifdef DEBUG fprintf(stderr, "detox: debug: at %02x\n", (unsigned char) *seek); #endif seek++; remaining--; } while (remaining > 0 && is_utf_8_cont(*seek)); #ifdef DEBUG fprintf(stderr, "detox: debug: done with initial seek at %02x\n", (unsigned char) *seek); #endif if (remaining > 0 && *seek == '\0') { #ifdef DEBUG fprintf(stderr, "detox: debug: we still need more data\n"); #endif // try bringing bytes in one at a time 
while (remaining > 0) { err = fgetc(in_fp); if (err == EOF) { #ifdef DEBUG fprintf(stderr, "detox: debug: hit EOF\n"); #endif break; } *seek = err; #ifdef DEBUG fprintf(stderr, "detox: debug: read %02x\n", (unsigned char) *seek); #endif // but if it isn't a UTF-8 continuation byte if (!is_utf_8_cont(*seek)) { #ifdef DEBUG fprintf(stderr, "detox: debug: %02x is not a UTF-8 continuation byte\n", (unsigned char) *seek); #endif // try to push it back on to the buffer err = ungetc(*seek, in_fp); if (err == EOF) { fprintf(stderr, "detox: warning: stream push back failed\n"); seek++; } remaining = 0; break; } seek++; remaining--; } *seek = '\0'; } #ifdef DEBUG fprintf(stderr, "detox: debug: done with secondary seek at %02x\n", (unsigned char) *seek); #endif } } // if we did fgetc(), and ungetc() failed, this might return // differently from before hold = strrchr(base, '\n'); if (hold == NULL) { has_newline = 0; } else { has_newline = 1; *hold = '\0'; } work = wrapped_strdup(base); if (is_protected(work)) { work = NULL; } hold = sequence_run_filters(options->sequence_to_use, work); if (work != NULL) { free(work); } work = hold; // // we're using printf("%s", ...) because don't know what is in the // string... // if (work != NULL) { fprintf(out_fp, "%s", work); free(work); } else { fprintf(out_fp, "%s", base); } if (has_newline) { fprintf(out_fp, "\n"); } } if (in_filename != NULL) { fclose(in_fp); } if (out_filename != NULL) { fclose(out_fp); } } detox-2.0.0/src/file.h000066400000000000000000000011201460212773400145130ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/ #ifndef __FILE_H #define __FILE_H #include "detox_struct.h" #define INLINE_BUF_SIZE 1024 #define INLINE_BUF_PADDING 16 extern char *parse_file(char *filename, options_t *options); extern void parse_dir(char *filename, options_t *options); extern void parse_inline(char *in_filename, char *out_filename, options_t *options); #endif /* __FILE_H */ detox-2.0.0/src/filelist.c000066400000000000000000000050221460212773400154070ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #include #include #include #include "filelist.h" #include "wrapped.h" #define FILELIST_CHUNK 16 /** * Returns the current count of files in the list. * * @param list */ int filelist_count(filelist_t *list) { if (list == NULL) { return 0; } return list->count; } /** * Releases memory currently held by a list. Do *not* use the list after * calling this function on it. * * @param list */ void filelist_free(filelist_t *list) { int i; if (list == NULL) { return; } for (i = 0; i < list->count; i++) { free(list->files[i]); } free(list->files); free(list); } /** * Retrieves the next member of the filelist. * * @param list * * @return */ char *filelist_get(filelist_t *list) { if (list == NULL || list->count == 0) { return NULL; } if (list->ptr == list->count) { list->ptr = 0; return NULL; } return list->files[list->ptr++]; } /** * Initializes a new filelist. * * @return */ filelist_t *filelist_init(void) { filelist_t *list; list = wrapped_malloc(sizeof(filelist_t)); memset(list, 0, sizeof(filelist_t)); list->max = FILELIST_CHUNK; list->files = wrapped_malloc(sizeof(char *) * list->max); return list; } /** * Adds a file to a file list. 
/**
 * Adds a file to a file list.
 *
 * The name is copied, and the pointer array grows by FILELIST_CHUNK slots
 * whenever it is full.  The list's read cursor is reset afterwards.  The
 * program exits with an error when list is NULL.
 *
 * @param list The list to append to.
 * @param file The filename to store (copied).
 */
void filelist_put(filelist_t *list, const char *file)
{
    if (list == NULL) {
        fprintf(stderr, "detox: could not write to a null filelist\n");
        exit(EXIT_FAILURE);
    }

    if (list->count == list->max) {
        int grown_max = list->max + FILELIST_CHUNK;
        size_t grown_bytes = sizeof(char *) * grown_max;
        char **grown = wrapped_malloc(grown_bytes);

        /* Zero the new array, then carry the existing pointers across. */
        memset(grown, 0, grown_bytes);
        memcpy(grown, list->files, sizeof(char *) * list->count);

        free(list->files);

        list->files = grown;
        list->max = grown_max;
    }

    list->files[list->count] = wrapped_strdup(file);
    list->count += 1;

    filelist_reset(list);
}
*/ #include #include #include #include #include "detox_struct.h" #include "builtin_table.h" #include "clean_string.h" #include "clean_utf_8.h" #include "parse_table.h" #include "wrapped.h" /** * Attempts to find a translation table associated with a filter. * * @param check_filename The filename to look for. * * @return */ table_t *filter_find_table(const char *check_filename) { table_t *table = NULL; int err; char *check_config_file; check_config_file = alloca(MAX_PATH_LEN); if (check_config_file == NULL) { fprintf(stderr, "out of memory: %s\n", strerror(errno)); exit(EXIT_FAILURE); } #ifdef DATADIR err = snprintf(check_config_file, MAX_PATH_LEN, "%s/detox/%s", DATADIR, check_filename); if (err < MAX_PATH_LEN) { table = parse_table(check_config_file); if (table != NULL) { return table; } } #endif err = snprintf(check_config_file, MAX_PATH_LEN, "/usr/share/detox/%s", check_filename); if (err < MAX_PATH_LEN) { table = parse_table(check_config_file); if (table != NULL) { return table; } } err = snprintf(check_config_file, MAX_PATH_LEN, "/usr/local/share/detox/%s", check_filename); if (err < MAX_PATH_LEN) { table = parse_table(check_config_file); } return table; } filter_t *filter_init(int cleaner) { filter_t *ret; ret = wrapped_malloc(sizeof(filter_t)); memset(ret, 0, sizeof(filter_t)); ret->cleaner = cleaner; return ret; } /** * Uses a builtin table for a filter. * * @param filter The filter to load a builtin for. */ table_t *filter_load_builtin(filter_t *filter) { if (filter->cleaner == FILTER_ISO8859_1) { return load_builtin_iso8859_1_table(); } else if (filter->cleaner == FILTER_UTF_8) { return load_builtin_unicode_table(); } else if (filter->cleaner == FILTER_SAFE) { return load_builtin_safe_table(); } return NULL; } /** * Uses a builtin table for a filter. 
/**
 * Loads the translation table associated with a filter.
 *
 * Only the ISO8859_1, UTF_8 and SAFE cleaners use a table; for any other
 * cleaner NULL is returned.  The source is chosen in this order:
 *
 *  1. filter->builtin, when set: the named builtin table;
 *  2. filter->filename, when set: exactly that file;
 *  3. otherwise the cleaner's default table file from the search path,
 *     falling back to the cleaner's builtin table.
 *
 * The program exits with an error message when the selected source cannot
 * be loaded.
 *
 * @param filter The filter to check.
 *
 * @return The loaded table, or NULL for cleaners that do not use one.
 */
table_t *filter_load_table(filter_t *filter)
{
    table_t *loaded;
    char *default_name;

    switch (filter->cleaner) {
        case FILTER_ISO8859_1:
            default_name = "iso8859_1.tbl";
            break;

        case FILTER_UTF_8:
            default_name = "unicode.tbl";
            break;

        case FILTER_SAFE:
            default_name = "safe.tbl";
            break;

        default:
            return NULL;
    }

    /* An explicitly named builtin takes precedence over everything else. */
    if (filter->builtin != NULL) {
        loaded = filter_load_builtin_by_filename(filter->builtin);
        if (loaded == NULL) {
            fprintf(stderr, "detox: unable to locate builtin table \"%s\"\n", filter->builtin);
            exit(EXIT_FAILURE);
        }
        return loaded;
    }

    /* An explicit filename is used as given, with no search and no fallback. */
    if (filter->filename != NULL) {
        loaded = parse_table(filter->filename);
        if (loaded == NULL) {
            fprintf(stderr, "detox: unable to parse file: \"%s\"\n", filter->filename);
            exit(EXIT_FAILURE);
        }
        return loaded;
    }

    loaded = filter_find_table(default_name);
    if (loaded != NULL) {
        return loaded;
    }

    // load builtin translation tables
    loaded = filter_load_builtin(filter);
    if (loaded == NULL) {
        fprintf(stderr, "detox: unable to locate translation table or fall back\n");
        exit(EXIT_FAILURE);
    }

    return loaded;
}
*/ char *filter_run(filter_t *filter, char *filename) { char *ret; if (filter == NULL) { fprintf(stderr, "internal error\n"); exit(EXIT_FAILURE); } if (filename == NULL) { return NULL; } switch (filter->cleaner) { case FILTER_ISO8859_1: ret = clean_iso8859_1(filename, filter->table); break; case FILTER_LOWER: ret = clean_lower(filename); break; case FILTER_MAX_LENGTH: ret = clean_max_length(filename, filter->max_length); break; case FILTER_SAFE: ret = clean_safe(filename, filter->table); break; case FILTER_UNCGI: ret = clean_uncgi(filename); break; case FILTER_UTF_8: ret = clean_utf_8(filename, filter->table); break; case FILTER_WIPEUP: ret = clean_wipeup(filename, filter->remove_trailing); break; default: fprintf(stderr, "detox: unknown filter %d\n", filter->cleaner); exit(EXIT_FAILURE); } return ret; } detox-2.0.0/src/filter.h000066400000000000000000000012251460212773400150670ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #ifndef FILTER_H #define FILTER_H #include "detox_struct.h" extern table_t *filter_find_table(const char *check_filename); extern filter_t *filter_init(int cleaner); extern table_t *filter_load_builtin_by_filename(const char *filename); extern table_t *filter_load_builtin(filter_t *filter); extern table_t *filter_load_table(filter_t *filter); extern char *filter_run(filter_t *filter, char *work); #endif //FILTER_H detox-2.0.0/src/generate_builtin_table.c000066400000000000000000000062451460212773400202730ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ /** * Helper utility that generates conversion tables. 
*/ #include #include #include #include #include "builtin_table.h" #include "parse_table.h" #include "table.h" #include "wrapped.h" /** * Escapes a string for this particular case. * * @param in * * @return */ static char *escape_string(char *in) { char *ret, *work; ret = work = wrapped_malloc((strlen(in) * 2) + 1); while (*in != '\0') { switch (*in) { case '"': case '\\': *work++ = '\\'; *work++ = *in++; break; default: *work++ = *in++; break; } } *work = '\0'; return ret; } static void generate_loader(char *filename) { table_t *table; table_t *hold_table; int i; hold_table = parse_table(filename); table = table_resize(hold_table, hold_table->max_key + 1, 1); printf( "\n" "/**\n" " * Generated from %s\n" " */\n\n", basename(filename) ); // printf( "static table_row_t builtin_NEW_rows[%d] = {\n", table->used ); for (i = 0; i < table->length; i++) { if (table->rows[i].key == 0) { continue; } printf( " { " ".key = 0x%04x, " ".data = \"%s\" " "},\n", table->rows[i].key, escape_string(table->rows[i].data) ); } printf( "};\n\n" ); // printf( "static table_t builtin_NEW_table = {\n" " .length = %d,\n" " .used = %d,\n" " .max_data_length = %d,\n" " .max_key = 0x%04x,\n" " .hits = 0,\n" " .misses = 0,\n" " .seeks = 0,\n" " .overwrites = %d,\n" " .use_hash = 0,\n" " .builtin = 1,\n" " .rows = builtin_NEW_rows,\n", table->used, table->used, table->max_data_length, table->max_key, table->overwrites ); if (table->default_translation == NULL) { printf( " .default_translation = NULL,\n" ); } else { printf( " .default_translation = \"%s\",\n", table->default_translation ); } printf( "};\n\n" ); // printf( "table_t *load_builtin_NEW_table(void)\n" "{\n" " return table_resize(&builtin_NEW_table, %d, 1);\n" "}\n", ((int) ceil(table->used / BUILTIN_TABLE_MULTIPLE) + 1) * BUILTIN_TABLE_MULTIPLE ); } int main(int argc, char **argv) { if (argc == 1) { fprintf(stderr, "please specify a file to operate on\n"); return -1; } generate_loader(argv[1]); return 0; } 
detox-2.0.0/src/parse_options.c000066400000000000000000000145451460212773400164730ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #include "config.h" #include #include #include #include #include "detox_struct.h" #include "filelist.h" #include "parse_options.h" #include "wrapped.h" #ifdef HAVE_GETOPT_LONG #include #endif enum { LONG_OPTION_INLINE = 1, LONG_OPTION_RECURSIVE, LONG_OPTION_SPECIAL }; #define INLINE_DETOX_BIN "inline-detox" /* expect this to be overwritten! */ static int long_option = 0; #ifdef HAVE_GETOPT_LONG static struct option longopts[] = { /* long options with equivalents */ {"dry-run", no_argument, NULL, 'n'}, {"help", no_argument, NULL, 'h'}, {"verbose", no_argument, NULL, 'v'}, /* long options without */ {"inline", no_argument, &long_option, LONG_OPTION_INLINE}, {"recursive", no_argument, &long_option, LONG_OPTION_RECURSIVE}, {"special", no_argument, &long_option, LONG_OPTION_SPECIAL}, /* done */ {NULL, 0, NULL, 0} }; #endif /* *INDENT-OFF* */ char usage_message[] = { "usage: detox [-hLnrvV] [-f configfile] [-s sequence]" #ifdef HAVE_GETOPT_LONG " [--dry-run] [--help] [--inline] [--recursive] [--special] [--verbose]" "\n\t " #endif " file [file ...]\n" }; char help_message[] = { " -f configfile choose which config file to use\n" #ifdef HAVE_GETOPT_LONG " -h --help this message\n" #else " -h this message\n" #endif #ifdef HAVE_GETOPT_LONG " --inline run inline mode\n" #endif " -L list available sequences and exit\n" " with -v ... 
dump sequence contents\n" #ifdef HAVE_GETOPT_LONG " -n --dry-run do a dry run (don't actually do anything)\n" " -r --recursive be recursive (descend into subdirectories)\n" #else " -n do a dry run (don't actually do anything)\n" " -r be recursive (descend into subdirectories)\n" #endif " -s sequence choose which sequence to detox with\n" #ifdef HAVE_GETOPT_LONG " --special work on links and special files\n" #endif " -v --verbose be verbose\n" " -V show the current version\n" }; char usage_message_inline[] = { "usage: inline-detox [-hLvV] [-f configfile] [-s sequence] [file]\n" }; char help_message_inline[] = { " -f configfile choose which config file to use\n" #ifdef HAVE_GETOPT_LONG " -h --help this message\n" #else " -h this message\n" #endif " -L list available sequences and exit\n" " with -v ... dump sequence contents\n" " -s sequence choose which sequence to detox with\n" " -v --verbose be verbose\n" " -V show the current version\n" }; /* *INDENT-ON* */ options_t *options_init(void) { options_t *ret; ret = wrapped_malloc(sizeof(options_t)); memset(ret, 0, sizeof(options_t)); return ret; } options_t *parse_options_getopt(int argc, char **argv) { int optcode; options_t *main_options; char *binname; main_options = options_init(); main_options->sequence_name = getenv("DETOX_SEQUENCE"); binname = basename(argv[0]); main_options->is_inline_bin = main_options->is_inline_mode = (strcmp(binname, INLINE_DETOX_BIN) == 0); #ifdef HAVE_GETOPT_LONG while ((optcode = getopt_long(argc, argv, "hrvV?Ls:f:n", longopts, NULL)) != -1) { #else while ((optcode = getopt(argc, argv, "hrvV?Ls:f:n")) != -1) { #endif switch (optcode) { case 'h': printf("%s", !main_options->is_inline_bin ? usage_message : usage_message_inline); printf("\n"); printf("%s", !main_options->is_inline_bin ? 
help_message : help_message_inline); exit(EXIT_SUCCESS); case 'f': /* * XXX - free multiple check_config_files */ main_options->check_config_file = wrapped_strdup(optarg); break; case 'L': main_options->list_sequences = 1; break; case 'n': main_options->dry_run = 1; break; case 'r': main_options->recurse = 1; break; case 's': /* * XXX - free multiple sequence name opts */ main_options->sequence_name = wrapped_strdup(optarg); break; case 'v': main_options->verbose++; break; case 'V': printf("%s\n", PACKAGE_STRING); exit(EXIT_SUCCESS); case '?': printf("%s", !main_options->is_inline_bin ? usage_message : usage_message_inline); exit(EXIT_SUCCESS); case 0: switch (long_option) { case LONG_OPTION_INLINE: main_options->is_inline_mode = 1; break; case LONG_OPTION_RECURSIVE: main_options->recurse = 1; break; case LONG_OPTION_SPECIAL: main_options->special = 1; break; default: /* * getopt_long shouldn't let us get here... * verify? */ printf("unknown option: %s\n", optarg); break; } long_option = 0; /* clean up! */ break; default: fprintf(stderr, "unknown option: %c\n", optcode); exit(EXIT_FAILURE); } } if (main_options->list_sequences) { /* * Early Retirement */ return main_options; } main_options->files = filelist_init(); if (optind < argc) { while (optind < argc) { filelist_put(main_options->files, argv[optind]); optind++; } } else if (!main_options->is_inline_mode) { printf("%s", !main_options->is_inline_bin ? usage_message : usage_message_inline); exit(EXIT_FAILURE); } return main_options; } detox-2.0.0/src/parse_options.h000066400000000000000000000011121460212773400164620ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
* */ #ifndef __PARSE_OPTIONS_H #define __PARSE_OPTIONS_H #include "detox_struct.h" extern char usage_message[]; extern char help_message[]; extern char usage_message_inline[]; extern char help_message_inline[]; extern options_t *options_init(void); extern options_t *parse_options_getopt(int argc, char **argv); #endif /* __PARSE_OPTIONS_H */ detox-2.0.0/src/parse_table.c000066400000000000000000000110161460212773400160550ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #include #include #include #include #include #include #include "table.h" #include "wrapped.h" #define LINE_LENGTH 6 enum { BASE_STATE, INSIDE_STATE }; table_t *parse_table(char *filename) { FILE *table_file; char *work; int code; int offset; char *parsed; int err; int size; int ret; int state; char *system_ctype; table_t *table; struct stat table_stat; err = stat(filename, &table_stat); if (err == -1) { return NULL; } system_ctype = setlocale(LC_CTYPE, ""); if (system_ctype == NULL) { system_ctype = ""; // I don't think we can free the return from setlocale() } size = 0; if (table_stat.st_size > 0) { size = table_stat.st_size / LINE_LENGTH; } #ifdef HAVE_STRUCT_STAT_ST_BLOCKS else { size = (512 * table_stat.st_blocks) / LINE_LENGTH; } #endif if (size < 500) { size = 500; } table = table_init(size); if (table == NULL) { return NULL; } table_file = fopen(filename, "r"); if (table_file == NULL) { fprintf(stderr, "Unable to open translation table: %s\n", strerror(errno)); return NULL; } work = wrapped_malloc(1024); parsed = wrapped_malloc(1024); state = BASE_STATE; while (fgets(work, 1024, table_file) != NULL) { if (*work == '#') { /* * Don't even bother */ continue; } parsed[0] = '\0'; if (state == BASE_STATE) { ret = sscanf(work, " %s %n", parsed, &offset); if (ret == 0) { continue; } if (strncasecmp(parsed, "start", 5) 
== 0) { if (work[offset] == '\0') { // All languages state = INSIDE_STATE; continue; } if (work[offset] == '"') { sscanf(work + offset + 1, "%[^\"]", parsed); } else if (work[offset] == '\'') { sscanf(work + offset + 1, "%[^']", parsed); } else { sscanf(work + offset, "%s", parsed); } if (strncasecmp(parsed, system_ctype, strlen(parsed)) == 0) { state = INSIDE_STATE; } // else ignore this start/end block continue; } if (strncasecmp(parsed, "default", 7) == 0) { if (work[offset] == '\0') { table->default_translation = NULL; continue; } if (work[offset] == '"') { sscanf(work + offset + 1, "%[^\"]", parsed); } else if (work[offset] == '\'') { sscanf(work + offset + 1, "%[^']", parsed); } else { sscanf(work + offset, "%s", parsed); } table->default_translation = wrapped_strdup(parsed); continue; } continue; } /* * Inside state */ code = -1; ret = sscanf(work, "%i %n", &code, &offset); if (ret == 0 || code < 0 || offset < 0) { /* * Check for end */ ret = sscanf(work, " %s %n", parsed, &offset); if (ret > 0 && strncasecmp(parsed, "end", 5) == 0) { state = BASE_STATE; } continue; } if (work[offset] == '\0') { continue; } if (work[offset] == '"') { sscanf(work + offset + 1, "%[^\"]", parsed); } else if (work[offset] == '\'') { sscanf(work + offset + 1, "%[^']", parsed); } else { sscanf(work + offset, "%s", parsed); } ret = table_put(table, code, parsed); if (ret == -1) { fprintf(stderr, "Failed to add row 0x%04x \"%s\" to translation table\n", code, parsed); table_free(table); fclose(table_file); free(work); free(parsed); return NULL; } } free(work); free(parsed); fclose(table_file); return table; } detox-2.0.0/src/parse_table.h000066400000000000000000000006121460212773400160620ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
* */ #ifndef __PARSE_TABLE_H #define __PARSE_TABLE_H #include "detox_struct.h" extern table_t *parse_table(char *filename); #endif /* __PARSE_TABLE_H */ detox-2.0.0/src/sequence.c000066400000000000000000000050651460212773400154130ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #include #include #include #include "detox_struct.h" #include "filter.h" #include "sequence.h" #include "wrapped.h" /** * Chooses which sequence to use. * * @param sequences Sequences from the config file. * @param name The sequence name from the command line, if any. * * @return The chosen sequence. */ sequence_t *sequence_choose_default(sequence_t *sequences, const char *name) { sequence_t *which = NULL; sequence_t *work = sequences; while (work != NULL) { if (strcmp(work->name, (name == NULL) ? "default" : name) == 0) { which = work; break; } work = work->next; } /* * If no sequence was found, and the user didn't specify a sequence * to use, just use the first sequence. */ if (which == NULL && name == NULL) { if (sequences != NULL) { which = sequences; } } return which; } sequence_t *sequence_init(const char *name) { sequence_t *ret; ret = wrapped_malloc(sizeof(sequence_t)); memset(ret, 0, sizeof(sequence_t)); ret->name = wrapped_strdup(name); return ret; } /** * Reviews a sequence to confirm that it's valid. * * @param sequence_t *sequence * * @return void */ void sequence_review(sequence_t *sequence) { filter_t *filter = sequence->filters; table_t *table = NULL; while (filter != NULL) { table = filter_load_table(filter); if (table != NULL) { filter->table = table; } filter = filter->next; } } /** * Runs the filters associated with a sequence. * * @param sequence The sequence of filters to run. * @param filename The filename to run through the filters. 
* * @return The filtered filename, or NULL if a problem was encountered. */ char *sequence_run_filters(sequence_t *sequence, char *filename) { filter_t *filter; char *hold; char *work; if (sequence == NULL) { fprintf(stderr, "internal error\n"); exit(EXIT_FAILURE); } if (filename == NULL) { return NULL; } filter = sequence->filters; work = wrapped_strdup(filename); while (filter != NULL && work != NULL) { hold = filter_run(filter, work); free(work); work = hold; filter = filter->next; } return work; } detox-2.0.0/src/sequence.h000066400000000000000000000011061460212773400154100ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #ifndef SEQUENCE_H #define SEQUENCE_H #include "detox_struct.h" extern sequence_t *sequence_choose_default(sequence_t *sequences, const char *name); extern sequence_t *sequence_init(const char *name); extern void sequence_review(sequence_t *sequence); extern char *sequence_run_filters(sequence_t *sequence, char *in); #endif //SEQUENCE_H detox-2.0.0/src/table.c000066400000000000000000000104121460212773400146620ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/ #include #include #include "table.h" #include "wrapped.h" /* * Internal function declarations */ static int table_hash(int table_length, unsigned int key); table_t *table_init(int max_rows) { table_t *ret; size_t row_length; if (max_rows <= 0) { max_rows = 500; } row_length = max_rows * sizeof(table_row_t); ret = wrapped_malloc(sizeof(table_t)); memset(ret, 0, sizeof(table_t)); ret->rows = wrapped_malloc(row_length); memset(ret->rows, 0, row_length); ret->length = max_rows; ret->use_hash = 1; return ret; } table_t *table_resize(table_t *table, int rows, int use_hash) { table_t *ret; int i; ret = table_init(rows); if (ret == NULL) { return table; } ret->use_hash = use_hash; if (table == NULL) { return ret; } if (table->default_translation != NULL) { ret->default_translation = wrapped_strdup(table->default_translation); } ret->overwrites = table->overwrites; for (i = 0; i < table->length; i++) { if (table->rows[i].key > 0 && table->rows[i].data != NULL) { table_put(ret, table->rows[i].key, table->rows[i].data); } } return ret; } void table_free(table_t *table) { int i; if (table == NULL || table->builtin == 1) { return; } for (i = 0; i < table->length; i++) { if (table->rows[i].key > 0 && table->rows[i].data != NULL) { free(table->rows[i].data); } } if (table->default_translation != NULL) { free(table->default_translation); } free(table->rows); free(table); } static int table_hash(int table_length, unsigned int key) { return key % table_length; } /** * Puts a row on the table. * * Key cannot be zero. * * @param table_t * table The table to use. * @param unsigned int key The key for the table. * @param char * data The data to store for the key. * * @return int The stored location, or -1 if an error occurred. 
*/ int table_put(table_t *table, unsigned int key, char *data) { int offset; int seek; int i; if (table == NULL || key == 0) { return -1; } if (table->length == table->used) { return -1; } offset = -1; if (table->use_hash) { seek = table_hash(table->length, key); if (table->rows[seek].key == 0 || table->rows[seek].key == key) { offset = seek; } } if (offset == -1) { for (i = 0; i < table->length; i++) { if (table->rows[i].key == 0 || table->rows[i].key == key) { offset = i; break; } } if (offset == -1) { return -1; } } if (table->rows[offset].key == key) { table->overwrites++; } table->rows[offset].key = key; table->rows[offset].data = wrapped_strdup(data); table->used++; if (table->max_key < key) { table->max_key = key; } if (table->max_data_length < strlen(data)) { table->max_data_length = strlen(data); } return offset; } /** * Gets a row from the table. * * Key cannot be zero. * * @param table The table to use. * @param key The key for the table. * * @return The value from the table, or NULL if it could not be found. */ char *table_get(table_t *table, unsigned int key) { int offset; int seek; int i; if (table == NULL || key == 0) { return NULL; } offset = -1; if (table->use_hash) { seek = table_hash(table->length, key); if (table->rows[seek].key == key) { offset = seek; } } if (offset == -1) { table->seeks++; for (i = 0; i < table->length; i++) { if (table->rows[i].key == key) { offset = i; break; } } if (offset == -1) { table->misses++; return NULL; } } table->hits++; return table->rows[offset].data; } detox-2.0.0/src/table.h000066400000000000000000000012321460212773400146670ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/ #ifndef __TABLE_H #define __TABLE_H #include "detox_struct.h" #define table_reset(t) t->hits = t->misses = t->seeks = 0; extern table_t *table_init(int max_rows); extern table_t *table_resize(table_t *table, int rows, int use_hash); extern void table_free(table_t *table); extern int table_put(table_t *table, unsigned int key, char *data); extern char *table_get(table_t *table, unsigned int key); #endif /* __TABLE_H */ detox-2.0.0/src/table_dump.c000066400000000000000000000024341460212773400157140ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ /** * Helper utility to dump a translation table. */ #include #include "table_dump.h" void table_dump(table_t *table, int verbose) { int i; for (i = 0; i < table->length; i++) { if (!verbose && table->rows[i].key == 0) { continue; } printf("index: %d, ", i); if (table->rows[i].key == 0) { printf("empty\n"); } else { printf("key: 0x%04x, data: %s\n", table->rows[i].key, table->rows[i].data); } } } void table_stats(table_t *table) { printf("used: %d\n", table->used); printf("available: %d\n", table->length); printf("used percent: %0.2f\n", table->used * 100.0 / table->length); printf("hits: %d\n", table->hits); printf("misses: %d\n", table->misses); printf("seeks: %d\n", table->seeks); printf("overwrites: %d\n", table->overwrites); printf("longest entry: %d\n", table->max_data_length); printf("default translation: %s\n", table->default_translation); printf("maximum key: 0x%04x\n", table->max_key); } detox-2.0.0/src/table_dump.h000066400000000000000000000006671460212773400157270ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/ #ifndef __TABLE_DUMP_H #define __TABLE_DUMP_H #include "detox_struct.h" extern void table_dump(table_t *table, int verbose); extern void table_stats(table_t *table); #endif /* __TABLE_DUMP_H */ detox-2.0.0/src/wrapped.c000066400000000000000000000022641460212773400152430ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #include "config.h" #include #include #include #include #include "wrapped.h" #ifdef SUPPORT_COVERAGE int wrapped_malloc_failure = 0; int wrapped_strdup_failure = 0; #endif void *wrapped_malloc(size_t size) { void *ret; int err; ret = malloc(size); #ifdef SUPPORT_COVERAGE if (wrapped_malloc_failure != 0) { free(ret); ret = NULL; } #endif if (ret == NULL) { err = errno; fprintf(stderr, "detox: out of memory: %s\n", strerror(err)); exit(EXIT_FAILURE); } return ret; } char *wrapped_strdup(const char *s) { char *ret; int err; ret = strdup(s); #ifdef SUPPORT_COVERAGE if (wrapped_strdup_failure != 0) { free(ret); ret = NULL; } #endif if (ret == NULL) { err = errno; fprintf(stderr, "detox: out of memory: %s\n", strerror(err)); exit(EXIT_FAILURE); } return ret; } detox-2.0.0/src/wrapped.h000066400000000000000000000010031460212773400152360ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/ #ifndef __WRAPPED_H #define __WRAPPED_H #include #ifdef SUPPORT_COVERAGE extern int wrapped_malloc_failure; extern int wrapped_strdup_failure; #endif extern void *wrapped_malloc(size_t size); extern char *wrapped_strdup(const char *s); #endif /* __WRAPPED_H */ detox-2.0.0/table/000077500000000000000000000000001460212773400137315ustar00rootroot00000000000000detox-2.0.0/table/c_escape.tbl000066400000000000000000000027001460212773400161750ustar00rootroot00000000000000# This file is part of the Detox package. # # Copyright (c) Doug Harple # # For the full copyright and license information, please view the LICENSE # file that was distributed with this source code. # # This file holds the escape sequences needed to escape all characters between # 0x01 and 0x7F. # start 0x01 \x01 # SOH (start of heading) 0x02 \x02 # STX (start of text) 0x03 \x03 # ETX (end of text) 0x04 \x04 # EOT (end of transmission) 0x05 \x05 # ENQ (enquiry) 0x06 \x06 # ACK (acknowledge) 0x07 \a # BEL '\a' (bell) 0x08 \b # BS '\b' (backspace) 0x09 \t # HT '\t' (horizontal tab) 0x0a \n # LF '\n' (new line) 0x0b \v # VT '\v' (vertical tab) 0x0c \f # FF '\f' (form feed) 0x0d \r # CR '\r' (carriage ret) 0x0e \x0e # SO (shift out) 0x0f \x0f # SI (shift in) 0x10 \x10 # DLE (data link escape) 0x11 \x11 # DC1 (device control 1) 0x12 \x12 # DC2 (device control 2) 0x13 \x13 # DC3 (device control 3) 0x14 \x14 # DC4 (device control 4) 0x15 \x15 # NAK (negative ack.) 0x16 \x16 # SYN (synchronous idle) 0x17 \x17 # ETB (end of trans. blk) 0x18 \x18 # CAN (cancel) 0x19 \x19 # EM (end of medium) 0x1a \x1a # SUB (substitute) 0x1b \x1b # ESC (escape) 0x1c \x1c # FS (file separator) 0x1d \x1d # GS (group separator) 0x1e \x1e # RS (record separator) 0x1f \x1f # US (unit separator) 0x22 '\"' # " 0x27 "\'" # ' 0x3F \? # ? 0x5c \\ # \ 0x7f \x7f # DEL end detox-2.0.0/table/cp1252.tbl000066400000000000000000000027761460212773400153640ustar00rootroot00000000000000# This file is part of the Detox package. 
# # Copyright (c) Doug Harple # # For the full copyright and license information, please view the LICENSE # file that was distributed with this source code. # # This table contains some rules for transliterating Window CP-1252 characters # into ASCII. # # Thanks to: # - https://en.wikipedia.org/wiki/Windows-1252 # # # "default" is commented out because this is not intended to be run in # isolation. It should be a prefilter in front of the ISO 8859-1 filter, or, # in very rare cases, the UTF-8 one. # # default start # # CP-1252 Translation - 0x0080-0x009F # 0x0080 EUR # EURO SIGN 0x0082 "'" # SINGLE LOW-9 QUOTATION MARK 0x0083 f # LATIN SMALL LETTER F WITH HOOK 0x0084 '"' # DOUBLE LOW-9 QUOTATION MARK 0x0085 ... # HORIZONTAL ELLIPSIS 0x0086 + # DAGGER 0x0087 ++ # DOUBLE DAGGER 0x0088 ^ # MODIFIER LETTER CIRCUMFLEX ACCENT 0x0089 % # PER MILLE SIGN 0x008A S # LATIN CAPITAL LETTER S WITH CARON 0x008C OE # LATIN CAPITAL LIGATURE OE 0x008E Z # LATIN CAPITAL LETTER Z WITH CARON 0x0091 "'" # LEFT SINGLE QUOTATION MARK 0x0092 "'" # RIGHT SINGLE QUOTATION MARK 0x0093 '"' # LEFT DOUBLE QUOTATION MARK 0x0094 '"' # RIGHT DOUBLE QUOTATION MARK 0x0095 * # BULLET 0x0096 - # EN DASH 0x0097 - # EM DASH 0x0098 ~ # SMALL TILDE 0x0099 _tm_ # TRADE MARK SIGN 0x009A s # LATIN SMALL LETTER S WITH CARON 0x009C oe # LATIN SMALL LIGATURE OE 0x009E z # LATIN SMALL LETTER Z WITH CARON 0x009F Y # LATIN CAPITAL LETTER Y WITH DIAERESIS end detox-2.0.0/table/iso8859_1.tbl000066400000000000000000000110601460212773400160020ustar00rootroot00000000000000# This file is part of the Detox package. # # Copyright (c) Doug Harple # # For the full copyright and license information, please view the LICENSE # file that was distributed with this source code. # # Special thanks to: http://www.bbsinc.com/iso8859.html # # # This file transliterates upper 8-bit characters encoded in ISO 8859-1 into # ASCII. It's meant to be used with the iso8859_1 filter, which selects for # characters between 0x80 and 0xFF. 
# default _ start # # Latin 1 Supplemental - 0x0080-0x00FF # # https://unicode.org/charts/PDF/U0080.pdf # 0x00A0 " " # NO-BREAK SPACE 0x00A1 ! # INVERTED EXCLAMATION MARK 0x00A2 _cent_ # CENT SIGN 0x00A3 _pound_ # POUND SIGN 0x00A4 $ # CURRENCY SIGN 0x00A5 _yen_ # YEN SIGN 0x00A7 _ss_ # SECTION SIGN 0x00A8 " " # DIAERESIS 0x00A9 _copy_ # COPYRIGHT SIGN 0x00AA _a_ # FEMININE ORDINAL INDICATOR 0x00AB '"' # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00AD - # SOFT HYPHEN 0x00AE _reg_ # REGISTERED SIGN 0x00B0 _deg_ # DEGREE SIGN 0x00B2 ^2 # SUPERSCRIPT TWO 0x00B3 ^3 # SUPERSCRIPT THREE 0x00B4 "'" # ACUTE ACCENT 0x00B5 u # MICRO SIGN 0x00B6 _pp_ # PILCROW SIGN 0x00B7 * # MIDDLE DOT 0x00B8 " " # CEDILLA 0x00B9 ^1 # SUPERSCRIPT ONE 0x00BA _o_ # MASCULINE ORDINAL INDICATOR 0x00BB '"' # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00BF ? # INVERTED QUESTION MARK 0x00C0 A # LATIN CAPITAL LETTER A WITH GRAVE 0x00C1 A # LATIN CAPITAL LETTER A WITH ACUTE 0x00C2 A # LATIN CAPITAL LETTER A WITH CIRCUMFLEX 0x00C3 A # LATIN CAPITAL LETTER A WITH TILDE 0x00C4 A # LATIN CAPITAL LETTER A WITH DIAERESIS 0x00C5 A # LATIN CAPITAL LETTER A WITH RING ABOVE 0x00C6 AE # LATIN CAPITAL LETTER AE 0x00C7 C # LATIN CAPITAL LETTER C WITH CEDILLA 0x00C8 E # LATIN CAPITAL LETTER E WITH GRAVE 0x00C9 E # LATIN CAPITAL LETTER E WITH ACUTE 0x00CA E # LATIN CAPITAL LETTER E WITH CIRCUMFLEX 0x00CB E # LATIN CAPITAL LETTER E WITH DIAERESIS 0x00CC I # LATIN CAPITAL LETTER I WITH GRAVE 0x00CD I # LATIN CAPITAL LETTER I WITH ACUTE 0x00CE I # LATIN CAPITAL LETTER I WITH CIRCUMFLEX 0x00CF I # LATIN CAPITAL LETTER I WITH DIAERESIS 0x00D0 TH # LATIN CAPITAL LETTER ETH (Icelandic) 0x00D1 N # LATIN CAPITAL LETTER N WITH TILDE 0x00D2 O # LATIN CAPITAL LETTER O WITH GRAVE 0x00D3 O # LATIN CAPITAL LETTER O WITH ACUTE 0x00D4 O # LATIN CAPITAL LETTER O WITH CIRCUMFLEX 0x00D5 O # LATIN CAPITAL LETTER O WITH TILDE 0x00D6 O # LATIN CAPITAL LETTER O WITH DIAERESIS 0x00D7 x # MULTIPLICATION SIGN 0x00D8 O # LATIN CAPITAL 
LETTER O WITH STROKE 0x00D9 U # LATIN CAPITAL LETTER U WITH GRAVE 0x00DA U # LATIN CAPITAL LETTER U WITH ACUTE 0x00DB U # LATIN CAPITAL LETTER U WITH CIRCUMFLEX 0x00DC U # LATIN CAPITAL LETTER U WITH DIAERESIS 0x00DD Y # LATIN CAPITAL LETTER Y WITH ACUTE 0x00DE TH # LATIN CAPITAL LETTER THORN (Icelandic) 0x00DF ss # LATIN SMALL LETTER SHARP S (German) 0x00E0 a # LATIN SMALL LETTER A WITH GRAVE 0x00E1 a # LATIN SMALL LETTER A WITH ACUTE 0x00E2 a # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x00E3 a # LATIN SMALL LETTER A WITH TILDE 0x00E4 a # LATIN SMALL LETTER A WITH DIAERESIS 0x00E5 a # LATIN SMALL LETTER A WITH RING ABOVE 0x00E6 ae # LATIN SMALL LETTER AE 0x00E7 c # LATIN SMALL LETTER C WITH CEDILLA 0x00E8 e # LATIN SMALL LETTER E WITH GRAVE 0x00E9 e # LATIN SMALL LETTER E WITH ACUTE 0x00EA e # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x00EB e # LATIN SMALL LETTER E WITH DIAERESIS 0x00EC i # LATIN SMALL LETTER I WITH GRAVE 0x00ED i # LATIN SMALL LETTER I WITH ACUTE 0x00EE i # LATIN SMALL LETTER I WITH CIRCUMFLEX 0x00EF i # LATIN SMALL LETTER I WITH DIAERESIS 0x00F0 th # LATIN SMALL LETTER ETH (Icelandic) 0x00F1 n # LATIN SMALL LETTER N WITH TILDE 0x00F2 o # LATIN SMALL LETTER O WITH GRAVE 0x00F3 o # LATIN SMALL LETTER O WITH ACUTE 0x00F4 o # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x00F5 o # LATIN SMALL LETTER O WITH TILDE 0x00F6 o # LATIN SMALL LETTER O WITH DIAERESIS 0x00F8 o # LATIN SMALL LETTER O WITH STROKE 0x00F9 u # LATIN SMALL LETTER U WITH GRAVE 0x00FA u # LATIN SMALL LETTER U WITH ACUTE 0x00FB u # LATIN SMALL LETTER U WITH CIRCUMFLEX 0x00FC u # LATIN SMALL LETTER U WITH DIAERESIS 0x00FD y # LATIN SMALL LETTER Y WITH ACUTE 0x00FE th # LATIN SMALL LETTER THORN (Icelandic) 0x00FF y # LATIN SMALL LETTER Y WITH DIAERESIS end # # German specific conversions # start de 0x00C4 AE # LATIN CAPITAL LETTER A WITH UMLAUT 0x00D6 OE # LATIN CAPITAL LETTER O WITH UMLAUT 0x00DC UE # LATIN CAPITAL LETTER U WITH UMLAUT 0x00E4 ae # LATIN SMALL LETTER A WITH UMLAUT 0x00F6 oe # LATIN 
SMALL LETTER O WITH UMLAUT 0x00FC ue # LATIN SMALL LETTER U WITH UMLAUT end detox-2.0.0/table/safe.tbl000066400000000000000000000036221460212773400153550ustar00rootroot00000000000000# This file is part of the Detox package. # # Copyright (c) Doug Harple # # For the full copyright and license information, please view the LICENSE # file that was distributed with this source code. # # This file replaces 7-bit characters that are special to shells with "_" or # "-". The exception is ampersand, which gets translated to "_and_". # # The absence of a "default" option means that if a character is not specified # here, it will not be altered. # start # # Control characters to translate to _ # # Descriptions pulled from ascii(7) # 0x01 _ # SOH (start of heading) 0x02 _ # STX (start of text) 0x03 _ # ETX (end of text) 0x04 _ # EOT (end of transmission) 0x05 _ # ENQ (enquiry) 0x06 _ # ACK (acknowledge) 0x07 _ # BEL '\a' (bell) 0x08 _ # BS '\b' (backspace) 0x09 _ # HT '\t' (horizontal tab) 0x0a _ # LF '\n' (new line) 0x0b _ # VT '\v' (vertical tab) 0x0c _ # FF '\f' (form feed) 0x0d _ # CR '\r' (carriage ret) 0x0e _ # SO (shift out) 0x0f _ # SI (shift in) 0x10 _ # DLE (data link escape) 0x11 _ # DC1 (device control 1) 0x12 _ # DC2 (device control 2) 0x13 _ # DC3 (device control 3) 0x14 _ # DC4 (device control 4) 0x15 _ # NAK (negative ack.) 0x16 _ # SYN (synchronous idle) 0x17 _ # ETB (end of trans. blk) 0x18 _ # CAN (cancel) 0x19 _ # EM (end of medium) 0x1a _ # SUB (substitute) 0x1b _ # ESC (escape) 0x1c _ # FS (file separator) 0x1d _ # GS (group separator) 0x1e _ # RS (record separator) 0x1f _ # US (unit separator) 0x7f _ # DEL # # Chars to translate to _ # 0x20 _ # space 0x21 _ # ! 0x22 _ # " 0x24 _ # $ 0x27 _ # ' 0x2a _ # * 0x2f _ # / 0x3a _ # : 0x3b _ # ; 0x3c _ # < 0x3e _ # > 0x3f _ # ? 
0x40 _ # @ 0x5c _ # \ 0x60 _ # ` 0x7c _ # | # # Chars to translate to - # 0x28 - # ( 0x29 - # ) 0x5b - # [ 0x5d - # ] 0x7b - # { 0x7d - # } # # Other # 0x26 _and_ # & end detox-2.0.0/table/unicode.tbl000066400000000000000000000660211460212773400160670ustar00rootroot00000000000000# This file is part of the Detox package. # # Copyright (c) Doug Harple # # For the full copyright and license information, please view the LICENSE # file that was distributed with this source code. # # Special thanks to: # - https://www.unicode.org/Public/5.2.0/ucd/UnicodeData.txt # - https://metacpan.org/pod/Text::Unidecode # - https://github.com/Behat/Transliterator # - https://www.unicode.org/charts/ # # Used for translating Icelandic: # - http://www.bartleby.com/65/th/Thingval.html # - http://www.wordiq.com/definition/Thorn_%28letter%29 # - http://www.wordiq.com/definition/Edh # # Greenlandic: # - http://std.dkuug.dk/cultreg/registrations/narrative/da_DK,_4.3.html # - https://en.wikipedia.org/wiki/Kra_(letter) # # Sami: # - http://www.wordiq.com/definition/Sami_languages # # Germanic: # - http://www.wordiq.com/definition/%DF # # Currency: # - # https://www.xe.com/symbols.php # default _ start # # C0 Controls and Basic Latin - 0x0000–0x007F # # https://www.unicode.org/charts/PDF/U0000.pdf # 0x0020 " " # SPACE 0x0021 ! # EXCLAMATION MARK 0x0022 '"' # QUOTATION MARK 0x0023 # # NUMBER SIGN 0x0024 $ # DOLLAR SIGN 0x0025 % # PERCENT SIGN 0x0026 & # AMPERSAND 0x0027 "'" # APOSTROPHE 0x0028 ( # LEFT PARENTHESIS 0x0029 ) # RIGHT PARENTHESIS 0x002A * # ASTERISK 0x002B + # PLUS SIGN 0x002C , # COMMA 0x002D - # HYPHEN-MINUS 0x002E . 
# FULL STOP 0x002F / # SOLIDUS 0x0030 0 # DIGIT ZERO 0x0031 1 # DIGIT ONE 0x0032 2 # DIGIT TWO 0x0033 3 # DIGIT THREE 0x0034 4 # DIGIT FOUR 0x0035 5 # DIGIT FIVE 0x0036 6 # DIGIT SIX 0x0037 7 # DIGIT SEVEN 0x0038 8 # DIGIT EIGHT 0x0039 9 # DIGIT NINE 0x003A : # COLON 0x003B ; # SEMICOLON 0x003C < # LESS-THAN SIGN 0x003D = # EQUALS SIGN 0x003E > # GREATER-THAN SIGN 0x003F ? # QUESTION MARK 0x0040 @ # COMMERCIAL AT 0x0041 A # LATIN CAPITAL LETTER A 0x0042 B # LATIN CAPITAL LETTER B 0x0043 C # LATIN CAPITAL LETTER C 0x0044 D # LATIN CAPITAL LETTER D 0x0045 E # LATIN CAPITAL LETTER E 0x0046 F # LATIN CAPITAL LETTER F 0x0047 G # LATIN CAPITAL LETTER G 0x0048 H # LATIN CAPITAL LETTER H 0x0049 I # LATIN CAPITAL LETTER I 0x004A J # LATIN CAPITAL LETTER J 0x004B K # LATIN CAPITAL LETTER K 0x004C L # LATIN CAPITAL LETTER L 0x004D M # LATIN CAPITAL LETTER M 0x004E N # LATIN CAPITAL LETTER N 0x004F O # LATIN CAPITAL LETTER O 0x0050 P # LATIN CAPITAL LETTER P 0x0051 Q # LATIN CAPITAL LETTER Q 0x0052 R # LATIN CAPITAL LETTER R 0x0053 S # LATIN CAPITAL LETTER S 0x0054 T # LATIN CAPITAL LETTER T 0x0055 U # LATIN CAPITAL LETTER U 0x0056 V # LATIN CAPITAL LETTER V 0x0057 W # LATIN CAPITAL LETTER W 0x0058 X # LATIN CAPITAL LETTER X 0x0059 Y # LATIN CAPITAL LETTER Y 0x005A Z # LATIN CAPITAL LETTER Z 0x005B [ # LEFT SQUARE BRACKET 0x005C \ # REVERSE SOLIDUS 0x005D ] # RIGHT SQUARE BRACKET 0x005E ^ # CIRCUMFLEX ACCENT 0x005F _ # LOW LINE 0x0060 ` # GRAVE ACCENT 0x0061 a # LATIN SMALL LETTER A 0x0062 b # LATIN SMALL LETTER B 0x0063 c # LATIN SMALL LETTER C 0x0064 d # LATIN SMALL LETTER D 0x0065 e # LATIN SMALL LETTER E 0x0066 f # LATIN SMALL LETTER F 0x0067 g # LATIN SMALL LETTER G 0x0068 h # LATIN SMALL LETTER H 0x0069 i # LATIN SMALL LETTER I 0x006A j # LATIN SMALL LETTER J 0x006B k # LATIN SMALL LETTER K 0x006C l # LATIN SMALL LETTER L 0x006D m # LATIN SMALL LETTER M 0x006E n # LATIN SMALL LETTER N 0x006F o # LATIN SMALL LETTER O 0x0070 p # LATIN SMALL LETTER P 0x0071 q # LATIN SMALL 
LETTER Q 0x0072 r # LATIN SMALL LETTER R 0x0073 s # LATIN SMALL LETTER S 0x0074 t # LATIN SMALL LETTER T 0x0075 u # LATIN SMALL LETTER U 0x0076 v # LATIN SMALL LETTER V 0x0077 w # LATIN SMALL LETTER W 0x0078 x # LATIN SMALL LETTER X 0x0079 y # LATIN SMALL LETTER Y 0x007A z # LATIN SMALL LETTER Z 0x007B { # LEFT CURLY BRACKET 0x007C | # VERTICAL LINE 0x007D } # RIGHT CURLY BRACKET 0x007E ~ # TILDE # # Latin 1 Supplemental - 0x0080-0x00FF # # https://unicode.org/charts/PDF/U0080.pdf # 0x00A0 " " # NO-BREAK SPACE 0x00A1 ! # INVERTED EXCLAMATION MARK 0x00A2 _cent_ # CENT SIGN 0x00A3 _pound_ # POUND SIGN 0x00A4 $ # CURRENCY SIGN 0x00A5 _yen_ # YEN SIGN 0x00A7 _ss_ # SECTION SIGN 0x00A8 " " # DIAERESIS 0x00A9 _copy_ # COPYRIGHT SIGN 0x00AA _a_ # FEMININE ORDINAL INDICATOR 0x00AB '"' # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00AD - # SOFT HYPHEN 0x00AE _reg_ # REGISTERED SIGN 0x00B0 _deg_ # DEGREE SIGN 0x00B2 ^2 # SUPERSCRIPT TWO 0x00B3 ^3 # SUPERSCRIPT THREE 0x00B4 "'" # ACUTE ACCENT 0x00B5 u # MICRO SIGN 0x00B6 _pp_ # PILCROW SIGN 0x00B7 * # MIDDLE DOT 0x00B8 " " # CEDILLA 0x00B9 ^1 # SUPERSCRIPT ONE 0x00BA _o_ # MASCULINE ORDINAL INDICATOR 0x00BB '"' # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00BF ? 
# INVERTED QUESTION MARK 0x00C0 A # LATIN CAPITAL LETTER A WITH GRAVE 0x00C1 A # LATIN CAPITAL LETTER A WITH ACUTE 0x00C2 A # LATIN CAPITAL LETTER A WITH CIRCUMFLEX 0x00C3 A # LATIN CAPITAL LETTER A WITH TILDE 0x00C4 A # LATIN CAPITAL LETTER A WITH DIAERESIS 0x00C5 A # LATIN CAPITAL LETTER A WITH RING ABOVE 0x00C6 AE # LATIN CAPITAL LETTER AE 0x00C7 C # LATIN CAPITAL LETTER C WITH CEDILLA 0x00C8 E # LATIN CAPITAL LETTER E WITH GRAVE 0x00C9 E # LATIN CAPITAL LETTER E WITH ACUTE 0x00CA E # LATIN CAPITAL LETTER E WITH CIRCUMFLEX 0x00CB E # LATIN CAPITAL LETTER E WITH DIAERESIS 0x00CC I # LATIN CAPITAL LETTER I WITH GRAVE 0x00CD I # LATIN CAPITAL LETTER I WITH ACUTE 0x00CE I # LATIN CAPITAL LETTER I WITH CIRCUMFLEX 0x00CF I # LATIN CAPITAL LETTER I WITH DIAERESIS 0x00D0 TH # LATIN CAPITAL LETTER ETH (Icelandic) 0x00D1 N # LATIN CAPITAL LETTER N WITH TILDE 0x00D2 O # LATIN CAPITAL LETTER O WITH GRAVE 0x00D3 O # LATIN CAPITAL LETTER O WITH ACUTE 0x00D4 O # LATIN CAPITAL LETTER O WITH CIRCUMFLEX 0x00D5 O # LATIN CAPITAL LETTER O WITH TILDE 0x00D6 O # LATIN CAPITAL LETTER O WITH DIAERESIS 0x00D7 x # MULTIPLICATION SIGN 0x00D8 O # LATIN CAPITAL LETTER O WITH STROKE 0x00D9 U # LATIN CAPITAL LETTER U WITH GRAVE 0x00DA U # LATIN CAPITAL LETTER U WITH ACUTE 0x00DB U # LATIN CAPITAL LETTER U WITH CIRCUMFLEX 0x00DC U # LATIN CAPITAL LETTER U WITH DIAERESIS 0x00DD Y # LATIN CAPITAL LETTER Y WITH ACUTE 0x00DE TH # LATIN CAPITAL LETTER THORN (Icelandic) 0x00DF ss # LATIN SMALL LETTER SHARP S (German) 0x00E0 a # LATIN SMALL LETTER A WITH GRAVE 0x00E1 a # LATIN SMALL LETTER A WITH ACUTE 0x00E2 a # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x00E3 a # LATIN SMALL LETTER A WITH TILDE 0x00E4 a # LATIN SMALL LETTER A WITH DIAERESIS 0x00E5 a # LATIN SMALL LETTER A WITH RING ABOVE 0x00E6 ae # LATIN SMALL LETTER AE 0x00E7 c # LATIN SMALL LETTER C WITH CEDILLA 0x00E8 e # LATIN SMALL LETTER E WITH GRAVE 0x00E9 e # LATIN SMALL LETTER E WITH ACUTE 0x00EA e # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x00EB 
e # LATIN SMALL LETTER E WITH DIAERESIS 0x00EC i # LATIN SMALL LETTER I WITH GRAVE 0x00ED i # LATIN SMALL LETTER I WITH ACUTE 0x00EE i # LATIN SMALL LETTER I WITH CIRCUMFLEX 0x00EF i # LATIN SMALL LETTER I WITH DIAERESIS 0x00F0 th # LATIN SMALL LETTER ETH (Icelandic) 0x00F1 n # LATIN SMALL LETTER N WITH TILDE 0x00F2 o # LATIN SMALL LETTER O WITH GRAVE 0x00F3 o # LATIN SMALL LETTER O WITH ACUTE 0x00F4 o # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x00F5 o # LATIN SMALL LETTER O WITH TILDE 0x00F6 o # LATIN SMALL LETTER O WITH DIAERESIS 0x00F8 o # LATIN SMALL LETTER O WITH STROKE 0x00F9 u # LATIN SMALL LETTER U WITH GRAVE 0x00FA u # LATIN SMALL LETTER U WITH ACUTE 0x00FB u # LATIN SMALL LETTER U WITH CIRCUMFLEX 0x00FC u # LATIN SMALL LETTER U WITH DIAERESIS 0x00FD y # LATIN SMALL LETTER Y WITH ACUTE 0x00FE th # LATIN SMALL LETTER THORN (Icelandic) 0x00FF y # LATIN SMALL LETTER Y WITH DIAERESIS # # Latin Extended A - 0x0100-0x17F # # https://www.unicode.org/charts/PDF/U0100.pdf # 0x0100 A # LATIN CAPITAL LETTER A WITH MACRON 0x0101 a # LATIN SMALL LETTER A WITH MACRON 0x0102 A # LATIN CAPITAL LETTER A WITH BREVE 0x0103 a # LATIN SMALL LETTER A WITH BREVE 0x0104 A # LATIN CAPITAL LETTER A WITH OGONEK 0x0105 a # LATIN SMALL LETTER A WITH OGONEK 0x0106 C # LATIN CAPITAL LETTER C WITH ACUTE 0x0107 c # LATIN SMALL LETTER C WITH ACUTE 0x0108 C # LATIN CAPITAL LETTER C WITH CIRCUMFLEX 0x0109 c # LATIN SMALL LETTER C WITH CIRCUMFLEX 0x010A C # LATIN CAPITAL LETTER C WITH DOT ABOVE 0x010B c # LATIN SMALL LETTER C WITH DOT ABOVE 0x010C C # LATIN CAPITAL LETTER C WITH CARON 0x010D c # LATIN SMALL LETTER C WITH CARON 0x010E D # LATIN CAPITAL LETTER D WITH CARON 0x010F d # LATIN SMALL LETTER D WITH CARON 0x0110 D # LATIN CAPITAL LETTER D WITH STROKE 0x0111 d # LATIN SMALL LETTER D WITH STROKE 0x0112 E # LATIN CAPITAL LETTER E WITH MACRON 0x0113 e # LATIN SMALL LETTER E WITH MACRON 0x0114 E # LATIN CAPITAL LETTER E WITH BREVE 0x0115 e # LATIN SMALL LETTER E WITH BREVE 0x0116 E # LATIN 
CAPITAL LETTER E WITH DOT ABOVE 0x0117 e # LATIN SMALL LETTER E WITH DOT ABOVE 0x0118 E # LATIN CAPITAL LETTER E WITH OGONEK 0x0119 e # LATIN SMALL LETTER E WITH OGONEK 0x011A E # LATIN CAPITAL LETTER E WITH CARON 0x011B e # LATIN SMALL LETTER E WITH CARON 0x011C G # LATIN CAPITAL LETTER G WITH CIRCUMFLEX 0x011D g # LATIN SMALL LETTER G WITH CIRCUMFLEX 0x011E G # LATIN CAPITAL LETTER G WITH BREVE 0x011F g # LATIN SMALL LETTER G WITH BREVE 0x0120 G # LATIN CAPITAL LETTER G WITH DOT ABOVE 0x0121 g # LATIN SMALL LETTER G WITH DOT ABOVE 0x0122 G # LATIN CAPITAL LETTER G WITH CEDILLA 0x0123 g # LATIN SMALL LETTER G WITH CEDILLA 0x0124 H # LATIN CAPITAL LETTER H WITH CIRCUMFLEX 0x0125 h # LATIN SMALL LETTER H WITH CIRCUMFLEX 0x0126 H # LATIN CAPITAL LETTER H WITH STROKE 0x0127 h # LATIN SMALL LETTER H WITH STROKE 0x0128 I # LATIN CAPITAL LETTER I WITH TILDE 0x0129 i # LATIN SMALL LETTER I WITH TILDE 0x012A I # LATIN CAPITAL LETTER I WITH MACRON 0x012B i # LATIN SMALL LETTER I WITH MACRON 0x012C I # LATIN CAPITAL LETTER I WITH BREVE 0x012D i # LATIN SMALL LETTER I WITH BREVE 0x012E I # LATIN CAPITAL LETTER I WITH OGONEK 0x012F i # LATIN SMALL LETTER I WITH OGONEK 0x0130 I # LATIN CAPITAL LETTER I WITH DOT ABOVE 0x0131 i # LATIN SMALL LETTER DOTLESS I 0x0132 IJ # LATIN CAPITAL LIGATURE IJ 0x0133 ij # LATIN SMALL LIGATURE IJ 0x0134 J # LATIN CAPITAL LETTER J WITH CIRCUMFLEX 0x0135 j # LATIN SMALL LETTER J WITH CIRCUMFLEX 0x0136 K # LATIN CAPITAL LETTER K WITH CEDILLA 0x0137 k # LATIN SMALL LETTER K WITH CEDILLA 0x0138 q # LATIN SMALL LETTER KRA (Greenlandic) 0x0139 L # LATIN CAPITAL LETTER L WITH ACUTE 0x013A l # LATIN SMALL LETTER L WITH ACUTE 0x013B L # LATIN CAPITAL LETTER L WITH CEDILLA 0x013C l # LATIN SMALL LETTER L WITH CEDILLA 0x013D L # LATIN CAPITAL LETTER L WITH CARON 0x013E l # LATIN SMALL LETTER L WITH CARON 0x013F L # LATIN CAPITAL LETTER L WITH MIDDLE DOT 0x0140 l # LATIN SMALL LETTER L WITH MIDDLE DOT 0x0141 L # LATIN CAPITAL LETTER L WITH STROKE 0x0142 l # 
LATIN SMALL LETTER L WITH STROKE 0x0143 N # LATIN CAPITAL LETTER N WITH ACUTE 0x0144 n # LATIN SMALL LETTER N WITH ACUTE 0x0145 N # LATIN CAPITAL LETTER N WITH CEDILLA 0x0146 n # LATIN SMALL LETTER N WITH CEDILLA 0x0147 N # LATIN CAPITAL LETTER N WITH CARON 0x0148 n # LATIN SMALL LETTER N WITH CARON 0x0149 "'n" # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE 0x014A NG # LATIN CAPITAL LETTER ENG (Sami) 0x014B ng # LATIN SMALL LETTER ENG (Sami) 0x014C O # LATIN CAPITAL LETTER O WITH MACRON 0x014D o # LATIN SMALL LETTER O WITH MACRON 0x014E O # LATIN CAPITAL LETTER O WITH BREVE 0x014F o # LATIN SMALL LETTER O WITH BREVE 0x0150 O # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE 0x0151 o # LATIN SMALL LETTER O WITH DOUBLE ACUTE 0x0152 OE # LATIN CAPITAL LIGATURE OE 0x0153 oe # LATIN SMALL LIGATURE OE 0x0154 R # LATIN CAPITAL LETTER R WITH ACUTE 0x0155 r # LATIN SMALL LETTER R WITH ACUTE 0x0156 R # LATIN CAPITAL LETTER R WITH CEDILLA 0x0157 r # LATIN SMALL LETTER R WITH CEDILLA 0x0158 R # LATIN CAPITAL LETTER R WITH CARON 0x0159 r # LATIN SMALL LETTER R WITH CARON 0x015A S # LATIN CAPITAL LETTER S WITH ACUTE 0x015B s # LATIN SMALL LETTER S WITH ACUTE 0x015C S # LATIN CAPITAL LETTER S WITH CIRCUMFLEX 0x015D s # LATIN SMALL LETTER S WITH CIRCUMFLEX 0x015E S # LATIN CAPITAL LETTER S WITH CEDILLA 0x015F s # LATIN SMALL LETTER S WITH CEDILLA 0x0160 S # LATIN CAPITAL LETTER S WITH CARON 0x0161 s # LATIN SMALL LETTER S WITH CARON 0x0162 T # LATIN CAPITAL LETTER T WITH CEDILLA 0x0163 t # LATIN SMALL LETTER T WITH CEDILLA 0x0164 T # LATIN CAPITAL LETTER T WITH CARON 0x0165 t # LATIN SMALL LETTER T WITH CARON 0x0166 T # LATIN CAPITAL LETTER T WITH STROKE 0x0167 t # LATIN SMALL LETTER T WITH STROKE 0x0168 U # LATIN CAPITAL LETTER U WITH TILDE 0x0169 u # LATIN SMALL LETTER U WITH TILDE 0x016A U # LATIN CAPITAL LETTER U WITH MACRON 0x016B u # LATIN SMALL LETTER U WITH MACRON 0x016C U # LATIN CAPITAL LETTER U WITH BREVE 0x016D u # LATIN SMALL LETTER U WITH BREVE 0x016E U # LATIN CAPITAL 
LETTER U WITH RING ABOVE 0x016F u # LATIN SMALL LETTER U WITH RING ABOVE 0x0170 U # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE 0x0171 u # LATIN SMALL LETTER U WITH DOUBLE ACUTE 0x0172 U # LATIN CAPITAL LETTER U WITH OGONEK 0x0173 u # LATIN SMALL LETTER U WITH OGONEK 0x0174 W # LATIN CAPITAL LETTER W WITH CIRCUMFLEX 0x0175 w # LATIN SMALL LETTER W WITH CIRCUMFLEX 0x0176 Y # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX 0x0177 y # LATIN SMALL LETTER Y WITH CIRCUMFLEX 0x0178 Y # LATIN CAPITAL LETTER Y WITH DIAERESIS 0x0179 Z # LATIN CAPITAL LETTER Z WITH ACUTE 0x017A z # LATIN SMALL LETTER Z WITH ACUTE 0x017B Z # LATIN CAPITAL LETTER Z WITH DOT ABOVE 0x017C z # LATIN SMALL LETTER Z WITH DOT ABOVE 0x017D Z # LATIN CAPITAL LETTER Z WITH CARON 0x017E z # LATIN SMALL LETTER Z WITH CARON 0x017F s # LATIN SMALL LETTER LONG S # # Latin Extended B - 0x0180-0x024F # # https://www.unicode.org/charts/PDF/U0180.pdf # 0x0180 b # LATIN SMALL LETTER B WITH STROKE 0x0181 B # LATIN CAPITAL LETTER B WITH HOOK 0x0182 B # LATIN CAPITAL LETTER B WITH TOPBAR 0x0183 b # LATIN SMALL LETTER B WITH TOPBAR 0x0184 B # LATIN CAPITAL LETTER TONE SIX 0x0185 b # LATIN SMALL LETTER TONE SIX 0x0186 O # LATIN CAPITAL LETTER OPEN O 0x0187 C # LATIN CAPITAL LETTER C WITH HOOK 0x0188 c # LATIN SMALL LETTER C WITH HOOK 0x0189 D # LATIN CAPITAL LETTER AFRICAN D 0x018A D # LATIN CAPITAL LETTER D WITH HOOK 0x018B D # LATIN CAPITAL LETTER D WITH TOPBAR 0x018C d # LATIN SMALL LETTER D WITH TOPBAR 0x018D z # LATIN SMALL LETTER TURNED DELTA 0x018E E # LATIN CAPITAL LETTER REVERSED E 0x018F E # LATIN CAPITAL LETTER SCHWA 0x0190 E # LATIN CAPITAL LETTER OPEN E 0x0191 F # LATIN CAPITAL LETTER F WITH HOOK 0x0192 f # LATIN SMALL LETTER F WITH HOOK 0x0193 G # LATIN CAPITAL LETTER G WITH HOOK 0x0194 Y # LATIN CAPITAL LETTER GAMMA 0x0195 hv # LATIN SMALL LETTER HV 0x0196 I # LATIN CAPITAL LETTER IOTA 0x0197 I # LATIN CAPITAL LETTER I WITH STROKE 0x0198 K # LATIN CAPITAL LETTER K WITH HOOK 0x0199 k # LATIN SMALL LETTER K WITH HOOK 
0x019A l # LATIN SMALL LETTER L WITH BAR 0x019B l # LATIN SMALL LETTER LAMBDA WITH STROKE 0x019C w # LATIN CAPITAL LETTER TURNED M 0x019D N # LATIN CAPITAL LETTER N WITH LEFT HOOK 0x019E n # LATIN SMALL LETTER N WITH LONG RIGHT LEG 0x019F O # LATIN CAPITAL LETTER O WITH MIDDLE TILDE 0x01A0 O # LATIN CAPITAL LETTER O WITH HORN 0x01A1 o # LATIN SMALL LETTER O WITH HORN 0x01A2 OI # LATIN CAPITAL LETTER OI 0x01A3 oi # LATIN SMALL LETTER OI 0x01A4 P # LATIN CAPITAL LETTER P WITH HOOK 0x01A5 p # LATIN SMALL LETTER P WITH HOOK 0x01A6 YR # LATIN LETTER YR 0x01A7 S # LATIN CAPITAL LETTER TONE TWO 0x01A8 s # LATIN SMALL LETTER TONE TWO 0x01A9 SH # LATIN CAPITAL LETTER ESH 0x01AA sh # LATIN LETTER REVERSED ESH LOOP 0x01AB t # LATIN SMALL LETTER T WITH PALATAL HOOK 0x01AC T # LATIN CAPITAL LETTER T WITH HOOK 0x01AD t # LATIN SMALL LETTER T WITH HOOK 0x01AE T # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK 0x01AF U # LATIN CAPITAL LETTER U WITH HORN 0x01B0 u # LATIN SMALL LETTER U WITH HORN 0x01B1 Y # LATIN CAPITAL LETTER UPSILON 0x01B2 V # LATIN CAPITAL LETTER V WITH HOOK 0x01B3 Y # LATIN CAPITAL LETTER Y WITH HOOK 0x01B4 y # LATIN SMALL LETTER Y WITH HOOK 0x01B5 Z # LATIN CAPITAL LETTER Z WITH STROKE 0x01B6 z # LATIN SMALL LETTER Z WITH STROKE 0x01B7 ZH # LATIN CAPITAL LETTER EZH 0x01B8 ZH # LATIN CAPITAL LETTER EZH REVERSED 0x01B9 zh # LATIN SMALL LETTER EZH REVERSED 0x01BA zh # LATIN SMALL LETTER EZH WITH TAIL 0x01BB dz # LATIN LETTER TWO WITH STROKE 0x01BC 5 # LATIN CAPITAL LETTER TONE FIVE 0x01BD 5 # LATIN SMALL LETTER TONE FIVE 0x01BE ts # LATIN LETTER INVERTED GLOTTAL STOP WITH STROKE 0x01BF w # LATIN LETTER WYNN 0x01C4 DZ # LATIN CAPITAL LETTER DZ WITH CARON 0x01C5 Dz # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON 0x01C6 dz # LATIN SMALL LETTER DZ WITH CARON 0x01C7 LJ # LATIN CAPITAL LETTER LJ 0x01C8 Lj # LATIN CAPITAL LETTER L WITH SMALL LETTER J 0x01C9 lj # LATIN SMALL LETTER LJ 0x01CA NJ # LATIN CAPITAL LETTER NJ 0x01CB Nj # LATIN CAPITAL LETTER N WITH SMALL 
LETTER J 0x01CC nj # LATIN SMALL LETTER NJ 0x01CD A # LATIN CAPITAL LETTER A WITH CARON 0x01CE a # LATIN SMALL LETTER A WITH CARON 0x01CF I # LATIN CAPITAL LETTER I WITH CARON 0x01D0 i # LATIN SMALL LETTER I WITH CARON 0x01D1 O # LATIN CAPITAL LETTER O WITH CARON 0x01D2 o # LATIN SMALL LETTER O WITH CARON 0x01D3 U # LATIN CAPITAL LETTER U WITH CARON 0x01D4 u # LATIN SMALL LETTER U WITH CARON 0x01D5 U # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON 0x01D6 u # LATIN SMALL LETTER U WITH DIAERESIS AND MACRON 0x01D7 U # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE 0x01D8 u # LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE 0x01D9 U # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON 0x01DA u # LATIN SMALL LETTER U WITH DIAERESIS AND CARON 0x01DB U # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE 0x01DC u # LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE 0x01DD e # LATIN SMALL LETTER TURNED E 0x01DE A # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON 0x01DF a # LATIN SMALL LETTER A WITH DIAERESIS AND MACRON 0x01E0 A # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON 0x01E1 a # LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON 0x01E2 AE # LATIN CAPITAL LETTER AE WITH MACRON 0x01E3 ae # LATIN SMALL LETTER AE WITH MACRON 0x01E4 G # LATIN CAPITAL LETTER G WITH STROKE 0x01E5 g # LATIN SMALL LETTER G WITH STROKE 0x01E6 G # LATIN CAPITAL LETTER G WITH CARON 0x01E7 g # LATIN SMALL LETTER G WITH CARON 0x01E8 K # LATIN CAPITAL LETTER K WITH CARON 0x01E9 k # LATIN SMALL LETTER K WITH CARON 0x01EA O # LATIN CAPITAL LETTER O WITH OGONEK 0x01EB o # LATIN SMALL LETTER O WITH OGONEK 0x01EC O # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON 0x01ED o # LATIN SMALL LETTER O WITH OGONEK AND MACRON 0x01EE ZH # LATIN CAPITAL LETTER EZH WITH CARON 0x01EF zh # LATIN SMALL LETTER EZH WITH CARON 0x01F0 j # LATIN SMALL LETTER J WITH CARON 0x01F1 DZ # LATIN CAPITAL LETTER DZ 0x01F2 Dz # LATIN CAPITAL LETTER D WITH SMALL LETTER Z 0x01F3 dz # LATIN SMALL LETTER DZ 0x01F4 G # LATIN CAPITAL LETTER G WITH 
ACUTE 0x01F5 g # LATIN SMALL LETTER G WITH ACUTE 0x01F6 HU # LATIN CAPITAL LETTER HWAIR 0x01F7 W # LATIN CAPITAL LETTER WYNN 0x01F8 N # LATIN CAPITAL LETTER N WITH GRAVE 0x01F9 n # LATIN SMALL LETTER N WITH GRAVE 0x01FA A # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE 0x01FB a # LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE 0x01FC AE # LATIN CAPITAL LETTER AE WITH ACUTE 0x01FD ae # LATIN SMALL LETTER AE WITH ACUTE 0x01FE O # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE 0x01FF o # LATIN SMALL LETTER O WITH STROKE AND ACUTE 0x0200 A # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE 0x0201 a # LATIN SMALL LETTER A WITH DOUBLE GRAVE 0x0202 A # LATIN CAPITAL LETTER A WITH INVERTED BREVE 0x0203 a # LATIN SMALL LETTER A WITH INVERTED BREVE 0x0204 E # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE 0x0205 e # LATIN SMALL LETTER E WITH DOUBLE GRAVE 0x0206 E # LATIN CAPITAL LETTER E WITH INVERTED BREVE 0x0207 e # LATIN SMALL LETTER E WITH INVERTED BREVE 0x0208 I # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE 0x0209 i # LATIN SMALL LETTER I WITH DOUBLE GRAVE 0x020A I # LATIN CAPITAL LETTER I WITH INVERTED BREVE 0x020B i # LATIN SMALL LETTER I WITH INVERTED BREVE 0x020C O # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE 0x020D o # LATIN SMALL LETTER O WITH DOUBLE GRAVE 0x020E O # LATIN CAPITAL LETTER O WITH INVERTED BREVE 0x020F o # LATIN SMALL LETTER O WITH INVERTED BREVE 0x0210 R # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE 0x0211 r # LATIN SMALL LETTER R WITH DOUBLE GRAVE 0x0212 R # LATIN CAPITAL LETTER R WITH INVERTED BREVE 0x0213 r # LATIN SMALL LETTER R WITH INVERTED BREVE 0x0214 U # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE 0x0215 u # LATIN SMALL LETTER U WITH DOUBLE GRAVE 0x0216 U # LATIN CAPITAL LETTER U WITH INVERTED BREVE 0x0217 u # LATIN SMALL LETTER U WITH INVERTED BREVE 0x0218 S # LATIN CAPITAL LETTER S WITH COMMA BELOW 0x0219 s # LATIN SMALL LETTER S WITH COMMA BELOW 0x021A T # LATIN CAPITAL LETTER T WITH COMMA BELOW 0x021B t # LATIN SMALL LETTER T WITH COMMA BELOW 0x021C Y # LATIN 
CAPITAL LETTER YOGH 0x021D y # LATIN SMALL LETTER YOGH 0x021E H # LATIN CAPITAL LETTER H WITH CARON 0x021F h # LATIN SMALL LETTER H WITH CARON 0x0220 N # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG 0x0221 d # LATIN SMALL LETTER D WITH CURL 0x0222 OU # LATIN CAPITAL LETTER OU 0x0223 ou # LATIN SMALL LETTER OU 0x0224 Z # LATIN CAPITAL LETTER Z WITH HOOK 0x0225 z # LATIN SMALL LETTER Z WITH HOOK 0x0226 A # LATIN CAPITAL LETTER A WITH DOT ABOVE 0x0227 a # LATIN SMALL LETTER A WITH DOT ABOVE 0x0228 E # LATIN CAPITAL LETTER E WITH CEDILLA 0x0229 e # LATIN SMALL LETTER E WITH CEDILLA 0x022A O # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON 0x022B o # LATIN SMALL LETTER O WITH DIAERESIS AND MACRON 0x022C O # LATIN CAPITAL LETTER O WITH TILDE AND MACRON 0x022D o # LATIN SMALL LETTER O WITH TILDE AND MACRON 0x022E O # LATIN CAPITAL LETTER O WITH DOT ABOVE 0x022F o # LATIN SMALL LETTER O WITH DOT ABOVE 0x0230 O # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON 0x0231 o # LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON 0x0232 Y # LATIN CAPITAL LETTER Y WITH MACRON 0x0233 y # LATIN SMALL LETTER Y WITH MACRON 0x0234 l # LATIN SMALL LETTER L WITH CURL 0x0235 n # LATIN SMALL LETTER N WITH CURL 0x0236 t # LATIN SMALL LETTER T WITH CURL 0x0237 j # LATIN SMALL LETTER DOTLESS J 0x0238 db # LATIN SMALL LETTER DB DIGRAPH 0x0239 qp # LATIN SMALL LETTER QP DIGRAPH 0x023A A # LATIN CAPITAL LETTER A WITH STROKE 0x023B C # LATIN CAPITAL LETTER C WITH STROKE 0x023C c # LATIN SMALL LETTER C WITH STROKE 0x023D L # LATIN CAPITAL LETTER L WITH BAR 0x023E T # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE 0x023F s # LATIN SMALL LETTER S WITH SWASH TAIL 0x0240 z # LATIN SMALL LETTER Z WITH SWASH TAIL 0x0243 B # LATIN CAPITAL LETTER B WITH STROKE 0x0244 U # LATIN CAPITAL LETTER U BAR 0x0245 ^ # LATIN CAPITAL LETTER TURNED V 0x0246 E # LATIN CAPITAL LETTER E WITH STROKE 0x0247 e # LATIN SMALL LETTER E WITH STROKE 0x0248 J # LATIN CAPITAL LETTER J WITH STROKE 0x0249 j # LATIN SMALL LETTER J WITH STROKE 
0x024A q # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL 0x024B q # LATIN SMALL LETTER Q WITH HOOK TAIL 0x024C R # LATIN CAPITAL LETTER R WITH STROKE 0x024D r # LATIN SMALL LETTER R WITH STROKE 0x024E Y # LATIN CAPITAL LETTER Y WITH STROKE 0x024F y # LATIN SMALL LETTER Y WITH STROKE # # ... # 0x02C6 ^ # MODIFIER LETTER CIRCUMFLEX ACCENT 0x02DC ~ # SMALL TILDE # # Greek and Coptic - 0x0370–0x03FF # # https://unicode.org/charts/PDF/U0370.pdf # 0x03A1 P # GREEK CAPITAL LETTER RHO 0x03C1 p # GREEK SMALL LETTER RHO # # Tai Le - 0x1950–0x197F # # https://unicode.org/charts/PDF/U1950.pdf # 0x1952 n # TAI LE LETTER NG 0x1959 u # TAI LE LETTER PA 0x1963 l # TAI LE LETTER A 0x1971 e # TAI LE LETTER TONE-3 0x1974 c # TAI LE LETTER TONE-6 # # ... # 0x1E9E SS # LATIN CAPITAL LETTER SHARP S # # General Punctuation - 0x2000-0x206F # # https://unicode.org/charts/PDF/U2000.pdf # 0x2000 " " # EN QUAD 0x2001 " " # EM QUAD 0x2002 " " # EN SPACE 0x2003 " " # EM SPACE 0x2004 " " # THREE-PER-EM SPACE 0x2005 " " # FOUR-PER-EM SPACE 0x2006 " " # SIX-PER-EM SPACE 0x2007 " " # FIGURE SPACE 0x2008 " " # PUNCTUATION SPACE 0x2009 " " # THIN SPACE 0x200A " " # HAIR SPACE 0x200B "" # ZERO WIDTH SPACE 0x200C "" # ZERO WIDTH NON-JOINER 0x200D "" # ZERO WIDTH JOINER 0x200E "" # LEFT-TO-RIGHT MARK 0x200F "" # RIGHT-TO-LEFT MARK 0x2010 - # HYPHEN 0x2011 - # NON-BREAKING HYPHEN 0x2012 - # FIGURE DASH 0x2013 - # EN DASH 0x2014 - # EM DASH 0x2015 - # HORIZONTAL BAR 0x2017 _ # DOUBLE LOW LINE 0x2018 "'" # LEFT SINGLE QUOTATION MARK 0x2019 "'" # RIGHT SINGLE QUOTATION MARK 0x201A "'" # SINGLE LOW-9 QUOTATION MARK 0x201B "'" # SINGLE HIGH-REVERSED-9 QUOTATION MARK 0x201C '"' # LEFT DOUBLE QUOTATION MARK 0x201D '"' # RIGHT DOUBLE QUOTATION MARK 0x201E '"' # DOUBLE LOW-9 QUOTATION MARK 0x201F '"' # DOUBLE HIGH-REVERSED-9 QUOTATION MARK 0x2020 + # DAGGER 0x2021 ++ # DOUBLE DAGGER 0x2022 * # BULLET 0x2024 . # ONE DOT LEADER 0x2025 .. # TWO DOT LEADER 0x2026 ... # HORIZONTAL ELLIPSIS 0x2027 . 
# HYPHENATION POINT 0x202F " " # NARROW NO-BREAK SPACE 0x2030 % # PER MILLE SIGN 0x2031 % # PER TEN THOUSAND SIGN 0x2032 "'" # PRIME 0x2033 "''" # DOUBLE PRIME 0x2034 "'''" # TRIPLE PRIME 0x2035 ` # REVERSED PRIME 0x2036 `` # REVERSED DOUBLE PRIME 0x2037 ``` # REVERSED TRIPLE PRIME 0x2038 ^ # CARET 0x203B * # REFERENCE MARK 0x203C !! # DOUBLE EXCLAMATION MARK 0x203D ? # INTERROBANG 0x203E - # OVERLINE 0x203F _ # UNDERTIE 0x2040 - # CHARACTER TIE 0x2041 ^ # CARET INSERTION POINT 0x2042 *** # ASTERISM 0x2043 - # HYPHEN BULLET 0x2045 -[ # LEFT SQUARE BRACKET WITH QUILL 0x2046 ]- # RIGHT SQUARE BRACKET WITH QUILL 0x2047 ?? # DOUBLE QUESTION MARK 0x2048 ?! # QUESTION EXCLAMATION MARK 0x2049 !? # EXCLAMATION QUESTION MARK 0x204A & # TIRONIAN SIGN ET 0x204B _pp_ # REVERSED PILCROW SIGN 0x204E * # LOW ASTERISK 0x204F [) # REVERSED SEMICOLON 0x2051 ** # TWO ASTERISKS ALIGNED VERTICALLY 0x2052 % # COMMERCIAL MINUS SIGN 0x2053 ~ # SWING DASH 0x2054 _ # INVERTED UNDERTIE 0x2055 * # FLOWER PUNCTUATION MARK 0x2056 ... # THREE DOT PUNCTUATION 0x2057 "''''" # QUADRUPLE PRIME 0x2058 .... # FOUR DOT PUNCTUATION 0x2059 ..... # FIVE DOT PUNCTUATION 0x205A .. # TWO DOT PUNCTUATION 0x205B .... # FOUR DOT MARK 0x205D : # TRICOLON 0x205E : # VERTICAL FOUR DOTS 0x205F " " # MEDIUM MATHEMATICAL SPACE 0x2060 " " # WORD JOINER # # # Currency Symbols - 0x20A0–0x20CF # # https://www.unicode.org/charts/PDF/U20A0.pdf # 0x20A0 ECU # EURO-CURRENCY SIGN 0x20A1 CL # COLON SIGN 0x20A2 Cr # CRUZEIRO SIGN 0x20A3 FF # FRENCH FRANC SIGN 0x20A4 L # LIRA SIGN 0x20A5 mil # MILL SIGN 0x20A6 N # NAIRA SIGN 0x20A7 Pts # PESETA SIGN 0x20A8 Rs # RUPEE SIGN 0x20A9 W # WON SIGN 0x20AA NS # NEW SHEQEL SIGN 0x20AB D # DONG SIGN 0x20AC EUR # EURO SIGN 0x20AD K # KIP SIGN 0x20AE T # TUGRIK SIGN 0x20AF Dr # DRACHMA SIGN 0x20B1 $ # PESO SIGN 0x20B2 Gs # GUARANI SIGN 0x20B5 C # CEDI SIGN 0x20BB M # NORDIC MARK SIGN 0x20BF _btc_ # BITCOIN SIGN # # ... # 0x2122 _tm_ # TRADE MARK SIGN # # ... 
# 0x10348 hu # HWAIR # # h/t to homebrew # 0x1f37a _beer_ # in a tree... end # # German specific conversions # start de 0x00C4 Ae # LATIN CAPITAL LETTER A WITH UMLAUT 0x00D6 Oe # LATIN CAPITAL LETTER O WITH UMLAUT 0x00DC Ue # LATIN CAPITAL LETTER U WITH UMLAUT 0x00E4 ae # LATIN SMALL LETTER A WITH UMLAUT 0x00F6 oe # LATIN SMALL LETTER O WITH UMLAUT 0x00FC ue # LATIN SMALL LETTER U WITH UMLAUT end detox-2.0.0/table/unidecode.tbl000066400000000000000000011653701460212773400164100ustar00rootroot00000000000000# # I claim no copyright over this file; the contents are 100% derivative. # - Doug Harple # # This file is generated from the following sources: # - https://www.unicode.org/Public/5.2.0/ucd/UnicodeData.txt # - https://metacpan.org/pod/Text::Unidecode # - https://github.com/Behat/Transliterator # # The code to generate it lives here: # - https://github.com/dharple/detox-helper/test.php # # Behat\Transliterator\SyncTool::LIB_VERSION = '1.27' default _ start # # Characters 0x0000 to 0x00FF # 0x0020 " " # SPACE 0x0021 ! # EXCLAMATION MARK 0x0022 '"' # QUOTATION MARK 0x0023 # # NUMBER SIGN 0x0024 $ # DOLLAR SIGN 0x0025 % # PERCENT SIGN 0x0026 & # AMPERSAND 0x0027 "'" # APOSTROPHE 0x0028 ( # LEFT PARENTHESIS 0x0029 ) # RIGHT PARENTHESIS 0x002A * # ASTERISK 0x002B + # PLUS SIGN 0x002C , # COMMA 0x002D - # HYPHEN-MINUS 0x002E . # FULL STOP 0x002F / # SOLIDUS 0x0030 0 # DIGIT ZERO 0x0031 1 # DIGIT ONE 0x0032 2 # DIGIT TWO 0x0033 3 # DIGIT THREE 0x0034 4 # DIGIT FOUR 0x0035 5 # DIGIT FIVE 0x0036 6 # DIGIT SIX 0x0037 7 # DIGIT SEVEN 0x0038 8 # DIGIT EIGHT 0x0039 9 # DIGIT NINE 0x003A : # COLON 0x003B ; # SEMICOLON 0x003C < # LESS-THAN SIGN 0x003D = # EQUALS SIGN 0x003E > # GREATER-THAN SIGN 0x003F ? 
# QUESTION MARK 0x0040 @ # COMMERCIAL AT 0x0041 A # LATIN CAPITAL LETTER A 0x0042 B # LATIN CAPITAL LETTER B 0x0043 C # LATIN CAPITAL LETTER C 0x0044 D # LATIN CAPITAL LETTER D 0x0045 E # LATIN CAPITAL LETTER E 0x0046 F # LATIN CAPITAL LETTER F 0x0047 G # LATIN CAPITAL LETTER G 0x0048 H # LATIN CAPITAL LETTER H 0x0049 I # LATIN CAPITAL LETTER I 0x004A J # LATIN CAPITAL LETTER J 0x004B K # LATIN CAPITAL LETTER K 0x004C L # LATIN CAPITAL LETTER L 0x004D M # LATIN CAPITAL LETTER M 0x004E N # LATIN CAPITAL LETTER N 0x004F O # LATIN CAPITAL LETTER O 0x0050 P # LATIN CAPITAL LETTER P 0x0051 Q # LATIN CAPITAL LETTER Q 0x0052 R # LATIN CAPITAL LETTER R 0x0053 S # LATIN CAPITAL LETTER S 0x0054 T # LATIN CAPITAL LETTER T 0x0055 U # LATIN CAPITAL LETTER U 0x0056 V # LATIN CAPITAL LETTER V 0x0057 W # LATIN CAPITAL LETTER W 0x0058 X # LATIN CAPITAL LETTER X 0x0059 Y # LATIN CAPITAL LETTER Y 0x005A Z # LATIN CAPITAL LETTER Z 0x005B [ # LEFT SQUARE BRACKET 0x005C \ # REVERSE SOLIDUS 0x005D ] # RIGHT SQUARE BRACKET 0x005E ^ # CIRCUMFLEX ACCENT 0x005F _ # LOW LINE 0x0060 ` # GRAVE ACCENT 0x0061 a # LATIN SMALL LETTER A 0x0062 b # LATIN SMALL LETTER B 0x0063 c # LATIN SMALL LETTER C 0x0064 d # LATIN SMALL LETTER D 0x0065 e # LATIN SMALL LETTER E 0x0066 f # LATIN SMALL LETTER F 0x0067 g # LATIN SMALL LETTER G 0x0068 h # LATIN SMALL LETTER H 0x0069 i # LATIN SMALL LETTER I 0x006A j # LATIN SMALL LETTER J 0x006B k # LATIN SMALL LETTER K 0x006C l # LATIN SMALL LETTER L 0x006D m # LATIN SMALL LETTER M 0x006E n # LATIN SMALL LETTER N 0x006F o # LATIN SMALL LETTER O 0x0070 p # LATIN SMALL LETTER P 0x0071 q # LATIN SMALL LETTER Q 0x0072 r # LATIN SMALL LETTER R 0x0073 s # LATIN SMALL LETTER S 0x0074 t # LATIN SMALL LETTER T 0x0075 u # LATIN SMALL LETTER U 0x0076 v # LATIN SMALL LETTER V 0x0077 w # LATIN SMALL LETTER W 0x0078 x # LATIN SMALL LETTER X 0x0079 y # LATIN SMALL LETTER Y 0x007A z # LATIN SMALL LETTER Z 0x007B { # LEFT CURLY BRACKET 0x007C | # VERTICAL LINE 0x007D } # RIGHT CURLY 
BRACKET 0x007E ~ # TILDE 0x00A0 " " # NO-BREAK SPACE 0x00A1 ! # INVERTED EXCLAMATION MARK 0x00A2 C/ # CENT SIGN 0x00A3 PS # POUND SIGN 0x00A4 $? # CURRENCY SIGN 0x00A5 Y= # YEN SIGN 0x00A6 | # BROKEN BAR 0x00A7 SS # SECTION SIGN 0x00A8 '"' # DIAERESIS 0x00A9 (c) # COPYRIGHT SIGN 0x00AA a # FEMININE ORDINAL INDICATOR 0x00AB << # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00AC ! # NOT SIGN 0x00AE (r) # REGISTERED SIGN 0x00AF - # MACRON 0x00B0 deg # DEGREE SIGN 0x00B1 +- # PLUS-MINUS SIGN 0x00B2 2 # SUPERSCRIPT TWO 0x00B3 3 # SUPERSCRIPT THREE 0x00B4 "'" # ACUTE ACCENT 0x00B5 u # MICRO SIGN 0x00B6 P # PILCROW SIGN 0x00B7 * # MIDDLE DOT 0x00B8 , # CEDILLA 0x00B9 1 # SUPERSCRIPT ONE 0x00BA o # MASCULINE ORDINAL INDICATOR 0x00BB >> # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00BC 1/4 # VULGAR FRACTION ONE QUARTER 0x00BD 1/2 # VULGAR FRACTION ONE HALF 0x00BE 3/4 # VULGAR FRACTION THREE QUARTERS 0x00BF ? # INVERTED QUESTION MARK 0x00C0 A # LATIN CAPITAL LETTER A WITH GRAVE 0x00C1 A # LATIN CAPITAL LETTER A WITH ACUTE 0x00C2 A # LATIN CAPITAL LETTER A WITH CIRCUMFLEX 0x00C3 A # LATIN CAPITAL LETTER A WITH TILDE 0x00C4 A # LATIN CAPITAL LETTER A WITH DIAERESIS 0x00C5 A # LATIN CAPITAL LETTER A WITH RING ABOVE 0x00C6 AE # LATIN CAPITAL LETTER AE 0x00C7 C # LATIN CAPITAL LETTER C WITH CEDILLA 0x00C8 E # LATIN CAPITAL LETTER E WITH GRAVE 0x00C9 E # LATIN CAPITAL LETTER E WITH ACUTE 0x00CA E # LATIN CAPITAL LETTER E WITH CIRCUMFLEX 0x00CB E # LATIN CAPITAL LETTER E WITH DIAERESIS 0x00CC I # LATIN CAPITAL LETTER I WITH GRAVE 0x00CD I # LATIN CAPITAL LETTER I WITH ACUTE 0x00CE I # LATIN CAPITAL LETTER I WITH CIRCUMFLEX 0x00CF I # LATIN CAPITAL LETTER I WITH DIAERESIS 0x00D0 D # LATIN CAPITAL LETTER ETH 0x00D1 N # LATIN CAPITAL LETTER N WITH TILDE 0x00D2 O # LATIN CAPITAL LETTER O WITH GRAVE 0x00D3 O # LATIN CAPITAL LETTER O WITH ACUTE 0x00D4 O # LATIN CAPITAL LETTER O WITH CIRCUMFLEX 0x00D5 O # LATIN CAPITAL LETTER O WITH TILDE 0x00D6 O # LATIN CAPITAL LETTER O WITH DIAERESIS 
0x00D7 x # MULTIPLICATION SIGN 0x00D8 O # LATIN CAPITAL LETTER O WITH STROKE 0x00D9 U # LATIN CAPITAL LETTER U WITH GRAVE 0x00DA U # LATIN CAPITAL LETTER U WITH ACUTE 0x00DB U # LATIN CAPITAL LETTER U WITH CIRCUMFLEX 0x00DC U # LATIN CAPITAL LETTER U WITH DIAERESIS 0x00DD Y # LATIN CAPITAL LETTER Y WITH ACUTE 0x00DE Th # LATIN CAPITAL LETTER THORN 0x00DF ss # LATIN SMALL LETTER SHARP S 0x00E0 a # LATIN SMALL LETTER A WITH GRAVE 0x00E1 a # LATIN SMALL LETTER A WITH ACUTE 0x00E2 a # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x00E3 a # LATIN SMALL LETTER A WITH TILDE 0x00E4 a # LATIN SMALL LETTER A WITH DIAERESIS 0x00E5 a # LATIN SMALL LETTER A WITH RING ABOVE 0x00E6 ae # LATIN SMALL LETTER AE 0x00E7 c # LATIN SMALL LETTER C WITH CEDILLA 0x00E8 e # LATIN SMALL LETTER E WITH GRAVE 0x00E9 e # LATIN SMALL LETTER E WITH ACUTE 0x00EA e # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x00EB e # LATIN SMALL LETTER E WITH DIAERESIS 0x00EC i # LATIN SMALL LETTER I WITH GRAVE 0x00ED i # LATIN SMALL LETTER I WITH ACUTE 0x00EE i # LATIN SMALL LETTER I WITH CIRCUMFLEX 0x00EF i # LATIN SMALL LETTER I WITH DIAERESIS 0x00F0 d # LATIN SMALL LETTER ETH 0x00F1 n # LATIN SMALL LETTER N WITH TILDE 0x00F2 o # LATIN SMALL LETTER O WITH GRAVE 0x00F3 o # LATIN SMALL LETTER O WITH ACUTE 0x00F4 o # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x00F5 o # LATIN SMALL LETTER O WITH TILDE 0x00F6 o # LATIN SMALL LETTER O WITH DIAERESIS 0x00F7 / # DIVISION SIGN 0x00F8 o # LATIN SMALL LETTER O WITH STROKE 0x00F9 u # LATIN SMALL LETTER U WITH GRAVE 0x00FA u # LATIN SMALL LETTER U WITH ACUTE 0x00FB u # LATIN SMALL LETTER U WITH CIRCUMFLEX 0x00FC u # LATIN SMALL LETTER U WITH DIAERESIS 0x00FD y # LATIN SMALL LETTER Y WITH ACUTE 0x00FE th # LATIN SMALL LETTER THORN 0x00FF y # LATIN SMALL LETTER Y WITH DIAERESIS # # Characters 0x0100 to 0x01FF # 0x0100 A # LATIN CAPITAL LETTER A WITH MACRON 0x0101 a # LATIN SMALL LETTER A WITH MACRON 0x0102 A # LATIN CAPITAL LETTER A WITH BREVE 0x0103 a # LATIN SMALL LETTER A WITH BREVE 0x0104 
A # LATIN CAPITAL LETTER A WITH OGONEK 0x0105 a # LATIN SMALL LETTER A WITH OGONEK 0x0106 C # LATIN CAPITAL LETTER C WITH ACUTE 0x0107 c # LATIN SMALL LETTER C WITH ACUTE 0x0108 C # LATIN CAPITAL LETTER C WITH CIRCUMFLEX 0x0109 c # LATIN SMALL LETTER C WITH CIRCUMFLEX 0x010A C # LATIN CAPITAL LETTER C WITH DOT ABOVE 0x010B c # LATIN SMALL LETTER C WITH DOT ABOVE 0x010C C # LATIN CAPITAL LETTER C WITH CARON 0x010D c # LATIN SMALL LETTER C WITH CARON 0x010E D # LATIN CAPITAL LETTER D WITH CARON 0x010F d # LATIN SMALL LETTER D WITH CARON 0x0110 D # LATIN CAPITAL LETTER D WITH STROKE 0x0111 d # LATIN SMALL LETTER D WITH STROKE 0x0112 E # LATIN CAPITAL LETTER E WITH MACRON 0x0113 e # LATIN SMALL LETTER E WITH MACRON 0x0114 E # LATIN CAPITAL LETTER E WITH BREVE 0x0115 e # LATIN SMALL LETTER E WITH BREVE 0x0116 E # LATIN CAPITAL LETTER E WITH DOT ABOVE 0x0117 e # LATIN SMALL LETTER E WITH DOT ABOVE 0x0118 E # LATIN CAPITAL LETTER E WITH OGONEK 0x0119 e # LATIN SMALL LETTER E WITH OGONEK 0x011A E # LATIN CAPITAL LETTER E WITH CARON 0x011B e # LATIN SMALL LETTER E WITH CARON 0x011C G # LATIN CAPITAL LETTER G WITH CIRCUMFLEX 0x011D g # LATIN SMALL LETTER G WITH CIRCUMFLEX 0x011E G # LATIN CAPITAL LETTER G WITH BREVE 0x011F g # LATIN SMALL LETTER G WITH BREVE 0x0120 G # LATIN CAPITAL LETTER G WITH DOT ABOVE 0x0121 g # LATIN SMALL LETTER G WITH DOT ABOVE 0x0122 G # LATIN CAPITAL LETTER G WITH CEDILLA 0x0123 g # LATIN SMALL LETTER G WITH CEDILLA 0x0124 H # LATIN CAPITAL LETTER H WITH CIRCUMFLEX 0x0125 h # LATIN SMALL LETTER H WITH CIRCUMFLEX 0x0126 H # LATIN CAPITAL LETTER H WITH STROKE 0x0127 h # LATIN SMALL LETTER H WITH STROKE 0x0128 I # LATIN CAPITAL LETTER I WITH TILDE 0x0129 i # LATIN SMALL LETTER I WITH TILDE 0x012A I # LATIN CAPITAL LETTER I WITH MACRON 0x012B i # LATIN SMALL LETTER I WITH MACRON 0x012C I # LATIN CAPITAL LETTER I WITH BREVE 0x012D i # LATIN SMALL LETTER I WITH BREVE 0x012E I # LATIN CAPITAL LETTER I WITH OGONEK 0x012F i # LATIN SMALL LETTER I WITH 
OGONEK 0x0130 I # LATIN CAPITAL LETTER I WITH DOT ABOVE 0x0131 i # LATIN SMALL LETTER DOTLESS I 0x0132 IJ # LATIN CAPITAL LIGATURE IJ 0x0133 ij # LATIN SMALL LIGATURE IJ 0x0134 J # LATIN CAPITAL LETTER J WITH CIRCUMFLEX 0x0135 j # LATIN SMALL LETTER J WITH CIRCUMFLEX 0x0136 K # LATIN CAPITAL LETTER K WITH CEDILLA 0x0137 k # LATIN SMALL LETTER K WITH CEDILLA 0x0138 k # LATIN SMALL LETTER KRA 0x0139 L # LATIN CAPITAL LETTER L WITH ACUTE 0x013A l # LATIN SMALL LETTER L WITH ACUTE 0x013B L # LATIN CAPITAL LETTER L WITH CEDILLA 0x013C l # LATIN SMALL LETTER L WITH CEDILLA 0x013D L # LATIN CAPITAL LETTER L WITH CARON 0x013E l # LATIN SMALL LETTER L WITH CARON 0x013F L # LATIN CAPITAL LETTER L WITH MIDDLE DOT 0x0140 l # LATIN SMALL LETTER L WITH MIDDLE DOT 0x0141 L # LATIN CAPITAL LETTER L WITH STROKE 0x0142 l # LATIN SMALL LETTER L WITH STROKE 0x0143 N # LATIN CAPITAL LETTER N WITH ACUTE 0x0144 n # LATIN SMALL LETTER N WITH ACUTE 0x0145 N # LATIN CAPITAL LETTER N WITH CEDILLA 0x0146 n # LATIN SMALL LETTER N WITH CEDILLA 0x0147 N # LATIN CAPITAL LETTER N WITH CARON 0x0148 n # LATIN SMALL LETTER N WITH CARON 0x0149 "'n" # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE 0x014A NG # LATIN CAPITAL LETTER ENG 0x014B ng # LATIN SMALL LETTER ENG 0x014C O # LATIN CAPITAL LETTER O WITH MACRON 0x014D o # LATIN SMALL LETTER O WITH MACRON 0x014E O # LATIN CAPITAL LETTER O WITH BREVE 0x014F o # LATIN SMALL LETTER O WITH BREVE 0x0150 O # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE 0x0151 o # LATIN SMALL LETTER O WITH DOUBLE ACUTE 0x0152 OE # LATIN CAPITAL LIGATURE OE 0x0153 oe # LATIN SMALL LIGATURE OE 0x0154 R # LATIN CAPITAL LETTER R WITH ACUTE 0x0155 r # LATIN SMALL LETTER R WITH ACUTE 0x0156 R # LATIN CAPITAL LETTER R WITH CEDILLA 0x0157 r # LATIN SMALL LETTER R WITH CEDILLA 0x0158 R # LATIN CAPITAL LETTER R WITH CARON 0x0159 r # LATIN SMALL LETTER R WITH CARON 0x015A S # LATIN CAPITAL LETTER S WITH ACUTE 0x015B s # LATIN SMALL LETTER S WITH ACUTE 0x015C S # LATIN CAPITAL LETTER S WITH 
CIRCUMFLEX 0x015D s # LATIN SMALL LETTER S WITH CIRCUMFLEX 0x015E S # LATIN CAPITAL LETTER S WITH CEDILLA 0x015F s # LATIN SMALL LETTER S WITH CEDILLA 0x0160 S # LATIN CAPITAL LETTER S WITH CARON 0x0161 s # LATIN SMALL LETTER S WITH CARON 0x0162 T # LATIN CAPITAL LETTER T WITH CEDILLA 0x0163 t # LATIN SMALL LETTER T WITH CEDILLA 0x0164 T # LATIN CAPITAL LETTER T WITH CARON 0x0165 t # LATIN SMALL LETTER T WITH CARON 0x0166 T # LATIN CAPITAL LETTER T WITH STROKE 0x0167 t # LATIN SMALL LETTER T WITH STROKE 0x0168 U # LATIN CAPITAL LETTER U WITH TILDE 0x0169 u # LATIN SMALL LETTER U WITH TILDE 0x016A U # LATIN CAPITAL LETTER U WITH MACRON 0x016B u # LATIN SMALL LETTER U WITH MACRON 0x016C U # LATIN CAPITAL LETTER U WITH BREVE 0x016D u # LATIN SMALL LETTER U WITH BREVE 0x016E U # LATIN CAPITAL LETTER U WITH RING ABOVE 0x016F u # LATIN SMALL LETTER U WITH RING ABOVE 0x0170 U # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE 0x0171 u # LATIN SMALL LETTER U WITH DOUBLE ACUTE 0x0172 U # LATIN CAPITAL LETTER U WITH OGONEK 0x0173 u # LATIN SMALL LETTER U WITH OGONEK 0x0174 W # LATIN CAPITAL LETTER W WITH CIRCUMFLEX 0x0175 w # LATIN SMALL LETTER W WITH CIRCUMFLEX 0x0176 Y # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX 0x0177 y # LATIN SMALL LETTER Y WITH CIRCUMFLEX 0x0178 Y # LATIN CAPITAL LETTER Y WITH DIAERESIS 0x0179 Z # LATIN CAPITAL LETTER Z WITH ACUTE 0x017A z # LATIN SMALL LETTER Z WITH ACUTE 0x017B Z # LATIN CAPITAL LETTER Z WITH DOT ABOVE 0x017C z # LATIN SMALL LETTER Z WITH DOT ABOVE 0x017D Z # LATIN CAPITAL LETTER Z WITH CARON 0x017E z # LATIN SMALL LETTER Z WITH CARON 0x017F s # LATIN SMALL LETTER LONG S 0x0180 b # LATIN SMALL LETTER B WITH STROKE 0x0181 B # LATIN CAPITAL LETTER B WITH HOOK 0x0182 B # LATIN CAPITAL LETTER B WITH TOPBAR 0x0183 b # LATIN SMALL LETTER B WITH TOPBAR 0x0184 6 # LATIN CAPITAL LETTER TONE SIX 0x0185 6 # LATIN SMALL LETTER TONE SIX 0x0186 O # LATIN CAPITAL LETTER OPEN O 0x0187 C # LATIN CAPITAL LETTER C WITH HOOK 0x0188 c # LATIN SMALL LETTER C WITH 
HOOK 0x0189 D # LATIN CAPITAL LETTER AFRICAN D 0x018A D # LATIN CAPITAL LETTER D WITH HOOK 0x018B D # LATIN CAPITAL LETTER D WITH TOPBAR 0x018C d # LATIN SMALL LETTER D WITH TOPBAR 0x018D d # LATIN SMALL LETTER TURNED DELTA 0x018E 3 # LATIN CAPITAL LETTER REVERSED E 0x018F @ # LATIN CAPITAL LETTER SCHWA 0x0190 E # LATIN CAPITAL LETTER OPEN E 0x0191 F # LATIN CAPITAL LETTER F WITH HOOK 0x0192 f # LATIN SMALL LETTER F WITH HOOK 0x0193 G # LATIN CAPITAL LETTER G WITH HOOK 0x0194 G # LATIN CAPITAL LETTER GAMMA 0x0195 hv # LATIN SMALL LETTER HV 0x0196 I # LATIN CAPITAL LETTER IOTA 0x0197 I # LATIN CAPITAL LETTER I WITH STROKE 0x0198 K # LATIN CAPITAL LETTER K WITH HOOK 0x0199 k # LATIN SMALL LETTER K WITH HOOK 0x019A l # LATIN SMALL LETTER L WITH BAR 0x019B l # LATIN SMALL LETTER LAMBDA WITH STROKE 0x019C W # LATIN CAPITAL LETTER TURNED M 0x019D N # LATIN CAPITAL LETTER N WITH LEFT HOOK 0x019E n # LATIN SMALL LETTER N WITH LONG RIGHT LEG 0x019F O # LATIN CAPITAL LETTER O WITH MIDDLE TILDE 0x01A0 O # LATIN CAPITAL LETTER O WITH HORN 0x01A1 o # LATIN SMALL LETTER O WITH HORN 0x01A2 OI # LATIN CAPITAL LETTER OI 0x01A3 oi # LATIN SMALL LETTER OI 0x01A4 P # LATIN CAPITAL LETTER P WITH HOOK 0x01A5 p # LATIN SMALL LETTER P WITH HOOK 0x01A6 YR # LATIN LETTER YR 0x01A7 2 # LATIN CAPITAL LETTER TONE TWO 0x01A8 2 # LATIN SMALL LETTER TONE TWO 0x01A9 SH # LATIN CAPITAL LETTER ESH 0x01AA sh # LATIN LETTER REVERSED ESH LOOP 0x01AB t # LATIN SMALL LETTER T WITH PALATAL HOOK 0x01AC T # LATIN CAPITAL LETTER T WITH HOOK 0x01AD t # LATIN SMALL LETTER T WITH HOOK 0x01AE T # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK 0x01AF U # LATIN CAPITAL LETTER U WITH HORN 0x01B0 u # LATIN SMALL LETTER U WITH HORN 0x01B1 Y # LATIN CAPITAL LETTER UPSILON 0x01B2 V # LATIN CAPITAL LETTER V WITH HOOK 0x01B3 Y # LATIN CAPITAL LETTER Y WITH HOOK 0x01B4 y # LATIN SMALL LETTER Y WITH HOOK 0x01B5 Z # LATIN CAPITAL LETTER Z WITH STROKE 0x01B6 z # LATIN SMALL LETTER Z WITH STROKE 0x01B7 ZH # LATIN CAPITAL LETTER 
EZH 0x01B8 ZH # LATIN CAPITAL LETTER EZH REVERSED 0x01B9 zh # LATIN SMALL LETTER EZH REVERSED 0x01BA zh # LATIN SMALL LETTER EZH WITH TAIL 0x01BB 2 # LATIN LETTER TWO WITH STROKE 0x01BC 5 # LATIN CAPITAL LETTER TONE FIVE 0x01BD 5 # LATIN SMALL LETTER TONE FIVE 0x01BE ts # LATIN LETTER INVERTED GLOTTAL STOP WITH STROKE 0x01BF w # LATIN LETTER WYNN 0x01C0 | # LATIN LETTER DENTAL CLICK 0x01C1 || # LATIN LETTER LATERAL CLICK 0x01C2 |= # LATIN LETTER ALVEOLAR CLICK 0x01C3 ! # LATIN LETTER RETROFLEX CLICK 0x01C4 DZ # LATIN CAPITAL LETTER DZ WITH CARON 0x01C5 Dz # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON 0x01C6 dz # LATIN SMALL LETTER DZ WITH CARON 0x01C7 LJ # LATIN CAPITAL LETTER LJ 0x01C8 Lj # LATIN CAPITAL LETTER L WITH SMALL LETTER J 0x01C9 lj # LATIN SMALL LETTER LJ 0x01CA NJ # LATIN CAPITAL LETTER NJ 0x01CB Nj # LATIN CAPITAL LETTER N WITH SMALL LETTER J 0x01CC nj # LATIN SMALL LETTER NJ 0x01CD A # LATIN CAPITAL LETTER A WITH CARON 0x01CE a # LATIN SMALL LETTER A WITH CARON 0x01CF I # LATIN CAPITAL LETTER I WITH CARON 0x01D0 i # LATIN SMALL LETTER I WITH CARON 0x01D1 O # LATIN CAPITAL LETTER O WITH CARON 0x01D2 o # LATIN SMALL LETTER O WITH CARON 0x01D3 U # LATIN CAPITAL LETTER U WITH CARON 0x01D4 u # LATIN SMALL LETTER U WITH CARON 0x01D5 U # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON 0x01D6 u # LATIN SMALL LETTER U WITH DIAERESIS AND MACRON 0x01D7 U # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE 0x01D8 u # LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE 0x01D9 U # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON 0x01DA u # LATIN SMALL LETTER U WITH DIAERESIS AND CARON 0x01DB U # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE 0x01DC u # LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE 0x01DD @ # LATIN SMALL LETTER TURNED E 0x01DE A # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON 0x01DF a # LATIN SMALL LETTER A WITH DIAERESIS AND MACRON 0x01E0 A # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON 0x01E1 a # LATIN SMALL LETTER A WITH DOT ABOVE AND 
MACRON 0x01E2 AE # LATIN CAPITAL LETTER AE WITH MACRON 0x01E3 ae # LATIN SMALL LETTER AE WITH MACRON 0x01E4 G # LATIN CAPITAL LETTER G WITH STROKE 0x01E5 g # LATIN SMALL LETTER G WITH STROKE 0x01E6 G # LATIN CAPITAL LETTER G WITH CARON 0x01E7 g # LATIN SMALL LETTER G WITH CARON 0x01E8 K # LATIN CAPITAL LETTER K WITH CARON 0x01E9 k # LATIN SMALL LETTER K WITH CARON 0x01EA O # LATIN CAPITAL LETTER O WITH OGONEK 0x01EB o # LATIN SMALL LETTER O WITH OGONEK 0x01EC O # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON 0x01ED o # LATIN SMALL LETTER O WITH OGONEK AND MACRON 0x01EE ZH # LATIN CAPITAL LETTER EZH WITH CARON 0x01EF zh # LATIN SMALL LETTER EZH WITH CARON 0x01F0 j # LATIN SMALL LETTER J WITH CARON 0x01F1 DZ # LATIN CAPITAL LETTER DZ 0x01F2 Dz # LATIN CAPITAL LETTER D WITH SMALL LETTER Z 0x01F3 dz # LATIN SMALL LETTER DZ 0x01F4 G # LATIN CAPITAL LETTER G WITH ACUTE 0x01F5 g # LATIN SMALL LETTER G WITH ACUTE 0x01F6 HV # LATIN CAPITAL LETTER HWAIR 0x01F7 W # LATIN CAPITAL LETTER WYNN 0x01F8 N # LATIN CAPITAL LETTER N WITH GRAVE 0x01F9 n # LATIN SMALL LETTER N WITH GRAVE 0x01FA A # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE 0x01FB a # LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE 0x01FC AE # LATIN CAPITAL LETTER AE WITH ACUTE 0x01FD ae # LATIN SMALL LETTER AE WITH ACUTE 0x01FE O # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE 0x01FF o # LATIN SMALL LETTER O WITH STROKE AND ACUTE # # Characters 0x0200 to 0x02FF # 0x0200 A # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE 0x0201 a # LATIN SMALL LETTER A WITH DOUBLE GRAVE 0x0202 A # LATIN CAPITAL LETTER A WITH INVERTED BREVE 0x0203 a # LATIN SMALL LETTER A WITH INVERTED BREVE 0x0204 E # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE 0x0205 e # LATIN SMALL LETTER E WITH DOUBLE GRAVE 0x0206 E # LATIN CAPITAL LETTER E WITH INVERTED BREVE 0x0207 e # LATIN SMALL LETTER E WITH INVERTED BREVE 0x0208 I # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE 0x0209 i # LATIN SMALL LETTER I WITH DOUBLE GRAVE 0x020A I # LATIN CAPITAL LETTER I WITH 
INVERTED BREVE 0x020B i # LATIN SMALL LETTER I WITH INVERTED BREVE 0x020C O # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE 0x020D o # LATIN SMALL LETTER O WITH DOUBLE GRAVE 0x020E O # LATIN CAPITAL LETTER O WITH INVERTED BREVE 0x020F o # LATIN SMALL LETTER O WITH INVERTED BREVE 0x0210 R # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE 0x0211 r # LATIN SMALL LETTER R WITH DOUBLE GRAVE 0x0212 R # LATIN CAPITAL LETTER R WITH INVERTED BREVE 0x0213 r # LATIN SMALL LETTER R WITH INVERTED BREVE 0x0214 U # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE 0x0215 u # LATIN SMALL LETTER U WITH DOUBLE GRAVE 0x0216 U # LATIN CAPITAL LETTER U WITH INVERTED BREVE 0x0217 u # LATIN SMALL LETTER U WITH INVERTED BREVE 0x0218 S # LATIN CAPITAL LETTER S WITH COMMA BELOW 0x0219 s # LATIN SMALL LETTER S WITH COMMA BELOW 0x021A T # LATIN CAPITAL LETTER T WITH COMMA BELOW 0x021B t # LATIN SMALL LETTER T WITH COMMA BELOW 0x021C Y # LATIN CAPITAL LETTER YOGH 0x021D y # LATIN SMALL LETTER YOGH 0x021E H # LATIN CAPITAL LETTER H WITH CARON 0x021F h # LATIN SMALL LETTER H WITH CARON 0x0220 N # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG 0x0221 d # LATIN SMALL LETTER D WITH CURL 0x0222 OU # LATIN CAPITAL LETTER OU 0x0223 ou # LATIN SMALL LETTER OU 0x0224 Z # LATIN CAPITAL LETTER Z WITH HOOK 0x0225 z # LATIN SMALL LETTER Z WITH HOOK 0x0226 A # LATIN CAPITAL LETTER A WITH DOT ABOVE 0x0227 a # LATIN SMALL LETTER A WITH DOT ABOVE 0x0228 E # LATIN CAPITAL LETTER E WITH CEDILLA 0x0229 e # LATIN SMALL LETTER E WITH CEDILLA 0x022A O # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON 0x022B o # LATIN SMALL LETTER O WITH DIAERESIS AND MACRON 0x022C O # LATIN CAPITAL LETTER O WITH TILDE AND MACRON 0x022D o # LATIN SMALL LETTER O WITH TILDE AND MACRON 0x022E O # LATIN CAPITAL LETTER O WITH DOT ABOVE 0x022F o # LATIN SMALL LETTER O WITH DOT ABOVE 0x0230 O # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON 0x0231 o # LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON 0x0232 Y # LATIN CAPITAL LETTER Y WITH MACRON 0x0233 y # LATIN 
SMALL LETTER Y WITH MACRON 0x0234 l # LATIN SMALL LETTER L WITH CURL 0x0235 n # LATIN SMALL LETTER N WITH CURL 0x0236 t # LATIN SMALL LETTER T WITH CURL 0x0237 j # LATIN SMALL LETTER DOTLESS J 0x0238 db # LATIN SMALL LETTER DB DIGRAPH 0x0239 qp # LATIN SMALL LETTER QP DIGRAPH 0x023A A # LATIN CAPITAL LETTER A WITH STROKE 0x023B C # LATIN CAPITAL LETTER C WITH STROKE 0x023C c # LATIN SMALL LETTER C WITH STROKE 0x023D L # LATIN CAPITAL LETTER L WITH BAR 0x023E T # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE 0x023F s # LATIN SMALL LETTER S WITH SWASH TAIL 0x0240 z # LATIN SMALL LETTER Z WITH SWASH TAIL 0x0243 B # LATIN CAPITAL LETTER B WITH STROKE 0x0244 U # LATIN CAPITAL LETTER U BAR 0x0245 ^ # LATIN CAPITAL LETTER TURNED V 0x0246 E # LATIN CAPITAL LETTER E WITH STROKE 0x0247 e # LATIN SMALL LETTER E WITH STROKE 0x0248 J # LATIN CAPITAL LETTER J WITH STROKE 0x0249 j # LATIN SMALL LETTER J WITH STROKE 0x024A q # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL 0x024B q # LATIN SMALL LETTER Q WITH HOOK TAIL 0x024C R # LATIN CAPITAL LETTER R WITH STROKE 0x024D r # LATIN SMALL LETTER R WITH STROKE 0x024E Y # LATIN CAPITAL LETTER Y WITH STROKE 0x024F y # LATIN SMALL LETTER Y WITH STROKE 0x0250 a # LATIN SMALL LETTER TURNED A 0x0251 a # LATIN SMALL LETTER ALPHA 0x0252 a # LATIN SMALL LETTER TURNED ALPHA 0x0253 b # LATIN SMALL LETTER B WITH HOOK 0x0254 o # LATIN SMALL LETTER OPEN O 0x0255 c # LATIN SMALL LETTER C WITH CURL 0x0256 d # LATIN SMALL LETTER D WITH TAIL 0x0257 d # LATIN SMALL LETTER D WITH HOOK 0x0258 e # LATIN SMALL LETTER REVERSED E 0x0259 @ # LATIN SMALL LETTER SCHWA 0x025A @ # LATIN SMALL LETTER SCHWA WITH HOOK 0x025B e # LATIN SMALL LETTER OPEN E 0x025C e # LATIN SMALL LETTER REVERSED OPEN E 0x025D e # LATIN SMALL LETTER REVERSED OPEN E WITH HOOK 0x025E e # LATIN SMALL LETTER CLOSED REVERSED OPEN E 0x025F j # LATIN SMALL LETTER DOTLESS J WITH STROKE 0x0260 g # LATIN SMALL LETTER G WITH HOOK 0x0261 g # LATIN SMALL LETTER SCRIPT G 0x0262 g # LATIN LETTER SMALL 
CAPITAL G 0x0263 g # LATIN SMALL LETTER GAMMA 0x0264 u # LATIN SMALL LETTER RAMS HORN 0x0265 Y # LATIN SMALL LETTER TURNED H 0x0266 h # LATIN SMALL LETTER H WITH HOOK 0x0267 h # LATIN SMALL LETTER HENG WITH HOOK 0x0268 i # LATIN SMALL LETTER I WITH STROKE 0x0269 i # LATIN SMALL LETTER IOTA 0x026A I # LATIN LETTER SMALL CAPITAL I 0x026B l # LATIN SMALL LETTER L WITH MIDDLE TILDE 0x026C l # LATIN SMALL LETTER L WITH BELT 0x026D l # LATIN SMALL LETTER L WITH RETROFLEX HOOK 0x026E lZ # LATIN SMALL LETTER LEZH 0x026F W # LATIN SMALL LETTER TURNED M 0x0270 W # LATIN SMALL LETTER TURNED M WITH LONG LEG 0x0271 m # LATIN SMALL LETTER M WITH HOOK 0x0272 n # LATIN SMALL LETTER N WITH LEFT HOOK 0x0273 n # LATIN SMALL LETTER N WITH RETROFLEX HOOK 0x0274 n # LATIN LETTER SMALL CAPITAL N 0x0275 o # LATIN SMALL LETTER BARRED O 0x0276 OE # LATIN LETTER SMALL CAPITAL OE 0x0277 O # LATIN SMALL LETTER CLOSED OMEGA 0x0278 F # LATIN SMALL LETTER PHI 0x0279 r # LATIN SMALL LETTER TURNED R 0x027A r # LATIN SMALL LETTER TURNED R WITH LONG LEG 0x027B r # LATIN SMALL LETTER TURNED R WITH HOOK 0x027C r # LATIN SMALL LETTER R WITH LONG LEG 0x027D r # LATIN SMALL LETTER R WITH TAIL 0x027E r # LATIN SMALL LETTER R WITH FISHHOOK 0x027F r # LATIN SMALL LETTER REVERSED R WITH FISHHOOK 0x0280 R # LATIN LETTER SMALL CAPITAL R 0x0281 R # LATIN LETTER SMALL CAPITAL INVERTED R 0x0282 s # LATIN SMALL LETTER S WITH HOOK 0x0283 S # LATIN SMALL LETTER ESH 0x0284 j # LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK 0x0285 S # LATIN SMALL LETTER SQUAT REVERSED ESH 0x0286 S # LATIN SMALL LETTER ESH WITH CURL 0x0287 t # LATIN SMALL LETTER TURNED T 0x0288 t # LATIN SMALL LETTER T WITH RETROFLEX HOOK 0x0289 u # LATIN SMALL LETTER U BAR 0x028A U # LATIN SMALL LETTER UPSILON 0x028B v # LATIN SMALL LETTER V WITH HOOK 0x028C ^ # LATIN SMALL LETTER TURNED V 0x028D w # LATIN SMALL LETTER TURNED W 0x028E y # LATIN SMALL LETTER TURNED Y 0x028F Y # LATIN LETTER SMALL CAPITAL Y 0x0290 z # LATIN SMALL LETTER Z WITH 
RETROFLEX HOOK 0x0291 z # LATIN SMALL LETTER Z WITH CURL 0x0292 Z # LATIN SMALL LETTER EZH 0x0293 Z # LATIN SMALL LETTER EZH WITH CURL 0x0297 C # LATIN LETTER STRETCHED C 0x0298 @ # LATIN LETTER BILABIAL CLICK 0x0299 B # LATIN LETTER SMALL CAPITAL B 0x029A E # LATIN SMALL LETTER CLOSED OPEN E 0x029B G # LATIN LETTER SMALL CAPITAL G WITH HOOK 0x029C H # LATIN LETTER SMALL CAPITAL H 0x029D j # LATIN SMALL LETTER J WITH CROSSED-TAIL 0x029E k # LATIN SMALL LETTER TURNED K 0x029F L # LATIN LETTER SMALL CAPITAL L 0x02A0 q # LATIN SMALL LETTER Q WITH HOOK 0x02A3 dz # LATIN SMALL LETTER DZ DIGRAPH 0x02A4 dZ # LATIN SMALL LETTER DEZH DIGRAPH 0x02A5 dz # LATIN SMALL LETTER DZ DIGRAPH WITH CURL 0x02A6 ts # LATIN SMALL LETTER TS DIGRAPH 0x02A7 tS # LATIN SMALL LETTER TESH DIGRAPH 0x02A8 tC # LATIN SMALL LETTER TC DIGRAPH WITH CURL 0x02A9 fN # LATIN SMALL LETTER FENG DIGRAPH 0x02AA ls # LATIN SMALL LETTER LS DIGRAPH 0x02AB lz # LATIN SMALL LETTER LZ DIGRAPH 0x02AC WW # LATIN LETTER BILABIAL PERCUSSIVE 0x02AD ]] # LATIN LETTER BIDENTAL PERCUSSIVE 0x02AE h # LATIN SMALL LETTER TURNED H WITH FISHHOOK 0x02AF h # LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 0x02B0 h # MODIFIER LETTER SMALL H 0x02B1 h # MODIFIER LETTER SMALL H WITH HOOK 0x02B2 j # MODIFIER LETTER SMALL J 0x02B3 r # MODIFIER LETTER SMALL R 0x02B4 r # MODIFIER LETTER SMALL TURNED R 0x02B5 r # MODIFIER LETTER SMALL TURNED R WITH HOOK 0x02B6 r # MODIFIER LETTER SMALL CAPITAL INVERTED R 0x02B7 w # MODIFIER LETTER SMALL W 0x02B8 y # MODIFIER LETTER SMALL Y 0x02B9 "'" # MODIFIER LETTER PRIME 0x02BA '"' # MODIFIER LETTER DOUBLE PRIME 0x02BB ` # MODIFIER LETTER TURNED COMMA 0x02BC "'" # MODIFIER LETTER APOSTROPHE 0x02BD ` # MODIFIER LETTER REVERSED COMMA 0x02BE ` # MODIFIER LETTER RIGHT HALF RING 0x02BF "'" # MODIFIER LETTER LEFT HALF RING 0x02C2 < # MODIFIER LETTER LEFT ARROWHEAD 0x02C3 > # MODIFIER LETTER RIGHT ARROWHEAD 0x02C4 ^ # MODIFIER LETTER UP ARROWHEAD 0x02C5 V # MODIFIER LETTER DOWN ARROWHEAD 0x02C6 ^ # 
MODIFIER LETTER CIRCUMFLEX ACCENT 0x02C7 V # CARON 0x02C8 "'" # MODIFIER LETTER VERTICAL LINE 0x02C9 - # MODIFIER LETTER MACRON 0x02CA / # MODIFIER LETTER ACUTE ACCENT 0x02CB \ # MODIFIER LETTER GRAVE ACCENT 0x02CC , # MODIFIER LETTER LOW VERTICAL LINE 0x02CD _ # MODIFIER LETTER LOW MACRON 0x02CE \ # MODIFIER LETTER LOW GRAVE ACCENT 0x02CF / # MODIFIER LETTER LOW ACUTE ACCENT 0x02D0 : # MODIFIER LETTER TRIANGULAR COLON 0x02D1 . # MODIFIER LETTER HALF TRIANGULAR COLON 0x02D2 ` # MODIFIER LETTER CENTRED RIGHT HALF RING 0x02D3 "'" # MODIFIER LETTER CENTRED LEFT HALF RING 0x02D4 ^ # MODIFIER LETTER UP TACK 0x02D5 V # MODIFIER LETTER DOWN TACK 0x02D6 + # MODIFIER LETTER PLUS SIGN 0x02D7 - # MODIFIER LETTER MINUS SIGN 0x02D8 V # BREVE 0x02D9 . # DOT ABOVE 0x02DA @ # RING ABOVE 0x02DB , # OGONEK 0x02DC ~ # SMALL TILDE 0x02DD '"' # DOUBLE ACUTE ACCENT 0x02DE R # MODIFIER LETTER RHOTIC HOOK 0x02DF X # MODIFIER LETTER CROSS ACCENT 0x02E0 G # MODIFIER LETTER SMALL GAMMA 0x02E1 l # MODIFIER LETTER SMALL L 0x02E2 s # MODIFIER LETTER SMALL S 0x02E3 x # MODIFIER LETTER SMALL X 0x02EC V # MODIFIER LETTER VOICING 0x02ED = # MODIFIER LETTER UNASPIRATED 0x02EE '"' # MODIFIER LETTER DOUBLE APOSTROPHE # # Characters 0x0300 to 0x03FF # 0x0363 a # COMBINING LATIN SMALL LETTER A 0x0364 e # COMBINING LATIN SMALL LETTER E 0x0365 i # COMBINING LATIN SMALL LETTER I 0x0366 o # COMBINING LATIN SMALL LETTER O 0x0367 u # COMBINING LATIN SMALL LETTER U 0x0368 c # COMBINING LATIN SMALL LETTER C 0x0369 d # COMBINING LATIN SMALL LETTER D 0x036A h # COMBINING LATIN SMALL LETTER H 0x036B m # COMBINING LATIN SMALL LETTER M 0x036C r # COMBINING LATIN SMALL LETTER R 0x036D t # COMBINING LATIN SMALL LETTER T 0x036E v # COMBINING LATIN SMALL LETTER V 0x036F x # COMBINING LATIN SMALL LETTER X 0x0374 "'" # GREEK NUMERAL SIGN 0x0375 , # GREEK LOWER NUMERAL SIGN 0x037E ? 
# GREEK QUESTION MARK 0x0386 A # GREEK CAPITAL LETTER ALPHA WITH TONOS 0x0387 ; # GREEK ANO TELEIA 0x0388 E # GREEK CAPITAL LETTER EPSILON WITH TONOS 0x0389 E # GREEK CAPITAL LETTER ETA WITH TONOS 0x038A I # GREEK CAPITAL LETTER IOTA WITH TONOS 0x038C O # GREEK CAPITAL LETTER OMICRON WITH TONOS 0x038E U # GREEK CAPITAL LETTER UPSILON WITH TONOS 0x038F O # GREEK CAPITAL LETTER OMEGA WITH TONOS 0x0390 I # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS 0x0391 A # GREEK CAPITAL LETTER ALPHA 0x0392 B # GREEK CAPITAL LETTER BETA 0x0393 G # GREEK CAPITAL LETTER GAMMA 0x0394 D # GREEK CAPITAL LETTER DELTA 0x0395 E # GREEK CAPITAL LETTER EPSILON 0x0396 Z # GREEK CAPITAL LETTER ZETA 0x0397 E # GREEK CAPITAL LETTER ETA 0x0398 Th # GREEK CAPITAL LETTER THETA 0x0399 I # GREEK CAPITAL LETTER IOTA 0x039A K # GREEK CAPITAL LETTER KAPPA 0x039B L # GREEK CAPITAL LETTER LAMDA 0x039C M # GREEK CAPITAL LETTER MU 0x039D N # GREEK CAPITAL LETTER NU 0x039E Ks # GREEK CAPITAL LETTER XI 0x039F O # GREEK CAPITAL LETTER OMICRON 0x03A0 P # GREEK CAPITAL LETTER PI 0x03A1 R # GREEK CAPITAL LETTER RHO 0x03A3 S # GREEK CAPITAL LETTER SIGMA 0x03A4 T # GREEK CAPITAL LETTER TAU 0x03A5 U # GREEK CAPITAL LETTER UPSILON 0x03A6 Ph # GREEK CAPITAL LETTER PHI 0x03A7 Kh # GREEK CAPITAL LETTER CHI 0x03A8 Ps # GREEK CAPITAL LETTER PSI 0x03A9 O # GREEK CAPITAL LETTER OMEGA 0x03AA I # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA 0x03AB U # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA 0x03AC a # GREEK SMALL LETTER ALPHA WITH TONOS 0x03AD e # GREEK SMALL LETTER EPSILON WITH TONOS 0x03AE e # GREEK SMALL LETTER ETA WITH TONOS 0x03AF i # GREEK SMALL LETTER IOTA WITH TONOS 0x03B0 u # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS 0x03B1 a # GREEK SMALL LETTER ALPHA 0x03B2 b # GREEK SMALL LETTER BETA 0x03B3 g # GREEK SMALL LETTER GAMMA 0x03B4 d # GREEK SMALL LETTER DELTA 0x03B5 e # GREEK SMALL LETTER EPSILON 0x03B6 z # GREEK SMALL LETTER ZETA 0x03B7 e # GREEK SMALL LETTER ETA 0x03B8 th # GREEK SMALL LETTER THETA 
0x03B9 i # GREEK SMALL LETTER IOTA 0x03BA k # GREEK SMALL LETTER KAPPA 0x03BB l # GREEK SMALL LETTER LAMDA 0x03BC m # GREEK SMALL LETTER MU 0x03BD n # GREEK SMALL LETTER NU 0x03BE x # GREEK SMALL LETTER XI 0x03BF o # GREEK SMALL LETTER OMICRON 0x03C0 p # GREEK SMALL LETTER PI 0x03C1 r # GREEK SMALL LETTER RHO 0x03C2 s # GREEK SMALL LETTER FINAL SIGMA 0x03C3 s # GREEK SMALL LETTER SIGMA 0x03C4 t # GREEK SMALL LETTER TAU 0x03C5 u # GREEK SMALL LETTER UPSILON 0x03C6 ph # GREEK SMALL LETTER PHI 0x03C7 kh # GREEK SMALL LETTER CHI 0x03C8 ps # GREEK SMALL LETTER PSI 0x03C9 o # GREEK SMALL LETTER OMEGA 0x03CA i # GREEK SMALL LETTER IOTA WITH DIALYTIKA 0x03CB u # GREEK SMALL LETTER UPSILON WITH DIALYTIKA 0x03CC o # GREEK SMALL LETTER OMICRON WITH TONOS 0x03CD u # GREEK SMALL LETTER UPSILON WITH TONOS 0x03CE o # GREEK SMALL LETTER OMEGA WITH TONOS 0x03D0 b # GREEK BETA SYMBOL 0x03D1 th # GREEK THETA SYMBOL 0x03D2 U # GREEK UPSILON WITH HOOK SYMBOL 0x03D3 U # GREEK UPSILON WITH ACUTE AND HOOK SYMBOL 0x03D4 U # GREEK UPSILON WITH DIAERESIS AND HOOK SYMBOL 0x03D5 ph # GREEK PHI SYMBOL 0x03D6 p # GREEK PI SYMBOL 0x03D7 & # GREEK KAI SYMBOL 0x03DA St # GREEK LETTER STIGMA 0x03DB st # GREEK SMALL LETTER STIGMA 0x03DC W # GREEK LETTER DIGAMMA 0x03DD w # GREEK SMALL LETTER DIGAMMA 0x03DE Q # GREEK LETTER KOPPA 0x03DF q # GREEK SMALL LETTER KOPPA 0x03E0 Sp # GREEK LETTER SAMPI 0x03E1 sp # GREEK SMALL LETTER SAMPI 0x03E2 Sh # COPTIC CAPITAL LETTER SHEI 0x03E3 sh # COPTIC SMALL LETTER SHEI 0x03E4 F # COPTIC CAPITAL LETTER FEI 0x03E5 f # COPTIC SMALL LETTER FEI 0x03E6 Kh # COPTIC CAPITAL LETTER KHEI 0x03E7 kh # COPTIC SMALL LETTER KHEI 0x03E8 H # COPTIC CAPITAL LETTER HORI 0x03E9 h # COPTIC SMALL LETTER HORI 0x03EA G # COPTIC CAPITAL LETTER GANGIA 0x03EB g # COPTIC SMALL LETTER GANGIA 0x03EC CH # COPTIC CAPITAL LETTER SHIMA 0x03ED ch # COPTIC SMALL LETTER SHIMA 0x03EE Ti # COPTIC CAPITAL LETTER DEI 0x03EF ti # COPTIC SMALL LETTER DEI 0x03F0 k # GREEK KAPPA SYMBOL 0x03F1 r # GREEK RHO 
SYMBOL 0x03F2 c # GREEK LUNATE SIGMA SYMBOL 0x03F3 j # GREEK LETTER YOT # # Characters 0x0400 to 0x04FF # 0x0400 Ie # CYRILLIC CAPITAL LETTER IE WITH GRAVE 0x0401 Io # CYRILLIC CAPITAL LETTER IO 0x0402 Dj # CYRILLIC CAPITAL LETTER DJE 0x0403 Gj # CYRILLIC CAPITAL LETTER GJE 0x0404 E # CYRILLIC CAPITAL LETTER UKRAINIAN IE 0x0405 Dz # CYRILLIC CAPITAL LETTER DZE 0x0406 I # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I 0x0407 Yi # CYRILLIC CAPITAL LETTER YI 0x0408 J # CYRILLIC CAPITAL LETTER JE 0x0409 Lj # CYRILLIC CAPITAL LETTER LJE 0x040A Nj # CYRILLIC CAPITAL LETTER NJE 0x040B Tsh # CYRILLIC CAPITAL LETTER TSHE 0x040C Kj # CYRILLIC CAPITAL LETTER KJE 0x040D I # CYRILLIC CAPITAL LETTER I WITH GRAVE 0x040E U # CYRILLIC CAPITAL LETTER SHORT U 0x040F Dzh # CYRILLIC CAPITAL LETTER DZHE 0x0410 A # CYRILLIC CAPITAL LETTER A 0x0411 B # CYRILLIC CAPITAL LETTER BE 0x0412 V # CYRILLIC CAPITAL LETTER VE 0x0413 G # CYRILLIC CAPITAL LETTER GHE 0x0414 D # CYRILLIC CAPITAL LETTER DE 0x0415 E # CYRILLIC CAPITAL LETTER IE 0x0416 Zh # CYRILLIC CAPITAL LETTER ZHE 0x0417 Z # CYRILLIC CAPITAL LETTER ZE 0x0418 I # CYRILLIC CAPITAL LETTER I 0x0419 I # CYRILLIC CAPITAL LETTER SHORT I 0x041A K # CYRILLIC CAPITAL LETTER KA 0x041B L # CYRILLIC CAPITAL LETTER EL 0x041C M # CYRILLIC CAPITAL LETTER EM 0x041D N # CYRILLIC CAPITAL LETTER EN 0x041E O # CYRILLIC CAPITAL LETTER O 0x041F P # CYRILLIC CAPITAL LETTER PE 0x0420 R # CYRILLIC CAPITAL LETTER ER 0x0421 S # CYRILLIC CAPITAL LETTER ES 0x0422 T # CYRILLIC CAPITAL LETTER TE 0x0423 U # CYRILLIC CAPITAL LETTER U 0x0424 F # CYRILLIC CAPITAL LETTER EF 0x0425 Kh # CYRILLIC CAPITAL LETTER HA 0x0426 Ts # CYRILLIC CAPITAL LETTER TSE 0x0427 Ch # CYRILLIC CAPITAL LETTER CHE 0x0428 Sh # CYRILLIC CAPITAL LETTER SHA 0x0429 Shch # CYRILLIC CAPITAL LETTER SHCHA 0x042B Y # CYRILLIC CAPITAL LETTER YERU 0x042C "'" # CYRILLIC CAPITAL LETTER SOFT SIGN 0x042D E # CYRILLIC CAPITAL LETTER E 0x042E Iu # CYRILLIC CAPITAL LETTER YU 0x042F Ia # CYRILLIC CAPITAL LETTER 
YA 0x0430 a # CYRILLIC SMALL LETTER A 0x0431 b # CYRILLIC SMALL LETTER BE 0x0432 v # CYRILLIC SMALL LETTER VE 0x0433 g # CYRILLIC SMALL LETTER GHE 0x0434 d # CYRILLIC SMALL LETTER DE 0x0435 e # CYRILLIC SMALL LETTER IE 0x0436 zh # CYRILLIC SMALL LETTER ZHE 0x0437 z # CYRILLIC SMALL LETTER ZE 0x0438 i # CYRILLIC SMALL LETTER I 0x0439 i # CYRILLIC SMALL LETTER SHORT I 0x043A k # CYRILLIC SMALL LETTER KA 0x043B l # CYRILLIC SMALL LETTER EL 0x043C m # CYRILLIC SMALL LETTER EM 0x043D n # CYRILLIC SMALL LETTER EN 0x043E o # CYRILLIC SMALL LETTER O 0x043F p # CYRILLIC SMALL LETTER PE 0x0440 r # CYRILLIC SMALL LETTER ER 0x0441 s # CYRILLIC SMALL LETTER ES 0x0442 t # CYRILLIC SMALL LETTER TE 0x0443 u # CYRILLIC SMALL LETTER U 0x0444 f # CYRILLIC SMALL LETTER EF 0x0445 kh # CYRILLIC SMALL LETTER HA 0x0446 ts # CYRILLIC SMALL LETTER TSE 0x0447 ch # CYRILLIC SMALL LETTER CHE 0x0448 sh # CYRILLIC SMALL LETTER SHA 0x0449 shch # CYRILLIC SMALL LETTER SHCHA 0x044B y # CYRILLIC SMALL LETTER YERU 0x044C "'" # CYRILLIC SMALL LETTER SOFT SIGN 0x044D e # CYRILLIC SMALL LETTER E 0x044E iu # CYRILLIC SMALL LETTER YU 0x044F ia # CYRILLIC SMALL LETTER YA 0x0450 ie # CYRILLIC SMALL LETTER IE WITH GRAVE 0x0451 io # CYRILLIC SMALL LETTER IO 0x0452 dj # CYRILLIC SMALL LETTER DJE 0x0453 gj # CYRILLIC SMALL LETTER GJE 0x0454 ie # CYRILLIC SMALL LETTER UKRAINIAN IE 0x0455 dz # CYRILLIC SMALL LETTER DZE 0x0456 i # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I 0x0457 yi # CYRILLIC SMALL LETTER YI 0x0458 j # CYRILLIC SMALL LETTER JE 0x0459 lj # CYRILLIC SMALL LETTER LJE 0x045A nj # CYRILLIC SMALL LETTER NJE 0x045B tsh # CYRILLIC SMALL LETTER TSHE 0x045C kj # CYRILLIC SMALL LETTER KJE 0x045D i # CYRILLIC SMALL LETTER I WITH GRAVE 0x045E u # CYRILLIC SMALL LETTER SHORT U 0x045F dzh # CYRILLIC SMALL LETTER DZHE 0x0460 O # CYRILLIC CAPITAL LETTER OMEGA 0x0461 o # CYRILLIC SMALL LETTER OMEGA 0x0462 E # CYRILLIC CAPITAL LETTER YAT 0x0463 e # CYRILLIC SMALL LETTER YAT 0x0464 Ie # CYRILLIC CAPITAL LETTER 
IOTIFIED E 0x0465 ie # CYRILLIC SMALL LETTER IOTIFIED E 0x0466 E # CYRILLIC CAPITAL LETTER LITTLE YUS 0x0467 e # CYRILLIC SMALL LETTER LITTLE YUS 0x0468 Ie # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS 0x0469 ie # CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS 0x046A O # CYRILLIC CAPITAL LETTER BIG YUS 0x046B o # CYRILLIC SMALL LETTER BIG YUS 0x046C Io # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS 0x046D io # CYRILLIC SMALL LETTER IOTIFIED BIG YUS 0x046E Ks # CYRILLIC CAPITAL LETTER KSI 0x046F ks # CYRILLIC SMALL LETTER KSI 0x0470 Ps # CYRILLIC CAPITAL LETTER PSI 0x0471 ps # CYRILLIC SMALL LETTER PSI 0x0472 F # CYRILLIC CAPITAL LETTER FITA 0x0473 f # CYRILLIC SMALL LETTER FITA 0x0474 Y # CYRILLIC CAPITAL LETTER IZHITSA 0x0475 y # CYRILLIC SMALL LETTER IZHITSA 0x0476 Y # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT 0x0477 y # CYRILLIC SMALL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT 0x0478 U # CYRILLIC CAPITAL LETTER UK 0x0479 u # CYRILLIC SMALL LETTER UK 0x047A O # CYRILLIC CAPITAL LETTER ROUND OMEGA 0x047B o # CYRILLIC SMALL LETTER ROUND OMEGA 0x047C O # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO 0x047D o # CYRILLIC SMALL LETTER OMEGA WITH TITLO 0x047E Ot # CYRILLIC CAPITAL LETTER OT 0x047F ot # CYRILLIC SMALL LETTER OT 0x0480 Q # CYRILLIC CAPITAL LETTER KOPPA 0x0481 q # CYRILLIC SMALL LETTER KOPPA 0x0482 *1000* # CYRILLIC THOUSANDS SIGN 0x0488 *100.000* # COMBINING CYRILLIC HUNDRED THOUSANDS SIGN 0x0489 *1.000.000* # COMBINING CYRILLIC MILLIONS SIGN 0x048C '"' # CYRILLIC CAPITAL LETTER SEMISOFT SIGN 0x048D '"' # CYRILLIC SMALL LETTER SEMISOFT SIGN 0x048E "R'" # CYRILLIC CAPITAL LETTER ER WITH TICK 0x048F "r'" # CYRILLIC SMALL LETTER ER WITH TICK 0x0490 "G'" # CYRILLIC CAPITAL LETTER GHE WITH UPTURN 0x0491 "g'" # CYRILLIC SMALL LETTER GHE WITH UPTURN 0x0492 "G'" # CYRILLIC CAPITAL LETTER GHE WITH STROKE 0x0493 "g'" # CYRILLIC SMALL LETTER GHE WITH STROKE 0x0494 "G'" # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK 0x0495 "g'" # CYRILLIC SMALL LETTER GHE WITH MIDDLE 
HOOK 0x0496 "Zh'" # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER 0x0497 "zh'" # CYRILLIC SMALL LETTER ZHE WITH DESCENDER 0x0498 "Z'" # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER 0x0499 "z'" # CYRILLIC SMALL LETTER ZE WITH DESCENDER 0x049A "K'" # CYRILLIC CAPITAL LETTER KA WITH DESCENDER 0x049B "k'" # CYRILLIC SMALL LETTER KA WITH DESCENDER 0x049C "K'" # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE 0x049D "k'" # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE 0x049E "K'" # CYRILLIC CAPITAL LETTER KA WITH STROKE 0x049F "k'" # CYRILLIC SMALL LETTER KA WITH STROKE 0x04A0 "K'" # CYRILLIC CAPITAL LETTER BASHKIR KA 0x04A1 "k'" # CYRILLIC SMALL LETTER BASHKIR KA 0x04A2 "N'" # CYRILLIC CAPITAL LETTER EN WITH DESCENDER 0x04A3 "n'" # CYRILLIC SMALL LETTER EN WITH DESCENDER 0x04A4 Ng # CYRILLIC CAPITAL LIGATURE EN GHE 0x04A5 ng # CYRILLIC SMALL LIGATURE EN GHE 0x04A6 "P'" # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK 0x04A7 "p'" # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK 0x04A8 Kh # CYRILLIC CAPITAL LETTER ABKHASIAN HA 0x04A9 kh # CYRILLIC SMALL LETTER ABKHASIAN HA 0x04AA "S'" # CYRILLIC CAPITAL LETTER ES WITH DESCENDER 0x04AB "s'" # CYRILLIC SMALL LETTER ES WITH DESCENDER 0x04AC "T'" # CYRILLIC CAPITAL LETTER TE WITH DESCENDER 0x04AD "t'" # CYRILLIC SMALL LETTER TE WITH DESCENDER 0x04AE U # CYRILLIC CAPITAL LETTER STRAIGHT U 0x04AF u # CYRILLIC SMALL LETTER STRAIGHT U 0x04B0 "U'" # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE 0x04B1 "u'" # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE 0x04B2 "Kh'" # CYRILLIC CAPITAL LETTER HA WITH DESCENDER 0x04B3 "kh'" # CYRILLIC SMALL LETTER HA WITH DESCENDER 0x04B4 Tts # CYRILLIC CAPITAL LIGATURE TE TSE 0x04B5 tts # CYRILLIC SMALL LIGATURE TE TSE 0x04B6 "Ch'" # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER 0x04B7 "ch'" # CYRILLIC SMALL LETTER CHE WITH DESCENDER 0x04B8 "Ch'" # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE 0x04B9 "ch'" # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE 0x04BA H # CYRILLIC CAPITAL LETTER SHHA 0x04BB h # 
CYRILLIC SMALL LETTER SHHA 0x04BC Ch # CYRILLIC CAPITAL LETTER ABKHASIAN CHE 0x04BD ch # CYRILLIC SMALL LETTER ABKHASIAN CHE 0x04BE "Ch'" # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER 0x04BF "ch'" # CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER 0x04C0 ` # CYRILLIC LETTER PALOCHKA 0x04C1 Zh # CYRILLIC CAPITAL LETTER ZHE WITH BREVE 0x04C2 zh # CYRILLIC SMALL LETTER ZHE WITH BREVE 0x04C3 "K'" # CYRILLIC CAPITAL LETTER KA WITH HOOK 0x04C4 "k'" # CYRILLIC SMALL LETTER KA WITH HOOK 0x04C7 "N'" # CYRILLIC CAPITAL LETTER EN WITH HOOK 0x04C8 "n'" # CYRILLIC SMALL LETTER EN WITH HOOK 0x04CB Ch # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE 0x04CC ch # CYRILLIC SMALL LETTER KHAKASSIAN CHE 0x04D0 a # CYRILLIC CAPITAL LETTER A WITH BREVE 0x04D1 a # CYRILLIC SMALL LETTER A WITH BREVE 0x04D2 A # CYRILLIC CAPITAL LETTER A WITH DIAERESIS 0x04D3 a # CYRILLIC SMALL LETTER A WITH DIAERESIS 0x04D4 Ae # CYRILLIC CAPITAL LIGATURE A IE 0x04D5 ae # CYRILLIC SMALL LIGATURE A IE 0x04D6 Ie # CYRILLIC CAPITAL LETTER IE WITH BREVE 0x04D7 ie # CYRILLIC SMALL LETTER IE WITH BREVE 0x04D8 @ # CYRILLIC CAPITAL LETTER SCHWA 0x04D9 @ # CYRILLIC SMALL LETTER SCHWA 0x04DA @ # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS 0x04DB @ # CYRILLIC SMALL LETTER SCHWA WITH DIAERESIS 0x04DC Zh # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS 0x04DD zh # CYRILLIC SMALL LETTER ZHE WITH DIAERESIS 0x04DE Z # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS 0x04DF z # CYRILLIC SMALL LETTER ZE WITH DIAERESIS 0x04E0 Dz # CYRILLIC CAPITAL LETTER ABKHASIAN DZE 0x04E1 dz # CYRILLIC SMALL LETTER ABKHASIAN DZE 0x04E2 I # CYRILLIC CAPITAL LETTER I WITH MACRON 0x04E3 i # CYRILLIC SMALL LETTER I WITH MACRON 0x04E4 I # CYRILLIC CAPITAL LETTER I WITH DIAERESIS 0x04E5 i # CYRILLIC SMALL LETTER I WITH DIAERESIS 0x04E6 O # CYRILLIC CAPITAL LETTER O WITH DIAERESIS 0x04E7 o # CYRILLIC SMALL LETTER O WITH DIAERESIS 0x04E8 O # CYRILLIC CAPITAL LETTER BARRED O 0x04E9 o # CYRILLIC SMALL LETTER BARRED O 0x04EA O # CYRILLIC CAPITAL LETTER 
BARRED O WITH DIAERESIS 0x04EB o # CYRILLIC SMALL LETTER BARRED O WITH DIAERESIS 0x04EC E # CYRILLIC CAPITAL LETTER E WITH DIAERESIS 0x04ED e # CYRILLIC SMALL LETTER E WITH DIAERESIS 0x04EE U # CYRILLIC CAPITAL LETTER U WITH MACRON 0x04EF u # CYRILLIC SMALL LETTER U WITH MACRON 0x04F0 U # CYRILLIC CAPITAL LETTER U WITH DIAERESIS 0x04F1 u # CYRILLIC SMALL LETTER U WITH DIAERESIS 0x04F2 U # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE 0x04F3 u # CYRILLIC SMALL LETTER U WITH DOUBLE ACUTE 0x04F4 Ch # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS 0x04F5 ch # CYRILLIC SMALL LETTER CHE WITH DIAERESIS 0x04F8 Y # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS 0x04F9 y # CYRILLIC SMALL LETTER YERU WITH DIAERESIS # # Characters 0x0500 to 0x05FF # 0x0531 A # ARMENIAN CAPITAL LETTER AYB 0x0532 B # ARMENIAN CAPITAL LETTER BEN 0x0533 G # ARMENIAN CAPITAL LETTER GIM 0x0534 D # ARMENIAN CAPITAL LETTER DA 0x0535 E # ARMENIAN CAPITAL LETTER ECH 0x0536 Z # ARMENIAN CAPITAL LETTER ZA 0x0537 E # ARMENIAN CAPITAL LETTER EH 0x0538 E # ARMENIAN CAPITAL LETTER ET 0x0539 T` # ARMENIAN CAPITAL LETTER TO 0x053A Zh # ARMENIAN CAPITAL LETTER ZHE 0x053B I # ARMENIAN CAPITAL LETTER INI 0x053C L # ARMENIAN CAPITAL LETTER LIWN 0x053D Kh # ARMENIAN CAPITAL LETTER XEH 0x053E Ts # ARMENIAN CAPITAL LETTER CA 0x053F K # ARMENIAN CAPITAL LETTER KEN 0x0540 H # ARMENIAN CAPITAL LETTER HO 0x0541 Dz # ARMENIAN CAPITAL LETTER JA 0x0542 Gh # ARMENIAN CAPITAL LETTER GHAD 0x0543 Ch # ARMENIAN CAPITAL LETTER CHEH 0x0544 M # ARMENIAN CAPITAL LETTER MEN 0x0545 Y # ARMENIAN CAPITAL LETTER YI 0x0546 N # ARMENIAN CAPITAL LETTER NOW 0x0547 Sh # ARMENIAN CAPITAL LETTER SHA 0x0548 O # ARMENIAN CAPITAL LETTER VO 0x0549 Ch` # ARMENIAN CAPITAL LETTER CHA 0x054A P # ARMENIAN CAPITAL LETTER PEH 0x054B J # ARMENIAN CAPITAL LETTER JHEH 0x054C Rh # ARMENIAN CAPITAL LETTER RA 0x054D S # ARMENIAN CAPITAL LETTER SEH 0x054E V # ARMENIAN CAPITAL LETTER VEW 0x054F T # ARMENIAN CAPITAL LETTER TIWN 0x0550 R # ARMENIAN CAPITAL LETTER REH 0x0551 
Ts` # ARMENIAN CAPITAL LETTER CO 0x0552 W # ARMENIAN CAPITAL LETTER YIWN 0x0553 P` # ARMENIAN CAPITAL LETTER PIWR 0x0554 K` # ARMENIAN CAPITAL LETTER KEH 0x0555 O # ARMENIAN CAPITAL LETTER OH 0x0556 F # ARMENIAN CAPITAL LETTER FEH 0x0559 < # ARMENIAN MODIFIER LETTER LEFT HALF RING 0x055A "'" # ARMENIAN APOSTROPHE 0x055B / # ARMENIAN EMPHASIS MARK 0x055C ! # ARMENIAN EXCLAMATION MARK 0x055D , # ARMENIAN COMMA 0x055E ? # ARMENIAN QUESTION MARK 0x055F . # ARMENIAN ABBREVIATION MARK 0x0561 a # ARMENIAN SMALL LETTER AYB 0x0562 b # ARMENIAN SMALL LETTER BEN 0x0563 g # ARMENIAN SMALL LETTER GIM 0x0564 d # ARMENIAN SMALL LETTER DA 0x0565 e # ARMENIAN SMALL LETTER ECH 0x0566 z # ARMENIAN SMALL LETTER ZA 0x0567 e # ARMENIAN SMALL LETTER EH 0x0568 e # ARMENIAN SMALL LETTER ET 0x0569 t` # ARMENIAN SMALL LETTER TO 0x056A zh # ARMENIAN SMALL LETTER ZHE 0x056B i # ARMENIAN SMALL LETTER INI 0x056C l # ARMENIAN SMALL LETTER LIWN 0x056D kh # ARMENIAN SMALL LETTER XEH 0x056E ts # ARMENIAN SMALL LETTER CA 0x056F k # ARMENIAN SMALL LETTER KEN 0x0570 h # ARMENIAN SMALL LETTER HO 0x0571 dz # ARMENIAN SMALL LETTER JA 0x0572 gh # ARMENIAN SMALL LETTER GHAD 0x0573 ch # ARMENIAN SMALL LETTER CHEH 0x0574 m # ARMENIAN SMALL LETTER MEN 0x0575 y # ARMENIAN SMALL LETTER YI 0x0576 n # ARMENIAN SMALL LETTER NOW 0x0577 sh # ARMENIAN SMALL LETTER SHA 0x0578 o # ARMENIAN SMALL LETTER VO 0x0579 ch` # ARMENIAN SMALL LETTER CHA 0x057A p # ARMENIAN SMALL LETTER PEH 0x057B j # ARMENIAN SMALL LETTER JHEH 0x057C rh # ARMENIAN SMALL LETTER RA 0x057D s # ARMENIAN SMALL LETTER SEH 0x057E v # ARMENIAN SMALL LETTER VEW 0x057F t # ARMENIAN SMALL LETTER TIWN 0x0580 r # ARMENIAN SMALL LETTER REH 0x0581 ts` # ARMENIAN SMALL LETTER CO 0x0582 w # ARMENIAN SMALL LETTER YIWN 0x0583 p` # ARMENIAN SMALL LETTER PIWR 0x0584 k` # ARMENIAN SMALL LETTER KEH 0x0585 o # ARMENIAN SMALL LETTER OH 0x0586 f # ARMENIAN SMALL LETTER FEH 0x0587 ew # ARMENIAN SMALL LIGATURE ECH YIWN 0x0589 . 
# ARMENIAN FULL STOP 0x058A - # ARMENIAN HYPHEN 0x05B0 @ # HEBREW POINT SHEVA 0x05B1 e # HEBREW POINT HATAF SEGOL 0x05B2 a # HEBREW POINT HATAF PATAH 0x05B3 o # HEBREW POINT HATAF QAMATS 0x05B4 i # HEBREW POINT HIRIQ 0x05B5 e # HEBREW POINT TSERE 0x05B6 e # HEBREW POINT SEGOL 0x05B7 a # HEBREW POINT PATAH 0x05B8 a # HEBREW POINT QAMATS 0x05B9 o # HEBREW POINT HOLAM 0x05BB u # HEBREW POINT QUBUTS 0x05BC "'" # HEBREW POINT DAGESH OR MAPIQ 0x05C0 | # HEBREW PUNCTUATION PASEQ 0x05C3 : # HEBREW PUNCTUATION SOF PASUQ 0x05D1 b # HEBREW LETTER BET 0x05D2 g # HEBREW LETTER GIMEL 0x05D3 d # HEBREW LETTER DALET 0x05D4 h # HEBREW LETTER HE 0x05D5 v # HEBREW LETTER VAV 0x05D6 z # HEBREW LETTER ZAYIN 0x05D7 kh # HEBREW LETTER HET 0x05D8 t # HEBREW LETTER TET 0x05D9 y # HEBREW LETTER YOD 0x05DA k # HEBREW LETTER FINAL KAF 0x05DB k # HEBREW LETTER KAF 0x05DC l # HEBREW LETTER LAMED 0x05DD m # HEBREW LETTER FINAL MEM 0x05DE m # HEBREW LETTER MEM 0x05DF n # HEBREW LETTER FINAL NUN 0x05E0 n # HEBREW LETTER NUN 0x05E1 s # HEBREW LETTER SAMEKH 0x05E2 ` # HEBREW LETTER AYIN 0x05E3 p # HEBREW LETTER FINAL PE 0x05E4 p # HEBREW LETTER PE 0x05E5 ts # HEBREW LETTER FINAL TSADI 0x05E6 ts # HEBREW LETTER TSADI 0x05E7 q # HEBREW LETTER QOF 0x05E8 r # HEBREW LETTER RESH 0x05E9 sh # HEBREW LETTER SHIN 0x05EA t # HEBREW LETTER TAV 0x05F0 V # HEBREW LIGATURE YIDDISH DOUBLE VAV 0x05F1 oy # HEBREW LIGATURE YIDDISH VAV YOD 0x05F2 i # HEBREW LIGATURE YIDDISH DOUBLE YOD 0x05F3 "'" # HEBREW PUNCTUATION GERESH 0x05F4 '"' # HEBREW PUNCTUATION GERSHAYIM # # Characters 0x0600 to 0x06FF # 0x060C , # ARABIC COMMA 0x061B ; # ARABIC SEMICOLON 0x061F ? 
# ARABIC QUESTION MARK 0x0622 a # ARABIC LETTER ALEF WITH MADDA ABOVE 0x0623 "'" # ARABIC LETTER ALEF WITH HAMZA ABOVE 0x0624 "w'" # ARABIC LETTER WAW WITH HAMZA ABOVE 0x0626 "y'" # ARABIC LETTER YEH WITH HAMZA ABOVE 0x0628 b # ARABIC LETTER BEH 0x0629 @ # ARABIC LETTER TEH MARBUTA 0x062A t # ARABIC LETTER TEH 0x062B th # ARABIC LETTER THEH 0x062C j # ARABIC LETTER JEEM 0x062D H # ARABIC LETTER HAH 0x062E kh # ARABIC LETTER KHAH 0x062F d # ARABIC LETTER DAL 0x0630 dh # ARABIC LETTER THAL 0x0631 r # ARABIC LETTER REH 0x0632 z # ARABIC LETTER ZAIN 0x0633 s # ARABIC LETTER SEEN 0x0634 sh # ARABIC LETTER SHEEN 0x0635 S # ARABIC LETTER SAD 0x0636 D # ARABIC LETTER DAD 0x0637 T # ARABIC LETTER TAH 0x0638 Z # ARABIC LETTER ZAH 0x0639 ` # ARABIC LETTER AIN 0x063A G # ARABIC LETTER GHAIN 0x0641 f # ARABIC LETTER FEH 0x0642 q # ARABIC LETTER QAF 0x0643 k # ARABIC LETTER KAF 0x0644 l # ARABIC LETTER LAM 0x0645 m # ARABIC LETTER MEEM 0x0646 n # ARABIC LETTER NOON 0x0647 h # ARABIC LETTER HEH 0x0648 w # ARABIC LETTER WAW 0x0649 ~ # ARABIC LETTER ALEF MAKSURA 0x064A y # ARABIC LETTER YEH 0x064B an # ARABIC FATHATAN 0x064C un # ARABIC DAMMATAN 0x064D in # ARABIC KASRATAN 0x064E a # ARABIC FATHA 0x064F u # ARABIC DAMMA 0x0650 i # ARABIC KASRA 0x0651 W # ARABIC SHADDA 0x0654 "'" # ARABIC HAMZA ABOVE 0x0655 "'" # ARABIC HAMZA BELOW 0x0660 0 # ARABIC-INDIC DIGIT ZERO 0x0661 1 # ARABIC-INDIC DIGIT ONE 0x0662 2 # ARABIC-INDIC DIGIT TWO 0x0663 3 # ARABIC-INDIC DIGIT THREE 0x0664 4 # ARABIC-INDIC DIGIT FOUR 0x0665 5 # ARABIC-INDIC DIGIT FIVE 0x0666 6 # ARABIC-INDIC DIGIT SIX 0x0667 7 # ARABIC-INDIC DIGIT SEVEN 0x0668 8 # ARABIC-INDIC DIGIT EIGHT 0x0669 9 # ARABIC-INDIC DIGIT NINE 0x066A % # ARABIC PERCENT SIGN 0x066B . 
# ARABIC DECIMAL SEPARATOR 0x066C , # ARABIC THOUSANDS SEPARATOR 0x066D * # ARABIC FIVE POINTED STAR 0x0671 "'" # ARABIC LETTER ALEF WASLA 0x0672 "'" # ARABIC LETTER ALEF WITH WAVY HAMZA ABOVE 0x0673 "'" # ARABIC LETTER ALEF WITH WAVY HAMZA BELOW 0x0675 "'" # ARABIC LETTER HIGH HAMZA ALEF 0x0676 "'w" # ARABIC LETTER HIGH HAMZA WAW 0x0677 "'u" # ARABIC LETTER U WITH HAMZA ABOVE 0x0678 "'y" # ARABIC LETTER HIGH HAMZA YEH 0x0679 tt # ARABIC LETTER TTEH 0x067A tth # ARABIC LETTER TTEHEH 0x067B b # ARABIC LETTER BEEH 0x067C t # ARABIC LETTER TEH WITH RING 0x067D T # ARABIC LETTER TEH WITH THREE DOTS ABOVE DOWNWARDS 0x067E p # ARABIC LETTER PEH 0x067F th # ARABIC LETTER TEHEH 0x0680 bh # ARABIC LETTER BEHEH 0x0681 "'h" # ARABIC LETTER HAH WITH HAMZA ABOVE 0x0682 H # ARABIC LETTER HAH WITH TWO DOTS VERTICAL ABOVE 0x0683 ny # ARABIC LETTER NYEH 0x0684 dy # ARABIC LETTER DYEH 0x0685 H # ARABIC LETTER HAH WITH THREE DOTS ABOVE 0x0686 ch # ARABIC LETTER TCHEH 0x0687 cch # ARABIC LETTER TCHEHEH 0x0688 dd # ARABIC LETTER DDAL 0x0689 D # ARABIC LETTER DAL WITH RING 0x068A D # ARABIC LETTER DAL WITH DOT BELOW 0x068B Dt # ARABIC LETTER DAL WITH DOT BELOW AND SMALL TAH 0x068C dh # ARABIC LETTER DAHAL 0x068D ddh # ARABIC LETTER DDAHAL 0x068E d # ARABIC LETTER DUL 0x068F D # ARABIC LETTER DAL WITH THREE DOTS ABOVE DOWNWARDS 0x0690 D # ARABIC LETTER DAL WITH FOUR DOTS ABOVE 0x0691 rr # ARABIC LETTER RREH 0x0692 R # ARABIC LETTER REH WITH SMALL V 0x0693 R # ARABIC LETTER REH WITH RING 0x0694 R # ARABIC LETTER REH WITH DOT BELOW 0x0695 R # ARABIC LETTER REH WITH SMALL V BELOW 0x0696 R # ARABIC LETTER REH WITH DOT BELOW AND DOT ABOVE 0x0697 R # ARABIC LETTER REH WITH TWO DOTS ABOVE 0x0698 j # ARABIC LETTER JEH 0x0699 R # ARABIC LETTER REH WITH FOUR DOTS ABOVE 0x069A S # ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE 0x069B S # ARABIC LETTER SEEN WITH THREE DOTS BELOW 0x069C S # ARABIC LETTER SEEN WITH THREE DOTS BELOW AND THREE DOTS ABOVE 0x069D S # ARABIC LETTER SAD WITH TWO DOTS BELOW 
0x069E S # ARABIC LETTER SAD WITH THREE DOTS ABOVE 0x069F T # ARABIC LETTER TAH WITH THREE DOTS ABOVE 0x06A0 GH # ARABIC LETTER AIN WITH THREE DOTS ABOVE 0x06A1 F # ARABIC LETTER DOTLESS FEH 0x06A2 F # ARABIC LETTER FEH WITH DOT MOVED BELOW 0x06A3 F # ARABIC LETTER FEH WITH DOT BELOW 0x06A4 v # ARABIC LETTER VEH 0x06A5 f # ARABIC LETTER FEH WITH THREE DOTS BELOW 0x06A6 ph # ARABIC LETTER PEHEH 0x06A7 Q # ARABIC LETTER QAF WITH DOT ABOVE 0x06A8 Q # ARABIC LETTER QAF WITH THREE DOTS ABOVE 0x06A9 kh # ARABIC LETTER KEHEH 0x06AA k # ARABIC LETTER SWASH KAF 0x06AB K # ARABIC LETTER KAF WITH RING 0x06AC K # ARABIC LETTER KAF WITH DOT ABOVE 0x06AD ng # ARABIC LETTER NG 0x06AE K # ARABIC LETTER KAF WITH THREE DOTS BELOW 0x06AF g # ARABIC LETTER GAF 0x06B0 G # ARABIC LETTER GAF WITH RING 0x06B1 N # ARABIC LETTER NGOEH 0x06B2 G # ARABIC LETTER GAF WITH TWO DOTS BELOW 0x06B3 G # ARABIC LETTER GUEH 0x06B4 G # ARABIC LETTER GAF WITH THREE DOTS ABOVE 0x06B5 L # ARABIC LETTER LAM WITH SMALL V 0x06B6 L # ARABIC LETTER LAM WITH DOT ABOVE 0x06B7 L # ARABIC LETTER LAM WITH THREE DOTS ABOVE 0x06B8 L # ARABIC LETTER LAM WITH THREE DOTS BELOW 0x06B9 N # ARABIC LETTER NOON WITH DOT BELOW 0x06BA N # ARABIC LETTER NOON GHUNNA 0x06BB N # ARABIC LETTER RNOON 0x06BC N # ARABIC LETTER NOON WITH RING 0x06BD N # ARABIC LETTER NOON WITH THREE DOTS ABOVE 0x06BE h # ARABIC LETTER HEH DOACHASHMEE 0x06BF Ch # ARABIC LETTER TCHEH WITH DOT ABOVE 0x06C0 hy # ARABIC LETTER HEH WITH YEH ABOVE 0x06C1 h # ARABIC LETTER HEH GOAL 0x06C2 H # ARABIC LETTER HEH GOAL WITH HAMZA ABOVE 0x06C3 @ # ARABIC LETTER TEH MARBUTA GOAL 0x06C4 W # ARABIC LETTER WAW WITH RING 0x06C5 oe # ARABIC LETTER KIRGHIZ OE 0x06C6 oe # ARABIC LETTER OE 0x06C7 u # ARABIC LETTER U 0x06C8 yu # ARABIC LETTER YU 0x06C9 yu # ARABIC LETTER KIRGHIZ YU 0x06CA W # ARABIC LETTER WAW WITH TWO DOTS ABOVE 0x06CB v # ARABIC LETTER VE 0x06CC y # ARABIC LETTER FARSI YEH 0x06CD Y # ARABIC LETTER YEH WITH TAIL 0x06CE Y # ARABIC LETTER YEH WITH SMALL V 
0x06CF W # ARABIC LETTER WAW WITH DOT ABOVE 0x06D2 y # ARABIC LETTER YEH BARREE 0x06D3 "y'" # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE 0x06D4 . # ARABIC FULL STOP 0x06D5 ae # ARABIC LETTER AE 0x06DD @ # ARABIC END OF AYAH 0x06DE # # ARABIC START OF RUB EL HIZB 0x06E9 ^ # ARABIC PLACE OF SAJDAH 0x06F0 0 # EXTENDED ARABIC-INDIC DIGIT ZERO 0x06F1 1 # EXTENDED ARABIC-INDIC DIGIT ONE 0x06F2 2 # EXTENDED ARABIC-INDIC DIGIT TWO 0x06F3 3 # EXTENDED ARABIC-INDIC DIGIT THREE 0x06F4 4 # EXTENDED ARABIC-INDIC DIGIT FOUR 0x06F5 5 # EXTENDED ARABIC-INDIC DIGIT FIVE 0x06F6 6 # EXTENDED ARABIC-INDIC DIGIT SIX 0x06F7 7 # EXTENDED ARABIC-INDIC DIGIT SEVEN 0x06F8 8 # EXTENDED ARABIC-INDIC DIGIT EIGHT 0x06F9 9 # EXTENDED ARABIC-INDIC DIGIT NINE 0x06FA Sh # ARABIC LETTER SHEEN WITH DOT BELOW 0x06FB D # ARABIC LETTER DAD WITH DOT BELOW 0x06FC Gh # ARABIC LETTER GHAIN WITH DOT BELOW 0x06FD & # ARABIC SIGN SINDHI AMPERSAND 0x06FE +m # ARABIC SIGN SINDHI POSTPOSITION MEN # # Characters 0x0700 to 0x07FF # 0x0700 // # SYRIAC END OF PARAGRAPH 0x0701 / # SYRIAC SUPRALINEAR FULL STOP 0x0702 , # SYRIAC SUBLINEAR FULL STOP 0x0703 ! # SYRIAC SUPRALINEAR COLON 0x0704 ! 
# SYRIAC SUBLINEAR COLON 0x0705 - # SYRIAC HORIZONTAL COLON 0x0706 , # SYRIAC COLON SKEWED LEFT 0x0707 , # SYRIAC COLON SKEWED RIGHT 0x0708 ; # SYRIAC SUPRALINEAR COLON SKEWED LEFT 0x070A ~ # SYRIAC CONTRACTION 0x070B { # SYRIAC HARKLEAN OBELUS 0x070C } # SYRIAC HARKLEAN METOBELUS 0x070D * # SYRIAC HARKLEAN ASTERISCUS 0x0710 "'" # SYRIAC LETTER ALAPH 0x0712 b # SYRIAC LETTER BETH 0x0713 g # SYRIAC LETTER GAMAL 0x0714 g # SYRIAC LETTER GAMAL GARSHUNI 0x0715 d # SYRIAC LETTER DALATH 0x0716 d # SYRIAC LETTER DOTLESS DALATH RISH 0x0717 h # SYRIAC LETTER HE 0x0718 w # SYRIAC LETTER WAW 0x0719 z # SYRIAC LETTER ZAIN 0x071A H # SYRIAC LETTER HETH 0x071B t # SYRIAC LETTER TETH 0x071C t # SYRIAC LETTER TETH GARSHUNI 0x071D y # SYRIAC LETTER YUDH 0x071E yh # SYRIAC LETTER YUDH HE 0x071F k # SYRIAC LETTER KAPH 0x0720 l # SYRIAC LETTER LAMADH 0x0721 m # SYRIAC LETTER MIM 0x0722 n # SYRIAC LETTER NUN 0x0723 s # SYRIAC LETTER SEMKATH 0x0724 s # SYRIAC LETTER FINAL SEMKATH 0x0725 ` # SYRIAC LETTER E 0x0726 p # SYRIAC LETTER PE 0x0727 p # SYRIAC LETTER REVERSED PE 0x0728 S # SYRIAC LETTER SADHE 0x0729 q # SYRIAC LETTER QAPH 0x072A r # SYRIAC LETTER RISH 0x072B sh # SYRIAC LETTER SHIN 0x072C t # SYRIAC LETTER TAW 0x0730 a # SYRIAC PTHAHA ABOVE 0x0731 a # SYRIAC PTHAHA BELOW 0x0732 a # SYRIAC PTHAHA DOTTED 0x0733 A # SYRIAC ZQAPHA ABOVE 0x0734 A # SYRIAC ZQAPHA BELOW 0x0735 A # SYRIAC ZQAPHA DOTTED 0x0736 e # SYRIAC RBASA ABOVE 0x0737 e # SYRIAC RBASA BELOW 0x0738 e # SYRIAC DOTTED ZLAMA HORIZONTAL 0x0739 E # SYRIAC DOTTED ZLAMA ANGULAR 0x073A i # SYRIAC HBASA ABOVE 0x073B i # SYRIAC HBASA BELOW 0x073C u # SYRIAC HBASA-ESASA DOTTED 0x073D u # SYRIAC ESASA ABOVE 0x073E u # SYRIAC ESASA BELOW 0x073F o # SYRIAC RWAHA 0x0741 ` # SYRIAC QUSHSHAYA 0x0742 "'" # SYRIAC RUKKAKHA 0x0745 X # SYRIAC THREE DOTS ABOVE 0x0746 Q # SYRIAC THREE DOTS BELOW 0x0747 @ # SYRIAC OBLIQUE LINE ABOVE 0x0748 @ # SYRIAC OBLIQUE LINE BELOW 0x0749 | # SYRIAC MUSIC 0x074A + # SYRIAC BARREKH 0x0780 h # THAANA 
LETTER HAA 0x0781 sh # THAANA LETTER SHAVIYANI 0x0782 n # THAANA LETTER NOONU 0x0783 r # THAANA LETTER RAA 0x0784 b # THAANA LETTER BAA 0x0785 L # THAANA LETTER LHAVIYANI 0x0786 k # THAANA LETTER KAAFU 0x0787 "'" # THAANA LETTER ALIFU 0x0788 v # THAANA LETTER VAAVU 0x0789 m # THAANA LETTER MEEMU 0x078A f # THAANA LETTER FAAFU 0x078B dh # THAANA LETTER DHAALU 0x078C th # THAANA LETTER THAA 0x078D l # THAANA LETTER LAAMU 0x078E g # THAANA LETTER GAAFU 0x078F ny # THAANA LETTER GNAVIYANI 0x0790 s # THAANA LETTER SEENU 0x0791 d # THAANA LETTER DAVIYANI 0x0792 z # THAANA LETTER ZAVIYANI 0x0793 t # THAANA LETTER TAVIYANI 0x0794 y # THAANA LETTER YAA 0x0795 p # THAANA LETTER PAVIYANI 0x0796 j # THAANA LETTER JAVIYANI 0x0797 ch # THAANA LETTER CHAVIYANI 0x0798 tt # THAANA LETTER TTAA 0x0799 hh # THAANA LETTER HHAA 0x079A kh # THAANA LETTER KHAA 0x079B th # THAANA LETTER THAALU 0x079C z # THAANA LETTER ZAA 0x079D sh # THAANA LETTER SHEENU 0x079E s # THAANA LETTER SAADHU 0x079F d # THAANA LETTER DAADHU 0x07A0 t # THAANA LETTER TO 0x07A1 z # THAANA LETTER ZO 0x07A2 ` # THAANA LETTER AINU 0x07A3 gh # THAANA LETTER GHAINU 0x07A4 q # THAANA LETTER QAAFU 0x07A5 w # THAANA LETTER WAAVU 0x07A6 a # THAANA ABAFILI 0x07A7 aa # THAANA AABAAFILI 0x07A8 i # THAANA IBIFILI 0x07A9 ee # THAANA EEBEEFILI 0x07AA u # THAANA UBUFILI 0x07AB oo # THAANA OOBOOFILI 0x07AC e # THAANA EBEFILI 0x07AD ey # THAANA EYBEYFILI 0x07AE o # THAANA OBOFILI 0x07AF oa # THAANA OABOAFILI # # Characters 0x0800 to 0x08FF # # # Characters 0x0900 to 0x09FF # 0x0901 N # DEVANAGARI SIGN CANDRABINDU 0x0902 N # DEVANAGARI SIGN ANUSVARA 0x0903 H # DEVANAGARI SIGN VISARGA 0x0905 a # DEVANAGARI LETTER A 0x0906 aa # DEVANAGARI LETTER AA 0x0907 i # DEVANAGARI LETTER I 0x0908 ii # DEVANAGARI LETTER II 0x0909 u # DEVANAGARI LETTER U 0x090A uu # DEVANAGARI LETTER UU 0x090B R # DEVANAGARI LETTER VOCALIC R 0x090C L # DEVANAGARI LETTER VOCALIC L 0x090D eN # DEVANAGARI LETTER CANDRA E 0x090E e # DEVANAGARI LETTER SHORT E 0x090F e # 
DEVANAGARI LETTER E 0x0910 ai # DEVANAGARI LETTER AI 0x0911 oN # DEVANAGARI LETTER CANDRA O 0x0912 o # DEVANAGARI LETTER SHORT O 0x0913 o # DEVANAGARI LETTER O 0x0914 au # DEVANAGARI LETTER AU 0x0915 k # DEVANAGARI LETTER KA 0x0916 kh # DEVANAGARI LETTER KHA 0x0917 g # DEVANAGARI LETTER GA 0x0918 gh # DEVANAGARI LETTER GHA 0x0919 ng # DEVANAGARI LETTER NGA 0x091A c # DEVANAGARI LETTER CA 0x091B ch # DEVANAGARI LETTER CHA 0x091C j # DEVANAGARI LETTER JA 0x091D jh # DEVANAGARI LETTER JHA 0x091E ny # DEVANAGARI LETTER NYA 0x091F tt # DEVANAGARI LETTER TTA 0x0920 tth # DEVANAGARI LETTER TTHA 0x0921 dd # DEVANAGARI LETTER DDA 0x0922 ddh # DEVANAGARI LETTER DDHA 0x0923 nn # DEVANAGARI LETTER NNA 0x0924 t # DEVANAGARI LETTER TA 0x0925 th # DEVANAGARI LETTER THA 0x0926 d # DEVANAGARI LETTER DA 0x0927 dh # DEVANAGARI LETTER DHA 0x0928 n # DEVANAGARI LETTER NA 0x0929 nnn # DEVANAGARI LETTER NNNA 0x092A p # DEVANAGARI LETTER PA 0x092B ph # DEVANAGARI LETTER PHA 0x092C b # DEVANAGARI LETTER BA 0x092D bh # DEVANAGARI LETTER BHA 0x092E m # DEVANAGARI LETTER MA 0x092F y # DEVANAGARI LETTER YA 0x0930 r # DEVANAGARI LETTER RA 0x0931 rr # DEVANAGARI LETTER RRA 0x0932 l # DEVANAGARI LETTER LA 0x0933 l # DEVANAGARI LETTER LLA 0x0934 lll # DEVANAGARI LETTER LLLA 0x0935 v # DEVANAGARI LETTER VA 0x0936 sh # DEVANAGARI LETTER SHA 0x0937 ss # DEVANAGARI LETTER SSA 0x0938 s # DEVANAGARI LETTER SA 0x0939 h # DEVANAGARI LETTER HA 0x093C "'" # DEVANAGARI SIGN NUKTA 0x093D "'" # DEVANAGARI SIGN AVAGRAHA 0x093E aa # DEVANAGARI VOWEL SIGN AA 0x093F i # DEVANAGARI VOWEL SIGN I 0x0940 ii # DEVANAGARI VOWEL SIGN II 0x0941 u # DEVANAGARI VOWEL SIGN U 0x0942 uu # DEVANAGARI VOWEL SIGN UU 0x0943 R # DEVANAGARI VOWEL SIGN VOCALIC R 0x0944 RR # DEVANAGARI VOWEL SIGN VOCALIC RR 0x0945 eN # DEVANAGARI VOWEL SIGN CANDRA E 0x0946 e # DEVANAGARI VOWEL SIGN SHORT E 0x0947 e # DEVANAGARI VOWEL SIGN E 0x0948 ai # DEVANAGARI VOWEL SIGN AI 0x0949 oN # DEVANAGARI VOWEL SIGN CANDRA O 0x094A o # DEVANAGARI VOWEL SIGN 
SHORT O 0x094B o # DEVANAGARI VOWEL SIGN O 0x094C au # DEVANAGARI VOWEL SIGN AU 0x0950 AUM # DEVANAGARI OM 0x0951 "'" # DEVANAGARI STRESS SIGN UDATTA 0x0952 "'" # DEVANAGARI STRESS SIGN ANUDATTA 0x0953 ` # DEVANAGARI GRAVE ACCENT 0x0954 "'" # DEVANAGARI ACUTE ACCENT 0x0958 q # DEVANAGARI LETTER QA 0x0959 khh # DEVANAGARI LETTER KHHA 0x095A ghh # DEVANAGARI LETTER GHHA 0x095B z # DEVANAGARI LETTER ZA 0x095C dddh # DEVANAGARI LETTER DDDHA 0x095D rh # DEVANAGARI LETTER RHA 0x095E f # DEVANAGARI LETTER FA 0x095F yy # DEVANAGARI LETTER YYA 0x0960 RR # DEVANAGARI LETTER VOCALIC RR 0x0961 LL # DEVANAGARI LETTER VOCALIC LL 0x0962 L # DEVANAGARI VOWEL SIGN VOCALIC L 0x0963 LL # DEVANAGARI VOWEL SIGN VOCALIC LL 0x0964 / # DEVANAGARI DANDA 0x0965 // # DEVANAGARI DOUBLE DANDA 0x0966 0 # DEVANAGARI DIGIT ZERO 0x0967 1 # DEVANAGARI DIGIT ONE 0x0968 2 # DEVANAGARI DIGIT TWO 0x0969 3 # DEVANAGARI DIGIT THREE 0x096A 4 # DEVANAGARI DIGIT FOUR 0x096B 5 # DEVANAGARI DIGIT FIVE 0x096C 6 # DEVANAGARI DIGIT SIX 0x096D 7 # DEVANAGARI DIGIT SEVEN 0x096E 8 # DEVANAGARI DIGIT EIGHT 0x096F 9 # DEVANAGARI DIGIT NINE 0x0970 . 
# DEVANAGARI ABBREVIATION SIGN 0x0981 N # BENGALI SIGN CANDRABINDU 0x0982 N # BENGALI SIGN ANUSVARA 0x0983 H # BENGALI SIGN VISARGA 0x0985 a # BENGALI LETTER A 0x0986 aa # BENGALI LETTER AA 0x0987 i # BENGALI LETTER I 0x0988 ii # BENGALI LETTER II 0x0989 u # BENGALI LETTER U 0x098A uu # BENGALI LETTER UU 0x098B R # BENGALI LETTER VOCALIC R 0x098C RR # BENGALI LETTER VOCALIC L 0x098F e # BENGALI LETTER E 0x0990 ai # BENGALI LETTER AI 0x0993 o # BENGALI LETTER O 0x0994 au # BENGALI LETTER AU 0x0995 k # BENGALI LETTER KA 0x0996 kh # BENGALI LETTER KHA 0x0997 g # BENGALI LETTER GA 0x0998 gh # BENGALI LETTER GHA 0x0999 ng # BENGALI LETTER NGA 0x099A c # BENGALI LETTER CA 0x099B ch # BENGALI LETTER CHA 0x099C j # BENGALI LETTER JA 0x099D jh # BENGALI LETTER JHA 0x099E ny # BENGALI LETTER NYA 0x099F tt # BENGALI LETTER TTA 0x09A0 tth # BENGALI LETTER TTHA 0x09A1 dd # BENGALI LETTER DDA 0x09A2 ddh # BENGALI LETTER DDHA 0x09A3 nn # BENGALI LETTER NNA 0x09A4 t # BENGALI LETTER TA 0x09A5 th # BENGALI LETTER THA 0x09A6 d # BENGALI LETTER DA 0x09A7 dh # BENGALI LETTER DHA 0x09A8 n # BENGALI LETTER NA 0x09AA p # BENGALI LETTER PA 0x09AB ph # BENGALI LETTER PHA 0x09AC b # BENGALI LETTER BA 0x09AD bh # BENGALI LETTER BHA 0x09AE m # BENGALI LETTER MA 0x09AF y # BENGALI LETTER YA 0x09B0 r # BENGALI LETTER RA 0x09B2 l # BENGALI LETTER LA 0x09B6 sh # BENGALI LETTER SHA 0x09B7 ss # BENGALI LETTER SSA 0x09B8 s # BENGALI LETTER SA 0x09B9 h # BENGALI LETTER HA 0x09BC "'" # BENGALI SIGN NUKTA 0x09BE aa # BENGALI VOWEL SIGN AA 0x09BF i # BENGALI VOWEL SIGN I 0x09C0 ii # BENGALI VOWEL SIGN II 0x09C1 u # BENGALI VOWEL SIGN U 0x09C2 uu # BENGALI VOWEL SIGN UU 0x09C3 R # BENGALI VOWEL SIGN VOCALIC R 0x09C4 RR # BENGALI VOWEL SIGN VOCALIC RR 0x09C7 e # BENGALI VOWEL SIGN E 0x09C8 ai # BENGALI VOWEL SIGN AI 0x09CB o # BENGALI VOWEL SIGN O 0x09CC au # BENGALI VOWEL SIGN AU 0x09D7 + # BENGALI AU LENGTH MARK 0x09DC rr # BENGALI LETTER RRA 0x09DD rh # BENGALI LETTER RHA 0x09DF yy # BENGALI LETTER YYA 
0x09E0 RR # BENGALI LETTER VOCALIC RR 0x09E1 LL # BENGALI LETTER VOCALIC LL 0x09E2 L # BENGALI VOWEL SIGN VOCALIC L 0x09E3 LL # BENGALI VOWEL SIGN VOCALIC LL 0x09E6 0 # BENGALI DIGIT ZERO 0x09E7 1 # BENGALI DIGIT ONE 0x09E8 2 # BENGALI DIGIT TWO 0x09E9 3 # BENGALI DIGIT THREE 0x09EA 4 # BENGALI DIGIT FOUR 0x09EB 5 # BENGALI DIGIT FIVE 0x09EC 6 # BENGALI DIGIT SIX 0x09ED 7 # BENGALI DIGIT SEVEN 0x09EE 8 # BENGALI DIGIT EIGHT 0x09EF 9 # BENGALI DIGIT NINE 0x09F0 "r'" # BENGALI LETTER RA WITH MIDDLE DIAGONAL 0x09F1 r` # BENGALI LETTER RA WITH LOWER DIAGONAL 0x09F2 Rs # BENGALI RUPEE MARK 0x09F3 Rs # BENGALI RUPEE SIGN 0x09F4 1/ # BENGALI CURRENCY NUMERATOR ONE 0x09F5 2/ # BENGALI CURRENCY NUMERATOR TWO 0x09F6 3/ # BENGALI CURRENCY NUMERATOR THREE 0x09F7 4/ # BENGALI CURRENCY NUMERATOR FOUR 0x09F8 1 - 1/ # BENGALI CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR 0x09F9 /16 # BENGALI CURRENCY DENOMINATOR SIXTEEN 0x0A02 N # GURMUKHI SIGN BINDI 0x0A05 a # GURMUKHI LETTER A 0x0A06 aa # GURMUKHI LETTER AA 0x0A07 i # GURMUKHI LETTER I 0x0A08 ii # GURMUKHI LETTER II 0x0A09 u # GURMUKHI LETTER U 0x0A0A uu # GURMUKHI LETTER UU 0x0A0F ee # GURMUKHI LETTER EE 0x0A10 ai # GURMUKHI LETTER AI 0x0A13 oo # GURMUKHI LETTER OO 0x0A14 au # GURMUKHI LETTER AU 0x0A15 k # GURMUKHI LETTER KA 0x0A16 kh # GURMUKHI LETTER KHA 0x0A17 g # GURMUKHI LETTER GA 0x0A18 gh # GURMUKHI LETTER GHA 0x0A19 ng # GURMUKHI LETTER NGA 0x0A1A c # GURMUKHI LETTER CA 0x0A1B ch # GURMUKHI LETTER CHA 0x0A1C j # GURMUKHI LETTER JA 0x0A1D jh # GURMUKHI LETTER JHA 0x0A1E ny # GURMUKHI LETTER NYA 0x0A1F tt # GURMUKHI LETTER TTA 0x0A20 tth # GURMUKHI LETTER TTHA 0x0A21 dd # GURMUKHI LETTER DDA 0x0A22 ddh # GURMUKHI LETTER DDHA 0x0A23 nn # GURMUKHI LETTER NNA 0x0A24 t # GURMUKHI LETTER TA 0x0A25 th # GURMUKHI LETTER THA 0x0A26 d # GURMUKHI LETTER DA 0x0A27 dh # GURMUKHI LETTER DHA 0x0A28 n # GURMUKHI LETTER NA 0x0A2A p # GURMUKHI LETTER PA 0x0A2B ph # GURMUKHI LETTER PHA 0x0A2C b # GURMUKHI LETTER BA 0x0A2D bb # GURMUKHI 
LETTER BHA 0x0A2E m # GURMUKHI LETTER MA 0x0A2F y # GURMUKHI LETTER YA 0x0A30 r # GURMUKHI LETTER RA 0x0A32 l # GURMUKHI LETTER LA 0x0A33 ll # GURMUKHI LETTER LLA 0x0A35 v # GURMUKHI LETTER VA 0x0A36 sh # GURMUKHI LETTER SHA 0x0A38 s # GURMUKHI LETTER SA 0x0A39 h # GURMUKHI LETTER HA 0x0A3C "'" # GURMUKHI SIGN NUKTA 0x0A3E aa # GURMUKHI VOWEL SIGN AA 0x0A3F i # GURMUKHI VOWEL SIGN I 0x0A40 ii # GURMUKHI VOWEL SIGN II 0x0A41 u # GURMUKHI VOWEL SIGN U 0x0A42 uu # GURMUKHI VOWEL SIGN UU 0x0A47 ee # GURMUKHI VOWEL SIGN EE 0x0A48 ai # GURMUKHI VOWEL SIGN AI 0x0A4B oo # GURMUKHI VOWEL SIGN OO 0x0A4C au # GURMUKHI VOWEL SIGN AU 0x0A59 khh # GURMUKHI LETTER KHHA 0x0A5A ghh # GURMUKHI LETTER GHHA 0x0A5B z # GURMUKHI LETTER ZA 0x0A5C rr # GURMUKHI LETTER RRA 0x0A5E f # GURMUKHI LETTER FA 0x0A66 0 # GURMUKHI DIGIT ZERO 0x0A67 1 # GURMUKHI DIGIT ONE 0x0A68 2 # GURMUKHI DIGIT TWO 0x0A69 3 # GURMUKHI DIGIT THREE 0x0A6A 4 # GURMUKHI DIGIT FOUR 0x0A6B 5 # GURMUKHI DIGIT FIVE 0x0A6C 6 # GURMUKHI DIGIT SIX 0x0A6D 7 # GURMUKHI DIGIT SEVEN 0x0A6E 8 # GURMUKHI DIGIT EIGHT 0x0A6F 9 # GURMUKHI DIGIT NINE 0x0A70 N # GURMUKHI TIPPI 0x0A71 H # GURMUKHI ADDAK 0x0A74 G.E.O. 
# GURMUKHI EK ONKAR 0x0A81 N # GUJARATI SIGN CANDRABINDU 0x0A82 N # GUJARATI SIGN ANUSVARA 0x0A83 H # GUJARATI SIGN VISARGA 0x0A85 a # GUJARATI LETTER A 0x0A86 aa # GUJARATI LETTER AA 0x0A87 i # GUJARATI LETTER I 0x0A88 ii # GUJARATI LETTER II 0x0A89 u # GUJARATI LETTER U 0x0A8A uu # GUJARATI LETTER UU 0x0A8B R # GUJARATI LETTER VOCALIC R 0x0A8D eN # GUJARATI VOWEL CANDRA E 0x0A8F e # GUJARATI LETTER E 0x0A90 ai # GUJARATI LETTER AI 0x0A91 oN # GUJARATI VOWEL CANDRA O 0x0A93 o # GUJARATI LETTER O 0x0A94 au # GUJARATI LETTER AU 0x0A95 k # GUJARATI LETTER KA 0x0A96 kh # GUJARATI LETTER KHA 0x0A97 g # GUJARATI LETTER GA 0x0A98 gh # GUJARATI LETTER GHA 0x0A99 ng # GUJARATI LETTER NGA 0x0A9A c # GUJARATI LETTER CA 0x0A9B ch # GUJARATI LETTER CHA 0x0A9C j # GUJARATI LETTER JA 0x0A9D jh # GUJARATI LETTER JHA 0x0A9E ny # GUJARATI LETTER NYA 0x0A9F tt # GUJARATI LETTER TTA 0x0AA0 tth # GUJARATI LETTER TTHA 0x0AA1 dd # GUJARATI LETTER DDA 0x0AA2 ddh # GUJARATI LETTER DDHA 0x0AA3 nn # GUJARATI LETTER NNA 0x0AA4 t # GUJARATI LETTER TA 0x0AA5 th # GUJARATI LETTER THA 0x0AA6 d # GUJARATI LETTER DA 0x0AA7 dh # GUJARATI LETTER DHA 0x0AA8 n # GUJARATI LETTER NA 0x0AAA p # GUJARATI LETTER PA 0x0AAB ph # GUJARATI LETTER PHA 0x0AAC b # GUJARATI LETTER BA 0x0AAD bh # GUJARATI LETTER BHA 0x0AAE m # GUJARATI LETTER MA 0x0AAF ya # GUJARATI LETTER YA 0x0AB0 r # GUJARATI LETTER RA 0x0AB2 l # GUJARATI LETTER LA 0x0AB3 ll # GUJARATI LETTER LLA 0x0AB5 v # GUJARATI LETTER VA 0x0AB6 sh # GUJARATI LETTER SHA 0x0AB7 ss # GUJARATI LETTER SSA 0x0AB8 s # GUJARATI LETTER SA 0x0AB9 h # GUJARATI LETTER HA 0x0ABC "'" # GUJARATI SIGN NUKTA 0x0ABD "'" # GUJARATI SIGN AVAGRAHA 0x0ABE aa # GUJARATI VOWEL SIGN AA 0x0ABF i # GUJARATI VOWEL SIGN I 0x0AC0 ii # GUJARATI VOWEL SIGN II 0x0AC1 u # GUJARATI VOWEL SIGN U 0x0AC2 uu # GUJARATI VOWEL SIGN UU 0x0AC3 R # GUJARATI VOWEL SIGN VOCALIC R 0x0AC4 RR # GUJARATI VOWEL SIGN VOCALIC RR 0x0AC5 eN # GUJARATI VOWEL SIGN CANDRA E 0x0AC7 e # GUJARATI VOWEL SIGN E 0x0AC8 
ai # GUJARATI VOWEL SIGN AI 0x0AC9 oN # GUJARATI VOWEL SIGN CANDRA O 0x0ACB o # GUJARATI VOWEL SIGN O 0x0ACC au # GUJARATI VOWEL SIGN AU 0x0AD0 AUM # GUJARATI OM 0x0AE0 RR # GUJARATI LETTER VOCALIC RR 0x0AE6 0 # GUJARATI DIGIT ZERO 0x0AE7 1 # GUJARATI DIGIT ONE 0x0AE8 2 # GUJARATI DIGIT TWO 0x0AE9 3 # GUJARATI DIGIT THREE 0x0AEA 4 # GUJARATI DIGIT FOUR 0x0AEB 5 # GUJARATI DIGIT FIVE 0x0AEC 6 # GUJARATI DIGIT SIX 0x0AED 7 # GUJARATI DIGIT SEVEN 0x0AEE 8 # GUJARATI DIGIT EIGHT 0x0AEF 9 # GUJARATI DIGIT NINE 0x0B01 N # ORIYA SIGN CANDRABINDU 0x0B02 N # ORIYA SIGN ANUSVARA 0x0B03 H # ORIYA SIGN VISARGA 0x0B05 a # ORIYA LETTER A 0x0B06 aa # ORIYA LETTER AA 0x0B07 i # ORIYA LETTER I 0x0B08 ii # ORIYA LETTER II 0x0B09 u # ORIYA LETTER U 0x0B0A uu # ORIYA LETTER UU 0x0B0B R # ORIYA LETTER VOCALIC R 0x0B0C L # ORIYA LETTER VOCALIC L 0x0B0F e # ORIYA LETTER E 0x0B10 ai # ORIYA LETTER AI 0x0B13 o # ORIYA LETTER O 0x0B14 au # ORIYA LETTER AU 0x0B15 k # ORIYA LETTER KA 0x0B16 kh # ORIYA LETTER KHA 0x0B17 g # ORIYA LETTER GA 0x0B18 gh # ORIYA LETTER GHA 0x0B19 ng # ORIYA LETTER NGA 0x0B1A c # ORIYA LETTER CA 0x0B1B ch # ORIYA LETTER CHA 0x0B1C j # ORIYA LETTER JA 0x0B1D jh # ORIYA LETTER JHA 0x0B1E ny # ORIYA LETTER NYA 0x0B1F tt # ORIYA LETTER TTA 0x0B20 tth # ORIYA LETTER TTHA 0x0B21 dd # ORIYA LETTER DDA 0x0B22 ddh # ORIYA LETTER DDHA 0x0B23 nn # ORIYA LETTER NNA 0x0B24 t # ORIYA LETTER TA 0x0B25 th # ORIYA LETTER THA 0x0B26 d # ORIYA LETTER DA 0x0B27 dh # ORIYA LETTER DHA 0x0B28 n # ORIYA LETTER NA 0x0B2A p # ORIYA LETTER PA 0x0B2B ph # ORIYA LETTER PHA 0x0B2C b # ORIYA LETTER BA 0x0B2D bh # ORIYA LETTER BHA 0x0B2E m # ORIYA LETTER MA 0x0B2F y # ORIYA LETTER YA 0x0B30 r # ORIYA LETTER RA 0x0B32 l # ORIYA LETTER LA 0x0B33 ll # ORIYA LETTER LLA 0x0B36 sh # ORIYA LETTER SHA 0x0B37 ss # ORIYA LETTER SSA 0x0B38 s # ORIYA LETTER SA 0x0B39 h # ORIYA LETTER HA 0x0B3C "'" # ORIYA SIGN NUKTA 0x0B3D "'" # ORIYA SIGN AVAGRAHA 0x0B3E aa # ORIYA VOWEL SIGN AA 0x0B3F i # ORIYA VOWEL SIGN I 
0x0B40 ii # ORIYA VOWEL SIGN II 0x0B41 u # ORIYA VOWEL SIGN U 0x0B42 uu # ORIYA VOWEL SIGN UU 0x0B43 R # ORIYA VOWEL SIGN VOCALIC R 0x0B47 e # ORIYA VOWEL SIGN E 0x0B48 ai # ORIYA VOWEL SIGN AI 0x0B4B o # ORIYA VOWEL SIGN O 0x0B4C au # ORIYA VOWEL SIGN AU 0x0B56 + # ORIYA AI LENGTH MARK 0x0B57 + # ORIYA AU LENGTH MARK 0x0B5C rr # ORIYA LETTER RRA 0x0B5D rh # ORIYA LETTER RHA 0x0B5F yy # ORIYA LETTER YYA 0x0B60 RR # ORIYA LETTER VOCALIC RR 0x0B61 LL # ORIYA LETTER VOCALIC LL 0x0B66 0 # ORIYA DIGIT ZERO 0x0B67 1 # ORIYA DIGIT ONE 0x0B68 2 # ORIYA DIGIT TWO 0x0B69 3 # ORIYA DIGIT THREE 0x0B6A 4 # ORIYA DIGIT FOUR 0x0B6B 5 # ORIYA DIGIT FIVE 0x0B6C 6 # ORIYA DIGIT SIX 0x0B6D 7 # ORIYA DIGIT SEVEN 0x0B6E 8 # ORIYA DIGIT EIGHT 0x0B6F 9 # ORIYA DIGIT NINE 0x0B82 N # TAMIL SIGN ANUSVARA 0x0B83 H # TAMIL SIGN VISARGA 0x0B85 a # TAMIL LETTER A 0x0B86 aa # TAMIL LETTER AA 0x0B87 i # TAMIL LETTER I 0x0B88 ii # TAMIL LETTER II 0x0B89 u # TAMIL LETTER U 0x0B8A uu # TAMIL LETTER UU 0x0B8E e # TAMIL LETTER E 0x0B8F ee # TAMIL LETTER EE 0x0B90 ai # TAMIL LETTER AI 0x0B92 o # TAMIL LETTER O 0x0B93 oo # TAMIL LETTER OO 0x0B94 au # TAMIL LETTER AU 0x0B95 k # TAMIL LETTER KA 0x0B99 ng # TAMIL LETTER NGA 0x0B9A c # TAMIL LETTER CA 0x0B9C j # TAMIL LETTER JA 0x0B9E ny # TAMIL LETTER NYA 0x0B9F tt # TAMIL LETTER TTA 0x0BA3 nn # TAMIL LETTER NNA 0x0BA4 t # TAMIL LETTER TA 0x0BA8 n # TAMIL LETTER NA 0x0BA9 nnn # TAMIL LETTER NNNA 0x0BAA p # TAMIL LETTER PA 0x0BAE m # TAMIL LETTER MA 0x0BAF y # TAMIL LETTER YA 0x0BB0 r # TAMIL LETTER RA 0x0BB1 rr # TAMIL LETTER RRA 0x0BB2 l # TAMIL LETTER LA 0x0BB3 ll # TAMIL LETTER LLA 0x0BB4 lll # TAMIL LETTER LLLA 0x0BB5 v # TAMIL LETTER VA 0x0BB7 ss # TAMIL LETTER SSA 0x0BB8 s # TAMIL LETTER SA 0x0BB9 h # TAMIL LETTER HA 0x0BBE aa # TAMIL VOWEL SIGN AA 0x0BBF i # TAMIL VOWEL SIGN I 0x0BC0 ii # TAMIL VOWEL SIGN II 0x0BC1 u # TAMIL VOWEL SIGN U 0x0BC2 uu # TAMIL VOWEL SIGN UU 0x0BC6 e # TAMIL VOWEL SIGN E 0x0BC7 ee # TAMIL VOWEL SIGN EE 0x0BC8 ai # TAMIL 
VOWEL SIGN AI 0x0BCA o # TAMIL VOWEL SIGN O 0x0BCB oo # TAMIL VOWEL SIGN OO 0x0BCC au # TAMIL VOWEL SIGN AU 0x0BD7 + # TAMIL AU LENGTH MARK 0x0BE6 0 # TAMIL DIGIT ZERO 0x0BE7 1 # TAMIL DIGIT ONE 0x0BE8 2 # TAMIL DIGIT TWO 0x0BE9 3 # TAMIL DIGIT THREE 0x0BEA 4 # TAMIL DIGIT FOUR 0x0BEB 5 # TAMIL DIGIT FIVE 0x0BEC 6 # TAMIL DIGIT SIX 0x0BED 7 # TAMIL DIGIT SEVEN 0x0BEE 8 # TAMIL DIGIT EIGHT 0x0BEF 9 # TAMIL DIGIT NINE 0x0BF0 +10+ # TAMIL NUMBER TEN 0x0BF1 +100+ # TAMIL NUMBER ONE HUNDRED 0x0BF2 +1000+ # TAMIL NUMBER ONE THOUSAND 0x0C01 N # TELUGU SIGN CANDRABINDU 0x0C02 N # TELUGU SIGN ANUSVARA 0x0C03 H # TELUGU SIGN VISARGA 0x0C05 a # TELUGU LETTER A 0x0C06 aa # TELUGU LETTER AA 0x0C07 i # TELUGU LETTER I 0x0C08 ii # TELUGU LETTER II 0x0C09 u # TELUGU LETTER U 0x0C0A uu # TELUGU LETTER UU 0x0C0B R # TELUGU LETTER VOCALIC R 0x0C0C L # TELUGU LETTER VOCALIC L 0x0C0E e # TELUGU LETTER E 0x0C0F ee # TELUGU LETTER EE 0x0C10 ai # TELUGU LETTER AI 0x0C12 o # TELUGU LETTER O 0x0C13 oo # TELUGU LETTER OO 0x0C14 au # TELUGU LETTER AU 0x0C15 k # TELUGU LETTER KA 0x0C16 kh # TELUGU LETTER KHA 0x0C17 g # TELUGU LETTER GA 0x0C18 gh # TELUGU LETTER GHA 0x0C19 ng # TELUGU LETTER NGA 0x0C1A c # TELUGU LETTER CA 0x0C1B ch # TELUGU LETTER CHA 0x0C1C j # TELUGU LETTER JA 0x0C1D jh # TELUGU LETTER JHA 0x0C1E ny # TELUGU LETTER NYA 0x0C1F tt # TELUGU LETTER TTA 0x0C20 tth # TELUGU LETTER TTHA 0x0C21 dd # TELUGU LETTER DDA 0x0C22 ddh # TELUGU LETTER DDHA 0x0C23 nn # TELUGU LETTER NNA 0x0C24 t # TELUGU LETTER TA 0x0C25 th # TELUGU LETTER THA 0x0C26 d # TELUGU LETTER DA 0x0C27 dh # TELUGU LETTER DHA 0x0C28 n # TELUGU LETTER NA 0x0C2A p # TELUGU LETTER PA 0x0C2B ph # TELUGU LETTER PHA 0x0C2C b # TELUGU LETTER BA 0x0C2D bh # TELUGU LETTER BHA 0x0C2E m # TELUGU LETTER MA 0x0C2F y # TELUGU LETTER YA 0x0C30 r # TELUGU LETTER RA 0x0C31 rr # TELUGU LETTER RRA 0x0C32 l # TELUGU LETTER LA 0x0C33 ll # TELUGU LETTER LLA 0x0C35 v # TELUGU LETTER VA 0x0C36 sh # TELUGU LETTER SHA 0x0C37 ss # TELUGU 
LETTER SSA 0x0C38 s # TELUGU LETTER SA 0x0C39 h # TELUGU LETTER HA 0x0C3E aa # TELUGU VOWEL SIGN AA 0x0C3F i # TELUGU VOWEL SIGN I 0x0C40 ii # TELUGU VOWEL SIGN II 0x0C41 u # TELUGU VOWEL SIGN U 0x0C42 uu # TELUGU VOWEL SIGN UU 0x0C43 R # TELUGU VOWEL SIGN VOCALIC R 0x0C44 RR # TELUGU VOWEL SIGN VOCALIC RR 0x0C46 e # TELUGU VOWEL SIGN E 0x0C47 ee # TELUGU VOWEL SIGN EE 0x0C48 ai # TELUGU VOWEL SIGN AI 0x0C4A o # TELUGU VOWEL SIGN O 0x0C4B oo # TELUGU VOWEL SIGN OO 0x0C4C au # TELUGU VOWEL SIGN AU 0x0C55 + # TELUGU LENGTH MARK 0x0C56 + # TELUGU AI LENGTH MARK 0x0C60 RR # TELUGU LETTER VOCALIC RR 0x0C61 LL # TELUGU LETTER VOCALIC LL 0x0C66 0 # TELUGU DIGIT ZERO 0x0C67 1 # TELUGU DIGIT ONE 0x0C68 2 # TELUGU DIGIT TWO 0x0C69 3 # TELUGU DIGIT THREE 0x0C6A 4 # TELUGU DIGIT FOUR 0x0C6B 5 # TELUGU DIGIT FIVE 0x0C6C 6 # TELUGU DIGIT SIX 0x0C6D 7 # TELUGU DIGIT SEVEN 0x0C6E 8 # TELUGU DIGIT EIGHT 0x0C6F 9 # TELUGU DIGIT NINE 0x0C82 N # KANNADA SIGN ANUSVARA 0x0C83 H # KANNADA SIGN VISARGA 0x0C85 a # KANNADA LETTER A 0x0C86 aa # KANNADA LETTER AA 0x0C87 i # KANNADA LETTER I 0x0C88 ii # KANNADA LETTER II 0x0C89 u # KANNADA LETTER U 0x0C8A uu # KANNADA LETTER UU 0x0C8B R # KANNADA LETTER VOCALIC R 0x0C8C L # KANNADA LETTER VOCALIC L 0x0C8E e # KANNADA LETTER E 0x0C8F ee # KANNADA LETTER EE 0x0C90 ai # KANNADA LETTER AI 0x0C92 o # KANNADA LETTER O 0x0C93 oo # KANNADA LETTER OO 0x0C94 au # KANNADA LETTER AU 0x0C95 k # KANNADA LETTER KA 0x0C96 kh # KANNADA LETTER KHA 0x0C97 g # KANNADA LETTER GA 0x0C98 gh # KANNADA LETTER GHA 0x0C99 ng # KANNADA LETTER NGA 0x0C9A c # KANNADA LETTER CA 0x0C9B ch # KANNADA LETTER CHA 0x0C9C j # KANNADA LETTER JA 0x0C9D jh # KANNADA LETTER JHA 0x0C9E ny # KANNADA LETTER NYA 0x0C9F tt # KANNADA LETTER TTA 0x0CA0 tth # KANNADA LETTER TTHA 0x0CA1 dd # KANNADA LETTER DDA 0x0CA2 ddh # KANNADA LETTER DDHA 0x0CA3 nn # KANNADA LETTER NNA 0x0CA4 t # KANNADA LETTER TA 0x0CA5 th # KANNADA LETTER THA 0x0CA6 d # KANNADA LETTER DA 0x0CA7 dh # KANNADA LETTER DHA 
0x0CA8 n # KANNADA LETTER NA 0x0CAA p # KANNADA LETTER PA 0x0CAB ph # KANNADA LETTER PHA 0x0CAC b # KANNADA LETTER BA 0x0CAD bh # KANNADA LETTER BHA 0x0CAE m # KANNADA LETTER MA 0x0CAF y # KANNADA LETTER YA 0x0CB0 r # KANNADA LETTER RA 0x0CB1 rr # KANNADA LETTER RRA 0x0CB2 l # KANNADA LETTER LA 0x0CB3 ll # KANNADA LETTER LLA 0x0CB5 v # KANNADA LETTER VA 0x0CB6 sh # KANNADA LETTER SHA 0x0CB7 ss # KANNADA LETTER SSA 0x0CB8 s # KANNADA LETTER SA 0x0CB9 h # KANNADA LETTER HA 0x0CBE aa # KANNADA VOWEL SIGN AA 0x0CBF i # KANNADA VOWEL SIGN I 0x0CC0 ii # KANNADA VOWEL SIGN II 0x0CC1 u # KANNADA VOWEL SIGN U 0x0CC2 uu # KANNADA VOWEL SIGN UU 0x0CC3 R # KANNADA VOWEL SIGN VOCALIC R 0x0CC4 RR # KANNADA VOWEL SIGN VOCALIC RR 0x0CC6 e # KANNADA VOWEL SIGN E 0x0CC7 ee # KANNADA VOWEL SIGN EE 0x0CC8 ai # KANNADA VOWEL SIGN AI 0x0CCA o # KANNADA VOWEL SIGN O 0x0CCB oo # KANNADA VOWEL SIGN OO 0x0CCC au # KANNADA VOWEL SIGN AU 0x0CD5 + # KANNADA LENGTH MARK 0x0CD6 + # KANNADA AI LENGTH MARK 0x0CDE lll # KANNADA LETTER FA 0x0CE0 RR # KANNADA LETTER VOCALIC RR 0x0CE1 LL # KANNADA LETTER VOCALIC LL 0x0CE6 0 # KANNADA DIGIT ZERO 0x0CE7 1 # KANNADA DIGIT ONE 0x0CE8 2 # KANNADA DIGIT TWO 0x0CE9 3 # KANNADA DIGIT THREE 0x0CEA 4 # KANNADA DIGIT FOUR 0x0CEB 5 # KANNADA DIGIT FIVE 0x0CEC 6 # KANNADA DIGIT SIX 0x0CED 7 # KANNADA DIGIT SEVEN 0x0CEE 8 # KANNADA DIGIT EIGHT 0x0CEF 9 # KANNADA DIGIT NINE 0x0D02 N # MALAYALAM SIGN ANUSVARA 0x0D03 H # MALAYALAM SIGN VISARGA 0x0D05 a # MALAYALAM LETTER A 0x0D06 aa # MALAYALAM LETTER AA 0x0D07 i # MALAYALAM LETTER I 0x0D08 ii # MALAYALAM LETTER II 0x0D09 u # MALAYALAM LETTER U 0x0D0A uu # MALAYALAM LETTER UU 0x0D0B R # MALAYALAM LETTER VOCALIC R 0x0D0C L # MALAYALAM LETTER VOCALIC L 0x0D0E e # MALAYALAM LETTER E 0x0D0F ee # MALAYALAM LETTER EE 0x0D10 ai # MALAYALAM LETTER AI 0x0D12 o # MALAYALAM LETTER O 0x0D13 oo # MALAYALAM LETTER OO 0x0D14 au # MALAYALAM LETTER AU 0x0D15 k # MALAYALAM LETTER KA 0x0D16 kh # MALAYALAM LETTER KHA 0x0D17 g # MALAYALAM 
LETTER GA 0x0D18 gh # MALAYALAM LETTER GHA 0x0D19 ng # MALAYALAM LETTER NGA 0x0D1A c # MALAYALAM LETTER CA 0x0D1B ch # MALAYALAM LETTER CHA 0x0D1C j # MALAYALAM LETTER JA 0x0D1D jh # MALAYALAM LETTER JHA 0x0D1E ny # MALAYALAM LETTER NYA 0x0D1F tt # MALAYALAM LETTER TTA 0x0D20 tth # MALAYALAM LETTER TTHA 0x0D21 dd # MALAYALAM LETTER DDA 0x0D22 ddh # MALAYALAM LETTER DDHA 0x0D23 nn # MALAYALAM LETTER NNA 0x0D24 t # MALAYALAM LETTER TA 0x0D25 th # MALAYALAM LETTER THA 0x0D26 d # MALAYALAM LETTER DA 0x0D27 dh # MALAYALAM LETTER DHA 0x0D28 n # MALAYALAM LETTER NA 0x0D2A p # MALAYALAM LETTER PA 0x0D2B ph # MALAYALAM LETTER PHA 0x0D2C b # MALAYALAM LETTER BA 0x0D2D bh # MALAYALAM LETTER BHA 0x0D2E m # MALAYALAM LETTER MA 0x0D2F y # MALAYALAM LETTER YA 0x0D30 r # MALAYALAM LETTER RA 0x0D31 rr # MALAYALAM LETTER RRA 0x0D32 l # MALAYALAM LETTER LA 0x0D33 ll # MALAYALAM LETTER LLA 0x0D34 lll # MALAYALAM LETTER LLLA 0x0D35 v # MALAYALAM LETTER VA 0x0D36 sh # MALAYALAM LETTER SHA 0x0D37 ss # MALAYALAM LETTER SSA 0x0D38 s # MALAYALAM LETTER SA 0x0D39 h # MALAYALAM LETTER HA 0x0D3E aa # MALAYALAM VOWEL SIGN AA 0x0D3F i # MALAYALAM VOWEL SIGN I 0x0D40 ii # MALAYALAM VOWEL SIGN II 0x0D41 u # MALAYALAM VOWEL SIGN U 0x0D42 uu # MALAYALAM VOWEL SIGN UU 0x0D43 R # MALAYALAM VOWEL SIGN VOCALIC R 0x0D46 e # MALAYALAM VOWEL SIGN E 0x0D47 ee # MALAYALAM VOWEL SIGN EE 0x0D48 ai # MALAYALAM VOWEL SIGN AI 0x0D4A o # MALAYALAM VOWEL SIGN O 0x0D4B oo # MALAYALAM VOWEL SIGN OO 0x0D4C au # MALAYALAM VOWEL SIGN AU 0x0D57 + # MALAYALAM AU LENGTH MARK 0x0D60 RR # MALAYALAM LETTER VOCALIC RR 0x0D61 LL # MALAYALAM LETTER VOCALIC LL 0x0D66 0 # MALAYALAM DIGIT ZERO 0x0D67 1 # MALAYALAM DIGIT ONE 0x0D68 2 # MALAYALAM DIGIT TWO 0x0D69 3 # MALAYALAM DIGIT THREE 0x0D6A 4 # MALAYALAM DIGIT FOUR 0x0D6B 5 # MALAYALAM DIGIT FIVE 0x0D6C 6 # MALAYALAM DIGIT SIX 0x0D6D 7 # MALAYALAM DIGIT SEVEN 0x0D6E 8 # MALAYALAM DIGIT EIGHT 0x0D6F 9 # MALAYALAM DIGIT NINE 0x0D82 N # SINHALA SIGN ANUSVARAYA 0x0D83 H # SINHALA 
SIGN VISARGAYA 0x0D85 a # SINHALA LETTER AYANNA 0x0D86 aa # SINHALA LETTER AAYANNA 0x0D87 ae # SINHALA LETTER AEYANNA 0x0D88 aae # SINHALA LETTER AEEYANNA 0x0D89 i # SINHALA LETTER IYANNA 0x0D8A ii # SINHALA LETTER IIYANNA 0x0D8B u # SINHALA LETTER UYANNA 0x0D8C uu # SINHALA LETTER UUYANNA 0x0D8D R # SINHALA LETTER IRUYANNA 0x0D8E RR # SINHALA LETTER IRUUYANNA 0x0D8F L # SINHALA LETTER ILUYANNA 0x0D90 LL # SINHALA LETTER ILUUYANNA 0x0D91 e # SINHALA LETTER EYANNA 0x0D92 ee # SINHALA LETTER EEYANNA 0x0D93 ai # SINHALA LETTER AIYANNA 0x0D94 o # SINHALA LETTER OYANNA 0x0D95 oo # SINHALA LETTER OOYANNA 0x0D96 au # SINHALA LETTER AUYANNA 0x0D9A k # SINHALA LETTER ALPAPRAANA KAYANNA 0x0D9B kh # SINHALA LETTER MAHAAPRAANA KAYANNA 0x0D9C g # SINHALA LETTER ALPAPRAANA GAYANNA 0x0D9D gh # SINHALA LETTER MAHAAPRAANA GAYANNA 0x0D9E ng # SINHALA LETTER KANTAJA NAASIKYAYA 0x0D9F nng # SINHALA LETTER SANYAKA GAYANNA 0x0DA0 c # SINHALA LETTER ALPAPRAANA CAYANNA 0x0DA1 ch # SINHALA LETTER MAHAAPRAANA CAYANNA 0x0DA2 j # SINHALA LETTER ALPAPRAANA JAYANNA 0x0DA3 jh # SINHALA LETTER MAHAAPRAANA JAYANNA 0x0DA4 ny # SINHALA LETTER TAALUJA NAASIKYAYA 0x0DA5 jny # SINHALA LETTER TAALUJA SANYOOGA NAAKSIKYAYA 0x0DA6 nyj # SINHALA LETTER SANYAKA JAYANNA 0x0DA7 tt # SINHALA LETTER ALPAPRAANA TTAYANNA 0x0DA8 tth # SINHALA LETTER MAHAAPRAANA TTAYANNA 0x0DA9 dd # SINHALA LETTER ALPAPRAANA DDAYANNA 0x0DAA ddh # SINHALA LETTER MAHAAPRAANA DDAYANNA 0x0DAB nn # SINHALA LETTER MUURDHAJA NAYANNA 0x0DAC nndd # SINHALA LETTER SANYAKA DDAYANNA 0x0DAD t # SINHALA LETTER ALPAPRAANA TAYANNA 0x0DAE th # SINHALA LETTER MAHAAPRAANA TAYANNA 0x0DAF d # SINHALA LETTER ALPAPRAANA DAYANNA 0x0DB0 dh # SINHALA LETTER MAHAAPRAANA DAYANNA 0x0DB1 n # SINHALA LETTER DANTAJA NAYANNA 0x0DB3 nd # SINHALA LETTER SANYAKA DAYANNA 0x0DB4 p # SINHALA LETTER ALPAPRAANA PAYANNA 0x0DB5 ph # SINHALA LETTER MAHAAPRAANA PAYANNA 0x0DB6 b # SINHALA LETTER ALPAPRAANA BAYANNA 0x0DB7 bh # SINHALA LETTER MAHAAPRAANA BAYANNA 0x0DB8 m # 
SINHALA LETTER MAYANNA 0x0DB9 mb # SINHALA LETTER AMBA BAYANNA 0x0DBA y # SINHALA LETTER YAYANNA 0x0DBB r # SINHALA LETTER RAYANNA 0x0DBD l # SINHALA LETTER DANTAJA LAYANNA 0x0DC0 v # SINHALA LETTER VAYANNA 0x0DC1 sh # SINHALA LETTER TAALUJA SAYANNA 0x0DC2 ss # SINHALA LETTER MUURDHAJA SAYANNA 0x0DC3 s # SINHALA LETTER DANTAJA SAYANNA 0x0DC4 h # SINHALA LETTER HAYANNA 0x0DC5 ll # SINHALA LETTER MUURDHAJA LAYANNA 0x0DC6 f # SINHALA LETTER FAYANNA 0x0DCF aa # SINHALA VOWEL SIGN AELA-PILLA 0x0DD0 ae # SINHALA VOWEL SIGN KETTI AEDA-PILLA 0x0DD1 aae # SINHALA VOWEL SIGN DIGA AEDA-PILLA 0x0DD2 i # SINHALA VOWEL SIGN KETTI IS-PILLA 0x0DD3 ii # SINHALA VOWEL SIGN DIGA IS-PILLA 0x0DD4 u # SINHALA VOWEL SIGN KETTI PAA-PILLA 0x0DD6 uu # SINHALA VOWEL SIGN DIGA PAA-PILLA 0x0DD8 R # SINHALA VOWEL SIGN GAETTA-PILLA 0x0DD9 e # SINHALA VOWEL SIGN KOMBUVA 0x0DDA ee # SINHALA VOWEL SIGN DIGA KOMBUVA 0x0DDB ai # SINHALA VOWEL SIGN KOMBU DEKA 0x0DDC o # SINHALA VOWEL SIGN KOMBUVA HAA AELA-PILLA 0x0DDD oo # SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA 0x0DDE au # SINHALA VOWEL SIGN KOMBUVA HAA GAYANUKITTA 0x0DDF L # SINHALA VOWEL SIGN GAYANUKITTA 0x0DF2 RR # SINHALA VOWEL SIGN DIGA GAETTA-PILLA 0x0DF3 LL # SINHALA VOWEL SIGN DIGA GAYANUKITTA 0x0DF4 . 
# SINHALA PUNCTUATION KUNDDALIYA 0x0E01 k # THAI CHARACTER KO KAI 0x0E02 kh # THAI CHARACTER KHO KHAI 0x0E03 kh # THAI CHARACTER KHO KHUAT 0x0E04 kh # THAI CHARACTER KHO KHWAI 0x0E05 kh # THAI CHARACTER KHO KHON 0x0E06 kh # THAI CHARACTER KHO RAKHANG 0x0E07 ng # THAI CHARACTER NGO NGU 0x0E08 cch # THAI CHARACTER CHO CHAN 0x0E09 ch # THAI CHARACTER CHO CHING 0x0E0A ch # THAI CHARACTER CHO CHANG 0x0E0B ch # THAI CHARACTER SO SO 0x0E0C ch # THAI CHARACTER CHO CHOE 0x0E0D y # THAI CHARACTER YO YING 0x0E0E d # THAI CHARACTER DO CHADA 0x0E0F t # THAI CHARACTER TO PATAK 0x0E10 th # THAI CHARACTER THO THAN 0x0E11 th # THAI CHARACTER THO NANGMONTHO 0x0E12 th # THAI CHARACTER THO PHUTHAO 0x0E13 n # THAI CHARACTER NO NEN 0x0E14 d # THAI CHARACTER DO DEK 0x0E15 t # THAI CHARACTER TO TAO 0x0E16 th # THAI CHARACTER THO THUNG 0x0E17 th # THAI CHARACTER THO THAHAN 0x0E18 th # THAI CHARACTER THO THONG 0x0E19 n # THAI CHARACTER NO NU 0x0E1A b # THAI CHARACTER BO BAIMAI 0x0E1B p # THAI CHARACTER PO PLA 0x0E1C ph # THAI CHARACTER PHO PHUNG 0x0E1D f # THAI CHARACTER FO FA 0x0E1E ph # THAI CHARACTER PHO PHAN 0x0E1F f # THAI CHARACTER FO FAN 0x0E20 ph # THAI CHARACTER PHO SAMPHAO 0x0E21 m # THAI CHARACTER MO MA 0x0E22 y # THAI CHARACTER YO YAK 0x0E23 r # THAI CHARACTER RO RUA 0x0E24 R # THAI CHARACTER RU 0x0E25 l # THAI CHARACTER LO LING 0x0E26 L # THAI CHARACTER LU 0x0E27 w # THAI CHARACTER WO WAEN 0x0E28 s # THAI CHARACTER SO SALA 0x0E29 s # THAI CHARACTER SO RUSI 0x0E2A s # THAI CHARACTER SO SUA 0x0E2B h # THAI CHARACTER HO HIP 0x0E2C l # THAI CHARACTER LO CHULA 0x0E2D ` # THAI CHARACTER O ANG 0x0E2E h # THAI CHARACTER HO NOKHUK 0x0E2F ~ # THAI CHARACTER PAIYANNOI 0x0E30 a # THAI CHARACTER SARA A 0x0E31 a # THAI CHARACTER MAI HAN-AKAT 0x0E32 aa # THAI CHARACTER SARA AA 0x0E33 am # THAI CHARACTER SARA AM 0x0E34 i # THAI CHARACTER SARA I 0x0E35 ii # THAI CHARACTER SARA II 0x0E36 ue # THAI CHARACTER SARA UE 0x0E37 uue # THAI CHARACTER SARA UEE 0x0E38 u # THAI CHARACTER SARA U 0x0E39 uu # 
THAI CHARACTER SARA UU 0x0E3A "'" # THAI CHARACTER PHINTHU 0x0E3F Bh. # THAI CURRENCY SYMBOL BAHT 0x0E40 e # THAI CHARACTER SARA E 0x0E41 ae # THAI CHARACTER SARA AE 0x0E42 o # THAI CHARACTER SARA O 0x0E43 ai # THAI CHARACTER SARA AI MAIMUAN 0x0E44 ai # THAI CHARACTER SARA AI MAIMALAI 0x0E45 ao # THAI CHARACTER LAKKHANGYAO 0x0E46 + # THAI CHARACTER MAIYAMOK 0x0E4D M # THAI CHARACTER NIKHAHIT 0x0E4F * # THAI CHARACTER FONGMAN 0x0E50 0 # THAI DIGIT ZERO 0x0E51 1 # THAI DIGIT ONE 0x0E52 2 # THAI DIGIT TWO 0x0E53 3 # THAI DIGIT THREE 0x0E54 4 # THAI DIGIT FOUR 0x0E55 5 # THAI DIGIT FIVE 0x0E56 6 # THAI DIGIT SIX 0x0E57 7 # THAI DIGIT SEVEN 0x0E58 8 # THAI DIGIT EIGHT 0x0E59 9 # THAI DIGIT NINE 0x0E5A // # THAI CHARACTER ANGKHANKHU 0x0E5B /// # THAI CHARACTER KHOMUT 0x0E81 k # LAO LETTER KO 0x0E82 kh # LAO LETTER KHO SUNG 0x0E84 kh # LAO LETTER KHO TAM 0x0E87 ng # LAO LETTER NGO 0x0E88 ch # LAO LETTER CO 0x0E8A s # LAO LETTER SO TAM 0x0E8D ny # LAO LETTER NYO 0x0E94 d # LAO LETTER DO 0x0E95 h # LAO LETTER TO 0x0E96 th # LAO LETTER THO SUNG 0x0E97 th # LAO LETTER THO TAM 0x0E99 n # LAO LETTER NO 0x0E9A b # LAO LETTER BO 0x0E9B p # LAO LETTER PO 0x0E9C ph # LAO LETTER PHO SUNG 0x0E9D f # LAO LETTER FO TAM 0x0E9E ph # LAO LETTER PHO TAM 0x0E9F f # LAO LETTER FO SUNG 0x0EA1 m # LAO LETTER MO 0x0EA2 y # LAO LETTER YO 0x0EA3 r # LAO LETTER LO LING 0x0EA5 l # LAO LETTER LO LOOT 0x0EA7 w # LAO LETTER WO 0x0EAA s # LAO LETTER SO SUNG 0x0EAB h # LAO LETTER HO SUNG 0x0EAD ` # LAO LETTER O 0x0EAF ~ # LAO ELLIPSIS 0x0EB0 a # LAO VOWEL SIGN A 0x0EB2 aa # LAO VOWEL SIGN AA 0x0EB3 am # LAO VOWEL SIGN AM 0x0EB4 i # LAO VOWEL SIGN I 0x0EB5 ii # LAO VOWEL SIGN II 0x0EB6 y # LAO VOWEL SIGN Y 0x0EB7 yy # LAO VOWEL SIGN YY 0x0EB8 u # LAO VOWEL SIGN U 0x0EB9 uu # LAO VOWEL SIGN UU 0x0EBB o # LAO VOWEL SIGN MAI KON 0x0EBC l # LAO SEMIVOWEL SIGN LO 0x0EBD ny # LAO SEMIVOWEL SIGN NYO 0x0EC0 e # LAO VOWEL SIGN E 0x0EC1 ei # LAO VOWEL SIGN EI 0x0EC2 o # LAO VOWEL SIGN O 0x0EC3 ay # LAO VOWEL SIGN 
AY 0x0EC4 ai # LAO VOWEL SIGN AI 0x0EC6 + # LAO KO LA 0x0ECD M # LAO NIGGAHITA 0x0ED0 0 # LAO DIGIT ZERO 0x0ED1 1 # LAO DIGIT ONE 0x0ED2 2 # LAO DIGIT TWO 0x0ED3 3 # LAO DIGIT THREE 0x0ED4 4 # LAO DIGIT FOUR 0x0ED5 5 # LAO DIGIT FIVE 0x0ED6 6 # LAO DIGIT SIX 0x0ED7 7 # LAO DIGIT SEVEN 0x0ED8 8 # LAO DIGIT EIGHT 0x0ED9 9 # LAO DIGIT NINE 0x0EDC hn # LAO HO NO 0x0EDD hm # LAO HO MO # # Characters 0x0F00 to 0x0FFF # 0x0F00 AUM # TIBETAN SYLLABLE OM 0x0F08 // # TIBETAN MARK SBRUL SHAD 0x0F09 * # TIBETAN MARK BSKUR YIG MGO 0x0F0B - # TIBETAN MARK INTERSYLLABIC TSHEG 0x0F0C / # TIBETAN MARK DELIMITER TSHEG BSTAR 0x0F0D / # TIBETAN MARK SHAD 0x0F0E // # TIBETAN MARK NYIS SHAD 0x0F0F -/ # TIBETAN MARK TSHEG SHAD 0x0F10 +/ # TIBETAN MARK NYIS TSHEG SHAD 0x0F11 X/ # TIBETAN MARK RIN CHEN SPUNGS SHAD 0x0F12 /XX/ # TIBETAN MARK RGYA GRAM SHAD 0x0F13 /X/ # TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN 0x0F14 , # TIBETAN MARK GTER TSHEG 0x0F20 0 # TIBETAN DIGIT ZERO 0x0F21 1 # TIBETAN DIGIT ONE 0x0F22 2 # TIBETAN DIGIT TWO 0x0F23 3 # TIBETAN DIGIT THREE 0x0F24 4 # TIBETAN DIGIT FOUR 0x0F25 5 # TIBETAN DIGIT FIVE 0x0F26 6 # TIBETAN DIGIT SIX 0x0F27 7 # TIBETAN DIGIT SEVEN 0x0F28 8 # TIBETAN DIGIT EIGHT 0x0F29 9 # TIBETAN DIGIT NINE 0x0F2A .5 # TIBETAN DIGIT HALF ONE 0x0F2B 1.5 # TIBETAN DIGIT HALF TWO 0x0F2C 2.5 # TIBETAN DIGIT HALF THREE 0x0F2D 3.5 # TIBETAN DIGIT HALF FOUR 0x0F2E 4.5 # TIBETAN DIGIT HALF FIVE 0x0F2F 5.5 # TIBETAN DIGIT HALF SIX 0x0F30 6.5 # TIBETAN DIGIT HALF SEVEN 0x0F31 7.5 # TIBETAN DIGIT HALF EIGHT 0x0F32 8.5 # TIBETAN DIGIT HALF NINE 0x0F33 -.5 # TIBETAN DIGIT HALF ZERO 0x0F34 + # TIBETAN MARK BSDUS RTAGS 0x0F35 * # TIBETAN MARK NGAS BZUNG NYI ZLA 0x0F36 ^ # TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN 0x0F37 _ # TIBETAN MARK NGAS BZUNG SGOR RTAGS 0x0F39 ~ # TIBETAN MARK TSA -PHRU 0x0F3B ] # TIBETAN MARK GUG RTAGS GYAS 0x0F3C [[ # TIBETAN MARK ANG KHANG GYON 0x0F3D ]] # TIBETAN MARK ANG KHANG GYAS 0x0F40 k # TIBETAN LETTER KA 0x0F41 kh # TIBETAN LETTER KHA 
0x0F42 g # TIBETAN LETTER GA 0x0F43 gh # TIBETAN LETTER GHA 0x0F44 ng # TIBETAN LETTER NGA 0x0F45 c # TIBETAN LETTER CA 0x0F46 ch # TIBETAN LETTER CHA 0x0F47 j # TIBETAN LETTER JA 0x0F49 ny # TIBETAN LETTER NYA 0x0F4A tt # TIBETAN LETTER TTA 0x0F4B tth # TIBETAN LETTER TTHA 0x0F4C dd # TIBETAN LETTER DDA 0x0F4D ddh # TIBETAN LETTER DDHA 0x0F4E nn # TIBETAN LETTER NNA 0x0F4F t # TIBETAN LETTER TA 0x0F50 th # TIBETAN LETTER THA 0x0F51 d # TIBETAN LETTER DA 0x0F52 dh # TIBETAN LETTER DHA 0x0F53 n # TIBETAN LETTER NA 0x0F54 p # TIBETAN LETTER PA 0x0F55 ph # TIBETAN LETTER PHA 0x0F56 b # TIBETAN LETTER BA 0x0F57 bh # TIBETAN LETTER BHA 0x0F58 m # TIBETAN LETTER MA 0x0F59 ts # TIBETAN LETTER TSA 0x0F5A tsh # TIBETAN LETTER TSHA 0x0F5B dz # TIBETAN LETTER DZA 0x0F5C dzh # TIBETAN LETTER DZHA 0x0F5D w # TIBETAN LETTER WA 0x0F5E zh # TIBETAN LETTER ZHA 0x0F5F z # TIBETAN LETTER ZA 0x0F60 "'" # TIBETAN LETTER -A 0x0F61 y # TIBETAN LETTER YA 0x0F62 r # TIBETAN LETTER RA 0x0F63 l # TIBETAN LETTER LA 0x0F64 sh # TIBETAN LETTER SHA 0x0F65 ssh # TIBETAN LETTER SSA 0x0F66 s # TIBETAN LETTER SA 0x0F67 h # TIBETAN LETTER HA 0x0F68 a # TIBETAN LETTER A 0x0F69 kss # TIBETAN LETTER KSSA 0x0F6A r # TIBETAN LETTER FIXED-FORM RA 0x0F71 aa # TIBETAN VOWEL SIGN AA 0x0F72 i # TIBETAN VOWEL SIGN I 0x0F73 ii # TIBETAN VOWEL SIGN II 0x0F74 u # TIBETAN VOWEL SIGN U 0x0F75 uu # TIBETAN VOWEL SIGN UU 0x0F76 R # TIBETAN VOWEL SIGN VOCALIC R 0x0F77 RR # TIBETAN VOWEL SIGN VOCALIC RR 0x0F78 L # TIBETAN VOWEL SIGN VOCALIC L 0x0F79 LL # TIBETAN VOWEL SIGN VOCALIC LL 0x0F7A e # TIBETAN VOWEL SIGN E 0x0F7B ee # TIBETAN VOWEL SIGN EE 0x0F7C o # TIBETAN VOWEL SIGN O 0x0F7D oo # TIBETAN VOWEL SIGN OO 0x0F7E M # TIBETAN SIGN RJES SU NGA RO 0x0F7F H # TIBETAN SIGN RNAM BCAD 0x0F80 i # TIBETAN VOWEL SIGN REVERSED I 0x0F81 ii # TIBETAN VOWEL SIGN REVERSED II 0x0F90 k # TIBETAN SUBJOINED LETTER KA 0x0F91 kh # TIBETAN SUBJOINED LETTER KHA 0x0F92 g # TIBETAN SUBJOINED LETTER GA 0x0F93 gh # TIBETAN SUBJOINED LETTER 
GHA 0x0F94 ng # TIBETAN SUBJOINED LETTER NGA 0x0F95 c # TIBETAN SUBJOINED LETTER CA 0x0F96 ch # TIBETAN SUBJOINED LETTER CHA 0x0F97 j # TIBETAN SUBJOINED LETTER JA 0x0F99 ny # TIBETAN SUBJOINED LETTER NYA 0x0F9A tt # TIBETAN SUBJOINED LETTER TTA 0x0F9B tth # TIBETAN SUBJOINED LETTER TTHA 0x0F9C dd # TIBETAN SUBJOINED LETTER DDA 0x0F9D ddh # TIBETAN SUBJOINED LETTER DDHA 0x0F9E nn # TIBETAN SUBJOINED LETTER NNA 0x0F9F t # TIBETAN SUBJOINED LETTER TA 0x0FA0 th # TIBETAN SUBJOINED LETTER THA 0x0FA1 d # TIBETAN SUBJOINED LETTER DA 0x0FA2 dh # TIBETAN SUBJOINED LETTER DHA 0x0FA3 n # TIBETAN SUBJOINED LETTER NA 0x0FA4 p # TIBETAN SUBJOINED LETTER PA 0x0FA5 ph # TIBETAN SUBJOINED LETTER PHA 0x0FA6 b # TIBETAN SUBJOINED LETTER BA 0x0FA7 bh # TIBETAN SUBJOINED LETTER BHA 0x0FA8 m # TIBETAN SUBJOINED LETTER MA 0x0FA9 ts # TIBETAN SUBJOINED LETTER TSA 0x0FAA tsh # TIBETAN SUBJOINED LETTER TSHA 0x0FAB dz # TIBETAN SUBJOINED LETTER DZA 0x0FAC dzh # TIBETAN SUBJOINED LETTER DZHA 0x0FAD w # TIBETAN SUBJOINED LETTER WA 0x0FAE zh # TIBETAN SUBJOINED LETTER ZHA 0x0FAF z # TIBETAN SUBJOINED LETTER ZA 0x0FB0 "'" # TIBETAN SUBJOINED LETTER -A 0x0FB1 y # TIBETAN SUBJOINED LETTER YA 0x0FB2 r # TIBETAN SUBJOINED LETTER RA 0x0FB3 l # TIBETAN SUBJOINED LETTER LA 0x0FB4 sh # TIBETAN SUBJOINED LETTER SHA 0x0FB5 ss # TIBETAN SUBJOINED LETTER SSA 0x0FB6 s # TIBETAN SUBJOINED LETTER SA 0x0FB7 h # TIBETAN SUBJOINED LETTER HA 0x0FB8 a # TIBETAN SUBJOINED LETTER A 0x0FB9 kss # TIBETAN SUBJOINED LETTER KSSA 0x0FBA w # TIBETAN SUBJOINED LETTER FIXED-FORM WA 0x0FBB y # TIBETAN SUBJOINED LETTER FIXED-FORM YA 0x0FBC r # TIBETAN SUBJOINED LETTER FIXED-FORM RA 0x0FBE X # TIBETAN KU RU KHA 0x0FBF :X: # TIBETAN KU RU KHA BZHI MIG CAN 0x0FC0 /O/ # TIBETAN CANTILLATION SIGN HEAVY BEAT 0x0FC1 /o/ # TIBETAN CANTILLATION SIGN LIGHT BEAT 0x0FC2 \o\ # TIBETAN CANTILLATION SIGN CANG TE-U 0x0FC3 (O) # TIBETAN CANTILLATION SIGN SBUB -CHAL # # Characters 0x1000 to 0x10FF # 0x1000 k # MYANMAR LETTER KA 0x1001 kh # 
MYANMAR LETTER KHA 0x1002 g # MYANMAR LETTER GA 0x1003 gh # MYANMAR LETTER GHA 0x1004 ng # MYANMAR LETTER NGA 0x1005 c # MYANMAR LETTER CA 0x1006 ch # MYANMAR LETTER CHA 0x1007 j # MYANMAR LETTER JA 0x1008 jh # MYANMAR LETTER JHA 0x1009 ny # MYANMAR LETTER NYA 0x100A nny # MYANMAR LETTER NNYA 0x100B tt # MYANMAR LETTER TTA 0x100C tth # MYANMAR LETTER TTHA 0x100D dd # MYANMAR LETTER DDA 0x100E ddh # MYANMAR LETTER DDHA 0x100F nn # MYANMAR LETTER NNA 0x1010 tt # MYANMAR LETTER TA 0x1011 th # MYANMAR LETTER THA 0x1012 d # MYANMAR LETTER DA 0x1013 dh # MYANMAR LETTER DHA 0x1014 n # MYANMAR LETTER NA 0x1015 p # MYANMAR LETTER PA 0x1016 ph # MYANMAR LETTER PHA 0x1017 b # MYANMAR LETTER BA 0x1018 bh # MYANMAR LETTER BHA 0x1019 m # MYANMAR LETTER MA 0x101A y # MYANMAR LETTER YA 0x101B r # MYANMAR LETTER RA 0x101C l # MYANMAR LETTER LA 0x101D w # MYANMAR LETTER WA 0x101E s # MYANMAR LETTER SA 0x101F h # MYANMAR LETTER HA 0x1020 ll # MYANMAR LETTER LLA 0x1021 a # MYANMAR LETTER A 0x1023 i # MYANMAR LETTER I 0x1024 ii # MYANMAR LETTER II 0x1025 u # MYANMAR LETTER U 0x1026 uu # MYANMAR LETTER UU 0x1027 e # MYANMAR LETTER E 0x1029 o # MYANMAR LETTER O 0x102A au # MYANMAR LETTER AU 0x102C aa # MYANMAR VOWEL SIGN AA 0x102D i # MYANMAR VOWEL SIGN I 0x102E ii # MYANMAR VOWEL SIGN II 0x102F u # MYANMAR VOWEL SIGN U 0x1030 uu # MYANMAR VOWEL SIGN UU 0x1031 e # MYANMAR VOWEL SIGN E 0x1032 ai # MYANMAR VOWEL SIGN AI 0x1036 N # MYANMAR SIGN ANUSVARA 0x1037 "'" # MYANMAR SIGN DOT BELOW 0x1038 : # MYANMAR SIGN VISARGA 0x1040 0 # MYANMAR DIGIT ZERO 0x1041 1 # MYANMAR DIGIT ONE 0x1042 2 # MYANMAR DIGIT TWO 0x1043 3 # MYANMAR DIGIT THREE 0x1044 4 # MYANMAR DIGIT FOUR 0x1045 5 # MYANMAR DIGIT FIVE 0x1046 6 # MYANMAR DIGIT SIX 0x1047 7 # MYANMAR DIGIT SEVEN 0x1048 8 # MYANMAR DIGIT EIGHT 0x1049 9 # MYANMAR DIGIT NINE 0x104A / # MYANMAR SIGN LITTLE SECTION 0x104B // # MYANMAR SIGN SECTION 0x104C n* # MYANMAR SYMBOL LOCATIVE 0x104D r* # MYANMAR SYMBOL COMPLETED 0x104E l* # MYANMAR SYMBOL 
AFOREMENTIONED 0x104F e* # MYANMAR SYMBOL GENITIVE 0x1050 sh # MYANMAR LETTER SHA 0x1051 ss # MYANMAR LETTER SSA 0x1052 R # MYANMAR LETTER VOCALIC R 0x1053 RR # MYANMAR LETTER VOCALIC RR 0x1054 L # MYANMAR LETTER VOCALIC L 0x1055 LL # MYANMAR LETTER VOCALIC LL 0x1056 R # MYANMAR VOWEL SIGN VOCALIC R 0x1057 RR # MYANMAR VOWEL SIGN VOCALIC RR 0x1058 L # MYANMAR VOWEL SIGN VOCALIC L 0x1059 LL # MYANMAR VOWEL SIGN VOCALIC LL 0x10A0 A # GEORGIAN CAPITAL LETTER AN 0x10A1 B # GEORGIAN CAPITAL LETTER BAN 0x10A2 G # GEORGIAN CAPITAL LETTER GAN 0x10A3 D # GEORGIAN CAPITAL LETTER DON 0x10A4 E # GEORGIAN CAPITAL LETTER EN 0x10A5 V # GEORGIAN CAPITAL LETTER VIN 0x10A6 Z # GEORGIAN CAPITAL LETTER ZEN 0x10A7 T` # GEORGIAN CAPITAL LETTER TAN 0x10A8 I # GEORGIAN CAPITAL LETTER IN 0x10A9 K # GEORGIAN CAPITAL LETTER KAN 0x10AA L # GEORGIAN CAPITAL LETTER LAS 0x10AB M # GEORGIAN CAPITAL LETTER MAN 0x10AC N # GEORGIAN CAPITAL LETTER NAR 0x10AD O # GEORGIAN CAPITAL LETTER ON 0x10AE P # GEORGIAN CAPITAL LETTER PAR 0x10AF Zh # GEORGIAN CAPITAL LETTER ZHAR 0x10B0 R # GEORGIAN CAPITAL LETTER RAE 0x10B1 S # GEORGIAN CAPITAL LETTER SAN 0x10B2 T # GEORGIAN CAPITAL LETTER TAR 0x10B3 U # GEORGIAN CAPITAL LETTER UN 0x10B4 P` # GEORGIAN CAPITAL LETTER PHAR 0x10B5 K` # GEORGIAN CAPITAL LETTER KHAR 0x10B6 "G'" # GEORGIAN CAPITAL LETTER GHAN 0x10B7 Q # GEORGIAN CAPITAL LETTER QAR 0x10B8 Sh # GEORGIAN CAPITAL LETTER SHIN 0x10B9 Ch` # GEORGIAN CAPITAL LETTER CHIN 0x10BA C` # GEORGIAN CAPITAL LETTER CAN 0x10BB "Z'" # GEORGIAN CAPITAL LETTER JIL 0x10BC C # GEORGIAN CAPITAL LETTER CIL 0x10BD Ch # GEORGIAN CAPITAL LETTER CHAR 0x10BE X # GEORGIAN CAPITAL LETTER XAN 0x10BF J # GEORGIAN CAPITAL LETTER JHAN 0x10C0 H # GEORGIAN CAPITAL LETTER HAE 0x10C1 E # GEORGIAN CAPITAL LETTER HE 0x10C2 Y # GEORGIAN CAPITAL LETTER HIE 0x10C3 W # GEORGIAN CAPITAL LETTER WE 0x10C4 Xh # GEORGIAN CAPITAL LETTER HAR 0x10C5 OE # GEORGIAN CAPITAL LETTER HOE 0x10D0 a # GEORGIAN LETTER AN 0x10D1 b # GEORGIAN LETTER BAN 0x10D2 g # 
GEORGIAN LETTER GAN 0x10D3 d # GEORGIAN LETTER DON 0x10D4 e # GEORGIAN LETTER EN 0x10D5 v # GEORGIAN LETTER VIN 0x10D6 z # GEORGIAN LETTER ZEN 0x10D7 t` # GEORGIAN LETTER TAN 0x10D8 i # GEORGIAN LETTER IN 0x10D9 k # GEORGIAN LETTER KAN 0x10DA l # GEORGIAN LETTER LAS 0x10DB m # GEORGIAN LETTER MAN 0x10DC n # GEORGIAN LETTER NAR 0x10DD o # GEORGIAN LETTER ON 0x10DE p # GEORGIAN LETTER PAR 0x10DF zh # GEORGIAN LETTER ZHAR 0x10E0 r # GEORGIAN LETTER RAE 0x10E1 s # GEORGIAN LETTER SAN 0x10E2 t # GEORGIAN LETTER TAR 0x10E3 u # GEORGIAN LETTER UN 0x10E4 p` # GEORGIAN LETTER PHAR 0x10E5 k` # GEORGIAN LETTER KHAR 0x10E6 "g'" # GEORGIAN LETTER GHAN 0x10E7 q # GEORGIAN LETTER QAR 0x10E8 sh # GEORGIAN LETTER SHIN 0x10E9 ch` # GEORGIAN LETTER CHIN 0x10EA c` # GEORGIAN LETTER CAN 0x10EB "z'" # GEORGIAN LETTER JIL 0x10EC c # GEORGIAN LETTER CIL 0x10ED ch # GEORGIAN LETTER CHAR 0x10EE x # GEORGIAN LETTER XAN 0x10EF j # GEORGIAN LETTER JHAN 0x10F0 h # GEORGIAN LETTER HAE 0x10F1 e # GEORGIAN LETTER HE 0x10F2 y # GEORGIAN LETTER HIE 0x10F3 w # GEORGIAN LETTER WE 0x10F4 xh # GEORGIAN LETTER HAR 0x10F5 oe # GEORGIAN LETTER HOE 0x10F6 f # GEORGIAN LETTER FI 0x10FB // # GEORGIAN PARAGRAPH SEPARATOR # # Characters 0x1100 to 0x11FF # 0x1100 g # HANGUL CHOSEONG KIYEOK 0x1101 gg # HANGUL CHOSEONG SSANGKIYEOK 0x1102 n # HANGUL CHOSEONG NIEUN 0x1103 d # HANGUL CHOSEONG TIKEUT 0x1104 dd # HANGUL CHOSEONG SSANGTIKEUT 0x1105 r # HANGUL CHOSEONG RIEUL 0x1106 m # HANGUL CHOSEONG MIEUM 0x1107 b # HANGUL CHOSEONG PIEUP 0x1108 bb # HANGUL CHOSEONG SSANGPIEUP 0x1109 s # HANGUL CHOSEONG SIOS 0x110A ss # HANGUL CHOSEONG SSANGSIOS 0x110C j # HANGUL CHOSEONG CIEUC 0x110D jj # HANGUL CHOSEONG SSANGCIEUC 0x110E c # HANGUL CHOSEONG CHIEUCH 0x110F k # HANGUL CHOSEONG KHIEUKH 0x1110 t # HANGUL CHOSEONG THIEUTH 0x1111 p # HANGUL CHOSEONG PHIEUPH 0x1112 h # HANGUL CHOSEONG HIEUH 0x1113 ng # HANGUL CHOSEONG NIEUN-KIYEOK 0x1114 nn # HANGUL CHOSEONG SSANGNIEUN 0x1115 nd # HANGUL CHOSEONG NIEUN-TIKEUT 0x1116 nb # 
HANGUL CHOSEONG NIEUN-PIEUP 0x1117 dg # HANGUL CHOSEONG TIKEUT-KIYEOK 0x1118 rn # HANGUL CHOSEONG RIEUL-NIEUN 0x1119 rr # HANGUL CHOSEONG SSANGRIEUL 0x111A rh # HANGUL CHOSEONG RIEUL-HIEUH 0x111B rN # HANGUL CHOSEONG KAPYEOUNRIEUL 0x111C mb # HANGUL CHOSEONG MIEUM-PIEUP 0x111D mN # HANGUL CHOSEONG KAPYEOUNMIEUM 0x111E bg # HANGUL CHOSEONG PIEUP-KIYEOK 0x111F bn # HANGUL CHOSEONG PIEUP-NIEUN 0x1121 bs # HANGUL CHOSEONG PIEUP-SIOS 0x1122 bsg # HANGUL CHOSEONG PIEUP-SIOS-KIYEOK 0x1123 bst # HANGUL CHOSEONG PIEUP-SIOS-TIKEUT 0x1124 bsb # HANGUL CHOSEONG PIEUP-SIOS-PIEUP 0x1125 bss # HANGUL CHOSEONG PIEUP-SSANGSIOS 0x1126 bsj # HANGUL CHOSEONG PIEUP-SIOS-CIEUC 0x1127 bj # HANGUL CHOSEONG PIEUP-CIEUC 0x1128 bc # HANGUL CHOSEONG PIEUP-CHIEUCH 0x1129 bt # HANGUL CHOSEONG PIEUP-THIEUTH 0x112A bp # HANGUL CHOSEONG PIEUP-PHIEUPH 0x112B bN # HANGUL CHOSEONG KAPYEOUNPIEUP 0x112C bbN # HANGUL CHOSEONG KAPYEOUNSSANGPIEUP 0x112D sg # HANGUL CHOSEONG SIOS-KIYEOK 0x112E sn # HANGUL CHOSEONG SIOS-NIEUN 0x112F sd # HANGUL CHOSEONG SIOS-TIKEUT 0x1130 sr # HANGUL CHOSEONG SIOS-RIEUL 0x1131 sm # HANGUL CHOSEONG SIOS-MIEUM 0x1132 sb # HANGUL CHOSEONG SIOS-PIEUP 0x1133 sbg # HANGUL CHOSEONG SIOS-PIEUP-KIYEOK 0x1134 sss # HANGUL CHOSEONG SIOS-SSANGSIOS 0x1135 s # HANGUL CHOSEONG SIOS-IEUNG 0x1136 sj # HANGUL CHOSEONG SIOS-CIEUC 0x1137 sc # HANGUL CHOSEONG SIOS-CHIEUCH 0x1138 sk # HANGUL CHOSEONG SIOS-KHIEUKH 0x1139 st # HANGUL CHOSEONG SIOS-THIEUTH 0x113A sp # HANGUL CHOSEONG SIOS-PHIEUPH 0x113B sh # HANGUL CHOSEONG SIOS-HIEUH 0x1140 Z # HANGUL CHOSEONG PANSIOS 0x1141 g # HANGUL CHOSEONG IEUNG-KIYEOK 0x1142 d # HANGUL CHOSEONG IEUNG-TIKEUT 0x1143 m # HANGUL CHOSEONG IEUNG-MIEUM 0x1144 b # HANGUL CHOSEONG IEUNG-PIEUP 0x1145 s # HANGUL CHOSEONG IEUNG-SIOS 0x1146 Z # HANGUL CHOSEONG IEUNG-PANSIOS 0x1148 j # HANGUL CHOSEONG IEUNG-CIEUC 0x1149 c # HANGUL CHOSEONG IEUNG-CHIEUCH 0x114A t # HANGUL CHOSEONG IEUNG-THIEUTH 0x114B p # HANGUL CHOSEONG IEUNG-PHIEUPH 0x114C N # HANGUL CHOSEONG YESIEUNG 
0x114D j # HANGUL CHOSEONG CIEUC-IEUNG 0x1152 ck # HANGUL CHOSEONG CHIEUCH-KHIEUKH 0x1153 ch # HANGUL CHOSEONG CHIEUCH-HIEUH 0x1156 pb # HANGUL CHOSEONG PHIEUPH-PIEUP 0x1157 pN # HANGUL CHOSEONG KAPYEOUNPHIEUPH 0x1158 hh # HANGUL CHOSEONG SSANGHIEUH 0x1159 Q # HANGUL CHOSEONG YEORINHIEUH 0x1161 a # HANGUL JUNGSEONG A 0x1162 ae # HANGUL JUNGSEONG AE 0x1163 ya # HANGUL JUNGSEONG YA 0x1164 yae # HANGUL JUNGSEONG YAE 0x1165 eo # HANGUL JUNGSEONG EO 0x1166 e # HANGUL JUNGSEONG E 0x1167 yeo # HANGUL JUNGSEONG YEO 0x1168 ye # HANGUL JUNGSEONG YE 0x1169 o # HANGUL JUNGSEONG O 0x116A wa # HANGUL JUNGSEONG WA 0x116B wae # HANGUL JUNGSEONG WAE 0x116C oe # HANGUL JUNGSEONG OE 0x116D yo # HANGUL JUNGSEONG YO 0x116E u # HANGUL JUNGSEONG U 0x116F weo # HANGUL JUNGSEONG WEO 0x1170 we # HANGUL JUNGSEONG WE 0x1171 wi # HANGUL JUNGSEONG WI 0x1172 yu # HANGUL JUNGSEONG YU 0x1173 eu # HANGUL JUNGSEONG EU 0x1174 yi # HANGUL JUNGSEONG YI 0x1175 i # HANGUL JUNGSEONG I 0x1176 a-o # HANGUL JUNGSEONG A-O 0x1177 a-u # HANGUL JUNGSEONG A-U 0x1178 ya-o # HANGUL JUNGSEONG YA-O 0x1179 ya-yo # HANGUL JUNGSEONG YA-YO 0x117A eo-o # HANGUL JUNGSEONG EO-O 0x117B eo-u # HANGUL JUNGSEONG EO-U 0x117C eo-eu # HANGUL JUNGSEONG EO-EU 0x117D yeo-o # HANGUL JUNGSEONG YEO-O 0x117E yeo-u # HANGUL JUNGSEONG YEO-U 0x117F o-eo # HANGUL JUNGSEONG O-EO 0x1180 o-e # HANGUL JUNGSEONG O-E 0x1181 o-ye # HANGUL JUNGSEONG O-YE 0x1182 o-o # HANGUL JUNGSEONG O-O 0x1183 o-u # HANGUL JUNGSEONG O-U 0x1184 yo-ya # HANGUL JUNGSEONG YO-YA 0x1185 yo-yae # HANGUL JUNGSEONG YO-YAE 0x1186 yo-yeo # HANGUL JUNGSEONG YO-YEO 0x1187 yo-o # HANGUL JUNGSEONG YO-O 0x1188 yo-i # HANGUL JUNGSEONG YO-I 0x1189 u-a # HANGUL JUNGSEONG U-A 0x118A u-ae # HANGUL JUNGSEONG U-AE 0x118B u-eo-eu # HANGUL JUNGSEONG U-EO-EU 0x118C u-ye # HANGUL JUNGSEONG U-YE 0x118D u-u # HANGUL JUNGSEONG U-U 0x118E yu-a # HANGUL JUNGSEONG YU-A 0x118F yu-eo # HANGUL JUNGSEONG YU-EO 0x1190 yu-e # HANGUL JUNGSEONG YU-E 0x1191 yu-yeo # HANGUL JUNGSEONG YU-YEO 0x1192 yu-ye # 
HANGUL JUNGSEONG YU-YE 0x1193 yu-u # HANGUL JUNGSEONG YU-U 0x1194 yu-i # HANGUL JUNGSEONG YU-I 0x1195 eu-u # HANGUL JUNGSEONG EU-U 0x1196 eu-eu # HANGUL JUNGSEONG EU-EU 0x1197 yi-u # HANGUL JUNGSEONG YI-U 0x1198 i-a # HANGUL JUNGSEONG I-A 0x1199 i-ya # HANGUL JUNGSEONG I-YA 0x119A i-o # HANGUL JUNGSEONG I-O 0x119B i-u # HANGUL JUNGSEONG I-U 0x119C i-eu # HANGUL JUNGSEONG I-EU 0x119D i-U # HANGUL JUNGSEONG I-ARAEA 0x119E U # HANGUL JUNGSEONG ARAEA 0x119F U-eo # HANGUL JUNGSEONG ARAEA-EO 0x11A0 U-u # HANGUL JUNGSEONG ARAEA-U 0x11A1 U-i # HANGUL JUNGSEONG ARAEA-I 0x11A2 UU # HANGUL JUNGSEONG SSANGARAEA 0x11A8 g # HANGUL JONGSEONG KIYEOK 0x11A9 gg # HANGUL JONGSEONG SSANGKIYEOK 0x11AA gs # HANGUL JONGSEONG KIYEOK-SIOS 0x11AB n # HANGUL JONGSEONG NIEUN 0x11AC nj # HANGUL JONGSEONG NIEUN-CIEUC 0x11AD nh # HANGUL JONGSEONG NIEUN-HIEUH 0x11AE d # HANGUL JONGSEONG TIKEUT 0x11AF l # HANGUL JONGSEONG RIEUL 0x11B0 lg # HANGUL JONGSEONG RIEUL-KIYEOK 0x11B1 lm # HANGUL JONGSEONG RIEUL-MIEUM 0x11B2 lb # HANGUL JONGSEONG RIEUL-PIEUP 0x11B3 ls # HANGUL JONGSEONG RIEUL-SIOS 0x11B4 lt # HANGUL JONGSEONG RIEUL-THIEUTH 0x11B5 lp # HANGUL JONGSEONG RIEUL-PHIEUPH 0x11B6 lh # HANGUL JONGSEONG RIEUL-HIEUH 0x11B7 m # HANGUL JONGSEONG MIEUM 0x11B8 b # HANGUL JONGSEONG PIEUP 0x11B9 bs # HANGUL JONGSEONG PIEUP-SIOS 0x11BA s # HANGUL JONGSEONG SIOS 0x11BB ss # HANGUL JONGSEONG SSANGSIOS 0x11BC ng # HANGUL JONGSEONG IEUNG 0x11BD j # HANGUL JONGSEONG CIEUC 0x11BE c # HANGUL JONGSEONG CHIEUCH 0x11BF k # HANGUL JONGSEONG KHIEUKH 0x11C0 t # HANGUL JONGSEONG THIEUTH 0x11C1 p # HANGUL JONGSEONG PHIEUPH 0x11C2 h # HANGUL JONGSEONG HIEUH 0x11C3 gl # HANGUL JONGSEONG KIYEOK-RIEUL 0x11C4 gsg # HANGUL JONGSEONG KIYEOK-SIOS-KIYEOK 0x11C5 ng # HANGUL JONGSEONG NIEUN-KIYEOK 0x11C6 nd # HANGUL JONGSEONG NIEUN-TIKEUT 0x11C7 ns # HANGUL JONGSEONG NIEUN-SIOS 0x11C8 nZ # HANGUL JONGSEONG NIEUN-PANSIOS 0x11C9 nt # HANGUL JONGSEONG NIEUN-THIEUTH 0x11CA dg # HANGUL JONGSEONG TIKEUT-KIYEOK 0x11CB tl # HANGUL JONGSEONG 
TIKEUT-RIEUL 0x11CC lgs # HANGUL JONGSEONG RIEUL-KIYEOK-SIOS 0x11CD ln # HANGUL JONGSEONG RIEUL-NIEUN 0x11CE ld # HANGUL JONGSEONG RIEUL-TIKEUT 0x11CF lth # HANGUL JONGSEONG RIEUL-TIKEUT-HIEUH 0x11D0 ll # HANGUL JONGSEONG SSANGRIEUL 0x11D1 lmg # HANGUL JONGSEONG RIEUL-MIEUM-KIYEOK 0x11D2 lms # HANGUL JONGSEONG RIEUL-MIEUM-SIOS 0x11D3 lbs # HANGUL JONGSEONG RIEUL-PIEUP-SIOS 0x11D4 lbh # HANGUL JONGSEONG RIEUL-PIEUP-HIEUH 0x11D5 rNp # HANGUL JONGSEONG RIEUL-KAPYEOUNPIEUP 0x11D6 lss # HANGUL JONGSEONG RIEUL-SSANGSIOS 0x11D7 lZ # HANGUL JONGSEONG RIEUL-PANSIOS 0x11D8 lk # HANGUL JONGSEONG RIEUL-KHIEUKH 0x11D9 lQ # HANGUL JONGSEONG RIEUL-YEORINHIEUH 0x11DA mg # HANGUL JONGSEONG MIEUM-KIYEOK 0x11DB ml # HANGUL JONGSEONG MIEUM-RIEUL 0x11DC mb # HANGUL JONGSEONG MIEUM-PIEUP 0x11DD ms # HANGUL JONGSEONG MIEUM-SIOS 0x11DE mss # HANGUL JONGSEONG MIEUM-SSANGSIOS 0x11DF mZ # HANGUL JONGSEONG MIEUM-PANSIOS 0x11E0 mc # HANGUL JONGSEONG MIEUM-CHIEUCH 0x11E1 mh # HANGUL JONGSEONG MIEUM-HIEUH 0x11E2 mN # HANGUL JONGSEONG KAPYEOUNMIEUM 0x11E3 bl # HANGUL JONGSEONG PIEUP-RIEUL 0x11E4 bp # HANGUL JONGSEONG PIEUP-PHIEUPH 0x11E5 ph # HANGUL JONGSEONG PIEUP-HIEUH 0x11E6 pN # HANGUL JONGSEONG KAPYEOUNPIEUP 0x11E7 sg # HANGUL JONGSEONG SIOS-KIYEOK 0x11E8 sd # HANGUL JONGSEONG SIOS-TIKEUT 0x11E9 sl # HANGUL JONGSEONG SIOS-RIEUL 0x11EA sb # HANGUL JONGSEONG SIOS-PIEUP 0x11EB Z # HANGUL JONGSEONG PANSIOS 0x11EC g # HANGUL JONGSEONG IEUNG-KIYEOK 0x11ED ss # HANGUL JONGSEONG IEUNG-SSANGKIYEOK 0x11EF kh # HANGUL JONGSEONG IEUNG-KHIEUKH 0x11F0 N # HANGUL JONGSEONG YESIEUNG 0x11F1 Ns # HANGUL JONGSEONG YESIEUNG-SIOS 0x11F2 NZ # HANGUL JONGSEONG YESIEUNG-PANSIOS 0x11F3 pb # HANGUL JONGSEONG PHIEUPH-PIEUP 0x11F4 pN # HANGUL JONGSEONG KAPYEOUNPHIEUPH 0x11F5 hn # HANGUL JONGSEONG HIEUH-NIEUN 0x11F6 hl # HANGUL JONGSEONG HIEUH-RIEUL 0x11F7 hm # HANGUL JONGSEONG HIEUH-MIEUM 0x11F8 hb # HANGUL JONGSEONG HIEUH-PIEUP 0x11F9 Q # HANGUL JONGSEONG YEORINHIEUH # # Characters 0x1200 to 0x12FF # 0x1200 ha # 
ETHIOPIC SYLLABLE HA 0x1201 hu # ETHIOPIC SYLLABLE HU 0x1202 hi # ETHIOPIC SYLLABLE HI 0x1203 haa # ETHIOPIC SYLLABLE HAA 0x1204 hee # ETHIOPIC SYLLABLE HEE 0x1205 he # ETHIOPIC SYLLABLE HE 0x1206 ho # ETHIOPIC SYLLABLE HO 0x1208 la # ETHIOPIC SYLLABLE LA 0x1209 lu # ETHIOPIC SYLLABLE LU 0x120A li # ETHIOPIC SYLLABLE LI 0x120B laa # ETHIOPIC SYLLABLE LAA 0x120C lee # ETHIOPIC SYLLABLE LEE 0x120D le # ETHIOPIC SYLLABLE LE 0x120E lo # ETHIOPIC SYLLABLE LO 0x120F lwa # ETHIOPIC SYLLABLE LWA 0x1210 hha # ETHIOPIC SYLLABLE HHA 0x1211 hhu # ETHIOPIC SYLLABLE HHU 0x1212 hhi # ETHIOPIC SYLLABLE HHI 0x1213 hhaa # ETHIOPIC SYLLABLE HHAA 0x1214 hhee # ETHIOPIC SYLLABLE HHEE 0x1215 hhe # ETHIOPIC SYLLABLE HHE 0x1216 hho # ETHIOPIC SYLLABLE HHO 0x1217 hhwa # ETHIOPIC SYLLABLE HHWA 0x1218 ma # ETHIOPIC SYLLABLE MA 0x1219 mu # ETHIOPIC SYLLABLE MU 0x121A mi # ETHIOPIC SYLLABLE MI 0x121B maa # ETHIOPIC SYLLABLE MAA 0x121C mee # ETHIOPIC SYLLABLE MEE 0x121D me # ETHIOPIC SYLLABLE ME 0x121E mo # ETHIOPIC SYLLABLE MO 0x121F mwa # ETHIOPIC SYLLABLE MWA 0x1220 sza # ETHIOPIC SYLLABLE SZA 0x1221 szu # ETHIOPIC SYLLABLE SZU 0x1222 szi # ETHIOPIC SYLLABLE SZI 0x1223 szaa # ETHIOPIC SYLLABLE SZAA 0x1224 szee # ETHIOPIC SYLLABLE SZEE 0x1225 sze # ETHIOPIC SYLLABLE SZE 0x1226 szo # ETHIOPIC SYLLABLE SZO 0x1227 szwa # ETHIOPIC SYLLABLE SZWA 0x1228 ra # ETHIOPIC SYLLABLE RA 0x1229 ru # ETHIOPIC SYLLABLE RU 0x122A ri # ETHIOPIC SYLLABLE RI 0x122B raa # ETHIOPIC SYLLABLE RAA 0x122C ree # ETHIOPIC SYLLABLE REE 0x122D re # ETHIOPIC SYLLABLE RE 0x122E ro # ETHIOPIC SYLLABLE RO 0x122F rwa # ETHIOPIC SYLLABLE RWA 0x1230 sa # ETHIOPIC SYLLABLE SA 0x1231 su # ETHIOPIC SYLLABLE SU 0x1232 si # ETHIOPIC SYLLABLE SI 0x1233 saa # ETHIOPIC SYLLABLE SAA 0x1234 see # ETHIOPIC SYLLABLE SEE 0x1235 se # ETHIOPIC SYLLABLE SE 0x1236 so # ETHIOPIC SYLLABLE SO 0x1237 swa # ETHIOPIC SYLLABLE SWA 0x1238 sha # ETHIOPIC SYLLABLE SHA 0x1239 shu # ETHIOPIC SYLLABLE SHU 0x123A shi # ETHIOPIC SYLLABLE SHI 0x123B shaa # 
ETHIOPIC SYLLABLE SHAA 0x123C shee # ETHIOPIC SYLLABLE SHEE 0x123D she # ETHIOPIC SYLLABLE SHE 0x123E sho # ETHIOPIC SYLLABLE SHO 0x123F shwa # ETHIOPIC SYLLABLE SHWA 0x1240 qa # ETHIOPIC SYLLABLE QA 0x1241 qu # ETHIOPIC SYLLABLE QU 0x1242 qi # ETHIOPIC SYLLABLE QI 0x1243 qaa # ETHIOPIC SYLLABLE QAA 0x1244 qee # ETHIOPIC SYLLABLE QEE 0x1245 qe # ETHIOPIC SYLLABLE QE 0x1246 qo # ETHIOPIC SYLLABLE QO 0x1248 qwa # ETHIOPIC SYLLABLE QWA 0x124A qwi # ETHIOPIC SYLLABLE QWI 0x124B qwaa # ETHIOPIC SYLLABLE QWAA 0x124C qwee # ETHIOPIC SYLLABLE QWEE 0x124D qwe # ETHIOPIC SYLLABLE QWE 0x1250 qha # ETHIOPIC SYLLABLE QHA 0x1251 qhu # ETHIOPIC SYLLABLE QHU 0x1252 qhi # ETHIOPIC SYLLABLE QHI 0x1253 qhaa # ETHIOPIC SYLLABLE QHAA 0x1254 qhee # ETHIOPIC SYLLABLE QHEE 0x1255 qhe # ETHIOPIC SYLLABLE QHE 0x1256 qho # ETHIOPIC SYLLABLE QHO 0x1258 qhwa # ETHIOPIC SYLLABLE QHWA 0x125A qhwi # ETHIOPIC SYLLABLE QHWI 0x125B qhwaa # ETHIOPIC SYLLABLE QHWAA 0x125C qhwee # ETHIOPIC SYLLABLE QHWEE 0x125D qhwe # ETHIOPIC SYLLABLE QHWE 0x1260 ba # ETHIOPIC SYLLABLE BA 0x1261 bu # ETHIOPIC SYLLABLE BU 0x1262 bi # ETHIOPIC SYLLABLE BI 0x1263 baa # ETHIOPIC SYLLABLE BAA 0x1264 bee # ETHIOPIC SYLLABLE BEE 0x1265 be # ETHIOPIC SYLLABLE BE 0x1266 bo # ETHIOPIC SYLLABLE BO 0x1267 bwa # ETHIOPIC SYLLABLE BWA 0x1268 va # ETHIOPIC SYLLABLE VA 0x1269 vu # ETHIOPIC SYLLABLE VU 0x126A vi # ETHIOPIC SYLLABLE VI 0x126B vaa # ETHIOPIC SYLLABLE VAA 0x126C vee # ETHIOPIC SYLLABLE VEE 0x126D ve # ETHIOPIC SYLLABLE VE 0x126E vo # ETHIOPIC SYLLABLE VO 0x126F vwa # ETHIOPIC SYLLABLE VWA 0x1270 ta # ETHIOPIC SYLLABLE TA 0x1271 tu # ETHIOPIC SYLLABLE TU 0x1272 ti # ETHIOPIC SYLLABLE TI 0x1273 taa # ETHIOPIC SYLLABLE TAA 0x1274 tee # ETHIOPIC SYLLABLE TEE 0x1275 te # ETHIOPIC SYLLABLE TE 0x1276 to # ETHIOPIC SYLLABLE TO 0x1277 twa # ETHIOPIC SYLLABLE TWA 0x1278 ca # ETHIOPIC SYLLABLE CA 0x1279 cu # ETHIOPIC SYLLABLE CU 0x127A ci # ETHIOPIC SYLLABLE CI 0x127B caa # ETHIOPIC SYLLABLE CAA 0x127C cee # ETHIOPIC SYLLABLE CEE 
0x127D ce # ETHIOPIC SYLLABLE CE 0x127E co # ETHIOPIC SYLLABLE CO 0x127F cwa # ETHIOPIC SYLLABLE CWA 0x1280 xa # ETHIOPIC SYLLABLE XA 0x1281 xu # ETHIOPIC SYLLABLE XU 0x1282 xi # ETHIOPIC SYLLABLE XI 0x1283 xaa # ETHIOPIC SYLLABLE XAA 0x1284 xee # ETHIOPIC SYLLABLE XEE 0x1285 xe # ETHIOPIC SYLLABLE XE 0x1286 xo # ETHIOPIC SYLLABLE XO 0x1288 xwa # ETHIOPIC SYLLABLE XWA 0x128A xwi # ETHIOPIC SYLLABLE XWI 0x128B xwaa # ETHIOPIC SYLLABLE XWAA 0x128C xwee # ETHIOPIC SYLLABLE XWEE 0x128D xwe # ETHIOPIC SYLLABLE XWE 0x1290 na # ETHIOPIC SYLLABLE NA 0x1291 nu # ETHIOPIC SYLLABLE NU 0x1292 ni # ETHIOPIC SYLLABLE NI 0x1293 naa # ETHIOPIC SYLLABLE NAA 0x1294 nee # ETHIOPIC SYLLABLE NEE 0x1295 ne # ETHIOPIC SYLLABLE NE 0x1296 no # ETHIOPIC SYLLABLE NO 0x1297 nwa # ETHIOPIC SYLLABLE NWA 0x1298 nya # ETHIOPIC SYLLABLE NYA 0x1299 nyu # ETHIOPIC SYLLABLE NYU 0x129A nyi # ETHIOPIC SYLLABLE NYI 0x129B nyaa # ETHIOPIC SYLLABLE NYAA 0x129C nyee # ETHIOPIC SYLLABLE NYEE 0x129D nye # ETHIOPIC SYLLABLE NYE 0x129E nyo # ETHIOPIC SYLLABLE NYO 0x129F nywa # ETHIOPIC SYLLABLE NYWA 0x12A0 "'a" # ETHIOPIC SYLLABLE GLOTTAL A 0x12A1 "'u" # ETHIOPIC SYLLABLE GLOTTAL U 0x12A3 "'aa" # ETHIOPIC SYLLABLE GLOTTAL AA 0x12A4 "'ee" # ETHIOPIC SYLLABLE GLOTTAL EE 0x12A5 "'e" # ETHIOPIC SYLLABLE GLOTTAL E 0x12A6 "'o" # ETHIOPIC SYLLABLE GLOTTAL O 0x12A7 "'wa" # ETHIOPIC SYLLABLE GLOTTAL WA 0x12A8 ka # ETHIOPIC SYLLABLE KA 0x12A9 ku # ETHIOPIC SYLLABLE KU 0x12AA ki # ETHIOPIC SYLLABLE KI 0x12AB kaa # ETHIOPIC SYLLABLE KAA 0x12AC kee # ETHIOPIC SYLLABLE KEE 0x12AD ke # ETHIOPIC SYLLABLE KE 0x12AE ko # ETHIOPIC SYLLABLE KO 0x12B0 kwa # ETHIOPIC SYLLABLE KWA 0x12B2 kwi # ETHIOPIC SYLLABLE KWI 0x12B3 kwaa # ETHIOPIC SYLLABLE KWAA 0x12B4 kwee # ETHIOPIC SYLLABLE KWEE 0x12B5 kwe # ETHIOPIC SYLLABLE KWE 0x12B8 kxa # ETHIOPIC SYLLABLE KXA 0x12B9 kxu # ETHIOPIC SYLLABLE KXU 0x12BA kxi # ETHIOPIC SYLLABLE KXI 0x12BB kxaa # ETHIOPIC SYLLABLE KXAA 0x12BC kxee # ETHIOPIC SYLLABLE KXEE 0x12BD kxe # ETHIOPIC SYLLABLE KXE 
0x12BE kxo # ETHIOPIC SYLLABLE KXO 0x12C0 kxwa # ETHIOPIC SYLLABLE KXWA 0x12C2 kxwi # ETHIOPIC SYLLABLE KXWI 0x12C3 kxwaa # ETHIOPIC SYLLABLE KXWAA 0x12C4 kxwee # ETHIOPIC SYLLABLE KXWEE 0x12C5 kxwe # ETHIOPIC SYLLABLE KXWE 0x12C8 wa # ETHIOPIC SYLLABLE WA 0x12C9 wu # ETHIOPIC SYLLABLE WU 0x12CA wi # ETHIOPIC SYLLABLE WI 0x12CB waa # ETHIOPIC SYLLABLE WAA 0x12CC wee # ETHIOPIC SYLLABLE WEE 0x12CD we # ETHIOPIC SYLLABLE WE 0x12CE wo # ETHIOPIC SYLLABLE WO 0x12D0 `a # ETHIOPIC SYLLABLE PHARYNGEAL A 0x12D1 `u # ETHIOPIC SYLLABLE PHARYNGEAL U 0x12D2 `i # ETHIOPIC SYLLABLE PHARYNGEAL I 0x12D3 `aa # ETHIOPIC SYLLABLE PHARYNGEAL AA 0x12D4 `ee # ETHIOPIC SYLLABLE PHARYNGEAL EE 0x12D5 `e # ETHIOPIC SYLLABLE PHARYNGEAL E 0x12D6 `o # ETHIOPIC SYLLABLE PHARYNGEAL O 0x12D8 za # ETHIOPIC SYLLABLE ZA 0x12D9 zu # ETHIOPIC SYLLABLE ZU 0x12DA zi # ETHIOPIC SYLLABLE ZI 0x12DB zaa # ETHIOPIC SYLLABLE ZAA 0x12DC zee # ETHIOPIC SYLLABLE ZEE 0x12DD ze # ETHIOPIC SYLLABLE ZE 0x12DE zo # ETHIOPIC SYLLABLE ZO 0x12DF zwa # ETHIOPIC SYLLABLE ZWA 0x12E0 zha # ETHIOPIC SYLLABLE ZHA 0x12E1 zhu # ETHIOPIC SYLLABLE ZHU 0x12E2 zhi # ETHIOPIC SYLLABLE ZHI 0x12E3 zhaa # ETHIOPIC SYLLABLE ZHAA 0x12E4 zhee # ETHIOPIC SYLLABLE ZHEE 0x12E5 zhe # ETHIOPIC SYLLABLE ZHE 0x12E6 zho # ETHIOPIC SYLLABLE ZHO 0x12E7 zhwa # ETHIOPIC SYLLABLE ZHWA 0x12E8 ya # ETHIOPIC SYLLABLE YA 0x12E9 yu # ETHIOPIC SYLLABLE YU 0x12EA yi # ETHIOPIC SYLLABLE YI 0x12EB yaa # ETHIOPIC SYLLABLE YAA 0x12EC yee # ETHIOPIC SYLLABLE YEE 0x12ED ye # ETHIOPIC SYLLABLE YE 0x12EE yo # ETHIOPIC SYLLABLE YO 0x12F0 da # ETHIOPIC SYLLABLE DA 0x12F1 du # ETHIOPIC SYLLABLE DU 0x12F2 di # ETHIOPIC SYLLABLE DI 0x12F3 daa # ETHIOPIC SYLLABLE DAA 0x12F4 dee # ETHIOPIC SYLLABLE DEE 0x12F5 de # ETHIOPIC SYLLABLE DE 0x12F6 do # ETHIOPIC SYLLABLE DO 0x12F7 dwa # ETHIOPIC SYLLABLE DWA 0x12F8 dda # ETHIOPIC SYLLABLE DDA 0x12F9 ddu # ETHIOPIC SYLLABLE DDU 0x12FA ddi # ETHIOPIC SYLLABLE DDI 0x12FB ddaa # ETHIOPIC SYLLABLE DDAA 0x12FC ddee # ETHIOPIC SYLLABLE 
DDEE 0x12FD dde # ETHIOPIC SYLLABLE DDE 0x12FE ddo # ETHIOPIC SYLLABLE DDO 0x12FF ddwa # ETHIOPIC SYLLABLE DDWA # # Characters 0x1300 to 0x13FF # 0x1300 ja # ETHIOPIC SYLLABLE JA 0x1301 ju # ETHIOPIC SYLLABLE JU 0x1302 ji # ETHIOPIC SYLLABLE JI 0x1303 jaa # ETHIOPIC SYLLABLE JAA 0x1304 jee # ETHIOPIC SYLLABLE JEE 0x1305 je # ETHIOPIC SYLLABLE JE 0x1306 jo # ETHIOPIC SYLLABLE JO 0x1307 jwa # ETHIOPIC SYLLABLE JWA 0x1308 ga # ETHIOPIC SYLLABLE GA 0x1309 gu # ETHIOPIC SYLLABLE GU 0x130A gi # ETHIOPIC SYLLABLE GI 0x130B gaa # ETHIOPIC SYLLABLE GAA 0x130C gee # ETHIOPIC SYLLABLE GEE 0x130D ge # ETHIOPIC SYLLABLE GE 0x130E go # ETHIOPIC SYLLABLE GO 0x1310 gwa # ETHIOPIC SYLLABLE GWA 0x1312 gwi # ETHIOPIC SYLLABLE GWI 0x1313 gwaa # ETHIOPIC SYLLABLE GWAA 0x1314 gwee # ETHIOPIC SYLLABLE GWEE 0x1315 gwe # ETHIOPIC SYLLABLE GWE 0x1318 gga # ETHIOPIC SYLLABLE GGA 0x1319 ggu # ETHIOPIC SYLLABLE GGU 0x131A ggi # ETHIOPIC SYLLABLE GGI 0x131B ggaa # ETHIOPIC SYLLABLE GGAA 0x131C ggee # ETHIOPIC SYLLABLE GGEE 0x131D gge # ETHIOPIC SYLLABLE GGE 0x131E ggo # ETHIOPIC SYLLABLE GGO 0x1320 tha # ETHIOPIC SYLLABLE THA 0x1321 thu # ETHIOPIC SYLLABLE THU 0x1322 thi # ETHIOPIC SYLLABLE THI 0x1323 thaa # ETHIOPIC SYLLABLE THAA 0x1324 thee # ETHIOPIC SYLLABLE THEE 0x1325 the # ETHIOPIC SYLLABLE THE 0x1326 tho # ETHIOPIC SYLLABLE THO 0x1327 thwa # ETHIOPIC SYLLABLE THWA 0x1328 cha # ETHIOPIC SYLLABLE CHA 0x1329 chu # ETHIOPIC SYLLABLE CHU 0x132A chi # ETHIOPIC SYLLABLE CHI 0x132B chaa # ETHIOPIC SYLLABLE CHAA 0x132C chee # ETHIOPIC SYLLABLE CHEE 0x132D che # ETHIOPIC SYLLABLE CHE 0x132E cho # ETHIOPIC SYLLABLE CHO 0x132F chwa # ETHIOPIC SYLLABLE CHWA 0x1330 pha # ETHIOPIC SYLLABLE PHA 0x1331 phu # ETHIOPIC SYLLABLE PHU 0x1332 phi # ETHIOPIC SYLLABLE PHI 0x1333 phaa # ETHIOPIC SYLLABLE PHAA 0x1334 phee # ETHIOPIC SYLLABLE PHEE 0x1335 phe # ETHIOPIC SYLLABLE PHE 0x1336 pho # ETHIOPIC SYLLABLE PHO 0x1337 phwa # ETHIOPIC SYLLABLE PHWA 0x1338 tsa # ETHIOPIC SYLLABLE TSA 0x1339 tsu # ETHIOPIC 
SYLLABLE TSU 0x133A tsi # ETHIOPIC SYLLABLE TSI 0x133B tsaa # ETHIOPIC SYLLABLE TSAA 0x133C tsee # ETHIOPIC SYLLABLE TSEE 0x133D tse # ETHIOPIC SYLLABLE TSE 0x133E tso # ETHIOPIC SYLLABLE TSO 0x133F tswa # ETHIOPIC SYLLABLE TSWA 0x1340 tza # ETHIOPIC SYLLABLE TZA 0x1341 tzu # ETHIOPIC SYLLABLE TZU 0x1342 tzi # ETHIOPIC SYLLABLE TZI 0x1343 tzaa # ETHIOPIC SYLLABLE TZAA 0x1344 tzee # ETHIOPIC SYLLABLE TZEE 0x1345 tze # ETHIOPIC SYLLABLE TZE 0x1346 tzo # ETHIOPIC SYLLABLE TZO 0x1348 fa # ETHIOPIC SYLLABLE FA 0x1349 fu # ETHIOPIC SYLLABLE FU 0x134A fi # ETHIOPIC SYLLABLE FI 0x134B faa # ETHIOPIC SYLLABLE FAA 0x134C fee # ETHIOPIC SYLLABLE FEE 0x134D fe # ETHIOPIC SYLLABLE FE 0x134E fo # ETHIOPIC SYLLABLE FO 0x134F fwa # ETHIOPIC SYLLABLE FWA 0x1350 pa # ETHIOPIC SYLLABLE PA 0x1351 pu # ETHIOPIC SYLLABLE PU 0x1352 pi # ETHIOPIC SYLLABLE PI 0x1353 paa # ETHIOPIC SYLLABLE PAA 0x1354 pee # ETHIOPIC SYLLABLE PEE 0x1355 pe # ETHIOPIC SYLLABLE PE 0x1356 po # ETHIOPIC SYLLABLE PO 0x1357 pwa # ETHIOPIC SYLLABLE PWA 0x1358 rya # ETHIOPIC SYLLABLE RYA 0x1359 mya # ETHIOPIC SYLLABLE MYA 0x135A fya # ETHIOPIC SYLLABLE FYA 0x1361 " " # ETHIOPIC WORDSPACE 0x1362 . # ETHIOPIC FULL STOP 0x1363 , # ETHIOPIC COMMA 0x1364 ; # ETHIOPIC SEMICOLON 0x1365 : # ETHIOPIC COLON 0x1366 :: # ETHIOPIC PREFACE COLON 0x1367 ? 
# ETHIOPIC QUESTION MARK 0x1368 // # ETHIOPIC PARAGRAPH SEPARATOR 0x1369 1 # ETHIOPIC DIGIT ONE 0x136A 2 # ETHIOPIC DIGIT TWO 0x136B 3 # ETHIOPIC DIGIT THREE 0x136C 4 # ETHIOPIC DIGIT FOUR 0x136D 5 # ETHIOPIC DIGIT FIVE 0x136E 6 # ETHIOPIC DIGIT SIX 0x136F 7 # ETHIOPIC DIGIT SEVEN 0x1370 8 # ETHIOPIC DIGIT EIGHT 0x1371 9 # ETHIOPIC DIGIT NINE 0x1372 10+ # ETHIOPIC NUMBER TEN 0x1373 20+ # ETHIOPIC NUMBER TWENTY 0x1374 30+ # ETHIOPIC NUMBER THIRTY 0x1375 40+ # ETHIOPIC NUMBER FORTY 0x1376 50+ # ETHIOPIC NUMBER FIFTY 0x1377 60+ # ETHIOPIC NUMBER SIXTY 0x1378 70+ # ETHIOPIC NUMBER SEVENTY 0x1379 80+ # ETHIOPIC NUMBER EIGHTY 0x137A 90+ # ETHIOPIC NUMBER NINETY 0x137B 100+ # ETHIOPIC NUMBER HUNDRED 0x137C 10,000+ # ETHIOPIC NUMBER TEN THOUSAND 0x13A0 a # CHEROKEE LETTER A 0x13A1 e # CHEROKEE LETTER E 0x13A2 i # CHEROKEE LETTER I 0x13A3 o # CHEROKEE LETTER O 0x13A4 u # CHEROKEE LETTER U 0x13A5 v # CHEROKEE LETTER V 0x13A6 ga # CHEROKEE LETTER GA 0x13A7 ka # CHEROKEE LETTER KA 0x13A8 ge # CHEROKEE LETTER GE 0x13A9 gi # CHEROKEE LETTER GI 0x13AA go # CHEROKEE LETTER GO 0x13AB gu # CHEROKEE LETTER GU 0x13AC gv # CHEROKEE LETTER GV 0x13AD ha # CHEROKEE LETTER HA 0x13AE he # CHEROKEE LETTER HE 0x13AF hi # CHEROKEE LETTER HI 0x13B0 ho # CHEROKEE LETTER HO 0x13B1 hu # CHEROKEE LETTER HU 0x13B2 hv # CHEROKEE LETTER HV 0x13B3 la # CHEROKEE LETTER LA 0x13B4 le # CHEROKEE LETTER LE 0x13B5 li # CHEROKEE LETTER LI 0x13B6 lo # CHEROKEE LETTER LO 0x13B7 lu # CHEROKEE LETTER LU 0x13B8 lv # CHEROKEE LETTER LV 0x13B9 ma # CHEROKEE LETTER MA 0x13BA me # CHEROKEE LETTER ME 0x13BB mi # CHEROKEE LETTER MI 0x13BC mo # CHEROKEE LETTER MO 0x13BD mu # CHEROKEE LETTER MU 0x13BE na # CHEROKEE LETTER NA 0x13BF hna # CHEROKEE LETTER HNA 0x13C0 nah # CHEROKEE LETTER NAH 0x13C1 ne # CHEROKEE LETTER NE 0x13C2 ni # CHEROKEE LETTER NI 0x13C3 no # CHEROKEE LETTER NO 0x13C4 nu # CHEROKEE LETTER NU 0x13C5 nv # CHEROKEE LETTER NV 0x13C6 qua # CHEROKEE LETTER QUA 0x13C7 que # CHEROKEE LETTER QUE 0x13C8 qui # 
CHEROKEE LETTER QUI 0x13C9 quo # CHEROKEE LETTER QUO 0x13CA quu # CHEROKEE LETTER QUU 0x13CB quv # CHEROKEE LETTER QUV 0x13CC sa # CHEROKEE LETTER SA 0x13CD s # CHEROKEE LETTER S 0x13CE se # CHEROKEE LETTER SE 0x13CF si # CHEROKEE LETTER SI 0x13D0 so # CHEROKEE LETTER SO 0x13D1 su # CHEROKEE LETTER SU 0x13D2 sv # CHEROKEE LETTER SV 0x13D3 da # CHEROKEE LETTER DA 0x13D4 ta # CHEROKEE LETTER TA 0x13D5 de # CHEROKEE LETTER DE 0x13D6 te # CHEROKEE LETTER TE 0x13D7 di # CHEROKEE LETTER DI 0x13D8 ti # CHEROKEE LETTER TI 0x13D9 do # CHEROKEE LETTER DO 0x13DA du # CHEROKEE LETTER DU 0x13DB dv # CHEROKEE LETTER DV 0x13DC dla # CHEROKEE LETTER DLA 0x13DD tla # CHEROKEE LETTER TLA 0x13DE tle # CHEROKEE LETTER TLE 0x13DF tli # CHEROKEE LETTER TLI 0x13E0 tlo # CHEROKEE LETTER TLO 0x13E1 tlu # CHEROKEE LETTER TLU 0x13E2 tlv # CHEROKEE LETTER TLV 0x13E3 tsa # CHEROKEE LETTER TSA 0x13E4 tse # CHEROKEE LETTER TSE 0x13E5 tsi # CHEROKEE LETTER TSI 0x13E6 tso # CHEROKEE LETTER TSO 0x13E7 tsu # CHEROKEE LETTER TSU 0x13E8 tsv # CHEROKEE LETTER TSV 0x13E9 wa # CHEROKEE LETTER WA 0x13EA we # CHEROKEE LETTER WE 0x13EB wi # CHEROKEE LETTER WI 0x13EC wo # CHEROKEE LETTER WO 0x13ED wu # CHEROKEE LETTER WU 0x13EE wv # CHEROKEE LETTER WV 0x13EF ya # CHEROKEE LETTER YA 0x13F0 ye # CHEROKEE LETTER YE 0x13F1 yi # CHEROKEE LETTER YI 0x13F2 yo # CHEROKEE LETTER YO 0x13F3 yu # CHEROKEE LETTER YU 0x13F4 yv # CHEROKEE LETTER YV # # Characters 0x1400 to 0x14FF # 0x1401 e # CANADIAN SYLLABICS E 0x1402 aai # CANADIAN SYLLABICS AAI 0x1403 i # CANADIAN SYLLABICS I 0x1404 ii # CANADIAN SYLLABICS II 0x1405 o # CANADIAN SYLLABICS O 0x1406 oo # CANADIAN SYLLABICS OO 0x1407 oo # CANADIAN SYLLABICS Y-CREE OO 0x1408 ee # CANADIAN SYLLABICS CARRIER EE 0x1409 i # CANADIAN SYLLABICS CARRIER I 0x140A a # CANADIAN SYLLABICS A 0x140B aa # CANADIAN SYLLABICS AA 0x140C we # CANADIAN SYLLABICS WE 0x140D we # CANADIAN SYLLABICS WEST-CREE WE 0x140E wi # CANADIAN SYLLABICS WI 0x140F wi # CANADIAN SYLLABICS WEST-CREE WI 0x1410 
wii # CANADIAN SYLLABICS WII 0x1411 wii # CANADIAN SYLLABICS WEST-CREE WII 0x1412 wo # CANADIAN SYLLABICS WO 0x1413 wo # CANADIAN SYLLABICS WEST-CREE WO 0x1414 woo # CANADIAN SYLLABICS WOO 0x1415 woo # CANADIAN SYLLABICS WEST-CREE WOO 0x1416 woo # CANADIAN SYLLABICS NASKAPI WOO 0x1417 wa # CANADIAN SYLLABICS WA 0x1418 wa # CANADIAN SYLLABICS WEST-CREE WA 0x1419 waa # CANADIAN SYLLABICS WAA 0x141A waa # CANADIAN SYLLABICS WEST-CREE WAA 0x141B waa # CANADIAN SYLLABICS NASKAPI WAA 0x141C ai # CANADIAN SYLLABICS AI 0x141D w # CANADIAN SYLLABICS Y-CREE W 0x141E "'" # CANADIAN SYLLABICS GLOTTAL STOP 0x141F t # CANADIAN SYLLABICS FINAL ACUTE 0x1420 k # CANADIAN SYLLABICS FINAL GRAVE 0x1421 sh # CANADIAN SYLLABICS FINAL BOTTOM HALF RING 0x1422 s # CANADIAN SYLLABICS FINAL TOP HALF RING 0x1423 n # CANADIAN SYLLABICS FINAL RIGHT HALF RING 0x1424 w # CANADIAN SYLLABICS FINAL RING 0x1425 n # CANADIAN SYLLABICS FINAL DOUBLE ACUTE 0x1427 w # CANADIAN SYLLABICS FINAL MIDDLE DOT 0x1428 c # CANADIAN SYLLABICS FINAL SHORT HORIZONTAL STROKE 0x142A l # CANADIAN SYLLABICS FINAL DOWN TACK 0x142B en # CANADIAN SYLLABICS EN 0x142C in # CANADIAN SYLLABICS IN 0x142D on # CANADIAN SYLLABICS ON 0x142E an # CANADIAN SYLLABICS AN 0x142F pe # CANADIAN SYLLABICS PE 0x1430 paai # CANADIAN SYLLABICS PAAI 0x1431 pi # CANADIAN SYLLABICS PI 0x1432 pii # CANADIAN SYLLABICS PII 0x1433 po # CANADIAN SYLLABICS PO 0x1434 poo # CANADIAN SYLLABICS POO 0x1435 poo # CANADIAN SYLLABICS Y-CREE POO 0x1436 hee # CANADIAN SYLLABICS CARRIER HEE 0x1437 hi # CANADIAN SYLLABICS CARRIER HI 0x1438 pa # CANADIAN SYLLABICS PA 0x1439 paa # CANADIAN SYLLABICS PAA 0x143A pwe # CANADIAN SYLLABICS PWE 0x143B pwe # CANADIAN SYLLABICS WEST-CREE PWE 0x143C pwi # CANADIAN SYLLABICS PWI 0x143D pwi # CANADIAN SYLLABICS WEST-CREE PWI 0x143E pwii # CANADIAN SYLLABICS PWII 0x143F pwii # CANADIAN SYLLABICS WEST-CREE PWII 0x1440 pwo # CANADIAN SYLLABICS PWO 0x1441 pwo # CANADIAN SYLLABICS WEST-CREE PWO 0x1442 pwoo # CANADIAN SYLLABICS 
PWOO 0x1443 pwoo # CANADIAN SYLLABICS WEST-CREE PWOO 0x1444 pwa # CANADIAN SYLLABICS PWA 0x1445 pwa # CANADIAN SYLLABICS WEST-CREE PWA 0x1446 pwaa # CANADIAN SYLLABICS PWAA 0x1447 pwaa # CANADIAN SYLLABICS WEST-CREE PWAA 0x1448 pwaa # CANADIAN SYLLABICS Y-CREE PWAA 0x1449 p # CANADIAN SYLLABICS P 0x144A p # CANADIAN SYLLABICS WEST-CREE P 0x144B h # CANADIAN SYLLABICS CARRIER H 0x144C te # CANADIAN SYLLABICS TE 0x144D taai # CANADIAN SYLLABICS TAAI 0x144E ti # CANADIAN SYLLABICS TI 0x144F tii # CANADIAN SYLLABICS TII 0x1450 to # CANADIAN SYLLABICS TO 0x1451 too # CANADIAN SYLLABICS TOO 0x1452 too # CANADIAN SYLLABICS Y-CREE TOO 0x1453 dee # CANADIAN SYLLABICS CARRIER DEE 0x1454 di # CANADIAN SYLLABICS CARRIER DI 0x1455 ta # CANADIAN SYLLABICS TA 0x1456 taa # CANADIAN SYLLABICS TAA 0x1457 twe # CANADIAN SYLLABICS TWE 0x1458 twe # CANADIAN SYLLABICS WEST-CREE TWE 0x1459 twi # CANADIAN SYLLABICS TWI 0x145A twi # CANADIAN SYLLABICS WEST-CREE TWI 0x145B twii # CANADIAN SYLLABICS TWII 0x145C twii # CANADIAN SYLLABICS WEST-CREE TWII 0x145D two # CANADIAN SYLLABICS TWO 0x145E two # CANADIAN SYLLABICS WEST-CREE TWO 0x145F twoo # CANADIAN SYLLABICS TWOO 0x1460 twoo # CANADIAN SYLLABICS WEST-CREE TWOO 0x1461 twa # CANADIAN SYLLABICS TWA 0x1462 twa # CANADIAN SYLLABICS WEST-CREE TWA 0x1463 twaa # CANADIAN SYLLABICS TWAA 0x1464 twaa # CANADIAN SYLLABICS WEST-CREE TWAA 0x1465 twaa # CANADIAN SYLLABICS NASKAPI TWAA 0x1466 t # CANADIAN SYLLABICS T 0x1467 tte # CANADIAN SYLLABICS TTE 0x1468 tti # CANADIAN SYLLABICS TTI 0x1469 tto # CANADIAN SYLLABICS TTO 0x146A tta # CANADIAN SYLLABICS TTA 0x146B ke # CANADIAN SYLLABICS KE 0x146C kaai # CANADIAN SYLLABICS KAAI 0x146D ki # CANADIAN SYLLABICS KI 0x146E kii # CANADIAN SYLLABICS KII 0x146F ko # CANADIAN SYLLABICS KO 0x1470 koo # CANADIAN SYLLABICS KOO 0x1471 koo # CANADIAN SYLLABICS Y-CREE KOO 0x1472 ka # CANADIAN SYLLABICS KA 0x1473 kaa # CANADIAN SYLLABICS KAA 0x1474 kwe # CANADIAN SYLLABICS KWE 0x1475 kwe # CANADIAN SYLLABICS 
WEST-CREE KWE 0x1476 kwi # CANADIAN SYLLABICS KWI 0x1477 kwi # CANADIAN SYLLABICS WEST-CREE KWI 0x1478 kwii # CANADIAN SYLLABICS KWII 0x1479 kwii # CANADIAN SYLLABICS WEST-CREE KWII 0x147A kwo # CANADIAN SYLLABICS KWO 0x147B kwo # CANADIAN SYLLABICS WEST-CREE KWO 0x147C kwoo # CANADIAN SYLLABICS KWOO 0x147D kwoo # CANADIAN SYLLABICS WEST-CREE KWOO 0x147E kwa # CANADIAN SYLLABICS KWA 0x147F kwa # CANADIAN SYLLABICS WEST-CREE KWA 0x1480 kwaa # CANADIAN SYLLABICS KWAA 0x1481 kwaa # CANADIAN SYLLABICS WEST-CREE KWAA 0x1482 kwaa # CANADIAN SYLLABICS NASKAPI KWAA 0x1483 k # CANADIAN SYLLABICS K 0x1484 kw # CANADIAN SYLLABICS KW 0x1485 keh # CANADIAN SYLLABICS SOUTH-SLAVEY KEH 0x1486 kih # CANADIAN SYLLABICS SOUTH-SLAVEY KIH 0x1487 koh # CANADIAN SYLLABICS SOUTH-SLAVEY KOH 0x1488 kah # CANADIAN SYLLABICS SOUTH-SLAVEY KAH 0x1489 ce # CANADIAN SYLLABICS CE 0x148A caai # CANADIAN SYLLABICS CAAI 0x148B ci # CANADIAN SYLLABICS CI 0x148C cii # CANADIAN SYLLABICS CII 0x148D co # CANADIAN SYLLABICS CO 0x148E coo # CANADIAN SYLLABICS COO 0x148F coo # CANADIAN SYLLABICS Y-CREE COO 0x1490 ca # CANADIAN SYLLABICS CA 0x1491 caa # CANADIAN SYLLABICS CAA 0x1492 cwe # CANADIAN SYLLABICS CWE 0x1493 cwe # CANADIAN SYLLABICS WEST-CREE CWE 0x1494 cwi # CANADIAN SYLLABICS CWI 0x1495 cwi # CANADIAN SYLLABICS WEST-CREE CWI 0x1496 cwii # CANADIAN SYLLABICS CWII 0x1497 cwii # CANADIAN SYLLABICS WEST-CREE CWII 0x1498 cwo # CANADIAN SYLLABICS CWO 0x1499 cwo # CANADIAN SYLLABICS WEST-CREE CWO 0x149A cwoo # CANADIAN SYLLABICS CWOO 0x149B cwoo # CANADIAN SYLLABICS WEST-CREE CWOO 0x149C cwa # CANADIAN SYLLABICS CWA 0x149D cwa # CANADIAN SYLLABICS WEST-CREE CWA 0x149E cwaa # CANADIAN SYLLABICS CWAA 0x149F cwaa # CANADIAN SYLLABICS WEST-CREE CWAA 0x14A0 cwaa # CANADIAN SYLLABICS NASKAPI CWAA 0x14A1 c # CANADIAN SYLLABICS C 0x14A2 th # CANADIAN SYLLABICS SAYISI TH 0x14A3 me # CANADIAN SYLLABICS ME 0x14A4 maai # CANADIAN SYLLABICS MAAI 0x14A5 mi # CANADIAN SYLLABICS MI 0x14A6 mii # CANADIAN SYLLABICS MII 
0x14A7 mo # CANADIAN SYLLABICS MO 0x14A8 moo # CANADIAN SYLLABICS MOO 0x14A9 moo # CANADIAN SYLLABICS Y-CREE MOO 0x14AA ma # CANADIAN SYLLABICS MA 0x14AB maa # CANADIAN SYLLABICS MAA 0x14AC mwe # CANADIAN SYLLABICS MWE 0x14AD mwe # CANADIAN SYLLABICS WEST-CREE MWE 0x14AE mwi # CANADIAN SYLLABICS MWI 0x14AF mwi # CANADIAN SYLLABICS WEST-CREE MWI 0x14B0 mwii # CANADIAN SYLLABICS MWII 0x14B1 mwii # CANADIAN SYLLABICS WEST-CREE MWII 0x14B2 mwo # CANADIAN SYLLABICS MWO 0x14B3 mwo # CANADIAN SYLLABICS WEST-CREE MWO 0x14B4 mwoo # CANADIAN SYLLABICS MWOO 0x14B5 mwoo # CANADIAN SYLLABICS WEST-CREE MWOO 0x14B6 mwa # CANADIAN SYLLABICS MWA 0x14B7 mwa # CANADIAN SYLLABICS WEST-CREE MWA 0x14B8 mwaa # CANADIAN SYLLABICS MWAA 0x14B9 mwaa # CANADIAN SYLLABICS WEST-CREE MWAA 0x14BA mwaa # CANADIAN SYLLABICS NASKAPI MWAA 0x14BB m # CANADIAN SYLLABICS M 0x14BC m # CANADIAN SYLLABICS WEST-CREE M 0x14BD mh # CANADIAN SYLLABICS MH 0x14BE m # CANADIAN SYLLABICS ATHAPASCAN M 0x14BF m # CANADIAN SYLLABICS SAYISI M 0x14C0 ne # CANADIAN SYLLABICS NE 0x14C1 naai # CANADIAN SYLLABICS NAAI 0x14C2 ni # CANADIAN SYLLABICS NI 0x14C3 nii # CANADIAN SYLLABICS NII 0x14C4 no # CANADIAN SYLLABICS NO 0x14C5 noo # CANADIAN SYLLABICS NOO 0x14C6 noo # CANADIAN SYLLABICS Y-CREE NOO 0x14C7 na # CANADIAN SYLLABICS NA 0x14C8 naa # CANADIAN SYLLABICS NAA 0x14C9 nwe # CANADIAN SYLLABICS NWE 0x14CA nwe # CANADIAN SYLLABICS WEST-CREE NWE 0x14CB nwa # CANADIAN SYLLABICS NWA 0x14CC nwa # CANADIAN SYLLABICS WEST-CREE NWA 0x14CD nwaa # CANADIAN SYLLABICS NWAA 0x14CE nwaa # CANADIAN SYLLABICS WEST-CREE NWAA 0x14CF nwaa # CANADIAN SYLLABICS NASKAPI NWAA 0x14D0 n # CANADIAN SYLLABICS N 0x14D1 ng # CANADIAN SYLLABICS CARRIER NG 0x14D2 nh # CANADIAN SYLLABICS NH 0x14D3 le # CANADIAN SYLLABICS LE 0x14D4 laai # CANADIAN SYLLABICS LAAI 0x14D5 li # CANADIAN SYLLABICS LI 0x14D6 lii # CANADIAN SYLLABICS LII 0x14D7 lo # CANADIAN SYLLABICS LO 0x14D8 loo # CANADIAN SYLLABICS LOO 0x14D9 loo # CANADIAN SYLLABICS Y-CREE LOO 0x14DA la 
# CANADIAN SYLLABICS LA 0x14DB laa # CANADIAN SYLLABICS LAA 0x14DC lwe # CANADIAN SYLLABICS LWE 0x14DD lwe # CANADIAN SYLLABICS WEST-CREE LWE 0x14DE lwi # CANADIAN SYLLABICS LWI 0x14DF lwi # CANADIAN SYLLABICS WEST-CREE LWI 0x14E0 lwii # CANADIAN SYLLABICS LWII 0x14E1 lwii # CANADIAN SYLLABICS WEST-CREE LWII 0x14E2 lwo # CANADIAN SYLLABICS LWO 0x14E3 lwo # CANADIAN SYLLABICS WEST-CREE LWO 0x14E4 lwoo # CANADIAN SYLLABICS LWOO 0x14E5 lwoo # CANADIAN SYLLABICS WEST-CREE LWOO 0x14E6 lwa # CANADIAN SYLLABICS LWA 0x14E7 lwa # CANADIAN SYLLABICS WEST-CREE LWA 0x14E8 lwaa # CANADIAN SYLLABICS LWAA 0x14E9 lwaa # CANADIAN SYLLABICS WEST-CREE LWAA 0x14EA l # CANADIAN SYLLABICS L 0x14EB l # CANADIAN SYLLABICS WEST-CREE L 0x14EC l # CANADIAN SYLLABICS MEDIAL L 0x14ED se # CANADIAN SYLLABICS SE 0x14EE saai # CANADIAN SYLLABICS SAAI 0x14EF si # CANADIAN SYLLABICS SI 0x14F0 sii # CANADIAN SYLLABICS SII 0x14F1 so # CANADIAN SYLLABICS SO 0x14F2 soo # CANADIAN SYLLABICS SOO 0x14F3 soo # CANADIAN SYLLABICS Y-CREE SOO 0x14F4 sa # CANADIAN SYLLABICS SA 0x14F5 saa # CANADIAN SYLLABICS SAA 0x14F6 swe # CANADIAN SYLLABICS SWE 0x14F7 swe # CANADIAN SYLLABICS WEST-CREE SWE 0x14F8 swi # CANADIAN SYLLABICS SWI 0x14F9 swi # CANADIAN SYLLABICS WEST-CREE SWI 0x14FA swii # CANADIAN SYLLABICS SWII 0x14FB swii # CANADIAN SYLLABICS WEST-CREE SWII 0x14FC swo # CANADIAN SYLLABICS SWO 0x14FD swo # CANADIAN SYLLABICS WEST-CREE SWO 0x14FE swoo # CANADIAN SYLLABICS SWOO 0x14FF swoo # CANADIAN SYLLABICS WEST-CREE SWOO # # Characters 0x1500 to 0x15FF # 0x1500 swa # CANADIAN SYLLABICS SWA 0x1501 swa # CANADIAN SYLLABICS WEST-CREE SWA 0x1502 swaa # CANADIAN SYLLABICS SWAA 0x1503 swaa # CANADIAN SYLLABICS WEST-CREE SWAA 0x1504 swaa # CANADIAN SYLLABICS NASKAPI SWAA 0x1505 s # CANADIAN SYLLABICS S 0x1506 s # CANADIAN SYLLABICS ATHAPASCAN S 0x1507 sw # CANADIAN SYLLABICS SW 0x1508 s # CANADIAN SYLLABICS BLACKFOOT S 0x1509 sk # CANADIAN SYLLABICS MOOSE-CREE SK 0x150A skw # CANADIAN SYLLABICS NASKAPI SKW 0x150B sW 
# CANADIAN SYLLABICS NASKAPI S-W 0x150C spwa # CANADIAN SYLLABICS NASKAPI SPWA 0x150D stwa # CANADIAN SYLLABICS NASKAPI STWA 0x150E skwa # CANADIAN SYLLABICS NASKAPI SKWA 0x150F scwa # CANADIAN SYLLABICS NASKAPI SCWA 0x1510 she # CANADIAN SYLLABICS SHE 0x1511 shi # CANADIAN SYLLABICS SHI 0x1512 shii # CANADIAN SYLLABICS SHII 0x1513 sho # CANADIAN SYLLABICS SHO 0x1514 shoo # CANADIAN SYLLABICS SHOO 0x1515 sha # CANADIAN SYLLABICS SHA 0x1516 shaa # CANADIAN SYLLABICS SHAA 0x1517 shwe # CANADIAN SYLLABICS SHWE 0x1518 shwe # CANADIAN SYLLABICS WEST-CREE SHWE 0x1519 shwi # CANADIAN SYLLABICS SHWI 0x151A shwi # CANADIAN SYLLABICS WEST-CREE SHWI 0x151B shwii # CANADIAN SYLLABICS SHWII 0x151C shwii # CANADIAN SYLLABICS WEST-CREE SHWII 0x151D shwo # CANADIAN SYLLABICS SHWO 0x151E shwo # CANADIAN SYLLABICS WEST-CREE SHWO 0x151F shwoo # CANADIAN SYLLABICS SHWOO 0x1520 shwoo # CANADIAN SYLLABICS WEST-CREE SHWOO 0x1521 shwa # CANADIAN SYLLABICS SHWA 0x1522 shwa # CANADIAN SYLLABICS WEST-CREE SHWA 0x1523 shwaa # CANADIAN SYLLABICS SHWAA 0x1524 shwaa # CANADIAN SYLLABICS WEST-CREE SHWAA 0x1525 sh # CANADIAN SYLLABICS SH 0x1526 ye # CANADIAN SYLLABICS YE 0x1527 yaai # CANADIAN SYLLABICS YAAI 0x1528 yi # CANADIAN SYLLABICS YI 0x1529 yii # CANADIAN SYLLABICS YII 0x152A yo # CANADIAN SYLLABICS YO 0x152B yoo # CANADIAN SYLLABICS YOO 0x152C yoo # CANADIAN SYLLABICS Y-CREE YOO 0x152D ya # CANADIAN SYLLABICS YA 0x152E yaa # CANADIAN SYLLABICS YAA 0x152F ywe # CANADIAN SYLLABICS YWE 0x1530 ywe # CANADIAN SYLLABICS WEST-CREE YWE 0x1531 ywi # CANADIAN SYLLABICS YWI 0x1532 ywi # CANADIAN SYLLABICS WEST-CREE YWI 0x1533 ywii # CANADIAN SYLLABICS YWII 0x1534 ywii # CANADIAN SYLLABICS WEST-CREE YWII 0x1535 ywo # CANADIAN SYLLABICS YWO 0x1536 ywo # CANADIAN SYLLABICS WEST-CREE YWO 0x1537 ywoo # CANADIAN SYLLABICS YWOO 0x1538 ywoo # CANADIAN SYLLABICS WEST-CREE YWOO 0x1539 ywa # CANADIAN SYLLABICS YWA 0x153A ywa # CANADIAN SYLLABICS WEST-CREE YWA 0x153B ywaa # CANADIAN SYLLABICS YWAA 0x153C ywaa # 
CANADIAN SYLLABICS WEST-CREE YWAA 0x153D ywaa # CANADIAN SYLLABICS NASKAPI YWAA 0x153E y # CANADIAN SYLLABICS Y 0x153F y # CANADIAN SYLLABICS BIBLE-CREE Y 0x1540 y # CANADIAN SYLLABICS WEST-CREE Y 0x1541 yi # CANADIAN SYLLABICS SAYISI YI 0x1542 re # CANADIAN SYLLABICS RE 0x1543 re # CANADIAN SYLLABICS R-CREE RE 0x1544 le # CANADIAN SYLLABICS WEST-CREE LE 0x1545 raai # CANADIAN SYLLABICS RAAI 0x1546 ri # CANADIAN SYLLABICS RI 0x1547 rii # CANADIAN SYLLABICS RII 0x1548 ro # CANADIAN SYLLABICS RO 0x1549 roo # CANADIAN SYLLABICS ROO 0x154A lo # CANADIAN SYLLABICS WEST-CREE LO 0x154B ra # CANADIAN SYLLABICS RA 0x154C raa # CANADIAN SYLLABICS RAA 0x154D la # CANADIAN SYLLABICS WEST-CREE LA 0x154E rwaa # CANADIAN SYLLABICS RWAA 0x154F rwaa # CANADIAN SYLLABICS WEST-CREE RWAA 0x1550 r # CANADIAN SYLLABICS R 0x1551 r # CANADIAN SYLLABICS WEST-CREE R 0x1552 r # CANADIAN SYLLABICS MEDIAL R 0x1553 fe # CANADIAN SYLLABICS FE 0x1554 faai # CANADIAN SYLLABICS FAAI 0x1555 fi # CANADIAN SYLLABICS FI 0x1556 fii # CANADIAN SYLLABICS FII 0x1557 fo # CANADIAN SYLLABICS FO 0x1558 foo # CANADIAN SYLLABICS FOO 0x1559 fa # CANADIAN SYLLABICS FA 0x155A faa # CANADIAN SYLLABICS FAA 0x155B fwaa # CANADIAN SYLLABICS FWAA 0x155C fwaa # CANADIAN SYLLABICS WEST-CREE FWAA 0x155D f # CANADIAN SYLLABICS F 0x155E the # CANADIAN SYLLABICS THE 0x155F the # CANADIAN SYLLABICS N-CREE THE 0x1560 thi # CANADIAN SYLLABICS THI 0x1561 thi # CANADIAN SYLLABICS N-CREE THI 0x1562 thii # CANADIAN SYLLABICS THII 0x1563 thii # CANADIAN SYLLABICS N-CREE THII 0x1564 tho # CANADIAN SYLLABICS THO 0x1565 thoo # CANADIAN SYLLABICS THOO 0x1566 tha # CANADIAN SYLLABICS THA 0x1567 thaa # CANADIAN SYLLABICS THAA 0x1568 thwaa # CANADIAN SYLLABICS THWAA 0x1569 thwaa # CANADIAN SYLLABICS WEST-CREE THWAA 0x156A th # CANADIAN SYLLABICS TH 0x156B tthe # CANADIAN SYLLABICS TTHE 0x156C tthi # CANADIAN SYLLABICS TTHI 0x156D ttho # CANADIAN SYLLABICS TTHO 0x156E ttha # CANADIAN SYLLABICS TTHA 0x156F tth # CANADIAN SYLLABICS TTH 0x1570 
tye # CANADIAN SYLLABICS TYE 0x1571 tyi # CANADIAN SYLLABICS TYI 0x1572 tyo # CANADIAN SYLLABICS TYO 0x1573 tya # CANADIAN SYLLABICS TYA 0x1574 he # CANADIAN SYLLABICS NUNAVIK HE 0x1575 hi # CANADIAN SYLLABICS NUNAVIK HI 0x1576 hii # CANADIAN SYLLABICS NUNAVIK HII 0x1577 ho # CANADIAN SYLLABICS NUNAVIK HO 0x1578 hoo # CANADIAN SYLLABICS NUNAVIK HOO 0x1579 ha # CANADIAN SYLLABICS NUNAVIK HA 0x157A haa # CANADIAN SYLLABICS NUNAVIK HAA 0x157B h # CANADIAN SYLLABICS NUNAVIK H 0x157C h # CANADIAN SYLLABICS NUNAVUT H 0x157D hk # CANADIAN SYLLABICS HK 0x157E qaai # CANADIAN SYLLABICS QAAI 0x157F qi # CANADIAN SYLLABICS QI 0x1580 qii # CANADIAN SYLLABICS QII 0x1581 qo # CANADIAN SYLLABICS QO 0x1582 qoo # CANADIAN SYLLABICS QOO 0x1583 qa # CANADIAN SYLLABICS QA 0x1584 qaa # CANADIAN SYLLABICS QAA 0x1585 q # CANADIAN SYLLABICS Q 0x1586 tlhe # CANADIAN SYLLABICS TLHE 0x1587 tlhi # CANADIAN SYLLABICS TLHI 0x1588 tlho # CANADIAN SYLLABICS TLHO 0x1589 tlha # CANADIAN SYLLABICS TLHA 0x158A re # CANADIAN SYLLABICS WEST-CREE RE 0x158B ri # CANADIAN SYLLABICS WEST-CREE RI 0x158C ro # CANADIAN SYLLABICS WEST-CREE RO 0x158D ra # CANADIAN SYLLABICS WEST-CREE RA 0x158E ngaai # CANADIAN SYLLABICS NGAAI 0x158F ngi # CANADIAN SYLLABICS NGI 0x1590 ngii # CANADIAN SYLLABICS NGII 0x1591 ngo # CANADIAN SYLLABICS NGO 0x1592 ngoo # CANADIAN SYLLABICS NGOO 0x1593 nga # CANADIAN SYLLABICS NGA 0x1594 ngaa # CANADIAN SYLLABICS NGAA 0x1595 ng # CANADIAN SYLLABICS NG 0x1596 nng # CANADIAN SYLLABICS NNG 0x1597 she # CANADIAN SYLLABICS SAYISI SHE 0x1598 shi # CANADIAN SYLLABICS SAYISI SHI 0x1599 sho # CANADIAN SYLLABICS SAYISI SHO 0x159A sha # CANADIAN SYLLABICS SAYISI SHA 0x159B the # CANADIAN SYLLABICS WOODS-CREE THE 0x159C thi # CANADIAN SYLLABICS WOODS-CREE THI 0x159D tho # CANADIAN SYLLABICS WOODS-CREE THO 0x159E tha # CANADIAN SYLLABICS WOODS-CREE THA 0x159F th # CANADIAN SYLLABICS WOODS-CREE TH 0x15A0 lhi # CANADIAN SYLLABICS LHI 0x15A1 lhii # CANADIAN SYLLABICS LHII 0x15A2 lho # CANADIAN 
SYLLABICS LHO 0x15A3 lhoo # CANADIAN SYLLABICS LHOO 0x15A4 lha # CANADIAN SYLLABICS LHA 0x15A5 lhaa # CANADIAN SYLLABICS LHAA 0x15A6 lh # CANADIAN SYLLABICS LH 0x15A7 the # CANADIAN SYLLABICS TH-CREE THE 0x15A8 thi # CANADIAN SYLLABICS TH-CREE THI 0x15A9 thii # CANADIAN SYLLABICS TH-CREE THII 0x15AA tho # CANADIAN SYLLABICS TH-CREE THO 0x15AB thoo # CANADIAN SYLLABICS TH-CREE THOO 0x15AC tha # CANADIAN SYLLABICS TH-CREE THA 0x15AD thaa # CANADIAN SYLLABICS TH-CREE THAA 0x15AE th # CANADIAN SYLLABICS TH-CREE TH 0x15AF b # CANADIAN SYLLABICS AIVILIK B 0x15B0 e # CANADIAN SYLLABICS BLACKFOOT E 0x15B1 i # CANADIAN SYLLABICS BLACKFOOT I 0x15B2 o # CANADIAN SYLLABICS BLACKFOOT O 0x15B3 a # CANADIAN SYLLABICS BLACKFOOT A 0x15B4 we # CANADIAN SYLLABICS BLACKFOOT WE 0x15B5 wi # CANADIAN SYLLABICS BLACKFOOT WI 0x15B6 wo # CANADIAN SYLLABICS BLACKFOOT WO 0x15B7 wa # CANADIAN SYLLABICS BLACKFOOT WA 0x15B8 ne # CANADIAN SYLLABICS BLACKFOOT NE 0x15B9 ni # CANADIAN SYLLABICS BLACKFOOT NI 0x15BA no # CANADIAN SYLLABICS BLACKFOOT NO 0x15BB na # CANADIAN SYLLABICS BLACKFOOT NA 0x15BC ke # CANADIAN SYLLABICS BLACKFOOT KE 0x15BD ki # CANADIAN SYLLABICS BLACKFOOT KI 0x15BE ko # CANADIAN SYLLABICS BLACKFOOT KO 0x15BF ka # CANADIAN SYLLABICS BLACKFOOT KA 0x15C0 he # CANADIAN SYLLABICS SAYISI HE 0x15C1 hi # CANADIAN SYLLABICS SAYISI HI 0x15C2 ho # CANADIAN SYLLABICS SAYISI HO 0x15C3 ha # CANADIAN SYLLABICS SAYISI HA 0x15C4 ghu # CANADIAN SYLLABICS CARRIER GHU 0x15C5 gho # CANADIAN SYLLABICS CARRIER GHO 0x15C6 ghe # CANADIAN SYLLABICS CARRIER GHE 0x15C7 ghee # CANADIAN SYLLABICS CARRIER GHEE 0x15C8 ghi # CANADIAN SYLLABICS CARRIER GHI 0x15C9 gha # CANADIAN SYLLABICS CARRIER GHA 0x15CA ru # CANADIAN SYLLABICS CARRIER RU 0x15CB ro # CANADIAN SYLLABICS CARRIER RO 0x15CC re # CANADIAN SYLLABICS CARRIER RE 0x15CD ree # CANADIAN SYLLABICS CARRIER REE 0x15CE ri # CANADIAN SYLLABICS CARRIER RI 0x15CF ra # CANADIAN SYLLABICS CARRIER RA 0x15D0 wu # CANADIAN SYLLABICS CARRIER WU 0x15D1 wo # CANADIAN 
SYLLABICS CARRIER WO 0x15D2 we # CANADIAN SYLLABICS CARRIER WE 0x15D3 wee # CANADIAN SYLLABICS CARRIER WEE 0x15D4 wi # CANADIAN SYLLABICS CARRIER WI 0x15D5 wa # CANADIAN SYLLABICS CARRIER WA 0x15D6 hwu # CANADIAN SYLLABICS CARRIER HWU 0x15D7 hwo # CANADIAN SYLLABICS CARRIER HWO 0x15D8 hwe # CANADIAN SYLLABICS CARRIER HWE 0x15D9 hwee # CANADIAN SYLLABICS CARRIER HWEE 0x15DA hwi # CANADIAN SYLLABICS CARRIER HWI 0x15DB hwa # CANADIAN SYLLABICS CARRIER HWA 0x15DC thu # CANADIAN SYLLABICS CARRIER THU 0x15DD tho # CANADIAN SYLLABICS CARRIER THO 0x15DE the # CANADIAN SYLLABICS CARRIER THE 0x15DF thee # CANADIAN SYLLABICS CARRIER THEE 0x15E0 thi # CANADIAN SYLLABICS CARRIER THI 0x15E1 tha # CANADIAN SYLLABICS CARRIER THA 0x15E2 ttu # CANADIAN SYLLABICS CARRIER TTU 0x15E3 tto # CANADIAN SYLLABICS CARRIER TTO 0x15E4 tte # CANADIAN SYLLABICS CARRIER TTE 0x15E5 ttee # CANADIAN SYLLABICS CARRIER TTEE 0x15E6 tti # CANADIAN SYLLABICS CARRIER TTI 0x15E7 tta # CANADIAN SYLLABICS CARRIER TTA 0x15E8 pu # CANADIAN SYLLABICS CARRIER PU 0x15E9 po # CANADIAN SYLLABICS CARRIER PO 0x15EA pe # CANADIAN SYLLABICS CARRIER PE 0x15EB pee # CANADIAN SYLLABICS CARRIER PEE 0x15EC pi # CANADIAN SYLLABICS CARRIER PI 0x15ED pa # CANADIAN SYLLABICS CARRIER PA 0x15EE p # CANADIAN SYLLABICS CARRIER P 0x15EF gu # CANADIAN SYLLABICS CARRIER GU 0x15F0 go # CANADIAN SYLLABICS CARRIER GO 0x15F1 ge # CANADIAN SYLLABICS CARRIER GE 0x15F2 gee # CANADIAN SYLLABICS CARRIER GEE 0x15F3 gi # CANADIAN SYLLABICS CARRIER GI 0x15F4 ga # CANADIAN SYLLABICS CARRIER GA 0x15F5 khu # CANADIAN SYLLABICS CARRIER KHU 0x15F6 kho # CANADIAN SYLLABICS CARRIER KHO 0x15F7 khe # CANADIAN SYLLABICS CARRIER KHE 0x15F8 khee # CANADIAN SYLLABICS CARRIER KHEE 0x15F9 khi # CANADIAN SYLLABICS CARRIER KHI 0x15FA kha # CANADIAN SYLLABICS CARRIER KHA 0x15FB kku # CANADIAN SYLLABICS CARRIER KKU 0x15FC kko # CANADIAN SYLLABICS CARRIER KKO 0x15FD kke # CANADIAN SYLLABICS CARRIER KKE 0x15FE kkee # CANADIAN SYLLABICS CARRIER KKEE 0x15FF kki # 
CANADIAN SYLLABICS CARRIER KKI # # Characters 0x1600 to 0x16FF # 0x1600 kka # CANADIAN SYLLABICS CARRIER KKA 0x1601 kk # CANADIAN SYLLABICS CARRIER KK 0x1602 nu # CANADIAN SYLLABICS CARRIER NU 0x1603 no # CANADIAN SYLLABICS CARRIER NO 0x1604 ne # CANADIAN SYLLABICS CARRIER NE 0x1605 nee # CANADIAN SYLLABICS CARRIER NEE 0x1606 ni # CANADIAN SYLLABICS CARRIER NI 0x1607 na # CANADIAN SYLLABICS CARRIER NA 0x1608 mu # CANADIAN SYLLABICS CARRIER MU 0x1609 mo # CANADIAN SYLLABICS CARRIER MO 0x160A me # CANADIAN SYLLABICS CARRIER ME 0x160B mee # CANADIAN SYLLABICS CARRIER MEE 0x160C mi # CANADIAN SYLLABICS CARRIER MI 0x160D ma # CANADIAN SYLLABICS CARRIER MA 0x160E yu # CANADIAN SYLLABICS CARRIER YU 0x160F yo # CANADIAN SYLLABICS CARRIER YO 0x1610 ye # CANADIAN SYLLABICS CARRIER YE 0x1611 yee # CANADIAN SYLLABICS CARRIER YEE 0x1612 yi # CANADIAN SYLLABICS CARRIER YI 0x1613 ya # CANADIAN SYLLABICS CARRIER YA 0x1614 ju # CANADIAN SYLLABICS CARRIER JU 0x1615 ju # CANADIAN SYLLABICS SAYISI JU 0x1616 jo # CANADIAN SYLLABICS CARRIER JO 0x1617 je # CANADIAN SYLLABICS CARRIER JE 0x1618 jee # CANADIAN SYLLABICS CARRIER JEE 0x1619 ji # CANADIAN SYLLABICS CARRIER JI 0x161A ji # CANADIAN SYLLABICS SAYISI JI 0x161B ja # CANADIAN SYLLABICS CARRIER JA 0x161C jju # CANADIAN SYLLABICS CARRIER JJU 0x161D jjo # CANADIAN SYLLABICS CARRIER JJO 0x161E jje # CANADIAN SYLLABICS CARRIER JJE 0x161F jjee # CANADIAN SYLLABICS CARRIER JJEE 0x1620 jji # CANADIAN SYLLABICS CARRIER JJI 0x1621 jja # CANADIAN SYLLABICS CARRIER JJA 0x1622 lu # CANADIAN SYLLABICS CARRIER LU 0x1623 lo # CANADIAN SYLLABICS CARRIER LO 0x1624 le # CANADIAN SYLLABICS CARRIER LE 0x1625 lee # CANADIAN SYLLABICS CARRIER LEE 0x1626 li # CANADIAN SYLLABICS CARRIER LI 0x1627 la # CANADIAN SYLLABICS CARRIER LA 0x1628 dlu # CANADIAN SYLLABICS CARRIER DLU 0x1629 dlo # CANADIAN SYLLABICS CARRIER DLO 0x162A dle # CANADIAN SYLLABICS CARRIER DLE 0x162B dlee # CANADIAN SYLLABICS CARRIER DLEE 0x162C dli # CANADIAN SYLLABICS CARRIER DLI 0x162D 
dla # CANADIAN SYLLABICS CARRIER DLA 0x162E lhu # CANADIAN SYLLABICS CARRIER LHU 0x162F lho # CANADIAN SYLLABICS CARRIER LHO 0x1630 lhe # CANADIAN SYLLABICS CARRIER LHE 0x1631 lhee # CANADIAN SYLLABICS CARRIER LHEE 0x1632 lhi # CANADIAN SYLLABICS CARRIER LHI 0x1633 lha # CANADIAN SYLLABICS CARRIER LHA 0x1634 tlhu # CANADIAN SYLLABICS CARRIER TLHU 0x1635 tlho # CANADIAN SYLLABICS CARRIER TLHO 0x1636 tlhe # CANADIAN SYLLABICS CARRIER TLHE 0x1637 tlhee # CANADIAN SYLLABICS CARRIER TLHEE 0x1638 tlhi # CANADIAN SYLLABICS CARRIER TLHI 0x1639 tlha # CANADIAN SYLLABICS CARRIER TLHA 0x163A tlu # CANADIAN SYLLABICS CARRIER TLU 0x163B tlo # CANADIAN SYLLABICS CARRIER TLO 0x163C tle # CANADIAN SYLLABICS CARRIER TLE 0x163D tlee # CANADIAN SYLLABICS CARRIER TLEE 0x163E tli # CANADIAN SYLLABICS CARRIER TLI 0x163F tla # CANADIAN SYLLABICS CARRIER TLA 0x1640 zu # CANADIAN SYLLABICS CARRIER ZU 0x1641 zo # CANADIAN SYLLABICS CARRIER ZO 0x1642 ze # CANADIAN SYLLABICS CARRIER ZE 0x1643 zee # CANADIAN SYLLABICS CARRIER ZEE 0x1644 zi # CANADIAN SYLLABICS CARRIER ZI 0x1645 za # CANADIAN SYLLABICS CARRIER ZA 0x1646 z # CANADIAN SYLLABICS CARRIER Z 0x1647 z # CANADIAN SYLLABICS CARRIER INITIAL Z 0x1648 dzu # CANADIAN SYLLABICS CARRIER DZU 0x1649 dzo # CANADIAN SYLLABICS CARRIER DZO 0x164A dze # CANADIAN SYLLABICS CARRIER DZE 0x164B dzee # CANADIAN SYLLABICS CARRIER DZEE 0x164C dzi # CANADIAN SYLLABICS CARRIER DZI 0x164D dza # CANADIAN SYLLABICS CARRIER DZA 0x164E su # CANADIAN SYLLABICS CARRIER SU 0x164F so # CANADIAN SYLLABICS CARRIER SO 0x1650 se # CANADIAN SYLLABICS CARRIER SE 0x1651 see # CANADIAN SYLLABICS CARRIER SEE 0x1652 si # CANADIAN SYLLABICS CARRIER SI 0x1653 sa # CANADIAN SYLLABICS CARRIER SA 0x1654 shu # CANADIAN SYLLABICS CARRIER SHU 0x1655 sho # CANADIAN SYLLABICS CARRIER SHO 0x1656 she # CANADIAN SYLLABICS CARRIER SHE 0x1657 shee # CANADIAN SYLLABICS CARRIER SHEE 0x1658 shi # CANADIAN SYLLABICS CARRIER SHI 0x1659 sha # CANADIAN SYLLABICS CARRIER SHA 0x165A sh # CANADIAN 
SYLLABICS CARRIER SH 0x165B tsu # CANADIAN SYLLABICS CARRIER TSU 0x165C tso # CANADIAN SYLLABICS CARRIER TSO 0x165D tse # CANADIAN SYLLABICS CARRIER TSE 0x165E tsee # CANADIAN SYLLABICS CARRIER TSEE 0x165F tsi # CANADIAN SYLLABICS CARRIER TSI 0x1660 tsa # CANADIAN SYLLABICS CARRIER TSA 0x1661 chu # CANADIAN SYLLABICS CARRIER CHU 0x1662 cho # CANADIAN SYLLABICS CARRIER CHO 0x1663 che # CANADIAN SYLLABICS CARRIER CHE 0x1664 chee # CANADIAN SYLLABICS CARRIER CHEE 0x1665 chi # CANADIAN SYLLABICS CARRIER CHI 0x1666 cha # CANADIAN SYLLABICS CARRIER CHA 0x1667 ttsu # CANADIAN SYLLABICS CARRIER TTSU 0x1668 ttso # CANADIAN SYLLABICS CARRIER TTSO 0x1669 ttse # CANADIAN SYLLABICS CARRIER TTSE 0x166A ttsee # CANADIAN SYLLABICS CARRIER TTSEE 0x166B ttsi # CANADIAN SYLLABICS CARRIER TTSI 0x166C ttsa # CANADIAN SYLLABICS CARRIER TTSA 0x166D X # CANADIAN SYLLABICS CHI SIGN 0x166E . # CANADIAN SYLLABICS FULL STOP 0x166F qai # CANADIAN SYLLABICS QAI 0x1670 ngai # CANADIAN SYLLABICS NGAI 0x1671 nngi # CANADIAN SYLLABICS NNGI 0x1672 nngii # CANADIAN SYLLABICS NNGII 0x1673 nngo # CANADIAN SYLLABICS NNGO 0x1674 nngoo # CANADIAN SYLLABICS NNGOO 0x1675 nnga # CANADIAN SYLLABICS NNGA 0x1676 nngaa # CANADIAN SYLLABICS NNGAA 0x1680 " " # OGHAM SPACE MARK 0x1681 b # OGHAM LETTER BEITH 0x1682 l # OGHAM LETTER LUIS 0x1683 f # OGHAM LETTER FEARN 0x1684 s # OGHAM LETTER SAIL 0x1685 n # OGHAM LETTER NION 0x1686 h # OGHAM LETTER UATH 0x1687 d # OGHAM LETTER DAIR 0x1688 t # OGHAM LETTER TINNE 0x1689 c # OGHAM LETTER COLL 0x168A q # OGHAM LETTER CEIRT 0x168B m # OGHAM LETTER MUIN 0x168C g # OGHAM LETTER GORT 0x168D ng # OGHAM LETTER NGEADAL 0x168E z # OGHAM LETTER STRAIF 0x168F r # OGHAM LETTER RUIS 0x1690 a # OGHAM LETTER AILM 0x1691 o # OGHAM LETTER ONN 0x1692 u # OGHAM LETTER UR 0x1693 e # OGHAM LETTER EADHADH 0x1694 i # OGHAM LETTER IODHADH 0x1695 ch # OGHAM LETTER EABHADH 0x1696 th # OGHAM LETTER OR 0x1697 ph # OGHAM LETTER UILLEANN 0x1698 p # OGHAM LETTER IFIN 0x1699 x # OGHAM LETTER 
EAMHANCHOLL 0x169A p # OGHAM LETTER PEITH 0x169B < # OGHAM FEATHER MARK 0x169C > # OGHAM REVERSED FEATHER MARK 0x16A0 f # RUNIC LETTER FEHU FEOH FE F 0x16A1 v # RUNIC LETTER V 0x16A2 u # RUNIC LETTER URUZ UR U 0x16A3 yr # RUNIC LETTER YR 0x16A4 y # RUNIC LETTER Y 0x16A5 w # RUNIC LETTER W 0x16A6 th # RUNIC LETTER THURISAZ THURS THORN 0x16A7 th # RUNIC LETTER ETH 0x16A8 a # RUNIC LETTER ANSUZ A 0x16A9 o # RUNIC LETTER OS O 0x16AA ac # RUNIC LETTER AC A 0x16AB ae # RUNIC LETTER AESC 0x16AC o # RUNIC LETTER LONG-BRANCH-OSS O 0x16AD o # RUNIC LETTER SHORT-TWIG-OSS O 0x16AE o # RUNIC LETTER O 0x16AF oe # RUNIC LETTER OE 0x16B0 on # RUNIC LETTER ON 0x16B1 r # RUNIC LETTER RAIDO RAD REID R 0x16B2 k # RUNIC LETTER KAUNA 0x16B3 c # RUNIC LETTER CEN 0x16B4 k # RUNIC LETTER KAUN K 0x16B5 g # RUNIC LETTER G 0x16B6 ng # RUNIC LETTER ENG 0x16B7 g # RUNIC LETTER GEBO GYFU G 0x16B8 g # RUNIC LETTER GAR 0x16B9 w # RUNIC LETTER WUNJO WYNN W 0x16BA h # RUNIC LETTER HAGLAZ H 0x16BB h # RUNIC LETTER HAEGL H 0x16BC h # RUNIC LETTER LONG-BRANCH-HAGALL H 0x16BD h # RUNIC LETTER SHORT-TWIG-HAGALL H 0x16BE n # RUNIC LETTER NAUDIZ NYD NAUD N 0x16BF n # RUNIC LETTER SHORT-TWIG-NAUD N 0x16C0 n # RUNIC LETTER DOTTED-N 0x16C1 i # RUNIC LETTER ISAZ IS ISS I 0x16C2 e # RUNIC LETTER E 0x16C3 j # RUNIC LETTER JERAN J 0x16C4 g # RUNIC LETTER GER 0x16C5 ae # RUNIC LETTER LONG-BRANCH-AR AE 0x16C6 a # RUNIC LETTER SHORT-TWIG-AR A 0x16C7 eo # RUNIC LETTER IWAZ EOH 0x16C8 p # RUNIC LETTER PERTHO PEORTH P 0x16C9 z # RUNIC LETTER ALGIZ EOLHX 0x16CA s # RUNIC LETTER SOWILO S 0x16CB s # RUNIC LETTER SIGEL LONG-BRANCH-SOL S 0x16CC s # RUNIC LETTER SHORT-TWIG-SOL S 0x16CD c # RUNIC LETTER C 0x16CE z # RUNIC LETTER Z 0x16CF t # RUNIC LETTER TIWAZ TIR TYR T 0x16D0 t # RUNIC LETTER SHORT-TWIG-TYR T 0x16D1 d # RUNIC LETTER D 0x16D2 b # RUNIC LETTER BERKANAN BEORC BJARKAN B 0x16D3 b # RUNIC LETTER SHORT-TWIG-BJARKAN B 0x16D4 p # RUNIC LETTER DOTTED-P 0x16D5 p # RUNIC LETTER OPEN-P 0x16D6 e # RUNIC LETTER EHWAZ EH E 
0x16D7 m # RUNIC LETTER MANNAZ MAN M 0x16D8 m # RUNIC LETTER LONG-BRANCH-MADR M 0x16D9 m # RUNIC LETTER SHORT-TWIG-MADR M 0x16DA l # RUNIC LETTER LAUKAZ LAGU LOGR L 0x16DB l # RUNIC LETTER DOTTED-L 0x16DC ng # RUNIC LETTER INGWAZ 0x16DD ng # RUNIC LETTER ING 0x16DE d # RUNIC LETTER DAGAZ DAEG D 0x16DF o # RUNIC LETTER OTHALAN ETHEL O 0x16E0 ear # RUNIC LETTER EAR 0x16E1 ior # RUNIC LETTER IOR 0x16E2 qu # RUNIC LETTER CWEORTH 0x16E3 qu # RUNIC LETTER CALC 0x16E4 qu # RUNIC LETTER CEALC 0x16E5 s # RUNIC LETTER STAN 0x16E6 yr # RUNIC LETTER LONG-BRANCH-YR 0x16E7 yr # RUNIC LETTER SHORT-TWIG-YR 0x16E8 yr # RUNIC LETTER ICELANDIC-YR 0x16E9 q # RUNIC LETTER Q 0x16EA x # RUNIC LETTER X 0x16EB . # RUNIC SINGLE PUNCTUATION 0x16EC : # RUNIC MULTIPLE PUNCTUATION 0x16ED + # RUNIC CROSS PUNCTUATION 0x16EE 17 # RUNIC ARLAUG SYMBOL 0x16EF 18 # RUNIC TVIMADUR SYMBOL 0x16F0 19 # RUNIC BELGTHOR SYMBOL # # Characters 0x1700 to 0x17FF # 0x1780 k # KHMER LETTER KA 0x1781 kh # KHMER LETTER KHA 0x1782 g # KHMER LETTER KO 0x1783 gh # KHMER LETTER KHO 0x1784 ng # KHMER LETTER NGO 0x1785 c # KHMER LETTER CA 0x1786 ch # KHMER LETTER CHA 0x1787 j # KHMER LETTER CO 0x1788 jh # KHMER LETTER CHO 0x1789 ny # KHMER LETTER NYO 0x178A t # KHMER LETTER DA 0x178B tth # KHMER LETTER TTHA 0x178C d # KHMER LETTER DO 0x178D ddh # KHMER LETTER TTHO 0x178E nn # KHMER LETTER NNO 0x178F t # KHMER LETTER TA 0x1790 th # KHMER LETTER THA 0x1791 d # KHMER LETTER TO 0x1792 dh # KHMER LETTER THO 0x1793 n # KHMER LETTER NO 0x1794 p # KHMER LETTER BA 0x1795 ph # KHMER LETTER PHA 0x1796 b # KHMER LETTER PO 0x1797 bh # KHMER LETTER PHO 0x1798 m # KHMER LETTER MO 0x1799 y # KHMER LETTER YO 0x179A r # KHMER LETTER RO 0x179B l # KHMER LETTER LO 0x179C v # KHMER LETTER VO 0x179D sh # KHMER LETTER SHA 0x179E ss # KHMER LETTER SSO 0x179F s # KHMER LETTER SA 0x17A0 h # KHMER LETTER HA 0x17A1 l # KHMER LETTER LA 0x17A2 q # KHMER LETTER QA 0x17A3 a # KHMER INDEPENDENT VOWEL QAQ 0x17A4 aa # KHMER INDEPENDENT VOWEL QAA 0x17A5 i # 
KHMER INDEPENDENT VOWEL QI 0x17A6 ii # KHMER INDEPENDENT VOWEL QII 0x17A7 u # KHMER INDEPENDENT VOWEL QU 0x17A8 uk # KHMER INDEPENDENT VOWEL QUK 0x17A9 uu # KHMER INDEPENDENT VOWEL QUU 0x17AA uuv # KHMER INDEPENDENT VOWEL QUUV 0x17AB ry # KHMER INDEPENDENT VOWEL RY 0x17AC ryy # KHMER INDEPENDENT VOWEL RYY 0x17AD ly # KHMER INDEPENDENT VOWEL LY 0x17AE lyy # KHMER INDEPENDENT VOWEL LYY 0x17AF e # KHMER INDEPENDENT VOWEL QE 0x17B0 ai # KHMER INDEPENDENT VOWEL QAI 0x17B1 oo # KHMER INDEPENDENT VOWEL QOO TYPE ONE 0x17B2 oo # KHMER INDEPENDENT VOWEL QOO TYPE TWO 0x17B3 au # KHMER INDEPENDENT VOWEL QAU 0x17B4 a # KHMER VOWEL INHERENT AQ 0x17B5 aa # KHMER VOWEL INHERENT AA 0x17B6 aa # KHMER VOWEL SIGN AA 0x17B7 i # KHMER VOWEL SIGN I 0x17B8 ii # KHMER VOWEL SIGN II 0x17B9 y # KHMER VOWEL SIGN Y 0x17BA yy # KHMER VOWEL SIGN YY 0x17BB u # KHMER VOWEL SIGN U 0x17BC uu # KHMER VOWEL SIGN UU 0x17BD ua # KHMER VOWEL SIGN UA 0x17BE oe # KHMER VOWEL SIGN OE 0x17BF ya # KHMER VOWEL SIGN YA 0x17C0 ie # KHMER VOWEL SIGN IE 0x17C1 e # KHMER VOWEL SIGN E 0x17C2 ae # KHMER VOWEL SIGN AE 0x17C3 ai # KHMER VOWEL SIGN AI 0x17C4 oo # KHMER VOWEL SIGN OO 0x17C5 au # KHMER VOWEL SIGN AU 0x17C6 M # KHMER SIGN NIKAHIT 0x17C7 H # KHMER SIGN REAHMUK 0x17C8 a` # KHMER SIGN YUUKALEAPINTU 0x17CC r # KHMER SIGN ROBAT 0x17CE ! # KHMER SIGN KAKABAT 0x17D4 . # KHMER SIGN KHAN 0x17D5 // # KHMER SIGN BARIYOOSAN 0x17D6 : # KHMER SIGN CAMNUC PII KUUH 0x17D7 + # KHMER SIGN LEK TOO 0x17D8 ++ # KHMER SIGN BEYYAL 0x17D9 * # KHMER SIGN PHNAEK MUAN 0x17DA /// # KHMER SIGN KOOMUUT 0x17DB KR # KHMER CURRENCY SYMBOL RIEL 0x17DC "'" # KHMER SIGN AVAKRAHASANYA 0x17E0 0 # KHMER DIGIT ZERO 0x17E1 1 # KHMER DIGIT ONE 0x17E2 2 # KHMER DIGIT TWO 0x17E3 3 # KHMER DIGIT THREE 0x17E4 4 # KHMER DIGIT FOUR 0x17E5 5 # KHMER DIGIT FIVE 0x17E6 6 # KHMER DIGIT SIX 0x17E7 7 # KHMER DIGIT SEVEN 0x17E8 8 # KHMER DIGIT EIGHT 0x17E9 9 # KHMER DIGIT NINE # # Characters 0x1800 to 0x18FF # 0x1800 @ # MONGOLIAN BIRGA 0x1801 ... 
# MONGOLIAN ELLIPSIS 0x1802 , # MONGOLIAN COMMA 0x1803 . # MONGOLIAN FULL STOP 0x1804 : # MONGOLIAN COLON 0x1805 // # MONGOLIAN FOUR DOTS 0x1807 - # MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER 0x1808 , # MONGOLIAN MANCHU COMMA 0x1809 . # MONGOLIAN MANCHU FULL STOP 0x1810 0 # MONGOLIAN DIGIT ZERO 0x1811 1 # MONGOLIAN DIGIT ONE 0x1812 2 # MONGOLIAN DIGIT TWO 0x1813 3 # MONGOLIAN DIGIT THREE 0x1814 4 # MONGOLIAN DIGIT FOUR 0x1815 5 # MONGOLIAN DIGIT FIVE 0x1816 6 # MONGOLIAN DIGIT SIX 0x1817 7 # MONGOLIAN DIGIT SEVEN 0x1818 8 # MONGOLIAN DIGIT EIGHT 0x1819 9 # MONGOLIAN DIGIT NINE 0x1820 a # MONGOLIAN LETTER A 0x1821 e # MONGOLIAN LETTER E 0x1822 i # MONGOLIAN LETTER I 0x1823 o # MONGOLIAN LETTER O 0x1824 u # MONGOLIAN LETTER U 0x1825 O # MONGOLIAN LETTER OE 0x1826 U # MONGOLIAN LETTER UE 0x1827 ee # MONGOLIAN LETTER EE 0x1828 n # MONGOLIAN LETTER NA 0x1829 ng # MONGOLIAN LETTER ANG 0x182A b # MONGOLIAN LETTER BA 0x182B p # MONGOLIAN LETTER PA 0x182C q # MONGOLIAN LETTER QA 0x182D g # MONGOLIAN LETTER GA 0x182E m # MONGOLIAN LETTER MA 0x182F l # MONGOLIAN LETTER LA 0x1830 s # MONGOLIAN LETTER SA 0x1831 sh # MONGOLIAN LETTER SHA 0x1832 t # MONGOLIAN LETTER TA 0x1833 d # MONGOLIAN LETTER DA 0x1834 ch # MONGOLIAN LETTER CHA 0x1835 j # MONGOLIAN LETTER JA 0x1836 y # MONGOLIAN LETTER YA 0x1837 r # MONGOLIAN LETTER RA 0x1838 w # MONGOLIAN LETTER WA 0x1839 f # MONGOLIAN LETTER FA 0x183A k # MONGOLIAN LETTER KA 0x183B kha # MONGOLIAN LETTER KHA 0x183C ts # MONGOLIAN LETTER TSA 0x183D z # MONGOLIAN LETTER ZA 0x183E h # MONGOLIAN LETTER HAA 0x183F zr # MONGOLIAN LETTER ZRA 0x1840 lh # MONGOLIAN LETTER LHA 0x1841 zh # MONGOLIAN LETTER ZHI 0x1842 ch # MONGOLIAN LETTER CHI 0x1843 - # MONGOLIAN LETTER TODO LONG VOWEL SIGN 0x1844 e # MONGOLIAN LETTER TODO E 0x1845 i # MONGOLIAN LETTER TODO I 0x1846 o # MONGOLIAN LETTER TODO O 0x1847 u # MONGOLIAN LETTER TODO U 0x1848 O # MONGOLIAN LETTER TODO OE 0x1849 U # MONGOLIAN LETTER TODO UE 0x184A ng # MONGOLIAN LETTER TODO ANG 0x184B b # 
MONGOLIAN LETTER TODO BA 0x184C p # MONGOLIAN LETTER TODO PA 0x184D q # MONGOLIAN LETTER TODO QA 0x184E g # MONGOLIAN LETTER TODO GA 0x184F m # MONGOLIAN LETTER TODO MA 0x1850 t # MONGOLIAN LETTER TODO TA 0x1851 d # MONGOLIAN LETTER TODO DA 0x1852 ch # MONGOLIAN LETTER TODO CHA 0x1853 j # MONGOLIAN LETTER TODO JA 0x1854 ts # MONGOLIAN LETTER TODO TSA 0x1855 y # MONGOLIAN LETTER TODO YA 0x1856 w # MONGOLIAN LETTER TODO WA 0x1857 k # MONGOLIAN LETTER TODO KA 0x1858 g # MONGOLIAN LETTER TODO GAA 0x1859 h # MONGOLIAN LETTER TODO HAA 0x185A jy # MONGOLIAN LETTER TODO JIA 0x185B ny # MONGOLIAN LETTER TODO NIA 0x185C dz # MONGOLIAN LETTER TODO DZA 0x185D e # MONGOLIAN LETTER SIBE E 0x185E i # MONGOLIAN LETTER SIBE I 0x185F iy # MONGOLIAN LETTER SIBE IY 0x1860 U # MONGOLIAN LETTER SIBE UE 0x1861 u # MONGOLIAN LETTER SIBE U 0x1862 ng # MONGOLIAN LETTER SIBE ANG 0x1863 k # MONGOLIAN LETTER SIBE KA 0x1864 g # MONGOLIAN LETTER SIBE GA 0x1865 h # MONGOLIAN LETTER SIBE HA 0x1866 p # MONGOLIAN LETTER SIBE PA 0x1867 sh # MONGOLIAN LETTER SIBE SHA 0x1868 t # MONGOLIAN LETTER SIBE TA 0x1869 d # MONGOLIAN LETTER SIBE DA 0x186A j # MONGOLIAN LETTER SIBE JA 0x186B f # MONGOLIAN LETTER SIBE FA 0x186C g # MONGOLIAN LETTER SIBE GAA 0x186D h # MONGOLIAN LETTER SIBE HAA 0x186E ts # MONGOLIAN LETTER SIBE TSA 0x186F z # MONGOLIAN LETTER SIBE ZA 0x1870 r # MONGOLIAN LETTER SIBE RAA 0x1871 ch # MONGOLIAN LETTER SIBE CHA 0x1872 zh # MONGOLIAN LETTER SIBE ZHA 0x1873 i # MONGOLIAN LETTER MANCHU I 0x1874 k # MONGOLIAN LETTER MANCHU KA 0x1875 r # MONGOLIAN LETTER MANCHU RA 0x1876 f # MONGOLIAN LETTER MANCHU FA 0x1877 zh # MONGOLIAN LETTER MANCHU ZHA 0x1881 H # MONGOLIAN LETTER ALI GALI VISARGA ONE 0x1882 X # MONGOLIAN LETTER ALI GALI DAMARU 0x1883 W # MONGOLIAN LETTER ALI GALI UBADAMA 0x1884 M # MONGOLIAN LETTER ALI GALI INVERTED UBADAMA 0x1885 3 # MONGOLIAN LETTER ALI GALI BALUDA 0x1886 333 # MONGOLIAN LETTER ALI GALI THREE BALUDA 0x1887 a # MONGOLIAN LETTER ALI GALI A 0x1888 i # MONGOLIAN LETTER 
ALI GALI I 0x1889 k # MONGOLIAN LETTER ALI GALI KA 0x188A ng # MONGOLIAN LETTER ALI GALI NGA 0x188B c # MONGOLIAN LETTER ALI GALI CA 0x188C tt # MONGOLIAN LETTER ALI GALI TTA 0x188D tth # MONGOLIAN LETTER ALI GALI TTHA 0x188E dd # MONGOLIAN LETTER ALI GALI DDA 0x188F nn # MONGOLIAN LETTER ALI GALI NNA 0x1890 t # MONGOLIAN LETTER ALI GALI TA 0x1891 d # MONGOLIAN LETTER ALI GALI DA 0x1892 p # MONGOLIAN LETTER ALI GALI PA 0x1893 ph # MONGOLIAN LETTER ALI GALI PHA 0x1894 ss # MONGOLIAN LETTER ALI GALI SSA 0x1895 zh # MONGOLIAN LETTER ALI GALI ZHA 0x1896 z # MONGOLIAN LETTER ALI GALI ZA 0x1897 a # MONGOLIAN LETTER ALI GALI AH 0x1898 t # MONGOLIAN LETTER TODO ALI GALI TA 0x1899 zh # MONGOLIAN LETTER TODO ALI GALI ZHA 0x189A gh # MONGOLIAN LETTER MANCHU ALI GALI GHA 0x189B ng # MONGOLIAN LETTER MANCHU ALI GALI NGA 0x189C c # MONGOLIAN LETTER MANCHU ALI GALI CA 0x189D jh # MONGOLIAN LETTER MANCHU ALI GALI JHA 0x189E tta # MONGOLIAN LETTER MANCHU ALI GALI TTA 0x189F ddh # MONGOLIAN LETTER MANCHU ALI GALI DDHA 0x18A0 t # MONGOLIAN LETTER MANCHU ALI GALI TA 0x18A1 dh # MONGOLIAN LETTER MANCHU ALI GALI DHA 0x18A2 ss # MONGOLIAN LETTER MANCHU ALI GALI SSA 0x18A3 cy # MONGOLIAN LETTER MANCHU ALI GALI CYA 0x18A4 zh # MONGOLIAN LETTER MANCHU ALI GALI ZHA 0x18A5 z # MONGOLIAN LETTER MANCHU ALI GALI ZA 0x18A6 u # MONGOLIAN LETTER ALI GALI HALF U 0x18A7 y # MONGOLIAN LETTER ALI GALI HALF YA 0x18A8 bh # MONGOLIAN LETTER MANCHU ALI GALI BHA 0x18A9 "'" # MONGOLIAN LETTER ALI GALI DAGALGA # # Characters 0x1900 to 0x19FF # 0x1945 ? 
# LIMBU QUESTION MARK # # Characters 0x1A00 to 0x1AFF # # # Characters 0x1B00 to 0x1BFF # # # Characters 0x1C00 to 0x1CFF # # # Characters 0x1D00 to 0x1DFF # # # Characters 0x1E00 to 0x1EFF # 0x1E00 A # LATIN CAPITAL LETTER A WITH RING BELOW 0x1E01 a # LATIN SMALL LETTER A WITH RING BELOW 0x1E02 B # LATIN CAPITAL LETTER B WITH DOT ABOVE 0x1E03 b # LATIN SMALL LETTER B WITH DOT ABOVE 0x1E04 B # LATIN CAPITAL LETTER B WITH DOT BELOW 0x1E05 b # LATIN SMALL LETTER B WITH DOT BELOW 0x1E06 B # LATIN CAPITAL LETTER B WITH LINE BELOW 0x1E07 b # LATIN SMALL LETTER B WITH LINE BELOW 0x1E08 C # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE 0x1E09 c # LATIN SMALL LETTER C WITH CEDILLA AND ACUTE 0x1E0A D # LATIN CAPITAL LETTER D WITH DOT ABOVE 0x1E0B d # LATIN SMALL LETTER D WITH DOT ABOVE 0x1E0C D # LATIN CAPITAL LETTER D WITH DOT BELOW 0x1E0D d # LATIN SMALL LETTER D WITH DOT BELOW 0x1E0E D # LATIN CAPITAL LETTER D WITH LINE BELOW 0x1E0F d # LATIN SMALL LETTER D WITH LINE BELOW 0x1E10 D # LATIN CAPITAL LETTER D WITH CEDILLA 0x1E11 d # LATIN SMALL LETTER D WITH CEDILLA 0x1E12 D # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW 0x1E13 d # LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW 0x1E14 E # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE 0x1E15 e # LATIN SMALL LETTER E WITH MACRON AND GRAVE 0x1E16 E # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE 0x1E17 e # LATIN SMALL LETTER E WITH MACRON AND ACUTE 0x1E18 E # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW 0x1E19 e # LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW 0x1E1A E # LATIN CAPITAL LETTER E WITH TILDE BELOW 0x1E1B e # LATIN SMALL LETTER E WITH TILDE BELOW 0x1E1C E # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE 0x1E1D e # LATIN SMALL LETTER E WITH CEDILLA AND BREVE 0x1E1E F # LATIN CAPITAL LETTER F WITH DOT ABOVE 0x1E1F f # LATIN SMALL LETTER F WITH DOT ABOVE 0x1E20 G # LATIN CAPITAL LETTER G WITH MACRON 0x1E21 g # LATIN SMALL LETTER G WITH MACRON 0x1E22 H # LATIN CAPITAL LETTER H WITH DOT ABOVE 0x1E23 h # LATIN SMALL LETTER H WITH 
DOT ABOVE 0x1E24 H # LATIN CAPITAL LETTER H WITH DOT BELOW 0x1E25 h # LATIN SMALL LETTER H WITH DOT BELOW 0x1E26 H # LATIN CAPITAL LETTER H WITH DIAERESIS 0x1E27 h # LATIN SMALL LETTER H WITH DIAERESIS 0x1E28 H # LATIN CAPITAL LETTER H WITH CEDILLA 0x1E29 h # LATIN SMALL LETTER H WITH CEDILLA 0x1E2A H # LATIN CAPITAL LETTER H WITH BREVE BELOW 0x1E2B h # LATIN SMALL LETTER H WITH BREVE BELOW 0x1E2C I # LATIN CAPITAL LETTER I WITH TILDE BELOW 0x1E2D i # LATIN SMALL LETTER I WITH TILDE BELOW 0x1E2E I # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE 0x1E2F i # LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE 0x1E30 K # LATIN CAPITAL LETTER K WITH ACUTE 0x1E31 k # LATIN SMALL LETTER K WITH ACUTE 0x1E32 K # LATIN CAPITAL LETTER K WITH DOT BELOW 0x1E33 k # LATIN SMALL LETTER K WITH DOT BELOW 0x1E34 K # LATIN CAPITAL LETTER K WITH LINE BELOW 0x1E35 k # LATIN SMALL LETTER K WITH LINE BELOW 0x1E36 L # LATIN CAPITAL LETTER L WITH DOT BELOW 0x1E37 l # LATIN SMALL LETTER L WITH DOT BELOW 0x1E38 L # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON 0x1E39 l # LATIN SMALL LETTER L WITH DOT BELOW AND MACRON 0x1E3A L # LATIN CAPITAL LETTER L WITH LINE BELOW 0x1E3B l # LATIN SMALL LETTER L WITH LINE BELOW 0x1E3C L # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW 0x1E3D l # LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW 0x1E3E M # LATIN CAPITAL LETTER M WITH ACUTE 0x1E3F m # LATIN SMALL LETTER M WITH ACUTE 0x1E40 M # LATIN CAPITAL LETTER M WITH DOT ABOVE 0x1E41 m # LATIN SMALL LETTER M WITH DOT ABOVE 0x1E42 M # LATIN CAPITAL LETTER M WITH DOT BELOW 0x1E43 m # LATIN SMALL LETTER M WITH DOT BELOW 0x1E44 N # LATIN CAPITAL LETTER N WITH DOT ABOVE 0x1E45 n # LATIN SMALL LETTER N WITH DOT ABOVE 0x1E46 N # LATIN CAPITAL LETTER N WITH DOT BELOW 0x1E47 n # LATIN SMALL LETTER N WITH DOT BELOW 0x1E48 N # LATIN CAPITAL LETTER N WITH LINE BELOW 0x1E49 n # LATIN SMALL LETTER N WITH LINE BELOW 0x1E4A N # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW 0x1E4B n # LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW 
0x1E4C O # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE 0x1E4D o # LATIN SMALL LETTER O WITH TILDE AND ACUTE 0x1E4E O # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS 0x1E4F o # LATIN SMALL LETTER O WITH TILDE AND DIAERESIS 0x1E50 O # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE 0x1E51 o # LATIN SMALL LETTER O WITH MACRON AND GRAVE 0x1E52 O # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE 0x1E53 o # LATIN SMALL LETTER O WITH MACRON AND ACUTE 0x1E54 P # LATIN CAPITAL LETTER P WITH ACUTE 0x1E55 p # LATIN SMALL LETTER P WITH ACUTE 0x1E56 P # LATIN CAPITAL LETTER P WITH DOT ABOVE 0x1E57 p # LATIN SMALL LETTER P WITH DOT ABOVE 0x1E58 R # LATIN CAPITAL LETTER R WITH DOT ABOVE 0x1E59 r # LATIN SMALL LETTER R WITH DOT ABOVE 0x1E5A R # LATIN CAPITAL LETTER R WITH DOT BELOW 0x1E5B r # LATIN SMALL LETTER R WITH DOT BELOW 0x1E5C R # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON 0x1E5D r # LATIN SMALL LETTER R WITH DOT BELOW AND MACRON 0x1E5E R # LATIN CAPITAL LETTER R WITH LINE BELOW 0x1E5F r # LATIN SMALL LETTER R WITH LINE BELOW 0x1E60 S # LATIN CAPITAL LETTER S WITH DOT ABOVE 0x1E61 s # LATIN SMALL LETTER S WITH DOT ABOVE 0x1E62 S # LATIN CAPITAL LETTER S WITH DOT BELOW 0x1E63 s # LATIN SMALL LETTER S WITH DOT BELOW 0x1E64 S # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE 0x1E65 s # LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE 0x1E66 S # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE 0x1E67 s # LATIN SMALL LETTER S WITH CARON AND DOT ABOVE 0x1E68 S # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE 0x1E69 s # LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE 0x1E6A T # LATIN CAPITAL LETTER T WITH DOT ABOVE 0x1E6B t # LATIN SMALL LETTER T WITH DOT ABOVE 0x1E6C T # LATIN CAPITAL LETTER T WITH DOT BELOW 0x1E6D t # LATIN SMALL LETTER T WITH DOT BELOW 0x1E6E T # LATIN CAPITAL LETTER T WITH LINE BELOW 0x1E6F t # LATIN SMALL LETTER T WITH LINE BELOW 0x1E70 T # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW 0x1E71 t # LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW 0x1E72 U # 
LATIN CAPITAL LETTER U WITH DIAERESIS BELOW 0x1E73 u # LATIN SMALL LETTER U WITH DIAERESIS BELOW 0x1E74 U # LATIN CAPITAL LETTER U WITH TILDE BELOW 0x1E75 u # LATIN SMALL LETTER U WITH TILDE BELOW 0x1E76 U # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW 0x1E77 u # LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW 0x1E78 U # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE 0x1E79 u # LATIN SMALL LETTER U WITH TILDE AND ACUTE 0x1E7A U # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS 0x1E7B u # LATIN SMALL LETTER U WITH MACRON AND DIAERESIS 0x1E7C V # LATIN CAPITAL LETTER V WITH TILDE 0x1E7D v # LATIN SMALL LETTER V WITH TILDE 0x1E7E V # LATIN CAPITAL LETTER V WITH DOT BELOW 0x1E7F v # LATIN SMALL LETTER V WITH DOT BELOW 0x1E80 W # LATIN CAPITAL LETTER W WITH GRAVE 0x1E81 w # LATIN SMALL LETTER W WITH GRAVE 0x1E82 W # LATIN CAPITAL LETTER W WITH ACUTE 0x1E83 w # LATIN SMALL LETTER W WITH ACUTE 0x1E84 W # LATIN CAPITAL LETTER W WITH DIAERESIS 0x1E85 w # LATIN SMALL LETTER W WITH DIAERESIS 0x1E86 W # LATIN CAPITAL LETTER W WITH DOT ABOVE 0x1E87 w # LATIN SMALL LETTER W WITH DOT ABOVE 0x1E88 W # LATIN CAPITAL LETTER W WITH DOT BELOW 0x1E89 w # LATIN SMALL LETTER W WITH DOT BELOW 0x1E8A X # LATIN CAPITAL LETTER X WITH DOT ABOVE 0x1E8B x # LATIN SMALL LETTER X WITH DOT ABOVE 0x1E8C X # LATIN CAPITAL LETTER X WITH DIAERESIS 0x1E8D x # LATIN SMALL LETTER X WITH DIAERESIS 0x1E8E Y # LATIN CAPITAL LETTER Y WITH DOT ABOVE 0x1E8F y # LATIN SMALL LETTER Y WITH DOT ABOVE 0x1E90 Z # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX 0x1E91 z # LATIN SMALL LETTER Z WITH CIRCUMFLEX 0x1E92 Z # LATIN CAPITAL LETTER Z WITH DOT BELOW 0x1E93 z # LATIN SMALL LETTER Z WITH DOT BELOW 0x1E94 Z # LATIN CAPITAL LETTER Z WITH LINE BELOW 0x1E95 z # LATIN SMALL LETTER Z WITH LINE BELOW 0x1E96 h # LATIN SMALL LETTER H WITH LINE BELOW 0x1E97 t # LATIN SMALL LETTER T WITH DIAERESIS 0x1E98 w # LATIN SMALL LETTER W WITH RING ABOVE 0x1E99 y # LATIN SMALL LETTER Y WITH RING ABOVE 0x1E9A a # LATIN SMALL LETTER A WITH RIGHT HALF 
RING 0x1E9B s # LATIN SMALL LETTER LONG S WITH DOT ABOVE 0x1E9C s # LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE 0x1E9D s # LATIN SMALL LETTER LONG S WITH HIGH STROKE 0x1E9E Ss # LATIN CAPITAL LETTER SHARP S 0x1E9F d # LATIN SMALL LETTER DELTA 0x1EA0 A # LATIN CAPITAL LETTER A WITH DOT BELOW 0x1EA1 a # LATIN SMALL LETTER A WITH DOT BELOW 0x1EA2 A # LATIN CAPITAL LETTER A WITH HOOK ABOVE 0x1EA3 a # LATIN SMALL LETTER A WITH HOOK ABOVE 0x1EA4 A # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE 0x1EA5 a # LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE 0x1EA6 A # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE 0x1EA7 a # LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE 0x1EA8 A # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE 0x1EA9 a # LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE 0x1EAA A # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE 0x1EAB a # LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE 0x1EAC A # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW 0x1EAD a # LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW 0x1EAE A # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE 0x1EAF a # LATIN SMALL LETTER A WITH BREVE AND ACUTE 0x1EB0 A # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE 0x1EB1 a # LATIN SMALL LETTER A WITH BREVE AND GRAVE 0x1EB2 A # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE 0x1EB3 a # LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE 0x1EB4 A # LATIN CAPITAL LETTER A WITH BREVE AND TILDE 0x1EB5 a # LATIN SMALL LETTER A WITH BREVE AND TILDE 0x1EB6 A # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW 0x1EB7 a # LATIN SMALL LETTER A WITH BREVE AND DOT BELOW 0x1EB8 E # LATIN CAPITAL LETTER E WITH DOT BELOW 0x1EB9 e # LATIN SMALL LETTER E WITH DOT BELOW 0x1EBA E # LATIN CAPITAL LETTER E WITH HOOK ABOVE 0x1EBB e # LATIN SMALL LETTER E WITH HOOK ABOVE 0x1EBC E # LATIN CAPITAL LETTER E WITH TILDE 0x1EBD e # LATIN SMALL LETTER E WITH TILDE 0x1EBE E # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE 0x1EBF e # LATIN SMALL LETTER E WITH CIRCUMFLEX AND 
ACUTE 0x1EC0 E # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE 0x1EC1 e # LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE 0x1EC2 E # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE 0x1EC3 e # LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE 0x1EC4 E # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE 0x1EC5 e # LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE 0x1EC6 E # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW 0x1EC7 e # LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW 0x1EC8 I # LATIN CAPITAL LETTER I WITH HOOK ABOVE 0x1EC9 i # LATIN SMALL LETTER I WITH HOOK ABOVE 0x1ECA I # LATIN CAPITAL LETTER I WITH DOT BELOW 0x1ECB i # LATIN SMALL LETTER I WITH DOT BELOW 0x1ECC O # LATIN CAPITAL LETTER O WITH DOT BELOW 0x1ECD o # LATIN SMALL LETTER O WITH DOT BELOW 0x1ECE O # LATIN CAPITAL LETTER O WITH HOOK ABOVE 0x1ECF o # LATIN SMALL LETTER O WITH HOOK ABOVE 0x1ED0 O # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE 0x1ED1 o # LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE 0x1ED2 O # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE 0x1ED3 o # LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE 0x1ED4 O # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE 0x1ED5 o # LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE 0x1ED6 O # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE 0x1ED7 o # LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE 0x1ED8 O # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW 0x1ED9 o # LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW 0x1EDA O # LATIN CAPITAL LETTER O WITH HORN AND ACUTE 0x1EDB o # LATIN SMALL LETTER O WITH HORN AND ACUTE 0x1EDC O # LATIN CAPITAL LETTER O WITH HORN AND GRAVE 0x1EDD o # LATIN SMALL LETTER O WITH HORN AND GRAVE 0x1EDE O # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE 0x1EDF o # LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE 0x1EE0 O # LATIN CAPITAL LETTER O WITH HORN AND TILDE 0x1EE1 o # LATIN SMALL LETTER O WITH HORN AND TILDE 0x1EE2 O # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW 0x1EE3 o # 
LATIN SMALL LETTER O WITH HORN AND DOT BELOW 0x1EE4 U # LATIN CAPITAL LETTER U WITH DOT BELOW 0x1EE5 u # LATIN SMALL LETTER U WITH DOT BELOW 0x1EE6 U # LATIN CAPITAL LETTER U WITH HOOK ABOVE 0x1EE7 u # LATIN SMALL LETTER U WITH HOOK ABOVE 0x1EE8 U # LATIN CAPITAL LETTER U WITH HORN AND ACUTE 0x1EE9 u # LATIN SMALL LETTER U WITH HORN AND ACUTE 0x1EEA U # LATIN CAPITAL LETTER U WITH HORN AND GRAVE 0x1EEB u # LATIN SMALL LETTER U WITH HORN AND GRAVE 0x1EEC U # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE 0x1EED u # LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE 0x1EEE U # LATIN CAPITAL LETTER U WITH HORN AND TILDE 0x1EEF u # LATIN SMALL LETTER U WITH HORN AND TILDE 0x1EF0 U # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW 0x1EF1 u # LATIN SMALL LETTER U WITH HORN AND DOT BELOW 0x1EF2 Y # LATIN CAPITAL LETTER Y WITH GRAVE 0x1EF3 y # LATIN SMALL LETTER Y WITH GRAVE 0x1EF4 Y # LATIN CAPITAL LETTER Y WITH DOT BELOW 0x1EF5 y # LATIN SMALL LETTER Y WITH DOT BELOW 0x1EF6 Y # LATIN CAPITAL LETTER Y WITH HOOK ABOVE 0x1EF7 y # LATIN SMALL LETTER Y WITH HOOK ABOVE 0x1EF8 Y # LATIN CAPITAL LETTER Y WITH TILDE 0x1EF9 y # LATIN SMALL LETTER Y WITH TILDE # # Characters 0x1F00 to 0x1FFF # 0x1F00 a # GREEK SMALL LETTER ALPHA WITH PSILI 0x1F01 a # GREEK SMALL LETTER ALPHA WITH DASIA 0x1F02 a # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA 0x1F03 a # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA 0x1F04 a # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA 0x1F05 a # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA 0x1F06 a # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI 0x1F07 a # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI 0x1F08 A # GREEK CAPITAL LETTER ALPHA WITH PSILI 0x1F09 A # GREEK CAPITAL LETTER ALPHA WITH DASIA 0x1F0A A # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA 0x1F0B A # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA 0x1F0C A # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA 0x1F0D A # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA 0x1F0E A # GREEK CAPITAL 
LETTER ALPHA WITH PSILI AND PERISPOMENI 0x1F0F A # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI 0x1F10 e # GREEK SMALL LETTER EPSILON WITH PSILI 0x1F11 e # GREEK SMALL LETTER EPSILON WITH DASIA 0x1F12 e # GREEK SMALL LETTER EPSILON WITH PSILI AND VARIA 0x1F13 e # GREEK SMALL LETTER EPSILON WITH DASIA AND VARIA 0x1F14 e # GREEK SMALL LETTER EPSILON WITH PSILI AND OXIA 0x1F15 e # GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA 0x1F18 E # GREEK CAPITAL LETTER EPSILON WITH PSILI 0x1F19 E # GREEK CAPITAL LETTER EPSILON WITH DASIA 0x1F1A E # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA 0x1F1B E # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA 0x1F1C E # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA 0x1F1D E # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA 0x1F20 e # GREEK SMALL LETTER ETA WITH PSILI 0x1F21 e # GREEK SMALL LETTER ETA WITH DASIA 0x1F22 e # GREEK SMALL LETTER ETA WITH PSILI AND VARIA 0x1F23 e # GREEK SMALL LETTER ETA WITH DASIA AND VARIA 0x1F24 e # GREEK SMALL LETTER ETA WITH PSILI AND OXIA 0x1F25 e # GREEK SMALL LETTER ETA WITH DASIA AND OXIA 0x1F26 e # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI 0x1F27 e # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI 0x1F28 E # GREEK CAPITAL LETTER ETA WITH PSILI 0x1F29 E # GREEK CAPITAL LETTER ETA WITH DASIA 0x1F2A E # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA 0x1F2B E # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA 0x1F2C E # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA 0x1F2D E # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA 0x1F2E E # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI 0x1F2F E # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI 0x1F30 i # GREEK SMALL LETTER IOTA WITH PSILI 0x1F31 i # GREEK SMALL LETTER IOTA WITH DASIA 0x1F32 i # GREEK SMALL LETTER IOTA WITH PSILI AND VARIA 0x1F33 i # GREEK SMALL LETTER IOTA WITH DASIA AND VARIA 0x1F34 i # GREEK SMALL LETTER IOTA WITH PSILI AND OXIA 0x1F35 i # GREEK SMALL LETTER IOTA WITH DASIA AND OXIA 0x1F36 i # GREEK SMALL 
LETTER IOTA WITH PSILI AND PERISPOMENI 0x1F37 i # GREEK SMALL LETTER IOTA WITH DASIA AND PERISPOMENI 0x1F38 I # GREEK CAPITAL LETTER IOTA WITH PSILI 0x1F39 I # GREEK CAPITAL LETTER IOTA WITH DASIA 0x1F3A I # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA 0x1F3B I # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA 0x1F3C I # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA 0x1F3D I # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA 0x1F3E I # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI 0x1F3F I # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI 0x1F40 o # GREEK SMALL LETTER OMICRON WITH PSILI 0x1F41 o # GREEK SMALL LETTER OMICRON WITH DASIA 0x1F42 o # GREEK SMALL LETTER OMICRON WITH PSILI AND VARIA 0x1F43 o # GREEK SMALL LETTER OMICRON WITH DASIA AND VARIA 0x1F44 o # GREEK SMALL LETTER OMICRON WITH PSILI AND OXIA 0x1F45 o # GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA 0x1F48 O # GREEK CAPITAL LETTER OMICRON WITH PSILI 0x1F49 O # GREEK CAPITAL LETTER OMICRON WITH DASIA 0x1F4A O # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA 0x1F4B O # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA 0x1F4C O # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA 0x1F4D O # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA 0x1F50 u # GREEK SMALL LETTER UPSILON WITH PSILI 0x1F51 u # GREEK SMALL LETTER UPSILON WITH DASIA 0x1F52 u # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA 0x1F53 u # GREEK SMALL LETTER UPSILON WITH DASIA AND VARIA 0x1F54 u # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA 0x1F55 u # GREEK SMALL LETTER UPSILON WITH DASIA AND OXIA 0x1F56 u # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI 0x1F57 u # GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI 0x1F59 U # GREEK CAPITAL LETTER UPSILON WITH DASIA 0x1F5B U # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA 0x1F5D U # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA 0x1F5F U # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI 0x1F60 o # GREEK SMALL LETTER OMEGA WITH PSILI 0x1F61 o # GREEK 
SMALL LETTER OMEGA WITH DASIA 0x1F62 o # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA 0x1F63 o # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA 0x1F64 o # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA 0x1F65 o # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA 0x1F66 o # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI 0x1F67 o # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI 0x1F68 O # GREEK CAPITAL LETTER OMEGA WITH PSILI 0x1F69 O # GREEK CAPITAL LETTER OMEGA WITH DASIA 0x1F6A O # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA 0x1F6B O # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA 0x1F6C O # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA 0x1F6D O # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA 0x1F6E O # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI 0x1F6F O # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI 0x1F70 a # GREEK SMALL LETTER ALPHA WITH VARIA 0x1F71 a # GREEK SMALL LETTER ALPHA WITH OXIA 0x1F72 e # GREEK SMALL LETTER EPSILON WITH VARIA 0x1F73 e # GREEK SMALL LETTER EPSILON WITH OXIA 0x1F74 e # GREEK SMALL LETTER ETA WITH VARIA 0x1F75 e # GREEK SMALL LETTER ETA WITH OXIA 0x1F76 i # GREEK SMALL LETTER IOTA WITH VARIA 0x1F77 i # GREEK SMALL LETTER IOTA WITH OXIA 0x1F78 o # GREEK SMALL LETTER OMICRON WITH VARIA 0x1F79 o # GREEK SMALL LETTER OMICRON WITH OXIA 0x1F7A u # GREEK SMALL LETTER UPSILON WITH VARIA 0x1F7B u # GREEK SMALL LETTER UPSILON WITH OXIA 0x1F7C o # GREEK SMALL LETTER OMEGA WITH VARIA 0x1F7D o # GREEK SMALL LETTER OMEGA WITH OXIA 0x1F80 a # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI 0x1F81 a # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI 0x1F82 a # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI 0x1F83 a # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI 0x1F84 a # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI 0x1F85 a # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI 0x1F86 a # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND 
YPOGEGRAMMENI 0x1F87 a # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 0x1F88 A # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI 0x1F89 A # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI 0x1F8A A # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI 0x1F8B A # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI 0x1F8C A # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI 0x1F8D A # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI 0x1F8E A # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 0x1F8F A # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 0x1F90 e # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI 0x1F91 e # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI 0x1F92 e # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI 0x1F93 e # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI 0x1F94 e # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI 0x1F95 e # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI 0x1F96 e # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI 0x1F97 e # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 0x1F98 E # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI 0x1F99 E # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI 0x1F9A E # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI 0x1F9B E # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI 0x1F9C E # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI 0x1F9D E # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI 0x1F9E E # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 0x1F9F E # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 0x1FA0 o # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI 0x1FA1 o # GREEK SMALL LETTER OMEGA WITH DASIA AND 
YPOGEGRAMMENI 0x1FA2 o # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI 0x1FA3 o # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI 0x1FA4 o # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI 0x1FA5 o # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI 0x1FA6 o # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI 0x1FA7 o # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 0x1FA8 O # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI 0x1FA9 O # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI 0x1FAA O # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI 0x1FAB O # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI 0x1FAC O # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI 0x1FAD O # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI 0x1FAE O # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 0x1FAF O # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 0x1FB0 a # GREEK SMALL LETTER ALPHA WITH VRACHY 0x1FB1 a # GREEK SMALL LETTER ALPHA WITH MACRON 0x1FB2 a # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI 0x1FB3 a # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI 0x1FB4 a # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI 0x1FB6 a # GREEK SMALL LETTER ALPHA WITH PERISPOMENI 0x1FB7 a # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI 0x1FB8 A # GREEK CAPITAL LETTER ALPHA WITH VRACHY 0x1FB9 A # GREEK CAPITAL LETTER ALPHA WITH MACRON 0x1FBA A # GREEK CAPITAL LETTER ALPHA WITH VARIA 0x1FBB A # GREEK CAPITAL LETTER ALPHA WITH OXIA 0x1FBC A # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI 0x1FBD "'" # GREEK KORONIS 0x1FBE i # GREEK PROSGEGRAMMENI 0x1FBF "'" # GREEK PSILI 0x1FC0 ~ # GREEK PERISPOMENI 0x1FC1 '"~' # GREEK DIALYTIKA AND PERISPOMENI 0x1FC2 e # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI 0x1FC3 e # GREEK SMALL LETTER 
ETA WITH YPOGEGRAMMENI 0x1FC4 e # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI 0x1FC6 e # GREEK SMALL LETTER ETA WITH PERISPOMENI 0x1FC7 e # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI 0x1FC8 E # GREEK CAPITAL LETTER EPSILON WITH VARIA 0x1FC9 E # GREEK CAPITAL LETTER EPSILON WITH OXIA 0x1FCA E # GREEK CAPITAL LETTER ETA WITH VARIA 0x1FCB E # GREEK CAPITAL LETTER ETA WITH OXIA 0x1FCC E # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI 0x1FCD "'`" # GREEK PSILI AND VARIA 0x1FCE "''" # GREEK PSILI AND OXIA 0x1FCF "'~" # GREEK PSILI AND PERISPOMENI 0x1FD0 i # GREEK SMALL LETTER IOTA WITH VRACHY 0x1FD1 i # GREEK SMALL LETTER IOTA WITH MACRON 0x1FD2 i # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA 0x1FD3 i # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA 0x1FD6 i # GREEK SMALL LETTER IOTA WITH PERISPOMENI 0x1FD7 i # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI 0x1FD8 I # GREEK CAPITAL LETTER IOTA WITH VRACHY 0x1FD9 I # GREEK CAPITAL LETTER IOTA WITH MACRON 0x1FDA I # GREEK CAPITAL LETTER IOTA WITH VARIA 0x1FDB I # GREEK CAPITAL LETTER IOTA WITH OXIA 0x1FDD "`'" # GREEK DASIA AND VARIA 0x1FDE "`'" # GREEK DASIA AND OXIA 0x1FDF `~ # GREEK DASIA AND PERISPOMENI 0x1FE0 u # GREEK SMALL LETTER UPSILON WITH VRACHY 0x1FE1 u # GREEK SMALL LETTER UPSILON WITH MACRON 0x1FE2 u # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA 0x1FE3 u # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA 0x1FE4 R # GREEK SMALL LETTER RHO WITH PSILI 0x1FE5 R # GREEK SMALL LETTER RHO WITH DASIA 0x1FE6 u # GREEK SMALL LETTER UPSILON WITH PERISPOMENI 0x1FE7 u # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI 0x1FE8 U # GREEK CAPITAL LETTER UPSILON WITH VRACHY 0x1FE9 U # GREEK CAPITAL LETTER UPSILON WITH MACRON 0x1FEA U # GREEK CAPITAL LETTER UPSILON WITH VARIA 0x1FEB U # GREEK CAPITAL LETTER UPSILON WITH OXIA 0x1FEC R # GREEK CAPITAL LETTER RHO WITH DASIA 0x1FED '"`' # GREEK DIALYTIKA AND VARIA 0x1FEE ""'" # GREEK DIALYTIKA AND OXIA 0x1FEF ` # GREEK VARIA 
0x1FF2 o # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI 0x1FF3 o # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI 0x1FF4 o # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI 0x1FF6 o # GREEK SMALL LETTER OMEGA WITH PERISPOMENI 0x1FF7 o # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI 0x1FF8 O # GREEK CAPITAL LETTER OMICRON WITH VARIA 0x1FF9 O # GREEK CAPITAL LETTER OMICRON WITH OXIA 0x1FFA O # GREEK CAPITAL LETTER OMEGA WITH VARIA 0x1FFB O # GREEK CAPITAL LETTER OMEGA WITH OXIA 0x1FFC O # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI 0x1FFD "'" # GREEK OXIA 0x1FFE ` # GREEK DASIA # # Characters 0x2000 to 0x20FF # 0x2000 " " # EN QUAD 0x2001 " " # EM QUAD 0x2002 " " # EN SPACE 0x2003 " " # EM SPACE 0x2004 " " # THREE-PER-EM SPACE 0x2005 " " # FOUR-PER-EM SPACE 0x2006 " " # SIX-PER-EM SPACE 0x2007 " " # FIGURE SPACE 0x2008 " " # PUNCTUATION SPACE 0x2009 " " # THIN SPACE 0x200A " " # HAIR SPACE 0x200B " " # ZERO WIDTH SPACE 0x2010 - # HYPHEN 0x2011 - # NON-BREAKING HYPHEN 0x2012 - # FIGURE DASH 0x2013 - # EN DASH 0x2014 -- # EM DASH 0x2015 -- # HORIZONTAL BAR 0x2016 || # DOUBLE VERTICAL LINE 0x2017 _ # DOUBLE LOW LINE 0x2018 "'" # LEFT SINGLE QUOTATION MARK 0x2019 "'" # RIGHT SINGLE QUOTATION MARK 0x201A , # SINGLE LOW-9 QUOTATION MARK 0x201B "'" # SINGLE HIGH-REVERSED-9 QUOTATION MARK 0x201C '"' # LEFT DOUBLE QUOTATION MARK 0x201D '"' # RIGHT DOUBLE QUOTATION MARK 0x201E ,, # DOUBLE LOW-9 QUOTATION MARK 0x201F '"' # DOUBLE HIGH-REVERSED-9 QUOTATION MARK 0x2020 + # DAGGER 0x2021 ++ # DOUBLE DAGGER 0x2022 * # BULLET 0x2023 *> # TRIANGULAR BULLET 0x2024 . # ONE DOT LEADER 0x2025 .. # TWO DOT LEADER 0x2026 ... # HORIZONTAL ELLIPSIS 0x2027 . 
# HYPHENATION POINT 0x2028 " " # LINE SEPARATOR 0x2029 " " # PARAGRAPH SEPARATOR 0x202F " " # NARROW NO-BREAK SPACE 0x2030 %0 # PER MILLE SIGN 0x2031 %00 # PER TEN THOUSAND SIGN 0x2032 "'" # PRIME 0x2033 "''" # DOUBLE PRIME 0x2034 "'''" # TRIPLE PRIME 0x2035 ` # REVERSED PRIME 0x2036 `` # REVERSED DOUBLE PRIME 0x2037 ``` # REVERSED TRIPLE PRIME 0x2038 ^ # CARET 0x2039 < # SINGLE LEFT-POINTING ANGLE QUOTATION MARK 0x203A > # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK 0x203B * # REFERENCE MARK 0x203C !! # DOUBLE EXCLAMATION MARK 0x203D !? # INTERROBANG 0x203E - # OVERLINE 0x203F _ # UNDERTIE 0x2040 - # CHARACTER TIE 0x2041 ^ # CARET INSERTION POINT 0x2042 *** # ASTERISM 0x2043 -- # HYPHEN BULLET 0x2044 / # FRACTION SLASH 0x2045 -[ # LEFT SQUARE BRACKET WITH QUILL 0x2046 ]- # RIGHT SQUARE BRACKET WITH QUILL 0x2047 [?] # DOUBLE QUESTION MARK 0x2048 ?! # QUESTION EXCLAMATION MARK 0x2049 !? # EXCLAMATION QUESTION MARK 0x204A 7 # TIRONIAN SIGN ET 0x204B PP # REVERSED PILCROW SIGN 0x204C (] # BLACK LEFTWARDS BULLET 0x204D [) # BLACK RIGHTWARDS BULLET 0x2070 0 # SUPERSCRIPT ZERO 0x2074 4 # SUPERSCRIPT FOUR 0x2075 5 # SUPERSCRIPT FIVE 0x2076 6 # SUPERSCRIPT SIX 0x2077 7 # SUPERSCRIPT SEVEN 0x2078 8 # SUPERSCRIPT EIGHT 0x2079 9 # SUPERSCRIPT NINE 0x207A + # SUPERSCRIPT PLUS SIGN 0x207B - # SUPERSCRIPT MINUS 0x207C = # SUPERSCRIPT EQUALS SIGN 0x207D ( # SUPERSCRIPT LEFT PARENTHESIS 0x207E ) # SUPERSCRIPT RIGHT PARENTHESIS 0x207F n # SUPERSCRIPT LATIN SMALL LETTER N 0x2080 0 # SUBSCRIPT ZERO 0x2081 1 # SUBSCRIPT ONE 0x2082 2 # SUBSCRIPT TWO 0x2083 3 # SUBSCRIPT THREE 0x2084 4 # SUBSCRIPT FOUR 0x2085 5 # SUBSCRIPT FIVE 0x2086 6 # SUBSCRIPT SIX 0x2087 7 # SUBSCRIPT SEVEN 0x2088 8 # SUBSCRIPT EIGHT 0x2089 9 # SUBSCRIPT NINE 0x208A + # SUBSCRIPT PLUS SIGN 0x208B - # SUBSCRIPT MINUS 0x208C = # SUBSCRIPT EQUALS SIGN 0x208D ( # SUBSCRIPT LEFT PARENTHESIS 0x208E ) # SUBSCRIPT RIGHT PARENTHESIS 0x20A0 ECU # EURO-CURRENCY SIGN 0x20A1 CL # COLON SIGN 0x20A2 Cr # CRUZEIRO SIGN 0x20A3 FF # 
FRENCH FRANC SIGN 0x20A4 L # LIRA SIGN 0x20A5 mil # MILL SIGN 0x20A6 N # NAIRA SIGN 0x20A7 Pts # PESETA SIGN 0x20A8 Rs # RUPEE SIGN 0x20A9 W # WON SIGN 0x20AA NS # NEW SHEQEL SIGN 0x20AB D # DONG SIGN 0x20AC EUR # EURO SIGN 0x20AD K # KIP SIGN 0x20AE T # TUGRIK SIGN 0x20AF Dr # DRACHMA SIGN # # Characters 0x2100 to 0x21FF # 0x2122 tm # TRADE MARK SIGN 0x2153 1/3 # VULGAR FRACTION ONE THIRD 0x2154 2/3 # VULGAR FRACTION TWO THIRDS 0x2155 1/5 # VULGAR FRACTION ONE FIFTH 0x2156 2/5 # VULGAR FRACTION TWO FIFTHS 0x2157 3/5 # VULGAR FRACTION THREE FIFTHS 0x2158 4/5 # VULGAR FRACTION FOUR FIFTHS 0x2159 1/6 # VULGAR FRACTION ONE SIXTH 0x215A 5/6 # VULGAR FRACTION FIVE SIXTHS 0x215B 1/8 # VULGAR FRACTION ONE EIGHTH 0x215C 3/8 # VULGAR FRACTION THREE EIGHTHS 0x215D 5/8 # VULGAR FRACTION FIVE EIGHTHS 0x215E 7/8 # VULGAR FRACTION SEVEN EIGHTHS 0x215F 1/ # FRACTION NUMERATOR ONE 0x2160 I # ROMAN NUMERAL ONE 0x2161 II # ROMAN NUMERAL TWO 0x2162 III # ROMAN NUMERAL THREE 0x2163 IV # ROMAN NUMERAL FOUR 0x2164 V # ROMAN NUMERAL FIVE 0x2165 VI # ROMAN NUMERAL SIX 0x2166 VII # ROMAN NUMERAL SEVEN 0x2167 VIII # ROMAN NUMERAL EIGHT 0x2168 IX # ROMAN NUMERAL NINE 0x2169 X # ROMAN NUMERAL TEN 0x216A XI # ROMAN NUMERAL ELEVEN 0x216B XII # ROMAN NUMERAL TWELVE 0x216C L # ROMAN NUMERAL FIFTY 0x216D C # ROMAN NUMERAL ONE HUNDRED 0x216E D # ROMAN NUMERAL FIVE HUNDRED 0x216F M # ROMAN NUMERAL ONE THOUSAND 0x2170 i # SMALL ROMAN NUMERAL ONE 0x2171 ii # SMALL ROMAN NUMERAL TWO 0x2172 iii # SMALL ROMAN NUMERAL THREE 0x2173 iv # SMALL ROMAN NUMERAL FOUR 0x2174 v # SMALL ROMAN NUMERAL FIVE 0x2175 vi # SMALL ROMAN NUMERAL SIX 0x2176 vii # SMALL ROMAN NUMERAL SEVEN 0x2177 viii # SMALL ROMAN NUMERAL EIGHT 0x2178 ix # SMALL ROMAN NUMERAL NINE 0x2179 x # SMALL ROMAN NUMERAL TEN 0x217A xi # SMALL ROMAN NUMERAL ELEVEN 0x217B xii # SMALL ROMAN NUMERAL TWELVE 0x217C l # SMALL ROMAN NUMERAL FIFTY 0x217D c # SMALL ROMAN NUMERAL ONE HUNDRED 0x217E d # SMALL ROMAN NUMERAL FIVE HUNDRED 0x217F m # SMALL ROMAN 
NUMERAL ONE THOUSAND 0x2180 (D # ROMAN NUMERAL ONE THOUSAND C D 0x2181 D) # ROMAN NUMERAL FIVE THOUSAND 0x2182 ((|)) # ROMAN NUMERAL TEN THOUSAND 0x2183 ) # ROMAN NUMERAL REVERSED ONE HUNDRED 0x2190 - # LEFTWARDS ARROW 0x2191 | # UPWARDS ARROW 0x2192 - # RIGHTWARDS ARROW 0x2193 | # DOWNWARDS ARROW 0x2194 - # LEFT RIGHT ARROW 0x2195 | # UP DOWN ARROW 0x2196 \ # NORTH WEST ARROW 0x2197 / # NORTH EAST ARROW 0x2198 \ # SOUTH EAST ARROW 0x2199 / # SOUTH WEST ARROW 0x219A - # LEFTWARDS ARROW WITH STROKE 0x219B - # RIGHTWARDS ARROW WITH STROKE 0x219C ~ # LEFTWARDS WAVE ARROW 0x219D ~ # RIGHTWARDS WAVE ARROW 0x219E - # LEFTWARDS TWO HEADED ARROW 0x219F | # UPWARDS TWO HEADED ARROW 0x21A0 - # RIGHTWARDS TWO HEADED ARROW 0x21A1 | # DOWNWARDS TWO HEADED ARROW 0x21A2 - # LEFTWARDS ARROW WITH TAIL 0x21A3 - # RIGHTWARDS ARROW WITH TAIL 0x21A4 - # LEFTWARDS ARROW FROM BAR 0x21A5 | # UPWARDS ARROW FROM BAR 0x21A6 - # RIGHTWARDS ARROW FROM BAR 0x21A7 | # DOWNWARDS ARROW FROM BAR 0x21A8 | # UP DOWN ARROW WITH BASE 0x21A9 - # LEFTWARDS ARROW WITH HOOK 0x21AA - # RIGHTWARDS ARROW WITH HOOK 0x21AB - # LEFTWARDS ARROW WITH LOOP 0x21AC - # RIGHTWARDS ARROW WITH LOOP 0x21AD - # LEFT RIGHT WAVE ARROW 0x21AE - # LEFT RIGHT ARROW WITH STROKE 0x21AF | # DOWNWARDS ZIGZAG ARROW 0x21B0 | # UPWARDS ARROW WITH TIP LEFTWARDS 0x21B1 | # UPWARDS ARROW WITH TIP RIGHTWARDS 0x21B2 | # DOWNWARDS ARROW WITH TIP LEFTWARDS 0x21B3 | # DOWNWARDS ARROW WITH TIP RIGHTWARDS 0x21B4 | # RIGHTWARDS ARROW WITH CORNER DOWNWARDS 0x21B5 | # DOWNWARDS ARROW WITH CORNER LEFTWARDS 0x21B6 ^ # ANTICLOCKWISE TOP SEMICIRCLE ARROW 0x21B7 V # CLOCKWISE TOP SEMICIRCLE ARROW 0x21B8 \ # NORTH WEST ARROW TO LONG BAR 0x21B9 = # LEFTWARDS ARROW TO BAR OVER RIGHTWARDS ARROW TO BAR 0x21BA V # ANTICLOCKWISE OPEN CIRCLE ARROW 0x21BB ^ # CLOCKWISE OPEN CIRCLE ARROW 0x21BC - # LEFTWARDS HARPOON WITH BARB UPWARDS 0x21BD - # LEFTWARDS HARPOON WITH BARB DOWNWARDS 0x21BE | # UPWARDS HARPOON WITH BARB RIGHTWARDS 0x21BF | # UPWARDS HARPOON WITH 
BARB LEFTWARDS 0x21C0 - # RIGHTWARDS HARPOON WITH BARB UPWARDS 0x21C1 - # RIGHTWARDS HARPOON WITH BARB DOWNWARDS 0x21C2 | # DOWNWARDS HARPOON WITH BARB RIGHTWARDS 0x21C3 | # DOWNWARDS HARPOON WITH BARB LEFTWARDS 0x21C4 = # RIGHTWARDS ARROW OVER LEFTWARDS ARROW 0x21C5 | # UPWARDS ARROW LEFTWARDS OF DOWNWARDS ARROW 0x21C6 = # LEFTWARDS ARROW OVER RIGHTWARDS ARROW 0x21C7 = # LEFTWARDS PAIRED ARROWS 0x21C8 | # UPWARDS PAIRED ARROWS 0x21C9 = # RIGHTWARDS PAIRED ARROWS 0x21CA | # DOWNWARDS PAIRED ARROWS 0x21CB = # LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON 0x21CC = # RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON 0x21CD = # LEFTWARDS DOUBLE ARROW WITH STROKE 0x21CE = # LEFT RIGHT DOUBLE ARROW WITH STROKE 0x21CF = # RIGHTWARDS DOUBLE ARROW WITH STROKE 0x21D0 = # LEFTWARDS DOUBLE ARROW 0x21D1 | # UPWARDS DOUBLE ARROW 0x21D2 = # RIGHTWARDS DOUBLE ARROW 0x21D3 | # DOWNWARDS DOUBLE ARROW 0x21D4 = # LEFT RIGHT DOUBLE ARROW 0x21D5 | # UP DOWN DOUBLE ARROW 0x21D6 \ # NORTH WEST DOUBLE ARROW 0x21D7 / # NORTH EAST DOUBLE ARROW 0x21D8 \ # SOUTH EAST DOUBLE ARROW 0x21D9 / # SOUTH WEST DOUBLE ARROW 0x21DA = # LEFTWARDS TRIPLE ARROW 0x21DB = # RIGHTWARDS TRIPLE ARROW 0x21DC ~ # LEFTWARDS SQUIGGLE ARROW 0x21DD ~ # RIGHTWARDS SQUIGGLE ARROW 0x21DE | # UPWARDS ARROW WITH DOUBLE STROKE 0x21DF | # DOWNWARDS ARROW WITH DOUBLE STROKE 0x21E0 - # LEFTWARDS DASHED ARROW 0x21E1 | # UPWARDS DASHED ARROW 0x21E2 - # RIGHTWARDS DASHED ARROW 0x21E3 | # DOWNWARDS DASHED ARROW 0x21E4 - # LEFTWARDS ARROW TO BAR 0x21E5 - # RIGHTWARDS ARROW TO BAR 0x21E6 - # LEFTWARDS WHITE ARROW 0x21E7 | # UPWARDS WHITE ARROW 0x21E8 - # RIGHTWARDS WHITE ARROW 0x21E9 | # DOWNWARDS WHITE ARROW 0x21EA | # UPWARDS WHITE ARROW FROM BAR 0x21EB | # UPWARDS WHITE ARROW ON PEDESTAL 0x21EC | # UPWARDS WHITE ARROW ON PEDESTAL WITH HORIZONTAL BAR 0x21ED | # UPWARDS WHITE ARROW ON PEDESTAL WITH VERTICAL BAR 0x21EE | # UPWARDS WHITE DOUBLE ARROW 0x21EF | # UPWARDS WHITE DOUBLE ARROW ON PEDESTAL 0x21F0 - # RIGHTWARDS WHITE ARROW FROM WALL 
0x21F1 \ # NORTH WEST ARROW TO CORNER 0x21F2 \ # SOUTH EAST ARROW TO CORNER 0x21F3 | # UP DOWN WHITE ARROW # # Characters 0x2200 to 0x22FF # 0x225F ? # QUESTIONED EQUAL TO # # Characters 0x2300 to 0x23FF # 0x2370 ? # APL FUNCTIONAL SYMBOL QUAD QUESTION # # Characters 0x2400 to 0x24FF # # # Characters 0x2500 to 0x25FF # 0x2500 - # BOX DRAWINGS LIGHT HORIZONTAL 0x2501 - # BOX DRAWINGS HEAVY HORIZONTAL 0x2502 | # BOX DRAWINGS LIGHT VERTICAL 0x2503 | # BOX DRAWINGS HEAVY VERTICAL 0x2504 - # BOX DRAWINGS LIGHT TRIPLE DASH HORIZONTAL 0x2505 - # BOX DRAWINGS HEAVY TRIPLE DASH HORIZONTAL 0x2506 | # BOX DRAWINGS LIGHT TRIPLE DASH VERTICAL 0x2507 | # BOX DRAWINGS HEAVY TRIPLE DASH VERTICAL 0x2508 - # BOX DRAWINGS LIGHT QUADRUPLE DASH HORIZONTAL 0x2509 - # BOX DRAWINGS HEAVY QUADRUPLE DASH HORIZONTAL 0x250A | # BOX DRAWINGS LIGHT QUADRUPLE DASH VERTICAL 0x250B | # BOX DRAWINGS HEAVY QUADRUPLE DASH VERTICAL 0x250C + # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x250D + # BOX DRAWINGS DOWN LIGHT AND RIGHT HEAVY 0x250E + # BOX DRAWINGS DOWN HEAVY AND RIGHT LIGHT 0x250F + # BOX DRAWINGS HEAVY DOWN AND RIGHT 0x2510 + # BOX DRAWINGS LIGHT DOWN AND LEFT 0x2511 + # BOX DRAWINGS DOWN LIGHT AND LEFT HEAVY 0x2512 + # BOX DRAWINGS DOWN HEAVY AND LEFT LIGHT 0x2513 + # BOX DRAWINGS HEAVY DOWN AND LEFT 0x2514 + # BOX DRAWINGS LIGHT UP AND RIGHT 0x2515 + # BOX DRAWINGS UP LIGHT AND RIGHT HEAVY 0x2516 + # BOX DRAWINGS UP HEAVY AND RIGHT LIGHT 0x2517 + # BOX DRAWINGS HEAVY UP AND RIGHT 0x2518 + # BOX DRAWINGS LIGHT UP AND LEFT 0x2519 + # BOX DRAWINGS UP LIGHT AND LEFT HEAVY 0x251A + # BOX DRAWINGS UP HEAVY AND LEFT LIGHT 0x251B + # BOX DRAWINGS HEAVY UP AND LEFT 0x251C + # BOX DRAWINGS LIGHT VERTICAL AND RIGHT 0x251D + # BOX DRAWINGS VERTICAL LIGHT AND RIGHT HEAVY 0x251E + # BOX DRAWINGS UP HEAVY AND RIGHT DOWN LIGHT 0x251F + # BOX DRAWINGS DOWN HEAVY AND RIGHT UP LIGHT 0x2520 + # BOX DRAWINGS VERTICAL HEAVY AND RIGHT LIGHT 0x2521 + # BOX DRAWINGS DOWN LIGHT AND RIGHT UP HEAVY 0x2522 + # BOX DRAWINGS 
UP LIGHT AND RIGHT DOWN HEAVY 0x2523 + # BOX DRAWINGS HEAVY VERTICAL AND RIGHT 0x2524 + # BOX DRAWINGS LIGHT VERTICAL AND LEFT 0x2525 + # BOX DRAWINGS VERTICAL LIGHT AND LEFT HEAVY 0x2526 + # BOX DRAWINGS UP HEAVY AND LEFT DOWN LIGHT 0x2527 + # BOX DRAWINGS DOWN HEAVY AND LEFT UP LIGHT 0x2528 + # BOX DRAWINGS VERTICAL HEAVY AND LEFT LIGHT 0x2529 + # BOX DRAWINGS DOWN LIGHT AND LEFT UP HEAVY 0x252A + # BOX DRAWINGS UP LIGHT AND LEFT DOWN HEAVY 0x252B + # BOX DRAWINGS HEAVY VERTICAL AND LEFT 0x252C + # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 0x252D + # BOX DRAWINGS LEFT HEAVY AND RIGHT DOWN LIGHT 0x252E + # BOX DRAWINGS RIGHT HEAVY AND LEFT DOWN LIGHT 0x252F + # BOX DRAWINGS DOWN LIGHT AND HORIZONTAL HEAVY 0x2530 + # BOX DRAWINGS DOWN HEAVY AND HORIZONTAL LIGHT 0x2531 + # BOX DRAWINGS RIGHT LIGHT AND LEFT DOWN HEAVY 0x2532 + # BOX DRAWINGS LEFT LIGHT AND RIGHT DOWN HEAVY 0x2533 + # BOX DRAWINGS HEAVY DOWN AND HORIZONTAL 0x2534 + # BOX DRAWINGS LIGHT UP AND HORIZONTAL 0x2535 + # BOX DRAWINGS LEFT HEAVY AND RIGHT UP LIGHT 0x2536 + # BOX DRAWINGS RIGHT HEAVY AND LEFT UP LIGHT 0x2537 + # BOX DRAWINGS UP LIGHT AND HORIZONTAL HEAVY 0x2538 + # BOX DRAWINGS UP HEAVY AND HORIZONTAL LIGHT 0x2539 + # BOX DRAWINGS RIGHT LIGHT AND LEFT UP HEAVY 0x253A + # BOX DRAWINGS LEFT LIGHT AND RIGHT UP HEAVY 0x253B + # BOX DRAWINGS HEAVY UP AND HORIZONTAL 0x253C + # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL 0x253D + # BOX DRAWINGS LEFT HEAVY AND RIGHT VERTICAL LIGHT 0x253E + # BOX DRAWINGS RIGHT HEAVY AND LEFT VERTICAL LIGHT 0x253F + # BOX DRAWINGS VERTICAL LIGHT AND HORIZONTAL HEAVY 0x2540 + # BOX DRAWINGS UP HEAVY AND DOWN HORIZONTAL LIGHT 0x2541 + # BOX DRAWINGS DOWN HEAVY AND UP HORIZONTAL LIGHT 0x2542 + # BOX DRAWINGS VERTICAL HEAVY AND HORIZONTAL LIGHT 0x2543 + # BOX DRAWINGS LEFT UP HEAVY AND RIGHT DOWN LIGHT 0x2544 + # BOX DRAWINGS RIGHT UP HEAVY AND LEFT DOWN LIGHT 0x2545 + # BOX DRAWINGS LEFT DOWN HEAVY AND RIGHT UP LIGHT 0x2546 + # BOX DRAWINGS RIGHT DOWN HEAVY AND LEFT UP LIGHT 
0x2547 + # BOX DRAWINGS DOWN LIGHT AND UP HORIZONTAL HEAVY 0x2548 + # BOX DRAWINGS UP LIGHT AND DOWN HORIZONTAL HEAVY 0x2549 + # BOX DRAWINGS RIGHT LIGHT AND LEFT VERTICAL HEAVY 0x254A + # BOX DRAWINGS LEFT LIGHT AND RIGHT VERTICAL HEAVY 0x254B + # BOX DRAWINGS HEAVY VERTICAL AND HORIZONTAL 0x254C - # BOX DRAWINGS LIGHT DOUBLE DASH HORIZONTAL 0x254D - # BOX DRAWINGS HEAVY DOUBLE DASH HORIZONTAL 0x254E | # BOX DRAWINGS LIGHT DOUBLE DASH VERTICAL 0x254F | # BOX DRAWINGS HEAVY DOUBLE DASH VERTICAL 0x2550 - # BOX DRAWINGS DOUBLE HORIZONTAL 0x2551 | # BOX DRAWINGS DOUBLE VERTICAL 0x2552 + # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE 0x2553 + # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE 0x2554 + # BOX DRAWINGS DOUBLE DOWN AND RIGHT 0x2555 + # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE 0x2556 + # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE 0x2557 + # BOX DRAWINGS DOUBLE DOWN AND LEFT 0x2558 + # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE 0x2559 + # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE 0x255A + # BOX DRAWINGS DOUBLE UP AND RIGHT 0x255B + # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE 0x255C + # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE 0x255D + # BOX DRAWINGS DOUBLE UP AND LEFT 0x255E + # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE 0x255F + # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE 0x2560 + # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT 0x2561 + # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE 0x2562 + # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE 0x2563 + # BOX DRAWINGS DOUBLE VERTICAL AND LEFT 0x2564 + # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE 0x2565 + # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE 0x2566 + # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL 0x2567 + # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE 0x2568 + # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE 0x2569 + # BOX DRAWINGS DOUBLE UP AND HORIZONTAL 0x256A + # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE 0x256B + # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE 0x256C + # BOX DRAWINGS DOUBLE VERTICAL AND 
HORIZONTAL 0x256D + # BOX DRAWINGS LIGHT ARC DOWN AND RIGHT 0x256E + # BOX DRAWINGS LIGHT ARC DOWN AND LEFT 0x256F + # BOX DRAWINGS LIGHT ARC UP AND LEFT 0x2570 + # BOX DRAWINGS LIGHT ARC UP AND RIGHT 0x2571 / # BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT 0x2572 \ # BOX DRAWINGS LIGHT DIAGONAL UPPER LEFT TO LOWER RIGHT 0x2573 X # BOX DRAWINGS LIGHT DIAGONAL CROSS 0x2574 - # BOX DRAWINGS LIGHT LEFT 0x2575 | # BOX DRAWINGS LIGHT UP 0x2576 - # BOX DRAWINGS LIGHT RIGHT 0x2577 | # BOX DRAWINGS LIGHT DOWN 0x2578 - # BOX DRAWINGS HEAVY LEFT 0x2579 | # BOX DRAWINGS HEAVY UP 0x257A - # BOX DRAWINGS HEAVY RIGHT 0x257B | # BOX DRAWINGS HEAVY DOWN 0x257C - # BOX DRAWINGS LIGHT LEFT AND HEAVY RIGHT 0x257D | # BOX DRAWINGS LIGHT UP AND HEAVY DOWN 0x257E - # BOX DRAWINGS HEAVY LEFT AND LIGHT RIGHT 0x257F | # BOX DRAWINGS HEAVY UP AND LIGHT DOWN 0x2580 # # UPPER HALF BLOCK 0x2581 # # LOWER ONE EIGHTH BLOCK 0x2582 # # LOWER ONE QUARTER BLOCK 0x2583 # # LOWER THREE EIGHTHS BLOCK 0x2584 # # LOWER HALF BLOCK 0x2585 # # LOWER FIVE EIGHTHS BLOCK 0x2586 # # LOWER THREE QUARTERS BLOCK 0x2587 # # LOWER SEVEN EIGHTHS BLOCK 0x2588 # # FULL BLOCK 0x2589 # # LEFT SEVEN EIGHTHS BLOCK 0x258A # # LEFT THREE QUARTERS BLOCK 0x258B # # LEFT FIVE EIGHTHS BLOCK 0x258C # # LEFT HALF BLOCK 0x258D # # LEFT THREE EIGHTHS BLOCK 0x258E # # LEFT ONE QUARTER BLOCK 0x258F # # LEFT ONE EIGHTH BLOCK 0x2590 # # RIGHT HALF BLOCK 0x2591 # # LIGHT SHADE 0x2592 # # MEDIUM SHADE 0x2593 # # DARK SHADE 0x2594 - # UPPER ONE EIGHTH BLOCK 0x2595 | # RIGHT ONE EIGHTH BLOCK 0x25A0 # # BLACK SQUARE 0x25A1 # # WHITE SQUARE 0x25A2 # # WHITE SQUARE WITH ROUNDED CORNERS 0x25A3 # # WHITE SQUARE CONTAINING BLACK SMALL SQUARE 0x25A4 # # SQUARE WITH HORIZONTAL FILL 0x25A5 # # SQUARE WITH VERTICAL FILL 0x25A6 # # SQUARE WITH ORTHOGONAL CROSSHATCH FILL 0x25A7 # # SQUARE WITH UPPER LEFT TO LOWER RIGHT FILL 0x25A8 # # SQUARE WITH UPPER RIGHT TO LOWER LEFT FILL 0x25A9 # # SQUARE WITH DIAGONAL CROSSHATCH FILL 0x25AA # # BLACK 
SMALL SQUARE 0x25AB # # WHITE SMALL SQUARE 0x25AC # # BLACK RECTANGLE 0x25AD # # WHITE RECTANGLE 0x25AE # # BLACK VERTICAL RECTANGLE 0x25AF # # WHITE VERTICAL RECTANGLE 0x25B0 # # BLACK PARALLELOGRAM 0x25B1 # # WHITE PARALLELOGRAM 0x25B2 ^ # BLACK UP-POINTING TRIANGLE 0x25B3 ^ # WHITE UP-POINTING TRIANGLE 0x25B4 ^ # BLACK UP-POINTING SMALL TRIANGLE 0x25B5 ^ # WHITE UP-POINTING SMALL TRIANGLE 0x25B6 > # BLACK RIGHT-POINTING TRIANGLE 0x25B7 > # WHITE RIGHT-POINTING TRIANGLE 0x25B8 > # BLACK RIGHT-POINTING SMALL TRIANGLE 0x25B9 > # WHITE RIGHT-POINTING SMALL TRIANGLE 0x25BA > # BLACK RIGHT-POINTING POINTER 0x25BB > # WHITE RIGHT-POINTING POINTER 0x25BC V # BLACK DOWN-POINTING TRIANGLE 0x25BD V # WHITE DOWN-POINTING TRIANGLE 0x25BE V # BLACK DOWN-POINTING SMALL TRIANGLE 0x25BF V # WHITE DOWN-POINTING SMALL TRIANGLE 0x25C0 < # BLACK LEFT-POINTING TRIANGLE 0x25C1 < # WHITE LEFT-POINTING TRIANGLE 0x25C2 < # BLACK LEFT-POINTING SMALL TRIANGLE 0x25C3 < # WHITE LEFT-POINTING SMALL TRIANGLE 0x25C4 < # BLACK LEFT-POINTING POINTER 0x25C5 < # WHITE LEFT-POINTING POINTER 0x25C6 * # BLACK DIAMOND 0x25C7 * # WHITE DIAMOND 0x25C8 * # WHITE DIAMOND CONTAINING BLACK SMALL DIAMOND 0x25C9 * # FISHEYE 0x25CA * # LOZENGE 0x25CB * # WHITE CIRCLE 0x25CC * # DOTTED CIRCLE 0x25CD * # CIRCLE WITH VERTICAL FILL 0x25CE * # BULLSEYE 0x25CF * # BLACK CIRCLE 0x25D0 * # CIRCLE WITH LEFT HALF BLACK 0x25D1 * # CIRCLE WITH RIGHT HALF BLACK 0x25D2 * # CIRCLE WITH LOWER HALF BLACK 0x25D3 * # CIRCLE WITH UPPER HALF BLACK 0x25D4 * # CIRCLE WITH UPPER RIGHT QUADRANT BLACK 0x25D5 * # CIRCLE WITH ALL BUT UPPER LEFT QUADRANT BLACK 0x25D6 * # LEFT HALF BLACK CIRCLE 0x25D7 * # RIGHT HALF BLACK CIRCLE 0x25D8 * # INVERSE BULLET 0x25D9 * # INVERSE WHITE CIRCLE 0x25DA * # UPPER HALF INVERSE WHITE CIRCLE 0x25DB * # LOWER HALF INVERSE WHITE CIRCLE 0x25DC * # UPPER LEFT QUADRANT CIRCULAR ARC 0x25DD * # UPPER RIGHT QUADRANT CIRCULAR ARC 0x25DE * # LOWER RIGHT QUADRANT CIRCULAR ARC 0x25DF * # LOWER LEFT QUADRANT CIRCULAR 
ARC 0x25E0 * # UPPER HALF CIRCLE 0x25E1 * # LOWER HALF CIRCLE 0x25E2 * # BLACK LOWER RIGHT TRIANGLE 0x25E3 * # BLACK LOWER LEFT TRIANGLE 0x25E4 * # BLACK UPPER LEFT TRIANGLE 0x25E5 * # BLACK UPPER RIGHT TRIANGLE 0x25E6 * # WHITE BULLET 0x25E7 # # SQUARE WITH LEFT HALF BLACK 0x25E8 # # SQUARE WITH RIGHT HALF BLACK 0x25E9 # # SQUARE WITH UPPER LEFT DIAGONAL HALF BLACK 0x25EA # # SQUARE WITH LOWER RIGHT DIAGONAL HALF BLACK 0x25EB # # WHITE SQUARE WITH VERTICAL BISECTING LINE 0x25EC ^ # WHITE UP-POINTING TRIANGLE WITH DOT 0x25ED ^ # UP-POINTING TRIANGLE WITH LEFT HALF BLACK 0x25EE ^ # UP-POINTING TRIANGLE WITH RIGHT HALF BLACK 0x25EF O # LARGE CIRCLE 0x25F0 # # WHITE SQUARE WITH UPPER LEFT QUADRANT 0x25F1 # # WHITE SQUARE WITH LOWER LEFT QUADRANT 0x25F2 # # WHITE SQUARE WITH LOWER RIGHT QUADRANT 0x25F3 # # WHITE SQUARE WITH UPPER RIGHT QUADRANT 0x25F4 # # WHITE CIRCLE WITH UPPER LEFT QUADRANT 0x25F5 # # WHITE CIRCLE WITH LOWER LEFT QUADRANT 0x25F6 # # WHITE CIRCLE WITH LOWER RIGHT QUADRANT 0x25F7 # # WHITE CIRCLE WITH UPPER RIGHT QUADRANT # # Characters 0x2600 to 0x26FF # # # Characters 0x2800 to 0x28FF # 0x2800 " " # BRAILLE PATTERN BLANK 0x2801 a # BRAILLE PATTERN DOTS-1 0x2802 1 # BRAILLE PATTERN DOTS-2 0x2803 b # BRAILLE PATTERN DOTS-12 0x2804 "'" # BRAILLE PATTERN DOTS-3 0x2805 k # BRAILLE PATTERN DOTS-13 0x2806 2 # BRAILLE PATTERN DOTS-23 0x2807 l # BRAILLE PATTERN DOTS-123 0x2808 @ # BRAILLE PATTERN DOTS-4 0x2809 c # BRAILLE PATTERN DOTS-14 0x280A i # BRAILLE PATTERN DOTS-24 0x280B f # BRAILLE PATTERN DOTS-124 0x280C / # BRAILLE PATTERN DOTS-34 0x280D m # BRAILLE PATTERN DOTS-134 0x280E s # BRAILLE PATTERN DOTS-234 0x280F p # BRAILLE PATTERN DOTS-1234 0x2810 '"' # BRAILLE PATTERN DOTS-5 0x2811 e # BRAILLE PATTERN DOTS-15 0x2812 3 # BRAILLE PATTERN DOTS-25 0x2813 h # BRAILLE PATTERN DOTS-125 0x2814 9 # BRAILLE PATTERN DOTS-35 0x2815 o # BRAILLE PATTERN DOTS-135 0x2816 6 # BRAILLE PATTERN DOTS-235 0x2817 r # BRAILLE PATTERN DOTS-1235 0x2818 ^ # BRAILLE PATTERN 
DOTS-45 0x2819 d # BRAILLE PATTERN DOTS-145 0x281A j # BRAILLE PATTERN DOTS-245 0x281B g # BRAILLE PATTERN DOTS-1245 0x281C > # BRAILLE PATTERN DOTS-345 0x281D n # BRAILLE PATTERN DOTS-1345 0x281E t # BRAILLE PATTERN DOTS-2345 0x281F q # BRAILLE PATTERN DOTS-12345 0x2820 , # BRAILLE PATTERN DOTS-6 0x2821 * # BRAILLE PATTERN DOTS-16 0x2822 5 # BRAILLE PATTERN DOTS-26 0x2823 < # BRAILLE PATTERN DOTS-126 0x2824 - # BRAILLE PATTERN DOTS-36 0x2825 u # BRAILLE PATTERN DOTS-136 0x2826 8 # BRAILLE PATTERN DOTS-236 0x2827 v # BRAILLE PATTERN DOTS-1236 0x2828 . # BRAILLE PATTERN DOTS-46 0x2829 % # BRAILLE PATTERN DOTS-146 0x282A [ # BRAILLE PATTERN DOTS-246 0x282B $ # BRAILLE PATTERN DOTS-1246 0x282C + # BRAILLE PATTERN DOTS-346 0x282D x # BRAILLE PATTERN DOTS-1346 0x282E ! # BRAILLE PATTERN DOTS-2346 0x282F & # BRAILLE PATTERN DOTS-12346 0x2830 ; # BRAILLE PATTERN DOTS-56 0x2831 : # BRAILLE PATTERN DOTS-156 0x2832 4 # BRAILLE PATTERN DOTS-256 0x2833 \ # BRAILLE PATTERN DOTS-1256 0x2834 0 # BRAILLE PATTERN DOTS-356 0x2835 z # BRAILLE PATTERN DOTS-1356 0x2836 7 # BRAILLE PATTERN DOTS-2356 0x2837 ( # BRAILLE PATTERN DOTS-12356 0x2838 _ # BRAILLE PATTERN DOTS-456 0x283A w # BRAILLE PATTERN DOTS-2456 0x283B ] # BRAILLE PATTERN DOTS-12456 0x283C # # BRAILLE PATTERN DOTS-3456 0x283D y # BRAILLE PATTERN DOTS-13456 0x283E ) # BRAILLE PATTERN DOTS-23456 0x283F = # BRAILLE PATTERN DOTS-123456 0x2840 [d7] # BRAILLE PATTERN DOTS-7 0x2841 [d17] # BRAILLE PATTERN DOTS-17 0x2842 [d27] # BRAILLE PATTERN DOTS-27 0x2843 [d127] # BRAILLE PATTERN DOTS-127 0x2844 [d37] # BRAILLE PATTERN DOTS-37 0x2845 [d137] # BRAILLE PATTERN DOTS-137 0x2846 [d237] # BRAILLE PATTERN DOTS-237 0x2847 [d1237] # BRAILLE PATTERN DOTS-1237 0x2848 [d47] # BRAILLE PATTERN DOTS-47 0x2849 [d147] # BRAILLE PATTERN DOTS-147 0x284A [d247] # BRAILLE PATTERN DOTS-247 0x284B [d1247] # BRAILLE PATTERN DOTS-1247 0x284C [d347] # BRAILLE PATTERN DOTS-347 0x284D [d1347] # BRAILLE PATTERN DOTS-1347 0x284E [d2347] # BRAILLE PATTERN 
DOTS-2347 0x284F [d12347] # BRAILLE PATTERN DOTS-12347 0x2850 [d57] # BRAILLE PATTERN DOTS-57 0x2851 [d157] # BRAILLE PATTERN DOTS-157 0x2852 [d257] # BRAILLE PATTERN DOTS-257 0x2853 [d1257] # BRAILLE PATTERN DOTS-1257 0x2854 [d357] # BRAILLE PATTERN DOTS-357 0x2855 [d1357] # BRAILLE PATTERN DOTS-1357 0x2856 [d2357] # BRAILLE PATTERN DOTS-2357 0x2857 [d12357] # BRAILLE PATTERN DOTS-12357 0x2858 [d457] # BRAILLE PATTERN DOTS-457 0x2859 [d1457] # BRAILLE PATTERN DOTS-1457 0x285A [d2457] # BRAILLE PATTERN DOTS-2457 0x285B [d12457] # BRAILLE PATTERN DOTS-12457 0x285C [d3457] # BRAILLE PATTERN DOTS-3457 0x285D [d13457] # BRAILLE PATTERN DOTS-13457 0x285E [d23457] # BRAILLE PATTERN DOTS-23457 0x285F [d123457] # BRAILLE PATTERN DOTS-123457 0x2860 [d67] # BRAILLE PATTERN DOTS-67 0x2861 [d167] # BRAILLE PATTERN DOTS-167 0x2862 [d267] # BRAILLE PATTERN DOTS-267 0x2863 [d1267] # BRAILLE PATTERN DOTS-1267 0x2864 [d367] # BRAILLE PATTERN DOTS-367 0x2865 [d1367] # BRAILLE PATTERN DOTS-1367 0x2866 [d2367] # BRAILLE PATTERN DOTS-2367 0x2867 [d12367] # BRAILLE PATTERN DOTS-12367 0x2868 [d467] # BRAILLE PATTERN DOTS-467 0x2869 [d1467] # BRAILLE PATTERN DOTS-1467 0x286A [d2467] # BRAILLE PATTERN DOTS-2467 0x286B [d12467] # BRAILLE PATTERN DOTS-12467 0x286C [d3467] # BRAILLE PATTERN DOTS-3467 0x286D [d13467] # BRAILLE PATTERN DOTS-13467 0x286E [d23467] # BRAILLE PATTERN DOTS-23467 0x286F [d123467] # BRAILLE PATTERN DOTS-123467 0x2870 [d567] # BRAILLE PATTERN DOTS-567 0x2871 [d1567] # BRAILLE PATTERN DOTS-1567 0x2872 [d2567] # BRAILLE PATTERN DOTS-2567 0x2873 [d12567] # BRAILLE PATTERN DOTS-12567 0x2874 [d3567] # BRAILLE PATTERN DOTS-3567 0x2875 [d13567] # BRAILLE PATTERN DOTS-13567 0x2876 [d23567] # BRAILLE PATTERN DOTS-23567 0x2877 [d123567] # BRAILLE PATTERN DOTS-123567 0x2878 [d4567] # BRAILLE PATTERN DOTS-4567 0x2879 [d14567] # BRAILLE PATTERN DOTS-14567 0x287A [d24567] # BRAILLE PATTERN DOTS-24567 0x287B [d124567] # BRAILLE PATTERN DOTS-124567 0x287C [d34567] # BRAILLE PATTERN 
DOTS-34567 0x287D [d134567] # BRAILLE PATTERN DOTS-134567 0x287E [d234567] # BRAILLE PATTERN DOTS-234567 0x287F [d1234567] # BRAILLE PATTERN DOTS-1234567 0x2880 [d8] # BRAILLE PATTERN DOTS-8 0x2881 [d18] # BRAILLE PATTERN DOTS-18 0x2882 [d28] # BRAILLE PATTERN DOTS-28 0x2883 [d128] # BRAILLE PATTERN DOTS-128 0x2884 [d38] # BRAILLE PATTERN DOTS-38 0x2885 [d138] # BRAILLE PATTERN DOTS-138 0x2886 [d238] # BRAILLE PATTERN DOTS-238 0x2887 [d1238] # BRAILLE PATTERN DOTS-1238 0x2888 [d48] # BRAILLE PATTERN DOTS-48 0x2889 [d148] # BRAILLE PATTERN DOTS-148 0x288A [d248] # BRAILLE PATTERN DOTS-248 0x288B [d1248] # BRAILLE PATTERN DOTS-1248 0x288C [d348] # BRAILLE PATTERN DOTS-348 0x288D [d1348] # BRAILLE PATTERN DOTS-1348 0x288E [d2348] # BRAILLE PATTERN DOTS-2348 0x288F [d12348] # BRAILLE PATTERN DOTS-12348 0x2890 [d58] # BRAILLE PATTERN DOTS-58 0x2891 [d158] # BRAILLE PATTERN DOTS-158 0x2892 [d258] # BRAILLE PATTERN DOTS-258 0x2893 [d1258] # BRAILLE PATTERN DOTS-1258 0x2894 [d358] # BRAILLE PATTERN DOTS-358 0x2895 [d1358] # BRAILLE PATTERN DOTS-1358 0x2896 [d2358] # BRAILLE PATTERN DOTS-2358 0x2897 [d12358] # BRAILLE PATTERN DOTS-12358 0x2898 [d458] # BRAILLE PATTERN DOTS-458 0x2899 [d1458] # BRAILLE PATTERN DOTS-1458 0x289A [d2458] # BRAILLE PATTERN DOTS-2458 0x289B [d12458] # BRAILLE PATTERN DOTS-12458 0x289C [d3458] # BRAILLE PATTERN DOTS-3458 0x289D [d13458] # BRAILLE PATTERN DOTS-13458 0x289E [d23458] # BRAILLE PATTERN DOTS-23458 0x289F [d123458] # BRAILLE PATTERN DOTS-123458 0x28A0 [d68] # BRAILLE PATTERN DOTS-68 0x28A1 [d168] # BRAILLE PATTERN DOTS-168 0x28A2 [d268] # BRAILLE PATTERN DOTS-268 0x28A3 [d1268] # BRAILLE PATTERN DOTS-1268 0x28A4 [d368] # BRAILLE PATTERN DOTS-368 0x28A5 [d1368] # BRAILLE PATTERN DOTS-1368 0x28A6 [d2368] # BRAILLE PATTERN DOTS-2368 0x28A7 [d12368] # BRAILLE PATTERN DOTS-12368 0x28A8 [d468] # BRAILLE PATTERN DOTS-468 0x28A9 [d1468] # BRAILLE PATTERN DOTS-1468 0x28AA [d2468] # BRAILLE PATTERN DOTS-2468 0x28AB [d12468] # BRAILLE PATTERN 
DOTS-12468 0x28AC [d3468] # BRAILLE PATTERN DOTS-3468 0x28AD [d13468] # BRAILLE PATTERN DOTS-13468 0x28AE [d23468] # BRAILLE PATTERN DOTS-23468 0x28AF [d123468] # BRAILLE PATTERN DOTS-123468 0x28B0 [d568] # BRAILLE PATTERN DOTS-568 0x28B1 [d1568] # BRAILLE PATTERN DOTS-1568 0x28B2 [d2568] # BRAILLE PATTERN DOTS-2568 0x28B3 [d12568] # BRAILLE PATTERN DOTS-12568 0x28B4 [d3568] # BRAILLE PATTERN DOTS-3568 0x28B5 [d13568] # BRAILLE PATTERN DOTS-13568 0x28B6 [d23568] # BRAILLE PATTERN DOTS-23568 0x28B7 [d123568] # BRAILLE PATTERN DOTS-123568 0x28B8 [d4568] # BRAILLE PATTERN DOTS-4568 0x28B9 [d14568] # BRAILLE PATTERN DOTS-14568 0x28BA [d24568] # BRAILLE PATTERN DOTS-24568 0x28BB [d124568] # BRAILLE PATTERN DOTS-124568 0x28BC [d34568] # BRAILLE PATTERN DOTS-34568 0x28BD [d134568] # BRAILLE PATTERN DOTS-134568 0x28BE [d234568] # BRAILLE PATTERN DOTS-234568 0x28BF [d1234568] # BRAILLE PATTERN DOTS-1234568 0x28C0 [d78] # BRAILLE PATTERN DOTS-78 0x28C1 [d178] # BRAILLE PATTERN DOTS-178 0x28C2 [d278] # BRAILLE PATTERN DOTS-278 0x28C3 [d1278] # BRAILLE PATTERN DOTS-1278 0x28C4 [d378] # BRAILLE PATTERN DOTS-378 0x28C5 [d1378] # BRAILLE PATTERN DOTS-1378 0x28C6 [d2378] # BRAILLE PATTERN DOTS-2378 0x28C7 [d12378] # BRAILLE PATTERN DOTS-12378 0x28C8 [d478] # BRAILLE PATTERN DOTS-478 0x28C9 [d1478] # BRAILLE PATTERN DOTS-1478 0x28CA [d2478] # BRAILLE PATTERN DOTS-2478 0x28CB [d12478] # BRAILLE PATTERN DOTS-12478 0x28CC [d3478] # BRAILLE PATTERN DOTS-3478 0x28CD [d13478] # BRAILLE PATTERN DOTS-13478 0x28CE [d23478] # BRAILLE PATTERN DOTS-23478 0x28CF [d123478] # BRAILLE PATTERN DOTS-123478 0x28D0 [d578] # BRAILLE PATTERN DOTS-578 0x28D1 [d1578] # BRAILLE PATTERN DOTS-1578 0x28D2 [d2578] # BRAILLE PATTERN DOTS-2578 0x28D3 [d12578] # BRAILLE PATTERN DOTS-12578 0x28D4 [d3578] # BRAILLE PATTERN DOTS-3578 0x28D5 [d13578] # BRAILLE PATTERN DOTS-13578 0x28D6 [d23578] # BRAILLE PATTERN DOTS-23578 0x28D7 [d123578] # BRAILLE PATTERN DOTS-123578 0x28D8 [d4578] # BRAILLE PATTERN DOTS-4578 
0x28D9 [d14578] # BRAILLE PATTERN DOTS-14578 0x28DA [d24578] # BRAILLE PATTERN DOTS-24578 0x28DB [d124578] # BRAILLE PATTERN DOTS-124578 0x28DC [d34578] # BRAILLE PATTERN DOTS-34578 0x28DD [d134578] # BRAILLE PATTERN DOTS-134578 0x28DE [d234578] # BRAILLE PATTERN DOTS-234578 0x28DF [d1234578] # BRAILLE PATTERN DOTS-1234578 0x28E0 [d678] # BRAILLE PATTERN DOTS-678 0x28E1 [d1678] # BRAILLE PATTERN DOTS-1678 0x28E2 [d2678] # BRAILLE PATTERN DOTS-2678 0x28E3 [d12678] # BRAILLE PATTERN DOTS-12678 0x28E4 [d3678] # BRAILLE PATTERN DOTS-3678 0x28E5 [d13678] # BRAILLE PATTERN DOTS-13678 0x28E6 [d23678] # BRAILLE PATTERN DOTS-23678 0x28E7 [d123678] # BRAILLE PATTERN DOTS-123678 0x28E8 [d4678] # BRAILLE PATTERN DOTS-4678 0x28E9 [d14678] # BRAILLE PATTERN DOTS-14678 0x28EA [d24678] # BRAILLE PATTERN DOTS-24678 0x28EB [d124678] # BRAILLE PATTERN DOTS-124678 0x28EC [d34678] # BRAILLE PATTERN DOTS-34678 0x28ED [d134678] # BRAILLE PATTERN DOTS-134678 0x28EE [d234678] # BRAILLE PATTERN DOTS-234678 0x28EF [d1234678] # BRAILLE PATTERN DOTS-1234678 0x28F0 [d5678] # BRAILLE PATTERN DOTS-5678 0x28F1 [d15678] # BRAILLE PATTERN DOTS-15678 0x28F2 [d25678] # BRAILLE PATTERN DOTS-25678 0x28F3 [d125678] # BRAILLE PATTERN DOTS-125678 0x28F4 [d35678] # BRAILLE PATTERN DOTS-35678 0x28F5 [d135678] # BRAILLE PATTERN DOTS-135678 0x28F6 [d235678] # BRAILLE PATTERN DOTS-235678 0x28F7 [d1235678] # BRAILLE PATTERN DOTS-1235678 0x28F8 [d45678] # BRAILLE PATTERN DOTS-45678 0x28F9 [d145678] # BRAILLE PATTERN DOTS-145678 0x28FA [d245678] # BRAILLE PATTERN DOTS-245678 0x28FB [d1245678] # BRAILLE PATTERN DOTS-1245678 0x28FC [d345678] # BRAILLE PATTERN DOTS-345678 0x28FD [d1345678] # BRAILLE PATTERN DOTS-1345678 0x28FE [d2345678] # BRAILLE PATTERN DOTS-2345678 0x28FF [d12345678] # BRAILLE PATTERN DOTS-12345678 # # Characters 0x2900 to 0x29FF # # # Characters 0x2A00 to 0x2AFF # 0x2A7B ? # LESS-THAN WITH QUESTION MARK ABOVE 0x2A7C ? 
# GREATER-THAN WITH QUESTION MARK ABOVE # # Characters 0x2B00 to 0x2BFF # # # Characters 0x2C00 to 0x2CFF # 0x2CFA ? # COPTIC OLD NUBIAN DIRECT QUESTION MARK 0x2CFB ? # COPTIC OLD NUBIAN INDIRECT QUESTION MARK # # Characters 0x2D00 to 0x2DFF # # # Characters 0x2E00 to 0x2EFF # 0x2E18 ? # INVERTED INTERROBANG 0x2E2E ? # REVERSED QUESTION MARK # # Characters 0x2F00 to 0x2FFF # # # Characters 0x3000 to 0x30FF # 0x3000 " " # IDEOGRAPHIC SPACE 0x3001 , # IDEOGRAPHIC COMMA 0x3002 . # IDEOGRAPHIC FULL STOP 0x3003 '"' # DITTO MARK 0x3004 [JIS] # JAPANESE INDUSTRIAL STANDARD SYMBOL 0x3005 '"' # IDEOGRAPHIC ITERATION MARK 0x3006 / # IDEOGRAPHIC CLOSING MARK 0x3007 0 # IDEOGRAPHIC NUMBER ZERO 0x3008 < # LEFT ANGLE BRACKET 0x3009 > # RIGHT ANGLE BRACKET 0x300A << # LEFT DOUBLE ANGLE BRACKET 0x300B >> # RIGHT DOUBLE ANGLE BRACKET 0x300C [ # LEFT CORNER BRACKET 0x300D ] # RIGHT CORNER BRACKET 0x300E { # LEFT WHITE CORNER BRACKET 0x300F } # RIGHT WHITE CORNER BRACKET 0x3010 [( # LEFT BLACK LENTICULAR BRACKET 0x3011 )] # RIGHT BLACK LENTICULAR BRACKET 0x3012 @ # POSTAL MARK 0x3013 X # GETA MARK 0x3014 [ # LEFT TORTOISE SHELL BRACKET 0x3015 ] # RIGHT TORTOISE SHELL BRACKET 0x3016 [[ # LEFT WHITE LENTICULAR BRACKET 0x3017 ]] # RIGHT WHITE LENTICULAR BRACKET 0x3018 (( # LEFT WHITE TORTOISE SHELL BRACKET 0x3019 )) # RIGHT WHITE TORTOISE SHELL BRACKET 0x301A [[ # LEFT WHITE SQUARE BRACKET 0x301B ]] # RIGHT WHITE SQUARE BRACKET 0x301C ~ # WAVE DASH 0x301D `` # REVERSED DOUBLE PRIME QUOTATION MARK 0x301E "''" # DOUBLE PRIME QUOTATION MARK 0x301F ,, # LOW DOUBLE PRIME QUOTATION MARK 0x3020 @ # POSTAL MARK FACE 0x3021 1 # HANGZHOU NUMERAL ONE 0x3022 2 # HANGZHOU NUMERAL TWO 0x3023 3 # HANGZHOU NUMERAL THREE 0x3024 4 # HANGZHOU NUMERAL FOUR 0x3025 5 # HANGZHOU NUMERAL FIVE 0x3026 6 # HANGZHOU NUMERAL SIX 0x3027 7 # HANGZHOU NUMERAL SEVEN 0x3028 8 # HANGZHOU NUMERAL EIGHT 0x3029 9 # HANGZHOU NUMERAL NINE 0x3030 ~ # WAVY DASH 0x3031 + # VERTICAL KANA REPEAT MARK 0x3032 + # VERTICAL KANA 
REPEAT WITH VOICED SOUND MARK 0x3033 + # VERTICAL KANA REPEAT MARK UPPER HALF 0x3034 + # VERTICAL KANA REPEAT WITH VOICED SOUND MARK UPPER HALF 0x3036 @ # CIRCLED POSTAL MARK 0x3037 // # IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL 0x3038 +10+ # HANGZHOU NUMERAL TEN 0x3039 +20+ # HANGZHOU NUMERAL TWENTY 0x303A +30+ # HANGZHOU NUMERAL THIRTY 0x3041 a # HIRAGANA LETTER SMALL A 0x3042 a # HIRAGANA LETTER A 0x3043 i # HIRAGANA LETTER SMALL I 0x3044 i # HIRAGANA LETTER I 0x3045 u # HIRAGANA LETTER SMALL U 0x3046 u # HIRAGANA LETTER U 0x3047 e # HIRAGANA LETTER SMALL E 0x3048 e # HIRAGANA LETTER E 0x3049 o # HIRAGANA LETTER SMALL O 0x304A o # HIRAGANA LETTER O 0x304B ka # HIRAGANA LETTER KA 0x304C ga # HIRAGANA LETTER GA 0x304D ki # HIRAGANA LETTER KI 0x304E gi # HIRAGANA LETTER GI 0x304F ku # HIRAGANA LETTER KU 0x3050 gu # HIRAGANA LETTER GU 0x3051 ke # HIRAGANA LETTER KE 0x3052 ge # HIRAGANA LETTER GE 0x3053 ko # HIRAGANA LETTER KO 0x3054 go # HIRAGANA LETTER GO 0x3055 sa # HIRAGANA LETTER SA 0x3056 za # HIRAGANA LETTER ZA 0x3057 si # HIRAGANA LETTER SI 0x3058 zi # HIRAGANA LETTER ZI 0x3059 su # HIRAGANA LETTER SU 0x305A zu # HIRAGANA LETTER ZU 0x305B se # HIRAGANA LETTER SE 0x305C ze # HIRAGANA LETTER ZE 0x305D so # HIRAGANA LETTER SO 0x305E zo # HIRAGANA LETTER ZO 0x305F ta # HIRAGANA LETTER TA 0x3060 da # HIRAGANA LETTER DA 0x3061 ti # HIRAGANA LETTER TI 0x3062 di # HIRAGANA LETTER DI 0x3063 tu # HIRAGANA LETTER SMALL TU 0x3064 tu # HIRAGANA LETTER TU 0x3065 du # HIRAGANA LETTER DU 0x3066 te # HIRAGANA LETTER TE 0x3067 de # HIRAGANA LETTER DE 0x3068 to # HIRAGANA LETTER TO 0x3069 do # HIRAGANA LETTER DO 0x306A na # HIRAGANA LETTER NA 0x306B ni # HIRAGANA LETTER NI 0x306C nu # HIRAGANA LETTER NU 0x306D ne # HIRAGANA LETTER NE 0x306E no # HIRAGANA LETTER NO 0x306F ha # HIRAGANA LETTER HA 0x3070 ba # HIRAGANA LETTER BA 0x3071 pa # HIRAGANA LETTER PA 0x3072 hi # HIRAGANA LETTER HI 0x3073 bi # HIRAGANA LETTER BI 0x3074 pi # HIRAGANA LETTER PI 0x3075 hu # HIRAGANA 
LETTER HU 0x3076 bu # HIRAGANA LETTER BU 0x3077 pu # HIRAGANA LETTER PU 0x3078 he # HIRAGANA LETTER HE 0x3079 be # HIRAGANA LETTER BE 0x307A pe # HIRAGANA LETTER PE 0x307B ho # HIRAGANA LETTER HO 0x307C bo # HIRAGANA LETTER BO 0x307D po # HIRAGANA LETTER PO 0x307E ma # HIRAGANA LETTER MA 0x307F mi # HIRAGANA LETTER MI 0x3080 mu # HIRAGANA LETTER MU 0x3081 me # HIRAGANA LETTER ME 0x3082 mo # HIRAGANA LETTER MO 0x3083 ya # HIRAGANA LETTER SMALL YA 0x3084 ya # HIRAGANA LETTER YA 0x3085 yu # HIRAGANA LETTER SMALL YU 0x3086 yu # HIRAGANA LETTER YU 0x3087 yo # HIRAGANA LETTER SMALL YO 0x3088 yo # HIRAGANA LETTER YO 0x3089 ra # HIRAGANA LETTER RA 0x308A ri # HIRAGANA LETTER RI 0x308B ru # HIRAGANA LETTER RU 0x308C re # HIRAGANA LETTER RE 0x308D ro # HIRAGANA LETTER RO 0x308E wa # HIRAGANA LETTER SMALL WA 0x308F wa # HIRAGANA LETTER WA 0x3090 wi # HIRAGANA LETTER WI 0x3091 we # HIRAGANA LETTER WE 0x3092 wo # HIRAGANA LETTER WO 0x3093 n # HIRAGANA LETTER N 0x3094 vu # HIRAGANA LETTER VU 0x309D '"' # HIRAGANA ITERATION MARK 0x309E '"' # HIRAGANA VOICED ITERATION MARK 0x30A1 a # KATAKANA LETTER SMALL A 0x30A2 a # KATAKANA LETTER A 0x30A3 i # KATAKANA LETTER SMALL I 0x30A4 i # KATAKANA LETTER I 0x30A5 u # KATAKANA LETTER SMALL U 0x30A6 u # KATAKANA LETTER U 0x30A7 e # KATAKANA LETTER SMALL E 0x30A8 e # KATAKANA LETTER E 0x30A9 o # KATAKANA LETTER SMALL O 0x30AA o # KATAKANA LETTER O 0x30AB ka # KATAKANA LETTER KA 0x30AC ga # KATAKANA LETTER GA 0x30AD ki # KATAKANA LETTER KI 0x30AE gi # KATAKANA LETTER GI 0x30AF ku # KATAKANA LETTER KU 0x30B0 gu # KATAKANA LETTER GU 0x30B1 ke # KATAKANA LETTER KE 0x30B2 ge # KATAKANA LETTER GE 0x30B3 ko # KATAKANA LETTER KO 0x30B4 go # KATAKANA LETTER GO 0x30B5 sa # KATAKANA LETTER SA 0x30B6 za # KATAKANA LETTER ZA 0x30B7 si # KATAKANA LETTER SI 0x30B8 zi # KATAKANA LETTER ZI 0x30B9 su # KATAKANA LETTER SU 0x30BA zu # KATAKANA LETTER ZU 0x30BB se # KATAKANA LETTER SE 0x30BC ze # KATAKANA LETTER ZE 0x30BD so # KATAKANA LETTER SO 0x30BE zo # 
KATAKANA LETTER ZO 0x30BF ta # KATAKANA LETTER TA 0x30C0 da # KATAKANA LETTER DA 0x30C1 ti # KATAKANA LETTER TI 0x30C2 di # KATAKANA LETTER DI 0x30C3 tu # KATAKANA LETTER SMALL TU 0x30C4 tu # KATAKANA LETTER TU 0x30C5 du # KATAKANA LETTER DU 0x30C6 te # KATAKANA LETTER TE 0x30C7 de # KATAKANA LETTER DE 0x30C8 to # KATAKANA LETTER TO 0x30C9 do # KATAKANA LETTER DO 0x30CA na # KATAKANA LETTER NA 0x30CB ni # KATAKANA LETTER NI 0x30CC nu # KATAKANA LETTER NU 0x30CD ne # KATAKANA LETTER NE 0x30CE no # KATAKANA LETTER NO 0x30CF ha # KATAKANA LETTER HA 0x30D0 ba # KATAKANA LETTER BA 0x30D1 pa # KATAKANA LETTER PA 0x30D2 hi # KATAKANA LETTER HI 0x30D3 bi # KATAKANA LETTER BI 0x30D4 pi # KATAKANA LETTER PI 0x30D5 hu # KATAKANA LETTER HU 0x30D6 bu # KATAKANA LETTER BU 0x30D7 pu # KATAKANA LETTER PU 0x30D8 he # KATAKANA LETTER HE 0x30D9 be # KATAKANA LETTER BE 0x30DA pe # KATAKANA LETTER PE 0x30DB ho # KATAKANA LETTER HO 0x30DC bo # KATAKANA LETTER BO 0x30DD po # KATAKANA LETTER PO 0x30DE ma # KATAKANA LETTER MA 0x30DF mi # KATAKANA LETTER MI 0x30E0 mu # KATAKANA LETTER MU 0x30E1 me # KATAKANA LETTER ME 0x30E2 mo # KATAKANA LETTER MO 0x30E3 ya # KATAKANA LETTER SMALL YA 0x30E4 ya # KATAKANA LETTER YA 0x30E5 yu # KATAKANA LETTER SMALL YU 0x30E6 yu # KATAKANA LETTER YU 0x30E7 yo # KATAKANA LETTER SMALL YO 0x30E8 yo # KATAKANA LETTER YO 0x30E9 ra # KATAKANA LETTER RA 0x30EA ri # KATAKANA LETTER RI 0x30EB ru # KATAKANA LETTER RU 0x30EC re # KATAKANA LETTER RE 0x30ED ro # KATAKANA LETTER RO 0x30EE wa # KATAKANA LETTER SMALL WA 0x30EF wa # KATAKANA LETTER WA 0x30F0 wi # KATAKANA LETTER WI 0x30F1 we # KATAKANA LETTER WE 0x30F2 wo # KATAKANA LETTER WO 0x30F3 n # KATAKANA LETTER N 0x30F4 vu # KATAKANA LETTER VU 0x30F5 ka # KATAKANA LETTER SMALL KA 0x30F6 ke # KATAKANA LETTER SMALL KE 0x30F7 va # KATAKANA LETTER VA 0x30F8 vi # KATAKANA LETTER VI 0x30F9 ve # KATAKANA LETTER VE 0x30FA vo # KATAKANA LETTER VO 0x30FD '"' # KATAKANA ITERATION MARK 0x30FE '"' # KATAKANA VOICED ITERATION MARK 
0x3105 B # BOPOMOFO LETTER B 0x3106 P # BOPOMOFO LETTER P 0x3107 M # BOPOMOFO LETTER M 0x3108 F # BOPOMOFO LETTER F 0x3109 D # BOPOMOFO LETTER D 0x310A T # BOPOMOFO LETTER T 0x310B N # BOPOMOFO LETTER N 0x310C L # BOPOMOFO LETTER L 0x310D G # BOPOMOFO LETTER G 0x310E K # BOPOMOFO LETTER K 0x310F H # BOPOMOFO LETTER H 0x3110 J # BOPOMOFO LETTER J 0x3111 Q # BOPOMOFO LETTER Q 0x3112 X # BOPOMOFO LETTER X 0x3113 ZH # BOPOMOFO LETTER ZH 0x3114 CH # BOPOMOFO LETTER CH 0x3115 SH # BOPOMOFO LETTER SH 0x3116 R # BOPOMOFO LETTER R 0x3117 Z # BOPOMOFO LETTER Z 0x3118 C # BOPOMOFO LETTER C 0x3119 S # BOPOMOFO LETTER S 0x311A A # BOPOMOFO LETTER A 0x311B O # BOPOMOFO LETTER O 0x311C E # BOPOMOFO LETTER E 0x311D EH # BOPOMOFO LETTER EH 0x311E AI # BOPOMOFO LETTER AI 0x311F EI # BOPOMOFO LETTER EI 0x3120 AU # BOPOMOFO LETTER AU 0x3121 OU # BOPOMOFO LETTER OU 0x3122 AN # BOPOMOFO LETTER AN 0x3123 EN # BOPOMOFO LETTER EN 0x3124 ANG # BOPOMOFO LETTER ANG 0x3125 ENG # BOPOMOFO LETTER ENG 0x3126 ER # BOPOMOFO LETTER ER 0x3127 I # BOPOMOFO LETTER I 0x3128 U # BOPOMOFO LETTER U 0x3129 IU # BOPOMOFO LETTER IU 0x312A V # BOPOMOFO LETTER V 0x312B NG # BOPOMOFO LETTER NG 0x312C GN # BOPOMOFO LETTER GN 0x3131 g # HANGUL LETTER KIYEOK 0x3132 gg # HANGUL LETTER SSANGKIYEOK 0x3133 gs # HANGUL LETTER KIYEOK-SIOS 0x3134 n # HANGUL LETTER NIEUN 0x3135 nj # HANGUL LETTER NIEUN-CIEUC 0x3136 nh # HANGUL LETTER NIEUN-HIEUH 0x3137 d # HANGUL LETTER TIKEUT 0x3138 dd # HANGUL LETTER SSANGTIKEUT 0x3139 r # HANGUL LETTER RIEUL 0x313A lg # HANGUL LETTER RIEUL-KIYEOK 0x313B lm # HANGUL LETTER RIEUL-MIEUM 0x313C lb # HANGUL LETTER RIEUL-PIEUP 0x313D ls # HANGUL LETTER RIEUL-SIOS 0x313E lt # HANGUL LETTER RIEUL-THIEUTH 0x313F lp # HANGUL LETTER RIEUL-PHIEUPH 0x3140 rh # HANGUL LETTER RIEUL-HIEUH 0x3141 m # HANGUL LETTER MIEUM 0x3142 b # HANGUL LETTER PIEUP 0x3143 bb # HANGUL LETTER SSANGPIEUP 0x3144 bs # HANGUL LETTER PIEUP-SIOS 0x3145 s # HANGUL LETTER SIOS 0x3146 ss # HANGUL LETTER SSANGSIOS 0x3148 j # 
HANGUL LETTER CIEUC 0x3149 jj # HANGUL LETTER SSANGCIEUC 0x314A c # HANGUL LETTER CHIEUCH 0x314B k # HANGUL LETTER KHIEUKH 0x314C t # HANGUL LETTER THIEUTH 0x314D p # HANGUL LETTER PHIEUPH 0x314E h # HANGUL LETTER HIEUH 0x314F a # HANGUL LETTER A 0x3150 ae # HANGUL LETTER AE 0x3151 ya # HANGUL LETTER YA 0x3152 yae # HANGUL LETTER YAE 0x3153 eo # HANGUL LETTER EO 0x3154 e # HANGUL LETTER E 0x3155 yeo # HANGUL LETTER YEO 0x3156 ye # HANGUL LETTER YE 0x3157 o # HANGUL LETTER O 0x3158 wa # HANGUL LETTER WA 0x3159 wae # HANGUL LETTER WAE 0x315A oe # HANGUL LETTER OE 0x315B yo # HANGUL LETTER YO 0x315C u # HANGUL LETTER U 0x315D weo # HANGUL LETTER WEO 0x315E we # HANGUL LETTER WE 0x315F wi # HANGUL LETTER WI 0x3160 yu # HANGUL LETTER YU 0x3161 eu # HANGUL LETTER EU 0x3162 yi # HANGUL LETTER YI 0x3163 i # HANGUL LETTER I 0x3165 nn # HANGUL LETTER SSANGNIEUN 0x3166 nd # HANGUL LETTER NIEUN-TIKEUT 0x3167 ns # HANGUL LETTER NIEUN-SIOS 0x3168 nZ # HANGUL LETTER NIEUN-PANSIOS 0x3169 lgs # HANGUL LETTER RIEUL-KIYEOK-SIOS 0x316A ld # HANGUL LETTER RIEUL-TIKEUT 0x316B lbs # HANGUL LETTER RIEUL-PIEUP-SIOS 0x316C lZ # HANGUL LETTER RIEUL-PANSIOS 0x316D lQ # HANGUL LETTER RIEUL-YEORINHIEUH 0x316E mb # HANGUL LETTER MIEUM-PIEUP 0x316F ms # HANGUL LETTER MIEUM-SIOS 0x3170 mZ # HANGUL LETTER MIEUM-PANSIOS 0x3171 mN # HANGUL LETTER KAPYEOUNMIEUM 0x3172 bg # HANGUL LETTER PIEUP-KIYEOK 0x3174 bsg # HANGUL LETTER PIEUP-SIOS-KIYEOK 0x3175 bst # HANGUL LETTER PIEUP-SIOS-TIKEUT 0x3176 bj # HANGUL LETTER PIEUP-CIEUC 0x3177 bt # HANGUL LETTER PIEUP-THIEUTH 0x3178 bN # HANGUL LETTER KAPYEOUNPIEUP 0x3179 bbN # HANGUL LETTER KAPYEOUNSSANGPIEUP 0x317A sg # HANGUL LETTER SIOS-KIYEOK 0x317B sn # HANGUL LETTER SIOS-NIEUN 0x317C sd # HANGUL LETTER SIOS-TIKEUT 0x317D sb # HANGUL LETTER SIOS-PIEUP 0x317E sj # HANGUL LETTER SIOS-CIEUC 0x317F Z # HANGUL LETTER PANSIOS 0x3181 N # HANGUL LETTER YESIEUNG 0x3182 Ns # HANGUL LETTER YESIEUNG-SIOS 0x3183 NZ # HANGUL LETTER YESIEUNG-PANSIOS 0x3184 pN # HANGUL 
LETTER KAPYEOUNPHIEUPH 0x3185 hh # HANGUL LETTER SSANGHIEUH 0x3186 Q # HANGUL LETTER YEORINHIEUH 0x3187 yo-ya # HANGUL LETTER YO-YA 0x3188 yo-yae # HANGUL LETTER YO-YAE 0x3189 yo-i # HANGUL LETTER YO-I 0x318A yu-yeo # HANGUL LETTER YU-YEO 0x318B yu-ye # HANGUL LETTER YU-YE 0x318C yu-i # HANGUL LETTER YU-I 0x318D U # HANGUL LETTER ARAEA 0x318E U-i # HANGUL LETTER ARAEAE 0x31A0 BU # BOPOMOFO LETTER BU 0x31A1 ZI # BOPOMOFO LETTER ZI 0x31A2 JI # BOPOMOFO LETTER JI 0x31A3 GU # BOPOMOFO LETTER GU 0x31A4 EE # BOPOMOFO LETTER EE 0x31A5 ENN # BOPOMOFO LETTER ENN 0x31A6 OO # BOPOMOFO LETTER OO 0x31A7 ONN # BOPOMOFO LETTER ONN 0x31A8 IR # BOPOMOFO LETTER IR 0x31A9 ANN # BOPOMOFO LETTER ANN 0x31AA INN # BOPOMOFO LETTER INN 0x31AB UNN # BOPOMOFO LETTER UNN 0x31AC IM # BOPOMOFO LETTER IM 0x31AD NGG # BOPOMOFO LETTER NGG 0x31AE AINN # BOPOMOFO LETTER AINN 0x31AF AUNN # BOPOMOFO LETTER AUNN 0x31B0 AM # BOPOMOFO LETTER AM 0x31B1 OM # BOPOMOFO LETTER OM 0x31B2 ONG # BOPOMOFO LETTER ONG 0x31B3 INNN # BOPOMOFO LETTER INNN 0x31B4 P # BOPOMOFO FINAL LETTER P 0x31B5 T # BOPOMOFO FINAL LETTER T 0x31B6 K # BOPOMOFO FINAL LETTER K 0x31B7 H # BOPOMOFO FINAL LETTER H # # Characters 0x3200 to 0x32FF # 0x3200 (g) # PARENTHESIZED HANGUL KIYEOK 0x3201 (n) # PARENTHESIZED HANGUL NIEUN 0x3202 (d) # PARENTHESIZED HANGUL TIKEUT 0x3203 (r) # PARENTHESIZED HANGUL RIEUL 0x3204 (m) # PARENTHESIZED HANGUL MIEUM 0x3205 (b) # PARENTHESIZED HANGUL PIEUP 0x3206 (s) # PARENTHESIZED HANGUL SIOS 0x3207 () # PARENTHESIZED HANGUL IEUNG 0x3208 (j) # PARENTHESIZED HANGUL CIEUC 0x3209 (c) # PARENTHESIZED HANGUL CHIEUCH 0x320A (k) # PARENTHESIZED HANGUL KHIEUKH 0x320B (t) # PARENTHESIZED HANGUL THIEUTH 0x320C (p) # PARENTHESIZED HANGUL PHIEUPH 0x320D (h) # PARENTHESIZED HANGUL HIEUH 0x320E (ga) # PARENTHESIZED HANGUL KIYEOK A 0x320F (na) # PARENTHESIZED HANGUL NIEUN A 0x3210 (da) # PARENTHESIZED HANGUL TIKEUT A 0x3211 (ra) # PARENTHESIZED HANGUL RIEUL A 0x3212 (ma) # PARENTHESIZED HANGUL MIEUM A 0x3213 (ba) # 
PARENTHESIZED HANGUL PIEUP A 0x3214 (sa) # PARENTHESIZED HANGUL SIOS A 0x3215 (a) # PARENTHESIZED HANGUL IEUNG A 0x3216 (ja) # PARENTHESIZED HANGUL CIEUC A 0x3217 (ca) # PARENTHESIZED HANGUL CHIEUCH A 0x3218 (ka) # PARENTHESIZED HANGUL KHIEUKH A 0x3219 (ta) # PARENTHESIZED HANGUL THIEUTH A 0x321A (pa) # PARENTHESIZED HANGUL PHIEUPH A 0x321B (ha) # PARENTHESIZED HANGUL HIEUH A 0x321C (ju) # PARENTHESIZED HANGUL CIEUC U 0x3220 (1) # PARENTHESIZED IDEOGRAPH ONE 0x3221 (2) # PARENTHESIZED IDEOGRAPH TWO 0x3222 (3) # PARENTHESIZED IDEOGRAPH THREE 0x3223 (4) # PARENTHESIZED IDEOGRAPH FOUR 0x3224 (5) # PARENTHESIZED IDEOGRAPH FIVE 0x3225 (6) # PARENTHESIZED IDEOGRAPH SIX 0x3226 (7) # PARENTHESIZED IDEOGRAPH SEVEN 0x3227 (8) # PARENTHESIZED IDEOGRAPH EIGHT 0x3228 (9) # PARENTHESIZED IDEOGRAPH NINE 0x3229 (10) # PARENTHESIZED IDEOGRAPH TEN 0x322A (Yue) # PARENTHESIZED IDEOGRAPH MOON 0x322B (Huo) # PARENTHESIZED IDEOGRAPH FIRE 0x322C (Shui) # PARENTHESIZED IDEOGRAPH WATER 0x322D (Mu) # PARENTHESIZED IDEOGRAPH WOOD 0x322E (Jin) # PARENTHESIZED IDEOGRAPH METAL 0x322F (Tu) # PARENTHESIZED IDEOGRAPH EARTH 0x3230 (Ri) # PARENTHESIZED IDEOGRAPH SUN 0x3231 (Zhu) # PARENTHESIZED IDEOGRAPH STOCK 0x3232 (You) # PARENTHESIZED IDEOGRAPH HAVE 0x3233 (She) # PARENTHESIZED IDEOGRAPH SOCIETY 0x3234 (Ming) # PARENTHESIZED IDEOGRAPH NAME 0x3235 (Te) # PARENTHESIZED IDEOGRAPH SPECIAL 0x3236 (Cai) # PARENTHESIZED IDEOGRAPH FINANCIAL 0x3237 (Zhu) # PARENTHESIZED IDEOGRAPH CONGRATULATION 0x3238 (Lao) # PARENTHESIZED IDEOGRAPH LABOR 0x3239 (Dai) # PARENTHESIZED IDEOGRAPH REPRESENT 0x323A (Hu) # PARENTHESIZED IDEOGRAPH CALL 0x323B (Xue) # PARENTHESIZED IDEOGRAPH STUDY 0x323C (Jian) # PARENTHESIZED IDEOGRAPH SUPERVISE 0x323D (Qi) # PARENTHESIZED IDEOGRAPH ENTERPRISE 0x323E (Zi) # PARENTHESIZED IDEOGRAPH RESOURCE 0x323F (Xie) # PARENTHESIZED IDEOGRAPH ALLIANCE 0x3240 (Ji) # PARENTHESIZED IDEOGRAPH FESTIVAL 0x3241 (Xiu) # PARENTHESIZED IDEOGRAPH REST 0x3242 << # PARENTHESIZED IDEOGRAPH SELF 0x3243 >> # 
PARENTHESIZED IDEOGRAPH REACH 0x3244 [?] # CIRCLED IDEOGRAPH QUESTION 0x3260 (g) # CIRCLED HANGUL KIYEOK 0x3261 (n) # CIRCLED HANGUL NIEUN 0x3262 (d) # CIRCLED HANGUL TIKEUT 0x3263 (r) # CIRCLED HANGUL RIEUL 0x3264 (m) # CIRCLED HANGUL MIEUM 0x3265 (b) # CIRCLED HANGUL PIEUP 0x3266 (s) # CIRCLED HANGUL SIOS 0x3267 () # CIRCLED HANGUL IEUNG 0x3268 (j) # CIRCLED HANGUL CIEUC 0x3269 (c) # CIRCLED HANGUL CHIEUCH 0x326A (k) # CIRCLED HANGUL KHIEUKH 0x326B (t) # CIRCLED HANGUL THIEUTH 0x326C (p) # CIRCLED HANGUL PHIEUPH 0x326D (h) # CIRCLED HANGUL HIEUH 0x326E (ga) # CIRCLED HANGUL KIYEOK A 0x326F (na) # CIRCLED HANGUL NIEUN A 0x3270 (da) # CIRCLED HANGUL TIKEUT A 0x3271 (ra) # CIRCLED HANGUL RIEUL A 0x3272 (ma) # CIRCLED HANGUL MIEUM A 0x3273 (ba) # CIRCLED HANGUL PIEUP A 0x3274 (sa) # CIRCLED HANGUL SIOS A 0x3275 (a) # CIRCLED HANGUL IEUNG A 0x3276 (ja) # CIRCLED HANGUL CIEUC A 0x3277 (ca) # CIRCLED HANGUL CHIEUCH A 0x3278 (ka) # CIRCLED HANGUL KHIEUKH A 0x3279 (ta) # CIRCLED HANGUL THIEUTH A 0x327A (pa) # CIRCLED HANGUL PHIEUPH A 0x327B (ha) # CIRCLED HANGUL HIEUH A 0x327F KIS # KOREAN STANDARD SYMBOL 0x3280 (1) # CIRCLED IDEOGRAPH ONE 0x3281 (2) # CIRCLED IDEOGRAPH TWO 0x3282 (3) # CIRCLED IDEOGRAPH THREE 0x3283 (4) # CIRCLED IDEOGRAPH FOUR 0x3284 (5) # CIRCLED IDEOGRAPH FIVE 0x3285 (6) # CIRCLED IDEOGRAPH SIX 0x3286 (7) # CIRCLED IDEOGRAPH SEVEN 0x3287 (8) # CIRCLED IDEOGRAPH EIGHT 0x3288 (9) # CIRCLED IDEOGRAPH NINE 0x3289 (10) # CIRCLED IDEOGRAPH TEN 0x328A (Yue) # CIRCLED IDEOGRAPH MOON 0x328B (Huo) # CIRCLED IDEOGRAPH FIRE 0x328C (Shui) # CIRCLED IDEOGRAPH WATER 0x328D (Mu) # CIRCLED IDEOGRAPH WOOD 0x328E (Jin) # CIRCLED IDEOGRAPH METAL 0x328F (Tu) # CIRCLED IDEOGRAPH EARTH 0x3290 (Ri) # CIRCLED IDEOGRAPH SUN 0x3291 (Zhu) # CIRCLED IDEOGRAPH STOCK 0x3292 (You) # CIRCLED IDEOGRAPH HAVE 0x3293 (She) # CIRCLED IDEOGRAPH SOCIETY 0x3294 (Ming) # CIRCLED IDEOGRAPH NAME 0x3295 (Te) # CIRCLED IDEOGRAPH SPECIAL 0x3296 (Cai) # CIRCLED IDEOGRAPH FINANCIAL 0x3297 (Zhu) # 
CIRCLED IDEOGRAPH CONGRATULATION 0x3298 (Lao) # CIRCLED IDEOGRAPH LABOR 0x3299 (Mi) # CIRCLED IDEOGRAPH SECRET 0x329A (Nan) # CIRCLED IDEOGRAPH MALE 0x329B (Nu) # CIRCLED IDEOGRAPH FEMALE 0x329C (Shi) # CIRCLED IDEOGRAPH SUITABLE 0x329D (You) # CIRCLED IDEOGRAPH EXCELLENT 0x329E (Yin) # CIRCLED IDEOGRAPH PRINT 0x329F (Zhu) # CIRCLED IDEOGRAPH ATTENTION 0x32A0 (Xiang) # CIRCLED IDEOGRAPH ITEM 0x32A1 (Xiu) # CIRCLED IDEOGRAPH REST 0x32A2 (Xie) # CIRCLED IDEOGRAPH COPY 0x32A3 (Zheng) # CIRCLED IDEOGRAPH CORRECT 0x32A4 (Shang) # CIRCLED IDEOGRAPH HIGH 0x32A5 (Zhong) # CIRCLED IDEOGRAPH CENTRE 0x32A6 (Xia) # CIRCLED IDEOGRAPH LOW 0x32A7 (Zuo) # CIRCLED IDEOGRAPH LEFT 0x32A8 (You) # CIRCLED IDEOGRAPH RIGHT 0x32A9 (Yi) # CIRCLED IDEOGRAPH MEDICINE 0x32AA (Zong) # CIRCLED IDEOGRAPH RELIGION 0x32AB (Xue) # CIRCLED IDEOGRAPH STUDY 0x32AC (Jian) # CIRCLED IDEOGRAPH SUPERVISE 0x32AD (Qi) # CIRCLED IDEOGRAPH ENTERPRISE 0x32AE (Zi) # CIRCLED IDEOGRAPH RESOURCE 0x32AF (Xie) # CIRCLED IDEOGRAPH ALLIANCE 0x32B0 (Ye) # CIRCLED IDEOGRAPH NIGHT 0x32C0 1M # IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY 0x32C1 2M # IDEOGRAPHIC TELEGRAPH SYMBOL FOR FEBRUARY 0x32C2 3M # IDEOGRAPHIC TELEGRAPH SYMBOL FOR MARCH 0x32C3 4M # IDEOGRAPHIC TELEGRAPH SYMBOL FOR APRIL 0x32C4 5M # IDEOGRAPHIC TELEGRAPH SYMBOL FOR MAY 0x32C5 6M # IDEOGRAPHIC TELEGRAPH SYMBOL FOR JUNE 0x32C6 7M # IDEOGRAPHIC TELEGRAPH SYMBOL FOR JULY 0x32C7 8M # IDEOGRAPHIC TELEGRAPH SYMBOL FOR AUGUST 0x32C8 9M # IDEOGRAPHIC TELEGRAPH SYMBOL FOR SEPTEMBER 0x32C9 10M # IDEOGRAPHIC TELEGRAPH SYMBOL FOR OCTOBER 0x32CA 11M # IDEOGRAPHIC TELEGRAPH SYMBOL FOR NOVEMBER 0x32CB 12M # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DECEMBER 0x32D0 a # CIRCLED KATAKANA A 0x32D1 i # CIRCLED KATAKANA I 0x32D2 u # CIRCLED KATAKANA U 0x32D3 e # CIRCLED KATAKANA E 0x32D4 o # CIRCLED KATAKANA O 0x32D5 ka # CIRCLED KATAKANA KA 0x32D6 ki # CIRCLED KATAKANA KI 0x32D7 ku # CIRCLED KATAKANA KU 0x32D8 ke # CIRCLED KATAKANA KE 0x32D9 ko # CIRCLED KATAKANA KO 0x32DA sa # CIRCLED 
KATAKANA SA 0x32DB si # CIRCLED KATAKANA SI 0x32DC su # CIRCLED KATAKANA SU 0x32DD se # CIRCLED KATAKANA SE 0x32DE so # CIRCLED KATAKANA SO 0x32DF ta # CIRCLED KATAKANA TA 0x32E0 ti # CIRCLED KATAKANA TI 0x32E1 tu # CIRCLED KATAKANA TU 0x32E2 te # CIRCLED KATAKANA TE 0x32E3 to # CIRCLED KATAKANA TO 0x32E4 na # CIRCLED KATAKANA NA 0x32E5 ni # CIRCLED KATAKANA NI 0x32E6 nu # CIRCLED KATAKANA NU 0x32E7 ne # CIRCLED KATAKANA NE 0x32E8 no # CIRCLED KATAKANA NO 0x32E9 ha # CIRCLED KATAKANA HA 0x32EA hi # CIRCLED KATAKANA HI 0x32EB hu # CIRCLED KATAKANA HU 0x32EC he # CIRCLED KATAKANA HE 0x32ED ho # CIRCLED KATAKANA HO 0x32EE ma # CIRCLED KATAKANA MA 0x32EF mi # CIRCLED KATAKANA MI 0x32F0 mu # CIRCLED KATAKANA MU 0x32F1 me # CIRCLED KATAKANA ME 0x32F2 mo # CIRCLED KATAKANA MO 0x32F3 ya # CIRCLED KATAKANA YA 0x32F4 yu # CIRCLED KATAKANA YU 0x32F5 yo # CIRCLED KATAKANA YO 0x32F6 ra # CIRCLED KATAKANA RA 0x32F7 ri # CIRCLED KATAKANA RI 0x32F8 ru # CIRCLED KATAKANA RU 0x32F9 re # CIRCLED KATAKANA RE 0x32FA ro # CIRCLED KATAKANA RO 0x32FB wa # CIRCLED KATAKANA WA 0x32FC wi # CIRCLED KATAKANA WI 0x32FD we # CIRCLED KATAKANA WE 0x32FE wo # CIRCLED KATAKANA WO # # Characters 0x3300 to 0x33FF # 0x3300 apartment # SQUARE APAATO 0x3301 alpha # SQUARE ARUHUA 0x3302 ampere # SQUARE ANPEA 0x3303 are # SQUARE AARU 0x3304 inning # SQUARE ININGU 0x3305 inch # SQUARE INTI 0x3306 won # SQUARE UON 0x3307 escudo # SQUARE ESUKUUDO 0x3308 acre # SQUARE EEKAA 0x3309 ounce # SQUARE ONSU 0x330A ohm # SQUARE OOMU 0x330B kai-ri # SQUARE KAIRI 0x330C carat # SQUARE KARATTO 0x330D calorie # SQUARE KARORII 0x330E gallon # SQUARE GARON 0x330F gamma # SQUARE GANMA 0x3310 giga # SQUARE GIGA 0x3311 guinea # SQUARE GINII 0x3312 curie # SQUARE KYURII 0x3313 guilder # SQUARE GIRUDAA 0x3314 kilo # SQUARE KIRO 0x3315 kilogram # SQUARE KIROGURAMU 0x3316 kilometer # SQUARE KIROMEETORU 0x3317 kilowatt # SQUARE KIROWATTO 0x3318 gram # SQUARE GURAMU 0x3319 gram ton # SQUARE GURAMUTON 0x331A cruzeiro # SQUARE 
KURUZEIRO 0x331B krone # SQUARE KUROONE 0x331C case # SQUARE KEESU 0x331D koruna # SQUARE KORUNA 0x331E co-op # SQUARE KOOPO 0x331F cycle # SQUARE SAIKURU 0x3320 centime # SQUARE SANTIIMU 0x3321 shilling # SQUARE SIRINGU 0x3322 centi # SQUARE SENTI 0x3323 cent # SQUARE SENTO 0x3324 dozen # SQUARE DAASU 0x3325 desi # SQUARE DESI 0x3326 dollar # SQUARE DORU 0x3327 ton # SQUARE TON 0x3328 nano # SQUARE NANO 0x3329 knot # SQUARE NOTTO 0x332A heights # SQUARE HAITU 0x332B percent # SQUARE PAASENTO 0x332C parts # SQUARE PAATU 0x332D barrel # SQUARE BAARERU 0x332E piaster # SQUARE PIASUTORU 0x332F picul # SQUARE PIKURU 0x3330 pico # SQUARE PIKO 0x3331 building # SQUARE BIRU 0x3332 farad # SQUARE HUARADDO 0x3333 feet # SQUARE HUIITO 0x3334 bushel # SQUARE BUSSYERU 0x3335 franc # SQUARE HURAN 0x3336 hectare # SQUARE HEKUTAARU 0x3337 peso # SQUARE PESO 0x3338 pfennig # SQUARE PENIHI 0x3339 hertz # SQUARE HERUTU 0x333A pence # SQUARE PENSU 0x333B page # SQUARE PEEZI 0x333C beta # SQUARE BEETA 0x333D point # SQUARE POINTO 0x333E volt # SQUARE BORUTO 0x333F hon # SQUARE HON 0x3340 pound # SQUARE PONDO 0x3341 hall # SQUARE HOORU 0x3342 horn # SQUARE HOON 0x3343 micro # SQUARE MAIKURO 0x3344 mile # SQUARE MAIRU 0x3345 mach # SQUARE MAHHA 0x3346 mark # SQUARE MARUKU 0x3347 mansion # SQUARE MANSYON 0x3348 micron # SQUARE MIKURON 0x3349 milli # SQUARE MIRI 0x334A millibar # SQUARE MIRIBAARU 0x334B mega # SQUARE MEGA 0x334C megaton # SQUARE MEGATON 0x334D meter # SQUARE MEETORU 0x334E yard # SQUARE YAADO 0x334F yard # SQUARE YAARU 0x3350 yuan # SQUARE YUAN 0x3351 liter # SQUARE RITTORU 0x3352 lira # SQUARE RIRA 0x3353 rupee # SQUARE RUPII 0x3354 ruble # SQUARE RUUBURU 0x3355 rem # SQUARE REMU 0x3356 roentgen # SQUARE RENTOGEN 0x3357 watt # SQUARE WATTO 0x3358 0h # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ZERO 0x3359 1h # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ONE 0x335A 2h # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWO 0x335B 3h # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR THREE 0x335C 4h # 
IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR FOUR 0x335D 5h # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR FIVE 0x335E 6h # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR SIX 0x335F 7h # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR SEVEN 0x3360 8h # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR EIGHT 0x3361 9h # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR NINE 0x3362 10h # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TEN 0x3363 11h # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ELEVEN 0x3364 12h # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWELVE 0x3365 13h # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR THIRTEEN 0x3366 14h # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR FOURTEEN 0x3367 15h # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR FIFTEEN 0x3368 16h # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR SIXTEEN 0x3369 17h # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR SEVENTEEN 0x336A 18h # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR EIGHTEEN 0x336B 19h # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR NINETEEN 0x336C 20h # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWENTY 0x336D 21h # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWENTY-ONE 0x336E 22h # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWENTY-TWO 0x336F 23h # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWENTY-THREE 0x3370 24h # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWENTY-FOUR 0x3371 HPA # SQUARE HPA 0x3372 da # SQUARE DA 0x3373 AU # SQUARE AU 0x3374 bar # SQUARE BAR 0x3375 oV # SQUARE OV 0x3376 pc # SQUARE PC 0x337B Heisei # SQUARE ERA NAME HEISEI 0x337C Syouwa # SQUARE ERA NAME SYOUWA 0x337D Taisyou # SQUARE ERA NAME TAISYOU 0x337E Meiji # SQUARE ERA NAME MEIZI 0x337F Inc. 
# SQUARE CORPORATION 0x3380 pA # SQUARE PA AMPS 0x3381 nA # SQUARE NA 0x3382 microamp # SQUARE MU A 0x3383 mA # SQUARE MA 0x3384 kA # SQUARE KA 0x3385 kB # SQUARE KB 0x3386 MB # SQUARE MB 0x3387 GB # SQUARE GB 0x3388 cal # SQUARE CAL 0x3389 kcal # SQUARE KCAL 0x338A pF # SQUARE PF 0x338B nF # SQUARE NF 0x338C microFarad # SQUARE MU F 0x338D microgram # SQUARE MU G 0x338E mg # SQUARE MG 0x338F kg # SQUARE KG 0x3390 Hz # SQUARE HZ 0x3391 kHz # SQUARE KHZ 0x3392 MHz # SQUARE MHZ 0x3393 GHz # SQUARE GHZ 0x3394 THz # SQUARE THZ 0x3395 microliter # SQUARE MU L 0x3396 ml # SQUARE ML 0x3397 dl # SQUARE DL 0x3398 kl # SQUARE KL 0x3399 fm # SQUARE FM 0x339A nm # SQUARE NM 0x339B micrometer # SQUARE MU M 0x339C mm # SQUARE MM 0x339D cm # SQUARE CM 0x339E km # SQUARE KM 0x339F mm^2 # SQUARE MM SQUARED 0x33A0 cm^2 # SQUARE CM SQUARED 0x33A1 m^2 # SQUARE M SQUARED 0x33A2 km^2 # SQUARE KM SQUARED 0x33A3 mm^3 # SQUARE MM CUBED 0x33A4 cm^3 # SQUARE CM CUBED 0x33A5 m^3 # SQUARE M CUBED 0x33A6 km^3 # SQUARE KM CUBED 0x33A7 m/s # SQUARE M OVER S 0x33A8 m/s^2 # SQUARE M OVER S SQUARED 0x33A9 Pa # SQUARE PA 0x33AA kPa # SQUARE KPA 0x33AB MPa # SQUARE MPA 0x33AC GPa # SQUARE GPA 0x33AD rad # SQUARE RAD 0x33AE rad/s # SQUARE RAD OVER S 0x33AF rad/s^2 # SQUARE RAD OVER S SQUARED 0x33B0 ps # SQUARE PS 0x33B1 ns # SQUARE NS 0x33B2 microsecond # SQUARE MU S 0x33B3 ms # SQUARE MS 0x33B4 pV # SQUARE PV 0x33B5 nV # SQUARE NV 0x33B6 microvolt # SQUARE MU V 0x33B7 mV # SQUARE MV 0x33B8 kV # SQUARE KV 0x33B9 MV # SQUARE MV MEGA 0x33BA pW # SQUARE PW 0x33BB nW # SQUARE NW 0x33BC microwatt # SQUARE MU W 0x33BD mW # SQUARE MW 0x33BE kW # SQUARE KW 0x33BF MW # SQUARE MW MEGA 0x33C0 kOhm # SQUARE K OHM 0x33C1 MOhm # SQUARE M OHM 0x33C2 a.m. # SQUARE AM 0x33C3 Bq # SQUARE BQ 0x33C4 cc # SQUARE CC 0x33C5 cd # SQUARE CD 0x33C6 C/kg # SQUARE C OVER KG 0x33C7 Co. # SQUARE CO 0x33C8 dB # SQUARE DB 0x33C9 Gy # SQUARE GY 0x33CA ha # SQUARE HA 0x33CB HP # SQUARE HP 0x33CC in # SQUARE IN 0x33CD K.K. 
# SQUARE KK 0x33CE KM # SQUARE KM CAPITAL 0x33CF kt # SQUARE KT 0x33D0 lm # SQUARE LM 0x33D1 ln # SQUARE LN 0x33D2 log # SQUARE LOG 0x33D3 lx # SQUARE LX 0x33D4 mb # SQUARE MB SMALL 0x33D5 mil # SQUARE MIL 0x33D6 mol # SQUARE MOL 0x33D7 pH # SQUARE PH 0x33D8 p.m. # SQUARE PM 0x33D9 PPM # SQUARE PPM 0x33DA PR # SQUARE PR 0x33DB sr # SQUARE SR 0x33DC Sv # SQUARE SV 0x33DD Wb # SQUARE WB 0x33E0 1d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY ONE 0x33E1 2d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWO 0x33E2 3d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THREE 0x33E3 4d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY FOUR 0x33E4 5d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY FIVE 0x33E5 6d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY SIX 0x33E6 7d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY SEVEN 0x33E7 8d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY EIGHT 0x33E8 9d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY NINE 0x33E9 10d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TEN 0x33EA 11d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY ELEVEN 0x33EB 12d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWELVE 0x33EC 13d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTEEN 0x33ED 14d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY FOURTEEN 0x33EE 15d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY FIFTEEN 0x33EF 16d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY SIXTEEN 0x33F0 17d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY SEVENTEEN 0x33F1 18d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY EIGHTEEN 0x33F2 19d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY NINETEEN 0x33F3 20d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY 0x33F4 21d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-ONE 0x33F5 22d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-TWO 0x33F6 23d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-THREE 0x33F7 24d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-FOUR 0x33F8 25d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-FIVE 0x33F9 26d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-SIX 0x33FA 27d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-SEVEN 0x33FB 28d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY 
TWENTY-EIGHT 0x33FC 29d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-NINE 0x33FD 30d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY 0x33FE 31d # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE # # Characters 0x3400 to 0x34FF # # # Characters 0x4E00 to 0x4EFF # 0x4E00 Yi # # # Characters 0xA000 to 0xA0FF # 0xA000 it # YI SYLLABLE IT 0xA001 ix # YI SYLLABLE IX 0xA002 i # YI SYLLABLE I 0xA003 ip # YI SYLLABLE IP 0xA004 iet # YI SYLLABLE IET 0xA005 iex # YI SYLLABLE IEX 0xA006 ie # YI SYLLABLE IE 0xA007 iep # YI SYLLABLE IEP 0xA008 at # YI SYLLABLE AT 0xA009 ax # YI SYLLABLE AX 0xA00A a # YI SYLLABLE A 0xA00B ap # YI SYLLABLE AP 0xA00C uox # YI SYLLABLE UOX 0xA00D uo # YI SYLLABLE UO 0xA00E uop # YI SYLLABLE UOP 0xA00F ot # YI SYLLABLE OT 0xA010 ox # YI SYLLABLE OX 0xA011 o # YI SYLLABLE O 0xA012 op # YI SYLLABLE OP 0xA013 ex # YI SYLLABLE EX 0xA014 e # YI SYLLABLE E 0xA015 wu # YI SYLLABLE WU 0xA016 bit # YI SYLLABLE BIT 0xA017 bix # YI SYLLABLE BIX 0xA018 bi # YI SYLLABLE BI 0xA019 bip # YI SYLLABLE BIP 0xA01A biet # YI SYLLABLE BIET 0xA01B biex # YI SYLLABLE BIEX 0xA01C bie # YI SYLLABLE BIE 0xA01D biep # YI SYLLABLE BIEP 0xA01E bat # YI SYLLABLE BAT 0xA01F bax # YI SYLLABLE BAX 0xA020 ba # YI SYLLABLE BA 0xA021 bap # YI SYLLABLE BAP 0xA022 buox # YI SYLLABLE BUOX 0xA023 buo # YI SYLLABLE BUO 0xA024 buop # YI SYLLABLE BUOP 0xA025 bot # YI SYLLABLE BOT 0xA026 box # YI SYLLABLE BOX 0xA027 bo # YI SYLLABLE BO 0xA028 bop # YI SYLLABLE BOP 0xA029 bex # YI SYLLABLE BEX 0xA02A be # YI SYLLABLE BE 0xA02B bep # YI SYLLABLE BEP 0xA02C but # YI SYLLABLE BUT 0xA02D bux # YI SYLLABLE BUX 0xA02E bu # YI SYLLABLE BU 0xA02F bup # YI SYLLABLE BUP 0xA030 burx # YI SYLLABLE BURX 0xA031 bur # YI SYLLABLE BUR 0xA032 byt # YI SYLLABLE BYT 0xA033 byx # YI SYLLABLE BYX 0xA034 by # YI SYLLABLE BY 0xA035 byp # YI SYLLABLE BYP 0xA036 byrx # YI SYLLABLE BYRX 0xA037 byr # YI SYLLABLE BYR 0xA038 pit # YI SYLLABLE PIT 0xA039 pix # YI SYLLABLE PIX 0xA03A pi # YI SYLLABLE PI 0xA03B pip # YI 
SYLLABLE PIP 0xA03C piex # YI SYLLABLE PIEX 0xA03D pie # YI SYLLABLE PIE 0xA03E piep # YI SYLLABLE PIEP 0xA03F pat # YI SYLLABLE PAT 0xA040 pax # YI SYLLABLE PAX 0xA041 pa # YI SYLLABLE PA 0xA042 pap # YI SYLLABLE PAP 0xA043 puox # YI SYLLABLE PUOX 0xA044 puo # YI SYLLABLE PUO 0xA045 puop # YI SYLLABLE PUOP 0xA046 pot # YI SYLLABLE POT 0xA047 pox # YI SYLLABLE POX 0xA048 po # YI SYLLABLE PO 0xA049 pop # YI SYLLABLE POP 0xA04A put # YI SYLLABLE PUT 0xA04B pux # YI SYLLABLE PUX 0xA04C pu # YI SYLLABLE PU 0xA04D pup # YI SYLLABLE PUP 0xA04E purx # YI SYLLABLE PURX 0xA04F pur # YI SYLLABLE PUR 0xA050 pyt # YI SYLLABLE PYT 0xA051 pyx # YI SYLLABLE PYX 0xA052 py # YI SYLLABLE PY 0xA053 pyp # YI SYLLABLE PYP 0xA054 pyrx # YI SYLLABLE PYRX 0xA055 pyr # YI SYLLABLE PYR 0xA056 bbit # YI SYLLABLE BBIT 0xA057 bbix # YI SYLLABLE BBIX 0xA058 bbi # YI SYLLABLE BBI 0xA059 bbip # YI SYLLABLE BBIP 0xA05A bbiet # YI SYLLABLE BBIET 0xA05B bbiex # YI SYLLABLE BBIEX 0xA05C bbie # YI SYLLABLE BBIE 0xA05D bbiep # YI SYLLABLE BBIEP 0xA05E bbat # YI SYLLABLE BBAT 0xA05F bbax # YI SYLLABLE BBAX 0xA060 bba # YI SYLLABLE BBA 0xA061 bbap # YI SYLLABLE BBAP 0xA062 bbuox # YI SYLLABLE BBUOX 0xA063 bbuo # YI SYLLABLE BBUO 0xA064 bbuop # YI SYLLABLE BBUOP 0xA065 bbot # YI SYLLABLE BBOT 0xA066 bbox # YI SYLLABLE BBOX 0xA067 bbo # YI SYLLABLE BBO 0xA068 bbop # YI SYLLABLE BBOP 0xA069 bbex # YI SYLLABLE BBEX 0xA06A bbe # YI SYLLABLE BBE 0xA06B bbep # YI SYLLABLE BBEP 0xA06C bbut # YI SYLLABLE BBUT 0xA06D bbux # YI SYLLABLE BBUX 0xA06E bbu # YI SYLLABLE BBU 0xA06F bbup # YI SYLLABLE BBUP 0xA070 bburx # YI SYLLABLE BBURX 0xA071 bbur # YI SYLLABLE BBUR 0xA072 bbyt # YI SYLLABLE BBYT 0xA073 bbyx # YI SYLLABLE BBYX 0xA074 bby # YI SYLLABLE BBY 0xA075 bbyp # YI SYLLABLE BBYP 0xA076 nbit # YI SYLLABLE NBIT 0xA077 nbix # YI SYLLABLE NBIX 0xA078 nbi # YI SYLLABLE NBI 0xA079 nbip # YI SYLLABLE NBIP 0xA07A nbiex # YI SYLLABLE NBIEX 0xA07B nbie # YI SYLLABLE NBIE 0xA07C nbiep # YI SYLLABLE NBIEP 0xA07D nbat # YI 
SYLLABLE NBAT 0xA07E nbax # YI SYLLABLE NBAX 0xA07F nba # YI SYLLABLE NBA 0xA080 nbap # YI SYLLABLE NBAP 0xA081 nbot # YI SYLLABLE NBOT 0xA082 nbox # YI SYLLABLE NBOX 0xA083 nbo # YI SYLLABLE NBO 0xA084 nbop # YI SYLLABLE NBOP 0xA085 nbut # YI SYLLABLE NBUT 0xA086 nbux # YI SYLLABLE NBUX 0xA087 nbu # YI SYLLABLE NBU 0xA088 nbup # YI SYLLABLE NBUP 0xA089 nburx # YI SYLLABLE NBURX 0xA08A nbur # YI SYLLABLE NBUR 0xA08B nbyt # YI SYLLABLE NBYT 0xA08C nbyx # YI SYLLABLE NBYX 0xA08D nby # YI SYLLABLE NBY 0xA08E nbyp # YI SYLLABLE NBYP 0xA08F nbyrx # YI SYLLABLE NBYRX 0xA090 nbyr # YI SYLLABLE NBYR 0xA091 hmit # YI SYLLABLE HMIT 0xA092 hmix # YI SYLLABLE HMIX 0xA093 hmi # YI SYLLABLE HMI 0xA094 hmip # YI SYLLABLE HMIP 0xA095 hmiex # YI SYLLABLE HMIEX 0xA096 hmie # YI SYLLABLE HMIE 0xA097 hmiep # YI SYLLABLE HMIEP 0xA098 hmat # YI SYLLABLE HMAT 0xA099 hmax # YI SYLLABLE HMAX 0xA09A hma # YI SYLLABLE HMA 0xA09B hmap # YI SYLLABLE HMAP 0xA09C hmuox # YI SYLLABLE HMUOX 0xA09D hmuo # YI SYLLABLE HMUO 0xA09E hmuop # YI SYLLABLE HMUOP 0xA09F hmot # YI SYLLABLE HMOT 0xA0A0 hmox # YI SYLLABLE HMOX 0xA0A1 hmo # YI SYLLABLE HMO 0xA0A2 hmop # YI SYLLABLE HMOP 0xA0A3 hmut # YI SYLLABLE HMUT 0xA0A4 hmux # YI SYLLABLE HMUX 0xA0A5 hmu # YI SYLLABLE HMU 0xA0A6 hmup # YI SYLLABLE HMUP 0xA0A7 hmurx # YI SYLLABLE HMURX 0xA0A8 hmur # YI SYLLABLE HMUR 0xA0A9 hmyx # YI SYLLABLE HMYX 0xA0AA hmy # YI SYLLABLE HMY 0xA0AB hmyp # YI SYLLABLE HMYP 0xA0AC hmyrx # YI SYLLABLE HMYRX 0xA0AD hmyr # YI SYLLABLE HMYR 0xA0AE mit # YI SYLLABLE MIT 0xA0AF mix # YI SYLLABLE MIX 0xA0B0 mi # YI SYLLABLE MI 0xA0B1 mip # YI SYLLABLE MIP 0xA0B2 miex # YI SYLLABLE MIEX 0xA0B3 mie # YI SYLLABLE MIE 0xA0B4 miep # YI SYLLABLE MIEP 0xA0B5 mat # YI SYLLABLE MAT 0xA0B6 max # YI SYLLABLE MAX 0xA0B7 ma # YI SYLLABLE MA 0xA0B8 map # YI SYLLABLE MAP 0xA0B9 muot # YI SYLLABLE MUOT 0xA0BA muox # YI SYLLABLE MUOX 0xA0BB muo # YI SYLLABLE MUO 0xA0BC muop # YI SYLLABLE MUOP 0xA0BD mot # YI SYLLABLE MOT 0xA0BE mox # YI SYLLABLE MOX 
0xA0BF mo # YI SYLLABLE MO 0xA0C0 mop # YI SYLLABLE MOP 0xA0C1 mex # YI SYLLABLE MEX 0xA0C2 me # YI SYLLABLE ME 0xA0C3 mut # YI SYLLABLE MUT 0xA0C4 mux # YI SYLLABLE MUX 0xA0C5 mu # YI SYLLABLE MU 0xA0C6 mup # YI SYLLABLE MUP 0xA0C7 murx # YI SYLLABLE MURX 0xA0C8 mur # YI SYLLABLE MUR 0xA0C9 myt # YI SYLLABLE MYT 0xA0CA myx # YI SYLLABLE MYX 0xA0CB my # YI SYLLABLE MY 0xA0CC myp # YI SYLLABLE MYP 0xA0CD fit # YI SYLLABLE FIT 0xA0CE fix # YI SYLLABLE FIX 0xA0CF fi # YI SYLLABLE FI 0xA0D0 fip # YI SYLLABLE FIP 0xA0D1 fat # YI SYLLABLE FAT 0xA0D2 fax # YI SYLLABLE FAX 0xA0D3 fa # YI SYLLABLE FA 0xA0D4 fap # YI SYLLABLE FAP 0xA0D5 fox # YI SYLLABLE FOX 0xA0D6 fo # YI SYLLABLE FO 0xA0D7 fop # YI SYLLABLE FOP 0xA0D8 fut # YI SYLLABLE FUT 0xA0D9 fux # YI SYLLABLE FUX 0xA0DA fu # YI SYLLABLE FU 0xA0DB fup # YI SYLLABLE FUP 0xA0DC furx # YI SYLLABLE FURX 0xA0DD fur # YI SYLLABLE FUR 0xA0DE fyt # YI SYLLABLE FYT 0xA0DF fyx # YI SYLLABLE FYX 0xA0E0 fy # YI SYLLABLE FY 0xA0E1 fyp # YI SYLLABLE FYP 0xA0E2 vit # YI SYLLABLE VIT 0xA0E3 vix # YI SYLLABLE VIX 0xA0E4 vi # YI SYLLABLE VI 0xA0E5 vip # YI SYLLABLE VIP 0xA0E6 viet # YI SYLLABLE VIET 0xA0E7 viex # YI SYLLABLE VIEX 0xA0E8 vie # YI SYLLABLE VIE 0xA0E9 viep # YI SYLLABLE VIEP 0xA0EA vat # YI SYLLABLE VAT 0xA0EB vax # YI SYLLABLE VAX 0xA0EC va # YI SYLLABLE VA 0xA0ED vap # YI SYLLABLE VAP 0xA0EE vot # YI SYLLABLE VOT 0xA0EF vox # YI SYLLABLE VOX 0xA0F0 vo # YI SYLLABLE VO 0xA0F1 vop # YI SYLLABLE VOP 0xA0F2 vex # YI SYLLABLE VEX 0xA0F3 vep # YI SYLLABLE VEP 0xA0F4 vut # YI SYLLABLE VUT 0xA0F5 vux # YI SYLLABLE VUX 0xA0F6 vu # YI SYLLABLE VU 0xA0F7 vup # YI SYLLABLE VUP 0xA0F8 vurx # YI SYLLABLE VURX 0xA0F9 vur # YI SYLLABLE VUR 0xA0FA vyt # YI SYLLABLE VYT 0xA0FB vyx # YI SYLLABLE VYX 0xA0FC vy # YI SYLLABLE VY 0xA0FD vyp # YI SYLLABLE VYP 0xA0FE vyrx # YI SYLLABLE VYRX 0xA0FF vyr # YI SYLLABLE VYR # # Characters 0xA100 to 0xA1FF # 0xA100 dit # YI SYLLABLE DIT 0xA101 dix # YI SYLLABLE DIX 0xA102 di # YI SYLLABLE DI 0xA103 
dip # YI SYLLABLE DIP 0xA104 diex # YI SYLLABLE DIEX 0xA105 die # YI SYLLABLE DIE 0xA106 diep # YI SYLLABLE DIEP 0xA107 dat # YI SYLLABLE DAT 0xA108 dax # YI SYLLABLE DAX 0xA109 da # YI SYLLABLE DA 0xA10A dap # YI SYLLABLE DAP 0xA10B duox # YI SYLLABLE DUOX 0xA10C duo # YI SYLLABLE DUO 0xA10D dot # YI SYLLABLE DOT 0xA10E dox # YI SYLLABLE DOX 0xA10F do # YI SYLLABLE DO 0xA110 dop # YI SYLLABLE DOP 0xA111 dex # YI SYLLABLE DEX 0xA112 de # YI SYLLABLE DE 0xA113 dep # YI SYLLABLE DEP 0xA114 dut # YI SYLLABLE DUT 0xA115 dux # YI SYLLABLE DUX 0xA116 du # YI SYLLABLE DU 0xA117 dup # YI SYLLABLE DUP 0xA118 durx # YI SYLLABLE DURX 0xA119 dur # YI SYLLABLE DUR 0xA11A tit # YI SYLLABLE TIT 0xA11B tix # YI SYLLABLE TIX 0xA11C ti # YI SYLLABLE TI 0xA11D tip # YI SYLLABLE TIP 0xA11E tiex # YI SYLLABLE TIEX 0xA11F tie # YI SYLLABLE TIE 0xA120 tiep # YI SYLLABLE TIEP 0xA121 tat # YI SYLLABLE TAT 0xA122 tax # YI SYLLABLE TAX 0xA123 ta # YI SYLLABLE TA 0xA124 tap # YI SYLLABLE TAP 0xA125 tuot # YI SYLLABLE TUOT 0xA126 tuox # YI SYLLABLE TUOX 0xA127 tuo # YI SYLLABLE TUO 0xA128 tuop # YI SYLLABLE TUOP 0xA129 tot # YI SYLLABLE TOT 0xA12A tox # YI SYLLABLE TOX 0xA12B to # YI SYLLABLE TO 0xA12C top # YI SYLLABLE TOP 0xA12D tex # YI SYLLABLE TEX 0xA12E te # YI SYLLABLE TE 0xA12F tep # YI SYLLABLE TEP 0xA130 tut # YI SYLLABLE TUT 0xA131 tux # YI SYLLABLE TUX 0xA132 tu # YI SYLLABLE TU 0xA133 tup # YI SYLLABLE TUP 0xA134 turx # YI SYLLABLE TURX 0xA135 tur # YI SYLLABLE TUR 0xA136 ddit # YI SYLLABLE DDIT 0xA137 ddix # YI SYLLABLE DDIX 0xA138 ddi # YI SYLLABLE DDI 0xA139 ddip # YI SYLLABLE DDIP 0xA13A ddiex # YI SYLLABLE DDIEX 0xA13B ddie # YI SYLLABLE DDIE 0xA13C ddiep # YI SYLLABLE DDIEP 0xA13D ddat # YI SYLLABLE DDAT 0xA13E ddax # YI SYLLABLE DDAX 0xA13F dda # YI SYLLABLE DDA 0xA140 ddap # YI SYLLABLE DDAP 0xA141 dduox # YI SYLLABLE DDUOX 0xA142 dduo # YI SYLLABLE DDUO 0xA143 dduop # YI SYLLABLE DDUOP 0xA144 ddot # YI SYLLABLE DDOT 0xA145 ddox # YI SYLLABLE DDOX 0xA146 ddo # YI SYLLABLE 
DDO 0xA147 ddop # YI SYLLABLE DDOP 0xA148 ddex # YI SYLLABLE DDEX 0xA149 dde # YI SYLLABLE DDE 0xA14A ddep # YI SYLLABLE DDEP 0xA14B ddut # YI SYLLABLE DDUT 0xA14C ddux # YI SYLLABLE DDUX 0xA14D ddu # YI SYLLABLE DDU 0xA14E ddup # YI SYLLABLE DDUP 0xA14F ddurx # YI SYLLABLE DDURX 0xA150 ddur # YI SYLLABLE DDUR 0xA151 ndit # YI SYLLABLE NDIT 0xA152 ndix # YI SYLLABLE NDIX 0xA153 ndi # YI SYLLABLE NDI 0xA154 ndip # YI SYLLABLE NDIP 0xA155 ndiex # YI SYLLABLE NDIEX 0xA156 ndie # YI SYLLABLE NDIE 0xA157 ndat # YI SYLLABLE NDAT 0xA158 ndax # YI SYLLABLE NDAX 0xA159 nda # YI SYLLABLE NDA 0xA15A ndap # YI SYLLABLE NDAP 0xA15B ndot # YI SYLLABLE NDOT 0xA15C ndox # YI SYLLABLE NDOX 0xA15D ndo # YI SYLLABLE NDO 0xA15E ndop # YI SYLLABLE NDOP 0xA15F ndex # YI SYLLABLE NDEX 0xA160 nde # YI SYLLABLE NDE 0xA161 ndep # YI SYLLABLE NDEP 0xA162 ndut # YI SYLLABLE NDUT 0xA163 ndux # YI SYLLABLE NDUX 0xA164 ndu # YI SYLLABLE NDU 0xA165 ndup # YI SYLLABLE NDUP 0xA166 ndurx # YI SYLLABLE NDURX 0xA167 ndur # YI SYLLABLE NDUR 0xA168 hnit # YI SYLLABLE HNIT 0xA169 hnix # YI SYLLABLE HNIX 0xA16A hni # YI SYLLABLE HNI 0xA16B hnip # YI SYLLABLE HNIP 0xA16C hniet # YI SYLLABLE HNIET 0xA16D hniex # YI SYLLABLE HNIEX 0xA16E hnie # YI SYLLABLE HNIE 0xA16F hniep # YI SYLLABLE HNIEP 0xA170 hnat # YI SYLLABLE HNAT 0xA171 hnax # YI SYLLABLE HNAX 0xA172 hna # YI SYLLABLE HNA 0xA173 hnap # YI SYLLABLE HNAP 0xA174 hnuox # YI SYLLABLE HNUOX 0xA175 hnuo # YI SYLLABLE HNUO 0xA176 hnot # YI SYLLABLE HNOT 0xA177 hnox # YI SYLLABLE HNOX 0xA178 hnop # YI SYLLABLE HNOP 0xA179 hnex # YI SYLLABLE HNEX 0xA17A hne # YI SYLLABLE HNE 0xA17B hnep # YI SYLLABLE HNEP 0xA17C hnut # YI SYLLABLE HNUT 0xA17D nit # YI SYLLABLE NIT 0xA17E nix # YI SYLLABLE NIX 0xA17F ni # YI SYLLABLE NI 0xA180 nip # YI SYLLABLE NIP 0xA181 niex # YI SYLLABLE NIEX 0xA182 nie # YI SYLLABLE NIE 0xA183 niep # YI SYLLABLE NIEP 0xA184 nax # YI SYLLABLE NAX 0xA185 na # YI SYLLABLE NA 0xA186 nap # YI SYLLABLE NAP 0xA187 nuox # YI SYLLABLE NUOX 0xA188 
nuo # YI SYLLABLE NUO 0xA189 nuop # YI SYLLABLE NUOP 0xA18A not # YI SYLLABLE NOT 0xA18B nox # YI SYLLABLE NOX 0xA18C no # YI SYLLABLE NO 0xA18D nop # YI SYLLABLE NOP 0xA18E nex # YI SYLLABLE NEX 0xA18F ne # YI SYLLABLE NE 0xA190 nep # YI SYLLABLE NEP 0xA191 nut # YI SYLLABLE NUT 0xA192 nux # YI SYLLABLE NUX 0xA193 nu # YI SYLLABLE NU 0xA194 nup # YI SYLLABLE NUP 0xA195 nurx # YI SYLLABLE NURX 0xA196 nur # YI SYLLABLE NUR 0xA197 hlit # YI SYLLABLE HLIT 0xA198 hlix # YI SYLLABLE HLIX 0xA199 hli # YI SYLLABLE HLI 0xA19A hlip # YI SYLLABLE HLIP 0xA19B hliex # YI SYLLABLE HLIEX 0xA19C hlie # YI SYLLABLE HLIE 0xA19D hliep # YI SYLLABLE HLIEP 0xA19E hlat # YI SYLLABLE HLAT 0xA19F hlax # YI SYLLABLE HLAX 0xA1A0 hla # YI SYLLABLE HLA 0xA1A1 hlap # YI SYLLABLE HLAP 0xA1A2 hluox # YI SYLLABLE HLUOX 0xA1A3 hluo # YI SYLLABLE HLUO 0xA1A4 hluop # YI SYLLABLE HLUOP 0xA1A5 hlox # YI SYLLABLE HLOX 0xA1A6 hlo # YI SYLLABLE HLO 0xA1A7 hlop # YI SYLLABLE HLOP 0xA1A8 hlex # YI SYLLABLE HLEX 0xA1A9 hle # YI SYLLABLE HLE 0xA1AA hlep # YI SYLLABLE HLEP 0xA1AB hlut # YI SYLLABLE HLUT 0xA1AC hlux # YI SYLLABLE HLUX 0xA1AD hlu # YI SYLLABLE HLU 0xA1AE hlup # YI SYLLABLE HLUP 0xA1AF hlurx # YI SYLLABLE HLURX 0xA1B0 hlur # YI SYLLABLE HLUR 0xA1B1 hlyt # YI SYLLABLE HLYT 0xA1B2 hlyx # YI SYLLABLE HLYX 0xA1B3 hly # YI SYLLABLE HLY 0xA1B4 hlyp # YI SYLLABLE HLYP 0xA1B5 hlyrx # YI SYLLABLE HLYRX 0xA1B6 hlyr # YI SYLLABLE HLYR 0xA1B7 lit # YI SYLLABLE LIT 0xA1B8 lix # YI SYLLABLE LIX 0xA1B9 li # YI SYLLABLE LI 0xA1BA lip # YI SYLLABLE LIP 0xA1BB liet # YI SYLLABLE LIET 0xA1BC liex # YI SYLLABLE LIEX 0xA1BD lie # YI SYLLABLE LIE 0xA1BE liep # YI SYLLABLE LIEP 0xA1BF lat # YI SYLLABLE LAT 0xA1C0 lax # YI SYLLABLE LAX 0xA1C1 la # YI SYLLABLE LA 0xA1C2 lap # YI SYLLABLE LAP 0xA1C3 luot # YI SYLLABLE LUOT 0xA1C4 luox # YI SYLLABLE LUOX 0xA1C5 luo # YI SYLLABLE LUO 0xA1C6 luop # YI SYLLABLE LUOP 0xA1C7 lot # YI SYLLABLE LOT 0xA1C8 lox # YI SYLLABLE LOX 0xA1C9 lo # YI SYLLABLE LO 0xA1CA lop # YI SYLLABLE 
LOP 0xA1CB lex # YI SYLLABLE LEX 0xA1CC le # YI SYLLABLE LE 0xA1CD lep # YI SYLLABLE LEP 0xA1CE lut # YI SYLLABLE LUT 0xA1CF lux # YI SYLLABLE LUX 0xA1D0 lu # YI SYLLABLE LU 0xA1D1 lup # YI SYLLABLE LUP 0xA1D2 lurx # YI SYLLABLE LURX 0xA1D3 lur # YI SYLLABLE LUR 0xA1D4 lyt # YI SYLLABLE LYT 0xA1D5 lyx # YI SYLLABLE LYX 0xA1D6 ly # YI SYLLABLE LY 0xA1D7 lyp # YI SYLLABLE LYP 0xA1D8 lyrx # YI SYLLABLE LYRX 0xA1D9 lyr # YI SYLLABLE LYR 0xA1DA git # YI SYLLABLE GIT 0xA1DB gix # YI SYLLABLE GIX 0xA1DC gi # YI SYLLABLE GI 0xA1DD gip # YI SYLLABLE GIP 0xA1DE giet # YI SYLLABLE GIET 0xA1DF giex # YI SYLLABLE GIEX 0xA1E0 gie # YI SYLLABLE GIE 0xA1E1 giep # YI SYLLABLE GIEP 0xA1E2 gat # YI SYLLABLE GAT 0xA1E3 gax # YI SYLLABLE GAX 0xA1E4 ga # YI SYLLABLE GA 0xA1E5 gap # YI SYLLABLE GAP 0xA1E6 guot # YI SYLLABLE GUOT 0xA1E7 guox # YI SYLLABLE GUOX 0xA1E8 guo # YI SYLLABLE GUO 0xA1E9 guop # YI SYLLABLE GUOP 0xA1EA got # YI SYLLABLE GOT 0xA1EB gox # YI SYLLABLE GOX 0xA1EC go # YI SYLLABLE GO 0xA1ED gop # YI SYLLABLE GOP 0xA1EE get # YI SYLLABLE GET 0xA1EF gex # YI SYLLABLE GEX 0xA1F0 ge # YI SYLLABLE GE 0xA1F1 gep # YI SYLLABLE GEP 0xA1F2 gut # YI SYLLABLE GUT 0xA1F3 gux # YI SYLLABLE GUX 0xA1F4 gu # YI SYLLABLE GU 0xA1F5 gup # YI SYLLABLE GUP 0xA1F6 gurx # YI SYLLABLE GURX 0xA1F7 gur # YI SYLLABLE GUR 0xA1F8 kit # YI SYLLABLE KIT 0xA1F9 kix # YI SYLLABLE KIX 0xA1FA ki # YI SYLLABLE KI 0xA1FB kip # YI SYLLABLE KIP 0xA1FC kiex # YI SYLLABLE KIEX 0xA1FD kie # YI SYLLABLE KIE 0xA1FE kiep # YI SYLLABLE KIEP 0xA1FF kat # YI SYLLABLE KAT # # Characters 0xA200 to 0xA2FF # 0xA200 kax # YI SYLLABLE KAX 0xA201 ka # YI SYLLABLE KA 0xA202 kap # YI SYLLABLE KAP 0xA203 kuox # YI SYLLABLE KUOX 0xA204 kuo # YI SYLLABLE KUO 0xA205 kuop # YI SYLLABLE KUOP 0xA206 kot # YI SYLLABLE KOT 0xA207 kox # YI SYLLABLE KOX 0xA208 ko # YI SYLLABLE KO 0xA209 kop # YI SYLLABLE KOP 0xA20A ket # YI SYLLABLE KET 0xA20B kex # YI SYLLABLE KEX 0xA20C ke # YI SYLLABLE KE 0xA20D kep # YI SYLLABLE KEP 0xA20E kut # YI 
SYLLABLE KUT 0xA20F kux # YI SYLLABLE KUX 0xA210 ku # YI SYLLABLE KU 0xA211 kup # YI SYLLABLE KUP 0xA212 kurx # YI SYLLABLE KURX 0xA213 kur # YI SYLLABLE KUR 0xA214 ggit # YI SYLLABLE GGIT 0xA215 ggix # YI SYLLABLE GGIX 0xA216 ggi # YI SYLLABLE GGI 0xA217 ggiex # YI SYLLABLE GGIEX 0xA218 ggie # YI SYLLABLE GGIE 0xA219 ggiep # YI SYLLABLE GGIEP 0xA21A ggat # YI SYLLABLE GGAT 0xA21B ggax # YI SYLLABLE GGAX 0xA21C gga # YI SYLLABLE GGA 0xA21D ggap # YI SYLLABLE GGAP 0xA21E gguot # YI SYLLABLE GGUOT 0xA21F gguox # YI SYLLABLE GGUOX 0xA220 gguo # YI SYLLABLE GGUO 0xA221 gguop # YI SYLLABLE GGUOP 0xA222 ggot # YI SYLLABLE GGOT 0xA223 ggox # YI SYLLABLE GGOX 0xA224 ggo # YI SYLLABLE GGO 0xA225 ggop # YI SYLLABLE GGOP 0xA226 gget # YI SYLLABLE GGET 0xA227 ggex # YI SYLLABLE GGEX 0xA228 gge # YI SYLLABLE GGE 0xA229 ggep # YI SYLLABLE GGEP 0xA22A ggut # YI SYLLABLE GGUT 0xA22B ggux # YI SYLLABLE GGUX 0xA22C ggu # YI SYLLABLE GGU 0xA22D ggup # YI SYLLABLE GGUP 0xA22E ggurx # YI SYLLABLE GGURX 0xA22F ggur # YI SYLLABLE GGUR 0xA230 mgiex # YI SYLLABLE MGIEX 0xA231 mgie # YI SYLLABLE MGIE 0xA232 mgat # YI SYLLABLE MGAT 0xA233 mgax # YI SYLLABLE MGAX 0xA234 mga # YI SYLLABLE MGA 0xA235 mgap # YI SYLLABLE MGAP 0xA236 mguox # YI SYLLABLE MGUOX 0xA237 mguo # YI SYLLABLE MGUO 0xA238 mguop # YI SYLLABLE MGUOP 0xA239 mgot # YI SYLLABLE MGOT 0xA23A mgox # YI SYLLABLE MGOX 0xA23B mgo # YI SYLLABLE MGO 0xA23C mgop # YI SYLLABLE MGOP 0xA23D mgex # YI SYLLABLE MGEX 0xA23E mge # YI SYLLABLE MGE 0xA23F mgep # YI SYLLABLE MGEP 0xA240 mgut # YI SYLLABLE MGUT 0xA241 mgux # YI SYLLABLE MGUX 0xA242 mgu # YI SYLLABLE MGU 0xA243 mgup # YI SYLLABLE MGUP 0xA244 mgurx # YI SYLLABLE MGURX 0xA245 mgur # YI SYLLABLE MGUR 0xA246 hxit # YI SYLLABLE HXIT 0xA247 hxix # YI SYLLABLE HXIX 0xA248 hxi # YI SYLLABLE HXI 0xA249 hxip # YI SYLLABLE HXIP 0xA24A hxiet # YI SYLLABLE HXIET 0xA24B hxiex # YI SYLLABLE HXIEX 0xA24C hxie # YI SYLLABLE HXIE 0xA24D hxiep # YI SYLLABLE HXIEP 0xA24E hxat # YI SYLLABLE HXAT 0xA24F 
hxax # YI SYLLABLE HXAX 0xA250 hxa # YI SYLLABLE HXA 0xA251 hxap # YI SYLLABLE HXAP 0xA252 hxuot # YI SYLLABLE HXUOT 0xA253 hxuox # YI SYLLABLE HXUOX 0xA254 hxuo # YI SYLLABLE HXUO 0xA255 hxuop # YI SYLLABLE HXUOP 0xA256 hxot # YI SYLLABLE HXOT 0xA257 hxox # YI SYLLABLE HXOX 0xA258 hxo # YI SYLLABLE HXO 0xA259 hxop # YI SYLLABLE HXOP 0xA25A hxex # YI SYLLABLE HXEX 0xA25B hxe # YI SYLLABLE HXE 0xA25C hxep # YI SYLLABLE HXEP 0xA25D ngiex # YI SYLLABLE NGIEX 0xA25E ngie # YI SYLLABLE NGIE 0xA25F ngiep # YI SYLLABLE NGIEP 0xA260 ngat # YI SYLLABLE NGAT 0xA261 ngax # YI SYLLABLE NGAX 0xA262 nga # YI SYLLABLE NGA 0xA263 ngap # YI SYLLABLE NGAP 0xA264 nguot # YI SYLLABLE NGUOT 0xA265 nguox # YI SYLLABLE NGUOX 0xA266 nguo # YI SYLLABLE NGUO 0xA267 ngot # YI SYLLABLE NGOT 0xA268 ngox # YI SYLLABLE NGOX 0xA269 ngo # YI SYLLABLE NGO 0xA26A ngop # YI SYLLABLE NGOP 0xA26B ngex # YI SYLLABLE NGEX 0xA26C nge # YI SYLLABLE NGE 0xA26D ngep # YI SYLLABLE NGEP 0xA26E hit # YI SYLLABLE HIT 0xA26F hiex # YI SYLLABLE HIEX 0xA270 hie # YI SYLLABLE HIE 0xA271 hat # YI SYLLABLE HAT 0xA272 hax # YI SYLLABLE HAX 0xA273 ha # YI SYLLABLE HA 0xA274 hap # YI SYLLABLE HAP 0xA275 huot # YI SYLLABLE HUOT 0xA276 huox # YI SYLLABLE HUOX 0xA277 huo # YI SYLLABLE HUO 0xA278 huop # YI SYLLABLE HUOP 0xA279 hot # YI SYLLABLE HOT 0xA27A hox # YI SYLLABLE HOX 0xA27B ho # YI SYLLABLE HO 0xA27C hop # YI SYLLABLE HOP 0xA27D hex # YI SYLLABLE HEX 0xA27E he # YI SYLLABLE HE 0xA27F hep # YI SYLLABLE HEP 0xA280 wat # YI SYLLABLE WAT 0xA281 wax # YI SYLLABLE WAX 0xA282 wa # YI SYLLABLE WA 0xA283 wap # YI SYLLABLE WAP 0xA284 wuox # YI SYLLABLE WUOX 0xA285 wuo # YI SYLLABLE WUO 0xA286 wuop # YI SYLLABLE WUOP 0xA287 wox # YI SYLLABLE WOX 0xA288 wo # YI SYLLABLE WO 0xA289 wop # YI SYLLABLE WOP 0xA28A wex # YI SYLLABLE WEX 0xA28B we # YI SYLLABLE WE 0xA28C wep # YI SYLLABLE WEP 0xA28D zit # YI SYLLABLE ZIT 0xA28E zix # YI SYLLABLE ZIX 0xA28F zi # YI SYLLABLE ZI 0xA290 zip # YI SYLLABLE ZIP 0xA291 ziex # YI SYLLABLE ZIEX 
0xA292 zie # YI SYLLABLE ZIE 0xA293 ziep # YI SYLLABLE ZIEP 0xA294 zat # YI SYLLABLE ZAT 0xA295 zax # YI SYLLABLE ZAX 0xA296 za # YI SYLLABLE ZA 0xA297 zap # YI SYLLABLE ZAP 0xA298 zuox # YI SYLLABLE ZUOX 0xA299 zuo # YI SYLLABLE ZUO 0xA29A zuop # YI SYLLABLE ZUOP 0xA29B zot # YI SYLLABLE ZOT 0xA29C zox # YI SYLLABLE ZOX 0xA29D zo # YI SYLLABLE ZO 0xA29E zop # YI SYLLABLE ZOP 0xA29F zex # YI SYLLABLE ZEX 0xA2A0 ze # YI SYLLABLE ZE 0xA2A1 zep # YI SYLLABLE ZEP 0xA2A2 zut # YI SYLLABLE ZUT 0xA2A3 zux # YI SYLLABLE ZUX 0xA2A4 zu # YI SYLLABLE ZU 0xA2A5 zup # YI SYLLABLE ZUP 0xA2A6 zurx # YI SYLLABLE ZURX 0xA2A7 zur # YI SYLLABLE ZUR 0xA2A8 zyt # YI SYLLABLE ZYT 0xA2A9 zyx # YI SYLLABLE ZYX 0xA2AA zy # YI SYLLABLE ZY 0xA2AB zyp # YI SYLLABLE ZYP 0xA2AC zyrx # YI SYLLABLE ZYRX 0xA2AD zyr # YI SYLLABLE ZYR 0xA2AE cit # YI SYLLABLE CIT 0xA2AF cix # YI SYLLABLE CIX 0xA2B0 ci # YI SYLLABLE CI 0xA2B1 cip # YI SYLLABLE CIP 0xA2B2 ciet # YI SYLLABLE CIET 0xA2B3 ciex # YI SYLLABLE CIEX 0xA2B4 cie # YI SYLLABLE CIE 0xA2B5 ciep # YI SYLLABLE CIEP 0xA2B6 cat # YI SYLLABLE CAT 0xA2B7 cax # YI SYLLABLE CAX 0xA2B8 ca # YI SYLLABLE CA 0xA2B9 cap # YI SYLLABLE CAP 0xA2BA cuox # YI SYLLABLE CUOX 0xA2BB cuo # YI SYLLABLE CUO 0xA2BC cuop # YI SYLLABLE CUOP 0xA2BD cot # YI SYLLABLE COT 0xA2BE cox # YI SYLLABLE COX 0xA2BF co # YI SYLLABLE CO 0xA2C0 cop # YI SYLLABLE COP 0xA2C1 cex # YI SYLLABLE CEX 0xA2C2 ce # YI SYLLABLE CE 0xA2C3 cep # YI SYLLABLE CEP 0xA2C4 cut # YI SYLLABLE CUT 0xA2C5 cux # YI SYLLABLE CUX 0xA2C6 cu # YI SYLLABLE CU 0xA2C7 cup # YI SYLLABLE CUP 0xA2C8 curx # YI SYLLABLE CURX 0xA2C9 cur # YI SYLLABLE CUR 0xA2CA cyt # YI SYLLABLE CYT 0xA2CB cyx # YI SYLLABLE CYX 0xA2CC cy # YI SYLLABLE CY 0xA2CD cyp # YI SYLLABLE CYP 0xA2CE cyrx # YI SYLLABLE CYRX 0xA2CF cyr # YI SYLLABLE CYR 0xA2D0 zzit # YI SYLLABLE ZZIT 0xA2D1 zzix # YI SYLLABLE ZZIX 0xA2D2 zzi # YI SYLLABLE ZZI 0xA2D3 zzip # YI SYLLABLE ZZIP 0xA2D4 zziet # YI SYLLABLE ZZIET 0xA2D5 zziex # YI SYLLABLE ZZIEX 0xA2D6 zzie 
# YI SYLLABLE ZZIE 0xA2D7 zziep # YI SYLLABLE ZZIEP 0xA2D8 zzat # YI SYLLABLE ZZAT 0xA2D9 zzax # YI SYLLABLE ZZAX 0xA2DA zza # YI SYLLABLE ZZA 0xA2DB zzap # YI SYLLABLE ZZAP 0xA2DC zzox # YI SYLLABLE ZZOX 0xA2DD zzo # YI SYLLABLE ZZO 0xA2DE zzop # YI SYLLABLE ZZOP 0xA2DF zzex # YI SYLLABLE ZZEX 0xA2E0 zze # YI SYLLABLE ZZE 0xA2E1 zzep # YI SYLLABLE ZZEP 0xA2E2 zzux # YI SYLLABLE ZZUX 0xA2E3 zzu # YI SYLLABLE ZZU 0xA2E4 zzup # YI SYLLABLE ZZUP 0xA2E5 zzurx # YI SYLLABLE ZZURX 0xA2E6 zzur # YI SYLLABLE ZZUR 0xA2E7 zzyt # YI SYLLABLE ZZYT 0xA2E8 zzyx # YI SYLLABLE ZZYX 0xA2E9 zzy # YI SYLLABLE ZZY 0xA2EA zzyp # YI SYLLABLE ZZYP 0xA2EB zzyrx # YI SYLLABLE ZZYRX 0xA2EC zzyr # YI SYLLABLE ZZYR 0xA2ED nzit # YI SYLLABLE NZIT 0xA2EE nzix # YI SYLLABLE NZIX 0xA2EF nzi # YI SYLLABLE NZI 0xA2F0 nzip # YI SYLLABLE NZIP 0xA2F1 nziex # YI SYLLABLE NZIEX 0xA2F2 nzie # YI SYLLABLE NZIE 0xA2F3 nziep # YI SYLLABLE NZIEP 0xA2F4 nzat # YI SYLLABLE NZAT 0xA2F5 nzax # YI SYLLABLE NZAX 0xA2F6 nza # YI SYLLABLE NZA 0xA2F7 nzap # YI SYLLABLE NZAP 0xA2F8 nzuox # YI SYLLABLE NZUOX 0xA2F9 nzuo # YI SYLLABLE NZUO 0xA2FA nzox # YI SYLLABLE NZOX 0xA2FB nzop # YI SYLLABLE NZOP 0xA2FC nzex # YI SYLLABLE NZEX 0xA2FD nze # YI SYLLABLE NZE 0xA2FE nzux # YI SYLLABLE NZUX 0xA2FF nzu # YI SYLLABLE NZU # # Characters 0xA300 to 0xA3FF # 0xA300 nzup # YI SYLLABLE NZUP 0xA301 nzurx # YI SYLLABLE NZURX 0xA302 nzur # YI SYLLABLE NZUR 0xA303 nzyt # YI SYLLABLE NZYT 0xA304 nzyx # YI SYLLABLE NZYX 0xA305 nzy # YI SYLLABLE NZY 0xA306 nzyp # YI SYLLABLE NZYP 0xA307 nzyrx # YI SYLLABLE NZYRX 0xA308 nzyr # YI SYLLABLE NZYR 0xA309 sit # YI SYLLABLE SIT 0xA30A six # YI SYLLABLE SIX 0xA30B si # YI SYLLABLE SI 0xA30C sip # YI SYLLABLE SIP 0xA30D siex # YI SYLLABLE SIEX 0xA30E sie # YI SYLLABLE SIE 0xA30F siep # YI SYLLABLE SIEP 0xA310 sat # YI SYLLABLE SAT 0xA311 sax # YI SYLLABLE SAX 0xA312 sa # YI SYLLABLE SA 0xA313 sap # YI SYLLABLE SAP 0xA314 suox # YI SYLLABLE SUOX 0xA315 suo # YI SYLLABLE SUO 0xA316 suop # YI 
SYLLABLE SUOP 0xA317 sot # YI SYLLABLE SOT 0xA318 sox # YI SYLLABLE SOX 0xA319 so # YI SYLLABLE SO 0xA31A sop # YI SYLLABLE SOP 0xA31B sex # YI SYLLABLE SEX 0xA31C se # YI SYLLABLE SE 0xA31D sep # YI SYLLABLE SEP 0xA31E sut # YI SYLLABLE SUT 0xA31F sux # YI SYLLABLE SUX 0xA320 su # YI SYLLABLE SU 0xA321 sup # YI SYLLABLE SUP 0xA322 surx # YI SYLLABLE SURX 0xA323 sur # YI SYLLABLE SUR 0xA324 syt # YI SYLLABLE SYT 0xA325 syx # YI SYLLABLE SYX 0xA326 sy # YI SYLLABLE SY 0xA327 syp # YI SYLLABLE SYP 0xA328 syrx # YI SYLLABLE SYRX 0xA329 syr # YI SYLLABLE SYR 0xA32A ssit # YI SYLLABLE SSIT 0xA32B ssix # YI SYLLABLE SSIX 0xA32C ssi # YI SYLLABLE SSI 0xA32D ssip # YI SYLLABLE SSIP 0xA32E ssiex # YI SYLLABLE SSIEX 0xA32F ssie # YI SYLLABLE SSIE 0xA330 ssiep # YI SYLLABLE SSIEP 0xA331 ssat # YI SYLLABLE SSAT 0xA332 ssax # YI SYLLABLE SSAX 0xA333 ssa # YI SYLLABLE SSA 0xA334 ssap # YI SYLLABLE SSAP 0xA335 ssot # YI SYLLABLE SSOT 0xA336 ssox # YI SYLLABLE SSOX 0xA337 sso # YI SYLLABLE SSO 0xA338 ssop # YI SYLLABLE SSOP 0xA339 ssex # YI SYLLABLE SSEX 0xA33A sse # YI SYLLABLE SSE 0xA33B ssep # YI SYLLABLE SSEP 0xA33C ssut # YI SYLLABLE SSUT 0xA33D ssux # YI SYLLABLE SSUX 0xA33E ssu # YI SYLLABLE SSU 0xA33F ssup # YI SYLLABLE SSUP 0xA340 ssyt # YI SYLLABLE SSYT 0xA341 ssyx # YI SYLLABLE SSYX 0xA342 ssy # YI SYLLABLE SSY 0xA343 ssyp # YI SYLLABLE SSYP 0xA344 ssyrx # YI SYLLABLE SSYRX 0xA345 ssyr # YI SYLLABLE SSYR 0xA346 zhat # YI SYLLABLE ZHAT 0xA347 zhax # YI SYLLABLE ZHAX 0xA348 zha # YI SYLLABLE ZHA 0xA349 zhap # YI SYLLABLE ZHAP 0xA34A zhuox # YI SYLLABLE ZHUOX 0xA34B zhuo # YI SYLLABLE ZHUO 0xA34C zhuop # YI SYLLABLE ZHUOP 0xA34D zhot # YI SYLLABLE ZHOT 0xA34E zhox # YI SYLLABLE ZHOX 0xA34F zho # YI SYLLABLE ZHO 0xA350 zhop # YI SYLLABLE ZHOP 0xA351 zhet # YI SYLLABLE ZHET 0xA352 zhex # YI SYLLABLE ZHEX 0xA353 zhe # YI SYLLABLE ZHE 0xA354 zhep # YI SYLLABLE ZHEP 0xA355 zhut # YI SYLLABLE ZHUT 0xA356 zhux # YI SYLLABLE ZHUX 0xA357 zhu # YI SYLLABLE ZHU 0xA358 zhup # YI 
SYLLABLE ZHUP 0xA359 zhurx # YI SYLLABLE ZHURX 0xA35A zhur # YI SYLLABLE ZHUR 0xA35B zhyt # YI SYLLABLE ZHYT 0xA35C zhyx # YI SYLLABLE ZHYX 0xA35D zhy # YI SYLLABLE ZHY 0xA35E zhyp # YI SYLLABLE ZHYP 0xA35F zhyrx # YI SYLLABLE ZHYRX 0xA360 zhyr # YI SYLLABLE ZHYR 0xA361 chat # YI SYLLABLE CHAT 0xA362 chax # YI SYLLABLE CHAX 0xA363 cha # YI SYLLABLE CHA 0xA364 chap # YI SYLLABLE CHAP 0xA365 chuot # YI SYLLABLE CHUOT 0xA366 chuox # YI SYLLABLE CHUOX 0xA367 chuo # YI SYLLABLE CHUO 0xA368 chuop # YI SYLLABLE CHUOP 0xA369 chot # YI SYLLABLE CHOT 0xA36A chox # YI SYLLABLE CHOX 0xA36B cho # YI SYLLABLE CHO 0xA36C chop # YI SYLLABLE CHOP 0xA36D chet # YI SYLLABLE CHET 0xA36E chex # YI SYLLABLE CHEX 0xA36F che # YI SYLLABLE CHE 0xA370 chep # YI SYLLABLE CHEP 0xA371 chux # YI SYLLABLE CHUX 0xA372 chu # YI SYLLABLE CHU 0xA373 chup # YI SYLLABLE CHUP 0xA374 churx # YI SYLLABLE CHURX 0xA375 chur # YI SYLLABLE CHUR 0xA376 chyt # YI SYLLABLE CHYT 0xA377 chyx # YI SYLLABLE CHYX 0xA378 chy # YI SYLLABLE CHY 0xA379 chyp # YI SYLLABLE CHYP 0xA37A chyrx # YI SYLLABLE CHYRX 0xA37B chyr # YI SYLLABLE CHYR 0xA37C rrax # YI SYLLABLE RRAX 0xA37D rra # YI SYLLABLE RRA 0xA37E rruox # YI SYLLABLE RRUOX 0xA37F rruo # YI SYLLABLE RRUO 0xA380 rrot # YI SYLLABLE RROT 0xA381 rrox # YI SYLLABLE RROX 0xA382 rro # YI SYLLABLE RRO 0xA383 rrop # YI SYLLABLE RROP 0xA384 rret # YI SYLLABLE RRET 0xA385 rrex # YI SYLLABLE RREX 0xA386 rre # YI SYLLABLE RRE 0xA387 rrep # YI SYLLABLE RREP 0xA388 rrut # YI SYLLABLE RRUT 0xA389 rrux # YI SYLLABLE RRUX 0xA38A rru # YI SYLLABLE RRU 0xA38B rrup # YI SYLLABLE RRUP 0xA38C rrurx # YI SYLLABLE RRURX 0xA38D rrur # YI SYLLABLE RRUR 0xA38E rryt # YI SYLLABLE RRYT 0xA38F rryx # YI SYLLABLE RRYX 0xA390 rry # YI SYLLABLE RRY 0xA391 rryp # YI SYLLABLE RRYP 0xA392 rryrx # YI SYLLABLE RRYRX 0xA393 rryr # YI SYLLABLE RRYR 0xA394 nrat # YI SYLLABLE NRAT 0xA395 nrax # YI SYLLABLE NRAX 0xA396 nra # YI SYLLABLE NRA 0xA397 nrap # YI SYLLABLE NRAP 0xA398 nrox # YI SYLLABLE NROX 
0xA399 nro # YI SYLLABLE NRO 0xA39A nrop # YI SYLLABLE NROP 0xA39B nret # YI SYLLABLE NRET 0xA39C nrex # YI SYLLABLE NREX 0xA39D nre # YI SYLLABLE NRE 0xA39E nrep # YI SYLLABLE NREP 0xA39F nrut # YI SYLLABLE NRUT 0xA3A0 nrux # YI SYLLABLE NRUX 0xA3A1 nru # YI SYLLABLE NRU 0xA3A2 nrup # YI SYLLABLE NRUP 0xA3A3 nrurx # YI SYLLABLE NRURX 0xA3A4 nrur # YI SYLLABLE NRUR 0xA3A5 nryt # YI SYLLABLE NRYT 0xA3A6 nryx # YI SYLLABLE NRYX 0xA3A7 nry # YI SYLLABLE NRY 0xA3A8 nryp # YI SYLLABLE NRYP 0xA3A9 nryrx # YI SYLLABLE NRYRX 0xA3AA nryr # YI SYLLABLE NRYR 0xA3AB shat # YI SYLLABLE SHAT 0xA3AC shax # YI SYLLABLE SHAX 0xA3AD sha # YI SYLLABLE SHA 0xA3AE shap # YI SYLLABLE SHAP 0xA3AF shuox # YI SYLLABLE SHUOX 0xA3B0 shuo # YI SYLLABLE SHUO 0xA3B1 shuop # YI SYLLABLE SHUOP 0xA3B2 shot # YI SYLLABLE SHOT 0xA3B3 shox # YI SYLLABLE SHOX 0xA3B4 sho # YI SYLLABLE SHO 0xA3B5 shop # YI SYLLABLE SHOP 0xA3B6 shet # YI SYLLABLE SHET 0xA3B7 shex # YI SYLLABLE SHEX 0xA3B8 she # YI SYLLABLE SHE 0xA3B9 shep # YI SYLLABLE SHEP 0xA3BA shut # YI SYLLABLE SHUT 0xA3BB shux # YI SYLLABLE SHUX 0xA3BC shu # YI SYLLABLE SHU 0xA3BD shup # YI SYLLABLE SHUP 0xA3BE shurx # YI SYLLABLE SHURX 0xA3BF shur # YI SYLLABLE SHUR 0xA3C0 shyt # YI SYLLABLE SHYT 0xA3C1 shyx # YI SYLLABLE SHYX 0xA3C2 shy # YI SYLLABLE SHY 0xA3C3 shyp # YI SYLLABLE SHYP 0xA3C4 shyrx # YI SYLLABLE SHYRX 0xA3C5 shyr # YI SYLLABLE SHYR 0xA3C6 rat # YI SYLLABLE RAT 0xA3C7 rax # YI SYLLABLE RAX 0xA3C8 ra # YI SYLLABLE RA 0xA3C9 rap # YI SYLLABLE RAP 0xA3CA ruox # YI SYLLABLE RUOX 0xA3CB ruo # YI SYLLABLE RUO 0xA3CC ruop # YI SYLLABLE RUOP 0xA3CD rot # YI SYLLABLE ROT 0xA3CE rox # YI SYLLABLE ROX 0xA3CF ro # YI SYLLABLE RO 0xA3D0 rop # YI SYLLABLE ROP 0xA3D1 rex # YI SYLLABLE REX 0xA3D2 re # YI SYLLABLE RE 0xA3D3 rep # YI SYLLABLE REP 0xA3D4 rut # YI SYLLABLE RUT 0xA3D5 rux # YI SYLLABLE RUX 0xA3D6 ru # YI SYLLABLE RU 0xA3D7 rup # YI SYLLABLE RUP 0xA3D8 rurx # YI SYLLABLE RURX 0xA3D9 rur # YI SYLLABLE RUR 0xA3DA ryt # YI SYLLABLE RYT 
0xA3DB ryx # YI SYLLABLE RYX 0xA3DC ry # YI SYLLABLE RY 0xA3DD ryp # YI SYLLABLE RYP 0xA3DE ryrx # YI SYLLABLE RYRX 0xA3DF ryr # YI SYLLABLE RYR 0xA3E0 jit # YI SYLLABLE JIT 0xA3E1 jix # YI SYLLABLE JIX 0xA3E2 ji # YI SYLLABLE JI 0xA3E3 jip # YI SYLLABLE JIP 0xA3E4 jiet # YI SYLLABLE JIET 0xA3E5 jiex # YI SYLLABLE JIEX 0xA3E6 jie # YI SYLLABLE JIE 0xA3E7 jiep # YI SYLLABLE JIEP 0xA3E8 juot # YI SYLLABLE JUOT 0xA3E9 juox # YI SYLLABLE JUOX 0xA3EA juo # YI SYLLABLE JUO 0xA3EB juop # YI SYLLABLE JUOP 0xA3EC jot # YI SYLLABLE JOT 0xA3ED jox # YI SYLLABLE JOX 0xA3EE jo # YI SYLLABLE JO 0xA3EF jop # YI SYLLABLE JOP 0xA3F0 jut # YI SYLLABLE JUT 0xA3F1 jux # YI SYLLABLE JUX 0xA3F2 ju # YI SYLLABLE JU 0xA3F3 jup # YI SYLLABLE JUP 0xA3F4 jurx # YI SYLLABLE JURX 0xA3F5 jur # YI SYLLABLE JUR 0xA3F6 jyt # YI SYLLABLE JYT 0xA3F7 jyx # YI SYLLABLE JYX 0xA3F8 jy # YI SYLLABLE JY 0xA3F9 jyp # YI SYLLABLE JYP 0xA3FA jyrx # YI SYLLABLE JYRX 0xA3FB jyr # YI SYLLABLE JYR 0xA3FC qit # YI SYLLABLE QIT 0xA3FD qix # YI SYLLABLE QIX 0xA3FE qi # YI SYLLABLE QI 0xA3FF qip # YI SYLLABLE QIP # # Characters 0xA400 to 0xA4FF # 0xA400 qiet # YI SYLLABLE QIET 0xA401 qiex # YI SYLLABLE QIEX 0xA402 qie # YI SYLLABLE QIE 0xA403 qiep # YI SYLLABLE QIEP 0xA404 quot # YI SYLLABLE QUOT 0xA405 quox # YI SYLLABLE QUOX 0xA406 quo # YI SYLLABLE QUO 0xA407 quop # YI SYLLABLE QUOP 0xA408 qot # YI SYLLABLE QOT 0xA409 qox # YI SYLLABLE QOX 0xA40A qo # YI SYLLABLE QO 0xA40B qop # YI SYLLABLE QOP 0xA40C qut # YI SYLLABLE QUT 0xA40D qux # YI SYLLABLE QUX 0xA40E qu # YI SYLLABLE QU 0xA40F qup # YI SYLLABLE QUP 0xA410 qurx # YI SYLLABLE QURX 0xA411 qur # YI SYLLABLE QUR 0xA412 qyt # YI SYLLABLE QYT 0xA413 qyx # YI SYLLABLE QYX 0xA414 qy # YI SYLLABLE QY 0xA415 qyp # YI SYLLABLE QYP 0xA416 qyrx # YI SYLLABLE QYRX 0xA417 qyr # YI SYLLABLE QYR 0xA418 jjit # YI SYLLABLE JJIT 0xA419 jjix # YI SYLLABLE JJIX 0xA41A jji # YI SYLLABLE JJI 0xA41B jjip # YI SYLLABLE JJIP 0xA41C jjiet # YI SYLLABLE JJIET 0xA41D jjiex # YI 
SYLLABLE JJIEX 0xA41E jjie # YI SYLLABLE JJIE 0xA41F jjiep # YI SYLLABLE JJIEP 0xA420 jjuox # YI SYLLABLE JJUOX 0xA421 jjuo # YI SYLLABLE JJUO 0xA422 jjuop # YI SYLLABLE JJUOP 0xA423 jjot # YI SYLLABLE JJOT 0xA424 jjox # YI SYLLABLE JJOX 0xA425 jjo # YI SYLLABLE JJO 0xA426 jjop # YI SYLLABLE JJOP 0xA427 jjut # YI SYLLABLE JJUT 0xA428 jjux # YI SYLLABLE JJUX 0xA429 jju # YI SYLLABLE JJU 0xA42A jjup # YI SYLLABLE JJUP 0xA42B jjurx # YI SYLLABLE JJURX 0xA42C jjur # YI SYLLABLE JJUR 0xA42D jjyt # YI SYLLABLE JJYT 0xA42E jjyx # YI SYLLABLE JJYX 0xA42F jjy # YI SYLLABLE JJY 0xA430 jjyp # YI SYLLABLE JJYP 0xA431 njit # YI SYLLABLE NJIT 0xA432 njix # YI SYLLABLE NJIX 0xA433 nji # YI SYLLABLE NJI 0xA434 njip # YI SYLLABLE NJIP 0xA435 njiet # YI SYLLABLE NJIET 0xA436 njiex # YI SYLLABLE NJIEX 0xA437 njie # YI SYLLABLE NJIE 0xA438 njiep # YI SYLLABLE NJIEP 0xA439 njuox # YI SYLLABLE NJUOX 0xA43A njuo # YI SYLLABLE NJUO 0xA43B njot # YI SYLLABLE NJOT 0xA43C njox # YI SYLLABLE NJOX 0xA43D njo # YI SYLLABLE NJO 0xA43E njop # YI SYLLABLE NJOP 0xA43F njux # YI SYLLABLE NJUX 0xA440 nju # YI SYLLABLE NJU 0xA441 njup # YI SYLLABLE NJUP 0xA442 njurx # YI SYLLABLE NJURX 0xA443 njur # YI SYLLABLE NJUR 0xA444 njyt # YI SYLLABLE NJYT 0xA445 njyx # YI SYLLABLE NJYX 0xA446 njy # YI SYLLABLE NJY 0xA447 njyp # YI SYLLABLE NJYP 0xA448 njyrx # YI SYLLABLE NJYRX 0xA449 njyr # YI SYLLABLE NJYR 0xA44A nyit # YI SYLLABLE NYIT 0xA44B nyix # YI SYLLABLE NYIX 0xA44C nyi # YI SYLLABLE NYI 0xA44D nyip # YI SYLLABLE NYIP 0xA44E nyiet # YI SYLLABLE NYIET 0xA44F nyiex # YI SYLLABLE NYIEX 0xA450 nyie # YI SYLLABLE NYIE 0xA451 nyiep # YI SYLLABLE NYIEP 0xA452 nyuox # YI SYLLABLE NYUOX 0xA453 nyuo # YI SYLLABLE NYUO 0xA454 nyuop # YI SYLLABLE NYUOP 0xA455 nyot # YI SYLLABLE NYOT 0xA456 nyox # YI SYLLABLE NYOX 0xA457 nyo # YI SYLLABLE NYO 0xA458 nyop # YI SYLLABLE NYOP 0xA459 nyut # YI SYLLABLE NYUT 0xA45A nyux # YI SYLLABLE NYUX 0xA45B nyu # YI SYLLABLE NYU 0xA45C nyup # YI SYLLABLE NYUP 0xA45D xit # YI 
SYLLABLE XIT 0xA45E xix # YI SYLLABLE XIX 0xA45F xi # YI SYLLABLE XI 0xA460 xip # YI SYLLABLE XIP 0xA461 xiet # YI SYLLABLE XIET 0xA462 xiex # YI SYLLABLE XIEX 0xA463 xie # YI SYLLABLE XIE 0xA464 xiep # YI SYLLABLE XIEP 0xA465 xuox # YI SYLLABLE XUOX 0xA466 xuo # YI SYLLABLE XUO 0xA467 xot # YI SYLLABLE XOT 0xA468 xox # YI SYLLABLE XOX 0xA469 xo # YI SYLLABLE XO 0xA46A xop # YI SYLLABLE XOP 0xA46B xyt # YI SYLLABLE XYT 0xA46C xyx # YI SYLLABLE XYX 0xA46D xy # YI SYLLABLE XY 0xA46E xyp # YI SYLLABLE XYP 0xA46F xyrx # YI SYLLABLE XYRX 0xA470 xyr # YI SYLLABLE XYR 0xA471 yit # YI SYLLABLE YIT 0xA472 yix # YI SYLLABLE YIX 0xA473 yi # YI SYLLABLE YI 0xA474 yip # YI SYLLABLE YIP 0xA475 yiet # YI SYLLABLE YIET 0xA476 yiex # YI SYLLABLE YIEX 0xA477 yie # YI SYLLABLE YIE 0xA478 yiep # YI SYLLABLE YIEP 0xA479 yuot # YI SYLLABLE YUOT 0xA47A yuox # YI SYLLABLE YUOX 0xA47B yuo # YI SYLLABLE YUO 0xA47C yuop # YI SYLLABLE YUOP 0xA47D yot # YI SYLLABLE YOT 0xA47E yox # YI SYLLABLE YOX 0xA47F yo # YI SYLLABLE YO 0xA480 yop # YI SYLLABLE YOP 0xA481 yut # YI SYLLABLE YUT 0xA482 yux # YI SYLLABLE YUX 0xA483 yu # YI SYLLABLE YU 0xA484 yup # YI SYLLABLE YUP 0xA485 yurx # YI SYLLABLE YURX 0xA486 yur # YI SYLLABLE YUR 0xA487 yyt # YI SYLLABLE YYT 0xA488 yyx # YI SYLLABLE YYX 0xA489 yy # YI SYLLABLE YY 0xA48A yyp # YI SYLLABLE YYP 0xA48B yyrx # YI SYLLABLE YYRX 0xA48C yyr # YI SYLLABLE YYR 0xA490 Qot # YI RADICAL QOT 0xA491 Li # YI RADICAL LI 0xA492 Kit # YI RADICAL KIT 0xA493 Nyip # YI RADICAL NYIP 0xA494 Cyp # YI RADICAL CYP 0xA495 Ssi # YI RADICAL SSI 0xA496 Ggop # YI RADICAL GGOP 0xA497 Gep # YI RADICAL GEP 0xA498 Mi # YI RADICAL MI 0xA499 Hxit # YI RADICAL HXIT 0xA49A Lyr # YI RADICAL LYR 0xA49B Bbut # YI RADICAL BBUT 0xA49C Mop # YI RADICAL MOP 0xA49D Yo # YI RADICAL YO 0xA49E Put # YI RADICAL PUT 0xA49F Hxuo # YI RADICAL HXUO 0xA4A0 Tat # YI RADICAL TAT 0xA4A1 Ga # YI RADICAL GA 0xA4A4 Ddur # YI RADICAL DDUR 0xA4A5 Bur # YI RADICAL BUR 0xA4A6 Gguo # YI RADICAL GGUO 0xA4A7 Nyop # YI 
RADICAL NYOP 0xA4A8 Tu # YI RADICAL TU 0xA4A9 Op # YI RADICAL OP 0xA4AA Jjut # YI RADICAL JJUT 0xA4AB Zot # YI RADICAL ZOT 0xA4AC Pyt # YI RADICAL PYT 0xA4AD Hmo # YI RADICAL HMO 0xA4AE Yit # YI RADICAL YIT 0xA4AF Vur # YI RADICAL VUR 0xA4B0 Shy # YI RADICAL SHY 0xA4B1 Vep # YI RADICAL VEP 0xA4B2 Za # YI RADICAL ZA 0xA4B3 Jo # YI RADICAL JO 0xA4B5 Jjy # YI RADICAL JJY 0xA4B6 Got # YI RADICAL GOT 0xA4B7 Jjie # YI RADICAL JJIE 0xA4B8 Wo # YI RADICAL WO 0xA4B9 Du # YI RADICAL DU 0xA4BA Shur # YI RADICAL SHUR 0xA4BB Lie # YI RADICAL LIE 0xA4BC Cy # YI RADICAL CY 0xA4BD Cuop # YI RADICAL CUOP 0xA4BE Cip # YI RADICAL CIP 0xA4BF Hxop # YI RADICAL HXOP 0xA4C0 Shat # YI RADICAL SHAT 0xA4C2 Shop # YI RADICAL SHOP 0xA4C3 Che # YI RADICAL CHE 0xA4C4 Zziet # YI RADICAL ZZIET 0xA4C6 Ke # YI RADICAL KE # # Characters 0xA500 to 0xA5FF # # # Characters 0xA600 to 0xA6FF # 0xA60F ? # VAI QUESTION MARK 0xA6F7 ? # BAMUM QUESTION MARK # # Characters 0xA700 to 0xA7FF # # # Characters 0xA800 to 0xA8FF # # # Characters 0xA900 to 0xA9FF # # # Characters 0xAA00 to 0xAAFF # # # Characters 0xAC00 to 0xACFF # 0xAC00 ga # 0xD7A3 hih # # # Characters 0xD800 to 0xD8FF # # # Characters 0xDC00 to 0xDCFF # # # Characters 0xE000 to 0xE0FF # # # Characters 0xF900 to 0xF9FF # 0xF900 Kay # CJK COMPATIBILITY IDEOGRAPH-F900 0xF901 Kayng # CJK COMPATIBILITY IDEOGRAPH-F901 0xF902 Ke # CJK COMPATIBILITY IDEOGRAPH-F902 0xF903 Ko # CJK COMPATIBILITY IDEOGRAPH-F903 0xF904 Kol # CJK COMPATIBILITY IDEOGRAPH-F904 0xF905 Koc # CJK COMPATIBILITY IDEOGRAPH-F905 0xF906 Kwi # CJK COMPATIBILITY IDEOGRAPH-F906 0xF907 Kwi # CJK COMPATIBILITY IDEOGRAPH-F907 0xF908 Kyun # CJK COMPATIBILITY IDEOGRAPH-F908 0xF909 Kul # CJK COMPATIBILITY IDEOGRAPH-F909 0xF90A Kum # CJK COMPATIBILITY IDEOGRAPH-F90A 0xF90B Na # CJK COMPATIBILITY IDEOGRAPH-F90B 0xF90C Na # CJK COMPATIBILITY IDEOGRAPH-F90C 0xF90D Na # CJK COMPATIBILITY IDEOGRAPH-F90D 0xF90E La # CJK COMPATIBILITY IDEOGRAPH-F90E 0xF90F Na # CJK COMPATIBILITY IDEOGRAPH-F90F 0xF910 Na 
# CJK COMPATIBILITY IDEOGRAPH-F910 0xF911 Na # CJK COMPATIBILITY IDEOGRAPH-F911 0xF912 Na # CJK COMPATIBILITY IDEOGRAPH-F912 0xF913 Na # CJK COMPATIBILITY IDEOGRAPH-F913 0xF914 Nak # CJK COMPATIBILITY IDEOGRAPH-F914 0xF915 Nak # CJK COMPATIBILITY IDEOGRAPH-F915 0xF916 Nak # CJK COMPATIBILITY IDEOGRAPH-F916 0xF917 Nak # CJK COMPATIBILITY IDEOGRAPH-F917 0xF918 Nak # CJK COMPATIBILITY IDEOGRAPH-F918 0xF919 Nak # CJK COMPATIBILITY IDEOGRAPH-F919 0xF91A Nak # CJK COMPATIBILITY IDEOGRAPH-F91A 0xF91B Nan # CJK COMPATIBILITY IDEOGRAPH-F91B 0xF91C Nan # CJK COMPATIBILITY IDEOGRAPH-F91C 0xF91D Nan # CJK COMPATIBILITY IDEOGRAPH-F91D 0xF91E Nan # CJK COMPATIBILITY IDEOGRAPH-F91E 0xF91F Nan # CJK COMPATIBILITY IDEOGRAPH-F91F 0xF920 Nan # CJK COMPATIBILITY IDEOGRAPH-F920 0xF921 Nam # CJK COMPATIBILITY IDEOGRAPH-F921 0xF922 Nam # CJK COMPATIBILITY IDEOGRAPH-F922 0xF923 Nam # CJK COMPATIBILITY IDEOGRAPH-F923 0xF924 Nam # CJK COMPATIBILITY IDEOGRAPH-F924 0xF925 Nap # CJK COMPATIBILITY IDEOGRAPH-F925 0xF926 Nap # CJK COMPATIBILITY IDEOGRAPH-F926 0xF927 Nap # CJK COMPATIBILITY IDEOGRAPH-F927 0xF928 Nang # CJK COMPATIBILITY IDEOGRAPH-F928 0xF929 Nang # CJK COMPATIBILITY IDEOGRAPH-F929 0xF92A Nang # CJK COMPATIBILITY IDEOGRAPH-F92A 0xF92B Nang # CJK COMPATIBILITY IDEOGRAPH-F92B 0xF92C Nang # CJK COMPATIBILITY IDEOGRAPH-F92C 0xF92D Nay # CJK COMPATIBILITY IDEOGRAPH-F92D 0xF92E Nayng # CJK COMPATIBILITY IDEOGRAPH-F92E 0xF92F No # CJK COMPATIBILITY IDEOGRAPH-F92F 0xF930 No # CJK COMPATIBILITY IDEOGRAPH-F930 0xF931 No # CJK COMPATIBILITY IDEOGRAPH-F931 0xF932 No # CJK COMPATIBILITY IDEOGRAPH-F932 0xF933 No # CJK COMPATIBILITY IDEOGRAPH-F933 0xF934 No # CJK COMPATIBILITY IDEOGRAPH-F934 0xF935 No # CJK COMPATIBILITY IDEOGRAPH-F935 0xF936 No # CJK COMPATIBILITY IDEOGRAPH-F936 0xF937 No # CJK COMPATIBILITY IDEOGRAPH-F937 0xF938 No # CJK COMPATIBILITY IDEOGRAPH-F938 0xF939 No # CJK COMPATIBILITY IDEOGRAPH-F939 0xF93A No # CJK COMPATIBILITY IDEOGRAPH-F93A 0xF93B Nok # CJK COMPATIBILITY 
IDEOGRAPH-F93B 0xF93C Nok # CJK COMPATIBILITY IDEOGRAPH-F93C 0xF93D Nok # CJK COMPATIBILITY IDEOGRAPH-F93D 0xF93E Nok # CJK COMPATIBILITY IDEOGRAPH-F93E 0xF93F Nok # CJK COMPATIBILITY IDEOGRAPH-F93F 0xF940 Nok # CJK COMPATIBILITY IDEOGRAPH-F940 0xF941 Non # CJK COMPATIBILITY IDEOGRAPH-F941 0xF942 Nong # CJK COMPATIBILITY IDEOGRAPH-F942 0xF943 Nong # CJK COMPATIBILITY IDEOGRAPH-F943 0xF944 Nong # CJK COMPATIBILITY IDEOGRAPH-F944 0xF945 Nong # CJK COMPATIBILITY IDEOGRAPH-F945 0xF946 Noy # CJK COMPATIBILITY IDEOGRAPH-F946 0xF947 Noy # CJK COMPATIBILITY IDEOGRAPH-F947 0xF948 Noy # CJK COMPATIBILITY IDEOGRAPH-F948 0xF949 Noy # CJK COMPATIBILITY IDEOGRAPH-F949 0xF94A Nwu # CJK COMPATIBILITY IDEOGRAPH-F94A 0xF94B Nwu # CJK COMPATIBILITY IDEOGRAPH-F94B 0xF94C Nwu # CJK COMPATIBILITY IDEOGRAPH-F94C 0xF94D Nwu # CJK COMPATIBILITY IDEOGRAPH-F94D 0xF94E Nwu # CJK COMPATIBILITY IDEOGRAPH-F94E 0xF94F Nwu # CJK COMPATIBILITY IDEOGRAPH-F94F 0xF950 Nwu # CJK COMPATIBILITY IDEOGRAPH-F950 0xF951 Nwu # CJK COMPATIBILITY IDEOGRAPH-F951 0xF952 Nuk # CJK COMPATIBILITY IDEOGRAPH-F952 0xF953 Nuk # CJK COMPATIBILITY IDEOGRAPH-F953 0xF954 Num # CJK COMPATIBILITY IDEOGRAPH-F954 0xF955 Nung # CJK COMPATIBILITY IDEOGRAPH-F955 0xF956 Nung # CJK COMPATIBILITY IDEOGRAPH-F956 0xF957 Nung # CJK COMPATIBILITY IDEOGRAPH-F957 0xF958 Nung # CJK COMPATIBILITY IDEOGRAPH-F958 0xF959 Nung # CJK COMPATIBILITY IDEOGRAPH-F959 0xF95A Twu # CJK COMPATIBILITY IDEOGRAPH-F95A 0xF95B La # CJK COMPATIBILITY IDEOGRAPH-F95B 0xF95C Lak # CJK COMPATIBILITY IDEOGRAPH-F95C 0xF95D Lak # CJK COMPATIBILITY IDEOGRAPH-F95D 0xF95E Lan # CJK COMPATIBILITY IDEOGRAPH-F95E 0xF95F Lyeng # CJK COMPATIBILITY IDEOGRAPH-F95F 0xF960 Lo # CJK COMPATIBILITY IDEOGRAPH-F960 0xF961 Lyul # CJK COMPATIBILITY IDEOGRAPH-F961 0xF962 Li # CJK COMPATIBILITY IDEOGRAPH-F962 0xF963 Pey # CJK COMPATIBILITY IDEOGRAPH-F963 0xF964 Pen # CJK COMPATIBILITY IDEOGRAPH-F964 0xF965 Pyen # CJK COMPATIBILITY IDEOGRAPH-F965 0xF966 Pwu # CJK COMPATIBILITY 
IDEOGRAPH-F966 0xF967 Pwul # CJK COMPATIBILITY IDEOGRAPH-F967 0xF968 Pi # CJK COMPATIBILITY IDEOGRAPH-F968 0xF969 Sak # CJK COMPATIBILITY IDEOGRAPH-F969 0xF96A Sak # CJK COMPATIBILITY IDEOGRAPH-F96A 0xF96B Sam # CJK COMPATIBILITY IDEOGRAPH-F96B 0xF96C Sayk # CJK COMPATIBILITY IDEOGRAPH-F96C 0xF96D Sayng # CJK COMPATIBILITY IDEOGRAPH-F96D 0xF96E Sep # CJK COMPATIBILITY IDEOGRAPH-F96E 0xF96F Sey # CJK COMPATIBILITY IDEOGRAPH-F96F 0xF970 Sway # CJK COMPATIBILITY IDEOGRAPH-F970 0xF971 Sin # CJK COMPATIBILITY IDEOGRAPH-F971 0xF972 Sim # CJK COMPATIBILITY IDEOGRAPH-F972 0xF973 Sip # CJK COMPATIBILITY IDEOGRAPH-F973 0xF974 Ya # CJK COMPATIBILITY IDEOGRAPH-F974 0xF975 Yak # CJK COMPATIBILITY IDEOGRAPH-F975 0xF976 Yak # CJK COMPATIBILITY IDEOGRAPH-F976 0xF977 Yang # CJK COMPATIBILITY IDEOGRAPH-F977 0xF978 Yang # CJK COMPATIBILITY IDEOGRAPH-F978 0xF979 Yang # CJK COMPATIBILITY IDEOGRAPH-F979 0xF97A Yang # CJK COMPATIBILITY IDEOGRAPH-F97A 0xF97B Yang # CJK COMPATIBILITY IDEOGRAPH-F97B 0xF97C Yang # CJK COMPATIBILITY IDEOGRAPH-F97C 0xF97D Yang # CJK COMPATIBILITY IDEOGRAPH-F97D 0xF97E Yang # CJK COMPATIBILITY IDEOGRAPH-F97E 0xF97F Ye # CJK COMPATIBILITY IDEOGRAPH-F97F 0xF980 Ye # CJK COMPATIBILITY IDEOGRAPH-F980 0xF981 Ye # CJK COMPATIBILITY IDEOGRAPH-F981 0xF982 Ye # CJK COMPATIBILITY IDEOGRAPH-F982 0xF983 Ye # CJK COMPATIBILITY IDEOGRAPH-F983 0xF984 Ye # CJK COMPATIBILITY IDEOGRAPH-F984 0xF985 Ye # CJK COMPATIBILITY IDEOGRAPH-F985 0xF986 Ye # CJK COMPATIBILITY IDEOGRAPH-F986 0xF987 Ye # CJK COMPATIBILITY IDEOGRAPH-F987 0xF988 Ye # CJK COMPATIBILITY IDEOGRAPH-F988 0xF989 Ye # CJK COMPATIBILITY IDEOGRAPH-F989 0xF98A Yek # CJK COMPATIBILITY IDEOGRAPH-F98A 0xF98B Yek # CJK COMPATIBILITY IDEOGRAPH-F98B 0xF98C Yek # CJK COMPATIBILITY IDEOGRAPH-F98C 0xF98D Yek # CJK COMPATIBILITY IDEOGRAPH-F98D 0xF98E Yen # CJK COMPATIBILITY IDEOGRAPH-F98E 0xF98F Yen # CJK COMPATIBILITY IDEOGRAPH-F98F 0xF990 Yen # CJK COMPATIBILITY IDEOGRAPH-F990 0xF991 Yen # CJK COMPATIBILITY IDEOGRAPH-F991 0xF992 
Yen # CJK COMPATIBILITY IDEOGRAPH-F992 0xF993 Yen # CJK COMPATIBILITY IDEOGRAPH-F993 0xF994 Yen # CJK COMPATIBILITY IDEOGRAPH-F994 0xF995 Yen # CJK COMPATIBILITY IDEOGRAPH-F995 0xF996 Yen # CJK COMPATIBILITY IDEOGRAPH-F996 0xF997 Yen # CJK COMPATIBILITY IDEOGRAPH-F997 0xF998 Yen # CJK COMPATIBILITY IDEOGRAPH-F998 0xF999 Yen # CJK COMPATIBILITY IDEOGRAPH-F999 0xF99A Yen # CJK COMPATIBILITY IDEOGRAPH-F99A 0xF99B Yen # CJK COMPATIBILITY IDEOGRAPH-F99B 0xF99C Yel # CJK COMPATIBILITY IDEOGRAPH-F99C 0xF99D Yel # CJK COMPATIBILITY IDEOGRAPH-F99D 0xF99E Yel # CJK COMPATIBILITY IDEOGRAPH-F99E 0xF99F Yel # CJK COMPATIBILITY IDEOGRAPH-F99F 0xF9A0 Yel # CJK COMPATIBILITY IDEOGRAPH-F9A0 0xF9A1 Yel # CJK COMPATIBILITY IDEOGRAPH-F9A1 0xF9A2 Yem # CJK COMPATIBILITY IDEOGRAPH-F9A2 0xF9A3 Yem # CJK COMPATIBILITY IDEOGRAPH-F9A3 0xF9A4 Yem # CJK COMPATIBILITY IDEOGRAPH-F9A4 0xF9A5 Yem # CJK COMPATIBILITY IDEOGRAPH-F9A5 0xF9A6 Yem # CJK COMPATIBILITY IDEOGRAPH-F9A6 0xF9A7 Yep # CJK COMPATIBILITY IDEOGRAPH-F9A7 0xF9A8 Yeng # CJK COMPATIBILITY IDEOGRAPH-F9A8 0xF9A9 Yeng # CJK COMPATIBILITY IDEOGRAPH-F9A9 0xF9AA Yeng # CJK COMPATIBILITY IDEOGRAPH-F9AA 0xF9AB Yeng # CJK COMPATIBILITY IDEOGRAPH-F9AB 0xF9AC Yeng # CJK COMPATIBILITY IDEOGRAPH-F9AC 0xF9AD Yeng # CJK COMPATIBILITY IDEOGRAPH-F9AD 0xF9AE Yeng # CJK COMPATIBILITY IDEOGRAPH-F9AE 0xF9AF Yeng # CJK COMPATIBILITY IDEOGRAPH-F9AF 0xF9B0 Yeng # CJK COMPATIBILITY IDEOGRAPH-F9B0 0xF9B1 Yeng # CJK COMPATIBILITY IDEOGRAPH-F9B1 0xF9B2 Yeng # CJK COMPATIBILITY IDEOGRAPH-F9B2 0xF9B3 Yeng # CJK COMPATIBILITY IDEOGRAPH-F9B3 0xF9B4 Yeng # CJK COMPATIBILITY IDEOGRAPH-F9B4 0xF9B5 Yey # CJK COMPATIBILITY IDEOGRAPH-F9B5 0xF9B6 Yey # CJK COMPATIBILITY IDEOGRAPH-F9B6 0xF9B7 Yey # CJK COMPATIBILITY IDEOGRAPH-F9B7 0xF9B8 Yey # CJK COMPATIBILITY IDEOGRAPH-F9B8 0xF9B9 O # CJK COMPATIBILITY IDEOGRAPH-F9B9 0xF9BA Yo # CJK COMPATIBILITY IDEOGRAPH-F9BA 0xF9BB Yo # CJK COMPATIBILITY IDEOGRAPH-F9BB 0xF9BC Yo # CJK COMPATIBILITY IDEOGRAPH-F9BC 0xF9BD Yo # CJK 
COMPATIBILITY IDEOGRAPH-F9BD 0xF9BE Yo # CJK COMPATIBILITY IDEOGRAPH-F9BE 0xF9BF Yo # CJK COMPATIBILITY IDEOGRAPH-F9BF 0xF9C0 Yo # CJK COMPATIBILITY IDEOGRAPH-F9C0 0xF9C1 Yo # CJK COMPATIBILITY IDEOGRAPH-F9C1 0xF9C2 Yo # CJK COMPATIBILITY IDEOGRAPH-F9C2 0xF9C3 Yo # CJK COMPATIBILITY IDEOGRAPH-F9C3 0xF9C4 Yong # CJK COMPATIBILITY IDEOGRAPH-F9C4 0xF9C5 Wun # CJK COMPATIBILITY IDEOGRAPH-F9C5 0xF9C6 Wen # CJK COMPATIBILITY IDEOGRAPH-F9C6 0xF9C7 Yu # CJK COMPATIBILITY IDEOGRAPH-F9C7 0xF9C8 Yu # CJK COMPATIBILITY IDEOGRAPH-F9C8 0xF9C9 Yu # CJK COMPATIBILITY IDEOGRAPH-F9C9 0xF9CA Yu # CJK COMPATIBILITY IDEOGRAPH-F9CA 0xF9CB Yu # CJK COMPATIBILITY IDEOGRAPH-F9CB 0xF9CC Yu # CJK COMPATIBILITY IDEOGRAPH-F9CC 0xF9CD Yu # CJK COMPATIBILITY IDEOGRAPH-F9CD 0xF9CE Yu # CJK COMPATIBILITY IDEOGRAPH-F9CE 0xF9CF Yu # CJK COMPATIBILITY IDEOGRAPH-F9CF 0xF9D0 Yu # CJK COMPATIBILITY IDEOGRAPH-F9D0 0xF9D1 Yuk # CJK COMPATIBILITY IDEOGRAPH-F9D1 0xF9D2 Yuk # CJK COMPATIBILITY IDEOGRAPH-F9D2 0xF9D3 Yuk # CJK COMPATIBILITY IDEOGRAPH-F9D3 0xF9D4 Yun # CJK COMPATIBILITY IDEOGRAPH-F9D4 0xF9D5 Yun # CJK COMPATIBILITY IDEOGRAPH-F9D5 0xF9D6 Yun # CJK COMPATIBILITY IDEOGRAPH-F9D6 0xF9D7 Yun # CJK COMPATIBILITY IDEOGRAPH-F9D7 0xF9D8 Yul # CJK COMPATIBILITY IDEOGRAPH-F9D8 0xF9D9 Yul # CJK COMPATIBILITY IDEOGRAPH-F9D9 0xF9DA Yul # CJK COMPATIBILITY IDEOGRAPH-F9DA 0xF9DB Yul # CJK COMPATIBILITY IDEOGRAPH-F9DB 0xF9DC Yung # CJK COMPATIBILITY IDEOGRAPH-F9DC 0xF9DD I # CJK COMPATIBILITY IDEOGRAPH-F9DD 0xF9DE I # CJK COMPATIBILITY IDEOGRAPH-F9DE 0xF9DF I # CJK COMPATIBILITY IDEOGRAPH-F9DF 0xF9E0 I # CJK COMPATIBILITY IDEOGRAPH-F9E0 0xF9E1 I # CJK COMPATIBILITY IDEOGRAPH-F9E1 0xF9E2 I # CJK COMPATIBILITY IDEOGRAPH-F9E2 0xF9E3 I # CJK COMPATIBILITY IDEOGRAPH-F9E3 0xF9E4 I # CJK COMPATIBILITY IDEOGRAPH-F9E4 0xF9E5 I # CJK COMPATIBILITY IDEOGRAPH-F9E5 0xF9E6 I # CJK COMPATIBILITY IDEOGRAPH-F9E6 0xF9E7 I # CJK COMPATIBILITY IDEOGRAPH-F9E7 0xF9E8 I # CJK COMPATIBILITY IDEOGRAPH-F9E8 0xF9E9 I # CJK COMPATIBILITY 
IDEOGRAPH-F9E9 0xF9EA I # CJK COMPATIBILITY IDEOGRAPH-F9EA 0xF9EB Ik # CJK COMPATIBILITY IDEOGRAPH-F9EB 0xF9EC Ik # CJK COMPATIBILITY IDEOGRAPH-F9EC 0xF9ED In # CJK COMPATIBILITY IDEOGRAPH-F9ED 0xF9EE In # CJK COMPATIBILITY IDEOGRAPH-F9EE 0xF9EF In # CJK COMPATIBILITY IDEOGRAPH-F9EF 0xF9F0 In # CJK COMPATIBILITY IDEOGRAPH-F9F0 0xF9F1 In # CJK COMPATIBILITY IDEOGRAPH-F9F1 0xF9F2 In # CJK COMPATIBILITY IDEOGRAPH-F9F2 0xF9F3 In # CJK COMPATIBILITY IDEOGRAPH-F9F3 0xF9F4 Im # CJK COMPATIBILITY IDEOGRAPH-F9F4 0xF9F5 Im # CJK COMPATIBILITY IDEOGRAPH-F9F5 0xF9F6 Im # CJK COMPATIBILITY IDEOGRAPH-F9F6 0xF9F7 Ip # CJK COMPATIBILITY IDEOGRAPH-F9F7 0xF9F8 Ip # CJK COMPATIBILITY IDEOGRAPH-F9F8 0xF9F9 Ip # CJK COMPATIBILITY IDEOGRAPH-F9F9 0xF9FA Cang # CJK COMPATIBILITY IDEOGRAPH-F9FA 0xF9FB Cek # CJK COMPATIBILITY IDEOGRAPH-F9FB 0xF9FC Ci # CJK COMPATIBILITY IDEOGRAPH-F9FC 0xF9FD Cip # CJK COMPATIBILITY IDEOGRAPH-F9FD 0xF9FE Cha # CJK COMPATIBILITY IDEOGRAPH-F9FE 0xF9FF Chek # CJK COMPATIBILITY IDEOGRAPH-F9FF # # Characters 0xFA00 to 0xFAFF # 0xFA00 Chey # CJK COMPATIBILITY IDEOGRAPH-FA00 0xFA01 Thak # CJK COMPATIBILITY IDEOGRAPH-FA01 0xFA02 Thak # CJK COMPATIBILITY IDEOGRAPH-FA02 0xFA03 Thang # CJK COMPATIBILITY IDEOGRAPH-FA03 0xFA04 Thayk # CJK COMPATIBILITY IDEOGRAPH-FA04 0xFA05 Thong # CJK COMPATIBILITY IDEOGRAPH-FA05 0xFA06 Pho # CJK COMPATIBILITY IDEOGRAPH-FA06 0xFA07 Phok # CJK COMPATIBILITY IDEOGRAPH-FA07 0xFA08 Hang # CJK COMPATIBILITY IDEOGRAPH-FA08 0xFA09 Hang # CJK COMPATIBILITY IDEOGRAPH-FA09 0xFA0A Hyen # CJK COMPATIBILITY IDEOGRAPH-FA0A 0xFA0B Hwak # CJK COMPATIBILITY IDEOGRAPH-FA0B 0xFA0C Wu # CJK COMPATIBILITY IDEOGRAPH-FA0C 0xFA0D Huo # CJK COMPATIBILITY IDEOGRAPH-FA0D 0xFA10 Zhong # CJK COMPATIBILITY IDEOGRAPH-FA10 0xFA12 Qing # CJK COMPATIBILITY IDEOGRAPH-FA12 0xFA15 Xi # CJK COMPATIBILITY IDEOGRAPH-FA15 0xFA16 Zhu # CJK COMPATIBILITY IDEOGRAPH-FA16 0xFA17 Yi # CJK COMPATIBILITY IDEOGRAPH-FA17 0xFA18 Li # CJK COMPATIBILITY IDEOGRAPH-FA18 0xFA19 Shen # CJK 
COMPATIBILITY IDEOGRAPH-FA19 0xFA1A Xiang # CJK COMPATIBILITY IDEOGRAPH-FA1A 0xFA1B Fu # CJK COMPATIBILITY IDEOGRAPH-FA1B 0xFA1C Jing # CJK COMPATIBILITY IDEOGRAPH-FA1C 0xFA1D Jing # CJK COMPATIBILITY IDEOGRAPH-FA1D 0xFA1E Yu # CJK COMPATIBILITY IDEOGRAPH-FA1E 0xFA20 Hagi # CJK COMPATIBILITY IDEOGRAPH-FA20 0xFA22 Zhu # CJK COMPATIBILITY IDEOGRAPH-FA22 0xFA25 Yi # CJK COMPATIBILITY IDEOGRAPH-FA25 0xFA26 Du # CJK COMPATIBILITY IDEOGRAPH-FA26 0xFA2A Fan # CJK COMPATIBILITY IDEOGRAPH-FA2A 0xFA2B Si # CJK COMPATIBILITY IDEOGRAPH-FA2B 0xFA2C Guan # CJK COMPATIBILITY IDEOGRAPH-FA2C # # Characters 0xFB00 to 0xFBFF # 0xFB00 ff # LATIN SMALL LIGATURE FF 0xFB01 fi # LATIN SMALL LIGATURE FI 0xFB02 fl # LATIN SMALL LIGATURE FL 0xFB03 ffi # LATIN SMALL LIGATURE FFI 0xFB04 ffl # LATIN SMALL LIGATURE FFL 0xFB05 st # LATIN SMALL LIGATURE LONG S T 0xFB06 st # LATIN SMALL LIGATURE ST 0xFB13 mn # ARMENIAN SMALL LIGATURE MEN NOW 0xFB14 me # ARMENIAN SMALL LIGATURE MEN ECH 0xFB15 mi # ARMENIAN SMALL LIGATURE MEN INI 0xFB16 vn # ARMENIAN SMALL LIGATURE VEW NOW 0xFB17 mkh # ARMENIAN SMALL LIGATURE MEN XEH 0xFB1D yi # HEBREW LETTER YOD WITH HIRIQ 0xFB1F ay # HEBREW LIGATURE YIDDISH YOD YOD PATAH 0xFB20 ` # HEBREW LETTER ALTERNATIVE AYIN 0xFB22 d # HEBREW LETTER WIDE DALET 0xFB23 h # HEBREW LETTER WIDE HE 0xFB24 k # HEBREW LETTER WIDE KAF 0xFB25 l # HEBREW LETTER WIDE LAMED 0xFB26 m # HEBREW LETTER WIDE FINAL MEM 0xFB27 r # HEBREW LETTER WIDE RESH 0xFB28 t # HEBREW LETTER WIDE TAV 0xFB29 + # HEBREW LETTER ALTERNATIVE PLUS SIGN 0xFB2A sh # HEBREW LETTER SHIN WITH SHIN DOT 0xFB2B s # HEBREW LETTER SHIN WITH SIN DOT 0xFB2C sh # HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT 0xFB2D s # HEBREW LETTER SHIN WITH DAGESH AND SIN DOT 0xFB2E a # HEBREW LETTER ALEF WITH PATAH 0xFB2F a # HEBREW LETTER ALEF WITH QAMATS 0xFB31 b # HEBREW LETTER BET WITH DAGESH 0xFB32 g # HEBREW LETTER GIMEL WITH DAGESH 0xFB33 d # HEBREW LETTER DALET WITH DAGESH 0xFB34 h # HEBREW LETTER HE WITH MAPIQ 0xFB35 v # HEBREW LETTER 
VAV WITH DAGESH 0xFB36 z # HEBREW LETTER ZAYIN WITH DAGESH 0xFB38 t # HEBREW LETTER TET WITH DAGESH 0xFB39 y # HEBREW LETTER YOD WITH DAGESH 0xFB3A k # HEBREW LETTER FINAL KAF WITH DAGESH 0xFB3B k # HEBREW LETTER KAF WITH DAGESH 0xFB3C l # HEBREW LETTER LAMED WITH DAGESH 0xFB3E m # HEBREW LETTER MEM WITH DAGESH 0xFB40 n # HEBREW LETTER NUN WITH DAGESH 0xFB41 s # HEBREW LETTER SAMEKH WITH DAGESH 0xFB43 p # HEBREW LETTER FINAL PE WITH DAGESH 0xFB44 p # HEBREW LETTER PE WITH DAGESH 0xFB46 ts # HEBREW LETTER TSADI WITH DAGESH 0xFB47 k # HEBREW LETTER QOF WITH DAGESH 0xFB48 r # HEBREW LETTER RESH WITH DAGESH 0xFB49 sh # HEBREW LETTER SHIN WITH DAGESH 0xFB4A t # HEBREW LETTER TAV WITH DAGESH 0xFB4B vo # HEBREW LETTER VAV WITH HOLAM 0xFB4C b # HEBREW LETTER BET WITH RAFE 0xFB4D k # HEBREW LETTER KAF WITH RAFE 0xFB4E p # HEBREW LETTER PE WITH RAFE 0xFB4F l # HEBREW LIGATURE ALEF LAMED # # Characters 0xFC00 to 0xFCFF # # # Characters 0xFD00 to 0xFDFF # # # Characters 0xFE00 to 0xFEFF # 0xFE16 [?] # PRESENTATION FORM FOR VERTICAL QUESTION MARK 0xFE23 ~ # COMBINING DOUBLE TILDE RIGHT HALF 0xFE30 .. 
# PRESENTATION FORM FOR VERTICAL TWO DOT LEADER 0xFE31 -- # PRESENTATION FORM FOR VERTICAL EM DASH 0xFE32 - # PRESENTATION FORM FOR VERTICAL EN DASH 0xFE33 _ # PRESENTATION FORM FOR VERTICAL LOW LINE 0xFE34 _ # PRESENTATION FORM FOR VERTICAL WAVY LOW LINE 0xFE35 ( # PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS 0xFE36 ) # PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS 0xFE37 { # PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET 0xFE38 } # PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET 0xFE39 [ # PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET 0xFE3A ] # PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET 0xFE3B [( # PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET 0xFE3C )] # PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET 0xFE3D << # PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET 0xFE3E >> # PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET 0xFE3F < # PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET 0xFE40 > # PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET 0xFE41 [ # PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET 0xFE42 ] # PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET 0xFE43 { # PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET 0xFE44 } # PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET 0xFE50 , # SMALL COMMA 0xFE51 , # SMALL IDEOGRAPHIC COMMA 0xFE52 . # SMALL FULL STOP 0xFE54 ; # SMALL SEMICOLON 0xFE55 : # SMALL COLON 0xFE56 ? # SMALL QUESTION MARK 0xFE57 ! 
# SMALL EXCLAMATION MARK 0xFE58 - # SMALL EM DASH 0xFE59 ( # SMALL LEFT PARENTHESIS 0xFE5A ) # SMALL RIGHT PARENTHESIS 0xFE5B { # SMALL LEFT CURLY BRACKET 0xFE5C } # SMALL RIGHT CURLY BRACKET 0xFE5D { # SMALL LEFT TORTOISE SHELL BRACKET 0xFE5E } # SMALL RIGHT TORTOISE SHELL BRACKET 0xFE5F # # SMALL NUMBER SIGN 0xFE60 & # SMALL AMPERSAND 0xFE61 * # SMALL ASTERISK 0xFE62 + # SMALL PLUS SIGN 0xFE63 - # SMALL HYPHEN-MINUS 0xFE64 < # SMALL LESS-THAN SIGN 0xFE65 > # SMALL GREATER-THAN SIGN 0xFE66 = # SMALL EQUALS SIGN 0xFE68 \ # SMALL REVERSE SOLIDUS 0xFE69 $ # SMALL DOLLAR SIGN 0xFE6A % # SMALL PERCENT SIGN 0xFE6B @ # SMALL COMMERCIAL AT 0xFF01 ! # FULLWIDTH EXCLAMATION MARK 0xFF02 '"' # FULLWIDTH QUOTATION MARK 0xFF03 # # FULLWIDTH NUMBER SIGN 0xFF04 $ # FULLWIDTH DOLLAR SIGN 0xFF05 % # FULLWIDTH PERCENT SIGN 0xFF06 & # FULLWIDTH AMPERSAND 0xFF07 "'" # FULLWIDTH APOSTROPHE 0xFF08 ( # FULLWIDTH LEFT PARENTHESIS 0xFF09 ) # FULLWIDTH RIGHT PARENTHESIS 0xFF0A * # FULLWIDTH ASTERISK 0xFF0B + # FULLWIDTH PLUS SIGN 0xFF0C , # FULLWIDTH COMMA 0xFF0D - # FULLWIDTH HYPHEN-MINUS 0xFF0E . # FULLWIDTH FULL STOP 0xFF0F / # FULLWIDTH SOLIDUS 0xFF10 0 # FULLWIDTH DIGIT ZERO 0xFF11 1 # FULLWIDTH DIGIT ONE 0xFF12 2 # FULLWIDTH DIGIT TWO 0xFF13 3 # FULLWIDTH DIGIT THREE 0xFF14 4 # FULLWIDTH DIGIT FOUR 0xFF15 5 # FULLWIDTH DIGIT FIVE 0xFF16 6 # FULLWIDTH DIGIT SIX 0xFF17 7 # FULLWIDTH DIGIT SEVEN 0xFF18 8 # FULLWIDTH DIGIT EIGHT 0xFF19 9 # FULLWIDTH DIGIT NINE 0xFF1A : # FULLWIDTH COLON 0xFF1B ; # FULLWIDTH SEMICOLON 0xFF1C < # FULLWIDTH LESS-THAN SIGN 0xFF1D = # FULLWIDTH EQUALS SIGN 0xFF1E > # FULLWIDTH GREATER-THAN SIGN 0xFF1F ? 
# FULLWIDTH QUESTION MARK 0xFF20 @ # FULLWIDTH COMMERCIAL AT 0xFF21 A # FULLWIDTH LATIN CAPITAL LETTER A 0xFF22 B # FULLWIDTH LATIN CAPITAL LETTER B 0xFF23 C # FULLWIDTH LATIN CAPITAL LETTER C 0xFF24 D # FULLWIDTH LATIN CAPITAL LETTER D 0xFF25 E # FULLWIDTH LATIN CAPITAL LETTER E 0xFF26 F # FULLWIDTH LATIN CAPITAL LETTER F 0xFF27 G # FULLWIDTH LATIN CAPITAL LETTER G 0xFF28 H # FULLWIDTH LATIN CAPITAL LETTER H 0xFF29 I # FULLWIDTH LATIN CAPITAL LETTER I 0xFF2A J # FULLWIDTH LATIN CAPITAL LETTER J 0xFF2B K # FULLWIDTH LATIN CAPITAL LETTER K 0xFF2C L # FULLWIDTH LATIN CAPITAL LETTER L 0xFF2D M # FULLWIDTH LATIN CAPITAL LETTER M 0xFF2E N # FULLWIDTH LATIN CAPITAL LETTER N 0xFF2F O # FULLWIDTH LATIN CAPITAL LETTER O 0xFF30 P # FULLWIDTH LATIN CAPITAL LETTER P 0xFF31 Q # FULLWIDTH LATIN CAPITAL LETTER Q 0xFF32 R # FULLWIDTH LATIN CAPITAL LETTER R 0xFF33 S # FULLWIDTH LATIN CAPITAL LETTER S 0xFF34 T # FULLWIDTH LATIN CAPITAL LETTER T 0xFF35 U # FULLWIDTH LATIN CAPITAL LETTER U 0xFF36 V # FULLWIDTH LATIN CAPITAL LETTER V 0xFF37 W # FULLWIDTH LATIN CAPITAL LETTER W 0xFF38 X # FULLWIDTH LATIN CAPITAL LETTER X 0xFF39 Y # FULLWIDTH LATIN CAPITAL LETTER Y 0xFF3A Z # FULLWIDTH LATIN CAPITAL LETTER Z 0xFF3B [ # FULLWIDTH LEFT SQUARE BRACKET 0xFF3C \ # FULLWIDTH REVERSE SOLIDUS 0xFF3D ] # FULLWIDTH RIGHT SQUARE BRACKET 0xFF3E ^ # FULLWIDTH CIRCUMFLEX ACCENT 0xFF3F _ # FULLWIDTH LOW LINE 0xFF40 ` # FULLWIDTH GRAVE ACCENT 0xFF41 a # FULLWIDTH LATIN SMALL LETTER A 0xFF42 b # FULLWIDTH LATIN SMALL LETTER B 0xFF43 c # FULLWIDTH LATIN SMALL LETTER C 0xFF44 d # FULLWIDTH LATIN SMALL LETTER D 0xFF45 e # FULLWIDTH LATIN SMALL LETTER E 0xFF46 f # FULLWIDTH LATIN SMALL LETTER F 0xFF47 g # FULLWIDTH LATIN SMALL LETTER G 0xFF48 h # FULLWIDTH LATIN SMALL LETTER H 0xFF49 i # FULLWIDTH LATIN SMALL LETTER I 0xFF4A j # FULLWIDTH LATIN SMALL LETTER J 0xFF4B k # FULLWIDTH LATIN SMALL LETTER K 0xFF4C l # FULLWIDTH LATIN SMALL LETTER L 0xFF4D m # FULLWIDTH LATIN SMALL LETTER M 0xFF4E n # FULLWIDTH 
LATIN SMALL LETTER N 0xFF4F o # FULLWIDTH LATIN SMALL LETTER O 0xFF50 p # FULLWIDTH LATIN SMALL LETTER P 0xFF51 q # FULLWIDTH LATIN SMALL LETTER Q 0xFF52 r # FULLWIDTH LATIN SMALL LETTER R 0xFF53 s # FULLWIDTH LATIN SMALL LETTER S 0xFF54 t # FULLWIDTH LATIN SMALL LETTER T 0xFF55 u # FULLWIDTH LATIN SMALL LETTER U 0xFF56 v # FULLWIDTH LATIN SMALL LETTER V 0xFF57 w # FULLWIDTH LATIN SMALL LETTER W 0xFF58 x # FULLWIDTH LATIN SMALL LETTER X 0xFF59 y # FULLWIDTH LATIN SMALL LETTER Y 0xFF5A z # FULLWIDTH LATIN SMALL LETTER Z 0xFF5B { # FULLWIDTH LEFT CURLY BRACKET 0xFF5C | # FULLWIDTH VERTICAL LINE 0xFF5D } # FULLWIDTH RIGHT CURLY BRACKET 0xFF5E ~ # FULLWIDTH TILDE 0xFF61 . # HALFWIDTH IDEOGRAPHIC FULL STOP 0xFF62 [ # HALFWIDTH LEFT CORNER BRACKET 0xFF63 ] # HALFWIDTH RIGHT CORNER BRACKET 0xFF64 , # HALFWIDTH IDEOGRAPHIC COMMA 0xFF65 * # HALFWIDTH KATAKANA MIDDLE DOT 0xFF66 wo # HALFWIDTH KATAKANA LETTER WO 0xFF67 a # HALFWIDTH KATAKANA LETTER SMALL A 0xFF68 i # HALFWIDTH KATAKANA LETTER SMALL I 0xFF69 u # HALFWIDTH KATAKANA LETTER SMALL U 0xFF6A e # HALFWIDTH KATAKANA LETTER SMALL E 0xFF6B o # HALFWIDTH KATAKANA LETTER SMALL O 0xFF6C ya # HALFWIDTH KATAKANA LETTER SMALL YA 0xFF6D yu # HALFWIDTH KATAKANA LETTER SMALL YU 0xFF6E yo # HALFWIDTH KATAKANA LETTER SMALL YO 0xFF6F tu # HALFWIDTH KATAKANA LETTER SMALL TU 0xFF70 + # HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK 0xFF71 a # HALFWIDTH KATAKANA LETTER A 0xFF72 i # HALFWIDTH KATAKANA LETTER I 0xFF73 u # HALFWIDTH KATAKANA LETTER U 0xFF74 e # HALFWIDTH KATAKANA LETTER E 0xFF75 o # HALFWIDTH KATAKANA LETTER O 0xFF76 ka # HALFWIDTH KATAKANA LETTER KA 0xFF77 ki # HALFWIDTH KATAKANA LETTER KI 0xFF78 ku # HALFWIDTH KATAKANA LETTER KU 0xFF79 ke # HALFWIDTH KATAKANA LETTER KE 0xFF7A ko # HALFWIDTH KATAKANA LETTER KO 0xFF7B sa # HALFWIDTH KATAKANA LETTER SA 0xFF7C si # HALFWIDTH KATAKANA LETTER SI 0xFF7D su # HALFWIDTH KATAKANA LETTER SU 0xFF7E se # HALFWIDTH KATAKANA LETTER SE 0xFF7F so # HALFWIDTH KATAKANA LETTER SO 0xFF80 
ta # HALFWIDTH KATAKANA LETTER TA 0xFF81 ti # HALFWIDTH KATAKANA LETTER TI 0xFF82 tu # HALFWIDTH KATAKANA LETTER TU 0xFF83 te # HALFWIDTH KATAKANA LETTER TE 0xFF84 to # HALFWIDTH KATAKANA LETTER TO 0xFF85 na # HALFWIDTH KATAKANA LETTER NA 0xFF86 ni # HALFWIDTH KATAKANA LETTER NI 0xFF87 nu # HALFWIDTH KATAKANA LETTER NU 0xFF88 ne # HALFWIDTH KATAKANA LETTER NE 0xFF89 no # HALFWIDTH KATAKANA LETTER NO 0xFF8A ha # HALFWIDTH KATAKANA LETTER HA 0xFF8B hi # HALFWIDTH KATAKANA LETTER HI 0xFF8C hu # HALFWIDTH KATAKANA LETTER HU 0xFF8D he # HALFWIDTH KATAKANA LETTER HE 0xFF8E ho # HALFWIDTH KATAKANA LETTER HO 0xFF8F ma # HALFWIDTH KATAKANA LETTER MA 0xFF90 mi # HALFWIDTH KATAKANA LETTER MI 0xFF91 mu # HALFWIDTH KATAKANA LETTER MU 0xFF92 me # HALFWIDTH KATAKANA LETTER ME 0xFF93 mo # HALFWIDTH KATAKANA LETTER MO 0xFF94 ya # HALFWIDTH KATAKANA LETTER YA 0xFF95 yu # HALFWIDTH KATAKANA LETTER YU 0xFF96 yo # HALFWIDTH KATAKANA LETTER YO 0xFF97 ra # HALFWIDTH KATAKANA LETTER RA 0xFF98 ri # HALFWIDTH KATAKANA LETTER RI 0xFF99 ru # HALFWIDTH KATAKANA LETTER RU 0xFF9A re # HALFWIDTH KATAKANA LETTER RE 0xFF9B ro # HALFWIDTH KATAKANA LETTER RO 0xFF9C wa # HALFWIDTH KATAKANA LETTER WA 0xFF9D n # HALFWIDTH KATAKANA LETTER N 0xFF9E : # HALFWIDTH KATAKANA VOICED SOUND MARK 0xFF9F ; # HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK 0xFFA1 g # HALFWIDTH HANGUL LETTER KIYEOK 0xFFA2 gg # HALFWIDTH HANGUL LETTER SSANGKIYEOK 0xFFA3 gs # HALFWIDTH HANGUL LETTER KIYEOK-SIOS 0xFFA4 n # HALFWIDTH HANGUL LETTER NIEUN 0xFFA5 nj # HALFWIDTH HANGUL LETTER NIEUN-CIEUC 0xFFA6 nh # HALFWIDTH HANGUL LETTER NIEUN-HIEUH 0xFFA7 d # HALFWIDTH HANGUL LETTER TIKEUT 0xFFA8 dd # HALFWIDTH HANGUL LETTER SSANGTIKEUT 0xFFA9 r # HALFWIDTH HANGUL LETTER RIEUL 0xFFAA lg # HALFWIDTH HANGUL LETTER RIEUL-KIYEOK 0xFFAB lm # HALFWIDTH HANGUL LETTER RIEUL-MIEUM 0xFFAC lb # HALFWIDTH HANGUL LETTER RIEUL-PIEUP 0xFFAD ls # HALFWIDTH HANGUL LETTER RIEUL-SIOS 0xFFAE lt # HALFWIDTH HANGUL LETTER RIEUL-THIEUTH 0xFFAF lp # HALFWIDTH HANGUL 
LETTER RIEUL-PHIEUPH 0xFFB0 rh # HALFWIDTH HANGUL LETTER RIEUL-HIEUH 0xFFB1 m # HALFWIDTH HANGUL LETTER MIEUM 0xFFB2 b # HALFWIDTH HANGUL LETTER PIEUP 0xFFB3 bb # HALFWIDTH HANGUL LETTER SSANGPIEUP 0xFFB4 bs # HALFWIDTH HANGUL LETTER PIEUP-SIOS 0xFFB5 s # HALFWIDTH HANGUL LETTER SIOS 0xFFB6 ss # HALFWIDTH HANGUL LETTER SSANGSIOS 0xFFB8 j # HALFWIDTH HANGUL LETTER CIEUC 0xFFB9 jj # HALFWIDTH HANGUL LETTER SSANGCIEUC 0xFFBA c # HALFWIDTH HANGUL LETTER CHIEUCH 0xFFBB k # HALFWIDTH HANGUL LETTER KHIEUKH 0xFFBC t # HALFWIDTH HANGUL LETTER THIEUTH 0xFFBD p # HALFWIDTH HANGUL LETTER PHIEUPH 0xFFBE h # HALFWIDTH HANGUL LETTER HIEUH 0xFFC2 a # HALFWIDTH HANGUL LETTER A 0xFFC3 ae # HALFWIDTH HANGUL LETTER AE 0xFFC4 ya # HALFWIDTH HANGUL LETTER YA 0xFFC5 yae # HALFWIDTH HANGUL LETTER YAE 0xFFC6 eo # HALFWIDTH HANGUL LETTER EO 0xFFC7 e # HALFWIDTH HANGUL LETTER E 0xFFCA yeo # HALFWIDTH HANGUL LETTER YEO 0xFFCB ye # HALFWIDTH HANGUL LETTER YE 0xFFCC o # HALFWIDTH HANGUL LETTER O 0xFFCD wa # HALFWIDTH HANGUL LETTER WA 0xFFCE wae # HALFWIDTH HANGUL LETTER WAE 0xFFCF oe # HALFWIDTH HANGUL LETTER OE 0xFFD2 yo # HALFWIDTH HANGUL LETTER YO 0xFFD3 u # HALFWIDTH HANGUL LETTER U 0xFFD4 weo # HALFWIDTH HANGUL LETTER WEO 0xFFD5 we # HALFWIDTH HANGUL LETTER WE 0xFFD6 wi # HALFWIDTH HANGUL LETTER WI 0xFFD7 yu # HALFWIDTH HANGUL LETTER YU 0xFFDA eu # HALFWIDTH HANGUL LETTER EU 0xFFDB yi # HALFWIDTH HANGUL LETTER YI 0xFFDC i # HALFWIDTH HANGUL LETTER I 0xFFE0 /C # FULLWIDTH CENT SIGN 0xFFE1 PS # FULLWIDTH POUND SIGN 0xFFE2 ! 
# FULLWIDTH NOT SIGN 0xFFE3 - # FULLWIDTH MACRON 0xFFE4 | # FULLWIDTH BROKEN BAR 0xFFE5 Y= # FULLWIDTH YEN SIGN 0xFFE6 W= # FULLWIDTH WON SIGN 0xFFE8 | # HALFWIDTH FORMS LIGHT VERTICAL 0xFFE9 - # HALFWIDTH LEFTWARDS ARROW 0xFFEA | # HALFWIDTH UPWARDS ARROW 0xFFEB - # HALFWIDTH RIGHTWARDS ARROW 0xFFEC | # HALFWIDTH DOWNWARDS ARROW 0xFFED # # HALFWIDTH BLACK SQUARE 0xFFEE O # HALFWIDTH WHITE CIRCLE 0xFFF9 { # INTERLINEAR ANNOTATION ANCHOR 0xFFFA | # INTERLINEAR ANNOTATION SEPARATOR 0xFFFB } # INTERLINEAR ANNOTATION TERMINATOR end detox-2.0.0/tests/000077500000000000000000000000001460212773400140045ustar00rootroot00000000000000detox-2.0.0/tests/Makefile.am000066400000000000000000000000261460212773400160360ustar00rootroot00000000000000SUBDIRS = legacy unit detox-2.0.0/tests/build.sh000077500000000000000000000005341460212773400154440ustar00rootroot00000000000000#!/usr/bin/env bash set -ex # set up working dir WORK="/tmp/detox-build-test-$RANDOM" mkdir "$WORK" cd "$WORK" # git git clone -b 1.x https://github.com/dharple/detox cd detox # configure autoreconf --install ./configure # make make # tests src/detox -V tests/test.sh src/detox # distcheck make distcheck # cleanup cd rm -rf "$WORK" detox-2.0.0/tests/legacy/000077500000000000000000000000001460212773400152505ustar00rootroot00000000000000detox-2.0.0/tests/legacy/Makefile.am000066400000000000000000000003111460212773400172770ustar00rootroot00000000000000 check: if [ -x $(top_builddir)/tests/legacy/test.sh ] ; then $(top_builddir)/tests/legacy/test.sh $(top_builddir)/src/detox ; else $(top_builddir)/src/detox -V ; $(top_builddir)/src/detox -L -v ; fi detox-2.0.0/tests/legacy/character-helper.sh000077500000000000000000000043741460212773400210300ustar00rootroot00000000000000#!/usr/bin/env bash # Unicode: 0x0007 / Hex: \x07 / BELL UTF8_0007=$(printf "\007") export UTF8_0007; # Unicode: 0x0009 / Hex: \x09 / CHARACTER TABULATION UTF8_0009=$(printf "\011") export UTF8_0009; # Unicode: 0x003f / Hex: \x3f / QUESTION MARK 
UTF8_003F=$(printf "\077") export UTF8_003F; # Unicode: 0x007e / Hex: \x7e / TILDE UTF8_007E=$(printf "\176") export UTF8_007E; # Unicode: 0x007f / Hex: \x7f / DELETE UTF8_007F=$(printf "\177") export UTF8_007F; # Unicode: 0x0080 / Hex: \xc2\x80 / undefined UTF8_0080=$(printf "\302\200") export UTF8_0080; # Unicode: 0x00a0 / Hex: \xc2\xa0 / NO-BREAK SPACE UTF8_00A0=$(printf "\302\240") export UTF8_00A0; # Unicode: 0x00a9 / Hex: \xc2\xa9 / COPYRIGHT SIGN UTF8_00A9=$(printf "\302\251") export UTF8_00A9; # Unicode: 0x00ad / Hex: \xc2\xad / SOFT HYPHEN UTF8_00AD=$(printf "\302\255") export UTF8_00AD; # Unicode: 0x00ae / Hex: \xc2\xae / REGISTERED SIGN UTF8_00AE=$(printf "\302\256") export UTF8_00AE; # Unicode: 0x00b0 / Hex: \xc2\xb0 / DEGREE SIGN UTF8_00B0=$(printf "\302\260") export UTF8_00B0; # Unicode: 0x00be / Hex: \xc2\xbe / VULGAR FRACTION THREE QUARTERS UTF8_00BE=$(printf "\302\276") export UTF8_00BE; # Unicode: 0x00bf / Hex: \xc2\xbf / INVERTED QUESTION MARK UTF8_00BF=$(printf "\302\277") export UTF8_00BF; # Unicode: 0x00c0 / Hex: \xc3\x80 / LATIN CAPITAL LETTER A WITH GRAVE UTF8_00C0=$(printf "\303\200") export UTF8_00C0; # Unicode: 0x00c1 / Hex: \xc3\x81 / LATIN CAPITAL LETTER A WITH ACUTE UTF8_00C1=$(printf "\303\201") export UTF8_00C1; # Unicode: 0x00c6 / Hex: \xc3\x86 / LATIN CAPITAL LETTER AE UTF8_00C6=$(printf "\303\206") export UTF8_00C6; # Unicode: 0x00de / Hex: \xc3\x9e / LATIN CAPITAL LETTER THORN UTF8_00DE=$(printf "\303\236") export UTF8_00DE; # Unicode: 0x014a / Hex: \xc5\x8a / LATIN CAPITAL LETTER ENG UTF8_014A=$(printf "\305\212") export UTF8_014A; # Unicode: 0x0172 / Hex: \xc5\xb2 / LATIN CAPITAL LETTER U WITH OGONEK UTF8_0172=$(printf "\305\262") export UTF8_0172; # Unicode: 0x4000 / Hex: \xe4\x80\x80 UTF8_4000=$(printf "\344\200\200") export UTF8_4000; # Unicode: 0x10348 / Hex: \xf0\x90\x8d\x88 / GOTHIC LETTER HWAIR UTF8_10348=$(printf "\360\220\215\210") export UTF8_10348; # Unicode: 0x1f37a / Hex: \xf0\x9f\x8d\xba UTF8_1F37A=$(printf 
"\360\237\215\272") export UTF8_1F37A; detox-2.0.0/tests/legacy/github-issue-0011/000077500000000000000000000000001460212773400202375ustar00rootroot00000000000000detox-2.0.0/tests/legacy/github-issue-0011/test.sh000077500000000000000000000024451460212773400215620ustar00rootroot00000000000000#!/usr/bin/env bash # # Tests to confirm GitHub issue #11 is resolved # # https://github.com/dharple/detox/issues/11 # set -e if [ -z "$TESTBASE" ] ; then echo TESTBASE needs to be passed via the env exit 1 fi . "$TESTBASE"/test-functions.sh . "$TESTBASE"/character-helper.sh DETOX=$1 INPUT=$(printf ' '$'\n\n''song name'$'\n''by band'$'\n''/') OUTPUT="_______song_name_by_band_" METHOD="safe" TABLE=$(dirname $(dirname "$TESTBASE"))/table/safe.tbl test_single_table "$DETOX" "$INPUT" "$OUTPUT" "$METHOD" "$TABLE" # --------------------------------------------------------------------------- METHOD1=safe TABLEPATH=$(dirname $(dirname "$TESTBASE"))/table test_sequence "$DETOX" "$INPUT" "$OUTPUT" "$TABLEPATH" "$METHOD1" # --------------------------------------------------------------------------- METHOD1=safe-basic test_sequence "$DETOX" "$INPUT" "$OUTPUT" "$TABLEPATH" "$METHOD1" # --------------------------------------------------------------------------- OUTPUT="$INPUT" METHOD1=uncgi test_sequence "$DETOX" "$INPUT" "$OUTPUT" "$TABLEPATH" "$METHOD1" # --------------------------------------------------------------------------- OUTPUT="song_name_by_band_" METHOD1=uncgi METHOD2=safe METHOD3=wipeup test_sequence "$DETOX" "$INPUT" "$OUTPUT" "$TABLEPATH" "$METHOD1" "$METHOD2" "$METHOD3" detox-2.0.0/tests/legacy/github-issue-0012/000077500000000000000000000000001460212773400202405ustar00rootroot00000000000000detox-2.0.0/tests/legacy/github-issue-0012/test.sh000077500000000000000000000026571460212773400215700ustar00rootroot00000000000000#!/usr/bin/env bash # # Tests to confirm GitHub issue #12 is resolved # # https://github.com/dharple/detox/issues/12 # set -e if [ -z "$TESTBASE" ] ; then echo 
TESTBASE needs to be passed via the env exit 1 fi . "$TESTBASE"/test-functions.sh . "$TESTBASE"/character-helper.sh DETOX=$1 CONFIGPATH=$(dirname $(dirname "$TESTBASE"))/etc BASE=/tmp/detoxtest/ if [ ! -d $BASE ] ; then mkdir $BASE fi WORK=$(realpath $(mktemp -d $BASE/test-custom-XXXXXX)) # ----- cd "$WORK" touch "hi there" "another test" "wow" EXPECTED="hi_there another_test wow" $DETOX -r . for OUTPUT in $EXPECTED ; do if [ ! -f "$OUTPUT" ] ; then echo expected "$OUTPUT" not found exit 1 fi done # ----- cd "$WORK" touch "hi there2" "another test2" "wow2" EXPECTED="hi_there2 another_test2 wow2" cd "$TESTBASE" $DETOX -r "$WORK" cd "$WORK" for OUTPUT in $EXPECTED ; do if [ ! -f "$OUTPUT" ] ; then echo expected "$OUTPUT" not found exit 1 fi done # ----- cd "$WORK" mkdir 3 confirm.3 cd 3 touch "hi there3" "another test3" "wow3" EXPECTED="hi_there3 another_test3 wow3" cd "$TESTBASE" $DETOX -r "$WORK"/confirm.3/.. cd "$WORK"/3 for OUTPUT in $EXPECTED ; do if [ ! -f "$OUTPUT" ] ; then echo expected "$OUTPUT" not found exit 1 fi done # ----- INPUT="4/.gnome/ignore me" OUTPUT="$INPUT" cd "$WORK" mkdir -p 4/.gnome touch "$INPUT" $DETOX -f "$CONFIGPATH"/detoxrc -r 4/ if [ ! -f "$OUTPUT" ] ; then echo expected "$OUTPUT" not found exit 1 fi detox-2.0.0/tests/legacy/github-issue-0014/000077500000000000000000000000001460212773400202425ustar00rootroot00000000000000detox-2.0.0/tests/legacy/github-issue-0014/test.sh000077500000000000000000000014321460212773400215600ustar00rootroot00000000000000#!/usr/bin/env bash # # Tests to confirm GitHub issue #14 is resolved # # https://github.com/dharple/detox/issues/14 # set -e if [ -z "$TESTBASE" ] ; then echo TESTBASE needs to be passed via the env exit 1 fi . "$TESTBASE"/test-functions.sh . 
"$TESTBASE"/character-helper.sh # Run the test based on data from # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=861537 DETOX=$1 INPUT="mÉ Æ.txt" OUTPUT="mÉ_Æ.txt" METHOD="utf_8" TABLE="test.tbl" test_single_table "$DETOX" "$INPUT" "$OUTPUT" "$METHOD" "$TABLE" # --------------------------------------------------------------------------- # Run the same test, based on the default table OUTPUT="mE AE.txt" TABLE=$(dirname $(dirname "$TESTBASE"))/table/unicode.tbl test_single_table "$DETOX" "$INPUT" "$OUTPUT" "$METHOD" "$TABLE" detox-2.0.0/tests/legacy/github-issue-0014/test.tbl000066400000000000000000000013701460212773400217250ustar00rootroot00000000000000start 0x0026 _and_ # AMPERSAND # Chars to translate to _ 0x0020 _ # space 0x0021 _ # ! 0x0022 _ # " 0x0024 _ # $ 0x0027 _ # ' 0x002a _ # * 0x002f _ # / 0x003a _ # : 0x003b _ # ; 0x003c _ # < 0x003e _ # > 0x003f _ # ? 0x0040 _ # @ 0x005c _ # \ 0x0060 _ # ` 0x007c _ # | # Chars to translate to - 0x0028 - # ( 0x0029 - # ) 0x005b - # [ 0x005d - # ] 0x007b - # { 0x007d - # } #0x00c9 e #0x00c6 ae end detox-2.0.0/tests/legacy/github-issue-0019/000077500000000000000000000000001460212773400202475ustar00rootroot00000000000000detox-2.0.0/tests/legacy/github-issue-0019/test.sh000077500000000000000000000007621460212773400215720ustar00rootroot00000000000000#!/usr/bin/env bash # # Tests to confirm GitHub issue #19 is resolved # # https://github.com/dharple/detox/issues/19 # set -e if [ -z "$TESTBASE" ] ; then echo TESTBASE needs to be passed via the env exit 1 fi . "$TESTBASE"/test-functions.sh . 
"$TESTBASE"/character-helper.sh # Run the test based on data from the issue DETOX=$1 INPUT="01 5G Core Networks.pdf" OUTPUT="01 5G Core Networks.pdf" METHOD="safe" TABLE="test.tbl" test_single_table "$DETOX" "$INPUT" "$OUTPUT" "$METHOD" "$TABLE" detox-2.0.0/tests/legacy/github-issue-0019/test.tbl000066400000000000000000000006331460212773400217330ustar00rootroot00000000000000default start 0x23 _ # '#' 0x25 _ # % 0x2b + 0x2c _ # , 0x2d - 0x2e . 0x3d _ # = 0x5e _ # ^ 0x5f _ 0x7e ~ #0x20 _ # space 0x21 _ # ! 0x22 _ # " 0x24 _ # $ 0x27 _ # ' 0x2a _ # * 0x2f _ # / 0x3a _ # : 0x3b _ # ; 0x3c _ # < 0x3e _ # > 0x3f _ # ? #0x40 # @ 0x5c _ # \ 0x60 _ # ` 0x7c _ # | #0x28 - # ( #0x29 - # ) #0x5b - # [ #0x5d - # ] 0x7b - # { 0x7d - # } 0x26 _and_ # & end detox-2.0.0/tests/legacy/github-issue-0020/000077500000000000000000000000001460212773400202375ustar00rootroot00000000000000detox-2.0.0/tests/legacy/github-issue-0020/test.sh000077500000000000000000000027121460212773400215570ustar00rootroot00000000000000#!/usr/bin/env bash # # Tests to confirm GitHub issue #20 is resolved # # https://github.com/dharple/detox/issues/20 # set -e if [ -z "$TESTBASE" ] ; then echo TESTBASE needs to be passed via the env exit 1 fi . "$TESTBASE"/test-functions.sh . "$TESTBASE"/character-helper.sh DETOX=$1 BASE=/tmp/detoxtest/ if [ ! -d $BASE ] ; then mkdir $BASE fi # ------------------------------------------------- WORK=$(realpath $(mktemp -d $BASE/test-custom-XXXXXX)) cd "$WORK" SOURCE=fool INPUT="hi there" OUTPUT=$INPUT touch "$SOURCE" ln -s "$SOURCE" "$INPUT" $DETOX "$INPUT" if [ ! -f "$OUTPUT" ] ; then echo "Expected $OUTPUT is not present" exit 1 fi # -------------------------------------------------- # same test, but with --special OUTPUT="hi_there" $DETOX --special "$INPUT" if [ ! 
-f "$OUTPUT" ] ; then echo "Expected $OUTPUT is not present" exit 1 fi # -------------------------------------------------- # repeat these tests, in a subdirectory WORK=$(realpath $(mktemp -d $BASE/test-custom-XXXXXX)) cd "$WORK" mkdir test cd test SOURCE=fool INPUT="test/hi there" OUTPUT="$INPUT" touch "$SOURCE" cd "$WORK" ln -s "$SOURCE" "$INPUT" $DETOX -r "$INPUT" if [ ! -f "$OUTPUT" ] ; then echo "Expected $OUTPUT is not present" exit 1 fi # -------------------------------------------------- # same test, but with --special OUTPUT="test/hi_there" $DETOX --special -r "$INPUT" if [ ! -f "$OUTPUT" ] ; then echo "Expected $OUTPUT is not present" exit 1 fi detox-2.0.0/tests/legacy/github-issue-0021/000077500000000000000000000000001460212773400202405ustar00rootroot00000000000000detox-2.0.0/tests/legacy/github-issue-0021/test.sh000077500000000000000000000056241460212773400215650ustar00rootroot00000000000000#!/usr/bin/env bash # # Confirms that GitHub issue #21 is fixed # # https://github.com/dharple/detox/issues/21 # set -e if [ -z "$TESTBASE" ] ; then echo TESTBASE needs to be passed via the env exit 1 fi . "$TESTBASE"/test-functions.sh . 
"$TESTBASE"/character-helper.sh DETOX=$1 TABLEPATH=$(dirname $(dirname "$TESTBASE"))/table # used for test_detoxrc DETOXRC=$(dirname $(dirname "$TESTBASE"))/etc/detoxrc SEQUENCE=default # used for test_sequence METHOD1=safe-basic METHOD2=wipeup # --------------------------------------------------------------------------- INPUT="safe string" OUTPUT="safe_string" test_detoxrc "$DETOX" "$INPUT" "$OUTPUT" "$DETOXRC" "$SEQUENCE" test_detoxrc "$DETOX" "$INPUT" "$OUTPUT" "$DETOXRC" test_sequence "$DETOX" "$INPUT" "$OUTPUT" "$TABLEPATH" "$METHOD1" "$METHOD2" # --------------------------------------------------------------------------- INPUT=$(printf "${UTF8_00AE} reg") OUTPUT=$(printf "${UTF8_00AE}_reg") test_detoxrc "$DETOX" "$INPUT" "$OUTPUT" "$DETOXRC" "$SEQUENCE" test_detoxrc "$DETOX" "$INPUT" "$OUTPUT" "$DETOXRC" test_sequence "$DETOX" "$INPUT" "$OUTPUT" "$TABLEPATH" "$METHOD1" "$METHOD2" # --------------------------------------------------------------------------- INPUT=$(printf "${UTF8_00A9} copy") OUTPUT=$(printf "${UTF8_00A9}_copy") test_detoxrc "$DETOX" "$INPUT" "$OUTPUT" "$DETOXRC" "$SEQUENCE" test_detoxrc "$DETOX" "$INPUT" "$OUTPUT" "$DETOXRC" test_sequence "$DETOX" "$INPUT" "$OUTPUT" "$TABLEPATH" "$METHOD1" "$METHOD2" # --------------------------------------------------------------------------- INPUT=$(printf "${UTF8_00C6} capital AE") OUTPUT=$(printf "${UTF8_00C6}_capital_AE") test_detoxrc "$DETOX" "$INPUT" "$OUTPUT" "$DETOXRC" "$SEQUENCE" test_detoxrc "$DETOX" "$INPUT" "$OUTPUT" "$DETOXRC" test_sequence "$DETOX" "$INPUT" "$OUTPUT" "$TABLEPATH" "$METHOD1" "$METHOD2" # --------------------------------------------------------------------------- INPUT=$(printf "${UTF8_00DE} capital thorn") OUTPUT=$(printf "${UTF8_00DE}_capital_thorn") test_detoxrc "$DETOX" "$INPUT" "$OUTPUT" "$DETOXRC" "$SEQUENCE" test_detoxrc "$DETOX" "$INPUT" "$OUTPUT" "$DETOXRC" test_sequence "$DETOX" "$INPUT" "$OUTPUT" "$TABLEPATH" "$METHOD1" "$METHOD2" # 
--------------------------------------------------------------------------- INPUT=$(printf "${UTF8_014A} capital ENG") OUTPUT=$(printf "${UTF8_014A}_capital_ENG") test_detoxrc "$DETOX" "$INPUT" "$OUTPUT" "$DETOXRC" "$SEQUENCE" test_detoxrc "$DETOX" "$INPUT" "$OUTPUT" "$DETOXRC" test_sequence "$DETOX" "$INPUT" "$OUTPUT" "$TABLEPATH" "$METHOD1" "$METHOD2" # --------------------------------------------------------------------------- INPUT=$(printf "${UTF8_0172} capital U with Ogonek") OUTPUT=$(printf "${UTF8_0172}_capital_U_with_Ogonek") test_detoxrc "$DETOX" "$INPUT" "$OUTPUT" "$DETOXRC" "$SEQUENCE" test_detoxrc "$DETOX" "$INPUT" "$OUTPUT" "$DETOXRC" test_sequence "$DETOX" "$INPUT" "$OUTPUT" "$TABLEPATH" "$METHOD1" "$METHOD2" detox-2.0.0/tests/legacy/github-issue-0023/000077500000000000000000000000001460212773400202425ustar00rootroot00000000000000detox-2.0.0/tests/legacy/github-issue-0023/test.sh000077500000000000000000000034121460212773400215600ustar00rootroot00000000000000#!/usr/bin/env bash # # Tests to confirm GitHub issue #23 is resolved # # https://github.com/dharple/detox/issues/23 # set -e if [ -z "$TESTBASE" ] ; then echo TESTBASE needs to be passed via the env exit 1 fi . "$TESTBASE"/test-functions.sh . "$TESTBASE"/character-helper.sh DETOX=$1 BASE=/tmp/detoxtest/ if [ ! -d $BASE ] ; then mkdir $BASE fi WORK=$(realpath $(mktemp -d $BASE/test-custom-XXXXXX)) CHECK=$WORK/artephius.txt # ------------------------------------------------- # set up symlink loops within the testing directory # create a single file to work on cd "$WORK" mkdir test cd test ln -s .. simple_loop ln -s . 
local_loop mkdir deeper-test cd deeper-test touch 'hi there' mkdir further cd further cd "$TESTBASE" $DETOX -n -r --special "$WORK" > "$CHECK" 2>&1 COUNT=$(wc -l "$CHECK" | sed -e's/^ *//g' | cut -f 1 -d ' ') if [ "$COUNT" -ne "1" ] ; then echo expected 1 line of output from detox, found "$COUNT" instead exit 1 fi # ------------------------------------------------- # run again with no changes to confirm that the next test won't hit a false # positive $DETOX -n -r --special "$WORK" > "$CHECK" 2>&1 COUNT=$(wc -l "$CHECK" | sed -e's/^ *//g' | cut -f 1 -d ' ') if [ "$COUNT" -ne "1" ] ; then echo expected 1 line of output from detox, found "$COUNT" instead exit 1 fi # ------------------------------------------------- # set up a recursive loop cd "$WORK"/test/deeper-test/further/ ln -s ../.. long_loop cd "$TESTBASE" if [ ! -d "$WORK"/test/deeper-test/further/long_loop/deeper-test/ ] ; then echo test design failed exit 1 fi $DETOX -n -r --special "$WORK" > "$CHECK" 2>&1 COUNT=$(wc -l "$CHECK" | sed -e's/^ *//g' | cut -f 1 -d ' ') if [ "$COUNT" -ne "1" ] ; then echo expected 1 line of output from detox, found "$COUNT" instead exit 1 fi detox-2.0.0/tests/legacy/github-issue-0028/000077500000000000000000000000001460212773400202475ustar00rootroot00000000000000detox-2.0.0/tests/legacy/github-issue-0028/test.sh000077500000000000000000000047201460212773400215700ustar00rootroot00000000000000#!/usr/bin/env bash # # Tests to confirm GitHub issue #28 is resolved # # https://github.com/dharple/detox/issues/28 # set -e if [ -z "$TESTBASE" ] ; then echo TESTBASE needs to be passed via the env exit 1 fi . "$TESTBASE"/test-functions.sh . "$TESTBASE"/character-helper.sh DETOX=$1 TABLEPATH=$(dirname $(dirname "$TESTBASE"))/table # ------------------------------------------------------------------------ BASE=/tmp/detoxtest/ if [ ! 
-d $BASE ] ; then mkdir $BASE fi HOLD=$(realpath $(mktemp -d $BASE/test-custom-XXXXXX)) # ------------------------------------------------------------------------ STRACE=$(command -v strace || true) if [ -z "$STRACE" ] ; then echo WARNING: strace is not installed exit 0 fi # --------------------------------------------------------------------------- FILE=/usr/share/detox/safe.tbl if [ -f $FILE ] ; then echo "WARNING: $FILE is on the filesystem. skipping..." exit 0 fi FILE=/usr/local/share/detox/safe.tbl if [ -f $FILE ] ; then echo "WARNING: $FILE is on the filesystem. skipping..." exit 0 fi # --------------------------------------------------------------------------- INPUT="hi there.txt" OUTPUT="hi_there.txt" for METHOD1 in safe safe-basic safe-search ; do CHECK="$HOLD/strace.$METHOD1.txt" SCRIPT="$HOLD/check.$METHOD1.sh" cat <<- DONE >> "$SCRIPT" #!/usr/bin/env bash . $TESTBASE/test-functions.sh . $TESTBASE/character-helper.sh DETOX="$DETOX" TABLEPATH="$TABLEPATH" INPUT="$INPUT" OUTPUT="$OUTPUT" METHOD1="$METHOD1" test_sequence "\$DETOX" "\$INPUT" "\$OUTPUT" "\$TABLEPATH" "\$METHOD1" DONE chmod +x "$SCRIPT" $STRACE -f -o "$CHECK" -s 1024 -e trace=file "$SCRIPT" case "$METHOD1" in safe | safe-basic ) COUNT=$(grep -c 'usr.share.detox.safe.tbl' "$CHECK" || true) if [ "$COUNT" -gt 0 ] ; then echo "Found evidence of searching for safe.tbl when using $METHOD1" exit 1 fi COUNT=$(grep -c 'usr.local.share.detox.safe.tbl' "$CHECK" || true) if [ "$COUNT" -gt 0 ] ; then echo "Found evidence of searching for safe.tbl when using $METHOD1" exit 1 fi ;; safe-search ) COUNT=$(grep -c 'usr.share.detox.safe.tbl' "$CHECK" || true) if [ "$COUNT" -eq 0 ] ; then echo "Found NO evidence of searching for safe.tbl when using $METHOD1" exit 1 fi COUNT=$(grep -c 'usr.local.share.detox.safe.tbl' "$CHECK" || true) if [ "$COUNT" -eq 0 ] ; then echo "Found NO evidence of searching for safe.tbl when using $METHOD1" exit 1 fi ;; esac done 
detox-2.0.0/tests/legacy/github-issue-0037/000077500000000000000000000000001460212773400202475ustar00rootroot00000000000000detox-2.0.0/tests/legacy/github-issue-0037/detoxrc.invalid.1000066400000000000000000000001001460212773400234150ustar00rootroot00000000000000sequence "invalid" { safe; wipeup; broken { pieces; }; }; detox-2.0.0/tests/legacy/github-issue-0037/detoxrc.invalid.2000066400000000000000000000000671460212773400234320ustar00rootroot00000000000000sequence "invalid" { safe; wipeup { broken; }; }; detox-2.0.0/tests/legacy/github-issue-0037/detoxrc.invalid.3000066400000000000000000000000451460212773400234270ustar00rootroot00000000000000sequence "invalid" { safe; wipeup; detox-2.0.0/tests/legacy/github-issue-0037/test.sh000077500000000000000000000037241460212773400215730ustar00rootroot00000000000000#!/usr/bin/env bash # # Tests to confirm GitHub issue #37 is resolved. Confirms that an invalid # config file won't cause a segfault. # # https://github.com/dharple/detox/issues/37 # if [ -z "$TESTBASE" ] ; then echo TESTBASE needs to be passed via the env exit 1 fi . "$TESTBASE"/test-functions.sh . "$TESTBASE"/character-helper.sh DETOX=$1 # ------------------------------------------------------ CHECK=$($DETOX -L) if [ $? -ne 0 ] ; then echo "detox -L exited with a non-zero status" exit 1 fi # ------------------------------------------------------ CHECK=$($DETOX -f detoxrc.invalid.1 -L 2>&1) if [ $? -eq 0 ] ; then echo "detox -L with an invalid config file exited with a zero status (file 1)" exit 1 fi EXPECTED="detox: error parsing config file detoxrc.invalid.1: syntax error line 4: broken" if [ "$CHECK" != "$EXPECTED" ] ; then echo "detox -L with an invalid config file did not return the expected message (file 1)" echo "Expected: $EXPECTED" echo "Actual: $CHECK" exit 1 fi # ------------------------------------------------------ CHECK=$($DETOX -f detoxrc.invalid.2 -L 2>&1) if [ $? 
-eq 0 ] ; then echo "detox -L with an invalid config file exited with a zero status (file 2)" exit 1 fi EXPECTED="detox: error parsing config file detoxrc.invalid.2: syntax error line 4: broken" if [ "$CHECK" != "$EXPECTED" ] ; then echo "detox -L with an invalid config file did not return the expected message (file 2)" echo "Expected: $EXPECTED" echo "Actual: $CHECK" exit 1 fi # ------------------------------------------------------ CHECK=$($DETOX -f detoxrc.invalid.3 -L 2>&1) if [ $? -eq 0 ] ; then echo "detox -L with an invalid config file exited with a zero status (file 3)" exit 1 fi EXPECTED="detox: error parsing config file detoxrc.invalid.3: syntax error line 4: " if [ "$CHECK" != "$EXPECTED" ] ; then echo "detox -L with an invalid config file did not return the expected message (file 3)" echo "Expected: $EXPECTED" echo "Actual: $CHECK" exit 1 fi detox-2.0.0/tests/legacy/github-issue-0043/000077500000000000000000000000001460212773400202445ustar00rootroot00000000000000detox-2.0.0/tests/legacy/github-issue-0043/test.sh000077500000000000000000000107251460212773400215670ustar00rootroot00000000000000#!/usr/bin/env bash # # Tests to confirm GitHub issue #43 is resolved # # https://github.com/dharple/detox/issues/43 # set -e if [ -z "$TESTBASE" ] ; then echo TESTBASE needs to be passed via the env exit 1 fi . "$TESTBASE"/test-functions.sh . 
"$TESTBASE"/character-helper.sh DETOX=$1 TABLEPATH=$(dirname $(dirname "$TESTBASE"))/table METHOD1a=wipeup METHOD1b=wipeup-remove-trailing # --------------------------------------------------------------------------- INPUT="hi there" OUTPUTa="$INPUT" OUTPUTb="$INPUT" test_sequence "$DETOX" "$INPUT" "$OUTPUTa" "$TABLEPATH" "$METHOD1a" test_sequence "$DETOX" "$INPUT" "$OUTPUTb" "$TABLEPATH" "$METHOD1b" # --------------------------------------------------------------------------- INPUT="-hi there" OUTPUTa="hi there" OUTPUTb="hi there" test_sequence "$DETOX" "$INPUT" "$OUTPUTa" "$TABLEPATH" "$METHOD1a" test_sequence "$DETOX" "$INPUT" "$OUTPUTb" "$TABLEPATH" "$METHOD1b" # --------------------------------------------------------------------------- INPUT="_hi there" OUTPUTa="hi there" OUTPUTb="hi there" test_sequence "$DETOX" "$INPUT" "$OUTPUTa" "$TABLEPATH" "$METHOD1a" test_sequence "$DETOX" "$INPUT" "$OUTPUTb" "$TABLEPATH" "$METHOD1b" # --------------------------------------------------------------------------- INPUT="#hi there" OUTPUTa="hi there" OUTPUTb="hi there" test_sequence "$DETOX" "$INPUT" "$OUTPUTa" "$TABLEPATH" "$METHOD1a" test_sequence "$DETOX" "$INPUT" "$OUTPUTb" "$TABLEPATH" "$METHOD1b" # --------------------------------------------------------------------------- INPUT="-_#--hi there" OUTPUTa="hi there" OUTPUTb="hi there" test_sequence "$DETOX" "$INPUT" "$OUTPUTa" "$TABLEPATH" "$METHOD1a" test_sequence "$DETOX" "$INPUT" "$OUTPUTb" "$TABLEPATH" "$METHOD1b" # --------------------------------------------------------------------------- INPUT="hi-there" OUTPUTa="hi-there" OUTPUTb="hi-there" test_sequence "$DETOX" "$INPUT" "$OUTPUTa" "$TABLEPATH" "$METHOD1a" test_sequence "$DETOX" "$INPUT" "$OUTPUTb" "$TABLEPATH" "$METHOD1b" # --------------------------------------------------------------------------- INPUT="hi---there" OUTPUTa="hi-there" OUTPUTb="hi-there" test_sequence "$DETOX" "$INPUT" "$OUTPUTa" "$TABLEPATH" "$METHOD1a" test_sequence "$DETOX" "$INPUT" 
"$OUTPUTb" "$TABLEPATH" "$METHOD1b" # --------------------------------------------------------------------------- INPUT="hi.-there" OUTPUTa="hi.-there" OUTPUTb="hi.there" test_sequence "$DETOX" "$INPUT" "$OUTPUTa" "$TABLEPATH" "$METHOD1a" test_sequence "$DETOX" "$INPUT" "$OUTPUTb" "$TABLEPATH" "$METHOD1b" # --------------------------------------------------------------------------- INPUT="hi.-.-there" OUTPUTa="hi.-.-there" OUTPUTb="hi.there" test_sequence "$DETOX" "$INPUT" "$OUTPUTa" "$TABLEPATH" "$METHOD1a" test_sequence "$DETOX" "$INPUT" "$OUTPUTb" "$TABLEPATH" "$METHOD1b" # --------------------------------------------------------------------------- INPUT="hi_-_-there" OUTPUTa="hi-there" OUTPUTb="hi-there" test_sequence "$DETOX" "$INPUT" "$OUTPUTa" "$TABLEPATH" "$METHOD1a" test_sequence "$DETOX" "$INPUT" "$OUTPUTb" "$TABLEPATH" "$METHOD1b" # --------------------------------------------------------------------------- INPUT="hi_-_-there...-.--" OUTPUTa="hi-there...-.-" OUTPUTb="hi-there." 
test_sequence "$DETOX" "$INPUT" "$OUTPUTa" "$TABLEPATH" "$METHOD1a" test_sequence "$DETOX" "$INPUT" "$OUTPUTb" "$TABLEPATH" "$METHOD1b" # --------------------------------------------------------------------------- INPUT="hi_-_-there...-.--_-_-________----_-____-test" OUTPUTa="hi-there...-.-test" OUTPUTb="hi-there.test" test_sequence "$DETOX" "$INPUT" "$OUTPUTa" "$TABLEPATH" "$METHOD1a" test_sequence "$DETOX" "$INPUT" "$OUTPUTb" "$TABLEPATH" "$METHOD1b" # --------------------------------------------------------------------------- INPUT="hi there-" OUTPUTa="hi there-" OUTPUTb="hi there-" test_sequence "$DETOX" "$INPUT" "$OUTPUTa" "$TABLEPATH" "$METHOD1a" test_sequence "$DETOX" "$INPUT" "$OUTPUTb" "$TABLEPATH" "$METHOD1b" # --------------------------------------------------------------------------- INPUT="hi there_" OUTPUTa="hi there_" OUTPUTb="hi there_" test_sequence "$DETOX" "$INPUT" "$OUTPUTa" "$TABLEPATH" "$METHOD1a" test_sequence "$DETOX" "$INPUT" "$OUTPUTb" "$TABLEPATH" "$METHOD1b" # --------------------------------------------------------------------------- INPUT="hi there." OUTPUTa="hi there." OUTPUTb="hi there." test_sequence "$DETOX" "$INPUT" "$OUTPUTa" "$TABLEPATH" "$METHOD1a" test_sequence "$DETOX" "$INPUT" "$OUTPUTb" "$TABLEPATH" "$METHOD1b" detox-2.0.0/tests/legacy/github-issue-0050-invalid/000077500000000000000000000000001460212773400216665ustar00rootroot00000000000000detox-2.0.0/tests/legacy/github-issue-0050-invalid/detoxrc.invalid_builtin000066400000000000000000000000701460212773400264310ustar00rootroot00000000000000sequence "default" { safe { builtin "safe "; }; }; detox-2.0.0/tests/legacy/github-issue-0050-invalid/test.sh000077500000000000000000000014511460212773400232050ustar00rootroot00000000000000#!/usr/bin/env bash # # Confirms that GitHub issue #50 is fixed # # https://github.com/dharple/detox/issues/50 # if [ -z "$TESTBASE" ] ; then echo TESTBASE needs to be passed via the env exit 1 fi . "$TESTBASE"/test-functions.sh . 
"$TESTBASE"/character-helper.sh DETOX=$1 SEGFAULT=139 BASE=/tmp/detoxtest/ if [ ! -d $BASE ] ; then mkdir $BASE fi WORK=$(realpath $(mktemp -d $BASE/test-custom-XXXXXX)) # ---- cp detoxrc.invalid_builtin "$WORK"/detoxrc cd "$WORK" || exit INPUT="random filename.txt" touch "$INPUT" $DETOX -f detoxrc "$INPUT" 2>&1 RET=$? if [ $RET -eq "$SEGFAULT" ] ; then echo "possible segfault" exit 1 fi if [ $RET -ne 1 ] ; then echo "detox didn't reject an invalid builtin table name" exit 1 fi if [ ! -f "$INPUT" ] ; then echo "input file $INPUT was changed" exit 1 fi detox-2.0.0/tests/legacy/github-issue-0056/000077500000000000000000000000001460212773400202505ustar00rootroot00000000000000detox-2.0.0/tests/legacy/github-issue-0056/test.sh000077500000000000000000000021271460212773400215700ustar00rootroot00000000000000#!/usr/bin/env bash # # Tests to confirm GitHub issue #56 is resolved # # https://github.com/dharple/detox/issues/56 # set -e if [ -z "$TESTBASE" ] ; then echo TESTBASE needs to be passed via the env exit 1 fi . "$TESTBASE"/test-functions.sh . "$TESTBASE"/character-helper.sh DETOX=$1 BASE=/tmp/detoxtest/ if [ ! -d $BASE ] ; then mkdir $BASE fi # ------------------------------------------------------------------------ WORK=$(realpath $(mktemp -d $BASE/test-custom-XXXXXX)) cd "$WORK" VALGRIND=$(command -v valgrind || true) touch a b c d e f g h i j k if [ -z "$VALGRIND" ] ; then echo WARNING: valgrind is not installed else $VALGRIND --quiet --error-exitcode=1 --track-origins=yes "$DETOX" a b c d e f g h i j k fi # ------------------------------------------------------------------------ INPUT="has space" OUTPUT="has_space" for COUNT in 10 100 1000 ; do WORK=$(realpath $(mktemp -d $BASE/test-custom-XXXXXX)) cd "$WORK" touch $(seq 1 $COUNT) "$INPUT" $DETOX $(seq 1 $COUNT) "$INPUT" if [ ! 
-f "$OUTPUT" ] ; then echo "Could not find expected file '$OUTPUT'" exit 1 fi done detox-2.0.0/tests/legacy/github-issue-0062/000077500000000000000000000000001460212773400202455ustar00rootroot00000000000000detox-2.0.0/tests/legacy/github-issue-0062/test.sh000077500000000000000000000017431460212773400215700ustar00rootroot00000000000000#!/usr/bin/env bash # # Tests to confirm GitHub issue #62 is resolved # # https://github.com/dharple/detox/issues/62 # set -e if [ -z "$TESTBASE" ] ; then echo TESTBASE needs to be passed via the env exit 1 fi . "$TESTBASE"/test-functions.sh . "$TESTBASE"/character-helper.sh DETOX=$1 BASE=/tmp/detoxtest/ if [ ! -d $BASE ] ; then mkdir $BASE fi WORK=$(realpath $(mktemp -d $BASE/test-custom-XXXXXX)) # ----- cd "$WORK" mkdir 1 2 3 4 5 INPUT="1/&ersand" touch "$INPUT" INPUT="2/|bar" touch "$INPUT" INPUT="3/:colon" touch "$INPUT" INPUT="4/,comma" touch "$INPUT" INPUT="5/#octothorpe" touch "$INPUT" INPUT="%percent" touch "$INPUT" INPUT="+plus" touch "$INPUT" INPUT=";semi colon" touch "$INPUT" # ----- cd "$TESTBASE" $DETOX --recursive "$WORK" cd "$WORK" # ----- EXPECTED="1/and_ampersand 2/bar 3/colon 4/,comma 5/octothorpe %percent +plus semi_colon" for OUTPUT in $EXPECTED ; do if [ ! 
-f "$OUTPUT" ] ; then echo expected "$OUTPUT" not found exit 1 fi done detox-2.0.0/tests/legacy/github-issue-0064/000077500000000000000000000000001460212773400202475ustar00rootroot00000000000000detox-2.0.0/tests/legacy/github-issue-0064/detoxrc.base000066400000000000000000000001361460212773400225530ustar00rootroot00000000000000sequence default { safe { builtin "safe"; }; wipeup; }; ignore { filename "{arch}"; }; detox-2.0.0/tests/legacy/github-issue-0064/detoxrc.remove_trailing000066400000000000000000000001661460212773400250320ustar00rootroot00000000000000sequence default { safe { builtin "safe"; }; wipeup { remove_trailing; }; }; ignore { filename "{arch}"; }; detox-2.0.0/tests/legacy/github-issue-0064/test.sh000077500000000000000000000044601460212773400215710ustar00rootroot00000000000000#!/usr/bin/env bash # # Tests to confirm GitHub issue #64 is resolved # # https://github.com/dharple/detox/issues/64 # set -e if [ -z "$TESTBASE" ] ; then echo TESTBASE needs to be passed via the env exit 1 fi . "$TESTBASE"/test-functions.sh . "$TESTBASE"/character-helper.sh DETOX=$1 BASE=/tmp/detoxtest/ if [ ! -d $BASE ] ; then mkdir $BASE fi DETOXRC1=$(dirname $(realpath "$0"))/detoxrc.base DETOXRC2=$(dirname $(realpath "$0"))/detoxrc.remove_trailing # ------------------------------------------------- WORK=$(realpath $(mktemp -d $BASE/test-custom-XXXXXX)) cd "$WORK" mkdir test INPUT="test/.hi there" OUTPUT="$INPUT" touch "$INPUT" cd "$WORK" $DETOX -r test $DETOX -f "$DETOXRC1" -r . $DETOX -f "$DETOXRC2" -r . if [ ! -f "$OUTPUT" ] ; then echo "Expected $OUTPUT is not present" exit 1 fi # ------------------------------------------------- WORK=$(realpath $(mktemp -d $BASE/test-custom-XXXXXX)) cd "$WORK" mkdir .git INPUT=".git/.hi there" OUTPUT="$INPUT" touch "$INPUT" cd "$WORK" $DETOX -r . $DETOX -f "$DETOXRC1" -r . $DETOX -f "$DETOXRC2" -r . if [ ! 
-f "$OUTPUT" ] ; then echo "Expected $OUTPUT is not present" exit 1 fi # ------------------------------------------------- WORK=$(realpath $(mktemp -d $BASE/test-custom-XXXXXX)) cd "$WORK" mkdir -p .git/objects/ INPUT=".git/objects/hi there" OUTPUT="$INPUT" touch "$INPUT" cd "$WORK" $DETOX -r . $DETOX -f "$DETOXRC1" -r . $DETOX -f "$DETOXRC2" -r . if [ ! -f "$OUTPUT" ] ; then echo "Expected $OUTPUT is not present" exit 1 fi # ------------------------------------------------- WORK=$(realpath $(mktemp -d $BASE/test-custom-XXXXXX)) cd "$WORK" mkdir -p {arch}/objects/ INPUT="{arch}/objects/hi there" OUTPUT="$INPUT" touch "$INPUT" cd "$WORK" # {arch} isn't built in to the base config $DETOX -f "$DETOXRC1" -r . $DETOX -f "$DETOXRC2" -r . if [ ! -f "$OUTPUT" ] ; then echo "Expected $OUTPUT is not present" exit 1 fi # ------------------------------------------------- WORK=$(realpath $(mktemp -d $BASE/test-custom-XXXXXX)) cd "$WORK" mkdir -p {arch}/objects/ INPUT="{arch}/objects/hi there" OUTPUT="$INPUT" touch "$INPUT" cd "$WORK/{arch}" # {arch} isn't built in to the base config $DETOX -f "$DETOXRC1" -r .. $DETOX -f "$DETOXRC2" -r .. cd "$WORK" if [ ! -f "$OUTPUT" ] ; then echo "Expected $OUTPUT is not present" exit 1 fi detox-2.0.0/tests/legacy/legacy-dir/000077500000000000000000000000001460212773400172705ustar00rootroot00000000000000detox-2.0.0/tests/legacy/legacy-dir/test.sh000077500000000000000000000016521460212773400206120ustar00rootroot00000000000000#!/usr/bin/env bash # # Subset of legacy regression tests: recursion # set -e if [ -z "$TESTBASE" ] ; then echo TESTBASE needs to be passed via the env exit 1 fi . "$TESTBASE"/test-functions.sh . "$TESTBASE"/character-helper.sh DETOX=$1 BASE=/tmp/detoxtest/ if [ ! 
-d $BASE ] ; then mkdir $BASE fi WORK=$(realpath $(mktemp -d $BASE/test-custom-XXXXXX)) # ----- cd "$WORK" mkdir 1 2 3 4 5 INPUT="1/&ersand" touch "$INPUT" INPUT="2/|bar" touch "$INPUT" INPUT="3/:colon" touch "$INPUT" INPUT="4/,comma" touch "$INPUT" INPUT="5/#octothorpe" touch "$INPUT" INPUT="%percent" touch "$INPUT" INPUT="+plus" touch "$INPUT" INPUT=";semi colon" touch "$INPUT" # ----- cd "$TESTBASE" $DETOX -r "$WORK" cd "$WORK" # ----- EXPECTED="1/and_ampersand 2/bar 3/colon 4/,comma 5/octothorpe %percent +plus semi_colon" for OUTPUT in $EXPECTED ; do if [ ! -f "$OUTPUT" ] ; then echo expected "$OUTPUT" not found exit 1 fi done detox-2.0.0/tests/legacy/man-page-example/000077500000000000000000000000001460212773400203665ustar00rootroot00000000000000detox-2.0.0/tests/legacy/man-page-example/detoxrc.detox.tbl.5000066400000000000000000000002421460212773400240240ustar00rootroot00000000000000# START SAMPLE # Sample detoxrc sequence default { safe { filename "/home/MYUSERNAME/.local/share/detox/safe.tbl"; }; safe; wipeup; }; # EOF # END SAMPLE detox-2.0.0/tests/legacy/man-page-example/detoxrc.detoxrc.5000066400000000000000000000007471460212773400236030ustar00rootroot00000000000000# START SAMPLE # transliterate UTF-8 to ASCII (using chained tables), clean up sequence utf8 { utf_8 { filename "/usr/local/share/detox/custom.tbl"; }; utf_8 { builtin "unicode"; }; safe { builtin "safe"; }; wipeup { remove_trailing; }; max_length { length 128; }; }; # decode CGI, transliterate CP-1252 to ASCII, clean up sequence "cgi-cp1252" { uncgi; iso8859_1 { builtin "cp1252"; }; safe { builtin "safe"; }; }; # END SAMPLE detox-2.0.0/tests/legacy/man-page-example/expected.detox.tbl.5.txt000066400000000000000000000003041460212773400247720ustar00rootroot00000000000000sequence name: default (*) source file: detoxrc.detox.tbl.5 cleaner: safe translation table: /home/MYUSERNAME/.local/share/detox/safe.tbl cleaner: safe cleaner: wipeup remove trailing: no 
detox-2.0.0/tests/legacy/man-page-example/expected.detoxrc.5.txt000066400000000000000000000006641460212773400245500ustar00rootroot00000000000000sequence name: utf8 (*) source file: detoxrc.detoxrc.5 cleaner: utf_8 translation table: /usr/local/share/detox/custom.tbl cleaner: utf_8 builtin table: unicode cleaner: safe builtin table: safe cleaner: wipeup remove trailing: yes cleaner: max length length: 128 sequence name: cgi-cp1252 source file: detoxrc.detoxrc.5 cleaner: uncgi cleaner: iso8859_1 builtin table: cp1252 cleaner: safe builtin table: safe detox-2.0.0/tests/legacy/man-page-example/test.sh000077500000000000000000000012601460212773400217030ustar00rootroot00000000000000#!/usr/bin/env bash # # Confirm that the sample detoxrcs in detoxrc.5 and detox.tbl.5 parse # correctly. # set -e if [ -z "$TESTBASE" ] ; then echo TESTBASE needs to be passed via the env exit 1 fi . "$TESTBASE"/test-functions.sh . "$TESTBASE"/character-helper.sh DETOX=$1 MANPATH=$(dirname $(dirname "$TESTBASE"))/man sed -n '/START SAMPLE/,/END SAMPLE/p' "$MANPATH"/detoxrc.5 | sed -e's/^[.]." /# /' | diff - detoxrc.detoxrc.5 sed -n '/START SAMPLE/,/END SAMPLE/p' "$MANPATH"/detox.tbl.5 | sed -e's/^[.]." /# /' | diff - detoxrc.detox.tbl.5 $DETOX -f detoxrc.detoxrc.5 -L -v | diff - expected.detoxrc.5.txt $DETOX -f detoxrc.detox.tbl.5 -L -v | diff - expected.detox.tbl.5.txt detox-2.0.0/tests/legacy/man-page-sequence-with-language/000077500000000000000000000000001460212773400232755ustar00rootroot00000000000000detox-2.0.0/tests/legacy/man-page-sequence-with-language/detoxrc.manpage000066400000000000000000000001521460212773400262750ustar00rootroot00000000000000# Sample detoxrc sequence default { safe { filename "safe-manpage.tbl"; }; safe; wipeup; }; # EOF detox-2.0.0/tests/legacy/man-page-sequence-with-language/safe-manpage.tbl000066400000000000000000000015511460212773400263260ustar00rootroot00000000000000# # This is a simple example of a "safe" table. It only translates 4 characters. 
# # The default is commented out, so any character that is not in this table will # be ignored. # # default _ # # This is the main replacement block. Each line specifies a character and a # string to replace it with. # start 0x09 _tab_ # comments work on lines, too 0x24 _dollar_ # $$$ 0x26 _and_ # ampersand end # # Starts an optional, language-specific translation block. detox will read # your locale and load the block if the word after start matches the language # portion of your locale. # # In the example here, the character $ will be replaced with "_money_" if the # user is working in English. If the user is using a different language, $ # will be replaced with the value configured in the previous block, "_dollar_". # start en 0x24 _money_ # money money end # EOF detox-2.0.0/tests/legacy/man-page-sequence-with-language/test.sh000077500000000000000000000033761460212773400246240ustar00rootroot00000000000000#!/usr/bin/env bash # # Confirm that the language selection from the detox.tbl.5 man page works as # described. # if [ -z "$TESTBASE" ] ; then echo TESTBASE needs to be passed via the env exit 1 fi . "$TESTBASE"/test-functions.sh . "$TESTBASE"/character-helper.sh DETOX=$1 INLINE=$(dirname "$DETOX")/inline-detox # ---------------------------------------------------------- INPUT=$(printf "\t \$ &") # ---------------------------------------------------------- WHICH=C.UTF-8 FOUND=$(locale -a | grep -c '^C[.]') if [ "$FOUND" -gt 0 ] ; then OUTPUT="tab_dollar_and_" for VAR in LANG LC_ALL LC_CTYPE ; do eval "$VAR=$WHICH" export $VAR done CHECK=$(echo "$INPUT" | $INLINE -f detoxrc.manpage) if [ "$CHECK" != "$OUTPUT" ] ; then echo "Man page is a lie. LANG=$WHICH" locale exit 1 fi else echo "skipping LANG=$WHICH ... 
not installed" fi # ---------------------------------------------------------- WHICH=de_DE.UTF-8 FOUND=$(locale -a | grep -c '^de_DE') if [ "$FOUND" -gt 0 ] ; then OUTPUT="tab_dollar_and_" for VAR in LANG LC_ALL LC_CTYPE ; do eval "$VAR=$WHICH" export $VAR done CHECK=$(echo "$INPUT" | $INLINE -f detoxrc.manpage) if [ "$CHECK" != "$OUTPUT" ] ; then echo "Man page is a lie. LANG=$WHICH" locale exit 1 fi else echo "skipping LANG=$WHICH ... not installed" fi # ---------------------------------------------------------- WHICH=en_US.UTF-8 FOUND=$(locale -a | grep -c '^en_US') if [ "$FOUND" -gt 0 ] ; then OUTPUT="tab_money_and_" for VAR in LANG LC_ALL LC_CTYPE ; do eval "$VAR=$WHICH" export $VAR done CHECK=$(echo "$INPUT" | $INLINE -f detoxrc.manpage) if [ "$CHECK" != "$OUTPUT" ] ; then echo "Man page is a lie. LANG=$WHICH" locale exit 1 fi else echo "skipping LANG=$WHICH ... not installed" fi detox-2.0.0/tests/legacy/test-functions.sh000066400000000000000000000141471460212773400206000ustar00rootroot00000000000000#!/usr/bin/env bash # # unit test functions # ## # Realpath polyfill for systems where it doesn't exist # # @param $1 Path to examine # if [ ! -x "$(command -v realpath)" ] ; then function realpath() { readlink -f $1 } fi ## # Executes a single test against a detoxrc, tables, and sequence. # # @param $1 Detox Executable # @param $2 Input Filename # @param $3 Output Filename # @param $4 Path to Config File # @param $5 Sequence to Run (leave blank to use default) # # @return int 0 for success, 1 for failure # function test_detoxrc () { if [ -z "$1" -o -z "$2" -o -z "$3" -o -z "$4" ] ; then echo missing parameters return 1 fi local DETOX=$1 local INPUT=$2 local OUTPUT=$3 local DETOXRC=$4 local SEQUENCE=$5 if [ ! -x "$DETOX" ] ; then echo $DETOX is not executable return 1 fi if [ ! 
-f "$DETOXRC" ] ; then echo $DETOXRC is not a file return 1 fi if [ -n "$USE_VALGRIND" ] ; then VALGRIND=$(command -v valgrind || true) if [ -z "$VALGRIND" ] ; then echo "USE_VALGRIND specified, but valgrind is not installed" exit 1 else DETOX="$VALGRIND --quiet --error-exitcode=1 --track-origins=yes $DETOX" fi fi local BASE=/tmp/detoxtest/ if [ ! -d $BASE ] ; then mkdir $BASE fi local WORK=$(realpath $(mktemp -d $BASE/test-XXXXXX)) local RC=$WORK/detoxrc cp $DETOXRC $RC cd $WORK local ACTION="touch" local OPERATOR="-f" local CMDINPUT="$INPUT" if [ "${INPUT: -1}" = "/" ] ; then ACTION="mkdir" OPERATOR="-d" CMDINPUT="${INPUT:0: -1}" fi $ACTION -- "$CMDINPUT" if [ -n "$SEQUENCE" ] ; then $DETOX -s $SEQUENCE -f $RC -- "$CMDINPUT" else $DETOX -f $RC -- "$CMDINPUT" fi if [ ! $OPERATOR "$OUTPUT" ] ; then echo renaming "$INPUT" to "$OUTPUT" failed return 1 fi return 0 } ## # Executes a single test against a single function using a single table. # # @param $1 Detox Executable # @param $2 Input Filename # @param $3 Output Filename # @param $4 Method (utf_8, safe, etc) # @param $5 Table (path to the table to use) # # @return int 0 for success, 1 for failure # function test_single_table () { if [ -z "$1" -o -z "$2" -o -z "$3" -o -z "$4" -o -z "$5" ] ; then echo missing parameters return 1 fi local DETOX=$1 local INPUT=$2 local OUTPUT=$3 local METHOD=$4 local TABLE=$5 if [ ! -x "$DETOX" ] ; then echo $DETOX is not executable return 1 fi if [ ! -f "$TABLE" ] ; then echo $TABLE is not a file return 1 fi if [ -n "$USE_VALGRIND" ] ; then VALGRIND=$(command -v valgrind || true) if [ -z "$VALGRIND" ] ; then echo "USE_VALGRIND specified, but valgrind is not installed" exit 1 else DETOX="$VALGRIND --quiet --error-exitcode=1 --track-origins=yes $DETOX" fi fi local BASE=/tmp/detoxtest/ if [ ! 
-d $BASE ] ; then mkdir $BASE fi local WORK=$(realpath $(mktemp -d $BASE/test-XXXXXX)) local TMPTABLE=$WORK/test.tbl cp $TABLE $TMPTABLE local SEQUENCE=unittest local RC=$WORK/detoxrc cat <<- DONE > $RC sequence $SEQUENCE { $METHOD { filename "$TMPTABLE"; }; }; DONE cd $WORK local ACTION="touch" local OPERATOR="-f" local CMDINPUT="$INPUT" if [ "${INPUT: -1}" = "/" ] ; then ACTION="mkdir" OPERATOR="-d" CMDINPUT="${INPUT:0: -1}" fi $ACTION -- "$CMDINPUT" $DETOX -s $SEQUENCE -f $RC -- "$CMDINPUT" if [ ! $OPERATOR "$OUTPUT" ] ; then echo renaming "$INPUT" to "$OUTPUT" failed return 1 fi return 0 } ## # Executes a single test against one or more functions in a sequence, using the # stock tables when applicable, from source control. # # @param $1 Detox Executable # @param $2 Input Filename # @param $3 Output Filename # @param $4 Path to Stock Tables # @param $5 First method (utf_8, safe, etc) # @param ... Additional methods # # @return int 0 for success, 1 for failure # function test_sequence () { if [ -z "$1" -o -z "$2" -o -z "$3" -o -z "$4" -o -z "$5" ] ; then echo missing parameters return 1 fi local DETOX=$1 local INPUT=$2 local OUTPUT=$3 local TABLEPATH=$4 shift 4 if [ ! -x "$DETOX" ] ; then echo $DETOX is not executable return 1 fi if [ ! -d "$TABLEPATH" ] ; then echo $TABLEPATH is not a dir return 1 fi if [ -n "$USE_VALGRIND" ] ; then VALGRIND=$(command -v valgrind || true) if [ -z "$VALGRIND" ] ; then echo "USE_VALGRIND specified, but valgrind is not installed" exit 1 else DETOX="$VALGRIND --quiet --error-exitcode=1 --track-origins=yes $DETOX" fi fi local BASE=/tmp/detoxtest/ if [ ! 
-d $BASE ] ; then mkdir $BASE fi local WORK=$(realpath $(mktemp -d $BASE/test-XXXXXX)) local SEQUENCE=unittest local RC=$WORK/detoxrc cat <<- DONE > $RC sequence $SEQUENCE { DONE while [ -n "$1" ] ; do case $1 in safe | iso8859_1 | utf_8 ) local TABLE=$1.tbl if [ $1 = "utf_8" ] ; then TABLE="unicode.tbl" fi cp $TABLEPATH/$TABLE $WORK/$TABLE cat <<- DONE >> $RC $1 { filename "$WORK/$TABLE"; }; DONE ;; lower | uncgi | wipeup ) cat <<- DONE >> $RC $1; DONE ;; wipeup-remove-trailing ) cat <<- DONE >> $RC wipeup { remove_trailing; }; DONE ;; max-length-* ) LENGTH=${1:11} cat <<- DONE >> $RC max_length { length $LENGTH; }; DONE ;; safe-search ) cat <<- DONE >> $RC safe; DONE ;; utf_8-search ) cat <<- DONE >> $RC utf_8; DONE ;; iso8859_1-search ) cat <<- DONE >> $RC iso8859_1; DONE ;; safe-basic ) cat <<- DONE >> $RC safe { builtin "safe"; }; DONE ;; utf_8-basic ) cat <<- DONE >> $RC utf_8 { builtin "unicode"; }; DONE ;; iso8859_1-basic ) cat <<- DONE >> $RC iso8859_1 { builtin "iso8859_1"; }; DONE ;; *) echo "Unknown method $1"; return 1; ;; esac shift done cat <<- DONE >> $RC }; DONE cd $WORK local ACTION="touch" local OPERATOR="-f" local CMDINPUT="$INPUT" if [ "${INPUT: -1}" = "/" ] ; then ACTION="mkdir" OPERATOR="-d" CMDINPUT="${INPUT:0: -1}" fi $ACTION -- "$CMDINPUT" $DETOX -s $SEQUENCE -f $RC -- "$CMDINPUT" if [ ! $OPERATOR "$OUTPUT" ] ; then echo renaming "$INPUT" to "$OUTPUT" failed return 1 fi return 0 } detox-2.0.0/tests/legacy/test.sh000077500000000000000000000013731460212773400165720ustar00rootroot00000000000000#!/usr/bin/env bash # # Executes all unit tests # if [ -z "$1" ] ; then echo missing detox path exit 1 fi . $(dirname "$0")/test-functions.sh DETOX=$(realpath "$1") if [ ! 
-x "$DETOX" ] ; then echo cannot find detox at "$1" exit 1 fi cd $(dirname "$0") || exit TESTBASE=$(pwd) export TESTBASE echo DETOX="$DETOX" echo TESTBASE="$TESTBASE" SUCCESS=1 for DIR in github-* legacy-* man-page-* ; do echo "------------------------------------------------------" echo running test $(basename "$DIR") cd "$DIR" || exit ./test.sh "$DETOX" if [ $? -ne 0 ] ; then SUCCESS=0 echo FAILED fi cd "$TESTBASE" || exit done echo "------------------------------------------------------" if [ $SUCCESS -ne 1 ] ; then echo one or more tests failed exit 1 fi detox-2.0.0/tests/unit/000077500000000000000000000000001460212773400147635ustar00rootroot00000000000000detox-2.0.0/tests/unit/.gitignore000066400000000000000000000004571460212773400167610ustar00rootroot00000000000000# check output *.log *.trs # specific files test_clean_iso8859_1 test_clean_lower test_clean_max_length test_clean_safe test_clean_uncgi test_clean_utf_8 test_clean_wipeup test_filelist test_parse_inline test_spoof_config_file test_table_regression test_table_max_length test_table_resize test_wrapped detox-2.0.0/tests/unit/Makefile.am000066400000000000000000000020601460212773400170150ustar00rootroot00000000000000if WITH_CHECK AM_CFLAGS = \ @CHECK_CFLAGS@ \ -DYY_NO_INPUT \ -DYY_NO_UNPUT \ -D_FORTIFY_SOURCE=2 \ -Wall \ -Werror AM_LDFLAGS = \ $(top_builddir)/src/builtin_table.o \ $(top_builddir)/src/clean_string.o \ $(top_builddir)/src/clean_utf_8.o \ $(top_builddir)/src/config_file_lex.o \ $(top_builddir)/src/config_file.o \ $(top_builddir)/src/config_file_spoof.o \ $(top_builddir)/src/config_file_yacc.o \ $(top_builddir)/src/file.o \ $(top_builddir)/src/filelist.o \ $(top_builddir)/src/filter.o \ $(top_builddir)/src/parse_options.o \ $(top_builddir)/src/parse_table.o \ $(top_builddir)/src/sequence.o \ $(top_builddir)/src/table.o \ $(top_builddir)/src/wrapped.o \ @CHECK_LIBS@ TESTS = \ test_clean_iso8859_1 \ test_clean_lower \ test_clean_max_length \ test_clean_safe \ test_clean_uncgi \ 
test_clean_utf_8 \ test_clean_wipeup \ test_filelist \ test_parse_inline \ test_spoof_config_file \ test_table_max_length \ test_table_resize \ test_wrapped check_PROGRAMS = $(TESTS) endif # WITH_CHECK clean-local: rm -f *.gcov *.gcno *.gcda detox-2.0.0/tests/unit/fixtures/000077500000000000000000000000001460212773400166345ustar00rootroot00000000000000detox-2.0.0/tests/unit/fixtures/test_table_resize.tbl000066400000000000000000000302341460212773400230500ustar00rootroot00000000000000# # Tests table regression. # # Old copy of the unicode table with extras. # default _ start # # Basic Latin - This is a subset of the basic 7-bit ASCII table, with unsafe # characters removed. # 0x0020 space # SPACE 0x0023 # # NUMBER SIGN 0x0025 % # PERCENT SIGN 0x0026 _and_ # AMPERSAND 0x002B + # PLUS SIGN 0x002C , # COMMA 0x002D - # HYPHEN-MINUS 0x002E . # FULL STOP 0x0030 0 # DIGIT ZERO 0x0031 1 # DIGIT ONE 0x0032 2 # DIGIT TWO 0x0033 3 # DIGIT THREE 0x0034 4 # DIGIT FOUR 0x0035 5 # DIGIT FIVE 0x0036 6 # DIGIT SIX 0x0037 7 # DIGIT SEVEN 0x0038 8 # DIGIT EIGHT 0x0039 9 # DIGIT NINE 0x003D = # EQUALS SIGN 0x0041 A # LATIN CAPITAL LETTER A 0x0042 B # LATIN CAPITAL LETTER B 0x0043 C # LATIN CAPITAL LETTER C 0x0044 D # LATIN CAPITAL LETTER D 0x0045 E # LATIN CAPITAL LETTER E 0x0046 F # LATIN CAPITAL LETTER F 0x0047 G # LATIN CAPITAL LETTER G 0x0048 H # LATIN CAPITAL LETTER H 0x0049 I # LATIN CAPITAL LETTER I 0x004A J # LATIN CAPITAL LETTER J 0x004B K # LATIN CAPITAL LETTER K 0x004C L # LATIN CAPITAL LETTER L 0x004D M # LATIN CAPITAL LETTER M 0x004E N # LATIN CAPITAL LETTER N 0x004F O # LATIN CAPITAL LETTER O 0x0050 P # LATIN CAPITAL LETTER P 0x0051 Q # LATIN CAPITAL LETTER Q 0x0052 R # LATIN CAPITAL LETTER R 0x0053 S # LATIN CAPITAL LETTER S 0x0054 T # LATIN CAPITAL LETTER T 0x0055 U # LATIN CAPITAL LETTER U 0x0056 V # LATIN CAPITAL LETTER V 0x0057 W # LATIN CAPITAL LETTER W 0x0058 X # LATIN CAPITAL LETTER X 0x0059 Y # LATIN CAPITAL LETTER Y 0x005A Z # LATIN CAPITAL LETTER Z 0x005E ^ # 
CIRCUMFLEX ACCENT 0x0061 a # LATIN SMALL LETTER A 0x0062 b # LATIN SMALL LETTER B 0x0063 c # LATIN SMALL LETTER C 0x0064 d # LATIN SMALL LETTER D 0x0065 e # LATIN SMALL LETTER E 0x0066 f # LATIN SMALL LETTER F 0x0067 g # LATIN SMALL LETTER G 0x0068 h # LATIN SMALL LETTER H 0x0069 i # LATIN SMALL LETTER I 0x006A j # LATIN SMALL LETTER J 0x006B k # LATIN SMALL LETTER K 0x006C l # LATIN SMALL LETTER L 0x006D m # LATIN SMALL LETTER M 0x006E n # LATIN SMALL LETTER N 0x006F o # LATIN SMALL LETTER O 0x0070 p # LATIN SMALL LETTER P 0x0071 q # LATIN SMALL LETTER Q 0x0072 r # LATIN SMALL LETTER R 0x0073 s # LATIN SMALL LETTER S 0x0074 t # LATIN SMALL LETTER T 0x0075 u # LATIN SMALL LETTER U 0x0076 v # LATIN SMALL LETTER V 0x0077 w # LATIN SMALL LETTER W 0x0078 x # LATIN SMALL LETTER X 0x0079 y # LATIN SMALL LETTER Y 0x007A z # LATIN SMALL LETTER Z 0x007E ~ # TILDE # # CP 1252 # 0x0085 ... # HORIZONTAL ELLIPSIS 0x0088 ^ # MODIFIER LETTER CIRCUMFLEX ACCENT 0x008A S # LATIN CAPITAL LETTER S WITH CARON 0x008C OE # LATIN CAPITAL LIGATURE OE 0x0096 - # EN DASH 0x0097 - # EM DASH 0x0099 _tm_ # TRADE MARK SIGN 0x009A s # LATIN SMALL LETTER S WITH CARON 0x009C oe # LATIN SMALL LIGATURE OE 0x009F Y # LATIN CAPITAL LETTER Y WITH DIAERESIS # # Latin 1 # 0x00A2 _cent_ 0x00A3 _pound_ 0x00A5 _yen_ 0x00A9 _copy_ 0x00AE _reg_ 0x00B2 2 0x00B3 3 0x00B5 b 0x00B6 _pp_ 0x00B9 1 0x00C0 A # LATIN CAPITAL LETTER A WITH GRAVE 0x00C1 A # LATIN CAPITAL LETTER A WITH ACUTE 0x00C2 A # LATIN CAPITAL LETTER A WITH CIRCUMFLEX 0x00C3 A # LATIN CAPITAL LETTER A WITH TILDE 0x00C4 A # LATIN CAPITAL LETTER A WITH DIAERESIS 0x00C5 A # LATIN CAPITAL LETTER A WITH RING ABOVE 0x00C6 AE # LATIN CAPITAL LETTER AE 0x00C7 C # LATIN CAPITAL LETTER C WITH CEDILLA 0x00C8 E # LATIN CAPITAL LETTER E WITH GRAVE 0x00C9 E # LATIN CAPITAL LETTER E WITH ACUTE 0x00CA E # LATIN CAPITAL LETTER E WITH CIRCUMFLEX 0x00CB E # LATIN CAPITAL LETTER E WITH DIAERESIS 0x00CC I # LATIN CAPITAL LETTER I WITH GRAVE 0x00CD I # LATIN CAPITAL 
LETTER I WITH ACUTE 0x00CE I # LATIN CAPITAL LETTER I WITH CIRCUMFLEX 0x00CF I # LATIN CAPITAL LETTER I WITH DIAERESIS 0x00D0 TH # LATIN CAPITAL LETTER ETH (Icelandic) 0x00D1 N # LATIN CAPITAL LETTER N WITH TILDE 0x00D2 O # LATIN CAPITAL LETTER O WITH GRAVE 0x00D3 O # LATIN CAPITAL LETTER O WITH ACUTE 0x00D4 O # LATIN CAPITAL LETTER O WITH CIRCUMFLEX 0x00D5 O # LATIN CAPITAL LETTER O WITH TILDE 0x00D6 O # LATIN CAPITAL LETTER O WITH DIAERESIS 0x00D7 x # MULTIPLICATION SIGN 0x00D8 O # LATIN CAPITAL LETTER O WITH STROKE 0x00D9 U # LATIN CAPITAL LETTER U WITH GRAVE 0x00DA U # LATIN CAPITAL LETTER U WITH ACUTE 0x00DB U # LATIN CAPITAL LETTER U WITH CIRCUMFLEX 0x00DC U # LATIN CAPITAL LETTER U WITH DIAERESIS 0x00DD Y # LATIN CAPITAL LETTER Y WITH ACUTE 0x00DE TH # LATIN CAPITAL LETTER THORN (Icelandic) 0x00DF ss # LATIN SMALL LETTER SHARP S (German) 0x00E0 a # LATIN SMALL LETTER A WITH GRAVE 0x00E1 a # LATIN SMALL LETTER A WITH ACUTE 0x00E2 a # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x00E3 a # LATIN SMALL LETTER A WITH TILDE 0x00E4 a # LATIN SMALL LETTER A WITH DIAERESIS 0x00E5 a # LATIN SMALL LETTER A WITH RING ABOVE 0x00E6 ae # LATIN SMALL LETTER AE 0x00E7 c # LATIN SMALL LETTER C WITH CEDILLA 0x00E8 e # LATIN SMALL LETTER E WITH GRAVE 0x00E9 e # LATIN SMALL LETTER E WITH ACUTE 0x00EA e # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x00EB e # LATIN SMALL LETTER E WITH DIAERESIS 0x00EC i # LATIN SMALL LETTER I WITH GRAVE 0x00ED i # LATIN SMALL LETTER I WITH ACUTE 0x00EE i # LATIN SMALL LETTER I WITH CIRCUMFLEX 0x00EF i # LATIN SMALL LETTER I WITH DIAERESIS 0x00F0 th # LATIN SMALL LETTER ETH (Icelandic) 0x00F1 n # LATIN SMALL LETTER N WITH TILDE 0x00F2 o # LATIN SMALL LETTER O WITH GRAVE 0x00F3 o # LATIN SMALL LETTER O WITH ACUTE 0x00F4 o # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x00F5 o # LATIN SMALL LETTER O WITH TILDE 0x00F6 o # LATIN SMALL LETTER O WITH DIAERESIS 0x00F8 o # LATIN SMALL LETTER O WITH STROKE 0x00F9 u # LATIN SMALL LETTER U WITH GRAVE 0x00FA u # LATIN SMALL 
LETTER U WITH ACUTE 0x00FB u # LATIN SMALL LETTER U WITH CIRCUMFLEX 0x00FC u # LATIN SMALL LETTER U WITH DIAERESIS 0x00FD y # LATIN SMALL LETTER Y WITH ACUTE 0x00FE th # LATIN SMALL LETTER THORN (Icelandic) 0x00FF y # LATIN SMALL LETTER Y WITH DIAERESIS # # Latin Extended A # 0x0100 A # LATIN CAPITAL LETTER A WITH MACRON 0x0101 a # LATIN SMALL LETTER A WITH MACRON 0x0102 A # LATIN CAPITAL LETTER A WITH BREVE 0x0103 a # LATIN SMALL LETTER A WITH BREVE 0x0104 A # LATIN CAPITAL LETTER A WITH OGONEK 0x0105 a # LATIN SMALL LETTER A WITH OGONEK 0x0106 C # LATIN CAPITAL LETTER C WITH ACUTE 0x0107 c # LATIN SMALL LETTER C WITH ACUTE 0x0108 C # LATIN CAPITAL LETTER C WITH CIRCUMFLEX 0x0109 c # LATIN SMALL LETTER C WITH CIRCUMFLEX 0x010A C # LATIN CAPITAL LETTER C WITH DOT ABOVE 0x010B c # LATIN SMALL LETTER C WITH DOT ABOVE 0x010C C # LATIN CAPITAL LETTER C WITH CARON 0x010D c # LATIN SMALL LETTER C WITH CARON 0x010E D # LATIN CAPITAL LETTER D WITH CARON 0x010F d # LATIN SMALL LETTER D WITH CARON 0x0110 D # LATIN CAPITAL LETTER D WITH STROKE 0x0111 d # LATIN SMALL LETTER D WITH STROKE 0x0112 E # LATIN CAPITAL LETTER E WITH MACRON 0x0113 e # LATIN SMALL LETTER E WITH MACRON 0x0114 E # LATIN CAPITAL LETTER E WITH BREVE 0x0115 e # LATIN SMALL LETTER E WITH BREVE 0x0116 E # LATIN CAPITAL LETTER E WITH DOT ABOVE 0x0117 e # LATIN SMALL LETTER E WITH DOT ABOVE 0x0118 E # LATIN CAPITAL LETTER E WITH OGONEK 0x0119 e # LATIN SMALL LETTER E WITH OGONEK 0x011A E # LATIN CAPITAL LETTER E WITH CARON 0x011B e # LATIN SMALL LETTER E WITH CARON 0x011C G # LATIN CAPITAL LETTER G WITH CIRCUMFLEX 0x011D g # LATIN SMALL LETTER G WITH CIRCUMFLEX 0x011E G # LATIN CAPITAL LETTER G WITH BREVE 0x011F g # LATIN SMALL LETTER G WITH BREVE 0x0120 G # LATIN CAPITAL LETTER G WITH DOT ABOVE 0x0121 g # LATIN SMALL LETTER G WITH DOT ABOVE 0x0122 G # LATIN CAPITAL LETTER G WITH CEDILLA 0x0123 g # LATIN SMALL LETTER G WITH CEDILLA 0x0124 H # LATIN CAPITAL LETTER H WITH CIRCUMFLEX 0x0125 h # LATIN SMALL LETTER 
H WITH CIRCUMFLEX 0x0126 H # LATIN CAPITAL LETTER H WITH STROKE 0x0127 h # LATIN SMALL LETTER H WITH STROKE 0x0128 I # LATIN CAPITAL LETTER I WITH TILDE 0x0129 i # LATIN SMALL LETTER I WITH TILDE 0x012A I # LATIN CAPITAL LETTER I WITH MACRON 0x012B i # LATIN SMALL LETTER I WITH MACRON 0x012C I # LATIN CAPITAL LETTER I WITH BREVE 0x012D i # LATIN SMALL LETTER I WITH BREVE 0x012E I # LATIN CAPITAL LETTER I WITH OGONEK 0x012F i # LATIN SMALL LETTER I WITH OGONEK 0x0130 I # LATIN CAPITAL LETTER I WITH DOT ABOVE 0x0131 i # LATIN SMALL LETTER DOTLESS I 0x0132 IJ # LATIN CAPITAL LIGATURE IJ 0x0133 ij # LATIN SMALL LIGATURE IJ 0x0134 J # LATIN CAPITAL LETTER J WITH CIRCUMFLEX 0x0135 j # LATIN SMALL LETTER J WITH CIRCUMFLEX 0x0136 K # LATIN CAPITAL LETTER K WITH CEDILLA 0x0137 k # LATIN SMALL LETTER K WITH CEDILLA 0x0138 q # LATIN SMALL LETTER KRA (Greenlandic) 0x0139 L # LATIN CAPITAL LETTER L WITH ACUTE 0x013A l # LATIN SMALL LETTER L WITH ACUTE 0x013B L # LATIN CAPITAL LETTER L WITH CEDILLA 0x013C l # LATIN SMALL LETTER L WITH CEDILLA 0x013D L # LATIN CAPITAL LETTER L WITH CARON 0x013E l # LATIN SMALL LETTER L WITH CARON 0x013F L # LATIN CAPITAL LETTER L WITH MIDDLE DOT 0x0140 l # LATIN SMALL LETTER L WITH MIDDLE DOT 0x0141 L # LATIN CAPITAL LETTER L WITH STROKE 0x0142 l # LATIN SMALL LETTER L WITH STROKE 0x0143 N # LATIN CAPITAL LETTER N WITH ACUTE 0x0144 n # LATIN SMALL LETTER N WITH ACUTE 0x0145 N # LATIN CAPITAL LETTER N WITH CEDILLA 0x0146 n # LATIN SMALL LETTER N WITH CEDILLA 0x0147 N # LATIN CAPITAL LETTER N WITH CARON 0x0148 n # LATIN SMALL LETTER N WITH CARON 0x0149 n # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE 0x014A NG # LATIN CAPITAL LETTER ENG (Sami) 0x014B ng # LATIN SMALL LETTER ENG (Sami) 0x014C O # LATIN CAPITAL LETTER O WITH MACRON 0x014D o # LATIN SMALL LETTER O WITH MACRON 0x014E O # LATIN CAPITAL LETTER O WITH BREVE 0x014F o # LATIN SMALL LETTER O WITH BREVE 0x0150 O # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE 0x0151 o # LATIN SMALL LETTER O WITH 
DOUBLE ACUTE 0x0152 OE # LATIN CAPITAL LIGATURE OE 0x0153 oe # LATIN SMALL LIGATURE OE 0x0154 R # LATIN CAPITAL LETTER R WITH ACUTE 0x0155 r # LATIN SMALL LETTER R WITH ACUTE 0x0156 R # LATIN CAPITAL LETTER R WITH CEDILLA 0x0157 r # LATIN SMALL LETTER R WITH CEDILLA 0x0158 R # LATIN CAPITAL LETTER R WITH CARON 0x0159 r # LATIN SMALL LETTER R WITH CARON 0x015A S # LATIN CAPITAL LETTER S WITH ACUTE 0x015B s # LATIN SMALL LETTER S WITH ACUTE 0x015C S # LATIN CAPITAL LETTER S WITH CIRCUMFLEX 0x015D s # LATIN SMALL LETTER S WITH CIRCUMFLEX 0x015E S # LATIN CAPITAL LETTER S WITH CEDILLA 0x015F s # LATIN SMALL LETTER S WITH CEDILLA 0x0160 S # LATIN CAPITAL LETTER S WITH CARON 0x0161 s # LATIN SMALL LETTER S WITH CARON 0x0162 T # LATIN CAPITAL LETTER T WITH CEDILLA 0x0163 t # LATIN SMALL LETTER T WITH CEDILLA 0x0164 T # LATIN CAPITAL LETTER T WITH CARON 0x0165 t # LATIN SMALL LETTER T WITH CARON 0x0166 T # LATIN CAPITAL LETTER T WITH STROKE 0x0167 t # LATIN SMALL LETTER T WITH STROKE 0x0168 U # LATIN CAPITAL LETTER U WITH TILDE 0x0169 u # LATIN SMALL LETTER U WITH TILDE 0x016A U # LATIN CAPITAL LETTER U WITH MACRON 0x016B u # LATIN SMALL LETTER U WITH MACRON 0x016C U # LATIN CAPITAL LETTER U WITH BREVE 0x016D u # LATIN SMALL LETTER U WITH BREVE 0x016E U # LATIN CAPITAL LETTER U WITH RING ABOVE 0x016F u # LATIN SMALL LETTER U WITH RING ABOVE 0x0170 U # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE 0x0171 u # LATIN SMALL LETTER U WITH DOUBLE ACUTE 0x0172 U # LATIN CAPITAL LETTER U WITH OGONEK 0x0173 u # LATIN SMALL LETTER U WITH OGONEK 0x0174 W # LATIN CAPITAL LETTER W WITH CIRCUMFLEX 0x0175 w # LATIN SMALL LETTER W WITH CIRCUMFLEX 0x0176 Y # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX 0x0177 y # LATIN SMALL LETTER Y WITH CIRCUMFLEX 0x0178 Y # LATIN CAPITAL LETTER Y WITH DIAERESIS 0x0179 Z # LATIN CAPITAL LETTER Z WITH ACUTE 0x017A z # LATIN SMALL LETTER Z WITH ACUTE 0x017B Z # LATIN CAPITAL LETTER Z WITH DOT ABOVE 0x017C z # LATIN SMALL LETTER Z WITH DOT ABOVE 0x017D Z # LATIN 
CAPITAL LETTER Z WITH CARON 0x017E z # LATIN SMALL LETTER Z WITH CARON 0x017F s # LATIN SMALL LETTER LONG S # # Dunno where # 0x02C6 ^ # MODIFIER LETTER CIRCUMFLEX ACCENT 0x2010 - # HYPHEN 0x2011 - # NON-BREAKING HYPHEN 0x2012 - # FIGURE DASH 0x2013 - # EN DASH 0x2014 - # EM DASH 0x2015 - # HORIZONTAL BAR 0x2026 ... # HORIZONTAL ELLIPSIS 0x2122 _tm_ # TRADE MARK SIGN # # REGRESSION TESTING SECTION # 0xC0DE code # testing value 0xCAFE cafe # testing value 0xFACE face # testing value # 0xFACE punim # uncomment to break unit test # 0x4000 broken # uncomment to break unit test # # REGRESSION TESTING SECTION # end detox-2.0.0/tests/unit/test_clean_iso8859_1.c000066400000000000000000000057731460212773400207140ustar00rootroot00000000000000/* * DO NOT EDIT THIS FILE. Generated by checkmk. * Edit the original source file "test_clean_iso8859_1.template" instead. * Run `make internals` from the base of the project to regenerate this file. */ #include #line 1 "test_clean_iso8859_1.template" /** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/ #include #include #include #include "builtin_table.h" #include "clean_string.h" #include "detox_struct.h" #include "table.h" #include "unit_struct.h" #define DATA_COUNT 6 static struct test_filename data[DATA_COUNT] = { // legacy tests { .filename = "safe string", .expected_a = "safe string", .expected_b = "safe string", }, { .filename = "\xAE reg", .expected_a = "_reg_ reg", .expected_b = "_reg_ reg", }, { .filename = "\xA9 copy", .expected_a = "_copy_ copy", .expected_b = "_copy_ copy", }, { .filename = "\xC6 capital AE", .expected_a = "AE capital AE", .expected_b = "AE capital AE", }, { .filename = "\xDE capital thorn", .expected_a = "TH capital thorn", .expected_b = "TH capital thorn", }, // full coverage - test default translation rules { .filename = "\x80 not in table", .expected_a = "_ not in table", .expected_b = "\x80 not in table", }, }; START_TEST(test_clean_iso8859_1) { #line 62 table_t *table_a; table_t *table_b; char *output; int i; table_a = load_builtin_iso8859_1_table(); table_b = load_builtin_iso8859_1_table(); table_a->default_translation = strdup("_"); table_b->default_translation = NULL; // confirm that our a/b test is still valid ck_assert(table_get(table_b, 0x80) == NULL); for (i = 0; i < DATA_COUNT; i++) { output = clean_iso8859_1(data[i].filename, table_a); ck_assert_str_eq(output, data[i].expected_a); output = clean_iso8859_1(data[i].filename, table_b); ck_assert_str_eq(output, data[i].expected_b); } } END_TEST START_TEST(test_clean_iso8859_1_null) { #line 85 char *output; // confirm NULL works output = clean_iso8859_1(NULL, NULL); ck_assert(output == NULL); } END_TEST START_TEST(test_clean_iso8859_1_missing_table) { #line 92 clean_iso8859_1("what", NULL); } END_TEST int main(void) { Suite *s1 = suite_create("Core"); TCase *tc1_1 = tcase_create("Core"); SRunner *sr = srunner_create(s1); int nf; suite_add_tcase(s1, tc1_1); tcase_add_test(tc1_1, test_clean_iso8859_1); tcase_add_test(tc1_1, test_clean_iso8859_1_null); 
tcase_add_exit_test(tc1_1, test_clean_iso8859_1_missing_table, 1); srunner_run_all(sr, CK_ENV); nf = srunner_ntests_failed(sr); srunner_free(sr); return nf == 0 ? 0 : 1; } detox-2.0.0/tests/unit/test_clean_iso8859_1.template000066400000000000000000000043041460212773400222720ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #include #include #include #include "builtin_table.h" #include "clean_string.h" #include "detox_struct.h" #include "table.h" #include "unit_struct.h" #define DATA_COUNT 6 static struct test_filename data[DATA_COUNT] = { // legacy tests { .filename = "safe string", .expected_a = "safe string", .expected_b = "safe string", }, { .filename = "\xAE reg", .expected_a = "_reg_ reg", .expected_b = "_reg_ reg", }, { .filename = "\xA9 copy", .expected_a = "_copy_ copy", .expected_b = "_copy_ copy", }, { .filename = "\xC6 capital AE", .expected_a = "AE capital AE", .expected_b = "AE capital AE", }, { .filename = "\xDE capital thorn", .expected_a = "TH capital thorn", .expected_b = "TH capital thorn", }, // full coverage - test default translation rules { .filename = "\x80 not in table", .expected_a = "_ not in table", .expected_b = "\x80 not in table", }, }; #test test_clean_iso8859_1 table_t *table_a; table_t *table_b; char *output; int i; table_a = load_builtin_iso8859_1_table(); table_b = load_builtin_iso8859_1_table(); table_a->default_translation = strdup("_"); table_b->default_translation = NULL; // confirm that our a/b test is still valid ck_assert(table_get(table_b, 0x80) == NULL); for (i = 0; i < DATA_COUNT; i++) { output = clean_iso8859_1(data[i].filename, table_a); ck_assert_str_eq(output, data[i].expected_a); output = clean_iso8859_1(data[i].filename, table_b); ck_assert_str_eq(output, data[i].expected_b); } #test test_clean_iso8859_1_null char *output; // confirm 
NULL works output = clean_iso8859_1(NULL, NULL); ck_assert(output == NULL); #test-exit(1) test_clean_iso8859_1_missing_table clean_iso8859_1("what", NULL); detox-2.0.0/tests/unit/test_clean_lower.c000066400000000000000000000032761460212773400204700ustar00rootroot00000000000000/* * DO NOT EDIT THIS FILE. Generated by checkmk. * Edit the original source file "test_clean_lower.template" instead. * Run `make internals` from the base of the project to regenerate this file. */ #include #line 1 "test_clean_lower.template" /** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #include // need defines first #include "config.h" #include "clean_string.h" #include "unit_struct.h" #define DATA_COUNT 4 static struct test_filename data[DATA_COUNT] = { // legacy tests { .filename = "lower", .expected = "lower" }, { .filename = "L0W3R", .expected = "l0w3r" }, { .filename = "UPPER", .expected = "upper" }, { .filename = "UPPer_2", .expected = "upper_2" }, }; START_TEST(test_clean_lower) { #line 32 char *output; int i; for (i = 0; i < DATA_COUNT; i++) { output = clean_lower(data[i].filename); ck_assert_str_eq(output, data[i].expected); } } END_TEST START_TEST(test_clean_lower_null) { #line 41 char *output; // confirm NULL works output = clean_lower(NULL); ck_assert(output == NULL); } END_TEST int main(void) { Suite *s1 = suite_create("Core"); TCase *tc1_1 = tcase_create("Core"); SRunner *sr = srunner_create(s1); int nf; suite_add_tcase(s1, tc1_1); tcase_add_test(tc1_1, test_clean_lower); tcase_add_test(tc1_1, test_clean_lower_null); srunner_run_all(sr, CK_ENV); nf = srunner_ntests_failed(sr); srunner_free(sr); return nf == 0 ? 0 : 1; } detox-2.0.0/tests/unit/test_clean_lower.template000066400000000000000000000017631460212773400220600ustar00rootroot00000000000000/** * This file is part of the Detox package. 
* * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #include // need defines first #include "config.h" #include "clean_string.h" #include "unit_struct.h" #define DATA_COUNT 4 static struct test_filename data[DATA_COUNT] = { // legacy tests { .filename = "lower", .expected = "lower" }, { .filename = "L0W3R", .expected = "l0w3r" }, { .filename = "UPPER", .expected = "upper" }, { .filename = "UPPer_2", .expected = "upper_2" }, }; #test test_clean_lower char *output; int i; for (i = 0; i < DATA_COUNT; i++) { output = clean_lower(data[i].filename); ck_assert_str_eq(output, data[i].expected); } #test test_clean_lower_null char *output; // confirm NULL works output = clean_lower(NULL); ck_assert(output == NULL); detox-2.0.0/tests/unit/test_clean_max_length.c000066400000000000000000000076751460212773400214750ustar00rootroot00000000000000/* * DO NOT EDIT THIS FILE. Generated by checkmk. * Edit the original source file "test_clean_max_length.template" instead. * Run `make internals` from the base of the project to regenerate this file. */ #include #line 1 "test_clean_max_length.template" /** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/ #include #include #include #include "clean_string.h" #include "detox_struct.h" #include "unit_struct.h" #define DATA_COUNT 18 static struct test_filename data[DATA_COUNT] = { // legacy tests { .filename = "___________underscore_______________________.x", .expected = "___________underscore_________.x", .max_length = 32, }, { .filename = "..........how.....does.....this....translate......", .expected = "..........how.....does.....this.", .max_length = 32, }, { .filename = "_-_-_-_-_-dotted-_-_-_-_line.....part......two.......", .expected = "_-_-_-_-_-dotted-_-_-_-_line....", .max_length = 32, }, { .filename = "blah-.-de_._da", .expected = "blah-.-de_._da", .max_length = 32, }, // from the detoxrc.5 manpage { .filename = "this_is_my_file.txt", .expected = "this_is_.txt", .max_length = 12, }, // github issue 46 { .filename = "safe.txt", .expected = "safe.txt", .max_length = 32, }, { .filename = "safe.and.stu.ff.txt", .expected = "safe.an.ff.txt", .max_length = 14, }, { .filename = "safe and stuff.txt", .expected = "safe and.txt", .max_length = 12, }, { .filename = "safe and stuff.txt", .expected = "safe.txt", .max_length = 8, }, { .filename = "safe.tar.gz", .expected = "safe.tar.gz", .max_length = 32, }, { .filename = "safe and stuff.tar.gz", .expected = "safe and stuff.tar.gz", .max_length = 22, }, { .filename = "safe and stuff.tar.gz", .expected = "safe and stuff.tar.gz", .max_length = 21, }, { .filename = "safe and stuff.tar.gz", .expected = "safe and stuf.tar.gz", .max_length = 20, }, { .filename = "safe and stuff.tar.gz", .expected = "safe .tar.gz", .max_length = 12, }, { .filename = "safe and stuff.tar.gz", .expected = "sa.tar.gz", .max_length = 9, }, { .filename = "safe and stuff.tar.gz", .expected = "s.tar.gz", .max_length = 8, }, { .filename = "safe and stuff.tar.gz", .expected = "safe and stuff.tar.gz", .max_length = 7, }, { .filename = "safe and stuff.tar.gz", .expected = "safe and stuff.tar.gz", .max_length = 0, }, }; START_TEST(test_clean_max_length) { 
#line 135 char *output; int i; for (i = 0; i < DATA_COUNT; i++) { output = clean_max_length(data[i].filename, data[i].max_length); ck_assert_str_eq(output, data[i].expected); } } END_TEST START_TEST(test_clean_max_length_null) { #line 144 char *output; // confirm NULL works output = clean_max_length(NULL, 0); ck_assert(output == NULL); } END_TEST int main(void) { Suite *s1 = suite_create("Core"); TCase *tc1_1 = tcase_create("Core"); SRunner *sr = srunner_create(s1); int nf; suite_add_tcase(s1, tc1_1); tcase_add_test(tc1_1, test_clean_max_length); tcase_add_test(tc1_1, test_clean_max_length_null); srunner_run_all(sr, CK_ENV); nf = srunner_ntests_failed(sr); srunner_free(sr); return nf == 0 ? 0 : 1; } detox-2.0.0/tests/unit/test_clean_max_length.template000066400000000000000000000063341460212773400230550ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/ #include #include #include #include "clean_string.h" #include "detox_struct.h" #include "unit_struct.h" #define DATA_COUNT 18 static struct test_filename data[DATA_COUNT] = { // legacy tests { .filename = "___________underscore_______________________.x", .expected = "___________underscore_________.x", .max_length = 32, }, { .filename = "..........how.....does.....this....translate......", .expected = "..........how.....does.....this.", .max_length = 32, }, { .filename = "_-_-_-_-_-dotted-_-_-_-_line.....part......two.......", .expected = "_-_-_-_-_-dotted-_-_-_-_line....", .max_length = 32, }, { .filename = "blah-.-de_._da", .expected = "blah-.-de_._da", .max_length = 32, }, // from the detoxrc.5 manpage { .filename = "this_is_my_file.txt", .expected = "this_is_.txt", .max_length = 12, }, // github issue 46 { .filename = "safe.txt", .expected = "safe.txt", .max_length = 32, }, { .filename = "safe.and.stu.ff.txt", .expected = "safe.an.ff.txt", .max_length = 14, }, { .filename = "safe and stuff.txt", .expected = "safe and.txt", .max_length = 12, }, { .filename = "safe and stuff.txt", .expected = "safe.txt", .max_length = 8, }, { .filename = "safe.tar.gz", .expected = "safe.tar.gz", .max_length = 32, }, { .filename = "safe and stuff.tar.gz", .expected = "safe and stuff.tar.gz", .max_length = 22, }, { .filename = "safe and stuff.tar.gz", .expected = "safe and stuff.tar.gz", .max_length = 21, }, { .filename = "safe and stuff.tar.gz", .expected = "safe and stuf.tar.gz", .max_length = 20, }, { .filename = "safe and stuff.tar.gz", .expected = "safe .tar.gz", .max_length = 12, }, { .filename = "safe and stuff.tar.gz", .expected = "sa.tar.gz", .max_length = 9, }, { .filename = "safe and stuff.tar.gz", .expected = "s.tar.gz", .max_length = 8, }, { .filename = "safe and stuff.tar.gz", .expected = "safe and stuff.tar.gz", .max_length = 7, }, { .filename = "safe and stuff.tar.gz", .expected = "safe and stuff.tar.gz", .max_length = 0, }, }; #test test_clean_max_length char 
*output; int i; for (i = 0; i < DATA_COUNT; i++) { output = clean_max_length(data[i].filename, data[i].max_length); ck_assert_str_eq(output, data[i].expected); } #test test_clean_max_length_null char *output; // confirm NULL works output = clean_max_length(NULL, 0); ck_assert(output == NULL); detox-2.0.0/tests/unit/test_clean_safe.c000066400000000000000000000143111460212773400202460ustar00rootroot00000000000000/* * DO NOT EDIT THIS FILE. Generated by checkmk. * Edit the original source file "test_clean_safe.template" instead. * Run `make internals` from the base of the project to regenerate this file. */ #include #line 1 "test_clean_safe.template" /** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #include #include #include #include "builtin_table.h" #include "clean_string.h" #include "detox_struct.h" #include "table.h" #include "unit_struct.h" #define DATA_COUNT 46 static struct test_filename data[DATA_COUNT] = { // legacy tests { .filename = "lower", .expected_a = "lower" }, { .filename = "^acute", .expected_a = "^acute" }, { .filename = "&ersand", .expected_a = "_and_ampersand" }, { .filename = " * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #include #include #include #include "builtin_table.h" #include "clean_string.h" #include "detox_struct.h" #include "table.h" #include "unit_struct.h" #define DATA_COUNT 46 static struct test_filename data[DATA_COUNT] = { // legacy tests { .filename = "lower", .expected_a = "lower" }, { .filename = "^acute", .expected_a = "^acute" }, { .filename = "&ersand", .expected_a = "_and_ampersand" }, { .filename = " #line 1 "test_clean_uncgi.template" /** * This file is part of the Detox package. 
* * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #include #include "clean_string.h" #include "unit_struct.h" #define DATA_COUNT 10 static struct test_filename data[DATA_COUNT] = { // legacy tests { .filename = "%3Dequals", .expected = "=equals" }, { .filename = "%25percent", .expected = "%percent" }, { .filename = "%3Fquestion%20mark", .expected = "?question mark" }, { .filename = "%2bplus", .expected = "+plus" }, { .filename = "%2ccomma", .expected = ",comma" }, { .filename = "%2Fslash", .expected = "/slash" }, // full coverage { .filename = "lower", .expected = "lower" }, { .filename = "%20space", .expected = " space" }, { .filename = "UPPER%3ALOWER", .expected = "UPPER:LOWER" }, { .filename = "here+and+there", .expected = "here and there" }, }; START_TEST(test_clean_uncgi) { #line 38 char *output; int i; for (i = 0; i < DATA_COUNT; i++) { output = clean_uncgi(data[i].filename); ck_assert_str_eq(output, data[i].expected); } } END_TEST START_TEST(test_clean_uncgi_null) { #line 47 char *output; // confirm NULL works output = clean_uncgi(NULL); ck_assert(output == NULL); } END_TEST int main(void) { Suite *s1 = suite_create("Core"); TCase *tc1_1 = tcase_create("Core"); SRunner *sr = srunner_create(s1); int nf; suite_add_tcase(s1, tc1_1); tcase_add_test(tc1_1, test_clean_uncgi); tcase_add_test(tc1_1, test_clean_uncgi_null); srunner_run_all(sr, CK_ENV); nf = srunner_ntests_failed(sr); srunner_free(sr); return nf == 0 ? 0 : 1; } detox-2.0.0/tests/unit/test_clean_uncgi.template000066400000000000000000000026301460212773400220270ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/ #include #include "clean_string.h" #include "unit_struct.h" #define DATA_COUNT 10 static struct test_filename data[DATA_COUNT] = { // legacy tests { .filename = "%3Dequals", .expected = "=equals" }, { .filename = "%25percent", .expected = "%percent" }, { .filename = "%3Fquestion%20mark", .expected = "?question mark" }, { .filename = "%2bplus", .expected = "+plus" }, { .filename = "%2ccomma", .expected = ",comma" }, { .filename = "%2Fslash", .expected = "/slash" }, // full coverage { .filename = "lower", .expected = "lower" }, { .filename = "%20space", .expected = " space" }, { .filename = "UPPER%3ALOWER", .expected = "UPPER:LOWER" }, { .filename = "here+and+there", .expected = "here and there" }, }; #test test_clean_uncgi char *output; int i; for (i = 0; i < DATA_COUNT; i++) { output = clean_uncgi(data[i].filename); ck_assert_str_eq(output, data[i].expected); } #test test_clean_uncgi_null char *output; // confirm NULL works output = clean_uncgi(NULL); ck_assert(output == NULL); detox-2.0.0/tests/unit/test_clean_utf_8.c000066400000000000000000000164011460212773400203570ustar00rootroot00000000000000/* * DO NOT EDIT THIS FILE. Generated by checkmk. * Edit the original source file "test_clean_utf_8.template" instead. * Run `make internals` from the base of the project to regenerate this file. */ #include #line 1 "test_clean_utf_8.template" /** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/ #include #include #include #include "builtin_table.h" #include "clean_utf_8.h" #include "detox_struct.h" #include "table.h" #include "unit_struct.h" #define DATA_COUNT 19 static struct test_filename data[DATA_COUNT] = { // legacy { .filename = "safe string", .expected_a = "safe string", .expected_b = "safe string", }, { .filename = "\u00AE reg", .expected_a = "_reg_ reg", .expected_b = "_reg_ reg", }, { .filename = "\u00A9 copy", .expected_a = "_copy_ copy", .expected_b = "_copy_ copy", }, { .filename = "\u00C6 capital AE", .expected_a = "AE capital AE", .expected_b = "AE capital AE", }, { .filename = "\u00DE capital thorn", .expected_a = "TH capital thorn", .expected_b = "TH capital thorn", }, { .filename = "\u014A capital ENG", .expected_a = "NG capital ENG", .expected_b = "NG capital ENG", }, { .filename = "\u0172 capital U with Ogonek", .expected_a = "U capital U with Ogonek", .expected_b = "U capital U with Ogonek", }, // github issue 40 // + full coverage - test default translation rules { .filename = "_ () whatever", .expected_a = "_ () whatever", .expected_b = "_ () whatever", }, { .filename = "\u4000 ignore", .expected_a = "_ ignore", .expected_b = "\u4000 ignore", }, { .filename = "\x7F delete", .expected_a = "_ delete", .expected_b = "\x7F delete", }, // 2 byte UTF-8 encoded 0x00 (null) { .filename = "\xC0\x80 null", .expected_a = "_hidden_null_ null", .expected_b = "_hidden_null_ null", }, // 2 byte UTF-8 encoded 0x20 (space) { .filename = "\xC0\xA0 space", .expected_a = " space", .expected_b = " space", }, // 2 byte UTF-8 encoded 0x30 (zero) { .filename = "\xC0\xB0 zero", .expected_a = "0 zero", .expected_b = "0 zero", }, // 2 byte UTF-8 encoded 0x7E (tilde) { .filename = "\xC1\xBE tilde", .expected_a = "~ tilde", .expected_b = "~ tilde", }, // 2 byte UTF-8 encoded 0x7F (tilde) { .filename = "\xC1\xBF delete", .expected_a = "_ delete", .expected_b = "\xC1\xBF delete", }, // github issue 33 { .filename = "\U00010348 hwair", .expected_a = "hu hwair", 
.expected_b = "hu hwair", }, // confirm invalid single-byte character sequence { .filename = "\x80 delete", .expected_a = "_ delete", .expected_b = "_ delete", }, // phishing { .filename = "\u03A1a\u200E\u200EyPa\u1963\u200E : Y\u200E\u200Eo\u1959r \u200Ea\u1974\u1974o\u200E\u1959\u1952t \u200Eis \u1974\u1959\u200E\u200Err\u1971\u200E\u200E\u1952t\u1963y\u200E \u1963o\u200E\u200E\u1974k\u1971d\u200E\u200E for \u200Es\u1971\u1974\u1959\u200E\u200Erity \u200Er\u1971as\u200E\u200Eo\u1952s. P\u200E\u200E\u1963\u1971a\u200Es\u1971 \u1974\u1963\u200E\u200Ei\u1974k \u200E\u200Eo\u1952 t\u200Eh\u1971 \u1963i\u200E\u200E\u1952k b\u1971\u200E\u200E\u1963ow t\u200Eo \u1959\u1952\u1963\u200E\u200Eo\u1974k \u200Eyo\u200E\u1959r a\u200E\u200E\u1974\u1974o\u200E\u1959\u1952t", .expected_a = "PayPal : Your account is currently locked for security reasons. Please click on the link below to unlock your account", .expected_b = "PayPal : Your account is currently locked for security reasons. Please click on the link below to unlock your account", }, { .filename = "C\u200E\u200EAS\u200EE \u200EID :\u200E E", .expected_a = "CASE ID : E", .expected_b = "CASE ID : E", }, }; START_TEST(test_clean_utf_8) { #line 147 table_t *table_a; table_t *table_b; char *output; int i; table_a = load_builtin_unicode_table(); table_b = load_builtin_unicode_table(); table_a->default_translation = strdup("_"); table_b->default_translation = NULL; // confirm that our a/b test is still valid ck_assert(table_get(table_b, 0x4000) == NULL); ck_assert(table_get(table_b, 0x007F) == NULL); // legacy tests for (i = 0; i < DATA_COUNT; i++) { output = clean_utf_8(data[i].filename, table_a); ck_assert_str_eq(output, data[i].expected_a); output = clean_utf_8(data[i].filename, table_b); ck_assert_str_eq(output, data[i].expected_b); } } END_TEST START_TEST(test_clean_utf_8_null) { #line 172 char *output; // confirm NULL works output = clean_utf_8(NULL, NULL); ck_assert(output == NULL); } END_TEST 
START_TEST(test_clean_utf_8_missing_table) { #line 179 clean_utf_8("what", NULL); } END_TEST START_TEST(test_clean_utf_8_invalid) { #line 182 table_t *table; char *output; table = load_builtin_unicode_table(); table->default_translation = strdup("_"); // test an invalid UTF-8 sequence // the cleaner should replace the invalid sequence with an underscore output = clean_utf_8("\xC0" "blah", table); ck_assert_str_eq(output, "_blah"); output = clean_utf_8("blah" "\xC0", table); ck_assert_str_eq(output, "blah_"); } END_TEST START_TEST(test_clean_utf_8_beyond_unicode_max) { #line 198 table_t *table; char *output; table = load_builtin_unicode_table(); table->default_translation = strdup("_"); // 3-byte encoding a lower ASCII "6" output = clean_utf_8("\xE0\x80\xB6" "blah", table); ck_assert_str_eq(output, "6blah"); // 4-byte encoding a lower ASCII "6" output = clean_utf_8("\xF0\x80\x80\xB6" "blah", table); ck_assert_str_eq(output, "6blah"); // 5-byte encoding a lower ASCII "6" output = clean_utf_8("\xF8\x80\x80\x80\xB6" "blah", table); ck_assert_str_eq(output, "6blah"); // 6-byte encoding a lower ASCII "6" output = clean_utf_8("\xF8\x80\x80\x80\xB6" "blah", table); ck_assert_str_eq(output, "6blah"); // 6-byte max on Ubuntu 20.04 // Unicode 0x7FFFFFFF output = clean_utf_8("\xFD\xBF\xBF\xBF\xBF\xBF" "blah", table); ck_assert_str_eq(output, "_blah"); } END_TEST int main(void) { Suite *s1 = suite_create("Core"); TCase *tc1_1 = tcase_create("Core"); SRunner *sr = srunner_create(s1); int nf; suite_add_tcase(s1, tc1_1); tcase_add_test(tc1_1, test_clean_utf_8); tcase_add_test(tc1_1, test_clean_utf_8_null); tcase_add_exit_test(tc1_1, test_clean_utf_8_missing_table, 1); tcase_add_test(tc1_1, test_clean_utf_8_invalid); tcase_add_test(tc1_1, test_clean_utf_8_beyond_unicode_max); srunner_run_all(sr, CK_ENV); nf = srunner_ntests_failed(sr); srunner_free(sr); return nf == 0 ? 
0 : 1; } detox-2.0.0/tests/unit/test_clean_utf_8.template000066400000000000000000000144521460212773400217540ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #include #include #include #include "builtin_table.h" #include "clean_utf_8.h" #include "detox_struct.h" #include "table.h" #include "unit_struct.h" #define DATA_COUNT 19 static struct test_filename data[DATA_COUNT] = { // legacy { .filename = "safe string", .expected_a = "safe string", .expected_b = "safe string", }, { .filename = "\u00AE reg", .expected_a = "_reg_ reg", .expected_b = "_reg_ reg", }, { .filename = "\u00A9 copy", .expected_a = "_copy_ copy", .expected_b = "_copy_ copy", }, { .filename = "\u00C6 capital AE", .expected_a = "AE capital AE", .expected_b = "AE capital AE", }, { .filename = "\u00DE capital thorn", .expected_a = "TH capital thorn", .expected_b = "TH capital thorn", }, { .filename = "\u014A capital ENG", .expected_a = "NG capital ENG", .expected_b = "NG capital ENG", }, { .filename = "\u0172 capital U with Ogonek", .expected_a = "U capital U with Ogonek", .expected_b = "U capital U with Ogonek", }, // github issue 40 // + full coverage - test default translation rules { .filename = "_ () whatever", .expected_a = "_ () whatever", .expected_b = "_ () whatever", }, { .filename = "\u4000 ignore", .expected_a = "_ ignore", .expected_b = "\u4000 ignore", }, { .filename = "\x7F delete", .expected_a = "_ delete", .expected_b = "\x7F delete", }, // 2 byte UTF-8 encoded 0x00 (null) { .filename = "\xC0\x80 null", .expected_a = "_hidden_null_ null", .expected_b = "_hidden_null_ null", }, // 2 byte UTF-8 encoded 0x20 (space) { .filename = "\xC0\xA0 space", .expected_a = " space", .expected_b = " space", }, // 2 byte UTF-8 encoded 0x30 (zero) { .filename = "\xC0\xB0 zero", .expected_a = "0 zero", .expected_b = "0 
zero", }, // 2 byte UTF-8 encoded 0x7E (tilde) { .filename = "\xC1\xBE tilde", .expected_a = "~ tilde", .expected_b = "~ tilde", }, // 2 byte UTF-8 encoded 0x7F (tilde) { .filename = "\xC1\xBF delete", .expected_a = "_ delete", .expected_b = "\xC1\xBF delete", }, // github issue 33 { .filename = "\U00010348 hwair", .expected_a = "hu hwair", .expected_b = "hu hwair", }, // confirm invalid single-byte character sequence { .filename = "\x80 delete", .expected_a = "_ delete", .expected_b = "_ delete", }, // phishing { .filename = "\u03A1a\u200E\u200EyPa\u1963\u200E : Y\u200E\u200Eo\u1959r \u200Ea\u1974\u1974o\u200E\u1959\u1952t \u200Eis \u1974\u1959\u200E\u200Err\u1971\u200E\u200E\u1952t\u1963y\u200E \u1963o\u200E\u200E\u1974k\u1971d\u200E\u200E for \u200Es\u1971\u1974\u1959\u200E\u200Erity \u200Er\u1971as\u200E\u200Eo\u1952s. P\u200E\u200E\u1963\u1971a\u200Es\u1971 \u1974\u1963\u200E\u200Ei\u1974k \u200E\u200Eo\u1952 t\u200Eh\u1971 \u1963i\u200E\u200E\u1952k b\u1971\u200E\u200E\u1963ow t\u200Eo \u1959\u1952\u1963\u200E\u200Eo\u1974k \u200Eyo\u200E\u1959r a\u200E\u200E\u1974\u1974o\u200E\u1959\u1952t", .expected_a = "PayPal : Your account is currently locked for security reasons. Please click on the link below to unlock your account", .expected_b = "PayPal : Your account is currently locked for security reasons. 
Please click on the link below to unlock your account", }, { .filename = "C\u200E\u200EAS\u200EE \u200EID :\u200E E", .expected_a = "CASE ID : E", .expected_b = "CASE ID : E", }, }; #test test_clean_utf_8 table_t *table_a; table_t *table_b; char *output; int i; table_a = load_builtin_unicode_table(); table_b = load_builtin_unicode_table(); table_a->default_translation = strdup("_"); table_b->default_translation = NULL; // confirm that our a/b test is still valid ck_assert(table_get(table_b, 0x4000) == NULL); ck_assert(table_get(table_b, 0x007F) == NULL); // legacy tests for (i = 0; i < DATA_COUNT; i++) { output = clean_utf_8(data[i].filename, table_a); ck_assert_str_eq(output, data[i].expected_a); output = clean_utf_8(data[i].filename, table_b); ck_assert_str_eq(output, data[i].expected_b); } #test test_clean_utf_8_null char *output; // confirm NULL works output = clean_utf_8(NULL, NULL); ck_assert(output == NULL); #test-exit(1) test_clean_utf_8_missing_table clean_utf_8("what", NULL); #test test_clean_utf_8_invalid table_t *table; char *output; table = load_builtin_unicode_table(); table->default_translation = strdup("_"); // test an invalid UTF-8 sequence // the cleaner should replace the invalid sequence with an underscore output = clean_utf_8("\xC0" "blah", table); ck_assert_str_eq(output, "_blah"); output = clean_utf_8("blah" "\xC0", table); ck_assert_str_eq(output, "blah_"); #test test_clean_utf_8_beyond_unicode_max table_t *table; char *output; table = load_builtin_unicode_table(); table->default_translation = strdup("_"); // 3-byte encoding a lower ASCII "6" output = clean_utf_8("\xE0\x80\xB6" "blah", table); ck_assert_str_eq(output, "6blah"); // 4-byte encoding a lower ASCII "6" output = clean_utf_8("\xF0\x80\x80\xB6" "blah", table); ck_assert_str_eq(output, "6blah"); // 5-byte encoding a lower ASCII "6" output = clean_utf_8("\xF8\x80\x80\x80\xB6" "blah", table); ck_assert_str_eq(output, "6blah"); // 6-byte encoding a lower ASCII "6" output = 
clean_utf_8("\xF8\x80\x80\x80\xB6" "blah", table); ck_assert_str_eq(output, "6blah"); // 6-byte max on Ubuntu 20.04 // Unicode 0x7FFFFFFF output = clean_utf_8("\xFD\xBF\xBF\xBF\xBF\xBF" "blah", table); ck_assert_str_eq(output, "_blah"); detox-2.0.0/tests/unit/test_clean_wipeup.c000066400000000000000000000066551460212773400206550ustar00rootroot00000000000000/* * DO NOT EDIT THIS FILE. Generated by checkmk. * Edit the original source file "test_clean_wipeup.template" instead. * Run `make internals` from the base of the project to regenerate this file. */ #include #line 1 "test_clean_wipeup.template" /** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #include #include #include #include "clean_string.h" #include "detox_struct.h" #include "unit_struct.h" #define DATA_COUNT 11 static struct test_filename data[DATA_COUNT] = { { .filename = "-#dash_octothorpe-", .expected_a = "dash_octothorpe-", .expected_b = "dash_octothorpe-", }, { .filename = "-_dash_underscore-", .expected_a = "dash_underscore-", .expected_b = "dash_underscore-", }, { .filename = "ends with octothorpe#", .expected_a = "ends with octothorpe#", .expected_b = "ends with octothorpe#", }, { .filename = "#octothorpe", .expected_a = "octothorpe", .expected_b = "octothorpe", }, { .filename = "#-octothorpe_dash", .expected_a = "octothorpe_dash", .expected_b = "octothorpe_dash", }, { .filename = "_-underscore_dash", .expected_a = "underscore_dash", .expected_b = "underscore_dash", }, { .filename = "_underscore", .expected_a = "underscore", .expected_b = "underscore", }, { .filename = "___________underscore_______________________.x", .expected_a = "underscore_.x", .expected_b = "underscore.x", }, { .filename = "..........how.....does.....this....translate......", .expected_a = "..........how.....does.....this....translate......", .expected_b = 
".how.does.this.translate.", }, { .filename = "_-_-_-_-_-dotted-_-_-_-_line.....part......two.......", .expected_a = "dotted-line.....part......two.......", .expected_b = "dotted-line.part.two.", }, { .filename = "blah-.-de_._da", .expected_a = "blah-.-de_._da", .expected_b = "blah.de.da", }, }; START_TEST(test_clean_wipeup) { #line 79 char *output; int i; // legacy tests for (i = 0; i < DATA_COUNT; i++) { output = clean_wipeup(data[i].filename, 0); ck_assert_str_eq(output, data[i].expected_a); } } END_TEST START_TEST(test_clean_wipeup_null) { #line 89 char *output; // confirm NULL works output = clean_wipeup(NULL, 0); ck_assert(output == NULL); } END_TEST START_TEST(test_clean_wipeuprt) { #line 96 char *output; int i; // legacy tests for (i = 0; i < DATA_COUNT; i++) { output = clean_wipeup(data[i].filename, 1); ck_assert_str_eq(output, data[i].expected_b); } } END_TEST int main(void) { Suite *s1 = suite_create("Core"); TCase *tc1_1 = tcase_create("Core"); SRunner *sr = srunner_create(s1); int nf; suite_add_tcase(s1, tc1_1); tcase_add_test(tc1_1, test_clean_wipeup); tcase_add_test(tc1_1, test_clean_wipeup_null); tcase_add_test(tc1_1, test_clean_wipeuprt); srunner_run_all(sr, CK_ENV); nf = srunner_ntests_failed(sr); srunner_free(sr); return nf == 0 ? 0 : 1; } detox-2.0.0/tests/unit/test_clean_wipeup.template000066400000000000000000000052211460212773400222320ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/ #include #include #include #include "clean_string.h" #include "detox_struct.h" #include "unit_struct.h" #define DATA_COUNT 11 static struct test_filename data[DATA_COUNT] = { { .filename = "-#dash_octothorpe-", .expected_a = "dash_octothorpe-", .expected_b = "dash_octothorpe-", }, { .filename = "-_dash_underscore-", .expected_a = "dash_underscore-", .expected_b = "dash_underscore-", }, { .filename = "ends with octothorpe#", .expected_a = "ends with octothorpe#", .expected_b = "ends with octothorpe#", }, { .filename = "#octothorpe", .expected_a = "octothorpe", .expected_b = "octothorpe", }, { .filename = "#-octothorpe_dash", .expected_a = "octothorpe_dash", .expected_b = "octothorpe_dash", }, { .filename = "_-underscore_dash", .expected_a = "underscore_dash", .expected_b = "underscore_dash", }, { .filename = "_underscore", .expected_a = "underscore", .expected_b = "underscore", }, { .filename = "___________underscore_______________________.x", .expected_a = "underscore_.x", .expected_b = "underscore.x", }, { .filename = "..........how.....does.....this....translate......", .expected_a = "..........how.....does.....this....translate......", .expected_b = ".how.does.this.translate.", }, { .filename = "_-_-_-_-_-dotted-_-_-_-_line.....part......two.......", .expected_a = "dotted-line.....part......two.......", .expected_b = "dotted-line.part.two.", }, { .filename = "blah-.-de_._da", .expected_a = "blah-.-de_._da", .expected_b = "blah.de.da", }, }; #test test_clean_wipeup char *output; int i; // legacy tests for (i = 0; i < DATA_COUNT; i++) { output = clean_wipeup(data[i].filename, 0); ck_assert_str_eq(output, data[i].expected_a); } #test test_clean_wipeup_null char *output; // confirm NULL works output = clean_wipeup(NULL, 0); ck_assert(output == NULL); #test test_clean_wipeuprt char *output; int i; // legacy tests for (i = 0; i < DATA_COUNT; i++) { output = clean_wipeup(data[i].filename, 1); ck_assert_str_eq(output, data[i].expected_b); } 
detox-2.0.0/tests/unit/test_filelist.c000066400000000000000000000067141460212773400200110ustar00rootroot00000000000000/* * DO NOT EDIT THIS FILE. Generated by checkmk. * Edit the original source file "test_filelist.template" instead. * Run `make internals` from the base of the project to regenerate this file. */ #include #line 1 "test_filelist.template" /** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #include #include #include #include "filelist.h" #define STRLEN 128 struct test_definition { int pass; int count; }; static struct test_definition tests[8] = { { .pass = 1, .count = 1 }, { .pass = 2, .count = 2 }, { .pass = 3, .count = 15 }, { .pass = 4, .count = 16 }, { .pass = 5, .count = 17 }, { .pass = 6, .count = 1024 }, { .pass = 7, .count = 100000 }, { .pass = -1 } }; START_TEST(test_filelist) { #line 35 char *work; filelist_t *test_list; int i; int j; char *check, *last; work = alloca(STRLEN); last = NULL; for (i = 0; tests[i].pass != -1; i++) { test_list = filelist_init(); for (j = 0; j < tests[i].count; j++) { snprintf(work, STRLEN, "test-%04d", j); filelist_put(test_list, work); } ck_assert_int_eq(filelist_count(test_list), tests[i].count); while ((check = filelist_get(test_list))) { last = check; } ck_assert_str_eq(last, work); // walk the pointer back to the beginning ck_assert_str_eq(filelist_get(test_list), "test-0000"); if (tests[i].count > 1) { ck_assert_str_eq(filelist_get(test_list), "test-0001"); } filelist_reset(test_list); // walk the pointer back to the beginning ck_assert_str_eq(filelist_get(test_list), "test-0000"); ck_assert_int_eq(filelist_count(test_list), tests[i].count); filelist_free(test_list); } } END_TEST START_TEST(test_filelist_get_null) { #line 77 filelist_get(NULL); ck_assert_int_eq(filelist_count(NULL), 0); } END_TEST START_TEST(test_filelist_get_empty) { #line 81 char *work; 
filelist_t *test_list; test_list = filelist_init(); work = filelist_get(test_list); ck_assert(work == NULL); ck_assert_int_eq(filelist_count(test_list), 0); filelist_free(test_list); } END_TEST START_TEST(test_filelist_get_one) { #line 95 filelist_t *test_list; test_list = filelist_init(); filelist_put(test_list, "whatever"); ck_assert_str_eq(filelist_get(test_list), "whatever"); ck_assert(filelist_get(test_list) == NULL); ck_assert_str_eq(filelist_get(test_list), "whatever"); ck_assert_int_eq(filelist_count(test_list), 1); filelist_free(test_list); } END_TEST START_TEST(test_filelist_put_null) { #line 112 filelist_put(NULL, NULL); } END_TEST int main(void) { Suite *s1 = suite_create("Core"); TCase *tc1_1 = tcase_create("Core"); SRunner *sr = srunner_create(s1); int nf; suite_add_tcase(s1, tc1_1); tcase_add_test(tc1_1, test_filelist); tcase_add_test(tc1_1, test_filelist_get_null); tcase_add_test(tc1_1, test_filelist_get_empty); tcase_add_test(tc1_1, test_filelist_get_one); tcase_add_exit_test(tc1_1, test_filelist_put_null, 1); srunner_run_all(sr, CK_ENV); nf = srunner_ntests_failed(sr); srunner_free(sr); return nf == 0 ? 0 : 1; } detox-2.0.0/tests/unit/test_filelist.template000066400000000000000000000050301460212773400213700ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/ #include #include #include #include "filelist.h" #define STRLEN 128 struct test_definition { int pass; int count; }; static struct test_definition tests[8] = { { .pass = 1, .count = 1 }, { .pass = 2, .count = 2 }, { .pass = 3, .count = 15 }, { .pass = 4, .count = 16 }, { .pass = 5, .count = 17 }, { .pass = 6, .count = 1024 }, { .pass = 7, .count = 100000 }, { .pass = -1 } }; #test test_filelist char *work; filelist_t *test_list; int i; int j; char *check, *last; work = alloca(STRLEN); last = NULL; for (i = 0; tests[i].pass != -1; i++) { test_list = filelist_init(); for (j = 0; j < tests[i].count; j++) { snprintf(work, STRLEN, "test-%04d", j); filelist_put(test_list, work); } ck_assert_int_eq(filelist_count(test_list), tests[i].count); while ((check = filelist_get(test_list))) { last = check; } ck_assert_str_eq(last, work); // walk the pointer back to the beginning ck_assert_str_eq(filelist_get(test_list), "test-0000"); if (tests[i].count > 1) { ck_assert_str_eq(filelist_get(test_list), "test-0001"); } filelist_reset(test_list); // walk the pointer back to the beginning ck_assert_str_eq(filelist_get(test_list), "test-0000"); ck_assert_int_eq(filelist_count(test_list), tests[i].count); filelist_free(test_list); } #test test_filelist_get_null filelist_get(NULL); ck_assert_int_eq(filelist_count(NULL), 0); #test test_filelist_get_empty char *work; filelist_t *test_list; test_list = filelist_init(); work = filelist_get(test_list); ck_assert(work == NULL); ck_assert_int_eq(filelist_count(test_list), 0); filelist_free(test_list); #test test_filelist_get_one filelist_t *test_list; test_list = filelist_init(); filelist_put(test_list, "whatever"); ck_assert_str_eq(filelist_get(test_list), "whatever"); ck_assert(filelist_get(test_list) == NULL); ck_assert_str_eq(filelist_get(test_list), "whatever"); ck_assert_int_eq(filelist_count(test_list), 1); filelist_free(test_list); #test-exit(1) test_filelist_put_null filelist_put(NULL, NULL); 
detox-2.0.0/tests/unit/test_parse_inline.c000066400000000000000000000252101460212773400206360ustar00rootroot00000000000000/* * DO NOT EDIT THIS FILE. Generated by checkmk. * Edit the original source file "test_parse_inline.template" instead. * Run `make internals` from the base of the project to regenerate this file. */ #include #line 1 "test_parse_inline.template" /** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #include "config.h" #include #include #include #include #include "builtin_table.h" #include "detox_struct.h" #include "file.h" #include "filter.h" #include "parse_options.h" #include "sequence.h" #include "table.h" #include "wrapped.h" #define LOCAL_PADDING 32 struct test_values { int table_value; char *table_entry; char *input; char *expected; }; #define DATA_COUNT 9 static struct test_values data[DATA_COUNT] = { // 4 byte UTF-8 { .table_value = 0x1F37A, .table_entry = "_root_beer_", .input = "\U0001F37A", .expected = "_root_beer_", }, // 3 byte UTF-8 { .table_value = 0x2021, .table_entry = "_double_dagger_", .input = "\u2021", .expected = "_double_dagger_", }, // 2 byte UTF-8 { .table_value = 0x00AE, .table_entry = "(r)", .input = "\u00AE", .expected = "(r)", }, // invalid 4 byte UTF-8 { .table_value = 0x0001, .table_entry = "never...", .input = "\xF0" "\x9F" "\x8D", .expected = "_", }, // invalid 4 byte UTF-8 with more text { .table_value = 0x0002, .table_entry = "never...", .input = "\xF0" "\x9F" "\x8D" "whatever", .expected = "_whatever", }, // invalid 3 byte UTF-8 { .table_value = 0x0003, .table_entry = "never...", .input = "\xE2" "\x80", .expected = "_", }, // invalid 3 byte UTF-8 with more text { .table_value = 0x0004, .table_entry = "never...", .input = "\xE2" "\x80" "whatever", .expected = "_whatever", }, // invalid 2 byte UTF-8 { .table_value = 0x0005, .table_entry = "never...", .input = 
"\xC0", .expected = "_", }, // invalid 2 byte UTF-8 with more text { .table_value = 0x0006, .table_entry = "never...", .input = "\xC0" "whatever", .expected = "_whatever", }, }; #define BAD_FILES_DATA_COUNT 6 static struct test_values bad_files_data[BAD_FILES_DATA_COUNT] = { { .input = "..", .expected = "..", }, { .input = ".git", .expected = ".git", }, { .input = ".UPPER", .expected = ".upper", }, { .input = "not.UPPER", .expected = "not.upper", }, { .input = ".wipeup------no", .expected = ".wipeup-no", }, { .input = "wipeup------yes", .expected = "wipeup-yes", }, }; START_TEST(test_parse_inline) { #line 154 FILE *in_fp; FILE *out_fp; options_t *options; sequence_t *sequence; filter_t *filter; char *expected; char *input; char *input_filename; char *output; char *output_filename; char *walk; int buffer_size; int end; int start; int use_newline; int x_length; int half_x_length; int i; // options = options_init(); options->sequence_to_use = sequence = sequence_init("utf_8-and-lower"); sequence->source_filename = wrapped_strdup(__FILE__); sequence->filters = filter = filter_init(FILTER_UTF_8); filter->builtin = wrapped_strdup("safe"); filter->table = load_builtin_safe_table(); for (i = 0; i < DATA_COUNT; i++) { // table_put(filter->table, 0x0001F37A, "_root_beer_"); table_put(filter->table, data[i].table_value, data[i].table_entry); } filter->next = filter_init(FILTER_LOWER); // start = INLINE_BUF_SIZE - (INLINE_BUF_PADDING * 2); end = INLINE_BUF_SIZE + (INLINE_BUF_PADDING * 2); buffer_size = end + LOCAL_PADDING; expected = wrapped_malloc(buffer_size); input = wrapped_malloc(buffer_size); output = wrapped_malloc(buffer_size); input_filename = wrapped_malloc(buffer_size); output_filename = wrapped_malloc(buffer_size); for (i = 0; i < DATA_COUNT; i++) { for (x_length = start; x_length < end; x_length++) { for (use_newline = 0; use_newline < 2; use_newline++) { #ifdef DEBUG fprintf(stderr, "CHECKING VALUE: 0x%04X, LENGTH: %d, NEW LINE: %s\n", data[i].table_value, 
x_length, use_newline ? "yes" : "no"); #endif // prep sprintf(input_filename, "/tmp/detoxtest-in-%d-%d-XXXXXX", x_length, use_newline); sprintf(output_filename, "/tmp/detoxtest-out-%d-%d-XXXXXX", x_length, use_newline); mktemp(input_filename); mktemp(output_filename); #ifdef DEBUG fprintf(stderr, "in: %s, out: %s\n", input_filename, output_filename); #endif // wipe memory memset(expected, 0, buffer_size); memset(input, 0, buffer_size); memset(output, 0, buffer_size); // --------------------------------------------------------- // BUILD INPUT // --------------------------------------------------------- // set input to string filled with the same character, "x". // the length is x_length. memset(input, 'x', x_length); // stick a bunch of uppercase Xs in the middle half_x_length = (int)(x_length / 2); memset(input + (int)(half_x_length / 2), 'X', half_x_length); // at the end of the input, add a 4-byte UTF-8 character walk = input + x_length; sprintf(walk, "%s%s", data[i].input, use_newline ? "\n" : ""); // --------------------------------------------------------- // BUILD EXPECTED // --------------------------------------------------------- // set expected to string filled with the same character, "x". // the length is x_length. memset(expected, 'x', x_length); // at the end of the expected, add the translated character walk = expected + x_length; sprintf(walk, "%s%s", data[i].expected, use_newline ? 
"\n" : ""); // --------------------------------------------------------- // write the string to a file in_fp = fopen(input_filename, "w"); fputs(input, in_fp); fclose(in_fp); // pass the file to parse_inline parse_inline(input_filename, output_filename, options); // read the output file out_fp = fopen(output_filename, "r"); if (fgets(output, buffer_size, out_fp) == NULL) { ck_abort_msg("fgets() failed"); } fclose(out_fp); // compare ck_assert_str_eq(expected, output); // cleanup unlink(output_filename); unlink(input_filename); } } } } END_TEST START_TEST(test_parse_inline_protected_file) { #line 298 FILE *in_fp; FILE *out_fp; options_t *options; sequence_t *sequence; filter_t *filter; char *expected; char *input; char *input_filename; char *output; char *output_filename; char *walk; int buffer_size; int i; // options = options_init(); options->sequence_to_use = sequence = sequence_init("wipeup-lower"); sequence->source_filename = wrapped_strdup(__FILE__); sequence->filters = filter = filter_init(FILTER_WIPEUP); filter->remove_trailing = 1; filter->next = filter_init(FILTER_LOWER); // buffer_size = INLINE_BUF_SIZE * 2; expected = wrapped_malloc(buffer_size); input = wrapped_malloc(buffer_size); output = wrapped_malloc(buffer_size); input_filename = wrapped_malloc(buffer_size); output_filename = wrapped_malloc(buffer_size); // prep sprintf(input_filename, "/tmp/detoxtest-in--XXXXXX"); sprintf(output_filename, "/tmp/detoxtest-out-XXXXXX"); mktemp(input_filename); mktemp(output_filename); #ifdef DEBUG fprintf(stderr, "in: %s, out: %s\n", input_filename, output_filename); #endif // wipe memory memset(expected, 0, buffer_size); memset(input, 0, buffer_size); memset(output, 0, buffer_size); // --------------------------------------------------------- // BUILD INPUT // --------------------------------------------------------- walk = input; for (i = 0; i < BAD_FILES_DATA_COUNT; i++) { sprintf(walk, "%s\n", bad_files_data[i].input); walk = strchr(walk, '\0'); } // 
--------------------------------------------------------- // BUILD EXPECTED // --------------------------------------------------------- walk = expected; for (i = 0; i < BAD_FILES_DATA_COUNT; i++) { sprintf(walk, "%s\n", bad_files_data[i].expected); walk = strchr(walk, '\0'); } // --------------------------------------------------------- // write the string to a file in_fp = fopen(input_filename, "w"); fputs(input, in_fp); fclose(in_fp); // pass the file to parse_inline parse_inline(input_filename, output_filename, options); // read the output file out_fp = fopen(output_filename, "r"); walk = output; while (fgets(walk, buffer_size, out_fp) != NULL) { walk += strlen(walk); } fclose(out_fp); // compare ck_assert_str_eq(expected, output); // cleanup unlink(output_filename); unlink(input_filename); } END_TEST int main(void) { Suite *s1 = suite_create("Core"); TCase *tc1_1 = tcase_create("Core"); SRunner *sr = srunner_create(s1); int nf; suite_add_tcase(s1, tc1_1); tcase_add_test(tc1_1, test_parse_inline); tcase_add_test(tc1_1, test_parse_inline_protected_file); srunner_run_all(sr, CK_ENV); nf = srunner_ntests_failed(sr); srunner_free(sr); return nf == 0 ? 0 : 1; } detox-2.0.0/tests/unit/test_parse_inline.template000066400000000000000000000236551460212773400222420ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/ #include "config.h" #include #include #include #include #include "builtin_table.h" #include "detox_struct.h" #include "file.h" #include "filter.h" #include "parse_options.h" #include "sequence.h" #include "table.h" #include "wrapped.h" #define LOCAL_PADDING 32 struct test_values { int table_value; char *table_entry; char *input; char *expected; }; #define DATA_COUNT 9 static struct test_values data[DATA_COUNT] = { // 4 byte UTF-8 { .table_value = 0x1F37A, .table_entry = "_root_beer_", .input = "\U0001F37A", .expected = "_root_beer_", }, // 3 byte UTF-8 { .table_value = 0x2021, .table_entry = "_double_dagger_", .input = "\u2021", .expected = "_double_dagger_", }, // 2 byte UTF-8 { .table_value = 0x00AE, .table_entry = "(r)", .input = "\u00AE", .expected = "(r)", }, // invalid 4 byte UTF-8 { .table_value = 0x0001, .table_entry = "never...", .input = "\xF0" "\x9F" "\x8D", .expected = "_", }, // invalid 4 byte UTF-8 with more text { .table_value = 0x0002, .table_entry = "never...", .input = "\xF0" "\x9F" "\x8D" "whatever", .expected = "_whatever", }, // invalid 3 byte UTF-8 { .table_value = 0x0003, .table_entry = "never...", .input = "\xE2" "\x80", .expected = "_", }, // invalid 3 byte UTF-8 with more text { .table_value = 0x0004, .table_entry = "never...", .input = "\xE2" "\x80" "whatever", .expected = "_whatever", }, // invalid 2 byte UTF-8 { .table_value = 0x0005, .table_entry = "never...", .input = "\xC0", .expected = "_", }, // invalid 2 byte UTF-8 with more text { .table_value = 0x0006, .table_entry = "never...", .input = "\xC0" "whatever", .expected = "_whatever", }, }; #define BAD_FILES_DATA_COUNT 6 static struct test_values bad_files_data[BAD_FILES_DATA_COUNT] = { { .input = "..", .expected = "..", }, { .input = ".git", .expected = ".git", }, { .input = ".UPPER", .expected = ".upper", }, { .input = "not.UPPER", .expected = "not.upper", }, { .input = ".wipeup------no", .expected = ".wipeup-no", }, { .input = "wipeup------yes", .expected = "wipeup-yes", }, }; 
#test test_parse_inline FILE *in_fp; FILE *out_fp; options_t *options; sequence_t *sequence; filter_t *filter; char *expected; char *input; char *input_filename; char *output; char *output_filename; char *walk; int buffer_size; int end; int start; int use_newline; int x_length; int half_x_length; int i; // options = options_init(); options->sequence_to_use = sequence = sequence_init("utf_8-and-lower"); sequence->source_filename = wrapped_strdup(__FILE__); sequence->filters = filter = filter_init(FILTER_UTF_8); filter->builtin = wrapped_strdup("safe"); filter->table = load_builtin_safe_table(); for (i = 0; i < DATA_COUNT; i++) { // table_put(filter->table, 0x0001F37A, "_root_beer_"); table_put(filter->table, data[i].table_value, data[i].table_entry); } filter->next = filter_init(FILTER_LOWER); // start = INLINE_BUF_SIZE - (INLINE_BUF_PADDING * 2); end = INLINE_BUF_SIZE + (INLINE_BUF_PADDING * 2); buffer_size = end + LOCAL_PADDING; expected = wrapped_malloc(buffer_size); input = wrapped_malloc(buffer_size); output = wrapped_malloc(buffer_size); input_filename = wrapped_malloc(buffer_size); output_filename = wrapped_malloc(buffer_size); for (i = 0; i < DATA_COUNT; i++) { for (x_length = start; x_length < end; x_length++) { for (use_newline = 0; use_newline < 2; use_newline++) { #ifdef DEBUG fprintf(stderr, "CHECKING VALUE: 0x%04X, LENGTH: %d, NEW LINE: %s\n", data[i].table_value, x_length, use_newline ? 
"yes" : "no"); #endif // prep sprintf(input_filename, "/tmp/detoxtest-in-%d-%d-XXXXXX", x_length, use_newline); sprintf(output_filename, "/tmp/detoxtest-out-%d-%d-XXXXXX", x_length, use_newline); mktemp(input_filename); mktemp(output_filename); #ifdef DEBUG fprintf(stderr, "in: %s, out: %s\n", input_filename, output_filename); #endif // wipe memory memset(expected, 0, buffer_size); memset(input, 0, buffer_size); memset(output, 0, buffer_size); // --------------------------------------------------------- // BUILD INPUT // --------------------------------------------------------- // set input to string filled with the same character, "x". // the length is x_length. memset(input, 'x', x_length); // stick a bunch of uppercase Xs in the middle half_x_length = (int)(x_length / 2); memset(input + (int)(half_x_length / 2), 'X', half_x_length); // at the end of the input, add a 4-byte UTF-8 character walk = input + x_length; sprintf(walk, "%s%s", data[i].input, use_newline ? "\n" : ""); // --------------------------------------------------------- // BUILD EXPECTED // --------------------------------------------------------- // set expected to string filled with the same character, "x". // the length is x_length. memset(expected, 'x', x_length); // at the end of the expected, add the translated character walk = expected + x_length; sprintf(walk, "%s%s", data[i].expected, use_newline ? 
"\n" : ""); // --------------------------------------------------------- // write the string to a file in_fp = fopen(input_filename, "w"); fputs(input, in_fp); fclose(in_fp); // pass the file to parse_inline parse_inline(input_filename, output_filename, options); // read the output file out_fp = fopen(output_filename, "r"); if (fgets(output, buffer_size, out_fp) == NULL) { ck_abort_msg("fgets() failed"); } fclose(out_fp); // compare ck_assert_str_eq(expected, output); // cleanup unlink(output_filename); unlink(input_filename); } } } #test test_parse_inline_protected_file FILE *in_fp; FILE *out_fp; options_t *options; sequence_t *sequence; filter_t *filter; char *expected; char *input; char *input_filename; char *output; char *output_filename; char *walk; int buffer_size; int i; // options = options_init(); options->sequence_to_use = sequence = sequence_init("wipeup-lower"); sequence->source_filename = wrapped_strdup(__FILE__); sequence->filters = filter = filter_init(FILTER_WIPEUP); filter->remove_trailing = 1; filter->next = filter_init(FILTER_LOWER); // buffer_size = INLINE_BUF_SIZE * 2; expected = wrapped_malloc(buffer_size); input = wrapped_malloc(buffer_size); output = wrapped_malloc(buffer_size); input_filename = wrapped_malloc(buffer_size); output_filename = wrapped_malloc(buffer_size); // prep sprintf(input_filename, "/tmp/detoxtest-in--XXXXXX"); sprintf(output_filename, "/tmp/detoxtest-out-XXXXXX"); mktemp(input_filename); mktemp(output_filename); #ifdef DEBUG fprintf(stderr, "in: %s, out: %s\n", input_filename, output_filename); #endif // wipe memory memset(expected, 0, buffer_size); memset(input, 0, buffer_size); memset(output, 0, buffer_size); // --------------------------------------------------------- // BUILD INPUT // --------------------------------------------------------- walk = input; for (i = 0; i < BAD_FILES_DATA_COUNT; i++) { sprintf(walk, "%s\n", bad_files_data[i].input); walk = strchr(walk, '\0'); } // 
--------------------------------------------------------- // BUILD EXPECTED // --------------------------------------------------------- walk = expected; for (i = 0; i < BAD_FILES_DATA_COUNT; i++) { sprintf(walk, "%s\n", bad_files_data[i].expected); walk = strchr(walk, '\0'); } // --------------------------------------------------------- // write the string to a file in_fp = fopen(input_filename, "w"); fputs(input, in_fp); fclose(in_fp); // pass the file to parse_inline parse_inline(input_filename, output_filename, options); // read the output file out_fp = fopen(output_filename, "r"); walk = output; while (fgets(walk, buffer_size, out_fp) != NULL) { walk += strlen(walk); } fclose(out_fp); // compare ck_assert_str_eq(expected, output); // cleanup unlink(output_filename); unlink(input_filename); detox-2.0.0/tests/unit/test_spoof_config_file.c000066400000000000000000000111631460212773400216420ustar00rootroot00000000000000/* * DO NOT EDIT THIS FILE. Generated by checkmk. * Edit the original source file "test_spoof_config_file.template" instead. * Run `make internals` from the base of the project to regenerate this file. */ #include #line 1 "test_spoof_config_file.template" /** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/ #include #include #include #include #include "config_file_spoof.h" #include "config_file.h" #include "detox_struct.h" #include "parse_options.h" static char *detoxrc = "sequence default{safe{builtin \"safe\";};wipeup{remove_trailing;};};" "sequence \"iso8859_1\"{iso8859_1{builtin \"iso8859_1\";};safe{" "builtin \"safe\";};wipeup{remove_trailing;};};" "sequence \"iso8859_1-legacy\"{iso8859_1{builtin \"cp1252\";};iso8859_1{" "builtin \"iso8859_1\";};safe{builtin \"safe\";};wipeup{remove_trailing;" "};};sequence \"utf_8\"{utf_8{builtin \"unicode\";};safe{" "builtin \"safe\";};wipeup{remove_trailing;};};sequence \"utf_8-legacy\"{" "utf_8{builtin \"cp1252\";};utf_8{builtin \"unicode\";};safe{" "builtin \"safe\";};wipeup{remove_trailing;};};sequence \"uncgi\"{uncgi;" "safe{builtin \"safe\";};wipeup{remove_trailing;};};sequence \"lower\"{" "safe{builtin \"safe\";};lower;wipeup{remove_trailing;};};" "sequence \"iso8859_1-only\"{iso8859_1{builtin \"iso8859_1\";};};" "sequence \"cp1252-only\"{iso8859_1{builtin \"cp1252\";};};" "sequence \"utf_8-only\"{utf_8{builtin \"unicode\";};};" "sequence \"uncgi-only\"{uncgi;};sequence \"lower-only\"{lower;};ignore{" "filename \"{arch}\";};" ; static char *tempfile; void setup(void) { int fd; tempfile = malloc(1024); sprintf(tempfile, "/tmp/detoxrc-test-XXXXXX"); fd = mkstemp(tempfile); fprintf(stderr, "writing to %s\n", tempfile); dprintf(fd, "%s", detoxrc); } void teardown(void) { fprintf(stderr, "deleting %s\n", tempfile); unlink(tempfile); } START_TEST(test_spoof_config_file) { #line 59 options_t *main_options; config_file_t *parsed; config_file_t *spoofed; sequence_t *parsed_sequence; sequence_t *spoofed_sequence; filter_t *parsed_filter; filter_t *spoofed_filter; main_options = options_init(); parsed = parse_config_file(tempfile, NULL, main_options); spoofed = spoof_config_file(); // check names parsed_sequence = parsed->sequences; spoofed_sequence = spoofed->sequences; do { if (parsed_sequence == NULL || spoofed_sequence == 
NULL) { break; } ck_assert_str_eq(parsed_sequence->name, spoofed_sequence->name); // // // parsed_filter = parsed_sequence->filters; spoofed_filter = spoofed_sequence->filters; do { if (parsed_filter == NULL || spoofed_filter == NULL) { break; } ck_assert_msg(parsed_filter->cleaner == spoofed_filter->cleaner, "unit test cleaner doesn't match spoofed cleaner"); ck_assert_str_eq( parsed_filter->filename ? parsed_filter->filename : "...NULL...", spoofed_filter->filename ? spoofed_filter->filename : "...NULL..." ); ck_assert_str_eq( parsed_filter->builtin ? parsed_filter->builtin : "...NULL...", spoofed_filter->builtin ? spoofed_filter->builtin : "...NULL..." ); ck_assert_int_eq(parsed_filter->remove_trailing, spoofed_filter->remove_trailing); ck_assert_int_eq(parsed_filter->max_length, spoofed_filter->max_length); parsed_filter = parsed_filter->next; spoofed_filter = spoofed_filter->next; } while (1); // // // parsed_sequence = parsed_sequence->next; spoofed_sequence = spoofed_sequence->next; } while (1); ck_assert_msg(parsed_sequence == NULL, "unit test config file is too long"); ck_assert_msg(spoofed_sequence == NULL, "spoofed config file is too long"); } END_TEST int main(void) { Suite *s1 = suite_create("Core"); TCase *tc1_1 = tcase_create("Core"); SRunner *sr = srunner_create(s1); int nf; /* User-specified pre-run code */ #line 124 tcase_add_checked_fixture(tc1_1, setup, teardown); suite_add_tcase(s1, tc1_1); tcase_add_test(tc1_1, test_spoof_config_file); srunner_run_all(sr, CK_ENV); nf = srunner_ntests_failed(sr); srunner_free(sr); return nf == 0 ? 0 : 1; } detox-2.0.0/tests/unit/test_spoof_config_file.template000066400000000000000000000076761460212773400232510ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/ #include #include #include #include #include "config_file_spoof.h" #include "config_file.h" #include "detox_struct.h" #include "parse_options.h" static char *detoxrc = "sequence default{safe{builtin \"safe\";};wipeup{remove_trailing;};};" "sequence \"iso8859_1\"{iso8859_1{builtin \"iso8859_1\";};safe{" "builtin \"safe\";};wipeup{remove_trailing;};};" "sequence \"iso8859_1-legacy\"{iso8859_1{builtin \"cp1252\";};iso8859_1{" "builtin \"iso8859_1\";};safe{builtin \"safe\";};wipeup{remove_trailing;" "};};sequence \"utf_8\"{utf_8{builtin \"unicode\";};safe{" "builtin \"safe\";};wipeup{remove_trailing;};};sequence \"utf_8-legacy\"{" "utf_8{builtin \"cp1252\";};utf_8{builtin \"unicode\";};safe{" "builtin \"safe\";};wipeup{remove_trailing;};};sequence \"uncgi\"{uncgi;" "safe{builtin \"safe\";};wipeup{remove_trailing;};};sequence \"lower\"{" "safe{builtin \"safe\";};lower;wipeup{remove_trailing;};};" "sequence \"iso8859_1-only\"{iso8859_1{builtin \"iso8859_1\";};};" "sequence \"cp1252-only\"{iso8859_1{builtin \"cp1252\";};};" "sequence \"utf_8-only\"{utf_8{builtin \"unicode\";};};" "sequence \"uncgi-only\"{uncgi;};sequence \"lower-only\"{lower;};ignore{" "filename \"{arch}\";};" ; static char *tempfile; void setup(void) { int fd; tempfile = malloc(1024); sprintf(tempfile, "/tmp/detoxrc-test-XXXXXX"); fd = mkstemp(tempfile); fprintf(stderr, "writing to %s\n", tempfile); dprintf(fd, "%s", detoxrc); } void teardown(void) { fprintf(stderr, "deleting %s\n", tempfile); unlink(tempfile); } #test test_spoof_config_file options_t *main_options; config_file_t *parsed; config_file_t *spoofed; sequence_t *parsed_sequence; sequence_t *spoofed_sequence; filter_t *parsed_filter; filter_t *spoofed_filter; main_options = options_init(); parsed = parse_config_file(tempfile, NULL, main_options); spoofed = spoof_config_file(); // check names parsed_sequence = parsed->sequences; spoofed_sequence = spoofed->sequences; do { if (parsed_sequence == NULL || spoofed_sequence == NULL) { break; } 
ck_assert_str_eq(parsed_sequence->name, spoofed_sequence->name); // // // parsed_filter = parsed_sequence->filters; spoofed_filter = spoofed_sequence->filters; do { if (parsed_filter == NULL || spoofed_filter == NULL) { break; } ck_assert_msg(parsed_filter->cleaner == spoofed_filter->cleaner, "unit test cleaner doesn't match spoofed cleaner"); ck_assert_str_eq( parsed_filter->filename ? parsed_filter->filename : "...NULL...", spoofed_filter->filename ? spoofed_filter->filename : "...NULL..." ); ck_assert_str_eq( parsed_filter->builtin ? parsed_filter->builtin : "...NULL...", spoofed_filter->builtin ? spoofed_filter->builtin : "...NULL..." ); ck_assert_int_eq(parsed_filter->remove_trailing, spoofed_filter->remove_trailing); ck_assert_int_eq(parsed_filter->max_length, spoofed_filter->max_length); parsed_filter = parsed_filter->next; spoofed_filter = spoofed_filter->next; } while (1); // // // parsed_sequence = parsed_sequence->next; spoofed_sequence = spoofed_sequence->next; } while (1); ck_assert_msg(parsed_sequence == NULL, "unit test config file is too long"); ck_assert_msg(spoofed_sequence == NULL, "spoofed config file is too long"); #main-pre tcase_add_checked_fixture(tc1_1, setup, teardown); detox-2.0.0/tests/unit/test_table_max_length.c000066400000000000000000000106051460212773400214650ustar00rootroot00000000000000/* * DO NOT EDIT THIS FILE. Generated by checkmk. * Edit the original source file "test_table_max_length.template" instead. * Run `make internals` from the base of the project to regenerate this file. */ #include #line 1 "test_table_max_length.template" /** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/ #include #include #include #include "table.h" struct test_data { int value; char *data; int max_length; }; #define DATA_COUNT 13 static struct test_data data[DATA_COUNT] = { { .value = 0x0001, .data = "a", .max_length = 1 }, { .value = 0x0002, .data = "b", .max_length = 1 }, { .value = 0x0003, .data = "c", .max_length = 1 }, { .value = 0x0004, .data = "don't", .max_length = 5 }, { .value = 0x0005, .data = "eat", .max_length = 5 }, { .value = 0x0006, .data = "the", .max_length = 5 }, { .value = 0x0007, .data = "joe o's", .max_length = 7 }, { .value = 0x0008, .data = "k", .max_length = 7 }, { .value = 0x0009, .data = "moo", .max_length = 7 }, { .value = 0x000A, .data = "true", .max_length = 7 }, { .value = 0x000B, .data = "insanity", .max_length = 8 }, { .value = 0x1001, .data = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed", .max_length = 60, }, { .value = 0x1002, .data = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.", .max_length = 123, }, }; START_TEST(test_table_max_length) { #line 92 table_t *table; int i; table = table_init(DATA_COUNT * 10); for (i = 0; i < DATA_COUNT; i++) { table_put(table, data[i].value, data[i].data); ck_assert_int_eq(table->max_data_length, data[i].max_length); } } END_TEST START_TEST(test_table_max_length_after_resize) { #line 103 table_t *table; table_t *check_table; int i, j; int max_length, old_max_length; table = table_init(DATA_COUNT * 10); for (i = 0; i < DATA_COUNT; i++) { table_put(table, data[i].value, data[i].data); } max_length = table->max_data_length; // // confirm that max length gets copied over to a resized table // for (j = 0; j < 2; j++) { check_table = table_resize(table, table->length, j); ck_assert_int_eq(check_table->max_data_length, max_length); table_free(check_table); } // // put "a" in all the values, so when we resize, the max length on the new // table should be 1. 
// old_max_length = max_length; max_length = 1; for (i = 0; i < DATA_COUNT; i++) { table_put(table, data[i].value, "a"); } for (j = 0; j < 2; j++) { check_table = table_resize(table, table->length, j); ck_assert_int_eq(check_table->max_data_length, max_length); table_free(check_table); } // // put "" in all the values, so when we resize, the max length on the new // table should be 0. // max_length = 0; for (i = 0; i < DATA_COUNT; i++) { table_put(table, data[i].value, ""); } for (j = 0; j < 2; j++) { check_table = table_resize(table, table->length, j); ck_assert_int_eq(check_table->max_data_length, max_length); table_free(check_table); } // ck_assert_int_eq(table->max_data_length, old_max_length); } END_TEST int main(void) { Suite *s1 = suite_create("Core"); TCase *tc1_1 = tcase_create("Core"); SRunner *sr = srunner_create(s1); int nf; suite_add_tcase(s1, tc1_1); tcase_add_test(tc1_1, test_table_max_length); tcase_add_test(tc1_1, test_table_max_length_after_resize); srunner_run_all(sr, CK_ENV); nf = srunner_ntests_failed(sr); srunner_free(sr); return nf == 0 ? 0 : 1; } detox-2.0.0/tests/unit/test_table_max_length.template000066400000000000000000000072351460212773400230630ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/ #include #include #include #include "table.h" struct test_data { int value; char *data; int max_length; }; #define DATA_COUNT 13 static struct test_data data[DATA_COUNT] = { { .value = 0x0001, .data = "a", .max_length = 1 }, { .value = 0x0002, .data = "b", .max_length = 1 }, { .value = 0x0003, .data = "c", .max_length = 1 }, { .value = 0x0004, .data = "don't", .max_length = 5 }, { .value = 0x0005, .data = "eat", .max_length = 5 }, { .value = 0x0006, .data = "the", .max_length = 5 }, { .value = 0x0007, .data = "joe o's", .max_length = 7 }, { .value = 0x0008, .data = "k", .max_length = 7 }, { .value = 0x0009, .data = "moo", .max_length = 7 }, { .value = 0x000A, .data = "true", .max_length = 7 }, { .value = 0x000B, .data = "insanity", .max_length = 8 }, { .value = 0x1001, .data = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed", .max_length = 60, }, { .value = 0x1002, .data = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.", .max_length = 123, }, }; #test test_table_max_length table_t *table; int i; table = table_init(DATA_COUNT * 10); for (i = 0; i < DATA_COUNT; i++) { table_put(table, data[i].value, data[i].data); ck_assert_int_eq(table->max_data_length, data[i].max_length); } #test test_table_max_length_after_resize table_t *table; table_t *check_table; int i, j; int max_length, old_max_length; table = table_init(DATA_COUNT * 10); for (i = 0; i < DATA_COUNT; i++) { table_put(table, data[i].value, data[i].data); } max_length = table->max_data_length; // // confirm that max length gets copied over to a resized table // for (j = 0; j < 2; j++) { check_table = table_resize(table, table->length, j); ck_assert_int_eq(check_table->max_data_length, max_length); table_free(check_table); } // // put "a" in all the values, so when we resize, the max length on the new // table should be 1. 
// old_max_length = max_length; max_length = 1; for (i = 0; i < DATA_COUNT; i++) { table_put(table, data[i].value, "a"); } for (j = 0; j < 2; j++) { check_table = table_resize(table, table->length, j); ck_assert_int_eq(check_table->max_data_length, max_length); table_free(check_table); } // // put "" in all the values, so when we resize, the max length on the new // table should be 0. // max_length = 0; for (i = 0; i < DATA_COUNT; i++) { table_put(table, data[i].value, ""); } for (j = 0; j < 2; j++) { check_table = table_resize(table, table->length, j); ck_assert_int_eq(check_table->max_data_length, max_length); table_free(check_table); } // ck_assert_int_eq(table->max_data_length, old_max_length); detox-2.0.0/tests/unit/test_table_resize.c000066400000000000000000000101311460212773400206320ustar00rootroot00000000000000/* * DO NOT EDIT THIS FILE. Generated by checkmk. * Edit the original source file "test_table_resize.template" instead. * Run `make internals` from the base of the project to regenerate this file. */ #include #line 1 "test_table_resize.template" /** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/ #include "config.h" #include #include #include #include #include #include "parse_table.h" #include "table.h" struct test_definition { int pass; int size; int multiple; }; static struct test_definition tests[5] = { { .pass = 1, .size = 0 }, { .pass = 2, .size = 10000 }, { .pass = 3, .size = 1000000 }, { .pass = 4, .size = 0, .multiple = 16 }, { .pass = -1 } }; struct test_value { int value; char *expected; }; static struct test_value values[9] = { { .value = 0x0020, .expected = "space" }, { .value = 0x00A3, .expected = "_pound_" }, { .value = 0x014B, .expected = "ng" }, { .value = 0x0BAD, .expected = NULL }, { .value = 0x4000, .expected = NULL }, { .value = 0xC0DE, .expected = "code" }, { .value = 0xCAFE, .expected = "cafe" }, { .value = 0xFACE, .expected = "face" }, { .value = 0 } }; static char *table_filename = "fixtures/test_table_resize.tbl"; #define HEADER_LEN 256 #define MESSAGE_LEN 1024 START_TEST(test_table_resize) { #line 58 table_t *table; table_t *new_table; char *check; char *header; char *message; int i, j; int size; int use_hash; header = malloc(HEADER_LEN); message = malloc(MESSAGE_LEN); for (use_hash = 0; use_hash <= 1; use_hash++) { table = parse_table(table_filename); for (i = 0; tests[i].pass != -1; i++) { snprintf(header, HEADER_LEN, "pass = %d, use_hash = %d", tests[i].pass, use_hash); #ifdef DEBUG printf("%s\n", header); #endif if (tests[i].size > 0) { #ifdef DEBUG printf("resizing table to size %d\n", tests[i].size); #endif new_table = table_resize(table, tests[i].size, use_hash); table_free(table); table = new_table; new_table = NULL; } else if (tests[i].multiple > 0) { size = ((int) ceil(table->used / tests[i].multiple) + 1) * tests[i].multiple; #ifdef DEBUG printf("resizing table to size %d (as a multiple of %d)\n", size, tests[i].multiple); #endif new_table = table_resize(table, size, use_hash); table_free(table); table = new_table; new_table = NULL; } #ifdef DEBUG printf("table length: %d\ntable used: %d\n", table->length, 
table->used); #endif for (j = 0; values[j].value != 0; j++) { check = table_get(table, values[j].value); #ifdef DEBUG printf("\t0x%04x -> \"%s\"\n", values[j].value, check); #endif snprintf( message, MESSAGE_LEN, "%s, value = 0x%04X, expected = %s, output = %s", header, values[j].value, values[j].expected, check ); if (values[j].expected != NULL) { ck_assert_msg(check != NULL, message); ck_assert_msg(strcmp(check, values[j].expected) == 0, message); } else { ck_assert_msg(check == NULL, message); } } } table_free(table); } } END_TEST int main(void) { Suite *s1 = suite_create("Core"); TCase *tc1_1 = tcase_create("Core"); SRunner *sr = srunner_create(s1); int nf; suite_add_tcase(s1, tc1_1); tcase_add_test(tc1_1, test_table_resize); srunner_run_all(sr, CK_ENV); nf = srunner_ntests_failed(sr); srunner_free(sr); return nf == 0 ? 0 : 1; } detox-2.0.0/tests/unit/test_table_resize.template000066400000000000000000000067321460212773400222370ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/ #include "config.h" #include #include #include #include #include #include "parse_table.h" #include "table.h" struct test_definition { int pass; int size; int multiple; }; static struct test_definition tests[5] = { { .pass = 1, .size = 0 }, { .pass = 2, .size = 10000 }, { .pass = 3, .size = 1000000 }, { .pass = 4, .size = 0, .multiple = 16 }, { .pass = -1 } }; struct test_value { int value; char *expected; }; static struct test_value values[9] = { { .value = 0x0020, .expected = "space" }, { .value = 0x00A3, .expected = "_pound_" }, { .value = 0x014B, .expected = "ng" }, { .value = 0x0BAD, .expected = NULL }, { .value = 0x4000, .expected = NULL }, { .value = 0xC0DE, .expected = "code" }, { .value = 0xCAFE, .expected = "cafe" }, { .value = 0xFACE, .expected = "face" }, { .value = 0 } }; static char *table_filename = "fixtures/test_table_resize.tbl"; #define HEADER_LEN 256 #define MESSAGE_LEN 1024 #test test_table_resize table_t *table; table_t *new_table; char *check; char *header; char *message; int i, j; int size; int use_hash; header = malloc(HEADER_LEN); message = malloc(MESSAGE_LEN); for (use_hash = 0; use_hash <= 1; use_hash++) { table = parse_table(table_filename); for (i = 0; tests[i].pass != -1; i++) { snprintf(header, HEADER_LEN, "pass = %d, use_hash = %d", tests[i].pass, use_hash); #ifdef DEBUG printf("%s\n", header); #endif if (tests[i].size > 0) { #ifdef DEBUG printf("resizing table to size %d\n", tests[i].size); #endif new_table = table_resize(table, tests[i].size, use_hash); table_free(table); table = new_table; new_table = NULL; } else if (tests[i].multiple > 0) { size = ((int) ceil(table->used / tests[i].multiple) + 1) * tests[i].multiple; #ifdef DEBUG printf("resizing table to size %d (as a multiple of %d)\n", size, tests[i].multiple); #endif new_table = table_resize(table, size, use_hash); table_free(table); table = new_table; new_table = NULL; } #ifdef DEBUG printf("table length: %d\ntable used: %d\n", table->length, table->used); #endif for (j 
= 0; values[j].value != 0; j++) { check = table_get(table, values[j].value); #ifdef DEBUG printf("\t0x%04x -> \"%s\"\n", values[j].value, check); #endif snprintf( message, MESSAGE_LEN, "%s, value = 0x%04X, expected = %s, output = %s", header, values[j].value, values[j].expected, check ); if (values[j].expected != NULL) { ck_assert_msg(check != NULL, message); ck_assert_msg(strcmp(check, values[j].expected) == 0, message); } else { ck_assert_msg(check == NULL, message); } } } table_free(table); } detox-2.0.0/tests/unit/test_wrapped.c000066400000000000000000000035711460212773400176360ustar00rootroot00000000000000/* * DO NOT EDIT THIS FILE. Generated by checkmk. * Edit the original source file "test_wrapped.template" instead. * Run `make internals` from the base of the project to regenerate this file. */ #include #line 1 "test_wrapped.template" /** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/ #include #include #include // need defines first #include "config.h" #include "wrapped.h" START_TEST(test_wrapped_malloc_fail) { #line 20 #ifdef SUPPORT_COVERAGE wrapped_malloc_failure = 1; wrapped_malloc(5); # else fprintf(stderr, "warning: not testing malloc failure\n"); exit(EXIT_FAILURE); #endif } END_TEST START_TEST(test_wrapped_strdup_fail) { #line 29 #ifdef SUPPORT_COVERAGE wrapped_strdup_failure = 1; wrapped_strdup("test"); # else fprintf(stderr, "warning: not testing malloc failure\n"); exit(EXIT_FAILURE); #endif } END_TEST START_TEST(test_wrapped_malloc) { #line 38 void *test; test = wrapped_malloc(16); ck_assert((test != NULL)); } END_TEST START_TEST(test_wrapped_strdup) { #line 44 char *check; check = wrapped_strdup("so broken"); ck_assert_str_eq(check, "so broken"); } END_TEST int main(void) { Suite *s1 = suite_create("Core"); TCase *tc1_1 = tcase_create("Core"); SRunner *sr = srunner_create(s1); int nf; suite_add_tcase(s1, tc1_1); tcase_add_exit_test(tc1_1, test_wrapped_malloc_fail, 1); tcase_add_exit_test(tc1_1, test_wrapped_strdup_fail, 1); tcase_add_test(tc1_1, test_wrapped_malloc); tcase_add_test(tc1_1, test_wrapped_strdup); srunner_run_all(sr, CK_ENV); nf = srunner_ntests_failed(sr); srunner_free(sr); return nf == 0 ? 0 : 1; } detox-2.0.0/tests/unit/test_wrapped.template000066400000000000000000000020211460212773400212140ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. 
*/ #include #include #include // need defines first #include "config.h" #include "wrapped.h" #test-exit(1) test_wrapped_malloc_fail #ifdef SUPPORT_COVERAGE wrapped_malloc_failure = 1; wrapped_malloc(5); # else fprintf(stderr, "warning: not testing malloc failure\n"); exit(EXIT_FAILURE); #endif #test-exit(1) test_wrapped_strdup_fail #ifdef SUPPORT_COVERAGE wrapped_strdup_failure = 1; wrapped_strdup("test"); # else fprintf(stderr, "warning: not testing malloc failure\n"); exit(EXIT_FAILURE); #endif #test test_wrapped_malloc void *test; test = wrapped_malloc(16); ck_assert((test != NULL)); #test test_wrapped_strdup char *check; check = wrapped_strdup("so broken"); ck_assert_str_eq(check, "so broken"); detox-2.0.0/tests/unit/unit_struct.h000066400000000000000000000007031460212773400175170ustar00rootroot00000000000000/** * This file is part of the Detox package. * * Copyright (c) Doug Harple * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #ifndef __UNIT_STRUCT_H #define __UNIT_STRUCT_H struct test_filename { char *filename; char *expected; char *expected_a; char *expected_b; int max_length; }; #endif /* __UNIT_STRUCT_H */