cmarkgfm/0000755000175000017500000000000014212172760012527 5ustar carstencarstencmarkgfm/src/0000755000175000017500000000000014210444330013307 5ustar carstencarstencmarkgfm/src/cmarkgfm/0000755000175000017500000000000014210444330015076 5ustar carstencarstencmarkgfm/src/cmarkgfm/__init__.py0000644000175000017500000000044014210444330017205 0ustar carstencarstenfrom cmarkgfm.cmark import ( github_flavored_markdown_to_html, markdown_to_html, markdown_to_html_with_extensions, Options) __all__ = [ 'github_flavored_markdown_to_html', 'markdown_to_html', 'markdown_to_html_with_extensions', 'Options', ] cmarkgfm/src/cmarkgfm/cmark_module.h0000644000175000017500000000043014210444330017706 0ustar carstencarsten#ifndef CMARK_MODULE_H #define CMARK_MODULE_H #ifdef __cplusplus extern "C" { #endif #define CMARKEXTENSIONS_STATIC_DEFINE #include "cmark-gfm.h" #include "cmark-gfm-extension_api.h" #include "cmark-gfm-core-extensions.h" #ifdef __cplusplus } #endif #endif cmarkgfm/src/cmarkgfm/build_cmark.py0000644000175000017500000000521114210444330017723 0ustar carstencarstenimport distutils.ccompiler import distutils.dist import glob import io import os import sys import cffi # Get the directory for the cmark source files. 
# It's under the package root
# as <package root>/third_party/cmark/src
HERE = os.path.dirname(os.path.abspath(__file__))
PACKAGE_ROOT = os.path.abspath(os.path.join(HERE, '../../'))
SRC_DIR = os.path.join(PACKAGE_ROOT, 'third_party/cmark/src')
EXTENSIONS_SRC_DIR = os.path.join(
    PACKAGE_ROOT, 'third_party/cmark/extensions')
UNIX_GENERATED_SRC_DIR = os.path.join(PACKAGE_ROOT, 'generated', 'unix')
WIN_GENERATED_SRC_DIR = os.path.join(PACKAGE_ROOT, 'generated', 'windows')
CMARK_DEF_H_PATH = os.path.join(HERE, 'cmark.cffi.h')
CMARK_MODULE_H_PATH = os.path.join(HERE, 'cmark_module.h')

# Both headers are read eagerly at import time: the cffi declarations and
# the C source snippet are needed as soon as the FFI builder is configured.
with io.open(CMARK_DEF_H_PATH, 'r', encoding='utf-8') as fh:
    CMARK_DEF_H = fh.read()

with io.open(CMARK_MODULE_H_PATH, 'r', encoding='utf-8') as fh:
    CMARK_MODULE_H = fh.read()


def _get_sources(dir, exclude=frozenset()):
    """Collect the C source files that live directly inside a directory.

    Args:
        dir (str): Directory to search. Only ``*.c`` files at its top level
            are matched; sub-directories are not descended into. (The name
            shadows the builtin but is kept so keyword callers keep working.)
        exclude (Container[str]): Base names (e.g. ``'main.c'``) to leave
            out of the result. Defaults to an immutable empty set.

    Returns:
        list[str]: Matching paths, made relative to ``PACKAGE_ROOT`` and
        sorted so the build input order is deterministic.
    """
    return sorted(
        os.path.relpath(path, start=PACKAGE_ROOT)
        for path in glob.iglob(os.path.join(dir, '*.c'))
        if os.path.basename(path) not in exclude
    )


# main.c is cmark's command-line entry point; it is not part of the library.
SOURCES = _get_sources(SRC_DIR, exclude={'main.c'})
SOURCES.extend(_get_sources(EXTENSIONS_SRC_DIR))


def _compiler_type():
    """Return the compiler type that distutils would use for a build.

    On Windows with MSVC it will be "msvc". On macOS and linux it is "unix".

    A throwaway ``Distribution`` is created and its config files are parsed
    so that a compiler selected through distutils/setup configuration is
    honoured before the compiler object is instantiated.

    NOTE(review): distutils is deprecated by PEP 632 and absent from the
    standard library on Python 3.12+; confirm the import still resolves
    (e.g. via setuptools) on the interpreters this package targets.

    Borrowed from
    https://github.com/pyca/cryptography/blob/05b34433fccdc2fec0bb014c3668068169d769fd/src/_cffi_src/utils.py#L78

    Returns:
        str: The ``compiler_type`` attribute of the selected compiler.
    """
    dist = distutils.dist.Distribution()
    dist.parse_config_files()
    cmd = dist.get_command_obj('build')
    cmd.ensure_finalized()
    compiler = distutils.ccompiler.new_compiler(compiler=cmd.compiler)
    return compiler.compiler_type


COMPILER_TYPE = _compiler_type()

PY2 = sys.version_info[0] < 3
# Note: on Python 2.7 in Windows we're using mingw so we use the unix
# srcs for that as well.
if COMPILER_TYPE in {'unix', 'mingw32'} or PY2: EXTRA_COMPILE_ARGS = ['-std=c99'] GENERATED_SRC_DIR = UNIX_GENERATED_SRC_DIR elif COMPILER_TYPE == 'msvc': EXTRA_COMPILE_ARGS = ['/TP'] GENERATED_SRC_DIR = WIN_GENERATED_SRC_DIR else: raise AssertionError("unsupported compiler: %s" % COMPILER_TYPE) ffibuilder = cffi.FFI() ffibuilder.cdef(CMARK_DEF_H) ffibuilder.set_source( 'cmarkgfm._cmark', CMARK_MODULE_H, sources=SOURCES, include_dirs=[SRC_DIR, EXTENSIONS_SRC_DIR, GENERATED_SRC_DIR], extra_compile_args=EXTRA_COMPILE_ARGS ) if __name__ == "__main__": ffibuilder.compile(verbose=True) cmarkgfm/src/cmarkgfm/cmark.cffi.h0000644000175000017500000000471714210444330017263 0ustar carstencarsten/* cffi declarations for cmark */ typedef enum { /* Error status */ CMARK_NODE_NONE = ... } cmark_node_type; typedef struct cmark_node cmark_node; typedef struct cmark_parser cmark_parser; typedef struct cmark_mem { void *(*calloc)(size_t, size_t); void *(*realloc)(void *, size_t); void (*free)(void *); } cmark_mem; typedef void (*cmark_free_func) (cmark_mem *mem, void *user_data); typedef struct _cmark_llist { struct _cmark_llist *next; void *data; } cmark_llist; cmark_llist * cmark_llist_append (cmark_mem * mem, cmark_llist * head, void * data); void cmark_llist_free_full (cmark_mem * mem, cmark_llist * head, cmark_free_func free_func); void cmark_llist_free (cmark_mem * mem, cmark_llist * head); const char *cmark_version_string(); char *cmark_markdown_to_html(const char *text, size_t len, int options); cmark_node *cmark_parse_document(const char *buffer, size_t len, int options); cmark_node_type cmark_node_get_type(cmark_node *node); char *cmark_render_html(cmark_node *root, int options, cmark_llist *extensions); cmark_parser *cmark_parser_new(int options); void cmark_parser_free(cmark_parser *parser); void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len); cmark_node *cmark_parser_finish(cmark_parser *parser); #define CMARK_OPT_DEFAULT 0 #define 
CMARK_OPT_SOURCEPOS ... #define CMARK_OPT_HARDBREAKS ... #define CMARK_OPT_UNSAFE ... #define CMARK_OPT_NOBREAKS ... #define CMARK_OPT_NORMALIZE ... #define CMARK_OPT_VALIDATE_UTF8 ... #define CMARK_OPT_SMART ... #define CMARK_OPT_GITHUB_PRE_LANG ... #define CMARK_OPT_LIBERAL_HTML_TAG ... #define CMARK_OPT_FOOTNOTES ... #define CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE ... #define CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES ... // /* From cmark_extension_api.h */ typedef struct cmark_syntax_extension cmark_syntax_extension; cmark_syntax_extension *cmark_find_syntax_extension(const char *name); int cmark_parser_attach_syntax_extension(cmark_parser *parser, cmark_syntax_extension *extension); cmark_llist *cmark_parser_get_syntax_extensions(cmark_parser *parser); // /* From core-extensions.h */ void cmark_gfm_core_extensions_ensure_registered(void);cmarkgfm/src/cmarkgfm/cmark.py0000644000175000017500000001454514210444330016556 0ustar carstencarsten"""Python bindings to GitHub's cmark Markdown library.""" from __future__ import unicode_literals from cmarkgfm import _cmark CMARK_VERSION = "0.29.0.gfm.2" class Options(object): CMARK_OPT_DEFAULT = _cmark.lib.CMARK_OPT_DEFAULT CMARK_OPT_SOURCEPOS = _cmark.lib.CMARK_OPT_SOURCEPOS CMARK_OPT_HARDBREAKS = _cmark.lib.CMARK_OPT_HARDBREAKS CMARK_OPT_UNSAFE = _cmark.lib.CMARK_OPT_UNSAFE CMARK_OPT_NOBREAKS = _cmark.lib.CMARK_OPT_NOBREAKS CMARK_OPT_NORMALIZE = _cmark.lib.CMARK_OPT_NORMALIZE CMARK_OPT_VALIDATE_UTF8 = _cmark.lib.CMARK_OPT_VALIDATE_UTF8 CMARK_OPT_SMART = _cmark.lib.CMARK_OPT_SMART CMARK_OPT_GITHUB_PRE_LANG = _cmark.lib.CMARK_OPT_GITHUB_PRE_LANG CMARK_OPT_LIBERAL_HTML_TAG = _cmark.lib.CMARK_OPT_LIBERAL_HTML_TAG CMARK_OPT_FOOTNOTES = _cmark.lib.CMARK_OPT_FOOTNOTES CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE = ( _cmark.lib.CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE) CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES = ( _cmark.lib.CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES) def markdown_to_html(text, options=0): """Render the given Markdown text to HTML. 
This is a direct interface to ``cmark_markdown_to_html``. Args: text (str): The Markdown text to render to HTML. options (int): The cmark options. Returns: str: The HTML rendered from Markdown. """ encoded_text = text.encode('utf-8') raw_result = _cmark.lib.cmark_markdown_to_html( encoded_text, len(encoded_text), options) return _cmark.ffi.string(raw_result).decode('utf-8') def markdown_to_html_with_extensions(text, options=0, extensions=None): """Render the given Markdown text to HTML, using extensions. This is a high-level wrapper over the various functions needed to enable extensions, attach them to a parser, and render HTML. Args: text (str): The Markdown text to render to HTML. options (int): The cmark options. extensions (Sequence[str]): The list of extension names to use. Returns: str: The HTML rendered from Markdown. """ if extensions is None: extensions = [] core_extensions_ensure_registered() cmark_extensions = [] for extension_name in extensions: extension = find_syntax_extension(extension_name) if extension is None: raise ValueError('Unknown extension {}'.format(extension_name)) cmark_extensions.append(extension) parser = parser_new(options=options) try: for extension in cmark_extensions: parser_attach_syntax_extension(parser, extension) parser_feed(parser, text) root = parser_finish(parser) if _cmark.lib.cmark_node_get_type(root) == _cmark.lib.CMARK_NODE_NONE: raise ValueError('Error parsing markdown!') extensions_ll = parser_get_syntax_extensions(parser) output = render_html(root, options=options, extensions=extensions_ll) finally: parser_free(parser) return output def github_flavored_markdown_to_html(text, options=0): """Render the given GitHub-flavored Makrdown to HTML. This is a small wrapper over :func:`markdown_to_html_with_extensions`. The GitHub extensions and the option CMARK_OPT_GITHUB_PRE_LANG are applied. Args: text (str): The Markdown text to render to HTML. options (int): The cmark options. Returns: str: The HTML rendered from Markdown. 
""" # Force some more options; see # options = ( options | Options.CMARK_OPT_GITHUB_PRE_LANG ) return markdown_to_html_with_extensions( text, options=options, extensions=[ 'table', 'autolink', 'tagfilter', 'strikethrough', 'tasklist' ]) def parse_document(text, options=0): """Parse a document and return the root node. Args: text (str): The text to parse. options (int): The cmark options. Returns: Any: Opaque reference to the root node of the parsed syntax tree. """ encoded_text = text.encode('utf-8') return _cmark.lib.cmark_parse_document( encoded_text, len(encoded_text), options) def parser_new(options=0): """Direct wrapper over cmark_parser_new.""" return _cmark.lib.cmark_parser_new(options) def parser_free(parser): """Direct wrapper over cmark_parser_free.""" return _cmark.lib.cmark_parser_free(parser) def parser_feed(parser, text): """Direct wrapper over cmark_parser_feed.""" encoded_text = text.encode('utf-8') return _cmark.lib.cmark_parser_feed( parser, encoded_text, len(encoded_text)) def parser_finish(parser): """Direct wrapper over cmark_parser_finish.""" return _cmark.lib.cmark_parser_finish(parser) def render_html(root, options=0, extensions=None): """Render a given syntax tree as HTML. Args: root (Any): The reference to the root node of the syntax tree. options (int): The cmark options. extensions (Any): The reference to the syntax extensions, generally from :func:`parser_get_syntax_extensions` Returns: str: The rendered HTML. 
""" if extensions is None: extensions = _cmark.ffi.NULL raw_result = _cmark.lib.cmark_render_html( root, options, extensions) return _cmark.ffi.string(raw_result).decode('utf-8') def core_extensions_ensure_registered(): """Direct wrapper over core_extensions_ensure_registered.""" _cmark.lib.cmark_gfm_core_extensions_ensure_registered() def find_syntax_extension(name): """Direct wrapper over cmark_find_syntax_extension.""" encoded_name = name.encode('utf-8') extension = _cmark.lib.cmark_find_syntax_extension(encoded_name) if extension == _cmark.ffi.NULL: return None else: return extension def parser_attach_syntax_extension(parser, extension): """Direct wrapper over cmark_parser_attach_syntax_extension.""" _cmark.lib.cmark_parser_attach_syntax_extension(parser, extension) def parser_get_syntax_extensions(parser): """Direct wrapper over cmark_parser_get_syntax_extensions.""" return _cmark.lib.cmark_parser_get_syntax_extensions(parser) cmarkgfm/MANIFEST.in0000644000175000017500000000067214210444330014263 0ustar carstencarsten# Include the license and readme file include LICENSE.txt include README.rst # Include tests include tests/*.py # Include all third_party and generated sources # needed to build cmark. 
recursive-include src/cmarkgfm *.h recursive-include third_party/cmark/src *.c *.h *.inc recursive-include third_party/cmark/extensions *.c *.h *.inc recursive-include generated *.h # Include cmark licensing information include third_party/cmark/COPYING cmarkgfm/setup.py0000644000175000017500000000407314212172760014245 0ustar carstencarstenimport platform import sys from setuptools import setup, find_packages from setuptools.command.build_ext import build_ext from codecs import open from os import path here = path.abspath(path.dirname(__file__)) with open(path.join(here, 'README.rst'), encoding='utf-8') as f: long_description = f.read() class custom_build_ext(build_ext): """Custom build_ext command that uses mingw32 when building on Python2.7 in Windows.""" def finalize_options(self): build_ext.finalize_options(self) is_windows = platform.system() == 'Windows' is_py2 = sys.version_info[0] < 3 if self.compiler is None and is_windows and is_py2: self.compiler = 'mingw32' setup( name='cmarkgfm', version='0.8.0', description="Minimal bindings to GitHub's fork of cmark", long_description=long_description, long_description_content_type="text/x-rst", url='https://github.com/theacodes/cmarkgfm', author='The Python Packaging Authority', author_email='me@thea.codes, pypa-dev@googlegroups.com', classifiers=[ 'Development Status :: 5 - Production/Stable', 'Intended Audience :: Developers', 'Topic :: Software Development :: Build Tools', 'License :: OSI Approved :: MIT License', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', ], package_dir={'': 'src'}, packages=find_packages(where='src'), cffi_modules=["src/cmarkgfm/build_cmark.py:ffibuilder"], setup_requires=["cffi>=1.15.0"], install_requires=["cffi>=1.15.0"], project_urls={ 'Bug Reports': 
'https://github.com/theacodes/cmarkgfm/issues', 'Funding': 'https://donate.pypi.org', 'Source': 'https://github.com/theacodes/cmarkgfm', }, zip_safe=False, include_package_data=True, cmdclass={ 'build_ext': custom_build_ext, }, ) cmarkgfm/tests/0000755000175000017500000000000014210444330013662 5ustar carstencarstencmarkgfm/tests/test_cmark.py0000644000175000017500000000520714210444330016374 0ustar carstencarstenfrom __future__ import unicode_literals import textwrap from cmarkgfm import cmark def _normalize_ws(html): return textwrap.dedent(html.strip("\n")).strip("\n") def test_markdown_to_html(): text = u"Hello, **world**!" result = cmark.markdown_to_html(text) assert result == '

Hello, world!

\n' def test_render_html_with_extensions(): text = u"Hello, https://pypa.io!" result = cmark.markdown_to_html_with_extensions( text, extensions=['autolink']) expected = """

Hello, https://pypa.io!

""" assert _normalize_ws(result) == _normalize_ws(expected) def test_github_flavored_markdown_to_html(): text = u"Hello, https://pypa.io!" result = cmark.github_flavored_markdown_to_html(text) expected = """

Hello, https://pypa.io!

""" assert _normalize_ws(result) == _normalize_ws(expected) def test_github_flavored_markdown_to_html_pre_tag(): text = u"```python\nprint('hello')\n```" result = cmark.github_flavored_markdown_to_html(text) expected = """
print('hello')
        
""" assert _normalize_ws(result) == _normalize_ws(expected) def test_github_flavored_markdown_to_html_tasklist(): text = u"- [X] Task 1 Done\n- [ ] Task 2 Incomplete" result = cmark.github_flavored_markdown_to_html(text) expected = """ """ assert _normalize_ws(result) == _normalize_ws(expected) def test_parse_document(): text = u"Hello, **world**!" result = cmark.parse_document(text) assert result is not None def test_render_html(): text = u"Hello, **world**!" root = cmark.parse_document(text) result = cmark.render_html(root) assert result == '

Hello, world!

\n' def test_parser_interface(): text = u"Hello, **world**!" parser = cmark.parser_new() cmark.parser_feed(parser, text) root = cmark.parser_finish(parser) result = cmark.render_html(root) cmark.parser_free(parser) assert result == '

Hello, world!

\n' def test_core_extensions_ensure_registered(): cmark.core_extensions_ensure_registered() def test_find_syntax_extension(): extension = cmark.find_syntax_extension('table') assert extension is not None def test_find_syntax_extension_doesnt_exist(): extension = cmark.find_syntax_extension('notarealext') assert extension is None cmarkgfm/tests/__init__.py0000644000175000017500000000032014210444330015766 0ustar carstencarsten# the inclusion of the tests module is not meant to offer best practices for # testing in general, but rather to support the `find_packages` example in # setup.py that excludes installing the "tests" package cmarkgfm/generated/0000755000175000017500000000000014210444330014456 5ustar carstencarstencmarkgfm/generated/unix/0000755000175000017500000000000014210444330015441 5ustar carstencarstencmarkgfm/generated/unix/cmark-gfm_version.h0000644000175000017500000000026314210444330021224 0ustar carstencarsten#ifndef CMARK_GFM_VERSION_H #define CMARK_GFM_VERSION_H #define CMARK_GFM_VERSION ((0 << 24) | (29 << 16) | (0 << 8) | 3) #define CMARK_GFM_VERSION_STRING "0.29.0.gfm.3" #endif cmarkgfm/generated/unix/cmark-gfm_export.h0000644000175000017500000000207214210444330021060 0ustar carstencarsten #ifndef CMARK_GFM_EXPORT_H #define CMARK_GFM_EXPORT_H #ifdef CMARK_GFM_STATIC_DEFINE # define CMARK_GFM_EXPORT # define CMARK_GFM_NO_EXPORT #else # ifndef CMARK_GFM_EXPORT # ifdef libcmark_gfm_EXPORTS /* We are building this library */ # define CMARK_GFM_EXPORT __attribute__((visibility("default"))) # else /* We are using this library */ # define CMARK_GFM_EXPORT __attribute__((visibility("default"))) # endif # endif # ifndef CMARK_GFM_NO_EXPORT # define CMARK_GFM_NO_EXPORT __attribute__((visibility("hidden"))) # endif #endif #ifndef CMARK_GFM_DEPRECATED # define CMARK_GFM_DEPRECATED __attribute__ ((__deprecated__)) #endif #ifndef CMARK_GFM_DEPRECATED_EXPORT # define CMARK_GFM_DEPRECATED_EXPORT CMARK_GFM_EXPORT CMARK_GFM_DEPRECATED #endif #ifndef 
CMARK_GFM_DEPRECATED_NO_EXPORT # define CMARK_GFM_DEPRECATED_NO_EXPORT CMARK_GFM_NO_EXPORT CMARK_GFM_DEPRECATED #endif #if 0 /* DEFINE_NO_DEPRECATED */ # ifndef CMARK_GFM_NO_DEPRECATED # define CMARK_GFM_NO_DEPRECATED # endif #endif #endif /* CMARK_GFM_EXPORT_H */ cmarkgfm/generated/unix/config.h0000644000175000017500000000256114210444330017063 0ustar carstencarsten#ifndef CMARK_CONFIG_H #define CMARK_CONFIG_H #ifdef __cplusplus extern "C" { #endif #define HAVE_STDBOOL_H #ifdef HAVE_STDBOOL_H #include #elif !defined(__cplusplus) typedef char bool; #endif #define HAVE___BUILTIN_EXPECT #define HAVE___ATTRIBUTE__ #ifdef HAVE___ATTRIBUTE__ #define CMARK_ATTRIBUTE(list) __attribute__ (list) #else #define CMARK_ATTRIBUTE(list) #endif #ifndef CMARK_INLINE #if defined(_MSC_VER) && !defined(__cplusplus) #define CMARK_INLINE __inline #else #define CMARK_INLINE inline #endif #endif /* snprintf and vsnprintf fallbacks for MSVC before 2015, due to Valentin Milea http://stackoverflow.com/questions/2915672/ */ #if defined(_MSC_VER) && _MSC_VER < 1900 #include #include #define snprintf c99_snprintf #define vsnprintf c99_vsnprintf CMARK_INLINE int c99_vsnprintf(char *outBuf, size_t size, const char *format, va_list ap) { int count = -1; if (size != 0) count = _vsnprintf_s(outBuf, size, _TRUNCATE, format, ap); if (count == -1) count = _vscprintf(format, ap); return count; } CMARK_INLINE int c99_snprintf(char *outBuf, size_t size, const char *format, ...) 
{ int count; va_list ap; va_start(ap, format); count = c99_vsnprintf(outBuf, size, format, ap); va_end(ap); return count; } #endif #ifdef __cplusplus } #endif #endif cmarkgfm/generated/unix/cmark-gfm-extensions_export.h0000644000175000017500000000250214210444330023253 0ustar carstencarsten #ifndef CMARK_GFM_EXTENSIONS_EXPORT_H #define CMARK_GFM_EXTENSIONS_EXPORT_H #ifdef CMARK_GFM_EXTENSIONS_STATIC_DEFINE # define CMARK_GFM_EXTENSIONS_EXPORT # define CMARK_GFM_EXTENSIONS_NO_EXPORT #else # ifndef CMARK_GFM_EXTENSIONS_EXPORT # ifdef libcmark_gfm_extensions_EXPORTS /* We are building this library */ # define CMARK_GFM_EXTENSIONS_EXPORT __attribute__((visibility("default"))) # else /* We are using this library */ # define CMARK_GFM_EXTENSIONS_EXPORT __attribute__((visibility("default"))) # endif # endif # ifndef CMARK_GFM_EXTENSIONS_NO_EXPORT # define CMARK_GFM_EXTENSIONS_NO_EXPORT __attribute__((visibility("hidden"))) # endif #endif #ifndef CMARK_GFM_EXTENSIONS_DEPRECATED # define CMARK_GFM_EXTENSIONS_DEPRECATED __attribute__ ((__deprecated__)) #endif #ifndef CMARK_GFM_EXTENSIONS_DEPRECATED_EXPORT # define CMARK_GFM_EXTENSIONS_DEPRECATED_EXPORT CMARK_GFM_EXTENSIONS_EXPORT CMARK_GFM_EXTENSIONS_DEPRECATED #endif #ifndef CMARK_GFM_EXTENSIONS_DEPRECATED_NO_EXPORT # define CMARK_GFM_EXTENSIONS_DEPRECATED_NO_EXPORT CMARK_GFM_EXTENSIONS_NO_EXPORT CMARK_GFM_EXTENSIONS_DEPRECATED #endif #if 0 /* DEFINE_NO_DEPRECATED */ # ifndef CMARK_GFM_EXTENSIONS_NO_DEPRECATED # define CMARK_GFM_EXTENSIONS_NO_DEPRECATED # endif #endif #endif /* CMARK_GFM_EXTENSIONS_EXPORT_H */ cmarkgfm/generated/windows/0000755000175000017500000000000014210444330016150 5ustar carstencarstencmarkgfm/generated/windows/cmark_export.h0000644000175000017500000000166714210444330021031 0ustar carstencarsten #ifndef CMARK_EXPORT_H #define CMARK_EXPORT_H #ifdef CMARK_STATIC_DEFINE # define CMARK_EXPORT # define CMARK_NO_EXPORT #else # ifndef CMARK_EXPORT # ifdef libcmark_gfm_EXPORTS /* We are building this library */ 
# define CMARK_EXPORT __declspec(dllexport) # else /* We are using this library */ # define CMARK_EXPORT __declspec(dllimport) # endif # endif # ifndef CMARK_NO_EXPORT # define CMARK_NO_EXPORT # endif #endif #ifndef CMARK_DEPRECATED # define CMARK_DEPRECATED __declspec(deprecated) #endif #ifndef CMARK_DEPRECATED_EXPORT # define CMARK_DEPRECATED_EXPORT CMARK_EXPORT CMARK_DEPRECATED #endif #ifndef CMARK_DEPRECATED_NO_EXPORT # define CMARK_DEPRECATED_NO_EXPORT CMARK_NO_EXPORT CMARK_DEPRECATED #endif #if 0 /* DEFINE_NO_DEPRECATED */ # ifndef CMARK_NO_DEPRECATED # define CMARK_NO_DEPRECATED # endif #endif #endif /* CMARK_EXPORT_H */ cmarkgfm/generated/windows/cmark-gfm_version.h0000644000175000017500000000027414210444330021735 0ustar carstencarsten#ifndef CMARK_GFM_VERSION_H #define CMARK_GFM_VERSION_H #define CMARK_GFM_VERSION ((0 << 24) | (28 << 16) | (3 << 8) | 17) #define CMARK_GFM_VERSION_STRING "0.28.3.gfm.17" #endif cmarkgfm/generated/windows/cmarkextensions_export.h0000644000175000017500000000224714210444330023144 0ustar carstencarsten #ifndef CMARKEXTENSIONS_EXPORT_H #define CMARKEXTENSIONS_EXPORT_H #ifdef CMARKEXTENSIONS_STATIC_DEFINE # define CMARKEXTENSIONS_EXPORT # define CMARKEXTENSIONS_NO_EXPORT #else # ifndef CMARKEXTENSIONS_EXPORT # ifdef libcmark_gfmextensions_EXPORTS /* We are building this library */ # define CMARKEXTENSIONS_EXPORT __declspec(dllexport) # else /* We are using this library */ # define CMARKEXTENSIONS_EXPORT __declspec(dllimport) # endif # endif # ifndef CMARKEXTENSIONS_NO_EXPORT # define CMARKEXTENSIONS_NO_EXPORT # endif #endif #ifndef CMARKEXTENSIONS_DEPRECATED # define CMARKEXTENSIONS_DEPRECATED __declspec(deprecated) #endif #ifndef CMARKEXTENSIONS_DEPRECATED_EXPORT # define CMARKEXTENSIONS_DEPRECATED_EXPORT CMARKEXTENSIONS_EXPORT CMARKEXTENSIONS_DEPRECATED #endif #ifndef CMARKEXTENSIONS_DEPRECATED_NO_EXPORT # define CMARKEXTENSIONS_DEPRECATED_NO_EXPORT CMARKEXTENSIONS_NO_EXPORT CMARKEXTENSIONS_DEPRECATED #endif #if 0 /* 
DEFINE_NO_DEPRECATED */ # ifndef CMARKEXTENSIONS_NO_DEPRECATED # define CMARKEXTENSIONS_NO_DEPRECATED # endif #endif #endif /* CMARKEXTENSIONS_EXPORT_H */ cmarkgfm/generated/windows/cmark-gfm_export.h0000644000175000017500000000202314210444330021563 0ustar carstencarsten #ifndef CMARK_GFM_EXPORT_H #define CMARK_GFM_EXPORT_H #ifdef CMARK_GFM_STATIC_DEFINE # define CMARK_GFM_EXPORT # define CMARK_GFM_NO_EXPORT #else # ifndef CMARK_GFM_EXPORT # ifdef libcmark_gfm_EXPORTS /* We are building this library */ # define CMARK_GFM_EXPORT __declspec(dllexport) # else /* We are using this library */ # define CMARK_GFM_EXPORT __declspec(dllimport) # endif # endif # ifndef CMARK_GFM_NO_EXPORT # define CMARK_GFM_NO_EXPORT # endif #endif #ifndef CMARK_GFM_DEPRECATED # define CMARK_GFM_DEPRECATED __declspec(deprecated) #endif #ifndef CMARK_GFM_DEPRECATED_EXPORT # define CMARK_GFM_DEPRECATED_EXPORT CMARK_GFM_EXPORT CMARK_GFM_DEPRECATED #endif #ifndef CMARK_GFM_DEPRECATED_NO_EXPORT # define CMARK_GFM_DEPRECATED_NO_EXPORT CMARK_GFM_NO_EXPORT CMARK_GFM_DEPRECATED #endif #if 0 /* DEFINE_NO_DEPRECATED */ # ifndef CMARK_GFM_NO_DEPRECATED # define CMARK_GFM_NO_DEPRECATED # endif #endif #endif /* CMARK_GFM_EXPORT_H */ cmarkgfm/generated/windows/config.h0000644000175000017500000000270714210444330017574 0ustar carstencarsten#ifndef CMARK_CONFIG_H #define CMARK_CONFIG_H #ifdef __cplusplus extern "C" { #endif #define HAVE_STDBOOL_H #ifdef HAVE_STDBOOL_H #include #elif !defined(__cplusplus) typedef char bool; #endif /* #undef HAVE___BUILTIN_EXPECT */ /* #undef HAVE___ATTRIBUTE__ */ #ifdef HAVE___ATTRIBUTE__ #define CMARK_ATTRIBUTE(list) __attribute__ (list) #else #define CMARK_ATTRIBUTE(list) #endif #ifndef CMARK_INLINE #if defined(_MSC_VER) && !defined(__cplusplus) #define CMARK_INLINE __inline #else #define CMARK_INLINE inline #endif #endif /* snprintf and vsnprintf fallbacks for MSVC before 2015, due to Valentin Milea http://stackoverflow.com/questions/2915672/ */ #if defined(_MSC_VER) && 
_MSC_VER < 1900 #include #include #define snprintf c99_snprintf #define vsnprintf c99_vsnprintf CMARK_INLINE int c99_vsnprintf(char *outBuf, size_t size, const char *format, va_list ap) { int count = -1; if (size != 0) count = _vsnprintf_s(outBuf, size, _TRUNCATE, format, ap); if (count == -1) count = _vscprintf(format, ap); return count; } CMARK_INLINE int c99_snprintf(char *outBuf, size_t size, const char *format, ...) { int count; va_list ap; va_start(ap, format); count = c99_vsnprintf(outBuf, size, format, ap); va_end(ap); return count; } #endif #ifdef __cplusplus } #endif #endif cmarkgfm/generated/windows/cmark-gfm-extensions_export.h0000644000175000017500000000243314210444330023765 0ustar carstencarsten #ifndef CMARK_GFM_EXTENSIONS_EXPORT_H #define CMARK_GFM_EXTENSIONS_EXPORT_H #ifdef CMARK_GFM_EXTENSIONS_STATIC_DEFINE # define CMARK_GFM_EXTENSIONS_EXPORT # define CMARK_GFM_EXTENSIONS_NO_EXPORT #else # ifndef CMARK_GFM_EXTENSIONS_EXPORT # ifdef libcmark_gfm_extensions_EXPORTS /* We are building this library */ # define CMARK_GFM_EXTENSIONS_EXPORT __declspec(dllexport) # else /* We are using this library */ # define CMARK_GFM_EXTENSIONS_EXPORT __declspec(dllimport) # endif # endif # ifndef CMARK_GFM_EXTENSIONS_NO_EXPORT # define CMARK_GFM_EXTENSIONS_NO_EXPORT # endif #endif #ifndef CMARK_GFM_EXTENSIONS_DEPRECATED # define CMARK_GFM_EXTENSIONS_DEPRECATED __declspec(deprecated) #endif #ifndef CMARK_GFM_EXTENSIONS_DEPRECATED_EXPORT # define CMARK_GFM_EXTENSIONS_DEPRECATED_EXPORT CMARK_GFM_EXTENSIONS_EXPORT CMARK_GFM_EXTENSIONS_DEPRECATED #endif #ifndef CMARK_GFM_EXTENSIONS_DEPRECATED_NO_EXPORT # define CMARK_GFM_EXTENSIONS_DEPRECATED_NO_EXPORT CMARK_GFM_EXTENSIONS_NO_EXPORT CMARK_GFM_EXTENSIONS_DEPRECATED #endif #if 0 /* DEFINE_NO_DEPRECATED */ # ifndef CMARK_GFM_EXTENSIONS_NO_DEPRECATED # define CMARK_GFM_EXTENSIONS_NO_DEPRECATED # endif #endif #endif /* CMARK_GFM_EXTENSIONS_EXPORT_H */ 
cmarkgfm/generated/windows/cmark_version.h0000644000175000017500000000031214210444330021157 0ustar carstencarsten#ifndef CMARK_VERSION_H #define CMARK_VERSION_H #define CMARK_VERSION ((0 << 24) | (28 << 16) | (3 << 8) | 12) #define CMARK_VERSION_STRING "0.28.3.gfm.12" #define CMARK_GFM_VERSION 12 #endif cmarkgfm/setup.cfg0000644000175000017500000000003214210444330014334 0ustar carstencarsten[bdist_wheel] universal=0 cmarkgfm/.gitmodules0000644000175000017500000000014114210444330014671 0ustar carstencarsten[submodule "third_party/cmark"] path = third_party/cmark url = https://github.com/github/cmark cmarkgfm/README.rst0000644000175000017500000001432414210444330014213 0ustar carstencarstencmarkgfm - Python bindings to GitHub's cmark ============================================ Minimalist Python bindings to GitHub's fork of cmark. Installation ------------ This package is published on PyPI as `cmarkgfm `__ and can be installed with `pip` or `pipenv`:: pip install --user cmarkgfm pipenv install cmarkgfm Wheels are provided for macOS, Linux, and Windows for Python 3.6, 3.7, 3.8, 3.9 and 3.10. Usage ----- High-level usage is really straightforward. To render normal CommonMark markdown: .. code-block:: python import cmarkgfm html = cmarkgfm.markdown_to_html(markdown_text) To render GitHub-flavored markdown: .. code-block:: python import cmarkgfm html = cmarkgfm.github_flavored_markdown_to_html(markdown_text) Advanced Usage -------------- **Options** Both rendering methods ``markdown_to_html`` and ``github_flavored_markdown_to_html`` have an optional ``options`` argument that can be used to activate `options of cmark `_. For example: .. 
code-block:: python import cmarkgfm from cmarkgfm.cmark import Options as cmarkgfmOptions options = ( cmarkgfmOptions.CMARK_OPT_GITHUB_PRE_LANG | cmarkgfmOptions.CMARK_OPT_SMART ) html = cmarkgfm.markdown_to_html(markdown_text, options) The options are: +-----------------------------------------+----------------------------------------------------+ | Option | Effect | +=========================================+====================================================+ | CMARK_OPT_UNSAFE (>=0.5.0) | Allows rendering unsafe HTML and links. | +-----------------------------------------+----------------------------------------------------+ | CMARK_OPT_SAFE (<0.5.0) | Prevents rendering unsafe HTML and links. | +-----------------------------------------+----------------------------------------------------+ | CMARK_OPT_SMART | Render curly quotes, en/em-dashes, ellipses | +-----------------------------------------+----------------------------------------------------+ | CMARK_OPT_NORMALIZE | Consolidate adjacent text nodes. | +-----------------------------------------+----------------------------------------------------+ | CMARK_OPT_HARDBREAKS | Renders line breaks within paragraphs as ``<br>`` | +-----------------------------------------+----------------------------------------------------+ | CMARK_OPT_NOBREAKS | Render soft line breaks as spaces. | +-----------------------------------------+----------------------------------------------------+ | CMARK_OPT_SOURCEPOS | Adds ``data-sourcepos`` to HTML tags indicating | | | the corresponding line/col ranges in the input | +-----------------------------------------+----------------------------------------------------+ | CMARK_OPT_FOOTNOTES | Parse footnotes. | +-----------------------------------------+----------------------------------------------------+ | CMARK_OPT_VALIDATE_UTF8 | Validate UTF\-8 in the input before parsing, | | | replacing illegal sequences with the replacement | | | character U+FFFD. | +-----------------------------------------+----------------------------------------------------+ | CMARK_OPT_GITHUB_PRE_LANG | Use GitHub\-style <pre lang> tags for code blocks. | +-----------------------------------------+----------------------------------------------------+ | CMARK_OPT_LIBERAL_HTML_TAG | Be liberal in interpreting inline HTML tags. | +-----------------------------------------+----------------------------------------------------+ | CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE | Only parse strikethroughs if surrounded by exactly | | | 2 tildes. Gives some compatibility with redcarpet. | +-----------------------------------------+----------------------------------------------------+ | CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES | Use style attributes to align table cells instead | | | of align attributes. | +-----------------------------------------+----------------------------------------------------+ **Unsafe rendering** Since version 0.5.0, the default behavior is safe. In earlier versions, the default behavior is unsafe, as described below. To render potentially unsafe HTML since 0.5.0 pass the ``CMARK_OPT_UNSAFE`` option. 
CommonMark can render potentially unsafe HTML, including raw HTML, raw JavaScript, and potentially unsafe links (including links that run scripts). Although ``github_flavored_markdown_to_html`` prevents some raw HTML tags (including ``script``) from being rendered, it does not block unsafe URLs in links. Therefore it is recommended to call the rendering method with the SAFE option turned on. The safe option does not render raw HTML or potentially dangerous URLs. (Raw HTML is replaced by a placeholder comment; potentially dangerous URLs are replaced by empty strings.) Dangerous URLs are those that begin with ``javascript:``, ``vbscript:``, ``file:``, or ``data:`` (except for ``image/png``, ``image/gif``, ``image/jpeg``, or ``image/webp`` MIME types). To do this, use: .. code-block:: python # cmarkgfm<0.5.0 import cmarkgfm from cmarkgfm.cmark import Options as cmarkgfmOptions html = cmarkgfm.markdown_to_html(markdown_text, options=cmarkgfmOptions.CMARK_OPT_SAFE) # or html = cmarkgfm.github_flavored_markdown_to_html(markdown_text, options=cmarkgfmOptions.CMARK_OPT_SAFE) If you trust the markdown text to not include any unsafe tags and links, then you may skip this. Contributing ------------ Pull requests are welcome. :) License ------- This project is under the MIT License. It includes components under differing copyright under the ``third_party`` directory in this source tree. 
cmarkgfm/.coveragerc0000644000175000017500000000041414210444330014640 0ustar carstencarsten[run] branch = True omit = # Skip cffi builders */build_*.py source = cmarkgfm tests/ [paths] source = src/cmarkgfm .nox/*/lib/python*/site-packages/cmarkgfm [report] exclude_lines = pragma: no cover show_missing = True cmarkgfm/.github/0000755000175000017500000000000014210444330014060 5ustar carstencarstencmarkgfm/.github/workflows/0000755000175000017500000000000014210444330016115 5ustar carstencarstencmarkgfm/.github/workflows/nox-test.yaml0000644000175000017500000000302014210444330020555 0ustar carstencarsten name: Nox Tests on: push: pull_request: types: [opened, reopened, edited, synchronize] jobs: lint: runs-on: ubuntu-latest name: Lint steps: - uses: actions/checkout@v2 - name: Setup git submodule run: git submodule update --init --recursive - name: Setup python uses: actions/setup-python@v2 with: python-version: '3.9' architecture: x64 - name: Setup virtualenv run: | # Always install nox into Python 3, regardless of the Python version used. python3 -m venv noxenv noxenv/bin/pip install nox - name: Run lint run: | export NOXSESSION="lint" noxenv/bin/nox test: runs-on: ubuntu-latest strategy: fail-fast: false matrix: python-version: [ '3.6', '3.7', '3.8', '3.9', '3.10' ] name: Test Python ${{ matrix.python-version }} steps: - uses: actions/checkout@v2 - name: Setup git submodule run: git submodule update --init --recursive - name: Setup python uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} architecture: x64 - name: Setup virtualenv run: | # Always install nox into Python 3, regardless of the Python version used. 
python3 -m venv noxenv noxenv/bin/pip install nox - name: Run nox tests run: | export NOXSESSION="unit-${{ matrix.python-version }}" noxenv/bin/nox cmarkgfm/.github/workflows/pypi-publish.yml0000644000175000017500000000427714210444330021277 0ustar carstencarsten# This workflows will upload a Python Package using Twine when a release is created # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries name: Build Wheels on: release: types: [created, edited] push: pull_request: types: [opened, reopened, edited, synchronize] jobs: build_wheels: name: Build wheels on ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-20.04, windows-2019, macos-10.15] steps: - uses: actions/checkout@v2 - name: Setup git submodule run: git submodule update --init --recursive - name: Checkout cmark-gfm uses: actions/checkout@v2 with: repository: github/cmark-gfm path: cmark-gfm - uses: actions/setup-python@v2 name: Install Python with: python-version: '3.9' - name: Install build deps run: | python -m pip --disable-pip-version-check install cibuildwheel==2.3.0 twine==3.7.1 - uses: docker/setup-qemu-action@v1 if: runner.os == 'Linux' name: Set up QEMU - name: Build wheels run: | python -m cibuildwheel --output-dir wheelhouse twine check ./wheelhouse/*.whl - name: Build sdist if: runner.os == 'Linux' run: | python setup.py sdist twine check ./dist/*.tar.gz - name: Publish sdist env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} if: ${{ github.event_name == 'release' && runner.os == 'Linux' && env.TWINE_USERNAME != null }} run: | twine upload --skip-existing ./dist/* - uses: actions/upload-artifact@v2 with: path: | ./wheelhouse/*.whl ./dist/*.tar.gz - name: Publish wheels env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} if: ${{ github.event_name == 'release' && env.TWINE_USERNAME != 
null }} run: | twine upload --skip-existing ./wheelhouse/* cmarkgfm/pyproject.toml0000644000175000017500000000065414210444330015441 0ustar carstencarsten[tool.cibuildwheel] build = "*" skip = "pp*" test-skip = "" archs = ["auto"] [tool.cibuildwheel.macos] archs = ["auto", "arm64"] [tool.cibuildwheel.linux] archs = ["auto", "aarch64"] before-all = "yum -y update && yum install -y libffi-devel" [[tool.cibuildwheel.overrides]] select = "*-musllinux*" before-all = "apk add libffi-dev" [[tool.cibuildwheel.overrides]] select = "*-musllinux*" before-all = "apk add libffi-dev" cmarkgfm/LICENSE.txt0000644000175000017500000000210714210444330014343 0ustar carstencarstenCopyright (c) 2018 Thea Flowers, The Python Packaging Authority (PyPA) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
cmarkgfm/noxfile.py0000644000175000017500000000263214210444330014541 0ustar carstencarstenimport os import platform import shutil import nox @nox.session(py=['3.6', '3.7', '3.8', '3.9', '3.10']) def unit(session): session.install('pytest', 'pytest-cov') session.install('.') session.run( 'pytest', '--cov-report', '', '--cov', 'cmarkgfm', '--cov', 'tests', 'tests', *session.posargs) session.run('coverage', 'report', '--show-missing') @nox.session def lint(session): session.install('flake8') session.run('flake8', 'src/cmarkgfm', 'tests') session.install('readme_renderer') session.run('python', 'setup.py', 'check', '-m', '-r', '-s') @nox.session(py=False) def regenerate(session): """Regenerates header files for cmark under ./generated.""" if platform.system() == 'Windows': output_dir = '../generated/windows' else: output_dir = '../generated/unix' session.run(shutil.rmtree, 'build', ignore_errors=True) session.run(os.makedirs, 'build') session.chdir('build') session.run('cmake', '../third_party/cmark') session.run(shutil.copy, 'src/cmark-gfm_export.h', output_dir) session.run(shutil.copy, 'src/cmark-gfm_version.h', output_dir) session.run(shutil.copy, 'src/config.h', output_dir) session.run(shutil.copy, 'extensions/cmark-gfm-extensions_export.h', output_dir) session.chdir('..') session.run(shutil.rmtree, 'build') cmarkgfm/third_party/0000755000175000017500000000000014210444330015051 5ustar carstencarstencmarkgfm/third_party/cmark/0000755000175000017500000000000014210444464016156 5ustar carstencarstencmarkgfm/third_party/cmark/src/0000755000175000017500000000000014210444464016745 5ustar carstencarstencmarkgfm/third_party/cmark/src/cmark-gfm-extension_api.h0000644000175000017500000006505314210444464023636 0ustar carstencarsten#ifndef CMARK_GFM_EXTENSION_API_H #define CMARK_GFM_EXTENSION_API_H #ifdef __cplusplus extern "C" { #endif #include "cmark-gfm.h" struct cmark_renderer; struct cmark_html_renderer; struct cmark_chunk; /** * ## Extension Support * * While the "core" 
of libcmark is strictly compliant with the * specification, an API is provided for extension writers to * hook into the parsing process. * * It should be noted that the cmark_node API already offers * room for customization, with methods offered to traverse and * modify the AST, and even define custom blocks. * When the desired customization is achievable in an error-proof * way using that API, it should be the preferred method. * * The following API requires a more in-depth understanding * of libcmark's parsing strategy, which is exposed * [here](http://spec.commonmark.org/0.24/#appendix-a-parsing-strategy). * * It should be used when "a posteriori" modification of the AST * proves to be too difficult / impossible to implement correctly. * * It can also serve as an intermediary step before extending * the specification, as an extension implemented using this API * will be trivially integrated in the core if it proves to be * desirable. */ typedef struct cmark_plugin cmark_plugin; /** A syntax extension that can be attached to a cmark_parser * with cmark_parser_attach_syntax_extension(). * * Extension writers should assign functions matching * the signature of the following 'virtual methods' to * implement new functionality. * * Their calling order and expected behaviour match the procedure outlined * at : * * During step 1, cmark will call the function provided through * 'cmark_syntax_extension_set_match_block_func' when it * iterates over an open block created by this extension, * to determine whether it could contain the new line. * If no function was provided, cmark will close the block. * * During step 2, if and only if the new line doesn't match any * of the standard syntax rules, cmark will call the function * provided through 'cmark_syntax_extension_set_open_block_func' * to let the extension determine whether that new line matches * one of its syntax rules. 
* It is the responsibility of the parser to create and add the * new block with cmark_parser_make_block and cmark_parser_add_child. * If no function was provided, the extension will have * no effect at all on the final block structure of the AST. * * #### Inline parsing phase hooks * * For each character provided by the extension through * 'cmark_syntax_extension_set_special_inline_chars', * the function provided by the extension through * 'cmark_syntax_extension_set_match_inline_func' * will get called; it is the responsibility of the extension * to scan the characters located at the current inline parsing offset * with the cmark_inline_parser API. * * Depending on the type of the extension, it can either: * * * Scan forward, determine that the syntax matches and return * a newly-created inline node with the appropriate type. * This is the technique that would be used if inline code * (with backticks) was implemented as an extension. * * Scan only the character(s) that its syntax rules require * for opening and closing nodes, push a delimiter on the * delimiter stack, and return a simple text node with its * contents set to the character(s) consumed. * This is the technique that would be used if emphasis * inlines were implemented as an extension. * * When an extension has pushed delimiters on the stack, * the function provided through * 'cmark_syntax_extension_set_inline_from_delim_func' * will get called in a later phase, * when the inline parser has matched opener and closer delimiters * created by the extension together. * * It is then the responsibility of the extension to modify * and populate the opener inline text node, and to remove * the necessary delimiters from the delimiter stack. * * Finally, the extension should return NULL if its scan didn't * match its syntax rules. * * The extension can store whatever private data it might need * with 'cmark_syntax_extension_set_private', * and optionally define a free function for this data. 
*/ typedef struct subject cmark_inline_parser; /** Exposed raw for now */ typedef struct delimiter { struct delimiter *previous; struct delimiter *next; cmark_node *inl_text; bufsize_t length; unsigned char delim_char; int can_open; int can_close; } delimiter; /** * ### Plugin API. * * Extensions should be distributed as dynamic libraries, * with a single exported function named after the distributed * filename. * * When discovering extensions (see cmark_init), cmark will * try to load a symbol named "init_{{filename}}" in all the * dynamic libraries it encounters. * * For example, given a dynamic library named myextension.so * (or myextension.dll), cmark will try to load the symbol * named "init_myextension". This means that the filename * must lend itself to forming a valid C identifier, with * the notable exception of dashes, which will be translated * to underscores, which means cmark will look for a function * named "init_my_extension" if it encounters a dynamic library * named "my-extension.so". * * See the 'cmark_plugin_init_func' typedef for the exact prototype * this function should follow. * * For now the extensibility of cmark is not complete, as * it only offers API to hook into the block parsing phase * (). * * See 'cmark_plugin_register_syntax_extension' for more information. */ /** The prototype plugins' init function should follow. */ typedef int (*cmark_plugin_init_func)(cmark_plugin *plugin); /** Register a syntax 'extension' with the 'plugin', it will be made * available as an extension and, if attached to a cmark_parser * with 'cmark_parser_attach_syntax_extension', it will contribute * to the block parsing process. * * See the documentation for 'cmark_syntax_extension' for information * on how to implement one. * * This function will typically be called from the init function * of external modules. * * This takes ownership of 'extension', one should not call * 'cmark_syntax_extension_free' on a registered extension. 
*/ CMARK_GFM_EXPORT int cmark_plugin_register_syntax_extension(cmark_plugin *plugin, cmark_syntax_extension *extension); /** This will search for the syntax extension named 'name' among the * registered syntax extensions. * * It can then be attached to a cmark_parser * with the cmark_parser_attach_syntax_extension method. */ CMARK_GFM_EXPORT cmark_syntax_extension *cmark_find_syntax_extension(const char *name); /** Should create and add a new open block to 'parent_container' if * 'input' matches a syntax rule for that block type. It is allowed * to modify the type of 'parent_container'. * * Should return the newly created block if there is one, or * 'parent_container' if its type was modified, or NULL. */ typedef cmark_node * (*cmark_open_block_func) (cmark_syntax_extension *extension, int indented, cmark_parser *parser, cmark_node *parent_container, unsigned char *input, int len); typedef cmark_node *(*cmark_match_inline_func)(cmark_syntax_extension *extension, cmark_parser *parser, cmark_node *parent, unsigned char character, cmark_inline_parser *inline_parser); typedef delimiter *(*cmark_inline_from_delim_func)(cmark_syntax_extension *extension, cmark_parser *parser, cmark_inline_parser *inline_parser, delimiter *opener, delimiter *closer); /** Should return 'true' if 'input' can be contained in 'container', * 'false' otherwise. 
*/ typedef int (*cmark_match_block_func) (cmark_syntax_extension *extension, cmark_parser *parser, unsigned char *input, int len, cmark_node *container); typedef const char *(*cmark_get_type_string_func) (cmark_syntax_extension *extension, cmark_node *node); typedef int (*cmark_can_contain_func) (cmark_syntax_extension *extension, cmark_node *node, cmark_node_type child); typedef int (*cmark_contains_inlines_func) (cmark_syntax_extension *extension, cmark_node *node); typedef void (*cmark_common_render_func) (cmark_syntax_extension *extension, struct cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options); typedef int (*cmark_commonmark_escape_func) (cmark_syntax_extension *extension, cmark_node *node, int c); typedef const char* (*cmark_xml_attr_func) (cmark_syntax_extension *extension, cmark_node *node); typedef void (*cmark_html_render_func) (cmark_syntax_extension *extension, struct cmark_html_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options); typedef int (*cmark_html_filter_func) (cmark_syntax_extension *extension, const unsigned char *tag, size_t tag_len); typedef cmark_node *(*cmark_postprocess_func) (cmark_syntax_extension *extension, cmark_parser *parser, cmark_node *root); typedef int (*cmark_ispunct_func) (char c); typedef void (*cmark_opaque_alloc_func) (cmark_syntax_extension *extension, cmark_mem *mem, cmark_node *node); typedef void (*cmark_opaque_free_func) (cmark_syntax_extension *extension, cmark_mem *mem, cmark_node *node); /** Free a cmark_syntax_extension. */ CMARK_GFM_EXPORT void cmark_syntax_extension_free (cmark_mem *mem, cmark_syntax_extension *extension); /** Return a newly-constructed cmark_syntax_extension, named 'name'. 
*/ CMARK_GFM_EXPORT cmark_syntax_extension *cmark_syntax_extension_new (const char *name); CMARK_GFM_EXPORT cmark_node_type cmark_syntax_extension_add_node(int is_inline); CMARK_GFM_EXPORT void cmark_syntax_extension_set_emphasis(cmark_syntax_extension *extension, int emphasis); /** See the documentation for 'cmark_syntax_extension' */ CMARK_GFM_EXPORT void cmark_syntax_extension_set_open_block_func(cmark_syntax_extension *extension, cmark_open_block_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_GFM_EXPORT void cmark_syntax_extension_set_match_block_func(cmark_syntax_extension *extension, cmark_match_block_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_GFM_EXPORT void cmark_syntax_extension_set_match_inline_func(cmark_syntax_extension *extension, cmark_match_inline_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_GFM_EXPORT void cmark_syntax_extension_set_inline_from_delim_func(cmark_syntax_extension *extension, cmark_inline_from_delim_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_GFM_EXPORT void cmark_syntax_extension_set_special_inline_chars(cmark_syntax_extension *extension, cmark_llist *special_chars); /** See the documentation for 'cmark_syntax_extension' */ CMARK_GFM_EXPORT void cmark_syntax_extension_set_get_type_string_func(cmark_syntax_extension *extension, cmark_get_type_string_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_GFM_EXPORT void cmark_syntax_extension_set_can_contain_func(cmark_syntax_extension *extension, cmark_can_contain_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_GFM_EXPORT void cmark_syntax_extension_set_contains_inlines_func(cmark_syntax_extension *extension, cmark_contains_inlines_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_GFM_EXPORT void cmark_syntax_extension_set_commonmark_render_func(cmark_syntax_extension *extension, 
cmark_common_render_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_GFM_EXPORT void cmark_syntax_extension_set_plaintext_render_func(cmark_syntax_extension *extension, cmark_common_render_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_GFM_EXPORT void cmark_syntax_extension_set_latex_render_func(cmark_syntax_extension *extension, cmark_common_render_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_GFM_EXPORT void cmark_syntax_extension_set_xml_attr_func(cmark_syntax_extension *extension, cmark_xml_attr_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_GFM_EXPORT void cmark_syntax_extension_set_man_render_func(cmark_syntax_extension *extension, cmark_common_render_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_GFM_EXPORT void cmark_syntax_extension_set_html_render_func(cmark_syntax_extension *extension, cmark_html_render_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_GFM_EXPORT void cmark_syntax_extension_set_html_filter_func(cmark_syntax_extension *extension, cmark_html_filter_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_GFM_EXPORT void cmark_syntax_extension_set_commonmark_escape_func(cmark_syntax_extension *extension, cmark_commonmark_escape_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_GFM_EXPORT void cmark_syntax_extension_set_private(cmark_syntax_extension *extension, void *priv, cmark_free_func free_func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_GFM_EXPORT void *cmark_syntax_extension_get_private(cmark_syntax_extension *extension); /** See the documentation for 'cmark_syntax_extension' */ CMARK_GFM_EXPORT void cmark_syntax_extension_set_postprocess_func(cmark_syntax_extension *extension, cmark_postprocess_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_GFM_EXPORT void 
cmark_syntax_extension_set_opaque_alloc_func(cmark_syntax_extension *extension, cmark_opaque_alloc_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_GFM_EXPORT void cmark_syntax_extension_set_opaque_free_func(cmark_syntax_extension *extension, cmark_opaque_free_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_GFM_EXPORT void cmark_parser_set_backslash_ispunct_func(cmark_parser *parser, cmark_ispunct_func func); /** Return the index of the line currently being parsed, starting with 1. */ CMARK_GFM_EXPORT int cmark_parser_get_line_number(cmark_parser *parser); /** Return the offset in bytes in the line being processed. * * Example: * * ### foo * * Here, offset will first be 0, then 5 (the index of the 'f' character). */ CMARK_GFM_EXPORT int cmark_parser_get_offset(cmark_parser *parser); /** * Return the offset in 'columns' in the line being processed. * * This value may differ from the value returned by * cmark_parser_get_offset() in that it accounts for tabs, * and as such should not be used as an index in the current line's * buffer. * * Example: * * cmark_parser_advance_offset() can be called to advance the * offset by a number of columns, instead of a number of bytes. * * In that case, if offset falls "in the middle" of a tab * character, 'column' and offset will differ. * * ``` * foo \t bar * ^ ^^ * offset (0) 20 * ``` * * If cmark_parser_advance_offset is called here with 'columns' * set to 'true' and 'offset' set to 22, cmark_parser_get_offset() * will return 20, whereas cmark_parser_get_column() will return * 22. * * Additionally, as tabs expand to the next multiple of 4 column, * cmark_parser_has_partially_consumed_tab() will now return * 'true'. */ CMARK_GFM_EXPORT int cmark_parser_get_column(cmark_parser *parser); /** Return the absolute index in bytes of the first nonspace * character coming after the offset as returned by * cmark_parser_get_offset() in the line currently being processed. 
* * Example: * * ``` * foo bar baz \n * ^ ^ ^ * 0 offset (16) first_nonspace (28) * ``` */ CMARK_GFM_EXPORT int cmark_parser_get_first_nonspace(cmark_parser *parser); /** Return the absolute index of the first nonspace column coming after 'offset' * in the line currently being processed, counting tabs as multiple * columns as appropriate. * * See the documentation for cmark_parser_get_first_nonspace() and * cmark_parser_get_column() for more information. */ CMARK_GFM_EXPORT int cmark_parser_get_first_nonspace_column(cmark_parser *parser); /** Return the difference between the values returned by * cmark_parser_get_first_nonspace_column() and * cmark_parser_get_column(). * * This is not a byte offset, as it can count one tab as multiple * characters. */ CMARK_GFM_EXPORT int cmark_parser_get_indent(cmark_parser *parser); /** Return 'true' if the line currently being processed has been entirely * consumed, 'false' otherwise. * * Example: * * ``` * foo bar baz \n * ^ * offset * ``` * * This function will return 'false' here. * * ``` * foo bar baz \n * ^ * offset * ``` * This function will still return 'false'. * * ``` * foo bar baz \n * ^ * offset * ``` * * At this point, this function will now return 'true'. */ CMARK_GFM_EXPORT int cmark_parser_is_blank(cmark_parser *parser); /** Return 'true' if the value returned by cmark_parser_get_offset() * is 'inside' an expanded tab. * * See the documentation for cmark_parser_get_column() for more * information. */ CMARK_GFM_EXPORT int cmark_parser_has_partially_consumed_tab(cmark_parser *parser); /** Return the length in bytes of the previously processed line, excluding potential * newline (\n) and carriage return (\r) trailing characters. */ CMARK_GFM_EXPORT int cmark_parser_get_last_line_length(cmark_parser *parser); /** Add a child to 'parent' during the parsing process. 
* * If 'parent' isn't the kind of node that can accept this child, * this function will back up till it hits a node that can, closing * blocks as appropriate. */ CMARK_GFM_EXPORT cmark_node*cmark_parser_add_child(cmark_parser *parser, cmark_node *parent, cmark_node_type block_type, int start_column); /** Advance the 'offset' of the parser in the current line. * * See the documentation of cmark_parser_get_offset() and * cmark_parser_get_column() for more information. */ CMARK_GFM_EXPORT void cmark_parser_advance_offset(cmark_parser *parser, const char *input, int count, int columns); CMARK_GFM_EXPORT void cmark_parser_feed_reentrant(cmark_parser *parser, const char *buffer, size_t len); /** Attach the syntax 'extension' to the 'parser', to provide extra syntax * rules. * See the documentation for cmark_syntax_extension for more information. * * Returns 'true' if the 'extension' was successfully attached, * 'false' otherwise. */ CMARK_GFM_EXPORT int cmark_parser_attach_syntax_extension(cmark_parser *parser, cmark_syntax_extension *extension); /** Change the type of 'node'. * * Return 0 if the type could be changed, 1 otherwise. */ CMARK_GFM_EXPORT int cmark_node_set_type(cmark_node *node, cmark_node_type type); /** Return the string content for all types of 'node'. * The pointer stays valid as long as 'node' isn't freed. */ CMARK_GFM_EXPORT const char *cmark_node_get_string_content(cmark_node *node); /** Set the string 'content' for all types of 'node'. * Copies 'content'. */ CMARK_GFM_EXPORT int cmark_node_set_string_content(cmark_node *node, const char *content); /** Get the syntax extension responsible for the creation of 'node'. * Return NULL if 'node' was created because it matched standard syntax rules. */ CMARK_GFM_EXPORT cmark_syntax_extension *cmark_node_get_syntax_extension(cmark_node *node); /** Set the syntax extension responsible for creating 'node'. 
*/ CMARK_GFM_EXPORT int cmark_node_set_syntax_extension(cmark_node *node, cmark_syntax_extension *extension); /** * ## Inline syntax extension helpers * * The inline parsing process is described in detail at * */ /** Should return 'true' if the predicate matches 'c', 'false' otherwise */ typedef int (*cmark_inline_predicate)(int c); /** Advance the current inline parsing offset */ CMARK_GFM_EXPORT void cmark_inline_parser_advance_offset(cmark_inline_parser *parser); /** Get the current inline parsing offset */ CMARK_GFM_EXPORT int cmark_inline_parser_get_offset(cmark_inline_parser *parser); /** Set the offset in bytes in the chunk being processed by the given inline parser. */ CMARK_GFM_EXPORT void cmark_inline_parser_set_offset(cmark_inline_parser *parser, int offset); /** Gets the cmark_chunk being operated on by the given inline parser. * Use cmark_inline_parser_get_offset to get our current position in the chunk. */ CMARK_GFM_EXPORT struct cmark_chunk *cmark_inline_parser_get_chunk(cmark_inline_parser *parser); /** Returns 1 if the inline parser is currently in a bracket; pass 1 for 'image' * if you want to know about an image-type bracket, 0 for link-type. */ CMARK_GFM_EXPORT int cmark_inline_parser_in_bracket(cmark_inline_parser *parser, int image); /** Remove the last n characters from the last child of the given node. * This only works where all n characters are in the single last child, and the last * child is CMARK_NODE_TEXT. */ CMARK_GFM_EXPORT void cmark_node_unput(cmark_node *node, int n); /** Get the character located at the current inline parsing offset */ CMARK_GFM_EXPORT unsigned char cmark_inline_parser_peek_char(cmark_inline_parser *parser); /** Get the character located 'pos' bytes in the current line. 
*/ CMARK_GFM_EXPORT unsigned char cmark_inline_parser_peek_at(cmark_inline_parser *parser, int pos); /** Whether the inline parser has reached the end of the current line */ CMARK_GFM_EXPORT int cmark_inline_parser_is_eof(cmark_inline_parser *parser); /** Get the characters located after the current inline parsing offset * while 'pred' matches. Free after usage. */ CMARK_GFM_EXPORT char *cmark_inline_parser_take_while(cmark_inline_parser *parser, cmark_inline_predicate pred); /** Push a delimiter on the delimiter stack. * See < for * more information on the parameters */ CMARK_GFM_EXPORT void cmark_inline_parser_push_delimiter(cmark_inline_parser *parser, unsigned char c, int can_open, int can_close, cmark_node *inl_text); /** Remove 'delim' from the delimiter stack */ CMARK_GFM_EXPORT void cmark_inline_parser_remove_delimiter(cmark_inline_parser *parser, delimiter *delim); CMARK_GFM_EXPORT delimiter *cmark_inline_parser_get_last_delimiter(cmark_inline_parser *parser); CMARK_GFM_EXPORT int cmark_inline_parser_get_line(cmark_inline_parser *parser); CMARK_GFM_EXPORT int cmark_inline_parser_get_column(cmark_inline_parser *parser); /** Convenience function to scan a given delimiter. * * 'left_flanking' and 'right_flanking' will be set to true if they * respectively precede and follow a non-space, non-punctuation * character. * * Additionally, 'punct_before' and 'punct_after' will respectively be set * if the preceding or following character is a punctuation character. * * Note that 'left_flanking' and 'right_flanking' can both be 'true'. * * Returns the number of delimiters encountered, in the limit * of 'max_delims', and advances the inline parsing offset. 
*/ CMARK_GFM_EXPORT int cmark_inline_parser_scan_delimiters(cmark_inline_parser *parser, int max_delims, unsigned char c, int *left_flanking, int *right_flanking, int *punct_before, int *punct_after); CMARK_GFM_EXPORT void cmark_manage_extensions_special_characters(cmark_parser *parser, int add); CMARK_GFM_EXPORT cmark_llist *cmark_parser_get_syntax_extensions(cmark_parser *parser); CMARK_GFM_EXPORT void cmark_arena_push(void); CMARK_GFM_EXPORT int cmark_arena_pop(void); #ifdef __cplusplus } #endif #endif cmarkgfm/third_party/cmark/src/registry.c0000644000175000017500000000307614210444464020767 0ustar carstencarsten#include #include #include #include "config.h" #include "cmark-gfm.h" #include "syntax_extension.h" #include "registry.h" #include "plugin.h" extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR; static cmark_llist *syntax_extensions = NULL; void cmark_register_plugin(cmark_plugin_init_func reg_fn) { cmark_plugin *plugin = cmark_plugin_new(); if (!reg_fn(plugin)) { cmark_plugin_free(plugin); return; } cmark_llist *syntax_extensions_list = cmark_plugin_steal_syntax_extensions(plugin), *it; for (it = syntax_extensions_list; it; it = it->next) { syntax_extensions = cmark_llist_append(&CMARK_DEFAULT_MEM_ALLOCATOR, syntax_extensions, it->data); } cmark_llist_free(&CMARK_DEFAULT_MEM_ALLOCATOR, syntax_extensions_list); cmark_plugin_free(plugin); } void cmark_release_plugins(void) { if (syntax_extensions) { cmark_llist_free_full( &CMARK_DEFAULT_MEM_ALLOCATOR, syntax_extensions, (cmark_free_func) cmark_syntax_extension_free); syntax_extensions = NULL; } } cmark_llist *cmark_list_syntax_extensions(cmark_mem *mem) { cmark_llist *it; cmark_llist *res = NULL; for (it = syntax_extensions; it; it = it->next) { res = cmark_llist_append(mem, res, it->data); } return res; } cmark_syntax_extension *cmark_find_syntax_extension(const char *name) { cmark_llist *tmp; for (tmp = syntax_extensions; tmp; tmp = tmp->next) { cmark_syntax_extension *ext = (cmark_syntax_extension *) 
tmp->data; if (!strcmp(ext->name, name)) return ext; } return NULL; } cmarkgfm/third_party/cmark/src/latex.c0000644000175000017500000002534314210444464020235 0ustar carstencarsten#include #include #include #include #include "config.h" #include "cmark-gfm.h" #include "node.h" #include "buffer.h" #include "utf8.h" #include "scanners.h" #include "render.h" #include "syntax_extension.h" #define OUT(s, wrap, escaping) renderer->out(renderer, node, s, wrap, escaping) #define LIT(s) renderer->out(renderer, node, s, false, LITERAL) #define CR() renderer->cr(renderer) #define BLANKLINE() renderer->blankline(renderer) #define LIST_NUMBER_STRING_SIZE 20 static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node, cmark_escaping escape, int32_t c, unsigned char nextc) { if (escape == LITERAL) { cmark_render_code_point(renderer, c); return; } switch (c) { case 123: // '{' case 125: // '}' case 35: // '#' case 37: // '%' case 38: // '&' cmark_render_ascii(renderer, "\\"); cmark_render_code_point(renderer, c); break; case 36: // '$' case 95: // '_' if (escape == NORMAL) { cmark_render_ascii(renderer, "\\"); } cmark_render_code_point(renderer, c); break; case 45: // '-' if (nextc == 45) { // prevent ligature cmark_render_ascii(renderer, "-{}"); } else { cmark_render_ascii(renderer, "-"); } break; case 126: // '~' if (escape == NORMAL) { cmark_render_ascii(renderer, "\\textasciitilde{}"); } else { cmark_render_code_point(renderer, c); } break; case 94: // '^' cmark_render_ascii(renderer, "\\^{}"); break; case 92: // '\\' if (escape == URL) { // / acts as path sep even on windows: cmark_render_ascii(renderer, "/"); } else { cmark_render_ascii(renderer, "\\textbackslash{}"); } break; case 124: // '|' cmark_render_ascii(renderer, "\\textbar{}"); break; case 60: // '<' cmark_render_ascii(renderer, "\\textless{}"); break; case 62: // '>' cmark_render_ascii(renderer, "\\textgreater{}"); break; case 91: // '[' case 93: // ']' cmark_render_ascii(renderer, "{"); 
cmark_render_code_point(renderer, c); cmark_render_ascii(renderer, "}"); break; case 34: // '"' cmark_render_ascii(renderer, "\\textquotedbl{}"); // requires \usepackage[T1]{fontenc} break; case 39: // '\'' cmark_render_ascii(renderer, "\\textquotesingle{}"); // requires \usepackage{textcomp} break; case 160: // nbsp cmark_render_ascii(renderer, "~"); break; case 8230: // hellip cmark_render_ascii(renderer, "\\ldots{}"); break; case 8216: // lsquo if (escape == NORMAL) { cmark_render_ascii(renderer, "`"); } else { cmark_render_code_point(renderer, c); } break; case 8217: // rsquo if (escape == NORMAL) { cmark_render_ascii(renderer, "\'"); } else { cmark_render_code_point(renderer, c); } break; case 8220: // ldquo if (escape == NORMAL) { cmark_render_ascii(renderer, "``"); } else { cmark_render_code_point(renderer, c); } break; case 8221: // rdquo if (escape == NORMAL) { cmark_render_ascii(renderer, "''"); } else { cmark_render_code_point(renderer, c); } break; case 8212: // emdash if (escape == NORMAL) { cmark_render_ascii(renderer, "---"); } else { cmark_render_code_point(renderer, c); } break; case 8211: // endash if (escape == NORMAL) { cmark_render_ascii(renderer, "--"); } else { cmark_render_code_point(renderer, c); } break; default: cmark_render_code_point(renderer, c); } } typedef enum { NO_LINK, URL_AUTOLINK, EMAIL_AUTOLINK, NORMAL_LINK, INTERNAL_LINK } link_type; static link_type get_link_type(cmark_node *node) { size_t title_len, url_len; cmark_node *link_text; char *realurl; int realurllen; bool isemail = false; if (node->type != CMARK_NODE_LINK) { return NO_LINK; } const char *url = cmark_node_get_url(node); cmark_chunk url_chunk = cmark_chunk_literal(url); if (url && *url == '#') { return INTERNAL_LINK; } url_len = strlen(url); if (url_len == 0 || scan_scheme(&url_chunk, 0) == 0) { return NO_LINK; } const char *title = cmark_node_get_title(node); title_len = strlen(title); // if it has a title, we can't treat it as an autolink: if (title_len == 0) { 
link_text = node->first_child; cmark_consolidate_text_nodes(link_text); if (!link_text) return NO_LINK; realurl = (char *)url; realurllen = (int)url_len; if (strncmp(realurl, "mailto:", 7) == 0) { realurl += 7; realurllen -= 7; isemail = true; } if (realurllen == link_text->as.literal.len && strncmp(realurl, (char *)link_text->as.literal.data, link_text->as.literal.len) == 0) { if (isemail) { return EMAIL_AUTOLINK; } else { return URL_AUTOLINK; } } } return NORMAL_LINK; } static int S_get_enumlevel(cmark_node *node) { int enumlevel = 0; cmark_node *tmp = node; while (tmp) { if (tmp->type == CMARK_NODE_LIST && cmark_node_get_list_type(node) == CMARK_ORDERED_LIST) { enumlevel++; } tmp = tmp->parent; } return enumlevel; } static int S_render_node(cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options) { int list_number; int enumlevel; char list_number_string[LIST_NUMBER_STRING_SIZE]; bool entering = (ev_type == CMARK_EVENT_ENTER); cmark_list_type list_type; bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options); if (node->extension && node->extension->latex_render_func) { node->extension->latex_render_func(node->extension, renderer, node, ev_type, options); return 1; } switch (node->type) { case CMARK_NODE_DOCUMENT: break; case CMARK_NODE_BLOCK_QUOTE: if (entering) { LIT("\\begin{quote}"); CR(); } else { LIT("\\end{quote}"); BLANKLINE(); } break; case CMARK_NODE_LIST: list_type = cmark_node_get_list_type(node); if (entering) { LIT("\\begin{"); LIT(list_type == CMARK_ORDERED_LIST ? 
"enumerate" : "itemize"); LIT("}"); CR(); list_number = cmark_node_get_list_start(node); if (list_number > 1) { enumlevel = S_get_enumlevel(node); // latex normally supports only five levels if (enumlevel >= 1 && enumlevel <= 5) { snprintf(list_number_string, LIST_NUMBER_STRING_SIZE, "%d", list_number); LIT("\\setcounter{enum"); switch (enumlevel) { case 1: LIT("i"); break; case 2: LIT("ii"); break; case 3: LIT("iii"); break; case 4: LIT("iv"); break; case 5: LIT("v"); break; default: LIT("i"); break; } LIT("}{"); OUT(list_number_string, false, NORMAL); LIT("}"); } CR(); } } else { LIT("\\end{"); LIT(list_type == CMARK_ORDERED_LIST ? "enumerate" : "itemize"); LIT("}"); BLANKLINE(); } break; case CMARK_NODE_ITEM: if (entering) { LIT("\\item "); } else { CR(); } break; case CMARK_NODE_HEADING: if (entering) { switch (cmark_node_get_heading_level(node)) { case 1: LIT("\\section"); break; case 2: LIT("\\subsection"); break; case 3: LIT("\\subsubsection"); break; case 4: LIT("\\paragraph"); break; case 5: LIT("\\subparagraph"); break; } LIT("{"); } else { LIT("}"); BLANKLINE(); } break; case CMARK_NODE_CODE_BLOCK: CR(); LIT("\\begin{verbatim}"); CR(); OUT(cmark_node_get_literal(node), false, LITERAL); CR(); LIT("\\end{verbatim}"); BLANKLINE(); break; case CMARK_NODE_HTML_BLOCK: break; case CMARK_NODE_CUSTOM_BLOCK: CR(); OUT(entering ? 
cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), false, LITERAL); CR(); break; case CMARK_NODE_THEMATIC_BREAK: BLANKLINE(); LIT("\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}"); BLANKLINE(); break; case CMARK_NODE_PARAGRAPH: if (!entering) { BLANKLINE(); } break; case CMARK_NODE_TEXT: OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); break; case CMARK_NODE_LINEBREAK: LIT("\\\\"); CR(); break; case CMARK_NODE_SOFTBREAK: if (options & CMARK_OPT_HARDBREAKS) { LIT("\\\\"); CR(); } else if (renderer->width == 0 && !(CMARK_OPT_NOBREAKS & options)) { CR(); } else { OUT(" ", allow_wrap, NORMAL); } break; case CMARK_NODE_CODE: LIT("\\texttt{"); OUT(cmark_node_get_literal(node), false, NORMAL); LIT("}"); break; case CMARK_NODE_HTML_INLINE: break; case CMARK_NODE_CUSTOM_INLINE: OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), false, LITERAL); break; case CMARK_NODE_STRONG: if (entering) { LIT("\\textbf{"); } else { LIT("}"); } break; case CMARK_NODE_EMPH: if (entering) { LIT("\\emph{"); } else { LIT("}"); } break; case CMARK_NODE_LINK: if (entering) { const char *url = cmark_node_get_url(node); // requires \usepackage{hyperref} switch (get_link_type(node)) { case URL_AUTOLINK: LIT("\\url{"); OUT(url, false, URL); LIT("}"); return 0; // Don't process further nodes to avoid double-rendering artefacts case EMAIL_AUTOLINK: LIT("\\href{"); OUT(url, false, URL); LIT("}\\nolinkurl{"); break; case NORMAL_LINK: LIT("\\href{"); OUT(url, false, URL); LIT("}{"); break; case INTERNAL_LINK: LIT("\\protect\\hyperlink{"); OUT(url + 1, false, URL); LIT("}{"); break; case NO_LINK: LIT("{"); // error? 
} } else { LIT("}"); } break; case CMARK_NODE_IMAGE: if (entering) { LIT("\\protect\\includegraphics{"); // requires \include{graphicx} OUT(cmark_node_get_url(node), false, URL); LIT("}"); return 0; } break; case CMARK_NODE_FOOTNOTE_DEFINITION: case CMARK_NODE_FOOTNOTE_REFERENCE: // TODO break; default: assert(false); break; } return 1; } char *cmark_render_latex(cmark_node *root, int options, int width) { return cmark_render_latex_with_mem(root, options, width, cmark_node_mem(root)); } char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) { return cmark_render(mem, root, options, width, outc, S_render_node); } cmarkgfm/third_party/cmark/src/render.c0000644000175000017500000001521214210444464020371 0ustar carstencarsten#include #include "buffer.h" #include "chunk.h" #include "cmark-gfm.h" #include "utf8.h" #include "render.h" #include "node.h" #include "syntax_extension.h" static CMARK_INLINE void S_cr(cmark_renderer *renderer) { if (renderer->need_cr < 1) { renderer->need_cr = 1; } } static CMARK_INLINE void S_blankline(cmark_renderer *renderer) { if (renderer->need_cr < 2) { renderer->need_cr = 2; } } static void S_out(cmark_renderer *renderer, cmark_node *node, const char *source, bool wrap, cmark_escaping escape) { int length = (int)strlen(source); unsigned char nextc; int32_t c; int i = 0; int last_nonspace; int len; cmark_chunk remainder = cmark_chunk_literal(""); int k = renderer->buffer->size - 1; cmark_syntax_extension *ext = NULL; cmark_node *n = node; while (n && !ext) { ext = n->extension; if (!ext) n = n->parent; } if (ext && !ext->commonmark_escape_func) ext = NULL; wrap = wrap && !renderer->no_linebreaks; if (renderer->in_tight_list_item && renderer->need_cr > 1) { renderer->need_cr = 1; } while (renderer->need_cr) { if (k < 0 || renderer->buffer->ptr[k] == '\n') { k -= 1; } else { cmark_strbuf_putc(renderer->buffer, '\n'); if (renderer->need_cr > 1) { cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr, 
renderer->prefix->size); } } renderer->column = 0; renderer->last_breakable = 0; renderer->begin_line = true; renderer->begin_content = true; renderer->need_cr -= 1; } while (i < length) { if (renderer->begin_line) { cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr, renderer->prefix->size); // note: this assumes prefix is ascii: renderer->column = renderer->prefix->size; } len = cmark_utf8proc_iterate((const uint8_t *)source + i, length - i, &c); if (len == -1) { // error condition return; // return without rendering rest of string } if (ext && ext->commonmark_escape_func(ext, node, c)) cmark_strbuf_putc(renderer->buffer, '\\'); nextc = source[i + len]; if (c == 32 && wrap) { if (!renderer->begin_line) { last_nonspace = renderer->buffer->size; cmark_strbuf_putc(renderer->buffer, ' '); renderer->column += 1; renderer->begin_line = false; renderer->begin_content = false; // skip following spaces while (source[i + 1] == ' ') { i++; } // We don't allow breaks that make a digit the first character // because this causes problems with commonmark output. if (!cmark_isdigit(source[i + 1])) { renderer->last_breakable = last_nonspace; } } } else if (escape == LITERAL) { if (c == 10) { cmark_strbuf_putc(renderer->buffer, '\n'); renderer->column = 0; renderer->begin_line = true; renderer->begin_content = true; renderer->last_breakable = 0; } else { cmark_render_code_point(renderer, c); renderer->begin_line = false; // we don't set 'begin_content' to false til we've // finished parsing a digit. 
Reason: in commonmark // we need to escape a potential list marker after // a digit: renderer->begin_content = renderer->begin_content && cmark_isdigit((char)c) == 1; } } else { (renderer->outc)(renderer, node, escape, c, nextc); renderer->begin_line = false; renderer->begin_content = renderer->begin_content && cmark_isdigit((char)c) == 1; } // If adding the character went beyond width, look for an // earlier place where the line could be broken: if (renderer->width > 0 && renderer->column > renderer->width && !renderer->begin_line && renderer->last_breakable > 0) { // copy from last_breakable to remainder cmark_chunk_set_cstr(renderer->mem, &remainder, (char *)renderer->buffer->ptr + renderer->last_breakable + 1); // truncate at last_breakable cmark_strbuf_truncate(renderer->buffer, renderer->last_breakable); // add newline, prefix, and remainder cmark_strbuf_putc(renderer->buffer, '\n'); cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr, renderer->prefix->size); cmark_strbuf_put(renderer->buffer, remainder.data, remainder.len); renderer->column = renderer->prefix->size + remainder.len; cmark_chunk_free(renderer->mem, &remainder); renderer->last_breakable = 0; renderer->begin_line = false; renderer->begin_content = false; } i += len; } } // Assumes no newlines, assumes ascii content: void cmark_render_ascii(cmark_renderer *renderer, const char *s) { int origsize = renderer->buffer->size; cmark_strbuf_puts(renderer->buffer, s); renderer->column += renderer->buffer->size - origsize; } void cmark_render_code_point(cmark_renderer *renderer, uint32_t c) { cmark_utf8proc_encode_char(c, renderer->buffer); renderer->column += 1; } char *cmark_render(cmark_mem *mem, cmark_node *root, int options, int width, void (*outc)(cmark_renderer *, cmark_node *, cmark_escaping, int32_t, unsigned char), int (*render_node)(cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options)) { cmark_strbuf pref = CMARK_BUF_INIT(mem); cmark_strbuf buf = 
CMARK_BUF_INIT(mem); cmark_node *cur; cmark_event_type ev_type; char *result; cmark_iter *iter = cmark_iter_new(root); cmark_renderer renderer = {mem, &buf, &pref, 0, width, 0, 0, true, true, false, false, outc, S_cr, S_blankline, S_out, 0}; while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cur = cmark_iter_get_node(iter); if (!render_node(&renderer, cur, ev_type, options)) { // a false value causes us to skip processing // the node's contents. this is used for // autolinks. cmark_iter_reset(iter, cur, CMARK_EVENT_EXIT); } } // ensure final newline if (renderer.buffer->size == 0 || renderer.buffer->ptr[renderer.buffer->size - 1] != '\n') { cmark_strbuf_putc(renderer.buffer, '\n'); } result = (char *)cmark_strbuf_detach(renderer.buffer); cmark_iter_free(iter); cmark_strbuf_free(renderer.prefix); cmark_strbuf_free(renderer.buffer); return result; } cmarkgfm/third_party/cmark/src/scanners.h0000644000175000017500000000572114210444464020737 0ustar carstencarsten#ifndef CMARK_SCANNERS_H #define CMARK_SCANNERS_H #include "cmark-gfm.h" #include "chunk.h" #ifdef __cplusplus extern "C" { #endif bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset); bufsize_t _scan_scheme(const unsigned char *p); bufsize_t _scan_autolink_uri(const unsigned char *p); bufsize_t _scan_autolink_email(const unsigned char *p); bufsize_t _scan_html_tag(const unsigned char *p); bufsize_t _scan_liberal_html_tag(const unsigned char *p); bufsize_t _scan_html_block_start(const unsigned char *p); bufsize_t _scan_html_block_start_7(const unsigned char *p); bufsize_t _scan_html_block_end_1(const unsigned char *p); bufsize_t _scan_html_block_end_2(const unsigned char *p); bufsize_t _scan_html_block_end_3(const unsigned char *p); bufsize_t _scan_html_block_end_4(const unsigned char *p); bufsize_t _scan_html_block_end_5(const unsigned char *p); bufsize_t _scan_link_title(const unsigned char *p); bufsize_t _scan_spacechars(const unsigned char *p); 
bufsize_t _scan_atx_heading_start(const unsigned char *p); bufsize_t _scan_setext_heading_line(const unsigned char *p); bufsize_t _scan_open_code_fence(const unsigned char *p); bufsize_t _scan_close_code_fence(const unsigned char *p); bufsize_t _scan_entity(const unsigned char *p); bufsize_t _scan_dangerous_url(const unsigned char *p); bufsize_t _scan_footnote_definition(const unsigned char *p); #define scan_scheme(c, n) _scan_at(&_scan_scheme, c, n) #define scan_autolink_uri(c, n) _scan_at(&_scan_autolink_uri, c, n) #define scan_autolink_email(c, n) _scan_at(&_scan_autolink_email, c, n) #define scan_html_tag(c, n) _scan_at(&_scan_html_tag, c, n) #define scan_liberal_html_tag(c, n) _scan_at(&_scan_liberal_html_tag, c, n) #define scan_html_block_start(c, n) _scan_at(&_scan_html_block_start, c, n) #define scan_html_block_start_7(c, n) _scan_at(&_scan_html_block_start_7, c, n) #define scan_html_block_end_1(c, n) _scan_at(&_scan_html_block_end_1, c, n) #define scan_html_block_end_2(c, n) _scan_at(&_scan_html_block_end_2, c, n) #define scan_html_block_end_3(c, n) _scan_at(&_scan_html_block_end_3, c, n) #define scan_html_block_end_4(c, n) _scan_at(&_scan_html_block_end_4, c, n) #define scan_html_block_end_5(c, n) _scan_at(&_scan_html_block_end_5, c, n) #define scan_link_title(c, n) _scan_at(&_scan_link_title, c, n) #define scan_spacechars(c, n) _scan_at(&_scan_spacechars, c, n) #define scan_atx_heading_start(c, n) _scan_at(&_scan_atx_heading_start, c, n) #define scan_setext_heading_line(c, n) \ _scan_at(&_scan_setext_heading_line, c, n) #define scan_open_code_fence(c, n) _scan_at(&_scan_open_code_fence, c, n) #define scan_close_code_fence(c, n) _scan_at(&_scan_close_code_fence, c, n) #define scan_entity(c, n) _scan_at(&_scan_entity, c, n) #define scan_dangerous_url(c, n) _scan_at(&_scan_dangerous_url, c, n) #define scan_footnote_definition(c, n) _scan_at(&_scan_footnote_definition, c, n) #ifdef __cplusplus } #endif #endif 
cmarkgfm/third_party/cmark/src/parser.h0000644000175000017500000000355314210444464020420 0ustar carstencarsten#ifndef CMARK_PARSER_H #define CMARK_PARSER_H #include #include "references.h" #include "node.h" #include "buffer.h" #ifdef __cplusplus extern "C" { #endif #define MAX_LINK_LABEL_LENGTH 1000 struct cmark_parser { struct cmark_mem *mem; /* A hashtable of urls in the current document for cross-references */ struct cmark_map *refmap; /* The root node of the parser, always a CMARK_NODE_DOCUMENT */ struct cmark_node *root; /* The last open block after a line is fully processed */ struct cmark_node *current; /* See the documentation for cmark_parser_get_line_number() in cmark.h */ int line_number; /* See the documentation for cmark_parser_get_offset() in cmark.h */ bufsize_t offset; /* See the documentation for cmark_parser_get_column() in cmark.h */ bufsize_t column; /* See the documentation for cmark_parser_get_first_nonspace() in cmark.h */ bufsize_t first_nonspace; /* See the documentation for cmark_parser_get_first_nonspace_column() in cmark.h */ bufsize_t first_nonspace_column; bufsize_t thematic_break_kill_pos; /* See the documentation for cmark_parser_get_indent() in cmark.h */ int indent; /* See the documentation for cmark_parser_is_blank() in cmark.h */ bool blank; /* See the documentation for cmark_parser_has_partially_consumed_tab() in cmark.h */ bool partially_consumed_tab; /* Contains the currently processed line */ cmark_strbuf curline; /* See the documentation for cmark_parser_get_last_line_length() in cmark.h */ bufsize_t last_line_length; /* FIXME: not sure about the difference with curline */ cmark_strbuf linebuf; /* Options set by the user, see the Options section in cmark.h */ int options; bool last_buffer_ended_with_cr; cmark_llist *syntax_extensions; cmark_llist *inline_syntax_extensions; cmark_ispunct_func backslash_ispunct; }; #ifdef __cplusplus } #endif #endif 
cmarkgfm/third_party/cmark/src/inlines.h0000644000175000017500000000137114210444464020561 0ustar carstencarsten#ifndef CMARK_INLINES_H #define CMARK_INLINES_H #ifdef __cplusplus extern "C" { #endif #include "references.h" cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url); cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title); CMARK_GFM_EXPORT void cmark_parse_inlines(cmark_parser *parser, cmark_node *parent, cmark_map *refmap, int options); bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input, cmark_map *refmap); void cmark_inlines_add_special_character(unsigned char c, bool emphasis); void cmark_inlines_remove_special_character(unsigned char c, bool emphasis); #ifdef __cplusplus } #endif #endif cmarkgfm/third_party/cmark/src/houdini_html_u.c0000644000175000017500000000675614210444464022136 0ustar carstencarsten#include #include #include #include "buffer.h" #include "houdini.h" #include "utf8.h" #include "entities.inc" /* Binary tree lookup code for entities added by JGM */ static const unsigned char *S_lookup(int i, int low, int hi, const unsigned char *s, int len) { int j; int cmp = strncmp((const char *)s, (const char *)cmark_entities[i].entity, len); if (cmp == 0 && cmark_entities[i].entity[len] == 0) { return (const unsigned char *)cmark_entities[i].bytes; } else if (cmp <= 0 && i > low) { j = i - ((i - low) / 2); if (j == i) j -= 1; return S_lookup(j, low, i - 1, s, len); } else if (cmp > 0 && i < hi) { j = i + ((hi - i) / 2); if (j == i) j += 1; return S_lookup(j, i + 1, hi, s, len); } else { return NULL; } } static const unsigned char *S_lookup_entity(const unsigned char *s, int len) { return S_lookup(CMARK_NUM_ENTITIES / 2, 0, CMARK_NUM_ENTITIES - 1, s, len); } bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { bufsize_t i = 0; if (size >= 3 && src[0] == '#') { int codepoint = 0; int num_digits = 0; if (_isdigit(src[1])) { for (i = 1; i < size && _isdigit(src[i]); ++i) { 
codepoint = (codepoint * 10) + (src[i] - '0'); if (codepoint >= 0x110000) { // Keep counting digits but // avoid integer overflow. codepoint = 0x110000; } } num_digits = i - 1; } else if (src[1] == 'x' || src[1] == 'X') { for (i = 2; i < size && _isxdigit(src[i]); ++i) { codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9); if (codepoint >= 0x110000) { // Keep counting digits but // avoid integer overflow. codepoint = 0x110000; } } num_digits = i - 2; } if (num_digits >= 1 && num_digits <= 8 && i < size && src[i] == ';') { if (codepoint == 0 || (codepoint >= 0xD800 && codepoint < 0xE000) || codepoint >= 0x110000) { codepoint = 0xFFFD; } cmark_utf8proc_encode_char(codepoint, ob); return i + 1; } } else { if (size > CMARK_ENTITY_MAX_LENGTH) size = CMARK_ENTITY_MAX_LENGTH; for (i = CMARK_ENTITY_MIN_LENGTH; i < size; ++i) { if (src[i] == ' ') break; if (src[i] == ';') { const unsigned char *entity = S_lookup_entity(src, i); if (entity != NULL) { cmark_strbuf_puts(ob, (const char *)entity); return i + 1; } break; } } } return 0; } int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { bufsize_t i = 0, org, ent; while (i < size) { org = i; while (i < size && src[i] != '&') i++; if (likely(i > org)) { if (unlikely(org == 0)) { if (i >= size) return 0; cmark_strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size)); } cmark_strbuf_put(ob, src + org, i - org); } /* escaping */ if (i >= size) break; i++; ent = houdini_unescape_ent(ob, src + i, size - i); i += ent; /* not really an entity */ if (ent == 0) cmark_strbuf_putc(ob, '&'); } return 1; } void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { if (!houdini_unescape_html(ob, src, size)) cmark_strbuf_put(ob, src, size); } cmarkgfm/third_party/cmark/src/buffer.c0000644000175000017500000001455714210444464020376 0ustar carstencarsten#include #include #include #include #include #include #include #include #include "config.h" #include "cmark_ctype.h" #include "buffer.h" /* Used 
as default value for cmark_strbuf->ptr so that people can always * assume ptr is non-NULL and zero terminated even for new cmark_strbufs. */ unsigned char cmark_strbuf__initbuf[1]; #ifndef MIN #define MIN(x, y) ((x < y) ? x : y) #endif void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf, bufsize_t initial_size) { buf->mem = mem; buf->asize = 0; buf->size = 0; buf->ptr = cmark_strbuf__initbuf; if (initial_size > 0) cmark_strbuf_grow(buf, initial_size); } static CMARK_INLINE void S_strbuf_grow_by(cmark_strbuf *buf, bufsize_t add) { cmark_strbuf_grow(buf, buf->size + add); } void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) { assert(target_size > 0); if (target_size < buf->asize) return; if (target_size > (bufsize_t)(INT32_MAX / 2)) { fprintf(stderr, "[cmark] cmark_strbuf_grow requests buffer with size > %d, aborting\n", (INT32_MAX / 2)); abort(); } /* Oversize the buffer by 50% to guarantee amortized linear time * complexity on append operations. */ bufsize_t new_size = target_size + target_size / 2; new_size += 1; new_size = (new_size + 7) & ~7; buf->ptr = (unsigned char *)buf->mem->realloc(buf->asize ? buf->ptr : NULL, new_size); buf->asize = new_size; } bufsize_t cmark_strbuf_len(const cmark_strbuf *buf) { return buf->size; } void cmark_strbuf_free(cmark_strbuf *buf) { if (!buf) return; if (buf->ptr != cmark_strbuf__initbuf) buf->mem->free(buf->ptr); cmark_strbuf_init(buf->mem, buf, 0); } void cmark_strbuf_clear(cmark_strbuf *buf) { buf->size = 0; if (buf->asize > 0) buf->ptr[0] = '\0'; } void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, bufsize_t len) { if (len <= 0 || data == NULL) { cmark_strbuf_clear(buf); } else { if (data != buf->ptr) { if (len >= buf->asize) cmark_strbuf_grow(buf, len); memmove(buf->ptr, data, len); } buf->size = len; buf->ptr[buf->size] = '\0'; } } void cmark_strbuf_sets(cmark_strbuf *buf, const char *string) { cmark_strbuf_set(buf, (const unsigned char *)string, string ? 
(bufsize_t)strlen(string) : 0); } void cmark_strbuf_putc(cmark_strbuf *buf, int c) { S_strbuf_grow_by(buf, 1); buf->ptr[buf->size++] = (unsigned char)(c & 0xFF); buf->ptr[buf->size] = '\0'; } void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, bufsize_t len) { if (len <= 0) return; S_strbuf_grow_by(buf, len); memmove(buf->ptr + buf->size, data, len); buf->size += len; buf->ptr[buf->size] = '\0'; } void cmark_strbuf_puts(cmark_strbuf *buf, const char *string) { cmark_strbuf_put(buf, (const unsigned char *)string, (bufsize_t)strlen(string)); } void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, const cmark_strbuf *buf) { bufsize_t copylen; assert(buf); if (!data || datasize <= 0) return; data[0] = '\0'; if (buf->size == 0 || buf->asize <= 0) return; copylen = buf->size; if (copylen > datasize - 1) copylen = datasize - 1; memmove(data, buf->ptr, copylen); data[copylen] = '\0'; } void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b) { cmark_strbuf t = *buf_a; *buf_a = *buf_b; *buf_b = t; } unsigned char *cmark_strbuf_detach(cmark_strbuf *buf) { unsigned char *data = buf->ptr; if (buf->asize == 0) { /* return an empty string */ return (unsigned char *)buf->mem->calloc(1, 1); } cmark_strbuf_init(buf->mem, buf, 0); return data; } int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b) { int result = memcmp(a->ptr, b->ptr, MIN(a->size, b->size)); return (result != 0) ? result : (a->size < b->size) ? -1 : (a->size > b->size) ? 
1 : 0; } bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos) { if (pos >= buf->size) return -1; if (pos < 0) pos = 0; const unsigned char *p = (unsigned char *)memchr(buf->ptr + pos, c, buf->size - pos); if (!p) return -1; return (bufsize_t)(p - (const unsigned char *)buf->ptr); } bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos) { if (pos < 0 || buf->size == 0) return -1; if (pos >= buf->size) pos = buf->size - 1; bufsize_t i; for (i = pos; i >= 0; i--) { if (buf->ptr[i] == (unsigned char)c) return i; } return -1; } void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len) { if (len < 0) len = 0; if (len < buf->size) { buf->size = len; buf->ptr[buf->size] = '\0'; } } void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n) { if (n > 0) { if (n > buf->size) n = buf->size; buf->size = buf->size - n; if (buf->size) memmove(buf->ptr, buf->ptr + n, buf->size); buf->ptr[buf->size] = '\0'; } } void cmark_strbuf_rtrim(cmark_strbuf *buf) { if (!buf->size) return; while (buf->size > 0) { if (!cmark_isspace(buf->ptr[buf->size - 1])) break; buf->size--; } buf->ptr[buf->size] = '\0'; } void cmark_strbuf_trim(cmark_strbuf *buf) { bufsize_t i = 0; if (!buf->size) return; while (i < buf->size && cmark_isspace(buf->ptr[i])) i++; cmark_strbuf_drop(buf, i); cmark_strbuf_rtrim(buf); } // Destructively modify string, collapsing consecutive // space and newline characters into a single space. void cmark_strbuf_normalize_whitespace(cmark_strbuf *s) { bool last_char_was_space = false; bufsize_t r, w; for (r = 0, w = 0; r < s->size; ++r) { if (cmark_isspace(s->ptr[r])) { if (!last_char_was_space) { s->ptr[w++] = ' '; last_char_was_space = true; } } else { s->ptr[w++] = s->ptr[r]; last_char_was_space = false; } } cmark_strbuf_truncate(s, w); } // Destructively unescape a string: remove backslashes before punctuation chars. 
extern void cmark_strbuf_unescape(cmark_strbuf *buf) { bufsize_t r, w; for (r = 0, w = 0; r < buf->size; ++r) { if (buf->ptr[r] == '\\' && cmark_ispunct(buf->ptr[r + 1])) r++; buf->ptr[w++] = buf->ptr[r]; } cmark_strbuf_truncate(buf, w); } cmarkgfm/third_party/cmark/src/xml.c0000644000175000017500000001317414210444464017717 0ustar carstencarsten#include #include #include #include #include "config.h" #include "cmark-gfm.h" #include "node.h" #include "buffer.h" #include "houdini.h" #include "syntax_extension.h" #define BUFFER_SIZE 100 // Functions to convert cmark_nodes to XML strings. static void escape_xml(cmark_strbuf *dest, const unsigned char *source, bufsize_t length) { houdini_escape_html0(dest, source, length, 0); } struct render_state { cmark_strbuf *xml; int indent; }; static CMARK_INLINE void indent(struct render_state *state) { int i; for (i = 0; i < state->indent; i++) { cmark_strbuf_putc(state->xml, ' '); } } static int S_render_node(cmark_node *node, cmark_event_type ev_type, struct render_state *state, int options) { cmark_strbuf *xml = state->xml; bool literal = false; cmark_delim_type delim; bool entering = (ev_type == CMARK_EVENT_ENTER); char buffer[BUFFER_SIZE]; if (entering) { indent(state); cmark_strbuf_putc(xml, '<'); cmark_strbuf_puts(xml, cmark_node_get_type_string(node)); if (options & CMARK_OPT_SOURCEPOS && node->start_line != 0) { snprintf(buffer, BUFFER_SIZE, " sourcepos=\"%d:%d-%d:%d\"", node->start_line, node->start_column, node->end_line, node->end_column); cmark_strbuf_puts(xml, buffer); } if (node->extension && node->extension->xml_attr_func) { const char* r = node->extension->xml_attr_func(node->extension, node); if (r != NULL) cmark_strbuf_puts(xml, r); } literal = false; switch (node->type) { case CMARK_NODE_DOCUMENT: cmark_strbuf_puts(xml, " xmlns=\"http://commonmark.org/xml/1.0\""); break; case CMARK_NODE_TEXT: case CMARK_NODE_CODE: case CMARK_NODE_HTML_BLOCK: case CMARK_NODE_HTML_INLINE: cmark_strbuf_puts(xml, " 
xml:space=\"preserve\">"); escape_xml(xml, node->as.literal.data, node->as.literal.len); cmark_strbuf_puts(xml, "as.heading.level); cmark_strbuf_puts(xml, buffer); break; case CMARK_NODE_CODE_BLOCK: if (node->as.code.info.len > 0) { cmark_strbuf_puts(xml, " info=\""); escape_xml(xml, node->as.code.info.data, node->as.code.info.len); cmark_strbuf_putc(xml, '"'); } cmark_strbuf_puts(xml, " xml:space=\"preserve\">"); escape_xml(xml, node->as.code.literal.data, node->as.code.literal.len); cmark_strbuf_puts(xml, "as.custom.on_enter.data, node->as.custom.on_enter.len); cmark_strbuf_putc(xml, '"'); cmark_strbuf_puts(xml, " on_exit=\""); escape_xml(xml, node->as.custom.on_exit.data, node->as.custom.on_exit.len); cmark_strbuf_putc(xml, '"'); break; case CMARK_NODE_LINK: case CMARK_NODE_IMAGE: cmark_strbuf_puts(xml, " destination=\""); escape_xml(xml, node->as.link.url.data, node->as.link.url.len); cmark_strbuf_putc(xml, '"'); cmark_strbuf_puts(xml, " title=\""); escape_xml(xml, node->as.link.title.data, node->as.link.title.len); cmark_strbuf_putc(xml, '"'); break; default: break; } if (node->first_child) { state->indent += 2; } else if (!literal) { cmark_strbuf_puts(xml, " /"); } cmark_strbuf_puts(xml, ">\n"); } else if (node->first_child) { state->indent -= 2; indent(state); cmark_strbuf_puts(xml, "\n"); } return 1; } char *cmark_render_xml(cmark_node *root, int options) { return cmark_render_xml_with_mem(root, options, cmark_node_mem(root)); } char *cmark_render_xml_with_mem(cmark_node *root, int options, cmark_mem *mem) { char *result; cmark_strbuf xml = CMARK_BUF_INIT(mem); cmark_event_type ev_type; cmark_node *cur; struct render_state state = {&xml, 0}; cmark_iter *iter = cmark_iter_new(root); cmark_strbuf_puts(state.xml, "\n"); cmark_strbuf_puts(state.xml, "\n"); while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cur = cmark_iter_get_node(iter); S_render_node(cur, ev_type, &state, options); } result = (char *)cmark_strbuf_detach(&xml); 
cmark_iter_free(iter); return result; } cmarkgfm/third_party/cmark/src/footnotes.c0000644000175000017500000000337014210444464021134 0ustar carstencarsten#include "cmark-gfm.h" #include "parser.h" #include "footnotes.h" #include "inlines.h" #include "chunk.h" static void footnote_free(cmark_map *map, cmark_map_entry *_ref) { cmark_footnote *ref = (cmark_footnote *)_ref; cmark_mem *mem = map->mem; if (ref != NULL) { mem->free(ref->entry.label); if (ref->node) cmark_node_free(ref->node); mem->free(ref); } } void cmark_footnote_create(cmark_map *map, cmark_node *node) { cmark_footnote *ref; unsigned char *reflabel = normalize_map_label(map->mem, &node->as.literal); /* empty footnote name, or composed from only whitespace */ if (reflabel == NULL) return; assert(map->sorted == NULL); ref = (cmark_footnote *)map->mem->calloc(1, sizeof(*ref)); ref->entry.label = reflabel; ref->node = node; ref->entry.age = map->size; ref->entry.next = map->refs; map->refs = (cmark_map_entry *)ref; map->size++; } cmark_map *cmark_footnote_map_new(cmark_mem *mem) { return cmark_map_new(mem, footnote_free); } // Before calling `cmark_map_free` on a map with `cmark_footnotes`, first // unlink all of the footnote nodes before freeing their memory. // // Sometimes, two (unused) footnote nodes can end up referencing each other, // which as they get freed up by calling `cmark_map_free` -> `footnote_free` -> // etc, can lead to a use-after-free error. // // Better to `unlink` every footnote node first, setting their next, prev, and // parent pointers to NULL, and only then walk thru & free them up. 
void cmark_unlink_footnotes_map(cmark_map *map) { cmark_map_entry *ref; cmark_map_entry *next; ref = map->refs; while(ref) { next = ref->next; if (((cmark_footnote *)ref)->node) { cmark_node_unlink(((cmark_footnote *)ref)->node); } ref = next; } } cmarkgfm/third_party/cmark/src/houdini_href_e.c0000644000175000017500000000570714210444464022071 0ustar carstencarsten#include #include #include #include "houdini.h" /* * The following characters will not be escaped: * * -_.+!*'(),%#@?=;:/,+&$~ alphanum * * Note that this character set is the addition of: * * - The characters which are safe to be in an URL * - The characters which are *not* safe to be in * an URL because they are RESERVED characters. * * We assume (lazily) that any RESERVED char that * appears inside an URL is actually meant to * have its native function (i.e. as an URL * component/separator) and hence needs no escaping. * * There are two exceptions: the chacters & (amp) * and ' (single quote) do not appear in the table. * They are meant to appear in the URL as components, * yet they require special HTML-entity escaping * to generate valid HTML markup. * * All other characters will be escaped to %XX. 
* */ static const char HREF_SAFE[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { static const uint8_t hex_chars[] = "0123456789ABCDEF"; bufsize_t i = 0, org; uint8_t hex_str[3]; hex_str[0] = '%'; while (i < size) { org = i; while (i < size && HREF_SAFE[src[i]] != 0) i++; if (likely(i > org)) cmark_strbuf_put(ob, src + org, i - org); /* escaping */ if (i >= size) break; switch (src[i]) { /* amp appears all the time in URLs, but needs * HTML-entity escaping to be inside an href */ case '&': cmark_strbuf_puts(ob, "&"); break; /* the single quote is a valid URL character * according to the standard; it needs HTML * entity escaping too */ case '\'': cmark_strbuf_puts(ob, "'"); break; /* the space can be escaped to %20 or a plus * sign. we're going with the generic escape * for now. 
the plus thing is more commonly seen * when building GET strings */ #if 0 case ' ': cmark_strbuf_putc(ob, '+'); break; #endif /* every other character goes with a %XX escaping */ default: hex_str[1] = hex_chars[(src[i] >> 4) & 0xF]; hex_str[2] = hex_chars[src[i] & 0xF]; cmark_strbuf_put(ob, hex_str, 3); } i++; } return 1; } cmarkgfm/third_party/cmark/src/cmark_ctype.c0000644000175000017500000000332114210444464021411 0ustar carstencarsten#include #include "cmark_ctype.h" /** 1 = space, 2 = punct, 3 = digit, 4 = alpha, 0 = other */ static const uint8_t cmark_ctype_class[256] = { /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, /* 1 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 3 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, /* 4 */ 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, /* 5 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, /* 6 */ 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, /* 7 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 0, /* 8 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* a */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* b */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* c */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* d */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* e */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /** * Returns 1 if c is a "whitespace" character as defined by the spec. */ int cmark_isspace(char c) { return cmark_ctype_class[(uint8_t)c] == 1; } /** * Returns 1 if c is an ascii punctuation character. 
*/ int cmark_ispunct(char c) { return cmark_ctype_class[(uint8_t)c] == 2; } int cmark_isalnum(char c) { uint8_t result; result = cmark_ctype_class[(uint8_t)c]; return (result == 3 || result == 4); } int cmark_isdigit(char c) { return cmark_ctype_class[(uint8_t)c] == 3; } int cmark_isalpha(char c) { return cmark_ctype_class[(uint8_t)c] == 4; } cmarkgfm/third_party/cmark/src/buffer.h0000644000175000017500000000530214210444464020367 0ustar carstencarsten#ifndef CMARK_BUFFER_H #define CMARK_BUFFER_H #include #include #include #include #include #include "config.h" #include "cmark-gfm.h" #ifdef __cplusplus extern "C" { #endif typedef struct { cmark_mem *mem; unsigned char *ptr; bufsize_t asize, size; } cmark_strbuf; extern unsigned char cmark_strbuf__initbuf[]; #define CMARK_BUF_INIT(mem) \ { mem, cmark_strbuf__initbuf, 0, 0 } /** * Initialize a cmark_strbuf structure. * * For the cases where CMARK_BUF_INIT cannot be used to do static * initialization. */ CMARK_GFM_EXPORT void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf, bufsize_t initial_size); /** * Grow the buffer to hold at least `target_size` bytes. 
*/ CMARK_GFM_EXPORT void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size); CMARK_GFM_EXPORT void cmark_strbuf_free(cmark_strbuf *buf); CMARK_GFM_EXPORT void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b); CMARK_GFM_EXPORT bufsize_t cmark_strbuf_len(const cmark_strbuf *buf); CMARK_GFM_EXPORT int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b); CMARK_GFM_EXPORT unsigned char *cmark_strbuf_detach(cmark_strbuf *buf); CMARK_GFM_EXPORT void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, const cmark_strbuf *buf); static CMARK_INLINE const char *cmark_strbuf_cstr(const cmark_strbuf *buf) { return (char *)buf->ptr; } #define cmark_strbuf_at(buf, n) ((buf)->ptr[n]) CMARK_GFM_EXPORT void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, bufsize_t len); CMARK_GFM_EXPORT void cmark_strbuf_sets(cmark_strbuf *buf, const char *string); CMARK_GFM_EXPORT void cmark_strbuf_putc(cmark_strbuf *buf, int c); CMARK_GFM_EXPORT void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, bufsize_t len); CMARK_GFM_EXPORT void cmark_strbuf_puts(cmark_strbuf *buf, const char *string); CMARK_GFM_EXPORT void cmark_strbuf_clear(cmark_strbuf *buf); CMARK_GFM_EXPORT bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos); CMARK_GFM_EXPORT bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos); CMARK_GFM_EXPORT void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n); CMARK_GFM_EXPORT void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len); CMARK_GFM_EXPORT void cmark_strbuf_rtrim(cmark_strbuf *buf); CMARK_GFM_EXPORT void cmark_strbuf_trim(cmark_strbuf *buf); CMARK_GFM_EXPORT void cmark_strbuf_normalize_whitespace(cmark_strbuf *s); CMARK_GFM_EXPORT void cmark_strbuf_unescape(cmark_strbuf *s); #ifdef __cplusplus } #endif #endif cmarkgfm/third_party/cmark/src/CMakeLists.txt0000644000175000017500000001402214210444464021504 0ustar carstencarstenif(${CMAKE_VERSION} VERSION_GREATER "3.3") 
cmake_policy(SET CMP0063 NEW) endif() include(GNUInstallDirs) set(LIBRARY "libcmark-gfm") set(STATICLIBRARY "libcmark-gfm_static") set(HEADERS cmark-gfm.h cmark-gfm-extension_api.h parser.h buffer.h node.h iterator.h chunk.h references.h footnotes.h map.h utf8.h scanners.h inlines.h houdini.h cmark_ctype.h render.h registry.h syntax_extension.h plugin.h ) set(LIBRARY_SOURCES cmark.c node.c iterator.c blocks.c inlines.c scanners.c scanners.re utf8.c buffer.c references.c footnotes.c map.c render.c man.c xml.c html.c commonmark.c plaintext.c latex.c houdini_href_e.c houdini_html_e.c houdini_html_u.c cmark_ctype.c arena.c linked_list.c syntax_extension.c registry.c plugin.c ${HEADERS} ) set(PROGRAM "cmark-gfm") set(PROGRAM_SOURCES main.c) include_directories(. ${CMAKE_CURRENT_BINARY_DIR}) include_directories( ${PROJECT_BINARY_DIR}/extensions ) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmark-gfm_version.h.in ${CMAKE_CURRENT_BINARY_DIR}/cmark-gfm_version.h) include (GenerateExportHeader) include("../CheckFileOffsetBits.cmake") CHECK_FILE_OFFSET_BITS() add_executable(${PROGRAM} ${PROGRAM_SOURCES}) if(CMARK_SHARED) target_link_libraries(${PROGRAM} libcmark-gfm-extensions libcmark-gfm) elseif(CMARK_STATIC) target_link_libraries(${PROGRAM} libcmark-gfm-extensions_static libcmark-gfm_static) endif() # Disable the PUBLIC declarations when compiling the executable: set_target_properties(${PROGRAM} PROPERTIES COMPILE_FLAGS "-DCMARK_GFM_STATIC_DEFINE -DCMARK_GFM_EXTENSIONS_STATIC_DEFINE") # Check integrity of node structure when compiled as debug: set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DCMARK_DEBUG_NODES -DDEBUG") set(CMAKE_LINKER_DEBUG "${CMAKE_LINKER_FLAGS_DEBUG}") set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg") set(CMAKE_LINKER_PROFILE "${CMAKE_LINKER_FLAGS_RELEASE} -pg") # -fvisibility=hidden set(CMAKE_C_VISIBILITY_PRESET hidden) set(CMAKE_VISIBILITY_INLINES_HIDDEN 1) if (CMARK_SHARED) add_library(${LIBRARY} SHARED ${LIBRARY_SOURCES}) # Include minor 
version and patch level in soname for now. set_target_properties(${LIBRARY} PROPERTIES OUTPUT_NAME "cmark-gfm" SOVERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} VERSION ${PROJECT_VERSION}) set_property(TARGET ${LIBRARY} APPEND PROPERTY MACOSX_RPATH true) # Avoid name clash between PROGRAM and LIBRARY pdb files. set_target_properties(${LIBRARY} PROPERTIES PDB_NAME cmark-gfm_dll) generate_export_header(${LIBRARY} BASE_NAME ${PROJECT_NAME}) list(APPEND CMARK_INSTALL ${LIBRARY}) endif() if (CMARK_STATIC) add_library(${STATICLIBRARY} STATIC ${LIBRARY_SOURCES}) set_target_properties(${STATICLIBRARY} PROPERTIES COMPILE_FLAGS -DCMARK_GFM_STATIC_DEFINE POSITION_INDEPENDENT_CODE ON) if (MSVC) set_target_properties(${STATICLIBRARY} PROPERTIES OUTPUT_NAME "cmark-gfm_static" VERSION ${PROJECT_VERSION}) else() set_target_properties(${STATICLIBRARY} PROPERTIES OUTPUT_NAME "cmark-gfm" VERSION ${PROJECT_VERSION}) endif(MSVC) if (NOT CMARK_SHARED) generate_export_header(${STATICLIBRARY} BASE_NAME ${PROJECT_NAME}) endif() list(APPEND CMARK_INSTALL ${STATICLIBRARY}) endif() if (MSVC) set_property(TARGET ${PROGRAM} APPEND PROPERTY LINK_FLAGS /INCREMENTAL:NO) endif(MSVC) if(NOT MSVC OR CMAKE_HOST_SYSTEM_NAME STREQUAL Windows) set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS ON) include(InstallRequiredSystemLibraries) endif() set(libdir lib${LIB_SUFFIX}) install(TARGETS ${PROGRAM} ${CMARK_INSTALL} EXPORT cmark-gfm RUNTIME DESTINATION bin LIBRARY DESTINATION ${libdir} ARCHIVE DESTINATION ${libdir} ) if(CMARK_SHARED OR CMARK_STATIC) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libcmark-gfm.pc.in ${CMAKE_CURRENT_BINARY_DIR}/libcmark-gfm.pc @ONLY) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcmark-gfm.pc DESTINATION ${libdir}/pkgconfig) install(FILES cmark-gfm.h cmark-gfm-extension_api.h ${CMAKE_CURRENT_BINARY_DIR}/cmark-gfm_export.h ${CMAKE_CURRENT_BINARY_DIR}/cmark-gfm_version.h DESTINATION include ) install(EXPORT cmark-gfm 
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake) endif() # Feature tests include(CheckIncludeFile) include(CheckCSourceCompiles) include(CheckCSourceRuns) include(CheckSymbolExists) CHECK_INCLUDE_FILE(stdbool.h HAVE_STDBOOL_H) CHECK_C_SOURCE_COMPILES( "int main() { __builtin_expect(0,0); return 0; }" HAVE___BUILTIN_EXPECT) CHECK_C_SOURCE_COMPILES(" int f(void) __attribute__ (()); int main() { return 0; } " HAVE___ATTRIBUTE__) CONFIGURE_FILE( ${CMAKE_CURRENT_SOURCE_DIR}/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h) # Always compile with warnings if(MSVC) # Force to always compile with W4 if(CMAKE_CXX_FLAGS MATCHES "/W[0-4]") string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") else() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4") endif() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /WX /wd4706 /wd4204 /wd4221 /wd4100 /D_CRT_SECURE_NO_WARNINGS") elseif(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wno-unused-parameter -std=c99 -pedantic") endif() # Compile as C++ under MSVC older than 12.0 if(MSVC AND MSVC_VERSION LESS 1800) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /TP") endif() if(CMAKE_BUILD_TYPE STREQUAL "Ubsan") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined") endif() if(CMARK_LIB_FUZZER) set(FUZZ_HARNESS "cmark-fuzz") add_executable(${FUZZ_HARNESS} ../test/cmark-fuzz.c ${LIBRARY_SOURCES}) target_link_libraries(${FUZZ_HARNESS} "${CMAKE_LIB_FUZZER_PATH}") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize-coverage=trace-pc-guard") # cmark is written in C but the libFuzzer runtime is written in C++ which # needs to link against the C++ runtime. 
Explicitly link it into cmark-fuzz set_target_properties(${FUZZ_HARNESS} PROPERTIES LINK_FLAGS "-lstdc++") endif() cmarkgfm/third_party/cmark/src/scanners.re0000644000175000017500000002200514210444464021110 0ustar carstencarsten#include #include "chunk.h" #include "scanners.h" bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset) { bufsize_t res; unsigned char *ptr = (unsigned char *)c->data; if (ptr == NULL || offset > c->len) { return 0; } else { unsigned char lim = ptr[c->len]; ptr[c->len] = '\0'; res = scanner(ptr + offset); ptr[c->len] = lim; } return res; } /*!re2c re2c:define:YYCTYPE = "unsigned char"; re2c:define:YYCURSOR = p; re2c:define:YYMARKER = marker; re2c:define:YYCTXMARKER = marker; re2c:yyfill:enable = 0; wordchar = [^\x00-\x20]; spacechar = [ \t\v\f\r\n]; reg_char = [^\\()\x00-\x20]; escaped_char = [\\][!"#$%&'()*+,./:;<=>?@[\\\]^_`{|}~-]; tagname = [A-Za-z][A-Za-z0-9-]*; blocktagname = 'address'|'article'|'aside'|'base'|'basefont'|'blockquote'|'body'|'caption'|'center'|'col'|'colgroup'|'dd'|'details'|'dialog'|'dir'|'div'|'dl'|'dt'|'fieldset'|'figcaption'|'figure'|'footer'|'form'|'frame'|'frameset'|'h1'|'h2'|'h3'|'h4'|'h5'|'h6'|'head'|'header'|'hr'|'html'|'iframe'|'legend'|'li'|'link'|'main'|'menu'|'menuitem'|'nav'|'noframes'|'ol'|'optgroup'|'option'|'p'|'param'|'section'|'source'|'title'|'summary'|'table'|'tbody'|'td'|'tfoot'|'th'|'thead'|'title'|'tr'|'track'|'ul'; attributename = [a-zA-Z_:][a-zA-Z0-9:._-]*; unquotedvalue = [^ \t\r\n\v\f"'=<>`\x00]+; singlequotedvalue = ['][^'\x00]*[']; doublequotedvalue = ["][^"\x00]*["]; attributevalue = unquotedvalue | singlequotedvalue | doublequotedvalue; attributevaluespec = spacechar* [=] spacechar* attributevalue; attribute = spacechar+ attributename attributevaluespec?; opentag = tagname attribute* spacechar* [/]? [>]; closetag = [/] tagname spacechar* [>]; htmlcomment = "!---->" | ("!--" ([-]? [^\x00>-]) ([-]? [^\x00-])* "-->"); processinginstruction = "?" 
([^?>\x00]+ | [?][^>\x00] | [>])* "?>"; declaration = "!" [A-Z]+ spacechar+ [^>\x00]* ">"; cdata = "![CDATA[" ([^\]\x00]+ | "]" [^\]\x00] | "]]" [^>\x00])* "]]>"; htmltag = opentag | closetag | htmlcomment | processinginstruction | declaration | cdata; in_parens_nosp = [(] (reg_char|escaped_char|[\\])* [)]; in_double_quotes = ["] (escaped_char|[^"\x00])* ["]; in_single_quotes = ['] (escaped_char|[^'\x00])* [']; in_parens = [(] (escaped_char|[^)\x00])* [)]; scheme = [A-Za-z][A-Za-z0-9.+-]{1,31}; */ // Try to match a scheme including colon. bufsize_t _scan_scheme(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c scheme [:] { return (bufsize_t)(p - start); } * { return 0; } */ } // Try to match URI autolink after first <, returning number of chars matched. bufsize_t _scan_autolink_uri(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c scheme [:][^\x00-\x20<>]*[>] { return (bufsize_t)(p - start); } * { return 0; } */ } // Try to match email autolink after first <, returning num of chars matched. bufsize_t _scan_autolink_email(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c [a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+ [@] [a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])? ([.][a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)* [>] { return (bufsize_t)(p - start); } * { return 0; } */ } // Try to match an HTML tag after first <, returning num of chars matched. bufsize_t _scan_html_tag(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c htmltag { return (bufsize_t)(p - start); } * { return 0; } */ } // Try to (liberally) match an HTML tag after first <, returning num of chars matched. 
bufsize_t _scan_liberal_html_tag(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c [^\n\x00]+ [>] { return (bufsize_t)(p - start); } * { return 0; } */ } // Try to match an HTML block tag start line, returning // an integer code for the type of block (1-6, matching the spec). // #7 is handled by a separate function, below. bufsize_t _scan_html_block_start(const unsigned char *p) { const unsigned char *marker = NULL; /*!re2c [<] ('script'|'pre'|'style') (spacechar | [>]) { return 1; } '' { return (bufsize_t)(p - start); } * { return 0; } */ } // Try to match an HTML block end line of type 3 bufsize_t _scan_html_block_end_3(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c [^\n\x00]* '?>' { return (bufsize_t)(p - start); } * { return 0; } */ } // Try to match an HTML block end line of type 4 bufsize_t _scan_html_block_end_4(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c [^\n\x00]* '>' { return (bufsize_t)(p - start); } * { return 0; } */ } // Try to match an HTML block end line of type 5 bufsize_t _scan_html_block_end_5(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c [^\n\x00]* ']]>' { return (bufsize_t)(p - start); } * { return 0; } */ } // Try to match a link title (in single quotes, in double quotes, or // in parentheses), returning number of chars matched. Allow one // level of internal nesting (quotes within quotes). bufsize_t _scan_link_title(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c ["] (escaped_char|[^"\x00])* ["] { return (bufsize_t)(p - start); } ['] (escaped_char|[^'\x00])* ['] { return (bufsize_t)(p - start); } [(] (escaped_char|[^()\x00])* [)] { return (bufsize_t)(p - start); } * { return 0; } */ } // Match space characters, including newlines. 
bufsize_t _scan_spacechars(const unsigned char *p) { const unsigned char *start = p; \ /*!re2c [ \t\v\f\r\n]+ { return (bufsize_t)(p - start); } * { return 0; } */ } // Match ATX heading start. bufsize_t _scan_atx_heading_start(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c [#]{1,6} ([ \t]+|[\r\n]) { return (bufsize_t)(p - start); } * { return 0; } */ } // Match setext heading line. Return 1 for level-1 heading, // 2 for level-2, 0 for no match. bufsize_t _scan_setext_heading_line(const unsigned char *p) { const unsigned char *marker = NULL; /*!re2c [=]+ [ \t]* [\r\n] { return 1; } [-]+ [ \t]* [\r\n] { return 2; } * { return 0; } */ } // Scan an opening code fence. bufsize_t _scan_open_code_fence(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c [`]{3,} / [^`\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); } [~]{3,} / [^\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); } * { return 0; } */ } // Scan a closing code fence with length at least len. bufsize_t _scan_close_code_fence(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c [`]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); } [~]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); } * { return 0; } */ } // Scans an entity. // Returns number of chars matched. bufsize_t _scan_entity(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c [&] ([#] ([Xx][A-Fa-f0-9]{1,6}|[0-9]{1,7}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;] { return (bufsize_t)(p - start); } * { return 0; } */ } // Returns positive value if a URL begins in a way that is potentially // dangerous, with javascript:, vbscript:, file:, or data:, otherwise 0. 
bufsize_t _scan_dangerous_url(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c 'data:image/' ('png'|'gif'|'jpeg'|'webp') { return 0; } 'javascript:' | 'vbscript:' | 'file:' | 'data:' { return (bufsize_t)(p - start); } * { return 0; } */ } // Scans a footnote definition opening. bufsize_t _scan_footnote_definition(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c '[^' ([^\] \r\n\x00\t]+) ']:' [ \t]* { return (bufsize_t)(p - start); } * { return 0; } */ } cmarkgfm/third_party/cmark/src/chunk.h0000644000175000017500000000644114210444464020233 0ustar carstencarsten#ifndef CMARK_CHUNK_H #define CMARK_CHUNK_H #include #include #include #include "cmark-gfm.h" #include "buffer.h" #include "cmark_ctype.h" #define CMARK_CHUNK_EMPTY \ { NULL, 0, 0 } typedef struct cmark_chunk { unsigned char *data; bufsize_t len; bufsize_t alloc; // also implies a NULL-terminated string } cmark_chunk; static CMARK_INLINE void cmark_chunk_free(cmark_mem *mem, cmark_chunk *c) { if (c->alloc) mem->free(c->data); c->data = NULL; c->alloc = 0; c->len = 0; } static CMARK_INLINE void cmark_chunk_ltrim(cmark_chunk *c) { assert(!c->alloc); while (c->len && cmark_isspace(c->data[0])) { c->data++; c->len--; } } static CMARK_INLINE void cmark_chunk_rtrim(cmark_chunk *c) { assert(!c->alloc); while (c->len > 0) { if (!cmark_isspace(c->data[c->len - 1])) break; c->len--; } } static CMARK_INLINE void cmark_chunk_trim(cmark_chunk *c) { cmark_chunk_ltrim(c); cmark_chunk_rtrim(c); } static CMARK_INLINE bufsize_t cmark_chunk_strchr(cmark_chunk *ch, int c, bufsize_t offset) { const unsigned char *p = (unsigned char *)memchr(ch->data + offset, c, ch->len - offset); return p ? 
(bufsize_t)(p - ch->data) : ch->len; } static CMARK_INLINE const char *cmark_chunk_to_cstr(cmark_mem *mem, cmark_chunk *c) { unsigned char *str; if (c->alloc) { return (char *)c->data; } str = (unsigned char *)mem->calloc(c->len + 1, 1); if (c->len > 0) { memcpy(str, c->data, c->len); } str[c->len] = 0; c->data = str; c->alloc = 1; return (char *)str; } static CMARK_INLINE void cmark_chunk_set_cstr(cmark_mem *mem, cmark_chunk *c, const char *str) { unsigned char *old = c->alloc ? c->data : NULL; if (str == NULL) { c->len = 0; c->data = NULL; c->alloc = 0; } else { c->len = (bufsize_t)strlen(str); c->data = (unsigned char *)mem->calloc(c->len + 1, 1); c->alloc = 1; memcpy(c->data, str, c->len + 1); } if (old != NULL) { mem->free(old); } } static CMARK_INLINE cmark_chunk cmark_chunk_literal(const char *data) { bufsize_t len = data ? (bufsize_t)strlen(data) : 0; cmark_chunk c = {(unsigned char *)data, len, 0}; return c; } static CMARK_INLINE cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, bufsize_t pos, bufsize_t len) { cmark_chunk c = {ch->data + pos, len, 0}; return c; } static CMARK_INLINE cmark_chunk cmark_chunk_buf_detach(cmark_strbuf *buf) { cmark_chunk c; c.len = buf->size; c.data = cmark_strbuf_detach(buf); c.alloc = 1; return c; } /* trim_new variants are to be used when the source chunk may or may not be * allocated; forces a newly allocated chunk. 
*/ static CMARK_INLINE cmark_chunk cmark_chunk_ltrim_new(cmark_mem *mem, cmark_chunk *c) { cmark_chunk r = cmark_chunk_dup(c, 0, c->len); cmark_chunk_ltrim(&r); cmark_chunk_to_cstr(mem, &r); return r; } static CMARK_INLINE cmark_chunk cmark_chunk_rtrim_new(cmark_mem *mem, cmark_chunk *c) { cmark_chunk r = cmark_chunk_dup(c, 0, c->len); cmark_chunk_rtrim(&r); cmark_chunk_to_cstr(mem, &r); return r; } #endif cmarkgfm/third_party/cmark/src/case_fold_switch.inc0000644000175000017500000025012714210444464022747 0ustar carstencarsten switch (c) { case 0x0041: bufpush(0x0061); break; case 0x0042: bufpush(0x0062); break; case 0x0043: bufpush(0x0063); break; case 0x0044: bufpush(0x0064); break; case 0x0045: bufpush(0x0065); break; case 0x0046: bufpush(0x0066); break; case 0x0047: bufpush(0x0067); break; case 0x0048: bufpush(0x0068); break; case 0x0049: bufpush(0x0069); break; case 0x004A: bufpush(0x006A); break; case 0x004B: bufpush(0x006B); break; case 0x004C: bufpush(0x006C); break; case 0x004D: bufpush(0x006D); break; case 0x004E: bufpush(0x006E); break; case 0x004F: bufpush(0x006F); break; case 0x0050: bufpush(0x0070); break; case 0x0051: bufpush(0x0071); break; case 0x0052: bufpush(0x0072); break; case 0x0053: bufpush(0x0073); break; case 0x0054: bufpush(0x0074); break; case 0x0055: bufpush(0x0075); break; case 0x0056: bufpush(0x0076); break; case 0x0057: bufpush(0x0077); break; case 0x0058: bufpush(0x0078); break; case 0x0059: bufpush(0x0079); break; case 0x005A: bufpush(0x007A); break; case 0x00B5: bufpush(0x03BC); break; case 0x00C0: bufpush(0x00E0); break; case 0x00C1: bufpush(0x00E1); break; case 0x00C2: bufpush(0x00E2); break; case 0x00C3: bufpush(0x00E3); break; case 0x00C4: bufpush(0x00E4); break; case 0x00C5: bufpush(0x00E5); break; case 0x00C6: bufpush(0x00E6); break; case 0x00C7: bufpush(0x00E7); break; case 0x00C8: bufpush(0x00E8); break; case 0x00C9: bufpush(0x00E9); break; case 0x00CA: bufpush(0x00EA); break; case 0x00CB: bufpush(0x00EB); break; case 
0x00CC: bufpush(0x00EC); break; case 0x00CD: bufpush(0x00ED); break; case 0x00CE: bufpush(0x00EE); break; case 0x00CF: bufpush(0x00EF); break; case 0x00D0: bufpush(0x00F0); break; case 0x00D1: bufpush(0x00F1); break; case 0x00D2: bufpush(0x00F2); break; case 0x00D3: bufpush(0x00F3); break; case 0x00D4: bufpush(0x00F4); break; case 0x00D5: bufpush(0x00F5); break; case 0x00D6: bufpush(0x00F6); break; case 0x00D8: bufpush(0x00F8); break; case 0x00D9: bufpush(0x00F9); break; case 0x00DA: bufpush(0x00FA); break; case 0x00DB: bufpush(0x00FB); break; case 0x00DC: bufpush(0x00FC); break; case 0x00DD: bufpush(0x00FD); break; case 0x00DE: bufpush(0x00FE); break; case 0x00DF: bufpush(0x0073); bufpush(0x0073); break; case 0x0100: bufpush(0x0101); break; case 0x0102: bufpush(0x0103); break; case 0x0104: bufpush(0x0105); break; case 0x0106: bufpush(0x0107); break; case 0x0108: bufpush(0x0109); break; case 0x010A: bufpush(0x010B); break; case 0x010C: bufpush(0x010D); break; case 0x010E: bufpush(0x010F); break; case 0x0110: bufpush(0x0111); break; case 0x0112: bufpush(0x0113); break; case 0x0114: bufpush(0x0115); break; case 0x0116: bufpush(0x0117); break; case 0x0118: bufpush(0x0119); break; case 0x011A: bufpush(0x011B); break; case 0x011C: bufpush(0x011D); break; case 0x011E: bufpush(0x011F); break; case 0x0120: bufpush(0x0121); break; case 0x0122: bufpush(0x0123); break; case 0x0124: bufpush(0x0125); break; case 0x0126: bufpush(0x0127); break; case 0x0128: bufpush(0x0129); break; case 0x012A: bufpush(0x012B); break; case 0x012C: bufpush(0x012D); break; case 0x012E: bufpush(0x012F); break; case 0x0130: bufpush(0x0069); bufpush(0x0307); break; case 0x0132: bufpush(0x0133); break; case 0x0134: bufpush(0x0135); break; case 0x0136: bufpush(0x0137); break; case 0x0139: bufpush(0x013A); break; case 0x013B: bufpush(0x013C); break; case 0x013D: bufpush(0x013E); break; case 0x013F: bufpush(0x0140); break; case 0x0141: bufpush(0x0142); break; case 0x0143: bufpush(0x0144); break; case 
0x0145: bufpush(0x0146); break; case 0x0147: bufpush(0x0148); break; case 0x0149: bufpush(0x02BC); bufpush(0x006E); break; case 0x014A: bufpush(0x014B); break; case 0x014C: bufpush(0x014D); break; case 0x014E: bufpush(0x014F); break; case 0x0150: bufpush(0x0151); break; case 0x0152: bufpush(0x0153); break; case 0x0154: bufpush(0x0155); break; case 0x0156: bufpush(0x0157); break; case 0x0158: bufpush(0x0159); break; case 0x015A: bufpush(0x015B); break; case 0x015C: bufpush(0x015D); break; case 0x015E: bufpush(0x015F); break; case 0x0160: bufpush(0x0161); break; case 0x0162: bufpush(0x0163); break; case 0x0164: bufpush(0x0165); break; case 0x0166: bufpush(0x0167); break; case 0x0168: bufpush(0x0169); break; case 0x016A: bufpush(0x016B); break; case 0x016C: bufpush(0x016D); break; case 0x016E: bufpush(0x016F); break; case 0x0170: bufpush(0x0171); break; case 0x0172: bufpush(0x0173); break; case 0x0174: bufpush(0x0175); break; case 0x0176: bufpush(0x0177); break; case 0x0178: bufpush(0x00FF); break; case 0x0179: bufpush(0x017A); break; case 0x017B: bufpush(0x017C); break; case 0x017D: bufpush(0x017E); break; case 0x017F: bufpush(0x0073); break; case 0x0181: bufpush(0x0253); break; case 0x0182: bufpush(0x0183); break; case 0x0184: bufpush(0x0185); break; case 0x0186: bufpush(0x0254); break; case 0x0187: bufpush(0x0188); break; case 0x0189: bufpush(0x0256); break; case 0x018A: bufpush(0x0257); break; case 0x018B: bufpush(0x018C); break; case 0x018E: bufpush(0x01DD); break; case 0x018F: bufpush(0x0259); break; case 0x0190: bufpush(0x025B); break; case 0x0191: bufpush(0x0192); break; case 0x0193: bufpush(0x0260); break; case 0x0194: bufpush(0x0263); break; case 0x0196: bufpush(0x0269); break; case 0x0197: bufpush(0x0268); break; case 0x0198: bufpush(0x0199); break; case 0x019C: bufpush(0x026F); break; case 0x019D: bufpush(0x0272); break; case 0x019F: bufpush(0x0275); break; case 0x01A0: bufpush(0x01A1); break; case 0x01A2: bufpush(0x01A3); break; case 0x01A4: 
bufpush(0x01A5); break; case 0x01A6: bufpush(0x0280); break; case 0x01A7: bufpush(0x01A8); break; case 0x01A9: bufpush(0x0283); break; case 0x01AC: bufpush(0x01AD); break; case 0x01AE: bufpush(0x0288); break; case 0x01AF: bufpush(0x01B0); break; case 0x01B1: bufpush(0x028A); break; case 0x01B2: bufpush(0x028B); break; case 0x01B3: bufpush(0x01B4); break; case 0x01B5: bufpush(0x01B6); break; case 0x01B7: bufpush(0x0292); break; case 0x01B8: bufpush(0x01B9); break; case 0x01BC: bufpush(0x01BD); break; case 0x01C4: bufpush(0x01C6); break; case 0x01C5: bufpush(0x01C6); break; case 0x01C7: bufpush(0x01C9); break; case 0x01C8: bufpush(0x01C9); break; case 0x01CA: bufpush(0x01CC); break; case 0x01CB: bufpush(0x01CC); break; case 0x01CD: bufpush(0x01CE); break; case 0x01CF: bufpush(0x01D0); break; case 0x01D1: bufpush(0x01D2); break; case 0x01D3: bufpush(0x01D4); break; case 0x01D5: bufpush(0x01D6); break; case 0x01D7: bufpush(0x01D8); break; case 0x01D9: bufpush(0x01DA); break; case 0x01DB: bufpush(0x01DC); break; case 0x01DE: bufpush(0x01DF); break; case 0x01E0: bufpush(0x01E1); break; case 0x01E2: bufpush(0x01E3); break; case 0x01E4: bufpush(0x01E5); break; case 0x01E6: bufpush(0x01E7); break; case 0x01E8: bufpush(0x01E9); break; case 0x01EA: bufpush(0x01EB); break; case 0x01EC: bufpush(0x01ED); break; case 0x01EE: bufpush(0x01EF); break; case 0x01F0: bufpush(0x006A); bufpush(0x030C); break; case 0x01F1: bufpush(0x01F3); break; case 0x01F2: bufpush(0x01F3); break; case 0x01F4: bufpush(0x01F5); break; case 0x01F6: bufpush(0x0195); break; case 0x01F7: bufpush(0x01BF); break; case 0x01F8: bufpush(0x01F9); break; case 0x01FA: bufpush(0x01FB); break; case 0x01FC: bufpush(0x01FD); break; case 0x01FE: bufpush(0x01FF); break; case 0x0200: bufpush(0x0201); break; case 0x0202: bufpush(0x0203); break; case 0x0204: bufpush(0x0205); break; case 0x0206: bufpush(0x0207); break; case 0x0208: bufpush(0x0209); break; case 0x020A: bufpush(0x020B); break; case 0x020C: bufpush(0x020D); 
break; case 0x020E: bufpush(0x020F); break; case 0x0210: bufpush(0x0211); break; case 0x0212: bufpush(0x0213); break; case 0x0214: bufpush(0x0215); break; case 0x0216: bufpush(0x0217); break; case 0x0218: bufpush(0x0219); break; case 0x021A: bufpush(0x021B); break; case 0x021C: bufpush(0x021D); break; case 0x021E: bufpush(0x021F); break; case 0x0220: bufpush(0x019E); break; case 0x0222: bufpush(0x0223); break; case 0x0224: bufpush(0x0225); break; case 0x0226: bufpush(0x0227); break; case 0x0228: bufpush(0x0229); break; case 0x022A: bufpush(0x022B); break; case 0x022C: bufpush(0x022D); break; case 0x022E: bufpush(0x022F); break; case 0x0230: bufpush(0x0231); break; case 0x0232: bufpush(0x0233); break; case 0x023A: bufpush(0x2C65); break; case 0x023B: bufpush(0x023C); break; case 0x023D: bufpush(0x019A); break; case 0x023E: bufpush(0x2C66); break; case 0x0241: bufpush(0x0242); break; case 0x0243: bufpush(0x0180); break; case 0x0244: bufpush(0x0289); break; case 0x0245: bufpush(0x028C); break; case 0x0246: bufpush(0x0247); break; case 0x0248: bufpush(0x0249); break; case 0x024A: bufpush(0x024B); break; case 0x024C: bufpush(0x024D); break; case 0x024E: bufpush(0x024F); break; case 0x0345: bufpush(0x03B9); break; case 0x0370: bufpush(0x0371); break; case 0x0372: bufpush(0x0373); break; case 0x0376: bufpush(0x0377); break; case 0x037F: bufpush(0x03F3); break; case 0x0386: bufpush(0x03AC); break; case 0x0388: bufpush(0x03AD); break; case 0x0389: bufpush(0x03AE); break; case 0x038A: bufpush(0x03AF); break; case 0x038C: bufpush(0x03CC); break; case 0x038E: bufpush(0x03CD); break; case 0x038F: bufpush(0x03CE); break; case 0x0390: bufpush(0x03B9); bufpush(0x0308); bufpush(0x0301); break; case 0x0391: bufpush(0x03B1); break; case 0x0392: bufpush(0x03B2); break; case 0x0393: bufpush(0x03B3); break; case 0x0394: bufpush(0x03B4); break; case 0x0395: bufpush(0x03B5); break; case 0x0396: bufpush(0x03B6); break; case 0x0397: bufpush(0x03B7); break; case 0x0398: bufpush(0x03B8); 
break; case 0x0399: bufpush(0x03B9); break; case 0x039A: bufpush(0x03BA); break; case 0x039B: bufpush(0x03BB); break; case 0x039C: bufpush(0x03BC); break; case 0x039D: bufpush(0x03BD); break; case 0x039E: bufpush(0x03BE); break; case 0x039F: bufpush(0x03BF); break; case 0x03A0: bufpush(0x03C0); break; case 0x03A1: bufpush(0x03C1); break; case 0x03A3: bufpush(0x03C3); break; case 0x03A4: bufpush(0x03C4); break; case 0x03A5: bufpush(0x03C5); break; case 0x03A6: bufpush(0x03C6); break; case 0x03A7: bufpush(0x03C7); break; case 0x03A8: bufpush(0x03C8); break; case 0x03A9: bufpush(0x03C9); break; case 0x03AA: bufpush(0x03CA); break; case 0x03AB: bufpush(0x03CB); break; case 0x03B0: bufpush(0x03C5); bufpush(0x0308); bufpush(0x0301); break; case 0x03C2: bufpush(0x03C3); break; case 0x03CF: bufpush(0x03D7); break; case 0x03D0: bufpush(0x03B2); break; case 0x03D1: bufpush(0x03B8); break; case 0x03D5: bufpush(0x03C6); break; case 0x03D6: bufpush(0x03C0); break; case 0x03D8: bufpush(0x03D9); break; case 0x03DA: bufpush(0x03DB); break; case 0x03DC: bufpush(0x03DD); break; case 0x03DE: bufpush(0x03DF); break; case 0x03E0: bufpush(0x03E1); break; case 0x03E2: bufpush(0x03E3); break; case 0x03E4: bufpush(0x03E5); break; case 0x03E6: bufpush(0x03E7); break; case 0x03E8: bufpush(0x03E9); break; case 0x03EA: bufpush(0x03EB); break; case 0x03EC: bufpush(0x03ED); break; case 0x03EE: bufpush(0x03EF); break; case 0x03F0: bufpush(0x03BA); break; case 0x03F1: bufpush(0x03C1); break; case 0x03F4: bufpush(0x03B8); break; case 0x03F5: bufpush(0x03B5); break; case 0x03F7: bufpush(0x03F8); break; case 0x03F9: bufpush(0x03F2); break; case 0x03FA: bufpush(0x03FB); break; case 0x03FD: bufpush(0x037B); break; case 0x03FE: bufpush(0x037C); break; case 0x03FF: bufpush(0x037D); break; case 0x0400: bufpush(0x0450); break; case 0x0401: bufpush(0x0451); break; case 0x0402: bufpush(0x0452); break; case 0x0403: bufpush(0x0453); break; case 0x0404: bufpush(0x0454); break; case 0x0405: bufpush(0x0455); 
break; case 0x0406: bufpush(0x0456); break; case 0x0407: bufpush(0x0457); break; case 0x0408: bufpush(0x0458); break; case 0x0409: bufpush(0x0459); break; case 0x040A: bufpush(0x045A); break; case 0x040B: bufpush(0x045B); break; case 0x040C: bufpush(0x045C); break; case 0x040D: bufpush(0x045D); break; case 0x040E: bufpush(0x045E); break; case 0x040F: bufpush(0x045F); break; case 0x0410: bufpush(0x0430); break; case 0x0411: bufpush(0x0431); break; case 0x0412: bufpush(0x0432); break; case 0x0413: bufpush(0x0433); break; case 0x0414: bufpush(0x0434); break; case 0x0415: bufpush(0x0435); break; case 0x0416: bufpush(0x0436); break; case 0x0417: bufpush(0x0437); break; case 0x0418: bufpush(0x0438); break; case 0x0419: bufpush(0x0439); break; case 0x041A: bufpush(0x043A); break; case 0x041B: bufpush(0x043B); break; case 0x041C: bufpush(0x043C); break; case 0x041D: bufpush(0x043D); break; case 0x041E: bufpush(0x043E); break; case 0x041F: bufpush(0x043F); break; case 0x0420: bufpush(0x0440); break; case 0x0421: bufpush(0x0441); break; case 0x0422: bufpush(0x0442); break; case 0x0423: bufpush(0x0443); break; case 0x0424: bufpush(0x0444); break; case 0x0425: bufpush(0x0445); break; case 0x0426: bufpush(0x0446); break; case 0x0427: bufpush(0x0447); break; case 0x0428: bufpush(0x0448); break; case 0x0429: bufpush(0x0449); break; case 0x042A: bufpush(0x044A); break; case 0x042B: bufpush(0x044B); break; case 0x042C: bufpush(0x044C); break; case 0x042D: bufpush(0x044D); break; case 0x042E: bufpush(0x044E); break; case 0x042F: bufpush(0x044F); break; case 0x0460: bufpush(0x0461); break; case 0x0462: bufpush(0x0463); break; case 0x0464: bufpush(0x0465); break; case 0x0466: bufpush(0x0467); break; case 0x0468: bufpush(0x0469); break; case 0x046A: bufpush(0x046B); break; case 0x046C: bufpush(0x046D); break; case 0x046E: bufpush(0x046F); break; case 0x0470: bufpush(0x0471); break; case 0x0472: bufpush(0x0473); break; case 0x0474: bufpush(0x0475); break; case 0x0476: bufpush(0x0477); 
break; case 0x0478: bufpush(0x0479); break; case 0x047A: bufpush(0x047B); break; case 0x047C: bufpush(0x047D); break; case 0x047E: bufpush(0x047F); break; case 0x0480: bufpush(0x0481); break; case 0x048A: bufpush(0x048B); break; case 0x048C: bufpush(0x048D); break; case 0x048E: bufpush(0x048F); break; case 0x0490: bufpush(0x0491); break; case 0x0492: bufpush(0x0493); break; case 0x0494: bufpush(0x0495); break; case 0x0496: bufpush(0x0497); break; case 0x0498: bufpush(0x0499); break; case 0x049A: bufpush(0x049B); break; case 0x049C: bufpush(0x049D); break; case 0x049E: bufpush(0x049F); break; case 0x04A0: bufpush(0x04A1); break; case 0x04A2: bufpush(0x04A3); break; case 0x04A4: bufpush(0x04A5); break; case 0x04A6: bufpush(0x04A7); break; case 0x04A8: bufpush(0x04A9); break; case 0x04AA: bufpush(0x04AB); break; case 0x04AC: bufpush(0x04AD); break; case 0x04AE: bufpush(0x04AF); break; case 0x04B0: bufpush(0x04B1); break; case 0x04B2: bufpush(0x04B3); break; case 0x04B4: bufpush(0x04B5); break; case 0x04B6: bufpush(0x04B7); break; case 0x04B8: bufpush(0x04B9); break; case 0x04BA: bufpush(0x04BB); break; case 0x04BC: bufpush(0x04BD); break; case 0x04BE: bufpush(0x04BF); break; case 0x04C0: bufpush(0x04CF); break; case 0x04C1: bufpush(0x04C2); break; case 0x04C3: bufpush(0x04C4); break; case 0x04C5: bufpush(0x04C6); break; case 0x04C7: bufpush(0x04C8); break; case 0x04C9: bufpush(0x04CA); break; case 0x04CB: bufpush(0x04CC); break; case 0x04CD: bufpush(0x04CE); break; case 0x04D0: bufpush(0x04D1); break; case 0x04D2: bufpush(0x04D3); break; case 0x04D4: bufpush(0x04D5); break; case 0x04D6: bufpush(0x04D7); break; case 0x04D8: bufpush(0x04D9); break; case 0x04DA: bufpush(0x04DB); break; case 0x04DC: bufpush(0x04DD); break; case 0x04DE: bufpush(0x04DF); break; case 0x04E0: bufpush(0x04E1); break; case 0x04E2: bufpush(0x04E3); break; case 0x04E4: bufpush(0x04E5); break; case 0x04E6: bufpush(0x04E7); break; case 0x04E8: bufpush(0x04E9); break; case 0x04EA: bufpush(0x04EB); 
break; case 0x04EC: bufpush(0x04ED); break; case 0x04EE: bufpush(0x04EF); break; case 0x04F0: bufpush(0x04F1); break; case 0x04F2: bufpush(0x04F3); break; case 0x04F4: bufpush(0x04F5); break; case 0x04F6: bufpush(0x04F7); break; case 0x04F8: bufpush(0x04F9); break; case 0x04FA: bufpush(0x04FB); break; case 0x04FC: bufpush(0x04FD); break; case 0x04FE: bufpush(0x04FF); break; case 0x0500: bufpush(0x0501); break; case 0x0502: bufpush(0x0503); break; case 0x0504: bufpush(0x0505); break; case 0x0506: bufpush(0x0507); break; case 0x0508: bufpush(0x0509); break; case 0x050A: bufpush(0x050B); break; case 0x050C: bufpush(0x050D); break; case 0x050E: bufpush(0x050F); break; case 0x0510: bufpush(0x0511); break; case 0x0512: bufpush(0x0513); break; case 0x0514: bufpush(0x0515); break; case 0x0516: bufpush(0x0517); break; case 0x0518: bufpush(0x0519); break; case 0x051A: bufpush(0x051B); break; case 0x051C: bufpush(0x051D); break; case 0x051E: bufpush(0x051F); break; case 0x0520: bufpush(0x0521); break; case 0x0522: bufpush(0x0523); break; case 0x0524: bufpush(0x0525); break; case 0x0526: bufpush(0x0527); break; case 0x0528: bufpush(0x0529); break; case 0x052A: bufpush(0x052B); break; case 0x052C: bufpush(0x052D); break; case 0x052E: bufpush(0x052F); break; case 0x0531: bufpush(0x0561); break; case 0x0532: bufpush(0x0562); break; case 0x0533: bufpush(0x0563); break; case 0x0534: bufpush(0x0564); break; case 0x0535: bufpush(0x0565); break; case 0x0536: bufpush(0x0566); break; case 0x0537: bufpush(0x0567); break; case 0x0538: bufpush(0x0568); break; case 0x0539: bufpush(0x0569); break; case 0x053A: bufpush(0x056A); break; case 0x053B: bufpush(0x056B); break; case 0x053C: bufpush(0x056C); break; case 0x053D: bufpush(0x056D); break; case 0x053E: bufpush(0x056E); break; case 0x053F: bufpush(0x056F); break; case 0x0540: bufpush(0x0570); break; case 0x0541: bufpush(0x0571); break; case 0x0542: bufpush(0x0572); break; case 0x0543: bufpush(0x0573); break; case 0x0544: bufpush(0x0574); 
break; case 0x0545: bufpush(0x0575); break; case 0x0546: bufpush(0x0576); break; case 0x0547: bufpush(0x0577); break; case 0x0548: bufpush(0x0578); break; case 0x0549: bufpush(0x0579); break; case 0x054A: bufpush(0x057A); break; case 0x054B: bufpush(0x057B); break; case 0x054C: bufpush(0x057C); break; case 0x054D: bufpush(0x057D); break; case 0x054E: bufpush(0x057E); break; case 0x054F: bufpush(0x057F); break; case 0x0550: bufpush(0x0580); break; case 0x0551: bufpush(0x0581); break; case 0x0552: bufpush(0x0582); break; case 0x0553: bufpush(0x0583); break; case 0x0554: bufpush(0x0584); break; case 0x0555: bufpush(0x0585); break; case 0x0556: bufpush(0x0586); break; case 0x0587: bufpush(0x0565); bufpush(0x0582); break; case 0x10A0: bufpush(0x2D00); break; case 0x10A1: bufpush(0x2D01); break; case 0x10A2: bufpush(0x2D02); break; case 0x10A3: bufpush(0x2D03); break; case 0x10A4: bufpush(0x2D04); break; case 0x10A5: bufpush(0x2D05); break; case 0x10A6: bufpush(0x2D06); break; case 0x10A7: bufpush(0x2D07); break; case 0x10A8: bufpush(0x2D08); break; case 0x10A9: bufpush(0x2D09); break; case 0x10AA: bufpush(0x2D0A); break; case 0x10AB: bufpush(0x2D0B); break; case 0x10AC: bufpush(0x2D0C); break; case 0x10AD: bufpush(0x2D0D); break; case 0x10AE: bufpush(0x2D0E); break; case 0x10AF: bufpush(0x2D0F); break; case 0x10B0: bufpush(0x2D10); break; case 0x10B1: bufpush(0x2D11); break; case 0x10B2: bufpush(0x2D12); break; case 0x10B3: bufpush(0x2D13); break; case 0x10B4: bufpush(0x2D14); break; case 0x10B5: bufpush(0x2D15); break; case 0x10B6: bufpush(0x2D16); break; case 0x10B7: bufpush(0x2D17); break; case 0x10B8: bufpush(0x2D18); break; case 0x10B9: bufpush(0x2D19); break; case 0x10BA: bufpush(0x2D1A); break; case 0x10BB: bufpush(0x2D1B); break; case 0x10BC: bufpush(0x2D1C); break; case 0x10BD: bufpush(0x2D1D); break; case 0x10BE: bufpush(0x2D1E); break; case 0x10BF: bufpush(0x2D1F); break; case 0x10C0: bufpush(0x2D20); break; case 0x10C1: bufpush(0x2D21); break; case 0x10C2: 
bufpush(0x2D22); break; case 0x10C3: bufpush(0x2D23); break; case 0x10C4: bufpush(0x2D24); break; case 0x10C5: bufpush(0x2D25); break; case 0x10C7: bufpush(0x2D27); break; case 0x10CD: bufpush(0x2D2D); break; case 0x13F8: bufpush(0x13F0); break; case 0x13F9: bufpush(0x13F1); break; case 0x13FA: bufpush(0x13F2); break; case 0x13FB: bufpush(0x13F3); break; case 0x13FC: bufpush(0x13F4); break; case 0x13FD: bufpush(0x13F5); break; case 0x1C80: bufpush(0x0432); break; case 0x1C81: bufpush(0x0434); break; case 0x1C82: bufpush(0x043E); break; case 0x1C83: bufpush(0x0441); break; case 0x1C84: bufpush(0x0442); break; case 0x1C85: bufpush(0x0442); break; case 0x1C86: bufpush(0x044A); break; case 0x1C87: bufpush(0x0463); break; case 0x1C88: bufpush(0xA64B); break; case 0x1E00: bufpush(0x1E01); break; case 0x1E02: bufpush(0x1E03); break; case 0x1E04: bufpush(0x1E05); break; case 0x1E06: bufpush(0x1E07); break; case 0x1E08: bufpush(0x1E09); break; case 0x1E0A: bufpush(0x1E0B); break; case 0x1E0C: bufpush(0x1E0D); break; case 0x1E0E: bufpush(0x1E0F); break; case 0x1E10: bufpush(0x1E11); break; case 0x1E12: bufpush(0x1E13); break; case 0x1E14: bufpush(0x1E15); break; case 0x1E16: bufpush(0x1E17); break; case 0x1E18: bufpush(0x1E19); break; case 0x1E1A: bufpush(0x1E1B); break; case 0x1E1C: bufpush(0x1E1D); break; case 0x1E1E: bufpush(0x1E1F); break; case 0x1E20: bufpush(0x1E21); break; case 0x1E22: bufpush(0x1E23); break; case 0x1E24: bufpush(0x1E25); break; case 0x1E26: bufpush(0x1E27); break; case 0x1E28: bufpush(0x1E29); break; case 0x1E2A: bufpush(0x1E2B); break; case 0x1E2C: bufpush(0x1E2D); break; case 0x1E2E: bufpush(0x1E2F); break; case 0x1E30: bufpush(0x1E31); break; case 0x1E32: bufpush(0x1E33); break; case 0x1E34: bufpush(0x1E35); break; case 0x1E36: bufpush(0x1E37); break; case 0x1E38: bufpush(0x1E39); break; case 0x1E3A: bufpush(0x1E3B); break; case 0x1E3C: bufpush(0x1E3D); break; case 0x1E3E: bufpush(0x1E3F); break; case 0x1E40: bufpush(0x1E41); break; case 0x1E42: 
bufpush(0x1E43); break; case 0x1E44: bufpush(0x1E45); break; case 0x1E46: bufpush(0x1E47); break; case 0x1E48: bufpush(0x1E49); break; case 0x1E4A: bufpush(0x1E4B); break; case 0x1E4C: bufpush(0x1E4D); break; case 0x1E4E: bufpush(0x1E4F); break; case 0x1E50: bufpush(0x1E51); break; case 0x1E52: bufpush(0x1E53); break; case 0x1E54: bufpush(0x1E55); break; case 0x1E56: bufpush(0x1E57); break; case 0x1E58: bufpush(0x1E59); break; case 0x1E5A: bufpush(0x1E5B); break; case 0x1E5C: bufpush(0x1E5D); break; case 0x1E5E: bufpush(0x1E5F); break; case 0x1E60: bufpush(0x1E61); break; case 0x1E62: bufpush(0x1E63); break; case 0x1E64: bufpush(0x1E65); break; case 0x1E66: bufpush(0x1E67); break; case 0x1E68: bufpush(0x1E69); break; case 0x1E6A: bufpush(0x1E6B); break; case 0x1E6C: bufpush(0x1E6D); break; case 0x1E6E: bufpush(0x1E6F); break; case 0x1E70: bufpush(0x1E71); break; case 0x1E72: bufpush(0x1E73); break; case 0x1E74: bufpush(0x1E75); break; case 0x1E76: bufpush(0x1E77); break; case 0x1E78: bufpush(0x1E79); break; case 0x1E7A: bufpush(0x1E7B); break; case 0x1E7C: bufpush(0x1E7D); break; case 0x1E7E: bufpush(0x1E7F); break; case 0x1E80: bufpush(0x1E81); break; case 0x1E82: bufpush(0x1E83); break; case 0x1E84: bufpush(0x1E85); break; case 0x1E86: bufpush(0x1E87); break; case 0x1E88: bufpush(0x1E89); break; case 0x1E8A: bufpush(0x1E8B); break; case 0x1E8C: bufpush(0x1E8D); break; case 0x1E8E: bufpush(0x1E8F); break; case 0x1E90: bufpush(0x1E91); break; case 0x1E92: bufpush(0x1E93); break; case 0x1E94: bufpush(0x1E95); break; case 0x1E96: bufpush(0x0068); bufpush(0x0331); break; case 0x1E97: bufpush(0x0074); bufpush(0x0308); break; case 0x1E98: bufpush(0x0077); bufpush(0x030A); break; case 0x1E99: bufpush(0x0079); bufpush(0x030A); break; case 0x1E9A: bufpush(0x0061); bufpush(0x02BE); break; case 0x1E9B: bufpush(0x1E61); break; case 0x1E9E: bufpush(0x0073); bufpush(0x0073); break; case 0x1EA0: bufpush(0x1EA1); break; case 0x1EA2: bufpush(0x1EA3); break; case 0x1EA4: 
bufpush(0x1EA5); break; case 0x1EA6: bufpush(0x1EA7); break; case 0x1EA8: bufpush(0x1EA9); break; case 0x1EAA: bufpush(0x1EAB); break; case 0x1EAC: bufpush(0x1EAD); break; case 0x1EAE: bufpush(0x1EAF); break; case 0x1EB0: bufpush(0x1EB1); break; case 0x1EB2: bufpush(0x1EB3); break; case 0x1EB4: bufpush(0x1EB5); break; case 0x1EB6: bufpush(0x1EB7); break; case 0x1EB8: bufpush(0x1EB9); break; case 0x1EBA: bufpush(0x1EBB); break; case 0x1EBC: bufpush(0x1EBD); break; case 0x1EBE: bufpush(0x1EBF); break; case 0x1EC0: bufpush(0x1EC1); break; case 0x1EC2: bufpush(0x1EC3); break; case 0x1EC4: bufpush(0x1EC5); break; case 0x1EC6: bufpush(0x1EC7); break; case 0x1EC8: bufpush(0x1EC9); break; case 0x1ECA: bufpush(0x1ECB); break; case 0x1ECC: bufpush(0x1ECD); break; case 0x1ECE: bufpush(0x1ECF); break; case 0x1ED0: bufpush(0x1ED1); break; case 0x1ED2: bufpush(0x1ED3); break; case 0x1ED4: bufpush(0x1ED5); break; case 0x1ED6: bufpush(0x1ED7); break; case 0x1ED8: bufpush(0x1ED9); break; case 0x1EDA: bufpush(0x1EDB); break; case 0x1EDC: bufpush(0x1EDD); break; case 0x1EDE: bufpush(0x1EDF); break; case 0x1EE0: bufpush(0x1EE1); break; case 0x1EE2: bufpush(0x1EE3); break; case 0x1EE4: bufpush(0x1EE5); break; case 0x1EE6: bufpush(0x1EE7); break; case 0x1EE8: bufpush(0x1EE9); break; case 0x1EEA: bufpush(0x1EEB); break; case 0x1EEC: bufpush(0x1EED); break; case 0x1EEE: bufpush(0x1EEF); break; case 0x1EF0: bufpush(0x1EF1); break; case 0x1EF2: bufpush(0x1EF3); break; case 0x1EF4: bufpush(0x1EF5); break; case 0x1EF6: bufpush(0x1EF7); break; case 0x1EF8: bufpush(0x1EF9); break; case 0x1EFA: bufpush(0x1EFB); break; case 0x1EFC: bufpush(0x1EFD); break; case 0x1EFE: bufpush(0x1EFF); break; case 0x1F08: bufpush(0x1F00); break; case 0x1F09: bufpush(0x1F01); break; case 0x1F0A: bufpush(0x1F02); break; case 0x1F0B: bufpush(0x1F03); break; case 0x1F0C: bufpush(0x1F04); break; case 0x1F0D: bufpush(0x1F05); break; case 0x1F0E: bufpush(0x1F06); break; case 0x1F0F: bufpush(0x1F07); break; case 0x1F18: 
bufpush(0x1F10); break; case 0x1F19: bufpush(0x1F11); break; case 0x1F1A: bufpush(0x1F12); break; case 0x1F1B: bufpush(0x1F13); break; case 0x1F1C: bufpush(0x1F14); break; case 0x1F1D: bufpush(0x1F15); break; case 0x1F28: bufpush(0x1F20); break; case 0x1F29: bufpush(0x1F21); break; case 0x1F2A: bufpush(0x1F22); break; case 0x1F2B: bufpush(0x1F23); break; case 0x1F2C: bufpush(0x1F24); break; case 0x1F2D: bufpush(0x1F25); break; case 0x1F2E: bufpush(0x1F26); break; case 0x1F2F: bufpush(0x1F27); break; case 0x1F38: bufpush(0x1F30); break; case 0x1F39: bufpush(0x1F31); break; case 0x1F3A: bufpush(0x1F32); break; case 0x1F3B: bufpush(0x1F33); break; case 0x1F3C: bufpush(0x1F34); break; case 0x1F3D: bufpush(0x1F35); break; case 0x1F3E: bufpush(0x1F36); break; case 0x1F3F: bufpush(0x1F37); break; case 0x1F48: bufpush(0x1F40); break; case 0x1F49: bufpush(0x1F41); break; case 0x1F4A: bufpush(0x1F42); break; case 0x1F4B: bufpush(0x1F43); break; case 0x1F4C: bufpush(0x1F44); break; case 0x1F4D: bufpush(0x1F45); break; case 0x1F50: bufpush(0x03C5); bufpush(0x0313); break; case 0x1F52: bufpush(0x03C5); bufpush(0x0313); bufpush(0x0300); break; case 0x1F54: bufpush(0x03C5); bufpush(0x0313); bufpush(0x0301); break; case 0x1F56: bufpush(0x03C5); bufpush(0x0313); bufpush(0x0342); break; case 0x1F59: bufpush(0x1F51); break; case 0x1F5B: bufpush(0x1F53); break; case 0x1F5D: bufpush(0x1F55); break; case 0x1F5F: bufpush(0x1F57); break; case 0x1F68: bufpush(0x1F60); break; case 0x1F69: bufpush(0x1F61); break; case 0x1F6A: bufpush(0x1F62); break; case 0x1F6B: bufpush(0x1F63); break; case 0x1F6C: bufpush(0x1F64); break; case 0x1F6D: bufpush(0x1F65); break; case 0x1F6E: bufpush(0x1F66); break; case 0x1F6F: bufpush(0x1F67); break; case 0x1F80: bufpush(0x1F00); bufpush(0x03B9); break; case 0x1F81: bufpush(0x1F01); bufpush(0x03B9); break; case 0x1F82: bufpush(0x1F02); bufpush(0x03B9); break; case 0x1F83: bufpush(0x1F03); bufpush(0x03B9); break; case 0x1F84: bufpush(0x1F04); bufpush(0x03B9); 
break; case 0x1F85: bufpush(0x1F05); bufpush(0x03B9); break; case 0x1F86: bufpush(0x1F06); bufpush(0x03B9); break; case 0x1F87: bufpush(0x1F07); bufpush(0x03B9); break; case 0x1F88: bufpush(0x1F00); bufpush(0x03B9); break; case 0x1F89: bufpush(0x1F01); bufpush(0x03B9); break; case 0x1F8A: bufpush(0x1F02); bufpush(0x03B9); break; case 0x1F8B: bufpush(0x1F03); bufpush(0x03B9); break; case 0x1F8C: bufpush(0x1F04); bufpush(0x03B9); break; case 0x1F8D: bufpush(0x1F05); bufpush(0x03B9); break; case 0x1F8E: bufpush(0x1F06); bufpush(0x03B9); break; case 0x1F8F: bufpush(0x1F07); bufpush(0x03B9); break; case 0x1F90: bufpush(0x1F20); bufpush(0x03B9); break; case 0x1F91: bufpush(0x1F21); bufpush(0x03B9); break; case 0x1F92: bufpush(0x1F22); bufpush(0x03B9); break; case 0x1F93: bufpush(0x1F23); bufpush(0x03B9); break; case 0x1F94: bufpush(0x1F24); bufpush(0x03B9); break; case 0x1F95: bufpush(0x1F25); bufpush(0x03B9); break; case 0x1F96: bufpush(0x1F26); bufpush(0x03B9); break; case 0x1F97: bufpush(0x1F27); bufpush(0x03B9); break; case 0x1F98: bufpush(0x1F20); bufpush(0x03B9); break; case 0x1F99: bufpush(0x1F21); bufpush(0x03B9); break; case 0x1F9A: bufpush(0x1F22); bufpush(0x03B9); break; case 0x1F9B: bufpush(0x1F23); bufpush(0x03B9); break; case 0x1F9C: bufpush(0x1F24); bufpush(0x03B9); break; case 0x1F9D: bufpush(0x1F25); bufpush(0x03B9); break; case 0x1F9E: bufpush(0x1F26); bufpush(0x03B9); break; case 0x1F9F: bufpush(0x1F27); bufpush(0x03B9); break; case 0x1FA0: bufpush(0x1F60); bufpush(0x03B9); break; case 0x1FA1: bufpush(0x1F61); bufpush(0x03B9); break; case 0x1FA2: bufpush(0x1F62); bufpush(0x03B9); break; case 0x1FA3: bufpush(0x1F63); bufpush(0x03B9); break; case 0x1FA4: bufpush(0x1F64); bufpush(0x03B9); break; case 0x1FA5: bufpush(0x1F65); bufpush(0x03B9); break; case 0x1FA6: bufpush(0x1F66); bufpush(0x03B9); break; case 0x1FA7: bufpush(0x1F67); bufpush(0x03B9); break; case 0x1FA8: bufpush(0x1F60); bufpush(0x03B9); break; case 0x1FA9: bufpush(0x1F61); bufpush(0x03B9); 
break; case 0x1FAA: bufpush(0x1F62); bufpush(0x03B9); break; case 0x1FAB: bufpush(0x1F63); bufpush(0x03B9); break; case 0x1FAC: bufpush(0x1F64); bufpush(0x03B9); break; case 0x1FAD: bufpush(0x1F65); bufpush(0x03B9); break; case 0x1FAE: bufpush(0x1F66); bufpush(0x03B9); break; case 0x1FAF: bufpush(0x1F67); bufpush(0x03B9); break; case 0x1FB2: bufpush(0x1F70); bufpush(0x03B9); break; case 0x1FB3: bufpush(0x03B1); bufpush(0x03B9); break; case 0x1FB4: bufpush(0x03AC); bufpush(0x03B9); break; case 0x1FB6: bufpush(0x03B1); bufpush(0x0342); break; case 0x1FB7: bufpush(0x03B1); bufpush(0x0342); bufpush(0x03B9); break; case 0x1FB8: bufpush(0x1FB0); break; case 0x1FB9: bufpush(0x1FB1); break; case 0x1FBA: bufpush(0x1F70); break; case 0x1FBB: bufpush(0x1F71); break; case 0x1FBC: bufpush(0x03B1); bufpush(0x03B9); break; case 0x1FBE: bufpush(0x03B9); break; case 0x1FC2: bufpush(0x1F74); bufpush(0x03B9); break; case 0x1FC3: bufpush(0x03B7); bufpush(0x03B9); break; case 0x1FC4: bufpush(0x03AE); bufpush(0x03B9); break; case 0x1FC6: bufpush(0x03B7); bufpush(0x0342); break; case 0x1FC7: bufpush(0x03B7); bufpush(0x0342); bufpush(0x03B9); break; case 0x1FC8: bufpush(0x1F72); break; case 0x1FC9: bufpush(0x1F73); break; case 0x1FCA: bufpush(0x1F74); break; case 0x1FCB: bufpush(0x1F75); break; case 0x1FCC: bufpush(0x03B7); bufpush(0x03B9); break; case 0x1FD2: bufpush(0x03B9); bufpush(0x0308); bufpush(0x0300); break; case 0x1FD3: bufpush(0x03B9); bufpush(0x0308); bufpush(0x0301); break; case 0x1FD6: bufpush(0x03B9); bufpush(0x0342); break; case 0x1FD7: bufpush(0x03B9); bufpush(0x0308); bufpush(0x0342); break; case 0x1FD8: bufpush(0x1FD0); break; case 0x1FD9: bufpush(0x1FD1); break; case 0x1FDA: bufpush(0x1F76); break; case 0x1FDB: bufpush(0x1F77); break; case 0x1FE2: bufpush(0x03C5); bufpush(0x0308); bufpush(0x0300); break; case 0x1FE3: bufpush(0x03C5); bufpush(0x0308); bufpush(0x0301); break; case 0x1FE4: bufpush(0x03C1); bufpush(0x0313); break; case 0x1FE6: bufpush(0x03C5); 
bufpush(0x0342); break; case 0x1FE7: bufpush(0x03C5); bufpush(0x0308); bufpush(0x0342); break; case 0x1FE8: bufpush(0x1FE0); break; case 0x1FE9: bufpush(0x1FE1); break; case 0x1FEA: bufpush(0x1F7A); break; case 0x1FEB: bufpush(0x1F7B); break; case 0x1FEC: bufpush(0x1FE5); break; case 0x1FF2: bufpush(0x1F7C); bufpush(0x03B9); break; case 0x1FF3: bufpush(0x03C9); bufpush(0x03B9); break; case 0x1FF4: bufpush(0x03CE); bufpush(0x03B9); break; case 0x1FF6: bufpush(0x03C9); bufpush(0x0342); break; case 0x1FF7: bufpush(0x03C9); bufpush(0x0342); bufpush(0x03B9); break; case 0x1FF8: bufpush(0x1F78); break; case 0x1FF9: bufpush(0x1F79); break; case 0x1FFA: bufpush(0x1F7C); break; case 0x1FFB: bufpush(0x1F7D); break; case 0x1FFC: bufpush(0x03C9); bufpush(0x03B9); break; case 0x2126: bufpush(0x03C9); break; case 0x212A: bufpush(0x006B); break; case 0x212B: bufpush(0x00E5); break; case 0x2132: bufpush(0x214E); break; case 0x2160: bufpush(0x2170); break; case 0x2161: bufpush(0x2171); break; case 0x2162: bufpush(0x2172); break; case 0x2163: bufpush(0x2173); break; case 0x2164: bufpush(0x2174); break; case 0x2165: bufpush(0x2175); break; case 0x2166: bufpush(0x2176); break; case 0x2167: bufpush(0x2177); break; case 0x2168: bufpush(0x2178); break; case 0x2169: bufpush(0x2179); break; case 0x216A: bufpush(0x217A); break; case 0x216B: bufpush(0x217B); break; case 0x216C: bufpush(0x217C); break; case 0x216D: bufpush(0x217D); break; case 0x216E: bufpush(0x217E); break; case 0x216F: bufpush(0x217F); break; case 0x2183: bufpush(0x2184); break; case 0x24B6: bufpush(0x24D0); break; case 0x24B7: bufpush(0x24D1); break; case 0x24B8: bufpush(0x24D2); break; case 0x24B9: bufpush(0x24D3); break; case 0x24BA: bufpush(0x24D4); break; case 0x24BB: bufpush(0x24D5); break; case 0x24BC: bufpush(0x24D6); break; case 0x24BD: bufpush(0x24D7); break; case 0x24BE: bufpush(0x24D8); break; case 0x24BF: bufpush(0x24D9); break; case 0x24C0: bufpush(0x24DA); break; case 0x24C1: bufpush(0x24DB); break; case 
0x24C2: bufpush(0x24DC); break; case 0x24C3: bufpush(0x24DD); break; case 0x24C4: bufpush(0x24DE); break; case 0x24C5: bufpush(0x24DF); break; case 0x24C6: bufpush(0x24E0); break; case 0x24C7: bufpush(0x24E1); break; case 0x24C8: bufpush(0x24E2); break; case 0x24C9: bufpush(0x24E3); break; case 0x24CA: bufpush(0x24E4); break; case 0x24CB: bufpush(0x24E5); break; case 0x24CC: bufpush(0x24E6); break; case 0x24CD: bufpush(0x24E7); break; case 0x24CE: bufpush(0x24E8); break; case 0x24CF: bufpush(0x24E9); break; case 0x2C00: bufpush(0x2C30); break; case 0x2C01: bufpush(0x2C31); break; case 0x2C02: bufpush(0x2C32); break; case 0x2C03: bufpush(0x2C33); break; case 0x2C04: bufpush(0x2C34); break; case 0x2C05: bufpush(0x2C35); break; case 0x2C06: bufpush(0x2C36); break; case 0x2C07: bufpush(0x2C37); break; case 0x2C08: bufpush(0x2C38); break; case 0x2C09: bufpush(0x2C39); break; case 0x2C0A: bufpush(0x2C3A); break; case 0x2C0B: bufpush(0x2C3B); break; case 0x2C0C: bufpush(0x2C3C); break; case 0x2C0D: bufpush(0x2C3D); break; case 0x2C0E: bufpush(0x2C3E); break; case 0x2C0F: bufpush(0x2C3F); break; case 0x2C10: bufpush(0x2C40); break; case 0x2C11: bufpush(0x2C41); break; case 0x2C12: bufpush(0x2C42); break; case 0x2C13: bufpush(0x2C43); break; case 0x2C14: bufpush(0x2C44); break; case 0x2C15: bufpush(0x2C45); break; case 0x2C16: bufpush(0x2C46); break; case 0x2C17: bufpush(0x2C47); break; case 0x2C18: bufpush(0x2C48); break; case 0x2C19: bufpush(0x2C49); break; case 0x2C1A: bufpush(0x2C4A); break; case 0x2C1B: bufpush(0x2C4B); break; case 0x2C1C: bufpush(0x2C4C); break; case 0x2C1D: bufpush(0x2C4D); break; case 0x2C1E: bufpush(0x2C4E); break; case 0x2C1F: bufpush(0x2C4F); break; case 0x2C20: bufpush(0x2C50); break; case 0x2C21: bufpush(0x2C51); break; case 0x2C22: bufpush(0x2C52); break; case 0x2C23: bufpush(0x2C53); break; case 0x2C24: bufpush(0x2C54); break; case 0x2C25: bufpush(0x2C55); break; case 0x2C26: bufpush(0x2C56); break; case 0x2C27: bufpush(0x2C57); break; case 
0x2C28: bufpush(0x2C58); break; case 0x2C29: bufpush(0x2C59); break; case 0x2C2A: bufpush(0x2C5A); break; case 0x2C2B: bufpush(0x2C5B); break; case 0x2C2C: bufpush(0x2C5C); break; case 0x2C2D: bufpush(0x2C5D); break; case 0x2C2E: bufpush(0x2C5E); break; case 0x2C60: bufpush(0x2C61); break; case 0x2C62: bufpush(0x026B); break; case 0x2C63: bufpush(0x1D7D); break; case 0x2C64: bufpush(0x027D); break; case 0x2C67: bufpush(0x2C68); break; case 0x2C69: bufpush(0x2C6A); break; case 0x2C6B: bufpush(0x2C6C); break; case 0x2C6D: bufpush(0x0251); break; case 0x2C6E: bufpush(0x0271); break; case 0x2C6F: bufpush(0x0250); break; case 0x2C70: bufpush(0x0252); break; case 0x2C72: bufpush(0x2C73); break; case 0x2C75: bufpush(0x2C76); break; case 0x2C7E: bufpush(0x023F); break; case 0x2C7F: bufpush(0x0240); break; case 0x2C80: bufpush(0x2C81); break; case 0x2C82: bufpush(0x2C83); break; case 0x2C84: bufpush(0x2C85); break; case 0x2C86: bufpush(0x2C87); break; case 0x2C88: bufpush(0x2C89); break; case 0x2C8A: bufpush(0x2C8B); break; case 0x2C8C: bufpush(0x2C8D); break; case 0x2C8E: bufpush(0x2C8F); break; case 0x2C90: bufpush(0x2C91); break; case 0x2C92: bufpush(0x2C93); break; case 0x2C94: bufpush(0x2C95); break; case 0x2C96: bufpush(0x2C97); break; case 0x2C98: bufpush(0x2C99); break; case 0x2C9A: bufpush(0x2C9B); break; case 0x2C9C: bufpush(0x2C9D); break; case 0x2C9E: bufpush(0x2C9F); break; case 0x2CA0: bufpush(0x2CA1); break; case 0x2CA2: bufpush(0x2CA3); break; case 0x2CA4: bufpush(0x2CA5); break; case 0x2CA6: bufpush(0x2CA7); break; case 0x2CA8: bufpush(0x2CA9); break; case 0x2CAA: bufpush(0x2CAB); break; case 0x2CAC: bufpush(0x2CAD); break; case 0x2CAE: bufpush(0x2CAF); break; case 0x2CB0: bufpush(0x2CB1); break; case 0x2CB2: bufpush(0x2CB3); break; case 0x2CB4: bufpush(0x2CB5); break; case 0x2CB6: bufpush(0x2CB7); break; case 0x2CB8: bufpush(0x2CB9); break; case 0x2CBA: bufpush(0x2CBB); break; case 0x2CBC: bufpush(0x2CBD); break; case 0x2CBE: bufpush(0x2CBF); break; case 
0x2CC0: bufpush(0x2CC1); break; case 0x2CC2: bufpush(0x2CC3); break; case 0x2CC4: bufpush(0x2CC5); break; case 0x2CC6: bufpush(0x2CC7); break; case 0x2CC8: bufpush(0x2CC9); break; case 0x2CCA: bufpush(0x2CCB); break; case 0x2CCC: bufpush(0x2CCD); break; case 0x2CCE: bufpush(0x2CCF); break; case 0x2CD0: bufpush(0x2CD1); break; case 0x2CD2: bufpush(0x2CD3); break; case 0x2CD4: bufpush(0x2CD5); break; case 0x2CD6: bufpush(0x2CD7); break; case 0x2CD8: bufpush(0x2CD9); break; case 0x2CDA: bufpush(0x2CDB); break; case 0x2CDC: bufpush(0x2CDD); break; case 0x2CDE: bufpush(0x2CDF); break; case 0x2CE0: bufpush(0x2CE1); break; case 0x2CE2: bufpush(0x2CE3); break; case 0x2CEB: bufpush(0x2CEC); break; case 0x2CED: bufpush(0x2CEE); break; case 0x2CF2: bufpush(0x2CF3); break; case 0xA640: bufpush(0xA641); break; case 0xA642: bufpush(0xA643); break; case 0xA644: bufpush(0xA645); break; case 0xA646: bufpush(0xA647); break; case 0xA648: bufpush(0xA649); break; case 0xA64A: bufpush(0xA64B); break; case 0xA64C: bufpush(0xA64D); break; case 0xA64E: bufpush(0xA64F); break; case 0xA650: bufpush(0xA651); break; case 0xA652: bufpush(0xA653); break; case 0xA654: bufpush(0xA655); break; case 0xA656: bufpush(0xA657); break; case 0xA658: bufpush(0xA659); break; case 0xA65A: bufpush(0xA65B); break; case 0xA65C: bufpush(0xA65D); break; case 0xA65E: bufpush(0xA65F); break; case 0xA660: bufpush(0xA661); break; case 0xA662: bufpush(0xA663); break; case 0xA664: bufpush(0xA665); break; case 0xA666: bufpush(0xA667); break; case 0xA668: bufpush(0xA669); break; case 0xA66A: bufpush(0xA66B); break; case 0xA66C: bufpush(0xA66D); break; case 0xA680: bufpush(0xA681); break; case 0xA682: bufpush(0xA683); break; case 0xA684: bufpush(0xA685); break; case 0xA686: bufpush(0xA687); break; case 0xA688: bufpush(0xA689); break; case 0xA68A: bufpush(0xA68B); break; case 0xA68C: bufpush(0xA68D); break; case 0xA68E: bufpush(0xA68F); break; case 0xA690: bufpush(0xA691); break; case 0xA692: bufpush(0xA693); break; case 
0xA694: bufpush(0xA695); break; case 0xA696: bufpush(0xA697); break; case 0xA698: bufpush(0xA699); break; case 0xA69A: bufpush(0xA69B); break; case 0xA722: bufpush(0xA723); break; case 0xA724: bufpush(0xA725); break; case 0xA726: bufpush(0xA727); break; case 0xA728: bufpush(0xA729); break; case 0xA72A: bufpush(0xA72B); break; case 0xA72C: bufpush(0xA72D); break; case 0xA72E: bufpush(0xA72F); break; case 0xA732: bufpush(0xA733); break; case 0xA734: bufpush(0xA735); break; case 0xA736: bufpush(0xA737); break; case 0xA738: bufpush(0xA739); break; case 0xA73A: bufpush(0xA73B); break; case 0xA73C: bufpush(0xA73D); break; case 0xA73E: bufpush(0xA73F); break; case 0xA740: bufpush(0xA741); break; case 0xA742: bufpush(0xA743); break; case 0xA744: bufpush(0xA745); break; case 0xA746: bufpush(0xA747); break; case 0xA748: bufpush(0xA749); break; case 0xA74A: bufpush(0xA74B); break; case 0xA74C: bufpush(0xA74D); break; case 0xA74E: bufpush(0xA74F); break; case 0xA750: bufpush(0xA751); break; case 0xA752: bufpush(0xA753); break; case 0xA754: bufpush(0xA755); break; case 0xA756: bufpush(0xA757); break; case 0xA758: bufpush(0xA759); break; case 0xA75A: bufpush(0xA75B); break; case 0xA75C: bufpush(0xA75D); break; case 0xA75E: bufpush(0xA75F); break; case 0xA760: bufpush(0xA761); break; case 0xA762: bufpush(0xA763); break; case 0xA764: bufpush(0xA765); break; case 0xA766: bufpush(0xA767); break; case 0xA768: bufpush(0xA769); break; case 0xA76A: bufpush(0xA76B); break; case 0xA76C: bufpush(0xA76D); break; case 0xA76E: bufpush(0xA76F); break; case 0xA779: bufpush(0xA77A); break; case 0xA77B: bufpush(0xA77C); break; case 0xA77D: bufpush(0x1D79); break; case 0xA77E: bufpush(0xA77F); break; case 0xA780: bufpush(0xA781); break; case 0xA782: bufpush(0xA783); break; case 0xA784: bufpush(0xA785); break; case 0xA786: bufpush(0xA787); break; case 0xA78B: bufpush(0xA78C); break; case 0xA78D: bufpush(0x0265); break; case 0xA790: bufpush(0xA791); break; case 0xA792: bufpush(0xA793); break; case 
0xA796: bufpush(0xA797); break; case 0xA798: bufpush(0xA799); break; case 0xA79A: bufpush(0xA79B); break; case 0xA79C: bufpush(0xA79D); break; case 0xA79E: bufpush(0xA79F); break; case 0xA7A0: bufpush(0xA7A1); break; case 0xA7A2: bufpush(0xA7A3); break; case 0xA7A4: bufpush(0xA7A5); break; case 0xA7A6: bufpush(0xA7A7); break; case 0xA7A8: bufpush(0xA7A9); break; case 0xA7AA: bufpush(0x0266); break; case 0xA7AB: bufpush(0x025C); break; case 0xA7AC: bufpush(0x0261); break; case 0xA7AD: bufpush(0x026C); break; case 0xA7AE: bufpush(0x026A); break; case 0xA7B0: bufpush(0x029E); break; case 0xA7B1: bufpush(0x0287); break; case 0xA7B2: bufpush(0x029D); break; case 0xA7B3: bufpush(0xAB53); break; case 0xA7B4: bufpush(0xA7B5); break; case 0xA7B6: bufpush(0xA7B7); break; case 0xAB70: bufpush(0x13A0); break; case 0xAB71: bufpush(0x13A1); break; case 0xAB72: bufpush(0x13A2); break; case 0xAB73: bufpush(0x13A3); break; case 0xAB74: bufpush(0x13A4); break; case 0xAB75: bufpush(0x13A5); break; case 0xAB76: bufpush(0x13A6); break; case 0xAB77: bufpush(0x13A7); break; case 0xAB78: bufpush(0x13A8); break; case 0xAB79: bufpush(0x13A9); break; case 0xAB7A: bufpush(0x13AA); break; case 0xAB7B: bufpush(0x13AB); break; case 0xAB7C: bufpush(0x13AC); break; case 0xAB7D: bufpush(0x13AD); break; case 0xAB7E: bufpush(0x13AE); break; case 0xAB7F: bufpush(0x13AF); break; case 0xAB80: bufpush(0x13B0); break; case 0xAB81: bufpush(0x13B1); break; case 0xAB82: bufpush(0x13B2); break; case 0xAB83: bufpush(0x13B3); break; case 0xAB84: bufpush(0x13B4); break; case 0xAB85: bufpush(0x13B5); break; case 0xAB86: bufpush(0x13B6); break; case 0xAB87: bufpush(0x13B7); break; case 0xAB88: bufpush(0x13B8); break; case 0xAB89: bufpush(0x13B9); break; case 0xAB8A: bufpush(0x13BA); break; case 0xAB8B: bufpush(0x13BB); break; case 0xAB8C: bufpush(0x13BC); break; case 0xAB8D: bufpush(0x13BD); break; case 0xAB8E: bufpush(0x13BE); break; case 0xAB8F: bufpush(0x13BF); break; case 0xAB90: bufpush(0x13C0); break; case 
0xAB91: bufpush(0x13C1); break; case 0xAB92: bufpush(0x13C2); break; case 0xAB93: bufpush(0x13C3); break; case 0xAB94: bufpush(0x13C4); break; case 0xAB95: bufpush(0x13C5); break; case 0xAB96: bufpush(0x13C6); break; case 0xAB97: bufpush(0x13C7); break; case 0xAB98: bufpush(0x13C8); break; case 0xAB99: bufpush(0x13C9); break; case 0xAB9A: bufpush(0x13CA); break; case 0xAB9B: bufpush(0x13CB); break; case 0xAB9C: bufpush(0x13CC); break; case 0xAB9D: bufpush(0x13CD); break; case 0xAB9E: bufpush(0x13CE); break; case 0xAB9F: bufpush(0x13CF); break; case 0xABA0: bufpush(0x13D0); break; case 0xABA1: bufpush(0x13D1); break; case 0xABA2: bufpush(0x13D2); break; case 0xABA3: bufpush(0x13D3); break; case 0xABA4: bufpush(0x13D4); break; case 0xABA5: bufpush(0x13D5); break; case 0xABA6: bufpush(0x13D6); break; case 0xABA7: bufpush(0x13D7); break; case 0xABA8: bufpush(0x13D8); break; case 0xABA9: bufpush(0x13D9); break; case 0xABAA: bufpush(0x13DA); break; case 0xABAB: bufpush(0x13DB); break; case 0xABAC: bufpush(0x13DC); break; case 0xABAD: bufpush(0x13DD); break; case 0xABAE: bufpush(0x13DE); break; case 0xABAF: bufpush(0x13DF); break; case 0xABB0: bufpush(0x13E0); break; case 0xABB1: bufpush(0x13E1); break; case 0xABB2: bufpush(0x13E2); break; case 0xABB3: bufpush(0x13E3); break; case 0xABB4: bufpush(0x13E4); break; case 0xABB5: bufpush(0x13E5); break; case 0xABB6: bufpush(0x13E6); break; case 0xABB7: bufpush(0x13E7); break; case 0xABB8: bufpush(0x13E8); break; case 0xABB9: bufpush(0x13E9); break; case 0xABBA: bufpush(0x13EA); break; case 0xABBB: bufpush(0x13EB); break; case 0xABBC: bufpush(0x13EC); break; case 0xABBD: bufpush(0x13ED); break; case 0xABBE: bufpush(0x13EE); break; case 0xABBF: bufpush(0x13EF); break; case 0xFB00: bufpush(0x0066); bufpush(0x0066); break; case 0xFB01: bufpush(0x0066); bufpush(0x0069); break; case 0xFB02: bufpush(0x0066); bufpush(0x006C); break; case 0xFB03: bufpush(0x0066); bufpush(0x0066); bufpush(0x0069); break; case 0xFB04: bufpush(0x0066); 
bufpush(0x0066); bufpush(0x006C); break; case 0xFB05: bufpush(0x0073); bufpush(0x0074); break; case 0xFB06: bufpush(0x0073); bufpush(0x0074); break; case 0xFB13: bufpush(0x0574); bufpush(0x0576); break; case 0xFB14: bufpush(0x0574); bufpush(0x0565); break; case 0xFB15: bufpush(0x0574); bufpush(0x056B); break; case 0xFB16: bufpush(0x057E); bufpush(0x0576); break; case 0xFB17: bufpush(0x0574); bufpush(0x056D); break; case 0xFF21: bufpush(0xFF41); break; case 0xFF22: bufpush(0xFF42); break; case 0xFF23: bufpush(0xFF43); break; case 0xFF24: bufpush(0xFF44); break; case 0xFF25: bufpush(0xFF45); break; case 0xFF26: bufpush(0xFF46); break; case 0xFF27: bufpush(0xFF47); break; case 0xFF28: bufpush(0xFF48); break; case 0xFF29: bufpush(0xFF49); break; case 0xFF2A: bufpush(0xFF4A); break; case 0xFF2B: bufpush(0xFF4B); break; case 0xFF2C: bufpush(0xFF4C); break; case 0xFF2D: bufpush(0xFF4D); break; case 0xFF2E: bufpush(0xFF4E); break; case 0xFF2F: bufpush(0xFF4F); break; case 0xFF30: bufpush(0xFF50); break; case 0xFF31: bufpush(0xFF51); break; case 0xFF32: bufpush(0xFF52); break; case 0xFF33: bufpush(0xFF53); break; case 0xFF34: bufpush(0xFF54); break; case 0xFF35: bufpush(0xFF55); break; case 0xFF36: bufpush(0xFF56); break; case 0xFF37: bufpush(0xFF57); break; case 0xFF38: bufpush(0xFF58); break; case 0xFF39: bufpush(0xFF59); break; case 0xFF3A: bufpush(0xFF5A); break; case 0x10400: bufpush(0x10428); break; case 0x10401: bufpush(0x10429); break; case 0x10402: bufpush(0x1042A); break; case 0x10403: bufpush(0x1042B); break; case 0x10404: bufpush(0x1042C); break; case 0x10405: bufpush(0x1042D); break; case 0x10406: bufpush(0x1042E); break; case 0x10407: bufpush(0x1042F); break; case 0x10408: bufpush(0x10430); break; case 0x10409: bufpush(0x10431); break; case 0x1040A: bufpush(0x10432); break; case 0x1040B: bufpush(0x10433); break; case 0x1040C: bufpush(0x10434); break; case 0x1040D: bufpush(0x10435); break; case 0x1040E: bufpush(0x10436); break; case 0x1040F: bufpush(0x10437); 
break; case 0x10410: bufpush(0x10438); break; case 0x10411: bufpush(0x10439); break; case 0x10412: bufpush(0x1043A); break; case 0x10413: bufpush(0x1043B); break; case 0x10414: bufpush(0x1043C); break; case 0x10415: bufpush(0x1043D); break; case 0x10416: bufpush(0x1043E); break; case 0x10417: bufpush(0x1043F); break; case 0x10418: bufpush(0x10440); break; case 0x10419: bufpush(0x10441); break; case 0x1041A: bufpush(0x10442); break; case 0x1041B: bufpush(0x10443); break; case 0x1041C: bufpush(0x10444); break; case 0x1041D: bufpush(0x10445); break; case 0x1041E: bufpush(0x10446); break; case 0x1041F: bufpush(0x10447); break; case 0x10420: bufpush(0x10448); break; case 0x10421: bufpush(0x10449); break; case 0x10422: bufpush(0x1044A); break; case 0x10423: bufpush(0x1044B); break; case 0x10424: bufpush(0x1044C); break; case 0x10425: bufpush(0x1044D); break; case 0x10426: bufpush(0x1044E); break; case 0x10427: bufpush(0x1044F); break; case 0x104B0: bufpush(0x104D8); break; case 0x104B1: bufpush(0x104D9); break; case 0x104B2: bufpush(0x104DA); break; case 0x104B3: bufpush(0x104DB); break; case 0x104B4: bufpush(0x104DC); break; case 0x104B5: bufpush(0x104DD); break; case 0x104B6: bufpush(0x104DE); break; case 0x104B7: bufpush(0x104DF); break; case 0x104B8: bufpush(0x104E0); break; case 0x104B9: bufpush(0x104E1); break; case 0x104BA: bufpush(0x104E2); break; case 0x104BB: bufpush(0x104E3); break; case 0x104BC: bufpush(0x104E4); break; case 0x104BD: bufpush(0x104E5); break; case 0x104BE: bufpush(0x104E6); break; case 0x104BF: bufpush(0x104E7); break; case 0x104C0: bufpush(0x104E8); break; case 0x104C1: bufpush(0x104E9); break; case 0x104C2: bufpush(0x104EA); break; case 0x104C3: bufpush(0x104EB); break; case 0x104C4: bufpush(0x104EC); break; case 0x104C5: bufpush(0x104ED); break; case 0x104C6: bufpush(0x104EE); break; case 0x104C7: bufpush(0x104EF); break; case 0x104C8: bufpush(0x104F0); break; case 0x104C9: bufpush(0x104F1); break; case 0x104CA: bufpush(0x104F2); break; 
case 0x104CB: bufpush(0x104F3); break; case 0x104CC: bufpush(0x104F4); break; case 0x104CD: bufpush(0x104F5); break; case 0x104CE: bufpush(0x104F6); break; case 0x104CF: bufpush(0x104F7); break; case 0x104D0: bufpush(0x104F8); break; case 0x104D1: bufpush(0x104F9); break; case 0x104D2: bufpush(0x104FA); break; case 0x104D3: bufpush(0x104FB); break; case 0x10C80: bufpush(0x10CC0); break; case 0x10C81: bufpush(0x10CC1); break; case 0x10C82: bufpush(0x10CC2); break; case 0x10C83: bufpush(0x10CC3); break; case 0x10C84: bufpush(0x10CC4); break; case 0x10C85: bufpush(0x10CC5); break; case 0x10C86: bufpush(0x10CC6); break; case 0x10C87: bufpush(0x10CC7); break; case 0x10C88: bufpush(0x10CC8); break; case 0x10C89: bufpush(0x10CC9); break; case 0x10C8A: bufpush(0x10CCA); break; case 0x10C8B: bufpush(0x10CCB); break; case 0x10C8C: bufpush(0x10CCC); break; case 0x10C8D: bufpush(0x10CCD); break; case 0x10C8E: bufpush(0x10CCE); break; case 0x10C8F: bufpush(0x10CCF); break; case 0x10C90: bufpush(0x10CD0); break; case 0x10C91: bufpush(0x10CD1); break; case 0x10C92: bufpush(0x10CD2); break; case 0x10C93: bufpush(0x10CD3); break; case 0x10C94: bufpush(0x10CD4); break; case 0x10C95: bufpush(0x10CD5); break; case 0x10C96: bufpush(0x10CD6); break; case 0x10C97: bufpush(0x10CD7); break; case 0x10C98: bufpush(0x10CD8); break; case 0x10C99: bufpush(0x10CD9); break; case 0x10C9A: bufpush(0x10CDA); break; case 0x10C9B: bufpush(0x10CDB); break; case 0x10C9C: bufpush(0x10CDC); break; case 0x10C9D: bufpush(0x10CDD); break; case 0x10C9E: bufpush(0x10CDE); break; case 0x10C9F: bufpush(0x10CDF); break; case 0x10CA0: bufpush(0x10CE0); break; case 0x10CA1: bufpush(0x10CE1); break; case 0x10CA2: bufpush(0x10CE2); break; case 0x10CA3: bufpush(0x10CE3); break; case 0x10CA4: bufpush(0x10CE4); break; case 0x10CA5: bufpush(0x10CE5); break; case 0x10CA6: bufpush(0x10CE6); break; case 0x10CA7: bufpush(0x10CE7); break; case 0x10CA8: bufpush(0x10CE8); break; case 0x10CA9: bufpush(0x10CE9); break; case 
0x10CAA: bufpush(0x10CEA); break; case 0x10CAB: bufpush(0x10CEB); break; case 0x10CAC: bufpush(0x10CEC); break; case 0x10CAD: bufpush(0x10CED); break; case 0x10CAE: bufpush(0x10CEE); break; case 0x10CAF: bufpush(0x10CEF); break; case 0x10CB0: bufpush(0x10CF0); break; case 0x10CB1: bufpush(0x10CF1); break; case 0x10CB2: bufpush(0x10CF2); break; case 0x118A0: bufpush(0x118C0); break; case 0x118A1: bufpush(0x118C1); break; case 0x118A2: bufpush(0x118C2); break; case 0x118A3: bufpush(0x118C3); break; case 0x118A4: bufpush(0x118C4); break; case 0x118A5: bufpush(0x118C5); break; case 0x118A6: bufpush(0x118C6); break; case 0x118A7: bufpush(0x118C7); break; case 0x118A8: bufpush(0x118C8); break; case 0x118A9: bufpush(0x118C9); break; case 0x118AA: bufpush(0x118CA); break; case 0x118AB: bufpush(0x118CB); break; case 0x118AC: bufpush(0x118CC); break; case 0x118AD: bufpush(0x118CD); break; case 0x118AE: bufpush(0x118CE); break; case 0x118AF: bufpush(0x118CF); break; case 0x118B0: bufpush(0x118D0); break; case 0x118B1: bufpush(0x118D1); break; case 0x118B2: bufpush(0x118D2); break; case 0x118B3: bufpush(0x118D3); break; case 0x118B4: bufpush(0x118D4); break; case 0x118B5: bufpush(0x118D5); break; case 0x118B6: bufpush(0x118D6); break; case 0x118B7: bufpush(0x118D7); break; case 0x118B8: bufpush(0x118D8); break; case 0x118B9: bufpush(0x118D9); break; case 0x118BA: bufpush(0x118DA); break; case 0x118BB: bufpush(0x118DB); break; case 0x118BC: bufpush(0x118DC); break; case 0x118BD: bufpush(0x118DD); break; case 0x118BE: bufpush(0x118DE); break; case 0x118BF: bufpush(0x118DF); break; case 0x1E900: bufpush(0x1E922); break; case 0x1E901: bufpush(0x1E923); break; case 0x1E902: bufpush(0x1E924); break; case 0x1E903: bufpush(0x1E925); break; case 0x1E904: bufpush(0x1E926); break; case 0x1E905: bufpush(0x1E927); break; case 0x1E906: bufpush(0x1E928); break; case 0x1E907: bufpush(0x1E929); break; case 0x1E908: bufpush(0x1E92A); break; case 0x1E909: bufpush(0x1E92B); break; case 0x1E90A: 
bufpush(0x1E92C); break; case 0x1E90B: bufpush(0x1E92D); break; case 0x1E90C: bufpush(0x1E92E); break; case 0x1E90D: bufpush(0x1E92F); break; case 0x1E90E: bufpush(0x1E930); break; case 0x1E90F: bufpush(0x1E931); break; case 0x1E910: bufpush(0x1E932); break; case 0x1E911: bufpush(0x1E933); break; case 0x1E912: bufpush(0x1E934); break; case 0x1E913: bufpush(0x1E935); break; case 0x1E914: bufpush(0x1E936); break; case 0x1E915: bufpush(0x1E937); break; case 0x1E916: bufpush(0x1E938); break; case 0x1E917: bufpush(0x1E939); break; case 0x1E918: bufpush(0x1E93A); break; case 0x1E919: bufpush(0x1E93B); break; case 0x1E91A: bufpush(0x1E93C); break; case 0x1E91B: bufpush(0x1E93D); break; case 0x1E91C: bufpush(0x1E93E); break; case 0x1E91D: bufpush(0x1E93F); break; case 0x1E91E: bufpush(0x1E940); break; case 0x1E91F: bufpush(0x1E941); break; case 0x1E920: bufpush(0x1E942); break; case 0x1E921: bufpush(0x1E943); break; default: bufpush(c); } cmarkgfm/third_party/cmark/src/main.c0000644000175000017500000002317614210444464020046 0ustar carstencarsten#include #include #include #include #include "config.h" #include "cmark-gfm.h" #include "node.h" #include "cmark-gfm-extension_api.h" #include "syntax_extension.h" #include "parser.h" #include "registry.h" #include "../extensions/cmark-gfm-core-extensions.h" #if defined(__OpenBSD__) # include # if OpenBSD >= 201605 # define USE_PLEDGE # include # endif #endif #if defined(__OpenBSD__) # include # if OpenBSD >= 201605 # define USE_PLEDGE # include # endif #endif #if defined(_WIN32) && !defined(__CYGWIN__) #include #include #endif typedef enum { FORMAT_NONE, FORMAT_HTML, FORMAT_XML, FORMAT_MAN, FORMAT_COMMONMARK, FORMAT_PLAINTEXT, FORMAT_LATEX } writer_format; void print_usage() { printf("Usage: cmark-gfm [FILE*]\n"); printf("Options:\n"); printf(" --to, -t FORMAT Specify output format (html, xml, man, " "commonmark, plaintext, latex)\n"); printf(" --width WIDTH Specify wrap width (default 0 = nowrap)\n"); printf(" --sourcepos Include 
source position attribute\n"); printf(" --hardbreaks Treat newlines as hard line breaks\n"); printf(" --nobreaks Render soft line breaks as spaces\n"); printf(" --unsafe Render raw HTML and dangerous URLs\n"); printf(" --smart Use smart punctuation\n"); printf(" --validate-utf8 Replace UTF-8 invalid sequences with U+FFFD\n"); printf(" --github-pre-lang Use GitHub-style
 for code blocks\n");
  printf("  --extension, -e EXTENSION_NAME  Specify an extension name to use\n");
  printf("  --list-extensions               List available extensions and quit\n");
  printf("  --strikethrough-double-tilde    Only parse strikethrough (if enabled)\n");
  printf("                                  with two tildes\n");
  printf("  --table-prefer-style-attributes Use style attributes to align table cells\n"
         "                                  instead of align attributes.\n");
  printf("  --full-info-string              Include remainder of code block info\n"
         "                                  string in a separate attribute.\n");
  printf("  --help, -h       Print usage information\n");
  printf("  --version        Print version\n");
}

/* Render 'document' with the renderer selected by 'writer' and write the
 * resulting string to stdout.
 *
 * 'options' is forwarded to every renderer; 'width' is forwarded to the
 * man, commonmark, plaintext and latex renderers; the HTML renderer is
 * additionally given the syntax extensions attached to 'parser'.
 *
 * Returns true once the output has been printed, or false (after reporting
 * on stderr) when 'writer' is not one of the supported formats.
 */
static bool print_document(cmark_node *document, writer_format writer,
                           int options, int width, cmark_parser *parser) {
  char *rendered = NULL;
  cmark_mem *allocator = cmark_get_default_mem_allocator();

  if (writer == FORMAT_HTML) {
    rendered = cmark_render_html_with_mem(document, options,
                                          parser->syntax_extensions, allocator);
  } else if (writer == FORMAT_XML) {
    rendered = cmark_render_xml_with_mem(document, options, allocator);
  } else if (writer == FORMAT_MAN) {
    rendered = cmark_render_man_with_mem(document, options, width, allocator);
  } else if (writer == FORMAT_COMMONMARK) {
    rendered = cmark_render_commonmark_with_mem(document, options, width,
                                                allocator);
  } else if (writer == FORMAT_PLAINTEXT) {
    rendered = cmark_render_plaintext_with_mem(document, options, width,
                                               allocator);
  } else if (writer == FORMAT_LATEX) {
    rendered = cmark_render_latex_with_mem(document, options, width, allocator);
  } else {
    /* FORMAT_NONE and any out-of-range value end up here. */
    fprintf(stderr, "Unknown format %d\n", writer);
    return false;
  }

  printf("%s", rendered);
  /* The buffer was obtained through 'allocator', so release it the same way. */
  allocator->free(rendered);

  return true;
}

static void print_extensions(void) {
  cmark_llist *syntax_extensions;
  cmark_llist *tmp;

  printf ("Available extensions:\nfootnotes\n");

  cmark_mem *mem = cmark_get_default_mem_allocator();
  syntax_extensions = cmark_list_syntax_extensions(mem);
  for (tmp = syntax_extensions; tmp; tmp=tmp->next) {
    cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data;
    printf("%s\n", ext->name);
  }

  cmark_llist_free(mem, syntax_extensions);
}

/* Entry point of the cmark-gfm command-line converter.
 *
 * Processing happens in these steps:
 *   1. Scan argv once to collect option flags, the output format, the wrap
 *      width, and the indices of the file arguments.
 *   2. Create the parser, then scan argv a second time to attach every
 *      extension named with -e/--extension ("footnotes" is an option flag
 *      set in step 1, not an attachable extension).
 *   3. Feed every named file (or stdin when no file was given) to the parser.
 *   4. Finish parsing and print the document in the selected format.
 *
 * Returns 0 on success (including --help, --version and --list-extensions)
 * and 1 on any error.
 */
int main(int argc, char *argv[]) {
  int i, numfps = 0;
  int *files;
  char buffer[4096];
  cmark_parser *parser = NULL;
  size_t bytes;
  cmark_node *document = NULL;
  int width = 0;
  char *unparsed;
  writer_format writer = FORMAT_HTML;
  int options = CMARK_OPT_DEFAULT;
  int res = 1;

#ifdef USE_PLEDGE
  if (pledge("stdio rpath", NULL) != 0) {
    perror("pledge");
    return 1;
  }
#endif

  cmark_gfm_core_extensions_ensure_registered();

#ifdef USE_PLEDGE
  if (pledge("stdio rpath", NULL) != 0) {
    perror("pledge");
    return 1;
  }
#endif

#if defined(_WIN32) && !defined(__CYGWIN__)
  /* Avoid newline translation on Windows so input and output stay byte-exact. */
  _setmode(_fileno(stdin), _O_BINARY);
  _setmode(_fileno(stdout), _O_BINARY);
#endif

  /* One slot per argument is always enough to record the file arguments. */
  files = (int *)calloc(argc, sizeof(*files));
  if (files == NULL && argc > 0) {
    /* Without this check the first file argument would be stored through a
     * NULL pointer.  A zero-sized request (argc == 0) may legitimately yield
     * NULL and is harmless because the loop below then never runs. */
    perror("calloc");
    goto failure;
  }

  /* First pass: options, output format, width and file arguments. */
  for (i = 1; i < argc; i++) {
    if (strcmp(argv[i], "--version") == 0) {
      printf("cmark-gfm %s", CMARK_GFM_VERSION_STRING);
      printf(" - CommonMark with GitHub Flavored Markdown converter\n(C) 2014-2016 John MacFarlane\n");
      goto success;
    } else if (strcmp(argv[i], "--list-extensions") == 0) {
      print_extensions();
      goto success;
    } else if (strcmp(argv[i], "--full-info-string") == 0) {
      options |= CMARK_OPT_FULL_INFO_STRING;
    } else if (strcmp(argv[i], "--table-prefer-style-attributes") == 0) {
      options |= CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES;
    } else if (strcmp(argv[i], "--strikethrough-double-tilde") == 0) {
      options |= CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE;
    } else if (strcmp(argv[i], "--sourcepos") == 0) {
      options |= CMARK_OPT_SOURCEPOS;
    } else if (strcmp(argv[i], "--hardbreaks") == 0) {
      options |= CMARK_OPT_HARDBREAKS;
    } else if (strcmp(argv[i], "--nobreaks") == 0) {
      options |= CMARK_OPT_NOBREAKS;
    } else if (strcmp(argv[i], "--smart") == 0) {
      options |= CMARK_OPT_SMART;
    } else if (strcmp(argv[i], "--github-pre-lang") == 0) {
      options |= CMARK_OPT_GITHUB_PRE_LANG;
    } else if (strcmp(argv[i], "--unsafe") == 0) {
      options |= CMARK_OPT_UNSAFE;
    } else if (strcmp(argv[i], "--validate-utf8") == 0) {
      options |= CMARK_OPT_VALIDATE_UTF8;
    } else if (strcmp(argv[i], "--liberal-html-tag") == 0) {
      options |= CMARK_OPT_LIBERAL_HTML_TAG;
    } else if ((strcmp(argv[i], "--help") == 0) ||
               (strcmp(argv[i], "-h") == 0)) {
      print_usage();
      goto success;
    } else if (strcmp(argv[i], "--width") == 0) {
      i += 1;
      if (i < argc) {
        width = (int)strtol(argv[i], &unparsed, 10);
        /* Any trailing characters mean the argument was not a plain number. */
        if (unparsed && strlen(unparsed) > 0) {
          fprintf(stderr, "failed parsing width '%s' at '%s'\n", argv[i],
                  unparsed);
          goto failure;
        }
      } else {
        fprintf(stderr, "--width requires an argument\n");
        goto failure;
      }
    } else if ((strcmp(argv[i], "-t") == 0) || (strcmp(argv[i], "--to") == 0)) {
      i += 1;
      if (i < argc) {
        if (strcmp(argv[i], "man") == 0) {
          writer = FORMAT_MAN;
        } else if (strcmp(argv[i], "html") == 0) {
          writer = FORMAT_HTML;
        } else if (strcmp(argv[i], "xml") == 0) {
          writer = FORMAT_XML;
        } else if (strcmp(argv[i], "commonmark") == 0) {
          writer = FORMAT_COMMONMARK;
        } else if (strcmp(argv[i], "plaintext") == 0) {
          writer = FORMAT_PLAINTEXT;
        } else if (strcmp(argv[i], "latex") == 0) {
          writer = FORMAT_LATEX;
        } else {
          fprintf(stderr, "Unknown format %s\n", argv[i]);
          goto failure;
        }
      } else {
        fprintf(stderr, "No argument provided for %s\n", argv[i - 1]);
        goto failure;
      }
    } else if ((strcmp(argv[i], "-e") == 0) || (strcmp(argv[i], "--extension") == 0)) {
      i += 1; // Simpler to handle extensions in a second pass, as we can directly register
              // them with the parser.

      // "footnotes" is a parser option rather than an attachable extension,
      // so it has to be recorded before the parser is created.
      if (i < argc && strcmp(argv[i], "footnotes") == 0) {
        options |= CMARK_OPT_FOOTNOTES;
      }
    } else if (*argv[i] == '-') {
      print_usage();
      goto failure;
    } else { // treat as file argument
      files[numfps++] = i;
    }
  }

#if DEBUG
  parser = cmark_parser_new(options);
#else
  parser = cmark_parser_new_with_mem(options, cmark_get_arena_mem_allocator());
#endif

  /* Second pass: attach the requested syntax extensions to the parser. */
  for (i = 1; i < argc; i++) {
    if ((strcmp(argv[i], "-e") == 0) || (strcmp(argv[i], "--extension") == 0)) {
      i += 1;
      if (i < argc) {
        if (strcmp(argv[i], "footnotes") == 0) {
          /* Already handled through CMARK_OPT_FOOTNOTES in the first pass. */
          continue;
        }
        cmark_syntax_extension *syntax_extension = cmark_find_syntax_extension(argv[i]);
        if (!syntax_extension) {
          fprintf(stderr, "Unknown extension %s\n", argv[i]);
          goto failure;
        }
        cmark_parser_attach_syntax_extension(parser, syntax_extension);
      } else {
        fprintf(stderr, "No argument provided for %s\n", argv[i - 1]);
        goto failure;
      }
    }
  }

  /* Feed every file argument to the same parser, in command-line order. */
  for (i = 0; i < numfps; i++) {
    FILE *fp = fopen(argv[files[i]], "rb");
    if (fp == NULL) {
      fprintf(stderr, "Error opening file %s: %s\n", argv[files[i]],
              strerror(errno));
      goto failure;
    }

    while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) {
      cmark_parser_feed(parser, buffer, bytes);
      /* A short read ends the loop without issuing another fread. */
      if (bytes < sizeof(buffer)) {
        break;
      }
    }

    fclose(fp);
  }

  /* No file arguments: read the document from stdin instead. */
  if (numfps == 0) {
    while ((bytes = fread(buffer, 1, sizeof(buffer), stdin)) > 0) {
      cmark_parser_feed(parser, buffer, bytes);
      if (bytes < sizeof(buffer)) {
        break;
      }
    }
  }

#ifdef USE_PLEDGE
  /* All input has been read; drop "rpath" and keep only "stdio". */
  if (pledge("stdio", NULL) != 0) {
    perror("pledge");
    return 1;
  }
#endif

  document = cmark_parser_finish(parser);

  if (!document || !print_document(document, writer, options, width, parser))
    goto failure;

success:
  res = 0;

failure:

#if DEBUG
  if (parser)
    cmark_parser_free(parser);

  if (document)
    cmark_node_free(document);
#else
  /* The non-DEBUG parser was created with the arena allocator, so a single
   * arena reset stands in for the individual frees of the DEBUG branch. */
  cmark_arena_reset();
#endif

  cmark_release_plugins();

  free(files);

  return res;
}
cmarkgfm/third_party/cmark/src/plugin.c0000644000175000017500000000176014210444464020413 0ustar  carstencarsten#include 

#include "plugin.h"

extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR;

/* Append 'extension' to the list of syntax extensions held by 'plugin'.
 * The list node is allocated with the default memory allocator.
 * Always returns 1.
 */
int cmark_plugin_register_syntax_extension(cmark_plugin    * plugin,
                                        cmark_syntax_extension * extension) {
  cmark_llist *updated_head;

  /* cmark_llist_append returns the (possibly new) head of the list. */
  updated_head = cmark_llist_append(&CMARK_DEFAULT_MEM_ALLOCATOR,
                                    plugin->syntax_extensions, extension);
  plugin->syntax_extensions = updated_head;

  return 1;
}

/* Allocate a new plugin with the default memory allocator.
 * The returned plugin starts with an empty syntax extension list.
 */
cmark_plugin *
cmark_plugin_new(void) {
  cmark_plugin *plugin;

  plugin = (cmark_plugin *) CMARK_DEFAULT_MEM_ALLOCATOR.calloc(1, sizeof(*plugin));
  plugin->syntax_extensions = NULL;

  return plugin;
}

/* Release 'plugin' together with the syntax extensions it still holds.
 * Every list element is passed to cmark_syntax_extension_free before the
 * plugin structure itself is freed with the default memory allocator.
 */
void
cmark_plugin_free(cmark_plugin *plugin) {
  cmark_mem *allocator = &CMARK_DEFAULT_MEM_ALLOCATOR;
  cmark_free_func release_extension =
      (cmark_free_func) cmark_syntax_extension_free;

  cmark_llist_free_full(allocator, plugin->syntax_extensions, release_extension);
  allocator->free(plugin);
}

/* Hand the plugin's syntax extension list over to the caller.
 * Returns the current list head and resets the plugin's own pointer to
 * NULL, so the plugin no longer references those extensions.
 */
cmark_llist *
cmark_plugin_steal_syntax_extensions(cmark_plugin *plugin) {
  cmark_llist *stolen;

  stolen = plugin->syntax_extensions;
  plugin->syntax_extensions = NULL;

  return stolen;
}
cmarkgfm/third_party/cmark/src/cmark-gfm.h0000644000175000017500000006255214210444464020774 0ustar  carstencarsten#ifndef CMARK_GFM_H
#define CMARK_GFM_H

#include 
#include 
#include "cmark-gfm_export.h"
#include "cmark-gfm_version.h"

#ifdef __cplusplus
extern "C" {
#endif

/** # NAME
 *
 * **cmark-gfm** - CommonMark parsing, manipulating, and rendering
 */

/** # DESCRIPTION
 *
 * ## Simple Interface
 */

/** Convert 'text' (assumed to be a UTF-8 encoded string with length
 * 'len') from CommonMark Markdown to HTML, returning a null-terminated,
 * UTF-8-encoded string. It is the caller's responsibility
 * to free the returned buffer.
 */
CMARK_GFM_EXPORT
char *cmark_markdown_to_html(const char *text, size_t len, int options);

/** ## Node Structure
 */

#define CMARK_NODE_TYPE_PRESENT (0x8000)
#define CMARK_NODE_TYPE_BLOCK (CMARK_NODE_TYPE_PRESENT | 0x0000)
#define CMARK_NODE_TYPE_INLINE (CMARK_NODE_TYPE_PRESENT | 0x4000)
#define CMARK_NODE_TYPE_MASK (0xc000)
#define CMARK_NODE_VALUE_MASK (0x3fff)

/* Node type identifiers.
 *
 * Every value except CMARK_NODE_NONE has CMARK_NODE_TYPE_PRESENT (0x8000)
 * set.  Bit 0x4000 distinguishes the two categories: it is clear for block
 * types (CMARK_NODE_TYPE_BLOCK) and set for inline types
 * (CMARK_NODE_TYPE_INLINE).  CMARK_NODE_TYPE_MASK (0xc000) selects those
 * category bits and CMARK_NODE_VALUE_MASK (0x3fff) selects the per-category
 * index, which is numbered from 1 independently for blocks and inlines.
 */
typedef enum {
  /* Error status */
  CMARK_NODE_NONE = 0x0000,

  /* Block */
  CMARK_NODE_DOCUMENT       = CMARK_NODE_TYPE_BLOCK | 0x0001,
  CMARK_NODE_BLOCK_QUOTE    = CMARK_NODE_TYPE_BLOCK | 0x0002,
  CMARK_NODE_LIST           = CMARK_NODE_TYPE_BLOCK | 0x0003,
  CMARK_NODE_ITEM           = CMARK_NODE_TYPE_BLOCK | 0x0004,
  CMARK_NODE_CODE_BLOCK     = CMARK_NODE_TYPE_BLOCK | 0x0005,
  CMARK_NODE_HTML_BLOCK     = CMARK_NODE_TYPE_BLOCK | 0x0006,
  CMARK_NODE_CUSTOM_BLOCK   = CMARK_NODE_TYPE_BLOCK | 0x0007,
  CMARK_NODE_PARAGRAPH      = CMARK_NODE_TYPE_BLOCK | 0x0008,
  CMARK_NODE_HEADING        = CMARK_NODE_TYPE_BLOCK | 0x0009,
  CMARK_NODE_THEMATIC_BREAK = CMARK_NODE_TYPE_BLOCK | 0x000a,
  CMARK_NODE_FOOTNOTE_DEFINITION = CMARK_NODE_TYPE_BLOCK | 0x000b,

  /* Inline */
  CMARK_NODE_TEXT          = CMARK_NODE_TYPE_INLINE | 0x0001,
  CMARK_NODE_SOFTBREAK     = CMARK_NODE_TYPE_INLINE | 0x0002,
  CMARK_NODE_LINEBREAK     = CMARK_NODE_TYPE_INLINE | 0x0003,
  CMARK_NODE_CODE          = CMARK_NODE_TYPE_INLINE | 0x0004,
  CMARK_NODE_HTML_INLINE   = CMARK_NODE_TYPE_INLINE | 0x0005,
  CMARK_NODE_CUSTOM_INLINE = CMARK_NODE_TYPE_INLINE | 0x0006,
  CMARK_NODE_EMPH          = CMARK_NODE_TYPE_INLINE | 0x0007,
  CMARK_NODE_STRONG        = CMARK_NODE_TYPE_INLINE | 0x0008,
  CMARK_NODE_LINK          = CMARK_NODE_TYPE_INLINE | 0x0009,
  CMARK_NODE_IMAGE         = CMARK_NODE_TYPE_INLINE | 0x000a,
  CMARK_NODE_FOOTNOTE_REFERENCE = CMARK_NODE_TYPE_INLINE | 0x000b,
} cmark_node_type;

extern cmark_node_type CMARK_NODE_LAST_BLOCK;
extern cmark_node_type CMARK_NODE_LAST_INLINE;

/* For backwards compatibility: */
#define CMARK_NODE_HEADER CMARK_NODE_HEADING
#define CMARK_NODE_HRULE CMARK_NODE_THEMATIC_BREAK
#define CMARK_NODE_HTML CMARK_NODE_HTML_BLOCK
#define CMARK_NODE_INLINE_HTML CMARK_NODE_HTML_INLINE

/* Kind of list a list node represents.  CMARK_NO_LIST is the value
 * cmark_node_get_list_type() reports for a node that is not a list.
 */
typedef enum {
  CMARK_NO_LIST,
  CMARK_BULLET_LIST,
  CMARK_ORDERED_LIST
} cmark_list_type;

/* Delimiter style of a list marker.
 * NOTE(review): presumably PERIOD is the "1." form and PAREN the "1)" form
 * of an ordered list marker, with NO_DELIM used otherwise -- confirm against
 * the list accessors.
 */
typedef enum {
  CMARK_NO_DELIM,
  CMARK_PERIOD_DELIM,
  CMARK_PAREN_DELIM
} cmark_delim_type;

typedef struct cmark_node cmark_node;
typedef struct cmark_parser cmark_parser;
typedef struct cmark_iter cmark_iter;
typedef struct cmark_syntax_extension cmark_syntax_extension;

/**
 * ## Custom memory allocator support
 */

/** Defines the memory allocation functions to be used by CMark
 * when parsing and allocating a document tree
 */
typedef struct cmark_mem {
  /* Allocation hook; takes two size_t arguments like the C library calloc. */
  void *(*calloc)(size_t, size_t);
  /* Resize hook; takes a pointer and a new size like the C library realloc. */
  void *(*realloc)(void *, size_t);
  /* Release hook for memory obtained through the two hooks above. */
  void (*free)(void *);
} cmark_mem;

/** The default memory allocator; uses the system's calloc,
 * realloc and free.
 */
CMARK_GFM_EXPORT
cmark_mem *cmark_get_default_mem_allocator(void);

/** An arena allocator; uses system calloc to allocate large
 * slabs of memory.  Memory in these slabs is not reused at all.
 */
CMARK_GFM_EXPORT
cmark_mem *cmark_get_arena_mem_allocator(void);

/** Resets the arena allocator, quickly returning all used memory
 * to the operating system.
 */
CMARK_GFM_EXPORT
void cmark_arena_reset(void);

/** Callback for freeing user data with a 'cmark_mem' context.
 */
typedef void (*cmark_free_func) (cmark_mem *mem, void *user_data);


/*
 * ## Basic data structures
 *
 * To keep dependencies to the strict minimum, libcmark implements
 * its own versions of "classic" data structures.
 */

/**
 * ### Linked list
 */

/** A generic singly linked list.
 */
typedef struct _cmark_llist
{
  /* Following element of the list; NULL marks the end of the list. */
  struct _cmark_llist *next;
  /* Caller-supplied payload stored in this element. */
  void         *data;
} cmark_llist;

/** Append an element to the linked list, return the possibly modified
 * head of the list.
 */
CMARK_GFM_EXPORT
cmark_llist * cmark_llist_append    (cmark_mem         * mem,
                                     cmark_llist       * head,
                                     void              * data);

/** Free the list starting with 'head', calling 'free_func' with the
 *  data pointer of each of its elements
 */
CMARK_GFM_EXPORT
void          cmark_llist_free_full (cmark_mem         * mem,
                                     cmark_llist       * head,
                                     cmark_free_func     free_func);

/** Free the list starting with 'head'
 */
CMARK_GFM_EXPORT
void          cmark_llist_free      (cmark_mem         * mem,
                                     cmark_llist       * head);

/**
 * ## Creating and Destroying Nodes
 */

/** Creates a new node of type 'type'.  Note that the node may have
 * other required properties, which it is the caller's responsibility
 * to assign.
 */
CMARK_GFM_EXPORT cmark_node *cmark_node_new(cmark_node_type type);

/** Same as `cmark_node_new`, but explicitly listing the memory
 * allocator used to allocate the node.  Note:  be sure to use the same
 * allocator for every node in a tree, or bad things can happen.
 */
CMARK_GFM_EXPORT cmark_node *cmark_node_new_with_mem(cmark_node_type type,
                                                 cmark_mem *mem);

CMARK_GFM_EXPORT cmark_node *cmark_node_new_with_ext(cmark_node_type type,
                                                cmark_syntax_extension *extension);

CMARK_GFM_EXPORT cmark_node *cmark_node_new_with_mem_and_ext(cmark_node_type type,
                                                cmark_mem *mem,
                                                cmark_syntax_extension *extension);

/** Frees the memory allocated for a node and any children.
 */
CMARK_GFM_EXPORT void cmark_node_free(cmark_node *node);

/**
 * ## Tree Traversal
 */

/** Returns the next node in the sequence after 'node', or NULL if
 * there is none.
 */
CMARK_GFM_EXPORT cmark_node *cmark_node_next(cmark_node *node);

/** Returns the previous node in the sequence before 'node', or NULL if
 * there is none.
 */
CMARK_GFM_EXPORT cmark_node *cmark_node_previous(cmark_node *node);

/** Returns the parent of 'node', or NULL if there is none.
 */
CMARK_GFM_EXPORT cmark_node *cmark_node_parent(cmark_node *node);

/** Returns the first child of 'node', or NULL if 'node' has no children.
 */
CMARK_GFM_EXPORT cmark_node *cmark_node_first_child(cmark_node *node);

/** Returns the last child of 'node', or NULL if 'node' has no children.
 */
CMARK_GFM_EXPORT cmark_node *cmark_node_last_child(cmark_node *node);

/**
 * ## Iterator
 *
 * An iterator will walk through a tree of nodes, starting from a root
 * node, returning one node at a time, together with information about
 * whether the node is being entered or exited.  The iterator will
 * first descend to a child node, if there is one.  When there is no
 * child, the iterator will go to the next sibling.  When there is no
 * next sibling, the iterator will return to the parent (but with
 * a 'cmark_event_type' of `CMARK_EVENT_EXIT`).  The iterator will
 * return `CMARK_EVENT_DONE` when it reaches the root node again.
 * One natural application is an HTML renderer, where an `ENTER` event
 * outputs an open tag and an `EXIT` event outputs a close tag.
 * An iterator might also be used to transform an AST in some systematic
 * way, for example, turning all level-3 headings into regular paragraphs.
 *
 *     void
 *     usage_example(cmark_node *root) {
 *         cmark_event_type ev_type;
 *         cmark_iter *iter = cmark_iter_new(root);
 *
 *         while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
 *             cmark_node *cur = cmark_iter_get_node(iter);
 *             // Do something with `cur` and `ev_type`
 *         }
 *
 *         cmark_iter_free(iter);
 *     }
 *
 * Iterators will never return `EXIT` events for leaf nodes, which are nodes
 * of type:
 *
 * * CMARK_NODE_HTML_BLOCK
 * * CMARK_NODE_THEMATIC_BREAK
 * * CMARK_NODE_CODE_BLOCK
 * * CMARK_NODE_TEXT
 * * CMARK_NODE_SOFTBREAK
 * * CMARK_NODE_LINEBREAK
 * * CMARK_NODE_CODE
 * * CMARK_NODE_HTML_INLINE
 *
 * Nodes must only be modified after an `EXIT` event, or an `ENTER` event for
 * leaf nodes.
 */

/* Event kinds reported by an iterator. */
typedef enum {
  /* NOTE(review): presumably the state before the first cmark_iter_next()
   * call -- confirm against the iterator implementation. */
  CMARK_EVENT_NONE,
  /* The traversal has returned to the root node and is finished. */
  CMARK_EVENT_DONE,
  /* The current node is being entered. */
  CMARK_EVENT_ENTER,
  /* The current node is being exited; never reported for leaf nodes. */
  CMARK_EVENT_EXIT
} cmark_event_type;

/** Creates a new iterator starting at 'root'.  The current node and event
 * type are undefined until 'cmark_iter_next' is called for the first time.
 * The memory allocated for the iterator should be released using
 * 'cmark_iter_free' when it is no longer needed.
 */
CMARK_GFM_EXPORT
cmark_iter *cmark_iter_new(cmark_node *root);

/** Frees the memory allocated for an iterator.
 */
CMARK_GFM_EXPORT
void cmark_iter_free(cmark_iter *iter);

/** Advances to the next node and returns the event type (`CMARK_EVENT_ENTER`,
 * `CMARK_EVENT_EXIT` or `CMARK_EVENT_DONE`).
 */
CMARK_GFM_EXPORT
cmark_event_type cmark_iter_next(cmark_iter *iter);

/** Returns the current node.
 */
CMARK_GFM_EXPORT
cmark_node *cmark_iter_get_node(cmark_iter *iter);

/** Returns the current event type.
 */
CMARK_GFM_EXPORT
cmark_event_type cmark_iter_get_event_type(cmark_iter *iter);

/** Returns the root node.
 */
CMARK_GFM_EXPORT
cmark_node *cmark_iter_get_root(cmark_iter *iter);

/** Resets the iterator so that the current node is 'current' and
 * the event type is 'event_type'.  The new current node must be a
 * descendant of the root node or the root node itself.
 */
CMARK_GFM_EXPORT
void cmark_iter_reset(cmark_iter *iter, cmark_node *current,
                      cmark_event_type event_type);

/**
 * ## Accessors
 */

/** Returns the user data of 'node'.
 */
CMARK_GFM_EXPORT void *cmark_node_get_user_data(cmark_node *node);

/** Sets arbitrary user data for 'node'.  Returns 1 on success,
 * 0 on failure.
 */
CMARK_GFM_EXPORT int cmark_node_set_user_data(cmark_node *node, void *user_data);

/** Set free function for user data */
CMARK_GFM_EXPORT
int cmark_node_set_user_data_free_func(cmark_node *node,
                                        cmark_free_func free_func);

/** Returns the type of 'node', or `CMARK_NODE_NONE` on error.
 */
CMARK_GFM_EXPORT cmark_node_type cmark_node_get_type(cmark_node *node);

/** Like 'cmark_node_get_type', but returns a string representation
    of the type, or `"<unknown>"`.
 */
CMARK_GFM_EXPORT
const char *cmark_node_get_type_string(cmark_node *node);

/** Returns the string contents of 'node', or an empty
    string if none is set.  Returns NULL if called on a
    node that does not have string content.
 */
CMARK_GFM_EXPORT const char *cmark_node_get_literal(cmark_node *node);

/** Sets the string contents of 'node'.  Returns 1 on success,
 * 0 on failure.
 */
CMARK_GFM_EXPORT int cmark_node_set_literal(cmark_node *node, const char *content);

/** Returns the heading level of 'node', or 0 if 'node' is not a heading.
 */
CMARK_GFM_EXPORT int cmark_node_get_heading_level(cmark_node *node);

/* For backwards compatibility */
#define cmark_node_get_header_level cmark_node_get_heading_level
#define cmark_node_set_header_level cmark_node_set_heading_level

/** Sets the heading level of 'node', returning 1 on success and 0 on error.
 */
CMARK_GFM_EXPORT int cmark_node_set_heading_level(cmark_node *node, int level);

/** Returns the list type of 'node', or `CMARK_NO_LIST` if 'node'
 * is not a list.
 */
CMARK_GFM_EXPORT cmark_list_type cmark_node_get_list_type(cmark_node *node);

/** Sets the list type of 'node', returning 1 on success and 0 on error.
 */
CMARK_GFM_EXPORT int cmark_node_set_list_type(cmark_node *node,
                                          cmark_list_type type);

/** Returns the list delimiter type of 'node', or `CMARK_NO_DELIM` if 'node'
 * is not a list.
 */
CMARK_GFM_EXPORT cmark_delim_type cmark_node_get_list_delim(cmark_node *node);

/** Sets the list delimiter type of 'node', returning 1 on success and 0
 * on error.
 */
CMARK_GFM_EXPORT int cmark_node_set_list_delim(cmark_node *node,
                                           cmark_delim_type delim);

/** Returns starting number of 'node', if it is an ordered list, otherwise 0.
 */
CMARK_GFM_EXPORT int cmark_node_get_list_start(cmark_node *node);

/** Sets starting number of 'node', if it is an ordered list. Returns 1
 * on success, 0 on failure.
 */
CMARK_GFM_EXPORT int cmark_node_set_list_start(cmark_node *node, int start);

/** Returns 1 if 'node' is a tight list, 0 otherwise.
 */
CMARK_GFM_EXPORT int cmark_node_get_list_tight(cmark_node *node);

/** Sets the "tightness" of a list.  Returns 1 on success, 0 on failure.
 */
CMARK_GFM_EXPORT int cmark_node_set_list_tight(cmark_node *node, int tight);

/** Returns the info string from a fenced code block.
 */
CMARK_GFM_EXPORT const char *cmark_node_get_fence_info(cmark_node *node);

/** Sets the info string in a fenced code block, returning 1 on
 * success and 0 on failure.
 */
CMARK_GFM_EXPORT int cmark_node_set_fence_info(cmark_node *node, const char *info);

/** Sets code blocks fencing details
 */
CMARK_GFM_EXPORT int cmark_node_set_fenced(cmark_node * node, int fenced,
    int length, int offset, char character);

/** Returns code blocks fencing details
 */
CMARK_GFM_EXPORT int cmark_node_get_fenced(cmark_node *node, int *length, int *offset, char *character);

/** Returns the URL of a link or image 'node', or an empty string
    if no URL is set.  Returns NULL if called on a node that is
    not a link or image.
 */
CMARK_GFM_EXPORT const char *cmark_node_get_url(cmark_node *node);

/** Sets the URL of a link or image 'node'. Returns 1 on success,
 * 0 on failure.
 */
CMARK_GFM_EXPORT int cmark_node_set_url(cmark_node *node, const char *url);

/** Returns the title of a link or image 'node', or an empty
    string if no title is set.  Returns NULL if called on a node
    that is not a link or image.
 */
CMARK_GFM_EXPORT const char *cmark_node_get_title(cmark_node *node);

/** Sets the title of a link or image 'node'. Returns 1 on success,
 * 0 on failure.
 */
CMARK_GFM_EXPORT int cmark_node_set_title(cmark_node *node, const char *title);

/** Returns the literal "on enter" text for a custom 'node', or
    an empty string if no on_enter is set.  Returns NULL if called
    on a non-custom node.
 */
CMARK_GFM_EXPORT const char *cmark_node_get_on_enter(cmark_node *node);

/** Sets the literal text to render "on enter" for a custom 'node'.
    Any children of the node will be rendered after this text.
    Returns 1 on success, 0 on failure.
 */
CMARK_GFM_EXPORT int cmark_node_set_on_enter(cmark_node *node,
                                         const char *on_enter);

/** Returns the literal "on exit" text for a custom 'node', or
    an empty string if no on_exit is set.  Returns NULL if
    called on a non-custom node.
 */
CMARK_GFM_EXPORT const char *cmark_node_get_on_exit(cmark_node *node);

/** Sets the literal text to render "on exit" for a custom 'node'.
    Any children of the node will be rendered before this text.
    Returns 1 on success, 0 on failure.
 */
CMARK_GFM_EXPORT int cmark_node_set_on_exit(cmark_node *node, const char *on_exit);

/** Returns the line on which 'node' begins.
 */
CMARK_GFM_EXPORT int cmark_node_get_start_line(cmark_node *node);

/** Returns the column at which 'node' begins.
 */
CMARK_GFM_EXPORT int cmark_node_get_start_column(cmark_node *node);

/** Returns the line on which 'node' ends.
 */
CMARK_GFM_EXPORT int cmark_node_get_end_line(cmark_node *node);

/** Returns the column at which 'node' ends.
 */
CMARK_GFM_EXPORT int cmark_node_get_end_column(cmark_node *node);

/**
 * ## Tree Manipulation
 */

/** Unlinks a 'node', removing it from the tree, but not freeing its
 * memory.  (Use 'cmark_node_free' for that.)
 */
CMARK_GFM_EXPORT void cmark_node_unlink(cmark_node *node);

/** Inserts 'sibling' before 'node'.  Returns 1 on success, 0 on failure.
 */
CMARK_GFM_EXPORT int cmark_node_insert_before(cmark_node *node,
                                          cmark_node *sibling);

/** Inserts 'sibling' after 'node'. Returns 1 on success, 0 on failure.
 */
CMARK_GFM_EXPORT int cmark_node_insert_after(cmark_node *node, cmark_node *sibling);

/** Replaces 'oldnode' with 'newnode' and unlinks 'oldnode' (but does
 * not free its memory).
 * Returns 1 on success, 0 on failure.
 */
CMARK_GFM_EXPORT int cmark_node_replace(cmark_node *oldnode, cmark_node *newnode);

/** Adds 'child' to the beginning of the children of 'node'.
 * Returns 1 on success, 0 on failure.
 */
CMARK_GFM_EXPORT int cmark_node_prepend_child(cmark_node *node, cmark_node *child);

/** Adds 'child' to the end of the children of 'node'.
 * Returns 1 on success, 0 on failure.
 */
CMARK_GFM_EXPORT int cmark_node_append_child(cmark_node *node, cmark_node *child);

/** Consolidates adjacent text nodes.
 */
CMARK_GFM_EXPORT void cmark_consolidate_text_nodes(cmark_node *root);

/** Ensures a node and all its children own their own chunk memory.
 */
CMARK_GFM_EXPORT void cmark_node_own(cmark_node *root);

/**
 * ## Parsing
 *
 * Simple interface:
 *
 *     cmark_node *document = cmark_parse_document("Hello *world*", 13,
 *                                                 CMARK_OPT_DEFAULT);
 *
 * Streaming interface:
 *
 *     cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT);
 *     FILE *fp = fopen("myfile.md", "rb");
 *     while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) {
 *     	   cmark_parser_feed(parser, buffer, bytes);
 *     	   if (bytes < sizeof(buffer)) {
 *     	       break;
 *     	   }
 *     }
 *     document = cmark_parser_finish(parser);
 *     cmark_parser_free(parser);
 */

/** Creates a new parser object.
 */
CMARK_GFM_EXPORT
cmark_parser *cmark_parser_new(int options);

/** Creates a new parser object with the given memory allocator
 */
CMARK_GFM_EXPORT
cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem);

/** Frees memory allocated for a parser object.
 */
CMARK_GFM_EXPORT
void cmark_parser_free(cmark_parser *parser);

/** Feeds a string of length 'len' to 'parser'.
 */
CMARK_GFM_EXPORT
void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len);

/** Finish parsing and return a pointer to a tree of nodes.
 */
CMARK_GFM_EXPORT
cmark_node *cmark_parser_finish(cmark_parser *parser);

/** Parse a CommonMark document in 'buffer' of length 'len'.
 * Returns a pointer to a tree of nodes.  The memory allocated for
 * the node tree should be released using 'cmark_node_free'
 * when it is no longer needed.
 */
CMARK_GFM_EXPORT
cmark_node *cmark_parse_document(const char *buffer, size_t len, int options);

/** Parse a CommonMark document in file 'f', returning a pointer to
 * a tree of nodes.  The memory allocated for the node tree should be
 * released using 'cmark_node_free' when it is no longer needed.
 */
CMARK_GFM_EXPORT
cmark_node *cmark_parse_file(FILE *f, int options);

/**
 * ## Rendering
 */

/** Render a 'node' tree as XML.  It is the caller's responsibility
 * to free the returned buffer.
 */
CMARK_GFM_EXPORT
char *cmark_render_xml(cmark_node *root, int options);

/** As for 'cmark_render_xml', but specifying the allocator to use for
 * the resulting string.
 */
CMARK_GFM_EXPORT
char *cmark_render_xml_with_mem(cmark_node *root, int options, cmark_mem *mem);

/** Render a 'node' tree as an HTML fragment.  It is up to the user
 * to add an appropriate header and footer. It is the caller's
 * responsibility to free the returned buffer.
 */
CMARK_GFM_EXPORT
char *cmark_render_html(cmark_node *root, int options, cmark_llist *extensions);

/** As for 'cmark_render_html', but specifying the allocator to use for
 * the resulting string.
 */
CMARK_GFM_EXPORT
char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_llist *extensions, cmark_mem *mem);

/** Render a 'node' tree as a groff man page, without the header.
 * It is the caller's responsibility to free the returned buffer.
 */
CMARK_GFM_EXPORT
char *cmark_render_man(cmark_node *root, int options, int width);

/** As for 'cmark_render_man', but specifying the allocator to use for
 * the resulting string.
 */
CMARK_GFM_EXPORT
char *cmark_render_man_with_mem(cmark_node *root, int options, int width, cmark_mem *mem);

/** Render a 'node' tree as a commonmark document.
 * It is the caller's responsibility to free the returned buffer.
 */
CMARK_GFM_EXPORT
char *cmark_render_commonmark(cmark_node *root, int options, int width);

/** As for 'cmark_render_commonmark', but specifying the allocator to use for
 * the resulting string.
 */
CMARK_GFM_EXPORT
char *cmark_render_commonmark_with_mem(cmark_node *root, int options, int width, cmark_mem *mem);

/** Render a 'node' tree as a plain text document.
 * It is the caller's responsibility to free the returned buffer.
 */
CMARK_GFM_EXPORT
char *cmark_render_plaintext(cmark_node *root, int options, int width);

/** As for 'cmark_render_plaintext', but specifying the allocator to use for
 * the resulting string.
 */
CMARK_GFM_EXPORT
char *cmark_render_plaintext_with_mem(cmark_node *root, int options, int width, cmark_mem *mem);

/** Render a 'node' tree as a LaTeX document.
 * It is the caller's responsibility to free the returned buffer.
 */
CMARK_GFM_EXPORT
char *cmark_render_latex(cmark_node *root, int options, int width);

/** As for 'cmark_render_latex', but specifying the allocator to use for
 * the resulting string.
 */
CMARK_GFM_EXPORT
char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmark_mem *mem);

/**
 * ## Options
 */

/** Default options.
 */
#define CMARK_OPT_DEFAULT 0

/**
 * ### Options affecting rendering
 */

/** Include a `data-sourcepos` attribute on all block elements.
 */
#define CMARK_OPT_SOURCEPOS (1 << 1)

/** Render `softbreak` elements as hard line breaks.
 */
#define CMARK_OPT_HARDBREAKS (1 << 2)

/** `CMARK_OPT_SAFE` is defined here for API compatibility,
    but it no longer has any effect. "Safe" mode is now the default:
    set `CMARK_OPT_UNSAFE` to disable it.
 */
#define CMARK_OPT_SAFE (1 << 3)

/** Render raw HTML and unsafe links (`javascript:`, `vbscript:`,
 * `file:`, and `data:`, except for `image/png`, `image/gif`,
 * `image/jpeg`, or `image/webp` mime types).  By default,
 * raw HTML is replaced by a placeholder HTML comment. Unsafe
 * links are replaced by empty strings.
 */
#define CMARK_OPT_UNSAFE (1 << 17)

/** Render `softbreak` elements as spaces.
 */
#define CMARK_OPT_NOBREAKS (1 << 4)

/**
 * ### Options affecting parsing
 */

/** Legacy option (no effect).
 */
#define CMARK_OPT_NORMALIZE (1 << 8)

/** Validate UTF-8 in the input before parsing, replacing illegal
 * sequences with the replacement character U+FFFD.
 */
#define CMARK_OPT_VALIDATE_UTF8 (1 << 9)

/** Convert straight quotes to curly, --- to em dashes, -- to en dashes.
 */
#define CMARK_OPT_SMART (1 << 10)

/** Use GitHub-style <pre lang="x"> tags for code blocks instead of <pre><code
 * class="language-x">.
 */
#define CMARK_OPT_GITHUB_PRE_LANG (1 << 11)

/** Be liberal in interpreting inline HTML tags.
 */
#define CMARK_OPT_LIBERAL_HTML_TAG (1 << 12)

/** Parse footnotes.
 */
#define CMARK_OPT_FOOTNOTES (1 << 13)

/** Only parse strikethroughs if surrounded by exactly 2 tildes.
 * Gives some compatibility with redcarpet.
 */
#define CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE (1 << 14)

/** Use style attributes to align table cells instead of align attributes.
 */
#define CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES (1 << 15)

/** Include the remainder of the info string in code blocks in
 * a separate attribute.
 */
#define CMARK_OPT_FULL_INFO_STRING (1 << 16)

/**
 * ## Version information
 */

/** The library version as integer for runtime checks. Also available as
 * macro CMARK_VERSION for compile time checks.
 *
 * * Bits 16-23 contain the major version.
 * * Bits 8-15 contain the minor version.
 * * Bits 0-7 contain the patchlevel.
 *
 * In hexadecimal format, the number 0x010203 represents version 1.2.3.
 */
CMARK_GFM_EXPORT
int cmark_version(void);

/** The library version string for runtime checks. Also available as
 * macro CMARK_VERSION_STRING for compile time checks.
 */
CMARK_GFM_EXPORT
const char *cmark_version_string(void);

/** # AUTHORS
 *
 * John MacFarlane, Vicent Marti,  Kārlis Gaņģis, Nick Wellnhofer.
 */

#ifndef CMARK_NO_SHORT_NAMES
#define NODE_DOCUMENT CMARK_NODE_DOCUMENT
#define NODE_BLOCK_QUOTE CMARK_NODE_BLOCK_QUOTE
#define NODE_LIST CMARK_NODE_LIST
#define NODE_ITEM CMARK_NODE_ITEM
#define NODE_CODE_BLOCK CMARK_NODE_CODE_BLOCK
#define NODE_HTML_BLOCK CMARK_NODE_HTML_BLOCK
#define NODE_CUSTOM_BLOCK CMARK_NODE_CUSTOM_BLOCK
#define NODE_PARAGRAPH CMARK_NODE_PARAGRAPH
#define NODE_HEADING CMARK_NODE_HEADING
#define NODE_HEADER CMARK_NODE_HEADER
#define NODE_THEMATIC_BREAK CMARK_NODE_THEMATIC_BREAK
#define NODE_HRULE CMARK_NODE_HRULE
#define NODE_TEXT CMARK_NODE_TEXT
#define NODE_SOFTBREAK CMARK_NODE_SOFTBREAK
#define NODE_LINEBREAK CMARK_NODE_LINEBREAK
#define NODE_CODE CMARK_NODE_CODE
#define NODE_HTML_INLINE CMARK_NODE_HTML_INLINE
#define NODE_CUSTOM_INLINE CMARK_NODE_CUSTOM_INLINE
#define NODE_EMPH CMARK_NODE_EMPH
#define NODE_STRONG CMARK_NODE_STRONG
#define NODE_LINK CMARK_NODE_LINK
#define NODE_IMAGE CMARK_NODE_IMAGE
#define BULLET_LIST CMARK_BULLET_LIST
#define ORDERED_LIST CMARK_ORDERED_LIST
#define PERIOD_DELIM CMARK_PERIOD_DELIM
#define PAREN_DELIM CMARK_PAREN_DELIM
#endif

typedef int32_t bufsize_t;

#ifdef __cplusplus
}
#endif

#endif
cmarkgfm/third_party/cmark/src/footnotes.h0000644000175000017500000000070214210444464021135 0ustar  carstencarsten#ifndef CMARK_FOOTNOTES_H
#define CMARK_FOOTNOTES_H

#include "map.h"

#ifdef __cplusplus
extern "C" {
#endif

// A footnote record that is stored in a cmark_map (see
// cmark_footnote_create / cmark_footnote_map_new below).
struct cmark_footnote {
  // Embedded map entry. Presumably kept as the first member so the map code
  // can treat a cmark_footnote as a cmark_map_entry -- TODO confirm in map.h.
  cmark_map_entry entry;
  // Node associated with this footnote (presumably the footnote definition
  // node passed to cmark_footnote_create -- TODO confirm).
  cmark_node *node;
  // Presumably the footnote's index/ordinal -- TODO confirm against
  // footnotes.c.
  unsigned int ix;
};

typedef struct cmark_footnote cmark_footnote;

void cmark_footnote_create(cmark_map *map, cmark_node *node);
cmark_map *cmark_footnote_map_new(cmark_mem *mem);

void cmark_unlink_footnotes_map(cmark_map *map);

#ifdef __cplusplus
}
#endif

#endif
cmarkgfm/third_party/cmark/src/commonmark.c0000644000175000017500000003477314210444464021272 0ustar  carstencarsten#include 
#include 
#include 
#include 
#include 

#include "config.h"
#include "cmark-gfm.h"
#include "node.h"
#include "buffer.h"
#include "utf8.h"
#include "scanners.h"
#include "render.h"
#include "syntax_extension.h"

#define OUT(s, wrap, escaping) renderer->out(renderer, node, s, wrap, escaping)
#define LIT(s) renderer->out(renderer, node, s, false, LITERAL)
#define CR() renderer->cr(renderer)
#define BLANKLINE() renderer->blankline(renderer)
#define ENCODED_SIZE 20
#define LISTMARKER_SIZE 20

// Functions to convert cmark_nodes to commonmark strings.

// Writes the code point `c` to the renderer's output buffer, escaping it
// when the `escape` mode requires it.
//
//   renderer  renderer whose buffer/column state is updated
//   node      node being rendered (not used by this function)
//   escape    LITERAL (never escape), NORMAL (escape Markdown-significant
//             characters), URL or TITLE (escape characters that would end or
//             corrupt a link destination / link title)
//   c         code point to emit; only code points below 0x80 are escaped
//   nextc     byte following `c`, used as one character of lookahead
//
// A character that needs escaping is written in one of three forms:
//   * whitespace inside a URL -> percent-encoding ("%20", "%09", ...)
//   * ASCII punctuation       -> backslash escape
//   * anything else           -> numeric character reference ("&#N;")
static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node,
                              cmark_escaping escape,
                              int32_t c, unsigned char nextc) {
  bool needs_escaping = false;
  // True when the last byte already written is a digit; used to tell list
  // markers ("1." / "1)") apart from ordinary punctuation.
  bool follows_digit =
      renderer->buffer->size > 0 &&
      cmark_isdigit(renderer->buffer->ptr[renderer->buffer->size - 1]);
  char encoded[ENCODED_SIZE];

  needs_escaping =
      c < 0x80 && escape != LITERAL &&
      ((escape == NORMAL &&
        (c < 0x20 ||
         c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' ||
         c == '>' || c == '\\' || c == '`' || c == '~' || c == '!' ||
         (c == '&' && cmark_isalpha(nextc)) || (c == '!' && nextc == '[') ||
         (renderer->begin_content && (c == '-' || c == '+' || c == '=') &&
          // begin_content doesn't get set to false til we've passed digits
          // at the beginning of line, so...
          !follows_digit) ||
         (renderer->begin_content && (c == '.' || c == ')') && follows_digit &&
          (nextc == 0 || cmark_isspace(nextc))))) ||
       (escape == URL &&
        (c == '`' || c == '<' || c == '>' || cmark_isspace((char)c) || c == '\\' ||
         c == ')' || c == '(')) ||
       (escape == TITLE &&
        (c == '`' || c == '<' || c == '>' || c == '"' || c == '\\')));

  if (needs_escaping) {
    if (escape == URL && cmark_isspace((char)c)) {
      // Use percent encoding for whitespace. The value must be zero-padded
      // to two hex digits: "%2X" would pad with a space and turn a tab
      // (0x09) into the invalid sequence "% 9" instead of "%09".
      snprintf(encoded, ENCODED_SIZE, "%%%02X", c);
      cmark_strbuf_puts(renderer->buffer, encoded);
      renderer->column += 3;
    } else if (cmark_ispunct((char)c)) {
      cmark_render_ascii(renderer, "\\");
      cmark_render_code_point(renderer, c);
    } else { // render as entity
      snprintf(encoded, ENCODED_SIZE, "&#%d;", c);
      cmark_strbuf_puts(renderer->buffer, encoded);
      renderer->column += (int)strlen(encoded);
    }
  } else {
    cmark_render_code_point(renderer, c);
  }
}

// Returns the length of the longest run of consecutive backticks in the
// NUL-terminated string `code`, or 0 if it contains no backtick.
static int longest_backtick_sequence(const char *code) {
  int best = 0;
  int run = 0;
  const char *p = code;

  // Walk up to and including the terminating NUL so that a run which ends
  // at the very end of the string is still accounted for.
  for (;;) {
    if (*p == '`') {
      run++;
    } else {
      if (run > best) {
        best = run;
      }
      run = 0;
      if (*p == '\0') {
        break;
      }
    }
    p++;
  }

  return best;
}

// Returns the smallest n >= 1 such that `code` contains no run of exactly n
// consecutive backticks. Runs are tracked in a 32-bit set, so the result is
// capped at 32 (runs of 32 or more backticks are not recorded).
static int shortest_unused_backtick_sequence(const char *code) {
  // Bit n set means "a run of exactly n backticks occurs". Bit 0 is always
  // set so that the answer is never 0.
  uint32_t seen = 1;
  int run = 0;
  int result = 0;
  const char *p;
  const char *end = code + strlen(code);

  // `p <= end` deliberately visits the terminating NUL, which flushes a run
  // that reaches the end of the string.
  for (p = code; p <= end; p++) {
    if (*p == '`') {
      run++;
      continue;
    }
    if (run > 0 && run < 32) {
      seen |= (1U << run);
    }
    run = 0;
  }

  // Index of the lowest clear bit.
  while (result < 32 && ((seen >> result) & 1)) {
    result++;
  }
  return result;
}

// Returns true if `node` is a link that can be written in autolink form
// (`<url>`), i.e. when all of the following hold:
//   * it is a CMARK_NODE_LINK whose URL is non-empty and starts with a scheme
//   * it has no title
//   * its only child is a text node whose literal equals the URL (with a
//     leading "mailto:" ignored)
//
// Side effect: calls cmark_consolidate_text_nodes() on the first child
// (presumably merging adjacent text siblings into it -- see that function).
static bool is_autolink(cmark_node *node) {
  cmark_chunk *title;
  cmark_chunk *url;
  cmark_node *link_text;
  char *realurl;
  int realurllen;

  if (node->type != CMARK_NODE_LINK) {
    return false;
  }

  url = &node->as.link.url;
  if (url->len == 0 || scan_scheme(url, 0) == 0) {
    return false;
  }

  title = &node->as.link.title;
  // if it has a title, we can't treat it as an autolink:
  if (title->len > 0) {
    return false;
  }

  link_text = node->first_child;
  if (link_text == NULL) {
    return false;
  }
  cmark_consolidate_text_nodes(link_text);

  // An autolink can only carry plain text. `as.literal` is only meaningful
  // for text nodes, and any further sibling (emphasis, code, image, ...)
  // would be silently dropped by the `<url>` form, so fall back to the
  // regular `[text](url)` syntax in those cases.
  if (link_text->type != CMARK_NODE_TEXT || link_text->next != NULL) {
    return false;
  }

  realurl = (char *)url->data;
  realurllen = url->len;
  // The length guard keeps strncmp from looking past the end of the chunk.
  if (realurllen >= 7 && strncmp(realurl, "mailto:", 7) == 0) {
    realurl += 7;
    realurllen -= 7;
  }
  return (realurllen == link_text->as.literal.len &&
          strncmp(realurl, (char *)link_text->as.literal.data,
                  link_text->as.literal.len) == 0);
}

// Walks from `node` up through its ancestors and returns the first node that
// is block-level; `node` itself is returned when it is already a block.
// Returns NULL when neither `node` nor any ancestor is a block (this
// includes a NULL `node`).
static cmark_node *get_containing_block(cmark_node *node) {
  cmark_node *cur;

  for (cur = node; cur != NULL; cur = cur->parent) {
    if (CMARK_NODE_BLOCK_P(cur)) {
      return cur;
    }
  }
  return NULL;
}

// Renders one iterator event for `node` as CommonMark text through the
// callbacks of `renderer` (see the OUT/LIT/CR/BLANKLINE macros above).
//
//   renderer  output state (buffer, line prefix, width, flags)
//   node      node the event refers to
//   ev_type   event being handled; anything other than CMARK_EVENT_ENTER is
//             treated as "exiting"
//   options   CMARK_OPT_* bit mask
//
// Returns 1 in the normal case. Returns 0 after emitting an autolink on
// entry, as a signal to the caller that the node's contents must be skipped.
static int S_render_node(cmark_renderer *renderer, cmark_node *node,
                         cmark_event_type ev_type, int options) {
  cmark_node *tmp;
  int list_number;
  cmark_delim_type list_delim;
  int numticks;
  bool extra_spaces;
  int i;
  bool entering = (ev_type == CMARK_EVENT_ENTER);
  const char *info, *code, *title;
  char fencechar[2] = {'\0', '\0'};
  size_t info_len, code_len;
  char listmarker[LISTMARKER_SIZE];
  char *emph_delim;
  bool first_in_list_item;
  bufsize_t marker_width;
  // Wrapping is only allowed when a width is set and neither NOBREAKS nor
  // HARDBREAKS is requested.
  bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) &&
                    !(CMARK_OPT_HARDBREAKS & options);

  // Don't adjust tight list status til we've started the list.
  // Otherwise we lose the blank line between a paragraph and
  // a following list.
  if (!(node->type == CMARK_NODE_ITEM && node->prev == NULL && entering)) {
    // The node is in a tight list item when its containing block is an item
    // of a tight list, or is a direct child of such an item.
    tmp = get_containing_block(node);
    renderer->in_tight_list_item =
        tmp && // tmp might be NULL if there is no containing block
        ((tmp->type == CMARK_NODE_ITEM &&
          cmark_node_get_list_tight(tmp->parent)) ||
         (tmp && tmp->parent && tmp->parent->type == CMARK_NODE_ITEM &&
          cmark_node_get_list_tight(tmp->parent->parent)));
  }

  // Nodes owned by a syntax extension that provides its own CommonMark
  // renderer are delegated entirely to that extension.
  if (node->extension && node->extension->commonmark_render_func) {
    node->extension->commonmark_render_func(node->extension, renderer, node, ev_type, options);
    return 1;
  }

  switch (node->type) {
  case CMARK_NODE_DOCUMENT:
    break;

  case CMARK_NODE_BLOCK_QUOTE:
    if (entering) {
      // Emit the marker for the first line and push it onto the line prefix
      // for the following lines of the quote.
      LIT("> ");
      renderer->begin_content = true;
      cmark_strbuf_puts(renderer->prefix, "> ");
    } else {
      // Pop the two-character "> " prefix added on entry.
      cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2);
      BLANKLINE();
    }
    break;

  case CMARK_NODE_LIST:
    if (!entering && node->next && (node->next->type == CMARK_NODE_CODE_BLOCK ||
                                    node->next->type == CMARK_NODE_LIST)) {
      // this ensures that a following indented code block or list will be
      // interpreted correctly.
      CR();
      LIT("");
      BLANKLINE();
    }
    break;

  case CMARK_NODE_ITEM:
    // The marker width is computed on both enter and exit: on enter it sizes
    // the indentation pushed onto the prefix, on exit it sizes the
    // truncation that pops it again.
    if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
      // Width of the "  - " bullet marker emitted below.
      marker_width = 4;
    } else {
      // Item number = list start + number of preceding sibling items.
      list_number = cmark_node_get_list_start(node->parent);
      list_delim = cmark_node_get_list_delim(node->parent);
      tmp = node;
      while (tmp->prev) {
        tmp = tmp->prev;
        list_number += 1;
      }
      // we ensure a width of at least 4 so
      // we get nice transition from single digits
      // to double
      snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number,
               list_delim == CMARK_PAREN_DELIM ? ")" : ".",
               list_number < 10 ? "  " : " ");
      marker_width = (bufsize_t)strlen(listmarker);
    }
    if (entering) {
      if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
        LIT("  - ");
        renderer->begin_content = true;
      } else {
        LIT(listmarker);
        renderer->begin_content = true;
      }
      // Indent continuation lines of the item by the marker width.
      for (i = marker_width; i--;) {
        cmark_strbuf_putc(renderer->prefix, ' ');
      }
    } else {
      cmark_strbuf_truncate(renderer->prefix,
                            renderer->prefix->size - marker_width);
      CR();
    }
    break;

  case CMARK_NODE_HEADING:
    if (entering) {
      // ATX heading: one '#' per heading level, then a space.
      for (i = cmark_node_get_heading_level(node); i > 0; i--) {
        LIT("#");
      }
      LIT(" ");
      renderer->begin_content = true;
      // Heading content is kept on one line until the heading is exited.
      renderer->no_linebreaks = true;
    } else {
      renderer->no_linebreaks = false;
      BLANKLINE();
    }
    break;

  case CMARK_NODE_CODE_BLOCK:
    first_in_list_item = node->prev == NULL && node->parent &&
                         node->parent->type == CMARK_NODE_ITEM;

    if (!first_in_list_item) {
      BLANKLINE();
    }
    info = cmark_node_get_fence_info(node);
    info_len = strlen(info);
    // Fence with backticks unless the info string itself contains one, in
    // which case tildes are used.
    fencechar[0] = strchr(info, '`') == NULL ? '`' : '~';
    code = cmark_node_get_literal(node);
    code_len = strlen(code);
    // use indented form if no info, and code doesn't
    // begin or end with a blank line, and code isn't
    // first thing in a list item
    if (info_len == 0 && (code_len > 2 && !cmark_isspace(code[0]) &&
                          !(cmark_isspace(code[code_len - 1]) &&
                            cmark_isspace(code[code_len - 2]))) &&
        !first_in_list_item) {
      // Indented form: four spaces on the first line and on the prefix of
      // every following line of the block.
      LIT("    ");
      cmark_strbuf_puts(renderer->prefix, "    ");
      OUT(cmark_node_get_literal(node), false, LITERAL);
      cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 4);
    } else {
      // Fenced form: the fence is one character longer than the longest
      // backtick run in the code, and at least three characters long.
      numticks = longest_backtick_sequence(code) + 1;
      if (numticks < 3) {
        numticks = 3;
      }
      for (i = 0; i < numticks; i++) {
        LIT(fencechar);
      }
      LIT(" ");
      OUT(info, false, LITERAL);
      CR();
      OUT(cmark_node_get_literal(node), false, LITERAL);
      CR();
      for (i = 0; i < numticks; i++) {
        LIT(fencechar);
      }
    }
    BLANKLINE();
    break;

  case CMARK_NODE_HTML_BLOCK:
    BLANKLINE();
    OUT(cmark_node_get_literal(node), false, LITERAL);
    BLANKLINE();
    break;

  case CMARK_NODE_CUSTOM_BLOCK:
    BLANKLINE();
    OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
        false, LITERAL);
    BLANKLINE();
    break;

  case CMARK_NODE_THEMATIC_BREAK:
    BLANKLINE();
    LIT("-----");
    BLANKLINE();
    break;

  case CMARK_NODE_PARAGRAPH:
    if (!entering) {
      BLANKLINE();
    }
    break;

  case CMARK_NODE_TEXT:
    OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
    break;

  case CMARK_NODE_LINEBREAK:
    // A hard break is written as two trailing spaces plus a newline; with
    // CMARK_OPT_HARDBREAKS the two spaces are omitted.
    if (!(CMARK_OPT_HARDBREAKS & options)) {
      LIT("  ");
    }
    CR();
    break;

  case CMARK_NODE_SOFTBREAK:
    if (CMARK_OPT_HARDBREAKS & options) {
      // Soft breaks are rendered as hard breaks.
      LIT("  ");
      CR();
    } else if (!renderer->no_linebreaks && renderer->width == 0 &&
               !(CMARK_OPT_HARDBREAKS & options) &&
               !(CMARK_OPT_NOBREAKS & options)) {
      // No wrapping width: keep the line break from the source.
      CR();
    } else {
      // Otherwise a soft break becomes a (possibly wrappable) space.
      OUT(" ", allow_wrap, LITERAL);
    }
    break;

  case CMARK_NODE_CODE:
    code = cmark_node_get_literal(node);
    code_len = strlen(code);
    // Delimiter length that does not collide with any backtick run inside
    // the code span.
    numticks = shortest_unused_backtick_sequence(code);
    // Pad with a space on both sides when the code is empty or begins/ends
    // with a backtick or a space.
    extra_spaces = code_len == 0 ||
	    code[0] == '`' || code[code_len - 1] == '`' ||
	    code[0] == ' ' || code[code_len - 1] == ' ';
    for (i = 0; i < numticks; i++) {
      LIT("`");
    }
    if (extra_spaces) {
      LIT(" ");
    }
    OUT(cmark_node_get_literal(node), allow_wrap, LITERAL);
    if (extra_spaces) {
      LIT(" ");
    }
    for (i = 0; i < numticks; i++) {
      LIT("`");
    }
    break;

  case CMARK_NODE_HTML_INLINE:
    OUT(cmark_node_get_literal(node), false, LITERAL);
    break;

  case CMARK_NODE_CUSTOM_INLINE:
    OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
        false, LITERAL);
    break;

  case CMARK_NODE_STRONG:
    // The same delimiter opens and closes strong emphasis.
    if (entering) {
      LIT("**");
    } else {
      LIT("**");
    }
    break;

  case CMARK_NODE_EMPH:
    // If we have EMPH(EMPH(x)), we need to use *_x_*
    // because **x** is STRONG(x):
    if (node->parent && node->parent->type == CMARK_NODE_EMPH &&
        node->next == NULL && node->prev == NULL) {
      emph_delim = "_";
    } else {
      emph_delim = "*";
    }
    // The same delimiter opens and closes the emphasis.
    if (entering) {
      LIT(emph_delim);
    } else {
      LIT(emph_delim);
    }
    break;

  case CMARK_NODE_LINK:
    if (is_autolink(node)) {
      if (entering) {
        // Autolink form: <url>, with a leading "mailto:" stripped.
        LIT("<");
        if (strncmp(cmark_node_get_url(node), "mailto:", 7) == 0) {
          LIT((const char *)cmark_node_get_url(node) + 7);
        } else {
          LIT((const char *)cmark_node_get_url(node));
        }
        LIT(">");
        // return signal to skip contents of node...
        return 0;
      }
    } else {
      // Inline form: "[" on entry, then "](url "title")" on exit once the
      // link text has been rendered.
      if (entering) {
        LIT("[");
      } else {
        LIT("](");
        OUT(cmark_node_get_url(node), false, URL);
        title = cmark_node_get_title(node);
        if (strlen(title) > 0) {
          LIT(" \"");
          OUT(title, false, TITLE);
          LIT("\"");
        }
        LIT(")");
      }
    }
    break;

  case CMARK_NODE_IMAGE:
    // Same shape as an inline link, prefixed with '!'.
    if (entering) {
      LIT("![");
    } else {
      LIT("](");
      OUT(cmark_node_get_url(node), false, URL);
      title = cmark_node_get_title(node);
      if (strlen(title) > 0) {
        OUT(" \"", allow_wrap, LITERAL);
        OUT(title, false, TITLE);
        LIT("\"");
      }
      LIT(")");
    }
    break;

  case CMARK_NODE_FOOTNOTE_REFERENCE:
    if (entering) {
      LIT("[^");

      // The label is read from the referenced definition's literal chunk and
      // copied into a temporary buffer one byte longer than the label, so
      // that it can be passed to OUT as a C string (relies on the
      // allocator's calloc zero-filling the buffer -- TODO confirm).
      char *footnote_label = renderer->mem->calloc(node->parent_footnote_def->as.literal.len + 1, sizeof(char));
      memmove(footnote_label, node->parent_footnote_def->as.literal.data, node->parent_footnote_def->as.literal.len);

      OUT(footnote_label, false, LITERAL);
      renderer->mem->free(footnote_label);

      LIT("]");
    }
    break;

  case CMARK_NODE_FOOTNOTE_DEFINITION:
    if (entering) {
      renderer->footnote_ix += 1;
      LIT("[^");

      // Same temporary C-string copy of the label as for footnote
      // references, here taken from the definition node itself.
      char *footnote_label = renderer->mem->calloc(node->as.literal.len + 1, sizeof(char));
      memmove(footnote_label, node->as.literal.data, node->as.literal.len);

      OUT(footnote_label, false, LITERAL);
      renderer->mem->free(footnote_label);

      LIT("]:\n");

      // The definition body is indented by four spaces.
      cmark_strbuf_puts(renderer->prefix, "    ");
    } else {
      // Pop the four-space indentation added on entry.
      cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 4);
    }
    break;

  default:
    // Every node type without an extension renderer must be handled above.
    assert(false);
    break;
  }

  return 1;
}

// Renders the tree rooted at `root` as CommonMark, using the allocator
// returned by cmark_node_mem(root) for the resulting string.
char *cmark_render_commonmark(cmark_node *root, int options, int width) {
  cmark_mem *allocator = cmark_node_mem(root);

  return cmark_render_commonmark_with_mem(root, options, width, allocator);
}

// Renders the tree rooted at `root` as CommonMark, allocating the result
// with `mem`. `width` is the wrap column passed on to cmark_render.
char *cmark_render_commonmark_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) {
  // Width-based breaking is disabled under CMARK_OPT_HARDBREAKS, since the
  // width has a different meaning with that option.
  const int render_width = (options & CMARK_OPT_HARDBREAKS) ? 0 : width;

  return cmark_render(mem, root, options, render_width, outc, S_render_node);
}
cmarkgfm/third_party/cmark/src/html.h0000644000175000017500000000132214210444464020060 0ustar  carstencarsten#ifndef CMARK_HTML_H
#define CMARK_HTML_H

#include "buffer.h"
#include "node.h"

// Appends a newline to `html` unless the buffer is empty or already ends
// with one.
CMARK_INLINE
static void cmark_html_render_cr(cmark_strbuf *html) {
  if (!html->size) {
    return;
  }
  if (html->ptr[html->size - 1] == '\n') {
    return;
  }
  cmark_strbuf_putc(html, '\n');
}

#define BUFFER_SIZE 100

// When CMARK_OPT_SOURCEPOS is set in `options`, appends a
// ` data-sourcepos="startline:startcol-endline:endcol"` attribute for `node`
// to `html`; otherwise does nothing.
CMARK_INLINE
static void cmark_html_render_sourcepos(cmark_node *node, cmark_strbuf *html, int options) {
  char attr[BUFFER_SIZE];

  if (!(CMARK_OPT_SOURCEPOS & options)) {
    return;
  }

  snprintf(attr, BUFFER_SIZE, " data-sourcepos=\"%d:%d-%d:%d\"",
           cmark_node_get_start_line(node), cmark_node_get_start_column(node),
           cmark_node_get_end_line(node), cmark_node_get_end_column(node));
  cmark_strbuf_puts(html, attr);
}


#endif
cmarkgfm/third_party/cmark/src/entities.inc0000644000175000017500000031047614210444464021277 0ustar  carstencarsten/* Autogenerated by tools/make_headers_inc.py */

/* One row of the named-entity table. */
struct cmark_entity_node {
  /* Entity name as a NUL-terminated string, without the surrounding
   * '&' and ';' (e.g. "amp"). */
  unsigned char *entity;
  /* Zero-terminated replacement bytes for the entity; the table rows store
   * what looks like the UTF-8 encoding of the replacement text. */
  unsigned char bytes[8];
};

/* NOTE(review): presumably the bounds on entity-name length accepted by the
 * lookup code -- confirm against the code that includes this file. */
#define CMARK_ENTITY_MIN_LENGTH 2
#define CMARK_ENTITY_MAX_LENGTH 32
/* NOTE(review): presumably the number of rows in cmark_entities[] -- confirm
 * against the generator script. */
#define CMARK_NUM_ENTITIES 2125

static const struct cmark_entity_node cmark_entities[] = {
{(unsigned char*)"AElig", {195, 134, 0}},
{(unsigned char*)"AMP", {38, 0}},
{(unsigned char*)"Aacute", {195, 129, 0}},
{(unsigned char*)"Abreve", {196, 130, 0}},
{(unsigned char*)"Acirc", {195, 130, 0}},
{(unsigned char*)"Acy", {208, 144, 0}},
{(unsigned char*)"Afr", {240, 157, 148, 132, 0}},
{(unsigned char*)"Agrave", {195, 128, 0}},
{(unsigned char*)"Alpha", {206, 145, 0}},
{(unsigned char*)"Amacr", {196, 128, 0}},
{(unsigned char*)"And", {226, 169, 147, 0}},
{(unsigned char*)"Aogon", {196, 132, 0}},
{(unsigned char*)"Aopf", {240, 157, 148, 184, 0}},
{(unsigned char*)"ApplyFunction", {226, 129, 161, 0}},
{(unsigned char*)"Aring", {195, 133, 0}},
{(unsigned char*)"Ascr", {240, 157, 146, 156, 0}},
{(unsigned char*)"Assign", {226, 137, 148, 0}},
{(unsigned char*)"Atilde", {195, 131, 0}},
{(unsigned char*)"Auml", {195, 132, 0}},
{(unsigned char*)"Backslash", {226, 136, 150, 0}},
{(unsigned char*)"Barv", {226, 171, 167, 0}},
{(unsigned char*)"Barwed", {226, 140, 134, 0}},
{(unsigned char*)"Bcy", {208, 145, 0}},
{(unsigned char*)"Because", {226, 136, 181, 0}},
{(unsigned char*)"Bernoullis", {226, 132, 172, 0}},
{(unsigned char*)"Beta", {206, 146, 0}},
{(unsigned char*)"Bfr", {240, 157, 148, 133, 0}},
{(unsigned char*)"Bopf", {240, 157, 148, 185, 0}},
{(unsigned char*)"Breve", {203, 152, 0}},
{(unsigned char*)"Bscr", {226, 132, 172, 0}},
{(unsigned char*)"Bumpeq", {226, 137, 142, 0}},
{(unsigned char*)"CHcy", {208, 167, 0}},
{(unsigned char*)"COPY", {194, 169, 0}},
{(unsigned char*)"Cacute", {196, 134, 0}},
{(unsigned char*)"Cap", {226, 139, 146, 0}},
{(unsigned char*)"CapitalDifferentialD", {226, 133, 133, 0}},
{(unsigned char*)"Cayleys", {226, 132, 173, 0}},
{(unsigned char*)"Ccaron", {196, 140, 0}},
{(unsigned char*)"Ccedil", {195, 135, 0}},
{(unsigned char*)"Ccirc", {196, 136, 0}},
{(unsigned char*)"Cconint", {226, 136, 176, 0}},
{(unsigned char*)"Cdot", {196, 138, 0}},
{(unsigned char*)"Cedilla", {194, 184, 0}},
{(unsigned char*)"CenterDot", {194, 183, 0}},
{(unsigned char*)"Cfr", {226, 132, 173, 0}},
{(unsigned char*)"Chi", {206, 167, 0}},
{(unsigned char*)"CircleDot", {226, 138, 153, 0}},
{(unsigned char*)"CircleMinus", {226, 138, 150, 0}},
{(unsigned char*)"CirclePlus", {226, 138, 149, 0}},
{(unsigned char*)"CircleTimes", {226, 138, 151, 0}},
{(unsigned char*)"ClockwiseContourIntegral", {226, 136, 178, 0}},
{(unsigned char*)"CloseCurlyDoubleQuote", {226, 128, 157, 0}},
{(unsigned char*)"CloseCurlyQuote", {226, 128, 153, 0}},
{(unsigned char*)"Colon", {226, 136, 183, 0}},
{(unsigned char*)"Colone", {226, 169, 180, 0}},
{(unsigned char*)"Congruent", {226, 137, 161, 0}},
{(unsigned char*)"Conint", {226, 136, 175, 0}},
{(unsigned char*)"ContourIntegral", {226, 136, 174, 0}},
{(unsigned char*)"Copf", {226, 132, 130, 0}},
{(unsigned char*)"Coproduct", {226, 136, 144, 0}},
{(unsigned char*)"CounterClockwiseContourIntegral", {226, 136, 179, 0}},
{(unsigned char*)"Cross", {226, 168, 175, 0}},
{(unsigned char*)"Cscr", {240, 157, 146, 158, 0}},
{(unsigned char*)"Cup", {226, 139, 147, 0}},
{(unsigned char*)"CupCap", {226, 137, 141, 0}},
{(unsigned char*)"DD", {226, 133, 133, 0}},
{(unsigned char*)"DDotrahd", {226, 164, 145, 0}},
{(unsigned char*)"DJcy", {208, 130, 0}},
{(unsigned char*)"DScy", {208, 133, 0}},
{(unsigned char*)"DZcy", {208, 143, 0}},
{(unsigned char*)"Dagger", {226, 128, 161, 0}},
{(unsigned char*)"Darr", {226, 134, 161, 0}},
{(unsigned char*)"Dashv", {226, 171, 164, 0}},
{(unsigned char*)"Dcaron", {196, 142, 0}},
{(unsigned char*)"Dcy", {208, 148, 0}},
{(unsigned char*)"Del", {226, 136, 135, 0}},
{(unsigned char*)"Delta", {206, 148, 0}},
{(unsigned char*)"Dfr", {240, 157, 148, 135, 0}},
{(unsigned char*)"DiacriticalAcute", {194, 180, 0}},
{(unsigned char*)"DiacriticalDot", {203, 153, 0}},
{(unsigned char*)"DiacriticalDoubleAcute", {203, 157, 0}},
{(unsigned char*)"DiacriticalGrave", {96, 0}},
{(unsigned char*)"DiacriticalTilde", {203, 156, 0}},
{(unsigned char*)"Diamond", {226, 139, 132, 0}},
{(unsigned char*)"DifferentialD", {226, 133, 134, 0}},
{(unsigned char*)"Dopf", {240, 157, 148, 187, 0}},
{(unsigned char*)"Dot", {194, 168, 0}},
{(unsigned char*)"DotDot", {226, 131, 156, 0}},
{(unsigned char*)"DotEqual", {226, 137, 144, 0}},
{(unsigned char*)"DoubleContourIntegral", {226, 136, 175, 0}},
{(unsigned char*)"DoubleDot", {194, 168, 0}},
{(unsigned char*)"DoubleDownArrow", {226, 135, 147, 0}},
{(unsigned char*)"DoubleLeftArrow", {226, 135, 144, 0}},
{(unsigned char*)"DoubleLeftRightArrow", {226, 135, 148, 0}},
{(unsigned char*)"DoubleLeftTee", {226, 171, 164, 0}},
{(unsigned char*)"DoubleLongLeftArrow", {226, 159, 184, 0}},
{(unsigned char*)"DoubleLongLeftRightArrow", {226, 159, 186, 0}},
{(unsigned char*)"DoubleLongRightArrow", {226, 159, 185, 0}},
{(unsigned char*)"DoubleRightArrow", {226, 135, 146, 0}},
{(unsigned char*)"DoubleRightTee", {226, 138, 168, 0}},
{(unsigned char*)"DoubleUpArrow", {226, 135, 145, 0}},
{(unsigned char*)"DoubleUpDownArrow", {226, 135, 149, 0}},
{(unsigned char*)"DoubleVerticalBar", {226, 136, 165, 0}},
{(unsigned char*)"DownArrow", {226, 134, 147, 0}},
{(unsigned char*)"DownArrowBar", {226, 164, 147, 0}},
{(unsigned char*)"DownArrowUpArrow", {226, 135, 181, 0}},
{(unsigned char*)"DownBreve", {204, 145, 0}},
{(unsigned char*)"DownLeftRightVector", {226, 165, 144, 0}},
{(unsigned char*)"DownLeftTeeVector", {226, 165, 158, 0}},
{(unsigned char*)"DownLeftVector", {226, 134, 189, 0}},
{(unsigned char*)"DownLeftVectorBar", {226, 165, 150, 0}},
{(unsigned char*)"DownRightTeeVector", {226, 165, 159, 0}},
{(unsigned char*)"DownRightVector", {226, 135, 129, 0}},
{(unsigned char*)"DownRightVectorBar", {226, 165, 151, 0}},
{(unsigned char*)"DownTee", {226, 138, 164, 0}},
{(unsigned char*)"DownTeeArrow", {226, 134, 167, 0}},
{(unsigned char*)"Downarrow", {226, 135, 147, 0}},
{(unsigned char*)"Dscr", {240, 157, 146, 159, 0}},
{(unsigned char*)"Dstrok", {196, 144, 0}},
{(unsigned char*)"ENG", {197, 138, 0}},
{(unsigned char*)"ETH", {195, 144, 0}},
{(unsigned char*)"Eacute", {195, 137, 0}},
{(unsigned char*)"Ecaron", {196, 154, 0}},
{(unsigned char*)"Ecirc", {195, 138, 0}},
{(unsigned char*)"Ecy", {208, 173, 0}},
{(unsigned char*)"Edot", {196, 150, 0}},
{(unsigned char*)"Efr", {240, 157, 148, 136, 0}},
{(unsigned char*)"Egrave", {195, 136, 0}},
{(unsigned char*)"Element", {226, 136, 136, 0}},
{(unsigned char*)"Emacr", {196, 146, 0}},
{(unsigned char*)"EmptySmallSquare", {226, 151, 187, 0}},
{(unsigned char*)"EmptyVerySmallSquare", {226, 150, 171, 0}},
{(unsigned char*)"Eogon", {196, 152, 0}},
{(unsigned char*)"Eopf", {240, 157, 148, 188, 0}},
{(unsigned char*)"Epsilon", {206, 149, 0}},
{(unsigned char*)"Equal", {226, 169, 181, 0}},
{(unsigned char*)"EqualTilde", {226, 137, 130, 0}},
{(unsigned char*)"Equilibrium", {226, 135, 140, 0}},
{(unsigned char*)"Escr", {226, 132, 176, 0}},
{(unsigned char*)"Esim", {226, 169, 179, 0}},
{(unsigned char*)"Eta", {206, 151, 0}},
{(unsigned char*)"Euml", {195, 139, 0}},
{(unsigned char*)"Exists", {226, 136, 131, 0}},
{(unsigned char*)"ExponentialE", {226, 133, 135, 0}},
{(unsigned char*)"Fcy", {208, 164, 0}},
{(unsigned char*)"Ffr", {240, 157, 148, 137, 0}},
{(unsigned char*)"FilledSmallSquare", {226, 151, 188, 0}},
{(unsigned char*)"FilledVerySmallSquare", {226, 150, 170, 0}},
{(unsigned char*)"Fopf", {240, 157, 148, 189, 0}},
{(unsigned char*)"ForAll", {226, 136, 128, 0}},
{(unsigned char*)"Fouriertrf", {226, 132, 177, 0}},
{(unsigned char*)"Fscr", {226, 132, 177, 0}},
{(unsigned char*)"GJcy", {208, 131, 0}},
{(unsigned char*)"GT", {62, 0}},
{(unsigned char*)"Gamma", {206, 147, 0}},
{(unsigned char*)"Gammad", {207, 156, 0}},
{(unsigned char*)"Gbreve", {196, 158, 0}},
{(unsigned char*)"Gcedil", {196, 162, 0}},
{(unsigned char*)"Gcirc", {196, 156, 0}},
{(unsigned char*)"Gcy", {208, 147, 0}},
{(unsigned char*)"Gdot", {196, 160, 0}},
{(unsigned char*)"Gfr", {240, 157, 148, 138, 0}},
{(unsigned char*)"Gg", {226, 139, 153, 0}},
{(unsigned char*)"Gopf", {240, 157, 148, 190, 0}},
{(unsigned char*)"GreaterEqual", {226, 137, 165, 0}},
{(unsigned char*)"GreaterEqualLess", {226, 139, 155, 0}},
{(unsigned char*)"GreaterFullEqual", {226, 137, 167, 0}},
{(unsigned char*)"GreaterGreater", {226, 170, 162, 0}},
{(unsigned char*)"GreaterLess", {226, 137, 183, 0}},
{(unsigned char*)"GreaterSlantEqual", {226, 169, 190, 0}},
{(unsigned char*)"GreaterTilde", {226, 137, 179, 0}},
{(unsigned char*)"Gscr", {240, 157, 146, 162, 0}},
{(unsigned char*)"Gt", {226, 137, 171, 0}},
{(unsigned char*)"HARDcy", {208, 170, 0}},
{(unsigned char*)"Hacek", {203, 135, 0}},
{(unsigned char*)"Hat", {94, 0}},
{(unsigned char*)"Hcirc", {196, 164, 0}},
{(unsigned char*)"Hfr", {226, 132, 140, 0}},
{(unsigned char*)"HilbertSpace", {226, 132, 139, 0}},
{(unsigned char*)"Hopf", {226, 132, 141, 0}},
{(unsigned char*)"HorizontalLine", {226, 148, 128, 0}},
{(unsigned char*)"Hscr", {226, 132, 139, 0}},
{(unsigned char*)"Hstrok", {196, 166, 0}},
{(unsigned char*)"HumpDownHump", {226, 137, 142, 0}},
{(unsigned char*)"HumpEqual", {226, 137, 143, 0}},
{(unsigned char*)"IEcy", {208, 149, 0}},
{(unsigned char*)"IJlig", {196, 178, 0}},
{(unsigned char*)"IOcy", {208, 129, 0}},
{(unsigned char*)"Iacute", {195, 141, 0}},
{(unsigned char*)"Icirc", {195, 142, 0}},
{(unsigned char*)"Icy", {208, 152, 0}},
{(unsigned char*)"Idot", {196, 176, 0}},
{(unsigned char*)"Ifr", {226, 132, 145, 0}},
{(unsigned char*)"Igrave", {195, 140, 0}},
{(unsigned char*)"Im", {226, 132, 145, 0}},
{(unsigned char*)"Imacr", {196, 170, 0}},
{(unsigned char*)"ImaginaryI", {226, 133, 136, 0}},
{(unsigned char*)"Implies", {226, 135, 146, 0}},
{(unsigned char*)"Int", {226, 136, 172, 0}},
{(unsigned char*)"Integral", {226, 136, 171, 0}},
{(unsigned char*)"Intersection", {226, 139, 130, 0}},
{(unsigned char*)"InvisibleComma", {226, 129, 163, 0}},
{(unsigned char*)"InvisibleTimes", {226, 129, 162, 0}},
{(unsigned char*)"Iogon", {196, 174, 0}},
{(unsigned char*)"Iopf", {240, 157, 149, 128, 0}},
{(unsigned char*)"Iota", {206, 153, 0}},
{(unsigned char*)"Iscr", {226, 132, 144, 0}},
{(unsigned char*)"Itilde", {196, 168, 0}},
{(unsigned char*)"Iukcy", {208, 134, 0}},
{(unsigned char*)"Iuml", {195, 143, 0}},
{(unsigned char*)"Jcirc", {196, 180, 0}},
{(unsigned char*)"Jcy", {208, 153, 0}},
{(unsigned char*)"Jfr", {240, 157, 148, 141, 0}},
{(unsigned char*)"Jopf", {240, 157, 149, 129, 0}},
{(unsigned char*)"Jscr", {240, 157, 146, 165, 0}},
{(unsigned char*)"Jsercy", {208, 136, 0}},
{(unsigned char*)"Jukcy", {208, 132, 0}},
{(unsigned char*)"KHcy", {208, 165, 0}},
{(unsigned char*)"KJcy", {208, 140, 0}},
{(unsigned char*)"Kappa", {206, 154, 0}},
{(unsigned char*)"Kcedil", {196, 182, 0}},
{(unsigned char*)"Kcy", {208, 154, 0}},
{(unsigned char*)"Kfr", {240, 157, 148, 142, 0}},
{(unsigned char*)"Kopf", {240, 157, 149, 130, 0}},
{(unsigned char*)"Kscr", {240, 157, 146, 166, 0}},
{(unsigned char*)"LJcy", {208, 137, 0}},
{(unsigned char*)"LT", {60, 0}},
{(unsigned char*)"Lacute", {196, 185, 0}},
{(unsigned char*)"Lambda", {206, 155, 0}},
{(unsigned char*)"Lang", {226, 159, 170, 0}},
{(unsigned char*)"Laplacetrf", {226, 132, 146, 0}},
{(unsigned char*)"Larr", {226, 134, 158, 0}},
{(unsigned char*)"Lcaron", {196, 189, 0}},
{(unsigned char*)"Lcedil", {196, 187, 0}},
{(unsigned char*)"Lcy", {208, 155, 0}},
{(unsigned char*)"LeftAngleBracket", {226, 159, 168, 0}},
{(unsigned char*)"LeftArrow", {226, 134, 144, 0}},
{(unsigned char*)"LeftArrowBar", {226, 135, 164, 0}},
{(unsigned char*)"LeftArrowRightArrow", {226, 135, 134, 0}},
{(unsigned char*)"LeftCeiling", {226, 140, 136, 0}},
{(unsigned char*)"LeftDoubleBracket", {226, 159, 166, 0}},
{(unsigned char*)"LeftDownTeeVector", {226, 165, 161, 0}},
{(unsigned char*)"LeftDownVector", {226, 135, 131, 0}},
{(unsigned char*)"LeftDownVectorBar", {226, 165, 153, 0}},
{(unsigned char*)"LeftFloor", {226, 140, 138, 0}},
{(unsigned char*)"LeftRightArrow", {226, 134, 148, 0}},
{(unsigned char*)"LeftRightVector", {226, 165, 142, 0}},
{(unsigned char*)"LeftTee", {226, 138, 163, 0}},
{(unsigned char*)"LeftTeeArrow", {226, 134, 164, 0}},
{(unsigned char*)"LeftTeeVector", {226, 165, 154, 0}},
{(unsigned char*)"LeftTriangle", {226, 138, 178, 0}},
{(unsigned char*)"LeftTriangleBar", {226, 167, 143, 0}},
{(unsigned char*)"LeftTriangleEqual", {226, 138, 180, 0}},
{(unsigned char*)"LeftUpDownVector", {226, 165, 145, 0}},
{(unsigned char*)"LeftUpTeeVector", {226, 165, 160, 0}},
{(unsigned char*)"LeftUpVector", {226, 134, 191, 0}},
{(unsigned char*)"LeftUpVectorBar", {226, 165, 152, 0}},
{(unsigned char*)"LeftVector", {226, 134, 188, 0}},
{(unsigned char*)"LeftVectorBar", {226, 165, 146, 0}},
{(unsigned char*)"Leftarrow", {226, 135, 144, 0}},
{(unsigned char*)"Leftrightarrow", {226, 135, 148, 0}},
{(unsigned char*)"LessEqualGreater", {226, 139, 154, 0}},
{(unsigned char*)"LessFullEqual", {226, 137, 166, 0}},
{(unsigned char*)"LessGreater", {226, 137, 182, 0}},
{(unsigned char*)"LessLess", {226, 170, 161, 0}},
{(unsigned char*)"LessSlantEqual", {226, 169, 189, 0}},
{(unsigned char*)"LessTilde", {226, 137, 178, 0}},
{(unsigned char*)"Lfr", {240, 157, 148, 143, 0}},
{(unsigned char*)"Ll", {226, 139, 152, 0}},
{(unsigned char*)"Lleftarrow", {226, 135, 154, 0}},
{(unsigned char*)"Lmidot", {196, 191, 0}},
{(unsigned char*)"LongLeftArrow", {226, 159, 181, 0}},
{(unsigned char*)"LongLeftRightArrow", {226, 159, 183, 0}},
{(unsigned char*)"LongRightArrow", {226, 159, 182, 0}},
{(unsigned char*)"Longleftarrow", {226, 159, 184, 0}},
{(unsigned char*)"Longleftrightarrow", {226, 159, 186, 0}},
{(unsigned char*)"Longrightarrow", {226, 159, 185, 0}},
{(unsigned char*)"Lopf", {240, 157, 149, 131, 0}},
{(unsigned char*)"LowerLeftArrow", {226, 134, 153, 0}},
{(unsigned char*)"LowerRightArrow", {226, 134, 152, 0}},
{(unsigned char*)"Lscr", {226, 132, 146, 0}},
{(unsigned char*)"Lsh", {226, 134, 176, 0}},
{(unsigned char*)"Lstrok", {197, 129, 0}},
{(unsigned char*)"Lt", {226, 137, 170, 0}},
{(unsigned char*)"Map", {226, 164, 133, 0}},
{(unsigned char*)"Mcy", {208, 156, 0}},
{(unsigned char*)"MediumSpace", {226, 129, 159, 0}},
{(unsigned char*)"Mellintrf", {226, 132, 179, 0}},
{(unsigned char*)"Mfr", {240, 157, 148, 144, 0}},
{(unsigned char*)"MinusPlus", {226, 136, 147, 0}},
{(unsigned char*)"Mopf", {240, 157, 149, 132, 0}},
{(unsigned char*)"Mscr", {226, 132, 179, 0}},
{(unsigned char*)"Mu", {206, 156, 0}},
{(unsigned char*)"NJcy", {208, 138, 0}},
{(unsigned char*)"Nacute", {197, 131, 0}},
{(unsigned char*)"Ncaron", {197, 135, 0}},
{(unsigned char*)"Ncedil", {197, 133, 0}},
{(unsigned char*)"Ncy", {208, 157, 0}},
{(unsigned char*)"NegativeMediumSpace", {226, 128, 139, 0}},
{(unsigned char*)"NegativeThickSpace", {226, 128, 139, 0}},
{(unsigned char*)"NegativeThinSpace", {226, 128, 139, 0}},
{(unsigned char*)"NegativeVeryThinSpace", {226, 128, 139, 0}},
{(unsigned char*)"NestedGreaterGreater", {226, 137, 171, 0}},
{(unsigned char*)"NestedLessLess", {226, 137, 170, 0}},
{(unsigned char*)"NewLine", {10, 0}},
{(unsigned char*)"Nfr", {240, 157, 148, 145, 0}},
{(unsigned char*)"NoBreak", {226, 129, 160, 0}},
{(unsigned char*)"NonBreakingSpace", {194, 160, 0}},
{(unsigned char*)"Nopf", {226, 132, 149, 0}},
{(unsigned char*)"Not", {226, 171, 172, 0}},
{(unsigned char*)"NotCongruent", {226, 137, 162, 0}},
{(unsigned char*)"NotCupCap", {226, 137, 173, 0}},
{(unsigned char*)"NotDoubleVerticalBar", {226, 136, 166, 0}},
{(unsigned char*)"NotElement", {226, 136, 137, 0}},
{(unsigned char*)"NotEqual", {226, 137, 160, 0}},
{(unsigned char*)"NotEqualTilde", {226, 137, 130, 204, 184, 0}},
{(unsigned char*)"NotExists", {226, 136, 132, 0}},
{(unsigned char*)"NotGreater", {226, 137, 175, 0}},
{(unsigned char*)"NotGreaterEqual", {226, 137, 177, 0}},
{(unsigned char*)"NotGreaterFullEqual", {226, 137, 167, 204, 184, 0}},
{(unsigned char*)"NotGreaterGreater", {226, 137, 171, 204, 184, 0}},
{(unsigned char*)"NotGreaterLess", {226, 137, 185, 0}},
{(unsigned char*)"NotGreaterSlantEqual", {226, 169, 190, 204, 184, 0}},
{(unsigned char*)"NotGreaterTilde", {226, 137, 181, 0}},
{(unsigned char*)"NotHumpDownHump", {226, 137, 142, 204, 184, 0}},
{(unsigned char*)"NotHumpEqual", {226, 137, 143, 204, 184, 0}},
{(unsigned char*)"NotLeftTriangle", {226, 139, 170, 0}},
{(unsigned char*)"NotLeftTriangleBar", {226, 167, 143, 204, 184, 0}},
{(unsigned char*)"NotLeftTriangleEqual", {226, 139, 172, 0}},
{(unsigned char*)"NotLess", {226, 137, 174, 0}},
{(unsigned char*)"NotLessEqual", {226, 137, 176, 0}},
{(unsigned char*)"NotLessGreater", {226, 137, 184, 0}},
{(unsigned char*)"NotLessLess", {226, 137, 170, 204, 184, 0}},
{(unsigned char*)"NotLessSlantEqual", {226, 169, 189, 204, 184, 0}},
{(unsigned char*)"NotLessTilde", {226, 137, 180, 0}},
{(unsigned char*)"NotNestedGreaterGreater", {226, 170, 162, 204, 184, 0}},
{(unsigned char*)"NotNestedLessLess", {226, 170, 161, 204, 184, 0}},
{(unsigned char*)"NotPrecedes", {226, 138, 128, 0}},
{(unsigned char*)"NotPrecedesEqual", {226, 170, 175, 204, 184, 0}},
{(unsigned char*)"NotPrecedesSlantEqual", {226, 139, 160, 0}},
{(unsigned char*)"NotReverseElement", {226, 136, 140, 0}},
{(unsigned char*)"NotRightTriangle", {226, 139, 171, 0}},
{(unsigned char*)"NotRightTriangleBar", {226, 167, 144, 204, 184, 0}},
{(unsigned char*)"NotRightTriangleEqual", {226, 139, 173, 0}},
{(unsigned char*)"NotSquareSubset", {226, 138, 143, 204, 184, 0}},
{(unsigned char*)"NotSquareSubsetEqual", {226, 139, 162, 0}},
{(unsigned char*)"NotSquareSuperset", {226, 138, 144, 204, 184, 0}},
{(unsigned char*)"NotSquareSupersetEqual", {226, 139, 163, 0}},
{(unsigned char*)"NotSubset", {226, 138, 130, 226, 131, 146, 0}},
{(unsigned char*)"NotSubsetEqual", {226, 138, 136, 0}},
{(unsigned char*)"NotSucceeds", {226, 138, 129, 0}},
{(unsigned char*)"NotSucceedsEqual", {226, 170, 176, 204, 184, 0}},
{(unsigned char*)"NotSucceedsSlantEqual", {226, 139, 161, 0}},
{(unsigned char*)"NotSucceedsTilde", {226, 137, 191, 204, 184, 0}},
{(unsigned char*)"NotSuperset", {226, 138, 131, 226, 131, 146, 0}},
{(unsigned char*)"NotSupersetEqual", {226, 138, 137, 0}},
{(unsigned char*)"NotTilde", {226, 137, 129, 0}},
{(unsigned char*)"NotTildeEqual", {226, 137, 132, 0}},
{(unsigned char*)"NotTildeFullEqual", {226, 137, 135, 0}},
{(unsigned char*)"NotTildeTilde", {226, 137, 137, 0}},
{(unsigned char*)"NotVerticalBar", {226, 136, 164, 0}},
{(unsigned char*)"Nscr", {240, 157, 146, 169, 0}},
{(unsigned char*)"Ntilde", {195, 145, 0}},
{(unsigned char*)"Nu", {206, 157, 0}},
{(unsigned char*)"OElig", {197, 146, 0}},
{(unsigned char*)"Oacute", {195, 147, 0}},
{(unsigned char*)"Ocirc", {195, 148, 0}},
{(unsigned char*)"Ocy", {208, 158, 0}},
{(unsigned char*)"Odblac", {197, 144, 0}},
{(unsigned char*)"Ofr", {240, 157, 148, 146, 0}},
{(unsigned char*)"Ograve", {195, 146, 0}},
{(unsigned char*)"Omacr", {197, 140, 0}},
{(unsigned char*)"Omega", {206, 169, 0}},
{(unsigned char*)"Omicron", {206, 159, 0}},
{(unsigned char*)"Oopf", {240, 157, 149, 134, 0}},
{(unsigned char*)"OpenCurlyDoubleQuote", {226, 128, 156, 0}},
{(unsigned char*)"OpenCurlyQuote", {226, 128, 152, 0}},
{(unsigned char*)"Or", {226, 169, 148, 0}},
{(unsigned char*)"Oscr", {240, 157, 146, 170, 0}},
{(unsigned char*)"Oslash", {195, 152, 0}},
{(unsigned char*)"Otilde", {195, 149, 0}},
{(unsigned char*)"Otimes", {226, 168, 183, 0}},
{(unsigned char*)"Ouml", {195, 150, 0}},
{(unsigned char*)"OverBar", {226, 128, 190, 0}},
{(unsigned char*)"OverBrace", {226, 143, 158, 0}},
{(unsigned char*)"OverBracket", {226, 142, 180, 0}},
{(unsigned char*)"OverParenthesis", {226, 143, 156, 0}},
{(unsigned char*)"PartialD", {226, 136, 130, 0}},
{(unsigned char*)"Pcy", {208, 159, 0}},
{(unsigned char*)"Pfr", {240, 157, 148, 147, 0}},
{(unsigned char*)"Phi", {206, 166, 0}},
{(unsigned char*)"Pi", {206, 160, 0}},
{(unsigned char*)"PlusMinus", {194, 177, 0}},
{(unsigned char*)"Poincareplane", {226, 132, 140, 0}},
{(unsigned char*)"Popf", {226, 132, 153, 0}},
{(unsigned char*)"Pr", {226, 170, 187, 0}},
{(unsigned char*)"Precedes", {226, 137, 186, 0}},
{(unsigned char*)"PrecedesEqual", {226, 170, 175, 0}},
{(unsigned char*)"PrecedesSlantEqual", {226, 137, 188, 0}},
{(unsigned char*)"PrecedesTilde", {226, 137, 190, 0}},
{(unsigned char*)"Prime", {226, 128, 179, 0}},
{(unsigned char*)"Product", {226, 136, 143, 0}},
{(unsigned char*)"Proportion", {226, 136, 183, 0}},
{(unsigned char*)"Proportional", {226, 136, 157, 0}},
{(unsigned char*)"Pscr", {240, 157, 146, 171, 0}},
{(unsigned char*)"Psi", {206, 168, 0}},
{(unsigned char*)"QUOT", {34, 0}},
{(unsigned char*)"Qfr", {240, 157, 148, 148, 0}},
{(unsigned char*)"Qopf", {226, 132, 154, 0}},
{(unsigned char*)"Qscr", {240, 157, 146, 172, 0}},
{(unsigned char*)"RBarr", {226, 164, 144, 0}},
{(unsigned char*)"REG", {194, 174, 0}},
{(unsigned char*)"Racute", {197, 148, 0}},
{(unsigned char*)"Rang", {226, 159, 171, 0}},
{(unsigned char*)"Rarr", {226, 134, 160, 0}},
{(unsigned char*)"Rarrtl", {226, 164, 150, 0}},
{(unsigned char*)"Rcaron", {197, 152, 0}},
{(unsigned char*)"Rcedil", {197, 150, 0}},
{(unsigned char*)"Rcy", {208, 160, 0}},
{(unsigned char*)"Re", {226, 132, 156, 0}},
{(unsigned char*)"ReverseElement", {226, 136, 139, 0}},
{(unsigned char*)"ReverseEquilibrium", {226, 135, 139, 0}},
{(unsigned char*)"ReverseUpEquilibrium", {226, 165, 175, 0}},
{(unsigned char*)"Rfr", {226, 132, 156, 0}},
{(unsigned char*)"Rho", {206, 161, 0}},
{(unsigned char*)"RightAngleBracket", {226, 159, 169, 0}},
{(unsigned char*)"RightArrow", {226, 134, 146, 0}},
{(unsigned char*)"RightArrowBar", {226, 135, 165, 0}},
{(unsigned char*)"RightArrowLeftArrow", {226, 135, 132, 0}},
{(unsigned char*)"RightCeiling", {226, 140, 137, 0}},
{(unsigned char*)"RightDoubleBracket", {226, 159, 167, 0}},
{(unsigned char*)"RightDownTeeVector", {226, 165, 157, 0}},
{(unsigned char*)"RightDownVector", {226, 135, 130, 0}},
{(unsigned char*)"RightDownVectorBar", {226, 165, 149, 0}},
{(unsigned char*)"RightFloor", {226, 140, 139, 0}},
{(unsigned char*)"RightTee", {226, 138, 162, 0}},
{(unsigned char*)"RightTeeArrow", {226, 134, 166, 0}},
{(unsigned char*)"RightTeeVector", {226, 165, 155, 0}},
{(unsigned char*)"RightTriangle", {226, 138, 179, 0}},
{(unsigned char*)"RightTriangleBar", {226, 167, 144, 0}},
{(unsigned char*)"RightTriangleEqual", {226, 138, 181, 0}},
{(unsigned char*)"RightUpDownVector", {226, 165, 143, 0}},
{(unsigned char*)"RightUpTeeVector", {226, 165, 156, 0}},
{(unsigned char*)"RightUpVector", {226, 134, 190, 0}},
{(unsigned char*)"RightUpVectorBar", {226, 165, 148, 0}},
{(unsigned char*)"RightVector", {226, 135, 128, 0}},
{(unsigned char*)"RightVectorBar", {226, 165, 147, 0}},
{(unsigned char*)"Rightarrow", {226, 135, 146, 0}},
{(unsigned char*)"Ropf", {226, 132, 157, 0}},
{(unsigned char*)"RoundImplies", {226, 165, 176, 0}},
{(unsigned char*)"Rrightarrow", {226, 135, 155, 0}},
{(unsigned char*)"Rscr", {226, 132, 155, 0}},
{(unsigned char*)"Rsh", {226, 134, 177, 0}},
{(unsigned char*)"RuleDelayed", {226, 167, 180, 0}},
{(unsigned char*)"SHCHcy", {208, 169, 0}},
{(unsigned char*)"SHcy", {208, 168, 0}},
{(unsigned char*)"SOFTcy", {208, 172, 0}},
{(unsigned char*)"Sacute", {197, 154, 0}},
{(unsigned char*)"Sc", {226, 170, 188, 0}},
{(unsigned char*)"Scaron", {197, 160, 0}},
{(unsigned char*)"Scedil", {197, 158, 0}},
{(unsigned char*)"Scirc", {197, 156, 0}},
{(unsigned char*)"Scy", {208, 161, 0}},
{(unsigned char*)"Sfr", {240, 157, 148, 150, 0}},
{(unsigned char*)"ShortDownArrow", {226, 134, 147, 0}},
{(unsigned char*)"ShortLeftArrow", {226, 134, 144, 0}},
{(unsigned char*)"ShortRightArrow", {226, 134, 146, 0}},
{(unsigned char*)"ShortUpArrow", {226, 134, 145, 0}},
{(unsigned char*)"Sigma", {206, 163, 0}},
{(unsigned char*)"SmallCircle", {226, 136, 152, 0}},
{(unsigned char*)"Sopf", {240, 157, 149, 138, 0}},
{(unsigned char*)"Sqrt", {226, 136, 154, 0}},
{(unsigned char*)"Square", {226, 150, 161, 0}},
{(unsigned char*)"SquareIntersection", {226, 138, 147, 0}},
{(unsigned char*)"SquareSubset", {226, 138, 143, 0}},
{(unsigned char*)"SquareSubsetEqual", {226, 138, 145, 0}},
{(unsigned char*)"SquareSuperset", {226, 138, 144, 0}},
{(unsigned char*)"SquareSupersetEqual", {226, 138, 146, 0}},
{(unsigned char*)"SquareUnion", {226, 138, 148, 0}},
{(unsigned char*)"Sscr", {240, 157, 146, 174, 0}},
{(unsigned char*)"Star", {226, 139, 134, 0}},
{(unsigned char*)"Sub", {226, 139, 144, 0}},
{(unsigned char*)"Subset", {226, 139, 144, 0}},
{(unsigned char*)"SubsetEqual", {226, 138, 134, 0}},
{(unsigned char*)"Succeeds", {226, 137, 187, 0}},
{(unsigned char*)"SucceedsEqual", {226, 170, 176, 0}},
{(unsigned char*)"SucceedsSlantEqual", {226, 137, 189, 0}},
{(unsigned char*)"SucceedsTilde", {226, 137, 191, 0}},
{(unsigned char*)"SuchThat", {226, 136, 139, 0}},
{(unsigned char*)"Sum", {226, 136, 145, 0}},
{(unsigned char*)"Sup", {226, 139, 145, 0}},
{(unsigned char*)"Superset", {226, 138, 131, 0}},
{(unsigned char*)"SupersetEqual", {226, 138, 135, 0}},
{(unsigned char*)"Supset", {226, 139, 145, 0}},
{(unsigned char*)"THORN", {195, 158, 0}},
{(unsigned char*)"TRADE", {226, 132, 162, 0}},
{(unsigned char*)"TSHcy", {208, 139, 0}},
{(unsigned char*)"TScy", {208, 166, 0}},
{(unsigned char*)"Tab", {9, 0}},
{(unsigned char*)"Tau", {206, 164, 0}},
{(unsigned char*)"Tcaron", {197, 164, 0}},
{(unsigned char*)"Tcedil", {197, 162, 0}},
{(unsigned char*)"Tcy", {208, 162, 0}},
{(unsigned char*)"Tfr", {240, 157, 148, 151, 0}},
{(unsigned char*)"Therefore", {226, 136, 180, 0}},
{(unsigned char*)"Theta", {206, 152, 0}},
{(unsigned char*)"ThickSpace", {226, 129, 159, 226, 128, 138, 0}},
{(unsigned char*)"ThinSpace", {226, 128, 137, 0}},
{(unsigned char*)"Tilde", {226, 136, 188, 0}},
{(unsigned char*)"TildeEqual", {226, 137, 131, 0}},
{(unsigned char*)"TildeFullEqual", {226, 137, 133, 0}},
{(unsigned char*)"TildeTilde", {226, 137, 136, 0}},
{(unsigned char*)"Topf", {240, 157, 149, 139, 0}},
{(unsigned char*)"TripleDot", {226, 131, 155, 0}},
{(unsigned char*)"Tscr", {240, 157, 146, 175, 0}},
{(unsigned char*)"Tstrok", {197, 166, 0}},
{(unsigned char*)"Uacute", {195, 154, 0}},
{(unsigned char*)"Uarr", {226, 134, 159, 0}},
{(unsigned char*)"Uarrocir", {226, 165, 137, 0}},
{(unsigned char*)"Ubrcy", {208, 142, 0}},
{(unsigned char*)"Ubreve", {197, 172, 0}},
{(unsigned char*)"Ucirc", {195, 155, 0}},
{(unsigned char*)"Ucy", {208, 163, 0}},
{(unsigned char*)"Udblac", {197, 176, 0}},
{(unsigned char*)"Ufr", {240, 157, 148, 152, 0}},
{(unsigned char*)"Ugrave", {195, 153, 0}},
{(unsigned char*)"Umacr", {197, 170, 0}},
{(unsigned char*)"UnderBar", {95, 0}},
{(unsigned char*)"UnderBrace", {226, 143, 159, 0}},
{(unsigned char*)"UnderBracket", {226, 142, 181, 0}},
{(unsigned char*)"UnderParenthesis", {226, 143, 157, 0}},
{(unsigned char*)"Union", {226, 139, 131, 0}},
{(unsigned char*)"UnionPlus", {226, 138, 142, 0}},
{(unsigned char*)"Uogon", {197, 178, 0}},
{(unsigned char*)"Uopf", {240, 157, 149, 140, 0}},
{(unsigned char*)"UpArrow", {226, 134, 145, 0}},
{(unsigned char*)"UpArrowBar", {226, 164, 146, 0}},
{(unsigned char*)"UpArrowDownArrow", {226, 135, 133, 0}},
{(unsigned char*)"UpDownArrow", {226, 134, 149, 0}},
{(unsigned char*)"UpEquilibrium", {226, 165, 174, 0}},
{(unsigned char*)"UpTee", {226, 138, 165, 0}},
{(unsigned char*)"UpTeeArrow", {226, 134, 165, 0}},
{(unsigned char*)"Uparrow", {226, 135, 145, 0}},
{(unsigned char*)"Updownarrow", {226, 135, 149, 0}},
{(unsigned char*)"UpperLeftArrow", {226, 134, 150, 0}},
{(unsigned char*)"UpperRightArrow", {226, 134, 151, 0}},
{(unsigned char*)"Upsi", {207, 146, 0}},
{(unsigned char*)"Upsilon", {206, 165, 0}},
{(unsigned char*)"Uring", {197, 174, 0}},
{(unsigned char*)"Uscr", {240, 157, 146, 176, 0}},
{(unsigned char*)"Utilde", {197, 168, 0}},
{(unsigned char*)"Uuml", {195, 156, 0}},
{(unsigned char*)"VDash", {226, 138, 171, 0}},
{(unsigned char*)"Vbar", {226, 171, 171, 0}},
{(unsigned char*)"Vcy", {208, 146, 0}},
{(unsigned char*)"Vdash", {226, 138, 169, 0}},
{(unsigned char*)"Vdashl", {226, 171, 166, 0}},
{(unsigned char*)"Vee", {226, 139, 129, 0}},
{(unsigned char*)"Verbar", {226, 128, 150, 0}},
{(unsigned char*)"Vert", {226, 128, 150, 0}},
{(unsigned char*)"VerticalBar", {226, 136, 163, 0}},
{(unsigned char*)"VerticalLine", {124, 0}},
{(unsigned char*)"VerticalSeparator", {226, 157, 152, 0}},
{(unsigned char*)"VerticalTilde", {226, 137, 128, 0}},
{(unsigned char*)"VeryThinSpace", {226, 128, 138, 0}},
{(unsigned char*)"Vfr", {240, 157, 148, 153, 0}},
{(unsigned char*)"Vopf", {240, 157, 149, 141, 0}},
{(unsigned char*)"Vscr", {240, 157, 146, 177, 0}},
{(unsigned char*)"Vvdash", {226, 138, 170, 0}},
{(unsigned char*)"Wcirc", {197, 180, 0}},
{(unsigned char*)"Wedge", {226, 139, 128, 0}},
{(unsigned char*)"Wfr", {240, 157, 148, 154, 0}},
{(unsigned char*)"Wopf", {240, 157, 149, 142, 0}},
{(unsigned char*)"Wscr", {240, 157, 146, 178, 0}},
{(unsigned char*)"Xfr", {240, 157, 148, 155, 0}},
{(unsigned char*)"Xi", {206, 158, 0}},
{(unsigned char*)"Xopf", {240, 157, 149, 143, 0}},
{(unsigned char*)"Xscr", {240, 157, 146, 179, 0}},
{(unsigned char*)"YAcy", {208, 175, 0}},
{(unsigned char*)"YIcy", {208, 135, 0}},
{(unsigned char*)"YUcy", {208, 174, 0}},
{(unsigned char*)"Yacute", {195, 157, 0}},
{(unsigned char*)"Ycirc", {197, 182, 0}},
{(unsigned char*)"Ycy", {208, 171, 0}},
{(unsigned char*)"Yfr", {240, 157, 148, 156, 0}},
{(unsigned char*)"Yopf", {240, 157, 149, 144, 0}},
{(unsigned char*)"Yscr", {240, 157, 146, 180, 0}},
{(unsigned char*)"Yuml", {197, 184, 0}},
{(unsigned char*)"ZHcy", {208, 150, 0}},
{(unsigned char*)"Zacute", {197, 185, 0}},
{(unsigned char*)"Zcaron", {197, 189, 0}},
{(unsigned char*)"Zcy", {208, 151, 0}},
{(unsigned char*)"Zdot", {197, 187, 0}},
{(unsigned char*)"ZeroWidthSpace", {226, 128, 139, 0}},
{(unsigned char*)"Zeta", {206, 150, 0}},
{(unsigned char*)"Zfr", {226, 132, 168, 0}},
{(unsigned char*)"Zopf", {226, 132, 164, 0}},
{(unsigned char*)"Zscr", {240, 157, 146, 181, 0}},
{(unsigned char*)"aacute", {195, 161, 0}},
{(unsigned char*)"abreve", {196, 131, 0}},
{(unsigned char*)"ac", {226, 136, 190, 0}},
{(unsigned char*)"acE", {226, 136, 190, 204, 179, 0}},
{(unsigned char*)"acd", {226, 136, 191, 0}},
{(unsigned char*)"acirc", {195, 162, 0}},
{(unsigned char*)"acute", {194, 180, 0}},
{(unsigned char*)"acy", {208, 176, 0}},
{(unsigned char*)"aelig", {195, 166, 0}},
{(unsigned char*)"af", {226, 129, 161, 0}},
{(unsigned char*)"afr", {240, 157, 148, 158, 0}},
{(unsigned char*)"agrave", {195, 160, 0}},
{(unsigned char*)"alefsym", {226, 132, 181, 0}},
{(unsigned char*)"aleph", {226, 132, 181, 0}},
{(unsigned char*)"alpha", {206, 177, 0}},
{(unsigned char*)"amacr", {196, 129, 0}},
{(unsigned char*)"amalg", {226, 168, 191, 0}},
{(unsigned char*)"amp", {38, 0}},
{(unsigned char*)"and", {226, 136, 167, 0}},
{(unsigned char*)"andand", {226, 169, 149, 0}},
{(unsigned char*)"andd", {226, 169, 156, 0}},
{(unsigned char*)"andslope", {226, 169, 152, 0}},
{(unsigned char*)"andv", {226, 169, 154, 0}},
{(unsigned char*)"ang", {226, 136, 160, 0}},
{(unsigned char*)"ange", {226, 166, 164, 0}},
{(unsigned char*)"angle", {226, 136, 160, 0}},
{(unsigned char*)"angmsd", {226, 136, 161, 0}},
{(unsigned char*)"angmsdaa", {226, 166, 168, 0}},
{(unsigned char*)"angmsdab", {226, 166, 169, 0}},
{(unsigned char*)"angmsdac", {226, 166, 170, 0}},
{(unsigned char*)"angmsdad", {226, 166, 171, 0}},
{(unsigned char*)"angmsdae", {226, 166, 172, 0}},
{(unsigned char*)"angmsdaf", {226, 166, 173, 0}},
{(unsigned char*)"angmsdag", {226, 166, 174, 0}},
{(unsigned char*)"angmsdah", {226, 166, 175, 0}},
{(unsigned char*)"angrt", {226, 136, 159, 0}},
{(unsigned char*)"angrtvb", {226, 138, 190, 0}},
{(unsigned char*)"angrtvbd", {226, 166, 157, 0}},
{(unsigned char*)"angsph", {226, 136, 162, 0}},
{(unsigned char*)"angst", {195, 133, 0}},
{(unsigned char*)"angzarr", {226, 141, 188, 0}},
{(unsigned char*)"aogon", {196, 133, 0}},
{(unsigned char*)"aopf", {240, 157, 149, 146, 0}},
{(unsigned char*)"ap", {226, 137, 136, 0}},
{(unsigned char*)"apE", {226, 169, 176, 0}},
{(unsigned char*)"apacir", {226, 169, 175, 0}},
{(unsigned char*)"ape", {226, 137, 138, 0}},
{(unsigned char*)"apid", {226, 137, 139, 0}},
{(unsigned char*)"apos", {39, 0}},
{(unsigned char*)"approx", {226, 137, 136, 0}},
{(unsigned char*)"approxeq", {226, 137, 138, 0}},
{(unsigned char*)"aring", {195, 165, 0}},
{(unsigned char*)"ascr", {240, 157, 146, 182, 0}},
{(unsigned char*)"ast", {42, 0}},
{(unsigned char*)"asymp", {226, 137, 136, 0}},
{(unsigned char*)"asympeq", {226, 137, 141, 0}},
{(unsigned char*)"atilde", {195, 163, 0}},
{(unsigned char*)"auml", {195, 164, 0}},
{(unsigned char*)"awconint", {226, 136, 179, 0}},
{(unsigned char*)"awint", {226, 168, 145, 0}},
{(unsigned char*)"bNot", {226, 171, 173, 0}},
{(unsigned char*)"backcong", {226, 137, 140, 0}},
{(unsigned char*)"backepsilon", {207, 182, 0}},
{(unsigned char*)"backprime", {226, 128, 181, 0}},
{(unsigned char*)"backsim", {226, 136, 189, 0}},
{(unsigned char*)"backsimeq", {226, 139, 141, 0}},
{(unsigned char*)"barvee", {226, 138, 189, 0}},
{(unsigned char*)"barwed", {226, 140, 133, 0}},
{(unsigned char*)"barwedge", {226, 140, 133, 0}},
{(unsigned char*)"bbrk", {226, 142, 181, 0}},
{(unsigned char*)"bbrktbrk", {226, 142, 182, 0}},
{(unsigned char*)"bcong", {226, 137, 140, 0}},
{(unsigned char*)"bcy", {208, 177, 0}},
{(unsigned char*)"bdquo", {226, 128, 158, 0}},
{(unsigned char*)"becaus", {226, 136, 181, 0}},
{(unsigned char*)"because", {226, 136, 181, 0}},
{(unsigned char*)"bemptyv", {226, 166, 176, 0}},
{(unsigned char*)"bepsi", {207, 182, 0}},
{(unsigned char*)"bernou", {226, 132, 172, 0}},
{(unsigned char*)"beta", {206, 178, 0}},
{(unsigned char*)"beth", {226, 132, 182, 0}},
{(unsigned char*)"between", {226, 137, 172, 0}},
{(unsigned char*)"bfr", {240, 157, 148, 159, 0}},
{(unsigned char*)"bigcap", {226, 139, 130, 0}},
{(unsigned char*)"bigcirc", {226, 151, 175, 0}},
{(unsigned char*)"bigcup", {226, 139, 131, 0}},
{(unsigned char*)"bigodot", {226, 168, 128, 0}},
{(unsigned char*)"bigoplus", {226, 168, 129, 0}},
{(unsigned char*)"bigotimes", {226, 168, 130, 0}},
{(unsigned char*)"bigsqcup", {226, 168, 134, 0}},
{(unsigned char*)"bigstar", {226, 152, 133, 0}},
{(unsigned char*)"bigtriangledown", {226, 150, 189, 0}},
{(unsigned char*)"bigtriangleup", {226, 150, 179, 0}},
{(unsigned char*)"biguplus", {226, 168, 132, 0}},
{(unsigned char*)"bigvee", {226, 139, 129, 0}},
{(unsigned char*)"bigwedge", {226, 139, 128, 0}},
{(unsigned char*)"bkarow", {226, 164, 141, 0}},
{(unsigned char*)"blacklozenge", {226, 167, 171, 0}},
{(unsigned char*)"blacksquare", {226, 150, 170, 0}},
{(unsigned char*)"blacktriangle", {226, 150, 180, 0}},
{(unsigned char*)"blacktriangledown", {226, 150, 190, 0}},
{(unsigned char*)"blacktriangleleft", {226, 151, 130, 0}},
{(unsigned char*)"blacktriangleright", {226, 150, 184, 0}},
{(unsigned char*)"blank", {226, 144, 163, 0}},
{(unsigned char*)"blk12", {226, 150, 146, 0}},
{(unsigned char*)"blk14", {226, 150, 145, 0}},
{(unsigned char*)"blk34", {226, 150, 147, 0}},
{(unsigned char*)"block", {226, 150, 136, 0}},
{(unsigned char*)"bne", {61, 226, 131, 165, 0}},
{(unsigned char*)"bnequiv", {226, 137, 161, 226, 131, 165, 0}},
{(unsigned char*)"bnot", {226, 140, 144, 0}},
{(unsigned char*)"bopf", {240, 157, 149, 147, 0}},
{(unsigned char*)"bot", {226, 138, 165, 0}},
{(unsigned char*)"bottom", {226, 138, 165, 0}},
{(unsigned char*)"bowtie", {226, 139, 136, 0}},
{(unsigned char*)"boxDL", {226, 149, 151, 0}},
{(unsigned char*)"boxDR", {226, 149, 148, 0}},
{(unsigned char*)"boxDl", {226, 149, 150, 0}},
{(unsigned char*)"boxDr", {226, 149, 147, 0}},
{(unsigned char*)"boxH", {226, 149, 144, 0}},
{(unsigned char*)"boxHD", {226, 149, 166, 0}},
{(unsigned char*)"boxHU", {226, 149, 169, 0}},
{(unsigned char*)"boxHd", {226, 149, 164, 0}},
{(unsigned char*)"boxHu", {226, 149, 167, 0}},
{(unsigned char*)"boxUL", {226, 149, 157, 0}},
{(unsigned char*)"boxUR", {226, 149, 154, 0}},
{(unsigned char*)"boxUl", {226, 149, 156, 0}},
{(unsigned char*)"boxUr", {226, 149, 153, 0}},
{(unsigned char*)"boxV", {226, 149, 145, 0}},
{(unsigned char*)"boxVH", {226, 149, 172, 0}},
{(unsigned char*)"boxVL", {226, 149, 163, 0}},
{(unsigned char*)"boxVR", {226, 149, 160, 0}},
{(unsigned char*)"boxVh", {226, 149, 171, 0}},
{(unsigned char*)"boxVl", {226, 149, 162, 0}},
{(unsigned char*)"boxVr", {226, 149, 159, 0}},
{(unsigned char*)"boxbox", {226, 167, 137, 0}},
{(unsigned char*)"boxdL", {226, 149, 149, 0}},
{(unsigned char*)"boxdR", {226, 149, 146, 0}},
{(unsigned char*)"boxdl", {226, 148, 144, 0}},
{(unsigned char*)"boxdr", {226, 148, 140, 0}},
{(unsigned char*)"boxh", {226, 148, 128, 0}},
{(unsigned char*)"boxhD", {226, 149, 165, 0}},
{(unsigned char*)"boxhU", {226, 149, 168, 0}},
{(unsigned char*)"boxhd", {226, 148, 172, 0}},
{(unsigned char*)"boxhu", {226, 148, 180, 0}},
{(unsigned char*)"boxminus", {226, 138, 159, 0}},
{(unsigned char*)"boxplus", {226, 138, 158, 0}},
{(unsigned char*)"boxtimes", {226, 138, 160, 0}},
{(unsigned char*)"boxuL", {226, 149, 155, 0}},
{(unsigned char*)"boxuR", {226, 149, 152, 0}},
{(unsigned char*)"boxul", {226, 148, 152, 0}},
{(unsigned char*)"boxur", {226, 148, 148, 0}},
{(unsigned char*)"boxv", {226, 148, 130, 0}},
{(unsigned char*)"boxvH", {226, 149, 170, 0}},
{(unsigned char*)"boxvL", {226, 149, 161, 0}},
{(unsigned char*)"boxvR", {226, 149, 158, 0}},
{(unsigned char*)"boxvh", {226, 148, 188, 0}},
{(unsigned char*)"boxvl", {226, 148, 164, 0}},
{(unsigned char*)"boxvr", {226, 148, 156, 0}},
{(unsigned char*)"bprime", {226, 128, 181, 0}},
{(unsigned char*)"breve", {203, 152, 0}},
{(unsigned char*)"brvbar", {194, 166, 0}},
{(unsigned char*)"bscr", {240, 157, 146, 183, 0}},
{(unsigned char*)"bsemi", {226, 129, 143, 0}},
{(unsigned char*)"bsim", {226, 136, 189, 0}},
{(unsigned char*)"bsime", {226, 139, 141, 0}},
{(unsigned char*)"bsol", {92, 0}},
{(unsigned char*)"bsolb", {226, 167, 133, 0}},
{(unsigned char*)"bsolhsub", {226, 159, 136, 0}},
{(unsigned char*)"bull", {226, 128, 162, 0}},
{(unsigned char*)"bullet", {226, 128, 162, 0}},
{(unsigned char*)"bump", {226, 137, 142, 0}},
{(unsigned char*)"bumpE", {226, 170, 174, 0}},
{(unsigned char*)"bumpe", {226, 137, 143, 0}},
{(unsigned char*)"bumpeq", {226, 137, 143, 0}},
{(unsigned char*)"cacute", {196, 135, 0}},
{(unsigned char*)"cap", {226, 136, 169, 0}},
{(unsigned char*)"capand", {226, 169, 132, 0}},
{(unsigned char*)"capbrcup", {226, 169, 137, 0}},
{(unsigned char*)"capcap", {226, 169, 139, 0}},
{(unsigned char*)"capcup", {226, 169, 135, 0}},
{(unsigned char*)"capdot", {226, 169, 128, 0}},
{(unsigned char*)"caps", {226, 136, 169, 239, 184, 128, 0}},
{(unsigned char*)"caret", {226, 129, 129, 0}},
{(unsigned char*)"caron", {203, 135, 0}},
{(unsigned char*)"ccaps", {226, 169, 141, 0}},
{(unsigned char*)"ccaron", {196, 141, 0}},
{(unsigned char*)"ccedil", {195, 167, 0}},
{(unsigned char*)"ccirc", {196, 137, 0}},
{(unsigned char*)"ccups", {226, 169, 140, 0}},
{(unsigned char*)"ccupssm", {226, 169, 144, 0}},
{(unsigned char*)"cdot", {196, 139, 0}},
{(unsigned char*)"cedil", {194, 184, 0}},
{(unsigned char*)"cemptyv", {226, 166, 178, 0}},
{(unsigned char*)"cent", {194, 162, 0}},
{(unsigned char*)"centerdot", {194, 183, 0}},
{(unsigned char*)"cfr", {240, 157, 148, 160, 0}},
{(unsigned char*)"chcy", {209, 135, 0}},
{(unsigned char*)"check", {226, 156, 147, 0}},
{(unsigned char*)"checkmark", {226, 156, 147, 0}},
{(unsigned char*)"chi", {207, 135, 0}},
{(unsigned char*)"cir", {226, 151, 139, 0}},
{(unsigned char*)"cirE", {226, 167, 131, 0}},
{(unsigned char*)"circ", {203, 134, 0}},
{(unsigned char*)"circeq", {226, 137, 151, 0}},
{(unsigned char*)"circlearrowleft", {226, 134, 186, 0}},
{(unsigned char*)"circlearrowright", {226, 134, 187, 0}},
{(unsigned char*)"circledR", {194, 174, 0}},
{(unsigned char*)"circledS", {226, 147, 136, 0}},
{(unsigned char*)"circledast", {226, 138, 155, 0}},
{(unsigned char*)"circledcirc", {226, 138, 154, 0}},
{(unsigned char*)"circleddash", {226, 138, 157, 0}},
{(unsigned char*)"cire", {226, 137, 151, 0}},
{(unsigned char*)"cirfnint", {226, 168, 144, 0}},
{(unsigned char*)"cirmid", {226, 171, 175, 0}},
{(unsigned char*)"cirscir", {226, 167, 130, 0}},
{(unsigned char*)"clubs", {226, 153, 163, 0}},
{(unsigned char*)"clubsuit", {226, 153, 163, 0}},
{(unsigned char*)"colon", {58, 0}},
{(unsigned char*)"colone", {226, 137, 148, 0}},
{(unsigned char*)"coloneq", {226, 137, 148, 0}},
{(unsigned char*)"comma", {44, 0}},
{(unsigned char*)"commat", {64, 0}},
{(unsigned char*)"comp", {226, 136, 129, 0}},
{(unsigned char*)"compfn", {226, 136, 152, 0}},
{(unsigned char*)"complement", {226, 136, 129, 0}},
{(unsigned char*)"complexes", {226, 132, 130, 0}},
{(unsigned char*)"cong", {226, 137, 133, 0}},
{(unsigned char*)"congdot", {226, 169, 173, 0}},
{(unsigned char*)"conint", {226, 136, 174, 0}},
{(unsigned char*)"copf", {240, 157, 149, 148, 0}},
{(unsigned char*)"coprod", {226, 136, 144, 0}},
{(unsigned char*)"copy", {194, 169, 0}},
{(unsigned char*)"copysr", {226, 132, 151, 0}},
{(unsigned char*)"crarr", {226, 134, 181, 0}},
{(unsigned char*)"cross", {226, 156, 151, 0}},
{(unsigned char*)"cscr", {240, 157, 146, 184, 0}},
{(unsigned char*)"csub", {226, 171, 143, 0}},
{(unsigned char*)"csube", {226, 171, 145, 0}},
{(unsigned char*)"csup", {226, 171, 144, 0}},
{(unsigned char*)"csupe", {226, 171, 146, 0}},
{(unsigned char*)"ctdot", {226, 139, 175, 0}},
{(unsigned char*)"cudarrl", {226, 164, 184, 0}},
{(unsigned char*)"cudarrr", {226, 164, 181, 0}},
{(unsigned char*)"cuepr", {226, 139, 158, 0}},
{(unsigned char*)"cuesc", {226, 139, 159, 0}},
{(unsigned char*)"cularr", {226, 134, 182, 0}},
{(unsigned char*)"cularrp", {226, 164, 189, 0}},
{(unsigned char*)"cup", {226, 136, 170, 0}},
{(unsigned char*)"cupbrcap", {226, 169, 136, 0}},
{(unsigned char*)"cupcap", {226, 169, 134, 0}},
{(unsigned char*)"cupcup", {226, 169, 138, 0}},
{(unsigned char*)"cupdot", {226, 138, 141, 0}},
{(unsigned char*)"cupor", {226, 169, 133, 0}},
{(unsigned char*)"cups", {226, 136, 170, 239, 184, 128, 0}},
{(unsigned char*)"curarr", {226, 134, 183, 0}},
{(unsigned char*)"curarrm", {226, 164, 188, 0}},
{(unsigned char*)"curlyeqprec", {226, 139, 158, 0}},
{(unsigned char*)"curlyeqsucc", {226, 139, 159, 0}},
{(unsigned char*)"curlyvee", {226, 139, 142, 0}},
{(unsigned char*)"curlywedge", {226, 139, 143, 0}},
{(unsigned char*)"curren", {194, 164, 0}},
{(unsigned char*)"curvearrowleft", {226, 134, 182, 0}},
{(unsigned char*)"curvearrowright", {226, 134, 183, 0}},
{(unsigned char*)"cuvee", {226, 139, 142, 0}},
{(unsigned char*)"cuwed", {226, 139, 143, 0}},
{(unsigned char*)"cwconint", {226, 136, 178, 0}},
{(unsigned char*)"cwint", {226, 136, 177, 0}},
{(unsigned char*)"cylcty", {226, 140, 173, 0}},
{(unsigned char*)"dArr", {226, 135, 147, 0}},
{(unsigned char*)"dHar", {226, 165, 165, 0}},
{(unsigned char*)"dagger", {226, 128, 160, 0}},
{(unsigned char*)"daleth", {226, 132, 184, 0}},
{(unsigned char*)"darr", {226, 134, 147, 0}},
{(unsigned char*)"dash", {226, 128, 144, 0}},
{(unsigned char*)"dashv", {226, 138, 163, 0}},
{(unsigned char*)"dbkarow", {226, 164, 143, 0}},
{(unsigned char*)"dblac", {203, 157, 0}},
{(unsigned char*)"dcaron", {196, 143, 0}},
{(unsigned char*)"dcy", {208, 180, 0}},
{(unsigned char*)"dd", {226, 133, 134, 0}},
{(unsigned char*)"ddagger", {226, 128, 161, 0}},
{(unsigned char*)"ddarr", {226, 135, 138, 0}},
{(unsigned char*)"ddotseq", {226, 169, 183, 0}},
{(unsigned char*)"deg", {194, 176, 0}},
{(unsigned char*)"delta", {206, 180, 0}},
{(unsigned char*)"demptyv", {226, 166, 177, 0}},
{(unsigned char*)"dfisht", {226, 165, 191, 0}},
{(unsigned char*)"dfr", {240, 157, 148, 161, 0}},
{(unsigned char*)"dharl", {226, 135, 131, 0}},
{(unsigned char*)"dharr", {226, 135, 130, 0}},
{(unsigned char*)"diam", {226, 139, 132, 0}},
{(unsigned char*)"diamond", {226, 139, 132, 0}},
{(unsigned char*)"diamondsuit", {226, 153, 166, 0}},
{(unsigned char*)"diams", {226, 153, 166, 0}},
{(unsigned char*)"die", {194, 168, 0}},
{(unsigned char*)"digamma", {207, 157, 0}},
{(unsigned char*)"disin", {226, 139, 178, 0}},
{(unsigned char*)"div", {195, 183, 0}},
{(unsigned char*)"divide", {195, 183, 0}},
{(unsigned char*)"divideontimes", {226, 139, 135, 0}},
{(unsigned char*)"divonx", {226, 139, 135, 0}},
{(unsigned char*)"djcy", {209, 146, 0}},
{(unsigned char*)"dlcorn", {226, 140, 158, 0}},
{(unsigned char*)"dlcrop", {226, 140, 141, 0}},
{(unsigned char*)"dollar", {36, 0}},
{(unsigned char*)"dopf", {240, 157, 149, 149, 0}},
{(unsigned char*)"dot", {203, 153, 0}},
{(unsigned char*)"doteq", {226, 137, 144, 0}},
{(unsigned char*)"doteqdot", {226, 137, 145, 0}},
{(unsigned char*)"dotminus", {226, 136, 184, 0}},
{(unsigned char*)"dotplus", {226, 136, 148, 0}},
{(unsigned char*)"dotsquare", {226, 138, 161, 0}},
{(unsigned char*)"doublebarwedge", {226, 140, 134, 0}},
{(unsigned char*)"downarrow", {226, 134, 147, 0}},
{(unsigned char*)"downdownarrows", {226, 135, 138, 0}},
{(unsigned char*)"downharpoonleft", {226, 135, 131, 0}},
{(unsigned char*)"downharpoonright", {226, 135, 130, 0}},
{(unsigned char*)"drbkarow", {226, 164, 144, 0}},
{(unsigned char*)"drcorn", {226, 140, 159, 0}},
{(unsigned char*)"drcrop", {226, 140, 140, 0}},
{(unsigned char*)"dscr", {240, 157, 146, 185, 0}},
{(unsigned char*)"dscy", {209, 149, 0}},
{(unsigned char*)"dsol", {226, 167, 182, 0}},
{(unsigned char*)"dstrok", {196, 145, 0}},
{(unsigned char*)"dtdot", {226, 139, 177, 0}},
{(unsigned char*)"dtri", {226, 150, 191, 0}},
{(unsigned char*)"dtrif", {226, 150, 190, 0}},
{(unsigned char*)"duarr", {226, 135, 181, 0}},
{(unsigned char*)"duhar", {226, 165, 175, 0}},
{(unsigned char*)"dwangle", {226, 166, 166, 0}},
{(unsigned char*)"dzcy", {209, 159, 0}},
{(unsigned char*)"dzigrarr", {226, 159, 191, 0}},
{(unsigned char*)"eDDot", {226, 169, 183, 0}},
{(unsigned char*)"eDot", {226, 137, 145, 0}},
{(unsigned char*)"eacute", {195, 169, 0}},
{(unsigned char*)"easter", {226, 169, 174, 0}},
{(unsigned char*)"ecaron", {196, 155, 0}},
{(unsigned char*)"ecir", {226, 137, 150, 0}},
{(unsigned char*)"ecirc", {195, 170, 0}},
{(unsigned char*)"ecolon", {226, 137, 149, 0}},
{(unsigned char*)"ecy", {209, 141, 0}},
{(unsigned char*)"edot", {196, 151, 0}},
{(unsigned char*)"ee", {226, 133, 135, 0}},
{(unsigned char*)"efDot", {226, 137, 146, 0}},
{(unsigned char*)"efr", {240, 157, 148, 162, 0}},
{(unsigned char*)"eg", {226, 170, 154, 0}},
{(unsigned char*)"egrave", {195, 168, 0}},
{(unsigned char*)"egs", {226, 170, 150, 0}},
{(unsigned char*)"egsdot", {226, 170, 152, 0}},
{(unsigned char*)"el", {226, 170, 153, 0}},
{(unsigned char*)"elinters", {226, 143, 167, 0}},
{(unsigned char*)"ell", {226, 132, 147, 0}},
{(unsigned char*)"els", {226, 170, 149, 0}},
{(unsigned char*)"elsdot", {226, 170, 151, 0}},
{(unsigned char*)"emacr", {196, 147, 0}},
{(unsigned char*)"empty", {226, 136, 133, 0}},
{(unsigned char*)"emptyset", {226, 136, 133, 0}},
{(unsigned char*)"emptyv", {226, 136, 133, 0}},
{(unsigned char*)"emsp", {226, 128, 131, 0}},
{(unsigned char*)"emsp13", {226, 128, 132, 0}},
{(unsigned char*)"emsp14", {226, 128, 133, 0}},
{(unsigned char*)"eng", {197, 139, 0}},
{(unsigned char*)"ensp", {226, 128, 130, 0}},
{(unsigned char*)"eogon", {196, 153, 0}},
{(unsigned char*)"eopf", {240, 157, 149, 150, 0}},
{(unsigned char*)"epar", {226, 139, 149, 0}},
{(unsigned char*)"eparsl", {226, 167, 163, 0}},
{(unsigned char*)"eplus", {226, 169, 177, 0}},
{(unsigned char*)"epsi", {206, 181, 0}},
{(unsigned char*)"epsilon", {206, 181, 0}},
{(unsigned char*)"epsiv", {207, 181, 0}},
{(unsigned char*)"eqcirc", {226, 137, 150, 0}},
{(unsigned char*)"eqcolon", {226, 137, 149, 0}},
{(unsigned char*)"eqsim", {226, 137, 130, 0}},
{(unsigned char*)"eqslantgtr", {226, 170, 150, 0}},
{(unsigned char*)"eqslantless", {226, 170, 149, 0}},
{(unsigned char*)"equals", {61, 0}},
{(unsigned char*)"equest", {226, 137, 159, 0}},
{(unsigned char*)"equiv", {226, 137, 161, 0}},
{(unsigned char*)"equivDD", {226, 169, 184, 0}},
{(unsigned char*)"eqvparsl", {226, 167, 165, 0}},
{(unsigned char*)"erDot", {226, 137, 147, 0}},
{(unsigned char*)"erarr", {226, 165, 177, 0}},
{(unsigned char*)"escr", {226, 132, 175, 0}},
{(unsigned char*)"esdot", {226, 137, 144, 0}},
{(unsigned char*)"esim", {226, 137, 130, 0}},
{(unsigned char*)"eta", {206, 183, 0}},
{(unsigned char*)"eth", {195, 176, 0}},
{(unsigned char*)"euml", {195, 171, 0}},
{(unsigned char*)"euro", {226, 130, 172, 0}},
{(unsigned char*)"excl", {33, 0}},
{(unsigned char*)"exist", {226, 136, 131, 0}},
{(unsigned char*)"expectation", {226, 132, 176, 0}},
{(unsigned char*)"exponentiale", {226, 133, 135, 0}},
{(unsigned char*)"fallingdotseq", {226, 137, 146, 0}},
{(unsigned char*)"fcy", {209, 132, 0}},
{(unsigned char*)"female", {226, 153, 128, 0}},
{(unsigned char*)"ffilig", {239, 172, 131, 0}},
{(unsigned char*)"fflig", {239, 172, 128, 0}},
{(unsigned char*)"ffllig", {239, 172, 132, 0}},
{(unsigned char*)"ffr", {240, 157, 148, 163, 0}},
{(unsigned char*)"filig", {239, 172, 129, 0}},
{(unsigned char*)"fjlig", {102, 106, 0}},
{(unsigned char*)"flat", {226, 153, 173, 0}},
{(unsigned char*)"fllig", {239, 172, 130, 0}},
{(unsigned char*)"fltns", {226, 150, 177, 0}},
{(unsigned char*)"fnof", {198, 146, 0}},
{(unsigned char*)"fopf", {240, 157, 149, 151, 0}},
{(unsigned char*)"forall", {226, 136, 128, 0}},
{(unsigned char*)"fork", {226, 139, 148, 0}},
{(unsigned char*)"forkv", {226, 171, 153, 0}},
{(unsigned char*)"fpartint", {226, 168, 141, 0}},
{(unsigned char*)"frac12", {194, 189, 0}},
{(unsigned char*)"frac13", {226, 133, 147, 0}},
{(unsigned char*)"frac14", {194, 188, 0}},
{(unsigned char*)"frac15", {226, 133, 149, 0}},
{(unsigned char*)"frac16", {226, 133, 153, 0}},
{(unsigned char*)"frac18", {226, 133, 155, 0}},
{(unsigned char*)"frac23", {226, 133, 148, 0}},
{(unsigned char*)"frac25", {226, 133, 150, 0}},
{(unsigned char*)"frac34", {194, 190, 0}},
{(unsigned char*)"frac35", {226, 133, 151, 0}},
{(unsigned char*)"frac38", {226, 133, 156, 0}},
{(unsigned char*)"frac45", {226, 133, 152, 0}},
{(unsigned char*)"frac56", {226, 133, 154, 0}},
{(unsigned char*)"frac58", {226, 133, 157, 0}},
{(unsigned char*)"frac78", {226, 133, 158, 0}},
{(unsigned char*)"frasl", {226, 129, 132, 0}},
{(unsigned char*)"frown", {226, 140, 162, 0}},
{(unsigned char*)"fscr", {240, 157, 146, 187, 0}},
{(unsigned char*)"gE", {226, 137, 167, 0}},
{(unsigned char*)"gEl", {226, 170, 140, 0}},
{(unsigned char*)"gacute", {199, 181, 0}},
{(unsigned char*)"gamma", {206, 179, 0}},
{(unsigned char*)"gammad", {207, 157, 0}},
{(unsigned char*)"gap", {226, 170, 134, 0}},
{(unsigned char*)"gbreve", {196, 159, 0}},
{(unsigned char*)"gcirc", {196, 157, 0}},
{(unsigned char*)"gcy", {208, 179, 0}},
{(unsigned char*)"gdot", {196, 161, 0}},
{(unsigned char*)"ge", {226, 137, 165, 0}},
{(unsigned char*)"gel", {226, 139, 155, 0}},
{(unsigned char*)"geq", {226, 137, 165, 0}},
{(unsigned char*)"geqq", {226, 137, 167, 0}},
{(unsigned char*)"geqslant", {226, 169, 190, 0}},
{(unsigned char*)"ges", {226, 169, 190, 0}},
{(unsigned char*)"gescc", {226, 170, 169, 0}},
{(unsigned char*)"gesdot", {226, 170, 128, 0}},
{(unsigned char*)"gesdoto", {226, 170, 130, 0}},
{(unsigned char*)"gesdotol", {226, 170, 132, 0}},
{(unsigned char*)"gesl", {226, 139, 155, 239, 184, 128, 0}},
{(unsigned char*)"gesles", {226, 170, 148, 0}},
{(unsigned char*)"gfr", {240, 157, 148, 164, 0}},
{(unsigned char*)"gg", {226, 137, 171, 0}},
{(unsigned char*)"ggg", {226, 139, 153, 0}},
{(unsigned char*)"gimel", {226, 132, 183, 0}},
{(unsigned char*)"gjcy", {209, 147, 0}},
{(unsigned char*)"gl", {226, 137, 183, 0}},
{(unsigned char*)"glE", {226, 170, 146, 0}},
{(unsigned char*)"gla", {226, 170, 165, 0}},
{(unsigned char*)"glj", {226, 170, 164, 0}},
{(unsigned char*)"gnE", {226, 137, 169, 0}},
{(unsigned char*)"gnap", {226, 170, 138, 0}},
{(unsigned char*)"gnapprox", {226, 170, 138, 0}},
{(unsigned char*)"gne", {226, 170, 136, 0}},
{(unsigned char*)"gneq", {226, 170, 136, 0}},
{(unsigned char*)"gneqq", {226, 137, 169, 0}},
{(unsigned char*)"gnsim", {226, 139, 167, 0}},
{(unsigned char*)"gopf", {240, 157, 149, 152, 0}},
{(unsigned char*)"grave", {96, 0}},
{(unsigned char*)"gscr", {226, 132, 138, 0}},
{(unsigned char*)"gsim", {226, 137, 179, 0}},
{(unsigned char*)"gsime", {226, 170, 142, 0}},
{(unsigned char*)"gsiml", {226, 170, 144, 0}},
{(unsigned char*)"gt", {62, 0}},
{(unsigned char*)"gtcc", {226, 170, 167, 0}},
{(unsigned char*)"gtcir", {226, 169, 186, 0}},
{(unsigned char*)"gtdot", {226, 139, 151, 0}},
{(unsigned char*)"gtlPar", {226, 166, 149, 0}},
{(unsigned char*)"gtquest", {226, 169, 188, 0}},
{(unsigned char*)"gtrapprox", {226, 170, 134, 0}},
{(unsigned char*)"gtrarr", {226, 165, 184, 0}},
{(unsigned char*)"gtrdot", {226, 139, 151, 0}},
{(unsigned char*)"gtreqless", {226, 139, 155, 0}},
{(unsigned char*)"gtreqqless", {226, 170, 140, 0}},
{(unsigned char*)"gtrless", {226, 137, 183, 0}},
{(unsigned char*)"gtrsim", {226, 137, 179, 0}},
{(unsigned char*)"gvertneqq", {226, 137, 169, 239, 184, 128, 0}},
{(unsigned char*)"gvnE", {226, 137, 169, 239, 184, 128, 0}},
{(unsigned char*)"hArr", {226, 135, 148, 0}},
{(unsigned char*)"hairsp", {226, 128, 138, 0}},
{(unsigned char*)"half", {194, 189, 0}},
{(unsigned char*)"hamilt", {226, 132, 139, 0}},
{(unsigned char*)"hardcy", {209, 138, 0}},
{(unsigned char*)"harr", {226, 134, 148, 0}},
{(unsigned char*)"harrcir", {226, 165, 136, 0}},
{(unsigned char*)"harrw", {226, 134, 173, 0}},
{(unsigned char*)"hbar", {226, 132, 143, 0}},
{(unsigned char*)"hcirc", {196, 165, 0}},
{(unsigned char*)"hearts", {226, 153, 165, 0}},
{(unsigned char*)"heartsuit", {226, 153, 165, 0}},
{(unsigned char*)"hellip", {226, 128, 166, 0}},
{(unsigned char*)"hercon", {226, 138, 185, 0}},
{(unsigned char*)"hfr", {240, 157, 148, 165, 0}},
{(unsigned char*)"hksearow", {226, 164, 165, 0}},
{(unsigned char*)"hkswarow", {226, 164, 166, 0}},
{(unsigned char*)"hoarr", {226, 135, 191, 0}},
{(unsigned char*)"homtht", {226, 136, 187, 0}},
{(unsigned char*)"hookleftarrow", {226, 134, 169, 0}},
{(unsigned char*)"hookrightarrow", {226, 134, 170, 0}},
{(unsigned char*)"hopf", {240, 157, 149, 153, 0}},
{(unsigned char*)"horbar", {226, 128, 149, 0}},
{(unsigned char*)"hscr", {240, 157, 146, 189, 0}},
{(unsigned char*)"hslash", {226, 132, 143, 0}},
{(unsigned char*)"hstrok", {196, 167, 0}},
{(unsigned char*)"hybull", {226, 129, 131, 0}},
{(unsigned char*)"hyphen", {226, 128, 144, 0}},
{(unsigned char*)"iacute", {195, 173, 0}},
{(unsigned char*)"ic", {226, 129, 163, 0}},
{(unsigned char*)"icirc", {195, 174, 0}},
{(unsigned char*)"icy", {208, 184, 0}},
{(unsigned char*)"iecy", {208, 181, 0}},
{(unsigned char*)"iexcl", {194, 161, 0}},
{(unsigned char*)"iff", {226, 135, 148, 0}},
{(unsigned char*)"ifr", {240, 157, 148, 166, 0}},
{(unsigned char*)"igrave", {195, 172, 0}},
{(unsigned char*)"ii", {226, 133, 136, 0}},
{(unsigned char*)"iiiint", {226, 168, 140, 0}},
{(unsigned char*)"iiint", {226, 136, 173, 0}},
{(unsigned char*)"iinfin", {226, 167, 156, 0}},
{(unsigned char*)"iiota", {226, 132, 169, 0}},
{(unsigned char*)"ijlig", {196, 179, 0}},
{(unsigned char*)"imacr", {196, 171, 0}},
{(unsigned char*)"image", {226, 132, 145, 0}},
{(unsigned char*)"imagline", {226, 132, 144, 0}},
{(unsigned char*)"imagpart", {226, 132, 145, 0}},
{(unsigned char*)"imath", {196, 177, 0}},
{(unsigned char*)"imof", {226, 138, 183, 0}},
{(unsigned char*)"imped", {198, 181, 0}},
{(unsigned char*)"in", {226, 136, 136, 0}},
{(unsigned char*)"incare", {226, 132, 133, 0}},
{(unsigned char*)"infin", {226, 136, 158, 0}},
{(unsigned char*)"infintie", {226, 167, 157, 0}},
{(unsigned char*)"inodot", {196, 177, 0}},
{(unsigned char*)"int", {226, 136, 171, 0}},
{(unsigned char*)"intcal", {226, 138, 186, 0}},
{(unsigned char*)"integers", {226, 132, 164, 0}},
{(unsigned char*)"intercal", {226, 138, 186, 0}},
{(unsigned char*)"intlarhk", {226, 168, 151, 0}},
{(unsigned char*)"intprod", {226, 168, 188, 0}},
{(unsigned char*)"iocy", {209, 145, 0}},
{(unsigned char*)"iogon", {196, 175, 0}},
{(unsigned char*)"iopf", {240, 157, 149, 154, 0}},
{(unsigned char*)"iota", {206, 185, 0}},
{(unsigned char*)"iprod", {226, 168, 188, 0}},
{(unsigned char*)"iquest", {194, 191, 0}},
{(unsigned char*)"iscr", {240, 157, 146, 190, 0}},
{(unsigned char*)"isin", {226, 136, 136, 0}},
{(unsigned char*)"isinE", {226, 139, 185, 0}},
{(unsigned char*)"isindot", {226, 139, 181, 0}},
{(unsigned char*)"isins", {226, 139, 180, 0}},
{(unsigned char*)"isinsv", {226, 139, 179, 0}},
{(unsigned char*)"isinv", {226, 136, 136, 0}},
{(unsigned char*)"it", {226, 129, 162, 0}},
{(unsigned char*)"itilde", {196, 169, 0}},
{(unsigned char*)"iukcy", {209, 150, 0}},
{(unsigned char*)"iuml", {195, 175, 0}},
{(unsigned char*)"jcirc", {196, 181, 0}},
{(unsigned char*)"jcy", {208, 185, 0}},
{(unsigned char*)"jfr", {240, 157, 148, 167, 0}},
{(unsigned char*)"jmath", {200, 183, 0}},
{(unsigned char*)"jopf", {240, 157, 149, 155, 0}},
{(unsigned char*)"jscr", {240, 157, 146, 191, 0}},
{(unsigned char*)"jsercy", {209, 152, 0}},
{(unsigned char*)"jukcy", {209, 148, 0}},
{(unsigned char*)"kappa", {206, 186, 0}},
{(unsigned char*)"kappav", {207, 176, 0}},
{(unsigned char*)"kcedil", {196, 183, 0}},
{(unsigned char*)"kcy", {208, 186, 0}},
{(unsigned char*)"kfr", {240, 157, 148, 168, 0}},
{(unsigned char*)"kgreen", {196, 184, 0}},
{(unsigned char*)"khcy", {209, 133, 0}},
{(unsigned char*)"kjcy", {209, 156, 0}},
{(unsigned char*)"kopf", {240, 157, 149, 156, 0}},
{(unsigned char*)"kscr", {240, 157, 147, 128, 0}},
{(unsigned char*)"lAarr", {226, 135, 154, 0}},
{(unsigned char*)"lArr", {226, 135, 144, 0}},
{(unsigned char*)"lAtail", {226, 164, 155, 0}},
{(unsigned char*)"lBarr", {226, 164, 142, 0}},
{(unsigned char*)"lE", {226, 137, 166, 0}},
{(unsigned char*)"lEg", {226, 170, 139, 0}},
{(unsigned char*)"lHar", {226, 165, 162, 0}},
{(unsigned char*)"lacute", {196, 186, 0}},
{(unsigned char*)"laemptyv", {226, 166, 180, 0}},
{(unsigned char*)"lagran", {226, 132, 146, 0}},
{(unsigned char*)"lambda", {206, 187, 0}},
{(unsigned char*)"lang", {226, 159, 168, 0}},
{(unsigned char*)"langd", {226, 166, 145, 0}},
{(unsigned char*)"langle", {226, 159, 168, 0}},
{(unsigned char*)"lap", {226, 170, 133, 0}},
{(unsigned char*)"laquo", {194, 171, 0}},
{(unsigned char*)"larr", {226, 134, 144, 0}},
{(unsigned char*)"larrb", {226, 135, 164, 0}},
{(unsigned char*)"larrbfs", {226, 164, 159, 0}},
{(unsigned char*)"larrfs", {226, 164, 157, 0}},
{(unsigned char*)"larrhk", {226, 134, 169, 0}},
{(unsigned char*)"larrlp", {226, 134, 171, 0}},
{(unsigned char*)"larrpl", {226, 164, 185, 0}},
{(unsigned char*)"larrsim", {226, 165, 179, 0}},
{(unsigned char*)"larrtl", {226, 134, 162, 0}},
{(unsigned char*)"lat", {226, 170, 171, 0}},
{(unsigned char*)"latail", {226, 164, 153, 0}},
{(unsigned char*)"late", {226, 170, 173, 0}},
{(unsigned char*)"lates", {226, 170, 173, 239, 184, 128, 0}},
{(unsigned char*)"lbarr", {226, 164, 140, 0}},
{(unsigned char*)"lbbrk", {226, 157, 178, 0}},
{(unsigned char*)"lbrace", {123, 0}},
{(unsigned char*)"lbrack", {91, 0}},
{(unsigned char*)"lbrke", {226, 166, 139, 0}},
{(unsigned char*)"lbrksld", {226, 166, 143, 0}},
{(unsigned char*)"lbrkslu", {226, 166, 141, 0}},
{(unsigned char*)"lcaron", {196, 190, 0}},
{(unsigned char*)"lcedil", {196, 188, 0}},
{(unsigned char*)"lceil", {226, 140, 136, 0}},
{(unsigned char*)"lcub", {123, 0}},
{(unsigned char*)"lcy", {208, 187, 0}},
{(unsigned char*)"ldca", {226, 164, 182, 0}},
{(unsigned char*)"ldquo", {226, 128, 156, 0}},
{(unsigned char*)"ldquor", {226, 128, 158, 0}},
{(unsigned char*)"ldrdhar", {226, 165, 167, 0}},
{(unsigned char*)"ldrushar", {226, 165, 139, 0}},
{(unsigned char*)"ldsh", {226, 134, 178, 0}},
{(unsigned char*)"le", {226, 137, 164, 0}},
{(unsigned char*)"leftarrow", {226, 134, 144, 0}},
{(unsigned char*)"leftarrowtail", {226, 134, 162, 0}},
{(unsigned char*)"leftharpoondown", {226, 134, 189, 0}},
{(unsigned char*)"leftharpoonup", {226, 134, 188, 0}},
{(unsigned char*)"leftleftarrows", {226, 135, 135, 0}},
{(unsigned char*)"leftrightarrow", {226, 134, 148, 0}},
{(unsigned char*)"leftrightarrows", {226, 135, 134, 0}},
{(unsigned char*)"leftrightharpoons", {226, 135, 139, 0}},
{(unsigned char*)"leftrightsquigarrow", {226, 134, 173, 0}},
{(unsigned char*)"leftthreetimes", {226, 139, 139, 0}},
{(unsigned char*)"leg", {226, 139, 154, 0}},
{(unsigned char*)"leq", {226, 137, 164, 0}},
{(unsigned char*)"leqq", {226, 137, 166, 0}},
{(unsigned char*)"leqslant", {226, 169, 189, 0}},
{(unsigned char*)"les", {226, 169, 189, 0}},
{(unsigned char*)"lescc", {226, 170, 168, 0}},
{(unsigned char*)"lesdot", {226, 169, 191, 0}},
{(unsigned char*)"lesdoto", {226, 170, 129, 0}},
{(unsigned char*)"lesdotor", {226, 170, 131, 0}},
{(unsigned char*)"lesg", {226, 139, 154, 239, 184, 128, 0}},
{(unsigned char*)"lesges", {226, 170, 147, 0}},
{(unsigned char*)"lessapprox", {226, 170, 133, 0}},
{(unsigned char*)"lessdot", {226, 139, 150, 0}},
{(unsigned char*)"lesseqgtr", {226, 139, 154, 0}},
{(unsigned char*)"lesseqqgtr", {226, 170, 139, 0}},
{(unsigned char*)"lessgtr", {226, 137, 182, 0}},
{(unsigned char*)"lesssim", {226, 137, 178, 0}},
{(unsigned char*)"lfisht", {226, 165, 188, 0}},
{(unsigned char*)"lfloor", {226, 140, 138, 0}},
{(unsigned char*)"lfr", {240, 157, 148, 169, 0}},
{(unsigned char*)"lg", {226, 137, 182, 0}},
{(unsigned char*)"lgE", {226, 170, 145, 0}},
{(unsigned char*)"lhard", {226, 134, 189, 0}},
{(unsigned char*)"lharu", {226, 134, 188, 0}},
{(unsigned char*)"lharul", {226, 165, 170, 0}},
{(unsigned char*)"lhblk", {226, 150, 132, 0}},
{(unsigned char*)"ljcy", {209, 153, 0}},
{(unsigned char*)"ll", {226, 137, 170, 0}},
{(unsigned char*)"llarr", {226, 135, 135, 0}},
{(unsigned char*)"llcorner", {226, 140, 158, 0}},
{(unsigned char*)"llhard", {226, 165, 171, 0}},
{(unsigned char*)"lltri", {226, 151, 186, 0}},
{(unsigned char*)"lmidot", {197, 128, 0}},
{(unsigned char*)"lmoust", {226, 142, 176, 0}},
{(unsigned char*)"lmoustache", {226, 142, 176, 0}},
{(unsigned char*)"lnE", {226, 137, 168, 0}},
{(unsigned char*)"lnap", {226, 170, 137, 0}},
{(unsigned char*)"lnapprox", {226, 170, 137, 0}},
{(unsigned char*)"lne", {226, 170, 135, 0}},
{(unsigned char*)"lneq", {226, 170, 135, 0}},
{(unsigned char*)"lneqq", {226, 137, 168, 0}},
{(unsigned char*)"lnsim", {226, 139, 166, 0}},
{(unsigned char*)"loang", {226, 159, 172, 0}},
{(unsigned char*)"loarr", {226, 135, 189, 0}},
{(unsigned char*)"lobrk", {226, 159, 166, 0}},
{(unsigned char*)"longleftarrow", {226, 159, 181, 0}},
{(unsigned char*)"longleftrightarrow", {226, 159, 183, 0}},
{(unsigned char*)"longmapsto", {226, 159, 188, 0}},
{(unsigned char*)"longrightarrow", {226, 159, 182, 0}},
{(unsigned char*)"looparrowleft", {226, 134, 171, 0}},
{(unsigned char*)"looparrowright", {226, 134, 172, 0}},
{(unsigned char*)"lopar", {226, 166, 133, 0}},
{(unsigned char*)"lopf", {240, 157, 149, 157, 0}},
{(unsigned char*)"loplus", {226, 168, 173, 0}},
{(unsigned char*)"lotimes", {226, 168, 180, 0}},
{(unsigned char*)"lowast", {226, 136, 151, 0}},
{(unsigned char*)"lowbar", {95, 0}},
{(unsigned char*)"loz", {226, 151, 138, 0}},
{(unsigned char*)"lozenge", {226, 151, 138, 0}},
{(unsigned char*)"lozf", {226, 167, 171, 0}},
{(unsigned char*)"lpar", {40, 0}},
{(unsigned char*)"lparlt", {226, 166, 147, 0}},
{(unsigned char*)"lrarr", {226, 135, 134, 0}},
{(unsigned char*)"lrcorner", {226, 140, 159, 0}},
{(unsigned char*)"lrhar", {226, 135, 139, 0}},
{(unsigned char*)"lrhard", {226, 165, 173, 0}},
{(unsigned char*)"lrm", {226, 128, 142, 0}},
{(unsigned char*)"lrtri", {226, 138, 191, 0}},
{(unsigned char*)"lsaquo", {226, 128, 185, 0}},
{(unsigned char*)"lscr", {240, 157, 147, 129, 0}},
{(unsigned char*)"lsh", {226, 134, 176, 0}},
{(unsigned char*)"lsim", {226, 137, 178, 0}},
{(unsigned char*)"lsime", {226, 170, 141, 0}},
{(unsigned char*)"lsimg", {226, 170, 143, 0}},
{(unsigned char*)"lsqb", {91, 0}},
{(unsigned char*)"lsquo", {226, 128, 152, 0}},
{(unsigned char*)"lsquor", {226, 128, 154, 0}},
{(unsigned char*)"lstrok", {197, 130, 0}},
{(unsigned char*)"lt", {60, 0}},
{(unsigned char*)"ltcc", {226, 170, 166, 0}},
{(unsigned char*)"ltcir", {226, 169, 185, 0}},
{(unsigned char*)"ltdot", {226, 139, 150, 0}},
{(unsigned char*)"lthree", {226, 139, 139, 0}},
{(unsigned char*)"ltimes", {226, 139, 137, 0}},
{(unsigned char*)"ltlarr", {226, 165, 182, 0}},
{(unsigned char*)"ltquest", {226, 169, 187, 0}},
{(unsigned char*)"ltrPar", {226, 166, 150, 0}},
{(unsigned char*)"ltri", {226, 151, 131, 0}},
{(unsigned char*)"ltrie", {226, 138, 180, 0}},
{(unsigned char*)"ltrif", {226, 151, 130, 0}},
{(unsigned char*)"lurdshar", {226, 165, 138, 0}},
{(unsigned char*)"luruhar", {226, 165, 166, 0}},
{(unsigned char*)"lvertneqq", {226, 137, 168, 239, 184, 128, 0}},
{(unsigned char*)"lvnE", {226, 137, 168, 239, 184, 128, 0}},
{(unsigned char*)"mDDot", {226, 136, 186, 0}},
{(unsigned char*)"macr", {194, 175, 0}},
{(unsigned char*)"male", {226, 153, 130, 0}},
{(unsigned char*)"malt", {226, 156, 160, 0}},
{(unsigned char*)"maltese", {226, 156, 160, 0}},
{(unsigned char*)"map", {226, 134, 166, 0}},
{(unsigned char*)"mapsto", {226, 134, 166, 0}},
{(unsigned char*)"mapstodown", {226, 134, 167, 0}},
{(unsigned char*)"mapstoleft", {226, 134, 164, 0}},
{(unsigned char*)"mapstoup", {226, 134, 165, 0}},
{(unsigned char*)"marker", {226, 150, 174, 0}},
{(unsigned char*)"mcomma", {226, 168, 169, 0}},
{(unsigned char*)"mcy", {208, 188, 0}},
{(unsigned char*)"mdash", {226, 128, 148, 0}},
{(unsigned char*)"measuredangle", {226, 136, 161, 0}},
{(unsigned char*)"mfr", {240, 157, 148, 170, 0}},
{(unsigned char*)"mho", {226, 132, 167, 0}},
{(unsigned char*)"micro", {194, 181, 0}},
{(unsigned char*)"mid", {226, 136, 163, 0}},
{(unsigned char*)"midast", {42, 0}},
{(unsigned char*)"midcir", {226, 171, 176, 0}},
{(unsigned char*)"middot", {194, 183, 0}},
{(unsigned char*)"minus", {226, 136, 146, 0}},
{(unsigned char*)"minusb", {226, 138, 159, 0}},
{(unsigned char*)"minusd", {226, 136, 184, 0}},
{(unsigned char*)"minusdu", {226, 168, 170, 0}},
{(unsigned char*)"mlcp", {226, 171, 155, 0}},
{(unsigned char*)"mldr", {226, 128, 166, 0}},
{(unsigned char*)"mnplus", {226, 136, 147, 0}},
{(unsigned char*)"models", {226, 138, 167, 0}},
{(unsigned char*)"mopf", {240, 157, 149, 158, 0}},
{(unsigned char*)"mp", {226, 136, 147, 0}},
{(unsigned char*)"mscr", {240, 157, 147, 130, 0}},
{(unsigned char*)"mstpos", {226, 136, 190, 0}},
{(unsigned char*)"mu", {206, 188, 0}},
{(unsigned char*)"multimap", {226, 138, 184, 0}},
{(unsigned char*)"mumap", {226, 138, 184, 0}},
{(unsigned char*)"nGg", {226, 139, 153, 204, 184, 0}},
{(unsigned char*)"nGt", {226, 137, 171, 226, 131, 146, 0}},
{(unsigned char*)"nGtv", {226, 137, 171, 204, 184, 0}},
{(unsigned char*)"nLeftarrow", {226, 135, 141, 0}},
{(unsigned char*)"nLeftrightarrow", {226, 135, 142, 0}},
{(unsigned char*)"nLl", {226, 139, 152, 204, 184, 0}},
{(unsigned char*)"nLt", {226, 137, 170, 226, 131, 146, 0}},
{(unsigned char*)"nLtv", {226, 137, 170, 204, 184, 0}},
{(unsigned char*)"nRightarrow", {226, 135, 143, 0}},
{(unsigned char*)"nVDash", {226, 138, 175, 0}},
{(unsigned char*)"nVdash", {226, 138, 174, 0}},
{(unsigned char*)"nabla", {226, 136, 135, 0}},
{(unsigned char*)"nacute", {197, 132, 0}},
{(unsigned char*)"nang", {226, 136, 160, 226, 131, 146, 0}},
{(unsigned char*)"nap", {226, 137, 137, 0}},
{(unsigned char*)"napE", {226, 169, 176, 204, 184, 0}},
{(unsigned char*)"napid", {226, 137, 139, 204, 184, 0}},
{(unsigned char*)"napos", {197, 137, 0}},
{(unsigned char*)"napprox", {226, 137, 137, 0}},
{(unsigned char*)"natur", {226, 153, 174, 0}},
{(unsigned char*)"natural", {226, 153, 174, 0}},
{(unsigned char*)"naturals", {226, 132, 149, 0}},
{(unsigned char*)"nbsp", {194, 160, 0}},
{(unsigned char*)"nbump", {226, 137, 142, 204, 184, 0}},
{(unsigned char*)"nbumpe", {226, 137, 143, 204, 184, 0}},
{(unsigned char*)"ncap", {226, 169, 131, 0}},
{(unsigned char*)"ncaron", {197, 136, 0}},
{(unsigned char*)"ncedil", {197, 134, 0}},
{(unsigned char*)"ncong", {226, 137, 135, 0}},
{(unsigned char*)"ncongdot", {226, 169, 173, 204, 184, 0}},
{(unsigned char*)"ncup", {226, 169, 130, 0}},
{(unsigned char*)"ncy", {208, 189, 0}},
{(unsigned char*)"ndash", {226, 128, 147, 0}},
{(unsigned char*)"ne", {226, 137, 160, 0}},
{(unsigned char*)"neArr", {226, 135, 151, 0}},
{(unsigned char*)"nearhk", {226, 164, 164, 0}},
{(unsigned char*)"nearr", {226, 134, 151, 0}},
{(unsigned char*)"nearrow", {226, 134, 151, 0}},
{(unsigned char*)"nedot", {226, 137, 144, 204, 184, 0}},
{(unsigned char*)"nequiv", {226, 137, 162, 0}},
{(unsigned char*)"nesear", {226, 164, 168, 0}},
{(unsigned char*)"nesim", {226, 137, 130, 204, 184, 0}},
{(unsigned char*)"nexist", {226, 136, 132, 0}},
{(unsigned char*)"nexists", {226, 136, 132, 0}},
{(unsigned char*)"nfr", {240, 157, 148, 171, 0}},
{(unsigned char*)"ngE", {226, 137, 167, 204, 184, 0}},
{(unsigned char*)"nge", {226, 137, 177, 0}},
{(unsigned char*)"ngeq", {226, 137, 177, 0}},
{(unsigned char*)"ngeqq", {226, 137, 167, 204, 184, 0}},
{(unsigned char*)"ngeqslant", {226, 169, 190, 204, 184, 0}},
{(unsigned char*)"nges", {226, 169, 190, 204, 184, 0}},
{(unsigned char*)"ngsim", {226, 137, 181, 0}},
{(unsigned char*)"ngt", {226, 137, 175, 0}},
{(unsigned char*)"ngtr", {226, 137, 175, 0}},
{(unsigned char*)"nhArr", {226, 135, 142, 0}},
{(unsigned char*)"nharr", {226, 134, 174, 0}},
{(unsigned char*)"nhpar", {226, 171, 178, 0}},
{(unsigned char*)"ni", {226, 136, 139, 0}},
{(unsigned char*)"nis", {226, 139, 188, 0}},
{(unsigned char*)"nisd", {226, 139, 186, 0}},
{(unsigned char*)"niv", {226, 136, 139, 0}},
{(unsigned char*)"njcy", {209, 154, 0}},
{(unsigned char*)"nlArr", {226, 135, 141, 0}},
{(unsigned char*)"nlE", {226, 137, 166, 204, 184, 0}},
{(unsigned char*)"nlarr", {226, 134, 154, 0}},
{(unsigned char*)"nldr", {226, 128, 165, 0}},
{(unsigned char*)"nle", {226, 137, 176, 0}},
{(unsigned char*)"nleftarrow", {226, 134, 154, 0}},
{(unsigned char*)"nleftrightarrow", {226, 134, 174, 0}},
{(unsigned char*)"nleq", {226, 137, 176, 0}},
{(unsigned char*)"nleqq", {226, 137, 166, 204, 184, 0}},
{(unsigned char*)"nleqslant", {226, 169, 189, 204, 184, 0}},
{(unsigned char*)"nles", {226, 169, 189, 204, 184, 0}},
{(unsigned char*)"nless", {226, 137, 174, 0}},
{(unsigned char*)"nlsim", {226, 137, 180, 0}},
{(unsigned char*)"nlt", {226, 137, 174, 0}},
{(unsigned char*)"nltri", {226, 139, 170, 0}},
{(unsigned char*)"nltrie", {226, 139, 172, 0}},
{(unsigned char*)"nmid", {226, 136, 164, 0}},
{(unsigned char*)"nopf", {240, 157, 149, 159, 0}},
{(unsigned char*)"not", {194, 172, 0}},
{(unsigned char*)"notin", {226, 136, 137, 0}},
{(unsigned char*)"notinE", {226, 139, 185, 204, 184, 0}},
{(unsigned char*)"notindot", {226, 139, 181, 204, 184, 0}},
{(unsigned char*)"notinva", {226, 136, 137, 0}},
{(unsigned char*)"notinvb", {226, 139, 183, 0}},
{(unsigned char*)"notinvc", {226, 139, 182, 0}},
{(unsigned char*)"notni", {226, 136, 140, 0}},
{(unsigned char*)"notniva", {226, 136, 140, 0}},
{(unsigned char*)"notnivb", {226, 139, 190, 0}},
{(unsigned char*)"notnivc", {226, 139, 189, 0}},
{(unsigned char*)"npar", {226, 136, 166, 0}},
{(unsigned char*)"nparallel", {226, 136, 166, 0}},
{(unsigned char*)"nparsl", {226, 171, 189, 226, 131, 165, 0}},
{(unsigned char*)"npart", {226, 136, 130, 204, 184, 0}},
{(unsigned char*)"npolint", {226, 168, 148, 0}},
{(unsigned char*)"npr", {226, 138, 128, 0}},
{(unsigned char*)"nprcue", {226, 139, 160, 0}},
{(unsigned char*)"npre", {226, 170, 175, 204, 184, 0}},
{(unsigned char*)"nprec", {226, 138, 128, 0}},
{(unsigned char*)"npreceq", {226, 170, 175, 204, 184, 0}},
{(unsigned char*)"nrArr", {226, 135, 143, 0}},
{(unsigned char*)"nrarr", {226, 134, 155, 0}},
{(unsigned char*)"nrarrc", {226, 164, 179, 204, 184, 0}},
{(unsigned char*)"nrarrw", {226, 134, 157, 204, 184, 0}},
{(unsigned char*)"nrightarrow", {226, 134, 155, 0}},
{(unsigned char*)"nrtri", {226, 139, 171, 0}},
{(unsigned char*)"nrtrie", {226, 139, 173, 0}},
{(unsigned char*)"nsc", {226, 138, 129, 0}},
{(unsigned char*)"nsccue", {226, 139, 161, 0}},
{(unsigned char*)"nsce", {226, 170, 176, 204, 184, 0}},
{(unsigned char*)"nscr", {240, 157, 147, 131, 0}},
{(unsigned char*)"nshortmid", {226, 136, 164, 0}},
{(unsigned char*)"nshortparallel", {226, 136, 166, 0}},
{(unsigned char*)"nsim", {226, 137, 129, 0}},
{(unsigned char*)"nsime", {226, 137, 132, 0}},
{(unsigned char*)"nsimeq", {226, 137, 132, 0}},
{(unsigned char*)"nsmid", {226, 136, 164, 0}},
{(unsigned char*)"nspar", {226, 136, 166, 0}},
{(unsigned char*)"nsqsube", {226, 139, 162, 0}},
{(unsigned char*)"nsqsupe", {226, 139, 163, 0}},
{(unsigned char*)"nsub", {226, 138, 132, 0}},
{(unsigned char*)"nsubE", {226, 171, 133, 204, 184, 0}},
{(unsigned char*)"nsube", {226, 138, 136, 0}},
{(unsigned char*)"nsubset", {226, 138, 130, 226, 131, 146, 0}},
{(unsigned char*)"nsubseteq", {226, 138, 136, 0}},
{(unsigned char*)"nsubseteqq", {226, 171, 133, 204, 184, 0}},
{(unsigned char*)"nsucc", {226, 138, 129, 0}},
{(unsigned char*)"nsucceq", {226, 170, 176, 204, 184, 0}},
{(unsigned char*)"nsup", {226, 138, 133, 0}},
{(unsigned char*)"nsupE", {226, 171, 134, 204, 184, 0}},
{(unsigned char*)"nsupe", {226, 138, 137, 0}},
{(unsigned char*)"nsupset", {226, 138, 131, 226, 131, 146, 0}},
{(unsigned char*)"nsupseteq", {226, 138, 137, 0}},
{(unsigned char*)"nsupseteqq", {226, 171, 134, 204, 184, 0}},
{(unsigned char*)"ntgl", {226, 137, 185, 0}},
{(unsigned char*)"ntilde", {195, 177, 0}},
{(unsigned char*)"ntlg", {226, 137, 184, 0}},
{(unsigned char*)"ntriangleleft", {226, 139, 170, 0}},
{(unsigned char*)"ntrianglelefteq", {226, 139, 172, 0}},
{(unsigned char*)"ntriangleright", {226, 139, 171, 0}},
{(unsigned char*)"ntrianglerighteq", {226, 139, 173, 0}},
{(unsigned char*)"nu", {206, 189, 0}},
{(unsigned char*)"num", {35, 0}},
{(unsigned char*)"numero", {226, 132, 150, 0}},
{(unsigned char*)"numsp", {226, 128, 135, 0}},
{(unsigned char*)"nvDash", {226, 138, 173, 0}},
{(unsigned char*)"nvHarr", {226, 164, 132, 0}},
{(unsigned char*)"nvap", {226, 137, 141, 226, 131, 146, 0}},
{(unsigned char*)"nvdash", {226, 138, 172, 0}},
{(unsigned char*)"nvge", {226, 137, 165, 226, 131, 146, 0}},
{(unsigned char*)"nvgt", {62, 226, 131, 146, 0}},
{(unsigned char*)"nvinfin", {226, 167, 158, 0}},
{(unsigned char*)"nvlArr", {226, 164, 130, 0}},
{(unsigned char*)"nvle", {226, 137, 164, 226, 131, 146, 0}},
{(unsigned char*)"nvlt", {60, 226, 131, 146, 0}},
{(unsigned char*)"nvltrie", {226, 138, 180, 226, 131, 146, 0}},
{(unsigned char*)"nvrArr", {226, 164, 131, 0}},
{(unsigned char*)"nvrtrie", {226, 138, 181, 226, 131, 146, 0}},
{(unsigned char*)"nvsim", {226, 136, 188, 226, 131, 146, 0}},
{(unsigned char*)"nwArr", {226, 135, 150, 0}},
{(unsigned char*)"nwarhk", {226, 164, 163, 0}},
{(unsigned char*)"nwarr", {226, 134, 150, 0}},
{(unsigned char*)"nwarrow", {226, 134, 150, 0}},
{(unsigned char*)"nwnear", {226, 164, 167, 0}},
{(unsigned char*)"oS", {226, 147, 136, 0}},
{(unsigned char*)"oacute", {195, 179, 0}},
{(unsigned char*)"oast", {226, 138, 155, 0}},
{(unsigned char*)"ocir", {226, 138, 154, 0}},
{(unsigned char*)"ocirc", {195, 180, 0}},
{(unsigned char*)"ocy", {208, 190, 0}},
{(unsigned char*)"odash", {226, 138, 157, 0}},
{(unsigned char*)"odblac", {197, 145, 0}},
{(unsigned char*)"odiv", {226, 168, 184, 0}},
{(unsigned char*)"odot", {226, 138, 153, 0}},
{(unsigned char*)"odsold", {226, 166, 188, 0}},
{(unsigned char*)"oelig", {197, 147, 0}},
{(unsigned char*)"ofcir", {226, 166, 191, 0}},
{(unsigned char*)"ofr", {240, 157, 148, 172, 0}},
{(unsigned char*)"ogon", {203, 155, 0}},
{(unsigned char*)"ograve", {195, 178, 0}},
{(unsigned char*)"ogt", {226, 167, 129, 0}},
{(unsigned char*)"ohbar", {226, 166, 181, 0}},
{(unsigned char*)"ohm", {206, 169, 0}},
{(unsigned char*)"oint", {226, 136, 174, 0}},
{(unsigned char*)"olarr", {226, 134, 186, 0}},
{(unsigned char*)"olcir", {226, 166, 190, 0}},
{(unsigned char*)"olcross", {226, 166, 187, 0}},
{(unsigned char*)"oline", {226, 128, 190, 0}},
{(unsigned char*)"olt", {226, 167, 128, 0}},
{(unsigned char*)"omacr", {197, 141, 0}},
{(unsigned char*)"omega", {207, 137, 0}},
{(unsigned char*)"omicron", {206, 191, 0}},
{(unsigned char*)"omid", {226, 166, 182, 0}},
{(unsigned char*)"ominus", {226, 138, 150, 0}},
{(unsigned char*)"oopf", {240, 157, 149, 160, 0}},
{(unsigned char*)"opar", {226, 166, 183, 0}},
{(unsigned char*)"operp", {226, 166, 185, 0}},
{(unsigned char*)"oplus", {226, 138, 149, 0}},
{(unsigned char*)"or", {226, 136, 168, 0}},
{(unsigned char*)"orarr", {226, 134, 187, 0}},
{(unsigned char*)"ord", {226, 169, 157, 0}},
{(unsigned char*)"order", {226, 132, 180, 0}},
{(unsigned char*)"orderof", {226, 132, 180, 0}},
{(unsigned char*)"ordf", {194, 170, 0}},
{(unsigned char*)"ordm", {194, 186, 0}},
{(unsigned char*)"origof", {226, 138, 182, 0}},
{(unsigned char*)"oror", {226, 169, 150, 0}},
{(unsigned char*)"orslope", {226, 169, 151, 0}},
{(unsigned char*)"orv", {226, 169, 155, 0}},
{(unsigned char*)"oscr", {226, 132, 180, 0}},
{(unsigned char*)"oslash", {195, 184, 0}},
{(unsigned char*)"osol", {226, 138, 152, 0}},
{(unsigned char*)"otilde", {195, 181, 0}},
{(unsigned char*)"otimes", {226, 138, 151, 0}},
{(unsigned char*)"otimesas", {226, 168, 182, 0}},
{(unsigned char*)"ouml", {195, 182, 0}},
{(unsigned char*)"ovbar", {226, 140, 189, 0}},
{(unsigned char*)"par", {226, 136, 165, 0}},
{(unsigned char*)"para", {194, 182, 0}},
{(unsigned char*)"parallel", {226, 136, 165, 0}},
{(unsigned char*)"parsim", {226, 171, 179, 0}},
{(unsigned char*)"parsl", {226, 171, 189, 0}},
{(unsigned char*)"part", {226, 136, 130, 0}},
{(unsigned char*)"pcy", {208, 191, 0}},
{(unsigned char*)"percnt", {37, 0}},
{(unsigned char*)"period", {46, 0}},
{(unsigned char*)"permil", {226, 128, 176, 0}},
{(unsigned char*)"perp", {226, 138, 165, 0}},
{(unsigned char*)"pertenk", {226, 128, 177, 0}},
{(unsigned char*)"pfr", {240, 157, 148, 173, 0}},
{(unsigned char*)"phi", {207, 134, 0}},
{(unsigned char*)"phiv", {207, 149, 0}},
{(unsigned char*)"phmmat", {226, 132, 179, 0}},
{(unsigned char*)"phone", {226, 152, 142, 0}},
{(unsigned char*)"pi", {207, 128, 0}},
{(unsigned char*)"pitchfork", {226, 139, 148, 0}},
{(unsigned char*)"piv", {207, 150, 0}},
{(unsigned char*)"planck", {226, 132, 143, 0}},
{(unsigned char*)"planckh", {226, 132, 142, 0}},
{(unsigned char*)"plankv", {226, 132, 143, 0}},
{(unsigned char*)"plus", {43, 0}},
{(unsigned char*)"plusacir", {226, 168, 163, 0}},
{(unsigned char*)"plusb", {226, 138, 158, 0}},
{(unsigned char*)"pluscir", {226, 168, 162, 0}},
{(unsigned char*)"plusdo", {226, 136, 148, 0}},
{(unsigned char*)"plusdu", {226, 168, 165, 0}},
{(unsigned char*)"pluse", {226, 169, 178, 0}},
{(unsigned char*)"plusmn", {194, 177, 0}},
{(unsigned char*)"plussim", {226, 168, 166, 0}},
{(unsigned char*)"plustwo", {226, 168, 167, 0}},
{(unsigned char*)"pm", {194, 177, 0}},
{(unsigned char*)"pointint", {226, 168, 149, 0}},
{(unsigned char*)"popf", {240, 157, 149, 161, 0}},
{(unsigned char*)"pound", {194, 163, 0}},
{(unsigned char*)"pr", {226, 137, 186, 0}},
{(unsigned char*)"prE", {226, 170, 179, 0}},
{(unsigned char*)"prap", {226, 170, 183, 0}},
{(unsigned char*)"prcue", {226, 137, 188, 0}},
{(unsigned char*)"pre", {226, 170, 175, 0}},
{(unsigned char*)"prec", {226, 137, 186, 0}},
{(unsigned char*)"precapprox", {226, 170, 183, 0}},
{(unsigned char*)"preccurlyeq", {226, 137, 188, 0}},
{(unsigned char*)"preceq", {226, 170, 175, 0}},
{(unsigned char*)"precnapprox", {226, 170, 185, 0}},
{(unsigned char*)"precneqq", {226, 170, 181, 0}},
{(unsigned char*)"precnsim", {226, 139, 168, 0}},
{(unsigned char*)"precsim", {226, 137, 190, 0}},
{(unsigned char*)"prime", {226, 128, 178, 0}},
{(unsigned char*)"primes", {226, 132, 153, 0}},
{(unsigned char*)"prnE", {226, 170, 181, 0}},
{(unsigned char*)"prnap", {226, 170, 185, 0}},
{(unsigned char*)"prnsim", {226, 139, 168, 0}},
{(unsigned char*)"prod", {226, 136, 143, 0}},
{(unsigned char*)"profalar", {226, 140, 174, 0}},
{(unsigned char*)"profline", {226, 140, 146, 0}},
{(unsigned char*)"profsurf", {226, 140, 147, 0}},
{(unsigned char*)"prop", {226, 136, 157, 0}},
{(unsigned char*)"propto", {226, 136, 157, 0}},
{(unsigned char*)"prsim", {226, 137, 190, 0}},
{(unsigned char*)"prurel", {226, 138, 176, 0}},
{(unsigned char*)"pscr", {240, 157, 147, 133, 0}},
{(unsigned char*)"psi", {207, 136, 0}},
{(unsigned char*)"puncsp", {226, 128, 136, 0}},
{(unsigned char*)"qfr", {240, 157, 148, 174, 0}},
{(unsigned char*)"qint", {226, 168, 140, 0}},
{(unsigned char*)"qopf", {240, 157, 149, 162, 0}},
{(unsigned char*)"qprime", {226, 129, 151, 0}},
{(unsigned char*)"qscr", {240, 157, 147, 134, 0}},
{(unsigned char*)"quaternions", {226, 132, 141, 0}},
{(unsigned char*)"quatint", {226, 168, 150, 0}},
{(unsigned char*)"quest", {63, 0}},
{(unsigned char*)"questeq", {226, 137, 159, 0}},
{(unsigned char*)"quot", {34, 0}},
{(unsigned char*)"rAarr", {226, 135, 155, 0}},
{(unsigned char*)"rArr", {226, 135, 146, 0}},
{(unsigned char*)"rAtail", {226, 164, 156, 0}},
{(unsigned char*)"rBarr", {226, 164, 143, 0}},
{(unsigned char*)"rHar", {226, 165, 164, 0}},
{(unsigned char*)"race", {226, 136, 189, 204, 177, 0}},
{(unsigned char*)"racute", {197, 149, 0}},
{(unsigned char*)"radic", {226, 136, 154, 0}},
{(unsigned char*)"raemptyv", {226, 166, 179, 0}},
{(unsigned char*)"rang", {226, 159, 169, 0}},
{(unsigned char*)"rangd", {226, 166, 146, 0}},
{(unsigned char*)"range", {226, 166, 165, 0}},
{(unsigned char*)"rangle", {226, 159, 169, 0}},
{(unsigned char*)"raquo", {194, 187, 0}},
{(unsigned char*)"rarr", {226, 134, 146, 0}},
{(unsigned char*)"rarrap", {226, 165, 181, 0}},
{(unsigned char*)"rarrb", {226, 135, 165, 0}},
{(unsigned char*)"rarrbfs", {226, 164, 160, 0}},
{(unsigned char*)"rarrc", {226, 164, 179, 0}},
{(unsigned char*)"rarrfs", {226, 164, 158, 0}},
{(unsigned char*)"rarrhk", {226, 134, 170, 0}},
{(unsigned char*)"rarrlp", {226, 134, 172, 0}},
{(unsigned char*)"rarrpl", {226, 165, 133, 0}},
{(unsigned char*)"rarrsim", {226, 165, 180, 0}},
{(unsigned char*)"rarrtl", {226, 134, 163, 0}},
{(unsigned char*)"rarrw", {226, 134, 157, 0}},
{(unsigned char*)"ratail", {226, 164, 154, 0}},
{(unsigned char*)"ratio", {226, 136, 182, 0}},
{(unsigned char*)"rationals", {226, 132, 154, 0}},
{(unsigned char*)"rbarr", {226, 164, 141, 0}},
{(unsigned char*)"rbbrk", {226, 157, 179, 0}},
{(unsigned char*)"rbrace", {125, 0}},
{(unsigned char*)"rbrack", {93, 0}},
{(unsigned char*)"rbrke", {226, 166, 140, 0}},
{(unsigned char*)"rbrksld", {226, 166, 142, 0}},
{(unsigned char*)"rbrkslu", {226, 166, 144, 0}},
{(unsigned char*)"rcaron", {197, 153, 0}},
{(unsigned char*)"rcedil", {197, 151, 0}},
{(unsigned char*)"rceil", {226, 140, 137, 0}},
{(unsigned char*)"rcub", {125, 0}},
{(unsigned char*)"rcy", {209, 128, 0}},
{(unsigned char*)"rdca", {226, 164, 183, 0}},
{(unsigned char*)"rdldhar", {226, 165, 169, 0}},
{(unsigned char*)"rdquo", {226, 128, 157, 0}},
{(unsigned char*)"rdquor", {226, 128, 157, 0}},
{(unsigned char*)"rdsh", {226, 134, 179, 0}},
{(unsigned char*)"real", {226, 132, 156, 0}},
{(unsigned char*)"realine", {226, 132, 155, 0}},
{(unsigned char*)"realpart", {226, 132, 156, 0}},
{(unsigned char*)"reals", {226, 132, 157, 0}},
{(unsigned char*)"rect", {226, 150, 173, 0}},
{(unsigned char*)"reg", {194, 174, 0}},
{(unsigned char*)"rfisht", {226, 165, 189, 0}},
{(unsigned char*)"rfloor", {226, 140, 139, 0}},
{(unsigned char*)"rfr", {240, 157, 148, 175, 0}},
{(unsigned char*)"rhard", {226, 135, 129, 0}},
{(unsigned char*)"rharu", {226, 135, 128, 0}},
{(unsigned char*)"rharul", {226, 165, 172, 0}},
{(unsigned char*)"rho", {207, 129, 0}},
{(unsigned char*)"rhov", {207, 177, 0}},
{(unsigned char*)"rightarrow", {226, 134, 146, 0}},
{(unsigned char*)"rightarrowtail", {226, 134, 163, 0}},
{(unsigned char*)"rightharpoondown", {226, 135, 129, 0}},
{(unsigned char*)"rightharpoonup", {226, 135, 128, 0}},
{(unsigned char*)"rightleftarrows", {226, 135, 132, 0}},
{(unsigned char*)"rightleftharpoons", {226, 135, 140, 0}},
{(unsigned char*)"rightrightarrows", {226, 135, 137, 0}},
{(unsigned char*)"rightsquigarrow", {226, 134, 157, 0}},
{(unsigned char*)"rightthreetimes", {226, 139, 140, 0}},
{(unsigned char*)"ring", {203, 154, 0}},
{(unsigned char*)"risingdotseq", {226, 137, 147, 0}},
{(unsigned char*)"rlarr", {226, 135, 132, 0}},
{(unsigned char*)"rlhar", {226, 135, 140, 0}},
{(unsigned char*)"rlm", {226, 128, 143, 0}},
{(unsigned char*)"rmoust", {226, 142, 177, 0}},
{(unsigned char*)"rmoustache", {226, 142, 177, 0}},
{(unsigned char*)"rnmid", {226, 171, 174, 0}},
{(unsigned char*)"roang", {226, 159, 173, 0}},
{(unsigned char*)"roarr", {226, 135, 190, 0}},
{(unsigned char*)"robrk", {226, 159, 167, 0}},
{(unsigned char*)"ropar", {226, 166, 134, 0}},
{(unsigned char*)"ropf", {240, 157, 149, 163, 0}},
{(unsigned char*)"roplus", {226, 168, 174, 0}},
{(unsigned char*)"rotimes", {226, 168, 181, 0}},
{(unsigned char*)"rpar", {41, 0}},
{(unsigned char*)"rpargt", {226, 166, 148, 0}},
{(unsigned char*)"rppolint", {226, 168, 146, 0}},
{(unsigned char*)"rrarr", {226, 135, 137, 0}},
{(unsigned char*)"rsaquo", {226, 128, 186, 0}},
{(unsigned char*)"rscr", {240, 157, 147, 135, 0}},
{(unsigned char*)"rsh", {226, 134, 177, 0}},
{(unsigned char*)"rsqb", {93, 0}},
{(unsigned char*)"rsquo", {226, 128, 153, 0}},
{(unsigned char*)"rsquor", {226, 128, 153, 0}},
{(unsigned char*)"rthree", {226, 139, 140, 0}},
{(unsigned char*)"rtimes", {226, 139, 138, 0}},
{(unsigned char*)"rtri", {226, 150, 185, 0}},
{(unsigned char*)"rtrie", {226, 138, 181, 0}},
{(unsigned char*)"rtrif", {226, 150, 184, 0}},
{(unsigned char*)"rtriltri", {226, 167, 142, 0}},
{(unsigned char*)"ruluhar", {226, 165, 168, 0}},
{(unsigned char*)"rx", {226, 132, 158, 0}},
{(unsigned char*)"sacute", {197, 155, 0}},
{(unsigned char*)"sbquo", {226, 128, 154, 0}},
{(unsigned char*)"sc", {226, 137, 187, 0}},
{(unsigned char*)"scE", {226, 170, 180, 0}},
{(unsigned char*)"scap", {226, 170, 184, 0}},
{(unsigned char*)"scaron", {197, 161, 0}},
{(unsigned char*)"sccue", {226, 137, 189, 0}},
{(unsigned char*)"sce", {226, 170, 176, 0}},
{(unsigned char*)"scedil", {197, 159, 0}},
{(unsigned char*)"scirc", {197, 157, 0}},
{(unsigned char*)"scnE", {226, 170, 182, 0}},
{(unsigned char*)"scnap", {226, 170, 186, 0}},
{(unsigned char*)"scnsim", {226, 139, 169, 0}},
{(unsigned char*)"scpolint", {226, 168, 147, 0}},
{(unsigned char*)"scsim", {226, 137, 191, 0}},
{(unsigned char*)"scy", {209, 129, 0}},
{(unsigned char*)"sdot", {226, 139, 133, 0}},
{(unsigned char*)"sdotb", {226, 138, 161, 0}},
{(unsigned char*)"sdote", {226, 169, 166, 0}},
{(unsigned char*)"seArr", {226, 135, 152, 0}},
{(unsigned char*)"searhk", {226, 164, 165, 0}},
{(unsigned char*)"searr", {226, 134, 152, 0}},
{(unsigned char*)"searrow", {226, 134, 152, 0}},
{(unsigned char*)"sect", {194, 167, 0}},
{(unsigned char*)"semi", {59, 0}},
{(unsigned char*)"seswar", {226, 164, 169, 0}},
{(unsigned char*)"setminus", {226, 136, 150, 0}},
{(unsigned char*)"setmn", {226, 136, 150, 0}},
{(unsigned char*)"sext", {226, 156, 182, 0}},
{(unsigned char*)"sfr", {240, 157, 148, 176, 0}},
{(unsigned char*)"sfrown", {226, 140, 162, 0}},
{(unsigned char*)"sharp", {226, 153, 175, 0}},
{(unsigned char*)"shchcy", {209, 137, 0}},
{(unsigned char*)"shcy", {209, 136, 0}},
{(unsigned char*)"shortmid", {226, 136, 163, 0}},
{(unsigned char*)"shortparallel", {226, 136, 165, 0}},
{(unsigned char*)"shy", {194, 173, 0}},
{(unsigned char*)"sigma", {207, 131, 0}},
{(unsigned char*)"sigmaf", {207, 130, 0}},
{(unsigned char*)"sigmav", {207, 130, 0}},
{(unsigned char*)"sim", {226, 136, 188, 0}},
{(unsigned char*)"simdot", {226, 169, 170, 0}},
{(unsigned char*)"sime", {226, 137, 131, 0}},
{(unsigned char*)"simeq", {226, 137, 131, 0}},
{(unsigned char*)"simg", {226, 170, 158, 0}},
{(unsigned char*)"simgE", {226, 170, 160, 0}},
{(unsigned char*)"siml", {226, 170, 157, 0}},
{(unsigned char*)"simlE", {226, 170, 159, 0}},
{(unsigned char*)"simne", {226, 137, 134, 0}},
{(unsigned char*)"simplus", {226, 168, 164, 0}},
{(unsigned char*)"simrarr", {226, 165, 178, 0}},
{(unsigned char*)"slarr", {226, 134, 144, 0}},
{(unsigned char*)"smallsetminus", {226, 136, 150, 0}},
{(unsigned char*)"smashp", {226, 168, 179, 0}},
{(unsigned char*)"smeparsl", {226, 167, 164, 0}},
{(unsigned char*)"smid", {226, 136, 163, 0}},
{(unsigned char*)"smile", {226, 140, 163, 0}},
{(unsigned char*)"smt", {226, 170, 170, 0}},
{(unsigned char*)"smte", {226, 170, 172, 0}},
{(unsigned char*)"smtes", {226, 170, 172, 239, 184, 128, 0}},
{(unsigned char*)"softcy", {209, 140, 0}},
{(unsigned char*)"sol", {47, 0}},
{(unsigned char*)"solb", {226, 167, 132, 0}},
{(unsigned char*)"solbar", {226, 140, 191, 0}},
{(unsigned char*)"sopf", {240, 157, 149, 164, 0}},
{(unsigned char*)"spades", {226, 153, 160, 0}},
{(unsigned char*)"spadesuit", {226, 153, 160, 0}},
{(unsigned char*)"spar", {226, 136, 165, 0}},
{(unsigned char*)"sqcap", {226, 138, 147, 0}},
{(unsigned char*)"sqcaps", {226, 138, 147, 239, 184, 128, 0}},
{(unsigned char*)"sqcup", {226, 138, 148, 0}},
{(unsigned char*)"sqcups", {226, 138, 148, 239, 184, 128, 0}},
{(unsigned char*)"sqsub", {226, 138, 143, 0}},
{(unsigned char*)"sqsube", {226, 138, 145, 0}},
{(unsigned char*)"sqsubset", {226, 138, 143, 0}},
{(unsigned char*)"sqsubseteq", {226, 138, 145, 0}},
{(unsigned char*)"sqsup", {226, 138, 144, 0}},
{(unsigned char*)"sqsupe", {226, 138, 146, 0}},
{(unsigned char*)"sqsupset", {226, 138, 144, 0}},
{(unsigned char*)"sqsupseteq", {226, 138, 146, 0}},
{(unsigned char*)"squ", {226, 150, 161, 0}},
{(unsigned char*)"square", {226, 150, 161, 0}},
{(unsigned char*)"squarf", {226, 150, 170, 0}},
{(unsigned char*)"squf", {226, 150, 170, 0}},
{(unsigned char*)"srarr", {226, 134, 146, 0}},
{(unsigned char*)"sscr", {240, 157, 147, 136, 0}},
{(unsigned char*)"ssetmn", {226, 136, 150, 0}},
{(unsigned char*)"ssmile", {226, 140, 163, 0}},
{(unsigned char*)"sstarf", {226, 139, 134, 0}},
{(unsigned char*)"star", {226, 152, 134, 0}},
{(unsigned char*)"starf", {226, 152, 133, 0}},
{(unsigned char*)"straightepsilon", {207, 181, 0}},
{(unsigned char*)"straightphi", {207, 149, 0}},
{(unsigned char*)"strns", {194, 175, 0}},
{(unsigned char*)"sub", {226, 138, 130, 0}},
{(unsigned char*)"subE", {226, 171, 133, 0}},
{(unsigned char*)"subdot", {226, 170, 189, 0}},
{(unsigned char*)"sube", {226, 138, 134, 0}},
{(unsigned char*)"subedot", {226, 171, 131, 0}},
{(unsigned char*)"submult", {226, 171, 129, 0}},
{(unsigned char*)"subnE", {226, 171, 139, 0}},
{(unsigned char*)"subne", {226, 138, 138, 0}},
{(unsigned char*)"subplus", {226, 170, 191, 0}},
{(unsigned char*)"subrarr", {226, 165, 185, 0}},
{(unsigned char*)"subset", {226, 138, 130, 0}},
{(unsigned char*)"subseteq", {226, 138, 134, 0}},
{(unsigned char*)"subseteqq", {226, 171, 133, 0}},
{(unsigned char*)"subsetneq", {226, 138, 138, 0}},
{(unsigned char*)"subsetneqq", {226, 171, 139, 0}},
{(unsigned char*)"subsim", {226, 171, 135, 0}},
{(unsigned char*)"subsub", {226, 171, 149, 0}},
{(unsigned char*)"subsup", {226, 171, 147, 0}},
{(unsigned char*)"succ", {226, 137, 187, 0}},
{(unsigned char*)"succapprox", {226, 170, 184, 0}},
{(unsigned char*)"succcurlyeq", {226, 137, 189, 0}},
{(unsigned char*)"succeq", {226, 170, 176, 0}},
{(unsigned char*)"succnapprox", {226, 170, 186, 0}},
{(unsigned char*)"succneqq", {226, 170, 182, 0}},
{(unsigned char*)"succnsim", {226, 139, 169, 0}},
{(unsigned char*)"succsim", {226, 137, 191, 0}},
{(unsigned char*)"sum", {226, 136, 145, 0}},
{(unsigned char*)"sung", {226, 153, 170, 0}},
{(unsigned char*)"sup", {226, 138, 131, 0}},
{(unsigned char*)"sup1", {194, 185, 0}},
{(unsigned char*)"sup2", {194, 178, 0}},
{(unsigned char*)"sup3", {194, 179, 0}},
{(unsigned char*)"supE", {226, 171, 134, 0}},
{(unsigned char*)"supdot", {226, 170, 190, 0}},
{(unsigned char*)"supdsub", {226, 171, 152, 0}},
{(unsigned char*)"supe", {226, 138, 135, 0}},
{(unsigned char*)"supedot", {226, 171, 132, 0}},
{(unsigned char*)"suphsol", {226, 159, 137, 0}},
{(unsigned char*)"suphsub", {226, 171, 151, 0}},
{(unsigned char*)"suplarr", {226, 165, 187, 0}},
{(unsigned char*)"supmult", {226, 171, 130, 0}},
{(unsigned char*)"supnE", {226, 171, 140, 0}},
{(unsigned char*)"supne", {226, 138, 139, 0}},
{(unsigned char*)"supplus", {226, 171, 128, 0}},
{(unsigned char*)"supset", {226, 138, 131, 0}},
{(unsigned char*)"supseteq", {226, 138, 135, 0}},
{(unsigned char*)"supseteqq", {226, 171, 134, 0}},
{(unsigned char*)"supsetneq", {226, 138, 139, 0}},
{(unsigned char*)"supsetneqq", {226, 171, 140, 0}},
{(unsigned char*)"supsim", {226, 171, 136, 0}},
{(unsigned char*)"supsub", {226, 171, 148, 0}},
{(unsigned char*)"supsup", {226, 171, 150, 0}},
{(unsigned char*)"swArr", {226, 135, 153, 0}},
{(unsigned char*)"swarhk", {226, 164, 166, 0}},
{(unsigned char*)"swarr", {226, 134, 153, 0}},
{(unsigned char*)"swarrow", {226, 134, 153, 0}},
{(unsigned char*)"swnwar", {226, 164, 170, 0}},
{(unsigned char*)"szlig", {195, 159, 0}},
{(unsigned char*)"target", {226, 140, 150, 0}},
{(unsigned char*)"tau", {207, 132, 0}},
{(unsigned char*)"tbrk", {226, 142, 180, 0}},
{(unsigned char*)"tcaron", {197, 165, 0}},
{(unsigned char*)"tcedil", {197, 163, 0}},
{(unsigned char*)"tcy", {209, 130, 0}},
{(unsigned char*)"tdot", {226, 131, 155, 0}},
{(unsigned char*)"telrec", {226, 140, 149, 0}},
{(unsigned char*)"tfr", {240, 157, 148, 177, 0}},
{(unsigned char*)"there4", {226, 136, 180, 0}},
{(unsigned char*)"therefore", {226, 136, 180, 0}},
{(unsigned char*)"theta", {206, 184, 0}},
{(unsigned char*)"thetasym", {207, 145, 0}},
{(unsigned char*)"thetav", {207, 145, 0}},
{(unsigned char*)"thickapprox", {226, 137, 136, 0}},
{(unsigned char*)"thicksim", {226, 136, 188, 0}},
{(unsigned char*)"thinsp", {226, 128, 137, 0}},
{(unsigned char*)"thkap", {226, 137, 136, 0}},
{(unsigned char*)"thksim", {226, 136, 188, 0}},
{(unsigned char*)"thorn", {195, 190, 0}},
{(unsigned char*)"tilde", {203, 156, 0}},
{(unsigned char*)"times", {195, 151, 0}},
{(unsigned char*)"timesb", {226, 138, 160, 0}},
{(unsigned char*)"timesbar", {226, 168, 177, 0}},
{(unsigned char*)"timesd", {226, 168, 176, 0}},
{(unsigned char*)"tint", {226, 136, 173, 0}},
{(unsigned char*)"toea", {226, 164, 168, 0}},
{(unsigned char*)"top", {226, 138, 164, 0}},
{(unsigned char*)"topbot", {226, 140, 182, 0}},
{(unsigned char*)"topcir", {226, 171, 177, 0}},
{(unsigned char*)"topf", {240, 157, 149, 165, 0}},
{(unsigned char*)"topfork", {226, 171, 154, 0}},
{(unsigned char*)"tosa", {226, 164, 169, 0}},
{(unsigned char*)"tprime", {226, 128, 180, 0}},
{(unsigned char*)"trade", {226, 132, 162, 0}},
{(unsigned char*)"triangle", {226, 150, 181, 0}},
{(unsigned char*)"triangledown", {226, 150, 191, 0}},
{(unsigned char*)"triangleleft", {226, 151, 131, 0}},
{(unsigned char*)"trianglelefteq", {226, 138, 180, 0}},
{(unsigned char*)"triangleq", {226, 137, 156, 0}},
{(unsigned char*)"triangleright", {226, 150, 185, 0}},
{(unsigned char*)"trianglerighteq", {226, 138, 181, 0}},
{(unsigned char*)"tridot", {226, 151, 172, 0}},
{(unsigned char*)"trie", {226, 137, 156, 0}},
{(unsigned char*)"triminus", {226, 168, 186, 0}},
{(unsigned char*)"triplus", {226, 168, 185, 0}},
{(unsigned char*)"trisb", {226, 167, 141, 0}},
{(unsigned char*)"tritime", {226, 168, 187, 0}},
{(unsigned char*)"trpezium", {226, 143, 162, 0}},
{(unsigned char*)"tscr", {240, 157, 147, 137, 0}},
{(unsigned char*)"tscy", {209, 134, 0}},
{(unsigned char*)"tshcy", {209, 155, 0}},
{(unsigned char*)"tstrok", {197, 167, 0}},
{(unsigned char*)"twixt", {226, 137, 172, 0}},
{(unsigned char*)"twoheadleftarrow", {226, 134, 158, 0}},
{(unsigned char*)"twoheadrightarrow", {226, 134, 160, 0}},
{(unsigned char*)"uArr", {226, 135, 145, 0}},
{(unsigned char*)"uHar", {226, 165, 163, 0}},
{(unsigned char*)"uacute", {195, 186, 0}},
{(unsigned char*)"uarr", {226, 134, 145, 0}},
{(unsigned char*)"ubrcy", {209, 158, 0}},
{(unsigned char*)"ubreve", {197, 173, 0}},
{(unsigned char*)"ucirc", {195, 187, 0}},
{(unsigned char*)"ucy", {209, 131, 0}},
{(unsigned char*)"udarr", {226, 135, 133, 0}},
{(unsigned char*)"udblac", {197, 177, 0}},
{(unsigned char*)"udhar", {226, 165, 174, 0}},
{(unsigned char*)"ufisht", {226, 165, 190, 0}},
{(unsigned char*)"ufr", {240, 157, 148, 178, 0}},
{(unsigned char*)"ugrave", {195, 185, 0}},
{(unsigned char*)"uharl", {226, 134, 191, 0}},
{(unsigned char*)"uharr", {226, 134, 190, 0}},
{(unsigned char*)"uhblk", {226, 150, 128, 0}},
{(unsigned char*)"ulcorn", {226, 140, 156, 0}},
{(unsigned char*)"ulcorner", {226, 140, 156, 0}},
{(unsigned char*)"ulcrop", {226, 140, 143, 0}},
{(unsigned char*)"ultri", {226, 151, 184, 0}},
{(unsigned char*)"umacr", {197, 171, 0}},
{(unsigned char*)"uml", {194, 168, 0}},
{(unsigned char*)"uogon", {197, 179, 0}},
{(unsigned char*)"uopf", {240, 157, 149, 166, 0}},
{(unsigned char*)"uparrow", {226, 134, 145, 0}},
{(unsigned char*)"updownarrow", {226, 134, 149, 0}},
{(unsigned char*)"upharpoonleft", {226, 134, 191, 0}},
{(unsigned char*)"upharpoonright", {226, 134, 190, 0}},
{(unsigned char*)"uplus", {226, 138, 142, 0}},
{(unsigned char*)"upsi", {207, 133, 0}},
{(unsigned char*)"upsih", {207, 146, 0}},
{(unsigned char*)"upsilon", {207, 133, 0}},
{(unsigned char*)"upuparrows", {226, 135, 136, 0}},
{(unsigned char*)"urcorn", {226, 140, 157, 0}},
{(unsigned char*)"urcorner", {226, 140, 157, 0}},
{(unsigned char*)"urcrop", {226, 140, 142, 0}},
{(unsigned char*)"uring", {197, 175, 0}},
{(unsigned char*)"urtri", {226, 151, 185, 0}},
{(unsigned char*)"uscr", {240, 157, 147, 138, 0}},
{(unsigned char*)"utdot", {226, 139, 176, 0}},
{(unsigned char*)"utilde", {197, 169, 0}},
{(unsigned char*)"utri", {226, 150, 181, 0}},
{(unsigned char*)"utrif", {226, 150, 180, 0}},
{(unsigned char*)"uuarr", {226, 135, 136, 0}},
{(unsigned char*)"uuml", {195, 188, 0}},
{(unsigned char*)"uwangle", {226, 166, 167, 0}},
{(unsigned char*)"vArr", {226, 135, 149, 0}},
{(unsigned char*)"vBar", {226, 171, 168, 0}},
{(unsigned char*)"vBarv", {226, 171, 169, 0}},
{(unsigned char*)"vDash", {226, 138, 168, 0}},
{(unsigned char*)"vangrt", {226, 166, 156, 0}},
{(unsigned char*)"varepsilon", {207, 181, 0}},
{(unsigned char*)"varkappa", {207, 176, 0}},
{(unsigned char*)"varnothing", {226, 136, 133, 0}},
{(unsigned char*)"varphi", {207, 149, 0}},
{(unsigned char*)"varpi", {207, 150, 0}},
{(unsigned char*)"varpropto", {226, 136, 157, 0}},
{(unsigned char*)"varr", {226, 134, 149, 0}},
{(unsigned char*)"varrho", {207, 177, 0}},
{(unsigned char*)"varsigma", {207, 130, 0}},
{(unsigned char*)"varsubsetneq", {226, 138, 138, 239, 184, 128, 0}},
{(unsigned char*)"varsubsetneqq", {226, 171, 139, 239, 184, 128, 0}},
{(unsigned char*)"varsupsetneq", {226, 138, 139, 239, 184, 128, 0}},
{(unsigned char*)"varsupsetneqq", {226, 171, 140, 239, 184, 128, 0}},
{(unsigned char*)"vartheta", {207, 145, 0}},
{(unsigned char*)"vartriangleleft", {226, 138, 178, 0}},
{(unsigned char*)"vartriangleright", {226, 138, 179, 0}},
{(unsigned char*)"vcy", {208, 178, 0}},
{(unsigned char*)"vdash", {226, 138, 162, 0}},
{(unsigned char*)"vee", {226, 136, 168, 0}},
{(unsigned char*)"veebar", {226, 138, 187, 0}},
{(unsigned char*)"veeeq", {226, 137, 154, 0}},
{(unsigned char*)"vellip", {226, 139, 174, 0}},
{(unsigned char*)"verbar", {124, 0}},
{(unsigned char*)"vert", {124, 0}},
{(unsigned char*)"vfr", {240, 157, 148, 179, 0}},
{(unsigned char*)"vltri", {226, 138, 178, 0}},
{(unsigned char*)"vnsub", {226, 138, 130, 226, 131, 146, 0}},
{(unsigned char*)"vnsup", {226, 138, 131, 226, 131, 146, 0}},
{(unsigned char*)"vopf", {240, 157, 149, 167, 0}},
{(unsigned char*)"vprop", {226, 136, 157, 0}},
{(unsigned char*)"vrtri", {226, 138, 179, 0}},
{(unsigned char*)"vscr", {240, 157, 147, 139, 0}},
{(unsigned char*)"vsubnE", {226, 171, 139, 239, 184, 128, 0}},
{(unsigned char*)"vsubne", {226, 138, 138, 239, 184, 128, 0}},
{(unsigned char*)"vsupnE", {226, 171, 140, 239, 184, 128, 0}},
{(unsigned char*)"vsupne", {226, 138, 139, 239, 184, 128, 0}},
{(unsigned char*)"vzigzag", {226, 166, 154, 0}},
{(unsigned char*)"wcirc", {197, 181, 0}},
{(unsigned char*)"wedbar", {226, 169, 159, 0}},
{(unsigned char*)"wedge", {226, 136, 167, 0}},
{(unsigned char*)"wedgeq", {226, 137, 153, 0}},
{(unsigned char*)"weierp", {226, 132, 152, 0}},
{(unsigned char*)"wfr", {240, 157, 148, 180, 0}},
{(unsigned char*)"wopf", {240, 157, 149, 168, 0}},
{(unsigned char*)"wp", {226, 132, 152, 0}},
{(unsigned char*)"wr", {226, 137, 128, 0}},
{(unsigned char*)"wreath", {226, 137, 128, 0}},
{(unsigned char*)"wscr", {240, 157, 147, 140, 0}},
{(unsigned char*)"xcap", {226, 139, 130, 0}},
{(unsigned char*)"xcirc", {226, 151, 175, 0}},
{(unsigned char*)"xcup", {226, 139, 131, 0}},
{(unsigned char*)"xdtri", {226, 150, 189, 0}},
{(unsigned char*)"xfr", {240, 157, 148, 181, 0}},
{(unsigned char*)"xhArr", {226, 159, 186, 0}},
{(unsigned char*)"xharr", {226, 159, 183, 0}},
{(unsigned char*)"xi", {206, 190, 0}},
{(unsigned char*)"xlArr", {226, 159, 184, 0}},
{(unsigned char*)"xlarr", {226, 159, 181, 0}},
{(unsigned char*)"xmap", {226, 159, 188, 0}},
{(unsigned char*)"xnis", {226, 139, 187, 0}},
{(unsigned char*)"xodot", {226, 168, 128, 0}},
{(unsigned char*)"xopf", {240, 157, 149, 169, 0}},
{(unsigned char*)"xoplus", {226, 168, 129, 0}},
{(unsigned char*)"xotime", {226, 168, 130, 0}},
{(unsigned char*)"xrArr", {226, 159, 185, 0}},
{(unsigned char*)"xrarr", {226, 159, 182, 0}},
{(unsigned char*)"xscr", {240, 157, 147, 141, 0}},
{(unsigned char*)"xsqcup", {226, 168, 134, 0}},
{(unsigned char*)"xuplus", {226, 168, 132, 0}},
{(unsigned char*)"xutri", {226, 150, 179, 0}},
{(unsigned char*)"xvee", {226, 139, 129, 0}},
{(unsigned char*)"xwedge", {226, 139, 128, 0}},
{(unsigned char*)"yacute", {195, 189, 0}},
{(unsigned char*)"yacy", {209, 143, 0}},
{(unsigned char*)"ycirc", {197, 183, 0}},
{(unsigned char*)"ycy", {209, 139, 0}},
{(unsigned char*)"yen", {194, 165, 0}},
{(unsigned char*)"yfr", {240, 157, 148, 182, 0}},
{(unsigned char*)"yicy", {209, 151, 0}},
{(unsigned char*)"yopf", {240, 157, 149, 170, 0}},
{(unsigned char*)"yscr", {240, 157, 147, 142, 0}},
{(unsigned char*)"yucy", {209, 142, 0}},
{(unsigned char*)"yuml", {195, 191, 0}},
{(unsigned char*)"zacute", {197, 186, 0}},
{(unsigned char*)"zcaron", {197, 190, 0}},
{(unsigned char*)"zcy", {208, 183, 0}},
{(unsigned char*)"zdot", {197, 188, 0}},
{(unsigned char*)"zeetrf", {226, 132, 168, 0}},
{(unsigned char*)"zeta", {206, 182, 0}},
{(unsigned char*)"zfr", {240, 157, 148, 183, 0}},
{(unsigned char*)"zhcy", {208, 182, 0}},
{(unsigned char*)"zigrarr", {226, 135, 157, 0}},
{(unsigned char*)"zopf", {240, 157, 149, 171, 0}},
{(unsigned char*)"zscr", {240, 157, 147, 143, 0}},
{(unsigned char*)"zwj", {226, 128, 141, 0}},
{(unsigned char*)"zwnj", {226, 128, 140, 0}},
};
cmarkgfm/third_party/cmark/src/houdini_html_e.c0000644000175000017500000000406014210444464022100 0ustar  carstencarsten#include 
#include 
#include 

#include "houdini.h"

/**
 * According to the OWASP rules:
 *
 * & --> &amp;
 * < --> &lt;
 * > --> &gt;
 * " --> &quot;
 * ' --> &#x27;     &apos; is not recommended
 * / --> &#x2F;     forward slash is included as it helps end an HTML entity
 *
 */
/*
 * Per-byte escape class, indexed by the unsigned byte value (256 entries).
 * 0 means the byte is copied through unchanged; a nonzero value is the index
 * of the replacement string in HTML_ESCAPES.  The nonzero slots are:
 *   34 '"' -> 1,  38 '&' -> 2,  39 '\'' -> 3,
 *   47 '/' -> 4,  60 '<' -> 5,  62 '>'  -> 6
 */
static const char HTML_ESCAPE_TABLE[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};

/*
 * Replacement text for each escape class.  The nonzero values stored in
 * HTML_ESCAPE_TABLE index into this array; slot 0 is an unused placeholder.
 * Order: 1 = '"', 2 = '&', 3 = '\'', 4 = '/', 5 = '<', 6 = '>'.
 *
 * Every entry must be spelled as an entity reference: storing the raw
 * character here would make the escaper emit its input unchanged.
 */
static const char *HTML_ESCAPES[] = {"",      "&quot;", "&amp;", "&#39;",
                                     "&#47;", "&lt;",   "&gt;"};

/*
 * Append an HTML-escaped copy of the `size` bytes at `src` to `ob`.
 *
 * Bytes whose HTML_ESCAPE_TABLE entry is 0 are copied through in runs; any
 * other byte is replaced by the matching HTML_ESCAPES string.  When `secure`
 * is zero, '/' and '\'' are written literally instead of being replaced.
 *
 * Always returns 1.
 */
int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size,
                         int secure) {
  bufsize_t pos = 0;

  while (pos < size) {
    bufsize_t run_start = pos;
    bufsize_t code = 0;

    /* Skip ahead over the bytes that need no escaping. */
    for (; pos < size; pos++) {
      code = HTML_ESCAPE_TABLE[src[pos]];
      if (code != 0)
        break;
    }

    /* Flush the untouched run in a single call. */
    if (pos > run_start)
      cmark_strbuf_put(ob, src + run_start, pos - run_start);

    /* Ran off the end of the input: nothing left to escape. */
    if (unlikely(pos >= size))
      break;

    /* '/' and '\'' get their entity form only in secure mode. */
    if (!secure && (src[pos] == '/' || src[pos] == '\''))
      cmark_strbuf_putc(ob, src[pos]);
    else
      cmark_strbuf_puts(ob, HTML_ESCAPES[code]);

    pos++;
  }

  return 1;
}

/*
 * Convenience wrapper: escape the `size` bytes at `src` into `ob` by calling
 * houdini_escape_html0 with its `secure` argument fixed to 1, and return
 * that call's result.
 */
int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
  return houdini_escape_html0(ob, src, size, 1);
}
cmarkgfm/third_party/cmark/src/utf8.h0000644000175000017500000000132714210444464020007 0ustar  carstencarsten#ifndef CMARK_UTF8_H
#define CMARK_UTF8_H

#include 
#include "buffer.h"

#ifdef __cplusplus
extern "C" {
#endif

/* NOTE(review): only the prototypes are visible in this header; the
 * descriptions below are inferred from names and signatures and should be
 * confirmed against the implementation. */

/* Presumably writes a case-folded form of the `len` bytes at `str` into
 * `dest` -- TODO confirm. */
CMARK_GFM_EXPORT
void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
                              bufsize_t len);

/* Presumably appends the encoding of code point `uc` to `buf` -- TODO
 * confirm. */
CMARK_GFM_EXPORT
void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf);

/* Presumably decodes one code point from the first `str_len` bytes of `str`
 * into `*dst`; the meaning of the int result is not shown here -- TODO
 * confirm. */
CMARK_GFM_EXPORT
int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst);

/* Presumably validates the `size` bytes at `line`, with output going to
 * `dest` -- TODO confirm how invalid input is handled. */
CMARK_GFM_EXPORT
void cmark_utf8proc_check(cmark_strbuf *dest, const uint8_t *line,
                          bufsize_t size);

/* Presumably a predicate: nonzero when code point `uc` is whitespace --
 * TODO confirm. */
CMARK_GFM_EXPORT
int cmark_utf8proc_is_space(int32_t uc);

/* Presumably a predicate: nonzero when code point `uc` is punctuation --
 * TODO confirm. */
CMARK_GFM_EXPORT
int cmark_utf8proc_is_punctuation(int32_t uc);

#ifdef __cplusplus
}
#endif

#endif
cmarkgfm/third_party/cmark/src/scanners.c0000644000175000017500000063367014210444464020744 0ustar  carstencarsten/* Generated by re2c 1.1.1 */
#include 
#include "chunk.h"
#include "scanners.h"

/*
 * Run `scanner` on chunk `c` starting `offset` bytes into its data.
 *
 * The byte at c->data[c->len] is temporarily overwritten with '\0' so the
 * scanner sees a terminator at the end of the chunk, and is put back
 * afterwards.  Returns 0 without calling the scanner when the chunk has no
 * data or `offset` is greater than c->len; otherwise returns whatever the
 * scanner returned.
 */
bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset)
{
	unsigned char *base = (unsigned char *)c->data;
	unsigned char saved;
	bufsize_t matched;

	if (base == NULL || offset > c->len)
		return 0;

	/* Plant a temporary terminator at the end of the chunk. */
	saved = base[c->len];
	base[c->len] = '\0';

	matched = scanner(base + offset);

	/* Restore the byte that was overwritten. */
	base[c->len] = saved;

	return matched;
}



// Try to match a scheme including colon.
//
// A scheme is one ASCII letter followed by 1 to 31 characters drawn from
// [A-Za-z0-9+.-], and must be immediately followed by ':'.  Returns the
// number of bytes matched (the colon included), or 0 when there is no match.
// Scanning stops at the first byte that cannot continue the match, so a
// '\0' terminator always ends it.
//
// NOTE(review): this is a loop form of the unrolled state machine; the file
// header says the file is generated by re2c, so a regeneration would
// presumably overwrite it -- confirm before relying on this form.
bufsize_t _scan_scheme(const unsigned char *p)
{
	const unsigned char *start = p;
	unsigned char ch = *p;
	int tail = 0; /* scheme characters accepted after the leading letter */

	if (!((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')))
		return 0;

	for (;;) {
		ch = *++p;

		/* A colon ends the scheme once at least one tail character exists. */
		if (ch == ':' && tail >= 1)
			return (bufsize_t)(p + 1 - start);

		/* After 31 tail characters only the colon is acceptable. */
		if (tail == 31)
			return 0;

		if (!((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') ||
		      (ch >= '0' && ch <= '9') ||
		      ch == '+' || ch == '-' || ch == '.'))
			return 0;

		tail++;
	}
}

// Try to match URI autolink after first <, returning number of chars matched.
//
// Accepted input: a scheme (one ASCII letter plus 1 to 31 characters from
// [A-Za-z0-9+.-]), a ':', then any number of body characters, then '>'.
// A body character is either an ASCII byte in 0x21..0x7F other than '<' and
// '>', or a well-formed multi-byte UTF-8 sequence (lead bytes 0xC2..0xF4
// with the usual restrictions on the second byte after 0xE0, 0xED, 0xF0 and
// 0xF4).  Returns the number of bytes matched including the closing '>',
// or 0 when there is no match.
//
// NOTE(review): this is a loop form of the unrolled state machine; the file
// header says the file is generated by re2c, so a regeneration would
// presumably overwrite it -- confirm before relying on this form.
bufsize_t _scan_autolink_uri(const unsigned char *p)
{
	const unsigned char *start = p;
	unsigned char ch = *p;
	int tail = 0; /* scheme characters accepted after the leading letter */

	if (!((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')))
		return 0;

	/* Scheme: stop on the colon, which needs at least one tail character. */
	for (;;) {
		ch = *++p;
		if (ch == ':' && tail >= 1)
			break;
		if (tail == 31)
			return 0;
		if (!((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') ||
		      (ch >= '0' && ch <= '9') ||
		      ch == '+' || ch == '-' || ch == '.'))
			return 0;
		tail++;
	}

	/* Body: p currently points at the colon. */
	for (;;) {
		unsigned char lo = 0x80, hi = 0xBF; /* allowed range of 2nd byte */
		int trailing; /* continuation bytes after the 2nd byte */

		ch = *++p;

		if (ch == '>')
			return (bufsize_t)(p + 1 - start);

		/* Printable ASCII and DEL, except '<' ('>' was handled above). */
		if (ch > 0x20 && ch < 0x80 && ch != '<')
			continue;

		/* Controls, space, '<', stray continuation bytes, 0xC0/0xC1 and
		 * anything above 0xF4 cannot appear in the body. */
		if (ch < 0xC2 || ch > 0xF4)
			return 0;

		if (ch <= 0xDF) {
			trailing = 0;
		} else if (ch <= 0xEF) {
			trailing = 1;
			if (ch == 0xE0)
				lo = 0xA0;
			else if (ch == 0xED)
				hi = 0x9F;
		} else {
			trailing = 2;
			if (ch == 0xF0)
				lo = 0x90;
			else if (ch == 0xF4)
				hi = 0x8F;
		}

		ch = *++p;
		if (ch < lo || ch > hi)
			return 0;

		while (trailing-- > 0) {
			ch = *++p;
			if (ch < 0x80 || ch > 0xBF)
				return 0;
		}
	}
}

// Try to match an email autolink after the first '<', returning the number of
// characters matched (0 if there is no match).
bufsize_t _scan_autolink_email(const unsigned char *p)
{
  const unsigned char *marker = NULL;
  const unsigned char *start = p;

{
	unsigned char yych;
	static const unsigned char yybm[] = {
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0, 128,   0, 128, 128, 128, 128, 128, 
		  0,   0, 128, 128,   0, 128, 128, 128, 
		128, 128, 128, 128, 128, 128, 128, 128, 
		128, 128,   0,   0,   0, 128,   0, 128, 
		  0, 128, 128, 128, 128, 128, 128, 128, 
		128, 128, 128, 128, 128, 128, 128, 128, 
		128, 128, 128, 128, 128, 128, 128, 128, 
		128, 128, 128,   0,   0,   0, 128, 128, 
		128, 128, 128, 128, 128, 128, 128, 128, 
		128, 128, 128, 128, 128, 128, 128, 128, 
		128, 128, 128, 128, 128, 128, 128, 128, 
		128, 128, 128, 128, 128, 128, 128,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
	};
	yych = *p;
	if (yych <= '9') {
		if (yych <= '\'') {
			if (yych == '!') goto yy91;
			if (yych >= '#') goto yy91;
		} else {
			if (yych <= ')') goto yy89;
			if (yych != ',') goto yy91;
		}
	} else {
		if (yych <= '?') {
			if (yych == '=') goto yy91;
			if (yych >= '?') goto yy91;
		} else {
			if (yych <= 'Z') {
				if (yych >= 'A') goto yy91;
			} else {
				if (yych <= ']') goto yy89;
				if (yych <= '~') goto yy91;
			}
		}
	}
yy89:
	++p;
yy90:
	{ return 0; }
yy91:
	yych = *(marker = ++p);
	if (yych <= ',') {
		if (yych <= '"') {
			if (yych == '!') goto yy93;
			goto yy90;
		} else {
			if (yych <= '\'') goto yy93;
			if (yych <= ')') goto yy90;
			if (yych <= '+') goto yy93;
			goto yy90;
		}
	} else {
		if (yych <= '>') {
			if (yych <= '9') goto yy93;
			if (yych == '=') goto yy93;
			goto yy90;
		} else {
			if (yych <= 'Z') goto yy93;
			if (yych <= ']') goto yy90;
			if (yych <= '~') goto yy93;
			goto yy90;
		}
	}
yy92:
	yych = *++p;
yy93:
	if (yybm[0+yych] & 128) {
		goto yy92;
	}
	if (yych <= '>') goto yy94;
	if (yych <= '@') goto yy95;
yy94:
	p = marker;
	goto yy90;
yy95:
	yych = *++p;
	if (yych <= '@') {
		if (yych <= '/') goto yy94;
		if (yych >= ':') goto yy94;
	} else {
		if (yych <= 'Z') goto yy96;
		if (yych <= '`') goto yy94;
		if (yych >= '{') goto yy94;
	}
yy96:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy98;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy98;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy98;
			goto yy94;
		}
	}
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy101;
		if (yych <= '/') goto yy94;
		goto yy102;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy102;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy102;
			goto yy94;
		}
	}
yy98:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych <= '-') goto yy101;
			goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy102;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy102;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy102;
			goto yy94;
		}
	}
yy99:
	++p;
	{ return (bufsize_t)(p - start); }
yy101:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy103;
		if (yych <= '/') goto yy94;
		goto yy104;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy104;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy104;
			goto yy94;
		}
	}
yy102:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy104;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy104;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy104;
			goto yy94;
		}
	}
yy103:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy105;
		if (yych <= '/') goto yy94;
		goto yy106;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy106;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy106;
			goto yy94;
		}
	}
yy104:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy106;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy106;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy106;
			goto yy94;
		}
	}
yy105:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy107;
		if (yych <= '/') goto yy94;
		goto yy108;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy108;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy108;
			goto yy94;
		}
	}
yy106:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy108;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy108;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy108;
			goto yy94;
		}
	}
yy107:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy109;
		if (yych <= '/') goto yy94;
		goto yy110;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy110;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy110;
			goto yy94;
		}
	}
yy108:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy110;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy110;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy110;
			goto yy94;
		}
	}
yy109:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy111;
		if (yych <= '/') goto yy94;
		goto yy112;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy112;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy112;
			goto yy94;
		}
	}
yy110:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy112;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy112;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy112;
			goto yy94;
		}
	}
yy111:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy113;
		if (yych <= '/') goto yy94;
		goto yy114;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy114;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy114;
			goto yy94;
		}
	}
yy112:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy114;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy114;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy114;
			goto yy94;
		}
	}
yy113:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy115;
		if (yych <= '/') goto yy94;
		goto yy116;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy116;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy116;
			goto yy94;
		}
	}
yy114:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy116;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy116;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy116;
			goto yy94;
		}
	}
yy115:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy117;
		if (yych <= '/') goto yy94;
		goto yy118;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy118;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy118;
			goto yy94;
		}
	}
yy116:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy118;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy118;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy118;
			goto yy94;
		}
	}
yy117:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy119;
		if (yych <= '/') goto yy94;
		goto yy120;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy120;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy120;
			goto yy94;
		}
	}
yy118:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy120;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy120;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy120;
			goto yy94;
		}
	}
yy119:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy121;
		if (yych <= '/') goto yy94;
		goto yy122;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy122;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy122;
			goto yy94;
		}
	}
yy120:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy122;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy122;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy122;
			goto yy94;
		}
	}
yy121:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy123;
		if (yych <= '/') goto yy94;
		goto yy124;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy124;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy124;
			goto yy94;
		}
	}
yy122:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy124;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy124;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy124;
			goto yy94;
		}
	}
yy123:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy125;
		if (yych <= '/') goto yy94;
		goto yy126;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy126;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy126;
			goto yy94;
		}
	}
yy124:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy126;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy126;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy126;
			goto yy94;
		}
	}
yy125:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy127;
		if (yych <= '/') goto yy94;
		goto yy128;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy128;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy128;
			goto yy94;
		}
	}
yy126:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy128;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy128;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy128;
			goto yy94;
		}
	}
yy127:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy129;
		if (yych <= '/') goto yy94;
		goto yy130;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy130;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy130;
			goto yy94;
		}
	}
yy128:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy130;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy130;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy130;
			goto yy94;
		}
	}
yy129:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy131;
		if (yych <= '/') goto yy94;
		goto yy132;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy132;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy132;
			goto yy94;
		}
	}
yy130:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy132;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy132;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy132;
			goto yy94;
		}
	}
yy131:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy133;
		if (yych <= '/') goto yy94;
		goto yy134;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy134;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy134;
			goto yy94;
		}
	}
yy132:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy134;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy134;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy134;
			goto yy94;
		}
	}
yy133:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy135;
		if (yych <= '/') goto yy94;
		goto yy136;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy136;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy136;
			goto yy94;
		}
	}
yy134:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy136;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy136;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy136;
			goto yy94;
		}
	}
yy135:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy137;
		if (yych <= '/') goto yy94;
		goto yy138;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy138;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy138;
			goto yy94;
		}
	}
yy136:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy138;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy138;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy138;
			goto yy94;
		}
	}
yy137:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy139;
		if (yych <= '/') goto yy94;
		goto yy140;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy140;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy140;
			goto yy94;
		}
	}
yy138:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy140;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy140;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy140;
			goto yy94;
		}
	}
yy139:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy141;
		if (yych <= '/') goto yy94;
		goto yy142;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy142;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy142;
			goto yy94;
		}
	}
yy140:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy142;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy142;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy142;
			goto yy94;
		}
	}
yy141:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy143;
		if (yych <= '/') goto yy94;
		goto yy144;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy144;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy144;
			goto yy94;
		}
	}
yy142:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy144;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy144;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy144;
			goto yy94;
		}
	}
yy143:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy145;
		if (yych <= '/') goto yy94;
		goto yy146;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy146;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy146;
			goto yy94;
		}
	}
yy144:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy146;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy146;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy146;
			goto yy94;
		}
	}
yy145:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy147;
		if (yych <= '/') goto yy94;
		goto yy148;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy148;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy148;
			goto yy94;
		}
	}
yy146:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy148;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy148;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy148;
			goto yy94;
		}
	}
yy147:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy149;
		if (yych <= '/') goto yy94;
		goto yy150;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy150;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy150;
			goto yy94;
		}
	}
yy148:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy150;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy150;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy150;
			goto yy94;
		}
	}
yy149:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy151;
		if (yych <= '/') goto yy94;
		goto yy152;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy152;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy152;
			goto yy94;
		}
	}
yy150:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy152;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy152;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy152;
			goto yy94;
		}
	}
yy151:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy153;
		if (yych <= '/') goto yy94;
		goto yy154;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy154;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy154;
			goto yy94;
		}
	}
yy152:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy154;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy154;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy154;
			goto yy94;
		}
	}
yy153:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy155;
		if (yych <= '/') goto yy94;
		goto yy156;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy156;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy156;
			goto yy94;
		}
	}
yy154:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy156;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy156;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy156;
			goto yy94;
		}
	}
yy155:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy157;
		if (yych <= '/') goto yy94;
		goto yy158;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy158;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy158;
			goto yy94;
		}
	}
yy156:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy158;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy158;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy158;
			goto yy94;
		}
	}
yy157:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy159;
		if (yych <= '/') goto yy94;
		goto yy160;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy160;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy160;
			goto yy94;
		}
	}
yy158:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy160;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy160;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy160;
			goto yy94;
		}
	}
yy159:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy161;
		if (yych <= '/') goto yy94;
		goto yy162;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy162;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy162;
			goto yy94;
		}
	}
yy160:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy162;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy162;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy162;
			goto yy94;
		}
	}
yy161:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy163;
		if (yych <= '/') goto yy94;
		goto yy164;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy164;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy164;
			goto yy94;
		}
	}
yy162:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy164;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy164;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy164;
			goto yy94;
		}
	}
yy163:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy165;
		if (yych <= '/') goto yy94;
		goto yy166;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy166;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy166;
			goto yy94;
		}
	}
yy164:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy166;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy166;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy166;
			goto yy94;
		}
	}
yy165:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy167;
		if (yych <= '/') goto yy94;
		goto yy168;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy168;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy168;
			goto yy94;
		}
	}
yy166:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy168;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy168;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy168;
			goto yy94;
		}
	}
yy167:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy169;
		if (yych <= '/') goto yy94;
		goto yy170;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy170;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy170;
			goto yy94;
		}
	}
yy168:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy170;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy170;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy170;
			goto yy94;
		}
	}
yy169:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy171;
		if (yych <= '/') goto yy94;
		goto yy172;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy172;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy172;
			goto yy94;
		}
	}
yy170:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy172;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy172;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy172;
			goto yy94;
		}
	}
yy171:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy173;
		if (yych <= '/') goto yy94;
		goto yy174;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy174;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy174;
			goto yy94;
		}
	}
yy172:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy174;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy174;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy174;
			goto yy94;
		}
	}
yy173:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy175;
		if (yych <= '/') goto yy94;
		goto yy176;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy176;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy176;
			goto yy94;
		}
	}
yy174:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy176;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy176;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy176;
			goto yy94;
		}
	}
yy175:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy177;
		if (yych <= '/') goto yy94;
		goto yy178;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy178;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy178;
			goto yy94;
		}
	}
yy176:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy178;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy178;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy178;
			goto yy94;
		}
	}
yy177:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy179;
		if (yych <= '/') goto yy94;
		goto yy180;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy180;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy180;
			goto yy94;
		}
	}
yy178:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy180;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy180;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy180;
			goto yy94;
		}
	}
yy179:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy181;
		if (yych <= '/') goto yy94;
		goto yy182;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy182;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy182;
			goto yy94;
		}
	}
yy180:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy182;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy182;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy182;
			goto yy94;
		}
	}
yy181:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy183;
		if (yych <= '/') goto yy94;
		goto yy184;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy184;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy184;
			goto yy94;
		}
	}
yy182:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy184;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy184;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy184;
			goto yy94;
		}
	}
yy183:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy185;
		if (yych <= '/') goto yy94;
		goto yy186;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy186;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy186;
			goto yy94;
		}
	}
yy184:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy186;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy186;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy186;
			goto yy94;
		}
	}
yy185:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy187;
		if (yych <= '/') goto yy94;
		goto yy188;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy188;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy188;
			goto yy94;
		}
	}
yy186:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy188;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy188;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy188;
			goto yy94;
		}
	}
yy187:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy189;
		if (yych <= '/') goto yy94;
		goto yy190;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy190;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy190;
			goto yy94;
		}
	}
yy188:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy190;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy190;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy190;
			goto yy94;
		}
	}
yy189:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy191;
		if (yych <= '/') goto yy94;
		goto yy192;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy192;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy192;
			goto yy94;
		}
	}
yy190:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy192;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy192;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy192;
			goto yy94;
		}
	}
yy191:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy193;
		if (yych <= '/') goto yy94;
		goto yy194;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy194;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy194;
			goto yy94;
		}
	}
yy192:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy194;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy194;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy194;
			goto yy94;
		}
	}
yy193:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy195;
		if (yych <= '/') goto yy94;
		goto yy196;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy196;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy196;
			goto yy94;
		}
	}
yy194:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy196;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy196;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy196;
			goto yy94;
		}
	}
yy195:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy197;
		if (yych <= '/') goto yy94;
		goto yy198;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy198;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy198;
			goto yy94;
		}
	}
yy196:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy198;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy198;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy198;
			goto yy94;
		}
	}
yy197:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy199;
		if (yych <= '/') goto yy94;
		goto yy200;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy200;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy200;
			goto yy94;
		}
	}
yy198:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy200;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy200;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy200;
			goto yy94;
		}
	}
yy199:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy201;
		if (yych <= '/') goto yy94;
		goto yy202;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy202;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy202;
			goto yy94;
		}
	}
yy200:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy202;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy202;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy202;
			goto yy94;
		}
	}
yy201:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy203;
		if (yych <= '/') goto yy94;
		goto yy204;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy204;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy204;
			goto yy94;
		}
	}
yy202:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy204;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy204;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy204;
			goto yy94;
		}
	}
yy203:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy205;
		if (yych <= '/') goto yy94;
		goto yy206;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy206;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy206;
			goto yy94;
		}
	}
yy204:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy206;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy206;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy206;
			goto yy94;
		}
	}
yy205:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy207;
		if (yych <= '/') goto yy94;
		goto yy208;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy208;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy208;
			goto yy94;
		}
	}
yy206:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy208;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy208;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy208;
			goto yy94;
		}
	}
yy207:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy209;
		if (yych <= '/') goto yy94;
		goto yy210;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy210;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy210;
			goto yy94;
		}
	}
yy208:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy210;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy210;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy210;
			goto yy94;
		}
	}
yy209:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy211;
		if (yych <= '/') goto yy94;
		goto yy212;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy212;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy212;
			goto yy94;
		}
	}
yy210:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy212;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy212;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy212;
			goto yy94;
		}
	}
yy211:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy213;
		if (yych <= '/') goto yy94;
		goto yy214;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy214;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy214;
			goto yy94;
		}
	}
yy212:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy214;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy214;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy214;
			goto yy94;
		}
	}
yy213:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy215;
		if (yych <= '/') goto yy94;
		goto yy216;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy216;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy216;
			goto yy94;
		}
	}
yy214:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy216;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy216;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy216;
			goto yy94;
		}
	}
yy215:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy217;
		if (yych <= '/') goto yy94;
		goto yy218;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy218;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy218;
			goto yy94;
		}
	}
yy216:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy218;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy218;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy218;
			goto yy94;
		}
	}
yy217:
	yych = *++p;
	if (yych <= '9') {
		if (yych == '-') goto yy219;
		if (yych <= '/') goto yy94;
		goto yy220;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy94;
			goto yy220;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy220;
			goto yy94;
		}
	}
yy218:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= ',') goto yy94;
			if (yych >= '.') goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych <= '9') goto yy220;
			goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
			goto yy220;
		} else {
			if (yych <= '`') goto yy94;
			if (yych <= 'z') goto yy220;
			goto yy94;
		}
	}
yy219:
	yych = *++p;
	if (yych <= '@') {
		if (yych <= '/') goto yy94;
		if (yych <= '9') goto yy221;
		goto yy94;
	} else {
		if (yych <= 'Z') goto yy221;
		if (yych <= '`') goto yy94;
		if (yych <= 'z') goto yy221;
		goto yy94;
	}
yy220:
	yych = *++p;
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych <= '-') goto yy94;
			goto yy95;
		} else {
			if (yych <= '/') goto yy94;
			if (yych >= ':') goto yy94;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy99;
			if (yych <= '@') goto yy94;
		} else {
			if (yych <= '`') goto yy94;
			if (yych >= '{') goto yy94;
		}
	}
yy221:
	yych = *++p;
	if (yych == '.') goto yy95;
	if (yych == '>') goto yy99;
	goto yy94;
}

}

// Try to match an HTML tag after first <, returning num of chars matched.
//
// `p` points at the byte immediately following the opening '<'.  The scanner
// dispatches on that first byte:
//   '!'           declaration ("!NAME ... >"), comment ("!-- ... -->") or
//                 CDATA section ("![CDATA[ ... ]]>", keyword case-insensitive)
//   '/'           closing tag
//   '?'           processing instruction ("? ... ?>")
//   [A-Za-z]      open tag, optionally with attributes and a trailing "/"
//
// Returns the number of bytes consumed counted from `p` (so the leading '<'
// is not counted, the terminating '>' is), or 0 if nothing matched.
// Every state rejects a 0x00 byte, so the scan stops at a NUL terminator.
// Bytes >= 0x80 are only accepted inside free-form content (processing
// instruction, comment, declaration body, attribute values, CDATA) and only
// as part of a well-formed UTF-8 sequence.
//
// NOTE(review): the yy* labels and the yybm table look like re2c output;
// presumably this should be regenerated from the scanner spec rather than
// edited by hand -- confirm against the project's build setup.
bufsize_t _scan_html_tag(const unsigned char *p)
{
  // `marker` remembers the position to fall back to when a longer match
  // attempt fails; `start` is kept to compute the match length.
  const unsigned char *marker = NULL;
  const unsigned char *start = p;

{
	unsigned char yych;
	// yybm packs two 256-entry character-class bitmaps, indexed by byte value;
	// the second one starts at offset 256.  All entries for bytes >= 0x80 are 0.
	//
	// Bits of the first table, yybm[0+c]:
	//     1  whitespace: 0x09-0x0D and ' '
	//     2  declaration body byte: 0x01-0x7F except '>'
	//     4  attribute-name continuation: [A-Za-z0-9_.:-]
	//     8  comment text byte: 0x01-0x7F except '-'
	//    16  unquoted attribute value byte: 0x01-0x7F except whitespace,
	//        '"', '\'', '<', '=', '>' and '`'
	//    32  double-quoted attribute value byte: 0x01-0x7F except '"'
	//    64  single-quoted attribute value byte: 0x01-0x7F except '\''
	//   128  CDATA content byte: 0x01-0x7F except ']'
	//
	// Bits of the second table, yybm[256+c]:
	//    32  uppercase letter [A-Z] (declaration name)
	//    64  tag-name continuation: [A-Za-z0-9-]
	//   128  processing-instruction byte: 0x01-0x7F except '?'
	static const unsigned char yybm[] = {
		/* table 1 .. 8: 0 */
		  0, 250, 250, 250, 250, 250, 250, 250, 
		250, 235, 235, 235, 235, 235, 250, 250, 
		250, 250, 250, 250, 250, 250, 250, 250, 
		250, 250, 250, 250, 250, 250, 250, 250, 
		235, 250, 202, 250, 250, 250, 250, 170, 
		250, 250, 250, 250, 250, 246, 254, 250, 
		254, 254, 254, 254, 254, 254, 254, 254, 
		254, 254, 254, 250, 234, 234, 232, 250, 
		250, 254, 254, 254, 254, 254, 254, 254, 
		254, 254, 254, 254, 254, 254, 254, 254, 
		254, 254, 254, 254, 254, 254, 254, 254, 
		254, 254, 254, 250, 250, 122, 250, 254, 
		234, 254, 254, 254, 254, 254, 254, 254, 
		254, 254, 254, 254, 254, 254, 254, 254, 
		254, 254, 254, 254, 254, 254, 254, 254, 
		254, 254, 254, 250, 250, 250, 250, 250, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		/* table 9 .. 11: 256 */
		  0, 128, 128, 128, 128, 128, 128, 128, 
		128, 128, 128, 128, 128, 128, 128, 128, 
		128, 128, 128, 128, 128, 128, 128, 128, 
		128, 128, 128, 128, 128, 128, 128, 128, 
		128, 128, 128, 128, 128, 128, 128, 128, 
		128, 128, 128, 128, 128, 192, 128, 128, 
		192, 192, 192, 192, 192, 192, 192, 192, 
		192, 192, 128, 128, 128, 128, 128,   0, 
		128, 224, 224, 224, 224, 224, 224, 224, 
		224, 224, 224, 224, 224, 224, 224, 224, 
		224, 224, 224, 224, 224, 224, 224, 224, 
		224, 224, 224, 128, 128, 128, 128, 128, 
		128, 192, 192, 192, 192, 192, 192, 192, 
		192, 192, 192, 192, 192, 192, 192, 192, 
		192, 192, 192, 192, 192, 192, 192, 192, 
		192, 192, 192, 128, 128, 128, 128, 128, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
	};
	// Dispatch on the first byte after '<': '!' -> yy226, '/' -> yy227,
	// '?' -> yy228, ASCII letter -> yy229; anything else falls into yy224.
	yych = *p;
	if (yych <= '>') {
		if (yych <= '!') {
			if (yych >= '!') goto yy226;
		} else {
			if (yych == '/') goto yy227;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '?') goto yy228;
			if (yych >= 'A') goto yy229;
		} else {
			if (yych <= '`') goto yy224;
			if (yych <= 'z') goto yy229;
		}
	}
	// No HTML construct can start with this byte.
yy224:
	++p;
	// Common failure exit: report "no match".
yy225:
	{ return 0; }
	// Seen "!".  Save the fallback position, then: uppercase letter starts a
	// declaration name, '-' may start a comment, '[' may start a CDATA section.
yy226:
	yych = *(marker = ++p);
	if (yybm[256+yych] & 32) {
		goto yy232;
	}
	if (yych == '-') goto yy230;
	if (yych <= '@') goto yy225;
	if (yych <= '[') goto yy234;
	goto yy225;
	// Seen "/".  A closing tag name must begin with an ASCII letter.
yy227:
	yych = *(marker = ++p);
	if (yych <= '@') goto yy225;
	if (yych <= 'Z') goto yy235;
	if (yych <= '`') goto yy225;
	if (yych <= 'z') goto yy235;
	goto yy225;
	// Seen "?".  Quick reject of NUL and bytes that cannot begin a UTF-8
	// sequence; the byte itself is classified again at yy238.
yy228:
	yych = *(marker = ++p);
	if (yych <= 0x00) goto yy225;
	if (yych <= 0x7F) goto yy238;
	if (yych <= 0xC1) goto yy225;
	if (yych <= 0xF4) goto yy238;
	goto yy225;
	// Seen the first letter of an open tag name.  Look at the next byte and
	// fail early unless it is whitespace, '-', '/', a digit, '>' or a letter;
	// yy250 then classifies that same byte properly.
yy229:
	yych = *(marker = ++p);
	if (yych <= '.') {
		if (yych <= 0x1F) {
			if (yych <= 0x08) goto yy225;
			if (yych <= '\r') goto yy250;
			goto yy225;
		} else {
			if (yych <= ' ') goto yy250;
			if (yych == '-') goto yy250;
			goto yy225;
		}
	} else {
		if (yych <= '@') {
			if (yych <= '9') goto yy250;
			if (yych == '>') goto yy250;
			goto yy225;
		} else {
			if (yych <= 'Z') goto yy250;
			if (yych <= '`') goto yy225;
			if (yych <= 'z') goto yy250;
			goto yy225;
		}
	}
	// Seen "!-": a comment needs a second '-'.
yy230:
	yych = *++p;
	if (yych == '-') goto yy254;
	// Backtracking failure: restore p to the saved marker and return 0.
yy231:
	p = marker;
	goto yy225;
	// Declaration name: consume further uppercase letters, then require at
	// least one whitespace byte before the declaration body (yy255).
yy232:
	yych = *++p;
	if (yybm[256+yych] & 32) {
		goto yy232;
	}
	if (yych <= 0x08) goto yy231;
	if (yych <= '\r') goto yy255;
	if (yych == ' ') goto yy255;
	goto yy231;
	// Seen "![": expect 'C' or 'c' of "CDATA".
yy234:
	yych = *++p;
	if (yych == 'C') goto yy257;
	if (yych == 'c') goto yy257;
	goto yy231;
	// Closing tag name: consume [A-Za-z0-9-]; then optional whitespace
	// (yy258) or the terminating '>'.
yy235:
	yych = *++p;
	if (yybm[256+yych] & 64) {
		goto yy235;
	}
	if (yych <= 0x1F) {
		if (yych <= 0x08) goto yy231;
		if (yych <= '\r') goto yy258;
		goto yy231;
	} else {
		if (yych <= ' ') goto yy258;
		if (yych == '>') goto yy252;
		goto yy231;
	}
	// Processing-instruction content loop.  ASCII bytes other than '?' keep
	// looping; UTF-8 lead bytes branch to the continuation states below; a
	// '?' falls through to the look-ahead after the if/else.
yy237:
	yych = *++p;
yy238:
	if (yybm[256+yych] & 128) {
		goto yy237;
	}
	if (yych <= 0xEC) {
		if (yych <= 0xC1) {
			if (yych <= 0x00) goto yy231;
			if (yych >= '@') goto yy231;
		} else {
			if (yych <= 0xDF) goto yy240;
			if (yych <= 0xE0) goto yy241;
			goto yy242;
		}
	} else {
		if (yych <= 0xF0) {
			if (yych <= 0xED) goto yy243;
			if (yych <= 0xEF) goto yy242;
			goto yy244;
		} else {
			if (yych <= 0xF3) goto yy245;
			if (yych <= 0xF4) goto yy246;
			goto yy231;
		}
	}
	// Just consumed '?': a following '>' ends the instruction; any other
	// non-NUL ASCII byte (including another '?') is consumed as content.
	yych = *++p;
	if (yych <= 0xE0) {
		if (yych <= '>') {
			if (yych <= 0x00) goto yy231;
			if (yych <= '=') goto yy237;
			goto yy252;
		} else {
			if (yych <= 0x7F) goto yy237;
			if (yych <= 0xC1) goto yy231;
			if (yych >= 0xE0) goto yy241;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych == 0xED) goto yy243;
			goto yy242;
		} else {
			if (yych <= 0xF0) goto yy244;
			if (yych <= 0xF3) goto yy245;
			if (yych <= 0xF4) goto yy246;
			goto yy231;
		}
	}
	// yy240-yy246: UTF-8 continuation bytes inside a processing instruction.
	// yy240: final continuation byte (0x80-0xBF), then back to the content loop.
yy240:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0xBF) goto yy237;
	goto yy231;
	// After lead byte 0xE0 the next byte must be 0xA0-0xBF.
yy241:
	yych = *++p;
	if (yych <= 0x9F) goto yy231;
	if (yych <= 0xBF) goto yy240;
	goto yy231;
	// Generic "two continuation bytes remaining" state.
yy242:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0xBF) goto yy240;
	goto yy231;
	// After lead byte 0xED the next byte must be 0x80-0x9F.
yy243:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0x9F) goto yy240;
	goto yy231;
	// After lead byte 0xF0 the next byte must be 0x90-0xBF.
yy244:
	yych = *++p;
	if (yych <= 0x8F) goto yy231;
	if (yych <= 0xBF) goto yy242;
	goto yy231;
	// After lead bytes 0xF1-0xF3: any continuation byte, two more follow.
yy245:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0xBF) goto yy242;
	goto yy231;
	// After lead byte 0xF4 the next byte must be 0x80-0x8F.
yy246:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0x8F) goto yy242;
	goto yy231;
	// Whitespace inside an open tag (after the tag name or after an
	// attribute).  Skips further whitespace, then accepts '/', '>' or the
	// first byte of an attribute name ([A-Za-z_:]).
yy247:
	yych = *++p;
	if (yybm[0+yych] & 1) {
		goto yy247;
	}
	if (yych <= '>') {
		if (yych <= '9') {
			if (yych == '/') goto yy251;
			goto yy231;
		} else {
			if (yych <= ':') goto yy260;
			if (yych <= '=') goto yy231;
			goto yy252;
		}
	} else {
		if (yych <= '^') {
			if (yych <= '@') goto yy231;
			if (yych <= 'Z') goto yy260;
			goto yy231;
		} else {
			if (yych == '`') goto yy231;
			if (yych <= 'z') goto yy260;
			goto yy231;
		}
	}
	// Open tag name loop: [A-Za-z0-9-] continues the name, whitespace moves
	// to yy247, '/' expects a closing '>' (yy251), '>' accepts.
yy249:
	yych = *++p;
yy250:
	if (yybm[0+yych] & 1) {
		goto yy247;
	}
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych == '-') goto yy249;
			goto yy231;
		} else {
			if (yych <= '/') goto yy251;
			if (yych <= '9') goto yy249;
			goto yy231;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy252;
			if (yych <= '@') goto yy231;
			goto yy249;
		} else {
			if (yych <= '`') goto yy231;
			if (yych <= 'z') goto yy249;
			goto yy231;
		}
	}
	// The next byte must be the terminating '>' (used after "/" in an open
	// tag and after "--" in a comment).
yy251:
	yych = *++p;
	if (yych != '>') goto yy231;
	// Accepting state: step past '>' and return the matched length.
yy252:
	++p;
	{ return (bufsize_t)(p - start); }
	// Seen "!--".  Comment text may not start with '>'; a '-' here is
	// examined further in yy264.
yy254:
	yych = *++p;
	if (yych == '-') goto yy264;
	if (yych == '>') goto yy231;
	goto yy263;
	// Declaration body: consume any non-NUL ASCII byte except '>' and any
	// well-formed UTF-8 sequence; '>' accepts.
yy255:
	yych = *++p;
	if (yybm[0+yych] & 2) {
		goto yy255;
	}
	if (yych <= 0xEC) {
		if (yych <= 0xC1) {
			if (yych <= 0x00) goto yy231;
			if (yych <= '>') goto yy252;
			goto yy231;
		} else {
			if (yych <= 0xDF) goto yy272;
			if (yych <= 0xE0) goto yy273;
			goto yy274;
		}
	} else {
		if (yych <= 0xF0) {
			if (yych <= 0xED) goto yy275;
			if (yych <= 0xEF) goto yy274;
			goto yy276;
		} else {
			if (yych <= 0xF3) goto yy277;
			if (yych <= 0xF4) goto yy278;
			goto yy231;
		}
	}
	// Seen "![C": expect 'D' or 'd'.
yy257:
	yych = *++p;
	if (yych == 'D') goto yy279;
	if (yych == 'd') goto yy279;
	goto yy231;
	// Trailing whitespace in a closing tag; only more whitespace or '>' may
	// follow.
yy258:
	yych = *++p;
	if (yych <= 0x1F) {
		if (yych <= 0x08) goto yy231;
		if (yych <= '\r') goto yy258;
		goto yy231;
	} else {
		if (yych <= ' ') goto yy258;
		if (yych == '>') goto yy252;
		goto yy231;
	}
	// Attribute name loop: consume [A-Za-z0-9_.:-]; then whitespace (yy280),
	// '/' (yy251), '=' starting a value (yy282), or '>' (accept).
yy260:
	yych = *++p;
	if (yybm[0+yych] & 4) {
		goto yy260;
	}
	if (yych <= ',') {
		if (yych <= '\r') {
			if (yych <= 0x08) goto yy231;
			goto yy280;
		} else {
			if (yych == ' ') goto yy280;
			goto yy231;
		}
	} else {
		if (yych <= '<') {
			if (yych <= '/') goto yy251;
			goto yy231;
		} else {
			if (yych <= '=') goto yy282;
			if (yych <= '>') goto yy252;
			goto yy231;
		}
	}
	// Comment text loop: consume any non-NUL ASCII byte except '-' and any
	// well-formed UTF-8 sequence; a '-' is handled in yy284.
yy262:
	yych = *++p;
yy263:
	if (yybm[0+yych] & 8) {
		goto yy262;
	}
	if (yych <= 0xEC) {
		if (yych <= 0xC1) {
			if (yych <= 0x00) goto yy231;
			if (yych <= '-') goto yy284;
			goto yy231;
		} else {
			if (yych <= 0xDF) goto yy265;
			if (yych <= 0xE0) goto yy266;
			goto yy267;
		}
	} else {
		if (yych <= 0xF0) {
			if (yych <= 0xED) goto yy268;
			if (yych <= 0xEF) goto yy267;
			goto yy269;
		} else {
			if (yych <= 0xF3) goto yy270;
			if (yych <= 0xF4) goto yy271;
			goto yy231;
		}
	}
	// Seen "!---".  Another '-' must be followed by '>' (yy251); an
	// immediate '>' is rejected; anything else is comment text.
yy264:
	yych = *++p;
	if (yych == '-') goto yy251;
	if (yych == '>') goto yy231;
	goto yy263;
	// yy265-yy271: UTF-8 continuation bytes inside a comment; same layout as
	// yy240-yy246 but returning to the comment text loop (yy262).
yy265:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0xBF) goto yy262;
	goto yy231;
yy266:
	yych = *++p;
	if (yych <= 0x9F) goto yy231;
	if (yych <= 0xBF) goto yy265;
	goto yy231;
yy267:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0xBF) goto yy265;
	goto yy231;
yy268:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0x9F) goto yy265;
	goto yy231;
yy269:
	yych = *++p;
	if (yych <= 0x8F) goto yy231;
	if (yych <= 0xBF) goto yy267;
	goto yy231;
yy270:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0xBF) goto yy267;
	goto yy231;
yy271:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0x8F) goto yy267;
	goto yy231;
	// yy272-yy278: UTF-8 continuation bytes inside a declaration body; same
	// layout as yy240-yy246 but returning to yy255.
yy272:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0xBF) goto yy255;
	goto yy231;
yy273:
	yych = *++p;
	if (yych <= 0x9F) goto yy231;
	if (yych <= 0xBF) goto yy272;
	goto yy231;
yy274:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0xBF) goto yy272;
	goto yy231;
yy275:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0x9F) goto yy272;
	goto yy231;
yy276:
	yych = *++p;
	if (yych <= 0x8F) goto yy231;
	if (yych <= 0xBF) goto yy274;
	goto yy231;
yy277:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0xBF) goto yy274;
	goto yy231;
yy278:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0x8F) goto yy274;
	goto yy231;
	// Seen "![CD": expect 'A' or 'a'.
yy279:
	yych = *++p;
	if (yych == 'A') goto yy285;
	if (yych == 'a') goto yy285;
	goto yy231;
	// Whitespace after an attribute name.  Next may be more whitespace, '/',
	// '=' (value follows), '>' or the start of another attribute name.
yy280:
	yych = *++p;
	if (yych <= '<') {
		if (yych <= ' ') {
			if (yych <= 0x08) goto yy231;
			if (yych <= '\r') goto yy280;
			if (yych <= 0x1F) goto yy231;
			goto yy280;
		} else {
			if (yych <= '/') {
				if (yych <= '.') goto yy231;
				goto yy251;
			} else {
				if (yych == ':') goto yy260;
				goto yy231;
			}
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '=') goto yy282;
			if (yych <= '>') goto yy252;
			if (yych <= '@') goto yy231;
			goto yy260;
		} else {
			if (yych <= '_') {
				if (yych <= '^') goto yy231;
				goto yy260;
			} else {
				if (yych <= '`') goto yy231;
				if (yych <= 'z') goto yy260;
				goto yy231;
			}
		}
	}
	// Seen '=' after an attribute name.  Skip whitespace, then choose the
	// value form: unquoted (yy286), double-quoted (yy288) or single-quoted
	// (yy290).  '<', '=', '>' and '`' cannot start a value.
yy282:
	yych = *++p;
	if (yybm[0+yych] & 16) {
		goto yy286;
	}
	if (yych <= 0xE0) {
		if (yych <= '"') {
			if (yych <= 0x00) goto yy231;
			if (yych <= ' ') goto yy282;
			goto yy288;
		} else {
			if (yych <= '\'') goto yy290;
			if (yych <= 0xC1) goto yy231;
			if (yych <= 0xDF) goto yy292;
			goto yy293;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych == 0xED) goto yy295;
			goto yy294;
		} else {
			if (yych <= 0xF0) goto yy296;
			if (yych <= 0xF3) goto yy297;
			if (yych <= 0xF4) goto yy298;
			goto yy231;
		}
	}
	// Seen a single '-' inside comment text.  A second '-' must be followed
	// by '>' (yy251); any other byte resumes the comment text loop.
yy284:
	yych = *++p;
	if (yybm[0+yych] & 8) {
		goto yy262;
	}
	if (yych <= 0xEC) {
		if (yych <= 0xC1) {
			if (yych <= 0x00) goto yy231;
			if (yych <= '-') goto yy251;
			goto yy231;
		} else {
			if (yych <= 0xDF) goto yy265;
			if (yych <= 0xE0) goto yy266;
			goto yy267;
		}
	} else {
		if (yych <= 0xF0) {
			if (yych <= 0xED) goto yy268;
			if (yych <= 0xEF) goto yy267;
			goto yy269;
		} else {
			if (yych <= 0xF3) goto yy270;
			if (yych <= 0xF4) goto yy271;
			goto yy231;
		}
	}
	// Seen "![CDA": expect 'T' or 't'.
yy285:
	yych = *++p;
	if (yych == 'T') goto yy299;
	if (yych == 't') goto yy299;
	goto yy231;
	// Unquoted attribute value loop.  Whitespace ends the value (yy247),
	// '>' accepts; quotes, '<', '=' and '`' inside the value fail.
yy286:
	yych = *++p;
	if (yybm[0+yych] & 16) {
		goto yy286;
	}
	if (yych <= 0xE0) {
		if (yych <= '=') {
			if (yych <= 0x00) goto yy231;
			if (yych <= ' ') goto yy247;
			goto yy231;
		} else {
			if (yych <= '>') goto yy252;
			if (yych <= 0xC1) goto yy231;
			if (yych <= 0xDF) goto yy292;
			goto yy293;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych == 0xED) goto yy295;
			goto yy294;
		} else {
			if (yych <= 0xF0) goto yy296;
			if (yych <= 0xF3) goto yy297;
			if (yych <= 0xF4) goto yy298;
			goto yy231;
		}
	}
	// Double-quoted attribute value: consume until the closing '"'.
yy288:
	yych = *++p;
	if (yybm[0+yych] & 32) {
		goto yy288;
	}
	if (yych <= 0xEC) {
		if (yych <= 0xC1) {
			if (yych <= 0x00) goto yy231;
			if (yych <= '"') goto yy300;
			goto yy231;
		} else {
			if (yych <= 0xDF) goto yy301;
			if (yych <= 0xE0) goto yy302;
			goto yy303;
		}
	} else {
		if (yych <= 0xF0) {
			if (yych <= 0xED) goto yy304;
			if (yych <= 0xEF) goto yy303;
			goto yy305;
		} else {
			if (yych <= 0xF3) goto yy306;
			if (yych <= 0xF4) goto yy307;
			goto yy231;
		}
	}
	// Single-quoted attribute value: consume until the closing '\''.
yy290:
	yych = *++p;
	if (yybm[0+yych] & 64) {
		goto yy290;
	}
	if (yych <= 0xEC) {
		if (yych <= 0xC1) {
			if (yych <= 0x00) goto yy231;
			if (yych <= '\'') goto yy300;
			goto yy231;
		} else {
			if (yych <= 0xDF) goto yy308;
			if (yych <= 0xE0) goto yy309;
			goto yy310;
		}
	} else {
		if (yych <= 0xF0) {
			if (yych <= 0xED) goto yy311;
			if (yych <= 0xEF) goto yy310;
			goto yy312;
		} else {
			if (yych <= 0xF3) goto yy313;
			if (yych <= 0xF4) goto yy314;
			goto yy231;
		}
	}
	// yy292-yy298: UTF-8 continuation bytes inside an unquoted attribute
	// value; same layout as yy240-yy246 but returning to yy286.
yy292:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0xBF) goto yy286;
	goto yy231;
yy293:
	yych = *++p;
	if (yych <= 0x9F) goto yy231;
	if (yych <= 0xBF) goto yy292;
	goto yy231;
yy294:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0xBF) goto yy292;
	goto yy231;
yy295:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0x9F) goto yy292;
	goto yy231;
yy296:
	yych = *++p;
	if (yych <= 0x8F) goto yy231;
	if (yych <= 0xBF) goto yy294;
	goto yy231;
yy297:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0xBF) goto yy294;
	goto yy231;
yy298:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0x8F) goto yy294;
	goto yy231;
	// Seen "![CDAT": expect 'A' or 'a'.
yy299:
	yych = *++p;
	if (yych == 'A') goto yy315;
	if (yych == 'a') goto yy315;
	goto yy231;
	// Just past the closing quote of an attribute value: only whitespace,
	// '/' or '>' may follow.
yy300:
	yych = *++p;
	if (yybm[0+yych] & 1) {
		goto yy247;
	}
	if (yych == '/') goto yy251;
	if (yych == '>') goto yy252;
	goto yy231;
	// yy301-yy307: UTF-8 continuation bytes inside a double-quoted value;
	// same layout as yy240-yy246 but returning to yy288.
yy301:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0xBF) goto yy288;
	goto yy231;
yy302:
	yych = *++p;
	if (yych <= 0x9F) goto yy231;
	if (yych <= 0xBF) goto yy301;
	goto yy231;
yy303:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0xBF) goto yy301;
	goto yy231;
yy304:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0x9F) goto yy301;
	goto yy231;
yy305:
	yych = *++p;
	if (yych <= 0x8F) goto yy231;
	if (yych <= 0xBF) goto yy303;
	goto yy231;
yy306:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0xBF) goto yy303;
	goto yy231;
yy307:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0x8F) goto yy303;
	goto yy231;
	// yy308-yy314: UTF-8 continuation bytes inside a single-quoted value;
	// same layout as yy240-yy246 but returning to yy290.
yy308:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0xBF) goto yy290;
	goto yy231;
yy309:
	yych = *++p;
	if (yych <= 0x9F) goto yy231;
	if (yych <= 0xBF) goto yy308;
	goto yy231;
yy310:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0xBF) goto yy308;
	goto yy231;
yy311:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0x9F) goto yy308;
	goto yy231;
yy312:
	yych = *++p;
	if (yych <= 0x8F) goto yy231;
	if (yych <= 0xBF) goto yy310;
	goto yy231;
yy313:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0xBF) goto yy310;
	goto yy231;
yy314:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0x8F) goto yy310;
	goto yy231;
	// Seen "![CDATA": the keyword must be followed by '['.
yy315:
	yych = *++p;
	if (yych != '[') goto yy231;
	// CDATA content loop: consume any non-NUL ASCII byte except ']' and any
	// well-formed UTF-8 sequence.  A ']' falls through to the look-ahead
	// after the if/else.
yy316:
	yych = *++p;
	if (yybm[0+yych] & 128) {
		goto yy316;
	}
	if (yych <= 0xEC) {
		if (yych <= 0xC1) {
			if (yych <= 0x00) goto yy231;
			if (yych >= '^') goto yy231;
		} else {
			if (yych <= 0xDF) goto yy319;
			if (yych <= 0xE0) goto yy320;
			goto yy321;
		}
	} else {
		if (yych <= 0xF0) {
			if (yych <= 0xED) goto yy322;
			if (yych <= 0xEF) goto yy321;
			goto yy323;
		} else {
			if (yych <= 0xF3) goto yy324;
			if (yych <= 0xF4) goto yy325;
			goto yy231;
		}
	}
	// Just consumed one ']': a second ']' moves to yy326, anything else
	// resumes CDATA content.
	yych = *++p;
	if (yybm[0+yych] & 128) {
		goto yy316;
	}
	if (yych <= 0xEC) {
		if (yych <= 0xC1) {
			if (yych <= 0x00) goto yy231;
			if (yych <= ']') goto yy326;
			goto yy231;
		} else {
			if (yych <= 0xDF) goto yy319;
			if (yych <= 0xE0) goto yy320;
			goto yy321;
		}
	} else {
		if (yych <= 0xF0) {
			if (yych <= 0xED) goto yy322;
			if (yych <= 0xEF) goto yy321;
			goto yy323;
		} else {
			if (yych <= 0xF3) goto yy324;
			if (yych <= 0xF4) goto yy325;
			goto yy231;
		}
	}
	// yy319-yy325: UTF-8 continuation bytes inside CDATA content; same
	// layout as yy240-yy246 but returning to yy316.
yy319:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0xBF) goto yy316;
	goto yy231;
yy320:
	yych = *++p;
	if (yych <= 0x9F) goto yy231;
	if (yych <= 0xBF) goto yy319;
	goto yy231;
yy321:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0xBF) goto yy319;
	goto yy231;
yy322:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0x9F) goto yy319;
	goto yy231;
yy323:
	yych = *++p;
	if (yych <= 0x8F) goto yy231;
	if (yych <= 0xBF) goto yy321;
	goto yy231;
yy324:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0xBF) goto yy321;
	goto yy231;
yy325:
	yych = *++p;
	if (yych <= 0x7F) goto yy231;
	if (yych <= 0x8F) goto yy321;
	goto yy231;
	// Seen "]]" inside CDATA: '>' ends the section (accept); any other
	// non-NUL ASCII byte (including a third ']') is consumed as content and
	// the loop resumes at yy316.
yy326:
	yych = *++p;
	if (yych <= 0xE0) {
		if (yych <= '>') {
			if (yych <= 0x00) goto yy231;
			if (yych <= '=') goto yy316;
			goto yy252;
		} else {
			if (yych <= 0x7F) goto yy316;
			if (yych <= 0xC1) goto yy231;
			if (yych <= 0xDF) goto yy319;
			goto yy320;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych == 0xED) goto yy322;
			goto yy321;
		} else {
			if (yych <= 0xF0) goto yy323;
			if (yych <= 0xF3) goto yy324;
			if (yych <= 0xF4) goto yy325;
			goto yy231;
		}
	}
}

}

// Liberal HTML tag scan, applied to the text that follows an opening '<'.
// Walks over well-formed UTF-8 characters until a NUL byte, a line feed or a
// malformed sequence is met, and returns the number of bytes up to and
// including the last '>' passed on the way. A '>' only counts when at least
// one character precedes it; 0 is returned when no such '>' exists.
bufsize_t _scan_liberal_html_tag(const unsigned char *p)
{
  const unsigned char *start = p;
  const unsigned char *accept_end = NULL; // one past the last usable '>'

  for (;;) {
    const unsigned char *cursor;
    unsigned char lead = *p;
    unsigned char low = 0x80;  // permitted range of the second byte
    unsigned char high = 0xBF;
    int pending;               // bytes still expected after the lead byte

    if (lead == 0x00 || lead == '\n')
      break;

    if (lead <= 0x7F) {
      // The very first character can never close the tag.
      if (lead == '>' && p != start)
        accept_end = p + 1;
      ++p;
      continue;
    }

    // 0x80..0xC1 and 0xF5..0xFF cannot start a UTF-8 sequence.
    if (lead < 0xC2 || lead > 0xF4)
      break;

    if (lead < 0xE0) {
      pending = 1;
    } else if (lead < 0xF0) {
      pending = 2;
      if (lead == 0xE0)
        low = 0xA0;  // excludes overlong three-byte forms
      else if (lead == 0xED)
        high = 0x9F; // excludes the surrogate range
    } else {
      pending = 3;
      if (lead == 0xF0)
        low = 0x90;  // excludes overlong four-byte forms
      else if (lead == 0xF4)
        high = 0x8F; // nothing above U+10FFFF
    }

    // Bytes are inspected strictly in order, so a NUL terminator in the middle
    // of a truncated sequence stops the scan before anything past it is read.
    cursor = p + 1;
    if (*cursor < low || *cursor > high)
      break;
    ++cursor;
    --pending;
    while (pending > 0 && *cursor >= 0x80 && *cursor <= 0xBF) {
      ++cursor;
      --pending;
    }
    if (pending > 0)
      break;

    p = cursor;
  }

  if (accept_end == NULL)
    return 0;
  return (bufsize_t)(accept_end - start);
}

// Returns nonzero when `c` is one of the whitespace bytes accepted directly
// after a tag name: tab, line feed, vertical tab, form feed, carriage return
// or space.
static int html_block_tag_space(unsigned char c)
{
  return c == ' ' || (c >= '\t' && c <= '\r');
}

// Compares the bytes at `s` against the lower-case ASCII string `name`,
// folding 'A'-'Z' in the input to lower case. Returns a pointer to the byte
// that follows the name on success and NULL otherwise. Input bytes are read
// in order and reading stops at the first mismatch, so a NUL terminator in
// the input is never stepped over.
static const unsigned char *html_block_tag_after(const unsigned char *s,
                                                 const char *name)
{
  for (; *name != '\0'; ++name, ++s) {
    unsigned char c = *s;
    if (c >= 'A' && c <= 'Z')
      c = (unsigned char)(c - 'A' + 'a');
    if (c != (unsigned char)*name)
      return NULL;
  }
  return s;
}

// Classifies the start of an HTML block line. The return value is the block
// type number used by the spec for start conditions 1-6, or 0 when the input
// opens none of them:
//   1  <script, <pre or <style followed by whitespace or '>'
//   2  <!--
//   3  <?
//   4  <! followed by an upper-case ASCII letter
//   5  <![CDATA[  (letters matched case-insensitively)
//   6  < or </, a block-level tag name, then whitespace, '>' or "/>"
// Start condition 7 is handled by a separate function, below.
bufsize_t _scan_html_block_start(const unsigned char *p)
{
  // Names for type 1; these are only recognised in opening tags.
  static const char *const verbatim_tags[] = {"script", "pre", "style"};
  // Names for type 6; recognised in both opening and closing tags.
  static const char *const block_tags[] = {
      "address",  "article",    "aside",    "base",     "basefont",
      "blockquote", "body",     "caption",  "center",   "col",
      "colgroup", "dd",         "details",  "dialog",   "dir",
      "div",      "dl",         "dt",       "fieldset", "figcaption",
      "figure",   "footer",     "form",     "frame",    "frameset",
      "h1",       "h2",         "h3",       "h4",       "h5",
      "h6",       "head",       "header",   "hr",       "html",
      "iframe",   "legend",     "li",       "link",     "main",
      "menu",     "menuitem",   "nav",      "noframes", "ol",
      "optgroup", "option",     "p",        "param",    "section",
      "source",   "summary",    "table",    "tbody",    "td",
      "tfoot",    "th",         "thead",    "title",    "tr",
      "track",    "ul"};
  const unsigned char *name;
  const unsigned char *rest;
  unsigned int i;

  if (*p != '<')
    return 0;
  ++p;

  if (*p == '?')
    return 3;

  if (*p == '!') {
    ++p;
    if (*p == '-')
      return p[1] == '-' ? 2 : 0;
    if (*p >= 'A' && *p <= 'Z')
      return 4;
    if (*p == '[') {
      rest = html_block_tag_after(p + 1, "cdata");
      return (rest != NULL && *rest == '[') ? 5 : 0;
    }
    return 0;
  }

  name = p;
  if (*name == '/') {
    ++name;
  } else {
    for (i = 0; i < sizeof(verbatim_tags) / sizeof(verbatim_tags[0]); ++i) {
      rest = html_block_tag_after(name, verbatim_tags[i]);
      if (rest != NULL && (html_block_tag_space(*rest) || *rest == '>'))
        return 1;
    }
  }

  // A name that matches only as a prefix (e.g. "p" in "<param>") fails the
  // terminator test and the search simply moves on to the next candidate.
  for (i = 0; i < sizeof(block_tags) / sizeof(block_tags[0]); ++i) {
    rest = html_block_tag_after(name, block_tags[i]);
    if (rest == NULL)
      continue;
    if (html_block_tag_space(*rest) || *rest == '>')
      return 6;
    if (*rest == '/' && rest[1] == '>')
      return 6;
  }

  return 0;
}

// Try to match an HTML block tag start line of type 7, returning
// 7 if successful, 0 if not.
bufsize_t _scan_html_block_start_7(const unsigned char *p)
{
  const unsigned char *marker = NULL;

{
	unsigned char yych;
	unsigned int yyaccept = 0;
	static const unsigned char yybm[] = {
		  0, 224, 224, 224, 224, 224, 224, 224, 
		224, 198, 210, 194, 198, 194, 224, 224, 
		224, 224, 224, 224, 224, 224, 224, 224, 
		224, 224, 224, 224, 224, 224, 224, 224, 
		198, 224, 128, 224, 224, 224, 224,  64, 
		224, 224, 224, 224, 224, 233, 232, 224, 
		233, 233, 233, 233, 233, 233, 233, 233, 
		233, 233, 232, 224, 192, 192, 192, 224, 
		224, 233, 233, 233, 233, 233, 233, 233, 
		233, 233, 233, 233, 233, 233, 233, 233, 
		233, 233, 233, 233, 233, 233, 233, 233, 
		233, 233, 233, 224, 224, 224, 224, 232, 
		192, 233, 233, 233, 233, 233, 233, 233, 
		233, 233, 233, 233, 233, 233, 233, 233, 
		233, 233, 233, 233, 233, 233, 233, 233, 
		233, 233, 233, 224, 224, 224, 224, 224, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
	};
	yych = *p;
	if (yych == '<') goto yy520;
	++p;
yy519:
	{ return 0; }
yy520:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= '@') {
		if (yych != '/') goto yy519;
	} else {
		if (yych <= 'Z') goto yy523;
		if (yych <= '`') goto yy519;
		if (yych <= 'z') goto yy523;
		goto yy519;
	}
	yych = *++p;
	if (yych <= '@') goto yy522;
	if (yych <= 'Z') goto yy525;
	if (yych <= '`') goto yy522;
	if (yych <= 'z') goto yy525;
yy522:
	p = marker;
	if (yyaccept == 0) {
		goto yy519;
	} else {
		goto yy538;
	}
yy523:
	yych = *++p;
	if (yybm[0+yych] & 2) {
		goto yy527;
	}
	if (yych <= '=') {
		if (yych <= '.') {
			if (yych == '-') goto yy523;
			goto yy522;
		} else {
			if (yych <= '/') goto yy529;
			if (yych <= '9') goto yy523;
			goto yy522;
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '>') goto yy530;
			if (yych <= '@') goto yy522;
			goto yy523;
		} else {
			if (yych <= '`') goto yy522;
			if (yych <= 'z') goto yy523;
			goto yy522;
		}
	}
yy525:
	yych = *++p;
	if (yych <= '/') {
		if (yych <= 0x1F) {
			if (yych <= 0x08) goto yy522;
			if (yych <= '\r') goto yy532;
			goto yy522;
		} else {
			if (yych <= ' ') goto yy532;
			if (yych == '-') goto yy525;
			goto yy522;
		}
	} else {
		if (yych <= '@') {
			if (yych <= '9') goto yy525;
			if (yych == '>') goto yy530;
			goto yy522;
		} else {
			if (yych <= 'Z') goto yy525;
			if (yych <= '`') goto yy522;
			if (yych <= 'z') goto yy525;
			goto yy522;
		}
	}
yy527:
	yych = *++p;
	if (yybm[0+yych] & 2) {
		goto yy527;
	}
	if (yych <= '>') {
		if (yych <= '9') {
			if (yych != '/') goto yy522;
		} else {
			if (yych <= ':') goto yy534;
			if (yych <= '=') goto yy522;
			goto yy530;
		}
	} else {
		if (yych <= '^') {
			if (yych <= '@') goto yy522;
			if (yych <= 'Z') goto yy534;
			goto yy522;
		} else {
			if (yych == '`') goto yy522;
			if (yych <= 'z') goto yy534;
			goto yy522;
		}
	}
yy529:
	yych = *++p;
	if (yych != '>') goto yy522;
yy530:
	yych = *++p;
	if (yybm[0+yych] & 4) {
		goto yy530;
	}
	if (yych <= 0x08) goto yy522;
	if (yych <= '\n') goto yy536;
	if (yych <= '\v') goto yy522;
	if (yych <= '\r') goto yy539;
	goto yy522;
yy532:
	yych = *++p;
	if (yych <= 0x1F) {
		if (yych <= 0x08) goto yy522;
		if (yych <= '\r') goto yy532;
		goto yy522;
	} else {
		if (yych <= ' ') goto yy532;
		if (yych == '>') goto yy530;
		goto yy522;
	}
yy534:
	yych = *++p;
	if (yybm[0+yych] & 8) {
		goto yy534;
	}
	if (yych <= ',') {
		if (yych <= '\r') {
			if (yych <= 0x08) goto yy522;
			goto yy540;
		} else {
			if (yych == ' ') goto yy540;
			goto yy522;
		}
	} else {
		if (yych <= '<') {
			if (yych <= '/') goto yy529;
			goto yy522;
		} else {
			if (yych <= '=') goto yy542;
			if (yych <= '>') goto yy530;
			goto yy522;
		}
	}
yy536:
	yyaccept = 1;
	yych = *(marker = ++p);
	if (yybm[0+yych] & 4) {
		goto yy530;
	}
	if (yych <= 0x08) goto yy538;
	if (yych <= '\n') goto yy536;
	if (yych <= '\v') goto yy538;
	if (yych <= '\r') goto yy539;
yy538:
	{ return 7; }
yy539:
	++p;
	goto yy538;
yy540:
	yych = *++p;
	if (yych <= '<') {
		if (yych <= ' ') {
			if (yych <= 0x08) goto yy522;
			if (yych <= '\r') goto yy540;
			if (yych <= 0x1F) goto yy522;
			goto yy540;
		} else {
			if (yych <= '/') {
				if (yych <= '.') goto yy522;
				goto yy529;
			} else {
				if (yych == ':') goto yy534;
				goto yy522;
			}
		}
	} else {
		if (yych <= 'Z') {
			if (yych <= '=') goto yy542;
			if (yych <= '>') goto yy530;
			if (yych <= '@') goto yy522;
			goto yy534;
		} else {
			if (yych <= '_') {
				if (yych <= '^') goto yy522;
				goto yy534;
			} else {
				if (yych <= '`') goto yy522;
				if (yych <= 'z') goto yy534;
				goto yy522;
			}
		}
	}
yy542:
	yych = *++p;
	if (yybm[0+yych] & 32) {
		goto yy544;
	}
	if (yych <= 0xE0) {
		if (yych <= '"') {
			if (yych <= 0x00) goto yy522;
			if (yych <= ' ') goto yy542;
			goto yy546;
		} else {
			if (yych <= '\'') goto yy548;
			if (yych <= 0xC1) goto yy522;
			if (yych <= 0xDF) goto yy550;
			goto yy551;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych == 0xED) goto yy553;
			goto yy552;
		} else {
			if (yych <= 0xF0) goto yy554;
			if (yych <= 0xF3) goto yy555;
			if (yych <= 0xF4) goto yy556;
			goto yy522;
		}
	}
yy544:
	yych = *++p;
	if (yybm[0+yych] & 32) {
		goto yy544;
	}
	if (yych <= 0xE0) {
		if (yych <= '=') {
			if (yych <= 0x00) goto yy522;
			if (yych <= ' ') goto yy527;
			goto yy522;
		} else {
			if (yych <= '>') goto yy530;
			if (yych <= 0xC1) goto yy522;
			if (yych <= 0xDF) goto yy550;
			goto yy551;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych == 0xED) goto yy553;
			goto yy552;
		} else {
			if (yych <= 0xF0) goto yy554;
			if (yych <= 0xF3) goto yy555;
			if (yych <= 0xF4) goto yy556;
			goto yy522;
		}
	}
yy546:
	yych = *++p;
	if (yybm[0+yych] & 64) {
		goto yy546;
	}
	if (yych <= 0xEC) {
		if (yych <= 0xC1) {
			if (yych <= 0x00) goto yy522;
			if (yych <= '"') goto yy557;
			goto yy522;
		} else {
			if (yych <= 0xDF) goto yy558;
			if (yych <= 0xE0) goto yy559;
			goto yy560;
		}
	} else {
		if (yych <= 0xF0) {
			if (yych <= 0xED) goto yy561;
			if (yych <= 0xEF) goto yy560;
			goto yy562;
		} else {
			if (yych <= 0xF3) goto yy563;
			if (yych <= 0xF4) goto yy564;
			goto yy522;
		}
	}
yy548:
	yych = *++p;
	if (yybm[0+yych] & 128) {
		goto yy548;
	}
	if (yych <= 0xEC) {
		if (yych <= 0xC1) {
			if (yych <= 0x00) goto yy522;
			if (yych <= '\'') goto yy557;
			goto yy522;
		} else {
			if (yych <= 0xDF) goto yy565;
			if (yych <= 0xE0) goto yy566;
			goto yy567;
		}
	} else {
		if (yych <= 0xF0) {
			if (yych <= 0xED) goto yy568;
			if (yych <= 0xEF) goto yy567;
			goto yy569;
		} else {
			if (yych <= 0xF3) goto yy570;
			if (yych <= 0xF4) goto yy571;
			goto yy522;
		}
	}
yy550:
	yych = *++p;
	if (yych <= 0x7F) goto yy522;
	if (yych <= 0xBF) goto yy544;
	goto yy522;
yy551:
	yych = *++p;
	if (yych <= 0x9F) goto yy522;
	if (yych <= 0xBF) goto yy550;
	goto yy522;
yy552:
	yych = *++p;
	if (yych <= 0x7F) goto yy522;
	if (yych <= 0xBF) goto yy550;
	goto yy522;
yy553:
	yych = *++p;
	if (yych <= 0x7F) goto yy522;
	if (yych <= 0x9F) goto yy550;
	goto yy522;
yy554:
	yych = *++p;
	if (yych <= 0x8F) goto yy522;
	if (yych <= 0xBF) goto yy552;
	goto yy522;
yy555:
	yych = *++p;
	if (yych <= 0x7F) goto yy522;
	if (yych <= 0xBF) goto yy552;
	goto yy522;
yy556:
	yych = *++p;
	if (yych <= 0x7F) goto yy522;
	if (yych <= 0x8F) goto yy552;
	goto yy522;
yy557:
	yych = *++p;
	if (yybm[0+yych] & 2) {
		goto yy527;
	}
	if (yych == '/') goto yy529;
	if (yych == '>') goto yy530;
	goto yy522;
yy558:
	yych = *++p;
	if (yych <= 0x7F) goto yy522;
	if (yych <= 0xBF) goto yy546;
	goto yy522;
yy559:
	yych = *++p;
	if (yych <= 0x9F) goto yy522;
	if (yych <= 0xBF) goto yy558;
	goto yy522;
yy560:
	yych = *++p;
	if (yych <= 0x7F) goto yy522;
	if (yych <= 0xBF) goto yy558;
	goto yy522;
yy561:
	yych = *++p;
	if (yych <= 0x7F) goto yy522;
	if (yych <= 0x9F) goto yy558;
	goto yy522;
yy562:
	yych = *++p;
	if (yych <= 0x8F) goto yy522;
	if (yych <= 0xBF) goto yy560;
	goto yy522;
yy563:
	yych = *++p;
	if (yych <= 0x7F) goto yy522;
	if (yych <= 0xBF) goto yy560;
	goto yy522;
yy564:
	yych = *++p;
	if (yych <= 0x7F) goto yy522;
	if (yych <= 0x8F) goto yy560;
	goto yy522;
yy565:
	yych = *++p;
	if (yych <= 0x7F) goto yy522;
	if (yych <= 0xBF) goto yy548;
	goto yy522;
yy566:
	yych = *++p;
	if (yych <= 0x9F) goto yy522;
	if (yych <= 0xBF) goto yy565;
	goto yy522;
yy567:
	yych = *++p;
	if (yych <= 0x7F) goto yy522;
	if (yych <= 0xBF) goto yy565;
	goto yy522;
yy568:
	yych = *++p;
	if (yych <= 0x7F) goto yy522;
	if (yych <= 0x9F) goto yy565;
	goto yy522;
yy569:
	yych = *++p;
	if (yych <= 0x8F) goto yy522;
	if (yych <= 0xBF) goto yy567;
	goto yy522;
yy570:
	yych = *++p;
	if (yych <= 0x7F) goto yy522;
	if (yych <= 0xBF) goto yy567;
	goto yy522;
yy571:
	yych = *++p;
	if (yych <= 0x7F) goto yy522;
	if (yych <= 0x8F) goto yy567;
	goto yy522;
}

}

// Try to match an HTML block end line of type 1.
//
// Runs a goto-based, table-driven DFA over the bytes at `p`.  (The layout
// looks like re2c output -- NOTE(review): if so, change the generator input
// rather than hand-editing the states below.)
//
// The automaton accepts
//     <any run of well-formed UTF-8 characters other than NUL and '\n'>
//     "</" ("script" | "pre" | "style") ">"
// where the tag name is compared ASCII case-insensitively.  Scanning goes on
// after every accepted tag, so the reported match ends at the LAST such
// closing tag seen before the first NUL, '\n' or malformed UTF-8 sequence.
//
// Returns the number of bytes from `p` up to and including the '>' of that
// closing tag, or 0 when no closing tag was matched.  There is no length
// argument: only a NUL, a '\n' or a malformed byte stops the scan.
bufsize_t _scan_html_block_end_1(const unsigned char *p)
{
  const unsigned char *marker = NULL;
  const unsigned char *start = p;

{
	unsigned char yych;
	unsigned int yyaccept = 0;
	// Byte-class bitmap indexed by the current byte:
	//   bit 64  - ASCII byte that simply continues the line
	//             (0x01..0x7F except '\n' and '<')
	//   bit 128 - '<'
	// NUL, '\n' and every byte >= 0x80 have no bit set and are handled by
	// the explicit range tests in each state.
	static const unsigned char yybm[] = {
		  0,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,   0,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64, 128,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
	};
	// Dispatch on the first byte.  '\n', 0x80..0xC1 and 0xF5..0xFF match no
	// branch and fall through into yy574 (no match).
	yych = *p;
	if (yych <= 0xDF) {
		if (yych <= ';') {
			if (yych <= 0x00) goto yy574;
			if (yych != '\n') goto yy576;
		} else {
			if (yych <= '<') goto yy577;
			if (yych <= 0x7F) goto yy576;
			if (yych >= 0xC2) goto yy578;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych <= 0xE0) goto yy579;
			if (yych == 0xED) goto yy581;
			goto yy580;
		} else {
			if (yych <= 0xF0) goto yy582;
			if (yych <= 0xF3) goto yy583;
			if (yych <= 0xF4) goto yy584;
		}
	}
	// No-match exit.
yy574:
	++p;
yy575:
	{ return 0; }
	// First byte was an ordinary ASCII character: remember the backtrack
	// point and enter the main loop with the second byte already loaded.
yy576:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= '\n') {
		if (yych <= 0x00) goto yy575;
		if (yych <= '\t') goto yy586;
		goto yy575;
	} else {
		if (yych <= 0x7F) goto yy586;
		if (yych <= 0xC1) goto yy575;
		if (yych <= 0xF4) goto yy586;
		goto yy575;
	}
	// First byte was '<': a following '/' starts a candidate closing tag.
yy577:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= '.') {
		if (yych <= 0x00) goto yy575;
		if (yych == '\n') goto yy575;
		goto yy586;
	} else {
		if (yych <= 0x7F) {
			if (yych <= '/') goto yy597;
			goto yy586;
		} else {
			if (yych <= 0xC1) goto yy575;
			if (yych <= 0xF4) goto yy586;
			goto yy575;
		}
	}
	// yy578..yy584: the first byte was a UTF-8 lead byte.  Each state checks
	// that the second byte lies in the range allowed for that lead byte
	// (the narrowed ranges after 0xE0, 0xED, 0xF0 and 0xF4 reject overlong
	// forms, surrogates and values above U+10FFFF) and gives up with 0 if not.
yy578:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x7F) goto yy575;
	if (yych <= 0xBF) goto yy585;
	goto yy575;
yy579:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x9F) goto yy575;
	if (yych <= 0xBF) goto yy590;
	goto yy575;
yy580:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x7F) goto yy575;
	if (yych <= 0xBF) goto yy590;
	goto yy575;
yy581:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x7F) goto yy575;
	if (yych <= 0x9F) goto yy590;
	goto yy575;
yy582:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x8F) goto yy575;
	if (yych <= 0xBF) goto yy592;
	goto yy575;
yy583:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x7F) goto yy575;
	if (yych <= 0xBF) goto yy592;
	goto yy575;
yy584:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x7F) goto yy575;
	if (yych <= 0x8F) goto yy592;
	goto yy575;
	// Main loop: consume ordinary characters of the line.  yy586 is the
	// entry used when the next byte has already been read into yych.
yy585:
	yych = *++p;
yy586:
	if (yybm[0+yych] & 64) {
		goto yy585;
	}
	if (yych <= 0xEC) {
		if (yych <= 0xC1) {
			if (yych <= '\n') goto yy587;
			if (yych <= '<') goto yy588;
		} else {
			if (yych <= 0xDF) goto yy590;
			if (yych <= 0xE0) goto yy591;
			goto yy592;
		}
	} else {
		if (yych <= 0xF0) {
			if (yych <= 0xED) goto yy593;
			if (yych <= 0xEF) goto yy592;
			goto yy594;
		} else {
			if (yych <= 0xF3) goto yy595;
			if (yych <= 0xF4) goto yy596;
		}
	}
	// Dead end (NUL, '\n' or malformed UTF-8): rewind to the last saved
	// position.  yyaccept says whether a closing tag had been accepted
	// there (1 -> return its length) or not (0 -> return 0).
yy587:
	p = marker;
	if (yyaccept == 0) {
		goto yy575;
	} else {
		goto yy607;
	}
	// Saw '<' (further '<' bytes stay in this state); '/' moves on to the
	// tag-name states.
yy588:
	yych = *++p;
	if (yybm[0+yych] & 128) {
		goto yy588;
	}
	if (yych <= 0xDF) {
		if (yych <= '.') {
			if (yych <= 0x00) goto yy587;
			if (yych == '\n') goto yy587;
			goto yy585;
		} else {
			if (yych <= '/') goto yy597;
			if (yych <= 0x7F) goto yy585;
			if (yych <= 0xC1) goto yy587;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych <= 0xE0) goto yy591;
			if (yych == 0xED) goto yy593;
			goto yy592;
		} else {
			if (yych <= 0xF0) goto yy594;
			if (yych <= 0xF3) goto yy595;
			if (yych <= 0xF4) goto yy596;
			goto yy587;
		}
	}
	// yy590..yy596: continuation bytes of a multi-byte UTF-8 character inside
	// the line.  yy590 expects the final continuation byte, yy591..yy593 the
	// second byte of a 3-byte sequence, yy594..yy596 the second byte of a
	// 4-byte sequence.  (The lead bytes 0xC2..0xDF from yy588 fall through
	// into yy590 here.)
yy590:
	yych = *++p;
	if (yych <= 0x7F) goto yy587;
	if (yych <= 0xBF) goto yy585;
	goto yy587;
yy591:
	yych = *++p;
	if (yych <= 0x9F) goto yy587;
	if (yych <= 0xBF) goto yy590;
	goto yy587;
yy592:
	yych = *++p;
	if (yych <= 0x7F) goto yy587;
	if (yych <= 0xBF) goto yy590;
	goto yy587;
yy593:
	yych = *++p;
	if (yych <= 0x7F) goto yy587;
	if (yych <= 0x9F) goto yy590;
	goto yy587;
yy594:
	yych = *++p;
	if (yych <= 0x8F) goto yy587;
	if (yych <= 0xBF) goto yy592;
	goto yy587;
yy595:
	yych = *++p;
	if (yych <= 0x7F) goto yy587;
	if (yych <= 0xBF) goto yy592;
	goto yy587;
yy596:
	yych = *++p;
	if (yych <= 0x7F) goto yy587;
	if (yych <= 0x8F) goto yy592;
	goto yy587;
	// Saw "</": 'P'/'p' continues to yy598 ('P' by falling through),
	// 'S'/'s' to yy599; anything else resumes the ordinary scan.
yy597:
	yych = *++p;
	if (yybm[0+yych] & 128) {
		goto yy588;
	}
	if (yych <= 's') {
		if (yych <= 'P') {
			if (yych <= '\t') {
				if (yych <= 0x00) goto yy587;
				goto yy585;
			} else {
				if (yych <= '\n') goto yy587;
				if (yych <= 'O') goto yy585;
			}
		} else {
			if (yych <= 'o') {
				if (yych == 'S') goto yy599;
				goto yy585;
			} else {
				if (yych <= 'p') goto yy598;
				if (yych <= 'r') goto yy585;
				goto yy599;
			}
		}
	} else {
		if (yych <= 0xEC) {
			if (yych <= 0xC1) {
				if (yych <= 0x7F) goto yy585;
				goto yy587;
			} else {
				if (yych <= 0xDF) goto yy590;
				if (yych <= 0xE0) goto yy591;
				goto yy592;
			}
		} else {
			if (yych <= 0xF0) {
				if (yych <= 0xED) goto yy593;
				if (yych <= 0xEF) goto yy592;
				goto yy594;
			} else {
				if (yych <= 0xF3) goto yy595;
				if (yych <= 0xF4) goto yy596;
				goto yy587;
			}
		}
	}
	// Saw "</p": expect 'r' (of "pre").
yy598:
	yych = *++p;
	if (yybm[0+yych] & 128) {
		goto yy588;
	}
	if (yych <= 0xC1) {
		if (yych <= 'Q') {
			if (yych <= 0x00) goto yy587;
			if (yych == '\n') goto yy587;
			goto yy585;
		} else {
			if (yych <= 'q') {
				if (yych <= 'R') goto yy600;
				goto yy585;
			} else {
				if (yych <= 'r') goto yy600;
				if (yych <= 0x7F) goto yy585;
				goto yy587;
			}
		}
	} else {
		if (yych <= 0xED) {
			if (yych <= 0xDF) goto yy590;
			if (yych <= 0xE0) goto yy591;
			if (yych <= 0xEC) goto yy592;
			goto yy593;
		} else {
			if (yych <= 0xF0) {
				if (yych <= 0xEF) goto yy592;
				goto yy594;
			} else {
				if (yych <= 0xF3) goto yy595;
				if (yych <= 0xF4) goto yy596;
				goto yy587;
			}
		}
	}
	// Saw "</s": 'c' leads towards "script", 't' towards "style".
yy599:
	yych = *++p;
	if (yybm[0+yych] & 128) {
		goto yy588;
	}
	if (yych <= 't') {
		if (yych <= 'C') {
			if (yych <= '\t') {
				if (yych <= 0x00) goto yy587;
				goto yy585;
			} else {
				if (yych <= '\n') goto yy587;
				if (yych <= 'B') goto yy585;
				goto yy601;
			}
		} else {
			if (yych <= 'b') {
				if (yych == 'T') goto yy602;
				goto yy585;
			} else {
				if (yych <= 'c') goto yy601;
				if (yych <= 's') goto yy585;
				goto yy602;
			}
		}
	} else {
		if (yych <= 0xEC) {
			if (yych <= 0xC1) {
				if (yych <= 0x7F) goto yy585;
				goto yy587;
			} else {
				if (yych <= 0xDF) goto yy590;
				if (yych <= 0xE0) goto yy591;
				goto yy592;
			}
		} else {
			if (yych <= 0xF0) {
				if (yych <= 0xED) goto yy593;
				if (yych <= 0xEF) goto yy592;
				goto yy594;
			} else {
				if (yych <= 0xF3) goto yy595;
				if (yych <= 0xF4) goto yy596;
				goto yy587;
			}
		}
	}
	// Saw "</pr" or "</styl": expect the final 'e' of the tag name.
yy600:
	yych = *++p;
	if (yybm[0+yych] & 128) {
		goto yy588;
	}
	if (yych <= 0xC1) {
		if (yych <= 'D') {
			if (yych <= 0x00) goto yy587;
			if (yych == '\n') goto yy587;
			goto yy585;
		} else {
			if (yych <= 'd') {
				if (yych <= 'E') goto yy603;
				goto yy585;
			} else {
				if (yych <= 'e') goto yy603;
				if (yych <= 0x7F) goto yy585;
				goto yy587;
			}
		}
	} else {
		if (yych <= 0xED) {
			if (yych <= 0xDF) goto yy590;
			if (yych <= 0xE0) goto yy591;
			if (yych <= 0xEC) goto yy592;
			goto yy593;
		} else {
			if (yych <= 0xF0) {
				if (yych <= 0xEF) goto yy592;
				goto yy594;
			} else {
				if (yych <= 0xF3) goto yy595;
				if (yych <= 0xF4) goto yy596;
				goto yy587;
			}
		}
	}
	// Saw "</sc": expect 'r'.
yy601:
	yych = *++p;
	if (yybm[0+yych] & 128) {
		goto yy588;
	}
	if (yych <= 0xC1) {
		if (yych <= 'Q') {
			if (yych <= 0x00) goto yy587;
			if (yych == '\n') goto yy587;
			goto yy585;
		} else {
			if (yych <= 'q') {
				if (yych <= 'R') goto yy604;
				goto yy585;
			} else {
				if (yych <= 'r') goto yy604;
				if (yych <= 0x7F) goto yy585;
				goto yy587;
			}
		}
	} else {
		if (yych <= 0xED) {
			if (yych <= 0xDF) goto yy590;
			if (yych <= 0xE0) goto yy591;
			if (yych <= 0xEC) goto yy592;
			goto yy593;
		} else {
			if (yych <= 0xF0) {
				if (yych <= 0xEF) goto yy592;
				goto yy594;
			} else {
				if (yych <= 0xF3) goto yy595;
				if (yych <= 0xF4) goto yy596;
				goto yy587;
			}
		}
	}
	// Saw "</st": expect 'y'.
yy602:
	yych = *++p;
	if (yybm[0+yych] & 128) {
		goto yy588;
	}
	if (yych <= 0xC1) {
		if (yych <= 'X') {
			if (yych <= 0x00) goto yy587;
			if (yych == '\n') goto yy587;
			goto yy585;
		} else {
			if (yych <= 'x') {
				if (yych <= 'Y') goto yy605;
				goto yy585;
			} else {
				if (yych <= 'y') goto yy605;
				if (yych <= 0x7F) goto yy585;
				goto yy587;
			}
		}
	} else {
		if (yych <= 0xED) {
			if (yych <= 0xDF) goto yy590;
			if (yych <= 0xE0) goto yy591;
			if (yych <= 0xEC) goto yy592;
			goto yy593;
		} else {
			if (yych <= 0xF0) {
				if (yych <= 0xEF) goto yy592;
				goto yy594;
			} else {
				if (yych <= 0xF3) goto yy595;
				if (yych <= 0xF4) goto yy596;
				goto yy587;
			}
		}
	}
	// Tag name complete ("</pre", "</style" or "</script"): '>' accepts.
yy603:
	yych = *++p;
	if (yybm[0+yych] & 128) {
		goto yy588;
	}
	if (yych <= 0xDF) {
		if (yych <= '=') {
			if (yych <= 0x00) goto yy587;
			if (yych == '\n') goto yy587;
			goto yy585;
		} else {
			if (yych <= '>') goto yy606;
			if (yych <= 0x7F) goto yy585;
			if (yych <= 0xC1) goto yy587;
			goto yy590;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych <= 0xE0) goto yy591;
			if (yych == 0xED) goto yy593;
			goto yy592;
		} else {
			if (yych <= 0xF0) goto yy594;
			if (yych <= 0xF3) goto yy595;
			if (yych <= 0xF4) goto yy596;
			goto yy587;
		}
	}
	// Saw "</scr": expect 'i'.
yy604:
	yych = *++p;
	if (yybm[0+yych] & 128) {
		goto yy588;
	}
	if (yych <= 0xC1) {
		if (yych <= 'H') {
			if (yych <= 0x00) goto yy587;
			if (yych == '\n') goto yy587;
			goto yy585;
		} else {
			if (yych <= 'h') {
				if (yych <= 'I') goto yy608;
				goto yy585;
			} else {
				if (yych <= 'i') goto yy608;
				if (yych <= 0x7F) goto yy585;
				goto yy587;
			}
		}
	} else {
		if (yych <= 0xED) {
			if (yych <= 0xDF) goto yy590;
			if (yych <= 0xE0) goto yy591;
			if (yych <= 0xEC) goto yy592;
			goto yy593;
		} else {
			if (yych <= 0xF0) {
				if (yych <= 0xEF) goto yy592;
				goto yy594;
			} else {
				if (yych <= 0xF3) goto yy595;
				if (yych <= 0xF4) goto yy596;
				goto yy587;
			}
		}
	}
	// Saw "</sty": expect 'l' (then shares yy600 for the final 'e').
yy605:
	yych = *++p;
	if (yybm[0+yych] & 128) {
		goto yy588;
	}
	if (yych <= 0xC1) {
		if (yych <= 'K') {
			if (yych <= 0x00) goto yy587;
			if (yych == '\n') goto yy587;
			goto yy585;
		} else {
			if (yych <= 'k') {
				if (yych <= 'L') goto yy600;
				goto yy585;
			} else {
				if (yych <= 'l') goto yy600;
				if (yych <= 0x7F) goto yy585;
				goto yy587;
			}
		}
	} else {
		if (yych <= 0xED) {
			if (yych <= 0xDF) goto yy590;
			if (yych <= 0xE0) goto yy591;
			if (yych <= 0xEC) goto yy592;
			goto yy593;
		} else {
			if (yych <= 0xF0) {
				if (yych <= 0xEF) goto yy592;
				goto yy594;
			} else {
				if (yych <= 0xF3) goto yy595;
				if (yych <= 0xF4) goto yy596;
				goto yy587;
			}
		}
	}
	// Accepting state: a complete closing tag ends just before p.  Record
	// the position in marker and keep scanning in case a later closing tag
	// on the same line gives a longer match.
yy606:
	yyaccept = 1;
	yych = *(marker = ++p);
	if (yybm[0+yych] & 64) {
		goto yy585;
	}
	if (yych <= 0xEC) {
		if (yych <= 0xC1) {
			if (yych <= '\n') goto yy607;
			if (yych <= '<') goto yy588;
		} else {
			if (yych <= 0xDF) goto yy590;
			if (yych <= 0xE0) goto yy591;
			goto yy592;
		}
	} else {
		if (yych <= 0xF0) {
			if (yych <= 0xED) goto yy593;
			if (yych <= 0xEF) goto yy592;
			goto yy594;
		} else {
			if (yych <= 0xF3) goto yy595;
			if (yych <= 0xF4) goto yy596;
		}
	}
	// Match exit: p points just past the last accepted '>'.
yy607:
	{ return (bufsize_t)(p - start); }
	// Saw "</scri": expect 'p' ('P' reaches yy609 by falling through).
yy608:
	yych = *++p;
	if (yybm[0+yych] & 128) {
		goto yy588;
	}
	if (yych <= 0xC1) {
		if (yych <= 'O') {
			if (yych <= 0x00) goto yy587;
			if (yych == '\n') goto yy587;
			goto yy585;
		} else {
			if (yych <= 'o') {
				if (yych >= 'Q') goto yy585;
			} else {
				if (yych <= 'p') goto yy609;
				if (yych <= 0x7F) goto yy585;
				goto yy587;
			}
		}
	} else {
		if (yych <= 0xED) {
			if (yych <= 0xDF) goto yy590;
			if (yych <= 0xE0) goto yy591;
			if (yych <= 0xEC) goto yy592;
			goto yy593;
		} else {
			if (yych <= 0xF0) {
				if (yych <= 0xEF) goto yy592;
				goto yy594;
			} else {
				if (yych <= 0xF3) goto yy595;
				if (yych <= 0xF4) goto yy596;
				goto yy587;
			}
		}
	}
	// Saw "</scrip": expect 't', which completes the tag name (yy603).
yy609:
	yych = *++p;
	if (yybm[0+yych] & 128) {
		goto yy588;
	}
	if (yych <= 0xC1) {
		if (yych <= 'S') {
			if (yych <= 0x00) goto yy587;
			if (yych == '\n') goto yy587;
			goto yy585;
		} else {
			if (yych <= 's') {
				if (yych <= 'T') goto yy603;
				goto yy585;
			} else {
				if (yych <= 't') goto yy603;
				if (yych <= 0x7F) goto yy585;
				goto yy587;
			}
		}
	} else {
		if (yych <= 0xED) {
			if (yych <= 0xDF) goto yy590;
			if (yych <= 0xE0) goto yy591;
			if (yych <= 0xEC) goto yy592;
			goto yy593;
		} else {
			if (yych <= 0xF0) {
				if (yych <= 0xEF) goto yy592;
				goto yy594;
			} else {
				if (yych <= 0xF3) goto yy595;
				if (yych <= 0xF4) goto yy596;
				goto yy587;
			}
		}
	}
}

}

// Try to match an HTML block end line of type 2.
//
// Runs a goto-based, table-driven DFA over the bytes at `p`.  (The layout
// looks like re2c output -- NOTE(review): if so, change the generator input
// rather than hand-editing the states below.)
//
// The automaton accepts
//     <any run of well-formed UTF-8 characters other than NUL and '\n'> "-->"
// and keeps scanning after each accepted "-->", so the reported match ends
// at the LAST "-->" seen before the first NUL, '\n' or malformed UTF-8
// sequence.
//
// Returns the number of bytes from `p` up to and including that '>', or 0
// when no "-->" was matched.  There is no length argument: only a NUL, a
// '\n' or a malformed byte stops the scan.
bufsize_t _scan_html_block_end_2(const unsigned char *p)
{
  const unsigned char *marker = NULL;
  const unsigned char *start = p;

{
	unsigned char yych;
	unsigned int yyaccept = 0;
	// Byte-class bitmap indexed by the current byte:
	//   bit 64  - ASCII byte that simply continues the line
	//             (0x01..0x7F except '\n' and '-')
	//   bit 128 - '-'
	// NUL, '\n' and every byte >= 0x80 have no bit set.
	static const unsigned char yybm[] = {
		  0,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,   0,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64, 128,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
	};
	// Dispatch on the first byte.  '\n', 0x80..0xC1 and 0xF5..0xFF match no
	// branch and fall through into yy612 (no match).
	yych = *p;
	if (yych <= 0xDF) {
		if (yych <= ',') {
			if (yych <= 0x00) goto yy612;
			if (yych != '\n') goto yy614;
		} else {
			if (yych <= '-') goto yy615;
			if (yych <= 0x7F) goto yy614;
			if (yych >= 0xC2) goto yy616;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych <= 0xE0) goto yy617;
			if (yych == 0xED) goto yy619;
			goto yy618;
		} else {
			if (yych <= 0xF0) goto yy620;
			if (yych <= 0xF3) goto yy621;
			if (yych <= 0xF4) goto yy622;
		}
	}
	// No-match exit.
yy612:
	++p;
yy613:
	{ return 0; }
	// First byte was an ordinary ASCII character: remember the backtrack
	// point and enter the main loop with the second byte already loaded.
yy614:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= '\n') {
		if (yych <= 0x00) goto yy613;
		if (yych <= '\t') goto yy624;
		goto yy613;
	} else {
		if (yych <= 0x7F) goto yy624;
		if (yych <= 0xC1) goto yy613;
		if (yych <= 0xF4) goto yy624;
		goto yy613;
	}
	// First byte was '-': a second '-' goes straight to the "--" state.
yy615:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yybm[0+yych] & 128) {
		goto yy634;
	}
	if (yych <= '\n') {
		if (yych <= 0x00) goto yy613;
		if (yych <= '\t') goto yy624;
		goto yy613;
	} else {
		if (yych <= 0x7F) goto yy624;
		if (yych <= 0xC1) goto yy613;
		if (yych <= 0xF4) goto yy624;
		goto yy613;
	}
	// yy616..yy622: the first byte was a UTF-8 lead byte.  Each state checks
	// that the second byte lies in the range allowed for that lead byte
	// (the narrowed ranges after 0xE0, 0xED, 0xF0 and 0xF4 reject overlong
	// forms, surrogates and values above U+10FFFF) and gives up with 0 if not.
yy616:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x7F) goto yy613;
	if (yych <= 0xBF) goto yy623;
	goto yy613;
yy617:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x9F) goto yy613;
	if (yych <= 0xBF) goto yy627;
	goto yy613;
yy618:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x7F) goto yy613;
	if (yych <= 0xBF) goto yy627;
	goto yy613;
yy619:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x7F) goto yy613;
	if (yych <= 0x9F) goto yy627;
	goto yy613;
yy620:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x8F) goto yy613;
	if (yych <= 0xBF) goto yy629;
	goto yy613;
yy621:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x7F) goto yy613;
	if (yych <= 0xBF) goto yy629;
	goto yy613;
yy622:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x7F) goto yy613;
	if (yych <= 0x8F) goto yy629;
	goto yy613;
	// Main loop: consume ordinary characters of the line.  yy624 is the
	// entry used when the next byte has already been read into yych.
yy623:
	yych = *++p;
yy624:
	if (yybm[0+yych] & 64) {
		goto yy623;
	}
	if (yych <= 0xEC) {
		if (yych <= 0xC1) {
			if (yych <= '\n') goto yy625;
			if (yych <= '-') goto yy626;
		} else {
			if (yych <= 0xDF) goto yy627;
			if (yych <= 0xE0) goto yy628;
			goto yy629;
		}
	} else {
		if (yych <= 0xF0) {
			if (yych <= 0xED) goto yy630;
			if (yych <= 0xEF) goto yy629;
			goto yy631;
		} else {
			if (yych <= 0xF3) goto yy632;
			if (yych <= 0xF4) goto yy633;
		}
	}
	// Dead end (NUL, '\n' or malformed UTF-8): rewind to the last saved
	// position.  yyaccept says whether a "-->" had been accepted there
	// (1 -> return its length) or not (0 -> return 0).
yy625:
	p = marker;
	if (yyaccept == 0) {
		goto yy613;
	} else {
		goto yy637;
	}
	// Saw a single '-': another '-' moves to the "--" state.
yy626:
	yych = *++p;
	if (yybm[0+yych] & 64) {
		goto yy623;
	}
	if (yych <= 0xEC) {
		if (yych <= 0xC1) {
			if (yych <= '\n') goto yy625;
			if (yych <= '-') goto yy634;
			goto yy625;
		} else {
			if (yych <= 0xDF) goto yy627;
			if (yych <= 0xE0) goto yy628;
			goto yy629;
		}
	} else {
		if (yych <= 0xF0) {
			if (yych <= 0xED) goto yy630;
			if (yych <= 0xEF) goto yy629;
			goto yy631;
		} else {
			if (yych <= 0xF3) goto yy632;
			if (yych <= 0xF4) goto yy633;
			goto yy625;
		}
	}
	// yy627..yy633: continuation bytes of a multi-byte UTF-8 character inside
	// the line.  yy627 expects the final continuation byte, yy628..yy630 the
	// second byte of a 3-byte sequence, yy631..yy633 the second byte of a
	// 4-byte sequence.
yy627:
	yych = *++p;
	if (yych <= 0x7F) goto yy625;
	if (yych <= 0xBF) goto yy623;
	goto yy625;
yy628:
	yych = *++p;
	if (yych <= 0x9F) goto yy625;
	if (yych <= 0xBF) goto yy627;
	goto yy625;
yy629:
	yych = *++p;
	if (yych <= 0x7F) goto yy625;
	if (yych <= 0xBF) goto yy627;
	goto yy625;
yy630:
	yych = *++p;
	if (yych <= 0x7F) goto yy625;
	if (yych <= 0x9F) goto yy627;
	goto yy625;
yy631:
	yych = *++p;
	if (yych <= 0x8F) goto yy625;
	if (yych <= 0xBF) goto yy629;
	goto yy625;
yy632:
	yych = *++p;
	if (yych <= 0x7F) goto yy625;
	if (yych <= 0xBF) goto yy629;
	goto yy625;
yy633:
	yych = *++p;
	if (yych <= 0x7F) goto yy625;
	if (yych <= 0x8F) goto yy629;
	goto yy625;
	// Saw at least two consecutive '-' (extra '-' bytes stay here): '>'
	// accepts, any other character resumes the ordinary scan.
yy634:
	yych = *++p;
	if (yybm[0+yych] & 128) {
		goto yy634;
	}
	if (yych <= 0xDF) {
		if (yych <= '=') {
			if (yych <= 0x00) goto yy625;
			if (yych == '\n') goto yy625;
			goto yy623;
		} else {
			if (yych <= '>') goto yy636;
			if (yych <= 0x7F) goto yy623;
			if (yych <= 0xC1) goto yy625;
			goto yy627;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych <= 0xE0) goto yy628;
			if (yych == 0xED) goto yy630;
			goto yy629;
		} else {
			if (yych <= 0xF0) goto yy631;
			if (yych <= 0xF3) goto yy632;
			if (yych <= 0xF4) goto yy633;
			goto yy625;
		}
	}
	// Accepting state: a complete "-->" ends just before p.  Record the
	// position in marker and keep scanning in case a later "-->" on the
	// same line gives a longer match.
yy636:
	yyaccept = 1;
	yych = *(marker = ++p);
	if (yybm[0+yych] & 64) {
		goto yy623;
	}
	if (yych <= 0xEC) {
		if (yych <= 0xC1) {
			if (yych <= '\n') goto yy637;
			if (yych <= '-') goto yy626;
		} else {
			if (yych <= 0xDF) goto yy627;
			if (yych <= 0xE0) goto yy628;
			goto yy629;
		}
	} else {
		if (yych <= 0xF0) {
			if (yych <= 0xED) goto yy630;
			if (yych <= 0xEF) goto yy629;
			goto yy631;
		} else {
			if (yych <= 0xF3) goto yy632;
			if (yych <= 0xF4) goto yy633;
		}
	}
	// Match exit: p points just past the last accepted '>'.
yy637:
	{ return (bufsize_t)(p - start); }
}

}

// Try to match an HTML block end line of type 3.
//
// Runs a goto-based, table-driven DFA over the bytes at `p`.  (The layout
// looks like re2c output -- NOTE(review): if so, change the generator input
// rather than hand-editing the states below.)
//
// The automaton accepts
//     <any run of well-formed UTF-8 characters other than NUL and '\n'> "?>"
// and keeps scanning after each accepted "?>", so the reported match ends
// at the LAST "?>" seen before the first NUL, '\n' or malformed UTF-8
// sequence.
//
// Returns the number of bytes from `p` up to and including that '>', or 0
// when no "?>" was matched.  There is no length argument: only a NUL, a
// '\n' or a malformed byte stops the scan.
bufsize_t _scan_html_block_end_3(const unsigned char *p)
{
  const unsigned char *marker = NULL;
  const unsigned char *start = p;

{
	unsigned char yych;
	unsigned int yyaccept = 0;
	// Byte-class bitmap indexed by the current byte:
	//   bit 64  - ASCII byte that simply continues the line
	//             (0x01..0x7F except '\n' and '?')
	//   bit 128 - '?'
	// NUL, '\n' and every byte >= 0x80 have no bit set.
	static const unsigned char yybm[] = {
		  0,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,   0,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64, 128, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
	};
	// Dispatch on the first byte.  '\n', 0x80..0xC1 and 0xF5..0xFF match no
	// branch and fall through into yy640 (no match).
	yych = *p;
	if (yych <= 0xDF) {
		if (yych <= '>') {
			if (yych <= 0x00) goto yy640;
			if (yych != '\n') goto yy642;
		} else {
			if (yych <= '?') goto yy643;
			if (yych <= 0x7F) goto yy642;
			if (yych >= 0xC2) goto yy644;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych <= 0xE0) goto yy645;
			if (yych == 0xED) goto yy647;
			goto yy646;
		} else {
			if (yych <= 0xF0) goto yy648;
			if (yych <= 0xF3) goto yy649;
			if (yych <= 0xF4) goto yy650;
		}
	}
	// No-match exit.
yy640:
	++p;
yy641:
	{ return 0; }
	// First byte was an ordinary ASCII character: remember the backtrack
	// point and enter the main loop with the second byte already loaded.
yy642:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= '\n') {
		if (yych <= 0x00) goto yy641;
		if (yych <= '\t') goto yy652;
		goto yy641;
	} else {
		if (yych <= 0x7F) goto yy652;
		if (yych <= 0xC1) goto yy641;
		if (yych <= 0xF4) goto yy652;
		goto yy641;
	}
	// First byte was '?': an immediately following '>' accepts.
yy643:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= '=') {
		if (yych <= 0x00) goto yy641;
		if (yych == '\n') goto yy641;
		goto yy652;
	} else {
		if (yych <= 0x7F) {
			if (yych <= '>') goto yy663;
			goto yy652;
		} else {
			if (yych <= 0xC1) goto yy641;
			if (yych <= 0xF4) goto yy652;
			goto yy641;
		}
	}
	// yy644..yy650: the first byte was a UTF-8 lead byte.  Each state checks
	// that the second byte lies in the range allowed for that lead byte
	// (the narrowed ranges after 0xE0, 0xED, 0xF0 and 0xF4 reject overlong
	// forms, surrogates and values above U+10FFFF) and gives up with 0 if not.
yy644:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x7F) goto yy641;
	if (yych <= 0xBF) goto yy651;
	goto yy641;
yy645:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x9F) goto yy641;
	if (yych <= 0xBF) goto yy656;
	goto yy641;
yy646:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x7F) goto yy641;
	if (yych <= 0xBF) goto yy656;
	goto yy641;
yy647:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x7F) goto yy641;
	if (yych <= 0x9F) goto yy656;
	goto yy641;
yy648:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x8F) goto yy641;
	if (yych <= 0xBF) goto yy658;
	goto yy641;
yy649:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x7F) goto yy641;
	if (yych <= 0xBF) goto yy658;
	goto yy641;
yy650:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x7F) goto yy641;
	if (yych <= 0x8F) goto yy658;
	goto yy641;
	// Main loop: consume ordinary characters of the line.  yy652 is the
	// entry used when the next byte has already been read into yych.
yy651:
	yych = *++p;
yy652:
	if (yybm[0+yych] & 64) {
		goto yy651;
	}
	if (yych <= 0xEC) {
		if (yych <= 0xC1) {
			if (yych <= '\n') goto yy653;
			if (yych <= '?') goto yy654;
		} else {
			if (yych <= 0xDF) goto yy656;
			if (yych <= 0xE0) goto yy657;
			goto yy658;
		}
	} else {
		if (yych <= 0xF0) {
			if (yych <= 0xED) goto yy659;
			if (yych <= 0xEF) goto yy658;
			goto yy660;
		} else {
			if (yych <= 0xF3) goto yy661;
			if (yych <= 0xF4) goto yy662;
		}
	}
	// Dead end (NUL, '\n' or malformed UTF-8): rewind to the last saved
	// position.  yyaccept says whether a "?>" had been accepted there
	// (1 -> return its length) or not (0 -> return 0).
yy653:
	p = marker;
	if (yyaccept == 0) {
		goto yy641;
	} else {
		goto yy664;
	}
	// Saw '?' (further '?' bytes stay in this state): '>' accepts, any other
	// character resumes the ordinary scan.  The lead bytes 0xC2..0xDF fall
	// through into yy656.
yy654:
	yych = *++p;
	if (yybm[0+yych] & 128) {
		goto yy654;
	}
	if (yych <= 0xDF) {
		if (yych <= '=') {
			if (yych <= 0x00) goto yy653;
			if (yych == '\n') goto yy653;
			goto yy651;
		} else {
			if (yych <= '>') goto yy663;
			if (yych <= 0x7F) goto yy651;
			if (yych <= 0xC1) goto yy653;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych <= 0xE0) goto yy657;
			if (yych == 0xED) goto yy659;
			goto yy658;
		} else {
			if (yych <= 0xF0) goto yy660;
			if (yych <= 0xF3) goto yy661;
			if (yych <= 0xF4) goto yy662;
			goto yy653;
		}
	}
	// yy656..yy662: continuation bytes of a multi-byte UTF-8 character inside
	// the line.  yy656 expects the final continuation byte, yy657..yy659 the
	// second byte of a 3-byte sequence, yy660..yy662 the second byte of a
	// 4-byte sequence.
yy656:
	yych = *++p;
	if (yych <= 0x7F) goto yy653;
	if (yych <= 0xBF) goto yy651;
	goto yy653;
yy657:
	yych = *++p;
	if (yych <= 0x9F) goto yy653;
	if (yych <= 0xBF) goto yy656;
	goto yy653;
yy658:
	yych = *++p;
	if (yych <= 0x7F) goto yy653;
	if (yych <= 0xBF) goto yy656;
	goto yy653;
yy659:
	yych = *++p;
	if (yych <= 0x7F) goto yy653;
	if (yych <= 0x9F) goto yy656;
	goto yy653;
yy660:
	yych = *++p;
	if (yych <= 0x8F) goto yy653;
	if (yych <= 0xBF) goto yy658;
	goto yy653;
yy661:
	yych = *++p;
	if (yych <= 0x7F) goto yy653;
	if (yych <= 0xBF) goto yy658;
	goto yy653;
yy662:
	yych = *++p;
	if (yych <= 0x7F) goto yy653;
	if (yych <= 0x8F) goto yy658;
	goto yy653;
	// Accepting state: a complete "?>" ends just before p.  Record the
	// position in marker and keep scanning in case a later "?>" on the same
	// line gives a longer match.
yy663:
	yyaccept = 1;
	yych = *(marker = ++p);
	if (yybm[0+yych] & 64) {
		goto yy651;
	}
	if (yych <= 0xEC) {
		if (yych <= 0xC1) {
			if (yych <= '\n') goto yy664;
			if (yych <= '?') goto yy654;
		} else {
			if (yych <= 0xDF) goto yy656;
			if (yych <= 0xE0) goto yy657;
			goto yy658;
		}
	} else {
		if (yych <= 0xF0) {
			if (yych <= 0xED) goto yy659;
			if (yych <= 0xEF) goto yy658;
			goto yy660;
		} else {
			if (yych <= 0xF3) goto yy661;
			if (yych <= 0xF4) goto yy662;
		}
	}
	// Match exit: p points just past the last accepted '>'.
yy664:
	{ return (bufsize_t)(p - start); }
}

}

// Try to match an HTML block end line of type 4.
//
// Scans forward from `p` over well-formed UTF-8 characters and stops at the
// first NUL byte, '\n', or malformed byte sequence.  Returns the number of
// bytes from `p` up to and including the last '>' consumed before that stop,
// or 0 if no '>' was consumed.
//
// NOTE(review): the yy* labels and the yybm table look like re2c output --
// confirm, and if so change the scanner specification rather than this code.
bufsize_t _scan_html_block_end_4(const unsigned char *p)
{
  // `marker` is the backtrack position; it is a valid match end only when
  // yyaccept == 1.  `start` is kept to compute the match length.
  const unsigned char *marker = NULL;
  const unsigned char *start = p;

{
	unsigned char yych;
	unsigned int yyaccept = 0;
	// Byte classes: bit 128 is set for ASCII 0x01-0x7F except '\n' and '>';
	// bit 64 is set for '>' only.  0x00, '\n' and 0x80-0xFF have no bits set.
	static const unsigned char yybm[] = {
		  0, 128, 128, 128, 128, 128, 128, 128, 
		128, 128,   0, 128, 128, 128, 128, 128, 
		128, 128, 128, 128, 128, 128, 128, 128, 
		128, 128, 128, 128, 128, 128, 128, 128, 
		128, 128, 128, 128, 128, 128, 128, 128, 
		128, 128, 128, 128, 128, 128, 128, 128, 
		128, 128, 128, 128, 128, 128, 128, 128, 
		128, 128, 128, 128, 128, 128,  64, 128, 
		128, 128, 128, 128, 128, 128, 128, 128, 
		128, 128, 128, 128, 128, 128, 128, 128, 
		128, 128, 128, 128, 128, 128, 128, 128, 
		128, 128, 128, 128, 128, 128, 128, 128, 
		128, 128, 128, 128, 128, 128, 128, 128, 
		128, 128, 128, 128, 128, 128, 128, 128, 
		128, 128, 128, 128, 128, 128, 128, 128, 
		128, 128, 128, 128, 128, 128, 128, 128, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
	};
	// Dispatch on the first byte: '>' is immediately an accepting state,
	// NUL / '\n' / bytes that cannot start a UTF-8 character fail, and
	// UTF-8 lead bytes go to the matching continuation-byte check.
	yych = *p;
	if (yybm[0+yych] & 64) {
		goto yy670;
	}
	if (yych <= 0xE0) {
		if (yych <= '\n') {
			if (yych <= 0x00) goto yy667;
			if (yych <= '\t') goto yy669;
		} else {
			if (yych <= 0x7F) goto yy669;
			if (yych <= 0xC1) goto yy667;
			if (yych <= 0xDF) goto yy673;
			goto yy674;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych == 0xED) goto yy676;
			goto yy675;
		} else {
			if (yych <= 0xF0) goto yy677;
			if (yych <= 0xF3) goto yy678;
			if (yych <= 0xF4) goto yy679;
		}
	}
// No match.
yy667:
	++p;
yy668:
	{ return 0; }
// First byte was ordinary ASCII: nothing accepted yet (yyaccept = 0).
// Give up right away if the next byte cannot continue the line.
yy669:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= '\n') {
		if (yych <= 0x00) goto yy668;
		if (yych <= '\t') goto yy681;
		goto yy668;
	} else {
		if (yych <= 0x7F) goto yy681;
		if (yych <= 0xC1) goto yy668;
		if (yych <= 0xF4) goto yy681;
		goto yy668;
	}
// Just consumed a '>': remember the position after it as the current best
// match (yyaccept = 1) and keep scanning for a later '>'.
yy670:
	yyaccept = 1;
	yych = *(marker = ++p);
	if (yybm[0+yych] & 128) {
		goto yy680;
	}
	if (yych <= 0xEC) {
		if (yych <= 0xC1) {
			if (yych <= '\n') goto yy672;
			if (yych <= '>') goto yy670;
		} else {
			if (yych <= 0xDF) goto yy683;
			if (yych <= 0xE0) goto yy684;
			goto yy685;
		}
	} else {
		if (yych <= 0xF0) {
			if (yych <= 0xED) goto yy686;
			if (yych <= 0xEF) goto yy685;
			goto yy687;
		} else {
			if (yych <= 0xF3) goto yy688;
			if (yych <= 0xF4) goto yy689;
		}
	}
// Accept: p points just past the last '>' of the match.
yy672:
	{ return (bufsize_t)(p - start); }
// yy673-yy679: the first byte was a UTF-8 lead byte.  Each state checks the
// allowed range of the next byte for that lead byte (0xC2-0xDF, 0xE0,
// 0xE1-0xEC/0xEE-0xEF, 0xED, 0xF0, 0xF1-0xF3, 0xF4 respectively) and returns
// 0 if it is out of range, since nothing has been accepted yet.
yy673:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x7F) goto yy668;
	if (yych <= 0xBF) goto yy680;
	goto yy668;
yy674:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x9F) goto yy668;
	if (yych <= 0xBF) goto yy683;
	goto yy668;
yy675:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x7F) goto yy668;
	if (yych <= 0xBF) goto yy683;
	goto yy668;
yy676:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x7F) goto yy668;
	if (yych <= 0x9F) goto yy683;
	goto yy668;
yy677:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x8F) goto yy668;
	if (yych <= 0xBF) goto yy685;
	goto yy668;
yy678:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x7F) goto yy668;
	if (yych <= 0xBF) goto yy685;
	goto yy668;
yy679:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x7F) goto yy668;
	if (yych <= 0x8F) goto yy685;
	goto yy668;
// Main loop: skip ordinary ASCII bytes, jump to yy670 on '>', validate
// multi-byte UTF-8 sequences, and fall into yy682 on NUL, '\n' or an
// invalid lead byte.  yy681 is entered with yych already loaded.
yy680:
	yych = *++p;
yy681:
	if (yybm[0+yych] & 128) {
		goto yy680;
	}
	if (yych <= 0xEC) {
		if (yych <= 0xC1) {
			if (yych <= '\n') goto yy682;
			if (yych <= '>') goto yy670;
		} else {
			if (yych <= 0xDF) goto yy683;
			if (yych <= 0xE0) goto yy684;
			goto yy685;
		}
	} else {
		if (yych <= 0xF0) {
			if (yych <= 0xED) goto yy686;
			if (yych <= 0xEF) goto yy685;
			goto yy687;
		} else {
			if (yych <= 0xF3) goto yy688;
			if (yych <= 0xF4) goto yy689;
		}
	}
// Scanning stopped: rewind to the saved position.  If a '>' was accepted
// earlier (yyaccept == 1) return the match ending there, otherwise return 0.
yy682:
	p = marker;
	if (yyaccept == 0) {
		goto yy668;
	} else {
		goto yy672;
	}
// yy683-yy689: continuation-byte checks for multi-byte UTF-8 sequences met
// inside the loop.  yy683 expects the final continuation byte (0x80-0xBF);
// the others check the byte after lead byte 0xE0, 0xE1-0xEC/0xEE-0xEF, 0xED,
// 0xF0, 0xF1-0xF3 and 0xF4 respectively.  Any out-of-range byte backtracks
// via yy682.
yy683:
	yych = *++p;
	if (yych <= 0x7F) goto yy682;
	if (yych <= 0xBF) goto yy680;
	goto yy682;
yy684:
	yych = *++p;
	if (yych <= 0x9F) goto yy682;
	if (yych <= 0xBF) goto yy683;
	goto yy682;
yy685:
	yych = *++p;
	if (yych <= 0x7F) goto yy682;
	if (yych <= 0xBF) goto yy683;
	goto yy682;
yy686:
	yych = *++p;
	if (yych <= 0x7F) goto yy682;
	if (yych <= 0x9F) goto yy683;
	goto yy682;
yy687:
	yych = *++p;
	if (yych <= 0x8F) goto yy682;
	if (yych <= 0xBF) goto yy685;
	goto yy682;
yy688:
	yych = *++p;
	if (yych <= 0x7F) goto yy682;
	if (yych <= 0xBF) goto yy685;
	goto yy682;
yy689:
	yych = *++p;
	if (yych <= 0x7F) goto yy682;
	if (yych <= 0x8F) goto yy685;
	goto yy682;
}

}

// Try to match an HTML block end line of type 5.
//
// Scans forward from `p` over well-formed UTF-8 characters and stops at the
// first NUL byte, '\n', or malformed byte sequence.  Returns the number of
// bytes from `p` up to and including the last "]]>" consumed before that
// stop, or 0 if no "]]>" was consumed.
//
// NOTE(review): the yy* labels and the yybm table look like re2c output --
// confirm, and if so change the scanner specification rather than this code.
bufsize_t _scan_html_block_end_5(const unsigned char *p)
{
  // `marker` is the backtrack position; it is a valid match end only when
  // yyaccept == 1.  `start` is kept to compute the match length.
  const unsigned char *marker = NULL;
  const unsigned char *start = p;

{
	unsigned char yych;
	unsigned int yyaccept = 0;
	// Byte classes: bit 64 is set for ASCII 0x01-0x7F except '\n' and ']';
	// bit 128 is set for ']' only.  0x00, '\n' and 0x80-0xFF have no bits set.
	static const unsigned char yybm[] = {
		  0,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,   0,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64, 128,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
	};
	// Dispatch on the first byte: NUL, '\n' and bytes that cannot start a
	// UTF-8 character fail; ']' starts a possible "]]>"; UTF-8 lead bytes go
	// to the matching continuation-byte check.
	yych = *p;
	if (yych <= 0xDF) {
		if (yych <= '\\') {
			if (yych <= 0x00) goto yy692;
			if (yych != '\n') goto yy694;
		} else {
			if (yych <= ']') goto yy695;
			if (yych <= 0x7F) goto yy694;
			if (yych >= 0xC2) goto yy696;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych <= 0xE0) goto yy697;
			if (yych == 0xED) goto yy699;
			goto yy698;
		} else {
			if (yych <= 0xF0) goto yy700;
			if (yych <= 0xF3) goto yy701;
			if (yych <= 0xF4) goto yy702;
		}
	}
// No match.
yy692:
	++p;
yy693:
	{ return 0; }
// First byte was ordinary ASCII (not ']'): nothing accepted yet.
yy694:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= '\n') {
		if (yych <= 0x00) goto yy693;
		if (yych <= '\t') goto yy704;
		goto yy693;
	} else {
		if (yych <= 0x7F) goto yy704;
		if (yych <= 0xC1) goto yy693;
		if (yych <= 0xF4) goto yy704;
		goto yy693;
	}
// First byte was ']': a second ']' moves straight to the "]]" state (yy714).
yy695:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yybm[0+yych] & 128) {
		goto yy714;
	}
	if (yych <= '\n') {
		if (yych <= 0x00) goto yy693;
		if (yych <= '\t') goto yy704;
		goto yy693;
	} else {
		if (yych <= 0x7F) goto yy704;
		if (yych <= 0xC1) goto yy693;
		if (yych <= 0xF4) goto yy704;
		goto yy693;
	}
// yy696-yy702: the first byte was a UTF-8 lead byte.  Each state checks the
// allowed range of the next byte for that lead byte (0xC2-0xDF, 0xE0,
// 0xE1-0xEC/0xEE-0xEF, 0xED, 0xF0, 0xF1-0xF3, 0xF4 respectively) and returns
// 0 if it is out of range, since nothing has been accepted yet.
yy696:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x7F) goto yy693;
	if (yych <= 0xBF) goto yy703;
	goto yy693;
yy697:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x9F) goto yy693;
	if (yych <= 0xBF) goto yy707;
	goto yy693;
yy698:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x7F) goto yy693;
	if (yych <= 0xBF) goto yy707;
	goto yy693;
yy699:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x7F) goto yy693;
	if (yych <= 0x9F) goto yy707;
	goto yy693;
yy700:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x8F) goto yy693;
	if (yych <= 0xBF) goto yy709;
	goto yy693;
yy701:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x7F) goto yy693;
	if (yych <= 0xBF) goto yy709;
	goto yy693;
yy702:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x7F) goto yy693;
	if (yych <= 0x8F) goto yy709;
	goto yy693;
// Main loop: skip ordinary ASCII bytes, go to yy706 on a ']', validate
// multi-byte UTF-8 sequences, and fall into yy705 on NUL, '\n' or an
// invalid lead byte.  yy704 is entered with yych already loaded.
yy703:
	yych = *++p;
yy704:
	if (yybm[0+yych] & 64) {
		goto yy703;
	}
	if (yych <= 0xEC) {
		if (yych <= 0xC1) {
			if (yych <= '\n') goto yy705;
			if (yych <= ']') goto yy706;
		} else {
			if (yych <= 0xDF) goto yy707;
			if (yych <= 0xE0) goto yy708;
			goto yy709;
		}
	} else {
		if (yych <= 0xF0) {
			if (yych <= 0xED) goto yy710;
			if (yych <= 0xEF) goto yy709;
			goto yy711;
		} else {
			if (yych <= 0xF3) goto yy712;
			if (yych <= 0xF4) goto yy713;
		}
	}
// Scanning stopped: rewind to the saved position.  If a "]]>" was accepted
// earlier (yyaccept == 1) return the match ending there, otherwise return 0.
yy705:
	p = marker;
	if (yyaccept == 0) {
		goto yy693;
	} else {
		goto yy717;
	}
// Seen a single ']': a second ']' goes to yy714, ordinary bytes return to
// the main loop.
yy706:
	yych = *++p;
	if (yybm[0+yych] & 64) {
		goto yy703;
	}
	if (yych <= 0xEC) {
		if (yych <= 0xC1) {
			if (yych <= '\n') goto yy705;
			if (yych <= ']') goto yy714;
			goto yy705;
		} else {
			if (yych <= 0xDF) goto yy707;
			if (yych <= 0xE0) goto yy708;
			goto yy709;
		}
	} else {
		if (yych <= 0xF0) {
			if (yych <= 0xED) goto yy710;
			if (yych <= 0xEF) goto yy709;
			goto yy711;
		} else {
			if (yych <= 0xF3) goto yy712;
			if (yych <= 0xF4) goto yy713;
			goto yy705;
		}
	}
// yy707-yy713: continuation-byte checks for multi-byte UTF-8 sequences met
// inside the loop.  yy707 expects the final continuation byte (0x80-0xBF);
// the others check the byte after lead byte 0xE0, 0xE1-0xEC/0xEE-0xEF, 0xED,
// 0xF0, 0xF1-0xF3 and 0xF4 respectively.  Any out-of-range byte backtracks
// via yy705.
yy707:
	yych = *++p;
	if (yych <= 0x7F) goto yy705;
	if (yych <= 0xBF) goto yy703;
	goto yy705;
yy708:
	yych = *++p;
	if (yych <= 0x9F) goto yy705;
	if (yych <= 0xBF) goto yy707;
	goto yy705;
yy709:
	yych = *++p;
	if (yych <= 0x7F) goto yy705;
	if (yych <= 0xBF) goto yy707;
	goto yy705;
yy710:
	yych = *++p;
	if (yych <= 0x7F) goto yy705;
	if (yych <= 0x9F) goto yy707;
	goto yy705;
yy711:
	yych = *++p;
	if (yych <= 0x8F) goto yy705;
	if (yych <= 0xBF) goto yy709;
	goto yy705;
yy712:
	yych = *++p;
	if (yych <= 0x7F) goto yy705;
	if (yych <= 0xBF) goto yy709;
	goto yy705;
yy713:
	yych = *++p;
	if (yych <= 0x7F) goto yy705;
	if (yych <= 0x8F) goto yy709;
	goto yy705;
// Seen "]]" (two or more consecutive ']'): further ']' bytes stay here, '>'
// completes "]]>" (yy716), anything else resumes the main loop or stops.
yy714:
	yych = *++p;
	if (yybm[0+yych] & 128) {
		goto yy714;
	}
	if (yych <= 0xDF) {
		if (yych <= '=') {
			if (yych <= 0x00) goto yy705;
			if (yych == '\n') goto yy705;
			goto yy703;
		} else {
			if (yych <= '>') goto yy716;
			if (yych <= 0x7F) goto yy703;
			if (yych <= 0xC1) goto yy705;
			goto yy707;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych <= 0xE0) goto yy708;
			if (yych == 0xED) goto yy710;
			goto yy709;
		} else {
			if (yych <= 0xF0) goto yy711;
			if (yych <= 0xF3) goto yy712;
			if (yych <= 0xF4) goto yy713;
			goto yy705;
		}
	}
// Just consumed "]]>": remember the position after it as the current best
// match (yyaccept = 1) and keep scanning for a later "]]>".
yy716:
	yyaccept = 1;
	yych = *(marker = ++p);
	if (yybm[0+yych] & 64) {
		goto yy703;
	}
	if (yych <= 0xEC) {
		if (yych <= 0xC1) {
			if (yych <= '\n') goto yy717;
			if (yych <= ']') goto yy706;
		} else {
			if (yych <= 0xDF) goto yy707;
			if (yych <= 0xE0) goto yy708;
			goto yy709;
		}
	} else {
		if (yych <= 0xF0) {
			if (yych <= 0xED) goto yy710;
			if (yych <= 0xEF) goto yy709;
			goto yy711;
		} else {
			if (yych <= 0xF3) goto yy712;
			if (yych <= 0xF4) goto yy713;
		}
	}
// Accept: p points just past the last "]]>" of the match.
yy717:
	{ return (bufsize_t)(p - start); }
}

}

// Try to match a link title (in single quotes, in double quotes, or
// in parentheses), returning number of chars matched.  Allow one
// level of internal nesting (quotes within quotes).
//
// Details visible in the state machine below:
//  - The returned count is in bytes and includes both delimiters; 0 means
//    no match.
//  - The title body may contain any well-formed UTF-8 character except NUL.
//    A NUL or a malformed byte sequence ends scanning.
//  - A closing delimiter that directly follows a backslash is only a
//    tentative end: its position is saved and scanning continues.  If no
//    later closing delimiter is found, the match ends at the saved position.
//  - In the parenthesized form an unescaped '(' inside the title stops the
//    scan, while "\(" is consumed as part of the body.
//
// NOTE(review): the yy* labels and the yybm table look like re2c output --
// confirm, and if so change the scanner specification rather than this code.
bufsize_t _scan_link_title(const unsigned char *p)
{
  // `marker` is the backtrack position; yyaccept says which rule (if any)
  // had matched when it was saved.  `start` is kept to compute the length.
  const unsigned char *marker = NULL;
  const unsigned char *start = p;

{
	unsigned char yych;
	unsigned int yyaccept = 0;
	// Byte classes for ASCII 0x01-0x7F (0x00 and 0x80-0xFF have no bits set):
	//   bit 16  - may appear unescaped in a "..." title (not '"' or '\\')
	//   bit 64  - may appear unescaped in a '...' title (not '\'' or '\\')
	//   bit 128 - may appear unescaped in a (...) title (not '(', ')' or '\\')
	//   bit 32  - set only for '\\'; not tested in this function
	static const unsigned char yybm[] = {
		  0, 208, 208, 208, 208, 208, 208, 208, 
		208, 208, 208, 208, 208, 208, 208, 208, 
		208, 208, 208, 208, 208, 208, 208, 208, 
		208, 208, 208, 208, 208, 208, 208, 208, 
		208, 208, 192, 208, 208, 208, 208, 144, 
		 80,  80, 208, 208, 208, 208, 208, 208, 
		208, 208, 208, 208, 208, 208, 208, 208, 
		208, 208, 208, 208, 208, 208, 208, 208, 
		208, 208, 208, 208, 208, 208, 208, 208, 
		208, 208, 208, 208, 208, 208, 208, 208, 
		208, 208, 208, 208, 208, 208, 208, 208, 
		208, 208, 208, 208,  32, 208, 208, 208, 
		208, 208, 208, 208, 208, 208, 208, 208, 
		208, 208, 208, 208, 208, 208, 208, 208, 
		208, 208, 208, 208, 208, 208, 208, 208, 
		208, 208, 208, 208, 208, 208, 208, 208, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
	};
	// Dispatch on the opening delimiter; anything else is not a title.
	yych = *p;
	if (yych <= '&') {
		if (yych == '"') goto yy722;
	} else {
		if (yych <= '\'') goto yy723;
		if (yych <= '(') goto yy724;
	}
	++p;
// No match.
yy721:
	{ return 0; }
// Opening '"' consumed: fail early if the next byte is NUL or cannot start
// a UTF-8 character, otherwise enter the double-quote loop (yy726).
yy722:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x00) goto yy721;
	if (yych <= 0x7F) goto yy726;
	if (yych <= 0xC1) goto yy721;
	if (yych <= 0xF4) goto yy726;
	goto yy721;
// Opening '\'' consumed: same early check, then the single-quote loop (yy740).
yy723:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= 0x00) goto yy721;
	if (yych <= 0x7F) goto yy740;
	if (yych <= 0xC1) goto yy721;
	if (yych <= 0xF4) goto yy740;
	goto yy721;
// Opening '(' consumed: same early check, plus an immediate second '('
// fails; then the parenthesis loop (yy753).
yy724:
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych <= '(') {
		if (yych <= 0x00) goto yy721;
		if (yych <= '\'') goto yy753;
		goto yy721;
	} else {
		if (yych <= 0x7F) goto yy753;
		if (yych <= 0xC1) goto yy721;
		if (yych <= 0xF4) goto yy753;
		goto yy721;
	}
// ---- Double-quoted title ----
// Loop over body bytes: '"' closes the title (yy728), '\\' goes to yy730,
// UTF-8 lead bytes are validated, NUL or an invalid byte backtracks (yy727).
yy725:
	yych = *++p;
yy726:
	if (yybm[0+yych] & 16) {
		goto yy725;
	}
	if (yych <= 0xE0) {
		if (yych <= '\\') {
			if (yych <= 0x00) goto yy727;
			if (yych <= '"') goto yy728;
			goto yy730;
		} else {
			if (yych <= 0xC1) goto yy727;
			if (yych <= 0xDF) goto yy732;
			goto yy733;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych == 0xED) goto yy735;
			goto yy734;
		} else {
			if (yych <= 0xF0) goto yy736;
			if (yych <= 0xF3) goto yy737;
			if (yych <= 0xF4) goto yy738;
		}
	}
// Shared backtrack point for all three forms: rewind to the saved position
// and finish according to what had been accepted there (0 = nothing,
// 1 = "..." title, 2 = '...' title, 3 = (...) title).
yy727:
	p = marker;
	if (yyaccept <= 1) {
		if (yyaccept == 0) {
			goto yy721;
		} else {
			goto yy729;
		}
	} else {
		if (yyaccept == 2) {
			goto yy742;
		} else {
			goto yy755;
		}
	}
// Closing '"' (not preceded by a backslash): consume it and accept.
yy728:
	++p;
yy729:
	{ return (bufsize_t)(p - start); }
// Previous byte was a backslash: a '"' here is only a tentative end (yy765),
// another backslash stays in this state, anything else resumes the loop.
yy730:
	yych = *++p;
	if (yybm[0+yych] & 16) {
		goto yy725;
	}
	if (yych <= 0xE0) {
		if (yych <= '\\') {
			if (yych <= 0x00) goto yy727;
			if (yych <= '"') goto yy765;
			goto yy730;
		} else {
			if (yych <= 0xC1) goto yy727;
			if (yych >= 0xE0) goto yy733;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych == 0xED) goto yy735;
			goto yy734;
		} else {
			if (yych <= 0xF0) goto yy736;
			if (yych <= 0xF3) goto yy737;
			if (yych <= 0xF4) goto yy738;
			goto yy727;
		}
	}
// yy732-yy738: UTF-8 continuation-byte checks for the double-quote loop.
// yy732 expects the final continuation byte (0x80-0xBF); the others check
// the byte after lead byte 0xE0, 0xE1-0xEC/0xEE-0xEF, 0xED, 0xF0, 0xF1-0xF3
// and 0xF4 respectively.
yy732:
	yych = *++p;
	if (yych <= 0x7F) goto yy727;
	if (yych <= 0xBF) goto yy725;
	goto yy727;
yy733:
	yych = *++p;
	if (yych <= 0x9F) goto yy727;
	if (yych <= 0xBF) goto yy732;
	goto yy727;
yy734:
	yych = *++p;
	if (yych <= 0x7F) goto yy727;
	if (yych <= 0xBF) goto yy732;
	goto yy727;
yy735:
	yych = *++p;
	if (yych <= 0x7F) goto yy727;
	if (yych <= 0x9F) goto yy732;
	goto yy727;
yy736:
	yych = *++p;
	if (yych <= 0x8F) goto yy727;
	if (yych <= 0xBF) goto yy734;
	goto yy727;
yy737:
	yych = *++p;
	if (yych <= 0x7F) goto yy727;
	if (yych <= 0xBF) goto yy734;
	goto yy727;
yy738:
	yych = *++p;
	if (yych <= 0x7F) goto yy727;
	if (yych <= 0x8F) goto yy734;
	goto yy727;
// ---- Single-quoted title ----
// Loop over body bytes: '\'' closes the title (falls through to yy741),
// '\\' goes to yy743, UTF-8 lead bytes are validated, NUL or an invalid
// byte backtracks (yy727).
yy739:
	yych = *++p;
yy740:
	if (yybm[0+yych] & 64) {
		goto yy739;
	}
	if (yych <= 0xE0) {
		if (yych <= '\\') {
			if (yych <= 0x00) goto yy727;
			if (yych >= '(') goto yy743;
		} else {
			if (yych <= 0xC1) goto yy727;
			if (yych <= 0xDF) goto yy745;
			goto yy746;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych == 0xED) goto yy748;
			goto yy747;
		} else {
			if (yych <= 0xF0) goto yy749;
			if (yych <= 0xF3) goto yy750;
			if (yych <= 0xF4) goto yy751;
			goto yy727;
		}
	}
// Closing '\'' (not preceded by a backslash): consume it and accept.
yy741:
	++p;
yy742:
	{ return (bufsize_t)(p - start); }
// Previous byte was a backslash: a '\'' here is only a tentative end (yy766),
// another backslash stays in this state, anything else resumes the loop.
yy743:
	yych = *++p;
	if (yybm[0+yych] & 64) {
		goto yy739;
	}
	if (yych <= 0xE0) {
		if (yych <= '\\') {
			if (yych <= 0x00) goto yy727;
			if (yych <= '\'') goto yy766;
			goto yy743;
		} else {
			if (yych <= 0xC1) goto yy727;
			if (yych >= 0xE0) goto yy746;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych == 0xED) goto yy748;
			goto yy747;
		} else {
			if (yych <= 0xF0) goto yy749;
			if (yych <= 0xF3) goto yy750;
			if (yych <= 0xF4) goto yy751;
			goto yy727;
		}
	}
// yy745-yy751: UTF-8 continuation-byte checks for the single-quote loop
// (same byte ranges as yy732-yy738).
yy745:
	yych = *++p;
	if (yych <= 0x7F) goto yy727;
	if (yych <= 0xBF) goto yy739;
	goto yy727;
yy746:
	yych = *++p;
	if (yych <= 0x9F) goto yy727;
	if (yych <= 0xBF) goto yy745;
	goto yy727;
yy747:
	yych = *++p;
	if (yych <= 0x7F) goto yy727;
	if (yych <= 0xBF) goto yy745;
	goto yy727;
yy748:
	yych = *++p;
	if (yych <= 0x7F) goto yy727;
	if (yych <= 0x9F) goto yy745;
	goto yy727;
yy749:
	yych = *++p;
	if (yych <= 0x8F) goto yy727;
	if (yych <= 0xBF) goto yy747;
	goto yy727;
yy750:
	yych = *++p;
	if (yych <= 0x7F) goto yy727;
	if (yych <= 0xBF) goto yy747;
	goto yy727;
yy751:
	yych = *++p;
	if (yych <= 0x7F) goto yy727;
	if (yych <= 0x8F) goto yy747;
	goto yy727;
// ---- Parenthesized title ----
// Loop over body bytes: ')' closes the title (falls through to yy754),
// '\\' goes to yy756, NUL or an unescaped '(' backtracks (yy727), UTF-8
// lead bytes are validated.
yy752:
	yych = *++p;
yy753:
	if (yybm[0+yych] & 128) {
		goto yy752;
	}
	if (yych <= 0xE0) {
		if (yych <= '\\') {
			if (yych <= '(') goto yy727;
			if (yych >= '*') goto yy756;
		} else {
			if (yych <= 0xC1) goto yy727;
			if (yych <= 0xDF) goto yy758;
			goto yy759;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych == 0xED) goto yy761;
			goto yy760;
		} else {
			if (yych <= 0xF0) goto yy762;
			if (yych <= 0xF3) goto yy763;
			if (yych <= 0xF4) goto yy764;
			goto yy727;
		}
	}
// Closing ')' (not preceded by a backslash): consume it and accept.
yy754:
	++p;
yy755:
	{ return (bufsize_t)(p - start); }
// Previous byte was a backslash: a ')' here is only a tentative end (yy767),
// another backslash stays in this state, and any other ASCII byte except
// NUL (including '(') resumes the loop.
yy756:
	yych = *++p;
	if (yych <= 0xDF) {
		if (yych <= '[') {
			if (yych <= 0x00) goto yy727;
			if (yych == ')') goto yy767;
			goto yy752;
		} else {
			if (yych <= '\\') goto yy756;
			if (yych <= 0x7F) goto yy752;
			if (yych <= 0xC1) goto yy727;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych <= 0xE0) goto yy759;
			if (yych == 0xED) goto yy761;
			goto yy760;
		} else {
			if (yych <= 0xF0) goto yy762;
			if (yych <= 0xF3) goto yy763;
			if (yych <= 0xF4) goto yy764;
			goto yy727;
		}
	}
// yy758-yy764: UTF-8 continuation-byte checks for the parenthesis loop
// (same byte ranges as yy732-yy738).
yy758:
	yych = *++p;
	if (yych <= 0x7F) goto yy727;
	if (yych <= 0xBF) goto yy752;
	goto yy727;
yy759:
	yych = *++p;
	if (yych <= 0x9F) goto yy727;
	if (yych <= 0xBF) goto yy758;
	goto yy727;
yy760:
	yych = *++p;
	if (yych <= 0x7F) goto yy727;
	if (yych <= 0xBF) goto yy758;
	goto yy727;
yy761:
	yych = *++p;
	if (yych <= 0x7F) goto yy727;
	if (yych <= 0x9F) goto yy758;
	goto yy727;
yy762:
	yych = *++p;
	if (yych <= 0x8F) goto yy727;
	if (yych <= 0xBF) goto yy760;
	goto yy727;
yy763:
	yych = *++p;
	if (yych <= 0x7F) goto yy727;
	if (yych <= 0xBF) goto yy760;
	goto yy727;
yy764:
	yych = *++p;
	if (yych <= 0x7F) goto yy727;
	if (yych <= 0x8F) goto yy760;
	goto yy727;
// ---- Tentative accepts after a backslash-preceded closing delimiter ----
// Each state saves the position just past the delimiter in `marker`, tags it
// with its own yyaccept code, and keeps scanning in the matching loop so a
// later closing delimiter can extend the match.
// After \" in a double-quoted title (yyaccept = 1).
yy765:
	yyaccept = 1;
	yych = *(marker = ++p);
	if (yybm[0+yych] & 16) {
		goto yy725;
	}
	if (yych <= 0xE0) {
		if (yych <= '\\') {
			if (yych <= 0x00) goto yy729;
			if (yych <= '"') goto yy728;
			goto yy730;
		} else {
			if (yych <= 0xC1) goto yy729;
			if (yych <= 0xDF) goto yy732;
			goto yy733;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych == 0xED) goto yy735;
			goto yy734;
		} else {
			if (yych <= 0xF0) goto yy736;
			if (yych <= 0xF3) goto yy737;
			if (yych <= 0xF4) goto yy738;
			goto yy729;
		}
	}
// After \' in a single-quoted title (yyaccept = 2).
yy766:
	yyaccept = 2;
	yych = *(marker = ++p);
	if (yybm[0+yych] & 64) {
		goto yy739;
	}
	if (yych <= 0xE0) {
		if (yych <= '\\') {
			if (yych <= 0x00) goto yy742;
			if (yych <= '\'') goto yy741;
			goto yy743;
		} else {
			if (yych <= 0xC1) goto yy742;
			if (yych <= 0xDF) goto yy745;
			goto yy746;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych == 0xED) goto yy748;
			goto yy747;
		} else {
			if (yych <= 0xF0) goto yy749;
			if (yych <= 0xF3) goto yy750;
			if (yych <= 0xF4) goto yy751;
			goto yy742;
		}
	}
// After \) in a parenthesized title (yyaccept = 3).
yy767:
	yyaccept = 3;
	yych = *(marker = ++p);
	if (yybm[0+yych] & 128) {
		goto yy752;
	}
	if (yych <= 0xE0) {
		if (yych <= '\\') {
			if (yych <= '(') goto yy755;
			if (yych <= ')') goto yy754;
			goto yy756;
		} else {
			if (yych <= 0xC1) goto yy755;
			if (yych <= 0xDF) goto yy758;
			goto yy759;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych == 0xED) goto yy761;
			goto yy760;
		} else {
			if (yych <= 0xF0) goto yy762;
			if (yych <= 0xF3) goto yy763;
			if (yych <= 0xF4) goto yy764;
			goto yy755;
		}
	}
}

}

// Match space characters, including newlines.
//
// Returns the length in bytes of the run of whitespace at the start of `p`
// (space, '\t', '\n', '\v', '\f' or '\r'), or 0 when `p` does not begin
// with whitespace.
bufsize_t _scan_spacechars(const unsigned char *p)
{
  const unsigned char *cursor = p;

  // '\t' (0x09) through '\r' (0x0D) are contiguous: \t \n \v \f \r.
  while (*cursor == ' ' || (*cursor >= '\t' && *cursor <= '\r')) {
    cursor++;
  }

  return (bufsize_t)(cursor - p);
}

// Match ATX heading start.
//
// Recognizes one to six '#' characters followed by either a run of spaces
// and/or tabs, or a single '\n' or '\r'.  Returns the number of bytes
// matched (including the trailing blanks or the one line-ending byte), or
// 0 when `p` does not start an ATX heading.
bufsize_t _scan_atx_heading_start(const unsigned char *p)
{
  const unsigned char *cursor = p;
  int hashes = 0;

  // Consume at most six '#'; a seventh is left in place and rejected below.
  while (hashes < 6 && *cursor == '#') {
    cursor++;
    hashes++;
  }
  if (hashes == 0) {
    return 0;
  }

  if (*cursor == ' ' || *cursor == '\t') {
    // The opener extends over every following space or tab.
    do {
      cursor++;
    } while (*cursor == ' ' || *cursor == '\t');
    return (bufsize_t)(cursor - p);
  }

  if (*cursor == '\n' || *cursor == '\r') {
    // An empty heading: only one line-ending byte is consumed.
    return (bufsize_t)(cursor + 1 - p);
  }

  return 0;
}

// Match setext heading line.  Return 1 for level-1 heading,
// 2 for level-2, 0 for no match.
//
// A heading line is a run of '=' (level 1) or a run of '-' (level 2),
// optionally followed by spaces and/or tabs, and terminated by '\n' or '\r'.
bufsize_t _scan_setext_heading_line(const unsigned char *p)
{
  const unsigned char underline = *p;

  if (underline != '=' && underline != '-') {
    return 0;
  }

  // Skip the whole run of the underline character.
  do {
    p++;
  } while (*p == underline);

  // Trailing blanks are allowed before the line ending.
  while (*p == ' ' || *p == '\t') {
    p++;
  }

  if (*p != '\n' && *p != '\r') {
    return 0;
  }

  if (underline == '=') {
    return 1;
  }
  return 2;
}

// Scan an opening code fence.
bufsize_t _scan_open_code_fence(const unsigned char *p)
{
  const unsigned char *marker = NULL;
  const unsigned char *start = p;

{
	unsigned char yych;
	static const unsigned char yybm[] = {
		  0, 192, 192, 192, 192, 192, 192, 192, 
		192, 192,   0, 192, 192,   0, 192, 192, 
		192, 192, 192, 192, 192, 192, 192, 192, 
		192, 192, 192, 192, 192, 192, 192, 192, 
		192, 192, 192, 192, 192, 192, 192, 192, 
		192, 192, 192, 192, 192, 192, 192, 192, 
		192, 192, 192, 192, 192, 192, 192, 192, 
		192, 192, 192, 192, 192, 192, 192, 192, 
		192, 192, 192, 192, 192, 192, 192, 192, 
		192, 192, 192, 192, 192, 192, 192, 192, 
		192, 192, 192, 192, 192, 192, 192, 192, 
		192, 192, 192, 192, 192, 192, 192, 192, 
		144, 192, 192, 192, 192, 192, 192, 192, 
		192, 192, 192, 192, 192, 192, 192, 192, 
		192, 192, 192, 192, 192, 192, 192, 192, 
		192, 192, 192, 192, 192, 192, 224, 192, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
	};
	yych = *p;
	if (yych == '`') goto yy813;
	if (yych == '~') goto yy814;
	++p;
yy812:
	{ return 0; }
yy813:
	yych = *(marker = ++p);
	if (yych == '`') goto yy815;
	goto yy812;
yy814:
	yych = *(marker = ++p);
	if (yych == '~') goto yy817;
	goto yy812;
yy815:
	yych = *++p;
	if (yybm[0+yych] & 16) {
		goto yy818;
	}
yy816:
	p = marker;
	goto yy812;
yy817:
	yych = *++p;
	if (yybm[0+yych] & 32) {
		goto yy820;
	}
	goto yy816;
yy818:
	yych = *++p;
	if (yybm[0+yych] & 16) {
		goto yy818;
	}
	if (yych <= 0xDF) {
		if (yych <= '\f') {
			if (yych <= 0x00) goto yy816;
			if (yych == '\n') {
				marker = p;
				goto yy824;
			}
			marker = p;
			goto yy822;
		} else {
			if (yych <= '\r') {
				marker = p;
				goto yy824;
			}
			if (yych <= 0x7F) {
				marker = p;
				goto yy822;
			}
			if (yych <= 0xC1) goto yy816;
			marker = p;
			goto yy826;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych <= 0xE0) {
				marker = p;
				goto yy827;
			}
			if (yych == 0xED) {
				marker = p;
				goto yy829;
			}
			marker = p;
			goto yy828;
		} else {
			if (yych <= 0xF0) {
				marker = p;
				goto yy830;
			}
			if (yych <= 0xF3) {
				marker = p;
				goto yy831;
			}
			if (yych <= 0xF4) {
				marker = p;
				goto yy832;
			}
			goto yy816;
		}
	}
yy820:
	yych = *++p;
	if (yybm[0+yych] & 32) {
		goto yy820;
	}
	if (yych <= 0xDF) {
		if (yych <= '\f') {
			if (yych <= 0x00) goto yy816;
			if (yych == '\n') {
				marker = p;
				goto yy835;
			}
			marker = p;
			goto yy833;
		} else {
			if (yych <= '\r') {
				marker = p;
				goto yy835;
			}
			if (yych <= 0x7F) {
				marker = p;
				goto yy833;
			}
			if (yych <= 0xC1) goto yy816;
			marker = p;
			goto yy837;
		}
	} else {
		if (yych <= 0xEF) {
			if (yych <= 0xE0) {
				marker = p;
				goto yy838;
			}
			if (yych == 0xED) {
				marker = p;
				goto yy840;
			}
			marker = p;
			goto yy839;
		} else {
			if (yych <= 0xF0) {
				marker = p;
				goto yy841;
			}
			if (yych <= 0xF3) {
				marker = p;
				goto yy842;
			}
			if (yych <= 0xF4) {
				marker = p;
				goto yy843;
			}
			goto yy816;
		}
	}
yy822:
	yych = *++p;
	if (yybm[0+yych] & 64) {
		goto yy822;
	}
	if (yych <= 0xEC) {
		if (yych <= 0xC1) {
			if (yych <= 0x00) goto yy816;
			if (yych >= 0x0E) goto yy816;
		} else {
			if (yych <= 0xDF) goto yy826;
			if (yych <= 0xE0) goto yy827;
			goto yy828;
		}
	} else {
		if (yych <= 0xF0) {
			if (yych <= 0xED) goto yy829;
			if (yych <= 0xEF) goto yy828;
			goto yy830;
		} else {
			if (yych <= 0xF3) goto yy831;
			if (yych <= 0xF4) goto yy832;
			goto yy816;
		}
	}
yy824:
	++p;
	p = marker;
	{ return (bufsize_t)(p - start); }
yy826:
	yych = *++p;
	if (yych <= 0x7F) goto yy816;
	if (yych <= 0xBF) goto yy822;
	goto yy816;
yy827:
	yych = *++p;
	if (yych <= 0x9F) goto yy816;
	if (yych <= 0xBF) goto yy826;
	goto yy816;
yy828:
	yych = *++p;
	if (yych <= 0x7F) goto yy816;
	if (yych <= 0xBF) goto yy826;
	goto yy816;
yy829:
	yych = *++p;
	if (yych <= 0x7F) goto yy816;
	if (yych <= 0x9F) goto yy826;
	goto yy816;
yy830:
	yych = *++p;
	if (yych <= 0x8F) goto yy816;
	if (yych <= 0xBF) goto yy828;
	goto yy816;
yy831:
	yych = *++p;
	if (yych <= 0x7F) goto yy816;
	if (yych <= 0xBF) goto yy828;
	goto yy816;
yy832:
	yych = *++p;
	if (yych <= 0x7F) goto yy816;
	if (yych <= 0x8F) goto yy828;
	goto yy816;
yy833:
	yych = *++p;
	if (yybm[0+yych] & 128) {
		goto yy833;
	}
	if (yych <= 0xEC) {
		if (yych <= 0xC1) {
			if (yych <= 0x00) goto yy816;
			if (yych >= 0x0E) goto yy816;
		} else {
			if (yych <= 0xDF) goto yy837;
			if (yych <= 0xE0) goto yy838;
			goto yy839;
		}
	} else {
		if (yych <= 0xF0) {
			if (yych <= 0xED) goto yy840;
			if (yych <= 0xEF) goto yy839;
			goto yy841;
		} else {
			if (yych <= 0xF3) goto yy842;
			if (yych <= 0xF4) goto yy843;
			goto yy816;
		}
	}
yy835:
	++p;
	p = marker;
	{ return (bufsize_t)(p - start); }
yy837:
	yych = *++p;
	if (yych <= 0x7F) goto yy816;
	if (yych <= 0xBF) goto yy833;
	goto yy816;
yy838:
	yych = *++p;
	if (yych <= 0x9F) goto yy816;
	if (yych <= 0xBF) goto yy837;
	goto yy816;
yy839:
	yych = *++p;
	if (yych <= 0x7F) goto yy816;
	if (yych <= 0xBF) goto yy837;
	goto yy816;
yy840:
	yych = *++p;
	if (yych <= 0x7F) goto yy816;
	if (yych <= 0x9F) goto yy837;
	goto yy816;
yy841:
	yych = *++p;
	if (yych <= 0x8F) goto yy816;
	if (yych <= 0xBF) goto yy839;
	goto yy816;
yy842:
	yych = *++p;
	if (yych <= 0x7F) goto yy816;
	if (yych <= 0xBF) goto yy839;
	goto yy816;
yy843:
	yych = *++p;
	if (yych <= 0x7F) goto yy816;
	if (yych <= 0x8F) goto yy839;
	goto yy816;
}

}

// Scans a closing code fence at p: a run of three or more '`' or three or
// more '~' (the two are not mixed), optionally followed by spaces/tabs, and
// then a line ending ('\n' or '\r').
// Returns the number of fence characters in the run (trailing whitespace and
// the line ending are not counted), or 0 if there is no match.
// NOTE(review): this function takes no minimum-length argument; presumably
// the caller compares the returned length with the opening fence -- confirm.
// NOTE(review): the yyNNN labels / yybm bitmap look like scanner-generator
// output (re2c style) -- confirm, and prefer regenerating over hand-editing.
bufsize_t _scan_close_code_fence(const unsigned char *p)
{
  const unsigned char *marker = NULL;
  const unsigned char *start = p;

{
	unsigned char yych;
	// Character-class bitmap indexed by byte value:
	//   32  -> '`'      64 -> '~'      128 -> '\t' and ' '
	static const unsigned char yybm[] = {
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0, 128,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		128,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		 32,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,  64,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
	};
	yych = *p;
	if (yych == '`') goto yy848;
	if (yych == '~') goto yy849;
	++p;
yy847:
	// No match.
	{ return 0; }
yy848:
	// One '`' seen; a second one is required.
	yych = *(marker = ++p);
	if (yych == '`') goto yy850;
	goto yy847;
yy849:
	// One '~' seen; a second one is required.
	yych = *(marker = ++p);
	if (yych == '~') goto yy852;
	goto yy847;
yy850:
	// Two '`' seen; a third one is required.
	yych = *++p;
	if (yybm[0+yych] & 32) {
		goto yy853;
	}
yy851:
	// Failure after a partial match: rewind to the saved position and fail.
	p = marker;
	goto yy847;
yy852:
	// Two '~' seen; a third one is required.
	yych = *++p;
	if (yybm[0+yych] & 64) {
		goto yy855;
	}
	goto yy851;
yy853:
	// Consume the rest of the '`' run; marker is then set to the first byte
	// after the run so the return value counts fence characters only.
	yych = *++p;
	if (yybm[0+yych] & 32) {
		goto yy853;
	}
	if (yych <= '\f') {
		if (yych <= 0x08) goto yy851;
		if (yych <= '\t') {
			marker = p;
			goto yy857;
		}
		if (yych <= '\n') {
			marker = p;
			goto yy859;
		}
		goto yy851;
	} else {
		if (yych <= '\r') {
			marker = p;
			goto yy859;
		}
		if (yych == ' ') {
			marker = p;
			goto yy857;
		}
		goto yy851;
	}
yy855:
	// Same as yy853, for a '~' run.
	yych = *++p;
	if (yybm[0+yych] & 64) {
		goto yy855;
	}
	if (yych <= '\f') {
		if (yych <= 0x08) goto yy851;
		if (yych <= '\t') {
			marker = p;
			goto yy861;
		}
		if (yych <= '\n') {
			marker = p;
			goto yy863;
		}
		goto yy851;
	} else {
		if (yych <= '\r') {
			marker = p;
			goto yy863;
		}
		if (yych == ' ') {
			marker = p;
			goto yy861;
		}
		goto yy851;
	}
yy857:
	// Skip spaces/tabs after a '`' fence; only '\n' or '\r' may follow.
	yych = *++p;
	if (yybm[0+yych] & 128) {
		goto yy857;
	}
	if (yych <= 0x08) goto yy851;
	if (yych <= '\n') goto yy859;
	if (yych != '\r') goto yy851;
yy859:
	// Match: p is reset to marker (end of the fence run) before returning,
	// so the whitespace and line ending are not part of the result.
	++p;
	p = marker;
	{ return (bufsize_t)(p - start); }
yy861:
	// Skip spaces/tabs after a '~' fence; only '\n' or '\r' may follow.
	yych = *++p;
	if (yych <= '\f') {
		if (yych <= 0x08) goto yy851;
		if (yych <= '\t') goto yy861;
		if (yych >= '\v') goto yy851;
	} else {
		if (yych <= '\r') goto yy863;
		if (yych == ' ') goto yy861;
		goto yy851;
	}
yy863:
	// Match for a '~' fence; same result convention as yy859.
	++p;
	p = marker;
	{ return (bufsize_t)(p - start); }
}

}

// Scans an entity (character reference) starting at p. Accepted forms:
//   "&#"  followed by 1-7 decimal digits, then ';'
//   "&#x" or "&#X" followed by 1-6 hexadecimal digits, then ';'
//   "&"   followed by a letter and 1-31 further letters/digits, then ';'
// Returns the number of bytes matched (including the '&' and the ';'), or 0
// if p does not start with one of these forms. Only the syntax is checked;
// this function does not decide whether a named entity actually exists.
// NOTE(review): the yyNNN labels look like scanner-generator output (re2c
// style) -- confirm, and prefer regenerating over hand-editing.
bufsize_t _scan_entity(const unsigned char *p)
{
  const unsigned char *marker = NULL;
  const unsigned char *start = p;

{
	unsigned char yych;
	yych = *p;
	if (yych == '&') goto yy869;
	++p;
yy868:
	// No match.
	{ return 0; }
yy869:
	// After '&': expect '#' (numeric form) or a letter (named form).
	yych = *(marker = ++p);
	if (yych <= '@') {
		if (yych != '#') goto yy868;
	} else {
		if (yych <= 'Z') goto yy872;
		if (yych <= '`') goto yy868;
		if (yych <= 'z') goto yy872;
		goto yy868;
	}
	// After "&#": a decimal digit, or 'X'/'x' introducing hex digits.
	yych = *++p;
	if (yych <= 'W') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy873;
	} else {
		if (yych <= 'X') goto yy874;
		if (yych == 'x') goto yy874;
	}
yy871:
	// Failure after a partial match: rewind to the saved position and fail.
	p = marker;
	goto yy868;
yy872:
	// Named form: the first letter must be followed by a letter or digit.
	yych = *++p;
	if (yych <= '@') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy875;
		goto yy871;
	} else {
		if (yych <= 'Z') goto yy875;
		if (yych <= '`') goto yy871;
		if (yych <= 'z') goto yy875;
		goto yy871;
	}
yy873:
	// Decimal form: one digit consumed so far.
	yych = *++p;
	if (yych <= '/') goto yy871;
	if (yych <= '9') goto yy876;
	if (yych == ';') goto yy877;
	goto yy871;
yy874:
	// Hex form: at least one hex digit is required after the 'x'/'X'.
	yych = *++p;
	if (yych <= '@') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy879;
		goto yy871;
	} else {
		if (yych <= 'F') goto yy879;
		if (yych <= '`') goto yy871;
		if (yych <= 'f') goto yy879;
		goto yy871;
	}
yy875:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy880;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
			goto yy880;
		} else {
			if (yych <= '`') goto yy871;
			if (yych <= 'z') goto yy880;
			goto yy871;
		}
	}
yy876:
	yych = *++p;
	if (yych <= '/') goto yy871;
	if (yych <= '9') goto yy881;
	if (yych != ';') goto yy871;
yy877:
	// Terminating ';' seen: consume it and report the matched length.
	++p;
	{ return (bufsize_t)(p - start); }
yy879:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy882;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'F') {
			if (yych <= '@') goto yy871;
			goto yy882;
		} else {
			if (yych <= '`') goto yy871;
			if (yych <= 'f') goto yy882;
			goto yy871;
		}
	}
yy880:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy883;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
			goto yy883;
		} else {
			if (yych <= '`') goto yy871;
			if (yych <= 'z') goto yy883;
			goto yy871;
		}
	}
yy881:
	yych = *++p;
	if (yych <= '/') goto yy871;
	if (yych <= '9') goto yy884;
	if (yych == ';') goto yy877;
	goto yy871;
yy882:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy885;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'F') {
			if (yych <= '@') goto yy871;
			goto yy885;
		} else {
			if (yych <= '`') goto yy871;
			if (yych <= 'f') goto yy885;
			goto yy871;
		}
	}
yy883:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy886;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
			goto yy886;
		} else {
			if (yych <= '`') goto yy871;
			if (yych <= 'z') goto yy886;
			goto yy871;
		}
	}
yy884:
	yych = *++p;
	if (yych <= '/') goto yy871;
	if (yych <= '9') goto yy887;
	if (yych == ';') goto yy877;
	goto yy871;
yy885:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy888;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'F') {
			if (yych <= '@') goto yy871;
			goto yy888;
		} else {
			if (yych <= '`') goto yy871;
			if (yych <= 'f') goto yy888;
			goto yy871;
		}
	}
yy886:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy889;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
			goto yy889;
		} else {
			if (yych <= '`') goto yy871;
			if (yych <= 'z') goto yy889;
			goto yy871;
		}
	}
yy887:
	yych = *++p;
	if (yych <= '/') goto yy871;
	if (yych <= '9') goto yy890;
	if (yych == ';') goto yy877;
	goto yy871;
yy888:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy891;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'F') {
			if (yych <= '@') goto yy871;
			goto yy891;
		} else {
			if (yych <= '`') goto yy871;
			if (yych <= 'f') goto yy891;
			goto yy871;
		}
	}
yy889:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy892;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
			goto yy892;
		} else {
			if (yych <= '`') goto yy871;
			if (yych <= 'z') goto yy892;
			goto yy871;
		}
	}
yy890:
	yych = *++p;
	if (yych <= '/') goto yy871;
	if (yych <= '9') goto yy893;
	if (yych == ';') goto yy877;
	goto yy871;
yy891:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy893;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'F') {
			if (yych <= '@') goto yy871;
			goto yy893;
		} else {
			if (yych <= '`') goto yy871;
			if (yych <= 'f') goto yy893;
			goto yy871;
		}
	}
yy892:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy894;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
			goto yy894;
		} else {
			if (yych <= '`') goto yy871;
			if (yych <= 'z') goto yy894;
			goto yy871;
		}
	}
yy893:
	// Maximum length reached for the current form: only ';' is accepted.
	yych = *++p;
	if (yych == ';') goto yy877;
	goto yy871;
yy894:
	// yy894..yy917: remaining characters of the named form. Each state
	// accepts one more letter/digit (falling through to the next state)
	// or the terminating ';'.
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy895;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
		} else {
			if (yych <= '`') goto yy871;
			if (yych >= '{') goto yy871;
		}
	}
yy895:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy896;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
		} else {
			if (yych <= '`') goto yy871;
			if (yych >= '{') goto yy871;
		}
	}
yy896:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy897;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
		} else {
			if (yych <= '`') goto yy871;
			if (yych >= '{') goto yy871;
		}
	}
yy897:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy898;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
		} else {
			if (yych <= '`') goto yy871;
			if (yych >= '{') goto yy871;
		}
	}
yy898:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy899;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
		} else {
			if (yych <= '`') goto yy871;
			if (yych >= '{') goto yy871;
		}
	}
yy899:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy900;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
		} else {
			if (yych <= '`') goto yy871;
			if (yych >= '{') goto yy871;
		}
	}
yy900:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy901;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
		} else {
			if (yych <= '`') goto yy871;
			if (yych >= '{') goto yy871;
		}
	}
yy901:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy902;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
		} else {
			if (yych <= '`') goto yy871;
			if (yych >= '{') goto yy871;
		}
	}
yy902:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy903;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
		} else {
			if (yych <= '`') goto yy871;
			if (yych >= '{') goto yy871;
		}
	}
yy903:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy904;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
		} else {
			if (yych <= '`') goto yy871;
			if (yych >= '{') goto yy871;
		}
	}
yy904:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy905;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
		} else {
			if (yych <= '`') goto yy871;
			if (yych >= '{') goto yy871;
		}
	}
yy905:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy906;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
		} else {
			if (yych <= '`') goto yy871;
			if (yych >= '{') goto yy871;
		}
	}
yy906:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy907;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
		} else {
			if (yych <= '`') goto yy871;
			if (yych >= '{') goto yy871;
		}
	}
yy907:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy908;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
		} else {
			if (yych <= '`') goto yy871;
			if (yych >= '{') goto yy871;
		}
	}
yy908:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy909;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
		} else {
			if (yych <= '`') goto yy871;
			if (yych >= '{') goto yy871;
		}
	}
yy909:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy910;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
		} else {
			if (yych <= '`') goto yy871;
			if (yych >= '{') goto yy871;
		}
	}
yy910:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy911;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
		} else {
			if (yych <= '`') goto yy871;
			if (yych >= '{') goto yy871;
		}
	}
yy911:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy912;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
		} else {
			if (yych <= '`') goto yy871;
			if (yych >= '{') goto yy871;
		}
	}
yy912:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy913;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
		} else {
			if (yych <= '`') goto yy871;
			if (yych >= '{') goto yy871;
		}
	}
yy913:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy914;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
		} else {
			if (yych <= '`') goto yy871;
			if (yych >= '{') goto yy871;
		}
	}
yy914:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy915;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
		} else {
			if (yych <= '`') goto yy871;
			if (yych >= '{') goto yy871;
		}
	}
yy915:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy916;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
		} else {
			if (yych <= '`') goto yy871;
			if (yych >= '{') goto yy871;
		}
	}
yy916:
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy917;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
		} else {
			if (yych <= '`') goto yy871;
			if (yych >= '{') goto yy871;
		}
	}
yy917:
	// Last optional name character; after it only ';' is accepted (yy893).
	yych = *++p;
	if (yych <= ';') {
		if (yych <= '/') goto yy871;
		if (yych <= '9') goto yy893;
		if (yych <= ':') goto yy871;
		goto yy877;
	} else {
		if (yych <= 'Z') {
			if (yych <= '@') goto yy871;
			goto yy893;
		} else {
			if (yych <= '`') goto yy871;
			if (yych <= 'z') goto yy893;
			goto yy871;
		}
	}
}

}

// Returns a positive value if a URL begins in a way that is potentially
// dangerous, i.e. with "javascript:", "vbscript:", "file:" or "data:"
// (matched case-insensitively), otherwise 0.
// Exception: a URL starting with "data:image/" followed by "gif", "jpeg",
// "png" or "webp" (also case-insensitive) returns 0.
// The positive value is the length of the matched scheme prefix including
// the ':' (for "data:" the scanner rewinds to just after the colon).
// NOTE(review): the yyNNN labels look like scanner-generator output (re2c
// style) -- confirm, and prefer regenerating over hand-editing.
bufsize_t _scan_dangerous_url(const unsigned char *p)
{
  const unsigned char *marker = NULL;
  const unsigned char *start = p;

{
	unsigned char yych;
	// yyaccept records which result applies when a longer match attempt
	// fails: 0 = nothing matched yet, 1 = "data:" already matched.
	unsigned int yyaccept = 0;
	yych = *p;
	// Dispatch on the first letter: D/d, F/f, J/j or V/v.
	if (yych <= 'V') {
		if (yych <= 'F') {
			if (yych == 'D') goto yy922;
			if (yych >= 'F') goto yy923;
		} else {
			if (yych == 'J') goto yy924;
			if (yych >= 'V') goto yy925;
		}
	} else {
		if (yych <= 'f') {
			if (yych == 'd') goto yy922;
			if (yych >= 'f') goto yy923;
		} else {
			if (yych <= 'j') {
				if (yych >= 'j') goto yy924;
			} else {
				if (yych == 'v') goto yy925;
			}
		}
	}
	++p;
yy921:
	// No dangerous prefix.
	{ return 0; }
yy922:
	// "d": start of "data:".
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych == 'A') goto yy926;
	if (yych == 'a') goto yy926;
	goto yy921;
yy923:
	// "f": start of "file:".
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych == 'I') goto yy928;
	if (yych == 'i') goto yy928;
	goto yy921;
yy924:
	// "j": start of "javascript:".
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych == 'A') goto yy929;
	if (yych == 'a') goto yy929;
	goto yy921;
yy925:
	// "v": start of "vbscript:".
	yyaccept = 0;
	yych = *(marker = ++p);
	if (yych == 'B') goto yy930;
	if (yych == 'b') goto yy930;
	goto yy921;
yy926:
	yych = *++p;
	if (yych == 'T') goto yy931;
	if (yych == 't') goto yy931;
yy927:
	// Mismatch: rewind to marker and fall back to the last accepted result
	// (no match, or the plain "data:" match).
	p = marker;
	if (yyaccept == 0) {
		goto yy921;
	} else {
		goto yy939;
	}
yy928:
	yych = *++p;
	if (yych == 'L') goto yy932;
	if (yych == 'l') goto yy932;
	goto yy927;
yy929:
	yych = *++p;
	if (yych == 'V') goto yy933;
	if (yych == 'v') goto yy933;
	goto yy927;
yy930:
	// Shared by "javascript:" and "vbscript:" from the 's' onwards.
	yych = *++p;
	if (yych == 'S') goto yy934;
	if (yych == 's') goto yy934;
	goto yy927;
yy931:
	yych = *++p;
	if (yych == 'A') goto yy935;
	if (yych == 'a') goto yy935;
	goto yy927;
yy932:
	yych = *++p;
	if (yych == 'E') goto yy936;
	if (yych == 'e') goto yy936;
	goto yy927;
yy933:
	yych = *++p;
	if (yych == 'A') goto yy930;
	if (yych == 'a') goto yy930;
	goto yy927;
yy934:
	yych = *++p;
	if (yych == 'C') goto yy937;
	if (yych == 'c') goto yy937;
	goto yy927;
yy935:
	yych = *++p;
	if (yych == ':') goto yy938;
	goto yy927;
yy936:
	// Expect the ':' that ends "file", "javascript" or "vbscript".
	yych = *++p;
	if (yych == ':') goto yy940;
	goto yy927;
yy937:
	yych = *++p;
	if (yych == 'R') goto yy941;
	if (yych == 'r') goto yy941;
	goto yy927;
yy938:
	// "data:" matched. Remember that (yyaccept = 1, marker after the ':')
	// and keep scanning for an exempt "image/..." type.
	yyaccept = 1;
	yych = *(marker = ++p);
	if (yych == 'I') goto yy942;
	if (yych == 'i') goto yy942;
yy939:
	// Dangerous prefix: return its length.
	{ return (bufsize_t)(p - start); }
yy940:
	++p;
	goto yy939;
yy941:
	yych = *++p;
	if (yych == 'I') goto yy943;
	if (yych == 'i') goto yy943;
	goto yy927;
yy942:
	yych = *++p;
	if (yych == 'M') goto yy944;
	if (yych == 'm') goto yy944;
	goto yy927;
yy943:
	yych = *++p;
	if (yych == 'P') goto yy945;
	if (yych == 'p') goto yy945;
	goto yy927;
yy944:
	yych = *++p;
	if (yych == 'A') goto yy946;
	if (yych == 'a') goto yy946;
	goto yy927;
yy945:
	yych = *++p;
	if (yych == 'T') goto yy936;
	if (yych == 't') goto yy936;
	goto yy927;
yy946:
	yych = *++p;
	if (yych == 'G') goto yy947;
	if (yych != 'g') goto yy927;
yy947:
	yych = *++p;
	if (yych == 'E') goto yy948;
	if (yych != 'e') goto yy927;
yy948:
	// "data:image" seen; require '/' and then dispatch on the first letter
	// of the subtype: G (gif), J (jpeg), P (png) or W (webp).
	yych = *++p;
	if (yych != '/') goto yy927;
	yych = *++p;
	if (yych <= 'W') {
		if (yych <= 'J') {
			if (yych == 'G') goto yy950;
			if (yych <= 'I') goto yy927;
			goto yy951;
		} else {
			if (yych == 'P') goto yy952;
			if (yych <= 'V') goto yy927;
			goto yy953;
		}
	} else {
		if (yych <= 'j') {
			if (yych == 'g') goto yy950;
			if (yych <= 'i') goto yy927;
			goto yy951;
		} else {
			if (yych <= 'p') {
				if (yych <= 'o') goto yy927;
				goto yy952;
			} else {
				if (yych == 'w') goto yy953;
				goto yy927;
			}
		}
	}
yy950:
	yych = *++p;
	if (yych == 'I') goto yy954;
	if (yych == 'i') goto yy954;
	goto yy927;
yy951:
	yych = *++p;
	if (yych == 'P') goto yy955;
	if (yych == 'p') goto yy955;
	goto yy927;
yy952:
	yych = *++p;
	if (yych == 'N') goto yy956;
	if (yych == 'n') goto yy956;
	goto yy927;
yy953:
	yych = *++p;
	if (yych == 'E') goto yy957;
	if (yych == 'e') goto yy957;
	goto yy927;
yy954:
	yych = *++p;
	if (yych == 'F') goto yy958;
	if (yych == 'f') goto yy958;
	goto yy927;
yy955:
	yych = *++p;
	if (yych == 'E') goto yy956;
	if (yych != 'e') goto yy927;
yy956:
	yych = *++p;
	if (yych == 'G') goto yy958;
	if (yych == 'g') goto yy958;
	goto yy927;
yy957:
	yych = *++p;
	if (yych == 'B') goto yy960;
	if (yych == 'b') goto yy960;
	goto yy927;
yy958:
	// Exempt "data:image/<gif|jpeg|png|webp>" prefix: not dangerous.
	++p;
	{ return 0; }
yy960:
	yych = *++p;
	if (yych == 'P') goto yy958;
	if (yych == 'p') goto yy958;
	goto yy927;
}

}

// Scans a footnote definition opening at p: "[^", then a label of one or
// more characters, then "]:", then any number of spaces/tabs.
// A label character is any byte except NUL, tab, '\n', '\r', space and ']';
// bytes >= 0x80 are accepted only as part of a well-formed UTF-8 sequence.
// Returns the number of bytes matched (including the trailing spaces/tabs),
// or 0 if there is no match.
// NOTE(review): the yyNNN labels / yybm bitmap look like scanner-generator
// output (re2c style) -- confirm, and prefer regenerating over hand-editing.
bufsize_t _scan_footnote_definition(const unsigned char *p)
{
  const unsigned char *marker = NULL;
  const unsigned char *start = p;

{
	unsigned char yych;
	// Character-class bitmap indexed by byte value:
	//   64  -> single-byte label character
	//   128 -> '\t' and ' ' (whitespace allowed after "]:")
	static const unsigned char yybm[] = {
		  0,  64,  64,  64,  64,  64,  64,  64, 
		 64, 128,   0,  64,  64,   0,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		128,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,   0,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		 64,  64,  64,  64,  64,  64,  64,  64, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
		  0,   0,   0,   0,   0,   0,   0,   0, 
	};
	yych = *p;
	if (yych == '[') goto yy965;
	++p;
yy964:
	// No match.
	{ return 0; }
yy965:
	// After '[': require '^', then a first label byte that is not ']'
	// (an empty label "[^]" is rejected).
	yych = *(marker = ++p);
	if (yych != '^') goto yy964;
	yych = *++p;
	if (yych != ']') goto yy969;
yy967:
	// Failure after a partial match: rewind to the saved position and fail.
	p = marker;
	goto yy964;
yy968:
	yych = *++p;
yy969:
	// Label loop: consume single-byte label characters, dispatch UTF-8 lead
	// bytes to their continuation states, and leave the loop on ']'.
	if (yybm[0+yych] & 64) {
		goto yy968;
	}
	if (yych <= 0xEC) {
		if (yych <= 0xC1) {
			if (yych <= ' ') goto yy967;
			if (yych <= ']') goto yy977;
			goto yy967;
		} else {
			if (yych <= 0xDF) goto yy970;
			if (yych <= 0xE0) goto yy971;
			goto yy972;
		}
	} else {
		if (yych <= 0xF0) {
			if (yych <= 0xED) goto yy973;
			if (yych <= 0xEF) goto yy972;
			goto yy974;
		} else {
			if (yych <= 0xF3) goto yy975;
			if (yych <= 0xF4) goto yy976;
			goto yy967;
		}
	}
yy970:
	// yy970..yy976: validate the continuation bytes of a multi-byte UTF-8
	// sequence, then return to the label loop.
	yych = *++p;
	if (yych <= 0x7F) goto yy967;
	if (yych <= 0xBF) goto yy968;
	goto yy967;
yy971:
	yych = *++p;
	if (yych <= 0x9F) goto yy967;
	if (yych <= 0xBF) goto yy970;
	goto yy967;
yy972:
	yych = *++p;
	if (yych <= 0x7F) goto yy967;
	if (yych <= 0xBF) goto yy970;
	goto yy967;
yy973:
	yych = *++p;
	if (yych <= 0x7F) goto yy967;
	if (yych <= 0x9F) goto yy970;
	goto yy967;
yy974:
	yych = *++p;
	if (yych <= 0x8F) goto yy967;
	if (yych <= 0xBF) goto yy972;
	goto yy967;
yy975:
	yych = *++p;
	if (yych <= 0x7F) goto yy967;
	if (yych <= 0xBF) goto yy972;
	goto yy967;
yy976:
	yych = *++p;
	if (yych <= 0x7F) goto yy967;
	if (yych <= 0x8F) goto yy972;
	goto yy967;
yy977:
	// After the closing ']': a ':' is required.
	yych = *++p;
	if (yych != ':') goto yy967;
yy978:
	// Consume optional spaces/tabs after "]:" and report the total length.
	yych = *++p;
	if (yybm[0+yych] & 128) {
		goto yy978;
	}
	{ return (bufsize_t)(p - start); }
}

}
cmarkgfm/third_party/cmark/src/map.c0000644000175000017500000000547514210444464017701 0ustar  carstencarsten#include "map.h"
#include "utf8.h"
#include "parser.h"

// Build the canonical form of a map label: case-fold the chunk's bytes,
// strip leading/trailing whitespace, and collapse internal whitespace runs.
// The returned buffer is detached from the string buffer and must be
// released by the caller with mem->free().
// Returns NULL when ref is NULL, has zero length, or normalizes to an empty
// string (i.e. the label consisted solely of whitespace).
unsigned char *normalize_map_label(cmark_mem *mem, cmark_chunk *ref) {
  cmark_strbuf folded = CMARK_BUF_INIT(mem);
  unsigned char *label;

  /* Nothing to normalize for a missing or zero-length chunk. */
  if (ref == NULL || ref->len == 0)
    return NULL;

  cmark_utf8proc_case_fold(&folded, ref->data, ref->len);
  cmark_strbuf_trim(&folded);
  cmark_strbuf_normalize_whitespace(&folded);

  label = cmark_strbuf_detach(&folded);
  assert(label);

  if (label[0] != '\0')
    return label;

  /* Whitespace-only label: release the empty buffer and report "no label". */
  mem->free(label);
  return NULL;
}

/* Byte-wise ordering of two NUL-terminated labels (strcmp semantics). */
static int
labelcmp(const unsigned char *a, const unsigned char *b) {
  const char *lhs = (const char *)a;
  const char *rhs = (const char *)b;

  return strcmp(lhs, rhs);
}

/* qsort comparator over an array of cmark_map_entry pointers.
 * Orders entries by label; entries with equal labels are ordered by
 * ascending age, so the entry with the smallest age comes first among
 * duplicates.
 *
 * The age tie-break is computed as a three-way comparison instead of a
 * subtraction of int-cast values: the subtraction could truncate or overflow
 * for large ages and then report the wrong sign. Only the sign of the result
 * is meaningful to qsort. */
static int
refcmp(const void *p1, const void *p2) {
  cmark_map_entry *r1 = *(cmark_map_entry **)p1;
  cmark_map_entry *r2 = *(cmark_map_entry **)p2;
  int res = labelcmp(r1->label, r2->label);

  if (res != 0)
    return res;

  return (r1->age > r2->age) - (r1->age < r2->age);
}

static int
refsearch(const void *label, const void *p2) {
  cmark_map_entry *ref = *(cmark_map_entry **)p2;
  return labelcmp((const unsigned char *)label, ref->label);
}

/* Build map->sorted: an array of pointers to the entries of the map->refs
 * list, sorted with refcmp and with duplicate labels removed (the first
 * entry in refcmp order is kept for each label). map->size is updated to
 * the number of distinct labels.
 * NOTE(review): assumes map->size is non-zero and equals the length of the
 * refs list; the visible caller only calls this when map->size != 0. */
static void sort_map(cmark_map *map) {
  unsigned int count = map->size;
  unsigned int unique = 0;
  unsigned int idx = 0;
  cmark_map_entry *entry;
  cmark_map_entry **table;

  table = (cmark_map_entry **)map->mem->calloc(count, sizeof(cmark_map_entry *));

  /* Flatten the linked list into the array. */
  for (entry = map->refs; entry; entry = entry->next)
    table[idx++] = entry;

  qsort(table, count, sizeof(cmark_map_entry *), refcmp);

  /* Compact in place: keep only the first entry of every run of equal
   * labels. */
  for (idx = 1; idx < count; idx++) {
    if (labelcmp(table[idx]->label, table[unique]->label) != 0) {
      unique++;
      table[unique] = table[idx];
    }
  }

  map->sorted = table;
  map->size = unique + 1;
}

/* Look up the entry whose normalized label matches `label`.
 * Returns NULL when the label length is outside 1..MAX_LINK_LABEL_LENGTH,
 * when the map is NULL or empty, when the label normalizes to nothing, or
 * when no entry matches. The sorted index is built on first use. */
cmark_map_entry *cmark_map_lookup(cmark_map *map, cmark_chunk *label) {
  unsigned char *key;
  cmark_map_entry **hit;

  if (label->len < 1 || label->len > MAX_LINK_LABEL_LENGTH)
    return NULL;

  if (map == NULL || map->size == 0)
    return NULL;

  key = normalize_map_label(map->mem, label);
  if (key == NULL)
    return NULL;

  /* Lazily create the sorted, de-duplicated index. */
  if (map->sorted == NULL)
    sort_map(map);

  hit = (cmark_map_entry **)bsearch(key, map->sorted, map->size,
                                    sizeof(cmark_map_entry *), refsearch);
  map->mem->free(key);

  return hit != NULL ? *hit : NULL;
}

/* Destroy a map: every entry on the refs list is handed to the map's own
 * free callback, then the sorted index and the map itself are released
 * through the map's allocator. A NULL map is ignored. */
void cmark_map_free(cmark_map *map) {
  cmark_map_entry *entry;
  cmark_map_entry *following;

  if (map == NULL)
    return;

  /* Read the successor before the callback runs on the current entry. */
  for (entry = map->refs; entry != NULL; entry = following) {
    following = entry->next;
    map->free(map, entry);
  }

  map->mem->free(map->sorted);
  map->mem->free(map);
}

/* Allocate a zero-initialized map through `mem` and record the allocator and
 * the per-entry free callback on it.
 * NOTE(review): the calloc result is used unchecked; presumably the
 * allocator never returns NULL -- confirm. */
cmark_map *cmark_map_new(cmark_mem *mem, cmark_map_free_f free) {
  cmark_map *created;

  created = (cmark_map *)mem->calloc(1, sizeof(cmark_map));
  created->free = free;
  created->mem = mem;

  return created;
}
cmarkgfm/third_party/cmark/src/cmark_ctype.h0000644000175000017500000000101314210444464021412 0ustar  carstencarsten#ifndef CMARK_CMARK_CTYPE_H
#define CMARK_CMARK_CTYPE_H

#ifdef __cplusplus
extern "C" {
#endif

#include "cmark-gfm_export.h"

/** Locale-independent versions of functions from ctype.h.
 * We want cmark to behave the same no matter what the system locale.
 *
 * Unlike their ctype.h namesakes, these take a plain `char` rather than an
 * `int`.
 * NOTE(review): presumably each returns non-zero when `c` belongs to the
 * class and 0 otherwise -- confirm against the implementation.
 */

/** Locale-independent counterpart of isspace(). */
CMARK_GFM_EXPORT
int cmark_isspace(char c);

/** Locale-independent counterpart of ispunct(). */
CMARK_GFM_EXPORT
int cmark_ispunct(char c);

/** Locale-independent counterpart of isalnum(). */
CMARK_GFM_EXPORT
int cmark_isalnum(char c);

/** Locale-independent counterpart of isdigit(). */
CMARK_GFM_EXPORT
int cmark_isdigit(char c);

/** Locale-independent counterpart of isalpha(). */
CMARK_GFM_EXPORT
int cmark_isalpha(char c);

#ifdef __cplusplus
}
#endif

#endif
cmarkgfm/third_party/cmark/src/houdini.h0000644000175000017500000000301214210444464020551 0ustar  carstencarsten#ifndef CMARK_HOUDINI_H
#define CMARK_HOUDINI_H

#ifdef __cplusplus
extern "C" {
#endif

#include 
#include "config.h"
#include "buffer.h"

/*
 * Branch-prediction hints. When HAVE___BUILTIN_EXPECT is defined they map to
 * __builtin_expect (expected value 1 for likely, 0 for unlikely); otherwise
 * they expand to the bare expression and have no effect.
 */
#ifdef HAVE___BUILTIN_EXPECT
#define likely(x) __builtin_expect((x), 1)
#define unlikely(x) __builtin_expect((x), 0)
#else
#define likely(x) (x)
#define unlikely(x) (x)
#endif

#ifdef HOUDINI_USE_LOCALE
/* Defer to the (locale-aware) C library classification. */
#define _isxdigit(c) isxdigit(c)
#define _isdigit(c) isdigit(c)
#else
/*
 * Helper _isdigit methods -- do not trust the current locale
 *
 * _isxdigit searches exactly the 22 hexadecimal digit characters with
 * memchr. The earlier strchr-based form also matched the string's
 * terminating NUL, so _isxdigit('\0') wrongly evaluated to true. memchr
 * keeps the single evaluation of (c).
 * _isdigit evaluates (c) twice: do not pass expressions with side effects.
 * */
#define _isxdigit(c) (memchr("0123456789ABCDEFabcdef", (c), 22) != NULL)
#define _isdigit(c) ((c) >= '0' && (c) <= '9')
#endif

/*
 * Size helpers: HOUDINI_ESCAPED_SIZE(x) is x scaled by 12/10 using integer
 * arithmetic (x plus 20%, rounded down); HOUDINI_UNESCAPED_SIZE(x) is x.
 * NOTE(review): presumably used as output-buffer size estimates for the
 * escape/unescape routines -- confirm at the call sites.
 */
#define HOUDINI_ESCAPED_SIZE(x) (((x)*12) / 10)
#define HOUDINI_UNESCAPED_SIZE(x) (x)

/*
 * Exported Houdini entry points. Each takes an output buffer `ob` plus an
 * input byte range (`src`, `size`). The definitions live outside this
 * header, so the notes below are limited to what the signatures show.
 * NOTE(review): going by the names these escape/unescape HTML text,
 * entities and hrefs -- confirm semantics and return-value conventions
 * against the implementations.
 */
CMARK_GFM_EXPORT
bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
                                      bufsize_t size);
CMARK_GFM_EXPORT
int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src,
                               bufsize_t size);
/* Same shape as houdini_escape_html with an extra `secure` flag. */
CMARK_GFM_EXPORT
int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src,
                                bufsize_t size, int secure);
CMARK_GFM_EXPORT
int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
                                 bufsize_t size);
/* Variant of houdini_unescape_html that returns nothing. */
CMARK_GFM_EXPORT
void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
                                    bufsize_t size);
CMARK_GFM_EXPORT
int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src,
                               bufsize_t size);

#ifdef __cplusplus
}
#endif

#endif
cmarkgfm/third_party/cmark/src/node.h0000644000175000017500000000506014210444464020044 0ustar  carstencarsten#ifndef CMARK_NODE_H
#define CMARK_NODE_H

#ifdef __cplusplus
extern "C" {
#endif

#include 
#include 

#include "cmark-gfm.h"
#include "cmark-gfm-extension_api.h"
#include "buffer.h"
#include "chunk.h"

// Payload for list nodes; stored in the `list` member of cmark_node's
// `as` union.
typedef struct {
  cmark_list_type list_type; // kind of list (e.g. bullet or ordered)
  // NOTE(review): marker_offset and padding look like parser layout
  // bookkeeping -- confirm against the block parser.
  int marker_offset;
  int padding;
  int start; // start value of the list
  cmark_delim_type delimiter; // delimiter style of the list
  unsigned char bullet_char;
  bool tight; // whether the list is tight
  bool checked; // For task list extension
} cmark_list;

// Payload for code-block nodes; stored in the `code` member of
// cmark_node's `as` union.
typedef struct {
  cmark_chunk info;    // fence info string
  cmark_chunk literal; // code text
  // The fence fields are stored in 8 bits each, so wider values do not
  // round-trip.
  uint8_t fence_length;
  uint8_t fence_offset;
  unsigned char fence_char;
  int8_t fenced; // NOTE(review): presumably non-zero for fenced blocks -- confirm
} cmark_code;

// Payload for heading nodes; stored in the `heading` member of
// cmark_node's `as` union.
typedef struct {
  int level; // heading level
  bool setext; // NOTE(review): presumably marks setext-style headings -- confirm
} cmark_heading;

// Payload stored in the `link` member of cmark_node's `as` union:
// a destination URL and a title, each held as a cmark_chunk.
typedef struct {
  cmark_chunk url;
  cmark_chunk title;
} cmark_link;

// Payload stored in the `custom` member of cmark_node's `as` union:
// two chunks named on_enter and on_exit.
typedef struct {
  cmark_chunk on_enter;
  cmark_chunk on_exit;
} cmark_custom;

// Internal per-node flag bits; each enumerator is a distinct power of two
// so values can be OR-ed together.
// NOTE(review): presumably stored in cmark_node.flags -- confirm at the
// use sites.
enum cmark_node__internal_flags {
  CMARK_NODE__OPEN = (1 << 0),
  CMARK_NODE__LAST_LINE_BLANK = (1 << 1),
  CMARK_NODE__LAST_LINE_CHECKED = (1 << 2),
};

// A single node of the document tree.
struct cmark_node {
  // String buffer owned by the node. Its `mem` field is what
  // cmark_node_mem() returns as the node's allocator.
  cmark_strbuf content;

  // Sibling links, parent link and child-list endpoints.
  struct cmark_node *next;
  struct cmark_node *prev;
  struct cmark_node *parent;
  struct cmark_node *first_child;
  struct cmark_node *last_child;

  // Caller-supplied pointer and an optional function used to release it.
  void *user_data;
  cmark_free_func user_data_free_func;

  // Position fields.
  int start_line;
  int start_column;
  int end_line;
  int end_column;
  int internal_offset;
  // Holds a cmark_node_type value in 16 bits; the inline predicates below
  // cast it back to cmark_node_type.
  uint16_t type;
  // NOTE(review): presumably a bitmask of cmark_node__internal_flags --
  // confirm at the use sites.
  uint16_t flags;

  // Syntax extension attached to this node, or NULL.
  cmark_syntax_extension *extension;

  union {
    int ref_ix;
    int def_count;
  } footnote;

  cmark_node *parent_footnote_def;

  // Type-specific payload. NOTE(review): which member is live is
  // presumably selected by `type` -- confirm against the accessors.
  union {
    cmark_chunk literal;
    cmark_list list;
    cmark_code code;
    cmark_heading heading;
    cmark_link link;
    cmark_custom custom;
    int html_block_type;
    void *opaque;
  } as;
};

// Returns the allocator recorded in the node's content buffer.
// `node` must not be NULL.
static CMARK_INLINE cmark_mem *cmark_node_mem(cmark_node *node) {
  cmark_mem *allocator = node->content.mem;
  return allocator;
}
// Declaration only; the definition lives outside this header.
// NOTE(review): presumably validates the tree under `node`, reports to
// `out` and returns an error count -- confirm against the definition.
CMARK_GFM_EXPORT int cmark_node_check(cmark_node *node, FILE *out);

// True when the category bits of `node_type` (those selected by
// CMARK_NODE_TYPE_MASK) equal CMARK_NODE_TYPE_BLOCK.
static CMARK_INLINE bool CMARK_NODE_TYPE_BLOCK_P(cmark_node_type node_type) {
	if ((node_type & CMARK_NODE_TYPE_MASK) == CMARK_NODE_TYPE_BLOCK) {
		return true;
	}
	return false;
}

// True when `node` is non-NULL and its type is a block type.
static CMARK_INLINE bool CMARK_NODE_BLOCK_P(cmark_node *node) {
	if (node == NULL) {
		return false;
	}
	return CMARK_NODE_TYPE_BLOCK_P((cmark_node_type) node->type);
}

// True when the category bits of `node_type` (those selected by
// CMARK_NODE_TYPE_MASK) equal CMARK_NODE_TYPE_INLINE.
static CMARK_INLINE bool CMARK_NODE_TYPE_INLINE_P(cmark_node_type node_type) {
	if ((node_type & CMARK_NODE_TYPE_MASK) == CMARK_NODE_TYPE_INLINE) {
		return true;
	}
	return false;
}

// True when `node` is non-NULL and its type is an inline type.
static CMARK_INLINE bool CMARK_NODE_INLINE_P(cmark_node *node) {
	if (node == NULL) {
		return false;
	}
	return CMARK_NODE_TYPE_INLINE_P((cmark_node_type) node->type);
}

// Declaration only; the definition lives outside this header.
// NOTE(review): presumably reports whether `node` may hold a child of
// type `child_type` -- confirm against the definition.
CMARK_GFM_EXPORT bool cmark_node_can_contain_type(cmark_node *node, cmark_node_type child_type);

#ifdef __cplusplus
}
#endif

#endif
cmarkgfm/third_party/cmark/src/utf8.c0000644000175000017500000002361114210444464020002 0ustar  carstencarsten#include 
#include 
#include 

#include "cmark_ctype.h"
#include "utf8.h"

// Sequence length implied by a lead byte, indexed by the byte value:
//   0x00-0x7F -> 1, 0x80-0xBF -> 0, 0xC0-0xDF -> 2,
//   0xE0-0xEF -> 3, 0xF0-0xF7 -> 4, 0xF8-0xFF -> 0.
// A 0 entry means the byte cannot start a sequence.
static const int8_t utf8proc_utf8class[256] = {
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0};

// Appends the three bytes EF BF BD (the UTF-8 encoding of U+FFFD) to `buf`.
static void encode_unknown(cmark_strbuf *buf) {
  static const uint8_t replacement[3] = {0xEF, 0xBF, 0xBD};
  cmark_strbuf_put(buf, replacement, 3);
}

// Returns the byte length of the sequence starting at `str`, judged only by
// the lead byte and the continuation-byte pattern.
//   0         when str_len is 0
//   -1        when the lead byte cannot start a sequence
//   -str_len  when str_len is non-negative and shorter than the sequence
//   -i        when byte i is not a continuation byte (10xxxxxx)
// A negative str_len skips the length check.
static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len) {
  int expected;
  int pos;

  if (str_len == 0)
    return 0;

  expected = utf8proc_utf8class[str[0]];
  if (expected == 0)
    return -1;

  if (str_len >= 0 && str_len < (bufsize_t)expected)
    return -str_len;

  pos = 1;
  while (pos < expected) {
    if ((str[pos] & 0xC0) != 0x80)
      return -pos;
    pos++;
  }

  return expected;
}

// Validate a single UTF-8 character according to RFC 3629.
// Returns the sequence length when valid. Otherwise returns a negative
// value whose magnitude is the number of bytes the caller should skip:
// -1 for a bad lead byte, -str_len for a truncated sequence, -i when byte i
// is not a continuation byte, and -length for overlong encodings,
// surrogates and values above 0x10FFFF.
static int utf8proc_valid(const uint8_t *str, bufsize_t str_len) {
  const uint8_t lead = str[0];
  const int length = utf8proc_utf8class[lead];
  int k;

  if (length == 0)
    return -1;

  if (str_len < (bufsize_t)length)
    return -str_len;

  // Every trailing byte must look like 10xxxxxx.
  for (k = 1; k < length; k++) {
    if ((str[k] & 0xC0) != 0x80)
      return -k;
  }

  if (length == 2) {
    // Overlong
    if (lead < 0xC2)
      return -length;
  } else if (length == 3) {
    // Overlong
    if (lead == 0xE0 && str[1] < 0xA0)
      return -length;
    // Surrogate
    if (lead == 0xED && str[1] >= 0xA0)
      return -length;
  } else if (length == 4) {
    // Overlong
    if (lead == 0xF0 && str[1] < 0x90)
      return -length;
    // Above 0x10FFFF
    if (lead >= 0xF4 && (lead > 0xF4 || str[1] >= 0x90))
      return -length;
  }

  return length;
}

// Copies `size` bytes of `line` into `ob`, replacing every invalid UTF-8
// sequence and every NUL byte with the replacement bytes written by
// encode_unknown(). Valid runs are copied through unchanged.
void cmark_utf8proc_check(cmark_strbuf *ob, const uint8_t *line,
                          bufsize_t size) {
  bufsize_t i = 0;

  while (i < size) {
    bufsize_t org = i; // start of the current run of valid bytes
    int charlen = 0;   // bytes to skip once a bad sequence is found

    // Advance i across valid input; stop at the first offending byte.
    while (i < size) {
      if (line[i] < 0x80 && line[i] != 0) {
        i++;
      } else if (line[i] >= 0x80) {
        charlen = utf8proc_valid(line + i, size - i);
        if (charlen < 0) {
          // utf8proc_valid reports the skip distance as a negative number.
          charlen = -charlen;
          break;
        }
        i += charlen;
      } else if (line[i] == 0) {
        // ASCII NUL is technically valid but rejected
        // for security reasons.
        charlen = 1;
        break;
      }
    }

    // Flush the valid run [org, i).
    if (i > org) {
      cmark_strbuf_put(ob, line + org, i - org);
    }

    if (i >= size) {
      break;
    } else {
      // Invalid UTF-8
      encode_unknown(ob);
      i += charlen;
    }
  }
}

// Decodes the code point at the start of `str` into `*dst`.
// Returns the number of bytes consumed, or -1 (with `*dst` set to -1) when
// the bytes are malformed, overlong, a surrogate, out of range, or when
// str_len is 0.
int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len,
                           int32_t *dst) {
  int32_t cp = -1;
  int nbytes;

  *dst = -1;

  nbytes = utf8proc_charlen(str, str_len);
  if (nbytes < 0)
    return -1;

  if (nbytes == 1) {
    cp = str[0];
  } else if (nbytes == 2) {
    cp = ((str[0] & 0x1F) << 6) + (str[1] & 0x3F);
    if (cp < 0x80)
      cp = -1; // overlong
  } else if (nbytes == 3) {
    cp = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6) + (str[2] & 0x3F);
    if (cp < 0x800 || (cp >= 0xD800 && cp < 0xE000))
      cp = -1; // overlong or surrogate
  } else if (nbytes == 4) {
    cp = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12) +
         ((str[2] & 0x3F) << 6) + (str[3] & 0x3F);
    if (cp < 0x10000 || cp >= 0x110000)
      cp = -1; // overlong or beyond the Unicode range
  }

  // nbytes == 0 leaves cp at -1 and is reported as a failure too.
  if (cp < 0)
    return -1;

  *dst = cp;
  return nbytes;
}

// Appends the UTF-8 encoding of code point `uc` to `buf`.
// Values of 0x110000 and above are written as the replacement bytes from
// encode_unknown(). 0xFFFF and 0xFFFE are emitted as the single bytes 0xFF
// and 0xFE respectively. `uc` must be non-negative (asserted).
void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) {
  uint8_t bytes[4];
  bufsize_t count = 0;

  assert(uc >= 0);

  if (uc >= 0x110000) {
    encode_unknown(buf);
    return;
  }

  if (uc < 0x80) {
    bytes[0] = (uint8_t)(uc);
    count = 1;
  } else if (uc < 0x800) {
    bytes[0] = (uint8_t)(0xC0 + (uc >> 6));
    bytes[1] = (uint8_t)(0x80 + (uc & 0x3F));
    count = 2;
  } else if (uc == 0xFFFF) {
    bytes[0] = 0xFF;
    count = 1;
  } else if (uc == 0xFFFE) {
    bytes[0] = 0xFE;
    count = 1;
  } else if (uc < 0x10000) {
    bytes[0] = (uint8_t)(0xE0 + (uc >> 12));
    bytes[1] = (uint8_t)(0x80 + ((uc >> 6) & 0x3F));
    bytes[2] = (uint8_t)(0x80 + (uc & 0x3F));
    count = 3;
  } else {
    bytes[0] = (uint8_t)(0xF0 + (uc >> 18));
    bytes[1] = (uint8_t)(0x80 + ((uc >> 12) & 0x3F));
    bytes[2] = (uint8_t)(0x80 + ((uc >> 6) & 0x3F));
    bytes[3] = (uint8_t)(0x80 + (uc & 0x3F));
    count = 4;
  }

  cmark_strbuf_put(buf, bytes, count);
}

// Walks the `len` bytes at `str` one code point at a time and writes the
// result to `dest`. Successfully decoded code points are handled by the
// code included from case_fold_switch.inc; undecodable input produces the
// replacement bytes from encode_unknown() and advances by one byte.
// NOTE(review): the included file is presumably a switch on `c` that emits
// folded code points through bufpush() -- confirm against the .inc file.
void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
                              bufsize_t len) {
  int32_t c;

// Helper for the included code: append one code point to `dest`.
#define bufpush(x) cmark_utf8proc_encode_char(x, dest)

  while (len > 0) {
    bufsize_t char_len = cmark_utf8proc_iterate(str, len, &c);

    if (char_len >= 0) {
#include "case_fold_switch.inc"
    } else {
      encode_unknown(dest);
      // cmark_utf8proc_iterate fails with -1, so this skips one byte.
      char_len = -char_len;
    }

    str += char_len;
    len -= char_len;
  }
}

// matches anything in the Zs class, plus LF, CR, TAB, FF.
// Returns 1 for a match and 0 otherwise.
int cmark_utf8proc_is_space(int32_t uc) {
  // Contiguous block 8192..8202.
  if (uc >= 8192 && uc <= 8202)
    return 1;

  switch (uc) {
  case 9:
  case 10:
  case 12:
  case 13:
  case 32:
  case 160:
  case 5760:
  case 8239:
  case 8287:
  case 12288:
    return 1;
  default:
    return 0;
  }
}

// matches anything in the P[cdefios] classes.
// Code points below 128 are delegated to cmark_ispunct(); everything else
// is matched against the hard-coded list of code points and ranges below.
// Returns 1 for a match and 0 otherwise.
int cmark_utf8proc_is_punctuation(int32_t uc) {
  return (
      (uc < 128 && cmark_ispunct((char)uc)) || uc == 161 || uc == 167 ||
      uc == 171 || uc == 182 || uc == 183 || uc == 187 || uc == 191 ||
      uc == 894 || uc == 903 || (uc >= 1370 && uc <= 1375) || uc == 1417 ||
      uc == 1418 || uc == 1470 || uc == 1472 || uc == 1475 || uc == 1478 ||
      uc == 1523 || uc == 1524 || uc == 1545 || uc == 1546 || uc == 1548 ||
      uc == 1549 || uc == 1563 || uc == 1566 || uc == 1567 ||
      (uc >= 1642 && uc <= 1645) || uc == 1748 || (uc >= 1792 && uc <= 1805) ||
      (uc >= 2039 && uc <= 2041) || (uc >= 2096 && uc <= 2110) || uc == 2142 ||
      uc == 2404 || uc == 2405 || uc == 2416 || uc == 2800 || uc == 3572 ||
      uc == 3663 || uc == 3674 || uc == 3675 || (uc >= 3844 && uc <= 3858) ||
      uc == 3860 || (uc >= 3898 && uc <= 3901) || uc == 3973 ||
      (uc >= 4048 && uc <= 4052) || uc == 4057 || uc == 4058 ||
      (uc >= 4170 && uc <= 4175) || uc == 4347 || (uc >= 4960 && uc <= 4968) ||
      uc == 5120 || uc == 5741 || uc == 5742 || uc == 5787 || uc == 5788 ||
      (uc >= 5867 && uc <= 5869) || uc == 5941 || uc == 5942 ||
      (uc >= 6100 && uc <= 6102) || (uc >= 6104 && uc <= 6106) ||
      (uc >= 6144 && uc <= 6154) || uc == 6468 || uc == 6469 || uc == 6686 ||
      uc == 6687 || (uc >= 6816 && uc <= 6822) || (uc >= 6824 && uc <= 6829) ||
      (uc >= 7002 && uc <= 7008) || (uc >= 7164 && uc <= 7167) ||
      (uc >= 7227 && uc <= 7231) || uc == 7294 || uc == 7295 ||
      (uc >= 7360 && uc <= 7367) || uc == 7379 || (uc >= 8208 && uc <= 8231) ||
      (uc >= 8240 && uc <= 8259) || (uc >= 8261 && uc <= 8273) ||
      (uc >= 8275 && uc <= 8286) || uc == 8317 || uc == 8318 || uc == 8333 ||
      uc == 8334 || (uc >= 8968 && uc <= 8971) || uc == 9001 || uc == 9002 ||
      (uc >= 10088 && uc <= 10101) || uc == 10181 || uc == 10182 ||
      (uc >= 10214 && uc <= 10223) || (uc >= 10627 && uc <= 10648) ||
      (uc >= 10712 && uc <= 10715) || uc == 10748 || uc == 10749 ||
      (uc >= 11513 && uc <= 11516) || uc == 11518 || uc == 11519 ||
      uc == 11632 || (uc >= 11776 && uc <= 11822) ||
      (uc >= 11824 && uc <= 11842) || (uc >= 12289 && uc <= 12291) ||
      (uc >= 12296 && uc <= 12305) || (uc >= 12308 && uc <= 12319) ||
      uc == 12336 || uc == 12349 || uc == 12448 || uc == 12539 || uc == 42238 ||
      uc == 42239 || (uc >= 42509 && uc <= 42511) || uc == 42611 ||
      uc == 42622 || (uc >= 42738 && uc <= 42743) ||
      (uc >= 43124 && uc <= 43127) || uc == 43214 || uc == 43215 ||
      (uc >= 43256 && uc <= 43258) || uc == 43310 || uc == 43311 ||
      uc == 43359 || (uc >= 43457 && uc <= 43469) || uc == 43486 ||
      uc == 43487 || (uc >= 43612 && uc <= 43615) || uc == 43742 ||
      uc == 43743 || uc == 43760 || uc == 43761 || uc == 44011 || uc == 64830 ||
      uc == 64831 || (uc >= 65040 && uc <= 65049) ||
      (uc >= 65072 && uc <= 65106) || (uc >= 65108 && uc <= 65121) ||
      uc == 65123 || uc == 65128 || uc == 65130 || uc == 65131 ||
      (uc >= 65281 && uc <= 65283) || (uc >= 65285 && uc <= 65290) ||
      (uc >= 65292 && uc <= 65295) || uc == 65306 || uc == 65307 ||
      uc == 65311 || uc == 65312 || (uc >= 65339 && uc <= 65341) ||
      uc == 65343 || uc == 65371 || uc == 65373 ||
      (uc >= 65375 && uc <= 65381) || (uc >= 65792 && uc <= 65794) ||
      uc == 66463 || uc == 66512 || uc == 66927 || uc == 67671 || uc == 67871 ||
      uc == 67903 || (uc >= 68176 && uc <= 68184) || uc == 68223 ||
      (uc >= 68336 && uc <= 68342) || (uc >= 68409 && uc <= 68415) ||
      (uc >= 68505 && uc <= 68508) || (uc >= 69703 && uc <= 69709) ||
      uc == 69819 || uc == 69820 || (uc >= 69822 && uc <= 69825) ||
      (uc >= 69952 && uc <= 69955) || uc == 70004 || uc == 70005 ||
      (uc >= 70085 && uc <= 70088) || uc == 70093 ||
      (uc >= 70200 && uc <= 70205) || uc == 70854 ||
      (uc >= 71105 && uc <= 71113) || (uc >= 71233 && uc <= 71235) ||
      (uc >= 74864 && uc <= 74868) || uc == 92782 || uc == 92783 ||
      uc == 92917 || (uc >= 92983 && uc <= 92987) || uc == 92996 ||
      uc == 113823);
}
cmarkgfm/third_party/cmark/src/node.c0000644000175000017500000005034514210444464020045 0ustar  carstencarsten#include 
#include 

#include "config.h"
#include "node.h"
#include "syntax_extension.h"

static void S_node_unlink(cmark_node *node);

// Shorthand for the allocator associated with `node`.
#define NODE_MEM(node) cmark_node_mem(node)

// Reports whether `node` may hold a child of type `child_type`.
// A document is never a valid child. If the node's extension supplies a
// can_contain_func, its answer is used; otherwise the built-in rules for
// the node's own type apply. `node` must not be NULL.
bool cmark_node_can_contain_type(cmark_node *node, cmark_node_type child_type) {
  cmark_syntax_extension *ext;

  if (child_type == CMARK_NODE_DOCUMENT)
    return false;

  ext = node->extension;
  if (ext != NULL && ext->can_contain_func != NULL)
    return ext->can_contain_func(ext, node, child_type) != 0;

  switch (node->type) {
  // Block containers take any block except a bare list item.
  case CMARK_NODE_DOCUMENT:
  case CMARK_NODE_BLOCK_QUOTE:
  case CMARK_NODE_FOOTNOTE_DEFINITION:
  case CMARK_NODE_ITEM:
    if (child_type == CMARK_NODE_ITEM)
      return false;
    return CMARK_NODE_TYPE_BLOCK_P(child_type);

  // Lists take items only.
  case CMARK_NODE_LIST:
    return child_type == CMARK_NODE_ITEM;

  case CMARK_NODE_CUSTOM_BLOCK:
    return true;

  // Inline containers take inline children only.
  case CMARK_NODE_PARAGRAPH:
  case CMARK_NODE_HEADING:
  case CMARK_NODE_EMPH:
  case CMARK_NODE_STRONG:
  case CMARK_NODE_LINK:
  case CMARK_NODE_IMAGE:
  case CMARK_NODE_CUSTOM_INLINE:
    return CMARK_NODE_TYPE_INLINE_P(child_type);

  default:
    return false;
  }
}

// Reports whether `child` may be placed under `node`: both must be
// non-NULL and share an allocator, `child` must be neither `node` nor one
// of its ancestors, and the type rules of cmark_node_can_contain_type()
// must allow it.
static bool S_can_contain(cmark_node *node, cmark_node *child) {
  cmark_node *ancestor;

  if (node == NULL || child == NULL)
    return false;

  if (NODE_MEM(node) != NODE_MEM(child))
    return false;

  // Verify that child is not an ancestor of node or equal to node.
  for (ancestor = node; ancestor != NULL; ancestor = ancestor->parent) {
    if (ancestor == child)
      return false;
  }

  return cmark_node_can_contain_type(node, (cmark_node_type) child->type);
}

// Allocates a zero-initialised node of `type` from `mem`, attaches
// `extension` (may be NULL) and applies per-type defaults: headings start
// at level 1; lists start as non-tight bullet lists with start 0. If the
// extension provides opaque_alloc_func it is invoked on the new node.
cmark_node *cmark_node_new_with_mem_and_ext(cmark_node_type type, cmark_mem *mem, cmark_syntax_extension *extension) {
  cmark_node *created = (cmark_node *)mem->calloc(1, sizeof(cmark_node));

  cmark_strbuf_init(mem, &created->content, 0);
  created->type = (uint16_t)type;
  created->extension = extension;

  if (created->type == CMARK_NODE_HEADING) {
    created->as.heading.level = 1;
  } else if (created->type == CMARK_NODE_LIST) {
    created->as.list.list_type = CMARK_BULLET_LIST;
    created->as.list.start = 0;
    created->as.list.tight = false;
  }

  if (extension != NULL && extension->opaque_alloc_func != NULL)
    extension->opaque_alloc_func(extension, mem, created);

  return created;
}

// Creates a node of `type` with `extension`, using the default allocator.
cmark_node *cmark_node_new_with_ext(cmark_node_type type, cmark_syntax_extension *extension) {
  extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR;
  cmark_mem *default_mem = &CMARK_DEFAULT_MEM_ALLOCATOR;

  return cmark_node_new_with_mem_and_ext(type, default_mem, extension);
}

// Creates a node of `type` from allocator `mem`, with no extension.
cmark_node *cmark_node_new_with_mem(cmark_node_type type, cmark_mem *mem)
{
  cmark_syntax_extension *no_extension = NULL;

  return cmark_node_new_with_mem_and_ext(type, mem, no_extension);
}

// Creates a node of `type` with the default allocator and no extension.
cmark_node *cmark_node_new(cmark_node_type type) {
  cmark_syntax_extension *no_extension = NULL;

  return cmark_node_new_with_ext(type, no_extension);
}

// Releases the chunks held in the node's type-specific payload (`as`).
// Node types without chunk payloads are left untouched.
static void free_node_as(cmark_node *node) {
  cmark_mem *mem = NODE_MEM(node);

  switch (node->type) {
  case CMARK_NODE_CODE_BLOCK:
    cmark_chunk_free(mem, &node->as.code.info);
    cmark_chunk_free(mem, &node->as.code.literal);
    return;

  case CMARK_NODE_TEXT:
  case CMARK_NODE_HTML_INLINE:
  case CMARK_NODE_CODE:
  case CMARK_NODE_HTML_BLOCK:
  case CMARK_NODE_FOOTNOTE_REFERENCE:
  case CMARK_NODE_FOOTNOTE_DEFINITION:
    cmark_chunk_free(mem, &node->as.literal);
    return;

  case CMARK_NODE_LINK:
  case CMARK_NODE_IMAGE:
    cmark_chunk_free(mem, &node->as.link.url);
    cmark_chunk_free(mem, &node->as.link.title);
    return;

  case CMARK_NODE_CUSTOM_BLOCK:
  case CMARK_NODE_CUSTOM_INLINE:
    cmark_chunk_free(mem, &node->as.custom.on_enter);
    cmark_chunk_free(mem, &node->as.custom.on_exit);
    return;

  default:
    return;
  }
}

// Free a cmark_node list and any children.
// Works iteratively: each node's children are spliced into the pending
// list right after the node, so the whole subtree is released without
// recursion. Starts at `e` and follows `next` links until NULL.
static void S_free_nodes(cmark_node *e) {
  cmark_node *next;
  while (e != NULL) {
    cmark_strbuf_free(&e->content);

    // Let the owner of user_data release it, if a free function was set.
    if (e->user_data && e->user_data_free_func)
      e->user_data_free_func(NODE_MEM(e), e->user_data);

    // Give the extension a chance to release its opaque payload.
    if (e->as.opaque && e->extension && e->extension->opaque_free_func)
      e->extension->opaque_free_func(e->extension, NODE_MEM(e), e);

    free_node_as(e);

    if (e->last_child) {
      // Splice children into list
      e->last_child->next = e->next;
      e->next = e->first_child;
    }
    // Read the successor before the node itself is released.
    next = e->next;
    NODE_MEM(e)->free(e);
    e = next;
  }
}

// Detaches `node` from its tree and frees it together with all of its
// descendants. Passing NULL is a no-op.
void cmark_node_free(cmark_node *node) {
  // S_node_unlink tolerates NULL, but the assignment below would not.
  if (node == NULL) {
    return;
  }

  S_node_unlink(node);
  // Cut the sibling chain so only this node's subtree is released.
  node->next = NULL;
  S_free_nodes(node);
}

// Returns the node's type, or CMARK_NODE_NONE for a NULL node.
cmark_node_type cmark_node_get_type(cmark_node *node) {
  if (node != NULL) {
    return (cmark_node_type)node->type;
  }
  return CMARK_NODE_NONE;
}

// Changes the type of `node` to `type`.
// Returns 1 on success (including when the type is already `type`) and 0
// when `node` is NULL or when the node's parent could not contain a node
// of the new type. Because the check goes through S_can_contain(), a node
// without a parent is rejected. On success the payload belonging to the
// old type is freed before the new type takes effect.
int cmark_node_set_type(cmark_node * node, cmark_node_type type) {
  cmark_node_type initial_type;
  bool allowed;

  // Match the other setters: a NULL node is an error, not a crash.
  if (node == NULL)
    return 0;

  if (type == node->type)
    return 1;

  initial_type = (cmark_node_type) node->type;

  // Try the new type on just long enough to ask the parent about it.
  node->type = (uint16_t)type;
  allowed = S_can_contain(node->parent, node);

  /* We rollback the type to free the union members appropriately */
  node->type = (uint16_t)initial_type;
  if (!allowed)
    return 0;

  free_node_as(node);

  node->type = (uint16_t)type;

  return 1;
}

// Returns a static, human-readable name for the node's type.
// A NULL node yields "NONE". If the node's extension provides
// get_type_string_func, its result is returned. Types without a built-in
// name yield "<unknown>", so diagnostics such as S_print_error never print
// an empty type name.
const char *cmark_node_get_type_string(cmark_node *node) {
  if (node == NULL) {
    return "NONE";
  }

  if (node->extension && node->extension->get_type_string_func) {
    return node->extension->get_type_string_func(node->extension, node);
  }

  switch (node->type) {
  case CMARK_NODE_NONE:
    return "none";
  case CMARK_NODE_DOCUMENT:
    return "document";
  case CMARK_NODE_BLOCK_QUOTE:
    return "block_quote";
  case CMARK_NODE_LIST:
    return "list";
  case CMARK_NODE_ITEM:
    return "item";
  case CMARK_NODE_CODE_BLOCK:
    return "code_block";
  case CMARK_NODE_HTML_BLOCK:
    return "html_block";
  case CMARK_NODE_CUSTOM_BLOCK:
    return "custom_block";
  case CMARK_NODE_PARAGRAPH:
    return "paragraph";
  case CMARK_NODE_HEADING:
    return "heading";
  case CMARK_NODE_THEMATIC_BREAK:
    return "thematic_break";
  case CMARK_NODE_TEXT:
    return "text";
  case CMARK_NODE_SOFTBREAK:
    return "softbreak";
  case CMARK_NODE_LINEBREAK:
    return "linebreak";
  case CMARK_NODE_CODE:
    return "code";
  case CMARK_NODE_HTML_INLINE:
    return "html_inline";
  case CMARK_NODE_CUSTOM_INLINE:
    return "custom_inline";
  case CMARK_NODE_EMPH:
    return "emph";
  case CMARK_NODE_STRONG:
    return "strong";
  case CMARK_NODE_LINK:
    return "link";
  case CMARK_NODE_IMAGE:
    return "image";
  }

  // The literal had lost its angle-bracketed text (as the stripped
  // #include <...> names in this file did), leaving an empty string.
  return "<unknown>";
}

// Returns the next sibling of `node`, or NULL when `node` is NULL.
cmark_node *cmark_node_next(cmark_node *node) {
  return node != NULL ? node->next : NULL;
}

// Returns the previous sibling of `node`, or NULL when `node` is NULL.
cmark_node *cmark_node_previous(cmark_node *node) {
  return node != NULL ? node->prev : NULL;
}

// Returns the parent of `node`, or NULL when `node` is NULL.
cmark_node *cmark_node_parent(cmark_node *node) {
  return node != NULL ? node->parent : NULL;
}

// Returns the first child of `node`, or NULL when `node` is NULL.
cmark_node *cmark_node_first_child(cmark_node *node) {
  return node != NULL ? node->first_child : NULL;
}

// Returns the last child of `node`, or NULL when `node` is NULL.
cmark_node *cmark_node_last_child(cmark_node *node) {
  return node != NULL ? node->last_child : NULL;
}

// Returns the user data pointer stored on `node`, or NULL when `node` is
// NULL.
void *cmark_node_get_user_data(cmark_node *node) {
  return node != NULL ? node->user_data : NULL;
}

// Stores `user_data` on `node`. Returns 1 on success, 0 for a NULL node.
int cmark_node_set_user_data(cmark_node *node, void *user_data) {
  if (node != NULL) {
    node->user_data = user_data;
    return 1;
  }
  return 0;
}

// Registers the function used to release the node's user data when the
// node is freed. Returns 1 on success, 0 for a NULL node.
int cmark_node_set_user_data_free_func(cmark_node *node,
                                        cmark_free_func free_func) {
  if (node != NULL) {
    node->user_data_free_func = free_func;
    return 1;
  }
  return 0;
}

// Returns the literal text of `node` as a C string, or NULL when `node` is
// NULL or its type carries no literal.
const char *cmark_node_get_literal(cmark_node *node) {
  cmark_chunk *source = NULL;

  if (node == NULL) {
    return NULL;
  }

  // Pick the chunk that holds the literal for this node type.
  switch (node->type) {
  case CMARK_NODE_HTML_BLOCK:
  case CMARK_NODE_TEXT:
  case CMARK_NODE_HTML_INLINE:
  case CMARK_NODE_CODE:
  case CMARK_NODE_FOOTNOTE_REFERENCE:
    source = &node->as.literal;
    break;

  case CMARK_NODE_CODE_BLOCK:
    source = &node->as.code.literal;
    break;

  default:
    break;
  }

  if (source == NULL) {
    return NULL;
  }
  return cmark_chunk_to_cstr(NODE_MEM(node), source);
}

// Replaces the literal text of `node` with `content`. Returns 1 on
// success, 0 when `node` is NULL or its type carries no literal.
int cmark_node_set_literal(cmark_node *node, const char *content) {
  cmark_chunk *target = NULL;

  if (node == NULL) {
    return 0;
  }

  // Pick the chunk that holds the literal for this node type.
  switch (node->type) {
  case CMARK_NODE_HTML_BLOCK:
  case CMARK_NODE_TEXT:
  case CMARK_NODE_HTML_INLINE:
  case CMARK_NODE_CODE:
  case CMARK_NODE_FOOTNOTE_REFERENCE:
    target = &node->as.literal;
    break;

  case CMARK_NODE_CODE_BLOCK:
    target = &node->as.code.literal;
    break;

  default:
    break;
  }

  if (target == NULL) {
    return 0;
  }
  cmark_chunk_set_cstr(NODE_MEM(node), target, content);
  return 1;
}

// Returns a pointer to the node's content buffer, or NULL when `node` is
// NULL (matching the other getters instead of dereferencing NULL).
const char *cmark_node_get_string_content(cmark_node *node) {
  if (node == NULL) {
    return NULL;
  }
  return (char *) node->content.ptr;
}

// Replaces the node's content buffer with `content`. Returns 1 on success
// and 0 when `node` is NULL (matching the other setters instead of
// dereferencing NULL).
int cmark_node_set_string_content(cmark_node *node, const char *content) {
  if (node == NULL) {
    return 0;
  }
  cmark_strbuf_sets(&node->content, content);
  return true;
}

// Returns the heading level of `node`, or 0 when `node` is NULL or not a
// heading.
int cmark_node_get_heading_level(cmark_node *node) {
  if (node != NULL && node->type == CMARK_NODE_HEADING) {
    return node->as.heading.level;
  }
  return 0;
}

// Sets the heading level of `node`. Returns 1 on success; 0 when `node` is
// NULL, `level` is outside 1..6, or the node is not a heading.
int cmark_node_set_heading_level(cmark_node *node, int level) {
  if (node == NULL || level < 1 || level > 6) {
    return 0;
  }
  if (node->type != CMARK_NODE_HEADING) {
    return 0;
  }

  node->as.heading.level = level;
  return 1;
}

// Returns the list type of `node`, or CMARK_NO_LIST when `node` is NULL or
// not a list.
cmark_list_type cmark_node_get_list_type(cmark_node *node) {
  if (node == NULL || node->type != CMARK_NODE_LIST) {
    return CMARK_NO_LIST;
  }
  return node->as.list.list_type;
}

// Sets the list type of `node`. Only CMARK_BULLET_LIST and
// CMARK_ORDERED_LIST are accepted. Returns 1 on success, 0 when the type
// is rejected, `node` is NULL, or the node is not a list.
int cmark_node_set_list_type(cmark_node *node, cmark_list_type type) {
  if (type != CMARK_BULLET_LIST && type != CMARK_ORDERED_LIST) {
    return 0;
  }
  if (node == NULL || node->type != CMARK_NODE_LIST) {
    return 0;
  }

  node->as.list.list_type = type;
  return 1;
}

// Returns the list delimiter of `node`, or CMARK_NO_DELIM when `node` is
// NULL or not a list.
cmark_delim_type cmark_node_get_list_delim(cmark_node *node) {
  if (node == NULL || node->type != CMARK_NODE_LIST) {
    return CMARK_NO_DELIM;
  }
  return node->as.list.delimiter;
}

// Sets the list delimiter of `node`. Only CMARK_PERIOD_DELIM and
// CMARK_PAREN_DELIM are accepted. Returns 1 on success, 0 when the
// delimiter is rejected, `node` is NULL, or the node is not a list.
int cmark_node_set_list_delim(cmark_node *node, cmark_delim_type delim) {
  if (delim != CMARK_PERIOD_DELIM && delim != CMARK_PAREN_DELIM) {
    return 0;
  }
  if (node == NULL || node->type != CMARK_NODE_LIST) {
    return 0;
  }

  node->as.list.delimiter = delim;
  return 1;
}

// Returns the start value of list `node`, or 0 when `node` is NULL or not
// a list.
int cmark_node_get_list_start(cmark_node *node) {
  if (node == NULL || node->type != CMARK_NODE_LIST) {
    return 0;
  }
  return node->as.list.start;
}

// Sets the start value of list `node`. Returns 1 on success; 0 when `node`
// is NULL, `start` is negative, or the node is not a list.
int cmark_node_set_list_start(cmark_node *node, int start) {
  if (node == NULL || start < 0) {
    return 0;
  }
  if (node->type != CMARK_NODE_LIST) {
    return 0;
  }

  node->as.list.start = start;
  return 1;
}

// Returns the tight flag of list `node`, or 0 when `node` is NULL or not a
// list.
int cmark_node_get_list_tight(cmark_node *node) {
  if (node == NULL || node->type != CMARK_NODE_LIST) {
    return 0;
  }
  return node->as.list.tight;
}

// Sets the tight flag of list `node`; only the exact value 1 turns the
// flag on. Returns 1 on success, 0 when `node` is NULL or not a list.
int cmark_node_set_list_tight(cmark_node *node, int tight) {
  if (node == NULL || node->type != CMARK_NODE_LIST) {
    return 0;
  }

  node->as.list.tight = (tight == 1) ? true : false;
  return 1;
}

// Returns the fence info string of code block `node`, or NULL when `node`
// is NULL or not a code block.
const char *cmark_node_get_fence_info(cmark_node *node) {
  if (node == NULL || node->type != CMARK_NODE_CODE_BLOCK) {
    return NULL;
  }
  return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.code.info);
}

// Sets the fence info string of code block `node`. Returns 1 on success,
// 0 when `node` is NULL or not a code block.
int cmark_node_set_fence_info(cmark_node *node, const char *info) {
  if (node == NULL || node->type != CMARK_NODE_CODE_BLOCK) {
    return 0;
  }

  cmark_chunk_set_cstr(NODE_MEM(node), &node->as.code.info, info);
  return 1;
}

// Reads the fence details of code block `node` into `*length`, `*offset`
// and `*character`, and returns the stored `fenced` value. Returns 0
// without touching the outputs when `node` is NULL or not a code block.
// The output pointers must be valid.
int cmark_node_get_fenced(cmark_node *node, int *length, int *offset, char *character) {
  if (node == NULL || node->type != CMARK_NODE_CODE_BLOCK) {
    return 0;
  }

  *length = node->as.code.fence_length;
  *offset = node->as.code.fence_offset;
  *character = node->as.code.fence_char;
  return node->as.code.fenced;
}

// Stores the fence details on code block `node`. `fenced`, `length` and
// `offset` are narrowed to 8-bit fields. Returns 1 on success, 0 when
// `node` is NULL or not a code block.
int cmark_node_set_fenced(cmark_node * node, int fenced,
    int length, int offset, char character) {
  if (node == NULL || node->type != CMARK_NODE_CODE_BLOCK) {
    return 0;
  }

  node->as.code.fenced = (int8_t)fenced;
  node->as.code.fence_length = (uint8_t)length;
  node->as.code.fence_offset = (uint8_t)offset;
  node->as.code.fence_char = character;
  return 1;
}

// Returns the URL of link or image `node`, or NULL for a NULL node or any
// other node type.
const char *cmark_node_get_url(cmark_node *node) {
  if (node == NULL) {
    return NULL;
  }
  if (node->type == CMARK_NODE_LINK || node->type == CMARK_NODE_IMAGE) {
    return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.link.url);
  }
  return NULL;
}

// Sets the URL of link or image `node`. Returns 1 on success, 0 for a NULL
// node or any other node type.
int cmark_node_set_url(cmark_node *node, const char *url) {
  if (node == NULL) {
    return 0;
  }
  if (node->type == CMARK_NODE_LINK || node->type == CMARK_NODE_IMAGE) {
    cmark_chunk_set_cstr(NODE_MEM(node), &node->as.link.url, url);
    return 1;
  }
  return 0;
}

// Returns the title of link or image `node`, or NULL for a NULL node or
// any other node type.
const char *cmark_node_get_title(cmark_node *node) {
  if (node == NULL) {
    return NULL;
  }
  if (node->type == CMARK_NODE_LINK || node->type == CMARK_NODE_IMAGE) {
    return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.link.title);
  }
  return NULL;
}

// Sets the title of link or image `node`. Returns 1 on success, 0 for a
// NULL node or any other node type.
int cmark_node_set_title(cmark_node *node, const char *title) {
  if (node == NULL) {
    return 0;
  }
  if (node->type == CMARK_NODE_LINK || node->type == CMARK_NODE_IMAGE) {
    cmark_chunk_set_cstr(NODE_MEM(node), &node->as.link.title, title);
    return 1;
  }
  return 0;
}

// Returns the on_enter text of custom inline/block `node`, or NULL for a
// NULL node or any other node type.
const char *cmark_node_get_on_enter(cmark_node *node) {
  if (node == NULL) {
    return NULL;
  }
  if (node->type == CMARK_NODE_CUSTOM_INLINE ||
      node->type == CMARK_NODE_CUSTOM_BLOCK) {
    return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.custom.on_enter);
  }
  return NULL;
}

// Sets the on_enter text of custom inline/block `node`. Returns 1 on
// success, 0 for a NULL node or any other node type.
int cmark_node_set_on_enter(cmark_node *node, const char *on_enter) {
  if (node == NULL) {
    return 0;
  }
  if (node->type == CMARK_NODE_CUSTOM_INLINE ||
      node->type == CMARK_NODE_CUSTOM_BLOCK) {
    cmark_chunk_set_cstr(NODE_MEM(node), &node->as.custom.on_enter, on_enter);
    return 1;
  }
  return 0;
}

// Returns the on_exit text of custom inline/block `node`, or NULL for a
// NULL node or any other node type.
const char *cmark_node_get_on_exit(cmark_node *node) {
  if (node == NULL) {
    return NULL;
  }
  if (node->type == CMARK_NODE_CUSTOM_INLINE ||
      node->type == CMARK_NODE_CUSTOM_BLOCK) {
    return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.custom.on_exit);
  }
  return NULL;
}

// Sets the on_exit text of custom inline/block `node`. Returns 1 on
// success, 0 for a NULL node or any other node type.
int cmark_node_set_on_exit(cmark_node *node, const char *on_exit) {
  if (node == NULL) {
    return 0;
  }
  if (node->type == CMARK_NODE_CUSTOM_INLINE ||
      node->type == CMARK_NODE_CUSTOM_BLOCK) {
    cmark_chunk_set_cstr(NODE_MEM(node), &node->as.custom.on_exit, on_exit);
    return 1;
  }
  return 0;
}

// Returns the syntax extension attached to `node`, or NULL when `node` is
// NULL.
cmark_syntax_extension *cmark_node_get_syntax_extension(cmark_node *node) {
  return node != NULL ? node->extension : NULL;
}

// Attaches `extension` to `node`. Returns 1 on success, 0 for a NULL node.
int cmark_node_set_syntax_extension(cmark_node *node, cmark_syntax_extension *extension) {
  if (node != NULL) {
    node->extension = extension;
    return 1;
  }
  return 0;
}

// Returns the node's start line, or 0 when `node` is NULL.
int cmark_node_get_start_line(cmark_node *node) {
  return node != NULL ? node->start_line : 0;
}

// Returns the node's start column, or 0 when `node` is NULL.
int cmark_node_get_start_column(cmark_node *node) {
  return node != NULL ? node->start_column : 0;
}

// Returns the node's end line, or 0 when `node` is NULL.
int cmark_node_get_end_line(cmark_node *node) {
  return node != NULL ? node->end_line : 0;
}

// Returns the node's end column, or 0 when `node` is NULL.
int cmark_node_get_end_column(cmark_node *node) {
  return node != NULL ? node->end_column : 0;
}

// Unlink a node without adjusting its next, prev, and parent pointers.
// Only the neighbours and the parent are rewired; a NULL node is ignored.
static void S_node_unlink(cmark_node *node) {
  cmark_node *before;
  cmark_node *after;
  cmark_node *owner;

  if (node == NULL) {
    return;
  }

  before = node->prev;
  after = node->next;
  owner = node->parent;

  // Bridge the siblings over the departing node.
  if (before) {
    before->next = after;
  }
  if (after) {
    after->prev = before;
  }

  // Adjust first_child and last_child of parent.
  if (owner) {
    if (owner->first_child == node) {
      owner->first_child = after;
    }
    if (owner->last_child == node) {
      owner->last_child = before;
    }
  }
}

// Detaches `node` from its parent and siblings and clears its own
// next/prev/parent links. The node keeps its children and is not freed.
// Passing NULL is a no-op.
void cmark_node_unlink(cmark_node *node) {
  // S_node_unlink tolerates NULL, but the assignments below would not.
  if (node == NULL) {
    return;
  }

  S_node_unlink(node);

  node->next = NULL;
  node->prev = NULL;
  node->parent = NULL;
}

// Inserts `sibling` immediately before `node`, detaching it from its old
// position first. Returns 1 on success; 0 when either argument is NULL,
// `node` has no parent, or the parent cannot contain `sibling`.
int cmark_node_insert_before(cmark_node *node, cmark_node *sibling) {
  cmark_node *container;
  cmark_node *left;

  if (node == NULL || sibling == NULL) {
    return 0;
  }

  container = node->parent;
  if (container == NULL || !S_can_contain(container, sibling)) {
    return 0;
  }

  S_node_unlink(sibling);

  // Read node->prev only after the unlink: detaching 'sibling' may have
  // changed it.
  left = node->prev;

  // Insert 'sibling' between 'left' and 'node', under the same parent.
  sibling->prev = left;
  sibling->next = node;
  sibling->parent = container;
  node->prev = sibling;

  if (left != NULL) {
    left->next = sibling;
  } else {
    // Inserted as first child.
    container->first_child = sibling;
  }

  return 1;
}

// Inserts `sibling` immediately after `node`, detaching it from its old
// position first. Returns 1 on success; 0 when either argument is NULL,
// `node` has no parent, or the parent cannot contain `sibling`.
int cmark_node_insert_after(cmark_node *node, cmark_node *sibling) {
  cmark_node *container;
  cmark_node *right;

  if (node == NULL || sibling == NULL) {
    return 0;
  }

  container = node->parent;
  if (container == NULL || !S_can_contain(container, sibling)) {
    return 0;
  }

  S_node_unlink(sibling);

  // Read node->next only after the unlink: detaching 'sibling' may have
  // changed it.
  right = node->next;

  // Insert 'sibling' between 'node' and 'right', under the same parent.
  sibling->next = right;
  sibling->prev = node;
  sibling->parent = container;
  node->next = sibling;

  if (right != NULL) {
    right->prev = sibling;
  } else {
    // Inserted as last child.
    container->last_child = sibling;
  }

  return 1;
}

// Puts `newnode` where `oldnode` was and unlinks `oldnode` (which is not
// freed). Returns 1 on success, 0 when `newnode` could not be inserted.
int cmark_node_replace(cmark_node *oldnode, cmark_node *newnode) {
  if (cmark_node_insert_before(oldnode, newnode)) {
    cmark_node_unlink(oldnode);
    return 1;
  }
  return 0;
}

// Makes `child` the first child of `node`, detaching it from its old
// position first. Returns 1 on success, 0 when `node` cannot contain
// `child`.
int cmark_node_prepend_child(cmark_node *node, cmark_node *child) {
  cmark_node *former_head;

  if (!S_can_contain(node, child)) {
    return 0;
  }

  S_node_unlink(child);

  // Read the head only after the unlink: 'child' may have been the head.
  former_head = node->first_child;

  child->parent = node;
  child->prev = NULL;
  child->next = former_head;

  if (former_head != NULL) {
    former_head->prev = child;
  } else {
    // Also set last_child if node previously had no children.
    node->last_child = child;
  }
  node->first_child = child;

  return 1;
}

// Makes 'child' the last child of 'node'.
// Returns 1 on success, 0 when S_can_contain() refuses the pairing.
int cmark_node_append_child(cmark_node *node, cmark_node *child) {
  cmark_node *former_tail;

  if (!S_can_contain(node, child)) {
    return 0;
  }

  // Unlink 'child' before it is re-linked here.
  S_node_unlink(child);

  former_tail = node->last_child;

  child->parent = node;
  child->next = NULL;
  child->prev = former_tail;
  node->last_child = child;

  if (former_tail == NULL) {
    // 'node' was empty, so the only child is also the first child.
    node->first_child = child;
  } else {
    former_tail->next = child;
  }

  return 1;
}

// Writes a one-line diagnostic about a bad 'elem' link on 'node' to 'out':
// the element name, the node's type string and its start line/column.
// Does nothing when 'out' is NULL.
static void S_print_error(FILE *out, cmark_node *node, const char *elem) {
  if (out != NULL) {
    fprintf(out, "Invalid '%s' in node type %s at %d:%d\n", elem,
            cmark_node_get_type_string(node), node->start_line,
            node->start_column);
  }
}

// Walks the subtree rooted at 'node' and checks the structural links
// between nodes ('prev', 'parent' and 'last_child').  Each inconsistency
// is reported through S_print_error (silent when 'out' is NULL), repaired
// in place, and counted.
// Returns the number of inconsistencies found; 0 when 'node' is NULL.
int cmark_node_check(cmark_node *node, FILE *out) {
  cmark_node *cur;
  int errors = 0;

  if (!node) {
    return 0;
  }

  cur = node;
  for (;;) {
    // Descend: a first child must have no 'prev' and must point back to
    // 'cur' as its parent.
    if (cur->first_child) {
      if (cur->first_child->prev != NULL) {
        S_print_error(out, cur->first_child, "prev");
        cur->first_child->prev = NULL;
        ++errors;
      }
      if (cur->first_child->parent != cur) {
        S_print_error(out, cur->first_child, "parent");
        cur->first_child->parent = cur;
        ++errors;
      }
      cur = cur->first_child;
      continue;
    }

  next_sibling:
    // Back at the starting node: the whole subtree has been visited.
    if (cur == node) {
      break;
    }
    // Move sideways: the next sibling must link back to 'cur' and share
    // its parent.
    if (cur->next) {
      if (cur->next->prev != cur) {
        S_print_error(out, cur->next, "prev");
        cur->next->prev = cur;
        ++errors;
      }
      if (cur->next->parent != cur->parent) {
        S_print_error(out, cur->next, "parent");
        cur->next->parent = cur->parent;
        ++errors;
      }
      cur = cur->next;
      continue;
    }

    // No further sibling: 'cur' must be recorded as its parent's last
    // child.  Then climb one level and look for the parent's next sibling.
    if (cur->parent->last_child != cur) {
      S_print_error(out, cur->parent, "last_child");
      cur->parent->last_child = cur;
      ++errors;
    }
    cur = cur->parent;
    goto next_sibling;
  }

  return errors;
}
cmarkgfm/third_party/cmark/src/arena.c0000644000175000017500000000415514210444464020204 0ustar  carstencarsten#include 
#include 
#include 
#include "cmark-gfm.h"
#include "cmark-gfm-extension_api.h"

// One block of arena storage.  Chunks form a singly linked list through
// 'prev'; the file-scope pointer 'A' is the chunk allocations are
// currently carved from (NULL until the arena is first initialised, and
// again after cmark_arena_reset()).
static struct arena_chunk {
  // sz: capacity of 'ptr' in bytes; used: bytes already handed out.
  size_t sz, used;
  // Set to 1 by cmark_arena_push() on the chunk that was current at the
  // time of the push; cmark_arena_pop() frees chunks until it reaches one
  // with this flag set, then clears it.
  uint8_t push_point;
  // Zero-initialised backing storage (obtained with calloc).
  void *ptr;
  // Next-older chunk in the list, or NULL.
  struct arena_chunk *prev;
} *A = NULL;

// Creates a chunk with 'sz' bytes of zeroed storage and links it in front
// of 'prev'.  Aborts the process if either allocation fails, so the
// return value is never NULL.
static struct arena_chunk *alloc_arena_chunk(size_t sz, struct arena_chunk *prev) {
  struct arena_chunk *chunk = (struct arena_chunk *)calloc(1, sizeof(*chunk));
  if (chunk == NULL)
    abort();

  chunk->sz = sz;
  chunk->prev = prev;

  chunk->ptr = calloc(1, sz);
  if (chunk->ptr == NULL)
    abort();

  return chunk;
}

// Marks the current chunk as a push point and starts a fresh 10240-byte
// chunk on top of it.  Does nothing if the arena has not been initialised.
void cmark_arena_push(void) {
  if (A != NULL) {
    A->push_point = 1;
    A = alloc_arena_chunk(10240, A);
  }
}

int cmark_arena_pop(void) {
  if (!A)
    return 0;
  while (A && !A->push_point) {
    free(A->ptr);
    struct arena_chunk *n = A->prev;
    free(A);
    A = n;
  }
  if (A)
    A->push_point = 0;
  return 1;
}

// Creates the first arena chunk (4 MiB) and makes it current.
static void init_arena(void) {
  struct arena_chunk *first = alloc_arena_chunk(4 * 1048576, NULL);
  A = first;
}

void cmark_arena_reset(void) {
  while (A) {
    free(A->ptr);
    struct arena_chunk *n = A->prev;
    free(A);
    A = n;
  }
}

static void *arena_calloc(size_t nmem, size_t size) {
  if (!A)
    init_arena();

  size_t sz = nmem * size + sizeof(size_t);

  // Round allocation sizes to largest integer size to
  // ensure returned memory is correctly aligned
  const size_t align = sizeof(size_t) - 1;
  sz = (sz + align) & ~align;

  if (sz > A->sz) {
    A->prev = alloc_arena_chunk(sz, A->prev);
    return (uint8_t *) A->prev->ptr + sizeof(size_t);
  }
  if (sz > A->sz - A->used) {
    A = alloc_arena_chunk(A->sz + A->sz / 2, A);
  }
  void *ptr = (uint8_t *) A->ptr + A->used;
  A->used += sz;
  *((size_t *) ptr) = sz - sizeof(size_t);
  return (uint8_t *) ptr + sizeof(size_t);
}

// Returns a new zero-initialised arena block of at least 'size' bytes
// holding the leading contents of 'ptr'.  The old block is not released
// (the arena never frees individual blocks).  A NULL 'ptr' behaves like a
// plain allocation.
static void *arena_realloc(void *ptr, size_t size) {
  if (!A)
    init_arena();

  void *new_ptr = arena_calloc(1, size);
  if (ptr) {
    // The word just before an arena block holds its usable size.
    size_t old_size = ((size_t *) ptr)[-1];
    // Copy no more than the new block was asked to hold; copying the full
    // old size when shrinking would write past the end of the new block.
    memcpy(new_ptr, ptr, old_size < size ? old_size : size);
  }
  return new_ptr;
}

// 'free' entry of the arena allocator.  Individual blocks are never
// released; chunk memory is returned by cmark_arena_pop() and
// cmark_arena_reset().
static void arena_free(void *ptr) {
  (void) ptr; // silence the unused-parameter warning
  /* no-op */
}

// Allocator table backed by the arena: calloc, realloc and free, in that
// order.
cmark_mem CMARK_ARENA_MEM_ALLOCATOR = {arena_calloc, arena_realloc, arena_free};

// Returns a pointer to the arena-backed allocator table.
cmark_mem *cmark_get_arena_mem_allocator() {
  return &CMARK_ARENA_MEM_ALLOCATOR;
}
cmarkgfm/third_party/cmark/src/references.h0000644000175000017500000000073414210444464021243 0ustar  carstencarsten#ifndef CMARK_REFERENCES_H
#define CMARK_REFERENCES_H

#include "map.h"

#ifdef __cplusplus
extern "C" {
#endif

// A link reference definition stored in a cmark_map.
struct cmark_reference {
  // Generic map entry.  It is the first member so that a cmark_reference
  // pointer can be cast to and from a cmark_map_entry pointer.
  cmark_map_entry entry;
  // Link destination.
  cmark_chunk url;
  // Link title.
  cmark_chunk title;
};

typedef struct cmark_reference cmark_reference;

// Adds a reference with the given label, url and title to 'map'.
void cmark_reference_create(cmark_map *map, cmark_chunk *label,
                            cmark_chunk *url, cmark_chunk *title);
// Creates an empty reference map that allocates through 'mem'.
cmark_map *cmark_reference_map_new(cmark_mem *mem);

#ifdef __cplusplus
}
#endif

#endif
cmarkgfm/third_party/cmark/src/cmark-gfm_version.h.in0000644000175000017500000000053514210444464023137 0ustar  carstencarsten#ifndef CMARK_GFM_VERSION_H
#define CMARK_GFM_VERSION_H

/* The @...@ placeholders are substituted when this template is configured.
 * CMARK_GFM_VERSION packs the four version components into one integer,
 * one byte each: major in bits 24-31, minor in bits 16-23, patch in
 * bits 8-15 and the GFM revision in bits 0-7.
 * CMARK_GFM_VERSION_STRING is "major.minor.patch.gfm.revision". */
#define CMARK_GFM_VERSION ((@PROJECT_VERSION_MAJOR@ << 24) | (@PROJECT_VERSION_MINOR@ << 16) | (@PROJECT_VERSION_PATCH@ << 8) | @PROJECT_VERSION_GFM@)
#define CMARK_GFM_VERSION_STRING "@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@.gfm.@PROJECT_VERSION_GFM@"

#endif
cmarkgfm/third_party/cmark/src/syntax_extension.c0000644000175000017500000001315214210444464022535 0ustar  carstencarsten#include 
#include 

#include "cmark-gfm.h"
#include "syntax_extension.h"
#include "buffer.h"

extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR;

static cmark_mem *_mem = &CMARK_DEFAULT_MEM_ALLOCATOR;

// Destroys 'extension': runs its private-data destructor (when both a
// destructor and private data are set), then frees its special-character
// list, its name and the extension object itself through 'mem'.
void cmark_syntax_extension_free(cmark_mem *mem, cmark_syntax_extension *extension) {
  cmark_free_func release_priv = extension->free_function;

  if (release_priv && extension->priv) {
    release_priv(mem, extension->priv);
  }

  cmark_llist_free(mem, extension->special_inline_chars);

  mem->free(extension->name);
  mem->free(extension);
}

// Allocates a zero-initialised extension from the file-level allocator
// '_mem' and gives it its own copy of 'name'.
cmark_syntax_extension *cmark_syntax_extension_new(const char *name) {
  cmark_syntax_extension *ext =
      (cmark_syntax_extension *) _mem->calloc(1, sizeof(*ext));
  // Byte count of 'name' including its terminating NUL.
  size_t name_bytes = strlen(name) + 1;

  ext->name = (char *) _mem->calloc(1, name_bytes);
  memcpy(ext->name, name, name_bytes);
  return ext;
}

// Reserves the next free node type value for an extension.  The inline
// counter (CMARK_NODE_LAST_INLINE) is used when 'is_inline' is non-zero,
// otherwise the block counter (CMARK_NODE_LAST_BLOCK).
// Returns the new type.  When the counter's value bits are already all
// set, asserts and returns 0.
cmark_node_type cmark_syntax_extension_add_node(int is_inline) {
  cmark_node_type *last =
      is_inline ? &CMARK_NODE_LAST_INLINE : &CMARK_NODE_LAST_BLOCK;

  if ((*last & CMARK_NODE_VALUE_MASK) != CMARK_NODE_VALUE_MASK) {
    *last = (cmark_node_type) ((int) *last + 1);
    return *last;
  }

  // Value range exhausted.
  assert(false);
  return (cmark_node_type) 0;
}

// Sets the extension's emphasis flag; it is true only when 'emphasis' is
// exactly 1.
void cmark_syntax_extension_set_emphasis(cmark_syntax_extension *extension,
                                         int emphasis) {
  extension->emphasis = emphasis == 1;
}

// Stores 'func' as the extension's try_opening_block callback.
void cmark_syntax_extension_set_open_block_func(cmark_syntax_extension *extension,
                                                cmark_open_block_func func) {
  extension->try_opening_block = func;
}

// Stores 'func' as the extension's last_block_matches callback.
void cmark_syntax_extension_set_match_block_func(cmark_syntax_extension *extension,
                                                 cmark_match_block_func func) {
  extension->last_block_matches = func;
}

// Stores 'func' as the extension's match_inline callback.
void cmark_syntax_extension_set_match_inline_func(cmark_syntax_extension *extension,
                                                  cmark_match_inline_func func) {
  extension->match_inline = func;
}

// Stores 'func' as the extension's insert_inline_from_delim callback.
void cmark_syntax_extension_set_inline_from_delim_func(cmark_syntax_extension *extension,
                                                       cmark_inline_from_delim_func func) {
  extension->insert_inline_from_delim = func;
}

// Stores the list of special inline characters.  The list is freed with
// the extension by cmark_syntax_extension_free().
void cmark_syntax_extension_set_special_inline_chars(cmark_syntax_extension *extension,
                                                     cmark_llist *special_chars) {
  extension->special_inline_chars = special_chars;
}

// Stores 'func' as the extension's get_type_string_func callback.
void cmark_syntax_extension_set_get_type_string_func(cmark_syntax_extension *extension,
                                                     cmark_get_type_string_func func) {
  extension->get_type_string_func = func;
}

// Stores 'func' as the extension's can_contain_func callback.
void cmark_syntax_extension_set_can_contain_func(cmark_syntax_extension *extension,
                                                 cmark_can_contain_func func) {
  extension->can_contain_func = func;
}

// Stores 'func' as the extension's contains_inlines_func callback.
void cmark_syntax_extension_set_contains_inlines_func(cmark_syntax_extension *extension,
                                                      cmark_contains_inlines_func func) {
  extension->contains_inlines_func = func;
}

// Stores 'func' as the extension's commonmark_render_func callback.
void cmark_syntax_extension_set_commonmark_render_func(cmark_syntax_extension *extension,
                                                       cmark_common_render_func func) {
  extension->commonmark_render_func = func;
}

// Stores 'func' as the extension's plaintext_render_func callback.
void cmark_syntax_extension_set_plaintext_render_func(cmark_syntax_extension *extension,
                                                      cmark_common_render_func func) {
  extension->plaintext_render_func = func;
}

// Stores 'func' as the extension's latex_render_func callback.
void cmark_syntax_extension_set_latex_render_func(cmark_syntax_extension *extension,
                                                  cmark_common_render_func func) {
  extension->latex_render_func = func;
}

// Stores 'func' as the extension's xml_attr_func callback.
void cmark_syntax_extension_set_xml_attr_func(cmark_syntax_extension *extension,
                                              cmark_xml_attr_func func) {
  extension->xml_attr_func = func;
}

// Stores 'func' as the extension's man_render_func callback.
void cmark_syntax_extension_set_man_render_func(cmark_syntax_extension *extension,
                                                cmark_common_render_func func) {
  extension->man_render_func = func;
}

// Stores 'func' as the extension's html_render_func callback.
void cmark_syntax_extension_set_html_render_func(cmark_syntax_extension *extension,
                                                 cmark_html_render_func func) {
  extension->html_render_func = func;
}

// Stores 'func' as the extension's html_filter_func callback.
void cmark_syntax_extension_set_html_filter_func(cmark_syntax_extension *extension,
                                                 cmark_html_filter_func func) {
  extension->html_filter_func = func;
}

// Stores 'func' as the extension's postprocess_func callback.
void cmark_syntax_extension_set_postprocess_func(cmark_syntax_extension *extension,
                                                 cmark_postprocess_func func) {
  extension->postprocess_func = func;
}

// Attaches private data to the extension together with the function that
// cmark_syntax_extension_free() will call to release it.
void cmark_syntax_extension_set_private(cmark_syntax_extension *extension,
                                        void *priv,
                                        cmark_free_func free_func) {
  extension->priv = priv;
  extension->free_function = free_func;
}

// Returns the private data pointer stored on the extension.
void *cmark_syntax_extension_get_private(cmark_syntax_extension *extension) {
    return extension->priv;
}

// Stores 'func' as the extension's opaque_alloc_func callback.
void cmark_syntax_extension_set_opaque_alloc_func(cmark_syntax_extension *extension,
                                                  cmark_opaque_alloc_func func) {
  extension->opaque_alloc_func = func;
}

// Stores 'func' as the extension's opaque_free_func callback.
void cmark_syntax_extension_set_opaque_free_func(cmark_syntax_extension *extension,
                                                 cmark_opaque_free_func func) {
  extension->opaque_free_func = func;
}

// Stores 'func' as the extension's commonmark_escape_func callback.
void cmark_syntax_extension_set_commonmark_escape_func(cmark_syntax_extension *extension,
                                                       cmark_commonmark_escape_func func) {
  extension->commonmark_escape_func = func;
}
cmarkgfm/third_party/cmark/src/iterator.h0000644000175000017500000000053414210444464020751 0ustar  carstencarsten#ifndef CMARK_ITERATOR_H
#define CMARK_ITERATOR_H

#ifdef __cplusplus
extern "C" {
#endif

#include "cmark-gfm.h"

// One step of a tree walk: an event type paired with the node it
// applies to.
typedef struct {
  cmark_event_type ev_type;
  cmark_node *node;
} cmark_iter_state;

// Iterator over a node tree.
struct cmark_iter {
  // Allocator associated with the iterator.
  cmark_mem *mem;
  // Root node of the iteration.
  cmark_node *root;
  // NOTE(review): presumably the state most recently returned to the
  // caller and the state to be returned next -- confirm in iterator.c.
  cmark_iter_state cur;
  cmark_iter_state next;
};

#ifdef __cplusplus
}
#endif

#endif
cmarkgfm/third_party/cmark/src/linked_list.c0000644000175000017500000000140114210444464021406 0ustar  carstencarsten#include 

#include "cmark-gfm.h"

// Appends a new element holding 'data' to the end of the list.
// Returns the list head: the new element itself when 'head' is NULL,
// otherwise 'head' unchanged.  Finding the tail is linear in the list
// length.
cmark_llist *cmark_llist_append(cmark_mem *mem, cmark_llist *head, void *data) {
  cmark_llist *node = (cmark_llist *) mem->calloc(1, sizeof(*node));
  cmark_llist *tail;

  node->data = data;
  node->next = NULL;

  if (head == NULL)
    return node;

  tail = head;
  while (tail->next != NULL)
    tail = tail->next;

  tail->next = node;
  return head;
}

// Frees every element of the list through 'mem'.  When 'free_func' is not
// NULL it is first called on each element's data.
void cmark_llist_free_full(cmark_mem *mem, cmark_llist *head, cmark_free_func free_func) {
  cmark_llist *cur = head;

  while (cur != NULL) {
    cmark_llist *doomed = cur;

    if (free_func)
      free_func(mem, doomed->data);

    // Step forward before the element is released.
    cur = doomed->next;
    mem->free(doomed);
  }
}

// Frees the list elements only; the data they point to is left untouched.
void cmark_llist_free(cmark_mem *mem, cmark_llist *head) {
  cmark_llist_free_full(mem, head, NULL);
}
cmarkgfm/third_party/cmark/src/config.h.in0000644000175000017500000000260014210444464020766 0ustar  carstencarsten#ifndef CMARK_CONFIG_H
#define CMARK_CONFIG_H

#ifdef __cplusplus
extern "C" {
#endif

#cmakedefine HAVE_STDBOOL_H

#ifdef HAVE_STDBOOL_H
  #include 
#elif !defined(__cplusplus)
  typedef char bool;
#endif

#cmakedefine HAVE___BUILTIN_EXPECT

#cmakedefine HAVE___ATTRIBUTE__

#ifdef HAVE___ATTRIBUTE__
  #define CMARK_ATTRIBUTE(list) __attribute__ (list)
#else
  #define CMARK_ATTRIBUTE(list)
#endif

#ifndef CMARK_INLINE
  #if defined(_MSC_VER) && !defined(__cplusplus)
    #define CMARK_INLINE __inline
  #else
    #define CMARK_INLINE inline
  #endif
#endif

/* snprintf and vsnprintf fallbacks for MSVC before 2015,
   due to Valentin Milea http://stackoverflow.com/questions/2915672/
*/

#if defined(_MSC_VER) && _MSC_VER < 1900

#include 
#include 

#define snprintf c99_snprintf
#define vsnprintf c99_vsnprintf

/* vsnprintf replacement for old MSVC.  Formats into 'outBuf' (at most
   'size' bytes, truncating) unless 'size' is 0.  When nothing was written
   or the output was truncated, the length the full output would have had
   is computed with _vscprintf and returned instead. */
CMARK_INLINE int c99_vsnprintf(char *outBuf, size_t size, const char *format, va_list ap)
{
    int written = (size != 0)
                      ? _vsnprintf_s(outBuf, size, _TRUNCATE, format, ap)
                      : -1;

    if (written == -1)
        written = _vscprintf(format, ap);

    return written;
}

/* snprintf replacement for old MSVC: collects the variadic arguments and
   forwards them to c99_vsnprintf, returning its result. */
CMARK_INLINE int c99_snprintf(char *outBuf, size_t size, const char *format, ...)
{
    va_list args;
    int result;

    va_start(args, format);
    result = c99_vsnprintf(outBuf, size, format, args);
    va_end(args);

    return result;
}

#endif

#ifdef __cplusplus
}
#endif

#endif
cmarkgfm/third_party/cmark/src/html.c0000644000175000017500000003507414210444464020066 0ustar  carstencarsten#include 
#include 
#include 
#include 
#include "cmark_ctype.h"
#include "config.h"
#include "cmark-gfm.h"
#include "houdini.h"
#include "scanners.h"
#include "syntax_extension.h"
#include "html.h"
#include "render.h"

// Functions to convert cmark_nodes to HTML strings.

// Appends 'length' bytes of 'source' to 'dest', escaped for HTML output
// by houdini_escape_html0.
// NOTE(review): the final argument 0 presumably selects the non-"secure"
// escaping mode -- confirm against houdini.h.
static void escape_html(cmark_strbuf *dest, const unsigned char *source,
                        bufsize_t length) {
  houdini_escape_html0(dest, source, length, 0);
}

// Copies a raw HTML block to the renderer's output, giving every filter
// extension a chance to veto each tag.
//
// The block is scanned for '<'.  Text before it is copied verbatim.  At
// each '<' the html_filter_func of every extension in
// renderer->filter_extensions is called with the remaining data; if any
// of them returns 0 the tag is neutralised by emitting "&lt;" instead of
// '<'.  Whatever follows the last '<' is copied verbatim.
static void filter_html_block(cmark_html_renderer *renderer, uint8_t *data, size_t len) {
  cmark_strbuf *html = renderer->html;
  cmark_llist *it;
  cmark_syntax_extension *ext;
  bool filtered;
  uint8_t *match;

  while (len) {
    match = (uint8_t *) memchr(data, '<', len);
    if (!match)
      break;

    // Flush the text preceding the '<'.
    if (match != data) {
      cmark_strbuf_put(html, data, (bufsize_t)(match - data));
      len -= (match - data);
      data = match;
    }

    filtered = false;
    for (it = renderer->filter_extensions; it; it = it->next) {
      ext = ((cmark_syntax_extension *) it->data);
      if (!ext->html_filter_func(ext, data, len)) {
        filtered = true;
        break;
      }
    }

    if (!filtered) {
      cmark_strbuf_putc(html, '<');
    } else {
      // Escape the opening bracket so the tag is shown as text rather
      // than interpreted; emitting a raw '<' here would defeat the filter.
      cmark_strbuf_puts(html, "&lt;");
    }

    // Only the '<' itself is consumed; the rest of the tag is ordinary
    // text for the next iteration.
    ++data;
    --len;
  }

  if (len)
    cmark_strbuf_put(html, data, (bufsize_t)len);
}

// Emits the back-reference link(s) for a footnote definition: one anchor
// pointing at "#fnref-<label>", plus one numbered anchor
// ("#fnref-<label>-<i>") for each additional reference when the footnote
// is referenced more than once (def_count > 1).
//
// The links are written at most once per footnote: nothing is written and
// false is returned when the backref for the current footnote index has
// already been emitted.  Returns true when the links were written.
static bool S_put_footnote_backref(cmark_html_renderer *renderer, cmark_strbuf *html, cmark_node *node) {
  if (renderer->written_footnote_ix >= renderer->footnote_ix)
    return false;
  renderer->written_footnote_ix = renderer->footnote_ix;

  cmark_strbuf_puts(html, "<a href=\"#fnref-");
  houdini_escape_href(html, node->as.literal.data, node->as.literal.len);
  cmark_strbuf_puts(html, "\" class=\"footnote-backref\" data-footnote-backref aria-label=\"Back to content\">↩</a>");

  if (node->footnote.def_count > 1)
  {
    // References 2..def_count get their own numbered backref.
    for(int i = 2; i <= node->footnote.def_count; i++) {
      char n[32];
      snprintf(n, sizeof(n), "%d", i);

      cmark_strbuf_puts(html, " <a href=\"#fnref-");
      houdini_escape_href(html, node->as.literal.data, node->as.literal.len);
      cmark_strbuf_puts(html, "-");
      cmark_strbuf_puts(html, n);
      cmark_strbuf_puts(html, "\" class=\"footnote-backref\" data-footnote-backref aria-label=\"Back to content\">↩<sup class=\"footnote-ref\">");
      cmark_strbuf_puts(html, n);
      cmark_strbuf_puts(html, "</sup></a>");
    }
  }

  return true;
}

static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
                         cmark_event_type ev_type, int options) {
  cmark_node *parent;
  cmark_node *grandparent;
  cmark_strbuf *html = renderer->html;
  cmark_llist *it;
  cmark_syntax_extension *ext;
  char start_heading[] = "plain == node) { // back at original node
    renderer->plain = NULL;
  }

  if (renderer->plain != NULL) {
    switch (node->type) {
    case CMARK_NODE_TEXT:
    case CMARK_NODE_CODE:
    case CMARK_NODE_HTML_INLINE:
      escape_html(html, node->as.literal.data, node->as.literal.len);
      break;

    case CMARK_NODE_LINEBREAK:
    case CMARK_NODE_SOFTBREAK:
      cmark_strbuf_putc(html, ' ');
      break;

    default:
      break;
    }
    return 1;
  }

  if (node->extension && node->extension->html_render_func) {
    node->extension->html_render_func(node->extension, renderer, node, ev_type, options);
    return 1;
  }

  switch (node->type) {
  case CMARK_NODE_DOCUMENT:
    break;

  case CMARK_NODE_BLOCK_QUOTE:
    if (entering) {
      cmark_html_render_cr(html);
      cmark_strbuf_puts(html, "\n");
    } else {
      cmark_html_render_cr(html);
      cmark_strbuf_puts(html, "\n");
    }
    break;

  case CMARK_NODE_LIST: {
    cmark_list_type list_type = node->as.list.list_type;
    int start = node->as.list.start;

    if (entering) {
      cmark_html_render_cr(html);
      if (list_type == CMARK_BULLET_LIST) {
        cmark_strbuf_puts(html, "\n");
      } else if (start == 1) {
        cmark_strbuf_puts(html, "\n");
      } else {
        snprintf(buffer, BUFFER_SIZE, "
    \n"); } } else { cmark_strbuf_puts(html, list_type == CMARK_BULLET_LIST ? "\n" : "
\n"); } break; } case CMARK_NODE_ITEM: if (entering) { cmark_html_render_cr(html); cmark_strbuf_puts(html, "'); } else { cmark_strbuf_puts(html, "\n"); } break; case CMARK_NODE_HEADING: if (entering) { cmark_html_render_cr(html); start_heading[2] = (char)('0' + node->as.heading.level); cmark_strbuf_puts(html, start_heading); cmark_html_render_sourcepos(node, html, options); cmark_strbuf_putc(html, '>'); } else { end_heading[3] = (char)('0' + node->as.heading.level); cmark_strbuf_puts(html, end_heading); cmark_strbuf_puts(html, ">\n"); } break; case CMARK_NODE_CODE_BLOCK: cmark_html_render_cr(html); if (node->as.code.info.len == 0) { cmark_strbuf_puts(html, ""); } else { bufsize_t first_tag = 0; while (first_tag < node->as.code.info.len && !cmark_isspace(node->as.code.info.data[first_tag])) { first_tag += 1; } if (options & CMARK_OPT_GITHUB_PRE_LANG) { cmark_strbuf_puts(html, "as.code.info.data, first_tag); if (first_tag < node->as.code.info.len && (options & CMARK_OPT_FULL_INFO_STRING)) { cmark_strbuf_puts(html, "\" data-meta=\""); escape_html(html, node->as.code.info.data + first_tag + 1, node->as.code.info.len - first_tag - 1); } cmark_strbuf_puts(html, "\">"); } else { cmark_strbuf_puts(html, "as.code.info.data, first_tag); if (first_tag < node->as.code.info.len && (options & CMARK_OPT_FULL_INFO_STRING)) { cmark_strbuf_puts(html, "\" data-meta=\""); escape_html(html, node->as.code.info.data + first_tag + 1, node->as.code.info.len - first_tag - 1); } cmark_strbuf_puts(html, "\">"); } } escape_html(html, node->as.code.literal.data, node->as.code.literal.len); cmark_strbuf_puts(html, "
\n"); break; case CMARK_NODE_HTML_BLOCK: cmark_html_render_cr(html); if (!(options & CMARK_OPT_UNSAFE)) { cmark_strbuf_puts(html, ""); } else if (renderer->filter_extensions) { filter_html_block(renderer, node->as.literal.data, node->as.literal.len); } else { cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len); } cmark_html_render_cr(html); break; case CMARK_NODE_CUSTOM_BLOCK: cmark_html_render_cr(html); if (entering) { cmark_strbuf_put(html, node->as.custom.on_enter.data, node->as.custom.on_enter.len); } else { cmark_strbuf_put(html, node->as.custom.on_exit.data, node->as.custom.on_exit.len); } cmark_html_render_cr(html); break; case CMARK_NODE_THEMATIC_BREAK: cmark_html_render_cr(html); cmark_strbuf_puts(html, "\n"); break; case CMARK_NODE_PARAGRAPH: parent = cmark_node_parent(node); grandparent = cmark_node_parent(parent); if (grandparent != NULL && grandparent->type == CMARK_NODE_LIST) { tight = grandparent->as.list.tight; } else { tight = false; } if (!tight) { if (entering) { cmark_html_render_cr(html); cmark_strbuf_puts(html, "'); } else { if (parent->type == CMARK_NODE_FOOTNOTE_DEFINITION && node->next == NULL) { cmark_strbuf_putc(html, ' '); S_put_footnote_backref(renderer, html, parent); } cmark_strbuf_puts(html, "

\n"); } } break; case CMARK_NODE_TEXT: escape_html(html, node->as.literal.data, node->as.literal.len); break; case CMARK_NODE_LINEBREAK: cmark_strbuf_puts(html, "
\n"); break; case CMARK_NODE_SOFTBREAK: if (options & CMARK_OPT_HARDBREAKS) { cmark_strbuf_puts(html, "
\n"); } else if (options & CMARK_OPT_NOBREAKS) { cmark_strbuf_putc(html, ' '); } else { cmark_strbuf_putc(html, '\n'); } break; case CMARK_NODE_CODE: cmark_strbuf_puts(html, ""); escape_html(html, node->as.literal.data, node->as.literal.len); cmark_strbuf_puts(html, ""); break; case CMARK_NODE_HTML_INLINE: if (!(options & CMARK_OPT_UNSAFE)) { cmark_strbuf_puts(html, ""); } else { filtered = false; for (it = renderer->filter_extensions; it; it = it->next) { ext = (cmark_syntax_extension *) it->data; if (!ext->html_filter_func(ext, node->as.literal.data, node->as.literal.len)) { filtered = true; break; } } if (!filtered) { cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len); } else { cmark_strbuf_puts(html, "<"); cmark_strbuf_put(html, node->as.literal.data + 1, node->as.literal.len - 1); } } break; case CMARK_NODE_CUSTOM_INLINE: if (entering) { cmark_strbuf_put(html, node->as.custom.on_enter.data, node->as.custom.on_enter.len); } else { cmark_strbuf_put(html, node->as.custom.on_exit.data, node->as.custom.on_exit.len); } break; case CMARK_NODE_STRONG: if (entering) { cmark_strbuf_puts(html, ""); } else { cmark_strbuf_puts(html, ""); } break; case CMARK_NODE_EMPH: if (entering) { cmark_strbuf_puts(html, ""); } else { cmark_strbuf_puts(html, ""); } break; case CMARK_NODE_LINK: if (entering) { cmark_strbuf_puts(html, "as.link.url, 0))) { houdini_escape_href(html, node->as.link.url.data, node->as.link.url.len); } if (node->as.link.title.len) { cmark_strbuf_puts(html, "\" title=\""); escape_html(html, node->as.link.title.data, node->as.link.title.len); } cmark_strbuf_puts(html, "\">"); } else { cmark_strbuf_puts(html, ""); } break; case CMARK_NODE_IMAGE: if (entering) { cmark_strbuf_puts(html, "as.link.url, 0))) { houdini_escape_href(html, node->as.link.url.data, node->as.link.url.len); } cmark_strbuf_puts(html, "\" alt=\""); renderer->plain = node; } else { if (node->as.link.title.len) { cmark_strbuf_puts(html, "\" title=\""); escape_html(html, 
node->as.link.title.data, node->as.link.title.len); } cmark_strbuf_puts(html, "\" />"); } break; case CMARK_NODE_FOOTNOTE_DEFINITION: if (entering) { if (renderer->footnote_ix == 0) { cmark_strbuf_puts(html, "
\n
    \n"); } ++renderer->footnote_ix; cmark_strbuf_puts(html, "
  1. as.literal.data, node->as.literal.len); cmark_strbuf_puts(html, "\">\n"); } else { if (S_put_footnote_backref(renderer, html, node)) { cmark_strbuf_putc(html, '\n'); } cmark_strbuf_puts(html, "
  2. \n"); } break; case CMARK_NODE_FOOTNOTE_REFERENCE: if (entering) { cmark_strbuf_puts(html, "parent_footnote_def->as.literal.data, node->parent_footnote_def->as.literal.len); cmark_strbuf_puts(html, "\" id=\"fnref-"); houdini_escape_href(html, node->parent_footnote_def->as.literal.data, node->parent_footnote_def->as.literal.len); if (node->footnote.ref_ix > 1) { char n[32]; snprintf(n, sizeof(n), "%d", node->footnote.ref_ix); cmark_strbuf_puts(html, "-"); cmark_strbuf_puts(html, n); } cmark_strbuf_puts(html, "\" data-footnote-ref>"); houdini_escape_href(html, node->as.literal.data, node->as.literal.len); cmark_strbuf_puts(html, ""); } break; default: assert(false); break; } return 1; } char *cmark_render_html(cmark_node *root, int options, cmark_llist *extensions) { return cmark_render_html_with_mem(root, options, extensions, cmark_node_mem(root)); } char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_llist *extensions, cmark_mem *mem) { char *result; cmark_strbuf html = CMARK_BUF_INIT(mem); cmark_event_type ev_type; cmark_node *cur; cmark_html_renderer renderer = {&html, NULL, NULL, 0, 0, NULL}; cmark_iter *iter = cmark_iter_new(root); for (; extensions; extensions = extensions->next) if (((cmark_syntax_extension *) extensions->data)->html_filter_func) renderer.filter_extensions = cmark_llist_append( mem, renderer.filter_extensions, (cmark_syntax_extension *) extensions->data); while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cur = cmark_iter_get_node(iter); S_render_node(&renderer, cur, ev_type, options); } if (renderer.footnote_ix) { cmark_strbuf_puts(&html, "
\n
\n"); } result = (char *)cmark_strbuf_detach(&html); cmark_llist_free(mem, renderer.filter_extensions); cmark_iter_free(iter); return result; } cmarkgfm/third_party/cmark/src/references.c0000644000175000017500000000223014210444464021227 0ustar carstencarsten#include "cmark-gfm.h" #include "parser.h" #include "references.h" #include "inlines.h" #include "chunk.h" static void reference_free(cmark_map *map, cmark_map_entry *_ref) { cmark_reference *ref = (cmark_reference *)_ref; cmark_mem *mem = map->mem; if (ref != NULL) { mem->free(ref->entry.label); cmark_chunk_free(mem, &ref->url); cmark_chunk_free(mem, &ref->title); mem->free(ref); } } void cmark_reference_create(cmark_map *map, cmark_chunk *label, cmark_chunk *url, cmark_chunk *title) { cmark_reference *ref; unsigned char *reflabel = normalize_map_label(map->mem, label); /* empty reference name, or composed from only whitespace */ if (reflabel == NULL) return; assert(map->sorted == NULL); ref = (cmark_reference *)map->mem->calloc(1, sizeof(*ref)); ref->entry.label = reflabel; ref->url = cmark_clean_url(map->mem, url); ref->title = cmark_clean_title(map->mem, title); ref->entry.age = map->size; ref->entry.next = map->refs; map->refs = (cmark_map_entry *)ref; map->size++; } cmark_map *cmark_reference_map_new(cmark_mem *mem) { return cmark_map_new(mem, reference_free); } cmarkgfm/third_party/cmark/src/plaintext.c0000644000175000017500000001447014210444464021127 0ustar carstencarsten#include "node.h" #include "syntax_extension.h" #include "render.h" #define OUT(s, wrap, escaping) renderer->out(renderer, node, s, wrap, escaping) #define LIT(s) renderer->out(renderer, node, s, false, LITERAL) #define CR() renderer->cr(renderer) #define BLANKLINE() renderer->blankline(renderer) #define LISTMARKER_SIZE 20 // Functions to convert cmark_nodes to plain text strings. 
static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node, cmark_escaping escape, int32_t c, unsigned char nextc) { cmark_render_code_point(renderer, c); } // if node is a block node, returns node. // otherwise returns first block-level node that is an ancestor of node. // if there is no block-level ancestor, returns NULL. static cmark_node *get_containing_block(cmark_node *node) { while (node) { if (CMARK_NODE_BLOCK_P(node)) { return node; } else { node = node->parent; } } return NULL; } static int S_render_node(cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options) { cmark_node *tmp; int list_number; cmark_delim_type list_delim; int i; bool entering = (ev_type == CMARK_EVENT_ENTER); char listmarker[LISTMARKER_SIZE]; bool first_in_list_item; bufsize_t marker_width; bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) && !(CMARK_OPT_HARDBREAKS & options); // Don't adjust tight list status til we've started the list. // Otherwise we loose the blank line between a paragraph and // a following list. 
if (!(node->type == CMARK_NODE_ITEM && node->prev == NULL && entering)) { tmp = get_containing_block(node); renderer->in_tight_list_item = tmp && // tmp might be NULL if there is no containing block ((tmp->type == CMARK_NODE_ITEM && cmark_node_get_list_tight(tmp->parent)) || (tmp && tmp->parent && tmp->parent->type == CMARK_NODE_ITEM && cmark_node_get_list_tight(tmp->parent->parent))); } if (node->extension && node->extension->plaintext_render_func) { node->extension->plaintext_render_func(node->extension, renderer, node, ev_type, options); return 1; } switch (node->type) { case CMARK_NODE_DOCUMENT: break; case CMARK_NODE_BLOCK_QUOTE: break; case CMARK_NODE_LIST: if (!entering && node->next && (node->next->type == CMARK_NODE_CODE_BLOCK || node->next->type == CMARK_NODE_LIST)) { CR(); } break; case CMARK_NODE_ITEM: if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { marker_width = 4; } else { list_number = cmark_node_get_list_start(node->parent); list_delim = cmark_node_get_list_delim(node->parent); tmp = node; while (tmp->prev) { tmp = tmp->prev; list_number += 1; } // we ensure a width of at least 4 so // we get nice transition from single digits // to double snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number, list_delim == CMARK_PAREN_DELIM ? ")" : ".", list_number < 10 ? 
" " : " "); marker_width = (bufsize_t)strlen(listmarker); } if (entering) { if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { LIT(" - "); renderer->begin_content = true; } else { LIT(listmarker); renderer->begin_content = true; } for (i = marker_width; i--;) { cmark_strbuf_putc(renderer->prefix, ' '); } } else { cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - marker_width); CR(); } break; case CMARK_NODE_HEADING: if (entering) { renderer->begin_content = true; renderer->no_linebreaks = true; } else { renderer->no_linebreaks = false; BLANKLINE(); } break; case CMARK_NODE_CODE_BLOCK: first_in_list_item = node->prev == NULL && node->parent && node->parent->type == CMARK_NODE_ITEM; if (!first_in_list_item) { BLANKLINE(); } OUT(cmark_node_get_literal(node), false, LITERAL); BLANKLINE(); break; case CMARK_NODE_HTML_BLOCK: break; case CMARK_NODE_CUSTOM_BLOCK: break; case CMARK_NODE_THEMATIC_BREAK: BLANKLINE(); break; case CMARK_NODE_PARAGRAPH: if (!entering) { BLANKLINE(); } break; case CMARK_NODE_TEXT: OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); break; case CMARK_NODE_LINEBREAK: CR(); break; case CMARK_NODE_SOFTBREAK: if (CMARK_OPT_HARDBREAKS & options) { CR(); } else if (!renderer->no_linebreaks && renderer->width == 0 && !(CMARK_OPT_HARDBREAKS & options) && !(CMARK_OPT_NOBREAKS & options)) { CR(); } else { OUT(" ", allow_wrap, LITERAL); } break; case CMARK_NODE_CODE: OUT(cmark_node_get_literal(node), allow_wrap, LITERAL); break; case CMARK_NODE_HTML_INLINE: break; case CMARK_NODE_CUSTOM_INLINE: break; case CMARK_NODE_STRONG: break; case CMARK_NODE_EMPH: break; case CMARK_NODE_LINK: break; case CMARK_NODE_IMAGE: break; case CMARK_NODE_FOOTNOTE_REFERENCE: if (entering) { LIT("[^"); OUT(cmark_chunk_to_cstr(renderer->mem, &node->as.literal), false, LITERAL); LIT("]"); } break; case CMARK_NODE_FOOTNOTE_DEFINITION: if (entering) { renderer->footnote_ix += 1; LIT("[^"); char n[32]; snprintf(n, sizeof(n), "%d", 
renderer->footnote_ix); OUT(n, false, LITERAL); LIT("]: "); cmark_strbuf_puts(renderer->prefix, " "); } else { cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 4); } break; default: assert(false); break; } return 1; } char *cmark_render_plaintext(cmark_node *root, int options, int width) { return cmark_render_plaintext_with_mem(root, options, width, cmark_node_mem(root)); } char *cmark_render_plaintext_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) { if (options & CMARK_OPT_HARDBREAKS) { // disable breaking on width, since it has // a different meaning with OPT_HARDBREAKS width = 0; } return cmark_render(mem, root, options, width, outc, S_render_node); } cmarkgfm/third_party/cmark/src/inlines.c0000644000175000017500000014751514210444464020567 0ustar carstencarsten#include #include #include #include "cmark_ctype.h" #include "config.h" #include "node.h" #include "parser.h" #include "references.h" #include "cmark-gfm.h" #include "houdini.h" #include "utf8.h" #include "scanners.h" #include "inlines.h" #include "syntax_extension.h" static const char *EMDASH = "\xE2\x80\x94"; static const char *ENDASH = "\xE2\x80\x93"; static const char *ELLIPSES = "\xE2\x80\xA6"; static const char *LEFTDOUBLEQUOTE = "\xE2\x80\x9C"; static const char *RIGHTDOUBLEQUOTE = "\xE2\x80\x9D"; static const char *LEFTSINGLEQUOTE = "\xE2\x80\x98"; static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99"; // Macros for creating various kinds of simple. 
#define make_str(subj, sc, ec, s) make_literal(subj, CMARK_NODE_TEXT, sc, ec, s)
#define make_code(subj, sc, ec, s) make_literal(subj, CMARK_NODE_CODE, sc, ec, s)
#define make_raw_html(subj, sc, ec, s) make_literal(subj, CMARK_NODE_HTML_INLINE, sc, ec, s)
#define make_linebreak(mem) make_simple(mem, CMARK_NODE_LINEBREAK)
#define make_softbreak(mem) make_simple(mem, CMARK_NODE_SOFTBREAK)
#define make_emph(mem) make_simple(mem, CMARK_NODE_EMPH)
#define make_strong(mem) make_simple(mem, CMARK_NODE_STRONG)

// Longest backtick run whose position is remembered in subject.backticks.
// scan_to_closing_backticks() refuses to match opening runs longer than this.
#define MAXBACKTICKS 80

// One entry of the stack of open '[' / '![' brackets (see push_bracket).
typedef struct bracket {
  // Entry that was on top of the stack when this one was pushed.
  struct bracket *previous;
  // subj->last_delim at push time; passed to process_emphasis() as the
  // stack bottom once the bracket has been resolved.
  struct delimiter *previous_delimiter;
  // Text node associated with the bracket; it is freed when the bracket
  // turns into a link, image or footnote reference.
  cmark_node *inl_text;
  // subj->pos at push time.
  bufsize_t position;
  // True for an image opener.
  bool image;
  // Set on push; cleared on earlier link openers once a link has been
  // formed, so that they can no longer open a link themselves.
  bool active;
  // Set when another bracket is pushed on top of this one.
  bool bracket_after;
} bracket;

// Parsing state for one run of inline content.
typedef struct subject{
  // Allocator used for every node, delimiter and bracket created here.
  cmark_mem *mem;
  // The text being parsed.
  cmark_chunk input;
  // Line number recorded on newly created nodes.
  int line;
  // Current offset into input.
  bufsize_t pos;
  // block_offset and column_offset are both added to buffer offsets when
  // node columns are computed (see make_literal).
  int block_offset;
  int column_offset;
  // Map consulted for reference links.
  cmark_map *refmap;
  // Top of the delimiter stack (NULL when empty).
  delimiter *last_delim;
  // Top of the bracket stack (NULL when empty).
  bracket *last_bracket;
  // backticks[n] is the start offset of the last run of n backticks seen by
  // scan_to_closing_backticks().
  bufsize_t backticks[MAXBACKTICKS + 1];
  // Set once a backtick scan has reached the end of input without a match.
  bool scanned_for_backticks;
} subject;

// Extensions may populate this.
static int8_t SKIP_CHARS[256];

// True for '\n' and '\r'.
static CMARK_INLINE bool S_is_line_end_char(char c) {
  return (c == '\n' || c == '\r');
}

static delimiter *S_insert_emph(subject *subj, delimiter *opener,
                                delimiter *closer);

static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
                        int options);

static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset,
                             subject *e, cmark_chunk *buffer,
                             cmark_map *refmap);
static bufsize_t subject_find_special_char(subject *subj, int options);

// Create an inline with a literal string value.
// start_column/end_column are 0-based offsets into the subject; the node
// stores them 1-based and shifted by the subject's column and block offsets.
static CMARK_INLINE cmark_node *make_literal(subject *subj, cmark_node_type t,
                                             int start_column, int end_column,
                                             cmark_chunk s) {
  cmark_node *e = (cmark_node *)subj->mem->calloc(1, sizeof(*e));
  cmark_strbuf_init(subj->mem, &e->content, 0);
  e->type = (uint16_t)t;
  e->as.literal = s;
  e->start_line = e->end_line = subj->line;
  // columns are 1 based.
  e->start_column = start_column + 1 + subj->column_offset + subj->block_offset;
  e->end_column = end_column + 1 + subj->column_offset + subj->block_offset;
  return e;
}

// Create an inline with no value.
// Only the type is set; the rest of the node stays zeroed by calloc.
static CMARK_INLINE cmark_node *make_simple(cmark_mem *mem, cmark_node_type t) {
  cmark_node *e = (cmark_node *)mem->calloc(1, sizeof(*e));
  cmark_strbuf_init(mem, &e->content, 0);
  e->type = (uint16_t)t;
  return e;
}

// Like make_str, but parses entities.
// When houdini_unescape_html() reports that it produced output, the node owns
// the unescaped buffer; otherwise the node refers to *content unchanged.
static cmark_node *make_str_with_entities(subject *subj,
                                          int start_column, int end_column,
                                          cmark_chunk *content) {
  cmark_strbuf unescaped = CMARK_BUF_INIT(subj->mem);

  if (houdini_unescape_html(&unescaped, content->data, content->len)) {
    return make_str(subj, start_column, end_column, cmark_chunk_buf_detach(&unescaped));
  } else {
    return make_str(subj, start_column, end_column, *content);
  }
}

// Duplicate a chunk by creating a copy of the buffer not by reusing the
// buffer like cmark_chunk_dup does.
// The copy is NUL-terminated and flagged as allocated (alloc = 1).
static cmark_chunk chunk_clone(cmark_mem *mem, cmark_chunk *src) {
  cmark_chunk c;
  bufsize_t len = src->len;

  c.len = len;
  c.data = (unsigned char *)mem->calloc(len + 1, 1);
  c.alloc = 1;
  if (len)
    memcpy(c.data, src->data, len);
  c.data[len] = '\0';

  return c;
}

// Build the destination of an autolink: trim the text (in place, on *url),
// prefix "mailto:" for email autolinks, and pass the rest through
// houdini_unescape_html_f().  An empty URL yields an empty chunk.
static cmark_chunk cmark_clean_autolink(cmark_mem *mem, cmark_chunk *url,
                                        int is_email) {
  cmark_strbuf buf = CMARK_BUF_INIT(mem);

  cmark_chunk_trim(url);

  if (url->len == 0) {
    cmark_chunk result = CMARK_CHUNK_EMPTY;
    return result;
  }

  if (is_email)
    cmark_strbuf_puts(&buf, "mailto:");

  houdini_unescape_html_f(&buf, url->data, url->len);
  return cmark_chunk_buf_detach(&buf);
}

// Create a link node for an autolink, with an empty title and a single text
// child holding the link text.
// NOTE(review): unlike make_literal, the link's own columns do not include
// subj->column_offset / subj->block_offset -- confirm this is intended.
static CMARK_INLINE cmark_node *make_autolink(subject *subj,
                                              int start_column, int end_column,
                                              cmark_chunk url, int is_email) {
  cmark_node *link = make_simple(subj->mem, CMARK_NODE_LINK);
  link->as.link.url = cmark_clean_autolink(subj->mem, &url, is_email);
  link->as.link.title = cmark_chunk_literal("");
  link->start_line = link->end_line = subj->line;
  link->start_column = start_column + 1;
  link->end_column = end_column + 1;
  cmark_node_append_child(link, make_str_with_entities(subj, start_column + 1, end_column - 1, &url));
  return link;
}

// Initialise *e to parse *chunk from its first byte, with empty delimiter and
// bracket stacks and no recorded backtick positions.
static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e,
                             cmark_chunk *chunk, cmark_map *refmap) {
  int i;
  e->mem = mem;
  e->input = *chunk;
  e->line = line_number;
  e->pos = 0;
  e->block_offset = block_offset;
  e->column_offset = 0;
  e->refmap = refmap;
  e->last_delim = NULL;
  e->last_bracket = NULL;
  for (i = 0; i <= MAXBACKTICKS; i++) {
    e->backticks[i] = 0;
  }
  e->scanned_for_backticks = false;
}

// Predicate for take_while(): true for a backtick.
static CMARK_INLINE int isbacktick(int c) { return (c == '`'); }

// Return the byte n positions after the current one, or 0 when that position
// lies at or beyond the end of the input.
static CMARK_INLINE unsigned char peek_char_n(subject *subj, bufsize_t n) {
  // NULL bytes should have been stripped out by now.  If they're
  // present, it's a programming error:
  assert(!(subj->pos + n < subj->input.len && subj->input.data[subj->pos + n] == 0));
  return (subj->pos + n < subj->input.len) ? subj->input.data[subj->pos + n] : 0;
}

// Return the byte at the current position, or 0 at end of input.
static CMARK_INLINE unsigned char peek_char(subject *subj) {
  return peek_char_n(subj, 0);
}

// Return the byte at absolute offset pos.  No bounds check is performed, so
// the caller must make sure pos is a readable offset.
static CMARK_INLINE unsigned char peek_at(subject *subj, bufsize_t pos) {
  return subj->input.data[pos];
}

// Return true if the subject has been consumed completely, i.e. there are
// no more characters to read.
static CMARK_INLINE int is_eof(subject *subj) {
  return (subj->pos >= subj->input.len);
}

// Advance the subject.  Doesn't check for eof.
#define advance(subj) (subj)->pos += 1

// Skip spaces and tabs; return true if at least one was skipped.
static CMARK_INLINE bool skip_spaces(subject *subj) {
  bool skipped = false;
  while (peek_char(subj) == ' ' || peek_char(subj) == '\t') {
    advance(subj);
    skipped = true;
  }
  return skipped;
}

// Skip one line ending ("\r", "\n" or "\r\n").  Returns true if a line-end
// character was consumed or the subject is at end of input.
static CMARK_INLINE bool skip_line_end(subject *subj) {
  bool seen_line_end_char = false;
  if (peek_char(subj) == '\r') {
    advance(subj);
    seen_line_end_char = true;
  }
  if (peek_char(subj) == '\n') {
    advance(subj);
    seen_line_end_char = true;
  }
  return seen_line_end_char || is_eof(subj);
}

// Take characters while a predicate holds, and return a string.
static CMARK_INLINE cmark_chunk take_while(subject *subj, int (*f)(int)) { unsigned char c; bufsize_t startpos = subj->pos; bufsize_t len = 0; while ((c = peek_char(subj)) && (*f)(c)) { advance(subj); len++; } return cmark_chunk_dup(&subj->input, startpos, len); } // Return the number of newlines in a given span of text in a subject. If // the number is greater than zero, also return the number of characters // between the last newline and the end of the span in `since_newline`. static int count_newlines(subject *subj, bufsize_t from, bufsize_t len, int *since_newline) { int nls = 0; int since_nl = 0; while (len--) { if (subj->input.data[from++] == '\n') { ++nls; since_nl = 0; } else { ++since_nl; } } if (!nls) return 0; *since_newline = since_nl; return nls; } // Adjust `node`'s `end_line`, `end_column`, and `subj`'s `line` and // `column_offset` according to the number of newlines in a just-matched span // of text in `subj`. static void adjust_subj_node_newlines(subject *subj, cmark_node *node, int matchlen, int extra, int options) { if (!(options & CMARK_OPT_SOURCEPOS)) { return; } int since_newline; int newlines = count_newlines(subj, subj->pos - matchlen - extra, matchlen, &since_newline); if (newlines) { subj->line += newlines; node->end_line += newlines; node->end_column = since_newline; subj->column_offset = -subj->pos + since_newline + extra; } } // Try to process a backtick code span that began with a // span of ticks of length openticklength length (already // parsed). Return 0 if you don't find matching closing // backticks, otherwise return the position in the subject // after the closing backticks. 
static bufsize_t scan_to_closing_backticks(subject *subj, bufsize_t openticklength) { bool found = false; if (openticklength > MAXBACKTICKS) { // we limit backtick string length because of the array subj->backticks: return 0; } if (subj->scanned_for_backticks && subj->backticks[openticklength] <= subj->pos) { // return if we already know there's no closer return 0; } while (!found) { // read non backticks unsigned char c; while ((c = peek_char(subj)) && c != '`') { advance(subj); } if (is_eof(subj)) { break; } bufsize_t numticks = 0; while (peek_char(subj) == '`') { advance(subj); numticks++; } // store position of ender if (numticks <= MAXBACKTICKS) { subj->backticks[numticks] = subj->pos - numticks; } if (numticks == openticklength) { return (subj->pos); } } // got through whole input without finding closer subj->scanned_for_backticks = true; return 0; } // Destructively modify string, converting newlines to // spaces, then removing a single leading + trailing space, // unless the code span consists entirely of space characters. static void S_normalize_code(cmark_strbuf *s) { bufsize_t r, w; bool contains_nonspace = false; for (r = 0, w = 0; r < s->size; ++r) { switch (s->ptr[r]) { case '\r': if (s->ptr[r + 1] != '\n') { s->ptr[w++] = ' '; } break; case '\n': s->ptr[w++] = ' '; break; default: s->ptr[w++] = s->ptr[r]; } if (s->ptr[r] != ' ') { contains_nonspace = true; } } // begins and ends with space? if (contains_nonspace && s->ptr[0] == ' ' && s->ptr[w - 1] == ' ') { cmark_strbuf_drop(s, 1); cmark_strbuf_truncate(s, w - 2); } else { cmark_strbuf_truncate(s, w); } } // Parse backtick code section or raw backticks, return an inline. // Assumes that the subject has a backtick at the current position. 
static cmark_node *handle_backticks(subject *subj, int options) {
  cmark_chunk openticks = take_while(subj, isbacktick);
  bufsize_t startpos = subj->pos;
  bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len);

  if (endpos == 0) {      // not found
    subj->pos = startpos; // rewind
    // no closer: the opening backticks become literal text
    return make_str(subj, subj->pos, subj->pos, openticks);
  } else {
    cmark_strbuf buf = CMARK_BUF_INIT(subj->mem);

    // copy the text between the opening and the closing run
    cmark_strbuf_set(&buf, subj->input.data + startpos,
                     endpos - startpos - openticks.len);
    S_normalize_code(&buf);

    cmark_node *node = make_code(subj, startpos, endpos - openticks.len - 1, cmark_chunk_buf_detach(&buf));
    adjust_subj_node_newlines(subj, node, endpos - startpos, openticks.len, options);
    return node;
  }
}

// Scan ***, **, or * and return number scanned, or 0.
// Advances position.
//
// `c` is the delimiter character.  Quote characters (' and ") always consume
// exactly one character.  `*can_open` and `*can_close` are set from the
// characters before and after the run; bytes flagged in SKIP_CHARS are
// skipped over when looking for those neighbours, and a missing or
// undecodable neighbour is treated as a newline (10).
static int scan_delims(subject *subj, unsigned char c, bool *can_open,
                       bool *can_close) {
  int numdelims = 0;
  bufsize_t before_char_pos, after_char_pos;
  int32_t after_char = 0;
  int32_t before_char = 0;
  int len;
  bool left_flanking, right_flanking;

  if (subj->pos == 0) {
    before_char = 10;
  } else {
    before_char_pos = subj->pos - 1;
    // walk back to the beginning of the UTF_8 sequence:
    while ((peek_at(subj, before_char_pos) >> 6 == 2 || SKIP_CHARS[peek_at(subj, before_char_pos)]) && before_char_pos > 0) {
      before_char_pos -= 1;
    }
    len = cmark_utf8proc_iterate(subj->input.data + before_char_pos,
                                 subj->pos - before_char_pos, &before_char);
    if (len == -1 || (before_char < 256 && SKIP_CHARS[(unsigned char) before_char])) {
      before_char = 10;
    }
  }

  if (c == '\'' || c == '"') {
    numdelims++;
    advance(subj); // limit to 1 delim for quotes
  } else {
    while (peek_char(subj) == c) {
      numdelims++;
      advance(subj);
    }
  }

  if (subj->pos == subj->input.len) {
    after_char = 10;
  } else {
    after_char_pos = subj->pos;
    // The bounds test must come first: peek_at() does not check its index,
    // so testing it second would read one byte past the end of the input
    // when every remaining byte is a skip character.
    while (after_char_pos < subj->input.len && SKIP_CHARS[peek_at(subj, after_char_pos)]) {
      after_char_pos += 1;
    }
    len = cmark_utf8proc_iterate(subj->input.data + after_char_pos,
                                 subj->input.len - after_char_pos, &after_char);
    if (len == -1 || (after_char < 256 && SKIP_CHARS[(unsigned char) after_char])) {
      after_char = 10;
    }
  }

  left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) &&
                  (!cmark_utf8proc_is_punctuation(after_char) ||
                   cmark_utf8proc_is_space(before_char) ||
                   cmark_utf8proc_is_punctuation(before_char));
  right_flanking = numdelims > 0 && !cmark_utf8proc_is_space(before_char) &&
                   (!cmark_utf8proc_is_punctuation(before_char) ||
                    cmark_utf8proc_is_space(after_char) ||
                    cmark_utf8proc_is_punctuation(after_char));
  if (c == '_') {
    *can_open = left_flanking &&
                (!right_flanking || cmark_utf8proc_is_punctuation(before_char));
    *can_close = right_flanking &&
                 (!left_flanking || cmark_utf8proc_is_punctuation(after_char));
  } else if (c == '\'' || c == '"') {
    *can_open = left_flanking && !right_flanking &&
	         before_char != ']' && before_char != ')';
    *can_close = right_flanking;
  } else {
    *can_open = left_flanking;
    *can_close = right_flanking;
  }
  return numdelims;
}

/*
static void print_delimiters(subject *subj)
{
        delimiter *delim;
        delim = subj->last_delim;
        while (delim != NULL) {
                printf("Item at stack pos %p: %d %d %d next(%p) prev(%p)\n",
                       (void*)delim, delim->delim_char,
                       delim->can_open, delim->can_close,
                       (void*)delim->next, (void*)delim->previous);
                delim = delim->previous;
        }
}
*/

// Unlink `delim` from the delimiter stack and free it.  NULL is ignored.
static void remove_delimiter(subject *subj, delimiter *delim) {
  if (delim == NULL)
    return;
  if (delim->next == NULL) {
    // end of list:
    assert(delim == subj->last_delim);
    subj->last_delim = delim->previous;
  } else {
    delim->next->previous = delim->previous;
  }
  if (delim->previous != NULL) {
    delim->previous->next = delim->next;
  }
  subj->mem->free(delim);
}

// Remove and free the top entry of the bracket stack, if there is one.
static void pop_bracket(subject *subj) {
  bracket *b;
  if (subj->last_bracket == NULL)
    return;
  b = subj->last_bracket;
  subj->last_bracket = subj->last_bracket->previous;
  subj->mem->free(b);
}

// Push a new entry for delimiter character `c` onto the delimiter stack.
// `inl_text` is the text node holding the delimiter run; its literal length
// is recorded as the delimiter's length.
static void push_delimiter(subject *subj, unsigned char c, bool can_open,
                           bool can_close, cmark_node *inl_text) {
  delimiter *delim = (delimiter *)subj->mem->calloc(1, sizeof(delimiter));
  delim->delim_char = c;
  delim->can_open = can_open;
  delim->can_close = can_close;
  delim->inl_text = inl_text;
  delim->length = inl_text->as.literal.len;
  delim->previous = subj->last_delim;
  delim->next = NULL;
  if (delim->previous != NULL) {
    delim->previous->next = delim;
  }
  subj->last_delim = delim;
}

// Push a new, active entry onto the bracket stack.  The current position and
// the current top of the delimiter stack are recorded with it, and the entry
// below (if any) is marked as having a bracket after it.
static void push_bracket(subject *subj, bool image, cmark_node *inl_text) {
  bracket *b = (bracket *)subj->mem->calloc(1, sizeof(bracket));
  if (subj->last_bracket != NULL) {
    subj->last_bracket->bracket_after = true;
  }
  b->image = image;
  b->active = true;
  b->inl_text = inl_text;
  b->previous = subj->last_bracket;
  b->previous_delimiter = subj->last_delim;
  b->position = subj->pos;
  b->bracket_after = false;
  subj->last_bracket = b;
}

// Assumes the subject has a c at the current position.
// Consumes the delimiter run, returns a text node for it and, when the run
// can open or close, pushes it onto the delimiter stack.  Quotes are only
// pushed (and only replaced by curly quotes) when `smart` is set.
static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) {
  bufsize_t numdelims;
  cmark_node *inl_text;
  bool can_open, can_close;
  cmark_chunk contents;

  numdelims = scan_delims(subj, c, &can_open, &can_close);

  if (c == '\'' && smart) {
    contents = cmark_chunk_literal(RIGHTSINGLEQUOTE);
  } else if (c == '"' && smart) {
    contents =
        cmark_chunk_literal(can_close ? RIGHTDOUBLEQUOTE : LEFTDOUBLEQUOTE);
  } else {
    contents = cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims);
  }

  inl_text = make_str(subj, subj->pos - numdelims, subj->pos - 1, contents);

  if ((can_open || can_close) && (!(c == '\'' || c == '"') || smart)) {
    push_delimiter(subj, c, can_open, can_close, inl_text);
  }

  return inl_text;
}

// Assumes we have a hyphen at the current position.
// Returns a text node for a run of hyphens.  Without `smart`, or for a lone
// hyphen, exactly one "-" is consumed and returned.  Otherwise the whole run
// is consumed and rewritten as em dashes followed by en dashes.
static cmark_node *handle_hyphen(subject *subj, bool smart) {
  const int first = subj->pos;
  int run;
  int em_dashes = 0;
  int en_dashes = 0;

  advance(subj);

  if (!(smart && peek_char(subj) == '-')) {
    return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("-"));
  }

  // at least one more hyphen follows; swallow the rest of the run
  do {
    advance(subj);
  } while (peek_char(subj) == '-');

  run = subj->pos - first;

  if (run % 3 == 0) {
    // multiple of three: em dashes only
    em_dashes = run / 3;
  } else if (run % 2 == 0) {
    // even: en dashes only
    en_dashes = run / 2;
  } else {
    // otherwise finish with one en dash (remainder 2) or two (remainder 1)
    en_dashes = (run % 3 == 2) ? 1 : 2;
    em_dashes = (run - 2 * en_dashes) / 3;
  }

  cmark_strbuf out = CMARK_BUF_INIT(subj->mem);

  while (em_dashes-- > 0) {
    cmark_strbuf_puts(&out, EMDASH);
  }
  while (en_dashes-- > 0) {
    cmark_strbuf_puts(&out, ENDASH);
  }

  return make_str(subj, first, subj->pos - 1, cmark_chunk_buf_detach(&out));
}

// Assumes we have a period at the current position.
// With `smart` set, "..." becomes an ellipsis character and ".." is returned
// as a single text node; otherwise a single "." is consumed and returned.
static cmark_node *handle_period(subject *subj, bool smart) {
  advance(subj);
  if (smart && peek_char(subj) == '.') {
    advance(subj);
    if (peek_char(subj) == '.') {
      advance(subj);
      return make_str(subj, subj->pos - 3, subj->pos - 1, cmark_chunk_literal(ELLIPSES));
    } else {
      return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal(".."));
    }
  } else {
    return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("."));
  }
}

// Return the first inline syntax extension of `parser` that lists `c` among
// its special inline characters, or NULL if there is none.
static cmark_syntax_extension *get_extension_for_special_char(cmark_parser *parser, unsigned char c) {
  cmark_llist *tmp_ext;

  for (tmp_ext = parser->inline_syntax_extensions; tmp_ext; tmp_ext=tmp_ext->next) {
    cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp_ext->data;
    cmark_llist *tmp_char;
    for (tmp_char = ext->special_inline_chars; tmp_char; tmp_char=tmp_char->next) {
      // the character is stored directly in the list's data pointer
      unsigned char tmp_c = (unsigned char)(size_t)tmp_char->data;

      if (tmp_c == c) {
        return ext;
      }
    }
  }

  return NULL;
}

// Resolve the delimiters stacked above `stack_bottom` (the whole stack when
// it is NULL): closers are paired with openers to build emphasis, strong
// emphasis and extension inlines, and smart quotes are replaced.  Every
// delimiter above `stack_bottom` has been freed by the time this returns.
static void process_emphasis(cmark_parser *parser, subject *subj, delimiter *stack_bottom) {
  delimiter *closer = subj->last_delim;
  delimiter *opener;
  delimiter *old_closer;
  bool opener_found;
  // Per (closer length % 3, delimiter character): the delimiter below which
  // no opener can be found, so that failed searches are not repeated.
  delimiter *openers_bottom[3][128];
  int i;

  // initialize openers_bottom:
  memset(&openers_bottom, 0, sizeof(openers_bottom));
  for (i=0; i < 3; i++) {
    openers_bottom[i]['*'] = stack_bottom;
    openers_bottom[i]['_'] = stack_bottom;
    openers_bottom[i]['\''] = stack_bottom;
    openers_bottom[i]['"'] = stack_bottom;
  }

  // move back to first relevant delim.
  while (closer != NULL && closer->previous != stack_bottom) {
    closer = closer->previous;
  }

  // now move forward, looking for closers, and handling each
  while (closer != NULL) {
    cmark_syntax_extension *extension = get_extension_for_special_char(parser, closer->delim_char);
    if (closer->can_close) {
      // Now look backwards for first matching opener:
      opener = closer->previous;
      opener_found = false;
      while (opener != NULL && opener != stack_bottom &&
             opener != openers_bottom[closer->length % 3][closer->delim_char]) {
        if (opener->can_open && opener->delim_char == closer->delim_char) {
          // interior closer of size 2 can't match opener of size 1
          // or of size 1 can't match 2
          if (!(closer->can_open || opener->can_close) ||
              closer->length % 3 == 0 ||
              (opener->length + closer->length) % 3 != 0) {
            opener_found = true;
            break;
          }
        }
        opener = opener->previous;
      }
      // `closer` is advanced below; keep the delimiter being handled
      old_closer = closer;

      if (extension) {
        if (opener_found)
          closer = extension->insert_inline_from_delim(extension, parser, subj, opener, closer);
        else
          closer = closer->next;
      } else if (closer->delim_char == '*' || closer->delim_char == '_') {
        if (opener_found) {
          closer = S_insert_emph(subj, opener, closer);
        } else {
          closer = closer->next;
        }
      } else if (closer->delim_char == '\'') {
        // a closing quote always becomes a right quote; the opener, if
        // found, becomes a left quote
        cmark_chunk_free(subj->mem, &closer->inl_text->as.literal);
        closer->inl_text->as.literal = cmark_chunk_literal(RIGHTSINGLEQUOTE);
        if (opener_found) {
          cmark_chunk_free(subj->mem, &opener->inl_text->as.literal);
          opener->inl_text->as.literal = cmark_chunk_literal(LEFTSINGLEQUOTE);
        }
        closer = closer->next;
      } else if (closer->delim_char == '"') {
        cmark_chunk_free(subj->mem, &closer->inl_text->as.literal);
        closer->inl_text->as.literal = cmark_chunk_literal(RIGHTDOUBLEQUOTE);
        if (opener_found) {
          cmark_chunk_free(subj->mem, &opener->inl_text->as.literal);
          opener->inl_text->as.literal = cmark_chunk_literal(LEFTDOUBLEQUOTE);
        }
        closer = closer->next;
      }
      if (!opener_found) {
        // set lower bound for future searches for openers
        openers_bottom[old_closer->length % 3][old_closer->delim_char] =
		old_closer->previous;
        if (!old_closer->can_open) {
          // we can remove a closer that can't be an
          // opener, once we've seen there's no
          // matching opener:
          remove_delimiter(subj, old_closer);
        }
      }
    } else {
      closer = closer->next;
    }
  }
  // free all delimiters in list until stack_bottom:
  while (subj->last_delim != NULL && subj->last_delim != stack_bottom) {
    remove_delimiter(subj, subj->last_delim);
  }
}

// Wrap the inlines between `opener` and `closer` in a new emph node (one
// delimiter character used on each side) or strong node (two used).  The
// delimiters between the two are freed.  Returns the delimiter from which
// process_emphasis() should continue: `closer` itself when it still has
// characters left, otherwise the delimiter that followed it.
static delimiter *S_insert_emph(subject *subj, delimiter *opener,
                                delimiter *closer) {
  delimiter *delim, *tmp_delim;
  bufsize_t use_delims;
  cmark_node *opener_inl = opener->inl_text;
  cmark_node *closer_inl = closer->inl_text;
  bufsize_t opener_num_chars = opener_inl->as.literal.len;
  bufsize_t closer_num_chars = closer_inl->as.literal.len;
  cmark_node *tmp, *tmpnext, *emph;

  // calculate the actual number of characters used from this closer
  use_delims = (closer_num_chars >= 2 && opener_num_chars >= 2) ? 2 : 1;

  // remove used characters from associated inlines.
  opener_num_chars -= use_delims;
  closer_num_chars -= use_delims;
  opener_inl->as.literal.len = opener_num_chars;
  closer_inl->as.literal.len = closer_num_chars;

  // free delimiters between opener and closer
  delim = closer->previous;
  while (delim != NULL && delim != opener) {
    tmp_delim = delim->previous;
    remove_delimiter(subj, delim);
    delim = tmp_delim;
  }

  // create new emph or strong, and splice it in to our inlines
  // between the opener and closer
  emph = use_delims == 1 ? make_emph(subj->mem) : make_strong(subj->mem);

  tmp = opener_inl->next;
  while (tmp && tmp != closer_inl) {
    tmpnext = tmp->next;
    cmark_node_append_child(emph, tmp);
    tmp = tmpnext;
  }
  cmark_node_insert_after(opener_inl, emph);

  // the new node spans from the opener's start to the closer's end
  emph->start_line = opener_inl->start_line;
  emph->end_line = closer_inl->end_line;
  emph->start_column = opener_inl->start_column;
  emph->end_column = closer_inl->end_column;

  // if opener has 0 characters, remove it and its associated inline
  if (opener_num_chars == 0) {
    cmark_node_free(opener_inl);
    remove_delimiter(subj, opener);
  }

  // if closer has 0 characters, remove it and its associated inline
  if (closer_num_chars == 0) {
    // remove empty closer inline
    cmark_node_free(closer_inl);
    // remove closer from list
    tmp_delim = closer->next;
    remove_delimiter(subj, closer);
    closer = tmp_delim;
  }

  return closer;
}

// Parse backslash-escape or just a backslash, returning an inline.
// The parser's own backslash_ispunct predicate decides what may be escaped
// when it is set; cmark_ispunct is used otherwise.  A backslash directly
// before a line ending produces a hard line break.
static cmark_node *handle_backslash(cmark_parser *parser, subject *subj) {
  advance(subj);
  unsigned char nextchar = peek_char(subj);
  if ((parser->backslash_ispunct ? parser->backslash_ispunct : cmark_ispunct)(nextchar)) {
    // only ascii symbols and newline can be escaped
    advance(subj);
    return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
  } else if (!is_eof(subj) && skip_line_end(subj)) {
    return make_linebreak(subj->mem);
  } else {
    return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("\\"));
  }
}

// Parse an entity or a regular "&" string.
// Assumes the subject has an '&' character at the current position.
static cmark_node *handle_entity(subject *subj) { cmark_strbuf ent = CMARK_BUF_INIT(subj->mem); bufsize_t len; advance(subj); len = houdini_unescape_ent(&ent, subj->input.data + subj->pos, subj->input.len - subj->pos); if (len == 0) return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("&")); subj->pos += len; return make_str(subj, subj->pos - 1 - len, subj->pos - 1, cmark_chunk_buf_detach(&ent)); } // Clean a URL: remove surrounding whitespace, and remove \ that escape // punctuation. cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) { cmark_strbuf buf = CMARK_BUF_INIT(mem); cmark_chunk_trim(url); if (url->len == 0) { cmark_chunk result = CMARK_CHUNK_EMPTY; return result; } houdini_unescape_html_f(&buf, url->data, url->len); cmark_strbuf_unescape(&buf); return cmark_chunk_buf_detach(&buf); } cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title) { cmark_strbuf buf = CMARK_BUF_INIT(mem); unsigned char first, last; if (title->len == 0) { cmark_chunk result = CMARK_CHUNK_EMPTY; return result; } first = title->data[0]; last = title->data[title->len - 1]; // remove surrounding quotes if any: if ((first == '\'' && last == '\'') || (first == '(' && last == ')') || (first == '"' && last == '"')) { houdini_unescape_html_f(&buf, title->data + 1, title->len - 2); } else { houdini_unescape_html_f(&buf, title->data, title->len); } cmark_strbuf_unescape(&buf); return cmark_chunk_buf_detach(&buf); } // Parse an autolink or HTML tag. // Assumes the subject has a '<' character at the current position. 
// Tries, in this order: URI autolink, email autolink, HTML tag and (only with
// CMARK_OPT_LIBERAL_HTML_TAG) liberal HTML tag.  If none of them matches,
// only the '<' is consumed and returned as text.
static cmark_node *handle_pointy_brace(subject *subj, int options) {
  cmark_chunk *input = &subj->input;
  bufsize_t open_pos; // offset of the '<' itself
  bufsize_t span = 0;
  cmark_chunk text;
  cmark_node *html;

  advance(subj); // step over the '<'
  open_pos = subj->pos - 1;

  // URI autolink?
  span = scan_autolink_uri(input, subj->pos);
  if (span > 0) {
    // the link text excludes the closing '>'
    text = cmark_chunk_dup(input, subj->pos, span - 1);
    subj->pos += span;
    return make_autolink(subj, open_pos, subj->pos - 1, text, 0);
  }

  // email autolink?
  span = scan_autolink_email(input, subj->pos);
  if (span > 0) {
    text = cmark_chunk_dup(input, subj->pos, span - 1);
    subj->pos += span;
    return make_autolink(subj, open_pos, subj->pos - 1, text, 1);
  }

  // raw HTML: strict tag first, liberal tag only when the option allows it
  span = scan_html_tag(input, subj->pos);
  if (span <= 0 && (options & CMARK_OPT_LIBERAL_HTML_TAG)) {
    span = scan_liberal_html_tag(input, subj->pos);
  }
  if (span > 0) {
    // the raw HTML node keeps the leading '<'
    text = cmark_chunk_dup(input, open_pos, span + 1);
    subj->pos += span;
    html = make_raw_html(subj, open_pos, subj->pos - 1, text);
    adjust_subj_node_newlines(subj, html, span, 1, options);
    return html;
  }

  // nothing matched: a literal '<'
  return make_str(subj, open_pos, open_pos, cmark_chunk_literal("<"));
}

// Parse a link label.  Returns 1 if successful.
// Note:  unescaped brackets are not allowed in labels.
// The label begins with `[` and ends with the first `]` character
// encountered.  Backticks in labels do not start code spans.
static int link_label(subject *subj, cmark_chunk *raw_label) { bufsize_t startpos = subj->pos; int length = 0; unsigned char c; // advance past [ if (peek_char(subj) == '[') { advance(subj); } else { return 0; } while ((c = peek_char(subj)) && c != '[' && c != ']') { if (c == '\\') { advance(subj); length++; if (cmark_ispunct(peek_char(subj))) { advance(subj); length++; } } else { advance(subj); length++; } if (length > MAX_LINK_LABEL_LENGTH) { goto noMatch; } } if (c == ']') { // match found *raw_label = cmark_chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1)); cmark_chunk_trim(raw_label); advance(subj); // advance past ] return 1; } noMatch: subj->pos = startpos; // rewind return 0; } static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset, cmark_chunk *output) { bufsize_t i = offset; size_t nb_p = 0; while (i < input->len) { if (input->data[i] == '\\' && i + 1 < input-> len && cmark_ispunct(input->data[i+1])) i += 2; else if (input->data[i] == '(') { ++nb_p; ++i; if (nb_p > 32) return -1; } else if (input->data[i] == ')') { if (nb_p == 0) break; --nb_p; ++i; } else if (cmark_isspace(input->data[i])) { if (i == offset) { return -1; } break; } else { ++i; } } if (i >= input->len) return -1; { cmark_chunk result = {input->data + offset, i - offset, 0}; *output = result; } return i - offset; } static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset, cmark_chunk *output) { bufsize_t i = offset; if (i < input->len && input->data[i] == '<') { ++i; while (i < input->len) { if (input->data[i] == '>') { ++i; break; } else if (input->data[i] == '\\') i += 2; else if (input->data[i] == '\n' || input->data[i] == '<') return -1; else ++i; } } else { return manual_scan_link_url_2(input, offset, output); } if (i >= input->len) return -1; { cmark_chunk result = {input->data + offset + 1, i - 2 - offset, 0}; *output = result; } return i - offset; } // Return a link, an image, or a literal close bracket. 
// Called with the subject at a ']'.  The most recent bracket opener is tried,
// in this order, as an inline link/image, as a reference link/image and,
// with CMARK_OPT_FOOTNOTES, as a footnote reference.
//
// Returns NULL when a link, image or footnote reference was built (the new
// node has then already been inserted before the opener's text node), and a
// text node containing "]" otherwise.
static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) {
  bufsize_t initial_pos, after_link_text_pos;
  bufsize_t endurl, starttitle, endtitle, endall;
  bufsize_t sps, n;
  cmark_reference *ref = NULL;
  cmark_chunk url_chunk, title_chunk;
  cmark_chunk url, title;
  bracket *opener;
  cmark_node *inl;
  cmark_chunk raw_label;
  int found_label;
  cmark_node *tmp, *tmpnext;
  bool is_image;

  advance(subj); // advance past ]
  initial_pos = subj->pos;

  // get last [ or ![
  opener = subj->last_bracket;

  if (opener == NULL) {
    return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
  }

  if (!opener->active) {
    // take delimiter off stack
    pop_bracket(subj);
    return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
  }

  // If we got here, we matched a potential link/image text.
  // Now we check to see if it's a link/image.
  is_image = opener->image;

  after_link_text_pos = subj->pos;

  // First, look for an inline link.
  if (peek_char(subj) == '(' &&
      ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
      ((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps,
                                 &url_chunk)) > -1)) {

    // try to parse an explicit link:
    endurl = subj->pos + 1 + sps + n;
    starttitle = endurl + scan_spacechars(&subj->input, endurl);

    // ensure there are spaces btw url and title
    endtitle = (starttitle == endurl)
                   ? starttitle
                   : starttitle + scan_link_title(&subj->input, starttitle);

    endall = endtitle + scan_spacechars(&subj->input, endtitle);

    if (peek_at(subj, endall) == ')') {
      subj->pos = endall + 1;

      title_chunk =
          cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle);
      url = cmark_clean_url(subj->mem, &url_chunk);
      title = cmark_clean_title(subj->mem, &title_chunk);
      cmark_chunk_free(subj->mem, &url_chunk);
      cmark_chunk_free(subj->mem, &title_chunk);
      goto match;

    } else {
      // it could still be a shortcut reference link
      subj->pos = after_link_text_pos;
    }
  }

  // Next, look for a following [link label] that matches in refmap.
  // skip spaces
  raw_label = cmark_chunk_literal("");
  found_label = link_label(subj, &raw_label);
  if (!found_label) {
    // If we have a shortcut reference link, back up
    // to before the spaces we skipped.
    subj->pos = initial_pos;
  }

  // No label, or an empty one: use the text between the brackets as the
  // label instead, unless another bracket was opened after this opener.
  if ((!found_label || raw_label.len == 0) && !opener->bracket_after) {
    cmark_chunk_free(subj->mem, &raw_label);
    raw_label = cmark_chunk_dup(&subj->input, opener->position,
                                initial_pos - opener->position - 1);
    found_label = true;
  }

  if (found_label) {
    ref = (cmark_reference *)cmark_map_lookup(subj->refmap, &raw_label);
    cmark_chunk_free(subj->mem, &raw_label);
  }

  if (ref != NULL) { // found
    // the link gets its own copies of the reference's url and title
    url = chunk_clone(subj->mem, &ref->url);
    title = chunk_clone(subj->mem, &ref->title);
    goto match;
  } else {
    goto noMatch;
  }

noMatch:
  // If we fall through to here, it means we didn't match a link.
  // What if we're a footnote link?
  if (parser->options & CMARK_OPT_FOOTNOTES &&
      opener->inl_text->next &&
      opener->inl_text->next->type == CMARK_NODE_TEXT) {

    cmark_chunk *literal = &opener->inl_text->next->as.literal;

    // look back to the opening '[', and skip ahead to the next character
    // if we're looking at a '[^' sequence, and there is other text or nodes
    // after the ^, let's call it a footnote reference.
    if ((literal->len > 0 && literal->data[0] == '^') && (literal->len > 1 || opener->inl_text->next->next)) {

      // Before we got this far, the `handle_close_bracket` function may have
      // advanced the current state beyond our footnote's actual closing
      // bracket, ie if it went looking for a `link_label`.
      // Let's just rewind the subject's position:
      subj->pos = initial_pos;

      cmark_node *fnref = make_simple(subj->mem, CMARK_NODE_FOOTNOTE_REFERENCE);

      // the start and end of the footnote ref is the opening and closing brace
      // i.e. the subject's current position, and the opener's start_column
      int fnref_end_column = subj->pos + subj->column_offset + subj->block_offset;
      int fnref_start_column = opener->inl_text->start_column;

      // any given node delineates a substring of the line being processed,
      // with the remainder of the line being pointed to thru its 'literal'
      // struct member.
      // here, we copy the literal's pointer, moving it past the '^' character
      // for a length equal to the size of footnote reference text.
      // i.e. end_col minus start_col, minus the [ and the ^ characters
      //
      // this copies the footnote reference string, even if between the
      // `opener` and the subject's current position there are other nodes
      //
      // (first, check for underflows)
      if ((fnref_start_column + 2) <= fnref_end_column) {
        fnref->as.literal = cmark_chunk_dup(literal, 1, (fnref_end_column - fnref_start_column) - 2);
      } else {
        fnref->as.literal = cmark_chunk_dup(literal, 1, 0);
      }

      fnref->start_line = fnref->end_line = subj->line;
      fnref->start_column = fnref_start_column;
      fnref->end_column = fnref_end_column;

      // we then replace the opener with this new fnref node, the net effect
      // being replacing the opening '[' text node with a `^footnote-ref]` node.
      cmark_node_insert_before(opener->inl_text, fnref);

      process_emphasis(parser, subj, opener->previous_delimiter);
      // sometimes, the footnote reference text gets parsed into multiple nodes
      // i.e. '[^example]' parsed into '[', '^exam', 'ple]'.
      // this happens for ex with the autolink extension. when the autolinker
      // finds the 'w' character, it will split the text into multiple nodes
      // in hopes of being able to match a 'www.' substring.
      //
      // because this function is called one character at a time via the
      // `parse_inlines` function, and the current subj->pos is pointing at the
      // closing ] brace, and because we copy all the text between the [ ]
      // braces, we should be able to safely ignore and delete any nodes after
      // the opener->inl_text->next.
      //
      // therefore, here we walk thru the list and free them all up
      cmark_node *next_node;
      cmark_node *current_node = opener->inl_text->next;
      while(current_node) {
        next_node = current_node->next;
        cmark_node_free(current_node);
        current_node = next_node;
      }

      cmark_node_free(opener->inl_text);

      pop_bracket(subj);
      return NULL;
    }
  }

  pop_bracket(subj); // remove this opener from delimiter list
  subj->pos = initial_pos;
  return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));

match:
  // `url` and `title` have been set by whichever branch jumped here.
  inl = make_simple(subj->mem, is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK);
  inl->as.link.url = url;
  inl->as.link.title = title;
  inl->start_line = inl->end_line = subj->line;
  inl->start_column = opener->inl_text->start_column;
  inl->end_column = subj->pos + subj->column_offset + subj->block_offset;
  cmark_node_insert_before(opener->inl_text, inl);
  // Add link text:
  tmp = opener->inl_text->next;
  while (tmp) {
    tmpnext = tmp->next;
    cmark_node_append_child(inl, tmp);
    tmp = tmpnext;
  }

  // Free the bracket [:
  cmark_node_free(opener->inl_text);

  // resolve the delimiters that were pushed after the opening bracket
  process_emphasis(parser, subj, opener->previous_delimiter);
  pop_bracket(subj);

  // Now, if we have a link, we also want to deactivate earlier link
  // delimiters. (This code can be removed if we decide to allow links
  // inside links.)
  if (!is_image) {
    opener = subj->last_bracket;
    while (opener != NULL) {
      if (!opener->image) {
        if (!opener->active) {
          // everything below an inactive link opener is inactive already
          break;
        } else {
          opener->active = false;
        }
      }
      opener = opener->previous;
    }
  }

  return NULL;
}

// Parse a hard or soft linebreak, returning an inline.
// Assumes the subject has a cr or newline at the current position.
static cmark_node *handle_newline(subject *subj) { bufsize_t nlpos = subj->pos; // skip over cr, crlf, or lf: if (peek_at(subj, subj->pos) == '\r') { advance(subj); } if (peek_at(subj, subj->pos) == '\n') { advance(subj); } ++subj->line; subj->column_offset = -subj->pos; // skip spaces at beginning of line skip_spaces(subj); if (nlpos > 1 && peek_at(subj, nlpos - 1) == ' ' && peek_at(subj, nlpos - 2) == ' ') { return make_linebreak(subj->mem); } else { return make_softbreak(subj->mem); } } // "\r\n\\`&_*[]pos + 1; while (n < subj->input.len) { if (SPECIAL_CHARS[subj->input.data[n]]) return n; if (options & CMARK_OPT_SMART && SMART_PUNCT_CHARS[subj->input.data[n]]) return n; n++; } return subj->input.len; } void cmark_inlines_add_special_character(unsigned char c, bool emphasis) { SPECIAL_CHARS[c] = 1; if (emphasis) SKIP_CHARS[c] = 1; } void cmark_inlines_remove_special_character(unsigned char c, bool emphasis) { SPECIAL_CHARS[c] = 0; if (emphasis) SKIP_CHARS[c] = 0; } static cmark_node *try_extensions(cmark_parser *parser, cmark_node *parent, unsigned char c, subject *subj) { cmark_node *res = NULL; cmark_llist *tmp; for (tmp = parser->inline_syntax_extensions; tmp; tmp = tmp->next) { cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data; res = ext->match_inline(ext, parser, parent, c, subj); if (res) break; } return res; } // Parse an inline, advancing subject, and add it as a child of parent. // Return 0 if no inline can be parsed, 1 otherwise. 
// Dispatches on the character at the subject's current position: each
// special character is handed to its dedicated handler; anything else is
// first offered to the registered inline extensions and, if none of them
// produces a node, consumed as a run of plain text.  Whatever node results
// (if any) is appended to `parent`.
static int parse_inline(cmark_parser *parser, subject *subj,
                        cmark_node *parent, int options) {
  const bool smart = (options & CMARK_OPT_SMART) != 0;
  cmark_node *node = NULL;
  unsigned char ch = peek_char(subj);

  // peek_char yielded 0: no inline can be parsed here.
  if (ch == 0)
    return 0;

  switch (ch) {
  case '\r':
  case '\n':
    node = handle_newline(subj);
    break;

  case '`':
    node = handle_backticks(subj, options);
    break;

  case '\\':
    node = handle_backslash(parser, subj);
    break;

  case '&':
    node = handle_entity(subj);
    break;

  case '<':
    node = handle_pointy_brace(subj, options);
    break;

  case '*':
  case '_':
  case '\'':
  case '"':
    node = handle_delim(subj, ch, smart);
    break;

  case '-':
    node = handle_hyphen(subj, smart);
    break;

  case '.':
    node = handle_period(subj, smart);
    break;

  case '[':
    // Emit the '[' as text and remember it as a (non-image) bracket opener.
    advance(subj);
    node = make_str(subj, subj->pos - 1, subj->pos - 1,
                    cmark_chunk_literal("["));
    push_bracket(subj, false, node);
    break;

  case ']':
    node = handle_close_bracket(parser, subj);
    break;

  case '!':
    advance(subj);
    if (peek_char(subj) != '[' || peek_char_n(subj, 1) == '^') {
      // A lone '!', or '!' followed by '[^': emit just the '!' as text.
      // (Presumably the '[^' is left alone so it can still be read as a
      // footnote reference.)
      node = make_str(subj, subj->pos - 1, subj->pos - 1,
                      cmark_chunk_literal("!"));
    } else {
      // '![' opens an image bracket.
      advance(subj);
      node = make_str(subj, subj->pos - 2, subj->pos - 1,
                      cmark_chunk_literal("!["));
      push_bracket(subj, true, node);
    }
    break;

  default:
    node = try_extensions(parser, parent, ch, subj);
    if (node == NULL) {
      // No extension claimed the character: take everything up to the next
      // special character as one text node.
      bufsize_t text_end = subject_find_special_char(subj, options);
      bufsize_t text_start = subj->pos;
      cmark_chunk text =
          cmark_chunk_dup(&subj->input, text_start, text_end - text_start);

      subj->pos = text_end;

      // if the run stops at a line ending, strip its trailing spaces.
      if (S_is_line_end_char(peek_char(subj))) {
        cmark_chunk_rtrim(&text);
      }

      node = make_str(subj, text_start, text_end - 1, text);
    }
    break;
  }

  if (node != NULL) {
    cmark_node_append_child(parent, node);
  }

  return 1;
}

// Parse inlines from parent's string_content, adding as children of parent.
void cmark_parse_inlines(cmark_parser *parser, cmark_node *parent, cmark_map *refmap, int options) { subject subj; cmark_chunk content = {parent->content.ptr, parent->content.size, 0}; subject_from_buf(parser->mem, parent->start_line, parent->start_column - 1 + parent->internal_offset, &subj, &content, refmap); cmark_chunk_rtrim(&subj.input); while (!is_eof(&subj) && parse_inline(parser, &subj, parent, options)) ; process_emphasis(parser, &subj, NULL); // free bracket and delim stack while (subj.last_delim) { remove_delimiter(&subj, subj.last_delim); } while (subj.last_bracket) { pop_bracket(&subj); } } // Parse zero or more space characters, including at most one newline. static void spnl(subject *subj) { skip_spaces(subj); if (skip_line_end(subj)) { skip_spaces(subj); } } // Parse reference. Assumes string begins with '[' character. // Modify refmap if a reference is encountered. // Return 0 if no reference found, otherwise position of subject // after reference is parsed. bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input, cmark_map *refmap) { subject subj; cmark_chunk lab; cmark_chunk url; cmark_chunk title; bufsize_t matchlen = 0; bufsize_t beforetitle; subject_from_buf(mem, -1, 0, &subj, input, NULL); // parse label: if (!link_label(&subj, &lab) || lab.len == 0) return 0; // colon: if (peek_char(&subj) == ':') { advance(&subj); } else { return 0; } // parse link url: spnl(&subj); if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1) { subj.pos += matchlen; } else { return 0; } // parse optional link_title beforetitle = subj.pos; spnl(&subj); matchlen = subj.pos == beforetitle ? 
0 : scan_link_title(&subj.input, subj.pos); if (matchlen) { title = cmark_chunk_dup(&subj.input, subj.pos, matchlen); subj.pos += matchlen; } else { subj.pos = beforetitle; title = cmark_chunk_literal(""); } // parse final spaces and newline: skip_spaces(&subj); if (!skip_line_end(&subj)) { if (matchlen) { // try rewinding before title subj.pos = beforetitle; skip_spaces(&subj); if (!skip_line_end(&subj)) { return 0; } } else { return 0; } } // insert reference into refmap cmark_reference_create(refmap, &lab, &url, &title); return subj.pos; } unsigned char cmark_inline_parser_peek_char(cmark_inline_parser *parser) { return peek_char(parser); } unsigned char cmark_inline_parser_peek_at(cmark_inline_parser *parser, bufsize_t pos) { return peek_at(parser, pos); } int cmark_inline_parser_is_eof(cmark_inline_parser *parser) { return is_eof(parser); } static char * my_strndup (const char *s, size_t n) { char *result; size_t len = strlen (s); if (n < len) len = n; result = (char *) malloc (len + 1); if (!result) return 0; result[len] = '\0'; return (char *) memcpy (result, s, len); } char *cmark_inline_parser_take_while(cmark_inline_parser *parser, cmark_inline_predicate pred) { unsigned char c; bufsize_t startpos = parser->pos; bufsize_t len = 0; while ((c = peek_char(parser)) && (*pred)(c)) { advance(parser); len++; } return my_strndup((const char *) parser->input.data + startpos, len); } void cmark_inline_parser_push_delimiter(cmark_inline_parser *parser, unsigned char c, int can_open, int can_close, cmark_node *inl_text) { push_delimiter(parser, c, can_open != 0, can_close != 0, inl_text); } void cmark_inline_parser_remove_delimiter(cmark_inline_parser *parser, delimiter *delim) { remove_delimiter(parser, delim); } int cmark_inline_parser_scan_delimiters(cmark_inline_parser *parser, int max_delims, unsigned char c, int *left_flanking, int *right_flanking, int *punct_before, int *punct_after) { int numdelims = 0; bufsize_t before_char_pos; int32_t after_char = 0; 
int32_t before_char = 0; int len; bool space_before, space_after; if (parser->pos == 0) { before_char = 10; } else { before_char_pos = parser->pos - 1; // walk back to the beginning of the UTF_8 sequence: while (peek_at(parser, before_char_pos) >> 6 == 2 && before_char_pos > 0) { before_char_pos -= 1; } len = cmark_utf8proc_iterate(parser->input.data + before_char_pos, parser->pos - before_char_pos, &before_char); if (len == -1) { before_char = 10; } } while (peek_char(parser) == c && numdelims < max_delims) { numdelims++; advance(parser); } len = cmark_utf8proc_iterate(parser->input.data + parser->pos, parser->input.len - parser->pos, &after_char); if (len == -1) { after_char = 10; } *punct_before = cmark_utf8proc_is_punctuation(before_char); *punct_after = cmark_utf8proc_is_punctuation(after_char); space_before = cmark_utf8proc_is_space(before_char) != 0; space_after = cmark_utf8proc_is_space(after_char) != 0; *left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) && !(*punct_after && !space_before && !*punct_before); *right_flanking = numdelims > 0 && !cmark_utf8proc_is_space(before_char) && !(*punct_before && !space_after && !*punct_after); return numdelims; } void cmark_inline_parser_advance_offset(cmark_inline_parser *parser) { advance(parser); } int cmark_inline_parser_get_offset(cmark_inline_parser *parser) { return parser->pos; } void cmark_inline_parser_set_offset(cmark_inline_parser *parser, int offset) { parser->pos = offset; } int cmark_inline_parser_get_column(cmark_inline_parser *parser) { return parser->pos + 1 + parser->column_offset + parser->block_offset; } cmark_chunk *cmark_inline_parser_get_chunk(cmark_inline_parser *parser) { return &parser->input; } int cmark_inline_parser_in_bracket(cmark_inline_parser *parser, int image) { for (bracket *b = parser->last_bracket; b; b = b->previous) if (b->active && b->image == (image != 0)) return 1; return 0; } void cmark_node_unput(cmark_node *node, int n) { node = node->last_child; while 
(n > 0 && node && node->type == CMARK_NODE_TEXT) { if (node->as.literal.len < n) { n -= node->as.literal.len; node->as.literal.len = 0; } else { node->as.literal.len -= n; n = 0; } node = node->prev; } } delimiter *cmark_inline_parser_get_last_delimiter(cmark_inline_parser *parser) { return parser->last_delim; } int cmark_inline_parser_get_line(cmark_inline_parser *parser) { return parser->line; } cmarkgfm/third_party/cmark/src/man.c0000644000175000017500000001411714210444464017670 0ustar carstencarsten#include #include #include #include #include "config.h" #include "cmark-gfm.h" #include "node.h" #include "buffer.h" #include "utf8.h" #include "render.h" #include "syntax_extension.h" #define OUT(s, wrap, escaping) renderer->out(renderer, node, s, wrap, escaping) #define LIT(s) renderer->out(renderer, node, s, false, LITERAL) #define CR() renderer->cr(renderer) #define BLANKLINE() renderer->blankline(renderer) #define LIST_NUMBER_SIZE 20 // Functions to convert cmark_nodes to groff man strings. 
static void S_outc(cmark_renderer *renderer, cmark_node *node, cmark_escaping escape, int32_t c, unsigned char nextc) { (void)(nextc); if (escape == LITERAL) { cmark_render_code_point(renderer, c); return; } switch (c) { case 46: if (renderer->begin_line) { cmark_render_ascii(renderer, "\\&."); } else { cmark_render_code_point(renderer, c); } break; case 39: if (renderer->begin_line) { cmark_render_ascii(renderer, "\\&'"); } else { cmark_render_code_point(renderer, c); } break; case 45: cmark_render_ascii(renderer, "\\-"); break; case 92: cmark_render_ascii(renderer, "\\e"); break; case 8216: // left single quote cmark_render_ascii(renderer, "\\[oq]"); break; case 8217: // right single quote cmark_render_ascii(renderer, "\\[cq]"); break; case 8220: // left double quote cmark_render_ascii(renderer, "\\[lq]"); break; case 8221: // right double quote cmark_render_ascii(renderer, "\\[rq]"); break; case 8212: // em dash cmark_render_ascii(renderer, "\\[em]"); break; case 8211: // en dash cmark_render_ascii(renderer, "\\[en]"); break; default: cmark_render_code_point(renderer, c); } } static int S_render_node(cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options) { cmark_node *tmp; int list_number; bool entering = (ev_type == CMARK_EVENT_ENTER); bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options); if (node->extension && node->extension->man_render_func) { node->extension->man_render_func(node->extension, renderer, node, ev_type, options); return 1; } switch (node->type) { case CMARK_NODE_DOCUMENT: if (entering) { /* Define a strikethrough macro */ /* Commenting out because this makes tests fail LIT(".de ST"); CR(); LIT(".nr ww \\w'\\\\$1'"); CR(); LIT("\\Z@\\v'-.25m'\\l'\\\\n[ww]u'@\\\\$1"); CR(); LIT(".."); CR(); */ } break; case CMARK_NODE_BLOCK_QUOTE: if (entering) { CR(); LIT(".RS"); CR(); } else { CR(); LIT(".RE"); CR(); } break; case CMARK_NODE_LIST: break; case CMARK_NODE_ITEM: if (entering) { CR(); LIT(".IP "); if 
(cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { LIT("\\[bu] 2"); } else { list_number = cmark_node_get_list_start(node->parent); tmp = node; while (tmp->prev) { tmp = tmp->prev; list_number += 1; } char list_number_s[LIST_NUMBER_SIZE]; snprintf(list_number_s, LIST_NUMBER_SIZE, "\"%d.\" 4", list_number); LIT(list_number_s); } CR(); } else { CR(); } break; case CMARK_NODE_HEADING: if (entering) { CR(); LIT(cmark_node_get_heading_level(node) == 1 ? ".SH" : ".SS"); CR(); } else { CR(); } break; case CMARK_NODE_CODE_BLOCK: CR(); LIT(".IP\n.nf\n\\f[C]\n"); OUT(cmark_node_get_literal(node), false, NORMAL); CR(); LIT("\\f[]\n.fi"); CR(); break; case CMARK_NODE_HTML_BLOCK: break; case CMARK_NODE_CUSTOM_BLOCK: CR(); OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), false, LITERAL); CR(); break; case CMARK_NODE_THEMATIC_BREAK: CR(); LIT(".PP\n * * * * *"); CR(); break; case CMARK_NODE_PARAGRAPH: if (entering) { // no blank line if first paragraph in list: if (node->parent && node->parent->type == CMARK_NODE_ITEM && node->prev == NULL) { // no blank line or .PP } else { CR(); LIT(".PP"); CR(); } } else { CR(); } break; case CMARK_NODE_TEXT: OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); break; case CMARK_NODE_LINEBREAK: LIT(".PD 0\n.P\n.PD"); CR(); break; case CMARK_NODE_SOFTBREAK: if (options & CMARK_OPT_HARDBREAKS) { LIT(".PD 0\n.P\n.PD"); CR(); } else if (renderer->width == 0 && !(CMARK_OPT_NOBREAKS & options)) { CR(); } else { OUT(" ", allow_wrap, LITERAL); } break; case CMARK_NODE_CODE: LIT("\\f[C]"); OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); LIT("\\f[]"); break; case CMARK_NODE_HTML_INLINE: break; case CMARK_NODE_CUSTOM_INLINE: OUT(entering ? 
cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), false, LITERAL); break; case CMARK_NODE_STRONG: if (entering) { LIT("\\f[B]"); } else { LIT("\\f[]"); } break; case CMARK_NODE_EMPH: if (entering) { LIT("\\f[I]"); } else { LIT("\\f[]"); } break; case CMARK_NODE_LINK: if (!entering) { LIT(" ("); OUT(cmark_node_get_url(node), allow_wrap, URL); LIT(")"); } break; case CMARK_NODE_IMAGE: if (entering) { LIT("[IMAGE: "); } else { LIT("]"); } break; case CMARK_NODE_FOOTNOTE_DEFINITION: case CMARK_NODE_FOOTNOTE_REFERENCE: // TODO break; default: assert(false); break; } return 1; } char *cmark_render_man(cmark_node *root, int options, int width) { return cmark_render_man_with_mem(root, options, width, cmark_node_mem(root)); } char *cmark_render_man_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) { return cmark_render(mem, root, options, width, S_outc, S_render_node); } cmarkgfm/third_party/cmark/src/cmark.c0000644000175000017500000000244114210444464020207 0ustar carstencarsten#include #include #include #include "registry.h" #include "node.h" #include "houdini.h" #include "cmark-gfm.h" #include "buffer.h" cmark_node_type CMARK_NODE_LAST_BLOCK = CMARK_NODE_FOOTNOTE_DEFINITION; cmark_node_type CMARK_NODE_LAST_INLINE = CMARK_NODE_FOOTNOTE_REFERENCE; int cmark_version() { return CMARK_GFM_VERSION; } const char *cmark_version_string() { return CMARK_GFM_VERSION_STRING; } static void *xcalloc(size_t nmem, size_t size) { void *ptr = calloc(nmem, size); if (!ptr) { fprintf(stderr, "[cmark] calloc returned null pointer, aborting\n"); abort(); } return ptr; } static void *xrealloc(void *ptr, size_t size) { void *new_ptr = realloc(ptr, size); if (!new_ptr) { fprintf(stderr, "[cmark] realloc returned null pointer, aborting\n"); abort(); } return new_ptr; } static void xfree(void *ptr) { free(ptr); } cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, xfree}; cmark_mem *cmark_get_default_mem_allocator() { return &CMARK_DEFAULT_MEM_ALLOCATOR; } char 
*cmark_markdown_to_html(const char *text, size_t len, int options) { cmark_node *doc; char *result; doc = cmark_parse_document(text, len, options); result = cmark_render_html(doc, options, NULL); cmark_node_free(doc); return result; } cmarkgfm/third_party/cmark/src/plugin.h0000644000175000017500000000077714210444464020427 0ustar carstencarsten#ifndef CMARK_PLUGIN_H #define CMARK_PLUGIN_H #ifdef __cplusplus extern "C" { #endif #include "cmark-gfm.h" #include "cmark-gfm-extension_api.h" /** * cmark_plugin: * * A plugin structure, which should be filled by plugin's * init functions. */ struct cmark_plugin { cmark_llist *syntax_extensions; }; cmark_llist * cmark_plugin_steal_syntax_extensions(cmark_plugin *plugin); cmark_plugin * cmark_plugin_new(void); void cmark_plugin_free(cmark_plugin *plugin); #ifdef __cplusplus } #endif #endif cmarkgfm/third_party/cmark/src/blocks.c0000644000175000017500000014160714210444464020377 0ustar carstencarsten/** * Block parsing implementation. * * For a high-level overview of the block parsing process, * see http://spec.commonmark.org/0.24/#phase-1-block-structure */ #include #include #include #include "cmark_ctype.h" #include "syntax_extension.h" #include "config.h" #include "parser.h" #include "cmark-gfm.h" #include "node.h" #include "references.h" #include "utf8.h" #include "scanners.h" #include "inlines.h" #include "houdini.h" #include "buffer.h" #include "footnotes.h" #define CODE_INDENT 4 #define TAB_STOP 4 #ifndef MIN #define MIN(x, y) ((x < y) ? 
x : y) #endif #define peek_at(i, n) (i)->data[n] static bool S_last_line_blank(const cmark_node *node) { return (node->flags & CMARK_NODE__LAST_LINE_BLANK) != 0; } static bool S_last_line_checked(const cmark_node *node) { return (node->flags & CMARK_NODE__LAST_LINE_CHECKED) != 0; } static CMARK_INLINE cmark_node_type S_type(const cmark_node *node) { return (cmark_node_type)node->type; } static void S_set_last_line_blank(cmark_node *node, bool is_blank) { if (is_blank) node->flags |= CMARK_NODE__LAST_LINE_BLANK; else node->flags &= ~CMARK_NODE__LAST_LINE_BLANK; } static void S_set_last_line_checked(cmark_node *node) { node->flags |= CMARK_NODE__LAST_LINE_CHECKED; } static CMARK_INLINE bool S_is_line_end_char(char c) { return (c == '\n' || c == '\r'); } static CMARK_INLINE bool S_is_space_or_tab(char c) { return (c == ' ' || c == '\t'); } static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, bool eof); static void S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t bytes); static cmark_node *make_block(cmark_mem *mem, cmark_node_type tag, int start_line, int start_column) { cmark_node *e; e = (cmark_node *)mem->calloc(1, sizeof(*e)); cmark_strbuf_init(mem, &e->content, 32); e->type = (uint16_t)tag; e->flags = CMARK_NODE__OPEN; e->start_line = start_line; e->start_column = start_column; e->end_line = start_line; return e; } // Create a root document node. 
// The document root is a CMARK_NODE_DOCUMENT block that starts at
// line 1, column 1.
static cmark_node *make_document(cmark_mem *mem) {
  cmark_node *e = make_block(mem, CMARK_NODE_DOCUMENT, 1, 1);
  return e;
}

// Registers `extension` with `parser`.
// Every extension is appended to parser->syntax_extensions; extensions that
// provide `match_inline` or `insert_inline_from_delim` are additionally
// appended to parser->inline_syntax_extensions.
// Always returns 1.
int cmark_parser_attach_syntax_extension(cmark_parser *parser,
                                         cmark_syntax_extension *extension) {
  parser->syntax_extensions = cmark_llist_append(
      parser->mem, parser->syntax_extensions, extension);
  if (extension->match_inline || extension->insert_inline_from_delim) {
    parser->inline_syntax_extensions = cmark_llist_append(
        parser->mem, parser->inline_syntax_extensions, extension);
  }

  return 1;
}

// Frees the parser's document tree and reference map, if present.
// The pointers themselves are not cleared here.
static void cmark_parser_dispose(cmark_parser *parser) {
  if (parser->root)
    cmark_node_free(parser->root);

  if (parser->refmap)
    cmark_map_free(parser->refmap);
}

// Returns `parser` to its initial state with a fresh, empty document.
// The extension lists, the options and the allocator are carried over; every
// other field is zeroed and then re-initialised.
static void cmark_parser_reset(cmark_parser *parser) {
  // Save the fields that must survive the memset below.
  cmark_llist *saved_exts = parser->syntax_extensions;
  cmark_llist *saved_inline_exts = parser->inline_syntax_extensions;
  int saved_options = parser->options;
  cmark_mem *saved_mem = parser->mem;

  cmark_parser_dispose(parser);

  memset(parser, 0, sizeof(cmark_parser));
  // The allocator has to be restored first: the initialisers below use it.
  parser->mem = saved_mem;

  // NOTE(review): curline/linebuf are re-initialised without being freed
  // here; presumably callers release them before resetting — confirm.
  cmark_strbuf_init(parser->mem, &parser->curline, 256);
  cmark_strbuf_init(parser->mem, &parser->linebuf, 0);

  cmark_node *document = make_document(parser->mem);

  parser->refmap = cmark_reference_map_new(parser->mem);
  parser->root = document;
  parser->current = document;

  parser->syntax_extensions = saved_exts;
  parser->inline_syntax_extensions = saved_inline_exts;
  parser->options = saved_options;
}

// Allocates a parser with `mem`, records the allocator and `options`, and
// puts it into its initial state via cmark_parser_reset().
// NOTE(review): the calloc result is used unchecked; presumably the
// allocator never returns NULL — confirm.
cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) {
  cmark_parser *parser = (cmark_parser *)mem->calloc(1, sizeof(cmark_parser));
  parser->mem = mem;
  parser->options = options;
  cmark_parser_reset(parser);
  return parser;
}

// Same as cmark_parser_new_with_mem(), using CMARK_DEFAULT_MEM_ALLOCATOR.
cmark_parser *cmark_parser_new(int options) {
  extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR;
  return cmark_parser_new_with_mem(options, &CMARK_DEFAULT_MEM_ALLOCATOR);
}

// Releases everything the parser holds (document tree, reference map, line
// buffers, both extension lists) and then the parser object itself.
void cmark_parser_free(cmark_parser *parser) {
  // Keep the allocator in a local: it is needed to free `parser` last.
  cmark_mem *mem = parser->mem;
  cmark_parser_dispose(parser);
  cmark_strbuf_free(&parser->curline);
  cmark_strbuf_free(&parser->linebuf);
  cmark_llist_free(parser->mem, parser->syntax_extensions);
  cmark_llist_free(parser->mem, parser->inline_syntax_extensions);
  mem->free(parser);
}

// Forward declaration: finalize() is defined further down in this file.
static cmark_node *finalize(cmark_parser *parser, cmark_node *b);

// Returns true if, starting at `offset`, the buffer holds only spaces and
// tabs up to the first '\r' or '\n' (or up to the end of the buffer);
// returns false as soon as any other character is found.
static bool is_blank(cmark_strbuf *s, bufsize_t offset) {
  while (offset < s->size) {
    switch (s->ptr[offset]) {
    case '\r':
    case '\n':
      return true;
    case ' ':
      offset++;
      break;
    case '\t':
      offset++;
      break;
    default:
      return false;
    }
  }

  return true;
}

// True for paragraph, heading and code block node types.
static CMARK_INLINE bool accepts_lines(cmark_node_type block_type) {
  return (block_type == CMARK_NODE_PARAGRAPH ||
          block_type == CMARK_NODE_HEADING ||
          block_type == CMARK_NODE_CODE_BLOCK);
}

// True if `node` holds inline content.  A node's extension gets the final
// say when it provides `contains_inlines_func`; otherwise only paragraphs
// and headings qualify.
static CMARK_INLINE bool contains_inlines(cmark_node *node) {
  if (node->extension && node->extension->contains_inlines_func) {
    return node->extension->contains_inlines_func(node->extension, node) != 0;
  }

  return (node->type == CMARK_NODE_PARAGRAPH ||
          node->type == CMARK_NODE_HEADING);
}

// Appends the part of `ch` that starts at parser->offset to node->content.
// `node` must still be open.  If a tab was only partially consumed, the tab
// byte is skipped and replaced by the spaces needed to reach the next tab
// stop.
static void add_line(cmark_node *node, cmark_chunk *ch, cmark_parser *parser) {
  int chars_to_tab;
  int i;
  assert(node->flags & CMARK_NODE__OPEN);
  if (parser->partially_consumed_tab) {
    parser->offset += 1; // skip over tab
    // add space characters:
    chars_to_tab = TAB_STOP - (parser->column % TAB_STOP);
    for (i = 0; i < chars_to_tab; i++) {
      cmark_strbuf_putc(&node->content, ' ');
    }
  }
  cmark_strbuf_put(&node->content, ch->data + parser->offset,
                   ch->len - parser->offset);
}

// Cuts `ln` at the first line-ending character found at or after its last
// character that is not a space, tab or line ending.  If the buffer holds
// nothing but such characters it is cleared.
static void remove_trailing_blank_lines(cmark_strbuf *ln) {
  bufsize_t i;
  unsigned char c;

  // Walk backwards to the last character that is not blank/line-ending.
  for (i = ln->size - 1; i >= 0; --i) {
    c = ln->ptr[i];

    if (c != ' ' && c != '\t' && !S_is_line_end_char(c))
      break;
  }

  // Nothing but blanks and line endings: drop everything.
  if (i < 0) {
    cmark_strbuf_clear(ln);
    return;
  }

  // Walk forwards again and truncate at the first line ending.
  for (; i < ln->size; ++i) {
    c = ln->ptr[i];

    if (!S_is_line_end_char(c))
      continue;

    cmark_strbuf_truncate(ln, i);
    break;
  }
}

// Check to see if a node ends with a blank line, descending
// if needed into lists and sublists.
// The answer for a node is only computed once: nodes that already carry the
// "last line checked" flag just report their "last line blank" flag.
// Otherwise the node is marked as checked and, for lists and items that have
// children, the question is passed on to the last child.
static bool S_ends_with_blank_line(cmark_node *node) {
  if (S_last_line_checked(node)) {
    return(S_last_line_blank(node));
  } else if ((S_type(node) == CMARK_NODE_LIST ||
              S_type(node) == CMARK_NODE_ITEM) && node->last_child) {
    S_set_last_line_checked(node);
    return(S_ends_with_blank_line(node->last_child));
  } else {
    S_set_last_line_checked(node);
    return (S_last_line_blank(node));
  }
}

// Strips leading link reference definitions from the content of `b`,
// registering each one in parser->refmap.
// returns true if content remains after link defs are resolved.
static bool resolve_reference_link_definitions(
    cmark_parser *parser,
    cmark_node *b) {
  bufsize_t pos;
  cmark_strbuf *node_content = &b->content;
  // `chunk` is a sliding view over the content buffer.
  cmark_chunk chunk = {node_content->ptr, node_content->size, 0};
  // Keep consuming definitions while the remaining content starts with '['
  // and the reference parser reports a non-zero end position.
  while (chunk.len && chunk.data[0] == '[' &&
         (pos = cmark_parse_reference_inline(parser->mem, &chunk,
                                              parser->refmap))) {

    chunk.data += pos;
    chunk.len -= pos;
  }
  // Drop the consumed prefix from the node's own buffer.
  cmark_strbuf_drop(node_content, (node_content->size - chunk.len));
  return !is_blank(&b->content, 0);
}

// Closes block `b`: clears its OPEN flag, records its end line/column and
// performs the type-specific post-processing of its accumulated content.
// Returns the parent that `b` had on entry.
// Note that a paragraph left without content (it held only reference
// definitions) is freed here, so `b` may be invalid after the call.
static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
  bufsize_t pos;
  cmark_node *item;
  cmark_node *subitem;
  cmark_node *parent;
  bool has_content;

  // Remember the parent now; `b` may be freed below.
  parent = b->parent;
  assert(b->flags &
         CMARK_NODE__OPEN); // shouldn't call finalize on closed blocks
  b->flags &= ~CMARK_NODE__OPEN;

  if (parser->curline.size == 0) {
    // end of input - line number has not been incremented
    b->end_line = parser->line_number;
    b->end_column = parser->last_line_length;
  } else if (S_type(b) == CMARK_NODE_DOCUMENT ||
             (S_type(b) == CMARK_NODE_CODE_BLOCK && b->as.code.fenced) ||
             (S_type(b) == CMARK_NODE_HEADING && b->as.heading.setext)) {
    // These blocks end on the current line; the end column is the line
    // length without its trailing "\n" / "\r".
    b->end_line = parser->line_number;
    b->end_column = parser->curline.size;
    if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\n')
      b->end_column -= 1;
    if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\r')
      b->end_column -= 1;
  } else {
    // Every other block ended on the previous line.
    b->end_line = parser->line_number - 1;
    b->end_column = parser->last_line_length;
  }

  cmark_strbuf *node_content = &b->content;

  switch (S_type(b)) {
  case CMARK_NODE_PARAGRAPH:
  {
    has_content = resolve_reference_link_definitions(parser, b);
    if (!has_content) {
      // remove blank node (former reference def)
      cmark_node_free(b);
    }
    break;
  }

  case CMARK_NODE_CODE_BLOCK:
    if (!b->as.code.fenced) { // indented code
      remove_trailing_blank_lines(node_content);
      cmark_strbuf_putc(node_content, '\n');
    } else {
      // first line of contents becomes info
      for (pos = 0; pos < node_content->size; ++pos) {
        if (S_is_line_end_char(node_content->ptr[pos]))
          break;
      }
      assert(pos < node_content->size);

      // The info string is the first line, HTML-unescaped, trimmed and with
      // escapes resolved.
      cmark_strbuf tmp = CMARK_BUF_INIT(parser->mem);
      houdini_unescape_html_f(&tmp, node_content->ptr, pos);
      cmark_strbuf_trim(&tmp);
      cmark_strbuf_unescape(&tmp);
      b->as.code.info = cmark_chunk_buf_detach(&tmp);

      // Skip the line ending that terminates the info line, then drop the
      // whole first line from the content.
      if (node_content->ptr[pos] == '\r')
        pos += 1;
      if (node_content->ptr[pos] == '\n')
        pos += 1;
      cmark_strbuf_drop(node_content, pos);
    }
    b->as.code.literal = cmark_chunk_buf_detach(node_content);
    break;

  case CMARK_NODE_HTML_BLOCK:
    b->as.literal = cmark_chunk_buf_detach(node_content);
    break;

  case CMARK_NODE_LIST:      // determine tight/loose status
    b->as.list.tight = true; // tight by default
    item = b->first_child;

    while (item) {
      // check for non-final non-empty list item ending with blank line:
      if (S_last_line_blank(item) && item->next) {
        b->as.list.tight = false;
        break;
      }
      // recurse into children of list item, to see if there are
      // spaces between them:
      subitem = item->first_child;
      while (subitem) {
        if ((item->next || subitem->next) &&
            S_ends_with_blank_line(subitem)) {
          b->as.list.tight = false;
          break;
        }
        subitem = subitem->next;
      }
      // the inner loop found a blank line: the list is loose, stop looking.
      if (!(b->as.list.tight)) {
        break;
      }
      item = item->next;
    }

    break;

  default:
    break;
  }

  return parent;
}

// Add a node as child of another.  Return pointer to child.
// The new block starts at the parser's current line number and at
// `start_column`.
static cmark_node *add_child(cmark_parser *parser, cmark_node *parent,
                             cmark_node_type block_type, int start_column) {
  assert(parent);

  // if 'parent' isn't the kind of node that can accept this child,
  // then back up til we hit a node that can.
  while (!cmark_node_can_contain_type(parent, block_type)) {
    parent = finalize(parser, parent);
  }

  cmark_node *child =
      make_block(parser->mem, block_type, parser->line_number, start_column);
  child->parent = parent;

  // Link the child in as the new last child of `parent`.
  if (parent->last_child) {
    parent->last_child->next = child;
    child->prev = parent->last_child;
  } else {
    parent->first_child = child;
    child->prev = NULL;
  }
  parent->last_child = child;
  return child;
}

// Registers (add != 0) or unregisters (add == 0) the special inline
// characters of every attached inline syntax extension with the inline
// parser.
void cmark_manage_extensions_special_characters(cmark_parser *parser, int add) {
  cmark_llist *tmp_ext;

  for (tmp_ext = parser->inline_syntax_extensions; tmp_ext; tmp_ext=tmp_ext->next) {
    cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp_ext->data;
    cmark_llist *tmp_char;
    for (tmp_char = ext->special_inline_chars; tmp_char; tmp_char=tmp_char->next) {
      // The character is stored directly in the list cell's data pointer.
      unsigned char c = (unsigned char)(size_t)tmp_char->data;
      if (add)
        cmark_inlines_add_special_character(c, ext->emphasis);
      else
        cmark_inlines_remove_special_character(c, ext->emphasis);
    }
  }
}

// Walk through node and all children, recursively, parsing
// string content into inline content where appropriate.
// The extensions' special characters are registered for the duration of the
// walk and unregistered again afterwards.
static void process_inlines(cmark_parser *parser,
                            cmark_map *refmap, int options) {
  cmark_iter *iter = cmark_iter_new(parser->root);
  cmark_node *cur;
  cmark_event_type ev_type;

  cmark_manage_extensions_special_characters(parser, true);

  while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
    cur = cmark_iter_get_node(iter);
    if (ev_type == CMARK_EVENT_ENTER) {
      if (contains_inlines(cur)) {
        cmark_parse_inlines(parser, cur, refmap, options);
      }
    }
  }

  cmark_manage_extensions_special_characters(parser, false);

  cmark_iter_free(iter);
}

// qsort comparator: orders pointers to footnotes by ascending `ix`.
static int sort_footnote_by_ix(const void *_a, const void *_b) {
  cmark_footnote *a = *(cmark_footnote **)_a;
  cmark_footnote *b = *(cmark_footnote **)_b;
  return (int)a->ix - (int)b->ix;
}

// Resolves footnote references against footnote definitions and moves the
// referenced definitions to the end of the document.
static void process_footnotes(cmark_parser *parser) {
  // * Collect definitions in a map.
  // * Iterate the references in the document in order, assigning indices to
  //   definitions in the order they're seen.
  // * Write out the footnotes at the bottom of the document in index order.

  cmark_map *map = cmark_footnote_map_new(parser->mem);

  cmark_iter *iter = cmark_iter_new(parser->root);
  cmark_node *cur;
  cmark_event_type ev_type;

  // Pass 1: register every footnote definition in the map.
  while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
    cur = cmark_iter_get_node(iter);
    if (ev_type == CMARK_EVENT_EXIT && cur->type == CMARK_NODE_FOOTNOTE_DEFINITION) {
      cmark_footnote_create(map, cur);
    }
  }

  cmark_iter_free(iter);
  iter = cmark_iter_new(parser->root);
  // Running index; a definition gets the next value the first time it is
  // referenced, so 0 means "never referenced".
  unsigned int ix = 0;

  // Pass 2: resolve every footnote reference.
  while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
    cur = cmark_iter_get_node(iter);
    if (ev_type == CMARK_EVENT_EXIT && cur->type == CMARK_NODE_FOOTNOTE_REFERENCE) {
      cmark_footnote *footnote = (cmark_footnote *)cmark_map_lookup(map, &cur->as.literal);
      if (footnote) {
        if (!footnote->ix)
          footnote->ix = ++ix;

        // store a reference to this footnote reference's footnote definition
        // this is used by renderers when generating label ids
        cur->parent_footnote_def = footnote->node;

        // keep track of a) count of how many times this footnote def has been
        // referenced, and b) which reference index this footnote ref is at.
        // this is used by renderers when generating links and backreferences.
        cur->footnote.ref_ix = ++footnote->node->footnote.def_count;

        // Replace the reference's literal (its label) with the decimal
        // footnote index.
        char n[32];
        snprintf(n, sizeof(n), "%d", footnote->ix);
        cmark_chunk_free(parser->mem, &cur->as.literal);
        cmark_strbuf buf = CMARK_BUF_INIT(parser->mem);
        cmark_strbuf_puts(&buf, n);

        cur->as.literal = cmark_chunk_buf_detach(&buf);
      } else {
        // No matching definition: turn the reference back into the plain
        // text "[^label]" and discard the reference node.
        cmark_node *text = (cmark_node *)parser->mem->calloc(1, sizeof(*text));
        cmark_strbuf_init(parser->mem, &text->content, 0);
        text->type = (uint16_t) CMARK_NODE_TEXT;

        cmark_strbuf buf = CMARK_BUF_INIT(parser->mem);
        cmark_strbuf_puts(&buf, "[^");
        cmark_strbuf_put(&buf, cur->as.literal.data, cur->as.literal.len);
        cmark_strbuf_putc(&buf, ']');

        text->as.literal = cmark_chunk_buf_detach(&buf);
        cmark_node_insert_after(cur, text);
        cmark_node_free(cur);
      }
    }
  }

  cmark_iter_free(iter);

  // Pass 3: append the referenced definitions to the document root in index
  // order; definitions that were never referenced are unlinked from the tree.
  if (map->sorted) {
    qsort(map->sorted, map->size, sizeof(cmark_map_entry *), sort_footnote_by_ix);
    for (unsigned int i = 0; i < map->size; ++i) {
      cmark_footnote *footnote = (cmark_footnote *)map->sorted[i];
      if (!footnote->ix) {
        cmark_node_unlink(footnote->node);
        continue;
      }
      cmark_node_append_child(parser->root, footnote->node);
      // The definition now belongs to the document tree; clear the map's
      // pointer to it.
      footnote->node = NULL;
    }
  }

  cmark_unlink_footnotes_map(map);
  cmark_map_free(map);
}

// Attempts to parse a list item marker (bullet or enumerated).
// On success, returns length of the marker, and populates
// data with the details.  On failure, returns 0.
static bufsize_t parse_list_marker(cmark_mem *mem, cmark_chunk *input, bufsize_t pos, bool interrupts_paragraph, cmark_list **dataptr) { unsigned char c; bufsize_t startpos; cmark_list *data; bufsize_t i; startpos = pos; c = peek_at(input, pos); if (c == '*' || c == '-' || c == '+') { pos++; if (!cmark_isspace(peek_at(input, pos))) { return 0; } if (interrupts_paragraph) { i = pos; // require non-blank content after list marker: while (S_is_space_or_tab(peek_at(input, i))) { i++; } if (peek_at(input, i) == '\n') { return 0; } } data = (cmark_list *)mem->calloc(1, sizeof(*data)); data->marker_offset = 0; // will be adjusted later data->list_type = CMARK_BULLET_LIST; data->bullet_char = c; data->start = 0; data->delimiter = CMARK_NO_DELIM; data->tight = false; } else if (cmark_isdigit(c)) { int start = 0; int digits = 0; do { start = (10 * start) + (peek_at(input, pos) - '0'); pos++; digits++; // We limit to 9 digits to avoid overflow, // assuming max int is 2^31 - 1 // This also seems to be the limit for 'start' in some browsers. } while (digits < 9 && cmark_isdigit(peek_at(input, pos))); if (interrupts_paragraph && start != 1) { return 0; } c = peek_at(input, pos); if (c == '.' || c == ')') { pos++; if (!cmark_isspace(peek_at(input, pos))) { return 0; } if (interrupts_paragraph) { // require non-blank content after list marker: i = pos; while (S_is_space_or_tab(peek_at(input, i))) { i++; } if (S_is_line_end_char(peek_at(input, i))) { return 0; } } data = (cmark_list *)mem->calloc(1, sizeof(*data)); data->marker_offset = 0; // will be adjusted later data->list_type = CMARK_ORDERED_LIST; data->bullet_char = 0; data->start = start; data->delimiter = (c == '.' ? CMARK_PERIOD_DELIM : CMARK_PAREN_DELIM); data->tight = false; } else { return 0; } } else { return 0; } *dataptr = data; return (pos - startpos); } // Return 1 if list item belongs in list, else 0. 
static int lists_match(cmark_list *list_data, cmark_list *item_data) { return (list_data->list_type == item_data->list_type && list_data->delimiter == item_data->delimiter && // list_data->marker_offset == item_data.marker_offset && list_data->bullet_char == item_data->bullet_char); } static cmark_node *finalize_document(cmark_parser *parser) { while (parser->current != parser->root) { parser->current = finalize(parser, parser->current); } finalize(parser, parser->root); process_inlines(parser, parser->refmap, parser->options); if (parser->options & CMARK_OPT_FOOTNOTES) process_footnotes(parser); return parser->root; } cmark_node *cmark_parse_file(FILE *f, int options) { unsigned char buffer[4096]; cmark_parser *parser = cmark_parser_new(options); size_t bytes; cmark_node *document; while ((bytes = fread(buffer, 1, sizeof(buffer), f)) > 0) { bool eof = bytes < sizeof(buffer); S_parser_feed(parser, buffer, bytes, eof); if (eof) { break; } } document = cmark_parser_finish(parser); cmark_parser_free(parser); return document; } cmark_node *cmark_parse_document(const char *buffer, size_t len, int options) { cmark_parser *parser = cmark_parser_new(options); cmark_node *document; S_parser_feed(parser, (const unsigned char *)buffer, len, true); document = cmark_parser_finish(parser); cmark_parser_free(parser); return document; } void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len) { S_parser_feed(parser, (const unsigned char *)buffer, len, false); } void cmark_parser_feed_reentrant(cmark_parser *parser, const char *buffer, size_t len) { cmark_strbuf saved_linebuf; cmark_strbuf_init(parser->mem, &saved_linebuf, 0); cmark_strbuf_puts(&saved_linebuf, cmark_strbuf_cstr(&parser->linebuf)); cmark_strbuf_clear(&parser->linebuf); S_parser_feed(parser, (const unsigned char *)buffer, len, true); cmark_strbuf_sets(&parser->linebuf, cmark_strbuf_cstr(&saved_linebuf)); cmark_strbuf_free(&saved_linebuf); } static void S_parser_feed(cmark_parser *parser, const 
unsigned char *buffer, size_t len, bool eof) { const unsigned char *end = buffer + len; static const uint8_t repl[] = {239, 191, 189}; if (parser->last_buffer_ended_with_cr && *buffer == '\n') { // skip NL if last buffer ended with CR ; see #117 buffer++; } parser->last_buffer_ended_with_cr = false; while (buffer < end) { const unsigned char *eol; bufsize_t chunk_len; bool process = false; for (eol = buffer; eol < end; ++eol) { if (S_is_line_end_char(*eol)) { process = true; break; } if (*eol == '\0' && eol < end) { break; } } if (eol >= end && eof) { process = true; } chunk_len = (bufsize_t)(eol - buffer); if (process) { if (parser->linebuf.size > 0) { cmark_strbuf_put(&parser->linebuf, buffer, chunk_len); S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size); cmark_strbuf_clear(&parser->linebuf); } else { S_process_line(parser, buffer, chunk_len); } } else { if (eol < end && *eol == '\0') { // omit NULL byte cmark_strbuf_put(&parser->linebuf, buffer, chunk_len); // add replacement character cmark_strbuf_put(&parser->linebuf, repl, 3); } else { cmark_strbuf_put(&parser->linebuf, buffer, chunk_len); } } buffer += chunk_len; if (buffer < end) { if (*buffer == '\0') { // skip over NULL buffer++; } else { // skip over line ending characters if (*buffer == '\r') { buffer++; if (buffer == end) parser->last_buffer_ended_with_cr = true; } if (buffer < end && *buffer == '\n') buffer++; } } } } static void chop_trailing_hashtags(cmark_chunk *ch) { bufsize_t n, orig_n; cmark_chunk_rtrim(ch); orig_n = n = ch->len - 1; // if string ends in space followed by #s, remove these: while (n >= 0 && peek_at(ch, n) == '#') n--; // Check for a space before the final #s: if (n != orig_n && n >= 0 && S_is_space_or_tab(peek_at(ch, n))) { ch->len = n; cmark_chunk_rtrim(ch); } } // Check for thematic break. On failure, return 0 and update // thematic_break_kill_pos with the index at which the // parse fails. On success, return length of match. 
// "...three or more hyphens, asterisks, // or underscores on a line by themselves. If you wish, you may use // spaces between the hyphens or asterisks." static int S_scan_thematic_break(cmark_parser *parser, cmark_chunk *input, bufsize_t offset) { bufsize_t i; char c; char nextc = '\0'; int count; i = offset; c = peek_at(input, i); if (!(c == '*' || c == '_' || c == '-')) { parser->thematic_break_kill_pos = i; return 0; } count = 1; while ((nextc = peek_at(input, ++i))) { if (nextc == c) { count++; } else if (nextc != ' ' && nextc != '\t') { break; } } if (count >= 3 && (nextc == '\r' || nextc == '\n')) { return (i - offset) + 1; } else { parser->thematic_break_kill_pos = i; return 0; } } // Find first nonspace character from current offset, setting // parser->first_nonspace, parser->first_nonspace_column, // parser->indent, and parser->blank. Does not advance parser->offset. static void S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input) { char c; int chars_to_tab = TAB_STOP - (parser->column % TAB_STOP); if (parser->first_nonspace <= parser->offset) { parser->first_nonspace = parser->offset; parser->first_nonspace_column = parser->column; while ((c = peek_at(input, parser->first_nonspace))) { if (c == ' ') { parser->first_nonspace += 1; parser->first_nonspace_column += 1; chars_to_tab = chars_to_tab - 1; if (chars_to_tab == 0) { chars_to_tab = TAB_STOP; } } else if (c == '\t') { parser->first_nonspace += 1; parser->first_nonspace_column += chars_to_tab; chars_to_tab = TAB_STOP; } else { break; } } } parser->indent = parser->first_nonspace_column - parser->column; parser->blank = S_is_line_end_char(peek_at(input, parser->first_nonspace)); } // Advance parser->offset and parser->column. parser->offset is the // byte position in input; parser->column is a virtual column number // that takes into account tabs. (Multibyte characters are not taken // into account, because the Markdown line prefixes we are interested in // analyzing are entirely ASCII.) 
// The count parameter indicates
// how far to advance the offset.  If columns is true, then count
// indicates a number of columns; otherwise, a number of bytes.
// If advancing a certain number of columns partially consumes
// a tab character, parser->partially_consumed_tab is set to true.
static void S_advance_offset(cmark_parser *parser, cmark_chunk *input,
                             bufsize_t count, bool columns) {
  char c;
  int chars_to_tab;
  int chars_to_advance;
  // stops early at a NUL byte
  while (count > 0 && (c = peek_at(input, parser->offset))) {
    if (c == '\t') {
      chars_to_tab = TAB_STOP - (parser->column % TAB_STOP);
      if (columns) {
        parser->partially_consumed_tab = chars_to_tab > count;
        chars_to_advance = MIN(count, chars_to_tab);
        parser->column += chars_to_advance;
        // the byte offset only moves once the whole tab has been consumed
        parser->offset += (parser->partially_consumed_tab ? 0 : 1);
        count -= chars_to_advance;
      } else {
        parser->partially_consumed_tab = false;
        parser->column += chars_to_tab;
        parser->offset += 1;
        count -= 1;
      }
    } else {
      parser->partially_consumed_tab = false;
      parser->offset += 1;
      parser->column += 1; // assume ascii; block starts are ascii
      count -= 1;
    }
  }
}

// True when `container` has a last child whose CMARK_NODE__OPEN flag is set.
static bool S_last_child_is_open(cmark_node *container) {
  return container->last_child &&
         (container->last_child->flags & CMARK_NODE__OPEN);
}

// Continuation test for a block quote: with indent <= 3 and a '>' at the
// first nonspace position, consume the marker plus one optional following
// space/tab and return true.
static bool parse_block_quote_prefix(cmark_parser *parser, cmark_chunk *input) {
  bool res = false;
  bufsize_t matched = 0;

  matched =
      parser->indent <= 3 && peek_at(input, parser->first_nonspace) == '>';
  if (matched) {

    S_advance_offset(parser, input, parser->indent + 1, true);

    if (S_is_space_or_tab(peek_at(input, parser->offset))) {
      S_advance_offset(parser, input, 1, true);
    }

    res = true;
  }
  return res;
}

// Continuation test for a footnote definition: the line continues it when it
// is indented by at least 4 columns (which are consumed) or when the line
// starts with "\n" or "\r\n".  `container` is not used.
static bool parse_footnote_definition_block_prefix(cmark_parser *parser, cmark_chunk *input,
                                                   cmark_node *container) {
  if (parser->indent >= 4) {
    S_advance_offset(parser, input, 4, true);
    return true;
  } else if (input->len > 0 && (input->data[0] == '\n' || (input->data[0] == '\r' && input->data[1] == '\n'))) {
    return true;
  }

  return false;
}

// Continuation test for a list item: the line continues the item when it is
// indented at least marker_offset + padding columns, or when it is blank and
// the item already has a child.
static bool parse_node_item_prefix(cmark_parser *parser, cmark_chunk *input,
                                   cmark_node *container) {
  bool res = false;

  if (parser->indent >=
      container->as.list.marker_offset + container->as.list.padding) {
    S_advance_offset(parser, input, container->as.list.marker_offset +
                                        container->as.list.padding,
                     true);
    res = true;
  } else if (parser->blank && container->first_child != NULL) {
    // if container->first_child is NULL, then the opening line
    // of the list item was blank after the list marker; in this
    // case, we are done with the list item.
    S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
                     false);
    res = true;
  }
  return res;
}

// Continuation test for a code block.  Indented blocks continue on lines
// indented >= CODE_INDENT or on blank lines.  For fenced blocks a closing
// fence finalizes the block and clears *should_continue (the function then
// returns false); any other line continues the block after skipping up to
// fence_offset leading spaces/tabs.
static bool parse_code_block_prefix(cmark_parser *parser, cmark_chunk *input,
                                    cmark_node *container,
                                    bool *should_continue) {
  bool res = false;

  if (!container->as.code.fenced) { // indented
    if (parser->indent >= CODE_INDENT) {
      S_advance_offset(parser, input, CODE_INDENT, true);
      res = true;
    } else if (parser->blank) {
      S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
                       false);
      res = true;
    }
  } else { // fenced
    bufsize_t matched = 0;

    if (parser->indent <= 3 && (peek_at(input, parser->first_nonspace) ==
                                container->as.code.fence_char)) {
      matched = scan_close_code_fence(input, parser->first_nonspace);
    }

    if (matched >= container->as.code.fence_length) {
      // closing fence - and since we're at
      // the end of a line, we can stop processing it:
      *should_continue = false;
      S_advance_offset(parser, input, matched, false);
      parser->current = finalize(parser, container);
    } else {
      // skip the optional leading spaces (at most fence_offset of them)
      int i = container->as.code.fence_offset;

      while (i > 0 && S_is_space_or_tab(peek_at(input, parser->offset))) {
        S_advance_offset(parser, input, 1, true);
        i--;
      }
      res = true;
    }
  }

  return res;
}

// Continuation test for an HTML block: types 1-5 continue on any line,
// types 6 and 7 end at a blank line.
static bool parse_html_block_prefix(cmark_parser *parser,
                                    cmark_node *container) {
  bool res = false;
  int html_block_type = container->as.html_block_type;

  assert(html_block_type >= 1 && html_block_type <= 7);
  switch (html_block_type) {
  case 1:
  case 2:
  case 3:
  case 4:
  case 5:
    // these types of blocks can accept blanks
    res = true;
    break;
  case 6:
  case 7:
    res = !parser->blank;
    break;
  }

  return res;
}

// Continuation test for an extension-owned block: delegates to the
// extension's last_block_matches callback; false when no callback is set.
static bool parse_extension_block(cmark_parser *parser,
                                  cmark_node *container,
                                  cmark_chunk *input)
{
  bool res = false;

  if (container->extension->last_block_matches) {
    if (container->extension->last_block_matches(
        container->extension, parser, input->data, input->len, container))
      res = true;
  }

  return res;
}

/**
 * For each containing node, try to parse the associated line start.
 *
 * Will not close unmatched blocks, as we may have a lazy continuation
 * line -> http://spec.commonmark.org/0.24/#lazy-continuation-line
 *
 * *all_matched is set to true only when every open container matched.
 *
 * Returns: The last matching node, or NULL (NULL is returned when a closing
 * code fence consumed the line and no further processing is wanted).
 */
static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input,
                                     bool *all_matched) {
  bool should_continue = true;
  *all_matched = false;
  cmark_node *container = parser->root;
  cmark_node_type cont_type;

  while (S_last_child_is_open(container)) {
    container = container->last_child;
    cont_type = S_type(container);

    S_find_first_nonspace(parser, input);

    // extension-owned blocks decide for themselves
    if (container->extension) {
      if (!parse_extension_block(parser, container, input))
        goto done;
      continue;
    }

    switch (cont_type) {
    case CMARK_NODE_BLOCK_QUOTE:
      if (!parse_block_quote_prefix(parser, input))
        goto done;
      break;
    case CMARK_NODE_ITEM:
      if (!parse_node_item_prefix(parser, input, container))
        goto done;
      break;
    case CMARK_NODE_CODE_BLOCK:
      if (!parse_code_block_prefix(parser, input, container, &should_continue))
        goto done;
      break;
    case CMARK_NODE_HEADING:
      // a heading can never contain more than one line
      goto done;
    case CMARK_NODE_HTML_BLOCK:
      if (!parse_html_block_prefix(parser, container))
        goto done;
      break;
    case CMARK_NODE_PARAGRAPH:
      if (parser->blank)
        goto done;
      break;
    case CMARK_NODE_FOOTNOTE_DEFINITION:
      if (!parse_footnote_definition_block_prefix(parser, input, container))
        goto done;
      break;
    default:
      break;
    }
  }

  *all_matched = true;

done:
  if (!*all_matched) {
    container = container->parent; // back up to last matching node
  }

  if (!should_continue) {
    container = NULL;
  }

  return container;
}

// Starting from *container (the last matched block), repeatedly try to open
// new blocks at the current position: block quote, ATX heading, fenced code,
// HTML block, setext heading (converts the paragraph in place), thematic
// break, footnote definition, list item, indented code, then any block a
// syntax extension offers.  *container is updated to the deepest block
// opened.  The loop ends when nothing opens, or when the current container
// is a code/HTML block or another line-accepting block.
static void open_new_blocks(cmark_parser *parser, cmark_node **container,
                            cmark_chunk *input, bool all_matched) {
  bool indented;
  cmark_list *data = NULL;
  bool maybe_lazy = S_type(parser->current) == CMARK_NODE_PARAGRAPH;
  cmark_node_type cont_type = S_type(*container);
  bufsize_t matched = 0;
  int lev = 0;
  bool save_partially_consumed_tab;
  bool has_content;
  int save_offset;
  int save_column;

  while (cont_type != CMARK_NODE_CODE_BLOCK &&
         cont_type != CMARK_NODE_HTML_BLOCK) {

    S_find_first_nonspace(parser, input);
    indented = parser->indent >= CODE_INDENT;

    if (!indented && peek_at(input, parser->first_nonspace) == '>') {

      bufsize_t blockquote_startpos = parser->first_nonspace;

      S_advance_offset(parser, input,
                       parser->first_nonspace + 1 - parser->offset, false);
      // optional following character
      if (S_is_space_or_tab(peek_at(input, parser->offset))) {
        S_advance_offset(parser, input, 1, true);
      }
      *container = add_child(parser, *container, CMARK_NODE_BLOCK_QUOTE,
                             blockquote_startpos + 1);

    } else if (!indented && (matched = scan_atx_heading_start(
                                 input, parser->first_nonspace))) {
      bufsize_t hashpos;
      int level = 0;
      bufsize_t heading_startpos = parser->first_nonspace;

      S_advance_offset(parser, input,
                       parser->first_nonspace + matched - parser->offset,
                       false);
      *container = add_child(parser, *container, CMARK_NODE_HEADING,
                             heading_startpos + 1);

      hashpos = cmark_chunk_strchr(input, '#', parser->first_nonspace);

      // heading level == number of consecutive '#'
      while (peek_at(input, hashpos) == '#') {
        level++;
        hashpos++;
      }

      (*container)->as.heading.level = level;
      (*container)->as.heading.setext = false;
      (*container)->internal_offset = matched;

    } else if (!indented && (matched = scan_open_code_fence(
                                 input, parser->first_nonspace))) {
      *container = add_child(parser, *container, CMARK_NODE_CODE_BLOCK,
                             parser->first_nonspace + 1);
      (*container)->as.code.fenced = true;
      (*container)->as.code.fence_char = peek_at(input, parser->first_nonspace);
      // the stored fence length is capped at 255
      (*container)->as.code.fence_length = (matched > 255) ? 255 : (uint8_t)matched;
      (*container)->as.code.fence_offset =
          (int8_t)(parser->first_nonspace - parser->offset);
      (*container)->as.code.info = cmark_chunk_literal("");
      S_advance_offset(parser, input,
                       parser->first_nonspace + matched - parser->offset,
                       false);

    } else if (!indented && ((matched = scan_html_block_start(
                                  input, parser->first_nonspace)) ||
                             (cont_type != CMARK_NODE_PARAGRAPH &&
                              (matched = scan_html_block_start_7(
                                   input, parser->first_nonspace))))) {
      *container = add_child(parser, *container, CMARK_NODE_HTML_BLOCK,
                             parser->first_nonspace + 1);
      // the scanner's return value is stored as the HTML block type
      (*container)->as.html_block_type = matched;
      // note, we don't adjust parser->offset because the tag is part of the
      // text
    } else if (!indented && cont_type == CMARK_NODE_PARAGRAPH &&
               (lev =
                    scan_setext_heading_line(input, parser->first_nonspace))) {
      // finalize paragraph, resolving reference links
      has_content = resolve_reference_link_definitions(parser, *container);

      if (has_content) {

        // the paragraph node itself is retyped as a heading
        (*container)->type = (uint16_t)CMARK_NODE_HEADING;
        (*container)->as.heading.level = lev;
        (*container)->as.heading.setext = true;
        S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
      }
    } else if (!indented &&
               !(cont_type == CMARK_NODE_PARAGRAPH && !all_matched) &&
               (parser->thematic_break_kill_pos <= parser->first_nonspace) &&
               (matched = S_scan_thematic_break(parser, input, parser->first_nonspace))) {
      // it's only now that we know the line is not part of a setext heading:
      *container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK,
                             parser->first_nonspace + 1);
      S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
    } else if (!indented &&
               parser->options & CMARK_OPT_FOOTNOTES &&
               (matched = scan_footnote_definition(input, parser->first_nonspace))) {
      // copy the matched text minus its first two bytes
      // (presumably the "[^" opener -- confirm against the scanner)
      cmark_chunk c = cmark_chunk_dup(input, parser->first_nonspace + 2, matched - 2);
      cmark_chunk_to_cstr(parser->mem, &c);

      // trim back to, and then past, the closing ']'
      while (c.data[c.len - 1] != ']')
        --c.len;
      --c.len;

      S_advance_offset(parser, input, parser->first_nonspace + matched - parser->offset, false);
      *container = add_child(parser, *container, CMARK_NODE_FOOTNOTE_DEFINITION, parser->first_nonspace + matched + 1);
      (*container)->as.literal = c;

      (*container)->internal_offset = matched;
    } else if ((!indented || cont_type == CMARK_NODE_LIST) &&
               parser->indent < 4 &&
               (matched = parse_list_marker(
                    parser->mem, input, parser->first_nonspace,
                    (*container)->type == CMARK_NODE_PARAGRAPH, &data))) {

      // Note that we can have new list items starting with >= 4
      // spaces indent, as long as the list container is still open.
      // NOTE(review): the condition above also requires indent < 4, which
      // appears to contradict this note -- confirm which one is intended.
      int i = 0;

      // compute padding:
      S_advance_offset(parser, input,
                       parser->first_nonspace + matched - parser->offset,
                       false);

      // remember the position right after the marker so it can be restored
      save_partially_consumed_tab = parser->partially_consumed_tab;
      save_offset = parser->offset;
      save_column = parser->column;

      while (parser->column - save_column <= 5 &&
             S_is_space_or_tab(peek_at(input, parser->offset))) {
        S_advance_offset(parser, input, 1, true);
      }

      i = parser->column - save_column;
      if (i >= 5 || i < 1 ||
          // only spaces after list marker:
          S_is_line_end_char(peek_at(input, parser->offset))) {
        // fall back to a padding of marker width + 1 and rewind
        data->padding = matched + 1;
        parser->offset = save_offset;
        parser->column = save_column;
        parser->partially_consumed_tab = save_partially_consumed_tab;
        if (i > 0) {
          S_advance_offset(parser, input, 1, true);
        }
      } else {
        data->padding = matched + i;
      }

      // check container; if it's a list, see if this list item
      // can continue the list; otherwise, create a list container.

      data->marker_offset = parser->indent;

      if (cont_type != CMARK_NODE_LIST ||
          !lists_match(&((*container)->as.list), data)) {
        *container = add_child(parser, *container, CMARK_NODE_LIST,
                               parser->first_nonspace + 1);

        memcpy(&((*container)->as.list), data, sizeof(*data));
      }

      // add the list item
      *container = add_child(parser, *container, CMARK_NODE_ITEM,
                             parser->first_nonspace + 1);
      /* TODO: static */
      memcpy(&((*container)->as.list), data, sizeof(*data));
      parser->mem->free(data);
    } else if (indented && !maybe_lazy && !parser->blank) {
      S_advance_offset(parser, input, CODE_INDENT, true);
      *container = add_child(parser, *container, CMARK_NODE_CODE_BLOCK,
                             parser->offset + 1);
      (*container)->as.code.fenced = false;
      (*container)->as.code.fence_char = 0;
      (*container)->as.code.fence_length = 0;
      (*container)->as.code.fence_offset = 0;
      (*container)->as.code.info = cmark_chunk_literal("");
    } else {
      cmark_llist *tmp;
      cmark_node *new_container = NULL;

      // give each syntax extension a chance; the first that opens a block wins
      for (tmp = parser->syntax_extensions; tmp; tmp=tmp->next) {
        cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data;

        if (ext->try_opening_block) {
          new_container = ext->try_opening_block(
              ext, indented, parser, *container, input->data, input->len);

          if (new_container) {
            *container = new_container;
            break;
          }
        }
      }

      if (!new_container) {
        break;
      }
    }

    if (accepts_lines(S_type(*container))) {
      // if it's a line container, it can't contain other containers
      break;
    }

    cont_type = S_type(*container);
    maybe_lazy = false;
  }
}

// Add what remains of the line (from parser->offset) to the right block:
// the open paragraph for a lazy continuation line, otherwise `container`
// (creating a paragraph when `container` does not accept lines).  Also
// maintains the last_line_blank flags and sets parser->current on the
// non-lazy path.
static void add_text_to_container(cmark_parser *parser, cmark_node *container,
                                  cmark_node *last_matched_container,
                                  cmark_chunk *input) {
  cmark_node *tmp;
  // what remains at parser->offset is a text line.  add the text to the
  // appropriate container.

  S_find_first_nonspace(parser, input);

  if (parser->blank && container->last_child)
    S_set_last_line_blank(container->last_child, true);

  // block quote lines are never blank as they start with >
  // and we don't count blanks in fenced code for purposes of tight/loose
  // lists or breaking out of lists.  we also don't set last_line_blank
  // on an empty list item.
  const cmark_node_type ctype = S_type(container);
  const bool last_line_blank =
      (parser->blank && ctype != CMARK_NODE_BLOCK_QUOTE &&
       ctype != CMARK_NODE_HEADING && ctype != CMARK_NODE_THEMATIC_BREAK &&
       !(ctype == CMARK_NODE_CODE_BLOCK && container->as.code.fenced) &&
       !(ctype == CMARK_NODE_ITEM && container->first_child == NULL &&
         container->start_line == parser->line_number));

  S_set_last_line_blank(container, last_line_blank);

  // every ancestor has its flag cleared
  tmp = container;
  while (tmp->parent) {
    S_set_last_line_blank(tmp->parent, false);
    tmp = tmp->parent;
  }

  // If the last line processed belonged to a paragraph node,
  // and we didn't match all of the line prefixes for the open containers,
  // and we didn't start any new containers,
  // and the line isn't blank,
  // then treat this as a "lazy continuation line" and add it to
  // the open paragraph.
  if (parser->current != last_matched_container &&
      container == last_matched_container && !parser->blank &&
      S_type(parser->current) == CMARK_NODE_PARAGRAPH) {
    add_line(parser->current, input, parser);
  } else { // not a lazy continuation
    // Finalize any blocks that were not matched and set cur to container:
    while (parser->current != last_matched_container) {
      parser->current = finalize(parser, parser->current);
      assert(parser->current != NULL);
    }

    if (S_type(container) == CMARK_NODE_CODE_BLOCK) {
      add_line(container, input, parser);
    } else if (S_type(container) == CMARK_NODE_HTML_BLOCK) {
      add_line(container, input, parser);

      // HTML block types 1-5 end on the line that contains their end
      // marker; other types have no end marker checked here
      int matches_end_condition;
      switch (container->as.html_block_type) {
      case 1:
        // presumably the closing </script>, </style> or </pre> tag (per the
        // CommonMark HTML-block rules); the tag names are missing from the
        // original comment -- confirm against the scanner
        matches_end_condition =
            scan_html_block_end_1(input, parser->first_nonspace);
        break;
      case 2:
        // -->
        matches_end_condition =
            scan_html_block_end_2(input, parser->first_nonspace);
        break;
      case 3:
        // ?>
        matches_end_condition =
            scan_html_block_end_3(input, parser->first_nonspace);
        break;
      case 4:
        // >
        matches_end_condition =
            scan_html_block_end_4(input, parser->first_nonspace);
        break;
      case 5:
        // ]]>
        matches_end_condition =
            scan_html_block_end_5(input, parser->first_nonspace);
        break;
      default:
        matches_end_condition = 0;
        break;
      }

      if (matches_end_condition) {
        container = finalize(parser, container);
        assert(parser->current != NULL);
      }
    } else if (parser->blank) {
      // ??? do nothing
    } else if (accepts_lines(S_type(container))) {
      if (S_type(container) == CMARK_NODE_HEADING &&
          container->as.heading.setext == false) {
        chop_trailing_hashtags(input);
      }
      S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
                       false);
      add_line(container, input, parser);
    } else {
      // create paragraph container for line
      container = add_child(parser, container, CMARK_NODE_PARAGRAPH,
                            parser->first_nonspace + 1);
      S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
                       false);
      add_line(container, input, parser);
    }

    parser->current = container;
  }
}

/* See http://spec.commonmark.org/0.24/#phase-1-block-structure */
// Process one line of input: copy it into parser->curline (validating UTF-8
// when CMARK_OPT_VALIDATE_UTF8 is set), make sure it ends with a line-end
// character, reset the per-line parser state, then match open blocks, open
// new ones and add the remaining text.  Finally records last_line_length
// without the trailing "\n" / "\r".
static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
                           bufsize_t bytes) {
  cmark_node *last_matched_container;
  bool all_matched = true;
  cmark_node *container;
  cmark_chunk input;
  cmark_node *current;

  cmark_strbuf_clear(&parser->curline);

  if (parser->options & CMARK_OPT_VALIDATE_UTF8)
    cmark_utf8proc_check(&parser->curline, buffer, bytes);
  else
    cmark_strbuf_put(&parser->curline, buffer, bytes);

  bytes = parser->curline.size;

  // ensure line ends with a newline:
  if (bytes == 0 || !S_is_line_end_char(parser->curline.ptr[bytes - 1]))
    cmark_strbuf_putc(&parser->curline, '\n');

  parser->offset = 0;
  parser->column = 0;
  parser->first_nonspace = 0;
  parser->first_nonspace_column = 0;
  parser->thematic_break_kill_pos = 0;
  parser->indent = 0;
  parser->blank = false;
  parser->partially_consumed_tab = false;

  input.data = parser->curline.ptr;
  input.len = parser->curline.size;
  input.alloc = 0;

  // Skip UTF-8 BOM.
  if (parser->line_number == 0 &&
      input.len >= 3 &&
      memcmp(input.data, "\xef\xbb\xbf", 3) == 0)
    parser->offset += 3;

  parser->line_number++;

  last_matched_container = check_open_blocks(parser, &input, &all_matched);

  // NULL means the line was fully consumed (closing code fence)
  if (!last_matched_container)
    goto finished;

  container = last_matched_container;

  current = parser->current;

  open_new_blocks(parser, &container, &input, all_matched);

  /* parser->current might have changed if feed_reentrant was called */
  if (current == parser->current)
    add_text_to_container(parser, container, last_matched_container, &input);

finished:
  parser->last_line_length = input.len;
  if (parser->last_line_length &&
      input.data[parser->last_line_length - 1] == '\n')
    parser->last_line_length -= 1;
  if (parser->last_line_length &&
      input.data[parser->last_line_length - 1] == '\r')
    parser->last_line_length -= 1;

  cmark_strbuf_clear(&parser->curline);
}

// Finish parsing: flush a pending partial line, finalize the document,
// merge adjacent text nodes, free the line buffers, run every extension's
// postprocess_func (a non-NULL result replaces the root) and reset the
// parser.  Returns the document root, or NULL when the parser has already
// been finished.
cmark_node *cmark_parser_finish(cmark_parser *parser) {
  cmark_node *res;
  cmark_llist *extensions;

  /* Parser was already finished once */
  if (parser->root == NULL)
    return NULL;

  if (parser->linebuf.size) {
    S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size);
    cmark_strbuf_clear(&parser->linebuf);
  }

  finalize_document(parser);

  cmark_consolidate_text_nodes(parser->root);

  cmark_strbuf_free(&parser->curline);
  cmark_strbuf_free(&parser->linebuf);

#if CMARK_DEBUG_NODES
  if (cmark_node_check(parser->root, stderr)) {
    abort();
  }
#endif

  for (extensions = parser->syntax_extensions; extensions; extensions = extensions->next) {
    cmark_syntax_extension *ext = (cmark_syntax_extension *) extensions->data;
    if (ext->postprocess_func) {
      cmark_node *processed = ext->postprocess_func(ext, parser, parser->root);
      if (processed)
        parser->root = processed;
    }
  }

  // detach the root before the reset so ownership passes to the caller
  res = parser->root;
  parser->root = NULL;

  cmark_parser_reset(parser);

  return res;
}

// --- Read-only accessors for parser state ---

int cmark_parser_get_line_number(cmark_parser *parser) {
  return parser->line_number;
}

bufsize_t cmark_parser_get_offset(cmark_parser *parser) {
  return parser->offset;
}

bufsize_t cmark_parser_get_column(cmark_parser *parser) {
  return parser->column;
}

int cmark_parser_get_first_nonspace(cmark_parser *parser) {
  return parser->first_nonspace;
}

int cmark_parser_get_first_nonspace_column(cmark_parser *parser) {
  return parser->first_nonspace_column;
}

int cmark_parser_get_indent(cmark_parser *parser) {
  return parser->indent;
}

int cmark_parser_is_blank(cmark_parser *parser) {
  return parser->blank;
}

int cmark_parser_has_partially_consumed_tab(cmark_parser *parser) {
  return parser->partially_consumed_tab;
}

int cmark_parser_get_last_line_length(cmark_parser *parser) {
  return parser->last_line_length;
}

// Public wrapper around add_child().
cmark_node *cmark_parser_add_child(cmark_parser *parser,
                                   cmark_node *parent,
                                   cmark_node_type block_type,
                                   int start_column) {
  return add_child(parser, parent, block_type, start_column);
}

// Public wrapper around S_advance_offset(); `input` is wrapped in a chunk
// with cmark_chunk_literal() and any non-zero `columns` means "count columns".
void cmark_parser_advance_offset(cmark_parser *parser,
                                 const char *input,
                                 int count,
                                 int columns) {
  cmark_chunk input_chunk = cmark_chunk_literal(input);

  S_advance_offset(parser, &input_chunk, count, columns != 0);
}

// Store `func` in parser->backslash_ispunct.
void cmark_parser_set_backslash_ispunct_func(cmark_parser *parser,
                                             cmark_ispunct_func func) {
  parser->backslash_ispunct = func;
}

// Return the parser's list of attached syntax extensions.
cmark_llist *cmark_parser_get_syntax_extensions(cmark_parser *parser) {
  return parser->syntax_extensions;
}
cmarkgfm/third_party/cmark/src/libcmark-gfm.pc.in0000644000175000017500000000056414210444464022236 0ustar carstencarstenprefix=@CMAKE_INSTALL_PREFIX@
exec_prefix=@CMAKE_INSTALL_PREFIX@
libdir=@CMAKE_INSTALL_PREFIX@/@libdir@
includedir=@CMAKE_INSTALL_PREFIX@/include

Name: libcmark-gfm
Description: CommonMark parsing, rendering, and manipulation with GitHub Flavored Markdown extensions
Version: @PROJECT_VERSION@
Libs: -L${libdir} -lcmark-gfm -lcmark-gfm-extensions
Cflags: -I${includedir}
cmarkgfm/third_party/cmark/src/render.h0000644000175000017500000000321114210444464020372 0ustar carstencarsten#ifndef CMARK_RENDER_H
#define CMARK_RENDER_H

#ifdef __cplusplus
extern "C" {
#endif

/* NOTE(review): the header name of the next #include is missing in this copy
 * (probably a <...> system header lost in extraction) -- restore from upstream. */
#include
#include "buffer.h"
#include "chunk.h"

/* Escaping modes passed to the renderer's output callbacks. */
typedef enum { LITERAL, NORMAL, TITLE, URL } cmark_escaping;

/* State shared by the renderers driven through cmark_render(); the function
 * pointers are supplied by the concrete renderer. */
struct cmark_renderer {
  cmark_mem *mem;
  cmark_strbuf *buffer;
  cmark_strbuf *prefix;
  int column;
  int width;
  int need_cr;
  bufsize_t last_breakable;
  bool begin_line;
  bool begin_content;
  bool no_linebreaks;
  bool in_tight_list_item;
  void (*outc)(struct cmark_renderer *, cmark_node *, cmark_escaping, int32_t, unsigned char);
  void (*cr)(struct cmark_renderer *);
  void (*blankline)(struct cmark_renderer *);
  void (*out)(struct cmark_renderer *, cmark_node *, const char *, bool, cmark_escaping);
  unsigned int footnote_ix;
};

typedef struct cmark_renderer cmark_renderer;

/* State of the HTML renderer. */
struct cmark_html_renderer {
  cmark_strbuf *html;
  cmark_node *plain;
  cmark_llist *filter_extensions;
  unsigned int footnote_ix;
  unsigned int written_footnote_ix;
  void *opaque;
};

typedef struct cmark_html_renderer cmark_html_renderer;

void cmark_render_ascii(cmark_renderer *renderer, const char *s);

void cmark_render_code_point(cmark_renderer *renderer, uint32_t c);

/* Generic render driver: takes the per-character output callback `outc` and
 * the per-node callback `render_node`. */
char *cmark_render(cmark_mem *mem, cmark_node *root, int options, int width,
                   void (*outc)(cmark_renderer *, cmark_node *,
                                cmark_escaping, int32_t,
                                unsigned char),
                   int (*render_node)(cmark_renderer *renderer,
                                      cmark_node *node,
                                      cmark_event_type ev_type, int options));

#ifdef __cplusplus
}
#endif

#endif
cmarkgfm/third_party/cmark/src/syntax_extension.h0000644000175000017500000000255314210444464022545 0ustar carstencarsten#ifndef CMARK_SYNTAX_EXTENSION_H
#define CMARK_SYNTAX_EXTENSION_H

#include "cmark-gfm.h"
#include "cmark-gfm-extension_api.h"
#include "config.h"

/* Callback table and private data of one syntax extension. */
struct cmark_syntax_extension {
  cmark_match_block_func          last_block_matches;
  cmark_open_block_func           try_opening_block;
  cmark_match_inline_func         match_inline;
  cmark_inline_from_delim_func    insert_inline_from_delim;
  cmark_llist                   * special_inline_chars;
  char                          * name;
  void                          * priv;
  bool                            emphasis;
  cmark_free_func                 free_function;
  cmark_get_type_string_func      get_type_string_func;
  cmark_can_contain_func          can_contain_func;
  cmark_contains_inlines_func     contains_inlines_func;
  cmark_common_render_func        commonmark_render_func;
  cmark_common_render_func        plaintext_render_func;
  cmark_common_render_func        latex_render_func;
  cmark_xml_attr_func             xml_attr_func;
  cmark_common_render_func        man_render_func;
  cmark_html_render_func          html_render_func;
  cmark_html_filter_func          html_filter_func;
  cmark_postprocess_func          postprocess_func;
  cmark_opaque_alloc_func         opaque_alloc_func;
  cmark_opaque_free_func          opaque_free_func;
  cmark_commonmark_escape_func    commonmark_escape_func;
};

#endif
cmarkgfm/third_party/cmark/src/registry.h0000644000175000017500000000057114210444464020771 0ustar carstencarsten#ifndef CMARK_REGISTRY_H
#define CMARK_REGISTRY_H

#ifdef __cplusplus
extern "C" {
#endif

#include "cmark-gfm.h"
#include "plugin.h"

CMARK_GFM_EXPORT
void cmark_register_plugin(cmark_plugin_init_func reg_fn);

CMARK_GFM_EXPORT
void cmark_release_plugins(void);

CMARK_GFM_EXPORT
cmark_llist *cmark_list_syntax_extensions(cmark_mem *mem);

#ifdef __cplusplus
}
#endif

#endif
cmarkgfm/third_party/cmark/src/iterator.c0000644000175000017500000001054714210444464020751 0ustar carstencarsten#include
#include
/* NOTE(review): the two header names above are missing in this copy; the
 * code below uses assert() and NULL, so presumably <assert.h> and
 * <stdlib.h> -- confirm against upstream. */

#include "config.h"
#include "node.h"
#include "cmark-gfm.h"
#include "iterator.h"

/* Create an iterator over the tree rooted at `root`, allocated with the
 * root's own allocator.  The first cmark_iter_next() call yields ENTER on
 * the root.  Returns NULL when `root` is NULL. */
cmark_iter *cmark_iter_new(cmark_node *root) {
  if (root == NULL) {
    return NULL;
  }
  cmark_mem *mem = root->content.mem;
  cmark_iter *iter = (cmark_iter *)mem->calloc(1, sizeof(cmark_iter));
  iter->mem = mem;
  iter->root = root;
  iter->cur.ev_type = CMARK_EVENT_NONE;
  iter->cur.node = NULL;
  iter->next.ev_type = CMARK_EVENT_ENTER;
  iter->next.node = root;
  return iter;
}

/* Release the iterator; `iter` must not be NULL (it is dereferenced). */
void cmark_iter_free(cmark_iter *iter) { iter->mem->free(iter); }

/* Node types that are only ever entered: no children are visited and no
 * EXIT event is generated for them. */
static bool S_is_leaf(cmark_node *node) {
  switch (node->type) {
  case CMARK_NODE_HTML_BLOCK:
  case CMARK_NODE_THEMATIC_BREAK:
  case CMARK_NODE_CODE_BLOCK:
  case CMARK_NODE_TEXT:
  case CMARK_NODE_SOFTBREAK:
  case CMARK_NODE_LINEBREAK:
  case CMARK_NODE_CODE:
  case CMARK_NODE_HTML_INLINE:
    return 1;
  }
  return 0;
}

/* Advance the iterator: the pending event becomes the current one and is
 * returned, and the following event is precomputed.  After
 * CMARK_EVENT_DONE the iterator stays at DONE. */
cmark_event_type cmark_iter_next(cmark_iter *iter) {
  cmark_event_type ev_type = iter->next.ev_type;
  cmark_node *node = iter->next.node;

  iter->cur.ev_type = ev_type;
  iter->cur.node = node;

  if (ev_type == CMARK_EVENT_DONE) {
    return ev_type;
  }

  /* roll forward to next item, setting both fields */
  if (ev_type == CMARK_EVENT_ENTER && !S_is_leaf(node)) {
    if (node->first_child == NULL) {
      /* stay on this node but exit */
      iter->next.ev_type = CMARK_EVENT_EXIT;
    } else {
      iter->next.ev_type = CMARK_EVENT_ENTER;
      iter->next.node = node->first_child;
    }
  } else if (node == iter->root) {
    /* don't move past root */
    iter->next.ev_type = CMARK_EVENT_DONE;
    iter->next.node = NULL;
  } else if (node->next) {
    iter->next.ev_type = CMARK_EVENT_ENTER;
    iter->next.node = node->next;
  } else if (node->parent) {
    iter->next.ev_type = CMARK_EVENT_EXIT;
    iter->next.node = node->parent;
  } else {
    /* a non-root node without sibling or parent is not expected */
    assert(false);
    iter->next.ev_type = CMARK_EVENT_DONE;
    iter->next.node = NULL;
  }

  return ev_type;
}

/* Reposition the iterator so that (current, event_type) is the current
 * event; the following event is recomputed by the cmark_iter_next() call. */
void cmark_iter_reset(cmark_iter *iter, cmark_node *current,
                      cmark_event_type event_type) {
  iter->next.ev_type = event_type;
  iter->next.node = current;
  cmark_iter_next(iter);
}

/* Node of the current event. */
cmark_node *cmark_iter_get_node(cmark_iter *iter) { return iter->cur.node; }

/* Type of the current event. */
cmark_event_type cmark_iter_get_event_type(cmark_iter *iter) {
  return iter->cur.ev_type;
}

/* Root the iterator was created with. */
cmark_node *cmark_iter_get_root(cmark_iter *iter) { return iter->root; }

/* Merge each run of adjacent TEXT siblings into the first node of the run:
 * the literals are concatenated, end_column is taken from the last merged
 * node and the merged nodes are freed.  No-op when `root` is NULL. */
void cmark_consolidate_text_nodes(cmark_node *root) {
  if (root == NULL) {
    return;
  }
  cmark_iter *iter = cmark_iter_new(root);
  cmark_strbuf buf = CMARK_BUF_INIT(iter->mem);
  cmark_event_type ev_type;
  cmark_node *cur, *tmp, *next;

  while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
    cur = cmark_iter_get_node(iter);
    if (ev_type == CMARK_EVENT_ENTER && cur->type == CMARK_NODE_TEXT &&
        cur->next && cur->next->type == CMARK_NODE_TEXT) {
      cmark_strbuf_clear(&buf);
      cmark_strbuf_put(&buf, cur->as.literal.data, cur->as.literal.len);
      tmp = cur->next;
      while (tmp && tmp->type == CMARK_NODE_TEXT) {
        cmark_iter_next(iter); // advance pointer
        cmark_strbuf_put(&buf, tmp->as.literal.data, tmp->as.literal.len);
        cur->end_column = tmp->end_column;
        next = tmp->next;
        cmark_node_free(tmp);
        tmp = next;
      }
      cmark_chunk_free(iter->mem, &cur->as.literal);
      cur->as.literal = cmark_chunk_buf_detach(&buf);
    }
  }

  cmark_strbuf_free(&buf);
  cmark_iter_free(iter);
}

/* Walk the tree and call cmark_chunk_to_cstr() on the chunk fields of TEXT,
 * HTML_INLINE, CODE, HTML_BLOCK, LINK and CUSTOM_INLINE nodes (presumably so
 * that each node owns its string data -- see cmark_chunk_to_cstr).  No-op
 * when `root` is NULL. */
void cmark_node_own(cmark_node *root) {
  if (root == NULL) {
    return;
  }
  cmark_iter *iter = cmark_iter_new(root);
  cmark_event_type ev_type;
  cmark_node *cur;

  while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
    cur = cmark_iter_get_node(iter);
    if (ev_type == CMARK_EVENT_ENTER) {
      switch (cur->type) {
      case CMARK_NODE_TEXT:
      case CMARK_NODE_HTML_INLINE:
      case CMARK_NODE_CODE:
      case CMARK_NODE_HTML_BLOCK:
        cmark_chunk_to_cstr(iter->mem, &cur->as.literal);
        break;
      case CMARK_NODE_LINK:
        cmark_chunk_to_cstr(iter->mem, &cur->as.link.url);
        cmark_chunk_to_cstr(iter->mem, &cur->as.link.title);
        break;
      case CMARK_NODE_CUSTOM_INLINE:
        cmark_chunk_to_cstr(iter->mem, &cur->as.custom.on_enter);
        cmark_chunk_to_cstr(iter->mem, &cur->as.custom.on_exit);
        break;
      }
    }
  }

  cmark_iter_free(iter);
}
cmarkgfm/third_party/cmark/src/map.h0000644000175000017500000000144514210444464017677 0ustar carstencarsten#ifndef CMARK_MAP_H
#define CMARK_MAP_H

#include "chunk.h"

#ifdef __cplusplus
extern "C" {
#endif

/* One entry of a label map, chained through `next`. */
struct cmark_map_entry {
  struct cmark_map_entry *next;
  unsigned char *label;
  unsigned int age;
};

typedef struct cmark_map_entry cmark_map_entry;

struct cmark_map;

/* Callback type stored in cmark_map.free; receives the map and one entry. */
typedef void (*cmark_map_free_f)(struct cmark_map *, cmark_map_entry *);

/* Label map: a linked list of entries (`refs`), a `sorted` pointer array and
 * the `free` callback. */
struct cmark_map {
  cmark_mem *mem;
  cmark_map_entry *refs;
  cmark_map_entry **sorted;
  unsigned int size;
  cmark_map_free_f free;
};

typedef struct cmark_map cmark_map;

unsigned char
*normalize_map_label(cmark_mem *mem, cmark_chunk *ref); cmark_map *cmark_map_new(cmark_mem *mem, cmark_map_free_f free); void cmark_map_free(cmark_map *map); cmark_map_entry *cmark_map_lookup(cmark_map *map, cmark_chunk *label); #ifdef __cplusplus } #endif #endif cmarkgfm/third_party/cmark/why-cmark-and-not-x.md0000644000175000017500000000306614210444464022212 0ustar carstencarstenWhy use `cmark` and not X? ========================== `hoedown` --------- `hoedown` (which derives from `sundown`) is slightly faster than `cmark` in our benchmarks (0.21s vs. 0.29s). But both are much faster than any other available implementations. `hoedown` boasts of including "protection against all possible DOS attacks," but there are some chinks in the armor: % time python -c 'print(("[" * 50000) + "a" + ("]" * 50000))' | cmark ... user 0m0.073s % time python -c 'print(("[" * 50000) + "a" + ("]" * 50000))' | hoedown ... 0m17.84s `hoedown` has many parsing bugs. Here is a selection (as of v3.0.3): % hoedown - one - two 1. three ^D
  • one
    • two
    • three
% hoedown ## hi\### ^D

hi\

% hoedown [ΑΓΩ]: /φου [αγω] ^D

[αγω]

% hoedown ``` [foo]: /url ``` [foo] ^D

```

```

foo

% hoedown [foo](url "ti\*tle") ^D

foo

% ./hoedown - one - two - three - four ^D
  • one
    • two
    • three
    • four
`discount` ---------- `cmark` is about six times faster. `kramdown` ---------- `cmark` is about a hundred times faster. `kramdown` also gets tied in knots by pathological input like python -c 'print(("[" * 50000) + "a" + ("]" * 50000))' cmarkgfm/third_party/cmark/CheckFileOffsetBits.c0000644000175000017500000000044714210444464022135 0ustar carstencarsten#include #define KB ((off_t)1024) #define MB ((off_t)1024 * KB) #define GB ((off_t)1024 * MB) #define TB ((off_t)1024 * GB) int t2[(((64 * GB -1) % 671088649) == 268434537) && (((TB - (64 * GB -1) + 255) % 1792151290) == 305159546)? 1: -1]; int main() { ; return 0; } cmarkgfm/third_party/cmark/wrappers/0000755000175000017500000000000014210444464020021 5ustar carstencarstencmarkgfm/third_party/cmark/wrappers/wrapper.rb0000755000175000017500000000047414210444464022036 0ustar carstencarsten#!/usr/bin/env ruby require 'ffi' module CMark extend FFI::Library ffi_lib ['libcmark', 'cmark'] attach_function :cmark_markdown_to_html, [:string, :int, :int], :string end def markdown_to_html(s) len = s.bytesize CMark::cmark_markdown_to_html(s, len, 0) end STDOUT.write(markdown_to_html(ARGF.read())) cmarkgfm/third_party/cmark/wrappers/wrapper.js0000644000175000017500000000014614210444464022040 0ustar carstencarsten const cmark = require('node-cmark'); const markdown = '# h1 title'; cmark.markdown2html(markdown); cmarkgfm/third_party/cmark/wrappers/wrapper_ext.py0000755000175000017500000000635614210444464022750 0ustar carstencarsten#!/usr/bin/env python # # Example for using the shared library from python. # Will work with either python 2 or python 3. # Requires cmark-gfm and cmark-gfm-extensions libraries to be installed. # # This particular example uses the GitHub extensions from the gfm-extensions # library. EXTENSIONS specifies which to use, and the sample shows how to # connect them into a parser. 
# import sys import ctypes if sys.platform == 'darwin': libname = 'libcmark-gfm.dylib' extname = 'libcmark-gfm-extensions.dylib' elif sys.platform == 'win32': libname = 'cmark-gfm.dll' extname = 'cmark-gfm-extensions.dll' else: libname = 'libcmark-gfm.so' extname = 'libcmark-gfm-extensions.so' cmark = ctypes.CDLL(libname) cmark_ext = ctypes.CDLL(extname) # Options for the GFM rendering call OPTS = 0 # defaults # The GFM extensions that we want to use EXTENSIONS = ( 'autolink', 'table', 'strikethrough', 'tagfilter', ) # Use ctypes to access the functions in libcmark-gfm F_cmark_parser_new = cmark.cmark_parser_new F_cmark_parser_new.restype = ctypes.c_void_p F_cmark_parser_new.argtypes = (ctypes.c_int,) F_cmark_parser_feed = cmark.cmark_parser_feed F_cmark_parser_feed.restype = None F_cmark_parser_feed.argtypes = (ctypes.c_void_p, ctypes.c_char_p, ctypes.c_size_t) F_cmark_parser_finish = cmark.cmark_parser_finish F_cmark_parser_finish.restype = ctypes.c_void_p F_cmark_parser_finish.argtypes = (ctypes.c_void_p,) F_cmark_parser_attach_syntax_extension = cmark.cmark_parser_attach_syntax_extension F_cmark_parser_attach_syntax_extension.restype = ctypes.c_int F_cmark_parser_attach_syntax_extension.argtypes = (ctypes.c_void_p, ctypes.c_void_p) F_cmark_parser_get_syntax_extensions = cmark.cmark_parser_get_syntax_extensions F_cmark_parser_get_syntax_extensions.restype = ctypes.c_void_p F_cmark_parser_get_syntax_extensions.argtypes = (ctypes.c_void_p,) F_cmark_parser_free = cmark.cmark_parser_free F_cmark_parser_free.restype = None F_cmark_parser_free.argtypes = (ctypes.c_void_p,) F_cmark_node_free = cmark.cmark_node_free F_cmark_node_free.restype = None F_cmark_node_free.argtypes = (ctypes.c_void_p,) F_cmark_find_syntax_extension = cmark.cmark_find_syntax_extension F_cmark_find_syntax_extension.restype = ctypes.c_void_p F_cmark_find_syntax_extension.argtypes = (ctypes.c_char_p,) F_cmark_render_html = cmark.cmark_render_html F_cmark_render_html.restype = ctypes.c_char_p 
F_cmark_render_html.argtypes = (ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p) # Set up the libcmark-gfm library and its extensions F_register = cmark_ext.cmark_gfm_core_extensions_ensure_registered F_register.restype = None F_register.argtypes = ( ) F_register() def md2html(text): "Use cmark-gfm to render the Markdown into an HTML fragment." parser = F_cmark_parser_new(OPTS) assert parser for name in EXTENSIONS: ext = F_cmark_find_syntax_extension(name) assert ext rv = F_cmark_parser_attach_syntax_extension(parser, ext) assert rv exts = F_cmark_parser_get_syntax_extensions(parser) F_cmark_parser_feed(parser, text, len(text)) doc = F_cmark_parser_finish(parser) assert doc output = F_cmark_render_html(doc, OPTS, exts) F_cmark_parser_free(parser) F_cmark_node_free(doc) return output sys.stdout.write(md2html(sys.stdin.read())) cmarkgfm/third_party/cmark/wrappers/wrapper.py0000755000175000017500000000162114210444464022056 0ustar carstencarsten#!/usr/bin/env python # Example for using the shared library from python # Will work with either python 2 or python 3 # Requires cmark library to be installed from ctypes import CDLL, c_char_p, c_long import sys import platform sysname = platform.system() if sysname == 'Darwin': libname = "libcmark.dylib" elif sysname == 'Windows': libname = "cmark.dll" else: libname = "libcmark.so" cmark = CDLL(libname) markdown = cmark.cmark_markdown_to_html markdown.restype = c_char_p markdown.argtypes = [c_char_p, c_long, c_long] opts = 0 # defaults def md2html(text): if sys.version_info >= (3,0): textbytes = text.encode('utf-8') textlen = len(textbytes) return markdown(textbytes, textlen, opts).decode('utf-8') else: textbytes = text textlen = len(text) return markdown(textbytes, textlen, opts) sys.stdout.write(md2html(sys.stdin.read())) cmarkgfm/third_party/cmark/wrappers/wrapper.rkt0000644000175000017500000002066114210444464022230 0ustar carstencarsten#lang racket/base ;; requires racket >= 5.3 because of submodules ;; Lowlevel interface 
(module low-level racket/base (require ffi/unsafe ffi/unsafe/define) (provide (all-defined-out)) (define-ffi-definer defcmark (ffi-lib "libcmark")) (define _cmark_node_type (_enum '(;; Error status none ;; Block document block-quote list item code-block html-block custom-block paragraph heading thematic-break ;; ?? first-block = document ;; ?? last-block = thematic-break ;; Inline text softbreak linebreak code html-inline custom-inline emph strong link image ;; ?? first-inline = text ;; ?? last-inline = image ))) (define _cmark_list_type (_enum '(no_list bullet_list ordered_list))) (define _cmark_delim_type (_enum '(no_delim period_delim paren_delim))) (define _cmark_opts (_bitmask '(sourcepos = 2 ; include sourcepos attribute on block elements hardbreaks = 4 ; render `softbreak` elements as hard line breaks safe = 8 ; suppress raw HTML and unsafe links nobreaks = 16 ; render `softbreak` elements as spaces normalize = 256 ; legacy (no effect) validate-utf8 = 512 ; validate UTF-8 in the input smart = 1024 ; straight quotes to curly, ---/-- to em/en dashes ))) (define-cpointer-type _node) (defcmark cmark_markdown_to_html (_fun [bs : _bytes] [_int = (bytes-length bs)] _cmark_opts -> [r : _bytes] -> (begin0 (bytes->string/utf-8 r) (free r)))) (defcmark cmark_parse_document (_fun [bs : _bytes] [_int = (bytes-length bs)] _cmark_opts -> _node)) (defcmark cmark_render_html (_fun _node _cmark_opts -> [r : _bytes] -> (begin0 (bytes->string/utf-8 r) (free r)))) (defcmark cmark_node_new (_fun _cmark_node_type -> _node)) (defcmark cmark_node_free (_fun _node -> _void)) (defcmark cmark_node_next (_fun _node -> _node/null)) (defcmark cmark_node_previous (_fun _node -> _node/null)) (defcmark cmark_node_parent (_fun _node -> _node/null)) (defcmark cmark_node_first_child (_fun _node -> _node/null)) (defcmark cmark_node_last_child (_fun _node -> _node/null)) (defcmark cmark_node_get_user_data (_fun _node -> _racket)) (defcmark cmark_node_set_user_data (_fun _node _racket -> _bool)) 
(defcmark cmark_node_get_type (_fun _node -> _cmark_node_type)) (defcmark cmark_node_get_type_string (_fun _node -> _bytes)) (defcmark cmark_node_get_literal (_fun _node -> _string)) (defcmark cmark_node_set_literal (_fun _node _string -> _bool)) (defcmark cmark_node_get_heading_level (_fun _node -> _int)) (defcmark cmark_node_set_heading_level (_fun _node _int -> _bool)) (defcmark cmark_node_get_list_type (_fun _node -> _cmark_list_type)) (defcmark cmark_node_set_list_type (_fun _node _cmark_list_type -> _bool)) (defcmark cmark_node_get_list_delim (_fun _node -> _cmark_delim_type)) (defcmark cmark_node_set_list_delim (_fun _node _cmark_delim_type -> _bool)) (defcmark cmark_node_get_list_start (_fun _node -> _int)) (defcmark cmark_node_set_list_start (_fun _node _int -> _bool)) (defcmark cmark_node_get_list_tight (_fun _node -> _bool)) (defcmark cmark_node_set_list_tight (_fun _node _bool -> _bool)) (defcmark cmark_node_get_fence_info (_fun _node -> _string)) (defcmark cmark_node_set_fence_info (_fun _node _string -> _bool)) (defcmark cmark_node_get_url (_fun _node -> _string)) (defcmark cmark_node_set_url (_fun _node _string -> _bool)) (defcmark cmark_node_get_title (_fun _node -> _string)) (defcmark cmark_node_set_title (_fun _node _string -> _bool)) (defcmark cmark_node_get_start_line (_fun _node -> _int)) (defcmark cmark_node_get_start_column (_fun _node -> _int)) (defcmark cmark_node_get_end_line (_fun _node -> _int)) (defcmark cmark_node_get_end_column (_fun _node -> _int)) (defcmark cmark_node_unlink (_fun _node -> _void)) (defcmark cmark_node_insert_before (_fun _node _node -> _bool)) (defcmark cmark_node_insert_after (_fun _node _node -> _bool)) (defcmark cmark_node_prepend_child (_fun _node _node -> _bool)) (defcmark cmark_node_append_child (_fun _node _node -> _bool)) (defcmark cmark_consolidate_text_nodes (_fun _node -> _void)) (defcmark cmark_version (_fun -> _int)) (defcmark cmark_version_string (_fun -> _string)) ) ;; Rackety interface (module 
high-level racket/base (require (submod ".." low-level) ffi/unsafe) (provide cmark-markdown-to-html) (define (cmark-markdown-to-html str [options '(normalize smart)]) (cmark_markdown_to_html (if (bytes? str) str (string->bytes/utf-8 str)) options)) (require (for-syntax racket/base racket/syntax)) (define-syntax (make-getter+setter stx) (syntax-case stx () [(_ name) (with-syntax ([(getter setter) (map (λ(op) (format-id #'name "cmark_node_~a_~a" op #'name)) '(get set))]) #'(cons getter setter))])) (define-syntax-rule (define-getters+setters name [type field ...] ...) (define name (list (list 'type (make-getter+setter field) ...) ...))) (define-getters+setters getters+setters [heading heading_level] [code-block fence_info] [link url title] [image url title] [list list_type list_delim list_start list_tight]) (provide cmark->sexpr) (define (cmark->sexpr node) (define text (cmark_node_get_literal node)) (define type (cmark_node_get_type node)) (define children (let loop ([node (cmark_node_first_child node)]) (if (not node) '() (cons (cmark->sexpr node) (loop (cmark_node_next node)))))) (define info (cond [(assq type getters+setters) => (λ(gss) (map (λ(gs) ((car gs) node)) (cdr gss)))] [else '()])) (define (assert-no what-not b) (when b (error 'cmark->sexpr "unexpected ~a in ~s" what-not type))) (cond [(memq type '(document paragraph heading block-quote list item emph strong link image)) (assert-no 'text text) (list type info children)] [(memq type '(text code code-block html-block html-inline softbreak linebreak thematic-break)) (assert-no 'children (pair? 
children)) (list type info text)] [else (error 'cmark->sexpr "unknown type: ~s" type)])) (provide sexpr->cmark) (define (sexpr->cmark sexpr) ; assumes valid input, as generated by the above (define (loop sexpr) (define type (car sexpr)) (define info (cadr sexpr)) (define data (caddr sexpr)) (define node (cmark_node_new type)) (let ([gss (assq type getters+setters)]) (when gss (unless (= (length (cdr gss)) (length info)) (error 'sexpr->cmark "bad number of info values in ~s" sexpr)) (for-each (λ(gs x) ((cdr gs) node x)) (cdr gss) info))) (cond [(string? data) (cmark_node_set_literal node data)] [(not data) (void)] [(list? data) (for ([child (in-list data)]) (cmark_node_append_child node (sexpr->cmark child)))] [else (error 'sexpr->cmark "bad data in ~s" sexpr)]) node) (define root (loop sexpr)) (register-finalizer root cmark_node_free) root) ;; Registers a `cmark_node_free` finalizer (provide cmark-parse-document) (define (cmark-parse-document str [options '(normalize smart)]) (define root (cmark_parse_document (if (bytes? str) str (string->bytes/utf-8 str)) options)) (register-finalizer root cmark_node_free) root) (provide cmark-render-html) (define (cmark-render-html root [options '(normalize smart)]) (cmark_render_html root options))) #; ;; sample use (begin (require 'high-level racket/string) (cmark-render-html (cmark-parse-document (string-join '("foo" "===" "" "> blah" ">" "> blah *blah* `bar()` blah:" ">" "> function foo() {" "> bar();" "> }") "\n")))) cmarkgfm/third_party/cmark/benchmarks.md0000644000175000017500000000236614210444464020624 0ustar carstencarsten# Benchmarks Here are some benchmarks, run on an ancient Thinkpad running Intel Core 2 Duo at 2GHz. The input text is a 11MB Markdown file built by concatenating the Markdown sources of all the localizations of the first edition of [*Pro Git*](https://github.com/progit/progit/tree/master/en) by Scott Chacon. 
|Implementation | Time (sec)| |-------------------|-----------:| | Markdown.pl | 2921.24 | | Python markdown | 291.25 | | PHP markdown | 20.82 | | kramdown | 17.32 | | cheapskate | 8.24 | | peg-markdown | 5.45 | | parsedown | 5.06 | | **commonmark.js** | 2.09 | | marked | 1.99 | | discount | 1.85 | | **cmark** | 0.29 | | hoedown | 0.21 | To run these benchmarks, use `make bench PROG=/path/to/program`. `time` is used to measure execution speed. The reported time is the *difference* between the time to run the program with the benchmark input and the time to run it with no input. (This procedure ensures that implementations in dynamic languages are not penalized by startup time.) A median of ten runs is taken. The process is reniced to a high priority so that the system doesn't interrupt runs. cmarkgfm/third_party/cmark/bench/0000755000175000017500000000000014210444464017235 5ustar carstencarstencmarkgfm/third_party/cmark/bench/statistics.py0000644000175000017500000004270314210444464022007 0ustar carstencarsten## Module statistics.py ## ## Copyright (c) 2013 Steven D'Aprano . ## ## Licensed under the Apache License, Version 2.0 (the "License"); ## you may not use this file except in compliance with the License. ## You may obtain a copy of the License at ## ## http://www.apache.org/licenses/LICENSE-2.0 ## ## Unless required by applicable law or agreed to in writing, software ## distributed under the License is distributed on an "AS IS" BASIS, ## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ## See the License for the specific language governing permissions and ## limitations under the License. """ Basic statistics module. This module provides functions for calculating statistics of data, including averages, variance, and standard deviation. 
Calculating averages -------------------- ================== ============================================= Function Description ================== ============================================= mean Arithmetic mean (average) of data. median Median (middle value) of data. median_low Low median of data. median_high High median of data. median_grouped Median, or 50th percentile, of grouped data. mode Mode (most common value) of data. ================== ============================================= Calculate the arithmetic mean ("the average") of data: >>> mean([-1.0, 2.5, 3.25, 5.75]) 2.625 Calculate the standard median of discrete data: >>> median([2, 3, 4, 5]) 3.5 Calculate the median, or 50th percentile, of data grouped into class intervals centred on the data values provided. E.g. if your data points are rounded to the nearest whole number: >>> median_grouped([2, 2, 3, 3, 3, 4]) #doctest: +ELLIPSIS 2.8333333333... This should be interpreted in this way: you have two data points in the class interval 1.5-2.5, three data points in the class interval 2.5-3.5, and one in the class interval 3.5-4.5. The median of these data points is 2.8333... Calculating variability or spread --------------------------------- ================== ============================================= Function Description ================== ============================================= pvariance Population variance of data. variance Sample variance of data. pstdev Population standard deviation of data. stdev Sample standard deviation of data. ================== ============================================= Calculate the standard deviation of sample data: >>> stdev([2.5, 3.25, 5.5, 11.25, 11.75]) #doctest: +ELLIPSIS 4.38961843444... 
If you have previously calculated the mean, you can pass it as the optional second argument to the four "spread" functions to avoid recalculating it: >>> data = [1, 2, 2, 4, 4, 4, 5, 6] >>> mu = mean(data) >>> pvariance(data, mu) 2.5 Exceptions ---------- A single exception is defined: StatisticsError is a subclass of ValueError. """ __all__ = [ 'StatisticsError', 'pstdev', 'pvariance', 'stdev', 'variance', 'median', 'median_low', 'median_high', 'median_grouped', 'mean', 'mode', ] import collections import math from fractions import Fraction from decimal import Decimal # === Exceptions === class StatisticsError(ValueError): pass # === Private utilities === def _sum(data, start=0): """_sum(data [, start]) -> value Return a high-precision sum of the given numeric data. If optional argument ``start`` is given, it is added to the total. If ``data`` is empty, ``start`` (defaulting to 0) is returned. Examples -------- >>> _sum([3, 2.25, 4.5, -0.5, 1.0], 0.75) 11.0 Some sources of round-off error will be avoided: >>> _sum([1e50, 1, -1e50] * 1000) # Built-in sum returns zero. 1000.0 Fractions and Decimals are also supported: >>> from fractions import Fraction as F >>> _sum([F(2, 3), F(7, 5), F(1, 4), F(5, 6)]) Fraction(63, 20) >>> from decimal import Decimal as D >>> data = [D("0.1375"), D("0.2108"), D("0.3061"), D("0.0419")] >>> _sum(data) Decimal('0.6963') Mixed types are currently treated as an error, except that int is allowed. """ # We fail as soon as we reach a value that is not an int or the type of # the first value which is not an int. E.g. _sum([int, int, float, int]) # is okay, but sum([int, int, float, Fraction]) is not. allowed_types = set([int, type(start)]) n, d = _exact_ratio(start) partials = {d: n} # map {denominator: sum of numerators} # Micro-optimizations. exact_ratio = _exact_ratio partials_get = partials.get # Add numerators for each denominator. 
for x in data: _check_type(type(x), allowed_types) n, d = exact_ratio(x) partials[d] = partials_get(d, 0) + n # Find the expected result type. If allowed_types has only one item, it # will be int; if it has two, use the one which isn't int. assert len(allowed_types) in (1, 2) if len(allowed_types) == 1: assert allowed_types.pop() is int T = int else: T = (allowed_types - set([int])).pop() if None in partials: assert issubclass(T, (float, Decimal)) assert not math.isfinite(partials[None]) return T(partials[None]) total = Fraction() for d, n in sorted(partials.items()): total += Fraction(n, d) if issubclass(T, int): assert total.denominator == 1 return T(total.numerator) if issubclass(T, Decimal): return T(total.numerator)/total.denominator return T(total) def _check_type(T, allowed): if T not in allowed: if len(allowed) == 1: allowed.add(T) else: types = ', '.join([t.__name__ for t in allowed] + [T.__name__]) raise TypeError("unsupported mixed types: %s" % types) def _exact_ratio(x): """Convert Real number x exactly to (numerator, denominator) pair. >>> _exact_ratio(0.25) (1, 4) x is expected to be an int, Fraction, Decimal or float. """ try: try: # int, Fraction return (x.numerator, x.denominator) except AttributeError: # float try: return x.as_integer_ratio() except AttributeError: # Decimal try: return _decimal_to_ratio(x) except AttributeError: msg = "can't convert type '{}' to numerator/denominator" raise TypeError(msg.format(type(x).__name__)) from None except (OverflowError, ValueError): # INF or NAN if __debug__: # Decimal signalling NANs cannot be converted to float :-( if isinstance(x, Decimal): assert not x.is_finite() else: assert not math.isfinite(x) return (x, None) # FIXME This is faster than Fraction.from_decimal, but still too slow. def _decimal_to_ratio(d): """Convert Decimal d to exact integer ratio (numerator, denominator). 
>>> from decimal import Decimal >>> _decimal_to_ratio(Decimal("2.6")) (26, 10) """ sign, digits, exp = d.as_tuple() if exp in ('F', 'n', 'N'): # INF, NAN, sNAN assert not d.is_finite() raise ValueError num = 0 for digit in digits: num = num*10 + digit if exp < 0: den = 10**-exp else: num *= 10**exp den = 1 if sign: num = -num return (num, den) def _counts(data): # Generate a table of sorted (value, frequency) pairs. table = collections.Counter(iter(data)).most_common() if not table: return table # Extract the values with the highest frequency. maxfreq = table[0][1] for i in range(1, len(table)): if table[i][1] != maxfreq: table = table[:i] break return table # === Measures of central tendency (averages) === def mean(data): """Return the sample arithmetic mean of data. >>> mean([1, 2, 3, 4, 4]) 2.8 >>> from fractions import Fraction as F >>> mean([F(3, 7), F(1, 21), F(5, 3), F(1, 3)]) Fraction(13, 21) >>> from decimal import Decimal as D >>> mean([D("0.5"), D("0.75"), D("0.625"), D("0.375")]) Decimal('0.5625') If ``data`` is empty, StatisticsError will be raised. """ if iter(data) is data: data = list(data) n = len(data) if n < 1: raise StatisticsError('mean requires at least one data point') return _sum(data)/n # FIXME: investigate ways to calculate medians without sorting? Quickselect? def median(data): """Return the median (middle value) of numeric data. When the number of data points is odd, return the middle data point. When the number of data points is even, the median is interpolated by taking the average of the two middle values: >>> median([1, 3, 5]) 3 >>> median([1, 3, 5, 7]) 4.0 """ data = sorted(data) n = len(data) if n == 0: raise StatisticsError("no median for empty data") if n%2 == 1: return data[n//2] else: i = n//2 return (data[i - 1] + data[i])/2 def median_low(data): """Return the low median of numeric data. When the number of data points is odd, the middle value is returned. When it is even, the smaller of the two middle values is returned. 
>>> median_low([1, 3, 5]) 3 >>> median_low([1, 3, 5, 7]) 3 """ data = sorted(data) n = len(data) if n == 0: raise StatisticsError("no median for empty data") if n%2 == 1: return data[n//2] else: return data[n//2 - 1] def median_high(data): """Return the high median of data. When the number of data points is odd, the middle value is returned. When it is even, the larger of the two middle values is returned. >>> median_high([1, 3, 5]) 3 >>> median_high([1, 3, 5, 7]) 5 """ data = sorted(data) n = len(data) if n == 0: raise StatisticsError("no median for empty data") return data[n//2] def median_grouped(data, interval=1): """"Return the 50th percentile (median) of grouped continuous data. >>> median_grouped([1, 2, 2, 3, 4, 4, 4, 4, 4, 5]) 3.7 >>> median_grouped([52, 52, 53, 54]) 52.5 This calculates the median as the 50th percentile, and should be used when your data is continuous and grouped. In the above example, the values 1, 2, 3, etc. actually represent the midpoint of classes 0.5-1.5, 1.5-2.5, 2.5-3.5, etc. The middle value falls somewhere in class 3.5-4.5, and interpolation is used to estimate it. Optional argument ``interval`` represents the class interval, and defaults to 1. Changing the class interval naturally will change the interpolated 50th percentile value: >>> median_grouped([1, 3, 3, 5, 7], interval=1) 3.25 >>> median_grouped([1, 3, 3, 5, 7], interval=2) 3.5 This function does not check whether the data points are at least ``interval`` apart. """ data = sorted(data) n = len(data) if n == 0: raise StatisticsError("no median for empty data") elif n == 1: return data[0] # Find the value at the midpoint. Remember this corresponds to the # centre of the class interval. x = data[n//2] for obj in (x, interval): if isinstance(obj, (str, bytes)): raise TypeError('expected number but got %r' % obj) try: L = x - interval/2 # The lower limit of the median interval. except TypeError: # Mixed type. For now we just coerce to float. 
L = float(x) - float(interval)/2 cf = data.index(x) # Number of values below the median interval. # FIXME The following line could be more efficient for big lists. f = data.count(x) # Number of data points in the median interval. return L + interval*(n/2 - cf)/f def mode(data): """Return the most common data point from discrete or nominal data. ``mode`` assumes discrete data, and returns a single value. This is the standard treatment of the mode as commonly taught in schools: >>> mode([1, 1, 2, 3, 3, 3, 3, 4]) 3 This also works with nominal (non-numeric) data: >>> mode(["red", "blue", "blue", "red", "green", "red", "red"]) 'red' If there is not exactly one most common value, ``mode`` will raise StatisticsError. """ # Generate a table of sorted (value, frequency) pairs. table = _counts(data) if len(table) == 1: return table[0][0] elif table: raise StatisticsError( 'no unique mode; found %d equally common values' % len(table) ) else: raise StatisticsError('no mode for empty data') # === Measures of spread === # See http://mathworld.wolfram.com/Variance.html # http://mathworld.wolfram.com/SampleVariance.html # http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance # # Under no circumstances use the so-called "computational formula for # variance", as that is only suitable for hand calculations with a small # amount of low-precision data. It has terrible numeric properties. # # See a comparison of three computational methods here: # http://www.johndcook.com/blog/2008/09/26/comparing-three-methods-of-computing-standard-deviation/ def _ss(data, c=None): """Return sum of square deviations of sequence data. If ``c`` is None, the mean is calculated in one pass, and the deviations from the mean are calculated in a second pass. Otherwise, deviations are calculated from ``c`` as given. Use the second case with care, as it can lead to garbage results. 
""" if c is None: c = mean(data) ss = _sum((x-c)**2 for x in data) # The following sum should mathematically equal zero, but due to rounding # error may not. ss -= _sum((x-c) for x in data)**2/len(data) assert not ss < 0, 'negative sum of square deviations: %f' % ss return ss def variance(data, xbar=None): """Return the sample variance of data. data should be an iterable of Real-valued numbers, with at least two values. The optional argument xbar, if given, should be the mean of the data. If it is missing or None, the mean is automatically calculated. Use this function when your data is a sample from a population. To calculate the variance from the entire population, see ``pvariance``. Examples: >>> data = [2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5] >>> variance(data) 1.3720238095238095 If you have already calculated the mean of your data, you can pass it as the optional second argument ``xbar`` to avoid recalculating it: >>> m = mean(data) >>> variance(data, m) 1.3720238095238095 This function does not check that ``xbar`` is actually the mean of ``data``. Giving arbitrary values for ``xbar`` may lead to invalid or impossible results. Decimals and Fractions are supported: >>> from decimal import Decimal as D >>> variance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")]) Decimal('31.01875') >>> from fractions import Fraction as F >>> variance([F(1, 6), F(1, 2), F(5, 3)]) Fraction(67, 108) """ if iter(data) is data: data = list(data) n = len(data) if n < 2: raise StatisticsError('variance requires at least two data points') ss = _ss(data, xbar) return ss/(n-1) def pvariance(data, mu=None): """Return the population variance of ``data``. data should be an iterable of Real-valued numbers, with at least one value. The optional argument mu, if given, should be the mean of the data. If it is missing or None, the mean is automatically calculated. Use this function to calculate the variance from the entire population. 
To estimate the variance from a sample, the ``variance`` function is usually a better choice. Examples: >>> data = [0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25] >>> pvariance(data) 1.25 If you have already calculated the mean of the data, you can pass it as the optional second argument to avoid recalculating it: >>> mu = mean(data) >>> pvariance(data, mu) 1.25 This function does not check that ``mu`` is actually the mean of ``data``. Giving arbitrary values for ``mu`` may lead to invalid or impossible results. Decimals and Fractions are supported: >>> from decimal import Decimal as D >>> pvariance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")]) Decimal('24.815') >>> from fractions import Fraction as F >>> pvariance([F(1, 4), F(5, 4), F(1, 2)]) Fraction(13, 72) """ if iter(data) is data: data = list(data) n = len(data) if n < 1: raise StatisticsError('pvariance requires at least one data point') ss = _ss(data, mu) return ss/n def stdev(data, xbar=None): """Return the square root of the sample variance. See ``variance`` for arguments and other details. >>> stdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75]) 1.0810874155219827 """ var = variance(data, xbar) try: return var.sqrt() except AttributeError: return math.sqrt(var) def pstdev(data, mu=None): """Return the square root of the population variance. See ``pvariance`` for arguments and other details. 
>>> pstdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75]) 0.986893273527251 """ var = pvariance(data, mu) try: return var.sqrt() except AttributeError: return math.sqrt(var) cmarkgfm/third_party/cmark/bench/samples/0000755000175000017500000000000014210444464020701 5ustar carstencarstencmarkgfm/third_party/cmark/bench/samples/inline-links-nested.md0000644000175000017500000000047114210444464025101 0ustar carstencarstenValid links: [[[[[[[[](test)](test)](test)](test)](test)](test)](test)] [ [[[[[[[[[[[[[[[[[[ [](test) ]]]]]]]]]]]]]]]]]] ](test) Invalid links: [[[[[[[[[ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ ![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![ cmarkgfm/third_party/cmark/bench/samples/inline-backticks.md0000644000175000017500000000010014210444464024424 0ustar carstencarsten`lots`of`backticks` ``i``wonder``how``this``will``be``parsed`` cmarkgfm/third_party/cmark/bench/samples/block-hr.md0000644000175000017500000000011214210444464022716 0ustar carstencarsten * * * * * - - - - - ________ ************************* text cmarkgfm/third_party/cmark/bench/samples/block-fences.md0000644000175000017500000000011014210444464023546 0ustar carstencarsten ``````````text an example ``` of a fenced ``` code block `````````` cmarkgfm/third_party/cmark/bench/samples/inline-entity.md0000644000175000017500000000051014210444464024007 0ustar carstencarstenentities:   & © Æ Ď ¾ ℋ ⅆ ∲ # Ӓ Ϡ � non-entities: &18900987654321234567890; &1234567890098765432123456789009876543212345678987654; &qwertyuioppoiuytrewqwer; &oiuytrewqwertyuioiuytrewqwertyuioytrewqwertyuiiuytri; cmarkgfm/third_party/cmark/bench/samples/block-code.md0000644000175000017500000000011114210444464023216 0ustar carstencarsten an example of a code block cmarkgfm/third_party/cmark/bench/samples/block-ref-nested.md0000644000175000017500000000040114210444464024342 0ustar carstencarsten[[[[[[[foo]]]]]]] [[[[[[[foo]]]]]]]: bar [[[[[[foo]]]]]]: bar [[[[[foo]]]]]: bar 
[[[[foo]]]]: bar [[[foo]]]: bar [[foo]]: bar [foo]: bar [*[*[*[*[foo]*]*]*]*] [*[*[*[*[foo]*]*]*]*]: bar [*[*[*[foo]*]*]*]: bar [*[*[foo]*]*]: bar [*[foo]*]: bar [foo]: bar cmarkgfm/third_party/cmark/bench/samples/lorem1.md0000644000175000017500000000731514210444464022430 0ustar carstencarstenLorem ipsum dolor sit amet, __consectetur__ adipiscing elit. Cras imperdiet nec erat ac condimentum. Nulla vel rutrum ligula. Sed hendrerit interdum orci a posuere. Vivamus ut velit aliquet, mollis purus eget, iaculis nisl. Proin posuere malesuada ante. Proin auctor orci eros, ac molestie lorem dictum nec. Vestibulum sit amet erat est. Morbi luctus sed elit ac luctus. Proin blandit, enim vitae egestas posuere, neque elit ultricies dui, vel mattis nibh enim ac lorem. Maecenas molestie nisl sit amet velit dictum lobortis. Aliquam erat volutpat. Vivamus sagittis, diam in [vehicula](https://github.com/markdown-it/markdown-it) lobortis, sapien arcu mattis erat, vel aliquet sem urna et risus. Ut feugiat sapien vitae mi elementum laoreet. Suspendisse potenti. Aliquam erat nisl, aliquam pretium libero aliquet, sagittis eleifend nunc. In hac habitasse platea dictumst. Integer turpis augue, tincidunt dignissim mauris id, rhoncus dapibus purus. Maecenas et enim odio. Nullam massa metus, varius quis vehicula sed, pharetra mollis erat. In quis viverra velit. Vivamus placerat, est nec hendrerit varius, enim dui hendrerit magna, ut pulvinar nibh lorem vel lacus. Mauris a orci iaculis, hendrerit eros sed, gravida leo. In dictum mauris vel augue varius, ac ullamcorper nisl ornare. In eu posuere velit, ac fermentum arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nullam sed malesuada leo, at interdum elit. Nullam ut tincidunt nunc. [Pellentesque][1] metus lacus, commodo eget justo ut, rutrum varius nunc. Sed non rhoncus risus. Morbi sodales gravida pulvinar. Duis malesuada, odio volutpat elementum vulputate, massa magna scelerisque ante, et accumsan tellus nunc in sem. 
Donec mattis arcu et velit aliquet, non sagittis justo vestibulum. Suspendisse volutpat felis lectus, nec consequat ipsum mattis id. Donec dapibus vehicula facilisis. In tincidunt mi nisi, nec faucibus tortor euismod nec. Suspendisse ante ligula, aliquet vitae libero eu, vulputate dapibus libero. Sed bibendum, sapien at posuere interdum, libero est sollicitudin magna, ac gravida tellus purus eu ipsum. Proin ut quam arcu. Suspendisse potenti. Donec ante velit, ornare at augue quis, tristique laoreet sem. Etiam in ipsum elit. Nullam cursus dolor sit amet nulla feugiat tristique. Phasellus ac tellus tincidunt, imperdiet purus eget, ullamcorper ipsum. Cras eu tincidunt sem. Nullam sed dapibus magna. Lorem ipsum dolor sit amet, consectetur adipiscing elit. In id venenatis tortor. In consectetur sollicitudin pharetra. Etiam convallis nisi nunc, et aliquam turpis viverra sit amet. Maecenas faucibus sodales tortor. Suspendisse lobortis mi eu leo viverra volutpat. Pellentesque velit ante, vehicula sodales congue ut, elementum a urna. Cras tempor, ipsum eget luctus rhoncus, arcu ligula fermentum urna, vulputate pharetra enim enim non libero. Proin diam quam, elementum in eleifend id, elementum et metus. Cras in justo consequat justo semper ultrices. Sed dignissim lectus a ante mollis, nec vulputate ante molestie. Proin in porta nunc. Etiam pulvinar turpis sed velit porttitor, vel adipiscing velit fringilla. Cras ac tellus vitae purus pharetra tincidunt. Sed cursus aliquet aliquet. Cras eleifend commodo malesuada. In turpis turpis, ullamcorper ut tincidunt a, ullamcorper a nunc. Etiam luctus tellus ac dapibus gravida. Ut nec lacus laoreet neque ullamcorper volutpat. Nunc et leo erat. Aenean mattis ultrices lorem, eget adipiscing dolor ultricies eu. In hac habitasse platea dictumst. Vivamus cursus feugiat sapien quis aliquam. Mauris quam libero, porta vel volutpat ut, blandit a purus. Vivamus vestibulum dui vel tortor molestie, sit amet feugiat sem commodo. Nulla facilisi. 
Sed molestie arcu eget tellus vestibulum tristique. [1]: https://github.com/markdown-it cmarkgfm/third_party/cmark/bench/samples/inline-escape.md0000644000175000017500000000027114210444464023737 0ustar carstencarsten \t\e\s\t\i\n\g \e\s\c\a\p\e \s\e\q\u\e\n\c\e\s \!\\\"\#\$\%\&\'\(\)\*\+\,\.\/\:\;\<\=\>\? \@ \[ \] \^ \_ \` \{ \| \} \~ \- \' \ \\ \\\ \\\\ \\\\\ \ \ \ \ cmarkgfm/third_party/cmark/bench/samples/inline-links-flat.md0000644000175000017500000000133214210444464024542 0ustar carstencarstenValid links: [this is a link]() [this is a link]() [this is a link](http://something.example.com/foo/bar 'test') ![this is an image]() ![this is an image]() ![this is an image](http://something.example.com/foo/bar 'test') [escape test](<\>\>\>\>\>\>\>\>\>\>\>\>\>\>> '\'\'\'\'\'\'\'\'\'\'\'\'\'\'') [escape test \]\]\]\]\]\]\]\]\]\]\]\]\]\]\]\]](\)\)\)\)\)\)\)\)\)\)\)\)\)\)) Invalid links: [this is not a link [this is not a link]( [this is not a link](http://something.example.com/foo/bar 'test' [this is not a link]((((((((((((((((((((((((((((((((((((((((((((((( [this is not a link]((((((((((()))))))))) (((((((((())))))))))) cmarkgfm/third_party/cmark/bench/samples/block-list-flat.md0000644000175000017500000000101514210444464024207 0ustar carstencarsten - tidy - bullet - list - loose - bullet - list 0. ordered 1. list 2. example - - - - 1. 2. 3. - an example of a list item with a continuation this part is inside the list this part is just a paragraph 1. test - test 1. test - test 111111111111111111111111111111111111111111. is this a valid bullet? 
- _________________________ - this - is a long - loose - list - with - some tidy - list - items - in - between - _________________________ cmarkgfm/third_party/cmark/bench/samples/block-ref-flat.md0000644000175000017500000000073214210444464024015 0ustar carstencarsten[1] [2] [3] [1] [2] [3] [looooooooooooooooooooooooooooooooooooooooooooooooooong label] [1]: [2]: http://something.example.com/foo/bar 'test' [3]: http://foo/bar [ looooooooooooooooooooooooooooooooooooooooooooooooooong label ]: 111 'test' [[[[[[[[[[[[[[[[[[[[ this should not slow down anything ]]]]]]]]]]]]]]]]]]]]: q (as long as it is not referenced anywhere) [[[[[[[[[[[[[[[[[[[[]: this is not a valid reference cmarkgfm/third_party/cmark/bench/samples/inline-em-flat.md0000644000175000017500000000023314210444464024022 0ustar carstencarsten*this* *is* *your* *basic* *boring* *emphasis* _this_ _is_ _your_ _basic_ _boring_ _emphasis_ **this** **is** **your** **basic** **boring** **emphasis** cmarkgfm/third_party/cmark/bench/samples/block-bq-flat.md0000644000175000017500000000036614210444464023646 0ustar carstencarsten> the simple example of a blockquote > the simple example of a blockquote > the simple example of a blockquote > the simple example of a blockquote ... continuation ... continuation ... continuation ... continuation empty blockquote: > > > > cmarkgfm/third_party/cmark/bench/samples/block-html.md0000644000175000017500000000043314210444464023257 0ustar carstencarsten
blah blah
**test**
test
cmarkgfm/third_party/cmark/bench/samples/block-lheading.md0000644000175000017500000000016114210444464024064 0ustar carstencarstenheading --- heading =================================== not a heading ----------------------------------- text cmarkgfm/third_party/cmark/bench/samples/inline-em-nested.md0000644000175000017500000000023614210444464024361 0ustar carstencarsten*this *is *a *bunch* of* nested* emphases* __this __is __a __bunch__ of__ nested__ emphases__ ***this ***is ***a ***bunch*** of*** nested*** emphases*** cmarkgfm/third_party/cmark/bench/samples/inline-newlines.md0000644000175000017500000000021414210444464024320 0ustar carstencarsten this\ should\ be\ separated\ by\ newlines this should be separated by newlines too this should not be separated by newlines cmarkgfm/third_party/cmark/bench/samples/rawtabs.md0000644000175000017500000000044714210444464022673 0ustar carstencarsten this is a test for tab expansion, be careful not to replace them with spaces 1 4444 22 333 333 22 4444 1 tab-indented line space-indented line tab-indented line a lot of spaces in between here a lot of tabs in between here cmarkgfm/third_party/cmark/bench/samples/inline-autolink.md0000644000175000017500000000105514210444464024326 0ustar carstencarstenclosed (valid) autolinks: these are not autolinks: cmarkgfm/third_party/cmark/bench/samples/inline-html.md0000644000175000017500000000100414210444464023436 0ustar carstencarstenTaking commonmark tests from the spec for benchmarking here: <33> <__> foo foo foo foo foo &<]]> cmarkgfm/third_party/cmark/bench/samples/block-heading.md0000644000175000017500000000017314210444464023713 0ustar carstencarsten# heading ### heading ##### heading # heading # ### heading ### ##### heading \#\#\#\#\###### ############ not a heading cmarkgfm/third_party/cmark/bench/samples/block-bq-nested.md0000644000175000017500000000054314210444464024177 0ustar carstencarsten>>>>>> deeply nested blockquote >>>>> deeply nested blockquote >>>> deeply nested 
blockquote >>> deeply nested blockquote >> deeply nested blockquote > deeply nested blockquote > deeply nested blockquote >> deeply nested blockquote >>> deeply nested blockquote >>>> deeply nested blockquote >>>>> deeply nested blockquote >>>>>> deeply nested blockquote cmarkgfm/third_party/cmark/bench/samples/block-list-nested.md0000644000175000017500000000060714210444464024551 0ustar carstencarsten - this - is - a - deeply - nested - bullet - list 1. this 2. is 3. a 4. deeply 5. nested 6. unordered 7. list - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 6 - 5 - 4 - 3 - 2 - 1 - - - - - - - - - deeply-nested one-element item cmarkgfm/third_party/cmark/bench/samples/inline-em-worst.md0000644000175000017500000000025514210444464024256 0ustar carstencarsten*this *is *a *worst *case *for *em *backtracking __this __is __a __worst __case __for __em __backtracking ***this ***is ***a ***worst ***case ***for ***em ***backtracking cmarkgfm/third_party/cmark/bench/stats.py0000644000175000017500000000075314210444464020752 0ustar carstencarsten#!/usr/bin/env python3 import sys import statistics def pairs(l, n): return zip(*[l[i::n] for i in range(n)]) # data comes in pairs: # n - time for running the program with no input # m - time for running it with the benchmark input # we measure (m - n) values = [ float(y) - float(x) for (x,y) in pairs(sys.stdin.readlines(),2)] print("mean = %.4f, median = %.4f, stdev = %.4f" % (statistics.mean(values), statistics.median(values), statistics.stdev(values))) cmarkgfm/third_party/cmark/CMakeLists.txt0000755000175000017500000000235014210444464020721 0ustar carstencarstencmake_minimum_required(VERSION 3.0) project(cmark-gfm) set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 29) set(PROJECT_VERSION_PATCH 0) set(PROJECT_VERSION_GFM 3) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM}) include("FindAsan.cmake") include("CheckFileOffsetBits.cmake") if("${CMAKE_SOURCE_DIR}" STREQUAL 
"${CMAKE_BINARY_DIR}") message(FATAL_ERROR "Do not build in-source.\nPlease remove CMakeCache.txt and the CMakeFiles/ directory.\nThen: mkdir build ; cd build ; cmake .. ; make") endif() option(CMARK_TESTS "Build cmark-gfm tests and enable testing" ON) option(CMARK_STATIC "Build static libcmark-gfm library" ON) option(CMARK_SHARED "Build shared libcmark-gfm library" ON) option(CMARK_LIB_FUZZER "Build libFuzzer fuzzing harness" OFF) add_subdirectory(src) add_subdirectory(extensions) if(CMARK_TESTS AND (CMARK_SHARED OR CMARK_STATIC)) add_subdirectory(api_test) endif() add_subdirectory(man) if(CMARK_TESTS) enable_testing() add_subdirectory(test testdir) endif() if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Profile Release Asan Ubsan." FORCE) endif(NOT CMAKE_BUILD_TYPE) cmarkgfm/third_party/cmark/CheckFileOffsetBits.cmake0000644000175000017500000000367214210444464022776 0ustar carstencarsten# - Check if _FILE_OFFSET_BITS macro needed for large files # CHECK_FILE_OFFSET_BITS () # # The following variables may be set before calling this macro to # modify the way the check is run: # # CMAKE_REQUIRED_FLAGS = string of compile command line flags # CMAKE_REQUIRED_DEFINITIONS = list of macros to define (-DFOO=bar) # CMAKE_REQUIRED_INCLUDES = list of include directories # Copyright (c) 2009, Michihiro NAKAJIMA # # Redistribution and use is allowed according to the terms of the BSD license. # For details see the accompanying COPYING-CMAKE-SCRIPTS file. 
#INCLUDE(CheckCSourceCompiles) GET_FILENAME_COMPONENT(_selfdir_CheckFileOffsetBits "${CMAKE_CURRENT_LIST_FILE}" PATH) MACRO (CHECK_FILE_OFFSET_BITS) IF(NOT DEFINED _FILE_OFFSET_BITS) MESSAGE(STATUS "Checking _FILE_OFFSET_BITS for large files") TRY_COMPILE(__WITHOUT_FILE_OFFSET_BITS_64 ${CMAKE_CURRENT_BINARY_DIR} ${_selfdir_CheckFileOffsetBits}/CheckFileOffsetBits.c COMPILE_DEFINITIONS ${CMAKE_REQUIRED_DEFINITIONS}) IF(NOT __WITHOUT_FILE_OFFSET_BITS_64) TRY_COMPILE(__WITH_FILE_OFFSET_BITS_64 ${CMAKE_CURRENT_BINARY_DIR} ${_selfdir_CheckFileOffsetBits}/CheckFileOffsetBits.c COMPILE_DEFINITIONS ${CMAKE_REQUIRED_DEFINITIONS} -D_FILE_OFFSET_BITS=64) ENDIF(NOT __WITHOUT_FILE_OFFSET_BITS_64) IF(NOT __WITHOUT_FILE_OFFSET_BITS_64 AND __WITH_FILE_OFFSET_BITS_64) SET(_FILE_OFFSET_BITS 64 CACHE INTERNAL "_FILE_OFFSET_BITS macro needed for large files") MESSAGE(STATUS "Checking _FILE_OFFSET_BITS for large files - needed") ELSE(NOT __WITHOUT_FILE_OFFSET_BITS_64 AND __WITH_FILE_OFFSET_BITS_64) SET(_FILE_OFFSET_BITS "" CACHE INTERNAL "_FILE_OFFSET_BITS macro needed for large files") MESSAGE(STATUS "Checking _FILE_OFFSET_BITS for large files - not needed") ENDIF(NOT __WITHOUT_FILE_OFFSET_BITS_64 AND __WITH_FILE_OFFSET_BITS_64) ENDIF(NOT DEFINED _FILE_OFFSET_BITS) ENDMACRO (CHECK_FILE_OFFSET_BITS) cmarkgfm/third_party/cmark/.github/0000755000175000017500000000000014210444464017516 5ustar carstencarstencmarkgfm/third_party/cmark/.github/workflows/0000755000175000017500000000000014210444464021553 5ustar carstencarstencmarkgfm/third_party/cmark/.github/workflows/ci.yml0000644000175000017500000000317414210444464022676 0ustar carstencarstenname: CI tests on: [push, workflow_dispatch] jobs: linux: runs-on: ubuntu-latest strategy: fail-fast: false matrix: cmake_opts: - '-DCMARK_SHARED=ON' - '' compiler: - c: 'clang' cpp: 'clang++' - c: 'gcc' cpp: 'g++' env: CMAKE_OPTIONS: ${{ matrix.cmake_opts }} CC: ${{ matrix.compiler.c }} CXX: ${{ matrix.compiler.cpp }} steps: - uses: actions/checkout@v1 
- name: Install valgrind run: | sudo apt install -y valgrind - name: Build and test run: | make make test make leakcheck macos: runs-on: macOS-latest strategy: fail-fast: false matrix: cmake_opts: - '-DCMARK_SHARED=ON' - '' compiler: - c: 'clang' cpp: 'clang++' - c: 'gcc' cpp: 'g++' env: CMAKE_OPTIONS: ${{ matrix.cmake_opts }} CC: ${{ matrix.compiler.c }} CXX: ${{ matrix.compiler.cpp }} steps: - uses: actions/checkout@v1 - name: Build and test env: CMAKE_OPTIONS: -DCMARK_SHARED=OFF run: | make make test windows: runs-on: windows-latest strategy: fail-fast: false matrix: cmake_opts: - '-DCMARK_SHARED=ON' - '' env: CMAKE_OPTIONS: ${{ matrix.cmake_opts }} steps: - uses: actions/checkout@v1 - uses: ilammy/msvc-dev-cmd@v1 - name: Build and test run: | chcp 65001 nmake.exe /nologo /f Makefile.nmake test shell: cmd cmarkgfm/third_party/cmark/extensions/0000755000175000017500000000000014210444464020355 5ustar carstencarstencmarkgfm/third_party/cmark/extensions/strikethrough.h0000644000175000017500000000034414210444464023431 0ustar carstencarsten#ifndef CMARK_GFM_STRIKETHROUGH_H #define CMARK_GFM_STRIKETHROUGH_H #include "cmark-gfm-core-extensions.h" extern cmark_node_type CMARK_NODE_STRIKETHROUGH; cmark_syntax_extension *create_strikethrough_extension(void); #endif cmarkgfm/third_party/cmark/extensions/autolink.h0000644000175000017500000000024414210444464022354 0ustar carstencarsten#ifndef CMARK_GFM_AUTOLINK_H #define CMARK_GFM_AUTOLINK_H #include "cmark-gfm-core-extensions.h" cmark_syntax_extension *create_autolink_extension(void); #endif cmarkgfm/third_party/cmark/extensions/cmark-gfm-core-extensions.h0000644000175000017500000000327214210444464025521 0ustar carstencarsten#ifndef CMARK_GFM_CORE_EXTENSIONS_H #define CMARK_GFM_CORE_EXTENSIONS_H #ifdef __cplusplus extern "C" { #endif #include "cmark-gfm-extension_api.h" #include "cmark-gfm-extensions_export.h" #include "config.h" // for bool #include CMARK_GFM_EXTENSIONS_EXPORT void 
cmark_gfm_core_extensions_ensure_registered(void); CMARK_GFM_EXTENSIONS_EXPORT uint16_t cmark_gfm_extensions_get_table_columns(cmark_node *node); /** Sets the number of columns for the table, returning 1 on success and 0 on error. */ CMARK_GFM_EXTENSIONS_EXPORT int cmark_gfm_extensions_set_table_columns(cmark_node *node, uint16_t n_columns); CMARK_GFM_EXTENSIONS_EXPORT uint8_t *cmark_gfm_extensions_get_table_alignments(cmark_node *node); /** Sets the alignments for the table, returning 1 on success and 0 on error. */ CMARK_GFM_EXTENSIONS_EXPORT int cmark_gfm_extensions_set_table_alignments(cmark_node *node, uint16_t ncols, uint8_t *alignments); CMARK_GFM_EXTENSIONS_EXPORT int cmark_gfm_extensions_get_table_row_is_header(cmark_node *node); /** Sets whether the node is a table header row, returning 1 on success and 0 on error. */ CMARK_GFM_EXTENSIONS_EXPORT int cmark_gfm_extensions_set_table_row_is_header(cmark_node *node, int is_header); CMARK_GFM_EXTENSIONS_EXPORT bool cmark_gfm_extensions_get_tasklist_item_checked(cmark_node *node); /* For backwards compatibility */ #define cmark_gfm_extensions_tasklist_is_checked cmark_gfm_extensions_get_tasklist_item_checked /** Sets whether a tasklist item is "checked" (completed), returning 1 on success and 0 on error. 
*/ CMARK_GFM_EXTENSIONS_EXPORT int cmark_gfm_extensions_set_tasklist_item_checked(cmark_node *node, bool is_checked); #ifdef __cplusplus } #endif #endif cmarkgfm/third_party/cmark/extensions/tasklist.h0000644000175000017500000000022014210444464022356 0ustar carstencarsten#ifndef TASKLIST_H #define TASKLIST_H #include "cmark-gfm-core-extensions.h" cmark_syntax_extension *create_tasklist_extension(void); #endif cmarkgfm/third_party/cmark/extensions/tagfilter.c0000644000175000017500000000234214210444464022503 0ustar carstencarsten#include "tagfilter.h" #include #include static const char *blacklist[] = { "title", "textarea", "style", "xmp", "iframe", "noembed", "noframes", "script", "plaintext", NULL, }; static int is_tag(const unsigned char *tag_data, size_t tag_size, const char *tagname) { size_t i; if (tag_size < 3 || tag_data[0] != '<') return 0; i = 1; if (tag_data[i] == '/') { i++; } for (; i < tag_size; ++i, ++tagname) { if (*tagname == 0) break; if (tolower(tag_data[i]) != *tagname) return 0; } if (i == tag_size) return 0; if (cmark_isspace(tag_data[i]) || tag_data[i] == '>') return 1; if (tag_data[i] == '/' && tag_size >= i + 2 && tag_data[i + 1] == '>') return 1; return 0; } static int filter(cmark_syntax_extension *ext, const unsigned char *tag, size_t tag_len) { const char **it; for (it = blacklist; *it; ++it) { if (is_tag(tag, tag_len, *it)) { return 0; } } return 1; } cmark_syntax_extension *create_tagfilter_extension(void) { cmark_syntax_extension *ext = cmark_syntax_extension_new("tagfilter"); cmark_syntax_extension_set_html_filter_func(ext, filter); return ext; } cmarkgfm/third_party/cmark/extensions/CMakeLists.txt0000644000175000017500000000722414210444464023122 0ustar carstencarstencmake_minimum_required(VERSION 2.8) set(LIBRARY "libcmark-gfm-extensions") set(STATICLIBRARY "libcmark-gfm-extensions_static") set(LIBRARY_SOURCES core-extensions.c table.c strikethrough.c autolink.c tagfilter.c ext_scanners.c ext_scanners.re ext_scanners.h tasklist.c ) 
include_directories( ${PROJECT_SOURCE_DIR}/src ${PROJECT_BINARY_DIR}/src ) include (GenerateExportHeader) include_directories(. ${CMAKE_CURRENT_BINARY_DIR}) set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg") set(CMAKE_LINKER_PROFILE "${CMAKE_LINKER_FLAGS_RELEASE} -pg") add_compiler_export_flags() if (CMARK_SHARED) add_library(${LIBRARY} SHARED ${LIBRARY_SOURCES}) set_target_properties(${LIBRARY} PROPERTIES OUTPUT_NAME "cmark-gfm-extensions" SOVERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} VERSION ${PROJECT_VERSION}) set_property(TARGET ${LIBRARY} APPEND PROPERTY MACOSX_RPATH true) # Avoid name clash between PROGRAM and LIBRARY pdb files. set_target_properties(${LIBRARY} PROPERTIES PDB_NAME cmark-gfm-extensions_dll) generate_export_header(${LIBRARY} BASE_NAME cmark-gfm-extensions) list(APPEND CMARK_INSTALL ${LIBRARY}) target_link_libraries(${LIBRARY} libcmark-gfm) endif() if (CMARK_STATIC) add_library(${STATICLIBRARY} STATIC ${LIBRARY_SOURCES}) set_target_properties(${STATICLIBRARY} PROPERTIES COMPILE_FLAGS "-DCMARK_GFM_STATIC_DEFINE -DCMARK_GFM_EXTENSIONS_STATIC_DEFINE" POSITION_INDEPENDENT_CODE ON) if (MSVC) set_target_properties(${STATICLIBRARY} PROPERTIES OUTPUT_NAME "cmark-gfm-extensions_static" VERSION ${PROJECT_VERSION}) else() set_target_properties(${STATICLIBRARY} PROPERTIES OUTPUT_NAME "cmark-gfm-extensions" VERSION ${PROJECT_VERSION}) endif(MSVC) if (NOT CMARK_SHARED) generate_export_header(${STATICLIBRARY} BASE_NAME cmark-gfm-extensions) endif() list(APPEND CMARK_INSTALL ${STATICLIBRARY}) endif() set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS ON) include (InstallRequiredSystemLibraries) install(TARGETS ${CMARK_INSTALL} EXPORT cmark-gfm-extensions RUNTIME DESTINATION bin LIBRARY DESTINATION lib${LIB_SUFFIX} ARCHIVE DESTINATION lib${LIB_SUFFIX} ) if (CMARK_SHARED OR CMARK_STATIC) install(FILES cmark-gfm-core-extensions.h ${CMAKE_CURRENT_BINARY_DIR}/cmark-gfm-extensions_export.h 
DESTINATION include ) install(EXPORT cmark-gfm-extensions DESTINATION lib${LIB_SUFFIX}/cmake-gfm-extensions) endif() # Feature tests include(CheckIncludeFile) include(CheckCSourceCompiles) include(CheckCSourceRuns) include(CheckSymbolExists) CHECK_INCLUDE_FILE(stdbool.h HAVE_STDBOOL_H) CHECK_C_SOURCE_COMPILES( "int main() { __builtin_expect(0,0); return 0; }" HAVE___BUILTIN_EXPECT) CHECK_C_SOURCE_COMPILES(" int f(void) __attribute__ (()); int main() { return 0; } " HAVE___ATTRIBUTE__) # Always compile with warnings if(MSVC) # Force to always compile with W4 if(CMAKE_CXX_FLAGS MATCHES "/W[0-4]") string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") else() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4") endif() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /WX /wd4706 /wd4204 /wd4221 /wd4100 /D_CRT_SECURE_NO_WARNINGS") elseif(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wno-unused-parameter -std=c99 -pedantic") endif() # Compile as C++ under MSVC older than 12.0 if(MSVC AND MSVC_VERSION LESS 1800) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /TP") endif() if(CMAKE_BUILD_TYPE STREQUAL "Ubsan") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined") endif() cmarkgfm/third_party/cmark/extensions/tasklist.c0000644000175000017500000001253614210444464022366 0ustar carstencarsten#include "tasklist.h" #include #include #include #include "ext_scanners.h" typedef enum { CMARK_TASKLIST_NOCHECKED, CMARK_TASKLIST_CHECKED, } cmark_tasklist_type; // Local constants static const char *TYPE_STRING = "tasklist"; static const char *get_type_string(cmark_syntax_extension *extension, cmark_node *node) { return TYPE_STRING; } // Return 1 if state was set, 0 otherwise int cmark_gfm_extensions_set_tasklist_item_checked(cmark_node *node, bool is_checked) { // The node has to exist, and be an extension, and actually be the right type in order to get the value. 
if (!node || !node->extension || strcmp(cmark_node_get_type_string(node), TYPE_STRING)) return 0; node->as.list.checked = is_checked; return 1; } bool cmark_gfm_extensions_get_tasklist_item_checked(cmark_node *node) { if (!node || !node->extension || strcmp(cmark_node_get_type_string(node), TYPE_STRING)) return false; if (node->as.list.checked) { return true; } else { return false; } } static bool parse_node_item_prefix(cmark_parser *parser, const char *input, cmark_node *container) { bool res = false; if (parser->indent >= container->as.list.marker_offset + container->as.list.padding) { cmark_parser_advance_offset(parser, input, container->as.list.marker_offset + container->as.list.padding, true); res = true; } else if (parser->blank && container->first_child != NULL) { // if container->first_child is NULL, then the opening line // of the list item was blank after the list marker; in this // case, we are done with the list item. cmark_parser_advance_offset(parser, input, parser->first_nonspace - parser->offset, false); res = true; } return res; } static int matches(cmark_syntax_extension *self, cmark_parser *parser, unsigned char *input, int len, cmark_node *parent_container) { return parse_node_item_prefix(parser, (const char*)input, parent_container); } static int can_contain(cmark_syntax_extension *extension, cmark_node *node, cmark_node_type child_type) { return (node->type == CMARK_NODE_ITEM) ? 1 : 0; } static cmark_node *open_tasklist_item(cmark_syntax_extension *self, int indented, cmark_parser *parser, cmark_node *parent_container, unsigned char *input, int len) { cmark_node_type node_type = cmark_node_get_type(parent_container); if (node_type != CMARK_NODE_ITEM) { return NULL; } bufsize_t matched = scan_tasklist(input, len, 0); if (!matched) { return NULL; } cmark_node_set_syntax_extension(parent_container, self); cmark_parser_advance_offset(parser, (char *)input, 3, false); // Either an upper or lower case X means the task is completed. 
parent_container->as.list.checked = (strstr((char*)input, "[x]") || strstr((char*)input, "[X]")); return NULL; } static void commonmark_render(cmark_syntax_extension *extension, cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options) { bool entering = (ev_type == CMARK_EVENT_ENTER); if (entering) { renderer->cr(renderer); if (node->as.list.checked) { renderer->out(renderer, node, "- [x] ", false, LITERAL); } else { renderer->out(renderer, node, "- [ ] ", false, LITERAL); } cmark_strbuf_puts(renderer->prefix, " "); } else { cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2); renderer->cr(renderer); } } static void html_render(cmark_syntax_extension *extension, cmark_html_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options) { bool entering = (ev_type == CMARK_EVENT_ENTER); if (entering) { cmark_html_render_cr(renderer->html); cmark_strbuf_puts(renderer->html, "html, options); cmark_strbuf_putc(renderer->html, '>'); if (node->as.list.checked) { cmark_strbuf_puts(renderer->html, " "); } else { cmark_strbuf_puts(renderer->html, " "); } } else { cmark_strbuf_puts(renderer->html, "\n"); } } static const char *xml_attr(cmark_syntax_extension *extension, cmark_node *node) { if (node->as.list.checked) { return " completed=\"true\""; } else { return " completed=\"false\""; } } cmark_syntax_extension *create_tasklist_extension(void) { cmark_syntax_extension *ext = cmark_syntax_extension_new("tasklist"); cmark_syntax_extension_set_match_block_func(ext, matches); cmark_syntax_extension_set_get_type_string_func(ext, get_type_string); cmark_syntax_extension_set_open_block_func(ext, open_tasklist_item); cmark_syntax_extension_set_can_contain_func(ext, can_contain); cmark_syntax_extension_set_commonmark_render_func(ext, commonmark_render); cmark_syntax_extension_set_plaintext_render_func(ext, commonmark_render); cmark_syntax_extension_set_html_render_func(ext, html_render); 
cmark_syntax_extension_set_xml_attr_func(ext, xml_attr); return ext; } cmarkgfm/third_party/cmark/extensions/table.h0000644000175000017500000000036714210444464021623 0ustar carstencarsten#ifndef CMARK_GFM_TABLE_H #define CMARK_GFM_TABLE_H #include "cmark-gfm-core-extensions.h" extern cmark_node_type CMARK_NODE_TABLE, CMARK_NODE_TABLE_ROW, CMARK_NODE_TABLE_CELL; cmark_syntax_extension *create_table_extension(void); #endif cmarkgfm/third_party/cmark/extensions/core-extensions.c0000644000175000017500000000161014210444464023644 0ustar carstencarsten#include "cmark-gfm-core-extensions.h" #include "autolink.h" #include "strikethrough.h" #include "table.h" #include "tagfilter.h" #include "tasklist.h" #include "registry.h" #include "plugin.h" static int core_extensions_registration(cmark_plugin *plugin) { cmark_plugin_register_syntax_extension(plugin, create_table_extension()); cmark_plugin_register_syntax_extension(plugin, create_strikethrough_extension()); cmark_plugin_register_syntax_extension(plugin, create_autolink_extension()); cmark_plugin_register_syntax_extension(plugin, create_tagfilter_extension()); cmark_plugin_register_syntax_extension(plugin, create_tasklist_extension()); return 1; } void cmark_gfm_core_extensions_ensure_registered(void) { static int registered = 0; if (!registered) { cmark_register_plugin(core_extensions_registration); registered = 1; } } cmarkgfm/third_party/cmark/extensions/strikethrough.c0000644000175000017500000001333714210444464023432 0ustar carstencarsten#include "strikethrough.h" #include #include cmark_node_type CMARK_NODE_STRIKETHROUGH; static cmark_node *match(cmark_syntax_extension *self, cmark_parser *parser, cmark_node *parent, unsigned char character, cmark_inline_parser *inline_parser) { cmark_node *res = NULL; int left_flanking, right_flanking, punct_before, punct_after, delims; char buffer[101]; if (character != '~') return NULL; delims = cmark_inline_parser_scan_delimiters( inline_parser, sizeof(buffer) - 1, '~', 
&left_flanking, &right_flanking, &punct_before, &punct_after); memset(buffer, '~', delims); buffer[delims] = 0; res = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); cmark_node_set_literal(res, buffer); res->start_line = res->end_line = cmark_inline_parser_get_line(inline_parser); res->start_column = cmark_inline_parser_get_column(inline_parser) - delims; if ((left_flanking || right_flanking) && (delims == 2 || (!(parser->options & CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE) && delims == 1))) { cmark_inline_parser_push_delimiter(inline_parser, character, left_flanking, right_flanking, res); } return res; } static delimiter *insert(cmark_syntax_extension *self, cmark_parser *parser, cmark_inline_parser *inline_parser, delimiter *opener, delimiter *closer) { cmark_node *strikethrough; cmark_node *tmp, *next; delimiter *delim, *tmp_delim; delimiter *res = closer->next; strikethrough = opener->inl_text; if (opener->inl_text->as.literal.len != closer->inl_text->as.literal.len) goto done; if (!cmark_node_set_type(strikethrough, CMARK_NODE_STRIKETHROUGH)) goto done; cmark_node_set_syntax_extension(strikethrough, self); tmp = cmark_node_next(opener->inl_text); while (tmp) { if (tmp == closer->inl_text) break; next = cmark_node_next(tmp); cmark_node_append_child(strikethrough, tmp); tmp = next; } strikethrough->end_column = closer->inl_text->start_column + closer->inl_text->as.literal.len - 1; cmark_node_free(closer->inl_text); delim = closer; while (delim != NULL && delim != opener) { tmp_delim = delim->previous; cmark_inline_parser_remove_delimiter(inline_parser, delim); delim = tmp_delim; } cmark_inline_parser_remove_delimiter(inline_parser, opener); done: return res; } static const char *get_type_string(cmark_syntax_extension *extension, cmark_node *node) { return node->type == CMARK_NODE_STRIKETHROUGH ? 
"strikethrough" : ""; } static int can_contain(cmark_syntax_extension *extension, cmark_node *node, cmark_node_type child_type) { if (node->type != CMARK_NODE_STRIKETHROUGH) return false; return CMARK_NODE_TYPE_INLINE_P(child_type); } static void commonmark_render(cmark_syntax_extension *extension, cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options) { renderer->out(renderer, node, "~~", false, LITERAL); } static void latex_render(cmark_syntax_extension *extension, cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options) { // requires \usepackage{ulem} bool entering = (ev_type == CMARK_EVENT_ENTER); if (entering) { renderer->out(renderer, node, "\\sout{", false, LITERAL); } else { renderer->out(renderer, node, "}", false, LITERAL); } } static void man_render(cmark_syntax_extension *extension, cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options) { bool entering = (ev_type == CMARK_EVENT_ENTER); if (entering) { renderer->cr(renderer); renderer->out(renderer, node, ".ST \"", false, LITERAL); } else { renderer->out(renderer, node, "\"", false, LITERAL); renderer->cr(renderer); } } static void html_render(cmark_syntax_extension *extension, cmark_html_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options) { bool entering = (ev_type == CMARK_EVENT_ENTER); if (entering) { cmark_strbuf_puts(renderer->html, ""); } else { cmark_strbuf_puts(renderer->html, ""); } } static void plaintext_render(cmark_syntax_extension *extension, cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options) { renderer->out(renderer, node, "~", false, LITERAL); } cmark_syntax_extension *create_strikethrough_extension(void) { cmark_syntax_extension *ext = cmark_syntax_extension_new("strikethrough"); cmark_llist *special_chars = NULL; cmark_syntax_extension_set_get_type_string_func(ext, get_type_string); cmark_syntax_extension_set_can_contain_func(ext, can_contain); 
cmark_syntax_extension_set_commonmark_render_func(ext, commonmark_render); cmark_syntax_extension_set_latex_render_func(ext, latex_render); cmark_syntax_extension_set_man_render_func(ext, man_render); cmark_syntax_extension_set_html_render_func(ext, html_render); cmark_syntax_extension_set_plaintext_render_func(ext, plaintext_render); CMARK_NODE_STRIKETHROUGH = cmark_syntax_extension_add_node(1); cmark_syntax_extension_set_match_inline_func(ext, match); cmark_syntax_extension_set_inline_from_delim_func(ext, insert); cmark_mem *mem = cmark_get_default_mem_allocator(); special_chars = cmark_llist_append(mem, special_chars, (void *)'~'); cmark_syntax_extension_set_special_inline_chars(ext, special_chars); cmark_syntax_extension_set_emphasis(ext, 1); return ext; } cmarkgfm/third_party/cmark/extensions/ext_scanners.c0000644000175000017500000004742614210444464023232 0ustar carstencarsten/* Generated by re2c 1.3 */ #include "ext_scanners.h" #include bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), unsigned char *ptr, int len, bufsize_t offset) { bufsize_t res; if (ptr == NULL || offset >= len) { return 0; } else { unsigned char lim = ptr[len]; ptr[len] = '\0'; res = scanner(ptr + offset); ptr[len] = lim; } return res; } bufsize_t _scan_table_start(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; { unsigned char yych; static const unsigned char yybm[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; yych = *p; if (yych <= ' ') { if (yych <= '\n') { if (yych == '\t') goto yy4; } else { if (yych <= '\f') goto yy4; if (yych >= ' ') goto yy4; } } else { if (yych <= '9') { if (yych == '-') goto yy5; } else { if (yych <= ':') goto yy6; if (yych == '|') goto yy4; } } ++p; yy3 : { return 0; } yy4: yych = *(marker = ++p); if (yybm[0 + yych] & 64) { goto yy7; } if (yych == '-') goto yy10; if (yych == ':') goto yy12; goto yy3; yy5: yych = *(marker = ++p); if (yybm[0 + yych] & 128) { goto yy10; } if (yych <= ' ') { if (yych <= 0x08) goto yy3; if (yych <= '\r') goto yy14; if (yych <= 0x1F) goto yy3; goto yy14; } else { if (yych <= ':') { if (yych <= '9') goto yy3; goto yy13; } else { if (yych == '|') goto yy14; goto yy3; } } yy6: yych = *(marker = ++p); if (yybm[0 + yych] & 128) { goto yy10; } goto yy3; yy7: yych = *++p; if (yybm[0 + yych] & 64) { goto yy7; } if (yych == '-') goto yy10; if (yych == ':') goto yy12; yy9: p = marker; goto yy3; yy10: yych = *++p; if (yybm[0 + yych] & 128) { goto yy10; } if (yych <= 0x1F) { if (yych <= '\n') { if (yych <= 0x08) goto yy9; if (yych <= '\t') goto yy13; goto yy15; } else { if (yych <= '\f') goto yy13; if (yych <= '\r') goto yy17; goto yy9; } } else { if (yych <= ':') { if (yych <= ' ') goto yy13; if (yych <= '9') goto yy9; goto yy13; } else { if (yych == '|') goto yy18; goto yy9; } } yy12: yych = *++p; if (yybm[0 + yych] & 128) { goto yy10; } goto yy9; yy13: yych = *++p; yy14: if (yych <= '\r') { if (yych <= '\t') { if (yych <= 0x08) goto yy9; goto yy13; } else { if (yych <= '\n') goto yy15; if (yych <= '\f') goto yy13; goto yy17; } } else { if (yych <= ' ') { if (yych <= 0x1F) goto yy9; goto yy13; } else { if (yych == '|') goto yy18; goto yy9; } } yy15: ++p; { return (bufsize_t)(p - start); } yy17: yych = *++p; if (yych 
== '\n') goto yy15; goto yy9; yy18: yych = *++p; if (yybm[0 + yych] & 128) { goto yy10; } if (yych <= '\r') { if (yych <= '\t') { if (yych <= 0x08) goto yy9; goto yy18; } else { if (yych <= '\n') goto yy15; if (yych <= '\f') goto yy18; goto yy17; } } else { if (yych <= ' ') { if (yych <= 0x1F) goto yy9; goto yy18; } else { if (yych == ':') goto yy12; goto yy9; } } } } bufsize_t _scan_table_cell(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; { unsigned char yych; unsigned int yyaccept = 0; static const unsigned char yybm[] = { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 128, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; yych = *p; if (yybm[0 + yych] & 64) { goto yy22; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\r') goto yy25; if (yych <= '\\') goto yy27; goto yy25; } else { if (yych <= 0xDF) goto yy29; if (yych <= 0xE0) goto yy30; goto yy31; } } else { if (yych <= 0xF0) { if (yych <= 0xED) goto yy32; if (yych <= 0xEF) goto yy31; goto yy33; } else { if (yych <= 0xF3) goto yy34; if (yych <= 0xF4) goto yy35; goto yy25; } } yy22: yyaccept = 0; yych = *(marker = ++p); if (yybm[0 + yych] & 64) { goto yy22; } if (yych <= 0xEC) { 
if (yych <= 0xC1) { if (yych <= '\r') goto yy24; if (yych <= '\\') goto yy27; } else { if (yych <= 0xDF) goto yy36; if (yych <= 0xE0) goto yy38; goto yy39; } } else { if (yych <= 0xF0) { if (yych <= 0xED) goto yy40; if (yych <= 0xEF) goto yy39; goto yy41; } else { if (yych <= 0xF3) goto yy42; if (yych <= 0xF4) goto yy43; } } yy24 : { return (bufsize_t)(p - start); } yy25: ++p; yy26 : { return 0; } yy27: yyaccept = 0; yych = *(marker = ++p); if (yybm[0 + yych] & 128) { goto yy27; } if (yych <= 0xDF) { if (yych <= '\f') { if (yych == '\n') goto yy24; goto yy22; } else { if (yych <= '\r') goto yy24; if (yych <= 0x7F) goto yy22; if (yych <= 0xC1) goto yy24; goto yy36; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) goto yy38; if (yych == 0xED) goto yy40; goto yy39; } else { if (yych <= 0xF0) goto yy41; if (yych <= 0xF3) goto yy42; if (yych <= 0xF4) goto yy43; goto yy24; } } yy29: yych = *++p; if (yych <= 0x7F) goto yy26; if (yych <= 0xBF) goto yy22; goto yy26; yy30: yyaccept = 1; yych = *(marker = ++p); if (yych <= 0x9F) goto yy26; if (yych <= 0xBF) goto yy36; goto yy26; yy31: yyaccept = 1; yych = *(marker = ++p); if (yych <= 0x7F) goto yy26; if (yych <= 0xBF) goto yy36; goto yy26; yy32: yyaccept = 1; yych = *(marker = ++p); if (yych <= 0x7F) goto yy26; if (yych <= 0x9F) goto yy36; goto yy26; yy33: yyaccept = 1; yych = *(marker = ++p); if (yych <= 0x8F) goto yy26; if (yych <= 0xBF) goto yy39; goto yy26; yy34: yyaccept = 1; yych = *(marker = ++p); if (yych <= 0x7F) goto yy26; if (yych <= 0xBF) goto yy39; goto yy26; yy35: yyaccept = 1; yych = *(marker = ++p); if (yych <= 0x7F) goto yy26; if (yych <= 0x8F) goto yy39; goto yy26; yy36: yych = *++p; if (yych <= 0x7F) goto yy37; if (yych <= 0xBF) goto yy22; yy37: p = marker; if (yyaccept == 0) { goto yy24; } else { goto yy26; } yy38: yych = *++p; if (yych <= 0x9F) goto yy37; if (yych <= 0xBF) goto yy36; goto yy37; yy39: yych = *++p; if (yych <= 0x7F) goto yy37; if (yych <= 0xBF) goto yy36; goto yy37; yy40: yych = *++p; if 
(yych <= 0x7F) goto yy37; if (yych <= 0x9F) goto yy36; goto yy37; yy41: yych = *++p; if (yych <= 0x8F) goto yy37; if (yych <= 0xBF) goto yy39; goto yy37; yy42: yych = *++p; if (yych <= 0x7F) goto yy37; if (yych <= 0xBF) goto yy39; goto yy37; yy43: yych = *++p; if (yych <= 0x7F) goto yy37; if (yych <= 0x8F) goto yy39; goto yy37; } } bufsize_t _scan_table_cell_end(const unsigned char *p) { const unsigned char *start = p; { unsigned char yych; static const unsigned char yybm[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; yych = *p; if (yych == '|') goto yy48; ++p; { return 0; } yy48: yych = *++p; if (yybm[0 + yych] & 128) { goto yy48; } { return (bufsize_t)(p - start); } } } bufsize_t _scan_table_row_end(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; { unsigned char yych; static const unsigned char yybm[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; yych = *p; if (yych <= '\f') { if (yych <= 0x08) goto yy53; if (yych == '\n') goto yy56; goto yy55; } else { if (yych <= '\r') goto yy58; if (yych == ' ') goto yy55; } yy53: ++p; yy54 : { return 0; } yy55: yych = *(marker = ++p); if (yych <= 0x08) goto yy54; if (yych <= '\r') goto yy60; if (yych == ' ') goto yy60; goto yy54; yy56: ++p; { return (bufsize_t)(p - start); } yy58: yych = *++p; if (yych == '\n') goto yy56; goto yy54; yy59: yych = *++p; yy60: if (yybm[0 + yych] & 128) { goto yy59; } if (yych <= 0x08) goto yy61; if (yych <= '\n') goto yy56; if (yych <= '\r') goto yy62; yy61: p = marker; goto yy54; yy62: yych = *++p; if (yych == '\n') goto yy56; goto yy61; } } bufsize_t _scan_tasklist(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; { unsigned char yych; static const unsigned char yybm[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; yych = *p; if (yych <= ' ') { if (yych <= '\n') { if (yych == '\t') goto yy67; } else { if (yych <= '\f') goto yy67; if (yych >= ' ') goto yy67; } } else { if (yych <= ',') { if (yych <= ')') goto yy65; if (yych <= '+') goto yy68; } else { if (yych <= '-') goto yy68; if (yych <= '/') goto yy65; if (yych <= '9') goto yy69; } } yy65: ++p; yy66 : { return 0; } yy67: yych = *(marker = ++p); if (yybm[0 + yych] & 64) { goto yy70; } if (yych <= ',') { if (yych <= ')') goto yy66; if (yych <= '+') goto yy73; goto yy66; } else { if (yych <= '-') goto yy73; if (yych <= '/') goto yy66; if (yych <= '9') goto yy74; goto yy66; } yy68: yych = *(marker = ++p); if (yych <= '\n') { if (yych == '\t') goto yy75; goto yy66; } else { if (yych <= '\f') goto yy75; if (yych == ' ') goto yy75; goto yy66; } yy69: yych = *(marker = ++p); if (yych <= 0x1F) { if (yych <= '\t') { if (yych <= 0x08) goto yy78; goto yy73; } else { if (yych <= '\n') goto yy66; if (yych <= '\f') goto yy73; goto yy78; } } else { if (yych <= 0x7F) { if (yych <= ' ') goto yy73; goto yy78; } else { if (yych <= 0xC1) goto yy66; if (yych <= 0xF4) goto yy78; goto yy66; } } yy70: yych = *++p; if (yybm[0 + yych] & 64) { goto yy70; } if (yych <= ',') { if (yych <= ')') goto yy72; if (yych <= '+') goto yy73; } else { if (yych <= '-') goto yy73; if (yych <= '/') goto yy72; if (yych <= '9') goto yy74; } yy72: p = marker; goto yy66; yy73: yych = *++p; if (yych == '[') goto yy72; goto yy76; yy74: yych = *++p; if (yych <= '\n') { if (yych == '\t') goto yy73; goto yy78; } else { if (yych <= '\f') goto yy73; if (yych == ' ') goto yy73; goto yy78; } yy75: yych = *++p; yy76: if (yych <= '\f') { if (yych == '\t') goto yy75; if (yych <= '\n') goto yy72; goto yy75; } else { if (yych <= ' ') { if (yych <= 0x1F) goto yy72; goto yy75; } else { if (yych == '[') goto yy86; goto yy72; } } yy77: yych = *++p; yy78: if (yybm[0 + yych] & 128) { goto yy77; } if (yych <= 0xC1) { if (yych <= '\f') { if (yych <= 0x08) 
goto yy73; if (yych == '\n') goto yy72; goto yy75; } else { if (yych == ' ') goto yy75; if (yych <= 0x7F) goto yy73; goto yy72; } } else { if (yych <= 0xED) { if (yych <= 0xDF) goto yy79; if (yych <= 0xE0) goto yy80; if (yych <= 0xEC) goto yy81; goto yy82; } else { if (yych <= 0xF0) { if (yych <= 0xEF) goto yy81; goto yy83; } else { if (yych <= 0xF3) goto yy84; if (yych <= 0xF4) goto yy85; goto yy72; } } } yy79: yych = *++p; if (yych <= 0x7F) goto yy72; if (yych <= 0xBF) goto yy73; goto yy72; yy80: yych = *++p; if (yych <= 0x9F) goto yy72; if (yych <= 0xBF) goto yy79; goto yy72; yy81: yych = *++p; if (yych <= 0x7F) goto yy72; if (yych <= 0xBF) goto yy79; goto yy72; yy82: yych = *++p; if (yych <= 0x7F) goto yy72; if (yych <= 0x9F) goto yy79; goto yy72; yy83: yych = *++p; if (yych <= 0x8F) goto yy72; if (yych <= 0xBF) goto yy81; goto yy72; yy84: yych = *++p; if (yych <= 0x7F) goto yy72; if (yych <= 0xBF) goto yy81; goto yy72; yy85: yych = *++p; if (yych <= 0x7F) goto yy72; if (yych <= 0x8F) goto yy81; goto yy72; yy86: yych = *++p; if (yych <= 'W') { if (yych != ' ') goto yy72; } else { if (yych <= 'X') goto yy87; if (yych != 'x') goto yy72; } yy87: yych = *++p; if (yych != ']') goto yy72; yych = *++p; if (yych <= '\n') { if (yych != '\t') goto yy72; } else { if (yych <= '\f') goto yy89; if (yych != ' ') goto yy72; } yy89: yych = *++p; if (yych <= '\n') { if (yych == '\t') goto yy89; } else { if (yych <= '\f') goto yy89; if (yych == ' ') goto yy89; } { return (bufsize_t)(p - start); } } } cmarkgfm/third_party/cmark/extensions/ext_scanners.re0000644000175000017500000000425314210444464023405 0ustar carstencarsten/*!re2c re2c:flags:no-debug-info = 1; */ /*!re2c re2c:indent:string = ' '; */ #include #include "ext_scanners.h" bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), unsigned char *ptr, int len, bufsize_t offset) { bufsize_t res; if (ptr == NULL || offset >= len) { return 0; } else { unsigned char lim = ptr[len]; ptr[len] = '\0'; res = scanner(ptr 
+ offset); ptr[len] = lim; } return res; } /*!re2c re2c:define:YYCTYPE = "unsigned char"; re2c:define:YYCURSOR = p; re2c:define:YYMARKER = marker; re2c:yyfill:enable = 0; spacechar = [ \t\v\f]; newline = [\r]?[\n]; escaped_char = [\\][|!"#$%&'()*+,./:;<=>?@[\\\]^_`{}~-]; table_marker = (spacechar*[:]?[-]+[:]?spacechar*); table_cell = (escaped_char|[^|\r\n])+; tasklist = spacechar*("-"|"+"|"*"|[0-9]+.)spacechar+("[ ]"|"[x]")spacechar+; */ bufsize_t _scan_table_start(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c [|]? table_marker ([|] table_marker)* [|]? spacechar* newline { return (bufsize_t)(p - start); } * { return 0; } */ } bufsize_t _scan_table_cell(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c // In fact, `table_cell` matches non-empty table cells only. The empty // string is also a valid table cell, but is handled by the default rule. // This approach prevents re2c's match-empty-string warning. 
table_cell { return (bufsize_t)(p - start); } * { return 0; } */ } bufsize_t _scan_table_cell_end(const unsigned char *p) { const unsigned char *start = p; /*!re2c [|] spacechar* { return (bufsize_t)(p - start); } * { return 0; } */ } bufsize_t _scan_table_row_end(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c spacechar* newline { return (bufsize_t)(p - start); } * { return 0; } */ } bufsize_t _scan_tasklist(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c tasklist { return (bufsize_t)(p - start); } * { return 0; } */ } cmarkgfm/third_party/cmark/extensions/table.c0000644000175000017500000007034714210444464021623 0ustar carstencarsten#include #include #include #include #include #include #include #include "ext_scanners.h" #include "strikethrough.h" #include "table.h" #include "cmark-gfm-core-extensions.h" cmark_node_type CMARK_NODE_TABLE, CMARK_NODE_TABLE_ROW, CMARK_NODE_TABLE_CELL; typedef struct { uint16_t n_columns; int paragraph_offset; cmark_llist *cells; } table_row; typedef struct { uint16_t n_columns; uint8_t *alignments; } node_table; typedef struct { bool is_header; } node_table_row; typedef struct { cmark_strbuf *buf; int start_offset, end_offset, internal_offset; } node_cell; static void free_table_cell(cmark_mem *mem, void *data) { node_cell *cell = (node_cell *)data; cmark_strbuf_free((cmark_strbuf *)cell->buf); mem->free(cell->buf); mem->free(cell); } static void free_table_row(cmark_mem *mem, table_row *row) { if (!row) return; cmark_llist_free_full(mem, row->cells, (cmark_free_func)free_table_cell); mem->free(row); } static void free_node_table(cmark_mem *mem, void *ptr) { node_table *t = (node_table *)ptr; mem->free(t->alignments); mem->free(t); } static void free_node_table_row(cmark_mem *mem, void *ptr) { mem->free(ptr); } static int get_n_table_columns(cmark_node *node) { if (!node || node->type != CMARK_NODE_TABLE) return -1; return 
(int)((node_table *)node->as.opaque)->n_columns; } static int set_n_table_columns(cmark_node *node, uint16_t n_columns) { if (!node || node->type != CMARK_NODE_TABLE) return 0; ((node_table *)node->as.opaque)->n_columns = n_columns; return 1; } static uint8_t *get_table_alignments(cmark_node *node) { if (!node || node->type != CMARK_NODE_TABLE) return 0; return ((node_table *)node->as.opaque)->alignments; } static int set_table_alignments(cmark_node *node, uint8_t *alignments) { if (!node || node->type != CMARK_NODE_TABLE) return 0; ((node_table *)node->as.opaque)->alignments = alignments; return 1; } static cmark_strbuf *unescape_pipes(cmark_mem *mem, unsigned char *string, bufsize_t len) { cmark_strbuf *res = (cmark_strbuf *)mem->calloc(1, sizeof(cmark_strbuf)); bufsize_t r, w; cmark_strbuf_init(mem, res, len + 1); cmark_strbuf_put(res, string, len); cmark_strbuf_putc(res, '\0'); for (r = 0, w = 0; r < len; ++r) { if (res->ptr[r] == '\\' && res->ptr[r + 1] == '|') r++; res->ptr[w++] = res->ptr[r]; } cmark_strbuf_truncate(res, w); return res; } static table_row *row_from_string(cmark_syntax_extension *self, cmark_parser *parser, unsigned char *string, int len) { // Parses a single table row. It has the following form: // `delim? table_cell (delim table_cell)* delim? newline` // Note that cells are allowed to be empty. // // From the GitHub-flavored Markdown specification: // // > Each row consists of cells containing arbitrary text, in which inlines // > are parsed, separated by pipes (|). A leading and trailing pipe is also // > recommended for clarity of reading, and if there’s otherwise parsing // > ambiguity. table_row *row = NULL; bufsize_t cell_matched = 1, pipe_matched = 1, offset; int expect_more_cells = 1; int row_end_offset = 0; int int_overflow_abort = 0; row = (table_row *)parser->mem->calloc(1, sizeof(table_row)); row->n_columns = 0; row->cells = NULL; // Scan past the (optional) leading pipe. 
offset = scan_table_cell_end(string, len, 0); // Parse the cells of the row. Stop if we reach the end of the input, or if we // cannot detect any more cells. while (offset < len && expect_more_cells) { cell_matched = scan_table_cell(string, len, offset); pipe_matched = scan_table_cell_end(string, len, offset + cell_matched); if (cell_matched || pipe_matched) { // We are guaranteed to have a cell, since (1) either we found some // content and cell_matched, or (2) we found an empty cell followed by a // pipe. cmark_strbuf *cell_buf = unescape_pipes(parser->mem, string + offset, cell_matched); cmark_strbuf_trim(cell_buf); node_cell *cell = (node_cell *)parser->mem->calloc(1, sizeof(*cell)); cell->buf = cell_buf; cell->start_offset = offset; cell->end_offset = offset + cell_matched - 1; while (cell->start_offset > 0 && string[cell->start_offset - 1] != '|') { --cell->start_offset; ++cell->internal_offset; } // make sure we never wrap row->n_columns // offset will != len and our exit will clean up as intended if (row->n_columns == UINT16_MAX) { int_overflow_abort = 1; break; } row->n_columns += 1; row->cells = cmark_llist_append(parser->mem, row->cells, cell); } offset += cell_matched + pipe_matched; if (pipe_matched) { expect_more_cells = 1; } else { // We've scanned the last cell. Check if we have reached the end of the row row_end_offset = scan_table_row_end(string, len, offset); offset += row_end_offset; // If the end of the row is not the end of the input, // the row is not a real row but potentially part of the paragraph // preceding the table. if (row_end_offset && offset != len) { row->paragraph_offset = offset; cmark_llist_free_full(parser->mem, row->cells, (cmark_free_func)free_table_cell); row->cells = NULL; row->n_columns = 0; // Scan past the (optional) leading pipe. 
offset += scan_table_cell_end(string, len, offset); expect_more_cells = 1; } else { expect_more_cells = 0; } } } if (offset != len || row->n_columns == 0 || int_overflow_abort) { free_table_row(parser->mem, row); row = NULL; } return row; } static void try_inserting_table_header_paragraph(cmark_parser *parser, cmark_node *parent_container, unsigned char *parent_string, int paragraph_offset) { cmark_node *paragraph; cmark_strbuf *paragraph_content; paragraph = cmark_node_new_with_mem(CMARK_NODE_PARAGRAPH, parser->mem); paragraph_content = unescape_pipes(parser->mem, parent_string, paragraph_offset); cmark_strbuf_trim(paragraph_content); cmark_node_set_string_content(paragraph, (char *) paragraph_content->ptr); cmark_strbuf_free(paragraph_content); parser->mem->free(paragraph_content); if (!cmark_node_insert_before(parent_container, paragraph)) { parser->mem->free(paragraph); } } static cmark_node *try_opening_table_header(cmark_syntax_extension *self, cmark_parser *parser, cmark_node *parent_container, unsigned char *input, int len) { cmark_node *table_header; table_row *header_row = NULL; table_row *marker_row = NULL; node_table_row *ntr; const char *parent_string; uint16_t i; if (!scan_table_start(input, len, cmark_parser_get_first_nonspace(parser))) { return parent_container; } // Since scan_table_start was successful, we must have a marker row. marker_row = row_from_string(self, parser, input + cmark_parser_get_first_nonspace(parser), len - cmark_parser_get_first_nonspace(parser)); // assert may be optimized out, don't rely on it for security boundaries if (!marker_row) { return parent_container; } assert(marker_row); cmark_arena_push(); // Check for a matching header row. We call `row_from_string` with the entire // (potentially long) parent container as input, but this should be safe since // `row_from_string` bails out early if it does not find a row. 
parent_string = cmark_node_get_string_content(parent_container); header_row = row_from_string(self, parser, (unsigned char *)parent_string, (int)strlen(parent_string)); if (!header_row || header_row->n_columns != marker_row->n_columns) { free_table_row(parser->mem, marker_row); free_table_row(parser->mem, header_row); cmark_arena_pop(); return parent_container; } if (cmark_arena_pop()) { marker_row = row_from_string( self, parser, input + cmark_parser_get_first_nonspace(parser), len - cmark_parser_get_first_nonspace(parser)); header_row = row_from_string(self, parser, (unsigned char *)parent_string, (int)strlen(parent_string)); // row_from_string can return NULL, add additional check to ensure n_columns match if (!marker_row || !header_row || header_row->n_columns != marker_row->n_columns) { free_table_row(parser->mem, marker_row); free_table_row(parser->mem, header_row); return parent_container; } } if (!cmark_node_set_type(parent_container, CMARK_NODE_TABLE)) { free_table_row(parser->mem, header_row); free_table_row(parser->mem, marker_row); return parent_container; } if (header_row->paragraph_offset) { try_inserting_table_header_paragraph(parser, parent_container, (unsigned char *)parent_string, header_row->paragraph_offset); } cmark_node_set_syntax_extension(parent_container, self); parent_container->as.opaque = parser->mem->calloc(1, sizeof(node_table)); set_n_table_columns(parent_container, header_row->n_columns); // allocate alignments based on marker_row->n_columns // since we populate the alignments array based on marker_row->cells uint8_t *alignments = (uint8_t *)parser->mem->calloc(marker_row->n_columns, sizeof(uint8_t)); cmark_llist *it = marker_row->cells; for (i = 0; it; it = it->next, ++i) { node_cell *node = (node_cell *)it->data; bool left = node->buf->ptr[0] == ':', right = node->buf->ptr[node->buf->size - 1] == ':'; if (left && right) alignments[i] = 'c'; else if (left) alignments[i] = 'l'; else if (right) alignments[i] = 'r'; } 
set_table_alignments(parent_container, alignments); table_header = cmark_parser_add_child(parser, parent_container, CMARK_NODE_TABLE_ROW, parent_container->start_column); cmark_node_set_syntax_extension(table_header, self); table_header->end_column = parent_container->start_column + (int)strlen(parent_string) - 2; table_header->start_line = table_header->end_line = parent_container->start_line; table_header->as.opaque = ntr = (node_table_row *)parser->mem->calloc(1, sizeof(node_table_row)); ntr->is_header = true; { cmark_llist *tmp; for (tmp = header_row->cells; tmp; tmp = tmp->next) { node_cell *cell = (node_cell *) tmp->data; cmark_node *header_cell = cmark_parser_add_child(parser, table_header, CMARK_NODE_TABLE_CELL, parent_container->start_column + cell->start_offset); header_cell->start_line = header_cell->end_line = parent_container->start_line; header_cell->internal_offset = cell->internal_offset; header_cell->end_column = parent_container->start_column + cell->end_offset; cmark_node_set_string_content(header_cell, (char *) cell->buf->ptr); cmark_node_set_syntax_extension(header_cell, self); } } cmark_parser_advance_offset( parser, (char *)input, (int)strlen((char *)input) - 1 - cmark_parser_get_offset(parser), false); free_table_row(parser->mem, header_row); free_table_row(parser->mem, marker_row); return parent_container; } static cmark_node *try_opening_table_row(cmark_syntax_extension *self, cmark_parser *parser, cmark_node *parent_container, unsigned char *input, int len) { cmark_node *table_row_block; table_row *row; if (cmark_parser_is_blank(parser)) return NULL; table_row_block = cmark_parser_add_child(parser, parent_container, CMARK_NODE_TABLE_ROW, parent_container->start_column); cmark_node_set_syntax_extension(table_row_block, self); table_row_block->end_column = parent_container->end_column; table_row_block->as.opaque = parser->mem->calloc(1, sizeof(node_table_row)); row = row_from_string(self, parser, input + 
cmark_parser_get_first_nonspace(parser), len - cmark_parser_get_first_nonspace(parser)); if (!row) { // clean up the dangling node cmark_node_free(table_row_block); return NULL; } { cmark_llist *tmp; int i, table_columns = get_n_table_columns(parent_container); for (tmp = row->cells, i = 0; tmp && i < table_columns; tmp = tmp->next, ++i) { node_cell *cell = (node_cell *) tmp->data; cmark_node *node = cmark_parser_add_child(parser, table_row_block, CMARK_NODE_TABLE_CELL, parent_container->start_column + cell->start_offset); node->internal_offset = cell->internal_offset; node->end_column = parent_container->start_column + cell->end_offset; cmark_node_set_string_content(node, (char *) cell->buf->ptr); cmark_node_set_syntax_extension(node, self); } for (; i < table_columns; ++i) { cmark_node *node = cmark_parser_add_child( parser, table_row_block, CMARK_NODE_TABLE_CELL, 0); cmark_node_set_syntax_extension(node, self); } } free_table_row(parser->mem, row); cmark_parser_advance_offset(parser, (char *)input, len - 1 - cmark_parser_get_offset(parser), false); return table_row_block; } static cmark_node *try_opening_table_block(cmark_syntax_extension *self, int indented, cmark_parser *parser, cmark_node *parent_container, unsigned char *input, int len) { cmark_node_type parent_type = cmark_node_get_type(parent_container); if (!indented && parent_type == CMARK_NODE_PARAGRAPH) { return try_opening_table_header(self, parser, parent_container, input, len); } else if (!indented && parent_type == CMARK_NODE_TABLE) { return try_opening_table_row(self, parser, parent_container, input, len); } return NULL; } static int matches(cmark_syntax_extension *self, cmark_parser *parser, unsigned char *input, int len, cmark_node *parent_container) { int res = 0; if (cmark_node_get_type(parent_container) == CMARK_NODE_TABLE) { cmark_arena_push(); table_row *new_row = row_from_string( self, parser, input + cmark_parser_get_first_nonspace(parser), len - cmark_parser_get_first_nonspace(parser)); 
if (new_row && new_row->n_columns) res = 1; free_table_row(parser->mem, new_row); cmark_arena_pop(); } return res; } static const char *get_type_string(cmark_syntax_extension *self, cmark_node *node) { if (node->type == CMARK_NODE_TABLE) { return "table"; } else if (node->type == CMARK_NODE_TABLE_ROW) { if (((node_table_row *)node->as.opaque)->is_header) return "table_header"; else return "table_row"; } else if (node->type == CMARK_NODE_TABLE_CELL) { return "table_cell"; } return ""; } static int can_contain(cmark_syntax_extension *extension, cmark_node *node, cmark_node_type child_type) { if (node->type == CMARK_NODE_TABLE) { return child_type == CMARK_NODE_TABLE_ROW; } else if (node->type == CMARK_NODE_TABLE_ROW) { return child_type == CMARK_NODE_TABLE_CELL; } else if (node->type == CMARK_NODE_TABLE_CELL) { return child_type == CMARK_NODE_TEXT || child_type == CMARK_NODE_CODE || child_type == CMARK_NODE_EMPH || child_type == CMARK_NODE_STRONG || child_type == CMARK_NODE_LINK || child_type == CMARK_NODE_IMAGE || child_type == CMARK_NODE_STRIKETHROUGH || child_type == CMARK_NODE_HTML_INLINE || child_type == CMARK_NODE_FOOTNOTE_REFERENCE; } return false; } static int contains_inlines(cmark_syntax_extension *extension, cmark_node *node) { return node->type == CMARK_NODE_TABLE_CELL; } static void commonmark_render(cmark_syntax_extension *extension, cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options) { bool entering = (ev_type == CMARK_EVENT_ENTER); if (node->type == CMARK_NODE_TABLE) { renderer->blankline(renderer); } else if (node->type == CMARK_NODE_TABLE_ROW) { if (entering) { renderer->cr(renderer); renderer->out(renderer, node, "|", false, LITERAL); } } else if (node->type == CMARK_NODE_TABLE_CELL) { if (entering) { renderer->out(renderer, node, " ", false, LITERAL); } else { renderer->out(renderer, node, " |", false, LITERAL); if (((node_table_row *)node->parent->as.opaque)->is_header && !node->next) { int i; uint8_t *alignments = 
get_table_alignments(node->parent->parent); uint16_t n_cols = ((node_table *)node->parent->parent->as.opaque)->n_columns; renderer->cr(renderer); renderer->out(renderer, node, "|", false, LITERAL); for (i = 0; i < n_cols; i++) { switch (alignments[i]) { case 0: renderer->out(renderer, node, " --- |", false, LITERAL); break; case 'l': renderer->out(renderer, node, " :-- |", false, LITERAL); break; case 'c': renderer->out(renderer, node, " :-: |", false, LITERAL); break; case 'r': renderer->out(renderer, node, " --: |", false, LITERAL); break; } } renderer->cr(renderer); } } } else { assert(false); } } static void latex_render(cmark_syntax_extension *extension, cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options) { bool entering = (ev_type == CMARK_EVENT_ENTER); if (node->type == CMARK_NODE_TABLE) { if (entering) { int i; uint16_t n_cols; uint8_t *alignments = get_table_alignments(node); renderer->cr(renderer); renderer->out(renderer, node, "\\begin{table}", false, LITERAL); renderer->cr(renderer); renderer->out(renderer, node, "\\begin{tabular}{", false, LITERAL); n_cols = ((node_table *)node->as.opaque)->n_columns; for (i = 0; i < n_cols; i++) { switch(alignments[i]) { case 0: case 'l': renderer->out(renderer, node, "l", false, LITERAL); break; case 'c': renderer->out(renderer, node, "c", false, LITERAL); break; case 'r': renderer->out(renderer, node, "r", false, LITERAL); break; } } renderer->out(renderer, node, "}", false, LITERAL); renderer->cr(renderer); } else { renderer->out(renderer, node, "\\end{tabular}", false, LITERAL); renderer->cr(renderer); renderer->out(renderer, node, "\\end{table}", false, LITERAL); renderer->cr(renderer); } } else if (node->type == CMARK_NODE_TABLE_ROW) { if (!entering) { renderer->cr(renderer); } } else if (node->type == CMARK_NODE_TABLE_CELL) { if (!entering) { if (node->next) { renderer->out(renderer, node, " & ", false, LITERAL); } else { renderer->out(renderer, node, " \\\\", false, LITERAL); } } 
} else { assert(false); } } static const char *xml_attr(cmark_syntax_extension *extension, cmark_node *node) { if (node->type == CMARK_NODE_TABLE_CELL) { if (cmark_gfm_extensions_get_table_row_is_header(node->parent)) { uint8_t *alignments = get_table_alignments(node->parent->parent); int i = 0; cmark_node *n; for (n = node->parent->first_child; n; n = n->next, ++i) if (n == node) break; switch (alignments[i]) { case 'l': return " align=\"left\""; case 'c': return " align=\"center\""; case 'r': return " align=\"right\""; } } } return NULL; } static void man_render(cmark_syntax_extension *extension, cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options) { bool entering = (ev_type == CMARK_EVENT_ENTER); if (node->type == CMARK_NODE_TABLE) { if (entering) { int i; uint16_t n_cols; uint8_t *alignments = get_table_alignments(node); renderer->cr(renderer); renderer->out(renderer, node, ".TS", false, LITERAL); renderer->cr(renderer); renderer->out(renderer, node, "tab(@);", false, LITERAL); renderer->cr(renderer); n_cols = ((node_table *)node->as.opaque)->n_columns; for (i = 0; i < n_cols; i++) { switch (alignments[i]) { case 'l': renderer->out(renderer, node, "l", false, LITERAL); break; case 0: case 'c': renderer->out(renderer, node, "c", false, LITERAL); break; case 'r': renderer->out(renderer, node, "r", false, LITERAL); break; } } if (n_cols) { renderer->out(renderer, node, ".", false, LITERAL); renderer->cr(renderer); } } else { renderer->out(renderer, node, ".TE", false, LITERAL); renderer->cr(renderer); } } else if (node->type == CMARK_NODE_TABLE_ROW) { if (!entering) { renderer->cr(renderer); } } else if (node->type == CMARK_NODE_TABLE_CELL) { if (!entering && node->next) { renderer->out(renderer, node, "@", false, LITERAL); } } else { assert(false); } } static void html_table_add_align(cmark_strbuf* html, const char* align, int options) { if (options & CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES) { cmark_strbuf_puts(html, " 
style=\"text-align: "); cmark_strbuf_puts(html, align); cmark_strbuf_puts(html, "\""); } else { cmark_strbuf_puts(html, " align=\""); cmark_strbuf_puts(html, align); cmark_strbuf_puts(html, "\""); } } struct html_table_state { unsigned need_closing_table_body : 1; unsigned in_table_header : 1; }; static void html_render(cmark_syntax_extension *extension, cmark_html_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options) { bool entering = (ev_type == CMARK_EVENT_ENTER); cmark_strbuf *html = renderer->html; cmark_node *n; // XXX: we just monopolise renderer->opaque. struct html_table_state *table_state = (struct html_table_state *)&renderer->opaque; if (node->type == CMARK_NODE_TABLE) { if (entering) { cmark_html_render_cr(html); cmark_strbuf_puts(html, "'); table_state->need_closing_table_body = false; } else { if (table_state->need_closing_table_body) { cmark_html_render_cr(html); cmark_strbuf_puts(html, ""); cmark_html_render_cr(html); } table_state->need_closing_table_body = false; cmark_html_render_cr(html); cmark_strbuf_puts(html, ""); cmark_html_render_cr(html); } } else if (node->type == CMARK_NODE_TABLE_ROW) { if (entering) { cmark_html_render_cr(html); if (((node_table_row *)node->as.opaque)->is_header) { table_state->in_table_header = 1; cmark_strbuf_puts(html, ""); cmark_html_render_cr(html); } else if (!table_state->need_closing_table_body) { cmark_strbuf_puts(html, ""); cmark_html_render_cr(html); table_state->need_closing_table_body = 1; } cmark_strbuf_puts(html, "'); } else { cmark_html_render_cr(html); cmark_strbuf_puts(html, ""); if (((node_table_row *)node->as.opaque)->is_header) { cmark_html_render_cr(html); cmark_strbuf_puts(html, ""); table_state->in_table_header = false; } } } else if (node->type == CMARK_NODE_TABLE_CELL) { uint8_t *alignments = get_table_alignments(node->parent->parent); if (entering) { cmark_html_render_cr(html); if (table_state->in_table_header) { cmark_strbuf_puts(html, "parent->first_child; n; n = 
n->next, ++i) if (n == node) break; switch (alignments[i]) { case 'l': html_table_add_align(html, "left", options); break; case 'c': html_table_add_align(html, "center", options); break; case 'r': html_table_add_align(html, "right", options); break; } cmark_html_render_sourcepos(node, html, options); cmark_strbuf_putc(html, '>'); } else { if (table_state->in_table_header) { cmark_strbuf_puts(html, ""); } else { cmark_strbuf_puts(html, ""); } } } else { assert(false); } } static void opaque_alloc(cmark_syntax_extension *self, cmark_mem *mem, cmark_node *node) { if (node->type == CMARK_NODE_TABLE) { node->as.opaque = mem->calloc(1, sizeof(node_table)); } else if (node->type == CMARK_NODE_TABLE_ROW) { node->as.opaque = mem->calloc(1, sizeof(node_table_row)); } else if (node->type == CMARK_NODE_TABLE_CELL) { node->as.opaque = mem->calloc(1, sizeof(node_cell)); } } static void opaque_free(cmark_syntax_extension *self, cmark_mem *mem, cmark_node *node) { if (node->type == CMARK_NODE_TABLE) { free_node_table(mem, node->as.opaque); } else if (node->type == CMARK_NODE_TABLE_ROW) { free_node_table_row(mem, node->as.opaque); } } static int escape(cmark_syntax_extension *self, cmark_node *node, int c) { return node->type != CMARK_NODE_TABLE && node->type != CMARK_NODE_TABLE_ROW && node->type != CMARK_NODE_TABLE_CELL && c == '|'; } cmark_syntax_extension *create_table_extension(void) { cmark_syntax_extension *self = cmark_syntax_extension_new("table"); cmark_syntax_extension_set_match_block_func(self, matches); cmark_syntax_extension_set_open_block_func(self, try_opening_table_block); cmark_syntax_extension_set_get_type_string_func(self, get_type_string); cmark_syntax_extension_set_can_contain_func(self, can_contain); cmark_syntax_extension_set_contains_inlines_func(self, contains_inlines); cmark_syntax_extension_set_commonmark_render_func(self, commonmark_render); cmark_syntax_extension_set_plaintext_render_func(self, commonmark_render); 
cmark_syntax_extension_set_latex_render_func(self, latex_render); cmark_syntax_extension_set_xml_attr_func(self, xml_attr); cmark_syntax_extension_set_man_render_func(self, man_render); cmark_syntax_extension_set_html_render_func(self, html_render); cmark_syntax_extension_set_opaque_alloc_func(self, opaque_alloc); cmark_syntax_extension_set_opaque_free_func(self, opaque_free); cmark_syntax_extension_set_commonmark_escape_func(self, escape); CMARK_NODE_TABLE = cmark_syntax_extension_add_node(0); CMARK_NODE_TABLE_ROW = cmark_syntax_extension_add_node(0); CMARK_NODE_TABLE_CELL = cmark_syntax_extension_add_node(0); return self; } uint16_t cmark_gfm_extensions_get_table_columns(cmark_node *node) { if (node->type != CMARK_NODE_TABLE) return 0; return ((node_table *)node->as.opaque)->n_columns; } uint8_t *cmark_gfm_extensions_get_table_alignments(cmark_node *node) { if (node->type != CMARK_NODE_TABLE) return 0; return ((node_table *)node->as.opaque)->alignments; } int cmark_gfm_extensions_set_table_columns(cmark_node *node, uint16_t n_columns) { return set_n_table_columns(node, n_columns); } int cmark_gfm_extensions_set_table_alignments(cmark_node *node, uint16_t ncols, uint8_t *alignments) { uint8_t *a = (uint8_t *)cmark_node_mem(node)->calloc(1, ncols); memcpy(a, alignments, ncols); return set_table_alignments(node, a); } int cmark_gfm_extensions_get_table_row_is_header(cmark_node *node) { if (!node || node->type != CMARK_NODE_TABLE_ROW) return 0; return ((node_table_row *)node->as.opaque)->is_header; } int cmark_gfm_extensions_set_table_row_is_header(cmark_node *node, int is_header) { if (!node || node->type != CMARK_NODE_TABLE_ROW) return 0; ((node_table_row *)node->as.opaque)->is_header = (is_header != 0); return 1; } cmarkgfm/third_party/cmark/extensions/ext_scanners.h0000644000175000017500000000160414210444464023223 0ustar carstencarsten#include "chunk.h" #include "cmark-gfm.h" #ifdef __cplusplus extern "C" { #endif bufsize_t _ext_scan_at(bufsize_t (*scanner)(const 
unsigned char *), unsigned char *ptr, int len, bufsize_t offset); bufsize_t _scan_table_start(const unsigned char *p); bufsize_t _scan_table_cell(const unsigned char *p); bufsize_t _scan_table_cell_end(const unsigned char *p); bufsize_t _scan_table_row_end(const unsigned char *p); bufsize_t _scan_tasklist(const unsigned char *p); #define scan_table_start(c, l, n) _ext_scan_at(&_scan_table_start, c, l, n) #define scan_table_cell(c, l, n) _ext_scan_at(&_scan_table_cell, c, l, n) #define scan_table_cell_end(c, l, n) _ext_scan_at(&_scan_table_cell_end, c, l, n) #define scan_table_row_end(c, l, n) _ext_scan_at(&_scan_table_row_end, c, l, n) #define scan_tasklist(c, l, n) _ext_scan_at(&_scan_tasklist, c, l, n) #ifdef __cplusplus } #endif cmarkgfm/third_party/cmark/extensions/tagfilter.h0000644000175000017500000000024714210444464022512 0ustar carstencarsten#ifndef CMARK_GFM_TAGFILTER_H #define CMARK_GFM_TAGFILTER_H #include "cmark-gfm-core-extensions.h" cmark_syntax_extension *create_tagfilter_extension(void); #endif cmarkgfm/third_party/cmark/extensions/autolink.c0000644000175000017500000002671014210444464022355 0ustar carstencarsten#include "autolink.h" #include #include #include #if defined(_WIN32) #define strncasecmp _strnicmp #else #include #endif static int is_valid_hostchar(const uint8_t *link, size_t link_len) { int32_t ch; int r = cmark_utf8proc_iterate(link, (bufsize_t)link_len, &ch); if (r < 0) return 0; return !cmark_utf8proc_is_space(ch) && !cmark_utf8proc_is_punctuation(ch); } static int sd_autolink_issafe(const uint8_t *link, size_t link_len) { static const size_t valid_uris_count = 3; static const char *valid_uris[] = {"http://", "https://", "ftp://"}; size_t i; for (i = 0; i < valid_uris_count; ++i) { size_t len = strlen(valid_uris[i]); if (link_len > len && strncasecmp((char *)link, valid_uris[i], len) == 0 && is_valid_hostchar(link + len, link_len - len)) return 1; } return 0; } static size_t autolink_delim(uint8_t *data, size_t link_end) { uint8_t 
cclose, copen; size_t i; for (i = 0; i < link_end; ++i) if (data[i] == '<') { link_end = i; break; } while (link_end > 0) { cclose = data[link_end - 1]; switch (cclose) { case ')': copen = '('; break; default: copen = 0; } if (strchr("?!.,:*_~'\"", data[link_end - 1]) != NULL) link_end--; else if (data[link_end - 1] == ';') { size_t new_end = link_end - 2; while (new_end > 0 && cmark_isalpha(data[new_end])) new_end--; if (new_end < link_end - 2 && data[new_end] == '&') link_end = new_end; else link_end--; } else if (copen != 0) { size_t closing = 0; size_t opening = 0; i = 0; /* Allow any number of matching brackets (as recognised in copen/cclose) * at the end of the URL. If there is a greater number of closing * brackets than opening ones, we remove one character from the end of * the link. * * Examples (input text => output linked portion): * * http://www.pokemon.com/Pikachu_(Electric) * => http://www.pokemon.com/Pikachu_(Electric) * * http://www.pokemon.com/Pikachu_((Electric) * => http://www.pokemon.com/Pikachu_((Electric) * * http://www.pokemon.com/Pikachu_(Electric)) * => http://www.pokemon.com/Pikachu_(Electric) * * http://www.pokemon.com/Pikachu_((Electric)) * => http://www.pokemon.com/Pikachu_((Electric)) */ while (i < link_end) { if (data[i] == copen) opening++; else if (data[i] == cclose) closing++; i++; } if (closing <= opening) break; link_end--; } else break; } return link_end; } static size_t check_domain(uint8_t *data, size_t size, int allow_short) { size_t i, np = 0, uscore1 = 0, uscore2 = 0; for (i = 1; i < size - 1; i++) { if (data[i] == '_') uscore2++; else if (data[i] == '.') { uscore1 = uscore2; uscore2 = 0; np++; } else if (!is_valid_hostchar(data + i, size - i) && data[i] != '-') break; } if (uscore1 > 0 || uscore2 > 0) return 0; if (allow_short) { /* We don't need a valid domain in the strict sense (with * least one dot; so just make sure it's composed of valid * domain characters and return the length of the the valid * sequence. 
*/ return i; } else { /* a valid domain needs to have at least a dot. * that's as far as we get */ return np ? i : 0; } } static cmark_node *www_match(cmark_parser *parser, cmark_node *parent, cmark_inline_parser *inline_parser) { cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser); size_t max_rewind = cmark_inline_parser_get_offset(inline_parser); uint8_t *data = chunk->data + max_rewind; size_t size = chunk->len - max_rewind; int start = cmark_inline_parser_get_column(inline_parser); size_t link_end; if (max_rewind > 0 && strchr("*_~(", data[-1]) == NULL && !cmark_isspace(data[-1])) return 0; if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0) return 0; link_end = check_domain(data, size, 0); if (link_end == 0) return NULL; while (link_end < size && !cmark_isspace(data[link_end])) link_end++; link_end = autolink_delim(data, link_end); if (link_end == 0) return NULL; cmark_inline_parser_set_offset(inline_parser, (int)(max_rewind + link_end)); cmark_node *node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem); cmark_strbuf buf; cmark_strbuf_init(parser->mem, &buf, 10); cmark_strbuf_puts(&buf, "http://"); cmark_strbuf_put(&buf, data, (bufsize_t)link_end); node->as.link.url = cmark_chunk_buf_detach(&buf); cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); text->as.literal = cmark_chunk_dup(chunk, (bufsize_t)max_rewind, (bufsize_t)link_end); cmark_node_append_child(node, text); node->start_line = text->start_line = node->end_line = text->end_line = cmark_inline_parser_get_line(inline_parser); node->start_column = text->start_column = start - 1; node->end_column = text->end_column = cmark_inline_parser_get_column(inline_parser) - 1; return node; } static cmark_node *url_match(cmark_parser *parser, cmark_node *parent, cmark_inline_parser *inline_parser) { size_t link_end, domain_len; int rewind = 0; cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser); int max_rewind = 
cmark_inline_parser_get_offset(inline_parser); uint8_t *data = chunk->data + max_rewind; size_t size = chunk->len - max_rewind; if (size < 4 || data[1] != '/' || data[2] != '/') return 0; while (rewind < max_rewind && cmark_isalpha(data[-rewind - 1])) rewind++; if (!sd_autolink_issafe(data - rewind, size + rewind)) return 0; link_end = strlen("://"); domain_len = check_domain(data + link_end, size - link_end, 1); if (domain_len == 0) return 0; link_end += domain_len; while (link_end < size && !cmark_isspace(data[link_end])) link_end++; link_end = autolink_delim(data, link_end); if (link_end == 0) return NULL; cmark_inline_parser_set_offset(inline_parser, (int)(max_rewind + link_end)); cmark_node_unput(parent, rewind); cmark_node *node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem); cmark_chunk url = cmark_chunk_dup(chunk, max_rewind - rewind, (bufsize_t)(link_end + rewind)); node->as.link.url = url; cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); text->as.literal = url; cmark_node_append_child(node, text); return node; } static cmark_node *match(cmark_syntax_extension *ext, cmark_parser *parser, cmark_node *parent, unsigned char c, cmark_inline_parser *inline_parser) { if (cmark_inline_parser_in_bracket(inline_parser, false) || cmark_inline_parser_in_bracket(inline_parser, true)) return NULL; if (c == ':') return url_match(parser, parent, inline_parser); if (c == 'w') return www_match(parser, parent, inline_parser); return NULL; // note that we could end up re-consuming something already a // part of an inline, because we don't track when the last // inline was finished in inlines.c. } static void postprocess_text(cmark_parser *parser, cmark_node *text, int offset, int depth) { // postprocess_text can recurse very deeply if there is a very long line of // '@' only. Stop at a reasonable depth to ensure it cannot crash. 
if (depth > 1000) return; size_t link_end; uint8_t *data = text->as.literal.data, *at; size_t size = text->as.literal.len; int rewind, max_rewind, nb = 0, np = 0, ns = 0; if (offset < 0 || (size_t)offset >= size) return; data += offset; size -= offset; at = (uint8_t *)memchr(data, '@', size); if (!at) return; max_rewind = (int)(at - data); data += max_rewind; size -= max_rewind; for (rewind = 0; rewind < max_rewind; ++rewind) { uint8_t c = data[-rewind - 1]; if (cmark_isalnum(c)) continue; if (strchr(".+-_", c) != NULL) continue; if (c == '/') ns++; break; } if (rewind == 0 || ns > 0) { postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1); return; } for (link_end = 0; link_end < size; ++link_end) { uint8_t c = data[link_end]; if (cmark_isalnum(c)) continue; if (c == '@') nb++; else if (c == '.' && link_end < size - 1 && cmark_isalnum(data[link_end + 1])) np++; else if (c != '-' && c != '_') break; } if (link_end < 2 || nb != 1 || np == 0 || (!cmark_isalpha(data[link_end - 1]) && data[link_end - 1] != '.')) { postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1); return; } link_end = autolink_delim(data, link_end); if (link_end == 0) { postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1); return; } cmark_chunk_to_cstr(parser->mem, &text->as.literal); cmark_node *link_node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem); cmark_strbuf buf; cmark_strbuf_init(parser->mem, &buf, 10); cmark_strbuf_puts(&buf, "mailto:"); cmark_strbuf_put(&buf, data - rewind, (bufsize_t)(link_end + rewind)); link_node->as.link.url = cmark_chunk_buf_detach(&buf); cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); cmark_chunk email = cmark_chunk_dup( &text->as.literal, offset + max_rewind - rewind, (bufsize_t)(link_end + rewind)); cmark_chunk_to_cstr(parser->mem, &email); link_text->as.literal = email; cmark_node_append_child(link_node, link_text); cmark_node_insert_after(text, link_node); cmark_node *post = 
cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); post->as.literal = cmark_chunk_dup(&text->as.literal, (bufsize_t)(offset + max_rewind + link_end), (bufsize_t)(size - link_end)); cmark_chunk_to_cstr(parser->mem, &post->as.literal); cmark_node_insert_after(link_node, post); text->as.literal.len = offset + max_rewind - rewind; text->as.literal.data[text->as.literal.len] = 0; postprocess_text(parser, post, 0, depth + 1); } static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser, cmark_node *root) { cmark_iter *iter; cmark_event_type ev; cmark_node *node; bool in_link = false; cmark_consolidate_text_nodes(root); iter = cmark_iter_new(root); while ((ev = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { node = cmark_iter_get_node(iter); if (in_link) { if (ev == CMARK_EVENT_EXIT && node->type == CMARK_NODE_LINK) { in_link = false; } continue; } if (ev == CMARK_EVENT_ENTER && node->type == CMARK_NODE_LINK) { in_link = true; continue; } if (ev == CMARK_EVENT_ENTER && node->type == CMARK_NODE_TEXT) { postprocess_text(parser, node, 0, /*depth*/0); } } cmark_iter_free(iter); return root; } cmark_syntax_extension *create_autolink_extension(void) { cmark_syntax_extension *ext = cmark_syntax_extension_new("autolink"); cmark_llist *special_chars = NULL; cmark_syntax_extension_set_match_inline_func(ext, match); cmark_syntax_extension_set_postprocess_func(ext, postprocess); cmark_mem *mem = cmark_get_default_mem_allocator(); special_chars = cmark_llist_append(mem, special_chars, (void *)':'); special_chars = cmark_llist_append(mem, special_chars, (void *)'w'); cmark_syntax_extension_set_special_inline_chars(ext, special_chars); return ext; } cmarkgfm/third_party/cmark/man/0000755000175000017500000000000014210444464016731 5ustar carstencarstencmarkgfm/third_party/cmark/man/make_man_page.py0000644000175000017500000001114414210444464022050 0ustar carstencarsten#!/usr/bin/env python # Creates a man page from a C file. 
# first argument if present is path to cmark dynamic library # Comments beginning with `/**` are treated as Groff man, except that # 'this' is converted to \fIthis\f[], and ''this'' to \fBthis\f[]. # Non-blank lines immediately following a man page comment are treated # as function signatures or examples and parsed into .Ft, .Fo, .Fa, .Fc. The # immediately preceding man documentation chunk is printed after the example # as a comment on it. # That's about it! import sys, re, os, platform from datetime import date from ctypes import CDLL, c_char_p, c_long, c_void_p sysname = platform.system() if sysname == 'Darwin': cmark = CDLL("build/src/libcmark-gfm.dylib") else: cmark = CDLL("build/src/libcmark-gfm.so") parse_document = cmark.cmark_parse_document parse_document.restype = c_void_p parse_document.argtypes = [c_char_p, c_long] render_man = cmark.cmark_render_man render_man.restype = c_char_p render_man.argtypes = [c_void_p, c_long, c_long] def md2man(text): if sys.version_info >= (3,0): textbytes = text.encode('utf-8') textlen = len(textbytes) return render_man(parse_document(textbytes, textlen), 0, 65).decode('utf-8') else: textbytes = text textlen = len(text) return render_man(parse_document(textbytes, textlen), 0, 72) comment_start_re = re.compile('^\/\*\* ?') comment_delim_re = re.compile('^[/ ]\** ?') comment_end_re = re.compile('^ \**\/') function_re = re.compile('^ *(?:CMARK_GFM_EXPORT\s+)?(?P(?:const\s+)?\w+(?:\s*[*])?)\s*(?P\w+)\s*\((?P[^)]*)\)') blank_re = re.compile('^\s*$') macro_re = re.compile('CMARK_GFM_EXPORT *') typedef_start_re = re.compile('typedef.*{$') typedef_end_re = re.compile('}') single_quote_re = re.compile("(?**', re.sub(single_quote_re, '*\g<1>*', s)) typedef = False mdlines = [] chunk = [] sig = [] if len(sys.argv) > 1: sourcefile = sys.argv[1] else: print("Usage: make_man_page.py sourcefile") exit(1) with open(sourcefile, 'r') as cmarkh: state = 'default' for line in cmarkh: # state transition oldstate = state if 
comment_start_re.match(line): state = 'man' elif comment_end_re.match(line) and state == 'man': continue elif comment_delim_re.match(line) and state == 'man': state = 'man' elif not typedef and blank_re.match(line): state = 'default' elif typedef and typedef_end_re.match(line): typedef = False elif typedef_start_re.match(line): typedef = True state = 'signature' elif state == 'man': state = 'signature' # handle line if state == 'man': chunk.append(handle_quotes(re.sub(comment_delim_re, '', line))) elif state == 'signature': ln = re.sub(macro_re, '', line) if typedef or not re.match(blank_re, ln): sig.append(ln) elif oldstate == 'signature' and state != 'signature': if len(mdlines) > 0 and mdlines[-1] != '\n': mdlines.append('\n') rawsig = ''.join(sig) m = function_re.match(rawsig) mdlines.append('.PP\n') if m: mdlines.append('\\fI' + m.group('type') + '\\f[]' + ' ') mdlines.append('\\fB' + m.group('name') + '\\f[]' + '(') first = True for argument in re.split(',', m.group('args')): if not first: mdlines.append(', ') first = False mdlines.append('\\fI' + argument.strip() + '\\f[]') mdlines.append(')\n') else: mdlines.append('.nf\n\\fC\n.RS 0n\n') mdlines += sig mdlines.append('.RE\n\\f[]\n.fi\n') if len(mdlines) > 0 and mdlines[-1] != '\n': mdlines.append('\n') mdlines += md2man(''.join(chunk)) mdlines.append('\n') chunk = [] sig = [] elif oldstate == 'man' and state != 'signature': if len(mdlines) > 0 and mdlines[-1] != '\n': mdlines.append('\n') mdlines += md2man(''.join(chunk)) # add man chunk chunk = [] mdlines.append('\n') sys.stdout.write('.TH cmark-gfm 3 "' + date.today().strftime('%B %d, %Y') + '" "LOCAL" "Library Functions Manual"\n') sys.stdout.write(''.join(mdlines)) cmarkgfm/third_party/cmark/man/man3/0000755000175000017500000000000014210444464017567 5ustar carstencarstencmarkgfm/third_party/cmark/man/man3/cmark-gfm.30000644000175000017500000005753414210444464021535 0ustar carstencarsten.TH cmark-gfm 3 "April 08, 2019" "LOCAL" "Library Functions Manual" 
.SH NAME .PP \f[B]cmark\-gfm\f[] \- CommonMark parsing, manipulating, and rendering .SH DESCRIPTION .SS Simple Interface .PP \fIchar *\f[] \fBcmark_markdown_to_html\f[](\fIconst char *text\f[], \fIsize_t len\f[], \fIint options\f[]) .PP Convert \f[I]text\f[] (assumed to be a UTF\-8 encoded string with length \f[I]len\f[]) from CommonMark Markdown to HTML, returning a null\-terminated, UTF\-8\-encoded string. It is the caller's responsibility to free the returned buffer. .SS Node Structure .PP .nf \fC .RS 0n typedef enum { /* Error status */ CMARK_NODE_NONE = 0x0000, /* Block */ CMARK_NODE_DOCUMENT = CMARK_NODE_TYPE_BLOCK | 0x0001, CMARK_NODE_BLOCK_QUOTE = CMARK_NODE_TYPE_BLOCK | 0x0002, CMARK_NODE_LIST = CMARK_NODE_TYPE_BLOCK | 0x0003, CMARK_NODE_ITEM = CMARK_NODE_TYPE_BLOCK | 0x0004, CMARK_NODE_CODE_BLOCK = CMARK_NODE_TYPE_BLOCK | 0x0005, CMARK_NODE_HTML_BLOCK = CMARK_NODE_TYPE_BLOCK | 0x0006, CMARK_NODE_CUSTOM_BLOCK = CMARK_NODE_TYPE_BLOCK | 0x0007, CMARK_NODE_PARAGRAPH = CMARK_NODE_TYPE_BLOCK | 0x0008, CMARK_NODE_HEADING = CMARK_NODE_TYPE_BLOCK | 0x0009, CMARK_NODE_THEMATIC_BREAK = CMARK_NODE_TYPE_BLOCK | 0x000a, CMARK_NODE_FOOTNOTE_DEFINITION = CMARK_NODE_TYPE_BLOCK | 0x000b, /* Inline */ CMARK_NODE_TEXT = CMARK_NODE_TYPE_INLINE | 0x0001, CMARK_NODE_SOFTBREAK = CMARK_NODE_TYPE_INLINE | 0x0002, CMARK_NODE_LINEBREAK = CMARK_NODE_TYPE_INLINE | 0x0003, CMARK_NODE_CODE = CMARK_NODE_TYPE_INLINE | 0x0004, CMARK_NODE_HTML_INLINE = CMARK_NODE_TYPE_INLINE | 0x0005, CMARK_NODE_CUSTOM_INLINE = CMARK_NODE_TYPE_INLINE | 0x0006, CMARK_NODE_EMPH = CMARK_NODE_TYPE_INLINE | 0x0007, CMARK_NODE_STRONG = CMARK_NODE_TYPE_INLINE | 0x0008, CMARK_NODE_LINK = CMARK_NODE_TYPE_INLINE | 0x0009, CMARK_NODE_IMAGE = CMARK_NODE_TYPE_INLINE | 0x000a, CMARK_NODE_FOOTNOTE_REFERENCE = CMARK_NODE_TYPE_INLINE | 0x000b, } cmark_node_type; .RE \f[] .fi .PP .nf \fC .RS 0n typedef enum { CMARK_NO_LIST, CMARK_BULLET_LIST, CMARK_ORDERED_LIST } cmark_list_type; .RE \f[] .fi .PP .nf \fC .RS 0n typedef enum 
{ CMARK_NO_DELIM, CMARK_PERIOD_DELIM, CMARK_PAREN_DELIM } cmark_delim_type; .RE \f[] .fi .SS Custom memory allocator support .PP .nf \fC .RS 0n typedef struct cmark_mem { void *(*calloc)(size_t, size_t); void *(*realloc)(void *, size_t); void (*free)(void *); } cmark_mem; .RE \f[] .fi .PP Defines the memory allocation functions to be used by CMark when parsing and allocating a document tree .PP \fIcmark_mem *\f[] \fBcmark_get_default_mem_allocator\f[](\fI\f[]) .PP The default memory allocator; uses the system's calloc, realloc and free. .PP \fIcmark_mem *\f[] \fBcmark_get_arena_mem_allocator\f[](\fI\f[]) .PP An arena allocator; uses system calloc to allocate large slabs of memory. Memory in these slabs is not reused at all. .PP \fIvoid\f[] \fBcmark_arena_reset\f[](\fIvoid\f[]) .PP Resets the arena allocator, quickly returning all used memory to the operating system. .PP \fItypedef\f[] \fBvoid\f[](\fI*cmark_free_func\f[]) .PP Callback for freeing user data with a \f[I]cmark_mem\f[] context. .SS Linked list .PP .nf \fC .RS 0n typedef struct _cmark_llist { struct _cmark_llist *next; void *data; } cmark_llist; .RE \f[] .fi .PP A generic singly linked list. .PP \fIcmark_llist *\f[] \fBcmark_llist_append\f[](\fIcmark_mem * mem\f[], \fIcmark_llist * head\f[], \fIvoid * data\f[]) .PP Append an element to the linked list, return the possibly modified head of the list. .PP \fIvoid\f[] \fBcmark_llist_free_full\f[](\fIcmark_mem * mem\f[], \fIcmark_llist * head\f[], \fIcmark_free_func free_func\f[]) .PP Free the list starting with \f[I]head\f[], calling \f[I]free_func\f[] with the data pointer of each of its elements .PP \fIvoid\f[] \fBcmark_llist_free\f[](\fIcmark_mem * mem\f[], \fIcmark_llist * head\f[]) .PP Free the list starting with \f[I]head\f[] .SS Creating and Destroying Nodes .PP \fIcmark_node *\f[] \fBcmark_node_new\f[](\fIcmark_node_type type\f[]) .PP Creates a new node of type \f[I]type\f[]. 
Note that the node may have other required properties, which it is the caller's responsibility to assign. .PP \fIcmark_node *\f[] \fBcmark_node_new_with_mem\f[](\fIcmark_node_type type\f[], \fIcmark_mem *mem\f[]) .PP Same as \f[C]cmark_node_new\f[], but explicitly listing the memory allocator used to allocate the node. Note: be sure to use the same allocator for every node in a tree, or bad things can happen. .PP \fIvoid\f[] \fBcmark_node_free\f[](\fIcmark_node *node\f[]) .PP Frees the memory allocated for a node and any children. .SS Tree Traversal .PP \fIcmark_node *\f[] \fBcmark_node_next\f[](\fIcmark_node *node\f[]) .PP Returns the next node in the sequence after \f[I]node\f[], or NULL if there is none. .PP \fIcmark_node *\f[] \fBcmark_node_previous\f[](\fIcmark_node *node\f[]) .PP Returns the previous node in the sequence before \f[I]node\f[], or NULL if there is none. .PP \fIcmark_node *\f[] \fBcmark_node_parent\f[](\fIcmark_node *node\f[]) .PP Returns the parent of \f[I]node\f[], or NULL if there is none. .PP \fIcmark_node *\f[] \fBcmark_node_first_child\f[](\fIcmark_node *node\f[]) .PP Returns the first child of \f[I]node\f[], or NULL if \f[I]node\f[] has no children. .PP \fIcmark_node *\f[] \fBcmark_node_last_child\f[](\fIcmark_node *node\f[]) .PP Returns the last child of \f[I]node\f[], or NULL if \f[I]node\f[] has no children. .SS Iterator .PP An iterator will walk through a tree of nodes, starting from a root node, returning one node at a time, together with information about whether the node is being entered or exited. The iterator will first descend to a child node, if there is one. When there is no child, the iterator will go to the next sibling. When there is no next sibling, the iterator will return to the parent (but with a \f[I]cmark_event_type\f[] of \f[C]CMARK_EVENT_EXIT\f[]). The iterator will return \f[C]CMARK_EVENT_DONE\f[] when it reaches the root node again. 
One natural application is an HTML renderer, where an \f[C]ENTER\f[] event outputs an open tag and an \f[C]EXIT\f[] event outputs a close tag. An iterator might also be used to transform an AST in some systematic way, for example, turning all level\-3 headings into regular paragraphs. .IP .nf \f[C] void usage_example(cmark_node *root) { cmark_event_type ev_type; cmark_iter *iter = cmark_iter_new(root); while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cmark_node *cur = cmark_iter_get_node(iter); // Do something with `cur` and `ev_type` } cmark_iter_free(iter); } \f[] .fi .PP Iterators will never return \f[C]EXIT\f[] events for leaf nodes, which are nodes of type: .IP \[bu] 2 CMARK_NODE_HTML_BLOCK .IP \[bu] 2 CMARK_NODE_THEMATIC_BREAK .IP \[bu] 2 CMARK_NODE_CODE_BLOCK .IP \[bu] 2 CMARK_NODE_TEXT .IP \[bu] 2 CMARK_NODE_SOFTBREAK .IP \[bu] 2 CMARK_NODE_LINEBREAK .IP \[bu] 2 CMARK_NODE_CODE .IP \[bu] 2 CMARK_NODE_HTML_INLINE .PP Nodes must only be modified after an \f[C]EXIT\f[] event, or an \f[C]ENTER\f[] event for leaf nodes. .PP .nf \fC .RS 0n typedef enum { CMARK_EVENT_NONE, CMARK_EVENT_DONE, CMARK_EVENT_ENTER, CMARK_EVENT_EXIT } cmark_event_type; .RE \f[] .fi .PP \fIcmark_iter *\f[] \fBcmark_iter_new\f[](\fIcmark_node *root\f[]) .PP Creates a new iterator starting at \f[I]root\f[]. The current node and event type are undefined until \f[I]cmark_iter_next\f[] is called for the first time. The memory allocated for the iterator should be released using \f[I]cmark_iter_free\f[] when it is no longer needed. .PP \fIvoid\f[] \fBcmark_iter_free\f[](\fIcmark_iter *iter\f[]) .PP Frees the memory allocated for an iterator. .PP \fIcmark_event_type\f[] \fBcmark_iter_next\f[](\fIcmark_iter *iter\f[]) .PP Advances to the next node and returns the event type (\f[C]CMARK_EVENT_ENTER\f[], \f[C]CMARK_EVENT_EXIT\f[] or \f[C]CMARK_EVENT_DONE\f[]). .PP \fIcmark_node *\f[] \fBcmark_iter_get_node\f[](\fIcmark_iter *iter\f[]) .PP Returns the current node. 
.PP \fIcmark_event_type\f[] \fBcmark_iter_get_event_type\f[](\fIcmark_iter *iter\f[]) .PP Returns the current event type. .PP \fIcmark_node *\f[] \fBcmark_iter_get_root\f[](\fIcmark_iter *iter\f[]) .PP Returns the root node. .PP \fIvoid\f[] \fBcmark_iter_reset\f[](\fIcmark_iter *iter\f[], \fIcmark_node *current\f[], \fIcmark_event_type event_type\f[]) .PP Resets the iterator so that the current node is \f[I]current\f[] and the event type is \f[I]event_type\f[]. The new current node must be a descendant of the root node or the root node itself. .SS Accessors .PP \fIvoid *\f[] \fBcmark_node_get_user_data\f[](\fIcmark_node *node\f[]) .PP Returns the user data of \f[I]node\f[]. .PP \fIint\f[] \fBcmark_node_set_user_data\f[](\fIcmark_node *node\f[], \fIvoid *user_data\f[]) .PP Sets arbitrary user data for \f[I]node\f[]. Returns 1 on success, 0 on failure. .PP \fIint\f[] \fBcmark_node_set_user_data_free_func\f[](\fIcmark_node *node\f[], \fIcmark_free_func free_func\f[]) .PP Sets the free function for the user data of \f[I]node\f[]. .PP \fIcmark_node_type\f[] \fBcmark_node_get_type\f[](\fIcmark_node *node\f[]) .PP Returns the type of \f[I]node\f[], or \f[C]CMARK_NODE_NONE\f[] on error. .PP \fIconst char *\f[] \fBcmark_node_get_type_string\f[](\fIcmark_node *node\f[]) .PP Like \f[I]cmark_node_get_type\f[], but returns a string representation of the type, or \f[C]"<unknown>"\f[]. .PP \fIconst char *\f[] \fBcmark_node_get_literal\f[](\fIcmark_node *node\f[]) .PP Returns the string contents of \f[I]node\f[], or an empty string if none is set. Returns NULL if called on a node that does not have string content. .PP \fIint\f[] \fBcmark_node_set_literal\f[](\fIcmark_node *node\f[], \fIconst char *content\f[]) .PP Sets the string contents of \f[I]node\f[]. Returns 1 on success, 0 on failure. 
.PP \fIint\f[] \fBcmark_node_set_heading_level\f[](\fIcmark_node *node\f[], \fIint level\f[]) .PP Sets the heading level of \f[I]node\f[], returning 1 on success and 0 on error. .PP \fIcmark_list_type\f[] \fBcmark_node_get_list_type\f[](\fIcmark_node *node\f[]) .PP Returns the list type of \f[I]node\f[], or \f[C]CMARK_NO_LIST\f[] if \f[I]node\f[] is not a list. .PP \fIint\f[] \fBcmark_node_set_list_type\f[](\fIcmark_node *node\f[], \fIcmark_list_type type\f[]) .PP Sets the list type of \f[I]node\f[], returning 1 on success and 0 on error. .PP \fIcmark_delim_type\f[] \fBcmark_node_get_list_delim\f[](\fIcmark_node *node\f[]) .PP Returns the list delimiter type of \f[I]node\f[], or \f[C]CMARK_NO_DELIM\f[] if \f[I]node\f[] is not a list. .PP \fIint\f[] \fBcmark_node_set_list_delim\f[](\fIcmark_node *node\f[], \fIcmark_delim_type delim\f[]) .PP Sets the list delimiter type of \f[I]node\f[], returning 1 on success and 0 on error. .PP \fIint\f[] \fBcmark_node_get_list_start\f[](\fIcmark_node *node\f[]) .PP Returns starting number of \f[I]node\f[], if it is an ordered list, otherwise 0. .PP \fIint\f[] \fBcmark_node_set_list_start\f[](\fIcmark_node *node\f[], \fIint start\f[]) .PP Sets starting number of \f[I]node\f[], if it is an ordered list. Returns 1 on success, 0 on failure. .PP \fIint\f[] \fBcmark_node_get_list_tight\f[](\fIcmark_node *node\f[]) .PP Returns 1 if \f[I]node\f[] is a tight list, 0 otherwise. .PP \fIint\f[] \fBcmark_node_set_list_tight\f[](\fIcmark_node *node\f[], \fIint tight\f[]) .PP Sets the "tightness" of a list. Returns 1 on success, 0 on failure. .PP \fIconst char *\f[] \fBcmark_node_get_fence_info\f[](\fIcmark_node *node\f[]) .PP Returns the info string from a fenced code block. .PP \fIint\f[] \fBcmark_node_set_fence_info\f[](\fIcmark_node *node\f[], \fIconst char *info\f[]) .PP Sets the info string in a fenced code block, returning 1 on success and 0 on failure. 
.PP \fIint\f[] \fBcmark_node_set_fenced\f[](\fIcmark_node * node\f[], \fIint fenced\f[], \fIint length\f[], \fIint offset\f[], \fIchar character\f[]) .PP Sets code blocks fencing details .PP \fIint\f[] \fBcmark_node_get_fenced\f[](\fIcmark_node *node\f[], \fIint *length\f[], \fIint *offset\f[], \fIchar *character\f[]) .PP Returns code blocks fencing details .PP \fIconst char *\f[] \fBcmark_node_get_url\f[](\fIcmark_node *node\f[]) .PP Returns the URL of a link or image \f[I]node\f[], or an empty string if no URL is set. Returns NULL if called on a node that is not a link or image. .PP \fIint\f[] \fBcmark_node_set_url\f[](\fIcmark_node *node\f[], \fIconst char *url\f[]) .PP Sets the URL of a link or image \f[I]node\f[]. Returns 1 on success, 0 on failure. .PP \fIconst char *\f[] \fBcmark_node_get_title\f[](\fIcmark_node *node\f[]) .PP Returns the title of a link or image \f[I]node\f[], or an empty string if no title is set. Returns NULL if called on a node that is not a link or image. .PP \fIint\f[] \fBcmark_node_set_title\f[](\fIcmark_node *node\f[], \fIconst char *title\f[]) .PP Sets the title of a link or image \f[I]node\f[]. Returns 1 on success, 0 on failure. .PP \fIconst char *\f[] \fBcmark_node_get_on_enter\f[](\fIcmark_node *node\f[]) .PP Returns the literal "on enter" text for a custom \f[I]node\f[], or an empty string if no on_enter is set. Returns NULL if called on a non\-custom node. .PP \fIint\f[] \fBcmark_node_set_on_enter\f[](\fIcmark_node *node\f[], \fIconst char *on_enter\f[]) .PP Sets the literal text to render "on enter" for a custom \f[I]node\f[]. Any children of the node will be rendered after this text. Returns 1 on success 0 on failure. .PP \fIconst char *\f[] \fBcmark_node_get_on_exit\f[](\fIcmark_node *node\f[]) .PP Returns the literal "on exit" text for a custom \f[I]node\f[], or an empty string if no on_exit is set. Returns NULL if called on a non\-custom node. 
.PP \fIint\f[] \fBcmark_node_set_on_exit\f[](\fIcmark_node *node\f[], \fIconst char *on_exit\f[]) .PP Sets the literal text to render "on exit" for a custom \f[I]node\f[]. Any children of the node will be rendered before this text. Returns 1 on success 0 on failure. .PP \fIint\f[] \fBcmark_node_get_start_line\f[](\fIcmark_node *node\f[]) .PP Returns the line on which \f[I]node\f[] begins. .PP \fIint\f[] \fBcmark_node_get_start_column\f[](\fIcmark_node *node\f[]) .PP Returns the column at which \f[I]node\f[] begins. .PP \fIint\f[] \fBcmark_node_get_end_line\f[](\fIcmark_node *node\f[]) .PP Returns the line on which \f[I]node\f[] ends. .PP \fIint\f[] \fBcmark_node_get_end_column\f[](\fIcmark_node *node\f[]) .PP Returns the column at which \f[I]node\f[] ends. .SS Tree Manipulation .PP \fIvoid\f[] \fBcmark_node_unlink\f[](\fIcmark_node *node\f[]) .PP Unlinks a \f[I]node\f[], removing it from the tree, but not freeing its memory. (Use \f[I]cmark_node_free\f[] for that.) .PP \fIint\f[] \fBcmark_node_insert_before\f[](\fIcmark_node *node\f[], \fIcmark_node *sibling\f[]) .PP Inserts \f[I]sibling\f[] before \f[I]node\f[]. Returns 1 on success, 0 on failure. .PP \fIint\f[] \fBcmark_node_insert_after\f[](\fIcmark_node *node\f[], \fIcmark_node *sibling\f[]) .PP Inserts \f[I]sibling\f[] after \f[I]node\f[]. Returns 1 on success, 0 on failure. .PP \fIint\f[] \fBcmark_node_replace\f[](\fIcmark_node *oldnode\f[], \fIcmark_node *newnode\f[]) .PP Replaces \f[I]oldnode\f[] with \f[I]newnode\f[] and unlinks \f[I]oldnode\f[] (but does not free its memory). Returns 1 on success, 0 on failure. .PP \fIint\f[] \fBcmark_node_prepend_child\f[](\fIcmark_node *node\f[], \fIcmark_node *child\f[]) .PP Adds \f[I]child\f[] to the beginning of the children of \f[I]node\f[]. Returns 1 on success, 0 on failure. .PP \fIint\f[] \fBcmark_node_append_child\f[](\fIcmark_node *node\f[], \fIcmark_node *child\f[]) .PP Adds \f[I]child\f[] to the end of the children of \f[I]node\f[]. 
Returns 1 on success, 0 on failure. .PP \fIvoid\f[] \fBcmark_consolidate_text_nodes\f[](\fIcmark_node *root\f[]) .PP Consolidates adjacent text nodes. .PP \fIvoid\f[] \fBcmark_node_own\f[](\fIcmark_node *root\f[]) .PP Ensures a node and all its children own their own chunk memory. .SS Parsing .PP Simple interface: .IP .nf \f[C] cmark_node *document = cmark_parse_document("Hello *world*", 13, CMARK_OPT_DEFAULT); \f[] .fi .PP Streaming interface: .IP .nf \f[C] cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT); FILE *fp = fopen("myfile.md", "rb"); while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) { cmark_parser_feed(parser, buffer, bytes); if (bytes < sizeof(buffer)) { break; } } document = cmark_parser_finish(parser); cmark_parser_free(parser); \f[] .fi .PP \fIcmark_parser *\f[] \fBcmark_parser_new\f[](\fIint options\f[]) .PP Creates a new parser object. .PP \fIcmark_parser *\f[] \fBcmark_parser_new_with_mem\f[](\fIint options\f[], \fIcmark_mem *mem\f[]) .PP Creates a new parser object with the given memory allocator .PP \fIvoid\f[] \fBcmark_parser_free\f[](\fIcmark_parser *parser\f[]) .PP Frees memory allocated for a parser object. .PP \fIvoid\f[] \fBcmark_parser_feed\f[](\fIcmark_parser *parser\f[], \fIconst char *buffer\f[], \fIsize_t len\f[]) .PP Feeds a string of length \f[I]len\f[] to \f[I]parser\f[]. .PP \fIcmark_node *\f[] \fBcmark_parser_finish\f[](\fIcmark_parser *parser\f[]) .PP Finish parsing and return a pointer to a tree of nodes. .PP \fIcmark_node *\f[] \fBcmark_parse_document\f[](\fIconst char *buffer\f[], \fIsize_t len\f[], \fIint options\f[]) .PP Parse a CommonMark document in \f[I]buffer\f[] of length \f[I]len\f[]. Returns a pointer to a tree of nodes. The memory allocated for the node tree should be released using \f[I]cmark_node_free\f[] when it is no longer needed. 
.PP \fIcmark_node *\f[] \fBcmark_parse_file\f[](\fIFILE *f\f[], \fIint options\f[]) .PP Parse a CommonMark document in file \f[I]f\f[], returning a pointer to a tree of nodes. The memory allocated for the node tree should be released using \f[I]cmark_node_free\f[] when it is no longer needed. .SS Rendering .PP \fIchar *\f[] \fBcmark_render_xml\f[](\fIcmark_node *root\f[], \fIint options\f[]) .PP Render a \f[I]node\f[] tree as XML. It is the caller's responsibility to free the returned buffer. .PP \fIchar *\f[] \fBcmark_render_xml_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIcmark_mem *mem\f[]) .PP As for \f[I]cmark_render_xml\f[], but specifying the allocator to use for the resulting string. .PP \fIchar *\f[] \fBcmark_render_html\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIcmark_llist *extensions\f[]) .PP Render a \f[I]node\f[] tree as an HTML fragment. It is up to the user to add an appropriate header and footer. It is the caller's responsibility to free the returned buffer. .PP \fIchar *\f[] \fBcmark_render_html_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIcmark_llist *extensions\f[], \fIcmark_mem *mem\f[]) .PP As for \f[I]cmark_render_html\f[], but specifying the allocator to use for the resulting string. .PP \fIchar *\f[] \fBcmark_render_man\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[]) .PP Render a \f[I]node\f[] tree as a groff man page, without the header. It is the caller's responsibility to free the returned buffer. .PP \fIchar *\f[] \fBcmark_render_man_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[]) .PP As for \f[I]cmark_render_man\f[], but specifying the allocator to use for the resulting string. .PP \fIchar *\f[] \fBcmark_render_commonmark\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[]) .PP Render a \f[I]node\f[] tree as a commonmark document. It is the caller's responsibility to free the returned buffer. 
.PP \fIchar *\f[] \fBcmark_render_commonmark_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[]) .PP As for \f[I]cmark_render_commonmark\f[], but specifying the allocator to use for the resulting string. .PP \fIchar *\f[] \fBcmark_render_plaintext\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[]) .PP Render a \f[I]node\f[] tree as a plain text document. It is the caller's responsibility to free the returned buffer. .PP \fIchar *\f[] \fBcmark_render_plaintext_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[]) .PP As for \f[I]cmark_render_plaintext\f[], but specifying the allocator to use for the resulting string. .PP \fIchar *\f[] \fBcmark_render_latex\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[]) .PP Render a \f[I]node\f[] tree as a LaTeX document. It is the caller's responsibility to free the returned buffer. .PP \fIchar *\f[] \fBcmark_render_latex_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[]) .PP As for \f[I]cmark_render_latex\f[], but specifying the allocator to use for the resulting string. .SS Options .PP .nf \fC .RS 0n #define CMARK_OPT_DEFAULT 0 .RE \f[] .fi .PP Default options. .SS Options affecting rendering .PP .nf \fC .RS 0n #define CMARK_OPT_SOURCEPOS (1 << 1) .RE \f[] .fi .PP Include a \f[C]data\-sourcepos\f[] attribute on all block elements. .PP .nf \fC .RS 0n #define CMARK_OPT_HARDBREAKS (1 << 2) .RE \f[] .fi .PP Render \f[C]softbreak\f[] elements as hard line breaks. .PP .nf \fC .RS 0n #define CMARK_OPT_SAFE (1 << 3) .RE \f[] .fi .PP \f[C]CMARK_OPT_SAFE\f[] is defined here for API compatibility, but it no longer has any effect. "Safe" mode is now the default: set \f[C]CMARK_OPT_UNSAFE\f[] to disable it. 
.PP .nf \fC .RS 0n #define CMARK_OPT_UNSAFE (1 << 17) .RE \f[] .fi .PP Render raw HTML and unsafe links (\f[C]javascript:\f[], \f[C]vbscript:\f[], \f[C]file:\f[], and \f[C]data:\f[], except for \f[C]image/png\f[], \f[C]image/gif\f[], \f[C]image/jpeg\f[], or \f[C]image/webp\f[] mime types). By default, raw HTML is replaced by a placeholder HTML comment. Unsafe links are replaced by empty strings. .PP .nf \fC .RS 0n #define CMARK_OPT_NOBREAKS (1 << 4) .RE \f[] .fi .PP Render \f[C]softbreak\f[] elements as spaces. .SS Options affecting parsing .PP .nf \fC .RS 0n #define CMARK_OPT_NORMALIZE (1 << 8) .RE \f[] .fi .PP Legacy option (no effect). .PP .nf \fC .RS 0n #define CMARK_OPT_VALIDATE_UTF8 (1 << 9) .RE \f[] .fi .PP Validate UTF\-8 in the input before parsing, replacing illegal sequences with the replacement character U+FFFD. .PP .nf \fC .RS 0n #define CMARK_OPT_SMART (1 << 10) .RE \f[] .fi .PP Convert straight quotes to curly, \-\-\- to em dashes, \-\- to en dashes. .PP .nf \fC .RS 0n #define CMARK_OPT_GITHUB_PRE_LANG (1 << 11) .RE \f[] .fi .PP Use GitHub\-style <pre lang="x"> tags for code blocks instead of <pre><code class="language\-x">. .PP .nf \fC .RS 0n #define CMARK_OPT_LIBERAL_HTML_TAG (1 << 12) .RE \f[] .fi .PP Be liberal in interpreting inline HTML tags. .PP .nf \fC .RS 0n #define CMARK_OPT_FOOTNOTES (1 << 13) .RE \f[] .fi .PP Parse footnotes. .PP .nf \fC .RS 0n #define CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE (1 << 14) .RE \f[] .fi .PP Only parse strikethroughs if surrounded by exactly 2 tildes. Gives some compatibility with redcarpet. .PP .nf \fC .RS 0n #define CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES (1 << 15) .RE \f[] .fi .PP Use style attributes to align table cells instead of align attributes. .PP .nf \fC .RS 0n #define CMARK_OPT_FULL_INFO_STRING (1 << 16) .RE \f[] .fi .PP Include the remainder of the info string in code blocks in a separate attribute. .SS Version information .PP \fIint\f[] \fBcmark_version\f[](\fIvoid\f[]) .PP The library version as integer for runtime checks. 
Also available as macro CMARK_VERSION for compile time checks. .IP \[bu] 2 Bits 16\-23 contain the major version. .IP \[bu] 2 Bits 8\-15 contain the minor version. .IP \[bu] 2 Bits 0\-7 contain the patchlevel. .PP In hexadecimal format, the number 0x010203 represents version 1.2.3. .PP \fIconst char *\f[] \fBcmark_version_string\f[](\fIvoid\f[]) .PP The library version string for runtime checks. Also available as macro CMARK_VERSION_STRING for compile time checks. .SH AUTHORS .PP John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer. cmarkgfm/third_party/cmark/man/CMakeLists.txt0000644000175000017500000000041714210444464021473 0ustar carstencarstenif (NOT MSVC) include(GNUInstallDirs) install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/man1/cmark-gfm.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1) install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/man3/cmark-gfm.3 DESTINATION ${CMAKE_INSTALL_MANDIR}/man3) endif(NOT MSVC) cmarkgfm/third_party/cmark/man/man1/0000755000175000017500000000000014210444464017565 5ustar carstencarstencmarkgfm/third_party/cmark/man/man1/cmark-gfm.10000644000175000017500000000504314210444464021515 0ustar carstencarsten.TH "cmark-gfm" "1" "March 24, 2016" "LOCAL" "General Commands Manual" .SH "NAME" \fBcmark\fR \- convert CommonMark formatted text with GitHub Flavored Markdown extensions to HTML .SH "SYNOPSIS" .HP 6n \fBcmark-gfm\fR [options] file* .SH "DESCRIPTION" \fBcmark-gfm\fR converts Markdown formatted plain text to either HTML, groff man, CommonMark XML, LaTeX, or CommonMark, using the conventions described in the CommonMark spec. It reads input from \fIstdin\fR or the specified files (concatenating their contents) and writes output to \fIstdout\fR. .SH "OPTIONS" .TP 12n .B \-\-to, \-t \f[I]FORMAT\f[] Specify output format (\f[C]html\f[], \f[C]man\f[], \f[C]xml\f[], \f[C]latex\f[], \f[C]commonmark\f[]). .TP 12n .B \-\-width \f[I]WIDTH\f[] Specify a column width to which to wrap the output. For no wrapping, use the value 0 (the default). 
This option currently only affects the commonmark, latex, and man renderers. .TP 12n .B \-\-hardbreaks Render soft breaks (newlines inside paragraphs in the CommonMark source) as hard line breaks in the target format. If this option is specified, hard wrapping is disabled for CommonMark output, regardless of the value given with \-\-width. .TP 12n .B \-\-nobreaks Render soft breaks as spaces. If this option is specified, hard wrapping is disabled for all output formats, regardless of the value given with \-\-width. .TP 12n .B \-\-sourcepos Include source position attribute. .TP 12n .B \-\-normalize Consolidate adjacent text nodes. .TP 12n .B \-\-extension, \-e \f[I]EXTENSION_NAME\f[] Specify an extension name to use. .TP 12n .B \-\-list\-extensions List available extensions and quit. .TP 12n .B \-\-validate-utf8 Validate UTF-8, replacing illegal sequences with U+FFFD. .TP 12n .B \-\-smart Use smart punctuation. Straight double and single quotes will be rendered as curly quotes, depending on their position. \f[C]\-\-\f[] will be rendered as an en-dash. \f[C]\-\-\-\f[] will be rendered as an em-dash. \f[C]...\f[] will be rendered as ellipses. .TP 12n .B \-\-safe Do not render raw HTML or potentially dangerous URLs. (Raw HTML is replaced by a placeholder comment; potentially dangerous URLs are replaced by empty strings.) Dangerous URLs are those that begin with `javascript:`, `vbscript:`, `file:`, or `data:` (except for `image/png`, `image/gif`, `image/jpeg`, or `image/webp` mime types). .TP 12n .B \-\-help Print usage information. .TP 12n .B \-\-version Print version. .SH "AUTHORS" John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer. .SH "SEE ALSO" .PP CommonMark spec: \f[C]http://spec.commonmark.org\f[]. cmarkgfm/third_party/cmark/suppressions0000644000175000017500000000016414210444464020657 0ustar carstencarsten{ . 
Memcheck:Leak fun:malloc fun:__smakebuf fun:__srefill0 fun:__fread fun:fread fun:main } cmarkgfm/third_party/cmark/data/0000755000175000017500000000000014210444464017067 5ustar carstencarstencmarkgfm/third_party/cmark/data/CaseFolding.txt0000644000175000017500000023052114210444464022011 0ustar carstencarsten# CaseFolding-9.0.0.txt # Date: 2016-03-02, 18:54:54 GMT # © 2016 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see http://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see http://www.unicode.org/reports/tr44/ # # Case Folding Properties # # This file is a supplement to the UnicodeData file. # It provides a case folding mapping generated from the Unicode Character Database. # If all characters are mapped according to the full mapping below, then # case differences (according to UnicodeData.txt and SpecialCasing.txt) # are eliminated. # # The data supports both implementations that require simple case foldings # (where string lengths don't change), and implementations that allow full case folding # (where string lengths may grow). Note that where they can be supported, the # full case foldings are superior: for example, they allow "MASSE" and "Maße" to match. # # All code points not listed in this file map to themselves. # # NOTE: case folding does not preserve normalization formats! # # For information on case folding, including how to have case folding # preserve normalization formats, see Section 3.13 Default Case Algorithms in # The Unicode Standard. # # ================================================================================ # Format # ================================================================================ # The entries in this file are in the following machine-readable format: # # ; ; ; # # # The status field is: # C: common case folding, common mappings shared by both simple and full mappings. 
# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces. # S: simple case folding, mappings to single characters where different from F. # T: special case for uppercase I and dotted uppercase I # - For non-Turkic languages, this mapping is normally not used. # - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters. # Note that the Turkic mappings do not maintain canonical equivalence without additional processing. # See the discussions of case mapping in the Unicode Standard for more information. # # Usage: # A. To do a simple case folding, use the mappings with status C + S. # B. To do a full case folding, use the mappings with status C + F. # # The mappings with status T can be used or omitted depending on the desired case-folding # behavior. (The default option is to exclude them.) # # ================================================================= # Property: Case_Folding # All code points not explicitly listed for Case_Folding # have the value C for the status field, and the code point itself for the mapping field. 
# ================================================================= 0041; C; 0061; # LATIN CAPITAL LETTER A 0042; C; 0062; # LATIN CAPITAL LETTER B 0043; C; 0063; # LATIN CAPITAL LETTER C 0044; C; 0064; # LATIN CAPITAL LETTER D 0045; C; 0065; # LATIN CAPITAL LETTER E 0046; C; 0066; # LATIN CAPITAL LETTER F 0047; C; 0067; # LATIN CAPITAL LETTER G 0048; C; 0068; # LATIN CAPITAL LETTER H 0049; C; 0069; # LATIN CAPITAL LETTER I 0049; T; 0131; # LATIN CAPITAL LETTER I 004A; C; 006A; # LATIN CAPITAL LETTER J 004B; C; 006B; # LATIN CAPITAL LETTER K 004C; C; 006C; # LATIN CAPITAL LETTER L 004D; C; 006D; # LATIN CAPITAL LETTER M 004E; C; 006E; # LATIN CAPITAL LETTER N 004F; C; 006F; # LATIN CAPITAL LETTER O 0050; C; 0070; # LATIN CAPITAL LETTER P 0051; C; 0071; # LATIN CAPITAL LETTER Q 0052; C; 0072; # LATIN CAPITAL LETTER R 0053; C; 0073; # LATIN CAPITAL LETTER S 0054; C; 0074; # LATIN CAPITAL LETTER T 0055; C; 0075; # LATIN CAPITAL LETTER U 0056; C; 0076; # LATIN CAPITAL LETTER V 0057; C; 0077; # LATIN CAPITAL LETTER W 0058; C; 0078; # LATIN CAPITAL LETTER X 0059; C; 0079; # LATIN CAPITAL LETTER Y 005A; C; 007A; # LATIN CAPITAL LETTER Z 00B5; C; 03BC; # MICRO SIGN 00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE 00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE 00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX 00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE 00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS 00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE 00C6; C; 00E6; # LATIN CAPITAL LETTER AE 00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA 00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE 00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE 00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX 00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS 00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE 00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE 00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX 00CF; C; 00EF; # LATIN CAPITAL LETTER 
I WITH DIAERESIS 00D0; C; 00F0; # LATIN CAPITAL LETTER ETH 00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE 00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE 00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE 00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX 00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE 00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS 00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE 00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE 00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE 00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX 00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS 00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE 00DE; C; 00FE; # LATIN CAPITAL LETTER THORN 00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S 0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON 0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE 0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK 0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE 0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX 010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE 010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON 010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON 0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE 0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON 0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE 0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE 0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK 011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON 011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX 011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE 0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE 0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA 0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX 0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE 0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE 012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON 012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE 012E; 
C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE 0132; C; 0133; # LATIN CAPITAL LIGATURE IJ 0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX 0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA 0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE 013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA 013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON 013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT 0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE 0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE 0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA 0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON 0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE 014A; C; 014B; # LATIN CAPITAL LETTER ENG 014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON 014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE 0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE 0152; C; 0153; # LATIN CAPITAL LIGATURE OE 0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE 0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA 0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON 015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE 015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX 015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA 0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON 0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA 0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON 0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE 0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE 016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON 016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE 016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE 0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE 0172; C; 0173; # LATIN CAPITAL LETTER U WITH OGONEK 0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX 0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX 
0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS 0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE 017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE 017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON 017F; C; 0073; # LATIN SMALL LETTER LONG S 0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK 0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR 0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX 0186; C; 0254; # LATIN CAPITAL LETTER OPEN O 0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK 0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D 018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK 018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR 018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E 018F; C; 0259; # LATIN CAPITAL LETTER SCHWA 0190; C; 025B; # LATIN CAPITAL LETTER OPEN E 0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK 0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK 0194; C; 0263; # LATIN CAPITAL LETTER GAMMA 0196; C; 0269; # LATIN CAPITAL LETTER IOTA 0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE 0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK 019C; C; 026F; # LATIN CAPITAL LETTER TURNED M 019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK 019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE 01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN 01A2; C; 01A3; # LATIN CAPITAL LETTER OI 01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK 01A6; C; 0280; # LATIN LETTER YR 01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO 01A9; C; 0283; # LATIN CAPITAL LETTER ESH 01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK 01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK 01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN 01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON 01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK 01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK 01B5; C; 01B6; # LATIN CAPITAL LETTER Z WITH STROKE 01B7; C; 0292; # LATIN CAPITAL LETTER EZH 01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED 01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE 
01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON 01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON 01C7; C; 01C9; # LATIN CAPITAL LETTER LJ 01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J 01CA; C; 01CC; # LATIN CAPITAL LETTER NJ 01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J 01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON 01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON 01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON 01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON 01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON 01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE 01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON 01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE 01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON 01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON 01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON 01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE 01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON 01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON 01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK 01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON 01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON 01F1; C; 01F3; # LATIN CAPITAL LETTER DZ 01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z 01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE 01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR 01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN 01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE 01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE 01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE 01FE; C; 01FF; # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE 0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE 0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE 0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE 
0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE 0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE 020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE 020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE 020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE 0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE 0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE 0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE 0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE 0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW 021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW 021C; C; 021D; # LATIN CAPITAL LETTER YOGH 021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON 0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG 0222; C; 0223; # LATIN CAPITAL LETTER OU 0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK 0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE 0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA 022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON 022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON 022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE 0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON 0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON 023A; C; 2C65; # LATIN CAPITAL LETTER A WITH STROKE 023B; C; 023C; # LATIN CAPITAL LETTER C WITH STROKE 023D; C; 019A; # LATIN CAPITAL LETTER L WITH BAR 023E; C; 2C66; # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE 0241; C; 0242; # LATIN CAPITAL LETTER GLOTTAL STOP 0243; C; 0180; # LATIN CAPITAL LETTER B WITH STROKE 0244; C; 0289; # LATIN CAPITAL LETTER U BAR 0245; C; 028C; # LATIN CAPITAL LETTER TURNED V 0246; C; 0247; # LATIN CAPITAL LETTER E WITH STROKE 0248; C; 0249; # LATIN CAPITAL LETTER J WITH STROKE 024A; C; 024B; # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL 024C; C; 024D; # LATIN CAPITAL LETTER R WITH STROKE 024E; C; 024F; # LATIN CAPITAL LETTER Y WITH STROKE 0345; C; 03B9; # 
COMBINING GREEK YPOGEGRAMMENI 0370; C; 0371; # GREEK CAPITAL LETTER HETA 0372; C; 0373; # GREEK CAPITAL LETTER ARCHAIC SAMPI 0376; C; 0377; # GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA 037F; C; 03F3; # GREEK CAPITAL LETTER YOT 0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS 0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS 0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS 038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS 038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS 038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS 038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS 0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS 0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA 0392; C; 03B2; # GREEK CAPITAL LETTER BETA 0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA 0394; C; 03B4; # GREEK CAPITAL LETTER DELTA 0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON 0396; C; 03B6; # GREEK CAPITAL LETTER ZETA 0397; C; 03B7; # GREEK CAPITAL LETTER ETA 0398; C; 03B8; # GREEK CAPITAL LETTER THETA 0399; C; 03B9; # GREEK CAPITAL LETTER IOTA 039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA 039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA 039C; C; 03BC; # GREEK CAPITAL LETTER MU 039D; C; 03BD; # GREEK CAPITAL LETTER NU 039E; C; 03BE; # GREEK CAPITAL LETTER XI 039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON 03A0; C; 03C0; # GREEK CAPITAL LETTER PI 03A1; C; 03C1; # GREEK CAPITAL LETTER RHO 03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA 03A4; C; 03C4; # GREEK CAPITAL LETTER TAU 03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON 03A6; C; 03C6; # GREEK CAPITAL LETTER PHI 03A7; C; 03C7; # GREEK CAPITAL LETTER CHI 03A8; C; 03C8; # GREEK CAPITAL LETTER PSI 03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA 03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA 03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA 03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS 03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA 03CF; C; 03D7; # GREEK CAPITAL 
KAI SYMBOL 03D0; C; 03B2; # GREEK BETA SYMBOL 03D1; C; 03B8; # GREEK THETA SYMBOL 03D5; C; 03C6; # GREEK PHI SYMBOL 03D6; C; 03C0; # GREEK PI SYMBOL 03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA 03DA; C; 03DB; # GREEK LETTER STIGMA 03DC; C; 03DD; # GREEK LETTER DIGAMMA 03DE; C; 03DF; # GREEK LETTER KOPPA 03E0; C; 03E1; # GREEK LETTER SAMPI 03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI 03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI 03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI 03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI 03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA 03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA 03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI 03F0; C; 03BA; # GREEK KAPPA SYMBOL 03F1; C; 03C1; # GREEK RHO SYMBOL 03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL 03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL 03F7; C; 03F8; # GREEK CAPITAL LETTER SHO 03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL 03FA; C; 03FB; # GREEK CAPITAL LETTER SAN 03FD; C; 037B; # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL 03FE; C; 037C; # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL 03FF; C; 037D; # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL 0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE 0401; C; 0451; # CYRILLIC CAPITAL LETTER IO 0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE 0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE 0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE 0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE 0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I 0407; C; 0457; # CYRILLIC CAPITAL LETTER YI 0408; C; 0458; # CYRILLIC CAPITAL LETTER JE 0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE 040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE 040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE 040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE 040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE 040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U 040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE 0410; C; 0430; # CYRILLIC CAPITAL LETTER A 0411; C; 0431; # CYRILLIC CAPITAL LETTER BE 0412; C; 
0432; # CYRILLIC CAPITAL LETTER VE 0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE 0414; C; 0434; # CYRILLIC CAPITAL LETTER DE 0415; C; 0435; # CYRILLIC CAPITAL LETTER IE 0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE 0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE 0418; C; 0438; # CYRILLIC CAPITAL LETTER I 0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I 041A; C; 043A; # CYRILLIC CAPITAL LETTER KA 041B; C; 043B; # CYRILLIC CAPITAL LETTER EL 041C; C; 043C; # CYRILLIC CAPITAL LETTER EM 041D; C; 043D; # CYRILLIC CAPITAL LETTER EN 041E; C; 043E; # CYRILLIC CAPITAL LETTER O 041F; C; 043F; # CYRILLIC CAPITAL LETTER PE 0420; C; 0440; # CYRILLIC CAPITAL LETTER ER 0421; C; 0441; # CYRILLIC CAPITAL LETTER ES 0422; C; 0442; # CYRILLIC CAPITAL LETTER TE 0423; C; 0443; # CYRILLIC CAPITAL LETTER U 0424; C; 0444; # CYRILLIC CAPITAL LETTER EF 0425; C; 0445; # CYRILLIC CAPITAL LETTER HA 0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE 0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE 0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA 0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA 042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN 042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU 042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN 042D; C; 044D; # CYRILLIC CAPITAL LETTER E 042E; C; 044E; # CYRILLIC CAPITAL LETTER YU 042F; C; 044F; # CYRILLIC CAPITAL LETTER YA 0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA 0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT 0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E 0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS 0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS 046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS 046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS 046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI 0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI 0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA 0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA 0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT 0478; C; 0479; # CYRILLIC CAPITAL LETTER 
UK 047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA 047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO 047E; C; 047F; # CYRILLIC CAPITAL LETTER OT 0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA 048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL 048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN 048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK 0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN 0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE 0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK 0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER 0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER 049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER 049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE 049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE 04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA 04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER 04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE 04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK 04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA 04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER 04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER 04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U 04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE 04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER 04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE 04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER 04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE 04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA 04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE 04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER 04C0; C; 04CF; # CYRILLIC LETTER PALOCHKA 04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE 04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK 04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL 04C7; C; 04C8; # CYRILLIC 
CAPITAL LETTER EN WITH HOOK 04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL 04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE 04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL 04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE 04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS 04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE 04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE 04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA 04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS 04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS 04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS 04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE 04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON 04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS 04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS 04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O 04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS 04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS 04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON 04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS 04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE 04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS 04F6; C; 04F7; # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER 04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS 04FA; C; 04FB; # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK 04FC; C; 04FD; # CYRILLIC CAPITAL LETTER HA WITH HOOK 04FE; C; 04FF; # CYRILLIC CAPITAL LETTER HA WITH STROKE 0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE 0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE 0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE 0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE 0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE 050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE 050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE 050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE 0510; C; 0511; # CYRILLIC CAPITAL 
LETTER REVERSED ZE 0512; C; 0513; # CYRILLIC CAPITAL LETTER EL WITH HOOK 0514; C; 0515; # CYRILLIC CAPITAL LETTER LHA 0516; C; 0517; # CYRILLIC CAPITAL LETTER RHA 0518; C; 0519; # CYRILLIC CAPITAL LETTER YAE 051A; C; 051B; # CYRILLIC CAPITAL LETTER QA 051C; C; 051D; # CYRILLIC CAPITAL LETTER WE 051E; C; 051F; # CYRILLIC CAPITAL LETTER ALEUT KA 0520; C; 0521; # CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK 0522; C; 0523; # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK 0524; C; 0525; # CYRILLIC CAPITAL LETTER PE WITH DESCENDER 0526; C; 0527; # CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER 0528; C; 0529; # CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK 052A; C; 052B; # CYRILLIC CAPITAL LETTER DZZHE 052C; C; 052D; # CYRILLIC CAPITAL LETTER DCHE 052E; C; 052F; # CYRILLIC CAPITAL LETTER EL WITH DESCENDER 0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB 0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN 0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM 0534; C; 0564; # ARMENIAN CAPITAL LETTER DA 0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH 0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA 0537; C; 0567; # ARMENIAN CAPITAL LETTER EH 0538; C; 0568; # ARMENIAN CAPITAL LETTER ET 0539; C; 0569; # ARMENIAN CAPITAL LETTER TO 053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE 053B; C; 056B; # ARMENIAN CAPITAL LETTER INI 053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN 053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH 053E; C; 056E; # ARMENIAN CAPITAL LETTER CA 053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN 0540; C; 0570; # ARMENIAN CAPITAL LETTER HO 0541; C; 0571; # ARMENIAN CAPITAL LETTER JA 0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD 0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH 0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN 0545; C; 0575; # ARMENIAN CAPITAL LETTER YI 0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW 0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA 0548; C; 0578; # ARMENIAN CAPITAL LETTER VO 0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA 054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH 054B; C; 057B; # ARMENIAN CAPITAL 
LETTER JHEH 054C; C; 057C; # ARMENIAN CAPITAL LETTER RA 054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH 054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW 054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN 0550; C; 0580; # ARMENIAN CAPITAL LETTER REH 0551; C; 0581; # ARMENIAN CAPITAL LETTER CO 0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN 0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR 0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH 0555; C; 0585; # ARMENIAN CAPITAL LETTER OH 0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH 0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN 10A0; C; 2D00; # GEORGIAN CAPITAL LETTER AN 10A1; C; 2D01; # GEORGIAN CAPITAL LETTER BAN 10A2; C; 2D02; # GEORGIAN CAPITAL LETTER GAN 10A3; C; 2D03; # GEORGIAN CAPITAL LETTER DON 10A4; C; 2D04; # GEORGIAN CAPITAL LETTER EN 10A5; C; 2D05; # GEORGIAN CAPITAL LETTER VIN 10A6; C; 2D06; # GEORGIAN CAPITAL LETTER ZEN 10A7; C; 2D07; # GEORGIAN CAPITAL LETTER TAN 10A8; C; 2D08; # GEORGIAN CAPITAL LETTER IN 10A9; C; 2D09; # GEORGIAN CAPITAL LETTER KAN 10AA; C; 2D0A; # GEORGIAN CAPITAL LETTER LAS 10AB; C; 2D0B; # GEORGIAN CAPITAL LETTER MAN 10AC; C; 2D0C; # GEORGIAN CAPITAL LETTER NAR 10AD; C; 2D0D; # GEORGIAN CAPITAL LETTER ON 10AE; C; 2D0E; # GEORGIAN CAPITAL LETTER PAR 10AF; C; 2D0F; # GEORGIAN CAPITAL LETTER ZHAR 10B0; C; 2D10; # GEORGIAN CAPITAL LETTER RAE 10B1; C; 2D11; # GEORGIAN CAPITAL LETTER SAN 10B2; C; 2D12; # GEORGIAN CAPITAL LETTER TAR 10B3; C; 2D13; # GEORGIAN CAPITAL LETTER UN 10B4; C; 2D14; # GEORGIAN CAPITAL LETTER PHAR 10B5; C; 2D15; # GEORGIAN CAPITAL LETTER KHAR 10B6; C; 2D16; # GEORGIAN CAPITAL LETTER GHAN 10B7; C; 2D17; # GEORGIAN CAPITAL LETTER QAR 10B8; C; 2D18; # GEORGIAN CAPITAL LETTER SHIN 10B9; C; 2D19; # GEORGIAN CAPITAL LETTER CHIN 10BA; C; 2D1A; # GEORGIAN CAPITAL LETTER CAN 10BB; C; 2D1B; # GEORGIAN CAPITAL LETTER JIL 10BC; C; 2D1C; # GEORGIAN CAPITAL LETTER CIL 10BD; C; 2D1D; # GEORGIAN CAPITAL LETTER CHAR 10BE; C; 2D1E; # GEORGIAN CAPITAL LETTER XAN 10BF; C; 2D1F; # GEORGIAN CAPITAL LETTER 
JHAN 10C0; C; 2D20; # GEORGIAN CAPITAL LETTER HAE 10C1; C; 2D21; # GEORGIAN CAPITAL LETTER HE 10C2; C; 2D22; # GEORGIAN CAPITAL LETTER HIE 10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE 10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR 10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE 10C7; C; 2D27; # GEORGIAN CAPITAL LETTER YN 10CD; C; 2D2D; # GEORGIAN CAPITAL LETTER AEN 13F8; C; 13F0; # CHEROKEE SMALL LETTER YE 13F9; C; 13F1; # CHEROKEE SMALL LETTER YI 13FA; C; 13F2; # CHEROKEE SMALL LETTER YO 13FB; C; 13F3; # CHEROKEE SMALL LETTER YU 13FC; C; 13F4; # CHEROKEE SMALL LETTER YV 13FD; C; 13F5; # CHEROKEE SMALL LETTER MV 1C80; C; 0432; # CYRILLIC SMALL LETTER ROUNDED VE 1C81; C; 0434; # CYRILLIC SMALL LETTER LONG-LEGGED DE 1C82; C; 043E; # CYRILLIC SMALL LETTER NARROW O 1C83; C; 0441; # CYRILLIC SMALL LETTER WIDE ES 1C84; C; 0442; # CYRILLIC SMALL LETTER TALL TE 1C85; C; 0442; # CYRILLIC SMALL LETTER THREE-LEGGED TE 1C86; C; 044A; # CYRILLIC SMALL LETTER TALL HARD SIGN 1C87; C; 0463; # CYRILLIC SMALL LETTER TALL YAT 1C88; C; A64B; # CYRILLIC SMALL LETTER UNBLENDED UK 1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW 1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE 1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW 1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW 1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE 1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE 1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW 1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW 1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA 1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW 1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE 1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE 1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW 1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW 1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE 1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE 
1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON 1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE 1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW 1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS 1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA 1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW 1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW 1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE 1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE 1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW 1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW 1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW 1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON 1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW 1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW 1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE 1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE 1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW 1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE 1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW 1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW 1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW 1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE 1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS 1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE 1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE 1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE 1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE 1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE 1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW 1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON 1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW 1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE 1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW 1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT 
ABOVE 1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE 1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE 1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE 1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW 1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW 1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW 1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW 1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW 1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW 1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE 1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS 1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE 1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW 1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE 1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE 1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS 1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE 1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW 1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE 1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS 1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE 1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX 1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW 1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW 1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW 1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS 1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE 1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE 1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING 1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE 1E9E; F; 0073 0073; # LATIN CAPITAL LETTER SHARP S 1E9E; S; 00DF; # LATIN CAPITAL LETTER SHARP S 1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW 1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE 1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH 
CIRCUMFLEX AND ACUTE 1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE 1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE 1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE 1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW 1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE 1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE 1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE 1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE 1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW 1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW 1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE 1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE 1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE 1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE 1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE 1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE 1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW 1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE 1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW 1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW 1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE 1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE 1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE 1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE 1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE 1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW 1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE 1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE 1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE 1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE 1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW 1EE4; C; 1EE5; # 
LATIN CAPITAL LETTER U WITH DOT BELOW 1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE 1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE 1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE 1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE 1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE 1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW 1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE 1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW 1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE 1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE 1EFA; C; 1EFB; # LATIN CAPITAL LETTER MIDDLE-WELSH LL 1EFC; C; 1EFD; # LATIN CAPITAL LETTER MIDDLE-WELSH V 1EFE; C; 1EFF; # LATIN CAPITAL LETTER Y WITH LOOP 1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI 1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA 1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA 1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA 1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA 1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA 1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI 1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI 1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI 1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA 1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA 1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA 1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA 1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA 1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI 1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA 1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA 1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA 1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA 1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA 
WITH DASIA AND OXIA 1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI 1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI 1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI 1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA 1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA 1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA 1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA 1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA 1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI 1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI 1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI 1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA 1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA 1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA 1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA 1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA 1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI 1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA 1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA 1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI 1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA 1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA 1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA 1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI 1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI 1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA 1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA 1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA 1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA 1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA 1F6E; C; 1F66; # GREEK CAPITAL LETTER 
OMEGA WITH PSILI AND PERISPOMENI 1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI 1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI 1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI 1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI 1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI 1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI 1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI 1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI 1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI 1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI 1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI 1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI 1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI 1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI 1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI 1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI 1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI 1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI 1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI 1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI 1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 1F8F; F; 1F07 03B9; # GREEK CAPITAL 
LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI 1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI 1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI 1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI 1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI 1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI 1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI 1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI 1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI 1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI 1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI 1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI 1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI 1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI 1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI 1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI 1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI 1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI 1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI 1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 1F9F; F; 1F27 03B9; # GREEK CAPITAL 
LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI 1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI 1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI 1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI 1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI 1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI 1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI 1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI 1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI 1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI 1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI 1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI 1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI 1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI 1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI 1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI 1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI 1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI 1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI 1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND 
PROSGEGRAMMENI 1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI 1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI 1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI 1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI 1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI 1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY 1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON 1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA 1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA 1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI 1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI 1FBE; C; 03B9; # GREEK PROSGEGRAMMENI 1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI 1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI 1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI 1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI 1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI 1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA 1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA 1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA 1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA 1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI 1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI 1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA 1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA 1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI 1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI 1FD8; C; 1FD0; # GREEK CAPITAL LETTER 
IOTA WITH VRACHY 1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON 1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA 1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA 1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA 1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA 1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI 1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI 1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI 1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY 1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON 1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA 1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA 1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA 1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI 1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI 1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI 1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI 1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI 1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA 1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA 1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA 1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA 1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI 1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI 2126; C; 03C9; # OHM SIGN 212A; C; 006B; # KELVIN SIGN 212B; C; 00E5; # ANGSTROM SIGN 2132; C; 214E; # TURNED CAPITAL F 2160; C; 2170; # ROMAN NUMERAL ONE 2161; C; 2171; # ROMAN NUMERAL TWO 2162; C; 2172; # ROMAN NUMERAL THREE 2163; C; 2173; # ROMAN NUMERAL FOUR 2164; C; 2174; # ROMAN NUMERAL FIVE 2165; C; 2175; # ROMAN NUMERAL SIX 2166; C; 2176; # ROMAN NUMERAL SEVEN 2167; C; 2177; # ROMAN NUMERAL EIGHT 2168; C; 2178; # ROMAN NUMERAL NINE 2169; 
C; 2179; # ROMAN NUMERAL TEN 216A; C; 217A; # ROMAN NUMERAL ELEVEN 216B; C; 217B; # ROMAN NUMERAL TWELVE 216C; C; 217C; # ROMAN NUMERAL FIFTY 216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED 216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED 216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND 2183; C; 2184; # ROMAN NUMERAL REVERSED ONE HUNDRED 24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A 24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B 24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C 24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D 24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E 24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F 24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G 24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H 24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I 24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J 24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K 24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L 24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M 24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N 24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O 24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P 24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q 24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R 24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S 24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T 24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U 24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V 24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W 24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X 24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y 24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z 2C00; C; 2C30; # GLAGOLITIC CAPITAL LETTER AZU 2C01; C; 2C31; # GLAGOLITIC CAPITAL LETTER BUKY 2C02; C; 2C32; # GLAGOLITIC CAPITAL LETTER VEDE 2C03; C; 2C33; # GLAGOLITIC CAPITAL LETTER GLAGOLI 2C04; C; 2C34; # GLAGOLITIC CAPITAL LETTER DOBRO 2C05; C; 2C35; # GLAGOLITIC CAPITAL LETTER YESTU 2C06; C; 2C36; # GLAGOLITIC CAPITAL LETTER ZHIVETE 2C07; C; 2C37; # GLAGOLITIC CAPITAL LETTER DZELO 2C08; C; 2C38; # GLAGOLITIC 
CAPITAL LETTER ZEMLJA 2C09; C; 2C39; # GLAGOLITIC CAPITAL LETTER IZHE 2C0A; C; 2C3A; # GLAGOLITIC CAPITAL LETTER INITIAL IZHE 2C0B; C; 2C3B; # GLAGOLITIC CAPITAL LETTER I 2C0C; C; 2C3C; # GLAGOLITIC CAPITAL LETTER DJERVI 2C0D; C; 2C3D; # GLAGOLITIC CAPITAL LETTER KAKO 2C0E; C; 2C3E; # GLAGOLITIC CAPITAL LETTER LJUDIJE 2C0F; C; 2C3F; # GLAGOLITIC CAPITAL LETTER MYSLITE 2C10; C; 2C40; # GLAGOLITIC CAPITAL LETTER NASHI 2C11; C; 2C41; # GLAGOLITIC CAPITAL LETTER ONU 2C12; C; 2C42; # GLAGOLITIC CAPITAL LETTER POKOJI 2C13; C; 2C43; # GLAGOLITIC CAPITAL LETTER RITSI 2C14; C; 2C44; # GLAGOLITIC CAPITAL LETTER SLOVO 2C15; C; 2C45; # GLAGOLITIC CAPITAL LETTER TVRIDO 2C16; C; 2C46; # GLAGOLITIC CAPITAL LETTER UKU 2C17; C; 2C47; # GLAGOLITIC CAPITAL LETTER FRITU 2C18; C; 2C48; # GLAGOLITIC CAPITAL LETTER HERU 2C19; C; 2C49; # GLAGOLITIC CAPITAL LETTER OTU 2C1A; C; 2C4A; # GLAGOLITIC CAPITAL LETTER PE 2C1B; C; 2C4B; # GLAGOLITIC CAPITAL LETTER SHTA 2C1C; C; 2C4C; # GLAGOLITIC CAPITAL LETTER TSI 2C1D; C; 2C4D; # GLAGOLITIC CAPITAL LETTER CHRIVI 2C1E; C; 2C4E; # GLAGOLITIC CAPITAL LETTER SHA 2C1F; C; 2C4F; # GLAGOLITIC CAPITAL LETTER YERU 2C20; C; 2C50; # GLAGOLITIC CAPITAL LETTER YERI 2C21; C; 2C51; # GLAGOLITIC CAPITAL LETTER YATI 2C22; C; 2C52; # GLAGOLITIC CAPITAL LETTER SPIDERY HA 2C23; C; 2C53; # GLAGOLITIC CAPITAL LETTER YU 2C24; C; 2C54; # GLAGOLITIC CAPITAL LETTER SMALL YUS 2C25; C; 2C55; # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL 2C26; C; 2C56; # GLAGOLITIC CAPITAL LETTER YO 2C27; C; 2C57; # GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS 2C28; C; 2C58; # GLAGOLITIC CAPITAL LETTER BIG YUS 2C29; C; 2C59; # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS 2C2A; C; 2C5A; # GLAGOLITIC CAPITAL LETTER FITA 2C2B; C; 2C5B; # GLAGOLITIC CAPITAL LETTER IZHITSA 2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC 2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A 2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR 2C62; 
C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE 2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE 2C64; C; 027D; # LATIN CAPITAL LETTER R WITH TAIL 2C67; C; 2C68; # LATIN CAPITAL LETTER H WITH DESCENDER 2C69; C; 2C6A; # LATIN CAPITAL LETTER K WITH DESCENDER 2C6B; C; 2C6C; # LATIN CAPITAL LETTER Z WITH DESCENDER 2C6D; C; 0251; # LATIN CAPITAL LETTER ALPHA 2C6E; C; 0271; # LATIN CAPITAL LETTER M WITH HOOK 2C6F; C; 0250; # LATIN CAPITAL LETTER TURNED A 2C70; C; 0252; # LATIN CAPITAL LETTER TURNED ALPHA 2C72; C; 2C73; # LATIN CAPITAL LETTER W WITH HOOK 2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H 2C7E; C; 023F; # LATIN CAPITAL LETTER S WITH SWASH TAIL 2C7F; C; 0240; # LATIN CAPITAL LETTER Z WITH SWASH TAIL 2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA 2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA 2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA 2C86; C; 2C87; # COPTIC CAPITAL LETTER DALDA 2C88; C; 2C89; # COPTIC CAPITAL LETTER EIE 2C8A; C; 2C8B; # COPTIC CAPITAL LETTER SOU 2C8C; C; 2C8D; # COPTIC CAPITAL LETTER ZATA 2C8E; C; 2C8F; # COPTIC CAPITAL LETTER HATE 2C90; C; 2C91; # COPTIC CAPITAL LETTER THETHE 2C92; C; 2C93; # COPTIC CAPITAL LETTER IAUDA 2C94; C; 2C95; # COPTIC CAPITAL LETTER KAPA 2C96; C; 2C97; # COPTIC CAPITAL LETTER LAULA 2C98; C; 2C99; # COPTIC CAPITAL LETTER MI 2C9A; C; 2C9B; # COPTIC CAPITAL LETTER NI 2C9C; C; 2C9D; # COPTIC CAPITAL LETTER KSI 2C9E; C; 2C9F; # COPTIC CAPITAL LETTER O 2CA0; C; 2CA1; # COPTIC CAPITAL LETTER PI 2CA2; C; 2CA3; # COPTIC CAPITAL LETTER RO 2CA4; C; 2CA5; # COPTIC CAPITAL LETTER SIMA 2CA6; C; 2CA7; # COPTIC CAPITAL LETTER TAU 2CA8; C; 2CA9; # COPTIC CAPITAL LETTER UA 2CAA; C; 2CAB; # COPTIC CAPITAL LETTER FI 2CAC; C; 2CAD; # COPTIC CAPITAL LETTER KHI 2CAE; C; 2CAF; # COPTIC CAPITAL LETTER PSI 2CB0; C; 2CB1; # COPTIC CAPITAL LETTER OOU 2CB2; C; 2CB3; # COPTIC CAPITAL LETTER DIALECT-P ALEF 2CB4; C; 2CB5; # COPTIC CAPITAL LETTER OLD COPTIC AIN 2CB6; C; 2CB7; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE 2CB8; C; 2CB9; # COPTIC CAPITAL 
LETTER DIALECT-P KAPA 2CBA; C; 2CBB; # COPTIC CAPITAL LETTER DIALECT-P NI 2CBC; C; 2CBD; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI 2CBE; C; 2CBF; # COPTIC CAPITAL LETTER OLD COPTIC OOU 2CC0; C; 2CC1; # COPTIC CAPITAL LETTER SAMPI 2CC2; C; 2CC3; # COPTIC CAPITAL LETTER CROSSED SHEI 2CC4; C; 2CC5; # COPTIC CAPITAL LETTER OLD COPTIC SHEI 2CC6; C; 2CC7; # COPTIC CAPITAL LETTER OLD COPTIC ESH 2CC8; C; 2CC9; # COPTIC CAPITAL LETTER AKHMIMIC KHEI 2CCA; C; 2CCB; # COPTIC CAPITAL LETTER DIALECT-P HORI 2CCC; C; 2CCD; # COPTIC CAPITAL LETTER OLD COPTIC HORI 2CCE; C; 2CCF; # COPTIC CAPITAL LETTER OLD COPTIC HA 2CD0; C; 2CD1; # COPTIC CAPITAL LETTER L-SHAPED HA 2CD2; C; 2CD3; # COPTIC CAPITAL LETTER OLD COPTIC HEI 2CD4; C; 2CD5; # COPTIC CAPITAL LETTER OLD COPTIC HAT 2CD6; C; 2CD7; # COPTIC CAPITAL LETTER OLD COPTIC GANGIA 2CD8; C; 2CD9; # COPTIC CAPITAL LETTER OLD COPTIC DJA 2CDA; C; 2CDB; # COPTIC CAPITAL LETTER OLD COPTIC SHIMA 2CDC; C; 2CDD; # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA 2CDE; C; 2CDF; # COPTIC CAPITAL LETTER OLD NUBIAN NGI 2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI 2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU 2CEB; C; 2CEC; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI 2CED; C; 2CEE; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA 2CF2; C; 2CF3; # COPTIC CAPITAL LETTER BOHAIRIC KHEI A640; C; A641; # CYRILLIC CAPITAL LETTER ZEMLYA A642; C; A643; # CYRILLIC CAPITAL LETTER DZELO A644; C; A645; # CYRILLIC CAPITAL LETTER REVERSED DZE A646; C; A647; # CYRILLIC CAPITAL LETTER IOTA A648; C; A649; # CYRILLIC CAPITAL LETTER DJERV A64A; C; A64B; # CYRILLIC CAPITAL LETTER MONOGRAPH UK A64C; C; A64D; # CYRILLIC CAPITAL LETTER BROAD OMEGA A64E; C; A64F; # CYRILLIC CAPITAL LETTER NEUTRAL YER A650; C; A651; # CYRILLIC CAPITAL LETTER YERU WITH BACK YER A652; C; A653; # CYRILLIC CAPITAL LETTER IOTIFIED YAT A654; C; A655; # CYRILLIC CAPITAL LETTER REVERSED YU A656; C; A657; # CYRILLIC CAPITAL LETTER IOTIFIED A A658; C; A659; # CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS 
A65A; C; A65B; # CYRILLIC CAPITAL LETTER BLENDED YUS A65C; C; A65D; # CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS A65E; C; A65F; # CYRILLIC CAPITAL LETTER YN A660; C; A661; # CYRILLIC CAPITAL LETTER REVERSED TSE A662; C; A663; # CYRILLIC CAPITAL LETTER SOFT DE A664; C; A665; # CYRILLIC CAPITAL LETTER SOFT EL A666; C; A667; # CYRILLIC CAPITAL LETTER SOFT EM A668; C; A669; # CYRILLIC CAPITAL LETTER MONOCULAR O A66A; C; A66B; # CYRILLIC CAPITAL LETTER BINOCULAR O A66C; C; A66D; # CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O A680; C; A681; # CYRILLIC CAPITAL LETTER DWE A682; C; A683; # CYRILLIC CAPITAL LETTER DZWE A684; C; A685; # CYRILLIC CAPITAL LETTER ZHWE A686; C; A687; # CYRILLIC CAPITAL LETTER CCHE A688; C; A689; # CYRILLIC CAPITAL LETTER DZZE A68A; C; A68B; # CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK A68C; C; A68D; # CYRILLIC CAPITAL LETTER TWE A68E; C; A68F; # CYRILLIC CAPITAL LETTER TSWE A690; C; A691; # CYRILLIC CAPITAL LETTER TSSE A692; C; A693; # CYRILLIC CAPITAL LETTER TCHE A694; C; A695; # CYRILLIC CAPITAL LETTER HWE A696; C; A697; # CYRILLIC CAPITAL LETTER SHWE A698; C; A699; # CYRILLIC CAPITAL LETTER DOUBLE O A69A; C; A69B; # CYRILLIC CAPITAL LETTER CROSSED O A722; C; A723; # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF A724; C; A725; # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN A726; C; A727; # LATIN CAPITAL LETTER HENG A728; C; A729; # LATIN CAPITAL LETTER TZ A72A; C; A72B; # LATIN CAPITAL LETTER TRESILLO A72C; C; A72D; # LATIN CAPITAL LETTER CUATRILLO A72E; C; A72F; # LATIN CAPITAL LETTER CUATRILLO WITH COMMA A732; C; A733; # LATIN CAPITAL LETTER AA A734; C; A735; # LATIN CAPITAL LETTER AO A736; C; A737; # LATIN CAPITAL LETTER AU A738; C; A739; # LATIN CAPITAL LETTER AV A73A; C; A73B; # LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR A73C; C; A73D; # LATIN CAPITAL LETTER AY A73E; C; A73F; # LATIN CAPITAL LETTER REVERSED C WITH DOT A740; C; A741; # LATIN CAPITAL LETTER K WITH STROKE A742; C; A743; # LATIN CAPITAL LETTER K WITH DIAGONAL STROKE A744; C; A745; 
# LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE A746; C; A747; # LATIN CAPITAL LETTER BROKEN L A748; C; A749; # LATIN CAPITAL LETTER L WITH HIGH STROKE A74A; C; A74B; # LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY A74C; C; A74D; # LATIN CAPITAL LETTER O WITH LOOP A74E; C; A74F; # LATIN CAPITAL LETTER OO A750; C; A751; # LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER A752; C; A753; # LATIN CAPITAL LETTER P WITH FLOURISH A754; C; A755; # LATIN CAPITAL LETTER P WITH SQUIRREL TAIL A756; C; A757; # LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER A758; C; A759; # LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE A75A; C; A75B; # LATIN CAPITAL LETTER R ROTUNDA A75C; C; A75D; # LATIN CAPITAL LETTER RUM ROTUNDA A75E; C; A75F; # LATIN CAPITAL LETTER V WITH DIAGONAL STROKE A760; C; A761; # LATIN CAPITAL LETTER VY A762; C; A763; # LATIN CAPITAL LETTER VISIGOTHIC Z A764; C; A765; # LATIN CAPITAL LETTER THORN WITH STROKE A766; C; A767; # LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER A768; C; A769; # LATIN CAPITAL LETTER VEND A76A; C; A76B; # LATIN CAPITAL LETTER ET A76C; C; A76D; # LATIN CAPITAL LETTER IS A76E; C; A76F; # LATIN CAPITAL LETTER CON A779; C; A77A; # LATIN CAPITAL LETTER INSULAR D A77B; C; A77C; # LATIN CAPITAL LETTER INSULAR F A77D; C; 1D79; # LATIN CAPITAL LETTER INSULAR G A77E; C; A77F; # LATIN CAPITAL LETTER TURNED INSULAR G A780; C; A781; # LATIN CAPITAL LETTER TURNED L A782; C; A783; # LATIN CAPITAL LETTER INSULAR R A784; C; A785; # LATIN CAPITAL LETTER INSULAR S A786; C; A787; # LATIN CAPITAL LETTER INSULAR T A78B; C; A78C; # LATIN CAPITAL LETTER SALTILLO A78D; C; 0265; # LATIN CAPITAL LETTER TURNED H A790; C; A791; # LATIN CAPITAL LETTER N WITH DESCENDER A792; C; A793; # LATIN CAPITAL LETTER C WITH BAR A796; C; A797; # LATIN CAPITAL LETTER B WITH FLOURISH A798; C; A799; # LATIN CAPITAL LETTER F WITH STROKE A79A; C; A79B; # LATIN CAPITAL LETTER VOLAPUK AE A79C; C; A79D; # LATIN CAPITAL LETTER VOLAPUK OE A79E; C; A79F; # LATIN 
CAPITAL LETTER VOLAPUK UE A7A0; C; A7A1; # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A2; C; A7A3; # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE A7A4; C; A7A5; # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE A7A6; C; A7A7; # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A8; C; A7A9; # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE A7AA; C; 0266; # LATIN CAPITAL LETTER H WITH HOOK A7AB; C; 025C; # LATIN CAPITAL LETTER REVERSED OPEN E A7AC; C; 0261; # LATIN CAPITAL LETTER SCRIPT G A7AD; C; 026C; # LATIN CAPITAL LETTER L WITH BELT A7AE; C; 026A; # LATIN CAPITAL LETTER SMALL CAPITAL I A7B0; C; 029E; # LATIN CAPITAL LETTER TURNED K A7B1; C; 0287; # LATIN CAPITAL LETTER TURNED T A7B2; C; 029D; # LATIN CAPITAL LETTER J WITH CROSSED-TAIL A7B3; C; AB53; # LATIN CAPITAL LETTER CHI A7B4; C; A7B5; # LATIN CAPITAL LETTER BETA A7B6; C; A7B7; # LATIN CAPITAL LETTER OMEGA AB70; C; 13A0; # CHEROKEE SMALL LETTER A AB71; C; 13A1; # CHEROKEE SMALL LETTER E AB72; C; 13A2; # CHEROKEE SMALL LETTER I AB73; C; 13A3; # CHEROKEE SMALL LETTER O AB74; C; 13A4; # CHEROKEE SMALL LETTER U AB75; C; 13A5; # CHEROKEE SMALL LETTER V AB76; C; 13A6; # CHEROKEE SMALL LETTER GA AB77; C; 13A7; # CHEROKEE SMALL LETTER KA AB78; C; 13A8; # CHEROKEE SMALL LETTER GE AB79; C; 13A9; # CHEROKEE SMALL LETTER GI AB7A; C; 13AA; # CHEROKEE SMALL LETTER GO AB7B; C; 13AB; # CHEROKEE SMALL LETTER GU AB7C; C; 13AC; # CHEROKEE SMALL LETTER GV AB7D; C; 13AD; # CHEROKEE SMALL LETTER HA AB7E; C; 13AE; # CHEROKEE SMALL LETTER HE AB7F; C; 13AF; # CHEROKEE SMALL LETTER HI AB80; C; 13B0; # CHEROKEE SMALL LETTER HO AB81; C; 13B1; # CHEROKEE SMALL LETTER HU AB82; C; 13B2; # CHEROKEE SMALL LETTER HV AB83; C; 13B3; # CHEROKEE SMALL LETTER LA AB84; C; 13B4; # CHEROKEE SMALL LETTER LE AB85; C; 13B5; # CHEROKEE SMALL LETTER LI AB86; C; 13B6; # CHEROKEE SMALL LETTER LO AB87; C; 13B7; # CHEROKEE SMALL LETTER LU AB88; C; 13B8; # CHEROKEE SMALL LETTER LV AB89; C; 13B9; # CHEROKEE SMALL LETTER MA AB8A; C; 13BA; # CHEROKEE SMALL LETTER ME AB8B; C; 
13BB; # CHEROKEE SMALL LETTER MI AB8C; C; 13BC; # CHEROKEE SMALL LETTER MO AB8D; C; 13BD; # CHEROKEE SMALL LETTER MU AB8E; C; 13BE; # CHEROKEE SMALL LETTER NA AB8F; C; 13BF; # CHEROKEE SMALL LETTER HNA AB90; C; 13C0; # CHEROKEE SMALL LETTER NAH AB91; C; 13C1; # CHEROKEE SMALL LETTER NE AB92; C; 13C2; # CHEROKEE SMALL LETTER NI AB93; C; 13C3; # CHEROKEE SMALL LETTER NO AB94; C; 13C4; # CHEROKEE SMALL LETTER NU AB95; C; 13C5; # CHEROKEE SMALL LETTER NV AB96; C; 13C6; # CHEROKEE SMALL LETTER QUA AB97; C; 13C7; # CHEROKEE SMALL LETTER QUE AB98; C; 13C8; # CHEROKEE SMALL LETTER QUI AB99; C; 13C9; # CHEROKEE SMALL LETTER QUO AB9A; C; 13CA; # CHEROKEE SMALL LETTER QUU AB9B; C; 13CB; # CHEROKEE SMALL LETTER QUV AB9C; C; 13CC; # CHEROKEE SMALL LETTER SA AB9D; C; 13CD; # CHEROKEE SMALL LETTER S AB9E; C; 13CE; # CHEROKEE SMALL LETTER SE AB9F; C; 13CF; # CHEROKEE SMALL LETTER SI ABA0; C; 13D0; # CHEROKEE SMALL LETTER SO ABA1; C; 13D1; # CHEROKEE SMALL LETTER SU ABA2; C; 13D2; # CHEROKEE SMALL LETTER SV ABA3; C; 13D3; # CHEROKEE SMALL LETTER DA ABA4; C; 13D4; # CHEROKEE SMALL LETTER TA ABA5; C; 13D5; # CHEROKEE SMALL LETTER DE ABA6; C; 13D6; # CHEROKEE SMALL LETTER TE ABA7; C; 13D7; # CHEROKEE SMALL LETTER DI ABA8; C; 13D8; # CHEROKEE SMALL LETTER TI ABA9; C; 13D9; # CHEROKEE SMALL LETTER DO ABAA; C; 13DA; # CHEROKEE SMALL LETTER DU ABAB; C; 13DB; # CHEROKEE SMALL LETTER DV ABAC; C; 13DC; # CHEROKEE SMALL LETTER DLA ABAD; C; 13DD; # CHEROKEE SMALL LETTER TLA ABAE; C; 13DE; # CHEROKEE SMALL LETTER TLE ABAF; C; 13DF; # CHEROKEE SMALL LETTER TLI ABB0; C; 13E0; # CHEROKEE SMALL LETTER TLO ABB1; C; 13E1; # CHEROKEE SMALL LETTER TLU ABB2; C; 13E2; # CHEROKEE SMALL LETTER TLV ABB3; C; 13E3; # CHEROKEE SMALL LETTER TSA ABB4; C; 13E4; # CHEROKEE SMALL LETTER TSE ABB5; C; 13E5; # CHEROKEE SMALL LETTER TSI ABB6; C; 13E6; # CHEROKEE SMALL LETTER TSO ABB7; C; 13E7; # CHEROKEE SMALL LETTER TSU ABB8; C; 13E8; # CHEROKEE SMALL LETTER TSV ABB9; C; 13E9; # CHEROKEE SMALL LETTER WA ABBA; C; 13EA; 
# CHEROKEE SMALL LETTER WE ABBB; C; 13EB; # CHEROKEE SMALL LETTER WI ABBC; C; 13EC; # CHEROKEE SMALL LETTER WO ABBD; C; 13ED; # CHEROKEE SMALL LETTER WU ABBE; C; 13EE; # CHEROKEE SMALL LETTER WV ABBF; C; 13EF; # CHEROKEE SMALL LETTER YA FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W 
FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z 10400; C; 10428; # DESERET CAPITAL LETTER LONG I 10401; C; 10429; # DESERET CAPITAL LETTER LONG E 10402; C; 1042A; # DESERET CAPITAL LETTER LONG A 10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH 10404; C; 1042C; # DESERET CAPITAL LETTER LONG O 10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO 10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I 10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E 10408; C; 10430; # DESERET CAPITAL LETTER SHORT A 10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH 1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O 1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO 1040C; C; 10434; # DESERET CAPITAL LETTER AY 1040D; C; 10435; # DESERET CAPITAL LETTER OW 1040E; C; 10436; # DESERET CAPITAL LETTER WU 1040F; C; 10437; # DESERET CAPITAL LETTER YEE 10410; C; 10438; # DESERET CAPITAL LETTER H 10411; C; 10439; # DESERET CAPITAL LETTER PEE 10412; C; 1043A; # DESERET CAPITAL LETTER BEE 10413; C; 1043B; # DESERET CAPITAL LETTER TEE 10414; C; 1043C; # DESERET CAPITAL LETTER DEE 10415; C; 1043D; # DESERET CAPITAL LETTER CHEE 10416; C; 1043E; # DESERET CAPITAL LETTER JEE 10417; C; 1043F; # DESERET CAPITAL LETTER KAY 10418; C; 10440; # DESERET CAPITAL LETTER GAY 10419; C; 10441; # DESERET CAPITAL LETTER EF 1041A; C; 10442; # DESERET CAPITAL LETTER VEE 1041B; C; 10443; # DESERET CAPITAL LETTER ETH 1041C; C; 10444; # DESERET CAPITAL LETTER THEE 1041D; C; 10445; # DESERET CAPITAL LETTER ES 1041E; C; 10446; # DESERET CAPITAL LETTER ZEE 1041F; C; 10447; # DESERET CAPITAL LETTER ESH 10420; C; 10448; # DESERET CAPITAL LETTER ZHEE 10421; C; 10449; # DESERET CAPITAL LETTER ER 10422; C; 1044A; # DESERET CAPITAL LETTER EL 10423; C; 1044B; # DESERET CAPITAL LETTER EM 10424; C; 1044C; # DESERET CAPITAL LETTER EN 10425; C; 1044D; # DESERET CAPITAL LETTER ENG 10426; C; 1044E; # DESERET CAPITAL LETTER OI 10427; C; 1044F; # 
DESERET CAPITAL LETTER EW 104B0; C; 104D8; # OSAGE CAPITAL LETTER A 104B1; C; 104D9; # OSAGE CAPITAL LETTER AI 104B2; C; 104DA; # OSAGE CAPITAL LETTER AIN 104B3; C; 104DB; # OSAGE CAPITAL LETTER AH 104B4; C; 104DC; # OSAGE CAPITAL LETTER BRA 104B5; C; 104DD; # OSAGE CAPITAL LETTER CHA 104B6; C; 104DE; # OSAGE CAPITAL LETTER EHCHA 104B7; C; 104DF; # OSAGE CAPITAL LETTER E 104B8; C; 104E0; # OSAGE CAPITAL LETTER EIN 104B9; C; 104E1; # OSAGE CAPITAL LETTER HA 104BA; C; 104E2; # OSAGE CAPITAL LETTER HYA 104BB; C; 104E3; # OSAGE CAPITAL LETTER I 104BC; C; 104E4; # OSAGE CAPITAL LETTER KA 104BD; C; 104E5; # OSAGE CAPITAL LETTER EHKA 104BE; C; 104E6; # OSAGE CAPITAL LETTER KYA 104BF; C; 104E7; # OSAGE CAPITAL LETTER LA 104C0; C; 104E8; # OSAGE CAPITAL LETTER MA 104C1; C; 104E9; # OSAGE CAPITAL LETTER NA 104C2; C; 104EA; # OSAGE CAPITAL LETTER O 104C3; C; 104EB; # OSAGE CAPITAL LETTER OIN 104C4; C; 104EC; # OSAGE CAPITAL LETTER PA 104C5; C; 104ED; # OSAGE CAPITAL LETTER EHPA 104C6; C; 104EE; # OSAGE CAPITAL LETTER SA 104C7; C; 104EF; # OSAGE CAPITAL LETTER SHA 104C8; C; 104F0; # OSAGE CAPITAL LETTER TA 104C9; C; 104F1; # OSAGE CAPITAL LETTER EHTA 104CA; C; 104F2; # OSAGE CAPITAL LETTER TSA 104CB; C; 104F3; # OSAGE CAPITAL LETTER EHTSA 104CC; C; 104F4; # OSAGE CAPITAL LETTER TSHA 104CD; C; 104F5; # OSAGE CAPITAL LETTER DHA 104CE; C; 104F6; # OSAGE CAPITAL LETTER U 104CF; C; 104F7; # OSAGE CAPITAL LETTER WA 104D0; C; 104F8; # OSAGE CAPITAL LETTER KHA 104D1; C; 104F9; # OSAGE CAPITAL LETTER GHA 104D2; C; 104FA; # OSAGE CAPITAL LETTER ZA 104D3; C; 104FB; # OSAGE CAPITAL LETTER ZHA 10C80; C; 10CC0; # OLD HUNGARIAN CAPITAL LETTER A 10C81; C; 10CC1; # OLD HUNGARIAN CAPITAL LETTER AA 10C82; C; 10CC2; # OLD HUNGARIAN CAPITAL LETTER EB 10C83; C; 10CC3; # OLD HUNGARIAN CAPITAL LETTER AMB 10C84; C; 10CC4; # OLD HUNGARIAN CAPITAL LETTER EC 10C85; C; 10CC5; # OLD HUNGARIAN CAPITAL LETTER ENC 10C86; C; 10CC6; # OLD HUNGARIAN CAPITAL LETTER ECS 10C87; C; 10CC7; # OLD HUNGARIAN CAPITAL 
LETTER ED 10C88; C; 10CC8; # OLD HUNGARIAN CAPITAL LETTER AND 10C89; C; 10CC9; # OLD HUNGARIAN CAPITAL LETTER E 10C8A; C; 10CCA; # OLD HUNGARIAN CAPITAL LETTER CLOSE E 10C8B; C; 10CCB; # OLD HUNGARIAN CAPITAL LETTER EE 10C8C; C; 10CCC; # OLD HUNGARIAN CAPITAL LETTER EF 10C8D; C; 10CCD; # OLD HUNGARIAN CAPITAL LETTER EG 10C8E; C; 10CCE; # OLD HUNGARIAN CAPITAL LETTER EGY 10C8F; C; 10CCF; # OLD HUNGARIAN CAPITAL LETTER EH 10C90; C; 10CD0; # OLD HUNGARIAN CAPITAL LETTER I 10C91; C; 10CD1; # OLD HUNGARIAN CAPITAL LETTER II 10C92; C; 10CD2; # OLD HUNGARIAN CAPITAL LETTER EJ 10C93; C; 10CD3; # OLD HUNGARIAN CAPITAL LETTER EK 10C94; C; 10CD4; # OLD HUNGARIAN CAPITAL LETTER AK 10C95; C; 10CD5; # OLD HUNGARIAN CAPITAL LETTER UNK 10C96; C; 10CD6; # OLD HUNGARIAN CAPITAL LETTER EL 10C97; C; 10CD7; # OLD HUNGARIAN CAPITAL LETTER ELY 10C98; C; 10CD8; # OLD HUNGARIAN CAPITAL LETTER EM 10C99; C; 10CD9; # OLD HUNGARIAN CAPITAL LETTER EN 10C9A; C; 10CDA; # OLD HUNGARIAN CAPITAL LETTER ENY 10C9B; C; 10CDB; # OLD HUNGARIAN CAPITAL LETTER O 10C9C; C; 10CDC; # OLD HUNGARIAN CAPITAL LETTER OO 10C9D; C; 10CDD; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG OE 10C9E; C; 10CDE; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA OE 10C9F; C; 10CDF; # OLD HUNGARIAN CAPITAL LETTER OEE 10CA0; C; 10CE0; # OLD HUNGARIAN CAPITAL LETTER EP 10CA1; C; 10CE1; # OLD HUNGARIAN CAPITAL LETTER EMP 10CA2; C; 10CE2; # OLD HUNGARIAN CAPITAL LETTER ER 10CA3; C; 10CE3; # OLD HUNGARIAN CAPITAL LETTER SHORT ER 10CA4; C; 10CE4; # OLD HUNGARIAN CAPITAL LETTER ES 10CA5; C; 10CE5; # OLD HUNGARIAN CAPITAL LETTER ESZ 10CA6; C; 10CE6; # OLD HUNGARIAN CAPITAL LETTER ET 10CA7; C; 10CE7; # OLD HUNGARIAN CAPITAL LETTER ENT 10CA8; C; 10CE8; # OLD HUNGARIAN CAPITAL LETTER ETY 10CA9; C; 10CE9; # OLD HUNGARIAN CAPITAL LETTER ECH 10CAA; C; 10CEA; # OLD HUNGARIAN CAPITAL LETTER U 10CAB; C; 10CEB; # OLD HUNGARIAN CAPITAL LETTER UU 10CAC; C; 10CEC; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG UE 10CAD; C; 10CED; # OLD HUNGARIAN CAPITAL LETTER 
RUDIMENTA UE 10CAE; C; 10CEE; # OLD HUNGARIAN CAPITAL LETTER EV 10CAF; C; 10CEF; # OLD HUNGARIAN CAPITAL LETTER EZ 10CB0; C; 10CF0; # OLD HUNGARIAN CAPITAL LETTER EZS 10CB1; C; 10CF1; # OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN 10CB2; C; 10CF2; # OLD HUNGARIAN CAPITAL LETTER US 118A0; C; 118C0; # WARANG CITI CAPITAL LETTER NGAA 118A1; C; 118C1; # WARANG CITI CAPITAL LETTER A 118A2; C; 118C2; # WARANG CITI CAPITAL LETTER WI 118A3; C; 118C3; # WARANG CITI CAPITAL LETTER YU 118A4; C; 118C4; # WARANG CITI CAPITAL LETTER YA 118A5; C; 118C5; # WARANG CITI CAPITAL LETTER YO 118A6; C; 118C6; # WARANG CITI CAPITAL LETTER II 118A7; C; 118C7; # WARANG CITI CAPITAL LETTER UU 118A8; C; 118C8; # WARANG CITI CAPITAL LETTER E 118A9; C; 118C9; # WARANG CITI CAPITAL LETTER O 118AA; C; 118CA; # WARANG CITI CAPITAL LETTER ANG 118AB; C; 118CB; # WARANG CITI CAPITAL LETTER GA 118AC; C; 118CC; # WARANG CITI CAPITAL LETTER KO 118AD; C; 118CD; # WARANG CITI CAPITAL LETTER ENY 118AE; C; 118CE; # WARANG CITI CAPITAL LETTER YUJ 118AF; C; 118CF; # WARANG CITI CAPITAL LETTER UC 118B0; C; 118D0; # WARANG CITI CAPITAL LETTER ENN 118B1; C; 118D1; # WARANG CITI CAPITAL LETTER ODD 118B2; C; 118D2; # WARANG CITI CAPITAL LETTER TTE 118B3; C; 118D3; # WARANG CITI CAPITAL LETTER NUNG 118B4; C; 118D4; # WARANG CITI CAPITAL LETTER DA 118B5; C; 118D5; # WARANG CITI CAPITAL LETTER AT 118B6; C; 118D6; # WARANG CITI CAPITAL LETTER AM 118B7; C; 118D7; # WARANG CITI CAPITAL LETTER BU 118B8; C; 118D8; # WARANG CITI CAPITAL LETTER PU 118B9; C; 118D9; # WARANG CITI CAPITAL LETTER HIYO 118BA; C; 118DA; # WARANG CITI CAPITAL LETTER HOLO 118BB; C; 118DB; # WARANG CITI CAPITAL LETTER HORR 118BC; C; 118DC; # WARANG CITI CAPITAL LETTER HAR 118BD; C; 118DD; # WARANG CITI CAPITAL LETTER SSUU 118BE; C; 118DE; # WARANG CITI CAPITAL LETTER SII 118BF; C; 118DF; # WARANG CITI CAPITAL LETTER VIYO 1E900; C; 1E922; # ADLAM CAPITAL LETTER ALIF 1E901; C; 1E923; # ADLAM CAPITAL LETTER DAALI 1E902; C; 1E924; # ADLAM CAPITAL LETTER 
LAAM 1E903; C; 1E925; # ADLAM CAPITAL LETTER MIIM 1E904; C; 1E926; # ADLAM CAPITAL LETTER BA 1E905; C; 1E927; # ADLAM CAPITAL LETTER SINNYIIYHE 1E906; C; 1E928; # ADLAM CAPITAL LETTER PE 1E907; C; 1E929; # ADLAM CAPITAL LETTER BHE 1E908; C; 1E92A; # ADLAM CAPITAL LETTER RA 1E909; C; 1E92B; # ADLAM CAPITAL LETTER E 1E90A; C; 1E92C; # ADLAM CAPITAL LETTER FA 1E90B; C; 1E92D; # ADLAM CAPITAL LETTER I 1E90C; C; 1E92E; # ADLAM CAPITAL LETTER O 1E90D; C; 1E92F; # ADLAM CAPITAL LETTER DHA 1E90E; C; 1E930; # ADLAM CAPITAL LETTER YHE 1E90F; C; 1E931; # ADLAM CAPITAL LETTER WAW 1E910; C; 1E932; # ADLAM CAPITAL LETTER NUN 1E911; C; 1E933; # ADLAM CAPITAL LETTER KAF 1E912; C; 1E934; # ADLAM CAPITAL LETTER YA 1E913; C; 1E935; # ADLAM CAPITAL LETTER U 1E914; C; 1E936; # ADLAM CAPITAL LETTER JIIM 1E915; C; 1E937; # ADLAM CAPITAL LETTER CHI 1E916; C; 1E938; # ADLAM CAPITAL LETTER HA 1E917; C; 1E939; # ADLAM CAPITAL LETTER QAAF 1E918; C; 1E93A; # ADLAM CAPITAL LETTER GA 1E919; C; 1E93B; # ADLAM CAPITAL LETTER NYA 1E91A; C; 1E93C; # ADLAM CAPITAL LETTER TU 1E91B; C; 1E93D; # ADLAM CAPITAL LETTER NHA 1E91C; C; 1E93E; # ADLAM CAPITAL LETTER VA 1E91D; C; 1E93F; # ADLAM CAPITAL LETTER KHA 1E91E; C; 1E940; # ADLAM CAPITAL LETTER GBE 1E91F; C; 1E941; # ADLAM CAPITAL LETTER ZAL 1E920; C; 1E942; # ADLAM CAPITAL LETTER KPO 1E921; C; 1E943; # ADLAM CAPITAL LETTER SHA # # EOF cmarkgfm/third_party/cmark/toolchain-mingw32.cmake0000644000175000017500000000123014210444464022420 0ustar carstencarsten# the name of the target operating system SET(CMAKE_SYSTEM_NAME Windows) # which compilers to use for C and C++ SET(CMAKE_C_COMPILER i586-mingw32msvc-gcc) SET(CMAKE_CXX_COMPILER i586-mingw32msvc-g++) SET(CMAKE_RC_COMPILER i586-mingw32msvc-windres) # here is the target environment located SET(CMAKE_FIND_ROOT_PATH /usr/i586-mingw32msvc "${CMAKE_SOURCE_DIR}/windows") # adjust the default behaviour of the FIND_XYZ() commands: # search headers and libraries in the target environment, search # programs in the 
host environment set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) cmarkgfm/third_party/cmark/.travis.yml0000644000175000017500000000141114210444464020264 0ustar carstencarsten# Ensures that sudo is disabled, so that containerized builds are allowed sudo: false os: - linux - osx language: c compiler: - clang - gcc matrix: include: - os: linux compiler: gcc env: CMAKE_OPTIONS="-DCMARK_SHARED=OFF" addons: apt: # we need a more recent cmake than travis/linux provides (at least 2.8.9): sources: - kubuntu-backports - kalakris-cmake packages: - cmake - python3 - valgrind before_install: - | if [ ${TRAVIS_OS_NAME:-'linux'} = 'osx' ] then echo "Building without python3, to make sure that works." fi script: - (mkdir -p build && cd build && cmake $CMAKE_OPTIONS ..) - make test - | if [ ${TRAVIS_OS_NAME:-'linux'} = 'linux' ] then make leakcheck fi cmarkgfm/third_party/cmark/tools/0000755000175000017500000000000014210444464017316 5ustar carstencarstencmarkgfm/third_party/cmark/tools/appveyor-build.bat0000644000175000017500000000037514210444464022755 0ustar carstencarsten@echo off if "%MSVC_VERSION%" == "10" goto msvc10 call "C:\Program Files (x86)\Microsoft Visual Studio %MSVC_VERSION%.0\VC\vcvarsall.bat" amd64 goto build :msvc10 call "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64 :build nmake cmarkgfm/third_party/cmark/tools/make_entities_inc.py0000644000175000017500000000167514210444464023353 0ustar carstencarsten# Creates C data structures for binary lookup table of entities, # using python's html5 entity data. # Usage: python3 tools/make_entities_inc.py > src/entities.inc import html entities5 = html.entities.html5 # remove keys without semicolons. For some reason the list # has duplicates of a few things, like auml, one with and one # without a semicolon. 
entities = sorted([(k[:-1], entities5[k].encode('utf-8')) for k in entities5.keys() if k[-1] == ';']) # Print out the header: print("""/* Autogenerated by tools/make_headers_inc.py */ struct cmark_entity_node { unsigned char *entity; unsigned char bytes[8]; }; #define CMARK_ENTITY_MIN_LENGTH 2 #define CMARK_ENTITY_MAX_LENGTH 32""") print("#define CMARK_NUM_ENTITIES " + str(len(entities))); print("\nstatic const struct cmark_entity_node cmark_entities[] = {"); for (ent, bs) in entities: print('{(unsigned char*)"' + ent + '", {' + ', '.join(map(str, bs)) + ', 0}},') print("};") cmarkgfm/third_party/cmark/tools/Dockerfile0000644000175000017500000000147014210444464021312 0ustar carstencarstenFROM debian:jessie RUN apt-get update && apt-get install -y \ build-essential \ autoconf \ libtool \ git \ pkg-config \ gdb \ valgrind \ python3 \ wget \ clang \ man \ clang-format-3.5 \ && apt-get clean RUN wget http://lcamtuf.coredump.cx/afl/releases/afl-latest.tgz && \ tar xf afl-latest.tgz && \ cd afl-* && \ make install && \ cd .. && \ rm -rf afl-* RUN wget https://github.com/skvadrik/re2c/releases/download/0.15.3/re2c-0.15.3.tar.gz && \ tar xf re2c-0.15.3.tar.gz && \ cd re2c-* && \ ./configure && \ make install && \ cd .. && \ rm -rf re2c-* RUN wget https://cmake.org/files/v3.8/cmake-3.8.2.tar.gz && \ tar xf cmake-3.8.2.tar.gz && \ cd cmake-* && \ ./bootstrap && \ make install && \ cd .. && \ rm -rf cmake-* RUN apt-get update && apt-get install -y ninja-build cmarkgfm/third_party/cmark/tools/xml2md_gfm.xsl0000644000175000017500000000356614210444464022114 0ustar carstencarsten | | ---: | :--- | :---: | --- | | | ~~ ~~ cmarkgfm/third_party/cmark/tools/xml2md.xsl0000644000175000017500000002345414210444464021261 0ustar carstencarsten Unsupported element '' *** - . ) > > \. \) \ * * ** ** ! 
[ ]( " " ) \ cmarkgfm/third_party/cmark/tools/mkcasefold.pl0000755000175000017500000000101514210444464021763 0ustar carstencarstenbinmode STDOUT; print(" switch (c) {\n"); my $lastchar = ""; while () { if (/^[A-F0-9]/ and / [CF]; /) { my ($char, $type, $subst) = m/([A-F0-9]+); ([CF]); ([^;]+)/; if ($char eq $lastchar) { break; } my @subst = $subst =~ m/(\w+)/g; printf(" case 0x%s:\n", $char); foreach (@subst) { printf(" bufpush(0x%s);\n", $_); } printf(" break;\n"); $lastchar = $char; } } printf(" default:\n"); printf(" bufpush(c);\n"); print(" }\n"); cmarkgfm/third_party/cmark/changelog.txt0000644000175000017500000016232714210444464020661 0ustar carstencarsten[0.29.0.gfm.3] * Fixed heap memory corruption vulnerabiliy via integer overflow per https://github.com/github/cmark-gfm/security/advisories/GHSA-mc3g-88wq-6f4x [0.29.0.gfm.2] * Fixed issues with footnote rendering when used with the autolinker (#121), and when footnotes are adjacent (#139). * We now allow footnotes to be referenced from inside a footnote definition, we use the footnote label for the fnref href text when rendering html, and we insert multiple backrefs when a footnote has been referenced multiple times (#229, #230) * We added new data- attributes to footnote html rendering to make them easier to style (#234) [0.29.0.gfm.1] * Fixed denial of service bug in GFM's table extension per https://github.com/github/cmark-gfm/security/advisories/GHSA-7gc6-9qr5-hc85 [0.29.0] * Update spec to 0.29. * Make rendering safe by default (#239, #273). Adds `CMARK_OPT_UNSAFE` and make `CMARK_OPT_SAFE` a no-op (for API compatibility). The new default behavior is to suppress raw HTML and potentially dangerous links. The `CMARK_OPT_UNSAFE` option has to be set explicitly to prevent this. **NOTE:** This change will require modifications in bindings for cmark and in most libraries and programs that use cmark. Borrows heavily from @kivikakk's patch in github/cmark-gfm#123. * Add sourcepos info for inlines (Yuki Izumi). 
* Disallow more than 32 nested balanced parens in a link (Yuki Izumi). * Resolve link references before creating setext header. A setext header line after a link reference should not create a header, according to the spec. * commonmark renderer: improve escaping. URL-escape special characters when escape mode is URL, and not otherwise. Entity-escape control characters (< 0x20) in non-literal escape modes. * render: only emit actual newline when escape mode is LITERAL. For markdown content, e.g., in other contexts we want some kind of escaping, not a literal newline. * Update code span normalization to conform with spec change. * Allow empty `<>` link destination in reference link. * Remove leftover includes of `memory.h` (#290). * A link destination can't start with `<` unless it is an angle-bracket link that also ends with `>` (#289). (If your URL really starts with `<`, URL-escape it.) * Allow internal delimiter runs to match if both have lengths that are multiples of 3. See commonmark/commonmark#528. * Include `references.h` in `parser.h` (#287). * Fix `[link]()`. * Use hand-rolled scanner for thematic break (see #284). Keep track of the last position where a thematic break failed to match on a line, to avoid rescanning unnecessarily. * Rename `ends_with_blank_line` with `S_` prefix. * Add `CMARK_NODE__LAST_LINE_CHECKED` flag (#284). Use this to avoid unnecessary recursion in `ends_with_blank_line`. * In `ends_with_blank_line`, call `S_set_last_line_blank` to avoid unnecessary repetition (#284). Once we settle whether a list item ends in a blank line, we don't need to revisit this in considering parent list items. * Disallow unescaped `(` in parenthesized link title. * Copy line/col info straight from opener/closer (Ashe Connor). We can't rely on anything in `subj` since it's been modified while parsing the subject and could represent line info from a future line. This is simple and works. * `render.c`: reset `last_breakable` after cr. Fixes jgm/pandoc#5033. 
* Fix a typo in `houdini_href_e.c` (Felix Yan). * commonmark writer: use `~~~` fences if info string contains backtick. This is needed for round-trip tests. * Update scanners for new info string rules. * Add XSLT stylesheet to convert cmark XML back to Commonmark (Nick Wellnhofer, #264). Initial version of an XSLT stylesheet that converts the XML format produced by `cmark -t xml` back to Commonmark. * Check for whitespace before reference title (#263). * Bump CMake to version 3 (Jonathan Müller). * Build: Remove deprecated call to `add_compiler_export_flags()` (Jonathan Müller). It is deprecated in CMake 3.0, the replacement is to set the `CXX_VISIBILITY_PRESET` (or in our case `C_VISIBILITY_PRESET`) and `VISIBILITY_INLINES_HIDDEN` properties of the target. We're already setting them by setting the CMake variables anyway, so the call can be removed. * Build: only attempt to install MSVC system libraries on Windows (Saleem Abdulrasool). Newer versions of CMake attempt to query the system for information about the VS 2017 installation. Unfortunately, this query fails on non-Windows systems when cross-compiling: `cmake_host_system_information does not recognize VS_15_DIR`. CMake will not find these system libraries on non-Windows hosts anyways, and we were silencing the warnings, so simply omit the installation when cross-compiling to Windows. * Simplify code normalization, in line with spec change. * Implement code span spec changes. These affect both parsing and writing commonmark. * Add link parsing corner cases to regressions (Ashe Connor). * Add `xml:space="preserve"` in XML output when appropriate (Nguyễn Thái Ngọc Duy). (For text, code, code_block, html_inline and html_block tags.) * Removed meta from list of block tags. Added regression test. See commonmark/CommonMark#527. * `entity_tests.py` - omit noisy success output. * `pathological_tests.py`: make tests run faster. Commented out the (already ignored) "many references" test, which times out. 
Reduced the iterations for a couple other tests. * `pathological_tests.py`: added test for deeply nested lists. * Optimize `S_find_first_nonspace`. We were needlessly redoing things we'd already done. Now we skip the work if the first nonspace is greater than the current offset. This fixes pathological slowdown with deeply nested lists (#255). For N = 3000, the time goes from over 17s to about 0.7s. Thanks to Martin Mitas for diagnosing the problem. * Allow spaces in link destination delimited with pointy brackets. * Adjust max length of decimal/numeric entities. See commonmark/CommonMark#487. * Fix inline raw HTML parsing. This fixes a recently added failing spec test case. Previously spaces were being allowed in unquoted attribute values; no we forbid them. * Don't allow list markers to be indented >= 4 spaces. See commonmark/CommonMark#497. * Check for empty buffer when rendering (Phil Turnbull). For empty documents, `->size` is zero so `renderer.buffer->ptr[renderer.buffer->size - 1]` will cause an out-of-bounds read. Empty buffers always point to the global `cmark_strbuf__initbuf` buffer so we read `cmark_strbuf__initbuf[-1]`. * Also run API tests with `CMARK_SHARED=OFF` (Nick Wellnhofer). * Rename roundtrip and entity tests (Nick Wellnhofer). Rename the tests to reflect that they use the library, not the executable. * Generate export header for static-only build (#247, Nick Wellnhofer). * Fuzz width parameter too (Phil Turnbull). Allow the `width` parameter to be generated too so we get better fuzz-coverage. * Don't discard empty fuzz test-cases (Phil Turnbull). We currently discard fuzz test-cases that are empty but empty inputs are valid markdown. This improves the fuzzing coverage slightly. * Fixed exit code for pathological tests. * Add allowed failures to `pathological_tests.py`. This allows us to include tests that we don't yet know how to pass. * Add timeout to `pathological_tests.py`. Tests must complete in 8 seconds or are errors. 
* Add more pathological tests (Martin Mitas). These tests target the issues #214, #218, #220. * Use pledge(2) on OpenBSD (Ashe Connor). * Update the Racket wrapper (Eli Barzilay). * Makefile: For afl target, don't build tests. [0.28.3.gfm.20] * Add tasklist extension implementation (Watson1978, #94). [0.28.3.gfm.19] * Prevent out-of-bound memory access in strikethrough matcher (Xavier Décoret, #124). * Limit recursion in autolink extension (Xavier Décoret, #125). * Add plaintext rendering for footnotes (Xavier Décoret, #126). [0.28.3.gfm.18] * Match strikethrough more strictly (#120). * Default to safe operation (#123). [0.28.3.gfm.17] * Allow extension to provide opaque allocation function (Nicolás Ojeda Bär, #89). * Upstream optimisations and fixes. * Extensions can add custom XML attributes (#116). * Support for GFM extensions in cmark XML to CommonMark XSLT converter (Maëlle Salmon, #117). [0.28.3.gfm.16] * Do not percent-encode tildes (~) in HTML attribute values (#110). * Fix footnote references in tables (#112). [0.28.3.gfm.15] * Escape non-strikethrough tildes (~) in commonmark output (John MacFarlane, #106). * Cosmetic fix to table HTML output (John MacFarlane, #105). * Use two tildes for strikethrough CommonMark output (John MacFarlane, #104). * Normalised header and define names (#109). [0.28.3.gfm.14] * Added a plaintext renderer for strikethrough nodes. [0.28.3.gfm.13] * Footnote rendering bugfix (Michael Camilleri, #90). * Debian packaging (Joachim Nilsson, #97). * Add CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE for redcarpet compatibility. * Add CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES (FUJI Goro, #86, #87). * Fix pathological nested list parsing (Phil Turnbull, #95). * Expose more of the extension APIs (Minghao Liu, #96). * Add python example which uses extensions (Greg Stein, #102). * Add CMARK_OPT_FULL_INFO_STRING (Mike Kavouras, #103). [0.28.3.gfm.12] * Various security and bug fixes. 
[0.28.3] * Include GNUInstallDirs in src/CMakeLists.txt (Nick Wellnhofer, #240). This fixes build problems on some cmake versions (#241). [0.28.2] * Fixed regression in install dest for static library (#238). Due to a mistake, 0.28.1 installed libcmark.a into include/. [0.28.1] * `--smart`: open quote can never occur right after `]` or `)` (#227). * Fix quadratic behavior in `finalize` (Vicent Marti). * Don't use `CMAKE_INSTALL_LIBDIR` to create `libcmark.pc` (#236). This wasn't getting set in processing `libcmark.pc.in`, and we were getting the wrong entry in `libcmark.pc`. The new approach sets an internal `libdir` variable to `lib${LIB_SUFFIX}`. This variable is used both to set the install destination and in the libcmark.pc.in template. * Update README.md, replace `make astyle` with `make format` (Nguyễn Thái Ngọc Duy). [0.28.0.gfm.11] * Do not output empty `<tbody>` in table extension. [0.28.0.gfm.10] * Fix denial of service parsing references. [0.28.0.gfm.9] * Fix denial of service parsing nested links (#49). [0.28.0.gfm.8] * Fix bug where autolink would cause `:` to be skipped in emphasis processing. [0.28.0.gfm.7] * Strikethrough characters do not disturb regular emphasis processing. [0.28.0.gfm.6] * Fix inline sourcepos info when inlines span multiple lines, and in ATX headings. [0.28.0.gfm.5] * Latest spec. * Fix a typo in the spec (John Gardner). * Fix quadratic behavior in reference lookups. * Add `core_extensions_ensure_registered`. * Add sourcepos information for inlines. [0.28.0] * Update spec. * Use unsigned integer when shifting (Phil Turnbull). Avoids a UBSAN warning which can be triggered when handling a long sequence of backticks. * Avoid memcpy'ing NULL pointers (Phil Turnbull). Avoids a UBSAN warning when link title is empty string. The length of the memcpy is zero so the NULL pointer is not dereferenced but it is still undefined behaviour. * DeMorgan simplification of some tests in emphasis parser. 
This also brings the code into closer alignment with the wording of the spec (see jgm/CommonMark#467). * Fixed undefined shift in commonmark writer (#211). Found by google/oss-fuzz: . * latex writer: fix memory overflow (#210). We got an array overflow in enumerated lists nested more than 10 deep with start number =/= 1. This commit also ensures that we don't try to set `enum_` counters that aren't defined by LaTeX (generally up to enumv). Found by google/oss-fuzz: . * Check for NULL pointer in get_link_type (Phil Turnbull). `echo '[](xx:)' | ./build/src/cmark -t latex` gave a segfault. * Move fuzzing dictionary into single file (Phil Turnbull). This allows AFL and libFuzzer to use the same dictionary * Reset bytes after UTF8 proc (Yuki Izumi, #206). * Don't scan past an EOL (Yuki Izumi). The existing negated character classes (`[^…]`) are careful to always include` \x00` in the characters excluded, but these `.` catch-alls can scan right past the terminating NUL placed at the end of the buffer by `_scan_at`. As such, buffer overruns can occur. Also, don't scan past a newline in HTML block end scanners. * Document cases where `get_` functions return `NULL` (#155). E.g. `cmark_node_get_url` on a non-link or image. * Properly handle backslashes in link destinations (#192). Only ascii punctuation characters are escapable, per the spec. * Fixed `cmark_node_get_list_start` to return 0 for bullet lists, as documented (#202). * Use `CMARK_NO_DELIM` for bullet lists (#201). * Fixed code for freeing delimiter stack (#189). * Removed abort outside of conditional (typo). * Removed coercion in error message when aborting from buffer. * Print message to stderr when we abort due to memory demands (#188). * `libcmark.pc`: use `CMAKE_INSTALL_LIBDIR` (#185, Jens Petersen). Needed for multilib distros like Fedora. * Fixed buffer overflow error in `S_parser_feed` (#184). 
The overflow could occur in the following condition: the buffer ends with `\r` and the next memory address contains `\n`. * Update emphasis parsing for spec change. Strong now goes inside Emph rather than the reverse, when both scopes are possible. The code is much simpler. This also avoids a spec inconsistency that cmark had previously: `***hi***` became Strong (Emph "hi")) but `***hi****` became Emph (Strong "hi")) "*" * Fixes for the LaTeX renderer (#182, Doeme) + Don't double-output the link in latex-rendering. + Prevent ligatures in dashes sensibly when rendering latex. `\-` is a hyphenation, so it doesn't get displayed at all. * Added a test for NULL when freeing `subj->last_delim`. * Cleaned up setting of lower bounds for openers. We now use a much smaller array. * Fix #178, quadratic parsing bug. Add pathological test. * Slight improvement of clarity of logic in emph matching. * Fix "multiple of 3" determination in emph/strong parsing. We need to store the length of the original delimiter run, instead of using the length of the remaining delimiters after some have been subtracted. Test case: `a***b* c*`. Thanks to Raph Levin for reporting. * Correctly initialize chunk in S_process_line (Nick Wellnhofer, #170). The `alloc` member wasn't initialized. This also allows to add an assertion in `chunk_rtrim` which doesn't work for alloced chunks. * Added 'make newbench'. * `scanners.c` generated with re2c 0.16 (68K smaller!). * `scanners.re` - fixed warnings; use `*` for fallback. * Fixed some warnings in `scanners.re`. * Update CaseFolding to latest (Kevin Wojniak, #168). * Allow balanced nested parens in link destinations (Yuki Izumi, #166) * Allocate enough bytes for backticks array. * Inlines: Ensure that the delimiter stack is freed in subject. 
* Fixed pathological cases with backtick code spans: - Removed recursion in scan_to_closing_backticks - Added an array of pointers to potential backtick closers to subject - This array is used to avoid traversing the subject again when we've already seen all the potential backtick closers. - Added a max bound of 1000 for backtick code span delimiters. - This helps with pathological cases like: x x ` x `` x ``` x ```` ... - Added pathological test case. Thanks to Martin Mitáš for identifying the problem and for discussion of solutions. * Remove redundant cmake_minimum_required (#163, @kainjow). * Make shared and static libraries optional (Azamat H. Hackimov). Now you can enable/disable compilation and installation targets for shared and static libraries via `-DCMARK_SHARED=ON/OFF` and `-DCMARK_STATIC=ON/OFF`. * Added support for built-in `${LIB_SUFFIX}` feature (Azamat H. Hackimov). Replaced `${LIB_INSTALL_DIR}` option with built-in `${LIB_SUFFIX}` for installing for 32/64-bit systems. Normally, CMake will set `${LIB_SUFFIX}` automatically for required environment. If you have any issues with it, you can override this option with `-DLIB_SUFFIX=64` or `-DLIB_SUFFIX=""` during configuration. * Add Makefile target and harness to fuzz with libFuzzer (Phil Turnbull). This can be run locally with `make libFuzzer` but the harness will be integrated into oss-fuzz for large-scale fuzzing. * Advertise `--validate-utf8` in usage information (Nguyễn Thái Ngọc Duy). * Makefile: use warnings with re2c. * README: Add link to Python wrapper, prettify languages list (Pavlo Kapyshin). * README: Add link to cmark-scala (Tim Nieradzik, #196) [0.27.1.gfm.4] * Fix regression with nested parentheses in link targets (#48). [0.27.1.gfm.3] * Various undefined behavior issues fixed (#38, #39, #40). * Tag filter is case-insensitive (#43). [0.27.1.gfm.2] * Fix a number of bugs (reading past end of buffer, undefined behavior). * Add `cmark_syntax_extension_get_private()`. 
(Jonathan Müller) [0.27.1.gfm.1] * Add plaintext renderer. * Remove normalize option; we now always normalize the AST. * Add getters for table alignment. * `make install` also installs the extensions static/shared library. [0.27.1.gfm.0] * Add extensions: tagfilter, strikethrough, table, autolink. * Add arena memory implementation. * Add CMARK_OPT_GITHUB_PRE_LANG for fenced code blocks. * Skip UTF-8 BOM on input. [0.27.1] * Set policy for CMP0063 to avoid a warning (#162). Put set_policy under cmake version test. Otherwise we get errors in older versions of cmake. * Use VERSION_GREATER to clean up cmake version test. * Improve afl target. Use afl-clang by default. Set default for path. [0.27.0] * Update spec to 0.27. * Fix warnings building with MSVC on Windows (#165, Hugh Bellamy). * Fix `CMAKE_C_VISIBILITY_PRESET` for cmake versions greater than 1.8 (e.g. 3.6.2) (#162, Hugh Bellamy). This lets us build swift-cmark on Windows, using clang-cl. * Fix for non-matching entities (#161, Yuki Izumi). * Modified `print_delimiters` (commented out) so it compiles again. * `make format`: don't change order of includes. * Changed logic for null/eol checks (#160). + only check once for "not at end of line" + check for null before we check for newline characters (the previous patch would fail for NULL + CR) * Fix by not advancing past both `\0` and `\n` (Yuki Izumi). * Add test for NUL-LF sequence (Yuki Izumi). * Fix memory leak in list parsing (Yuki Izumi). * Use `cmark_mem` to free where used to alloc (Yuki Izumi). * Allow a shortcut link before a `(` (jgm/CommonMark#427). * Allow tabs after setext header line (jgm/commonmark.js#109). * Don't let URI schemes start with spaces. * Fixed h2..h6 HTML blocks (jgm/CommonMark#430). Added regression test. * Autolink scheme can contain digits (Gábor Csárdi). * Fix nullary function declarations in cmark.h (Nick Wellnhofer). Fixes strict prototypes warnings. 
* COPYING: Update file name and remove duplicate section and (Peter Eisentraut). * Fix typo (Pavlo Kapyshin). [0.26.1] * Removed unnecessary typedef that caused build failure on some platforms. * Use `$(MAKE)` in Makefile instead of hardcoded `make` (#146, Tobias Kortkamp). [0.26.0] * Implement spec changes for list items: - Empty list items cannot interrupt paragraphs. - Ordered lists cannot interrupt paragraphs unless they start with 1. - Removed "two blank lines break out of a list" feature. * Fix sourcepos for blockquotes (#142). * Fix sourcepos for atx headers (#141). * Fix ATX headers and thematic breaks to allow tabs as well as spaces. * Fix `chunk_set_cstr` with suffix of current string (#139, Nick Wellnhofer). It's possible that `cmark_chunk_set_cstr` is called with a substring (suffix) of the current string. Delay freeing of the chunk content to handle this case correctly. * Export targets on installation (Jonathan Müller). This allows using them in other cmake projects. * Fix cmake warning about CMP0048 (Jonathan Müller). * commonmark renderer: Ensure we don't have a blank line before a code block when it's the first thing in a list item. * Change parsing of strong/emph in response to spec changes. `process_emphasis` now gets better results in corner cases. The change is this: when considering matches between an interior delimiter run (one that can open and can close) and another delimiter run, we require that the sum of the lengths of the two delimiter runs mod 3 is not 0. * Ported Robin Stocker's changes to link parsing in jgm/commonmark#101. This uses a separate stack for brackets, instead of putting them on the delimiter stack. This avoids the need for looking through the delimiter stack for the next bracket. * `cmark_reference_lookup`: Return NULL if reference is null string. * Fix character type detection in `commonmark.c` (Nick Wellnhofer). Fixes test failures on Windows and undefined behavior. - Implement `cmark_isalpha`. 
- Check for ASCII character before implicit cast to char. - Use internal ctype functions in `commonmark.c`. * Better documentation of memory-freeing responsibilities in `cmark.h` and its man page (#124). * Use library functions to insert nodes in emphasis/link processing. Previously we did this manually, which introduces many places where errors can creep in. * Correctly handle list marker followed only by spaces. Previously when a list marker was followed only by spaces, cmark expected the following content to be indented by the same number of spaces. But in this case we should treat the line just like a blank line and set list padding accordingly. * Fixed a number of issues relating to line wrapping. - Extend `CMARK_OPT_NOBREAKS` to all renderers and add `--nobreaks`. - Do not autowrap, regardless of width parameter, if `CMARK_OPT_NOBREAKS` is set. - Fixed `CMARK_OPT_HARDBREAKS` for LaTeX and man renderers. - Ensure that no auto-wrapping occurs if `CMARK_OPT_NOBREAKS` is enabled, or if output is CommonMark and `CMARK_OPT_HARDBREAKS` is enabled. * Set stdin to binary mode on Windows (Nick Wellnhofer, #113). This fixes EOLs when reading from stdin. * Add library option to render softbreaks as spaces (Pavlo Kapyshin). Note that the `NOBREAKS` option is HTML-only. * renderer: `no_linebreaks` instead of `no_wrap`. We generally want this option to prohibit any breaking in things like headers (not just wraps, but softbreaks). * Coerce `realurllen` to `int`. This is an alternate solution for pull request #132, which introduced a new warning on the comparison (Benedict Cohen). * Remove unused variable `link_text` (Mathieu Duponchelle). * Improved safety checks in buffer (Vicent Marti). * Add new interface allowing specification of custom memory allocator for nodes (Vicent Marti). Added `cmark_node_new_with_mem`, `cmark_parser_new_with_mem`, `cmark_mem` to API. * Reduce storage size for nodes by using bit flags instead of separate booleans (Vicent Marti). 
* config: Add `SSIZE_T` compat for Win32 (Vicent Marti). * cmake: Global handler for OOM situations (Vicent Marti). * Add tests for memory exhaustion (Vicent Marti). * Document in man page and public header that one should use the same memory allocator for every node in a tree. * Fix ctypes in Python FFI calls (Nick Wellnhofer). This didn't cause problems so far because all types are 32-bit on 32-bit systems and arguments are passed in registers on x86-64. The wrong types could cause crashes on other platforms, though. * Remove spurious failures in roundtrip tests. In the commonmark writer we separate lists, and lists and indented code, using a dummy HTML comment. So in evaluating the round-trip tests, we now strip out these comments. We also normalize HTML to avoid issues having to do with line breaks. * Add 2016 to copyright (Kevin Burke). * Added `to_commonmark` in `test/cmark.py` (for round-trip tests). * `spec_test.py` - parameterize `do_test` with converter. * `spec_tests.py`: exit code is now sum of failures and errors. This ensures that a failing exit code will be given when there are errors, not just with failures. * Fixed round trip tests. Previously they actually ran `cmark` instead of the round-trip version, since there was a bug in setting the ROUNDTRIP variable (#131). * Added new `roundtrip_tests.py`. This replaces the old use of simple shell scripts. It is much faster, and more flexible. (We will be able to do custom normalization and skip certain tests.) * Fix tests under MinGW (Nick Wellnhofer). * Fix leak in `api_test` (Mathieu Duponchelle). * Makefile: have leakcheck stop on first error instead of going through all the formats and options and probably getting the same output. * Add regression tests (Nick Wellnhofer). [0.25.2] * Open files in binary mode (#113, Nick Wellnhofer). Now that cmark supports different line endings, files must be opened in binary mode on Windows. 
* Reset `partially_consumed_tab` on every new line (#114, Nick Wellnhofer). * Handle buffer split across a CRLF line ending (#117). Adds an internal field to the parser struct to keep track of `last_buffer_ended_with_cr`. Added test. [0.25.1] * Release with no code changes. cmark version was mistakenly set to 0.25.1 in the 0.25.0 release (#112), so this release just ensures that this will cause no confusion later. [0.25.0] * Fixed tabs in indentation (#101). This patch fixes S_advance_offset so that it doesn't gobble a tab character when advancing less than the width of a tab. * Added partially_consumed_tab to parser. This keeps track of when we have gotten partway through a tab when consuming initial indentation. * Simplified add_line (only need parser parameter). * Properly handle partially consumed tab. E.g. in - foo bar we should consume two spaces from the second tab, including two spaces in the code block. * Properly handle tabs with blockquotes and fenced blocks. * Fixed handling of tabs in lists. * Clarified logic in S_advance_offset. * Use an assertion to check for in-range html_block_type. It's a programming error if the type is out of range. * Refactored S_processLines to make the logic easier to understand, and added documentation (Mathieu Duponchelle). * Removed unnecessary check for empty string_content. * Factored out contains_inlines. * Moved the cmake minimum version to top line of CMakeLists.txt (tinysun212). * Fix ctype(3) usage on NetBSD (Kamil Rytarowski). We need to cast value passed to isspace(3) to unsigned char to explicitly prevent possibly undefined behavior. * Compile in plain C mode with MSVC 12.0 or newer (Nick Wellnhofer). Under MSVC, we used to compile in C++ mode to get some C99 features like mixing declarations and code. With newer MSVC versions, it's possible to build in plain C mode. * Switched from "inline" to "CMARK_INLINE" (Nick Wellnhofer). Newer MSVC versions support enough of C99 to be able to compile cmark in plain C mode. 
Only the "inline" keyword is still unsupported. We have to use "__inline" instead. * Added include guards to config.h * config.h.in - added compatibility snprintf, vsnprintf for MSVC. * Replaced sprintf with snprintf (Marco Benelli). * config.h: include stdio.h for _vscprintf etc. * Include stdarg.h when needed in config.h. * Removed an unnecessary C99-ism in buffer.c. This helps compiling on systems like luarocks that don't have all the cmake configuration goodness (thanks to carlmartus). * Don't use variable length arrays (Nick Wellnhofer). They're not supported by MSVC. * Test with multiple MSVC versions under Appveyor (Nick Wellnhofer). * Fix installation dir of man-pages on NetBSD (Kamil Rytarowski). * Fixed typo in cmark.h comments (Chris Eidhof). * Clarify in man page that cmark_node_free frees a node's children too. * Fixed documentation of --width in man page. * Require re2c >= 1.14.2 (#102). * Generated scanners.c with more recent re2c. [0.24.1] * Commonmark renderer: + Use HTML comment, not two blank lines, to separate a list item from a following code block or list. This makes the output more portable, since the "two blank lines" rule is unique to CommonMark. Also, it allows us to break out of a sublist without breaking out of all levels of nesting. + `is_autolink` - handle case where link has no children, which previously caused a segfault. + Use 4-space indent for bullet lists, for increased portability. + Use 2-space + newline for line break for increased portability (#90). + Improved punctuation escaping. Previously all `)` and `.` characters after digits were escaped; now they are only escaped if they are genuinely in a position where they'd cause a list item. This is achieved by changes in `render.c`: (a) `renderer->begin_content` is only set to false after a string of digits at the beginning of the line, and (b) we never break a line before a digit. Also, `begin_content` is properly initialized to true. 
* Handle NULL root in `consolidate_text_nodes`. [0.24.0] * [API change] Added `cmark_node_replace(oldnode, newnode)`. * Updated spec.txt to 0.24. * Fixed edge case with escaped parens in link destination (#97). This was also checked against the #82 case with asan. * Removed unnecessary check for `fenced` in `cmark_render_html`. It's sufficient to check that the info string is empty. Indeed, those who use the API may well create a code block with an info string without explicitly setting `fenced`. * Updated format of `test/smart_punct.txt`. * Updated `test/spec.txt`, `test/smart_punct.txt`, and `spec_tests.py` to new format. * Fixed `get_containing_block` logic in `src/commonmark.c`. This did not allow for the possibility that a node might have no containing block, causing the commonmark renderer to segfault if passed an inline node with no block parent. * Fixed string representations of `CUSTOM_BLOCK`, `CUSTOM_INLINE`. The old versions `raw_inline` and `raw_block` were being used, and this led to incorrect xml output. * Use default opts in python sample wrapper. * Allow multiline setext header content, as per spec. * Don't allow spaces in link destinations, even with pointy brackets. Conforms to latest change in spec. * Updated `scheme` scanner according to spec change. We no longer use a whitelist of valid schemes. * Allow any kind of nodes as children of `CUSTOM_BLOCK` (#96). * `cmark.h`: moved typedefs for iterator into iterator section. This just moves some code around so it makes more sense to read, and in the man page. * Fixed `make_man_page.py` so it includes typedefs again. [0.23.0] * [API change] Added `CUSTOM_BLOCK` and `CUSTOM_INLINE` node types. They are never generated by the parser, and do not correspond to CommonMark elements. They are designed to be inserted by filters that postprocess the AST. 
For example, a filter might convert specially marked code blocks to svg diagrams in HTML and tikz diagrams in LaTeX, passing these through to the renderer as a `CUSTOM_BLOCK`. These nodes can have children, but they also have literal text to be printed by the renderer "on enter" and "on exit." Added `cmark_node_get_on_enter`, `cmark_node_set_on_enter`, `cmark_node_get_on_exit`, `cmark_node_set_on_exit` to API. * [API change] Rename `NODE_HTML` -> `NODE_HTML_BLOCK`, `NODE_INLINE_HTML` -> `NODE_HTML_INLINE`. Define aliases so the old names still work, for backwards compatibility. * [API change] Rename `CMARK_NODE_HEADER` -> `CMARK_NODE_HEADING`. Note that for backwards compatibility, we have defined aliases: `CMARK_NODE_HEADER` = `CMARK_NODE_HEADING`, `cmark_node_get_header_level` = `cmark_node_get_heading_level`, and `cmark_node_set_header_level` = `cmark_node_set_heading_level`. * [API change] Rename `CMARK_NODE_HRULE` -> `CMARK_NODE_THEMATIC_BREAK`. Defined the former as the latter for backwards compatibility. * Don't allow space between link text and link label in a reference link (spec change). * Separate parsing and rendering opts in `cmark.h` (#88). This change also changes some of these constants' numerical values, but nothing should change in the API if you use the constants themselves. It should now be clear in the man page which options affect parsing and which affect rendering. * xml renderer - Added xmlns attribute to document node (jgm/CommonMark#87). * Commonmark renderer: ensure html blocks surrounded by blanks. Otherwise we get failures of roundtrip tests. * Commonmark renderer: ensure that literal characters get escaped when they're at the beginning of a block, e.g. `> \- foo`. * LaTeX renderer - better handling of internal links. Now we render `[foo](#bar)` as `\protect\hyperlink{bar}{foo}`. * Check for NULL pointer in _scan_at (#81). * `Makefile.nmake`: be more robust when cmake is missing. 
Previously, when cmake was missing, the build dir would be created anyway, and subsequent attempts (even with cmake) would fail, because cmake would not be run. Depending on `build/CMakeFiles` is more robust -- this won't be created unless cmake is run. Partially addresses #85. * Fixed DOCTYPE in xml output. * commonmark.c: fix `size_t` to `int`. This fixes an MSVC warning "conversion from 'size_t' to 'int', possible loss of data" (Kevin Wojniak). * Correct string length in `cmark_parse_document` example (Lee Jeffery). * Fix non-ASCII end-of-line character check (andyuhnak). * Fix "declaration shadows a local variable" (Kevin Wojniak). * Install static library (jgm/CommonMark#381). * Fix warnings about dropping const qualifier (Kevin Wojniak). * Use full (unabbreviated) versions of constants (`CMARK_...`). * Removed outdated targets from Makefile. * Removed need for sudo in `make bench`. * Improved benchmark. Use longer test, since `time` has limited resolution. * Removed `bench.h` and timing calls in `main.c`. * Updated API docs; getters return empty strings if not set rather than NULL, as previously documented. * Added api_tests for custom nodes. * Made roundtrip test part of the test suite run by cmake. * Regenerate `scanners.c` using re2c 0.15.3. * Adjusted scanner for link url. This fixes a heap buffer overflow (#82). * Added version number (1.0) to XML namespace. We don't guarantee stability in this until 1.0 is actually released, however. * Removed obsolete `TIMER` macro. * Make `LIB_INSTALL_DIR` configurable (Mathieu Bridon, #79). * Removed out-of-date luajit wrapper. * Use `input`, not `parser->curline` to determine last line length. * Small optimizations in `_scan_at`. * Replaced hard-coded 4 with `TAB_STOP`. * Have `make format` reformat api tests as well. * Added api tests for man, latex, commonmark, and xml renderers (#51). * render.c: added `begin_content` field. This is like `begin_line` except that it doesn't trigger production of the prefix. 
So it can be set after an initial prefix (say `> `) is printed by the renderer, and consulted in determining whether to escape content that has a special meaning at the beginning of a line. Used in the commonmark renderer. * Python 3.5 compatibility: don't require HTMLParseError (Zhiming Wang). HTMLParseError was removed in Python 3.5. Since it could never be thrown in Python 3.5+, we simply define a placeholder when HTMLParseError cannot be imported. * Set `convert_charrefs=False` in `normalize.py` (#83). This defeats the new default as of python 3.5, and allows the script to work with python 3.5. [0.22.0] * Removed `pre` from blocktags scanner. `pre` is handled separately in rule 1 and needn't be handled in rule 6. * Added `iframe` to list of blocktags, as per spec change. * Fixed bug with `HRULE` after blank line. This previously caused cmark to break out of a list, thinking it had two consecutive blanks. * Check for empty string before trying to look at line ending. * Make sure every line fed to `S_process_line` ends with `\n` (#72). So `S_process_line` sees only unix style line endings. Ultimately we probably want a better solution, allowing the line ending style of the input file to be preserved. This solution forces output with newlines. * Improved `cmark_strbuf_normalize_whitespace` (#73). Now all characters that satisfy `cmark_isspace` are recognized as whitespace. Previously `\r` and `\t` (and others) weren't included. * Treat line ending with EOF as ending with newline (#71). * Fixed `--hardbreaks` with `\r\n` line breaks (#68). * Disallow list item starting with multiple blank lines (jgm/CommonMark#332). * Allow tabs before closing `#`s in ATX header * Removed `cmark_strbuf_printf` and `cmark_strbuf_vprintf`. These are no longer needed, and cause complications for MSVC. Also removed `HAVE_VA_COPY` and `HAVE_C99_SNPRINTF` feature tests. * Added option to disable tests (Kevin Wojniak). * Added `CMARK_INLINE` macro. 
* Removed need to disable MSVC warnings 4267, 4244, 4800 (Kevin Wojniak). * Fixed MSVC inline errors when cmark is included in sources that don't have the same set of disabled warnings (Kevin Wojniak). * Fix `FileNotFoundError` errors on tests when cmark is built from another project via `add_subdirectory()` (Kevin Wojniak). * Prefix `utf8proc` functions to avoid conflict with existing library (Kevin Wojniak). * Avoid name clash between Windows `.pdb` files (Nick Wellnhofer). * Improved `smart_punct.txt` (see jgm/commonmark.js#61). * Set `POSITION_INDEPENDENT_CODE` `ON` for static library (see #39). * `make bench`: allow overriding `BENCHFILE`. Previously if you did this, it would clobber `BENCHFILE` with the default bench file. * `make bench`: Use -10 priority with renice. * Improved `make_autolink`. Ensures that title is chunk with empty string rather than NULL, as with other links. * Added `clang-check` target. * Travis: split `roundtrip_test` and `leakcheck` (OGINO Masanori). * Use clang-format, llvm style, for formatting. Reformatted all source files. Added `format` target to Makefile. Removed `astyle` target. Updated `.editorconfig`. [0.21.0] * Updated to version 0.21 of spec. * Added latex renderer (#31). New exported function in API: `cmark_render_latex`. New source file: `src/latex.c`. * Updates for new HTML block spec. Removed old `html_block_tag` scanner. Added new `html_block_start` and `html_block_start_7`, as well as `html_block_end_n` for n = 1-5. Rewrote block parser for new HTML block spec. * We no longer preprocess tabs to spaces before parsing. Instead, we keep track of both the byte offset and the (virtual) column as we parse block starts. This allows us to handle tabs without converting to spaces first. Tabs are left as tabs in the output, as per the revised spec. * Removed utf8 validation by default. We now replace null characters in the line splitting code. * Added `CMARK_OPT_VALIDATE_UTF8` option and command-line option `--validate-utf8`. 
This option causes cmark to check for valid UTF-8, replacing invalid sequences with the replacement character, U+FFFD. Previously this was done by default in connection with tab expansion, but we no longer do it by default with the new tab treatment. (Many applications will know that the input is valid UTF-8, so validation will not be necessary.) * Added `CMARK_OPT_SAFE` option and `--safe` command-line flag. + Added `CMARK_OPT_SAFE`. This option disables rendering of raw HTML and potentially dangerous links. + Added `--safe` option in command-line program. + Updated `cmark.3` man page. + Added `scan_dangerous_url` to scanners. + In HTML, suppress rendering of raw HTML and potentially dangerous links if `CMARK_OPT_SAFE`. Dangerous URLs are those that begin with `javascript:`, `vbscript:`, `file:`, or `data:` (except for `image/png`, `image/gif`, `image/jpeg`, or `image/webp` mime types). + Added `api_test` for `OPT_CMARK_SAFE`. + Rewrote `README.md` on security. * Limit ordered list start to 9 digits, per spec. * Added width parameter to `render_man` (API change). * Extracted common renderer code from latex, man, and commonmark renderers into a separate module, `renderer.[ch]` (#63). To write a renderer now, you only need to write a character escaping function and a node rendering function. You pass these to `cmark_render` and it handles all the plumbing (including line wrapping) for you. So far this is an internal module, but we might consider adding it to the API in the future. * commonmark writer: correctly handle email autolinks. * commonmark writer: escape `!`. * Fixed soft breaks in commonmark renderer. * Fixed scanner for link url. re2c returns the longest match, so we were getting bad results with `[link](foo\(and\(bar\)\))` which it would parse as containing a bare `\` followed by an in-parens chunk ending with the final paren. * Allow non-initial hyphens in html tag names. This allows for custom tags, see jgm/CommonMark#239. 
* Updated `test/smart_punct.txt`. * Implemented new treatment of hyphens with `--smart`, converting sequences of hyphens to sequences of em and en dashes that contain no hyphens. * HTML renderer: properly split info on first space char (see jgm/commonmark.js#54). * Changed version variables to functions (#60, Andrius Bentkus). This is easier to access using ffi, since some languages, like C#, like to use only function interfaces for accessing library functionality. * `process_emphasis`: Fixed setting lower bound to potential openers. Renamed `potential_openers` -> `openers_bottom`. Renamed `start_delim` -> `stack_bottom`. * Added case for #59 to `pathological_test.py`. * Fixed emphasis/link parsing bug (#59). * Fixed off-by-one error in line splitting routine. This caused certain NULLs not to be replaced. * Don't rtrim in `subject_from_buffer`. This gives bad results in parsing reference links, where we might have trailing blanks (`finalize` removes the bytes parsed as a reference definition; before this change, some blank bytes might remain on the line). + Added `column` and `first_nonspace_column` fields to `parser`. + Added utility function to advance the offset, computing the virtual column too. Note that we don't need to deal with UTF-8 here at all. Only ASCII occurs in block starts. + Significant performance improvement due to the fact that we're not doing UTF-8 validation. * Fixed entity lookup table. The old one had many errors. The new one is derived from the list in the npm entities package. Since the sequences can now be longer (multi-code-point), we have bumped the length limit from 4 to 8, which also affects `houdini_html_u.c`. An example of the kind of error that was fixed: `&NotGreaterFullEqual;` should be rendered as "≧̸" (U+02267 U+00338), but it was being rendered as "≧" (which is the same as `&gE;`). * Replace gperf-based entity lookup with binary tree lookup. The primary advantage is a big reduction in the size of the compiled library and executable (> 100K). 
There should be no measurable performance difference in normal documents. I detected only a slight performance hit in a file containing 1,000,000 entities. + Removed `src/html_unescape.gperf` and `src/html_unescape.h`. + Added `src/entities.h` (generated by `tools/make_entities_h.py`). + Added binary tree lookup functions to `houdini_html_u.c`, and use the data in `src/entities.h`. * Renamed `entities.h` -> `entities.inc`, and `tools/make_entities_h.py` -> `tools/make_entities_inc.py`. * Fixed cases like ``` [ref]: url "title" ok ``` Here we should parse the first line as a reference. * `inlines.c`: Added utility functions to skip spaces and line endings. * Fixed backslashes in link destinations that are not part of escapes (jgm/commonmark#45). * `process_line`: Removed "add newline if line doesn't have one." This isn't actually needed. * Small logic fixes and a simplification in `process_emphasis`. * Added more pathological tests: + Many link closers with no openers. + Many link openers with no closers. + Many emph openers with no closers. + Many closers with no openers. + `"*a_ " * 20000`. * Fixed `process_emphasis` to handle new pathological cases. Now we have an array of pointers (`potential_openers`), keyed to the delim char. When we've failed to match a potential opener prior to point X in the delimiter stack, we reset `potential_openers` for that opener type to X, and thus avoid having to look again through all the openers we've already rejected. * `process_inlines`: remove closers from delim stack when possible. When they have no matching openers and cannot be openers themselves, we can safely remove them. This helps with a performance case: `"a_ " * 20000` (jgm/commonmark.js#43). * Roll utf8proc_charlen into utf8proc_valid (Nick Wellnhofer). Speeds up "make bench" by another percent. * `spec_tests.py`: allow `→` for tab in HTML examples. * `normalize.py`: don't collapse whitespace in pre contexts. * Use utf-8 aware re2c. 
* Makefile afl target: removed `-m none`, added `CMARK_OPTS`. * README: added `make afl` instructions. * Limit generated `cmark.3` to 72 character line width. * Travis: switched to containerized build system. * Removed `debug.h`. (It uses GNU extensions, and we don't need it anyway.) * Removed sundown from benchmarks, because the reading was anomalous. sundown had an arbitrary 16MB limit on buffers, and the benchmark input exceeded that. So who knows what we were actually testing? Added hoedown, sundown's successor, which is a better comparison. [0.20.0] * Fixed bug in list item parsing when items indented >= 4 spaces (#52). * Don't allow link labels with no non-whitespace characters (jgm/CommonMark#322). * Fixed multiple issues with numeric entities (#33, Nick Wellnhofer). * Support CR and CRLF line endings (Ben Trask). * Added test for different line endings to `api_test`. * Allow NULL value in string setters (Nick Wellnhofer). (NULL produces a 0-length string value.) Internally, URL and title are now stored as `cmark_chunk` rather than `char *`. * Fixed memory leak in `cmark_consolidate_text_nodes` (#32). * Fixed `is_autolink` in the CommonMark renderer (#50). Previously *any* link with an absolute URL was treated as an autolink. * Cope with broken `snprintf` on Windows (Nick Wellnhofer). On Windows, `snprintf` returns -1 if the output was truncated. Fall back to Windows-specific `_scprintf`. * Switched length parameter on `cmark_markdown_to_html`, `cmark_parser_feed`, and `cmark_parse_document` from `int` to `size_t` (#53, Nick Wellnhofer). * Use a custom type `bufsize_t` for all string sizes and indices. This allows switching to 64-bit string buffers by changing a single typedef and a macro definition (Nick Wellnhofer). * Hardened the `strbuf` code, checking for integer overflows and adding range checks (Nick Wellnhofer). * Removed unused function `cmark_strbuf_attach` (Nick Wellnhofer). 
* Fixed all implicit 64-bit to 32-bit conversions that `-Wshorten-64-to-32` warns about (Nick Wellnhofer). * Added helper function `cmark_strbuf_safe_strlen` that converts from `size_t` to `bufsize_t` and throws an error in case of an overflow (Nick Wellnhofer). * Abort on `strbuf` out of memory errors (Nick Wellnhofer). Previously such errors were not being trapped. This involves some internal changes to the `buffer` library that do not affect the API. * Factored out `S_find_first_nonspace` in `S_process_line`. Added fields `offset`, `first_nonspace`, `indent`, and `blank` to `cmark_parser` struct. This just removes some repetition. * Added Racket (5.3+) wrapper (Eli Barzilay). * Removed `-pg` from Debug build flags (#47). * Added Ubsan build target, to check for undefined behavior. * Improved `make leakcheck`. We now return an error status if anything in the loop fails. We now check `--smart` and `--normalize` options. * Removed `wrapper3.py`, made `wrapper.py` work with python 2 and 3. Also improved the wrapper to work with Windows, and to use smart punctuation (as an example). * In `wrapper.rb`, added argument for options. * Revised luajit wrapper. * Added build status badges to README.md. * Added links to go, perl, ruby, R, and Haskell bindings to README.md. [0.19.0] * Fixed `_` emphasis parsing to conform to spec (jgm/CommonMark#317). * Updated `spec.txt`. * Compile static library with `-DCMARK_STATIC_DEFINE` (Nick Wellnhofer). * Suppress warnings about Windows runtime library files (Nick Wellnhofer). Visual Studio Express editions do not include the redistributable files. Set `CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS` to suppress warnings. * Added AppVeyor: Windows continuous integration (`appveyor.yml`). * Use `os.path.join` in `test/cmark.py` for proper cross-platform paths. * Fixed `Makefile.nmake`. * Improved `make afl`: added `test/afl_dictionary`, increased timeout for hangs. * Improved README with a description of the library's strengths. 
* Pass-through Unicode non-characters (Nick Wellnhofer). Despite their name, Unicode non-characters are valid code points. They should be passed through by a library like libcmark. * Check return status of `utf8proc_iterate` (#27). [0.18.3] * Include patch level in soname (Nick Wellnhofer). Minor version is tied to spec version, so this allows breaking the ABI between spec releases. * Install compiler-provided system runtime libraries (Changjiang Yang). * Use `strbuf_printf` instead of `snprintf`. `snprintf` is not available on some platforms (Visual Studio 2013 and earlier). * Fixed memory access bug: "invalid read of size 1" on input `[link](<>)`. [0.18.2] * Added commonmark renderer: `cmark_render_commonmark`. In addition to options, this takes a `width` parameter. A value of 0 disables wrapping; a positive value wraps the document to the specified width. Note that width is automatically set to 0 if the `CMARK_OPT_HARDBREAKS` option is set. * The `cmark` executable now allows `-t commonmark` for output as CommonMark. A `--width` option has been added to specify wrapping width. * Added `roundtrip_test` Makefile target. This runs all the spec through the commonmark renderer, and then through the commonmark parser, and compares normalized HTML to the test. All tests pass with the current parser and renderer, giving us some confidence that the commonmark renderer is sufficiently robust. Eventually this should be pythonized and put in the cmake test routine. * Removed an unnecessary check in `blocks.c`. By the time we check for a list start, we've already checked for a horizontal rule, so we don't need to repeat that check here. Thanks to Robin Stocker for pointing out a similar redundancy in commonmark.js. * Fixed bug in `cmark_strbuf_unescape` (`buffer.c`). The old function gave incorrect results on input like `\\*`, since the next backslash would be treated as escaping the `*` instead of being escaped itself. 
* `scanners.re`: added `_scan_scheme`, `scan_scheme`, used in the commonmark renderer. * Check for `CMAKE_C_COMPILER` (not `CC_COMPILER`) when setting C flags. * Update code examples in documentation, adding new parser option argument, and using `CMARK_OPT_DEFAULT` (Nick Wellnhofer). * Added options parameter to `cmark_markdown_to_html`. * Removed obsolete reference to `CMARK_NODE_LINK_LABEL`. * `make leakcheck` now checks all output formats. * `test/cmark.py`: set default options for `markdown_to_html`. * Warn about buggy re2c versions (Nick Wellnhofer). [0.18.1] * Build static version of library in default build (#11). * `cmark.h`: Add missing argument to `cmark_parser_new` (#12). [0.18] * Switch to 2-clause BSD license, with agreement of contributors. * Added Profile build type, `make prof` target. * Fixed autolink scanner to conform to the spec. Backslash escapes not allowed in autolinks. * Don't rely on strnlen being available (Nick Wellnhofer). * Updated scanners for new whitespace definition. * Added `CMARK_OPT_SMART` and `--smart` option, `smart.c`, `smart.h`. * Added test for `--smart` option. * Fixed segfault with --normalize (closes #7). * Moved normalization step from XML renderer to `cmark_parser_finish`. * Added options parameter to `cmark_parse_document`, `cmark_parse_file`. * Fixed man renderer's escaping for unicode characters. * Don't require python3 to make `cmark.3` man page. * Use ASCII escapes for punctuation characters for portability. * Made `options` an int rather than a long, for consistency. * Packed `cmark_node` struct to fit into 128 bytes. This gives a small performance boost and lowers memory usage. * Repacked `delimiter` struct to avoid hole. * Fixed use-after-free bug, which arose when a paragraph containing only reference links and blank space was finalized (#9). 
Avoid using `parser->current` in the loop that creates new blocks, since `finalize` in `add_child` may have removed the current parser (if it contains only reference definitions). This isn't a great solution; in the long run we need to rewrite to make the logic clearer and to make it harder to make mistakes like this one. * Added 'Asan' build type. `make asan` will link against ASan; the resulting executable will do checks for memory access issues. Thanks @JordanMilne for the suggestion. * Add Makefile target to fuzz with AFL (Nick Wellnhofer). The variable `$AFL_PATH` must point to the directory containing the AFL binaries. It can be set as an environment variable or passed to make on the command line. [0.17] * Stripped out all JavaScript related code and documentation, moving it to a separate repository (<https://github.com/jgm/commonmark.js>). * Improved Makefile targets, so that `cmake` is run again only when necessary (Nick Wellnhofer). * Added `INSTALL_PREFIX` to the Makefile, allowing installation to a location other than `/usr/local` without invoking `cmake` manually (Nick Wellnhofer). * `make test` now guarantees that the project will be rebuilt before tests are run (Nick Wellnhofer). * Prohibited overriding of some Makefile variables (Nick Wellnhofer). * Provide version number and string, both as macros (`CMARK_VERSION`, `CMARK_VERSION_STRING`) and as symbols (`cmark_version`, `cmark_version_string`) (Nick Wellnhofer). All of these come from `cmark_version.h`, which is constructed from a template `cmark_version.h.in` and data in `CMakeLists.txt`. * Avoid calling `free` on null pointer. * Added an accessor for an iterator's root node (`cmark_iter_get_root`). * Added user data field for nodes (Nick Wellnhofer). This is intended mainly for use in bindings for dynamic languages, where it could store a pointer to a target language object (#287). But it can be used for anything. * Man renderer: properly escape multiline strings. 
* Added assertion to raise error if finalize is called on a closed block. * Implemented the new spec rule for emphasis and strong emphasis with `_`. * Moved the check for fence-close with the other checks for end-of-block. * Fixed a bug with loose list detection with items containing fenced code blocks (#285). * Removed recursive algorithm in `ends_with_blank_line` (#286). * Minor code reformatting: renamed parameters. [0.16] * Added xml renderer (XML representation of the CommonMark AST, which is described in `CommonMark.dtd`). * Reduced size of gperf entity table (Nick Wellnhofer). * Reworked iterators to allow deletion of nodes during iteration (Nick Wellnhofer). * Optimized `S_is_leaf`. * Added `cmark_iter_reset` to iterator API. * Added `cmark_consolidate_text_nodes` to API to combine adjacent text nodes. * Added `CMARK_OPT_NORMALIZE` to options (this combines adjacent text nodes). * Added `--normalize` option to command-line program. * Improved regex for HTML comments in inline parsing. * Python is no longer required for a basic build from the repository. 
cmarkgfm/third_party/cmark/api_test/0000755000175000017500000000000014210444464017766 5ustar carstencarstencmarkgfm/third_party/cmark/api_test/harness.h0000644000175000017500000000132714210444464021605 0ustar carstencarsten#ifndef CMARK_API_TEST_HARNESS_H #define CMARK_API_TEST_HARNESS_H #ifdef __cplusplus extern "C" { #endif typedef struct { int test_num; int num_passed; int num_failed; int num_skipped; } test_batch_runner; test_batch_runner *test_batch_runner_new(); void SKIP(test_batch_runner *runner, int num_tests); void OK(test_batch_runner *runner, int cond, const char *msg, ...); void INT_EQ(test_batch_runner *runner, int got, int expected, const char *msg, ...); void STR_EQ(test_batch_runner *runner, const char *got, const char *expected, const char *msg, ...); int test_ok(test_batch_runner *runner); void test_print_summary(test_batch_runner *runner); #ifdef __cplusplus } #endif #endif cmarkgfm/third_party/cmark/api_test/cplusplus.h0000644000175000017500000000033214210444464022167 0ustar carstencarsten#ifndef CMARK_API_TEST_CPLUSPLUS_H #define CMARK_API_TEST_CPLUSPLUS_H #include "harness.h" #ifdef __cplusplus extern "C" { #endif void test_cplusplus(test_batch_runner *runner); #ifdef __cplusplus } #endif #endif cmarkgfm/third_party/cmark/api_test/CMakeLists.txt0000644000175000017500000000160014210444464022523 0ustar carstencarstenadd_executable(api_test cplusplus.cpp harness.c harness.h main.c ) include_directories( ${PROJECT_SOURCE_DIR}/src ${PROJECT_BINARY_DIR}/src ${PROJECT_BINARY_DIR}/extensions ) if(CMARK_SHARED) target_link_libraries(api_test libcmark-gfm-extensions libcmark-gfm) else() target_link_libraries(api_test libcmark-gfm-extensions_static libcmark-gfm_static) endif() # Compiler flags if(MSVC) # Force to always compile with W4 if(CMAKE_CXX_FLAGS MATCHES "/W[0-4]") string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") else() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4") endif() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4706 
/D_CRT_SECURE_NO_WARNINGS") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /TP") elseif(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -std=c99 -pedantic") endif() cmarkgfm/third_party/cmark/api_test/main.c0000644000175000017500000013553614210444464021073 0ustar carstencarsten#include #include #include #define CMARK_NO_SHORT_NAMES #include "cmark-gfm.h" #include "node.h" #include "../extensions/cmark-gfm-core-extensions.h" #include "harness.h" #include "cplusplus.h" #define UTF8_REPL "\xEF\xBF\xBD" static const cmark_node_type node_types[] = { CMARK_NODE_DOCUMENT, CMARK_NODE_BLOCK_QUOTE, CMARK_NODE_LIST, CMARK_NODE_ITEM, CMARK_NODE_CODE_BLOCK, CMARK_NODE_HTML_BLOCK, CMARK_NODE_PARAGRAPH, CMARK_NODE_HEADING, CMARK_NODE_THEMATIC_BREAK, CMARK_NODE_TEXT, CMARK_NODE_SOFTBREAK, CMARK_NODE_LINEBREAK, CMARK_NODE_CODE, CMARK_NODE_HTML_INLINE, CMARK_NODE_EMPH, CMARK_NODE_STRONG, CMARK_NODE_LINK, CMARK_NODE_IMAGE}; static const int num_node_types = sizeof(node_types) / sizeof(*node_types); static void test_md_to_html(test_batch_runner *runner, const char *markdown, const char *expected_html, const char *msg); static void test_content(test_batch_runner *runner, cmark_node_type type, unsigned int *allowed_content); static void test_char(test_batch_runner *runner, int valid, const char *utf8, const char *msg); static void test_incomplete_char(test_batch_runner *runner, const char *utf8, const char *msg); static void test_continuation_byte(test_batch_runner *runner, const char *utf8); static void version(test_batch_runner *runner) { INT_EQ(runner, cmark_version(), CMARK_GFM_VERSION, "cmark_version"); STR_EQ(runner, cmark_version_string(), CMARK_GFM_VERSION_STRING, "cmark_version_string"); } static void constructor(test_batch_runner *runner) { for (int i = 0; i < num_node_types; ++i) { cmark_node_type type = node_types[i]; cmark_node *node = cmark_node_new(type); OK(runner, node != NULL, "new type %d", type); 
INT_EQ(runner, cmark_node_get_type(node), type, "get_type %d", type); switch (node->type) { case CMARK_NODE_HEADING: INT_EQ(runner, cmark_node_get_heading_level(node), 1, "default heading level is 1"); node->as.heading.level = 1; break; case CMARK_NODE_LIST: INT_EQ(runner, cmark_node_get_list_type(node), CMARK_BULLET_LIST, "default is list type is bullet"); INT_EQ(runner, cmark_node_get_list_delim(node), CMARK_NO_DELIM, "default is list delim is NO_DELIM"); INT_EQ(runner, cmark_node_get_list_start(node), 0, "default is list start is 0"); INT_EQ(runner, cmark_node_get_list_tight(node), 0, "default is list is loose"); break; default: break; } cmark_node_free(node); } } static void accessors(test_batch_runner *runner) { static const char markdown[] = "## Header\n" "\n" "* Item 1\n" "* Item 2\n" "\n" "2. Item 1\n" "\n" "3. Item 2\n" "\n" "``` lang\n" "fenced\n" "```\n" " code\n" "\n" "
html
\n" "\n" "[link](url 'title')\n"; cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); // Getters cmark_node *heading = cmark_node_first_child(doc); INT_EQ(runner, cmark_node_get_heading_level(heading), 2, "get_heading_level"); cmark_node *bullet_list = cmark_node_next(heading); INT_EQ(runner, cmark_node_get_list_type(bullet_list), CMARK_BULLET_LIST, "get_list_type bullet"); INT_EQ(runner, cmark_node_get_list_tight(bullet_list), 1, "get_list_tight tight"); cmark_node *ordered_list = cmark_node_next(bullet_list); INT_EQ(runner, cmark_node_get_list_type(ordered_list), CMARK_ORDERED_LIST, "get_list_type ordered"); INT_EQ(runner, cmark_node_get_list_delim(ordered_list), CMARK_PERIOD_DELIM, "get_list_delim ordered"); INT_EQ(runner, cmark_node_get_list_start(ordered_list), 2, "get_list_start"); INT_EQ(runner, cmark_node_get_list_tight(ordered_list), 0, "get_list_tight loose"); cmark_node *fenced = cmark_node_next(ordered_list); STR_EQ(runner, cmark_node_get_literal(fenced), "fenced\n", "get_literal fenced code"); STR_EQ(runner, cmark_node_get_fence_info(fenced), "lang", "get_fence_info"); cmark_node *code = cmark_node_next(fenced); STR_EQ(runner, cmark_node_get_literal(code), "code\n", "get_literal indented code"); cmark_node *html = cmark_node_next(code); STR_EQ(runner, cmark_node_get_literal(html), "
html
\n", "get_literal html"); cmark_node *paragraph = cmark_node_next(html); INT_EQ(runner, cmark_node_get_start_line(paragraph), 17, "get_start_line"); INT_EQ(runner, cmark_node_get_start_column(paragraph), 1, "get_start_column"); INT_EQ(runner, cmark_node_get_end_line(paragraph), 17, "get_end_line"); cmark_node *link = cmark_node_first_child(paragraph); STR_EQ(runner, cmark_node_get_url(link), "url", "get_url"); STR_EQ(runner, cmark_node_get_title(link), "title", "get_title"); cmark_node *string = cmark_node_first_child(link); STR_EQ(runner, cmark_node_get_literal(string), "link", "get_literal string"); // Setters OK(runner, cmark_node_set_heading_level(heading, 3), "set_heading_level"); OK(runner, cmark_node_set_list_type(bullet_list, CMARK_ORDERED_LIST), "set_list_type ordered"); OK(runner, cmark_node_set_list_delim(bullet_list, CMARK_PAREN_DELIM), "set_list_delim paren"); OK(runner, cmark_node_set_list_start(bullet_list, 3), "set_list_start"); OK(runner, cmark_node_set_list_tight(bullet_list, 0), "set_list_tight loose"); OK(runner, cmark_node_set_list_type(ordered_list, CMARK_BULLET_LIST), "set_list_type bullet"); OK(runner, cmark_node_set_list_tight(ordered_list, 1), "set_list_tight tight"); OK(runner, cmark_node_set_literal(code, "CODE\n"), "set_literal indented code"); OK(runner, cmark_node_set_literal(fenced, "FENCED\n"), "set_literal fenced code"); OK(runner, cmark_node_set_fence_info(fenced, "LANG"), "set_fence_info"); OK(runner, cmark_node_set_literal(html, "
HTML
\n"), "set_literal html"); OK(runner, cmark_node_set_url(link, "URL"), "set_url"); OK(runner, cmark_node_set_title(link, "TITLE"), "set_title"); OK(runner, cmark_node_set_literal(string, "prefix-LINK"), "set_literal string"); // Set literal to suffix of itself (issue #139). const char *literal = cmark_node_get_literal(string); OK(runner, cmark_node_set_literal(string, literal + sizeof("prefix")), "set_literal suffix"); char *rendered_html = cmark_render_html(doc, CMARK_OPT_DEFAULT | CMARK_OPT_UNSAFE, NULL); static const char expected_html[] = "

Header

\n" "
    \n" "
  1. \n" "

    Item 1

    \n" "
  2. \n" "
  3. \n" "

    Item 2

    \n" "
  4. \n" "
\n" "
    \n" "
  • Item 1
  • \n" "
  • Item 2
  • \n" "
\n" "
FENCED\n"
      "
\n" "
CODE\n"
      "
\n" "
HTML
\n" "
\n"; STR_EQ(runner, rendered_html, expected_html, "setters work"); free(rendered_html); // Getter errors INT_EQ(runner, cmark_node_get_heading_level(bullet_list), 0, "get_heading_level error"); INT_EQ(runner, cmark_node_get_list_type(heading), CMARK_NO_LIST, "get_list_type error"); INT_EQ(runner, cmark_node_get_list_start(code), 0, "get_list_start error"); INT_EQ(runner, cmark_node_get_list_tight(fenced), 0, "get_list_tight error"); OK(runner, cmark_node_get_literal(ordered_list) == NULL, "get_literal error"); OK(runner, cmark_node_get_fence_info(paragraph) == NULL, "get_fence_info error"); OK(runner, cmark_node_get_url(html) == NULL, "get_url error"); OK(runner, cmark_node_get_title(heading) == NULL, "get_title error"); // Setter errors OK(runner, !cmark_node_set_heading_level(bullet_list, 3), "set_heading_level error"); OK(runner, !cmark_node_set_list_type(heading, CMARK_ORDERED_LIST), "set_list_type error"); OK(runner, !cmark_node_set_list_start(code, 3), "set_list_start error"); OK(runner, !cmark_node_set_list_tight(fenced, 0), "set_list_tight error"); OK(runner, !cmark_node_set_literal(ordered_list, "content\n"), "set_literal error"); OK(runner, !cmark_node_set_fence_info(paragraph, "lang"), "set_fence_info error"); OK(runner, !cmark_node_set_url(html, "url"), "set_url error"); OK(runner, !cmark_node_set_title(heading, "title"), "set_title error"); OK(runner, !cmark_node_set_heading_level(heading, 0), "set_heading_level too small"); OK(runner, !cmark_node_set_heading_level(heading, 7), "set_heading_level too large"); OK(runner, !cmark_node_set_list_type(bullet_list, CMARK_NO_LIST), "set_list_type invalid"); OK(runner, !cmark_node_set_list_start(bullet_list, -1), "set_list_start negative"); cmark_node_free(doc); } static void node_check(test_batch_runner *runner) { // Construct an incomplete tree. 
cmark_node *doc = cmark_node_new(CMARK_NODE_DOCUMENT); cmark_node *p1 = cmark_node_new(CMARK_NODE_PARAGRAPH); cmark_node *p2 = cmark_node_new(CMARK_NODE_PARAGRAPH); doc->first_child = p1; p1->next = p2; INT_EQ(runner, cmark_node_check(doc, NULL), 4, "node_check works"); INT_EQ(runner, cmark_node_check(doc, NULL), 0, "node_check fixes tree"); cmark_node_free(doc); } static void iterator(test_batch_runner *runner) { cmark_node *doc = cmark_parse_document("> a *b*\n\nc", 10, CMARK_OPT_DEFAULT); int parnodes = 0; cmark_event_type ev_type; cmark_iter *iter = cmark_iter_new(doc); cmark_node *cur; while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cur = cmark_iter_get_node(iter); if (cur->type == CMARK_NODE_PARAGRAPH && ev_type == CMARK_EVENT_ENTER) { parnodes += 1; } } INT_EQ(runner, parnodes, 2, "iterate correctly counts paragraphs"); cmark_iter_free(iter); cmark_node_free(doc); } static void iterator_delete(test_batch_runner *runner) { static const char md[] = "a *b* c\n" "\n" "* item1\n" "* item2\n" "\n" "a `b` c\n" "\n" "* item1\n" "* item2\n"; cmark_node *doc = cmark_parse_document(md, sizeof(md) - 1, CMARK_OPT_DEFAULT); cmark_iter *iter = cmark_iter_new(doc); cmark_event_type ev_type; while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cmark_node *node = cmark_iter_get_node(iter); // Delete list, emph, and code nodes. if ((ev_type == CMARK_EVENT_EXIT && node->type == CMARK_NODE_LIST) || (ev_type == CMARK_EVENT_EXIT && node->type == CMARK_NODE_EMPH) || (ev_type == CMARK_EVENT_ENTER && node->type == CMARK_NODE_CODE)) { cmark_node_free(node); } } char *html = cmark_render_html(doc, CMARK_OPT_DEFAULT, NULL); static const char expected[] = "

a c

\n" "

a c

\n"; STR_EQ(runner, html, expected, "iterate and delete nodes"); free(html); cmark_iter_free(iter); cmark_node_free(doc); } static void create_tree(test_batch_runner *runner) { char *html; cmark_node *doc = cmark_node_new(CMARK_NODE_DOCUMENT); cmark_node *p = cmark_node_new(CMARK_NODE_PARAGRAPH); OK(runner, !cmark_node_insert_before(doc, p), "insert before root fails"); OK(runner, !cmark_node_insert_after(doc, p), "insert after root fails"); OK(runner, cmark_node_append_child(doc, p), "append1"); INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append1 consistent"); OK(runner, cmark_node_parent(p) == doc, "node_parent"); cmark_node *emph = cmark_node_new(CMARK_NODE_EMPH); OK(runner, cmark_node_prepend_child(p, emph), "prepend1"); INT_EQ(runner, cmark_node_check(doc, NULL), 0, "prepend1 consistent"); cmark_node *str1 = cmark_node_new(CMARK_NODE_TEXT); cmark_node_set_literal(str1, "Hello, "); OK(runner, cmark_node_prepend_child(p, str1), "prepend2"); INT_EQ(runner, cmark_node_check(doc, NULL), 0, "prepend2 consistent"); cmark_node *str3 = cmark_node_new(CMARK_NODE_TEXT); cmark_node_set_literal(str3, "!"); OK(runner, cmark_node_append_child(p, str3), "append2"); INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append2 consistent"); cmark_node *str2 = cmark_node_new(CMARK_NODE_TEXT); cmark_node_set_literal(str2, "world"); OK(runner, cmark_node_append_child(emph, str2), "append3"); INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append3 consistent"); html = cmark_render_html(doc, CMARK_OPT_DEFAULT, NULL); STR_EQ(runner, html, "

Hello, world!

\n", "render_html"); free(html); OK(runner, cmark_node_insert_before(str1, str3), "ins before1"); INT_EQ(runner, cmark_node_check(doc, NULL), 0, "ins before1 consistent"); // 31e OK(runner, cmark_node_first_child(p) == str3, "ins before1 works"); OK(runner, cmark_node_insert_before(str1, emph), "ins before2"); INT_EQ(runner, cmark_node_check(doc, NULL), 0, "ins before2 consistent"); // 3e1 OK(runner, cmark_node_last_child(p) == str1, "ins before2 works"); OK(runner, cmark_node_insert_after(str1, str3), "ins after1"); INT_EQ(runner, cmark_node_check(doc, NULL), 0, "ins after1 consistent"); // e13 OK(runner, cmark_node_next(str1) == str3, "ins after1 works"); OK(runner, cmark_node_insert_after(str1, emph), "ins after2"); INT_EQ(runner, cmark_node_check(doc, NULL), 0, "ins after2 consistent"); // 1e3 OK(runner, cmark_node_previous(emph) == str1, "ins after2 works"); cmark_node *str4 = cmark_node_new(CMARK_NODE_TEXT); cmark_node_set_literal(str4, "brzz"); OK(runner, cmark_node_replace(str1, str4), "replace"); // The replaced node is not freed cmark_node_free(str1); INT_EQ(runner, cmark_node_check(doc, NULL), 0, "replace consistent"); OK(runner, cmark_node_previous(emph) == str4, "replace works"); INT_EQ(runner, cmark_node_replace(p, str4), 0, "replace str for p fails"); cmark_node_unlink(emph); html = cmark_render_html(doc, CMARK_OPT_DEFAULT, NULL); STR_EQ(runner, html, "

brzz!

\n", "render_html after shuffling"); free(html); cmark_node_free(doc); // TODO: Test that the contents of an unlinked inline are valid // after the parent block was destroyed. This doesn't work so far. cmark_node_free(emph); } static void custom_nodes(test_batch_runner *runner) { char *html; char *man; cmark_node *doc = cmark_node_new(CMARK_NODE_DOCUMENT); cmark_node *p = cmark_node_new(CMARK_NODE_PARAGRAPH); cmark_node_append_child(doc, p); cmark_node *ci = cmark_node_new(CMARK_NODE_CUSTOM_INLINE); cmark_node *str1 = cmark_node_new(CMARK_NODE_TEXT); cmark_node_set_literal(str1, "Hello"); OK(runner, cmark_node_append_child(ci, str1), "append1"); OK(runner, cmark_node_set_on_enter(ci, ""), "set_on_exit"); STR_EQ(runner, cmark_node_get_on_enter(ci), "", "get_on_exit"); cmark_node_append_child(p, ci); cmark_node *cb = cmark_node_new(CMARK_NODE_CUSTOM_BLOCK); cmark_node_set_on_enter(cb, "

\n\nNo newline

\n", "document without trailing newline"); } static void render_html(test_batch_runner *runner) { char *html; static const char markdown[] = "foo *bar*\n" "\n" "paragraph 2\n"; cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); cmark_node *paragraph = cmark_node_first_child(doc); html = cmark_render_html(paragraph, CMARK_OPT_DEFAULT, NULL); STR_EQ(runner, html, "

foo bar

\n", "render single paragraph"); free(html); cmark_node *string = cmark_node_first_child(paragraph); html = cmark_render_html(string, CMARK_OPT_DEFAULT, NULL); STR_EQ(runner, html, "foo ", "render single inline"); free(html); cmark_node *emph = cmark_node_next(string); html = cmark_render_html(emph, CMARK_OPT_DEFAULT, NULL); STR_EQ(runner, html, "bar", "render inline with children"); free(html); cmark_node_free(doc); } static void render_xml(test_batch_runner *runner) { char *xml; static const char markdown[] = "foo *bar*\n" "\n" "paragraph 2\n" "\n" "```\ncode\n```\n"; cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT); STR_EQ(runner, xml, "\n" "\n" "\n" " \n" " foo \n" " \n" " bar\n" " \n" " \n" " \n" " paragraph 2\n" " \n" " code\n" "\n" "\n", "render document"); free(xml); cmark_node *paragraph = cmark_node_first_child(doc); xml = cmark_render_xml(paragraph, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); STR_EQ(runner, xml, "\n" "\n" "\n" " foo \n" " \n" " bar\n" " \n" "\n", "render first paragraph with source pos"); free(xml); cmark_node_free(doc); } static void render_man(test_batch_runner *runner) { char *man; static const char markdown[] = "foo *bar*\n" "\n" "- Lorem ipsum dolor sit amet,\n" " consectetur adipiscing elit,\n" "- sed do eiusmod tempor incididunt\n" " ut labore et dolore magna aliqua.\n"; cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); man = cmark_render_man(doc, CMARK_OPT_DEFAULT, 20); STR_EQ(runner, man, ".PP\n" "foo \\f[I]bar\\f[]\n" ".IP \\[bu] 2\n" "Lorem ipsum dolor\n" "sit amet,\n" "consectetur\n" "adipiscing elit,\n" ".IP \\[bu] 2\n" "sed do eiusmod\n" "tempor incididunt ut\n" "labore et dolore\n" "magna aliqua.\n", "render document with wrapping"); free(man); man = cmark_render_man(doc, CMARK_OPT_DEFAULT, 0); STR_EQ(runner, man, ".PP\n" "foo \\f[I]bar\\f[]\n" ".IP \\[bu] 2\n" "Lorem ipsum dolor sit amet,\n" 
"consectetur adipiscing elit,\n" ".IP \\[bu] 2\n" "sed do eiusmod tempor incididunt\n" "ut labore et dolore magna aliqua.\n", "render document without wrapping"); free(man); cmark_node_free(doc); } static void render_latex(test_batch_runner *runner) { char *latex; static const char markdown[] = "foo *bar* $%\n" "\n" "- Lorem ipsum dolor sit amet,\n" " consectetur adipiscing elit,\n" "- sed do eiusmod tempor incididunt\n" " ut labore et dolore magna aliqua.\n"; cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); latex = cmark_render_latex(doc, CMARK_OPT_DEFAULT, 20); STR_EQ(runner, latex, "foo \\emph{bar} \\$\\%\n" "\n" "\\begin{itemize}\n" "\\item Lorem ipsum\n" "dolor sit amet,\n" "consectetur\n" "adipiscing elit,\n" "\n" "\\item sed do eiusmod\n" "tempor incididunt ut\n" "labore et dolore\n" "magna aliqua.\n" "\n" "\\end{itemize}\n", "render document with wrapping"); free(latex); latex = cmark_render_latex(doc, CMARK_OPT_DEFAULT, 0); STR_EQ(runner, latex, "foo \\emph{bar} \\$\\%\n" "\n" "\\begin{itemize}\n" "\\item Lorem ipsum dolor sit amet,\n" "consectetur adipiscing elit,\n" "\n" "\\item sed do eiusmod tempor incididunt\n" "ut labore et dolore magna aliqua.\n" "\n" "\\end{itemize}\n", "render document without wrapping"); free(latex); cmark_node_free(doc); } static void render_commonmark(test_batch_runner *runner) { char *commonmark; static const char markdown[] = "> \\- foo *bar* \\*bar\\*\n" "\n" "- Lorem ipsum dolor sit amet,\n" " consectetur adipiscing elit,\n" "- sed do eiusmod tempor incididunt\n" " ut labore et dolore magna aliqua.\n"; cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); commonmark = cmark_render_commonmark(doc, CMARK_OPT_DEFAULT, 26); STR_EQ(runner, commonmark, "> \\- foo *bar* \\*bar\\*\n" "\n" " - Lorem ipsum dolor sit\n" " amet, consectetur\n" " adipiscing elit,\n" " - sed do eiusmod tempor\n" " incididunt ut labore\n" " et dolore magna\n" " aliqua.\n", 
"render document with wrapping"); free(commonmark); commonmark = cmark_render_commonmark(doc, CMARK_OPT_DEFAULT, 0); STR_EQ(runner, commonmark, "> \\- foo *bar* \\*bar\\*\n" "\n" " - Lorem ipsum dolor sit amet,\n" " consectetur adipiscing elit,\n" " - sed do eiusmod tempor incididunt\n" " ut labore et dolore magna aliqua.\n", "render document without wrapping"); free(commonmark); cmark_node *text = cmark_node_new(CMARK_NODE_TEXT); cmark_node_set_literal(text, "Hi"); commonmark = cmark_render_commonmark(text, CMARK_OPT_DEFAULT, 0); STR_EQ(runner, commonmark, "Hi\n", "render single inline node"); free(commonmark); cmark_node_free(text); cmark_node_free(doc); } static void render_plaintext(test_batch_runner *runner) { char *plaintext; static const char markdown[] = "> \\- foo *bar* \\*bar\\*\n" "\n" "- Lorem ipsum dolor sit amet,\n" " consectetur adipiscing elit,\n" "- sed do eiusmod tempor incididunt\n" " ut labore et dolore magna aliqua.\n"; cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); plaintext = cmark_render_plaintext(doc, CMARK_OPT_DEFAULT, 26); STR_EQ(runner, plaintext, "- foo bar *bar*\n" "\n" " - Lorem ipsum dolor sit\n" " amet, consectetur\n" " adipiscing elit,\n" " - sed do eiusmod tempor\n" " incididunt ut labore\n" " et dolore magna\n" " aliqua.\n", "render document with wrapping"); free(plaintext); plaintext = cmark_render_plaintext(doc, CMARK_OPT_DEFAULT, 0); STR_EQ(runner, plaintext, "- foo bar *bar*\n" "\n" " - Lorem ipsum dolor sit amet,\n" " consectetur adipiscing elit,\n" " - sed do eiusmod tempor incididunt\n" " ut labore et dolore magna aliqua.\n", "render document without wrapping"); free(plaintext); cmark_node *text = cmark_node_new(CMARK_NODE_TEXT); cmark_node_set_literal(text, "Hi"); plaintext = cmark_render_plaintext(text, CMARK_OPT_DEFAULT, 0); STR_EQ(runner, plaintext, "Hi\n", "render single inline node"); free(plaintext); cmark_node_free(text); cmark_node_free(doc); } static void 
utf8(test_batch_runner *runner) { // Ranges test_char(runner, 1, "\x01", "valid utf8 01"); test_char(runner, 1, "\x7F", "valid utf8 7F"); test_char(runner, 0, "\x80", "invalid utf8 80"); test_char(runner, 0, "\xBF", "invalid utf8 BF"); test_char(runner, 0, "\xC0\x80", "invalid utf8 C080"); test_char(runner, 0, "\xC1\xBF", "invalid utf8 C1BF"); test_char(runner, 1, "\xC2\x80", "valid utf8 C280"); test_char(runner, 1, "\xDF\xBF", "valid utf8 DFBF"); test_char(runner, 0, "\xE0\x80\x80", "invalid utf8 E08080"); test_char(runner, 0, "\xE0\x9F\xBF", "invalid utf8 E09FBF"); test_char(runner, 1, "\xE0\xA0\x80", "valid utf8 E0A080"); test_char(runner, 1, "\xED\x9F\xBF", "valid utf8 ED9FBF"); test_char(runner, 0, "\xED\xA0\x80", "invalid utf8 EDA080"); test_char(runner, 0, "\xED\xBF\xBF", "invalid utf8 EDBFBF"); test_char(runner, 0, "\xF0\x80\x80\x80", "invalid utf8 F0808080"); test_char(runner, 0, "\xF0\x8F\xBF\xBF", "invalid utf8 F08FBFBF"); test_char(runner, 1, "\xF0\x90\x80\x80", "valid utf8 F0908080"); test_char(runner, 1, "\xF4\x8F\xBF\xBF", "valid utf8 F48FBFBF"); test_char(runner, 0, "\xF4\x90\x80\x80", "invalid utf8 F4908080"); test_char(runner, 0, "\xF7\xBF\xBF\xBF", "invalid utf8 F7BFBFBF"); test_char(runner, 0, "\xF8", "invalid utf8 F8"); test_char(runner, 0, "\xFF", "invalid utf8 FF"); // Incomplete byte sequences at end of input test_incomplete_char(runner, "\xE0\xA0", "invalid utf8 E0A0"); test_incomplete_char(runner, "\xF0\x90\x80", "invalid utf8 F09080"); // Invalid continuation bytes test_continuation_byte(runner, "\xC2\x80"); test_continuation_byte(runner, "\xE0\xA0\x80"); test_continuation_byte(runner, "\xF0\x90\x80\x80"); // Test string containing null character static const char string_with_null[] = "((((\0))))"; char *html = cmark_markdown_to_html( string_with_null, sizeof(string_with_null) - 1, CMARK_OPT_DEFAULT); STR_EQ(runner, html, "

((((" UTF8_REPL "))))

\n", "utf8 with U+0000"); free(html); // Test NUL followed by newline static const char string_with_nul_lf[] = "```\n\0\n```\n"; html = cmark_markdown_to_html( string_with_nul_lf, sizeof(string_with_nul_lf) - 1, CMARK_OPT_DEFAULT); STR_EQ(runner, html, "
\xef\xbf\xbd\n
\n", "utf8 with \\0\\n"); free(html); // Test byte-order marker static const char string_with_bom[] = "\xef\xbb\xbf# Hello\n"; html = cmark_markdown_to_html( string_with_bom, sizeof(string_with_bom) - 1, CMARK_OPT_DEFAULT); STR_EQ(runner, html, "

Hello

\n", "utf8 with BOM"); free(html); } static void test_char(test_batch_runner *runner, int valid, const char *utf8, const char *msg) { char buf[20]; sprintf(buf, "((((%s))))", utf8); if (valid) { char expected[30]; sprintf(expected, "

((((%s))))

\n", utf8); test_md_to_html(runner, buf, expected, msg); } else { test_md_to_html(runner, buf, "

((((" UTF8_REPL "))))

\n", msg); } } static void test_incomplete_char(test_batch_runner *runner, const char *utf8, const char *msg) { char buf[20]; sprintf(buf, "----%s", utf8); test_md_to_html(runner, buf, "

----" UTF8_REPL "

\n", msg); } static void test_continuation_byte(test_batch_runner *runner, const char *utf8) { size_t len = strlen(utf8); for (size_t pos = 1; pos < len; ++pos) { char buf[20]; sprintf(buf, "((((%s))))", utf8); buf[4 + pos] = '\x20'; char expected[50]; strcpy(expected, "

((((" UTF8_REPL "\x20"); for (size_t i = pos + 1; i < len; ++i) { strcat(expected, UTF8_REPL); } strcat(expected, "))))

\n"); char *html = cmark_markdown_to_html(buf, strlen(buf), CMARK_OPT_VALIDATE_UTF8); STR_EQ(runner, html, expected, "invalid utf8 continuation byte %d/%d", pos, len); free(html); } } static void line_endings(test_batch_runner *runner) { // Test list with different line endings static const char list_with_endings[] = "- a\n- b\r\n- c\r- d"; char *html = cmark_markdown_to_html( list_with_endings, sizeof(list_with_endings) - 1, CMARK_OPT_DEFAULT); STR_EQ(runner, html, "
    \n
  • a
  • \n
  • b
  • \n
  • c
  • \n
  • d
  • \n
\n", "list with different line endings"); free(html); static const char crlf_lines[] = "line\r\nline\r\n"; html = cmark_markdown_to_html(crlf_lines, sizeof(crlf_lines) - 1, CMARK_OPT_DEFAULT | CMARK_OPT_HARDBREAKS); STR_EQ(runner, html, "

line
\nline

\n", "crlf endings with CMARK_OPT_HARDBREAKS"); free(html); html = cmark_markdown_to_html(crlf_lines, sizeof(crlf_lines) - 1, CMARK_OPT_DEFAULT | CMARK_OPT_NOBREAKS); STR_EQ(runner, html, "

line line

\n", "crlf endings with CMARK_OPT_NOBREAKS"); free(html); static const char no_line_ending[] = "```\nline\n```"; html = cmark_markdown_to_html(no_line_ending, sizeof(no_line_ending) - 1, CMARK_OPT_DEFAULT); STR_EQ(runner, html, "
line\n
\n", "fenced code block with no final newline"); free(html); } static void numeric_entities(test_batch_runner *runner) { test_md_to_html(runner, "�", "

" UTF8_REPL "

\n", "Invalid numeric entity 0"); test_md_to_html(runner, "퟿", "

\xED\x9F\xBF

\n", "Valid numeric entity 0xD7FF"); test_md_to_html(runner, "�", "

" UTF8_REPL "

\n", "Invalid numeric entity 0xD800"); test_md_to_html(runner, "�", "

" UTF8_REPL "

\n", "Invalid numeric entity 0xDFFF"); test_md_to_html(runner, "", "

\xEE\x80\x80

\n", "Valid numeric entity 0xE000"); test_md_to_html(runner, "􏿿", "

\xF4\x8F\xBF\xBF

\n", "Valid numeric entity 0x10FFFF"); test_md_to_html(runner, "�", "

" UTF8_REPL "

\n", "Invalid numeric entity 0x110000"); test_md_to_html(runner, "�", "

" UTF8_REPL "

\n", "Invalid numeric entity 0x80000000"); test_md_to_html(runner, "�", "

" UTF8_REPL "

\n", "Invalid numeric entity 0xFFFFFFFF"); test_md_to_html(runner, "�", "

" UTF8_REPL "

\n", "Invalid numeric entity 99999999"); test_md_to_html(runner, "&#;", "

&#;

\n", "Min decimal entity length"); test_md_to_html(runner, "&#x;", "

&#x;

\n", "Min hexadecimal entity length"); test_md_to_html(runner, "�", "

&#999999999;

\n", "Max decimal entity length"); test_md_to_html(runner, "A", "

&#x000000041;

\n", "Max hexadecimal entity length"); } static void test_safe(test_batch_runner *runner) { // Test safe mode static const char raw_html[] = "
\nhi\n
\n\nhi\n[link](JAVAscript:alert('hi'))\n![image](" "file:my.js)\n"; char *html = cmark_markdown_to_html(raw_html, sizeof(raw_html) - 1, CMARK_OPT_DEFAULT); STR_EQ(runner, html, "\n

hi\nlink\n\"image\"

\n", "input with raw HTML and dangerous links"); free(html); } static void test_md_to_html(test_batch_runner *runner, const char *markdown, const char *expected_html, const char *msg) { char *html = cmark_markdown_to_html(markdown, strlen(markdown), CMARK_OPT_VALIDATE_UTF8); STR_EQ(runner, html, expected_html, msg); free(html); } static void test_feed_across_line_ending(test_batch_runner *runner) { // See #117 cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT); cmark_parser_feed(parser, "line1\r", 6); cmark_parser_feed(parser, "\nline2\r\n", 8); cmark_node *document = cmark_parser_finish(parser); OK(runner, document->first_child->next == NULL, "document has one paragraph"); cmark_parser_free(parser); cmark_node_free(document); } #if !defined(_WIN32) || defined(__CYGWIN__) # include static struct timeval _before, _after; static int _timing; # define START_TIMING() \ gettimeofday(&_before, NULL) # define END_TIMING() \ do { \ gettimeofday(&_after, NULL); \ _timing = (_after.tv_sec - _before.tv_sec) * 1000 + (_after.tv_usec - _before.tv_usec) / 1000; \ } while (0) # define TIMING _timing #else # define START_TIMING() # define END_TIMING() # define TIMING 0 #endif static void test_pathological_regressions(test_batch_runner *runner) { { // I don't care what the output is, so long as it doesn't take too long. char path[] = "[a](b"; char *input = (char *)calloc(1, (sizeof(path) - 1) * 50000); for (int i = 0; i < 50000; ++i) memcpy(input + i * (sizeof(path) - 1), path, sizeof(path) - 1); START_TIMING(); char *html = cmark_markdown_to_html(input, (sizeof(path) - 1) * 50000, CMARK_OPT_VALIDATE_UTF8); END_TIMING(); free(html); free(input); OK(runner, TIMING < 1000, "takes less than 1000ms to run"); } { char path[] = "[a](\n" "there `hi` -- [okay](www.google.com (ok)).\n" "\n" "> 1. Okay.\n" "> Sure.\n" ">\n" "> 2. 
Yes, okay.\n" "> ![ok](hi \"yes\")\n"; cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); STR_EQ(runner, xml, "\n" "\n" "\n" " \n" " Hi \n" " \n" " there\n" " \n" " .\n" " \n" " \n" " Hello \xe2\x80\x9c \n" " \n" " http://www.google.com\n" " \n" " \n" " there \n" " hi\n" " -- \n" " \n" " okay\n" " \n" " .\n" " \n" " \n" " \n" " \n" " \n" " Okay.\n" " \n" " Sure.\n" " \n" " \n" " \n" " \n" " Yes, okay.\n" " \n" " \n" " ok\n" " \n" " \n" " \n" " \n" " \n" "\n", "sourcepos are as expected"); free(xml); cmark_node_free(doc); } static void source_pos_inlines(test_batch_runner *runner) { { static const char markdown[] = "*first*\n" "second\n"; cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); STR_EQ(runner, xml, "\n" "\n" "\n" " \n" " \n" " first\n" " \n" " \n" " second\n" " \n" "\n", "sourcepos are as expected"); free(xml); cmark_node_free(doc); } { static const char markdown[] = "*first\n" "second*\n"; cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); STR_EQ(runner, xml, "\n" "\n" "\n" " \n" " \n" " first\n" " \n" " second\n" " \n" " \n" "\n", "sourcepos are as expected"); free(xml); cmark_node_free(doc); } } static void ref_source_pos(test_batch_runner *runner) { static const char markdown[] = "Let's try [reference] links.\n" "\n" "[reference]: https://github.com (GitHub)\n"; cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); STR_EQ(runner, xml, "\n" "\n" "\n" " \n" " Let's try \n" " \n" " reference\n" " \n" " links.\n" " \n" "\n", "sourcepos are as expected"); free(xml); cmark_node_free(doc); } int main() { 
int retval; test_batch_runner *runner = test_batch_runner_new(); version(runner); constructor(runner); accessors(runner); node_check(runner); iterator(runner); iterator_delete(runner); create_tree(runner); custom_nodes(runner); hierarchy(runner); parser(runner); render_html(runner); render_xml(runner); render_man(runner); render_latex(runner); render_commonmark(runner); render_plaintext(runner); utf8(runner); line_endings(runner); numeric_entities(runner); test_cplusplus(runner); test_safe(runner); test_feed_across_line_ending(runner); test_pathological_regressions(runner); source_pos(runner); source_pos_inlines(runner); ref_source_pos(runner); test_print_summary(runner); retval = test_ok(runner) ? 0 : 1; free(runner); return retval; } cmarkgfm/third_party/cmark/api_test/harness.c0000644000175000017500000000515514210444464021603 0ustar carstencarsten#define _DEFAULT_SOURCE #include #include #include #include #include "harness.h" test_batch_runner *test_batch_runner_new() { return (test_batch_runner *)calloc(1, sizeof(test_batch_runner)); } static void test_result(test_batch_runner *runner, int cond, const char *msg, va_list ap) { ++runner->test_num; if (cond) { ++runner->num_passed; } else { fprintf(stderr, "FAILED test %d: ", runner->test_num); vfprintf(stderr, msg, ap); fprintf(stderr, "\n"); ++runner->num_failed; } } void SKIP(test_batch_runner *runner, int num_tests) { runner->test_num += num_tests; runner->num_skipped += num_tests; } void OK(test_batch_runner *runner, int cond, const char *msg, ...) { va_list ap; va_start(ap, msg); test_result(runner, cond, msg, ap); va_end(ap); } void INT_EQ(test_batch_runner *runner, int got, int expected, const char *msg, ...) 
{ int cond = got == expected; va_list ap; va_start(ap, msg); test_result(runner, cond, msg, ap); va_end(ap); if (!cond) { fprintf(stderr, " Got: %d\n", got); fprintf(stderr, " Expected: %d\n", expected); } } #ifndef _WIN32 #include static char *write_tmp(char const *header, char const *data) { char *name = strdup("/tmp/fileXXXXXX"); int fd = mkstemp(name); FILE *f = fdopen(fd, "w+"); fputs(header, f); fwrite(data, 1, strlen(data), f); fclose(f); return name; } #endif void STR_EQ(test_batch_runner *runner, const char *got, const char *expected, const char *msg, ...) { int cond = strcmp(got, expected) == 0; va_list ap; va_start(ap, msg); test_result(runner, cond, msg, ap); va_end(ap); if (!cond) { #ifndef _WIN32 char *got_fn = write_tmp("actual\n", got); char *expected_fn = write_tmp("expected\n", expected); char buf[1024]; snprintf(buf, sizeof(buf), "git diff --no-index %s %s", expected_fn, got_fn); system(buf); remove(got_fn); remove(expected_fn); free(got_fn); free(expected_fn); #else fprintf(stderr, " Got: \"%s\"\n", got); fprintf(stderr, " Expected: \"%s\"\n", expected); #endif } } int test_ok(test_batch_runner *runner) { return runner->num_failed == 0; } void test_print_summary(test_batch_runner *runner) { int num_passed = runner->num_passed; int num_skipped = runner->num_skipped; int num_failed = runner->num_failed; fprintf(stderr, "%d tests passed, %d failed, %d skipped\n", num_passed, num_failed, num_skipped); if (test_ok(runner)) { fprintf(stderr, "PASS\n"); } else { fprintf(stderr, "FAIL\n"); } } cmarkgfm/third_party/cmark/api_test/cplusplus.cpp0000644000175000017500000000054314210444464022526 0ustar carstencarsten#include #include "cmark-gfm.h" #include "cplusplus.h" #include "harness.h" void test_cplusplus(test_batch_runner *runner) { static const char md[] = "paragraph\n"; char *html = cmark_markdown_to_html(md, sizeof(md) - 1, CMARK_OPT_DEFAULT); STR_EQ(runner, html, "

paragraph

\n", "libcmark works with C++"); free(html); } cmarkgfm/third_party/cmark/COPYING0000644000175000017500000001661714210444464017224 0ustar carstencarstenCopyright (c) 2014, John MacFarlane All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
----- houdini.h, houdini_href_e.c, houdini_html_e.c, houdini_html_u.c derive from https://github.com/vmg/houdini (with some modifications) Copyright (C) 2012 Vicent Martí Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ----- buffer.h, buffer.c, chunk.h are derived from code (C) 2012 Github, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ----- utf8.c and utf8.h are derived from utf8proc (http://www.public-software-group.org/utf8proc), (C) 2009 Public Software Group e. V., Berlin, Germany. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
----- The normalization code in normalize.py was derived from the markdowntest project, Copyright 2013 Karl Dubost: The MIT License (MIT) Copyright (c) 2013 Karl Dubost Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ----- The CommonMark spec (test/spec.txt) is Copyright (C) 2014-15 John MacFarlane Released under the Creative Commons CC-BY-SA 4.0 license: http://creativecommons.org/licenses/by-sa/4.0/. ----- The test software in test/ is Copyright (c) 2014, John MacFarlane All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. cmarkgfm/third_party/cmark/nmake.bat0000644000175000017500000000005114210444464017735 0ustar carstencarsten@nmake.exe /nologo /f Makefile.nmake %* cmarkgfm/third_party/cmark/FindAsan.cmake0000644000175000017500000000623214210444464020646 0ustar carstencarsten# # The MIT License (MIT) # # Copyright (c) 2013 Matthew Arsenault # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # # This module tests if address sanitizer is supported by the compiler, # and creates a ASan build type (i.e. set CMAKE_BUILD_TYPE=ASan to use # it). This sets the following variables: # # CMAKE_C_FLAGS_ASAN - Flags to use for C with asan # CMAKE_CXX_FLAGS_ASAN - Flags to use for C++ with asan # HAVE_ADDRESS_SANITIZER - True or false if the ASan build type is available include(CheckCCompilerFlag) # Set -Werror to catch "argument unused during compilation" warnings set(CMAKE_REQUIRED_FLAGS "-Werror -faddress-sanitizer") # Also needs to be a link flag for test to pass check_c_compiler_flag("-faddress-sanitizer" HAVE_FLAG_ADDRESS_SANITIZER) set(CMAKE_REQUIRED_FLAGS "-Werror -fsanitize=address") # Also needs to be a link flag for test to pass check_c_compiler_flag("-fsanitize=address" HAVE_FLAG_SANITIZE_ADDRESS) unset(CMAKE_REQUIRED_FLAGS) if(HAVE_FLAG_SANITIZE_ADDRESS) # Clang 3.2+ use this version set(ADDRESS_SANITIZER_FLAG "-fsanitize=address") elseif(HAVE_FLAG_ADDRESS_SANITIZER) # Older deprecated flag for ASan set(ADDRESS_SANITIZER_FLAG "-faddress-sanitizer") endif() if(NOT ADDRESS_SANITIZER_FLAG) return() else(NOT ADDRESS_SANITIZER_FLAG) set(HAVE_ADDRESS_SANITIZER FALSE) endif() set(HAVE_ADDRESS_SANITIZER TRUE) set(CMAKE_C_FLAGS_ASAN "-O1 -g ${ADDRESS_SANITIZER_FLAG} -fno-omit-frame-pointer -fno-optimize-sibling-calls" CACHE STRING "Flags used by the C compiler during ASan builds." FORCE) set(CMAKE_CXX_FLAGS_ASAN "-O1 -g ${ADDRESS_SANITIZER_FLAG} -fno-omit-frame-pointer -fno-optimize-sibling-calls" CACHE STRING "Flags used by the C++ compiler during ASan builds." FORCE) set(CMAKE_EXE_LINKER_FLAGS_ASAN "${ADDRESS_SANITIZER_FLAG}" CACHE STRING "Flags used for linking binaries during ASan builds." 
FORCE) set(CMAKE_SHARED_LINKER_FLAGS_ASAN "${ADDRESS_SANITIZER_FLAG}" CACHE STRING "Flags used by the shared libraries linker during ASan builds." FORCE) mark_as_advanced(CMAKE_C_FLAGS_ASAN CMAKE_CXX_FLAGS_ASAN CMAKE_EXE_LINKER_FLAGS_ASAN CMAKE_SHARED_LINKER_FLAGS_ASAN) cmarkgfm/third_party/cmark/Makefile0000644000175000017500000001601514210444464017621 0ustar carstencarstenSRCDIR=src EXTDIR=extensions DATADIR=data BUILDDIR?=build GENERATOR?=Unix Makefiles MINGW_BUILDDIR?=build-mingw MINGW_INSTALLDIR?=windows SPEC=test/spec.txt EXTENSIONS_SPEC=test/extensions.txt SITE=_site SPECVERSION=$(shell perl -ne 'print $$1 if /^version: *([0-9.]+)/' $(SPEC)) FUZZCHARS?=2000000 # for fuzztest BENCHDIR=bench BENCHSAMPLES=$(wildcard $(BENCHDIR)/samples/*.md) BENCHFILE=$(BENCHDIR)/benchinput.md ALLTESTS=alltests.md NUMRUNS?=20 CMARK=$(BUILDDIR)/src/cmark-gfm CMARK_FUZZ=$(BUILDDIR)/src/cmark-fuzz PROG?=$(CMARK) VERSION?=$(SPECVERSION) RELEASE?=CommonMark-$(VERSION) INSTALL_PREFIX?=/usr/local CLANG_CHECK?=clang-check CLANG_FORMAT=clang-format-3.5 -style llvm -sort-includes=0 -i AFL_PATH?=/usr/local/bin .PHONY: all cmake_build leakcheck clean fuzztest test debug ubsan asan mingw archive newbench bench format update-spec afl clang-check docker libFuzzer all: cmake_build man/man3/cmark-gfm.3 $(CMARK): cmake_build cmake_build: $(BUILDDIR) @$(MAKE) -j2 -C $(BUILDDIR) @echo "Binaries can be found in $(BUILDDIR)/src" $(BUILDDIR): @cmake --version > /dev/null || (echo "You need cmake to build this program: http://www.cmake.org/download/" && exit 1) mkdir -p $(BUILDDIR); \ cd $(BUILDDIR); \ cmake .. \ -G "$(GENERATOR)" \ -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \ -DCMAKE_INSTALL_PREFIX=$(INSTALL_PREFIX) \ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON install: $(BUILDDIR) $(MAKE) -C $(BUILDDIR) install debug: mkdir -p $(BUILDDIR); \ cd $(BUILDDIR); \ cmake .. -DCMAKE_BUILD_TYPE=Debug; \ $(MAKE) ubsan: mkdir -p $(BUILDDIR); \ cd $(BUILDDIR); \ cmake .. 
-DCMAKE_BUILD_TYPE=Ubsan; \ $(MAKE) asan: mkdir -p $(BUILDDIR); \ cd $(BUILDDIR); \ cmake .. -DCMAKE_BUILD_TYPE=Asan; \ $(MAKE) prof: mkdir -p $(BUILDDIR); \ cd $(BUILDDIR); \ cmake .. -DCMAKE_BUILD_TYPE=Profile; \ $(MAKE) afl: @[ -n "$(AFL_PATH)" ] || { echo '$$AFL_PATH not set'; false; } mkdir -p $(BUILDDIR) cd $(BUILDDIR) && cmake .. -DCMARK_TESTS=0 -DCMAKE_C_COMPILER=$(AFL_PATH)/afl-clang $(MAKE) $(AFL_PATH)/afl-fuzz \ -i test/afl_test_cases \ -o test/afl_results \ -x test/fuzzing_dictionary \ $(AFL_OPTIONS) \ -t 100 \ $(CMARK) -e table -e strikethrough -e autolink -e tagfilter $(CMARK_OPTS) libFuzzer: @[ -n "$(LIB_FUZZER_PATH)" ] || { echo '$$LIB_FUZZER_PATH not set'; false; } mkdir -p $(BUILDDIR) cd $(BUILDDIR) && cmake -DCMAKE_BUILD_TYPE=Asan -DCMARK_LIB_FUZZER=ON -DCMAKE_LIB_FUZZER_PATH=$(LIB_FUZZER_PATH) .. $(MAKE) -j2 -C $(BUILDDIR) cmark-fuzz test/run-cmark-fuzz $(CMARK_FUZZ) clang-check: all ${CLANG_CHECK} -p build -analyze src/*.c mingw: mkdir -p $(MINGW_BUILDDIR); \ cd $(MINGW_BUILDDIR); \ cmake .. -DCMAKE_TOOLCHAIN_FILE=../toolchain-mingw32.cmake -DCMAKE_INSTALL_PREFIX=$(MINGW_INSTALLDIR) ;\ $(MAKE) && $(MAKE) install man/man3/cmark-gfm.3: src/cmark-gfm.h | $(CMARK) python man/make_man_page.py $< > $@ \ archive: git archive --prefix=$(RELEASE)/ -o $(RELEASE).tar.gz HEAD git archive --prefix=$(RELEASE)/ -o $(RELEASE).zip HEAD clean: rm -rf $(BUILDDIR) $(MINGW_BUILDDIR) $(MINGW_INSTALLDIR) # We include case_fold_switch.inc in the repository, so this shouldn't # normally need to be generated. $(SRCDIR)/case_fold_switch.inc: $(DATADIR)/CaseFolding.txt perl tools/mkcasefold.pl < $< > $@ # We include scanners.c in the repository, so this shouldn't # normally need to be generated. 
$(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re @case "$$(re2c -v)" in \ *\ 0.13.*|*\ 0.14|*\ 0.14.1) \ echo "re2c >= 0.14.2 is required"; \ false; \ ;; \ esac re2c -W -Werror --case-insensitive -b -i --no-generation-date -8 \ --encoding-policy substitute -o $@ $< $(CLANG_FORMAT) $@ # We include scanners.c in the repository, so this shouldn't # normally need to be generated. $(EXTDIR)/ext_scanners.c: $(EXTDIR)/ext_scanners.re @case "$$(re2c -v)" in \ *\ 0.13.*|*\ 0.14|*\ 0.14.1) \ echo "re2c >= 0.14.2 is required"; \ false; \ ;; \ esac re2c --case-insensitive -b -i --no-generation-date -8 \ --encoding-policy substitute -o $@ $< clang-format-3.5 -style llvm -i $@ # We include entities.inc in the repository, so normally this # doesn't need to be regenerated: $(SRCDIR)/entities.inc: tools/make_entities_inc.py python3 $< > $@ update-spec: curl 'https://raw.githubusercontent.com/jgm/CommonMark/master/spec.txt'\ > $(SPEC) test: $(SPEC) cmake_build $(MAKE) -C $(BUILDDIR) test || (cat $(BUILDDIR)/Testing/Temporary/LastTest.log && exit 1) $(ALLTESTS): $(SPEC) $(EXTENSIONS_SPEC) ( \ python3 test/spec_tests.py --spec $(SPEC) --dump-tests | \ python3 -c 'import json; import sys; tests = json.loads(sys.stdin.read()); u8s = open(1, "w", encoding="utf-8", closefd=False); print("\n".join([test["markdown"] for test in tests]), file=u8s)'; \ python3 test/spec_tests.py --spec $(EXTENSIONS_SPEC) --dump-tests | \ python3 -c 'import json; import sys; tests = json.loads(sys.stdin.read()); u8s = open(1, "w", encoding="utf-8", closefd=False); print("\n".join([test["markdown"] for test in tests]), file=u8s)'; \ ) > $@ leakcheck: $(ALLTESTS) for format in html man xml latex commonmark; do \ for opts in "" "--smart"; do \ echo "cmark-gfm -t $$format -e table -e strikethrough -e autolink -e tagfilter $$opts" ; \ valgrind -q --leak-check=full --dsymutil=yes --suppressions=suppressions --error-exitcode=1 $(PROG) -t $$format -e table -e strikethrough -e autolink -e tagfilter $$opts $(ALLTESTS) 
>/dev/null || exit 1;\ done; \ done; fuzztest: { for i in `seq 1 10`; do \ cat /dev/urandom | head -c $(FUZZCHARS) | iconv -f latin1 -t utf-8 | tee fuzz-$$i.txt | \ /usr/bin/env time -p $(PROG) >/dev/null && rm fuzz-$$i.txt ; \ done } 2>&1 | grep 'user\|abnormally' progit: git clone https://github.com/progit/progit.git $(BENCHFILE): progit echo "" > $@ for lang in ar az be ca cs de en eo es es-ni fa fi fr hi hu id it ja ko mk nl no-nb pl pt-br ro ru sr th tr uk vi zh zh-tw; do \ for i in `seq 1 10`; do \ cat progit/$$lang/*/*.markdown >> $@; \ done; \ done # for more accurate results, run with # sudo renice -10 $$; make bench bench: $(BENCHFILE) { for x in `seq 1 $(NUMRUNS)` ; do \ /usr/bin/env time -p $(PROG) /dev/null ; \ /usr/bin/env time -p $(PROG) $< >/dev/null ; \ done \ } 2>&1 | grep 'real' | awk '{print $$2}' | python3 'bench/stats.py' newbench: for f in $(BENCHSAMPLES) ; do \ printf "%26s " `basename $$f` ; \ { for x in `seq 1 $(NUMRUNS)` ; do \ /usr/bin/env time -p $(PROG) /dev/null ; \ for x in `seq 1 200` ; do cat $$f ; done | \ /usr/bin/env time -p $(PROG) > /dev/null; \ done \ } 2>&1 | grep 'real' | awk '{print $$2}' | \ python3 'bench/stats.py'; done format: $(CLANG_FORMAT) src/*.c src/*.h api_test/*.c api_test/*.h format-extensions: clang-format-3.5 -style llvm -i extensions/*.c extensions/*.h operf: $(CMARK) operf $< < $(BENCHFILE) > /dev/null distclean: clean -rm -rf *.dSYM -rm -f README.html -rm -rf $(BENCHFILE) $(ALLTESTS) progit docker: docker build -t cmark-gfm $(CURDIR)/tools docker run --privileged -t -i -v $(CURDIR):/src/cmark-gfm -w /src/cmark-gfm cmark-gfm /bin/bash cmarkgfm/third_party/cmark/appveyor.yml0000644000175000017500000000056214210444464020551 0ustar carstencarstenenvironment: PYTHON: "C:\\Python34-x64" PYTHON_VERSION: "3.4.3" PYTHON_ARCH: "64" matrix: - MSVC_VERSION: 10 - MSVC_VERSION: 12 # set up for nmake: install: - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%" build_script: - 'tools\appveyor-build.bat' artifacts: - path: 
build/src/cmark-gfm.exe name: cmark-gfm.exe test_script: - 'nmake test' cmarkgfm/third_party/cmark/README.md0000644000175000017500000001625114210444464017442 0ustar carstencarstencmark-gfm ========= ![Actions CI](https://github.com/github/cmark-gfm/actions/workflows/ci.yml/badge.svg) `cmark-gfm` is an extended version of the C reference implementation of [CommonMark], a rationalized version of Markdown syntax with a spec. This repository adds GitHub Flavored Markdown extensions to [the upstream implementation], as defined in [the spec]. The rest of the README is preserved as-is from the upstream source. Note that the library and binaries produced by this fork are suffixed with `-gfm` in order to distinguish them from the upstream. --- It provides a shared library (`libcmark`) with functions for parsing CommonMark documents to an abstract syntax tree (AST), manipulating the AST, and rendering the document to HTML, groff man, LaTeX, CommonMark, or an XML representation of the AST. It also provides a command-line program (`cmark`) for parsing and rendering CommonMark documents. Advantages of this library: - **Portable.** The library and program are written in standard C99 and have no external dependencies. They have been tested with MSVC, gcc, tcc, and clang. - **Fast.** cmark can render a Markdown version of *War and Peace* in the blink of an eye (127 milliseconds on a ten year old laptop, vs. 100-400 milliseconds for an eye blink). In our [benchmarks], cmark is 10,000 times faster than the original `Markdown.pl`, and on par with the very fastest available Markdown processors. - **Accurate.** The library passes all CommonMark conformance tests. - **Standardized.** The library can be expected to parse CommonMark the same way as any other conforming parser. So, for example, you can use `commonmark.js` on the client to preview content that will be rendered on the server using `cmark`. - **Robust.** The library has been extensively fuzz-tested using [american fuzzy lop]. 
The test suite includes pathological cases that bring many other Markdown parsers to a crawl (for example, thousands-deep nested bracketed text or block quotes). - **Flexible.** CommonMark input is parsed to an AST which can be manipulated programmatically prior to rendering. - **Multiple renderers.** Output in HTML, groff man, LaTeX, CommonMark, and a custom XML format is supported. And it is easy to write new renderers to support other formats. - **Free.** BSD2-licensed. It is easy to use `libcmark` in python, lua, ruby, and other dynamic languages: see the `wrappers/` subdirectory for some simple examples. There are also libraries that wrap `libcmark` for [Go](https://github.com/rhinoman/go-commonmark), [Haskell](https://hackage.haskell.org/package/cmark), [Ruby](https://github.com/gjtorikian/commonmarker), [Lua](https://github.com/jgm/cmark-lua), [Perl](https://metacpan.org/release/CommonMark), [Python](https://pypi.python.org/pypi/paka.cmark), [R](https://cran.r-project.org/package=commonmark), [Tcl](https://github.com/apnadkarni/tcl-cmark), [Scala](https://github.com/sparsetech/cmark-scala) and [Node.js](https://github.com/killa123/node-cmark). Installing ---------- Building the C program (`cmark`) and shared library (`libcmark`) requires [cmake]. If you modify `scanners.re`, then you will also need [re2c] \(>= 0.14.2\), which is used to generate `scanners.c` from `scanners.re`. We have included a pre-generated `scanners.c` in the repository to reduce build dependencies. If you have GNU make, you can simply `make`, `make test`, and `make install`. This calls [cmake] to create a `Makefile` in the `build` directory, then uses that `Makefile` to create the executable and library. The binaries can be found in `build/src`. The default installation prefix is `/usr/local`. To change the installation prefix, pass the `INSTALL_PREFIX` variable if you run `make` for the first time: `make INSTALL_PREFIX=path`. For a more portable method, you can use [cmake] manually. 
[cmake] knows how to create build environments for many build systems. For example, on FreeBSD: mkdir build cd build cmake .. # optionally: -DCMAKE_INSTALL_PREFIX=path make # executable will be created as build/src/cmark make test make install Or, to create Xcode project files on OSX: mkdir build cd build cmake -G Xcode .. open cmark.xcodeproj The GNU Makefile also provides a few other targets for developers. To run a benchmark: make bench For more detailed benchmarks: make newbench To run a test for memory leaks using `valgrind`: make leakcheck To reformat source code using `clang-format`: make format To run a "fuzz test" against ten long randomly generated inputs: make fuzztest To do a more systematic fuzz test with [american fuzzy lop]: AFL_PATH=/path/to/afl_directory make afl Fuzzing with [libFuzzer] is also supported but, because libFuzzer is still under active development, may not work with your system-installed version of clang. Assuming LLVM has been built in `$HOME/src/llvm/build` the fuzzer can be run with: CC="$HOME/src/llvm/build/bin/clang" LIB_FUZZER_PATH="$HOME/src/llvm/lib/Fuzzer/libFuzzer.a" make libFuzzer To make a release tarball and zip archive: make archive Installing (Windows) -------------------- To compile with MSVC and NMAKE: nmake You can cross-compile a Windows binary and dll on linux if you have the `mingw32` compiler: make mingw The binaries will be in `build-mingw/windows/bin`. Usage ----- Instructions for the use of the command line program and library can be found in the man pages in the `man` subdirectory. Security -------- By default, the library will scrub raw HTML and potentially dangerous links (`javascript:`, `vbscript:`, `data:`, `file:`). To allow these, use the option `CMARK_OPT_UNSAFE` (or `--unsafe`) with the command line program. If doing so, we recommend you use a HTML sanitizer specific to your needs to protect against [XSS attacks](http://en.wikipedia.org/wiki/Cross-site_scripting). 
Contributing ------------ There is a [forum for discussing CommonMark](http://talk.commonmark.org); you should use it instead of github issues for questions and possibly open-ended discussions. Use the [github issue tracker](http://github.com/commonmark/CommonMark/issues) only for simple, clear, actionable issues. Authors ------- John MacFarlane wrote the original library and program. The block parsing algorithm was worked out together with David Greenspan. Vicent Marti optimized the C implementation for performance, increasing its speed tenfold. Kārlis Gaņģis helped work out a better parsing algorithm for links and emphasis, eliminating several worst-case performance issues. Nick Wellnhofer contributed many improvements, including most of the C library's API and its test harness. [benchmarks]: benchmarks.md [the spec]: https://github.github.com/gfm/ [the upstream implementation]: https://github.com/jgm/cmark [CommonMark]: http://commonmark.org [cmake]: http://www.cmake.org/download/ [re2c]: http://re2c.org [commonmark.js]: https://github.com/commonmark/commonmark.js [Build Status]: https://img.shields.io/travis/github/cmark-gfm/master.svg?style=flat [Windows Build Status]: https://ci.appveyor.com/api/projects/status/wv7ifhqhv5itm3d5?svg=true [american fuzzy lop]: http://lcamtuf.coredump.cx/afl/ [libFuzzer]: http://llvm.org/docs/LibFuzzer.html cmarkgfm/third_party/cmark/.editorconfig0000644000175000017500000000040114210444464020626 0ustar carstencarsten# editorconfig.org root = true [*] end_of_line = lf charset = utf-8 insert_final_newline = true [*.{c,h}] trim_trailing_whitespace = true indent_style = space indent_size = 2 [Makefile] trim_trailing_whitespace = true indent_style = tab indent_size = 8 cmarkgfm/third_party/cmark/test/0000755000175000017500000000000014210444464017135 5ustar carstencarstencmarkgfm/third_party/cmark/test/cmark-fuzz.c0000644000175000017500000000344414210444464021377 0ustar carstencarsten#include #include #include #include "cmark-gfm.h" 
#include "cmark-gfm-core-extensions.h"

/*
 * NULL-terminated list of syntax extension names. LLVMFuzzerTestOneInput
 * walks this array and attaches every named extension to each parser it
 * creates.
 */
const char *extension_names[] = {
  "autolink",
  "strikethrough",
  "table",
  "tagfilter",
  NULL,
};

/*
 * libFuzzer initialization entry point.
 *
 * argc/argv are part of the hook's signature but are not used here.
 * Calls cmark_gfm_core_extensions_ensure_registered(); presumably this is
 * what lets cmark_find_syntax_extension() resolve the names listed in
 * extension_names -- confirm against cmark-gfm-core-extensions.h.
 *
 * Always returns 0.
 */
int LLVMFuzzerInitialize(int *argc, char ***argv) {
  cmark_gfm_core_extensions_ensure_registered();
  return 0;
}

/*
 * libFuzzer per-input entry point.
 *
 * data/size: the raw fuzzer-generated bytes. Inputs shorter than
 * sizeof(fuzz_config) are ignored. Otherwise the first sizeof(fuzz_config)
 * bytes are copied into fuzz_config (parser/render options and render
 * width) and the remaining bytes are parsed as markdown with every
 * extension in extension_names attached. The resulting document is run
 * through the commonmark, html, latex, man and xml renderers.
 *
 * Calls abort() if an extension name cannot be found.
 * Always returns 0.
 */
int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
  /* packed: the configuration prefix is laid out as the two ints back to
   * back, with no padding inserted by the compiler. */
  struct __attribute__((packed)) {
    int options;
    int width;
  } fuzz_config;

  if (size >= sizeof(fuzz_config)) {
    /* The beginning of `data` is treated as fuzzer configuration */
    memcpy(&fuzz_config, data, sizeof(fuzz_config));

    /* Remainder of input is the markdown */
    const char *markdown = (const char *)(data + sizeof(fuzz_config));
    const size_t markdown_size = size - sizeof(fuzz_config);

    cmark_parser *parser = cmark_parser_new(fuzz_config.options);

    /* Attach every extension in the NULL-terminated extension_names list. */
    for (const char **it = extension_names; *it; ++it) {
      const char *extension_name = *it;
      cmark_syntax_extension *syntax_extension = cmark_find_syntax_extension(extension_name);
      if (!syntax_extension) {
        /* Report the unknown name on stderr, then stop the run. */
        fprintf(stderr, "%s is not a valid syntax extension\n", extension_name);
        abort();
      }
      cmark_parser_attach_syntax_extension(parser, syntax_extension);
    }

    cmark_parser_feed(parser, markdown, markdown_size);
    cmark_node *doc = cmark_parser_finish(parser);

    /* Exercise each renderer on the same document. The rendered text is
     * not inspected: every returned buffer is handed straight to free().
     * The commonmark, latex and man renderers also receive the fuzzed
     * width; html is given NULL as its third argument. */
    free(cmark_render_commonmark(doc, fuzz_config.options, fuzz_config.width));
    free(cmark_render_html(doc, fuzz_config.options, NULL));
    free(cmark_render_latex(doc, fuzz_config.options, fuzz_config.width));
    free(cmark_render_man(doc, fuzz_config.options, fuzz_config.width));
    free(cmark_render_xml(doc, fuzz_config.options));

    /* Release the document tree and the parser before the next input. */
    cmark_node_free(doc);
    cmark_parser_free(parser);
  }
  return 0;
}
cmarkgfm/third_party/cmark/test/spec.txt0000644000175000017500000064774214210444464020654 0ustar carstencarsten--- title: GitHub Flavored Markdown Spec version: 0.29 date: '2019-04-06' license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)' ... # Introduction ## What is GitHub Flavored Markdown? 
GitHub Flavored Markdown, often shortened as GFM, is the dialect of Markdown that is currently supported for user content on GitHub.com and GitHub Enterprise. This formal specification, based on the CommonMark Spec, defines the syntax and semantics of this dialect. GFM is a strict superset of CommonMark. All the features which are supported in GitHub user content and that are not specified on the original CommonMark Spec are hence known as **extensions**, and highlighted as such. While GFM supports a wide range of inputs, it's worth noting that GitHub.com and GitHub Enterprise perform additional post-processing and sanitization after GFM is converted to HTML to ensure security and consistency of the website. ## What is Markdown? Markdown is a plain text format for writing structured documents, based on conventions for indicating formatting in email and usenet posts. It was developed by John Gruber (with help from Aaron Swartz) and released in 2004 in the form of a [syntax description](http://daringfireball.net/projects/markdown/syntax) and a Perl script (`Markdown.pl`) for converting Markdown to HTML. In the next decade, dozens of implementations were developed in many languages. Some extended the original Markdown syntax with conventions for footnotes, tables, and other document elements. Some allowed Markdown documents to be rendered in formats other than HTML. Websites like Reddit, StackOverflow, and GitHub had millions of people using Markdown. And Markdown started to be used beyond the web, to author books, articles, slide shows, letters, and lecture notes. What distinguishes Markdown from many other lightweight markup syntaxes, which are often easier to write, is its readability. As Gruber writes: > The overriding design goal for Markdown's formatting syntax is > to make it as readable as possible. 
The idea is that a > Markdown-formatted document should be publishable as-is, as > plain text, without looking like it's been marked up with tags > or formatting instructions. > () The point can be illustrated by comparing a sample of [AsciiDoc](http://www.methods.co.nz/asciidoc/) with an equivalent sample of Markdown. Here is a sample of AsciiDoc from the AsciiDoc manual: ``` 1. List item one. + List item one continued with a second paragraph followed by an Indented block. + ................. $ ls *.sh $ mv *.sh ~/tmp ................. + List item continued with a third paragraph. 2. List item two continued with an open block. + -- This paragraph is part of the preceding list item. a. This list is nested and does not require explicit item continuation. + This paragraph is part of the preceding list item. b. List item b. This paragraph belongs to item two of the outer list. -- ``` And here is the equivalent in Markdown: ``` 1. List item one. List item one continued with a second paragraph followed by an Indented block. $ ls *.sh $ mv *.sh ~/tmp List item continued with a third paragraph. 2. List item two continued with an open block. This paragraph is part of the preceding list item. 1. This list is nested and does not require explicit item continuation. This paragraph is part of the preceding list item. 2. List item b. This paragraph belongs to item two of the outer list. ``` The AsciiDoc version is, arguably, easier to write. You don't need to worry about indentation. But the Markdown version is much easier to read. The nesting of list items is apparent to the eye in the source, not just in the processed document. ## Why is a spec needed? John Gruber's [canonical description of Markdown's syntax](http://daringfireball.net/projects/markdown/syntax) does not specify the syntax unambiguously. Here are some examples of questions it does not answer: 1. How much indentation is needed for a sublist? 
The spec says that continuation paragraphs need to be indented four spaces, but is not fully explicit about sublists. It is natural to think that they, too, must be indented four spaces, but `Markdown.pl` does not require that. This is hardly a "corner case," and divergences between implementations on this issue often lead to surprises for users in real documents. (See [this comment by John Gruber](http://article.gmane.org/gmane.text.markdown.general/1997).) 2. Is a blank line needed before a block quote or heading? Most implementations do not require the blank line. However, this can lead to unexpected results in hard-wrapped text, and also to ambiguities in parsing (note that some implementations put the heading inside the blockquote, while others do not). (John Gruber has also spoken [in favor of requiring the blank lines](http://article.gmane.org/gmane.text.markdown.general/2146).) 3. Is a blank line needed before an indented code block? (`Markdown.pl` requires it, but this is not mentioned in the documentation, and some implementations do not require it.) ``` markdown paragraph code? ``` 4. What is the exact rule for determining when list items get wrapped in `

` tags? Can a list be partially "loose" and partially "tight"? What should we do with a list like this? ``` markdown 1. one 2. two 3. three ``` Or this? ``` markdown 1. one - a - b 2. two ``` (There are some relevant comments by John Gruber [here](http://article.gmane.org/gmane.text.markdown.general/2554).) 5. Can list markers be indented? Can ordered list markers be right-aligned? ``` markdown 8. item 1 9. item 2 10. item 2a ``` 6. Is this one list with a thematic break in its second item, or two lists separated by a thematic break? ``` markdown * a * * * * * * b ``` 7. When list markers change from numbers to bullets, do we have two lists or one? (The Markdown syntax description suggests two, but the perl scripts and many other implementations produce one.) ``` markdown 1. fee 2. fie - foe - fum ``` 8. What are the precedence rules for the markers of inline structure? For example, is the following a valid link, or does the code span take precedence ? ``` markdown [a backtick (`)](/url) and [another backtick (`)](/url). ``` 9. What are the precedence rules for markers of emphasis and strong emphasis? For example, how should the following be parsed? ``` markdown *foo *bar* baz* ``` 10. What are the precedence rules between block-level and inline-level structure? For example, how should the following be parsed? ``` markdown - `a long code span can contain a hyphen like this - and it can screw things up` ``` 11. Can list items include section headings? (`Markdown.pl` does not allow this, but does allow blockquotes to include headings.) ``` markdown - # Heading ``` 12. Can list items be empty? ``` markdown * a * * b ``` 13. Can link references be defined inside block quotes or list items? ``` markdown > Blockquote [foo]. > > [foo]: /url ``` 14. If there are multiple definitions for the same reference, which takes precedence? 
``` markdown [foo]: /url1 [foo]: /url2 [foo][] ``` In the absence of a spec, early implementers consulted `Markdown.pl` to resolve these ambiguities. But `Markdown.pl` was quite buggy, and gave manifestly bad results in many cases, so it was not a satisfactory replacement for a spec. Because there is no unambiguous spec, implementations have diverged considerably. As a result, users are often surprised to find that a document that renders one way on one system (say, a GitHub wiki) renders differently on another (say, converting to docbook using pandoc). To make matters worse, because nothing in Markdown counts as a "syntax error," the divergence often isn't discovered right away. ## About this document This document attempts to specify Markdown syntax unambiguously. It contains many examples with side-by-side Markdown and HTML. These are intended to double as conformance tests. An accompanying script `spec_tests.py` can be used to run the tests against any Markdown program: python test/spec_tests.py --spec spec.txt --program PROGRAM Since this document describes how Markdown is to be parsed into an abstract syntax tree, it would have made sense to use an abstract representation of the syntax tree instead of HTML. But HTML is capable of representing the structural distinctions we need to make, and the choice of HTML for the tests makes it possible to run the tests against an implementation without writing an abstract syntax tree renderer. This document is generated from a text file, `spec.txt`, written in Markdown with a small extension for the side-by-side tests. The script `tools/makespec.py` can be used to convert `spec.txt` into HTML or CommonMark (which can then be converted into other formats). In the examples, the `→` character is used to represent tabs. # Preliminaries ## Characters and lines Any sequence of [characters] is a valid CommonMark document. A [character](@) is a Unicode code point. 
Although some code points (for example, combining accents) do not correspond to characters in an intuitive sense, all code points count as characters for purposes of this spec. This spec does not specify an encoding; it thinks of lines as composed of [characters] rather than bytes. A conforming parser may be limited to a certain encoding. A [line](@) is a sequence of zero or more [characters] other than newline (`U+000A`) or carriage return (`U+000D`), followed by a [line ending] or by the end of file. A [line ending](@) is a newline (`U+000A`), a carriage return (`U+000D`) not followed by a newline, or a carriage return and a following newline. A line containing no characters, or a line containing only spaces (`U+0020`) or tabs (`U+0009`), is called a [blank line](@). The following definitions of character classes will be used in this spec: A [whitespace character](@) is a space (`U+0020`), tab (`U+0009`), newline (`U+000A`), line tabulation (`U+000B`), form feed (`U+000C`), or carriage return (`U+000D`). [Whitespace](@) is a sequence of one or more [whitespace characters]. A [Unicode whitespace character](@) is any code point in the Unicode `Zs` general category, or a tab (`U+0009`), carriage return (`U+000D`), newline (`U+000A`), or form feed (`U+000C`). [Unicode whitespace](@) is a sequence of one or more [Unicode whitespace characters]. A [space](@) is `U+0020`. A [non-whitespace character](@) is any character that is not a [whitespace character]. An [ASCII punctuation character](@) is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`, `*`, `+`, `,`, `-`, `.`, `/` (U+0021–2F), `:`, `;`, `<`, `=`, `>`, `?`, `@` (U+003A–0040), `[`, `\`, `]`, `^`, `_`, `` ` `` (U+005B–0060), `{`, `|`, `}`, or `~` (U+007B–007E). A [punctuation character](@) is an [ASCII punctuation character] or anything in the general Unicode categories `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`. ## Tabs Tabs in lines are not expanded to [spaces]. 
However, in contexts where whitespace helps to define block structure, tabs behave as if they were replaced by spaces with a tab stop of 4 characters. Thus, for example, a tab can be used instead of four spaces in an indented code block. (Note, however, that internal tabs are passed through as literal tabs, not expanded to spaces.) ```````````````````````````````` example →foo→baz→→bim .

foo→baz→→bim
```````````````````````````````` ```````````````````````````````` example →foo→baz→→bim .
foo→baz→→bim
```````````````````````````````` ```````````````````````````````` example a→a ὐ→a .
a→a
ὐ→a
```````````````````````````````` In the following example, a continuation paragraph of a list item is indented with a tab; this has exactly the same effect as indentation with four spaces would: ```````````````````````````````` example - foo →bar .
  • foo

    bar

```````````````````````````````` ```````````````````````````````` example - foo →→bar .
  • foo

      bar
    
```````````````````````````````` Normally the `>` that begins a block quote may be followed optionally by a space, which is not considered part of the content. In the following case `>` is followed by a tab, which is treated as if it were expanded into three spaces. Since one of these spaces is considered part of the delimiter, `foo` is considered to be indented six spaces inside the block quote context, so we get an indented code block starting with two spaces. ```````````````````````````````` example >→→foo .
  foo
```````````````````````````````` ```````````````````````````````` example -→→foo .
  •   foo
    
```````````````````````````````` ```````````````````````````````` example foo →bar .
foo
bar
```````````````````````````````` ```````````````````````````````` example - foo - bar → - baz .
  • foo
    • bar
      • baz
```````````````````````````````` ```````````````````````````````` example #→Foo .

Foo

```````````````````````````````` ```````````````````````````````` example *→*→*→ .
```````````````````````````````` ## Insecure characters For security reasons, the Unicode character `U+0000` must be replaced with the REPLACEMENT CHARACTER (`U+FFFD`). # Blocks and inlines We can think of a document as a sequence of [blocks](@)---structural elements like paragraphs, block quotations, lists, headings, rules, and code blocks. Some blocks (like block quotes and list items) contain other blocks; others (like headings and paragraphs) contain [inline](@) content---text, links, emphasized text, images, code spans, and so on. ## Precedence Indicators of block structure always take precedence over indicators of inline structure. So, for example, the following is a list with two items, not a list with one item containing a code span: ```````````````````````````````` example - `one - two` .
<ul>
<li>`one</li>
<li>two`</li>
</ul>
```````````````````````````````` This means that parsing can proceed in two steps: first, the block structure of the document can be discerned; second, text lines inside paragraphs, headings, and other block constructs can be parsed for inline structure. The second step requires information about link reference definitions that will be available only at the end of the first step. Note that the first step requires processing lines in sequence, but the second can be parallelized, since the inline parsing of one block element does not affect the inline parsing of any other. ## Container blocks and leaf blocks We can divide blocks into two types: [container blocks](@), which can contain other blocks, and [leaf blocks](@), which cannot. # Leaf blocks This section describes the different kinds of leaf block that make up a Markdown document. ## Thematic breaks A line consisting of 0-3 spaces of indentation, followed by a sequence of three or more matching `-`, `_`, or `*` characters, each followed optionally by any number of spaces or tabs, forms a [thematic break](@). ```````````````````````````````` example *** --- ___ .


```````````````````````````````` Wrong characters: ```````````````````````````````` example +++ .

<p>+++</p>

```````````````````````````````` ```````````````````````````````` example === .

<p>===</p>

```````````````````````````````` Not enough characters: ```````````````````````````````` example -- ** __ .

<p>--
**
__</p>

```````````````````````````````` One to three spaces indent are allowed: ```````````````````````````````` example *** *** *** .


```````````````````````````````` Four spaces is too many: ```````````````````````````````` example *** .
<pre><code>***
</code></pre>
```````````````````````````````` ```````````````````````````````` example Foo *** .

<p>Foo
***</p>

```````````````````````````````` More than three characters may be used: ```````````````````````````````` example _____________________________________ .
```````````````````````````````` Spaces are allowed between the characters: ```````````````````````````````` example - - - .
```````````````````````````````` ```````````````````````````````` example ** * ** * ** * ** .
```````````````````````````````` ```````````````````````````````` example - - - - .
```````````````````````````````` Spaces are allowed at the end: ```````````````````````````````` example - - - - .
```````````````````````````````` However, no other characters may occur in the line: ```````````````````````````````` example _ _ _ _ a a------ ---a--- .

<p>_ _ _ _ a</p>
<p>a------</p>
<p>---a---</p>

```````````````````````````````` It is required that all of the [non-whitespace characters] be the same. So, this is not a thematic break: ```````````````````````````````` example *-* .

<p><em>-</em></p>

```````````````````````````````` Thematic breaks do not need blank lines before or after: ```````````````````````````````` example - foo *** - bar .
<ul>
<li>foo</li>
</ul>
<hr />
<ul>
<li>bar</li>
</ul>
```````````````````````````````` Thematic breaks can interrupt a paragraph: ```````````````````````````````` example Foo *** bar .

<p>Foo</p>
<hr />
<p>bar</p>

```````````````````````````````` If a line of dashes that meets the above conditions for being a thematic break could also be interpreted as the underline of a [setext heading], the interpretation as a [setext heading] takes precedence. Thus, for example, this is a setext heading, not a paragraph followed by a thematic break: ```````````````````````````````` example Foo --- bar .

<h2>Foo</h2>
<p>bar</p>

```````````````````````````````` When both a thematic break and a list item are possible interpretations of a line, the thematic break takes precedence: ```````````````````````````````` example * Foo * * * * Bar .
<ul>
<li>Foo</li>
</ul>
<hr />
<ul>
<li>Bar</li>
</ul>
```````````````````````````````` If you want a thematic break in a list item, use a different bullet: ```````````````````````````````` example - Foo - * * * .
<ul>
<li>Foo</li>
<li>
<hr />
</li>
</ul>
```````````````````````````````` ## ATX headings An [ATX heading](@) consists of a string of characters, parsed as inline content, between an opening sequence of 1--6 unescaped `#` characters and an optional closing sequence of any number of unescaped `#` characters. The opening sequence of `#` characters must be followed by a [space] or by the end of line. The optional closing sequence of `#`s must be preceded by a [space] and may be followed by spaces only. The opening `#` character may be indented 0-3 spaces. The raw contents of the heading are stripped of leading and trailing spaces before being parsed as inline content. The heading level is equal to the number of `#` characters in the opening sequence. Simple headings: ```````````````````````````````` example # foo ## foo ### foo #### foo ##### foo ###### foo .

<h1>foo</h1>
<h2>foo</h2>
<h3>foo</h3>
<h4>foo</h4>
<h5>foo</h5>
<h6>foo</h6>
```````````````````````````````` More than six `#` characters is not a heading: ```````````````````````````````` example ####### foo .

<p>####### foo</p>

```````````````````````````````` At least one space is required between the `#` characters and the heading's contents, unless the heading is empty. Note that many implementations currently do not require the space. However, the space was required by the [original ATX implementation](http://www.aaronsw.com/2002/atx/atx.py), and it helps prevent things like the following from being parsed as headings: ```````````````````````````````` example #5 bolt #hashtag .

<p>#5 bolt</p>
<p>#hashtag</p>

```````````````````````````````` This is not a heading, because the first `#` is escaped: ```````````````````````````````` example \## foo .

## foo

```````````````````````````````` Contents are parsed as inlines: ```````````````````````````````` example # foo *bar* \*baz\* .

foo bar *baz*

```````````````````````````````` Leading and trailing [whitespace] is ignored in parsing inline content: ```````````````````````````````` example # foo .

foo

```````````````````````````````` One to three spaces indentation are allowed: ```````````````````````````````` example ### foo ## foo # foo .

foo

foo

foo

```````````````````````````````` Four spaces are too much: ```````````````````````````````` example # foo .
# foo
```````````````````````````````` ```````````````````````````````` example foo # bar .

foo # bar

```````````````````````````````` A closing sequence of `#` characters is optional: ```````````````````````````````` example ## foo ## ### bar ### .

foo

bar

```````````````````````````````` It need not be the same length as the opening sequence: ```````````````````````````````` example # foo ################################## ##### foo ## .

foo

foo
```````````````````````````````` Spaces are allowed after the closing sequence: ```````````````````````````````` example ### foo ### .

foo

```````````````````````````````` A sequence of `#` characters with anything but [spaces] following it is not a closing sequence, but counts as part of the contents of the heading: ```````````````````````````````` example ### foo ### b .

foo ### b

```````````````````````````````` The closing sequence must be preceded by a space: ```````````````````````````````` example # foo# .

foo#

```````````````````````````````` Backslash-escaped `#` characters do not count as part of the closing sequence: ```````````````````````````````` example ### foo \### ## foo #\## # foo \# .

foo ###

foo ###

foo #

```````````````````````````````` ATX headings need not be separated from surrounding content by blank lines, and they can interrupt paragraphs: ```````````````````````````````` example **** ## foo **** .

foo


```````````````````````````````` ```````````````````````````````` example Foo bar # baz Bar foo .

Foo bar

baz

Bar foo

```````````````````````````````` ATX headings can be empty: ```````````````````````````````` example ## # ### ### .

```````````````````````````````` ## Setext headings A [setext heading](@) consists of one or more lines of text, each containing at least one [non-whitespace character], with no more than 3 spaces indentation, followed by a [setext heading underline]. The lines of text must be such that, were they not followed by the setext heading underline, they would be interpreted as a paragraph: they cannot be interpretable as a [code fence], [ATX heading][ATX headings], [block quote][block quotes], [thematic break][thematic breaks], [list item][list items], or [HTML block][HTML blocks]. A [setext heading underline](@) is a sequence of `=` characters or a sequence of `-` characters, with no more than 3 spaces indentation and any number of trailing spaces. If a line containing a single `-` can be interpreted as an empty [list item][list items], it should be interpreted this way and not as a [setext heading underline]. The heading is a level 1 heading if `=` characters are used in the [setext heading underline], and a level 2 heading if `-` characters are used. The contents of the heading are the result of parsing the preceding lines of text as CommonMark inline content. In general, a setext heading need not be preceded or followed by a blank line. However, it cannot interrupt a paragraph, so when a setext heading comes after a paragraph, a blank line is needed between them. Simple examples: ```````````````````````````````` example Foo *bar* ========= Foo *bar* --------- .

Foo bar

Foo bar

```````````````````````````````` The content of the header may span more than one line: ```````````````````````````````` example Foo *bar baz* ==== .

Foo bar baz

```````````````````````````````` The contents are the result of parsing the heading's raw content as inlines. The heading's raw content is formed by concatenating the lines and removing initial and final [whitespace]. ```````````````````````````````` example Foo *bar baz*→ ==== .

Foo bar baz

```````````````````````````````` The underlining can be any length: ```````````````````````````````` example Foo ------------------------- Foo = .

Foo

Foo

```````````````````````````````` The heading content can be indented up to three spaces, and need not line up with the underlining: ```````````````````````````````` example Foo --- Foo ----- Foo === .

Foo

Foo

Foo

```````````````````````````````` Four spaces indent is too much: ```````````````````````````````` example Foo --- Foo --- .
Foo
---

Foo

```````````````````````````````` The setext heading underline can be indented up to three spaces, and may have trailing spaces: ```````````````````````````````` example Foo ---- .

Foo

```````````````````````````````` Four spaces is too much: ```````````````````````````````` example Foo --- .

Foo ---

```````````````````````````````` The setext heading underline cannot contain internal spaces: ```````````````````````````````` example Foo = = Foo --- - .

Foo = =

Foo


```````````````````````````````` Trailing spaces in the content line do not cause a line break: ```````````````````````````````` example Foo ----- .

Foo

```````````````````````````````` Nor does a backslash at the end: ```````````````````````````````` example Foo\ ---- .

Foo\

```````````````````````````````` Since indicators of block structure take precedence over indicators of inline structure, the following are setext headings: ```````````````````````````````` example `Foo ---- ` .

`Foo

`

<a title="a lot

of dashes"/>

```````````````````````````````` The setext heading underline cannot be a [lazy continuation line] in a list item or block quote: ```````````````````````````````` example > Foo --- .

Foo


```````````````````````````````` ```````````````````````````````` example > foo bar === .

foo bar ===

```````````````````````````````` ```````````````````````````````` example - Foo --- .
  • Foo

```````````````````````````````` A blank line is needed between a paragraph and a following setext heading, since otherwise the paragraph becomes part of the heading's content: ```````````````````````````````` example Foo Bar --- .

Foo Bar

```````````````````````````````` But in general a blank line is not required before or after setext headings: ```````````````````````````````` example --- Foo --- Bar --- Baz .

Foo

Bar

Baz

```````````````````````````````` Setext headings cannot be empty: ```````````````````````````````` example ==== .

====

```````````````````````````````` Setext heading text lines must not be interpretable as block constructs other than paragraphs. So, the line of dashes in these examples gets interpreted as a thematic break: ```````````````````````````````` example --- --- .

```````````````````````````````` ```````````````````````````````` example - foo ----- .
  • foo

```````````````````````````````` ```````````````````````````````` example foo --- .
foo

```````````````````````````````` ```````````````````````````````` example > foo ----- .

foo


```````````````````````````````` If you want a heading with `> foo` as its literal text, you can use backslash escapes: ```````````````````````````````` example \> foo ------ .

> foo

```````````````````````````````` **Compatibility note:** Most existing Markdown implementations do not allow the text of setext headings to span multiple lines. But there is no consensus about how to interpret ``` markdown Foo bar --- baz ``` One can find four different interpretations: 1. paragraph "Foo", heading "bar", paragraph "baz" 2. paragraph "Foo bar", thematic break, paragraph "baz" 3. paragraph "Foo bar --- baz" 4. heading "Foo bar", paragraph "baz" We find interpretation 4 most natural, and interpretation 4 increases the expressive power of CommonMark, by allowing multiline headings. Authors who want interpretation 1 can put a blank line after the first paragraph: ```````````````````````````````` example Foo bar --- baz .

Foo

bar

baz

```````````````````````````````` Authors who want interpretation 2 can put blank lines around the thematic break, ```````````````````````````````` example Foo bar --- baz .

Foo bar


baz

```````````````````````````````` or use a thematic break that cannot count as a [setext heading underline], such as ```````````````````````````````` example Foo bar * * * baz .

Foo bar


baz

```````````````````````````````` Authors who want interpretation 3 can use backslash escapes: ```````````````````````````````` example Foo bar \--- baz .

Foo bar --- baz

```````````````````````````````` ## Indented code blocks An [indented code block](@) is composed of one or more [indented chunks] separated by blank lines. An [indented chunk](@) is a sequence of non-blank lines, each indented four or more spaces. The contents of the code block are the literal contents of the lines, including trailing [line endings], minus four spaces of indentation. An indented code block has no [info string]. An indented code block cannot interrupt a paragraph, so there must be a blank line between a paragraph and a following indented code block. (A blank line is not needed, however, between a code block and a following paragraph.) ```````````````````````````````` example a simple indented code block .
a simple
  indented code block
```````````````````````````````` If there is any ambiguity between an interpretation of indentation as a code block and as indicating that material belongs to a [list item][list items], the list item interpretation takes precedence: ```````````````````````````````` example - foo bar .
  • foo

    bar

```````````````````````````````` ```````````````````````````````` example 1. foo - bar .
  1. foo

    • bar
```````````````````````````````` The contents of a code block are literal text, and do not get parsed as Markdown: ```````````````````````````````` example
*hi* - one .
<a/>
*hi*

- one
```````````````````````````````` Here we have three chunks separated by blank lines: ```````````````````````````````` example chunk1 chunk2 chunk3 .
chunk1

chunk2



chunk3
```````````````````````````````` Any initial spaces beyond four will be included in the content, even in interior blank lines: ```````````````````````````````` example chunk1 chunk2 .
chunk1
  
  chunk2
```````````````````````````````` An indented code block cannot interrupt a paragraph. (This allows hanging indents and the like.) ```````````````````````````````` example Foo bar .

Foo bar

```````````````````````````````` However, any non-blank line with fewer than four leading spaces ends the code block immediately. So a paragraph may occur immediately after indented code: ```````````````````````````````` example foo bar .
foo

bar

```````````````````````````````` And indented code can occur immediately before and after other kinds of blocks: ```````````````````````````````` example # Heading foo Heading ------ foo ---- .

Heading

foo

Heading

foo

```````````````````````````````` The first line can be indented more than four spaces: ```````````````````````````````` example foo bar .
    foo
bar
```````````````````````````````` Blank lines preceding or following an indented code block are not included in it: ```````````````````````````````` example foo .
foo
```````````````````````````````` Trailing spaces are included in the code block's content: ```````````````````````````````` example foo .
foo  
```````````````````````````````` ## Fenced code blocks A [code fence](@) is a sequence of at least three consecutive backtick characters (`` ` ``) or tildes (`~`). (Tildes and backticks cannot be mixed.) A [fenced code block](@) begins with a code fence, indented no more than three spaces. The line with the opening code fence may optionally contain some text following the code fence; this is trimmed of leading and trailing whitespace and called the [info string](@). If the [info string] comes after a backtick fence, it may not contain any backtick characters. (The reason for this restriction is that otherwise some inline code would be incorrectly interpreted as the beginning of a fenced code block.) The content of the code block consists of all subsequent lines, until a closing [code fence] of the same type as the code block began with (backticks or tildes), and with at least as many backticks or tildes as the opening code fence. If the leading code fence is indented N spaces, then up to N spaces of indentation are removed from each line of the content (if present). (If a content line is not indented, it is preserved unchanged. If it is indented less than N spaces, all of the indentation is removed.) The closing code fence may be indented up to three spaces, and may be followed only by spaces, which are ignored. If the end of the containing block (or document) is reached and no closing code fence has been found, the code block contains all of the lines after the opening code fence until the end of the containing block (or document). (An alternative spec would require backtracking in the event that a closing code fence is not found. But this makes parsing much less efficient, and there seems to be no real down side to the behavior described here.) A fenced code block may interrupt a paragraph, and does not require a blank line either before or after. The content of a code fence is treated as literal text, not parsed as inlines. 
The first word of the [info string] is typically used to specify the language of the code sample, and rendered in the `class` attribute of the `code` tag. However, this spec does not mandate any particular treatment of the [info string]. Here is a simple example with backticks: ```````````````````````````````` example ``` < > ``` .
<
 >
```````````````````````````````` With tildes: ```````````````````````````````` example ~~~ < > ~~~ .
<
 >
```````````````````````````````` Fewer than three backticks is not enough: ```````````````````````````````` example `` foo `` .

foo

```````````````````````````````` The closing code fence must use the same character as the opening fence: ```````````````````````````````` example ``` aaa ~~~ ``` .
aaa
~~~
```````````````````````````````` ```````````````````````````````` example ~~~ aaa ``` ~~~ .
aaa
```
```````````````````````````````` The closing code fence must be at least as long as the opening fence: ```````````````````````````````` example ```` aaa ``` `````` .
aaa
```
```````````````````````````````` ```````````````````````````````` example ~~~~ aaa ~~~ ~~~~ .
aaa
~~~
```````````````````````````````` Unclosed code blocks are closed by the end of the document (or the enclosing [block quote][block quotes] or [list item][list items]): ```````````````````````````````` example ``` .
```````````````````````````````` ```````````````````````````````` example ````` ``` aaa .

```
aaa
```````````````````````````````` ```````````````````````````````` example > ``` > aaa bbb .
aaa

bbb

```````````````````````````````` A code block can have all empty lines as its content: ```````````````````````````````` example ``` ``` .

  
```````````````````````````````` A code block can be empty: ```````````````````````````````` example ``` ``` .
```````````````````````````````` Fences can be indented. If the opening fence is indented, content lines will have equivalent opening indentation removed, if present: ```````````````````````````````` example ``` aaa aaa ``` .
aaa
aaa
```````````````````````````````` ```````````````````````````````` example ``` aaa aaa aaa ``` .
aaa
aaa
aaa
```````````````````````````````` ```````````````````````````````` example ``` aaa aaa aaa ``` .
aaa
 aaa
aaa
```````````````````````````````` Four spaces indentation produces an indented code block: ```````````````````````````````` example ``` aaa ``` .
```
aaa
```
```````````````````````````````` Closing fences may be indented by 0-3 spaces, and their indentation need not match that of the opening fence: ```````````````````````````````` example ``` aaa ``` .
aaa
```````````````````````````````` ```````````````````````````````` example ``` aaa ``` .
aaa
```````````````````````````````` This is not a closing fence, because it is indented 4 spaces: ```````````````````````````````` example ``` aaa ``` .
aaa
    ```
```````````````````````````````` Code fences (opening and closing) cannot contain internal spaces: ```````````````````````````````` example ``` ``` aaa .

aaa

```````````````````````````````` ```````````````````````````````` example ~~~~~~ aaa ~~~ ~~ .
aaa
~~~ ~~
```````````````````````````````` Fenced code blocks can interrupt paragraphs, and can be followed directly by paragraphs, without a blank line between: ```````````````````````````````` example foo ``` bar ``` baz .

foo

bar

baz

```````````````````````````````` Other blocks can also occur before and after fenced code blocks without an intervening blank line: ```````````````````````````````` example foo --- ~~~ bar ~~~ # baz .

foo

bar

baz

```````````````````````````````` An [info string] can be provided after the opening code fence. Although this spec doesn't mandate any particular treatment of the info string, the first word is typically used to specify the language of the code block. In HTML output, the language is normally indicated by adding a class to the `code` element consisting of `language-` followed by the language name. ```````````````````````````````` example ```ruby def foo(x) return 3 end ``` .
def foo(x)
  return 3
end
```````````````````````````````` ```````````````````````````````` example ~~~~ ruby startline=3 $%@#$ def foo(x) return 3 end ~~~~~~~ .
def foo(x)
  return 3
end
```````````````````````````````` ```````````````````````````````` example ````; ```` .
```````````````````````````````` [Info strings] for backtick code blocks cannot contain backticks: ```````````````````````````````` example ``` aa ``` foo .

aa foo

```````````````````````````````` [Info strings] for tilde code blocks can contain backticks and tildes: ```````````````````````````````` example ~~~ aa ``` ~~~ foo ~~~ .
foo
```````````````````````````````` Closing code fences cannot have [info strings]: ```````````````````````````````` example ``` ``` aaa ``` .
``` aaa
```````````````````````````````` ## HTML blocks An [HTML block](@) is a group of lines that is treated as raw HTML (and will not be escaped in HTML output). There are seven kinds of [HTML block], which can be defined by their start and end conditions. The block begins with a line that meets a [start condition](@) (after up to three spaces optional indentation). It ends with the first subsequent line that meets a matching [end condition](@), or the last line of the document, or the last line of the [container block](#container-blocks) containing the current HTML block, if no line is encountered that meets the [end condition]. If the first line meets both the [start condition] and the [end condition], the block will contain just that line. 1. **Start condition:** line begins with the string ``, or the end of the line.\ **End condition:** line contains an end tag ``, `

LINK

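`, or `</style>` (case-insensitive; it need not match the start tag). 2. **Start condition:** line begins with the string `<!--`.\ **End condition:** line contains the string `-->`. 3. **Start condition:** line begins with the string `<?`.\ **End condition:** line contains the string `?>`. 4. **Start condition:** line begins with the string `<!` followed by an uppercase ASCII letter.\ **End condition:** line contains the character `>`. 5. **Start condition:** line begins with the string `<![CDATA[`.\ **End condition:** line contains the string `]]>`. 6. **Start condition:** line begins with the string `<` or `</` followed by one of the strings (case-insensitive) `address`, `article`, `aside`, `base`, `basefont`, `blockquote`, `body`, `caption`, `center`, `col`, `colgroup`, `dd`, `details`, `dialog`, `dir`, `div`, `dl`, `dt`, `fieldset`, `figcaption`, `figure`, `footer`, `form`, `frame`, `frameset`, `h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `head`, `header`, `hr`, `html`, `iframe`, `legend`, `li`, `link`, `main`, `menu`, `menuitem`, `nav`, `noframes`, `ol`, `optgroup`, `option`, `p`, `param`, `section`, `source`, `summary`, `table`, `tbody`, `td`, `tfoot`, `th`, `thead`, `title`, `tr`, `track`, `ul`, followed by [whitespace], the end of the line, the string `>`, or the string `/>`.\ **End condition:** line is followed by a [blank line]. 7. **Start condition:** line begins with a complete [open tag] (with any [tag name] other than `script`, `style`, or `pre`) or a complete [closing tag], followed only by [whitespace] or the end of the line.\ **End condition:** line is followed by a [blank line]. HTML blocks continue until they are closed by their appropriate [end condition], or the last line of the document or other [container block](#container-blocks). This means any HTML **within an HTML block** that might otherwise be recognised as a start condition will be ignored by the parser and passed through as-is, without changing the parser's state. For instance, `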
<pre>` within an HTML block started by `<table>` will not affect
the parser state; as the HTML block was started by start condition 6, it
will end at any blank line. This can be surprising:

```````````````````````````````` example
**Hello**,

_world_.
.
**Hello**,

world.

```````````````````````````````` In this case, the HTML block is terminated by the newline — the `**Hello**` text remains verbatim — and regular parsing resumes, with a paragraph, emphasised `world` and inline and block HTML following. All types of [HTML blocks] except type 7 may interrupt a paragraph. Blocks of type 7 may not interrupt a paragraph. (This restriction is intended to prevent unwanted interpretation of long tags inside a wrapped paragraph as starting HTML blocks.) Some simple examples follow. Here are some basic HTML blocks of type 6: ```````````````````````````````` example
hi
okay. .
hi

okay.

```````````````````````````````` ```````````````````````````````` example
*foo* ```````````````````````````````` Here we have two HTML blocks with a Markdown paragraph between them: ```````````````````````````````` example
*Markdown*
.

Markdown

```````````````````````````````` The tag on the first line can be partial, as long as it is split where there would be whitespace: ```````````````````````````````` example
.
```````````````````````````````` ```````````````````````````````` example
.
```````````````````````````````` An open tag need not be closed: ```````````````````````````````` example
*foo* *bar* .
*foo*

bar

```````````````````````````````` A partial tag need not even be completed (garbage in, garbage out): ```````````````````````````````` example
. ```````````````````````````````` ```````````````````````````````` example
foo
.
foo
```````````````````````````````` Everything until the next blank line or end of document gets included in the HTML block. So, in the following example, what looks like a Markdown code block is actually part of the HTML block, which continues until a blank line or the end of the document is reached: ```````````````````````````````` example
``` c int x = 33; ``` .
``` c int x = 33; ``` ```````````````````````````````` To start an [HTML block] with a tag that is *not* in the list of block-level tags in (6), you must put the tag by itself on the first line (and it must be complete): ```````````````````````````````` example *bar* . *bar* ```````````````````````````````` In type 7 blocks, the [tag name] can be anything: ```````````````````````````````` example *bar* . *bar* ```````````````````````````````` ```````````````````````````````` example *bar* . *bar* ```````````````````````````````` ```````````````````````````````` example *bar* . *bar* ```````````````````````````````` These rules are designed to allow us to work with tags that can function as either block-level or inline-level tags. The `` tag is a nice example. We can surround content with `` tags in three different ways. In this case, we get a raw HTML block, because the `` tag is on a line by itself: ```````````````````````````````` example *foo* . *foo* ```````````````````````````````` In this case, we get a raw HTML block that just includes the `` tag (because it ends with the following blank line). So the contents get interpreted as CommonMark: ```````````````````````````````` example *foo* .

foo

```````````````````````````````` Finally, in this case, the `` tags are interpreted as [raw HTML] *inside* the CommonMark paragraph. (Because the tag is not on a line by itself, we get inline HTML rather than an [HTML block].) ```````````````````````````````` example *foo* .

foo

```````````````````````````````` HTML tags designed to contain literal content (`script`, `style`, `pre`), comments, processing instructions, and declarations are treated somewhat differently. Instead of ending at the first blank line, these blocks end at the first line containing a corresponding end tag. As a result, these blocks can contain blank lines: A pre tag (type 1): ```````````````````````````````` example

import Text.HTML.TagSoup

main :: IO ()
main = print $ parseTags tags
okay .

import Text.HTML.TagSoup

main :: IO ()
main = print $ parseTags tags

okay

```````````````````````````````` A script tag (type 1): ```````````````````````````````` example okay .

okay

```````````````````````````````` A style tag (type 1): ```````````````````````````````` example okay .

okay

```````````````````````````````` If there is no matching end tag, the block will end at the end of the document (or the enclosing [block quote][block quotes] or [list item][list items]): ```````````````````````````````` example *foo* .

foo

```````````````````````````````` ```````````````````````````````` example *bar* *baz* . *bar*

baz

```````````````````````````````` Note that anything on the last line after the end tag will be included in the [HTML block]: ```````````````````````````````` example 1. *bar* . 1. *bar* ```````````````````````````````` A comment (type 2): ```````````````````````````````` example okay .

okay

```````````````````````````````` A processing instruction (type 3): ```````````````````````````````` example '; ?> okay . '; ?>

okay

```````````````````````````````` A declaration (type 4): ```````````````````````````````` example . ```````````````````````````````` CDATA (type 5): ```````````````````````````````` example okay .

okay

```````````````````````````````` The opening tag can be indented 1-3 spaces, but not 4: ```````````````````````````````` example .
<!-- foo -->
```````````````````````````````` ```````````````````````````````` example
.
<div>
```````````````````````````````` An HTML block of types 1--6 can interrupt a paragraph, and need not be preceded by a blank line. ```````````````````````````````` example Foo
bar
.

Foo

bar
```````````````````````````````` However, a following blank line is needed, except at the end of a document, and except for blocks of types 1--5, [above][HTML block]: ```````````````````````````````` example
bar
*foo* .
bar
*foo* ```````````````````````````````` HTML blocks of type 7 cannot interrupt a paragraph: ```````````````````````````````` example Foo baz .

Foo baz

```````````````````````````````` This rule differs from John Gruber's original Markdown syntax specification, which says: > The only restrictions are that block-level HTML elements — > e.g. `
<div>`, `<table>`, `<pre>`, `<p>
`, etc. — must be separated from > surrounding content by blank lines, and the start and end tags of the > block should not be indented with tabs or spaces. In some ways Gruber's rule is more restrictive than the one given here: - It requires that an HTML block be preceded by a blank line. - It does not allow the start tag to be indented. - It requires a matching end tag, which it also does not allow to be indented. Most Markdown implementations (including some of Gruber's own) do not respect all of these restrictions. There is one respect, however, in which Gruber's rule is more liberal than the one given here, since it allows blank lines to occur inside an HTML block. There are two reasons for disallowing them here. First, it removes the need to parse balanced tags, which is expensive and can require backtracking from the end of the document if no matching end tag is found. Second, it provides a very simple and flexible way of including Markdown content inside HTML tags: simply separate the Markdown from the HTML using blank lines: Compare: ```````````````````````````````` example

*Emphasized* text.
.

Emphasized text.

```````````````````````````````` ```````````````````````````````` example
*Emphasized* text.
.
*Emphasized* text.
```````````````````````````````` Some Markdown implementations have adopted a convention of interpreting content inside tags as text if the open tag has the attribute `markdown=1`. The rule given above seems a simpler and more elegant way of achieving the same expressive power, which is also much simpler to parse. The main potential drawback is that one can no longer paste HTML blocks into Markdown documents with 100% reliability. However, *in most cases* this will work fine, because the blank lines in HTML are usually followed by HTML block tags. For example: ```````````````````````````````` example
Hi
.
Hi
```````````````````````````````` There are problems, however, if the inner tags are indented *and* separated by spaces, as then they will be interpreted as an indented code block: ```````````````````````````````` example
Hi
.
<td>
  Hi
</td>
```````````````````````````````` Fortunately, blank lines are usually not necessary and can be deleted. The exception is inside `
<pre>` tags, but as described
[above][HTML blocks], raw HTML blocks starting with `<pre>`
*can* contain blank lines.

## Link reference definitions

A [link reference definition](@)
consists of a [link label], indented up to three spaces, followed
by a colon (`:`), optional [whitespace] (including up to one
[line ending]), a [link destination],
optional [whitespace] (including up to one
[line ending]), and an optional [link
title], which if it is present must be separated
from the [link destination] by [whitespace].
No further [non-whitespace characters] may occur on the line.

A [link reference definition]
does not correspond to a structural element of a document.  Instead, it
defines a label which can be used in [reference links]
and reference-style [images] elsewhere in the document.  [Link
reference definitions] can come either before or after the links that use
them.

```````````````````````````````` example
[foo]: /url "title"

[foo]
.

foo

```````````````````````````````` ```````````````````````````````` example [foo]: /url 'the title' [foo] .

foo

```````````````````````````````` ```````````````````````````````` example [Foo*bar\]]:my_(url) 'title (with parens)' [Foo*bar\]] .

Foo*bar]

```````````````````````````````` ```````````````````````````````` example [Foo bar]: 'title' [Foo bar] .

Foo bar

```````````````````````````````` The title may extend over multiple lines: ```````````````````````````````` example [foo]: /url ' title line1 line2 ' [foo] .

foo

```````````````````````````````` However, it may not contain a [blank line]: ```````````````````````````````` example [foo]: /url 'title with blank line' [foo] .

[foo]: /url 'title

with blank line'

[foo]

```````````````````````````````` The title may be omitted: ```````````````````````````````` example [foo]: /url [foo] .

foo

```````````````````````````````` The link destination may not be omitted: ```````````````````````````````` example [foo]: [foo] .

[foo]:

[foo]

```````````````````````````````` However, an empty link destination may be specified using angle brackets: ```````````````````````````````` example [foo]: <> [foo] .

foo

```````````````````````````````` The title must be separated from the link destination by whitespace: ```````````````````````````````` example [foo]: (baz) [foo] .

[foo]: (baz)

[foo]

```````````````````````````````` Both title and destination can contain backslash escapes and literal backslashes: ```````````````````````````````` example [foo]: /url\bar\*baz "foo\"bar\baz" [foo] .

foo

```````````````````````````````` A link can come before its corresponding definition: ```````````````````````````````` example [foo] [foo]: url .

foo

```````````````````````````````` If there are several matching definitions, the first one takes precedence: ```````````````````````````````` example [foo] [foo]: first [foo]: second .

foo

```````````````````````````````` As noted in the section on [Links], matching of labels is case-insensitive (see [matches]). ```````````````````````````````` example [FOO]: /url [Foo] .

Foo

```````````````````````````````` ```````````````````````````````` example [ΑΓΩ]: /φου [αγω] .

αγω

```````````````````````````````` Here is a link reference definition with no corresponding link. It contributes nothing to the document. ```````````````````````````````` example [foo]: /url . ```````````````````````````````` Here is another one: ```````````````````````````````` example [ foo ]: /url bar .

bar

```````````````````````````````` This is not a link reference definition, because there are [non-whitespace characters] after the title: ```````````````````````````````` example [foo]: /url "title" ok .

[foo]: /url "title" ok

```````````````````````````````` This is a link reference definition, but it has no title: ```````````````````````````````` example [foo]: /url "title" ok .

"title" ok

```````````````````````````````` This is not a link reference definition, because it is indented four spaces: ```````````````````````````````` example [foo]: /url "title" [foo] .
[foo]: /url "title"

[foo]

```````````````````````````````` This is not a link reference definition, because it occurs inside a code block: ```````````````````````````````` example ``` [foo]: /url ``` [foo] .
[foo]: /url

[foo]

```````````````````````````````` A [link reference definition] cannot interrupt a paragraph. ```````````````````````````````` example Foo [bar]: /baz [bar] .

Foo [bar]: /baz

[bar]

```````````````````````````````` However, it can directly follow other block elements, such as headings and thematic breaks, and it need not be followed by a blank line. ```````````````````````````````` example # [Foo] [foo]: /url > bar .

Foo

bar

```````````````````````````````` ```````````````````````````````` example [foo]: /url bar === [foo] .

bar

foo

```````````````````````````````` ```````````````````````````````` example [foo]: /url === [foo] .

=== foo

```````````````````````````````` Several [link reference definitions] can occur one after another, without intervening blank lines. ```````````````````````````````` example [foo]: /foo-url "foo" [bar]: /bar-url "bar" [baz]: /baz-url [foo], [bar], [baz] .

foo, bar, baz

```````````````````````````````` [Link reference definitions] can occur inside block containers, like lists and block quotations. They affect the entire document, not just the container in which they are defined: ```````````````````````````````` example [foo] > [foo]: /url .

foo

```````````````````````````````` Whether something is a [link reference definition] is independent of whether the link reference it defines is used in the document. Thus, for example, the following document contains just a link reference definition, and no visible content: ```````````````````````````````` example [foo]: /url . ```````````````````````````````` ## Paragraphs A sequence of non-blank lines that cannot be interpreted as other kinds of blocks forms a [paragraph](@). The contents of the paragraph are the result of parsing the paragraph's raw content as inlines. The paragraph's raw content is formed by concatenating the lines and removing initial and final [whitespace]. A simple example with two paragraphs: ```````````````````````````````` example aaa bbb .

aaa

bbb

```````````````````````````````` Paragraphs can contain multiple lines, but no blank lines: ```````````````````````````````` example aaa bbb ccc ddd .

aaa bbb

ccc ddd

```````````````````````````````` Multiple blank lines between paragraphs have no effect: ```````````````````````````````` example aaa bbb .

aaa

bbb

```````````````````````````````` Leading spaces are skipped: ```````````````````````````````` example aaa bbb .

aaa bbb

```````````````````````````````` Lines after the first may be indented any amount, since indented code blocks cannot interrupt paragraphs. ```````````````````````````````` example aaa bbb ccc .

aaa bbb ccc

```````````````````````````````` However, the first line may be indented at most three spaces, or an indented code block will be triggered: ```````````````````````````````` example aaa bbb .

aaa bbb

```````````````````````````````` ```````````````````````````````` example aaa bbb .
aaa

bbb

```````````````````````````````` Final spaces are stripped before inline parsing, so a paragraph that ends with two or more spaces will not end with a [hard line break]: ```````````````````````````````` example aaa bbb .

aaa
bbb

```````````````````````````````` ## Blank lines [Blank lines] between block-level elements are ignored, except for the role they play in determining whether a [list] is [tight] or [loose]. Blank lines at the beginning and end of the document are also ignored. ```````````````````````````````` example aaa # aaa .

aaa

aaa

````````````````````````````````
## Tables (extension) GFM enables the `table` extension, where an additional leaf block type is available. A [table](@) is an arrangement of data with rows and columns, consisting of a single header row, a [delimiter row] separating the header from the data, and zero or more data rows. Each row consists of cells containing arbitrary text, in which [inlines] are parsed, separated by pipes (`|`). A leading and trailing pipe is also recommended, both for clarity of reading and to avoid parsing ambiguity. Spaces between pipes and cell content are trimmed. Block-level elements cannot be inserted in a table. The [delimiter row](@) consists of cells whose only content is hyphens (`-`), and optionally, a leading or trailing colon (`:`), or both, to indicate left, right, or center alignment respectively. ```````````````````````````````` example table | foo | bar | | --- | --- | | baz | bim | .
foo bar
baz bim
```````````````````````````````` Cells in one column don't need to match length, though it's easier to read if they are. Likewise, use of leading and trailing pipes may be inconsistent: ```````````````````````````````` example table | abc | defghi | :-: | -----------: bar | baz .
abc defghi
bar baz
```````````````````````````````` Include a pipe in a cell's content by escaping it, including inside other inline spans: ```````````````````````````````` example table | f\|oo | | ------ | | b `\|` az | | b **\|** im | .
f|oo
b | az
b | im
```````````````````````````````` The table is broken at the first empty line, or beginning of another block-level structure: ```````````````````````````````` example table | abc | def | | --- | --- | | bar | baz | > bar .
abc def
bar baz

bar

```````````````````````````````` ```````````````````````````````` example table | abc | def | | --- | --- | | bar | baz | bar bar .
abc def
bar baz
bar

bar

```````````````````````````````` The header row must match the [delimiter row] in the number of cells. If not, a table will not be recognized: ```````````````````````````````` example table | abc | def | | --- | | bar | .

| abc | def | | --- | | bar |

```````````````````````````````` The remainder of the table's rows may vary in the number of cells. If a row has fewer cells than the header row, empty cells are inserted. If it has more, the excess is ignored: ```````````````````````````````` example table | abc | def | | --- | --- | | bar | | bar | baz | boo | .
abc def
bar
bar baz
```````````````````````````````` If there are no rows in the body, no `<tbody>` is generated in HTML output: ```````````````````````````````` example table | abc | def | | --- | --- | .
abc def
````````````````````````````````
# Container blocks A [container block](#container-blocks) is a block that has other blocks as its contents. There are two basic kinds of container blocks: [block quotes] and [list items]. [Lists] are meta-containers for [list items]. We define the syntax for container blocks recursively. The general form of the definition is: > If X is a sequence of blocks, then the result of > transforming X in such-and-such a way is a container of type Y > with these blocks as its content. So, we explain what counts as a block quote or list item by explaining how these can be *generated* from their contents. This should suffice to define the syntax, although it does not give a recipe for *parsing* these constructions. (A recipe is provided below in the section entitled [A parsing strategy](#appendix-a-parsing-strategy).) ## Block quotes A [block quote marker](@) consists of 0-3 spaces of initial indent, plus (a) the character `>` together with a following space, or (b) a single character `>` not followed by a space. The following rules define [block quotes]: 1. **Basic case.** If a string of lines *Ls* constitute a sequence of blocks *Bs*, then the result of prepending a [block quote marker] to the beginning of each line in *Ls* is a [block quote](#block-quotes) containing *Bs*. 2. **Laziness.** If a string of lines *Ls* constitute a [block quote](#block-quotes) with contents *Bs*, then the result of deleting the initial [block quote marker] from one or more lines in which the next [non-whitespace character] after the [block quote marker] is [paragraph continuation text] is a block quote with *Bs* as its content. [Paragraph continuation text](@) is text that will be parsed as part of the content of a paragraph, but does not occur at the beginning of the paragraph. 3. **Consecutiveness.** A document cannot contain two [block quotes] in a row unless there is a [blank line] between them. Nothing else counts as a [block quote](#block-quotes). 
Here is a simple example: ```````````````````````````````` example > # Foo > bar > baz .

Foo

bar baz

```````````````````````````````` The spaces after the `>` characters can be omitted: ```````````````````````````````` example ># Foo >bar > baz .

Foo

bar baz

```````````````````````````````` The `>` characters can be indented 1-3 spaces: ```````````````````````````````` example > # Foo > bar > baz .

Foo

bar baz

```````````````````````````````` Four spaces gives us a code block: ```````````````````````````````` example > # Foo > bar > baz .
> # Foo
> bar
> baz
```````````````````````````````` The Laziness clause allows us to omit the `>` before [paragraph continuation text]: ```````````````````````````````` example > # Foo > bar baz .

Foo

bar baz

```````````````````````````````` A block quote can contain some lazy and some non-lazy continuation lines: ```````````````````````````````` example > bar baz > foo .

bar baz foo

```````````````````````````````` Laziness only applies to lines that would have been continuations of paragraphs had they been prepended with [block quote markers]. For example, the `> ` cannot be omitted in the second line of ``` markdown > foo > --- ``` without changing the meaning: ```````````````````````````````` example > foo --- .

foo


```````````````````````````````` Similarly, if we omit the `> ` in the second line of ``` markdown > - foo > - bar ``` then the block quote ends after the first line: ```````````````````````````````` example > - foo - bar .
  • foo
  • bar
```````````````````````````````` For the same reason, we can't omit the `> ` in front of subsequent lines of an indented or fenced code block: ```````````````````````````````` example > foo bar .
foo
bar
```````````````````````````````` ```````````````````````````````` example > ``` foo ``` .

foo

```````````````````````````````` Note that in the following case, we have a [lazy continuation line]: ```````````````````````````````` example > foo - bar .

foo - bar

```````````````````````````````` To see why, note that in ```markdown > foo > - bar ``` the `- bar` is indented too far to start a list, and can't be an indented code block because indented code blocks cannot interrupt paragraphs, so it is [paragraph continuation text]. A block quote can be empty: ```````````````````````````````` example > .
```````````````````````````````` ```````````````````````````````` example > > > .
```````````````````````````````` A block quote can have initial or final blank lines: ```````````````````````````````` example > > foo > .

foo

```````````````````````````````` A blank line always separates block quotes: ```````````````````````````````` example > foo > bar .

foo

bar

```````````````````````````````` (Most current Markdown implementations, including John Gruber's original `Markdown.pl`, will parse this example as a single block quote with two paragraphs. But it seems better to allow the author to decide whether two block quotes or one are wanted.) Consecutiveness means that if we put these block quotes together, we get a single block quote: ```````````````````````````````` example > foo > bar .

foo bar

```````````````````````````````` To get a block quote with two paragraphs, use: ```````````````````````````````` example > foo > > bar .

foo

bar

```````````````````````````````` Block quotes can interrupt paragraphs: ```````````````````````````````` example foo > bar .

foo

bar

```````````````````````````````` In general, blank lines are not needed before or after block quotes: ```````````````````````````````` example > aaa *** > bbb .

aaa


bbb

```````````````````````````````` However, because of laziness, a blank line is needed between a block quote and a following paragraph: ```````````````````````````````` example > bar baz .

bar baz

```````````````````````````````` ```````````````````````````````` example > bar baz .

bar

baz

```````````````````````````````` ```````````````````````````````` example > bar > baz .

bar

baz

```````````````````````````````` It is a consequence of the Laziness rule that any number of initial `>`s may be omitted on a continuation line of a nested block quote: ```````````````````````````````` example > > > foo bar .

foo bar

```````````````````````````````` ```````````````````````````````` example >>> foo > bar >>baz .

foo bar baz

```````````````````````````````` When including an indented code block in a block quote, remember that the [block quote marker] includes both the `>` and a following space. So *five spaces* are needed after the `>`: ```````````````````````````````` example > code > not code .
code

not code

```````````````````````````````` ## List items A [list marker](@) is a [bullet list marker] or an [ordered list marker]. A [bullet list marker](@) is a `-`, `+`, or `*` character. An [ordered list marker](@) is a sequence of 1--9 arabic digits (`0-9`), followed by either a `.` character or a `)` character. (The reason for the length limit is that with 10 digits we start seeing integer overflows in some browsers.) The following rules define [list items]: 1. **Basic case.** If a sequence of lines *Ls* constitute a sequence of blocks *Bs* starting with a [non-whitespace character], and *M* is a list marker of width *W* followed by 1 ≤ *N* ≤ 4 spaces, then the result of prepending *M* and the following spaces to the first line of *Ls*, and indenting subsequent lines of *Ls* by *W + N* spaces, is a list item with *Bs* as its contents. The type of the list item (bullet or ordered) is determined by the type of its list marker. If the list item is ordered, then it is also assigned a start number, based on the ordered list marker. Exceptions: 1. When the first list item in a [list] interrupts a paragraph---that is, when it starts on a line that would otherwise count as [paragraph continuation text]---then (a) the lines *Ls* must not begin with a blank line, and (b) if the list item is ordered, the start number must be 1. 2. If any line is a [thematic break][thematic breaks] then that line is not a list item. For example, let *Ls* be the lines ```````````````````````````````` example A paragraph with two lines. indented code > A block quote. .

A paragraph with two lines.

indented code

A block quote.

```````````````````````````````` And let *M* be the marker `1.`, and *N* = 2. Then rule #1 says that the following is an ordered list item with start number 1, and the same contents as *Ls*: ```````````````````````````````` example 1. A paragraph with two lines. indented code > A block quote. .
  1. A paragraph with two lines.

    indented code
    

    A block quote.

```````````````````````````````` The most important thing to notice is that the position of the text after the list marker determines how much indentation is needed in subsequent blocks in the list item. If the list marker takes up two spaces, and there are three spaces between the list marker and the next [non-whitespace character], then blocks must be indented five spaces in order to fall under the list item. Here are some examples showing how far content must be indented to be put under the list item: ```````````````````````````````` example - one two .
  • one

two

```````````````````````````````` ```````````````````````````````` example - one two .
  • one

    two

```````````````````````````````` ```````````````````````````````` example - one two .
  • one
 two
```````````````````````````````` ```````````````````````````````` example - one two .
  • one

    two

```````````````````````````````` It is tempting to think of this in terms of columns: the continuation blocks must be indented at least to the column of the first [non-whitespace character] after the list marker. However, that is not quite right. The spaces after the list marker determine how much relative indentation is needed. Which column this indentation reaches will depend on how the list item is embedded in other constructions, as shown by this example: ```````````````````````````````` example > > 1. one >> >> two .
  1. one

    two

```````````````````````````````` Here `two` occurs in the same column as the list marker `1.`, but is actually contained in the list item, because there is sufficient indentation after the last containing blockquote marker. The converse is also possible. In the following example, the word `two` occurs far to the right of the initial text of the list item, `one`, but it is not considered part of the list item, because it is not indented far enough past the blockquote marker: ```````````````````````````````` example >>- one >> > > two .
  • one

two

```````````````````````````````` Note that at least one space is needed between the list marker and any following content, so these are not list items: ```````````````````````````````` example -one 2.two .

-one

2.two

```````````````````````````````` A list item may contain blocks that are separated by more than one blank line. ```````````````````````````````` example - foo bar .
  • foo

    bar

```````````````````````````````` A list item may contain any kind of block: ```````````````````````````````` example 1. foo ``` bar ``` baz > bam .
  1. foo

    bar
    

    baz

    bam

```````````````````````````````` A list item that contains an indented code block will preserve empty lines within the code block verbatim. ```````````````````````````````` example - Foo bar baz .
  • Foo

    bar
    
    
    baz
    
```````````````````````````````` Note that ordered list start numbers must be nine digits or fewer: ```````````````````````````````` example 123456789. ok .
  1. ok
```````````````````````````````` ```````````````````````````````` example 1234567890. not ok .

1234567890. not ok

```````````````````````````````` A start number may begin with 0s: ```````````````````````````````` example 0. ok .
  1. ok
```````````````````````````````` ```````````````````````````````` example 003. ok .
  1. ok
```````````````````````````````` A start number may not be negative: ```````````````````````````````` example -1. not ok .

-1. not ok

```````````````````````````````` 2. **Item starting with indented code.** If a sequence of lines *Ls* constitute a sequence of blocks *Bs* starting with an indented code block, and *M* is a list marker of width *W* followed by one space, then the result of prepending *M* and the following space to the first line of *Ls*, and indenting subsequent lines of *Ls* by *W + 1* spaces, is a list item with *Bs* as its contents. If a line is empty, then it need not be indented. The type of the list item (bullet or ordered) is determined by the type of its list marker. If the list item is ordered, then it is also assigned a start number, based on the ordered list marker. An indented code block will have to be indented four spaces beyond the edge of the region where text will be included in the list item. In the following case that is 6 spaces: ```````````````````````````````` example - foo bar .
  • foo

    bar
    
```````````````````````````````` And in this case it is 11 spaces: ```````````````````````````````` example 10. foo bar .
  1. foo

    bar
    
```````````````````````````````` If the *first* block in the list item is an indented code block, then by rule #2, the contents must be indented *one* space after the list marker: ```````````````````````````````` example indented code paragraph more code .
indented code

paragraph

more code
```````````````````````````````` ```````````````````````````````` example 1. indented code paragraph more code .
  1. indented code
    

    paragraph

    more code
    
```````````````````````````````` Note that an additional space indent is interpreted as space inside the code block: ```````````````````````````````` example 1. indented code paragraph more code .
  1.  indented code
    

    paragraph

    more code
    
```````````````````````````````` Note that rules #1 and #2 only apply to two cases: (a) cases in which the lines to be included in a list item begin with a [non-whitespace character], and (b) cases in which they begin with an indented code block. In a case like the following, where the first block begins with a three-space indent, the rules do not allow us to form a list item by indenting the whole thing and prepending a list marker: ```````````````````````````````` example foo bar .

foo

bar

```````````````````````````````` ```````````````````````````````` example - foo bar .
  • foo

bar

```````````````````````````````` This is not a significant restriction, because when a block begins with 1-3 spaces indent, the indentation can always be removed without a change in interpretation, allowing rule #1 to be applied. So, in the above case: ```````````````````````````````` example - foo bar .
  • foo

    bar

```````````````````````````````` 3. **Item starting with a blank line.** If a sequence of lines *Ls* starting with a single [blank line] constitute a (possibly empty) sequence of blocks *Bs*, not separated from each other by more than one blank line, and *M* is a list marker of width *W*, then the result of prepending *M* to the first line of *Ls*, and indenting subsequent lines of *Ls* by *W + 1* spaces, is a list item with *Bs* as its contents. If a line is empty, then it need not be indented. The type of the list item (bullet or ordered) is determined by the type of its list marker. If the list item is ordered, then it is also assigned a start number, based on the ordered list marker. Here are some list items that start with a blank line but are not empty: ```````````````````````````````` example - foo - ``` bar ``` - baz .
  • foo
  • bar
    
  • baz
    
```````````````````````````````` When the list item starts with a blank line, the number of spaces following the list marker doesn't change the required indentation: ```````````````````````````````` example - foo .
  • foo
```````````````````````````````` A list item can begin with at most one blank line. In the following example, `foo` is not part of the list item: ```````````````````````````````` example - foo .

foo

```````````````````````````````` Here is an empty bullet list item: ```````````````````````````````` example - foo - - bar .
  • foo
  • bar
```````````````````````````````` It does not matter whether there are spaces following the [list marker]: ```````````````````````````````` example - foo - - bar .
  • foo
  • bar
```````````````````````````````` Here is an empty ordered list item: ```````````````````````````````` example 1. foo 2. 3. bar .
  1. foo
  2. bar
```````````````````````````````` A list may start or end with an empty list item: ```````````````````````````````` example * .
```````````````````````````````` However, an empty list item cannot interrupt a paragraph: ```````````````````````````````` example foo * foo 1. .

foo *

foo 1.

```````````````````````````````` 4. **Indentation.** If a sequence of lines *Ls* constitutes a list item according to rule #1, #2, or #3, then the result of indenting each line of *Ls* by 1-3 spaces (the same for each line) also constitutes a list item with the same contents and attributes. If a line is empty, then it need not be indented. Indented one space: ```````````````````````````````` example 1. A paragraph with two lines. indented code > A block quote. .
  1. A paragraph with two lines.

    indented code
    

    A block quote.

```````````````````````````````` Indented two spaces: ```````````````````````````````` example 1. A paragraph with two lines. indented code > A block quote. .
  1. A paragraph with two lines.

    indented code
    

    A block quote.

```````````````````````````````` Indented three spaces: ```````````````````````````````` example 1. A paragraph with two lines. indented code > A block quote. .
  1. A paragraph with two lines.

    indented code
    

    A block quote.

```````````````````````````````` Four spaces indent gives a code block: ```````````````````````````````` example 1. A paragraph with two lines. indented code > A block quote. .
1.  A paragraph
    with two lines.

        indented code

    > A block quote.
```````````````````````````````` 5. **Laziness.** If a string of lines *Ls* constitute a [list item](#list-items) with contents *Bs*, then the result of deleting some or all of the indentation from one or more lines in which the next [non-whitespace character] after the indentation is [paragraph continuation text] is a list item with the same contents and attributes. The unindented lines are called [lazy continuation line](@)s. Here is an example with [lazy continuation lines]: ```````````````````````````````` example 1. A paragraph with two lines. indented code > A block quote. .
  1. A paragraph with two lines.

    indented code
    

    A block quote.

```````````````````````````````` Indentation can be partially deleted: ```````````````````````````````` example 1. A paragraph with two lines. .
  1. A paragraph with two lines.
```````````````````````````````` These examples show how laziness can work in nested structures: ```````````````````````````````` example > 1. > Blockquote continued here. .
  1. Blockquote continued here.

```````````````````````````````` ```````````````````````````````` example > 1. > Blockquote > continued here. .
  1. Blockquote continued here.

```````````````````````````````` 6. **That's all.** Nothing that is not counted as a list item by rules #1--5 counts as a [list item](#list-items). The rules for sublists follow from the general rules [above][List items]. A sublist must be indented the same number of spaces a paragraph would need to be in order to be included in the list item. So, in this case we need two spaces indent: ```````````````````````````````` example - foo - bar - baz - boo .
  • foo
    • bar
      • baz
        • boo
```````````````````````````````` One is not enough: ```````````````````````````````` example - foo - bar - baz - boo .
  • foo
  • bar
  • baz
  • boo
```````````````````````````````` Here we need four, because the list marker is wider: ```````````````````````````````` example 10) foo - bar .
  1. foo
    • bar
```````````````````````````````` Three is not enough: ```````````````````````````````` example 10) foo - bar .
  1. foo
  • bar
```````````````````````````````` A list may be the first block in a list item: ```````````````````````````````` example - - foo .
    • foo
```````````````````````````````` ```````````````````````````````` example 1. - 2. foo .
      1. foo
```````````````````````````````` A list item can contain a heading: ```````````````````````````````` example - # Foo - Bar --- baz .
  • Foo

  • Bar

    baz
```````````````````````````````` ### Motivation John Gruber's Markdown spec says the following about list items: 1. "List markers typically start at the left margin, but may be indented by up to three spaces. List markers must be followed by one or more spaces or a tab." 2. "To make lists look nice, you can wrap items with hanging indents.... But if you don't want to, you don't have to." 3. "List items may consist of multiple paragraphs. Each subsequent paragraph in a list item must be indented by either 4 spaces or one tab." 4. "It looks nice if you indent every line of the subsequent paragraphs, but here again, Markdown will allow you to be lazy." 5. "To put a blockquote within a list item, the blockquote's `>` delimiters need to be indented." 6. "To put a code block within a list item, the code block needs to be indented twice — 8 spaces or two tabs." These rules specify that a paragraph under a list item must be indented four spaces (presumably, from the left margin, rather than the start of the list marker, but this is not said), and that code under a list item must be indented eight spaces instead of the usual four. They also say that a block quote must be indented, but not by how much; however, the example given has four spaces indentation. Although nothing is said about other kinds of block-level content, it is certainly reasonable to infer that *all* block elements under a list item, including other lists, must be indented four spaces. This principle has been called the *four-space rule*. The four-space rule is clear and principled, and if the reference implementation `Markdown.pl` had followed it, it probably would have become the standard. However, `Markdown.pl` allowed paragraphs and sublists to start with only two spaces indentation, at least on the outer level. Worse, its behavior was inconsistent: a sublist of an outer-level list needed two spaces indentation, but a sublist of this sublist needed three spaces. 
It is not surprising, then, that different implementations of Markdown have developed very different rules for determining what comes under a list item. (Pandoc and python-Markdown, for example, stuck with Gruber's syntax description and the four-space rule, while discount, redcarpet, marked, PHP Markdown, and others followed `Markdown.pl`'s behavior more closely.) Unfortunately, given the divergences between implementations, there is no way to give a spec for list items that will be guaranteed not to break any existing documents. However, the spec given here should correctly handle lists formatted with either the four-space rule or the more forgiving `Markdown.pl` behavior, provided they are laid out in a way that is natural for a human to read. The strategy here is to let the width and indentation of the list marker determine the indentation necessary for blocks to fall under the list item, rather than having a fixed and arbitrary number. The writer can think of the body of the list item as a unit which gets indented to the right enough to fit the list marker (and any indentation on the list marker). (The laziness rule, #5, then allows continuation lines to be unindented if needed.) This rule is superior, we claim, to any rule requiring a fixed level of indentation from the margin. The four-space rule is clear but unnatural. It is quite unintuitive that ``` markdown - foo bar - baz ``` should be parsed as two lists with an intervening paragraph, ``` html
  • foo

bar

  • baz
``` as the four-space rule demands, rather than a single list, ``` html
  • foo

    bar

    • baz
``` The choice of four spaces is arbitrary. It can be learned, but it is not likely to be guessed, and it trips up beginners regularly. Would it help to adopt a two-space rule? The problem is that such a rule, together with the rule allowing 1--3 spaces indentation of the initial list marker, allows text that is indented *less than* the original list marker to be included in the list item. For example, `Markdown.pl` parses ``` markdown - one two ``` as a single list item, with `two` a continuation paragraph: ``` html
  • one

    two

``` and similarly ``` markdown > - one > > two ``` as ``` html
  • one

    two

``` This is extremely unintuitive. Rather than requiring a fixed indent from the margin, we could require a fixed indent (say, two spaces, or even one space) from the list marker (which may itself be indented). This proposal would remove the last anomaly discussed. Unlike the spec presented above, it would count the following as a list item with a subparagraph, even though the paragraph `bar` is not indented as far as the first paragraph `foo`: ``` markdown 10. foo bar ``` Arguably this text does read like a list item with `bar` as a subparagraph, which may count in favor of the proposal. However, on this proposal indented code would have to be indented six spaces after the list marker. And this would break a lot of existing Markdown, which has the pattern: ``` markdown 1. foo indented code ``` where the code is indented eight spaces. The spec above, by contrast, will parse this text as expected, since the code block's indentation is measured from the beginning of `foo`. The one case that needs special treatment is a list item that *starts* with indented code. How much indentation is required in that case, since we don't have a "first paragraph" to measure from? Rule #2 simply stipulates that in such cases, we require one space indentation from the list marker (and then the normal four spaces for the indented code). This will match the four-space rule in cases where the list marker plus its initial indentation takes four spaces (a common case), but diverge in other cases.
## Task list items (extension) GFM enables the `tasklist` extension, where an additional processing step is performed on [list items]. A [task list item](@) is a [list item][list items] where the first block in it is a paragraph which begins with a [task list item marker] and at least one whitespace character before any other content. A [task list item marker](@) consists of an optional number of spaces, a left bracket (`[`), either a whitespace character or the letter `x` in either lowercase or uppercase, and then a right bracket (`]`). When rendered, the [task list item marker] is replaced with a semantic checkbox element; in an HTML output, this would be an `<input type="checkbox">` element. If the character between the brackets is a whitespace character, the checkbox is unchecked. Otherwise, the checkbox is checked. This spec does not define how the checkbox elements are interacted with: in practice, implementors are free to render the checkboxes as disabled or immutable elements, or they may dynamically handle interactions (i.e. checking, unchecking) in the final rendered document. ```````````````````````````````` example disabled - [ ] foo - [x] bar .
  • foo
  • bar
```````````````````````````````` Task lists can be arbitrarily nested: ```````````````````````````````` example disabled - [x] foo - [ ] bar - [x] baz - [ ] bim .
  • foo
    • bar
    • baz
  • bim
````````````````````````````````
## Lists A [list](@) is a sequence of one or more list items [of the same type]. The list items may be separated by any number of blank lines. Two list items are [of the same type](@) if they begin with a [list marker] of the same type. Two list markers are of the same type if (a) they are bullet list markers using the same character (`-`, `+`, or `*`) or (b) they are ordered list numbers with the same delimiter (either `.` or `)`). A list is an [ordered list](@) if its constituent list items begin with [ordered list markers], and a [bullet list](@) if its constituent list items begin with [bullet list markers]. The [start number](@) of an [ordered list] is determined by the list number of its initial list item. The numbers of subsequent list items are disregarded. A list is [loose](@) if any of its constituent list items are separated by blank lines, or if any of its constituent list items directly contain two block-level elements with a blank line between them. Otherwise a list is [tight](@). (The difference in HTML output is that paragraphs in a loose list are wrapped in `<p>` tags, while paragraphs in a tight list are not.) Changing the bullet or ordered list delimiter starts a new list: ```````````````````````````````` example - foo - bar + baz .

  • foo
  • bar
  • baz
```````````````````````````````` ```````````````````````````````` example 1. foo 2. bar 3) baz .
  1. foo
  2. bar
  1. baz
```````````````````````````````` In CommonMark, a list can interrupt a paragraph. That is, no blank line is needed to separate a paragraph from a following list: ```````````````````````````````` example Foo - bar - baz .

Foo

  • bar
  • baz
```````````````````````````````` `Markdown.pl` does not allow this, through fear of triggering a list via a numeral in a hard-wrapped line: ``` markdown The number of windows in my house is 14. The number of doors is 6. ``` Oddly, though, `Markdown.pl` *does* allow a blockquote to interrupt a paragraph, even though the same considerations might apply. In CommonMark, we do allow lists to interrupt paragraphs, for two reasons. First, it is natural and not uncommon for people to start lists without blank lines: ``` markdown I need to buy - new shoes - a coat - a plane ticket ``` Second, we are attracted to a > [principle of uniformity](@): > if a chunk of text has a certain > meaning, it will continue to have the same meaning when put into a > container block (such as a list item or blockquote). (Indeed, the spec for [list items] and [block quotes] presupposes this principle.) This principle implies that if ``` markdown * I need to buy - new shoes - a coat - a plane ticket ``` is a list item containing a paragraph followed by a nested sublist, as all Markdown implementations agree it is (though the paragraph may be rendered without `<p>` tags, since the list is "tight"), then ``` markdown I need to buy - new shoes - a coat - a plane ticket ``` by itself should be a paragraph followed by a nested sublist. Since it is well established Markdown practice to allow lists to interrupt paragraphs inside list items, the [principle of uniformity] requires us to allow this outside list items as well. ([reStructuredText](http://docutils.sourceforge.net/rst.html) takes a different approach, requiring blank lines before lists even inside other list items.) In order to solve the problem of unwanted lists in paragraphs with hard-wrapped numerals, we allow only lists starting with `1` to interrupt paragraphs. Thus, ```````````````````````````````` example The number of windows in my house is 14. The number of doors is 6. .

The number of windows in my house is 14. The number of doors is 6.

```````````````````````````````` We may still get an unintended result in cases like ```````````````````````````````` example The number of windows in my house is 1. The number of doors is 6. .

The number of windows in my house is

  1. The number of doors is 6.
```````````````````````````````` but this rule should prevent most spurious list captures. There can be any number of blank lines between items: ```````````````````````````````` example - foo - bar - baz .
  • foo

  • bar

  • baz

```````````````````````````````` ```````````````````````````````` example - foo - bar - baz bim .
  • foo
    • bar
      • baz

        bim

```````````````````````````````` To separate consecutive lists of the same type, or to separate a list from an indented code block that would otherwise be parsed as a subparagraph of the final list item, you can insert a blank HTML comment: ```````````````````````````````` example - foo - bar <!-- --> - baz - bim .
  • foo
  • bar
  • baz
  • bim
```````````````````````````````` ```````````````````````````````` example - foo notcode - foo <!-- --> code .
  • foo

    notcode

  • foo

code
```````````````````````````````` List items need not be indented to the same level. The following list items will be treated as items at the same list level, since none is indented enough to belong to the previous list item: ```````````````````````````````` example - a - b - c - d - e - f - g .
  • a
  • b
  • c
  • d
  • e
  • f
  • g
```````````````````````````````` ```````````````````````````````` example 1. a 2. b 3. c .
  1. a

  2. b

  3. c

```````````````````````````````` Note, however, that list items may not be indented more than three spaces. Here `- e` is treated as a paragraph continuation line, because it is indented more than three spaces: ```````````````````````````````` example - a - b - c - d - e .
  • a
  • b
  • c
  • d - e
```````````````````````````````` And here, `3. c` is treated as an indented code block, because it is indented four spaces and preceded by a blank line. ```````````````````````````````` example 1. a 2. b 3. c .
  1. a

  2. b

3. c
```````````````````````````````` This is a loose list, because there is a blank line between two of the list items: ```````````````````````````````` example - a - b - c .
  • a

  • b

  • c

```````````````````````````````` So is this, with an empty second item: ```````````````````````````````` example * a * * c .
  • a

  • c

```````````````````````````````` These are loose lists, even though there is no space between the items, because one of the items directly contains two block-level elements with a blank line between them: ```````````````````````````````` example - a - b c - d .
  • a

  • b

    c

  • d

```````````````````````````````` ```````````````````````````````` example - a - b [ref]: /url - d .
  • a

  • b

  • d

```````````````````````````````` This is a tight list, because the blank lines are in a code block: ```````````````````````````````` example - a - ``` b ``` - c .
  • a
  • b
    
    
    
  • c
```````````````````````````````` This is a tight list, because the blank line is between two paragraphs of a sublist. So the sublist is loose while the outer list is tight: ```````````````````````````````` example - a - b c - d .
  • a
    • b

      c

  • d
```````````````````````````````` This is a tight list, because the blank line is inside the block quote: ```````````````````````````````` example * a > b > * c .
  • a

    b

  • c
```````````````````````````````` This list is tight, because the consecutive block elements are not separated by blank lines: ```````````````````````````````` example - a > b ``` c ``` - d .
  • a

    b

    c
    
  • d
```````````````````````````````` A single-paragraph list is tight: ```````````````````````````````` example - a .
  • a
```````````````````````````````` ```````````````````````````````` example - a - b .
  • a
    • b
```````````````````````````````` This list is loose, because of the blank line between the two block elements in the list item: ```````````````````````````````` example 1. ``` foo ``` bar .
  1. foo
    

    bar

```````````````````````````````` Here the outer list is loose, the inner list tight: ```````````````````````````````` example * foo * bar baz .
  • foo

    • bar

    baz

```````````````````````````````` ```````````````````````````````` example - a - b - c - d - e - f .
  • a

    • b
    • c
  • d

    • e
    • f
```````````````````````````````` # Inlines Inlines are parsed sequentially from the beginning of the character stream to the end (left to right, in left-to-right languages). Thus, for example, in ```````````````````````````````` example `hi`lo` .

hilo`

```````````````````````````````` `hi` is parsed as code, leaving the backtick at the end as a literal backtick. ## Backslash escapes Any ASCII punctuation character may be backslash-escaped: ```````````````````````````````` example \!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~ .

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~

```````````````````````````````` Backslashes before other characters are treated as literal backslashes: ```````````````````````````````` example \→\A\a\ \3\φ\« .

\→\A\a\ \3\φ\«

```````````````````````````````` Escaped characters are treated as regular characters and do not have their usual Markdown meanings: ```````````````````````````````` example \*not emphasized* \
not a tag \[not a link](/foo) \`not code` 1\. not a list \* not a list \# not a heading \[foo]: /url "not a reference" \ö not a character entity .

*not emphasized* <br/> not a tag [not a link](/foo) `not code` 1. not a list * not a list # not a heading [foo]: /url "not a reference" &ouml; not a character entity

```````````````````````````````` If a backslash is itself escaped, the following character is not: ```````````````````````````````` example \\*emphasis* .

\emphasis

```````````````````````````````` A backslash at the end of the line is a [hard line break]: ```````````````````````````````` example foo\ bar .

foo
bar

```````````````````````````````` Backslash escapes do not work in code blocks, code spans, autolinks, or raw HTML: ```````````````````````````````` example `` \[\` `` .

\[\`

```````````````````````````````` ```````````````````````````````` example \[\] .
\[\]
```````````````````````````````` ```````````````````````````````` example ~~~ \[\] ~~~ .
\[\]
```````````````````````````````` ```````````````````````````````` example .

http://example.com?find=\*

```````````````````````````````` ```````````````````````````````` example . ```````````````````````````````` But they work in all other contexts, including URLs and link titles, link references, and [info strings] in [fenced code blocks]: ```````````````````````````````` example [foo](/bar\* "ti\*tle") .

foo

```````````````````````````````` ```````````````````````````````` example [foo] [foo]: /bar\* "ti\*tle" .

foo

```````````````````````````````` ```````````````````````````````` example ``` foo\+bar foo ``` .
foo
```````````````````````````````` ## Entity and numeric character references Valid HTML entity references and numeric character references can be used in place of the corresponding Unicode character, with the following exceptions: - Entity and character references are not recognized in code blocks and code spans. - Entity and character references cannot stand in place of special characters that define structural elements in CommonMark. For example, although `&#42;` can be used in place of a literal `*` character, `&#42;` cannot replace `*` in emphasis delimiters, bullet list markers, or thematic breaks. Conforming CommonMark parsers need not store information about whether a particular character was represented in the source using a Unicode character or an entity reference. [Entity references](@) consist of `&` + any of the valid HTML5 entity names + `;`. The document <https://html.spec.whatwg.org/multipage/entities.json> is used as an authoritative source for the valid entity references and their corresponding code points. ```````````````````````````````` example &nbsp; &amp; &copy; &AElig; &Dcaron; &frac34; &HilbertSpace; &DifferentialD; &ClockwiseContourIntegral; &ngE; .

  & © Æ Ď ¾ ℋ ⅆ ∲ ≧̸

```````````````````````````````` [Decimal numeric character references](@) consist of `&#` + a string of 1--7 arabic digits + `;`. A numeric character reference is parsed as the corresponding Unicode character. Invalid Unicode code points will be replaced by the REPLACEMENT CHARACTER (`U+FFFD`). For security reasons, the code point `U+0000` will also be replaced by `U+FFFD`. ```````````````````````````````` example # Ӓ Ϡ � .

# Ӓ Ϡ �

```````````````````````````````` [Hexadecimal numeric character references](@) consist of `&#` + either `X` or `x` + a string of 1-6 hexadecimal digits + `;`. They too are parsed as the corresponding Unicode character (this time specified with a hexadecimal numeral instead of decimal). ```````````````````````````````` example " ആ ಫ .

" ആ ಫ

```````````````````````````````` Here are some nonentities: ```````````````````````````````` example   &x; &#; &#x; � &#abcdef0; &ThisIsNotDefined; &hi?; .

&nbsp &x; &#; &#x; &#987654321; &#abcdef0; &ThisIsNotDefined; &hi?;

```````````````````````````````` Although HTML5 does accept some entity references without a trailing semicolon (such as `&copy`), these are not recognized here, because it makes the grammar too ambiguous: ```````````````````````````````` example &copy .

&copy

```````````````````````````````` Strings that are not on the list of HTML5 named entities are not recognized as entity references either: ```````````````````````````````` example &MadeUpEntity; .

&MadeUpEntity;

```````````````````````````````` Entity and numeric character references are recognized in any context besides code spans or code blocks, including URLs, [link titles], and [fenced code block][] [info strings]: ```````````````````````````````` example . ```````````````````````````````` ```````````````````````````````` example [foo](/föö "föö") .

foo

```````````````````````````````` ```````````````````````````````` example [foo] [foo]: /föö "föö" .

foo

```````````````````````````````` ```````````````````````````````` example ``` föö foo ``` .
foo
```````````````````````````````` Entity and numeric character references are treated as literal text in code spans and code blocks: ```````````````````````````````` example `föö` .

f&ouml;&ouml;

```````````````````````````````` ```````````````````````````````` example föfö .
f&ouml;f&ouml;
```````````````````````````````` Entity and numeric character references cannot be used in place of symbols indicating structure in CommonMark documents. ```````````````````````````````` example *foo* *foo* .

*foo* foo

```````````````````````````````` ```````````````````````````````` example * foo * foo .

* foo

  • foo
```````````````````````````````` ```````````````````````````````` example foo bar .

foo bar

```````````````````````````````` ```````````````````````````````` example foo .

→foo

```````````````````````````````` ```````````````````````````````` example [a](url "tit") .

[a](url "tit")

```````````````````````````````` ## Code spans A [backtick string](@) is a string of one or more backtick characters (`` ` ``) that is neither preceded nor followed by a backtick. A [code span](@) begins with a backtick string and ends with a backtick string of equal length. The contents of the code span are the characters between the two backtick strings, normalized in the following ways: - First, [line endings] are converted to [spaces]. - If the resulting string both begins *and* ends with a [space] character, but does not consist entirely of [space] characters, a single [space] character is removed from the front and back. This allows you to include code that begins or ends with backtick characters, which must be separated by whitespace from the opening or closing backtick strings. This is a simple code span: ```````````````````````````````` example `foo` .

foo

```````````````````````````````` Here two backticks are used, because the code contains a backtick. This example also illustrates stripping of a single leading and trailing space: ```````````````````````````````` example `` foo ` bar `` .

foo ` bar

```````````````````````````````` This example shows the motivation for stripping leading and trailing spaces: ```````````````````````````````` example ` `` ` .

``

```````````````````````````````` Note that only *one* space is stripped: ```````````````````````````````` example ` `` ` .

``

```````````````````````````````` The stripping only happens if the space is on both sides of the string: ```````````````````````````````` example ` a` .

a

```````````````````````````````` Only [spaces], and not [unicode whitespace] in general, are stripped in this way: ```````````````````````````````` example ` b ` .

 b 

```````````````````````````````` No stripping occurs if the code span contains only spaces: ```````````````````````````````` example ` ` ` ` .

 

```````````````````````````````` [Line endings] are treated like spaces: ```````````````````````````````` example `` foo bar baz `` .

foo bar baz

```````````````````````````````` ```````````````````````````````` example `` foo `` .

foo

```````````````````````````````` Interior spaces are not collapsed: ```````````````````````````````` example `foo bar baz` .

foo bar baz

```````````````````````````````` Note that browsers will typically collapse consecutive spaces when rendering `<code>` elements, so it is recommended that the following CSS be used: code{white-space: pre-wrap;} Note that backslash escapes do not work in code spans. All backslashes are treated literally: ```````````````````````````````` example `foo\`bar` .

foo\bar`

```````````````````````````````` Backslash escapes are never needed, because one can always choose a string of *n* backtick characters as delimiters, where the code does not contain any strings of exactly *n* backtick characters. ```````````````````````````````` example ``foo`bar`` .

foo`bar

```````````````````````````````` ```````````````````````````````` example ` foo `` bar ` .

foo `` bar

```````````````````````````````` Code span backticks have higher precedence than any other inline constructs except HTML tags and autolinks. Thus, for example, this is not parsed as emphasized text, since the second `*` is part of a code span: ```````````````````````````````` example *foo`*` .

*foo*

```````````````````````````````` And this is not parsed as a link: ```````````````````````````````` example [not a `link](/foo`) .

[not a link](/foo)

```````````````````````````````` Code spans, HTML tags, and autolinks have the same precedence. Thus, this is code: ```````````````````````````````` example `` .

<a href="">`

```````````````````````````````` But this is an HTML tag: ```````````````````````````````` example
` .

`

```````````````````````````````` And this is code: ```````````````````````````````` example `` .

<http://foo.bar.baz>`

```````````````````````````````` But this is an autolink: ```````````````````````````````` example ` .

http://foo.bar.`baz`

```````````````````````````````` When a backtick string is not closed by a matching backtick string, we just have literal backticks: ```````````````````````````````` example ```foo`` .

```foo``

```````````````````````````````` ```````````````````````````````` example `foo .

`foo

```````````````````````````````` The following case also illustrates the need for opening and closing backtick strings to be equal in length: ```````````````````````````````` example `foo``bar`` .

`foobar

```````````````````````````````` ## Emphasis and strong emphasis John Gruber's original [Markdown syntax description](http://daringfireball.net/projects/markdown/syntax#em) says: > Markdown treats asterisks (`*`) and underscores (`_`) as indicators of > emphasis. Text wrapped with one `*` or `_` will be wrapped with an HTML > `<em>` tag; double `*`'s or `_`'s will be wrapped with an HTML `<strong>` > tag. This is enough for most users, but these rules leave much undecided, especially when it comes to nested emphasis. The original `Markdown.pl` test suite makes it clear that triple `***` and `___` delimiters can be used for strong emphasis, and most implementations have also allowed the following patterns: ``` markdown ***strong emph*** ***strong** in emph* ***emph* in strong** **in strong *emph*** *in emph **strong*** ``` The following patterns are less widely supported, but the intent is clear and they are useful (especially in contexts like bibliography entries): ``` markdown *emph *with emph* in it* **strong **with strong** in it** ``` Many implementations have also restricted intraword emphasis to the `*` forms, to avoid unwanted emphasis in words containing internal underscores. (It is best practice to put these in code spans, but users often do not.) ``` markdown internal emphasis: foo*bar*baz no emphasis: foo_bar_baz ``` The rules given below capture all of these patterns, while allowing for efficient parsing strategies that do not backtrack. First, some definitions. A [delimiter run](@) is either a sequence of one or more `*` characters that is not preceded or followed by a non-backslash-escaped `*` character, or a sequence of one or more `_` characters that is not preceded or followed by a non-backslash-escaped `_` character.
A [left-flanking delimiter run](@) is a [delimiter run] that is (1) not followed by [Unicode whitespace], and either (2a) not followed by a [punctuation character], or (2b) followed by a [punctuation character] and preceded by [Unicode whitespace] or a [punctuation character]. For purposes of this definition, the beginning and the end of the line count as Unicode whitespace. A [right-flanking delimiter run](@) is a [delimiter run] that is (1) not preceded by [Unicode whitespace], and either (2a) not preceded by a [punctuation character], or (2b) preceded by a [punctuation character] and followed by [Unicode whitespace] or a [punctuation character]. For purposes of this definition, the beginning and the end of the line count as Unicode whitespace. Here are some examples of delimiter runs. - left-flanking but not right-flanking: ``` ***abc _abc **"abc" _"abc" ``` - right-flanking but not left-flanking: ``` abc*** abc_ "abc"** "abc"_ ``` - Both left and right-flanking: ``` abc***def "abc"_"def" ``` - Neither left nor right-flanking: ``` abc *** def a _ b ``` (The idea of distinguishing left-flanking and right-flanking delimiter runs based on the character before and the character after comes from Roopesh Chander's [vfmd](http://www.vfmd.org/vfmd-spec/specification/#procedure-for-identifying-emphasis-tags). vfmd uses the terminology "emphasis indicator string" instead of "delimiter run," and its rules for distinguishing left- and right-flanking runs are a bit more complex than the ones given here.) The following rules define emphasis and strong emphasis: 1. A single `*` character [can open emphasis](@) iff (if and only if) it is part of a [left-flanking delimiter run]. 2. A single `_` character [can open emphasis] iff it is part of a [left-flanking delimiter run] and either (a) not part of a [right-flanking delimiter run] or (b) part of a [right-flanking delimiter run] preceded by punctuation. 3. 
A single `*` character [can close emphasis](@) iff it is part of a [right-flanking delimiter run]. 4. A single `_` character [can close emphasis] iff it is part of a [right-flanking delimiter run] and either (a) not part of a [left-flanking delimiter run] or (b) part of a [left-flanking delimiter run] followed by punctuation. 5. A double `**` [can open strong emphasis](@) iff it is part of a [left-flanking delimiter run]. 6. A double `__` [can open strong emphasis] iff it is part of a [left-flanking delimiter run] and either (a) not part of a [right-flanking delimiter run] or (b) part of a [right-flanking delimiter run] preceded by punctuation. 7. A double `**` [can close strong emphasis](@) iff it is part of a [right-flanking delimiter run]. 8. A double `__` [can close strong emphasis] iff it is part of a [right-flanking delimiter run] and either (a) not part of a [left-flanking delimiter run] or (b) part of a [left-flanking delimiter run] followed by punctuation. 9. Emphasis begins with a delimiter that [can open emphasis] and ends with a delimiter that [can close emphasis], and that uses the same character (`_` or `*`) as the opening delimiter. The opening and closing delimiters must belong to separate [delimiter runs]. If one of the delimiters can both open and close emphasis, then the sum of the lengths of the delimiter runs containing the opening and closing delimiters must not be a multiple of 3 unless both lengths are multiples of 3. 10. Strong emphasis begins with a delimiter that [can open strong emphasis] and ends with a delimiter that [can close strong emphasis], and that uses the same character (`_` or `*`) as the opening delimiter. The opening and closing delimiters must belong to separate [delimiter runs]. If one of the delimiters can both open and close strong emphasis, then the sum of the lengths of the delimiter runs containing the opening and closing delimiters must not be a multiple of 3 unless both lengths are multiples of 3. 11. 
A literal `*` character cannot occur at the beginning or end of `*`-delimited emphasis or `**`-delimited strong emphasis, unless it is backslash-escaped. 12. A literal `_` character cannot occur at the beginning or end of `_`-delimited emphasis or `__`-delimited strong emphasis, unless it is backslash-escaped. Where rules 1--12 above are compatible with multiple parsings, the following principles resolve ambiguity: 13. The number of nestings should be minimized. Thus, for example, an interpretation `<strong>...</strong>` is always preferred to `<em><em>...</em></em>`. 14. An interpretation `<em><strong>...</strong></em>` is always preferred to `<strong><em>...</em></strong>`. 15. When two potential emphasis or strong emphasis spans overlap, so that the second begins before the first ends and ends after the first ends, the first takes precedence. Thus, for example, `*foo _bar* baz_` is parsed as `<em>foo _bar</em> baz_` rather than `*foo <em>bar* baz</em>`. 16. When there are two potential emphasis or strong emphasis spans with the same closing delimiter, the shorter one (the one that opens later) takes precedence. Thus, for example, `**foo **bar baz**` is parsed as `**foo <strong>bar baz</strong>` rather than `<strong>foo **bar baz</strong>`. 17. Inline code spans, links, images, and HTML tags group more tightly than emphasis. So, when there is a choice between an interpretation that contains one of these elements and one that does not, the former always wins. Thus, for example, `*[foo*](bar)` is parsed as `*<a href="bar">foo*</a>` rather than as `<em>[foo</em>](bar)`. These rules can be illustrated through a series of examples. Rule 1: ```````````````````````````````` example *foo bar* .

foo bar

```````````````````````````````` This is not emphasis, because the opening `*` is followed by whitespace, and hence not part of a [left-flanking delimiter run]: ```````````````````````````````` example a * foo bar* .

a * foo bar*

```````````````````````````````` This is not emphasis, because the opening `*` is preceded by an alphanumeric and followed by punctuation, and hence not part of a [left-flanking delimiter run]: ```````````````````````````````` example a*"foo"* .

a*"foo"*

```````````````````````````````` Unicode nonbreaking spaces count as whitespace, too: ```````````````````````````````` example * a * .

* a *

```````````````````````````````` Intraword emphasis with `*` is permitted: ```````````````````````````````` example foo*bar* .

foobar

```````````````````````````````` ```````````````````````````````` example 5*6*78 .

5678

```````````````````````````````` Rule 2: ```````````````````````````````` example _foo bar_ .

foo bar

```````````````````````````````` This is not emphasis, because the opening `_` is followed by whitespace: ```````````````````````````````` example _ foo bar_ .

_ foo bar_

```````````````````````````````` This is not emphasis, because the opening `_` is preceded by an alphanumeric and followed by punctuation: ```````````````````````````````` example a_"foo"_ .

a_"foo"_

```````````````````````````````` Emphasis with `_` is not allowed inside words: ```````````````````````````````` example foo_bar_ .

foo_bar_

```````````````````````````````` ```````````````````````````````` example 5_6_78 .

5_6_78

```````````````````````````````` ```````````````````````````````` example пристаням_стремятся_ .

пристаням_стремятся_

```````````````````````````````` Here `_` does not generate emphasis, because the first delimiter run is right-flanking and the second left-flanking: ```````````````````````````````` example aa_"bb"_cc .

aa_"bb"_cc

```````````````````````````````` This is emphasis, even though the opening delimiter is both left- and right-flanking, because it is preceded by punctuation: ```````````````````````````````` example foo-_(bar)_ .

foo-(bar)

```````````````````````````````` Rule 3: This is not emphasis, because the closing delimiter does not match the opening delimiter: ```````````````````````````````` example _foo* .

_foo*

```````````````````````````````` This is not emphasis, because the closing `*` is preceded by whitespace: ```````````````````````````````` example *foo bar * .

*foo bar *

```````````````````````````````` A newline also counts as whitespace: ```````````````````````````````` example *foo bar * .

*foo bar *

```````````````````````````````` This is not emphasis, because the second `*` is preceded by punctuation and followed by an alphanumeric (hence it is not part of a [right-flanking delimiter run]): ```````````````````````````````` example *(*foo) .

*(*foo)

```````````````````````````````` The point of this restriction is more easily appreciated with this example: ```````````````````````````````` example *(*foo*)* .

(foo)

```````````````````````````````` Intraword emphasis with `*` is allowed: ```````````````````````````````` example *foo*bar .

foobar

```````````````````````````````` Rule 4: This is not emphasis, because the closing `_` is preceded by whitespace: ```````````````````````````````` example _foo bar _ .

_foo bar _

```````````````````````````````` This is not emphasis, because the second `_` is preceded by punctuation and followed by an alphanumeric: ```````````````````````````````` example _(_foo) .

_(_foo)

```````````````````````````````` This is emphasis within emphasis: ```````````````````````````````` example _(_foo_)_ .

(foo)

```````````````````````````````` Intraword emphasis is disallowed for `_`: ```````````````````````````````` example _foo_bar .

_foo_bar

```````````````````````````````` ```````````````````````````````` example _пристаням_стремятся .

_пристаням_стремятся

```````````````````````````````` ```````````````````````````````` example _foo_bar_baz_ .

foo_bar_baz

```````````````````````````````` This is emphasis, even though the closing delimiter is both left- and right-flanking, because it is followed by punctuation: ```````````````````````````````` example _(bar)_. .

(bar).

```````````````````````````````` Rule 5: ```````````````````````````````` example **foo bar** .

foo bar

```````````````````````````````` This is not strong emphasis, because the opening delimiter is followed by whitespace: ```````````````````````````````` example ** foo bar** .

** foo bar**

```````````````````````````````` This is not strong emphasis, because the opening `**` is preceded by an alphanumeric and followed by punctuation, and hence not part of a [left-flanking delimiter run]: ```````````````````````````````` example a**"foo"** .

a**"foo"**

```````````````````````````````` Intraword strong emphasis with `**` is permitted: ```````````````````````````````` example foo**bar** .

foobar

```````````````````````````````` Rule 6: ```````````````````````````````` example __foo bar__ .

foo bar

```````````````````````````````` This is not strong emphasis, because the opening delimiter is followed by whitespace: ```````````````````````````````` example __ foo bar__ .

__ foo bar__

```````````````````````````````` A newline counts as whitespace: ```````````````````````````````` example __ foo bar__ .

__ foo bar__

```````````````````````````````` This is not strong emphasis, because the opening `__` is preceded by an alphanumeric and followed by punctuation: ```````````````````````````````` example a__"foo"__ .

a__"foo"__

```````````````````````````````` Intraword strong emphasis is forbidden with `__`: ```````````````````````````````` example foo__bar__ .

foo__bar__

```````````````````````````````` ```````````````````````````````` example 5__6__78 .

5__6__78

```````````````````````````````` ```````````````````````````````` example пристаням__стремятся__ .

пристаням__стремятся__

```````````````````````````````` ```````````````````````````````` example __foo, __bar__, baz__ .

foo, bar, baz

```````````````````````````````` This is strong emphasis, even though the opening delimiter is both left- and right-flanking, because it is preceded by punctuation: ```````````````````````````````` example foo-__(bar)__ .

foo-(bar)

```````````````````````````````` Rule 7: This is not strong emphasis, because the closing delimiter is preceded by whitespace: ```````````````````````````````` example **foo bar ** .

**foo bar **

```````````````````````````````` (Nor can it be interpreted as an emphasized `*foo bar *`, because of Rule 11.) This is not strong emphasis, because the second `**` is preceded by punctuation and followed by an alphanumeric: ```````````````````````````````` example **(**foo) .

**(**foo)

```````````````````````````````` The point of this restriction is more easily appreciated with these examples: ```````````````````````````````` example *(**foo**)* .

(foo)

```````````````````````````````` ```````````````````````````````` example **Gomphocarpus (*Gomphocarpus physocarpus*, syn. *Asclepias physocarpa*)** .

Gomphocarpus (Gomphocarpus physocarpus, syn. Asclepias physocarpa)

```````````````````````````````` ```````````````````````````````` example **foo "*bar*" foo** .

foo "bar" foo

```````````````````````````````` Intraword emphasis: ```````````````````````````````` example **foo**bar .

foobar

```````````````````````````````` Rule 8: This is not strong emphasis, because the closing delimiter is preceded by whitespace: ```````````````````````````````` example __foo bar __ .

__foo bar __

```````````````````````````````` This is not strong emphasis, because the second `__` is preceded by punctuation and followed by an alphanumeric: ```````````````````````````````` example __(__foo) .

__(__foo)

```````````````````````````````` The point of this restriction is more easily appreciated with this example: ```````````````````````````````` example _(__foo__)_ .

(foo)

```````````````````````````````` Intraword strong emphasis is forbidden with `__`: ```````````````````````````````` example __foo__bar .

__foo__bar

```````````````````````````````` ```````````````````````````````` example __пристаням__стремятся .

__пристаням__стремятся

```````````````````````````````` ```````````````````````````````` example __foo__bar__baz__ .

foo__bar__baz

```````````````````````````````` This is strong emphasis, even though the closing delimiter is both left- and right-flanking, because it is followed by punctuation: ```````````````````````````````` example __(bar)__. .

(bar).

```````````````````````````````` Rule 9: Any nonempty sequence of inline elements can be the contents of an emphasized span. ```````````````````````````````` example *foo [bar](/url)* .

foo bar

```````````````````````````````` ```````````````````````````````` example *foo bar* .

foo bar

```````````````````````````````` In particular, emphasis and strong emphasis can be nested inside emphasis: ```````````````````````````````` example _foo __bar__ baz_ .

foo bar baz

```````````````````````````````` ```````````````````````````````` example _foo _bar_ baz_ .

foo bar baz

```````````````````````````````` ```````````````````````````````` example __foo_ bar_ .

foo bar

```````````````````````````````` ```````````````````````````````` example *foo *bar** .

foo bar

```````````````````````````````` ```````````````````````````````` example *foo **bar** baz* .

foo bar baz

```````````````````````````````` ```````````````````````````````` example *foo**bar**baz* .

foobarbaz

```````````````````````````````` Note that in the preceding case, the interpretation ``` markdown

<p><em>foo</em><em>bar<em></em>baz</em></p>

``` is precluded by the condition that a delimiter that can both open and close (like the `*` after `foo`) cannot form emphasis if the sum of the lengths of the delimiter runs containing the opening and closing delimiters is a multiple of 3 unless both lengths are multiples of 3. For the same reason, we don't get two consecutive emphasis sections in this example: ```````````````````````````````` example *foo**bar* .

foo**bar

```````````````````````````````` The same condition ensures that the following cases are all strong emphasis nested inside emphasis, even when the interior spaces are omitted: ```````````````````````````````` example ***foo** bar* .

foo bar

```````````````````````````````` ```````````````````````````````` example *foo **bar*** .

foo bar

```````````````````````````````` ```````````````````````````````` example *foo**bar*** .

foobar

```````````````````````````````` When the lengths of the interior closing and opening delimiter runs are *both* multiples of 3, though, they can match to create emphasis: ```````````````````````````````` example foo***bar***baz .

foobarbaz

```````````````````````````````` ```````````````````````````````` example foo******bar*********baz .

foobar***baz

```````````````````````````````` Indefinite levels of nesting are possible: ```````````````````````````````` example *foo **bar *baz* bim** bop* .

foo bar baz bim bop

```````````````````````````````` ```````````````````````````````` example *foo [*bar*](/url)* .

foo bar

```````````````````````````````` There can be no empty emphasis or strong emphasis: ```````````````````````````````` example ** is not an empty emphasis .

** is not an empty emphasis

```````````````````````````````` ```````````````````````````````` example **** is not an empty strong emphasis .

**** is not an empty strong emphasis

```````````````````````````````` Rule 10: Any nonempty sequence of inline elements can be the contents of a strongly emphasized span. ```````````````````````````````` example **foo [bar](/url)** .

foo bar

```````````````````````````````` ```````````````````````````````` example **foo bar** .

foo bar

```````````````````````````````` In particular, emphasis and strong emphasis can be nested inside strong emphasis: ```````````````````````````````` example __foo _bar_ baz__ .

foo bar baz

```````````````````````````````` ```````````````````````````````` example __foo __bar__ baz__ .

foo bar baz

```````````````````````````````` ```````````````````````````````` example ____foo__ bar__ .

foo bar

```````````````````````````````` ```````````````````````````````` example **foo **bar**** .

foo bar

```````````````````````````````` ```````````````````````````````` example **foo *bar* baz** .

foo bar baz

```````````````````````````````` ```````````````````````````````` example **foo*bar*baz** .

foobarbaz

```````````````````````````````` ```````````````````````````````` example ***foo* bar** .

foo bar

```````````````````````````````` ```````````````````````````````` example **foo *bar*** .

foo bar

```````````````````````````````` Indefinite levels of nesting are possible: ```````````````````````````````` example **foo *bar **baz** bim* bop** .

foo bar baz bim bop

```````````````````````````````` ```````````````````````````````` example **foo [*bar*](/url)** .

foo bar

```````````````````````````````` There can be no empty emphasis or strong emphasis: ```````````````````````````````` example __ is not an empty emphasis .

__ is not an empty emphasis

```````````````````````````````` ```````````````````````````````` example ____ is not an empty strong emphasis .

____ is not an empty strong emphasis

```````````````````````````````` Rule 11: ```````````````````````````````` example foo *** .

foo ***

```````````````````````````````` ```````````````````````````````` example foo *\** .

foo *

```````````````````````````````` ```````````````````````````````` example foo *_* .

foo _

```````````````````````````````` ```````````````````````````````` example foo ***** .

foo *****

```````````````````````````````` ```````````````````````````````` example foo **\*** .

foo *

```````````````````````````````` ```````````````````````````````` example foo **_** .

foo _

```````````````````````````````` Note that when delimiters do not match evenly, Rule 11 determines that the excess literal `*` characters will appear outside of the emphasis, rather than inside it: ```````````````````````````````` example **foo* .

*foo

```````````````````````````````` ```````````````````````````````` example *foo** .

foo*

```````````````````````````````` ```````````````````````````````` example ***foo** .

*foo

```````````````````````````````` ```````````````````````````````` example ****foo* .

***foo

```````````````````````````````` ```````````````````````````````` example **foo*** .

foo*

```````````````````````````````` ```````````````````````````````` example *foo**** .

foo***

```````````````````````````````` Rule 12: ```````````````````````````````` example foo ___ .

foo ___

```````````````````````````````` ```````````````````````````````` example foo _\__ .

foo _

```````````````````````````````` ```````````````````````````````` example foo _*_ .

foo *

```````````````````````````````` ```````````````````````````````` example foo _____ .

foo _____

```````````````````````````````` ```````````````````````````````` example foo __\___ .

foo _

```````````````````````````````` ```````````````````````````````` example foo __*__ .

foo *

```````````````````````````````` ```````````````````````````````` example __foo_ .

_foo

```````````````````````````````` Note that when delimiters do not match evenly, Rule 12 determines that the excess literal `_` characters will appear outside of the emphasis, rather than inside it: ```````````````````````````````` example _foo__ .

foo_

```````````````````````````````` ```````````````````````````````` example ___foo__ .

_foo

```````````````````````````````` ```````````````````````````````` example ____foo_ .

___foo

```````````````````````````````` ```````````````````````````````` example __foo___ .

foo_

```````````````````````````````` ```````````````````````````````` example _foo____ .

foo___

```````````````````````````````` Rule 13 implies that if you want emphasis nested directly inside emphasis, you must use different delimiters: ```````````````````````````````` example **foo** .

foo

```````````````````````````````` ```````````````````````````````` example *_foo_* .

foo

```````````````````````````````` ```````````````````````````````` example __foo__ .

foo

```````````````````````````````` ```````````````````````````````` example _*foo*_ .

foo

```````````````````````````````` However, strong emphasis within strong emphasis is possible without switching delimiters: ```````````````````````````````` example ****foo**** .

foo

```````````````````````````````` ```````````````````````````````` example ____foo____ .

foo

```````````````````````````````` Rule 13 can be applied to arbitrarily long sequences of delimiters: ```````````````````````````````` example ******foo****** .

foo

```````````````````````````````` Rule 14: ```````````````````````````````` example ***foo*** .

foo

```````````````````````````````` ```````````````````````````````` example _____foo_____ .

foo

```````````````````````````````` Rule 15: ```````````````````````````````` example *foo _bar* baz_ .

foo _bar baz_

```````````````````````````````` ```````````````````````````````` example *foo __bar *baz bim__ bam* .

foo bar *baz bim bam

```````````````````````````````` Rule 16: ```````````````````````````````` example **foo **bar baz** .

**foo bar baz

```````````````````````````````` ```````````````````````````````` example *foo *bar baz* .

*foo bar baz

```````````````````````````````` Rule 17: ```````````````````````````````` example *[bar*](/url) .

*bar*

```````````````````````````````` ```````````````````````````````` example _foo [bar_](/url) .

_foo bar_

```````````````````````````````` ```````````````````````````````` example * .

*

```````````````````````````````` ```````````````````````````````` example ** .

**

```````````````````````````````` ```````````````````````````````` example __ .

__

```````````````````````````````` ```````````````````````````````` example *a `*`* .

a *

```````````````````````````````` ```````````````````````````````` example _a `_`_ .

a _

```````````````````````````````` ```````````````````````````````` example **a<http://foo.bar/?q=**> .

**ahttp://foo.bar/?q=**

```````````````````````````````` ```````````````````````````````` example __a<http://foo.bar/?q=__> .

__ahttp://foo.bar/?q=__

````````````````````````````````
## Strikethrough (extension) GFM enables the `strikethrough` extension, where an additional emphasis type is available. Strikethrough text is any text wrapped in two tildes (`~`). ```````````````````````````````` example strikethrough ~~Hi~~ Hello, world! .

Hi Hello, world!

```````````````````````````````` As with regular emphasis delimiters, a new paragraph will cause strikethrough parsing to cease: ```````````````````````````````` example strikethrough This ~~has a new paragraph~~. .

This ~~has a

new paragraph~~.

````````````````````````````````
## Links A link contains [link text] (the visible text), a [link destination] (the URI that is the link destination), and optionally a [link title]. There are two basic kinds of links in Markdown. In [inline links] the destination and title are given immediately after the link text. In [reference links] the destination and title are defined elsewhere in the document. A [link text](@) consists of a sequence of zero or more inline elements enclosed by square brackets (`[` and `]`). The following rules apply: - Links may not contain other links, at any level of nesting. If multiple otherwise valid link definitions appear nested inside each other, the inner-most definition is used. - Brackets are allowed in the [link text] only if (a) they are backslash-escaped or (b) they appear as a matched pair of brackets, with an open bracket `[`, a sequence of zero or more inlines, and a close bracket `]`. - Backtick [code spans], [autolinks], and raw [HTML tags] bind more tightly than the brackets in link text. Thus, for example, `` [foo`]` `` could not be a link text, since the second `]` is part of a code span. - The brackets in link text bind more tightly than markers for [emphasis and strong emphasis]. Thus, for example, `*[foo*](url)` is a link. A [link destination](@) consists of either - a sequence of zero or more characters between an opening `<` and a closing `>` that contains no line breaks or unescaped `<` or `>` characters, or - a nonempty sequence of characters that does not start with `<`, does not include ASCII space or control characters, and includes parentheses only if (a) they are backslash-escaped or (b) they are part of a balanced pair of unescaped parentheses. (Implementations may impose limits on parentheses nesting to avoid performance issues, but at least three levels of nesting should be supported.) 
A [link title](@) consists of either - a sequence of zero or more characters between straight double-quote characters (`"`), including a `"` character only if it is backslash-escaped, or - a sequence of zero or more characters between straight single-quote characters (`'`), including a `'` character only if it is backslash-escaped, or - a sequence of zero or more characters between matching parentheses (`(...)`), including a `(` or `)` character only if it is backslash-escaped. Although [link titles] may span multiple lines, they may not contain a [blank line]. An [inline link](@) consists of a [link text] followed immediately by a left parenthesis `(`, optional [whitespace], an optional [link destination], an optional [link title] separated from the link destination by [whitespace], optional [whitespace], and a right parenthesis `)`. The link's text consists of the inlines contained in the [link text] (excluding the enclosing square brackets). The link's URI consists of the link destination, excluding enclosing `<...>` if present, with backslash-escapes in effect as described above. The link's title consists of the link title, excluding its enclosing delimiters, with backslash-escapes in effect as described above. Here is a simple inline link: ```````````````````````````````` example [link](/uri "title") .

link

```````````````````````````````` The title may be omitted: ```````````````````````````````` example [link](/uri) .

link

```````````````````````````````` Both the title and the destination may be omitted: ```````````````````````````````` example [link]() .

link

```````````````````````````````` ```````````````````````````````` example [link](<>) .

link

```````````````````````````````` The destination can only contain spaces if it is enclosed in pointy brackets: ```````````````````````````````` example [link](/my uri) .

[link](/my uri)

```````````````````````````````` ```````````````````````````````` example [link](</my uri>) .

link

```````````````````````````````` The destination cannot contain line breaks, even if enclosed in pointy brackets: ```````````````````````````````` example [link](foo bar) .

[link](foo bar)

```````````````````````````````` ```````````````````````````````` example [link](<foo bar>) .

[link]()

```````````````````````````````` The destination can contain `)` if it is enclosed in pointy brackets: ```````````````````````````````` example [a](<b)c>) .

a

```````````````````````````````` Pointy brackets that enclose links must be unescaped: ```````````````````````````````` example [link](<foo\>) .

[link](<foo>)

```````````````````````````````` These are not links, because the opening pointy bracket is not matched properly: ```````````````````````````````` example [a](<b)c [a](<b)c> [a](<b>c) .

[a](<b)c [a](<b)c> [a](c)

```````````````````````````````` Parentheses inside the link destination may be escaped: ```````````````````````````````` example [link](\(foo\)) .

link

```````````````````````````````` Any number of parentheses are allowed without escaping, as long as they are balanced: ```````````````````````````````` example [link](foo(and(bar))) .

link

```````````````````````````````` However, if you have unbalanced parentheses, you need to escape or use the `<...>` form: ```````````````````````````````` example [link](foo\(and\(bar\)) .

link

```````````````````````````````` ```````````````````````````````` example [link](<foo(and(bar)>) .

link

```````````````````````````````` Parentheses and other symbols can also be escaped, as usual in Markdown: ```````````````````````````````` example [link](foo\)\:) .

link

```````````````````````````````` A link can contain fragment identifiers and queries: ```````````````````````````````` example [link](#fragment) [link](http://example.com#fragment) [link](http://example.com?foo=3#frag) .

link

link

link

```````````````````````````````` Note that a backslash before a non-escapable character is just a backslash: ```````````````````````````````` example [link](foo\bar) .

link

```````````````````````````````` URL-escaping should be left alone inside the destination, as all URL-escaped characters are also valid URL characters. Entity and numerical character references in the destination will be parsed into the corresponding Unicode code points, as usual. These may be optionally URL-escaped when written as HTML, but this spec does not enforce any particular policy for rendering URLs in HTML or other formats. Renderers may make different decisions about how to escape or normalize URLs in the output. ```````````````````````````````` example [link](foo%20bä) .

link

```````````````````````````````` Note that, because titles can often be parsed as destinations, if you try to omit the destination and keep the title, you'll get unexpected results: ```````````````````````````````` example [link]("title") .

link

```````````````````````````````` Titles may be in single quotes, double quotes, or parentheses: ```````````````````````````````` example [link](/url "title") [link](/url 'title') [link](/url (title)) .

link link link

```````````````````````````````` Backslash escapes and entity and numeric character references may be used in titles: ```````````````````````````````` example [link](/url "title \""") .

link

```````````````````````````````` Titles must be separated from the link using a [whitespace]. Other [Unicode whitespace] like non-breaking space doesn't work. ```````````````````````````````` example [link](/url "title") .

link

```````````````````````````````` Nested balanced quotes are not allowed without escaping: ```````````````````````````````` example [link](/url "title "and" title") .

[link](/url "title "and" title")

```````````````````````````````` But it is easy to work around this by using a different quote type: ```````````````````````````````` example [link](/url 'title "and" title') .

link

```````````````````````````````` (Note: `Markdown.pl` did allow double quotes inside a double-quoted title, and its test suite included a test demonstrating this. But it is hard to see a good rationale for the extra complexity this brings, since there are already many ways---backslash escaping, entity and numeric character references, or using a different quote type for the enclosing title---to write titles containing double quotes. `Markdown.pl`'s handling of titles has a number of other strange features. For example, it allows single-quoted titles in inline links, but not reference links. And, in reference links but not inline links, it allows a title to begin with `"` and end with `)`. `Markdown.pl` 1.0.1 even allows titles with no closing quotation mark, though 1.0.2b8 does not. It seems preferable to adopt a simple, rational rule that works the same way in inline links and link reference definitions.) [Whitespace] is allowed around the destination and title: ```````````````````````````````` example [link]( /uri "title" ) .

link

```````````````````````````````` But it is not allowed between the link text and the following parenthesis: ```````````````````````````````` example [link] (/uri) .

[link] (/uri)

```````````````````````````````` The link text may contain balanced brackets, but not unbalanced ones, unless they are escaped: ```````````````````````````````` example [link [foo [bar]]](/uri) .

link [foo [bar]]

```````````````````````````````` ```````````````````````````````` example [link] bar](/uri) .

[link] bar](/uri)

```````````````````````````````` ```````````````````````````````` example [link [bar](/uri) .

[link bar

```````````````````````````````` ```````````````````````````````` example [link \[bar](/uri) .

link [bar

```````````````````````````````` The link text may contain inline content: ```````````````````````````````` example [link *foo **bar** `#`*](/uri) .

link foo bar #

```````````````````````````````` ```````````````````````````````` example [![moon](moon.jpg)](/uri) .

moon

```````````````````````````````` However, links may not contain other links, at any level of nesting. ```````````````````````````````` example [foo [bar](/uri)](/uri) .

[foo bar](/uri)

```````````````````````````````` ```````````````````````````````` example [foo *[bar [baz](/uri)](/uri)*](/uri) .

[foo [bar baz](/uri)](/uri)

```````````````````````````````` ```````````````````````````````` example ![[[foo](uri1)](uri2)](uri3) .

[foo](uri2)

```````````````````````````````` These cases illustrate the precedence of link text grouping over emphasis grouping: ```````````````````````````````` example *[foo*](/uri) .

*foo*

```````````````````````````````` ```````````````````````````````` example [foo *bar](baz*) .

foo *bar

```````````````````````````````` Note that brackets that *aren't* part of links do not take precedence: ```````````````````````````````` example *foo [bar* baz] .

foo [bar baz]

```````````````````````````````` These cases illustrate the precedence of HTML tags, code spans, and autolinks over link grouping: ```````````````````````````````` example [foo <bar attr="](baz)"> .

[foo

```````````````````````````````` ```````````````````````````````` example [foo`](/uri)` .

[foo](/uri)

```````````````````````````````` ```````````````````````````````` example [foo<http://example.com/?search=](uri)> .

[foohttp://example.com/?search=](uri)

```````````````````````````````` There are three kinds of [reference link](@)s: [full](#full-reference-link), [collapsed](#collapsed-reference-link), and [shortcut](#shortcut-reference-link). A [full reference link](@) consists of a [link text] immediately followed by a [link label] that [matches] a [link reference definition] elsewhere in the document. A [link label](@) begins with a left bracket (`[`) and ends with the first right bracket (`]`) that is not backslash-escaped. Between these brackets there must be at least one [non-whitespace character]. Unescaped square bracket characters are not allowed inside the opening and closing square brackets of [link labels]. A link label can have at most 999 characters inside the square brackets. One label [matches](@) another just in case their normalized forms are equal. To normalize a label, strip off the opening and closing brackets, perform the *Unicode case fold*, strip leading and trailing [whitespace] and collapse consecutive internal [whitespace] to a single space. If there are multiple matching reference link definitions, the one that comes first in the document is used. (It is desirable in such cases to emit a warning.) The contents of the first link label are parsed as inlines, which are used as the link's text. The link's URI and title are provided by the matching [link reference definition]. Here is a simple example: ```````````````````````````````` example [foo][bar] [bar]: /url "title" .

foo

```````````````````````````````` The rules for the [link text] are the same as with [inline links]. Thus: The link text may contain balanced brackets, but not unbalanced ones, unless they are escaped: ```````````````````````````````` example [link [foo [bar]]][ref] [ref]: /uri .

link [foo [bar]]

```````````````````````````````` ```````````````````````````````` example [link \[bar][ref] [ref]: /uri .

link [bar

```````````````````````````````` The link text may contain inline content: ```````````````````````````````` example [link *foo **bar** `#`*][ref] [ref]: /uri .

link foo bar #

```````````````````````````````` ```````````````````````````````` example [![moon](moon.jpg)][ref] [ref]: /uri .

moon

```````````````````````````````` However, links may not contain other links, at any level of nesting. ```````````````````````````````` example [foo [bar](/uri)][ref] [ref]: /uri .

[foo bar]ref

```````````````````````````````` ```````````````````````````````` example [foo *bar [baz][ref]*][ref] [ref]: /uri .

[foo bar baz]ref

```````````````````````````````` (In the examples above, we have two [shortcut reference links] instead of one [full reference link].) The following cases illustrate the precedence of link text grouping over emphasis grouping: ```````````````````````````````` example *[foo*][ref] [ref]: /uri .

*foo*

```````````````````````````````` ```````````````````````````````` example [foo *bar][ref] [ref]: /uri .

foo *bar

```````````````````````````````` These cases illustrate the precedence of HTML tags, code spans, and autolinks over link grouping: ```````````````````````````````` example [foo <bar attr="][ref]"> [ref]: /uri .

[foo <bar attr="][ref]">

```````````````````````````````` ```````````````````````````````` example [foo`][ref]` [ref]: /uri .

[foo][ref]

```````````````````````````````` ```````````````````````````````` example [foo<http://example.com/?search=][ref]> [ref]: /uri .

[foohttp://example.com/?search=][ref]

```````````````````````````````` Matching is case-insensitive: ```````````````````````````````` example [foo][BaR] [bar]: /url "title" .

foo

```````````````````````````````` Unicode case fold is used: ```````````````````````````````` example [Толпой][Толпой] is a Russian word. [ТОЛПОЙ]: /url .

Толпой is a Russian word.

```````````````````````````````` Consecutive internal [whitespace] is treated as one space for purposes of determining matching: ```````````````````````````````` example [Foo bar]: /url [Baz][Foo bar] .

Baz

```````````````````````````````` No [whitespace] is allowed between the [link text] and the [link label]: ```````````````````````````````` example [foo] [bar] [bar]: /url "title" .

[foo] bar

```````````````````````````````` ```````````````````````````````` example [foo] [bar] [bar]: /url "title" .

[foo] bar

```````````````````````````````` This is a departure from John Gruber's original Markdown syntax description, which explicitly allows whitespace between the link text and the link label. It brings reference links in line with [inline links], which (according to both original Markdown and this spec) cannot have whitespace after the link text. More importantly, it prevents inadvertent capture of consecutive [shortcut reference links]. If whitespace is allowed between the link text and the link label, then in the following we will have a single reference link, not two shortcut reference links, as intended: ``` markdown [foo] [bar] [foo]: /url1 [bar]: /url2 ``` (Note that [shortcut reference links] were introduced by Gruber himself in a beta version of `Markdown.pl`, but never included in the official syntax description. Without shortcut reference links, it is harmless to allow space between the link text and link label; but once shortcut references are introduced, it is too dangerous to allow this, as it frequently leads to unintended results.) When there are multiple matching [link reference definitions], the first is used: ```````````````````````````````` example [foo]: /url1 [foo]: /url2 [bar][foo] .

bar

```````````````````````````````` Note that matching is performed on normalized strings, not parsed inline content. So the following does not match, even though the labels define equivalent inline content: ```````````````````````````````` example [bar][foo\!] [foo!]: /url .

[bar][foo!]

```````````````````````````````` [Link labels] cannot contain brackets, unless they are backslash-escaped: ```````````````````````````````` example [foo][ref[] [ref[]: /uri .

[foo][ref[]

[ref[]: /uri

```````````````````````````````` ```````````````````````````````` example [foo][ref[bar]] [ref[bar]]: /uri .

[foo][ref[bar]]

[ref[bar]]: /uri

```````````````````````````````` ```````````````````````````````` example [[[foo]]] [[[foo]]]: /url .

[[[foo]]]

[[[foo]]]: /url

```````````````````````````````` ```````````````````````````````` example [foo][ref\[] [ref\[]: /uri .

foo

```````````````````````````````` Note that in this example `]` is not backslash-escaped: ```````````````````````````````` example [bar\\]: /uri [bar\\] .

bar\

```````````````````````````````` A [link label] must contain at least one [non-whitespace character]: ```````````````````````````````` example [] []: /uri .

[]

[]: /uri

```````````````````````````````` ```````````````````````````````` example [ ] [ ]: /uri .

[ ]

[ ]: /uri

```````````````````````````````` A [collapsed reference link](@) consists of a [link label] that [matches] a [link reference definition] elsewhere in the document, followed by the string `[]`. The contents of the first link label are parsed as inlines, which are used as the link's text. The link's URI and title are provided by the matching reference link definition. Thus, `[foo][]` is equivalent to `[foo][foo]`. ```````````````````````````````` example [foo][] [foo]: /url "title" .

foo

```````````````````````````````` ```````````````````````````````` example [*foo* bar][] [*foo* bar]: /url "title" .

foo bar

```````````````````````````````` The link labels are case-insensitive: ```````````````````````````````` example [Foo][] [foo]: /url "title" .

Foo

```````````````````````````````` As with full reference links, [whitespace] is not allowed between the two sets of brackets: ```````````````````````````````` example [foo] [] [foo]: /url "title" .

foo []

```````````````````````````````` A [shortcut reference link](@) consists of a [link label] that [matches] a [link reference definition] elsewhere in the document and is not followed by `[]` or a link label. The contents of the first link label are parsed as inlines, which are used as the link's text. The link's URI and title are provided by the matching link reference definition. Thus, `[foo]` is equivalent to `[foo][]`. ```````````````````````````````` example [foo] [foo]: /url "title" .

foo

```````````````````````````````` ```````````````````````````````` example [*foo* bar] [*foo* bar]: /url "title" .

foo bar

```````````````````````````````` ```````````````````````````````` example [[*foo* bar]] [*foo* bar]: /url "title" .

[foo bar]

```````````````````````````````` ```````````````````````````````` example [[bar [foo] [foo]: /url .

[[bar foo

```````````````````````````````` The link labels are case-insensitive: ```````````````````````````````` example [Foo] [foo]: /url "title" .

Foo

```````````````````````````````` A space after the link text should be preserved: ```````````````````````````````` example [foo] bar [foo]: /url .

foo bar

```````````````````````````````` If you just want bracketed text, you can backslash-escape the opening bracket to avoid links: ```````````````````````````````` example \[foo] [foo]: /url "title" .

[foo]

```````````````````````````````` Note that this is a link, because a link label ends with the first following closing bracket: ```````````````````````````````` example [foo*]: /url *[foo*] .

*foo*

```````````````````````````````` Full and collapsed references take precedence over shortcut references: ```````````````````````````````` example [foo][bar] [foo]: /url1 [bar]: /url2 .

foo

```````````````````````````````` ```````````````````````````````` example [foo][] [foo]: /url1 .

foo

```````````````````````````````` Inline links also take precedence: ```````````````````````````````` example [foo]() [foo]: /url1 .

foo

```````````````````````````````` ```````````````````````````````` example [foo](not a link) [foo]: /url1 .

foo(not a link)

```````````````````````````````` In the following case `[bar][baz]` is parsed as a reference, `[foo]` as normal text: ```````````````````````````````` example [foo][bar][baz] [baz]: /url .

[foo]bar

```````````````````````````````` Here, though, `[foo][bar]` is parsed as a reference, since `[bar]` is defined: ```````````````````````````````` example [foo][bar][baz] [baz]: /url1 [bar]: /url2 .

foobaz

```````````````````````````````` Here `[foo]` is not parsed as a shortcut reference, because it is followed by a link label (even though `[bar]` is not defined): ```````````````````````````````` example [foo][bar][baz] [baz]: /url1 [foo]: /url2 .

[foo]bar

```````````````````````````````` ## Images Syntax for images is like the syntax for links, with one difference. Instead of [link text], we have an [image description](@). The rules for this are the same as for [link text], except that (a) an image description starts with `![` rather than `[`, and (b) an image description may contain links. An image description has inline elements as its contents. When an image is rendered to HTML, this is standardly used as the image's `alt` attribute. ```````````````````````````````` example ![foo](/url "title") .

foo

```````````````````````````````` ```````````````````````````````` example ![foo *bar*] [foo *bar*]: train.jpg "train & tracks" .

foo bar

```````````````````````````````` ```````````````````````````````` example ![foo ![bar](/url)](/url2) .

foo bar

```````````````````````````````` ```````````````````````````````` example ![foo [bar](/url)](/url2) .

foo bar

```````````````````````````````` Though this spec is concerned with parsing, not rendering, it is recommended that in rendering to HTML, only the plain string content of the [image description] be used. Note that in the above example, the alt attribute's value is `foo bar`, not `foo [bar](/url)` or `foo <a href="/url">bar</a>`. Only the plain string content is rendered, without formatting. ```````````````````````````````` example ![foo *bar*][] [foo *bar*]: train.jpg "train & tracks" .

foo bar

```````````````````````````````` ```````````````````````````````` example ![foo *bar*][foobar] [FOOBAR]: train.jpg "train & tracks" .

foo bar

```````````````````````````````` ```````````````````````````````` example ![foo](train.jpg) .

foo

```````````````````````````````` ```````````````````````````````` example My ![foo bar](/path/to/train.jpg "title" ) .

My foo bar

```````````````````````````````` ```````````````````````````````` example ![foo]() .

foo

```````````````````````````````` ```````````````````````````````` example ![](/url) .

```````````````````````````````` Reference-style: ```````````````````````````````` example ![foo][bar] [bar]: /url .

foo

```````````````````````````````` ```````````````````````````````` example ![foo][bar] [BAR]: /url .

foo

```````````````````````````````` Collapsed: ```````````````````````````````` example ![foo][] [foo]: /url "title" .

foo

```````````````````````````````` ```````````````````````````````` example ![*foo* bar][] [*foo* bar]: /url "title" .

foo bar

```````````````````````````````` The labels are case-insensitive: ```````````````````````````````` example ![Foo][] [foo]: /url "title" .

Foo

```````````````````````````````` As with reference links, [whitespace] is not allowed between the two sets of brackets: ```````````````````````````````` example ![foo] [] [foo]: /url "title" .

foo []

```````````````````````````````` Shortcut: ```````````````````````````````` example ![foo] [foo]: /url "title" .

foo

```````````````````````````````` ```````````````````````````````` example ![*foo* bar] [*foo* bar]: /url "title" .

foo bar

```````````````````````````````` Note that link labels cannot contain unescaped brackets: ```````````````````````````````` example ![[foo]] [[foo]]: /url "title" .

![[foo]]

[[foo]]: /url "title"

```````````````````````````````` The link labels are case-insensitive: ```````````````````````````````` example ![Foo] [foo]: /url "title" .

Foo

```````````````````````````````` If you just want a literal `!` followed by bracketed text, you can backslash-escape the opening `[`: ```````````````````````````````` example !\[foo] [foo]: /url "title" .

![foo]

```````````````````````````````` If you want a link after a literal `!`, backslash-escape the `!`: ```````````````````````````````` example \![foo] [foo]: /url "title" .

!foo

```````````````````````````````` ## Autolinks [Autolink](@)s are absolute URIs and email addresses inside `<` and `>`. They are parsed as links, with the URL or email address as the link label. A [URI autolink](@) consists of `<`, followed by an [absolute URI] followed by `>`. It is parsed as a link to the URI, with the URI as the link's label. An [absolute URI](@), for these purposes, consists of a [scheme] followed by a colon (`:`) followed by zero or more characters other than ASCII [whitespace] and control characters, `<`, and `>`. If the URI includes these characters, they must be percent-encoded (e.g. `%20` for a space). For purposes of this spec, a [scheme](@) is any sequence of 2--32 characters beginning with an ASCII letter and followed by any combination of ASCII letters, digits, or the symbols plus ("+"), period ("."), or hyphen ("-"). Here are some valid autolinks: ```````````````````````````````` example <http://foo.bar.baz> .

http://foo.bar.baz

```````````````````````````````` ```````````````````````````````` example <http://foo.bar.baz/test?q=hello&id=22&boolean> .

http://foo.bar.baz/test?q=hello&id=22&boolean

```````````````````````````````` ```````````````````````````````` example <irc://foo.bar:2233/baz> .

irc://foo.bar:2233/baz

```````````````````````````````` Uppercase is also fine: ```````````````````````````````` example <MAILTO:FOO@BAR.BAZ> .

MAILTO:FOO@BAR.BAZ

```````````````````````````````` Note that many strings that count as [absolute URIs] for purposes of this spec are not valid URIs, because their schemes are not registered or because of other problems with their syntax: ```````````````````````````````` example <a+b+c:d> .

a+b+c:d

```````````````````````````````` ```````````````````````````````` example <made-up-scheme://foo,bar> .

made-up-scheme://foo,bar

```````````````````````````````` ```````````````````````````````` example <http://../> .

http://../

```````````````````````````````` ```````````````````````````````` example <localhost:5001/foo> .

localhost:5001/foo

```````````````````````````````` Spaces are not allowed in autolinks: ```````````````````````````````` example <http://foo.bar/baz bim> .

<http://foo.bar/baz bim>

```````````````````````````````` Backslash-escapes do not work inside autolinks: ```````````````````````````````` example <http://example.com/\[\> .

http://example.com/\[\

```````````````````````````````` An [email autolink](@) consists of `<`, followed by an [email address], followed by `>`. The link's label is the email address, and the URL is `mailto:` followed by the email address. An [email address](@), for these purposes, is anything that matches the [non-normative regex from the HTML5 spec](https://html.spec.whatwg.org/multipage/forms.html#e-mail-state-(type=email)): /^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])? (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/ Examples of email autolinks: ```````````````````````````````` example <foo@bar.example.com> .

foo@bar.example.com

```````````````````````````````` ```````````````````````````````` example <foo+special@Bar.baz-bar0.com> .

foo+special@Bar.baz-bar0.com

```````````````````````````````` Backslash-escapes do not work inside email autolinks: ```````````````````````````````` example <foo\+@bar.example.com> .

<foo+@bar.example.com>

```````````````````````````````` These are not autolinks: ```````````````````````````````` example <> .

<>

```````````````````````````````` ```````````````````````````````` example < http://foo.bar > .

< http://foo.bar >

```````````````````````````````` ```````````````````````````````` example <m:abc> .

<m:abc>

```````````````````````````````` ```````````````````````````````` example <foo.bar.baz> .

<foo.bar.baz>

```````````````````````````````` ```````````````````````````````` example http://example.com .

http://example.com

```````````````````````````````` ```````````````````````````````` example foo@bar.example.com .

foo@bar.example.com

````````````````````````````````
## Autolinks (extension) GFM enables the `autolink` extension, where autolinks will be recognised in a greater number of conditions. [Autolink]s can also be constructed without requiring the use of `<` and `>` to delimit them, although they will be recognized under a smaller set of circumstances. All such recognized autolinks can only come at the beginning of a line, after whitespace, or any of the delimiting characters `*`, `_`, `~`, and `(`. An [extended www autolink](@) will be recognized when the text `www.` is found followed by a [valid domain]. A [valid domain](@) consists of segments of alphanumeric characters, underscores (`_`) and hyphens (`-`) separated by periods (`.`). There must be at least one period, and no underscores may be present in the last two segments of the domain. The scheme `http` will be inserted automatically: ```````````````````````````````` example autolink www.commonmark.org .

www.commonmark.org

```````````````````````````````` After a [valid domain], zero or more non-space non-`<` characters may follow: ```````````````````````````````` example autolink Visit www.commonmark.org/help for more information. .

Visit www.commonmark.org/help for more information.

```````````````````````````````` We then apply [extended autolink path validation](@) as follows: Trailing punctuation (specifically, `?`, `!`, `.`, `,`, `:`, `*`, `_`, and `~`) will not be considered part of the autolink, though they may be included in the interior of the link: ```````````````````````````````` example autolink Visit www.commonmark.org. Visit www.commonmark.org/a.b. .

Visit www.commonmark.org.

Visit www.commonmark.org/a.b.

```````````````````````````````` When an autolink ends in `)`, we scan the entire autolink for the total number of parentheses. If there is a greater number of closing parentheses than opening ones, we don't consider the unmatched trailing parentheses part of the autolink, in order to facilitate including an autolink inside a parenthesis: ```````````````````````````````` example autolink www.google.com/search?q=Markup+(business) www.google.com/search?q=Markup+(business))) (www.google.com/search?q=Markup+(business)) (www.google.com/search?q=Markup+(business) .

www.google.com/search?q=Markup+(business)

www.google.com/search?q=Markup+(business)))

(www.google.com/search?q=Markup+(business))

(www.google.com/search?q=Markup+(business)

```````````````````````````````` This check is only done when the link ends in a closing parenthesis `)`, so if the only parentheses are in the interior of the autolink, no special rules are applied: ```````````````````````````````` example autolink www.google.com/search?q=(business))+ok .

www.google.com/search?q=(business))+ok

```````````````````````````````` If an autolink ends in a semicolon (`;`), we check to see if it appears to resemble an [entity reference][entity references]; if the preceding text is `&` followed by one or more alphanumeric characters. If so, it is excluded from the autolink: ```````````````````````````````` example autolink www.google.com/search?q=commonmark&hl=en www.google.com/search?q=commonmark&hl; .

www.google.com/search?q=commonmark&hl=en

www.google.com/search?q=commonmark&hl;

```````````````````````````````` `<` immediately ends an autolink. ```````````````````````````````` example autolink www.commonmark.org/he<lp .

www.commonmark.org/he<lp

```````````````````````````````` An [extended url autolink](@) will be recognised when one of the schemes `http://`, `https://`, or `ftp://`, followed by a [valid domain], then zero or more non-space non-`<` characters according to [extended autolink path validation]: ```````````````````````````````` example autolink http://commonmark.org (Visit https://encrypted.google.com/search?q=Markup+(business)) Anonymous FTP is available at ftp://foo.bar.baz. .

http://commonmark.org

(Visit https://encrypted.google.com/search?q=Markup+(business))

Anonymous FTP is available at ftp://foo.bar.baz.

```````````````````````````````` An [extended email autolink](@) will be recognised when an email address is recognised within any text node. Email addresses are recognised according to the following rules: * One or more characters which are alphanumeric, or `.`, `-`, `_`, or `+`. * An `@` symbol. * One or more characters which are alphanumeric, or `-` or `_`, separated by periods (`.`). There must be at least one period. The last character must not be one of `-` or `_`. The scheme `mailto:` will automatically be added to the generated link: ```````````````````````````````` example autolink foo@bar.baz .

foo@bar.baz

```````````````````````````````` `+` can occur before the `@`, but not after. ```````````````````````````````` example autolink hello@mail+xyz.example isn't valid, but hello+xyz@mail.example is. .

hello@mail+xyz.example isn't valid, but hello+xyz@mail.example is.

```````````````````````````````` `.`, `-`, and `_` can occur on both sides of the `@`, but only `.` may occur at the end of the email address, in which case it will not be considered part of the address: ```````````````````````````````` example autolink a.b-c_d@a.b a.b-c_d@a.b. a.b-c_d@a.b- a.b-c_d@a.b_ .

a.b-c_d@a.b

a.b-c_d@a.b.

a.b-c_d@a.b-

a.b-c_d@a.b_

````````````````````````````````
## Raw HTML Text between `<` and `>` that looks like an HTML tag is parsed as a raw HTML tag and will be rendered in HTML without escaping. Tag and attribute names are not limited to current HTML tags, so custom tags (and even, say, DocBook tags) may be used. Here is the grammar for tags: A [tag name](@) consists of an ASCII letter followed by zero or more ASCII letters, digits, or hyphens (`-`). An [attribute](@) consists of [whitespace], an [attribute name], and an optional [attribute value specification]. An [attribute name](@) consists of an ASCII letter, `_`, or `:`, followed by zero or more ASCII letters, digits, `_`, `.`, `:`, or `-`. (Note: This is the XML specification restricted to ASCII. HTML5 is laxer.) An [attribute value specification](@) consists of optional [whitespace], a `=` character, optional [whitespace], and an [attribute value]. An [attribute value](@) consists of an [unquoted attribute value], a [single-quoted attribute value], or a [double-quoted attribute value]. An [unquoted attribute value](@) is a nonempty string of characters not including [whitespace], `"`, `'`, `=`, `<`, `>`, or `` ` ``. A [single-quoted attribute value](@) consists of `'`, zero or more characters not including `'`, and a final `'`. A [double-quoted attribute value](@) consists of `"`, zero or more characters not including `"`, and a final `"`. An [open tag](@) consists of a `<` character, a [tag name], zero or more [attributes], optional [whitespace], an optional `/` character, and a `>` character. A [closing tag](@) consists of the string `</`, a [tag name], optional [whitespace], and the character `>`. An [HTML comment](@) consists of `<!--` + *text* + `-->`, where *text* does not start with `>` or `->`, does not end with `-`, and does not contain `--`. (See the [HTML5 spec](http://www.w3.org/TR/html5/syntax.html#comments).) A [processing instruction](@) consists of the string `<?`, a string of characters not including the string `?>`, and the string `?>`. A [declaration](@) consists of the string `<!`, a name consisting of one or more uppercase ASCII letters, [whitespace], a string of characters not including the character `>`, and the character `>`. A [CDATA section](@) consists of the string `<![CDATA[`, a string of characters not including the string `]]>`, and the string `]]>`. 
An [HTML tag](@) consists of an [open tag], a [closing tag], an [HTML comment], a [processing instruction], a [declaration], or a [CDATA section]. Here are some simple open tags: ```````````````````````````````` example .

```````````````````````````````` Empty elements: ```````````````````````````````` example .

```````````````````````````````` [Whitespace] is allowed: ```````````````````````````````` example .

```````````````````````````````` With attributes: ```````````````````````````````` example .

```````````````````````````````` Custom tag names can be used: ```````````````````````````````` example Foo .

Foo

```````````````````````````````` Illegal tag names, not parsed as HTML: ```````````````````````````````` example <33> <__> .

<33> <__>

```````````````````````````````` Illegal attribute names: ```````````````````````````````` example
.

<a h*#ref="hi">

```````````````````````````````` Illegal attribute values: ```````````````````````````````` example
.

</a href="foo">

```````````````````````````````` Comments: ```````````````````````````````` example foo .

foo

```````````````````````````````` ```````````````````````````````` example foo .

foo <!-- not a comment -- two hyphens -->

```````````````````````````````` Not comments: ```````````````````````````````` example foo foo --> foo .

foo <!--> foo -->

foo <!-- foo--->

```````````````````````````````` Processing instructions: ```````````````````````````````` example foo .

foo

```````````````````````````````` Declarations: ```````````````````````````````` example foo .

foo

```````````````````````````````` CDATA sections: ```````````````````````````````` example foo &<]]> .

foo &<]]>

```````````````````````````````` Entity and numeric character references are preserved in HTML attributes: ```````````````````````````````` example foo
.

foo

```````````````````````````````` Backslash escapes do not work in HTML attributes: ```````````````````````````````` example foo .

foo

```````````````````````````````` ```````````````````````````````` example .

<a href=""">

````````````````````````````````
## Disallowed Raw HTML (extension) GFM enables the `tagfilter` extension, where the following HTML tags will be filtered when rendering HTML output: * `<title>` * `<textarea>` * `<style>` * `<xmp>` * `<iframe>` * `<noembed>` * `<noframes>` * `<script>` * `<plaintext>` Filtering is done by replacing the leading `<` with the entity `&lt;`. These tags are chosen in particular as they change how HTML is interpreted in a way unique to them (i.e. nested HTML is interpreted differently), and this is usually undesirable in the context of other rendered Markdown content. All other HTML tags are left untouched. ```````````````````````````````` example tagfilter <strong> <title> <style> <em> <blockquote> <xmp> is disallowed. <XMP> is also disallowed. </blockquote> . <p><strong> &lt;title> &lt;style> <em></p> <blockquote> &lt;xmp> is disallowed. &lt;XMP> is also disallowed. </blockquote> ```````````````````````````````` </div> ## Hard line breaks A line break (not in a code span or HTML tag) that is preceded by two or more spaces and does not occur at the end of a block is parsed as a [hard line break](@) (rendered in HTML as a `<br />` tag): ```````````````````````````````` example foo baz . <p>foo<br /> baz</p> ```````````````````````````````` For a more visible alternative, a backslash before the [line ending] may be used instead of two spaces: ```````````````````````````````` example foo\ baz . <p>foo<br /> baz</p> ```````````````````````````````` More than two spaces can be used: ```````````````````````````````` example foo baz . <p>foo<br /> baz</p> ```````````````````````````````` Leading spaces at the beginning of the next line are ignored: ```````````````````````````````` example foo bar . <p>foo<br /> bar</p> ```````````````````````````````` ```````````````````````````````` example foo\ bar . 
<p>foo<br /> bar</p> ```````````````````````````````` Line breaks can occur inside emphasis, links, and other constructs that allow inline content: ```````````````````````````````` example *foo bar* . <p><em>foo<br /> bar</em></p> ```````````````````````````````` ```````````````````````````````` example *foo\ bar* . <p><em>foo<br /> bar</em></p> ```````````````````````````````` Line breaks do not occur inside code spans ```````````````````````````````` example `code span` . <p><code>code span</code></p> ```````````````````````````````` ```````````````````````````````` example `code\ span` . <p><code>code\ span</code></p> ```````````````````````````````` or HTML tags: ```````````````````````````````` example <a href="foo bar"> . <p><a href="foo bar"></p> ```````````````````````````````` ```````````````````````````````` example <a href="foo\ bar"> . <p><a href="foo\ bar"></p> ```````````````````````````````` Hard line breaks are for separating inline content within a block. Neither syntax for hard line breaks works at the end of a paragraph or other block element: ```````````````````````````````` example foo\ . <p>foo\</p> ```````````````````````````````` ```````````````````````````````` example foo . <p>foo</p> ```````````````````````````````` ```````````````````````````````` example ### foo\ . <h3>foo\</h3> ```````````````````````````````` ```````````````````````````````` example ### foo . <h3>foo</h3> ```````````````````````````````` ## Soft line breaks A regular line break (not in a code span or HTML tag) that is not preceded by two or more spaces or a backslash is parsed as a [softbreak](@). (A softbreak may be rendered in HTML either as a [line ending] or as a space. The result will be the same in browsers. In the examples here, a [line ending] will be used.) ```````````````````````````````` example foo baz . 
<p>foo baz</p> ```````````````````````````````` Spaces at the end of the line and beginning of the next line are removed: ```````````````````````````````` example foo baz . <p>foo baz</p> ```````````````````````````````` A conforming parser may render a soft line break in HTML either as a line break or as a space. A renderer may also provide an option to render soft line breaks as hard line breaks. ## Textual content Any characters not given an interpretation by the above rules will be parsed as plain textual content. ```````````````````````````````` example hello $.;'there . <p>hello $.;'there</p> ```````````````````````````````` ```````````````````````````````` example Foo χρῆν . <p>Foo χρῆν</p> ```````````````````````````````` Internal spaces are preserved verbatim: ```````````````````````````````` example Multiple spaces . <p>Multiple spaces</p> ```````````````````````````````` <!-- END TESTS --> # Appendix: A parsing strategy In this appendix we describe some features of the parsing strategy used in the CommonMark reference implementations. ## Overview Parsing has two phases: 1. In the first phase, lines of input are consumed and the block structure of the document---its division into paragraphs, block quotes, list items, and so on---is constructed. Text is assigned to these blocks but not parsed. Link reference definitions are parsed and a map of links is constructed. 2. In the second phase, the raw text contents of paragraphs and headings are parsed into sequences of Markdown inline elements (strings, code spans, links, emphasis, and so on), using the map of link references constructed in phase 1. At each point in processing, the document is represented as a tree of **blocks**. The root of the tree is a `document` block. The `document` may have any number of other blocks as **children**. These children may, in turn, have other blocks as children. 
The last child of a block is normally considered **open**, meaning that subsequent lines of input can alter its contents. (Blocks that are not open are **closed**.) Here, for example, is a possible document tree, with the open blocks marked by arrows: ``` tree -> document -> block_quote paragraph "Lorem ipsum dolor\nsit amet." -> list (type=bullet tight=true bullet_char=-) list_item paragraph "Qui *quodsi iracundia*" -> list_item -> paragraph "aliquando id" ``` ## Phase 1: block structure Each line that is processed has an effect on this tree. The line is analyzed and, depending on its contents, the document may be altered in one or more of the following ways: 1. One or more open blocks may be closed. 2. One or more new blocks may be created as children of the last open block. 3. Text may be added to the last (deepest) open block remaining on the tree. Once a line has been incorporated into the tree in this way, it can be discarded, so input can be read in a stream. For each line, we follow this procedure: 1. First we iterate through the open blocks, starting with the root document, and descending through last children down to the last open block. Each block imposes a condition that the line must satisfy if the block is to remain open. For example, a block quote requires a `>` character. A paragraph requires a non-blank line. In this phase we may match all or just some of the open blocks. But we cannot close unmatched blocks yet, because we may have a [lazy continuation line]. 2. Next, after consuming the continuation markers for existing blocks, we look for new block starts (e.g. `>` for a block quote). If we encounter a new block start, we close any blocks unmatched in step 1 before creating the new block as a child of the last matched block. 3. Finally, we look at the remainder of the line (after block markers like `>`, list markers, and indentation have been consumed). 
This is text that can be incorporated into the last open block (a paragraph, code block, heading, or raw HTML). Setext headings are formed when we see a line of a paragraph that is a [setext heading underline]. Reference link definitions are detected when a paragraph is closed; the accumulated text lines are parsed to see if they begin with one or more reference link definitions. Any remainder becomes a normal paragraph. We can see how this works by considering how the tree above is generated by four lines of Markdown: ``` markdown > Lorem ipsum dolor sit amet. > - Qui *quodsi iracundia* > - aliquando id ``` At the outset, our document model is just ``` tree -> document ``` The first line of our text, ``` markdown > Lorem ipsum dolor ``` causes a `block_quote` block to be created as a child of our open `document` block, and a `paragraph` block as a child of the `block_quote`. Then the text is added to the last open block, the `paragraph`: ``` tree -> document -> block_quote -> paragraph "Lorem ipsum dolor" ``` The next line, ``` markdown sit amet. ``` is a "lazy continuation" of the open `paragraph`, so it gets added to the paragraph's text: ``` tree -> document -> block_quote -> paragraph "Lorem ipsum dolor\nsit amet." ``` The third line, ``` markdown > - Qui *quodsi iracundia* ``` causes the `paragraph` block to be closed, and a new `list` block opened as a child of the `block_quote`. A `list_item` is also added as a child of the `list`, and a `paragraph` as a child of the `list_item`. The text is then added to the new `paragraph`: ``` tree -> document -> block_quote paragraph "Lorem ipsum dolor\nsit amet." -> list (type=bullet tight=true bullet_char=-) -> list_item -> paragraph "Qui *quodsi iracundia*" ``` The fourth line, ``` markdown > - aliquando id ``` causes the `list_item` (and its child the `paragraph`) to be closed, and a new `list_item` opened up as child of the `list`. A `paragraph` is added as a child of the new `list_item`, to contain the text. 
We thus obtain the final tree: ``` tree -> document -> block_quote paragraph "Lorem ipsum dolor\nsit amet." -> list (type=bullet tight=true bullet_char=-) list_item paragraph "Qui *quodsi iracundia*" -> list_item -> paragraph "aliquando id" ``` ## Phase 2: inline structure Once all of the input has been parsed, all open blocks are closed. We then "walk the tree," visiting every node, and parse raw string contents of paragraphs and headings as inlines. At this point we have seen all the link reference definitions, so we can resolve reference links as we go. ``` tree document block_quote paragraph str "Lorem ipsum dolor" softbreak str "sit amet." list (type=bullet tight=true bullet_char=-) list_item paragraph str "Qui " emph str "quodsi iracundia" list_item paragraph str "aliquando id" ``` Notice how the [line ending] in the first paragraph has been parsed as a `softbreak`, and the asterisks in the first list item have become an `emph`. ### An algorithm for parsing nested emphasis and links By far the trickiest part of inline parsing is handling emphasis, strong emphasis, links, and images. This is done using the following algorithm. When we're parsing inlines and we hit either - a run of `*` or `_` characters, or - a `[` or `![` we insert a text node with these symbols as its literal content, and we add a pointer to this text node to the [delimiter stack](@). The [delimiter stack] is a doubly linked list. Each element contains a pointer to a text node, plus information about - the type of delimiter (`[`, `![`, `*`, `_`) - the number of delimiters, - whether the delimiter is "active" (all are active to start), and - whether the delimiter is a potential opener, a potential closer, or both (which depends on what sort of characters precede and follow the delimiters). When we hit a `]` character, we call the *look for link or image* procedure (see below). When we hit the end of the input, we call the *process emphasis* procedure (see below), with `stack_bottom` = NULL. 
#### *look for link or image* Starting at the top of the delimiter stack, we look backwards through the stack for an opening `[` or `![` delimiter. - If we don't find one, we return a literal text node `]`. - If we do find one, but it's not *active*, we remove the inactive delimiter from the stack, and return a literal text node `]`. - If we find one and it's active, then we parse ahead to see if we have an inline link/image, reference link/image, compact reference link/image, or shortcut reference link/image. + If we don't, then we remove the opening delimiter from the delimiter stack and return a literal text node `]`. + If we do, then * We return a link or image node whose children are the inlines after the text node pointed to by the opening delimiter. * We run *process emphasis* on these inlines, with the `[` opener as `stack_bottom`. * We remove the opening delimiter. * If we have a link (and not an image), we also set all `[` delimiters before the opening delimiter to *inactive*. (This will prevent us from getting links within links.) #### *process emphasis* Parameter `stack_bottom` sets a lower bound to how far we descend in the [delimiter stack]. If it is NULL, we can go all the way to the bottom. Otherwise, we stop before visiting `stack_bottom`. Let `current_position` point to the element on the [delimiter stack] just above `stack_bottom` (or the first element if `stack_bottom` is NULL). We keep track of the `openers_bottom` for each delimiter type (`*`, `_`) and each length of the closing delimiter run (modulo 3). Initialize this to `stack_bottom`. Then we repeat the following until we run out of potential closers: - Move `current_position` forward in the delimiter stack (if needed) until we find the first potential closer with delimiter `*` or `_`. (This will be the potential closer closest to the beginning of the input -- the first one in parse order.) 
- Now, look back in the stack (staying above `stack_bottom` and the `openers_bottom` for this delimiter type) for the first matching potential opener ("matching" means same delimiter). - If one is found: + Figure out whether we have emphasis or strong emphasis: if both closer and opener spans have length >= 2, we have strong, otherwise regular. + Insert an emph or strong emph node accordingly, after the text node corresponding to the opener. + Remove any delimiters between the opener and closer from the delimiter stack. + Remove 1 (for regular emph) or 2 (for strong emph) delimiters from the opening and closing text nodes. If they become empty as a result, remove them and remove the corresponding element of the delimiter stack. If the closing node is removed, reset `current_position` to the next element in the stack. - If none is found: + Set `openers_bottom` to the element before `current_position`. (We know that there are no openers for this kind of closer up to and including this point, so this puts a lower bound on future searches.) + If the closer at `current_position` is not a potential opener, remove it from the delimiter stack (since we know it can't be a closer either). + Advance `current_position` to the next element in the stack. After we're done, we remove all delimiters above `stack_bottom` from the delimiter stack. 
# -*- coding: utf-8 -*-
# Archive member: cmarkgfm/third_party/cmark/test/normalize.py
"""Normalize HTML so that spec tests can ignore insignificant differences."""
from html.parser import HTMLParser
import urllib
import html

try:
    from html.parser import HTMLParseError
except ImportError:
    # HTMLParseError was removed in Python 3.5. It could never be
    # thrown, so we define a placeholder instead.
    class HTMLParseError(Exception):
        pass

from html.entities import name2codepoint
import sys
import re

# Normalization code, adapted from
# https://github.com/karlcow/markdown-testsuite/
significant_attrs = ["alt", "href", "src", "title"]

whitespace_re = re.compile(r'\s+')

# Tags treated as block-level: whitespace directly around them is dropped.
_BLOCK_TAGS = frozenset([
    'article', 'header', 'aside', 'hgroup', 'blockquote',
    'hr', 'iframe', 'body', 'li', 'map', 'button', 'object', 'canvas',
    'ol', 'caption', 'output', 'col', 'p', 'colgroup', 'pre', 'dd',
    'progress', 'div', 'section', 'dl', 'table', 'td', 'dt',
    'tbody', 'embed', 'textarea', 'fieldset', 'tfoot', 'figcaption',
    'th', 'figure', 'thead', 'footer', 'tr', 'form', 'ul',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'video', 'script', 'style',
])

# Characters that must stay entity-encoded in the normalized output.
_ALWAYS_ESCAPED = ('<', '>', '&', '"')


def _attr_sort_key(attr):
    """Sort key for ``(name, value)`` pairs that tolerates ``None`` values.

    Orders exactly like plain tuple comparison when every value is a string,
    but never compares ``None`` with ``str`` (which raises ``TypeError``);
    a valueless attribute sorts before a valued one of the same name.
    """
    name, value = attr
    return (name, value is not None, value or "")


class MyHTMLParser(HTMLParser):
    """HTMLParser subclass that re-serializes its input in normalized form.

    The normalized text accumulates in ``self.output``.  ``self.last`` holds
    the kind of the previous event ("starttag", "endtag", "data", ...),
    ``self.last_tag`` the most recent tag name, and ``self.in_pre`` whether
    we are inside a ``<pre>`` element (where whitespace is left untouched).
    """

    def __init__(self):
        HTMLParser.__init__(self)
        # Receive character/entity references through handle_charref /
        # handle_entityref instead of having them folded into the data.
        self.convert_charrefs = False
        self.last = "starttag"
        self.in_pre = False
        self.output = ""
        self.last_tag = ""

    def handle_data(self, data):
        """Append text, collapsing and trimming whitespace outside <pre>."""
        after_tag = self.last in ("endtag", "starttag")
        after_block_tag = after_tag and self.is_block_tag(self.last_tag)
        if after_tag and self.last_tag == "br":
            data = data.lstrip('\n')
        if not self.in_pre:
            data = whitespace_re.sub(' ', data)
        if after_block_tag and not self.in_pre:
            if self.last == "starttag":
                data = data.lstrip()
            elif self.last == "endtag":
                data = data.strip()
        self.output += data
        self.last = "data"

    def handle_endtag(self, tag):
        """Append a closing tag, trimming whitespace before block tags."""
        if tag == "pre":
            self.in_pre = False
        elif self.is_block_tag(tag):
            self.output = self.output.rstrip()
        self.output += "</" + tag + ">"
        self.last_tag = tag
        self.last = "endtag"

    def handle_starttag(self, tag, attrs):
        """Append an opening tag with its attributes sorted by name."""
        if tag == "pre":
            self.in_pre = True
        if self.is_block_tag(tag):
            self.output = self.output.rstrip()
        self.output += "<" + tag
        # For now we don't strip out 'extra' attributes (those not listed in
        # significant_attrs), because of raw HTML test cases.
        for (k, v) in sorted(attrs, key=_attr_sort_key):
            self.output += " " + k
            # The earlier code special-cased attribute *values* equal to
            # 'href'/'src' and passed them through the Python 2-only
            # urllib.quote, which raises AttributeError on Python 3.
            # URL-quoting those two literal values is a no-op, so escaping
            # every value uniformly yields the same output without the crash.
            if v is not None:
                self.output += ("=" + '"' + html.escape(v, quote=True) + '"')
        self.output += ">"
        self.last_tag = tag
        self.last = "starttag"

    def handle_startendtag(self, tag, attrs):
        """Emit a self-closing tag as a plain open tag (no closing tag)."""
        self.handle_starttag(tag, attrs)
        self.last_tag = tag
        self.last = "endtag"

    def handle_comment(self, data):
        """Pass a comment through unchanged."""
        self.output += '<!--' + data + '-->'
        self.last = "comment"

    def handle_decl(self, data):
        """Pass a declaration through unchanged."""
        self.output += '<!' + data + '>'
        self.last = "decl"

    def unknown_decl(self, data):
        """Pass an unrecognized declaration through unchanged."""
        self.output += '<!' + data + '>'
        self.last = "decl"

    def handle_pi(self, data):
        """Pass a processing instruction through unchanged."""
        self.output += '<?' + data + '>'
        self.last = "pi"

    def handle_entityref(self, name):
        """Resolve a named entity; unknown names are emitted verbatim."""
        try:
            c = chr(name2codepoint[name])
        except KeyError:
            c = None
        self.output_char(c, '&' + name + ';')
        self.last = "ref"

    def handle_charref(self, name):
        """Resolve a numeric reference; invalid ones are emitted verbatim.

        ``name`` is the text between ``&#`` and ``;``; a leading ``x`` or
        ``X`` marks a hexadecimal code point.
        """
        try:
            if name[:1] in ("x", "X"):
                c = chr(int(name[1:], 16))
            else:
                c = chr(int(name))
        except (ValueError, OverflowError):
            # Not a number, or not a valid code point.
            c = None
        # The parser strips the leading '&#', so restore both characters.
        self.output_char(c, '&#' + name + ';')
        self.last = "ref"

    # Helpers.

    def output_char(self, c, fallback):
        """Append ``c``, or ``fallback`` when ``c`` is None.

        '<', '>', '&' and '"' are written as entities so that the output
        stays unambiguous; every other character is written literally.
        """
        if c is None:
            self.output += fallback
        elif c in _ALWAYS_ESCAPED:
            self.output += html.escape(c, quote=True)
        else:
            self.output += c

    def is_block_tag(self, tag):
        """Return True if ``tag`` is one of the block-level tag names."""
        return tag in _BLOCK_TAGS


def normalize_html(html):
    r"""
    Return normalized form of HTML which ignores insignificant output
    differences:

    * Multiple inner whitespaces are collapsed to a single space (except
      in pre tags):

        >>> normalize_html("<p>a  \t b</p>")
        '<p>a b</p>'

        >>> normalize_html("<p>a  \t\nb</p>")
        '<p>a b</p>'

    * Whitespace surrounding block-level tags is removed.

        >>> normalize_html("<p>a  b</p>")
        '<p>a b</p>'

        >>> normalize_html(" <p>a  b</p>")
        '<p>a b</p>'

        >>> normalize_html("<p>a  b</p> ")
        '<p>a b</p>'

        >>> normalize_html("\n\t<p>\n\t\ta  b\t\t</p>\n\t")
        '<p>a b</p>'

        >>> normalize_html("<i>a  b</i> ")
        '<i>a b</i> '

    * Self-closing tags are converted to open tags.

        >>> normalize_html("<br />")
        '<br>'

    * Attributes are sorted and lowercased.

        >>> normalize_html('<a title="bar" HREF="foo">x</a>')
        '<a href="foo" title="bar">x</a>'

    * References are converted to unicode, except that '<', '>', '&', and
      '"' are rendered using entities.

        >>> normalize_html("&forall;&amp;&gt;&lt;&quot;")
        '∀&amp;&gt;&lt;&quot;'

    On a parse error the input is returned unchanged and a message is
    written to stderr.
    """
    html_chunk_re = re.compile(r"(<!\[CDATA\[.*?\]\]>|<[^>]*>|[^<]+)")
    try:
        parser = MyHTMLParser()
        # We work around HTMLParser's limitations parsing CDATA
        # by breaking the input into chunks and passing CDATA chunks
        # through verbatim.
        for chunk in html_chunk_re.finditer(html):
            text = chunk.group(0)
            if text[:8] == "<![CDATA":
                parser.output += text
            else:
                parser.feed(text)
        parser.close()
        return parser.output
    except HTMLParseError as e:
        # The placeholder exception class has no .msg attribute.
        message = getattr(e, "msg", None) or str(e)
        sys.stderr.write("Normalization error: " + message + "\n")
        return html  # on error, return unnormalized HTML

# ---- end of cmarkgfm/third_party/cmark/test/normalize.py ----
# ---- next archive member: cmarkgfm/third_party/cmark/test/CMakeLists.txt ----
# To get verbose output: cmake --build build --target "test" -- ARGS='-V'
# By default, we run the spec tests only if python3 is available.
# To require the spec tests, compile with -DSPEC_TESTS=1 if (SPEC_TESTS) find_package(PythonInterp 3 REQUIRED) else(SPEC_TESTS) find_package(PythonInterp 3) endif(SPEC_TESTS) if (CMARK_SHARED OR CMARK_STATIC) add_test(NAME api_test COMMAND api_test) endif() if (WIN32) file(TO_NATIVE_PATH ${CMAKE_BINARY_DIR}/src WIN_SRC_DLL_DIR) file(TO_NATIVE_PATH ${CMAKE_BINARY_DIR}/extensions WIN_EXTENSIONS_DLL_DIR) set(NEWPATH "${WIN_SRC_DLL_DIR};${WIN_EXTENSIONS_DLL_DIR};$ENV{PATH}") string(REPLACE ";" "\\;" NEWPATH "${NEWPATH}") set_tests_properties(api_test PROPERTIES ENVIRONMENT "PATH=${NEWPATH}") set(ROUNDTRIP "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip.bat") else(WIN32) set(ROUNDTRIP "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip.sh") endif(WIN32) IF (PYTHONINTERP_FOUND) add_test(html_normalization ${PYTHON_EXECUTABLE} "-m" "doctest" "${CMAKE_CURRENT_SOURCE_DIR}/normalize.py" ) if (CMARK_SHARED) add_test(spectest_library ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt" "--library-dir" "${CMAKE_CURRENT_BINARY_DIR}/../src" ) add_test(pathological_tests_library ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/pathological_tests.py" "--library-dir" "${CMAKE_CURRENT_BINARY_DIR}/../src" ) add_test(roundtriptest_library ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt" "--library-dir" "${CMAKE_CURRENT_BINARY_DIR}/../src" ) add_test(entity_library ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/entity_tests.py" "--library-dir" "${CMAKE_CURRENT_BINARY_DIR}/../src" ) endif() add_test(spectest_executable ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm" ) add_test(smartpuncttest_executable ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" 
"${CMAKE_CURRENT_SOURCE_DIR}/smart_punct.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm --smart" ) add_test(extensions_executable ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm" "--extensions" "table strikethrough autolink tagfilter footnotes tasklist" ) add_test(roundtrip_extensions_executable ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm" "--extensions" "table strikethrough autolink tagfilter footnotes tasklist" ) add_test(option_table_prefer_style_attributes ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions-table-prefer-style-attributes.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm --table-prefer-style-attributes" "--extensions" "table strikethrough autolink tagfilter footnotes tasklist" ) add_test(option_full_info_string ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions-full-info-string.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm --full-info-string" ) add_test(regressiontest_executable ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/regression.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm" ) ELSE(PYTHONINTERP_FOUND) message("\n*** A python 3 interpreter is required to run the spec tests.\n") add_test(skipping_spectests echo "Skipping spec tests, because no python 3 interpreter is available.") ENDIF(PYTHONINTERP_FOUND) 
�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������cmarkgfm/third_party/cmark/test/fuzzing_dictionary��������������������������������������������������0000644�0001750�0001750�00000002363�14210444464�023005� 0����������������������������������������������������������������������������������������������������ustar �carsten�������������������������carsten����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������asterisk="*" attr_generic=" a=\"1\"" attr_href=" href=\"1\"" attr_xml_lang=" xml:lang=\"1\"" attr_xmlns=" xmlns=\"1\"" backslash="\\" backtick="`" colon=":" dashes="---" double_quote="\"" entity_builtin="<" entity_decimal="" entity_external="&a;" entity_hex="" equals="===" exclamation="!" greater_than=">" hash="#" hyphen="-" indent=" " left_bracket="[" left_paren="(" less_than="<" plus="+" right_bracket="]" right_paren=")" single_quote="'" string_any="ANY" string_brackets="[]" string_cdata="CDATA" string_dashes="--" string_empty_dblquotes="\"\"" string_empty_quotes="''" string_idrefs="IDREFS" string_parentheses="()" string_pcdata="#PCDATA" tag_cdata="<![CDATA[" tag_close="</a>" tag_doctype="<!DOCTYPE" tag_element="<!ELEMENT" tag_entity="<!ENTITY" tag_notation="<!NOTATION" tag_open="<a>" tag_open_close="<a />" tag_open_exclamation="<!" tag_open_q="<?" 
tag_sq2_close="]]>" tag_xml_q="<?xml?>" underscore="_" # GFM specific strikethrough="~~~strike~~~" user_mention="@octocat" email_mention="octocat@github.com" http="http://" https="https://" ftp="ftp://" title_tag="title" textarea_tag="textarea" style_tag="style" xmp_tag="xmp" iframe_tag="iframe" noembed_tag="noembed" noframes_tag="noframes" script_tag="script" plaintext_tag="plaintext" �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������cmarkgfm/third_party/cmark/test/regression.txt������������������������������������������������������0000644�0001750�0001750�00000024147�14210444464�022066� 0����������������������������������������������������������������������������������������������������ustar �carsten�������������������������carsten����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������### Regression tests Issue #113: EOL character weirdness on Windows (Important: first line ends with CR + CR + LF) ```````````````````````````````` example line1 line2 . <p>line1</p> <p>line2</p> ```````````````````````````````` Issue #114: cmark skipping first character in line (Important: the blank lines around "Repeatedly" contain a tab.) ```````````````````````````````` example By taking it apart - alternative solutions → Repeatedly solving → - how techniques . <p>By taking it apart</p> <ul> <li>alternative solutions</li> </ul> <p>Repeatedly solving</p> <ul> <li>how techniques</li> </ul> ```````````````````````````````` Issue jgm/CommonMark#430: h2..h6 not recognized as block tags. ```````````````````````````````` example <h1>lorem</h1> <h2>lorem</h2> <h3>lorem</h3> <h4>lorem</h4> <h5>lorem</h5> <h6>lorem</h6> . 
<h1>lorem</h1> <h2>lorem</h2> <h3>lorem</h3> <h4>lorem</h4> <h5>lorem</h5> <h6>lorem</h6> ```````````````````````````````` Issue jgm/commonmark.js#109 - tabs after setext header line ```````````````````````````````` example hi --→ . <h2>hi</h2> ```````````````````````````````` Issue #177 - incorrect emphasis parsing ```````````````````````````````` example a***b* c* . <p>a*<em><em>b</em> c</em></p> ```````````````````````````````` Issue #193 - unescaped left angle brackets in link destination ```````````````````````````````` example [a] [a]: <te<st> . <p>[a]</p> <p>[a]: <te<st></p> ```````````````````````````````` Issue #192 - escaped spaces in link destination ```````````````````````````````` example [a](te\ st) . <p>[a](te\ st)</p> ```````````````````````````````` Issue github/github#76615: multiple delimiter combinations gets sketchy ```````````````````````````````` example strikethrough ~~**_`this`_**~~ ~~***`this`***~~ ~~___`this`___~~ **_`this`_** ***`this`*** ___`this`___ ~~**_this_**~~ ~~***this***~~ ~~___this___~~ **_this_** ***this*** ___this___ . <p><del><strong><em><code>this</code></em></strong></del><br /> <del><em><strong><code>this</code></strong></em></del><br /> <del><em><strong><code>this</code></strong></em></del></p> <p><strong><em><code>this</code></em></strong><br /> <em><strong><code>this</code></strong></em><br /> <em><strong><code>this</code></strong></em></p> <p><del><strong><em>this</em></strong></del><br /> <del><em><strong>this</strong></em></del><br /> <del><em><strong>this</strong></em></del></p> <p><strong><em>this</em></strong><br /> <em><strong>this</strong></em><br /> <em><strong>this</strong></em></p> ```````````````````````````````` Issue #527 - meta tags in inline contexts ```````````````````````````````` example City: <span itemprop="contentLocation" itemscope itemtype="https://schema.org/City"> <meta itemprop="name" content="Springfield"> </span> . 
<p>City: <span itemprop="contentLocation" itemscope itemtype="https://schema.org/City"> <meta itemprop="name" content="Springfield"> </span></p> ```````````````````````````````` cmark-gfm strikethrough rules ```````````````````````````````` example strikethrough ~Hi~ Hello, world! . <p><del>Hi</del> Hello, world!</p> ```````````````````````````````` ```````````````````````````````` example strikethrough This ~text~ ~~is~~ ~~~curious~~~. . <p>This <del>text</del> <del>is</del> ~~~curious~~~.</p> ```````````````````````````````` `~` should not be escaped in href — https://github.com/github/markup/issues/311 ```````````````````````````````` example [x](http://members.aon.at/~nkehrer/ibm_5110/emu5110.html) . <p><a href="http://members.aon.at/~nkehrer/ibm_5110/emu5110.html">x</a></p> ```````````````````````````````` Footnotes in tables ```````````````````````````````` example table footnotes A footnote in a paragraph[^1] | Column1 | Column2 | | --------- | ------- | | foot [^1] | note | [^1]: a footnote . <p>A footnote in a paragraph<sup class="footnote-ref"><a href="#fn-1" id="fnref-1" data-footnote-ref>1</a></sup></p> <table> <thead> <tr> <th>Column1</th> <th>Column2</th> </tr> </thead> <tbody> <tr> <td>foot <sup class="footnote-ref"><a href="#fn-1" id="fnref-1-2" data-footnote-ref>1</a></sup></td> <td>note</td> </tr> </tbody> </table> <section class="footnotes" data-footnotes> <ol> <li id="fn-1"> <p>a footnote <a href="#fnref-1" class="footnote-backref" data-footnote-backref aria-label="Back to content">↩</a> <a href="#fnref-1-2" class="footnote-backref" data-footnote-backref aria-label="Back to content">↩<sup class="footnote-ref">2</sup></a></p> </li> </ol> </section> ```````````````````````````````` Issue #527 - meta tags in inline contexts ```````````````````````````````` example City: <span itemprop="contentLocation" itemscope itemtype="https://schema.org/City"> <meta itemprop="name" content="Springfield"> </span> . 
<p>City: <span itemprop="contentLocation" itemscope itemtype="https://schema.org/City"> <meta itemprop="name" content="Springfield"> </span></p> ```````````````````````````````` Issue #530 - link parsing corner cases ```````````````````````````````` example [a](\ b) [a](<<b) [a](<b ) . <p>[a](\ b)</p> <p>[a](<<b)</p> <p>[a](<b )</p> ```````````````````````````````` Issue commonmark#526 - unescaped ( in link title ```````````````````````````````` example [link](url ((title)) . <p>[link](url ((title))</p> ```````````````````````````````` Issue commonamrk#517 - script, pre, style close tag without opener. ```````````````````````````````` example </script> </pre> </style> . </script> </pre> </style> ```````````````````````````````` Issue #289. ```````````````````````````````` example [a](<b) c> . <p>[a](<b) c></p> ```````````````````````````````` Pull request #128 - Buffer overread in tables extension ```````````````````````````````` example table | -| . <p>| -|</p> ```````````````````````````````` Footnotes may be nested inside other footnotes. ```````````````````````````````` example footnotes This is some text. It has a citation.[^citation] [^another-citation]: My second citation. [^citation]: This is a long winded parapgraph that also has another citation.[^another-citation] . <p>This is some text. It has a citation.<sup class="footnote-ref"><a href="#fn-citation" id="fnref-citation" data-footnote-ref>1</a></sup></p> <section class="footnotes" data-footnotes> <ol> <li id="fn-citation"> <p>This is a long winded parapgraph that also has another citation.<sup class="footnote-ref"><a href="#fn-another-citation" id="fnref-another-citation" data-footnote-ref>2</a></sup> <a href="#fnref-citation" class="footnote-backref" data-footnote-backref aria-label="Back to content">↩</a></p> </li> <li id="fn-another-citation"> <p>My second citation. 
<a href="#fnref-another-citation" class="footnote-backref" data-footnote-backref aria-label="Back to content">↩</a></p> </li> </ol> </section> ```````````````````````````````` Footnotes are similar to, but should not be confused with, link references ```````````````````````````````` example footnotes This is some text. It has two footnotes references, side-by-side without any spaces,[^footnote1][^footnote2] which are definitely not link references. [^footnote1]: Hello. [^footnote2]: Goodbye. . <p>This is some text. It has two footnotes references, side-by-side without any spaces,<sup class="footnote-ref"><a href="#fn-footnote1" id="fnref-footnote1" data-footnote-ref>1</a></sup><sup class="footnote-ref"><a href="#fn-footnote2" id="fnref-footnote2" data-footnote-ref>2</a></sup> which are definitely not link references.</p> <section class="footnotes" data-footnotes> <ol> <li id="fn-footnote1"> <p>Hello. <a href="#fnref-footnote1" class="footnote-backref" data-footnote-backref aria-label="Back to content">↩</a></p> </li> <li id="fn-footnote2"> <p>Goodbye. <a href="#fnref-footnote2" class="footnote-backref" data-footnote-backref aria-label="Back to content">↩</a></p> </li> </ol> </section> ```````````````````````````````` Footnotes may begin with or have a 'w' or a '_' in their reference label. ```````````````````````````````` example footnotes autolink This is some text. Sometimes the autolinker splits up text into multiple nodes, hoping it will find a hyperlink, so this text has a footnote whose reference label begins with a `w`.[^widely-cited] It has another footnote that contains many different characters (the autolinker was also breaking on `_`).[^sphinx-of-black-quartz_judge-my-vow-0123456789] [^sphinx-of-black-quartz_judge-my-vow-0123456789]: so does this. [^widely-cited]: this renders properly. . <p>This is some text. 
Sometimes the autolinker splits up text into multiple nodes, hoping it will find a hyperlink, so this text has a footnote whose reference label begins with a <code>w</code>.<sup class="footnote-ref"><a href="#fn-widely-cited" id="fnref-widely-cited" data-footnote-ref>1</a></sup></p> <p>It has another footnote that contains many different characters (the autolinker was also breaking on <code>_</code>).<sup class="footnote-ref"><a href="#fn-sphinx-of-black-quartz_judge-my-vow-0123456789" id="fnref-sphinx-of-black-quartz_judge-my-vow-0123456789" data-footnote-ref>2</a></sup></p> <section class="footnotes" data-footnotes> <ol> <li id="fn-widely-cited"> <p>this renders properly. <a href="#fnref-widely-cited" class="footnote-backref" data-footnote-backref aria-label="Back to content">↩</a></p> </li> <li id="fn-sphinx-of-black-quartz_judge-my-vow-0123456789"> <p>so does this. <a href="#fnref-sphinx-of-black-quartz_judge-my-vow-0123456789" class="footnote-backref" data-footnote-backref aria-label="Back to content">↩</a></p> </li> </ol> </section> ```````````````````````````````` Footnotes interacting with strikethrough should not lead to a use-after-free ```````````````````````````````` example footnotes autolink strikethrough table |Tot.....[^_a_]| . <p>|Tot.....[^_a_]|</p> ```````````````````````````````` Footnotes interacting with strikethrough should not lead to a use-after-free pt2 ```````````````````````````````` example footnotes autolink strikethrough table [^~~is~~1] . <p>[^~~is~~1]</p> ```````````````````````````````` Adjacent unused footnotes definitions should not lead to a use after free ```````````````````````````````` example footnotes autolink strikethrough table Hello world [^a]:[^b]: . 
<p>Hello world</p> ```````````````````````````````` �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������cmarkgfm/third_party/cmark/test/extensions.txt������������������������������������������������������0000644�0001750�0001750�00000044427�14210444464�022110� 0����������������������������������������������������������������������������������������������������ustar �carsten�������������������������carsten����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������--- title: Extensions test author: Yuki Izumi version: 0.1 date: '2016-08-31' license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)' ... ## Tables Here's a well-formed table, doing everything it should. ```````````````````````````````` example | abc | def | | --- | --- | | ghi | jkl | | mno | pqr | . <table> <thead> <tr> <th>abc</th> <th>def</th> </tr> </thead> <tbody> <tr> <td>ghi</td> <td>jkl</td> </tr> <tr> <td>mno</td> <td>pqr</td> </tr> </tbody> </table> ```````````````````````````````` We're going to mix up the table now; we'll demonstrate that inline formatting works fine, but block elements don't. You can also have empty cells, and the textual alignment of the columns is shown to be irrelevant. ```````````````````````````````` example Hello! | _abc_ | セン | | ----- | ---- | | 1. Block elements inside cells don't work. | | | But _**inline elements do**_. | x | Hi! . <p>Hello!</p> <table> <thead> <tr> <th><em>abc</em></th> <th>セン</th> </tr> </thead> <tbody> <tr> <td>1. 
Block elements inside cells don't work.</td> <td></td> </tr> <tr> <td>But <em><strong>inline elements do</strong></em>.</td> <td>x</td> </tr> </tbody> </table> <p>Hi!</p> ```````````````````````````````` Here we demonstrate some edge cases about what is and isn't a table. ```````````````````````````````` example | Not enough table | to be considered table | | Not enough table | to be considered table | | Not enough table | to be considered table | | Just enough table | to be considered table | | ----------------- | ---------------------- | | ---- | --- | |x| |-| | xyz | | --- | . <p>| Not enough table | to be considered table |</p> <p>| Not enough table | to be considered table | | Not enough table | to be considered table |</p> <table> <thead> <tr> <th>Just enough table</th> <th>to be considered table</th> </tr> </thead> </table> <p>| ---- | --- |</p> <table> <thead> <tr> <th>x</th> </tr> </thead> </table> <table> <thead> <tr> <th>xyz</th> </tr> </thead> </table> ```````````````````````````````` A "simpler" table, GFM style: ```````````````````````````````` example abc | def --- | --- xyz | ghi . <table> <thead> <tr> <th>abc</th> <th>def</th> </tr> </thead> <tbody> <tr> <td>xyz</td> <td>ghi</td> </tr> </tbody> </table> ```````````````````````````````` We are making the parser slighly more lax here. Here is a table with spaces at the end: ```````````````````````````````` example Hello! | _abc_ | セン | | ----- | ---- | | this row has a space at the end | | | But _**inline elements do**_. | x | Hi! . <p>Hello!</p> <table> <thead> <tr> <th><em>abc</em></th> <th>セン</th> </tr> </thead> <tbody> <tr> <td>this row has a space at the end</td> <td></td> </tr> <tr> <td>But <em><strong>inline elements do</strong></em>.</td> <td>x</td> </tr> </tbody> </table> <p>Hi!</p> ```````````````````````````````` Table alignment: ```````````````````````````````` example aaa | bbb | ccc | ddd | eee :-- | --- | :-: | --- | --: fff | ggg | hhh | iii | jjj . 
<table> <thead> <tr> <th align="left">aaa</th> <th>bbb</th> <th align="center">ccc</th> <th>ddd</th> <th align="right">eee</th> </tr> </thead> <tbody> <tr> <td align="left">fff</td> <td>ggg</td> <td align="center">hhh</td> <td>iii</td> <td align="right">jjj</td> </tr> </tbody> </table> ```````````````````````````````` ### Table cell count mismatches The header and marker row must match. ```````````````````````````````` example | a | b | c | | --- | --- | | this | isn't | okay | . <p>| a | b | c | | --- | --- | | this | isn't | okay |</p> ```````````````````````````````` But any of the body rows can be shorter. Rows longer than the header are truncated. ```````````````````````````````` example | a | b | c | | --- | --- | --- | x | a | b | 1 | 2 | 3 | 4 | 5 | . <table> <thead> <tr> <th>a</th> <th>b</th> <th>c</th> </tr> </thead> <tbody> <tr> <td>x</td> <td></td> <td></td> </tr> <tr> <td>a</td> <td>b</td> <td></td> </tr> <tr> <td>1</td> <td>2</td> <td>3</td> </tr> </tbody> </table> ```````````````````````````````` ### Embedded pipes Tables with embedded pipes could be tricky. ```````````````````````````````` example | a | b | | --- | --- | | Escaped pipes are \|okay\|. | Like \| this. | | Within `\|code\| is okay` too. | | _**`c\|`**_ \| complex | don't **\_reparse\_** . <table> <thead> <tr> <th>a</th> <th>b</th> </tr> </thead> <tbody> <tr> <td>Escaped pipes are |okay|.</td> <td>Like | this.</td> </tr> <tr> <td>Within <code>|code| is okay</code> too.</td> <td></td> </tr> <tr> <td><em><strong><code>c|</code></strong></em> | complex</td> <td></td> </tr> <tr> <td>don't <strong>_reparse_</strong></td> <td></td> </tr> </tbody> </table> ```````````````````````````````` ### Oddly-formatted markers This shouldn't assert. ```````````````````````````````` example | a | --- | . 
<table> <thead> <tr> <th>a</th> </tr> </thead> </table> ```````````````````````````````` ### Escaping ```````````````````````````````` example | a | b | | --- | --- | | \\ | `\\` | | \\\\ | `\\\\` | | \_ | `\_` | | \| | `\|` | | \a | `\a` | \\ `\\` \\\\ `\\\\` \_ `\_` \| `\|` \a `\a` . <table> <thead> <tr> <th>a</th> <th>b</th> </tr> </thead> <tbody> <tr> <td>\</td> <td><code>\\</code></td> </tr> <tr> <td>\\</td> <td><code>\\\\</code></td> </tr> <tr> <td>_</td> <td><code>\_</code></td> </tr> <tr> <td>|</td> <td><code>|</code></td> </tr> <tr> <td>\a</td> <td><code>\a</code></td> </tr> </tbody> </table> <p>\ <code>\\</code></p> <p>\\ <code>\\\\</code></p> <p>_ <code>\_</code></p> <p>| <code>\|</code></p> <p>\a <code>\a</code></p> ```````````````````````````````` ### Embedded HTML ```````````````````````````````` example | a | | --- | | <strong>hello</strong> | | ok <br> sure | . <table> <thead> <tr> <th>a</th> </tr> </thead> <tbody> <tr> <td><strong>hello</strong></td> </tr> <tr> <td>ok <br> sure</td> </tr> </tbody> </table> ```````````````````````````````` ### Reference-style links ```````````````````````````````` example Here's a link to [Freedom Planet 2][]. | Here's a link to [Freedom Planet 2][] in a table header. | | --- | | Here's a link to [Freedom Planet 2][] in a table row. | [Freedom Planet 2]: http://www.freedomplanet2.com/ . <p>Here's a link to <a href="http://www.freedomplanet2.com/">Freedom Planet 2</a>.</p> <table> <thead> <tr> <th>Here's a link to <a href="http://www.freedomplanet2.com/">Freedom Planet 2</a> in a table header.</th> </tr> </thead> <tbody> <tr> <td>Here's a link to <a href="http://www.freedomplanet2.com/">Freedom Planet 2</a> in a table row.</td> </tr> </tbody> </table> ```````````````````````````````` ### Sequential cells ```````````````````````````````` example | a | b | c | | --- | --- | --- | | d || e | . 
<table> <thead> <tr> <th>a</th> <th>b</th> <th>c</th> </tr> </thead> <tbody> <tr> <td>d</td> <td></td> <td>e</td> </tr> </tbody> </table> ```````````````````````````````` ### Interaction with emphasis ```````````````````````````````` example | a | b | | --- | --- | |***(a)***| . <table> <thead> <tr> <th>a</th> <th>b</th> </tr> </thead> <tbody> <tr> <td><em><strong>(a)</strong></em></td> <td></td> </tr> </tbody> </table> ```````````````````````````````` ### a table can be recognised when separated from a paragraph of text without an empty line ```````````````````````````````` example 123 456 | a | b | | ---| --- | d | e . <p>123 456</p> <table> <thead> <tr> <th>a</th> <th>b</th> </tr> </thead> <tbody> <tr> <td>d</td> <td>e</td> </tr> </tbody> </table> ```````````````````````````````` ## Strikethroughs A well-formed strikethrough. ```````````````````````````````` example A proper ~strikethrough~. . <p>A proper <del>strikethrough</del>.</p> ```````````````````````````````` Some strikethrough edge cases. ```````````````````````````````` example These are ~not strikethroughs. No, they are not~ This ~is ~ legit~ isn't ~ legit. This is not ~~~~~one~~~~~ huge strikethrough. ~one~ ~~two~~ ~~~three~~~ No ~mismatch~~ . 
<p>These are ~not strikethroughs.</p> <p>No, they are not~</p> <p>This <del>is ~ legit</del> isn't ~ legit.</p> <p>This is not ~~~~~one~~~~~ huge strikethrough.</p> <p><del>one</del> <del>two</del> ~~~three~~~</p> <p>No ~mismatch~~</p> ```````````````````````````````` Using 200 tilde since it overflows the internal buffer size (100) for parsing delimiters in inlines.c ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~striked~ ## Autolinks ```````````````````````````````` example : http://google.com https://google.com <http://google.com/å> http://google.com/å scyther@pokemon.com www.github.com www.github.com/á www.google.com/a_b **Autolink and http://inlines** ![http://inline.com/image](http://inline.com/image) a.w@b.c Full stop outside parens shouldn't be included http://google.com/ok. (Full stop inside parens shouldn't be included http://google.com/ok.) "http://google.com" 'http://google.com' http://🍄.ga/ http://x🍄.ga/ . 
<p>: <a href="http://google.com">http://google.com</a> <a href="https://google.com">https://google.com</a></p> <p><a href="http://google.com/%C3%A5">http://google.com/å</a> <a href="http://google.com/%C3%A5">http://google.com/å</a></p> <p><a href="mailto:scyther@pokemon.com">scyther@pokemon.com</a></p> <p><a href="http://www.github.com">www.github.com</a> <a href="http://www.github.com/%C3%A1">www.github.com/á</a></p> <p><a href="http://www.google.com/a_b">www.google.com/a_b</a></p> <p><strong>Autolink and <a href="http://inlines">http://inlines</a></strong></p> <p><img src="http://inline.com/image" alt="http://inline.com/image" /></p> <p><a href="mailto:a.w@b.c">a.w@b.c</a></p> <p>Full stop outside parens shouldn't be included <a href="http://google.com/ok">http://google.com/ok</a>.</p> <p>(Full stop inside parens shouldn't be included <a href="http://google.com/ok">http://google.com/ok</a>.)</p> <p>"<a href="http://google.com">http://google.com</a>"</p> <p>'<a href="http://google.com">http://google.com</a>'</p> <p><a href="http://%F0%9F%8D%84.ga/">http://🍄.ga/</a> <a href="http://x%F0%9F%8D%84.ga/">http://x🍄.ga/</a></p> ```````````````````````````````` ```````````````````````````````` example This shouldn't crash everything: (_A_@_.A . <IGNORE> ```````````````````````````````` ```````````````````````````````` example These should not link: * @a.b.c@. x * n@. b . <p>These should not link:</p> <ul> <li>@a.b.c@. x</li> <li>n@. b</li> </ul> ```````````````````````````````` ## HTML tag filter ```````````````````````````````` example This is <xmp> not okay, but **this** <strong>is</strong>. <p>This is <xmp> not okay, but **this** <strong>is</strong>.</p> Nope, I won't have <textarea>. <p>No <textarea> here either.</p> <p>This <random /> <thing> is okay</thing> though.</p> Yep, <totally>okay</totally>. <!-- HTML comments are okay, though. --> <!- But we're strict. -> <! No nonsense. > <!-- Leave multiline comments the heck alone, though, okay? 
Even with {"x":"y"} or 1 > 2 or whatever. Even **markdown**. --> <!--- Support everything CommonMark's parser does. --> <!----> <!--thistoo--> . <p>This is <xmp> not okay, but <strong>this</strong> <strong>is</strong>.</p> <p>This is <xmp> not okay, but **this** <strong>is</strong>.</p> <p>Nope, I won't have <textarea>.</p> <p>No <textarea> here either.</p> <p>This <random /> <thing> is okay</thing> though.</p> <p>Yep, <totally>okay</totally>.</p> <!-- HTML comments are okay, though. --> <p><!- But we're strict. -> <! No nonsense. ></p> <!-- Leave multiline comments the heck alone, though, okay? Even with {"x":"y"} or 1 > 2 or whatever. Even **markdown**. --> <!--- Support everything CommonMark's parser does. --> <!----> <!--thistoo--> ```````````````````````````````` ## Footnotes ```````````````````````````````` example This is some text![^1]. Other text.[^footnote]. Here's a thing[^other-note]. And another thing[^codeblock-note]. This doesn't have a referent[^nope]. [^other-note]: no code block here (spaces are stripped away) [^codeblock-note]: this is now a code block (8 spaces indentation) [^1]: Some *bolded* footnote definition. Hi! [^footnote]: > Blockquotes can be in a footnote. as well as code blocks or, naturally, simple paragraphs. [^unused]: This is unused. . <p>This is some text!<sup class="footnote-ref"><a href="#fn-1" id="fnref-1" data-footnote-ref>1</a></sup>. Other text.<sup class="footnote-ref"><a href="#fn-footnote" id="fnref-footnote" data-footnote-ref>2</a></sup>.</p> <p>Here's a thing<sup class="footnote-ref"><a href="#fn-other-note" id="fnref-other-note" data-footnote-ref>3</a></sup>.</p> <p>And another thing<sup class="footnote-ref"><a href="#fn-codeblock-note" id="fnref-codeblock-note" data-footnote-ref>4</a></sup>.</p> <p>This doesn't have a referent[^nope].</p> <p>Hi!</p> <section class="footnotes" data-footnotes> <ol> <li id="fn-1"> <p>Some <em>bolded</em> footnote definition. 
<a href="#fnref-1" class="footnote-backref" data-footnote-backref aria-label="Back to content">↩</a></p> </li> <li id="fn-footnote"> <blockquote> <p>Blockquotes can be in a footnote.</p> </blockquote> <pre><code>as well as code blocks </code></pre> <p>or, naturally, simple paragraphs. <a href="#fnref-footnote" class="footnote-backref" data-footnote-backref aria-label="Back to content">↩</a></p> </li> <li id="fn-other-note"> <p>no code block here (spaces are stripped away) <a href="#fnref-other-note" class="footnote-backref" data-footnote-backref aria-label="Back to content">↩</a></p> </li> <li id="fn-codeblock-note"> <pre><code>this is now a code block (8 spaces indentation) </code></pre> <a href="#fnref-codeblock-note" class="footnote-backref" data-footnote-backref aria-label="Back to content">↩</a> </li> </ol> </section> ```````````````````````````````` ## When a footnote is used multiple times, we insert multiple backrefs. ```````````````````````````````` example This is some text. It has a footnote[^a-footnote]. This footnote is referenced[^a-footnote] multiple times, in lots of different places.[^a-footnote] [^a-footnote]: This footnote definition should have three backrefs. . <p>This is some text. It has a footnote<sup class="footnote-ref"><a href="#fn-a-footnote" id="fnref-a-footnote" data-footnote-ref>1</a></sup>.</p> <p>This footnote is referenced<sup class="footnote-ref"><a href="#fn-a-footnote" id="fnref-a-footnote-2" data-footnote-ref>1</a></sup> multiple times, in lots of different places.<sup class="footnote-ref"><a href="#fn-a-footnote" id="fnref-a-footnote-3" data-footnote-ref>1</a></sup></p> <section class="footnotes" data-footnotes> <ol> <li id="fn-a-footnote"> <p>This footnote definition should have three backrefs. 
<a href="#fnref-a-footnote" class="footnote-backref" data-footnote-backref aria-label="Back to content">↩</a> <a href="#fnref-a-footnote-2" class="footnote-backref" data-footnote-backref aria-label="Back to content">↩<sup class="footnote-ref">2</sup></a> <a href="#fnref-a-footnote-3" class="footnote-backref" data-footnote-backref aria-label="Back to content">↩<sup class="footnote-ref">3</sup></a></p> </li> </ol> </section> ```````````````````````````````` ## Footnote reference labels are href escaped ```````````````````````````````` example Hello[^"><script>alert(1)</script>] [^"><script>alert(1)</script>]: pwned . <p>Hello<sup class="footnote-ref"><a href="#fn-%22%3E%3Cscript%3Ealert(1)%3C/script%3E" id="fnref-%22%3E%3Cscript%3Ealert(1)%3C/script%3E" data-footnote-ref>1</a></sup></p> <section class="footnotes" data-footnotes> <ol> <li id="fn-%22%3E%3Cscript%3Ealert(1)%3C/script%3E"> <p>pwned <a href="#fnref-%22%3E%3Cscript%3Ealert(1)%3C/script%3E" class="footnote-backref" data-footnote-backref aria-label="Back to content">↩</a></p> </li> </ol> </section> ```````````````````````````````` ## Interop Autolink and strikethrough. ```````````````````````````````` example ~~www.google.com~~ ~~http://google.com~~ . <p><del><a href="http://www.google.com">www.google.com</a></del></p> <p><del><a href="http://google.com">http://google.com</a></del></p> ```````````````````````````````` Autolink and tables. ```````````````````````````````` example | a | b | | --- | --- | | https://github.com www.github.com | http://pokemon.com | . <table> <thead> <tr> <th>a</th> <th>b</th> </tr> </thead> <tbody> <tr> <td><a href="https://github.com">https://github.com</a> <a href="http://www.github.com">www.github.com</a></td> <td><a href="http://pokemon.com">http://pokemon.com</a></td> </tr> </tbody> </table> ```````````````````````````````` ## Task lists ```````````````````````````````` example - [ ] foo - [x] bar . 
<ul> <li><input type="checkbox" disabled="" /> foo</li> <li><input type="checkbox" checked="" disabled="" /> bar</li> </ul> ```````````````````````````````` Show that a task list and a regular list get processed the same in the way that sublists are created. If something works in a list item, then it should work the same way with a task. The only difference should be the tasklist marker. So, if we use something other than a space or x, it won't be recognized as a task item, and so will be treated as a regular item. ```````````````````````````````` example - [x] foo - [ ] bar - [x] baz - [ ] bim Show a regular (non task) list to show that it has the same structure - [@] foo - [@] bar - [@] baz - [@] bim . <ul> <li><input type="checkbox" checked="" disabled="" /> foo <ul> <li><input type="checkbox" disabled="" /> bar</li> <li><input type="checkbox" checked="" disabled="" /> baz</li> </ul> </li> <li><input type="checkbox" disabled="" /> bim</li> </ul> <p>Show a regular (non task) list to show that it has the same structure</p> <ul> <li>[@] foo <ul> <li>[@] bar</li> <li>[@] baz</li> </ul> </li> <li>[@] bim</li> </ul> ```````````````````````````````` Use a larger indent -- a task list and a regular list should produce the same structure. ```````````````````````````````` example - [x] foo - [ ] bar - [x] baz - [ ] bim Show a regular (non task) list to show that it has the same structure - [@] foo - [@] bar - [@] baz - [@] bim . 
<ul> <li><input type="checkbox" checked="" disabled="" /> foo <ul> <li><input type="checkbox" disabled="" /> bar</li> <li><input type="checkbox" checked="" disabled="" /> baz</li> </ul> </li> <li><input type="checkbox" disabled="" /> bim</li> </ul> <p>Show a regular (non task) list to show that it has the same structure</p> <ul> <li>[@] foo <ul> <li>[@] bar</li> <li>[@] baz</li> </ul> </li> <li>[@] bim</li> </ul> ```````````````````````````````` �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������cmarkgfm/third_party/cmark/test/afl_test_cases/�����������������������������������������������������0000755�0001750�0001750�00000000000�14210444464�022114� 5����������������������������������������������������������������������������������������������������ustar �carsten�������������������������carsten����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������cmarkgfm/third_party/cmark/test/afl_test_cases/test.md����������������������������������������������0000644�0001750�0001750�00000000576�14210444464�023425� 0����������������������������������������������������������������������������������������������������ustar �carsten�������������������������carsten����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# H1 H2 -- t ☺ *b* **em** `c` ≥\&\ \_e\_ 4) I1 5) I2 > [l](/u "t") > > - [f] > - ![a](/u "t") > >> <ftp://hh> >> <u@hh> ~~~ l☺ cb ~~~ c1 c2 *** <div> <b>x</b> </div> | a | b | | --- | --- | | c | `d|` \| e | google ~~yahoo~~ google.com http://google.com google@google.com and <xmp> but 
<surewhynot> sure </surewhynot> [f]: /u "t" ����������������������������������������������������������������������������������������������������������������������������������cmarkgfm/third_party/cmark/test/run-cmark-fuzz������������������������������������������������������0000755�0001750�0001750�00000000245�14210444464�021757� 0����������������������������������������������������������������������������������������������������ustar �carsten�������������������������carsten����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/bin/bash -eu CMARK_FUZZ="$1" shift ASAN_OPTIONS="quarantine_size_mb=10:detect_leaks=1" "${CMARK_FUZZ}" -max_len=256 -timeout=1 -dict=test/fuzzing_dictionary "$@" �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������cmarkgfm/third_party/cmark/test/smart_punct.txt�����������������������������������������������������0000644�0001750�0001750�00000010117�14210444464�022235� 0����������������������������������������������������������������������������������������������������ustar �carsten�������������������������carsten����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������## Smart punctuation Open quotes are matched with closed quotes. The same method is used for matching openers and closers as is used in emphasis parsing: ```````````````````````````````` example "Hello," said the spider. "'Shelob' is my name." . <p>“Hello,” said the spider. 
“‘Shelob’ is my name.”</p> ```````````````````````````````` ```````````````````````````````` example 'A', 'B', and 'C' are letters. . <p>‘A’, ‘B’, and ‘C’ are letters.</p> ```````````````````````````````` ```````````````````````````````` example 'Oak,' 'elm,' and 'beech' are names of trees. So is 'pine.' . <p>‘Oak,’ ‘elm,’ and ‘beech’ are names of trees. So is ‘pine.’</p> ```````````````````````````````` ```````````````````````````````` example 'He said, "I want to go."' . <p>‘He said, “I want to go.”’</p> ```````````````````````````````` A single quote that isn't an open quote matched with a close quote will be treated as an apostrophe: ```````````````````````````````` example Were you alive in the 70's? . <p>Were you alive in the 70’s?</p> ```````````````````````````````` ```````````````````````````````` example Here is some quoted '`code`' and a "[quoted link](url)". . <p>Here is some quoted ‘<code>code</code>’ and a “<a href="url">quoted link</a>”.</p> ```````````````````````````````` Here the first `'` is treated as an apostrophe, not an open quote, because the final single quote is matched by the single quote before `jolly`: ```````````````````````````````` example 'tis the season to be 'jolly' . <p>’tis the season to be ‘jolly’</p> ```````````````````````````````` Multiple apostrophes should not be marked as open/closing quotes. ```````````````````````````````` example 'We'll use Jane's boat and John's truck,' Jenna said. . <p>‘We’ll use Jane’s boat and John’s truck,’ Jenna said.</p> ```````````````````````````````` An unmatched double quote will be interpreted as a left double quote, to facilitate this style: ```````````````````````````````` example "A paragraph with no closing quote. "Second paragraph by same speaker, in fiction." . 
<p>“A paragraph with no closing quote.</p> <p>“Second paragraph by same speaker, in fiction.”</p> ```````````````````````````````` A quote following a `]` or `)` character cannot be an open quote: ```````````````````````````````` example [a]'s b' . <p>[a]’s b’</p> ```````````````````````````````` Quotes that are escaped come out as literal straight quotes: ```````````````````````````````` example \"This is not smart.\" This isn\'t either. 5\'8\" . <p>"This is not smart." This isn't either. 5'8"</p> ```````````````````````````````` Two hyphens form an en-dash, three an em-dash. ```````````````````````````````` example Some dashes: em---em en--en em --- em en -- en 2--3 . <p>Some dashes: em—em en–en em — em en – en 2–3</p> ```````````````````````````````` A sequence of more than three hyphens is parsed as a sequence of em and/or en dashes, with no hyphens. If possible, a homogeneous sequence of dashes is used (so, 10 hyphens = 5 en dashes, and 9 hyphens = 3 em dashes). When a heterogeneous sequence must be used, the em dashes come first, followed by the en dashes, and as few en dashes as possible are used (so, 7 hyphens = 2 em dashes an 1 en dash). ```````````````````````````````` example one- two-- three--- four---- five----- six------ seven------- eight-------- nine--------- thirteen-------------. . <p>one- two– three— four–– five—– six—— seven—–– eight–––– nine——— thirteen———––.</p> ```````````````````````````````` Hyphens can be escaped: ```````````````````````````````` example Escaped hyphens: \-- \-\-\-. . <p>Escaped hyphens: -- ---.</p> ```````````````````````````````` Three periods form an ellipsis: ```````````````````````````````` example Ellipses...and...and.... . <p>Ellipses…and…and….</p> ```````````````````````````````` Periods can be escaped if ellipsis-formation is not wanted: ```````````````````````````````` example No ellipses\.\.\. . 
<p>No ellipses...</p> ```````````````````````````````` �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������cmarkgfm/third_party/cmark/test/entity_tests.py�����������������������������������������������������0000644�0001750�0001750�00000003410�14210444464�022243� 0����������������������������������������������������������������������������������������������������ustar �carsten�������������������������carsten����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/env python3 # -*- coding: utf-8 -*- import re import os import argparse import sys import platform import html from cmark import CMark def get_entities(): regex = r'^{\(unsigned char\*\)"([^"]+)", \{([^}]+)\}' with open(os.path.join(os.path.dirname(__file__), '..', 'src', 'entities.inc')) as f: code = f.read() entities = [] for entity, utf8 in re.findall(regex, code, re.MULTILINE): utf8 = bytes(map(int, utf8.split(", ")[:-1])).decode('utf-8') entities.append((entity, utf8)) return entities if __name__ == "__main__": parser = argparse.ArgumentParser(description='Run cmark tests.') parser.add_argument('--program', dest='program', nargs='?', default=None, help='program to test') parser.add_argument('--library-dir', dest='library_dir', nargs='?', default=None, help='directory containing dynamic library') args = parser.parse_args(sys.argv[1:]) cmark = CMark(prog=args.program, library_dir=args.library_dir) entities = get_entities() passed = 0 errored = 0 failed = 0 exceptions = { 
'quot': '"', 'QUOT': '"', # These are broken, but I'm not too worried about them. 'nvlt': '<⃒', 'nvgt': '>⃒', } print("Testing entities:") for entity, utf8 in entities: [rc, actual, err] = cmark.to_html("&{};".format(entity)) check = exceptions.get(entity, utf8) if rc != 0: errored += 1 print(entity, '[ERRORED (return code {})]'.format(rc)) print(err) elif check in actual: passed += 1 else: print(entity, '[FAILED]') print(repr(actual)) failed += 1 print("{} passed, {} failed, {} errored".format(passed, failed, errored)) if failed == 0 and errored == 0: exit(0) else: exit(1) ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������cmarkgfm/third_party/cmark/test/extensions-table-prefer-style-attributes.txt������������������������0000644�0001750�0001750�00000001345�14210444464�027770� 0����������������������������������������������������������������������������������������������������ustar �carsten�������������������������carsten����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������--- title: Extensions test with --table-prefer-style-attributes author: FUJI Goro version: 0.1 date: '2018-02-20' license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)' ... ## Tables Table alignment: ```````````````````````````````` example aaa | bbb | ccc | ddd | eee :-- | --- | :-: | --- | --: fff | ggg | hhh | iii | jjj . 
<table> <thead> <tr> <th style="text-align: left">aaa</th> <th>bbb</th> <th style="text-align: center">ccc</th> <th>ddd</th> <th style="text-align: right">eee</th> </tr> </thead> <tbody> <tr> <td style="text-align: left">fff</td> <td>ggg</td> <td style="text-align: center">hhh</td> <td>iii</td> <td style="text-align: right">jjj</td> </tr> </tbody> </table> ```````````````````````````````` �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������cmarkgfm/third_party/cmark/test/cmark.py������������������������������������������������������������0000644�0001750�0001750�00000010107�14210444464�020603� 0����������������������������������������������������������������������������������������������������ustar �carsten�������������������������carsten����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/env python3 # -*- coding: utf-8 -*- from ctypes import CDLL, c_char_p, c_size_t, c_int, c_void_p from subprocess import * import platform import os def pipe_through_prog(prog, text): p1 = Popen(prog.split(), stdout=PIPE, stdin=PIPE, stderr=PIPE) [result, err] = p1.communicate(input=text.encode('utf-8')) return [p1.returncode, result.decode('utf-8'), err] def parse(lib, extlib, text, extensions): cmark_gfm_core_extensions_ensure_registered = extlib.cmark_gfm_core_extensions_ensure_registered find_syntax_extension = lib.cmark_find_syntax_extension find_syntax_extension.restype = c_void_p find_syntax_extension.argtypes = [c_char_p] parser_attach_syntax_extension = lib.cmark_parser_attach_syntax_extension parser_attach_syntax_extension.argtypes = [c_void_p, c_void_p] parser_new = 
lib.cmark_parser_new parser_new.restype = c_void_p parser_new.argtypes = [c_int] parser_feed = lib.cmark_parser_feed parser_feed.argtypes = [c_void_p, c_char_p, c_int] parser_finish = lib.cmark_parser_finish parser_finish.restype = c_void_p parser_finish.argtypes = [c_void_p] cmark_gfm_core_extensions_ensure_registered() parser = parser_new(0) for e in set(extensions): ext = find_syntax_extension(bytes(e, 'utf-8')) if not ext: raise Exception("Extension not found: '{}'".format(e)) parser_attach_syntax_extension(parser, ext) textbytes = text.encode('utf-8') textlen = len(textbytes) parser_feed(parser, textbytes, textlen) return [parser_finish(parser), parser] def to_html(lib, extlib, text, extensions): document, parser = parse(lib, extlib, text, extensions) parser_get_syntax_extensions = lib.cmark_parser_get_syntax_extensions parser_get_syntax_extensions.restype = c_void_p parser_get_syntax_extensions.argtypes = [c_void_p] syntax_extensions = parser_get_syntax_extensions(parser) render_html = lib.cmark_render_html render_html.restype = c_char_p render_html.argtypes = [c_void_p, c_int, c_void_p] # 1 << 17 == CMARK_OPT_UNSAFE result = render_html(document, 1 << 17, syntax_extensions).decode('utf-8') return [0, result, ''] def to_commonmark(lib, extlib, text, extensions): document, _ = parse(lib, extlib, text, extensions) render_commonmark = lib.cmark_render_commonmark render_commonmark.restype = c_char_p render_commonmark.argtypes = [c_void_p, c_int, c_int] result = render_commonmark(document, 0, 0).decode('utf-8') return [0, result, ''] class CMark: def __init__(self, prog=None, library_dir=None, extensions=None): self.prog = prog self.extensions = [] if extensions: self.extensions = extensions.split() if prog: prog += ' --unsafe' extsfun = lambda exts: ''.join([' -e ' + e for e in set(exts)]) self.to_html = lambda x, exts=[]: pipe_through_prog(prog + extsfun(exts + self.extensions), x) self.to_commonmark = lambda x, exts=[]: pipe_through_prog(prog + ' -t commonmark' 
+ extsfun(exts + self.extensions), x) else: sysname = platform.system() if sysname == 'Darwin': libnames = [ ["lib", ".dylib" ] ] elif sysname == 'Windows': libnames = [ ["", ".dll"], ["lib", ".dll"] ] else: libnames = [ ["lib", ".so"] ] if not library_dir: library_dir = os.path.join("..", "build", "src") for prefix, suffix in libnames: candidate = os.path.join(library_dir, prefix + "cmark-gfm" + suffix) if os.path.isfile(candidate): libpath = candidate break cmark = CDLL(libpath) extlib = CDLL(os.path.join( library_dir, "..", "extensions", prefix + "cmark-gfm-extensions" + suffix)) self.to_html = lambda x, exts=[]: to_html(cmark, extlib, x, exts + self.extensions) self.to_commonmark = lambda x, exts=[]: to_commonmark(cmark, extlib, x, exts + self.extensions) ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������cmarkgfm/third_party/cmark/test/pathological_tests.py�����������������������������������������������0000644�0001750�0001750�00000012622�14210444464�023402� 0����������������������������������������������������������������������������������������������������ustar �carsten�������������������������carsten����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/env python3 # -*- coding: utf-8 -*- import re import argparse import sys import platform import itertools import multiprocessing from cmark import CMark def hash_collisions(): REFMAP_SIZE = 16 COUNT = 50000 def badhash(ref): h = 0 for c in ref: a = (h << 6) & 
0xFFFFFFFF b = (h << 16) & 0xFFFFFFFF h = ord(c) + a + b - h h = h & 0xFFFFFFFF return (h % REFMAP_SIZE) == 0 keys = ("x%d" % i for i in itertools.count()) collisions = itertools.islice((k for k in keys if badhash(k)), COUNT) bad_key = next(collisions) document = ''.join("[%s]: /url\n\n[%s]\n\n" % (key, bad_key) for key in collisions) return document, re.compile("(<p>\[%s\]</p>\n){%d}" % (bad_key, COUNT-1)) allowed_failures = {"many references": True} # list of pairs consisting of input and a regex that must match the output. pathological = { # note - some pythons have limit of 65535 for {num-matches} in re. "nested strong emph": (("*a **a " * 65000) + "b" + (" a** a*" * 65000), re.compile("(<em>a <strong>a ){65000}b( a</strong> a</em>){65000}")), "many emph closers with no openers": (("a_ " * 65000), re.compile("(a[_] ){64999}a_")), "many emph openers with no closers": (("_a " * 65000), re.compile("(_a ){64999}_a")), "many link closers with no openers": (("a]" * 65000), re.compile("(a\]){65000}")), "many link openers with no closers": (("[a" * 65000), re.compile("(\[a){65000}")), "mismatched openers and closers": (("*a_ " * 50000), re.compile("([*]a[_] ){49999}[*]a_")), "openers and closers multiple of 3": (("a**b" + ("c* " * 50000)), re.compile("a[*][*]b(c[*] ){49999}c[*]")), "link openers and emph closers": (("[ a_" * 50000), re.compile("(\[ a_){50000}")), "pattern [ (]( repeated": (("[ (](" * 80000), re.compile("(\[ \(\]\(){80000}")), "hard link/emph case": ("**x [a*b**c*](d)", re.compile("\\*\\*x <a href=\"d\">a<em>b\\*\\*c</em></a>")), "nested brackets": (("[" * 50000) + "a" + ("]" * 50000), re.compile("\[{50000}a\]{50000}")), "nested block quotes": ((("> " * 50000) + "a"), re.compile("(<blockquote>\n){50000}")), "deeply nested lists": ("".join(map(lambda x: (" " * x + "* a\n"), range(0,1000))), re.compile("<ul>\n(<li>a\n<ul>\n){999}<li>a</li>\n</ul>\n(</li>\n</ul>\n){999}")), "U+0000 in input": ("abc\u0000de\u0000", re.compile("abc\ufffd?de\ufffd?")), 
"backticks": ("".join(map(lambda x: ("e" + "`" * x), range(1,5000))), re.compile("^<p>[e`]*</p>\n$")), "unclosed links A": ("[a](<b" * 30000, re.compile("(\[a\]\(<b){30000}")), "unclosed links B": ("[a](b" * 30000, re.compile("(\[a\]\(b){30000}")), "tables": ("aaa\rbbb\n-\v\n" * 30000, re.compile("^<p>aaa</p>\n<table>\n<thead>\n<tr>\n<th>bbb</th>\n</tr>\n</thead>\n<tbody>\n(<tr>\n<td>aaa</td>\n</tr>\n<tr>\n<td>bbb</td>\n</tr>\n<tr>\n<td>-\x0b</td>\n</tr>\n){29999}</tbody>\n</table>\n$")), # "many references": # ("".join(map(lambda x: ("[" + str(x) + "]: u\n"), range(1,5000 * 16))) + "[0] " * 5000, # re.compile("(\[0\] ){4999}")), "reference collisions": hash_collisions() } whitespace_re = re.compile('/s+/') passed = 0 errored = 0 ignored = 0 TIMEOUT = 5 def run_test(inp, regex): parser = argparse.ArgumentParser(description='Run cmark tests.') parser.add_argument('--program', dest='program', nargs='?', default=None, help='program to test') parser.add_argument('--library-dir', dest='library_dir', nargs='?', default=None, help='directory containing dynamic library') args = parser.parse_args(sys.argv[1:]) cmark = CMark(prog=args.program, library_dir=args.library_dir, extensions="table") [rc, actual, err] = cmark.to_html(inp) if rc != 0: print('[ERRORED (return code %d)]' % rc) print(err) exit(1) elif regex.search(actual): print('[PASSED]') else: print('[FAILED (mismatch)]') print(repr(actual)) exit(1) if __name__ == '__main__': print("Testing pathological cases:") for description in pathological: (inp, regex) = pathological[description] print(description, "... 
", end='') sys.stdout.flush() p = multiprocessing.Process(target=run_test, args=(inp, regex)) p.start() p.join(TIMEOUT) if p.is_alive(): p.terminate() p.join() print('[TIMED OUT]') if allowed_failures[description]: ignored += 1 else: errored += 1 elif p.exitcode != 0: if allowed_failures[description]: ignored += 1 else: errored += 1 else: passed += 1 print("%d passed, %d errored, %d ignored" % (passed, errored, ignored)) exit(errored) ��������������������������������������������������������������������������������������������������������������cmarkgfm/third_party/cmark/test/roundtrip_tests.py��������������������������������������������������0000644�0001750�0001750�00000004227�14210444464�022764� 0����������������������������������������������������������������������������������������������������ustar �carsten�������������������������carsten����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������import re import sys from spec_tests import get_tests, do_test from cmark import CMark import argparse if __name__ == "__main__": parser = argparse.ArgumentParser(description='Run cmark roundtrip tests.') parser.add_argument('-p', '--program', dest='program', nargs='?', default=None, help='program to test') parser.add_argument('-s', '--spec', dest='spec', nargs='?', default='spec.txt', help='path to spec') parser.add_argument('-P', '--pattern', dest='pattern', nargs='?', default=None, help='limit to sections matching regex pattern') parser.add_argument('--library-dir', dest='library_dir', nargs='?', default=None, help='directory containing dynamic library') parser.add_argument('--extensions', dest='extensions', nargs='?', default=None, help='space separated list of extensions to enable') parser.add_argument('--no-normalize', dest='normalize', action='store_const', const=False, default=True, help='do not normalize HTML') 
parser.add_argument('-n', '--number', type=int, default=None, help='only consider the test with the given number') args = parser.parse_args(sys.argv[1:]) spec = sys.argv[1] def converter(md, exts): cmark = CMark(prog=args.program, library_dir=args.library_dir, extensions=args.extensions) [ec, result, err] = cmark.to_commonmark(md, exts) if ec == 0: [ec, html, err] = cmark.to_html(result, exts) if ec == 0: # In the commonmark writer we insert dummy HTML # comments between lists, and between lists and code # blocks. Strip these out, since the spec uses # two blank lines instead: return [ec, re.sub('<!-- end list -->\n', '', html), ''] else: return [ec, html, err] else: return [ec, result, err] tests = get_tests(args.spec) result_counts = {'pass': 0, 'fail': 0, 'error': 0, 'skip': 0} for test in tests: do_test(converter, test, args.normalize, result_counts) sys.stdout.buffer.write("{pass} passed, {fail} failed, {error} errored, {skip} skipped\n".format(**result_counts).encode('utf-8')) exit(result_counts['fail'] + result_counts['error']) �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������cmarkgfm/third_party/cmark/test/spec_tests.py�������������������������������������������������������0000755�0001750�0001750�00000014433�14210444464�021673� 0����������������������������������������������������������������������������������������������������ustar �carsten�������������������������carsten����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/env python3 # -*- coding: utf-8 -*- import sys from difflib 
import unified_diff import argparse import re import json from cmark import CMark from normalize import normalize_html if __name__ == "__main__": parser = argparse.ArgumentParser(description='Run cmark tests.') parser.add_argument('-p', '--program', dest='program', nargs='?', default=None, help='program to test') parser.add_argument('-s', '--spec', dest='spec', nargs='?', default='spec.txt', help='path to spec') parser.add_argument('-P', '--pattern', dest='pattern', nargs='?', default=None, help='limit to sections matching regex pattern') parser.add_argument('--library-dir', dest='library_dir', nargs='?', default=None, help='directory containing dynamic library') parser.add_argument('--extensions', dest='extensions', nargs='?', default=None, help='space separated list of extensions to enable') parser.add_argument('--no-normalize', dest='normalize', action='store_const', const=False, default=True, help='do not normalize HTML') parser.add_argument('-d', '--dump-tests', dest='dump_tests', action='store_const', const=True, default=False, help='dump tests in JSON format') parser.add_argument('--debug-normalization', dest='debug_normalization', action='store_const', const=True, default=False, help='filter stdin through normalizer for testing') parser.add_argument('-n', '--number', type=int, default=None, help='only consider the test with the given number') args = parser.parse_args(sys.argv[1:]) def out(str): sys.stdout.buffer.write(str.encode('utf-8')) def print_test_header(headertext, example_number, start_line, end_line): out("Example %d (lines %d-%d) %s\n" % (example_number,start_line,end_line,headertext)) def do_test(converter, test, normalize, result_counts): [retcode, actual_html, err] = converter(test['markdown'], test['extensions']) actual_html = re.sub(r'\r\n', '\n', actual_html) if retcode == 0: expected_html = re.sub(r'\r\n', '\n', test['html']) unicode_error = None if expected_html.strip() == '<IGNORE>': passed = True elif normalize: try: passed = 
normalize_html(actual_html) == normalize_html(expected_html) except UnicodeDecodeError as e: unicode_error = e passed = False else: passed = actual_html == expected_html if passed: result_counts['pass'] += 1 else: print_test_header(test['section'], test['example'], test['start_line'], test['end_line']) out(test['markdown'] + '\n') if unicode_error: out("Unicode error: " + str(unicode_error) + '\n') out("Expected: " + repr(expected_html) + '\n') out("Got: " + repr(actual_html) + '\n') else: expected_html_lines = expected_html.splitlines(True) actual_html_lines = actual_html.splitlines(True) for diffline in unified_diff(expected_html_lines, actual_html_lines, "expected HTML", "actual HTML"): out(diffline) out('\n') result_counts['fail'] += 1 else: print_test_header(test['section'], test['example'], test['start_line'], test['end_line']) out("program returned error code %d\n" % retcode) sys.stdout.buffer.write(err) result_counts['error'] += 1 def get_tests(specfile): line_number = 0 start_line = 0 end_line = 0 example_number = 0 markdown_lines = [] html_lines = [] state = 0 # 0 regular text, 1 markdown example, 2 html output extensions = [] headertext = '' tests = [] header_re = re.compile('#+ ') with open(specfile, 'r', encoding='utf-8', newline='\n') as specf: for line in specf: line_number = line_number + 1 l = line.strip() if l.startswith("`" * 32 + " example"): state = 1 extensions = l[32 + len(" example"):].split() elif l == "`" * 32: state = 0 example_number = example_number + 1 end_line = line_number if 'disabled' not in extensions: tests.append({ "markdown":''.join(markdown_lines).replace('→',"\t"), "html":''.join(html_lines).replace('→',"\t"), "example": example_number, "start_line": start_line, "end_line": end_line, "section": headertext, "extensions": extensions}) start_line = 0 markdown_lines = [] html_lines = [] elif l == ".": state = 2 elif state == 1: if start_line == 0: start_line = line_number - 1 markdown_lines.append(line) elif state == 2: 
html_lines.append(line) elif state == 0 and re.match(header_re, line): headertext = header_re.sub('', line).strip() return tests if __name__ == "__main__": if args.debug_normalization: out(normalize_html(sys.stdin.read())) exit(0) all_tests = get_tests(args.spec) if args.pattern: pattern_re = re.compile(args.pattern, re.IGNORECASE) else: pattern_re = re.compile('.') tests = [ test for test in all_tests if re.search(pattern_re, test['section']) and (not args.number or test['example'] == args.number) ] if args.dump_tests: out(json.dumps(tests, indent=2)) exit(0) else: skipped = len(all_tests) - len(tests) converter = CMark(prog=args.program, library_dir=args.library_dir, extensions=args.extensions).to_html result_counts = {'pass': 0, 'fail': 0, 'error': 0, 'skip': skipped} for test in tests: do_test(converter, test, args.normalize, result_counts) out("{pass} passed, {fail} failed, {error} errored, {skip} skipped\n".format(**result_counts)) exit(result_counts['fail'] + result_counts['error']) �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������cmarkgfm/third_party/cmark/test/extensions-full-info-string.txt�������������������������������������0000644�0001750�0001750�00000002767�14210444464�025306� 0����������������������������������������������������������������������������������������������������ustar �carsten�������������������������carsten����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������--- title: --full-info-string test author: Ashe Connor version: 0.1 date: '2018-08-08' license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)' ... 
## `--full-info-string` Without extended info: ```````````````````````````````` example ```ruby module Foo ``` . <pre><code class="language-ruby">module Foo </code></pre> ```````````````````````````````` With extended info: ```````````````````````````````` example ```ruby some <extra> "data" module Foo ``` . <pre><code class="language-ruby" data-meta="some <extra> "data"">module Foo </code></pre> ```````````````````````````````` With an embedded NUL: ```````````````````````````````` example ```ruby nul�nul module Foo ``` . <pre><code class="language-ruby" data-meta="nul�nul">module Foo </code></pre> ```````````````````````````````` With a lot: ```````````````````````````````` example ```ruby xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx module Foo ``` . <pre><code class="language-ruby" data-meta="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx">module Foo </code></pre> ```````````````````````````````` ���������cmarkgfm/third_party/cmark/Makefile.nmake�����������������������������������������������������������0000644�0001750�0001750�00000001605�14210444464�020712� 0����������������������������������������������������������������������������������������������������ustar �carsten�������������������������carsten����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������SRCDIR=src DATADIR=data BUILDDIR=build INSTALLDIR=windows SPEC=test/spec.txt PROG=$(BUILDDIR)\src\cmark-gfm.exe GENERATOR=NMake Makefiles all: 
$(BUILDDIR)/CMakeFiles @cd $(BUILDDIR) && $(MAKE) /nologo && cd .. $(BUILDDIR)/CMakeFiles: @-mkdir $(BUILDDIR) 2> nul cd $(BUILDDIR) && \ cmake \ -G "$(GENERATOR)" \ -D CMAKE_BUILD_TYPE=$(BUILD_TYPE) \ -D CMAKE_INSTALL_PREFIX=$(INSTALLDIR) \ -D CMARK_STATIC=ON \ -D CMARK_SHARED=OFF \ .. && \ cd .. install: all @cd $(BUILDDIR) && $(MAKE) /nologo install && cd .. clean: -rmdir /s /q $(BUILDDIR) $(MINGW_INSTALLDIR) 2> nul $(SRCDIR)\case_fold_switch.inc: $(DATADIR)\CaseFolding-3.2.0.txt perl mkcasefold.pl < $? > $@ test: $(SPEC) all @cd $(BUILDDIR) && $(MAKE) /nologo test ARGS="-V" && cd .. distclean: clean del /q src\scanners.c 2> nul del /q spec.md spec.html 2> nul ���������������������������������������������������������������������������������������������������������������������������cmarkgfm/third_party/cmark/.gitignore���������������������������������������������������������������0000644�0001750�0001750�00000000522�14210444464�020145� 0����������������������������������������������������������������������������������������������������ustar �carsten�������������������������carsten����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# Object files *.o *.ko *.obj *.elf # Libraries *.lib *.a # Shared objects (inc. 
Windows DLLs) *.dll *.so *.so.* *.dylib # Executables *.exe *.out *.app *.i*86 *.x86_64 *.hex *.pyc *~ *.bak *.diff *# *.zip bstrlib.txt build cmark.dSYM/* cmark .vscode # Testing and benchmark alltests.md progit/ bench/benchinput.md test/afl_results/ ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������cmarkgfm/.gitignore���������������������������������������������������������������������������������0000644�0001750�0001750�00000000252�14210444330�014507� 0����������������������������������������������������������������������������������������������������ustar �carsten�������������������������carsten����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# general things to ignore build/ dist/ *.egg-info/ *.egg *.eggs *.py[cod] __pycache__/ *.so *~ # due to using t/nox and pytest .tox .nox .cache .pytest_cache .coverage 
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������