==> .github/workflows/wheels.yml <==
name: Build

on: [push, pull_request]

defaults:
  run:
    shell: bash --noprofile --norc -xeo pipefail {0}

jobs:

  build_wheel:
    name: ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest, ubuntu-latest, windows-latest]
    steps:
      - name: Checkout
        uses: actions/checkout@v2
        with:
          submodules: true
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: "3.x"
      - name: Install cibuildwheel
        run: python -m pip install -U cibuildwheel
      - name: Build wheels
        env:
          CIBW_BUILD: "cp3?-* cp31?-*"
          CIBW_SKIP: "*-manylinux_i686 *-win32"
          CIBW_BUILD_VERBOSITY: "1"
          CIBW_ENVIRONMENT_LINUX: "CFLAGS=-g0 LDFLAGS=-Wl,-strip-debug"
          CIBW_MANYLINUX_X86_64_IMAGE: "manylinux2014"
          CIBW_BEFORE_TEST: "pip install pytest"
          CIBW_TEST_COMMAND: "pytest {project}/test"
        run: python -m cibuildwheel --output-dir wheelhouse
      - name: Upload artifacts
        uses: actions/upload-artifact@v2
        with:
          name: Wheels (${{ runner.os }})
          path: wheelhouse/*

  release:
    name: Release
    runs-on: ubuntu-latest
    needs: [build_wheel]
    if: startsWith(github.ref, 'refs/tags/')
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: "3.x"
      - name: Install dependencies
        run: python -m pip install -U setuptools twine
      - name: Build source distribution
        run: python setup.py sdist
      - name: Download artifacts
        uses: actions/download-artifact@v2
        with:
          path: wheelhouse
      - name: Publish PyPI release
        env:
          TWINE_NON_INTERACTIVE: 1
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
          # Optional: twine will fall back to the default if empty.
          TWINE_REPOSITORY_URL: ${{ secrets.PYPI_URL }}
        run: python -m twine upload dist/* wheelhouse/*/*

==> .gitignore <==
/*.egg-info/
/.tox/
/build/
/dist/

==> MANIFEST.in <==
include pyproject.toml
include tox.ini

==> README.md <==
# RTF Tokenize

A simple RTF tokenizer.

API:

``` python
# Init:
from rtf_tokenize import RtfTokenizer
tokenizer = RtfTokenizer(rtf_string)
# Get the next token (returns `None` at end of string):
token = tokenizer.next_token()
# Rewind a token (the next call to `next_token` will return it):
tokenizer.rewind_token(token)
# Current location (of the last tokenized token, not affected by rewound tokens):
position = tokenizer.lnum, tokenizer.cnum
```
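For example, a minimal tokenize loop built on this API — a sketch, assuming the package is installed; the input string is a made-up example:

``` python
from rtf_tokenize import RtfTokenizer

tokenizer = RtfTokenizer('{\\rtf1\\ansi hello}')
while True:
    token = tokenizer.next_token()
    if token is None:
        break
    # `lnum`/`cnum` point at the token just returned, which makes
    # them suitable for error messages.
    print('%u:%u\t%r' % (tokenizer.lnum, tokenizer.cnum, token))
```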
## Release history

### 1.0.0

* first public release

==> pyproject.toml <==
[build-system]
requires = ["setuptools>=34.4.0", "wheel"]

==> rtf_tokenize.c <==
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include "structmember.h"

#define RTF_TOKENIZER_REWIND_SIZE 8

typedef struct {
    PyObject_HEAD
    char *text;
    char *text_ptr;
    size_t lnum;
    size_t cnum;
    size_t lnum_next;
    size_t cnum_next;
    PyObject **rewind_buffer;
    unsigned rewind_count;
    unsigned rewind_size;
} RtfTokenizer;

static int RtfTokenizer_init(RtfTokenizer *self, PyObject *args, PyObject *kwargs)
{
    const char *text;
    Py_ssize_t text_size;
    if (!PyArg_ParseTuple(args, "s", &text))
        goto error_0;
    text_size = strlen(text);
    self->text = PyMem_Malloc(text_size + 1);
    if (self->text == NULL)
        goto error_1;
    self->text_ptr = memcpy(self->text, text, text_size);
    self->text[text_size] = '\0';
    self->rewind_size = RTF_TOKENIZER_REWIND_SIZE;
    self->rewind_buffer = PyMem_Malloc(self->rewind_size * sizeof (*self->rewind_buffer));
    if (self->rewind_buffer == NULL)
        goto error_1;
    return 0;
error_1:
    PyErr_NoMemory();
error_0:
    return -1;
}

static void RtfTokenizer_dealloc(RtfTokenizer *self)
{
    while (self->rewind_count)
        Py_DECREF(self->rewind_buffer[--self->rewind_count]);
    PyMem_Free(self->rewind_buffer);
    PyMem_Free(self->text);
    /* Release the object itself. */
    Py_TYPE(self)->tp_free((PyObject *)self);
}

static PyObject *RtfTokenizer_next_token(RtfTokenizer *self, PyObject *Py_UNUSED(ignored))
{
    const char *token_next;
    const char *token_start;
    size_t token_len;
    int linc, cinc;
    char c;
    /* Serve rewound tokens first, in LIFO order. */
    if (self->rewind_count)
        return self->rewind_buffer[--self->rewind_count];
    self->lnum = self->lnum_next;
    self->cnum = self->cnum_next;
    token_start = token_next = self->text_ptr;
    token_len = 0;
    linc = cinc = 0;
    while ((c = *token_start) != '\0') {
        switch (c) {
        case '\n':
            ++self->lnum;
            self->cnum = 0;
            /* fall through */
        case '\r':
            /* Unescaped line breaks are not tokens: skip them. */
            token_start = ++token_next;
            continue;
        case '{':
        case '}':
            /* Group delimiters are single-character tokens. */
            token_len = 1;
            ++token_next;
            break;
        case '\\':
            /* Control word: a backslash followed by ASCII letters. */
            while ((((c = *++token_next) >= 'a' && c <= 'z') ||
                    (c >= 'A' && c <= 'Z')))
                ;
            /* An escaped newline spans two lines: adjust the location
               of the next token accordingly. */
            if (token_start[1] == '\n') {
                linc = +1;
                cinc = -2 - self->cnum;
            }
            token_len = token_next - token_start;
            if (token_len == 1) {
                /* Control symbol: the backslash and the next character. */
                if (c != '\0') {
                    ++token_next;
                    ++token_len;
                }
                break;
            }
            /* Optional signed numeric parameter. */
            if (c == '-' || (c >= '0' && c <= '9'))
                while ((c = *++token_next) >= '0' && c <= '9')
                    ;
            token_len = token_next - token_start;
            /* A single space delimiter is consumed,
               but not included in the token. */
            if (c == ' ')
                ++token_next;
            break;
        default:
            /* Plain text: everything up to the next delimiter or line break. */
            while ((c = *++token_next) != '\0' &&
                   c != '\\' && c != '{' && c != '}' &&
                   c != '\r' && c != '\n')
                ;
            token_len = token_next - token_start;
            break;
        }
        break;
    }
    self->text_ptr = (char *)token_next;
    self->lnum_next = self->lnum + linc;
    self->cnum_next = self->cnum + cinc + token_next - token_start;
    if (token_len == 0)
        Py_RETURN_NONE;
    return PyUnicode_FromStringAndSize(token_start, token_len);
}

static PyObject *RtfTokenizer_rewind_token(RtfTokenizer *self, PyObject *token)
{
    if (token != Py_None && !PyUnicode_Check(token)) {
        PyErr_SetString(PyExc_TypeError, "expected a string");
        return NULL;
    }
    if (self->rewind_count == self->rewind_size) {
        /* Grow the rewind buffer by a fixed increment. */
        PyObject **new_rewind_buffer;
        unsigned new_rewind_size;
        new_rewind_size = self->rewind_size + RTF_TOKENIZER_REWIND_SIZE;
        new_rewind_buffer = PyMem_Realloc(self->rewind_buffer,
                                          new_rewind_size * sizeof (*self->rewind_buffer));
        if (new_rewind_buffer == NULL) {
            PyErr_NoMemory();
            return NULL;
        }
        self->rewind_size = new_rewind_size;
        self->rewind_buffer = new_rewind_buffer;
    }
    self->rewind_buffer[self->rewind_count++] = token;
    Py_INCREF(token);
    Py_RETURN_NONE;
}

static PyMemberDef RtfTokenizer_members[] = {
    {"lnum", T_ULONG, offsetof(RtfTokenizer, lnum), 0, "Line number."},
    {"cnum", T_ULONG, offsetof(RtfTokenizer, cnum), 0, "Column number."},
    {NULL}
};

static PyMethodDef RtfTokenizer_methods[] = {
    {"next_token", (PyCFunction)RtfTokenizer_next_token, METH_NOARGS,
     "Return the next token."},
    {"rewind_token", (PyCFunction)RtfTokenizer_rewind_token, METH_O,
     "Rewind token so it's returned next."},
    {NULL}
};

static PyTypeObject RtfTokenizerType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    .tp_name = "rtf_tokenize.RtfTokenizer",
    .tp_doc = "RTF Tokenizer.",
    .tp_basicsize = sizeof (RtfTokenizer),
    .tp_itemsize = 0,
    .tp_flags = Py_TPFLAGS_DEFAULT,
    .tp_new = PyType_GenericNew,
    .tp_init = (initproc)RtfTokenizer_init,
    .tp_dealloc = (destructor)RtfTokenizer_dealloc,
    .tp_members = RtfTokenizer_members,
    .tp_methods = RtfTokenizer_methods,
};

static struct PyModuleDef module = {
    PyModuleDef_HEAD_INIT,
    .m_name = "rtf_tokenize",
    .m_size = -1,
};

PyMODINIT_FUNC PyInit_rtf_tokenize(void)
{
    PyObject *m;
    if (PyType_Ready(&RtfTokenizerType) < 0)
        return NULL;
    m = PyModule_Create(&module);
    if (m == NULL)
        return NULL;
    Py_INCREF(&RtfTokenizerType);
    if (PyModule_AddObject(m, "RtfTokenizer", (PyObject *)&RtfTokenizerType) < 0) {
        Py_DECREF(&RtfTokenizerType);
        Py_DECREF(m);
        return NULL;
    }
    return m;
}
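The rewind buffer above is drained last-in, first-out, and `rewind_token` accepts any string (or `None`), not just tokens that came from the input. A small sketch of the observable behavior, mirroring the test suite (assumes the extension is built and importable):

``` python
from rtf_tokenize import RtfTokenizer

tokenizer = RtfTokenizer(r'{\rtf1}')
tokenizer.rewind_token('re')
tokenizer.rewind_token('wind')
assert tokenizer.next_token() == 'wind'  # last rewound, first returned
assert tokenizer.next_token() == 're'
assert tokenizer.next_token() == '{'     # then back to the input text
```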
==> setup.cfg <==
[metadata]
name = rtf_tokenize
version = 1.0.0
description = Simple RTF tokenizer
long_description = file: README.md
long_description_content_type = text/markdown
author = Benoit Pierre
author_email = benoit.pierre@gmail.com
license = GNU General Public License v2 or later (GPLv2+)
url = https://github.com/benoit-pierre/rtf_tokenize
project_urls =
    Source Code = https://github.com/benoit-pierre/rtf_tokenize
    Issue Tracker = https://github.com/benoit-pierre/rtf_tokenize/issues
classifiers =
    Development Status :: 4 - Beta
    Intended Audience :: End Users/Desktop
    License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)
    Operating System :: OS Independent
    Programming Language :: Python :: 3
    Programming Language :: Python :: 3.6
    Programming Language :: Python :: 3.7
    Programming Language :: Python :: 3.8
    Programming Language :: Python :: 3.9
    Programming Language :: Python :: 3.10
keywords = plover

[options]
zip_safe = True
python_requires = >=3.6

[options.extras_require]
test = pytest>=3.0.1

[tool:pytest]
addopts = -ra

# vim: commentstring=#\ %s list

==> setup.py <==
#!/usr/bin/env python3

from setuptools import Extension, setup

setup(
    ext_modules=[
        Extension('rtf_tokenize', sources=['rtf_tokenize.c']),
    ],
)
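One subtle case exercised by the test data below is the escaped newline: a backslash immediately followed by a line break is returned as the two-character token `'\\\n'`, and the location counters account for the wrapped line. A quick check (a sketch, assuming the built module is importable):

``` python
from rtf_tokenize import RtfTokenizer

tokenizer = RtfTokenizer('line\\\nbreak')
assert tokenizer.next_token() == 'line'
assert tokenizer.next_token() == '\\\n'
assert tokenizer.next_token() == 'break'
# 'break' starts a new line, at column 0:
assert (tokenizer.lnum, tokenizer.cnum) == (1, 0)
```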
==> test/test_tokenizer.py <==
from textwrap import dedent

from rtf_tokenize import RtfTokenizer


TEST_RTF = dedent(
    r'''
    {\rtf1\ansi
    {\*\cxs TEFT}
    escaped newline: line\
    break
    \test1 ing\test2;
    }
    '''
).lstrip()

TEST_RTF_TOKENS = (
    '{', r'\rtf1', r'\ansi',
    '{', r'\*', r'\cxs', 'TEFT', '}',
    'escaped newline: line', '\\\n',
    'break',
    r'\test1', 'ing', r'\test2', ';',
    '}',
)

TEST_RTF_LOCATIONS = [
    tuple(map(int, loc.split(':')))
    for loc in
    '''
    0:0 0:1 0:6
    1:0 1:1 1:3 1:8 1:12
    2:0 2:21
    3:0
    4:0 4:7 4:10 4:16
    5:0
    '''.split()
]


def test_tokenizer_next_token():
    tokenizer = RtfTokenizer(TEST_RTF)
    for n, (expected_token, expected_loc) in enumerate(
            zip(TEST_RTF_TOKENS, TEST_RTF_LOCATIONS)):
        token = tokenizer.next_token()
        loc = (tokenizer.lnum, tokenizer.cnum)
        msg = 'token %u at %u:%u' % (n, loc[0], loc[1])
        assert token == expected_token, msg
        assert loc == expected_loc, msg
    msg = 'token %u at end' % (n + 1)
    expected_loc = (expected_loc[0] + 1, 0)
    assert tokenizer.next_token() is None, msg
    assert (tokenizer.lnum, tokenizer.cnum) == expected_loc, msg


def test_tokenizer_rewind_token():
    tokenizer = RtfTokenizer(TEST_RTF)
    # Read the first 2 tokens.
    assert tokenizer.next_token() == TEST_RTF_TOKENS[0]
    assert (tokenizer.lnum, tokenizer.cnum) == TEST_RTF_LOCATIONS[0]
    assert tokenizer.next_token() == TEST_RTF_TOKENS[1]
    assert (tokenizer.lnum, tokenizer.cnum) == TEST_RTF_LOCATIONS[1]
    # Rewind 2 unrelated tokens.
    tokenizer.rewind_token('re')
    tokenizer.rewind_token('wind')
    # Check the next 2 tokens are the rewound ones.
    assert tokenizer.next_token() == 'wind'
    assert (tokenizer.lnum, tokenizer.cnum) == TEST_RTF_LOCATIONS[1]
    assert tokenizer.next_token() == 're'
    assert (tokenizer.lnum, tokenizer.cnum) == TEST_RTF_LOCATIONS[1]
    # And that we continue where we left off.
    assert tokenizer.next_token() == TEST_RTF_TOKENS[2]
    assert (tokenizer.lnum, tokenizer.cnum) == TEST_RTF_LOCATIONS[2]

==> tox.ini <==
[tox]
envlist = test

[testenv]
usedevelop = true
extras = test
commands = pytest {posargs}

[testenv:packaging]
skip_install = true
deps =
    build
    check-manifest
    readme-renderer[md]
    twine
allowlist_externals = rm
commands =
    rm -rf build dist
    python -m build --sdist --wheel .
    twine check --strict dist/*
    check-manifest -v

# vim: commentstring=#\ %s list
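Finally, a common consumer-side pattern built on `rewind_token` is one-token lookahead. The helper below is hypothetical, not part of the package; it also works at end of input, since `rewind_token` accepts `None`:

``` python
def peek_token(tokenizer):
    # Look at the next token without consuming it, by rewinding it
    # immediately. The location counters already point at the peeked
    # token: rewound tokens don't touch them.
    token = tokenizer.next_token()
    tokenizer.rewind_token(token)
    return token
```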