mwic-0.7.10/0000755000000000000000000000000014375677630012556 5ustar00rootroot00000000000000mwic-0.7.10/.coveragerc0000644000000000000000000000014114375677624014676 0ustar00rootroot00000000000000[run] branch = true [report] show_missing = true exclude_lines = # no coverage # vim:ft=dosini mwic-0.7.10/.pylintrc0000644000000000000000000000122714375677624014430 0ustar00rootroot00000000000000[MASTER] load-plugins = pylint.extensions.check_elif [MESSAGES CONTROL] disable = bad-builtin, bad-continuation, bad-option-value, fixme, inconsistent-return-statements, invalid-name, locally-disabled, no-else-continue, no-self-use, raise-missing-from, redefined-variable-type, similarities, too-few-public-methods, too-many-branches, too-many-locals, too-many-statements, [BASIC] no-docstring-rgx = .* [REPORTS] reports = no score = no msg-template = {path}:{line}: {C}: {symbol} [{obj}] {msg} [FORMAT] max-line-length = 120 expected-line-ending-format = LF # vim:ft=dosini ts=4 sts=4 sw=4 et mwic-0.7.10/Makefile0000644000000000000000000000452314375677624014225 0ustar00rootroot00000000000000# Copyright © 2012-2022 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. PYTHON = python3 PREFIX = /usr/local DESTDIR = bindir = $(PREFIX)/bin basedir = $(PREFIX)/share/mwic mandir = $(PREFIX)/share/man .PHONY: all all: ; python_exe = $(shell $(PYTHON) -c 'import sys; print(sys.executable)') .PHONY: install install: mwic $(PYTHON) - < lib/__init__.py # Python version check # executable: install -d $(DESTDIR)$(bindir) sed \ -e "1 s@^#!.*@#!$(python_exe)@" \ -e "s#^basedir = .*#basedir = '$(basedir)/'#" \ $(<) > $(<).tmp install $(<).tmp $(DESTDIR)$(bindir)/$(<) rm $(<).tmp # library + data: install -d $(DESTDIR)$(basedir)/dict install -p -m644 dict/* $(DESTDIR)$(basedir)/dict/ install -d $(DESTDIR)$(basedir)/lib install -p -m644 lib/*.py $(DESTDIR)$(basedir)/lib/ umask 022 && $(PYTHON) -m compileall -q -d $(basedir)/lib $(DESTDIR)$(basedir)/lib ifeq "$(wildcard doc/*.1)" "" # run "$(MAKE) -C doc" to build the manpage else # manual page: install -d $(DESTDIR)$(mandir)/man1 install -p -m644 doc/$(<).1 $(DESTDIR)$(mandir)/man1/ endif .PHONY: test test: $(PYTHON) -bb -m pytest -v .PHONY: clean clean: find . -type f -name '*.py[co]' -delete find . 
-type d -name '__pycache__' -delete rm -rf .pytest_cache rm -f .coverage rm -f *.tmp .error = GNU make is required # vim:ts=4 sts=4 sw=4 noet mwic-0.7.10/dict/0000755000000000000000000000000014375677624013504 5ustar00rootroot00000000000000mwic-0.7.10/dict/en0000644000000000000000000000756114375677624014042 0ustar00rootroot00000000000000@define ’ = ['’] @define $BE = be|am|ain’t|I’m|is(n’t)?|(he|she|it)’s|are(n’t)?|was(n’t)?|were(n’t)?|(you|we|they)’re|been|being @define $ART = a|an|the # X X → X (?P[A-Za-z]+(’[a-z]+)*) (?P=dupl) $ART $ART [a-z]+n’t not $BE (?!being|been)$BE $BE(?[^\W_]+) (?P=dupl) Publiczn\w+ Licencj\w+ GNU mwic-0.7.10/doc/0000755000000000000000000000000014375677630013323 5ustar00rootroot00000000000000mwic-0.7.10/doc/LICENSE0000644000000000000000000000207414375677624014336 0ustar00rootroot00000000000000Copyright © 2012-2023 Jakub Wilk Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
mwic-0.7.10/doc/Makefile0000644000000000000000000000324614375677624014773 0ustar00rootroot00000000000000# Copyright © 2014-2018 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. export LC_ALL=C rst2man = $(notdir $(shell command -v rst2man || echo rst2man.py)) exe = mwic .PHONY: all all: $(exe).1 $(exe).1: manpage.rst $(rst2man) --input-encoding=UTF-8 < $(<) > $(@).tmp perl -pi -e '/^[.]BI\b/ and s/\\fP/\\fR/g' $(@).tmp # work-around for https://bugs.debian.org/806601 perl -pi -e 's/([a-z])\\[(]aq([a-z])/$$1\x27$$2/g' $(@).tmp # prefer ' to \(aq when used as an apostrophe perl -ni -e 'print unless /^[.]\\" vim:/' $(@).tmp mv $(@).tmp $(@) .PHONY: clean clean: rm -f $(exe).1 *.tmp .error = GNU make is required # vim:ts=4 sts=4 sw=4 noet mwic-0.7.10/doc/README0000644000000000000000000000176314375677630014212 0ustar00rootroot00000000000000Overview ======== **mwic** is a spell-checker that groups possible misspellings and shows them in their contexts. 
This is useful for checking technical documents, which often contain words that are not included in standard dictionaries. Prerequisites ============= The following software is needed to run mwic: * Python ≥ 3.6; * PyEnchant_, Python bindings for the Enchant_ spellchecking system; * regex_, alternative regular expression module for Python. Additionally, the following software is needed to rebuild the manual page from source: * docutils_ ≥ 0.6. For pip users:: python3 -m pip install pyenchant regex python3 -m pip install docutils For Debian users:: apt-get install python3-enchant python3-regex apt-get install python3-docutils .. _regex: https://pypi.org/project/regex/ .. _pyenchant: https://pypi.org/project/pyenchant/ .. _Enchant: https://abiword.github.io/enchant/ .. _docutils: https://docutils.sourceforge.io/ .. vim:ts=3 sts=3 sw=3 ft=rst mwic-0.7.10/doc/changelog0000644000000000000000000001700214375677624015200 0ustar00rootroot00000000000000mwic (0.7.10) unstable; urgency=low * Fix --max-context-width. * When trimming strings, take grapheme clusters into account. * Add new multi-word misspellings to the dictionary. * Improve pager support: + Don't reset encoding error handler to “strict”. + Improve handling errors. * Improve the test suite. * Use “pytest” in “make test”. * Enlarge screenshot. -- Jakub Wilk Thu, 23 Feb 2023 16:00:04 +0100 mwic (0.7.9) unstable; urgency=low * Add new multi-word misspellings to the dictionary. * Fix printing PyEnchant version. * Improve mwic4po: + Improve error handling. + Use uppercase for CLI metavar. * Drop support for Python < 3.6. * Update Docutils homepage URL. * Make it possible to use pytest as the test harness. -- Jakub Wilk Mon, 25 Apr 2022 14:33:05 +0200 mwic (0.7.8) unstable; urgency=low * Add new multi-word misspellings to the dictionary. * Use the \e[90m sequence for dark gray. * Improve the build system: + Check Python version on install. + Byte-compile Python code on install. 
https://github.com/jwilk/mwic/issues/9 + Don't require GNU install(1). * Rephrase descriptions of --help and --version in help messages. * Improve the test suite. -- Jakub Wilk Sat, 16 Nov 2019 18:11:49 +0100 mwic (0.7.7) unstable; urgency=low * Don't die with exception when a file cannot be opened. (If there are many input files, it's helpful to continue when one of them cannot be opened.) -- Jakub Wilk Mon, 12 Nov 2018 17:42:25 +0100 mwic (0.7.6) unstable; urgency=low * Improve documentation: + Fix grammar in the description. + Update codespell URL. + Add example screenshot. * Improve the build system: + Add checks against BSD make. (Only GNU make is supported.) + Use ginstall(1), instead of install(1), if it exists. * Fix test failure (multiword-is-t). Thanks to Daniel M. Capella for the bug report. https://github.com/jwilk/mwic/issues/7 -- Jakub Wilk Thu, 06 Sep 2018 21:23:51 +0200 mwic (0.7.5) unstable; urgency=low * Drop support for Python 3.2. * Update PyPI URLs in documentation. * Update Lintian URLs in the manual page. -- Jakub Wilk Thu, 26 Apr 2018 15:51:37 +0200 mwic (0.7.4) unstable; urgency=low * Update Linux kernel URL in the manual page. * Add new multi-word misspellings to the dictionary. * Improve typography in the manual page. * Reset the SIGPIPE signal disposition. * Improve the test suite. -- Jakub Wilk Wed, 24 Jan 2018 16:07:49 +0100 mwic (0.7.3) unstable; urgency=low * Add new multi-word misspellings to the dictionary. * Add experimental script to spell-check PO files. * Make the doc makefile more portable. * Update Enchant homepage URL. * Improve the test suite. -- Jakub Wilk Thu, 23 Mar 2017 12:52:48 +0100 mwic (0.7.2) unstable; urgency=low * Add new multi-word misspellings to the dictionary. * Add new words to the whitelist. * Put license into a separate file. -- Jakub Wilk Fri, 21 Oct 2016 13:27:44 +0200 mwic (0.7.1) unstable; urgency=low * Add new multi-word misspellings to the dictionary. 
* Make --version print also versions of Python and the libraries. * Make --version print to stdout, not stderr. https://bugs.python.org/issue18920 * Make the --help message include option descriptions. * Improve the manual page: + Update the description. + Make the example more realistic. * Document how to install dependencies with pip or apt-get. -- Jakub Wilk Sat, 06 Aug 2016 13:12:03 +0200 mwic (0.7) unstable; urgency=low * Fix spurious output for languages that didn't have multi-word misspelling dictionaries. * Add new multi-word misspellings to the dictionary. * Add support for “--language und”, to consider every word misspelled. * Use “regex”, an alternative regular expression module for Python. * Document runtime dependencies. * Improve the test suite. -- Jakub Wilk Thu, 23 Jun 2016 22:11:33 +0200 mwic (0.6.1) unstable; urgency=low [ Jakub Wilk ] * Add new multi-word misspellings to the dictionary. * Fix handling case-sensitivity in Lintian and codespell dictionaries. * Improve the test suite. [ Dwayne Bailey ] * Use /usr/bin/env in shebangs. -- Jakub Wilk Thu, 16 Jun 2016 20:14:48 +0200 mwic (0.6) unstable; urgency=low * Add Makefile. * Add new multi-word misspellings to the dictionary. * Add new words to the whitelist. * Add support for using misspelling dictionary from kde-spellcheck (part of kde-dev-scripts). * Document where to get third-party misspelling dictionaries. -- Jakub Wilk Mon, 06 Jun 2016 12:27:55 +0200 mwic (0.5.1) unstable; urgency=low * Update the description in the manual page. * Add new multi-word misspellings to the dictionary. * Fall back to “more” if $PAGER is not set and “pager” doesn't exist. Thanks to John Vandenberg for the bug report. https://github.com/jwilk/mwic/issues/1 -- Jakub Wilk Thu, 26 May 2016 23:00:21 +0200 mwic (0.5) unstable; urgency=low * Check for some multi-word misspellings. * Whitelist some words that are common in technical texts, but might not be recognized by general-purpose spellchecking dictionaries.
* Shorten vertical space between misspellings in colored mode. * Make all metavariables in the help message uppercase. * Add option for splitting camel-cased compound words (--camel-case). * Add option for omitting blank lines in output (--compact). * Add option for using external blacklist dictionary (--blacklist). * Add the “SEE ALSO” section to the manual page. -- Jakub Wilk Sun, 13 Mar 2016 16:04:43 +0100 mwic (0.4) unstable; urgency=low * Add work-around for Hunspell's stderr warnings about characters in Supplementary Planes. https://github.com/rfk/pyenchant/issues/58 * Add option for hiding words with many occurrences (--limit). -- Jakub Wilk Fri, 18 Dec 2015 15:08:56 +0100 mwic (0.3.1) unstable; urgency=low * Delay spawning the pager until the input is fully read. Thanks to Paul Tagliamonte for the bug report. -- Jakub Wilk Wed, 09 Dec 2015 16:38:52 +0100 mwic (0.3) unstable; urgency=low * Print rare words first by default. * Add option to highlight misspelling with color (-f/--output-format). * If stdout is a terminal, automatically pipe the output through a pager. * Reorder options in the help message and in the manual page. * Fix option formatting in the manual page. -- Jakub Wilk Thu, 03 Dec 2015 14:09:52 +0100 mwic (0.2) unstable; urgency=low * Add option to print rare words first (-r/--reverse). * Make it possible to specify input encoding error handler. * Make “UTF-8:replace” the default input encoding. * Expand tabs when reading input. * Improve the test suite. -- Jakub Wilk Tue, 24 Nov 2015 20:56:17 +0100 mwic (0.1.1) unstable; urgency=low * Print suggestions only once per group, not by every misspelling instance. * Add the --version option. * Add the --list-languages option. * Add the manual page. * Improve the test suite. -- Jakub Wilk Wed, 05 Nov 2014 15:19:23 +0100 mwic (0.1) unstable; urgency=low * Initial release.
-- Jakub Wilk Thu, 23 Jan 2014 14:00:25 +0100 mwic-0.7.10/doc/manpage.rst0000644000000000000000000000760214375677624015475 0ustar00rootroot00000000000000==== mwic ==== --------------------------- Misspelled Words In Context --------------------------- :manual section: 1 :version: mwic 0.7.10 :date: 2023-02-23 Synopsis -------- **mwic** [-l *lang*] [*option*...] [*file*...] Description ----------- **mwic** is a spell-checker that groups possible misspellings and shows them in their contexts. This is useful for checking technical documents, which often contain words that are not included in standard dictionaries. Options ------- -l lang, --language lang Spell-check for this language. The default is ``en``. --list-languages Print list of available languages. --blacklist file Treat words from the external dictionary as misspelled. The dictionary can be in the format used by *Lintian*, or in the format used by *codespell*, or in the format used by *kde-spellcheck* (part of *kde-dev-scripts*); or it can be plain newline-separated word list. This option can be used multiple times. --camel-case Split camel-cased compound words. For example, treat “eggBaconAndSpam” as 4 separate words. --input-encoding enc Assume this input encoding. The default is ``UTF-8:replace`` (UTF-8 encoding with error handler replacing malformed characters with U+FFFD). -f fmt, --output-format fmt If *fmt* is ``plain``, output plain text verbatim and highlight misspellings with the ``^`` character. This is the default if stdout is not a terminal. If *fmt* is ``color``, escape control characters and highlight misspellings with colors. This is the default if stdout is a terminal. -r, --reverse Print words in reverse order, that is, the most common words first. --compact Omit blank lines in output. --limit n Assume that words that occurred more than *n* times are spelled correctly. --max-context-width n Limit context width to *n* characters. The default is 30. --suggest n Suggest up to *n* corrections. 
-h, --help Show help message and exit. --version Show version information and exit. Environment ----------- PAGER If stdout is a terminal, mwic pipes the output through ``$PAGER``. The default is ``pager`` (if it exists) or ``more``. LESS If this variable is unset, mwic sets it to ``-FX``, or to ``-FXR`` if the output is in color. LV If this variable is unset, and the output is in color, mwic sets this variable to ``-c``. Files ----- Spell-checking can be eased by using dictionaries of commonly misspelled words. **mwic** doesn't ship with one, but it can use a number of dictionaries from third-party projects: * Lintian: | https://salsa.debian.org/lintian/lintian/raw/master/data/spelling/corrections | https://salsa.debian.org/lintian/lintian/raw/master/data/spelling/corrections-case * Linux kernel: | https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/scripts/spelling.txt * codespell: | https://github.com/codespell-project/codespell/raw/master/codespell_lib/data/dictionary.txt * kde-dev-scripts: | https://github.com/KDE/kde-dev-scripts/raw/master/kde-spellcheck.pl Example ------- :: $ mwic --blacklist /usr/share/lintian/data/spelling/corrections --compact rfc1927.txt heirarchical: | …g paper clips vs small ones; heirarchical assembly ^^^^^^^^^^^^ multipart: | …tes the degree of binding of multipart documents: ^^^^^^^^^ reycled: | 1) staples could be reycled for a small credit ^^^^^^^ *...* :: EMail, edu, isi: | EMail: rogers@isi.edu ^^^^^ ^^^ ^^^ electonic: | drawer of the electonic desk on home PCs | 3) electonic staples should have a standa… ^^^^^^^^^ See also -------- **spellintian**\ (1), **codespell**\ (1); “English for software localisation” by Justin B Rye .. vim:ts=3 sts=3 sw=3 mwic-0.7.10/doc/mwic.10000644000000000000000000001163014375677630014345 0ustar00rootroot00000000000000.\" Man page generated from reStructuredText. . .TH MWIC 1 "2023-02-23" "mwic 0.7.10" "" .SH NAME mwic \- Misspelled Words In Context .
.nr rst2man-indent-level 0 . .de1 rstReportMargin \\$1 \\n[an-margin] level \\n[rst2man-indent-level] level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] - \\n[rst2man-indent0] \\n[rst2man-indent1] \\n[rst2man-indent2] .. .de1 INDENT .\" .rstReportMargin pre: . RS \\$1 . nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] . nr rst2man-indent-level +1 .\" .rstReportMargin post: .. .de UNINDENT . RE .\" indent \\n[an-margin] .\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] .nr rst2man-indent-level -1 .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] .in \\n[rst2man-indent\\n[rst2man-indent-level]]u .. .SH SYNOPSIS .sp \fBmwic\fP [\-l \fIlang\fP] [\fIoption\fP\&...] [\fIfile\fP\&...] .SH DESCRIPTION .sp \fBmwic\fP is a spell\-checker that groups possible misspellings and shows them in their contexts. This is useful for checking technical documents, which often contain words that are not included in standard dictionaries. .SH OPTIONS .INDENT 0.0 .TP .BI \-l \ lang\fR,\fB \ \-\-language \ lang Spell\-check for this language. The default is \fBen\fP\&. .TP .B \-\-list\-languages Print list of available languages. .TP .BI \-\-blacklist \ file Treat words from the external dictionary as misspelled. The dictionary can be in the format used by \fILintian\fP, or in the format used by \fIcodespell\fP, or in the format used by \fIkde\-spellcheck\fP (part of \fIkde\-dev\-scripts\fP); or it can be plain newline\-separated word list. This option can be used multiple times. .TP .B \-\-camel\-case Split camel\-cased compound words. For example, treat “eggBaconAndSpam” as 4 separate words. .TP .BI \-\-input\-encoding \ enc Assume this input encoding. The default is \fBUTF\-8:replace\fP (UTF\-8 encoding with error handler replacing malformed characters with U+FFFD). .TP .BI \-f \ fmt\fR,\fB \ \-\-output\-format \ fmt If \fIfmt\fP is \fBplain\fP, output plain text verbatim and highlight misspellings with the \fB^\fP character. 
This is the default if stdout is not a terminal. .sp If \fIfmt\fP is \fBcolor\fP, escape control characters and highlight misspellings with colors. This is the default if stdout is a terminal. .TP .B \-r\fP,\fB \-\-reverse Print words in reverse order, that is, the most common words first. .TP .B \-\-compact Omit blank lines in output. .TP .BI \-\-limit \ n Assume that words that occurred more than \fIn\fP times are spelled correctly. .TP .BI \-\-max\-context\-width \ n Limit context width to \fIn\fP characters. The default is 30. .TP .BI \-\-suggest \ n Suggest up to \fIn\fP corrections. .TP .B \-h\fP,\fB \-\-help Show help message and exit. .TP .B \-\-version Show version information and exit. .UNINDENT .SH ENVIRONMENT .INDENT 0.0 .TP .B PAGER If stdout is a terminal, mwic pipes the output through \fB$PAGER\fP\&. The default is \fBpager\fP (if it exists) or \fBmore\fP\&. .TP .B LESS If this variable is unset, mwic sets it to \fB\-FX\fP, or to \fB\-FXR\fP if the output is in color. .TP .B LV If this variable is unset, and the output is in color, mwic sets this variable to \fB\-c\fP\&. .UNINDENT .SH FILES .sp Spell\-checking can be eased by using dictionaries of commonly misspelled words.
\fBmwic\fP doesn't ship with one, but it can use a number of dictionaries from third\-party projects: .INDENT 0.0 .IP \(bu 2 Lintian: .nf \fI\%https://salsa.debian.org/lintian/lintian/raw/master/data/spelling/corrections\fP \fI\%https://salsa.debian.org/lintian/lintian/raw/master/data/spelling/corrections\-case\fP .fi .sp .IP \(bu 2 Linux kernel: .nf \fI\%https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/scripts/spelling.txt\fP .fi .sp .IP \(bu 2 codespell: .nf \fI\%https://github.com/codespell\-project/codespell/raw/master/codespell_lib/data/dictionary.txt\fP .fi .sp .IP \(bu 2 kde\-dev\-scripts: .nf \fI\%https://github.com/KDE/kde\-dev\-scripts/raw/master/kde\-spellcheck.pl\fP .fi .sp .UNINDENT .SH EXAMPLE .INDENT 0.0 .INDENT 3.5 .sp .nf .ft C $ mwic \-\-blacklist /usr/share/lintian/data/spelling/corrections \-\-compact rfc1927.txt heirarchical: | …g paper clips vs small ones; heirarchical assembly ^^^^^^^^^^^^ multipart: | …tes the degree of binding of multipart documents: ^^^^^^^^^ reycled: | 1) staples could be reycled for a small credit ^^^^^^^ .ft P .fi .UNINDENT .UNINDENT .sp \fI\&...\fP .INDENT 0.0 .INDENT 3.5 .sp .nf .ft C EMail, edu, isi: | EMail: rogers@isi.edu ^^^^^ ^^^ ^^^ electonic: | drawer of the electonic desk on home PCs | 3) electonic staples should have a standa… ^^^^^^^^^ .ft P .fi .UNINDENT .UNINDENT .SH SEE ALSO .sp \fBspellintian\fP(1), \fBcodespell\fP(1); .sp “English for software localisation” <\fI\%http://jbr.me.uk/linux/esl.html\fP> by Justin B Rye . .\" Generated by docutils manpage writer. . 
mwic-0.7.10/doc/screenshot.svg0000644000000000000000000006074214375677624016235 0ustar00rootroot00000000000000 $ mwic --blacklist /usr/share/lintian/data/spelling/corrections rfc1927.txt heirarchical: | g paper clips vs small ones; heirarchical assembly reycled: | 1) staples could be reycled for a small credit flines: | y should not be used on data flines which might end up in recycler: | ile or folder is deleted, a "recycler" program could src: | 3) "src=" would allow the specificat ISI: | ISI USC: | USC/Information Sciences Institu bento: | f 5000. Reference: Apple's "bento" multipage: | r clip to a single page of a multipage document or Rey, del: | Marina del Rey, CA 90292 EMail, edu, isi: | EMail: rogers@isi.edu electonic: | drawer of the electonic desk on home PCs | 3) electonic staples should have a standa mwic-0.7.10/doc/todo0000644000000000000000000000022314375677624014213 0ustar00rootroot00000000000000Whitelist long hex strings. (``[0-9a-f]{10,}`` is unlikely to match any real word.) Whitelist long base64 strings. .. 
vim:ts=3 sts=3 sw=3 ft=rst mwic-0.7.10/lib/0000755000000000000000000000000014375677624013327 5ustar00rootroot00000000000000mwic-0.7.10/lib/__init__.py0000644000000000000000000000010714375677624015436 0ustar00rootroot00000000000000''' mwic's private modules ''' type(0_0) # Python >= 3.6 is required mwic-0.7.10/lib/cli.py0000644000000000000000000003141714375677624014456 0ustar00rootroot00000000000000# Copyright © 2013-2023 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. ''' the command-line interface ''' import argparse import functools import io import re import signal import sys import types import enchant.tokenize class lib: # pylint: disable=import-outside-toplevel from . import colors from . import data from . import extdict from . import intdict from . import pager from . 
import text # pylint: enable=import-outside-toplevel __version__ = '0.7.10' class VersionAction(argparse.Action): def __init__(self, option_strings, dest=argparse.SUPPRESS): super().__init__( option_strings=option_strings, dest=dest, nargs=0, help='show version information and exit' ) def __call__(self, parser, namespace, values, option_string=None): # pylint: disable=consider-using-f-string print(f'{parser.prog} {__version__}') print('+ Python {0}.{1}.{2}'.format(*sys.version_info)) print(f'+ PyEnchant {enchant.__version__}') try: enchant_version = enchant.get_enchant_version() except AttributeError: pass else: if isinstance(enchant_version, bytes): enchant_version = enchant_version.decode('ASCII', 'replace') print(f' + Enchant {enchant_version}') regex = lib.intdict.re print(f'+ regex {regex.__version__}') # pylint: disable=no-member parser.exit() def main(): signal.signal(signal.SIGPIPE, signal.SIG_DFL) ap = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter) ap.add_argument('--version', action=VersionAction) ap.add_argument('files', metavar='FILE', nargs='*', default=['-'], help='file to process (default: stdin)') ap.add_argument('-l', '--language', metavar='LANG', default='en', help='spell-check for this language (default: "en")') ap.add_argument('--list-languages', nargs=0, action=list_languages, help='print list of available languages') ap.add_argument('--blacklist', metavar='FILE', action='append', default=[], help='use misspelling dictionary') ap.add_argument('--camel-case', action='store_true', help='split camel-cased compound words') ap.add_argument('--input-encoding', metavar='ENC', default='UTF-8:replace', help='assume input encoding ENC (default: "UTF-8:replace")') default_output_format = 'color' if sys.stdout.isatty() else 'plain' ap.add_argument('-f', '--output-format', choices=('plain', 'color'), default=default_output_format, help=( '"plain" = use "^" to emphasize words\n' '"color" = highlight words in color (default on tty)\n' 
) ) ap.add_argument('-r', '--reverse', action='store_true', help='print most frequent words first') ap.add_argument('--compact', action='store_true', help='omit blank lines in output') ap.add_argument('--limit', metavar='N', type=int, default=1e999, help='skip words that have >N instances') ap.add_argument('--max-context-width', type=int, metavar='N', default=30, help='limit context width to N chars') ap.add_argument('--suggest', metavar='N', type=int, default=0, help='suggest up to N corrections') ap.add_argument('--debug-dict', action='store_true', help=argparse.SUPPRESS) ap.add_argument('--traceback', action='store_true', help=argparse.SUPPRESS) options = ap.parse_args() sys.stdout = io.TextIOWrapper(sys.stdout.buffer, 'UTF-8') try: split_words = enchant.tokenize.get_tokenizer(options.language) except enchant.errors.TokenizerNotFoundError: split_words = enchant.tokenize.get_tokenizer(None) if options.camel_case: split_words = lib.text.camel_case_tokenizer(split_words) if options.language == 'und': dictionary = None spellcheck = ''.__gt__ # always returns False options.suggest = 0 else: dictionary = enchant.Dict(options.language) spellcheck = functools.lru_cache(maxsize=None)( dictionary.check ) if options.debug_dict: if dictionary is None: dictvars = {} else: dictvars = vars(dictionary).items() for key, value in sorted(dictvars): print(f'{key} = {value!r}') sys.exit(0) intdict = lib.intdict.Dictionary(options.language) extdict = lib.extdict.Dictionary(*options.blacklist) misspellings = lib.data.Misspellings() encoding = options.input_encoding enc_errors = 'strict' if ':' in encoding: [encoding, enc_errors] = encoding.rsplit(':', 1) ctxt = types.SimpleNamespace( dictionary=dictionary, intdict=intdict, extdict=extdict, split_words=split_words, spellcheck=spellcheck, misspellings=misspellings, options=options, ) rc = 0 for path in options.files: if path == '-': file = io.TextIOWrapper( sys.stdin.buffer, encoding=encoding, errors=enc_errors, ) else: try: file = 
open( # pylint: disable=consider-using-with path, 'rt', encoding=encoding, errors=enc_errors, ) except OSError as exc: if options.traceback: raise msg = f'{ap.prog}: {path}: {exc.strerror}' print(msg, file=sys.stderr) rc = 1 continue with file: spellcheck_file(ctxt, file) if not misspellings: sys.exit(rc) raw_cc = options.output_format == 'color' try: with lib.pager.autopager(raw_control_chars=raw_cc): print_misspellings(ctxt) except lib.pager.Error: if options.traceback: raise msg = f'{ap.prog}: pager failed' print(msg, file=sys.stderr) rc = 1 sys.exit(rc) def spellcheck_file(ctxt, file): force_ucs2 = ( ctxt.dictionary is not None and ctxt.dictionary.provider.name == 'myspell' ) for line in file: if force_ucs2: # https://github.com/rfk/pyenchant/issues/58 line = re.sub(r'[^\0-\uFFFF]', '\uFFFD', line) line = line.strip() line = line.expandtabs() taken = bytearray(len(line)) for word, pos in ctxt.split_words(line): assert len(word) >= 1 if word in ctxt.extdict: certainty = 1 elif ctxt.spellcheck(word): continue elif ctxt.intdict.is_whitelisted(word): continue else: certainty = 0 for i, dummy in enumerate(word, start=pos): taken[i] = True ctxt.misspellings.add(word, line, pos, certainty) for word, pos in ctxt.intdict.find(line): assert len(word) >= 1 for i, dummy in enumerate(word, start=pos): if taken[i]: break else: ctxt.misspellings.add(word, line, pos, 1) def print_misspellings(ctxt): rare_misspellings = lib.data.Misspellings() for word, occurrences in ctxt.misspellings.sorted_words(): if len(occurrences) == 1: [(word, line, positions)] = occurrences for pos, certainty in positions.items(): rare_misspellings.add(word, line, pos, certainty) ctxt.rare_misspellings = rare_misspellings if ctxt.options.reverse: print_common_misspellings(ctxt) print_rare_misspellings(ctxt) else: print_rare_misspellings(ctxt) print_common_misspellings(ctxt) def print_common_misspellings(ctxt): options = ctxt.options for word, occurrences in 
ctxt.misspellings.sorted_words(reverse=options.reverse): if len(occurrences) == 1: continue if occurrences.count() > options.limit: continue extra = '' if options.suggest > 0: suggestions = ctxt.dictionary.suggest(word)[:options.suggest] if suggestions: suggestions = str.join(', ', suggestions) extra = f' ({suggestions})' print(word + extra + ':') highlight_color = 'error' if occurrences.certainty > 0 else 'warn' occurrences = [ ( lib.text.ltrim(lcontext, options.max_context_width), word, lib.text.rtrim(rcontext, options.max_context_width), ) for lcontext, word, rcontext in occurrences.sorted_context() ] lwidth = max(len(lcontext) for lcontext, _, _, in occurrences) for lcontext, word, rcontext in occurrences: # pylint: disable=redefined-outer-name lcontext = lcontext.rjust(lwidth) if options.output_format == 'color': lcontext = lib.colors.escape(lcontext) word = lib.colors.highlight(word, highlight_color) rcontext = lib.colors.escape(rcontext) print(lib.colors.dim('|'), end=' ') else: print('|', end=' ') print(f'{lcontext}{word}{rcontext}') if options.output_format != 'color': print('', ' ' * lwidth, '^' * len(word)) if not options.compact: print() def print_rare_misspellings(ctxt): options = ctxt.options use_color = options.output_format == 'color' for line, occurrences in ctxt.rare_misspellings.sorted_lines(reverse=options.reverse): header = [] underline = bytearray(b' ' * len(line)) for word, line, positions in sorted(occurrences): # pylint: disable=redefined-outer-name if use_color and (max(positions.values()) > 0): underline_char = b'!' 
else: underline_char = b'^' if len(positions) > options.limit: continue extra = '' if options.suggest > 0: suggestions = ctxt.dictionary.suggest(word)[:options.suggest] if suggestions: suggestions = str.join(', ', suggestions) extra = f' ({suggestions})' header += [word + extra] for x in positions: underline[x : x + len(word)] = underline_char * len(word) if not header: continue print(str.join(', ', header) + ':') underline = underline.decode() lwidth = len(underline) - len(underline.lstrip()) rwidth = len(underline) - len(underline.rstrip()) lexceed = lwidth - options.max_context_width rexceed = rwidth - options.max_context_width if lexceed > 0: lwidth = len(line) - lexceed line = lib.text.ltrim(line, lwidth) underline = lib.text.ltrim(underline, lwidth, char=' ') if rexceed > 0: rwidth = len(line) - rexceed line = lib.text.rtrim(line, rwidth) underline = lib.text.rtrim(underline, rwidth, char=' ') if use_color: hline = lib.colors.highlight( line, ( 'warn' if u == '^' else 'error' if u == '!' 
else 'off' for u in underline ) ) print(lib.colors.dim('|'), hline) else: print('|', line) print(' ', underline.rstrip()) if not options.compact: print() class list_languages(argparse.Action): def __call__(self, *args, **kwargs): # pylint: disable=arguments-differ,signature-differs for lang in sorted(enchant.list_languages()): print(lang) sys.exit(0) __all__ = ['main'] # vim:ts=4 sts=4 sw=4 et mwic-0.7.10/lib/colors.py0000644000000000000000000000423014375677624015201 0ustar00rootroot00000000000000# Copyright © 2015-2022 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
''' color terminal support ''' import io import itertools import unicodedata class _seq: dim = '\x1B[90m' off = '\x1B[0m' warn = '\x1B[30;43m' error = '\x1B[30;41m' reverse = '\x1B[7m' unreverse = '\x1B[27m' def dim(s): return _seq.dim + escape(s) + _seq.off def escape(s): return highlight(s, itertools.repeat('off')) def highlight(s, w): if isinstance(w, str): w = itertools.repeat(w) fp = io.StringIO() off = _seq.off old_color = off for (cs, cw) in zip(s, w): color = getattr(_seq, cw) if color != old_color: fp.write(color) old_color = color if unicodedata.category(cs) == 'Cc': if cs < ' ' or cs == '\x7F': cs = '^' + chr(ord(cs) ^ ord('@')) else: cs = f'' cs = f'{_seq.reverse}{cs}{_seq.unreverse}' fp.write(cs) if old_color != off: fp.write(off) return fp.getvalue() __all__ = [ 'dim', 'escape', 'highlight', ] # vim:ts=4 sts=4 sw=4 et mwic-0.7.10/lib/data.py0000644000000000000000000000704014375677624014613 0ustar00rootroot00000000000000# Copyright © 2013-2018 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. ''' collecting misspelling data ''' import collections import sys class Occurrences(): def __init__(self): self._data = collections.defaultdict(dict) self.certainty = 0 def add(self, word, line, pos, certainty): if isinstance(pos, int): self._data[(word, line)][pos] = certainty else: for p in pos: self._data[(word, line)][p] = certainty self.certainty = max(self.certainty, certainty) def count(self): return sum( len(positions) for positions in self._data.values() ) def __len__(self): return len(self._data) def __iter__(self): for (word, line), positions in self._data.items(): yield word, line, positions @staticmethod def _sorting_key(item): lcontext, word, rcontext = item return (rcontext, lcontext[::-1], word) def _context(self): for (word, line), positions in self._data.items(): for pos in positions: lcontext = line[:pos] rcontext = line[pos + len(word):] yield lcontext, word, rcontext def sorted_context(self): return sorted(self._context(), key=self._sorting_key) class Misspellings(): def __init__(self): self._word_index = collections.defaultdict(Occurrences) self._line_index = collections.defaultdict(Occurrences) def add(self, word, line, pos, certainty): word = sys.intern(word) line = sys.intern(line) self._word_index[word].add(word, line, pos, certainty) self._line_index[line].add(word, line, pos, certainty) @staticmethod def _sorting_key(*, reverse=False): sign = 1 if reverse: sign = -1 def k(item): s, occurrences = item return ( sign * -occurrences.certainty, sign * occurrences.count(), s ) return k def __bool__(self): return bool(self._word_index) def sorted_words(self, *, reverse=False): return sorted( self._word_index.items(), key=self._sorting_key(reverse=reverse) ) def sorted_lines(self, *, 
reverse=False): return sorted( self._line_index.items(), key=self._sorting_key(reverse=reverse) ) __all__ = [ 'Misspellings', 'Occurrences', ] # vim:ts=4 sts=4 sw=4 et mwic-0.7.10/lib/extdict.py0000644000000000000000000000654614375677624015360 0ustar00rootroot00000000000000# Copyright © 2016-2018 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
''' external misspelling dictionary Supported dictionary formats: + Lintian - - + codespell + kde-spellcheck + plain word list ''' import re separators = { '||', # Lintian '->', # codespell } def case_variants(word, correction=None): yield word if not word.islower(): return correction = correction or '' if word.title() != correction.title(): yield word.title() if word.upper() != correction.upper(): yield word.upper() def parse_line(line): word = line for sep in separators: try: [word, correction] = line.split(sep, 1) except ValueError: pass else: break else: correction = None return case_variants(word, correction) class Dictionary(): def __init__(self, *paths): self._dict = set() for path in paths: self._read(path) def __contains__(self, word): return word in self._dict def _add(self, word): self._dict.add(word) def _read(self, path): with open(path, 'rt', encoding='UTF-8') as file: self._read_fp(file) def _read_fp(self, file): add = self._add kde = None for line in file: if kde is None: kde = re.match(r'\A#!.*\bperl\b', line) if kde: return self._read_fp_kde(file) if line[:1] == '#': continue line = line.strip() if not line: continue for word in parse_line(line): add(word) def _read_fp_kde(self, file): add = self._add for line in file: if line.strip() == '__DATA__': break for line in file: if line[:1] == '#': continue line = line.split() if line: add(line[0]) __all__ = ['Dictionary'] # vim:ts=4 sts=4 sw=4 et mwic-0.7.10/lib/intdict.py0000644000000000000000000001332014375677624015336 0ustar00rootroot00000000000000# Copyright © 2015-2022 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the 
following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. ''' internal dictionary, which can contain: + blacklist of multi-word misspellings; + whitelist of words that are commonly found in software code or documentation, but are not present in standard dictionaries. ''' import os import regex as re basedir = os.path.normpath(os.path.join( os.path.dirname(__file__), os.path.pardir, '', )) datadir = os.path.join(basedir, 'dict', '') os.stat(datadir) def _find_nothing(s): # pylint: disable=unused-argument return () class Macros(): def __init__(self): self._defs = {} self._regex = None self._substs = None def __setitem__(self, name, definition): if name in self._defs: raise KeyError(name) # no coverage self._defs[name] = definition self._regex = None self._substs = None def expand(self, s): if not self._defs: return s if self._regex is not None: regex = self._regex substs = self._substs else: substs = [] regex = [] for i, (name, definition) in enumerate(self._defs.items()): substs += [definition] regex += [f'(?P{re.escape(name)})'] regex = str.join('|', regex) regex = re.compile(regex) self._regex = regex self._substs = substs assert self._regex is not None assert self._substs is not None def replace(match): for i, subst in enumerate(substs): if match.group(f'mwic{i}') is not None: return subst assert False # no coverage return self._regex.sub(replace, s) class Dictionary(): def __init__(self, lang): 
self._whitelist = set() regexes = [] lang = lang.lower().replace('_', '-') while True: path = os.path.join(datadir, lang) try: file = open(path, 'rt', encoding='UTF-8') # pylint: disable=consider-using-with except FileNotFoundError: [lang, *suffix] = lang.rsplit('-', 1) if suffix: continue else: break macros = Macros() n = None # hi, pylint def error(reason): # no coverage return SyntaxError(reason, (file.name, n, 1, whole_line)) with file: for n, line in enumerate(file, 1): whole_line = line if line.startswith('#'): continue line = line.split() if not line: continue if line[0] == '*': [word] = line[1:] self._whitelist.add(word) self._whitelist.add(word.upper()) self._whitelist.add(word.title()) elif line[0][0] == '@': if (len(line) >= 4) and (line[0] == '@define') and (line[2] == '='): (_, name, _, *definition) = line definition = str.join(r'\s+', definition) definition = fr'(?:{definition})' try: re.compile(definition) except re.error as exc: # no coverage raise error(exc) try: macros[name] = macros.expand(definition) # pylint: disable=unsubscriptable-object except KeyError: # no coverage raise error(f'duplicate macro definition: {name}') else: raise error('malformed @-command') # no coverage else: regex = str.join(r'\s+', line) regex = macros.expand(regex) try: re.compile(regex) except re.error as exc: # no coverage raise error(exc) regexes += [regex] break if regexes: regex = str.join('|', regexes) regex = fr'\b(?:(?i){regex})\b' self._find = re.compile(regex).finditer else: self._find = _find_nothing def find(self, s): for match in self._find(s): yield (match.group(), match.start()) def is_whitelisted(self, word): return word in self._whitelist __all__ = ['Dictionary'] # vim:ts=4 sts=4 sw=4 et mwic-0.7.10/lib/pager.py0000644000000000000000000000517014375677624015002 0ustar00rootroot00000000000000# Copyright © 2015-2022 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation 
files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. ''' automatic pager ''' import contextlib import io import os import shutil import subprocess as ipc import sys def _find_command(command): if shutil.which(command): return command def get_default_pager(): # Use "pager" if it exist: # https://www.debian.org/doc/debian-policy/ch-customized-programs.html#editors-and-pagers # Fall back to "more", which is in POSIX. 
return ( _find_command('pager') or 'more' ) class Error(RuntimeError): pass @contextlib.contextmanager def autopager(*, raw_control_chars=False): if not sys.stdout.isatty(): yield return cmdline = os.environ.get('PAGER') or get_default_pager() if cmdline == 'cat': yield return env = None if 'LESS' not in os.environ: lessopt = '-FX' if raw_control_chars: lessopt += 'R' env = dict(env or os.environ, LESS=lessopt) if raw_control_chars and ('LV' not in os.environ): env = dict(env or os.environ, LV='-c') orig_stdout = sys.stdout try: with ipc.Popen(cmdline, shell=True, stdin=ipc.PIPE, env=env) as pager: sys.stdout = io.TextIOWrapper(pager.stdin, encoding=orig_stdout.encoding, errors=orig_stdout.errors, ) try: yield finally: sys.stdout.close() finally: sys.stdout = orig_stdout if pager.returncode: raise Error __all__ = [ 'Error', 'autopager', ] # vim:ts=4 sts=4 sw=4 et mwic-0.7.10/lib/text.py0000644000000000000000000000437414375677624014675 0ustar00rootroot00000000000000# Copyright © 2013-2023 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. ''' text manipulation functions ''' import functools import regex as re def ltrim(s, n, *, char='…'): if n <= 0: return s and char pat = re.compile(r'\X\X(\X{#})\Z'.replace('#', str(n - 1))) match = pat.search(s) if match is None: return s if n <= 1: return char return char + match.group(1) def rtrim(s, n, *, char='…'): if n <= 0: return s and char pat = re.compile(r'\A(\X{#})\X\X'.replace('#', str(n - 1))) match = pat.match(s) if match is None: return s if n <= 1: return char return match.group(1) + char _camel_case_split = re.compile('([A-Z][^A-Z]*)').split def camel_case_tokenizer(tokenizer): @functools.wraps(tokenizer) def new_tokenizer(s): for word, offset in tokenizer(s): if word.isupper(): yield word, offset continue for subword in _camel_case_split(word): if subword: yield subword, offset offset += len(subword) return new_tokenizer __all__ = [ 'camel_case_tokenizer', 'ltrim', 'rtrim', ] # vim:ts=4 sts=4 sw=4 et mwic-0.7.10/misc/0000755000000000000000000000000014375677624013514 5ustar00rootroot00000000000000mwic-0.7.10/misc/mwic4po0000755000000000000000000000303614375677624015026 0ustar00rootroot00000000000000#!/bin/sh # Copyright © 2016-2022 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial 
portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. set -e -u prog=${0##*/} if [ $# -ne 1 ] then printf 'Usage: %s PO-FILE\n' "$prog" >&2 exit 1 fi pofile="$1" exec 3<"$pofile" lang=$( msggrep -K -e '^$' --force-po <&3 \ | msgexec cat \ | sed -n -e 's/^Language: *//p' ) if [ -z "$lang" ] then printf '%s: no language declared\n' "$pofile" >&2 exit 1 fi exec msgexec sh -c 'cat; printf "\n"' < "$pofile" \ | mwic --language "$lang" # vim:ts=4 sts=4 sw=4 et mwic-0.7.10/mwic0000755000000000000000000000253314375677624013451 0ustar00rootroot00000000000000#!/usr/bin/env python3 # encoding=UTF-8 # Copyright © 2013-2018 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. import sys basedir = None if basedir is not None: sys.path[:0] = [basedir] import lib.cli # pylint: disable=wrong-import-position if __name__ == '__main__': lib.cli.main() # vim:ts=4 sts=4 sw=4 et mwic-0.7.10/private/0000755000000000000000000000000014375677630014230 5ustar00rootroot00000000000000mwic-0.7.10/private/check-rst0000755000000000000000000000310714375677624016045 0ustar00rootroot00000000000000#!/bin/sh # Copyright © 2016-2022 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
set -e -u here=${0%/*} here=${here#./} root="$here/../" root=${root#private/../} rst2xml=$(command -v rst2xml) \ || rst2xml=$(command -v rst2xml.py) \ || { printf 'rst2xml not found\n' >&2; exit 1; } rst2xml=${rst2xml##*/} options='--input-encoding=UTF-8 --strict' if [ $# -eq 0 ] then find "${root}doc" -type f -name '*.rst' grep -rwl 'ft[=]rst' "${root:-.}" else printf '%s\n' "$@" fi | xargs -t -I{} "$rst2xml" $options {} > /dev/null # vim:ts=4 sts=4 sw=4 et mwic-0.7.10/private/run-pylint0000755000000000000000000000277014375677624016310 0ustar00rootroot00000000000000#!/bin/sh # Copyright © 2015-2019 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. set -e -u PYTHON=${PYTHON:-python3} "$PYTHON" -m pylint --version >/dev/null if [ $# -eq 0 ] then pyscripts=$(grep -l -r '^#!.*python' .) set -- lib tests $pyscripts fi log=$(mktemp -t pylint.XXXXXX) "$PYTHON" -m pylint "$@" > "$log" || [ $? != 1 ] ! 
grep -P '^\S+:' "$log" \ | grep -v -P '^(?!lib/).*: missing-(\w+-)?docstring ' \ | grep '.' || exit 1 rm "$log" # vim:ts=4 sts=4 sw=4 et mwic-0.7.10/private/update-branch-coverage0000755000000000000000000000303414375677624020467 0ustar00rootroot00000000000000#!/bin/sh # Copyright © 2022 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. set -e -u here=${0%/*} cd "$here/.." python3 -m pytest --cov=lib --cov-branch --cov-report= -q true > tests/coverage.tmp printf 'Generated automatically by private/update-branch-coverage. 
' >> tests/coverage.tmp printf 'Do not edit.\n\n' >> tests/coverage.tmp python3 -m coverage report >> tests/coverage.tmp || { printf '"python3 -m coverage" failed\n' >&2 exit 1 } mv tests/coverage.tmp tests/coverage # vim:ts=4 sts=4 sw=4 et mwic-0.7.10/private/update-version0000755000000000000000000000052514375677624017130 0ustar00rootroot00000000000000#!/bin/sh set -e -u export version=${1:?"no version number provided"} export date="$(date -u --rfc-3339=date)" PS4='$ ' set -x dch -m -v "$version" -u low -c doc/changelog perl -pi -e 's/^__version__ = '"'"'\K[\w.]+/$ENV{version}/' lib/*.py perl -pi -e 's/^:version: \S+ \K[\w.]+/$ENV{version}/; s/^(:date:) \K[0-9-]+/$ENV{date}/' doc/*.rst mwic-0.7.10/tests/0000755000000000000000000000000014375677624013723 5ustar00rootroot00000000000000mwic-0.7.10/tests/__init__.py0000644000000000000000000000004714375677624016035 0ustar00rootroot00000000000000type(0_0) # Python >= 3.6 is required mwic-0.7.10/tests/alice.exp0000644000000000000000000000136214375677624015520 0ustar00rootroot00000000000000labelled: | …helves as she passed; it was labelled ‘ORANGE MARMALADE’, ^^^^^^^^ cubpoards: | …d that they were filled with cubpoards and boook-shelves; ^^^^^^^^^ aftcrwards, occured: | …!’ (when she thought it over aftcrwards, it occured to her ^^^^^^^^^^ ^^^^^^^ ni: | …no pictures or conversations ni it, ‘and what | …s nothing so VERY remarkable ni that; nor did Alice think it… ^^ boook: | …wice she had peeped into the boook her | is the use of a boook,’ thought Alice ‘without pic… | …re filled with cubpoards and boook-shelves; ^^^^^ mwic-0.7.10/tests/alice.txt0000644000000000000000000000515714375677624015551 0ustar00rootroot00000000000000Down the Rabbit-Hole ==================== Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the boook her sister was reading, but it had no pictures or conversations ni it, ‘and what is the use of a boook,’ thought 
Alice ‘without pictures or conversation?’ So she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), whether the pleasure of making a daisy-chain would be worth the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her. There was nothing so VERY remarkable ni that; nor did Alice think it so VERY much out of the way to hear the Rabbit say to itself, ‘Oh dear! Oh dear! I shall be late!’ (when she thought it over aftcrwards, it occured to her that she ought to have wondered at this, but at the time it all seemed quite natural); but when the Rabbit actually TOOK A WATCH OUT OF ITS WAISTCOAT— POCKET, and looked at it, and then hurried on, Alice started to her feet, for it flashed across her mind that she had never before seen a rabbit with either a waistcoat-pocket, or a watch to take out of it, and burning with curiosity, she ran across the field after it, and fortunately was just in time to see it pop down a large rabbit-hole under the hedge. In another moment down went Alice after it, never once considering how in the world she was to get out again. The rabbit-hole went straight on like a tunnel for some way, and then dipped suddenly down, so suddenly that Alice had not a moment to think about stopping herself before she found herself falling down a very deep well. Either the well was very deep, or she fell very slowly, for she had plenty of time as she went down to look about her and to wonder what was going to happen next. First, she tried to look down and make out what she was coming to, but it was too dark to see anything; then she looked at the sides of the well, and noticed that they were filled with cubpoards and boook-shelves; here and there she saw maps and pictures hung upon pegs. 
She took down a jar from one of the shelves as she passed; it was labelled ‘ORANGE MARMALADE’, but to her great disappointment it was empty: she did not like to drop the jar for fear of killing somebody, so managed to put it into one of the cupboards as she fell past it. ‘Well!’ thought Alice to herself, ‘after such a fall as this, I shall think nothing of tumbling down stairs! How brave they’ll all think me at home! Why, I wouldn't say anything about it, even if I fell off the top of the house!’ mwic-0.7.10/tests/coverage0000644000000000000000000000171114375677624015441 0ustar00rootroot00000000000000Generated automatically by private/update-branch-coverage. Do not edit. Name Stmts Miss Branch BrPart Cover Missing ------------------------------------------------------------- lib/__init__.py 1 0 0 0 100% lib/cli.py 229 49 106 19 75% 64-65, 68, 116, 127-133, 139->141, 165-171, 175, 180-185, 196, 203, 230-231, 242, 245-248, 264-267, 271->273, 273->238, 284, 288, 291-294, 299, 315-323, 327->279, 332-334 lib/colors.py 36 5 12 3 83% 38, 45, 52-53, 62 lib/data.py 57 3 18 2 91% 38-39, 86 lib/extdict.py 61 0 34 2 98% 84->88, 98->101 lib/intdict.py 95 0 26 0 100% lib/pager.py 42 25 18 1 33% 33-34, 40, 53-79 lib/text.py 37 0 22 0 100% ------------------------------------------------------------- TOTAL 558 82 236 27 82% mwic-0.7.10/tests/multiword-a-e.exp0000644000000000000000000000012214375677624017122 0ustar00rootroot00000000000000a eight: | a eight-byte word ^^^^^^^ a eighth: | a eighth argument ^^^^^^^^ mwic-0.7.10/tests/multiword-a-e.txt0000644000000000000000000000004414375677624017150 0ustar00rootroot00000000000000a eight-byte word a eighth argument mwic-0.7.10/tests/multiword-a-h.exp0000644000000000000000000000015514375677624017133 0ustar00rootroot00000000000000a HTML: | a HTML document ^^^^^^ a HTTP: | a HTTP request ^^^^^^ a HTTPS: | a HTTPS request ^^^^^^^ mwic-0.7.10/tests/multiword-a-h.txt0000644000000000000000000000005714375677624017157 
0ustar00rootroot00000000000000a HTML document a HTTP request a HTTPS request mwic-0.7.10/tests/multiword-a-o.exp0000644000000000000000000000004014375677624017133 0ustar00rootroot00000000000000a old: | a old version ^^^^^ mwic-0.7.10/tests/multiword-a-o.txt0000644000000000000000000000001614375677624017161 0ustar00rootroot00000000000000a old version mwic-0.7.10/tests/multiword-a-u.exp0000644000000000000000000000004114375677624017142 0ustar00rootroot00000000000000a Ubuntu: | a Ubuntu ^^^^^^^^ mwic-0.7.10/tests/multiword-a-u.txt0000644000000000000000000000001114375677624017162 0ustar00rootroot00000000000000a Ubuntu mwic-0.7.10/tests/multiword-a-x.exp0000644000000000000000000000014614375677624017153 0ustar00rootroot00000000000000a XML: | a XML file ^^^^^ a XSL: | a XSL transformation ^^^^^ a XSLT: | a XSLT object ^^^^^^ mwic-0.7.10/tests/multiword-a-x.txt0000644000000000000000000000005614375677624017176 0ustar00rootroot00000000000000a XML file a XSL transformation a XSLT object mwic-0.7.10/tests/multiword-a.k.a.exp0000644000000000000000000000003614375677624017354 0ustar00rootroot00000000000000a.k.a: | X a.k.a Y ^^^^^ mwic-0.7.10/tests/multiword-a.k.a.txt0000644000000000000000000000002514375677624017375 0ustar00rootroot00000000000000X a.k.a Y X a.k.a. Y mwic-0.7.10/tests/multiword-allow-to.exp0000644000000000000000000000115214375677624020222 0ustar00rootroot00000000000000allow to: | I allow to go. ^^^^^^^^ authorize to: | I authorize to go. ^^^^^^^^^^^^ permit to: | I permit to go. ^^^^^^^^^ allowing to: | I'm allowing to go. ^^^^^^^^^^^ authorizing to: | I'm authorizing to go. ^^^^^^^^^^^^^^ allows to: | It allows to go. ^^^^^^^^^ authorizes to: | It authorizes to go. ^^^^^^^^^^^^^ permits to: | It permits to go. ^^^^^^^^^^ authorise: | I authorise to go. ^^^^^^^^^ authorised: | I'm authorised to go. ^^^^^^^^^^ authorising: | I'm authorising to go. ^^^^^^^^^^^ authorises: | It authorises to go. 
^^^^^^^^^^ mwic-0.7.10/tests/multiword-allow-to.txt0000644000000000000000000000050114375677624020242 0ustar00rootroot00000000000000I allow to go. I'm allowing to go. It allows to go. I'm allowed to go. I authorise to go. I'm authorising to go. It authorises to go. I'm authorised to go. I authorize to go. I'm authorizing to go. It authorizes to go. I'm authorized to go. I permit to go. I'm permitting to go. It permits to go. I'm permitted to go. mwic-0.7.10/tests/multiword-allow-to@en-GB.alt0000644000000000000000000000110614375677624021116 0ustar00rootroot00000000000000allow to: | I allow to go. ^^^^^^^^ authorise to: | I authorise to go. ^^^^^^^^^^^^ authorize to: | I authorize to go. ^^^^^^^^^^^^ permit to: | I permit to go. ^^^^^^^^^ allowing to: | I'm allowing to go. ^^^^^^^^^^^ authorising to: | I'm authorising to go. ^^^^^^^^^^^^^^ authorizing to: | I'm authorizing to go. ^^^^^^^^^^^^^^ allows to: | It allows to go. ^^^^^^^^^ authorises to: | It authorises to go. ^^^^^^^^^^^^^ authorizes to: | It authorizes to go. ^^^^^^^^^^^^^ permits to: | It permits to go. ^^^^^^^^^^ mwic-0.7.10/tests/multiword-allow-to@en-GB.exp0000644000000000000000000000115214375677624021133 0ustar00rootroot00000000000000allow to: | I allow to go. ^^^^^^^^ authorise to: | I authorise to go. ^^^^^^^^^^^^ permit to: | I permit to go. ^^^^^^^^^ allowing to: | I'm allowing to go. ^^^^^^^^^^^ authorising to: | I'm authorising to go. ^^^^^^^^^^^^^^ allows to: | It allows to go. ^^^^^^^^^ authorises to: | It authorises to go. ^^^^^^^^^^^^^ permits to: | It permits to go. ^^^^^^^^^^ authorize: | I authorize to go. ^^^^^^^^^ authorized: | I'm authorized to go. ^^^^^^^^^^ authorizing: | I'm authorizing to go. ^^^^^^^^^^^ authorizes: | It authorizes to go. ^^^^^^^^^^ mwic-0.7.10/tests/multiword-also-also.exp0000644000000000000000000000011014375677624020347 0ustar00rootroot00000000000000Also notice also: | Also notice also the missing... 
^^^^^^^^^^^^^^^^ mwic-0.7.10/tests/multiword-also-also.txt0000644000000000000000000000004014375677624020374 0ustar00rootroot00000000000000Also notice also the missing... mwic-0.7.10/tests/multiword-amount-of-times.exp0000644000000000000000000000012614375677624021510 0ustar00rootroot00000000000000amount of times: | The maximum amount of times that... ^^^^^^^^^^^^^^^ mwic-0.7.10/tests/multiword-amount-of-times.txt0000644000000000000000000000011014375677624021524 0ustar00rootroot00000000000000The maximum amount of times that... The maximum number of times that... mwic-0.7.10/tests/multiword-an-other.exp0000644000000000000000000000024614375677624020204 0ustar00rootroot00000000000000a another: | a another object ^^^^^^^^^ a other: | a other object ^^^^^^^ an another: | an another object ^^^^^^^^^^ an other: | an other object ^^^^^^^^ mwic-0.7.10/tests/multiword-an-other.txt0000644000000000000000000000012114375677624020217 0ustar00rootroot00000000000000a other object an other object a another object an another object another object mwic-0.7.10/tests/multiword-an-u.exp0000644000000000000000000000141514375677624017326 0ustar00rootroot00000000000000an Unicode: | an Unicode character ^^^^^^^^^^ an unary: | an unary object ^^^^^^^^ an unified: | an unified object ^^^^^^^^^^ an uniform: | an uniform object ^^^^^^^^^^ an uniformly: | an uniformly sized object ^^^^^^^^^^^^ an union: | an union ^^^^^^^^ an unique: | an unique object ^^^^^^^^^ an uniquely: | an uniquely named object ^^^^^^^^^^^ an unit: | an unit ^^^^^^^ an universal: | an universal object ^^^^^^^^^^^^ an universally: | an universally unique identifier ^^^^^^^^^^^^^^ an usage: | an usage ^^^^^^^^ an use: | an use ^^^^^^ an useful: | an useful object ^^^^^^^^^ an useless: | an useless object ^^^^^^^^^^ an user: | an user ^^^^^^^ an username: | an username ^^^^^^^^^^^ an utility: | an utility ^^^^^^^^^^ mwic-0.7.10/tests/multiword-an-u.txt0000644000000000000000000000044514375677624017353 
0ustar00rootroot00000000000000an Unicode character an unary object an unified object an uniform object an uniformly sized object an union an unique object an uniquely named object an unit an universal object an universally unique identifier an usage an use an useful object an useless object an user an username an utility mwic-0.7.10/tests/multiword-awhile.exp0000644000000000000000000000065414375677624017743 0ustar00rootroot00000000000000take awhile: | It can take awhile. ^^^^^^^^^^^ takes awhile: | It takes awhile. ^^^^^^^^^^^^ took awhile: | It took awhile. ^^^^^^^^^^^ after awhile: | It will be done after awhile. ^^^^^^^^^^^^ in awhile: | It will be done in awhile. ^^^^^^^^^ taking awhile: | It's taking awhile. ^^^^^^^^^^^^^ for awhile: | Please wait for awhile. ^^^^^^^^^^ mwic-0.7.10/tests/multiword-awhile.txt0000644000000000000000000000023314375677624017757 0ustar00rootroot00000000000000It can take awhile. It takes awhile. It took awhile. It's taking awhile. It will be done after awhile. It will be done in awhile. Please wait for awhile. mwic-0.7.10/tests/multiword-be-be.exp0000644000000000000000000000113014375677624017432 0ustar00rootroot00000000000000He's is: | He's is a duplicate. ^^^^^^^ am been: | I am been a duplicate. ^^^^^^^ I'm am: | I'm am a duplicate. ^^^^^^ is been: | It is been a duplicate. ^^^^^^^ being being: | It is being being a duplicate. ^^^^^^^^^^^ was been: | It was been a duplicate. ^^^^^^^^ It's is: | It's is a duplicate. ^^^^^^^ She's is: | She's is a duplicate. ^^^^^^^^ were been: | They were been a duplicate. ^^^^^^^^^ They're are: | They're are duplicates. ^^^^^^^^^^^ We're are: | We're are duplicates. ^^^^^^^^^ You're are: | You're are a duplicate. ^^^^^^^^^^ mwic-0.7.10/tests/multiword-be-be.txt0000644000000000000000000000123214375677624017460 0ustar00rootroot00000000000000I'm am a duplicate. You're are a duplicate. He's is a duplicate. She's is a duplicate. It's is a duplicate. We're are duplicates. They're are duplicates. 
I am been a duplicate. It is been a duplicate. It was been a duplicate. They were been a duplicate. He's been different. She's been different. It's been different. I'm being different. I am being different. He's being different. She's being different. It's being different. It is being different. It was being different. You're being different. We're being different. They're being different. They were being different. It will be being different. It has been being different. It is being being a duplicate. mwic-0.7.10/tests/multiword-be-consisted-of.exp0000644000000000000000000000431614375677624021452 0ustar00rootroot00000000000000He's consisted of: | He's consisted of two parts. ^^^^^^^^^^^^^^^^^ He's not consisted of: | He's not consisted of two parts. ^^^^^^^^^^^^^^^^^^^^^ ain't consisted of: | I ain't consisted of two parts. ^^^^^^^^^^^^^^^^^^ am consisted of: | I am consisted of two parts. ^^^^^^^^^^^^^^^ I'm consisted of: | I'm consisted of two parts. ^^^^^^^^^^^^^^^^ I'm not consisted of: | I'm not consisted of two parts. ^^^^^^^^^^^^^^^^^^^^ been consisted of: | It has been consisted of two parts. ^^^^^^^^^^^^^^^^^ being consisted of: | It is being consisted of two parts. ^^^^^^^^^^^^^^^^^^ is consisted of: | It is consisted of two parts. ^^^^^^^^^^^^^^^ is not consisted of: | It is not consisted of two parts. ^^^^^^^^^^^^^^^^^^^ isn't consisted of: | It isn't consisted of two parts. ^^^^^^^^^^^^^^^^^^ was consisted of: | It was consisted of two parts. ^^^^^^^^^^^^^^^^ was not consisted of: | It was not consisted of two parts. ^^^^^^^^^^^^^^^^^^^^ wasn't consisted of: | It wasn't consisted of two parts. ^^^^^^^^^^^^^^^^^^^ be consisted of: | It will be consisted of two parts. ^^^^^^^^^^^^^^^ It's consisted of: | It's consisted of two parts. ^^^^^^^^^^^^^^^^^ It's not consisted of: | It's not consisted of two parts. ^^^^^^^^^^^^^^^^^^^^^ She's consisted of: | She's consisted of two parts. 
^^^^^^^^^^^^^^^^^^ She's not consisted of: | She's not consisted of two parts. ^^^^^^^^^^^^^^^^^^^^^^ were consisted of: | They were consisted of two parts. ^^^^^^^^^^^^^^^^^ were not consisted of: | They were not consisted of two parts. ^^^^^^^^^^^^^^^^^^^^^ weren't consisted of: | They weren't consisted of two parts. ^^^^^^^^^^^^^^^^^^^^ They're consisted of: | They're consisted of two parts. ^^^^^^^^^^^^^^^^^^^^ They're not consisted of: | They're not consisted of two parts. ^^^^^^^^^^^^^^^^^^^^^^^^ We're consisted of: | We're consisted of two parts. ^^^^^^^^^^^^^^^^^^ We're not consisted of: | We're not consisted of two parts. ^^^^^^^^^^^^^^^^^^^^^^ You're consisted of: | You're consisted of two parts. ^^^^^^^^^^^^^^^^^^^ You're not consisted of: | You're not consisted of two parts. ^^^^^^^^^^^^^^^^^^^^^^^ mwic-0.7.10/tests/multiword-be-consisted-of.txt0000644000000000000000000000163114375677624021472 0ustar00rootroot00000000000000I'm consisted of two parts. I am consisted of two parts. He's consisted of two parts. She's consisted of two parts. It's consisted of two parts. It is consisted of two parts. It was consisted of two parts. You're consisted of two parts. We're consisted of two parts. They're consisted of two parts. They were consisted of two parts. It will be consisted of two parts. It has been consisted of two parts. It is being consisted of two parts. I'm not consisted of two parts. I ain't consisted of two parts. He's not consisted of two parts. She's not consisted of two parts. It's not consisted of two parts. It isn't consisted of two parts. It is not consisted of two parts. It wasn't consisted of two parts. It was not consisted of two parts. You're not consisted of two parts. We're not consisted of two parts. They're not consisted of two parts. They weren't consisted of two parts. They were not consisted of two parts. 
mwic-0.7.10/tests/multiword-be-disable.exp0000644000000000000000000000302214375677624020451 0ustar00rootroot00000000000000He's disable: | He's disable. ^^^^^^^^^^^^ He's not disable: | He's not disable. ^^^^^^^^^^^^^^^^ ain't disable: | I ain't disable. ^^^^^^^^^^^^^ am disable: | I am disable. ^^^^^^^^^^ I'm disable: | I'm disable. ^^^^^^^^^^^ I'm not disable: | I'm not disable. ^^^^^^^^^^^^^^^ been disable: | It has been disable. ^^^^^^^^^^^^ being disable: | It is being disable. ^^^^^^^^^^^^^ is disable: | It is disable. ^^^^^^^^^^ is not disable: | It is not disable. ^^^^^^^^^^^^^^ isn't disable: | It isn't disable. ^^^^^^^^^^^^^ was disable: | It was disable. ^^^^^^^^^^^ was not disable: | It was not disable. ^^^^^^^^^^^^^^^ wasn't disable: | It wasn't disable. ^^^^^^^^^^^^^^ be disable: | It will be disable. ^^^^^^^^^^ It's disable: | It's disable. ^^^^^^^^^^^^ It's not disable: | It's not disable. ^^^^^^^^^^^^^^^^ She's disable: | She's disable. ^^^^^^^^^^^^^ She's not disable: | She's not disable. ^^^^^^^^^^^^^^^^^ were disable: | They were disable. ^^^^^^^^^^^^ were not disable: | They were not disable. ^^^^^^^^^^^^^^^^ weren't disable: | They weren't disable. ^^^^^^^^^^^^^^^ They're disable: | They're disable. ^^^^^^^^^^^^^^^ They're not disable: | They're not disable. ^^^^^^^^^^^^^^^^^^^ We're disable: | We're disable. ^^^^^^^^^^^^^ We're not disable: | We're not disable. ^^^^^^^^^^^^^^^^^ You're disable: | You're disable. ^^^^^^^^^^^^^^ You're not disable: | You're not disable. ^^^^^^^^^^^^^^^^^^ mwic-0.7.10/tests/multiword-be-disable.txt0000644000000000000000000000076514375677624020507 0ustar00rootroot00000000000000I'm disable. I am disable. He's disable. She's disable. It's disable. It is disable. It was disable. You're disable. We're disable. They're disable. They were disable. It will be disable. It has been disable. It is being disable. I'm not disable. I ain't disable. He's not disable. She's not disable. It's not disable. It isn't disable. 
It is not disable. It wasn't disable. It was not disable. You're not disable. We're not disable. They're not disable. They weren't disable. They were not disable. mwic-0.7.10/tests/multiword-be-enable.exp0000644000000000000000000000267614375677624020312 0ustar00rootroot00000000000000He's enable: | He's enable. ^^^^^^^^^^^ He's not enable: | He's not enable. ^^^^^^^^^^^^^^^ ain't enable: | I ain't enable. ^^^^^^^^^^^^ am enable: | I am enable. ^^^^^^^^^ I'm enable: | I'm enable. ^^^^^^^^^^ I'm not enable: | I'm not enable. ^^^^^^^^^^^^^^ been enable: | It has been enable. ^^^^^^^^^^^ being enable: | It is being enable. ^^^^^^^^^^^^ is enable: | It is enable. ^^^^^^^^^ is not enable: | It is not enable. ^^^^^^^^^^^^^ isn't enable: | It isn't enable. ^^^^^^^^^^^^ was enable: | It was enable. ^^^^^^^^^^ was not enable: | It was not enable. ^^^^^^^^^^^^^^ wasn't enable: | It wasn't enable. ^^^^^^^^^^^^^ be enable: | It will be enable. ^^^^^^^^^ It's enable: | It's enable. ^^^^^^^^^^^ It's not enable: | It's not enable. ^^^^^^^^^^^^^^^ She's enable: | She's enable. ^^^^^^^^^^^^ She's not enable: | She's not enable. ^^^^^^^^^^^^^^^^ were enable: | They were enable. ^^^^^^^^^^^ were not enable: | They were not enable. ^^^^^^^^^^^^^^^ weren't enable: | They weren't enable. ^^^^^^^^^^^^^^ They're enable: | They're enable. ^^^^^^^^^^^^^^ They're not enable: | They're not enable. ^^^^^^^^^^^^^^^^^^ We're enable: | We're enable. ^^^^^^^^^^^^ We're not enable: | We're not enable. ^^^^^^^^^^^^^^^^ You're enable: | You're enable. ^^^^^^^^^^^^^ You're not enable: | You're not enable. ^^^^^^^^^^^^^^^^^ mwic-0.7.10/tests/multiword-be-enable.txt0000644000000000000000000000073114375677624020323 0ustar00rootroot00000000000000I'm enable. I am enable. He's enable. She's enable. It's enable. It is enable. It was enable. You're enable. We're enable. They're enable. They were enable. It will be enable. It has been enable. It is being enable. I'm not enable. I ain't enable. He's not enable. 
She's not enable. It's not enable. It isn't enable. It is not enable. It wasn't enable. It was not enable. You're not enable. We're not enable. They're not enable. They weren't enable. They were not enable. mwic-0.7.10/tests/multiword-be-ran.exp0000644000000000000000000000230214375677624017626 0ustar00rootroot00000000000000He's not ran: | He's not ran. ^^^^^^^^^^^^ He's ran: | He's ran. ^^^^^^^^ ain't ran: | I ain't ran. ^^^^^^^^^ am ran: | I am ran. ^^^^^^ I'm not ran: | I'm not ran. ^^^^^^^^^^^ I'm ran: | I'm ran. ^^^^^^^ been ran: | It has been ran. ^^^^^^^^ being ran: | It is being ran. ^^^^^^^^^ is not ran: | It is not ran. ^^^^^^^^^^ is ran: | It is ran. ^^^^^^ isn't ran: | It isn't ran. ^^^^^^^^^ was not ran: | It was not ran. ^^^^^^^^^^^ was ran: | It was ran. ^^^^^^^ wasn't ran: | It wasn't ran. ^^^^^^^^^^ be ran: | It will be ran. ^^^^^^ It's not ran: | It's not ran. ^^^^^^^^^^^^ It's ran: | It's ran. ^^^^^^^^ She's not ran: | She's not ran. ^^^^^^^^^^^^^ She's ran: | She's ran. ^^^^^^^^^ were not ran: | They were not ran. ^^^^^^^^^^^^ were ran: | They were ran. ^^^^^^^^ weren't ran: | They weren't ran. ^^^^^^^^^^^ They're not ran: | They're not ran. ^^^^^^^^^^^^^^^ They're ran: | They're ran. ^^^^^^^^^^^ We're not ran: | We're not ran. ^^^^^^^^^^^^^ We're ran: | We're ran. ^^^^^^^^^ You're not ran: | You're not ran. ^^^^^^^^^^^^^^ You're ran: | You're ran. ^^^^^^^^^^ mwic-0.7.10/tests/multiword-be-ran.txt0000644000000000000000000000060514375677624017655 0ustar00rootroot00000000000000I'm ran. I am ran. He's ran. She's ran. It's ran. It is ran. It was ran. You're ran. We're ran. They're ran. They were ran. It will be ran. It has been ran. It is being ran. I'm not ran. I ain't ran. He's not ran. She's not ran. It's not ran. It isn't ran. It is not ran. It wasn't ran. It was not ran. You're not ran. We're not ran. They're not ran. They weren't ran. They were not ran. 
mwic-0.7.10/tests/multiword-be-suppose.exp0000644000000000000000000000345214375677624020553 0ustar00rootroot00000000000000He's not suppose: | He's not suppose to return. ^^^^^^^^^^^^^^^^ He's suppose: | He's suppose to return. ^^^^^^^^^^^^ ain't suppose: | I ain't suppose to return. ^^^^^^^^^^^^^ am suppose: | I am suppose to return. ^^^^^^^^^^ I'm not suppose: | I'm not suppose to return. ^^^^^^^^^^^^^^^ I'm suppose: | I'm suppose to return. ^^^^^^^^^^^ been suppose: | It has been suppose to return. ^^^^^^^^^^^^ being suppose: | It is being suppose to return. ^^^^^^^^^^^^^ is not suppose: | It is not suppose to return. ^^^^^^^^^^^^^^ is suppose: | It is suppose to return. ^^^^^^^^^^ isn't suppose: | It isn't suppose to return. ^^^^^^^^^^^^^ was not suppose: | It was not suppose to return. ^^^^^^^^^^^^^^^ was suppose: | It was suppose to return. ^^^^^^^^^^^ wasn't suppose: | It wasn't suppose to return. ^^^^^^^^^^^^^^ be suppose: | It will be suppose to return. ^^^^^^^^^^ It's not suppose: | It's not suppose to return. ^^^^^^^^^^^^^^^^ It's suppose: | It's suppose to return. ^^^^^^^^^^^^ She's not suppose: | She's not suppose to return. ^^^^^^^^^^^^^^^^^ She's suppose: | She's suppose to return. ^^^^^^^^^^^^^ were not suppose: | They were not suppose to return. ^^^^^^^^^^^^^^^^ were suppose: | They were suppose to return. ^^^^^^^^^^^^ weren't suppose: | They weren't suppose to return. ^^^^^^^^^^^^^^^ They're not suppose: | They're not suppose to return. ^^^^^^^^^^^^^^^^^^^ They're suppose: | They're suppose to return. ^^^^^^^^^^^^^^^ We're not suppose: | We're not suppose to return. ^^^^^^^^^^^^^^^^^ We're suppose: | We're suppose to return. ^^^^^^^^^^^^^ You're not suppose: | You're not suppose to return. ^^^^^^^^^^^^^^^^^^ You're suppose: | You're suppose to return. ^^^^^^^^^^^^^^ mwic-0.7.10/tests/multiword-be-suppose.txt0000644000000000000000000000141514375677624020573 0ustar00rootroot00000000000000I'm suppose to return. I am suppose to return. 
He's suppose to return. She's suppose to return. It's suppose to return. It is suppose to return. It was suppose to return. You're suppose to return. We're suppose to return. They're suppose to return. They were suppose to return. It will be suppose to return. It has been suppose to return. It is being suppose to return. I'm not suppose to return. I ain't suppose to return. He's not suppose to return. She's not suppose to return. It's not suppose to return. It isn't suppose to return. It is not suppose to return. It wasn't suppose to return. It was not suppose to return. You're not suppose to return. We're not suppose to return. They're not suppose to return. They weren't suppose to return. They were not suppose to return. mwic-0.7.10/tests/multiword-blu-ray.exp0000644000000000000000000000016614375677624020043 0ustar00rootroot00000000000000Blu-Ray: | Blu-Ray ^^^^^^^ Blue-Ray: | Blue-Ray ^^^^^^^^ Blue-ray: | Blue-ray ^^^^^^^^ blu: | blu-ray ^^^ mwic-0.7.10/tests/multiword-blu-ray.txt0000644000000000000000000000005214375677624020060 0ustar00rootroot00000000000000Blue-Ray Blue-ray Blu-Ray Blu-ray blu-ray mwic-0.7.10/tests/multiword-can-not.exp0000644000000000000000000000005514375677624020024 0ustar00rootroot00000000000000can not: | I can not look good. ^^^^^^^ mwic-0.7.10/tests/multiword-can-not.txt0000644000000000000000000000014314375677624020045 0ustar00rootroot00000000000000I can not look good. I cannot look good. I can not only look good, but I can look respectable too. mwic-0.7.10/tests/multiword-comprised-of.exp0000644000000000000000000000007714375677624021060 0ustar00rootroot00000000000000comprised of: | ... is comprised of ... ^^^^^^^^^^^^ mwic-0.7.10/tests/multiword-comprised-of.txt0000644000000000000000000000003014375677624021070 0ustar00rootroot00000000000000... is comprised of ... 
mwic-0.7.10/tests/multiword-dont-user.exp0000644000000000000000000000013214375677624020401 0ustar00rootroot00000000000000doesn't user: | doesn't user it ^^^^^^^^^^^^ don't user: | don't user it ^^^^^^^^^^ mwic-0.7.10/tests/multiword-dont-user.txt0000644000000000000000000000003614375677624020427 0ustar00rootroot00000000000000don't user it doesn't user it mwic-0.7.10/tests/multiword-e.g.exp0000644000000000000000000000015414375677624017136 0ustar00rootroot00000000000000e.g: | food (e.g, sausage) ^^^ i.e: | sausage (i.e, highly seasoned minced meat… ^^^ mwic-0.7.10/tests/multiword-e.g.txt0000644000000000000000000000024714375677624017164 0ustar00rootroot00000000000000food (e.g, sausage) food (e.g., sausage) sausage (i.e, highly seasoned minced meat stuffed in casings) sausage (i.e., highly seasoned minced meat stuffed in casings) mwic-0.7.10/tests/multiword-e.t.c.exp0000644000000000000000000000007314375677624017374 0ustar00rootroot00000000000000e.t.c: | events, actions, e.t.c. ^^^^^ mwic-0.7.10/tests/multiword-e.t.c.txt0000644000000000000000000000005614375677624017420 0ustar00rootroot00000000000000events, actions, e.t.c. events, actions, etc. mwic-0.7.10/tests/multiword-each-others.exp0000644000000000000000000000013714375677624020670 0ustar00rootroot00000000000000each others: | ... use each others files. | ... use each others' files. ^^^^^^^^^^^ mwic-0.7.10/tests/multiword-each-others.txt0000644000000000000000000000012314375677624020706 0ustar00rootroot00000000000000... use each others files. ... use each others' files. ... use each other's files. 
mwic-0.7.10/tests/multiword-easy-of.exp0000644000000000000000000000010214375677624020021 0ustar00rootroot00000000000000easy of: | flexibility and easy of use ^^^^^^^ mwic-0.7.10/tests/multiword-easy-of.txt0000644000000000000000000000007014375677624020050 0ustar00rootroot00000000000000flexibility and easy of use flexibility and ease of use mwic-0.7.10/tests/multiword-else-then.exp0000644000000000000000000000007314375677624020351 0ustar00rootroot00000000000000else then: | anything else then eggs ^^^^^^^^^ mwic-0.7.10/tests/multiword-else-then.txt0000644000000000000000000000006014375677624020370 0ustar00rootroot00000000000000anything else then eggs anything else than eggs mwic-0.7.10/tests/multiword-even-tough.exp0000644000000000000000000000006414375677624020546 0ustar00rootroot00000000000000even tough: | ..., even tough... ^^^^^^^^^^ mwic-0.7.10/tests/multiword-even-tough.txt0000644000000000000000000000005014375677624020564 0ustar00rootroot00000000000000..., even tough... ..., even though... mwic-0.7.10/tests/multiword-fist-time.exp0000644000000000000000000000005714375677624020370 0ustar00rootroot00000000000000fist time: | The fist time... ^^^^^^^^^ mwic-0.7.10/tests/multiword-fist-time.txt0000644000000000000000000000002114375677624020402 0ustar00rootroot00000000000000The fist time... mwic-0.7.10/tests/multiword-gpl.exp0000644000000000000000000000105714375677624017252 0ustar00rootroot00000000000000GNU Lesser Public License: | GNU Lesser Public License ^^^^^^^^^^^^^^^^^^^^^^^^^ GNU Library Public License: | GNU Library Public License ^^^^^^^^^^^^^^^^^^^^^^^^^^ GNU Public License: | GNU Public License ^^^^^^^^^^^^^^^^^^ either version 2 of the License: | either version 2 of the License. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Licence: | GNU Public Licence | GNU Lesser Public Licence | GNU Library Public Licence | either version 2 of the Licence, or... | either version 2 of the Licence. 
^^^^^^^ mwic-0.7.10/tests/multiword-gpl.txt0000644000000000000000000000044414375677624017274 0ustar00rootroot00000000000000GNU Lesser Public Licence GNU Lesser Public License GNU Library Public Licence GNU Library Public License GNU Public Licence GNU Public License either version 2 of the Licence. either version 2 of the License. either version 2 of the Licence, or... either version 2 of the License, or... mwic-0.7.10/tests/multiword-gpl@en-GB.exp0000644000000000000000000000124214375677624020157 0ustar00rootroot00000000000000GNU Lesser Public Licence: | GNU Lesser Public Licence ^^^^^^^^^^^^^^^^^^^^^^^^^ GNU Lesser Public License: | GNU Lesser Public License ^^^^^^^^^^^^^^^^^^^^^^^^^ GNU Library Public Licence: | GNU Library Public Licence ^^^^^^^^^^^^^^^^^^^^^^^^^^ GNU Library Public License: | GNU Library Public License ^^^^^^^^^^^^^^^^^^^^^^^^^^ GNU Public Licence: | GNU Public Licence ^^^^^^^^^^^^^^^^^^ GNU Public License: | GNU Public License ^^^^^^^^^^^^^^^^^^ either version 2 of the Licence: | either version 2 of the Licence. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ either version 2 of the License: | either version 2 of the License. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ mwic-0.7.10/tests/multiword-is-t.alt0000644000000000000000000000073614375677624017333 0ustar00rootroot00000000000000ai’t: | It ai’t ... ^^^^ could’t: | It could’t ... ^^^^^^^ has’t: | It has’t ... ^^^^^ is’t: | It is’t ... ^^^^ should’t: | It should’t ... ^^^^^^^^ was’t: | It was’t ... ^^^^^ would’t: | It would’t ... ^^^^^^^ are’t: | They are’t ... ^^^^^ do’t: | They do’t ... ^^^^ were’t: | They were’t ... ^^^^^^ doen: | It doen’t ... ^^^^ hav: | They hav’t ... ^^^ mwic-0.7.10/tests/multiword-is-t.exp0000644000000000000000000000073014375677624017341 0ustar00rootroot00000000000000could’t: | It could’t ... ^^^^^^^ has’t: | It has’t ... ^^^^^ is’t: | It is’t ... ^^^^ should’t: | It should’t ... ^^^^^^^^ was’t: | It was’t ... ^^^^^ would’t: | It would’t ... ^^^^^^^ are’t: | They are’t ... 
^^^^^ do’t: | They do’t ... ^^^^ were’t: | They were’t ... ^^^^^^ ai: | It ai’t ... ^^ doen: | It doen’t ... ^^^^ hav: | They hav’t ... ^^^ mwic-0.7.10/tests/multiword-is-t.txt0000644000000000000000000000030514375677624017362 0ustar00rootroot00000000000000It ai’t ... It is’t ... It was’t ... They are’t ... They were’t ... It doen’t ... They do’t ... It has’t ... They hav’t ... It could’t ... It should’t ... It would’t ... mwic-0.7.10/tests/multiword-iso.exp0000644000000000000000000000031614375677624017257 0ustar00rootroot00000000000000ISO-5589: | ISO-5589-1 ^^^^^^^^ ISO-8061: | ISO-8061 ^^^^^^^^ ISO-8559: | ISO-8559-1 ^^^^^^^^ ISO-8610: | ISO-8610 ^^^^^^^^ ISO-8858: | ISO-8858-1 ^^^^^^^^ ISO-8895: | ISO-8895-1 ^^^^^^^^ mwic-0.7.10/tests/multiword-iso.txt0000644000000000000000000000012514375677624017300 0ustar00rootroot00000000000000ISO-8858-1 ISO-8895-1 ISO-8559-1 ISO-5589-1 ISO-8859-1 ISO-8061 ISO-8610 ISO-8601 mwic-0.7.10/tests/multiword-it-us.exp0000644000000000000000000000003614375677624017525 0ustar00rootroot00000000000000It us: | It us okay. ^^^^^ mwic-0.7.10/tests/multiword-it-us.txt0000644000000000000000000000005514375677624017551 0ustar00rootroot00000000000000It us okay. Is it us? Was it us? Were it us? 
mwic-0.7.10/tests/multiword-its-goal.exp0000644000000000000000000000013614375677624020204 0ustar00rootroot00000000000000It's goal: | It's goal is to… ^^^^^^^^^ It’s goal: | It’s goal is to… ^^^^^^^^^ mwic-0.7.10/tests/multiword-its-goal.txt0000644000000000000000000000007214375677624020226 0ustar00rootroot00000000000000It's goal is to… It’s goal is to… Its goal is to… mwic-0.7.10/tests/multiword-its-not.exp0000644000000000000000000000005014375677624020055 0ustar00rootroot00000000000000its not: | its not an object ^^^^^^^ mwic-0.7.10/tests/multiword-its-not.txt0000644000000000000000000000007714375677624020111 0ustar00rootroot00000000000000its not an object its not-yet-processed object it's an object mwic-0.7.10/tests/multiword-its-own.exp0000644000000000000000000000012214375677624020060 0ustar00rootroot00000000000000it's own: | on it's own ^^^^^^^^ it’s own: | on it’s own ^^^^^^^^ mwic-0.7.10/tests/multiword-its-own.txt0000644000000000000000000000004514375677624020107 0ustar00rootroot00000000000000on it's own on it’s own on its own mwic-0.7.10/tests/multiword-its.exp0000644000000000000000000000014414375677624017263 0ustar00rootroot00000000000000its a: | its a user ^^^^^ its an: | its an object ^^^^^^ its the: | its the object ^^^^^^^ mwic-0.7.10/tests/multiword-its.txt0000644000000000000000000000012414375677624017304 0ustar00rootroot00000000000000its a user its an object its the object it's a user it's an object it's the object mwic-0.7.10/tests/multiword-know-as.exp0000644000000000000000000000334614375677624020052 0ustar00rootroot00000000000000He's know as: | He's know as ... ^^^^^^^^^^^^ He's not know as: | He's not know as ... ^^^^^^^^^^^^^^^^ ain't know as: | I ain't know as ... ^^^^^^^^^^^^^ am know as: | I am know as ... ^^^^^^^^^^ I'm know as: | I'm know as ... ^^^^^^^^^^^ I'm not know as: | I'm not know as ... ^^^^^^^^^^^^^^^ been know as: | It has been know as ... ^^^^^^^^^^^^ also know as: | It is also know as ... 
^^^^^^^^^^^^ being know as: | It is being know as ... ^^^^^^^^^^^^^ better know as: | It is better know as ... ^^^^^^^^^^^^^^ is know as: | It is know as ... ^^^^^^^^^^ is not know as: | It is not know as ... ^^^^^^^^^^^^^^ isn't know as: | It isn't know as ... ^^^^^^^^^^^^^ was know as: | It was know as ... ^^^^^^^^^^^ was not know as: | It was not know as ... ^^^^^^^^^^^^^^^ wasn't know as: | It wasn't know as ... ^^^^^^^^^^^^^^ be know as: | It will be know as ... ^^^^^^^^^^ It's know as: | It's know as ... ^^^^^^^^^^^^ It's not know as: | It's not know as ... ^^^^^^^^^^^^^^^^ She's know as: | She's know as ... ^^^^^^^^^^^^^ She's not know as: | She's not know as ... ^^^^^^^^^^^^^^^^^ were know as: | They were know as ... ^^^^^^^^^^^^ were not know as: | They were not know as ... ^^^^^^^^^^^^^^^^ weren't know as: | They weren't know as ... ^^^^^^^^^^^^^^^ They're know as: | They're know as ... ^^^^^^^^^^^^^^^ They're not know as: | They're not know as ... ^^^^^^^^^^^^^^^^^^^ We're know as: | We're know as ... ^^^^^^^^^^^^^ We're not know as: | We're not know as ... ^^^^^^^^^^^^^^^^^ You're know as: | You're know as ... ^^^^^^^^^^^^^^ You're not know as: | You're not know as ... ^^^^^^^^^^^^^^^^^^ mwic-0.7.10/tests/multiword-know-as.txt0000644000000000000000000000117314375677624020071 0ustar00rootroot00000000000000I'm know as ... I am know as ... He's know as ... She's know as ... It's know as ... It is know as ... It was know as ... You're know as ... We're know as ... They're know as ... They were know as ... It will be know as ... It has been know as ... It is being know as ... I'm not know as ... I ain't know as ... He's not know as ... She's not know as ... It's not know as ... It isn't know as ... It is not know as ... It wasn't know as ... It was not know as ... You're not know as ... We're not know as ... They're not know as ... They weren't know as ... They were not know as ... It is also know as ... It is better know as ... 
mwic-0.7.10/tests/multiword-let-s.exp0000644000000000000000000000205414375677624017512 0ustar00rootroot00000000000000let's a: | It let's a user go. ^^^^^^^ let's an: | It let's an individual go. ^^^^^^^^ let's her: | It let's her go. ^^^^^^^^^ let's him: | It let's him go. ^^^^^^^^^ let's it: | It let's it go. ^^^^^^^^ let's me: | It let's me go. ^^^^^^^^ let's one: | It let's one go. ^^^^^^^^^ let's the: | It let's the user go. ^^^^^^^^^ let's them: | It let's them go. ^^^^^^^^^^ let's us: | It let's us go. ^^^^^^^^ let's you: | It let's you go. ^^^^^^^^^ let’s a: | It let’s a user go. ^^^^^^^ let’s an: | It let’s an individual go. ^^^^^^^^ let’s her: | It let’s her go. ^^^^^^^^^ let’s him: | It let’s him go. ^^^^^^^^^ let’s it: | It let’s it go. ^^^^^^^^ let’s me: | It let’s me go. ^^^^^^^^ let’s one: | It let’s one go. ^^^^^^^^^ let’s the: | It let’s the user go. ^^^^^^^^^ let’s them: | It let’s them go. ^^^^^^^^^^ let’s us: | It let’s us go. ^^^^^^^^ let’s you: | It let’s you go. ^^^^^^^^^ mwic-0.7.10/tests/multiword-let-s.txt0000644000000000000000000000116614375677624017540 0ustar00rootroot00000000000000It let's one go. It let’s one go. It lets one go. It let's me go. It let’s me go. It lets me go. It let's you go. It let’s you go. It lets you go. It let's him go. It let’s him go. It lets him go. It let's her go. It let’s her go. It lets her go. It let's it go. It let’s it go. It lets it go. It let's us go. It let’s us go. It lets us go. It let's them go. It let’s them go. It lets them go. It let's a user go. It let’s a user go. It lets a user go. It let's an individual go. It let’s an individual go. It lets an individual go. It let's the user go. It let’s the user go. It lets the user go. 
mwic-0.7.10/tests/multiword-lots-of.exp0000644000000000000000000000005114375677624020044 0ustar00rootroot00000000000000lot's of: | lot's of objects ^^^^^^^^ mwic-0.7.10/tests/multiword-lots-of.txt0000644000000000000000000000004114375677624020066 0ustar00rootroot00000000000000lot's of objects lots of objects mwic-0.7.10/tests/multiword-more-that.exp0000644000000000000000000000057014375677624020367 0ustar00rootroot00000000000000bigger that: | X is bigger that Y. ^^^^^^^^^^^ faster that: | X is faster that Y. ^^^^^^^^^^^ larger that: | X is larger that Y. ^^^^^^^^^^^ less that: | X is less that Y. ^^^^^^^^^ more that: | X is more that Y. ^^^^^^^^^ slower that: | X is slower that Y. ^^^^^^^^^^^ smaller that: | X is smaller that Y. ^^^^^^^^^^^^ mwic-0.7.10/tests/multiword-more-that.txt0000644000000000000000000000045214375677624020411 0ustar00rootroot00000000000000X is more that Y. X is less that Y. X is larger that Y. X is bigger that Y. X is smaller that Y. X is faster that Y. X is slower that Y. X is more than Y. X is less than Y. X is larger than Y. X is bigger than Y. X is smaller than Y. X is faster than Y. X is slower than Y. It's faster that way. 
mwic-0.7.10/tests/multiword-no-enough.exp0000644000000000000000000000005314375677624020362 0ustar00rootroot00000000000000no enough: | no enough memory ^^^^^^^^^ mwic-0.7.10/tests/multiword-no-enough.txt0000644000000000000000000000002114375677624020400 0ustar00rootroot00000000000000no enough memory mwic-0.7.10/tests/multiword-none-existent.exp0000644000000000000000000000006014375677624021261 0ustar00rootroot00000000000000none existent: | none existent ^^^^^^^^^^^^^ mwic-0.7.10/tests/multiword-none-existent.txt0000644000000000000000000000001614375677624021305 0ustar00rootroot00000000000000none existent mwic-0.7.10/tests/multiword-none-the-less.exp0000644000000000000000000000022014375677624021140 0ustar00rootroot00000000000000never the less: | never the less ^^^^^^^^^^^^^^ non the less: | non the less ^^^^^^^^^^^^ none the less: | none the less ^^^^^^^^^^^^^ mwic-0.7.10/tests/multiword-none-the-less.txt0000644000000000000000000000010414375677624021164 0ustar00rootroot00000000000000non the less none the less nonetheless never the less nevertheless mwic-0.7.10/tests/multiword-nt-not.exp0000644000000000000000000000166514375677624017714 0ustar00rootroot00000000000000ain't not: | I ain't not duplicate. ^^^^^^^^^ couldn't not: | It couldn't not duplicate. ^^^^^^^^^^^^ didn't not: | It didn't not duplicate. ^^^^^^^^^^ doesn't not: | It doesn't not duplicate. ^^^^^^^^^^^ hasn't not: | It hasn't not duplicated. ^^^^^^^^^^ isn't not: | It isn't not duplicate. ^^^^^^^^^ mustn't not: | It mustn't not duplicate. ^^^^^^^^^^^ oughtn't not: | It oughtn't not duplicate. ^^^^^^^^^^^^ shouldn't not: | It shouldn't not duplicate. ^^^^^^^^^^^^^ wasn't not: | It wasn't not duplicate. ^^^^^^^^^^ wouldn't not: | It wouldn't not duplicate. ^^^^^^^^^^^^ aren't not: | They aren't not duplicates. ^^^^^^^^^^ can't not: | They can't not be duplicates. ^^^^^^^^^ don't not: | They don't not duplicate. ^^^^^^^^^ haven't not: | They haven't not duplicated. 
^^^^^^^^^^^ weren't not: | They weren't not duplicates. ^^^^^^^^^^^ mwic-0.7.10/tests/multiword-nt-not.txt0000644000000000000000000000065214375677624017732 0ustar00rootroot00000000000000I ain't not duplicate. It couldn't not duplicate. It didn't not duplicate. It doesn't not duplicate. It hasn't not duplicated. It isn't not duplicate. It mustn't not duplicate. It oughtn't not duplicate. It shouldn't not duplicate. It wasn't not duplicate. It wouldn't not duplicate. They aren't not duplicates. They can't not be duplicates. They don't not duplicate. They haven't not duplicated. They weren't not duplicates. mwic-0.7.10/tests/multiword-oh-well.exp0000644000000000000000000000006514375677624020035 0ustar00rootroot00000000000000Oh, well: | Oh, well. At least we tried. ^^^^^^^^ mwic-0.7.10/tests/multiword-oh-well.txt0000644000000000000000000000005514375677624020057 0ustar00rootroot00000000000000Oh, well. At least we tried. Oh, well done! mwic-0.7.10/tests/multiword-per-say.exp0000644000000000000000000000003614375677624020044 0ustar00rootroot00000000000000per say: | per say ^^^^^^^ mwic-0.7.10/tests/multiword-per-say.txt0000644000000000000000000000001014375677624020057 0ustar00rootroot00000000000000per say mwic-0.7.10/tests/multiword-pubic-key.exp0000644000000000000000000000011314375677624020350 0ustar00rootroot00000000000000pubic key: | pubic key ^^^^^^^^^ pubic keys: | pubic keys ^^^^^^^^^^ mwic-0.7.10/tests/multiword-pubic-key.txt0000644000000000000000000000005514375677624020400 0ustar00rootroot00000000000000pubic key pubic keys public key public keys mwic-0.7.10/tests/multiword-rational-for.exp0000644000000000000000000000011314375677624021055 0ustar00rootroot00000000000000rational for: | The rational for this convention is... ^^^^^^^^^^^^ mwic-0.7.10/tests/multiword-rational-for.txt0000644000000000000000000000004714375677624021106 0ustar00rootroot00000000000000The rational for this convention is... 
mwic-0.7.10/tests/multiword-regarding.exp0000644000000000000000000000006314375677624020426 0ustar00rootroot00000000000000regarding to: | regarding to stuff ^^^^^^^^^^^^ mwic-0.7.10/tests/multiword-regarding.txt0000644000000000000000000000004314375677624020447 0ustar00rootroot00000000000000regarding to stuff regarding stuff mwic-0.7.10/tests/multiword-should-of.exp0000644000000000000000000000027414375677624020370 0ustar00rootroot00000000000000could of: | It could of been... ^^^^^^^^ must of: | It must of been... ^^^^^^^ should of: | It should of been... ^^^^^^^^^ would of: | It would of been... ^^^^^^^^ mwic-0.7.10/tests/multiword-should-of.txt0000644000000000000000000000026714375677624020415 0ustar00rootroot00000000000000It could of been... It could of course be... It must of been... It must of course be... It should of been... It should of course be... It would of been... It would of course be... mwic-0.7.10/tests/multiword-since-than.exp0000644000000000000000000000004714375677624020517 0ustar00rootroot00000000000000since than: | since than ^^^^^^^^^^ mwic-0.7.10/tests/multiword-since-than.txt0000644000000000000000000000002614375677624020537 0ustar00rootroot00000000000000since than since then mwic-0.7.10/tests/multiword-sneak-peak.exp0000644000000000000000000000006314375677624020503 0ustar00rootroot00000000000000sneak peak: | A sneak peak into... ^^^^^^^^^^ mwic-0.7.10/tests/multiword-sneak-peak.txt0000644000000000000000000000005214375677624020524 0ustar00rootroot00000000000000A sneak peak into... A sneak peek into... mwic-0.7.10/tests/multiword-some-times.exp0000644000000000000000000000007014375677624020544 0ustar00rootroot00000000000000Some times: | Some times we cannot do it. ^^^^^^^^^^ mwic-0.7.10/tests/multiword-some-times.txt0000644000000000000000000000010214375677624020563 0ustar00rootroot00000000000000Some times we cannot do it. It happens only on some times of day. 
mwic-0.7.10/tests/multiword-t-he.exp0000644000000000000000000000034014375677624017317 0ustar00rootroot00000000000000fort he: | fort he ^^^^^^^ int he: | int he ^^^^^^ oft he: | oft he ^^^^^^ tot he: | tot he ^^^^^^ FIXME: | # FIXME: ^^^^^ byt: | byt he ^^^ ift: | ift he ^^^ ist: | ist he ^^^ ont: | ont he ^^^ mwic-0.7.10/tests/multiword-t-he.txt0000644000000000000000000000010314375677624017337 0ustar00rootroot00000000000000fort he int he oft he tot he # FIXME: ist he byt he ift he ont he mwic-0.7.10/tests/multiword-the-the.exp0000644000000000000000000000033014375677624020017 0ustar00rootroot00000000000000a a: | a a ^^^ a an: | a an ^^^^ a the: | a the ^^^^^ an a: | an a ^^^^ an an: | an an ^^^^^ an the: | an the ^^^^^^ the a: | the a ^^^^^ the an: | the an ^^^^^^ the the: | the the ^^^^^^^ mwic-0.7.10/tests/multiword-the-the.txt0000644000000000000000000000007014375677624020043 0ustar00rootroot00000000000000a a a an a the an a an an an the the a the an the the mwic-0.7.10/tests/multiword-to-extend.exp0000644000000000000000000000027714375677624020402 0ustar00rootroot00000000000000to an extend: | ... to an extend, ... ^^^^^^^^^^^^ to certain extend: | ... to certain extend... ^^^^^^^^^^^^^^^^^ to some extend: | ... to some extend... ^^^^^^^^^^^^^^ mwic-0.7.10/tests/multiword-to-extend.txt0000644000000000000000000000014214375677624020414 0ustar00rootroot00000000000000... to some extend... ... to certain extend... ... to an extend, ... ... to an extend clause ... mwic-0.7.10/tests/multiword-worst-than.exp0000644000000000000000000000006314375677624020572 0ustar00rootroot00000000000000worst than: | X is worst than Y ^^^^^^^^^^ mwic-0.7.10/tests/multiword-worst-than.txt0000644000000000000000000000004514375677624020615 0ustar00rootroot00000000000000X is worst than Y X is worse than Y mwic-0.7.10/tests/no-dict.txt0000644000000000000000000000006214375677624016017 0ustar00rootroot00000000000000Spam spam spam spam. Lovely spam! Wonderful spam! 
mwic-0.7.10/tests/no-dict@und.exp0000644000000000000000000000041714375677624016607 0ustar00rootroot00000000000000Lovely: | Lovely spam! ^^^^^^ Spam: | Spam spam spam spam. ^^^^ Wonderful: | Wonderful spam! ^^^^^^^^^ spam: | Spam spam spam spam. | Spam spam spam spam. | Wonderful spam! | Lovely spam! | Spam spam spam spam. ^^^^ mwic-0.7.10/tests/run-nose0000755000000000000000000000262314375677624015422 0ustar00rootroot00000000000000#!/usr/bin/env python3 # Copyright © 2016-2023 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
import pathlib import sys import nose sys.path[:0] = [str(pathlib.Path(__file__).parent.parent)] from tests import test_blackbox # pylint: disable=wrong-import-position if __name__ == '__main__': nose.main(addplugins=[test_blackbox.nose_plugin()]) # vim:ts=4 sts=4 sw=4 et mwic-0.7.10/tests/test_blackbox.py0000644000000000000000000001203614375677624017123 0ustar00rootroot00000000000000# Copyright © 2014-2023 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
import glob import io import os import random import signal import string import sys import unittest.mock import lib.cli as M from .tools import ( assert_in, assert_multi_line_equal, ) here = os.path.dirname(__file__) here = os.path.relpath(here) def _get_output(*args, stdin=''): argv = ['mwic', *args] binstdin = io.BytesIO(stdin.encode('UTF-8')) textstdin = io.TextIOWrapper(binstdin, encoding='UTF-8') binstdout = io.BytesIO() textstdout = io.TextIOWrapper(binstdout, encoding='UTF-8') sys_patch = unittest.mock.patch.multiple(sys, argv=argv, stdin=textstdin, stdout=textstdout) signal_patch = unittest.mock.patch('signal.signal') with sys_patch, signal_patch: try: try: M.main() except SystemExit as exc: if exc.code != 0: raise signal.signal.assert_called_once() sys.stdout.flush() return binstdout.getvalue().decode('UTF-8') finally: textstdout.close() def random_word(): return str.join('', [ random.choice(string.ascii_lowercase) for x in range(32) ]) def test_max_context_width(): bad_word = random_word() text = _get_output('--language', 'en', '--max-context-width=2', stdin=f'yes {bad_word} yes') assert_in(f'… {bad_word} …', text) def _test_text(xpath): assert xpath.endswith('.exp') if '@' in xpath: [ipath, language] = xpath[:-4].rsplit('@') else: language = 'en-US' ipath = xpath[:-4] ipath += '.txt' text = _get_output('--language', language, ipath) with open(xpath, 'rt', encoding='UTF-8') as file: expected = file.read() if expected != text: altxpath = xpath[:-4] + '.alt' try: file = open(altxpath, 'rt', encoding='UTF-8') # pylint: disable=consider-using-with except FileNotFoundError: pass else: with file: alt_expected = file.read() if alt_expected == text: expected = alt_expected assert_multi_line_equal(expected, text) class TestText(unittest.TestCase): def __str__(self): return self._testMethodName.split("'")[1] @classmethod def _add_test(cls, xpath): def method(self): del self return _test_text(xpath) name = f'test[{xpath!r}]' pytest = sys.modules.get('pytest') if 
pytest and int(pytest.__version__.split('.', 1)[0]) < 6: # pytest before 6.0 doesn't like "[" in the test name # https://github.com/pytest-dev/pytest/commit/8b9b81c3c04399d0 name = name.replace('[', '(').replace(']', ')') method.__name__ = name setattr(cls, name, method) @classmethod def _add_tests(cls, xpaths): for xpath in xpaths: cls._add_test(xpath) TestText._add_tests(glob.glob(here + '/*.exp')) # pylint: disable=protected-access def nose_plugin(): import nose.plugins # pylint: disable=import-outside-toplevel class Plugin(nose.plugins.Plugin): name = 'mwic-plugin' enabled = True def options(self, parser, env): pass def wantFile(self, path): abs_here = os.path.abspath(here) abs_here = os.path.join(abs_here, '') if path.startswith(abs_here) and path.endswith('.exp'): return True def loadTestsFromFile(self, path): if self.wantFile(path): yield TestCase(path) def wantClass(self, cls): return f'{cls.__module__}.{cls.__name__}' != 'tests.test_blackbox.TestText' class TestCase(unittest.TestCase): def __init__(self, path): super().__init__('_test') self.path = os.path.relpath(path) def _test(self): _test_text(self.path) def __str__(self): return self.path return Plugin() # vim:ts=4 sts=4 sw=4 et mwic-0.7.10/tests/test_camel_case.py0000644000000000000000000000354614375677624017420 0ustar00rootroot00000000000000# Copyright © 2016-2022 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. 
# # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. import lib.text as M from .tools import ( assert_equal, ) def naive_tokenizer(s): offset = 0 for word in s.split(): yield (word, offset) offset += len(word) + 1 tokenize = M.camel_case_tokenizer(naive_tokenizer) def test_tokenizer(): s = 'bacon eggAndSpam EggBaconAndSpam spamSPAM SPAM' r = list(tokenize(s)) assert_equal(r, [ ('bacon', 0), ('egg', 6), ('And', 9), ('Spam', 12), ('Egg', 17), ('Bacon', 20), ('And', 25), ('Spam', 28), ('spam', 33), ('S', 37), ('P', 38), ('A', 39), ('M', 40), ('SPAM', 42), ]) w = r[-1] assert_equal( len(w[0]) + w[1], len(s) ) # vim:ts=4 sts=4 sw=4 et mwic-0.7.10/tests/test_cli.py0000644000000000000000000000330014375677624016077 0ustar00rootroot00000000000000# Copyright © 2019-2022 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. import argparse import io import unittest.mock import lib.cli from .tools import ( assert_equal, assert_is_instance, assert_not_equal, ) def test_version_action(): action = lib.cli.VersionAction(['--version']) stdout = io.StringIO() ap = argparse.ArgumentParser() with unittest.mock.patch('sys.stdout', stdout): try: action(ap, None, None) raise SystemExit(...) except SystemExit as exc: assert_equal(exc.code, 0) s = stdout.getvalue() assert_is_instance(s, str) assert_not_equal(s, '') # vim:ts=4 sts=4 sw=4 et mwic-0.7.10/tests/test_colors.py0000644000000000000000000000616614375677624016646 0ustar00rootroot00000000000000# Copyright © 2015-2022 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
import io import unittest.mock from lib import colors as M from .tools import ( assert_equal, ) def with_stdout(encoding): stdout = io.TextIOWrapper( io.BytesIO(), encoding=encoding, ) return unittest.mock.patch('sys.stdout', stdout) @with_stdout('UTF-8') def test_control_characters(): def t(s, x): r = M.escape(s) assert_equal(r, '\x1B[7m' + x + '\x1B[27m') t('\x00', '^@') t('\x01', '^A') t('\x02', '^B') t('\x03', '^C') t('\x04', '^D') t('\x05', '^E') t('\x06', '^F') t('\x07', '^G') t('\x08', '^H') t('\x09', '^I') t('\x0A', '^J') t('\x0B', '^K') t('\x0C', '^L') t('\x0D', '^M') t('\x0E', '^N') t('\x0F', '^O') t('\x10', '^P') t('\x11', '^Q') t('\x12', '^R') t('\x13', '^S') t('\x14', '^T') t('\x15', '^U') t('\x16', '^V') t('\x17', '^W') t('\x18', '^X') t('\x19', '^Y') t('\x1A', '^Z') t('\x1B', '^[') t('\x1C', '^\\') t('\x1D', '^]') t('\x1E', '^^') t('\x1F', '^_') t('\x7F', '^?') t('\x80', '') t('\x81', '') t('\x82', '') t('\x83', '') t('\x84', '') t('\x85', '') t('\x86', '') t('\x87', '') t('\x88', '') t('\x89', '') t('\x8A', '') t('\x8B', '') t('\x8C', '') t('\x8D', '') t('\x8E', '') t('\x8F', '') t('\x90', '') t('\x91', '') t('\x92', '') t('\x93', '') t('\x94', '') t('\x95', '') t('\x96', '') t('\x97', '') t('\x98', '') t('\x99', '') t('\x9A', '') t('\x9B', '') t('\x9C', '') t('\x9D', '') t('\x9E', '') t('\x9F', '') @with_stdout('UTF-8') def test_escape_safe(): def t(s): r = M.escape(s) assert_equal(r, s) t('A') t('Á') # vim:ts=4 sts=4 sw=4 et mwic-0.7.10/tests/test_extdict.py0000644000000000000000000001002114375677624016772 0ustar00rootroot00000000000000# Copyright © 2016-2022 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software 
is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. import contextlib import functools import tempfile import lib.extdict as M from .tools import ( assert_in, assert_not_in, ) @contextlib.contextmanager def tmpdict(data): with tempfile.NamedTemporaryFile(prefix='mwic.', suffix='.txt', mode='wt', encoding='ASCII') as file: file.write(data) file.flush() yield file.name def _test_dict(bad, good, *, d): if not isinstance(good, set): raise TypeError if not isinstance(bad, set): raise TypeError for word in bad: assert_in(word, d) for word in good: assert_not_in(word, d) lintian_dict = '''\ # All spelling errors that have been observed "in the wild" in package # descriptions are added here, ... # # Please keep the list sorted (using the en_US locale). abandonned||abandoned portugese||Portuguese upto||up to ''' def test_lintian(): with tmpdict(lintian_dict) as path: d = M.Dictionary(path) t = functools.partial(_test_dict, d=d) t({'abandonned', 'Abandonned', 'ABANDONNED'}, {'abandoned'}) t({'portugese', 'Portugese', 'PORTUGESE'}, {'Portuguese'}) t({'upto', 'Upto', 'UPTO'}, {'up to'}) lintian_case_dict = '''\ # Picky corrections, applied before lowercasing the word. ... # # Please keep the list sorted (using the en_US locale). 
american||American Debian-Edu||Debian Edu SLang||S-Lang ''' def test_lintian_case(): with tmpdict(lintian_case_dict) as path: d = M.Dictionary(path) t = functools.partial(_test_dict, d=d) t({'american'}, {'American', 'AMERICAN'}) t({'Debian-Edu'}, {'Debian Edu', 'debian-edu', 'DEBIAN-EDU'}) t({'SLang'}, {'S-Lang', 'slang', 'SLANG'}) codespell_dict = '''\ abandonned->abandoned clas->class, disabled because of name clash in c++ intented->intended, indented, ''' def test_codespell(): with tmpdict(codespell_dict) as path: d = M.Dictionary(path) t = functools.partial(_test_dict, d=d) t({'abandonned', 'Abandonned', 'ABANDONNED'}, {'abandoned'}) t({'clas', 'Clas', 'CLAS'}, {'class'}) t({'intented', 'Intented', 'INTENTED'}, {'intended', 'indented'}) kde_dict = '''\ #! /usr/bin/env perl # CORRECTIONS GO IN THE __DATA__ SECTION AT THE END OF THIS SCRIPT # Checks and corrects common spelling errors in text files - ... __DATA__ #INCORRECT SPELLING CORRECTION aasumes assumes #INCORRECT SPELLING CORRECTION Addtional Additional ''' def test_kde(): with tmpdict(kde_dict) as path: d = M.Dictionary(path) t = functools.partial(_test_dict, d=d) t({'aasumes'}, {'assumes'}) # FIXME? 'assumes' t({'Addtional'}, {'Additional'}) # FIXME? 
'addtional' plain_dict = '''\ abandonned Portugese ''' def test_plain(): with tmpdict(plain_dict) as path: d = M.Dictionary(path) t = functools.partial(_test_dict, d=d) t({'abandonned', 'Abandonned', 'ABANDONNED'}, {'abandoned'}) t({'Portugese'}, {'portugese', 'PORTUGESE'}) # vim:ts=4 sts=4 sw=4 et mwic-0.7.10/tests/test_trim.py0000644000000000000000000000536614375677624016321 0ustar00rootroot00000000000000# Copyright © 2014-2023 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
import re
import sys

import regex

import lib.text as M

from .tools import (
    assert_equal,
    assert_greater_equal,
)

# Use the built-in str.isascii where the interpreter is new enough (>= 3.7);
# otherwise fall back to an equivalent regular-expression check.
if sys.version_info >= (3, 7):
    isascii = str.isascii  # pylint: disable=no-member
else:
    def isascii(s):
        # True when every character of s lies in U+0000..U+007F
        # (the empty string matches, too).
        return re.fullmatch(r'[\0-\x7F]*', s) is not None

def xlen(s):
    # Count the non-empty pieces produced by splitting s on a captured \X
    # (\X matches one grapheme in the "regex" module), and sanity-check the
    # count against len(s):
    # - pure-ASCII input: the count must equal len(s);
    # - anything else: the count may only be smaller or equal, never larger.
    n = sum(1 if c else 0 for c in regex.split(r'(\X)', s))
    if isascii(s):
        assert n == len(s)
    else:
        assert n <= len(s)
    return n

def test_ltrim():
    # Check M.ltrim() against a table of expected results, where the list
    # index is the width passed to ltrim() and the last item is the input.
    def t(s, n, expected):
        result = M.ltrim(s, n)
        assert_equal(result, expected)
        # the result must never be longer than max(1, n), as measured by xlen()
        assert_greater_equal(
            max(1, n),
            xlen(result)
        )
    t('', 0, '')
    truncations = [
        '…',
        '…',
        '…s',
        '…gs',
        'eggs',
        'eggs',
    ]
    for n, s in enumerate(truncations):
        t(truncations[-1], n, s)
    # same table again, with a combining accent attached to every "g"
    truncations = [
        s.replace('g', 'g\N{COMBINING GRAVE ACCENT}')
        for s in truncations
    ]
    for n, s in enumerate(truncations):
        t(truncations[-1], n, s)

def test_rtrim():
    # Check M.rtrim() against a table of expected results, where the list
    # index is the width passed to rtrim() and the last item is the input.
    def t(s, n, expected):
        result = M.rtrim(s, n)
        assert_equal(result, expected)
        # the result must never be longer than max(1, n), as measured by xlen()
        assert_greater_equal(
            max(1, n),
            xlen(result)
        )
    t('', 0, '')
    truncations = [
        '…',
        '…',
        'e…',
        'eg…',
        'eggs',
        'eggs',
    ]
    for n, s in enumerate(truncations):
        t(truncations[-1], n, s)
    # same table again, with a combining accent attached to every "g"
    truncations = [
        s.replace('g', 'g\N{COMBINING ACUTE ACCENT}')
        for s in truncations
    ]
    for n, s in enumerate(truncations):
        t(truncations[-1], n, s)

# NOTE: the next line still carries the archive header of the following member; kept verbatim.
# vim:ts=4 sts=4 sw=4 et mwic-0.7.10/tests/test_version.py0000644000000000000000000000355714375677624017033 0ustar00rootroot00000000000000# Copyright © 2012-2022 Jakub Wilk
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import os

from lib.cli import __version__

from .tools import (
    assert_equal,
)

# directory of this test module, and the sibling "doc" directory next to it
here = os.path.dirname(__file__)
docdir = os.path.join(here, os.pardir, 'doc')

def test_changelog():
    # The version in the first changelog line must equal lib.cli.__version__.
    # The version is taken from the second whitespace-separated field of that
    # line, with surrounding parentheses stripped.
    path = os.path.join(docdir, 'changelog')
    with open(path, 'rt', encoding='UTF-8') as file:
        line = file.readline()
    changelog_version = line.split()[1].strip('()')
    assert_equal(changelog_version, __version__)

def test_manpage():
    # The ":version:" field of doc/manpage.rst must equal lib.cli.__version__.
    # If no such field is found, manpage_version stays None and the
    # comparison below fails.
    path = os.path.join(docdir, 'manpage.rst')
    manpage_version = None
    with open(path, 'rt', encoding='UTF-8') as file:
        for line in file:
            if line.startswith(':version:'):
                # the version is the last whitespace-separated field
                manpage_version = line.split()[-1]
                break
    assert_equal(manpage_version, __version__)

# NOTE: the next line still carries the archive header of the following member; kept verbatim.
# vim:ts=4 sts=4 sw=4 et mwic-0.7.10/tests/tools.py0000644000000000000000000000300214375677624015430 0ustar00rootroot00000000000000# Copyright © 2022 Jakub Wilk
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import unittest

# Expose unittest's assertion methods as plain module-level functions.
# A throw-away TestCase instance is needed to obtain bound methods; it is
# constructed with '__hash__' as the method name, an attribute every object
# already has.
tc = unittest.TestCase('__hash__')

assert_equal = tc.assertEqual
assert_greater_equal = tc.assertGreaterEqual
assert_in = tc.assertIn
assert_is_instance = tc.assertIsInstance
assert_multi_line_equal = tc.assertMultiLineEqual
assert_not_equal = tc.assertNotEqual
assert_not_in = tc.assertNotIn

# the instance stays reachable through the bound methods' __self__
del tc

# maxDiff = None removes the length limit on diffs in failure messages
assert_multi_line_equal.__self__.maxDiff = None  # pylint: disable=no-member

# vim:ts=4 sts=4 sw=4 et