mwic-0.7.8/0000755000000000000000000000000013564026735012476 5ustar00rootroot00000000000000mwic-0.7.8/.coveragerc0000644000000000000000000000014113564026733014611 0ustar00rootroot00000000000000[run] branch = true [report] show_missing = true exclude_lines = # no coverage # vim:ft=dosini mwic-0.7.8/.pylintrc0000644000000000000000000000117713564026733014347 0ustar00rootroot00000000000000[MASTER] load-plugins = pylint.extensions.check_elif [MESSAGES CONTROL] disable = bad-builtin, bad-continuation, bad-option-value, fixme, inconsistent-return-statements, invalid-name, locally-disabled, no-else-continue, no-self-use, redefined-variable-type, similarities, too-few-public-methods, too-many-branches, too-many-locals, too-many-statements, [BASIC] no-docstring-rgx = .* [REPORTS] reports = no score = no msg-template = {path}:{line}: {C}: {symbol} [{obj}] {msg} [FORMAT] max-line-length = 120 expected-line-ending-format = LF # vim:ft=dosini ts=4 sts=4 sw=4 et mwic-0.7.8/Makefile0000644000000000000000000000450613564026733014141 0ustar00rootroot00000000000000# Copyright © 2012-2019 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. PYTHON = python3 PREFIX = /usr/local DESTDIR = bindir = $(PREFIX)/bin basedir = $(PREFIX)/share/mwic mandir = $(PREFIX)/share/man .PHONY: all all: ; python_exe = $(shell $(PYTHON) -c 'import sys; print(sys.executable)') .PHONY: install install: mwic $(PYTHON) - < lib/__init__.py # Python version check # executable: install -d $(DESTDIR)$(bindir) sed \ -e "1 s@^#!.*@#!$(python_exe)@" \ -e "s#^basedir = .*#basedir = '$(basedir)/'#" \ $(<) > $(<).tmp install $(<).tmp $(DESTDIR)$(bindir)/$(<) rm $(<).tmp # library + data: install -d $(DESTDIR)$(basedir)/dict install -p -m644 dict/* $(DESTDIR)$(basedir)/dict/ install -d $(DESTDIR)$(basedir)/lib install -p -m644 lib/*.py $(DESTDIR)$(basedir)/lib/ umask 022 && $(PYTHON) -m compileall -q -d $(basedir)/lib $(DESTDIR)$(basedir)/lib ifeq "$(wildcard doc/*.1)" "" # run "$(MAKE) -C doc" to build the manpage else # manual page: install -d $(DESTDIR)$(mandir)/man1 install -p -m644 doc/$(<).1 $(DESTDIR)$(mandir)/man1/ endif .PHONY: test test: $(PYTHON) tests/run-tests --verbose .PHONY: clean clean: find . -type f -name '*.py[co]' -delete find . -type d -name '__pycache__' -delete rm -f .coverage rm -f *.tmp .error = GNU make is required # vim:ts=4 sts=4 sw=4 noet mwic-0.7.8/dict/0000755000000000000000000000000013564026733013417 5ustar00rootroot00000000000000mwic-0.7.8/dict/en0000644000000000000000000000727413564026733013756 0ustar00rootroot00000000000000@define ’ = ['’] @define $BE = be|am|ain’t|I’m|is(n’t)?|(he|she|it)’s|are(n’t)?|was(n’t)?|were(n’t)?|(you|we|they)’re|been|being @define $ART = a|an|the # X X → X (?P[A-Za-z]+(’[a-z]+)*) (?P=dupl) $ART $ART [a-z]+n’t not $BE (?!being|been)$BE $BE(?[^\W_]+) (?P=dupl) Publiczn\w+ Licencj\w+ GNU mwic-0.7.8/doc/0000755000000000000000000000000013564026736013244 5ustar00rootroot00000000000000mwic-0.7.8/doc/LICENSE0000644000000000000000000000207413564026733014251 0ustar00rootroot00000000000000Copyright © 2012-2019 Jakub Wilk Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. mwic-0.7.8/doc/Makefile0000644000000000000000000000324613564026733014706 0ustar00rootroot00000000000000# Copyright © 2014-2018 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. export LC_ALL=C rst2man = $(notdir $(shell command -v rst2man || echo rst2man.py)) exe = mwic .PHONY: all all: $(exe).1 $(exe).1: manpage.rst $(rst2man) --input-encoding=UTF-8 < $(<) > $(@).tmp perl -pi -e '/^[.]BI\b/ and s/\\fP/\\fR/g' $(@).tmp # work-around for https://bugs.debian.org/806601 perl -pi -e 's/([a-z])\\[(]aq([a-z])/$$1\x27$$2/g' $(@).tmp # prefer ' to \(aq when used as an apostrophe perl -ni -e 'print unless /^[.]\\" vim:/' $(@).tmp mv $(@).tmp $(@) .PHONY: clean clean: rm -f $(exe).1 *.tmp .error = GNU make is required # vim:ts=4 sts=4 sw=4 noet mwic-0.7.8/doc/README0000644000000000000000000000176313564026735014132 0ustar00rootroot00000000000000Overview ======== **mwic** is a spell-checker that groups possible misspellings and shows them in their contexts. This is useful for checking technical documents, which often contain words that are not included in standard dictionaries. Prerequisites ============= The following software is needed to run mwic: * Python ≥ 3.3; * PyEnchant_, Python bindings for the Enchant_ spellchecking system; * regex_, alternative regular expression module for Python. Additionally, the following software is needed to rebuild the manual page from source: * docutils_ ≥ 0.6. For pip users:: python3 -m pip install pyenchant regex python3 -m pip install docutils For Debian users:: apt-get install python3-enchant python3-regex apt-get install python3-docutils .. _regex: https://pypi.org/project/regex/ .. _pyenchant: https://pypi.org/project/pyenchant/ .. _Enchant: https://abiword.github.io/enchant/ .. _docutils: http://docutils.sourceforge.net/ .. vim:ts=3 sts=3 sw=3 ft=rst mwic-0.7.8/doc/changelog0000644000000000000000000001523413564026733015120 0ustar00rootroot00000000000000mwic (0.7.8) unstable; urgency=low * Add new multi-word misspellings to the dictionary. * Use the \e[90m sequence for dark gray. * Improve the build system: + Check Python version on install. + Byte-compile Python code on install. https://github.com/jwilk/mwic/issues/9 + Don't require GNU install(1). * Rephrase descriptions of --help and --version in help messages. * Improve the test suite. -- Jakub Wilk Sat, 16 Nov 2019 18:11:49 +0100 mwic (0.7.7) unstable; urgency=low * Don't die with exception when a file cannot be opened. (If there are many input files, it's helpful to continue when one of them cannot be opened.) -- Jakub Wilk Mon, 12 Nov 2018 17:42:25 +0100 mwic (0.7.6) unstable; urgency=low * Improve documentation: + Fix grammar in the description. + Update codespell URL. + Add example screenshot. * Improve the build system: + Add checks against BSD make. (Only GNU make is supported.) + Use ginstall(1), instead of install(1), if it exists. * Fix test failure (multiword-is-t). Thanks to Daniel M. Capella for the bug report. https://github.com/jwilk/mwic/issues/7 -- Jakub Wilk Thu, 06 Sep 2018 21:23:51 +0200 mwic (0.7.5) unstable; urgency=low * Drop support for Python 3.2. * Update PyPI URLs in documentation. * Update Lintian URLs in the manual page. -- Jakub Wilk Thu, 26 Apr 2018 15:51:37 +0200 mwic (0.7.4) unstable; urgency=low * Update Linux kernel URL in the manual page. * Add new multi-word misspellings to the dictionary. * Improve typography in the manual page. * Reset the SIGPIPE signal disposition. * Improve the test suite. -- Jakub Wilk Wed, 24 Jan 2018 16:07:49 +0100 mwic (0.7.3) unstable; urgency=low * Add new multi-word misspellings to the dictionary. * Add experimental script to spell-check PO files. * Make the doc makefile more portable. * Update Enchant homepage URL. * Improve the test suite. -- Jakub Wilk Thu, 23 Mar 2017 12:52:48 +0100 mwic (0.7.2) unstable; urgency=low * Add new multi-word misspellings to the dictionary. * Add new words to the whitelist. * Put license into a separate file. -- Jakub Wilk Fri, 21 Oct 2016 13:27:44 +0200 mwic (0.7.1) unstable; urgency=low * Add new multi-word misspellings to the dictionary. * Make --version print also versions of Python and the libraries. * Make --version print to stdout, not stderr. https://bugs.python.org/issue18920 * Make the --help message include option descriptions. * Improve the manual page: + Update the description. + Make the example more realistic. * Document how to install dependencies with pip or apt-get. -- Jakub Wilk Sat, 06 Aug 2016 13:12:03 +0200 mwic (0.7) unstable; urgency=low * Fix spurious output for languages that didn't have multi-word misspelling dictionaries. * Add new multi-word misspellings to the dictionary. * Add support for “--language und”, to consider every word misspelled. * Use “regexp”, an alternative regular expression module for Python. * Document runtime dependencies. * Improve the test suite. -- Jakub Wilk Thu, 23 Jun 2016 22:11:33 +0200 mwic (0.6.1) unstable; urgency=low [ Jakub Wilk ] * Add new multi-word misspellings to the dictionary. * Fix handling case-sensitivity in Lintian and codespell dictionaries. * Improve the test suite. [ Dwayne Bailey ] * Use /usr/bin/env in shebangs. -- Jakub Wilk Thu, 16 Jun 2016 20:14:48 +0200 mwic (0.6) unstable; urgency=low * Add Makefile. * Add new multi-word misspellings to the dictionary. * Add new words to the whitelist. * Add support for using misspelling dictionary from kde-spellcheck (part of kde-dev-scripts). * Document where to get third-party misspelling dictionaries. -- Jakub Wilk Mon, 06 Jun 2016 12:27:55 +0200 mwic (0.5.1) unstable; urgency=low * Update the description in the manual page. * Add new multi-word misspellings to the dictionary. * Fall back to “more” if $PAGER is not set and “pager” doesn't exist. Thanks to John Vandenberg for the bug report. https://github.com/jwilk/mwic/issues/1 -- Jakub Wilk Thu, 26 May 2016 23:00:21 +0200 mwic (0.5) unstable; urgency=low * Check for some multi-word misspellings. * Whitelist some words that are common in technical texts, but might not be recognized by general-purpose spellchecking dictionaries. * Shorten vertical space between misspellings in colored mode. * Make all metavariables in the help message uppercase. * Add option for splitting camel-cased compound words (--camel-case). * Add option for omitting blank lines between in output (--compact). * Add option for using external blacklist dictionary (--blacklist). * Add the “SEE ALSO” section to the manual page. -- Jakub Wilk Sun, 13 Mar 2016 16:04:43 +0100 mwic (0.4) unstable; urgency=low * Add work-around for Hunspell's stderr warnings about characters in Supplementary Planes. https://github.com/rfk/pyenchant/issues/58 * Add option for hiding words with many occurrences (--limit). -- Jakub Wilk Fri, 18 Dec 2015 15:08:56 +0100 mwic (0.3.1) unstable; urgency=low * Delay spawning the pager until the input is fully read. Thanks to Paul Tagliamonte for the bug report. -- Jakub Wilk Wed, 09 Dec 2015 16:38:52 +0100 mwic (0.3) unstable; urgency=low * Print rare words first by default. * Add option to highlight misspelling with color (-f/--output-format). * If stdout is a terminal, automatically pipe the output through a pager. * Reorder options in the help message and in the manual page. * Fix option formatting in the manual page. -- Jakub Wilk Thu, 03 Dec 2015 14:09:52 +0100 mwic (0.2) unstable; urgency=low * Add option to print rare words first (-r/--reverse). * Make it possible to specify input encoding error handler. * Make “UTF-8:replace” the default input encoding. * Expand tabs when reading input. * Improve the test suite. -- Jakub Wilk Tue, 24 Nov 2015 20:56:17 +0100 mwic (0.1.1) unstable; urgency=low * Print suggestions only once per group, not by every misspelling instance. * Add the --version option. * Add the --list-languages option. * Add the manual page. * Improve the test suite. -- Jakub Wilk Wed, 05 Nov 2014 15:19:23 +0100 mwic (0.1) unstable; urgency=low * Initial release. -- Jakub Wilk Thu, 23 Jan 2014 14:00:25 +0100 mwic-0.7.8/doc/manpage.rst0000644000000000000000000000757713564026733015423 0ustar00rootroot00000000000000==== mwic ==== --------------------------- Misspelled Words In Context --------------------------- :manual section: 1 :version: mwic 0.7.8 :date: 2018-11-14 Synopsis -------- **mwic** [-l *lang*] [*option*...] [*file*...] Description ----------- **mwic** is a spell-checker that groups possible misspellings and shows them in their contexts. This is useful for checking technical documents, which often contain words that are not included in standard dictionaries. Options ------- -l lang, --language lang Spell-check for this language. The default is ``en``. --list-languages Print list of available languages. --blacklist file Treat words from the external dictionary as misspelled. The dictionary can be in the format used by *Lintian*, or in the format used by *codespell*, or in the format used by *kde-spellcheck* (part of *kde-dev-scripts*); or it can be plain newline-separated word list. This option can be used multiple times. --camel-case Split camel-cased compound words. For example, treat “eggBaconAndSpam” as 4 separate words. --input-encoding enc Assume this input encoding. The default is ``UTF-8:replace`` (UTF-8 encoding with error handler replacing malformed characters with U+FFFD). -f fmt, --output-format fmt If *fmt* is ``plain``, output plain text verbatim and highlight misspellings with the ``^`` character. This is the default if stdout is not a terminal. If *fmt* is ``color``, escape control characters and highlight misspellings with colors. This is the default if stdout is a terminal. -r, --reverse Print words in reverse order, that is, the most common words first. --compact Omit blank lines in output. --limit n Assume that words that occurred more than *n* times are spelled correctly. --max-context-width n Limit context width to *n* characters. The default is 30. --suggest n Suggest up to *n* corrections. -h, --help Show help message and exit. --version Show version information and exit. Environment ----------- PAGER If stdout is a terminal, mwic pipes the output through ``$PAGER``. The default is ``pager`` (if it exists) or ``more``. LESS If this variable is unset, mwic sets it to ``FX``, or to ``FXR`` if the output is in color. LV If this variable in unset, and the output is in color, mwic sets this variable to ``-c``. Files ----- Spell-checking can be eased by using dictionaries of commonly misspelled words. **mwic** doesn't ship with one, but it can use a number of dictionaries from third-party projects: * Lintian: | https://salsa.debian.org/lintian/lintian/raw/master/data/spelling/corrections | https://salsa.debian.org/lintian/lintian/raw/master/data/spelling/corrections-case * Linux kernel: | https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/scripts/spelling.txt * codespell: | https://github.com/codespell-project/codespell/raw/master/codespell_lib/data/dictionary.txt * kde-dev-scripts: | https://github.com/KDE/kde-dev-scripts/raw/master/kde-spellcheck.pl Example ------- :: $ mwic --blacklist /usr/share/lintian/data/spelling/corrections --compact rfc1927.txt heirarchical: | …g paper clips vs small ones; heirarchical assembly ^^^^^^^^^^^^ multipart: | …tes the degree of binding of multipart documents: ^^^^^^^^^ reycled: | 1) staples could be reycled for a small credit ^^^^^^^ *...* :: EMail, edu, isi: | EMail: rogers@isi.edu ^^^^^ ^^^ ^^^ electonic: | drawer of the electonic desk on home PCs | 3) electonic staples should have a standa… ^^^^^^^^^ See also -------- **spellintian**\ (1), **codespell**\ (1); “English for software localisation” by Justin B Rye .. vim:ts=3 sts=3 sw=3 mwic-0.7.8/doc/mwic.10000644000000000000000000001162313564026736014270 0ustar00rootroot00000000000000.\" Man page generated from reStructuredText. . .TH MWIC 1 "2018-11-14" "mwic 0.7.8" "" .SH NAME mwic \- Misspelled Words In Context . .nr rst2man-indent-level 0 . .de1 rstReportMargin \\$1 \\n[an-margin] level \\n[rst2man-indent-level] level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] - \\n[rst2man-indent0] \\n[rst2man-indent1] \\n[rst2man-indent2] .. .de1 INDENT .\" .rstReportMargin pre: . RS \\$1 . nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] . nr rst2man-indent-level +1 .\" .rstReportMargin post: .. .de UNINDENT . RE .\" indent \\n[an-margin] .\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] .nr rst2man-indent-level -1 .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] .in \\n[rst2man-indent\\n[rst2man-indent-level]]u .. .SH SYNOPSIS .sp \fBmwic\fP [\-l \fIlang\fP] [\fIoption\fP\&...] [\fIfile\fP\&...] .SH DESCRIPTION .sp \fBmwic\fP is a spell\-checker that groups possible misspellings and shows them in their contexts. This is useful for checking technical documents, which often contain words that are not included in standard dictionaries. .SH OPTIONS .INDENT 0.0 .TP .BI \-l \ lang\fR,\fB \ \-\-language \ lang Spell\-check for this language. The default is \fBen\fP\&. .TP .B \-\-list\-languages Print list of available languages. .TP .BI \-\-blacklist \ file Treat words from the external dictionary as misspelled. The dictionary can be in the format used by \fILintian\fP, or in the format used by \fIcodespell\fP, or in the format used by \fIkde\-spellcheck\fP (part of \fIkde\-dev\-scripts\fP); or it can be plain newline\-separated word list. This option can be used multiple times. .TP .B \-\-camel\-case Split camel\-cased compound words. For example, treat “eggBaconAndSpam” as 4 separate words. .TP .BI \-\-input\-encoding \ enc Assume this input encoding. The default is \fBUTF\-8:replace\fP (UTF\-8 encoding with error handler replacing malformed characters with U+FFFD). .TP .BI \-f \ fmt\fR,\fB \ \-\-output\-format \ fmt If \fIfmt\fP is \fBplain\fP, output plain text verbatim and highlight misspellings with the \fB^\fP character. This is the default if stdout is not a terminal. .sp If \fIfmt\fP is \fBcolor\fP, escape control characters and highlight misspellings with colors. This is the default if stdout is a terminal. .TP .B \-r\fP,\fB \-\-reverse Print words in reverse order, that is, the most common words first. .TP .B \-\-compact Omit blank lines in output. .TP .BI \-\-limit \ n Assume that words that occurred more than \fIn\fP times are spelled correctly. .TP .BI \-\-max\-context\-width \ n Limit context width to \fIn\fP characters. The default is 30. .TP .BI \-\-suggest \ n Suggest up to \fIn\fP corrections. .TP .B \-h\fP,\fB \-\-help Show help message and exit. .TP .B \-\-version Show version information and exit. .UNINDENT .SH ENVIRONMENT .INDENT 0.0 .TP .B PAGER If stdout is a terminal, mwic pipes the output through \fB$PAGER\fP\&. The default is \fBpager\fP (if it exists) or \fBmore\fP\&. .TP .B LESS If this variable is unset, mwic sets it to \fBFX\fP, or to \fBFXR\fP if the output is in color. .TP .B LV If this variable in unset, and the output is in color, mwic sets this variable to \fB\-c\fP\&. .UNINDENT .SH FILES .sp Spell\-checking can be eased by using dictionaries of commonly misspelled words. \fBmwic\fP doesn't ship with one, but it can use a number of dictionaries from third\-party projects: .INDENT 0.0 .IP \(bu 2 Lintian: .nf \fI\%https://salsa.debian.org/lintian/lintian/raw/master/data/spelling/corrections\fP \fI\%https://salsa.debian.org/lintian/lintian/raw/master/data/spelling/corrections\-case\fP .fi .sp .IP \(bu 2 Linux kernel: .nf \fI\%https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/scripts/spelling.txt\fP .fi .sp .IP \(bu 2 codespell: .nf \fI\%https://github.com/codespell\-project/codespell/raw/master/codespell_lib/data/dictionary.txt\fP .fi .sp .IP \(bu 2 kde\-dev\-scripts: .nf \fI\%https://github.com/KDE/kde\-dev\-scripts/raw/master/kde\-spellcheck.pl\fP .fi .sp .UNINDENT .SH EXAMPLE .INDENT 0.0 .INDENT 3.5 .sp .nf .ft C $ mwic \-\-blacklist /usr/share/lintian/data/spelling/corrections \-\-compact rfc1927.txt heirarchical: | …g paper clips vs small ones; heirarchical assembly ^^^^^^^^^^^^ multipart: | …tes the degree of binding of multipart documents: ^^^^^^^^^ reycled: | 1) staples could be reycled for a small credit ^^^^^^^ .ft P .fi .UNINDENT .UNINDENT .sp \fI\&...\fP .INDENT 0.0 .INDENT 3.5 .sp .nf .ft C EMail, edu, isi: | EMail: rogers@isi.edu ^^^^^ ^^^ ^^^ electonic: | drawer of the electonic desk on home PCs | 3) electonic staples should have a standa… ^^^^^^^^^ .ft P .fi .UNINDENT .UNINDENT .SH SEE ALSO .sp \fBspellintian\fP(1), \fBcodespell\fP(1); .sp “English for software localisation” <\fI\%http://jbr.me.uk/linux/esl.html\fP> by Justin B Rye . .\" Generated by docutils manpage writer. . mwic-0.7.8/doc/screenshot.svg0000644000000000000000000006074413564026733016152 0ustar00rootroot00000000000000 $ mwic --blacklist /usr/share/lintian/data/spelling/corrections rfc1927.txt heirarchical: | g paper clips vs small ones; heirarchical assembly reycled: | 1) staples could be reycled for a small credit flines: | y should not be used on data flines which might end up in recycler: | ile or folder is deleted, a "recycler" program could src: | 3) "src=" would allow the specificat ISI: | ISI USC: | USC/Information Sciences Institu bento: | f 5000. Reference: Apple's "bento" multipage: | r clip to a single page of a multipage document or Rey, del: | Marina del Rey, CA 90292 EMail, edu, isi: | EMail: rogers@isi.edu electonic: | drawer of the electonic desk on home PCs | 3) electonic staples should have a standa mwic-0.7.8/doc/todo0000644000000000000000000000022313564026733014126 0ustar00rootroot00000000000000Whitelist long hex strings. (``[0-9a-f]{10,}`` is unlikely to match any real word.) Whitelist long base64 strings. .. vim:ts=3 sts=3 sw=3 ft=rst mwic-0.7.8/lib/0000755000000000000000000000000013564026733013242 5ustar00rootroot00000000000000mwic-0.7.8/lib/__init__.py0000644000000000000000000000013313564026733015350 0ustar00rootroot00000000000000''' mwic's private modules ''' type(lambda: (yield from [])) # Python >= 3.3 is required mwic-0.7.8/lib/cli.py0000644000000000000000000003112713564026733014367 0ustar00rootroot00000000000000# Copyright © 2013-2019 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. ''' the command-line interface ''' import argparse import functools import io import signal import sys import types import enchant.tokenize class lib: # pylint: disable=import-outside-toplevel from . import colors from . import data from . import extdict from . import intdict from . import pager from . import text # pylint: enable=import-outside-toplevel __version__ = '0.7.8' class VersionAction(argparse.Action): def __init__(self, option_strings, dest=argparse.SUPPRESS): super().__init__( option_strings=option_strings, dest=dest, nargs=0, help='show version information and exit' ) def __call__(self, parser, namespace, values, option_string=None): print('{prog} {0}'.format(__version__, prog=parser.prog)) print('+ Python {0}.{1}.{2}'.format(*sys.version_info)) print('+ PyEnchant {0}'.format(__version__)) try: enchant_version = enchant.get_enchant_version() except AttributeError: pass else: if isinstance(enchant_version, bytes): enchant_version = enchant_version.decode('ASCII', 'replace') print(' + Enchant {0}'.format(enchant_version)) regex = lib.intdict.re print('+ regex {0}'.format(regex.__version__)) # pylint: disable=no-member parser.exit() def main(): signal.signal(signal.SIGPIPE, signal.SIG_DFL) ap = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter) ap.add_argument('--version', action=VersionAction) ap.add_argument('files', metavar='FILE', nargs='*', default=['-'], help='file to process (default: stdin)') ap.add_argument('-l', '--language', metavar='LANG', default='en', help='spell-check for this language (default: "en")') ap.add_argument('--list-languages', nargs=0, action=list_languages, help='print list of available languages') ap.add_argument('--blacklist', metavar='FILE', action='append', default=[], help='use misspelling dictionary') ap.add_argument('--camel-case', action='store_true', help='split camel-cased compound words') ap.add_argument('--input-encoding', metavar='ENC', default='UTF-8:replace', help='assume input encoding ENC (default: "UTF-8:replace")') default_output_format = 'color' if sys.stdout.isatty() else 'plain' ap.add_argument('-f', '--output-format', choices=('plain', 'color'), default=default_output_format, help=( '"plain" = use "^" to emphasize words\n' '"color" = highlight words in color (default on tty)\n' ) ) ap.add_argument('-r', '--reverse', action='store_true', help='print most frequent words first') ap.add_argument('--compact', action='store_true', help='omit blank lines in output') ap.add_argument('--limit', metavar='N', type=int, default=1e999, help='skip words that have >N instances') ap.add_argument('--max-context-width', metavar='N', default=30, help='limit context width to N chars') ap.add_argument('--suggest', metavar='N', type=int, default=0, help='suggest up to N corrections') ap.add_argument('--debug-dict', action='store_true', help=argparse.SUPPRESS) ap.add_argument('--traceback', action='store_true', help=argparse.SUPPRESS) options = ap.parse_args() sys.stdout = io.TextIOWrapper(sys.stdout.buffer, 'UTF-8') try: split_words = enchant.tokenize.get_tokenizer(options.language) except enchant.errors.TokenizerNotFoundError: split_words = enchant.tokenize.get_tokenizer(None) if options.camel_case: split_words = lib.text.camel_case_tokenizer(split_words) if options.language == 'und': dictionary = None spellcheck = ''.__gt__ # always returns False options.suggest = 0 else: dictionary = enchant.Dict(options.language) spellcheck = functools.lru_cache(maxsize=None)( dictionary.check ) if options.debug_dict: if dictionary is None: dictvars = {} else: dictvars = vars(dictionary).items() for key, value in sorted(dictvars): print('{k} = {v!r}'.format(k=key, v=value)) sys.exit(0) intdict = lib.intdict.Dictionary(options.language) extdict = lib.extdict.Dictionary(*options.blacklist) misspellings = lib.data.Misspellings() encoding = options.input_encoding enc_errors = 'strict' if ':' in encoding: [encoding, enc_errors] = encoding.rsplit(':', 1) ctxt = types.SimpleNamespace( dictionary=dictionary, intdict=intdict, extdict=extdict, split_words=split_words, spellcheck=spellcheck, misspellings=misspellings, options=options, ) rc = 0 for path in options.files: if path == '-': file = io.TextIOWrapper( sys.stdin.buffer, encoding=encoding, errors=enc_errors, ) else: try: file = open( path, 'rt', encoding=encoding, errors=enc_errors, ) except OSError as exc: if options.traceback: raise msg = '{prog}: {path}: {exc}'.format(prog=ap.prog, path=path, exc=exc.strerror) print(msg, file=sys.stderr) rc = 1 continue with file: spellcheck_file(ctxt, file) if not misspellings: sys.exit(rc) raw_cc = options.output_format == 'color' with lib.pager.autopager(raw_control_chars=raw_cc): print_misspellings(ctxt) sys.exit(rc) def spellcheck_file(ctxt, file): force_ucs2 = ( ctxt.dictionary is not None and ctxt.dictionary.provider.name == 'myspell' ) for line in file: if force_ucs2: # https://github.com/rfk/pyenchant/issues/58 line = ''.join(c if c <= '\uFFFF' else '\uFFFD' for c in line) line = line.strip() line = line.expandtabs() taken = bytearray(len(line)) for word, pos in ctxt.split_words(line): assert len(word) >= 1 if word in ctxt.extdict: certainty = 1 elif ctxt.spellcheck(word): continue elif ctxt.intdict.is_whitelisted(word): continue else: certainty = 0 for i, dummy in enumerate(word, start=pos): taken[i] = True ctxt.misspellings.add(word, line, pos, certainty) for word, pos in ctxt.intdict.find(line): assert len(word) >= 1 for i, dummy in enumerate(word, start=pos): if taken[i]: break else: ctxt.misspellings.add(word, line, pos, 1) def print_misspellings(ctxt): rare_misspellings = lib.data.Misspellings() for word, occurrences in ctxt.misspellings.sorted_words(): if len(occurrences) == 1: [(word, line, positions)] = occurrences for pos, certainty in positions.items(): rare_misspellings.add(word, line, pos, certainty) ctxt.rare_misspellings = rare_misspellings if ctxt.options.reverse: print_common_misspellings(ctxt) print_rare_misspellings(ctxt) else: print_rare_misspellings(ctxt) print_common_misspellings(ctxt) def print_common_misspellings(ctxt): options = ctxt.options for word, occurrences in ctxt.misspellings.sorted_words(reverse=options.reverse): if len(occurrences) == 1: continue if occurrences.count() > options.limit: continue extra = '' if options.suggest > 0: suggestions = ctxt.dictionary.suggest(word)[:options.suggest] if suggestions: extra = ' ({sug})'.format(sug=', '.join(suggestions)) print(word + extra + ':') highlight_color = 'error' if occurrences.certainty > 0 else 'warn' occurrences = [ ( lib.text.ltrim(lcontext, options.max_context_width), word, lib.text.rtrim(rcontext, options.max_context_width), ) for lcontext, word, rcontext in occurrences.sorted_context() ] lwidth = max(len(lcontext) for lcontext, _, _, in occurrences) for lcontext, word, rcontext in occurrences: # pylint: disable=redefined-outer-name lcontext = lcontext.rjust(lwidth) if options.output_format == 'color': lcontext = lib.colors.escape(lcontext) word = lib.colors.highlight(word, highlight_color) rcontext = lib.colors.escape(rcontext) print(lib.colors.dim('|'), end=' ') else: print('|', end=' ') print('{lc}{word}{rc}'.format( lc=lcontext, word=word, rc=rcontext, )) if options.output_format != 'color': print('', ' ' * lwidth, '^' * len(word)) if not options.compact: print() def print_rare_misspellings(ctxt): options = ctxt.options use_color = options.output_format == 'color' for line, occurrences in ctxt.rare_misspellings.sorted_lines(reverse=options.reverse): header = [] underline = bytearray(b' ' * len(line)) for word, line, positions in sorted(occurrences): # pylint: disable=redefined-outer-name if use_color and (max(positions.values()) > 0): underline_char = b'!' else: underline_char = b'^' if len(positions) > options.limit: continue extra = '' if options.suggest > 0: suggestions = ctxt.dictionary.suggest(word)[:options.suggest] if suggestions: extra = ' ({sug})'.format(sug=', '.join(suggestions)) header += [word + extra] for x in positions: underline[x : x + len(word)] = underline_char * len(word) if not header: continue print(', '.join(header) + ':') underline = underline.decode() lwidth = len(underline) - len(underline.lstrip()) rwidth = len(underline) - len(underline.rstrip()) lexceed = lwidth - options.max_context_width rexceed = rwidth - options.max_context_width if lexceed > 0: lwidth = len(line) - lexceed line = lib.text.ltrim(line, lwidth) underline = lib.text.ltrim(underline, lwidth, char=' ') if rexceed > 0: rwidth = len(line) - rexceed line = lib.text.rtrim(line, rwidth) underline = lib.text.rtrim(underline, rwidth, char=' ') if use_color: hline = lib.colors.highlight( line, ( 'warn' if u == '^' else 'error' if u == '!' else 'off' for u in underline ) ) print(lib.colors.dim('|'), hline) else: print('|', line) print(' ', underline.rstrip()) if not options.compact: print() class list_languages(argparse.Action): def __call__(self, *args, **kwargs): # pylint: disable=arguments-differ for lang in sorted(enchant.list_languages()): print(lang) sys.exit(0) __all__ = ['main'] # vim:ts=4 sts=4 sw=4 et mwic-0.7.8/lib/colors.py0000644000000000000000000000425613564026733015124 0ustar00rootroot00000000000000# Copyright © 2015-2019 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. ''' color terminal support ''' import io import itertools import unicodedata class _seq: dim = '\x1B[90m' off = '\x1B[0m' warn = '\x1B[30;43m' error = '\x1B[30;41m' reverse = '\x1B[7m' unreverse = '\x1B[27m' def dim(s): return _seq.dim + escape(s) + _seq.off def escape(s): return highlight(s, itertools.repeat('off')) def highlight(s, w): if isinstance(w, str): w = itertools.repeat(w) fp = io.StringIO() off = _seq.off old_color = off for (cs, cw) in zip(s, w): color = getattr(_seq, cw) if color != old_color: fp.write(color) old_color = color if unicodedata.category(cs) == 'Cc': if cs < ' ' or cs == '\x7F': cs = '^' + chr(ord(cs) ^ ord('@')) else: cs = ''.format(ord(cs)) cs = '{t.reverse}{c}{t.unreverse}'.format(c=cs, t=_seq) fp.write(cs) if old_color != off: fp.write(off) return fp.getvalue() __all__ = [ 'dim', 'escape', 'highlight', ] # vim:ts=4 sts=4 sw=4 et mwic-0.7.8/lib/data.py0000644000000000000000000000704013564026733014526 0ustar00rootroot00000000000000# Copyright © 2013-2018 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. ''' collecting misspelling data ''' import collections import sys class Occurrences(): def __init__(self): self._data = collections.defaultdict(dict) self.certainty = 0 def add(self, word, line, pos, certainty): if isinstance(pos, int): self._data[(word, line)][pos] = certainty else: for p in pos: self._data[(word, line)][p] = certainty self.certainty = max(self.certainty, certainty) def count(self): return sum( len(positions) for positions in self._data.values() ) def __len__(self): return len(self._data) def __iter__(self): for (word, line), positions in self._data.items(): yield word, line, positions @staticmethod def _sorting_key(item): lcontext, word, rcontext = item return (rcontext, lcontext[::-1], word) def _context(self): for (word, line), positions in self._data.items(): for pos in positions: lcontext = line[:pos] rcontext = line[pos + len(word):] yield lcontext, word, rcontext def sorted_context(self): return sorted(self._context(), key=self._sorting_key) class Misspellings(): def __init__(self): self._word_index = collections.defaultdict(Occurrences) self._line_index = collections.defaultdict(Occurrences) def add(self, word, line, pos, certainty): word = sys.intern(word) line = sys.intern(line) self._word_index[word].add(word, line, pos, certainty) self._line_index[line].add(word, line, pos, certainty) @staticmethod def _sorting_key(*, reverse=False): sign = 1 if reverse: sign = -1 def k(item): s, occurrences = item return ( sign * -occurrences.certainty, sign * occurrences.count(), s ) return k def __bool__(self): return bool(self._word_index) def sorted_words(self, *, reverse=False): return sorted( self._word_index.items(), key=self._sorting_key(reverse=reverse) ) def sorted_lines(self, *, reverse=False): return sorted( self._line_index.items(), key=self._sorting_key(reverse=reverse) ) __all__ = [ 'Misspellings', 'Occurrences', ] # vim:ts=4 sts=4 sw=4 et mwic-0.7.8/lib/extdict.py0000644000000000000000000000654613564026733015273 0ustar00rootroot00000000000000# Copyright © 2016-2018 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. ''' external misspelling dictionary Supported dictionary formats: + Lintian - - + codespell + kde-spellcheck + plain word list ''' import re separators = { '||', # Lintian '->', # codespell } def case_variants(word, correction=None): yield word if not word.islower(): return correction = correction or '' if word.title() != correction.title(): yield word.title() if word.upper() != correction.upper(): yield word.upper() def parse_line(line): word = line for sep in separators: try: [word, correction] = line.split(sep, 1) except ValueError: pass else: break else: correction = None return case_variants(word, correction) class Dictionary(): def __init__(self, *paths): self._dict = set() for path in paths: self._read(path) def __contains__(self, word): return word in self._dict def _add(self, word): self._dict.add(word) def _read(self, path): with open(path, 'rt', encoding='UTF-8') as file: self._read_fp(file) def _read_fp(self, file): add = self._add kde = None for line in file: if kde is None: kde = re.match(r'\A#!.*\bperl\b', line) if kde: return self._read_fp_kde(file) if line[:1] == '#': continue line = line.strip() if not line: continue for word in parse_line(line): add(word) def _read_fp_kde(self, file): add = self._add for line in file: if line.strip() == '__DATA__': break for line in file: if line[:1] == '#': continue line = line.split() if line: add(line[0]) __all__ = ['Dictionary'] # vim:ts=4 sts=4 sw=4 et mwic-0.7.8/lib/intdict.py0000644000000000000000000001324413564026733015256 0ustar00rootroot00000000000000# Copyright © 2015-2018 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. ''' internal dictionary, which can contain: + blacklist of multi-word misspellings; + whitelist of words that are commonly found in software code or documentation, but are not present in standard dictionaries. ''' import os import regex as re basedir = os.path.normpath(os.path.join( os.path.dirname(__file__), os.path.pardir, '', )) datadir = os.path.join(basedir, 'dict', '') os.stat(datadir) def _find_nothing(s): # pylint: disable=unused-argument return () class Macros(): def __init__(self): self._defs = {} self._regex = None self._substs = None def __setitem__(self, name, definition): if name in self._defs: raise KeyError(name) # no coverage self._defs[name] = definition self._regex = None self._substs = None def expand(self, s): if not self._defs: return s if self._regex is not None: regex = self._regex substs = self._substs else: substs = [] regex = [] for i, (name, definition) in enumerate(self._defs.items()): substs += [definition] regex += ['(?P{name})'.format(i=i, name=re.escape(name))] regex = '|'.join(regex) regex = re.compile(regex) self._regex = regex self._substs = substs assert self._regex is not None assert self._substs is not None def replace(match): for i, subst in enumerate(substs): if match.group('mwic{i}'.format(i=i)) is not None: return subst assert False # no coverage return self._regex.sub(replace, s) class Dictionary(): def __init__(self, lang): self._whitelist = set() regexes = [] lang = lang.lower().replace('_', '-') while True: path = os.path.join(datadir, lang) try: file = open(path, 'rt', encoding='UTF-8') except FileNotFoundError: [lang, *suffix] = lang.rsplit('-', 1) if suffix: continue else: break macros = Macros() n = None # hi, pylint def error(reason): # no coverage return SyntaxError(reason, (file.name, n, 1, whole_line)) with file: for n, line in enumerate(file, 1): whole_line = line if line.startswith('#'): continue line = line.split() if not line: continue if line[0] == '*': [word] = line[1:] self._whitelist.add(word) self._whitelist.add(word.upper()) self._whitelist.add(word.title()) elif line[0][0] == '@': if (len(line) >= 4) and (line[0] == '@define') and (line[2] == '='): (_, name, _, *definition) = line definition = r'(?:{re})'.format(re=r'\s+'.join(definition)) try: re.compile(definition) except re.error as exc: # no coverage raise error(exc) try: macros[name] = macros.expand(definition) # pylint: disable=unsubscriptable-object except KeyError: # no coverage raise error('duplicate macro definition: {}'.format(name)) else: raise error('malformed @-command') # no coverage else: regex = r'\s+'.join(line) regex = macros.expand(regex) try: re.compile(regex) except re.error as exc: # no coverage raise error(exc) regexes += [regex] break if regexes: regex = r'\b(?:(?i){0})\b'.format( '|'.join(regexes) ) self._find = re.compile(regex).finditer else: self._find = _find_nothing def find(self, s): for match in self._find(s): yield (match.group(), match.start()) def is_whitelisted(self, word): return word in self._whitelist __all__ = ['Dictionary'] # vim:ts=4 sts=4 sw=4 et mwic-0.7.8/lib/pager.py0000644000000000000000000000501713564026733014715 0ustar00rootroot00000000000000# Copyright © 2015-2018 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. ''' automatic pager ''' import contextlib import io import os import shutil import subprocess as ipc import sys def _find_command(command): if shutil.which(command): return command def get_default_pager(): # Use "pager" if it exist: # https://www.debian.org/doc/debian-policy/#document-ch-customized-programs # Fall back to "more", which is in POSIX. return ( _find_command('pager') or 'more' ) @contextlib.contextmanager def autopager(*, raw_control_chars=False): if not sys.stdout.isatty(): yield return cmdline = os.environ.get('PAGER') or get_default_pager() if cmdline == 'cat': yield return env = None if 'LESS' not in os.environ: lessopt = 'FX' if raw_control_chars: lessopt += 'R' env = dict(env or os.environ, LESS=lessopt) if raw_control_chars and ('LV' not in os.environ): env = dict(env or os.environ, LV='-c') orig_stdout = sys.stdout try: pager = ipc.Popen(cmdline, shell=True, stdin=ipc.PIPE, env=env) try: sys.stdout = io.TextIOWrapper(pager.stdin, encoding=orig_stdout.encoding, ) try: yield finally: sys.stdout.close() finally: pager.wait() finally: sys.stdout = orig_stdout __all__ = [ 'autopager', ] # vim:ts=4 sts=4 sw=4 et mwic-0.7.8/lib/text.py0000644000000000000000000000373513564026733014610 0ustar00rootroot00000000000000# Copyright © 2013-2018 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. ''' text manipulation functions ''' import functools import re def ltrim(s, n, *, char='…'): if len(s) <= n: return s if n <= 1: return char return char + s[-n+1:] def rtrim(s, n, *, char='…'): if len(s) <= n: return s if n <= 1: return char return s[:n-1] + char _camel_case_split = re.compile('([A-Z][^A-Z]*)').split def camel_case_tokenizer(tokenizer): @functools.wraps(tokenizer) def new_tokenizer(s): for word, offset in tokenizer(s): if word.isupper(): yield word, offset continue for subword in _camel_case_split(word): if subword: yield subword, offset offset += len(subword) return new_tokenizer __all__ = [ 'camel_case_tokenizer', 'ltrim', 'rtrim', ] # vim:ts=4 sts=4 sw=4 et mwic-0.7.8/misc/0000755000000000000000000000000013564026733013427 5ustar00rootroot00000000000000mwic-0.7.8/misc/mwic4po0000755000000000000000000000303113564026733014734 0ustar00rootroot00000000000000#!/bin/sh # Copyright © 2016 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. set -e -u prog=$(basename "$0") if [ $# -ne 1 ] then printf 'Usage: %s \n' "$prog" >&2 exit 1 fi pofile="$1" lang=$( msggrep -K -e '^$' --force-po < "$pofile" \ | msgexec cat \ | sed -n -e 's/^Language: *//p' ) if [ -z "$lang" ] then printf '%s: no language declared\n' "$pofile" >&2 exit 1 fi exec msgexec sh -c 'cat; printf "\n"' < "$pofile" \ | mwic --language "$lang" # vim:ts=4 sts=4 sw=4 et mwic-0.7.8/mwic0000755000000000000000000000253313564026733013364 0ustar00rootroot00000000000000#!/usr/bin/env python3 # encoding=UTF-8 # Copyright © 2013-2018 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. import sys basedir = None if basedir is not None: sys.path[:0] = [basedir] import lib.cli # pylint: disable=wrong-import-position if __name__ == '__main__': lib.cli.main() # vim:ts=4 sts=4 sw=4 et mwic-0.7.8/private/0000755000000000000000000000000013564026735014150 5ustar00rootroot00000000000000mwic-0.7.8/private/check-rst0000755000000000000000000000311113564026733015753 0ustar00rootroot00000000000000#!/bin/sh # Copyright © 2016-2018 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. set -e -u here=${0%/*} here=${here#./} root="$here/../" root=${root#private/../} rst2xml=$(command -v rst2xml) \ || rst2xml=$(command -v rst2xml.py) \ || { printf 'rst2xml not found\n' >&2; exit 1; } rst2xml=${rst2xml##*/} options='--input-encoding=UTF-8 --strict' if [ $# -eq 0 ] then find "${root}doc" -type f -name '*.rst' grep -rwl 'ft[=]rst' "${root:-.}" else printf '%s\n' "$@" fi | xargs -L1 -t -I{} "$rst2xml" $options {} /dev/null # vim:ts=4 sts=4 sw=4 et mwic-0.7.8/private/run-pylint0000755000000000000000000000277013564026733016223 0ustar00rootroot00000000000000#!/bin/sh # Copyright © 2015-2019 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. set -e -u PYTHON=${PYTHON:-python3} "$PYTHON" -m pylint --version >/dev/null if [ $# -eq 0 ] then pyscripts=$(grep -l -r '^#!.*python' .) set -- lib tests $pyscripts fi log=$(mktemp -t pylint.XXXXXX) "$PYTHON" -m pylint "$@" > "$log" || [ $? != 1 ] ! grep -P '^\S+:' "$log" \ | grep -v -P '^(?!lib/).*: missing-(\w+-)?docstring ' \ | grep '.' || exit 1 rm "$log" # vim:ts=4 sts=4 sw=4 et mwic-0.7.8/private/update-branch-coverage0000755000000000000000000000421513564026733020404 0ustar00rootroot00000000000000#!/usr/bin/env python3 # Copyright © 2014 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. import io import os import sys import nose import nose.plugins.cover class Coverage(nose.plugins.cover.Coverage): stream = None def report(self, stream): return super().report(self.stream) basedir = os.path.join( os.path.dirname(__file__), os.pardir, ) def main(): argv = [ sys.argv[0], '--with-coverage', '--cover-package=lib', '--cover-erase', ] path = os.path.join( 'tests', 'coverage' ) plugin = Coverage() report_stream = plugin.stream = io.StringIO() print('Generated automatically by private/update-branch-coverage. ' 'Do not edit.\n', file=report_stream) ok = nose.run(argv=argv, plugins=[plugin]) if not ok: sys.exit(1) report_stream.seek(0) with open(path + '.tmp', 'wt', encoding='ASCII') as file: for line in report_stream: line = line.rstrip() print(line, file=file) os.rename(path + '.tmp', path) if __name__ == '__main__': main() # vim:ts=4 sts=4 sw=4 et mwic-0.7.8/private/update-version0000755000000000000000000000052513564026733017043 0ustar00rootroot00000000000000#!/bin/sh set -e -u export version=${1:?"no version number provided"} export date="$(date -u --rfc-3339=date)" PS4='$ ' set -x dch -m -v "$version" -u low -c doc/changelog perl -pi -e 's/^__version__ = '"'"'\K[\w.]+/$ENV{version}/' lib/*.py perl -pi -e 's/^:version: \S+ \K[\w.]+/$ENV{version}/; s/^(:date:) \K[0-9-]+/$ENV{date}/' doc/*.rst mwic-0.7.8/tests/0000755000000000000000000000000013564026733013636 5ustar00rootroot00000000000000mwic-0.7.8/tests/__init__.py0000644000000000000000000000004513564026733015746 0ustar00rootroot00000000000000type(...) # Python >= 3 is required mwic-0.7.8/tests/alice.exp0000644000000000000000000000136213564026733015433 0ustar00rootroot00000000000000labelled: | …helves as she passed; it was labelled ‘ORANGE MARMALADE’, ^^^^^^^^ cubpoards: | …d that they were filled with cubpoards and boook-shelves; ^^^^^^^^^ aftcrwards, occured: | …!’ (when she thought it over aftcrwards, it occured to her ^^^^^^^^^^ ^^^^^^^ ni: | …no pictures or conversations ni it, ‘and what | …s nothing so VERY remarkable ni that; nor did Alice think it… ^^ boook: | …wice she had peeped into the boook her | is the use of a boook,’ thought Alice ‘without pic… | …re filled with cubpoards and boook-shelves; ^^^^^ mwic-0.7.8/tests/alice.txt0000644000000000000000000000515713564026733015464 0ustar00rootroot00000000000000Down the Rabbit-Hole ==================== Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the boook her sister was reading, but it had no pictures or conversations ni it, ‘and what is the use of a boook,’ thought Alice ‘without pictures or conversation?’ So she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), whether the pleasure of making a daisy-chain would be worth the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her. There was nothing so VERY remarkable ni that; nor did Alice think it so VERY much out of the way to hear the Rabbit say to itself, ‘Oh dear! Oh dear! I shall be late!’ (when she thought it over aftcrwards, it occured to her that she ought to have wondered at this, but at the time it all seemed quite natural); but when the Rabbit actually TOOK A WATCH OUT OF ITS WAISTCOAT— POCKET, and looked at it, and then hurried on, Alice started to her feet, for it flashed across her mind that she had never before seen a rabbit with either a waistcoat-pocket, or a watch to take out of it, and burning with curiosity, she ran across the field after it, and fortunately was just in time to see it pop down a large rabbit-hole under the hedge. In another moment down went Alice after it, never once considering how in the world she was to get out again. The rabbit-hole went straight on like a tunnel for some way, and then dipped suddenly down, so suddenly that Alice had not a moment to think about stopping herself before she found herself falling down a very deep well. Either the well was very deep, or she fell very slowly, for she had plenty of time as she went down to look about her and to wonder what was going to happen next. First, she tried to look down and make out what she was coming to, but it was too dark to see anything; then she looked at the sides of the well, and noticed that they were filled with cubpoards and boook-shelves; here and there she saw maps and pictures hung upon pegs. She took down a jar from one of the shelves as she passed; it was labelled ‘ORANGE MARMALADE’, but to her great disappointment it was empty: she did not like to drop the jar for fear of killing somebody, so managed to put it into one of the cupboards as she fell past it. ‘Well!’ thought Alice to herself, ‘after such a fall as this, I shall think nothing of tumbling down stairs! How brave they’ll all think me at home! Why, I wouldn't say anything about it, even if I fell off the top of the house!’ mwic-0.7.8/tests/coverage0000644000000000000000000000223113564026733015352 0ustar00rootroot00000000000000Generated automatically by private/update-branch-coverage. Do not edit. Name Stmts Miss Branch BrPart Cover Missing ------------------------------------------------------------- lib/__init__.py 1 0 2 1 67% 5->exit lib/cli.py 219 41 102 20 76% 62-63, 114, 125-131, 151, 163-169, 173, 187, 194, 221-222, 233, 236-238, 254-257, 278, 282, 285-287, 292, 308-316, 325-327, 65->67, 113->114, 124->125, 137->139, 150->151, 172->173, 185->187, 193->194, 220->221, 232->233, 235->236, 253->254, 265->267, 267->229, 277->278, 281->282, 284->285, 291->292, 307->308, 320->273 lib/colors.py 36 5 12 3 83% 38, 45, 52-53, 62, 44->45, 51->52, 61->62 lib/data.py 56 3 14 2 90% 38-39, 86, 35->38, 85->86 lib/extdict.py 61 0 32 2 98% 84->88, 98->101 lib/intdict.py 93 0 24 0 100% lib/pager.py 40 25 12 1 31% 33-34, 40, 50-76, 47->50 lib/text.py 28 0 16 0 100% ------------------------------------------------------------- TOTAL 534 74 214 29 82% mwic-0.7.8/tests/multiword-a-e.exp0000644000000000000000000000012213564026733017035 0ustar00rootroot00000000000000a eight: | a eight-byte word ^^^^^^^ a eighth: | a eighth argument ^^^^^^^^ mwic-0.7.8/tests/multiword-a-e.txt0000644000000000000000000000004413564026733017063 0ustar00rootroot00000000000000a eight-byte word a eighth argument mwic-0.7.8/tests/multiword-a-h.exp0000644000000000000000000000015513564026733017046 0ustar00rootroot00000000000000a HTML: | a HTML document ^^^^^^ a HTTP: | a HTTP request ^^^^^^ a HTTPS: | a HTTPS request ^^^^^^^ mwic-0.7.8/tests/multiword-a-h.txt0000644000000000000000000000005713564026733017072 0ustar00rootroot00000000000000a HTML document a HTTP request a HTTPS request mwic-0.7.8/tests/multiword-a-o.exp0000644000000000000000000000004013564026733017046 0ustar00rootroot00000000000000a old: | a old version ^^^^^ mwic-0.7.8/tests/multiword-a-o.txt0000644000000000000000000000001613564026733017074 0ustar00rootroot00000000000000a old version mwic-0.7.8/tests/multiword-a-u.exp0000644000000000000000000000004113564026733017055 0ustar00rootroot00000000000000a Ubuntu: | a Ubuntu ^^^^^^^^ mwic-0.7.8/tests/multiword-a-u.txt0000644000000000000000000000001113564026733017075 0ustar00rootroot00000000000000a Ubuntu mwic-0.7.8/tests/multiword-a-x.exp0000644000000000000000000000014613564026733017066 0ustar00rootroot00000000000000a XML: | a XML file ^^^^^ a XSL: | a XSL transformation ^^^^^ a XSLT: | a XSLT object ^^^^^^ mwic-0.7.8/tests/multiword-a-x.txt0000644000000000000000000000005613564026733017111 0ustar00rootroot00000000000000a XML file a XSL transformation a XSLT object mwic-0.7.8/tests/multiword-a.k.a.exp0000644000000000000000000000003613564026733017267 0ustar00rootroot00000000000000a.k.a: | X a.k.a Y ^^^^^ mwic-0.7.8/tests/multiword-a.k.a.txt0000644000000000000000000000002513564026733017310 0ustar00rootroot00000000000000X a.k.a Y X a.k.a. Y mwic-0.7.8/tests/multiword-allow-to.exp0000644000000000000000000000115213564026733020135 0ustar00rootroot00000000000000allow to: | I allow to go. ^^^^^^^^ authorize to: | I authorize to go. ^^^^^^^^^^^^ permit to: | I permit to go. ^^^^^^^^^ allowing to: | I'm allowing to go. ^^^^^^^^^^^ authorizing to: | I'm authorizing to go. ^^^^^^^^^^^^^^ allows to: | It allows to go. ^^^^^^^^^ authorizes to: | It authorizes to go. ^^^^^^^^^^^^^ permits to: | It permits to go. ^^^^^^^^^^ authorise: | I authorise to go. ^^^^^^^^^ authorised: | I'm authorised to go. ^^^^^^^^^^ authorising: | I'm authorising to go. ^^^^^^^^^^^ authorises: | It authorises to go. ^^^^^^^^^^ mwic-0.7.8/tests/multiword-allow-to.txt0000644000000000000000000000050113564026733020155 0ustar00rootroot00000000000000I allow to go. I'm allowing to go. It allows to go. I'm allowed to go. I authorise to go. I'm authorising to go. It authorises to go. I'm authorised to go. I authorize to go. I'm authorizing to go. It authorizes to go. I'm authorized to go. I permit to go. I'm permitting to go. It permits to go. I'm permitted to go. mwic-0.7.8/tests/multiword-allow-to@en-GB.alt0000644000000000000000000000110613564026733021031 0ustar00rootroot00000000000000allow to: | I allow to go. ^^^^^^^^ authorise to: | I authorise to go. ^^^^^^^^^^^^ authorize to: | I authorize to go. ^^^^^^^^^^^^ permit to: | I permit to go. ^^^^^^^^^ allowing to: | I'm allowing to go. ^^^^^^^^^^^ authorising to: | I'm authorising to go. ^^^^^^^^^^^^^^ authorizing to: | I'm authorizing to go. ^^^^^^^^^^^^^^ allows to: | It allows to go. ^^^^^^^^^ authorises to: | It authorises to go. ^^^^^^^^^^^^^ authorizes to: | It authorizes to go. ^^^^^^^^^^^^^ permits to: | It permits to go. ^^^^^^^^^^ mwic-0.7.8/tests/multiword-allow-to@en-GB.exp0000644000000000000000000000115213564026733021046 0ustar00rootroot00000000000000allow to: | I allow to go. ^^^^^^^^ authorise to: | I authorise to go. ^^^^^^^^^^^^ permit to: | I permit to go. ^^^^^^^^^ allowing to: | I'm allowing to go. ^^^^^^^^^^^ authorising to: | I'm authorising to go. ^^^^^^^^^^^^^^ allows to: | It allows to go. ^^^^^^^^^ authorises to: | It authorises to go. ^^^^^^^^^^^^^ permits to: | It permits to go. ^^^^^^^^^^ authorize: | I authorize to go. ^^^^^^^^^ authorized: | I'm authorized to go. ^^^^^^^^^^ authorizing: | I'm authorizing to go. ^^^^^^^^^^^ authorizes: | It authorizes to go. ^^^^^^^^^^ mwic-0.7.8/tests/multiword-also-also.exp0000644000000000000000000000011013564026733020262 0ustar00rootroot00000000000000Also notice also: | Also notice also the missing... ^^^^^^^^^^^^^^^^ mwic-0.7.8/tests/multiword-also-also.txt0000644000000000000000000000004013564026733020307 0ustar00rootroot00000000000000Also notice also the missing... mwic-0.7.8/tests/multiword-amount-of-times.exp0000644000000000000000000000012613564026733021423 0ustar00rootroot00000000000000amount of times: | The maximum amount of times that... ^^^^^^^^^^^^^^^ mwic-0.7.8/tests/multiword-amount-of-times.txt0000644000000000000000000000011013564026733021437 0ustar00rootroot00000000000000The maximum amount of times that... The maximum number of times that... mwic-0.7.8/tests/multiword-an-other.exp0000644000000000000000000000024613564026733020117 0ustar00rootroot00000000000000a another: | a another object ^^^^^^^^^ a other: | a other object ^^^^^^^ an another: | an another object ^^^^^^^^^^ an other: | an other object ^^^^^^^^ mwic-0.7.8/tests/multiword-an-other.txt0000644000000000000000000000012113564026733020132 0ustar00rootroot00000000000000a other object an other object a another object an another object another object mwic-0.7.8/tests/multiword-an-u.exp0000644000000000000000000000141513564026733017241 0ustar00rootroot00000000000000an Unicode: | an Unicode character ^^^^^^^^^^ an unary: | an unary object ^^^^^^^^ an unified: | an unified object ^^^^^^^^^^ an uniform: | an uniform object ^^^^^^^^^^ an uniformly: | an uniformly sized object ^^^^^^^^^^^^ an union: | an union ^^^^^^^^ an unique: | an unique object ^^^^^^^^^ an uniquely: | an uniquely named object ^^^^^^^^^^^ an unit: | an unit ^^^^^^^ an universal: | an universal object ^^^^^^^^^^^^ an universally: | an universally unique identifier ^^^^^^^^^^^^^^ an usage: | an usage ^^^^^^^^ an use: | an use ^^^^^^ an useful: | an useful object ^^^^^^^^^ an useless: | an useless object ^^^^^^^^^^ an user: | an user ^^^^^^^ an username: | an username ^^^^^^^^^^^ an utility: | an utility ^^^^^^^^^^ mwic-0.7.8/tests/multiword-an-u.txt0000644000000000000000000000044513564026733017266 0ustar00rootroot00000000000000an Unicode character an unary object an unified object an uniform object an uniformly sized object an union an unique object an uniquely named object an unit an universal object an universally unique identifier an usage an use an useful object an useless object an user an username an utility mwic-0.7.8/tests/multiword-awhile.exp0000644000000000000000000000065413564026733017656 0ustar00rootroot00000000000000take awhile: | It can take awhile. ^^^^^^^^^^^ takes awhile: | It takes awhile. ^^^^^^^^^^^^ took awhile: | It took awhile. ^^^^^^^^^^^ after awhile: | It will be done after awhile. ^^^^^^^^^^^^ in awhile: | It will be done in awhile. ^^^^^^^^^ taking awhile: | It's taking awhile. ^^^^^^^^^^^^^ for awhile: | Please wait for awhile. ^^^^^^^^^^ mwic-0.7.8/tests/multiword-awhile.txt0000644000000000000000000000023313564026733017672 0ustar00rootroot00000000000000It can take awhile. It takes awhile. It took awhile. It's taking awhile. It will be done after awhile. It will be done in awhile. Please wait for awhile. mwic-0.7.8/tests/multiword-be-be.exp0000644000000000000000000000113013564026733017345 0ustar00rootroot00000000000000He's is: | He's is a duplicate. ^^^^^^^ am been: | I am been a duplicate. ^^^^^^^ I'm am: | I'm am a duplicate. ^^^^^^ is been: | It is been a duplicate. ^^^^^^^ being being: | It is being being a duplicate. ^^^^^^^^^^^ was been: | It was been a duplicate. ^^^^^^^^ It's is: | It's is a duplicate. ^^^^^^^ She's is: | She's is a duplicate. ^^^^^^^^ were been: | They were been a duplicate. ^^^^^^^^^ They're are: | They're are duplicates. ^^^^^^^^^^^ We're are: | We're are duplicates. ^^^^^^^^^ You're are: | You're are a duplicate. ^^^^^^^^^^ mwic-0.7.8/tests/multiword-be-be.txt0000644000000000000000000000123213564026733017373 0ustar00rootroot00000000000000I'm am a duplicate. You're are a duplicate. He's is a duplicate. She's is a duplicate. It's is a duplicate. We're are duplicates. They're are duplicates. I am been a duplicate. It is been a duplicate. It was been a duplicate. They were been a duplicate. He's been different. She's been different. It's been different. I'm being different. I am being different. He's being different. She's being different. It's being different. It is being different. It was being different. You're being different. We're being different. They're being different. They were being different. It will be being different. It has been being different. It is being being a duplicate. mwic-0.7.8/tests/multiword-be-consisted-of.exp0000644000000000000000000000431613564026733021365 0ustar00rootroot00000000000000He's consisted of: | He's consisted of two parts. ^^^^^^^^^^^^^^^^^ He's not consisted of: | He's not consisted of two parts. ^^^^^^^^^^^^^^^^^^^^^ ain't consisted of: | I ain't consisted of two parts. ^^^^^^^^^^^^^^^^^^ am consisted of: | I am consisted of two parts. ^^^^^^^^^^^^^^^ I'm consisted of: | I'm consisted of two parts. ^^^^^^^^^^^^^^^^ I'm not consisted of: | I'm not consisted of two parts. ^^^^^^^^^^^^^^^^^^^^ been consisted of: | It has been consisted of two parts. ^^^^^^^^^^^^^^^^^ being consisted of: | It is being consisted of two parts. ^^^^^^^^^^^^^^^^^^ is consisted of: | It is consisted of two parts. ^^^^^^^^^^^^^^^ is not consisted of: | It is not consisted of two parts. ^^^^^^^^^^^^^^^^^^^ isn't consisted of: | It isn't consisted of two parts. ^^^^^^^^^^^^^^^^^^ was consisted of: | It was consisted of two parts. ^^^^^^^^^^^^^^^^ was not consisted of: | It was not consisted of two parts. ^^^^^^^^^^^^^^^^^^^^ wasn't consisted of: | It wasn't consisted of two parts. ^^^^^^^^^^^^^^^^^^^ be consisted of: | It will be consisted of two parts. ^^^^^^^^^^^^^^^ It's consisted of: | It's consisted of two parts. ^^^^^^^^^^^^^^^^^ It's not consisted of: | It's not consisted of two parts. ^^^^^^^^^^^^^^^^^^^^^ She's consisted of: | She's consisted of two parts. ^^^^^^^^^^^^^^^^^^ She's not consisted of: | She's not consisted of two parts. ^^^^^^^^^^^^^^^^^^^^^^ were consisted of: | They were consisted of two parts. ^^^^^^^^^^^^^^^^^ were not consisted of: | They were not consisted of two parts. ^^^^^^^^^^^^^^^^^^^^^ weren't consisted of: | They weren't consisted of two parts. ^^^^^^^^^^^^^^^^^^^^ They're consisted of: | They're consisted of two parts. ^^^^^^^^^^^^^^^^^^^^ They're not consisted of: | They're not consisted of two parts. ^^^^^^^^^^^^^^^^^^^^^^^^ We're consisted of: | We're consisted of two parts. ^^^^^^^^^^^^^^^^^^ We're not consisted of: | We're not consisted of two parts. ^^^^^^^^^^^^^^^^^^^^^^ You're consisted of: | You're consisted of two parts. ^^^^^^^^^^^^^^^^^^^ You're not consisted of: | You're not consisted of two parts. ^^^^^^^^^^^^^^^^^^^^^^^ mwic-0.7.8/tests/multiword-be-consisted-of.txt0000644000000000000000000000163113564026733021405 0ustar00rootroot00000000000000I'm consisted of two parts. I am consisted of two parts. He's consisted of two parts. She's consisted of two parts. It's consisted of two parts. It is consisted of two parts. It was consisted of two parts. You're consisted of two parts. We're consisted of two parts. They're consisted of two parts. They were consisted of two parts. It will be consisted of two parts. It has been consisted of two parts. It is being consisted of two parts. I'm not consisted of two parts. I ain't consisted of two parts. He's not consisted of two parts. She's not consisted of two parts. It's not consisted of two parts. It isn't consisted of two parts. It is not consisted of two parts. It wasn't consisted of two parts. It was not consisted of two parts. You're not consisted of two parts. We're not consisted of two parts. They're not consisted of two parts. They weren't consisted of two parts. They were not consisted of two parts. mwic-0.7.8/tests/multiword-be-disable.exp0000644000000000000000000000302213564026733020364 0ustar00rootroot00000000000000He's disable: | He's disable. ^^^^^^^^^^^^ He's not disable: | He's not disable. ^^^^^^^^^^^^^^^^ ain't disable: | I ain't disable. ^^^^^^^^^^^^^ am disable: | I am disable. ^^^^^^^^^^ I'm disable: | I'm disable. ^^^^^^^^^^^ I'm not disable: | I'm not disable. ^^^^^^^^^^^^^^^ been disable: | It has been disable. ^^^^^^^^^^^^ being disable: | It is being disable. ^^^^^^^^^^^^^ is disable: | It is disable. ^^^^^^^^^^ is not disable: | It is not disable. ^^^^^^^^^^^^^^ isn't disable: | It isn't disable. ^^^^^^^^^^^^^ was disable: | It was disable. ^^^^^^^^^^^ was not disable: | It was not disable. ^^^^^^^^^^^^^^^ wasn't disable: | It wasn't disable. ^^^^^^^^^^^^^^ be disable: | It will be disable. ^^^^^^^^^^ It's disable: | It's disable. ^^^^^^^^^^^^ It's not disable: | It's not disable. ^^^^^^^^^^^^^^^^ She's disable: | She's disable. ^^^^^^^^^^^^^ She's not disable: | She's not disable. ^^^^^^^^^^^^^^^^^ were disable: | They were disable. ^^^^^^^^^^^^ were not disable: | They were not disable. ^^^^^^^^^^^^^^^^ weren't disable: | They weren't disable. ^^^^^^^^^^^^^^^ They're disable: | They're disable. ^^^^^^^^^^^^^^^ They're not disable: | They're not disable. ^^^^^^^^^^^^^^^^^^^ We're disable: | We're disable. ^^^^^^^^^^^^^ We're not disable: | We're not disable. ^^^^^^^^^^^^^^^^^ You're disable: | You're disable. ^^^^^^^^^^^^^^ You're not disable: | You're not disable. ^^^^^^^^^^^^^^^^^^ mwic-0.7.8/tests/multiword-be-disable.txt0000644000000000000000000000076513564026733020422 0ustar00rootroot00000000000000I'm disable. I am disable. He's disable. She's disable. It's disable. It is disable. It was disable. You're disable. We're disable. They're disable. They were disable. It will be disable. It has been disable. It is being disable. I'm not disable. I ain't disable. He's not disable. She's not disable. It's not disable. It isn't disable. It is not disable. It wasn't disable. It was not disable. You're not disable. We're not disable. They're not disable. They weren't disable. They were not disable. mwic-0.7.8/tests/multiword-be-enable.exp0000644000000000000000000000267613564026733020225 0ustar00rootroot00000000000000He's enable: | He's enable. ^^^^^^^^^^^ He's not enable: | He's not enable. ^^^^^^^^^^^^^^^ ain't enable: | I ain't enable. ^^^^^^^^^^^^ am enable: | I am enable. ^^^^^^^^^ I'm enable: | I'm enable. ^^^^^^^^^^ I'm not enable: | I'm not enable. ^^^^^^^^^^^^^^ been enable: | It has been enable. ^^^^^^^^^^^ being enable: | It is being enable. ^^^^^^^^^^^^ is enable: | It is enable. ^^^^^^^^^ is not enable: | It is not enable. ^^^^^^^^^^^^^ isn't enable: | It isn't enable. ^^^^^^^^^^^^ was enable: | It was enable. ^^^^^^^^^^ was not enable: | It was not enable. ^^^^^^^^^^^^^^ wasn't enable: | It wasn't enable. ^^^^^^^^^^^^^ be enable: | It will be enable. ^^^^^^^^^ It's enable: | It's enable. ^^^^^^^^^^^ It's not enable: | It's not enable. ^^^^^^^^^^^^^^^ She's enable: | She's enable. ^^^^^^^^^^^^ She's not enable: | She's not enable. ^^^^^^^^^^^^^^^^ were enable: | They were enable. ^^^^^^^^^^^ were not enable: | They were not enable. ^^^^^^^^^^^^^^^ weren't enable: | They weren't enable. ^^^^^^^^^^^^^^ They're enable: | They're enable. ^^^^^^^^^^^^^^ They're not enable: | They're not enable. ^^^^^^^^^^^^^^^^^^ We're enable: | We're enable. ^^^^^^^^^^^^ We're not enable: | We're not enable. ^^^^^^^^^^^^^^^^ You're enable: | You're enable. ^^^^^^^^^^^^^ You're not enable: | You're not enable. ^^^^^^^^^^^^^^^^^ mwic-0.7.8/tests/multiword-be-enable.txt0000644000000000000000000000073113564026733020236 0ustar00rootroot00000000000000I'm enable. I am enable. He's enable. She's enable. It's enable. It is enable. It was enable. You're enable. We're enable. They're enable. They were enable. It will be enable. It has been enable. It is being enable. I'm not enable. I ain't enable. He's not enable. She's not enable. It's not enable. It isn't enable. It is not enable. It wasn't enable. It was not enable. You're not enable. We're not enable. They're not enable. They weren't enable. They were not enable. mwic-0.7.8/tests/multiword-be-ran.exp0000644000000000000000000000230213564026733017541 0ustar00rootroot00000000000000He's not ran: | He's not ran. ^^^^^^^^^^^^ He's ran: | He's ran. ^^^^^^^^ ain't ran: | I ain't ran. ^^^^^^^^^ am ran: | I am ran. ^^^^^^ I'm not ran: | I'm not ran. ^^^^^^^^^^^ I'm ran: | I'm ran. ^^^^^^^ been ran: | It has been ran. ^^^^^^^^ being ran: | It is being ran. ^^^^^^^^^ is not ran: | It is not ran. ^^^^^^^^^^ is ran: | It is ran. ^^^^^^ isn't ran: | It isn't ran. ^^^^^^^^^ was not ran: | It was not ran. ^^^^^^^^^^^ was ran: | It was ran. ^^^^^^^ wasn't ran: | It wasn't ran. ^^^^^^^^^^ be ran: | It will be ran. ^^^^^^ It's not ran: | It's not ran. ^^^^^^^^^^^^ It's ran: | It's ran. ^^^^^^^^ She's not ran: | She's not ran. ^^^^^^^^^^^^^ She's ran: | She's ran. ^^^^^^^^^ were not ran: | They were not ran. ^^^^^^^^^^^^ were ran: | They were ran. ^^^^^^^^ weren't ran: | They weren't ran. ^^^^^^^^^^^ They're not ran: | They're not ran. ^^^^^^^^^^^^^^^ They're ran: | They're ran. ^^^^^^^^^^^ We're not ran: | We're not ran. ^^^^^^^^^^^^^ We're ran: | We're ran. ^^^^^^^^^ You're not ran: | You're not ran. ^^^^^^^^^^^^^^ You're ran: | You're ran. ^^^^^^^^^^ mwic-0.7.8/tests/multiword-be-ran.txt0000644000000000000000000000060513564026733017570 0ustar00rootroot00000000000000I'm ran. I am ran. He's ran. She's ran. It's ran. It is ran. It was ran. You're ran. We're ran. They're ran. They were ran. It will be ran. It has been ran. It is being ran. I'm not ran. I ain't ran. He's not ran. She's not ran. It's not ran. It isn't ran. It is not ran. It wasn't ran. It was not ran. You're not ran. We're not ran. They're not ran. They weren't ran. They were not ran. mwic-0.7.8/tests/multiword-be-suppose.exp0000644000000000000000000000345213564026733020466 0ustar00rootroot00000000000000He's not suppose: | He's not suppose to return. ^^^^^^^^^^^^^^^^ He's suppose: | He's suppose to return. ^^^^^^^^^^^^ ain't suppose: | I ain't suppose to return. ^^^^^^^^^^^^^ am suppose: | I am suppose to return. ^^^^^^^^^^ I'm not suppose: | I'm not suppose to return. ^^^^^^^^^^^^^^^ I'm suppose: | I'm suppose to return. ^^^^^^^^^^^ been suppose: | It has been suppose to return. ^^^^^^^^^^^^ being suppose: | It is being suppose to return. ^^^^^^^^^^^^^ is not suppose: | It is not suppose to return. ^^^^^^^^^^^^^^ is suppose: | It is suppose to return. ^^^^^^^^^^ isn't suppose: | It isn't suppose to return. ^^^^^^^^^^^^^ was not suppose: | It was not suppose to return. ^^^^^^^^^^^^^^^ was suppose: | It was suppose to return. ^^^^^^^^^^^ wasn't suppose: | It wasn't suppose to return. ^^^^^^^^^^^^^^ be suppose: | It will be suppose to return. ^^^^^^^^^^ It's not suppose: | It's not suppose to return. ^^^^^^^^^^^^^^^^ It's suppose: | It's suppose to return. ^^^^^^^^^^^^ She's not suppose: | She's not suppose to return. ^^^^^^^^^^^^^^^^^ She's suppose: | She's suppose to return. ^^^^^^^^^^^^^ were not suppose: | They were not suppose to return. ^^^^^^^^^^^^^^^^ were suppose: | They were suppose to return. ^^^^^^^^^^^^ weren't suppose: | They weren't suppose to return. ^^^^^^^^^^^^^^^ They're not suppose: | They're not suppose to return. ^^^^^^^^^^^^^^^^^^^ They're suppose: | They're suppose to return. ^^^^^^^^^^^^^^^ We're not suppose: | We're not suppose to return. ^^^^^^^^^^^^^^^^^ We're suppose: | We're suppose to return. ^^^^^^^^^^^^^ You're not suppose: | You're not suppose to return. ^^^^^^^^^^^^^^^^^^ You're suppose: | You're suppose to return. ^^^^^^^^^^^^^^ mwic-0.7.8/tests/multiword-be-suppose.txt0000644000000000000000000000141513564026733020506 0ustar00rootroot00000000000000I'm suppose to return. I am suppose to return. He's suppose to return. She's suppose to return. It's suppose to return. It is suppose to return. It was suppose to return. You're suppose to return. We're suppose to return. They're suppose to return. They were suppose to return. It will be suppose to return. It has been suppose to return. It is being suppose to return. I'm not suppose to return. I ain't suppose to return. He's not suppose to return. She's not suppose to return. It's not suppose to return. It isn't suppose to return. It is not suppose to return. It wasn't suppose to return. It was not suppose to return. You're not suppose to return. We're not suppose to return. They're not suppose to return. They weren't suppose to return. They were not suppose to return. mwic-0.7.8/tests/multiword-blu-ray.exp0000644000000000000000000000016613564026733017756 0ustar00rootroot00000000000000Blu-Ray: | Blu-Ray ^^^^^^^ Blue-Ray: | Blue-Ray ^^^^^^^^ Blue-ray: | Blue-ray ^^^^^^^^ blu: | blu-ray ^^^ mwic-0.7.8/tests/multiword-blu-ray.txt0000644000000000000000000000005213564026733017773 0ustar00rootroot00000000000000Blue-Ray Blue-ray Blu-Ray Blu-ray blu-ray mwic-0.7.8/tests/multiword-can-not.exp0000644000000000000000000000005513564026733017737 0ustar00rootroot00000000000000can not: | I can not look good. ^^^^^^^ mwic-0.7.8/tests/multiword-can-not.txt0000644000000000000000000000014313564026733017760 0ustar00rootroot00000000000000I can not look good. I cannot look good. I can not only look good, but I can look respectable too. mwic-0.7.8/tests/multiword-comprised-of.exp0000644000000000000000000000007713564026733020773 0ustar00rootroot00000000000000comprised of: | ... is comprised of ... ^^^^^^^^^^^^ mwic-0.7.8/tests/multiword-comprised-of.txt0000644000000000000000000000003013564026733021003 0ustar00rootroot00000000000000... is comprised of ... mwic-0.7.8/tests/multiword-dont-user.exp0000644000000000000000000000013213564026733020314 0ustar00rootroot00000000000000doesn't user: | doesn't user it ^^^^^^^^^^^^ don't user: | don't user it ^^^^^^^^^^ mwic-0.7.8/tests/multiword-dont-user.txt0000644000000000000000000000003613564026733020342 0ustar00rootroot00000000000000don't user it doesn't user it mwic-0.7.8/tests/multiword-e.g.exp0000644000000000000000000000015413564026733017051 0ustar00rootroot00000000000000e.g: | food (e.g, sausage) ^^^ i.e: | sausage (i.e, highly seasoned minced meat… ^^^ mwic-0.7.8/tests/multiword-e.g.txt0000644000000000000000000000024713564026733017077 0ustar00rootroot00000000000000food (e.g, sausage) food (e.g., sausage) sausage (i.e, highly seasoned minced meat stuffed in casings) sausage (i.e., highly seasoned minced meat stuffed in casings) mwic-0.7.8/tests/multiword-e.t.c.exp0000644000000000000000000000007313564026733017307 0ustar00rootroot00000000000000e.t.c: | events, actions, e.t.c. ^^^^^ mwic-0.7.8/tests/multiword-e.t.c.txt0000644000000000000000000000005613564026733017333 0ustar00rootroot00000000000000events, actions, e.t.c. events, actions, etc. mwic-0.7.8/tests/multiword-each-others.exp0000644000000000000000000000013713564026733020603 0ustar00rootroot00000000000000each others: | ... use each others files. | ... use each others' files. ^^^^^^^^^^^ mwic-0.7.8/tests/multiword-each-others.txt0000644000000000000000000000012313564026733020621 0ustar00rootroot00000000000000... use each others files. ... use each others' files. ... use each other's files. mwic-0.7.8/tests/multiword-easy-of.exp0000644000000000000000000000010213564026733017734 0ustar00rootroot00000000000000easy of: | flexibility and easy of use ^^^^^^^ mwic-0.7.8/tests/multiword-easy-of.txt0000644000000000000000000000007013564026733017763 0ustar00rootroot00000000000000flexibility and easy of use flexibility and ease of use mwic-0.7.8/tests/multiword-else-then.exp0000644000000000000000000000007313564026733020264 0ustar00rootroot00000000000000else then: | anything else then eggs ^^^^^^^^^ mwic-0.7.8/tests/multiword-else-then.txt0000644000000000000000000000006013564026733020303 0ustar00rootroot00000000000000anything else then eggs anything else than eggs mwic-0.7.8/tests/multiword-even-tough.exp0000644000000000000000000000006413564026733020461 0ustar00rootroot00000000000000even tough: | ..., even tough... ^^^^^^^^^^ mwic-0.7.8/tests/multiword-even-tough.txt0000644000000000000000000000005013564026733020477 0ustar00rootroot00000000000000..., even tough... ..., even though... mwic-0.7.8/tests/multiword-fist-time.exp0000644000000000000000000000005713564026733020303 0ustar00rootroot00000000000000fist time: | The fist time... ^^^^^^^^^ mwic-0.7.8/tests/multiword-fist-time.txt0000644000000000000000000000002113564026733020315 0ustar00rootroot00000000000000The fist time... mwic-0.7.8/tests/multiword-gpl.exp0000644000000000000000000000105713564026733017165 0ustar00rootroot00000000000000GNU Lesser Public License: | GNU Lesser Public License ^^^^^^^^^^^^^^^^^^^^^^^^^ GNU Library Public License: | GNU Library Public License ^^^^^^^^^^^^^^^^^^^^^^^^^^ GNU Public License: | GNU Public License ^^^^^^^^^^^^^^^^^^ either version 2 of the License: | either version 2 of the License. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Licence: | GNU Public Licence | GNU Lesser Public Licence | GNU Library Public Licence | either version 2 of the Licence, or... | either version 2 of the Licence. ^^^^^^^ mwic-0.7.8/tests/multiword-gpl.txt0000644000000000000000000000044413564026733017207 0ustar00rootroot00000000000000GNU Lesser Public Licence GNU Lesser Public License GNU Library Public Licence GNU Library Public License GNU Public Licence GNU Public License either version 2 of the Licence. either version 2 of the License. either version 2 of the Licence, or... either version 2 of the License, or... mwic-0.7.8/tests/multiword-gpl@en-GB.exp0000644000000000000000000000124213564026733020072 0ustar00rootroot00000000000000GNU Lesser Public Licence: | GNU Lesser Public Licence ^^^^^^^^^^^^^^^^^^^^^^^^^ GNU Lesser Public License: | GNU Lesser Public License ^^^^^^^^^^^^^^^^^^^^^^^^^ GNU Library Public Licence: | GNU Library Public Licence ^^^^^^^^^^^^^^^^^^^^^^^^^^ GNU Library Public License: | GNU Library Public License ^^^^^^^^^^^^^^^^^^^^^^^^^^ GNU Public Licence: | GNU Public Licence ^^^^^^^^^^^^^^^^^^ GNU Public License: | GNU Public License ^^^^^^^^^^^^^^^^^^ either version 2 of the Licence: | either version 2 of the Licence. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ either version 2 of the License: | either version 2 of the License. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ mwic-0.7.8/tests/multiword-is-t.alt0000644000000000000000000000073613564026733017246 0ustar00rootroot00000000000000ai’t: | It ai’t ... ^^^^ could’t: | It could’t ... ^^^^^^^ has’t: | It has’t ... ^^^^^ is’t: | It is’t ... ^^^^ should’t: | It should’t ... ^^^^^^^^ was’t: | It was’t ... ^^^^^ would’t: | It would’t ... ^^^^^^^ are’t: | They are’t ... ^^^^^ do’t: | They do’t ... ^^^^ were’t: | They were’t ... ^^^^^^ doen: | It doen’t ... ^^^^ hav: | They hav’t ... ^^^ mwic-0.7.8/tests/multiword-is-t.exp0000644000000000000000000000073013564026733017254 0ustar00rootroot00000000000000could’t: | It could’t ... ^^^^^^^ has’t: | It has’t ... ^^^^^ is’t: | It is’t ... ^^^^ should’t: | It should’t ... ^^^^^^^^ was’t: | It was’t ... ^^^^^ would’t: | It would’t ... ^^^^^^^ are’t: | They are’t ... ^^^^^ do’t: | They do’t ... ^^^^ were’t: | They were’t ... ^^^^^^ ai: | It ai’t ... ^^ doen: | It doen’t ... ^^^^ hav: | They hav’t ... ^^^ mwic-0.7.8/tests/multiword-is-t.txt0000644000000000000000000000030513564026733017275 0ustar00rootroot00000000000000It ai’t ... It is’t ... It was’t ... They are’t ... They were’t ... It doen’t ... They do’t ... It has’t ... They hav’t ... It could’t ... It should’t ... It would’t ... mwic-0.7.8/tests/multiword-iso.exp0000644000000000000000000000021413564026733017167 0ustar00rootroot00000000000000ISO-5589: | ISO-5589-1 ^^^^^^^^ ISO-8559: | ISO-8559-1 ^^^^^^^^ ISO-8858: | ISO-8858-1 ^^^^^^^^ ISO-8895: | ISO-8895-1 ^^^^^^^^ mwic-0.7.8/tests/multiword-iso.txt0000644000000000000000000000007013564026733017212 0ustar00rootroot00000000000000ISO-8858-1 ISO-8895-1 ISO-8559-1 ISO-5589-1 ISO-8859-1 mwic-0.7.8/tests/multiword-it-us.exp0000644000000000000000000000003613564026733017440 0ustar00rootroot00000000000000It us: | It us okay. ^^^^^ mwic-0.7.8/tests/multiword-it-us.txt0000644000000000000000000000005513564026733017464 0ustar00rootroot00000000000000It us okay. Is it us? Was it us? Were it us? mwic-0.7.8/tests/multiword-its-goal.exp0000644000000000000000000000013613564026733020117 0ustar00rootroot00000000000000It's goal: | It's goal is to… ^^^^^^^^^ It’s goal: | It’s goal is to… ^^^^^^^^^ mwic-0.7.8/tests/multiword-its-goal.txt0000644000000000000000000000007213564026733020141 0ustar00rootroot00000000000000It's goal is to… It’s goal is to… Its goal is to… mwic-0.7.8/tests/multiword-its-not.exp0000644000000000000000000000005013564026733017770 0ustar00rootroot00000000000000its not: | its not an object ^^^^^^^ mwic-0.7.8/tests/multiword-its-not.txt0000644000000000000000000000007713564026733020024 0ustar00rootroot00000000000000its not an object its not-yet-processed object it's an object mwic-0.7.8/tests/multiword-its-own.exp0000644000000000000000000000012213564026733017773 0ustar00rootroot00000000000000it's own: | on it's own ^^^^^^^^ it’s own: | on it’s own ^^^^^^^^ mwic-0.7.8/tests/multiword-its-own.txt0000644000000000000000000000004513564026733020022 0ustar00rootroot00000000000000on it's own on it’s own on its own mwic-0.7.8/tests/multiword-its.exp0000644000000000000000000000014413564026733017176 0ustar00rootroot00000000000000its a: | its a user ^^^^^ its an: | its an object ^^^^^^ its the: | its the object ^^^^^^^ mwic-0.7.8/tests/multiword-its.txt0000644000000000000000000000012413564026733017217 0ustar00rootroot00000000000000its a user its an object its the object it's a user it's an object it's the object mwic-0.7.8/tests/multiword-know-as.exp0000644000000000000000000000334613564026733017765 0ustar00rootroot00000000000000He's know as: | He's know as ... ^^^^^^^^^^^^ He's not know as: | He's not know as ... ^^^^^^^^^^^^^^^^ ain't know as: | I ain't know as ... ^^^^^^^^^^^^^ am know as: | I am know as ... ^^^^^^^^^^ I'm know as: | I'm know as ... ^^^^^^^^^^^ I'm not know as: | I'm not know as ... ^^^^^^^^^^^^^^^ been know as: | It has been know as ... ^^^^^^^^^^^^ also know as: | It is also know as ... ^^^^^^^^^^^^ being know as: | It is being know as ... ^^^^^^^^^^^^^ better know as: | It is better know as ... ^^^^^^^^^^^^^^ is know as: | It is know as ... ^^^^^^^^^^ is not know as: | It is not know as ... ^^^^^^^^^^^^^^ isn't know as: | It isn't know as ... ^^^^^^^^^^^^^ was know as: | It was know as ... ^^^^^^^^^^^ was not know as: | It was not know as ... ^^^^^^^^^^^^^^^ wasn't know as: | It wasn't know as ... ^^^^^^^^^^^^^^ be know as: | It will be know as ... ^^^^^^^^^^ It's know as: | It's know as ... ^^^^^^^^^^^^ It's not know as: | It's not know as ... ^^^^^^^^^^^^^^^^ She's know as: | She's know as ... ^^^^^^^^^^^^^ She's not know as: | She's not know as ... ^^^^^^^^^^^^^^^^^ were know as: | They were know as ... ^^^^^^^^^^^^ were not know as: | They were not know as ... ^^^^^^^^^^^^^^^^ weren't know as: | They weren't know as ... ^^^^^^^^^^^^^^^ They're know as: | They're know as ... ^^^^^^^^^^^^^^^ They're not know as: | They're not know as ... ^^^^^^^^^^^^^^^^^^^ We're know as: | We're know as ... ^^^^^^^^^^^^^ We're not know as: | We're not know as ... ^^^^^^^^^^^^^^^^^ You're know as: | You're know as ... ^^^^^^^^^^^^^^ You're not know as: | You're not know as ... ^^^^^^^^^^^^^^^^^^ mwic-0.7.8/tests/multiword-know-as.txt0000644000000000000000000000117313564026733020004 0ustar00rootroot00000000000000I'm know as ... I am know as ... He's know as ... She's know as ... It's know as ... It is know as ... It was know as ... You're know as ... We're know as ... They're know as ... They were know as ... It will be know as ... It has been know as ... It is being know as ... I'm not know as ... I ain't know as ... He's not know as ... She's not know as ... It's not know as ... It isn't know as ... It is not know as ... It wasn't know as ... It was not know as ... You're not know as ... We're not know as ... They're not know as ... They weren't know as ... They were not know as ... It is also know as ... It is better know as ... mwic-0.7.8/tests/multiword-let-s.exp0000644000000000000000000000205413564026733017425 0ustar00rootroot00000000000000let's a: | It let's a user go. ^^^^^^^ let's an: | It let's an individual go. ^^^^^^^^ let's her: | It let's her go. ^^^^^^^^^ let's him: | It let's him go. ^^^^^^^^^ let's it: | It let's it go. ^^^^^^^^ let's me: | It let's me go. ^^^^^^^^ let's one: | It let's one go. ^^^^^^^^^ let's the: | It let's the user go. ^^^^^^^^^ let's them: | It let's them go. ^^^^^^^^^^ let's us: | It let's us go. ^^^^^^^^ let's you: | It let's you go. ^^^^^^^^^ let’s a: | It let’s a user go. ^^^^^^^ let’s an: | It let’s an individual go. ^^^^^^^^ let’s her: | It let’s her go. ^^^^^^^^^ let’s him: | It let’s him go. ^^^^^^^^^ let’s it: | It let’s it go. ^^^^^^^^ let’s me: | It let’s me go. ^^^^^^^^ let’s one: | It let’s one go. ^^^^^^^^^ let’s the: | It let’s the user go. ^^^^^^^^^ let’s them: | It let’s them go. ^^^^^^^^^^ let’s us: | It let’s us go. ^^^^^^^^ let’s you: | It let’s you go. ^^^^^^^^^ mwic-0.7.8/tests/multiword-let-s.txt0000644000000000000000000000116613564026733017453 0ustar00rootroot00000000000000It let's one go. It let’s one go. It lets one go. It let's me go. It let’s me go. It lets me go. It let's you go. It let’s you go. It lets you go. It let's him go. It let’s him go. It lets him go. It let's her go. It let’s her go. It lets her go. It let's it go. It let’s it go. It lets it go. It let's us go. It let’s us go. It lets us go. It let's them go. It let’s them go. It lets them go. It let's a user go. It let’s a user go. It lets a user go. It let's an individual go. It let’s an individual go. It lets an individual go. It let's the user go. It let’s the user go. It lets the user go. mwic-0.7.8/tests/multiword-lots-of.exp0000644000000000000000000000005113564026733017757 0ustar00rootroot00000000000000lot's of: | lot's of objects ^^^^^^^^ mwic-0.7.8/tests/multiword-lots-of.txt0000644000000000000000000000004113564026733020001 0ustar00rootroot00000000000000lot's of objects lots of objects mwic-0.7.8/tests/multiword-none-existent.exp0000644000000000000000000000006013564026733021174 0ustar00rootroot00000000000000none existent: | none existent ^^^^^^^^^^^^^ mwic-0.7.8/tests/multiword-none-existent.txt0000644000000000000000000000001613564026733021220 0ustar00rootroot00000000000000none existent mwic-0.7.8/tests/multiword-none-the-less.exp0000644000000000000000000000022013564026733021053 0ustar00rootroot00000000000000never the less: | never the less ^^^^^^^^^^^^^^ non the less: | non the less ^^^^^^^^^^^^ none the less: | none the less ^^^^^^^^^^^^^ mwic-0.7.8/tests/multiword-none-the-less.txt0000644000000000000000000000010413564026733021077 0ustar00rootroot00000000000000non the less none the less nonetheless never the less nevertheless mwic-0.7.8/tests/multiword-nt-not.exp0000644000000000000000000000166513564026733017627 0ustar00rootroot00000000000000ain't not: | I ain't not duplicate. ^^^^^^^^^ couldn't not: | It couldn't not duplicate. ^^^^^^^^^^^^ didn't not: | It didn't not duplicate. ^^^^^^^^^^ doesn't not: | It doesn't not duplicate. ^^^^^^^^^^^ hasn't not: | It hasn't not duplicated. ^^^^^^^^^^ isn't not: | It isn't not duplicate. ^^^^^^^^^ mustn't not: | It mustn't not duplicate. ^^^^^^^^^^^ oughtn't not: | It oughtn't not duplicate. ^^^^^^^^^^^^ shouldn't not: | It shouldn't not duplicate. ^^^^^^^^^^^^^ wasn't not: | It wasn't not duplicate. ^^^^^^^^^^ wouldn't not: | It wouldn't not duplicate. ^^^^^^^^^^^^ aren't not: | They aren't not duplicates. ^^^^^^^^^^ can't not: | They can't not be duplicates. ^^^^^^^^^ don't not: | They don't not duplicate. ^^^^^^^^^ haven't not: | They haven't not duplicated. ^^^^^^^^^^^ weren't not: | They weren't not duplicates. ^^^^^^^^^^^ mwic-0.7.8/tests/multiword-nt-not.txt0000644000000000000000000000065213564026733017645 0ustar00rootroot00000000000000I ain't not duplicate. It couldn't not duplicate. It didn't not duplicate. It doesn't not duplicate. It hasn't not duplicated. It isn't not duplicate. It mustn't not duplicate. It oughtn't not duplicate. It shouldn't not duplicate. It wasn't not duplicate. It wouldn't not duplicate. They aren't not duplicates. They can't not be duplicates. They don't not duplicate. They haven't not duplicated. They weren't not duplicates. mwic-0.7.8/tests/multiword-oh-well.exp0000644000000000000000000000006513564026733017750 0ustar00rootroot00000000000000Oh, well: | Oh, well. At least we tried. ^^^^^^^^ mwic-0.7.8/tests/multiword-oh-well.txt0000644000000000000000000000005513564026733017772 0ustar00rootroot00000000000000Oh, well. At least we tried. Oh, well done! mwic-0.7.8/tests/multiword-per-say.exp0000644000000000000000000000003613564026733017757 0ustar00rootroot00000000000000per say: | per say ^^^^^^^ mwic-0.7.8/tests/multiword-per-say.txt0000644000000000000000000000001013564026733017772 0ustar00rootroot00000000000000per say mwic-0.7.8/tests/multiword-pubic-key.exp0000644000000000000000000000011313564026733020263 0ustar00rootroot00000000000000pubic key: | pubic key ^^^^^^^^^ pubic keys: | pubic keys ^^^^^^^^^^ mwic-0.7.8/tests/multiword-pubic-key.txt0000644000000000000000000000005513564026733020313 0ustar00rootroot00000000000000pubic key pubic keys public key public keys mwic-0.7.8/tests/multiword-rational-for.exp0000644000000000000000000000011313564026733020770 0ustar00rootroot00000000000000rational for: | The rational for this convention is... ^^^^^^^^^^^^ mwic-0.7.8/tests/multiword-rational-for.txt0000644000000000000000000000004713564026733021021 0ustar00rootroot00000000000000The rational for this convention is... mwic-0.7.8/tests/multiword-regarding.exp0000644000000000000000000000006313564026733020341 0ustar00rootroot00000000000000regarding to: | regarding to stuff ^^^^^^^^^^^^ mwic-0.7.8/tests/multiword-regarding.txt0000644000000000000000000000004313564026733020362 0ustar00rootroot00000000000000regarding to stuff regarding stuff mwic-0.7.8/tests/multiword-should-of.exp0000644000000000000000000000027413564026733020303 0ustar00rootroot00000000000000could of: | It could of been... ^^^^^^^^ must of: | It must of been... ^^^^^^^ should of: | It should of been... ^^^^^^^^^ would of: | It would of been... ^^^^^^^^ mwic-0.7.8/tests/multiword-should-of.txt0000644000000000000000000000026713564026733020330 0ustar00rootroot00000000000000It could of been... It could of course be... It must of been... It must of course be... It should of been... It should of course be... It would of been... It would of course be... mwic-0.7.8/tests/multiword-since-than.exp0000644000000000000000000000004713564026733020432 0ustar00rootroot00000000000000since than: | since than ^^^^^^^^^^ mwic-0.7.8/tests/multiword-since-than.txt0000644000000000000000000000002613564026733020452 0ustar00rootroot00000000000000since than since then mwic-0.7.8/tests/multiword-sneak-peak.exp0000644000000000000000000000006313564026733020416 0ustar00rootroot00000000000000sneak peak: | A sneak peak into... ^^^^^^^^^^ mwic-0.7.8/tests/multiword-sneak-peak.txt0000644000000000000000000000005213564026733020437 0ustar00rootroot00000000000000A sneak peak into... A sneak peek into... mwic-0.7.8/tests/multiword-some-times.exp0000644000000000000000000000007013564026733020457 0ustar00rootroot00000000000000Some times: | Some times we cannot do it. ^^^^^^^^^^ mwic-0.7.8/tests/multiword-some-times.txt0000644000000000000000000000010213564026733020476 0ustar00rootroot00000000000000Some times we cannot do it. It happens only on some times of day. mwic-0.7.8/tests/multiword-t-he.exp0000644000000000000000000000034013564026733017232 0ustar00rootroot00000000000000fort he: | fort he ^^^^^^^ int he: | int he ^^^^^^ oft he: | oft he ^^^^^^ tot he: | tot he ^^^^^^ FIXME: | # FIXME: ^^^^^ byt: | byt he ^^^ ift: | ift he ^^^ ist: | ist he ^^^ ont: | ont he ^^^ mwic-0.7.8/tests/multiword-t-he.txt0000644000000000000000000000010313564026733017252 0ustar00rootroot00000000000000fort he int he oft he tot he # FIXME: ist he byt he ift he ont he mwic-0.7.8/tests/multiword-the-the.exp0000644000000000000000000000033013564026733017732 0ustar00rootroot00000000000000a a: | a a ^^^ a an: | a an ^^^^ a the: | a the ^^^^^ an a: | an a ^^^^ an an: | an an ^^^^^ an the: | an the ^^^^^^ the a: | the a ^^^^^ the an: | the an ^^^^^^ the the: | the the ^^^^^^^ mwic-0.7.8/tests/multiword-the-the.txt0000644000000000000000000000007013564026733017756 0ustar00rootroot00000000000000a a a an a the an a an an an the the a the an the the mwic-0.7.8/tests/multiword-to-extend.exp0000644000000000000000000000027713564026733020315 0ustar00rootroot00000000000000to an extend: | ... to an extend, ... ^^^^^^^^^^^^ to certain extend: | ... to certain extend... ^^^^^^^^^^^^^^^^^ to some extend: | ... to some extend... ^^^^^^^^^^^^^^ mwic-0.7.8/tests/multiword-to-extend.txt0000644000000000000000000000014213564026733020327 0ustar00rootroot00000000000000... to some extend... ... to certain extend... ... to an extend, ... ... to an extend clause ... mwic-0.7.8/tests/multiword-worst-than.exp0000644000000000000000000000006313564026733020505 0ustar00rootroot00000000000000worst than: | X is worst than Y ^^^^^^^^^^ mwic-0.7.8/tests/multiword-worst-than.txt0000644000000000000000000000004513564026733020530 0ustar00rootroot00000000000000X is worst than Y X is worse than Y mwic-0.7.8/tests/no-dict.txt0000644000000000000000000000006213564026733015732 0ustar00rootroot00000000000000Spam spam spam spam. Lovely spam! Wonderful spam! mwic-0.7.8/tests/no-dict@und.exp0000644000000000000000000000041713564026733016522 0ustar00rootroot00000000000000Lovely: | Lovely spam! ^^^^^^ Spam: | Spam spam spam spam. ^^^^ Wonderful: | Wonderful spam! ^^^^^^^^^ spam: | Spam spam spam spam. | Spam spam spam spam. | Wonderful spam! | Lovely spam! | Spam spam spam spam. ^^^^ mwic-0.7.8/tests/run-tests0000755000000000000000000000245313564026733015534 0ustar00rootroot00000000000000#!/usr/bin/env python3 # Copyright © 2016 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. import sys import nose sys.path[0] += '/..' from tests import test_blackbox if __name__ == '__main__': nose.main(addplugins=[test_blackbox.Plugin()]) # vim:ts=4 sts=4 sw=4 et mwic-0.7.8/tests/test_blackbox.py0000644000000000000000000000711213564026733017035 0ustar00rootroot00000000000000# Copyright © 2014-2019 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. import glob import io import os import sys import unittest.mock import nose from nose.tools import ( assert_multi_line_equal, ) import lib.cli as M assert_multi_line_equal.__self__.maxDiff = None # pylint: disable=no-member here = os.path.dirname(__file__) here = os.path.relpath(here) def _get_output(path, language): argv = ['mwic', '--language', language, path] binstdout = io.BytesIO() textstdout = io.TextIOWrapper(binstdout, encoding='UTF-8') with unittest.mock.patch.multiple(sys, argv=argv, stdout=textstdout): try: try: M.main() except SystemExit as exc: if exc.code != 0: raise sys.stdout.flush() return binstdout.getvalue().decode('UTF-8') finally: textstdout.close() def _test_text(xpath): assert xpath.endswith('.exp') if '@' in xpath: [ipath, language] = xpath[:-4].rsplit('@') else: language = 'en-US' ipath = xpath[:-4] ipath += '.txt' text = _get_output(ipath, language) with open(xpath, 'rt', encoding='UTF-8') as file: expected = file.read() if expected != text: altxpath = xpath[:-4] + '.alt' try: file = open(altxpath, 'rt', encoding='UTF-8') except FileNotFoundError: pass else: with file: alt_expected = file.read() if alt_expected == text: expected = alt_expected assert_multi_line_equal(expected, text) def test_text(): for xpath in glob.glob(here + '/*.exp'): yield _test_text, xpath test_text.redundant = True # not needed if the plugin is enabled class Plugin(nose.plugins.Plugin): name = 'mwic-plugin' enabled = True def options(self, parser, env): pass def wantFile(self, path): abs_here = os.path.abspath(here) abs_here = os.path.join(abs_here, '') if path.startswith(abs_here) and path.endswith('.exp'): return True def loadTestsFromFile(self, path): if self.wantFile(path): yield TestCase(path) def wantFunction(self, func): if getattr(func, 'redundant', False): return False class TestCase(unittest.TestCase): def __init__(self, path): super().__init__('_test') self.path = os.path.relpath(path) def _test(self): _test_text(self.path) def __str__(self): return self.path # vim:ts=4 sts=4 sw=4 et mwic-0.7.8/tests/test_camel_case.py0000644000000000000000000000354513564026733017332 0ustar00rootroot00000000000000# Copyright © 2016 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. from nose.tools import ( assert_equal, ) import lib.text as M def naive_tokenizer(s): offset = 0 for word in s.split(): yield (word, offset) offset += len(word) + 1 tokenize = M.camel_case_tokenizer(naive_tokenizer) def test_tokenizer(): s = 'bacon eggAndSpam EggBaconAndSpam spamSPAM SPAM' r = list(tokenize(s)) assert_equal(r, [ ('bacon', 0), ('egg', 6), ('And', 9), ('Spam', 12), ('Egg', 17), ('Bacon', 20), ('And', 25), ('Spam', 28), ('spam', 33), ('S', 37), ('P', 38), ('A', 39), ('M', 40), ('SPAM', 42), ]) w = r[-1] assert_equal( len(w[0]) + w[1], len(s) ) # vim:ts=4 sts=4 sw=4 et mwic-0.7.8/tests/test_cli.py0000644000000000000000000000327713564026733016027 0ustar00rootroot00000000000000# Copyright © 2019 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. import argparse import io import unittest.mock from nose.tools import ( assert_equal, assert_is_instance, assert_not_equal, ) import lib.cli def test_version_action(): action = lib.cli.VersionAction(['--version']) stdout = io.StringIO() ap = argparse.ArgumentParser() with unittest.mock.patch('sys.stdout', stdout): try: action(ap, None, None) raise SystemExit(...) except SystemExit as exc: assert_equal(exc.code, 0) s = stdout.getvalue() assert_is_instance(s, str) assert_not_equal(s, '') # vim:ts=4 sts=4 sw=4 et mwic-0.7.8/tests/test_colors.py0000644000000000000000000000617213564026733016556 0ustar00rootroot00000000000000# Copyright © 2015-2018 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. import io import unittest.mock from nose.tools import ( assert_equal, ) from lib import colors as M def with_stdout(encoding): stdout = io.TextIOWrapper( io.BytesIO(), encoding=encoding, ) return unittest.mock.patch('sys.stdout', stdout) @with_stdout('UTF-8') def test_control_characters(): def t(s, x): r = M.escape(s) assert_equal(r, '\x1B[7m' + x + '\x1B[27m') t('\x00', '^@') t('\x01', '^A') t('\x02', '^B') t('\x03', '^C') t('\x04', '^D') t('\x05', '^E') t('\x06', '^F') t('\x07', '^G') t('\x08', '^H') t('\x09', '^I') t('\x0A', '^J') t('\x0B', '^K') t('\x0C', '^L') t('\x0D', '^M') t('\x0E', '^N') t('\x0F', '^O') t('\x10', '^P') t('\x11', '^Q') t('\x12', '^R') t('\x13', '^S') t('\x14', '^T') t('\x15', '^U') t('\x16', '^V') t('\x17', '^W') t('\x18', '^X') t('\x19', '^Y') t('\x1A', '^Z') t('\x1B', '^[') t('\x1C', '^\\') t('\x1D', '^]') t('\x1E', '^^') t('\x1F', '^_') t('\x7F', '^?') t('\x80', '') t('\x81', '') t('\x82', '') t('\x83', '') t('\x84', '') t('\x85', '') t('\x86', '') t('\x87', '') t('\x88', '') t('\x89', '') t('\x8A', '') t('\x8B', '') t('\x8C', '') t('\x8D', '') t('\x8E', '') t('\x8F', '') t('\x90', '') t('\x91', '') t('\x92', '') t('\x93', '') t('\x94', '') t('\x95', '') t('\x96', '') t('\x97', '') t('\x98', '') t('\x99', '') t('\x9A', '') t('\x9B', '') t('\x9C', '') t('\x9D', '') t('\x9E', '') t('\x9F', '') @with_stdout('UTF-8') def test_escape_safe(): def t(s): r = M.escape(s) assert_equal(r, s) t('A') t('Á') # vim:ts=4 sts=4 sw=4 et mwic-0.7.8/tests/test_extdict.py0000644000000000000000000001001713564026733016712 0ustar00rootroot00000000000000# Copyright © 2016 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. import contextlib import functools import tempfile from nose.tools import ( assert_in, assert_not_in, ) import lib.extdict as M @contextlib.contextmanager def tmpdict(data): with tempfile.NamedTemporaryFile(prefix='mwic.', suffix='.txt', mode='wt', encoding='ASCII') as file: file.write(data) file.flush() yield file.name def _test_dict(bad, good, *, d): if not isinstance(good, set): raise TypeError if not isinstance(bad, set): raise TypeError for word in bad: assert_in(word, d) for word in good: assert_not_in(word, d) lintian_dict = '''\ # All spelling errors that have been observed "in the wild" in package # descriptions are added here, ... # # Please keep the list sorted (using the en_US locale). abandonned||abandoned portugese||Portuguese upto||up to ''' def test_lintian(): with tmpdict(lintian_dict) as path: d = M.Dictionary(path) t = functools.partial(_test_dict, d=d) t({'abandonned', 'Abandonned', 'ABANDONNED'}, {'abandoned'}) t({'portugese', 'Portugese', 'PORTUGESE'}, {'Portuguese'}) t({'upto', 'Upto', 'UPTO'}, {'up to'}) lintian_case_dict = '''\ # Picky corrections, applied before lowercasing the word. ... # # Please keep the list sorted (using the en_US locale). american||American Debian-Edu||Debian Edu SLang||S-Lang ''' def test_lintian_case(): with tmpdict(lintian_case_dict) as path: d = M.Dictionary(path) t = functools.partial(_test_dict, d=d) t({'american'}, {'American', 'AMERICAN'}) t({'Debian-Edu'}, {'Debian Edu', 'debian-edu', 'DEBIAN-EDU'}) t({'SLang'}, {'S-Lang', 'slang', 'SLANG'}) codespell_dict = '''\ abandonned->abandoned clas->class, disabled because of name clash in c++ intented->intended, indented, ''' def test_codespell(): with tmpdict(codespell_dict) as path: d = M.Dictionary(path) t = functools.partial(_test_dict, d=d) t({'abandonned', 'Abandonned', 'ABANDONNED'}, {'abandoned'}) t({'clas', 'Clas', 'CLAS'}, {'class'}) t({'intented', 'Intented', 'INTENTED'}, {'intended', 'indented'}) kde_dict = '''\ #! /usr/bin/env perl # CORRECTIONS GO IN THE __DATA__ SECTION AT THE END OF THIS SCRIPT # Checks and corrects common spelling errors in text files - ... __DATA__ #INCORRECT SPELLING CORRECTION aasumes assumes #INCORRECT SPELLING CORRECTION Addtional Additional ''' def test_kde(): with tmpdict(kde_dict) as path: d = M.Dictionary(path) t = functools.partial(_test_dict, d=d) t({'aasumes'}, {'assumes'}) # FIXME? 'assumes' t({'Addtional'}, {'Additional'}) # FIXME? 'addtional' plain_dict = '''\ abandonned Portugese ''' def test_plain(): with tmpdict(plain_dict) as path: d = M.Dictionary(path) t = functools.partial(_test_dict, d=d) t({'abandonned', 'Abandonned', 'ABANDONNED'}, {'abandoned'}) t({'Portugese'}, {'portugese', 'PORTUGESE'}) # vim:ts=4 sts=4 sw=4 et mwic-0.7.8/tests/test_trim.py0000644000000000000000000000363213564026733016226 0ustar00rootroot00000000000000# Copyright © 2014-2016 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. from nose.tools import ( assert_equal, assert_greater_equal, ) import lib.text as M def test_ltrim(): def t(s, n, expected): result = M.ltrim(s, n) assert_greater_equal( max(1, n), len(result) ) assert_equal(result, expected) truncations = [ '…', '…', '…s', '…gs', 'eggs', 'eggs', ] for n, s in enumerate(truncations): t(truncations[-1], n, s) def test_rtrim(): def t(s, n, expected): result = M.rtrim(s, n) assert_equal(result, expected) truncations = [ '…', '…', 'e…', 'eg…', 'eggs', 'eggs', ] for n, s in enumerate(truncations): t(truncations[-1], n, s) # vim:ts=4 sts=4 sw=4 et mwic-0.7.8/tests/test_version.py0000644000000000000000000000356313564026733016743 0ustar00rootroot00000000000000# Copyright © 2012-2015 Jakub Wilk # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. import os from nose.tools import ( assert_equal, ) from lib.cli import __version__ here = os.path.dirname(__file__) docdir = os.path.join(here, os.pardir, 'doc') def test_changelog(): path = os.path.join(docdir, 'changelog') with open(path, 'rt', encoding='UTF-8') as file: line = file.readline() changelog_version = line.split()[1].strip('()') assert_equal(changelog_version, __version__) def test_manpage(): path = os.path.join(docdir, 'manpage.rst') manpage_version = None with open(path, 'rt', encoding='UTF-8') as file: for line in file: if line.startswith(':version:'): manpage_version = line.split()[-1] break assert_equal(manpage_version, __version__) # vim:ts=4 sts=4 sw=4 et