pax_global_header00006660000000000000000000000064140547677440014534gustar00rootroot0000000000000052 comment=06760f3a5e40aa284cb8077762f630f1df210698 pylev-1.4.0/000077500000000000000000000000001405476774400126755ustar00rootroot00000000000000pylev-1.4.0/.gitignore000066400000000000000000000000521405476774400146620ustar00rootroot00000000000000.DS_Store *.pyc build dist env *.egg-info pylev-1.4.0/.travis.yml000066400000000000000000000002251405476774400150050ustar00rootroot00000000000000language: python sudo: false python: - "2.7" - "3.7" - "3.8" - "3.9" - "pypy" - "pypy3" # command to run tests script: "python tests.py" pylev-1.4.0/LICENSE000066400000000000000000000027111405476774400137030ustar00rootroot00000000000000Copyright (c) 2012, Daniel Lindsley All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the pylev nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL pylev BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. pylev-1.4.0/MANIFEST.in000066400000000000000000000000641405476774400144330ustar00rootroot00000000000000include bench.sh include LICENSE include README.rst pylev-1.4.0/README.rst000066400000000000000000000033041405476774400143640ustar00rootroot00000000000000pylev ===== A pure Python Levenshtein implementation that's not freaking GPL'd. Based off the Wikipedia code samples at http://en.wikipedia.org/wiki/Levenshtein_distance. Requirements ------------ * Python 2.7.X, Python 3.3+ or PyPy 1.6.0+ Usage ----- Usage is fairly straightforward: .. code-block:: python import pylev distance = pylev.levenshtein('kitten', 'sitting') assert distance == 3 License ------- New BSD. Tests ----- Setup:: $ git clone https://github.com/toastdriven/pylev.git $ cd pylev Running:: $ python -m unittest tests .. image:: https://travis-ci.com/toastdriven/pylev.svg?branch=main :target: http://travis-ci.com/toastdriven/pylev Version History --------------- * v1.4.0 * Updated for current versions of Python * Integrated a better Travis matrix. Thanks to @grainert! * Fixed mistaken docs about the `assert`. Thanks to @adamchainz! * Reorganized the package. * Blacked all the source code. * v1.3.0 * Implemented a considerably faster variants (orders of magnitude). * Tested & working on Python 2.7.4, Python 3.3.1 & PyPy 1.9.0. * v1.2.0 * Fixed all incorrect spellings of "Levenshtein" (there's no "c" in it). * Old methods are aliased for backward-compatibility. * v1.1.0 * Implemented a much faster variant (several orders of magnitude). * The older variant was renamed to ``classic_levenschtein``. * Tested & working on Python 3.3 & PyPy 1.6.0 as well. * v1.0.2 * Python packaging is **REALLY** hard. Including the README *this time*. * v1.0.1 * Python packaging is hard. Including the README this time. * v1.0.0 * Initial release, just the naive implementation of Levenshtein. pylev-1.4.0/bench.sh000077500000000000000000000031201405476774400143070ustar00rootroot00000000000000#!/bin/bash echo -ne "py2.7 recursive_levenshtein\t\t" python2.7 -m timeit -s "import pylev" "pylev.recursive_levenshtein('Levenshtein', 'Frankenstein')" echo -ne "py2.7 wf_levenshtein\t\t\t" python2.7 -m timeit -s "import pylev" "pylev.wf_levenshtein('Levenshtein', 'Frankenstein')" echo -ne "py2.7 wfi_levenshtein\t\t\t" python2.7 -m timeit -s "import pylev" "pylev.wfi_levenshtein('Levenshtein', 'Frankenstein')" echo -ne "py2.7 damerau_levenshtein\t\t" python2.7 -m timeit -s "import pylev" "pylev.damerau_levenshtein('Levenshtein', 'Frankenstein')" echo -ne "py3.3 recursive_levenshtein\t\t" python3.3 -m timeit -s "import pylev" "pylev.recursive_levenshtein('Levenshtein', 'Frankenstein')" echo -ne "py3.3 wf_levenshtein\t\t\t" python3.3 -m timeit -s "import pylev" "pylev.wf_levenshtein('Levenshtein', 'Frankenstein')" echo -ne "py3.3 wfi_levenshtein\t\t\t" python3.3 -m timeit -s "import pylev" "pylev.wfi_levenshtein('Levenshtein', 'Frankenstein')" echo -ne "py3.3 damerau_levenshtein\t\t" python3.3 -m timeit -s "import pylev" "pylev.damerau_levenshtein('Levenshtein', 'Frankenstein')" echo -ne "pypy recursive_levenshtein\t\t" pypy -m timeit -s "import pylev" "pylev.recursive_levenshtein('Levenshtein', 'Frankenstein')" echo -ne "pypy wf_levenshtein\t\t\t" pypy -m timeit -s "import pylev" "pylev.wf_levenshtein('Levenshtein', 'Frankenstein')" echo -ne "pypy wfi_levenshtein\t\t\t" pypy -m timeit -s "import pylev" "pylev.wfi_levenshtein('Levenshtein', 'Frankenstein')" echo -ne "pypy damerau_levenshtein\t\t" pypy -m timeit -s "import pylev" "pylev.damerau_levenshtein('Levenshtein', 'Frankenstein')" pylev-1.4.0/pylev/000077500000000000000000000000001405476774400140345ustar00rootroot00000000000000pylev-1.4.0/pylev/__init__.py000066400000000000000000000015771405476774400161570ustar00rootroot00000000000000""" pylev ===== A pure Python Levenshtein implementation that's not freaking GPL'd. Based off the Wikipedia code samples at http://en.wikipedia.org/wiki/Levenshtein_distance. Usage ----- Usage is fairly straightforward.:: import pylev distance = pylev.levenshtein('kitten', 'sitting') assert distance == 3 """ from .classic import classic_levenshtein from .recursive import recursive_levenshtein from .wf import wf_levenshtein, wfi_levenshtein from .damerau import damerau_levenshtein __author__ = "Daniel Lindsley" __version__ = (1, 4, 0) __license__ = "New BSD" levenshtein = wfi_levenshtein # Backward-compatibilty because I misspelled. classic_levenschtein = classic_levenshtein levenschtein = levenshtein __all__ = [ "levenshtein", "classic_levenshtein", "recursive_levenshtein", "wf_levenshtein", "wfi_levenshtein", "damerau_levenshtein", ] pylev-1.4.0/pylev/classic.py000066400000000000000000000016451405476774400160350ustar00rootroot00000000000000def classic_levenshtein(string_1, string_2): """ Calculates the Levenshtein distance between two strings. This version is easier to read, but significantly slower than the version below (up to several orders of magnitude). Useful for learning, less so otherwise. Usage:: >>> classic_levenshtein('kitten', 'sitting') 3 >>> classic_levenshtein('kitten', 'kitten') 0 >>> classic_levenshtein('', '') 0 """ len_1 = len(string_1) len_2 = len(string_2) cost = 0 if len_1 and len_2 and string_1[0] != string_2[0]: cost = 1 if len_1 == 0: return len_2 elif len_2 == 0: return len_1 else: return min( classic_levenshtein(string_1[1:], string_2) + 1, classic_levenshtein(string_1, string_2[1:]) + 1, classic_levenshtein(string_1[1:], string_2[1:]) + cost, ) pylev-1.4.0/pylev/damerau.py000066400000000000000000000034561405476774400160340ustar00rootroot00000000000000import sys PY2 = sys.version_info[0] == 2 if PY2: range = xrange def damerau_levenshtein(string_1, string_2): """ Calculates the Damerau-Levenshtein distance between two strings. In addition to insertions, deletions and substitutions, Damerau-Levenshtein considers adjacent transpositions. This version is based on an iterative version of the Wagner-Fischer algorithm. Usage:: >>> damerau_levenshtein('kitten', 'sitting') 3 >>> damerau_levenshtein('kitten', 'kittne') 1 >>> damerau_levenshtein('', '') 0 """ if string_1 == string_2: return 0 len_1 = len(string_1) len_2 = len(string_2) if len_1 == 0: return len_2 if len_2 == 0: return len_1 if len_1 > len_2: string_2, string_1 = string_1, string_2 len_2, len_1 = len_1, len_2 prev_cost = 0 d0 = [i for i in range(len_2 + 1)] d1 = [j for j in range(len_2 + 1)] dprev = d0[:] s1 = string_1 s2 = string_2 for i in range(len_1): d1[0] = i + 1 for j in range(len_2): cost = d0[j] if s1[i] != s2[j]: # substitution cost += 1 # insertion x_cost = d1[j] + 1 if x_cost < cost: cost = x_cost # deletion y_cost = d0[j + 1] + 1 if y_cost < cost: cost = y_cost # transposition if i > 0 and j > 0 and s1[i] == s2[j - 1] and s1[i - 1] == s2[j]: transp_cost = dprev[j - 1] + 1 if transp_cost < cost: cost = transp_cost d1[j + 1] = cost dprev, d0, d1 = d0, d1, dprev return d0[-1] pylev-1.4.0/pylev/recursive.py000066400000000000000000000024241405476774400164170ustar00rootroot00000000000000def recursive_levenshtein( string_1, string_2, len_1=None, len_2=None, offset_1=0, offset_2=0, memo=None ): """ Calculates the Levenshtein distance between two strings. Usage:: >>> recursive_levenshtein('kitten', 'sitting') 3 >>> recursive_levenshtein('kitten', 'kitten') 0 >>> recursive_levenshtein('', '') 0 """ if len_1 is None: len_1 = len(string_1) if len_2 is None: len_2 = len(string_2) if memo is None: memo = {} key = ",".join([str(offset_1), str(len_1), str(offset_2), str(len_2)]) if memo.get(key) is not None: return memo[key] if len_1 == 0: return len_2 elif len_2 == 0: return len_1 cost = 0 if string_1[offset_1] != string_2[offset_2]: cost = 1 dist = min( recursive_levenshtein( string_1, string_2, len_1 - 1, len_2, offset_1 + 1, offset_2, memo ) + 1, recursive_levenshtein( string_1, string_2, len_1, len_2 - 1, offset_1, offset_2 + 1, memo ) + 1, recursive_levenshtein( string_1, string_2, len_1 - 1, len_2 - 1, offset_1 + 1, offset_2 + 1, memo ) + cost, ) memo[key] = dist return dist pylev-1.4.0/pylev/wf.py000066400000000000000000000044631405476774400150310ustar00rootroot00000000000000import sys PY2 = sys.version_info[0] == 2 if PY2: range = xrange def wf_levenshtein(string_1, string_2): """ Calculates the Levenshtein distance between two strings. This version uses the Wagner-Fischer algorithm. Usage:: >>> wf_levenshtein('kitten', 'sitting') 3 >>> wf_levenshtein('kitten', 'kitten') 0 >>> wf_levenshtein('', '') 0 """ len_1 = len(string_1) + 1 len_2 = len(string_2) + 1 d = [0] * (len_1 * len_2) for i in range(len_1): d[i] = i for j in range(len_2): d[j * len_1] = j for j in range(1, len_2): for i in range(1, len_1): if string_1[i - 1] == string_2[j - 1]: d[i + j * len_1] = d[i - 1 + (j - 1) * len_1] else: d[i + j * len_1] = min( d[i - 1 + j * len_1] + 1, # deletion d[i + (j - 1) * len_1] + 1, # insertion d[i - 1 + (j - 1) * len_1] + 1, # substitution ) return d[-1] def wfi_levenshtein(string_1, string_2): """ Calculates the Levenshtein distance between two strings. This version uses an iterative version of the Wagner-Fischer algorithm. Usage:: >>> wfi_levenshtein('kitten', 'sitting') 3 >>> wfi_levenshtein('kitten', 'kitten') 0 >>> wfi_levenshtein('', '') 0 """ if string_1 == string_2: return 0 len_1 = len(string_1) len_2 = len(string_2) if len_1 == 0: return len_2 if len_2 == 0: return len_1 if len_1 > len_2: string_2, string_1 = string_1, string_2 len_2, len_1 = len_1, len_2 d0 = [i for i in range(len_2 + 1)] d1 = [j for j in range(len_2 + 1)] for i in range(len_1): d1[0] = i + 1 for j in range(len_2): cost = d0[j] if string_1[i] != string_2[j]: # substitution cost += 1 # insertion x_cost = d1[j] + 1 if x_cost < cost: cost = x_cost # deletion y_cost = d0[j + 1] + 1 if y_cost < cost: cost = y_cost d1[j + 1] = cost d0, d1 = d1, d0 return d0[-1] pylev-1.4.0/pyproject.toml000066400000000000000000000001501405476774400156050ustar00rootroot00000000000000[build-system] requires = [ "setuptools>=42", "wheel" ] build-backend = "setuptools.build_meta" pylev-1.4.0/setup.cfg000066400000000000000000000000261405476774400145140ustar00rootroot00000000000000[wheel] universal = 1 pylev-1.4.0/setup.py000066400000000000000000000016111405476774400144060ustar00rootroot00000000000000import os try: from setuptools import setup except ImportError: from distutils.core import setup setup( name="pylev", version="1.4.0", description="A pure Python Levenshtein implementation that's not freaking GPL'd.", author="Daniel Lindsley", author_email="daniel@toastdriven.com", long_description=open( os.path.join(os.path.dirname(__file__), "README.rst"), "r" ).read(), packages=["pylev"], include_package_data=True, zip_safe=False, classifiers=[ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved :: BSD License", "Operating System :: OS Independent", "Programming Language :: Python", # That's right, works in Py3 (& PyPy) too! "Programming Language :: Python :: 3", ], url="http://github.com/toastdriven/pylev", ) pylev-1.4.0/tests.py000066400000000000000000000022741405476774400144160ustar00rootroot00000000000000import itertools import unittest import pylev test_data = [ ("classic", "kitten", "sitting", 3), ("same", "kitten", "kitten", 0), ("empty", "", "", 0), ("a", "meilenstein", "levenshtein", 4), ("b", "levenshtein", "frankenstein", 6), ("c", "confide", "deceit", 6), ("d", "CUNsperrICY", "conspiracy", 8), ] test_functions = [ # pylev.classic_levenshtein, # disabled because it is so slow pylev.recursive_levenshtein, pylev.wf_levenshtein, pylev.wfi_levenshtein, ] class Tests(unittest.TestCase): def test_damerau_levenshtein(seld): assert pylev.damerau_levenshtein("ba", "abc") == 2 assert pylev.damerau_levenshtein("foobar", "foobra") == 1 assert pylev.damerau_levenshtein("fee", "deed") == 2 def _mk_test_fn(fn, a, b, expected): def _test_fn(self): self.assertEqual(fn(a, b), expected) self.assertEqual(fn(b, a), expected) return _test_fn for lev_fn, data in itertools.product(test_functions, test_data): name, a, b, expected = data test_fn = _mk_test_fn(lev_fn, a, b, expected) setattr(Tests, "test_%s_%s" % (name, lev_fn.__name__), test_fn) if __name__ == "__main__": unittest.main()