pax_global_header00006660000000000000000000000064120773355620014524gustar00rootroot0000000000000052 comment=f05ed2cd4822461bbbc5711c845b82dc59d08bed pylev-1.2.0/000077500000000000000000000000001207733556200126635ustar00rootroot00000000000000pylev-1.2.0/.gitignore000066400000000000000000000000371207733556200146530ustar00rootroot00000000000000.DS_Store *.pyc build dist env pylev-1.2.0/LICENSE000066400000000000000000000027111207733556200136710ustar00rootroot00000000000000Copyright (c) 2012, Daniel Lindsley All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the pylev nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL pylev BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. pylev-1.2.0/MANIFEST000066400000000000000000000000561207733556200140150ustar00rootroot00000000000000LICENSE README.rst pylev.py setup.py tests.py pylev-1.2.0/README.rst000066400000000000000000000023721207733556200143560ustar00rootroot00000000000000pylev ===== A pure Python Levenshtein implementation that's not freaking GPL'd. Based off the Wikipedia code samples at http://en.wikipedia.org/wiki/Levenshtein_distance. Requirements ------------ * Python 2.7.X, Python 3.3+ or PyPy 1.6.0+ Usage ----- Usage is fairly straightforward.:: import pylev distance = pylev.levenshtein('kitten', 'sitting') assert(distance, 3) License ------- New BSD. Tests ----- Setup:: $ git clone https://github.com/toastdriven/pylev.git $ cd pylev $ virtualenv env --distribute $ . env/bin/activate $ pip install unittest2 Running:: $ python -m unittest2 tests Version History --------------- * v1.2.0 * Fixed all incorrect spellings of "Levenshtein" (there's no "c" in it). * Old methods are aliased for backward-compatibility. * v1.1.0 * Implemented a much faster variant (several orders of magnitude). * The older variant was renamed to ``classic_levenschtein``. * Tested & working on Python 3.3 & PyPy 1.6.0 as well. * v1.0.2 * Python packaging is **REALLY** hard. Including the README *this time*. * v1.0.1 * Python packaging is hard. Including the README this time. * v1.0.0 * Initial release, just the naive implementation of Levenshtein.pylev-1.2.0/pylev.py000066400000000000000000000050431207733556200143760ustar00rootroot00000000000000""" pylev ===== A pure Python Levenshtein implementation that's not freaking GPL'd. Based off the Wikipedia code samples at http://en.wikipedia.org/wiki/Levenshtein_distance. Usage ----- Usage is fairly straightforward.:: import pylev distance = pylev.levenshtein('kitten', 'sitting') assert(distance, 3) """ __author__ = 'Daniel Lindsley' __version__ = (1, 2, 0) __license__ = 'New BSD' def classic_levenshtein(string_1, string_2): """ Calculates the Levenshtein distance between two strings. This version is easier to read, but significantly slower than the version below (up to several orders of magnitude). Useful for learning, less so otherwise. Usage:: >>> classic_levenshtein('kitten', 'sitting') 3 >>> classic_levenshtein('kitten', 'kitten') 0 >>> classic_levenshtein('', '') 0 """ len_1 = len(string_1) len_2 = len(string_2) cost = 0 if len_1 and len_2 and string_1[0] != string_2[0]: cost = 1 if len_1 == 0: return len_2 elif len_2 == 0: return len_1 else: return min( classic_levenshtein(string_1[1:], string_2) + 1, classic_levenshtein(string_1, string_2[1:]) + 1, classic_levenshtein(string_1[1:], string_2[1:]) + cost, ) def levenshtein(string_1, string_2, len_1=None, len_2=None, offset_1=0, offset_2=0, memo=None): """ Calculates the Levenshtein distance between two strings. Usage:: >>> levenshtein('kitten', 'sitting') 3 >>> levenshtein('kitten', 'kitten') 0 >>> levenshtein('', '') 0 """ if len_1 is None: len_1 = len(string_1) if len_2 is None: len_2 = len(string_2) if memo is None: memo = {} key = ','.join([str(offset_1), str(len_1), str(offset_2), str(len_2)]) if memo.get(key) is not None: return memo[key] if len_1 == 0: return len_2 elif len_2 == 0: return len_1 cost = 0 if string_1[offset_1] != string_2[offset_2]: cost = 1 dist = min( levenshtein(string_1, string_2, len_1 - 1, len_2, offset_1 + 1, offset_2, memo) + 1, levenshtein(string_1, string_2, len_1, len_2 - 1, offset_1, offset_2 + 1, memo) + 1, levenshtein(string_1, string_2, len_1 - 1, len_2 - 1, offset_1 + 1, offset_2 + 1, memo) + cost, ) memo[key] = dist return dist # Backward-compatibilty because I misspelled. classic_levenschtein = classic_levenshtein levenschtein = levenshtein pylev-1.2.0/setup.py000066400000000000000000000014111207733556200143720ustar00rootroot00000000000000import os from distutils.core import setup setup( name='pylev', version='1.2.0', description="A pure Python Levenshtein implementation that's not freaking GPL'd.", author='Daniel Lindsley', author_email='daniel@toastdriven.com', long_description=open(os.path.join(os.path.dirname(__file__), 'README.rst'), 'r').read(), py_modules=['pylev'], classifiers=[ 'Development Status :: 5 - Production/Stable', 'Intended Audience :: Developers', 'License :: OSI Approved :: BSD License', 'Operating System :: OS Independent', 'Programming Language :: Python', # That's right, works in Py3 (& PyPy) too! "Programming Language :: Python :: 3", ], url='http://github.com/toastdriven/pylev' ) pylev-1.2.0/tests.py000066400000000000000000000022651207733556200144040ustar00rootroot00000000000000import unittest import pylev class ClassicLevenshteinTestCase(unittest.TestCase): def test_classic(self): self.assertEqual(pylev.classic_levenshtein('kitten', 'sitting'), 3) def test_same(self): self.assertEqual(pylev.classic_levenshtein('kitten', 'kitten'), 0) def test_empty(self): self.assertEqual(pylev.classic_levenshtein('', ''), 0) def test_long(self): self.assertEqual(pylev.classic_levenshtein('confide', 'deceit'), 6) def test_painful(self): # This is pretty slow but should work. self.assertEqual(pylev.classic_levenshtein('CUNsperrICY', 'conspiracy'), 8) class LevenshteinTestCase(unittest.TestCase): def test_classic(self): self.assertEqual(pylev.levenshtein('kitten', 'sitting'), 3) def test_same(self): self.assertEqual(pylev.levenshtein('kitten', 'kitten'), 0) def test_empty(self): self.assertEqual(pylev.levenshtein('', ''), 0) def test_long(self): self.assertEqual(pylev.levenshtein('confide', 'deceit'), 6) def test_painful(self): # This is much faster than the above. self.assertEqual(pylev.levenshtein('CUNsperrICY', 'conspiracy'), 8)