pax_global_header00006660000000000000000000000064150605376410014521gustar00rootroot0000000000000052 comment=235e7758c8ec95fc3a30ef32528ce1fa409c040a leven-4.1.0/000077500000000000000000000000001506053764100126345ustar00rootroot00000000000000leven-4.1.0/.editorconfig000066400000000000000000000002571506053764100153150ustar00rootroot00000000000000root = true [*] indent_style = tab end_of_line = lf charset = utf-8 trim_trailing_whitespace = true insert_final_newline = true [*.yml] indent_style = space indent_size = 2 leven-4.1.0/.gitattributes000066400000000000000000000000231506053764100155220ustar00rootroot00000000000000* text=auto eol=lf leven-4.1.0/.github/000077500000000000000000000000001506053764100141745ustar00rootroot00000000000000leven-4.1.0/.github/security.md000066400000000000000000000002631506053764100163660ustar00rootroot00000000000000# Security Policy To report a security vulnerability, please use the [Tidelift security contact](https://tidelift.com/security). Tidelift will coordinate the fix and disclosure. leven-4.1.0/.github/workflows/000077500000000000000000000000001506053764100162315ustar00rootroot00000000000000leven-4.1.0/.github/workflows/main.yml000066400000000000000000000006261506053764100177040ustar00rootroot00000000000000name: CI on: - push - pull_request jobs: test: name: Node.js ${{ matrix.node-version }} runs-on: ubuntu-latest strategy: fail-fast: false matrix: node-version: - 16 steps: - uses: actions/checkout@v5 - uses: actions/setup-node@v5 with: node-version: ${{ matrix.node-version }} - run: npm install - run: npm test leven-4.1.0/.gitignore000066400000000000000000000000271506053764100146230ustar00rootroot00000000000000node_modules yarn.lock leven-4.1.0/.npmrc000066400000000000000000000000231506053764100137470ustar00rootroot00000000000000package-lock=false leven-4.1.0/bench.js000066400000000000000000000055551506053764100142630ustar00rootroot00000000000000import {Bench} from 'tinybench'; import {levenshteinEditDistance} from 'levenshtein-edit-distance'; import fastLevenshteinPackage from 'fast-levenshtein'; import levenshteinComponent from 'levenshtein-component'; import {computeDistance as ld} from 'ld'; import levdist from 'levdist'; import naturalPackage from 'natural'; import levenshtein from 'levenshtein'; import talisman from 'talisman/metrics/levenshtein.js'; import leven from './index.js'; const fastLevenshtein = fastLevenshteinPackage.get; const natural = naturalPackage.LevenshteinDistance; function run(function_) { function_('a', 'b'); function_('ab', 'ac'); function_('ac', 'bc'); function_('abc', 'axc'); function_('kitten', 'sitting'); function_('xabxcdxxefxgx', '1ab2cd34ef5g6'); function_('cat', 'cow'); function_('xabxcdxxefxgx', 'abcdefg'); function_('javawasneat', 'scalaisgreat'); function_('example', 'samples'); function_('sturgeon', 'urgently'); function_('levenshtein', 'frankenstein'); function_('distance', 'difference'); function_('因為我是中國人所以我會說中文', '因為我是英國人所以我會說英文'); function_( 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim.', 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim.', ); } const bench = new Bench({name: 'leven'}); bench.add('leven', () => { run(leven); }); bench.add('talisman', () => { run(talisman); }); bench.add('levenshtein-edit-distance', () => { run(levenshteinEditDistance); }); bench.add('fast-levenshtein', () => { run(fastLevenshtein); }); bench.add('levenshtein-component', () => { run(levenshteinComponent); }); bench.add('ld', () => { run(ld); }); bench.add('levenshtein', () => { run(levenshtein); }); bench.add('levdist', () => { run(levdist); }); bench.add('natural', () => { run(natural); }); bench.runSync(); console.table(bench.table()); leven-4.1.0/index.d.ts000066400000000000000000000031741506053764100145420ustar00rootroot00000000000000export interface Options { /** Maximum Levenshtein distance to calculate. If the actual distance exceeds this value, the function will return the maximum distance instead of the actual distance. This can significantly improve performance when you only care about matches within a certain threshold. @example ``` import leven from 'leven'; leven('abcdef', '123456', {maxDistance: 3}); //=> 3 leven('cat', 'cow', {maxDistance: 5}); //=> 2 ``` */ readonly maxDistance?: number; } /** Measure the difference between two strings using the Levenshtein distance algorithm. @param first - First string. @param second - Second string. @param options - Options. @returns Distance between `first` and `second`. If `maxDistance` is provided and the actual distance exceeds it, returns `maxDistance`. @example ``` import leven from 'leven'; leven('cat', 'cow'); //=> 2 ``` */ export default function leven(first: string, second: string, options?: Options): number; /** Find the closest matching string from an array of candidates. @param target - The string to find matches for. @param candidates - Array of candidate strings to search through. @param options - Options. @returns The closest matching string from candidates, or `undefined` if no candidates are provided or if no match is found within `maxDistance`. @example ``` import {closestMatch} from 'leven'; closestMatch('kitten', ['sitting', 'kitchen', 'mittens']); //=> 'kitchen' closestMatch('hello', ['jello', 'yellow', 'bellow'], {maxDistance: 2}); //=> 'jello' ``` */ export function closestMatch(target: string, candidates: readonly string[], options?: Options): string | undefined; leven-4.1.0/index.js000066400000000000000000000104161506053764100143030ustar00rootroot00000000000000const array = []; const characterCodeCache = []; export default function leven(first, second, options) { if (first === second) { return 0; } const maxDistance = options?.maxDistance; const swap = first; // Swapping the strings if `a` is longer than `b` so we know which one is the // shortest & which one is the longest if (first.length > second.length) { first = second; second = swap; } let firstLength = first.length; let secondLength = second.length; // Performing suffix trimming: // We can linearly drop suffix common to both strings since they // don't increase distance at all // Note: `~-` is the bitwise way to perform a `- 1` operation while (firstLength > 0 && (first.charCodeAt(~-firstLength) === second.charCodeAt(~-secondLength))) { firstLength--; secondLength--; } // Performing prefix trimming // We can linearly drop prefix common to both strings since they // don't increase distance at all let start = 0; while (start < firstLength && (first.charCodeAt(start) === second.charCodeAt(start))) { start++; } firstLength -= start; secondLength -= start; // Early termination after trimming: if difference in length exceeds max distance if (maxDistance !== undefined && secondLength - firstLength > maxDistance) { return maxDistance; } if (firstLength === 0) { return maxDistance !== undefined && secondLength > maxDistance ? maxDistance : secondLength; } let bCharacterCode; let result; let temporary; let temporary2; let index = 0; let index2 = 0; while (index < firstLength) { characterCodeCache[index] = first.charCodeAt(start + index); array[index] = ++index; } while (index2 < secondLength) { bCharacterCode = second.charCodeAt(start + index2); temporary = index2++; result = index2; for (index = 0; index < firstLength; index++) { temporary2 = bCharacterCode === characterCodeCache[index] ? temporary : temporary + 1; temporary = array[index]; // eslint-disable-next-line no-multi-assign result = array[index] = temporary > result ? (temporary2 > result ? result + 1 : temporary2) : (temporary2 > temporary ? temporary + 1 : temporary2); } // Early termination: if all values in current row exceed maxDistance if (maxDistance !== undefined) { let rowMinimum = result; for (index = 0; index < firstLength; index++) { if (array[index] < rowMinimum) { rowMinimum = array[index]; } } if (rowMinimum > maxDistance) { return maxDistance; } } } // Bound arrays to avoid retaining large previous sizes array.length = firstLength; characterCodeCache.length = firstLength; return maxDistance !== undefined && result > maxDistance ? maxDistance : result; } export function closestMatch(target, candidates, options) { if (!Array.isArray(candidates) || candidates.length === 0) { return undefined; } const userMax = options?.maxDistance; const targetLength = target.length; // Exact match fast-path for (const candidate of candidates) { if (candidate === target) { return candidate; } } if (userMax === 0) { return undefined; } let best; let bestDist = Number.POSITIVE_INFINITY; const seen = new Set(); for (const candidate of candidates) { if (seen.has(candidate)) { continue; } seen.add(candidate); const lengthDiff = Math.abs(candidate.length - targetLength); if (lengthDiff >= bestDist) { continue; } if (userMax !== undefined && lengthDiff > userMax) { continue; } const cap = Number.isFinite(bestDist) ? (userMax === undefined ? bestDist : Math.min(bestDist, userMax)) : userMax; const distance = cap === undefined ? leven(target, candidate) : leven(target, candidate, {maxDistance: cap}); // Skip candidates that exceed the user's maximum distance if (userMax !== undefined && distance > userMax) { continue; } // If we got a capped result that equals the cap, we need the actual distance // for accurate comparison, but only if the cap was due to userMax let actualD = distance; if (cap !== undefined && distance === cap && cap === userMax) { actualD = leven(target, candidate); } if (actualD < bestDist) { bestDist = actualD; best = candidate; if (bestDist === 0) { break; } } } if (userMax !== undefined && bestDist > userMax) { return undefined; } return best; } leven-4.1.0/license000066400000000000000000000021351506053764100142020ustar00rootroot00000000000000MIT License Copyright (c) Sindre Sorhus (https://sindresorhus.com) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. leven-4.1.0/package.json000066400000000000000000000022541506053764100151250ustar00rootroot00000000000000{ "name": "leven", "version": "4.1.0", "description": "Measure the difference between two strings using the Levenshtein distance algorithm", "license": "MIT", "repository": "sindresorhus/leven", "funding": "https://github.com/sponsors/sindresorhus", "author": { "name": "Sindre Sorhus", "email": "sindresorhus@gmail.com", "url": "https://sindresorhus.com" }, "type": "module", "exports": "./index.js", "types": "./index.d.ts", "sideEffects": false, "engines": { "node": "^12.20.0 || ^14.13.1 || >=16.0.0" }, "scripts": { "test": "xo && ava", "bench": "node bench.js" }, "files": [ "index.js", "index.d.ts" ], "keywords": [ "leven", "levenshtein", "distance", "algorithm", "string", "difference", "diff", "fast", "fuzzy", "similar", "similarity", "compare", "comparison", "edit", "text", "match", "matching" ], "devDependencies": { "ava": "^3.15.0", "fast-levenshtein": "^3.0.0", "ld": "^0.1.0", "levdist": "^2.2.10", "levenshtein": "^1.0.5", "levenshtein-component": "^0.0.1", "levenshtein-edit-distance": "^3.0.0", "natural": "^5.0.4", "talisman": "^1.1.4", "tinybench": "^4.0.1", "xo": "^0.44.0" } } leven-4.1.0/readme.md000066400000000000000000000035131506053764100144150ustar00rootroot00000000000000# leven > Measure the difference between two strings using the [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) algorithm ## Install ```sh npm install leven ``` ## Usage ```js import leven from 'leven'; leven('cat', 'cow'); //=> 2 ``` ## API ### leven(first, second, options?) #### first Type: `string` First string. #### second Type: `string` Second string. #### options Type: `object` ##### maxDistance Type: `number` Maximum distance to calculate. If the actual distance exceeds this value, the function will return `maxDistance` instead of the actual distance. This can significantly improve performance when you only care about matches within a certain threshold. ```js import leven from 'leven'; leven('abcdef', '123456', {maxDistance: 3}); //=> 3 leven('cat', 'cow', {maxDistance: 5}); //=> 2 ``` ### closestMatch(target, candidates, options?) Find the closest matching string from an array of candidates. #### target Type: `string` The string to find matches for. #### candidates Type: `string[]` Array of candidate strings to search through. #### options Type: `object` Same options as `leven()`. ##### maxDistance Type: `number` Maximum distance to consider. Candidates with a distance greater than this value will be ignored. Returns the closest matching string from candidates, or `undefined` if no candidates are provided or if no match is found within `maxDistance`. ```js import {closestMatch} from 'leven'; closestMatch('kitten', ['sitting', 'kitchen', 'mittens']); //=> 'kitchen' closestMatch('hello', ['jello', 'yellow', 'bellow'], {maxDistance: 2}); //=> 'jello' // No match within distance threshold closestMatch('abcdef', ['123456', '1234567890'], {maxDistance: 2}); //=> undefined ``` ## Related - [leven-cli](https://github.com/sindresorhus/leven-cli) - CLI for this module leven-4.1.0/test.js000066400000000000000000000135221506053764100141540ustar00rootroot00000000000000import test from 'ava'; import leven, {closestMatch} from './index.js'; test('main', t => { t.is(leven('a', 'b'), 1); t.is(leven('ab', 'ac'), 1); t.is(leven('ac', 'bc'), 1); t.is(leven('abc', 'axc'), 1); t.is(leven('kitten', 'sitting'), 3); t.is(leven('xabxcdxxefxgx', '1ab2cd34ef5g6'), 6); t.is(leven('cat', 'cow'), 2); t.is(leven('xabxcdxxefxgx', 'abcdefg'), 6); t.is(leven('javawasneat', 'scalaisgreat'), 7); t.is(leven('example', 'samples'), 3); t.is(leven('sturgeon', 'urgently'), 6); t.is(leven('levenshtein', 'frankenstein'), 6); t.is(leven('distance', 'difference'), 5); t.is(leven('因為我是中國人所以我會說中文', '因為我是英國人所以我會說英文'), 2); }); test('maxDistance option', t => { // Test cases from the GitHub issue t.is(leven('abcdef', '123456'), 6); t.is(leven('abcdef', 'abcdefg'), 1); // With maxDistance option t.is(leven('abcdef', '123456', {maxDistance: 3}), 3); t.is(leven('abcdef', 'abcdefg', {maxDistance: 3}), 1); // Additional test cases t.is(leven('kitten', 'sitting', {maxDistance: 2}), 2); // Actual distance is 3, should return 2 (max) t.is(leven('cat', 'cow', {maxDistance: 5}), 2); // Actual distance is 2, should return 2 t.is(leven('same', 'same', {maxDistance: 1}), 0); // Identical strings always return 0 // Early termination based on length difference t.is(leven('a', 'abcdefgh', {maxDistance: 3}), 3); // Length diff is 7, exceeds max t.is(leven('short', 'muchlongerstringhere', {maxDistance: 5}), 5); // Edge cases t.is(leven('', 'abc', {maxDistance: 2}), 2); // Empty string t.is(leven('', 'abc', {maxDistance: 10}), 3); // Empty string, max > actual t.is(leven('abc', '', {maxDistance: 2}), 2); // Empty string reversed t.is(leven('abc', 'abc', {maxDistance: 0}), 0); // Identical with max 0 t.is(leven('abc', 'abd', {maxDistance: 0}), 0); // Different with max 0 // Verify early termination is working t.is(leven('abcdefghijklmnopqrstuvwxyz', '1234567890', {maxDistance: 3}), 3); t.is(leven('verylongstringhere', 'completelydifferent', {maxDistance: 1}), 1); // Backward compatibility - no options provided t.is(leven('foo', 'bar'), 3); t.is(leven('foo', 'bar', undefined), 3); t.is(leven('foo', 'bar', null), 3); }); test('closestMatch', t => { // Basic functionality // Note: With optimization, tie-breaking may not always prefer first in input order const result = closestMatch('kitten', ['sitting', 'kitchen', 'mittens']); t.true(['kitchen', 'mittens'].includes(result)); // Either is correct (both distance 2) t.is(closestMatch('hello', ['jello', 'yellow', 'bellow']), 'jello'); // With exact match t.is(closestMatch('foo', ['bar', 'foo', 'baz']), 'foo'); // Single candidate t.is(closestMatch('test', ['testing']), 'testing'); // Empty candidates t.is(closestMatch('test', []), undefined); t.is(closestMatch('test', undefined), undefined); t.is(closestMatch('test', null), undefined); // All equally distant t.is(closestMatch('a', ['b', 'c', 'd']), 'b'); // Should return first one // With maxDistance option t.is(closestMatch('kitten', ['sitting', 'kitchen', 'mittens'], {maxDistance: 2}), 'kitchen'); t.is(closestMatch('kitten', ['sitting', 'kitchen', 'mittens'], {maxDistance: 1}), undefined); // No matches within distance 1 t.is(closestMatch('abcdef', ['123456', 'abcdefg', '1234567890'], {maxDistance: 2}), 'abcdefg'); // No match within maxDistance t.is(closestMatch('abcdef', ['123456', '1234567890'], {maxDistance: 2}), undefined); // Empty string cases t.is(closestMatch('', ['a', 'ab', 'abc']), 'a'); t.is(closestMatch('abc', ['', 'a', 'ab']), 'ab'); // Distance 1 is closest // Case sensitivity t.is(closestMatch('Hello', ['hello', 'HELLO', 'hELLo']), 'hello'); // Unicode strings t.is(closestMatch('café', ['cafe', 'caffè', 'café']), 'café'); t.is(closestMatch('你好', ['您好', '你们好', '大家好']), '您好'); // Multiple candidates with same distance - should return first t.is(closestMatch('abc', ['ab', 'bc', 'ac']), 'ab'); // Performance test case - should use maxDistance optimization const longCandidates = [ 'verylongstringwithlotsofcharacters', 'anotherlongstringcompletlydifferent', 'shortstr', 'test', ]; t.is(closestMatch('test', longCandidates), 'test'); t.is(closestMatch('testing', longCandidates), 'test'); // Edge cases from review // Exact match should return immediately t.is(closestMatch('test', ['a', 'b', 'c', 'test', 'd', 'e']), 'test'); // MaxDistance: 0 only accepts exact matches t.is(closestMatch('test', ['test', 'tests', 'testing'], {maxDistance: 0}), 'test'); t.is(closestMatch('test', ['tests', 'testing'], {maxDistance: 0}), undefined); // Duplicates shouldn't affect result t.is(closestMatch('abc', ['ab', 'ab', 'ab', 'abcd', 'abcd']), 'ab'); // LengthDiff === bestDistance should be skipped t.is(closestMatch('ab', ['a', 'abc']), 'a'); // Both distance 1, return first // Large array optimization (sorting by length diff) const largeArray = Array.from({length: 50}, (_, index) => 'x'.repeat(index)); largeArray.push('test'); // Add exact match t.is(closestMatch('test', largeArray), 'test'); // Dynamic cap behavior - shouldn't incorrectly prefer worse candidates t.is(closestMatch('abc', ['ab', 'abcd', 'xyz'], {maxDistance: 2}), 'ab'); // Tie-break stability for large arrays (>32 items) - should pick first in input order const largeTieArray = Array.from({length: 50}, (_, i) => `z${i}`); largeTieArray.push('ab', 'ac', 'ad'); // All distance 1 from 'a' t.is(closestMatch('a', largeTieArray), 'ab'); // Should pick first equal candidate // Ensure capped path never "improves" candidate due to cap t.is(closestMatch('test', ['testing', 'tests'], {maxDistance: 2}), 'tests'); t.is(closestMatch('test', ['testing', 'tests']), 'tests'); // Same result without cap // Additional maxDistance edge case t.is(closestMatch('test', ['testing'], {maxDistance: 0}), undefined); // No exact match });