pax_global_header00006660000000000000000000000064141577017310014520gustar00rootroot0000000000000052 comment=2df4e8db15b379f6faf0196a4ea3868bd3046e32 streamsearch-1.1.0/000077500000000000000000000000001415770173100142005ustar00rootroot00000000000000streamsearch-1.1.0/.eslintrc.js000066400000000000000000000001111415770173100164300ustar00rootroot00000000000000'use strict'; module.exports = { extends: '@mscdex/eslint-config', }; streamsearch-1.1.0/.github/000077500000000000000000000000001415770173100155405ustar00rootroot00000000000000streamsearch-1.1.0/.github/workflows/000077500000000000000000000000001415770173100175755ustar00rootroot00000000000000streamsearch-1.1.0/.github/workflows/ci.yml000066400000000000000000000007751415770173100207240ustar00rootroot00000000000000name: CI on: pull_request: push: branches: [ master ] jobs: tests-linux: runs-on: ubuntu-latest strategy: fail-fast: false matrix: node-version: [10.x, 12.x, 14.x, 16.x] steps: - uses: actions/checkout@v2 - name: Use Node.js ${{ matrix.node-version }} uses: actions/setup-node@v1 with: node-version: ${{ matrix.node-version }} - name: Install module run: npm install - name: Run tests run: npm test streamsearch-1.1.0/.github/workflows/lint.yml000066400000000000000000000007271415770173100212740ustar00rootroot00000000000000name: lint on: pull_request: push: branches: [ master ] env: NODE_VERSION: 16.x jobs: lint-js: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Use Node.js ${{ env.NODE_VERSION }} uses: actions/setup-node@v1 with: node-version: ${{ env.NODE_VERSION }} - name: Install ESLint + ESLint configs/plugins run: npm install --only=dev - name: Lint files run: npm run lint streamsearch-1.1.0/LICENSE000066400000000000000000000020751415770173100152110ustar00rootroot00000000000000Copyright Brian White. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.streamsearch-1.1.0/README.md000066400000000000000000000052101415770173100154550ustar00rootroot00000000000000Description =========== streamsearch is a module for [node.js](http://nodejs.org/) that allows searching a stream using the Boyer-Moore-Horspool algorithm. This module is based heavily on the Streaming Boyer-Moore-Horspool C++ implementation by Hongli Lai [here](https://github.com/FooBarWidget/boyer-moore-horspool). Requirements ============ * [node.js](http://nodejs.org/) -- v10.0.0 or newer Installation ============ npm install streamsearch Example ======= ```js const { inspect } = require('util'); const StreamSearch = require('streamsearch'); const needle = Buffer.from('\r\n'); const ss = new StreamSearch(needle, (isMatch, data, start, end) => { if (data) console.log('data: ' + inspect(data.toString('latin1', start, end))); if (isMatch) console.log('match!'); }); const chunks = [ 'foo', ' bar', '\r', '\n', 'baz, hello\r', '\n world.', '\r\n Node.JS rules!!\r\n\r\n', ]; for (const chunk of chunks) ss.push(Buffer.from(chunk)); // output: // // data: 'foo' // data: ' bar' // match! // data: 'baz, hello' // match! // data: ' world.' // match! // data: ' Node.JS rules!!' // match! // data: '' // match! ``` API === Properties ---------- * **maxMatches** - < _integer_ > - The maximum number of matches. Defaults to `Infinity`. * **matches** - < _integer_ > - The current match count. Functions --------- * **(constructor)**(< _mixed_ >needle, < _function_ >callback) - Creates and returns a new instance for searching for a _Buffer_ or _string_ `needle`. `callback` is called any time there is non-matching data and/or there is a needle match. `callback` will be called with the following arguments: 1. `isMatch` - _boolean_ - Indicates whether a match has been found 2. `data` - _mixed_ - If set, this contains data that did not match the needle. 3. `start` - _integer_ - The index in `data` where the non-matching data begins (inclusive). 4. `end` - _integer_ - The index in `data` where the non-matching data ends (exclusive). 5. `isSafeData` - _boolean_ - Indicates if it is safe to store a reference to `data` (e.g. as-is or via `data.slice()`) or not, as in some cases `data` may point to a Buffer whose contents change over time. * **destroy**() - _(void)_ - Emits any last remaining unmatched data that may still be buffered and then resets internal state. * **push**(< _Buffer_ >chunk) - _integer_ - Processes `chunk`, searching for a match. The return value is the last processed index in `chunk` + 1. * **reset**() - _(void)_ - Resets internal state. Useful for when you wish to start searching a new/different stream for example. streamsearch-1.1.0/lib/000077500000000000000000000000001415770173100147465ustar00rootroot00000000000000streamsearch-1.1.0/lib/sbmh.js000066400000000000000000000224351415770173100162430ustar00rootroot00000000000000'use strict'; /* Based heavily on the Streaming Boyer-Moore-Horspool C++ implementation by Hongli Lai at: https://github.com/FooBarWidget/boyer-moore-horspool */ function memcmp(buf1, pos1, buf2, pos2, num) { for (let i = 0; i < num; ++i) { if (buf1[pos1 + i] !== buf2[pos2 + i]) return false; } return true; } class SBMH { constructor(needle, cb) { if (typeof cb !== 'function') throw new Error('Missing match callback'); if (typeof needle === 'string') needle = Buffer.from(needle); else if (!Buffer.isBuffer(needle)) throw new Error(`Expected Buffer for needle, got ${typeof needle}`); const needleLen = needle.length; this.maxMatches = Infinity; this.matches = 0; this._cb = cb; this._lookbehindSize = 0; this._needle = needle; this._bufPos = 0; this._lookbehind = Buffer.allocUnsafe(needleLen); // Initialize occurrence table. this._occ = [ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, needleLen ]; // Populate occurrence table with analysis of the needle, ignoring the last // letter. if (needleLen > 1) { for (let i = 0; i < needleLen - 1; ++i) this._occ[needle[i]] = needleLen - 1 - i; } } reset() { this.matches = 0; this._lookbehindSize = 0; this._bufPos = 0; } push(chunk, pos) { let result; if (!Buffer.isBuffer(chunk)) chunk = Buffer.from(chunk, 'latin1'); const chunkLen = chunk.length; this._bufPos = pos || 0; while (result !== chunkLen && this.matches < this.maxMatches) result = feed(this, chunk); return result; } destroy() { const lbSize = this._lookbehindSize; if (lbSize) this._cb(false, this._lookbehind, 0, lbSize, false); this.reset(); } } function feed(self, data) { const len = data.length; const needle = self._needle; const needleLen = needle.length; // Positive: points to a position in `data` // pos == 3 points to data[3] // Negative: points to a position in the lookbehind buffer // pos == -2 points to lookbehind[lookbehindSize - 2] let pos = -self._lookbehindSize; const lastNeedleCharPos = needleLen - 1; const lastNeedleChar = needle[lastNeedleCharPos]; const end = len - needleLen; const occ = self._occ; const lookbehind = self._lookbehind; if (pos < 0) { // Lookbehind buffer is not empty. Perform Boyer-Moore-Horspool // search with character lookup code that considers both the // lookbehind buffer and the current round's haystack data. // // Loop until // there is a match. // or until // we've moved past the position that requires the // lookbehind buffer. In this case we switch to the // optimized loop. // or until // the character to look at lies outside the haystack. while (pos < 0 && pos <= end) { const nextPos = pos + lastNeedleCharPos; const ch = (nextPos < 0 ? lookbehind[self._lookbehindSize + nextPos] : data[nextPos]); if (ch === lastNeedleChar && matchNeedle(self, data, pos, lastNeedleCharPos)) { self._lookbehindSize = 0; ++self.matches; if (pos > -self._lookbehindSize) self._cb(true, lookbehind, 0, self._lookbehindSize + pos, false); else self._cb(true, undefined, 0, 0, true); return (self._bufPos = pos + needleLen); } pos += occ[ch]; } // No match. // There's too few data for Boyer-Moore-Horspool to run, // so let's use a different algorithm to skip as much as // we can. // Forward pos until // the trailing part of lookbehind + data // looks like the beginning of the needle // or until // pos == 0 while (pos < 0 && !matchNeedle(self, data, pos, len - pos)) ++pos; if (pos < 0) { // Cut off part of the lookbehind buffer that has // been processed and append the entire haystack // into it. const bytesToCutOff = self._lookbehindSize + pos; if (bytesToCutOff > 0) { // The cut off data is guaranteed not to contain the needle. self._cb(false, lookbehind, 0, bytesToCutOff, false); } self._lookbehindSize -= bytesToCutOff; lookbehind.copy(lookbehind, 0, bytesToCutOff, self._lookbehindSize); lookbehind.set(data, self._lookbehindSize); self._lookbehindSize += len; self._bufPos = len; return len; } // Discard lookbehind buffer. self._cb(false, lookbehind, 0, self._lookbehindSize, false); self._lookbehindSize = 0; } pos += self._bufPos; const firstNeedleChar = needle[0]; // Lookbehind buffer is now empty. Perform Boyer-Moore-Horspool // search with optimized character lookup code that only considers // the current round's haystack data. while (pos <= end) { const ch = data[pos + lastNeedleCharPos]; if (ch === lastNeedleChar && data[pos] === firstNeedleChar && memcmp(needle, 0, data, pos, lastNeedleCharPos)) { ++self.matches; if (pos > 0) self._cb(true, data, self._bufPos, pos, true); else self._cb(true, undefined, 0, 0, true); return (self._bufPos = pos + needleLen); } pos += occ[ch]; } // There was no match. If there's trailing haystack data that we cannot // match yet using the Boyer-Moore-Horspool algorithm (because the trailing // data is less than the needle size) then match using a modified // algorithm that starts matching from the beginning instead of the end. // Whatever trailing data is left after running this algorithm is added to // the lookbehind buffer. while (pos < len) { if (data[pos] !== firstNeedleChar || !memcmp(data, pos, needle, 0, len - pos)) { ++pos; continue; } data.copy(lookbehind, 0, pos, len); self._lookbehindSize = len - pos; break; } // Everything until `pos` is guaranteed not to contain needle data. if (pos > 0) self._cb(false, data, self._bufPos, pos < len ? pos : len, true); self._bufPos = len; return len; } function matchNeedle(self, data, pos, len) { const lb = self._lookbehind; const lbSize = self._lookbehindSize; const needle = self._needle; for (let i = 0; i < len; ++i, ++pos) { const ch = (pos < 0 ? lb[lbSize + pos] : data[pos]); if (ch !== needle[i]) return false; } return true; } module.exports = SBMH; streamsearch-1.1.0/package.json000066400000000000000000000015121415770173100164650ustar00rootroot00000000000000{ "name": "streamsearch", "version": "1.1.0", "author": "Brian White ", "description": "Streaming Boyer-Moore-Horspool searching for node.js", "main": "./lib/sbmh.js", "engines": { "node": ">=10.0.0" }, "devDependencies": { "@mscdex/eslint-config": "^1.1.0", "eslint": "^7.32.0" }, "scripts": { "test": "node test/test.js", "lint": "eslint --cache --report-unused-disable-directives --ext=.js .eslintrc.js lib test", "lint:fix": "npm run lint -- --fix" }, "keywords": [ "stream", "horspool", "boyer-moore-horspool", "boyer-moore", "search" ], "licenses": [{ "type": "MIT", "url": "http://github.com/mscdex/streamsearch/raw/master/LICENSE" }], "repository": { "type": "git", "url": "http://github.com/mscdex/streamsearch.git" } } streamsearch-1.1.0/test/000077500000000000000000000000001415770173100151575ustar00rootroot00000000000000streamsearch-1.1.0/test/test.js000066400000000000000000000025661415770173100165050ustar00rootroot00000000000000'use strict'; const assert = require('assert'); const StreamSearch = require('../lib/sbmh.js'); [ { needle: '\r\n', chunks: [ 'foo', ' bar', '\r', '\n', 'baz, hello\r', '\n world.', '\r\n Node.JS rules!!\r\n\r\n', ], expect: [ [false, 'foo'], [false, ' bar'], [ true, null], [false, 'baz, hello'], [ true, null], [false, ' world.'], [ true, null], [ true, ' Node.JS rules!!'], [ true, ''], ], }, { needle: '---foobarbaz', chunks: [ '---foobarbaz', 'asdf', '\r\n', '---foobarba', '---foobar', 'ba', '\r\n---foobarbaz--\r\n', ], expect: [ [ true, null], [false, 'asdf'], [false, '\r\n'], [false, '---foobarba'], [false, '---foobarba'], [ true, '\r\n'], [false, '--\r\n'], ], }, ].forEach((test, i) => { console.log(`Running test #${i + 1}`); const { needle, chunks, expect } = test; const results = []; const ss = new StreamSearch(Buffer.from(needle), (isMatch, data, start, end) => { if (data) data = data.toString('latin1', start, end); else data = null; results.push([isMatch, data]); }); for (const chunk of chunks) ss.push(Buffer.from(chunk)); assert.deepStrictEqual(results, expect); });