pax_global_header00006660000000000000000000000064144100014530014502gustar00rootroot0000000000000052 comment=919b26b59c9762b817a121f4416ccc44d82043ad split2-4.2.0/000077500000000000000000000000001441000145300127225ustar00rootroot00000000000000split2-4.2.0/.github/000077500000000000000000000000001441000145300142625ustar00rootroot00000000000000split2-4.2.0/.github/dependabot.yml000066400000000000000000000004011441000145300171050ustar00rootroot00000000000000version: 2 updates: - package-ecosystem: npm directory: "/" schedule: interval: daily open-pull-requests-limit: 10 ignore: - dependency-name: tap versions: - ">= 13.a, < 14" - dependency-name: tap versions: - ">= 14.a, < 15" split2-4.2.0/.github/release-drafter.yml000066400000000000000000000000621441000145300200500ustar00rootroot00000000000000template: | ## What’s Changed $CHANGES split2-4.2.0/.github/workflows/000077500000000000000000000000001441000145300163175ustar00rootroot00000000000000split2-4.2.0/.github/workflows/ci.yml000066400000000000000000000007021441000145300174340ustar00rootroot00000000000000name: ci on: [push, pull_request] jobs: test: runs-on: ubuntu-latest strategy: matrix: node-version: [10.x, 12.x, 14.x, 16.x] steps: - uses: actions/checkout@v2 - name: Use Node.js uses: actions/setup-node@v1 with: node-version: ${{ matrix.node-version }} - name: Install run: | npm install - name: Run tests run: | npm run test split2-4.2.0/.gitignore000066400000000000000000000001301441000145300147040ustar00rootroot00000000000000node_modules/ build/ libleveldb.so libleveldb.a test-data/ _benchdb_* *.sw* .nyc_output split2-4.2.0/.npmignore000066400000000000000000000001541441000145300147210ustar00rootroot00000000000000node_modules/ build/ libleveldb.so libleveldb.a test-data/ _benchdb_* *.sw* .travis.yml .github .nyc_output split2-4.2.0/LICENSE000066400000000000000000000013741441000145300137340ustar00rootroot00000000000000Copyright (c) 2014-2018, Matteo Collina Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. split2-4.2.0/README.md000066400000000000000000000064651441000145300142140ustar00rootroot00000000000000# Split2(matcher, mapper, options) ![ci](https://github.com/mcollina/split2/workflows/ci/badge.svg) Break up a stream and reassemble it so that each line is a chunk. `split2` is inspired by [@dominictarr](https://github.com/dominictarr) [`split`](https://github.com/dominictarr/split) module, and it is totally API compatible with it. However, it is based on Node.js core [`Transform`](https://nodejs.org/api/stream.html#stream_new_stream_transform_options). `matcher` may be a `String`, or a `RegExp`. Example, read every line in a file ... ``` js fs.createReadStream(file) .pipe(split2()) .on('data', function (line) { //each chunk now is a separate line! }) ``` `split` takes the same arguments as `string.split` except it defaults to '/\r?\n/', and the optional `limit` paremeter is ignored. [String#split](https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/String/split) `split` takes an optional options object on it's third argument, which is directly passed as a [Transform](https://nodejs.org/api/stream.html#stream_new_stream_transform_options) option. Additionally, the `.maxLength` and `.skipOverflow` options are implemented, which set limits on the internal buffer size and the stream's behavior when the limit is exceeded. There is no limit unless `maxLength` is set. When the internal buffer size exceeds `maxLength`, the stream emits an error by default. You may also set `skipOverflow` to true to suppress the error and instead skip past any lines that cause the internal buffer to exceed `maxLength`. Calling `.destroy` will make the stream emit `close`. Use this to perform cleanup logic ``` js var splitFile = function(filename) { var file = fs.createReadStream(filename) return file .pipe(split2()) .on('close', function() { // destroy the file stream in case the split stream was destroyed file.destroy() }) } var stream = splitFile('my-file.txt') stream.destroy() // will destroy the input file stream ``` # NDJ - Newline Delimited Json `split2` accepts a function which transforms each line. ``` js fs.createReadStream(file) .pipe(split2(JSON.parse)) .on('data', function (obj) { //each chunk now is a js object }) .on("error", function(error) { //handling parsing errors }) ``` However, in [@dominictarr](https://github.com/dominictarr) [`split`](https://github.com/dominictarr/split) the mapper is wrapped in a try-catch, while here it is not: if your parsing logic can throw, wrap it yourself. Otherwise, you can also use the stream error handling when mapper function throw. # License Copyright (c) 2014-2021, Matteo Collina Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. split2-4.2.0/bench.js000066400000000000000000000007301441000145300143370ustar00rootroot00000000000000'use strict' const split = require('./') const bench = require('fastbench') const binarySplit = require('binary-split') const fs = require('fs') function benchSplit (cb) { fs.createReadStream('package.json') .pipe(split()) .on('end', cb) .resume() } function benchBinarySplit (cb) { fs.createReadStream('package.json') .pipe(binarySplit()) .on('end', cb) .resume() } const run = bench([ benchSplit, benchBinarySplit ], 10000) run(run) split2-4.2.0/index.js000066400000000000000000000072641441000145300144000ustar00rootroot00000000000000/* Copyright (c) 2014-2021, Matteo Collina Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ 'use strict' const { Transform } = require('stream') const { StringDecoder } = require('string_decoder') const kLast = Symbol('last') const kDecoder = Symbol('decoder') function transform (chunk, enc, cb) { let list if (this.overflow) { // Line buffer is full. Skip to start of next line. const buf = this[kDecoder].write(chunk) list = buf.split(this.matcher) if (list.length === 1) return cb() // Line ending not found. Discard entire chunk. // Line ending found. Discard trailing fragment of previous line and reset overflow state. list.shift() this.overflow = false } else { this[kLast] += this[kDecoder].write(chunk) list = this[kLast].split(this.matcher) } this[kLast] = list.pop() for (let i = 0; i < list.length; i++) { try { push(this, this.mapper(list[i])) } catch (error) { return cb(error) } } this.overflow = this[kLast].length > this.maxLength if (this.overflow && !this.skipOverflow) { cb(new Error('maximum buffer reached')) return } cb() } function flush (cb) { // forward any gibberish left in there this[kLast] += this[kDecoder].end() if (this[kLast]) { try { push(this, this.mapper(this[kLast])) } catch (error) { return cb(error) } } cb() } function push (self, val) { if (val !== undefined) { self.push(val) } } function noop (incoming) { return incoming } function split (matcher, mapper, options) { // Set defaults for any arguments not supplied. matcher = matcher || /\r?\n/ mapper = mapper || noop options = options || {} // Test arguments explicitly. switch (arguments.length) { case 1: // If mapper is only argument. if (typeof matcher === 'function') { mapper = matcher matcher = /\r?\n/ // If options is only argument. } else if (typeof matcher === 'object' && !(matcher instanceof RegExp) && !matcher[Symbol.split]) { options = matcher matcher = /\r?\n/ } break case 2: // If mapper and options are arguments. if (typeof matcher === 'function') { options = mapper mapper = matcher matcher = /\r?\n/ // If matcher and options are arguments. } else if (typeof mapper === 'object') { options = mapper mapper = noop } } options = Object.assign({}, options) options.autoDestroy = true options.transform = transform options.flush = flush options.readableObjectMode = true const stream = new Transform(options) stream[kLast] = '' stream[kDecoder] = new StringDecoder('utf8') stream.matcher = matcher stream.mapper = mapper stream.maxLength = options.maxLength stream.skipOverflow = options.skipOverflow || false stream.overflow = false stream._destroy = function (err, cb) { // Weird Node v12 bug that we need to work around this._writableState.errorEmitted = false cb(err) } return stream } module.exports = split split2-4.2.0/package.json000066400000000000000000000021311441000145300152050ustar00rootroot00000000000000{ "name": "split2", "version": "4.2.0", "description": "split a Text Stream into a Line Stream, using Stream 3", "main": "index.js", "scripts": { "lint": "standard --verbose", "unit": "nyc --lines 100 --branches 100 --functions 100 --check-coverage --reporter=text tape test.js", "coverage": "nyc --reporter=html --reporter=cobertura --reporter=text tape test/test.js", "test:report": "npm run lint && npm run unit:report", "test": "npm run lint && npm run unit", "legacy": "tape test.js" }, "pre-commit": [ "test" ], "website": "https://github.com/mcollina/split2", "repository": { "type": "git", "url": "https://github.com/mcollina/split2.git" }, "bugs": { "url": "http://github.com/mcollina/split2/issues" }, "engines": { "node": ">= 10.x" }, "author": "Matteo Collina ", "license": "ISC", "devDependencies": { "binary-split": "^1.0.3", "callback-stream": "^1.1.0", "fastbench": "^1.0.0", "nyc": "^15.0.1", "pre-commit": "^1.1.2", "standard": "^17.0.0", "tape": "^5.0.0" } } split2-4.2.0/test.js000066400000000000000000000173651441000145300142530ustar00rootroot00000000000000'use strict' const test = require('tape') const split = require('./') const callback = require('callback-stream') const strcb = callback.bind(null, { decodeStrings: false }) const objcb = callback.bind(null, { objectMode: true }) test('split two lines on end', function (t) { t.plan(2) const input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.end('hello\nworld') }) test('split two lines on two writes', function (t) { t.plan(2) const input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.write('hello') input.write('\nworld') input.end() }) test('split four lines on three writes', function (t) { t.plan(2) const input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world', 'bye', 'world']) })) input.write('hello\nwor') input.write('ld\nbye\nwo') input.write('rld') input.end() }) test('accumulate multiple writes', function (t) { t.plan(2) const input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['helloworld']) })) input.write('hello') input.write('world') input.end() }) test('split using a custom string matcher', function (t) { t.plan(2) const input = split('~') input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.end('hello~world') }) test('split using a custom regexp matcher', function (t) { t.plan(2) const input = split(/~/) input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.end('hello~world') }) test('support an option argument', function (t) { t.plan(2) const input = split({ highWaterMark: 2 }) input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.end('hello\nworld') }) test('support a mapper function', function (t) { t.plan(2) const a = { a: '42' } const b = { b: '24' } const input = split(JSON.parse) input.pipe(objcb(function (err, list) { t.error(err) t.deepEqual(list, [a, b]) })) input.write(JSON.stringify(a)) input.write('\n') input.end(JSON.stringify(b)) }) test('split lines windows-style', function (t) { t.plan(2) const input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.end('hello\r\nworld') }) test('splits a buffer', function (t) { t.plan(2) const input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.end(Buffer.from('hello\nworld')) }) test('do not end on undefined', function (t) { t.plan(2) const input = split(function (line) { }) input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, []) })) input.end(Buffer.from('hello\nworld')) }) test('has destroy method', function (t) { t.plan(1) const input = split(function (line) { }) input.on('close', function () { t.ok(true, 'close emitted') t.end() }) input.destroy() }) test('support custom matcher and mapper', function (t) { t.plan(4) const a = { a: '42' } const b = { b: '24' } const input = split('~', JSON.parse) t.equal(input.matcher, '~') t.equal(typeof input.mapper, 'function') input.pipe(objcb(function (err, list) { t.notOk(err, 'no errors') t.deepEqual(list, [a, b]) })) input.write(JSON.stringify(a)) input.write('~') input.end(JSON.stringify(b)) }) test('support custom matcher and options', function (t) { t.plan(6) const input = split('~', { highWaterMark: 1024 }) t.equal(input.matcher, '~') t.equal(typeof input.mapper, 'function') t.equal(input._readableState.highWaterMark, 1024) t.equal(input._writableState.highWaterMark, 1024) input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.end('hello~world') }) test('support mapper and options', function (t) { t.plan(6) const a = { a: '42' } const b = { b: '24' } const input = split(JSON.parse, { highWaterMark: 1024 }) t.ok(input.matcher instanceof RegExp, 'matcher is RegExp') t.equal(typeof input.mapper, 'function') t.equal(input._readableState.highWaterMark, 1024) t.equal(input._writableState.highWaterMark, 1024) input.pipe(objcb(function (err, list) { t.error(err) t.deepEqual(list, [a, b]) })) input.write(JSON.stringify(a)) input.write('\n') input.end(JSON.stringify(b)) }) test('split utf8 chars', function (t) { t.plan(2) const input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['烫烫烫', '锟斤拷']) })) const buf = Buffer.from('烫烫烫\r\n锟斤拷', 'utf8') for (let i = 0; i < buf.length; ++i) { input.write(buf.slice(i, i + 1)) } input.end() }) test('split utf8 chars 2by2', function (t) { t.plan(2) const input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['烫烫烫', '烫烫烫']) })) const str = '烫烫烫\r\n烫烫烫' const buf = Buffer.from(str, 'utf8') for (let i = 0; i < buf.length; i += 2) { input.write(buf.slice(i, i + 2)) } input.end() }) test('split lines when the \n comes at the end of a chunk', function (t) { t.plan(2) const input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.write('hello\n') input.end('world') }) test('truncated utf-8 char', function (t) { t.plan(2) const input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['烫' + Buffer.from('e7', 'hex').toString()]) })) const str = '烫烫' const buf = Buffer.from(str, 'utf8') input.write(buf.slice(0, 3)) input.end(buf.slice(3, 4)) }) test('maximum buffer limit', function (t) { t.plan(1) const input = split({ maxLength: 2 }) input.on('error', function (err) { t.ok(err) }) input.resume() input.write('hey') }) test('readable highWaterMark', function (t) { const input = split() t.equal(input._readableState.highWaterMark, 16) t.end() }) test('maxLength < chunk size', function (t) { t.plan(2) const input = split({ maxLength: 2 }) input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['a', 'b']) })) input.end('a\nb') }) test('maximum buffer limit w/skip', function (t) { t.plan(2) const input = split({ maxLength: 2, skipOverflow: true }) input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['a', 'b', 'c']) })) input.write('a\n123') input.write('456') input.write('789\nb\nc') input.end() }) test("don't modify the options object", function (t) { t.plan(2) const options = {} const input = split(options) input.pipe(strcb(function (err, list) { t.error(err) t.same(options, {}) })) input.end() }) test('mapper throws flush', function (t) { t.plan(1) const error = new Error() const input = split(function () { throw error }) input.on('error', (err, list) => { t.same(err, error) }) input.end('hello') }) test('mapper throws on transform', function (t) { t.plan(1) const error = new Error() const input = split(function (l) { throw error }) input.on('error', (err) => { t.same(err, error) }) input.write('a') input.write('\n') input.end('b') }) test('supports Symbol.split', function (t) { t.plan(2) const input = split({ [Symbol.split] (str) { return str.split('~') } }) input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.end('hello~world') })