pax_global_header00006660000000000000000000000064134432717760014530gustar00rootroot0000000000000052 comment=a7ebf268a646be4c8507d909b822f4fd1e34a2be split2-3.1.1/000077500000000000000000000000001344327177600127475ustar00rootroot00000000000000split2-3.1.1/.github/000077500000000000000000000000001344327177600143075ustar00rootroot00000000000000split2-3.1.1/.github/release-drafter.yml000066400000000000000000000000621344327177600200750ustar00rootroot00000000000000template: | ## What’s Changed $CHANGES split2-3.1.1/.gitignore000066400000000000000000000001141344327177600147330ustar00rootroot00000000000000node_modules/ build/ libleveldb.so libleveldb.a test-data/ _benchdb_* *.sw* split2-3.1.1/.npmignore000066400000000000000000000001401344327177600147410ustar00rootroot00000000000000node_modules/ build/ libleveldb.so libleveldb.a test-data/ _benchdb_* *.sw* .travis.yml .github split2-3.1.1/.travis.yml000066400000000000000000000001111344327177600150510ustar00rootroot00000000000000language: node_js sudo: false node_js: - "6" - "8" - "10" - "11" split2-3.1.1/LICENSE000066400000000000000000000013741344327177600137610ustar00rootroot00000000000000Copyright (c) 2014-2018, Matteo Collina Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. split2-3.1.1/README.md000066400000000000000000000071171344327177600142340ustar00rootroot00000000000000# Split2(matcher, mapper, options) [![Greenkeeper badge](https://badges.greenkeeper.io/mcollina/split2.svg)](https://greenkeeper.io/) [![build status](https://secure.travis-ci.org/mcollina/split2.svg)](http://travis-ci.org/mcollina/split2) Break up a stream and reassemble it so that each line is a chunk. `split2` is inspired by [@dominictarr](https://github.com/dominictarr) [`split`](https://github.com/dominictarr/split) module, and it is totally API compatible with it. However, it is based on Node.js core [`Transform`](https://nodejs.org/api/stream.html#stream_new_stream_transform_options) via [`readable-stream`](https://github.com/nodejs/readable-stream) `matcher` may be a `String`, or a `RegExp`. Example, read every line in a file ... ``` js fs.createReadStream(file) .pipe(split2()) .on('data', function (line) { //each chunk now is a separate line! }) ``` `split` takes the same arguments as `string.split` except it defaults to '/\r?\n/', and the optional `limit` paremeter is ignored. [String#split](https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/String/split) `split` takes an optional options object on it's third argument, which is directly passed as a [Transform](https://nodejs.org/api/stream.html#stream_new_stream_transform_options) option. Additionally, the `.maxLength` and `.skipOverflow` options are implemented, which set limits on the internal buffer size and the stream's behavior when the limit is exceeded. There is no limit unless `maxLength` is set. When the internal buffer size exceeds `maxLength`, the stream emits an error by default. You may also set `skipOverflow` to true to suppress the error and instead skip past any lines that cause the internal buffer to exceed `maxLength`. Calling `.destroy` will make the stream emit `close`. Use this to perform cleanup logic ``` js var splitFile = function(filename) { var file = fs.createReadStream(filename) return file .pipe(split2()) .on('close', function() { // destroy the file stream in case the split stream was destroyed file.destroy() }) } var stream = splitFile('my-file.txt') stream.destroy() // will destroy the input file stream ``` # NDJ - Newline Delimited Json `split2` accepts a function which transforms each line. ``` js fs.createReadStream(file) .pipe(split2(JSON.parse)) .on('data', function (obj) { //each chunk now is a js object }) ``` However, in [@dominictarr](https://github.com/dominictarr) [`split`](https://github.com/dominictarr/split) the mapper is wrapped in a try-catch, while here it is not: if your parsing logic can throw, wrap it yourself. # Benchmark ```bash $ node bench.js benchSplit*10000: 1484.983ms benchBinarySplit*10000: 1484.080ms benchSplit*10000: 1407.334ms benchBinarySplit*10000: 1500.281ms ``` Benchmark taken on Node 8.11.3, on a Macbook i5 2018. # License Copyright (c) 2014-2018, Matteo Collina Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. split2-3.1.1/bench.js000066400000000000000000000007161344327177600143700ustar00rootroot00000000000000'use strict' var split = require('./') var bench = require('fastbench') var binarySplit = require('binary-split') var fs = require('fs') function benchSplit (cb) { fs.createReadStream('package.json') .pipe(split()) .on('end', cb) .resume() } function benchBinarySplit (cb) { fs.createReadStream('package.json') .pipe(binarySplit()) .on('end', cb) .resume() } var run = bench([ benchSplit, benchBinarySplit ], 10000) run(run) split2-3.1.1/index.js000066400000000000000000000065231344327177600144220ustar00rootroot00000000000000/* Copyright (c) 2014-2018, Matteo Collina Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ 'use strict' const { Transform } = require('readable-stream') const { StringDecoder } = require('string_decoder') const kLast = Symbol('last') const kDecoder = Symbol('decoder') function transform (chunk, enc, cb) { var list if (this.overflow) { // Line buffer is full. Skip to start of next line. var buf = this[kDecoder].write(chunk) list = buf.split(this.matcher) if (list.length === 1) return cb() // Line ending not found. Discard entire chunk. // Line ending found. Discard trailing fragment of previous line and reset overflow state. list.shift() this.overflow = false } else { this[kLast] += this[kDecoder].write(chunk) list = this[kLast].split(this.matcher) } this[kLast] = list.pop() for (var i = 0; i < list.length; i++) { push(this, this.mapper(list[i])) } this.overflow = this[kLast].length > this.maxLength if (this.overflow && !this.skipOverflow) return cb(new Error('maximum buffer reached')) cb() } function flush (cb) { // forward any gibberish left in there this[kLast] += this[kDecoder].end() if (this[kLast]) { push(this, this.mapper(this[kLast])) } cb() } function push (self, val) { if (val !== undefined) { self.push(val) } } function noop (incoming) { return incoming } function split (matcher, mapper, options) { // Set defaults for any arguments not supplied. matcher = matcher || /\r?\n/ mapper = mapper || noop options = options || {} // Test arguments explicitly. switch (arguments.length) { case 1: // If mapper is only argument. if (typeof matcher === 'function') { mapper = matcher matcher = /\r?\n/ // If options is only argument. } else if (typeof matcher === 'object' && !(matcher instanceof RegExp)) { options = matcher matcher = /\r?\n/ } break case 2: // If mapper and options are arguments. if (typeof matcher === 'function') { options = mapper mapper = matcher matcher = /\r?\n/ // If matcher and options are arguments. } else if (typeof mapper === 'object') { options = mapper mapper = noop } } options = Object.assign({}, options) options.transform = transform options.flush = flush options.readableObjectMode = true const stream = new Transform(options) stream[kLast] = '' stream[kDecoder] = new StringDecoder('utf8') stream.matcher = matcher stream.mapper = mapper stream.maxLength = options.maxLength stream.skipOverflow = options.skipOverflow stream.overflow = false return stream } module.exports = split split2-3.1.1/package.json000066400000000000000000000015431344327177600152400ustar00rootroot00000000000000{ "name": "split2", "version": "3.1.1", "description": "split a Text Stream into a Line Stream, using Stream 3", "main": "index.js", "scripts": { "test": "standard && tap -b test.js" }, "pre-commit": [ "test" ], "website": "https://github.com/mcollina/split2", "repository": { "type": "git", "url": "https://github.com/mcollina/split2.git" }, "bugs": { "url": "http://github.com/mcollina/split2/issues" }, "author": "Matteo Collina ", "license": "ISC", "devDependencies": { "binary-split": "^1.0.3", "callback-stream": "^1.1.0", "fastbench": "^1.0.0", "pre-commit": "^1.1.2", "safe-buffer": "^5.1.1", "standard": "^12.0.0", "tap": "^12.0.0" }, "dependencies": { "readable-stream": "^3.0.0" }, "greenkeeper": { "ignore": [ "tap" ] } } split2-3.1.1/test.js000066400000000000000000000156611344327177600142750ustar00rootroot00000000000000'use strict' var test = require('tap').test var split = require('./') var callback = require('callback-stream') var Buffer = require('safe-buffer').Buffer var strcb = callback.bind(null, { decodeStrings: false }) var objcb = callback.bind(null, { objectMode: true }) test('split two lines on end', function (t) { t.plan(2) var input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.end('hello\nworld') }) test('split two lines on two writes', function (t) { t.plan(2) var input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.write('hello') input.write('\nworld') input.end() }) test('split four lines on three writes', function (t) { t.plan(2) var input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world', 'bye', 'world']) })) input.write('hello\nwor') input.write('ld\nbye\nwo') input.write('rld') input.end() }) test('accumulate multiple writes', function (t) { t.plan(2) var input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['helloworld']) })) input.write('hello') input.write('world') input.end() }) test('split using a custom string matcher', function (t) { t.plan(2) var input = split('~') input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.end('hello~world') }) test('split using a custom regexp matcher', function (t) { t.plan(2) var input = split(/~/) input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.end('hello~world') }) test('support an option argument', function (t) { t.plan(2) var input = split({ highWaterMark: 2 }) input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.end('hello\nworld') }) test('support a mapper function', function (t) { t.plan(2) var a = { a: '42' } var b = { b: '24' } var input = split(JSON.parse) input.pipe(objcb(function (err, list) { t.error(err) t.deepEqual(list, [a, b]) })) input.write(JSON.stringify(a)) input.write('\n') input.end(JSON.stringify(b)) }) test('split lines windows-style', function (t) { t.plan(2) var input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.end('hello\r\nworld') }) test('splits a buffer', function (t) { t.plan(2) var input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.end(Buffer.from('hello\nworld')) }) test('do not end on undefined', function (t) { t.plan(2) var input = split(function (line) {}) input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, []) })) input.end(Buffer.from('hello\nworld')) }) test('has destroy method', function (t) { t.plan(1) var input = split(function (line) {}) input.on('close', function () { t.ok(true, 'close emitted') t.end() }) input.destroy() }) test('support custom matcher and mapper', function (t) { t.plan(4) var a = { a: '42' } var b = { b: '24' } var input = split('~', JSON.parse) t.equal(input.matcher, '~') t.equal(typeof input.mapper, 'function') input.pipe(objcb(function (err, list) { t.notOk(err, 'no errors') t.deepEqual(list, [a, b]) })) input.write(JSON.stringify(a)) input.write('~') input.end(JSON.stringify(b)) }) test('support custom matcher and options', function (t) { t.plan(6) var input = split('~', { highWaterMark: 1024 }) t.equal(input.matcher, '~') t.equal(typeof input.mapper, 'function') t.equal(input._readableState.highWaterMark, 1024) t.equal(input._writableState.highWaterMark, 1024) input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.end('hello~world') }) test('support mapper and options', function (t) { t.plan(6) var a = { a: '42' } var b = { b: '24' } var input = split(JSON.parse, { highWaterMark: 1024 }) t.ok(input.matcher instanceof RegExp, 'matcher is RegExp') t.equal(typeof input.mapper, 'function') t.equal(input._readableState.highWaterMark, 1024) t.equal(input._writableState.highWaterMark, 1024) input.pipe(objcb(function (err, list) { t.error(err) t.deepEqual(list, [a, b]) })) input.write(JSON.stringify(a)) input.write('\n') input.end(JSON.stringify(b)) }) test('split utf8 chars', function (t) { t.plan(2) var input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['烫烫烫', '锟斤拷']) })) var buf = Buffer.from('烫烫烫\r\n锟斤拷', 'utf8') for (var i = 0; i < buf.length; ++i) { input.write(buf.slice(i, i + 1)) } input.end() }) test('split utf8 chars 2by2', function (t) { t.plan(2) var input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['烫烫烫', '烫烫烫']) })) var str = '烫烫烫\r\n烫烫烫' var buf = Buffer.from(str, 'utf8') for (var i = 0; i < buf.length; i += 2) { input.write(buf.slice(i, i + 2)) } input.end() }) test('split lines when the \n comes at the end of a chunk', function (t) { t.plan(2) var input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.write('hello\n') input.end('world') }) test('truncated utf-8 char', function (t) { t.plan(2) var input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['烫' + Buffer.from('e7', 'hex').toString()]) })) var str = '烫烫' var buf = Buffer.from(str, 'utf8') input.write(buf.slice(0, 3)) input.end(buf.slice(3, 4)) }) test('maximum buffer limit', function (t) { t.plan(1) var input = split({ maxLength: 2 }) input.pipe(strcb(function (err, list) { t.ok(err) })) input.write('hey') }) test('readable highWaterMark', function (t) { var input = split() t.equal(input._readableState.highWaterMark, 16) t.end() }) test('maxLength < chunk size', function (t) { t.plan(2) var input = split({ maxLength: 2 }) input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['a', 'b']) })) input.end('a\nb') }) test('maximum buffer limit w/skip', function (t) { t.plan(2) var input = split({ maxLength: 2, skipOverflow: true }) input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['a', 'b', 'c']) })) input.write('a\n123') input.write('456') input.write('789\nb\nc') input.end() }) test("don't modify the options object", function (t) { t.plan(2) var options = {} var input = split(options) input.pipe(strcb(function (err, list) { t.error(err) t.same(options, {}) })) input.end() })