pax_global_header00006660000000000000000000000064131621442300014506gustar00rootroot0000000000000052 comment=ba1f582661a1ebb3b1f6ea4059c98bd9a73b7a11 split2-2.2.0/000077500000000000000000000000001316214423000127245ustar00rootroot00000000000000split2-2.2.0/.gitignore000066400000000000000000000001141316214423000147100ustar00rootroot00000000000000node_modules/ build/ libleveldb.so libleveldb.a test-data/ _benchdb_* *.sw* split2-2.2.0/.travis.yml000066400000000000000000000001451316214423000150350ustar00rootroot00000000000000language: node_js sudo: false node_js: - "0.10" - "0.12" - "4" - "5" - "6" - "7" - "8" split2-2.2.0/LICENSE000066400000000000000000000013741316214423000137360ustar00rootroot00000000000000Copyright (c) 2014-2017, Matteo Collina Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. split2-2.2.0/README.md000066400000000000000000000056351316214423000142140ustar00rootroot00000000000000# Split2(matcher, mapper, options) [![build status](https://secure.travis-ci.org/mcollina/split2.svg)](http://travis-ci.org/mcollina/split2) Break up a stream and reassemble it so that each line is a chunk. `split2` is inspired by [@dominictarr](https://github.com/dominictarr) [`split`](https://github.com/dominictarr/split) module, and it is totally API compatible with it. However, it is based on [`through2`](https://github.com/rvagg/through2) by [@rvagg](https://github.com/rvagg) and it is fully based on Stream3. `matcher` may be a `String`, or a `RegExp`. Example, read every line in a file ... ``` js fs.createReadStream(file) .pipe(split2()) .on('data', function (line) { //each chunk now is a separate line! }) ``` `split` takes the same arguments as `string.split` except it defaults to '/\r?\n/', and the optional `limit` paremeter is ignored. [String#split](https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/String/split) `split` takes an optional options object on it's third argument, which is directly passed as a [Transform](http://nodejs.org/api/stream.html#stream_class_stream_transform_1) option. Additionally, the `.maxLength` option is implemented, which will make the split stream throw an error if the buffer size exceeds `.maxLength`. Calling `.destroy` will make the stream emit `close`. Use this to perform cleanup logic ``` js var splitFile = function(filename) { var file = fs.createReadStream(filename) return file .pipe(split2()) .on('close', function() { // destroy the file stream in case the split stream was destroyed file.destroy() }) } var stream = splitFile('my-file.txt') stream.destroy() // will destroy the input file stream ``` # NDJ - Newline Delimited Json `split2` accepts a function which transforms each line. ``` js fs.createReadStream(file) .pipe(split2(JSON.parse)) .on('data', function (obj) { //each chunk now is a js object }) ``` However, in [@dominictarr](https://github.com/dominictarr) [`split`](https://github.com/dominictarr/split) the mapper is wrapped in a try-catch, while here it is not: if your parsing logic can throw, wrap it yourself. # License Copyright (c) 2014-2017, Matteo Collina Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. split2-2.2.0/bench.js000066400000000000000000000004141316214423000143400ustar00rootroot00000000000000'use strict' var split = require('./') var bench = require('fastbench') var fs = require('fs') function benchSplit (cb) { fs.createReadStream('package.json') .pipe(split()) .on('end', cb) .resume() } var run = bench([ benchSplit ], 10000) run(run) split2-2.2.0/index.js000066400000000000000000000056171316214423000144020ustar00rootroot00000000000000/* Copyright (c) 2014-2016, Matteo Collina Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ 'use strict' var through = require('through2') var StringDecoder = require('string_decoder').StringDecoder function transform (chunk, enc, cb) { this._last += this._decoder.write(chunk) if (this._last.length > this.maxLength) { return cb(new Error('maximum buffer reached')) } var list = this._last.split(this.matcher) this._last = list.pop() for (var i = 0; i < list.length; i++) { push(this, this.mapper(list[i])) } cb() } function flush (cb) { // forward any gibberish left in there this._last += this._decoder.end() if (this._last) { push(this, this.mapper(this._last)) } cb() } function push (self, val) { if (val !== undefined) { self.push(val) } } function noop (incoming) { return incoming } function split (matcher, mapper, options) { // Set defaults for any arguments not supplied. matcher = matcher || /\r?\n/ mapper = mapper || noop options = options || {} // Test arguments explicitly. switch (arguments.length) { case 1: // If mapper is only argument. if (typeof matcher === 'function') { mapper = matcher matcher = /\r?\n/ // If options is only argument. } else if (typeof matcher === 'object' && !(matcher instanceof RegExp)) { options = matcher matcher = /\r?\n/ } break case 2: // If mapper and options are arguments. if (typeof matcher === 'function') { options = mapper mapper = matcher matcher = /\r?\n/ // If matcher and options are arguments. } else if (typeof mapper === 'object') { options = mapper mapper = noop } } var stream = through(options, transform, flush) // this stream is in objectMode only in the readable part stream._readableState.objectMode = true // objectMode default hwm is 16 and not 16384 if (stream._readableState.highWaterMark && !options.highWaterMark) { stream._readableState.highWaterMark = 16 } stream._last = '' stream._decoder = new StringDecoder('utf8') stream.matcher = matcher stream.mapper = mapper stream.maxLength = options.maxLength return stream } module.exports = split split2-2.2.0/package.json000066400000000000000000000014041316214423000152110ustar00rootroot00000000000000{ "name": "split2", "version": "2.2.0", "description": "split a Text Stream into a Line Stream, using Stream 3", "main": "index.js", "scripts": { "test": "standard && tap -b test.js" }, "pre-commit": [ "test" ], "website": "https://github.com/mcollina/split2", "repository": { "type": "git", "url": "https://github.com/mcollina/split2.git" }, "bugs": { "url": "http://github.com/mcollina/split2/issues" }, "author": "Matteo Collina ", "license": "ISC", "devDependencies": { "callback-stream": "^1.1.0", "fastbench": "^1.0.0", "pre-commit": "^1.1.2", "safe-buffer": "^5.1.1", "standard": "^10.0.0", "tap": "^10.0.0" }, "dependencies": { "through2": "^2.0.2" } } split2-2.2.0/test.js000066400000000000000000000135741316214423000142530ustar00rootroot00000000000000'use strict' var test = require('tap').test var split = require('./') var callback = require('callback-stream') var Buffer = require('safe-buffer').Buffer var strcb = callback.bind(null, { decodeStrings: false }) var objcb = callback.bind(null, { objectMode: true }) test('split two lines on end', function (t) { t.plan(2) var input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.end('hello\nworld') }) test('split two lines on two writes', function (t) { t.plan(2) var input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.write('hello') input.write('\nworld') input.end() }) test('accumulate multiple writes', function (t) { t.plan(2) var input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['helloworld']) })) input.write('hello') input.write('world') input.end() }) test('split using a custom string matcher', function (t) { t.plan(2) var input = split('~') input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.end('hello~world') }) test('split using a custom regexp matcher', function (t) { t.plan(2) var input = split(/~/) input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.end('hello~world') }) test('support an option argument', function (t) { t.plan(2) var input = split({ highWaterMark: 2 }) input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.end('hello\nworld') }) test('support a mapper function', function (t) { t.plan(2) var a = { a: '42' } var b = { b: '24' } var input = split(JSON.parse) input.pipe(objcb(function (err, list) { t.error(err) t.deepEqual(list, [a, b]) })) input.write(JSON.stringify(a)) input.write('\n') input.end(JSON.stringify(b)) }) test('split lines windows-style', function (t) { t.plan(2) var input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.end('hello\r\nworld') }) test('splits a buffer', function (t) { t.plan(2) var input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.end(Buffer.from('hello\nworld')) }) test('do not end on undefined', function (t) { t.plan(2) var input = split(function (line) {}) input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, []) })) input.end(Buffer.from('hello\nworld')) }) test('has destroy method', function (t) { t.plan(1) var input = split(function (line) {}) input.on('close', function () { t.ok(true, 'close emitted') t.end() }) input.destroy() }) test('support custom matcher and mapper', function (t) { t.plan(4) var a = { a: '42' } var b = { b: '24' } var input = split('~', JSON.parse) t.equal(input.matcher, '~') t.equal(typeof input.mapper, 'function') input.pipe(objcb(function (err, list) { t.notOk(err, 'no errors') t.deepEqual(list, [a, b]) })) input.write(JSON.stringify(a)) input.write('~') input.end(JSON.stringify(b)) }) test('support custom matcher and options', function (t) { t.plan(6) var input = split('~', { highWaterMark: 1024 }) t.equal(input.matcher, '~') t.equal(typeof input.mapper, 'function') t.equal(input._readableState.highWaterMark, 1024) t.equal(input._writableState.highWaterMark, 1024) input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.end('hello~world') }) test('support mapper and options', function (t) { t.plan(6) var a = { a: '42' } var b = { b: '24' } var input = split(JSON.parse, { highWaterMark: 1024 }) t.ok(input.matcher instanceof RegExp, 'matcher is RegExp') t.equal(typeof input.mapper, 'function') t.equal(input._readableState.highWaterMark, 1024) t.equal(input._writableState.highWaterMark, 1024) input.pipe(objcb(function (err, list) { t.error(err) t.deepEqual(list, [a, b]) })) input.write(JSON.stringify(a)) input.write('\n') input.end(JSON.stringify(b)) }) test('split utf8 chars', function (t) { t.plan(2) var input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['烫烫烫', '锟斤拷']) })) var buf = Buffer.from('烫烫烫\r\n锟斤拷', 'utf8') for (var i = 0; i < buf.length; ++i) { input.write(buf.slice(i, i + 1)) } input.end() }) test('split utf8 chars 2by2', function (t) { t.plan(2) var input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['烫烫烫', '烫烫烫']) })) var str = '烫烫烫\r\n烫烫烫' var buf = Buffer.from(str, 'utf8') for (var i = 0; i < buf.length; i += 2) { input.write(buf.slice(i, i + 2)) } input.end() }) test('split lines when the \n comes at the end of a chunk', function (t) { t.plan(2) var input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['hello', 'world']) })) input.write('hello\n') input.end('world') }) test('truncated utf-8 char', function (t) { t.plan(2) var input = split() input.pipe(strcb(function (err, list) { t.error(err) t.deepEqual(list, ['烫' + Buffer.from('e7', 'hex').toString()]) })) var str = '烫烫' var buf = Buffer.from(str, 'utf8') input.write(buf.slice(0, 3)) input.end(buf.slice(3, 4)) }) test('maximum buffer limit', function (t) { t.plan(1) var input = split({ maxLength: 2 }) input.pipe(strcb(function (err, list) { t.ok(err) })) input.write('hey') }) test('readable highWaterMark', function (t) { var input = split() t.equal(input._readableState.highWaterMark, 16) t.end() })