node-split-1.0.0/.npmignore

node_modules
node_modules/*
npm_debug.log


node-split-1.0.0/.travis.yml

language: node_js
node_js:
  - "0.10"


node-split-1.0.0/LICENCE

Copyright (c) 2011 Dominic Tarr

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.


node-split-1.0.0/examples/pretty.js

var inspect = require('util').inspect
var es = require('event-stream')      //load event-stream
var split = require('../')

if(!module.parent) {
  es.pipe(                            //pipe joins streams together
    process.openStdin(),              //open stdin
    split(),                          //split stream to break on newlines
    es.map(function (data, callback) {//turn this async function into a stream
      var j
      try {
        j = JSON.parse(data)          //try to parse input into json
      } catch (err) {
        return callback(null, data)   //if it fails just pass it anyway
      }
      callback(null, inspect(j))      //render it nicely
    }),
    process.stdout                    // pipe it to stdout !
  )
}

// run this
//
// curl -sS registry.npmjs.org/event-stream | node pretty.js
//


node-split-1.0.0/index.js

//filter will re-emit the data if cb(err, pass) pass is truthy

// reduce is more tricky
// maybe we want to group the reductions or emit progress updates occasionally
// the most basic reduce just emits one 'data' event after it has received 'end'

var through = require('through')
var Decoder = require('string_decoder').StringDecoder

module.exports = split

//TODO pass in a function to map across the lines.

function split (matcher, mapper, options) {
  var decoder = new Decoder()
  var soFar = ''
  var maxLength = options && options.maxLength
  var trailing = options && options.trailing === false ? false : true

  if('function' === typeof matcher)
    mapper = matcher, matcher = null
  if (!matcher)
    matcher = /\r?\n/

  // run the optional mapper over a piece, then queue it downstream;
  // a mapper exception becomes an 'error' event on the stream
  function emit(stream, piece) {
    if(mapper) {
      try {
        piece = mapper(piece)
      }
      catch (err) {
        return stream.emit('error', err)
      }
      if('undefined' !== typeof piece)
        stream.queue(piece)
    } else
      stream.queue(piece)
  }

  // prepend whatever was buffered, split on the matcher, and keep the last
  // (possibly incomplete) piece in soFar until more data or end arrives
  function next (stream, buffer) {
    var pieces = ((soFar != null ? soFar : '') + buffer).split(matcher)
    soFar = pieces.pop()

    if (maxLength && soFar.length > maxLength)
      stream.emit('error', new Error('maximum buffer reached'))

    for (var i = 0; i < pieces.length; i++) {
      var piece = pieces[i]
      emit(stream, piece)
    }
  }

  return through(function (b) {
    next(this, decoder.write(b))
  },
  function () {
    // flush any bytes still held by the decoder, then emit the trailing piece
    if(decoder.end)
      next(this, decoder.end())
    if(trailing && soFar != null)
      emit(this, soFar)

    this.queue(null)
  })
}
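To make the buffering in `next()` and the flush step concrete, here is a minimal sketch, not a file shipped with the package; it assumes `require('split')` resolves to this module (use `require('./')` from inside the repo).

``` js
var split = require('split')

var s = split()              // default matcher: /\r?\n/

s.on('data', function (line) {
  console.log('line: %j', line)
})

s.write('hel')               // no newline yet: held in soFar, nothing emitted
s.write('lo\nwor')           // emits 'hello'; 'wor' stays buffered
s.write('ld')                // still buffered
s.end()                      // flush emits the trailing 'world' (trailing defaults to true)
```

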
node-split-1.0.0/package.json

{
  "name": "split",
  "version": "1.0.0",
  "license": "MIT",
  "description": "split a Text Stream into a Line Stream",
  "homepage": "http://github.com/dominictarr/split",
  "repository": {
    "type": "git",
    "url": "git://github.com/dominictarr/split.git"
  },
  "dependencies": {
    "through": "2"
  },
  "devDependencies": {
    "asynct": "*",
    "event-stream": "~3.0.2",
    "it-is": "1",
    "stream-spec": "~0.2",
    "ubelt": "~2.9",
    "string-to-stream": "~1.0.0"
  },
  "scripts": {
    "test": "asynct test/"
  },
  "author": "Dominic Tarr (http://bit.ly/dominictarr)",
  "optionalDependencies": {},
  "engines": {
    "node": "*"
  }
}


node-split-1.0.0/readme.markdown

# Split (matcher)

[![build status](https://secure.travis-ci.org/dominictarr/split.png)](http://travis-ci.org/dominictarr/split)

Break up a stream and reassemble it so that each line is a chunk. `matcher` may be a `String` or a `RegExp`.

Example, read every line in a file ...

``` js
  fs.createReadStream(file)
    .pipe(split())
    .on('data', function (line) {
      //each chunk now is a separate line!
    })
```

`split` takes the same arguments as `string.split` except it defaults to `/\r?\n/` instead of `','`, and the optional `limit` parameter is ignored.
[String#split](https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/String/split)

`split` takes an optional options object as its third argument.

``` js
  split(matcher, mapper, options)
```

Valid options:

* maxLength - The maximum buffer length without seeing a newline or `matcher`;
  if a single line exceeds this, the split stream will emit an error.

``` js
  split(JSON.parse, null, { maxLength: 2 })
```

* trailing - By default the last buffer not delimited by a newline or `matcher`
  will be emitted. To prevent this set `options.trailing` to `false`.

``` js
  split(JSON.parse, null, { trailing: false })
```

## keep matched splitter

As with `String#split`, if you split by a regular expression with a matching group, the matches will be retained in the collection.

```
   stdin
   .pipe(split(/(\r?\n)/))
   ... //lines + separators.
```

# NDJ - Newline Delimited Json

`split` accepts a function which transforms each line.

``` js
  fs.createReadStream(file)
    .pipe(split(JSON.parse))
    .on('data', function (obj) {
      //each chunk now is a js object
    })
    .on('error', function (err) {
      //syntax errors will land here
      //note, this ends the stream.
    })
```

# License

MIT
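The mapper and the options compose: `index.js` drops a piece whenever the mapper returns `undefined`, so a mapper can skip lines instead of erroring. A sketch combining that with `maxLength`; `data.ndjson` and the `64 * 1024` limit are illustrative choices, not values from the package.

``` js
var fs = require('fs')
var split = require('split')

fs.createReadStream('data.ndjson')
  .pipe(split(function (line) {
    if (line === '') return            // returning undefined drops the line (see emit() in index.js)
    return JSON.parse(line)            // a malformed line still emits an 'error' event
  }, null, { maxLength: 64 * 1024 }))  // guard against a line that never ends
  .on('data', function (obj) {
    console.log(obj)
  })
  .on('error', function (err) {
    console.error('bad line:', err.message)
  })
```

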
node-split-1.0.0/test/options.asynct.js

var it = require('it-is').style('colour')
  , split = require('..')

exports ['maximum buffer limit'] = function (test) {
  var s = split(JSON.parse, null, { maxLength: 2 })
    , caughtError = false
    , rows = []

  s.on('error', function (err) {
    caughtError = true
  })

  s.on('data', function (row) { rows.push(row) })

  s.write('{"a":1}\n{"')
  s.write('{ "')
  it(caughtError).equal(true)

  s.end()
  test.done()
}

exports ['ignore trailing buffers'] = function (test) {
  var s = split(JSON.parse, null, { trailing: false })
    , caughtError = false
    , rows = []

  s.on('error', function (err) {
    caughtError = true
  })

  s.on('data', function (row) { rows.push(row) })

  s.write('{"a":1}\n{"')
  s.write('{ "')
  s.end()

  it(caughtError).equal(false)
  it(rows).deepEqual([ { a: 1 } ])

  test.done()
}


node-split-1.0.0/test/partitioned_unicode.js

var it = require('it-is').style('colour')
  , split = require('..')

exports ['split data with partitioned unicode character'] = function (test) {
  var s = split(/,/g)
    , caughtError = false
    , rows = []

  s.on('error', function (err) {
    caughtError = true
  })

  s.on('data', function (row) { rows.push(row) })

  var x = 'テスト試験今日とても,よい天気で'
    , unicodeData = new Buffer(x)
    // partition in the middle of the three-byte 日 character
    , piece1 = unicodeData.slice(0, 20)
    , piece2 = unicodeData.slice(20, unicodeData.length)

  s.write(piece1)
  s.write(piece2)
  s.end()

  it(caughtError).equal(false)

  it(rows).deepEqual(['テスト試験今日とても', 'よい天気で'])
  it(rows).deepEqual(x.split(','))

  test.done()
}
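The test above passes because `index.js` decodes incoming buffers with `StringDecoder`, which holds back an incomplete UTF-8 sequence rather than emitting replacement characters. A standalone sketch of that behaviour (not part of the test suite; `Buffer.from` is the modern spelling of the `new Buffer` call used in the test):

``` js
var StringDecoder = require('string_decoder').StringDecoder
var decoder = new StringDecoder('utf8')

var buf = Buffer.from('テスト試験今日とても,よい天気で')

// the first write ends two bytes into the three-byte 日 character
console.log(decoder.write(buf.slice(0, 20)))   // 'テスト試験今'  (partial bytes held back)
console.log(decoder.write(buf.slice(20)))      // '日とても,よい天気で'
```

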
node-split-1.0.0/test/split.asynct.js

var es = require('event-stream')
  , it = require('it-is').style('colour')
  , d = require('ubelt')
  , split = require('..')
  , join = require('path').join
  , fs = require('fs')
  , Stream = require('stream').Stream
  , Readable = require('stream').Readable
  , spec = require('stream-spec')
  , through = require('through')
  , stringStream = require('string-to-stream')

exports ['split() works like String#split'] = function (test) {
  var readme = join(__filename)
    , expected = fs.readFileSync(readme, 'utf-8').split('\n')
    , cs = split()
    , actual = []
    , ended = false
    , x = spec(cs).through()

  var a = new Stream ()

  a.write = function (l) {
    actual.push(l.trim())
  }
  a.end = function () {
    ended = true
    expected.forEach(function (v, k) {
      //String.split will append an empty string ''
      //if the string ends in a split pattern.
      //es.split doesn't which was breaking this test.
      //clearly, appending the empty string is correct.
      //tests are passing though. which is the current job.
      if(v)
        it(actual[k]).like(v)
    })
    //give the stream time to close
    process.nextTick(function () {
      test.done()
      x.validate()
    })
  }
  a.writable = true

  fs.createReadStream(readme, {flags: 'r'}).pipe(cs)
  cs.pipe(a)
}

exports ['split() takes mapper function'] = function (test) {
  var readme = join(__filename)
    , expected = fs.readFileSync(readme, 'utf-8').split('\n')
    , cs = split(function (line) { return line.toUpperCase() })
    , actual = []
    , ended = false
    , x = spec(cs).through()

  var a = new Stream ()

  a.write = function (l) {
    actual.push(l.trim())
  }
  a.end = function () {
    ended = true
    expected.forEach(function (v, k) {
      //String.split will append an empty string ''
      //if the string ends in a split pattern.
      //es.split doesn't which was breaking this test.
      //clearly, appending the empty string is correct.
      //tests are passing though. which is the current job.
      if(v)
        it(actual[k]).equal(v.trim().toUpperCase())
    })
    //give the stream time to close
    process.nextTick(function () {
      test.done()
      x.validate()
    })
  }
  a.writable = true

  fs.createReadStream(readme, {flags: 'r'}).pipe(cs)
  cs.pipe(a)
}

exports ['split() works with empty string chunks'] = function (test) {
  var str = ' foo'
    , expected = str.split(/[\s]*/).reduce(splitBy(/[\s]*/), [])
    , cs1 = split(/[\s]*/)
    , cs2 = split(/[\s]*/)
    , actual = []
    , ended = false
    , x = spec(cs1).through()
    , y = spec(cs2).through()

  var a = new Stream ()

  a.write = function (l) {
    actual.push(l.trim())
  }
  a.end = function () {
    ended = true
    expected.forEach(function (v, k) {
      //String.split will append an empty string ''
      //if the string ends in a split pattern.
      //es.split doesn't which was breaking this test.
      //clearly, appending the empty string is correct.
      //tests are passing though. which is the current job.
      if(v)
        it(actual[k]).like(v)
    })
    //give the stream time to close
    process.nextTick(function () {
      test.done()
      x.validate()
      y.validate()
    })
  }
  a.writable = true

  cs1.pipe(cs2)
  cs2.pipe(a)

  cs1.write(str)
  cs1.end()
}

function splitBy (delimeter) {
  return function (arr, piece) {
    return arr.concat(piece.split(delimeter))
  }
}


node-split-1.0.0/test/try_catch.asynct.js

var it = require('it-is').style('colour')
  , split = require('..')

exports ['emit mapper exceptions as error events'] = function (test) {
  var s = split(JSON.parse)
    , caughtError = false
    , rows = []

  s.on('error', function (err) {
    caughtError = true
  })

  s.on('data', function (row) { rows.push(row) })

  s.write('{"a":1}\n{"')
  it(caughtError).equal(false)
  it(rows).deepEqual([ { a: 1 } ])

  s.write('b":2}\n{"c":}\n')
  it(caughtError).equal(true)
  it(rows).deepEqual([ { a: 1 }, { b: 2 } ])

  s.end()
  test.done()
}

exports ['mapper error events on trailing chunks'] = function (test) {
  var s = split(JSON.parse)
    , caughtError = false
    , rows = []

  s.on('error', function (err) {
    caughtError = true
  })

  s.on('data', function (row) { rows.push(row) })

  s.write('{"a":1}\n{"')
  it(caughtError).equal(false)
  it(rows).deepEqual([ { a: 1 } ])

  s.write('b":2}\n{"c":}')
  it(caughtError).equal(false)
  it(rows).deepEqual([ { a: 1 }, { b: 2 } ])

  s.end()
  it(caughtError).equal(true)
  it(rows).deepEqual([ { a: 1 }, { b: 2 } ])

  test.done()
}
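The second test only sees the error after `end()`: the trailing `{"c":}` never gets a newline, so it only reaches the mapper when the stream flushes. A sketch of the same sequence outside the test harness, assuming `require('split')` resolves to this module (use `require('..')` as the tests do from inside the repo):

``` js
var split = require('split')

var s = split(JSON.parse)

s.on('data', function (obj) { console.log('row', obj) })
s.on('error', function (err) { console.log('error on flush:', err.message) })

s.write('{"a":1}\n{"c":}')   // emits { a: 1 }; '{"c":}' stays buffered, no error yet
s.end()                      // flush runs JSON.parse on '{"c":}' → 'error on flush: ...'
```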