From 9ad5796bb371e045c8459a04cdf4b6a93f1eba7a Mon Sep 17 00:00:00 2001 From: Trevor Harwell Date: Wed, 25 Apr 2018 18:48:48 -0400 Subject: [PATCH] Add support to stream parsing node style --- .eslintrc.js | 2 +- papaparse.js | 53 ++++++++++++++++++++++++++++++++++++++++++--- tests/node-tests.js | 30 +++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 4 deletions(-) diff --git a/.eslintrc.js b/.eslintrc.js index f63bc09..3085f72 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -42,7 +42,7 @@ module.exports = { "consistent-this": "off", "curly": "off", "default-case": "error", - "dot-location": "error", + "dot-location": "off", "dot-notation": "error", "eol-last": "error", "eqeqeq": "error", diff --git a/papaparse.js b/papaparse.js index b7fa8db..75afb25 100755 --- a/papaparse.js +++ b/papaparse.js @@ -17,14 +17,14 @@ // Node. Does not work with strict CommonJS, but // only CommonJS-like environments that support module.exports, // like Node. - module.exports = factory(); + module.exports = factory(require('stream')); } else { // Browser globals (root is window) root.Papa = factory(); } -}(this, function() +}(this, function(streamModule) { 'use strict'; @@ -71,6 +71,7 @@ Papa.FileStreamer = FileStreamer; Papa.StringStreamer = StringStreamer; Papa.ReadableStreamStreamer = ReadableStreamStreamer; + Papa.createDuplexStream = createDuplexStream; if (global.jQuery) { @@ -228,7 +229,13 @@ } var streamer = null; - if (typeof _input === 'string') + if (_input === null && typeof streamModule !== 'undefined') + { + // create a node Duplex stream for use + // with .pipe + return createDuplexStream(_config); + } + else if (typeof _input === 'string') { if (_config.download) streamer = new NetworkStreamer(_config); @@ -838,6 +845,46 @@ ReadableStreamStreamer.prototype = Object.create(ChunkStreamer.prototype); ReadableStreamStreamer.prototype.constructor = ReadableStreamStreamer; + function createDuplexStream(_config) { + var config = copy(_config); + config.step = function(results) { + results.data.forEach(function(item) { + duplexStream.push(item); + }); + }; + config.complete = function() { + duplexStream.push(null); + }; + + var chunkStreamer = new ChunkStreamer(config); + + chunkStreamer._nextChunk = function() { + // empty function since this + // logic is handled by the Duplex class + }; + + // streamModule from node must exist + // for this to run + var duplexStream = new streamModule.Duplex({ + readableObjectMode: true, + decodeStrings: false, + read: function(size) { + // since pausing controls the input into the parser + // we do not need to re-trigger the parser to continue + }, + write: function(chunk, encoding, callback) { + chunkStreamer.parseChunk(typeof chunk === 'string' ? chunk : chunk.toString(config.encoding)); + callback(); + }, + 'final': function(callback) { + chunkStreamer._finished = true; + chunkStreamer.parseChunk(''); + callback(); + } + }); + + return duplexStream; + } // Use one ParserHandle per entire CSV file or string function ParserHandle(_config) diff --git a/tests/node-tests.js b/tests/node-tests.js index 44752ff..574f821 100644 --- a/tests/node-tests.js +++ b/tests/node-tests.js @@ -59,6 +59,36 @@ describe('PapaParse', function() { }); }); + it('piped streaming CSV should be correctly parsed', function(done) { + var data = []; + fs.createReadStream(__dirname + '/long-sample.csv', 'utf8') + .pipe(Papa.parse(null)) + .on('data', function(item) { + data.push(item); + }) + .on('end', function() { + assert.deepEqual(data[0], [ + 'Grant', + 'Dyer', + 'Donec.elementum@orciluctuset.example', + '2013-11-23T02:30:31-08:00', + '2014-05-31T01:06:56-07:00', + 'Magna Ut Associates', + 'ljenkins' + ]); + assert.deepEqual(data[7], [ + 'Talon', + 'Salinas', + 'posuere.vulputate.lacus@Donecsollicitudin.example', + '2015-01-31T09:19:02-08:00', + '2014-12-17T04:59:18-08:00', + 'Aliquam Iaculis Incorporate', + 'Phasellus@Quisquetincidunt.example' + ]); + done(); + }); + }); + it('should support pausing and resuming on same tick when streaming', function(done) { var rows = []; Papa.parse(fs.createReadStream(__dirname + '/long-sample.csv', 'utf8'), {