From ae73d2a96639beec58a83326de6bd8e8ca0c02b3 Mon Sep 17 00:00:00 2001 From: Duc Tri Le Date: Wed, 20 Nov 2019 03:00:52 -0500 Subject: [PATCH] Use chunk size to determine the processed length Fixes #736 #743 --- papaparse.js | 8 ++- tests/test-cases.js | 102 ++++++++++++++++++++++++++++++++++++++ tests/verylong-sample.csv | 4 +- 3 files changed, 110 insertions(+), 4 deletions(-) diff --git a/papaparse.js b/papaparse.js index 15d281b..df5c66a 100755 --- a/papaparse.js +++ b/papaparse.js @@ -682,7 +682,8 @@ License: MIT return; } - this._start += xhr.responseText.length; + // Use chunkSize, as the response length may differ from the byte count when characters take more than 1 byte + this._start += this._config.chunkSize ? this._config.chunkSize : xhr.responseText.length; this._finished = !this._config.chunkSize || this._start >= getFileSize(xhr); this.parseChunk(xhr.responseText); }; @@ -1100,7 +1101,10 @@ License: MIT { _paused = true; _parser.abort(); - _input = _input.substring(_parser.getCharIndex()); + + // If it is streaming via "chunking", the reader will start appending correctly already so no need to substring, + // otherwise we can get duplicate content within a row + _input = isFunction(_config.chunk) ? 
"" : _input.substring(_parser.getCharIndex()); }; this.resume = function() diff --git a/tests/test-cases.js b/tests/test-cases.js index bc8e2f7..4f6550a 100644 --- a/tests/test-cases.js +++ b/tests/test-cases.js @@ -1897,6 +1897,108 @@ var CUSTOM_TESTS = [ }); } }, + { + description: "Pause and resume works for chunks with NetworkStreamer", + disabled: !XHR_ENABLED, + timeout: 30000, + expected: ["Etiam a dolor vitae est vestibulum", "84", "DEF"], + run: function(callback) { + var chunkNum = 0; + Papa.parse(BASE_PATH + "verylong-sample.csv", { + download: true, + chunkSize: 1000, + chunk: function(results, parser) { + chunkNum++; + parser.pause(); + + if (chunkNum === 2) { + callback(results.data[0]); + return; + } + + parser.resume(); + }, + complete: function() { + callback(new Error("Should have found matched row before parsing whole file")); + } + }); + } + }, + { + description: "Pause and resume works for chunks with FileStreamer", + disabled: !XHR_ENABLED, + timeout: 30000, + expected: ["Etiam a dolor vitae est vestibulum", "84", "DEF"], + run: function(callback) { + var chunkNum = 0; + var xhr = new XMLHttpRequest(); + xhr.onload = function() { + Papa.parse(new File([xhr.responseText], './verylong-sample.csv'), { + chunkSize: 1000, + chunk: function(results, parser) { + chunkNum++; + parser.pause(); + + if (chunkNum === 2) { + callback(results.data[0]); + return; + } + + parser.resume(); + }, + complete: function() { + callback(new Error("Should have found matched row before parsing whole file")); + } + }); + }; + + xhr.open("GET", BASE_PATH + "verylong-sample.csv"); + try { + xhr.send(); + } catch (err) { + callback(err); + return; + } + } + }, + { + description: "Pause and resume works for chunks with StringStreamer", + disabled: !XHR_ENABLED, + timeout: 30000, + // Also test with a string, as the byte size may be different + expected: ["Etiam a dolor vitae est vestibulum", "84", "DEF"], + run: function(callback) { + var chunkNum = 0; + var xhr = new 
XMLHttpRequest(); + xhr.onload = function() { + Papa.parse(xhr.responseText, { + chunkSize: 1000, + chunk: function(results, parser) { + chunkNum++; + parser.pause(); + + if (chunkNum === 2) { + callback(results.data[0]); + return; + } + + parser.resume(); + }, + complete: function() { + callback(new Error("Should have found matched row before parsing whole file")); + } + }); + }; + + xhr.open("GET", BASE_PATH + "verylong-sample.csv"); + try { + xhr.send(); + } catch (err) { + callback(err); + return; + } + } + }, { description: "Complete is called with all results if neither step nor chunk is defined", expected: [['A', 'b', 'c'], ['d', 'E', 'f'], ['G', 'h', 'i']], diff --git a/tests/verylong-sample.csv b/tests/verylong-sample.csv index ad70393..14bc527 100644 --- a/tests/verylong-sample.csv +++ b/tests/verylong-sample.csv @@ -1,7 +1,7 @@ placeholder,meaning of life,TLD Lorem ipsum dolor sit,42,ABC Etiam a dolor vitae est vestibulum,84,DEF -Lorem ipsum dolor sit,42,ABC +"Lorem ipsum dolor sit",42,ABC Etiam a dolor vitae est vestibulum,84,DEF Etiam a dolor vitae est vestibulum,84,DEF Lorem ipsum dolor sit,42,ABC @@ -1998,4 +1998,4 @@ Lorem ipsum dolor sit,42,ABC Lorem ipsum dolor sit,42,ABC Etiam a dolor vitae est vestibulum,84,DEF Lorem ipsum dolor sit,42 -Lorem ipsum dolor sit,42,ABC \ No newline at end of file +Lorem ipsum dolor sit,42,ABC