From be2c65385c31dc1f475710e900c92f07779e55c1 Mon Sep 17 00:00:00 2001 From: Braden Anderson Date: Mon, 1 Dec 2014 17:27:59 -0700 Subject: [PATCH] provide current cursor to step callback --- papaparse.js | 54 +++++++++++------- tests/long-sample.csv | 8 +++ tests/test-cases.js | 127 ++++++++++++++++++++++++++++++++++++++++++ tests/test-runner.js | 69 ++++++++++++++++++++++- tests/tests.html | 25 ++++++++- 5 files changed, 262 insertions(+), 21 deletions(-) create mode 100644 tests/long-sample.csv diff --git a/papaparse.js b/papaparse.js index e766ada..a04021d 100644 --- a/papaparse.js +++ b/papaparse.js @@ -412,9 +412,10 @@ if (!config.chunkSize) config.chunkSize = Papa.RemoteChunkSize; - var start = 0, fileSize = 0, rowCount = 0; + var start = 0, baseIndex = 0, fileSize = 0, rowCount = 0; var aggregate = ""; var partialLine = ""; + var self = this; var xhr, url, nextChunk, finishedWithEntireFile; var userComplete, handle, configCopy; replaceConfig(config); @@ -488,6 +489,7 @@ if (fileSize && end > fileSize) // Hack around a Chrome bug: http://stackoverflow.com/q/24745095/1048862 end = fileSize; xhr.setRequestHeader("Range", "bytes="+start+"-"+end); + xhr.setRequestHeader("If-None-Match", "webkit-no-cache"); // https://bugs.webkit.org/show_bug.cgi?id=82672 } try { @@ -520,9 +522,10 @@ finishedWithEntireFile = (!config.step && !config.chunk) || start > getFileSize(xhr); + var lastLineEnd; if (!finishedWithEntireFile) { - var lastLineEnd = aggregate.lastIndexOf("\r"); + lastLineEnd = aggregate.lastIndexOf("\r"); if (lastLineEnd == -1) lastLineEnd = aggregate.lastIndexOf("\n"); @@ -541,8 +544,10 @@ } } - var results = handle.parse(aggregate); + var results = handle.parse(aggregate, baseIndex); aggregate = ""; + if (!finishedWithEntireFile) + baseIndex += lastLineEnd + 1; if (results && results.data) rowCount += results.data.length; @@ -601,7 +606,7 @@ configCopy.complete = undefined; configCopy.chunkSize = parseInt(configCopy.chunkSize); // VERY important so we don't concatenate strings! handle = new ParserHandle(configCopy); - handle.streamer = this; + handle.streamer = self; } } @@ -619,7 +624,7 @@ if (!config.chunkSize) config.chunkSize = Papa.LocalChunkSize; - var start = 0; + var start = 0, baseIndex = 0; var file; var slice; var aggregate = ""; @@ -700,9 +705,10 @@ finishedWithEntireFile = start >= file.size; + var lastLineEnd; if (!finishedWithEntireFile) { - var lastLineEnd = aggregate.lastIndexOf("\r"); // TODO: Use an auto-detected line ending? + lastLineEnd = aggregate.lastIndexOf("\r"); // TODO: Use an auto-detected line ending? if (lastLineEnd == -1) lastLineEnd = aggregate.lastIndexOf("\n"); @@ -721,8 +727,10 @@ } } - var results = handle.parse(aggregate); + var results = handle.parse(aggregate, baseIndex); aggregate = ""; + if (!finishedWithEntireFile) + baseIndex += lastLineEnd + 1; if (results && results.data) rowCount += results.data.length; @@ -777,7 +785,7 @@ configCopy.complete = undefined; configCopy.chunkSize = parseInt(configCopy.chunkSize); // VERY important so we don't concatenate strings! handle = new ParserHandle(configCopy); - handle.streamer = this; + handle.streamer = self; } } @@ -831,7 +839,7 @@ }; } - this.parse = function(input) + this.parse = function(input, baseIndex) { if (!_config.newline) _config.newline = guessLineEndings(input); @@ -856,7 +864,7 @@ _input = input; _parser = new Parser(parserConfig); - _results = _parser.parse(_input); + _results = _parser.parse(_input, baseIndex); processResults(); if (isFunction(_config.complete) && !_paused && (!self.streamer || self.streamer.finished())) _config.complete(_results); @@ -1104,7 +1112,7 @@ var cursor = 0; var aborted = false; - this.parse = function(input) + this.parse = function(input, baseIndex) { // For some reason, in Chrome, this speeds things up (!?) if (typeof input !== 'string') @@ -1120,7 +1128,7 @@ // Establish starting state cursor = 0; - var data = [], errors = [], row = []; + var data = [], errors = [], row = [], lastCursor = 0; if (!input) return returnable(); @@ -1135,13 +1143,14 @@ continue; if (stepIsFunction) { - data = [ rows[i].split(delim) ]; + data = []; + pushRow(rows[i].split(delim)); doStep(); if (aborted) return returnable(); } else - data.push(rows[i].split(delim)); + pushRow(rows[i].split(delim)); if (preview && i >= preview) { data = data.slice(0, preview); @@ -1188,7 +1197,7 @@ { // Closing quote at EOF row.push(input.substring(cursor, quoteSearch).replace(/""/g, '"')); - data.push(row); + pushRow(row); if (stepIsFunction) doStep(); return returnable(); @@ -1281,13 +1290,19 @@ return finish(); + function pushRow(row) + { + data.push(row); + lastCursor = cursor; + } + // Appends the remaining input from cursor to the end into // row, saves the row, calls step, and returns the results. function finish() { row.push(input.substr(cursor)); - data.push(row); cursor = inputLen; // important in case parsing is paused + pushRow(row); if (stepIsFunction) doStep(); return returnable(); @@ -1299,9 +1314,9 @@ // preview and end parsing if necessary. function saveRow(newCursor) { - data.push(row); - row = []; cursor = newCursor; + pushRow(row); + row = []; nextNewline = input.indexOf(newline, cursor); } @@ -1315,7 +1330,8 @@ delimiter: delim, linebreak: newline, aborted: aborted, - truncated: !!stopped + truncated: !!stopped, + cursor: lastCursor + (baseIndex || 0) } }; } diff --git a/tests/long-sample.csv b/tests/long-sample.csv new file mode 100644 index 0000000..59f0994 --- /dev/null +++ b/tests/long-sample.csv @@ -0,0 +1,8 @@ +Grant,Dyer,Donec.elementum@orciluctuset.example,2013-11-23T02:30:31-08:00,2014-05-31T01:06:56-07:00,Magna Ut Associates,ljenkins +Cherokee,Shields,Nulla.Semper.Tellus@duinec.example,2014-11-22T16:43:51-08:00,2013-09-26T11:47:15-07:00,Pede Corporation,Donec.elementum@orciluctuset.example +Catherine,Parrish,lorem@feugiatnon.example,2015-02-11T12:01:10-08:00,2015-02-26T00:29:40-08:00,Phasellus Fermentum Convallis PC,Donec.elementum@orciluctuset.example +Destiny,Shannon,libero@Aenean.example,2015-07-14T09:38:11-07:00,2014-01-11T14:53:04-08:00,Pretium Et Inc.,Donec.elementum@orciluctuset.example +Callum,Underwood,Phasellus@Quisquetincidunt.example,2013-09-13T18:49:35-07:00,2014-12-04T23:04:19-08:00,Sed Turpis Nec LLP,ljenkins +Elliott,Wright,cursus@nibh.example,2015-04-20T14:35:19-07:00,2015-03-05T12:56:46-08:00,Dolor Associate,Phasellus@Quisquetincidunt.example +Galvin,Foley,nisi.Aenean.eget@atauctorullamcorper.example,2014-03-20T23:20:15-07:00,2014-06-11T15:00:23-07:00,Adipiscing Industrie,Phasellus@Quisquetincidunt.example +Talon,Salinas,posuere.vulputate.lacus@Donecsollicitudin.example,2015-01-31T09:19:02-08:00,2014-12-17T04:59:18-08:00,Aliquam Iaculis Incorporate,Phasellus@Quisquetincidunt.example \ No newline at end of file diff --git a/tests/test-cases.js b/tests/test-cases.js index 47382f3..caf4d27 100644 --- a/tests/test-cases.js +++ b/tests/test-cases.js @@ -1000,3 +1000,130 @@ var UNPARSE_TESTS = [ expected: 'Col1,Col2,Col3\r\na,,c' } ]; + + + +var CUSTOM_TESTS = [ + { + description: "Step is called for each row", + expected: 2, + run: function(callback) { + var callCount = 0; + Papa.parse('A,b,c\nd,E,f', { + step: function() { + callCount++; + }, + complete: function() { + callback(callCount); + } + }); + } + }, + { + description: "Step is called with the contents of the row", + expected: ['A', 'b', 'c'], + run: function(callback) { + Papa.parse('A,b,c', { + step: function(response) { + callback(response.data[0]); + } + }); + } + }, + { + description: "Step is called with the last cursor position", + expected: [6, 12, 17], + run: function(callback) { + var updates = []; + Papa.parse('A,b,c\nd,E,f\nG,h,i', { + step: function(response) { + updates.push(response.meta.cursor); + }, complete: function() { + callback(updates); + } + }); + } + }, + { + description: "Step exposes cursor for files", + expected: [129, 287, 452, 595, 727, 865, 1031, 1209], + run: function(callback) { + var updates = []; + Papa.parse("/tests/long-sample.csv", { + download: true, + step: function(response) { + updates.push(response.meta.cursor); + }, complete: function() { + callback(updates); + } + }); + } + }, + { + description: "Step exposes cursor for chunked files", + // Tiny inconsistency: the last full row in each chunk will not see a newline. + expected: [129, 287, 451, 595, 727, 864, 1031, 1209], + run: function(callback) { + var updates = []; + Papa.parse("/tests/long-sample.csv", { + download: true, + chunkSize: 500, + step: function(response) { + updates.push(response.meta.cursor); + }, complete: function() { + callback(updates); + } + }); + } + }, + { + description: "Step exposes cursor for workers", + // You're only really getting chunk cursors here. + expected: [451, 451, 451, 864, 864, 864, 1209, 1209], + run: function(callback) { + var updates = []; + Papa.parse("/tests/long-sample.csv", { + download: true, + chunkSize: 500, + worker: true, + step: function(response) { + updates.push(response.meta.cursor); + }, complete: function() { + callback(updates); + } + }); + } + }, + { + description: "Chunk is called for each chunk", + expected: [3, 3, 2], + run: function(callback) { + var updates = []; + Papa.parse("/tests/long-sample.csv", { + download: true, + chunkSize: 500, + chunk: function(response) { + updates.push(response.data.length); + }, complete: function() { + callback(updates); + } + }); + } + }, + { + description: "Chunk is called with cursor position", + expected: [451, 864, 1209], + run: function(callback) { + var updates = []; + Papa.parse("/tests/long-sample.csv", { + download: true, + chunkSize: 500, + chunk: function(response) { + updates.push(response.meta.cursor); + }, complete: function() { + callback(updates); + } + }); + } + }, +]; diff --git a/tests/test-runner.js b/tests/test-runner.js index 14265d7..cd3c264 100644 --- a/tests/test-runner.js +++ b/tests/test-runner.js @@ -48,6 +48,7 @@ $(function() runCoreParserTests(); runParseTests(asyncDone); runUnparseTests(); + runCustomTests(asyncDone); }); @@ -303,6 +304,72 @@ function runUnparseTests() +// Executes all tests in CUSTOM_TESTS from test-cases.js +// and renders results in the table. +function runCustomTests(asyncDone) +{ + var asyncRemaining = CUSTOM_TESTS.length; + for (var i = 0; i < CUSTOM_TESTS.length; i++) + { + runTest(CUSTOM_TESTS[i]); + } + + function runTest(test) + { + try + { + test.run(function(actual) { + displayResults(test, actual); + }); + } + catch (e) + { + displayResults(test, e); + } + } + + function displayResults(test, actual) + { + var testId = testCount++; + var results = compare(actual, test.expected); + + var testDescription = (test.description || ""); + if (testDescription.length > 0) + testDescription += '
'; + if (test.notes) + testDescription += '' + test.notes + ''; + + var tr = '' + + '+' + + '' + testDescription + '' + + passOrFailTd(results) + + '
condensed
' + + '
condensed
' + + ''; + + $('#custom-tests .results').append(tr); + + if (!results.passed) + $('#test-' + testId + ' td.rvl').click(); + + if (results.passed) { + passCount++; + } else { + failCount++; + } + if (--asyncRemaining === 0) { + asyncDone(); + } + } + + + function compare(actual, expected) + { + return { + passed: JSON.stringify(actual) === JSON.stringify(expected) + }; + } +} @@ -341,4 +408,4 @@ function revealChars(txt) txt = txt.replace(/">\u001f<\/span>/ig, '">\\u001f'); return txt; -} \ No newline at end of file +} diff --git a/tests/tests.html b/tests/tests.html index a5b7f2f..60d2342 100644 --- a/tests/tests.html +++ b/tests/tests.html @@ -86,5 +86,28 @@ + + +
+

Miscellaneous Tests

+ + Expand all + · + Collapse all +
+ + + + + + + + +
Test CaseDataExpectedActual
+
+ + + + - \ No newline at end of file +