diff --git a/papaparse.js b/papaparse.js
index a04021d..b3f7bd7 100644
--- a/papaparse.js
+++ b/papaparse.js
@@ -522,34 +522,16 @@
finishedWithEntireFile = (!config.step && !config.chunk) || start > getFileSize(xhr);
- var lastLineEnd;
+ var results = handle.parse(aggregate, baseIndex, !finishedWithEntireFile);
+ var lastIndex = results.meta.cursor;
if (!finishedWithEntireFile)
{
- lastLineEnd = aggregate.lastIndexOf("\r");
-
- if (lastLineEnd == -1)
- lastLineEnd = aggregate.lastIndexOf("\n");
-
- if (lastLineEnd != -1)
- {
- partialLine = aggregate.substring(lastLineEnd + 1); // skip the line ending character
- aggregate = aggregate.substring(0, lastLineEnd);
- }
- else
- {
- // For chunk sizes smaller than a line (a line could not fit in a single chunk)
- // we simply build our aggregate by reading in the next chunk, until we find a newline
- nextChunk();
- return;
- }
+ partialLine = aggregate.substring(lastIndex - baseIndex);
+ baseIndex = lastIndex;
}
-
- var results = handle.parse(aggregate, baseIndex);
- aggregate = "";
- if (!finishedWithEntireFile)
- baseIndex += lastLineEnd + 1;
if (results && results.data)
rowCount += results.data.length;
+ aggregate = "";
var finishedIncludingPreview = finishedWithEntireFile || (configCopy.preview && rowCount >= configCopy.preview);
@@ -705,34 +687,16 @@
finishedWithEntireFile = start >= file.size;
- var lastLineEnd;
+ var results = handle.parse(aggregate, baseIndex, !finishedWithEntireFile);
+ var lastIndex = results.meta.cursor;
if (!finishedWithEntireFile)
{
- lastLineEnd = aggregate.lastIndexOf("\r"); // TODO: Use an auto-detected line ending?
-
- if (lastLineEnd == -1)
- lastLineEnd = aggregate.lastIndexOf("\n");
-
- if (lastLineEnd != -1)
- {
- partialLine = aggregate.substring(lastLineEnd + 1); // skip the line ending character (TODO: Not always length 1? \r\n...)
- aggregate = aggregate.substring(0, lastLineEnd);
- }
- else
- {
- // For chunk sizes smaller than a line (a line could not fit in a single chunk)
- // we simply build our aggregate by reading in the next chunk, until we find a newline
- nextChunk();
- return;
- }
+ partialLine = aggregate.substring(lastIndex - baseIndex);
+ baseIndex = lastIndex;
}
-
- var results = handle.parse(aggregate, baseIndex);
- aggregate = "";
- if (!finishedWithEntireFile)
- baseIndex += lastLineEnd + 1;
if (results && results.data)
rowCount += results.data.length;
+ aggregate = "";
var finishedIncludingPreview = finishedWithEntireFile || (configCopy.preview && rowCount >= configCopy.preview);
@@ -839,7 +803,7 @@
};
}
- this.parse = function(input, baseIndex)
+ this.parse = function(input, baseIndex, ignoreLastRow)
{
if (!_config.newline)
_config.newline = guessLineEndings(input);
@@ -864,7 +828,7 @@
_input = input;
_parser = new Parser(parserConfig);
- _results = _parser.parse(_input, baseIndex);
+ _results = _parser.parse(_input, baseIndex, ignoreLastRow);
processResults();
if (isFunction(_config.complete) && !_paused && (!self.streamer || self.streamer.finished()))
_config.complete(_results);
@@ -1112,7 +1076,7 @@
var cursor = 0;
var aborted = false;
- this.parse = function(input, baseIndex)
+ this.parse = function(input, baseIndex, ignoreLastRow)
{
// For some reason, in Chrome, this speeds things up (!?)
if (typeof input !== 'string')
@@ -1182,25 +1146,24 @@
if (quoteSearch === -1)
{
- // No closing quote... what a pity
- errors.push({
- type: "Quotes",
- code: "MissingQuotes",
- message: "Quoted field unterminated",
- row: data.length, // row has yet to be inserted
- index: cursor
- });
+ if (!ignoreLastRow) {
+ // No closing quote... what a pity
+ errors.push({
+ type: "Quotes",
+ code: "MissingQuotes",
+ message: "Quoted field unterminated",
+ row: data.length, // row has yet to be inserted
+ index: cursor
+ });
+ }
return finish();
}
if (quoteSearch === inputLen-1)
{
// Closing quote at EOF
- row.push(input.substring(cursor, quoteSearch).replace(/""/g, '"'));
- pushRow(row);
- if (stepIsFunction)
- doStep();
- return returnable();
+ var value = input.substring(cursor, quoteSearch).replace(/""/g, '"');
+ return finish(value);
}
// If this quote is escaped, it's part of the data; skip it
@@ -1298,9 +1261,13 @@
// Appends the remaining input from cursor to the end into
// row, saves the row, calls step, and returns the results.
- function finish()
+ function finish(value)
{
- row.push(input.substr(cursor));
+ if (ignoreLastRow) // caller has more input coming: leave the unfinished row for the next chunk
+ return returnable();
+ if (typeof value === 'undefined') // "" is a legitimate field (e.g. a closing "" at EOF), so test for absence, not falsiness
+ value = input.substr(cursor);
+ row.push(value);
cursor = inputLen; // important in case parsing is paused
pushRow(row);
if (stepIsFunction)
diff --git a/tests/test-cases.js b/tests/test-cases.js
index caf4d27..8497dc9 100644
--- a/tests/test-cases.js
+++ b/tests/test-cases.js
@@ -1,5 +1,10 @@
var RECORD_SEP = String.fromCharCode(30);
var UNIT_SEP = String.fromCharCode(31);
+var FILES_ENABLED = false; // flipped to true below only if a File object can be constructed here
+try {
+ new File([""], ""); // probe: expected to throw where the File constructor is unsupported
+ FILES_ENABLED = true;
+} catch (e) {} // Safari, IE: keep FILES_ENABLED false so File-based tests get no File input / are disabled
// Tests for the core parser using new Papa.Parser().parse() (CSV to JSON)
var CORE_PARSER_TESTS = [
@@ -845,7 +850,28 @@ var PARSE_ASYNC_TESTS = [
data: [['A','B','C'],['X','Y','Z']],
errors: []
}
- }
+ },
+ {
+ description: "Simple file",
+ input: FILES_ENABLED ? new File(["A,B,C\nX,Y,Z"], "sample.csv") : false,
+ config: {
+ },
+ expected: {
+ data: [['A','B','C'],['X','Y','Z']],
+ errors: []
+ }
+ },
+ {
+ description: "Simple file + worker",
+ input: FILES_ENABLED ? new File(["A,B,C\nX,Y,Z"], "sample.csv") : false, // false when no File can be constructed
+ config: {
+ worker: true
+ },
+ expected: {
+ data: [['A','B','C'],['X','Y','Z']],
+ errors: []
+ }
+ }
];
@@ -1045,7 +1071,7 @@ var CUSTOM_TESTS = [
}
},
{
- description: "Step exposes cursor for files",
+ description: "Step exposes cursor for downloads",
expected: [129, 287, 452, 595, 727, 865, 1031, 1209],
run: function(callback) {
var updates = [];
@@ -1060,9 +1086,8 @@ var CUSTOM_TESTS = [
}
},
{
- description: "Step exposes cursor for chunked files",
- // Tiny inconsistency: the last full row in each chunk will not see a newline.
- expected: [129, 287, 451, 595, 727, 864, 1031, 1209],
+ description: "Step exposes cursor for chunked downloads",
+ expected: [129, 287, 452, 595, 727, 865, 1031, 1209],
run: function(callback) {
var updates = [];
Papa.parse("/tests/long-sample.csv", {
@@ -1078,8 +1103,7 @@ var CUSTOM_TESTS = [
},
{
description: "Step exposes cursor for workers",
- // You're only really getting chunk cursors here.
- expected: [451, 451, 451, 864, 864, 864, 1209, 1209],
+ expected: [452, 452, 452, 865, 865, 865, 1209, 1209],
run: function(callback) {
var updates = [];
Papa.parse("/tests/long-sample.csv", {
@@ -1112,7 +1136,7 @@ var CUSTOM_TESTS = [
},
{
description: "Chunk is called with cursor position",
- expected: [451, 864, 1209],
+ expected: [452, 865, 1209],
run: function(callback) {
var updates = [];
Papa.parse("/tests/long-sample.csv", {
@@ -1126,4 +1150,52 @@ var CUSTOM_TESTS = [
});
}
},
+ {
+ description: "Step exposes indexes for files",
+ expected: [6, 12, 17],
+ disabled: !FILES_ENABLED,
+ run: function(callback) {
+ var updates = [];
+ Papa.parse(new File(['A,b,c\nd,E,f\nG,h,i'], 'sample.csv'), {
+ download: true,
+ step: function(response) {
+ updates.push(response.indexes[0]);
+ }, complete: function() {
+ callback(updates);
+ }
+ });
+ }
+ },
+ {
+ description: "Step exposes indexes for chunked files",
+ expected: [6, 12, 17],
+ disabled: !FILES_ENABLED,
+ run: function(callback) {
+ var updates = [];
+ Papa.parse(new File(['A,b,c\nd,E,f\nG,h,i'], 'sample.csv'), {
+ chunkSize: 3,
+ step: function(response) {
+ updates.push(response.indexes[0]);
+ }, complete: function() {
+ callback(updates);
+ }
+ });
+ }
+ },
+ {
+ description: "Quoted line breaks near chunk boundaries are handled",
+ expected: [['A', 'B', 'C'], ['X', 'Y\n1\n2\n3', 'Z']],
+ disabled: !FILES_ENABLED,
+ run: function(callback) {
+ var updates = [];
+ Papa.parse(new File(['A,B,C\nX,"Y\n1\n2\n3",Z'], 'sample.csv'), {
+ chunkSize: 3,
+ step: function(response) {
+ updates.push(response.data[0]);
+ }, complete: function() {
+ callback(updates);
+ }
+ });
+ }
+ }
];
diff --git a/tests/test-runner.js b/tests/test-runner.js
index cd3c264..ec8f2ce 100644
--- a/tests/test-runner.js
+++ b/tests/test-runner.js
@@ -100,7 +100,7 @@ function runParseTests(asyncDone)
{
var results = compare(actual.data, actual.errors, test.expected);
- displayResults("tests-for-parse", test, actual, results);
+ displayResults("#tests-for-parse", test, actual, results);
if (results.data.passed && results.errors.passed) {
passCount++;
@@ -115,7 +115,7 @@ function runParseTests(asyncDone)
config.error = function(err)
{
failCount++;
- displayResults(test, {data:[],errors:err}, test.expected);
+ displayResults("#tests-for-parse", test, {data:[],errors:err}, test.expected);
if (--asyncRemaining === 0) {
asyncDone();
}
@@ -308,7 +308,7 @@ function runUnparseTests()
// and renders results in the table.
function runCustomTests(asyncDone)
{
- var asyncRemaining = CUSTOM_TESTS.length;
+ var asyncRemaining = 0;
for (var i = 0; i < CUSTOM_TESTS.length; i++)
{
runTest(CUSTOM_TESTS[i]);
@@ -316,6 +316,9 @@ function runCustomTests(asyncDone)
function runTest(test)
{
+ if (test.disabled)
+ return;
+ asyncRemaining++;
try
{
test.run(function(actual) {
@@ -391,6 +394,9 @@ function passOrFailTd(result)
// Reveals some hidden, whitespace, or invisible characters
function revealChars(txt)
{
+ if (typeof txt != 'string')
+ return '(file)';
+
// Make spaces and tabs more obvious when glancing
txt = txt.replace(/( |\t)/ig, '$1');
txt = txt.replace(/(\r\n|\n\r|\r|\n)/ig, '$1$1');