Browse Source

gh-26 Fix substring being called with invalid indexes. Added strict quote mode on malformed quotes to treat malformed quote as unquoted field.

pull/719/head
Jamie Seter 6 years ago
parent
commit
846b024d72
  1. 79
      papaparse.js
  2. 131
      tests/test-cases.js

79
papaparse.js

@ -699,7 +699,7 @@ License: MIT @@ -699,7 +699,7 @@ License: MIT
if (contentRange === null) { // no content range, then finish!
return -1;
}
return parseInt(contentRange.substr(contentRange.lastIndexOf('/') + 1));
return parseInt(contentRange.substring(contentRange.lastIndexOf('/') + 1));
}
}
NetworkStreamer.prototype = Object.create(ChunkStreamer.prototype);
@ -786,10 +786,17 @@ License: MIT @@ -786,10 +786,17 @@ License: MIT
};
this._nextChunk = function()
{
var chunk;
if (this._finished) return;
var size = this._config.chunkSize;
var chunk = size ? remaining.substr(0, size) : remaining;
remaining = size ? remaining.substr(size) : '';
if(size) {
var limit = Math.min(remaining.length, Math.max(0, size));
chunk = remaining.substring(0, limit);
remaining = remaining.substring(limit);
} else {
chunk = remaining;
remaining = '';
}
this._finished = !remaining;
return this.parseChunk(chunk);
};
@ -1092,7 +1099,7 @@ License: MIT @@ -1092,7 +1099,7 @@ License: MIT
{
_paused = true;
_parser.abort();
_input = _input.substr(_parser.getCharIndex());
_input = _input.substring(_parser.getCharIndex());
};
this.resume = function()
@ -1330,7 +1337,7 @@ License: MIT @@ -1330,7 +1337,7 @@ License: MIT
function guessLineEndings(input, quoteChar)
{
input = input.substr(0, 1024 * 1024); // max length 1 MB
input = input.substring(0, 1024 * 1024); // max length 1 MB
// Replace all the text inside quotes
var re = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm');
input = input.replace(re, '');
@ -1382,6 +1389,8 @@ License: MIT @@ -1382,6 +1389,8 @@ License: MIT
var step = config.step;
var preview = config.preview;
var fastMode = config.fastMode;
var temp1;
var strictQuote = (temp1 = config.strictQuote) !== undefined ? temp1 : false;
var quoteChar;
/** Allows for no quoteChar by setting quoteChar to undefined in config */
if (config.quoteChar === undefined) {
@ -1448,7 +1457,7 @@ License: MIT @@ -1448,7 +1457,7 @@ License: MIT
cursor += newline.length;
else if (ignoreLastRow)
return returnable();
if (comments && row.substr(0, commentsLen) === comments)
if (comments && row.substring(0, commentsLen) === comments)
continue;
if (stepIsFunction)
{
@ -1473,6 +1482,10 @@ License: MIT @@ -1473,6 +1482,10 @@ License: MIT
var nextNewline = input.indexOf(newline, cursor);
var quoteCharRegex = new RegExp(escapeRegExp(escapeChar) + escapeRegExp(quoteChar), 'g');
var quoteSearch = input.indexOf(quoteChar, cursor);
var quoteError = false;
var savedNextDelim;
var savedNextNewline;
var savedQuoteSearch;
// Parser loop
for (;;)
@ -1480,12 +1493,10 @@ License: MIT @@ -1480,12 +1493,10 @@ License: MIT
// Field has opening quote
if (input[cursor] === quoteChar)
{
saveQuoteState();
// Start our search for the closing quote where the cursor is
quoteSearch = cursor;
// Skip the opening quote
cursor++;
for (;;)
{
// Find closing quote
@ -1494,6 +1505,10 @@ License: MIT @@ -1494,6 +1505,10 @@ License: MIT
//No other quotes are found - no other delimiters
if (quoteSearch === -1)
{
if(strictQuote) {
backtrackQuote();
break;
}
if (!ignoreLastRow) {
// No closing quote... what a pity
errors.push({
@ -1504,13 +1519,14 @@ License: MIT @@ -1504,13 +1519,14 @@ License: MIT
index: cursor
});
}
cursor++;
return finish();
}
// Closing quote at EOF
if (quoteSearch === inputLen - 1)
{
var value = input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar);
var value = input.substring(cursor + 1, quoteSearch).replace(quoteCharRegex, quoteChar);
return finish(value);
}
@ -1528,6 +1544,14 @@ License: MIT @@ -1528,6 +1544,14 @@ License: MIT
continue;
}
// update indexes if they are less than current cursor
if(nextNewline !== -1 && (quoteSearch + 1) > nextNewline) {
nextNewline = input.indexOf(newline, quoteSearch + 1);
}
if(nextDelim !== -1 && (quoteSearch + 1) > nextDelim) {
nextDelim = input.indexOf(delim, quoteSearch + 1);
}
// Check up to nextDelim or nextNewline, whichever is closest
var checkUpTo = nextNewline === -1 ? nextDelim : Math.min(nextDelim, nextNewline);
var spacesBetweenQuoteAndDelimiter = extraSpaces(checkUpTo);
@ -1535,7 +1559,7 @@ License: MIT @@ -1535,7 +1559,7 @@ License: MIT
// Closing quote followed by delimiter or 'unnecessary spaces + delimiter'
if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter] === delim)
{
row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
row.push(input.substring(cursor + 1, quoteSearch).replace(quoteCharRegex, quoteChar));
cursor = quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen;
// If char after following delimiter is not quoteChar, we find next quote char position
@ -1551,9 +1575,9 @@ License: MIT @@ -1551,9 +1575,9 @@ License: MIT
var spacesBetweenQuoteAndNewLine = extraSpaces(nextNewline);
// Closing quote followed by newline or 'unnecessary spaces + newLine'
if (input.substr(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, newlineLen) === newline)
if (input.substring(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen) === newline)
{
row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
row.push(input.substring(cursor + 1, quoteSearch).replace(quoteCharRegex, quoteChar));
saveRow(quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen);
nextDelim = input.indexOf(delim, cursor); // because we may have skipped the nextDelim in the quoted field
quoteSearch = input.indexOf(quoteChar, cursor); // we search for first quote in next line
@ -1581,16 +1605,23 @@ License: MIT @@ -1581,16 +1605,23 @@ License: MIT
index: cursor
});
if(strictQuote) {
backtrackQuote();
break;
}
quoteSearch++;
continue;
}
if(!quoteError) {
continue;
}
}
// Comment found at start of new line
if (comments && row.length === 0 && input.substr(cursor, commentsLen) === comments)
if (comments && row.length === 0 && input.substring(cursor, cursor + commentsLen) === comments)
{
if (nextNewline === -1) // Comment ends at EOF
return returnable();
@ -1606,7 +1637,7 @@ License: MIT @@ -1606,7 +1637,7 @@ License: MIT
// we check, if we have quotes, because delimiter char may be part of field enclosed in quotes
if (quoteSearch > nextDelim) {
// we have quotes, so we try to find the next delimiter not enclosed in quotes and also next starting quote char
var nextDelimObj = getNextUnqotedDelimiter(nextDelim, quoteSearch, nextNewline);
var nextDelimObj = getNextUnquotedDelimiter(nextDelim, quoteSearch, nextNewline);
// if we have next delimiter char which is not enclosed in quotes
if (nextDelimObj && typeof nextDelimObj.nextDelim !== 'undefined') {
@ -1651,6 +1682,18 @@ License: MIT @@ -1651,6 +1682,18 @@ License: MIT
return finish();
function saveQuoteState() {
savedNextDelim = nextDelim;
savedNextNewline = nextNewline;
savedQuoteSearch = quoteSearch;
quoteError = false;
}
function backtrackQuote() {
quoteSearch = savedQuoteSearch;
nextNewline = savedNextNewline;
nextDelim = savedNextDelim;
quoteError = true;
}
function pushRow(row)
{
@ -1682,7 +1725,7 @@ License: MIT @@ -1682,7 +1725,7 @@ License: MIT
if (ignoreLastRow)
return returnable();
if (typeof value === 'undefined')
value = input.substr(cursor);
value = input.substring(cursor);
row.push(value);
cursor = inputLen; // important in case parsing is paused
pushRow(row);
@ -1731,7 +1774,7 @@ License: MIT @@ -1731,7 +1774,7 @@ License: MIT
}
/** Gets the delimiter character, which is not inside the quoted field */
function getNextUnqotedDelimiter(nextDelim, quoteSearch, newLine) {
function getNextUnquotedDelimiter(nextDelim, quoteSearch, newLine) {
var result = {
nextDelim: undefined,
quoteSearch: undefined
@ -1753,7 +1796,7 @@ License: MIT @@ -1753,7 +1796,7 @@ License: MIT
nextQuoteSearch = input.indexOf(quoteChar, nextQuoteSearch + 1);
}
// try to get the next delimiter position
result = getNextUnqotedDelimiter(nextNextDelim, nextQuoteSearch, newLine);
result = getNextUnquotedDelimiter(nextNextDelim, nextQuoteSearch, newLine);
} else {
result = {
nextDelim: nextDelim,

131
tests/test-cases.js

@ -22,6 +22,61 @@ try { @@ -22,6 +22,61 @@ try {
XHR_ENABLED = true;
} catch (e) {} // safari, ie
var __substring = String.prototype.substring;
var __substr = String.prototype.substr;
function checkedSubstr(_indexStart, _length) {
throw new TypeError("Avoid depreciated method");
}
function checkedSubstring(indexStart, indexEnd) {
var length = this.length;
if(isNaN(indexStart) || typeof indexStart !== "number") {
throw new TypeError("indexStart is not a number. ");
}
if(indexStart < 0) {
// Block this because it is likely a bug, but substring clamps it as max(0, indexStart)
throw new TypeError("indexStart is less than zero. " + String(indexStart));
}
if(indexStart > length) {
// Block this because it is likely a bug, but substring clamps it as min(indexStart, length)
throw new TypeError("indexStart is greater than string length. " + String(indexStart) + " " + String(length));
}
if(indexEnd !== undefined) {
if(isNaN(indexEnd) || typeof indexEnd !== "number") {
throw new TypeError("indexEnd is not a number. ");
}
if(indexEnd < 0) {
// Block this because it is likely a bug, but substring treats it as max(0, indexEnd)
throw new TypeError("indexEnd is less than zero. " + String(indexEnd));
}
if(indexStart > indexEnd) {
// Block this because it is likely a bug, but substring flips its arguments to compensate.
throw new TypeError("indexStart is greater than indexEnd. " + String(indexStart) + " " + String(indexEnd));
}
// Allow indexEnd to be greater than limit, which will functionally truncate to limit.
}
return __substring.call(this, indexStart, indexEnd);
}
function updateStringSubstring() {
String.prototype.substring = checkedSubstring; // eslint-disable-line no-extend-native
}
function restoreStringSubstring() {
String.prototype.substring = __substring; // eslint-disable-line no-extend-native
}
function updateStringSubstr() {
String.prototype.substr = checkedSubstr; // eslint-disable-line no-extend-native
}
function restoreStringSubstr() {
String.prototype.substr = __substr; // eslint-disable-line no-extend-native
}
// Tests for the core parser using new Papa.Parser().parse() (CSV to JSON)
var CORE_PARSER_TESTS = [
{
@ -194,7 +249,7 @@ var CORE_PARSER_TESTS = [ @@ -194,7 +249,7 @@ var CORE_PARSER_TESTS = [
"code": "MissingQuotes",
"message": "Quoted field unterminated",
"row": 0,
"index": 3
"index": 2
}]
}
},
@ -209,7 +264,23 @@ var CORE_PARSER_TESTS = [ @@ -209,7 +264,23 @@ var CORE_PARSER_TESTS = [
"code": "InvalidQuotes",
"message": "Trailing quote on quoted field is malformed",
"row": 0,
"index": 1
"index": 0
}]
}
},
{
description: "Quoted field has invalid trailing quote after delimiter with a valid closer",
input: '"a,"b,c"\nd,e,f',
notes: "The input is malformed, opening quotes identified, trailing quote is malformed. Trailing quote should be escaped or followed by valid new line or delimiter to be valid",
config: { strictQuote: true },
expected: {
data: [['"a','b,c'], ['d', 'e', 'f']],
errors: [{
"type": "Quotes",
"code": "InvalidQuotes",
"message": "Trailing quote on quoted field is malformed",
"row": 0,
"index": 0
}]
}
},
@ -224,14 +295,14 @@ var CORE_PARSER_TESTS = [ @@ -224,14 +295,14 @@ var CORE_PARSER_TESTS = [
"code": "InvalidQuotes",
"message": "Trailing quote on quoted field is malformed",
"row": 0,
"index": 3
"index": 2
},
{
"type": "Quotes",
"code": "MissingQuotes",
"message": "Quoted field unterminated",
"row": 0,
"index": 3
"index": 2
}]
}
},
@ -246,14 +317,14 @@ var CORE_PARSER_TESTS = [ @@ -246,14 +317,14 @@ var CORE_PARSER_TESTS = [
"code": "InvalidQuotes",
"message": "Trailing quote on quoted field is malformed",
"row": 0,
"index": 3
"index": 2
},
{
"type": "Quotes",
"code": "MissingQuotes",
"message": "Quoted field unterminated",
"row": 0,
"index": 3
"index": 2
}]
}
},
@ -268,14 +339,14 @@ var CORE_PARSER_TESTS = [ @@ -268,14 +339,14 @@ var CORE_PARSER_TESTS = [
"code": "InvalidQuotes",
"message": "Trailing quote on quoted field is malformed",
"row": 0,
"index": 3
"index": 2
},
{
"type": "Quotes",
"code": "MissingQuotes",
"message": "Quoted field unterminated",
"row": 0,
"index": 3
"index": 2
}]
}
},
@ -288,6 +359,15 @@ var CORE_PARSER_TESTS = [ @@ -288,6 +359,15 @@ var CORE_PARSER_TESTS = [
errors: []
}
},
{
description: "Quoted field has valid trailing quote via delimiter and contains delimiter",
input: 'a,"b,",c\nd,e,f',
notes: "Trailing quote is valid due to trailing delimiter",
expected: {
data: [['a', 'b,', 'c'], ['d', 'e', 'f']],
errors: []
}
},
{
description: "Quoted field has valid trailing quote via \\n",
input: 'a,b,"c"\nd,e,f',
@ -297,6 +377,15 @@ var CORE_PARSER_TESTS = [ @@ -297,6 +377,15 @@ var CORE_PARSER_TESTS = [
errors: []
}
},
{
description: "Quoted field has valid trailing quote via \\n and contains delimiter and newline",
input: 'a,b,"c,\n"\nd,e,f',
notes: "Trailing quote is valid due to trailing new line delimiter",
expected: {
data: [['a', 'b', 'c,\n'], ['d', 'e', 'f']],
errors: []
}
},
{
description: "Quoted field has valid trailing quote via EOF",
input: 'a,b,c\nd,e,"f"',
@ -591,8 +680,16 @@ var CORE_PARSER_TESTS = [ @@ -591,8 +680,16 @@ var CORE_PARSER_TESTS = [
describe('Core Parser Tests', function() {
function generateTest(test) {
(test.disabled ? it.skip : it)(test.description, function() {
var actual = new Papa.Parser(test.config).parse(test.input);
assert.deepEqual(JSON.stringify(actual.errors), JSON.stringify(test.expected.errors));
var actual;
updateStringSubstr();
updateStringSubstring();
try {
actual = new Papa.Parser(test.config).parse(test.input);
} finally {
restoreStringSubstring();
restoreStringSubstr();
}
assert.deepEqual(JSON.parse(JSON.stringify(actual.errors)), test.expected.errors);
assert.deepEqual(actual.data, test.expected.data);
});
}
@ -654,6 +751,14 @@ var PARSE_TESTS = [ @@ -654,6 +751,14 @@ var PARSE_TESTS = [
errors: []
}
},
{
description: "Quoted fields with spaces between closing quote and next delimiter and contains delimiter",
input: 'A,",B" ,C,D\r\nE,F,",G" ,H',
expected: {
data: [['A', ',B', 'C','D'],['E', 'F', ',G','H']],
errors: []
}
},
{
description: "Quoted fields with spaces between closing quote and next new line",
input: 'A,B,C,"D" \r\nE,F,G,"H" \r\nQ,W,E,R',
@ -1475,7 +1580,7 @@ describe('Parse Tests', function() { @@ -1475,7 +1580,7 @@ describe('Parse Tests', function() {
if (test.expected.meta) {
assert.deepEqual(actual.meta, test.expected.meta);
}
assert.deepEqual(JSON.stringify(actual.errors), JSON.stringify(test.expected.errors));
assert.deepEqual(JSON.parse(JSON.stringify(actual.errors)), test.expected.errors);
assert.deepEqual(actual.data, test.expected.data);
});
}
@ -1556,7 +1661,7 @@ describe('Parse Async Tests', function() { @@ -1556,7 +1661,7 @@ describe('Parse Async Tests', function() {
var config = test.config;
config.complete = function(actual) {
assert.deepEqual(JSON.stringify(actual.errors), JSON.stringify(test.expected.errors));
assert.deepEqual(JSON.parse(JSON.stringify(actual.errors)), test.expected.errors);
assert.deepEqual(actual.data, test.expected.data);
done();
};
@ -2368,7 +2473,7 @@ describe('Custom Tests', function() { @@ -2368,7 +2473,7 @@ describe('Custom Tests', function() {
this.timeout(test.timeout);
}
test.run(function(actual) {
assert.deepEqual(JSON.stringify(actual), JSON.stringify(test.expected));
assert.deepEqual(JSON.parse(JSON.stringify(actual)), test.expected);
done();
});
});

Loading…
Cancel
Save