Browse Source

Merge 162d442b50 into 7fc65f3164

pull/730/merge
jseter 4 years ago committed by GitHub
parent
commit
2f47add561
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 58
      papaparse.js
  2. 47
      tests/test-cases.js

58
papaparse.js

@ -1398,6 +1398,8 @@ License: MIT
var step = config.step; var step = config.step;
var preview = config.preview; var preview = config.preview;
var fastMode = config.fastMode; var fastMode = config.fastMode;
var _strictQuote = config.strictQuote;
var strictQuote = _strictQuote === undefined ? false : Boolean(_strictQuote);
var quoteChar; var quoteChar;
/** Allows for no quoteChar by setting quoteChar to undefined in config */ /** Allows for no quoteChar by setting quoteChar to undefined in config */
if (config.quoteChar === undefined) { if (config.quoteChar === undefined) {
@ -1489,6 +1491,9 @@ License: MIT
var nextNewline = input.indexOf(newline, cursor); var nextNewline = input.indexOf(newline, cursor);
var quoteCharRegex = new RegExp(escapeRegExp(escapeChar) + escapeRegExp(quoteChar), 'g'); var quoteCharRegex = new RegExp(escapeRegExp(escapeChar) + escapeRegExp(quoteChar), 'g');
var quoteSearch = input.indexOf(quoteChar, cursor); var quoteSearch = input.indexOf(quoteChar, cursor);
var savedNextDelim;
var savedNextNewline;
var savedQuoteSearch;
// Parser loop // Parser loop
for (;;) for (;;)
@ -1496,12 +1501,12 @@ License: MIT
// Field has opening quote // Field has opening quote
if (input[cursor] === quoteChar) if (input[cursor] === quoteChar)
{ {
var quoteFallThrough = false;
quoteSaveState();
// Start our search for the closing quote where the cursor is // Start our search for the closing quote where the cursor is
quoteSearch = cursor; quoteSearch = cursor;
// Skip the opening quote
cursor++;
for (;;) for (;;)
{ {
// Find closing quote // Find closing quote
@ -1510,8 +1515,10 @@ License: MIT
//No other quotes are found - no other delimiters //No other quotes are found - no other delimiters
if (quoteSearch === -1) if (quoteSearch === -1)
{ {
if (!ignoreLastRow) { if(ignoreLastRow)
// No closing quote... what a pity return returnable();
if(strictQuote) {
errors.push({ errors.push({
type: 'Quotes', type: 'Quotes',
code: 'MissingQuotes', code: 'MissingQuotes',
@ -1519,14 +1526,25 @@ License: MIT
row: data.length, // row has yet to be inserted row: data.length, // row has yet to be inserted
index: cursor index: cursor
}); });
quoteRestoreState();
quoteFallThrough = true;
break; // fall through to parse as non-quote.
} }
return finish(); // No closing quote... what a pity
errors.push({
type: 'Quotes',
code: 'MissingQuotes',
message: 'Quoted field unterminated',
row: data.length, // row has yet to be inserted
index: cursor
});
return finish(input.substring(cursor + 1));
} }
// Closing quote at EOF // Closing quote at EOF
if (quoteSearch === inputLen - 1) if (quoteSearch === inputLen - 1)
{ {
var value = input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar); var value = input.substring(cursor + 1, quoteSearch).replace(quoteCharRegex, quoteChar);
return finish(value); return finish(value);
} }
@ -1557,7 +1575,7 @@ License: MIT
// Closing quote followed by delimiter or 'unnecessary spaces + delimiter' // Closing quote followed by delimiter or 'unnecessary spaces + delimiter'
if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter] === delim) if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter] === delim)
{ {
row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar)); row.push(input.substring(cursor + 1, quoteSearch).replace(quoteCharRegex, quoteChar));
cursor = quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen; cursor = quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen;
// If char after following delimiter is not quoteChar, we find next quote char position // If char after following delimiter is not quoteChar, we find next quote char position
@ -1575,7 +1593,7 @@ License: MIT
// Closing quote followed by newline or 'unnecessary spaces + newLine' // Closing quote followed by newline or 'unnecessary spaces + newLine'
if (input.substring(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen) === newline) if (input.substring(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen) === newline)
{ {
row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar)); row.push(input.substring(cursor + 1, quoteSearch).replace(quoteCharRegex, quoteChar));
saveRow(quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen); saveRow(quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen);
nextDelim = input.indexOf(delim, cursor); // because we may have skipped the nextDelim in the quoted field nextDelim = input.indexOf(delim, cursor); // because we may have skipped the nextDelim in the quoted field
quoteSearch = input.indexOf(quoteChar, cursor); // we search for first quote in next line quoteSearch = input.indexOf(quoteChar, cursor); // we search for first quote in next line
@ -1603,12 +1621,18 @@ License: MIT
index: cursor index: cursor
}); });
if(strictQuote) {
quoteRestoreState();
quoteFallThrough = true;
break; // fall through to parse as non-quote.
}
quoteSearch++; quoteSearch++;
continue; continue;
} }
if(!quoteFallThrough) {
continue; continue;
}
} }
// Comment found at start of new line // Comment found at start of new line
@ -1784,6 +1808,18 @@ License: MIT
return result; return result;
} }
/**
 * Snapshots the parser's look-ahead positions (quoteSearch, nextNewline and
 * nextDelim) into their saved* counterparts so that quoteRestoreState() can
 * put them back later. The live values themselves are left untouched.
 */
function quoteSaveState() {
	savedQuoteSearch = quoteSearch;
	savedNextNewline = nextNewline;
	savedNextDelim = nextDelim;
}
/**
 * Rolls the parser's look-ahead positions (nextDelim, nextNewline and
 * quoteSearch) back to the values held in the saved* variables. The saved
 * values themselves are left untouched, so a restore can be repeated.
 */
function quoteRestoreState() {
	nextDelim = savedNextDelim;
	nextNewline = savedNextNewline;
	quoteSearch = savedQuoteSearch;
}
}; };
/** Sets the abort flag */ /** Sets the abort flag */

47
tests/test-cases.js

@ -194,7 +194,7 @@ var CORE_PARSER_TESTS = [
"code": "MissingQuotes", "code": "MissingQuotes",
"message": "Quoted field unterminated", "message": "Quoted field unterminated",
"row": 0, "row": 0,
"index": 3 "index": 2
}] }]
} }
}, },
@ -209,7 +209,7 @@ var CORE_PARSER_TESTS = [
"code": "InvalidQuotes", "code": "InvalidQuotes",
"message": "Trailing quote on quoted field is malformed", "message": "Trailing quote on quoted field is malformed",
"row": 0, "row": 0,
"index": 1 "index": 0
}] }]
} }
}, },
@ -224,14 +224,14 @@ var CORE_PARSER_TESTS = [
"code": "InvalidQuotes", "code": "InvalidQuotes",
"message": "Trailing quote on quoted field is malformed", "message": "Trailing quote on quoted field is malformed",
"row": 0, "row": 0,
"index": 3 "index": 2
}, },
{ {
"type": "Quotes", "type": "Quotes",
"code": "MissingQuotes", "code": "MissingQuotes",
"message": "Quoted field unterminated", "message": "Quoted field unterminated",
"row": 0, "row": 0,
"index": 3 "index": 2
}] }]
} }
}, },
@ -246,14 +246,14 @@ var CORE_PARSER_TESTS = [
"code": "InvalidQuotes", "code": "InvalidQuotes",
"message": "Trailing quote on quoted field is malformed", "message": "Trailing quote on quoted field is malformed",
"row": 0, "row": 0,
"index": 3 "index": 2
}, },
{ {
"type": "Quotes", "type": "Quotes",
"code": "MissingQuotes", "code": "MissingQuotes",
"message": "Quoted field unterminated", "message": "Quoted field unterminated",
"row": 0, "row": 0,
"index": 3 "index": 2
}] }]
} }
}, },
@ -268,14 +268,14 @@ var CORE_PARSER_TESTS = [
"code": "InvalidQuotes", "code": "InvalidQuotes",
"message": "Trailing quote on quoted field is malformed", "message": "Trailing quote on quoted field is malformed",
"row": 0, "row": 0,
"index": 3 "index": 2
}, },
{ {
"type": "Quotes", "type": "Quotes",
"code": "MissingQuotes", "code": "MissingQuotes",
"message": "Quoted field unterminated", "message": "Quoted field unterminated",
"row": 0, "row": 0,
"index": 3 "index": 2
}] }]
} }
}, },
@ -585,6 +585,37 @@ var CORE_PARSER_TESTS = [
data: [['a', 'b', 'c'], ['']], data: [['a', 'b', 'c'], ['']],
errors: [] errors: []
} }
},
{
description: "Quoted field has invalid trailing quote after delimiter with a valid closer in strict quote mode",
input: '"a,"b,c"\nd,e,f',
notes: "The input is malformed, opening quotes identified, trailing quote is malformed. Trailing quote should be escaped or followed by valid new line or delimiter to be valid",
config: { strictQuote: true },
expected: {
data: [['"a','b,c'], ['d', 'e', 'f']],
errors: [{
"type": "Quotes",
"code": "InvalidQuotes",
"message": "Trailing quote on quoted field is malformed",
"row": 0,
"index": 0
}]
}
},
{
description: "Quoted field has no closing quote in strict quote mode",
input: 'a,"b,c\nd,e,f',
config: { strictQuote: true },
expected: {
data: [['a','"b','c'],['d','e','f']],
errors: [{
"type": "Quotes",
"code": "MissingQuotes",
"message": "Quoted field unterminated",
"row": 0,
"index": 2
}]
}
} }
]; ];

Loading…
Cancel
Save