Browse Source

Expanded error statements for malformed quoted text (#409)

* Emit error statements for malformed quoted text

Update the parser to emit errors when quoted strings do not adhere to
sections 2.2 and 2.3 of RFC 4180. Test cases are added to cover the new errors.
pull/411/head
Nic Colgrove 8 years ago committed by Sergi Almacellas Abellana
parent
commit
b5db7f08cc
  1. 21
      papaparse.js
  2. 117
      tests/test-cases.js

21
papaparse.js

@ -1250,6 +1250,7 @@ @@ -1250,6 +1250,7 @@
// Find closing quote
var quoteSearch = input.indexOf(quoteChar, quoteSearch+1);
//No other quotes are found - no other delimiters
if (quoteSearch === -1)
{
if (!ignoreLastRow) {
@ -1265,9 +1266,9 @@ @@ -1265,9 +1266,9 @@
return finish();
}
// Closing quote at EOF
if (quoteSearch === inputLen-1)
{
// Closing quote at EOF
var value = input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar);
return finish(value);
}
@ -1279,9 +1280,9 @@ @@ -1279,9 +1280,9 @@
continue;
}
// Closing quote followed by delimiter
if (input[quoteSearch+1] === delim)
{
// Closing quote followed by delimiter
row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
cursor = quoteSearch + 1 + delimLen;
nextDelim = input.indexOf(delim, cursor);
@ -1289,9 +1290,9 @@ @@ -1289,9 +1290,9 @@
break;
}
// Closing quote followed by newline
if (input.substr(quoteSearch+1, newlineLen) === newline)
{
// Closing quote followed by newline
row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
saveRow(quoteSearch + 1 + newlineLen);
nextDelim = input.indexOf(delim, cursor); // because we may have skipped the nextDelim in the quoted field
@ -1308,6 +1309,20 @@ @@ -1308,6 +1309,20 @@
break;
}
// All checks for a valid closing quote have failed (escaped quote, or quote followed by EOF/delimiter/newline) -- treat this quote as part of a malformed quoted field
errors.push({
type: 'Quotes',
code: 'InvalidQuotes',
message: 'Trailing quote on quoted field is malformed',
row: data.length, // row has yet to be inserted
index: cursor
});
quoteSearch++;
continue;
}
continue;

117
tests/test-cases.js

@ -195,6 +195,123 @@ var CORE_PARSER_TESTS = [ @@ -195,6 +195,123 @@ var CORE_PARSER_TESTS = [
}]
}
},
{
description: "Quoted field has invalid trailing quote after delimiter with a valid closer",
input: '"a,"b,c"\nd,e,f',
notes: "The input is malformed, opening quotes identified, trailing quote is malformed. Trailing quote should be escaped or followed by valid new line or delimiter to be valid",
expected: {
data: [['a,"b,c'], ['d', 'e', 'f']],
errors: [{
"type": "Quotes",
"code": "InvalidQuotes",
"message": "Trailing quote on quoted field is malformed",
"row": 0,
"index":1
}]
}
},
{
description: "Quoted field has invalid trailing quote after delimiter",
input: 'a,"b,"c\nd,e,f',
notes: "The input is malformed, opening quotes identified, trailing quote is malformed. Trailing quote should be escaped or followed by valid new line or delimiter to be valid",
expected: {
data: [['a', 'b,"c\nd,e,f']],
errors: [{
"type": "Quotes",
"code": "InvalidQuotes",
"message": "Trailing quote on quoted field is malformed",
"row": 0,
"index": 3
},
{
"type": "Quotes",
"code": "MissingQuotes",
"message": "Quoted field unterminated",
"row": 0,
"index": 3
}]
}
},
{
description: "Quoted field has invalid trailing quote before delimiter",
input: 'a,"b"c,d\ne,f,g',
notes: "The input is malformed, opening quotes identified, trailing quote is malformed. Trailing quote should be escaped or followed by valid new line or delimiter to be valid",
expected: {
data: [['a', 'b"c,d\ne,f,g']],
errors: [{
"type": "Quotes",
"code": "InvalidQuotes",
"message": "Trailing quote on quoted field is malformed",
"row": 0,
"index": 3
},
{
"type": "Quotes",
"code": "MissingQuotes",
"message": "Quoted field unterminated",
"row": 0,
"index": 3
}]
}
},
{
description: "Quoted field has invalid trailing quote after new line",
input: 'a,"b,c\nd"e,f,g',
notes: "The input is malformed, opening quotes identified, trailing quote is malformed. Trailing quote should be escaped or followed by valid new line or delimiter to be valid",
expected: {
data: [['a', 'b,c\nd"e,f,g']],
errors: [{
"type": "Quotes",
"code": "InvalidQuotes",
"message": "Trailing quote on quoted field is malformed",
"row": 0,
"index": 3
},
{
"type": "Quotes",
"code": "MissingQuotes",
"message": "Quoted field unterminated",
"row": 0,
"index": 3
}]
}
},
{
description: "Quoted field has valid trailing quote via delimiter",
input: 'a,"b",c\nd,e,f',
notes: "Trailing quote is valid due to trailing delimiter",
expected: {
data: [['a', 'b', 'c'], ['d', 'e', 'f']],
errors: []
}
},
{
description: "Quoted field has valid trailing quote via \\n",
input: 'a,b,"c"\nd,e,f',
notes: "Trailing quote is valid due to trailing new line delimiter",
expected: {
data: [['a', 'b', 'c'], ['d', 'e', 'f']],
errors: []
}
},
{
description: "Quoted field has valid trailing quote via EOF",
input: 'a,b,c\nd,e,"f"',
notes: "Trailing quote is valid due to EOF",
expected: {
data: [['a', 'b', 'c'], ['d', 'e', 'f']],
errors: []
}
},
{
description: "Quoted field contains delimiters and \\n with valid trailing quote",
input: 'a,"b,c\nd,e,f"',
notes: "Trailing quote is valid due to EOF",
expected: {
data: [['a', 'b,c\nd,e,f']],
errors: []
}
},
{
description: "Line starts with quoted field",
input: 'a,b,c\n"d",e,f',

Loading…
Cancel
Save