From b5db7f08cc16399e177dc3e4b3bfc1c515b65acc Mon Sep 17 00:00:00 2001 From: Nic Colgrove Date: Thu, 17 Aug 2017 02:18:23 -0500 Subject: [PATCH] Expanded error statements for malformed quoted text (#409) * Emit error statements for malformed quoted text Update the parser to emit errors when strings do not adhere to sections 2.2 and 2.3 of RFC 4180. Test cases are added to cover the new error handling. --- papaparse.js | 21 ++++++-- tests/test-cases.js | 117 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 135 insertions(+), 3 deletions(-) diff --git a/papaparse.js b/papaparse.js index 88f115b..0c27e6e 100644 --- a/papaparse.js +++ b/papaparse.js @@ -1250,6 +1250,7 @@ // Find closing quote var quoteSearch = input.indexOf(quoteChar, quoteSearch+1); + //No other quotes are found - no other delimiters if (quoteSearch === -1) { if (!ignoreLastRow) { @@ -1265,9 +1266,9 @@ return finish(); } + // Closing quote at EOF if (quoteSearch === inputLen-1) { - // Closing quote at EOF var value = input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar); return finish(value); } @@ -1279,9 +1280,9 @@ continue; } + // Closing quote followed by delimiter if (input[quoteSearch+1] === delim) { - // Closing quote followed by delimiter row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar)); cursor = quoteSearch + 1 + delimLen; nextDelim = input.indexOf(delim, cursor); @@ -1289,9 +1290,9 @@ break; } + // Closing quote followed by newline if (input.substr(quoteSearch+1, newlineLen) === newline) { - // Closing quote followed by newline row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar)); saveRow(quoteSearch + 1 + newlineLen); nextDelim = input.indexOf(delim, cursor); // because we may have skipped the nextDelim in the quoted field @@ -1308,6 +1309,20 @@ break; } + + + // Checks for valid closing quotes are complete (escaped quotes or quote followed by EOF/delimiter/newline) -- assume these quotes are part of an invalid text 
string + errors.push({ + type: 'Quotes', + code: 'InvalidQuotes', + message: 'Trailing quote on quoted field is malformed', + row: data.length, // row has yet to be inserted + index: cursor + }); + + quoteSearch++; + continue; + } continue; diff --git a/tests/test-cases.js b/tests/test-cases.js index 1b857c4..5352dae 100644 --- a/tests/test-cases.js +++ b/tests/test-cases.js @@ -195,6 +195,123 @@ var CORE_PARSER_TESTS = [ }] } }, + { + description: "Quoted field has invalid trailing quote after delimiter with a valid closer", + input: '"a,"b,c"\nd,e,f', + notes: "The input is malformed, opening quotes identified, trailing quote is malformed. Trailing quote should be escaped or followed by valid new line or delimiter to be valid", + expected: { + data: [['a,"b,c'], ['d', 'e', 'f']], + errors: [{ + "type": "Quotes", + "code": "InvalidQuotes", + "message": "Trailing quote on quoted field is malformed", + "row": 0, + "index":1 + }] + } + }, + { + description: "Quoted field has invalid trailing quote after delimiter", + input: 'a,"b,"c\nd,e,f', + notes: "The input is malformed, opening quotes identified, trailing quote is malformed. Trailing quote should be escaped or followed by valid new line or delimiter to be valid", + expected: { + data: [['a', 'b,"c\nd,e,f']], + errors: [{ + "type": "Quotes", + "code": "InvalidQuotes", + "message": "Trailing quote on quoted field is malformed", + "row": 0, + "index": 3 + }, + { + "type": "Quotes", + "code": "MissingQuotes", + "message": "Quoted field unterminated", + "row": 0, + "index": 3 + }] + } + }, + { + description: "Quoted field has invalid trailing quote before delimiter", + input: 'a,"b"c,d\ne,f,g', + notes: "The input is malformed, opening quotes identified, trailing quote is malformed. 
Trailing quote should be escaped or followed by valid new line or delimiter to be valid", + expected: { + data: [['a', 'b"c,d\ne,f,g']], + errors: [{ + "type": "Quotes", + "code": "InvalidQuotes", + "message": "Trailing quote on quoted field is malformed", + "row": 0, + "index": 3 + }, + { + "type": "Quotes", + "code": "MissingQuotes", + "message": "Quoted field unterminated", + "row": 0, + "index": 3 + }] + } + }, + { + description: "Quoted field has invalid trailing quote after new line", + input: 'a,"b,c\nd"e,f,g', + notes: "The input is malformed, opening quotes identified, trailing quote is malformed. Trailing quote should be escaped or followed by valid new line or delimiter to be valid", + expected: { + data: [['a', 'b,c\nd"e,f,g']], + errors: [{ + "type": "Quotes", + "code": "InvalidQuotes", + "message": "Trailing quote on quoted field is malformed", + "row": 0, + "index": 3 + }, + { + "type": "Quotes", + "code": "MissingQuotes", + "message": "Quoted field unterminated", + "row": 0, + "index": 3 + }] + } + }, + { + description: "Quoted field has valid trailing quote via delimiter", + input: 'a,"b",c\nd,e,f', + notes: "Trailing quote is valid due to trailing delimiter", + expected: { + data: [['a', 'b', 'c'], ['d', 'e', 'f']], + errors: [] + } + }, + { + description: "Quoted field has valid trailing quote via \\n", + input: 'a,b,"c"\nd,e,f', + notes: "Trailing quote is valid due to trailing new line delimiter", + expected: { + data: [['a', 'b', 'c'], ['d', 'e', 'f']], + errors: [] + } + }, + { + description: "Quoted field has valid trailing quote via EOF", + input: 'a,b,c\nd,e,"f"', + notes: "Trailing quote is valid due to EOF", + expected: { + data: [['a', 'b', 'c'], ['d', 'e', 'f']], + errors: [] + } + }, + { + description: "Quoted field contains delimiters and \\n with valid trailing quote", + input: 'a,"b,c\nd,e,f"', + notes: "Trailing quote is valid due to trailing delimiter", + expected: { + data: [['a', 'b,c\nd,e,f']], + errors: [] + } + }, { 
description: "Line starts with quoted field", input: 'a,b,c\n"d",e,f',