diff --git a/papaparse.js b/papaparse.js
index 88f115b..0c27e6e 100644
--- a/papaparse.js
+++ b/papaparse.js
@@ -1250,6 +1250,7 @@
 						// Find closing quote
 						var quoteSearch = input.indexOf(quoteChar, quoteSearch+1);
 
+						// No further quote character exists in the input (indexOf returned -1)
 						if (quoteSearch === -1)
 						{
 							if (!ignoreLastRow) {
@@ -1265,9 +1266,9 @@
 							return finish();
 						}
 
+						// Closing quote is the last character of the input (EOF)
 						if (quoteSearch === inputLen-1)
 						{
-							// Closing quote at EOF
 							var value = input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar);
 							return finish(value);
 						}
@@ -1279,9 +1280,9 @@
 							continue;
 						}
 
+						// Closing quote immediately followed by the delimiter
 						if (input[quoteSearch+1] === delim)
 						{
-							// Closing quote followed by delimiter
 							row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
 							cursor = quoteSearch + 1 + delimLen;
 							nextDelim = input.indexOf(delim, cursor);
@@ -1289,9 +1290,9 @@
 							break;
 						}
 
+						// Closing quote immediately followed by the newline sequence
 						if (input.substr(quoteSearch+1, newlineLen) === newline)
 						{
-							// Closing quote followed by newline
 							row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
 							saveRow(quoteSearch + 1 + newlineLen);
 							nextDelim = input.indexOf(delim, cursor);	// because we may have skipped the nextDelim in the quoted field
@@ -1308,6 +1309,20 @@
 
 							break;
 						}
+
+
+						// None of the valid closing-quote cases applied (escaped quote, or quote followed by EOF/delimiter/newline): record an InvalidQuotes error and keep scanning, leaving this quote inside the field text
+						errors.push({
+							type: 'Quotes',
+							code: 'InvalidQuotes',
+							message: 'Trailing quote on quoted field is malformed',
+							row: data.length,	// the current row has yet to be inserted, so data.length is its index
+							index: cursor	// start of the field's content (the substring start used above), not the position of the malformed quote
+						});
+
+						quoteSearch++;
+						continue;
+
 					}
 
 					continue;
diff --git a/tests/test-cases.js b/tests/test-cases.js
index 1b857c4..5352dae 100644
--- a/tests/test-cases.js
+++ b/tests/test-cases.js
@@ -195,6 +195,123 @@ var CORE_PARSER_TESTS = [
 			}]
 		}
 	},
+	{
+		description: "Quoted field has invalid trailing quote after delimiter with a valid closer",
+		input: '"a,"b,c"\nd,e,f',
+		notes: "The input is malformed, opening quotes identified, trailing quote is malformed. Trailing quote should be escaped or followed by valid new line or delimiter to be valid",
+		expected: {
+			data: [['a,"b,c'], ['d', 'e', 'f']],
+			errors: [{
+				"type": "Quotes",
+				"code": "InvalidQuotes",
+				"message": "Trailing quote on quoted field is malformed",
+				"row": 0,
+				"index":1
+			}]
+		}
+	},
+	{
+		description: "Quoted field has invalid trailing quote after delimiter",
+		input: 'a,"b,"c\nd,e,f',
+		notes: "The input is malformed, opening quotes identified, trailing quote is malformed. Trailing quote should be escaped or followed by valid new line or delimiter to be valid",
+		expected: {
+			data: [['a', 'b,"c\nd,e,f']],
+			errors: [{
+				"type": "Quotes",
+				"code": "InvalidQuotes",
+				"message": "Trailing quote on quoted field is malformed",
+				"row": 0,
+				"index": 3
+			},
+			{
+				"type": "Quotes",
+				"code": "MissingQuotes",
+				"message": "Quoted field unterminated",
+				"row": 0,
+				"index": 3
+			}]
+		}
+	},
+	{
+		description: "Quoted field has invalid trailing quote before delimiter",
+		input: 'a,"b"c,d\ne,f,g',
+		notes: "The input is malformed, opening quotes identified, trailing quote is malformed. Trailing quote should be escaped or followed by valid new line or delimiter to be valid",
+		expected: {
+			data: [['a', 'b"c,d\ne,f,g']],
+			errors: [{
+				"type": "Quotes",
+				"code": "InvalidQuotes",
+				"message": "Trailing quote on quoted field is malformed",
+				"row": 0,
+				"index": 3
+			},
+			{
+				"type": "Quotes",
+				"code": "MissingQuotes",
+				"message": "Quoted field unterminated",
+				"row": 0,
+				"index": 3
+			}]
+		}
+	},
+	{
+		description: "Quoted field has invalid trailing quote after new line",
+		input: 'a,"b,c\nd"e,f,g',
+		notes: "The input is malformed, opening quotes identified, trailing quote is malformed. Trailing quote should be escaped or followed by valid new line or delimiter to be valid",
+		expected: {
+			data: [['a', 'b,c\nd"e,f,g']],
+			errors: [{
+				"type": "Quotes",
+				"code": "InvalidQuotes",
+				"message": "Trailing quote on quoted field is malformed",
+				"row": 0,
+				"index": 3
+			},
+			{
+				"type": "Quotes",
+				"code": "MissingQuotes",
+				"message": "Quoted field unterminated",
+				"row": 0,
+				"index": 3
+			}]
+		}
+	},
+	{
+		description: "Quoted field has valid trailing quote via delimiter",
+		input: 'a,"b",c\nd,e,f',
+		notes: "Trailing quote is valid due to trailing delimiter",
+		expected: {
+			data: [['a', 'b', 'c'], ['d', 'e', 'f']],
+			errors: []
+		}
+	},
+	{
+		description: "Quoted field has valid trailing quote via \\n",
+		input: 'a,b,"c"\nd,e,f',
+		notes: "Trailing quote is valid due to trailing new line delimiter",
+		expected: {
+			data: [['a', 'b', 'c'], ['d', 'e', 'f']],
+			errors: []
+		}
+	},
+	{
+		description: "Quoted field has valid trailing quote via EOF",
+		input: 'a,b,c\nd,e,"f"',
+		notes: "Trailing quote is valid due to EOF",
+		expected: {
+			data: [['a', 'b', 'c'], ['d', 'e', 'f']],
+			errors: []
+		}
+	},
+	{
+		description: "Quoted field contains delimiters and \\n with valid trailing quote",
+		input: 'a,"b,c\nd,e,f"',
+		notes: "Trailing quote is valid due to trailing delimiter",
+		expected: {
+			data: [['a', 'b,c\nd,e,f']],
+			errors: []
+		}
+	},
 	{
 		description: "Line starts with quoted field",
 		input: 'a,b,c\n"d",e,f',