From 161466f239860599567532dda683a903db370f02 Mon Sep 17 00:00:00 2001 From: Barrie Treloar Date: Fri, 1 Jun 2018 11:29:47 +0930 Subject: [PATCH] Correctly parse line ending with quoted field when first field of next line is empty Closes #514 --- papaparse.js | 6 +++-- tests/test-cases.js | 56 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/papaparse.js b/papaparse.js index 3a0e5d9..8106e8d 100755 --- a/papaparse.js +++ b/papaparse.js @@ -1448,9 +1448,11 @@ continue; } - var spacesBetweenQuoteAndDelimiter = extraSpaces(nextDelim); + // Check up to nextDelim or nextNewline, whichever is closest + var checkUpTo = nextNewline === -1 ? nextDelim : Math.min(nextDelim, nextNewline); + var spacesBetweenQuoteAndDelimiter = extraSpaces(checkUpTo); - // Closing quote followed by delimiter or 'unnecessary steps + delimiter' + // Closing quote followed by delimiter or 'unnecessary spaces + delimiter' if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter] === delim) { row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar)); diff --git a/tests/test-cases.js b/tests/test-cases.js index e54b218..bf4b11d 100644 --- a/tests/test-cases.js +++ b/tests/test-cases.js @@ -330,6 +330,46 @@ var CORE_PARSER_TESTS = [ errors: [] } }, + { + description: "Line ends with quoted field, first field of next line is empty, \\n", + input: 'a,b,c\n,e,f\n,"h","i"\n,"k","l"', + config: { + newline: '\n', + }, + expected: { + data: [['a', 'b', 'c'], ['', 'e', 'f'], ['', 'h', 'i'], ['', 'k', 'l']], + errors: [] + } + }, + { + description: "Line ends with quoted field, first field of next line is empty, \\r\\n", + input: 'a,b,c\r\n,e,f\r\n,"h","i"\r\n,"k","l"', + config: { + newline: '\r\n', + }, + expected: { + data: [['a', 'b', 'c'], ['', 'e', 'f'], ['', 'h', 'i'], ['', 'k', 'l']], + errors: [] + } + }, + { + description: "Line ends with quoted field (longer), \\r\\n", + input: 
'Task_TaskID,Task_Description,Task_BudgetHours,Task_Reference,Project_ProjectCode,Project_Description,Customer_CustomerCode,Customer_Name\r\n,"",,"","","","ABCDE","ABCDE Consulting"\r\n,"",,"","","","FGHIJ","FGHIJ Limited (Pty)"\r\n,"",,"","","","KLUMN","KLUMN"\r\n,"",,"","","","OPQRS","OPQRS Ltd"\r\n,"",,"","","","TUVWX","TUVWX Pty Ltd"', + config: { + newline: '\r\n', + }, + expected: { + data: [ + ['Task_TaskID','Task_Description','Task_BudgetHours','Task_Reference','Project_ProjectCode','Project_Description','Customer_CustomerCode','Customer_Name'], + ['','','','','','','ABCDE','ABCDE Consulting'], + ['','','','','','','FGHIJ','FGHIJ Limited (Pty)'], + ['','','','','','','KLUMN','KLUMN'], + ['','','','','','','OPQRS','OPQRS Ltd'], + ['','','','','','','TUVWX','TUVWX Pty Ltd'], + ], + errors: [] + } + }, { description: "Quoted field at end of row (but not at EOF) has quotes", input: 'a,b,"c""c"""\nd,e,f', @@ -650,6 +690,22 @@ var PARSE_TESTS = [ errors: [] } }, + { + description: "Line ends with quoted field, first field of next line is empty \\r\\n", + input: 'a,b,"c"\r\nd,e,"f"\r\n,"h","i"\r\n,"k","l"', + config: { + header: true, + newline: '\r\n', + }, + expected: { + data: [ + {a: 'd', b: 'e', c: 'f'}, + {a: '', b: 'h', c: 'i'}, + {a: '', b: 'k', c: 'l'} + ], + errors: [] + } + }, { description: "Mixed slash n and slash r should choose first as precident", input: 'a,b,c\nd,e,f\rg,h,i\n',