From ec7653dae70479104f8861fcc43317283add0960 Mon Sep 17 00:00:00 2001 From: Smit Shah Date: Mon, 5 Feb 2018 17:37:29 -0800 Subject: [PATCH] Add support for spaces between quotes and separators Currently, if there is a space between the quote char and the line (or field) separator, PapaParse will throw an invalid-format error. This commit removes the trailing spaces so the file is parsed correctly. Fixes #452. --- papaparse.js | 31 +++++++++++++++++++++++++------ tests/test-cases.js | 24 ++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 6 deletions(-) diff --git a/papaparse.js b/papaparse.js index 5bef3cc..c1f1ec4 100755 --- a/papaparse.js +++ b/papaparse.js @@ -1285,21 +1285,25 @@ continue; } - // Closing quote followed by delimiter - if (input[quoteSearch+1] === delim) + var spacesBetweenQuoteAndDelimiter = extraSpaces(nextDelim); + + // Closing quote followed by delimiter or 'unnecessary spaces + delimiter' + if (input[quoteSearch+1+spacesBetweenQuoteAndDelimiter] === delim) { row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar)); - cursor = quoteSearch + 1 + delimLen; + cursor = quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen; nextDelim = input.indexOf(delim, cursor); nextNewline = input.indexOf(newline, cursor); break; } - // Closing quote followed by newline - if (input.substr(quoteSearch+1, newlineLen) === newline) + var spacesBetweenQuoteAndNewLine = extraSpaces(nextNewline); + + // Closing quote followed by newline or 'unnecessary spaces + newLine' + if (input.substr(quoteSearch+1+spacesBetweenQuoteAndNewLine, newlineLen) === newline) { row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar)); - saveRow(quoteSearch + 1 + newlineLen); + saveRow(quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen); nextDelim = input.indexOf(delim, cursor); // because we may have skipped the nextDelim in the quoted field if (stepIsFunction) @@ -1385,6 +1389,21 @@ lastCursor = cursor; } + /** + * 
Checks whether the text between the closing quote and the given index consists only of whitespace. + * If so, returns the number of whitespace characters; otherwise returns 0. + */ + function extraSpaces(index) { + var spaceLength = 0; + if (index !== -1) { + var textBetweenClosingQuoteAndIndex = input.substring(quoteSearch + 1, index); + if (textBetweenClosingQuoteAndIndex && textBetweenClosingQuoteAndIndex.trim() == '') { + spaceLength = textBetweenClosingQuoteAndIndex.length; + } + } + return spaceLength; + } + /** * Appends the remaining input from cursor to the end into * row, saves the row, calls step, and returns the results. diff --git a/tests/test-cases.js b/tests/test-cases.js index 5352dae..503e905 100644 --- a/tests/test-cases.js +++ b/tests/test-cases.js @@ -624,6 +624,30 @@ var PARSE_TESTS = [ errors: [] } }, + { + description: "Quoted fields with spaces between closing quote and next delimiter", + input: 'A,"B" ,C,D\r\nE,F,"G" ,H', + expected: { + data: [['A', 'B', 'C','D'],['E', 'F', 'G','H']], + errors: [] + } + }, + { + description: "Quoted fields with spaces between closing quote and next new line", + input: 'A,B,C,"D" \r\nE,F,G,"H" \r\nQ,W,E,R', + expected: { + data: [['A', 'B', 'C','D'],['E', 'F', 'G','H'],['Q', 'W', 'E','R']], + errors: [] + } + }, + { + description: "Quoted fields with spaces after closing quote", + input: 'A,"B" ,C,"D" \r\nE,F,"G" ,"H" \r\nQ,W,"E" ,R', + expected: { + data: [['A', 'B', 'C','D'],['E', 'F', 'G','H'],['Q', 'W', 'E','R']], + errors: [] + } + }, { description: "Mixed slash n and slash r should choose first as precident", input: 'a,b,c\nd,e,f\rg,h,i\n',