From 48d186429d0c25844827f2bc6ce2f6e6161f78bf Mon Sep 17 00:00:00 2001 From: Jamie Seter Date: Wed, 23 Oct 2019 15:14:34 -0400 Subject: [PATCH 1/5] #727 update delimiter and newline indexes if they are earlier than the current position before they are tested. --- papaparse.js | 6 ++++++ tests/test-cases.js | 16 ++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/papaparse.js b/papaparse.js index 9431770..e8aeb19 100755 --- a/papaparse.js +++ b/papaparse.js @@ -1536,6 +1536,12 @@ License: MIT continue; } + if(nextDelim !== -1 && nextDelim < (quoteSearch + 1)) { + nextDelim = input.indexOf(delim, (quoteSearch + 1)); + } + if(nextNewline !== -1 && nextNewline < (quoteSearch + 1)) { + nextNewline = input.indexOf(newline, (quoteSearch + 1)); + } // Check up to nextDelim or nextNewline, whichever is closest var checkUpTo = nextNewline === -1 ? nextDelim : Math.min(nextDelim, nextNewline); var spacesBetweenQuoteAndDelimiter = extraSpaces(checkUpTo); diff --git a/tests/test-cases.js b/tests/test-cases.js index 58a85f0..0b9c205 100644 --- a/tests/test-cases.js +++ b/tests/test-cases.js @@ -1464,6 +1464,22 @@ var PARSE_TESTS = [ data: [['a', 'b'], ['c', 'd'], [' , ', ','], ['" "', '""']], errors: [] } + }, + { + description: "Quoted fields with spaces between closing quote and next delimiter and contains delimiter", + input: 'A,",B" ,C,D\nE,F,G,H', + expected: { + data: [['A', ',B', 'C', 'D'],['E', 'F', 'G', 'H']], + errors: [] + } + }, + { + description: "Quoted fields with spaces between closing quote and newline and contains newline", + input: 'a,b,"c\n" \nd,e,f', + expected: { + data: [['a', 'b', 'c\n'], ['d', 'e', 'f']], + errors: [] + } } ]; From 287999a880261d5fceec1720db3a5540de7e0fe5 Mon Sep 17 00:00:00 2001 From: Jamie Seter Date: Mon, 28 Oct 2019 16:26:25 -0400 Subject: [PATCH 2/5] #713 initial implementation of strict quoting. 
--- papaparse.js | 53 +++++++++++++++++++++++++++++------- tests/test-cases.js | 66 ++++++++++++++++++++++++++++++++++++++------- 2 files changed, 101 insertions(+), 18 deletions(-) diff --git a/papaparse.js b/papaparse.js index 15d281b..a3baa5b 100755 --- a/papaparse.js +++ b/papaparse.js @@ -1393,6 +1393,8 @@ License: MIT var step = config.step; var preview = config.preview; var fastMode = config.fastMode; + var _strictQuote = config.strictQuote; + var strictQuote = _strictQuote === undefined ? false : Boolean(_strictQuote); var quoteChar; /** Allows for no quoteChar by setting quoteChar to undefined in config */ if (config.quoteChar === undefined) { @@ -1484,6 +1486,9 @@ License: MIT var nextNewline = input.indexOf(newline, cursor); var quoteCharRegex = new RegExp(escapeRegExp(escapeChar) + escapeRegExp(quoteChar), 'g'); var quoteSearch = input.indexOf(quoteChar, cursor); + var savedNextDelim; + var savedNextNewline; + var savedQuoteSearch; // Parser loop for (;;) @@ -1491,12 +1496,12 @@ License: MIT // Field has opening quote if (input[cursor] === quoteChar) { + var quoteFallThrough = false; + quoteSaveState(); + // Start our search for the closing quote where the cursor is quoteSearch = cursor; - // Skip the opening quote - cursor++; - for (;;) { // Find closing quote @@ -1505,6 +1510,18 @@ License: MIT //No other quotes are found - no other delimiters if (quoteSearch === -1) { + if(strictQuote) { + errors.push({ + type: 'Quotes', + code: 'MissingQuotes', + message: 'Quoted field unterminated', + row: data.length, // row has yet to be inserted + index: cursor + }); + quoteRestoreState(); + quoteFallThrough = true; + break; // fall through to parse as non-quote. + } if (!ignoreLastRow) { // No closing quote... 
what a pity errors.push({ @@ -1515,13 +1532,13 @@ License: MIT index: cursor }); } - return finish(); + return finish(input.substring(cursor + 1)); } // Closing quote at EOF if (quoteSearch === inputLen - 1) { - var value = input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar); + var value = input.substring(cursor + 1, quoteSearch).replace(quoteCharRegex, quoteChar); return finish(value); } @@ -1552,7 +1569,7 @@ License: MIT // Closing quote followed by delimiter or 'unnecessary spaces + delimiter' if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter] === delim) { - row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar)); + row.push(input.substring(cursor + 1, quoteSearch).replace(quoteCharRegex, quoteChar)); cursor = quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen; // If char after following delimiter is not quoteChar, we find next quote char position @@ -1570,7 +1587,7 @@ License: MIT // Closing quote followed by newline or 'unnecessary spaces + newLine' if (input.substring(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen) === newline) { - row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar)); + row.push(input.substring(cursor + 1, quoteSearch).replace(quoteCharRegex, quoteChar)); saveRow(quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen); nextDelim = input.indexOf(delim, cursor); // because we may have skipped the nextDelim in the quoted field quoteSearch = input.indexOf(quoteChar, cursor); // we search for first quote in next line @@ -1598,12 +1615,18 @@ License: MIT index: cursor }); + if(strictQuote) { + quoteRestoreState(); + quoteFallThrough = true; + break; // fall through to parse as non-quote. 
+ } quoteSearch++; continue; } - - continue; + if(!quoteFallThrough) { + continue; + } } // Comment found at start of new line @@ -1779,6 +1802,18 @@ License: MIT return result; } + + function quoteSaveState() { + savedNextDelim = nextDelim; + savedNextNewline = nextNewline; + savedQuoteSearch = quoteSearch; + } + + function quoteRestoreState() { + quoteSearch = savedQuoteSearch; + nextNewline = savedNextNewline; + nextDelim = savedNextDelim; + } }; /** Sets the abort flag */ diff --git a/tests/test-cases.js b/tests/test-cases.js index bc8e2f7..f8346cc 100644 --- a/tests/test-cases.js +++ b/tests/test-cases.js @@ -194,7 +194,7 @@ var CORE_PARSER_TESTS = [ "code": "MissingQuotes", "message": "Quoted field unterminated", "row": 0, - "index": 3 + "index": 2 }] } }, @@ -209,7 +209,7 @@ var CORE_PARSER_TESTS = [ "code": "InvalidQuotes", "message": "Trailing quote on quoted field is malformed", "row": 0, - "index": 1 + "index": 0 }] } }, @@ -224,14 +224,14 @@ var CORE_PARSER_TESTS = [ "code": "InvalidQuotes", "message": "Trailing quote on quoted field is malformed", "row": 0, - "index": 3 + "index": 2 }, { "type": "Quotes", "code": "MissingQuotes", "message": "Quoted field unterminated", "row": 0, - "index": 3 + "index": 2 }] } }, @@ -246,14 +246,14 @@ var CORE_PARSER_TESTS = [ "code": "InvalidQuotes", "message": "Trailing quote on quoted field is malformed", "row": 0, - "index": 3 + "index": 2 }, { "type": "Quotes", "code": "MissingQuotes", "message": "Quoted field unterminated", "row": 0, - "index": 3 + "index": 2 }] } }, @@ -268,14 +268,14 @@ var CORE_PARSER_TESTS = [ "code": "InvalidQuotes", "message": "Trailing quote on quoted field is malformed", "row": 0, - "index": 3 + "index": 2 }, { "type": "Quotes", "code": "MissingQuotes", "message": "Quoted field unterminated", "row": 0, - "index": 3 + "index": 2 }] } }, @@ -585,7 +585,55 @@ var CORE_PARSER_TESTS = [ data: [['a', 'b', 'c'], ['']], errors: [] } - } + }, + { + description: "Quoted field has invalid trailing 
quote after delimiter with a valid closer", + input: '"a,"b,c"\nd,e,f', + notes: "The input is malformed, opening quotes identified, trailing quote is malformed. Trailing quote should be escaped or followed by valid new line or delimiter to be valid", + config: { strictQuote: true }, + expected: { + data: [['"a','b,c'], ['d', 'e', 'f']], + errors: [{ + "type": "Quotes", + "code": "InvalidQuotes", + "message": "Trailing quote on quoted field is malformed", + "row": 0, + "index": 0 + }] + } + }, + { + description: "Quoted field has invalid trailing quote after delimiter with a valid closer in strict quote mode", + input: '"a,"b,c"\nd,e,f', + notes: "The input is malformed, opening quotes identified, trailing quote is malformed. Trailing quote should be escaped or followed by valid new line or delimiter to be valid", + config: { strictQuote: true }, + expected: { + data: [['"a','b,c'], ['d', 'e', 'f']], + errors: [{ + "type": "Quotes", + "code": "InvalidQuotes", + "message": "Trailing quote on quoted field is malformed", + "row": 0, + "index": 0 + }] + } + }, + { + description: "Quoted field has no closing quote in strict quote mode", + input: 'a,"b,c\nd,e,f', + config: { strictQuote: true }, + expected: { + data: [['a','"b','c'],['d','e','f']], + errors: [{ + "type": "Quotes", + "code": "MissingQuotes", + "message": "Quoted field unterminated", + "row": 0, + "index": 2 + }] + } + }, + ]; describe('Core Parser Tests', function() { From a6d5428ffd18010709585e20048e32e31285abce Mon Sep 17 00:00:00 2001 From: Jamie Seter Date: Mon, 28 Oct 2019 16:30:59 -0400 Subject: [PATCH 3/5] #713 remove duplicate test. 
--- tests/test-cases.js | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/tests/test-cases.js b/tests/test-cases.js index f8346cc..275cc28 100644 --- a/tests/test-cases.js +++ b/tests/test-cases.js @@ -586,22 +586,6 @@ var CORE_PARSER_TESTS = [ errors: [] } }, - { - description: "Quoted field has invalid trailing quote after delimiter with a valid closer", - input: '"a,"b,c"\nd,e,f', - notes: "The input is malformed, opening quotes identified, trailing quote is malformed. Trailing quote should be escaped or followed by valid new line or delimiter to be valid", - config: { strictQuote: true }, - expected: { - data: [['"a','b,c'], ['d', 'e', 'f']], - errors: [{ - "type": "Quotes", - "code": "InvalidQuotes", - "message": "Trailing quote on quoted field is malformed", - "row": 0, - "index": 0 - }] - } - }, { description: "Quoted field has invalid trailing quote after delimiter with a valid closer in strict quote mode", input: '"a,"b,c"\nd,e,f', From 5d7cfcd64f21bbe29a7f21d98a1c55d4a5aa0adc Mon Sep 17 00:00:00 2001 From: Jamie Seter Date: Mon, 28 Oct 2019 16:35:56 -0400 Subject: [PATCH 4/5] formatting. --- tests/test-cases.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test-cases.js b/tests/test-cases.js index 275cc28..cd27578 100644 --- a/tests/test-cases.js +++ b/tests/test-cases.js @@ -616,8 +616,7 @@ var CORE_PARSER_TESTS = [ "index": 2 }] } - }, - + } ]; describe('Core Parser Tests', function() { From 162d442b504ddea9e8a34c6ae7d056dbc9b7a741 Mon Sep 17 00:00:00 2001 From: Jamie Seter Date: Tue, 29 Oct 2019 08:22:21 -0400 Subject: [PATCH 5/5] ignoreLastRow should take priority in unfinished processing. 
--- papaparse.js | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/papaparse.js b/papaparse.js index a3baa5b..c8897fc 100755 --- a/papaparse.js +++ b/papaparse.js @@ -1510,6 +1510,9 @@ License: MIT //No other quotes are found - no other delimiters if (quoteSearch === -1) { + if(ignoreLastRow) + return returnable(); + if(strictQuote) { errors.push({ type: 'Quotes', @@ -1522,16 +1525,14 @@ License: MIT quoteFallThrough = true; break; // fall through to parse as non-quote. } - if (!ignoreLastRow) { - // No closing quote... what a pity - errors.push({ - type: 'Quotes', - code: 'MissingQuotes', - message: 'Quoted field unterminated', - row: data.length, // row has yet to be inserted - index: cursor - }); - } + // No closing quote... what a pity + errors.push({ + type: 'Quotes', + code: 'MissingQuotes', + message: 'Quoted field unterminated', + row: data.length, // row has yet to be inserted + index: cursor + }); return finish(input.substring(cursor + 1)); }