From be00d871470134c64e7223d028122351410d240c Mon Sep 17 00:00:00 2001 From: "M. Wieschollek" Date: Tue, 13 Mar 2018 10:02:14 +0100 Subject: [PATCH] Add option to configure escape character Fixes #459 --- papaparse.js | 15 +++++++++++++-- tests/test-cases.js | 30 ++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/papaparse.js b/papaparse.js index c1f1ec4..048bd46 100755 --- a/papaparse.js +++ b/papaparse.js @@ -1158,6 +1158,10 @@ } else { var quoteChar = config.quoteChar; } + var escapeChar = quoteChar; + if (config.escapeChar !== undefined){ + escapeChar = config.escapeChar; + } // Delimiter must be valid if (typeof delim !== 'string' @@ -1236,7 +1240,7 @@ var nextDelim = input.indexOf(delim, cursor); var nextNewline = input.indexOf(newline, cursor); - var quoteCharRegex = new RegExp(quoteChar+quoteChar, 'g'); + var quoteCharRegex = new RegExp(escapeChar.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, '\\$&')+quoteChar, 'g'); // Parser loop for (;;) @@ -1279,12 +1283,19 @@ } // If this quote is escaped, it's part of the data; skip it - if (input[quoteSearch+1] === quoteChar) + // If the quote character is the escape character, then check if the next character is the escape character + if (quoteChar === escapeChar && input[quoteSearch+1] === escapeChar) { quoteSearch++; continue; } + // If the quote character is not the escape character, then check if the previous character was the escape character + if (quoteChar !== escapeChar && quoteSearch !== 0 && input[quoteSearch-1] === escapeChar) + { + continue; + } + var spacesBetweenQuoteAndDelimiter = extraSpaces(nextDelim); // Closing quote followed by delimiter or 'unnecessary steps + delimiter' diff --git a/tests/test-cases.js b/tests/test-cases.js index 503e905..ea7fdcc 100644 --- a/tests/test-cases.js +++ b/tests/test-cases.js @@ -1080,6 +1080,36 @@ var PARSE_TESTS = [ errors: [] } }, + { + description: "Custom escape character in the middle", + notes: "Must parse correctly if the backslash sign (\\) is configured as a custom escape character", + input: 'a,b,"c\\"d\\"f"', + config: { escapeChar:'\\'}, + expected: { + data: [['a', 'b', 'c"d"f']], + errors: [] + } + }, + { + description: "Custom escape character at the end", + notes: "Must parse correctly if the backslash sign (\\) is configured as a custom escape character and the escaped quote character appears at the end of the column", + input: 'a,b,"c\\"d\\""', + config: { escapeChar:'\\'}, + expected: { + data: [['a', 'b', 'c"d"']], + errors: [] + } + }, + { + description: "Custom escape character not used for escaping", + notes: "Must parse correctly if the backslash sign (\\) is configured as a custom escape character and appears as regular character in the text", + input: 'a,b,"c\\d"', + config: { escapeChar:'\\'}, + expected: { + data: [['a', 'b', 'c\\d']], + errors: [] + } + }, { description: "Header row with preceding comment", notes: "Must parse correctly headers if they are preceded by comments",