From 9bc872a2b8879c072490312d5de44edf90c4a7a2 Mon Sep 17 00:00:00 2001
From: Jaymes Lauser <jaymeslauser@gmail.com>
Date: Fri, 27 Jul 2018 00:15:24 -0700
Subject: [PATCH] Fix linebreak problem in headers (#542)

---
 papaparse.js        |  15 +++--
 tests/test-cases.js | 134 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 144 insertions(+), 5 deletions(-)

diff --git a/papaparse.js b/papaparse.js
index 5f670bd..23c7889 100755
--- a/papaparse.js
+++ b/papaparse.js
@@ -1008,8 +1008,9 @@
 		 */
 		this.parse = function(input, baseIndex, ignoreLastRow)
 		{
+			var quoteChar = _config.quoteChar || '"';
 			if (!_config.newline)
-				_config.newline = guessLineEndings(input);
+				_config.newline = guessLineEndings(input, quoteChar);
 
 			_delimiterError = false;
 			if (!_config.delimiter)
@@ -1252,9 +1253,12 @@
 			};
 		}
 
-		function guessLineEndings(input)
+		function guessLineEndings(input, quoteChar)
 		{
 			input = input.substr(0, 1024 * 1024);	// max length 1 MB
+			// Replace all the text inside quotes
+			var re = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm');
+			input = input.replace(re, '');
 
 			var r = input.split('\r');
 
@@ -1286,9 +1290,10 @@
 		}
 	}
 
-
-
-
+	/** https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions */
+	function escapeRegExp(string) {
+		return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string
+	}
 
 	/** The core parser implements speedy and correct CSV parsing */
 	function Parser(config)
diff --git a/tests/test-cases.js b/tests/test-cases.js
index ad0d9a5..df5c152 100644
--- a/tests/test-cases.js
+++ b/tests/test-cases.js
@@ -1208,6 +1208,136 @@ var PARSE_TESTS = [
 			data: [{'a': 'c', 'b': 'd'}],
 			errors: []
 		}
+	},
+	{
+		description: "Carriage return in header inside quotes, with line feed endings",
+		input: '"a\r\na","b"\n"c","d"\n"e","f"\n"g","h"\n"i","j"',
+		config: {},
+		expected: {
+			data: [['a\r\na', 'b'], ['c', 'd'], ['e', 'f'], ['g', 'h'], ['i', 'j']],
+			errors: []
+		}
+	},
+	{
+		description: "Line feed in header inside quotes, with carriage return + line feed endings",
+		input: '"a\na","b"\r\n"c","d"\r\n"e","f"\r\n"g","h"\r\n"i","j"',
+		config: {},
+		expected: {
+			data: [['a\na', 'b'], ['c', 'd'], ['e', 'f'], ['g', 'h'], ['i', 'j']],
+			errors: []
+		}
+	},
+	{
+		description: "Using \\r\\n endings uses \\r\\n linebreak",
+		input: 'a,b\r\nc,d\r\ne,f\r\ng,h\r\ni,j',
+		config: {},
+		expected: {
+			data: [['a', 'b'], ['c', 'd'], ['e', 'f'], ['g', 'h'], ['i', 'j']],
+			errors: [],
+			meta: {
+				linebreak: '\r\n',
+				delimiter: ',',
+				cursor: 23,
+				aborted: false,
+				truncated: false
+			}
+		}
+	},
+	{
+		description: "Using \\n endings uses \\n linebreak",
+		input: 'a,b\nc,d\ne,f\ng,h\ni,j',
+		config: {},
+		expected: {
+			data: [['a', 'b'], ['c', 'd'], ['e', 'f'], ['g', 'h'], ['i', 'j']],
+			errors: [],
+			meta: {
+				linebreak: '\n',
+				delimiter: ',',
+				cursor: 19,
+				aborted: false,
+				truncated: false
+			}
+		}
+	},
+	{
+		description: "Using \\r\\n endings with \\r\\n in header field uses \\r\\n linebreak",
+		input: '"a\r\na",b\r\nc,d\r\ne,f\r\ng,h\r\ni,j',
+		config: {},
+		expected: {
+			data: [['a\r\na', 'b'], ['c', 'd'], ['e', 'f'], ['g', 'h'], ['i', 'j']],
+			errors: [],
+			meta: {
+				linebreak: '\r\n',
+				delimiter: ',',
+				cursor: 28,
+				aborted: false,
+				truncated: false
+			}
+		}
+	},
+	{
+		description: "Using \\r\\n endings with \\n in header field uses \\r\\n linebreak",
+		input: '"a\na",b\r\nc,d\r\ne,f\r\ng,h\r\ni,j',
+		config: {},
+		expected: {
+			data: [['a\na', 'b'], ['c', 'd'], ['e', 'f'], ['g', 'h'], ['i', 'j']],
+			errors: [],
+			meta: {
+				linebreak: '\r\n',
+				delimiter: ',',
+				cursor: 27,
+				aborted: false,
+				truncated: false
+			}
+		}
+	},
+	{
+		description: "Using \\r\\n endings with \\n in header field with skip empty lines uses \\r\\n linebreak",
+		input: '"a\na",b\r\nc,d\r\ne,f\r\ng,h\r\ni,j\r\n',
+		config: {skipEmptyLines: true},
+		expected: {
+			data: [['a\na', 'b'], ['c', 'd'], ['e', 'f'], ['g', 'h'], ['i', 'j']],
+			errors: [],
+			meta: {
+				linebreak: '\r\n',
+				delimiter: ',',
+				cursor: 29,
+				aborted: false,
+				truncated: false
+			}
+		}
+	},
+	{
+		description: "Using \\n endings with \\r\\n in header field uses \\n linebreak",
+		input: '"a\r\na",b\nc,d\ne,f\ng,h\ni,j',
+		config: {},
+		expected: {
+			data: [['a\r\na', 'b'], ['c', 'd'], ['e', 'f'], ['g', 'h'], ['i', 'j']],
+			errors: [],
+			meta: {
+				linebreak: '\n',
+				delimiter: ',',
+				cursor: 24,
+				aborted: false,
+				truncated: false
+			}
+		}
+	},
+	{
+		description: "Using reserved regex characters as quote characters",
+		input: '.a\na.,b\r\nc,d\r\ne,f\r\ng,h\r\ni,j',
+		config: { quoteChar: '.' },
+		expected: {
+			data: [['a\na', 'b'], ['c', 'd'], ['e', 'f'], ['g', 'h'], ['i', 'j']],
+			errors: [],
+			meta: {
+				linebreak: '\r\n',
+				delimiter: ',',
+				cursor: 27,
+				aborted: false,
+				truncated: false
+			}
+		}
 	}
 ];
 
@@ -1215,6 +1345,10 @@ describe('Parse Tests', function() {
 	function generateTest(test) {
 		(test.disabled ? it.skip : it)(test.description, function() {
 			var actual = Papa.parse(test.input, test.config);
+			// allows for testing the meta object if present in the test
+			if (test.expected.meta) {
+				assert.deepEqual(actual.meta, test.expected.meta);
+			}
 			assert.deepEqual(JSON.stringify(actual.errors), JSON.stringify(test.expected.errors));
 			assert.deepEqual(actual.data, test.expected.data);
 		});