Browse Source

Fix linebreak problem in headers (#542)

pull/546/head
Jaymes Lauser 7 years ago committed by Sergi Almacellas Abellana
parent
commit
9bc872a2b8
  1. 15
      papaparse.js
  2. 134
      tests/test-cases.js

15
papaparse.js

@@ -1008,8 +1008,9 @@
*/
this.parse = function(input, baseIndex, ignoreLastRow)
{
var quoteChar = _config.quoteChar || '"';
if (!_config.newline)
_config.newline = guessLineEndings(input);
_config.newline = guessLineEndings(input, quoteChar);
_delimiterError = false;
if (!_config.delimiter)
@@ -1252,9 +1253,12 @@
};
}
function guessLineEndings(input)
function guessLineEndings(input, quoteChar)
{
input = input.substr(0, 1024 * 1024); // max length 1 MB
// Replace all the text inside quotes
var re = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm');
input = input.replace(re, '');
var r = input.split('\r');
@@ -1286,9 +1290,10 @@
}
}
/**
 * Backslash-escapes every regular-expression metacharacter in a string so the
 * result can be embedded in a RegExp source and matched literally.
 *
 * Pattern taken from the MDN Regular Expressions guide:
 * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions
 *
 * @param {string} string - Text that may contain regex metacharacters.
 * @returns {string} The same text with each metacharacter prefixed by a backslash.
 */
function escapeRegExp(string) {
	// Characters that carry special meaning inside a regular expression.
	var metaCharacters = /[.*+?^${}()|[\]\\]/g;
	// "$&" in a replacement string stands for the whole matched substring.
	var backslashPrefixed = '\\$&';
	return string.replace(metaCharacters, backslashPrefixed);
}
/** The core parser implements speedy and correct CSV parsing */
function Parser(config)

134
tests/test-cases.js

@@ -1208,6 +1208,136 @@ var PARSE_TESTS = [
data: [{'a': 'c', 'b': 'd'}],
errors: []
}
},
{
description: "Carriage return in header inside quotes, with line feed endings",
input: '"a\r\na","b"\n"c","d"\n"e","f"\n"g","h"\n"i","j"',
config: {},
expected: {
data: [['a\r\na', 'b'], ['c', 'd'], ['e', 'f'], ['g', 'h'], ['i', 'j']],
errors: []
}
},
{
description: "Line feed in header inside quotes, with carriage return + line feed endings",
input: '"a\na","b"\r\n"c","d"\r\n"e","f"\r\n"g","h"\r\n"i","j"',
config: {},
expected: {
data: [['a\na', 'b'], ['c', 'd'], ['e', 'f'], ['g', 'h'], ['i', 'j']],
errors: []
}
},
{
description: "Using \\r\\n endings uses \\r\\n linebreak",
input: 'a,b\r\nc,d\r\ne,f\r\ng,h\r\ni,j',
config: {},
expected: {
data: [['a', 'b'], ['c', 'd'], ['e', 'f'], ['g', 'h'], ['i', 'j']],
errors: [],
meta: {
linebreak: '\r\n',
delimiter: ',',
cursor: 23,
aborted: false,
truncated: false
}
}
},
{
description: "Using \\n endings uses \\n linebreak",
input: 'a,b\nc,d\ne,f\ng,h\ni,j',
config: {},
expected: {
data: [['a', 'b'], ['c', 'd'], ['e', 'f'], ['g', 'h'], ['i', 'j']],
errors: [],
meta: {
linebreak: '\n',
delimiter: ',',
cursor: 19,
aborted: false,
truncated: false
}
}
},
{
description: "Using \\r\\n endings with \\r\\n in header field uses \\r\\n linebreak",
input: '"a\r\na",b\r\nc,d\r\ne,f\r\ng,h\r\ni,j',
config: {},
expected: {
data: [['a\r\na', 'b'], ['c', 'd'], ['e', 'f'], ['g', 'h'], ['i', 'j']],
errors: [],
meta: {
linebreak: '\r\n',
delimiter: ',',
cursor: 28,
aborted: false,
truncated: false
}
}
},
{
description: "Using \\r\\n endings with \\n in header field uses \\r\\n linebreak",
input: '"a\na",b\r\nc,d\r\ne,f\r\ng,h\r\ni,j',
config: {},
expected: {
data: [['a\na', 'b'], ['c', 'd'], ['e', 'f'], ['g', 'h'], ['i', 'j']],
errors: [],
meta: {
linebreak: '\r\n',
delimiter: ',',
cursor: 27,
aborted: false,
truncated: false
}
}
},
{
description: "Using \\r\\n endings with \\n in header field with skip empty lines uses \\r\\n linebreak",
input: '"a\na",b\r\nc,d\r\ne,f\r\ng,h\r\ni,j\r\n',
config: {skipEmptyLines: true},
expected: {
data: [['a\na', 'b'], ['c', 'd'], ['e', 'f'], ['g', 'h'], ['i', 'j']],
errors: [],
meta: {
linebreak: '\r\n',
delimiter: ',',
cursor: 29,
aborted: false,
truncated: false
}
}
},
{
description: "Using \\n endings with \\r\\n in header field uses \\n linebreak",
input: '"a\r\na",b\nc,d\ne,f\ng,h\ni,j',
config: {},
expected: {
data: [['a\r\na', 'b'], ['c', 'd'], ['e', 'f'], ['g', 'h'], ['i', 'j']],
errors: [],
meta: {
linebreak: '\n',
delimiter: ',',
cursor: 24,
aborted: false,
truncated: false
}
}
},
{
description: "Using reserved regex characters as quote characters",
input: '.a\na.,b\r\nc,d\r\ne,f\r\ng,h\r\ni,j',
config: { quoteChar: '.' },
expected: {
data: [['a\na', 'b'], ['c', 'd'], ['e', 'f'], ['g', 'h'], ['i', 'j']],
errors: [],
meta: {
linebreak: '\r\n',
delimiter: ',',
cursor: 27,
aborted: false,
truncated: false
}
}
}
];
@@ -1215,6 +1345,10 @@ describe('Parse Tests', function() {
function generateTest(test) {
(test.disabled ? it.skip : it)(test.description, function() {
var actual = Papa.parse(test.input, test.config);
// allows for testing the meta object if present in the test
if (test.expected.meta) {
assert.deepEqual(actual.meta, test.expected.meta);
}
assert.deepEqual(JSON.stringify(actual.errors), JSON.stringify(test.expected.errors));
assert.deepEqual(actual.data, test.expected.data);
});

Loading…
Cancel
Save