diff --git a/papaparse.js b/papaparse.js index b624e50..e9a777c 100755 --- a/papaparse.js +++ b/papaparse.js @@ -699,7 +699,7 @@ License: MIT if (contentRange === null) { // no content range, then finish! return -1; } - return parseInt(contentRange.substr(contentRange.lastIndexOf('/') + 1)); + return parseInt(contentRange.substring(contentRange.lastIndexOf('/') + 1)); } } NetworkStreamer.prototype = Object.create(ChunkStreamer.prototype); @@ -786,10 +786,17 @@ License: MIT }; this._nextChunk = function() { + var chunk; if (this._finished) return; var size = this._config.chunkSize; - var chunk = size ? remaining.substr(0, size) : remaining; - remaining = size ? remaining.substr(size) : ''; + if(size) { + var limit = Math.min(remaining.length, Math.max(0, size)); + chunk = remaining.substring(0, limit); + remaining = remaining.substring(limit); + } else { + chunk = remaining; + remaining = ''; + } this._finished = !remaining; return this.parseChunk(chunk); }; @@ -1092,7 +1099,7 @@ License: MIT { _paused = true; _parser.abort(); - _input = _input.substr(_parser.getCharIndex()); + _input = _input.substring(_parser.getCharIndex()); }; this.resume = function() @@ -1330,7 +1337,7 @@ License: MIT function guessLineEndings(input, quoteChar) { - input = input.substr(0, 1024 * 1024); // max length 1 MB + input = input.substring(0, 1024 * 1024); // max length 1 MB // Replace all the text inside quotes var re = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm'); input = input.replace(re, ''); @@ -1382,6 +1389,8 @@ License: MIT var step = config.step; var preview = config.preview; var fastMode = config.fastMode; + var temp1; + var strictQuote = (temp1 = config.strictQuote) !== undefined ? temp1 : false; var quoteChar; /** Allows for no quoteChar by setting quoteChar to undefined in config */ if (config.quoteChar === undefined) { @@ -1448,7 +1457,7 @@ License: MIT cursor += newline.length; else if (ignoreLastRow) return returnable(); - if (comments && row.substr(0, commentsLen) === comments) + if (comments && row.substring(0, commentsLen) === comments) continue; if (stepIsFunction) { @@ -1473,6 +1482,10 @@ License: MIT var nextNewline = input.indexOf(newline, cursor); var quoteCharRegex = new RegExp(escapeRegExp(escapeChar) + escapeRegExp(quoteChar), 'g'); var quoteSearch = input.indexOf(quoteChar, cursor); + var quoteError = false; + var savedNextDelim; + var savedNextNewline; + var savedQuoteSearch; // Parser loop for (;;) @@ -1480,12 +1493,10 @@ License: MIT // Field has opening quote if (input[cursor] === quoteChar) { + saveQuoteState(); // Start our search for the closing quote where the cursor is quoteSearch = cursor; - // Skip the opening quote - cursor++; - for (;;) { // Find closing quote @@ -1494,6 +1505,10 @@ License: MIT //No other quotes are found - no other delimiters if (quoteSearch === -1) { + if(strictQuote) { + backtrackQuote(); + break; + } if (!ignoreLastRow) { // No closing quote... what a pity errors.push({ @@ -1504,13 +1519,14 @@ License: MIT index: cursor }); } + cursor++; return finish(); } // Closing quote at EOF if (quoteSearch === inputLen - 1) { - var value = input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar); + var value = input.substring(cursor + 1, quoteSearch).replace(quoteCharRegex, quoteChar); return finish(value); } @@ -1528,6 +1544,14 @@ License: MIT continue; } + // update indexes if they are less than current cursor + if(nextNewline !== -1 && (quoteSearch + 1) > nextNewline) { + nextNewline = input.indexOf(newline, quoteSearch + 1); + } + if(nextDelim !== -1 && (quoteSearch + 1) > nextDelim) { + nextDelim = input.indexOf(delim, quoteSearch + 1); + } + // Check up to nextDelim or nextNewline, whichever is closest var checkUpTo = nextNewline === -1 ? nextDelim : Math.min(nextDelim, nextNewline); var spacesBetweenQuoteAndDelimiter = extraSpaces(checkUpTo); @@ -1535,7 +1559,7 @@ License: MIT // Closing quote followed by delimiter or 'unnecessary spaces + delimiter' if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter] === delim) { - row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar)); + row.push(input.substring(cursor + 1, quoteSearch).replace(quoteCharRegex, quoteChar)); cursor = quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen; // If char after following delimiter is not quoteChar, we find next quote char position @@ -1551,9 +1575,9 @@ License: MIT var spacesBetweenQuoteAndNewLine = extraSpaces(nextNewline); // Closing quote followed by newline or 'unnecessary spaces + newLine' - if (input.substr(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, newlineLen) === newline) + if (input.substring(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen) === newline) { - row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar)); + row.push(input.substring(cursor + 1, quoteSearch).replace(quoteCharRegex, quoteChar)); saveRow(quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen); nextDelim = input.indexOf(delim, cursor); // because we may have skipped the nextDelim in the quoted field quoteSearch = input.indexOf(quoteChar, cursor); // we search for first quote in next line @@ -1581,16 +1605,23 @@ License: MIT index: cursor }); + if(strictQuote) { + backtrackQuote(); + break; + } + quoteSearch++; continue; } - continue; + if(!quoteError) { + continue; + } } // Comment found at start of new line - if (comments && row.length === 0 && input.substr(cursor, commentsLen) === comments) + if (comments && row.length === 0 && input.substring(cursor, cursor + commentsLen) === comments) { if (nextNewline === -1) // Comment ends at EOF return returnable(); @@ -1606,7 +1637,7 @@ License: MIT // we check, if we have quotes, because delimiter char may be part of field enclosed in quotes if (quoteSearch > nextDelim) { // we have quotes, so we try to find the next delimiter not enclosed in quotes and also next starting quote char - var nextDelimObj = getNextUnqotedDelimiter(nextDelim, quoteSearch, nextNewline); + var nextDelimObj = getNextUnquotedDelimiter(nextDelim, quoteSearch, nextNewline); // if we have next delimiter char which is not enclosed in quotes if (nextDelimObj && typeof nextDelimObj.nextDelim !== 'undefined') { @@ -1651,6 +1682,18 @@ License: MIT return finish(); + function saveQuoteState() { + savedNextDelim = nextDelim; + savedNextNewline = nextNewline; + savedQuoteSearch = quoteSearch; + quoteError = false; + } + function backtrackQuote() { + quoteSearch = savedQuoteSearch; + nextNewline = savedNextNewline; + nextDelim = savedNextDelim; + quoteError = true; + } function pushRow(row) { @@ -1682,7 +1725,7 @@ License: MIT if (ignoreLastRow) return returnable(); if (typeof value === 'undefined') - value = input.substr(cursor); + value = input.substring(cursor); row.push(value); cursor = inputLen; // important in case parsing is paused pushRow(row); @@ -1731,7 +1774,7 @@ License: MIT } /** Gets the delimiter character, which is not inside the quoted field */ - function getNextUnqotedDelimiter(nextDelim, quoteSearch, newLine) { + function getNextUnquotedDelimiter(nextDelim, quoteSearch, newLine) { var result = { nextDelim: undefined, quoteSearch: undefined @@ -1753,7 +1796,7 @@ License: MIT nextQuoteSearch = input.indexOf(quoteChar, nextQuoteSearch + 1); } // try to get the next delimiter position - result = getNextUnqotedDelimiter(nextNextDelim, nextQuoteSearch, newLine); + result = getNextUnquotedDelimiter(nextNextDelim, nextQuoteSearch, newLine); } else { result = { nextDelim: nextDelim, diff --git a/tests/test-cases.js b/tests/test-cases.js index 0c4ab04..1c6821b 100644 --- a/tests/test-cases.js +++ b/tests/test-cases.js @@ -22,6 +22,61 @@ try { XHR_ENABLED = true; } catch (e) {} // safari, ie +var __substring = String.prototype.substring; +var __substr = String.prototype.substr; + +function checkedSubstr(_indexStart, _length) { + throw new TypeError("Avoid depreciated method"); +} + +function checkedSubstring(indexStart, indexEnd) { + var length = this.length; + if(isNaN(indexStart) || typeof indexStart !== "number") { + throw new TypeError("indexStart is not a number. "); + } + if(indexStart < 0) { + // Block this because it is likely a bug, but substring clamps it as max(0, indexStart) + throw new TypeError("indexStart is less than zero. " + String(indexStart)); + } + if(indexStart > length) { + // Block this because it is likely a bug, but substring clamps it as min(indexStart, length) + throw new TypeError("indexStart is greater than string length. " + String(indexStart) + " " + String(length)); + } + + if(indexEnd !== undefined) { + if(isNaN(indexEnd) || typeof indexEnd !== "number") { + throw new TypeError("indexEnd is not a number. "); + } + if(indexEnd < 0) { + // Block this because it is likely a bug, but substring treats it as max(0, indexEnd) + throw new TypeError("indexEnd is less than zero. " + String(indexEnd)); + } + if(indexStart > indexEnd) { + // Block this because it is likely a bug, but substring flips its arguments to compensate. + throw new TypeError("indexStart is greater than indexEnd. " + String(indexStart) + " " + String(indexEnd)); + } + // Allow indexEnd to be greater than limit, which will functionally truncate to limit. + + } + return __substring.call(this, indexStart, indexEnd); +} + +function updateStringSubstring() { + String.prototype.substring = checkedSubstring; // eslint-disable-line no-extend-native +} + +function restoreStringSubstring() { + String.prototype.substring = __substring; // eslint-disable-line no-extend-native +} + +function updateStringSubstr() { + String.prototype.substr = checkedSubstr; // eslint-disable-line no-extend-native +} + +function restoreStringSubstr() { + String.prototype.substr = __substr; // eslint-disable-line no-extend-native +} + // Tests for the core parser using new Papa.Parser().parse() (CSV to JSON) var CORE_PARSER_TESTS = [ { @@ -194,7 +249,7 @@ var CORE_PARSER_TESTS = [ "code": "MissingQuotes", "message": "Quoted field unterminated", "row": 0, - "index": 3 + "index": 2 }] } }, @@ -209,7 +264,23 @@ var CORE_PARSER_TESTS = [ "code": "InvalidQuotes", "message": "Trailing quote on quoted field is malformed", "row": 0, - "index": 1 + "index": 0 + }] + } + }, + { + description: "Quoted field has invalid trailing quote after delimiter with a valid closer", + input: '"a,"b,c"\nd,e,f', + notes: "The input is malformed, opening quotes identified, trailing quote is malformed. Trailing quote should be escaped or followed by valid new line or delimiter to be valid", + config: { strictQuote: true }, + expected: { + data: [['"a','b,c'], ['d', 'e', 'f']], + errors: [{ + "type": "Quotes", + "code": "InvalidQuotes", + "message": "Trailing quote on quoted field is malformed", + "row": 0, + "index": 0 }] } }, @@ -224,14 +295,14 @@ var CORE_PARSER_TESTS = [ "code": "InvalidQuotes", "message": "Trailing quote on quoted field is malformed", "row": 0, - "index": 3 + "index": 2 }, { "type": "Quotes", "code": "MissingQuotes", "message": "Quoted field unterminated", "row": 0, - "index": 3 + "index": 2 }] } }, @@ -246,14 +317,14 @@ var CORE_PARSER_TESTS = [ "code": "InvalidQuotes", "message": "Trailing quote on quoted field is malformed", "row": 0, - "index": 3 + "index": 2 }, { "type": "Quotes", "code": "MissingQuotes", "message": "Quoted field unterminated", "row": 0, - "index": 3 + "index": 2 }] } }, @@ -268,14 +339,14 @@ var CORE_PARSER_TESTS = [ "code": "InvalidQuotes", "message": "Trailing quote on quoted field is malformed", "row": 0, - "index": 3 + "index": 2 }, { "type": "Quotes", "code": "MissingQuotes", "message": "Quoted field unterminated", "row": 0, - "index": 3 + "index": 2 }] } }, @@ -288,6 +359,15 @@ var CORE_PARSER_TESTS = [ errors: [] } }, + { + description: "Quoted field has valid trailing quote via delimiter and contains delimiter", + input: 'a,"b,",c\nd,e,f', + notes: "Trailing quote is valid due to trailing delimiter", + expected: { + data: [['a', 'b,', 'c'], ['d', 'e', 'f']], + errors: [] + } + }, { description: "Quoted field has valid trailing quote via \\n", input: 'a,b,"c"\nd,e,f', @@ -297,6 +377,15 @@ var CORE_PARSER_TESTS = [ errors: [] } }, + { + description: "Quoted field has valid trailing quote via \\n and contains delimiter and newline", + input: 'a,b,"c,\n"\nd,e,f', + notes: "Trailing quote is valid due to trailing new line delimiter", + expected: { + data: [['a', 'b', 'c,\n'], ['d', 'e', 'f']], + errors: [] + } + }, { description: "Quoted field has valid trailing quote via EOF", input: 'a,b,c\nd,e,"f"', @@ -591,8 +680,16 @@ var CORE_PARSER_TESTS = [ describe('Core Parser Tests', function() { function generateTest(test) { (test.disabled ? it.skip : it)(test.description, function() { - var actual = new Papa.Parser(test.config).parse(test.input); - assert.deepEqual(JSON.stringify(actual.errors), JSON.stringify(test.expected.errors)); + var actual; + updateStringSubstr(); + updateStringSubstring(); + try { + actual = new Papa.Parser(test.config).parse(test.input); + } finally { + restoreStringSubstring(); + restoreStringSubstr(); + } + assert.deepEqual(JSON.parse(JSON.stringify(actual.errors)), test.expected.errors); assert.deepEqual(actual.data, test.expected.data); }); } @@ -654,6 +751,14 @@ var PARSE_TESTS = [ errors: [] } }, + { + description: "Quoted fields with spaces between closing quote and next delimiter and contains delimiter", + input: 'A,",B" ,C,D\r\nE,F,",G" ,H', + expected: { + data: [['A', ',B', 'C','D'],['E', 'F', ',G','H']], + errors: [] + } + }, { description: "Quoted fields with spaces between closing quote and next new line", input: 'A,B,C,"D" \r\nE,F,G,"H" \r\nQ,W,E,R', @@ -1475,7 +1580,7 @@ describe('Parse Tests', function() { if (test.expected.meta) { assert.deepEqual(actual.meta, test.expected.meta); } - assert.deepEqual(JSON.stringify(actual.errors), JSON.stringify(test.expected.errors)); + assert.deepEqual(JSON.parse(JSON.stringify(actual.errors)), test.expected.errors); assert.deepEqual(actual.data, test.expected.data); }); } @@ -1556,7 +1661,7 @@ describe('Parse Async Tests', function() { var config = test.config; config.complete = function(actual) { - assert.deepEqual(JSON.stringify(actual.errors), JSON.stringify(test.expected.errors)); + assert.deepEqual(JSON.parse(JSON.stringify(actual.errors)), test.expected.errors); assert.deepEqual(actual.data, test.expected.data); done(); }; @@ -2368,7 +2473,7 @@ describe('Custom Tests', function() { this.timeout(test.timeout); } test.run(function(actual) { - assert.deepEqual(JSON.stringify(actual), JSON.stringify(test.expected)); + assert.deepEqual(JSON.parse(JSON.stringify(actual)), test.expected); done(); }); });