|
|
|
@ -1,19 +1,10 @@
@@ -1,19 +1,10 @@
|
|
|
|
|
/* @license |
|
|
|
|
Papa Parse |
|
|
|
|
v4.6.1 |
|
|
|
|
v5.0.0 |
|
|
|
|
https://github.com/mholt/PapaParse
|
|
|
|
|
License: MIT |
|
|
|
|
*/ |
|
|
|
|
|
|
|
|
|
// Polyfills
|
|
|
|
|
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/isArray#Polyfill
|
|
|
|
|
if (!Array.isArray) |
|
|
|
|
{ |
|
|
|
|
Array.isArray = function(arg) { |
|
|
|
|
return Object.prototype.toString.call(arg) === '[object Array]'; |
|
|
|
|
}; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
(function(root, factory) |
|
|
|
|
{ |
|
|
|
|
/* globals define */ |
|
|
|
@ -34,7 +25,10 @@ if (!Array.isArray)
@@ -34,7 +25,10 @@ if (!Array.isArray)
|
|
|
|
|
// Browser globals (root is window)
|
|
|
|
|
root.Papa = factory(); |
|
|
|
|
} |
|
|
|
|
}(this, function() |
|
|
|
|
// in strict mode we cannot access arguments.callee, so we need a named reference to
|
|
|
|
|
// stringify the factory method for the blob worker
|
|
|
|
|
// eslint-disable-next-line func-name
|
|
|
|
|
}(this, function moduleFactory() |
|
|
|
|
{ |
|
|
|
|
'use strict'; |
|
|
|
|
|
|
|
|
@ -51,9 +45,15 @@ if (!Array.isArray)
@@ -51,9 +45,15 @@ if (!Array.isArray)
|
|
|
|
|
return {}; |
|
|
|
|
})(); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
function getWorkerBlob() { |
|
|
|
|
var URL = global.URL || global.webkitURL || null; |
|
|
|
|
var code = moduleFactory.toString(); |
|
|
|
|
return Papa.BLOB_URL || (Papa.BLOB_URL = URL.createObjectURL(new Blob(['(', code, ')();'], {type: 'text/javascript'}))); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
var IS_WORKER = !global.document && !!global.postMessage, |
|
|
|
|
IS_PAPA_WORKER = IS_WORKER && /(\?|&)papaworker(=|&|$)/.test(global.location.search), |
|
|
|
|
LOADED_SYNC = false, AUTO_SCRIPT_PATH; |
|
|
|
|
IS_PAPA_WORKER = IS_WORKER && /blob:/i.test((global.location || {}).protocol); |
|
|
|
|
var workers = {}, workerIdCounter = 0; |
|
|
|
|
|
|
|
|
|
var Papa = {}; |
|
|
|
@ -66,7 +66,6 @@ if (!Array.isArray)
@@ -66,7 +66,6 @@ if (!Array.isArray)
|
|
|
|
|
Papa.BYTE_ORDER_MARK = '\ufeff'; |
|
|
|
|
Papa.BAD_DELIMITERS = ['\r', '\n', '"', Papa.BYTE_ORDER_MARK]; |
|
|
|
|
Papa.WORKERS_SUPPORTED = !IS_WORKER && !!global.Worker; |
|
|
|
|
Papa.SCRIPT_PATH = null; // Must be set by your code if you use workers and this lib is loaded asynchronously
|
|
|
|
|
Papa.NODE_STREAM_INPUT = 1; |
|
|
|
|
|
|
|
|
|
// Configurable chunk sizes for local and remote files, respectively
|
|
|
|
@ -81,7 +80,9 @@ if (!Array.isArray)
@@ -81,7 +80,9 @@ if (!Array.isArray)
|
|
|
|
|
Papa.FileStreamer = FileStreamer; |
|
|
|
|
Papa.StringStreamer = StringStreamer; |
|
|
|
|
Papa.ReadableStreamStreamer = ReadableStreamStreamer; |
|
|
|
|
Papa.DuplexStreamStreamer = DuplexStreamStreamer; |
|
|
|
|
if (typeof PAPA_BROWSER_CONTEXT === 'undefined') { |
|
|
|
|
Papa.DuplexStreamStreamer = DuplexStreamStreamer; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (global.jQuery) |
|
|
|
|
{ |
|
|
|
@ -182,23 +183,6 @@ if (!Array.isArray)
@@ -182,23 +183,6 @@ if (!Array.isArray)
|
|
|
|
|
{ |
|
|
|
|
global.onmessage = workerThreadReceivedMessage; |
|
|
|
|
} |
|
|
|
|
else if (Papa.WORKERS_SUPPORTED) |
|
|
|
|
{ |
|
|
|
|
AUTO_SCRIPT_PATH = getScriptPath(); |
|
|
|
|
|
|
|
|
|
// Check if the script was loaded synchronously
|
|
|
|
|
if (!document.body) |
|
|
|
|
{ |
|
|
|
|
// Body doesn't exist yet, must be synchronous
|
|
|
|
|
LOADED_SYNC = true; |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
document.addEventListener('DOMContentLoaded', function() { |
|
|
|
|
LOADED_SYNC = true; |
|
|
|
|
}, true); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -241,7 +225,7 @@ if (!Array.isArray)
@@ -241,7 +225,7 @@ if (!Array.isArray)
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
var streamer = null; |
|
|
|
|
if (_input === Papa.NODE_STREAM_INPUT) |
|
|
|
|
if (_input === Papa.NODE_STREAM_INPUT && typeof PAPA_BROWSER_CONTEXT === 'undefined') |
|
|
|
|
{ |
|
|
|
|
// create a node Duplex stream for use
|
|
|
|
|
// with .pipe
|
|
|
|
@ -289,12 +273,18 @@ if (!Array.isArray)
@@ -289,12 +273,18 @@ if (!Array.isArray)
|
|
|
|
|
/** quote character */ |
|
|
|
|
var _quoteChar = '"'; |
|
|
|
|
|
|
|
|
|
/** escaped quote character, either "" or <config.escapeChar>" */ |
|
|
|
|
var _escapedQuote = _quoteChar + _quoteChar; |
|
|
|
|
|
|
|
|
|
/** whether to skip empty lines */ |
|
|
|
|
var _skipEmptyLines = false; |
|
|
|
|
|
|
|
|
|
/** the columns (keys) we expect when we unparse objects */ |
|
|
|
|
var _columns = null; |
|
|
|
|
|
|
|
|
|
unpackConfig(); |
|
|
|
|
|
|
|
|
|
var quoteCharRegex = new RegExp(_quoteChar, 'g'); |
|
|
|
|
var quoteCharRegex = new RegExp(escapeRegExp(_quoteChar), 'g'); |
|
|
|
|
|
|
|
|
|
if (typeof _input === 'string') |
|
|
|
|
_input = JSON.parse(_input); |
|
|
|
@ -304,7 +294,7 @@ if (!Array.isArray)
@@ -304,7 +294,7 @@ if (!Array.isArray)
|
|
|
|
|
if (!_input.length || Array.isArray(_input[0])) |
|
|
|
|
return serialize(null, _input, _skipEmptyLines); |
|
|
|
|
else if (typeof _input[0] === 'object') |
|
|
|
|
return serialize(objectKeys(_input[0]), _input, _skipEmptyLines); |
|
|
|
|
return serialize(_columns || objectKeys(_input[0]), _input, _skipEmptyLines); |
|
|
|
|
} |
|
|
|
|
else if (typeof _input === 'object') |
|
|
|
|
{ |
|
|
|
@ -329,7 +319,7 @@ if (!Array.isArray)
@@ -329,7 +319,7 @@ if (!Array.isArray)
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Default (any valid paths should return before this)
|
|
|
|
|
throw 'exception: Unable to serialize unrecognized input'; |
|
|
|
|
throw new Error('Unable to serialize unrecognized input'); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
function unpackConfig() |
|
|
|
@ -359,6 +349,17 @@ if (!Array.isArray)
@@ -359,6 +349,17 @@ if (!Array.isArray)
|
|
|
|
|
|
|
|
|
|
if (typeof _config.header === 'boolean') |
|
|
|
|
_writeHeader = _config.header; |
|
|
|
|
|
|
|
|
|
if (Array.isArray(_config.columns)) { |
|
|
|
|
|
|
|
|
|
if (_config.columns.length === 0) throw new Error('Option columns is empty'); |
|
|
|
|
|
|
|
|
|
_columns = _config.columns; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (_config.escapeChar !== undefined) { |
|
|
|
|
_escapedQuote = _config.escapeChar + _quoteChar; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -403,19 +404,34 @@ if (!Array.isArray)
@@ -403,19 +404,34 @@ if (!Array.isArray)
|
|
|
|
|
for (var row = 0; row < data.length; row++) |
|
|
|
|
{ |
|
|
|
|
var maxCol = hasHeader ? fields.length : data[row].length; |
|
|
|
|
var r = hasHeader ? fields : data[row]; |
|
|
|
|
|
|
|
|
|
if (skipEmptyLines !== 'greedy' || r.join('').trim() !== '') |
|
|
|
|
var emptyLine = false; |
|
|
|
|
var nullLine = hasHeader ? Object.keys(data[row]).length === 0 : data[row].length === 0; |
|
|
|
|
if (skipEmptyLines && !hasHeader) |
|
|
|
|
{ |
|
|
|
|
emptyLine = skipEmptyLines === 'greedy' ? data[row].join('').trim() === '' : data[row].length === 1 && data[row][0].length === 0; |
|
|
|
|
} |
|
|
|
|
if (skipEmptyLines === 'greedy' && hasHeader) { |
|
|
|
|
var line = []; |
|
|
|
|
for (var c = 0; c < maxCol; c++) { |
|
|
|
|
var cx = dataKeyedByField ? fields[c] : c; |
|
|
|
|
line.push(data[row][cx]); |
|
|
|
|
} |
|
|
|
|
emptyLine = line.join('').trim() === ''; |
|
|
|
|
} |
|
|
|
|
if (!emptyLine) |
|
|
|
|
{ |
|
|
|
|
for (var col = 0; col < maxCol; col++) |
|
|
|
|
{ |
|
|
|
|
if (col > 0) |
|
|
|
|
if (col > 0 && !nullLine) |
|
|
|
|
csv += _delimiter; |
|
|
|
|
var colIdx = hasHeader && dataKeyedByField ? fields[col] : col; |
|
|
|
|
csv += safe(data[row][colIdx], col); |
|
|
|
|
} |
|
|
|
|
if (row < data.length - 1 && (!skipEmptyLines || maxCol > 0)) |
|
|
|
|
if (row < data.length - 1 && (!skipEmptyLines || (maxCol > 0 && !nullLine))) |
|
|
|
|
{ |
|
|
|
|
csv += _newline; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
return csv; |
|
|
|
@ -430,7 +446,7 @@ if (!Array.isArray)
@@ -430,7 +446,7 @@ if (!Array.isArray)
|
|
|
|
|
if (str.constructor === Date) |
|
|
|
|
return JSON.stringify(str).slice(1, 25); |
|
|
|
|
|
|
|
|
|
str = str.toString().replace(quoteCharRegex, _quoteChar + _quoteChar); |
|
|
|
|
str = str.toString().replace(quoteCharRegex, _escapedQuote); |
|
|
|
|
|
|
|
|
|
var needsQuotes = (typeof _quotes === 'boolean' && _quotes) |
|
|
|
|
|| (Array.isArray(_quotes) && _quotes[col]) |
|
|
|
@ -457,6 +473,7 @@ if (!Array.isArray)
@@ -457,6 +473,7 @@ if (!Array.isArray)
|
|
|
|
|
this._handle = null; |
|
|
|
|
this._finished = false; |
|
|
|
|
this._completed = false; |
|
|
|
|
this._halted = false; |
|
|
|
|
this._input = null; |
|
|
|
|
this._baseIndex = 0; |
|
|
|
|
this._partialLine = ''; |
|
|
|
@ -481,6 +498,7 @@ if (!Array.isArray)
@@ -481,6 +498,7 @@ if (!Array.isArray)
|
|
|
|
|
chunk = modifiedChunk; |
|
|
|
|
} |
|
|
|
|
this.isFirstChunk = false; |
|
|
|
|
this._halted = false; |
|
|
|
|
|
|
|
|
|
// Rejoin the line we likely just split in two by chunking the file
|
|
|
|
|
var aggregate = this._partialLine + chunk; |
|
|
|
@ -488,8 +506,10 @@ if (!Array.isArray)
@@ -488,8 +506,10 @@ if (!Array.isArray)
|
|
|
|
|
|
|
|
|
|
var results = this._handle.parse(aggregate, this._baseIndex, !this._finished); |
|
|
|
|
|
|
|
|
|
if (this._handle.paused() || this._handle.aborted()) |
|
|
|
|
if (this._handle.paused() || this._handle.aborted()) { |
|
|
|
|
this._halted = true; |
|
|
|
|
return; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
var lastIndex = results.meta.cursor; |
|
|
|
|
|
|
|
|
@ -515,8 +535,10 @@ if (!Array.isArray)
@@ -515,8 +535,10 @@ if (!Array.isArray)
|
|
|
|
|
else if (isFunction(this._config.chunk) && !isFakeChunk) |
|
|
|
|
{ |
|
|
|
|
this._config.chunk(results, this._handle); |
|
|
|
|
if (this._handle.paused() || this._handle.aborted()) |
|
|
|
|
if (this._handle.paused() || this._handle.aborted()) { |
|
|
|
|
this._halted = true; |
|
|
|
|
return; |
|
|
|
|
} |
|
|
|
|
results = undefined; |
|
|
|
|
this._completeResults = undefined; |
|
|
|
|
} |
|
|
|
@ -634,7 +656,6 @@ if (!Array.isArray)
@@ -634,7 +656,6 @@ if (!Array.isArray)
|
|
|
|
|
{ |
|
|
|
|
var end = this._start + this._config.chunkSize - 1; // minus one because byte range is inclusive
|
|
|
|
|
xhr.setRequestHeader('Range', 'bytes=' + this._start + '-' + end); |
|
|
|
|
xhr.setRequestHeader('If-None-Match', 'webkit-no-cache'); // https://bugs.webkit.org/show_bug.cgi?id=82672
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
try { |
|
|
|
@ -881,13 +902,11 @@ if (!Array.isArray)
@@ -881,13 +902,11 @@ if (!Array.isArray)
|
|
|
|
|
this._onCsvData = function(results) |
|
|
|
|
{ |
|
|
|
|
var data = results.data; |
|
|
|
|
for (var i = 0; i < data.length; i++) { |
|
|
|
|
if (!stream.push(data[i]) && !this._handle.paused()) { |
|
|
|
|
// the writeable consumer buffer has filled up
|
|
|
|
|
// so we need to pause until more items
|
|
|
|
|
// can be processed
|
|
|
|
|
this._handle.pause(); |
|
|
|
|
} |
|
|
|
|
if (!stream.push(data) && !this._handle.paused()) { |
|
|
|
|
// the writeable consumer buffer has filled up
|
|
|
|
|
// so we need to pause until more items
|
|
|
|
|
// can be processed
|
|
|
|
|
this._handle.pause(); |
|
|
|
|
} |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
@ -967,8 +986,10 @@ if (!Array.isArray)
@@ -967,8 +986,10 @@ if (!Array.isArray)
|
|
|
|
|
}); |
|
|
|
|
stream.once('finish', bindFunction(this._onWriteComplete, this)); |
|
|
|
|
} |
|
|
|
|
DuplexStreamStreamer.prototype = Object.create(ChunkStreamer.prototype); |
|
|
|
|
DuplexStreamStreamer.prototype.constructor = DuplexStreamStreamer; |
|
|
|
|
if (typeof PAPA_BROWSER_CONTEXT === 'undefined') { |
|
|
|
|
DuplexStreamStreamer.prototype = Object.create(ChunkStreamer.prototype); |
|
|
|
|
DuplexStreamStreamer.prototype.constructor = DuplexStreamStreamer; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Use one ParserHandle per entire CSV file or string
|
|
|
|
@ -977,7 +998,6 @@ if (!Array.isArray)
@@ -977,7 +998,6 @@ if (!Array.isArray)
|
|
|
|
|
// One goal is to minimize the use of regular expressions...
|
|
|
|
|
var FLOAT = /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i; |
|
|
|
|
var ISO_DATE = /(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))/; |
|
|
|
|
|
|
|
|
|
var self = this; |
|
|
|
|
var _stepCounter = 0; // Number of times step was called (number of rows parsed)
|
|
|
|
|
var _rowCounter = 0; // Number of rows that have been parsed so far
|
|
|
|
@ -1033,7 +1053,7 @@ if (!Array.isArray)
@@ -1033,7 +1053,7 @@ if (!Array.isArray)
|
|
|
|
|
_delimiterError = false; |
|
|
|
|
if (!_config.delimiter) |
|
|
|
|
{ |
|
|
|
|
var delimGuess = guessDelimiter(input, _config.newline, _config.skipEmptyLines, _config.comments); |
|
|
|
|
var delimGuess = guessDelimiter(input, _config.newline, _config.skipEmptyLines, _config.comments, _config.delimitersToGuess); |
|
|
|
|
if (delimGuess.successful) |
|
|
|
|
_config.delimiter = delimGuess.bestDelimiter; |
|
|
|
|
else |
|
|
|
@ -1074,8 +1094,14 @@ if (!Array.isArray)
@@ -1074,8 +1094,14 @@ if (!Array.isArray)
|
|
|
|
|
|
|
|
|
|
this.resume = function() |
|
|
|
|
{ |
|
|
|
|
_paused = false; |
|
|
|
|
self.streamer.parseChunk(_input, true); |
|
|
|
|
if(self.streamer._halted) { |
|
|
|
|
_paused = false; |
|
|
|
|
self.streamer.parseChunk(_input, true); |
|
|
|
|
} else { |
|
|
|
|
// Bugfix: #636 In case the processing hasn't halted yet
|
|
|
|
|
// wait for it to halt in order to resume
|
|
|
|
|
setTimeout(this.resume, 3); |
|
|
|
|
} |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
this.aborted = function() |
|
|
|
@ -1127,18 +1153,25 @@ if (!Array.isArray)
@@ -1127,18 +1153,25 @@ if (!Array.isArray)
|
|
|
|
|
{ |
|
|
|
|
if (!_results) |
|
|
|
|
return; |
|
|
|
|
for (var i = 0; needsHeaderRow() && i < _results.data.length; i++) |
|
|
|
|
for (var j = 0; j < _results.data[i].length; j++) |
|
|
|
|
{ |
|
|
|
|
var header = _results.data[i][j]; |
|
|
|
|
|
|
|
|
|
if (_config.trimHeaders) { |
|
|
|
|
header = header.trim(); |
|
|
|
|
} |
|
|
|
|
function addHeder(header) |
|
|
|
|
{ |
|
|
|
|
if (isFunction(_config.transformHeader)) |
|
|
|
|
header = _config.transformHeader(header); |
|
|
|
|
|
|
|
|
|
_fields.push(header); |
|
|
|
|
} |
|
|
|
|
_results.data.splice(0, 1); |
|
|
|
|
_fields.push(header); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (Array.isArray(_results.data[0])) |
|
|
|
|
{ |
|
|
|
|
for (var i = 0; needsHeaderRow() && i < _results.data.length; i++) |
|
|
|
|
_results.data[i].forEach(addHeder); |
|
|
|
|
|
|
|
|
|
_results.data.splice(0, 1); |
|
|
|
|
} |
|
|
|
|
// if _results.data[0] is not an array, we are in a step where _results.data is the row.
|
|
|
|
|
else |
|
|
|
|
_results.data.forEach(addHeder); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
function shouldApplyDynamicTyping(field) { |
|
|
|
@ -1172,15 +1205,15 @@ if (!Array.isArray)
@@ -1172,15 +1205,15 @@ if (!Array.isArray)
|
|
|
|
|
if (!_results || (!_config.header && !_config.dynamicTyping && !_config.transform)) |
|
|
|
|
return _results; |
|
|
|
|
|
|
|
|
|
for (var i = 0; i < _results.data.length; i++) |
|
|
|
|
function processRow(rowSource, i) |
|
|
|
|
{ |
|
|
|
|
var row = _config.header ? {} : []; |
|
|
|
|
|
|
|
|
|
var j; |
|
|
|
|
for (j = 0; j < _results.data[i].length; j++) |
|
|
|
|
for (j = 0; j < rowSource.length; j++) |
|
|
|
|
{ |
|
|
|
|
var field = j; |
|
|
|
|
var value = _results.data[i][j]; |
|
|
|
|
var value = rowSource[j]; |
|
|
|
|
|
|
|
|
|
if (_config.header) |
|
|
|
|
field = j >= _fields.length ? '__parsed_extra' : _fields[j]; |
|
|
|
@ -1199,7 +1232,6 @@ if (!Array.isArray)
@@ -1199,7 +1232,6 @@ if (!Array.isArray)
|
|
|
|
|
row[field] = value; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
_results.data[i] = row; |
|
|
|
|
|
|
|
|
|
if (_config.header) |
|
|
|
|
{ |
|
|
|
@ -1208,23 +1240,36 @@ if (!Array.isArray)
@@ -1208,23 +1240,36 @@ if (!Array.isArray)
|
|
|
|
|
else if (j < _fields.length) |
|
|
|
|
addError('FieldMismatch', 'TooFewFields', 'Too few fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return row; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
var incrementBy = 1; |
|
|
|
|
if (!_results.data[0] || Array.isArray(_results.data[0])) |
|
|
|
|
{ |
|
|
|
|
_results.data = _results.data.map(processRow); |
|
|
|
|
incrementBy = _results.data.length; |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
_results.data = processRow(_results.data, 0); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (_config.header && _results.meta) |
|
|
|
|
_results.meta.fields = _fields; |
|
|
|
|
|
|
|
|
|
_rowCounter += _results.data.length; |
|
|
|
|
_rowCounter += incrementBy; |
|
|
|
|
return _results; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
function guessDelimiter(input, newline, skipEmptyLines, comments) |
|
|
|
|
function guessDelimiter(input, newline, skipEmptyLines, comments, delimitersToGuess) |
|
|
|
|
{ |
|
|
|
|
var delimChoices = [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP]; |
|
|
|
|
var bestDelim, bestDelta, fieldCountPrevRow; |
|
|
|
|
|
|
|
|
|
for (var i = 0; i < delimChoices.length; i++) |
|
|
|
|
delimitersToGuess = delimitersToGuess || [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP]; |
|
|
|
|
|
|
|
|
|
for (var i = 0; i < delimitersToGuess.length; i++) |
|
|
|
|
{ |
|
|
|
|
var delim = delimChoices[i]; |
|
|
|
|
var delim = delimitersToGuess[i]; |
|
|
|
|
var delta = 0, avgFieldCount = 0, emptyLinesCount = 0; |
|
|
|
|
fieldCountPrevRow = undefined; |
|
|
|
|
|
|
|
|
@ -1247,7 +1292,7 @@ if (!Array.isArray)
@@ -1247,7 +1292,7 @@ if (!Array.isArray)
|
|
|
|
|
|
|
|
|
|
if (typeof fieldCountPrevRow === 'undefined') |
|
|
|
|
{ |
|
|
|
|
fieldCountPrevRow = fieldCount; |
|
|
|
|
fieldCountPrevRow = 0; |
|
|
|
|
continue; |
|
|
|
|
} |
|
|
|
|
else if (fieldCount > 1) |
|
|
|
@ -1260,7 +1305,7 @@ if (!Array.isArray)
@@ -1260,7 +1305,7 @@ if (!Array.isArray)
|
|
|
|
|
if (preview.data.length > 0) |
|
|
|
|
avgFieldCount /= (preview.data.length - emptyLinesCount); |
|
|
|
|
|
|
|
|
|
if ((typeof bestDelta === 'undefined' || delta < bestDelta) |
|
|
|
|
if ((typeof bestDelta === 'undefined' || delta > bestDelta) |
|
|
|
|
&& avgFieldCount > 1.99) |
|
|
|
|
{ |
|
|
|
|
bestDelta = delta; |
|
|
|
@ -1349,7 +1394,7 @@ if (!Array.isArray)
@@ -1349,7 +1394,7 @@ if (!Array.isArray)
|
|
|
|
|
|
|
|
|
|
// Comment character must be valid
|
|
|
|
|
if (comments === delim) |
|
|
|
|
throw 'Comment character same as delimiter'; |
|
|
|
|
throw new Error('Comment character same as delimiter'); |
|
|
|
|
else if (comments === true) |
|
|
|
|
comments = '#'; |
|
|
|
|
else if (typeof comments !== 'string' |
|
|
|
@ -1368,7 +1413,7 @@ if (!Array.isArray)
@@ -1368,7 +1413,7 @@ if (!Array.isArray)
|
|
|
|
|
{ |
|
|
|
|
// For some reason, in Chrome, this speeds things up (!?)
|
|
|
|
|
if (typeof input !== 'string') |
|
|
|
|
throw 'Input must be a string'; |
|
|
|
|
throw new Error('Input must be a string'); |
|
|
|
|
|
|
|
|
|
// We don't need to compute some of these every time parse() is called,
|
|
|
|
|
// but having them in a more local scope seems to perform better
|
|
|
|
@ -1419,8 +1464,8 @@ if (!Array.isArray)
@@ -1419,8 +1464,8 @@ if (!Array.isArray)
|
|
|
|
|
|
|
|
|
|
var nextDelim = input.indexOf(delim, cursor); |
|
|
|
|
var nextNewline = input.indexOf(newline, cursor); |
|
|
|
|
var quoteCharRegex = new RegExp(escapeChar.replace(/[-[\]/{}()*+?.\\^$|]/g, '\\$&') + quoteChar, 'g'); |
|
|
|
|
var quoteSearch; |
|
|
|
|
var quoteCharRegex = new RegExp(escapeRegExp(escapeChar) + escapeRegExp(quoteChar), 'g'); |
|
|
|
|
var quoteSearch = input.indexOf(quoteChar, cursor); |
|
|
|
|
|
|
|
|
|
// Parser loop
|
|
|
|
|
for (;;) |
|
|
|
@ -1485,6 +1530,12 @@ if (!Array.isArray)
@@ -1485,6 +1530,12 @@ if (!Array.isArray)
|
|
|
|
|
{ |
|
|
|
|
row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar)); |
|
|
|
|
cursor = quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen; |
|
|
|
|
|
|
|
|
|
// If char after following delimiter is not quoteChar, we find next quote char position
|
|
|
|
|
if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen] !== quoteChar) |
|
|
|
|
{ |
|
|
|
|
quoteSearch = input.indexOf(quoteChar, cursor); |
|
|
|
|
} |
|
|
|
|
nextDelim = input.indexOf(delim, cursor); |
|
|
|
|
nextNewline = input.indexOf(newline, cursor); |
|
|
|
|
break; |
|
|
|
@ -1498,6 +1549,7 @@ if (!Array.isArray)
@@ -1498,6 +1549,7 @@ if (!Array.isArray)
|
|
|
|
|
row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar)); |
|
|
|
|
saveRow(quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen); |
|
|
|
|
nextDelim = input.indexOf(delim, cursor); // because we may have skipped the nextDelim in the quoted field
|
|
|
|
|
quoteSearch = input.indexOf(quoteChar, cursor); // we search for first quote in next line
|
|
|
|
|
|
|
|
|
|
if (stepIsFunction) |
|
|
|
|
{ |
|
|
|
@ -1544,10 +1596,27 @@ if (!Array.isArray)
@@ -1544,10 +1596,27 @@ if (!Array.isArray)
|
|
|
|
|
// Next delimiter comes before next newline, so we've reached end of field
|
|
|
|
|
if (nextDelim !== -1 && (nextDelim < nextNewline || nextNewline === -1)) |
|
|
|
|
{ |
|
|
|
|
row.push(input.substring(cursor, nextDelim)); |
|
|
|
|
cursor = nextDelim + delimLen; |
|
|
|
|
nextDelim = input.indexOf(delim, cursor); |
|
|
|
|
continue; |
|
|
|
|
// we check, if we have quotes, because delimiter char may be part of field enclosed in quotes
|
|
|
|
|
if (quoteSearch !== -1) { |
|
|
|
|
// we have quotes, so we try to find the next delimiter not enclosed in quotes and also next starting quote char
|
|
|
|
|
var nextDelimObj = getNextUnqotedDelimiter(nextDelim, quoteSearch, nextNewline); |
|
|
|
|
|
|
|
|
|
// if we have next delimiter char which is not enclosed in quotes
|
|
|
|
|
if (nextDelimObj && nextDelimObj.nextDelim) { |
|
|
|
|
nextDelim = nextDelimObj.nextDelim; |
|
|
|
|
quoteSearch = nextDelimObj.quoteSearch; |
|
|
|
|
row.push(input.substring(cursor, nextDelim)); |
|
|
|
|
cursor = nextDelim + delimLen; |
|
|
|
|
// we look for next delimiter char
|
|
|
|
|
nextDelim = input.indexOf(delim, cursor); |
|
|
|
|
continue; |
|
|
|
|
} |
|
|
|
|
} else { |
|
|
|
|
row.push(input.substring(cursor, nextDelim)); |
|
|
|
|
cursor = nextDelim + delimLen; |
|
|
|
|
nextDelim = input.indexOf(delim, cursor); |
|
|
|
|
continue; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// End of row
|
|
|
|
@ -1630,10 +1699,11 @@ if (!Array.isArray)
@@ -1630,10 +1699,11 @@ if (!Array.isArray)
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/** Returns an object with the results, errors, and meta. */ |
|
|
|
|
function returnable(stopped) |
|
|
|
|
function returnable(stopped, step) |
|
|
|
|
{ |
|
|
|
|
var isStep = step || false; |
|
|
|
|
return { |
|
|
|
|
data: data, |
|
|
|
|
data: isStep ? data[0] : data, |
|
|
|
|
errors: errors, |
|
|
|
|
meta: { |
|
|
|
|
delimiter: delim, |
|
|
|
@ -1648,10 +1718,44 @@ if (!Array.isArray)
@@ -1648,10 +1718,44 @@ if (!Array.isArray)
|
|
|
|
|
/** Executes the user's step function and resets data & errors. */ |
|
|
|
|
function doStep() |
|
|
|
|
{ |
|
|
|
|
step(returnable()); |
|
|
|
|
step(returnable(undefined, true)); |
|
|
|
|
data = []; |
|
|
|
|
errors = []; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/** Gets the delimiter character, which is not inside the quoted field */ |
|
|
|
|
function getNextUnqotedDelimiter(nextDelim, quoteSearch, newLine) { |
|
|
|
|
var result = { |
|
|
|
|
nextDelim: undefined, |
|
|
|
|
quoteSearch: undefined |
|
|
|
|
}; |
|
|
|
|
// get the next closing quote character
|
|
|
|
|
var nextQuoteSearch = input.indexOf(quoteChar, quoteSearch + 1); |
|
|
|
|
|
|
|
|
|
// if next delimiter is part of a field enclosed in quotes
|
|
|
|
|
if (nextDelim > quoteSearch && nextDelim < nextQuoteSearch && (nextQuoteSearch < newLine || newLine === -1)) { |
|
|
|
|
// get the next delimiter character after this one
|
|
|
|
|
var nextNextDelim = input.indexOf(delim, nextQuoteSearch); |
|
|
|
|
|
|
|
|
|
// if there is no next delimiter, return default result
|
|
|
|
|
if (nextNextDelim === -1) { |
|
|
|
|
return result; |
|
|
|
|
} |
|
|
|
|
// find the next opening quote char position
|
|
|
|
|
if (nextNextDelim > nextQuoteSearch) { |
|
|
|
|
nextQuoteSearch = input.indexOf(quoteChar, nextQuoteSearch + 1); |
|
|
|
|
} |
|
|
|
|
// try to get the next delimiter position
|
|
|
|
|
result = getNextUnqotedDelimiter(nextNextDelim, nextQuoteSearch, newLine); |
|
|
|
|
} else { |
|
|
|
|
result = { |
|
|
|
|
nextDelim: nextDelim, |
|
|
|
|
quoteSearch: quoteSearch |
|
|
|
|
}; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return result; |
|
|
|
|
} |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
/** Sets the abort flag */ |
|
|
|
@ -1668,26 +1772,12 @@ if (!Array.isArray)
@@ -1668,26 +1772,12 @@ if (!Array.isArray)
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// If you need to load Papa Parse asynchronously and you also need worker threads, hard-code
|
|
|
|
|
// the script path here. See: https://github.com/mholt/PapaParse/issues/87#issuecomment-57885358
|
|
|
|
|
function getScriptPath() |
|
|
|
|
{ |
|
|
|
|
var scripts = document.getElementsByTagName('script'); |
|
|
|
|
return scripts.length ? scripts[scripts.length - 1].src : ''; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
function newWorker() |
|
|
|
|
{ |
|
|
|
|
if (!Papa.WORKERS_SUPPORTED) |
|
|
|
|
return false; |
|
|
|
|
if (!LOADED_SYNC && Papa.SCRIPT_PATH === null) |
|
|
|
|
throw new Error( |
|
|
|
|
'Script path cannot be determined automatically when Papa Parse is loaded asynchronously. ' + |
|
|
|
|
'You need to set Papa.SCRIPT_PATH manually.' |
|
|
|
|
); |
|
|
|
|
var workerUrl = Papa.SCRIPT_PATH || AUTO_SCRIPT_PATH; |
|
|
|
|
// Append 'papaworker' to the search string to tell papaparse that this is our worker.
|
|
|
|
|
workerUrl += (workerUrl.indexOf('?') !== -1 ? '&' : '?') + 'papaworker'; |
|
|
|
|
|
|
|
|
|
var workerUrl = getWorkerBlob(); |
|
|
|
|
var w = new global.Worker(workerUrl); |
|
|
|
|
w.onmessage = mainThreadReceivedMessage; |
|
|
|
|
w.id = workerIdCounter++; |
|
|
|
@ -1722,7 +1812,7 @@ if (!Array.isArray)
@@ -1722,7 +1812,7 @@ if (!Array.isArray)
|
|
|
|
|
for (var i = 0; i < msg.results.data.length; i++) |
|
|
|
|
{ |
|
|
|
|
worker.userStep({ |
|
|
|
|
data: [msg.results.data[i]], |
|
|
|
|
data: msg.results.data[i], |
|
|
|
|
errors: msg.results.errors, |
|
|
|
|
meta: msg.results.meta |
|
|
|
|
}, handle); |
|
|
|
@ -1751,7 +1841,7 @@ if (!Array.isArray)
@@ -1751,7 +1841,7 @@ if (!Array.isArray)
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
function notImplemented() { |
|
|
|
|
throw 'Not implemented.'; |
|
|
|
|
throw new Error('Not implemented.'); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/** Callback when worker thread receives a message */ |
|
|
|
|