From 49170b76b382317356c2f707e2e4191430b8d495 Mon Sep 17 00:00:00 2001 From: konuch Date: Sat, 29 Jun 2019 12:50:43 +0200 Subject: [PATCH] Make the demo website use the new 5.0.0 version. (#679) --- docs/resources/js/papaparse.js | 304 +++++++++++++++++++++------------ 1 file changed, 197 insertions(+), 107 deletions(-) diff --git a/docs/resources/js/papaparse.js b/docs/resources/js/papaparse.js index 94c81b6..ac02f6d 100644 --- a/docs/resources/js/papaparse.js +++ b/docs/resources/js/papaparse.js @@ -1,19 +1,10 @@ /* @license Papa Parse -v4.6.1 +v5.0.0 https://github.com/mholt/PapaParse License: MIT */ -// Polyfills -// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/isArray#Polyfill -if (!Array.isArray) -{ - Array.isArray = function(arg) { - return Object.prototype.toString.call(arg) === '[object Array]'; - }; -} - (function(root, factory) { /* globals define */ @@ -34,7 +25,10 @@ if (!Array.isArray) // Browser globals (root is window) root.Papa = factory(); } -}(this, function() + // in strict mode we cannot access arguments.callee, so we need a named reference to + // stringify the factory method for the blob worker + // eslint-disable-next-line func-name +}(this, function moduleFactory() { 'use strict'; @@ -51,9 +45,15 @@ if (!Array.isArray) return {}; })(); + + function getWorkerBlob() { + var URL = global.URL || global.webkitURL || null; + var code = moduleFactory.toString(); + return Papa.BLOB_URL || (Papa.BLOB_URL = URL.createObjectURL(new Blob(['(', code, ')();'], {type: 'text/javascript'}))); + } + var IS_WORKER = !global.document && !!global.postMessage, - IS_PAPA_WORKER = IS_WORKER && /(\?|&)papaworker(=|&|$)/.test(global.location.search), - LOADED_SYNC = false, AUTO_SCRIPT_PATH; + IS_PAPA_WORKER = IS_WORKER && /blob:/i.test((global.location || {}).protocol); var workers = {}, workerIdCounter = 0; var Papa = {}; @@ -66,7 +66,6 @@ if (!Array.isArray) Papa.BYTE_ORDER_MARK = '\ufeff'; Papa.BAD_DELIMITERS = ['\r', '\n', '"', Papa.BYTE_ORDER_MARK]; Papa.WORKERS_SUPPORTED = !IS_WORKER && !!global.Worker; - Papa.SCRIPT_PATH = null; // Must be set by your code if you use workers and this lib is loaded asynchronously Papa.NODE_STREAM_INPUT = 1; // Configurable chunk sizes for local and remote files, respectively @@ -81,7 +80,9 @@ if (!Array.isArray) Papa.FileStreamer = FileStreamer; Papa.StringStreamer = StringStreamer; Papa.ReadableStreamStreamer = ReadableStreamStreamer; - Papa.DuplexStreamStreamer = DuplexStreamStreamer; + if (typeof PAPA_BROWSER_CONTEXT === 'undefined') { + Papa.DuplexStreamStreamer = DuplexStreamStreamer; + } if (global.jQuery) { @@ -182,23 +183,6 @@ if (!Array.isArray) { global.onmessage = workerThreadReceivedMessage; } - else if (Papa.WORKERS_SUPPORTED) - { - AUTO_SCRIPT_PATH = getScriptPath(); - - // Check if the script was loaded synchronously - if (!document.body) - { - // Body doesn't exist yet, must be synchronous - LOADED_SYNC = true; - } - else - { - document.addEventListener('DOMContentLoaded', function() { - LOADED_SYNC = true; - }, true); - } - } @@ -241,7 +225,7 @@ if (!Array.isArray) } var streamer = null; - if (_input === Papa.NODE_STREAM_INPUT) + if (_input === Papa.NODE_STREAM_INPUT && typeof PAPA_BROWSER_CONTEXT === 'undefined') { // create a node Duplex stream for use // with .pipe @@ -289,12 +273,18 @@ if (!Array.isArray) /** quote character */ var _quoteChar = '"'; + /** escaped quote character, either "" or " */ + var _escapedQuote = _quoteChar + _quoteChar; + /** whether to skip empty lines */ var _skipEmptyLines = false; + /** the columns (keys) we expect when we unparse objects */ + var _columns = null; + unpackConfig(); - var quoteCharRegex = new RegExp(_quoteChar, 'g'); + var quoteCharRegex = new RegExp(escapeRegExp(_quoteChar), 'g'); if (typeof _input === 'string') _input = JSON.parse(_input); @@ -304,7 +294,7 @@ if (!Array.isArray) if (!_input.length || Array.isArray(_input[0])) return serialize(null, _input, _skipEmptyLines); else if (typeof _input[0] === 'object') - return serialize(objectKeys(_input[0]), _input, _skipEmptyLines); + return serialize(_columns || objectKeys(_input[0]), _input, _skipEmptyLines); } else if (typeof _input === 'object') { @@ -329,7 +319,7 @@ if (!Array.isArray) } // Default (any valid paths should return before this) - throw 'exception: Unable to serialize unrecognized input'; + throw new Error('Unable to serialize unrecognized input'); function unpackConfig() @@ -359,6 +349,17 @@ if (!Array.isArray) if (typeof _config.header === 'boolean') _writeHeader = _config.header; + + if (Array.isArray(_config.columns)) { + + if (_config.columns.length === 0) throw new Error('Option columns is empty'); + + _columns = _config.columns; + } + + if (_config.escapeChar !== undefined) { + _escapedQuote = _config.escapeChar + _quoteChar; + } } @@ -403,19 +404,34 @@ if (!Array.isArray) for (var row = 0; row < data.length; row++) { var maxCol = hasHeader ? fields.length : data[row].length; - var r = hasHeader ? fields : data[row]; - if (skipEmptyLines !== 'greedy' || r.join('').trim() !== '') + var emptyLine = false; + var nullLine = hasHeader ? Object.keys(data[row]).length === 0 : data[row].length === 0; + if (skipEmptyLines && !hasHeader) + { + emptyLine = skipEmptyLines === 'greedy' ? data[row].join('').trim() === '' : data[row].length === 1 && data[row][0].length === 0; + } + if (skipEmptyLines === 'greedy' && hasHeader) { + var line = []; + for (var c = 0; c < maxCol; c++) { + var cx = dataKeyedByField ? fields[c] : c; + line.push(data[row][cx]); + } + emptyLine = line.join('').trim() === ''; + } + if (!emptyLine) { for (var col = 0; col < maxCol; col++) { - if (col > 0) + if (col > 0 && !nullLine) csv += _delimiter; var colIdx = hasHeader && dataKeyedByField ? fields[col] : col; csv += safe(data[row][colIdx], col); } - if (row < data.length - 1 && (!skipEmptyLines || maxCol > 0)) + if (row < data.length - 1 && (!skipEmptyLines || (maxCol > 0 && !nullLine))) + { csv += _newline; + } } } return csv; @@ -430,7 +446,7 @@ if (!Array.isArray) if (str.constructor === Date) return JSON.stringify(str).slice(1, 25); - str = str.toString().replace(quoteCharRegex, _quoteChar + _quoteChar); + str = str.toString().replace(quoteCharRegex, _escapedQuote); var needsQuotes = (typeof _quotes === 'boolean' && _quotes) || (Array.isArray(_quotes) && _quotes[col]) @@ -457,6 +473,7 @@ if (!Array.isArray) this._handle = null; this._finished = false; this._completed = false; + this._halted = false; this._input = null; this._baseIndex = 0; this._partialLine = ''; @@ -481,6 +498,7 @@ if (!Array.isArray) chunk = modifiedChunk; } this.isFirstChunk = false; + this._halted = false; // Rejoin the line we likely just split in two by chunking the file var aggregate = this._partialLine + chunk; @@ -488,8 +506,10 @@ if (!Array.isArray) var results = this._handle.parse(aggregate, this._baseIndex, !this._finished); - if (this._handle.paused() || this._handle.aborted()) + if (this._handle.paused() || this._handle.aborted()) { + this._halted = true; return; + } var lastIndex = results.meta.cursor; @@ -515,8 +535,10 @@ if (!Array.isArray) else if (isFunction(this._config.chunk) && !isFakeChunk) { this._config.chunk(results, this._handle); - if (this._handle.paused() || this._handle.aborted()) + if (this._handle.paused() || this._handle.aborted()) { + this._halted = true; return; + } results = undefined; this._completeResults = undefined; } @@ -634,7 +656,6 @@ if (!Array.isArray) { var end = this._start + this._config.chunkSize - 1; // minus one because byte range is inclusive xhr.setRequestHeader('Range', 'bytes=' + this._start + '-' + end); - xhr.setRequestHeader('If-None-Match', 'webkit-no-cache'); // https://bugs.webkit.org/show_bug.cgi?id=82672 } try { @@ -881,13 +902,11 @@ if (!Array.isArray) this._onCsvData = function(results) { var data = results.data; - for (var i = 0; i < data.length; i++) { - if (!stream.push(data[i]) && !this._handle.paused()) { - // the writeable consumer buffer has filled up - // so we need to pause until more items - // can be processed - this._handle.pause(); - } + if (!stream.push(data) && !this._handle.paused()) { + // the writeable consumer buffer has filled up + // so we need to pause until more items + // can be processed + this._handle.pause(); } }; @@ -967,8 +986,10 @@ if (!Array.isArray) }); stream.once('finish', bindFunction(this._onWriteComplete, this)); } - DuplexStreamStreamer.prototype = Object.create(ChunkStreamer.prototype); - DuplexStreamStreamer.prototype.constructor = DuplexStreamStreamer; + if (typeof PAPA_BROWSER_CONTEXT === 'undefined') { + DuplexStreamStreamer.prototype = Object.create(ChunkStreamer.prototype); + DuplexStreamStreamer.prototype.constructor = DuplexStreamStreamer; + } // Use one ParserHandle per entire CSV file or string @@ -977,7 +998,6 @@ if (!Array.isArray) // One goal is to minimize the use of regular expressions... var FLOAT = /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i; var ISO_DATE = /(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))/; - var self = this; var _stepCounter = 0; // Number of times step was called (number of rows parsed) var _rowCounter = 0; // Number of rows that have been parsed so far @@ -1033,7 +1053,7 @@ if (!Array.isArray) _delimiterError = false; if (!_config.delimiter) { - var delimGuess = guessDelimiter(input, _config.newline, _config.skipEmptyLines, _config.comments); + var delimGuess = guessDelimiter(input, _config.newline, _config.skipEmptyLines, _config.comments, _config.delimitersToGuess); if (delimGuess.successful) _config.delimiter = delimGuess.bestDelimiter; else @@ -1074,8 +1094,14 @@ if (!Array.isArray) this.resume = function() { - _paused = false; - self.streamer.parseChunk(_input, true); + if(self.streamer._halted) { + _paused = false; + self.streamer.parseChunk(_input, true); + } else { + // Bugfix: #636 In case the processing hasn't halted yet + // wait for it to halt in order to resume + setTimeout(this.resume, 3); + } }; this.aborted = function() @@ -1127,18 +1153,25 @@ if (!Array.isArray) { if (!_results) return; - for (var i = 0; needsHeaderRow() && i < _results.data.length; i++) - for (var j = 0; j < _results.data[i].length; j++) - { - var header = _results.data[i][j]; - if (_config.trimHeaders) { - header = header.trim(); - } + function addHeder(header) + { + if (isFunction(_config.transformHeader)) + header = _config.transformHeader(header); - _fields.push(header); - } - _results.data.splice(0, 1); + _fields.push(header); + } + + if (Array.isArray(_results.data[0])) + { + for (var i = 0; needsHeaderRow() && i < _results.data.length; i++) + _results.data[i].forEach(addHeder); + + _results.data.splice(0, 1); + } + // if _results.data[0] is not an array, we are in a step where _results.data is the row. + else + _results.data.forEach(addHeder); } function shouldApplyDynamicTyping(field) { @@ -1172,15 +1205,15 @@ if (!Array.isArray) if (!_results || (!_config.header && !_config.dynamicTyping && !_config.transform)) return _results; - for (var i = 0; i < _results.data.length; i++) + function processRow(rowSource, i) { var row = _config.header ? {} : []; var j; - for (j = 0; j < _results.data[i].length; j++) + for (j = 0; j < rowSource.length; j++) { var field = j; - var value = _results.data[i][j]; + var value = rowSource[j]; if (_config.header) field = j >= _fields.length ? '__parsed_extra' : _fields[j]; @@ -1199,7 +1232,6 @@ if (!Array.isArray) row[field] = value; } - _results.data[i] = row; if (_config.header) { @@ -1208,23 +1240,36 @@ if (!Array.isArray) else if (j < _fields.length) addError('FieldMismatch', 'TooFewFields', 'Too few fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i); } + + return row; } + var incrementBy = 1; + if (!_results.data[0] || Array.isArray(_results.data[0])) + { + _results.data = _results.data.map(processRow); + incrementBy = _results.data.length; + } + else + _results.data = processRow(_results.data, 0); + + if (_config.header && _results.meta) _results.meta.fields = _fields; - _rowCounter += _results.data.length; + _rowCounter += incrementBy; return _results; } - function guessDelimiter(input, newline, skipEmptyLines, comments) + function guessDelimiter(input, newline, skipEmptyLines, comments, delimitersToGuess) { - var delimChoices = [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP]; var bestDelim, bestDelta, fieldCountPrevRow; - for (var i = 0; i < delimChoices.length; i++) + delimitersToGuess = delimitersToGuess || [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP]; + + for (var i = 0; i < delimitersToGuess.length; i++) { - var delim = delimChoices[i]; + var delim = delimitersToGuess[i]; var delta = 0, avgFieldCount = 0, emptyLinesCount = 0; fieldCountPrevRow = undefined; @@ -1247,7 +1292,7 @@ if (!Array.isArray) if (typeof fieldCountPrevRow === 'undefined') { - fieldCountPrevRow = fieldCount; + fieldCountPrevRow = 0; continue; } else if (fieldCount > 1) @@ -1260,7 +1305,7 @@ if (!Array.isArray) if (preview.data.length > 0) avgFieldCount /= (preview.data.length - emptyLinesCount); - if ((typeof bestDelta === 'undefined' || delta < bestDelta) + if ((typeof bestDelta === 'undefined' || delta > bestDelta) && avgFieldCount > 1.99) { bestDelta = delta; @@ -1349,7 +1394,7 @@ if (!Array.isArray) // Comment character must be valid if (comments === delim) - throw 'Comment character same as delimiter'; + throw new Error('Comment character same as delimiter'); else if (comments === true) comments = '#'; else if (typeof comments !== 'string' @@ -1368,7 +1413,7 @@ if (!Array.isArray) { // For some reason, in Chrome, this speeds things up (!?) if (typeof input !== 'string') - throw 'Input must be a string'; + throw new Error('Input must be a string'); // We don't need to compute some of these every time parse() is called, // but having them in a more local scope seems to perform better @@ -1419,8 +1464,8 @@ if (!Array.isArray) var nextDelim = input.indexOf(delim, cursor); var nextNewline = input.indexOf(newline, cursor); - var quoteCharRegex = new RegExp(escapeChar.replace(/[-[\]/{}()*+?.\\^$|]/g, '\\$&') + quoteChar, 'g'); - var quoteSearch; + var quoteCharRegex = new RegExp(escapeRegExp(escapeChar) + escapeRegExp(quoteChar), 'g'); + var quoteSearch = input.indexOf(quoteChar, cursor); // Parser loop for (;;) @@ -1485,6 +1530,12 @@ if (!Array.isArray) { row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar)); cursor = quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen; + + // If char after following delimiter is not quoteChar, we find next quote char position + if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen] !== quoteChar) + { + quoteSearch = input.indexOf(quoteChar, cursor); + } nextDelim = input.indexOf(delim, cursor); nextNewline = input.indexOf(newline, cursor); break; @@ -1498,6 +1549,7 @@ if (!Array.isArray) row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar)); saveRow(quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen); nextDelim = input.indexOf(delim, cursor); // because we may have skipped the nextDelim in the quoted field + quoteSearch = input.indexOf(quoteChar, cursor); // we search for first quote in next line if (stepIsFunction) { @@ -1544,10 +1596,27 @@ if (!Array.isArray) // Next delimiter comes before next newline, so we've reached end of field if (nextDelim !== -1 && (nextDelim < nextNewline || nextNewline === -1)) { - row.push(input.substring(cursor, nextDelim)); - cursor = nextDelim + delimLen; - nextDelim = input.indexOf(delim, cursor); - continue; + // we check, if we have quotes, because delimiter char may be part of field enclosed in quotes + if (quoteSearch !== -1) { + // we have quotes, so we try to find the next delimiter not enclosed in quotes and also next starting quote char + var nextDelimObj = getNextUnqotedDelimiter(nextDelim, quoteSearch, nextNewline); + + // if we have next delimiter char which is not enclosed in quotes + if (nextDelimObj && nextDelimObj.nextDelim) { + nextDelim = nextDelimObj.nextDelim; + quoteSearch = nextDelimObj.quoteSearch; + row.push(input.substring(cursor, nextDelim)); + cursor = nextDelim + delimLen; + // we look for next delimiter char + nextDelim = input.indexOf(delim, cursor); + continue; + } + } else { + row.push(input.substring(cursor, nextDelim)); + cursor = nextDelim + delimLen; + nextDelim = input.indexOf(delim, cursor); + continue; + } } // End of row @@ -1630,10 +1699,11 @@ if (!Array.isArray) } /** Returns an object with the results, errors, and meta. */ - function returnable(stopped) + function returnable(stopped, step) { + var isStep = step || false; return { - data: data, + data: isStep ? data[0] : data, errors: errors, meta: { delimiter: delim, @@ -1648,10 +1718,44 @@ if (!Array.isArray) /** Executes the user's step function and resets data & errors. */ function doStep() { - step(returnable()); + step(returnable(undefined, true)); data = []; errors = []; } + + /** Gets the delimiter character, which is not inside the quoted field */ + function getNextUnqotedDelimiter(nextDelim, quoteSearch, newLine) { + var result = { + nextDelim: undefined, + quoteSearch: undefined + }; + // get the next closing quote character + var nextQuoteSearch = input.indexOf(quoteChar, quoteSearch + 1); + + // if next delimiter is part of a field enclosed in quotes + if (nextDelim > quoteSearch && nextDelim < nextQuoteSearch && (nextQuoteSearch < newLine || newLine === -1)) { + // get the next delimiter character after this one + var nextNextDelim = input.indexOf(delim, nextQuoteSearch); + + // if there is no next delimiter, return default result + if (nextNextDelim === -1) { + return result; + } + // find the next opening quote char position + if (nextNextDelim > nextQuoteSearch) { + nextQuoteSearch = input.indexOf(quoteChar, nextQuoteSearch + 1); + } + // try to get the next delimiter position + result = getNextUnqotedDelimiter(nextNextDelim, nextQuoteSearch, newLine); + } else { + result = { + nextDelim: nextDelim, + quoteSearch: quoteSearch + }; + } + + return result; + } }; /** Sets the abort flag */ @@ -1668,26 +1772,12 @@ if (!Array.isArray) } - // If you need to load Papa Parse asynchronously and you also need worker threads, hard-code - // the script path here. See: https://github.com/mholt/PapaParse/issues/87#issuecomment-57885358 - function getScriptPath() - { - var scripts = document.getElementsByTagName('script'); - return scripts.length ? scripts[scripts.length - 1].src : ''; - } - function newWorker() { if (!Papa.WORKERS_SUPPORTED) return false; - if (!LOADED_SYNC && Papa.SCRIPT_PATH === null) - throw new Error( - 'Script path cannot be determined automatically when Papa Parse is loaded asynchronously. ' + - 'You need to set Papa.SCRIPT_PATH manually.' - ); - var workerUrl = Papa.SCRIPT_PATH || AUTO_SCRIPT_PATH; - // Append 'papaworker' to the search string to tell papaparse that this is our worker. - workerUrl += (workerUrl.indexOf('?') !== -1 ? '&' : '?') + 'papaworker'; + + var workerUrl = getWorkerBlob(); var w = new global.Worker(workerUrl); w.onmessage = mainThreadReceivedMessage; w.id = workerIdCounter++; @@ -1722,7 +1812,7 @@ if (!Array.isArray) for (var i = 0; i < msg.results.data.length; i++) { worker.userStep({ - data: [msg.results.data[i]], + data: msg.results.data[i], errors: msg.results.errors, meta: msg.results.meta }, handle); @@ -1751,7 +1841,7 @@ if (!Array.isArray) } function notImplemented() { - throw 'Not implemented.'; + throw new Error('Not implemented.'); } /** Callback when worker thread receives a message */