Browse Source

Support workers from inline-blobs

Simplify worker to just use inline-blob
pull/602/head
Jonathan Grimes 6 years ago committed by Sergi Almacellas Abellana
parent
commit
4ecae0b5bb
  1. 8
      docs/docs.html
  2. 6
      docs/faq.html
  3. 51
      papaparse.js
  4. 52
      tests/test-cases.js
  5. 2
      tests/tests.html

8
docs/docs.html

@ -447,7 +447,7 @@ var csv = Papa.unparse({
<code>worker</code> <code>worker</code>
</td> </td>
<td> <td>
Whether or not to use a <a href="/faq#workers">worker thread</a>. Using a worker will keep your page reactive, but may be slightly slower. Web Workers also load the entire Javascript file, so be careful when <a href="/faq#combine">combining other libraries</a> in the same file as Papa Parse. Note that worker option is only available when parsing files and not when converting from JSON to CSV. Whether or not to use a <a href="/faq#workers">worker thread</a>. Using a worker will keep your page reactive, but may be slightly slower.
</td> </td>
</tr> </tr>
<tr> <tr>
@ -749,12 +749,6 @@ var csv = Papa.unparse({
Whether or not the browser supports HTML5 Web Workers. If false, <code>worker: true</code> will have no effect. Whether or not the browser supports HTML5 Web Workers. If false, <code>worker: true</code> will have no effect.
</td> </td>
</tr> </tr>
<tr>
<td><code>Papa.SCRIPT_PATH</code></td>
<td>
The relative path to Papa Parse. This is automatically detected when Papa Parse is loaded synchronously. However, if you load Papa Parse asynchronously (e.g. with RequireJS), you need to set this variable manually in order to use Web Workers. (In those cases, this variable is <i>not</i> read-only and you should set it!)
</td>
</tr>
</table> </table>
</div> </div>

6
docs/faq.html

@ -84,7 +84,7 @@
<h6 id="combine">Can I put other libraries in the same file as Papa Parse?</h6> <h6 id="combine">Can I put other libraries in the same file as Papa Parse?</h6>
<p> <p>
Yes, but then don't use the Web Worker feature unless your other dependencies are battle-hardened for worker threads. A worker thread loads an entire file, not just a function, so all those dependencies would be executed in an environment without a DOM and other <code>window</code> features. If any of those dependencies crash (<code>Cannot read property "defaultView" of undefined</code> <a href="https://github.com/mholt/PapaParse/issues/114">is</a> <a href="https://github.com/mholt/PapaParse/issues/163">common</a>), the whole worker thread will crash and parsing will not succeed. Yes.
</p> </p>
@ -96,7 +96,7 @@
<h6 id="async">Can Papa Parse be loaded asynchronously (after the page loads)?</h6> <h6 id="async">Can Papa Parse be loaded asynchronously (after the page loads)?</h6>
<p> <p>
Yes. But if you want to use Web Workers, you'll need to specify the relative path to Papa Parse. To do this, set <a href="/docs#readonly">Papa.SCRIPT_PATH</a> to the relative path of the Papa Parse file. In synchronous loading, this is automatically detected. Yes.
</p> </p>
@ -209,7 +209,7 @@
<h6>Can I use a worker if I combine/concatenate my Javascript files?</h6> <h6>Can I use a worker if I combine/concatenate my Javascript files?</h6>
<p> <p>
Probably not. It's safest to concatenate the rest of your dependencies and include Papa Parse in a separate file. Any library that expects to have access to the <code>window</code> or DOM will crash when executed in a worker thread. Only put <a href="/faq#combine">other libraries in the same file</a> if they are ready to be used in worker threads. Yes.
</p> </p>
<h6>When should I use a worker?</h6> <h6>When should I use a worker?</h6>

51
papaparse.js

@ -34,7 +34,10 @@ if (!Array.isArray)
// Browser globals (root is window) // Browser globals (root is window)
root.Papa = factory(); root.Papa = factory();
} }
}(this, function() // in strict mode we cannot access arguments.callee, so we need a named reference to
// stringify the factory method for the blob worker
// eslint-disable-next-line func-names
}(this, function moduleFactory()
{ {
'use strict'; 'use strict';
@ -51,9 +54,15 @@ if (!Array.isArray)
return {}; return {};
})(); })();
/**
 * Returns the blob: URL used to spawn Papa Parse workers, creating it on first use.
 *
 * The URL points at a Blob whose source is the stringified module factory wrapped in an
 * immediately-invoked call. It is cached on Papa.BLOB_URL and reused by later calls.
 *
 * @returns {string} The value of Papa.BLOB_URL.
 * @throws {Error} If neither global.URL nor global.webkitURL offers createObjectURL.
 */
function getWorkerBlob() {
	// Check the cache first so the factory is not re-stringified for every new worker.
	if (Papa.BLOB_URL)
		return Papa.BLOB_URL;

	var URL = global.URL || global.webkitURL || null;
	if (!URL || typeof URL.createObjectURL !== 'function')
		throw new Error('Blob URLs are not supported in this environment, so a worker cannot be created.');

	// Wrap the factory source in "(...)();" so the worker executes it as soon as it loads.
	var code = moduleFactory.toString();
	Papa.BLOB_URL = URL.createObjectURL(new Blob(['(', code, ')();'], {type: 'text/javascript'}));
	return Papa.BLOB_URL;
}
var IS_WORKER = !global.document && !!global.postMessage, var IS_WORKER = !global.document && !!global.postMessage,
IS_PAPA_WORKER = IS_WORKER && /(\?|&)papaworker(=|&|$)/.test(global.location.search), IS_PAPA_WORKER = IS_WORKER && /blob:/i.test((global.location || {}).protocol);
LOADED_SYNC = false, AUTO_SCRIPT_PATH;
var workers = {}, workerIdCounter = 0; var workers = {}, workerIdCounter = 0;
var Papa = {}; var Papa = {};
@ -66,7 +75,6 @@ if (!Array.isArray)
Papa.BYTE_ORDER_MARK = '\ufeff'; Papa.BYTE_ORDER_MARK = '\ufeff';
Papa.BAD_DELIMITERS = ['\r', '\n', '"', Papa.BYTE_ORDER_MARK]; Papa.BAD_DELIMITERS = ['\r', '\n', '"', Papa.BYTE_ORDER_MARK];
Papa.WORKERS_SUPPORTED = !IS_WORKER && !!global.Worker; Papa.WORKERS_SUPPORTED = !IS_WORKER && !!global.Worker;
Papa.SCRIPT_PATH = null; // Must be set by your code if you use workers and this lib is loaded asynchronously
Papa.NODE_STREAM_INPUT = 1; Papa.NODE_STREAM_INPUT = 1;
// Configurable chunk sizes for local and remote files, respectively // Configurable chunk sizes for local and remote files, respectively
@ -184,23 +192,6 @@ if (!Array.isArray)
{ {
global.onmessage = workerThreadReceivedMessage; global.onmessage = workerThreadReceivedMessage;
} }
else if (Papa.WORKERS_SUPPORTED)
{
AUTO_SCRIPT_PATH = getScriptPath();
// Check if the script was loaded synchronously
if (!document.body)
{
// Body doesn't exist yet, must be synchronous
LOADED_SYNC = true;
}
else
{
document.addEventListener('DOMContentLoaded', function() {
LOADED_SYNC = true;
}, true);
}
}
@ -1685,26 +1676,12 @@ if (!Array.isArray)
} }
// When Papa Parse is loaded asynchronously and worker threads are still required, hard-code
// the script path here. See: https://github.com/mholt/PapaParse/issues/87#issuecomment-57885358
//
// Returns the `src` of the last <script> element currently in the document, or an empty
// string when the document contains no <script> elements.
function getScriptPath()
{
	var scriptTags = document.getElementsByTagName('script');
	if (!scriptTags.length)
		return '';
	var lastTag = scriptTags[scriptTags.length - 1];
	return lastTag.src;
}
function newWorker() function newWorker()
{ {
if (!Papa.WORKERS_SUPPORTED) if (!Papa.WORKERS_SUPPORTED)
return false; return false;
if (!LOADED_SYNC && Papa.SCRIPT_PATH === null)
throw new Error( var workerUrl = getWorkerBlob();
'Script path cannot be determined automatically when Papa Parse is loaded asynchronously. ' +
'You need to set Papa.SCRIPT_PATH manually.'
);
var workerUrl = Papa.SCRIPT_PATH || AUTO_SCRIPT_PATH;
// Append 'papaworker' to the search string to tell papaparse that this is our worker.
workerUrl += (workerUrl.indexOf('?') !== -1 ? '&' : '?') + 'papaworker';
var w = new global.Worker(workerUrl); var w = new global.Worker(workerUrl);
w.onmessage = mainThreadReceivedMessage; w.onmessage = mainThreadReceivedMessage;
w.id = workerIdCounter++; w.id = workerIdCounter++;

52
tests/test-cases.js

@ -7,6 +7,7 @@ if (typeof module !== 'undefined' && module.exports) {
var assert = chai.assert; var assert = chai.assert;
var BASE_PATH = (typeof document === 'undefined') ? './' : document.getElementById('test-cases').src.replace(/test-cases\.js$/, '');
var RECORD_SEP = String.fromCharCode(30); var RECORD_SEP = String.fromCharCode(30);
var UNIT_SEP = String.fromCharCode(31); var UNIT_SEP = String.fromCharCode(31);
var FILES_ENABLED = false; var FILES_ENABLED = false;
@ -1396,7 +1397,7 @@ var PARSE_ASYNC_TESTS = [
}, },
{ {
description: "Simple download", description: "Simple download",
input: "sample.csv", input: BASE_PATH + "sample.csv",
config: { config: {
download: true download: true
}, },
@ -1408,7 +1409,7 @@ var PARSE_ASYNC_TESTS = [
}, },
{ {
description: "Simple download + worker", description: "Simple download + worker",
input: "tests/sample.csv", input: BASE_PATH + "sample.csv",
config: { config: {
worker: true, worker: true,
download: true download: true
@ -1761,7 +1762,7 @@ var CUSTOM_TESTS = [
disabled: !XHR_ENABLED, disabled: !XHR_ENABLED,
run: function(callback) { run: function(callback) {
var updates = []; var updates = [];
Papa.parse("/tests/long-sample.csv", { Papa.parse(BASE_PATH + "long-sample.csv", {
download: true, download: true,
step: function(response) { step: function(response) {
updates.push(response.meta.cursor); updates.push(response.meta.cursor);
@ -1778,7 +1779,7 @@ var CUSTOM_TESTS = [
disabled: !XHR_ENABLED, disabled: !XHR_ENABLED,
run: function(callback) { run: function(callback) {
var updates = []; var updates = [];
Papa.parse("/tests/long-sample.csv", { Papa.parse(BASE_PATH + "long-sample.csv", {
download: true, download: true,
chunkSize: 500, chunkSize: 500,
step: function(response) { step: function(response) {
@ -1796,7 +1797,7 @@ var CUSTOM_TESTS = [
disabled: !XHR_ENABLED, disabled: !XHR_ENABLED,
run: function(callback) { run: function(callback) {
var updates = []; var updates = [];
Papa.parse("/tests/long-sample.csv", { Papa.parse(BASE_PATH + "long-sample.csv", {
download: true, download: true,
chunkSize: 500, chunkSize: 500,
worker: true, worker: true,
@ -1815,7 +1816,7 @@ var CUSTOM_TESTS = [
disabled: !XHR_ENABLED, disabled: !XHR_ENABLED,
run: function(callback) { run: function(callback) {
var updates = []; var updates = [];
Papa.parse("/tests/long-sample.csv", { Papa.parse(BASE_PATH + "long-sample.csv", {
download: true, download: true,
chunkSize: 500, chunkSize: 500,
chunk: function(response) { chunk: function(response) {
@ -1833,7 +1834,7 @@ var CUSTOM_TESTS = [
disabled: !XHR_ENABLED, disabled: !XHR_ENABLED,
run: function(callback) { run: function(callback) {
var updates = []; var updates = [];
Papa.parse("/tests/long-sample.csv", { Papa.parse(BASE_PATH + "long-sample.csv", {
download: true, download: true,
chunkSize: 500, chunkSize: 500,
chunk: function(response) { chunk: function(response) {
@ -2042,7 +2043,7 @@ var CUSTOM_TESTS = [
disabled: !XHR_ENABLED, disabled: !XHR_ENABLED,
run: function(callback) { run: function(callback) {
var updates = 0; var updates = 0;
Papa.parse("/tests/long-sample.csv", { Papa.parse(BASE_PATH + "long-sample.csv", {
worker: true, worker: true,
download: true, download: true,
chunkSize: 500, chunkSize: 500,
@ -2062,7 +2063,7 @@ var CUSTOM_TESTS = [
disabled: !XHR_ENABLED, disabled: !XHR_ENABLED,
run: function(callback) { run: function(callback) {
var updates = 0; var updates = 0;
Papa.parse("/tests/long-sample.csv", { Papa.parse(BASE_PATH + "long-sample.csv", {
download: true, download: true,
chunkSize: 500, chunkSize: 500,
beforeFirstChunk: function(chunk) { beforeFirstChunk: function(chunk) {
@ -2083,7 +2084,7 @@ var CUSTOM_TESTS = [
disabled: !XHR_ENABLED, disabled: !XHR_ENABLED,
run: function(callback) { run: function(callback) {
var updates = 0; var updates = 0;
Papa.parse("/tests/long-sample.csv", { Papa.parse(BASE_PATH + "long-sample.csv", {
download: true, download: true,
chunkSize: 500, chunkSize: 500,
beforeFirstChunk: function(chunk) { beforeFirstChunk: function(chunk) {
@ -2096,37 +2097,6 @@ var CUSTOM_TESTS = [
} }
}); });
} }
},
// Custom test case: starts one Worker per search string and records which of them
// post a message back within half a second.
{
description: "Should not assume we own the worker unless papaworker is in the search string",
// Skipped when the environment has no Worker constructor.
disabled: typeof Worker === 'undefined',
// One entry per search string below, in the same order.
expected: [false, true, true, true, true],
// Runs the check and passes the array of per-worker booleans to `callback`.
run: function(callback) {
// Query strings appended to the script URL; the first one carries no papaworker marker.
var searchStrings = [
'',
'?papaworker',
'?x=1&papaworker',
'?x=1&papaworker&y=1',
'?x=1&papaworker=1'
];
// Every result starts as false and flips to true only if that worker posts a message.
var results = searchStrings.map(function() { return false; });
var workers = [];
// Give the workers 0.5s to respond, then terminate them all and report the results
setTimeout(function() {
workers.forEach(function(w) { w.terminate(); });
callback(results);
}, 500);
searchStrings.forEach(function(searchString, idx) {
var w = new Worker('../papaparse.js' + searchString);
workers.push(w);
// Any message from this worker marks its slot as true.
w.addEventListener('message', function() {
results[idx] = true;
});
// Send a small CSV input to the worker.
w.postMessage({input: 'a,b,c\n1,2,3'});
});
}
} }
]; ];

2
tests/tests.html

@ -9,7 +9,7 @@
<script src="../node_modules/chai/chai.js"></script> <script src="../node_modules/chai/chai.js"></script>
<script>mocha.setup('bdd')</script> <script>mocha.setup('bdd')</script>
<script src="test-cases.js"></script> <script src="test-cases.js" id="test-cases"></script>
</head> </head>
<body> <body>
<div id="mocha"></div> <div id="mocha"></div>

Loading…
Cancel
Save