Browse Source

Merge pull request #137 from bluej100/chunkstreamer

refactor common functionality of chunk streaming classes
pull/138/head
Matt Holt 10 years ago
parent
commit
7307708420
  1. 399
      papaparse.js

399
papaparse.js

@ -403,73 +403,158 @@ @@ -403,73 +403,158 @@
}
}
// TODO: Many of the functions of NetworkStreamer and FileStreamer are similar or the same. Consolidate?
function NetworkStreamer(config)
function bindFunction(f, self)
{
config = config || {};
if (!config.chunkSize)
config.chunkSize = Papa.RemoteChunkSize;
return function() {
f.apply(self, arguments);
}
}
var start = 0, baseIndex = 0, fileSize = 0, rowCount = 0;
var aggregate = "";
var partialLine = "";
var self = this;
var xhr, url, nextChunk, finishedWithEntireFile;
var userComplete, handle, configCopy;
replaceConfig(config);
function ChunkStreamer(config)
{
this._handle = null;
this._paused = false;
this._finished = false;
this._userComplete = null;
this._input = null;
this._baseIndex = 0;
this._partialLine = "";
this._rowCount = 0;
this._start = 0;
this._config = replaceConfig.call(this, config);
this.resume = function()
{
paused = false;
nextChunk();
this._paused = false;
this._nextChunk();
};
this.finished = function()
{
return finishedWithEntireFile;
return this._finished;
};
this.pause = function()
{
paused = true;
this._paused = true;
};
this.abort = function()
{
finishedWithEntireFile = true;
if (isFunction(userComplete))
userComplete({ data: [], errors: [], meta: { aborted: true } });
this._finished = true;
if (isFunction(this._userComplete))
this._userComplete({ data: [], errors: [], meta: { aborted: true } });
};
this.stream = function(u)
{
url = u;
this._nextChunk = null;
this._parseChunk = function(chunk) {
// Rejoin the line we likely just split in two by chunking the file
var aggregate = this._partialLine + chunk;
this._partialLine = "";
var results = this._handle.parse(aggregate, this._baseIndex, !this._finished);
var lastIndex = results.meta.cursor;
if (!this._finished)
{
this._partialLine = aggregate.substring(lastIndex - this._baseIndex);
this._baseIndex = lastIndex;
}
if (results && results.data)
this._rowCount += results.data.length;
var finishedIncludingPreview = this._finished || (this._config.preview && this._rowCount >= this._config.preview);
if (IS_WORKER)
{
nextChunk = function()
{
readChunk();
chunkLoaded();
};
global.postMessage({
results: results,
workerId: Papa.WORKER_ID,
finished: finishedIncludingPreview
});
}
else
else if (isFunction(this._config.chunk))
{
nextChunk = function()
{
readChunk();
};
this._config.chunk(results, this._handle);
if (this._paused)
return;
results = undefined;
}
if (isFunction(this._userComplete) && finishedIncludingPreview)
this._userComplete(results);
if (!finishedIncludingPreview && (!results || !results.meta.paused))
this._nextChunk();
};
// Reports an error to whoever is listening for it.
// - If the config carries an error callback, it is invoked with the error.
// - Otherwise, when running as a worker and config.error is truthy, the error
//   is posted as a message tagged with this worker's ID and finished: false.
// - In every other case the error is dropped without notice.
this._sendError = function(error)
{
	var errorOption = this._config.error;

	if (isFunction(errorOption))
	{
		// Invoke through the config object so the callback's `this` stays the config
		this._config.error(error);
		return;
	}

	if (!IS_WORKER || !errorOption)
		return;

	var message = {
		workerId: Papa.WORKER_ID,
		error: error,
		finished: false
	};
	global.postMessage(message);
};
nextChunk(); // Starts streaming
// Builds the streamer's private working config and its parser.
//
// Must be called with the streamer as `this` (replaceConfig.call(streamer, config)).
// Side effects on `this`:
//   _userComplete - the user's `complete` callback taken from the config copy
//   _handle       - a new ParserHandle built from the copy, with its
//                   `streamer` property pointing back at `this`
//
// config:  the user-supplied configuration; it is passed to copy() and the
//          edits below are made on the returned copy.
// returns: the copy, with `complete` cleared and `chunkSize` parsed to an integer.
function replaceConfig(config)
{
	// Copy the config so we can edit it; we need
	// to call the complete function ourselves if we are to ensure
	// that the last chunk callback, if any, will be called
	// BEFORE the complete function.
	var configCopy = copy(config);
	this._userComplete = configCopy.complete;
	configCopy.complete = undefined;

	// VERY important so we don't concatenate strings when doing offset math!
	// The explicit radix keeps the result decimal regardless of how the
	// string is prefixed (no hex/legacy-octal guessing by the engine).
	configCopy.chunkSize = parseInt(configCopy.chunkSize, 10);

	this._handle = new ParserHandle(configCopy);
	this._handle.streamer = this;
	return configCopy;
}
}
function NetworkStreamer(config)
{
config = config || {};
if (!config.chunkSize)
config.chunkSize = Papa.RemoteChunkSize;
ChunkStreamer.call(this, config);
var xhr;
// Decide once, at construction time, how the next chunk is fetched.
// In a worker the request is opened synchronously and no onload handler is
// attached, so the load handler has to be invoked by hand right after the read.
// Outside a worker the read alone is enough here.
var callLoadedDirectly = IS_WORKER ? true : false;
this._nextChunk = function()
{
	this._readChunk();
	if (callLoadedDirectly)
		this._chunkLoaded();
};
this.stream = function(url)
{
this._input = url;
this._nextChunk(); // Starts streaming
};
function readChunk()
this._readChunk = function()
{
if (finishedWithEntireFile)
if (this._finished)
{
chunkLoaded();
this._chunkLoaded();
return;
}
@ -477,18 +562,16 @@ @@ -477,18 +562,16 @@
if (!IS_WORKER)
{
xhr.onload = chunkLoaded;
xhr.onerror = chunkError;
xhr.onload = bindFunction(this._chunkLoaded, this);
xhr.onerror = bindFunction(this._chunkError, this);
}
xhr.open("GET", url, !IS_WORKER);
xhr.open("GET", this._input, !IS_WORKER);
if (config.step || config.chunk)
if (this._config.step || this._config.chunk)
{
var end = start + configCopy.chunkSize - 1; // minus one because byte range is inclusive
if (fileSize && end > fileSize) // Hack around a Chrome bug: http://stackoverflow.com/q/24745095/1048862
end = fileSize;
xhr.setRequestHeader("Range", "bytes="+start+"-"+end);
var end = this._start + this._config.chunkSize - 1; // minus one because byte range is inclusive
xhr.setRequestHeader("Range", "bytes="+this._start+"-"+end);
xhr.setRequestHeader("If-None-Match", "webkit-no-cache"); // https://bugs.webkit.org/show_bug.cgi?id=82672
}
@ -496,79 +579,34 @@ @@ -496,79 +579,34 @@
xhr.send();
}
catch (err) {
chunkError(err.message);
this._chunkError(err.message);
}
if (IS_WORKER && xhr.status == 0)
chunkError();
this._chunkError();
else
start += configCopy.chunkSize;
this._start += this._config.chunkSize;
}
function chunkLoaded()
this._chunkLoaded = function()
{
if (xhr.readyState != 4)
return;
if (xhr.status < 200 || xhr.status >= 400)
{
chunkError();
this._chunkError();
return;
}
// Rejoin the line we likely just split in two by chunking the file
aggregate += partialLine + xhr.responseText;
partialLine = "";
finishedWithEntireFile = (!config.step && !config.chunk) || start > getFileSize(xhr);
var results = handle.parse(aggregate, baseIndex, !finishedWithEntireFile);
var lastIndex = results.meta.cursor;
if (!finishedWithEntireFile)
{
partialLine = aggregate.substring(lastIndex - baseIndex);
baseIndex = lastIndex;
}
if (results && results.data)
rowCount += results.data.length;
aggregate = "";
var finishedIncludingPreview = finishedWithEntireFile || (configCopy.preview && rowCount >= configCopy.preview);
if (IS_WORKER)
{
global.postMessage({
results: results,
workerId: Papa.WORKER_ID,
finished: finishedIncludingPreview
});
}
else if (isFunction(config.chunk))
{
config.chunk(results, handle);
results = undefined;
}
if (isFunction(userComplete) && finishedIncludingPreview)
userComplete(results);
if (!finishedIncludingPreview && (!results || !results.meta.paused))
nextChunk();
this._finished = (!this._config.step && !this._config.chunk) || this._start > getFileSize(xhr);
this._parseChunk(xhr.responseText);
}
function chunkError(errorMessage)
this._chunkError = function(errorMessage)
{
var errorText = xhr.statusText || errorMessage;
if (isFunction(config.error))
config.error(errorText);
else if (IS_WORKER && config.error)
{
global.postMessage({
workerId: Papa.WORKER_ID,
error: errorText,
finished: false
});
}
this._sendError(errorText);
}
function getFileSize(xhr)
@ -576,28 +614,9 @@ @@ -576,28 +614,9 @@
var contentRange = xhr.getResponseHeader("Content-Range");
return parseInt(contentRange.substr(contentRange.lastIndexOf("/") + 1));
}
// Builds the streamer's private working config and its parser.
//
// Writes to variables of the enclosing streamer function:
//   configCopy   - the result of copy(config), edited below
//   userComplete - the user's `complete` callback taken from that copy
//   handle       - a new ParserHandle built from the copy, with its
//                  `streamer` property set to `self`
//
// config:  the user-supplied configuration.
// returns: nothing.
function replaceConfig(config)
{
	// Copy the config so we can edit it; we need
	// to call the complete function ourselves if we are to ensure
	// that the last chunk callback, if any, will be called
	// BEFORE the complete function.
	configCopy = copy(config);
	userComplete = configCopy.complete;
	configCopy.complete = undefined;

	// VERY important so we don't concatenate strings when doing offset math!
	// The explicit radix keeps the result decimal regardless of how the
	// string is prefixed (no hex/legacy-octal guessing by the engine).
	configCopy.chunkSize = parseInt(configCopy.chunkSize, 10);

	handle = new ParserHandle(configCopy);
	handle.streamer = self;
}
}
NetworkStreamer.prototype = Object.create(ChunkStreamer.prototype);
NetworkStreamer.prototype.constructor = NetworkStreamer;
function FileStreamer(config)
@ -605,154 +624,62 @@ @@ -605,154 +624,62 @@
config = config || {};
if (!config.chunkSize)
config.chunkSize = Papa.LocalChunkSize;
ChunkStreamer.call(this, config);
var start = 0, baseIndex = 0;
var file;
var slice;
var aggregate = "";
var partialLine = "";
var rowCount = 0;
var paused = false;
var self = this;
var reader, nextChunk, slice, finishedWithEntireFile;
var userComplete, handle, configCopy;
replaceConfig(config);
var reader, slice;
// FileReader is better than FileReaderSync (even in worker) - see http://stackoverflow.com/q/24708649/1048862
// But Firefox is a pill, too - see issue #76: https://github.com/mholt/PapaParse/issues/76
var usingAsyncReader = typeof FileReader !== 'undefined'; // Safari doesn't consider it a function - see issue #105
this.stream = function(f)
this.stream = function(file)
{
file = f;
this._input = file;
slice = file.slice || file.webkitSlice || file.mozSlice;
if (usingAsyncReader)
{
reader = new FileReader(); // Preferred method of reading files, even in workers
reader.onload = chunkLoaded;
reader.onerror = chunkError;
reader.onload = bindFunction(this._chunkLoaded, this);
reader.onerror = bindFunction(this._chunkError, this);
}
else
reader = new FileReaderSync(); // Hack for running in a web worker in Firefox
nextChunk(); // Starts streaming
};
this.finished = function()
{
return finishedWithEntireFile;
};
this.pause = function()
{
paused = true;
};
this.resume = function()
{
paused = false;
nextChunk();
};
this.abort = function()
{
finishedWithEntireFile = true;
if (isFunction(userComplete))
userComplete({ data: [], errors: [], meta: { aborted: true } });
this._nextChunk(); // Starts streaming
};
function nextChunk()
this._nextChunk = function()
{
if (!finishedWithEntireFile && (!configCopy.preview || rowCount < configCopy.preview))
readChunk();
if (!this._finished && (!this._config.preview || this._rowCount < this._config.preview))
this._readChunk();
}
function readChunk()
this._readChunk = function()
{
var end = Math.min(start + configCopy.chunkSize, file.size);
var txt = reader.readAsText(slice.call(file, start, end), config.encoding);
var end = Math.min(this._start + this._config.chunkSize, this._input.size);
var txt = reader.readAsText(slice.call(this._input, this._start, end), this._config.encoding);
if (!usingAsyncReader)
chunkLoaded({ target: { result: txt } }); // mimic the async signature
this._chunkLoaded({ target: { result: txt } }); // mimic the async signature
}
function chunkLoaded(event)
this._chunkLoaded = function(event)
{
// Very important to increment start each time before handling results
start += configCopy.chunkSize;
// Rejoin the line we likely just split in two by chunking the file
aggregate += partialLine + event.target.result;
partialLine = "";
finishedWithEntireFile = start >= file.size;
var results = handle.parse(aggregate, baseIndex, !finishedWithEntireFile);
var lastIndex = results.meta.cursor;
if (!finishedWithEntireFile)
{
partialLine = aggregate.substring(lastIndex - baseIndex);
baseIndex = lastIndex;
}
if (results && results.data)
rowCount += results.data.length;
aggregate = "";
var finishedIncludingPreview = finishedWithEntireFile || (configCopy.preview && rowCount >= configCopy.preview);
if (IS_WORKER)
{
global.postMessage({
results: results,
workerId: Papa.WORKER_ID,
finished: finishedIncludingPreview
});
}
else if (isFunction(config.chunk))
{
config.chunk(results, self, file);
if (paused)
return;
results = undefined;
}
if (isFunction(userComplete) && finishedIncludingPreview)
userComplete(results);
if (!finishedIncludingPreview && (!results || !results.meta.paused))
nextChunk();
}
function chunkError()
{
if (isFunction(config.error))
config.error(reader.error, file);
else if (IS_WORKER && config.error)
{
global.postMessage({
workerId: Papa.WORKER_ID,
error: reader.error,
file: file,
finished: false
});
}
this._start += this._config.chunkSize;
this._finished = this._start >= this._input.size;
this._parseChunk(event.target.result);
}
function replaceConfig(config)
this._chunkError = function()
{
// Deep-copy the config so we can edit it; we need
// to call the complete function if we are to ensure
// that the last chunk callback, if any, will be called
// BEFORE the complete function.
configCopy = copy(config);
userComplete = configCopy.complete;
configCopy.complete = undefined;
configCopy.chunkSize = parseInt(configCopy.chunkSize); // VERY important so we don't concatenate strings!
handle = new ParserHandle(configCopy);
handle.streamer = self;
this._sendError(reader.error);
}
}
FileStreamer.prototype = Object.create(ChunkStreamer.prototype);
FileStreamer.prototype.constructor = FileStreamer;

Loading…
Cancel
Save