Browse Source

Numerous improvements and significant bug fixes

pull/62/head
Matthew Holt 11 years ago
parent
commit
919a5ad255
  1. 283
      papaparse.js
  2. 2
      player/player.js

283
papaparse.js

@ -153,9 +153,11 @@
w.userStep = config.step; w.userStep = config.step;
w.userComplete = config.complete; w.userComplete = config.complete;
w.userError = config.error;
config.step = isFunction(config.step); config.step = isFunction(config.step);
config.complete = isFunction(config.complete); config.complete = isFunction(config.complete);
config.error = isFunction(config.error);
delete config.worker; // prevent infinite loop delete config.worker; // prevent infinite loop
w.postMessage({ w.postMessage({
@ -175,17 +177,6 @@
} }
else else
{ {
if (IS_WORKER && config.step)
{
config.step = function(results)
{
global.postMessage({
workerId: Papa.WORKER_ID,
results: results,
finished: false
});
};
}
var ph = new ParserHandle(config); var ph = new ParserHandle(config);
var results = ph.parse(_input); var results = ph.parse(_input);
if (isFunction(config.complete)) if (isFunction(config.complete))
@ -232,15 +223,19 @@
function JsonToCsv(_input) function JsonToCsv(_input, _config)
{ {
var _output = ""; var _output = "";
var _fields = []; var _fields = [];
var _delimiter = ",";
// Default configuration
var _quotes = false; // whether to surround every datum with quotes var _quotes = false; // whether to surround every datum with quotes
var _newline = "\r\n"; var _delimiter = ","; // delimiting character
var _newline = "\r\n"; // newline character(s)
unpackConfig();
if (typeof _input === "string") if (typeof _input === 'string')
_input = JSON.parse(_input); _input = JSON.parse(_input);
if (_input instanceof Array) if (_input instanceof Array)
@ -256,20 +251,7 @@
_input.data = JSON.parse(_input.data); _input.data = JSON.parse(_input.data);
if (_input.data instanceof Array) if (_input.data instanceof Array)
{ {
if (typeof _input.delimiter === 'string'
&& _input.delimiter.length == 1
&& global.Papa.BAD_DELIMITERS.indexOf(_input.delimiter) == -1)
{
_delimiter = _input.delimiter;
}
if (typeof _input.quotes === 'boolean')
_quotes = _input.quotes;
if (typeof _input.newline === 'string')
_newline = _input.newline;
if (!_input.fields) if (!_input.fields)
_input.fields = _input.data[0] instanceof Array _input.fields = _input.data[0] instanceof Array
? _input.fields ? _input.fields
@ -286,6 +268,25 @@
throw "exception: Unable to serialize unrecognized input"; throw "exception: Unable to serialize unrecognized input";
// Copies the recognized options from _config into _delimiter, _quotes and
// _newline. An option that is absent or has the wrong type leaves the
// corresponding variable at its current value.
function unpackConfig()
{
	// typeof null is 'object', so null has to be rejected explicitly;
	// otherwise the property reads below would throw a TypeError.
	if (typeof _config !== 'object' || _config === null)
		return;

	// Accept only a single-character delimiter that is not listed in
	// Papa.BAD_DELIMITERS.
	if (typeof _config.delimiter === 'string'
		&& _config.delimiter.length === 1
		&& global.Papa.BAD_DELIMITERS.indexOf(_config.delimiter) === -1)
	{
		_delimiter = _config.delimiter;
	}

	if (typeof _config.quotes === 'boolean')
		_quotes = _config.quotes;

	if (typeof _config.newline === 'string')
		_newline = _config.newline;
}
// Turns an object's keys into an array // Turns an object's keys into an array
function objectKeys(obj) function objectKeys(obj)
@ -373,7 +374,7 @@
// TODO: The NetworkStreamer and FileStreamer have much in common. Consolidate?
function NetworkStreamer(config) function NetworkStreamer(config)
{ {
config = config || {}; config = config || {};
@ -388,7 +389,6 @@
this.stream = function(url) this.stream = function(url)
{ {
// TODO: Pull this setup out of the streamer and have reader, nextChunk and chunkLoaded passed in?
if (IS_WORKER) if (IS_WORKER)
{ {
nextChunk = function() nextChunk = function()
@ -423,8 +423,9 @@
xhr.setRequestHeader("Range", "bytes="+start+"-"+end); xhr.setRequestHeader("Range", "bytes="+start+"-"+end);
} }
xhr.send(); xhr.send();
if (IS_WORKER && xhr.status == 0)
chunkError();
start += config.chunkSize; start += config.chunkSize;
return xhr.responseText;
} }
function chunkLoaded() function chunkLoaded()
@ -432,6 +433,12 @@
if (xhr.readyState != 4) if (xhr.readyState != 4)
return; return;
if (xhr.status < 200 || xhr.status >= 400)
{
chunkError();
return;
}
// Rejoin the line we likely just split in two by chunking the file // Rejoin the line we likely just split in two by chunking the file
aggregate += partialLine + xhr.responseText; aggregate += partialLine + xhr.responseText;
partialLine = ""; partialLine = "";
@ -471,11 +478,8 @@
}); });
} }
if (finishedWithEntireFile) if (finishedWithEntireFile && isFunction(config.complete))
{ config.complete(results);
if (isFunction(config.complete))
config.complete(undefined);
}
else if (results.meta.aborted && isFunction(config.complete)) else if (results.meta.aborted && isFunction(config.complete))
config.complete(results); config.complete(results);
else else
@ -485,7 +489,15 @@
function chunkError() function chunkError()
{ {
if (isFunction(config.error)) if (isFunction(config.error))
config.error(reader.error, file); config.error(xhr.statusText);
else if (IS_WORKER && config.error)
{
global.postMessage({
workerId: Papa.WORKER_ID,
error: xhr.statusText,
finished: false
});
}
} }
}; };
} }
@ -514,39 +526,17 @@
{ {
var slice = file.slice || file.webkitSlice || file.mozSlice; var slice = file.slice || file.webkitSlice || file.mozSlice;
// TODO: Pull this setup out of the streamer and have reader, nextChunk and chunkLoaded passed in? reader = new FileReader(); // Better than FileReaderSync (even in worker threads). See: http://stackoverflow.com/q/24708649/1048862
// TODO/NOTE: Using FileReaderSync introduces very weird performance issues. reader.onload = chunkLoaded;
// See: http://stackoverflow.com/q/24708649/1048862 reader.onerror = chunkError;
/*if (IS_WORKER)
{
reader = new FileReaderSync();
nextChunk = function()
{
if (start < file.size)
chunkLoaded({
target: { // simulate the structure of a FileReader event
result: readChunk()
}
});
};
}
else
{*/
reader = new FileReader();
reader.onload = chunkLoaded;
reader.onerror = chunkError;
nextChunk = function()
{
if (start < file.size)
readChunk();
};
//}
nextChunk(); // Starts streaming nextChunk(); // Starts streaming
// Calls readChunk() only while the current offset (start) is still
// less than file.size; otherwise does nothing.
function nextChunk()
{
	var hasMoreToRead = start < file.size;

	if (hasMoreToRead)
		readChunk();
}
function readChunk() function readChunk()
{ {
@ -609,6 +599,15 @@
{ {
if (isFunction(config.error)) if (isFunction(config.error))
config.error(reader.error, file); config.error(reader.error, file);
else if (IS_WORKER && config.error)
{
global.postMessage({
workerId: Papa.WORKER_ID,
error: reader.error,
file: file,
finished: false
});
}
} }
}; };
} }
@ -640,72 +639,34 @@
_config.delimiter = delimGuess.bestDelimiter; _config.delimiter = delimGuess.bestDelimiter;
else else
{ {
addError("Delimiter", "UndetectableDelimiter", "Unable to auto-detect delimiting character; defaulted to comma"); addError(_results, "Delimiter", "UndetectableDelimiter", "Unable to auto-detect delimiting character; defaulted to comma");
_config.delimiter = ","; _config.delimiter = ",";
} }
_results.meta.delimiter = _config.delimiter; _results.meta.delimiter = _config.delimiter;
} }
var parser = new Parser(_config); if (isFunction(_config.step))
_results = parser.parse(input);
if (needsHeaderRow())
fillHeaderFields();
var results = applyHeaderAndDynamicTyping();
return results;
};
function guessDelimiter(input)
{
var delimChoices = [",", "\t", "|", ";", Papa.RECORD_SEP, Papa.UNIT_SEP];
var bestDelim, bestDelta, fieldCountPrevRow;
for (var i = 0; i < delimChoices.length; i++)
{ {
var delim = delimChoices[i]; var userStep = _config.step;
var delta = 0, avgFieldCount = 0; _config.step = function(results, parser)
fieldCountPrevRow = undefined;
var preview = new Parser({
delimiter: delim,
preview: 10
}).parse(input);
for (var j = 0; j < preview.data.length; j++)
{ {
var fieldCount = preview.data[j].length; _results = results;
avgFieldCount += fieldCount; if (needsHeaderRow())
processResults();
if (typeof fieldCountPrevRow === 'undefined') else
{ userStep(processResults(), parser);
fieldCountPrevRow = fieldCount; };
continue;
}
else if (fieldCount > 1)
{
delta += Math.abs(fieldCount - fieldCountPrevRow);
fieldCountPrevRow = fieldCount;
}
}
avgFieldCount /= preview.data.length;
if ((typeof bestDelta === 'undefined' || delta < bestDelta)
&& avgFieldCount > 1.99)
{
bestDelta = delta;
bestDelim = delim;
}
} }
_config.delimiter = bestDelim; _results = new Parser(_config).parse(input);
return processResults();
};
return { function processResults()
successful: !!bestDelim, {
bestDelimiter: bestDelim if (needsHeaderRow())
} fillHeaderFields();
return applyHeaderAndDynamicTyping();
} }
function needsHeaderRow() function needsHeaderRow()
@ -725,8 +686,8 @@
function applyHeaderAndDynamicTyping() function applyHeaderAndDynamicTyping()
{ {
if (!_results) if (!_results || (!_config.header && !_config.dynamicTyping))
return; return _results;
for (var i = 0; i < _results.data.length; i++) for (var i = 0; i < _results.data.length; i++)
{ {
@ -736,7 +697,6 @@
if (_config.dynamicTyping) if (_config.dynamicTyping)
{ {
var value = _results.data[i][j]; var value = _results.data[i][j];
if (value == "true") if (value == "true")
_results.data[i][j] = true; _results.data[i][j] = true;
else if (value == "false") else if (value == "false")
@ -770,6 +730,57 @@
return _results; return _results;
} }
// Tries each candidate delimiter against a 10-row preview of the input and
// picks the one whose rows vary least in field count, provided the rows
// average more than 1.99 fields. Ties keep the earlier candidate.
// Writes the pick (possibly undefined) to _config.delimiter and returns
// { successful, bestDelimiter }.
function guessDelimiter(input)
{
	var candidates = [",", "\t", "|", ";", Papa.RECORD_SEP, Papa.UNIT_SEP];
	var winner, winnerVariation;

	// Parses a preview with one delimiter and reports the summed change in
	// field count between rows plus the average number of fields per row.
	function measure(candidate)
	{
		var rows = new Parser({
			delimiter: candidate,
			preview: 10
		}).parse(input).data;

		var fieldTotal = 0, variation = 0, lastCount;

		for (var r = 0; r < rows.length; r++)
		{
			var count = rows[r].length;
			fieldTotal += count;

			if (typeof lastCount === 'undefined')
			{
				// First row only establishes the baseline
				lastCount = count;
			}
			else if (count > 1)
			{
				// Rows with a single field do not move the baseline
				variation += Math.abs(count - lastCount);
				lastCount = count;
			}
		}

		return {
			variation: variation,
			average: fieldTotal / rows.length
		};
	}

	candidates.forEach(function(candidate)
	{
		var score = measure(candidate);

		// Written in positive form so a NaN average (no rows) is rejected too
		if (!(score.average > 1.99))
			return;

		if (typeof winnerVariation === 'undefined' || score.variation < winnerVariation)
		{
			winnerVariation = score.variation;
			winner = candidate;
		}
	});

	_config.delimiter = winner;

	return {
		successful: !!winner,
		bestDelimiter: winner
	};
}
function tryParseFloat(val) function tryParseFloat(val)
{ {
var isNumber = FLOAT.test(val); var isNumber = FLOAT.test(val);
@ -876,7 +887,7 @@
{ {
if (_aborted) break; if (_aborted) break;
if (_preview > 0 && _rowIdx >= _preview) break; if (_preview > 0 && _rowIdx >= _preview) break;
if (_paused) return returnable(); if (_paused) return finishParsing();
if (_ch == '"') if (_ch == '"')
parseQuotes(); parseQuotes();
@ -904,7 +915,8 @@
if (_inQuotes) if (_inQuotes)
addError("Quotes", "MissingQuotes", "Unescaped or mismatched quotes"); addError("Quotes", "MissingQuotes", "Unescaped or mismatched quotes");
endRow(); // End of input is also end of the last row endRow(); // End of input is also end of the last row
return returnable(); if (!isFunction(_step))
return returnable();
} }
function parseQuotes() function parseQuotes()
@ -988,7 +1000,7 @@
function endRow() function endRow()
{ {
trimEmptyLastRow(); trimEmptyLastRow();
if (typeof _step === 'function') if (isFunction(_step))
{ {
if (_data[_rowIdx]) if (_data[_rowIdx])
_step(returnable(), self); _step(returnable(), self);
@ -1121,8 +1133,17 @@
if (msg.results && msg.results.data && isFunction(worker.userStep)) if (msg.results && msg.results.data && isFunction(worker.userStep))
{ {
for (var i = 0; i < msg.results.data.length; i++) for (var i = 0; i < msg.results.data.length; i++)
worker.userStep(msg.results.data[i]); {
worker.userStep({
data: [msg.results.data[i]],
errors: msg.results.errors,
meta: msg.results.meta
});
}
delete msg.results; // free memory ASAP
} }
else if (msg.error)
worker.userError(msg.error, msg.file);
if (msg.finished) if (msg.finished)
{ {

2
player/player.js

@ -48,7 +48,7 @@ function buildConfig()
delimiter: $('#delimiter').val(), delimiter: $('#delimiter').val(),
header: $('#header').prop('checked'), header: $('#header').prop('checked'),
dynamicTyping: $('#dynamicTyping').prop('checked'), dynamicTyping: $('#dynamicTyping').prop('checked'),
preview: parseInt($('#preview').val()), preview: parseInt($('#preview').val() || 0),
step: $('#stream').prop('checked') ? stepFn : undefined, step: $('#stream').prop('checked') ? stepFn : undefined,
encoding: $('#encoding').val(), encoding: $('#encoding').val(),
worker: $('#worker').prop('checked'), worker: $('#worker').prop('checked'),

Loading…
Cancel
Save