Browse Source

Numerous improvements and significant bug fixes

pull/62/head
Matthew Holt 11 years ago
parent
commit
919a5ad255
  1. 271
      papaparse.js
  2. 2
      player/player.js

271
papaparse.js

@ -153,9 +153,11 @@ @@ -153,9 +153,11 @@
w.userStep = config.step;
w.userComplete = config.complete;
w.userError = config.error;
config.step = isFunction(config.step);
config.complete = isFunction(config.complete);
config.error = isFunction(config.error);
delete config.worker; // prevent infinite loop
w.postMessage({
@ -175,17 +177,6 @@ @@ -175,17 +177,6 @@
}
else
{
if (IS_WORKER && config.step)
{
config.step = function(results)
{
global.postMessage({
workerId: Papa.WORKER_ID,
results: results,
finished: false
});
};
}
var ph = new ParserHandle(config);
var results = ph.parse(_input);
if (isFunction(config.complete))
@ -232,15 +223,19 @@ @@ -232,15 +223,19 @@
function JsonToCsv(_input)
function JsonToCsv(_input, _config)
{
var _output = "";
var _fields = [];
var _delimiter = ",";
// Default configuration
var _quotes = false; // whether to surround every datum with quotes
var _newline = "\r\n";
var _delimiter = ","; // delimiting character
var _newline = "\r\n"; // newline character(s)
unpackConfig();
if (typeof _input === "string")
if (typeof _input === 'string')
_input = JSON.parse(_input);
if (_input instanceof Array)
@ -257,19 +252,6 @@ @@ -257,19 +252,6 @@
if (_input.data instanceof Array)
{
if (typeof _input.delimiter === 'string'
&& _input.delimiter.length == 1
&& global.Papa.BAD_DELIMITERS.indexOf(_input.delimiter) == -1)
{
_delimiter = _input.delimiter;
}
if (typeof _input.quotes === 'boolean')
_quotes = _input.quotes;
if (typeof _input.newline === 'string')
_newline = _input.newline;
if (!_input.fields)
_input.fields = _input.data[0] instanceof Array
? _input.fields
@ -286,6 +268,25 @@ @@ -286,6 +268,25 @@
throw "exception: Unable to serialize unrecognized input";
// Copies the recognized serialization options from _config into the enclosing
// function's _delimiter, _quotes and _newline variables. Each option is only
// applied when it has the expected type; anything else leaves the existing
// value untouched.
function unpackConfig()
{
	// typeof null is 'object', so null has to be rejected explicitly;
	// otherwise the property reads below would throw a TypeError.
	if (typeof _config !== 'object' || _config === null)
		return;

	// Only a single character that is not listed in Papa.BAD_DELIMITERS is accepted
	if (typeof _config.delimiter === 'string'
		&& _config.delimiter.length === 1
		&& global.Papa.BAD_DELIMITERS.indexOf(_config.delimiter) === -1)
	{
		_delimiter = _config.delimiter;
	}

	if (typeof _config.quotes === 'boolean')
		_quotes = _config.quotes;

	if (typeof _config.newline === 'string')
		_newline = _config.newline;
}
// Turns an object's keys into an array
function objectKeys(obj)
@ -373,7 +374,7 @@ @@ -373,7 +374,7 @@
// TODO: The NetworkStreamer and FileStreamer have much in common. Consolidate?
function NetworkStreamer(config)
{
config = config || {};
@ -388,7 +389,6 @@ @@ -388,7 +389,6 @@
this.stream = function(url)
{
// TODO: Pull this setup out of the streamer and have reader, nextChunk and chunkLoaded passed in?
if (IS_WORKER)
{
nextChunk = function()
@ -423,8 +423,9 @@ @@ -423,8 +423,9 @@
xhr.setRequestHeader("Range", "bytes="+start+"-"+end);
}
xhr.send();
if (IS_WORKER && xhr.status == 0)
chunkError();
start += config.chunkSize;
return xhr.responseText;
}
function chunkLoaded()
@ -432,6 +433,12 @@ @@ -432,6 +433,12 @@
if (xhr.readyState != 4)
return;
if (xhr.status < 200 || xhr.status >= 400)
{
chunkError();
return;
}
// Rejoin the line we likely just split in two by chunking the file
aggregate += partialLine + xhr.responseText;
partialLine = "";
@ -471,11 +478,8 @@ @@ -471,11 +478,8 @@
});
}
if (finishedWithEntireFile)
{
if (isFunction(config.complete))
config.complete(undefined);
}
if (finishedWithEntireFile && isFunction(config.complete))
config.complete(results);
else if (results.meta.aborted && isFunction(config.complete))
config.complete(results);
else
@ -485,7 +489,15 @@ @@ -485,7 +489,15 @@
function chunkError()
{
if (isFunction(config.error))
config.error(reader.error, file);
config.error(xhr.statusText);
else if (IS_WORKER && config.error)
{
global.postMessage({
workerId: Papa.WORKER_ID,
error: xhr.statusText,
finished: false
});
}
}
};
}
@ -514,39 +526,17 @@ @@ -514,39 +526,17 @@
{
var slice = file.slice || file.webkitSlice || file.mozSlice;
// TODO: Pull this setup out of the streamer and have reader, nextChunk and chunkLoaded passed in?
// TODO/NOTE: Using FileReaderSync introduces very weird performance issues.
// See: http://stackoverflow.com/q/24708649/1048862
/*if (IS_WORKER)
{
reader = new FileReaderSync();
nextChunk = function()
{
if (start < file.size)
chunkLoaded({
target: { // simulate the structure of a FileReader event
result: readChunk()
}
});
};
}
else
{*/
reader = new FileReader();
reader = new FileReader(); // Better than FileReaderSync (even in worker threads). See: http://stackoverflow.com/q/24708649/1048862
reader.onload = chunkLoaded;
reader.onerror = chunkError;
nextChunk = function()
nextChunk(); // Starts streaming
function nextChunk()
{
if (start < file.size)
readChunk();
};
//}
nextChunk(); // Starts streaming
}
function readChunk()
{
@ -609,6 +599,15 @@ @@ -609,6 +599,15 @@
{
if (isFunction(config.error))
config.error(reader.error, file);
else if (IS_WORKER && config.error)
{
global.postMessage({
workerId: Papa.WORKER_ID,
error: reader.error,
file: file,
finished: false
});
}
}
};
}
@ -640,72 +639,34 @@ @@ -640,72 +639,34 @@
_config.delimiter = delimGuess.bestDelimiter;
else
{
addError("Delimiter", "UndetectableDelimiter", "Unable to auto-detect delimiting character; defaulted to comma");
addError(_results, "Delimiter", "UndetectableDelimiter", "Unable to auto-detect delimiting character; defaulted to comma");
_config.delimiter = ",";
}
_results.meta.delimiter = _config.delimiter;
}
var parser = new Parser(_config);
_results = parser.parse(input);
if (needsHeaderRow())
fillHeaderFields();
var results = applyHeaderAndDynamicTyping();
return results;
};
function guessDelimiter(input)
{
var delimChoices = [",", "\t", "|", ";", Papa.RECORD_SEP, Papa.UNIT_SEP];
var bestDelim, bestDelta, fieldCountPrevRow;
for (var i = 0; i < delimChoices.length; i++)
{
var delim = delimChoices[i];
var delta = 0, avgFieldCount = 0;
fieldCountPrevRow = undefined;
var preview = new Parser({
delimiter: delim,
preview: 10
}).parse(input);
for (var j = 0; j < preview.data.length; j++)
{
var fieldCount = preview.data[j].length;
avgFieldCount += fieldCount;
if (typeof fieldCountPrevRow === 'undefined')
if (isFunction(_config.step))
{
fieldCountPrevRow = fieldCount;
continue;
}
else if (fieldCount > 1)
var userStep = _config.step;
_config.step = function(results, parser)
{
delta += Math.abs(fieldCount - fieldCountPrevRow);
fieldCountPrevRow = fieldCount;
}
_results = results;
if (needsHeaderRow())
processResults();
else
userStep(processResults(), parser);
};
}
avgFieldCount /= preview.data.length;
_results = new Parser(_config).parse(input);
return processResults();
};
if ((typeof bestDelta === 'undefined' || delta < bestDelta)
&& avgFieldCount > 1.99)
function processResults()
{
bestDelta = delta;
bestDelim = delim;
}
}
_config.delimiter = bestDelim;
return {
successful: !!bestDelim,
bestDelimiter: bestDelim
}
if (needsHeaderRow())
fillHeaderFields();
return applyHeaderAndDynamicTyping();
}
function needsHeaderRow()
@ -725,8 +686,8 @@ @@ -725,8 +686,8 @@
function applyHeaderAndDynamicTyping()
{
if (!_results)
return;
if (!_results || (!_config.header && !_config.dynamicTyping))
return _results;
for (var i = 0; i < _results.data.length; i++)
{
@ -736,7 +697,6 @@ @@ -736,7 +697,6 @@
if (_config.dynamicTyping)
{
var value = _results.data[i][j];
if (value == "true")
_results.data[i][j] = true;
else if (value == "false")
@ -770,6 +730,57 @@ @@ -770,6 +730,57 @@
return _results;
}
// Tries each candidate delimiter by parsing the input with a Parser configured
// with `preview: 10`, then picks the candidate whose rows vary least in field
// count while averaging more than 1.99 fields per row. On ties the earlier
// candidate wins. The chosen delimiter (or undefined when none qualifies) is
// written to _config.delimiter and also returned.
function guessDelimiter(input)
{
	var candidates = [",", "\t", "|", ";", Papa.RECORD_SEP, Papa.UNIT_SEP];
	var winner, lowestDelta;

	candidates.forEach(function(candidate)
	{
		var rows = new Parser({
			delimiter: candidate,
			preview: 10
		}).parse(input).data;

		var totalFields = 0;
		var variation = 0;
		var lastCount;

		for (var r = 0; r < rows.length; r++)
		{
			var count = rows[r].length;
			totalFields += count;

			if (typeof lastCount === 'undefined')
			{
				// First row only establishes the baseline field count
				lastCount = count;
			}
			else if (count > 1)
			{
				// Rows with a single field do not contribute to the variation
				variation += Math.abs(count - lastCount);
				lastCount = count;
			}
		}

		// With zero rows this is NaN, which fails the comparison below
		var average = totalFields / rows.length;
		var isBetter = typeof lowestDelta === 'undefined' || variation < lowestDelta;

		if (average > 1.99 && isBetter)
		{
			lowestDelta = variation;
			winner = candidate;
		}
	});

	_config.delimiter = winner;

	return {
		successful: !!winner,
		bestDelimiter: winner
	};
}
function tryParseFloat(val)
{
var isNumber = FLOAT.test(val);
@ -876,7 +887,7 @@ @@ -876,7 +887,7 @@
{
if (_aborted) break;
if (_preview > 0 && _rowIdx >= _preview) break;
if (_paused) return returnable();
if (_paused) return finishParsing();
if (_ch == '"')
parseQuotes();
@ -904,6 +915,7 @@ @@ -904,6 +915,7 @@
if (_inQuotes)
addError("Quotes", "MissingQuotes", "Unescaped or mismatched quotes");
endRow(); // End of input is also end of the last row
if (!isFunction(_step))
return returnable();
}
@ -988,7 +1000,7 @@ @@ -988,7 +1000,7 @@
function endRow()
{
trimEmptyLastRow();
if (typeof _step === 'function')
if (isFunction(_step))
{
if (_data[_rowIdx])
_step(returnable(), self);
@ -1121,8 +1133,17 @@ @@ -1121,8 +1133,17 @@
if (msg.results && msg.results.data && isFunction(worker.userStep))
{
for (var i = 0; i < msg.results.data.length; i++)
worker.userStep(msg.results.data[i]);
{
worker.userStep({
data: [msg.results.data[i]],
errors: msg.results.errors,
meta: msg.results.meta
});
}
delete msg.results; // free memory ASAP
}
else if (msg.error)
worker.userError(msg.error, msg.file);
if (msg.finished)
{

2
player/player.js

@ -48,7 +48,7 @@ function buildConfig() @@ -48,7 +48,7 @@ function buildConfig()
delimiter: $('#delimiter').val(),
header: $('#header').prop('checked'),
dynamicTyping: $('#dynamicTyping').prop('checked'),
preview: parseInt($('#preview').val()),
preview: parseInt($('#preview').val() || 0),
step: $('#stream').prop('checked') ? stepFn : undefined,
encoding: $('#encoding').val(),
worker: $('#worker').prop('checked'),

Loading…
Cancel
Save