Browse Source

Streaming is fully functional

pull/17/head
Matthew Holt 11 years ago
parent
commit
829efd4a4f
  1. 347
      jquery.parse.js
  2. 9
      parse.jquery.json

347
jquery.parse.js

@ -1,6 +1,6 @@ @@ -1,6 +1,6 @@
/*
jQuery Parse Plugin
v1.1.1
Papa Parse
v2.0.0
https://github.com/mholt/jquery.parse
*/
@ -10,30 +10,19 @@ @@ -10,30 +10,19 @@
$.fn.parse = function(options)
{
function error(name, elem, file)
{
if (isFunction(options.error))
options.error({name: name}, elem, file);
}
var config = isDef(options.config) ? options.config : {};
var config = options.config ? options.config : {};
var queue = [];
this.each(function(idx)
{
var supported = $(this).prop('tagName').toUpperCase() == "INPUT"
&& $(this).attr('type') == 'file'
&& $(this).attr('type') == "file"
&& window.FileReader;
if (!supported)
return true; // continue to next input element
// Config to be used only for this instance of parsing
var instanceConfig = {
delimiter: config.delimiter,
header: config.header,
dynamicTyping: config.dynamicTyping,
stream: config.stream
};
var instanceConfig = $.extend({}, config); // This copy is very important
if (!this.files || this.files.length == 0)
{
@ -42,61 +31,83 @@ @@ -42,61 +31,83 @@
}
for (var i = 0; i < this.files.length; i++)
queue.push({
file: this.files[i],
inputElem: this,
instanceConfig: instanceConfig
});
if (queue.length > 0)
parseFile(queue[0]);
});
return this;
function parseFile(f)
{
var completeFunc = complete, errorFunc;
if (isFunction(options.error))
errorFunc = function() { options.error(reader.error, f.file, f.inputElem); };
if (isFunction(options.complete))
completeFunc = function(results, file, inputElem, event) { options.complete(results, file, inputElem, event); complete(); };
if (f.file.type.indexOf("text") < 0)
{
var file = this.files[i];
if (file.type.indexOf("text") < 0)
{
error("TypeMismatchError", file, this);
continue; // continue to next file in this input element
}
error("TypeMismatchError", f.file, f.inputElem);
return complete();
}
if (isFunction(options.before))
if (isFunction(options.before))
{
var returned = options.before(f.file, f.inputElem);
if (typeof returned === 'object')
f.instanceConfig = $.extend(f.instanceConfig, returned);
else if (returned === "skip")
return complete(); // Proceeds to next file
else if (returned === false)
{
var returned = options.before(file, this);
if (typeof returned === 'object')
{
// update config for this file/instance only
if (isDef(returned.delimiter))
instanceConfig.delimiter = returned.delimiter;
if (isDef(returned.header))
instanceConfig.header = returned.header;
if (isDef(returned.dynamicTyping))
instanceConfig.dynamicTyping = returned.dynamicTyping;
if (isDef(returned.preview))
instanceConfig.preview = returned.preview;
if (isDef(returned.stream))
instanceConfig.stream = returned.stream;
}
else if (returned === "skip")
continue; // proceed to next file
else if (returned === false)
{
error("AbortError", file, this);
return false; // aborts the .each() loop
}
error("AbortError", f.file, f.inputElem);
return; // Aborts all queued files immediately
}
}
if (f.instanceConfig.step)
{
var streamer = new Streamer(f.file, {
inputElem: f.inputElem,
config: $.extend({}, f.instanceConfig) // This copy is very important
});
streamer.stream(completeFunc, errorFunc);
}
else
{
var reader = new FileReader();
if (isFunction(options.error))
reader.onerror = function() { options.error(reader.error, file, this); };
var inputElem = this;
reader.onerror = errorFunc;
reader.onload = function(event)
{
var text = event.target.result;
var results = $.parse(text, instanceConfig);
if (isFunction(options.complete))
options.complete(results, file, inputElem, event);
var results = $.parse(text, f.instanceConfig);
completeFunc(results, f.file, f.inputElem, event);
};
reader.readAsText(file);
reader.readAsText(f.file);
}
});
}
return this;
function error(name, file, elem)
{
if (isFunction(options.error))
options.error({name: name}, file, elem);
}
function complete()
{
queue.splice(0, 1);
if (queue.length > 0)
parseFile(queue[0]);
}
};
$.parse = function(input, options)
@ -105,14 +116,84 @@ @@ -105,14 +116,84 @@
return parser.parse(input);
};
function isFunction(func)
{
return typeof func === 'function';
}
function isFunction(func) { return typeof func === 'function'; }
function isDef(val)
// Streamer is a wrapper over Parser to handle chunking the input file
function Streamer(file, settings)
{
return typeof val !== 'undefined'
if (!settings)
settings = {};
if (!settings.chunkSize)
settings.chunkSize = 1024 * 1024 * 5; // 5 MB
if (settings.config.step) // it had better be there...!
{
var userStep = settings.config.step;
settings.config.step = function(data) { userStep(data, file, settings.inputElem); };
}
var start = 0;
var partialLine = "";
var parser = new Parser(settings.config);
var reader = new FileReader();
reader.onload = blobLoaded;
reader.onerror = blobError;
this.stream = function(completeCallback, fileErrorCallback)
{
settings.onComplete = completeCallback;
settings.onFileError = fileErrorCallback;
nextChunk();
};
function blobLoaded(event)
{
var text = partialLine + event.target.result;
partialLine = "";
var lastLineEnd = text.lastIndexOf("\n");
if (lastLineEnd < 0)
lastLineEnd = text.lastIndexOf("\r");
if (lastLineEnd > -1)
{
partialLine = text.substring(lastLineEnd + 1); // skip the line ending character
text = text.substring(0, lastLineEnd);
}
var results = parser.parse(text);
if (start >= file.size)
return done(event);
else if (results.errors.abort)
return;
else
nextChunk();
}
function done(event)
{
if (typeof settings.onComplete === 'function')
settings.onComplete(undefined, file, settings.inputElem, event);
}
function blobError()
{
if (typeof settings.onFileError === 'function')
settings.onFileError(reader.error, file, settings.inputElem);
}
function nextChunk()
{
if (start < file.size)
{
reader.readAsText(file.slice(start, Math.min(start + settings.chunkSize, file.size)));
start += settings.chunkSize;
}
};
}
// Parser is the actual parsing component.
@ -122,9 +203,12 @@ @@ -122,9 +203,12 @@
function Parser(config)
{
var self = this;
var _invocations = 0;
var _input = "";
var _chunkOffset = 0;
var _abort = false;
var _config = {};
var _state = emptyState();
var _state = freshState();
var _defaultConfig = {
delimiter: "",
header: true,
@ -136,6 +220,15 @@ @@ -136,6 +220,15 @@
empty: /^\s*$/
};
config = validConfig(config);
_config = {
delimiter: config.delimiter,
header: config.header,
dynamicTyping: config.dynamicTyping,
preview: config.preview,
step: config.step
};
this.parse = function(input)
{
if (typeof input !== 'string')
@ -151,7 +244,7 @@ @@ -151,7 +244,7 @@
for (_state.i = 0; _state.i < _input.length; _state.i++)
{
if (_config.preview > 0 && _state.lineNum > _config.preview)
if (_abort || (_config.preview > 0 && _state.lineNum > _config.preview))
break;
_state.ch = _input[_state.i];
@ -165,26 +258,18 @@ @@ -165,26 +258,18 @@
notInQuotes();
}
endRow(); // End of input is also end of the last row
if (_state.inQuotes)
addError("Quotes", "MissingQuotes", "Unescaped or mismatched quotes");
if (_abort)
addError("Abort", "ParseAbort", "Parsing was aborted by the user's step function", "abort");
else
{
endRow(); // End of input is also end of the last row
if (_state.inQuotes)
addError("Quotes", "MissingQuotes", "Unescaped or mismatched quotes");
}
return returnable();
};
this.setOptions = function(opt)
{
opt = validConfig(opt);
_config = {
delimiter: opt.delimiter,
header: opt.header,
dynamicTyping: opt.dynamicTyping,
preview: opt.preview,
stream: opt.stream
};
};
this.getOptions = function()
{
return {
@ -192,12 +277,10 @@ @@ -192,12 +277,10 @@
header: _config.header,
dynamicTyping: _config.dynamicTyping,
preview: _config.preview,
stream: _config.stream
step: _config.step
};
};
this.setOptions(config);
function validConfig(config)
{
if (typeof config.delimiter !== 'string'
@ -216,8 +299,8 @@ @@ -216,8 +299,8 @@
if (typeof config.preview !== 'number')
config.preview = _defaultConfig.preview;
if (typeof config.stream !== 'function')
config.stream = _defaultConfig.stream;
if (typeof config.step !== 'function')
config.step = _defaultConfig.step;
return config;
}
@ -271,21 +354,6 @@ @@ -271,21 +354,6 @@
return !!bestDelim;
}
function emptyState()
{
return {
i: 0,
lineNum: 1,
field: 0,
fieldVal: "",
line: "",
ch: "",
inQuotes: false,
parsed: _config.header ? { fields: [], rows: [] } : [ [] ],
errors: { length: 0 }
};
}
function handleQuote()
{
var delimBefore = (_state.i > 0 && isBoundary(_state.i-1))
@ -319,7 +387,7 @@ @@ -319,7 +387,7 @@
function notInQuotes()
{
if (_state.ch == _config.delimiter)
saveField();
saveValue();
else if ((_state.ch == "\r" && _state.i < _input.length - 1
&& _input[_state.i+1] == "\n")
|| (_state.ch == "\n" && _state.i < _input.length - 1
@ -360,11 +428,11 @@ @@ -360,11 +428,11 @@
return _input[i] == "\n";
}
function saveField()
function saveValue()
{
if (_config.header)
{
if (_state.lineNum == 1)
if (_state.lineNum == 1 && _invocations == 1)
_state.parsed.fields.push(_state.fieldVal)
else
{
@ -399,7 +467,7 @@ @@ -399,7 +467,7 @@
{
endRow();
if (doStream())
if (streaming())
{
_state.errors = {};
_state.errors.length = 0;
@ -407,14 +475,14 @@ @@ -407,14 +475,14 @@
if (_config.header && _state.lineNum > 0)
{
if (doStream())
if (streaming())
_state.parsed.rows = [ {} ];
else
_state.parsed.rows.push({});
}
else
{
if (doStream())
if (streaming())
_state.parsed = [ [] ];
else
_state.parsed.push([]);
@ -427,18 +495,28 @@ @@ -427,18 +495,28 @@
function endRow()
{
saveField();
if (_abort)
return;
saveValue();
var emptyLine = trimEmptyLine();
if (!emptyLine && _config.header)
inspectFieldCount();
if (doStream() &&
(!_config.header || (_config.header && _state.parsed.rows.length > 0)))
_config.stream(returnable());
if (streaming() && (!_config.header ||
(_config.header && _state.parsed.rows.length > 0)))
{
var keepGoing = _config.step(returnable());
if (keepGoing === false)
_abort = true;
}
}
function doStream()
function streaming()
{
return typeof _config.stream === 'function';
return typeof _config.step === 'function';
}
function tryParseFloat(num)
@ -484,7 +562,7 @@ @@ -484,7 +562,7 @@
var lastRow = _state.parsed.rows[_state.parsed.rows.length - 1];
for (var prop in lastRow)
if (lastRow.hasOwnProperty(prop))
actual ++;
actual++;
if (actual < expected)
return addError("FieldMismatch", "TooFewFields", "Too few fields: expected " + expected + " fields but parsed " + actual);
@ -509,7 +587,7 @@ @@ -509,7 +587,7 @@
message: msg,
line: _state.lineNum,
row: row,
index: _state.i
index: _state.i + _chunkOffset
});
_state.errors.length ++;
@ -527,9 +605,40 @@ @@ -527,9 +605,40 @@
function reset(input)
{
_state = emptyState();
_invocations++;
if (_invocations > 1 && streaming())
_chunkOffset += input.length;
_state = freshState();
_input = input;
}
function freshState()
{
// If streaming, and thus parsing the input in chunks, this
// is careful to preserve what we've already got, when necessary.
var parsed;
if (_config.header)
{
parsed = {
fields: streaming() ? _state.parsed.fields || [] : [],
rows: streaming() && _invocations > 1 ? [ {} ] : []
};
}
else
parsed = [ [] ];
return {
i: 0,
lineNum: streaming() ? _state.lineNum : 1,
field: 0,
fieldVal: "",
line: "",
ch: "",
inQuotes: false,
parsed: parsed,
errors: { length: 0 }
};
}
}
})(jQuery);

9
parse.jquery.json

@ -1,8 +1,8 @@ @@ -1,8 +1,8 @@
{
"name": "parse",
"version": "1.1.1",
"title": "jQuery Parse",
"description": "Efficiently parses CSV (character-separated / delimited text) files or strings into arrays and objects. Auto-detects delimiters. Gracefully handles errors. Supports parsing multiple files.",
"version": "2.0.0",
"title": "Papa Parse",
"description": "Papa is a powerful CSV (delimited text) parser with that gracefully handles large files and malformed input.",
"keywords": [
"csv",
"parse",
@ -16,7 +16,8 @@ @@ -16,7 +16,8 @@
"tab",
"pipe",
"file",
"filereader"
"filereader",
"stream"
],
"author": {
"name": "Matthew Holt",

Loading…
Cancel
Save