Matthew Holt
11 years ago
13 changed files with 2503 additions and 313 deletions
@ -1,2 +1,3 @@
@@ -1,2 +1,3 @@
|
||||
bower_components/* |
||||
node_modules/* |
||||
_gitignore/ |
||||
bower_components/ |
||||
node_modules/ |
@ -0,0 +1,20 @@
@@ -0,0 +1,20 @@
|
||||
The MIT License (MIT) |
||||
|
||||
Copyright (c) 2014 Matthew Holt |
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of |
||||
this software and associated documentation files (the "Software"), to deal in |
||||
the Software without restriction, including without limitation the rights to |
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of |
||||
the Software, and to permit persons to whom the Software is furnished to do so, |
||||
subject to the following conditions: |
||||
|
||||
The above copyright notice and this permission notice shall be included in all |
||||
copies or substantial portions of the Software. |
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS |
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR |
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER |
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
@ -1,306 +0,0 @@
@@ -1,306 +0,0 @@
|
||||
/** |
||||
Papa Parse 3.0 alpha - core parser function |
||||
(c) 2014 Matthew Holt. |
||||
Not for use in production or redistribution. |
||||
For development of Papa Parse only. |
||||
**/ |
||||
/**
 * Core parser for delimited text (CSV and similar formats).
 *
 * Create an instance with an optional config object, then call parse(input).
 *
 * Recognized config properties:
 *   delimiter - single-character field separator. Anything that is not a
 *               one-character string, or that is "\r", "\n", '"' or the byte
 *               order mark, falls back to ",".
 *   comments  - true to use "#" as the comment character, or a one-character
 *               string to use instead. Any other value disables comments.
 *   step      - function(results, parser) called once for every completed,
 *               non-empty row (streaming mode). When set, parse() does not
 *               return the results.
 *   complete  - function called without arguments when a streaming parse
 *               reaches the end of the input.
 *   preview   - when greater than 0, stop once this many rows were produced.
 *
 * Public methods: parse(input), pause(), resume(), abort().
 */
function Parser(config)
{
	var self = this;
	var BYTE_ORDER_MARK = "\ufeff";
	var EMPTY = /^\s*$/;

	// Delimiters that are not allowed
	var _badDelimiters = ["\r", "\n", "\"", BYTE_ORDER_MARK];

	var _input;        // The input text being parsed
	var _delimiter;    // The delimiting character
	var _comments;     // Comment character (default '#') or false when disabled
	var _step;         // The step (streaming) function
	var _callback;     // The callback to invoke when a streaming parse finishes
	var _preview;      // Maximum number of rows to parse (applied only when > 0)
	var _ch;           // Current character
	var _i;            // Current character's positional index
	var _inQuotes;     // Whether in quotes or not
	var _lineNum;      // Current line number (1-based indexing)
	var _data;         // Parsed data (results)
	var _errors;       // Parse errors
	var _rowIdx;       // Current row index within results (0-based)
	var _colIdx;       // Current col index within result row (0-based)
	var _rowsStepped;  // Rows already handed to the step function in this parse
	var _aborted;      // Abort flag
	var _paused;       // Pause flag

	// Unpack the config object
	config = config || {};
	_delimiter = config.delimiter;
	_comments = config.comments;
	_step = config.step;
	_callback = config.complete;
	_preview = config.preview;

	// Delimiter integrity check
	if (typeof _delimiter !== 'string'
		|| _delimiter.length !== 1
		|| _badDelimiters.indexOf(_delimiter) > -1)
		_delimiter = ",";

	// Comment character integrity check
	if (_comments === true)
		_comments = "#";
	else if (typeof _comments !== 'string'
		|| _comments.length !== 1
		|| _badDelimiters.indexOf(_comments) > -1
		|| _comments === _delimiter)
		_comments = false;

	/**
	 * Parses delimited text input.
	 * Returns { data, errors, lines } when no step function is configured;
	 * returns undefined in streaming mode or when paused before the end.
	 * Throws the string "Input must be a string" for non-string input (kept
	 * as a string so existing callers that catch it keep working).
	 */
	this.parse = function(input)
	{
		if (typeof input !== 'string')
			throw "Input must be a string";
		reset(input);
		return parserLoop();
	};

	// Requests that the parse loop stop before processing the next character
	this.pause = function()
	{
		_paused = true;
	};

	// Clears the pause flag and continues from the current position, if any input is left
	this.resume = function()
	{
		_paused = false;
		if (_i < _input.length)
			return parserLoop();
	};

	// Requests that the parse loop end; results gathered so far are finalized
	this.abort = function()
	{
		_aborted = true;
	};

	// Main loop: dispatches every character to the handler matching the quote state
	function parserLoop()
	{
		while (_i < _input.length)
		{
			if (_aborted) break;
			// _rowIdx restarts at 0 after every streamed row, so rows already
			// delivered to the step function are counted as well; without a
			// step function _rowsStepped stays 0.
			if (_preview > 0 && _rowsStepped + _rowIdx >= _preview) break;
			if (_paused) return;

			if (_ch === '"')
				parseQuotes();
			else if (_inQuotes)
				parseInQuotes();
			else
				parseNotInQuotes();

			nextChar();
		}

		return finishParsing();
	}

	// Advances to the next character of the input
	function nextChar()
	{
		_i++;
		_ch = _input[_i];
	}

	// Closes the last row and either returns the results or invokes the completion callback
	function finishParsing()
	{
		if (_inQuotes)
			addError("Quotes", "MissingQuotes", "Unescaped or mismatched quotes");

		endRow();	// End of input is also end of the last row

		if (typeof _step !== 'function')
			return returnable();
		else if (typeof _callback === 'function')
			_callback();
	}

	// Handles a quote character: toggles the quoted state, or stores it as data
	function parseQuotes()
	{
		if (quotesOnBoundary() && !quotesEscaped())
			_inQuotes = !_inQuotes;
		else
		{
			saveChar();
			if (_inQuotes && quotesEscaped())
				_i++;	// Skip the second quote of an escaped pair
			else
				addError("Quotes", "UnexpectedQuotes", "Unexpected quotes");
		}
	}

	// Inside quotes every character is data; line breaks are still counted
	function parseInQuotes()
	{
		saveChar();
		if (twoCharLineBreak())
		{
			nextChar();
			saveChar();
			_lineNum++;
		}
		else if (oneCharLineBreak())
			_lineNum++;
	}

	// Outside quotes: delimiters, line breaks and comment starts are structural
	function parseNotInQuotes()
	{
		if (_ch === _delimiter)
			newField();
		else if (twoCharLineBreak())
		{
			newRow();
			nextChar();
		}
		else if (oneCharLineBreak())
			newRow();
		else if (isCommentStart())
			skipLine();
		else
			saveChar();
	}

	// True when the current character is the comment character at the start of a line
	function isCommentStart()
	{
		var firstCharOfLine = _i === 0
			|| oneCharLineBreak(_i-1)
			|| twoCharLineBreak(_i-2);
		return firstCharOfLine && _input[_i] === _comments;
	}

	// Advances until a line break or the end of the input is reached
	function skipLine()
	{
		while (!twoCharLineBreak()
			&& !oneCharLineBreak()
			&& _i < _input.length)
		{
			nextChar();
		}
	}

	// Appends the current character to the current field
	function saveChar()
	{
		_data[_rowIdx][_colIdx] += _ch;
	}

	// Starts a new, empty field in the current row
	function newField()
	{
		_data[_rowIdx].push("");
		_colIdx = _data[_rowIdx].length - 1;
	}

	// Finishes the current row and starts a new one containing one empty field
	function newRow()
	{
		endRow();

		_lineNum++;
		_data.push([]);
		_rowIdx = _data.length - 1;
		newField();
	}

	// Drops the row if it is empty; in streaming mode hands it to the step function
	function endRow()
	{
		trimEmptyLastRow();
		if (typeof _step === 'function')
		{
			if (_data[_rowIdx])
			{
				_rowsStepped++;
				_step(returnable(), self);
			}
			clearErrorsAndData();
		}
	}

	// Removes the current row when it consists of a single whitespace-only field
	function trimEmptyLastRow()
	{
		if (_data[_rowIdx].length === 1 && EMPTY.test(_data[_rowIdx][0]))
		{
			_data.splice(_rowIdx, 1);
			_rowIdx = _data.length - 1;
		}
	}

	// True when a "\r\n" or "\n\r" pair starts at index i (default: current index)
	function twoCharLineBreak(i)
	{
		if (typeof i !== 'number')
			i = _i;
		return i < _input.length - 1 &&
			((_input[i] === "\r" && _input[i+1] === "\n")
			|| (_input[i] === "\n" && _input[i+1] === "\r"));
	}

	// True when the character at index i (default: current index) is "\r" or "\n"
	function oneCharLineBreak(i)
	{
		if (typeof i !== 'number')
			i = _i;
		return _input[i] === "\r" || _input[i] === "\n";
	}

	function quotesEscaped()
	{
		// Quotes as data cannot be on boundary, for example: ,"", are not escaped quotes
		return !quotesOnBoundary() && _i < _input.length - 1 && _input[_i+1] === '"';
	}

	// True when the current quote directly follows or precedes a field boundary
	function quotesOnBoundary()
	{
		return isBoundary(_i-1) || isBoundary(_i+1);
	}

	// A boundary is the position just outside the input, a delimiter, or a line break character
	function isBoundary(i)
	{
		if (typeof i !== 'number')
			i = _i;

		var ch = _input[i];

		return (i === -1 || i === _input.length)
			|| (i < _input.length
				&& i > -1
				&& (ch === _delimiter
					|| ch === "\r"
					|| ch === "\n"));
	}

	// Records a parse error together with the current line, row and character index
	function addError(type, code, msg)
	{
		_errors.push({
			type: type,
			code: code,
			message: msg,
			line: _lineNum,
			row: _rowIdx,
			index: _i
		});
	}

	// Prepares all state for a fresh parse of the given input
	function reset(input)
	{
		_input = input;
		_inQuotes = false;
		_lineNum = 1;
		_i = 0;
		_rowsStepped = 0;
		// Flags left over from an earlier run must not affect this one
		_aborted = false;
		_paused = false;
		clearErrorsAndData();
		_data = [ [""] ];	// starting parsing requires an empty field
		_ch = _input[_i];
	}

	// Empties the result and error buffers and rewinds the row/column indexes
	function clearErrorsAndData()
	{
		_data = [];
		_errors = [];
		_rowIdx = 0;
		_colIdx = 0;
	}

	// Snapshot of the current results in the shape handed to callers
	function returnable()
	{
		return {
			data: _data,
			errors: _errors,
			lines: _lineNum
		};
	}
}
@ -0,0 +1,53 @@
@@ -0,0 +1,53 @@
|
||||
Parse CSV with JavaScript |
||||
======================================== |
||||
|
||||
Papa Parse (formerly the jQuery Parse Plugin) is a robust and powerful CSV (character-separated values) parser with these features: |
||||
|
||||
- Easily parses delimited text strings with any delimiter |
||||
- Parse CSV files directly (local or over the network) |
||||
- Stream large files (even via HTTP) |
||||
- Auto-detects the delimiter |
||||
- Worker threads to keep your web page responsive |
||||
- Header row support |
||||
- Can convert numbers and booleans to their types |
||||
- Graceful and robust error handling |
||||
- jQuery integration to easily parse files from `<input type="file">` elements |
||||
|
||||
All are optional (except for being easy to use). |
||||
|
||||
|
||||
|
||||
Demo |
||||
---- |
||||
|
||||
Visit **[PapaParse.com#demo](http://papaparse.com/#demo)** to try Papa! |
||||
|
||||
|
||||
|
||||
Get Started |
||||
----------- |
||||
|
||||
Use [papaparse.min.js](https://github.com/mholt/jquery.parse/blob/master/papaparse.min.js) for production. |
||||
|
||||
For usage instructions, see the [homepage](http://papaparse.com) and, for more detail, the [documentation](http://papaparse.com/docs.html). |
||||
|
||||
|
||||
|
||||
Tests |
||||
----- |
||||
|
||||
Papa Parse, especially its core Parser, is under test. Download this repository and open `tests/tests.html` in your browser to run them. |
||||
|
||||
|
||||
|
||||
Contributing |
||||
------------ |
||||
|
||||
To discuss a new feature or ask a question, open an issue. To fix a bug, submit a pull request and get credited in the [contributors](https://github.com/mholt/jquery.parse/graphs/contributors) list! Remember, a pull request *with tests* is best. (In particular, all changes to the Parser component should be validated with tests.) You can also join the discussion on Twitter with [#PapaParse](https://twitter.com/search?q=%23PapaParse&src=typd&f=realtime) or contact me directly at [@mholt6](https://twitter.com/mholt6). |
||||
|
||||
|
||||
|
||||
Origins |
||||
------- |
||||
|
||||
Papa Parse is the result of a successful experiment by [SmartyStreets](http://smartystreets.com) which matured into a full-featured, independent JavaScript library. |
@ -0,0 +1,34 @@
@@ -0,0 +1,34 @@
|
||||
{ |
||||
"name": "Papa-Parse", |
||||
"main": "papaparse.js", |
||||
"homepage": "http://papaparse.com", |
||||
"authors": [ |
||||
"Matthew Holt" |
||||
], |
||||
"description": "Papa is a powerful CSV (delimited text) parser", |
||||
"keywords": [ |
||||
"csv", |
||||
"parse", |
||||
"parsing", |
||||
"parser", |
||||
"delimited", |
||||
"text", |
||||
"data", |
||||
"auto-detect", |
||||
"comma", |
||||
"tab", |
||||
"pipe", |
||||
"file", |
||||
"filereader", |
||||
"stream" |
||||
], |
||||
"license": "MIT", |
||||
"ignore": [ |
||||
"**/.*", |
||||
"node_modules", |
||||
"bower_components", |
||||
"test", |
||||
"tests", |
||||
"player" |
||||
] |
||||
} |
@ -0,0 +1,53 @@
@@ -0,0 +1,53 @@
|
||||
<!DOCTYPE html> |
||||
<html> |
||||
<head> |
||||
<title>Papa Parse Player</title> |
||||
<meta charset="utf-8"> |
||||
<link rel="stylesheet" href="player.css"> |
||||
<script src="http://ajax.googleapis.com/ajax/libs/jquery/2.1.0/jquery.min.js"></script> |
||||
<script src="../papaparse.js"></script> |
||||
<script src="player.js"></script> |
||||
</head> |
||||
<body> |
||||
<h1><a href="http://papaparse.com">Papa Parse</a> Player</h1> |
||||
|
||||
<div class="grid-container"> |
||||
|
||||
<div class="grid-25"> |
||||
<label><input type="checkbox" id="download"> Download</label> |
||||
<label><input type="checkbox" id="stream"> Stream</label> |
||||
<label><input type="checkbox" id="header"> Header row</label> |
||||
<label><input type="checkbox" id="worker"> Worker thread</label> |
||||
<label><input type="checkbox" id="dynamicTyping"> Dynamic typing</label> |
||||
<label>Preview: <input type="number" min="0" max="1000" placeholder="default" id="preview"></label> |
||||
<label>Encoding: <input type="text" id="encoding" placeholder="default" size="10"></label> |
||||
<label>Comment char: <input type="text" size="5" maxlength="1" placeholder="default" id="comments"></label> |
||||
<label>Delimiter: <input type="text" size="5" maxlength="1" placeholder="auto" id="delimiter"> <a href="javascript:" id="insert-tab">tab</a></label> |
||||
</div> |
||||
|
||||
<div class="grid-75 text-center"> |
||||
|
||||
<textarea id="input" placeholder="Input">Column 1,Column 2,Column 3,Column 4 |
||||
1-1,1-2,1-3,1-4 |
||||
2-1,2-2,2-3,2-4 |
||||
3-1,3-2,3-3,3-4, |
||||
4,5,6,7</textarea> |
||||
|
||||
<br> |
||||
<b>or</b> |
||||
<br> |
||||
|
||||
<input type="file" id="files" multiple> |
||||
|
||||
<br><br> |
||||
|
||||
<button id="submit">Parse</button> |
||||
|
||||
<br><br> |
||||
|
||||
<i>Open the Console in your browser's inspector tools to see results.</i> |
||||
</div> |
||||
|
||||
</div> |
||||
</body> |
||||
</html> |
@ -0,0 +1,71 @@
@@ -0,0 +1,71 @@
|
||||
// Number of times stepFn has been invoked for the current parse
var stepped = 0;
// performance.now() readings taken when a parse starts and when it completes
var start, end;

// Wire up the player controls once the DOM is ready
$(function()
{
	// Logs each file just before it is parsed
	function logFileStart(file, inputElem)
	{
		console.log("Parsing file:", file);
	}

	// Logs once every selected file has been handled
	function logAllFilesDone()
	{
		console.log("Done with all files.");
	}

	// Parses the selected files if there are any, otherwise the textarea contents
	function onSubmit()
	{
		var inputText = $('#input').val();
		var selectedFiles = $('#files')[0].files;
		stepped = 0;

		var parseConfig = buildConfig();
		var hasFiles = selectedFiles.length > 0;

		start = performance.now();

		if (!hasFiles)
		{
			var results = Papa.parse(inputText, parseConfig);
			console.log("Synchronous parse results:", results);
			return;
		}

		$('#files').parse({
			config: parseConfig,
			before: logFileStart,
			complete: logAllFilesDone
		});
	}

	// Puts a literal tab character into the delimiter box
	function insertTabDelimiter()
	{
		$('#delimiter').val('\t');
	}

	$('#submit').click(onSubmit);
	$('#insert-tab').click(insertTabDelimiter);
});
||||
|
||||
|
||||
|
||||
/**
 * Builds the parse configuration from the current state of the player's
 * form controls.
 *
 * Returns an object with the keys delimiter, header, dynamicTyping, preview,
 * step, encoding, worker, comments, complete and download. `step` is stepFn
 * only when the "Stream" checkbox is ticked; `preview` is NaN when the
 * preview field is left blank.
 */
function buildConfig()
{
	return {
		delimiter: $('#delimiter').val(),
		header: $('#header').prop('checked'),
		// Must read its own checkbox, not the header one
		dynamicTyping: $('#dynamicTyping').prop('checked'),
		// Explicit radix so the field is always read as a decimal number
		preview: parseInt($('#preview').val(), 10),
		step: $('#stream').prop('checked') ? stepFn : undefined,
		encoding: $('#encoding').val(),
		worker: $('#worker').prop('checked'),
		comments: $('#comments').val(),
		complete: completeFn,
		download: $('#download').prop('checked')
	};
}
||||
|
||||
// Step callback for streaming parses: only counts how often it was invoked
// by bumping the file-level `stepped` counter; both arguments are ignored.
function stepFn(results, parser)
{
	stepped += 1;
}
||||
|
||||
// Completion callback: stores the finish time in the file-level `end` and logs
// the time elapsed since `start` together with whatever arguments were passed.
function completeFn()
{
	var finishedAt = performance.now();
	end = finishedAt;
	console.log("Finished input. Time:", finishedAt - start, arguments);
}
@ -1,3 +1,8 @@
@@ -1,3 +1,8 @@
|
||||
// TODO: Add tests for unparse:
|
||||
// If fields is omitted, write a CSV string without a header row
|
||||
// If delimiter is omitted, choose comma by default
|
||||
// If data is omitted, do nothing... maybe if fields IS specified, write just the header row?
|
||||
|
||||
// ASCII control characters with codes 30 and 31
var RECORD_SEP = "\x1E";
var UNIT_SEP = "\x1F";
||||
|
@ -1,11 +1,12 @@
@@ -1,11 +1,12 @@
|
||||
<!DOCTYPE html> |
||||
<html> |
||||
<head> |
||||
<title>Tests - Papa Parse</title> |
||||
<title>Papa Parse Tests</title> |
||||
<meta charset="utf-8"> |
||||
<link rel="stylesheet" href="tests.css"> |
||||
<script src="http://ajax.googleapis.com/ajax/libs/jquery/2.1.0/jquery.min.js"></script> |
||||
<script src="Parser.js"></script> |
||||
<script src="tests.js"></script> |
||||
<script src="../papaparse.js"></script> |
||||
<script src="test-cases.js"></script> |
||||
<script src="test-runner.js"></script> |
||||
</head> |
||||
<body> |
Loading…
Reference in new issue