Browse Source

Version 3 now working; added player and tests

pull/62/head
Matthew Holt 11 years ago
parent
commit
cee3115edb
  1. 5
      .gitignore
  2. 20
      LICENSE
  3. 306
      Parser.js
  4. 53
      README.md
  5. 34
      bower.json
  6. 1218
      papaparse.js
  7. 1041
      player/player.css
  8. 53
      player/player.html
  9. 71
      player/player.js
  10. 5
      tests/test-cases.js
  11. 3
      tests/test-runner.js
  12. 0
      tests/tests.css
  13. 7
      tests/tests.html

5
.gitignore vendored

@ -1,2 +1,3 @@ @@ -1,2 +1,3 @@
bower_components/*
node_modules/*
_gitignore/
bower_components/
node_modules/

20
LICENSE

@ -0,0 +1,20 @@ @@ -0,0 +1,20 @@
The MIT License (MIT)
Copyright (c) 2014 Matthew Holt
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

306
Parser.js

@ -1,306 +0,0 @@ @@ -1,306 +0,0 @@
/**
Papa Parse 3.0 alpha - core parser function
(c) 2014 Matthew Holt.
Not for use in production or redistribution.
For development of Papa Parse only.
**/
/**
 * Core delimited-text (CSV) parser.
 *
 * config is an optional object; every field is optional:
 *   delimiter - single delimiting character. Anything that is not a
 *               one-character string, or that is "\r", "\n", '"' or the
 *               byte order mark, falls back to ",".
 *   comments  - true to use "#", or a single character. An invalid value
 *               (or one equal to the delimiter) disables comment handling.
 *   step      - function(results, parser) invoked once per non-empty row.
 *               When set, parse() does not return the results.
 *   complete  - function invoked with no arguments when parsing finishes;
 *               only called when a step function is set.
 *   preview   - when greater than 0, stop after this many rows.
 *
 * Instance methods: parse(input), pause(), resume(), abort().
 */
function Parser(config)
{
    var self = this;
    var BYTE_ORDER_MARK = "\ufeff";
    var EMPTY = /^\s*$/;

    // Delimiters that are not allowed
    var _badDelimiters = ["\r", "\n", "\"", BYTE_ORDER_MARK];

    var _input;         // The input text being parsed
    var _delimiter;     // The delimiting character
    var _comments;      // Comment character (default '#') or boolean
    var _step;          // The step (streaming) function
    var _callback;      // The callback to invoke when finished
    var _preview;       // Maximum number of rows to parse
    var _ch;            // Current character
    var _i;             // Current character's positional index
    var _inQuotes;      // Whether in quotes or not
    var _lineNum;       // Current line number (1-based indexing)
    var _data;          // Parsed data (results)
    var _errors;        // Parse errors
    var _rowIdx;        // Current row index within results (0-based)
    var _colIdx;        // Current col index within result row (0-based)
    var _aborted;       // Abort flag
    var _paused;        // Pause flag
    var _steppedRows;   // Rows handed to the step function during this parse

    // Unpack the config object
    config = config || {};
    _delimiter = config.delimiter;
    _comments = config.comments;
    _step = config.step;
    _callback = config.complete;
    _preview = config.preview;

    // Delimiter integrity check
    if (typeof _delimiter !== 'string'
        || _delimiter.length != 1
        || _badDelimiters.indexOf(_delimiter) > -1)
        _delimiter = ",";

    // Comment character integrity check
    if (_comments === true)
        _comments = "#";
    else if (typeof _comments !== 'string'
        || _comments.length != 1
        || _badDelimiters.indexOf(_comments) > -1
        || _comments == _delimiter)
        _comments = false;

    // Parses delimited text input. Throws if input is not a string.
    // Returns { data, errors, lines } unless a step function is configured.
    this.parse = function(input)
    {
        if (typeof input !== 'string')
            throw "Input must be a string";
        reset(input);
        return parserLoop();
    };

    // Asks the parser loop to stop before consuming the next character.
    this.pause = function()
    {
        _paused = true;
    };

    // Clears the pause flag and continues from the current position,
    // if any input remains.
    this.resume = function()
    {
        _paused = false;
        if (_i < _input.length)
            return parserLoop();
    };

    // Asks the parser loop to stop; the parse is then finished as if the
    // end of input had been reached.
    this.abort = function()
    {
        _aborted = true;
    };

    // Consumes the input one character at a time until it is exhausted,
    // aborted, paused, or the preview limit has been reached.
    function parserLoop()
    {
        while (_i < _input.length)
        {
            if (_aborted) break;
            if (previewReached()) break;
            if (_paused) return;

            if (_ch == '"')
                parseQuotes();
            else if (_inQuotes)
                parseInQuotes();
            else
                parseNotInQuotes();

            nextChar();
        }

        return finishParsing();
    }

    // True once the configured number of preview rows has been produced.
    // Without a step function rows accumulate in _data, so _rowIdx counts
    // them. With a step function _data is emptied after every row and
    // _rowIdx goes back to 0, so the running count of stepped rows is
    // what has to be compared instead.
    function previewReached()
    {
        return _preview > 0
            && (_rowIdx >= _preview || _steppedRows >= _preview);
    }

    function nextChar()
    {
        _i++;
        _ch = _input[_i];
    }

    // Flushes the last row, then either returns the results (no step
    // function) or invokes the completion callback (step function set).
    function finishParsing()
    {
        if (_inQuotes)
            addError("Quotes", "MissingQuotes", "Unescaped or mismatched quotes");

        endRow();   // End of input is also end of the last row

        if (typeof _step !== 'function')
            return returnable();
        else if (typeof _callback === 'function')
            _callback();
    }

    // Handles a quote character: a quote next to a field boundary opens or
    // closes a quoted field; any other quote is kept as data.
    function parseQuotes()
    {
        if (quotesOnBoundary() && !quotesEscaped())
            _inQuotes = !_inQuotes;
        else
        {
            saveChar();
            if (_inQuotes && quotesEscaped())
                _i++;   // Skip the second quote of an escaped pair
            else
                addError("Quotes", "UnexpectedQuotes", "Unexpected quotes");
        }
    }

    // Inside quotes everything is data, but line breaks still count lines.
    function parseInQuotes()
    {
        saveChar();
        if (twoCharLineBreak())
        {
            nextChar();
            saveChar();
            _lineNum++;
        }
        else if (oneCharLineBreak())
            _lineNum++;
    }

    function parseNotInQuotes()
    {
        if (_ch == _delimiter)
            newField();
        else if (twoCharLineBreak())
        {
            newRow();
            nextChar();
        }
        else if (oneCharLineBreak())
            newRow();
        else if (isCommentStart())
            skipLine();
        else
            saveChar();
    }

    // A comment only starts at the first character of a line.
    function isCommentStart()
    {
        var firstCharOfLine = _i == 0
            || oneCharLineBreak(_i-1)
            || twoCharLineBreak(_i-2);
        return firstCharOfLine && _input[_i] === _comments;
    }

    // Advances to the next line break (or the end of the input).
    function skipLine()
    {
        while (!twoCharLineBreak()
            && !oneCharLineBreak()
            && _i < _input.length)
        {
            nextChar();
        }
    }

    function saveChar()
    {
        _data[_rowIdx][_colIdx] += _ch;
    }

    function newField()
    {
        _data[_rowIdx].push("");
        _colIdx = _data[_rowIdx].length - 1;
    }

    function newRow()
    {
        endRow();
        _lineNum++;
        _data.push([]);
        _rowIdx = _data.length - 1;
        newField();
    }

    // Finishes the current row. With a step function, a surviving row is
    // handed to it and the accumulated data and errors are then discarded.
    function endRow()
    {
        trimEmptyLastRow();
        if (typeof _step === 'function')
        {
            if (_data[_rowIdx])
            {
                _steppedRows++;
                _step(returnable(), self);
            }
            clearErrorsAndData();
        }
    }

    // Drops the current row if it is a single whitespace-only field.
    function trimEmptyLastRow()
    {
        if (_data[_rowIdx].length == 1 && EMPTY.test(_data[_rowIdx][0]))
        {
            _data.splice(_rowIdx, 1);
            _rowIdx = _data.length - 1;
        }
    }

    function twoCharLineBreak(i)
    {
        if (typeof i !== 'number')
            i = _i;
        return i < _input.length - 1 &&
            ((_input[i] == "\r" && _input[i+1] == "\n")
            || (_input[i] == "\n" && _input[i+1] == "\r"));
    }

    function oneCharLineBreak(i)
    {
        if (typeof i !== 'number')
            i = _i;
        return _input[i] == "\r" || _input[i] == "\n";
    }

    function quotesEscaped()
    {
        // Quotes as data cannot be on boundary, for example: ,"", are not escaped quotes
        return !quotesOnBoundary() && _i < _input.length - 1 && _input[_i+1] == '"';
    }

    function quotesOnBoundary()
    {
        return isBoundary(_i-1) || isBoundary(_i+1);
    }

    // A boundary is the start or end of the input, a delimiter, or a
    // line break character.
    function isBoundary(i)
    {
        if (typeof i != 'number')
            i = _i;

        var ch = _input[i];

        return (i == -1 || i == _input.length)
            || (i < _input.length
                && i > -1
                && (ch == _delimiter
                    || ch == "\r"
                    || ch == "\n"));
    }

    function addError(type, code, msg)
    {
        _errors.push({
            type: type,
            code: code,
            message: msg,
            line: _lineNum,
            row: _rowIdx,
            index: _i
        });
    }

    // Prepares all per-parse state. The abort and pause flags are cleared
    // here so that an instance that was aborted or paused can be used for
    // another parse() call.
    function reset(input)
    {
        _input = input;
        _inQuotes = false;
        _aborted = false;
        _paused = false;
        _steppedRows = 0;
        _lineNum = 1;
        _i = 0;
        clearErrorsAndData();
        _data = [ [""] ];   // starting parsing requires an empty field
        _ch = _input[_i];
    }

    function clearErrorsAndData()
    {
        _data = [];
        _errors = [];
        _rowIdx = 0;
        _colIdx = 0;
    }

    function returnable()
    {
        return {
            data: _data,
            errors: _errors,
            lines: _lineNum
        };
    }
}

53
README.md

@ -0,0 +1,53 @@ @@ -0,0 +1,53 @@
Parse CSV with JavaScript
========================================
Papa Parse (formerly the jQuery Parse Plugin) is a robust and powerful CSV (character-separated values) parser with these features:
- Easily parses delimited text strings with any delimiter
- Parse CSV files directly (local or over the network)
- Stream large files (even via HTTP)
- Auto-detects the delimiter
- Worker threads to keep your web page responsive
- Header row support
- Can convert numbers and booleans to their types
- Graceful and robust error handling
- jQuery integration to easily parse files from `<input type="file">` elements
All are optional (except for being easy to use).
Demo
----
Visit **[PapaParse.com#demo](http://papaparse.com/#demo)** to try Papa!
Get Started
-----------
Use [papaparse.min.js](https://github.com/mholt/jquery.parse/blob/master/papaparse.min.js) for production.
For usage instructions, see the [homepage](http://papaparse.com) and, for more detail, the [documentation](http://papaparse.com/docs.html).
Tests
-----
Papa Parse, especially its core Parser, is under test. Download this repository and open `tests/tests.html` in your browser to run them.
Contributing
------------
To discuss a new feature or ask a question, open an issue. To fix a bug, submit a pull request and get credited in the [contributors](https://github.com/mholt/jquery.parse/graphs/contributors)! Remember, a pull request *with a test* is best. (In particular, all changes to the Parser component should be validated with tests.) You may also join the discussion on Twitter with [#PapaParse](https://twitter.com/search?q=%23PapaParse&src=typd&f=realtime) or contact me directly, [@mholt6](https://twitter.com/mholt6).
Origins
-------
Papa Parse is the result of a successful experiment by [SmartyStreets](http://smartystreets.com) which matured into a fully-featured, independent JavaScript library.

34
bower.json

@ -0,0 +1,34 @@ @@ -0,0 +1,34 @@
{
"name": "Papa-Parse",
"main": "papaparse.js",
"homepage": "http://papaparse.com",
"authors": [
"Matthew Holt"
],
"description": "Papa is a powerful CSV (delimited text) parser",
"keywords": [
"csv",
"parse",
"parsing",
"parser",
"delimited",
"text",
"data",
"auto-detect",
"comma",
"tab",
"pipe",
"file",
"filereader",
"stream"
],
"license": "MIT",
"ignore": [
"**/.*",
"node_modules",
"bower_components",
"test",
"tests",
"player"
]
}

1218
papaparse.js

File diff suppressed because it is too large Load Diff

1041
player/player.css

File diff suppressed because it is too large Load Diff

53
player/player.html

@ -0,0 +1,53 @@ @@ -0,0 +1,53 @@
<!DOCTYPE html>
<!-- Papa Parse Player: a manual page for trying parse options against pasted text or chosen files. -->
<html>
<head>
<title>Papa Parse Player</title>
<meta charset="utf-8">
<link rel="stylesheet" href="player.css">
<!-- jQuery is loaded first because player.js uses $ -->
<script src="http://ajax.googleapis.com/ajax/libs/jquery/2.1.0/jquery.min.js"></script>
<script src="../papaparse.js"></script>
<script src="player.js"></script>
</head>
<body>
<h1><a href="http://papaparse.com">Papa Parse</a> Player</h1>
<div class="grid-container">
<!-- Parse options: player.js builds the parse config from the controls in this column -->
<div class="grid-25">
<label><input type="checkbox" id="download"> Download</label>
<label><input type="checkbox" id="stream"> Stream</label>
<label><input type="checkbox" id="header"> Header row</label>
<label><input type="checkbox" id="worker"> Worker thread</label>
<label><input type="checkbox" id="dynamicTyping"> Dynamic typing</label>
<label>Preview: <input type="number" min="0" max="1000" placeholder="default" id="preview"></label>
<label>Encoding: <input type="text" id="encoding" placeholder="default" size="10"></label>
<label>Comment char: <input type="text" size="5" maxlength="1" placeholder="default" id="comments"></label>
<label>Delimiter: <input type="text" size="5" maxlength="1" placeholder="auto" id="delimiter"> <a href="javascript:" id="insert-tab">tab</a></label>
</div>
<!-- Input: the textarea contents are parsed unless one or more files are chosen below -->
<div class="grid-75 text-center">
<textarea id="input" placeholder="Input">Column 1,Column 2,Column 3,Column 4
1-1,1-2,1-3,1-4
2-1,2-2,2-3,2-4
3-1,3-2,3-3,3-4,
4,5,6,7</textarea>
<br>
<b>or</b>
<br>
<input type="file" id="files" multiple>
<br><br>
<button id="submit">Parse</button>
<br><br>
<i>Open the Console in your browser's inspector tools to see results.</i>
</div>
</div>
</body>
</html>

71
player/player.js

@ -0,0 +1,71 @@ @@ -0,0 +1,71 @@
// Number of rows stepFn has seen since the last click on "Parse"
var stepped = 0;
// performance.now() timestamps taken when a parse starts and when it completes
var start, end;

// Wires up the player controls once the DOM is ready.
$(function()
{
    // Logs each file just before it is parsed
    function announceFile(file, inputElem)
    {
        console.log("Parsing file:", file);
    }

    // Logs once every chosen file has been handled
    function announceAllDone()
    {
        console.log("Done with all files.");
    }

    // "Parse" button: parse the chosen files if there are any,
    // otherwise parse the textarea contents synchronously.
    $('#submit').click(function()
    {
        var text = $('#input').val();
        var chosen = $('#files')[0].files;

        stepped = 0;

        var parseConfig = buildConfig();
        var haveFiles = chosen.length > 0;

        start = performance.now();

        if (!haveFiles)
        {
            var results = Papa.parse(text, parseConfig);
            console.log("Synchronous parse results:", results);
            return;
        }

        $('#files').parse({
            config: parseConfig,
            before: announceFile,
            complete: announceAllDone
        });
    });

    // "tab" link: put a literal tab character into the delimiter box
    $('#insert-tab').click(function()
    {
        $('#delimiter').val('\t');
    });
});
/**
 * Reads the option controls on the player page and assembles the config
 * object passed to the parser.
 *
 * Returns an object with the keys delimiter, header, dynamicTyping,
 * preview, step, encoding, worker, comments, complete and download.
 * `preview` is NaN when the preview box is empty; `step` is stepFn only
 * when the "Stream" box is checked, otherwise undefined.
 */
function buildConfig()
{
    return {
        delimiter: $('#delimiter').val(),
        header: $('#header').prop('checked'),
        // Read the "Dynamic typing" checkbox itself, not the "Header row" one
        dynamicTyping: $('#dynamicTyping').prop('checked'),
        // Explicit radix so the value is always read as decimal
        preview: parseInt($('#preview').val(), 10),
        step: $('#stream').prop('checked') ? stepFn : undefined,
        encoding: $('#encoding').val(),
        worker: $('#worker').prop('checked'),
        comments: $('#comments').val(),
        complete: completeFn,
        download: $('#download').prop('checked')
    };
}
/**
 * Step callback used when streaming: bumps the file-level `stepped`
 * counter once per invocation. Both arguments are ignored.
 */
function stepFn(results, parser)
{
    stepped += 1;
}
/**
 * Completion callback: records the finish time in the file-level `end`
 * and logs the elapsed time since `start` along with whatever arguments
 * the callback was invoked with.
 */
function completeFn()
{
    var finishedAt = performance.now();
    end = finishedAt;
    console.log("Finished input. Time:", finishedAt - start, arguments);
}

5
tests.js → tests/test-cases.js

@ -1,3 +1,8 @@ @@ -1,3 +1,8 @@
// TODO: Add tests for unparse:
// If fields is omitted, write a CSV string without a header row
// If delimiter is omitted, choose comma by default
// If data is omitted, do nothing... maybe if fields IS specified, write just the header row?
var RECORD_SEP = String.fromCharCode(30);
var UNIT_SEP = String.fromCharCode(31);

3
test-runner.js → tests/test-runner.js

@ -56,8 +56,7 @@ $(function() @@ -56,8 +56,7 @@ $(function()
function runTest(test, num)
{
var parser = new Parser(test.config);
var actual = parser.parse(test.input);
var actual = Papa.parse(test.input, test.config);
var results = compare(actual.data, actual.errors, test.expected);

0
tests.css → tests/tests.css

7
tests.html → tests/tests.html

@ -1,11 +1,12 @@ @@ -1,11 +1,12 @@
<!DOCTYPE html>
<html>
<head>
<title>Tests - Papa Parse</title>
<title>Papa Parse Tests</title>
<meta charset="utf-8">
<link rel="stylesheet" href="tests.css">
<script src="http://ajax.googleapis.com/ajax/libs/jquery/2.1.0/jquery.min.js"></script>
<script src="Parser.js"></script>
<script src="tests.js"></script>
<script src="../papaparse.js"></script>
<script src="test-cases.js"></script>
<script src="test-runner.js"></script>
</head>
<body>
Loading…
Cancel
Save