Browse Source

Core parser for 3.0 rewrite complete with all new tests

pull/62/head
Matthew Holt 11 years ago
parent
commit
42d707fe4a
  1. 306
      Parser.js
  2. 172
      test-runner.js
  3. 159
      tests.css
  4. 128
      tests.html
  5. 1071
      tests.js

306
Parser.js

@ -0,0 +1,306 @@ @@ -0,0 +1,306 @@
/**
Papa Parse 3.0 alpha - core parser function
(c) 2014 Matthew Holt.
Not for use in production or redistribution.
For development of Papa Parse only.
**/
/**
 * Core delimited-text (CSV-style) parser.
 *
 * Construct with an optional config object, then call parse(input).
 *
 * Recognised config properties:
 *   delimiter - single-character field delimiter. Falls back to "," when it
 *               is missing, not exactly one character long, or one of the
 *               disallowed characters (\r, \n, double quote, byte order mark).
 *   comments  - true to skip lines starting with "#", or a single character
 *               to use instead. Any other value (or one equal to the
 *               delimiter) disables comment handling.
 *   step      - function(results, parser) invoked once per completed row.
 *               In this mode rows are not accumulated and parse() does not
 *               return the results.
 *   complete  - function() invoked after the last row; only used together
 *               with a step function.
 *   preview   - when greater than 0, parsing stops after that many rows.
 *
 * Without a step function, parse() returns { data, errors, lines }.
 */
function Parser(config)
{
	var self = this;
	var BYTE_ORDER_MARK = "\ufeff";
	var EMPTY = /^\s*$/;

	// Delimiters that are not allowed
	var _badDelimiters = ["\r", "\n", "\"", BYTE_ORDER_MARK];

	var _input;			// The input text being parsed
	var _delimiter;		// The delimiting character
	var _comments;		// Comment character (default '#') or false when disabled
	var _step;			// The step (streaming) function
	var _callback;		// The callback to invoke when finished (step mode only)
	var _preview;		// Maximum number of rows to parse (values <= 0 mean no limit)
	var _ch;			// Current character
	var _i;				// Current character's positional index
	var _inQuotes;		// Whether in quotes or not
	var _lineNum;		// Current line number (1-based indexing)
	var _data;			// Parsed data (results)
	var _errors;		// Parse errors
	var _rowIdx;		// Current row index within results (0-based)
	var _colIdx;		// Current col index within result row (0-based)
	var _aborted;		// Abort flag
	var _paused;		// Pause flag
	var _rowsStepped;	// Rows handed to the step function since parse() began

	// Unpack the config object
	config = config || {};
	_delimiter = config.delimiter;
	_comments = config.comments;
	_step = config.step;
	_callback = config.complete;
	_preview = config.preview;

	// Delimiter integrity check
	if (typeof _delimiter !== 'string'
		|| _delimiter.length != 1
		|| _badDelimiters.indexOf(_delimiter) > -1)
		_delimiter = ",";

	// Comment character integrity check
	if (_comments === true)
		_comments = "#";
	else if (typeof _comments !== 'string'
		|| _comments.length != 1
		|| _badDelimiters.indexOf(_comments) > -1
		|| _comments == _delimiter)
		_comments = false;

	// Parses delimited text input. Throws (a string) when input is not a string.
	this.parse = function(input)
	{
		if (typeof input !== 'string')
			throw "Input must be a string";
		reset(input);
		return parserLoop();
	};

	// Requests that the parser loop stop before handling the next character.
	this.pause = function()
	{
		_paused = true;
	};

	// Clears the pause flag and continues from the current position.
	// Does nothing more when no input has been given yet or it is exhausted.
	this.resume = function()
	{
		_paused = false;
		if (typeof _input === 'string' && _i < _input.length)
			return parserLoop();
	};

	// Requests that the parser loop stop; the run is then finished normally.
	this.abort = function()
	{
		_aborted = true;
	};

	// Main loop: dispatches on the current character until the input ends,
	// the parser is aborted, the preview limit is hit, or it is paused.
	// A pause returns immediately WITHOUT finishing; resume() re-enters here.
	function parserLoop()
	{
		while (_i < _input.length)
		{
			if (_aborted) break;
			if (previewReached()) break;
			if (_paused) return;

			if (_ch == '"')
				parseQuotes();
			else if (_inQuotes)
				parseInQuotes();
			else
				parseNotInQuotes();

			nextChar();
		}

		return finishParsing();
	}

	// True once the preview limit has been met. Accumulated results are
	// measured by the current row index; in step mode the results are cleared
	// after every row, so the number of rows already streamed is used instead.
	function previewReached()
	{
		return _preview > 0
			&& (_rowIdx >= _preview || _rowsStepped >= _preview);
	}

	// Advances to the next character of the input.
	function nextChar()
	{
		_i++;
		_ch = _input[_i];
	}

	// Reports unterminated quotes, closes the last row, then either returns
	// the results (no step function) or invokes the completion callback.
	function finishParsing()
	{
		if (_inQuotes)
			addError("Quotes", "MissingQuotes", "Unescaped or mismatched quotes");

		endRow();	// End of input is also end of the last row

		if (typeof _step !== 'function')
			return returnable();
		else if (typeof _callback === 'function')
			_callback();
	}

	// Handles a double-quote character. A quote next to a field boundary
	// toggles quoted mode. Any other quote is saved as data: inside quotes a
	// doubled quote is an escape (the second quote is skipped), otherwise an
	// UnexpectedQuotes error is recorded.
	function parseQuotes()
	{
		if (quotesOnBoundary() && !quotesEscaped())
			_inQuotes = !_inQuotes;
		else
		{
			saveChar();
			if (_inQuotes && quotesEscaped())
				_i++;	// skip the second quote of the escaped pair
			else
				addError("Quotes", "UnexpectedQuotes", "Unexpected quotes");
		}
	}

	// Inside quotes every character is data, line breaks included; line
	// breaks still advance the line counter.
	function parseInQuotes()
	{
		saveChar();
		if (twoCharLineBreak())
		{
			nextChar();
			saveChar();
			_lineNum++;
		}
		else if (oneCharLineBreak())
			_lineNum++;
	}

	// Outside quotes: delimiters start a field, line breaks start a row,
	// a comment character at the start of a line skips that line, and
	// anything else is field data.
	function parseNotInQuotes()
	{
		if (_ch == _delimiter)
			newField();
		else if (twoCharLineBreak())
		{
			newRow();
			nextChar();	// consume the second character of the line break
		}
		else if (oneCharLineBreak())
			newRow();
		else if (isCommentStart())
			skipLine();
		else
			saveChar();
	}

	// True when the current character is the comment character and sits at
	// the very start of a line. Never true when comments are disabled,
	// because _comments is then false and the strict comparison fails.
	function isCommentStart()
	{
		var firstCharOfLine = _i == 0
			|| oneCharLineBreak(_i-1)
			|| twoCharLineBreak(_i-2);
		return firstCharOfLine && _input[_i] === _comments;
	}

	// Advances until the current character is a line break or the input
	// ends. The position is left ON the line break; the main loop's
	// nextChar() then steps past its first character.
	function skipLine()
	{
		while (!twoCharLineBreak()
			&& !oneCharLineBreak()
			&& _i < _input.length)
		{
			nextChar();
		}
	}

	// Appends the current character to the current field.
	function saveChar()
	{
		_data[_rowIdx][_colIdx] += _ch;
	}

	// Starts a new empty field at the end of the current row.
	function newField()
	{
		_data[_rowIdx].push("");
		_colIdx = _data[_rowIdx].length - 1;
	}

	// Closes the current row and starts a new one with one empty field.
	function newRow()
	{
		endRow();
		_lineNum++;
		_data.push([]);
		_rowIdx = _data.length - 1;
		newField();
	}

	// Finalises the current row. In step mode the row (if it survived
	// trimming) is handed to the step function and the results are cleared.
	function endRow()
	{
		trimEmptyLastRow();
		if (typeof _step === 'function')
		{
			if (_data[_rowIdx])
			{
				_rowsStepped++;
				_step(returnable(), self);
			}
			clearErrorsAndData();
		}
	}

	// Drops the current row when it consists of a single blank field.
	function trimEmptyLastRow()
	{
		if (_data[_rowIdx].length == 1 && EMPTY.test(_data[_rowIdx][0]))
		{
			_data.splice(_rowIdx, 1);
			_rowIdx = _data.length - 1;
		}
	}

	// True when "\r\n" or "\n\r" starts at index i (default: current index).
	function twoCharLineBreak(i)
	{
		if (typeof i !== 'number')
			i = _i;
		return i < _input.length - 1 &&
			((_input[i] == "\r" && _input[i+1] == "\n")
			|| (_input[i] == "\n" && _input[i+1] == "\r"));
	}

	// True when the character at index i (default: current) is "\r" or "\n".
	function oneCharLineBreak(i)
	{
		if (typeof i !== 'number')
			i = _i;
		return _input[i] == "\r" || _input[i] == "\n";
	}

	function quotesEscaped()
	{
		// Quotes as data cannot be on boundary, for example: ,"", are not escaped quotes
		return !quotesOnBoundary() && _i < _input.length - 1 && _input[_i+1] == '"';
	}

	// True when the character before or after the current one is a boundary.
	function quotesOnBoundary()
	{
		return isBoundary(_i-1) || isBoundary(_i+1);
	}

	// A boundary is the position just before or just after the input, or a
	// delimiter / line-break character inside it.
	function isBoundary(i)
	{
		if (typeof i != 'number')
			i = _i;

		var ch = _input[i];

		return (i == -1 || i == _input.length)
			|| (i < _input.length
				&& i > -1
				&& (ch == _delimiter
					|| ch == "\r"
					|| ch == "\n"));
	}

	// Records a parse error tagged with the current line, row and index.
	function addError(type, code, msg)
	{
		_errors.push({
			type: type,
			code: code,
			message: msg,
			line: _lineNum,
			row: _rowIdx,
			index: _i
		});
	}

	// Prepares the parser state for a fresh run over the given input.
	function reset(input)
	{
		_input = input;
		_inQuotes = false;
		_lineNum = 1;
		_i = 0;
		_aborted = false;	// an abort only applies to the run it was issued in
		_rowsStepped = 0;
		clearErrorsAndData();
		_data = [ [""] ];	// starting parsing requires an empty field
		_ch = _input[_i];
	}

	// Empties the results and errors and rewinds the row/column indices.
	function clearErrorsAndData()
	{
		_data = [];
		_errors = [];
		_rowIdx = 0;
		_colIdx = 0;
	}

	// Snapshot of the current results in the shape handed to callers.
	function returnable()
	{
		return {
			data: _data,
			errors: _errors,
			lines: _lineNum
		};
	}
}

172
test-runner.js

@ -0,0 +1,172 @@ @@ -0,0 +1,172 @@
var passCount = 0;
var failCount = 0;

// Once the DOM is ready: wire up the expand/collapse controls, run every
// entry in TESTS, and show the overall pass/fail banner.
$(function()
{
	// Returns "" for exactly one item and "s" otherwise.
	function pluralSuffix(count)
	{
		return count == 1 ? "" : "s";
	}

	// Clicking a row's toggle cell swaps between the condensed placeholder
	// and the full details, and flips the +/- marker.
	$('#results').on('click', 'td.rvl', function()
	{
		var toggleCell = $(this);
		var row = toggleCell.closest('tr');
		var placeholder = $('.revealer', row);
		var details = $('.hidden', row);

		if (row.hasClass('collapsed'))
		{
			placeholder.hide();
			details.show();
			toggleCell.html("-");
		}
		else
		{
			placeholder.show();
			details.hide();
			toggleCell.html("+");
		}

		row.toggleClass('collapsed expanded');
	});

	// The bulk controls simply click the toggle of every matching row.
	$('#expand-all').click(function()
	{
		$('.collapsed .rvl').click();
	});

	$('#collapse-all').click(function()
	{
		$('.expanded .rvl').click();
	});

	// Run the tests in order, rendering each result row as we go.
	for (var idx = 0; idx < TESTS.length; idx++)
	{
		if (runTest(TESTS[idx], idx))
			passCount++;
		else
			failCount++;
	}

	// Overall status banner.
	var statusBox = $('#status');
	if (failCount == 0)
	{
		statusBox
			.addClass('status-pass')
			.html("All <b>"+passCount+"</b> test"+pluralSuffix(passCount)+" passed");
	}
	else
	{
		statusBox
			.addClass('status-fail')
			.html("<b>"+failCount+"</b> test"+pluralSuffix(failCount)+" failed; <b>"+passCount+"</b> passed");
	}
});
/**
 * Runs one test case through a fresh Parser, appends a result row to the
 * #results table, and auto-expands the row when the test failed.
 *
 * @param test  test case with config, input, expected and optional
 *              description / notes
 * @param num   index of the test, used to build the row's element id
 * @returns the combined pass state of the data and error comparisons
 */
function runTest(test, num)
{
	// Wraps content in a cell that shows "condensed" until the row is expanded.
	function revealableCell(content)
	{
		return '<td class="revealable pre"><div class="revealer">condensed</div><div class="hidden">'
			+ content
			+ '</div></td>';
	}

	// Renders a data/errors pair the same way for expected and actual values.
	function dataAndErrors(source)
	{
		return 'data: ' + JSON.stringify(source.data, null, 4)
			+ '\r\nerrors: ' + JSON.stringify(source.errors, null, 4);
	}

	var actual = new Parser(test.config).parse(test.input);
	var results = compare(actual.data, actual.errors, test.expected);

	// Description first, optional notes on their own line below it.
	var caption = (test.description || "");
	if (caption.length > 0)
		caption += '<br>';
	if (test.notes)
		caption += '<span class="notes">' + test.notes + '</span>';

	var rowHtml = '<tr class="collapsed" id="test-'+num+'">';
	rowHtml += '<td class="rvl">+</td>';
	rowHtml += '<td>' + caption + '</td>';
	rowHtml += passOrFailTd(results.data);
	rowHtml += passOrFailTd(results.errors);
	rowHtml += revealableCell(JSON.stringify(test.config, null, 2));
	rowHtml += revealableCell(revealChars(test.input));
	rowHtml += revealableCell(dataAndErrors(test.expected));
	rowHtml += revealableCell(dataAndErrors(actual));
	rowHtml += '</tr>';

	$('#results').append(rowHtml);

	// Failing rows start expanded so the details are visible immediately.
	var outcome = results.data.passed && results.errors.passed;
	if (!outcome)
		$('#test-'+num+' td.rvl').click();

	return outcome;
}
/**
 * Compares parser output against a test's expectations.
 *
 * @returns { data, errors } holding the results of compareData and
 *          compareErrors respectively
 */
function compare(actualData, actualErrors, expected)
{
	return {
		data: compareData(actualData, expected.data),
		errors: compareErrors(actualErrors, expected.errors)
	};
}
/**
 * Checks that the parsed rows match the expected rows exactly: same number
 * of rows, same number of fields in every row, and strictly equal values.
 *
 * @param actual    array of rows produced by the parser
 * @param expected  array of rows the test expects
 * @returns { passed: boolean }
 */
function compareData(actual, expected)
{
	var passed = actual != null && actual.length == expected.length;

	for (var row = 0; passed && row < expected.length; row++)
	{
		var actualRow = actual[row];
		var expectedRow = expected[row];

		// Row lengths are compared before looking at any field, so an
		// expected empty row cannot match a non-empty actual row.
		if (!actualRow || actualRow.length != expectedRow.length)
		{
			passed = false;
			break;
		}

		for (var col = 0; col < expectedRow.length; col++)
		{
			if (actualRow[col] !== expectedRow[col])
			{
				passed = false;
				break;
			}
		}
	}

	// We pass back an object right now, even though it only contains
	// one value, because we might add details to the test results later
	// (same with compareErrors below)
	return {
		passed: passed
	};
}
/**
 * Compares two error lists by their JSON serialisation, so property order
 * matters as well as the values.
 *
 * @returns { passed: boolean }
 */
function compareErrors(actual, expected)
{
	var actualJson = JSON.stringify(actual);
	var expectedJson = JSON.stringify(expected);

	return { passed: actualJson == expectedJson };
}
/**
 * Builds the OK / FAIL table cell for a comparison result.
 *
 * @param result  object whose `passed` property decides the outcome
 * @returns HTML for a single <td>
 */
function passOrFailTd(result)
{
	var ok = result.passed;
	var cssClass = ok ? 'ok' : 'fail';
	var label = ok ? 'OK' : 'FAIL';

	return '<td class="' + cssClass + '">' + label + '</td>';
}
/**
 * Turns raw test input into HTML in which whitespace is easy to spot:
 * spaces and tabs are wrapped in a highlighted span, and every line break
 * is preceded by a span spelling it out (\r, \n, \r\n or \n\r) while the
 * real line break is kept so the text still wraps where it should.
 *
 * @param txt  raw input text
 * @returns HTML markup for insertion into the page
 */
function revealChars(txt)
{
	// The result is inserted as HTML, so markup-significant characters in
	// the input must be escaped before any spans are added.
	txt = txt.replace(/&/g, '&amp;')
		.replace(/</g, '&lt;')
		.replace(/>/g, '&gt;');

	// Make spaces and tabs more obvious when glancing
	txt = txt.replace(/( |\t)/g, '<span class="whitespace-char">$1</span>');

	// Show each line break as visible text, followed by the break itself
	txt = txt.replace(/\r\n|\n\r|\r|\n/g, function(lineBreak)
	{
		var visible = lineBreak.replace(/\r/g, '\\r').replace(/\n/g, '\\n');
		return '<span class="whitespace-char special-char">' + visible + '</span>' + lineBreak;
	});

	return txt;
}

159
tests.css

@ -0,0 +1,159 @@ @@ -0,0 +1,159 @@
/* Eric Meyer's Reset CSS v2.0 */
html,body,div,span,applet,object,iframe,h1,h2,h3,h4,h5,h6,p,blockquote,pre,a,abbr,acronym,address,big,cite,code,del,dfn,em,img,ins,kbd,q,s,samp,small,strike,strong,sub,sup,tt,var,b,u,i,center,dl,dt,dd,ol,ul,li,fieldset,form,label,legend,table,caption,tbody,tfoot,thead,tr,th,td,article,aside,canvas,details,embed,figure,figcaption,footer,header,hgroup,menu,nav,output,ruby,section,summary,time,mark,audio,video{border:0;font-size:100%;font:inherit;vertical-align:baseline;margin:0;padding:0}article,aside,details,figcaption,figure,footer,header,hgroup,menu,nav,section{display:block}body{line-height:1}ol,ul{list-style:none}blockquote,q{quotes:none}blockquote:before,blockquote:after,q:before,q:after{content:none}table{border-collapse:collapse;border-spacing:0}
/* ---- Page basics ---- */
body {
background: #F0F0F0;
font: 14px 'Helvetica Neue', sans-serif;
color: #333;
padding: 30px 15px;
}
a {
color: rgb(0, 142, 236);
}
a:hover {
color: rgb(82, 186, 255);
}
/* The reset at the top of this stylesheet makes every element inherit its
   font, so bold and italic are restored explicitly for <b> and <i>. */
b {
font-weight: bold;
}
i {
font-style: italic;
}
h1 {
text-align: center;
font-weight: bold;
font-size: 62px;
margin-bottom: 30px;
}
/* ---- Overall status banner ----
   The test runner adds one of these two classes to #status. */
.status-pass,
.status-fail {
padding: 10px;
margin-bottom: 30px;
color: #FFF;
text-align: center;
text-transform: uppercase;
font-size: 18px;
letter-spacing: 1px;
font-weight: 100;
}
.status-pass {
background: rgb(3, 168, 3);
}
.status-fail {
background: #BB0000;
}
/* ---- Results table ---- */
table {
width: 100%;
border-collapse: collapse;
margin-top: 5px;
}
table th,
table td {
padding: 5px;
border: 1px solid #BBB;
}
table th {
color: #000;
background: #DDD;
font-weight: bold;
padding: 10px 5px;
text-transform: uppercase;
}
table td {
background: #FFF;
color: #555;
font-size: 14px;
}
/* Per-test OK / FAIL cells */
td.ok,
td.fail {
text-transform: uppercase;
font-weight: 300;
vertical-align: middle;
text-align: center;
width: 80px;
}
td.ok {
background: rgb(204, 250, 144);
}
td.fail {
background: rgb(255, 192, 192);
}
/* The +/- cell that expands or collapses a row when clicked */
td.rvl {
background: #444;
color: #999;
vertical-align: middle;
text-align: center;
cursor: pointer;
width: 20px;
}
td.rvl:hover {
color: #FFF;
}
/* ---- Collapsed / expanded detail cells ---- */
/* In a collapsed row the detail cells are greyed out and centred */
tr.collapsed td.revealable {
background: #ECECEC;
vertical-align: middle;
text-align: center;
font-family: 'Helvetica Neue', sans-serif;
text-transform: lowercase;
color: #AAA;
}
/* Styling for the "condensed" placeholder inside an expanded row */
tr.expanded .revealer {
font-family: 'Helvetica Neue', sans-serif;
text-transform: lowercase;
font-size: 10px;
background: #FFF;
position: absolute;
display: block;
padding: 3px;
top: -5px;
right: -5px;
}
/* NOTE(review): presumably the positioning context for .revealer above, but
   the markup built by the test runner shown alongside this file does not
   emit a .container element - confirm whether this rule is still needed. */
td .container {
position: relative;
}
/* Optional notes shown under a test's description */
.notes {
color: #888;
font-size: 12px;
}
/* Monospaced cells that preserve whitespace but still wrap */
.pre {
font-family: Menlo, Monaco, monospace;
white-space: pre-wrap;
}
td.pre {
font-size: 12px;
}
/* Full details stay hidden until the row is expanded */
.hidden {
display: none;
}
/* ---- Whitespace markers added to test input ---- */
/* Spelled-out line breaks */
.special-char {
color: #78B7E7;
}
/* Highlighted spaces, tabs and line-break markers */
.whitespace-char {
background: #D5FCFA;
}

128
tests.html

@ -1,127 +1,33 @@ @@ -1,127 +1,33 @@
<!DOCTYPE html>
<html>
<head>
<title>Parser Tests</title>
<script src="http://ajax.googleapis.com/ajax/libs/jquery/2.0.3/jquery.min.js"></script>
<script src="jquery.parse.js"></script>
<title>Tests - Papa Parse</title>
<link rel="stylesheet" href="tests.css">
<script src="http://ajax.googleapis.com/ajax/libs/jquery/2.1.0/jquery.min.js"></script>
<script src="Parser.js"></script>
<script src="tests.js"></script>
<style>
body {
font-family: sans-serif;
}
#tmp {
white-space: pre;
font-family: 'Menlo', 'Monaco', 'Courier New', monospace;
font-size: 10px;
}
#results {
border-collapse: collapse;
width: 100%;
}
#results td {
vertical-align: top;
padding: 10px;
border-bottom: 10px solid white;
}
#results td div {
overflow-x: auto;
}
.count {
background: #333;
color: #DDD;
width: 2em;
text-align: center;
}
.input,
.output {
width: 25%;
}
.input {
background: #DDD;
}
.config {
background: #CCC;
}
.output {
background: #EEE;
}
.input code,
.config code,
.output code {
font: 12px/1.5em 'Menlo', 'Monaco', 'Courier New', monospace;
display: block;
white-space: pre;
}
.clr-green,
.passing {
color: #475B15;
}
.clr-red,
.failing {
color: #AA0000;
}
.passing {
background: #ECF9CC;
color: #475B15;
}
.failing {
background: #FFE8E8;
}
.failing code {
font-weight: bold;
}
hr {
border: 0;
background: 0;
clear: both;
}
<script src="test-runner.js"></script>
</head>
<body>
<h1>Papa Parse Tests</h1>
.clr-green {
color: #79A01E;
}
<div id="status"></div>
.clr-red {
color: #AA0000;
}
<a href="javascript:" id="expand-all">Expand all</a>
&middot;
<a href="javascript:" id="collapse-all">Collapse all</a>
<br>
#pass-count,
#fail-count {
font-weight: bold;
}
</style>
</head>
<body>
SUMMARY
&nbsp;
<span class="clr-green"><span id="pass-count">-</span> PASS</span>
&nbsp;
<span class="clr-red"><span id="fail-count">-</span> FAIL</span>
<br><br>
<table id="results">
<tr>
<th></th>
<th>Input</th>
<th colspan="2">Test Case</th>
<th>Data</th>
<th>Errors</th>
<th>Config</th>
<th>Input</th>
<th>Expected</th>
<th>Actual</th>
</tr>
</table>
<div id="output"></div>
</body>
</html>

1071
tests.js

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save