Browse Source

isBoundary fix for CRLF related to issue #5. Better error structure.

pull/8/head 0.6.0
Matthew Holt 11 years ago
parent
commit
7accd53776
  1. 26
      README.md
  2. 47
      jquery.parse.js
  3. 4
      jquery.parse.min.js
  4. 2
      parse.jquery.json

26
README.md

@@ -5,6 +5,12 @@ Robust, efficient CSV parsing (supports custom delimiting characters). Malformed
**[jsFIDDLE DEMO](http://jsfiddle.net/mholt/nCaee/)**
Most people will want to use the minified [jquery.parse.min.js](https://github.com/mholt/jquery.parse/blob/master/jquery.parse.min.js) file.
For debug and development, feel free to use the original [jquery.parse.js](https://github.com/mholt/jquery.parse/blob/master/jquery.parse.js) file.
This project is under test. Download this repository and open `tests.html` in your browser to run them.
Basic usage
-----------
@@ -102,9 +108,11 @@ Here is the structure of an error:
```javascript
{
message: "", // Human-readable message
type: "", // Either "Quotes" or "FieldMismatch"
code: "", // Standardized error code like "UnexpectedQuotes"
message: "", // Human-readable error details
line: 0, // Line of original input
row: 0, // Row index where error was
row: 0, // Row index of parsed data where error is
index: 0 // Character index within original input
}
```
@@ -135,12 +143,16 @@ Notice the stray quotes on the second line. This is the output:
},
"errors": [
{
"type": "FieldMismatch",
"code": "TooFewFields",
"message": "Too few fields; expected 4 fields, parsed 2",
"line": 2,
"row": 0,
"index": 66
},
{
"type": "Quotes",
"code": "MissingQuotes",
"message": "Unescaped or mismatched quotes",
"line": 2,
"row": 0,
@@ -168,6 +180,8 @@ If the header row is disabled, field counting does not occur, because there is n
],
"errors": [
{
"type": "Quotes",
"code": "MissingQuotes",
"message": "Unescaped or mismatched quotes",
"line": 2,
"row": 1,
@@ -216,13 +230,17 @@ Again, notice the second line, "10,95" instead of "10.95". This field *should* b
},
"errors": [
{
"message": "Too many fields; expected 4 fields, found extra value: '4'",
"type": "FieldMismatch",
"code": "TooManyFields",
"message": "Too many fields: expected 4 fields but parsed 5",
"line": 2,
"row": 0,
"index": 43
},
{
"message": "Too few fields; expected 4 fields, parsed 5",
"type": "FieldMismatch",
"code": "TooFewFields",
"message": "Too few fields: expected 4 fields but parsed 5",
"line": 2,
"row": 0,
"index": 43

47
jquery.parse.js

@@ -1,6 +1,6 @@
/*
jQuery Parse Plugin
v0.5.9
v0.6.0
https://github.com/mholt/jquery.parse
*/
@@ -82,7 +82,7 @@
endRow();
if (_state.inQuotes)
addError("Unescaped or mismatched quotes");
addError("Quotes", "MissingQuotes", "Unescaped or mismatched quotes");
return self.getParsed();
};
@@ -156,9 +156,9 @@
function handleQuote()
{
var delimBefore = (_state.i > 0 && isBoundary(_input[_state.i-1]))
var delimBefore = (_state.i > 0 && isBoundary(_state.i-1))
|| _state.i == 0;
var delimAfter = (_state.i < _input.length - 1 && isBoundary(_input[_state.i+1]))
var delimAfter = (_state.i < _input.length - 1 && isBoundary(_state.i+1))
|| _state.i == _input.length - 1;
var escaped = _state.i < _input.length - 1
&& _input[_state.i+1] == '"';
@@ -174,7 +174,7 @@
}
else
{
addError("Unexpected quotes");
addError("Quotes", "UnexpectedQuotes", "Unexpected quotes");
}
}
@@ -204,9 +204,30 @@
appendCharToField();
}
/**
 * Reports whether the character at index i of _input is a field or row
 * boundary: the configured delimiter, a "\n", or a "\r" that is immediately
 * followed by "\n".
 *
 * @param {number} i Index into _input.
 * @returns {boolean} true for a boundary character; false otherwise, and
 *                    always false when i lies outside _input.
 */
function isBoundary(i)
{
	// Out-of-range indexes read as undefined; reject them up front so they
	// can never compare equal to a missing delimiter.
	if (i < 0 || i >= _input.length)
		return false;

	var ch = _input[i];

	// A "\r" only counts when it is the first half of a CRLF pair.
	return ch == _config.delimiter
		|| ch == "\n"
		|| (ch == "\r" && i < _input.length - 1 && _input[i+1] == "\n");
}
/**
 * Reports whether the character at index i of _input starts a line ending:
 * either a "\n", or a "\r" that is immediately followed by "\n".
 *
 * @param {number} i Index into _input.
 * @returns {boolean} false for any other character and for indexes at or
 *                    past the end of _input.
 */
function isLineEnding(i)
{
	var total = _input.length;
	if (i >= total)
		return false;

	var current = _input[i];
	if (current == "\n")
		return true;

	// The CRLF form needs a following character to inspect.
	var hasNext = i < total - 1;
	return hasNext && current == "\r" && _input[i+1] == "\n";
}
function saveField()
@@ -230,7 +251,6 @@
if (typeof currentRow.__parsed_extra === 'undefined')
currentRow.__parsed_extra = [];
currentRow.__parsed_extra.push(_state.fieldVal);
addError("Too many fields; expected " + _state.parsed.fields.length + " fields, found extra value: '" + _state.fieldVal + "'");
}
}
}
@@ -312,15 +332,18 @@
if (lastRow.hasOwnProperty(prop))
actual ++;
if (expected != actual)
return addError("Too few fields; expected " + expected + " fields, parsed " + actual);
if (actual < expected)
return addError("FieldMismatch", "TooFewFields", "Too few fields: expected " + expected + " fields but parsed " + actual);
else if (actual > expected)
return addError("FieldMismatch", "TooManyFields", "Too many fields: expected " + expected + " fields but parsed " + actual);
return true;
}
function addError(msg)
function addError(type, code, msg)
{
_errors.push({
type: type,
code: code,
message: msg,
line: _state.lineNum,
row: _config.header ? _state.parsed.rows.length - 1 : _state.parsed.length - 1,

4
jquery.parse.min.js vendored

@@ -1,6 +1,6 @@
/*
jQuery Parse Plugin
v0.5.9
v0.6.0
https://github.com/mholt/jquery.parse
*/
;(function(e){"use strict";function n(e){e.delimiter=e.delimiter||t.delimiter;e.header=typeof e.header==="undefined"?t.header:e.header;e.dynamicTyping=typeof e.dynamicTyping==="undefined"?t.dynamicTyping:e.dynamicTyping;if(e.delimiter=='"'||e.delimiter=="\n")e.delimiter=t.delimiter;if(e.delimiter.length>1)e.delimiter=e.delimiter[0];return e}function r(e,t){function a(e){return e?{fields:[],rows:[]}:[[]]}function f(){return{i:0,lineNum:1,field:0,fieldVal:"",line:"",ch:"",inQuotes:false,parsed:a(t.header)}}function l(){var e=u.i>0&&d(r[u.i-1])||u.i==0;var t=u.i<r.length-1&&d(r[u.i+1])||u.i==r.length-1;var n=u.i<r.length-1&&r[u.i+1]=='"';if(u.inQuotes&&n){u.fieldVal+='"';u.i++}else if(e||t){u.inQuotes=!u.inQuotes}else{E("Unexpected quotes")}}function c(){h()}function h(){u.fieldVal+=u.ch}function p(){if(u.ch==i.delimiter)v();else if(u.ch=="\r"&&u.i<r.length-1&&r[u.i+1]=="\n"){g();u.i++}else if(u.ch=="\n")g();else h()}function d(e){return e==i.delimiter||e=="\n"}function v(){if(i.header){if(u.lineNum==1)u.parsed.fields.push(u.fieldVal);else{var e=u.parsed.rows[u.parsed.rows.length-1];var t=u.parsed.fields[u.field];if(t){if(i.dynamicTyping)u.fieldVal=y(u.fieldVal);e[t]=u.fieldVal}else{if(typeof e.__parsed_extra==="undefined")e.__parsed_extra=[];e.__parsed_extra.push(u.fieldVal);E("Too many fields; expected "+u.parsed.fields.length+" fields, found extra value: '"+u.fieldVal+"'")}}}else{if(i.dynamicTyping)u.fieldVal=y(u.fieldVal);u.parsed[u.parsed.length-1].push(u.fieldVal)}u.fieldVal="";u.field++}function m(){v();var e=b();if(!e&&i.header)w()}function g(){m();if(i.header&&u.lineNum>0)u.parsed.rows.push({});else u.parsed.push([]);u.lineNum++;u.line="";u.field=0}function y(e){var t=o.floats.test(e);return t?parseFloat(e):e}function b(){if(o.empty.test(u.line)){if(i.header){if(u.lineNum==1){u.parsed.fields=[];u.lineNum--}else u.parsed.rows.splice(u.parsed.rows.length-1,1)}else u.parsed.splice(u.parsed.length-1,1);return true}return false}function w(){if(!i.header)return 
true;if(u.parsed.rows.length==0)return true;var e=u.parsed.fields.length;var t=0;var n=u.parsed.rows[u.parsed.rows.length-1];for(var r in n)if(n.hasOwnProperty(r))t++;if(e!=t)return E("Too few fields; expected "+e+" fields, parsed "+t);return true}function E(e){s.push({message:e,line:u.lineNum,row:i.header?u.parsed.rows.length-1:u.parsed.length-1,index:u.i});return false}var n=this;var r=e;var i=t;var s=[];var o={floats:/^-?\d+(\.\d+)?$/,empty:/^\s*$/};var u=f();this.parse=function(e){if(typeof e==="object")n.setConfig(e);else if(typeof e==="string")n.setInput(e);s=[];u=f();for(u.i=0;u.i<r.length;u.i++){u.ch=r[u.i];u.line+=u.ch;if(u.ch=='"')l();else if(u.inQuotes)c();else p()}m();if(u.inQuotes)E("Unescaped or mismatched quotes");return n.getParsed()};this.getDelimiter=function(){return t.delimiter};this.setDelimiter=function(e){var t=",";e=e?e=='"'||e=="\n"?t:e:t;i.delimiter=e[0]};this.setConfig=function(e){if(typeof e.header!=="undefined"&&e.header!=t.header||typeof e.delimiter!=="undefined"&&e.delimiter!=t.delimiter){u.parsed=a(e.header)}i=e};this.getInput=function(){return r};this.setInput=function(e){r=e};this.getParsed=function(){return u.parsed};this.getErrors=function(){return s}}var t={delimiter:",",header:true,dynamicTyping:false};e.parse=function(e,t){t=n(t);var i=new r(e,t);return{results:i.parse(),errors:i.getErrors()}}})(jQuery);
;(function(e){"use strict";function n(e){e.delimiter=e.delimiter||t.delimiter;e.header=typeof e.header==="undefined"?t.header:e.header;e.dynamicTyping=typeof e.dynamicTyping==="undefined"?t.dynamicTyping:e.dynamicTyping;if(e.delimiter=='"'||e.delimiter=="\n")e.delimiter=t.delimiter;if(e.delimiter.length>1)e.delimiter=e.delimiter[0];return e}function r(e,t){function a(e){return e?{fields:[],rows:[]}:[[]]}function f(){return{i:0,lineNum:1,field:0,fieldVal:"",line:"",ch:"",inQuotes:false,parsed:a(t.header)}}function l(){var e=u.i>0&&d(u.i-1)||u.i==0;var t=u.i<r.length-1&&d(u.i+1)||u.i==r.length-1;var n=u.i<r.length-1&&r[u.i+1]=='"';if(u.inQuotes&&n){u.fieldVal+='"';u.i++}else if(e||t){u.inQuotes=!u.inQuotes}else{S("Quotes","UnexpectedQuotes","Unexpected quotes")}}function c(){h()}function h(){u.fieldVal+=u.ch}function p(){if(u.ch==i.delimiter)m();else if(u.ch=="\r"&&u.i<r.length-1&&r[u.i+1]=="\n"){y();u.i++}else if(u.ch=="\n")y();else h()}function d(e){if(e>=r.length)return false;var t=r[e];if(t==i.delimiter||t=="\n"||t=="\r"&&e<r.length-1&&r[e+1]=="\n")return true;else return false}function v(e){if(e>=r.length)return false;if(e<r.length-1)return r[e]=="\n"||r[e]=="\r"&&r[e+1]=="\n";else return r[e]=="\n"}function m(){if(i.header){if(u.lineNum==1)u.parsed.fields.push(u.fieldVal);else{var e=u.parsed.rows[u.parsed.rows.length-1];var t=u.parsed.fields[u.field];if(t){if(i.dynamicTyping)u.fieldVal=b(u.fieldVal);e[t]=u.fieldVal}else{if(typeof e.__parsed_extra==="undefined")e.__parsed_extra=[];e.__parsed_extra.push(u.fieldVal)}}}else{if(i.dynamicTyping)u.fieldVal=b(u.fieldVal);u.parsed[u.parsed.length-1].push(u.fieldVal)}u.fieldVal="";u.field++}function g(){m();var e=w();if(!e&&i.header)E()}function y(){g();if(i.header&&u.lineNum>0)u.parsed.rows.push({});else u.parsed.push([]);u.lineNum++;u.line="";u.field=0}function b(e){var t=o.floats.test(e);return t?parseFloat(e):e}function w(){if(o.empty.test(u.line)){if(i.header){if(u.lineNum==1){u.parsed.fields=[];u.lineNum--}else 
u.parsed.rows.splice(u.parsed.rows.length-1,1)}else u.parsed.splice(u.parsed.length-1,1);return true}return false}function E(){if(!i.header)return true;if(u.parsed.rows.length==0)return true;var e=u.parsed.fields.length;var t=0;var n=u.parsed.rows[u.parsed.rows.length-1];for(var r in n)if(n.hasOwnProperty(r))t++;if(t<e)return S("FieldMismatch","TooFewFields","Too few fields: expected "+e+" fields but parsed "+t);else if(t>e)return S("FieldMismatch","TooManyFields","Too many fields: expected "+e+" fields but parsed "+t);return true}function S(e,t,n){s.push({type:e,code:t,message:n,line:u.lineNum,row:i.header?u.parsed.rows.length-1:u.parsed.length-1,index:u.i});return false}var n=this;var r=e;var i=t;var s=[];var o={floats:/^-?\d+(\.\d+)?$/,empty:/^\s*$/};var u=f();this.parse=function(e){if(typeof e==="object")n.setConfig(e);else if(typeof e==="string")n.setInput(e);s=[];u=f();for(u.i=0;u.i<r.length;u.i++){u.ch=r[u.i];u.line+=u.ch;if(u.ch=='"')l();else if(u.inQuotes)c();else p()}g();if(u.inQuotes)S("Quotes","MissingQuotes","Unescaped or mismatched quotes");return n.getParsed()};this.getDelimiter=function(){return t.delimiter};this.setDelimiter=function(e){var t=",";e=e?e=='"'||e=="\n"?t:e:t;i.delimiter=e[0]};this.setConfig=function(e){if(typeof e.header!=="undefined"&&e.header!=t.header||typeof e.delimiter!=="undefined"&&e.delimiter!=t.delimiter){u.parsed=a(e.header)}i=e};this.getInput=function(){return r};this.setInput=function(e){r=e};this.getParsed=function(){return u.parsed};this.getErrors=function(){return s}}var t={delimiter:",",header:true,dynamicTyping:false};e.parse=function(e,t){t=n(t);var i=new r(e,t);return{results:i.parse(),errors:i.getErrors()}}})(jQuery);

2
parse.jquery.json

@@ -1,6 +1,6 @@
{
"name": "parse",
"version": "0.5.9",
"version": "0.6.0",
"title": "jQuery Parse Plugin",
"description": "Parses CSV (or any delimited text) into a usable data structure. This efficient and robust parser gracefully handles malformed input and it can work without jQuery.",
"keywords": [

Loading…
Cancel
Save