Browse Source

New feature to auto-detect the delimiter if none is specified (closes #11)

pull/17/head 1.1.0
Matthew Holt 11 years ago
parent
commit
ee1091e88f
  1. 20
      README.md
  2. 12
      index.html
  3. 125
      jquery.parse.js
  4. 4
      jquery.parse.min.js
  5. 5
      parse.jquery.json
  6. 72
      tests.js

20
README.md

@ -5,11 +5,11 @@ The jQuery Parse plugin is a robust and efficient CSV (character-separated value
- Parses delimited text strings without any fuss - Parses delimited text strings without any fuss
- Attach to `<input type="file">` elements to load and parse files from disk - Attach to `<input type="file">` elements to load and parse files from disk
- Automatically detects delimiter (or specify a delimiter yourself)
- Header row support - Header row support
- Gracefully handles malformed data - Gracefully handles malformed data
- Optional dynamic typing so that numeric data is parsed as numbers - Optional dynamic typing so that numeric data is parsed as numbers
- Descriptive and contextual errors - Descriptive and contextual errors
- Custom delimiter
@ -36,11 +36,12 @@ For debug/dev: [jquery.parse.js](https://github.com/mholt/jquery.parse/blob/mast
Any time you invoke the parser, you may customize it using a "config" object. It supports these properties: Any time you invoke the parser, you may customize it using a "config" object. It supports these properties:
| Option | Default | Description | Option | Default | Description
|-------------------- | -------- | --------------- |-------------------- | ------- | ---------------
| **`delimiter`** | `","` | The delimiting character. Must be a string with length 1. Can be any character except `\n` and `"`. | **`delimiter`** | `""` | The delimiting character. Leave blank to auto-detect. If you specify a delimiter, it must be a string of length 1, and cannot be `\n`, `\r`, or `"`.
| **`header`** | `true` | If true, interpret the first row of parsed data as column titles; fields are returned separately from the data, and data will be returned keyed to its field name. Duplicate field names would be problematic. If false, the parser simply returns an array (list) of arrays (rows), including the first row. | **`header`** | `true` | If true, interpret the first row of parsed data as column titles; fields are returned separately from the data, and data will be returned keyed to its field name. Duplicate field names would be problematic. If false, the parser simply returns an array (list) of arrays (rows), including the first row.
| **`dynamicTyping`** | `true` | If true, fields that are only numeric will be converted to a number type. If false, each parsed datum is returned as a string. | **`dynamicTyping`** | `true` | If true, fields that are only numeric will be converted to a number type. If false, each parsed datum is returned as a string.
| **`preview`** | `0` | If preview > 0, only that many rows will be parsed.
@ -61,7 +62,8 @@ Or to customize the settings, pass in a config object with any properties you wi
var results = $.parse(csvString, { var results = $.parse(csvString, {
delimiter: "\t", delimiter: "\t",
header: false, header: false,
dynamicTyping: false dynamicTyping: false,
preview: 10
}); });
``` ```
@ -155,6 +157,8 @@ The results will always have this basic structure:
} }
``` ```
If no delimiter is specified and a delimiter cannot be auto-detected, an error keyed by "config" will be produced, and a default delimiter will be chosen.
**Example input:** **Example input:**
Item,SKU,Cost,Quantity Item,SKU,Cost,Quantity
@ -399,7 +403,7 @@ The Parser component is under test. Download this repository and open `tests.htm
The Parser function The Parser function
------------------- -------------------
Inside this jQuery plugin is a `Parser` function that actually performs the parsing of delimited text. It does not depend upon jQuery. This plugin uses jQuery to attach to `<input type="file">` elements and to make it more convenient to activate the parsing mechanism. Inside this jQuery plugin is a `Parser` function that performs the parsing of delimited text. It does not depend upon jQuery. This plugin uses jQuery to attach to `<input type="file">` elements and to make it more convenient to activate and use the parsing mechanism.

12
index.html

@ -28,6 +28,10 @@
width: 80px; width: 80px;
} }
#tabdelim {
font-size: 12px;
}
.container { .container {
width: 100%; width: 100%;
} }
@ -53,7 +57,8 @@
<body> <body>
<div class="container"> <div class="container">
<div class="text-center"> <div class="text-center">
Delimiter: <input type="text" id="delim" value="," maxlength="1"> Delimiter: <input type="text" id="delim" value="" maxlength="1" placeholder="auto"> <a href="javascript:" id="tabdelim">(Tab)</a>
&nbsp; &nbsp;
<label><input type="checkbox" id="header" checked> Header row</label> <label><input type="checkbox" id="header" checked> Header row</label>
&nbsp; &nbsp; &nbsp; &nbsp;
<label><input type="checkbox" id="dyntype" checked> Dynamic typing</label> <label><input type="checkbox" id="dyntype" checked> Dynamic typing</label>
@ -118,6 +123,11 @@ $(function()
}); });
}); });
// Clicking the "(Tab)" link fills the delimiter box with a literal tab
// character, which cannot be typed into the text input directly.
function setTabDelimiter()
{
	var delimInput = $('#delim');
	delimInput.val("\t");
}
$('#tabdelim').click(setTabDelimiter);
function userConfig() function userConfig()
{ {
return { return {

125
jquery.parse.js

@ -1,6 +1,6 @@
/* /*
jQuery Parse Plugin jQuery Parse Plugin
v1.0.1 v1.1.0
https://github.com/mholt/jquery.parse https://github.com/mholt/jquery.parse
*/ */
@ -112,6 +112,8 @@
// Parser is the actual parsing component. // Parser is the actual parsing component.
// It is under test and does not depend on jQuery. // It is under test and does not depend on jQuery.
// You could rip this entire function out of the plugin
// and use it independently (with attribution).
function Parser(config) function Parser(config)
{ {
var self = this; var self = this;
@ -119,34 +121,16 @@
var _config = {}; var _config = {};
var _state = emptyState(); var _state = emptyState();
var _defaultConfig = { var _defaultConfig = {
delimiter: ",", delimiter: "",
header: true, header: true,
dynamicTyping: true dynamicTyping: true,
preview: 0
}; };
var _regex = { var _regex = {
floats: /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i, floats: /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i,
empty: /^\s*$/ empty: /^\s*$/
}; };
// Replaces the parser's active configuration with the given options after
// passing them through validConfig. Only the known option keys are copied.
this.setOptions = function(opt)
{
	var checked = validConfig(opt);
	var next = {};
	next.delimiter = checked.delimiter;
	next.header = checked.header;
	next.dynamicTyping = checked.dynamicTyping;
	_config = next;
};
// Returns a fresh object holding the current option values, so callers
// receive a copy rather than the internal _config object itself.
this.getOptions = function()
{
	var snapshot = {};
	snapshot.delimiter = _config.delimiter;
	snapshot.header = _config.header;
	snapshot.dynamicTyping = _config.dynamicTyping;
	return snapshot;
};
this.parse = function(input) this.parse = function(input)
{ {
if (typeof input !== 'string') if (typeof input !== 'string')
@ -154,8 +138,17 @@
reset(input); reset(input);
if (!_config.delimiter && !guessDelimiter(input))
{
addError("Delimiter", "UndetectableDelimiter", "Unable to auto-detect delimiting character; defaulted to comma", "config");
_config.delimiter = ",";
}
for (_state.i = 0; _state.i < _input.length; _state.i++) for (_state.i = 0; _state.i < _input.length; _state.i++)
{ {
if (_config.preview > 0 && _state.row >= _config.preview)
break;
_state.ch = _input[_state.i]; _state.ch = _input[_state.i];
_state.line += _state.ch; _state.line += _state.ch;
@ -175,6 +168,27 @@
return returnable(); return returnable();
}; };
// Replaces the parser's active configuration with the given options after
// passing them through validConfig. Only the known option keys
// (delimiter, header, dynamicTyping, preview) are copied.
this.setOptions = function(opt)
{
	var checked = validConfig(opt);
	var next = {};
	next.delimiter = checked.delimiter;
	next.header = checked.header;
	next.dynamicTyping = checked.dynamicTyping;
	next.preview = checked.preview;
	_config = next;
};
// Returns a fresh object holding the current option values, so callers
// receive a copy rather than the internal _config object itself.
this.getOptions = function()
{
	var snapshot = {};
	snapshot.delimiter = _config.delimiter;
	snapshot.header = _config.header;
	snapshot.dynamicTyping = _config.dynamicTyping;
	snapshot.preview = _config.preview;
	return snapshot;
};
this.setOptions(config); this.setOptions(config);
function validConfig(config) function validConfig(config)
@ -183,8 +197,8 @@
|| config.delimiter.length != 1) || config.delimiter.length != 1)
config.delimiter = _defaultConfig.delimiter; config.delimiter = _defaultConfig.delimiter;
if (config.delimiter == '"' || config.delimiter == "\n") if (config.delimiter == '"' || config.delimiter == "\n")
config.delimiter = _defaultConfig.delimiter; config.delimiter = _defaultConfig.delimiter;
if (typeof config.header !== 'boolean') if (typeof config.header !== 'boolean')
config.header = _defaultConfig.header; config.header = _defaultConfig.header;
@ -192,9 +206,61 @@
if (typeof config.dynamicTyping !== 'boolean') if (typeof config.dynamicTyping !== 'boolean')
config.dynamicTyping = _defaultConfig.dynamicTyping; config.dynamicTyping = _defaultConfig.dynamicTyping;
if (typeof config.preview !== 'number')
config.preview = _defaultConfig.preview;
return config; return config;
} }
// Attempts to auto-detect the delimiter of `input`.
//
// Each candidate delimiter is used to parse a 10-row preview of the input
// (no header, no dynamic typing). A candidate is scored by its "delta": the
// summed absolute change in field count from one row to the next, where
// rows with a single field are not compared. The candidate with the lowest
// delta wins, provided its rows average at least two fields; ties go to the
// candidate listed first.
//
// Side effect: stores the winner in _config.delimiter (undefined when no
// candidate qualifies).
// Returns true if a delimiter was detected, false otherwise.
function guessDelimiter(input)
{
	var delimiters = [",", "\t", "|", ";"];
	var bestDelim, bestDelta, fieldCountPrevRow;

	// Indexed loops instead of for...in, so only the array elements are
	// visited even if Array.prototype has been extended.
	for (var i = 0; i < delimiters.length; i++)
	{
		var delim = delimiters[i];
		var delta = 0, avgFieldCount = 0;

		// Must be cleared for every candidate. Otherwise the first row of
		// this candidate is compared with a field count left over from the
		// previous candidate, which inflates this candidate's delta.
		fieldCountPrevRow = undefined;

		var preview = new Parser({
			delimiter: delim,
			header: false,
			dynamicTyping: false,
			preview: 10
		}).parse(input);
		var rows = preview.results;

		for (var j = 0; j < rows.length; j++)
		{
			var fieldCount = rows[j].length;
			avgFieldCount += fieldCount;

			if (typeof fieldCountPrevRow === 'undefined')
			{
				// First row of this candidate: nothing to compare against yet.
				fieldCountPrevRow = fieldCount;
				continue;
			}
			else if (fieldCount > 1)
			{
				delta += Math.abs(fieldCount - fieldCountPrevRow);
				fieldCountPrevRow = fieldCount;
			}
		}

		// With zero rows this is NaN, which fails the comparison below, so
		// an empty preview never selects a delimiter.
		avgFieldCount /= rows.length;

		if ((typeof bestDelta === 'undefined' || delta < bestDelta)
			&& avgFieldCount > 1.99)
		{
			bestDelta = delta;
			bestDelim = delim;
		}
	}

	_config.delimiter = bestDelim;

	return !!bestDelim;
}
function emptyState() function emptyState()
{ {
return { return {
@ -393,16 +459,17 @@
return true; return true;
} }
function addError(type, code, msg) function addError(type, code, msg, errKey)
{ {
var row = _config.header var row = _config.header
? _state.parsed.rows.length - 1 ? (_state.parsed.rows.length ? _state.parsed.rows.length - 1 : undefined)
: _state.parsed.length - 1; : _state.parsed.length - 1;
var key = errKey || row;
if (typeof _state.errors[row] === 'undefined') if (typeof _state.errors[key] === 'undefined')
_state.errors[row] = []; _state.errors[key] = [];
_state.errors[row].push({ _state.errors[key].push({
type: type, type: type,
code: code, code: code,
message: msg, message: msg,

4
jquery.parse.min.js vendored

File diff suppressed because one or more lines are too long

5
parse.jquery.json

@ -1,8 +1,8 @@
{ {
"name": "parse", "name": "parse",
"version": "1.0.1", "version": "1.1.0",
"title": "jQuery Parse", "title": "jQuery Parse",
"description": "Parses CSV (character-separated, or delimited text) files or strings into arrays and objects efficiently. Gracefully handles errors. Supports multiple file inputs and multiple files per input element.", "description": "Efficiently parses CSV (character-separated / delimited text) files or strings into arrays and objects. Auto-detects delimiters. Gracefully handles errors. Supports parsing multiple files.",
"keywords": [ "keywords": [
"csv", "csv",
"parse", "parse",
@ -11,6 +11,7 @@
"delimited", "delimited",
"text", "text",
"data", "data",
"auto-detect",
"comma", "comma",
"tab", "tab",
"pipe", "pipe",

72
tests.js

@ -86,7 +86,29 @@ var resultSet1 = [
"length": 0 "length": 0
} }
} }
} },
{
config: { delimiter: "", header: true, dynamicTyping: true },
expected: {
"results": {
"fields": [
"F1",
"F2",
"F3"
],
"rows": [
{
"F1": "V1",
"F2": 2,
"F3": "V3"
}
]
},
"errors": {
"length": 0
}
}
},
]; ];
var tests = [ var tests = [
@ -110,6 +132,14 @@ var tests = [
input: "F1,F2,F3\r\n\r\nV1,2,V3", input: "F1,F2,F3\r\n\r\nV1,2,V3",
cases: resultSet1 cases: resultSet1
}, },
{
input: "F1,F2,F3\n\rV1,2,V3",
cases: resultSet1
},
{
input: "F1,F2,F3\rV1,2,V3",
cases: resultSet1
},
{ {
input: "F1,F2,F3\r\n \r\nV1,2,V3", input: "F1,F2,F3\r\n \r\nV1,2,V3",
cases: resultSet1 cases: resultSet1
@ -435,6 +465,46 @@ var tests = [
} }
} }
] ]
},
{
input: "F1\nV1\nV2\nV3\nV4",
cases: [
{
config: { delimiter: "", header: false, dynamicTyping: false },
expected: {
"results": [
[
"F1"
],
[
"V1"
],
[
"V2"
],
[
"V3"
],
[
"V4"
]
],
"errors": {
"length": 1,
"config": [
{
"type": "Delimiter",
"code": "UndetectableDelimiter",
"message": "Unable to auto-detect delimiting character; defaulted to comma",
"line": 1,
"row": 0,
"index": 0
}
]
}
}
}
]
} }
]; ];

Loading…
Cancel
Save