Browse Source

New feature to auto-detect the delimiter if none is specified (closes #11)

pull/17/head 1.1.0
Matthew Holt 11 years ago
parent
commit
ee1091e88f
  1. 20
      README.md
  2. 12
      index.html
  3. 125
      jquery.parse.js
  4. 4
      jquery.parse.min.js
  5. 5
      parse.jquery.json
  6. 72
      tests.js

20
README.md

@ -5,11 +5,11 @@ The jQuery Parse plugin is a robust and efficient CSV (character-separated value @@ -5,11 +5,11 @@ The jQuery Parse plugin is a robust and efficient CSV (character-separated value
- Parses delimited text strings without any fuss
- Attach to `<input type="file">` elements to load and parse files from disk
- Automatically detects delimiter (or specify a delimiter yourself)
- Header row support
- Gracefully handles malformed data
- Optional dynamic typing so that numeric data is parsed as numbers
- Descriptive and contextual errors
- Custom delimiter
@ -36,11 +36,12 @@ For debug/dev: [jquery.parse.js](https://github.com/mholt/jquery.parse/blob/mast @@ -36,11 +36,12 @@ For debug/dev: [jquery.parse.js](https://github.com/mholt/jquery.parse/blob/mast
Any time you invoke the parser, you may customize it using a "config" object. It supports these properties:
| Option | Default | Description
|-------------------- | -------- | ---------------
| **`delimiter`** | `","` | The delimiting character. Must be a string with length 1. Can be any character except `\n` and `"`.
| **`header`** | `true` | If true, interpret the first row of parsed data as column titles; fields are returned separately from the data, and data will be returned keyed to its field name. Duplicate field names would be problematic. If false, the parser simply returns an array (list) of arrays (rows), including the first row.
| **`dynamicTyping`** | `true` | If true, fields that are only numeric will be converted to a number type. If false, each parsed datum is returned as a string.
| Option | Default | Description
|-------------------- | ------- | ---------------
| **`delimiter`** | `""` | The delimiting character. Leave blank to auto-detect. If you specify a delimiter, it must be a string of length 1, and cannot be `\n`, `\r`, or `"`.
| **`header`** | `true` | If true, interpret the first row of parsed data as column titles; fields are returned separately from the data, and data will be returned keyed to its field name. Duplicate field names would be problematic. If false, the parser simply returns an array (list) of arrays (rows), including the first row.
| **`dynamicTyping`** | `true` | If true, fields that are only numeric will be converted to a number type. If false, each parsed datum is returned as a string.
| **`preview`** | `0` | If preview > 0, only that many rows will be parsed.
@ -61,7 +62,8 @@ Or to customize the settings, pass in a config object with any properties you wi @@ -61,7 +62,8 @@ Or to customize the settings, pass in a config object with any properties you wi
var results = $.parse(csvString, {
delimiter: "\t",
header: false,
dynamicTyping: false
dynamicTyping: false,
preview: 10
});
```
@ -155,6 +157,8 @@ The results will always have this basic structure: @@ -155,6 +157,8 @@ The results will always have this basic structure:
}
```
If no delimiter is specified and one cannot be auto-detected, the parser falls back to a comma (`","`) and reports an error under the `"config"` key of the errors object.
**Example input:**
Item,SKU,Cost,Quantity
@ -399,7 +403,7 @@ The Parser component is under test. Download this repository and open `tests.htm @@ -399,7 +403,7 @@ The Parser component is under test. Download this repository and open `tests.htm
The Parser function
-------------------
Inside this jQuery plugin is a `Parser` function that actually performs the parsing of delimited text. It does not depend upon jQuery. This plugin uses jQuery to attach to `<input type="file">` elements and to make it more convenient to activate the parsing mechanism.
Inside this jQuery plugin is a `Parser` function that performs the parsing of delimited text. It does not depend upon jQuery. This plugin uses jQuery to attach to `<input type="file">` elements and to make it more convenient to activate and use the parsing mechanism.

12
index.html

@ -28,6 +28,10 @@ @@ -28,6 +28,10 @@
width: 80px;
}
#tabdelim {
font-size: 12px;
}
.container {
width: 100%;
}
@ -53,7 +57,8 @@ @@ -53,7 +57,8 @@
<body>
<div class="container">
<div class="text-center">
Delimiter: <input type="text" id="delim" value="," maxlength="1">
Delimiter: <input type="text" id="delim" value="" maxlength="1" placeholder="auto"> <a href="javascript:" id="tabdelim">(Tab)</a>
&nbsp; &nbsp;
<label><input type="checkbox" id="header" checked> Header row</label>
&nbsp; &nbsp;
<label><input type="checkbox" id="dyntype" checked> Dynamic typing</label>
@ -118,6 +123,11 @@ $(function() @@ -118,6 +123,11 @@ $(function()
});
});
// The "(Tab)" link puts a tab character into the delimiter text box.
$('#tabdelim').click(function insertTabDelimiter()
{
	var delimiterBox = $('#delim');
	delimiterBox.val("\t");
});
function userConfig()
{
return {

125
jquery.parse.js

@ -1,6 +1,6 @@ @@ -1,6 +1,6 @@
/*
jQuery Parse Plugin
v1.0.1
v1.1.0
https://github.com/mholt/jquery.parse
*/
@ -112,6 +112,8 @@ @@ -112,6 +112,8 @@
// Parser is the actual parsing component.
// It is under test and does not depend on jQuery.
// You could rip this entire function out of the plugin
// and use it independently (with attribution).
function Parser(config)
{
var self = this;
@ -119,34 +121,16 @@ @@ -119,34 +121,16 @@
var _config = {};
var _state = emptyState();
var _defaultConfig = {
delimiter: ",",
delimiter: "",
header: true,
dynamicTyping: true
dynamicTyping: true,
preview: 0
};
var _regex = {
floats: /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i,
empty: /^\s*$/
};
this.setOptions = function(opt)
{
opt = validConfig(opt);
_config = {
delimiter: opt.delimiter,
header: opt.header,
dynamicTyping: opt.dynamicTyping
};
};
this.getOptions = function()
{
return {
delimiter: _config.delimiter,
header: _config.header,
dynamicTyping: _config.dynamicTyping
};
};
this.parse = function(input)
{
if (typeof input !== 'string')
@ -154,8 +138,17 @@ @@ -154,8 +138,17 @@
reset(input);
if (!_config.delimiter && !guessDelimiter(input))
{
addError("Delimiter", "UndetectableDelimiter", "Unable to auto-detect delimiting character; defaulted to comma", "config");
_config.delimiter = ",";
}
for (_state.i = 0; _state.i < _input.length; _state.i++)
{
if (_config.preview > 0 && _state.row >= _config.preview)
break;
_state.ch = _input[_state.i];
_state.line += _state.ch;
@ -175,6 +168,27 @@ @@ -175,6 +168,27 @@
return returnable();
};
// Replaces the active configuration. The supplied options are first passed
// through validConfig(); the four recognized settings are then copied into
// a brand-new object that becomes _config.
this.setOptions = function(opt)
{
	var checked = validConfig(opt);
	var names = ["delimiter", "header", "dynamicTyping", "preview"];
	var fresh = {};

	for (var n = 0; n < names.length; n++)
		fresh[names[n]] = checked[names[n]];

	_config = fresh;
};
// Returns a snapshot of the active configuration. A new object is built on
// every call, so assigning to its properties does not modify _config.
this.getOptions = function()
{
	var snapshot = {};
	snapshot.delimiter = _config.delimiter;
	snapshot.header = _config.header;
	snapshot.dynamicTyping = _config.dynamicTyping;
	snapshot.preview = _config.preview;
	return snapshot;
};
this.setOptions(config);
function validConfig(config)
@ -183,8 +197,8 @@ @@ -183,8 +197,8 @@
|| config.delimiter.length != 1)
config.delimiter = _defaultConfig.delimiter;
if (config.delimiter == '"' || config.delimiter == "\n")
config.delimiter = _defaultConfig.delimiter;
// A delimiter may not be the quote character or a line-break character.
// Fall back to the default delimiter in that case. (The previous version
// misspelled the property name on both lines, so a '"' delimiter was never
// rejected and the reset wrote to an unrelated property.)
if (config.delimiter === '"' || config.delimiter === "\n" || config.delimiter === "\r")
	config.delimiter = _defaultConfig.delimiter;
if (typeof config.header !== 'boolean')
config.header = _defaultConfig.header;
@ -192,9 +206,61 @@ @@ -192,9 +206,61 @@
if (typeof config.dynamicTyping !== 'boolean')
config.dynamicTyping = _defaultConfig.dynamicTyping;
if (typeof config.preview !== 'number')
config.preview = _defaultConfig.preview;
return config;
}
// Attempts to detect the delimiter used by `input`.
//
// Each candidate delimiter is used to parse a preview (at most 10 rows) of
// the input. For every candidate we compute:
//   - delta: the summed absolute change in field count from one row to the
//     next (rows with a single field are not compared), and
//   - the average number of fields per row.
// The candidate with the smallest delta whose average field count exceeds
// 1.99 wins; on a tie the earlier candidate in the list is kept.
//
// Side effect: stores the winner (or undefined when nothing qualifies) in
// _config.delimiter.
// Returns true if a delimiter was chosen, false otherwise.
function guessDelimiter(input)
{
	var delimiters = [",", "\t", "|", ";"];
	var bestDelim, bestDelta;

	// Index loops rather than for...in: for...in also visits enumerable
	// properties inherited from Array.prototype.
	for (var i = 0; i < delimiters.length; i++)
	{
		var delim = delimiters[i];
		var delta = 0, totalFields = 0;

		// Must be reset for every candidate. Otherwise the first row of
		// this candidate is compared against the last row count seen for
		// the previous candidate, which inflates delta.
		var fieldCountPrevRow = null;

		var rows = new Parser({
			delimiter: delim,
			header: false,
			dynamicTyping: false,
			preview: 10
		}).parse(input).results;

		for (var j = 0; j < rows.length; j++)
		{
			var fieldCount = rows[j].length;
			totalFields += fieldCount;

			if (fieldCountPrevRow === null)
			{
				// First row of this candidate: nothing to compare against yet.
				fieldCountPrevRow = fieldCount;
			}
			else if (fieldCount > 1)
			{
				delta += Math.abs(fieldCount - fieldCountPrevRow);
				fieldCountPrevRow = fieldCount;
			}
		}

		// No rows means no evidence for this candidate.
		var avgFieldCount = rows.length > 0 ? totalFields / rows.length : 0;

		if ((typeof bestDelta === 'undefined' || delta < bestDelta)
			&& avgFieldCount > 1.99)
		{
			bestDelta = delta;
			bestDelim = delim;
		}
	}

	_config.delimiter = bestDelim;

	return !!bestDelim;
}
function emptyState()
{
return {
@ -393,16 +459,17 @@ @@ -393,16 +459,17 @@
return true;
}
function addError(type, code, msg)
function addError(type, code, msg, errKey)
{
var row = _config.header
? _state.parsed.rows.length - 1
? (_state.parsed.rows.length ? _state.parsed.rows.length - 1 : undefined)
: _state.parsed.length - 1;
var key = errKey || row;
if (typeof _state.errors[row] === 'undefined')
_state.errors[row] = [];
if (typeof _state.errors[key] === 'undefined')
_state.errors[key] = [];
_state.errors[row].push({
_state.errors[key].push({
type: type,
code: code,
message: msg,

4
jquery.parse.min.js vendored

File diff suppressed because one or more lines are too long

5
parse.jquery.json

@ -1,8 +1,8 @@ @@ -1,8 +1,8 @@
{
"name": "parse",
"version": "1.0.1",
"version": "1.1.0",
"title": "jQuery Parse",
"description": "Parses CSV (character-separated, or delimited text) files or strings into arrays and objects efficiently. Gracefully handles errors. Supports multiple file inputs and multiple files per input element.",
"description": "Efficiently parses CSV (character-separated / delimited text) files or strings into arrays and objects. Auto-detects delimiters. Gracefully handles errors. Supports parsing multiple files.",
"keywords": [
"csv",
"parse",
@ -11,6 +11,7 @@ @@ -11,6 +11,7 @@
"delimited",
"text",
"data",
"auto-detect",
"comma",
"tab",
"pipe",

72
tests.js

@ -86,7 +86,29 @@ var resultSet1 = [ @@ -86,7 +86,29 @@ var resultSet1 = [
"length": 0
}
}
}
},
{
config: { delimiter: "", header: true, dynamicTyping: true },
expected: {
"results": {
"fields": [
"F1",
"F2",
"F3"
],
"rows": [
{
"F1": "V1",
"F2": 2,
"F3": "V3"
}
]
},
"errors": {
"length": 0
}
}
},
];
var tests = [
@ -110,6 +132,14 @@ var tests = [ @@ -110,6 +132,14 @@ var tests = [
input: "F1,F2,F3\r\n\r\nV1,2,V3",
cases: resultSet1
},
{
input: "F1,F2,F3\n\rV1,2,V3",
cases: resultSet1
},
{
input: "F1,F2,F3\rV1,2,V3",
cases: resultSet1
},
{
input: "F1,F2,F3\r\n \r\nV1,2,V3",
cases: resultSet1
@ -435,6 +465,46 @@ var tests = [ @@ -435,6 +465,46 @@ var tests = [
}
}
]
},
{
input: "F1\nV1\nV2\nV3\nV4",
cases: [
{
config: { delimiter: "", header: false, dynamicTyping: false },
expected: {
"results": [
[
"F1"
],
[
"V1"
],
[
"V2"
],
[
"V3"
],
[
"V4"
]
],
"errors": {
"length": 1,
"config": [
{
"type": "Delimiter",
"code": "UndetectableDelimiter",
"message": "Unable to auto-detect delimiting character; defaulted to comma",
"line": 1,
"row": 0,
"index": 0
}
]
}
}
}
]
}
];

Loading…
Cancel
Save