Browse Source

Support for user-configuration of multiple potential delimiters

* Currently the 'delimiter' config option lets the user either specify
  the exact expected delimiter or pass in a function that
  entirely overrides PapaParse's delimiter-guessing function. I have a
  use-case where I want to use the built-in guessing function, but
  with a restricted set of potential delimiters rather than the default
  set. I imagine other users may also want to restrict this set, or
  otherwise customize it to only consider certain delimiters (such as
  unusual delimiters not currently in the default set).
* This change enables a third option for the 'delimiter' config setting,
  in addition to a string or a function: if the user passes in an array
  of strings, then that array will be used as the set of potential
  delimiters for the built-in guessing function.
pull/393/head
stu blair 8 years ago
parent
commit
a121aed760
  1. 19
      papaparse.js
  2. 18
      tests/test-cases.js

19
papaparse.js

@ -876,9 +876,9 @@
_config.newline = guessLineEndings(input); _config.newline = guessLineEndings(input);
_delimiterError = false; _delimiterError = false;
if (!_config.delimiter) if (!_config.delimiter || isArray(_config.delimiter))
{ {
var delimGuess = guessDelimiter(input, _config.newline); var delimGuess = guessDelimiter(input, _config.newline, _config.delimiter);
if (delimGuess.successful) if (delimGuess.successful)
_config.delimiter = delimGuess.bestDelimiter; _config.delimiter = delimGuess.bestDelimiter;
else else
@ -1040,10 +1040,14 @@
return _results; return _results;
} }
function guessDelimiter(input, newline) function guessDelimiter(input, newline, customDelimChoices)
{ {
var delimChoices = [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP]; var bestDelim, bestDelta, delimChoices, fieldCountPrevRow;
var bestDelim, bestDelta, fieldCountPrevRow;
if (!customDelimChoices)
delimChoices = [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP];
else
delimChoices = customDelimChoices;
for (var i = 0; i < delimChoices.length; i++) for (var i = 0; i < delimChoices.length; i++)
{ {
@ -1566,5 +1570,10 @@
return typeof func === 'function'; return typeof func === 'function';
} }
/**
 * Reports whether the given value is a genuine array.
 *
 * Prefers the standard Array.isArray, which cannot be fooled by a
 * Symbol.toStringTag override (a plain object tagged 'Array' is rejected,
 * and a real array with a custom tag is still accepted). The
 * Object.prototype.toString tag comparison is kept only as a fallback for
 * engines that do not provide Array.isArray.
 *
 * @param {*} arr - Value to inspect.
 * @returns {boolean} true when arr is an array, false otherwise.
 */
function isArray(arr)
{
	if (typeof Array.isArray === 'function')
		return Array.isArray(arr);

	// Legacy fallback: compare the internal class tag.
	return Object.prototype.toString.call(arr) === '[object Array]';
}
return Papa; return Papa;
})); }));

18
tests/test-cases.js

@ -634,6 +634,24 @@ var PARSE_TESTS = [
errors: [] errors: []
} }
}, },
{
description: "Custom array of potential delimiters",
input: "a\t,b\t,c\t",
config: { delimiter: ['|', ';', ','] },
expected: {
data: [["a\t", "b\t", "c\t"]],
errors: []
}
},
{
description: "Custom delimiter array, correctly guesses most prevalent one",
input: 'a;%b,%c,%d;',
config: { delimiter: ['%', ',', ';'] },
expected: {
data: [['a;', 'b,', 'c,', 'd;']],
errors: []
}
},
{ {
description: "Dynamic typing converts numeric literals", description: "Dynamic typing converts numeric literals",
input: '1,2.2,1e3\r\n-4,-4.5,-4e-5\r\n-,5a,5-2', input: '1,2.2,1e3\r\n-4,-4.5,-4e-5\r\n-,5a,5-2',

Loading…
Cancel
Save