Browse Source

Modified the guessDelimiter function (#687)

pull/694/head
morance 6 years ago committed by Sergi Almacellas Abellana
parent
commit
792641e36b
  1. papaparse.js (28 changes)
  2. tests/test-cases.js (10 changes)

28
papaparse.js

@ -1261,14 +1261,12 @@ License: MIT
return _results; return _results;
} }
function guessDelimiter(input, newline, skipEmptyLines, comments, delimitersToGuess) function guessDelimiter(input, newline, skipEmptyLines, comments, delimitersToGuess) {
{ var bestDelim, bestDelta, fieldCountPrevRow, maxFieldCount;
var bestDelim, bestDelta, fieldCountPrevRow;
delimitersToGuess = delimitersToGuess || [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP]; delimitersToGuess = delimitersToGuess || [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP];
for (var i = 0; i < delimitersToGuess.length; i++) for (var i = 0; i < delimitersToGuess.length; i++) {
{
var delim = delimitersToGuess[i]; var delim = delimitersToGuess[i];
var delta = 0, avgFieldCount = 0, emptyLinesCount = 0; var delta = 0, avgFieldCount = 0, emptyLinesCount = 0;
fieldCountPrevRow = undefined; fieldCountPrevRow = undefined;
@ -1280,23 +1278,19 @@ License: MIT
preview: 10 preview: 10
}).parse(input); }).parse(input);
for (var j = 0; j < preview.data.length; j++) for (var j = 0; j < preview.data.length; j++) {
{ if (skipEmptyLines && testEmptyLine(preview.data[j])) {
if (skipEmptyLines && testEmptyLine(preview.data[j]))
{
emptyLinesCount++; emptyLinesCount++;
continue; continue;
} }
var fieldCount = preview.data[j].length; var fieldCount = preview.data[j].length;
avgFieldCount += fieldCount; avgFieldCount += fieldCount;
if (typeof fieldCountPrevRow === 'undefined') if (typeof fieldCountPrevRow === 'undefined') {
{ fieldCountPrevRow = fieldCount;
fieldCountPrevRow = 0;
continue; continue;
} }
else if (fieldCount > 1) else if (fieldCount > 0) {
{
delta += Math.abs(fieldCount - fieldCountPrevRow); delta += Math.abs(fieldCount - fieldCountPrevRow);
fieldCountPrevRow = fieldCount; fieldCountPrevRow = fieldCount;
} }
@ -1305,11 +1299,11 @@ License: MIT
if (preview.data.length > 0) if (preview.data.length > 0)
avgFieldCount /= (preview.data.length - emptyLinesCount); avgFieldCount /= (preview.data.length - emptyLinesCount);
if ((typeof bestDelta === 'undefined' || delta > bestDelta) if ((typeof bestDelta === 'undefined' || delta <= bestDelta)
&& avgFieldCount > 1.99) && (typeof maxFieldCount === 'undefined' || avgFieldCount > maxFieldCount) && avgFieldCount > 1.99) {
{
bestDelta = delta; bestDelta = delta;
bestDelim = delim; bestDelim = delim;
maxFieldCount = avgFieldCount;
} }
} }

10
tests/test-cases.js

@ -1203,6 +1203,16 @@ var PARSE_TESTS = [
errors: [] errors: []
} }
}, },
{
description: "Pipe delimiter is guessed correctly choose avgFildCount max one",
notes: "Guessing the delimiter should work choose the min delta one and the max one",
config: {},
input: 'a,b,c\na,b,c|d|e|f',
expected: {
data: [['a', 'b', 'c'], ['a','b','c|d|e|f']],
errors: []
}
},
{ {
description: "Pipe delimiter is guessed correctly when first field are enclosed in quotes and contain delimiter characters", description: "Pipe delimiter is guessed correctly when first field are enclosed in quotes and contain delimiter characters",
notes: "Guessing the delimiter should work if the first field is enclosed in quotes, but others are not", notes: "Guessing the delimiter should work if the first field is enclosed in quotes, but others are not",

Loading…
Cancel
Save