Browse Source

Add DelimitersToGuess config option (#555)

pull/628/head
Sergi Almacellas Abellana 6 years ago committed by GitHub
parent
commit
757b1bf6e0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 13
      docs/docs.html
  2. 11
      papaparse.js
  3. 38
      tests/test-cases.js

13
docs/docs.html

@ -355,7 +355,8 @@ var csv = Papa.unparse({
fastMode: undefined, fastMode: undefined,
beforeFirstChunk: undefined, beforeFirstChunk: undefined,
withCredentials: undefined, withCredentials: undefined,
transform: undefined transform: undefined,
delimitersToGuess: [',', '\t', '|', ';', <a href="#readonly">Papa.RECORD_SEP</a>, <a href="#readonly">Papa.UNIT_SEP</a>]
}</code></pre> }</code></pre>
</div> </div>
<div class="clear"></div> <div class="clear"></div>
@ -375,7 +376,7 @@ var csv = Papa.unparse({
<code>delimiter</code> <code>delimiter</code>
</td> </td>
<td> <td>
The delimiting character. Leave blank to auto-detect from a list of most common delimiters. It can be a string or a function. If string, it must be one of length 1. If a function, it must accept the input as first parameter and it must return a string which will be used as delimiter. In both cases it cannot be found in <a href="#readonly">Papa.BAD_DELIMITERS</a>. The delimiting character. Leave blank to auto-detect from a list of most common delimiters, or any values passed in through <code>delimitersToGuess</code>. It can be a string or a function. If string, it must be one of length 1. If a function, it must accept the input as first parameter and it must return a string which will be used as delimiter. In both cases it cannot be found in <a href="#readonly">Papa.BAD_DELIMITERS</a>.
</td> </td>
</tr> </tr>
<tr> <tr>
@ -549,6 +550,14 @@ var csv = Papa.unparse({
A function to apply on each value. The function receives the value as its first argument and the column number or header name when enabled as its second argument. The return value of the function will replace the value it received. The transform function is applied before dynamicTyping. A function to apply on each value. The function receives the value as its first argument and the column number or header name when enabled as its second argument. The return value of the function will replace the value it received. The transform function is applied before dynamicTyping.
</td> </td>
</tr> </tr>
<tr>
<td>
<code>delimitersToGuess</code>
</td>
<td>
An array of delimiters to guess from if the <code>delimiter</code> option is not set.
</td>
</tr>
</table> </table>
</div> </div>
</div> </div>

11
papaparse.js

@ -1031,7 +1031,7 @@ License: MIT
_delimiterError = false; _delimiterError = false;
if (!_config.delimiter) if (!_config.delimiter)
{ {
var delimGuess = guessDelimiter(input, _config.newline, _config.skipEmptyLines, _config.comments); var delimGuess = guessDelimiter(input, _config.newline, _config.skipEmptyLines, _config.comments, _config.delimitersToGuess);
if (delimGuess.successful) if (delimGuess.successful)
_config.delimiter = delimGuess.bestDelimiter; _config.delimiter = delimGuess.bestDelimiter;
else else
@ -1215,14 +1215,15 @@ License: MIT
return _results; return _results;
} }
function guessDelimiter(input, newline, skipEmptyLines, comments) function guessDelimiter(input, newline, skipEmptyLines, comments, delimitersToGuess)
{ {
var delimChoices = [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP];
var bestDelim, bestDelta, fieldCountPrevRow; var bestDelim, bestDelta, fieldCountPrevRow;
for (var i = 0; i < delimChoices.length; i++) delimitersToGuess = delimitersToGuess || [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP];
for (var i = 0; i < delimitersToGuess.length; i++)
{ {
var delim = delimChoices[i]; var delim = delimitersToGuess[i];
var delta = 0, avgFieldCount = 0, emptyLinesCount = 0; var delta = 0, avgFieldCount = 0, emptyLinesCount = 0;
fieldCountPrevRow = undefined; fieldCountPrevRow = undefined;

38
tests/test-cases.js

@ -2188,7 +2188,6 @@ var CUSTOM_TESTS = [
}); });
} }
} }
]; ];
describe('Custom Tests', function() { describe('Custom Tests', function() {
@ -2205,3 +2204,40 @@ describe('Custom Tests', function() {
generateTest(CUSTOM_TESTS[i]); generateTest(CUSTOM_TESTS[i]);
} }
}); });
// Test cases for the `delimitersToGuess` parse option. Every case exposes a
// `run(callback)` that parses a single quoted row and hands the delimiter
// reported in the result's `meta` to `callback`; `expected` is the delimiter
// the case wants to see.
var DELIMITERS_TO_GUESS_TESTS = [
	{
		description: "Should correctly guess custom delimiter when passed delimiters to guess.",
		expected: "~",
		run: function(callback) {
			// Only these three candidates are offered to the parser.
			var config = { delimitersToGuess: ['~', '@', '%'] };
			callback(Papa.parse('"A"~"B"~"C"~"D"', config).meta.delimiter);
		}
	},
	{
		description: "Should still correctly guess default delimiters when delimiters to guess are not given.",
		expected: ",",
		run: function(callback) {
			// No config object at all: the parser is called with the input only.
			callback(Papa.parse('"A","B","C","D"').meta.delimiter);
		}
	}
];
// Registers one Mocha test per entry of DELIMITERS_TO_GUESS_TESTS.
describe('Delimiters to Guess Tests', function() {
	// Turns a single case object into a Mocha test. Cases flagged `disabled`
	// are registered through `it.skip` instead of `it`.
	function registerTest(testCase) {
		var register = testCase.disabled ? it.skip : it;
		register(testCase.description, function(done) {
			testCase.run(function(actual) {
				// Compare the JSON serialisations of the actual and expected values.
				assert.deepEqual(JSON.stringify(actual), JSON.stringify(testCase.expected));
				done();
			});
		});
	}

	DELIMITERS_TO_GUESS_TESTS.forEach(function(testCase) {
		registerTest(testCase);
	});
});

Loading…
Cancel
Save